Imported Upstream version 1.0.2d 41/46341/2 upstream/1.0.2d
author Yury Usishchev <y.usishchev@samsung.com>
Tue, 6 Oct 2015 11:05:17 +0000 (14:05 +0300)
committer Yury Usishchev <y.usishchev@samsung.com>
Tue, 6 Oct 2015 11:06:03 +0000 (14:06 +0300)
Change-Id: I565a3e3ac5176f83139175faa2d2a11a334e8908
Signed-off-by: Yury Usishchev <y.usishchev@samsung.com>
610 files changed:
CHANGES
Configure
FAQ
GitConfigure [new file with mode: 0755]
GitMake [new file with mode: 0755]
Makefile
Makefile.bak
Makefile.org
NEWS
README
apps/apps.c
apps/apps.h
apps/ca.c
apps/ciphers.c
apps/cms.c
apps/crl.c
apps/dgst.c
apps/dhparam.c
apps/ecparam.c
apps/genrsa.c
apps/makeapps.com
apps/ocsp.c
apps/openssl-vms.cnf
apps/openssl.cnf
apps/pkcs8.c
apps/s_apps.h
apps/s_cb.c
apps/s_client.c
apps/s_server.c
apps/s_socket.c
apps/smime.c
apps/speed.c
apps/verify.c
apps/x509.c
config
crypto/Makefile
crypto/aes/Makefile
crypto/aes/aes_wrap.c
crypto/aes/aes_x86core.c
crypto/aes/asm/aes-586.pl
crypto/aes/asm/aes-armv4.pl
crypto/aes/asm/aes-mips.pl
crypto/aes/asm/aes-ppc.pl
crypto/aes/asm/aes-x86_64.pl
crypto/aes/asm/aesni-mb-x86_64.pl [new file with mode: 0644]
crypto/aes/asm/aesni-sha1-x86_64.pl
crypto/aes/asm/aesni-sha256-x86_64.pl [new file with mode: 0644]
crypto/aes/asm/aesni-x86.pl
crypto/aes/asm/aesni-x86_64.pl
crypto/aes/asm/aesp8-ppc.pl [new file with mode: 0755]
crypto/aes/asm/aest4-sparcv9.pl [new file with mode: 0644]
crypto/aes/asm/aesv8-armx.pl [new file with mode: 0755]
crypto/aes/asm/bsaes-armv7.pl [new file with mode: 0644]
crypto/aes/asm/bsaes-x86_64.pl
crypto/aes/asm/vpaes-ppc.pl [new file with mode: 0644]
crypto/aes/asm/vpaes-x86.pl
crypto/aes/asm/vpaes-x86_64.pl
crypto/arm64cpuid.S [new file with mode: 0644]
crypto/arm_arch.h
crypto/armcap.c
crypto/armv4cpuid.S
crypto/asn1/Makefile
crypto/asn1/a_gentm.c
crypto/asn1/a_time.c
crypto/asn1/a_utctm.c
crypto/asn1/ameth_lib.c
crypto/asn1/asn1.h
crypto/asn1/asn1_locl.h
crypto/asn1/t_x509.c
crypto/asn1/x_crl.c
crypto/asn1/x_x509.c
crypto/bio/b_dump.c
crypto/bio/b_sock.c
crypto/bio/bio.h
crypto/bio/bio_err.c
crypto/bio/bss_acpt.c
crypto/bio/bss_conn.c
crypto/bio/bss_dgram.c
crypto/bio/bss_fd.c
crypto/bn/Makefile
crypto/bn/asm/armv4-gf2m.pl
crypto/bn/asm/armv4-mont.pl
crypto/bn/asm/mips-mont.pl
crypto/bn/asm/mips.pl
crypto/bn/asm/mips3.s [new file with mode: 0644]
crypto/bn/asm/modexp512-x86_64.pl [deleted file]
crypto/bn/asm/ppc-mont.pl
crypto/bn/asm/ppc.pl
crypto/bn/asm/ppc64-mont.pl
crypto/bn/asm/rsaz-avx2.pl [new file with mode: 0755]
crypto/bn/asm/rsaz-x86_64.pl [new file with mode: 0755]
crypto/bn/asm/sparct4-mont.pl [new file with mode: 0755]
crypto/bn/asm/sparcv9-gf2m.pl [new file with mode: 0644]
crypto/bn/asm/vis3-mont.pl [new file with mode: 0644]
crypto/bn/asm/x86_64-gcc.c
crypto/bn/asm/x86_64-mont.pl
crypto/bn/asm/x86_64-mont5.pl
crypto/bn/bn.h
crypto/bn/bn_asm.c
crypto/bn/bn_exp.c
crypto/bn/bn_gf2m.c
crypto/bn/bn_lcl.h
crypto/bn/bntest.c
crypto/bn/rsaz_exp.c [new file with mode: 0644]
crypto/bn/rsaz_exp.h [new file with mode: 0644]
crypto/buffer/buf_str.c
crypto/buffer/buffer.h
crypto/camellia/Makefile
crypto/camellia/asm/cmll-x86_64.pl
crypto/camellia/asm/cmllt4-sparcv9.pl [new file with mode: 0644]
crypto/cast/cast_lcl.h
crypto/cms/Makefile
crypto/cms/cms.h
crypto/cms/cms_asn1.c
crypto/cms/cms_env.c
crypto/cms/cms_err.c
crypto/cms/cms_kari.c [new file with mode: 0644]
crypto/cms/cms_lcl.h
crypto/cms/cms_lib.c
crypto/cms/cms_sd.c
crypto/cms/cms_smime.c
crypto/cryptlib.c
crypto/crypto-lib.com
crypto/cversion.c
crypto/des/Makefile
crypto/des/asm/des-586.pl
crypto/des/asm/des_enc.m4
crypto/des/asm/dest4-sparcv9.pl [new file with mode: 0644]
crypto/des/des_locl.h
crypto/des/read_pwd.c
crypto/dh/Makefile
crypto/dh/dh.h
crypto/dh/dh_ameth.c
crypto/dh/dh_asn1.c
crypto/dh/dh_check.c
crypto/dh/dh_err.c
crypto/dh/dh_kdf.c [new file with mode: 0644]
crypto/dh/dh_key.c
crypto/dh/dh_pmeth.c
crypto/dh/dh_rfc5114.c [new file with mode: 0644]
crypto/dh/dhtest.c
crypto/dsa/dsa.h
crypto/dsa/dsa_ameth.c
crypto/dsa/dsa_err.c
crypto/dsa/dsa_gen.c
crypto/dsa/dsa_locl.h
crypto/dsa/dsa_ossl.c
crypto/dsa/dsa_pmeth.c
crypto/dso/dso_win32.c
crypto/ebcdic.c
crypto/ec/Makefile
crypto/ec/asm/ecp_nistz256-avx2.pl [new file with mode: 0755]
crypto/ec/asm/ecp_nistz256-x86_64.pl [new file with mode: 0755]
crypto/ec/ec.h
crypto/ec/ec_ameth.c
crypto/ec/ec_curve.c
crypto/ec/ec_cvt.c
crypto/ec/ec_err.c
crypto/ec/ec_lcl.h
crypto/ec/ec_lib.c
crypto/ec/ec_pmeth.c
crypto/ec/eck_prn.c
crypto/ec/ecp_nistp521.c
crypto/ec/ecp_nistz256.c [new file with mode: 0644]
crypto/ec/ecp_nistz256_table.c [new file with mode: 0644]
crypto/ecdh/Makefile
crypto/ecdh/ecdh.h
crypto/ecdh/ecdhtest.c
crypto/ecdh/ech_kdf.c [new file with mode: 0644]
crypto/ecdh/ech_ossl.c
crypto/ecdsa/ecdsa.h
crypto/ecdsa/ecs_err.c
crypto/ecdsa/ecs_lib.c
crypto/ecdsa/ecs_locl.h
crypto/ecdsa/ecs_ossl.c
crypto/engine/Makefile
crypto/engine/eng_all.c
crypto/engine/eng_cryptodev.c
crypto/engine/eng_rsax.c [deleted file]
crypto/engine/engine.h
crypto/err/openssl.ec
crypto/evp/Makefile
crypto/evp/c_allc.c
crypto/evp/digest.c
crypto/evp/e_aes.c
crypto/evp/e_aes_cbc_hmac_sha1.c
crypto/evp/e_aes_cbc_hmac_sha256.c [new file with mode: 0644]
crypto/evp/e_camellia.c
crypto/evp/e_des.c
crypto/evp/e_des3.c
crypto/evp/e_null.c
crypto/evp/encode.c
crypto/evp/evp.h
crypto/evp/evp_enc.c
crypto/evp/evp_err.c
crypto/evp/evp_extra_test.c
crypto/evp/evp_fips.c [deleted file]
crypto/evp/evp_lib.c
crypto/evp/evp_locl.h
crypto/evp/evp_test.c
crypto/evp/evptests.txt
crypto/evp/m_dss.c
crypto/evp/m_dss1.c
crypto/evp/m_ecdsa.c
crypto/evp/m_sha1.c
crypto/evp/m_sigver.c
crypto/evp/p_lib.c
crypto/evp/pmeth_lib.c
crypto/hmac/hm_ameth.c
crypto/hmac/hmac.c
crypto/hmac/hmactest.c
crypto/install-crypto.com
crypto/jpake/jpake.c
crypto/md32_common.h
crypto/md5/Makefile
crypto/md5/asm/md5-sparcv9.pl [new file with mode: 0644]
crypto/md5/md5_locl.h
crypto/modes/Makefile
crypto/modes/asm/aesni-gcm-x86_64.pl [new file with mode: 0644]
crypto/modes/asm/ghash-armv4.pl
crypto/modes/asm/ghash-s390x.pl
crypto/modes/asm/ghash-sparcv9.pl
crypto/modes/asm/ghash-x86.pl
crypto/modes/asm/ghash-x86_64.pl
crypto/modes/asm/ghashp8-ppc.pl [new file with mode: 0755]
crypto/modes/asm/ghashv8-armx.pl [new file with mode: 0644]
crypto/modes/cbc128.c
crypto/modes/gcm128.c
crypto/modes/modes.h
crypto/modes/modes_lcl.h
crypto/modes/wrap128.c [new file with mode: 0644]
crypto/o_str.c
crypto/o_time.c
crypto/o_time.h
crypto/objects/obj_dat.h
crypto/objects/obj_mac.h
crypto/objects/obj_mac.num
crypto/objects/obj_xref.h
crypto/objects/obj_xref.txt
crypto/objects/objects.txt
crypto/objects/objxref.pl
crypto/ocsp/ocsp.h
crypto/ocsp/ocsp_ht.c
crypto/ocsp/ocsp_lib.c
crypto/opensslconf.h
crypto/opensslv.h
crypto/ossl_typ.h
crypto/pem/Makefile
crypto/pem/pem.h
crypto/pem/pem_all.c
crypto/pem/pem_err.c
crypto/pem/pem_lib.c
crypto/pem/pem_pkey.c
crypto/perlasm/ppc-xlate.pl
crypto/perlasm/sparcv9_modes.pl [new file with mode: 0644]
crypto/perlasm/x86_64-xlate.pl
crypto/perlasm/x86asm.pl
crypto/perlasm/x86gas.pl
crypto/perlasm/x86masm.pl
crypto/perlasm/x86nasm.pl
crypto/pkcs12/p12_decr.c
crypto/pkcs12/p12_p8e.c
crypto/ppc_arch.h [new file with mode: 0644]
crypto/ppccap.c
crypto/ppccpuid.pl
crypto/rand/rand_win.c
crypto/rc4/Makefile
crypto/rc4/asm/rc4-586.pl
crypto/rc4/rc4_enc.c
crypto/rc5/rc5_locl.h
crypto/rsa/Makefile
crypto/rsa/rsa.h
crypto/rsa/rsa_ameth.c
crypto/rsa/rsa_asn1.c
crypto/rsa/rsa_err.c
crypto/rsa/rsa_oaep.c
crypto/rsa/rsa_pmeth.c
crypto/rsa/rsa_sign.c
crypto/sha/Makefile
crypto/sha/asm/sha1-586.pl
crypto/sha/asm/sha1-armv4-large.pl
crypto/sha/asm/sha1-armv8.pl [new file with mode: 0644]
crypto/sha/asm/sha1-mb-x86_64.pl [new file with mode: 0644]
crypto/sha/asm/sha1-mips.pl
crypto/sha/asm/sha1-ppc.pl
crypto/sha/asm/sha1-sparcv9.pl
crypto/sha/asm/sha1-x86_64.pl
crypto/sha/asm/sha256-586.pl
crypto/sha/asm/sha256-armv4.pl
crypto/sha/asm/sha256-mb-x86_64.pl [new file with mode: 0644]
crypto/sha/asm/sha512-586.pl
crypto/sha/asm/sha512-armv4.pl
crypto/sha/asm/sha512-armv8.pl [new file with mode: 0644]
crypto/sha/asm/sha512-ia64.pl
crypto/sha/asm/sha512-mips.pl
crypto/sha/asm/sha512-ppc.pl
crypto/sha/asm/sha512-sparcv9.pl
crypto/sha/asm/sha512-x86_64.pl
crypto/sha/asm/sha512p8-ppc.pl [new file with mode: 0755]
crypto/sha/sha512.c
crypto/sparc_arch.h [new file with mode: 0644]
crypto/sparccpuid.S
crypto/sparcv9cap.c
crypto/srp/Makefile
crypto/srp/srptest.c
crypto/stack/safestack.h
crypto/stack/stack.c
crypto/stack/stack.h
crypto/symhacks.h
crypto/ts/ts_rsp_sign.c
crypto/ts/ts_rsp_verify.c
crypto/ui/ui_openssl.c
crypto/whrlpool/asm/wp-mmx.pl
crypto/whrlpool/asm/wp-x86_64.pl
crypto/x509/Makefile
crypto/x509/verify_extra_test.c
crypto/x509/vpm_int.h [new file with mode: 0644]
crypto/x509/x509.h
crypto/x509/x509_cmp.c
crypto/x509/x509_err.c
crypto/x509/x509_lu.c
crypto/x509/x509_set.c
crypto/x509/x509_trs.c
crypto/x509/x509_txt.c
crypto/x509/x509_vfy.c
crypto/x509/x509_vfy.h
crypto/x509/x509_vpm.c
crypto/x509/x_all.c
crypto/x509v3/Makefile
crypto/x509v3/ext_dat.h
crypto/x509v3/v3_lib.c
crypto/x509v3/v3_purp.c
crypto/x509v3/v3_scts.c [new file with mode: 0644]
crypto/x509v3/v3_utl.c
crypto/x509v3/v3err.c
crypto/x509v3/v3nametest.c [new file with mode: 0644]
crypto/x509v3/x509v3.h
crypto/x86_64cpuid.pl
crypto/x86cpuid.pl
demos/bio/Makefile
demos/bio/README
demos/bio/accept.cnf [new file with mode: 0644]
demos/bio/client-arg.c [new file with mode: 0644]
demos/bio/client-conf.c [new file with mode: 0644]
demos/bio/connect.cnf [new file with mode: 0644]
demos/bio/saccept.c
demos/bio/server-arg.c [new file with mode: 0644]
demos/bio/server-conf.c [new file with mode: 0644]
demos/bio/server.pem
doc/apps/c_rehash.pod
doc/apps/ciphers.pod
doc/apps/cms.pod
doc/apps/genpkey.pod
doc/apps/ocsp.pod
doc/apps/pkcs8.pod
doc/apps/req.pod
doc/apps/s_client.pod
doc/apps/s_server.pod
doc/apps/smime.pod
doc/apps/verify.pod
doc/apps/x509.pod
doc/crypto/ASN1_STRING_length.pod
doc/crypto/ASN1_STRING_print_ex.pod
doc/crypto/ASN1_TIME_set.pod [new file with mode: 0644]
doc/crypto/BIO_f_ssl.pod
doc/crypto/BIO_find_type.pod
doc/crypto/BIO_s_accept.pod
doc/crypto/BIO_s_connect.pod
doc/crypto/BN_BLINDING_new.pod
doc/crypto/BN_CTX_new.pod
doc/crypto/BN_generate_prime.pod
doc/crypto/BN_rand.pod
doc/crypto/CMS_add0_cert.pod
doc/crypto/CMS_get0_RecipientInfos.pod
doc/crypto/CMS_get0_SignerInfos.pod
doc/crypto/CMS_verify.pod
doc/crypto/DH_generate_parameters.pod
doc/crypto/DSA_generate_parameters.pod
doc/crypto/EC_GFp_simple_method.pod [new file with mode: 0644]
doc/crypto/EC_GROUP_copy.pod [new file with mode: 0644]
doc/crypto/EC_GROUP_new.pod [new file with mode: 0644]
doc/crypto/EC_KEY_new.pod [new file with mode: 0644]
doc/crypto/EC_POINT_add.pod [new file with mode: 0644]
doc/crypto/EC_POINT_new.pod [new file with mode: 0644]
doc/crypto/ERR_remove_state.pod
doc/crypto/EVP_BytesToKey.pod
doc/crypto/EVP_DigestInit.pod
doc/crypto/EVP_DigestVerifyInit.pod
doc/crypto/EVP_EncryptInit.pod
doc/crypto/EVP_PKEY_CTX_ctrl.pod
doc/crypto/EVP_PKEY_cmp.pod
doc/crypto/OPENSSL_VERSION_NUMBER.pod
doc/crypto/OPENSSL_config.pod
doc/crypto/OPENSSL_ia32cap.pod
doc/crypto/OPENSSL_instrument_bus.pod [new file with mode: 0644]
doc/crypto/OPENSSL_load_builtin_modules.pod
doc/crypto/OpenSSL_add_all_algorithms.pod
doc/crypto/PKCS7_verify.pod
doc/crypto/RAND_egd.pod
doc/crypto/RSA_generate_key.pod
doc/crypto/SSLeay_version.pod [new file with mode: 0644]
doc/crypto/X509_NAME_add_entry_by_txt.pod
doc/crypto/X509_STORE_CTX_get_error.pod
doc/crypto/X509_VERIFY_PARAM_set_flags.pod
doc/crypto/X509_check_host.pod [new file with mode: 0644]
doc/crypto/crypto.pod
doc/crypto/d2i_DSAPublicKey.pod
doc/crypto/d2i_ECPKParameters.pod [new file with mode: 0644]
doc/crypto/d2i_X509.pod
doc/crypto/d2i_X509_CRL.pod
doc/crypto/ec.pod [new file with mode: 0644]
doc/crypto/ecdsa.pod
doc/crypto/evp.pod
doc/crypto/hmac.pod
doc/crypto/i2d_PKCS7_bio_stream.pod
doc/crypto/rand.pod
doc/crypto/sha.pod
doc/ssl/SSL_CIPHER_get_name.pod
doc/ssl/SSL_COMP_add_compression_method.pod
doc/ssl/SSL_CONF_CTX_new.pod [new file with mode: 0644]
doc/ssl/SSL_CONF_CTX_set1_prefix.pod [new file with mode: 0644]
doc/ssl/SSL_CONF_CTX_set_flags.pod [new file with mode: 0644]
doc/ssl/SSL_CONF_CTX_set_ssl_ctx.pod [new file with mode: 0644]
doc/ssl/SSL_CONF_cmd.pod [new file with mode: 0644]
doc/ssl/SSL_CONF_cmd_argv.pod [new file with mode: 0644]
doc/ssl/SSL_CTX_add1_chain_cert.pod [new file with mode: 0644]
doc/ssl/SSL_CTX_add_extra_chain_cert.pod
doc/ssl/SSL_CTX_get0_param.pod [new file with mode: 0644]
doc/ssl/SSL_CTX_sess_set_cache_size.pod
doc/ssl/SSL_CTX_set1_curves.pod [new file with mode: 0644]
doc/ssl/SSL_CTX_set1_verify_cert_store.pod [new file with mode: 0644]
doc/ssl/SSL_CTX_set_cert_cb.pod [new file with mode: 0644]
doc/ssl/SSL_CTX_set_cert_store.pod
doc/ssl/SSL_CTX_set_cipher_list.pod
doc/ssl/SSL_CTX_set_custom_cli_ext.pod [new file with mode: 0644]
doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod
doc/ssl/SSL_CTX_use_certificate.pod
doc/ssl/SSL_CTX_use_psk_identity_hint.pod
doc/ssl/SSL_CTX_use_serverinfo.pod [new file with mode: 0644]
doc/ssl/SSL_accept.pod
doc/ssl/SSL_do_handshake.pod
doc/ssl/SSL_shutdown.pod
doc/ssl/ssl.pod
doc/ssleay.txt
e_os.h
e_os2.h
engines/Makefile
engines/ccgost/Makefile
engines/ccgost/gost89.c
engines/ccgost/gost_crypt.c
engines/ccgost/gost_pmeth.c
engines/e_capi.c
engines/makeengines.com
engines/vendor_defns/hwcryptohook.h
makevms.com
openssl.spec
ssl/Makefile
ssl/d1_both.c
ssl/d1_clnt.c
ssl/d1_enc.c [deleted file]
ssl/d1_lib.c
ssl/d1_meth.c
ssl/d1_pkt.c
ssl/d1_srtp.c
ssl/d1_srvr.c
ssl/dtls1.h
ssl/heartbeat_test.c
ssl/s23_clnt.c
ssl/s23_srvr.c
ssl/s2_clnt.c
ssl/s2_lib.c
ssl/s3_both.c
ssl/s3_cbc.c
ssl/s3_clnt.c
ssl/s3_enc.c
ssl/s3_lib.c
ssl/s3_pkt.c
ssl/s3_srvr.c
ssl/srtp.h
ssl/ssl-lib.com
ssl/ssl.h
ssl/ssl3.h
ssl/ssl_algs.c
ssl/ssl_cert.c
ssl/ssl_ciph.c
ssl/ssl_conf.c [new file with mode: 0644]
ssl/ssl_err.c
ssl/ssl_lib.c
ssl/ssl_locl.h
ssl/ssl_rsa.c
ssl/ssl_sess.c
ssl/ssl_txt.c
ssl/ssltest.c
ssl/t1_clnt.c
ssl/t1_enc.c
ssl/t1_ext.c [new file with mode: 0644]
ssl/t1_lib.c
ssl/t1_meth.c
ssl/t1_srvr.c
ssl/t1_trce.c [new file with mode: 0644]
ssl/tls1.h
test/Makefile
test/cms-test.pl
test/evptests.txt
test/maketests.com
test/ocsp-tests/D1.ors [new file with mode: 0644]
test/ocsp-tests/D1_Cert_EE.pem [new file with mode: 0644]
test/ocsp-tests/D1_Issuer_ICA.pem [new file with mode: 0644]
test/ocsp-tests/D2.ors [new file with mode: 0644]
test/ocsp-tests/D2_Cert_ICA.pem [new file with mode: 0644]
test/ocsp-tests/D2_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/D3.ors [new file with mode: 0644]
test/ocsp-tests/D3_Cert_EE.pem [new file with mode: 0644]
test/ocsp-tests/D3_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/ISDOSC_D1.ors [new file with mode: 0644]
test/ocsp-tests/ISDOSC_D2.ors [new file with mode: 0644]
test/ocsp-tests/ISDOSC_D3.ors [new file with mode: 0644]
test/ocsp-tests/ISIC_D1_Issuer_ICA.pem [new file with mode: 0644]
test/ocsp-tests/ISIC_D2_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/ISIC_D3_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/ISIC_ND1_Issuer_ICA.pem [new file with mode: 0644]
test/ocsp-tests/ISIC_ND2_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/ISIC_ND3_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/ISOP_D1.ors [new file with mode: 0644]
test/ocsp-tests/ISOP_D2.ors [new file with mode: 0644]
test/ocsp-tests/ISOP_D3.ors [new file with mode: 0644]
test/ocsp-tests/ISOP_ND1.ors [new file with mode: 0644]
test/ocsp-tests/ISOP_ND2.ors [new file with mode: 0644]
test/ocsp-tests/ISOP_ND3.ors [new file with mode: 0644]
test/ocsp-tests/ND1.ors [new file with mode: 0644]
test/ocsp-tests/ND1_Cert_EE.pem [new file with mode: 0644]
test/ocsp-tests/ND1_Issuer_ICA.pem [new file with mode: 0644]
test/ocsp-tests/ND2.ors [new file with mode: 0644]
test/ocsp-tests/ND2_Cert_ICA.pem [new file with mode: 0644]
test/ocsp-tests/ND2_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/ND3.ors [new file with mode: 0644]
test/ocsp-tests/ND3_Cert_EE.pem [new file with mode: 0644]
test/ocsp-tests/ND3_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/WIKH_D1.ors [new file with mode: 0644]
test/ocsp-tests/WIKH_D2.ors [new file with mode: 0644]
test/ocsp-tests/WIKH_D3.ors [new file with mode: 0644]
test/ocsp-tests/WIKH_ND1.ors [new file with mode: 0644]
test/ocsp-tests/WIKH_ND2.ors [new file with mode: 0644]
test/ocsp-tests/WIKH_ND3.ors [new file with mode: 0644]
test/ocsp-tests/WINH_D1.ors [new file with mode: 0644]
test/ocsp-tests/WINH_D2.ors [new file with mode: 0644]
test/ocsp-tests/WINH_D3.ors [new file with mode: 0644]
test/ocsp-tests/WINH_ND1.ors [new file with mode: 0644]
test/ocsp-tests/WINH_ND2.ors [new file with mode: 0644]
test/ocsp-tests/WINH_ND3.ors [new file with mode: 0644]
test/ocsp-tests/WKDOSC_D1.ors [new file with mode: 0644]
test/ocsp-tests/WKDOSC_D2.ors [new file with mode: 0644]
test/ocsp-tests/WKDOSC_D3.ors [new file with mode: 0644]
test/ocsp-tests/WKIC_D1_Issuer_ICA.pem [new file with mode: 0644]
test/ocsp-tests/WKIC_D2_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/WKIC_D3_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/WKIC_ND1_Issuer_ICA.pem [new file with mode: 0644]
test/ocsp-tests/WKIC_ND2_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/WKIC_ND3_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/WRID_D1.ors [new file with mode: 0644]
test/ocsp-tests/WRID_D2.ors [new file with mode: 0644]
test/ocsp-tests/WRID_D3.ors [new file with mode: 0644]
test/ocsp-tests/WRID_ND1.ors [new file with mode: 0644]
test/ocsp-tests/WRID_ND2.ors [new file with mode: 0644]
test/ocsp-tests/WRID_ND3.ors [new file with mode: 0644]
test/ocsp-tests/WSNIC_D1_Issuer_ICA.pem [new file with mode: 0644]
test/ocsp-tests/WSNIC_D2_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/WSNIC_D3_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/WSNIC_ND1_Issuer_ICA.pem [new file with mode: 0644]
test/ocsp-tests/WSNIC_ND2_Issuer_Root.pem [new file with mode: 0644]
test/ocsp-tests/WSNIC_ND3_Issuer_Root.pem [new file with mode: 0644]
test/serverinfo.pem [new file with mode: 0644]
test/smime-certs/ca.cnf [new file with mode: 0644]
test/smime-certs/mksmime-certs.sh [new file with mode: 0644]
test/smime-certs/smdh.pem [new file with mode: 0644]
test/smime-certs/smdsa1.pem
test/smime-certs/smdsa2.pem
test/smime-certs/smdsa3.pem
test/smime-certs/smec1.pem [new file with mode: 0644]
test/smime-certs/smec2.pem [new file with mode: 0644]
test/smime-certs/smroot.pem
test/smime-certs/smrsa1.pem
test/smime-certs/smrsa2.pem
test/smime-certs/smrsa3.pem
test/tcrl
test/testenc
test/tests.com
test/testssl
test/tocsp [new file with mode: 0644]
test/tocsp.com [new file with mode: 0644]
test/tpkcs7
test/tpkcs7d
test/treq
test/trsa
test/tsid
test/tx509
test/v3nametest.c [new symlink]
tools/c_rehash
tools/c_rehash.in
util/copy-if-different.pl [new file with mode: 0644]
util/files.pl
util/libeay.num
util/mk1mf.pl
util/mkdef.pl
util/mkerr.pl
util/mkstack.pl
util/pl/BC-32.pl
util/pl/VC-32.pl
util/pl/unix.pl
util/ssleay.num

diff --git a/CHANGES b/CHANGES
index 2e888f7..5d4c234 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -2,7 +2,7 @@
  OpenSSL CHANGES
  _______________
 
- Changes between 1.0.1o and 1.0.1p [9 Jul 2015]
+ Changes between 1.0.2c and 1.0.2d [9 Jul 2015]
 
   *) Alternate chains certificate forgery
 
      (Google/BoringSSL).
      [Matt Caswell]
 
- Changes between 1.0.1n and 1.0.1o [12 Jun 2015]
+ Changes between 1.0.2b and 1.0.2c [12 Jun 2015]
 
   *) Fix HMAC ABI incompatibility. The previous version introduced an ABI
      incompatibility in the handling of HMAC. The previous ABI has now been
      restored.
 
- Changes between 1.0.1m and 1.0.1n [11 Jun 2015]
+ Changes between 1.0.2a and 1.0.2b [11 Jun 2015]
 
   *) Malformed ECParameters causes infinite loop
 
      (CVE-2015-1791)
      [Matt Caswell]
 
+  *) Removed support for the two export grade static DH ciphersuites
+     EXP-DH-RSA-DES-CBC-SHA and EXP-DH-DSS-DES-CBC-SHA. These two ciphersuites
+     were newly added (along with a number of other static DH ciphersuites) to
+     1.0.2. However the two export ones have *never* worked since they were
+     introduced. It seems strange in any case to be adding new export
+     ciphersuites, and given "logjam" it also does not seem correct to fix them.
+     [Matt Caswell]
+
+  *) Only support 256-bit or stronger elliptic curves with the
+     'ecdh_auto' setting (server) or by default (client). Of supported
+     curves, prefer P-256 (both).
+     [Emilia Kasper]
+
   *) Reject DH handshakes with parameters shorter than 768 bits.
      [Kurt Roeckx and Emilia Kasper]
 
- Changes between 1.0.1l and 1.0.1m [19 Mar 2015]
+ Changes between 1.0.2 and 1.0.2a [19 Mar 2015]
+
+  *) ClientHello sigalgs DoS fix
+
+     If a client connects to an OpenSSL 1.0.2 server and renegotiates with an
+     invalid signature algorithms extension a NULL pointer dereference will
+     occur. This can be exploited in a DoS attack against the server.
+
+     This issue was reported to OpenSSL by David Ramos of Stanford
+     University.
+     (CVE-2015-0291)
+     [Stephen Henson and Matt Caswell]
+
+  *) Multiblock corrupted pointer fix
+
+     OpenSSL 1.0.2 introduced the "multiblock" performance improvement. This
+     feature only applies on 64 bit x86 architecture platforms that support AES
+     NI instructions. A defect in the implementation of "multiblock" can cause
+     OpenSSL's internal write buffer to become incorrectly set to NULL when
+     using non-blocking IO. Typically, when the user application is using a
+     socket BIO for writing, this will only result in a failed connection.
+     However if some other BIO is used then it is likely that a segmentation
+     fault will be triggered, thus enabling a potential DoS attack.
+
+     This issue was reported to OpenSSL by Daniel Danner and Rainer Mueller.
+     (CVE-2015-0290)
+     [Matt Caswell]
+
+  *) Segmentation fault in DTLSv1_listen fix
+
+     The DTLSv1_listen function is intended to be stateless and processes the
+     initial ClientHello from many peers. It is common for user code to loop
+     over the call to DTLSv1_listen until a valid ClientHello is received with
+     an associated cookie. A defect in the implementation of DTLSv1_listen means
+     that state is preserved in the SSL object from one invocation to the next
+     that can lead to a segmentation fault. Errors processing the initial
+     ClientHello can trigger this scenario. An example of such an error could be
+     that a DTLS1.0 only client is attempting to connect to a DTLS1.2 only
+     server.
+
+     This issue was reported to OpenSSL by Per Allansson.
+     (CVE-2015-0207)
+     [Matt Caswell]
 
   *) Segmentation fault in ASN1_TYPE_cmp fix
 
      (CVE-2015-0286)
      [Stephen Henson]
 
+  *) Segmentation fault for invalid PSS parameters fix
+
+     The signature verification routines will crash with a NULL pointer
+     dereference if presented with an ASN.1 signature using the RSA PSS
+     algorithm and invalid parameters. Since these routines are used to verify
+     certificate signature algorithms this can be used to crash any
+     certificate verification operation and exploited in a DoS attack. Any
+     application which performs certificate verification is vulnerable including
+     OpenSSL clients and servers which enable client authentication.
+
+     This issue was reported to OpenSSL by Brian Carpenter.
+     (CVE-2015-0208)
+     [Stephen Henson]
+
   *) ASN.1 structure reuse memory corruption fix
 
      Reusing a structure in ASN.1 parsing may allow an attacker to cause
      (CVE-2015-0293)
      [Emilia Käsper]
 
+  *) Empty CKE with client auth and DHE fix
+
+     If client auth is used then a server can seg fault in the event of a DHE
+     ciphersuite being selected and a zero length ClientKeyExchange message
+     being sent by the client. This could be exploited in a DoS attack.
+     (CVE-2015-1787)
+     [Matt Caswell]
+
+  *) Handshake with unseeded PRNG fix
+
+     Under certain conditions an OpenSSL 1.0.2 client can complete a handshake
+     with an unseeded PRNG. The conditions are:
+     - The client is on a platform where the PRNG has not been seeded
+     automatically, and the user has not seeded manually
+     - A protocol specific client method version has been used (i.e. not
+     SSLv23_client_method)
+     - A ciphersuite is used that does not require additional random data from
+     the PRNG beyond the initial ClientHello client random (e.g. PSK-RC4-SHA).
+
+     If the handshake succeeds then the client random that has been used will
+     have been generated from a PRNG with insufficient entropy and therefore the
+     output may be predictable.
+
+     For example using the following command with an unseeded openssl will
+     succeed on an unpatched platform:
+
+     openssl s_client -psk 1a2b3c4d -tls1_2 -cipher PSK-RC4-SHA
+     (CVE-2015-0285)
+     [Matt Caswell]
+
   *) Use After Free following d2i_ECPrivatekey error fix
 
      A malformed EC private key file consumed via the d2i_ECPrivateKey function
   *) Removed the export ciphers from the DEFAULT ciphers
      [Kurt Roeckx]
 
+ Changes between 1.0.1l and 1.0.2 [22 Jan 2015]
+
+  *) Facilitate "universal" ARM builds targeting range of ARM ISAs, e.g.
+     ARMv5 through ARMv8, as opposed to "locking" it to a single one.
+     So far those who have to target multiple platforms would compromise
+     and argue that binary targeting say ARMv5 would still execute on
+     ARMv8. "Universal" build resolves this compromise by providing
+     near-optimal performance even on newer platforms.
+     [Andy Polyakov]
+
+  *) Accelerated NIST P-256 elliptic curve implementation for x86_64
+     (other platforms pending).
+     [Shay Gueron & Vlad Krasnov (Intel Corp), Andy Polyakov]
+
+  *) Add support for the SignedCertificateTimestampList certificate and
+     OCSP response extensions from RFC6962.
+     [Rob Stradling]
+
+  *) Fix ec_GFp_simple_points_make_affine (thus, EC_POINTs_mul etc.)
+     for corner cases. (Certain input points at infinity could lead to
+     bogus results, with non-infinity inputs mapped to infinity too.)
+     [Bodo Moeller]
+
+  *) Initial support for PowerISA 2.0.7, first implemented in POWER8.
+     This covers AES, SHA256/512 and GHASH. "Initial" means that most
+     common cases are optimized and there still is room for further
+     improvements. Vector Permutation AES for Altivec is also added.
+     [Andy Polyakov]
+
+  *) Add support for little-endian ppc64 Linux target.
+     [Marcelo Cerri (IBM)]
+
+  *) Initial support for ARMv8 ISA crypto extensions. This covers AES,
+     SHA1, SHA256 and GHASH. "Initial" means that most common cases
+     are optimized and there still is room for further improvements.
+     Both 32- and 64-bit modes are supported.
+     [Andy Polyakov, Ard Biesheuvel (Linaro)]
+
+  *) Improved ARMv7 NEON support.
+     [Andy Polyakov]
+
+  *) Support for SPARC Architecture 2011 crypto extensions, first
+     implemented in SPARC T4. This covers AES, DES, Camellia, SHA1,
+     SHA256/512, MD5, GHASH and modular exponentiation.
+     [Andy Polyakov, David Miller]
+
+  *) Accelerated modular exponentiation for Intel processors, a.k.a.
+     RSAZ.
+     [Shay Gueron & Vlad Krasnov (Intel Corp)]
+
+  *) Support for new and upcoming Intel processors, including AVX2,
+     BMI and SHA ISA extensions. This includes additional "stitched"
+     implementations, AESNI-SHA256 and GCM, and multi-buffer support
+     for TLS encrypt.
+
+     This work was sponsored by Intel Corp.
+     [Andy Polyakov]
+
+  *) Support for DTLS 1.2. This adds two sets of DTLS methods: DTLS_*_method()
+     supports both DTLS 1.2 and 1.0 and should use whatever version the peer
+     supports and DTLSv1_2_*_method() which supports DTLS 1.2 only.
+     [Steve Henson]
+
+  *) Use algorithm specific chains in SSL_CTX_use_certificate_chain_file():
+     this fixes a limitation in previous versions of OpenSSL.
+     [Steve Henson]
+
+  *) Extended RSA OAEP support via EVP_PKEY API. Options to specify digest,
+     MGF1 digest and OAEP label.
+     [Steve Henson]
+
+  *) Add EVP support for key wrapping algorithms, to avoid problems with
+     existing code the flag EVP_CIPHER_CTX_WRAP_ALLOW has to be set in
+     the EVP_CIPHER_CTX or an error is returned. Add AES and DES3 wrap
+     algorithms and include test cases.
+     [Steve Henson]
+
+  *) Add functions to allocate and set the fields of an ECDSA_METHOD
+     structure.
+     [Douglas E. Engert, Steve Henson]
+
+  *) New functions OPENSSL_gmtime_diff and ASN1_TIME_diff to find the
+     difference in days and seconds between two tm or ASN1_TIME structures.
+     [Steve Henson]
+
+  *) Add -rev test option to s_server to just reverse order of characters
+     received by client and send back to server. Also prints an abbreviated
+     summary of the connection parameters.
+     [Steve Henson]
+
+  *) New option -brief for s_client and s_server to print out a brief summary
+     of connection parameters.
+     [Steve Henson]
+
+  *) Add callbacks for arbitrary TLS extensions.
+     [Trevor Perrin <trevp@trevp.net> and Ben Laurie]
+
+  *) New option -crl_download in several openssl utilities to download CRLs
+     from CRLDP extension in certificates.
+     [Steve Henson]
+
+  *) New options -CRL and -CRLform for s_client and s_server for CRLs.
+     [Steve Henson]
+
+  *) New function X509_CRL_diff to generate a delta CRL from the difference
+     of two full CRLs. Add support to "crl" utility.
+     [Steve Henson]
+
+  *) New functions to set lookup_crls function and to retrieve
+     X509_STORE from X509_STORE_CTX.
+     [Steve Henson]
+
+  *) Print out deprecated issuer and subject unique ID fields in
+     certificates.
+     [Steve Henson]
+
+  *) Extend OCSP I/O functions so they can be used for simple general purpose
+     HTTP as well as OCSP. New wrapper function which can be used to download
+     CRLs using the OCSP API.
+     [Steve Henson]
+
+  *) Delegate command line handling in s_client/s_server to SSL_CONF APIs.
+     [Steve Henson]
+
+  *) SSL_CONF* functions. These provide a common framework for application
+     configuration using configuration files or command lines.
+     [Steve Henson]
+
+  *) SSL/TLS tracing code. This parses out SSL/TLS records using the
+     message callback and prints the results. Needs compile time option
+     "enable-ssl-trace". New options to s_client and s_server to enable
+     tracing.
+     [Steve Henson]
+
+  *) New ctrl and macro to retrieve supported points extensions.
+     Print out extension in s_server and s_client.
+     [Steve Henson]
+
+  *) New functions to retrieve certificate signature and signature
+     OID NID.
+     [Steve Henson]
+
+  *) Add functions to retrieve and manipulate the raw cipherlist sent by a
+     client to OpenSSL.
+     [Steve Henson]
+
+  *) New Suite B modes for TLS code. These use and enforce the requirements
+     of RFC6460: restrict ciphersuites, only permit Suite B algorithms and
+     only use Suite B curves. The Suite B modes can be set by using the
+     strings "SUITEB128", "SUITEB192" or "SUITEB128ONLY" for the cipherstring.
+     [Steve Henson]
+
+  *) New chain verification flags for Suite B levels of security. Check
+     algorithms are acceptable when flags are set in X509_verify_cert.
+     [Steve Henson]
+
+  *) Make tls1_check_chain return a set of flags indicating checks passed
+     by a certificate chain. Add additional tests to handle client
+     certificates: checks for matching certificate type and issuer name
+     comparison.
+     [Steve Henson]
+
+  *) If an attempt is made to use a signature algorithm not in the peer
+     preference list abort the handshake. If client has no suitable
+     signature algorithms in response to a certificate request do not
+     use the certificate.
+     [Steve Henson]
+
+  *) If server EC tmp key is not in client preference list abort handshake.
+     [Steve Henson]
+
+  *) Add support for certificate stores in CERT structure. This makes it
+     possible to have different stores per SSL structure or one store in
+     the parent SSL_CTX. Include distinct stores for certificate chain
+     verification and chain building. New ctrl SSL_CTRL_BUILD_CERT_CHAIN
+     to build and store a certificate chain in CERT structure: returning
+     an error if the chain cannot be built: this will allow applications
+     to test if a chain is correctly configured.
+
+     Note: if the CERT based stores are not set then the parent SSL_CTX
+     store is used to retain compatibility with existing behaviour.
+
+     [Steve Henson]
+
+  *) New function ssl_set_client_disabled to set a ciphersuite disabled
+     mask based on the current session, check mask when sending client
+     hello and checking the requested ciphersuite.
+     [Steve Henson]
+
+  *) New ctrls to retrieve and set certificate types in a certificate
+     request message. Print out received values in s_client. If certificate
+     types is not set with custom values set sensible values based on
+     supported signature algorithms.
+     [Steve Henson]
+
+  *) Support for distinct client and server supported signature algorithms.
+     [Steve Henson]
+
+  *) Add certificate callback. If set this is called whenever a certificate
+     is required by client or server. An application can decide which
+     certificate chain to present based on arbitrary criteria: for example
+     supported signature algorithms. Add very simple example to s_server.
+     This fixes many of the problems and restrictions of the existing client
+     certificate callback: for example you can now clear an existing
+     certificate and specify the whole chain.
+     [Steve Henson]
+
+  *) Add new "valid_flags" field to CERT_PKEY structure which determines what
+     the certificate can be used for (if anything). Set valid_flags field 
+     in new tls1_check_chain function. Simplify ssl_set_cert_masks which used
+     to have similar checks in it.
+
+     Add new "cert_flags" field to CERT structure and include a "strict mode".
+     This enforces some TLS certificate requirements (such as only permitting
+     certificate signature algorithms contained in the supported algorithms
+     extension) which some implementations ignore: this option should be used
+     with caution as it could cause interoperability issues.
+     [Steve Henson]
+
+  *) Update and tidy signature algorithm extension processing. Work out
+     shared signature algorithms based on preferences and peer algorithms
+     and print them out in s_client and s_server. Abort handshake if no
+     shared signature algorithms.
+     [Steve Henson]
+
+  *) Add new functions to allow customised supported signature algorithms
+     for SSL and SSL_CTX structures. Add options to s_client and s_server
+     to support them.
+     [Steve Henson]
+
+  *) New function SSL_certs_clear() to delete all references to certificates
+     from an SSL structure. Before this once a certificate had been added
+     it couldn't be removed.
+     [Steve Henson]
+
+  *) Integrate hostname, email address and IP address checking with certificate
+     verification. New verify options supporting checking in openssl utility.
+     [Steve Henson]
+
+  *) Fixes and wildcard matching support to hostname and email checking
+     functions. Add manual page.
+     [Florian Weimer (Red Hat Product Security Team)]
+
+  *) New functions to check a hostname, email or IP address against a
+     certificate. Add options to the x509 utility to print results of checks against
+     a certificate.
+     [Steve Henson]
+
+  *) Fix OCSP checking.
+     [Rob Stradling <rob.stradling@comodo.com> and Ben Laurie]
+
+  *) Initial experimental support for explicitly trusted non-root CAs. 
+     OpenSSL still tries to build a complete chain to a root but if an
+     intermediate CA has a trust setting included that is used. The first
+     setting is used: whether to trust (e.g., -addtrust option to the x509
+     utility) or reject.
+     [Steve Henson]
+
+  *) Add -trusted_first option which attempts to find certificates in the
+     trusted store even if an untrusted chain is also supplied.
+     [Steve Henson]
+
+  *) MIPS assembly pack updates: support for MIPS32r2 and SmartMIPS ASE,
+     platform support for Linux and Android.
+     [Andy Polyakov]
+
+  *) Support for linux-x32, ILP32 environment in x86_64 framework.
+     [Andy Polyakov]
+
+  *) Experimental multi-implementation support for FIPS capable OpenSSL.
+     When in FIPS mode the approved implementations are used as normal,
+     when not in FIPS mode the internal unapproved versions are used instead.
+     This means that the FIPS capable OpenSSL isn't forced to use the
+     (often lower performance) FIPS implementations outside FIPS mode.
+     [Steve Henson]
+
+  *) Transparently support X9.42 DH parameters when calling
+     PEM_read_bio_DHparameters. This means existing applications can handle
+     the new parameter format automatically.
+     [Steve Henson]
+
+  *) Initial experimental support for X9.42 DH parameter format: mainly
+     to support use of 'q' parameter for RFC5114 parameters.
+     [Steve Henson]
+
+  *) Add DH parameters from RFC5114 including test data to dhtest.
+     [Steve Henson]
+
+  *) Support for automatic EC temporary key parameter selection. If enabled
+     the most preferred EC parameters are automatically used instead of
+     hardcoded fixed parameters. Now a server just has to call:
+     SSL_CTX_set_ecdh_auto(ctx, 1) and the server will automatically
+     support ECDH and use the most appropriate parameters.
+     [Steve Henson]
+
+  *) Enhance and tidy EC curve and point format TLS extension code. Use
+     static structures instead of allocation if default values are used.
+     New ctrls to set curves we wish to support and to retrieve shared curves.
+     Print out shared curves in s_server. New options to s_server and s_client
+     to set list of supported curves.
+     [Steve Henson]
+
+  *) New ctrls to retrieve supported signature algorithms and 
+     supported curve values as an array of NIDs. Extend openssl utility
+     to print out received values.
+     [Steve Henson]
+
+  *) Add new APIs EC_curve_nist2nid and EC_curve_nid2nist which convert
+     between NIDs and the more common NIST names such as "P-256". Enhance
+     ecparam utility and ECC method to recognise the NIST names for curves.
+     [Steve Henson]
+
+  *) Enhance SSL/TLS certificate chain handling to support different
+     chains for each certificate instead of one chain in the parent SSL_CTX.
+     [Steve Henson]
+
+  *) Support for fixed DH ciphersuite client authentication: where both
+     server and client use DH certificates with common parameters.
+     [Steve Henson]
+
+  *) Support for fixed DH ciphersuites: those requiring DH server
+     certificates.
+     [Steve Henson]
+
+  *) New function i2d_re_X509_tbs for re-encoding the TBS portion of
+     the certificate.
+     Note: Related 1.0.2-beta specific macros X509_get_cert_info,
+     X509_CINF_set_modified, X509_CINF_get_issuer, X509_CINF_get_extensions and
+     X509_CINF_get_signature were reverted post internal team review.
+
  Changes between 1.0.1k and 1.0.1l [15 Jan 2015]
 
   *) Build fixes for the Windows and OpenVMS platforms
index 60ec378..d99eed7 100755 (executable)
--- a/Configure
+++ b/Configure
@@ -105,6 +105,25 @@ my $usage="Usage: Configure [no-<cipher> ...] [enable-<cipher> ...] [experimenta
 
 my $gcc_devteam_warn = "-Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED";
 
+# TODO(openssl-team): fix problems and investigate if (at least) the following
+# warnings can also be enabled:
+# -Wconditional-uninitialized, -Wswitch-enum, -Wunused-macros,
+# -Wmissing-field-initializers, -Wmissing-variable-declarations,
+# -Wincompatible-pointer-types-discards-qualifiers, -Wcast-align,
+# -Wunreachable-code -Wunused-parameter -Wlanguage-extension-token
+# -Wextended-offsetof
+my $clang_disabled_warnings = "-Wno-unused-parameter -Wno-missing-field-initializers -Wno-language-extension-token  -Wno-extended-offsetof";
+
+# These are used in addition to $gcc_devteam_warn when the compiler is clang.
+# TODO(openssl-team): fix problems and investigate if (at least) the
+# following warnings can also be enabled: -Wconditional-uninitialized,
+# -Wswitch-enum, -Wunused-macros, -Wmissing-field-initializers,
+# -Wmissing-variable-declarations,
+# -Wincompatible-pointer-types-discards-qualifiers, -Wcast-align,
+# -Wunreachable-code -Wunused-parameter -Wlanguage-extension-token
+# -Wextended-offsetof
+my $clang_devteam_warn = "-Wno-unused-parameter -Wno-missing-field-initializers -Wno-language-extension-token -Wno-extended-offsetof -Qunused-arguments";
+
 my $strict_warnings = 0;
 
 my $x86_gcc_des="DES_PTR DES_RISC1 DES_UNROLL";
@@ -124,24 +143,25 @@ my $tlib="-lnsl -lsocket";
 my $bits1="THIRTY_TWO_BIT ";
 my $bits2="SIXTY_FOUR_BIT ";
 
-my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:";
+my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o::des-586.o crypt586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:";
 
 my $x86_elf_asm="$x86_asm:elf";
 
-my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:";
-my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
-my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
-my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
-my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::";
-my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::";
-my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:";
-my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
-my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
-my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
-my $no_asm=":::::::::::::::void";
+my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o rsaz_exp.o rsaz-x86_64.o rsaz-avx2.o:ecp_nistz256.o ecp_nistz256-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o aesni-sha256-x86_64.o aesni-mb-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o sha1-mb-x86_64.o sha256-mb-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o aesni-gcm-x86_64.o:";
+my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o::des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void";
+my $sparcv8_asm=":sparcv8.o::des_enc-sparc.o fcrypt_b.o:::::::::::::void";
+my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::sha1-alpha.o:::::::ghash-alpha.o::void";
+my $mips64_asm=":bn-mips.o mips-mont.o:::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::";
+my $mips32_asm=$mips64_asm; $mips32_asm =~ s/\s*sha512\-mips\.o//;
+my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o:::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:";
+my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o:::aes_cbc.o aes-armv4.o bsaes-armv7.o aesv8-armx.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o ghashv8-armx.o::void";
+my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o::::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o:";
+my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
+my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
+my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o:::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:";
+my $ppc32_asm=$ppc64_asm;
+my $no_asm="::::::::::::::::void";
 
 # As for $BSDthreads. Idea is to maintain "collective" set of flags,
 # which would cover all BSD flavors. -pthread applies to them all, 
@@ -152,7 +172,7 @@ my $no_asm=":::::::::::::::void";
 # seems to be sufficient?
 my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT";
 
-#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $bn_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $modes_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib
+#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $bn_ops : $cpuid_obj : $bn_obj : $ec_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $modes_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib
 
 my %table=(
 # File 'TABLE' (created by 'make TABLE') contains the data from this list,
@@ -174,14 +194,14 @@ my %table=(
 "debug-ben-debug-64",  "gcc:$gcc_devteam_warn -Wno-error=overlength-strings -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -g3 -O3 -pipe::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-ben-macos",     "cc:$gcc_devteam_warn -arch i386 -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -DOPENSSL_THREADS -D_REENTRANT -DDSO_DLFCN -DHAVE_DLFCN_H -O3 -DL_ENDIAN -g3 -pipe::(unknown)::-Wl,-search_paths_first::::",
 "debug-ben-macos-gcc46",       "gcc-mp-4.6:$gcc_devteam_warn -Wconversion -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -DOPENSSL_THREADS -D_REENTRANT -DDSO_DLFCN -DHAVE_DLFCN_H -O3 -DL_ENDIAN -g3 -pipe::(unknown)::::::",
-"debug-ben-darwin64","cc:$gcc_devteam_warn -Wno-language-extension-token -Wno-extended-offsetof -arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$x86_64_asm;$asm=~s/rc4\-[^:]+//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
+"debug-ben-darwin64","cc:$gcc_devteam_warn -g -Wno-language-extension-token -Wno-extended-offsetof -arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$x86_64_asm;$asm=~s/rc4\-[^:]+//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
+"debug-ben-debug-64-clang",    "clang:$gcc_devteam_warn -Wno-error=overlength-strings -Wno-error=extended-offsetof -Qunused-arguments -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -g3 -O3 -pipe::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-ben-no-opt",    "gcc: -Wall -Wmissing-prototypes -Wstrict-prototypes -Wmissing-declarations -DDEBUG_SAFESTACK -DCRYPTO_MDEBUG -Werror -DL_ENDIAN -DTERMIOS -Wall -g3::(unknown)::::::",
 "debug-ben-strict",    "gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DCONST_STRICT -O2 -Wall -Wshadow -Werror -Wpointer-arith -Wcast-qual -Wwrite-strings -pipe::(unknown)::::::",
 "debug-rse","cc:-DTERMIOS -DL_ENDIAN -pipe -O -g -ggdb3 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}",
 "debug-bodo",  "gcc:$gcc_devteam_warn -Wno-error=overlength-strings -DBN_DEBUG -DBN_DEBUG_RAND -DCONF_DEBUG -DBIO_PAIR_DEBUG -m64 -DL_ENDIAN -DTERMIO -g -DMD32_REG_T=int::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
-"debug-ulf", "gcc:-DTERMIOS -DL_ENDIAN -march=i486 -Wall -DBN_DEBUG -DBN_DEBUG_RAND -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DOPENSSL_NO_ASM -g -Wformat -Wshadow -Wmissing-prototypes -Wmissing-declarations:::CYGWIN32:::${no_asm}:win32:cygwin-shared:::.dll",
 "debug-steve64", "gcc:$gcc_devteam_warn -m64 -DL_ENDIAN -DTERMIO -DCONF_DEBUG -DDEBUG_SAFESTACK -Wno-overlength-strings -g::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"debug-steve32", "gcc:$gcc_devteam_warn -m32 -DL_ENDIAN -DCONF_DEBUG -DDEBUG_SAFESTACK -g -pipe::-D_REENTRANT::-rdynamic -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"debug-steve32", "gcc:$gcc_devteam_warn -m32 -DL_ENDIAN -DCONF_DEBUG -DDEBUG_SAFESTACK -Wno-overlength-strings -g -pipe::-D_REENTRANT::-rdynamic -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-steve-opt", "gcc:$gcc_devteam_warn -m64 -O3 -DL_ENDIAN -DTERMIO -DCONF_DEBUG -DDEBUG_SAFESTACK -g::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-levitte-linux-elf","gcc:-DLEVITTE_DEBUG -DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -ggdb -g3 -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-levitte-linux-noasm","gcc:-DLEVITTE_DEBUG -DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DOPENSSL_NO_ASM -DL_ENDIAN -ggdb -g3 -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@@ -193,9 +213,9 @@ my %table=(
 "debug-linux-ppro","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -g -mcpu=pentiumpro -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn",
 "debug-linux-elf","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -g -march=i486 -Wall::-D_REENTRANT::-lefence -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-linux-elf-noefence","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -g -march=i486 -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes_x86core.o aes_cbc.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o::ghash-x86.o::elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o::des-586.o crypt586.o:aes_x86core.o aes_cbc.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o::ghash-x86.o::elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-linux-generic32","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -g -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"debug-linux-generic64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"debug-linux-generic64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DTERMIO -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-linux-x86_64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -m64 -DL_ENDIAN -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
 "dist",                "cc:-O::(unknown)::::::",
 
@@ -225,7 +245,7 @@ my %table=(
 "solaris64-x86_64-gcc","gcc:-m64 -O3 -Wall -DL_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:solaris-shared:-fPIC:-m64 -shared -static-libgcc:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/64",
  
 #### Solaris x86 with Sun C setups
-"solaris-x86-cc","cc:-fast -O -Xa::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"solaris-x86-cc","cc:-fast -xarch=generic -O -Xa::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "solaris64-x86_64-cc","cc:-fast -xarch=amd64 -xstrconst -Xa -DL_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:solaris-shared:-KPIC:-xarch=amd64 -G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/64",
 
 #### SPARC Solaris with GNU C setups
@@ -300,7 +320,7 @@ my %table=(
 "hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "hpux-parisc1_1-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${parisc11_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa1.1",
 "hpux-parisc2-gcc","gcc:-march=2.0 -O3 -DB_ENDIAN -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL DES_RISC1:".eval{my $asm=$parisc20_asm;$asm=~s/2W\./2\./;$asm=~s/:64/:32/;$asm}.":dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_32",
-"hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o::::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64",
+"hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o:::::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64",
 
 # More attempts at unified 10.X and 11.X targets for HP C compiler.
 #
@@ -347,20 +367,57 @@ my %table=(
 # throw in -D[BL]_ENDIAN, whichever appropriate...
 "linux-generic32","gcc:-O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-ppc",   "gcc:-DB_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-# It's believed that majority of ARM toolchains predefine appropriate -march.
-# If you compiler does not, do complement config command line with one!
-"linux-armv4", "gcc:-O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+
+#######################################################################
+# Note that -march is not among compiler options in below linux-armv4
+# target line. Not specifying one is intentional to give you choice to:
+#
+# a) rely on your compiler default by not specifying one;
+# b) specify your target platform explicitly for optimal performance,
+#    e.g. -march=armv6 or -march=armv7-a;
+# c) build "universal" binary that targets *range* of platforms by
+#    specifying minimum and maximum supported architecture;
+#
+# As for c) option. It actually makes no sense to specify maximum to be
+# less than ARMv7, because it's the least requirement for run-time
+# switch between platform-specific code paths. And without run-time
+# switch performance would be equivalent to one for minimum. Secondly,
+# there are some natural limitations that you'd have to accept and
+# respect. Most notably you can *not* build "universal" binary for
+# big-endian platform. This is because ARMv7 processor always picks
+# instructions in little-endian order. Another similar limitation is
+# that -mthumb can't "cross" -march=armv6t2 boundary, because that's
+# where it became Thumb-2. Well, this limitation is a bit artificial,
+# because it's not really impossible, but it's deemed too tricky to
+# support. And of course you have to be sure that your binutils are
+# actually up to the task of handling maximum target platform. With all
+# this in mind here is an example of how to configure "universal" build:
+#
+#       ./Configure linux-armv4 -march=armv6 -D__ARM_MAX_ARCH__=8
+#
+"linux-armv4", "gcc: -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-aarch64","gcc: -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+# Configure script adds minimally required -march for assembly support,
+# if no -march was specified at command line. mips32 and mips64 below
+# refer to contemporary MIPS Architecture specifications, MIPS32 and
+# MIPS64, rather than to kernel bitness.
+"linux-mips32",        "gcc:-mabi=32 -O3 -Wall -DBN_DIV3W::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips32_asm}:o32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-mips64",   "gcc:-mabi=n32 -O3 -Wall -DBN_DIV3W::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips64_asm}:n32:dlfcn:linux-shared:-fPIC:-mabi=n32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::32",
+"linux64-mips64",   "gcc:-mabi=64 -O3 -Wall -DBN_DIV3W::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips64_asm}:64:dlfcn:linux-shared:-fPIC:-mabi=64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
 #### IA-32 targets...
-"linux-ia32-icc",      "icc:-DL_ENDIAN -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-ia32-icc",      "icc:-DL_ENDIAN -O2::-D_REENTRANT::-ldl -no_cpprt:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-elf",   "gcc:-DL_ENDIAN -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-aout",  "gcc:-DL_ENDIAN -O3 -fomit-frame-pointer -march=i486 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:a.out",
 ####
 "linux-generic64","gcc:-O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-ppc64", "gcc:-m64 -DB_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
-"linux-ia64",  "gcc:-DL_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"linux-ia64-ecc","ecc:-DL_ENDIAN -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"linux-ia64-icc","icc:-DL_ENDIAN -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-ppc64le","gcc:-m64 -DL_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
+"linux-ia64",  "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-ia64-icc","icc:-DL_ENDIAN -O2 -Wall::-D_REENTRANT::-ldl -no_cpprt:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-x86_64",        "gcc:-m64 -DL_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
+"linux-x86_64-clang",  "clang: -m64 -DL_ENDIAN -O3 -Wall -Wextra $clang_disabled_warnings -Qunused-arguments::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
+"linux-x86_64-icc", "icc:-DL_ENDIAN -O2::-D_REENTRANT::-ldl -no_cpprt:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
+"linux-x32",   "gcc:-mx32 -DL_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-mx32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::x32",
 "linux64-s390x",       "gcc:-m64 -DB_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
 #### So called "highgprs" target for z/Architecture CPUs
 # "Highgprs" is kernel feature first implemented in Linux 2.6.32, see
@@ -407,6 +464,7 @@ my %table=(
 "android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "android-x86","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:".eval{my $asm=${x86_elf_asm};$asm=~s/:elf/:android/;$asm}.":dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "android-armv7","gcc:-march=armv7-a -mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"android-mips","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips32_asm}:o32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 
 #### *BSD [do see comment about ${BSDthreads} above!]
 "BSD-generic32","gcc:-O3 -fomit-frame-pointer -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@@ -421,7 +479,7 @@ my %table=(
 # triggered by RIPEMD160 code.
 "BSD-sparc64", "gcc:-DB_ENDIAN -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR:${sparcv9_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "BSD-ia64",    "gcc:-DL_ENDIAN -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"BSD-x86_64",  "gcc:-DL_ENDIAN -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"BSD-x86_64",  "cc:-DL_ENDIAN -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 
 "bsdi-elf-gcc",     "gcc:-DPERL5 -DL_ENDIAN -fomit-frame-pointer -O3 -march=i486 -Wall::(unknown)::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 
@@ -454,11 +512,11 @@ my %table=(
 # UnixWare 2.0x fails destest with -O.
 "unixware-2.0","cc:-DFILIO_H -DNO_STRINGS_H::-Kthread::-lsocket -lnsl -lresolv -lx:${x86_gcc_des} ${x86_gcc_opts}:::",
 "unixware-2.1","cc:-O -DFILIO_H::-Kthread::-lsocket -lnsl -lresolv -lx:${x86_gcc_des} ${x86_gcc_opts}:::",
-"unixware-7","cc:-O -DFILIO_H -Kalloca::-Kthread::-lsocket -lnsl:BN_LLONG MD2_CHAR RC4_INDEX ${x86_gcc_des}:${x86_elf_asm}:dlfcn:svr5-shared:-Kpic::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"unixware-7-gcc","gcc:-DL_ENDIAN -DFILIO_H -O3 -fomit-frame-pointer -march=pentium -Wall::-D_REENTRANT::-lsocket -lnsl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:gnu-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"unixware-7","cc:-O -DFILIO_H -Kalloca::-Kthread::-lsocket -lnsl:BN_LLONG MD2_CHAR RC4_INDEX ${x86_gcc_des}:${x86_elf_asm}-1:dlfcn:svr5-shared:-Kpic::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"unixware-7-gcc","gcc:-DL_ENDIAN -DFILIO_H -O3 -fomit-frame-pointer -march=pentium -Wall::-D_REENTRANT::-lsocket -lnsl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}-1:dlfcn:gnu-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 # SCO 5 - Ben Laurie <ben@algroup.co.uk> says the -O breaks the SCO cc.
-"sco5-cc",  "cc:-belf::(unknown)::-lsocket -lnsl:${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:svr3-shared:-Kpic::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"sco5-gcc",  "gcc:-O3 -fomit-frame-pointer::(unknown)::-lsocket -lnsl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:svr3-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"sco5-cc",  "cc:-belf::(unknown)::-lsocket -lnsl:${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}-1:dlfcn:svr3-shared:-Kpic::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"sco5-gcc",  "gcc:-O3 -fomit-frame-pointer::(unknown)::-lsocket -lnsl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}-1:dlfcn:svr3-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 
 #### IBM's AIX.
 "aix3-cc",  "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::",
@@ -518,9 +576,9 @@ my %table=(
 # Visual C targets
 #
 # Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64
-"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32",
+"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o ia64-mont.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32",
 "VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:".eval{my $asm=$x86_64_asm;$asm=~s/x86_64-gcc\.o/bn_asm.o/;$asm}.":auto:win32",
-"debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32",
+"debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32",
 "debug-VC-WIN64A","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:".eval{my $asm=$x86_64_asm;$asm=~s/x86_64-gcc\.o/bn_asm.o/;$asm}.":auto:win32",
 # x86 Win32 target defaults to ANSI API, if you want UNICODE, complement
 # 'perl Configure VC-WIN32' with '-DUNICODE -D_UNICODE'
@@ -547,9 +605,8 @@ my %table=(
 "UWIN", "cc:-DTERMIOS -DL_ENDIAN -O -Wall:::UWIN::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${no_asm}:win32",
 
 # Cygwin
-"Cygwin-pre1.3", "gcc:-DTERMIOS -DL_ENDIAN -fomit-frame-pointer -O3 -m486 -Wall::(unknown):CYGWIN32::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${no_asm}:win32",
-"Cygwin", "gcc:-DTERMIOS -DL_ENDIAN -fomit-frame-pointer -O3 -march=i486 -Wall:::CYGWIN32::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:coff:dlfcn:cygwin-shared:-D_WINDLL:-shared:.dll.a",
-"debug-Cygwin", "gcc:-DTERMIOS -DL_ENDIAN -march=i486 -Wall -DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DOPENSSL_NO_ASM -g -Wformat -Wshadow -Wmissing-prototypes -Wmissing-declarations -Werror:::CYGWIN32:::${no_asm}:dlfcn:cygwin-shared:-D_WINDLL:-shared:.dll.a",
+"Cygwin", "gcc:-DTERMIOS -DL_ENDIAN -fomit-frame-pointer -O3 -march=i486 -Wall:::CYGWIN::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:coff:dlfcn:cygwin-shared:-D_WINDLL:-shared:.dll.a",
+"Cygwin-x86_64", "gcc:-DTERMIOS -DL_ENDIAN -O3 -Wall:::CYGWIN::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:mingw64:dlfcn:cygwin-shared:-D_WINDLL:-shared:.dll.a",
 
 # NetWare from David Ward (dsward@novell.com)
 # requires either MetroWerks NLM development tools, or gcc / nlmconv
@@ -581,7 +638,8 @@ my %table=(
 "darwin64-ppc-cc","cc:-arch ppc64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc64_asm}:osx64:dlfcn:darwin-shared:-fPIC -fno-common:-arch ppc64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
 "darwin-i386-cc","cc:-arch i386 -O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR:".eval{my $asm=$x86_asm;$asm=~s/cast\-586\.o//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch i386 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
 "debug-darwin-i386-cc","cc:-arch i386 -g3 -DL_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR:${x86_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch i386 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
-"darwin64-x86_64-cc","cc:-arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$x86_64_asm;$asm=~s/rc4\-[^:]+//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
+"darwin64-x86_64-cc","cc:-arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$x86_64_asm;$asm=~s/rc4\-[^:]+//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
+"debug-darwin64-x86_64-cc","cc:-arch x86_64 -ggdb -g2 -O0 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$x86_64_asm;$asm=~s/rc4\-[^:]+//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
 "debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc32_asm}:osx32:dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
 # iPhoneOS/iOS
 "iphoneos-cross","llvm-gcc:-O3 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib",
@@ -634,6 +692,7 @@ my $idx_lflags = $idx++;
 my $idx_bn_ops = $idx++;
 my $idx_cpuid_obj = $idx++;
 my $idx_bn_obj = $idx++;
+my $idx_ec_obj = $idx++;
 my $idx_des_obj = $idx++;
 my $idx_aes_obj = $idx++;
 my $idx_bf_obj = $idx++;
@@ -714,11 +773,13 @@ my %disabled = ( # "what"         => "comment" [or special keyword "experimental
                 "ec_nistp_64_gcc_128" => "default",
                 "gmp"            => "default",
                 "jpake"          => "experimental",
+                "libunbound"     => "experimental",
                 "md2"            => "default",
                 "rc5"            => "default",
                 "rfc3779"        => "default",
                 "sctp"       => "default",
                 "shared"         => "default",
+                "ssl-trace"      => "default",
                 "store"          => "experimental",
                 "unit-test"      => "default",
                 "zlib"           => "default",
@@ -728,7 +789,7 @@ my @experimental = ();
 
 # This is what $depflags will look like with the above defaults
 # (we need this to see if we should advise the user to run "make depend"):
-my $default_depflags = " -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST";
+my $default_depflags = " -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST";
 
 # Explicit "no-..." options will be collected in %disabled along with the defaults.
 # To remove something from %disabled, use "enable-foo" (unless it's experimental).
@@ -873,16 +934,7 @@ PROCESS_ARGS:
                        }
                elsif (/^[-+]/)
                        {
-                       if (/^-[lL](.*)$/ or /^-Wl,/)
-                               {
-                               $libs.=$_." ";
-                               }
-                       elsif (/^-[^-]/ or /^\+/)
-                               {
-                               $_ =~ s/%([0-9a-f]{1,2})/chr(hex($1))/gei;
-                               $flags.=$_." ";
-                               }
-                       elsif (/^--prefix=(.*)$/)
+                       if (/^--prefix=(.*)$/)
                                {
                                $prefix=$1;
                                }
@@ -926,10 +978,14 @@ PROCESS_ARGS:
                                {
                                $cross_compile_prefix=$1;
                                }
-                       else
+                       elsif (/^-[lL](.*)$/ or /^-Wl,/)
+                               {
+                               $libs.=$_." ";
+                               }
+                       else    # common if (/^[-+]/), just pass down...
                                {
-                               print STDERR $usage;
-                               exit(1);
+                               $_ =~ s/%([0-9a-f]{1,2})/chr(hex($1))/gei;
+                               $flags.=$_." ";
                                }
                        }
                elsif ($_ =~ /^([^:]+):(.+)$/)
@@ -1156,6 +1212,7 @@ my $cc = $fields[$idx_cc];
 if($ENV{CC}) {
     $cc = $ENV{CC};
 }
+
 my $cflags = $fields[$idx_cflags];
 my $unistd = $fields[$idx_unistd];
 my $thread_cflag = $fields[$idx_thread_cflag];
@@ -1164,6 +1221,7 @@ my $lflags = $fields[$idx_lflags];
 my $bn_ops = $fields[$idx_bn_ops];
 my $cpuid_obj = $fields[$idx_cpuid_obj];
 my $bn_obj = $fields[$idx_bn_obj];
+my $ec_obj = $fields[$idx_ec_obj];
 my $des_obj = $fields[$idx_des_obj];
 my $aes_obj = $fields[$idx_aes_obj];
 my $bf_obj = $fields[$idx_bf_obj];
@@ -1209,6 +1267,12 @@ if ($target =~ /^mingw/ && `$cc --target-help 2>&1` !~ m/\-mno\-cygwin/m)
        $shared_ldflag =~ s/\-mno\-cygwin\s*//;
        }
 
+if ($target =~ /linux.*\-mips/ && !$no_asm && $flags !~ /\-m(ips|arch=)/) {
+       # minimally required architecture flags for assembly modules
+       $cflags="-mips2 $cflags" if ($target =~ /mips32/);
+       $cflags="-mips3 $cflags" if ($target =~ /mips64/);
+}
+
 my $no_shared_warn=0;
 my $no_user_cflags=0;
 
@@ -1335,7 +1399,7 @@ $lflags="$libs$lflags" if ($libs ne "");
 
 if ($no_asm)
        {
-       $cpuid_obj=$bn_obj=
+       $cpuid_obj=$bn_obj=$ec_obj=
        $des_obj=$aes_obj=$bf_obj=$cast_obj=$rc4_obj=$rc5_obj=$cmll_obj=
        $modes_obj=$sha1_obj=$md5_obj=$rmd160_obj=$wp_obj=$engines_obj="";
        }
@@ -1416,6 +1480,7 @@ if ($target =~ /\-icc$/)  # Intel C compiler
                }
        if ($iccver>=8)
                {
+               $cflags=~s/\-KPIC/-fPIC/;
                # Eliminate unnecessary dependency from libirc.a. This is
                # essential for shared library support, as otherwise
                # apps/openssl can end up in endless loop upon startup...
@@ -1423,12 +1488,17 @@ if ($target =~ /\-icc$/)        # Intel C compiler
                }
        if ($iccver>=9)
                {
-               $cflags.=" -i-static";
-               $cflags=~s/\-no_cpprt/-no-cpprt/;
+               $lflags.=" -i-static";
+               $lflags=~s/\-no_cpprt/-no-cpprt/;
                }
        if ($iccver>=10)
                {
-               $cflags=~s/\-i\-static/-static-intel/;
+               $lflags=~s/\-i\-static/-static-intel/;
+               }
+       if ($iccver>=11)
+               {
+               $cflags.=" -no-intel-extensions";       # disable Cilk
+               $lflags=~s/\-no\-cpprt/-no-cxxlib/;
                }
        }
 
@@ -1509,7 +1579,7 @@ if ($rmd160_obj =~ /\.o$/)
        }
 if ($aes_obj =~ /\.o$/)
        {
-       $cflags.=" -DAES_ASM";
+       $cflags.=" -DAES_ASM" if ($aes_obj =~ m/\baes\-/);;
        # aes-ctr.o is not a real file, only indication that assembler
        # module implements AES_ctr32_encrypt...
        $cflags.=" -DAES_CTR_ASM" if ($aes_obj =~ s/\s*aes\-ctr\.o//);
@@ -1531,10 +1601,14 @@ else    {
        $wp_obj="wp_block.o";
        }
 $cmll_obj=$cmll_enc    unless ($cmll_obj =~ /.o$/);
-if ($modes_obj =~ /ghash/)
+if ($modes_obj =~ /ghash\-/)
        {
        $cflags.=" -DGHASH_ASM";
        }
+if ($ec_obj =~ /ecp_nistz256/)
+       {
+       $cflags.=" -DECP_NISTZ256_ASM";
+       }
 
 # "Stringify" the C flags string.  This permits it to be made part of a string
 # and works as well on command lines.
@@ -1574,12 +1648,21 @@ if ($shlib_version_number =~ /(^[0-9]*)\.([0-9\.]*)/)
 
 if ($strict_warnings)
        {
+       my $ecc = $cc;
+       $ecc = "clang" if `$cc --version 2>&1` =~ /clang/;
        my $wopt;
-       die "ERROR --strict-warnings requires gcc" unless ($cc =~ /gcc$/);
+       die "ERROR --strict-warnings requires gcc or clang" unless ($ecc =~ /gcc$/ or $ecc =~ /clang$/);
        foreach $wopt (split /\s+/, $gcc_devteam_warn)
                {
                $cflags .= " $wopt" unless ($cflags =~ /$wopt/)
                }
+       if ($ecc eq "clang")
+               {
+               foreach $wopt (split /\s+/, $clang_devteam_warn)
+                       {
+                       $cflags .= " $wopt" unless ($cflags =~ /$wopt/)
+                       }
+               }
        }
 
 open(IN,'<Makefile.org') || die "unable to read Makefile.org:$!\n";
@@ -1638,6 +1721,7 @@ while (<IN>)
        s/^EXE_EXT=.*$/EXE_EXT= $exe_ext/;
        s/^CPUID_OBJ=.*$/CPUID_OBJ= $cpuid_obj/;
        s/^BN_ASM=.*$/BN_ASM= $bn_obj/;
+       s/^EC_ASM=.*$/EC_ASM= $ec_obj/;
        s/^DES_ENC=.*$/DES_ENC= $des_obj/;
        s/^AES_ENC=.*$/AES_ENC= $aes_obj/;
        s/^BF_ENC=.*$/BF_ENC= $bf_obj/;
@@ -1699,6 +1783,7 @@ print "CFLAG         =$cflags\n";
 print "EX_LIBS       =$lflags\n";
 print "CPUID_OBJ     =$cpuid_obj\n";
 print "BN_ASM        =$bn_obj\n";
+print "EC_ASM        =$ec_obj\n";
 print "DES_ENC       =$des_obj\n";
 print "AES_ENC       =$aes_obj\n";
 print "BF_ENC        =$bf_obj\n";
@@ -1997,7 +2082,7 @@ BEGIN
            VALUE "ProductVersion", "$version\\0"
            // Optional:
            //VALUE "Comments", "\\0"
-           VALUE "LegalCopyright", "Copyright Â© 1998-2005 The OpenSSL Project. Copyright Â© 1995-1998 Eric A. Young, Tim J. Hudson. All rights reserved.\\0"
+           VALUE "LegalCopyright", "Copyright  Â© 1998-2005 The OpenSSL Project. Copyright Â© 1995-1998 Eric A. Young, Tim J. Hudson. All rights reserved.\\0"
            //VALUE "LegalTrademarks", "\\0"
            //VALUE "PrivateBuild", "\\0"
            //VALUE "SpecialBuild", "\\0"
@@ -2106,12 +2191,12 @@ sub print_table_entry
        {
        my $target = shift;
 
-       (my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags,
-       my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj,
-       my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj,
-       my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj,
-       my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag,
-       my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multilib)=
+       my ($cc, $cflags, $unistd, $thread_cflag, $sys_id, $lflags,
+           $bn_ops, $cpuid_obj, $bn_obj, $ec_obj, $des_obj, $aes_obj, $bf_obj,
+           $md5_obj, $sha1_obj, $cast_obj, $rc4_obj, $rmd160_obj,
+           $rc5_obj, $wp_obj, $cmll_obj, $modes_obj, $engines_obj,
+           $perlasm_scheme, $dso_scheme, $shared_target, $shared_cflag,
+           $shared_ldflag, $shared_extension, $ranlib, $arflags, $multilib)=
        split(/\s*:\s*/,$table{$target} . ":" x 30 , -1);
                        
        print <<EOF
@@ -2126,6 +2211,7 @@ sub print_table_entry
 \$bn_ops       = $bn_ops
 \$cpuid_obj    = $cpuid_obj
 \$bn_obj       = $bn_obj
+\$ec_obj       = $ec_obj
 \$des_obj      = $des_obj
 \$aes_obj      = $aes_obj
 \$bf_obj       = $bf_obj
diff --git a/FAQ b/FAQ
index f8ea604..3be8310 100644 (file)
--- a/FAQ
+++ b/FAQ
@@ -83,7 +83,7 @@ OpenSSL  -  Frequently Asked Questions
 * Which is the current version of OpenSSL?
 
 The current version is available from <URL: http://www.openssl.org>.
-OpenSSL 1.0.1e was released on Feb 11th, 2013.
+OpenSSL 1.0.1a was released on Apr 19th, 2012.
 
 In addition to the current stable release, you can also access daily
 snapshots of the OpenSSL development version at <URL:
@@ -184,14 +184,18 @@ Therefore the answer to the common question "when will feature X be
 backported to OpenSSL 1.0.0/0.9.8?" is "never" but it could appear
 in the next minor release.
 
+* What happens when the letter release reaches z?
+
+It was decided after the release of OpenSSL 0.9.8y the next version should
+be 0.9.8za then 0.9.8zb and so on.
+
+
 [LEGAL] =======================================================================
 
 * Do I need patent licenses to use OpenSSL?
 
-The patents section of the README file lists patents that may apply to
-you if you want to use OpenSSL.  For information on intellectual
-property rights, please consult a lawyer.  The OpenSSL team does not
-offer legal advice.
+For information on intellectual property rights, please consult a lawyer.
+The OpenSSL team does not offer legal advice.
 
 You can configure OpenSSL so as not to use IDEA, MDC2 and RC5 by using
  ./config no-idea no-mdc2 no-rc5
@@ -608,8 +612,8 @@ valid for the current DOS session.
 * What is special about OpenSSL on Redhat?
 
 Red Hat Linux (release 7.0 and later) include a preinstalled limited
-version of OpenSSL. For patent reasons, support for IDEA, RC5 and MDC2
-is disabled in this version. The same may apply to other Linux distributions.
+version of OpenSSL. Red Hat has chosen to disable support for IDEA, RC5 and
+MDC2 in this version. The same may apply to other Linux distributions.
 Users may therefore wish to install more or all of the features left out.
 
 To do this you MUST ensure that you do not overwrite the openssl that is in
@@ -632,11 +636,6 @@ relevant updates in packages up to and including 0.9.6b.
 A possible way around this is to persuade Red Hat to produce a non-US
 version of Red Hat Linux.
 
-FYI: Patent numbers and expiry dates of US patents:
-MDC-2: 4,908,861 13/03/2007
-IDEA:  5,214,703 25/05/2010
-RC5:   5,724,428 03/03/2015
-
 
 * Why does the OpenSSL compilation fail on MacOS X?
 
@@ -862,7 +861,7 @@ The opposite assumes we already have len bytes in buf:
  p = buf;
  p7 = d2i_PKCS7(NULL, &p, len);
 
-At this point p7 contains a valid PKCS7 structure of NULL if an error
+At this point p7 contains a valid PKCS7 structure or NULL if an error
 occurred. If an error occurred ERR_print_errors(bio) should give more
 information.
 
@@ -874,6 +873,21 @@ that has been read or written. This may well be uninitialized data
 and attempts to free the buffer will have unpredictable results
 because it no longer points to the same address.
 
+Memory allocation and encoding can also be combined in a single
+operation by the ASN1 routines:
+
+ unsigned char *buf = NULL;    /* mandatory */
+ int len;
+ len = i2d_PKCS7(p7, &buf);
+ if (len < 0)
+       /* Error */
+ /* Do some things with 'buf' */
+ /* Finished with buf: free it */
+ OPENSSL_free(buf);
+
+In this special case the "buf" parameter is *not* incremented, it points
+to the start of the encoding.
+
 
 * OpenSSL uses DER but I need BER format: does OpenSSL support BER?
 
diff --git a/GitConfigure b/GitConfigure
new file mode 100755 (executable)
index 0000000..bbab33e
--- /dev/null
+++ b/GitConfigure
@@ -0,0 +1,13 @@
+#!/bin/sh
+#
+# Configure the tree and generate makefile.<branch> for the current git
+# branch, passing branch-suffixed OUT, TMP and INC settings to util/mk1mf.pl.
+# All arguments are handed to ./Configure, followed by "no-symlinks".
+
+BRANCH=`git rev-parse --abbrev-ref HEAD`
+
+# Quote "$@" so that arguments containing whitespace reach Configure intact.
+./Configure "$@" no-symlinks
+make files
+util/mk1mf.pl OUT=out.$BRANCH TMP=tmp.$BRANCH INC=inc.$BRANCH copy > makefile.$BRANCH
+make -f makefile.$BRANCH init
diff --git a/GitMake b/GitMake
new file mode 100755 (executable)
index 0000000..47beffd
--- /dev/null
+++ b/GitMake
@@ -0,0 +1,9 @@
+#!/bin/sh
+#
+# Run make against makefile.<branch> for the current git branch.
+# All arguments are handed to make unchanged.
+
+BRANCH=`git rev-parse --abbrev-ref HEAD`
+
+# Quote "$@" so that arguments containing whitespace reach make intact.
+make -f makefile.$BRANCH "$@"
diff --git a/Makefile b/Makefile
index cd1c08c..f361472 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -4,16 +4,16 @@
 ## Makefile for OpenSSL
 ##
 
-VERSION=1.0.1p
+VERSION=1.0.2d
 MAJOR=1
-MINOR=0.1
+MINOR=0.2
 SHLIB_VERSION_NUMBER=1.0.0
 SHLIB_VERSION_HISTORY=
 SHLIB_MAJOR=1
 SHLIB_MINOR=0.0
 SHLIB_EXT=
 PLATFORM=dist
-OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-store no-unit-test no-zlib no-zlib-dynamic static-engine
+OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-libunbound no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-ssl-trace no-store no-unit-test no-zlib no-zlib-dynamic static-engine
 CONFIGURE_ARGS=dist
 SHLIB_TARGET=
 
@@ -61,7 +61,7 @@ OPENSSLDIR=/usr/local/ssl
 
 CC= cc
 CFLAG= -O
-DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST
+DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST
 PEX_LIBS= 
 EX_LIBS= 
 EXE_EXT= 
@@ -71,7 +71,7 @@ RANLIB= /usr/bin/ranlib
 NM= nm
 PERL= /usr/bin/perl
 TAR= tar
-TARFLAGS= --no-recursion --record-size=10240
+TARFLAGS= --no-recursion
 MAKEDEPPROG=makedepend
 LIBDIR=lib
 
@@ -90,6 +90,7 @@ PROCESSOR=
 # CPUID module collects small commonly used assembler snippets
 CPUID_OBJ= mem_clr.o
 BN_ASM= bn_asm.o
+EC_ASM= 
 DES_ENC= des_enc.o fcrypt_b.o
 AES_ENC= aes_core.o aes_cbc.o
 BF_ENC= bf_enc.o
@@ -223,8 +224,8 @@ BUILDENV=   PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)' \
                EXE_EXT='$(EXE_EXT)' SHARED_LIBS='$(SHARED_LIBS)'       \
                SHLIB_EXT='$(SHLIB_EXT)' SHLIB_TARGET='$(SHLIB_TARGET)' \
                PEX_LIBS='$(PEX_LIBS)' EX_LIBS='$(EX_LIBS)'     \
-               CPUID_OBJ='$(CPUID_OBJ)'                        \
-               BN_ASM='$(BN_ASM)' DES_ENC='$(DES_ENC)'         \
+               CPUID_OBJ='$(CPUID_OBJ)' BN_ASM='$(BN_ASM)'     \
+               EC_ASM='$(EC_ASM)' DES_ENC='$(DES_ENC)'         \
                AES_ENC='$(AES_ENC)' CMLL_ENC='$(CMLL_ENC)'     \
                BF_ENC='$(BF_ENC)' CAST_ENC='$(CAST_ENC)'       \
                RC4_ENC='$(RC4_ENC)' RC5_ENC='$(RC5_ENC)'       \
@@ -332,7 +333,7 @@ clean-shared:
                        done; \
                fi; \
                ( set -x; rm -f lib$$i$(SHLIB_EXT) ); \
-               if [ "$(PLATFORM)" = "Cygwin" ]; then \
+               if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \
                        ( set -x; rm -f cyg$$i$(SHLIB_EXT) lib$$i$(SHLIB_EXT).a ); \
                fi; \
        done
@@ -381,11 +382,11 @@ libssl.pc: Makefile
            echo 'libdir=$${exec_prefix}/$(LIBDIR)'; \
            echo 'includedir=$${prefix}/include'; \
            echo ''; \
-           echo 'Name: OpenSSL'; \
+           echo 'Name: OpenSSL-libssl'; \
            echo 'Description: Secure Sockets Layer and cryptography libraries'; \
            echo 'Version: '$(VERSION); \
-           echo 'Requires'; \
-           echo 'Libs: -L$${libdir} -lssl -lcrypto'; \
+           echo 'Requires.private: libcrypto'; \
+           echo 'Libs: -L$${libdir} -lssl'; \
            echo 'Libs.private: $(EX_LIBS)'; \
            echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > libssl.pc
 
@@ -398,10 +399,7 @@ openssl.pc: Makefile
            echo 'Name: OpenSSL'; \
            echo 'Description: Secure Sockets Layer and cryptography libraries and tools'; \
            echo 'Version: '$(VERSION); \
-           echo 'Requires: '; \
-           echo 'Libs: -L$${libdir} -lssl -lcrypto'; \
-           echo 'Libs.private: $(EX_LIBS)'; \
-           echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > openssl.pc
+           echo 'Requires: libssl libcrypto' ) > openssl.pc
 
 Makefile: Makefile.org Configure config
        @echo "Makefile is older than Makefile.org, Configure or config."
@@ -564,11 +562,7 @@ install_sw:
                do \
                        if [ -f "$$i" -o -f "$$i.a" ]; then \
                        (       echo installing $$i; \
-                               if [ "$(PLATFORM)" != "Cygwin" ]; then \
-                                       cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
-                                       chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
-                                       mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \
-                               else \
+                               if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \
                                        c=`echo $$i | sed 's/^lib\(.*\)\.dll\.a/cyg\1-$(SHLIB_VERSION_NUMBER).dll/'`; \
                                        cp $$c $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \
                                        chmod 755 $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \
@@ -576,6 +570,10 @@ install_sw:
                                        cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
                                        chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
                                        mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \
+                               else \
+                                       cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
+                                       chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
+                                       mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \
                                fi ); \
                                if expr $(PLATFORM) : 'mingw' > /dev/null; then \
                                (       case $$i in \
@@ -608,6 +606,10 @@ install_sw:
 
 install_html_docs:
        here="`pwd`"; \
+       filecase=; \
+       case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \
+               filecase=-i; \
+       esac; \
        for subdir in apps crypto ssl; do \
                mkdir -p $(INSTALL_PREFIX)$(HTMLDIR)/$$subdir; \
                for i in doc/$$subdir/*.pod; do \
@@ -636,9 +638,9 @@ install_docs:
        @pod2man="`cd ./util; ./pod2mantest $(PERL)`"; \
        here="`pwd`"; \
        filecase=; \
-       if [ "$(PLATFORM)" = "DJGPP" -o "$(PLATFORM)" = "Cygwin" -o "$(PLATFORM)" = "mingw" ]; then \
+       case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \
                filecase=-i; \
-       fi; \
+       esac; \
        set -e; for i in doc/apps/*.pod; do \
                fn=`basename $$i .pod`; \
                sec=`$(PERL) util/extract-section.pl 1 < $$i`; \
diff --git a/Makefile.bak b/Makefile.bak
index e7aa391..9a3e50d 100644 (file)
--- a/Makefile.bak
+++ b/Makefile.bak
@@ -4,16 +4,16 @@
 ## Makefile for OpenSSL
 ##
 
-VERSION=1.0.1p-dev
+VERSION=1.0.2d-dev
 MAJOR=1
-MINOR=0.1
+MINOR=0.2
 SHLIB_VERSION_NUMBER=1.0.0
 SHLIB_VERSION_HISTORY=
 SHLIB_MAJOR=1
 SHLIB_MINOR=0.0
 SHLIB_EXT=
 PLATFORM=gcc
-OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-store no-unit-test no-zlib no-zlib-dynamic static-engine
+OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-libunbound no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-ssl-trace no-store no-unit-test no-zlib no-zlib-dynamic static-engine
 CONFIGURE_ARGS=gcc
 SHLIB_TARGET=
 
@@ -61,7 +61,7 @@ OPENSSLDIR=/usr/local/ssl
 
 CC= gcc
 CFLAG= -O3
-DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST
+DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST
 PEX_LIBS= 
 EX_LIBS= 
 EXE_EXT= 
@@ -71,7 +71,7 @@ RANLIB= /usr/bin/ranlib
 NM= nm
 PERL= /usr/bin/perl
 TAR= tar
-TARFLAGS= --no-recursion --record-size=10240
+TARFLAGS= --no-recursion
 MAKEDEPPROG= gcc
 LIBDIR=lib
 
@@ -90,6 +90,7 @@ PROCESSOR=
 # CPUID module collects small commonly used assembler snippets
 CPUID_OBJ= mem_clr.o
 BN_ASM= bn_asm.o
+EC_ASM= 
 DES_ENC= des_enc.o fcrypt_b.o
 AES_ENC= aes_core.o aes_cbc.o
 BF_ENC= bf_enc.o
@@ -223,8 +224,8 @@ BUILDENV=   PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)' \
                EXE_EXT='$(EXE_EXT)' SHARED_LIBS='$(SHARED_LIBS)'       \
                SHLIB_EXT='$(SHLIB_EXT)' SHLIB_TARGET='$(SHLIB_TARGET)' \
                PEX_LIBS='$(PEX_LIBS)' EX_LIBS='$(EX_LIBS)'     \
-               CPUID_OBJ='$(CPUID_OBJ)'                        \
-               BN_ASM='$(BN_ASM)' DES_ENC='$(DES_ENC)'         \
+               CPUID_OBJ='$(CPUID_OBJ)' BN_ASM='$(BN_ASM)'     \
+               EC_ASM='$(EC_ASM)' DES_ENC='$(DES_ENC)'         \
                AES_ENC='$(AES_ENC)' CMLL_ENC='$(CMLL_ENC)'     \
                BF_ENC='$(BF_ENC)' CAST_ENC='$(CAST_ENC)'       \
                RC4_ENC='$(RC4_ENC)' RC5_ENC='$(RC5_ENC)'       \
@@ -332,7 +333,7 @@ clean-shared:
                        done; \
                fi; \
                ( set -x; rm -f lib$$i$(SHLIB_EXT) ); \
-               if [ "$(PLATFORM)" = "Cygwin" ]; then \
+               if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \
                        ( set -x; rm -f cyg$$i$(SHLIB_EXT) lib$$i$(SHLIB_EXT).a ); \
                fi; \
        done
@@ -381,11 +382,11 @@ libssl.pc: Makefile
            echo 'libdir=$${exec_prefix}/$(LIBDIR)'; \
            echo 'includedir=$${prefix}/include'; \
            echo ''; \
-           echo 'Name: OpenSSL'; \
+           echo 'Name: OpenSSL-libssl'; \
            echo 'Description: Secure Sockets Layer and cryptography libraries'; \
            echo 'Version: '$(VERSION); \
-           echo 'Requires'; \
-           echo 'Libs: -L$${libdir} -lssl -lcrypto'; \
+           echo 'Requires.private: libcrypto'; \
+           echo 'Libs: -L$${libdir} -lssl'; \
            echo 'Libs.private: $(EX_LIBS)'; \
            echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > libssl.pc
 
@@ -398,10 +399,7 @@ openssl.pc: Makefile
            echo 'Name: OpenSSL'; \
            echo 'Description: Secure Sockets Layer and cryptography libraries and tools'; \
            echo 'Version: '$(VERSION); \
-           echo 'Requires: '; \
-           echo 'Libs: -L$${libdir} -lssl -lcrypto'; \
-           echo 'Libs.private: $(EX_LIBS)'; \
-           echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > openssl.pc
+           echo 'Requires: libssl libcrypto' ) > openssl.pc
 
 Makefile: Makefile.org Configure config
        @echo "Makefile is older than Makefile.org, Configure or config."
@@ -564,11 +562,7 @@ install_sw:
                do \
                        if [ -f "$$i" -o -f "$$i.a" ]; then \
                        (       echo installing $$i; \
-                               if [ "$(PLATFORM)" != "Cygwin" ]; then \
-                                       cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
-                                       chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
-                                       mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \
-                               else \
+                               if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \
                                        c=`echo $$i | sed 's/^lib\(.*\)\.dll\.a/cyg\1-$(SHLIB_VERSION_NUMBER).dll/'`; \
                                        cp $$c $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \
                                        chmod 755 $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \
@@ -576,6 +570,10 @@ install_sw:
                                        cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
                                        chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
                                        mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \
+                               else \
+                                       cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
+                                       chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
+                                       mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \
                                fi ); \
                                if expr $(PLATFORM) : 'mingw' > /dev/null; then \
                                (       case $$i in \
@@ -608,6 +606,10 @@ install_sw:
 
 install_html_docs:
        here="`pwd`"; \
+       filecase=; \
+       case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \
+               filecase=-i; \
+       esac; \
        for subdir in apps crypto ssl; do \
                mkdir -p $(INSTALL_PREFIX)$(HTMLDIR)/$$subdir; \
                for i in doc/$$subdir/*.pod; do \
@@ -636,9 +638,9 @@ install_docs:
        @pod2man="`cd ./util; ./pod2mantest $(PERL)`"; \
        here="`pwd`"; \
        filecase=; \
-       if [ "$(PLATFORM)" = "DJGPP" -o "$(PLATFORM)" = "Cygwin" -o "$(PLATFORM)" = "mingw" ]; then \
+       case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \
                filecase=-i; \
-       fi; \
+       esac; \
        set -e; for i in doc/apps/*.pod; do \
                fn=`basename $$i .pod`; \
                sec=`$(PERL) util/extract-section.pl 1 < $$i`; \
index 55a3700..d77e264 100644 (file)
@@ -69,7 +69,7 @@ RANLIB= ranlib
 NM= nm
 PERL= perl
 TAR= tar
-TARFLAGS= --no-recursion --record-size=10240
+TARFLAGS= --no-recursion
 MAKEDEPPROG=makedepend
 LIBDIR=lib
 
@@ -88,6 +88,7 @@ PROCESSOR=
 # CPUID module collects small commonly used assembler snippets
 CPUID_OBJ= 
 BN_ASM= bn_asm.o
+EC_ASM=
 DES_ENC= des_enc.o fcrypt_b.o
 AES_ENC= aes_core.o aes_cbc.o
 BF_ENC= bf_enc.o
@@ -221,8 +222,8 @@ BUILDENV=   PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)' \
                EXE_EXT='$(EXE_EXT)' SHARED_LIBS='$(SHARED_LIBS)'       \
                SHLIB_EXT='$(SHLIB_EXT)' SHLIB_TARGET='$(SHLIB_TARGET)' \
                PEX_LIBS='$(PEX_LIBS)' EX_LIBS='$(EX_LIBS)'     \
-               CPUID_OBJ='$(CPUID_OBJ)'                        \
-               BN_ASM='$(BN_ASM)' DES_ENC='$(DES_ENC)'         \
+               CPUID_OBJ='$(CPUID_OBJ)' BN_ASM='$(BN_ASM)'     \
+               EC_ASM='$(EC_ASM)' DES_ENC='$(DES_ENC)'         \
                AES_ENC='$(AES_ENC)' CMLL_ENC='$(CMLL_ENC)'     \
                BF_ENC='$(BF_ENC)' CAST_ENC='$(CAST_ENC)'       \
                RC4_ENC='$(RC4_ENC)' RC5_ENC='$(RC5_ENC)'       \
@@ -330,7 +331,7 @@ clean-shared:
                        done; \
                fi; \
                ( set -x; rm -f lib$$i$(SHLIB_EXT) ); \
-               if [ "$(PLATFORM)" = "Cygwin" ]; then \
+               if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \
                        ( set -x; rm -f cyg$$i$(SHLIB_EXT) lib$$i$(SHLIB_EXT).a ); \
                fi; \
        done
@@ -379,11 +380,11 @@ libssl.pc: Makefile
            echo 'libdir=$${exec_prefix}/$(LIBDIR)'; \
            echo 'includedir=$${prefix}/include'; \
            echo ''; \
-           echo 'Name: OpenSSL'; \
+           echo 'Name: OpenSSL-libssl'; \
            echo 'Description: Secure Sockets Layer and cryptography libraries'; \
            echo 'Version: '$(VERSION); \
-           echo 'Requires'; \
-           echo 'Libs: -L$${libdir} -lssl -lcrypto'; \
+           echo 'Requires.private: libcrypto'; \
+           echo 'Libs: -L$${libdir} -lssl'; \
            echo 'Libs.private: $(EX_LIBS)'; \
            echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > libssl.pc
 
@@ -396,10 +397,7 @@ openssl.pc: Makefile
            echo 'Name: OpenSSL'; \
            echo 'Description: Secure Sockets Layer and cryptography libraries and tools'; \
            echo 'Version: '$(VERSION); \
-           echo 'Requires: '; \
-           echo 'Libs: -L$${libdir} -lssl -lcrypto'; \
-           echo 'Libs.private: $(EX_LIBS)'; \
-           echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > openssl.pc
+           echo 'Requires: libssl libcrypto' ) > openssl.pc
 
 Makefile: Makefile.org Configure config
        @echo "Makefile is older than Makefile.org, Configure or config."
@@ -562,11 +560,7 @@ install_sw:
                do \
                        if [ -f "$$i" -o -f "$$i.a" ]; then \
                        (       echo installing $$i; \
-                               if [ "$(PLATFORM)" != "Cygwin" ]; then \
-                                       cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
-                                       chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
-                                       mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \
-                               else \
+                               if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \
                                        c=`echo $$i | sed 's/^lib\(.*\)\.dll\.a/cyg\1-$(SHLIB_VERSION_NUMBER).dll/'`; \
                                        cp $$c $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \
                                        chmod 755 $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \
@@ -574,6 +568,10 @@ install_sw:
                                        cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
                                        chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
                                        mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \
+                               else \
+                                       cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
+                                       chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \
+                                       mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \
                                fi ); \
                                if expr $(PLATFORM) : 'mingw' > /dev/null; then \
                                (       case $$i in \
@@ -606,6 +604,10 @@ install_sw:
 
 install_html_docs:
        here="`pwd`"; \
+       filecase=; \
+       case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \
+               filecase=-i; \
+       esac; \
        for subdir in apps crypto ssl; do \
                mkdir -p $(INSTALL_PREFIX)$(HTMLDIR)/$$subdir; \
                for i in doc/$$subdir/*.pod; do \
@@ -634,9 +636,9 @@ install_docs:
        @pod2man="`cd ./util; ./pod2mantest $(PERL)`"; \
        here="`pwd`"; \
        filecase=; \
-       if [ "$(PLATFORM)" = "DJGPP" -o "$(PLATFORM)" = "Cygwin" -o "$(PLATFORM)" = "mingw" ]; then \
+       case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \
                filecase=-i; \
-       fi; \
+       esac; \
        set -e; for i in doc/apps/*.pod; do \
                fn=`basename $$i .pod`; \
                sec=`$(PERL) util/extract-section.pl 1 < $$i`; \
diff --git a/NEWS b/NEWS
index 5e76d3f..29e4805 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -5,15 +5,15 @@
   This file gives a brief overview of the major changes between each OpenSSL
   release. For more details please read the CHANGES file.
 
-  Major changes between OpenSSL 1.0.1o and OpenSSL 1.0.1p [9 Jul 2015]
+  Major changes between OpenSSL 1.0.2c and OpenSSL 1.0.2d [9 Jul 2015]
 
       o Alternate chains certificate forgery (CVE-2015-1793)
 
-  Major changes between OpenSSL 1.0.1n and OpenSSL 1.0.1o [12 Jun 2015]
+  Major changes between OpenSSL 1.0.2b and OpenSSL 1.0.2c [12 Jun 2015]
 
       o Fix HMAC ABI incompatibility
 
-  Major changes between OpenSSL 1.0.1m and OpenSSL 1.0.1n [11 Jun 2015]
+  Major changes between OpenSSL 1.0.2a and OpenSSL 1.0.2b [11 Jun 2015]
 
       o Malformed ECParameters causes infinite loop (CVE-2015-1788)
       o Exploitable out-of-bounds read in X509_cmp_time (CVE-2015-1789)
       o CMS verify infinite loop with unknown hash function (CVE-2015-1792)
       o Race condition handling NewSessionTicket (CVE-2015-1791)
 
-  Major changes between OpenSSL 1.0.1l and OpenSSL 1.0.1m [19 Mar 2015]
+  Major changes between OpenSSL 1.0.2 and OpenSSL 1.0.2a [19 Mar 2015]
 
+      o OpenSSL 1.0.2 ClientHello sigalgs DoS fix (CVE-2015-0291)
+      o Multiblock corrupted pointer fix (CVE-2015-0290)
+      o Segmentation fault in DTLSv1_listen fix (CVE-2015-0207)
       o Segmentation fault in ASN1_TYPE_cmp fix (CVE-2015-0286)
+      o Segmentation fault for invalid PSS parameters fix (CVE-2015-0208)
       o ASN.1 structure reuse memory corruption fix (CVE-2015-0287)
       o PKCS7 NULL pointer dereferences fix (CVE-2015-0289)
       o DoS via reachable assert in SSLv2 servers fix (CVE-2015-0293)
+      o Empty CKE with client auth and DHE fix (CVE-2015-1787)
+      o Handshake with unseeded PRNG fix (CVE-2015-0285)
       o Use After Free following d2i_ECPrivatekey error fix (CVE-2015-0209)
       o X509_to_X509_REQ NULL pointer deref fix (CVE-2015-0288)
       o Removed the export ciphers from the DEFAULT ciphers
 
+  Major changes between OpenSSL 1.0.1l and OpenSSL 1.0.2 [22 Jan 2015]:
+
+      o Suite B support for TLS 1.2 and DTLS 1.2
+      o Support for DTLS 1.2
+      o TLS automatic EC curve selection.
+      o API to set TLS supported signature algorithms and curves
+      o SSL_CONF configuration API.
+      o TLS Brainpool support.
+      o ALPN support.
+      o CMS support for RSA-PSS, RSA-OAEP, ECDH and X9.42 DH.
+
   Major changes between OpenSSL 1.0.1k and OpenSSL 1.0.1l [15 Jan 2015]
 
       o Build fixes for the Windows and OpenVMS platforms
diff --git a/README b/README
index bf03f30..493141b 100644 (file)
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
 
- OpenSSL 1.0.1p 9 Jul 2015
+ OpenSSL 1.0.2d 9 Jul 2015
 
  Copyright (c) 1998-2011 The OpenSSL Project
  Copyright (c) 1995-1998 Eric A. Young, Tim J. Hudson
         SSL/TLS Client and Server Tests
         Handling of S/MIME signed or encrypted mail
 
-
- PATENTS
- -------
-
- Various companies hold various patents for various algorithms in various
- locations around the world. _YOU_ are responsible for ensuring that your use
- of any algorithms is legal by checking if there are any patents in your
- country.  The file contains some of the patents that we know about or are
- rumored to exist. This is not a definitive list.
-
- RSA Security holds software patents on the RC5 algorithm.  If you
- intend to use this cipher, you must contact RSA Security for
- licensing conditions. Their web page is http://www.rsasecurity.com/.
-
- RC4 is a trademark of RSA Security, so use of this label should perhaps
- only be used with RSA Security's permission.
-
- The IDEA algorithm is patented by Ascom in Austria, France, Germany, Italy,
- Japan, the Netherlands, Spain, Sweden, Switzerland, UK and the USA.  They
- should be contacted if that algorithm is to be used; their web page is
- http://www.ascom.ch/.
-
- NTT and Mitsubishi have patents and pending patents on the Camellia
- algorithm, but allow use at no charge without requiring an explicit
- licensing agreement: http://info.isl.ntt.co.jp/crypt/eng/info/chiteki.html
-
  INSTALLATION
  ------------
 
     - Problem Description (steps that will reproduce the problem, if known)
     - Stack Traceback (if the application dumps core)
 
- Report the bug to the OpenSSL project via the Request Tracker
- (http://www.openssl.org/support/rt.html) by mail to:
+ Email the report to:
 
     openssl-bugs@openssl.org
 
  or support queries. Just because something doesn't work the way you expect
  does not mean it is necessarily a bug in OpenSSL.
 
- Note that mail to openssl-bugs@openssl.org is recorded in the publicly
- readable request tracker database and is forwarded to a public
- mailing list. Confidential mail may be sent to openssl-security@openssl.org
- (PGP key available from the key servers).
+ Note that mail to openssl-bugs@openssl.org is recorded in the public
+ request tracker database (see https://www.openssl.org/support/rt.html
+ for details) and also forwarded to a public mailing list. Confidential
+ mail may be sent to openssl-security@openssl.org (PGP key available from
+ the key servers).
 
  HOW TO CONTRIBUTE TO OpenSSL
  ----------------------------
index 6801238..7478fc3 100644 (file)
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#if !defined(OPENSSL_SYSNAME_WIN32) && !defined(NETWARE_CLIB)
+#if !defined(OPENSSL_SYSNAME_WIN32) && !defined(OPENSSL_SYSNAME_WINCE) && !defined(NETWARE_CLIB)
 # include <strings.h>
 #endif
 #include <sys/types.h>
@@ -285,6 +285,8 @@ int str2fmt(char *s)
         return (FORMAT_PKCS12);
     else if ((*s == 'E') || (*s == 'e'))
         return (FORMAT_ENGINE);
+    else if ((*s == 'H') || (*s == 'h'))
+        return FORMAT_HTTP;
     else if ((*s == 'P') || (*s == 'p')) {
         if (s[1] == 'V' || s[1] == 'v')
             return FORMAT_PVK;
@@ -787,12 +789,72 @@ static int load_pkcs12(BIO *err, BIO *in, const char *desc,
     return ret;
 }
 
+/*
+ * Download a certificate or a CRL over plain HTTP.
+ *
+ * url:   location to fetch; split into host/port/path by OCSP_parse_url().
+ * err:   BIO that receives the "https not supported" message; may be NULL.
+ * pcert: when non-NULL a certificate is requested and stored through it.
+ * pcrl:  used only when pcert is NULL; receives the downloaded CRL.
+ *
+ * Returns 1 on success.  Any other value (0 when the request could not be
+ * set up, otherwise whatever the final *_http_nbio() call returned) is
+ * treated as failure and the OpenSSL error queue is printed to bio_err.
+ */
+int load_cert_crl_http(const char *url, BIO *err,
+                       X509 **pcert, X509_CRL **pcrl)
+{
+    char *host = NULL, *port = NULL, *path = NULL;
+    BIO *bio = NULL;
+    OCSP_REQ_CTX *rctx = NULL;
+    int use_ssl, rv = 0;
+    if (!OCSP_parse_url(url, &host, &port, &path, &use_ssl))
+        goto err;
+    if (use_ssl) {
+        if (err)
+            BIO_puts(err, "https not supported\n");
+        goto err;
+    }
+    bio = BIO_new_connect(host);
+    if (!bio || !BIO_set_conn_port(bio, port))
+        goto err;
+    rctx = OCSP_REQ_CTX_new(bio, 1024);
+    if (!rctx)
+        goto err;
+    if (!OCSP_REQ_CTX_http(rctx, "GET", path))
+        goto err;
+    if (!OCSP_REQ_CTX_add1_header(rctx, "Host", host))
+        goto err;
+    /* Keep driving the request for as long as it asks to be retried (-1). */
+    if (pcert) {
+        do {
+            rv = X509_http_nbio(rctx, pcert);
+        } while (rv == -1);
+    } else {
+        do {
+            rv = X509_CRL_http_nbio(rctx, pcrl);
+        } while (rv == -1);
+    }
+
+ err:
+    /*
+     * Report the failure before releasing anything: the test on bio has to
+     * look at a live pointer, not at one BIO_free_all() already released.
+     */
+    if (rv != 1) {
+        if (bio && err)
+            BIO_printf(bio_err, "Error loading %s from %s\n",
+                       pcert ? "certificate" : "CRL", url);
+        ERR_print_errors(bio_err);
+    }
+    if (host)
+        OPENSSL_free(host);
+    if (path)
+        OPENSSL_free(path);
+    if (port)
+        OPENSSL_free(port);
+    if (bio)
+        BIO_free_all(bio);
+    if (rctx)
+        OCSP_REQ_CTX_free(rctx);
+    return rv;
+}
+
 X509 *load_cert(BIO *err, const char *file, int format,
                 const char *pass, ENGINE *e, const char *cert_descrip)
 {
     X509 *x = NULL;
     BIO *cert;
 
+    if (format == FORMAT_HTTP) {
+        load_cert_crl_http(file, err, &x, NULL);
+        return x;
+    }
+
     if ((cert = BIO_new(BIO_s_file())) == NULL) {
         ERR_print_errors(err);
         goto end;
@@ -850,6 +912,49 @@ X509 *load_cert(BIO *err, const char *file, int format,
     return (x);
 }
 
+/*
+ * Load a CRL from infile (or from stdin when infile is NULL) in the given
+ * format.  FORMAT_HTTP treats infile as a URL and downloads the CRL.
+ * Returns the CRL, or NULL on any failure; diagnostics go to bio_err.
+ */
+X509_CRL *load_crl(const char *infile, int format)
+{
+    X509_CRL *crl = NULL;
+    BIO *src = NULL;
+
+    if (format == FORMAT_HTTP) {
+        load_cert_crl_http(infile, bio_err, NULL, &crl);
+        return crl;
+    }
+
+    src = BIO_new(BIO_s_file());
+    if (src == NULL) {
+        ERR_print_errors(bio_err);
+        goto done;
+    }
+
+    if (infile != NULL) {
+        if (BIO_read_filename(src, infile) <= 0) {
+            perror(infile);
+            goto done;
+        }
+    } else {
+        BIO_set_fp(src, stdin, BIO_NOCLOSE);
+    }
+
+    switch (format) {
+    case FORMAT_ASN1:
+        crl = d2i_X509_CRL_bio(src, NULL);
+        break;
+    case FORMAT_PEM:
+        crl = PEM_read_bio_X509_CRL(src, NULL, NULL, NULL);
+        break;
+    default:
+        BIO_printf(bio_err, "bad input format specified for input crl\n");
+        goto done;
+    }
+
+    if (crl == NULL) {
+        BIO_printf(bio_err, "unable to load CRL\n");
+        ERR_print_errors(bio_err);
+    }
+
+ done:
+    BIO_free(src);
+    return (crl);
+}
+
 EVP_PKEY *load_key(BIO *err, const char *file, int format, int maybe_stdin,
                    const char *pass, ENGINE *e, const char *key_descrip)
 {
@@ -2159,6 +2264,9 @@ int args_verify(char ***pargs, int *pargc,
     char **oldargs = *pargs;
     char *arg = **pargs, *argn = (*pargs)[1];
     time_t at_time = 0;
+    char *hostname = NULL;
+    char *email = NULL;
+    char *ipasc = NULL;
     if (!strcmp(arg, "-policy")) {
         if (!argn)
             *badarg = 1;
@@ -2212,6 +2320,21 @@ int args_verify(char ***pargs, int *pargc,
             at_time = (time_t)timestamp;
         }
         (*pargs)++;
+    } else if (strcmp(arg, "-verify_hostname") == 0) {
+        if (!argn)
+            *badarg = 1;
+        hostname = argn;
+        (*pargs)++;
+    } else if (strcmp(arg, "-verify_email") == 0) {
+        if (!argn)
+            *badarg = 1;
+        email = argn;
+        (*pargs)++;
+    } else if (strcmp(arg, "-verify_ip") == 0) {
+        if (!argn)
+            *badarg = 1;
+        ipasc = argn;
+        (*pargs)++;
     } else if (!strcmp(arg, "-ignore_critical"))
         flags |= X509_V_FLAG_IGNORE_CRITICAL;
     else if (!strcmp(arg, "-issuer_checks"))
@@ -2238,6 +2361,16 @@ int args_verify(char ***pargs, int *pargc,
         flags |= X509_V_FLAG_NOTIFY_POLICY;
     else if (!strcmp(arg, "-check_ss_sig"))
         flags |= X509_V_FLAG_CHECK_SS_SIGNATURE;
+    else if (!strcmp(arg, "-trusted_first"))
+        flags |= X509_V_FLAG_TRUSTED_FIRST;
+    else if (!strcmp(arg, "-suiteB_128_only"))
+        flags |= X509_V_FLAG_SUITEB_128_LOS_ONLY;
+    else if (!strcmp(arg, "-suiteB_128"))
+        flags |= X509_V_FLAG_SUITEB_128_LOS;
+    else if (!strcmp(arg, "-suiteB_192"))
+        flags |= X509_V_FLAG_SUITEB_192_LOS;
+    else if (!strcmp(arg, "-partial_chain"))
+        flags |= X509_V_FLAG_PARTIAL_CHAIN;
     else if (!strcmp(arg, "-no_alt_chains"))
         flags |= X509_V_FLAG_NO_ALT_CHAINS;
     else
@@ -2269,6 +2402,15 @@ int args_verify(char ***pargs, int *pargc,
     if (at_time)
         X509_VERIFY_PARAM_set_time(*pm, at_time);
 
+    if (hostname && !X509_VERIFY_PARAM_set1_host(*pm, hostname, 0))
+        *badarg = 1;
+
+    if (email && !X509_VERIFY_PARAM_set1_email(*pm, email, 0))
+        *badarg = 1;
+
+    if (ipasc && !X509_VERIFY_PARAM_set1_ip_asc(*pm, ipasc))
+        *badarg = 1;
+
  end:
 
     (*pargs)++;
@@ -2552,6 +2694,9 @@ void jpake_client_auth(BIO *out, BIO *conn, const char *secret)
 
     BIO_puts(out, "JPAKE authentication succeeded, setting PSK\n");
 
+    if (psk_key)
+        OPENSSL_free(psk_key);
+
     psk_key = BN_bn2hex(JPAKE_get_shared_key(ctx));
 
     BIO_pop(bconn);
@@ -2581,6 +2726,9 @@ void jpake_server_auth(BIO *out, BIO *conn, const char *secret)
 
     BIO_puts(out, "JPAKE authentication succeeded, setting PSK\n");
 
+    if (psk_key)
+        OPENSSL_free(psk_key);
+
     psk_key = BN_bn2hex(JPAKE_get_shared_key(ctx));
 
     BIO_pop(bconn);
@@ -2591,7 +2739,7 @@ void jpake_server_auth(BIO *out, BIO *conn, const char *secret)
 
 #endif
 
-#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
+#ifndef OPENSSL_NO_TLSEXT
 /*-
  * next_protos_parse parses a comma separated list of strings into a string
  * in a format suitable for passing to SSL_CTX_set_next_protos_advertised.
@@ -2630,8 +2778,106 @@ unsigned char *next_protos_parse(unsigned short *outlen, const char *in)
     *outlen = len + 1;
     return out;
 }
-#endif                          /* !OPENSSL_NO_TLSEXT &&
-                                 * !OPENSSL_NO_NEXTPROTONEG */
+#endif                          /* ndef OPENSSL_NO_TLSEXT */
+
+/*
+ * For each non-NULL check value, print to bio whether it matches the
+ * certificate x.  Nothing is printed when x is NULL.
+ *
+ * Only a return of exactly 1 from the X509_check_* functions is reported
+ * as a match, so that their negative error returns are not shown as a
+ * successful match.
+ */
+void print_cert_checks(BIO *bio, X509 *x,
+                       const char *checkhost,
+                       const char *checkemail, const char *checkip)
+{
+    if (x == NULL)
+        return;
+    if (checkhost) {
+        BIO_printf(bio, "Hostname %s does%s match certificate\n",
+                   checkhost, X509_check_host(x, checkhost, 0, 0, NULL) == 1
+                   ? "" : " NOT");
+    }
+
+    if (checkemail) {
+        BIO_printf(bio, "Email %s does%s match certificate\n",
+                   checkemail, X509_check_email(x, checkemail, 0, 0) == 1
+                   ? "" : " NOT");
+    }
+
+    if (checkip) {
+        BIO_printf(bio, "IP %s does%s match certificate\n",
+                   checkip, X509_check_ip_asc(x, checkip, 0) == 1
+                   ? "" : " NOT");
+    }
+}
+
+/*
+ * Return the first URI beginning with "http://" among the full names of
+ * a distribution point, or NULL when there is none.  Distribution points
+ * without a name, or whose name type is not 0, are skipped.
+ */
+
+static const char *get_dp_url(DIST_POINT *dp)
+{
+    GENERAL_NAMES *names;
+    int idx;
+
+    if (dp->distpoint == NULL || dp->distpoint->type != 0)
+        return NULL;
+
+    names = dp->distpoint->name.fullname;
+    for (idx = 0; idx < sk_GENERAL_NAME_num(names); idx++) {
+        int kind;
+        GENERAL_NAME *entry = sk_GENERAL_NAME_value(names, idx);
+        ASN1_STRING *value = GENERAL_NAME_get0_value(entry, &kind);
+        char *text;
+
+        /* Need a URI long enough to hold the 7-character scheme prefix. */
+        if (kind != GEN_URI || ASN1_STRING_length(value) <= 6)
+            continue;
+        text = (char *)ASN1_STRING_data(value);
+        if (strncmp(text, "http://", 7) == 0)
+            return text;
+    }
+    return NULL;
+}
+
+/*
+ * Walk a CRL distribution point list looking for an http URL to download
+ * a CRL from.  Only the first URL found is tried; the result of load_crl()
+ * for it is returned as is.  Returns NULL when no entry offers an http URL.
+ */
+
+static X509_CRL *load_crl_crldp(STACK_OF(DIST_POINT) *crldp)
+{
+    int idx = 0;
+
+    while (idx < sk_DIST_POINT_num(crldp)) {
+        const char *url = get_dp_url(sk_DIST_POINT_value(crldp, idx));
+
+        if (url != NULL)
+            return load_crl(url, FORMAT_HTTP);
+        idx++;
+    }
+    return NULL;
+}
+
+/*
+ * Example of downloading CRLs from CRLDP: not usable for real world as it
+ * always downloads, doesn't support non-blocking I/O and doesn't cache
+ * anything.
+ *
+ * Looks up the CRL distribution points of the certificate currently being
+ * processed in ctx, downloads the base CRL and, if available, the delta
+ * CRL named by the "freshest CRL" extension.  nm is not used.
+ *
+ * Returns a newly allocated stack holding the downloaded CRL(s), or NULL
+ * when no base CRL could be obtained or the stack could not be built.
+ */
+
+static STACK_OF(X509_CRL) *crls_http_cb(X509_STORE_CTX *ctx, X509_NAME *nm)
+{
+    X509 *x;
+    STACK_OF(X509_CRL) *crls = NULL;
+    X509_CRL *crl;
+    STACK_OF(DIST_POINT) *crldp;
+    x = X509_STORE_CTX_get_current_cert(ctx);
+    crldp = X509_get_ext_d2i(x, NID_crl_distribution_points, NULL, NULL);
+    crl = load_crl_crldp(crldp);
+    sk_DIST_POINT_pop_free(crldp, DIST_POINT_free);
+    if (!crl)
+        return NULL;
+    crls = sk_X509_CRL_new_null();
+    /* On allocation failure release the CRL instead of leaking it. */
+    if (crls == NULL || !sk_X509_CRL_push(crls, crl)) {
+        X509_CRL_free(crl);
+        sk_X509_CRL_free(crls);
+        return NULL;
+    }
+    /* Try to download delta CRL */
+    crldp = X509_get_ext_d2i(x, NID_freshest_crl, NULL, NULL);
+    crl = load_crl_crldp(crldp);
+    sk_DIST_POINT_pop_free(crldp, DIST_POINT_free);
+    /* The delta CRL is optional: drop it if it cannot be stored. */
+    if (crl && !sk_X509_CRL_push(crls, crl))
+        X509_CRL_free(crl);
+    return crls;
+}
+
+/*
+ * Install crls_http_cb as the CRL lookup callback of store st, so that
+ * CRL lookups for that store go through the HTTP download example above.
+ */
+void store_setup_crl_download(X509_STORE *st)
+{
+    X509_STORE_set_lookup_crls_cb(st, crls_http_cb);
+}
 
 /*
  * Platform-specific sections
index 33b293e..8276e70 100644 (file)
@@ -205,7 +205,7 @@ extern BIO *bio_err;
 #  endif
 # endif
 
-# ifdef OPENSSL_SYSNAME_WIN32
+# if defined(OPENSSL_SYSNAME_WIN32) || defined(OPENSSL_SYSNAME_WINCE)
 #  define openssl_fdset(a,b) FD_SET((unsigned int)a, b)
 # else
 #  define openssl_fdset(a,b) FD_SET(a, b)
@@ -245,6 +245,9 @@ int app_passwd(BIO *err, char *arg1, char *arg2, char **pass1, char **pass2);
 int add_oid_section(BIO *err, CONF *conf);
 X509 *load_cert(BIO *err, const char *file, int format,
                 const char *pass, ENGINE *e, const char *cert_descrip);
+X509_CRL *load_crl(const char *infile, int format);
+int load_cert_crl_http(const char *url, BIO *err,
+                       X509 **pcert, X509_CRL **pcrl);
 EVP_PKEY *load_key(BIO *err, const char *file, int format, int maybe_stdin,
                    const char *pass, ENGINE *e, const char *key_descrip);
 EVP_PKEY *load_pubkey(BIO *err, const char *file, int format, int maybe_stdin,
@@ -262,8 +265,9 @@ ENGINE *setup_engine(BIO *err, const char *engine, int debug);
 
 # ifndef OPENSSL_NO_OCSP
 OCSP_RESPONSE *process_responder(BIO *err, OCSP_REQUEST *req,
-                                 char *host, char *path, char *port,
-                                 int use_ssl, STACK_OF(CONF_VALUE) *headers,
+                                 const char *host, const char *path,
+                                 const char *port, int use_ssl,
+                                 const STACK_OF(CONF_VALUE) *headers,
                                  int req_timeout);
 # endif
 
@@ -334,10 +338,15 @@ void jpake_client_auth(BIO *out, BIO *conn, const char *secret);
 void jpake_server_auth(BIO *out, BIO *conn, const char *secret);
 # endif
 
-# if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
+# ifndef OPENSSL_NO_TLSEXT
 unsigned char *next_protos_parse(unsigned short *outlen, const char *in);
-# endif                         /* !OPENSSL_NO_TLSEXT &&
-                                 * !OPENSSL_NO_NEXTPROTONEG */
+# endif                         /* ndef OPENSSL_NO_TLSEXT */
+
+void print_cert_checks(BIO *bio, X509 *x,
+                       const char *checkhost,
+                       const char *checkemail, const char *checkip);
+
+void store_setup_crl_download(X509_STORE *st);
 
 # define FORMAT_UNDEF    0
 # define FORMAT_ASN1     1
@@ -353,6 +362,7 @@ unsigned char *next_protos_parse(unsigned short *outlen, const char *in);
 # define FORMAT_ASN1RSA  10     /* DER RSAPubicKey format */
 # define FORMAT_MSBLOB   11     /* MS Key blob format */
 # define FORMAT_PVK      12     /* MS PVK file format */
+# define FORMAT_HTTP     13     /* Download using HTTP */
 
 # define EXT_COPY_NONE   0
 # define EXT_COPY_ADD    1
index 97ad0c1..3b7336c 100644 (file)
--- a/apps/ca.c
+++ b/apps/ca.c
@@ -479,6 +479,11 @@ int MAIN(int argc, char **argv)
                 goto bad;
             infile = *(++argv);
             dorevoke = 1;
+        } else if (strcmp(*argv, "-valid") == 0) {
+            if (--argc < 1)
+                goto bad;
+            infile = *(++argv);
+            dorevoke = 2;
         } else if (strcmp(*argv, "-extensions") == 0) {
             if (--argc < 1)
                 goto bad;
@@ -1441,6 +1446,8 @@ int MAIN(int argc, char **argv)
             revcert = load_cert(bio_err, infile, FORMAT_PEM, NULL, e, infile);
             if (revcert == NULL)
                 goto err;
+            if (dorevoke == 2)
+                rev_type = -1;
             j = do_revoke(revcert, db, rev_type, rev_arg);
             if (j <= 0)
                 goto err;
@@ -1968,8 +1975,12 @@ static int do_body(X509 **xret, EVP_PKEY *pkey, X509 *x509,
 
     if (enddate == NULL)
         X509_time_adj_ex(X509_get_notAfter(ret), days, 0, NULL);
-    else
+    else {
+        int tdays;
         ASN1_TIME_set_string(X509_get_notAfter(ret), enddate);
+        ASN1_TIME_diff(&tdays, NULL, NULL, X509_get_notAfter(ret));
+        days = tdays;
+    }
 
     if (!X509_set_subject_name(ret, subject))
         goto err;
@@ -2409,13 +2420,20 @@ static int do_revoke(X509 *x509, CA_DB *db, int type, char *value)
         }
 
         /* Revoke Certificate */
-        ok = do_revoke(x509, db, type, value);
+        if (type == -1)
+            ok = 1;
+        else
+            ok = do_revoke(x509, db, type, value);
 
         goto err;
 
     } else if (index_name_cmp_noconst(row, rrow)) {
         BIO_printf(bio_err, "ERROR:name does not match %s\n", row[DB_name]);
         goto err;
+    } else if (type == -1) {
+        BIO_printf(bio_err, "ERROR:Already present, serial number %s\n",
+                   row[DB_serial]);
+        goto err;
     } else if (rrow[DB_type][0] == 'R') {
         BIO_printf(bio_err, "ERROR:Already revoked, serial number %s\n",
                    row[DB_serial]);
index f299175..66636d2 100644 (file)
@@ -85,6 +85,9 @@ int MAIN(int argc, char **argv)
 {
     int ret = 1, i;
     int verbose = 0, Verbose = 0;
+#ifndef OPENSSL_NO_SSL_TRACE
+    int stdname = 0;
+#endif
     const char **pp;
     const char *p;
     int badops = 0;
@@ -119,6 +122,10 @@ int MAIN(int argc, char **argv)
             verbose = 1;
         else if (strcmp(*argv, "-V") == 0)
             verbose = Verbose = 1;
+#ifndef OPENSSL_NO_SSL_TRACE
+        else if (strcmp(*argv, "-stdname") == 0)
+            stdname = verbose = 1;
+#endif
 #ifndef OPENSSL_NO_SSL2
         else if (strcmp(*argv, "-ssl2") == 0)
             meth = SSLv2_client_method();
@@ -202,7 +209,14 @@ int MAIN(int argc, char **argv)
                                id1, id2, id3);
                 }
             }
-
+#ifndef OPENSSL_NO_SSL_TRACE
+            if (stdname) {
+                const char *nm = SSL_CIPHER_standard_name(c);
+                if (nm == NULL)
+                    nm = "UNKNOWN";
+                BIO_printf(STDout, "%s - ", nm);
+            }
+#endif
             BIO_puts(STDout, SSL_CIPHER_description(c, buf, sizeof buf));
         }
     }
index d7645c0..6047937 100644 (file)
@@ -75,6 +75,8 @@ static void receipt_request_print(BIO *out, CMS_ContentInfo *cms);
 static CMS_ReceiptRequest *make_receipt_request(STACK_OF(OPENSSL_STRING)
                                                 *rr_to, int rr_allorfirst, STACK_OF(OPENSSL_STRING)
                                                 *rr_from);
+static int cms_set_pkey_param(EVP_PKEY_CTX *pctx,
+                              STACK_OF(OPENSSL_STRING) *param);
 
 # define SMIME_OP        0x10
 # define SMIME_IP        0x20
@@ -98,6 +100,14 @@ static CMS_ReceiptRequest *make_receipt_request(STACK_OF(OPENSSL_STRING)
 
 int verify_err = 0;
 
+typedef struct cms_key_param_st cms_key_param;
+
+struct cms_key_param_st {       /* one node per key given -keyopt options */
+    int idx;                    /* position of the key/cert it applies to */
+    STACK_OF(OPENSSL_STRING) *param; /* the -keyopt strings, in argv order */
+    cms_key_param *next;        /* next node; NULL on the last one */
+};
+
 int MAIN(int, char **);
 
 int MAIN(int argc, char **argv)
@@ -112,7 +122,7 @@ int MAIN(int argc, char **argv)
     STACK_OF(OPENSSL_STRING) *sksigners = NULL, *skkeys = NULL;
     char *certfile = NULL, *keyfile = NULL, *contfile = NULL;
     char *certsoutfile = NULL;
-    const EVP_CIPHER *cipher = NULL;
+    const EVP_CIPHER *cipher = NULL, *wrap_cipher = NULL;
     CMS_ContentInfo *cms = NULL, *rcms = NULL;
     X509_STORE *store = NULL;
     X509 *cert = NULL, *recip = NULL, *signer = NULL;
@@ -140,6 +150,8 @@ int MAIN(int argc, char **argv)
     unsigned char *pwri_pass = NULL, *pwri_tmp = NULL;
     size_t secret_keylen = 0, secret_keyidlen = 0;
 
+    cms_key_param *key_first = NULL, *key_param = NULL;
+
     ASN1_OBJECT *econtent_type = NULL;
 
     X509_VERIFY_PARAM *vpm = NULL;
@@ -201,6 +213,8 @@ int MAIN(int argc, char **argv)
             cipher = EVP_des_ede3_cbc();
         else if (!strcmp(*args, "-des"))
             cipher = EVP_des_cbc();
+        else if (!strcmp(*args, "-des3-wrap"))
+            wrap_cipher = EVP_des_ede3_wrap();
 # endif
 # ifndef OPENSSL_NO_SEED
         else if (!strcmp(*args, "-seed"))
@@ -221,6 +235,12 @@ int MAIN(int argc, char **argv)
             cipher = EVP_aes_192_cbc();
         else if (!strcmp(*args, "-aes256"))
             cipher = EVP_aes_256_cbc();
+        else if (!strcmp(*args, "-aes128-wrap"))
+            wrap_cipher = EVP_aes_128_wrap();
+        else if (!strcmp(*args, "-aes192-wrap"))
+            wrap_cipher = EVP_aes_192_wrap();
+        else if (!strcmp(*args, "-aes256-wrap"))
+            wrap_cipher = EVP_aes_256_wrap();
 # endif
 # ifndef OPENSSL_NO_CAMELLIA
         else if (!strcmp(*args, "-camellia128"))
@@ -378,7 +398,17 @@ int MAIN(int argc, char **argv)
         } else if (!strcmp(*args, "-recip")) {
             if (!args[1])
                 goto argerr;
-            recipfile = *++args;
+            if (operation == SMIME_ENCRYPT) {
+                if (!encerts)
+                    encerts = sk_X509_new_null();
+                cert = load_cert(bio_err, *++args, FORMAT_PEM,
+                                 NULL, e, "recipient certificate file");
+                if (!cert)
+                    goto end;
+                sk_X509_push(encerts, cert);
+                cert = NULL;
+            } else
+                recipfile = *++args;
         } else if (!strcmp(*args, "-certsout")) {
             if (!args[1])
                 goto argerr;
@@ -413,6 +443,40 @@ int MAIN(int argc, char **argv)
             if (!args[1])
                 goto argerr;
             keyform = str2fmt(*++args);
+        } else if (!strcmp(*args, "-keyopt")) {
+            int keyidx = -1;
+            if (!args[1])
+                goto argerr;
+            if (operation == SMIME_ENCRYPT) {
+                if (encerts)
+                    keyidx += sk_X509_num(encerts);
+            } else {
+                if (keyfile || signerfile)
+                    keyidx++;
+                if (skkeys)
+                    keyidx += sk_OPENSSL_STRING_num(skkeys);
+            }
+            if (keyidx < 0) {
+                BIO_printf(bio_err, "No key specified\n");
+                goto argerr;
+            }
+            if (key_param == NULL || key_param->idx != keyidx) {
+                cms_key_param *nparam;
+                nparam = OPENSSL_malloc(sizeof(cms_key_param));
+                if (!nparam) {
+                    BIO_printf(bio_err, "Out of memory\n");
+                    goto argerr;
+                }
+                nparam->idx = keyidx;
+                nparam->param = sk_OPENSSL_STRING_new_null();
+                nparam->next = NULL;
+                if (key_first == NULL)
+                    key_first = nparam;
+                else
+                    key_param->next = nparam;
+                key_param = nparam;
+            }
+            sk_OPENSSL_STRING_push(key_param->param, *++args);
         } else if (!strcmp(*args, "-rctform")) {
             if (!args[1])
                 goto argerr;
@@ -502,7 +566,7 @@ int MAIN(int argc, char **argv)
             badarg = 1;
         }
     } else if (operation == SMIME_ENCRYPT) {
-        if (!*args && !secret_key && !pwri_pass) {
+        if (!*args && !secret_key && !pwri_pass && !encerts) {
             BIO_printf(bio_err, "No recipient(s) certificate(s) specified\n");
             badarg = 1;
         }
@@ -567,6 +631,7 @@ int MAIN(int argc, char **argv)
                    "-inkey file    input private key (if not signer or recipient)\n");
         BIO_printf(bio_err,
                    "-keyform arg   input private key format (PEM or ENGINE)\n");
+        BIO_printf(bio_err, "-keyopt nm:v   set public key parameters\n");
         BIO_printf(bio_err, "-out file      output file\n");
         BIO_printf(bio_err,
                    "-outform arg   output format SMIME (default), PEM or DER\n");
@@ -652,7 +717,7 @@ int MAIN(int argc, char **argv)
             goto end;
         }
 
-        if (*args)
+        if (*args && !encerts)
             encerts = sk_X509_new_null();
         while (*args) {
             if (!(cert = load_cert(bio_err, *args, FORMAT_PEM,
@@ -804,10 +869,39 @@ int MAIN(int argc, char **argv)
     } else if (operation == SMIME_COMPRESS) {
         cms = CMS_compress(in, -1, flags);
     } else if (operation == SMIME_ENCRYPT) {
+        int i;
         flags |= CMS_PARTIAL;
-        cms = CMS_encrypt(encerts, in, cipher, flags);
+        cms = CMS_encrypt(NULL, in, cipher, flags);
         if (!cms)
             goto end;
+        for (i = 0; i < sk_X509_num(encerts); i++) {
+            CMS_RecipientInfo *ri;
+            cms_key_param *kparam;
+            int tflags = flags;
+            X509 *x = sk_X509_value(encerts, i);
+            for (kparam = key_first; kparam; kparam = kparam->next) {
+                if (kparam->idx == i) {
+                    tflags |= CMS_KEY_PARAM;
+                    break;
+                }
+            }
+            ri = CMS_add1_recipient_cert(cms, x, tflags);
+            if (!ri)
+                goto end;
+            if (kparam) {
+                EVP_PKEY_CTX *pctx;
+                pctx = CMS_RecipientInfo_get0_pkey_ctx(ri);
+                if (!cms_set_pkey_param(pctx, kparam->param))
+                    goto end;
+            }
+            if (CMS_RecipientInfo_type(ri) == CMS_RECIPINFO_AGREE
+                && wrap_cipher) {
+                EVP_CIPHER_CTX *wctx;
+                wctx = CMS_RecipientInfo_kari_get0_ctx(ri);
+                EVP_EncryptInit_ex(wctx, wrap_cipher, NULL, NULL, NULL);
+            }
+        }
+
         if (secret_key) {
             if (!CMS_add0_recipient_key(cms, NID_undef,
                                         secret_key, secret_keylen,
@@ -880,8 +974,11 @@ int MAIN(int argc, char **argv)
             flags |= CMS_REUSE_DIGEST;
         for (i = 0; i < sk_OPENSSL_STRING_num(sksigners); i++) {
             CMS_SignerInfo *si;
+            cms_key_param *kparam;
+            int tflags = flags;
             signerfile = sk_OPENSSL_STRING_value(sksigners, i);
             keyfile = sk_OPENSSL_STRING_value(skkeys, i);
+
             signer = load_cert(bio_err, signerfile, FORMAT_PEM, NULL,
                                e, "signer certificate");
             if (!signer)
@@ -890,9 +987,21 @@ int MAIN(int argc, char **argv)
                            "signing key file");
             if (!key)
                 goto end;
-            si = CMS_add1_signer(cms, signer, key, sign_md, flags);
+            for (kparam = key_first; kparam; kparam = kparam->next) {
+                if (kparam->idx == i) {
+                    tflags |= CMS_KEY_PARAM;
+                    break;
+                }
+            }
+            si = CMS_add1_signer(cms, signer, key, sign_md, tflags);
             if (!si)
                 goto end;
+            if (kparam) {
+                EVP_PKEY_CTX *pctx;
+                pctx = CMS_SignerInfo_get0_pkey_ctx(si);
+                if (!cms_set_pkey_param(pctx, kparam->param))
+                    goto end;
+            }
             if (rr && !CMS_add1_ReceiptRequest(si, rr))
                 goto end;
             X509_free(signer);
@@ -1047,6 +1156,13 @@ int MAIN(int argc, char **argv)
         sk_OPENSSL_STRING_free(rr_to);
     if (rr_from)
         sk_OPENSSL_STRING_free(rr_from);
+    for (key_param = key_first; key_param;) {
+        cms_key_param *tparam;
+        sk_OPENSSL_STRING_free(key_param->param);
+        tparam = key_param->next;
+        OPENSSL_free(key_param);
+        key_param = tparam;
+    }
     X509_STORE_free(store);
     X509_free(cert);
     X509_free(recip);
@@ -1220,4 +1336,22 @@ static CMS_ReceiptRequest *make_receipt_request(STACK_OF(OPENSSL_STRING)
     return NULL;
 }
 
+static int cms_set_pkey_param(EVP_PKEY_CTX *pctx,
+                              STACK_OF(OPENSSL_STRING) *param)
+{                               /* feed each string in param to pctx */
+    char *keyopt;
+    int i;
+    if (sk_OPENSSL_STRING_num(param) <= 0)
+        return 1;               /* nothing to apply counts as success */
+    for (i = 0; i < sk_OPENSSL_STRING_num(param); i++) {
+        keyopt = sk_OPENSSL_STRING_value(param, i);
+        if (pkey_ctrl_string(pctx, keyopt) <= 0) { /* option was rejected */
+            BIO_printf(bio_err, "parameter error \"%s\"\n", keyopt);
+            ERR_print_errors(bio_err);
+            return 0;           /* stop at the first failing option */
+        }
+    }
+    return 1;                   /* every option was accepted */
+}
+
 #endif
index 0a05870..c9c3a5f 100644 (file)
@@ -96,7 +96,6 @@ static const char *crl_usage[] = {
     NULL
 };
 
-static X509_CRL *load_crl(char *file, int format);
 static BIO *bio_out = NULL;
 
 int MAIN(int, char **);
@@ -106,10 +105,10 @@ int MAIN(int argc, char **argv)
     unsigned long nmflag = 0;
     X509_CRL *x = NULL;
     char *CAfile = NULL, *CApath = NULL;
-    int ret = 1, i, num, badops = 0;
+    int ret = 1, i, num, badops = 0, badsig = 0;
     BIO *out = NULL;
-    int informat, outformat;
-    char *infile = NULL, *outfile = NULL;
+    int informat, outformat, keyformat;
+    char *infile = NULL, *outfile = NULL, *crldiff = NULL, *keyfile = NULL;
     int hash = 0, issuer = 0, lastupdate = 0, nextupdate = 0, noout =
         0, text = 0;
 #ifndef OPENSSL_NO_MD5
@@ -147,6 +146,7 @@ int MAIN(int argc, char **argv)
 
     informat = FORMAT_PEM;
     outformat = FORMAT_PEM;
+    keyformat = FORMAT_PEM;
 
     argc--;
     argv++;
@@ -173,6 +173,18 @@ int MAIN(int argc, char **argv)
             if (--argc < 1)
                 goto bad;
             infile = *(++argv);
+        } else if (strcmp(*argv, "-gendelta") == 0) {
+            if (--argc < 1)
+                goto bad;
+            crldiff = *(++argv);
+        } else if (strcmp(*argv, "-key") == 0) {
+            if (--argc < 1)
+                goto bad;
+            keyfile = *(++argv);
+        } else if (strcmp(*argv, "-keyform") == 0) {
+            if (--argc < 1)
+                goto bad;
+            keyformat = str2fmt(*(++argv));
         } else if (strcmp(*argv, "-out") == 0) {
             if (--argc < 1)
                 goto bad;
@@ -214,6 +226,8 @@ int MAIN(int argc, char **argv)
             fingerprint = ++num;
         else if (strcmp(*argv, "-crlnumber") == 0)
             crlnumber = ++num;
+        else if (strcmp(*argv, "-badsig") == 0)
+            badsig = 1;
         else if ((md_alg = EVP_get_digestbyname(*argv + 1))) {
             /* ok */
             digest = md_alg;
@@ -281,6 +295,33 @@ int MAIN(int argc, char **argv)
             BIO_printf(bio_err, "verify OK\n");
     }
 
+    if (crldiff) {
+        X509_CRL *newcrl, *delta;
+        if (!keyfile) {
+            BIO_puts(bio_err, "Missing CRL signing key\n");
+            goto end;
+        }
+        newcrl = load_crl(crldiff, informat);
+        if (!newcrl)
+            goto end;
+        pkey = load_key(bio_err, keyfile, keyformat, 0, NULL, NULL,
+                        "CRL signing key");
+        if (!pkey) {
+            X509_CRL_free(newcrl);
+            goto end;
+        }
+        delta = X509_CRL_diff(x, newcrl, pkey, digest, 0);
+        X509_CRL_free(newcrl);
+        EVP_PKEY_free(pkey);
+        if (delta) {
+            X509_CRL_free(x);
+            x = delta;
+        } else {
+            BIO_puts(bio_err, "Error creating delta CRL\n");
+            goto end;
+        }
+    }
+
     if (num) {
         for (i = 1; i <= num; i++) {
             if (issuer == i) {
@@ -369,6 +410,9 @@ int MAIN(int argc, char **argv)
         goto end;
     }
 
+    if (badsig)
+        x->signature->data[x->signature->length - 1] ^= 0x1;
+
     if (outformat == FORMAT_ASN1)
         i = (int)i2d_X509_CRL_bio(out, x);
     else if (outformat == FORMAT_PEM)
@@ -383,6 +427,8 @@ int MAIN(int argc, char **argv)
     }
     ret = 0;
  end:
+    if (ret != 0)
+        ERR_print_errors(bio_err);
     BIO_free_all(out);
     BIO_free_all(bio_out);
     bio_out = NULL;
@@ -394,41 +440,3 @@ int MAIN(int argc, char **argv)
     apps_shutdown();
     OPENSSL_EXIT(ret);
 }
-
-static X509_CRL *load_crl(char *infile, int format)
-{
-    X509_CRL *x = NULL;
-    BIO *in = NULL;
-
-    in = BIO_new(BIO_s_file());
-    if (in == NULL) {
-        ERR_print_errors(bio_err);
-        goto end;
-    }
-
-    if (infile == NULL)
-        BIO_set_fp(in, stdin, BIO_NOCLOSE);
-    else {
-        if (BIO_read_filename(in, infile) <= 0) {
-            perror(infile);
-            goto end;
-        }
-    }
-    if (format == FORMAT_ASN1)
-        x = d2i_X509_CRL_bio(in, NULL);
-    else if (format == FORMAT_PEM)
-        x = PEM_read_bio_X509_CRL(in, NULL, NULL, NULL);
-    else {
-        BIO_printf(bio_err, "bad input format specified for input crl\n");
-        goto end;
-    }
-    if (x == NULL) {
-        BIO_printf(bio_err, "unable to load CRL\n");
-        ERR_print_errors(bio_err);
-        goto end;
-    }
-
- end:
-    BIO_free(in);
-    return (x);
-}
index ad2f234..95e5fa3 100644 (file)
@@ -103,7 +103,7 @@ int MAIN(int, char **);
 
 int MAIN(int argc, char **argv)
 {
-    ENGINE *e = NULL;
+    ENGINE *e = NULL, *impl = NULL;
     unsigned char *buf = NULL;
     int i, err = 1;
     const EVP_MD *md = NULL, *m;
@@ -124,6 +124,7 @@ int MAIN(int argc, char **argv)
     char *passargin = NULL, *passin = NULL;
 #ifndef OPENSSL_NO_ENGINE
     char *engine = NULL;
+    int engine_impl = 0;
 #endif
     char *hmac_key = NULL;
     char *mac_name = NULL;
@@ -199,7 +200,8 @@ int MAIN(int argc, char **argv)
                 break;
             engine = *(++argv);
             e = setup_engine(bio_err, engine, 0);
-        }
+        } else if (strcmp(*argv, "-engine_impl") == 0)
+            engine_impl = 1;
 #endif
         else if (strcmp(*argv, "-hex") == 0)
             out_bin = 0;
@@ -284,6 +286,10 @@ int MAIN(int argc, char **argv)
         EVP_MD_do_all_sorted(list_md_fn, bio_err);
         goto end;
     }
+#ifndef OPENSSL_NO_ENGINE
+    if (engine_impl)
+        impl = e;
+#endif
 
     in = BIO_new(BIO_s_file());
     bmd = BIO_new(BIO_f_md());
@@ -357,7 +363,7 @@ int MAIN(int argc, char **argv)
     if (mac_name) {
         EVP_PKEY_CTX *mac_ctx = NULL;
         int r = 0;
-        if (!init_gen_str(bio_err, &mac_ctx, mac_name, e, 0))
+        if (!init_gen_str(bio_err, &mac_ctx, mac_name, impl, 0))
             goto mac_end;
         if (macopts) {
             char *macopt;
@@ -391,7 +397,7 @@ int MAIN(int argc, char **argv)
     }
 
     if (hmac_key) {
-        sigkey = EVP_PKEY_new_mac_key(EVP_PKEY_HMAC, e,
+        sigkey = EVP_PKEY_new_mac_key(EVP_PKEY_HMAC, impl,
                                       (unsigned char *)hmac_key, -1);
         if (!sigkey)
             goto end;
@@ -407,9 +413,9 @@ int MAIN(int argc, char **argv)
             goto end;
         }
         if (do_verify)
-            r = EVP_DigestVerifyInit(mctx, &pctx, md, NULL, sigkey);
+            r = EVP_DigestVerifyInit(mctx, &pctx, md, impl, sigkey);
         else
-            r = EVP_DigestSignInit(mctx, &pctx, md, NULL, sigkey);
+            r = EVP_DigestSignInit(mctx, &pctx, md, impl, sigkey);
         if (!r) {
             BIO_printf(bio_err, "Error setting context\n");
             ERR_print_errors(bio_err);
@@ -429,9 +435,15 @@ int MAIN(int argc, char **argv)
     }
     /* we use md as a filter, reading from 'in' */
     else {
+        EVP_MD_CTX *mctx = NULL;
+        if (!BIO_get_md_ctx(bmd, &mctx)) {
+            BIO_printf(bio_err, "Error getting context\n");
+            ERR_print_errors(bio_err);
+            goto end;
+        }
         if (md == NULL)
             md = EVP_md5();
-        if (!BIO_set_md(bmd, md)) {
+        if (!EVP_DigestInit_ex(mctx, md, impl)) {
             BIO_printf(bio_err, "Error setting digest %s\n", pname);
             ERR_print_errors(bio_err);
             goto end;
@@ -483,7 +495,8 @@ int MAIN(int argc, char **argv)
                     EVP_PKEY_asn1_get0_info(NULL, NULL,
                                             NULL, NULL, &sig_name, ameth);
             }
-            md_name = EVP_MD_name(md);
+            if (md)
+                md_name = EVP_MD_name(md);
         }
         err = 0;
         for (i = 0; i < argc; i++) {
@@ -581,9 +594,12 @@ int do_fp(BIO *out, unsigned char *buf, BIO *bp, int sep, int binout,
             BIO_printf(out, "%02x", buf[i]);
         BIO_printf(out, " *%s\n", file);
     } else {
-        if (sig_name)
-            BIO_printf(out, "%s-%s(%s)= ", sig_name, md_name, file);
-        else if (md_name)
+        if (sig_name) {
+            BIO_puts(out, sig_name);
+            if (md_name)
+                BIO_printf(out, "-%s", md_name);
+            BIO_printf(out, "(%s)= ", file);
+        } else if (md_name)
             BIO_printf(out, "%s(%s)= ", md_name, file);
         else
             BIO_printf(out, "(%s)= ", file);
index d3b6d58..57199a8 100644 (file)
@@ -489,9 +489,12 @@ int MAIN(int argc, char **argv)
     if (!noout) {
         if (outformat == FORMAT_ASN1)
             i = i2d_DHparams_bio(out, dh);
-        else if (outformat == FORMAT_PEM)
-            i = PEM_write_bio_DHparams(out, dh);
-        else {
+        else if (outformat == FORMAT_PEM) {
+            if (dh->q)
+                i = PEM_write_bio_DHxparams(out, dh);
+            else
+                i = PEM_write_bio_DHparams(out, dh);
+        } else {
             BIO_printf(bio_err, "bad output format specified for outfile\n");
             goto end;
         }
index 1f340a9..06ac77b 100644 (file)
@@ -370,6 +370,9 @@ int MAIN(int argc, char **argv)
         } else
             nid = OBJ_sn2nid(curve_name);
 
+        if (nid == 0)
+            nid = EC_curve_nist2nid(curve_name);
+
         if (nid == 0) {
             BIO_printf(bio_err, "unknown curve name (%s)\n", curve_name);
             goto end;
index 2eabadc..91e6550 100644 (file)
@@ -80,7 +80,7 @@
 # include <openssl/pem.h>
 # include <openssl/rand.h>
 
-# define DEFBITS 1024
+# define DEFBITS 2048
 # undef PROG
 # define PROG genrsa_main
 
index 71a3336..47457af 100644 (file)
@@ -776,7 +776,7 @@ $ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS
 $ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX,FOUNDCR"
 $ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. ""
 $ THEN
-$     IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + ","
+$     IF CCDISABLEWARNINGS .NES. THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + ","
 $     CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS
 $ ENDIF
 $!
index 572f064..926083d 100644 (file)
@@ -110,16 +110,17 @@ static int print_ocsp_summary(BIO *out, OCSP_BASICRESP *bs, OCSP_REQUEST *req,
 
 static int make_ocsp_response(OCSP_RESPONSE **resp, OCSP_REQUEST *req,
                               CA_DB *db, X509 *ca, X509 *rcert,
-                              EVP_PKEY *rkey, STACK_OF(X509) *rother,
-                              unsigned long flags, int nmin, int ndays);
+                              EVP_PKEY *rkey, const EVP_MD *md,
+                              STACK_OF(X509) *rother, unsigned long flags,
+                              int nmin, int ndays, int badsig);
 
 static char **lookup_serial(CA_DB *db, ASN1_INTEGER *ser);
-static BIO *init_responder(char *port);
+static BIO *init_responder(const char *port);
 static int do_responder(OCSP_REQUEST **preq, BIO **pcbio, BIO *acbio,
-                        char *port);
+                        const char *port);
 static int send_ocsp_response(BIO *cbio, OCSP_RESPONSE *resp);
-static OCSP_RESPONSE *query_responder(BIO *err, BIO *cbio, char *path,
-                                      STACK_OF(CONF_VALUE) *headers,
+static OCSP_RESPONSE *query_responder(BIO *err, BIO *cbio, const char *path,
+                                      const STACK_OF(CONF_VALUE) *headers,
                                       OCSP_REQUEST *req, int req_timeout);
 
 # undef PROG
@@ -154,12 +155,14 @@ int MAIN(int argc, char **argv)
     long nsec = MAX_VALIDITY_PERIOD, maxage = -1;
     char *CAfile = NULL, *CApath = NULL;
     X509_STORE *store = NULL;
+    X509_VERIFY_PARAM *vpm = NULL;
     STACK_OF(X509) *sign_other = NULL, *verify_other = NULL, *rother = NULL;
     char *sign_certfile = NULL, *verify_certfile = NULL, *rcertfile = NULL;
     unsigned long sign_flags = 0, verify_flags = 0, rflags = 0;
     int ret = 1;
     int accept_count = -1;
     int badarg = 0;
+    int badsig = 0;
     int i;
     int ignore_err = 0;
     STACK_OF(OPENSSL_STRING) *reqnames = NULL;
@@ -170,7 +173,7 @@ int MAIN(int argc, char **argv)
     char *rca_filename = NULL;
     CA_DB *rdb = NULL;
     int nmin = 0, ndays = -1;
-    const EVP_MD *cert_id_md = NULL;
+    const EVP_MD *cert_id_md = NULL, *rsign_md = NULL;
 
     if (bio_err == NULL)
         bio_err = BIO_new_fp(stderr, BIO_NOCLOSE);
@@ -206,6 +209,7 @@ int MAIN(int argc, char **argv)
                 OPENSSL_free(tport);
             if (tpath)
                 OPENSSL_free(tpath);
+            thost = tport = tpath = NULL;
             if (args[1]) {
                 args++;
                 if (!OCSP_parse_url(*args, &host, &port, &path, &use_ssl)) {
@@ -264,6 +268,8 @@ int MAIN(int argc, char **argv)
             verify_flags |= OCSP_TRUSTOTHER;
         else if (!strcmp(*args, "-no_intern"))
             verify_flags |= OCSP_NOINTERN;
+        else if (!strcmp(*args, "-badsig"))
+            badsig = 1;
         else if (!strcmp(*args, "-text")) {
             req_text = 1;
             resp_text = 1;
@@ -320,6 +326,10 @@ int MAIN(int argc, char **argv)
                 CApath = *args;
             } else
                 badarg = 1;
+        } else if (args_verify(&args, NULL, &badarg, bio_err, &vpm)) {
+            if (badarg)
+                goto end;
+            continue;
         } else if (!strcmp(*args, "-validity_period")) {
             if (args[1]) {
                 args++;
@@ -465,6 +475,14 @@ int MAIN(int argc, char **argv)
                 rcertfile = *args;
             } else
                 badarg = 1;
+        } else if (!strcmp(*args, "-rmd")) {
+            if (args[1]) {
+                args++;
+                rsign_md = EVP_get_digestbyname(*args);
+                if (!rsign_md)
+                    badarg = 1;
+            } else
+                badarg = 1;
         } else if ((cert_id_md = EVP_get_digestbyname((*args) + 1)) == NULL) {
             badarg = 1;
         }
@@ -584,7 +602,10 @@ int MAIN(int argc, char **argv)
         add_nonce = 0;
 
     if (!req && reqin) {
-        derbio = BIO_new_file(reqin, "rb");
+        if (!strcmp(reqin, "-"))
+            derbio = BIO_new_fp(stdin, BIO_NOCLOSE);
+        else
+            derbio = BIO_new_file(reqin, "rb");
         if (!derbio) {
             BIO_printf(bio_err, "Error Opening OCSP request file\n");
             goto end;
@@ -681,7 +702,10 @@ int MAIN(int argc, char **argv)
         OCSP_REQUEST_print(out, req, 0);
 
     if (reqout) {
-        derbio = BIO_new_file(reqout, "wb");
+        if (!strcmp(reqout, "-"))
+            derbio = BIO_new_fp(stdout, BIO_NOCLOSE);
+        else
+            derbio = BIO_new_file(reqout, "wb");
         if (!derbio) {
             BIO_printf(bio_err, "Error opening file %s\n", reqout);
             goto end;
@@ -706,7 +730,7 @@ int MAIN(int argc, char **argv)
 
     if (rdb) {
         i = make_ocsp_response(&resp, req, rdb, rca_cert, rsigner, rkey,
-                               rother, rflags, nmin, ndays);
+                               rsign_md, rother, rflags, nmin, ndays, badsig);
         if (cbio)
             send_ocsp_response(cbio, resp);
     } else if (host) {
@@ -721,7 +745,10 @@ int MAIN(int argc, char **argv)
         goto end;
 # endif
     } else if (respin) {
-        derbio = BIO_new_file(respin, "rb");
+        if (!strcmp(respin, "-"))
+            derbio = BIO_new_fp(stdin, BIO_NOCLOSE);
+        else
+            derbio = BIO_new_file(respin, "rb");
         if (!derbio) {
             BIO_printf(bio_err, "Error Opening OCSP response file\n");
             goto end;
@@ -741,7 +768,10 @@ int MAIN(int argc, char **argv)
  done_resp:
 
     if (respout) {
-        derbio = BIO_new_file(respout, "wb");
+        if (!strcmp(respout, "-"))
+            derbio = BIO_new_fp(stdout, BIO_NOCLOSE);
+        else
+            derbio = BIO_new_file(respout, "wb");
         if (!derbio) {
             BIO_printf(bio_err, "Error opening file %s\n", respout);
             goto end;
@@ -778,6 +808,10 @@ int MAIN(int argc, char **argv)
             resp = NULL;
             goto redo_accept;
         }
+        ret = 0;
+        goto end;
+    } else if (ridx_filename) {
+        ret = 0;
         goto end;
     }
 
@@ -785,6 +819,8 @@ int MAIN(int argc, char **argv)
         store = setup_verify(bio_err, CAfile, CApath);
     if (!store)
         goto end;
+    if (vpm)
+        X509_STORE_set1_param(store, vpm);
     if (verify_certfile) {
         verify_other = load_certs(bio_err, verify_certfile, FORMAT_PEM,
                                   NULL, e, "validator certificate");
@@ -799,37 +835,38 @@ int MAIN(int argc, char **argv)
         goto end;
     }
 
+    ret = 0;
+
     if (!noverify) {
         if (req && ((i = OCSP_check_nonce(req, bs)) <= 0)) {
             if (i == -1)
                 BIO_printf(bio_err, "WARNING: no nonce in response\n");
             else {
                 BIO_printf(bio_err, "Nonce Verify error\n");
+                ret = 1;
                 goto end;
             }
         }
 
         i = OCSP_basic_verify(bs, verify_other, store, verify_flags);
-        if (i < 0)
-            i = OCSP_basic_verify(bs, NULL, store, 0);
-
         if (i <= 0) {
             BIO_printf(bio_err, "Response Verify Failure\n");
             ERR_print_errors(bio_err);
+            ret = 1;
         } else
             BIO_printf(bio_err, "Response verify OK\n");
 
     }
 
     if (!print_ocsp_summary(out, bs, req, reqnames, ids, nsec, maxage))
-        goto end;
-
-    ret = 0;
+        ret = 1;
 
  end:
     ERR_print_errors(bio_err);
     X509_free(signer);
     X509_STORE_free(store);
+    if (vpm)
+        X509_VERIFY_PARAM_free(vpm);
     EVP_PKEY_free(key);
     EVP_PKEY_free(rkey);
     X509_free(issuer);
@@ -984,8 +1021,9 @@ static int print_ocsp_summary(BIO *out, OCSP_BASICRESP *bs, OCSP_REQUEST *req,
 
 static int make_ocsp_response(OCSP_RESPONSE **resp, OCSP_REQUEST *req,
                               CA_DB *db, X509 *ca, X509 *rcert,
-                              EVP_PKEY *rkey, STACK_OF(X509) *rother,
-                              unsigned long flags, int nmin, int ndays)
+                              EVP_PKEY *rkey, const EVP_MD *rmd,
+                              STACK_OF(X509) *rother, unsigned long flags,
+                              int nmin, int ndays, int badsig)
 {
     ASN1_TIME *thisupd = NULL, *nextupd = NULL;
     OCSP_CERTID *cid, *ca_id = NULL;
@@ -1069,7 +1107,10 @@ static int make_ocsp_response(OCSP_RESPONSE **resp, OCSP_REQUEST *req,
 
     OCSP_copy_nonce(bs, req);
 
-    OCSP_basic_sign(bs, rcert, rkey, NULL, rother, flags);
+    OCSP_basic_sign(bs, rcert, rkey, rmd, rother, flags);
+
+    if (badsig)
+        bs->signature->data[bs->signature->length - 1] ^= 0x1;
 
     *resp = OCSP_response_create(OCSP_RESPONSE_STATUS_SUCCESSFUL, bs);
 
@@ -1105,7 +1146,7 @@ static char **lookup_serial(CA_DB *db, ASN1_INTEGER *ser)
 
 /* Quick and dirty OCSP server: read in and parse input request */
 
-static BIO *init_responder(char *port)
+static BIO *init_responder(const char *port)
 {
     BIO *acbio = NULL, *bufbio = NULL;
     bufbio = BIO_new(BIO_f_buffer());
@@ -1137,7 +1178,7 @@ static BIO *init_responder(char *port)
 }
 
 static int do_responder(OCSP_REQUEST **preq, BIO **pcbio, BIO *acbio,
-                        char *port)
+                        const char *port)
 {
     int have_post = 0, len;
     OCSP_REQUEST *req = NULL;
@@ -1198,8 +1239,8 @@ static int send_ocsp_response(BIO *cbio, OCSP_RESPONSE *resp)
     return 1;
 }
 
-static OCSP_RESPONSE *query_responder(BIO *err, BIO *cbio, char *path,
-                                      STACK_OF(CONF_VALUE) *headers,
+static OCSP_RESPONSE *query_responder(BIO *err, BIO *cbio, const char *path,
+                                      const STACK_OF(CONF_VALUE) *headers,
                                       OCSP_REQUEST *req, int req_timeout)
 {
     int fd;
@@ -1286,8 +1327,9 @@ static OCSP_RESPONSE *query_responder(BIO *err, BIO *cbio, char *path,
 }
 
 OCSP_RESPONSE *process_responder(BIO *err, OCSP_REQUEST *req,
-                                 char *host, char *path, char *port,
-                                 int use_ssl, STACK_OF(CONF_VALUE) *headers,
+                                 const char *host, const char *path,
+                                 const char *port, int use_ssl,
+                                 const STACK_OF(CONF_VALUE) *headers,
                                  int req_timeout)
 {
     BIO *cbio = NULL;
index 45e46a0..94baac1 100644 (file)
@@ -103,7 +103,7 @@ emailAddress                = optional
 
 ####################################################################
 [ req ]
-default_bits           = 1024
+default_bits           = 2048
 default_keyfile        = privkey.pem
 distinguished_name     = req_distinguished_name
 attributes             = req_attributes
index 18760c6..1eb86c4 100644 (file)
@@ -103,7 +103,7 @@ emailAddress                = optional
 
 ####################################################################
 [ req ]
-default_bits           = 1024
+default_bits           = 2048
 default_keyfile        = privkey.pem
 distinguished_name     = req_distinguished_name
 attributes             = req_attributes
index 5c7290e..5099e18 100644 (file)
@@ -124,6 +124,16 @@ int MAIN(int argc, char **argv)
                 }
             } else
                 badarg = 1;
+        } else if (!strcmp(*args, "-v2prf")) {
+            if (args[1]) {
+                args++;
+                pbe_nid = OBJ_txt2nid(*args);
+                if (!EVP_PBE_find(EVP_PBE_TYPE_PRF, pbe_nid, NULL, NULL, 0)) {
+                    BIO_printf(bio_err, "Unknown PRF algorithm %s\n", *args);
+                    badarg = 1;
+                }
+            } else
+                badarg = 1;
         } else if (!strcmp(*args, "-inform")) {
             if (args[1]) {
                 args++;
index 2769b89..5b54bfd 100644 (file)
@@ -152,15 +152,21 @@ typedef fd_mask fd_set;
 #define PROTOCOL        "tcp"
 
 int do_server(int port, int type, int *ret,
-              int (*cb) (char *hostname, int s, unsigned char *context),
-              unsigned char *context);
+              int (*cb) (char *hostname, int s, int stype,
+                         unsigned char *context), unsigned char *context,
+              int naccept);
 #ifdef HEADER_X509_H
 int MS_CALLBACK verify_callback(int ok, X509_STORE_CTX *ctx);
 #endif
 #ifdef HEADER_SSL_H
 int set_cert_stuff(SSL_CTX *ctx, char *cert_file, char *key_file);
-int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key);
+int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key,
+                       STACK_OF(X509) *chain, int build_chain);
+int ssl_print_sigalgs(BIO *out, SSL *s);
+int ssl_print_point_formats(BIO *out, SSL *s);
+int ssl_print_curves(BIO *out, SSL *s, int noshared);
 #endif
+int ssl_print_tmp_key(BIO *out, SSL *s);
 int init_client(int *sock, char *server, int port, int type);
 int should_retry(int i);
 int extract_port(char *str, short *port_ptr);
@@ -182,3 +188,24 @@ int MS_CALLBACK generate_cookie_callback(SSL *ssl, unsigned char *cookie,
                                          unsigned int *cookie_len);
 int MS_CALLBACK verify_cookie_callback(SSL *ssl, unsigned char *cookie,
                                        unsigned int cookie_len);
+
+typedef struct ssl_excert_st SSL_EXCERT;
+
+void ssl_ctx_set_excert(SSL_CTX *ctx, SSL_EXCERT *exc);
+void ssl_excert_free(SSL_EXCERT *exc);
+int args_excert(char ***pargs, int *pargc,
+                int *badarg, BIO *err, SSL_EXCERT **pexc);
+int load_excert(SSL_EXCERT **pexc, BIO *err);
+void print_ssl_summary(BIO *bio, SSL *s);
+#ifdef HEADER_SSL_H
+int args_ssl(char ***pargs, int *pargc, SSL_CONF_CTX *cctx,
+             int *badarg, BIO *err, STACK_OF(OPENSSL_STRING) **pstr);
+int args_ssl_call(SSL_CTX *ctx, BIO *err, SSL_CONF_CTX *cctx,
+                  STACK_OF(OPENSSL_STRING) *str, int no_ecdhe, int no_jpake);
+int ssl_ctx_add_crls(SSL_CTX *ctx, STACK_OF(X509_CRL) *crls,
+                     int crl_download);
+int ssl_load_stores(SSL_CTX *ctx, const char *vfyCApath,
+                    const char *vfyCAfile, const char *chCApath,
+                    const char *chCAfile, STACK_OF(X509_CRL) *crls,
+                    int crl_download);
+#endif
index fabf9cf..dd3aa74 100644 (file)
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h> /* for memcpy() */
+#include <string.h> /* for memcpy() and strcmp() */
 #define USE_SOCKETS
 #define NON_MAIN
 #include "apps.h"
 #define COOKIE_SECRET_LENGTH    16
 
 int verify_depth = 0;
+int verify_quiet = 0;
 int verify_error = X509_V_OK;
 int verify_return_error = 0;
 unsigned char cookie_secret[COOKIE_SECRET_LENGTH];
@@ -140,13 +141,16 @@ int MS_CALLBACK verify_callback(int ok, X509_STORE_CTX *ctx)
     err = X509_STORE_CTX_get_error(ctx);
     depth = X509_STORE_CTX_get_error_depth(ctx);
 
-    BIO_printf(bio_err, "depth=%d ", depth);
-    if (err_cert) {
-        X509_NAME_print_ex(bio_err, X509_get_subject_name(err_cert),
-                           0, XN_FLAG_ONELINE);
-        BIO_puts(bio_err, "\n");
-    } else
-        BIO_puts(bio_err, "<no cert>\n");
+    if (!verify_quiet || !ok) {
+        BIO_printf(bio_err, "depth=%d ", depth);
+        if (err_cert) {
+            X509_NAME_print_ex(bio_err,
+                               X509_get_subject_name(err_cert),
+                               0, XN_FLAG_ONELINE);
+            BIO_puts(bio_err, "\n");
+        } else
+            BIO_puts(bio_err, "<no cert>\n");
+    }
     if (!ok) {
         BIO_printf(bio_err, "verify error:num=%d:%s\n", err,
                    X509_verify_cert_error_string(err));
@@ -179,13 +183,14 @@ int MS_CALLBACK verify_callback(int ok, X509_STORE_CTX *ctx)
         BIO_printf(bio_err, "\n");
         break;
     case X509_V_ERR_NO_EXPLICIT_POLICY:
-        policies_print(bio_err, ctx);
+        if (!verify_quiet)
+            policies_print(bio_err, ctx);
         break;
     }
-    if (err == X509_V_OK && ok == 2)
+    if (err == X509_V_OK && ok == 2 && !verify_quiet)
         policies_print(bio_err, ctx);
-
-    BIO_printf(bio_err, "verify return:%d\n", ok);
+    if (ok && !verify_quiet)
+        BIO_printf(bio_err, "verify return:%d\n", ok);
     return (ok);
 }
 
@@ -246,8 +251,10 @@ int set_cert_stuff(SSL_CTX *ctx, char *cert_file, char *key_file)
     return (1);
 }
 
-int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key)
+int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key,
+                       STACK_OF(X509) *chain, int build_chain)
 {
+    int chflags = chain ? SSL_BUILD_CHAIN_FLAG_CHECK : 0;
     if (cert == NULL)
         return 1;
     if (SSL_CTX_use_certificate(ctx, cert) <= 0) {
@@ -255,6 +262,7 @@ int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key)
         ERR_print_errors(bio_err);
         return 0;
     }
+
     if (SSL_CTX_use_PrivateKey(ctx, key) <= 0) {
         BIO_printf(bio_err, "error setting private key\n");
         ERR_print_errors(bio_err);
@@ -269,6 +277,263 @@ int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key)
                    "Private key does not match the certificate public key\n");
         return 0;
     }
+    if (chain && !SSL_CTX_set1_chain(ctx, chain)) {
+        BIO_printf(bio_err, "error setting certificate chain\n");
+        ERR_print_errors(bio_err);
+        return 0;
+    }
+    if (build_chain && !SSL_CTX_build_cert_chain(ctx, chflags)) {
+        BIO_printf(bio_err, "error building certificate chain\n");
+        ERR_print_errors(bio_err);
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * Print, on a single line, the client certificate types returned by
+ * SSL_get0_certificate_types() for connection |s|. Known types are printed
+ * by name, anything else as "UNKNOWN (<number>)". Nothing is printed when
+ * the call reports no types.
+ */
+static void ssl_print_client_cert_types(BIO *bio, SSL *s)
+{
+    const unsigned char *p;
+    int i;
+    int cert_type_num = SSL_get0_certificate_types(s, &p);
+    if (!cert_type_num)
+        return;
+    BIO_puts(bio, "Client Certificate Types: ");
+    for (i = 0; i < cert_type_num; i++) {
+        unsigned char cert_type = p[i];
+        const char *cname;
+        switch (cert_type) {
+        case TLS_CT_RSA_SIGN:
+            cname = "RSA sign";
+            break;
+
+        case TLS_CT_DSS_SIGN:
+            cname = "DSA sign";
+            break;
+
+        case TLS_CT_RSA_FIXED_DH:
+            cname = "RSA fixed DH";
+            break;
+
+        case TLS_CT_DSS_FIXED_DH:
+            cname = "DSS fixed DH";
+            break;
+
+        case TLS_CT_ECDSA_SIGN:
+            cname = "ECDSA sign";
+            break;
+
+        case TLS_CT_RSA_FIXED_ECDH:
+            cname = "RSA fixed ECDH";
+            break;
+
+        case TLS_CT_ECDSA_FIXED_ECDH:
+            cname = "ECDSA fixed ECDH";
+            break;
+
+        case TLS_CT_GOST94_SIGN:
+            cname = "GOST94 Sign";
+            break;
+
+        case TLS_CT_GOST01_SIGN:
+            cname = "GOST01 Sign";
+            break;
+
+        default:
+            cname = NULL;
+            break;
+        }
+
+        /* The separator is emitted here, so entries must not add their own */
+        if (i)
+            BIO_puts(bio, ", ");
+
+        if (cname)
+            BIO_puts(bio, cname);
+        else
+            BIO_printf(bio, "UNKNOWN (%d)", cert_type);
+    }
+    BIO_puts(bio, "\n");
+}
+
+/*
+ * Print the signature algorithms of connection |s| as a colon separated
+ * list of "<signature>+<hash>" pairs. With |shared| non-zero the entries
+ * come from SSL_get_shared_sigalgs(), otherwise from SSL_get_sigalgs().
+ * Entries without a known name are shown as raw hex values. Nothing is
+ * printed when the list is empty. Always returns 1.
+ */
+static int do_print_sigalgs(BIO *out, SSL *s, int shared)
+{
+    int idx, count;
+    const int is_client = !SSL_is_server(s);
+
+    count = shared
+        ? SSL_get_shared_sigalgs(s, -1, NULL, NULL, NULL, NULL, NULL)
+        : SSL_get_sigalgs(s, -1, NULL, NULL, NULL, NULL, NULL);
+    if (count == 0)
+        return 1;
+
+    if (shared)
+        BIO_puts(out, "Shared ");
+    if (is_client)
+        BIO_puts(out, "Requested ");
+    BIO_puts(out, "Signature Algorithms: ");
+
+    for (idx = 0; idx < count; idx++) {
+        int md_nid, pk_nid;
+        unsigned char raw_md, raw_pk;
+        const char *pk_name;
+
+        if (shared)
+            SSL_get_shared_sigalgs(s, idx, &pk_nid, &md_nid, NULL,
+                                   &raw_pk, &raw_md);
+        else
+            SSL_get_sigalgs(s, idx, &pk_nid, &md_nid, NULL,
+                            &raw_pk, &raw_md);
+
+        if (idx > 0)
+            BIO_puts(out, ":");
+
+        /* Signature part: symbolic name if known, raw byte otherwise */
+        switch (pk_nid) {
+        case EVP_PKEY_RSA:
+            pk_name = "RSA";
+            break;
+        case EVP_PKEY_DSA:
+            pk_name = "DSA";
+            break;
+        case EVP_PKEY_EC:
+            pk_name = "ECDSA";
+            break;
+        default:
+            pk_name = NULL;
+            break;
+        }
+        if (pk_name != NULL)
+            BIO_printf(out, "%s+", pk_name);
+        else
+            BIO_printf(out, "0x%02X+", (int)raw_pk);
+
+        /* Hash part: short name if known, raw byte otherwise */
+        if (md_nid == NID_undef)
+            BIO_printf(out, "0x%02X", (int)raw_md);
+        else
+            BIO_printf(out, "%s", OBJ_nid2sn(md_nid));
+    }
+    BIO_puts(out, "\n");
+    return 1;
+}
+
+/*
+ * Print signature algorithm details for connection |s|: the client
+ * certificate types (only when |s| is not a server), the full and the
+ * shared signature algorithm lists, and the peer signing digest when
+ * SSL_get_peer_signature_nid() reports one. Always returns 1.
+ */
+int ssl_print_sigalgs(BIO *out, SSL *s)
+{
+    int digest_nid;
+
+    if (SSL_is_server(s) == 0)
+        ssl_print_client_cert_types(out, s);
+
+    (void)do_print_sigalgs(out, s, 0);
+    (void)do_print_sigalgs(out, s, 1);
+
+    if (SSL_get_peer_signature_nid(s, &digest_nid) != 0)
+        BIO_printf(out, "Peer signing digest: %s\n",
+                   OBJ_nid2sn(digest_nid));
+    return 1;
+}
+
+#ifndef OPENSSL_NO_EC
+/*
+ * Print the EC point formats obtained from SSL_get0_ec_point_formats() as a
+ * colon separated list. Formats without a known name are printed as
+ * "unknown(<number>)". Nothing is printed when no formats are reported.
+ * Always returns 1.
+ */
+int ssl_print_point_formats(BIO *out, SSL *s)
+{
+    int i, nformats;
+    const char *pformats;
+    nformats = SSL_get0_ec_point_formats(s, &pformats);
+    /* Nothing to report: stay silent rather than print an empty list */
+    if (nformats <= 0)
+        return 1;
+    BIO_puts(out, "Supported Elliptic Curve Point Formats: ");
+    for (i = 0; i < nformats; i++, pformats++) {
+        if (i)
+            BIO_puts(out, ":");
+        switch (*pformats) {
+        case TLSEXT_ECPOINTFORMAT_uncompressed:
+            BIO_puts(out, "uncompressed");
+            break;
+
+        case TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime:
+            BIO_puts(out, "ansiX962_compressed_prime");
+            break;
+
+        case TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2:
+            BIO_puts(out, "ansiX962_compressed_char2");
+            break;
+
+        default:
+            BIO_printf(out, "unknown(%d)", (int)*pformats);
+            break;
+
+        }
+    }
+    /*
+     * The former "NONE" branch for nformats <= 0 could never run because of
+     * the early return above, so it has been dropped.
+     */
+    BIO_puts(out, "\n");
+    return 1;
+}
+
+/*
+ * Print the curves reported by SSL_get1_curves() for connection |s| as a
+ * colon separated list, preferring the NIST name of a curve where one
+ * exists and falling back to the short object name; entries flagged with
+ * TLSEXT_nid_unknown are printed in hex. Unless |noshared| is set, a second
+ * line lists the curves returned by SSL_get_shared_curve(), or "NONE".
+ *
+ * Nothing is printed when no curves are reported. Returns 1 on success and
+ * 0 if the temporary curve array cannot be allocated.
+ */
+int ssl_print_curves(BIO *out, SSL *s, int noshared)
+{
+    int i, ncurves, *curves, nid;
+    const char *cname;
+    ncurves = SSL_get1_curves(s, NULL);
+    if (ncurves <= 0)
+        return 1;
+    curves = OPENSSL_malloc(ncurves * sizeof(int));
+    if (!curves) {
+        BIO_puts(out, "Malloc error getting supported curves\n");
+        return 0;
+    }
+    SSL_get1_curves(s, curves);
+
+    BIO_puts(out, "Supported Elliptic Curves: ");
+    for (i = 0; i < ncurves; i++) {
+        if (i)
+            BIO_puts(out, ":");
+        nid = curves[i];
+        /* If unrecognised print out hex version */
+        if (nid & TLSEXT_nid_unknown)
+            BIO_printf(out, "0x%04X", nid & 0xFFFF);
+        else {
+            /* Use NIST name for curve if it exists */
+            cname = EC_curve_nid2nist(nid);
+            if (!cname)
+                cname = OBJ_nid2sn(nid);
+            BIO_printf(out, "%s", cname);
+        }
+    }
+    /*
+     * ncurves is known to be positive here, so the former "NONE" branch for
+     * the supported list was unreachable and has been dropped.
+     */
+    OPENSSL_free(curves);
+    if (noshared) {
+        BIO_puts(out, "\n");
+        return 1;
+    }
+    BIO_puts(out, "\nShared Elliptic curves: ");
+    ncurves = SSL_get_shared_curve(s, -1);
+    for (i = 0; i < ncurves; i++) {
+        if (i)
+            BIO_puts(out, ":");
+        nid = SSL_get_shared_curve(s, i);
+        cname = EC_curve_nid2nist(nid);
+        if (!cname)
+            cname = OBJ_nid2sn(nid);
+        BIO_printf(out, "%s", cname);
+    }
+    if (ncurves == 0)
+        BIO_puts(out, "NONE");
+    BIO_puts(out, "\n");
+    return 1;
+}
+#endif
+/*
+ * Print a "Server Temp Key:" line describing the key returned by
+ * SSL_get_server_tmp_key(): the key type and its size in bits, plus the
+ * curve name for EC keys. Other key types only get the label. Nothing is
+ * printed when no key is returned. The key is freed before returning.
+ * Always returns 1.
+ */
+int ssl_print_tmp_key(BIO *out, SSL *s)
+{
+    EVP_PKEY *tmp_key;
+    int key_type;
+
+    if (!SSL_get_server_tmp_key(s, &tmp_key))
+        return 1;
+
+    BIO_puts(out, "Server Temp Key: ");
+    key_type = EVP_PKEY_id(tmp_key);
+    if (key_type == EVP_PKEY_RSA) {
+        BIO_printf(out, "RSA, %d bits\n", EVP_PKEY_bits(tmp_key));
+    } else if (key_type == EVP_PKEY_DH) {
+        BIO_printf(out, "DH, %d bits\n", EVP_PKEY_bits(tmp_key));
+    }
+#ifndef OPENSSL_NO_ECDH
+    else if (key_type == EVP_PKEY_EC) {
+        EC_KEY *ec_key = EVP_PKEY_get1_EC_KEY(tmp_key);
+        int curve_nid = EC_GROUP_get_curve_name(EC_KEY_get0_group(ec_key));
+        const char *curve_name;
+
+        /* Only the curve id is needed, so drop the extra reference now */
+        EC_KEY_free(ec_key);
+        /* Prefer the NIST name, fall back to the short object name */
+        curve_name = EC_curve_nid2nist(curve_nid);
+        if (curve_name == NULL)
+            curve_name = OBJ_nid2sn(curve_nid);
+        BIO_printf(out, "ECDH, %s, %d bits\n", curve_name,
+                   EVP_PKEY_bits(tmp_key));
+    }
+#endif
+    EVP_PKEY_free(tmp_key);
     return 1;
 }
 
@@ -884,3 +1149,504 @@ int MS_CALLBACK verify_cookie_callback(SSL *ssl, unsigned char *cookie,
 
     return 0;
 }
+
+/*
+ * Example of extended certificate handling. Where the standard support of
+ * one certificate per algorithm is not sufficient an application can decide
+ * which certificate(s) to use at runtime based on whatever criteria it deems
+ * appropriate.
+ */
+
+/*
+ * One entry in a doubly linked list of extra certificates together with the
+ * key and chain belonging to each. File names and formats are filled in
+ * while parsing the -x* command line options; the loaded objects are filled
+ * in by load_excert().
+ */
+struct ssl_excert_st {
+    /* Format passed to load_cert() for certfile */
+    int certform;
+    /* Certificate file name (points at the command line argument) */
+    const char *certfile;
+    /* Format passed to load_key() for keyfile */
+    int keyform;
+    /* Key file name; when NULL the key is loaded from certfile instead */
+    const char *keyfile;
+    /* Optional file holding the chain certificates */
+    const char *chainfile;
+    /* Objects loaded from the files above; released by ssl_excert_free() */
+    X509 *cert;
+    EVP_PKEY *key;
+    STACK_OF(X509) *chain;
+    /* Non-zero if the chain should be built in the certificate callback */
+    int build_chain;
+    /* New entries are prepended, so prev points at the newer entry */
+    struct ssl_excert_st *next, *prev;
+};
+
+/* Maps one SSL_check_chain() result bit to the label printed for it */
+struct chain_flags {
+    int flag;
+    const char *name;
+};
+
+/*
+ * Result bits reported by print_chain_flags(), in print order. The list is
+ * terminated by an entry with a NULL name.
+ */
+struct chain_flags chain_flags_list[] = {
+    {CERT_PKEY_VALID, "Overall Validity"},
+    {CERT_PKEY_SIGN, "Sign with EE key"},
+    {CERT_PKEY_EE_SIGNATURE, "EE signature"},
+    {CERT_PKEY_CA_SIGNATURE, "CA signature"},
+    {CERT_PKEY_EE_PARAM, "EE key parameters"},
+    {CERT_PKEY_CA_PARAM, "CA key parameters"},
+    {CERT_PKEY_EXPLICIT_SIGN, "Explicitly sign with EE key"},
+    {CERT_PKEY_ISSUER_NAME, "Issuer Name"},
+    {CERT_PKEY_CERT_TYPE, "Certificate Type"},
+    {0, NULL}
+};
+
+/*
+ * Print one "OK"/"NOT OK" line for every entry of chain_flags_list according
+ * to the bits set in |flags|, followed by a "Suite B" line. The Suite B
+ * result is only reported when the SSL_CERT_FLAG_SUITEB_128_LOS bits are
+ * present in the certificate flags of |s|; otherwise it says "not tested".
+ */
+static void print_chain_flags(BIO *out, SSL *s, int flags)
+{
+    const struct chain_flags *entry;
+
+    for (entry = chain_flags_list; entry->name != NULL; entry++) {
+        const char *verdict = (flags & entry->flag) ? "OK" : "NOT OK";
+
+        BIO_printf(out, "\t%s: %s\n", entry->name, verdict);
+    }
+
+    BIO_printf(out, "\tSuite B: ");
+    /* Setting no flags simply returns the current flag set */
+    if ((SSL_set_cert_flags(s, 0) & SSL_CERT_FLAG_SUITEB_128_LOS) == 0) {
+        BIO_printf(out, "not tested\n");
+        return;
+    }
+    BIO_puts(out, (flags & CERT_PKEY_SUITEB) ? "OK\n" : "NOT OK\n");
+}
+
+/*
+ * Very basic selection callback: just use any certificate chain reported as
+ * valid. A more sophisticated one could prioritise by local policy.
+ */
+/*
+ * Certificate callback: |arg| is the SSL_EXCERT list. Clears the
+ * certificates currently set on |ssl|, then checks every list entry with
+ * SSL_check_chain(), prints the result to bio_err and installs each entry
+ * that is reported valid.
+ *
+ * Returns 1 on success (also when the list is empty) and 0 if building a
+ * chain fails. When CERT_CB_TEST_RETRY is defined, the first five calls
+ * return -1 without doing anything else.
+ */
+static int set_cert_cb(SSL *ssl, void *arg)
+{
+    SSL_EXCERT *entry = arg;
+    int chain_no, status;
+#ifdef CERT_CB_TEST_RETRY
+    static int retry_cnt;
+    if (retry_cnt < 5) {
+        retry_cnt++;
+        fprintf(stderr, "Certificate callback retry test: count %d\n",
+                retry_cnt);
+        return -1;
+    }
+#endif
+    SSL_certs_clear(ssl);
+
+    if (entry == NULL)
+        return 1;
+
+    /*
+     * Newer entries are prepended to the list, so start from the tail and
+     * walk backwards to process them in the order they were given.
+     */
+    while (entry->next != NULL)
+        entry = entry->next;
+
+    for (chain_no = 1; entry != NULL; entry = entry->prev, chain_no++) {
+        status = SSL_check_chain(ssl, entry->cert, entry->key, entry->chain);
+        BIO_printf(bio_err, "Checking cert chain %d:\nSubject: ", chain_no);
+        X509_NAME_print_ex(bio_err, X509_get_subject_name(entry->cert), 0,
+                           XN_FLAG_ONELINE);
+        BIO_puts(bio_err, "\n");
+
+        print_chain_flags(bio_err, ssl, status);
+        if (!(status & CERT_PKEY_VALID))
+            continue;
+
+        SSL_use_certificate(ssl, entry->cert);
+        SSL_use_PrivateKey(ssl, entry->key);
+        /*
+         * NB: building the chain on every connection is inefficient; a real
+         * application would normally cache the chain in advance.
+         */
+        if (entry->build_chain) {
+            if (!SSL_build_cert_chain(ssl, 0))
+                return 0;
+        } else if (entry->chain != NULL) {
+            SSL_set1_chain(ssl, entry->chain);
+        }
+    }
+    return 1;
+}
+
+/*
+ * Install set_cert_cb() as the certificate callback of |ctx|, passing the
+ * extra certificate list |exc| as the callback argument.
+ */
+void ssl_ctx_set_excert(SSL_CTX *ctx, SSL_EXCERT *exc)
+{
+    SSL_CTX_set_cert_cb(ctx, set_cert_cb, exc);
+}
+
+/*
+ * Allocate an empty entry and make it the new head of the list at |*pexc|.
+ * The certificate and key formats are inherited from the previous head, or
+ * default to FORMAT_PEM for the first entry. Returns 1 on success, or 0 if
+ * allocation fails, in which case the list is left untouched.
+ */
+static int ssl_excert_prepend(SSL_EXCERT **pexc)
+{
+    SSL_EXCERT *head = *pexc;
+    SSL_EXCERT *node = OPENSSL_malloc(sizeof(SSL_EXCERT));
+
+    if (node == NULL)
+        return 0;
+
+    node->certfile = NULL;
+    node->keyfile = NULL;
+    node->chainfile = NULL;
+    node->cert = NULL;
+    node->key = NULL;
+    node->chain = NULL;
+    node->build_chain = 0;
+
+    node->certform = (head != NULL) ? head->certform : FORMAT_PEM;
+    node->keyform = (head != NULL) ? head->keyform : FORMAT_PEM;
+
+    /* Link in front of the current head */
+    node->prev = NULL;
+    node->next = head;
+    if (head != NULL)
+        head->prev = node;
+    *pexc = node;
+
+    return 1;
+}
+
+/*
+ * Free every entry from |exc| to the end of the list by following the next
+ * pointers, together with the certificate, key and chain each entry holds.
+ * The file name strings are not freed. A NULL |exc| is a no-op.
+ */
+void ssl_excert_free(SSL_EXCERT *exc)
+{
+    SSL_EXCERT *following;
+
+    for (; exc != NULL; exc = following) {
+        /* Remember the successor before the entry itself is released */
+        following = exc->next;
+
+        if (exc->cert != NULL)
+            X509_free(exc->cert);
+        if (exc->key != NULL)
+            EVP_PKEY_free(exc->key);
+        if (exc->chain != NULL)
+            sk_X509_pop_free(exc->chain, X509_free);
+        OPENSSL_free(exc);
+    }
+}
+
+/*
+ * Load the certificate, key and optional chain for every entry of the list
+ * at |*pexc|. When an entry names no key file, the key is read from the
+ * certificate file using the certificate format.
+ *
+ * A list that consists of a single entry without a certificate file is
+ * freed and |*pexc| is set to NULL. Returns 1 on success (including the
+ * empty cases) and 0 if a file name is missing or anything fails to load;
+ * messages go to |err|.
+ */
+int load_excert(SSL_EXCERT **pexc, BIO *err)
+{
+    SSL_EXCERT *cur = *pexc;
+
+    if (cur == NULL)
+        return 1;
+
+    /* A lone entry that was never given a certificate means "no list" */
+    if (cur->certfile == NULL && cur->next == NULL) {
+        ssl_excert_free(cur);
+        *pexc = NULL;
+        return 1;
+    }
+
+    while (cur != NULL) {
+        const char *key_source;
+        int key_format;
+
+        if (cur->certfile == NULL) {
+            BIO_printf(err, "Missing filename\n");
+            return 0;
+        }
+
+        cur->cert = load_cert(err, cur->certfile, cur->certform,
+                              NULL, NULL, "Server Certificate");
+        if (cur->cert == NULL)
+            return 0;
+
+        /* No separate key file: the key lives in the certificate file */
+        if (cur->keyfile != NULL) {
+            key_source = cur->keyfile;
+            key_format = cur->keyform;
+        } else {
+            key_source = cur->certfile;
+            key_format = cur->certform;
+        }
+        cur->key = load_key(err, key_source, key_format,
+                            0, NULL, NULL, "Server Key");
+        if (cur->key == NULL)
+            return 0;
+
+        if (cur->chainfile != NULL) {
+            cur->chain = load_certs(err, cur->chainfile, FORMAT_PEM,
+                                    NULL, NULL, "Server Chain");
+            if (cur->chain == NULL)
+                return 0;
+        }
+
+        cur = cur->next;
+    }
+    return 1;
+}
+
+/*
+ * Parse one of the extra certificate options (-xcert, -xkey, -xchain,
+ * -xchain_build, -xcertform, -xkeyform) found at |**pargs| and record it in
+ * the list at |*pexc|, creating the first list entry if there is none yet.
+ *
+ * Returns 0 if the option is not one of the above, in which case |*pargs|
+ * is not advanced. Returns 1 otherwise, with |*badarg| set to 1 on any
+ * error. On success |*pargs| is advanced past the option and its argument,
+ * and |*pargc| (if non-NULL) is reduced accordingly.
+ *
+ * The stored file names point directly at the argument strings; they are
+ * not copied.
+ */
+int args_excert(char ***pargs, int *pargc,
+                int *badarg, BIO *err, SSL_EXCERT **pexc)
+{
+    char *arg = **pargs, *argn = (*pargs)[1];
+    SSL_EXCERT *exc = *pexc;
+    /* Number of argv slots consumed; options with no value set this to 1 */
+    int narg = 2;
+    /* First -x* option seen: create the initial (empty) list entry */
+    if (!exc) {
+        if (ssl_excert_prepend(&exc))
+            *pexc = exc;
+        else {
+            BIO_printf(err, "Error initialising xcert\n");
+            *badarg = 1;
+            goto err;
+        }
+    }
+    if (strcmp(arg, "-xcert") == 0) {
+        /* Missing value: flag the error but leave the list in place */
+        if (!argn) {
+            *badarg = 1;
+            return 1;
+        }
+        /*
+         * The current head already has a certificate, so this option starts
+         * a new entry, which becomes the head that later options modify.
+         */
+        if (exc->certfile && !ssl_excert_prepend(&exc)) {
+            BIO_printf(err, "Error adding xcert\n");
+            *badarg = 1;
+            goto err;
+        }
+        exc->certfile = argn;
+    } else if (strcmp(arg, "-xkey") == 0) {
+        if (!argn) {
+            *badarg = 1;
+            return 1;
+        }
+        if (exc->keyfile) {
+            BIO_printf(err, "Key already specified\n");
+            *badarg = 1;
+            return 1;
+        }
+        exc->keyfile = argn;
+    } else if (strcmp(arg, "-xchain") == 0) {
+        if (!argn) {
+            *badarg = 1;
+            return 1;
+        }
+        if (exc->chainfile) {
+            BIO_printf(err, "Chain already specified\n");
+            *badarg = 1;
+            return 1;
+        }
+        exc->chainfile = argn;
+    } else if (strcmp(arg, "-xchain_build") == 0) {
+        narg = 1;
+        exc->build_chain = 1;
+    } else if (strcmp(arg, "-xcertform") == 0) {
+        /*
+         * NOTE(review): unlike -xcert/-xkey/-xchain above, a missing value
+         * here goes to err and frees the whole list.
+         */
+        if (!argn) {
+            *badarg = 1;
+            goto err;
+        }
+        exc->certform = str2fmt(argn);
+    } else if (strcmp(arg, "-xkeyform") == 0) {
+        if (!argn) {
+            *badarg = 1;
+            goto err;
+        }
+        exc->keyform = str2fmt(argn);
+    } else
+        /* Not an extra certificate option: let the caller handle it */
+        return 0;
+
+    (*pargs) += narg;
+
+    if (pargc)
+        *pargc -= narg;
+
+    /* The head may have changed if a new entry was prepended */
+    *pexc = exc;
+
+    return 1;
+
+ err:
+    /* Fatal for the whole list: report, free every entry and reset */
+    ERR_print_errors(err);
+    ssl_excert_free(exc);
+    *pexc = NULL;
+    return 1;
+}
+
+/*
+ * On the server side only, print the cipher list obtained from
+ * SSL_get0_raw_cipherlist() as a colon separated list. The value returned
+ * by SSL_get0_raw_cipherlist(s, NULL) is used as the size in bytes of each
+ * list entry. An entry is printed by name when SSL_CIPHER_find() knows it,
+ * as "SCSV" when it equals the trailing bytes of {0, 0, 0xFF}, and as hex
+ * otherwise.
+ */
+static void print_raw_cipherlist(BIO *bio, SSL *s)
+{
+    const unsigned char *rlist;
+    static const unsigned char scsv_id[] = { 0, 0, 0xFF };
+    size_t i, rlistlen, num;
+    if (!SSL_is_server(s))
+        return;
+    num = SSL_get0_raw_cipherlist(s, NULL);
+    rlistlen = SSL_get0_raw_cipherlist(s, &rlist);
+    BIO_puts(bio, "Client cipher list: ");
+    for (i = 0; i < rlistlen; i += num, rlist += num) {
+        const SSL_CIPHER *c = SSL_CIPHER_find(s, rlist);
+        if (i)
+            BIO_puts(bio, ":");
+        if (c)
+            BIO_puts(bio, SSL_CIPHER_get_name(c));
+        /*
+         * Compare against the last |num| bytes of scsv_id. The offset is
+         * computed first so that no pointer before the start of the array
+         * is ever formed, and ids longer than scsv_id are never compared.
+         */
+        else if (num <= sizeof(scsv_id)
+                 && !memcmp(rlist, scsv_id + (sizeof(scsv_id) - num), num))
+            BIO_puts(bio, "SCSV");
+        else {
+            size_t j;
+            BIO_puts(bio, "0x");
+            for (j = 0; j < num; j++)
+                BIO_printf(bio, "%02X", rlist[j]);
+        }
+    }
+    BIO_puts(bio, "\n");
+}
+
+/*
+ * Print a short summary of connection |s| to |bio|: protocol version, raw
+ * client cipher list, negotiated ciphersuite, signature algorithms and the
+ * peer certificate subject with the peer signature hash (or a note that
+ * there is no peer certificate). With EC support the point formats follow,
+ * then the curves on a server; a client prints the server temporary key.
+ */
+void print_ssl_summary(BIO *bio, SSL *s)
+{
+    const SSL_CIPHER *cipher;
+    X509 *peer_cert;
+
+    BIO_printf(bio, "Protocol version: %s\n", SSL_get_version(s));
+    print_raw_cipherlist(bio, s);
+    cipher = SSL_get_current_cipher(s);
+    BIO_printf(bio, "Ciphersuite: %s\n", SSL_CIPHER_get_name(cipher));
+    do_print_sigalgs(bio, s, 0);
+
+    peer_cert = SSL_get_peer_certificate(s);
+    if (peer_cert == NULL) {
+        BIO_puts(bio, "No peer certificate\n");
+    } else {
+        int md_nid;
+
+        BIO_puts(bio, "Peer certificate: ");
+        X509_NAME_print_ex(bio, X509_get_subject_name(peer_cert),
+                           0, XN_FLAG_ONELINE);
+        BIO_puts(bio, "\n");
+        if (SSL_get_peer_signature_nid(s, &md_nid))
+            BIO_printf(bio, "Hash used: %s\n", OBJ_nid2sn(md_nid));
+        /* The certificate is freed here once it has been printed */
+        X509_free(peer_cert);
+    }
+#ifndef OPENSSL_NO_EC
+    ssl_print_point_formats(bio, s);
+    if (!SSL_is_server(s))
+        ssl_print_tmp_key(bio, s);
+    else
+        ssl_print_curves(bio, s, 1);
+#else
+    if (!SSL_is_server(s))
+        ssl_print_tmp_key(bio, s);
+#endif
+}
+
+/*
+ * Try the option at |**pargs| as an SSL configuration command via
+ * SSL_CONF_cmd_argv(). A recognised command is not applied to a context
+ * here; it is appended with its value to the string stack |*pstr| (created
+ * on demand) so that it can be replayed later. Commands that take no value
+ * are stored with a NULL value.
+ *
+ * Returns 0 if the option is not recognised as a configuration command.
+ * Returns 1 otherwise, with |*badarg| set to 1 if the command needs a
+ * missing argument, is otherwise rejected, or cannot be stored because
+ * memory allocation failed.
+ */
+int args_ssl(char ***pargs, int *pargc, SSL_CONF_CTX *cctx,
+             int *badarg, BIO *err, STACK_OF(OPENSSL_STRING) **pstr)
+{
+    /* Captured first: SSL_CONF_cmd_argv() is handed pargs and may move it */
+    char *arg = **pargs, *argn = (*pargs)[1];
+    int rv;
+
+    /* Attempt to run SSL configuration command */
+    rv = SSL_CONF_cmd_argv(cctx, pargc, pargs);
+    /* If parameter not recognised just return */
+    if (rv == 0)
+        return 0;
+    /* see if missing argument error */
+    if (rv == -3) {
+        BIO_printf(err, "%s needs an argument\n", arg);
+        *badarg = 1;
+        goto end;
+    }
+    /* Check for some other error */
+    if (rv < 0) {
+        BIO_printf(err, "Error with command: \"%s %s\"\n",
+                   arg, argn ? argn : "");
+        *badarg = 1;
+        goto end;
+    }
+    /* Store command and argument */
+    /* If only one argument processed store value as NULL */
+    if (rv == 1)
+        argn = NULL;
+    if (!*pstr)
+        *pstr = sk_OPENSSL_STRING_new_null();
+    if (!*pstr || !sk_OPENSSL_STRING_push(*pstr, arg) ||
+        !sk_OPENSSL_STRING_push(*pstr, argn)) {
+        BIO_puts(err, "Memory allocation failure\n");
+        /*
+         * The command was not recorded, so it would be dropped silently
+         * later on: report it as a bad argument instead of as success.
+         */
+        *badarg = 1;
+        goto end;
+    }
+
+ end:
+    if (*badarg)
+        ERR_print_errors(err);
+
+    return 1;
+}
+
+/*
+ * Apply the configuration commands stored in |str| (alternating command
+ * and value entries) to |ctx| through |cctx|, then finish the configuration
+ * context.
+ *
+ * Unless |no_ecdhe| is set or a "-named_curve" command was among those
+ * applied, the curve defaults to P-256. When JPAKE support is compiled in
+ * and |no_jpake| is zero, a stored "-cipher" command is rejected and the
+ * cipher is set to PSK. Returns 1 on success and 0 on any failure, with a
+ * message written to |err|.
+ */
+int args_ssl_call(SSL_CTX *ctx, BIO *err, SSL_CONF_CTX *cctx,
+                  STACK_OF(OPENSSL_STRING) *str, int no_ecdhe, int no_jpake)
+{
+    int idx;
+
+    SSL_CONF_CTX_set_ssl_ctx(cctx, ctx);
+
+    for (idx = 0; idx < sk_OPENSSL_STRING_num(str); idx += 2) {
+        const char *cmd = sk_OPENSSL_STRING_value(str, idx);
+        const char *cmd_value = sk_OPENSSL_STRING_value(str, idx + 1);
+
+        /* An explicit curve makes the P-256 default below unnecessary */
+        if (!no_ecdhe && strcmp(cmd, "-named_curve") == 0)
+            no_ecdhe = 1;
+#ifndef OPENSSL_NO_JPAKE
+        if (!no_jpake && strcmp(cmd, "-cipher") == 0) {
+            BIO_puts(err, "JPAKE sets cipher to PSK\n");
+            return 0;
+        }
+#endif
+        if (SSL_CONF_cmd(cctx, cmd, cmd_value) <= 0) {
+            BIO_printf(err, "Error with command: \"%s %s\"\n",
+                       cmd, cmd_value ? cmd_value : "");
+            ERR_print_errors(err);
+            return 0;
+        }
+    }
+
+    /*
+     * Special case that keeps existing s_server behaviour: with no curve
+     * specified *and* ECDHE not disabled, use P-256.
+     */
+    if (!no_ecdhe && SSL_CONF_cmd(cctx, "-named_curve", "P-256") <= 0) {
+        BIO_puts(err, "Error setting EC curve\n");
+        ERR_print_errors(err);
+        return 0;
+    }
+#ifndef OPENSSL_NO_JPAKE
+    if (!no_jpake && SSL_CONF_cmd(cctx, "-cipher", "PSK") <= 0) {
+        BIO_puts(err, "Error setting cipher to PSK\n");
+        ERR_print_errors(err);
+        return 0;
+    }
+#endif
+    if (SSL_CONF_CTX_finish(cctx) == 0) {
+        BIO_puts(err, "Error finishing context\n");
+        ERR_print_errors(err);
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * Add every CRL in |crls| to the store |st|. The result of each individual
+ * X509_STORE_add_crl() call is ignored. Always returns 1.
+ */
+static int add_crls_store(X509_STORE *st, STACK_OF(X509_CRL) *crls)
+{
+    int idx = 0;
+
+    while (idx < sk_X509_CRL_num(crls)) {
+        X509_STORE_add_crl(st, sk_X509_CRL_value(crls, idx));
+        idx++;
+    }
+    return 1;
+}
+
+/*
+ * Add the CRLs in |crls| to the certificate store of |ctx| and, when
+ * |crl_download| is non-zero, call store_setup_crl_download() on that
+ * store. Always returns 1.
+ */
+int ssl_ctx_add_crls(SSL_CTX *ctx, STACK_OF(X509_CRL) *crls, int crl_download)
+{
+    X509_STORE *store = SSL_CTX_get_cert_store(ctx);
+
+    (void)add_crls_store(store, crls);
+    if (crl_download != 0)
+        store_setup_crl_download(store);
+    return 1;
+}
+
+/*
+ * Create separate verification and chain building certificate stores for
+ * |ctx|. A store is only created when a path or file is given for it.
+ *
+ * The verification store is loaded from |vfyCApath| / |vfyCAfile|, also
+ * receives the CRLs in |crls| and, when |crl_download| is non-zero, is
+ * passed to store_setup_crl_download(). The chain store is loaded from
+ * |chCApath| / |chCAfile|.
+ *
+ * Returns 1 on success and 0 if a store cannot be allocated or loaded. The
+ * local references to both stores are released before returning.
+ */
+int ssl_load_stores(SSL_CTX *ctx,
+                    const char *vfyCApath, const char *vfyCAfile,
+                    const char *chCApath, const char *chCAfile,
+                    STACK_OF(X509_CRL) *crls, int crl_download)
+{
+    X509_STORE *vfy = NULL, *ch = NULL;
+    int rv = 0;
+    if (vfyCApath || vfyCAfile) {
+        vfy = X509_STORE_new();
+        /* Check the allocation before the store is handed on */
+        if (vfy == NULL
+            || !X509_STORE_load_locations(vfy, vfyCAfile, vfyCApath))
+            goto err;
+        add_crls_store(vfy, crls);
+        SSL_CTX_set1_verify_cert_store(ctx, vfy);
+        if (crl_download)
+            store_setup_crl_download(vfy);
+    }
+    if (chCApath || chCAfile) {
+        ch = X509_STORE_new();
+        if (ch == NULL
+            || !X509_STORE_load_locations(ch, chCAfile, chCApath))
+            goto err;
+        SSL_CTX_set1_chain_cert_store(ctx, ch);
+    }
+    rv = 1;
+ err:
+    if (vfy)
+        X509_STORE_free(vfy);
+    if (ch)
+        X509_STORE_free(ch);
+    return rv;
+}
index 28737b6..e55f2c5 100644 (file)
@@ -202,6 +202,7 @@ typedef unsigned int u_int;
 extern int verify_depth;
 extern int verify_error;
 extern int verify_return_error;
+extern int verify_quiet;
 
 #ifdef FIONBIO
 static int c_nbio = 0;
@@ -224,8 +225,10 @@ static void print_stuff(BIO *berr, SSL *con, int full);
 static int ocsp_resp_cb(SSL *s, void *arg);
 #endif
 static BIO *bio_c_out = NULL;
+static BIO *bio_c_msg = NULL;
 static int c_quiet = 0;
 static int c_ign_eof = 0;
+static int c_brief = 0;
 
 #ifndef OPENSSL_NO_PSK
 /* Default PSK identity and key */
@@ -304,6 +307,12 @@ static void sc_usage(void)
     BIO_printf(bio_err,
                " -connect host:port - who to connect to (default is %s:%s)\n",
                SSL_HOST_NAME, PORT_STR);
+    BIO_printf(bio_err,
+               " -verify_host host - check peer certificate matches \"host\"\n");
+    BIO_printf(bio_err,
+               " -verify_email email - check peer certificate matches \"email\"\n");
+    BIO_printf(bio_err,
+               " -verify_ip ipaddr - check peer certificate matches \"ipaddr\"\n");
 
     BIO_printf(bio_err,
                " -verify arg   - turn on peer certificate verification\n");
@@ -413,12 +422,16 @@ static void sc_usage(void)
                " -status           - request certificate status from server\n");
     BIO_printf(bio_err,
                " -no_ticket        - disable use of RFC4507bis session tickets\n");
-# ifndef OPENSSL_NO_NEXTPROTONEG
+    BIO_printf(bio_err,
+               " -serverinfo types - send empty ClientHello extensions (comma-separated numbers)\n");
+#endif
+#ifndef OPENSSL_NO_NEXTPROTONEG
     BIO_printf(bio_err,
                " -nextprotoneg arg - enable NPN extension, considering named protocols supported (comma-separated list)\n");
-# endif
 #endif
     BIO_printf(bio_err,
+               " -alpn arg         - enable ALPN extension, considering named protocols supported (comma-separated list)\n");
+    BIO_printf(bio_err,
                " -legacy_renegotiation - enable use of legacy renegotiation (dangerous)\n");
 #ifndef OPENSSL_NO_SRTP
     BIO_printf(bio_err,
@@ -605,6 +618,27 @@ static int next_proto_cb(SSL *s, unsigned char **out, unsigned char *outlen,
     return SSL_TLSEXT_ERR_OK;
 }
 # endif                         /* ndef OPENSSL_NO_NEXTPROTONEG */
+
+/*
+ * Extension parse callback: rebuild the extension (2 byte type, 2 byte
+ * length, then the |inlen| bytes at |in|) and write it to bio_c_out as a
+ * PEM block named "SERVERINFO FOR EXTENSION <type>".
+ *
+ * Returns 1 on success. Data too long for the 16 bit length field is
+ * rejected by returning 0 with |*al| set to SSL_AD_DECODE_ERROR.
+ */
+static int serverinfo_cli_parse_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char *in, size_t inlen,
+                                   int *al, void *arg)
+{
+    char pem_name[100];
+    unsigned char ext_buf[4 + 65536];
+
+    /* Never copy more than the length field, and so ext_buf, can hold */
+    if (inlen > 0xFFFF) {
+        *al = SSL_AD_DECODE_ERROR;
+        return 0;
+    }
+
+    /* Reconstruct the type/len fields prior to extension data */
+    ext_buf[0] = ext_type >> 8;
+    ext_buf[1] = ext_type & 0xFF;
+    ext_buf[2] = inlen >> 8;
+    ext_buf[3] = inlen & 0xFF;
+    /* An empty extension may come with a NULL data pointer */
+    if (inlen > 0)
+        memcpy(ext_buf + 4, in, inlen);
+
+    BIO_snprintf(pem_name, sizeof(pem_name), "SERVERINFO FOR EXTENSION %u",
+                 ext_type);
+    PEM_write_bio(bio_c_out, pem_name, "", ext_buf, 4 + inlen);
+    return 1;
+}
+
 #endif
 
 enum {
@@ -620,7 +654,7 @@ int MAIN(int, char **);
 
 int MAIN(int argc, char **argv)
 {
-    unsigned int off = 0, clr = 0;
+    int build_chain = 0;
     SSL *con = NULL;
 #ifndef OPENSSL_NO_KRB5
     KSSL_CTX *kctx;
@@ -633,13 +667,16 @@ int MAIN(int argc, char **argv)
     short port = PORT;
     int full_log = 1;
     char *host = SSL_HOST_NAME;
-    char *cert_file = NULL, *key_file = NULL;
+    char *cert_file = NULL, *key_file = NULL, *chain_file = NULL;
     int cert_format = FORMAT_PEM, key_format = FORMAT_PEM;
     char *passarg = NULL, *pass = NULL;
     X509 *cert = NULL;
     EVP_PKEY *key = NULL;
-    char *CApath = NULL, *CAfile = NULL, *cipher = NULL;
-    int reconnect = 0, badop = 0, verify = SSL_VERIFY_NONE, bugs = 0;
+    STACK_OF(X509) *chain = NULL;
+    char *CApath = NULL, *CAfile = NULL;
+    char *chCApath = NULL, *chCAfile = NULL;
+    char *vfyCApath = NULL, *vfyCAfile = NULL;
+    int reconnect = 0, badop = 0, verify = SSL_VERIFY_NONE;
     int crlf = 0;
     int write_tty, read_tty, write_ssl, read_ssl, tty_on, ssl_pending;
     SSL_CTX *ctx = NULL;
@@ -672,6 +709,10 @@ int MAIN(int argc, char **argv)
 # ifndef OPENSSL_NO_NEXTPROTONEG
     const char *next_proto_neg_in = NULL;
 # endif
+    const char *alpn_in = NULL;
+# define MAX_SI_TYPES 100
+    unsigned short serverinfo_types[MAX_SI_TYPES];
+    int serverinfo_types_count = 0;
 #endif
     char *sess_in = NULL;
     char *sess_out = NULL;
@@ -681,13 +722,25 @@ int MAIN(int argc, char **argv)
     int enable_timeouts = 0;
     long socket_mtu = 0;
 #ifndef OPENSSL_NO_JPAKE
-    char *jpake_secret = NULL;
+    static char *jpake_secret = NULL;
+# define no_jpake !jpake_secret
+#else
+# define no_jpake 1
 #endif
 #ifndef OPENSSL_NO_SRP
     char *srppass = NULL;
     int srp_lateuser = 0;
     SRP_ARG srp_arg = { NULL, NULL, 0, 0, 0, 1024 };
 #endif
+    SSL_EXCERT *exc = NULL;
+
+    SSL_CONF_CTX *cctx = NULL;
+    STACK_OF(OPENSSL_STRING) *ssl_args = NULL;
+
+    char *crl_file = NULL;
+    int crl_format = FORMAT_PEM;
+    int crl_download = 0;
+    STACK_OF(X509_CRL) *crls = NULL;
 
     meth = SSLv23_client_method();
 
@@ -705,6 +758,12 @@ int MAIN(int argc, char **argv)
     if (!load_config(bio_err, NULL))
         goto end;
 
+    cctx = SSL_CONF_CTX_new();
+    if (!cctx)
+        goto end;
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CLIENT);
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CMDLINE);
+
     if (((cbuf = OPENSSL_malloc(BUFSIZZ)) == NULL) ||
         ((sbuf = OPENSSL_malloc(BUFSIZZ)) == NULL) ||
         ((mbuf = OPENSSL_malloc(BUFSIZZ)) == NULL)) {
@@ -741,12 +800,19 @@ int MAIN(int argc, char **argv)
             if (--argc < 1)
                 goto bad;
             verify_depth = atoi(*(++argv));
-            BIO_printf(bio_err, "verify depth is %d\n", verify_depth);
+            if (!c_quiet)
+                BIO_printf(bio_err, "verify depth is %d\n", verify_depth);
         } else if (strcmp(*argv, "-cert") == 0) {
             if (--argc < 1)
                 goto bad;
             cert_file = *(++argv);
-        } else if (strcmp(*argv, "-sess_out") == 0) {
+        } else if (strcmp(*argv, "-CRL") == 0) {
+            if (--argc < 1)
+                goto bad;
+            crl_file = *(++argv);
+        } else if (strcmp(*argv, "-crl_download") == 0)
+            crl_download = 1;
+        else if (strcmp(*argv, "-sess_out") == 0) {
             if (--argc < 1)
                 goto bad;
             sess_out = *(++argv);
@@ -758,13 +824,31 @@ int MAIN(int argc, char **argv)
             if (--argc < 1)
                 goto bad;
             cert_format = str2fmt(*(++argv));
+        } else if (strcmp(*argv, "-CRLform") == 0) {
+            if (--argc < 1)
+                goto bad;
+            crl_format = str2fmt(*(++argv));
         } else if (args_verify(&argv, &argc, &badarg, bio_err, &vpm)) {
             if (badarg)
                 goto bad;
             continue;
         } else if (strcmp(*argv, "-verify_return_error") == 0)
             verify_return_error = 1;
-        else if (strcmp(*argv, "-prexit") == 0)
+        else if (strcmp(*argv, "-verify_quiet") == 0)
+            verify_quiet = 1;
+        else if (strcmp(*argv, "-brief") == 0) {
+            c_brief = 1;
+            verify_quiet = 1;
+            c_quiet = 1;
+        } else if (args_excert(&argv, &argc, &badarg, bio_err, &exc)) {
+            if (badarg)
+                goto bad;
+            continue;
+        } else if (args_ssl(&argv, &argc, cctx, &badarg, bio_err, &ssl_args)) {
+            if (badarg)
+                goto bad;
+            continue;
+        } else if (strcmp(*argv, "-prexit") == 0)
             prexit = 1;
         else if (strcmp(*argv, "-crlf") == 0)
             crlf = 1;
@@ -791,6 +875,15 @@ int MAIN(int argc, char **argv)
 #endif
         else if (strcmp(*argv, "-msg") == 0)
             c_msg = 1;
+        else if (strcmp(*argv, "-msgfile") == 0) {
+            if (--argc < 1)
+                goto bad;
+            bio_c_msg = BIO_new_file(*(++argv), "w");
+        }
+#ifndef OPENSSL_NO_SSL_TRACE
+        else if (strcmp(*argv, "-trace") == 0)
+            c_msg = 2;
+#endif
         else if (strcmp(*argv, "-showcerts") == 0)
             c_showcerts = 1;
         else if (strcmp(*argv, "-nbio_test") == 0)
@@ -859,11 +952,15 @@ int MAIN(int argc, char **argv)
             meth = TLSv1_client_method();
 #endif
 #ifndef OPENSSL_NO_DTLS1
-        else if (strcmp(*argv, "-dtls1") == 0) {
+        else if (strcmp(*argv, "-dtls") == 0) {
+            meth = DTLS_client_method();
+            socket_type = SOCK_DGRAM;
+        } else if (strcmp(*argv, "-dtls1") == 0) {
             meth = DTLSv1_client_method();
             socket_type = SOCK_DGRAM;
-        } else if (strcmp(*argv, "-fallback_scsv") == 0) {
-            fallback_scsv = 1;
+        } else if (strcmp(*argv, "-dtls1_2") == 0) {
+            meth = DTLSv1_2_client_method();
+            socket_type = SOCK_DGRAM;
         } else if (strcmp(*argv, "-timeout") == 0)
             enable_timeouts = 1;
         else if (strcmp(*argv, "-mtu") == 0) {
@@ -872,9 +969,9 @@ int MAIN(int argc, char **argv)
             socket_mtu = atol(*(++argv));
         }
 #endif
-        else if (strcmp(*argv, "-bugs") == 0)
-            bugs = 1;
-        else if (strcmp(*argv, "-keyform") == 0) {
+        else if (strcmp(*argv, "-fallback_scsv") == 0) {
+            fallback_scsv = 1;
+        } else if (strcmp(*argv, "-keyform") == 0) {
             if (--argc < 1)
                 goto bad;
             key_format = str2fmt(*(++argv));
@@ -882,6 +979,10 @@ int MAIN(int argc, char **argv)
             if (--argc < 1)
                 goto bad;
             passarg = *(++argv);
+        } else if (strcmp(*argv, "-cert_chain") == 0) {
+            if (--argc < 1)
+                goto bad;
+            chain_file = *(++argv);
         } else if (strcmp(*argv, "-key") == 0) {
             if (--argc < 1)
                 goto bad;
@@ -892,27 +993,30 @@ int MAIN(int argc, char **argv)
             if (--argc < 1)
                 goto bad;
             CApath = *(++argv);
-        } else if (strcmp(*argv, "-CAfile") == 0) {
+        } else if (strcmp(*argv, "-chainCApath") == 0) {
+            if (--argc < 1)
+                goto bad;
+            chCApath = *(++argv);
+        } else if (strcmp(*argv, "-verifyCApath") == 0) {
+            if (--argc < 1)
+                goto bad;
+            vfyCApath = *(++argv);
+        } else if (strcmp(*argv, "-build_chain") == 0)
+            build_chain = 1;
+        else if (strcmp(*argv, "-CAfile") == 0) {
             if (--argc < 1)
                 goto bad;
             CAfile = *(++argv);
-        } else if (strcmp(*argv, "-no_tls1_2") == 0)
-            off |= SSL_OP_NO_TLSv1_2;
-        else if (strcmp(*argv, "-no_tls1_1") == 0)
-            off |= SSL_OP_NO_TLSv1_1;
-        else if (strcmp(*argv, "-no_tls1") == 0)
-            off |= SSL_OP_NO_TLSv1;
-        else if (strcmp(*argv, "-no_ssl3") == 0)
-            off |= SSL_OP_NO_SSLv3;
-        else if (strcmp(*argv, "-no_ssl2") == 0)
-            off |= SSL_OP_NO_SSLv2;
-        else if (strcmp(*argv, "-no_comp") == 0) {
-            off |= SSL_OP_NO_COMPRESSION;
+        } else if (strcmp(*argv, "-chainCAfile") == 0) {
+            if (--argc < 1)
+                goto bad;
+            chCAfile = *(++argv);
+        } else if (strcmp(*argv, "-verifyCAfile") == 0) {
+            if (--argc < 1)
+                goto bad;
+            vfyCAfile = *(++argv);
         }
 #ifndef OPENSSL_NO_TLSEXT
-        else if (strcmp(*argv, "-no_ticket") == 0) {
-            off |= SSL_OP_NO_TICKET;
-        }
 # ifndef OPENSSL_NO_NEXTPROTONEG
         else if (strcmp(*argv, "-nextprotoneg") == 0) {
             if (--argc < 1)
@@ -920,20 +1024,32 @@ int MAIN(int argc, char **argv)
             next_proto_neg_in = *(++argv);
         }
 # endif
-#endif
-        else if (strcmp(*argv, "-serverpref") == 0)
-            off |= SSL_OP_CIPHER_SERVER_PREFERENCE;
-        else if (strcmp(*argv, "-legacy_renegotiation") == 0)
-            off |= SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION;
-        else if (strcmp(*argv, "-legacy_server_connect") == 0) {
-            off |= SSL_OP_LEGACY_SERVER_CONNECT;
-        } else if (strcmp(*argv, "-no_legacy_server_connect") == 0) {
-            clr |= SSL_OP_LEGACY_SERVER_CONNECT;
-        } else if (strcmp(*argv, "-cipher") == 0) {
+        else if (strcmp(*argv, "-alpn") == 0) {
+            if (--argc < 1)
+                goto bad;
+            alpn_in = *(++argv);
+        } else if (strcmp(*argv, "-serverinfo") == 0) {
+            char *c;
+            int start = 0;
+            int len;
+
             if (--argc < 1)
                 goto bad;
-            cipher = *(++argv);
+            c = *(++argv);
+            serverinfo_types_count = 0;
+            len = strlen(c);
+            for (i = 0; i <= len; ++i) {
+                if (i == len || c[i] == ',') {
+                    serverinfo_types[serverinfo_types_count]
+                        = atoi(c + start);
+                    serverinfo_types_count++;
+                    start = i + 1;
+                }
+                if (serverinfo_types_count == MAX_SI_TYPES)
+                    break;
+            }
         }
+#endif
 #ifdef FIONBIO
         else if (strcmp(*argv, "-nbio") == 0) {
             c_nbio = 1;
@@ -1024,11 +1140,6 @@ int MAIN(int argc, char **argv)
             goto end;
         }
         psk_identity = "JPAKE";
-        if (cipher) {
-            BIO_printf(bio_err, "JPAKE sets cipher to PSK\n");
-            goto end;
-        }
-        cipher = "PSK";
     }
 #endif
 
@@ -1087,6 +1198,33 @@ int MAIN(int argc, char **argv)
         }
     }
 
+    if (chain_file) {
+        chain = load_certs(bio_err, chain_file, FORMAT_PEM,
+                           NULL, e, "client certificate chain");
+        if (!chain)
+            goto end;
+    }
+
+    if (crl_file) {
+        X509_CRL *crl;
+        crl = load_crl(crl_file, crl_format);
+        if (!crl) {
+            BIO_puts(bio_err, "Error loading CRL\n");
+            ERR_print_errors(bio_err);
+            goto end;
+        }
+        crls = sk_X509_CRL_new_null();
+        if (!crls || !sk_X509_CRL_push(crls, crl)) {
+            BIO_puts(bio_err, "Error adding CRL\n");
+            ERR_print_errors(bio_err);
+            X509_CRL_free(crl);
+            goto end;
+        }
+    }
+
+    if (!load_excert(&exc, bio_err))
+        goto end;
+
     if (!app_RAND_load_file(NULL, bio_err, 1) && inrand == NULL
         && !RAND_status()) {
         BIO_printf(bio_err,
@@ -1097,8 +1235,10 @@ int MAIN(int argc, char **argv)
                    app_RAND_load_files(inrand));
 
     if (bio_c_out == NULL) {
-        if (c_quiet && !c_debug && !c_msg) {
+        if (c_quiet && !c_debug) {
             bio_c_out = BIO_new(BIO_s_null());
+            if (c_msg && !bio_c_msg)
+                bio_c_msg = BIO_new_fp(stdout, BIO_NOCLOSE);
         } else {
             if (bio_c_out == NULL)
                 bio_c_out = BIO_new_fp(stdout, BIO_NOCLOSE);
@@ -1120,6 +1260,17 @@ int MAIN(int argc, char **argv)
     if (vpm)
         SSL_CTX_set1_param(ctx, vpm);
 
+    if (!args_ssl_call(ctx, bio_err, cctx, ssl_args, 1, no_jpake)) {
+        ERR_print_errors(bio_err);
+        goto end;
+    }
+
+    if (!ssl_load_stores(ctx, vfyCApath, vfyCAfile, chCApath, chCAfile,
+                         crls, crl_download)) {
+        BIO_printf(bio_err, "Error loading store locations\n");
+        ERR_print_errors(bio_err);
+        goto end;
+    }
 #ifndef OPENSSL_NO_ENGINE
     if (ssl_client_engine) {
         if (!SSL_CTX_set_client_cert_engine(ctx, ssl_client_engine)) {
@@ -1149,35 +1300,43 @@ int MAIN(int argc, char **argv)
     if (srtp_profiles != NULL)
         SSL_CTX_set_tlsext_use_srtp(ctx, srtp_profiles);
 #endif
-    if (bugs)
-        SSL_CTX_set_options(ctx, SSL_OP_ALL | off);
-    else
-        SSL_CTX_set_options(ctx, off);
+    if (exc)
+        ssl_ctx_set_excert(ctx, exc);
 
-    if (clr)
-        SSL_CTX_clear_options(ctx, clr);
-
-#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
+#if !defined(OPENSSL_NO_TLSEXT)
+# if !defined(OPENSSL_NO_NEXTPROTONEG)
     if (next_proto.data)
         SSL_CTX_set_next_proto_select_cb(ctx, next_proto_cb, &next_proto);
+# endif
+    if (alpn_in) {
+        unsigned short alpn_len;
+        unsigned char *alpn = next_protos_parse(&alpn_len, alpn_in);
+
+        if (alpn == NULL) {
+            BIO_printf(bio_err, "Error parsing -alpn argument\n");
+            goto end;
+        }
+        SSL_CTX_set_alpn_protos(ctx, alpn, alpn_len);
+        OPENSSL_free(alpn);
+    }
+#endif
+#ifndef OPENSSL_NO_TLSEXT
+    for (i = 0; i < serverinfo_types_count; i++) {
+        SSL_CTX_add_client_custom_ext(ctx,
+                                      serverinfo_types[i],
+                                      NULL, NULL, NULL,
+                                      serverinfo_cli_parse_cb, NULL);
+    }
 #endif
 
     if (state)
         SSL_CTX_set_info_callback(ctx, apps_ssl_info_callback);
-    if (cipher != NULL)
-        if (!SSL_CTX_set_cipher_list(ctx, cipher)) {
-            BIO_printf(bio_err, "error setting cipher list\n");
-            ERR_print_errors(bio_err);
-            goto end;
-        }
 #if 0
-        else
-            SSL_CTX_set_cipher_list(ctx, getenv("SSL_CIPHER"));
+    else
+        SSL_CTX_set_cipher_list(ctx, getenv("SSL_CIPHER"));
 #endif
 
     SSL_CTX_set_verify(ctx, verify, verify_callback);
-    if (!set_cert_key_stuff(ctx, cert, key))
-        goto end;
 
     if ((CAfile || CApath)
         && !SSL_CTX_load_verify_locations(ctx, CAfile, CApath)) {
@@ -1186,6 +1345,11 @@ int MAIN(int argc, char **argv)
     if (!SSL_CTX_set_default_verify_paths(ctx)) {
         ERR_print_errors(bio_err);
     }
+
+    ssl_ctx_add_crls(ctx, crls, crl_download);
+    if (!set_cert_key_stuff(ctx, cert, key, chain, build_chain))
+        goto end;
+
 #ifndef OPENSSL_NO_TLSEXT
     if (servername != NULL) {
         tlsextcbp.biodebug = bio_err;
@@ -1277,7 +1441,7 @@ int MAIN(int argc, char **argv)
     if (c_Pause & 0x01)
         SSL_set_debug(con, 1);
 
-    if (SSL_version(con) == DTLS1_VERSION) {
+    if (socket_type == SOCK_DGRAM) {
 
         sbio = BIO_new_dgram(s, BIO_NOCLOSE);
         if (getsockname(s, &peer, (void *)&peerlen) < 0) {
@@ -1331,8 +1495,13 @@ int MAIN(int argc, char **argv)
         BIO_set_callback_arg(sbio, (char *)bio_c_out);
     }
     if (c_msg) {
-        SSL_set_msg_callback(con, msg_cb);
-        SSL_set_msg_callback_arg(con, bio_c_out);
+#ifndef OPENSSL_NO_SSL_TRACE
+        if (c_msg == 2)
+            SSL_set_msg_callback(con, SSL_trace);
+        else
+#endif
+            SSL_set_msg_callback(con, msg_cb);
+        SSL_set_msg_callback_arg(con, bio_c_msg ? bio_c_msg : bio_c_out);
     }
 #ifndef OPENSSL_NO_TLSEXT
     if (c_tlsextdebug) {
@@ -1515,6 +1684,11 @@ int MAIN(int argc, char **argv)
                         BIO_printf(bio_err, "Error writing session file %s\n",
                                    sess_out);
                 }
+                if (c_brief) {
+                    BIO_puts(bio_err, "CONNECTION ESTABLISHED\n");
+                    print_ssl_summary(bio_err, con);
+                }
+
                 print_stuff(bio_c_out, con, full_log);
                 if (full_log > 0)
                     full_log--;
@@ -1780,7 +1954,10 @@ int MAIN(int argc, char **argv)
                 break;
             case SSL_ERROR_SYSCALL:
                 ret = get_last_socket_error();
-                BIO_printf(bio_err, "read:errno=%d\n", ret);
+                if (c_brief)
+                    BIO_puts(bio_err, "CONNECTION CLOSED BY SERVER\n");
+                else
+                    BIO_printf(bio_err, "read:errno=%d\n", ret);
                 goto shut;
             case SSL_ERROR_ZERO_RETURN:
                 BIO_printf(bio_c_out, "closed\n");
@@ -1880,12 +2057,25 @@ int MAIN(int argc, char **argv)
         SSL_CTX_free(ctx);
     if (cert)
         X509_free(cert);
+    if (crls)
+        sk_X509_CRL_pop_free(crls, X509_CRL_free);
     if (key)
         EVP_PKEY_free(key);
+    if (chain)
+        sk_X509_pop_free(chain, X509_free);
     if (pass)
         OPENSSL_free(pass);
     if (vpm)
         X509_VERIFY_PARAM_free(vpm);
+    ssl_excert_free(exc);
+    if (ssl_args)
+        sk_OPENSSL_STRING_free(ssl_args);
+    if (cctx)
+        SSL_CONF_CTX_free(cctx);
+#ifndef OPENSSL_NO_JPAKE
+    if (jpake_secret && psk_key)
+        OPENSSL_free(psk_key);
+#endif
     if (cbuf != NULL) {
         OPENSSL_cleanse(cbuf, BUFSIZZ);
         OPENSSL_free(cbuf);
@@ -1902,6 +2092,10 @@ int MAIN(int argc, char **argv)
         BIO_free(bio_c_out);
         bio_c_out = NULL;
     }
+    if (bio_c_msg != NULL) {
+        BIO_free(bio_c_msg);
+        bio_c_msg = NULL;
+    }
     apps_shutdown();
     OPENSSL_EXIT(ret);
 }
@@ -1995,6 +2189,9 @@ static void print_stuff(BIO *bio, SSL *s, int full)
             BIO_write(bio, "\n", 1);
         }
 
+        ssl_print_sigalgs(bio, s);
+        ssl_print_tmp_key(bio, s);
+
         BIO_printf(bio,
                    "---\nSSL handshake has read %ld bytes and written %ld bytes\n",
                    BIO_number_read(SSL_get_rbio(s)),
@@ -2034,7 +2231,8 @@ static void print_stuff(BIO *bio, SSL *s, int full)
     }
 #endif
 
-#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
+#if !defined(OPENSSL_NO_TLSEXT)
+# if !defined(OPENSSL_NO_NEXTPROTONEG)
     if (next_proto.status != -1) {
         const unsigned char *proto;
         unsigned int proto_len;
@@ -2043,6 +2241,18 @@ static void print_stuff(BIO *bio, SSL *s, int full)
         BIO_write(bio, proto, proto_len);
         BIO_write(bio, "\n", 1);
     }
+# endif
+    {
+        const unsigned char *proto;
+        unsigned int proto_len;
+        SSL_get0_alpn_selected(s, &proto, &proto_len);
+        if (proto_len > 0) {
+            BIO_printf(bio, "ALPN protocol: ");
+            BIO_write(bio, proto, proto_len);
+            BIO_write(bio, "\n", 1);
+        } else
+            BIO_printf(bio, "No ALPN negotiated\n");
+    }
 #endif
 
 #ifndef OPENSSL_NO_SRTP
index b58e5e0..acef382 100644 (file)
@@ -209,14 +209,17 @@ typedef unsigned int u_int;
 #ifndef OPENSSL_NO_RSA
 static RSA MS_CALLBACK *tmp_rsa_cb(SSL *s, int is_export, int keylength);
 #endif
-static int sv_body(char *hostname, int s, unsigned char *context);
-static int www_body(char *hostname, int s, unsigned char *context);
+static int sv_body(char *hostname, int s, int stype, unsigned char *context);
+static int www_body(char *hostname, int s, int stype, unsigned char *context);
+static int rev_body(char *hostname, int s, int stype, unsigned char *context);
 static void close_accept_socket(void);
 static void sv_usage(void);
 static int init_ssl_connection(SSL *s);
 static void print_stats(BIO *bp, SSL_CTX *ctx);
 static int generate_session_id(const SSL *ssl, unsigned char *id,
                                unsigned int *id_len);
+static void init_session_cache_ctx(SSL_CTX *sctx);
+static void free_sessions(void);
 #ifndef OPENSSL_NO_DH
 static DH *load_dh_param(const char *dhfile);
 static DH *get_dh2048(void);
@@ -286,16 +289,16 @@ static int accept_socket = -1;
 #undef PROG
 #define PROG            s_server_main
 
-extern int verify_depth, verify_return_error;
+extern int verify_depth, verify_return_error, verify_quiet;
 
-static char *cipher = NULL;
 static int s_server_verify = SSL_VERIFY_NONE;
 static int s_server_session_id_context = 1; /* anything will do */
-static const char *s_cert_file = TEST_CERT, *s_key_file = NULL;
+static const char *s_cert_file = TEST_CERT, *s_key_file =
+    NULL, *s_chain_file = NULL;
 #ifndef OPENSSL_NO_TLSEXT
 static const char *s_cert_file2 = TEST_CERT2, *s_key_file2 = NULL;
 #endif
-static char *s_dcert_file = NULL, *s_dkey_file = NULL;
+static char *s_dcert_file = NULL, *s_dkey_file = NULL, *s_dchain_file = NULL;
 #ifdef FIONBIO
 static int s_nbio = 0;
 #endif
@@ -308,14 +311,18 @@ static SSL_CTX *ctx2 = NULL;
 static int www = 0;
 
 static BIO *bio_s_out = NULL;
+static BIO *bio_s_msg = NULL;
 static int s_debug = 0;
 #ifndef OPENSSL_NO_TLSEXT
 static int s_tlsextdebug = 0;
 static int s_tlsextstatus = 0;
 static int cert_status_cb(SSL *s, void *arg);
 #endif
+static int no_resume_ephemeral = 0;
 static int s_msg = 0;
 static int s_quiet = 0;
+static int s_ign_eof = 0;
+static int s_brief = 0;
 
 static char *keymatexportlabel = NULL;
 static int keymatexportlen = 20;
@@ -332,6 +339,12 @@ static long socket_mtu;
 static int cert_chain = 0;
 #endif
 
+#ifndef OPENSSL_NO_TLSEXT
+static BIO *serverinfo_in = NULL;
+static const char *s_serverinfo_file = NULL;
+
+#endif
+
 #ifndef OPENSSL_NO_PSK
 static char *psk_identity = "Client_identity";
 char *psk_key = NULL;           /* by default PSK is not used */
@@ -447,12 +460,13 @@ static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg)
 static void s_server_init(void)
 {
     accept_socket = -1;
-    cipher = NULL;
     s_server_verify = SSL_VERIFY_NONE;
     s_dcert_file = NULL;
     s_dkey_file = NULL;
+    s_dchain_file = NULL;
     s_cert_file = TEST_CERT;
     s_key_file = NULL;
+    s_chain_file = NULL;
 # ifndef OPENSSL_NO_TLSEXT
     s_cert_file2 = TEST_CERT2;
     s_key_file2 = NULL;
@@ -469,6 +483,7 @@ static void s_server_init(void)
     s_debug = 0;
     s_msg = 0;
     s_quiet = 0;
+    s_brief = 0;
     hack = 0;
 # ifndef OPENSSL_NO_ENGINE
     engine_id = NULL;
@@ -482,6 +497,12 @@ static void sv_usage(void)
     BIO_printf(bio_err, "\n");
     BIO_printf(bio_err,
                " -accept arg   - port to accept on (default is %d)\n", PORT);
+    BIO_printf(bio_err,
+               " -verify_host host - check peer certificate matches \"host\"\n");
+    BIO_printf(bio_err,
+               " -verify_email email - check peer certificate matches \"email\"\n");
+    BIO_printf(bio_err,
+               " -verify_ip ipaddr - check peer certificate matches \"ipaddr\"\n");
     BIO_printf(bio_err, " -context arg  - set session ID context\n");
     BIO_printf(bio_err,
                " -verify arg   - turn on peer certificate verification\n");
@@ -491,6 +512,16 @@ static void sv_usage(void)
                " -verify_return_error - return verification errors\n");
     BIO_printf(bio_err, " -cert arg     - certificate file to use\n");
     BIO_printf(bio_err, "                 (default is %s)\n", TEST_CERT);
+#ifndef OPENSSL_NO_TLSEXT
+    BIO_printf(bio_err,
+               " -serverinfo arg - PEM serverinfo file for certificate\n");
+    BIO_printf(bio_err,
+               " -auth               - send and receive RFC 5878 TLS auth extensions and supplemental data\n");
+    BIO_printf(bio_err,
+               " -auth_require_reneg - Do not send TLS auth extensions until renegotiation\n");
+#endif
+    BIO_printf(bio_err,
+               " -no_resumption_on_reneg - set SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION flag\n");
     BIO_printf(bio_err,
                " -crl_check    - check the peer certificate has not been revoked by its CA.\n"
                "                 The CRL(s) are appended to the certificate file\n");
@@ -569,6 +600,7 @@ static void sv_usage(void)
     BIO_printf(bio_err, " -tls1_1       - Just talk TLSv1.1\n");
     BIO_printf(bio_err, " -tls1         - Just talk TLSv1\n");
     BIO_printf(bio_err, " -dtls1        - Just talk DTLSv1\n");
+    BIO_printf(bio_err, " -dtls1_2      - Just talk DTLSv1.2\n");
     BIO_printf(bio_err, " -timeout      - Enable timeouts\n");
     BIO_printf(bio_err, " -mtu          - Set link layer MTU\n");
     BIO_printf(bio_err, " -chain        - Read a certificate chain\n");
@@ -628,6 +660,8 @@ static void sv_usage(void)
     BIO_printf(bio_err,
                " -use_srtp profiles - Offer SRTP key management with a colon-separated profile list\n");
 # endif
+    BIO_printf(bio_err,
+               " -alpn arg  - set the advertised protocols for the ALPN extension (comma-separated list)\n");
 #endif
     BIO_printf(bio_err,
                " -keymatexport label   - Export keying material using label\n");
@@ -988,12 +1022,53 @@ static int next_proto_cb(SSL *s, const unsigned char **data,
 }
 # endif                         /* ndef OPENSSL_NO_NEXTPROTONEG */
 
-#endif
+/* This is the context that we pass to alpn_cb */
+typedef struct tlsextalpnctx_st {
+    unsigned char *data;
+    unsigned short len;
+} tlsextalpnctx;
+/* Logs the client's list unless s_quiet; NOACK if nothing is negotiated */
+static int alpn_cb(SSL *s, const unsigned char **out, unsigned char *outlen,
+                   const unsigned char *in, unsigned int inlen, void *arg)
+{
+    tlsextalpnctx *alpn_ctx = arg;
+
+    if (!s_quiet) {
+        /* We can assume that |in| is syntactically valid. */
+        unsigned i;
+        BIO_printf(bio_s_out, "ALPN protocols advertised by the client: ");
+        for (i = 0; i < inlen;) {
+            if (i)
+                BIO_write(bio_s_out, ", ", 2);
+            BIO_write(bio_s_out, &in[i + 1], in[i]);
+            i += in[i] + 1;
+        }
+        BIO_write(bio_s_out, "\n", 1);
+    }
+
+    if (SSL_select_next_proto
+        ((unsigned char **)out, outlen, alpn_ctx->data, alpn_ctx->len, in,
+         inlen) != OPENSSL_NPN_NEGOTIATED) {
+        return SSL_TLSEXT_ERR_NOACK;
+    }
+
+    if (!s_quiet) {
+        BIO_printf(bio_s_out, "ALPN protocols selected: ");
+        BIO_write(bio_s_out, *out, *outlen);
+        BIO_write(bio_s_out, "\n", 1);
+    }
+
+    return SSL_TLSEXT_ERR_OK;
+}
+#endif                          /* ndef OPENSSL_NO_TLSEXT */
 
 int MAIN(int, char **);
 
 #ifndef OPENSSL_NO_JPAKE
 static char *jpake_secret = NULL;
+# define no_jpake !jpake_secret
+#else
+# define no_jpake 1
 #endif
 #ifndef OPENSSL_NO_SRP
 static srpsrvparm srp_callback_parm;
@@ -1008,18 +1083,14 @@ int MAIN(int argc, char *argv[])
     int badarg = 0;
     short port = PORT;
     char *CApath = NULL, *CAfile = NULL;
+    char *chCApath = NULL, *chCAfile = NULL;
+    char *vfyCApath = NULL, *vfyCAfile = NULL;
     unsigned char *context = NULL;
     char *dhfile = NULL;
-#ifndef OPENSSL_NO_ECDH
-    char *named_curve = NULL;
-#endif
-    int badop = 0, bugs = 0;
+    int badop = 0;
     int ret = 1;
-    int off = 0;
-    int no_tmp_rsa = 0, no_dhe = 0, nocert = 0;
-#ifndef OPENSSL_NO_ECDH
-    int no_ecdhe = 0;
-#endif
+    int build_chain = 0;
+    int no_tmp_rsa = 0, no_dhe = 0, no_ecdhe = 0, nocert = 0;
     int state = 0;
     const SSL_METHOD *meth = NULL;
     int socket_type = SOCK_STREAM;
@@ -1030,16 +1101,20 @@ int MAIN(int argc, char *argv[])
     char *dpassarg = NULL, *dpass = NULL;
     int s_dcert_format = FORMAT_PEM, s_dkey_format = FORMAT_PEM;
     X509 *s_cert = NULL, *s_dcert = NULL;
+    STACK_OF(X509) *s_chain = NULL, *s_dchain = NULL;
     EVP_PKEY *s_key = NULL, *s_dkey = NULL;
-    int no_cache = 0;
+    int no_cache = 0, ext_cache = 0;
+    int rev = 0, naccept = -1;
 #ifndef OPENSSL_NO_TLSEXT
     EVP_PKEY *s_key2 = NULL;
     X509 *s_cert2 = NULL;
     tlsextctx tlsextcbp = { NULL, NULL, SSL_TLSEXT_ERR_ALERT_WARNING };
 # ifndef OPENSSL_NO_NEXTPROTONEG
     const char *next_proto_neg_in = NULL;
-    tlsextnextprotoctx next_proto;
+    tlsextnextprotoctx next_proto = { NULL, 0 };
 # endif
+    const char *alpn_in = NULL;
+    tlsextalpnctx alpn_ctx = { NULL, 0 };
 #endif
 #ifndef OPENSSL_NO_PSK
     /* by default do not send a PSK identity hint */
@@ -1049,6 +1124,15 @@ int MAIN(int argc, char *argv[])
     char *srpuserseed = NULL;
     char *srp_verifier_file = NULL;
 #endif
+    SSL_EXCERT *exc = NULL;
+    SSL_CONF_CTX *cctx = NULL;
+    STACK_OF(OPENSSL_STRING) *ssl_args = NULL;
+
+    char *crl_file = NULL;
+    int crl_format = FORMAT_PEM;
+    int crl_download = 0;
+    STACK_OF(X509_CRL) *crls = NULL;
+
     meth = SSLv23_server_method();
 
     local_argc = argc;
@@ -1065,6 +1149,12 @@ int MAIN(int argc, char *argv[])
     if (!load_config(bio_err, NULL))
         goto end;
 
+    cctx = SSL_CONF_CTX_new();
+    if (!cctx)
+        goto end;
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_SERVER);
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CMDLINE);
+
     verify_depth = 0;
 #ifdef FIONBIO
     s_nbio = 0;
@@ -1080,12 +1170,21 @@ int MAIN(int argc, char *argv[])
                 goto bad;
             if (!extract_port(*(++argv), &port))
                 goto bad;
+        } else if (strcmp(*argv, "-naccept") == 0) {
+            if (--argc < 1)
+                goto bad;
+            naccept = atol(*(++argv));
+            if (naccept <= 0) {
+                BIO_printf(bio_err, "bad accept value %s\n", *argv);
+                goto bad;
+            }
         } else if (strcmp(*argv, "-verify") == 0) {
             s_server_verify = SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE;
             if (--argc < 1)
                 goto bad;
             verify_depth = atoi(*(++argv));
-            BIO_printf(bio_err, "verify depth is %d\n", verify_depth);
+            if (!s_quiet)
+                BIO_printf(bio_err, "verify depth is %d\n", verify_depth);
         } else if (strcmp(*argv, "-Verify") == 0) {
             s_server_verify =
                 SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT |
@@ -1093,9 +1192,10 @@ int MAIN(int argc, char *argv[])
             if (--argc < 1)
                 goto bad;
             verify_depth = atoi(*(++argv));
-            BIO_printf(bio_err,
-                       "verify depth is %d, must return a certificate\n",
-                       verify_depth);
+            if (!s_quiet)
+                BIO_printf(bio_err,
+                           "verify depth is %d, must return a certificate\n",
+                           verify_depth);
         } else if (strcmp(*argv, "-context") == 0) {
             if (--argc < 1)
                 goto bad;
@@ -1104,7 +1204,20 @@ int MAIN(int argc, char *argv[])
             if (--argc < 1)
                 goto bad;
             s_cert_file = *(++argv);
-        } else if (strcmp(*argv, "-certform") == 0) {
+        } else if (strcmp(*argv, "-CRL") == 0) {
+            if (--argc < 1)
+                goto bad;
+            crl_file = *(++argv);
+        } else if (strcmp(*argv, "-crl_download") == 0)
+            crl_download = 1;
+#ifndef OPENSSL_NO_TLSEXT
+        else if (strcmp(*argv, "-serverinfo") == 0) {
+            if (--argc < 1)
+                goto bad;
+            s_serverinfo_file = *(++argv);
+        }
+#endif
+        else if (strcmp(*argv, "-certform") == 0) {
             if (--argc < 1)
                 goto bad;
             s_cert_format = str2fmt(*(++argv));
@@ -1120,19 +1233,15 @@ int MAIN(int argc, char *argv[])
             if (--argc < 1)
                 goto bad;
             passarg = *(++argv);
-        } else if (strcmp(*argv, "-dhparam") == 0) {
+        } else if (strcmp(*argv, "-cert_chain") == 0) {
             if (--argc < 1)
                 goto bad;
-            dhfile = *(++argv);
-        }
-#ifndef OPENSSL_NO_ECDH
-        else if (strcmp(*argv, "-named_curve") == 0) {
+            s_chain_file = *(++argv);
+        } else if (strcmp(*argv, "-dhparam") == 0) {
             if (--argc < 1)
                 goto bad;
-            named_curve = *(++argv);
-        }
-#endif
-        else if (strcmp(*argv, "-dcertform") == 0) {
+            dhfile = *(++argv);
+        } else if (strcmp(*argv, "-dcertform") == 0) {
             if (--argc < 1)
                 goto bad;
             s_dcert_format = str2fmt(*(++argv));
@@ -1152,32 +1261,62 @@ int MAIN(int argc, char *argv[])
             if (--argc < 1)
                 goto bad;
             s_dkey_file = *(++argv);
+        } else if (strcmp(*argv, "-dcert_chain") == 0) {
+            if (--argc < 1)
+                goto bad;
+            s_dchain_file = *(++argv);
         } else if (strcmp(*argv, "-nocert") == 0) {
             nocert = 1;
         } else if (strcmp(*argv, "-CApath") == 0) {
             if (--argc < 1)
                 goto bad;
             CApath = *(++argv);
+        } else if (strcmp(*argv, "-chainCApath") == 0) {
+            if (--argc < 1)
+                goto bad;
+            chCApath = *(++argv);
+        } else if (strcmp(*argv, "-verifyCApath") == 0) {
+            if (--argc < 1)
+                goto bad;
+            vfyCApath = *(++argv);
         } else if (strcmp(*argv, "-no_cache") == 0)
             no_cache = 1;
-        else if (args_verify(&argv, &argc, &badarg, bio_err, &vpm)) {
+        else if (strcmp(*argv, "-ext_cache") == 0)
+            ext_cache = 1;
+        else if (strcmp(*argv, "-CRLform") == 0) {
+            if (--argc < 1)
+                goto bad;
+            crl_format = str2fmt(*(++argv));
+        } else if (args_verify(&argv, &argc, &badarg, bio_err, &vpm)) {
+            if (badarg)
+                goto bad;
+            continue;
+        } else if (args_excert(&argv, &argc, &badarg, bio_err, &exc)) {
+            if (badarg)
+                goto bad;
+            continue;
+        } else if (args_ssl(&argv, &argc, cctx, &badarg, bio_err, &ssl_args)) {
             if (badarg)
                 goto bad;
             continue;
         } else if (strcmp(*argv, "-verify_return_error") == 0)
             verify_return_error = 1;
-        else if (strcmp(*argv, "-serverpref") == 0) {
-            off |= SSL_OP_CIPHER_SERVER_PREFERENCE;
-        } else if (strcmp(*argv, "-legacy_renegotiation") == 0)
-            off |= SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION;
-        else if (strcmp(*argv, "-cipher") == 0) {
+        else if (strcmp(*argv, "-verify_quiet") == 0)
+            verify_quiet = 1;
+        else if (strcmp(*argv, "-build_chain") == 0)
+            build_chain = 1;
+        else if (strcmp(*argv, "-CAfile") == 0) {
             if (--argc < 1)
                 goto bad;
-            cipher = *(++argv);
-        } else if (strcmp(*argv, "-CAfile") == 0) {
+            CAfile = *(++argv);
+        } else if (strcmp(*argv, "-chainCAfile") == 0) {
             if (--argc < 1)
                 goto bad;
-            CAfile = *(++argv);
+            chCAfile = *(++argv);
+        } else if (strcmp(*argv, "-verifyCAfile") == 0) {
+            if (--argc < 1)
+                goto bad;
+            vfyCAfile = *(++argv);
         }
 #ifdef FIONBIO
         else if (strcmp(*argv, "-nbio") == 0) {
@@ -1189,7 +1328,11 @@ int MAIN(int argc, char *argv[])
             s_nbio = 1;
 #endif
             s_nbio_test = 1;
-        } else if (strcmp(*argv, "-debug") == 0) {
+        } else if (strcmp(*argv, "-ign_eof") == 0)
+            s_ign_eof = 1;
+        else if (strcmp(*argv, "-no_ign_eof") == 0)
+            s_ign_eof = 0;
+        else if (strcmp(*argv, "-debug") == 0) {
             s_debug = 1;
         }
 #ifndef OPENSSL_NO_TLSEXT
@@ -1220,7 +1363,17 @@ int MAIN(int argc, char *argv[])
 #endif
         else if (strcmp(*argv, "-msg") == 0) {
             s_msg = 1;
-        } else if (strcmp(*argv, "-hack") == 0) {
+        } else if (strcmp(*argv, "-msgfile") == 0) {
+            if (--argc < 1)
+                goto bad;
+            bio_s_msg = BIO_new_file(*(++argv), "w");
+        }
+#ifndef OPENSSL_NO_SSL_TRACE
+        else if (strcmp(*argv, "-trace") == 0) {
+            s_msg = 2;
+        }
+#endif
+        else if (strcmp(*argv, "-hack") == 0) {
             hack = 1;
         } else if (strcmp(*argv, "-state") == 0) {
             state = 1;
@@ -1228,18 +1381,19 @@ int MAIN(int argc, char *argv[])
             s_crlf = 1;
         } else if (strcmp(*argv, "-quiet") == 0) {
             s_quiet = 1;
-        } else if (strcmp(*argv, "-bugs") == 0) {
-            bugs = 1;
+        } else if (strcmp(*argv, "-brief") == 0) {
+            s_quiet = 1;
+            s_brief = 1;
+            verify_quiet = 1;
         } else if (strcmp(*argv, "-no_tmp_rsa") == 0) {
             no_tmp_rsa = 1;
         } else if (strcmp(*argv, "-no_dhe") == 0) {
             no_dhe = 1;
-        }
-#ifndef OPENSSL_NO_ECDH
-        else if (strcmp(*argv, "-no_ecdhe") == 0) {
+        } else if (strcmp(*argv, "-no_ecdhe") == 0) {
             no_ecdhe = 1;
+        } else if (strcmp(*argv, "-no_resume_ephemeral") == 0) {
+            no_resume_ephemeral = 1;
         }
-#endif
 #ifndef OPENSSL_NO_PSK
         else if (strcmp(*argv, "-psk_hint") == 0) {
             if (--argc < 1)
@@ -1272,32 +1426,18 @@ int MAIN(int argc, char *argv[])
             meth = TLSv1_server_method();
         }
 #endif
-        else if (strcmp(*argv, "-www") == 0) {
+        else if (strcmp(*argv, "-rev") == 0) {
+            rev = 1;
+        } else if (strcmp(*argv, "-www") == 0) {
             www = 1;
         } else if (strcmp(*argv, "-WWW") == 0) {
             www = 2;
         } else if (strcmp(*argv, "-HTTP") == 0) {
             www = 3;
-        } else if (strcmp(*argv, "-no_ssl2") == 0) {
-            off |= SSL_OP_NO_SSLv2;
-        } else if (strcmp(*argv, "-no_ssl3") == 0) {
-            off |= SSL_OP_NO_SSLv3;
-        } else if (strcmp(*argv, "-no_tls1") == 0) {
-            off |= SSL_OP_NO_TLSv1;
-        } else if (strcmp(*argv, "-no_tls1_1") == 0) {
-            off |= SSL_OP_NO_TLSv1_1;
-        } else if (strcmp(*argv, "-no_tls1_2") == 0) {
-            off |= SSL_OP_NO_TLSv1_2;
-        } else if (strcmp(*argv, "-no_comp") == 0) {
-            off |= SSL_OP_NO_COMPRESSION;
         }
-#ifndef OPENSSL_NO_TLSEXT
-        else if (strcmp(*argv, "-no_ticket") == 0) {
-            off |= SSL_OP_NO_TICKET;
-        }
-#endif
 #ifndef OPENSSL_NO_SSL2
         else if (strcmp(*argv, "-ssl2") == 0) {
+            no_ecdhe = 1;
             meth = SSLv2_server_method();
         }
 #endif
@@ -1316,9 +1456,15 @@ int MAIN(int argc, char *argv[])
         }
 #endif
 #ifndef OPENSSL_NO_DTLS1
-        else if (strcmp(*argv, "-dtls1") == 0) {
+        else if (strcmp(*argv, "-dtls") == 0) {
+            meth = DTLS_server_method();
+            socket_type = SOCK_DGRAM;
+        } else if (strcmp(*argv, "-dtls1") == 0) {
             meth = DTLSv1_server_method();
             socket_type = SOCK_DGRAM;
+        } else if (strcmp(*argv, "-dtls1_2") == 0) {
+            meth = DTLSv1_2_server_method();
+            socket_type = SOCK_DGRAM;
         } else if (strcmp(*argv, "-timeout") == 0)
             enable_timeouts = 1;
         else if (strcmp(*argv, "-mtu") == 0) {
@@ -1368,6 +1514,11 @@ int MAIN(int argc, char *argv[])
             next_proto_neg_in = *(++argv);
         }
 # endif
+        else if (strcmp(*argv, "-alpn") == 0) {
+            if (--argc < 1)
+                goto bad;
+            alpn_in = *(++argv);
+        }
 #endif
 #if !defined(OPENSSL_NO_JPAKE) && !defined(OPENSSL_NO_PSK)
         else if (strcmp(*argv, "-jpake") == 0) {
@@ -1420,11 +1571,6 @@ int MAIN(int argc, char *argv[])
             goto end;
         }
         psk_identity = "JPAKE";
-        if (cipher) {
-            BIO_printf(bio_err, "JPAKE sets cipher to PSK\n");
-            goto end;
-        }
-        cipher = "PSK";
     }
 #endif
 
@@ -1447,6 +1593,9 @@ int MAIN(int argc, char *argv[])
         s_key_file2 = s_cert_file2;
 #endif
 
+    if (!load_excert(&exc, bio_err))
+        goto end;
+
     if (nocert == 0) {
         s_key = load_key(bio_err, s_key_file, s_key_format, 0, pass, e,
                          "server certificate private key file");
@@ -1462,6 +1611,12 @@ int MAIN(int argc, char *argv[])
             ERR_print_errors(bio_err);
             goto end;
         }
+        if (s_chain_file) {
+            s_chain = load_certs(bio_err, s_chain_file, FORMAT_PEM,
+                                 NULL, e, "server certificate chain");
+            if (!s_chain)
+                goto end;
+        }
 #ifndef OPENSSL_NO_TLSEXT
         if (tlsextcbp.servername) {
             s_key2 = load_key(bio_err, s_key_file2, s_key_format, 0, pass, e,
@@ -1479,9 +1634,10 @@ int MAIN(int argc, char *argv[])
                 goto end;
             }
         }
-#endif
+#endif                          /* OPENSSL_NO_TLSEXT */
     }
-#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
+#if !defined(OPENSSL_NO_TLSEXT)
+# if !defined(OPENSSL_NO_NEXTPROTONEG)
     if (next_proto_neg_in) {
         unsigned short len;
         next_proto.data = next_protos_parse(&len, next_proto_neg_in);
@@ -1491,8 +1647,34 @@ int MAIN(int argc, char *argv[])
     } else {
         next_proto.data = NULL;
     }
+# endif
+    alpn_ctx.data = NULL;
+    if (alpn_in) {
+        unsigned short len;
+        alpn_ctx.data = next_protos_parse(&len, alpn_in);
+        if (alpn_ctx.data == NULL)
+            goto end;
+        alpn_ctx.len = len;
+    }
 #endif
 
+    if (crl_file) {
+        X509_CRL *crl;
+        crl = load_crl(crl_file, crl_format);
+        if (!crl) {
+            BIO_puts(bio_err, "Error loading CRL\n");
+            ERR_print_errors(bio_err);
+            goto end;
+        }
+        crls = sk_X509_CRL_new_null();
+        if (!crls || !sk_X509_CRL_push(crls, crl)) {
+            BIO_puts(bio_err, "Error adding CRL\n");
+            ERR_print_errors(bio_err);
+            X509_CRL_free(crl);
+            goto end;
+        }
+    }
+
     if (s_dcert_file) {
 
         if (s_dkey_file == NULL)
@@ -1512,6 +1694,12 @@ int MAIN(int argc, char *argv[])
             ERR_print_errors(bio_err);
             goto end;
         }
+        if (s_dchain_file) {
+            s_dchain = load_certs(bio_err, s_dchain_file, FORMAT_PEM,
+                                  NULL, e, "second server certificate chain");
+            if (!s_dchain)
+                goto end;
+        }
 
     }
 
@@ -1525,8 +1713,10 @@ int MAIN(int argc, char *argv[])
                    app_RAND_load_files(inrand));
 
     if (bio_s_out == NULL) {
-        if (s_quiet && !s_debug && !s_msg) {
+        if (s_quiet && !s_debug) {
             bio_s_out = BIO_new(BIO_s_null());
+            if (s_msg && !bio_s_msg)
+                bio_s_msg = BIO_new_fp(stdout, BIO_NOCLOSE);
         } else {
             if (bio_s_out == NULL)
                 bio_s_out = BIO_new_fp(stdout, BIO_NOCLOSE);
@@ -1566,16 +1756,17 @@ int MAIN(int argc, char *argv[])
         BIO_printf(bio_err, "id_prefix '%s' set.\n", session_id_prefix);
     }
     SSL_CTX_set_quiet_shutdown(ctx, 1);
-    if (bugs)
-        SSL_CTX_set_options(ctx, SSL_OP_ALL);
     if (hack)
         SSL_CTX_set_options(ctx, SSL_OP_NETSCAPE_DEMO_CIPHER_CHANGE_BUG);
-    SSL_CTX_set_options(ctx, off);
+    if (exc)
+        ssl_ctx_set_excert(ctx, exc);
 
     if (state)
         SSL_CTX_set_info_callback(ctx, apps_ssl_info_callback);
     if (no_cache)
         SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_OFF);
+    else if (ext_cache)
+        init_session_cache_ctx(ctx);
     else
         SSL_CTX_sess_set_cache_size(ctx, 128);
 
@@ -1606,6 +1797,17 @@ int MAIN(int argc, char *argv[])
     if (vpm)
         SSL_CTX_set1_param(ctx, vpm);
 
+    ssl_ctx_add_crls(ctx, crls, 0);
+
+    if (!args_ssl_call(ctx, bio_err, cctx, ssl_args, no_ecdhe, no_jpake))
+        goto end;
+
+    if (!ssl_load_stores(ctx, vfyCApath, vfyCAfile, chCApath, chCAfile,
+                         crls, crl_download)) {
+        BIO_printf(bio_err, "Error loading store locations\n");
+        ERR_print_errors(bio_err);
+        goto end;
+    }
 #ifndef OPENSSL_NO_TLSEXT
     if (s_cert2) {
         ctx2 = SSL_CTX_new(meth);
@@ -1633,17 +1835,18 @@ int MAIN(int argc, char *argv[])
             BIO_printf(bio_err, "id_prefix '%s' set.\n", session_id_prefix);
         }
         SSL_CTX_set_quiet_shutdown(ctx2, 1);
-        if (bugs)
-            SSL_CTX_set_options(ctx2, SSL_OP_ALL);
         if (hack)
             SSL_CTX_set_options(ctx2, SSL_OP_NETSCAPE_DEMO_CIPHER_CHANGE_BUG);
-        SSL_CTX_set_options(ctx2, off);
+        if (exc)
+            ssl_ctx_set_excert(ctx2, exc);
 
         if (state)
             SSL_CTX_set_info_callback(ctx2, apps_ssl_info_callback);
 
         if (no_cache)
             SSL_CTX_set_session_cache_mode(ctx2, SSL_SESS_CACHE_OFF);
+        else if (ext_cache)
+            init_session_cache_ctx(ctx2);
         else
             SSL_CTX_sess_set_cache_size(ctx2, 128);
 
@@ -1653,12 +1856,20 @@ int MAIN(int argc, char *argv[])
         }
         if (vpm)
             SSL_CTX_set1_param(ctx2, vpm);
+
+        ssl_ctx_add_crls(ctx2, crls, 0);
+
+        if (!args_ssl_call(ctx2, bio_err, cctx, ssl_args, no_ecdhe, no_jpake))
+            goto end;
+
     }
 # ifndef OPENSSL_NO_NEXTPROTONEG
     if (next_proto.data)
         SSL_CTX_set_next_protos_advertised_cb(ctx, next_proto_cb,
                                               &next_proto);
 # endif
+    if (alpn_ctx.data)
+        SSL_CTX_set_alpn_select_cb(ctx, alpn_cb, &alpn_ctx);
 #endif
 
 #ifndef OPENSSL_NO_DH
@@ -1702,54 +1913,21 @@ int MAIN(int argc, char *argv[])
     }
 #endif
 
-#ifndef OPENSSL_NO_ECDH
-    if (!no_ecdhe) {
-        EC_KEY *ecdh = NULL;
-
-        if (named_curve) {
-            int nid = OBJ_sn2nid(named_curve);
-
-            if (nid == 0) {
-                BIO_printf(bio_err, "unknown curve name (%s)\n", named_curve);
-                goto end;
-            }
-            ecdh = EC_KEY_new_by_curve_name(nid);
-            if (ecdh == NULL) {
-                BIO_printf(bio_err, "unable to create curve (%s)\n",
-                           named_curve);
-                goto end;
-            }
-        }
-
-        if (ecdh != NULL) {
-            BIO_printf(bio_s_out, "Setting temp ECDH parameters\n");
-        } else {
-            BIO_printf(bio_s_out, "Using default temp ECDH parameters\n");
-            ecdh = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1);
-            if (ecdh == NULL) {
-                BIO_printf(bio_err, "unable to create curve (nistp256)\n");
-                goto end;
-            }
-        }
-        (void)BIO_flush(bio_s_out);
-
-        SSL_CTX_set_tmp_ecdh(ctx, ecdh);
-# ifndef OPENSSL_NO_TLSEXT
-        if (ctx2)
-            SSL_CTX_set_tmp_ecdh(ctx2, ecdh);
-# endif
-        EC_KEY_free(ecdh);
+    if (!set_cert_key_stuff(ctx, s_cert, s_key, s_chain, build_chain))
+        goto end;
+#ifndef OPENSSL_NO_TLSEXT
+    if (s_serverinfo_file != NULL
+        && !SSL_CTX_use_serverinfo_file(ctx, s_serverinfo_file)) {
+        ERR_print_errors(bio_err);
+        goto end;
     }
 #endif
-
-    if (!set_cert_key_stuff(ctx, s_cert, s_key))
-        goto end;
 #ifndef OPENSSL_NO_TLSEXT
-    if (ctx2 && !set_cert_key_stuff(ctx2, s_cert2, s_key2))
+    if (ctx2 && !set_cert_key_stuff(ctx2, s_cert2, s_key2, NULL, build_chain))
         goto end;
 #endif
     if (s_dcert != NULL) {
-        if (!set_cert_key_stuff(ctx, s_dcert, s_dkey))
+        if (!set_cert_key_stuff(ctx, s_dcert, s_dkey, s_dchain, build_chain))
             goto end;
     }
 #ifndef OPENSSL_NO_RSA
@@ -1808,20 +1986,6 @@ int MAIN(int argc, char *argv[])
     }
 #endif
 
-    if (cipher != NULL) {
-        if (!SSL_CTX_set_cipher_list(ctx, cipher)) {
-            BIO_printf(bio_err, "error setting cipher list\n");
-            ERR_print_errors(bio_err);
-            goto end;
-        }
-#ifndef OPENSSL_NO_TLSEXT
-        if (ctx2 && !SSL_CTX_set_cipher_list(ctx2, cipher)) {
-            BIO_printf(bio_err, "error setting cipher list\n");
-            ERR_print_errors(bio_err);
-            goto end;
-        }
-#endif
-    }
     SSL_CTX_set_verify(ctx, s_server_verify, verify_callback);
     SSL_CTX_set_session_id_context(ctx, (void *)&s_server_session_id_context,
                                    sizeof s_server_session_id_context);
@@ -1873,10 +2037,15 @@ int MAIN(int argc, char *argv[])
 
     BIO_printf(bio_s_out, "ACCEPT\n");
     (void)BIO_flush(bio_s_out);
-    if (www)
-        do_server(port, socket_type, &accept_socket, www_body, context);
+    if (rev)
+        do_server(port, socket_type, &accept_socket, rev_body, context,
+                  naccept);
+    else if (www)
+        do_server(port, socket_type, &accept_socket, www_body, context,
+                  naccept);
     else
-        do_server(port, socket_type, &accept_socket, sv_body, context);
+        do_server(port, socket_type, &accept_socket, sv_body, context,
+                  naccept);
     print_stats(bio_s_out, ctx);
     ret = 0;
  end:
@@ -1884,18 +2053,25 @@ int MAIN(int argc, char *argv[])
         SSL_CTX_free(ctx);
     if (s_cert)
         X509_free(s_cert);
+    if (crls)
+        sk_X509_CRL_pop_free(crls, X509_CRL_free);
     if (s_dcert)
         X509_free(s_dcert);
     if (s_key)
         EVP_PKEY_free(s_key);
     if (s_dkey)
         EVP_PKEY_free(s_dkey);
+    if (s_chain)
+        sk_X509_pop_free(s_chain, X509_free);
+    if (s_dchain)
+        sk_X509_pop_free(s_dchain, X509_free);
     if (pass)
         OPENSSL_free(pass);
     if (dpass)
         OPENSSL_free(dpass);
     if (vpm)
         X509_VERIFY_PARAM_free(vpm);
+    free_sessions();
 #ifndef OPENSSL_NO_TLSEXT
     if (tlscstatp.host)
         OPENSSL_free(tlscstatp.host);
@@ -1909,11 +2085,32 @@ int MAIN(int argc, char *argv[])
         X509_free(s_cert2);
     if (s_key2)
         EVP_PKEY_free(s_key2);
+    if (serverinfo_in != NULL)
+        BIO_free(serverinfo_in);
+# ifndef OPENSSL_NO_NEXTPROTONEG
+    if (next_proto.data)
+        OPENSSL_free(next_proto.data);
+# endif
+    if (alpn_ctx.data)
+        OPENSSL_free(alpn_ctx.data);
+#endif
+    ssl_excert_free(exc);
+    if (ssl_args)
+        sk_OPENSSL_STRING_free(ssl_args);
+    if (cctx)
+        SSL_CONF_CTX_free(cctx);
+#ifndef OPENSSL_NO_JPAKE
+    if (jpake_secret && psk_key)
+        OPENSSL_free(psk_key);
 #endif
     if (bio_s_out != NULL) {
         BIO_free(bio_s_out);
         bio_s_out = NULL;
     }
+    if (bio_s_msg != NULL) {
+        BIO_free(bio_s_msg);
+        bio_s_msg = NULL;
+    }
     apps_shutdown();
     OPENSSL_EXIT(ret);
 }
@@ -1946,7 +2143,7 @@ static void print_stats(BIO *bio, SSL_CTX *ssl_ctx)
                SSL_CTX_sess_get_cache_size(ssl_ctx));
 }
 
-static int sv_body(char *hostname, int s, unsigned char *context)
+static int sv_body(char *hostname, int s, int stype, unsigned char *context)
 {
     char *buf = NULL;
     fd_set readfds;
@@ -2010,7 +2207,7 @@ static int sv_body(char *hostname, int s, unsigned char *context)
 # endif
 #endif
 
-    if (SSL_version(con) == DTLS1_VERSION) {
+    if (stype == SOCK_DGRAM) {
 
         sbio = BIO_new_dgram(s, BIO_NOCLOSE);
 
@@ -2069,8 +2266,13 @@ static int sv_body(char *hostname, int s, unsigned char *context)
         BIO_set_callback_arg(SSL_get_rbio(con), (char *)bio_s_out);
     }
     if (s_msg) {
-        SSL_set_msg_callback(con, msg_cb);
-        SSL_set_msg_callback_arg(con, bio_s_out);
+#ifndef OPENSSL_NO_SSL_TRACE
+        if (s_msg == 2)
+            SSL_set_msg_callback(con, SSL_trace);
+        else
+#endif
+            SSL_set_msg_callback(con, msg_cb);
+        SSL_set_msg_callback_arg(con, bio_s_msg ? bio_s_msg : bio_s_out);
     }
 #ifndef OPENSSL_NO_TLSEXT
     if (s_tlsextdebug) {
@@ -2168,7 +2370,7 @@ static int sv_body(char *hostname, int s, unsigned char *context)
                 assert(lf_num == 0);
             } else
                 i = raw_read_stdin(buf, bufsize);
-            if (!s_quiet) {
+            if (!s_quiet && !s_brief) {
                 if ((i <= 0) || (buf[0] == 'Q')) {
                     BIO_printf(bio_s_out, "DONE\n");
                     SHUTDOWN(s);
@@ -2383,6 +2585,16 @@ static int init_ssl_connection(SSL *con)
     unsigned char *exportedkeymat;
 
     i = SSL_accept(con);
+#ifdef CERT_CB_TEST_RETRY
+    {
+        while (i <= 0 && SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP
+               && SSL_state(con) == SSL3_ST_SR_CLNT_HELLO_C) {
+            fprintf(stderr,
+                    "LOOKUP from certificate callback during accept\n");
+            i = SSL_accept(con);
+        }
+    }
+#endif
 #ifndef OPENSSL_NO_SRP
     while (i <= 0 && SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) {
         BIO_printf(bio_s_out, "LOOKUP during accept %s\n",
@@ -2398,6 +2610,7 @@ static int init_ssl_connection(SSL *con)
         i = SSL_accept(con);
     }
 #endif
+
     if (i <= 0) {
         if (BIO_sock_should_retry(i)) {
             BIO_printf(bio_s_out, "DELAY\n");
@@ -2409,11 +2622,15 @@ static int init_ssl_connection(SSL *con)
         if (verify_error != X509_V_OK) {
             BIO_printf(bio_err, "verify error:%s\n",
                        X509_verify_cert_error_string(verify_error));
-        } else
-            ERR_print_errors(bio_err);
+        }
+        /* Always print any error messages */
+        ERR_print_errors(bio_err);
         return (0);
     }
 
+    if (s_brief)
+        print_ssl_summary(bio_err, con);
+
     PEM_write_bio_SSL_SESSION(bio_s_out, SSL_get_session(con));
 
     peer = SSL_get_peer_certificate(con);
@@ -2430,6 +2647,11 @@ static int init_ssl_connection(SSL *con)
     if (SSL_get_shared_ciphers(con, buf, sizeof buf) != NULL)
         BIO_printf(bio_s_out, "Shared ciphers:%s\n", buf);
     str = SSL_CIPHER_get_name(SSL_get_current_cipher(con));
+    ssl_print_sigalgs(bio_s_out, con);
+#ifndef OPENSSL_NO_EC
+    ssl_print_point_formats(bio_s_out, con);
+    ssl_print_curves(bio_s_out, con, 0);
+#endif
     BIO_printf(bio_s_out, "CIPHER is %s\n", (str != NULL) ? str : "(NONE)");
 
 #if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG)
@@ -2529,7 +2751,7 @@ static int load_CA(SSL_CTX *ctx, char *file)
 }
 #endif
 
-static int www_body(char *hostname, int s, unsigned char *context)
+static int www_body(char *hostname, int s, int stype, unsigned char *context)
 {
     char *buf = NULL;
     int ret = 1;
@@ -2604,8 +2826,13 @@ static int www_body(char *hostname, int s, unsigned char *context)
         BIO_set_callback_arg(SSL_get_rbio(con), (char *)bio_s_out);
     }
     if (s_msg) {
-        SSL_set_msg_callback(con, msg_cb);
-        SSL_set_msg_callback_arg(con, bio_s_out);
+#ifndef OPENSSL_NO_SSL_TRACE
+        if (s_msg == 2)
+            SSL_set_msg_callback(con, SSL_trace);
+        else
+#endif
+            SSL_set_msg_callback(con, msg_cb);
+        SSL_set_msg_callback_arg(con, bio_s_msg ? bio_s_msg : bio_s_out);
     }
 
     for (;;) {
@@ -2724,6 +2951,10 @@ static int www_body(char *hostname, int s, unsigned char *context)
                 }
                 BIO_puts(io, "\n");
             }
+            ssl_print_sigalgs(io, con);
+#ifndef OPENSSL_NO_EC
+            ssl_print_curves(io, con, 0);
+#endif
             BIO_printf(io, (SSL_cache_hit(con)
                             ? "---\nReused, " : "---\nNew, "));
             c = SSL_get_current_cipher(con);
@@ -2906,6 +3137,140 @@ static int www_body(char *hostname, int s, unsigned char *context)
     return (ret);
 }
 
+static int rev_body(char *hostname, int s, int stype, unsigned char *context)
+{
+    char *buf = NULL;
+    int i;
+    int ret = 1;
+    SSL *con;
+    BIO *io, *ssl_bio, *sbio;
+#ifndef OPENSSL_NO_KRB5
+    KSSL_CTX *kctx;
+#endif
+
+    buf = OPENSSL_malloc(bufsize);
+    if (buf == NULL)
+        return (0);
+    io = BIO_new(BIO_f_buffer());
+    ssl_bio = BIO_new(BIO_f_ssl());
+    if ((io == NULL) || (ssl_bio == NULL))
+        goto err;
+
+    /* lets make the output buffer a reasonable size */
+    if (!BIO_set_write_buffer_size(io, bufsize))
+        goto err;
+
+    if ((con = SSL_new(ctx)) == NULL)
+        goto err;
+#ifndef OPENSSL_NO_TLSEXT
+    if (s_tlsextdebug) {
+        SSL_set_tlsext_debug_callback(con, tlsext_cb);
+        SSL_set_tlsext_debug_arg(con, bio_s_out);
+    }
+#endif
+#ifndef OPENSSL_NO_KRB5
+    if ((kctx = kssl_ctx_new()) != NULL) {
+        kssl_ctx_setstring(kctx, KSSL_SERVICE, KRB5SVC);
+        kssl_ctx_setstring(kctx, KSSL_KEYTAB, KRB5KEYTAB);
+    }
+#endif                          /* OPENSSL_NO_KRB5 */
+    if (context)
+        SSL_set_session_id_context(con, context, strlen((char *)context));
+
+    sbio = BIO_new_socket(s, BIO_NOCLOSE);
+    SSL_set_bio(con, sbio, sbio);
+    SSL_set_accept_state(con);
+
+    BIO_set_ssl(ssl_bio, con, BIO_CLOSE);
+    BIO_push(io, ssl_bio);
+#ifdef CHARSET_EBCDIC
+    io = BIO_push(BIO_new(BIO_f_ebcdic_filter()), io);
+#endif
+
+    if (s_debug) {
+        SSL_set_debug(con, 1);
+        BIO_set_callback(SSL_get_rbio(con), bio_dump_callback);
+        BIO_set_callback_arg(SSL_get_rbio(con), (char *)bio_s_out);
+    }
+    if (s_msg) {
+#ifndef OPENSSL_NO_SSL_TRACE
+        if (s_msg == 2)
+            SSL_set_msg_callback(con, SSL_trace);
+        else
+#endif
+            SSL_set_msg_callback(con, msg_cb);
+        SSL_set_msg_callback_arg(con, bio_s_msg ? bio_s_msg : bio_s_out);
+    }
+
+    for (;;) {
+        i = BIO_do_handshake(io);
+        if (i > 0)
+            break;
+        if (!BIO_should_retry(io)) {
+            BIO_puts(bio_err, "CONNECTION FAILURE\n");
+            ERR_print_errors(bio_err);
+            goto end;
+        }
+    }
+    BIO_printf(bio_err, "CONNECTION ESTABLISHED\n");
+    print_ssl_summary(bio_err, con);
+
+    for (;;) {
+        i = BIO_gets(io, buf, bufsize - 1);
+        if (i < 0) {            /* error */
+            if (!BIO_should_retry(io)) {
+                if (!s_quiet)
+                    ERR_print_errors(bio_err);
+                goto err;
+            } else {
+                BIO_printf(bio_s_out, "read R BLOCK\n");
+#if defined(OPENSSL_SYS_NETWARE)
+                delay(1000);
+#elif !defined(OPENSSL_SYS_MSDOS) && !defined(__DJGPP__)
+                sleep(1);
+#endif
+                continue;
+            }
+        } else if (i == 0) {    /* end of input */
+            ret = 1;
+            BIO_printf(bio_err, "CONNECTION CLOSED\n");
+            goto end;
+        } else {
+            char *p = buf + i - 1;
+            while (i && (*p == '\n' || *p == '\r')) {
+                p--;
+                i--;
+            }
+            if (!s_ign_eof && i == 5 && !strncmp(buf, "CLOSE", 5)) {
+                ret = 1;
+                BIO_printf(bio_err, "CONNECTION CLOSED\n");
+                goto end;
+            }
+            BUF_reverse((unsigned char *)buf, NULL, i);
+            buf[i] = '\n';
+            BIO_write(io, buf, i + 1);
+            for (;;) {
+                i = BIO_flush(io);
+                if (i > 0)
+                    break;
+                if (!BIO_should_retry(io))
+                    goto end;
+            }
+        }
+    }
+ end:
+    /* make sure we re-use sessions */
+    SSL_set_shutdown(con, SSL_SENT_SHUTDOWN | SSL_RECEIVED_SHUTDOWN);
+
+ err:
+
+    if (buf != NULL)
+        OPENSSL_free(buf);
+    if (io != NULL)
+        BIO_free_all(io);
+    return (ret);
+}
+
 #ifndef OPENSSL_NO_RSA
 static RSA MS_CALLBACK *tmp_rsa_cb(SSL *s, int is_export, int keylength)
 {
@@ -2961,3 +3326,116 @@ static int generate_session_id(const SSL *ssl, unsigned char *id,
         return 0;
     return 1;
 }
+
+/*
+ * By default s_server uses an in-memory cache which caches SSL_SESSION
+ * structures without any serialisation. This hides some bugs which only
+ * become apparent in deployed servers. By implementing a basic external
+ * session cache some issues can be debugged using s_server.
+ */
+
+typedef struct simple_ssl_session_st {
+    unsigned char *id;
+    unsigned int idlen;
+    unsigned char *der;
+    int derlen;
+    struct simple_ssl_session_st *next;
+} simple_ssl_session;
+
+static simple_ssl_session *first = NULL;
+
+static int add_session(SSL *ssl, SSL_SESSION *session)
+{
+    simple_ssl_session *sess;
+    unsigned char *p;
+
+    sess = OPENSSL_malloc(sizeof(simple_ssl_session));
+    if (!sess) {
+        BIO_printf(bio_err, "Out of memory adding session to external cache\n");
+        return 0;
+    }
+
+    SSL_SESSION_get_id(session, &sess->idlen);
+    sess->derlen = i2d_SSL_SESSION(session, NULL);
+
+    sess->id = BUF_memdup(SSL_SESSION_get_id(session, NULL), sess->idlen);
+
+    sess->der = OPENSSL_malloc(sess->derlen);
+    if (!sess->id || !sess->der) {
+        BIO_printf(bio_err, "Out of memory adding session to external cache\n");
+
+        if (sess->id)
+            OPENSSL_free(sess->id);
+        if (sess->der)
+            OPENSSL_free(sess->der);
+        OPENSSL_free(sess);
+        return 0;
+    }
+    p = sess->der;
+    i2d_SSL_SESSION(session, &p);
+
+    sess->next = first;
+    first = sess;
+    BIO_printf(bio_err, "New session added to external cache\n");
+    return 0;
+}
+
+static SSL_SESSION *get_session(SSL *ssl, unsigned char *id, int idlen,
+                                int *do_copy)
+{
+    simple_ssl_session *sess;
+    *do_copy = 0;
+    for (sess = first; sess; sess = sess->next) {
+        if (idlen == (int)sess->idlen && !memcmp(sess->id, id, idlen)) {
+            const unsigned char *p = sess->der;
+            BIO_printf(bio_err, "Lookup session: cache hit\n");
+            return d2i_SSL_SESSION(NULL, &p, sess->derlen);
+        }
+    }
+    BIO_printf(bio_err, "Lookup session: cache miss\n");
+    return NULL;
+}
+
+static void del_session(SSL_CTX *sctx, SSL_SESSION *session)
+{
+    simple_ssl_session *sess, *prev = NULL;
+    const unsigned char *id;
+    unsigned int idlen;
+    id = SSL_SESSION_get_id(session, &idlen);
+    for (sess = first; sess; sess = sess->next) {
+        if (idlen == sess->idlen && !memcmp(sess->id, id, idlen)) {
+            if (prev)
+                prev->next = sess->next;
+            else
+                first = sess->next;
+            OPENSSL_free(sess->id);
+            OPENSSL_free(sess->der);
+            OPENSSL_free(sess);
+            return;
+        }
+        prev = sess;
+    }
+}
+
+static void init_session_cache_ctx(SSL_CTX *sctx)
+{
+    SSL_CTX_set_session_cache_mode(sctx,
+                                   SSL_SESS_CACHE_NO_INTERNAL |
+                                   SSL_SESS_CACHE_SERVER);
+    SSL_CTX_sess_set_new_cb(sctx, add_session);
+    SSL_CTX_sess_set_get_cb(sctx, get_session);
+    SSL_CTX_sess_set_remove_cb(sctx, del_session);
+}
+
+static void free_sessions(void)
+{
+    simple_ssl_session *sess, *tsess;
+    for (sess = first; sess;) {
+        OPENSSL_free(sess->id);
+        OPENSSL_free(sess->der);
+        tsess = sess;
+        sess = sess->next;
+        OPENSSL_free(tsess);
+    }
+    first = NULL;
+}
index 9e5565d..77a7688 100644 (file)
@@ -290,8 +290,9 @@ static int init_client_ip(int *sock, unsigned char ip[4], int port, int type)
 }
 
 int do_server(int port, int type, int *ret,
-              int (*cb) (char *hostname, int s, unsigned char *context),
-              unsigned char *context)
+              int (*cb) (char *hostname, int s, int stype,
+                         unsigned char *context), unsigned char *context,
+              int naccept)
 {
     int sock;
     char *name = NULL;
@@ -313,12 +314,14 @@ int do_server(int port, int type, int *ret,
             }
         } else
             sock = accept_socket;
-        i = (*cb) (name, sock, context);
+        i = (*cb) (name, sock, type, context);
         if (name != NULL)
             OPENSSL_free(name);
         if (type == SOCK_STREAM)
             SHUTDOWN2(sock);
-        if (i < 0) {
+        if (naccept != -1)
+            naccept--;
+        if (i < 0 || naccept == 0) {
             SHUTDOWN2(accept_socket);
             return (i);
         }
index 53e43c5..6044ccf 100644 (file)
@@ -634,6 +634,12 @@ int MAIN(int argc, char **argv)
             p7 = PKCS7_sign(NULL, NULL, other, in, flags);
             if (!p7)
                 goto end;
+            if (flags & PKCS7_NOCERTS) {
+                for (i = 0; i < sk_X509_num(other); i++) {
+                    X509 *x = sk_X509_value(other, i);
+                    PKCS7_add_certificate(p7, x);
+                }
+            }
         } else
             flags |= PKCS7_REUSE_DIGEST;
         for (i = 0; i < sk_OPENSSL_STRING_num(sksigners); i++) {
index 7d9fd8a..3697b71 100644 (file)
@@ -366,6 +366,8 @@ static void *KDF1_SHA1(const void *in, size_t inlen, void *out,
 }
 # endif                         /* OPENSSL_NO_ECDH */
 
+static void multiblock_speed(const EVP_CIPHER *evp_cipher);
+
 int MAIN(int, char **);
 
 int MAIN(int argc, char **argv)
@@ -646,6 +648,7 @@ int MAIN(int argc, char **argv)
 # ifndef NO_FORK
     int multi = 0;
 # endif
+    int multiblock = 0;
 
 # ifndef TIMES
     usertime = -1;
@@ -776,6 +779,9 @@ int MAIN(int argc, char **argv)
             mr = 1;
             j--;                /* Otherwise, -mr gets confused with an
                                  * algorithm. */
+        } else if (argc > 0 && !strcmp(*argv, "-mb")) {
+            multiblock = 1;
+            j--;
         } else
 # ifndef OPENSSL_NO_MD2
         if (strcmp(*argv, "md2") == 0)
@@ -1941,6 +1947,20 @@ int MAIN(int argc, char **argv)
 # endif
 
     if (doit[D_EVP]) {
+# ifdef EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+        if (multiblock && evp_cipher) {
+            if (!
+                (EVP_CIPHER_flags(evp_cipher) &
+                 EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)) {
+                fprintf(stderr, "%s is not multi-block capable\n",
+                        OBJ_nid2ln(evp_cipher->nid));
+                goto end;
+            }
+            multiblock_speed(evp_cipher);
+            mret = 0;
+            goto end;
+        }
+# endif
         for (j = 0; j < SIZE_NUM; j++) {
             if (evp_cipher) {
                 EVP_CIPHER_CTX ctx;
@@ -2742,4 +2762,113 @@ static int do_multi(int multi)
     return 1;
 }
 # endif
+
+/*
+ * Benchmark `evp_cipher` over the buffer sizes in mblengths[].
+ *
+ * For each size the cipher is driven repeatedly until the global `run`
+ * flag clears.  Each iteration first issues EVP_CTRL_TLS1_1_MULTIBLOCK_AAD;
+ * if that returns a positive value the data is encrypted through
+ * EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT, otherwise the iteration falls back
+ * to EVP_CTRL_AEAD_TLS1_AAD followed by EVP_Cipher().
+ *
+ * Per-size timing lines go to bio_err; the summary goes to stdout, in the
+ * "+H"/"+F" form when `mr` is non-zero and as a table otherwise.
+ * Throughput figures are stored in results[D_EVP][].
+ */
+static void multiblock_speed(const EVP_CIPHER *evp_cipher)
+{
+    static int mblengths[] =
+        { 8 * 1024, 2 * 8 * 1024, 4 * 8 * 1024, 8 * 8 * 1024, 8 * 16 * 1024 };
+    /*
+     * Count the entries of mblengths itself (previously this used the
+     * unrelated lengths[] array and was only right by coincidence).
+     * NOTE(review): results[D_EVP] must have at least `num` slots.
+     */
+    int j, count, num = sizeof(mblengths) / sizeof(mblengths[0]);
+    const char *alg_name;
+    unsigned char *inp, *out, no_key[32], no_iv[16];
+    EVP_CIPHER_CTX ctx;
+    double d = 0.0;
+
+    /* Buffers are sized for the largest entry, which is the last one. */
+    inp = OPENSSL_malloc(mblengths[num - 1]);
+    out = OPENSSL_malloc(mblengths[num - 1] + 1024);
+    if (!inp || !out) {
+        BIO_printf(bio_err,"Out of memory\n");
+        goto end;
+    }
+
+    /* The key material is arbitrary; zero it so nothing indeterminate is read. */
+    memset(no_key, 0, sizeof(no_key));
+    memset(no_iv, 0, sizeof(no_iv));
+
+    EVP_CIPHER_CTX_init(&ctx);
+    EVP_EncryptInit_ex(&ctx, evp_cipher, NULL, no_key, no_iv);
+    EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_AEAD_SET_MAC_KEY, sizeof(no_key),
+                        no_key);
+    alg_name = OBJ_nid2ln(evp_cipher->nid);
+
+    for (j = 0; j < num; j++) {
+        print_message(alg_name, 0, mblengths[j]);
+        Time_F(START);
+        for (count = 0, run = 1; run && count < 0x7fffffff; count++) {
+            unsigned char aad[EVP_AEAD_TLS1_AAD_LEN];
+            EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM mb_param;
+            size_t len = mblengths[j];
+            int packlen;
+
+            memset(aad, 0, 8);  /* avoid uninitialized values */
+            aad[8] = 23;        /* SSL3_RT_APPLICATION_DATA */
+            aad[9] = 3;         /* version */
+            aad[10] = 2;
+            aad[11] = 0;        /* length */
+            aad[12] = 0;
+            mb_param.out = NULL;
+            mb_param.inp = aad;
+            mb_param.len = len;
+            mb_param.interleave = 8;
+
+            packlen = EVP_CIPHER_CTX_ctrl(&ctx,
+                                          EVP_CTRL_TLS1_1_MULTIBLOCK_AAD,
+                                          sizeof(mb_param), &mb_param);
+
+            if (packlen > 0) {
+                mb_param.out = out;
+                mb_param.inp = inp;
+                mb_param.len = len;
+                EVP_CIPHER_CTX_ctrl(&ctx,
+                                    EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT,
+                                    sizeof(mb_param), &mb_param);
+            } else {
+                /* Multi-block AAD setup was refused: use the plain path. */
+                int pad;
+
+                RAND_bytes(out, 16);
+                len += 16;
+                aad[11] = len >> 8;
+                aad[12] = len;
+                pad = EVP_CIPHER_CTX_ctrl(&ctx,
+                                          EVP_CTRL_AEAD_TLS1_AAD,
+                                          EVP_AEAD_TLS1_AAD_LEN, aad);
+                EVP_Cipher(&ctx, out, inp, len + pad);
+            }
+        }
+        d = Time_F(STOP);
+        BIO_printf(bio_err,
+                   mr ? "+R:%d:%s:%f\n"
+                   : "%d %s's in %.2fs\n", count, "evp", d);
+        results[D_EVP][j] = ((double)count) / d * mblengths[j];
+    }
+
+    if (mr) {
+        fprintf(stdout, "+H");
+        for (j = 0; j < num; j++)
+            fprintf(stdout, ":%d", mblengths[j]);
+        fprintf(stdout, "\n");
+        fprintf(stdout, "+F:%d:%s", D_EVP, alg_name);
+        for (j = 0; j < num; j++)
+            fprintf(stdout, ":%.2f", results[D_EVP][j]);
+        fprintf(stdout, "\n");
+    } else {
+        fprintf(stdout,
+                "The 'numbers' are in 1000s of bytes per second processed.\n");
+        fprintf(stdout, "type                    ");
+        for (j = 0; j < num; j++)
+            fprintf(stdout, "%7d bytes", mblengths[j]);
+        fprintf(stdout, "\n");
+        fprintf(stdout, "%-24s", alg_name);
+
+        for (j = 0; j < num; j++) {
+            if (results[D_EVP][j] > 10000)
+                fprintf(stdout, " %11.2fk", results[D_EVP][j] / 1e3);
+            else
+                fprintf(stdout, " %11.2f ", results[D_EVP][j]);
+        }
+        fprintf(stdout, "\n");
+    }
+
+    /*
+     * Release the cipher state.  This sits above the label because the
+     * out-of-memory path jumps to `end` before ctx has been initialised.
+     */
+    EVP_CIPHER_CTX_cleanup(&ctx);
+
+end:
+    if (inp)
+        OPENSSL_free(inp);
+    if (out)
+        OPENSSL_free(out);
+}
 #endif
index e29f9bb..78e729f 100644 (file)
@@ -88,6 +88,7 @@ int MAIN(int argc, char **argv)
     X509_STORE *cert_ctx = NULL;
     X509_LOOKUP *lookup = NULL;
     X509_VERIFY_PARAM *vpm = NULL;
+    int crl_download = 0;
 #ifndef OPENSSL_NO_ENGINE
     char *engine = NULL;
 #endif
@@ -136,7 +137,8 @@ int MAIN(int argc, char **argv)
                 if (argc-- < 1)
                     goto end;
                 crlfile = *(++argv);
-            }
+            } else if (strcmp(*argv, "-crl_download") == 0)
+                crl_download = 1;
 #ifndef OPENSSL_NO_ENGINE
             else if (strcmp(*argv, "-engine") == 0) {
                 if (--argc < 1)
@@ -214,6 +216,9 @@ int MAIN(int argc, char **argv)
     }
 
     ret = 0;
+
+    if (crl_download)
+        store_setup_crl_download(cert_ctx);
     if (argc < 1) {
         if (1 != check(cert_ctx, NULL, untrusted, trusted, crls, e))
             ret = -1;
index 929359b..864a60d 100644 (file)
@@ -150,6 +150,9 @@ static const char *x509_usage[] = {
     " -engine e       - use engine e, possibly a hardware device.\n",
 #endif
     " -certopt arg    - various certificate text options\n",
+    " -checkhost host - check certificate matches \"host\"\n",
+    " -checkemail email - check certificate matches \"email\"\n",
+    " -checkip ipaddr - check certificate matches \"ipaddr\"\n",
     NULL
 };
 
@@ -163,6 +166,9 @@ static int x509_certify(X509_STORE *ctx, char *CAfile, const EVP_MD *digest,
                         char *section, ASN1_INTEGER *sno);
 static int purpose_print(BIO *bio, X509 *cert, X509_PURPOSE *pt);
 static int reqfile = 0;
+#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+static int force_version = 2;
+#endif
 
 int MAIN(int, char **);
 
@@ -174,15 +180,16 @@ int MAIN(int argc, char **argv)
     X509 *x = NULL, *xca = NULL;
     ASN1_OBJECT *objtmp;
     STACK_OF(OPENSSL_STRING) *sigopts = NULL;
-    EVP_PKEY *Upkey = NULL, *CApkey = NULL;
+    EVP_PKEY *Upkey = NULL, *CApkey = NULL, *fkey = NULL;
     ASN1_INTEGER *sno = NULL;
-    int i, num, badops = 0;
+    int i, num, badops = 0, badsig = 0;
     BIO *out = NULL;
     BIO *STDout = NULL;
     STACK_OF(ASN1_OBJECT) *trust = NULL, *reject = NULL;
     int informat, outformat, keyformat, CAformat, CAkeyformat;
     char *infile = NULL, *outfile = NULL, *keyfile = NULL, *CAfile = NULL;
     char *CAkeyfile = NULL, *CAserial = NULL;
+    char *fkeyfile = NULL;
     char *alias = NULL;
     int text = 0, serial = 0, subject = 0, issuer = 0, startdate =
         0, enddate = 0;
@@ -208,6 +215,9 @@ int MAIN(int argc, char **argv)
     int need_rand = 0;
     int checkend = 0, checkoffset = 0;
     unsigned long nmflag = 0, certflag = 0;
+    char *checkhost = NULL;
+    char *checkemail = NULL;
+    char *checkip = NULL;
 #ifndef OPENSSL_NO_ENGINE
     char *engine = NULL;
 #endif
@@ -274,7 +284,15 @@ int MAIN(int argc, char **argv)
                 sigopts = sk_OPENSSL_STRING_new_null();
             if (!sigopts || !sk_OPENSSL_STRING_push(sigopts, *(++argv)))
                 goto bad;
-        } else if (strcmp(*argv, "-days") == 0) {
+        }
+#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+        else if (strcmp(*argv, "-force_version") == 0) {
+            if (--argc < 1)
+                goto bad;
+            force_version = atoi(*(++argv)) - 1;
+        }
+#endif
+        else if (strcmp(*argv, "-days") == 0) {
             if (--argc < 1)
                 goto bad;
             days = atoi(*(++argv));
@@ -327,6 +345,10 @@ int MAIN(int argc, char **argv)
                 goto bad;
             if (!(sno = s2i_ASN1_INTEGER(NULL, *(++argv))))
                 goto bad;
+        } else if (strcmp(*argv, "-force_pubkey") == 0) {
+            if (--argc < 1)
+                goto bad;
+            fkeyfile = *(++argv);
         } else if (strcmp(*argv, "-addtrust") == 0) {
             if (--argc < 1)
                 goto bad;
@@ -424,6 +446,18 @@ int MAIN(int argc, char **argv)
                 goto bad;
             checkoffset = atoi(*(++argv));
             checkend = 1;
+        } else if (strcmp(*argv, "-checkhost") == 0) {
+            if (--argc < 1)
+                goto bad;
+            checkhost = *(++argv);
+        } else if (strcmp(*argv, "-checkemail") == 0) {
+            if (--argc < 1)
+                goto bad;
+            checkemail = *(++argv);
+        } else if (strcmp(*argv, "-checkip") == 0) {
+            if (--argc < 1)
+                goto bad;
+            checkip = *(++argv);
         } else if (strcmp(*argv, "-noout") == 0)
             noout = ++num;
         else if (strcmp(*argv, "-trustout") == 0)
@@ -447,6 +481,8 @@ int MAIN(int argc, char **argv)
 #endif
         else if (strcmp(*argv, "-ocspid") == 0)
             ocspid = ++num;
+        else if (strcmp(*argv, "-badsig") == 0)
+            badsig = 1;
         else if ((md_alg = EVP_get_digestbyname(*argv + 1))) {
             /* ok */
             digest = md_alg;
@@ -484,6 +520,13 @@ int MAIN(int argc, char **argv)
         goto end;
     }
 
+    if (fkeyfile) {
+        fkey = load_pubkey(bio_err, fkeyfile, keyformat, 0,
+                           NULL, e, "Forced key");
+        if (fkey == NULL)
+            goto end;
+    }
+
     if ((CAkeyfile == NULL) && (CA_flag) && (CAformat == FORMAT_PEM)) {
         CAkeyfile = CAfile;
     } else if ((CA_flag) && (CAkeyfile == NULL)) {
@@ -605,10 +648,13 @@ int MAIN(int argc, char **argv)
 
         X509_gmtime_adj(X509_get_notBefore(x), 0);
         X509_time_adj_ex(X509_get_notAfter(x), days, 0, NULL);
-
-        pkey = X509_REQ_get_pubkey(req);
-        X509_set_pubkey(x, pkey);
-        EVP_PKEY_free(pkey);
+        if (fkey)
+            X509_set_pubkey(x, fkey);
+        else {
+            pkey = X509_REQ_get_pubkey(req);
+            X509_set_pubkey(x, pkey);
+            EVP_PKEY_free(pkey);
+        }
     } else
         x = load_cert(bio_err, infile, informat, NULL, e, "Certificate");
 
@@ -937,11 +983,16 @@ int MAIN(int argc, char **argv)
         goto end;
     }
 
+    print_cert_checks(STDout, x, checkhost, checkemail, checkip);
+
     if (noout) {
         ret = 0;
         goto end;
     }
 
+    if (badsig)
+        x->signature->data[x->signature->length - 1] ^= 0x1;
+
     if (outformat == FORMAT_ASN1)
         i = i2d_X509_bio(out, x);
     else if (outformat == FORMAT_PEM) {
@@ -982,6 +1033,7 @@ int MAIN(int argc, char **argv)
     X509_free(xca);
     EVP_PKEY_free(Upkey);
     EVP_PKEY_free(CApkey);
+    EVP_PKEY_free(fkey);
     if (sigopts)
         sk_OPENSSL_STRING_free(sigopts);
     X509_REQ_free(rq);
@@ -1101,7 +1153,11 @@ static int x509_certify(X509_STORE *ctx, char *CAfile, const EVP_MD *digest,
 
     if (conf) {
         X509V3_CTX ctx2;
+#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+        X509_set_version(x, force_version);
+#else
         X509_set_version(x, 2); /* version 3 certificate */
+#endif
         X509V3_set_ctx(&ctx2, xca, x, NULL, NULL, 0);
         X509V3_set_nconf(&ctx2, conf);
         if (!X509V3_EXT_add_nconf(conf, &ctx2, section, x))
@@ -1186,7 +1242,11 @@ static int sign(X509 *x, EVP_PKEY *pkey, int days, int clrext,
     }
     if (conf) {
         X509V3_CTX ctx;
+#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+        X509_set_version(x, force_version);
+#else
         X509_set_version(x, 2); /* version 3 certificate */
+#endif
         X509V3_set_ctx(&ctx, x, x, NULL, NULL, 0);
         X509V3_set_nconf(&ctx, conf);
         if (!X509V3_EXT_add_nconf(conf, &ctx, section, x))
diff --git a/config b/config
index 41fa2a6..77f730f 100755 (executable)
--- a/config
+++ b/config
@@ -587,15 +587,33 @@ case "$GUESSOS" in
        fi
        ;;
   ppc64-*-linux2)
+       if [ -z "$KERNEL_BITS" ]; then
+           echo "WARNING! If you wish to build 64-bit library, then you have to"
+           echo "         invoke './Configure linux-ppc64' *manually*."
+           if [ "$TEST" = "false" -a -t 1 ]; then
+               echo "         You have about 5 seconds to press Ctrl-C to abort."
+               (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
+           fi
+       fi
+       if [ "$KERNEL_BITS" = "64" ]; then
+           OUT="linux-ppc64"
+       else
+           OUT="linux-ppc"
+           (echo "__LP64__" | gcc -E -x c - 2>/dev/null | grep "^__LP64__" 2>&1 > /dev/null) || options="$options -m32"
+       fi
+       ;;
+  ppc64le-*-linux2) OUT="linux-ppc64le" ;;
+  ppc-*-linux2) OUT="linux-ppc" ;;
+  mips64*-*-linux2)
        echo "WARNING! If you wish to build 64-bit library, then you have to"
-       echo "         invoke './Configure linux-ppc64' *manually*."
+       echo "         invoke './Configure linux64-mips64' *manually*."
        if [ "$TEST" = "false" -a -t 1 ]; then
            echo "         You have about 5 seconds to press Ctrl-C to abort."
            (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
        fi
-       OUT="linux-ppc"
+       OUT="linux-mips64"
        ;;
-  ppc-*-linux2) OUT="linux-ppc" ;;
+  mips*-*-linux2) OUT="linux-mips32" ;;
   ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;;
   ppcgen-*-vxworks*) OUT="vxworks-ppcgen" ;;
   pentium-*-vxworks*) OUT="vxworks-pentium" ;;
@@ -644,6 +662,7 @@ case "$GUESSOS" in
   armv[1-3]*-*-linux2) OUT="linux-generic32" ;;
   armv[7-9]*-*-linux2) OUT="linux-armv4"; options="$options -march=armv7-a" ;;
   arm*-*-linux2) OUT="linux-armv4" ;;
+  aarch64-*-linux2) OUT="linux-aarch64" ;;
   sh*b-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;;
   sh*-*-linux2)  OUT="linux-generic32"; options="$options -DL_ENDIAN" ;;
   m68k*-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;;
index 618c958..7869996 100644 (file)
@@ -74,9 +74,9 @@ ia64cpuid.s: ia64cpuid.S;     $(CC) $(CFLAGS) -E ia64cpuid.S > $@
 ppccpuid.s:    ppccpuid.pl;    $(PERL) ppccpuid.pl $(PERLASM_SCHEME) $@
 pariscid.s:    pariscid.pl;    $(PERL) pariscid.pl $(PERLASM_SCHEME) $@
 alphacpuid.s:  alphacpuid.pl
-       (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \
+       (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \
        $(PERL) alphacpuid.pl > $$preproc && \
-       $(CC) -E $$preproc > $@ && rm $$preproc)
+       $(CC) -E -P $$preproc > $@ && rm $$preproc)
 
 testapps:
        [ -z "$(THIS)" ] || (   if echo $(SDIRS) | fgrep ' des '; \
@@ -88,7 +88,7 @@ subdirs:
        @target=all; $(RECURSIVE_MAKE)
 
 files:
-       $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
+       $(PERL) $(TOP)/util/files.pl "CPUID_OBJ=$(CPUID_OBJ)" Makefile >> $(TOP)/MINFO
        @target=files; $(RECURSIVE_MAKE)
 
 links:
@@ -102,7 +102,7 @@ lib:        $(LIB)
        @touch lib
 $(LIB):        $(LIBOBJ)
        $(AR) $(LIB) $(LIBOBJ)
-       [ -z "$(FIPSLIBDIR)" ] || $(AR) $(LIB) $(FIPSLIBDIR)fipscanister.o
+       test -z "$(FIPSLIBDIR)" || $(AR) $(LIB) $(FIPSLIBDIR)fipscanister.o
        $(RANLIB) $(LIB) || echo Never mind.
 
 shared: buildinf.h lib subdirs
index b3a9581..e825c14 100644 (file)
@@ -65,12 +65,22 @@ aesni-x86_64.s: asm/aesni-x86_64.pl
        $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@
 aesni-sha1-x86_64.s:   asm/aesni-sha1-x86_64.pl
        $(PERL) asm/aesni-sha1-x86_64.pl $(PERLASM_SCHEME) > $@
+aesni-sha256-x86_64.s: asm/aesni-sha256-x86_64.pl
+       $(PERL) asm/aesni-sha256-x86_64.pl $(PERLASM_SCHEME) > $@
+aesni-mb-x86_64.s:     asm/aesni-mb-x86_64.pl
+       $(PERL) asm/aesni-mb-x86_64.pl $(PERLASM_SCHEME) > $@
 
 aes-sparcv9.s: asm/aes-sparcv9.pl
        $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@
+aest4-sparcv9.s: asm/aest4-sparcv9.pl ../perlasm/sparcv9_modes.pl
+       $(PERL) asm/aest4-sparcv9.pl $(CFLAGS) > $@
 
 aes-ppc.s:     asm/aes-ppc.pl
        $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@
+vpaes-ppc.s:   asm/vpaes-ppc.pl
+       $(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@
+aesp8-ppc.s:   asm/aesp8-ppc.pl
+       $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@
 
 aes-parisc.s:  asm/aes-parisc.pl
        $(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@
@@ -78,12 +88,18 @@ aes-parisc.s:       asm/aes-parisc.pl
 aes-mips.S:    asm/aes-mips.pl
        $(PERL) asm/aes-mips.pl $(PERLASM_SCHEME) $@
 
+aesv8-armx.S:  asm/aesv8-armx.pl
+       $(PERL) asm/aesv8-armx.pl $(PERLASM_SCHEME) $@
+aesv8-armx.o:  aesv8-armx.S
+
 # GNU make "catch all"
 aes-%.S:       asm/aes-%.pl;   $(PERL) $< $(PERLASM_SCHEME) > $@
 aes-armv4.o:   aes-armv4.S
+bsaes-%.S:     asm/bsaes-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
+bsaes-armv7.o: bsaes-armv7.S
 
 files:
-       $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
+       $(PERL) $(TOP)/util/files.pl "AES_ENC=$(AES_ENC)" Makefile >> $(TOP)/MINFO
 
 links:
        @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
@@ -149,7 +165,7 @@ aes_wrap.o: ../../e_os.h ../../include/openssl/aes.h
 aes_wrap.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
 aes_wrap.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
 aes_wrap.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
-aes_wrap.o: ../../include/openssl/opensslconf.h
+aes_wrap.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
 aes_wrap.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 aes_wrap.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
 aes_wrap.o: ../../include/openssl/symhacks.h ../cryptlib.h aes_wrap.c
index b1ab8e2..b7b64d5 100644 (file)
 
 #include "cryptlib.h"
 #include <openssl/aes.h>
-#include <openssl/bio.h>
-
-static const unsigned char default_iv[] = {
-    0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
-};
+#include <openssl/modes.h>
 
 int AES_wrap_key(AES_KEY *key, const unsigned char *iv,
                  unsigned char *out,
                  const unsigned char *in, unsigned int inlen)
 {
-    unsigned char *A, B[16], *R;
-    unsigned int i, j, t;
-    if ((inlen & 0x7) || (inlen < 8))
-        return -1;
-    A = B;
-    t = 1;
-    memcpy(out + 8, in, inlen);
-    if (!iv)
-        iv = default_iv;
-
-    memcpy(A, iv, 8);
-
-    for (j = 0; j < 6; j++) {
-        R = out + 8;
-        for (i = 0; i < inlen; i += 8, t++, R += 8) {
-            memcpy(B + 8, R, 8);
-            AES_encrypt(B, B, key);
-            A[7] ^= (unsigned char)(t & 0xff);
-            if (t > 0xff) {
-                A[6] ^= (unsigned char)((t >> 8) & 0xff);
-                A[5] ^= (unsigned char)((t >> 16) & 0xff);
-                A[4] ^= (unsigned char)((t >> 24) & 0xff);
-            }
-            memcpy(R, B + 8, 8);
-        }
-    }
-    memcpy(out, A, 8);
-    return inlen + 8;
+    return CRYPTO_128_wrap(key, iv, out, in, inlen, (block128_f) AES_encrypt);
 }
 
 int AES_unwrap_key(AES_KEY *key, const unsigned char *iv,
                    unsigned char *out,
                    const unsigned char *in, unsigned int inlen)
 {
-    unsigned char *A, B[16], *R;
-    unsigned int i, j, t;
-    inlen -= 8;
-    if (inlen & 0x7)
-        return -1;
-    if (inlen < 8)
-        return -1;
-    A = B;
-    t = 6 * (inlen >> 3);
-    memcpy(A, in, 8);
-    memcpy(out, in + 8, inlen);
-    for (j = 0; j < 6; j++) {
-        R = out + inlen - 8;
-        for (i = 0; i < inlen; i += 8, t--, R -= 8) {
-            A[7] ^= (unsigned char)(t & 0xff);
-            if (t > 0xff) {
-                A[6] ^= (unsigned char)((t >> 8) & 0xff);
-                A[5] ^= (unsigned char)((t >> 16) & 0xff);
-                A[4] ^= (unsigned char)((t >> 24) & 0xff);
-            }
-            memcpy(B + 8, R, 8);
-            AES_decrypt(B, B, key);
-            memcpy(R, B + 8, 8);
-        }
-    }
-    if (!iv)
-        iv = default_iv;
-    if (memcmp(A, iv, 8)) {
-        OPENSSL_cleanse(out, inlen);
-        return 0;
-    }
-    return inlen;
-}
-
-#ifdef AES_WRAP_TEST
-
-int AES_wrap_unwrap_test(const unsigned char *kek, int keybits,
-                         const unsigned char *iv,
-                         const unsigned char *eout,
-                         const unsigned char *key, int keylen)
-{
-    unsigned char *otmp = NULL, *ptmp = NULL;
-    int r, ret = 0;
-    AES_KEY wctx;
-    otmp = OPENSSL_malloc(keylen + 8);
-    ptmp = OPENSSL_malloc(keylen);
-    if (!otmp || !ptmp)
-        return 0;
-    if (AES_set_encrypt_key(kek, keybits, &wctx))
-        goto err;
-    r = AES_wrap_key(&wctx, iv, otmp, key, keylen);
-    if (r <= 0)
-        goto err;
-
-    if (eout && memcmp(eout, otmp, keylen))
-        goto err;
-
-    if (AES_set_decrypt_key(kek, keybits, &wctx))
-        goto err;
-    r = AES_unwrap_key(&wctx, iv, ptmp, otmp, r);
-
-    if (memcmp(key, ptmp, keylen))
-        goto err;
-
-    ret = 1;
-
- err:
-    if (otmp)
-        OPENSSL_free(otmp);
-    if (ptmp)
-        OPENSSL_free(ptmp);
-
-    return ret;
-
+    return CRYPTO_128_unwrap(key, iv, out, in, inlen,
+                             (block128_f) AES_decrypt);
 }
-
-int main(int argc, char **argv)
-{
-
-    static const unsigned char kek[] = {
-        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
-        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
-        0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
-    };
-
-    static const unsigned char key[] = {
-        0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
-        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
-        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-    };
-
-    static const unsigned char e1[] = {
-        0x1f, 0xa6, 0x8b, 0x0a, 0x81, 0x12, 0xb4, 0x47,
-        0xae, 0xf3, 0x4b, 0xd8, 0xfb, 0x5a, 0x7b, 0x82,
-        0x9d, 0x3e, 0x86, 0x23, 0x71, 0xd2, 0xcf, 0xe5
-    };
-
-    static const unsigned char e2[] = {
-        0x96, 0x77, 0x8b, 0x25, 0xae, 0x6c, 0xa4, 0x35,
-        0xf9, 0x2b, 0x5b, 0x97, 0xc0, 0x50, 0xae, 0xd2,
-        0x46, 0x8a, 0xb8, 0xa1, 0x7a, 0xd8, 0x4e, 0x5d
-    };
-
-    static const unsigned char e3[] = {
-        0x64, 0xe8, 0xc3, 0xf9, 0xce, 0x0f, 0x5b, 0xa2,
-        0x63, 0xe9, 0x77, 0x79, 0x05, 0x81, 0x8a, 0x2a,
-        0x93, 0xc8, 0x19, 0x1e, 0x7d, 0x6e, 0x8a, 0xe7
-    };
-
-    static const unsigned char e4[] = {
-        0x03, 0x1d, 0x33, 0x26, 0x4e, 0x15, 0xd3, 0x32,
-        0x68, 0xf2, 0x4e, 0xc2, 0x60, 0x74, 0x3e, 0xdc,
-        0xe1, 0xc6, 0xc7, 0xdd, 0xee, 0x72, 0x5a, 0x93,
-        0x6b, 0xa8, 0x14, 0x91, 0x5c, 0x67, 0x62, 0xd2
-    };
-
-    static const unsigned char e5[] = {
-        0xa8, 0xf9, 0xbc, 0x16, 0x12, 0xc6, 0x8b, 0x3f,
-        0xf6, 0xe6, 0xf4, 0xfb, 0xe3, 0x0e, 0x71, 0xe4,
-        0x76, 0x9c, 0x8b, 0x80, 0xa3, 0x2c, 0xb8, 0x95,
-        0x8c, 0xd5, 0xd1, 0x7d, 0x6b, 0x25, 0x4d, 0xa1
-    };
-
-    static const unsigned char e6[] = {
-        0x28, 0xc9, 0xf4, 0x04, 0xc4, 0xb8, 0x10, 0xf4,
-        0xcb, 0xcc, 0xb3, 0x5c, 0xfb, 0x87, 0xf8, 0x26,
-        0x3f, 0x57, 0x86, 0xe2, 0xd8, 0x0e, 0xd3, 0x26,
-        0xcb, 0xc7, 0xf0, 0xe7, 0x1a, 0x99, 0xf4, 0x3b,
-        0xfb, 0x98, 0x8b, 0x9b, 0x7a, 0x02, 0xdd, 0x21
-    };
-
-    AES_KEY wctx, xctx;
-    int ret;
-    ret = AES_wrap_unwrap_test(kek, 128, NULL, e1, key, 16);
-    fprintf(stderr, "Key test result %d\n", ret);
-    ret = AES_wrap_unwrap_test(kek, 192, NULL, e2, key, 16);
-    fprintf(stderr, "Key test result %d\n", ret);
-    ret = AES_wrap_unwrap_test(kek, 256, NULL, e3, key, 16);
-    fprintf(stderr, "Key test result %d\n", ret);
-    ret = AES_wrap_unwrap_test(kek, 192, NULL, e4, key, 24);
-    fprintf(stderr, "Key test result %d\n", ret);
-    ret = AES_wrap_unwrap_test(kek, 256, NULL, e5, key, 24);
-    fprintf(stderr, "Key test result %d\n", ret);
-    ret = AES_wrap_unwrap_test(kek, 256, NULL, e6, key, 32);
-    fprintf(stderr, "Key test result %d\n", ret);
-}
-
-#endif
index 1defbb1..c869ed7 100644 (file)
@@ -89,8 +89,10 @@ typedef unsigned long long u64;
 #endif
 
 #undef ROTATE
-#if defined(_MSC_VER) || defined(__ICC)
-# define ROTATE(a,n)   _lrotl(a,n)
+#if defined(_MSC_VER)
+# define ROTATE(a,n)    _lrotl(a,n)
+#elif defined(__ICC)
+# define ROTATE(a,n)    _rotl(a,n)
 #elif defined(__GNUC__) && __GNUC__>=2
 # if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
 #   define ROTATE(a,n)  ({ register unsigned int ret;   \
index 687ed81..451d0e0 100755 (executable)
@@ -39,7 +39,7 @@
 # but exhibits up to 10% improvement on other cores.
 #
 # Second version is "monolithic" replacement for aes_core.c, which in
-# addition to AES_[de|en]crypt implements private_AES_set_[de|en]cryption_key.
+# addition to AES_[de|en]crypt implements AES_set_[de|en]cryption_key.
 # This made it possible to implement little-endian variant of the
 # algorithm without modifying the base C code. Motivating factor for
 # the undertaken effort was that it appeared that in tight IA-32
 # byte for 128-bit key.
 #
 #              ECB encrypt     ECB decrypt     CBC large chunk
-# P4           56[60]          84[100]         23
-# AMD K8       48[44]          70[79]          18
-# PIII         41[50]          61[91]          24
-# Core 2       32[38]          45[70]          18.5
-# Pentium      120             160             77
+# P4           52[54]          83[95]          23
+# AMD K8       46[41]          66[70]          18
+# PIII         41[50]          60[77]          24
+# Core 2       31[36]          45[64]          18.5
+# Atom         76[100]         96[138]         60
+# Pentium      115             150             77
 #
 # Version 4.1 switches to compact S-box even in key schedule setup.
 #
@@ -242,7 +243,7 @@ $vertical_spin=0;   # shift "verticaly" defaults to 0, because of
 
 sub encvert()
 { my ($te,@s) = @_;
-  my $v0 = $acc, $v1 = $key;
+  my ($v0,$v1) = ($acc,$key);
 
        &mov    ($v0,$s[3]);                            # copy s3
        &mov    (&DWP(4,"esp"),$s[2]);                  # save s2
@@ -299,7 +300,7 @@ sub encvert()
 # Another experimental routine, which features "horizontal spin," but
 # eliminates one reference to stack. Strangely enough runs slower...
 sub enchoriz()
-{ my $v0 = $key, $v1 = $acc;
+{ my ($v0,$v1) = ($key,$acc);
 
        &movz   ($v0,&LB($s0));                 #  3, 2, 1, 0*
        &rotr   ($s2,8);                        #  8,11,10, 9
@@ -427,7 +428,7 @@ sub sse_encbody()
 ######################################################################
 
 sub enccompact()
-{ my $Fn = mov;
+{ my $Fn = \&mov;
   while ($#_>5) { pop(@_); $Fn=sub{}; }
   my ($i,$te,@s)=@_;
   my $tmp = $key;
@@ -476,24 +477,25 @@ sub enctransform()
   my $tmp = $tbl;
   my $r2  = $key ;
 
-       &mov    ($acc,$s[$i]);
-       &and    ($acc,0x80808080);
-       &mov    ($tmp,$acc);
-       &shr    ($tmp,7);
+       &and    ($tmp,$s[$i]);
        &lea    ($r2,&DWP(0,$s[$i],$s[$i]));
-       &sub    ($acc,$tmp);
+       &mov    ($acc,$tmp);
+       &shr    ($tmp,7);
        &and    ($r2,0xfefefefe);
-       &and    ($acc,0x1b1b1b1b);
+       &sub    ($acc,$tmp);
        &mov    ($tmp,$s[$i]);
+       &and    ($acc,0x1b1b1b1b);
+       &rotr   ($tmp,16);
        &xor    ($acc,$r2);     # r2
+       &mov    ($r2,$s[$i]);
 
        &xor    ($s[$i],$acc);  # r0 ^ r2
+       &rotr   ($r2,16+8);
+       &xor    ($acc,$tmp);
        &rotl   ($s[$i],24);
-       &xor    ($s[$i],$acc)   # ROTATE(r2^r0,24) ^ r2
-       &rotr   ($tmp,16);
-       &xor    ($s[$i],$tmp);
-       &rotr   ($tmp,8);
-       &xor    ($s[$i],$tmp);
+       &xor    ($acc,$r2);
+       &mov    ($tmp,0x80808080)       if ($i!=1);
+       &xor    ($s[$i],$acc);  # ROTATE(r2^r0,24) ^ r2
 }
 
 &function_begin_B("_x86_AES_encrypt_compact");
@@ -526,6 +528,7 @@ sub enctransform()
                &enccompact(1,$tbl,$s1,$s2,$s3,$s0,1);
                &enccompact(2,$tbl,$s2,$s3,$s0,$s1,1);
                &enccompact(3,$tbl,$s3,$s0,$s1,$s2,1);
+               &mov    ($tbl,0x80808080);
                &enctransform(2);
                &enctransform(3);
                &enctransform(0);
@@ -607,82 +610,84 @@ sub sse_enccompact()
        &pshufw ("mm5","mm4",0x0d);             # 15,14,11,10
        &movd   ("eax","mm1");                  #  5, 4, 1, 0
        &movd   ("ebx","mm5");                  # 15,14,11,10
+       &mov    ($__key,$key);
 
        &movz   ($acc,&LB("eax"));              #  0
-       &movz   ("ecx",&BP(-128,$tbl,$acc,1));  #  0
-       &pshufw ("mm2","mm0",0x0d);             #  7, 6, 3, 2
        &movz   ("edx",&HB("eax"));             #  1
+       &pshufw ("mm2","mm0",0x0d);             #  7, 6, 3, 2
+       &movz   ("ecx",&BP(-128,$tbl,$acc,1));  #  0
+       &movz   ($key,&LB("ebx"));              # 10
        &movz   ("edx",&BP(-128,$tbl,"edx",1)); #  1
-       &shl    ("edx",8);                      #  1
        &shr    ("eax",16);                     #  5, 4
+       &shl    ("edx",8);                      #  1
 
-       &movz   ($acc,&LB("ebx"));              # 10
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 10
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   # 10
+       &movz   ($key,&HB("ebx"));              # 11
        &shl    ($acc,16);                      # 10
-       &or     ("ecx",$acc);                   # 10
        &pshufw ("mm6","mm4",0x08);             # 13,12, 9, 8
-       &movz   ($acc,&HB("ebx"));              # 11
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 11
+       &or     ("ecx",$acc);                   # 10
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   # 11
+       &movz   ($key,&HB("eax"));              #  5
        &shl    ($acc,24);                      # 11
-       &or     ("edx",$acc);                   # 11
        &shr    ("ebx",16);                     # 15,14
+       &or     ("edx",$acc);                   # 11
 
-       &movz   ($acc,&HB("eax"));              #  5
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   #  5
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  5
+       &movz   ($key,&HB("ebx"));              # 15
        &shl    ($acc,8);                       #  5
        &or     ("ecx",$acc);                   #  5
-       &movz   ($acc,&HB("ebx"));              # 15
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 15
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   # 15
+       &movz   ($key,&LB("eax"));              #  4
        &shl    ($acc,24);                      # 15
        &or     ("ecx",$acc);                   # 15
-       &movd   ("mm0","ecx");                  # t[0] collected
 
-       &movz   ($acc,&LB("eax"));              #  4
-       &movz   ("ecx",&BP(-128,$tbl,$acc,1));  #  4
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  4
+       &movz   ($key,&LB("ebx"));              # 14
        &movd   ("eax","mm2");                  #  7, 6, 3, 2
-       &movz   ($acc,&LB("ebx"));              # 14
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 14
-       &shl    ($acc,16);                      # 14
+       &movd   ("mm0","ecx");                  # t[0] collected
+       &movz   ("ecx",&BP(-128,$tbl,$key,1));  # 14
+       &movz   ($key,&HB("eax"));              #  3
+       &shl    ("ecx",16);                     # 14
+       &movd   ("ebx","mm6");                  # 13,12, 9, 8
        &or     ("ecx",$acc);                   # 14
 
-       &movd   ("ebx","mm6");                  # 13,12, 9, 8
-       &movz   ($acc,&HB("eax"));              #  3
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   #  3
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  3
+       &movz   ($key,&HB("ebx"));              #  9
        &shl    ($acc,24);                      #  3
        &or     ("ecx",$acc);                   #  3
-       &movz   ($acc,&HB("ebx"));              #  9
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   #  9
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  9
+       &movz   ($key,&LB("ebx"));              #  8
        &shl    ($acc,8);                       #  9
+       &shr    ("ebx",16);                     # 13,12
        &or     ("ecx",$acc);                   #  9
-       &movd   ("mm1","ecx");                  # t[1] collected
 
-       &movz   ($acc,&LB("ebx"));              #  8
-       &movz   ("ecx",&BP(-128,$tbl,$acc,1));  #  8
-       &shr    ("ebx",16);                     # 13,12
-       &movz   ($acc,&LB("eax"));              #  2
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   #  2
-       &shl    ($acc,16);                      #  2
-       &or     ("ecx",$acc);                   #  2
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  8
+       &movz   ($key,&LB("eax"));              #  2
        &shr    ("eax",16);                     #  7, 6
+       &movd   ("mm1","ecx");                  # t[1] collected
+       &movz   ("ecx",&BP(-128,$tbl,$key,1));  #  2
+       &movz   ($key,&HB("eax"));              #  7
+       &shl    ("ecx",16);                     #  2
+       &and    ("eax",0xff);                   #  6
+       &or     ("ecx",$acc);                   #  2
 
        &punpckldq      ("mm0","mm1");          # t[0,1] collected
 
-       &movz   ($acc,&HB("eax"));              #  7
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   #  7
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  7
+       &movz   ($key,&HB("ebx"));              # 13
        &shl    ($acc,24);                      #  7
-       &or     ("ecx",$acc);                   #  7
-       &and    ("eax",0xff);                   #  6
+       &and    ("ebx",0xff);                   # 12
        &movz   ("eax",&BP(-128,$tbl,"eax",1)); #  6
+       &or     ("ecx",$acc);                   #  7
        &shl    ("eax",16);                     #  6
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   # 13
        &or     ("edx","eax");                  #  6
-       &movz   ($acc,&HB("ebx"));              # 13
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 13
        &shl    ($acc,8);                       # 13
-       &or     ("ecx",$acc);                   # 13
-       &movd   ("mm4","ecx");                  # t[2] collected
-       &and    ("ebx",0xff);                   # 12
        &movz   ("ebx",&BP(-128,$tbl,"ebx",1)); # 12
+       &or     ("ecx",$acc);                   # 13
        &or     ("edx","ebx");                  # 12
+       &mov    ($key,$__key);
+       &movd   ("mm4","ecx");                  # t[2] collected
        &movd   ("mm5","edx");                  # t[3] collected
 
        &punpckldq      ("mm4","mm5");          # t[2,3] collected
@@ -1222,7 +1227,7 @@ sub enclast()
 ######################################################################
 
 sub deccompact()
-{ my $Fn = mov;
+{ my $Fn = \&mov;
   while ($#_>5) { pop(@_); $Fn=sub{}; }
   my ($i,$td,@s)=@_;
   my $tmp = $key;
@@ -1270,30 +1275,30 @@ sub dectransform()
   my $tp4 = @s[($i+3)%4]; $tp4 = @s[3] if ($i==1);
   my $tp8 = $tbl;
 
-       &mov    ($acc,$s[$i]);
-       &and    ($acc,0x80808080);
-       &mov    ($tmp,$acc);
+       &mov    ($tmp,0x80808080);
+       &and    ($tmp,$s[$i]);
+       &mov    ($acc,$tmp);
        &shr    ($tmp,7);
        &lea    ($tp2,&DWP(0,$s[$i],$s[$i]));
        &sub    ($acc,$tmp);
        &and    ($tp2,0xfefefefe);
        &and    ($acc,0x1b1b1b1b);
-       &xor    ($acc,$tp2);
-       &mov    ($tp2,$acc);
+       &xor    ($tp2,$acc);
+       &mov    ($tmp,0x80808080);
 
-       &and    ($acc,0x80808080);
-       &mov    ($tmp,$acc);
+       &and    ($tmp,$tp2);
+       &mov    ($acc,$tmp);
        &shr    ($tmp,7);
        &lea    ($tp4,&DWP(0,$tp2,$tp2));
        &sub    ($acc,$tmp);
        &and    ($tp4,0xfefefefe);
        &and    ($acc,0x1b1b1b1b);
         &xor   ($tp2,$s[$i]);  # tp2^tp1
-       &xor    ($acc,$tp4);
-       &mov    ($tp4,$acc);
+       &xor    ($tp4,$acc);
+       &mov    ($tmp,0x80808080);
 
-       &and    ($acc,0x80808080);
-       &mov    ($tmp,$acc);
+       &and    ($tmp,$tp4);
+       &mov    ($acc,$tmp);
        &shr    ($tmp,7);
        &lea    ($tp8,&DWP(0,$tp4,$tp4));
        &sub    ($acc,$tmp);
@@ -1305,13 +1310,13 @@ sub dectransform()
 
        &xor    ($s[$i],$tp2);
        &xor    ($tp2,$tp8);
-       &rotl   ($tp2,24);
        &xor    ($s[$i],$tp4);
        &xor    ($tp4,$tp8);
-       &rotl   ($tp4,16);
+       &rotl   ($tp2,24);
        &xor    ($s[$i],$tp8);  # ^= tp8^(tp4^tp1)^(tp2^tp1)
-       &rotl   ($tp8,8);
+       &rotl   ($tp4,16);
        &xor    ($s[$i],$tp2);  # ^= ROTATE(tp8^tp2^tp1,24)
+       &rotl   ($tp8,8);
        &xor    ($s[$i],$tp4);  # ^= ROTATE(tp8^tp4^tp1,16)
         &mov   ($s[0],$__s0)                   if($i==2); #prefetch $s0
         &mov   ($s[1],$__s1)                   if($i==3); #prefetch $s1
@@ -1389,85 +1394,87 @@ sub dectransform()
 sub sse_deccompact()
 {
        &pshufw ("mm1","mm0",0x0c);             #  7, 6, 1, 0
+       &pshufw ("mm5","mm4",0x09);             # 13,12,11,10
        &movd   ("eax","mm1");                  #  7, 6, 1, 0
+       &movd   ("ebx","mm5");                  # 13,12,11,10
+       &mov    ($__key,$key);
 
-       &pshufw ("mm5","mm4",0x09);             # 13,12,11,10
        &movz   ($acc,&LB("eax"));              #  0
-       &movz   ("ecx",&BP(-128,$tbl,$acc,1));  #  0
-       &movd   ("ebx","mm5");                  # 13,12,11,10
        &movz   ("edx",&HB("eax"));             #  1
+       &pshufw ("mm2","mm0",0x06);             #  3, 2, 5, 4
+       &movz   ("ecx",&BP(-128,$tbl,$acc,1));  #  0
+       &movz   ($key,&LB("ebx"));              # 10
        &movz   ("edx",&BP(-128,$tbl,"edx",1)); #  1
+       &shr    ("eax",16);                     #  7, 6
        &shl    ("edx",8);                      #  1
 
-       &pshufw ("mm2","mm0",0x06);             #  3, 2, 5, 4
-       &movz   ($acc,&LB("ebx"));              # 10
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 10
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   # 10
+       &movz   ($key,&HB("ebx"));              # 11
        &shl    ($acc,16);                      # 10
+       &pshufw ("mm6","mm4",0x03);             # 9, 8,15,14
        &or     ("ecx",$acc);                   # 10
-       &shr    ("eax",16);                     #  7, 6
-       &movz   ($acc,&HB("ebx"));              # 11
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 11
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   # 11
+       &movz   ($key,&HB("eax"));              #  7
        &shl    ($acc,24);                      # 11
-       &or     ("edx",$acc);                   # 11
        &shr    ("ebx",16);                     # 13,12
+       &or     ("edx",$acc);                   # 11
 
-       &pshufw ("mm6","mm4",0x03);             # 9, 8,15,14
-       &movz   ($acc,&HB("eax"));              #  7
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   #  7
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  7
+       &movz   ($key,&HB("ebx"));              # 13
        &shl    ($acc,24);                      #  7
        &or     ("ecx",$acc);                   #  7
-       &movz   ($acc,&HB("ebx"));              # 13
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 13
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   # 13
+       &movz   ($key,&LB("eax"));              #  6
        &shl    ($acc,8);                       # 13
+       &movd   ("eax","mm2");                  #  3, 2, 5, 4
        &or     ("ecx",$acc);                   # 13
-       &movd   ("mm0","ecx");                  # t[0] collected
 
-       &movz   ($acc,&LB("eax"));              #  6
-       &movd   ("eax","mm2");                  #  3, 2, 5, 4
-       &movz   ("ecx",&BP(-128,$tbl,$acc,1));  #  6
-       &shl    ("ecx",16);                     #  6
-       &movz   ($acc,&LB("ebx"));              # 12
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  6
+       &movz   ($key,&LB("ebx"));              # 12
+       &shl    ($acc,16);                      #  6
        &movd   ("ebx","mm6");                  #  9, 8,15,14
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 12
+       &movd   ("mm0","ecx");                  # t[0] collected
+       &movz   ("ecx",&BP(-128,$tbl,$key,1));  # 12
+       &movz   ($key,&LB("eax"));              #  4
        &or     ("ecx",$acc);                   # 12
 
-       &movz   ($acc,&LB("eax"));              #  4
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   #  4
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  4
+       &movz   ($key,&LB("ebx"));              # 14
        &or     ("edx",$acc);                   #  4
-       &movz   ($acc,&LB("ebx"));              # 14
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 14
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   # 14
+       &movz   ($key,&HB("eax"));              #  5
        &shl    ($acc,16);                      # 14
+       &shr    ("eax",16);                     #  3, 2
        &or     ("edx",$acc);                   # 14
-       &movd   ("mm1","edx");                  # t[1] collected
 
-       &movz   ($acc,&HB("eax"));              #  5
-       &movz   ("edx",&BP(-128,$tbl,$acc,1));  #  5
-       &shl    ("edx",8);                      #  5
-       &movz   ($acc,&HB("ebx"));              # 15
-       &shr    ("eax",16);                     #  3, 2
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   # 15
-       &shl    ($acc,24);                      # 15
-       &or     ("edx",$acc);                   # 15
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  5
+       &movz   ($key,&HB("ebx"));              # 15
        &shr    ("ebx",16);                     #  9, 8
+       &shl    ($acc,8);                       #  5
+       &movd   ("mm1","edx");                  # t[1] collected
+       &movz   ("edx",&BP(-128,$tbl,$key,1));  # 15
+       &movz   ($key,&HB("ebx"));              #  9
+       &shl    ("edx",24);                     # 15
+       &and    ("ebx",0xff);                   #  8
+       &or     ("edx",$acc);                   # 15
 
        &punpckldq      ("mm0","mm1");          # t[0,1] collected
 
-       &movz   ($acc,&HB("ebx"));              #  9
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   #  9
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  9
+       &movz   ($key,&LB("eax"));              #  2
        &shl    ($acc,8);                       #  9
-       &or     ("ecx",$acc);                   #  9
-       &and    ("ebx",0xff);                   #  8
+       &movz   ("eax",&HB("eax"));             #  3
        &movz   ("ebx",&BP(-128,$tbl,"ebx",1)); #  8
+       &or     ("ecx",$acc);                   #  9
+       &movz   ($acc,&BP(-128,$tbl,$key,1));   #  2
        &or     ("edx","ebx");                  #  8
-       &movz   ($acc,&LB("eax"));              #  2
-       &movz   ($acc,&BP(-128,$tbl,$acc,1));   #  2
        &shl    ($acc,16);                      #  2
-       &or     ("edx",$acc);                   #  2
-       &movd   ("mm4","edx");                  # t[2] collected
-       &movz   ("eax",&HB("eax"));             #  3
        &movz   ("eax",&BP(-128,$tbl,"eax",1)); #  3
+       &or     ("edx",$acc);                   #  2
        &shl    ("eax",24);                     #  3
        &or     ("ecx","eax");                  #  3
+       &mov    ($key,$__key);
+       &movd   ("mm4","edx");                  # t[2] collected
        &movd   ("mm5","ecx");                  # t[3] collected
 
        &punpckldq      ("mm4","mm5");          # t[2,3] collected
@@ -2181,8 +2188,8 @@ my $mark=&DWP(76+240,"esp");      # copy of aes_key->rounds
        &mov    ("ecx",240/4);
        &xor    ("eax","eax");
        &align  (4);
-       &data_word(0xABF3F689); # rep stosd
-       &set_label("skip_ezero")
+       &data_word(0xABF3F689);         # rep stosd
+       &set_label("skip_ezero");
        &mov    ("esp",$_esp);
        &popf   ();
     &set_label("drop_out");
@@ -2301,8 +2308,8 @@ my $mark=&DWP(76+240,"esp");      # copy of aes_key->rounds
        &mov    ("ecx",240/4);
        &xor    ("eax","eax");
        &align  (4);
-       &data_word(0xABF3F689); # rep stosd
-       &set_label("skip_dzero")
+       &data_word(0xABF3F689);         # rep stosd
+       &set_label("skip_dzero");
        &mov    ("esp",$_esp);
        &popf   ();
        &function_end_A();
@@ -2865,32 +2872,32 @@ sub deckey()
 { my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_;
   my $tmp = $tbl;
 
-       &mov    ($acc,$tp1);
-       &and    ($acc,0x80808080);
-       &mov    ($tmp,$acc);
-       &shr    ($tmp,7);
+       &mov    ($tmp,0x80808080);
+       &and    ($tmp,$tp1);
        &lea    ($tp2,&DWP(0,$tp1,$tp1));
+       &mov    ($acc,$tmp);
+       &shr    ($tmp,7);
        &sub    ($acc,$tmp);
        &and    ($tp2,0xfefefefe);
        &and    ($acc,0x1b1b1b1b);
-       &xor    ($acc,$tp2);
-       &mov    ($tp2,$acc);
+       &xor    ($tp2,$acc);
+       &mov    ($tmp,0x80808080);
 
-       &and    ($acc,0x80808080);
-       &mov    ($tmp,$acc);
-       &shr    ($tmp,7);
+       &and    ($tmp,$tp2);
        &lea    ($tp4,&DWP(0,$tp2,$tp2));
+       &mov    ($acc,$tmp);
+       &shr    ($tmp,7);
        &sub    ($acc,$tmp);
        &and    ($tp4,0xfefefefe);
        &and    ($acc,0x1b1b1b1b);
         &xor   ($tp2,$tp1);    # tp2^tp1
-       &xor    ($acc,$tp4);
-       &mov    ($tp4,$acc);
+       &xor    ($tp4,$acc);
+       &mov    ($tmp,0x80808080);
 
-       &and    ($acc,0x80808080);
-       &mov    ($tmp,$acc);
-       &shr    ($tmp,7);
+       &and    ($tmp,$tp4);
        &lea    ($tp8,&DWP(0,$tp4,$tp4));
+       &mov    ($acc,$tmp);
+       &shr    ($tmp,7);
         &xor   ($tp4,$tp1);    # tp4^tp1
        &sub    ($acc,$tmp);
        &and    ($tp8,0xfefefefe);
index 86b86c4..4f89170 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -51,9 +51,23 @@ $key="r11";
 $rounds="r12";
 
 $code=<<___;
-#include "arm_arch.h"
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+#endif
+
 .text
+#if __ARM_ARCH__<7
+.code  32
+#else
+.syntax        unified
+# ifdef __thumb2__
+.thumb
+# else
 .code  32
+# endif
+#endif
 
 .type  AES_Te,%object
 .align 5
@@ -167,7 +181,11 @@ AES_Te:
 .type   AES_encrypt,%function
 .align 5
 AES_encrypt:
+#if __ARM_ARCH__<7
        sub     r3,pc,#8                @ AES_encrypt
+#else
+       adr     r3,AES_encrypt
+#endif
        stmdb   sp!,{r1,r4-r12,lr}
        mov     $rounds,r0              @ inp
        mov     $key,r2
@@ -409,11 +427,21 @@ _armv4_AES_encrypt:
 .align 5
 private_AES_set_encrypt_key:
 _armv4_AES_set_encrypt_key:
+#if __ARM_ARCH__<7
        sub     r3,pc,#8                @ AES_set_encrypt_key
+#else
+       adr     r3,private_AES_set_encrypt_key
+#endif
        teq     r0,#0
+#if __ARM_ARCH__>=7
+       itt     eq                      @ Thumb2 thing, sanity check in ARM
+#endif
        moveq   r0,#-1
        beq     .Labrt
        teq     r2,#0
+#if __ARM_ARCH__>=7
+       itt     eq                      @ Thumb2 thing, sanity check in ARM
+#endif
        moveq   r0,#-1
        beq     .Labrt
 
@@ -422,6 +450,9 @@ _armv4_AES_set_encrypt_key:
        teq     r1,#192
        beq     .Lok
        teq     r1,#256
+#if __ARM_ARCH__>=7
+       itt     ne                      @ Thumb2 thing, sanity check in ARM
+#endif
        movne   r0,#-1
        bne     .Labrt
 
@@ -576,6 +607,9 @@ _armv4_AES_set_encrypt_key:
        str     $s2,[$key,#-16]
        subs    $rounds,$rounds,#1
        str     $s3,[$key,#-12]
+#if __ARM_ARCH__>=7
+       itt     eq                              @ Thumb2 thing, sanity check in ARM
+#endif
        subeq   r2,$key,#216
        beq     .Ldone
 
@@ -645,6 +679,9 @@ _armv4_AES_set_encrypt_key:
        str     $s2,[$key,#-24]
        subs    $rounds,$rounds,#1
        str     $s3,[$key,#-20]
+#if __ARM_ARCH__>=7
+       itt     eq                              @ Thumb2 thing, sanity check in ARM
+#endif
        subeq   r2,$key,#256
        beq     .Ldone
 
@@ -674,11 +711,17 @@ _armv4_AES_set_encrypt_key:
        str     $i3,[$key,#-4]
        b       .L256_loop
 
+.align 2
 .Ldone:        mov     r0,#0
        ldmia   sp!,{r4-r12,lr}
-.Labrt:        tst     lr,#1
+.Labrt:
+#if __ARM_ARCH__>=5
+       ret                             @ bx lr
+#else
+       tst     lr,#1
        moveq   pc,lr                   @ be binary compatible with V4, yet
        bx      lr                      @ interoperable with Thumb ISA:-)
+#endif
 .size  private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
 
 .global private_AES_set_decrypt_key
@@ -688,34 +731,57 @@ private_AES_set_decrypt_key:
        str     lr,[sp,#-4]!            @ push lr
        bl      _armv4_AES_set_encrypt_key
        teq     r0,#0
-       ldrne   lr,[sp],#4              @ pop lr
+       ldr     lr,[sp],#4              @ pop lr
        bne     .Labrt
 
-       stmdb   sp!,{r4-r12}
+       mov     r0,r2                   @ AES_set_encrypt_key preserves r2,
+       mov     r1,r2                   @ which is AES_KEY *key
+       b       _armv4_AES_set_enc2dec_key
+.size  private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
 
-       ldr     $rounds,[r2,#240]       @ AES_set_encrypt_key preserves r2,
-       mov     $key,r2                 @ which is AES_KEY *key
-       mov     $i1,r2
-       add     $i2,r2,$rounds,lsl#4
+@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
+.global        AES_set_enc2dec_key
+.type  AES_set_enc2dec_key,%function
+.align 5
+AES_set_enc2dec_key:
+_armv4_AES_set_enc2dec_key:
+       stmdb   sp!,{r4-r12,lr}
+
+       ldr     $rounds,[r0,#240]
+       mov     $i1,r0                  @ input
+       add     $i2,r0,$rounds,lsl#4
+       mov     $key,r1                 @ output
+       add     $tbl,r1,$rounds,lsl#4
+       str     $rounds,[r1,#240]
+
+.Linv: ldr     $s0,[$i1],#16
+       ldr     $s1,[$i1,#-12]
+       ldr     $s2,[$i1,#-8]
+       ldr     $s3,[$i1,#-4]
+       ldr     $t1,[$i2],#-16
+       ldr     $t2,[$i2,#16+4]
+       ldr     $t3,[$i2,#16+8]
+       ldr     $i3,[$i2,#16+12]
+       str     $s0,[$tbl],#-16
+       str     $s1,[$tbl,#16+4]
+       str     $s2,[$tbl,#16+8]
+       str     $s3,[$tbl,#16+12]
+       str     $t1,[$key],#16
+       str     $t2,[$key,#-12]
+       str     $t3,[$key,#-8]
+       str     $i3,[$key,#-4]
+       teq     $i1,$i2
+       bne     .Linv
 
-.Linv: ldr     $s0,[$i1]
+       ldr     $s0,[$i1]
        ldr     $s1,[$i1,#4]
        ldr     $s2,[$i1,#8]
        ldr     $s3,[$i1,#12]
-       ldr     $t1,[$i2]
-       ldr     $t2,[$i2,#4]
-       ldr     $t3,[$i2,#8]
-       ldr     $i3,[$i2,#12]
-       str     $s0,[$i2],#-16
-       str     $s1,[$i2,#16+4]
-       str     $s2,[$i2,#16+8]
-       str     $s3,[$i2,#16+12]
-       str     $t1,[$i1],#16
-       str     $t2,[$i1,#-12]
-       str     $t3,[$i1,#-8]
-       str     $i3,[$i1,#-4]
-       teq     $i1,$i2
-       bne     .Linv
+       str     $s0,[$key]
+       str     $s1,[$key,#4]
+       str     $s2,[$key,#8]
+       str     $s3,[$key,#12]
+       sub     $key,$key,$rounds,lsl#3
 ___
 $mask80=$i1;
 $mask1b=$i2;
@@ -773,7 +839,7 @@ $code.=<<___;
        moveq   pc,lr                   @ be binary compatible with V4, yet
        bx      lr                      @ interoperable with Thumb ISA:-)
 #endif
-.size  private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+.size  AES_set_enc2dec_key,.-AES_set_enc2dec_key
 
 .type  AES_Td,%object
 .align 5
@@ -883,7 +949,11 @@ AES_Td:
 .type   AES_decrypt,%function
 .align 5
 AES_decrypt:
+#if __ARM_ARCH__<7
        sub     r3,pc,#8                @ AES_decrypt
+#else
+       adr     r3,AES_decrypt
+#endif
        stmdb   sp!,{r1,r4-r12,lr}
        mov     $rounds,r0              @ inp
        mov     $key,r2
@@ -1080,8 +1150,9 @@ _armv4_AES_decrypt:
        ldrb    $t3,[$tbl,$i3]          @ Td4[s0>>0]
        and     $i3,lr,$s1,lsr#8
 
+       add     $s1,$tbl,$s1,lsr#24
        ldrb    $i1,[$tbl,$i1]          @ Td4[s1>>0]
-       ldrb    $s1,[$tbl,$s1,lsr#24]   @ Td4[s1>>24]
+       ldrb    $s1,[$s1]               @ Td4[s1>>24]
        ldrb    $i2,[$tbl,$i2]          @ Td4[s1>>16]
        eor     $s0,$i1,$s0,lsl#24
        ldrb    $i3,[$tbl,$i3]          @ Td4[s1>>8]
@@ -1094,7 +1165,8 @@ _armv4_AES_decrypt:
        ldrb    $i2,[$tbl,$i2]          @ Td4[s2>>0]
        and     $i3,lr,$s2,lsr#16
 
-       ldrb    $s2,[$tbl,$s2,lsr#24]   @ Td4[s2>>24]
+       add     $s2,$tbl,$s2,lsr#24
+       ldrb    $s2,[$s2]               @ Td4[s2>>24]
        eor     $s0,$s0,$i1,lsl#8
        ldrb    $i3,[$tbl,$i3]          @ Td4[s2>>16]
        eor     $s1,$i2,$s1,lsl#16
@@ -1106,8 +1178,9 @@ _armv4_AES_decrypt:
        ldrb    $i2,[$tbl,$i2]          @ Td4[s3>>8]
        and     $i3,lr,$s3              @ i2
 
+       add     $s3,$tbl,$s3,lsr#24
        ldrb    $i3,[$tbl,$i3]          @ Td4[s3>>0]
-       ldrb    $s3,[$tbl,$s3,lsr#24]   @ Td4[s3>>24]
+       ldrb    $s3,[$s3]               @ Td4[s3>>24]
        eor     $s0,$s0,$i1,lsl#16
        ldr     $i1,[$key,#0]
        eor     $s1,$s1,$i2,lsl#8
@@ -1130,5 +1203,15 @@ _armv4_AES_decrypt:
 ___
 
 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;   # make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx\tlr/gm;
+
+open SELF,$0;
+while(<SELF>) {
+       next if (/^#!/);
+       last if (!s/^#/@/ and !/^$/);
+       print;
+}
+close SELF;
+
 print $code;
 close STDOUT;  # enforce flush
index 537c8d3..4de3ee2 100644 (file)
 # thing about this module is its endian neutrality, which means that
 # it processes data without ever changing byte order...
 
+# September 2012
+#
+# Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (a further
+# ~25% fewer instructions) code. Note that there is no run-time switch;
+# instead, the code path is chosen at pre-processing time: pass
+# -mips32r2 and/or -msmartmips.
+
 ######################################################################
 # There is a number of MIPS ABI in use, O32 and N32/64 are most
 # widely used. Then there is a new contender: NUBI. It appears that if
 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
 #
-$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
+$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
 
 if ($flavour =~ /64|n32/i) {
        $PTR_ADD="dadd";        # incidentally works even on n32
        $PTR_SUB="dsub";        # incidentally works even on n32
+       $PTR_INS="dins";
        $REG_S="sd";
        $REG_L="ld";
        $PTR_SLL="dsll";        # incidentally works even on n32
@@ -59,6 +67,7 @@ if ($flavour =~ /64|n32/i) {
 } else {
        $PTR_ADD="add";
        $PTR_SUB="sub";
+       $PTR_INS="ins";
        $REG_S="sw";
        $REG_L="lw";
        $PTR_SLL="sll";
@@ -89,7 +98,11 @@ $code.=<<___;
 # include <openssl/fipssyms.h>
 #endif
 
-#if !defined(__vxworks) || defined(__pic__)
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
+#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
 .option        pic2
 #endif
 .set   noat
@@ -125,6 +138,89 @@ _mips_AES_encrypt:
        xor     $s3,$t3
 
        sub     $cnt,1
+#if defined(__mips_smartmips)
+       ext     $i0,$s1,16,8
+.Loop_enc:
+       ext     $i1,$s2,16,8
+       ext     $i2,$s3,16,8
+       ext     $i3,$s0,16,8
+       lwxs    $t0,$i0($Tbl)           # Te1[s1>>16]
+       ext     $i0,$s2,8,8
+       lwxs    $t1,$i1($Tbl)           # Te1[s2>>16]
+       ext     $i1,$s3,8,8
+       lwxs    $t2,$i2($Tbl)           # Te1[s3>>16]
+       ext     $i2,$s0,8,8
+       lwxs    $t3,$i3($Tbl)           # Te1[s0>>16]
+       ext     $i3,$s1,8,8
+
+       lwxs    $t4,$i0($Tbl)           # Te2[s2>>8]
+       ext     $i0,$s3,0,8
+       lwxs    $t5,$i1($Tbl)           # Te2[s3>>8]
+       ext     $i1,$s0,0,8
+       lwxs    $t6,$i2($Tbl)           # Te2[s0>>8]
+       ext     $i2,$s1,0,8
+       lwxs    $t7,$i3($Tbl)           # Te2[s1>>8]
+       ext     $i3,$s2,0,8
+
+       lwxs    $t8,$i0($Tbl)           # Te3[s3]
+       ext     $i0,$s0,24,8
+       lwxs    $t9,$i1($Tbl)           # Te3[s0]
+       ext     $i1,$s1,24,8
+       lwxs    $t10,$i2($Tbl)          # Te3[s1]
+       ext     $i2,$s2,24,8
+       lwxs    $t11,$i3($Tbl)          # Te3[s2]
+       ext     $i3,$s3,24,8
+
+       rotr    $t0,$t0,8
+       rotr    $t1,$t1,8
+       rotr    $t2,$t2,8
+       rotr    $t3,$t3,8
+
+       rotr    $t4,$t4,16
+       rotr    $t5,$t5,16
+       rotr    $t6,$t6,16
+       rotr    $t7,$t7,16
+
+       xor     $t0,$t4
+       lwxs    $t4,$i0($Tbl)           # Te0[s0>>24]
+       xor     $t1,$t5
+       lwxs    $t5,$i1($Tbl)           # Te0[s1>>24]
+       xor     $t2,$t6
+       lwxs    $t6,$i2($Tbl)           # Te0[s2>>24]
+       xor     $t3,$t7
+       lwxs    $t7,$i3($Tbl)           # Te0[s3>>24]
+
+       rotr    $t8,$t8,24
+       lw      $s0,0($key0)
+       rotr    $t9,$t9,24
+       lw      $s1,4($key0)
+       rotr    $t10,$t10,24
+       lw      $s2,8($key0)
+       rotr    $t11,$t11,24
+       lw      $s3,12($key0)
+
+       xor     $t0,$t8
+       xor     $t1,$t9
+       xor     $t2,$t10
+       xor     $t3,$t11
+
+       xor     $t0,$t4
+       xor     $t1,$t5
+       xor     $t2,$t6
+       xor     $t3,$t7
+
+       sub     $cnt,1
+       $PTR_ADD $key0,16
+       xor     $s0,$t0
+       xor     $s1,$t1
+       xor     $s2,$t2
+       xor     $s3,$t3
+       .set    noreorder
+       bnez    $cnt,.Loop_enc
+       ext     $i0,$s1,16,8
+
+       _xtr    $i0,$s1,16-2
+#else
        _xtr    $i0,$s1,16-2
 .Loop_enc:
        _xtr    $i1,$s2,16-2
@@ -138,19 +234,29 @@ _mips_AES_encrypt:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       lw      $t0,0($i0)              # Te1[s1>>16]
+       _xtr    $i0,$s2,8-2
+       lw      $t1,0($i1)              # Te1[s2>>16]
+       _xtr    $i1,$s3,8-2
+       lw      $t2,0($i2)              # Te1[s3>>16]
+       _xtr    $i2,$s0,8-2
+       lw      $t3,0($i3)              # Te1[s0>>16]
+       _xtr    $i3,$s1,8-2
+#else
        lwl     $t0,3($i0)              # Te1[s1>>16]
        lwl     $t1,3($i1)              # Te1[s2>>16]
        lwl     $t2,3($i2)              # Te1[s3>>16]
        lwl     $t3,3($i3)              # Te1[s0>>16]
        lwr     $t0,2($i0)              # Te1[s1>>16]
-       lwr     $t1,2($i1)              # Te1[s2>>16]
-       lwr     $t2,2($i2)              # Te1[s3>>16]
-       lwr     $t3,2($i3)              # Te1[s0>>16]
-
        _xtr    $i0,$s2,8-2
+       lwr     $t1,2($i1)              # Te1[s2>>16]
        _xtr    $i1,$s3,8-2
+       lwr     $t2,2($i2)              # Te1[s3>>16]
        _xtr    $i2,$s0,8-2
+       lwr     $t3,2($i3)              # Te1[s0>>16]
        _xtr    $i3,$s1,8-2
+#endif
        and     $i0,0x3fc
        and     $i1,0x3fc
        and     $i2,0x3fc
@@ -159,19 +265,88 @@ _mips_AES_encrypt:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       rotr    $t0,$t0,8
+       rotr    $t1,$t1,8
+       rotr    $t2,$t2,8
+       rotr    $t3,$t3,8
+# if defined(_MIPSEL)
+       lw      $t4,0($i0)              # Te2[s2>>8]
+       _xtr    $i0,$s3,0-2
+       lw      $t5,0($i1)              # Te2[s3>>8]
+       _xtr    $i1,$s0,0-2
+       lw      $t6,0($i2)              # Te2[s0>>8]
+       _xtr    $i2,$s1,0-2
+       lw      $t7,0($i3)              # Te2[s1>>8]
+       _xtr    $i3,$s2,0-2
+
+       and     $i0,0x3fc
+       and     $i1,0x3fc
+       and     $i2,0x3fc
+       and     $i3,0x3fc
+       $PTR_ADD $i0,$Tbl
+       $PTR_ADD $i1,$Tbl
+       $PTR_ADD $i2,$Tbl
+       $PTR_ADD $i3,$Tbl
+       lw      $t8,0($i0)              # Te3[s3]
+       $PTR_INS $i0,$s0,2,8
+       lw      $t9,0($i1)              # Te3[s0]
+       $PTR_INS $i1,$s1,2,8
+       lw      $t10,0($i2)             # Te3[s1]
+       $PTR_INS $i2,$s2,2,8
+       lw      $t11,0($i3)             # Te3[s2]
+       $PTR_INS $i3,$s3,2,8
+# else
+       lw      $t4,0($i0)              # Te2[s2>>8]
+       $PTR_INS $i0,$s3,2,8
+       lw      $t5,0($i1)              # Te2[s3>>8]
+       $PTR_INS $i1,$s0,2,8
+       lw      $t6,0($i2)              # Te2[s0>>8]
+       $PTR_INS $i2,$s1,2,8
+       lw      $t7,0($i3)              # Te2[s1>>8]
+       $PTR_INS $i3,$s2,2,8
+
+       lw      $t8,0($i0)              # Te3[s3]
+       _xtr    $i0,$s0,24-2
+       lw      $t9,0($i1)              # Te3[s0]
+       _xtr    $i1,$s1,24-2
+       lw      $t10,0($i2)             # Te3[s1]
+       _xtr    $i2,$s2,24-2
+       lw      $t11,0($i3)             # Te3[s2]
+       _xtr    $i3,$s3,24-2
+
+       and     $i0,0x3fc
+       and     $i1,0x3fc
+       and     $i2,0x3fc
+       and     $i3,0x3fc
+       $PTR_ADD $i0,$Tbl
+       $PTR_ADD $i1,$Tbl
+       $PTR_ADD $i2,$Tbl
+       $PTR_ADD $i3,$Tbl
+# endif
+       rotr    $t4,$t4,16
+       rotr    $t5,$t5,16
+       rotr    $t6,$t6,16
+       rotr    $t7,$t7,16
+
+       rotr    $t8,$t8,24
+       rotr    $t9,$t9,24
+       rotr    $t10,$t10,24
+       rotr    $t11,$t11,24
+#else
        lwl     $t4,2($i0)              # Te2[s2>>8]
        lwl     $t5,2($i1)              # Te2[s3>>8]
        lwl     $t6,2($i2)              # Te2[s0>>8]
        lwl     $t7,2($i3)              # Te2[s1>>8]
        lwr     $t4,1($i0)              # Te2[s2>>8]
-       lwr     $t5,1($i1)              # Te2[s3>>8]
-       lwr     $t6,1($i2)              # Te2[s0>>8]
-       lwr     $t7,1($i3)              # Te2[s1>>8]
-
        _xtr    $i0,$s3,0-2
+       lwr     $t5,1($i1)              # Te2[s3>>8]
        _xtr    $i1,$s0,0-2
+       lwr     $t6,1($i2)              # Te2[s0>>8]
        _xtr    $i2,$s1,0-2
+       lwr     $t7,1($i3)              # Te2[s1>>8]
        _xtr    $i3,$s2,0-2
+
        and     $i0,0x3fc
        and     $i1,0x3fc
        and     $i2,0x3fc
@@ -185,14 +360,14 @@ _mips_AES_encrypt:
        lwl     $t10,1($i2)             # Te3[s1]
        lwl     $t11,1($i3)             # Te3[s2]
        lwr     $t8,0($i0)              # Te3[s3]
-       lwr     $t9,0($i1)              # Te3[s0]
-       lwr     $t10,0($i2)             # Te3[s1]
-       lwr     $t11,0($i3)             # Te3[s2]
-
        _xtr    $i0,$s0,24-2
+       lwr     $t9,0($i1)              # Te3[s0]
        _xtr    $i1,$s1,24-2
+       lwr     $t10,0($i2)             # Te3[s1]
        _xtr    $i2,$s2,24-2
+       lwr     $t11,0($i3)             # Te3[s2]
        _xtr    $i3,$s3,24-2
+
        and     $i0,0x3fc
        and     $i1,0x3fc
        and     $i2,0x3fc
@@ -201,24 +376,24 @@ _mips_AES_encrypt:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
+#endif
        xor     $t0,$t4
-       xor     $t1,$t5
-       xor     $t2,$t6
-       xor     $t3,$t7
        lw      $t4,0($i0)              # Te0[s0>>24]
+       xor     $t1,$t5
        lw      $t5,0($i1)              # Te0[s1>>24]
+       xor     $t2,$t6
        lw      $t6,0($i2)              # Te0[s2>>24]
+       xor     $t3,$t7
        lw      $t7,0($i3)              # Te0[s3>>24]
 
-       lw      $s0,0($key0)
-       lw      $s1,4($key0)
-       lw      $s2,8($key0)
-       lw      $s3,12($key0)
-
        xor     $t0,$t8
+       lw      $s0,0($key0)
        xor     $t1,$t9
+       lw      $s1,4($key0)
        xor     $t2,$t10
+       lw      $s2,8($key0)
        xor     $t3,$t11
+       lw      $s3,12($key0)
 
        xor     $t0,$t4
        xor     $t1,$t5
@@ -234,6 +409,7 @@ _mips_AES_encrypt:
        .set    noreorder
        bnez    $cnt,.Loop_enc
        _xtr    $i0,$s1,16-2
+#endif
 
        .set    reorder
        _xtr    $i1,$s2,16-2
@@ -248,14 +424,14 @@ _mips_AES_encrypt:
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
        lbu     $t0,2($i0)              # Te4[s1>>16]
-       lbu     $t1,2($i1)              # Te4[s2>>16]
-       lbu     $t2,2($i2)              # Te4[s3>>16]
-       lbu     $t3,2($i3)              # Te4[s0>>16]
-
        _xtr    $i0,$s2,8-2
+       lbu     $t1,2($i1)              # Te4[s2>>16]
        _xtr    $i1,$s3,8-2
+       lbu     $t2,2($i2)              # Te4[s3>>16]
        _xtr    $i2,$s0,8-2
+       lbu     $t3,2($i3)              # Te4[s0>>16]
        _xtr    $i3,$s1,8-2
+
        and     $i0,0x3fc
        and     $i1,0x3fc
        and     $i2,0x3fc
@@ -264,15 +440,44 @@ _mips_AES_encrypt:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+# if defined(_MIPSEL)
        lbu     $t4,2($i0)              # Te4[s2>>8]
+       $PTR_INS $i0,$s0,2,8
        lbu     $t5,2($i1)              # Te4[s3>>8]
+       $PTR_INS $i1,$s1,2,8
        lbu     $t6,2($i2)              # Te4[s0>>8]
+       $PTR_INS $i2,$s2,2,8
        lbu     $t7,2($i3)              # Te4[s1>>8]
+       $PTR_INS $i3,$s3,2,8
 
+       lbu     $t8,2($i0)              # Te4[s0>>24]
+       _xtr    $i0,$s3,0-2
+       lbu     $t9,2($i1)              # Te4[s1>>24]
+       _xtr    $i1,$s0,0-2
+       lbu     $t10,2($i2)             # Te4[s2>>24]
+       _xtr    $i2,$s1,0-2
+       lbu     $t11,2($i3)             # Te4[s3>>24]
+       _xtr    $i3,$s2,0-2
+
+       and     $i0,0x3fc
+       and     $i1,0x3fc
+       and     $i2,0x3fc
+       and     $i3,0x3fc
+       $PTR_ADD $i0,$Tbl
+       $PTR_ADD $i1,$Tbl
+       $PTR_ADD $i2,$Tbl
+       $PTR_ADD $i3,$Tbl
+# else
+       lbu     $t4,2($i0)              # Te4[s2>>8]
        _xtr    $i0,$s0,24-2
+       lbu     $t5,2($i1)              # Te4[s3>>8]
        _xtr    $i1,$s1,24-2
+       lbu     $t6,2($i2)              # Te4[s0>>8]
        _xtr    $i2,$s2,24-2
+       lbu     $t7,2($i3)              # Te4[s1>>8]
        _xtr    $i3,$s3,24-2
+
        and     $i0,0x3fc
        and     $i1,0x3fc
        and     $i2,0x3fc
@@ -282,18 +487,76 @@ _mips_AES_encrypt:
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
        lbu     $t8,2($i0)              # Te4[s0>>24]
+       $PTR_INS $i0,$s3,2,8
        lbu     $t9,2($i1)              # Te4[s1>>24]
+       $PTR_INS $i1,$s0,2,8
        lbu     $t10,2($i2)             # Te4[s2>>24]
+       $PTR_INS $i2,$s1,2,8
        lbu     $t11,2($i3)             # Te4[s3>>24]
+       $PTR_INS $i3,$s2,2,8
+# endif
+       _ins    $t0,16
+       _ins    $t1,16
+       _ins    $t2,16
+       _ins    $t3,16
 
+       _ins2   $t0,$t4,8
+       lbu     $t4,2($i0)              # Te4[s3]
+       _ins2   $t1,$t5,8
+       lbu     $t5,2($i1)              # Te4[s0]
+       _ins2   $t2,$t6,8
+       lbu     $t6,2($i2)              # Te4[s1]
+       _ins2   $t3,$t7,8
+       lbu     $t7,2($i3)              # Te4[s2]
+
+       _ins2   $t0,$t8,24
+       lw      $s0,0($key0)
+       _ins2   $t1,$t9,24
+       lw      $s1,4($key0)
+       _ins2   $t2,$t10,24
+       lw      $s2,8($key0)
+       _ins2   $t3,$t11,24
+       lw      $s3,12($key0)
+
+       _ins2   $t0,$t4,0
+       _ins2   $t1,$t5,0
+       _ins2   $t2,$t6,0
+       _ins2   $t3,$t7,0
+#else
+       lbu     $t4,2($i0)              # Te4[s2>>8]
+       _xtr    $i0,$s0,24-2
+       lbu     $t5,2($i1)              # Te4[s3>>8]
+       _xtr    $i1,$s1,24-2
+       lbu     $t6,2($i2)              # Te4[s0>>8]
+       _xtr    $i2,$s2,24-2
+       lbu     $t7,2($i3)              # Te4[s1>>8]
+       _xtr    $i3,$s3,24-2
+
+       and     $i0,0x3fc
+       and     $i1,0x3fc
+       and     $i2,0x3fc
+       and     $i3,0x3fc
+       $PTR_ADD $i0,$Tbl
+       $PTR_ADD $i1,$Tbl
+       $PTR_ADD $i2,$Tbl
+       $PTR_ADD $i3,$Tbl
+       lbu     $t8,2($i0)              # Te4[s0>>24]
        _xtr    $i0,$s3,0-2
+       lbu     $t9,2($i1)              # Te4[s1>>24]
        _xtr    $i1,$s0,0-2
+       lbu     $t10,2($i2)             # Te4[s2>>24]
        _xtr    $i2,$s1,0-2
+       lbu     $t11,2($i3)             # Te4[s3>>24]
        _xtr    $i3,$s2,0-2
+
        and     $i0,0x3fc
        and     $i1,0x3fc
        and     $i2,0x3fc
        and     $i3,0x3fc
+       $PTR_ADD $i0,$Tbl
+       $PTR_ADD $i1,$Tbl
+       $PTR_ADD $i2,$Tbl
+       $PTR_ADD $i3,$Tbl
 
        _ins    $t0,16
        _ins    $t1,16
@@ -306,27 +569,21 @@ _mips_AES_encrypt:
        _ins    $t7,8
 
        xor     $t0,$t4
-       xor     $t1,$t5
-       xor     $t2,$t6
-       xor     $t3,$t7
-
-       $PTR_ADD $i0,$Tbl
-       $PTR_ADD $i1,$Tbl
-       $PTR_ADD $i2,$Tbl
-       $PTR_ADD $i3,$Tbl
        lbu     $t4,2($i0)              # Te4[s3]
+       xor     $t1,$t5
        lbu     $t5,2($i1)              # Te4[s0]
+       xor     $t2,$t6
        lbu     $t6,2($i2)              # Te4[s1]
+       xor     $t3,$t7
        lbu     $t7,2($i3)              # Te4[s2]
 
        _ins    $t8,24
-       _ins    $t9,24
-       _ins    $t10,24
-       _ins    $t11,24
-
        lw      $s0,0($key0)
+       _ins    $t9,24
        lw      $s1,4($key0)
+       _ins    $t10,24
        lw      $s2,8($key0)
+       _ins    $t11,24
        lw      $s3,12($key0)
 
        xor     $t0,$t8
@@ -343,7 +600,7 @@ _mips_AES_encrypt:
        xor     $t1,$t5
        xor     $t2,$t6
        xor     $t3,$t7
-
+#endif
        xor     $s0,$t0
        xor     $s1,$t1
        xor     $s2,$t2
@@ -455,6 +712,89 @@ _mips_AES_decrypt:
        xor     $s3,$t3
 
        sub     $cnt,1
+#if defined(__mips_smartmips)
+       ext     $i0,$s3,16,8
+.Loop_dec:
+       ext     $i1,$s0,16,8
+       ext     $i2,$s1,16,8
+       ext     $i3,$s2,16,8
+       lwxs    $t0,$i0($Tbl)           # Td1[s3>>16]
+       ext     $i0,$s2,8,8
+       lwxs    $t1,$i1($Tbl)           # Td1[s0>>16]
+       ext     $i1,$s3,8,8
+       lwxs    $t2,$i2($Tbl)           # Td1[s1>>16]
+       ext     $i2,$s0,8,8
+       lwxs    $t3,$i3($Tbl)           # Td1[s2>>16]
+       ext     $i3,$s1,8,8
+
+       lwxs    $t4,$i0($Tbl)           # Td2[s2>>8]
+       ext     $i0,$s1,0,8
+       lwxs    $t5,$i1($Tbl)           # Td2[s3>>8]
+       ext     $i1,$s2,0,8
+       lwxs    $t6,$i2($Tbl)           # Td2[s0>>8]
+       ext     $i2,$s3,0,8
+       lwxs    $t7,$i3($Tbl)           # Td2[s1>>8]
+       ext     $i3,$s0,0,8
+
+       lwxs    $t8,$i0($Tbl)           # Td3[s1]
+       ext     $i0,$s0,24,8
+       lwxs    $t9,$i1($Tbl)           # Td3[s2]
+       ext     $i1,$s1,24,8
+       lwxs    $t10,$i2($Tbl)          # Td3[s3]
+       ext     $i2,$s2,24,8
+       lwxs    $t11,$i3($Tbl)          # Td3[s0]
+       ext     $i3,$s3,24,8
+
+       rotr    $t0,$t0,8
+       rotr    $t1,$t1,8
+       rotr    $t2,$t2,8
+       rotr    $t3,$t3,8
+
+       rotr    $t4,$t4,16
+       rotr    $t5,$t5,16
+       rotr    $t6,$t6,16
+       rotr    $t7,$t7,16
+
+       xor     $t0,$t4
+       lwxs    $t4,$i0($Tbl)           # Td0[s0>>24]
+       xor     $t1,$t5
+       lwxs    $t5,$i1($Tbl)           # Td0[s1>>24]
+       xor     $t2,$t6
+       lwxs    $t6,$i2($Tbl)           # Td0[s2>>24]
+       xor     $t3,$t7
+       lwxs    $t7,$i3($Tbl)           # Td0[s3>>24]
+
+       rotr    $t8,$t8,24
+       lw      $s0,0($key0)
+       rotr    $t9,$t9,24
+       lw      $s1,4($key0)
+       rotr    $t10,$t10,24
+       lw      $s2,8($key0)
+       rotr    $t11,$t11,24
+       lw      $s3,12($key0)
+
+       xor     $t0,$t8
+       xor     $t1,$t9
+       xor     $t2,$t10
+       xor     $t3,$t11
+
+       xor     $t0,$t4
+       xor     $t1,$t5
+       xor     $t2,$t6
+       xor     $t3,$t7
+
+       sub     $cnt,1
+       $PTR_ADD $key0,16
+       xor     $s0,$t0
+       xor     $s1,$t1
+       xor     $s2,$t2
+       xor     $s3,$t3
+       .set    noreorder
+       bnez    $cnt,.Loop_dec
+       ext     $i0,$s3,16,8
+
+       _xtr    $i0,$s3,16-2
+#else
        _xtr    $i0,$s3,16-2
 .Loop_dec:
        _xtr    $i1,$s0,16-2
@@ -468,19 +808,88 @@ _mips_AES_decrypt:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       lw      $t0,0($i0)              # Td1[s3>>16]
+       _xtr    $i0,$s2,8-2
+       lw      $t1,0($i1)              # Td1[s0>>16]
+       _xtr    $i1,$s3,8-2
+       lw      $t2,0($i2)              # Td1[s1>>16]
+       _xtr    $i2,$s0,8-2
+       lw      $t3,0($i3)              # Td1[s2>>16]
+       _xtr    $i3,$s1,8-2
+#else
        lwl     $t0,3($i0)              # Td1[s3>>16]
        lwl     $t1,3($i1)              # Td1[s0>>16]
        lwl     $t2,3($i2)              # Td1[s1>>16]
        lwl     $t3,3($i3)              # Td1[s2>>16]
        lwr     $t0,2($i0)              # Td1[s3>>16]
-       lwr     $t1,2($i1)              # Td1[s0>>16]
-       lwr     $t2,2($i2)              # Td1[s1>>16]
-       lwr     $t3,2($i3)              # Td1[s2>>16]
-
        _xtr    $i0,$s2,8-2
+       lwr     $t1,2($i1)              # Td1[s0>>16]
        _xtr    $i1,$s3,8-2
+       lwr     $t2,2($i2)              # Td1[s1>>16]
        _xtr    $i2,$s0,8-2
+       lwr     $t3,2($i3)              # Td1[s2>>16]
        _xtr    $i3,$s1,8-2
+#endif
+
+       and     $i0,0x3fc
+       and     $i1,0x3fc
+       and     $i2,0x3fc
+       and     $i3,0x3fc
+       $PTR_ADD $i0,$Tbl
+       $PTR_ADD $i1,$Tbl
+       $PTR_ADD $i2,$Tbl
+       $PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       rotr    $t0,$t0,8
+       rotr    $t1,$t1,8
+       rotr    $t2,$t2,8
+       rotr    $t3,$t3,8
+# if defined(_MIPSEL)
+       lw      $t4,0($i0)              # Td2[s2>>8]
+       _xtr    $i0,$s1,0-2
+       lw      $t5,0($i1)              # Td2[s3>>8]
+       _xtr    $i1,$s2,0-2
+       lw      $t6,0($i2)              # Td2[s0>>8]
+       _xtr    $i2,$s3,0-2
+       lw      $t7,0($i3)              # Td2[s1>>8]
+       _xtr    $i3,$s0,0-2
+
+       and     $i0,0x3fc
+       and     $i1,0x3fc
+       and     $i2,0x3fc
+       and     $i3,0x3fc
+       $PTR_ADD $i0,$Tbl
+       $PTR_ADD $i1,$Tbl
+       $PTR_ADD $i2,$Tbl
+       $PTR_ADD $i3,$Tbl
+       lw      $t8,0($i0)              # Td3[s1]
+       $PTR_INS $i0,$s0,2,8
+       lw      $t9,0($i1)              # Td3[s2]
+       $PTR_INS $i1,$s1,2,8
+       lw      $t10,0($i2)             # Td3[s3]
+       $PTR_INS $i2,$s2,2,8
+       lw      $t11,0($i3)             # Td3[s0]
+       $PTR_INS $i3,$s3,2,8
+#else
+       lw      $t4,0($i0)              # Td2[s2>>8]
+       $PTR_INS $i0,$s1,2,8
+       lw      $t5,0($i1)              # Td2[s3>>8]
+       $PTR_INS $i1,$s2,2,8
+       lw      $t6,0($i2)              # Td2[s0>>8]
+       $PTR_INS $i2,$s3,2,8
+       lw      $t7,0($i3)              # Td2[s1>>8]
+       $PTR_INS $i3,$s0,2,8
+
+       lw      $t8,0($i0)              # Td3[s1]
+       _xtr    $i0,$s0,24-2
+       lw      $t9,0($i1)              # Td3[s2]
+       _xtr    $i1,$s1,24-2
+       lw      $t10,0($i2)             # Td3[s3]
+       _xtr    $i2,$s2,24-2
+       lw      $t11,0($i3)             # Td3[s0]
+       _xtr    $i3,$s3,24-2
+
        and     $i0,0x3fc
        and     $i1,0x3fc
        and     $i2,0x3fc
@@ -489,19 +898,30 @@ _mips_AES_decrypt:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
+#endif
+       rotr    $t4,$t4,16
+       rotr    $t5,$t5,16
+       rotr    $t6,$t6,16
+       rotr    $t7,$t7,16
+
+       rotr    $t8,$t8,24
+       rotr    $t9,$t9,24
+       rotr    $t10,$t10,24
+       rotr    $t11,$t11,24
+#else
        lwl     $t4,2($i0)              # Td2[s2>>8]
        lwl     $t5,2($i1)              # Td2[s3>>8]
        lwl     $t6,2($i2)              # Td2[s0>>8]
        lwl     $t7,2($i3)              # Td2[s1>>8]
        lwr     $t4,1($i0)              # Td2[s2>>8]
-       lwr     $t5,1($i1)              # Td2[s3>>8]
-       lwr     $t6,1($i2)              # Td2[s0>>8]
-       lwr     $t7,1($i3)              # Td2[s1>>8]
-
        _xtr    $i0,$s1,0-2
+       lwr     $t5,1($i1)              # Td2[s3>>8]
        _xtr    $i1,$s2,0-2
+       lwr     $t6,1($i2)              # Td2[s0>>8]
        _xtr    $i2,$s3,0-2
+       lwr     $t7,1($i3)              # Td2[s1>>8]
        _xtr    $i3,$s0,0-2
+
        and     $i0,0x3fc
        and     $i1,0x3fc
        and     $i2,0x3fc
@@ -515,14 +935,14 @@ _mips_AES_decrypt:
        lwl     $t10,1($i2)             # Td3[s3]
        lwl     $t11,1($i3)             # Td3[s0]
        lwr     $t8,0($i0)              # Td3[s1]
-       lwr     $t9,0($i1)              # Td3[s2]
-       lwr     $t10,0($i2)             # Td3[s3]
-       lwr     $t11,0($i3)             # Td3[s0]
-
        _xtr    $i0,$s0,24-2
+       lwr     $t9,0($i1)              # Td3[s2]
        _xtr    $i1,$s1,24-2
+       lwr     $t10,0($i2)             # Td3[s3]
        _xtr    $i2,$s2,24-2
+       lwr     $t11,0($i3)             # Td3[s0]
        _xtr    $i3,$s3,24-2
+
        and     $i0,0x3fc
        and     $i1,0x3fc
        and     $i2,0x3fc
@@ -531,27 +951,25 @@ _mips_AES_decrypt:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
+#endif
 
        xor     $t0,$t4
-       xor     $t1,$t5
-       xor     $t2,$t6
-       xor     $t3,$t7
-
-
        lw      $t4,0($i0)              # Td0[s0>>24]
+       xor     $t1,$t5
        lw      $t5,0($i1)              # Td0[s1>>24]
+       xor     $t2,$t6
        lw      $t6,0($i2)              # Td0[s2>>24]
+       xor     $t3,$t7
        lw      $t7,0($i3)              # Td0[s3>>24]
 
-       lw      $s0,0($key0)
-       lw      $s1,4($key0)
-       lw      $s2,8($key0)
-       lw      $s3,12($key0)
-
        xor     $t0,$t8
+       lw      $s0,0($key0)
        xor     $t1,$t9
+       lw      $s1,4($key0)
        xor     $t2,$t10
+       lw      $s2,8($key0)
        xor     $t3,$t11
+       lw      $s3,12($key0)
 
        xor     $t0,$t4
        xor     $t1,$t5
@@ -567,38 +985,39 @@ _mips_AES_decrypt:
        .set    noreorder
        bnez    $cnt,.Loop_dec
        _xtr    $i0,$s3,16-2
+#endif
 
        .set    reorder
        lw      $t4,1024($Tbl)          # prefetch Td4
-       lw      $t5,1024+32($Tbl)
-       lw      $t6,1024+64($Tbl)
-       lw      $t7,1024+96($Tbl)
-       lw      $t8,1024+128($Tbl)
-       lw      $t9,1024+160($Tbl)
-       lw      $t10,1024+192($Tbl)
-       lw      $t11,1024+224($Tbl)
-
        _xtr    $i0,$s3,16
+       lw      $t5,1024+32($Tbl)
        _xtr    $i1,$s0,16
+       lw      $t6,1024+64($Tbl)
        _xtr    $i2,$s1,16
+       lw      $t7,1024+96($Tbl)
        _xtr    $i3,$s2,16
+       lw      $t8,1024+128($Tbl)
        and     $i0,0xff
+       lw      $t9,1024+160($Tbl)
        and     $i1,0xff
+       lw      $t10,1024+192($Tbl)
        and     $i2,0xff
+       lw      $t11,1024+224($Tbl)
        and     $i3,0xff
+
        $PTR_ADD $i0,$Tbl
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
        lbu     $t0,1024($i0)           # Td4[s3>>16]
-       lbu     $t1,1024($i1)           # Td4[s0>>16]
-       lbu     $t2,1024($i2)           # Td4[s1>>16]
-       lbu     $t3,1024($i3)           # Td4[s2>>16]
-
        _xtr    $i0,$s2,8
+       lbu     $t1,1024($i1)           # Td4[s0>>16]
        _xtr    $i1,$s3,8
+       lbu     $t2,1024($i2)           # Td4[s1>>16]
        _xtr    $i2,$s0,8
+       lbu     $t3,1024($i3)           # Td4[s2>>16]
        _xtr    $i3,$s1,8
+
        and     $i0,0xff
        and     $i1,0xff
        and     $i2,0xff
@@ -607,29 +1026,108 @@ _mips_AES_decrypt:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+# if defined(_MIPSEL)
        lbu     $t4,1024($i0)           # Td4[s2>>8]
+       $PTR_INS $i0,$s0,0,8
        lbu     $t5,1024($i1)           # Td4[s3>>8]
+       $PTR_INS $i1,$s1,0,8
        lbu     $t6,1024($i2)           # Td4[s0>>8]
+       $PTR_INS $i2,$s2,0,8
        lbu     $t7,1024($i3)           # Td4[s1>>8]
+       $PTR_INS $i3,$s3,0,8
 
+       lbu     $t8,1024($i0)           # Td4[s0>>24]
+       _xtr    $i0,$s1,0
+       lbu     $t9,1024($i1)           # Td4[s1>>24]
+       _xtr    $i1,$s2,0
+       lbu     $t10,1024($i2)          # Td4[s2>>24]
+       _xtr    $i2,$s3,0
+       lbu     $t11,1024($i3)          # Td4[s3>>24]
+       _xtr    $i3,$s0,0
+
+       $PTR_ADD $i0,$Tbl
+       $PTR_ADD $i1,$Tbl
+       $PTR_ADD $i2,$Tbl
+       $PTR_ADD $i3,$Tbl
+# else
+       lbu     $t4,1024($i0)           # Td4[s2>>8]
        _xtr    $i0,$s0,24
+       lbu     $t5,1024($i1)           # Td4[s3>>8]
        _xtr    $i1,$s1,24
+       lbu     $t6,1024($i2)           # Td4[s0>>8]
        _xtr    $i2,$s2,24
+       lbu     $t7,1024($i3)           # Td4[s1>>8]
        _xtr    $i3,$s3,24
+
        $PTR_ADD $i0,$Tbl
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
        lbu     $t8,1024($i0)           # Td4[s0>>24]
+       $PTR_INS $i0,$s1,0,8
        lbu     $t9,1024($i1)           # Td4[s1>>24]
+       $PTR_INS $i1,$s2,0,8
        lbu     $t10,1024($i2)          # Td4[s2>>24]
+       $PTR_INS $i2,$s3,0,8
        lbu     $t11,1024($i3)          # Td4[s3>>24]
+       $PTR_INS $i3,$s0,0,8
+# endif
+       _ins    $t0,16
+       _ins    $t1,16
+       _ins    $t2,16
+       _ins    $t3,16
+
+       _ins2   $t0,$t4,8
+       lbu     $t4,1024($i0)           # Td4[s1]
+       _ins2   $t1,$t5,8
+       lbu     $t5,1024($i1)           # Td4[s2]
+       _ins2   $t2,$t6,8
+       lbu     $t6,1024($i2)           # Td4[s3]
+       _ins2   $t3,$t7,8
+       lbu     $t7,1024($i3)           # Td4[s0]
+
+       _ins2   $t0,$t8,24
+       lw      $s0,0($key0)
+       _ins2   $t1,$t9,24
+       lw      $s1,4($key0)
+       _ins2   $t2,$t10,24
+       lw      $s2,8($key0)
+       _ins2   $t3,$t11,24
+       lw      $s3,12($key0)
 
+       _ins2   $t0,$t4,0
+       _ins2   $t1,$t5,0
+       _ins2   $t2,$t6,0
+       _ins2   $t3,$t7,0
+#else
+       lbu     $t4,1024($i0)           # Td4[s2>>8]
+       _xtr    $i0,$s0,24
+       lbu     $t5,1024($i1)           # Td4[s3>>8]
+       _xtr    $i1,$s1,24
+       lbu     $t6,1024($i2)           # Td4[s0>>8]
+       _xtr    $i2,$s2,24
+       lbu     $t7,1024($i3)           # Td4[s1>>8]
+       _xtr    $i3,$s3,24
+
+       $PTR_ADD $i0,$Tbl
+       $PTR_ADD $i1,$Tbl
+       $PTR_ADD $i2,$Tbl
+       $PTR_ADD $i3,$Tbl
+       lbu     $t8,1024($i0)           # Td4[s0>>24]
        _xtr    $i0,$s1,0
+       lbu     $t9,1024($i1)           # Td4[s1>>24]
        _xtr    $i1,$s2,0
+       lbu     $t10,1024($i2)          # Td4[s2>>24]
        _xtr    $i2,$s3,0
+       lbu     $t11,1024($i3)          # Td4[s3>>24]
        _xtr    $i3,$s0,0
 
+       $PTR_ADD $i0,$Tbl
+       $PTR_ADD $i1,$Tbl
+       $PTR_ADD $i2,$Tbl
+       $PTR_ADD $i3,$Tbl
+
        _ins    $t0,16
        _ins    $t1,16
        _ins    $t2,16
@@ -641,44 +1139,38 @@ _mips_AES_decrypt:
        _ins    $t7,8
 
        xor     $t0,$t4
-       xor     $t1,$t5
-       xor     $t2,$t6
-       xor     $t3,$t7
-
-       $PTR_ADD $i0,$Tbl
-       $PTR_ADD $i1,$Tbl
-       $PTR_ADD $i2,$Tbl
-       $PTR_ADD $i3,$Tbl
        lbu     $t4,1024($i0)           # Td4[s1]
+       xor     $t1,$t5
        lbu     $t5,1024($i1)           # Td4[s2]
+       xor     $t2,$t6
        lbu     $t6,1024($i2)           # Td4[s3]
+       xor     $t3,$t7
        lbu     $t7,1024($i3)           # Td4[s0]
 
        _ins    $t8,24
-       _ins    $t9,24
-       _ins    $t10,24
-       _ins    $t11,24
-
        lw      $s0,0($key0)
+       _ins    $t9,24
        lw      $s1,4($key0)
+       _ins    $t10,24
        lw      $s2,8($key0)
+       _ins    $t11,24
        lw      $s3,12($key0)
 
-       _ins    $t4,0
-       _ins    $t5,0
-       _ins    $t6,0
-       _ins    $t7,0
-
-
        xor     $t0,$t8
        xor     $t1,$t9
        xor     $t2,$t10
        xor     $t3,$t11
 
+       _ins    $t4,0
+       _ins    $t5,0
+       _ins    $t6,0
+       _ins    $t7,0
+
        xor     $t0,$t4
        xor     $t1,$t5
        xor     $t2,$t6
        xor     $t3,$t7
+#endif
 
        xor     $s0,$t0
        xor     $s1,$t1
@@ -791,7 +1283,7 @@ _mips_AES_set_encrypt_key:
        beqz    $inp,.Lekey_done
        li      $t0,-1
        beqz    $key,.Lekey_done
-       $PTR_ADD $rcon,$Tbl,1024+256
+       $PTR_ADD $rcon,$Tbl,256
 
        .set    reorder
        lwl     $rk0,0+$MSB($inp)       # load 128 bits
@@ -843,10 +1335,10 @@ _mips_AES_set_encrypt_key:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
-       lbu     $i0,1024($i0)
-       lbu     $i1,1024($i1)
-       lbu     $i2,1024($i2)
-       lbu     $i3,1024($i3)
+       lbu     $i0,0($i0)
+       lbu     $i1,0($i1)
+       lbu     $i2,0($i2)
+       lbu     $i3,0($i3)
 
        sw      $rk0,0($key)
        sw      $rk1,4($key)
@@ -898,10 +1390,10 @@ _mips_AES_set_encrypt_key:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
-       lbu     $i0,1024($i0)
-       lbu     $i1,1024($i1)
-       lbu     $i2,1024($i2)
-       lbu     $i3,1024($i3)
+       lbu     $i0,0($i0)
+       lbu     $i1,0($i1)
+       lbu     $i2,0($i2)
+       lbu     $i3,0($i3)
 
        sw      $rk0,0($key)
        sw      $rk1,4($key)
@@ -957,10 +1449,10 @@ _mips_AES_set_encrypt_key:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
-       lbu     $i0,1024($i0)
-       lbu     $i1,1024($i1)
-       lbu     $i2,1024($i2)
-       lbu     $i3,1024($i3)
+       lbu     $i0,0($i0)
+       lbu     $i1,0($i1)
+       lbu     $i2,0($i2)
+       lbu     $i3,0($i3)
 
        sw      $rk0,0($key)
        sw      $rk1,4($key)
@@ -999,10 +1491,10 @@ _mips_AES_set_encrypt_key:
        $PTR_ADD $i1,$Tbl
        $PTR_ADD $i2,$Tbl
        $PTR_ADD $i3,$Tbl
-       lbu     $i0,1024($i0)
-       lbu     $i1,1024($i1)
-       lbu     $i2,1024($i2)
-       lbu     $i3,1024($i3)
+       lbu     $i0,0($i0)
+       lbu     $i1,0($i1)
+       lbu     $i2,0($i2)
+       lbu     $i3,0($i3)
        sll     $i0,24
        sll     $i1,16
        sll     $i2,8
@@ -1064,7 +1556,7 @@ $code.=<<___ if ($flavour !~ /o32/i);     # non-o32 PIC-ification
 ___
 $code.=<<___;
        .set    reorder
-       la      $Tbl,AES_Te             # PIC-ified 'load address'
+       la      $Tbl,AES_Te4            # PIC-ified 'load address'
 
        bal     _mips_AES_set_encrypt_key
 
@@ -1119,7 +1611,7 @@ $code.=<<___ if ($flavour !~ /o32/i);     # non-o32 PIC-ification
 ___
 $code.=<<___;
        .set    reorder
-       la      $Tbl,AES_Te             # PIC-ified 'load address'
+       la      $Tbl,AES_Te4            # PIC-ified 'load address'
 
        bal     _mips_AES_set_encrypt_key
 
@@ -1190,6 +1682,16 @@ $code.=<<___;
        xor     $tpb,$tp9,$tp2
        xor     $tpd,$tp9,$tp4
 
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       rotr    $tp1,$tpd,16
+        xor    $tpe,$tp2
+       rotr    $tp2,$tp9,8
+       xor     $tpe,$tp1
+       rotr    $tp4,$tpb,24
+       xor     $tpe,$tp2
+       lw      $tp1,4($key)            # modulo-scheduled
+       xor     $tpe,$tp4
+#else
        _ror    $tp1,$tpd,16
         xor    $tpe,$tp2
        _ror    $tp2,$tpd,-16
@@ -1204,6 +1706,7 @@ $code.=<<___;
        xor     $tpe,$tp1
        lw      $tp1,4($key)            # modulo-scheduled
        xor     $tpe,$tp2
+#endif
        sub     $cnt,1
        sw      $tpe,0($key)
        $PTR_ADD $key,4
@@ -1234,7 +1737,7 @@ ___
 # Tables are kept in endian-neutral manner
 $code.=<<___;
 .rdata
-.align 6
+.align 10
 AES_Te:
 .byte  0xc6,0x63,0x63,0xa5,    0xf8,0x7c,0x7c,0x84     # Te0
 .byte  0xee,0x77,0x77,0x99,    0xf6,0x7b,0x7b,0x8d
@@ -1365,46 +1868,6 @@ AES_Te:
 .byte  0x7b,0xb0,0xb0,0xcb,    0xa8,0x54,0x54,0xfc
 .byte  0x6d,0xbb,0xbb,0xd6,    0x2c,0x16,0x16,0x3a
 
-.byte  0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
-.byte  0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
-.byte  0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
-.byte  0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
-.byte  0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
-.byte  0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
-.byte  0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
-.byte  0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
-.byte  0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
-.byte  0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
-.byte  0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
-.byte  0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
-.byte  0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
-.byte  0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
-.byte  0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
-.byte  0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
-.byte  0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
-.byte  0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
-.byte  0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
-.byte  0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
-.byte  0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
-.byte  0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
-.byte  0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
-.byte  0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
-.byte  0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
-.byte  0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
-.byte  0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
-.byte  0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
-.byte  0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
-.byte  0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
-.byte  0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
-.byte  0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
-
-.byte  0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
-.byte  0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
-.byte  0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
-.byte  0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
-.byte  0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
-
-.align 6
 AES_Td:
 .byte  0x51,0xf4,0xa7,0x50,    0x7e,0x41,0x65,0x53     # Td0
 .byte  0x1a,0x17,0xa4,0xc3,    0x3a,0x27,0x5e,0x96
@@ -1567,6 +2030,46 @@ AES_Td:
 .byte  0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
 .byte  0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
 .byte  0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+
+AES_Te4:
+.byte  0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5  # Te4
+.byte  0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+.byte  0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+.byte  0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+.byte  0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+.byte  0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+.byte  0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+.byte  0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+.byte  0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+.byte  0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+.byte  0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+.byte  0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+.byte  0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+.byte  0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+.byte  0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+.byte  0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+.byte  0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+.byte  0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+.byte  0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+.byte  0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+.byte  0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+.byte  0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+.byte  0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+.byte  0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+.byte  0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+.byte  0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+.byte  0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+.byte  0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+.byte  0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+.byte  0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+.byte  0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+.byte  0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+
+.byte  0x01,0x00,0x00,0x00,    0x02,0x00,0x00,0x00     # rcon
+.byte  0x04,0x00,0x00,0x00,    0x08,0x00,0x00,0x00
+.byte  0x10,0x00,0x00,0x00,    0x20,0x00,0x00,0x00
+.byte  0x40,0x00,0x00,0x00,    0x80,0x00,0x00,0x00
+.byte  0x1B,0x00,0x00,0x00,    0x36,0x00,0x00,0x00
 ___
 \f
 foreach (split("\n",$code)) {
@@ -1583,6 +2086,9 @@ foreach (split("\n",$code)) {
            s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
                sprintf("sll\t$1,$2,%d",$big_endian ?   eval($3)
                                        :               eval("24-$3"))/e or
+           s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
+               sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3)
+                                       :               eval("24-$3"))/e or
            s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
                sprintf("srl\t$1,$2,%d",$big_endian ?   eval($3)
                                        :               eval("$3*-1"))/e or
@@ -1605,6 +2111,11 @@ foreach (split("\n",$code)) {
                sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
        }
 
+       if (!$big_endian) {
+           s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
+           s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
+       }
+
        print $_,"\n";
 }
 
index 7c52cbe..7a99fc3 100644 (file)
@@ -45,6 +45,8 @@ if ($flavour =~ /64/) {
        $PUSH   ="stw";
 } else { die "nonsense $flavour"; }
 
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
+
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
@@ -68,7 +70,7 @@ $key="r5";
 $Tbl0="r3";
 $Tbl1="r6";
 $Tbl2="r7";
-$Tbl3="r2";
+$Tbl3=$out;    # stay away from "r2"; $out is offloaded to stack
 
 $s0="r8";
 $s1="r9";
@@ -76,7 +78,7 @@ $s2="r10";
 $s3="r11";
 
 $t0="r12";
-$t1="r13";
+$t1="r0";      # stay away from "r13";
 $t2="r14";
 $t3="r15";
 
@@ -100,9 +102,6 @@ $acc13="r29";
 $acc14="r30";
 $acc15="r31";
 
-# stay away from TLS pointer
-if ($SIZE_T==8)        { die if ($t1 ne "r13");  $t1="r0";             }
-else           { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0";  }
 $mask80=$Tbl2;
 $mask1b=$Tbl3;
 
@@ -337,8 +336,7 @@ $code.=<<___;
        $STU    $sp,-$FRAME($sp)
        mflr    r0
 
-       $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
-       $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
+       $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
        $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
        $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
        $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
@@ -365,16 +363,61 @@ $code.=<<___;
        bne     Lenc_unaligned
 
 Lenc_unaligned_ok:
+___
+$code.=<<___ if (!$LITTLE_ENDIAN);
        lwz     $s0,0($inp)
        lwz     $s1,4($inp)
        lwz     $s2,8($inp)
        lwz     $s3,12($inp)
+___
+$code.=<<___ if ($LITTLE_ENDIAN);
+       lwz     $t0,0($inp)
+       lwz     $t1,4($inp)
+       lwz     $t2,8($inp)
+       lwz     $t3,12($inp)
+       rotlwi  $s0,$t0,8
+       rotlwi  $s1,$t1,8
+       rotlwi  $s2,$t2,8
+       rotlwi  $s3,$t3,8
+       rlwimi  $s0,$t0,24,0,7
+       rlwimi  $s1,$t1,24,0,7
+       rlwimi  $s2,$t2,24,0,7
+       rlwimi  $s3,$t3,24,0,7
+       rlwimi  $s0,$t0,24,16,23
+       rlwimi  $s1,$t1,24,16,23
+       rlwimi  $s2,$t2,24,16,23
+       rlwimi  $s3,$t3,24,16,23
+___
+$code.=<<___;
        bl      LAES_Te
        bl      Lppc_AES_encrypt_compact
+       $POP    $out,`$FRAME-$SIZE_T*19`($sp)
+___
+$code.=<<___ if ($LITTLE_ENDIAN);
+       rotlwi  $t0,$s0,8
+       rotlwi  $t1,$s1,8
+       rotlwi  $t2,$s2,8
+       rotlwi  $t3,$s3,8
+       rlwimi  $t0,$s0,24,0,7
+       rlwimi  $t1,$s1,24,0,7
+       rlwimi  $t2,$s2,24,0,7
+       rlwimi  $t3,$s3,24,0,7
+       rlwimi  $t0,$s0,24,16,23
+       rlwimi  $t1,$s1,24,16,23
+       rlwimi  $t2,$s2,24,16,23
+       rlwimi  $t3,$s3,24,16,23
+       stw     $t0,0($out)
+       stw     $t1,4($out)
+       stw     $t2,8($out)
+       stw     $t3,12($out)
+___
+$code.=<<___ if (!$LITTLE_ENDIAN);
        stw     $s0,0($out)
        stw     $s1,4($out)
        stw     $s2,8($out)
        stw     $s3,12($out)
+___
+$code.=<<___;
        b       Lenc_done
 
 Lenc_unaligned:
@@ -417,6 +460,7 @@ Lenc_xpage:
 
        bl      LAES_Te
        bl      Lppc_AES_encrypt_compact
+       $POP    $out,`$FRAME-$SIZE_T*19`($sp)
 
        extrwi  $acc00,$s0,8,0
        extrwi  $acc01,$s0,8,8
@@ -449,8 +493,6 @@ Lenc_xpage:
 
 Lenc_done:
        $POP    r0,`$FRAME+$LRSAVE`($sp)
-       $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
-       $POP    r13,`$FRAME-$SIZE_T*19`($sp)
        $POP    r14,`$FRAME-$SIZE_T*18`($sp)
        $POP    r15,`$FRAME-$SIZE_T*17`($sp)
        $POP    r16,`$FRAME-$SIZE_T*16`($sp)
@@ -764,6 +806,7 @@ Lenc_compact_done:
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
+.size  .AES_encrypt,.-.AES_encrypt
 
 .globl .AES_decrypt
 .align 7
@@ -771,8 +814,7 @@ Lenc_compact_done:
        $STU    $sp,-$FRAME($sp)
        mflr    r0
 
-       $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
-       $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
+       $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
        $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
        $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
        $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
@@ -799,16 +841,61 @@ Lenc_compact_done:
        bne     Ldec_unaligned
 
 Ldec_unaligned_ok:
+___
+$code.=<<___ if (!$LITTLE_ENDIAN);
        lwz     $s0,0($inp)
        lwz     $s1,4($inp)
        lwz     $s2,8($inp)
        lwz     $s3,12($inp)
+___
+$code.=<<___ if ($LITTLE_ENDIAN);
+       lwz     $t0,0($inp)
+       lwz     $t1,4($inp)
+       lwz     $t2,8($inp)
+       lwz     $t3,12($inp)
+       rotlwi  $s0,$t0,8
+       rotlwi  $s1,$t1,8
+       rotlwi  $s2,$t2,8
+       rotlwi  $s3,$t3,8
+       rlwimi  $s0,$t0,24,0,7
+       rlwimi  $s1,$t1,24,0,7
+       rlwimi  $s2,$t2,24,0,7
+       rlwimi  $s3,$t3,24,0,7
+       rlwimi  $s0,$t0,24,16,23
+       rlwimi  $s1,$t1,24,16,23
+       rlwimi  $s2,$t2,24,16,23
+       rlwimi  $s3,$t3,24,16,23
+___
+$code.=<<___;
        bl      LAES_Td
        bl      Lppc_AES_decrypt_compact
+       $POP    $out,`$FRAME-$SIZE_T*19`($sp)
+___
+$code.=<<___ if ($LITTLE_ENDIAN);
+       rotlwi  $t0,$s0,8
+       rotlwi  $t1,$s1,8
+       rotlwi  $t2,$s2,8
+       rotlwi  $t3,$s3,8
+       rlwimi  $t0,$s0,24,0,7
+       rlwimi  $t1,$s1,24,0,7
+       rlwimi  $t2,$s2,24,0,7
+       rlwimi  $t3,$s3,24,0,7
+       rlwimi  $t0,$s0,24,16,23
+       rlwimi  $t1,$s1,24,16,23
+       rlwimi  $t2,$s2,24,16,23
+       rlwimi  $t3,$s3,24,16,23
+       stw     $t0,0($out)
+       stw     $t1,4($out)
+       stw     $t2,8($out)
+       stw     $t3,12($out)
+___
+$code.=<<___ if (!$LITTLE_ENDIAN);
        stw     $s0,0($out)
        stw     $s1,4($out)
        stw     $s2,8($out)
        stw     $s3,12($out)
+___
+$code.=<<___;
        b       Ldec_done
 
 Ldec_unaligned:
@@ -851,6 +938,7 @@ Ldec_xpage:
 
        bl      LAES_Td
        bl      Lppc_AES_decrypt_compact
+       $POP    $out,`$FRAME-$SIZE_T*19`($sp)
 
        extrwi  $acc00,$s0,8,0
        extrwi  $acc01,$s0,8,8
@@ -883,8 +971,6 @@ Ldec_xpage:
 
 Ldec_done:
        $POP    r0,`$FRAME+$LRSAVE`($sp)
-       $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
-       $POP    r13,`$FRAME-$SIZE_T*19`($sp)
        $POP    r14,`$FRAME-$SIZE_T*18`($sp)
        $POP    r15,`$FRAME-$SIZE_T*17`($sp)
        $POP    r16,`$FRAME-$SIZE_T*16`($sp)
@@ -1355,6 +1441,7 @@ Ldec_compact_done:
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
+.size  .AES_decrypt,.-.AES_decrypt
 
 .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
 .align 7
index 34cbb5d..47f4163 100755 (executable)
 # Performance in number of cycles per processed byte for 128-bit key:
 #
 #              ECB encrypt     ECB decrypt     CBC large chunk
-# AMD64                33              41              13.0
-# EM64T                38              59              18.6(*)
-# Core 2       30              43              14.5(*)
+# AMD64                33              43              13.0
+# EM64T                38              56              18.6(*)
+# Core 2       30              42              14.5(*)
+# Atom         65              86              32.1(*)
 #
 # (*) with hyper-threading off
 
@@ -366,68 +367,66 @@ $code.=<<___;
        movzb   `&lo("$s0")`,$t0
        movzb   `&lo("$s1")`,$t1
        movzb   `&lo("$s2")`,$t2
-       movzb   ($sbox,$t0,1),$t0
-       movzb   ($sbox,$t1,1),$t1
-       movzb   ($sbox,$t2,1),$t2
-
        movzb   `&lo("$s3")`,$t3
        movzb   `&hi("$s1")`,$acc0
        movzb   `&hi("$s2")`,$acc1
+       shr     \$16,$s2
+       movzb   `&hi("$s3")`,$acc2
+       movzb   ($sbox,$t0,1),$t0
+       movzb   ($sbox,$t1,1),$t1
+       movzb   ($sbox,$t2,1),$t2
        movzb   ($sbox,$t3,1),$t3
-       movzb   ($sbox,$acc0,1),$t4     #$t0
-       movzb   ($sbox,$acc1,1),$t5     #$t1
 
-       movzb   `&hi("$s3")`,$acc2
+       movzb   ($sbox,$acc0,1),$t4     #$t0
        movzb   `&hi("$s0")`,$acc0
-       shr     \$16,$s2
+       movzb   ($sbox,$acc1,1),$t5     #$t1
+       movzb   `&lo("$s2")`,$acc1
        movzb   ($sbox,$acc2,1),$acc2   #$t2
        movzb   ($sbox,$acc0,1),$acc0   #$t3
-       shr     \$16,$s3
 
-       movzb   `&lo("$s2")`,$acc1
        shl     \$8,$t4
+       shr     \$16,$s3
        shl     \$8,$t5
-       movzb   ($sbox,$acc1,1),$acc1   #$t0
        xor     $t4,$t0
-       xor     $t5,$t1
-
-       movzb   `&lo("$s3")`,$t4
        shr     \$16,$s0
+       movzb   `&lo("$s3")`,$t4
        shr     \$16,$s1
-       movzb   `&lo("$s0")`,$t5
+       xor     $t5,$t1
        shl     \$8,$acc2
-       shl     \$8,$acc0
-       movzb   ($sbox,$t4,1),$t4       #$t1
-       movzb   ($sbox,$t5,1),$t5       #$t2
+       movzb   `&lo("$s0")`,$t5
+       movzb   ($sbox,$acc1,1),$acc1   #$t0
        xor     $acc2,$t2
-       xor     $acc0,$t3
 
+       shl     \$8,$acc0
        movzb   `&lo("$s1")`,$acc2
-       movzb   `&hi("$s3")`,$acc0
        shl     \$16,$acc1
-       movzb   ($sbox,$acc2,1),$acc2   #$t3
-       movzb   ($sbox,$acc0,1),$acc0   #$t0
+       xor     $acc0,$t3
+       movzb   ($sbox,$t4,1),$t4       #$t1
+       movzb   `&hi("$s3")`,$acc0
+       movzb   ($sbox,$t5,1),$t5       #$t2
        xor     $acc1,$t0
 
-       movzb   `&hi("$s0")`,$acc1
        shr     \$8,$s2
+       movzb   `&hi("$s0")`,$acc1
+       shl     \$16,$t4
        shr     \$8,$s1
+       shl     \$16,$t5
+       xor     $t4,$t1
+       movzb   ($sbox,$acc2,1),$acc2   #$t3
+       movzb   ($sbox,$acc0,1),$acc0   #$t0
        movzb   ($sbox,$acc1,1),$acc1   #$t1
        movzb   ($sbox,$s2,1),$s3       #$t3
        movzb   ($sbox,$s1,1),$s2       #$t2
-       shl     \$16,$t4
-       shl     \$16,$t5
+
        shl     \$16,$acc2
-       xor     $t4,$t1
        xor     $t5,$t2
-       xor     $acc2,$t3
-
        shl     \$24,$acc0
+       xor     $acc2,$t3
        shl     \$24,$acc1
-       shl     \$24,$s3
        xor     $acc0,$t0
-       shl     \$24,$s2
+       shl     \$24,$s3
        xor     $acc1,$t1
+       shl     \$24,$s2
        mov     $t0,$s0
        mov     $t1,$s1
        xor     $t2,$s2
@@ -466,12 +465,12 @@ sub enctransform()
 { my ($t3,$r20,$r21)=($acc2,"%r8d","%r9d");
 
 $code.=<<___;
-       mov     $s0,$acc0
-       mov     $s1,$acc1
-       and     \$0x80808080,$acc0
-       and     \$0x80808080,$acc1
-       mov     $acc0,$t0
-       mov     $acc1,$t1
+       mov     \$0x80808080,$t0
+       mov     \$0x80808080,$t1
+       and     $s0,$t0
+       and     $s1,$t1
+       mov     $t0,$acc0
+       mov     $t1,$acc1
        shr     \$7,$t0
        lea     ($s0,$s0),$r20
        shr     \$7,$t1
@@ -489,25 +488,25 @@ $code.=<<___;
 
        xor     $r20,$s0
        xor     $r21,$s1
-        mov    $s2,$acc0
-        mov    $s3,$acc1
+        mov    \$0x80808080,$t2
        rol     \$24,$s0
+        mov    \$0x80808080,$t3
        rol     \$24,$s1
-        and    \$0x80808080,$acc0
-        and    \$0x80808080,$acc1
+        and    $s2,$t2
+        and    $s3,$t3
        xor     $r20,$s0
        xor     $r21,$s1
-        mov    $acc0,$t2
-        mov    $acc1,$t3
+        mov    $t2,$acc0
        ror     \$16,$t0
+        mov    $t3,$acc1
        ror     \$16,$t1
-        shr    \$7,$t2
         lea    ($s2,$s2),$r20
+        shr    \$7,$t2
        xor     $t0,$s0
-       xor     $t1,$s1
         shr    \$7,$t3
-        lea    ($s3,$s3),$r21
+       xor     $t1,$s1
        ror     \$8,$t0
+        lea    ($s3,$s3),$r21
        ror     \$8,$t1
         sub    $t2,$acc0
         sub    $t3,$acc1
@@ -523,23 +522,23 @@ $code.=<<___;
        xor     $acc0,$r20
        xor     $acc1,$r21
 
+       ror     \$16,$t2
        xor     $r20,$s2
+       ror     \$16,$t3
        xor     $r21,$s3
        rol     \$24,$s2
+       mov     0($sbox),$acc0                  # prefetch Te4
        rol     \$24,$s3
        xor     $r20,$s2
-       xor     $r21,$s3
-       mov     0($sbox),$acc0                  # prefetch Te4
-       ror     \$16,$t2
-       ror     \$16,$t3
        mov     64($sbox),$acc1
-       xor     $t2,$s2
-       xor     $t3,$s3
+       xor     $r21,$s3
        mov     128($sbox),$r20
+       xor     $t2,$s2
        ror     \$8,$t2
+       xor     $t3,$s3
        ror     \$8,$t3
-       mov     192($sbox),$r21
        xor     $t2,$s2
+       mov     192($sbox),$r21
        xor     $t3,$s3
 ___
 }
@@ -936,70 +935,69 @@ $code.=<<___;
        movzb   `&lo("$s0")`,$t0
        movzb   `&lo("$s1")`,$t1
        movzb   `&lo("$s2")`,$t2
-       movzb   ($sbox,$t0,1),$t0
-       movzb   ($sbox,$t1,1),$t1
-       movzb   ($sbox,$t2,1),$t2
-
        movzb   `&lo("$s3")`,$t3
        movzb   `&hi("$s3")`,$acc0
        movzb   `&hi("$s0")`,$acc1
+       shr     \$16,$s3
+       movzb   `&hi("$s1")`,$acc2
+       movzb   ($sbox,$t0,1),$t0
+       movzb   ($sbox,$t1,1),$t1
+       movzb   ($sbox,$t2,1),$t2
        movzb   ($sbox,$t3,1),$t3
-       movzb   ($sbox,$acc0,1),$t4     #$t0
-       movzb   ($sbox,$acc1,1),$t5     #$t1
 
-       movzb   `&hi("$s1")`,$acc2
+       movzb   ($sbox,$acc0,1),$t4     #$t0
        movzb   `&hi("$s2")`,$acc0
-       shr     \$16,$s2
+       movzb   ($sbox,$acc1,1),$t5     #$t1
        movzb   ($sbox,$acc2,1),$acc2   #$t2
        movzb   ($sbox,$acc0,1),$acc0   #$t3
-       shr     \$16,$s3
 
-       movzb   `&lo("$s2")`,$acc1
-       shl     \$8,$t4
+       shr     \$16,$s2
        shl     \$8,$t5
-       movzb   ($sbox,$acc1,1),$acc1   #$t0
-       xor     $t4,$t0
-       xor     $t5,$t1
-
-       movzb   `&lo("$s3")`,$t4
+       shl     \$8,$t4
+       movzb   `&lo("$s2")`,$acc1
        shr     \$16,$s0
+       xor     $t4,$t0
        shr     \$16,$s1
-       movzb   `&lo("$s0")`,$t5
+       movzb   `&lo("$s3")`,$t4
+
        shl     \$8,$acc2
+       xor     $t5,$t1
        shl     \$8,$acc0
-       movzb   ($sbox,$t4,1),$t4       #$t1
-       movzb   ($sbox,$t5,1),$t5       #$t2
+       movzb   `&lo("$s0")`,$t5
+       movzb   ($sbox,$acc1,1),$acc1   #$t0
        xor     $acc2,$t2
-       xor     $acc0,$t3
-
        movzb   `&lo("$s1")`,$acc2
-       movzb   `&hi("$s1")`,$acc0
+
        shl     \$16,$acc1
+       xor     $acc0,$t3
+       movzb   ($sbox,$t4,1),$t4       #$t1
+       movzb   `&hi("$s1")`,$acc0
        movzb   ($sbox,$acc2,1),$acc2   #$t3
-       movzb   ($sbox,$acc0,1),$acc0   #$t0
        xor     $acc1,$t0
-
+       movzb   ($sbox,$t5,1),$t5       #$t2
        movzb   `&hi("$s2")`,$acc1
+
+       shl     \$16,$acc2
        shl     \$16,$t4
        shl     \$16,$t5
-       movzb   ($sbox,$acc1,1),$s1     #$t1
+       xor     $acc2,$t3
+       movzb   `&hi("$s3")`,$acc2
        xor     $t4,$t1
+       shr     \$8,$s0
        xor     $t5,$t2
 
-       movzb   `&hi("$s3")`,$acc1
-       shr     \$8,$s0
-       shl     \$16,$acc2
-       movzb   ($sbox,$acc1,1),$s2     #$t2
+       movzb   ($sbox,$acc0,1),$acc0   #$t0
+       movzb   ($sbox,$acc1,1),$s1     #$t1
+       movzb   ($sbox,$acc2,1),$s2     #$t2
        movzb   ($sbox,$s0,1),$s3       #$t3
-       xor     $acc2,$t3
 
+       mov     $t0,$s0
        shl     \$24,$acc0
        shl     \$24,$s1
        shl     \$24,$s2
-       xor     $acc0,$t0
+       xor     $acc0,$s0
        shl     \$24,$s3
        xor     $t1,$s1
-       mov     $t0,$s0
        xor     $t2,$s2
        xor     $t3,$s3
 ___
@@ -1014,12 +1012,12 @@ sub dectransform()
   my $prefetch = shift;
 
 $code.=<<___;
-       mov     $tp10,$acc0
-       mov     $tp18,$acc8
-       and     $mask80,$acc0
-       and     $mask80,$acc8
-       mov     $acc0,$tp40
-       mov     $acc8,$tp48
+       mov     $mask80,$tp40
+       mov     $mask80,$tp48
+       and     $tp10,$tp40
+       and     $tp18,$tp48
+       mov     $tp40,$acc0
+       mov     $tp48,$acc8
        shr     \$7,$tp40
        lea     ($tp10,$tp10),$tp20
        shr     \$7,$tp48
@@ -1030,15 +1028,15 @@ $code.=<<___;
        and     $maskfe,$tp28
        and     $mask1b,$acc0
        and     $mask1b,$acc8
-       xor     $tp20,$acc0
-       xor     $tp28,$acc8
-       mov     $acc0,$tp20
-       mov     $acc8,$tp28
-
-       and     $mask80,$acc0
-       and     $mask80,$acc8
-       mov     $acc0,$tp80
-       mov     $acc8,$tp88
+       xor     $acc0,$tp20
+       xor     $acc8,$tp28
+       mov     $mask80,$tp80
+       mov     $mask80,$tp88
+
+       and     $tp20,$tp80
+       and     $tp28,$tp88
+       mov     $tp80,$acc0
+       mov     $tp88,$acc8
        shr     \$7,$tp80
        lea     ($tp20,$tp20),$tp40
        shr     \$7,$tp88
@@ -1049,15 +1047,15 @@ $code.=<<___;
        and     $maskfe,$tp48
        and     $mask1b,$acc0
        and     $mask1b,$acc8
-       xor     $tp40,$acc0
-       xor     $tp48,$acc8
-       mov     $acc0,$tp40
-       mov     $acc8,$tp48
-
-       and     $mask80,$acc0
-       and     $mask80,$acc8
-       mov     $acc0,$tp80
-       mov     $acc8,$tp88
+       xor     $acc0,$tp40
+       xor     $acc8,$tp48
+       mov     $mask80,$tp80
+       mov     $mask80,$tp88
+
+       and     $tp40,$tp80
+       and     $tp48,$tp88
+       mov     $tp80,$acc0
+       mov     $tp88,$acc8
        shr     \$7,$tp80
         xor    $tp10,$tp20             # tp2^=tp1
        shr     \$7,$tp88
@@ -1082,51 +1080,51 @@ $code.=<<___;
        mov     $tp10,$acc0
        mov     $tp18,$acc8
        xor     $tp80,$tp40             # tp4^tp1^=tp8
-       xor     $tp88,$tp48             # tp4^tp1^=tp8
        shr     \$32,$acc0
+       xor     $tp88,$tp48             # tp4^tp1^=tp8
        shr     \$32,$acc8
        xor     $tp20,$tp80             # tp8^=tp8^tp2^tp1=tp2^tp1
-       xor     $tp28,$tp88             # tp8^=tp8^tp2^tp1=tp2^tp1
        rol     \$8,`&LO("$tp10")`      # ROTATE(tp1^tp8,8)
+       xor     $tp28,$tp88             # tp8^=tp8^tp2^tp1=tp2^tp1
        rol     \$8,`&LO("$tp18")`      # ROTATE(tp1^tp8,8)
        xor     $tp40,$tp80             # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2
+       rol     \$8,`&LO("$acc0")`      # ROTATE(tp1^tp8,8)
        xor     $tp48,$tp88             # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2
 
-       rol     \$8,`&LO("$acc0")`      # ROTATE(tp1^tp8,8)
        rol     \$8,`&LO("$acc8")`      # ROTATE(tp1^tp8,8)
        xor     `&LO("$tp80")`,`&LO("$tp10")`
-       xor     `&LO("$tp88")`,`&LO("$tp18")`
        shr     \$32,$tp80
+       xor     `&LO("$tp88")`,`&LO("$tp18")`
        shr     \$32,$tp88
        xor     `&LO("$tp80")`,`&LO("$acc0")`
        xor     `&LO("$tp88")`,`&LO("$acc8")`
 
        mov     $tp20,$tp80
-       mov     $tp28,$tp88
-       shr     \$32,$tp80
-       shr     \$32,$tp88
        rol     \$24,`&LO("$tp20")`     # ROTATE(tp2^tp1^tp8,24)
+       mov     $tp28,$tp88
        rol     \$24,`&LO("$tp28")`     # ROTATE(tp2^tp1^tp8,24)
-       rol     \$24,`&LO("$tp80")`     # ROTATE(tp2^tp1^tp8,24)
-       rol     \$24,`&LO("$tp88")`     # ROTATE(tp2^tp1^tp8,24)
+       shr     \$32,$tp80
        xor     `&LO("$tp20")`,`&LO("$tp10")`
+       shr     \$32,$tp88
        xor     `&LO("$tp28")`,`&LO("$tp18")`
+       rol     \$24,`&LO("$tp80")`     # ROTATE(tp2^tp1^tp8,24)
        mov     $tp40,$tp20
+       rol     \$24,`&LO("$tp88")`     # ROTATE(tp2^tp1^tp8,24)
        mov     $tp48,$tp28
+       shr     \$32,$tp20
        xor     `&LO("$tp80")`,`&LO("$acc0")`
+       shr     \$32,$tp28
        xor     `&LO("$tp88")`,`&LO("$acc8")`
 
        `"mov   0($sbox),$mask80"       if ($prefetch)`
-       shr     \$32,$tp20
-       shr     \$32,$tp28
-       `"mov   64($sbox),$maskfe"      if ($prefetch)`
        rol     \$16,`&LO("$tp40")`     # ROTATE(tp4^tp1^tp8,16)
+       `"mov   64($sbox),$maskfe"      if ($prefetch)`
        rol     \$16,`&LO("$tp48")`     # ROTATE(tp4^tp1^tp8,16)
        `"mov   128($sbox),$mask1b"     if ($prefetch)`
        rol     \$16,`&LO("$tp20")`     # ROTATE(tp4^tp1^tp8,16)
-       rol     \$16,`&LO("$tp28")`     # ROTATE(tp4^tp1^tp8,16)
        `"mov   192($sbox),$tp80"       if ($prefetch)`
        xor     `&LO("$tp40")`,`&LO("$tp10")`
+       rol     \$16,`&LO("$tp28")`     # ROTATE(tp4^tp1^tp8,16)
        xor     `&LO("$tp48")`,`&LO("$tp18")`
        `"mov   256($sbox),$tp88"       if ($prefetch)`
        xor     `&LO("$tp20")`,`&LO("$acc0")`
@@ -1302,10 +1300,6 @@ private_AES_set_encrypt_key:
 
        call    _x86_64_AES_set_encrypt_key
 
-       mov     8(%rsp),%r15
-       mov     16(%rsp),%r14
-       mov     24(%rsp),%r13
-       mov     32(%rsp),%r12
        mov     40(%rsp),%rbp
        mov     48(%rsp),%rbx
        add     \$56,%rsp
diff --git a/crypto/aes/asm/aesni-mb-x86_64.pl b/crypto/aes/asm/aesni-mb-x86_64.pl
new file mode 100644 (file)
index 0000000..33b1aed
--- /dev/null
@@ -0,0 +1,1395 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# Multi-buffer AES-NI procedures process several independent buffers
+# in parallel by interleaving independent instructions.
+#
+# Cycles per byte for interleave factor 4:
+#
+#                      asymptotic      measured
+#                      ---------------------------
+# Westmere             5.00/4=1.25     5.13/4=1.28
+# Atom                 15.0/4=3.75     ?15.7/4=3.93
+# Sandy Bridge         5.06/4=1.27     5.18/4=1.29
+# Ivy Bridge           5.06/4=1.27     5.14/4=1.29
+# Haswell              4.44/4=1.11     4.44/4=1.11
+# Bulldozer            5.75/4=1.44     5.76/4=1.44
+#
+# Cycles per byte for interleave factor 8 (not implemented for
+# pre-AVX processors, where higher interleave factor incidentally
+# doesn't result in improvement):
+#
+#                      asymptotic      measured
+#                      ---------------------------
+# Sandy Bridge         5.06/8=0.64     7.10/8=0.89(*)
+# Ivy Bridge           5.06/8=0.64     7.14/8=0.89(*)
+# Haswell              5.00/8=0.63     5.00/8=0.63
+# Bulldozer            5.75/8=0.72     5.77/8=0.72
+#
+# (*)  Sandy/Ivy Bridge are known to handle high interleave factors
+#      suboptimally.
+
+$flavour = shift;      # 1st command-line argument: perlasm flavour
+$output  = shift;      # 2nd command-line argument: output file name
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }  # a lone argument containing '.' is taken as the output file
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);  # Win64 target: nasm/masm/mingw64 flavour or .asm output
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's own path
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+$avx=0;        # assembler capability level (0, 1 or 2); raised by the toolchain probes below
+
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.19) + ($1>=2.22); # one level per version threshold met
+}
+
+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+          `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.09) + ($1>=2.10);
+}
+
+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+          `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=11);
+}
+
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+       $avx = ($2>=3.0) + ($2>3.0);
+}
+
+open OUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!";   # pipe everything through the translator; fail loudly if the pipe cannot be created
+*STDOUT=*OUT;  # from here on, plain print writes into the translator pipe
+
+# void aesni_multi_cbc_encrypt (
+#     struct { void *inp,*out; int blocks; double iv[2]; } inp[8],
+#     const AES_KEY *key,
+#     int num);                /* 1 or 2 */
+#
+$inp="%rdi";   # 1st arg
+$key="%rsi";   # 2nd arg
+$num="%edx";   # 3rd arg
+
+@inptr=map("%r$_",(8..11));    # per-lane input pointers (4 lanes)
+@outptr=map("%r$_",(12..15));  # per-lane output pointers (4 lanes)
+
+($rndkey0,$rndkey1)=("%xmm0","%xmm1"); # two round-key registers, used alternately
+@out=map("%xmm$_",(2..5));     # per-lane cipher state, seeded from each lane's IV
+@inp=map("%xmm$_",(6..9));     # per-lane input blocks
+($counters,$mask,$zero)=map("%xmm$_",(10..12));        # lane block counters, scratch mask for decrementing them, round-0 key (briefly cleared inside the loop)
+
+($rounds,$one,$sink,$offset)=("%eax","%ecx","%rbp","%rbx");    # rounds value read from the key structure, constant 1 (borrowed for block counts during set-up), sink pointer for finished lanes, running byte offset
+
+$code.=<<___;  # section directives and the function's entry label
+.text
+
+.extern        OPENSSL_ia32cap_P
+
+.globl aesni_multi_cbc_encrypt
+.type  aesni_multi_cbc_encrypt,\@function,3
+.align 32
+aesni_multi_cbc_encrypt:
+___
+$code.=<<___ if ($avx);        # run-time dispatch: jump to the AVX shortcut when num>=2 and the AVX bit is set
+       cmp     \$2,$num
+       jb      .Lenc_non_avx
+       mov     OPENSSL_ia32cap_P+4(%rip),%ecx
+       test    \$`1<<28`,%ecx                  # AVX bit
+       jnz     _avx_cbc_enc_shortcut
+       jmp     .Lenc_non_avx
+.align 16
+.Lenc_non_avx:
+___
+$code.=<<___;  # remember the incoming %rsp in %rax and push six general-purpose registers
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___ if ($win64);      # Win64 only: spill %xmm6-%xmm15 below the pushed registers
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,0x40(%rsp)
+       movaps  %xmm11,0x50(%rsp)
+       movaps  %xmm12,0x60(%rsp)
+       movaps  %xmm13,-0x68(%rax)      # not used, saved to share se_handler 
+       movaps  %xmm14,-0x58(%rax)
+       movaps  %xmm15,-0x48(%rax)
+___
+$code.=<<___;  # carve out a 64-byte-aligned scratch frame, load the round-0 key and bias the key/descriptor pointers
+       # stack layout
+       #
+       # +0    output sink
+       # +16   input sink [original %rsp and $num]
+       # +32   counters
+
+       sub     \$48,%rsp
+       and     \$-64,%rsp
+       mov     %rax,16(%rsp)                   # original %rsp
+
+.Lenc4x_body:
+       movdqu  ($key),$zero                    # 0-round key
+       lea     0x78($key),$key                 # size optimization
+       lea     40*2($inp),$inp
+
+.Lenc4x_loop_grande:
+       mov     $num,24(%rsp)                   # original $num
+       xor     $num,$num
+___
+for($i=0;$i<4;$i++) {  # emit one copy of the set-up code per lane; descriptors are 40 bytes apart
+    $code.=<<___;
+       mov     `40*$i+16-40*2`($inp),$one      # borrow $one for number of blocks
+       mov     `40*$i+0-40*2`($inp),@inptr[$i]
+       cmp     $num,$one
+       mov     `40*$i+8-40*2`($inp),@outptr[$i]
+       cmovg   $one,$num                       # find maximum
+       test    $one,$one
+       movdqu  `40*$i+24-40*2`($inp),@out[$i]  # load IV
+       mov     $one,`32+4*$i`(%rsp)            # initialize counters
+       cmovle  %rsp,@inptr[$i]                 # cancel input
+___
+}
+$code.=<<___;  # skip if no lane has work; fold round-0 key and first input block into each IV; open the round loop
+       test    $num,$num
+       jz      .Lenc4x_done
+
+       movups  0x10-0x78($key),$rndkey1
+        pxor   $zero,@out[0]
+       movups  0x20-0x78($key),$rndkey0
+        pxor   $zero,@out[1]
+       mov     0xf0-0x78($key),$rounds
+        pxor   $zero,@out[2]
+       movdqu  (@inptr[0]),@inp[0]             # load inputs
+        pxor   $zero,@out[3]
+       movdqu  (@inptr[1]),@inp[1]
+        pxor   @inp[0],@out[0]
+       movdqu  (@inptr[2]),@inp[2]
+        pxor   @inp[1],@out[1]
+       movdqu  (@inptr[3]),@inp[3]
+        pxor   @inp[2],@out[2]
+        pxor   @inp[3],@out[3]
+       movdqa  32(%rsp),$counters              # load counters
+       xor     $offset,$offset
+       jmp     .Loop_enc4x
+
+.align 32
+.Loop_enc4x:
+       add     \$16,$offset
+       lea     16(%rsp),$sink                  # sink pointer
+       mov     \$1,$one                        # constant of 1
+       sub     $offset,$sink
+
+       aesenc          $rndkey1,@out[0]
+       prefetcht0      31(@inptr[0],$offset)   # prefetch input
+       prefetcht0      31(@inptr[1],$offset)
+       aesenc          $rndkey1,@out[1]
+       prefetcht0      31(@inptr[2],$offset)
+       prefetcht0      31(@inptr[3],$offset)
+       aesenc          $rndkey1,@out[2]
+       aesenc          $rndkey1,@out[3]
+       movups          0x30-0x78($key),$rndkey1
+___
+for($i=0;$i<4;$i++) {  # one AES round per iteration; iteration $i also updates lane $i's pointers
+my $rndkey = ($i&1) ? $rndkey1 : $rndkey0;     # alternate between the two round-key registers
+$code.=<<___;
+        cmp            `32+4*$i`(%rsp),$one
+       aesenc          $rndkey,@out[0]
+       aesenc          $rndkey,@out[1]
+       aesenc          $rndkey,@out[2]
+        cmovge         $sink,@inptr[$i]        # cancel input
+        cmovg          $sink,@outptr[$i]       # sink output
+       aesenc          $rndkey,@out[3]
+       movups          `0x40+16*$i-0x78`($key),$rndkey
+___
+}
+$code.=<<___;
+        movdqa         $counters,$mask
+       aesenc          $rndkey0,@out[0]
+       prefetcht0      15(@outptr[0],$offset)  # prefetch output
+       prefetcht0      15(@outptr[1],$offset)
+       aesenc          $rndkey0,@out[1]
+       prefetcht0      15(@outptr[2],$offset)
+       prefetcht0      15(@outptr[3],$offset)
+       aesenc          $rndkey0,@out[2]
+       aesenc          $rndkey0,@out[3]
+       movups          0x80-0x78($key),$rndkey0
+        pxor           $zero,$zero
+
+       aesenc          $rndkey1,@out[0]
+        pcmpgtd        $zero,$mask
+        movdqu         -0x78($key),$zero       # reload 0-round key
+       aesenc          $rndkey1,@out[1]
+        paddd          $mask,$counters         # decrement counters
+        movdqa         $counters,32(%rsp)      # update counters
+       aesenc          $rndkey1,@out[2]
+       aesenc          $rndkey1,@out[3]
+       movups          0x90-0x78($key),$rndkey1
+
+       cmp     \$11,$rounds
+
+       aesenc          $rndkey0,@out[0]
+       aesenc          $rndkey0,@out[1]
+       aesenc          $rndkey0,@out[2]
+       aesenc          $rndkey0,@out[3]
+       movups          0xa0-0x78($key),$rndkey0
+
+       jb      .Lenc4x_tail
+
+       aesenc          $rndkey1,@out[0]
+       aesenc          $rndkey1,@out[1]
+       aesenc          $rndkey1,@out[2]
+       aesenc          $rndkey1,@out[3]
+       movups          0xb0-0x78($key),$rndkey1
+
+       aesenc          $rndkey0,@out[0]
+       aesenc          $rndkey0,@out[1]
+       aesenc          $rndkey0,@out[2]
+       aesenc          $rndkey0,@out[3]
+       movups          0xc0-0x78($key),$rndkey0
+
+       je      .Lenc4x_tail
+
+       aesenc          $rndkey1,@out[0]
+       aesenc          $rndkey1,@out[1]
+       aesenc          $rndkey1,@out[2]
+       aesenc          $rndkey1,@out[3]
+       movups          0xd0-0x78($key),$rndkey1
+
+       aesenc          $rndkey0,@out[0]
+       aesenc          $rndkey0,@out[1]
+       aesenc          $rndkey0,@out[2]
+       aesenc          $rndkey0,@out[3]
+       movups          0xe0-0x78($key),$rndkey0
+       jmp     .Lenc4x_tail
+
+.align 32
+.Lenc4x_tail:
+       aesenc          $rndkey1,@out[0]
+       aesenc          $rndkey1,@out[1]
+       aesenc          $rndkey1,@out[2]
+       aesenc          $rndkey1,@out[3]
+        movdqu         (@inptr[0],$offset),@inp[0]
+       movdqu          0x10-0x78($key),$rndkey1
+
+       aesenclast      $rndkey0,@out[0]
+        movdqu         (@inptr[1],$offset),@inp[1]
+        pxor           $zero,@inp[0]
+       aesenclast      $rndkey0,@out[1]
+        movdqu         (@inptr[2],$offset),@inp[2]
+        pxor           $zero,@inp[1]
+       aesenclast      $rndkey0,@out[2]
+        movdqu         (@inptr[3],$offset),@inp[3]
+        pxor           $zero,@inp[2]
+       aesenclast      $rndkey0,@out[3]
+       movdqu          0x20-0x78($key),$rndkey0
+        pxor           $zero,@inp[3]
+
+       movups          @out[0],-16(@outptr[0],$offset)
+        pxor           @inp[0],@out[0]
+       movups          @out[1],-16(@outptr[1],$offset) 
+        pxor           @inp[1],@out[1]
+       movups          @out[2],-16(@outptr[2],$offset) 
+        pxor           @inp[2],@out[2]
+       movups          @out[3],-16(@outptr[3],$offset)
+        pxor           @inp[3],@out[3]
+
+       dec     $num
+       jnz     .Loop_enc4x
+
+       mov     16(%rsp),%rax                   # original %rsp
+       mov     24(%rsp),$num
+
+       #pxor   @inp[0],@out[0]
+       #pxor   @inp[1],@out[1]
+       #movdqu @out[0],`40*0+24-40*2`($inp)    # output iv FIX ME!
+       #pxor   @inp[2],@out[2]
+       #movdqu @out[1],`40*1+24-40*2`($inp)
+       #pxor   @inp[3],@out[3]
+       #movdqu @out[2],`40*2+24-40*2`($inp)    # won't fix, let caller
+       #movdqu @out[3],`40*3+24-40*2`($inp)    # figure this out...
+
+       lea     `40*4`($inp),$inp
+       dec     $num
+       jnz     .Lenc4x_loop_grande
+
+.Lenc4x_done:
+___
+$code.=<<___ if ($win64);
+       movaps  -0xd8(%rax),%xmm6
+       movaps  -0xc8(%rax),%xmm7
+       movaps  -0xb8(%rax),%xmm8
+       movaps  -0xa8(%rax),%xmm9
+       movaps  -0x98(%rax),%xmm10
+       movaps  -0x88(%rax),%xmm11
+       movaps  -0x78(%rax),%xmm12
+       #movaps -0x68(%rax),%xmm13
+       #movaps -0x58(%rax),%xmm14
+       #movaps -0x48(%rax),%xmm15
+___
+$code.=<<___;
+       mov     -48(%rax),%r15
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp
+.Lenc4x_epilogue:
+       ret
+.size  aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt
+
+.globl aesni_multi_cbc_decrypt         # exported CBC-decrypt entry point; the code below handles four lanes per pass
+.type  aesni_multi_cbc_decrypt,\@function,3
+.align 32
+aesni_multi_cbc_decrypt:
+___
+$code.=<<___ if ($avx);        # run-time dispatch, generated only when AVX code is emitted
+       cmp     \$2,$num
+       jb      .Ldec_non_avx                   # num below 2: stay on the non-AVX path
+       mov     OPENSSL_ia32cap_P+4(%rip),%ecx  # second dword of the capability vector
+       test    \$`1<<28`,%ecx                  # AVX bit
+       jnz     _avx_cbc_dec_shortcut           # AVX present: continue in aesni_multi_cbc_decrypt_avx
+       jmp     .Ldec_non_avx
+.align 16
+.Ldec_non_avx:
+___
+$code.=<<___;
+       mov     %rsp,%rax                       # entry stack pointer, needed by the epilogue
+       push    %rbx                            # these six registers are reloaded in the epilogue
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___ if ($win64);      # Win64 builds additionally save xmm6-xmm15
+       lea     -0xa8(%rsp),%rsp                # save area directly below the pushed registers
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,0x40(%rsp)
+       movaps  %xmm11,0x50(%rsp)
+       movaps  %xmm12,0x60(%rsp)
+       movaps  %xmm13,-0x68(%rax)      # not used, saved to share se_handler 
+       movaps  %xmm14,-0x58(%rax)
+       movaps  %xmm15,-0x48(%rax)
+___
+$code.=<<___;
+       # stack layout
+       #
+       # +0    output sink
+       # +16   input sink [original %rsp and $num]
+       # +32   counters
+
+       sub     \$48,%rsp                       # reserve the 48-byte scratch area laid out above
+       and     \$-64,%rsp                      # align it down to 64 bytes
+       mov     %rax,16(%rsp)                   # original %rsp
+
+.Ldec4x_body:
+       movdqu  ($key),$zero                    # 0-round key
+       lea     0x78($key),$key                 # size optimization: key material is addressed as off-0x78 from here on
+       lea     40*2($inp),$inp                 # descriptors are likewise addressed as off-40*2 from here on
+
+.Ldec4x_loop_grande:
+       mov     $num,24(%rsp)                   # original $num
+       xor     $num,$num                       # reused as running maximum of the block counts
+___
+for($i=0;$i<4;$i++) {          # one 40-byte descriptor per lane: +0 input, +8 output, +16 block count, +24 IV
+    $code.=<<___;
+       mov     `40*$i+16-40*2`($inp),$one      # borrow $one for number of blocks
+       mov     `40*$i+0-40*2`($inp),@inptr[$i] # input pointer
+       cmp     $num,$one
+       mov     `40*$i+8-40*2`($inp),@outptr[$i]        # output pointer
+       cmovg   $one,$num                       # find maximum
+       test    $one,$one                       # flags for the cmovle below
+       movdqu  `40*$i+24-40*2`($inp),@inp[$i]  # load IV
+       mov     $one,`32+4*$i`(%rsp)            # initialize counters
+       cmovle  %rsp,@inptr[$i]                 # cancel input: a lane without blocks reads from the stack instead
+___
+}
+$code.=<<___;
+       test    $num,$num
+       jz      .Ldec4x_done                    # maximum block count is zero: nothing to do
+
+       movups  0x10-0x78($key),$rndkey1        # round key at offset 0x10 of the schedule
+       movups  0x20-0x78($key),$rndkey0        # round key at offset 0x20
+       mov     0xf0-0x78($key),$rounds         # rounds field at offset 0xf0
+       movdqu  (@inptr[0]),@out[0]             # load inputs
+       movdqu  (@inptr[1]),@out[1]
+        pxor   $zero,@out[0]                   # xor each input with the 0-round key
+       movdqu  (@inptr[2]),@out[2]
+        pxor   $zero,@out[1]
+       movdqu  (@inptr[3]),@out[3]
+        pxor   $zero,@out[2]
+        pxor   $zero,@out[3]
+       movdqa  32(%rsp),$counters              # load counters
+       xor     $offset,$offset                 # common byte offset into all four streams
+       jmp     .Loop_dec4x
+
+.align 32
+.Loop_dec4x:
+       add     \$16,$offset                    # one block further
+       lea     16(%rsp),$sink                  # sink pointer
+       mov     \$1,$one                        # constant of 1
+       sub     $offset,$sink                   # pre-subtract, so sink plus offset lands in the stack scratch area
+
+       aesdec          $rndkey1,@out[0]
+       prefetcht0      31(@inptr[0],$offset)   # prefetch input
+       prefetcht0      31(@inptr[1],$offset)
+       aesdec          $rndkey1,@out[1]
+       prefetcht0      31(@inptr[2],$offset)
+       prefetcht0      31(@inptr[3],$offset)
+       aesdec          $rndkey1,@out[2]
+       aesdec          $rndkey1,@out[3]
+       movups          0x30-0x78($key),$rndkey1        # next round key for this register
+___
+for($i=0;$i<4;$i++) {          # four more rounds; round $i also does the end-of-stream check for lane $i
+my $rndkey = ($i&1) ? $rndkey1 : $rndkey0;     # the two key registers are used alternately
+$code.=<<___;
+        cmp            `32+4*$i`(%rsp),$one    # compare 1 with the blocks this lane has left
+       aesdec          $rndkey,@out[0]
+       aesdec          $rndkey,@out[1]
+       aesdec          $rndkey,@out[2]
+        cmovge         $sink,@inptr[$i]        # cancel input (at most one block was left)
+        cmovg          $sink,@outptr[$i]       # sink output (no block was left)
+       aesdec          $rndkey,@out[3]
+       movups          `0x40+16*$i-0x78`($key),$rndkey # refill the register just used
+___
+}
+$code.=<<___;  # further rounds, with the counter update and output prefetches woven in
+        movdqa         $counters,$mask
+       aesdec          $rndkey0,@out[0]
+       prefetcht0      15(@outptr[0],$offset)  # prefetch output
+       prefetcht0      15(@outptr[1],$offset)
+       aesdec          $rndkey0,@out[1]
+       prefetcht0      15(@outptr[2],$offset)
+       prefetcht0      15(@outptr[3],$offset)
+       aesdec          $rndkey0,@out[2]
+       aesdec          $rndkey0,@out[3]
+       movups          0x80-0x78($key),$rndkey0
+        pxor           $zero,$zero             # cleared temporarily to serve as compare operand
+
+       aesdec          $rndkey1,@out[0]
+        pcmpgtd        $zero,$mask             # all-ones in every lane whose counter is positive
+        movdqu         -0x78($key),$zero       # reload 0-round key
+       aesdec          $rndkey1,@out[1]
+        paddd          $mask,$counters         # decrement counters
+        movdqa         $counters,32(%rsp)      # update counters
+       aesdec          $rndkey1,@out[2]
+       aesdec          $rndkey1,@out[3]
+       movups          0x90-0x78($key),$rndkey1
+
+       cmp     \$11,$rounds                    # result is consumed by both jb and je below
+
+       aesdec          $rndkey0,@out[0]
+       aesdec          $rndkey0,@out[1]
+       aesdec          $rndkey0,@out[2]
+       aesdec          $rndkey0,@out[3]
+       movups          0xa0-0x78($key),$rndkey0
+
+       jb      .Ldec4x_tail                    # rounds below 11: finish now
+
+       aesdec          $rndkey1,@out[0]
+       aesdec          $rndkey1,@out[1]
+       aesdec          $rndkey1,@out[2]
+       aesdec          $rndkey1,@out[3]
+       movups          0xb0-0x78($key),$rndkey1
+
+       aesdec          $rndkey0,@out[0]
+       aesdec          $rndkey0,@out[1]
+       aesdec          $rndkey0,@out[2]
+       aesdec          $rndkey0,@out[3]
+       movups          0xc0-0x78($key),$rndkey0
+
+       je      .Ldec4x_tail                    # rounds equal to 11: two extra rounds were enough
+
+       aesdec          $rndkey1,@out[0]        # rounds above 11: two further rounds
+       aesdec          $rndkey1,@out[1]
+       aesdec          $rndkey1,@out[2]
+       aesdec          $rndkey1,@out[3]
+       movups          0xd0-0x78($key),$rndkey1
+
+       aesdec          $rndkey0,@out[0]
+       aesdec          $rndkey0,@out[1]
+       aesdec          $rndkey0,@out[2]
+       aesdec          $rndkey0,@out[3]
+       movups          0xe0-0x78($key),$rndkey0
+       jmp     .Ldec4x_tail
+
+.align 32
+.Ldec4x_tail:
+       aesdec          $rndkey1,@out[0]        # last full round
+       aesdec          $rndkey1,@out[1]
+       aesdec          $rndkey1,@out[2]
+        pxor           $rndkey0,@inp[0]        # fold the final round key into the IV, so aesdeclast also does the CBC xor
+        pxor           $rndkey0,@inp[1]
+       aesdec          $rndkey1,@out[3]
+       movdqu          0x10-0x78($key),$rndkey1        # round key at 0x10 again, ready for the next pass
+        pxor           $rndkey0,@inp[2]
+        pxor           $rndkey0,@inp[3]
+       movdqu          0x20-0x78($key),$rndkey0        # round key at 0x20 again
+
+       aesdeclast      @inp[0],@out[0]
+       aesdeclast      @inp[1],@out[1]
+        movdqu         -16(@inptr[0],$offset),@inp[0]  # load next IV
+        movdqu         -16(@inptr[1],$offset),@inp[1]
+       aesdeclast      @inp[2],@out[2]
+       aesdeclast      @inp[3],@out[3]
+        movdqu         -16(@inptr[2],$offset),@inp[2]
+        movdqu         -16(@inptr[3],$offset),@inp[3]
+
+       movups          @out[0],-16(@outptr[0],$offset) # store the finished block
+        movdqu         (@inptr[0],$offset),@out[0]     # fetch the next input block
+       movups          @out[1],-16(@outptr[1],$offset) 
+        movdqu         (@inptr[1],$offset),@out[1]
+        pxor           $zero,@out[0]                   # and xor it with the 0-round key
+       movups          @out[2],-16(@outptr[2],$offset) 
+        movdqu         (@inptr[2],$offset),@out[2]
+        pxor           $zero,@out[1]
+       movups          @out[3],-16(@outptr[3],$offset)
+        movdqu         (@inptr[3],$offset),@out[3]
+        pxor           $zero,@out[2]
+        pxor           $zero,@out[3]
+
+       dec     $num                            # counts down from the largest block count found during setup
+       jnz     .Loop_dec4x
+
+       mov     16(%rsp),%rax                   # original %rsp
+       mov     24(%rsp),$num                   # value saved at the top of the outer loop
+
+       lea     `40*4`($inp),$inp               # advance past four 40-byte descriptors
+       dec     $num
+       jnz     .Ldec4x_loop_grande
+
+.Ldec4x_done:
+___
+$code.=<<___ if ($win64);      # Win64 builds: reload the xmm registers this path actually used
+       movaps  -0xd8(%rax),%xmm6               # same slot the prologue addressed as (%rsp)
+       movaps  -0xc8(%rax),%xmm7
+       movaps  -0xb8(%rax),%xmm8
+       movaps  -0xa8(%rax),%xmm9
+       movaps  -0x98(%rax),%xmm10
+       movaps  -0x88(%rax),%xmm11
+       movaps  -0x78(%rax),%xmm12
+       #movaps -0x68(%rax),%xmm13
+       #movaps -0x58(%rax),%xmm14
+       #movaps -0x48(%rax),%xmm15
+___
+$code.=<<___;
+       mov     -48(%rax),%r15                  # reload the six registers pushed in the prologue
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp                     # back to the entry stack pointer
+.Ldec4x_epilogue:
+       ret
+.size  aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
+___
+
+                                               if ($avx) {{{
+my @ptr = map { sprintf("%%r%d", $_) } (8..15);        # %r8..%r15: one pointer register per lane
+my $offload = $sink;                                   # the sink register doubles as off-load area pointer
+
+my @out = map { sprintf("%%xmm%d", $_) } (2..9);       # %xmm2..%xmm9: one cipher state per lane
+my @inp = map { sprintf("%%xmm%d", $_) } (10..13);     # %xmm10..%xmm13: input blocks, shared by lanes i and i+4
+my $counters = "%xmm14";
+my $zero = "%xmm15";
+
+$code.=<<___;
+.type  aesni_multi_cbc_encrypt_avx,\@function,3        # AVX flavour of the CBC encrypt, eight lanes per pass
+.align 32
+aesni_multi_cbc_encrypt_avx:
+_avx_cbc_enc_shortcut:                                 # presumably the target of the dispatch stub in aesni_multi_cbc_encrypt - confirm
+       mov     %rsp,%rax                       # entry stack pointer, needed by the epilogue
+       push    %rbx                            # these six registers are reloaded in the epilogue
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___ if ($win64);      # Win64 builds additionally save xmm6-xmm15
+       lea     -0xa8(%rsp),%rsp                # save area directly below the pushed registers
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,0x40(%rsp)
+       movaps  %xmm11,0x50(%rsp)
+       movaps  %xmm12,-0x78(%rax)
+       movaps  %xmm13,-0x68(%rax)
+       movaps  %xmm14,-0x58(%rax)
+       movaps  %xmm15,-0x48(%rax)
+___
+$code.=<<___;
+       # stack layout
+       #
+       # +0    output sink
+       # +16   input sink [original %rsp and $num]
+       # +32   counters
+       # +64   distances between inputs and outputs
+       # +128  off-load area for @inp[0..3]
+
+       sub     \$192,%rsp                      # reserve the 192-byte scratch area laid out above
+       and     \$-128,%rsp                     # align it down to 128 bytes
+       mov     %rax,16(%rsp)                   # original %rsp
+
+.Lenc8x_body:
+       vzeroupper
+       vmovdqu ($key),$zero                    # 0-round key
+       lea     0x78($key),$key                 # size optimization: key material is addressed as off-0x78 from here on
+       lea     40*4($inp),$inp                 # descriptors are likewise addressed as off-40*4 from here on
+       shr     \$1,$num                        # NOTE: result is not used, the register is cleared right below
+
+.Lenc8x_loop_grande:
+       #mov    $num,24(%rsp)                   # original $num
+       xor     $num,$num                       # reused as running maximum of the block counts
+___
+for($i=0;$i<8;$i++) {          # one 40-byte descriptor per lane: +0 input, +8 output, +16 block count, +24 IV
+  my $temp = $i ? $offload : $offset;          # lane 0 leaves its distance in $offset for the first loop pass
+    $code.=<<___;
+       mov     `40*$i+16-40*4`($inp),$one      # borrow $one for number of blocks
+       mov     `40*$i+0-40*4`($inp),@ptr[$i]   # input pointer
+       cmp     $num,$one
+       mov     `40*$i+8-40*4`($inp),$temp      # output pointer
+       cmovg   $one,$num                       # find maximum
+       test    $one,$one                       # flags for the cmovle below
+       vmovdqu `40*$i+24-40*4`($inp),@out[$i]  # load IV
+       mov     $one,`32+4*$i`(%rsp)            # initialize counters
+       cmovle  %rsp,@ptr[$i]                   # cancel input
+       sub     @ptr[$i],$temp                  # distance between input and output
+       mov     $temp,`64+8*$i`(%rsp)           # initialize distances
+___
+}
+$code.=<<___;
+       test    $num,$num
+       jz      .Lenc8x_done                    # maximum block count is zero: nothing to do
+
+       vmovups 0x10-0x78($key),$rndkey1        # round key at offset 0x10 of the schedule
+       vmovups 0x20-0x78($key),$rndkey0        # round key at offset 0x20
+       mov     0xf0-0x78($key),$rounds         # rounds field at offset 0xf0
+
+       vpxor   (@ptr[0]),$zero,@inp[0]         # load inputs and xor with 0-round
+        lea    128(%rsp),$offload              # offload area
+       vpxor   (@ptr[1]),$zero,@inp[1]
+       vpxor   (@ptr[2]),$zero,@inp[2]
+       vpxor   (@ptr[3]),$zero,@inp[3]
+        vpxor  @inp[0],@out[0],@out[0]         # IV xor first input: the state entering round 1
+       vpxor   (@ptr[4]),$zero,@inp[0]         # the four input registers are reused for lanes 4-7
+        vpxor  @inp[1],@out[1],@out[1]
+       vpxor   (@ptr[5]),$zero,@inp[1]
+        vpxor  @inp[2],@out[2],@out[2]
+       vpxor   (@ptr[6]),$zero,@inp[2]
+        vpxor  @inp[3],@out[3],@out[3]
+       vpxor   (@ptr[7]),$zero,@inp[3]
+        vpxor  @inp[0],@out[4],@out[4]
+       mov     \$1,$one                        # constant of 1
+        vpxor  @inp[1],@out[5],@out[5]
+        vpxor  @inp[2],@out[6],@out[6]
+        vpxor  @inp[3],@out[7],@out[7]
+       jmp     .Loop_enc8x
+
+.align 32
+.Loop_enc8x:
+___
+for($i=0;$i<8;$i++) {          # eight rounds; round $i also does the pointer bookkeeping for lane $i
+my $rndkey=($i&1)?$rndkey0:$rndkey1;           # the two key registers are used alternately
+$code.=<<___;
+       vaesenc         $rndkey,@out[0],@out[0]
+        cmp            32+4*$i(%rsp),$one              # compare 1 with the blocks this lane has left
+___
+$code.=<<___ if ($i);          # lane 0 already has its distance in $offset
+        mov            64+8*$i(%rsp),$offset           # distance between input and output of this lane
+___
+$code.=<<___;
+       vaesenc         $rndkey,@out[1],@out[1]
+       prefetcht0      31(@ptr[$i])                    # prefetch input
+       vaesenc         $rndkey,@out[2],@out[2]
+___
+$code.=<<___ if ($i>1);        # that lane's pointer was switched to output two rounds ago
+       prefetcht0      15(@ptr[$i-2])                  # prefetch output
+___
+$code.=<<___;
+       vaesenc         $rndkey,@out[3],@out[3]
+        lea            (@ptr[$i],$offset),$offset      # input plus distance is the output pointer
+        cmovge         %rsp,@ptr[$i]                   # cancel input
+       vaesenc         $rndkey,@out[4],@out[4]
+        cmovg          %rsp,$offset                    # sink output
+       vaesenc         $rndkey,@out[5],@out[5]
+        sub            @ptr[$i],$offset                # distance again, now for the possibly redirected pointers
+       vaesenc         $rndkey,@out[6],@out[6]
+        vpxor          16(@ptr[$i]),$zero,@inp[$i%4]   # load input and xor with 0-round
+        mov            $offset,64+8*$i(%rsp)           # store the distance back
+       vaesenc         $rndkey,@out[7],@out[7]
+       vmovups         `16*(3+$i)-0x78`($key),$rndkey  # refill the register just used
+        lea            16(@ptr[$i],$offset),@ptr[$i]   # switch to output
+___
+$code.=<<___ if ($i<4);        # lanes 0-3 must vacate the input registers for lanes 4-7
+        vmovdqu        @inp[$i%4],`16*$i`($offload)    # off-load
+___
+}
+$code.=<<___;  # $i is 8 after the loop, so the two prefetches below address lanes 6 and 7
+        vmovdqu        32(%rsp),$counters              # counters of lanes 0-3
+       prefetcht0      15(@ptr[$i-2])                  # prefetch output
+       prefetcht0      15(@ptr[$i-1])
+       cmp     \$11,$rounds                    # result is consumed by both jb and je below
+       jb      .Lenc8x_tail                    # rounds below 11: finish now
+
+       vaesenc         $rndkey1,@out[0],@out[0]
+       vaesenc         $rndkey1,@out[1],@out[1]
+       vaesenc         $rndkey1,@out[2],@out[2]
+       vaesenc         $rndkey1,@out[3],@out[3]
+       vaesenc         $rndkey1,@out[4],@out[4]
+       vaesenc         $rndkey1,@out[5],@out[5]
+       vaesenc         $rndkey1,@out[6],@out[6]
+       vaesenc         $rndkey1,@out[7],@out[7]
+       vmovups         0xb0-0x78($key),$rndkey1
+
+       vaesenc         $rndkey0,@out[0],@out[0]
+       vaesenc         $rndkey0,@out[1],@out[1]
+       vaesenc         $rndkey0,@out[2],@out[2]
+       vaesenc         $rndkey0,@out[3],@out[3]
+       vaesenc         $rndkey0,@out[4],@out[4]
+       vaesenc         $rndkey0,@out[5],@out[5]
+       vaesenc         $rndkey0,@out[6],@out[6]
+       vaesenc         $rndkey0,@out[7],@out[7]
+       vmovups         0xc0-0x78($key),$rndkey0
+       je      .Lenc8x_tail                    # rounds equal to 11: two extra rounds were enough
+
+       vaesenc         $rndkey1,@out[0],@out[0]        # rounds above 11: two further rounds
+       vaesenc         $rndkey1,@out[1],@out[1]
+       vaesenc         $rndkey1,@out[2],@out[2]
+       vaesenc         $rndkey1,@out[3],@out[3]
+       vaesenc         $rndkey1,@out[4],@out[4]
+       vaesenc         $rndkey1,@out[5],@out[5]
+       vaesenc         $rndkey1,@out[6],@out[6]
+       vaesenc         $rndkey1,@out[7],@out[7]
+       vmovups         0xd0-0x78($key),$rndkey1
+
+       vaesenc         $rndkey0,@out[0],@out[0]
+       vaesenc         $rndkey0,@out[1],@out[1]
+       vaesenc         $rndkey0,@out[2],@out[2]
+       vaesenc         $rndkey0,@out[3],@out[3]
+       vaesenc         $rndkey0,@out[4],@out[4]
+       vaesenc         $rndkey0,@out[5],@out[5]
+       vaesenc         $rndkey0,@out[6],@out[6]
+       vaesenc         $rndkey0,@out[7],@out[7]
+       vmovups         0xe0-0x78($key),$rndkey0
+
+.Lenc8x_tail:
+       vaesenc         $rndkey1,@out[0],@out[0]        # last full round
+        vpxor          $zero,$zero,$zero               # cleared temporarily to serve as compare operand
+       vaesenc         $rndkey1,@out[1],@out[1]
+       vaesenc         $rndkey1,@out[2],@out[2]
+        vpcmpgtd       $zero,$counters,$zero           # all-ones in every lane whose counter is positive
+       vaesenc         $rndkey1,@out[3],@out[3]
+       vaesenc         $rndkey1,@out[4],@out[4]
+        vpaddd         $counters,$zero,$zero           # decrement counters
+        vmovdqu        48(%rsp),$counters              # counters of lanes 4-7
+       vaesenc         $rndkey1,@out[5],@out[5]
+        mov            64(%rsp),$offset                # pre-load 1st offset
+       vaesenc         $rndkey1,@out[6],@out[6]
+       vaesenc         $rndkey1,@out[7],@out[7]
+       vmovups         0x10-0x78($key),$rndkey1        # round key at 0x10 again, ready for the next pass
+
+       vaesenclast     $rndkey0,@out[0],@out[0]
+        vmovdqa        $zero,32(%rsp)                  # update counters
+        vpxor          $zero,$zero,$zero
+       vaesenclast     $rndkey0,@out[1],@out[1]
+       vaesenclast     $rndkey0,@out[2],@out[2]
+        vpcmpgtd       $zero,$counters,$zero
+       vaesenclast     $rndkey0,@out[3],@out[3]
+       vaesenclast     $rndkey0,@out[4],@out[4]
+        vpaddd         $zero,$counters,$counters       # decrement counters
+        vmovdqu        -0x78($key),$zero               # 0-round
+       vaesenclast     $rndkey0,@out[5],@out[5]
+       vaesenclast     $rndkey0,@out[6],@out[6]
+        vmovdqa        $counters,48(%rsp)              # update counters
+       vaesenclast     $rndkey0,@out[7],@out[7]
+       vmovups         0x20-0x78($key),$rndkey0        # round key at 0x20 again
+
+       vmovups         @out[0],-16(@ptr[0])            # write output
+        sub            $offset,@ptr[0]                 # switch to input
+        vpxor          0x00($offload),@out[0],@out[0]  # chain: xor in the next input, off-loaded for lanes 0-3
+       vmovups         @out[1],-16(@ptr[1])    
+        sub            `64+1*8`(%rsp),@ptr[1]
+        vpxor          0x10($offload),@out[1],@out[1]
+       vmovups         @out[2],-16(@ptr[2])    
+        sub            `64+2*8`(%rsp),@ptr[2]
+        vpxor          0x20($offload),@out[2],@out[2]
+       vmovups         @out[3],-16(@ptr[3])
+        sub            `64+3*8`(%rsp),@ptr[3]
+        vpxor          0x30($offload),@out[3],@out[3]
+       vmovups         @out[4],-16(@ptr[4])
+        sub            `64+4*8`(%rsp),@ptr[4]
+        vpxor          @inp[0],@out[4],@out[4]         # lanes 4-7 still hold their next input in registers
+       vmovups         @out[5],-16(@ptr[5])    
+        sub            `64+5*8`(%rsp),@ptr[5]
+        vpxor          @inp[1],@out[5],@out[5]
+       vmovups         @out[6],-16(@ptr[6])    
+        sub            `64+6*8`(%rsp),@ptr[6]
+        vpxor          @inp[2],@out[6],@out[6]
+       vmovups         @out[7],-16(@ptr[7])
+        sub            `64+7*8`(%rsp),@ptr[7]
+        vpxor          @inp[3],@out[7],@out[7]
+
+       dec     $num                            # counts down from the largest block count found during setup
+       jnz     .Loop_enc8x
+
+       mov     16(%rsp),%rax                   # original %rsp
+       #mov    24(%rsp),$num
+       #lea    `40*8`($inp),$inp
+       #dec    $num
+       #jnz    .Lenc8x_loop_grande
+
+.Lenc8x_done:
+       vzeroupper
+___
+$code.=<<___ if ($win64);      # Win64 builds: reload xmm6-xmm15 saved in the prologue
+       movaps  -0xd8(%rax),%xmm6               # same slot the prologue addressed as (%rsp)
+       movaps  -0xc8(%rax),%xmm7
+       movaps  -0xb8(%rax),%xmm8
+       movaps  -0xa8(%rax),%xmm9
+       movaps  -0x98(%rax),%xmm10
+       movaps  -0x88(%rax),%xmm11
+       movaps  -0x78(%rax),%xmm12
+       movaps  -0x68(%rax),%xmm13
+       movaps  -0x58(%rax),%xmm14
+       movaps  -0x48(%rax),%xmm15
+___
+$code.=<<___;
+       mov     -48(%rax),%r15                  # reload the six registers pushed in the prologue
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp                     # back to the entry stack pointer
+.Lenc8x_epilogue:
+       ret
+.size  aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx
+
+.type  aesni_multi_cbc_decrypt_avx,\@function,3
+.align 32
+aesni_multi_cbc_decrypt_avx:
+_avx_cbc_dec_shortcut:
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___ if ($win64);
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,0x40(%rsp)
+       movaps  %xmm11,0x50(%rsp)
+       movaps  %xmm12,-0x78(%rax)
+       movaps  %xmm13,-0x68(%rax)
+       movaps  %xmm14,-0x58(%rax)
+       movaps  %xmm15,-0x48(%rax)
+___
+$code.=<<___;
+       # stack layout
+       #
+       # +0    output sink
+       # +16   input sink [original %rsp and $num]
+       # +32   counters
+       # +64   distances between inputs and outputs
+       # +128  off-load area for @inp[0..3]
+       # +192  IV/input offload
+
+       sub     \$256,%rsp
+       and     \$-256,%rsp
+       sub     \$192,%rsp
+       mov     %rax,16(%rsp)                   # original %rsp
+
+.Ldec8x_body:
+       vzeroupper
+       vmovdqu ($key),$zero                    # 0-round key
+       lea     0x78($key),$key                 # size optimization
+       lea     40*4($inp),$inp
+       shr     \$1,$num
+
+.Ldec8x_loop_grande:
+       #mov    $num,24(%rsp)                   # original $num
+       xor     $num,$num
+___
+for($i=0;$i<8;$i++) {
+  my $temp = $i ? $offload : $offset;
+    $code.=<<___;
+       mov     `40*$i+16-40*4`($inp),$one      # borrow $one for number of blocks
+       mov     `40*$i+0-40*4`($inp),@ptr[$i]   # input pointer
+       cmp     $num,$one
+       mov     `40*$i+8-40*4`($inp),$temp      # output pointer
+       cmovg   $one,$num                       # find maximum
+       test    $one,$one
+       vmovdqu `40*$i+24-40*4`($inp),@out[$i]  # load IV
+       mov     $one,`32+4*$i`(%rsp)            # initialize counters
+       cmovle  %rsp,@ptr[$i]                   # cancel input
+       sub     @ptr[$i],$temp                  # distance between input and output
+       mov     $temp,`64+8*$i`(%rsp)           # initialize distances
+       vmovdqu @out[$i],`192+16*$i`(%rsp)      # offload IV
+___
+}
+$code.=<<___;
+       test    $num,$num
+       jz      .Ldec8x_done
+
+       vmovups 0x10-0x78($key),$rndkey1
+       vmovups 0x20-0x78($key),$rndkey0
+       mov     0xf0-0x78($key),$rounds
+        lea    192+128(%rsp),$offload          # offload area
+
+       vmovdqu (@ptr[0]),@out[0]               # load inputs
+       vmovdqu (@ptr[1]),@out[1]
+       vmovdqu (@ptr[2]),@out[2]
+       vmovdqu (@ptr[3]),@out[3]
+       vmovdqu (@ptr[4]),@out[4]
+       vmovdqu (@ptr[5]),@out[5]
+       vmovdqu (@ptr[6]),@out[6]
+       vmovdqu (@ptr[7]),@out[7]
+       vmovdqu @out[0],0x00($offload)          # offload inputs
+       vpxor   $zero,@out[0],@out[0]           # xor inputs with 0-round
+       vmovdqu @out[1],0x10($offload)
+       vpxor   $zero,@out[1],@out[1]
+       vmovdqu @out[2],0x20($offload)
+       vpxor   $zero,@out[2],@out[2]
+       vmovdqu @out[3],0x30($offload)
+       vpxor   $zero,@out[3],@out[3]
+       vmovdqu @out[4],0x40($offload)
+       vpxor   $zero,@out[4],@out[4]
+       vmovdqu @out[5],0x50($offload)
+       vpxor   $zero,@out[5],@out[5]
+       vmovdqu @out[6],0x60($offload)
+       vpxor   $zero,@out[6],@out[6]
+       vmovdqu @out[7],0x70($offload)
+       vpxor   $zero,@out[7],@out[7]
+       xor     \$0x80,$offload
+       mov     \$1,$one                        # constant of 1
+       jmp     .Loop_dec8x
+
+.align 32
+.Loop_dec8x:
+___
+for($i=0;$i<8;$i++) {
+my $rndkey=($i&1)?$rndkey0:$rndkey1;
+$code.=<<___;
+       vaesdec         $rndkey,@out[0],@out[0]
+        cmp            32+4*$i(%rsp),$one
+___
+$code.=<<___ if ($i);
+        mov            64+8*$i(%rsp),$offset
+___
+$code.=<<___;
+       vaesdec         $rndkey,@out[1],@out[1]
+       prefetcht0      31(@ptr[$i])                    # prefetch input
+       vaesdec         $rndkey,@out[2],@out[2]
+___
+$code.=<<___ if ($i>1);
+       prefetcht0      15(@ptr[$i-2])                  # prefetch output
+___
+$code.=<<___;
+       vaesdec         $rndkey,@out[3],@out[3]
+        lea            (@ptr[$i],$offset),$offset
+        cmovge         %rsp,@ptr[$i]                   # cancel input
+       vaesdec         $rndkey,@out[4],@out[4]
+        cmovg          %rsp,$offset                    # sink output
+       vaesdec         $rndkey,@out[5],@out[5]
+        sub            @ptr[$i],$offset
+       vaesdec         $rndkey,@out[6],@out[6]
+        vmovdqu        16(@ptr[$i]),@inp[$i%4]         # load input
+        mov            $offset,64+8*$i(%rsp)
+       vaesdec         $rndkey,@out[7],@out[7]
+       vmovups         `16*(3+$i)-0x78`($key),$rndkey
+        lea            16(@ptr[$i],$offset),@ptr[$i]   # switch to output
+___
+$code.=<<___ if ($i<4);
+        vmovdqu        @inp[$i%4],`128+16*$i`(%rsp)    # off-load
+___
+}
+$code.=<<___;
+        vmovdqu        32(%rsp),$counters
+       prefetcht0      15(@ptr[$i-2])                  # prefetch output
+       prefetcht0      15(@ptr[$i-1])
+       cmp     \$11,$rounds
+       jb      .Ldec8x_tail
+
+       vaesdec         $rndkey1,@out[0],@out[0]
+       vaesdec         $rndkey1,@out[1],@out[1]
+       vaesdec         $rndkey1,@out[2],@out[2]
+       vaesdec         $rndkey1,@out[3],@out[3]
+       vaesdec         $rndkey1,@out[4],@out[4]
+       vaesdec         $rndkey1,@out[5],@out[5]
+       vaesdec         $rndkey1,@out[6],@out[6]
+       vaesdec         $rndkey1,@out[7],@out[7]
+       vmovups         0xb0-0x78($key),$rndkey1
+
+       vaesdec         $rndkey0,@out[0],@out[0]
+       vaesdec         $rndkey0,@out[1],@out[1]
+       vaesdec         $rndkey0,@out[2],@out[2]
+       vaesdec         $rndkey0,@out[3],@out[3]
+       vaesdec         $rndkey0,@out[4],@out[4]
+       vaesdec         $rndkey0,@out[5],@out[5]
+       vaesdec         $rndkey0,@out[6],@out[6]
+       vaesdec         $rndkey0,@out[7],@out[7]
+       vmovups         0xc0-0x78($key),$rndkey0
+       je      .Ldec8x_tail
+
+       vaesdec         $rndkey1,@out[0],@out[0]
+       vaesdec         $rndkey1,@out[1],@out[1]
+       vaesdec         $rndkey1,@out[2],@out[2]
+       vaesdec         $rndkey1,@out[3],@out[3]
+       vaesdec         $rndkey1,@out[4],@out[4]
+       vaesdec         $rndkey1,@out[5],@out[5]
+       vaesdec         $rndkey1,@out[6],@out[6]
+       vaesdec         $rndkey1,@out[7],@out[7]
+       vmovups         0xd0-0x78($key),$rndkey1
+
+       vaesdec         $rndkey0,@out[0],@out[0]
+       vaesdec         $rndkey0,@out[1],@out[1]
+       vaesdec         $rndkey0,@out[2],@out[2]
+       vaesdec         $rndkey0,@out[3],@out[3]
+       vaesdec         $rndkey0,@out[4],@out[4]
+       vaesdec         $rndkey0,@out[5],@out[5]
+       vaesdec         $rndkey0,@out[6],@out[6]
+       vaesdec         $rndkey0,@out[7],@out[7]
+       vmovups         0xe0-0x78($key),$rndkey0
+
+.Ldec8x_tail:
+       vaesdec         $rndkey1,@out[0],@out[0]
+        vpxor          $zero,$zero,$zero
+       vaesdec         $rndkey1,@out[1],@out[1]
+       vaesdec         $rndkey1,@out[2],@out[2]
+        vpcmpgtd       $zero,$counters,$zero
+       vaesdec         $rndkey1,@out[3],@out[3]
+       vaesdec         $rndkey1,@out[4],@out[4]
+        vpaddd         $counters,$zero,$zero           # decrement counters
+        vmovdqu        48(%rsp),$counters
+       vaesdec         $rndkey1,@out[5],@out[5]
+        mov            64(%rsp),$offset                # pre-load 1st offset
+       vaesdec         $rndkey1,@out[6],@out[6]
+       vaesdec         $rndkey1,@out[7],@out[7]
+       vmovups         0x10-0x78($key),$rndkey1
+
+       vaesdeclast     $rndkey0,@out[0],@out[0]
+        vmovdqa        $zero,32(%rsp)                  # update counters
+        vpxor          $zero,$zero,$zero
+       vaesdeclast     $rndkey0,@out[1],@out[1]
+       vpxor           0x00($offload),@out[0],@out[0]  # xor with IV
+       vaesdeclast     $rndkey0,@out[2],@out[2]
+       vpxor           0x10($offload),@out[1],@out[1]
+        vpcmpgtd       $zero,$counters,$zero
+       vaesdeclast     $rndkey0,@out[3],@out[3]
+       vpxor           0x20($offload),@out[2],@out[2]
+       vaesdeclast     $rndkey0,@out[4],@out[4]
+       vpxor           0x30($offload),@out[3],@out[3]
+        vpaddd         $zero,$counters,$counters       # decrement counters
+        vmovdqu        -0x78($key),$zero               # 0-round
+       vaesdeclast     $rndkey0,@out[5],@out[5]
+       vpxor           0x40($offload),@out[4],@out[4]
+       vaesdeclast     $rndkey0,@out[6],@out[6]
+       vpxor           0x50($offload),@out[5],@out[5]
+        vmovdqa        $counters,48(%rsp)              # update counters
+       vaesdeclast     $rndkey0,@out[7],@out[7]
+       vpxor           0x60($offload),@out[6],@out[6]
+       vmovups         0x20-0x78($key),$rndkey0
+
+       vmovups         @out[0],-16(@ptr[0])            # write output
+        sub            $offset,@ptr[0]                 # switch to input
+        vmovdqu        128+0(%rsp),@out[0]
+       vpxor           0x70($offload),@out[7],@out[7]
+       vmovups         @out[1],-16(@ptr[1])    
+        sub            `64+1*8`(%rsp),@ptr[1]
+        vmovdqu        @out[0],0x00($offload)
+        vpxor          $zero,@out[0],@out[0]
+        vmovdqu        128+16(%rsp),@out[1]
+       vmovups         @out[2],-16(@ptr[2])    
+        sub            `64+2*8`(%rsp),@ptr[2]
+        vmovdqu        @out[1],0x10($offload)
+        vpxor          $zero,@out[1],@out[1]
+        vmovdqu        128+32(%rsp),@out[2]
+       vmovups         @out[3],-16(@ptr[3])
+        sub            `64+3*8`(%rsp),@ptr[3]
+        vmovdqu        @out[2],0x20($offload)
+        vpxor          $zero,@out[2],@out[2]
+        vmovdqu        128+48(%rsp),@out[3]
+       vmovups         @out[4],-16(@ptr[4])
+        sub            `64+4*8`(%rsp),@ptr[4]
+        vmovdqu        @out[3],0x30($offload)
+        vpxor          $zero,@out[3],@out[3]
+        vmovdqu        @inp[0],0x40($offload)
+        vpxor          @inp[0],$zero,@out[4]
+       vmovups         @out[5],-16(@ptr[5])    
+        sub            `64+5*8`(%rsp),@ptr[5]
+        vmovdqu        @inp[1],0x50($offload)
+        vpxor          @inp[1],$zero,@out[5]
+       vmovups         @out[6],-16(@ptr[6])    
+        sub            `64+6*8`(%rsp),@ptr[6]
+        vmovdqu        @inp[2],0x60($offload)
+        vpxor          @inp[2],$zero,@out[6]
+       vmovups         @out[7],-16(@ptr[7])
+        sub            `64+7*8`(%rsp),@ptr[7]
+        vmovdqu        @inp[3],0x70($offload)
+        vpxor          @inp[3],$zero,@out[7]
+
+       xor     \$128,$offload
+       dec     $num
+       jnz     .Loop_dec8x
+
+       mov     16(%rsp),%rax                   # original %rsp
+       #mov    24(%rsp),$num
+       #lea    `40*8`($inp),$inp
+       #dec    $num
+       #jnz    .Ldec8x_loop_grande
+
+.Ldec8x_done:
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       movaps  -0xd8(%rax),%xmm6
+       movaps  -0xc8(%rax),%xmm7
+       movaps  -0xb8(%rax),%xmm8
+       movaps  -0xa8(%rax),%xmm9
+       movaps  -0x98(%rax),%xmm10
+       movaps  -0x88(%rax),%xmm11
+       movaps  -0x78(%rax),%xmm12
+       movaps  -0x68(%rax),%xmm13
+       movaps  -0x58(%rax),%xmm14
+       movaps  -0x48(%rax),%xmm15
+___
+$code.=<<___;
+       mov     -48(%rax),%r15
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp
+.Ldec8x_epilogue:
+       ret
+.size  aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
+___
+                                               }}}
+
+if ($win64) {
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern        __imp_RtlVirtualUnwind
+.type  se_handler,\@abi-omnipotent
+.align 16
+se_handler:                            # Win64 SEH handler shared by the multi-block CBC routines
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp               # scratch frame, stack args 5-8 of RtlVirtualUnwind go here
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # prologue label
+       cmp     %r10,%rbx               # context->Rip<.Lprologue
+       jb      .Lin_prologue
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=.Lepilogue
+       jae     .Lin_prologue
+
+       mov     16(%rax),%rax           # pull saved stack pointer
+
+       mov     -8(%rax),%rbx
+       mov     -16(%rax),%rbp
+       mov     -24(%rax),%r12
+       mov     -32(%rax),%r13
+       mov     -40(%rax),%r14
+       mov     -48(%rax),%r15
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+       mov     %r12,216($context)      # restore context->R12
+       mov     %r13,224($context)      # restore context->R13
+       mov     %r14,232($context)      # restore context->R14
+       mov     %r15,240($context)      # restore context->R15
+
+       lea     -56-10*16(%rax),%rsi    # start of the saved xmm area
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$20,%ecx               # 20 quadwords, i.e. 10 registers of 16 bytes
+       .long   0xa548f3fc              # cld; rep movsq
+
+.Lin_prologue:
+       mov     8(%rax),%rdi
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT)
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
+       mov     %r12,48(%rsp)           # arg7
+       mov     %rcx,56(%rsp)           # arg8, (NULL)
+       call    *__imp_RtlVirtualUnwind(%rip)
+
+       mov     \$1,%eax                # ExceptionContinueSearch
+       add     \$64,%rsp
+       popfq
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+       ret
+.size  se_handler,.-se_handler
+
+.section       .pdata
+.align 4
+       .rva    .LSEH_begin_aesni_multi_cbc_encrypt
+       .rva    .LSEH_end_aesni_multi_cbc_encrypt
+       .rva    .LSEH_info_aesni_multi_cbc_encrypt
+       .rva    .LSEH_begin_aesni_multi_cbc_decrypt
+       .rva    .LSEH_end_aesni_multi_cbc_decrypt
+       .rva    .LSEH_info_aesni_multi_cbc_decrypt
+___
+$code.=<<___ if ($avx);
+       .rva    .LSEH_begin_aesni_multi_cbc_encrypt_avx
+       .rva    .LSEH_end_aesni_multi_cbc_encrypt_avx
+       .rva    .LSEH_info_aesni_multi_cbc_encrypt_avx
+       .rva    .LSEH_begin_aesni_multi_cbc_decrypt_avx
+       .rva    .LSEH_end_aesni_multi_cbc_decrypt_avx
+       .rva    .LSEH_info_aesni_multi_cbc_decrypt_avx
+___
+$code.=<<___;
+.section       .xdata
+.align 8
+.LSEH_info_aesni_multi_cbc_encrypt:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lenc4x_body,.Lenc4x_epilogue           # HandlerData[]
+.LSEH_info_aesni_multi_cbc_decrypt:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Ldec4x_body,.Ldec4x_epilogue           # HandlerData[]
+___
+$code.=<<___ if ($avx);
+.LSEH_info_aesni_multi_cbc_encrypt_avx:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lenc8x_body,.Lenc8x_epilogue           # HandlerData[]
+.LSEH_info_aesni_multi_cbc_decrypt_avx:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Ldec8x_body,.Ldec8x_epilogue           # HandlerData[]
+___
+}
+####################################################################
+
+sub rex {      # append a REX prefix byte to the opcode list when a register number is 8 or above
+  local *opcode=shift;         # alias the opcode array the caller passed by reference
+  my ($dst,$src)=@_;
+  my $rex=0;
+
+    $rex|=0x04                 if($dst>=8);    # REX.R, extends the ModR/M reg field
+    $rex|=0x01                 if($src>=8);    # REX.B, extends the ModR/M r/m field
+    push @opcode,$rex|0x40     if($rex);       # no prefix emitted when neither bit is set
+}
+
+sub aesni {    # hand-encode AES-NI mnemonics as .byte sequences; unmatched lines are returned as-is
+  my $line=shift;
+  my @opcode=(0x66);           # every encoding built here starts with the 0x66 prefix
+
+    if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {        # imm,xmm,xmm form
+       rex(\@opcode,$4,$3);
+       push @opcode,0x0f,0x3a,0xdf;
+       push @opcode,0xc0|($3&7)|(($4&7)<<3);   # ModR/M
+       my $c=$2;
+       push @opcode,$c=~/^0/?oct($c):$c;       # immediate byte; oct() also parses 0x-prefixed hex
+       return ".byte\t".join(',',@opcode);
+    }
+    elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) {     # register-to-register form
+       my %opcodelet = (
+               "aesimc" => 0xdb,
+               "aesenc" => 0xdc,       "aesenclast" => 0xdd,
+               "aesdec" => 0xde,       "aesdeclast" => 0xdf
+       );
+       return undef if (!defined($opcodelet{$1}));     # mnemonic not in the table above
+       rex(\@opcode,$3,$2);
+       push @opcode,0x0f,0x38,$opcodelet{$1};
+       push @opcode,0xc0|($2&7)|(($3&7)<<3);   # ModR/M
+       return ".byte\t".join(',',@opcode);
+    }
+    elsif ($line=~/(aes[a-z]+)\s+([0x1-9a-fA-F]*)\(%rsp\),\s*%xmm([0-9]+)/) { # off(%rsp) memory source form
+       my %opcodelet = (
+               "aesenc" => 0xdc,       "aesenclast" => 0xdd,
+               "aesdec" => 0xde,       "aesdeclast" => 0xdf
+       );
+       return undef if (!defined($opcodelet{$1}));     # mnemonic not in the table above
+       my $off = $2;
+       push @opcode,0x44 if ($3>=8);           # REX.R for destination register 8 or above
+       push @opcode,0x0f,0x38,$opcodelet{$1};
+       push @opcode,0x44|(($3&7)<<3),0x24;     # ModR/M (disp8 with SIB), then SIB selecting rsp as base
+       push @opcode,($off=~/^0/?oct($off):$off)&0xff;  # displacement truncated to 8 bits
+       return ".byte\t".join(',',@opcode);
+    }
+    return $line;      # not a form handled here, pass through unchanged
+}
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem; # replace each backtick-quoted expression with its evaluated value
+$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem;       # pass lines with AES mnemonics through aesni()
+
+print $code;
+close STDOUT;
index 3c8f6c1..97992ad 100644 (file)
 # subroutine:
 #
 #              AES-128-CBC     +SHA1           stitch      gain
-# Westmere     3.77[+5.6]      9.37            6.65        +41%
-# Sandy Bridge 5.05[+5.2(6.3)] 10.25(11.35)    6.16(7.08)  +67%(+60%)
+# Westmere     3.77[+5.3]      9.07            6.55        +38%
+# Sandy Bridge 5.05[+5.0(6.1)] 10.06(11.15)    5.98(7.05)  +68%(+58%)
+# Ivy Bridge   5.05[+4.6]      9.65            5.54        +74%
+# Haswell      4.43[+3.6(4.2)] 8.00(8.58)      4.55(5.21)  +75%(+65%)
+# Bulldozer    5.77[+6.0]      11.72           6.37        +84%
 #
 #              AES-192-CBC
-# Westmere     4.51            10.11           6.97        +45%
-# Sandy Bridge 6.05            11.25(12.35)    6.34(7.27)  +77%(+70%)
+# Westmere     4.51            9.81            6.80        +44%
+# Sandy Bridge 6.05            11.06(12.15)    6.11(7.19)  +81%(+69%)
+# Ivy Bridge   6.05            10.65           6.07        +75%
+# Haswell      5.29            8.86(9.44)      5.32(5.32)  +67%(+77%)
+# Bulldozer    6.89            12.84           6.96        +84%
 #
 #              AES-256-CBC
-# Westmere     5.25            10.85           7.25        +50%
-# Sandy Bridge 7.05            12.25(13.35)    7.06(7.70)  +74%(+73%)
+# Westmere     5.25            10.55           7.21        +46%
+# Sandy Bridge 7.05            12.06(13.15)    7.12(7.72)  +69%(+70%)
+# Ivy Bridge   7.05            11.65           7.12        +64%
+# Haswell      6.19            9.76(10.34)     6.21(6.25)  +57%(+65%)
+# Bulldozer    8.00            13.95           8.25        +69%
 #
 # (*)  There are two code paths: SSSE3 and AVX. See sha1-568.pl for
 #      background information. Above numbers in parentheses are SSSE3
 # standalone AESNI-CBC decrypt:
 #
 #              AES-128-CBC     AES-192-CBC     AES-256-CBC
-# Westmere     1.31            1.55            1.80
-# Sandy Bridge 0.93            1.06            1.22
+# Westmere     1.25            1.50            1.75
+# Sandy Bridge 0.74            0.91            1.09
+# Ivy Bridge   0.74            0.90            1.11
+# Haswell      0.63            0.76            0.88
+# Bulldozer    0.70            0.85            0.99
+
+# And indeed:
+#
+#              AES-256-CBC     +SHA1           stitch      gain
+# Westmere     1.75            7.20            6.68        +7.8%
+# Sandy Bridge 1.09            6.09(7.22)      5.82(6.95)  +4.6%(+3.9%)
+# Ivy Bridge   1.11            5.70            5.45        +4.6%
+# Haswell      0.88            4.45(5.00)      4.39(4.69)  +1.4%(*)(+6.6%)
+# Bulldozer    0.99            6.95            5.95        +17%(**)
+#
+# (*)  Tiny improvement coefficient on Haswell is because we compare
+#      AVX1 stitch to sum with AVX2 SHA1.
+# (**) Execution is fully dominated by integer code sequence and
+#      SIMD still hardly shows [in single-process benchmark;-]
 
 $flavour = shift;
 $output  = shift;
@@ -68,6 +94,11 @@ $avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
 $avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
           `ml64 2>&1` =~ /Version ([0-9]+)\./ &&
           $1>=10);
+$avx=1 if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/ && $2>=3.0);
+
+$shaext=1;     ### set to zero if compiling for 1.0.1
+
+$stitched_decrypt=0;
 
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
@@ -86,11 +117,15 @@ $code.=<<___;
 
 .globl aesni_cbc_sha1_enc
 .type  aesni_cbc_sha1_enc,\@abi-omnipotent
-.align 16
+.align 32
 aesni_cbc_sha1_enc:
        # caller should check for SSSE3 and AES-NI bits
        mov     OPENSSL_ia32cap_P+0(%rip),%r10d
-       mov     OPENSSL_ia32cap_P+4(%rip),%r11d
+       mov     OPENSSL_ia32cap_P+4(%rip),%r11
+___
+$code.=<<___ if ($shaext);
+       bt      \$61,%r11               # check SHA bit
+       jc      aesni_cbc_sha1_enc_shaext
 ___
 $code.=<<___ if ($avx);
        and     \$`1<<28`,%r11d         # mask AVX bit
@@ -112,10 +147,21 @@ my @X=map("%xmm$_",(4..7,0..3));
 my @Tx=map("%xmm$_",(8..10));
 my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp");   # size optimization
 my @T=("%esi","%edi");
-my $j=0; my $jj=0; my $r=0; my $sn=0;
+my $j=0; my $jj=0; my $r=0; my $sn=0; my $rx=0;
 my $K_XX_XX="%r11";
-my ($iv,$in,$rndkey0)=map("%xmm$_",(11..13));
-my @rndkey=("%xmm14","%xmm15");
+my ($rndkey0,$iv,$in)=map("%xmm$_",(11..13));                  # for enc
+my @rndkey=("%xmm14","%xmm15");                                        # for enc
+my ($inout0,$inout1,$inout2,$inout3)=map("%xmm$_",(12..15));   # for dec
+
+if (1) {       # reassign for Atom Silvermont
+    # The goal is to minimize amount of instructions with more than
+    # 3 prefix bytes. Or in more practical terms to keep AES-NI *and*
+    # SSSE3 instructions to upper half of the register bank.
+    @X=map("%xmm$_",(8..11,4..7));
+    @Tx=map("%xmm$_",(12,13,3));
+    ($iv,$in,$rndkey0)=map("%xmm$_",(2,14,15));
+    @rndkey=("%xmm0","%xmm1");
+}
 
 sub AUTOLOAD()         # thunk [simplified] 32-bit style perlasm
 { my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;
@@ -129,7 +175,7 @@ my $_ror=sub { &ror(@_) };
 
 $code.=<<___;
 .type  aesni_cbc_sha1_enc_ssse3,\@function,6
-.align 16
+.align 32
 aesni_cbc_sha1_enc_ssse3:
        mov     `($win64?56:8)`(%rsp),$inp      # load 7th argument
        #shr    \$6,$len                        # debugging artefact
@@ -161,16 +207,16 @@ $code.=<<___;
        mov     $in0,%r12                       # reassign arguments
        mov     $out,%r13
        mov     $len,%r14
-       mov     $key,%r15
+       lea     112($key),%r15                  # size optimization
        movdqu  ($ivp),$iv                      # load IV
        mov     $ivp,88(%rsp)                   # save $ivp
 ___
-my ($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments
+($in0,$out,$len,$key)=map("%r$_",(12..15));    # reassign arguments
 my $rounds="${ivp}d";
 $code.=<<___;
        shl     \$6,$len
        sub     $in0,$out
-       mov     240($key),$rounds
+       mov     240-112($key),$rounds
        add     $inp,$len               # end of input
 
        lea     K_XX_XX(%rip),$K_XX_XX
@@ -180,19 +226,22 @@ $code.=<<___;
        mov     12($ctx),$D
        mov     $B,@T[0]                # magic seed
        mov     16($ctx),$E
+       mov     $C,@T[1]
+       xor     $D,@T[1]
+       and     @T[1],@T[0]
 
-       movdqa  64($K_XX_XX),@X[2]      # pbswap mask
+       movdqa  64($K_XX_XX),@Tx[2]     # pbswap mask
        movdqa  0($K_XX_XX),@Tx[1]      # K_00_19
        movdqu  0($inp),@X[-4&7]        # load input to %xmm[0-3]
        movdqu  16($inp),@X[-3&7]
        movdqu  32($inp),@X[-2&7]
        movdqu  48($inp),@X[-1&7]
-       pshufb  @X[2],@X[-4&7]          # byte swap
+       pshufb  @Tx[2],@X[-4&7]         # byte swap
+       pshufb  @Tx[2],@X[-3&7]
+       pshufb  @Tx[2],@X[-2&7]
        add     \$64,$inp
-       pshufb  @X[2],@X[-3&7]
-       pshufb  @X[2],@X[-2&7]
-       pshufb  @X[2],@X[-1&7]
        paddd   @Tx[1],@X[-4&7]         # add K_00_19
+       pshufb  @Tx[2],@X[-1&7]
        paddd   @Tx[1],@X[-3&7]
        paddd   @Tx[1],@X[-2&7]
        movdqa  @X[-4&7],0(%rsp)        # X[]+K xfer to IALU
@@ -201,8 +250,8 @@ $code.=<<___;
        psubd   @Tx[1],@X[-3&7]
        movdqa  @X[-2&7],32(%rsp)
        psubd   @Tx[1],@X[-2&7]
-       movups  ($key),$rndkey0         # $key[0]
-       movups  16($key),$rndkey[0]     # forward reference
+       movups  -112($key),$rndkey0     # $key[0]
+       movups  16-112($key),$rndkey[0] # forward reference
        jmp     .Loop_ssse3
 ___
 
@@ -219,31 +268,31 @@ ___
 ___
       $code.=<<___;
        xorps           $in,$iv
+       movups          `32+16*$k-112`($key),$rndkey[1]
        aesenc          $rndkey[0],$iv
-       movups          `32+16*$k`($key),$rndkey[1]
 ___
     } elsif ($k==9) {
       $sn++;
       $code.=<<___;
        cmp             \$11,$rounds
        jb              .Laesenclast$sn
-       movups          `32+16*($k+0)`($key),$rndkey[1]
+       movups          `32+16*($k+0)-112`($key),$rndkey[1]
        aesenc          $rndkey[0],$iv
-       movups          `32+16*($k+1)`($key),$rndkey[0]
+       movups          `32+16*($k+1)-112`($key),$rndkey[0]
        aesenc          $rndkey[1],$iv
        je              .Laesenclast$sn
-       movups          `32+16*($k+2)`($key),$rndkey[1]
+       movups          `32+16*($k+2)-112`($key),$rndkey[1]
        aesenc          $rndkey[0],$iv
-       movups          `32+16*($k+3)`($key),$rndkey[0]
+       movups          `32+16*($k+3)-112`($key),$rndkey[0]
        aesenc          $rndkey[1],$iv
 .Laesenclast$sn:
        aesenclast      $rndkey[0],$iv
-       movups          16($key),$rndkey[1]             # forward reference
+       movups          16-112($key),$rndkey[1]         # forward reference
 ___
     } else {
       $code.=<<___;
+       movups          `32+16*$k-112`($key),$rndkey[1]
        aesenc          $rndkey[0],$iv
-       movups          `32+16*$k`($key),$rndkey[1]
 ___
     }
     $r++;      unshift(@rndkey,pop(@rndkey));
@@ -255,61 +304,61 @@ sub Xupdate_ssse3_16_31()         # recall that $Xi starts wtih 4
   my @insns = (&$body,&$body,&$body,&$body);   # 40 instructions
   my ($a,$b,$c,$d,$e);
 
-       &movdqa (@X[0],@X[-3&7]);
-        eval(shift(@insns));
+        eval(shift(@insns));           # ror
+       &pshufd (@X[0],@X[-4&7],0xee);  # was &movdqa   (@X[0],@X[-3&7]);
         eval(shift(@insns));
        &movdqa (@Tx[0],@X[-1&7]);
-       &palignr(@X[0],@X[-4&7],8);     # compose "X[-14]" in "X[0]"
+         &paddd        (@Tx[1],@X[-1&7]);
         eval(shift(@insns));
         eval(shift(@insns));
 
-         &paddd        (@Tx[1],@X[-1&7]);
+       &punpcklqdq(@X[0],@X[-3&7]);    # compose "X[-14]" in "X[0]", was &palignr(@X[0],@X[-4&7],8);
         eval(shift(@insns));
+        eval(shift(@insns));           # rol
         eval(shift(@insns));
        &psrldq (@Tx[0],4);             # "X[-3]", 3 dwords
         eval(shift(@insns));
         eval(shift(@insns));
+
        &pxor   (@X[0],@X[-4&7]);       # "X[0]"^="X[-16]"
         eval(shift(@insns));
-        eval(shift(@insns));
-
+        eval(shift(@insns));           # ror
        &pxor   (@Tx[0],@X[-2&7]);      # "X[-3]"^"X[-8]"
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
-        eval(shift(@insns));
 
        &pxor   (@X[0],@Tx[0]);         # "X[0]"^="X[-3]"^"X[-8]"
         eval(shift(@insns));
-        eval(shift(@insns));
+        eval(shift(@insns));           # rol
          &movdqa       (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
         eval(shift(@insns));
         eval(shift(@insns));
 
        &movdqa (@Tx[2],@X[0]);
-       &movdqa (@Tx[0],@X[0]);
-        eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));           # ror
+       &movdqa (@Tx[0],@X[0]);
         eval(shift(@insns));
 
        &pslldq (@Tx[2],12);            # "X[0]"<<96, extract one dword
        &paddd  (@X[0],@X[0]);
         eval(shift(@insns));
         eval(shift(@insns));
-        eval(shift(@insns));
-        eval(shift(@insns));
 
        &psrld  (@Tx[0],31);
         eval(shift(@insns));
+        eval(shift(@insns));           # rol
         eval(shift(@insns));
        &movdqa (@Tx[1],@Tx[2]);
         eval(shift(@insns));
         eval(shift(@insns));
 
        &psrld  (@Tx[2],30);
-       &por    (@X[0],@Tx[0]);         # "X[0]"<<<=1
         eval(shift(@insns));
+        eval(shift(@insns));           # ror
+       &por    (@X[0],@Tx[0]);         # "X[0]"<<<=1
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
@@ -317,12 +366,13 @@ sub Xupdate_ssse3_16_31()         # recall that $Xi starts wtih 4
        &pslld  (@Tx[1],2);
        &pxor   (@X[0],@Tx[2]);
         eval(shift(@insns));
-        eval(shift(@insns));
          &movdqa       (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)");       # K_XX_XX
+        eval(shift(@insns));           # rol
         eval(shift(@insns));
         eval(shift(@insns));
 
        &pxor   (@X[0],@Tx[1]);         # "X[0]"^=("X[0]">>96)<<<2
+       &pshufd (@Tx[1],@X[-1&7],0xee)  if ($Xi==7);    # was &movdqa   (@Tx[0],@X[-1&7]) in Xupdate_ssse3_32_79
 
         foreach (@insns) { eval; }     # remaining instructions [if any]
 
@@ -333,27 +383,30 @@ sub Xupdate_ssse3_16_31()         # recall that $Xi starts wtih 4
 sub Xupdate_ssse3_32_79()
 { use integer;
   my $body = shift;
-  my @insns = (&$body,&$body,&$body,&$body);   # 32 to 48 instructions
+  my @insns = (&$body,&$body,&$body,&$body);   # 32 to 44 instructions
   my ($a,$b,$c,$d,$e);
 
-       &movdqa (@Tx[0],@X[-1&7])       if ($Xi==8);
-        eval(shift(@insns));           # body_20_39
+        eval(shift(@insns))            if ($Xi==8);
        &pxor   (@X[0],@X[-4&7]);       # "X[0]"="X[-32]"^"X[-16]"
-       &palignr(@Tx[0],@X[-2&7],8);    # compose "X[-6]"
+        eval(shift(@insns))            if ($Xi==8);
+        eval(shift(@insns));           # body_20_39
         eval(shift(@insns));
+        eval(shift(@insns))            if (@insns[1] =~ /_ror/);
+        eval(shift(@insns))            if (@insns[0] =~ /_ror/);
+       &punpcklqdq(@Tx[0],@X[-1&7]);   # compose "X[-6]", was &palignr(@Tx[0],@X[-2&7],8);
         eval(shift(@insns));
         eval(shift(@insns));           # rol
 
        &pxor   (@X[0],@X[-7&7]);       # "X[0]"^="X[-28]"
         eval(shift(@insns));
-        eval(shift(@insns))    if (@insns[0] !~ /&ro[rl]/);
+        eval(shift(@insns));
        if ($Xi%5) {
          &movdqa       (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX...
        } else {                        # ... or load next one
          &movdqa       (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)");
        }
-         &paddd        (@Tx[1],@X[-1&7]);
         eval(shift(@insns));           # ror
+         &paddd        (@Tx[1],@X[-1&7]);
         eval(shift(@insns));
 
        &pxor   (@X[0],@Tx[0]);         # "X[0]"^="X[-6]"
@@ -361,29 +414,31 @@ sub Xupdate_ssse3_32_79()
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));           # rol
+        eval(shift(@insns))            if (@insns[0] =~ /_ror/);
 
        &movdqa (@Tx[0],@X[0]);
-         &movdqa       (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
         eval(shift(@insns));
         eval(shift(@insns));
+         &movdqa       (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
         eval(shift(@insns));           # ror
         eval(shift(@insns));
+        eval(shift(@insns));           # body_20_39
 
        &pslld  (@X[0],2);
-        eval(shift(@insns));           # body_20_39
         eval(shift(@insns));
-       &psrld  (@Tx[0],30);
         eval(shift(@insns));
-        eval(shift(@insns));           # rol
+       &psrld  (@Tx[0],30);
+        eval(shift(@insns))            if (@insns[0] =~ /_rol/);# rol
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));           # ror
-        eval(shift(@insns));
 
        &por    (@X[0],@Tx[0]);         # "X[0]"<<<=2
-        eval(shift(@insns));           # body_20_39
         eval(shift(@insns));
-         &movdqa       (@Tx[1],@X[0])  if ($Xi<19);
+        eval(shift(@insns));           # body_20_39
+        eval(shift(@insns))            if (@insns[1] =~ /_rol/);
+        eval(shift(@insns))            if (@insns[0] =~ /_rol/);
+         &pshufd(@Tx[1],@X[-1&7],0xee) if ($Xi<19);    # was &movdqa   (@Tx[1],@X[0])
         eval(shift(@insns));
         eval(shift(@insns));           # rol
         eval(shift(@insns));
@@ -404,10 +459,11 @@ sub Xuplast_ssse3_80()
   my ($a,$b,$c,$d,$e);
 
         eval(shift(@insns));
-         &paddd        (@Tx[1],@X[-1&7]);
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
+         &paddd        (@Tx[1],@X[-1&7]);
+        eval(shift(@insns));
         eval(shift(@insns));
 
          &movdqa       (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
@@ -415,17 +471,17 @@ sub Xuplast_ssse3_80()
         foreach (@insns) { eval; }             # remaining instructions
 
        &cmp    ($inp,$len);
-       &je     (".Ldone_ssse3");
+       &je     (shift);
 
        unshift(@Tx,pop(@Tx));
 
-       &movdqa (@X[2],"64($K_XX_XX)");         # pbswap mask
+       &movdqa (@Tx[2],"64($K_XX_XX)");        # pbswap mask
        &movdqa (@Tx[1],"0($K_XX_XX)");         # K_00_19
        &movdqu (@X[-4&7],"0($inp)");           # load input
        &movdqu (@X[-3&7],"16($inp)");
        &movdqu (@X[-2&7],"32($inp)");
        &movdqu (@X[-1&7],"48($inp)");
-       &pshufb (@X[-4&7],@X[2]);               # byte swap
+       &pshufb (@X[-4&7],@Tx[2]);              # byte swap
        &add    ($inp,64);
 
   $Xi=0;
@@ -439,7 +495,10 @@ sub Xloop_ssse3()
 
         eval(shift(@insns));
         eval(shift(@insns));
-       &pshufb (@X[($Xi-3)&7],@X[2]);
+        eval(shift(@insns));
+       &pshufb (@X[($Xi-3)&7],@Tx[2]);
+        eval(shift(@insns));
+        eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
        &paddd  (@X[($Xi-4)&7],@Tx[1]);
@@ -450,6 +509,8 @@ sub Xloop_ssse3()
        &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]);  # X[]+K xfer to IALU
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
        &psubd  (@X[($Xi-4)&7],@Tx[1]);
 
        foreach (@insns) { eval; }
@@ -465,76 +526,106 @@ sub Xtail_ssse3()
        foreach (@insns) { eval; }
 }
 
-sub body_00_19 () {
-  use integer;
-  my ($k,$n);
-  my @r=(
+my @body_00_19 = (
        '($a,$b,$c,$d,$e)=@V;'.
-       '&add   ($e,eval(4*($j&15))."(%rsp)");',        # X[]+K xfer
-       '&xor   ($c,$d);',
-       '&mov   (@T[1],$a);',   # $b in next round
-       '&$_rol ($a,5);',
-       '&and   (@T[0],$c);',   # ($b&($c^$d))
-       '&xor   ($c,$d);',      # restore $c
-       '&xor   (@T[0],$d);',
-       '&add   ($e,$a);',
        '&$_ror ($b,$j?7:2);',  # $b>>>2
-       '&add   ($e,@T[0]);'    .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       '&xor   (@T[0],$d);',
+       '&mov   (@T[1],$a);',   # $b for next round
+
+       '&add   ($e,eval(4*($j&15))."(%rsp)");',# X[]+K xfer
+       '&xor   ($b,$c);',      # $c^$d for next round
+
+       '&$_rol ($a,5);',
+       '&add   ($e,@T[0]);',
+       '&and   (@T[1],$b);',   # ($b&($c^$d)) for next round
+
+       '&xor   ($b,$c);',      # restore $b
+       '&add   ($e,$a);'       .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
        );
+
+sub body_00_19 () {    # ((c^d)&b)^d
+    # on start @T[0]=(c^d)&b
+    return &body_20_39() if ($rx==19); $rx++;
+
+    use integer;
+    my ($k,$n);
+    my @r=@body_00_19;
+
        $n = scalar(@r);
        $k = (($jj+1)*12/20)*20*$n/12;  # 12 aesencs per these 20 rounds
        @r[$k%$n].='&$aesenc();'        if ($jj==$k/$n);
        $jj++;
+
     return @r;
 }
 
-sub body_20_39 () {
-  use integer;
-  my ($k,$n);
-  my @r=(
+my @body_20_39 = (
        '($a,$b,$c,$d,$e)=@V;'.
-       '&add   ($e,eval(4*($j++&15))."(%rsp)");',      # X[]+K xfer
-       '&xor   (@T[0],$d);',   # ($b^$d)
-       '&mov   (@T[1],$a);',   # $b in next round
+       '&add   ($e,eval(4*($j&15))."(%rsp)");',# X[]+K xfer
+       '&xor   (@T[0],$d)      if($j==19);'.
+       '&xor   (@T[0],$c)      if($j> 19);',   # ($b^$d^$c)
+       '&mov   (@T[1],$a);',   # $b for next round
+
        '&$_rol ($a,5);',
-       '&xor   (@T[0],$c);',   # ($b^$d^$c)
-       '&add   ($e,$a);',
+       '&add   ($e,@T[0]);',
+       '&xor   (@T[1],$c)      if ($j< 79);',  # $b^$d for next round
+
        '&$_ror ($b,7);',       # $b>>>2
-       '&add   ($e,@T[0]);'    .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       '&add   ($e,$a);'       .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
        );
+
+sub body_20_39 () {    # b^d^c
+    # on entry @T[0]=b^d
+    return &body_40_59() if ($rx==39); $rx++;
+
+    use integer;
+    my ($k,$n);
+    my @r=@body_20_39;
+
        $n = scalar(@r);
        $k = (($jj+1)*8/20)*20*$n/8;    # 8 aesencs per these 20 rounds
-       @r[$k%$n].='&$aesenc();'        if ($jj==$k/$n);
+       @r[$k%$n].='&$aesenc();'        if ($jj==$k/$n && $rx!=20);
        $jj++;
+
     return @r;
 }
 
-sub body_40_59 () {
-  use integer;
-  my ($k,$n);
-  my @r=(
+my @body_40_59 = (
        '($a,$b,$c,$d,$e)=@V;'.
-       '&mov   (@T[1],$c);',
-       '&xor   ($c,$d);',
-       '&add   ($e,eval(4*($j++&15))."(%rsp)");',      # X[]+K xfer
-       '&and   (@T[1],$d);',
-       '&and   (@T[0],$c);',   # ($b&($c^$d))
+       '&add   ($e,eval(4*($j&15))."(%rsp)");',# X[]+K xfer
+       '&and   (@T[0],$c)      if ($j>=40);',  # (b^c)&(c^d)
+       '&xor   ($c,$d)         if ($j>=40);',  # restore $c
+
        '&$_ror ($b,7);',       # $b>>>2
-       '&add   ($e,@T[1]);',
-       '&mov   (@T[1],$a);',   # $b in next round
+       '&mov   (@T[1],$a);',   # $b for next round
+       '&xor   (@T[0],$c);',
+
        '&$_rol ($a,5);',
        '&add   ($e,@T[0]);',
-       '&xor   ($c,$d);',      # restore $c
-       '&add   ($e,$a);'       .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       '&xor   (@T[1],$c)      if ($j==59);'.
+       '&xor   (@T[1],$b)      if ($j< 59);',  # b^c for next round
+
+       '&xor   ($b,$c)         if ($j< 59);',  # c^d for next round
+       '&add   ($e,$a);'       .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
        );
+
+sub body_40_59 () {    # ((b^c)&(c^d))^c
+    # on entry @T[0]=(b^c), (c^=d)
+    $rx++;
+
+    use integer;
+    my ($k,$n);
+    my @r=@body_40_59;
+
        $n = scalar(@r);
        $k=(($jj+1)*12/20)*20*$n/12;    # 12 aesencs per these 20 rounds
-       @r[$k%$n].='&$aesenc();'        if ($jj==$k/$n);
+       @r[$k%$n].='&$aesenc();'        if ($jj==$k/$n && $rx!=40);
        $jj++;
+
     return @r;
 }
 $code.=<<___;
-.align 16
+.align 32
 .Loop_ssse3:
 ___
        &Xupdate_ssse3_16_31(\&body_00_19);
@@ -553,7 +644,7 @@ ___
        &Xupdate_ssse3_32_79(\&body_40_59);
        &Xupdate_ssse3_32_79(\&body_40_59);
        &Xupdate_ssse3_32_79(\&body_20_39);
-       &Xuplast_ssse3_80(\&body_20_39);        # can jump to "done"
+       &Xuplast_ssse3_80(\&body_20_39,".Ldone_ssse3"); # can jump to "done"
 
                                $saved_j=$j; @saved_V=@V;
                                $saved_r=$r; @saved_rndkey=@rndkey;
@@ -575,11 +666,13 @@ $code.=<<___;
        mov     @T[0],4($ctx)
        mov     @T[0],$B                        # magic seed
        mov     $C,8($ctx)
+       mov     $C,@T[1]
        mov     $D,12($ctx)
+       xor     $D,@T[1]
        mov     $E,16($ctx)
+       and     @T[1],@T[0]
        jmp     .Loop_ssse3
 
-.align 16
 .Ldone_ssse3:
 ___
                                $jj=$j=$saved_j; @V=@saved_V;
@@ -631,7 +724,278 @@ $code.=<<___;
 .size  aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
 ___
 
-$j=$jj=$r=$sn=0;
+                                               if ($stitched_decrypt) {{{
+# reset
+($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10");
+$j=$jj=$r=$rx=0;
+$Xi=4;
+
+# reassign for Atom Silvermont (see above)
+($inout0,$inout1,$inout2,$inout3,$rndkey0)=map("%xmm$_",(0..4));
+@X=map("%xmm$_",(8..13,6,7));
+@Tx=map("%xmm$_",(14,15,5));
+
+my @aes256_dec = (
+       '&movdqu($inout0,"0x00($in0)");',
+       '&movdqu($inout1,"0x10($in0)"); &pxor   ($inout0,$rndkey0);',
+       '&movdqu($inout2,"0x20($in0)"); &pxor   ($inout1,$rndkey0);',
+       '&movdqu($inout3,"0x30($in0)"); &pxor   ($inout2,$rndkey0);',
+
+       '&pxor  ($inout3,$rndkey0);     &movups ($rndkey0,"16-112($key)");',
+       '&movaps("64(%rsp)",@X[2]);',   # save IV, originally @X[3]
+       undef,undef
+       );
+for ($i=0;$i<13;$i++) {
+    push (@aes256_dec,(
+       '&aesdec        ($inout0,$rndkey0);',
+       '&aesdec        ($inout1,$rndkey0);',
+       '&aesdec        ($inout2,$rndkey0);',
+       '&aesdec        ($inout3,$rndkey0);     &movups($rndkey0,"'.(16*($i+2)-112).'($key)");'
+       ));
+    push (@aes256_dec,(undef,undef))   if (($i>=3 && $i<=5) || $i>=11);
+    push (@aes256_dec,(undef,undef))   if ($i==5);
+}
+push(@aes256_dec,(
+       '&aesdeclast    ($inout0,$rndkey0);     &movups (@X[0],"0x00($in0)");',
+       '&aesdeclast    ($inout1,$rndkey0);     &movups (@X[1],"0x10($in0)");',
+       '&aesdeclast    ($inout2,$rndkey0);     &movups (@X[2],"0x20($in0)");',
+       '&aesdeclast    ($inout3,$rndkey0);     &movups (@X[3],"0x30($in0)");',
+
+       '&xorps         ($inout0,"64(%rsp)");   &movdqu ($rndkey0,"-112($key)");',
+       '&xorps         ($inout1,@X[0]);        &movups ("0x00($out,$in0)",$inout0);',
+       '&xorps         ($inout2,@X[1]);        &movups ("0x10($out,$in0)",$inout1);',
+       '&xorps         ($inout3,@X[2]);        &movups ("0x20($out,$in0)",$inout2);',
+
+       '&movups        ("0x30($out,$in0)",$inout3);'
+       ));
+
+sub body_00_19_dec () {        # ((c^d)&b)^d
+    # on start @T[0]=(c^d)&b
+    return &body_20_39_dec() if ($rx==19);
+
+    my @r=@body_00_19;
+
+       unshift (@r,@aes256_dec[$rx])   if (@aes256_dec[$rx]);
+       $rx++;
+
+    return @r;
+}
+
+sub body_20_39_dec () {        # b^d^c
+    # on entry @T[0]=b^d
+    return &body_40_59_dec() if ($rx==39);
+  
+    my @r=@body_20_39;
+
+       unshift (@r,@aes256_dec[$rx])   if (@aes256_dec[$rx]);
+       $rx++;
+
+    return @r;
+}
+
+sub body_40_59_dec () {        # ((b^c)&(c^d))^c
+    # on entry @T[0]=(b^c), (c^=d)
+
+    my @r=@body_40_59;
+
+       unshift (@r,@aes256_dec[$rx])   if (@aes256_dec[$rx]);
+       $rx++;
+
+    return @r;
+}
+
+$code.=<<___;
+.globl aesni256_cbc_sha1_dec
+.type  aesni256_cbc_sha1_dec,\@abi-omnipotent
+.align 32
+aesni256_cbc_sha1_dec:
+       # caller should check for SSSE3 and AES-NI bits
+       mov     OPENSSL_ia32cap_P+0(%rip),%r10d
+       mov     OPENSSL_ia32cap_P+4(%rip),%r11d
+___
+$code.=<<___ if ($avx);
+       and     \$`1<<28`,%r11d         # mask AVX bit
+       and     \$`1<<30`,%r10d         # mask "Intel CPU" bit
+       or      %r11d,%r10d
+       cmp     \$`1<<28|1<<30`,%r10d
+       je      aesni256_cbc_sha1_dec_avx
+___
+$code.=<<___;
+       jmp     aesni256_cbc_sha1_dec_ssse3
+       ret
+.size  aesni256_cbc_sha1_dec,.-aesni256_cbc_sha1_dec
+
+.type  aesni256_cbc_sha1_dec_ssse3,\@function,6
+.align 32
+aesni256_cbc_sha1_dec_ssse3:
+       mov     `($win64?56:8)`(%rsp),$inp      # load 7th argument
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       lea     `-104-($win64?10*16:0)`(%rsp),%rsp
+___
+$code.=<<___ if ($win64);
+       movaps  %xmm6,96+0(%rsp)
+       movaps  %xmm7,96+16(%rsp)
+       movaps  %xmm8,96+32(%rsp)
+       movaps  %xmm9,96+48(%rsp)
+       movaps  %xmm10,96+64(%rsp)
+       movaps  %xmm11,96+80(%rsp)
+       movaps  %xmm12,96+96(%rsp)
+       movaps  %xmm13,96+112(%rsp)
+       movaps  %xmm14,96+128(%rsp)
+       movaps  %xmm15,96+144(%rsp)
+.Lprologue_dec_ssse3:
+___
+$code.=<<___;
+       mov     $in0,%r12                       # reassign arguments
+       mov     $out,%r13
+       mov     $len,%r14
+       lea     112($key),%r15                  # size optimization
+       movdqu  ($ivp),@X[3]                    # load IV
+       #mov    $ivp,88(%rsp)                   # save $ivp
+___
+($in0,$out,$len,$key)=map("%r$_",(12..15));    # reassign arguments
+$code.=<<___;
+       shl     \$6,$len
+       sub     $in0,$out
+       add     $inp,$len               # end of input
+
+       lea     K_XX_XX(%rip),$K_XX_XX
+       mov     0($ctx),$A              # load context
+       mov     4($ctx),$B
+       mov     8($ctx),$C
+       mov     12($ctx),$D
+       mov     $B,@T[0]                # magic seed
+       mov     16($ctx),$E
+       mov     $C,@T[1]
+       xor     $D,@T[1]
+       and     @T[1],@T[0]
+
+       movdqa  64($K_XX_XX),@Tx[2]     # pbswap mask
+       movdqa  0($K_XX_XX),@Tx[1]      # K_00_19
+       movdqu  0($inp),@X[-4&7]        # load input to %xmm[0-3]
+       movdqu  16($inp),@X[-3&7]
+       movdqu  32($inp),@X[-2&7]
+       movdqu  48($inp),@X[-1&7]
+       pshufb  @Tx[2],@X[-4&7]         # byte swap
+       add     \$64,$inp
+       pshufb  @Tx[2],@X[-3&7]
+       pshufb  @Tx[2],@X[-2&7]
+       pshufb  @Tx[2],@X[-1&7]
+       paddd   @Tx[1],@X[-4&7]         # add K_00_19
+       paddd   @Tx[1],@X[-3&7]
+       paddd   @Tx[1],@X[-2&7]
+       movdqa  @X[-4&7],0(%rsp)        # X[]+K xfer to IALU
+       psubd   @Tx[1],@X[-4&7]         # restore X[]
+       movdqa  @X[-3&7],16(%rsp)
+       psubd   @Tx[1],@X[-3&7]
+       movdqa  @X[-2&7],32(%rsp)
+       psubd   @Tx[1],@X[-2&7]
+       movdqu  -112($key),$rndkey0     # $key[0]
+       jmp     .Loop_dec_ssse3
+
+.align 32
+.Loop_dec_ssse3:
+___
+       &Xupdate_ssse3_16_31(\&body_00_19_dec);
+       &Xupdate_ssse3_16_31(\&body_00_19_dec);
+       &Xupdate_ssse3_16_31(\&body_00_19_dec);
+       &Xupdate_ssse3_16_31(\&body_00_19_dec);
+       &Xupdate_ssse3_32_79(\&body_00_19_dec);
+       &Xupdate_ssse3_32_79(\&body_20_39_dec);
+       &Xupdate_ssse3_32_79(\&body_20_39_dec);
+       &Xupdate_ssse3_32_79(\&body_20_39_dec);
+       &Xupdate_ssse3_32_79(\&body_20_39_dec);
+       &Xupdate_ssse3_32_79(\&body_20_39_dec);
+       &Xupdate_ssse3_32_79(\&body_40_59_dec);
+       &Xupdate_ssse3_32_79(\&body_40_59_dec);
+       &Xupdate_ssse3_32_79(\&body_40_59_dec);
+       &Xupdate_ssse3_32_79(\&body_40_59_dec);
+       &Xupdate_ssse3_32_79(\&body_40_59_dec);
+       &Xupdate_ssse3_32_79(\&body_20_39_dec);
+       &Xuplast_ssse3_80(\&body_20_39_dec,".Ldone_dec_ssse3"); # can jump to "done"
+
+                               $saved_j=$j;   @saved_V=@V;
+                               $saved_rx=$rx;
+
+       &Xloop_ssse3(\&body_20_39_dec);
+       &Xloop_ssse3(\&body_20_39_dec);
+       &Xloop_ssse3(\&body_20_39_dec);
+
+       eval(@aes256_dec[-1]);                  # last store
+$code.=<<___;
+       lea     64($in0),$in0
+
+       add     0($ctx),$A                      # update context
+       add     4($ctx),@T[0]
+       add     8($ctx),$C
+       add     12($ctx),$D
+       mov     $A,0($ctx)
+       add     16($ctx),$E
+       mov     @T[0],4($ctx)
+       mov     @T[0],$B                        # magic seed
+       mov     $C,8($ctx)
+       mov     $C,@T[1]
+       mov     $D,12($ctx)
+       xor     $D,@T[1]
+       mov     $E,16($ctx)
+       and     @T[1],@T[0]
+       jmp     .Loop_dec_ssse3
+
+.Ldone_dec_ssse3:
+___
+                               $jj=$j=$saved_j; @V=@saved_V;
+                               $rx=$saved_rx;
+
+       &Xtail_ssse3(\&body_20_39_dec);
+       &Xtail_ssse3(\&body_20_39_dec);
+       &Xtail_ssse3(\&body_20_39_dec);
+
+       eval(@aes256_dec[-1]);                  # last store
+$code.=<<___;
+       add     0($ctx),$A                      # update context
+       add     4($ctx),@T[0]
+       add     8($ctx),$C
+       mov     $A,0($ctx)
+       add     12($ctx),$D
+       mov     @T[0],4($ctx)
+       add     16($ctx),$E
+       mov     $C,8($ctx)
+       mov     $D,12($ctx)
+       mov     $E,16($ctx)
+       movups  @X[3],($ivp)                    # write IV
+___
+$code.=<<___ if ($win64);
+       movaps  96+0(%rsp),%xmm6
+       movaps  96+16(%rsp),%xmm7
+       movaps  96+32(%rsp),%xmm8
+       movaps  96+48(%rsp),%xmm9
+       movaps  96+64(%rsp),%xmm10
+       movaps  96+80(%rsp),%xmm11
+       movaps  96+96(%rsp),%xmm12
+       movaps  96+112(%rsp),%xmm13
+       movaps  96+128(%rsp),%xmm14
+       movaps  96+144(%rsp),%xmm15
+___
+$code.=<<___;
+       lea     `104+($win64?10*16:0)`(%rsp),%rsi
+       mov     0(%rsi),%r15
+       mov     8(%rsi),%r14
+       mov     16(%rsi),%r13
+       mov     24(%rsi),%r12
+       mov     32(%rsi),%rbp
+       mov     40(%rsi),%rbx
+       lea     48(%rsi),%rsp
+.Lepilogue_dec_ssse3:
+       ret
+.size  aesni256_cbc_sha1_dec_ssse3,.-aesni256_cbc_sha1_dec_ssse3
+___
+                                               }}}
+$j=$jj=$r=$rx=0;
 
 if ($avx) {
 my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10");
@@ -641,13 +1005,17 @@ my @X=map("%xmm$_",(4..7,0..3));
 my @Tx=map("%xmm$_",(8..10));
 my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp");   # size optimization
 my @T=("%esi","%edi");
+my ($rndkey0,$iv,$in)=map("%xmm$_",(11..13));
+my @rndkey=("%xmm14","%xmm15");
+my ($inout0,$inout1,$inout2,$inout3)=map("%xmm$_",(12..15));   # for dec
+my $Kx=@Tx[2];
 
 my $_rol=sub { &shld(@_[0],@_) };
 my $_ror=sub { &shrd(@_[0],@_) };
 
 $code.=<<___;
 .type  aesni_cbc_sha1_enc_avx,\@function,6
-.align 16
+.align 32
 aesni_cbc_sha1_enc_avx:
        mov     `($win64?56:8)`(%rsp),$inp      # load 7th argument
        #shr    \$6,$len                        # debugging artefact
@@ -680,17 +1048,16 @@ $code.=<<___;
        mov     $in0,%r12                       # reassign arguments
        mov     $out,%r13
        mov     $len,%r14
-       mov     $key,%r15
+       lea     112($key),%r15                  # size optimization
        vmovdqu ($ivp),$iv                      # load IV
        mov     $ivp,88(%rsp)                   # save $ivp
 ___
-my ($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments
+($in0,$out,$len,$key)=map("%r$_",(12..15));    # reassign arguments
 my $rounds="${ivp}d";
 $code.=<<___;
        shl     \$6,$len
        sub     $in0,$out
-       mov     240($key),$rounds
-       add     \$112,$key              # size optimization
+       mov     240-112($key),$rounds
        add     $inp,$len               # end of input
 
        lea     K_XX_XX(%rip),$K_XX_XX
@@ -700,9 +1067,12 @@ $code.=<<___;
        mov     12($ctx),$D
        mov     $B,@T[0]                # magic seed
        mov     16($ctx),$E
+       mov     $C,@T[1]
+       xor     $D,@T[1]
+       and     @T[1],@T[0]
 
        vmovdqa 64($K_XX_XX),@X[2]      # pbswap mask
-       vmovdqa 0($K_XX_XX),@Tx[1]      # K_00_19
+       vmovdqa 0($K_XX_XX),$Kx         # K_00_19
        vmovdqu 0($inp),@X[-4&7]        # load input to %xmm[0-3]
        vmovdqu 16($inp),@X[-3&7]
        vmovdqu 32($inp),@X[-2&7]
@@ -712,13 +1082,13 @@ $code.=<<___;
        vpshufb @X[2],@X[-3&7],@X[-3&7]
        vpshufb @X[2],@X[-2&7],@X[-2&7]
        vpshufb @X[2],@X[-1&7],@X[-1&7]
-       vpaddd  @Tx[1],@X[-4&7],@X[0]   # add K_00_19
-       vpaddd  @Tx[1],@X[-3&7],@X[1]
-       vpaddd  @Tx[1],@X[-2&7],@X[2]
+       vpaddd  $Kx,@X[-4&7],@X[0]      # add K_00_19
+       vpaddd  $Kx,@X[-3&7],@X[1]
+       vpaddd  $Kx,@X[-2&7],@X[2]
        vmovdqa @X[0],0(%rsp)           # X[]+K xfer to IALU
        vmovdqa @X[1],16(%rsp)
        vmovdqa @X[2],32(%rsp)
-       vmovups -112($key),$rndkey0     # $key[0]
+       vmovups -112($key),$rndkey[1]   # $key[0]
        vmovups 16-112($key),$rndkey[0] # forward reference
        jmp     .Loop_avx
 ___
@@ -728,14 +1098,14 @@ my $aesenc=sub {
   my ($n,$k)=($r/10,$r%10);
     if ($k==0) {
       $code.=<<___;
-       vmovups         `16*$n`($in0),$in               # load input
-       vxorps          $rndkey0,$in,$in
+       vmovdqu         `16*$n`($in0),$in               # load input
+       vpxor           $rndkey[1],$in,$in
 ___
       $code.=<<___ if ($n);
        vmovups         $iv,`16*($n-1)`($out,$in0)      # write output
 ___
       $code.=<<___;
-       vxorps          $in,$iv,$iv
+       vpxor           $in,$iv,$iv
        vaesenc         $rndkey[0],$iv,$iv
        vmovups         `32+16*$k-112`($key),$rndkey[1]
 ___
@@ -755,6 +1125,7 @@ ___
        vmovups         `32+16*($k+3)-112`($key),$rndkey[0]
 .Lvaesenclast$sn:
        vaesenclast     $rndkey[0],$iv,$iv
+       vmovups         -112($key),$rndkey[0]
        vmovups         16-112($key),$rndkey[1]         # forward reference
 ___
     } else {
@@ -778,10 +1149,10 @@ sub Xupdate_avx_16_31()          # recall that $Xi starts with 4
         eval(shift(@insns));
         eval(shift(@insns));
 
-         &vpaddd       (@Tx[1],@Tx[1],@X[-1&7]);
+         &vpaddd       (@Tx[1],$Kx,@X[-1&7]);
         eval(shift(@insns));
         eval(shift(@insns));
-       &vpsrldq(@Tx[0],@X[-1&7],4);    # "X[-3]", 3 dwords
+       &vpsrldq(@Tx[0],@X[-1&7],4);            # "X[-3]", 3 dwords
         eval(shift(@insns));
         eval(shift(@insns));
        &vpxor  (@X[0],@X[0],@X[-4&7]);         # "X[0]"^="X[-16]"
@@ -807,31 +1178,31 @@ sub Xupdate_avx_16_31()          # recall that $Xi starts with 4
         eval(shift(@insns));
         eval(shift(@insns));
 
-       &vpslldq(@Tx[2],@X[0],12);              # "X[0]"<<96, extract one dword
+       &vpslldq(@Tx[1],@X[0],12);              # "X[0]"<<96, extract one dword
        &vpaddd (@X[0],@X[0],@X[0]);
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
 
-       &vpsrld (@Tx[1],@Tx[2],30);
        &vpor   (@X[0],@X[0],@Tx[0]);           # "X[0]"<<<=1
+       &vpsrld (@Tx[0],@Tx[1],30);
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
 
-       &vpslld (@Tx[2],@Tx[2],2);
-       &vpxor  (@X[0],@X[0],@Tx[1]);
+       &vpslld (@Tx[1],@Tx[1],2);
+       &vpxor  (@X[0],@X[0],@Tx[0]);
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
 
-       &vpxor  (@X[0],@X[0],@Tx[2]);           # "X[0]"^=("X[0]">>96)<<<2
+       &vpxor  (@X[0],@X[0],@Tx[1]);           # "X[0]"^=("X[0]">>96)<<<2
         eval(shift(@insns));
         eval(shift(@insns));
-         &vmovdqa      (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)");       # K_XX_XX
+         &vmovdqa      ($Kx,eval(16*(($Xi)/5))."($K_XX_XX)")   if ($Xi%5==0);  # K_XX_XX
         eval(shift(@insns));
         eval(shift(@insns));
 
@@ -839,7 +1210,6 @@ sub Xupdate_avx_16_31()            # recall that $Xi starts with 4
         foreach (@insns) { eval; }     # remaining instructions [if any]
 
   $Xi++;       push(@X,shift(@X));     # "rotate" X[]
-               push(@Tx,shift(@Tx));
 }
 
 sub Xupdate_avx_32_79()
@@ -858,12 +1228,8 @@ sub Xupdate_avx_32_79()
        &vpxor  (@X[0],@X[0],@X[-7&7]);         # "X[0]"^="X[-28]"
         eval(shift(@insns));
         eval(shift(@insns))    if (@insns[0] !~ /&ro[rl]/);
-       if ($Xi%5) {
-         &vmovdqa      (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX...
-       } else {                        # ... or load next one
-         &vmovdqa      (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)");
-       }
-         &vpaddd       (@Tx[1],@Tx[1],@X[-1&7]);
+         &vpaddd       (@Tx[1],$Kx,@X[-1&7]);
+         &vmovdqa      ($Kx,eval(16*($Xi/5))."($K_XX_XX)")     if ($Xi%5==0);
         eval(shift(@insns));           # ror
         eval(shift(@insns));
 
@@ -893,7 +1259,6 @@ sub Xupdate_avx_32_79()
        &vpor   (@X[0],@X[0],@Tx[0]);           # "X[0]"<<<=2
         eval(shift(@insns));           # body_20_39
         eval(shift(@insns));
-         &vmovdqa      (@Tx[1],@X[0])  if ($Xi<19);
         eval(shift(@insns));
         eval(shift(@insns));           # rol
         eval(shift(@insns));
@@ -904,7 +1269,6 @@ sub Xupdate_avx_32_79()
         foreach (@insns) { eval; }     # remaining instructions
 
   $Xi++;       push(@X,shift(@X));     # "rotate" X[]
-               push(@Tx,shift(@Tx));
 }
 
 sub Xuplast_avx_80()
@@ -914,28 +1278,26 @@ sub Xuplast_avx_80()
   my ($a,$b,$c,$d,$e);
 
         eval(shift(@insns));
-         &vpaddd       (@Tx[1],@Tx[1],@X[-1&7]);
+         &vpaddd       (@Tx[1],$Kx,@X[-1&7]);
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
 
-         &movdqa       (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
+         &vmovdqa      (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
 
         foreach (@insns) { eval; }             # remaining instructions
 
        &cmp    ($inp,$len);
-       &je     (".Ldone_avx");
+       &je     (shift);
 
-       unshift(@Tx,pop(@Tx));
-
-       &vmovdqa(@X[2],"64($K_XX_XX)");         # pbswap mask
-       &vmovdqa(@Tx[1],"0($K_XX_XX)");         # K_00_19
+       &vmovdqa(@Tx[1],"64($K_XX_XX)");        # pbswap mask
+       &vmovdqa($Kx,"0($K_XX_XX)");            # K_00_19
        &vmovdqu(@X[-4&7],"0($inp)");           # load input
        &vmovdqu(@X[-3&7],"16($inp)");
        &vmovdqu(@X[-2&7],"32($inp)");
        &vmovdqu(@X[-1&7],"48($inp)");
-       &vpshufb(@X[-4&7],@X[-4&7],@X[2]);      # byte swap
+       &vpshufb(@X[-4&7],@X[-4&7],@Tx[1]);     # byte swap
        &add    ($inp,64);
 
   $Xi=0;
@@ -949,15 +1311,15 @@ sub Xloop_avx()
 
         eval(shift(@insns));
         eval(shift(@insns));
-       &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]);
+       &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@Tx[1]);
         eval(shift(@insns));
         eval(shift(@insns));
-       &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]);
+       &vpaddd (@Tx[0],@X[($Xi-4)&7],$Kx);
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
-       &vmovdqa(eval(16*$Xi)."(%rsp)",@X[$Xi&7]);      # X[]+K xfer to IALU
+       &vmovdqa(eval(16*$Xi)."(%rsp)",@Tx[0]); # X[]+K xfer to IALU
         eval(shift(@insns));
         eval(shift(@insns));
 
@@ -975,7 +1337,7 @@ sub Xtail_avx()
 }
 
 $code.=<<___;
-.align 16
+.align 32
 .Loop_avx:
 ___
        &Xupdate_avx_16_31(\&body_00_19);
@@ -994,7 +1356,7 @@ ___
        &Xupdate_avx_32_79(\&body_40_59);
        &Xupdate_avx_32_79(\&body_40_59);
        &Xupdate_avx_32_79(\&body_20_39);
-       &Xuplast_avx_80(\&body_20_39);  # can jump to "done"
+       &Xuplast_avx_80(\&body_20_39,".Ldone_avx");     # can jump to "done"
 
                                $saved_j=$j; @saved_V=@V;
                                $saved_r=$r; @saved_rndkey=@rndkey;
@@ -1016,11 +1378,13 @@ $code.=<<___;
        mov     @T[0],4($ctx)
        mov     @T[0],$B                        # magic seed
        mov     $C,8($ctx)
+       mov     $C,@T[1]
        mov     $D,12($ctx)
+       xor     $D,@T[1]
        mov     $E,16($ctx)
+       and     @T[1],@T[0]
        jmp     .Loop_avx
 
-.align 16
 .Ldone_avx:
 ___
                                $jj=$j=$saved_j; @V=@saved_V;
@@ -1072,6 +1436,218 @@ $code.=<<___;
        ret
 .size  aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx
 ___
+
+                                               if ($stitched_decrypt) {{{
+# reset
+($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10");
+
+$j=$jj=$r=$rx=0;
+$Xi=4;
+
+@aes256_dec = (
+       '&vpxor ($inout0,$rndkey0,"0x00($in0)");',
+       '&vpxor ($inout1,$rndkey0,"0x10($in0)");',
+       '&vpxor ($inout2,$rndkey0,"0x20($in0)");',
+       '&vpxor ($inout3,$rndkey0,"0x30($in0)");',
+
+       '&vmovups($rndkey0,"16-112($key)");',
+       '&vmovups("64(%rsp)",@X[2]);',          # save IV, originally @X[3]
+       undef,undef
+       );
+for ($i=0;$i<13;$i++) {
+    push (@aes256_dec,(
+       '&vaesdec       ($inout0,$inout0,$rndkey0);',
+       '&vaesdec       ($inout1,$inout1,$rndkey0);',
+       '&vaesdec       ($inout2,$inout2,$rndkey0);',
+       '&vaesdec       ($inout3,$inout3,$rndkey0);     &vmovups($rndkey0,"'.(16*($i+2)-112).'($key)");'
+       ));
+    push (@aes256_dec,(undef,undef))   if (($i>=3 && $i<=5) || $i>=11);
+    push (@aes256_dec,(undef,undef))   if ($i==5);
+}
+push(@aes256_dec,(
+       '&vaesdeclast   ($inout0,$inout0,$rndkey0);     &vmovups(@X[0],"0x00($in0)");',
+       '&vaesdeclast   ($inout1,$inout1,$rndkey0);     &vmovups(@X[1],"0x10($in0)");',
+       '&vaesdeclast   ($inout2,$inout2,$rndkey0);     &vmovups(@X[2],"0x20($in0)");',
+       '&vaesdeclast   ($inout3,$inout3,$rndkey0);     &vmovups(@X[3],"0x30($in0)");',
+
+       '&vxorps        ($inout0,$inout0,"64(%rsp)");   &vmovdqu($rndkey0,"-112($key)");',
+       '&vxorps        ($inout1,$inout1,@X[0]);        &vmovups("0x00($out,$in0)",$inout0);',
+       '&vxorps        ($inout2,$inout2,@X[1]);        &vmovups("0x10($out,$in0)",$inout1);',
+       '&vxorps        ($inout3,$inout3,@X[2]);        &vmovups("0x20($out,$in0)",$inout2);',
+
+       '&vmovups       ("0x30($out,$in0)",$inout3);'
+       ));
+
+$code.=<<___;
+.type  aesni256_cbc_sha1_dec_avx,\@function,6
+.align 32
+aesni256_cbc_sha1_dec_avx:
+       mov     `($win64?56:8)`(%rsp),$inp      # load 7th argument
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       lea     `-104-($win64?10*16:0)`(%rsp),%rsp
+___
+$code.=<<___ if ($win64);
+       movaps  %xmm6,96+0(%rsp)
+       movaps  %xmm7,96+16(%rsp)
+       movaps  %xmm8,96+32(%rsp)
+       movaps  %xmm9,96+48(%rsp)
+       movaps  %xmm10,96+64(%rsp)
+       movaps  %xmm11,96+80(%rsp)
+       movaps  %xmm12,96+96(%rsp)
+       movaps  %xmm13,96+112(%rsp)
+       movaps  %xmm14,96+128(%rsp)
+       movaps  %xmm15,96+144(%rsp)
+.Lprologue_dec_avx:
+___
+$code.=<<___;
+       vzeroall
+       mov     $in0,%r12                       # reassign arguments
+       mov     $out,%r13
+       mov     $len,%r14
+       lea     112($key),%r15                  # size optimization
+       vmovdqu ($ivp),@X[3]                    # load IV
+___
+($in0,$out,$len,$key)=map("%r$_",(12..15));    # reassign arguments
+$code.=<<___;
+       shl     \$6,$len
+       sub     $in0,$out
+       add     $inp,$len               # end of input
+
+       lea     K_XX_XX(%rip),$K_XX_XX
+       mov     0($ctx),$A              # load context
+       mov     4($ctx),$B
+       mov     8($ctx),$C
+       mov     12($ctx),$D
+       mov     $B,@T[0]                # magic seed
+       mov     16($ctx),$E
+       mov     $C,@T[1]
+       xor     $D,@T[1]
+       and     @T[1],@T[0]
+
+       vmovdqa 64($K_XX_XX),@X[2]      # pbswap mask
+       vmovdqa 0($K_XX_XX),$Kx         # K_00_19
+       vmovdqu 0($inp),@X[-4&7]        # load input to %xmm[0-3]
+       vmovdqu 16($inp),@X[-3&7]
+       vmovdqu 32($inp),@X[-2&7]
+       vmovdqu 48($inp),@X[-1&7]
+       vpshufb @X[2],@X[-4&7],@X[-4&7] # byte swap
+       add     \$64,$inp
+       vpshufb @X[2],@X[-3&7],@X[-3&7]
+       vpshufb @X[2],@X[-2&7],@X[-2&7]
+       vpshufb @X[2],@X[-1&7],@X[-1&7]
+       vpaddd  $Kx,@X[-4&7],@X[0]      # add K_00_19
+       vpaddd  $Kx,@X[-3&7],@X[1]
+       vpaddd  $Kx,@X[-2&7],@X[2]
+       vmovdqa @X[0],0(%rsp)           # X[]+K xfer to IALU
+       vmovdqa @X[1],16(%rsp)
+       vmovdqa @X[2],32(%rsp)
+       vmovups -112($key),$rndkey0     # $key[0]
+       jmp     .Loop_dec_avx
+
+.align 32
+.Loop_dec_avx:
+___
+       &Xupdate_avx_16_31(\&body_00_19_dec);
+       &Xupdate_avx_16_31(\&body_00_19_dec);
+       &Xupdate_avx_16_31(\&body_00_19_dec);
+       &Xupdate_avx_16_31(\&body_00_19_dec);
+       &Xupdate_avx_32_79(\&body_00_19_dec);
+       &Xupdate_avx_32_79(\&body_20_39_dec);
+       &Xupdate_avx_32_79(\&body_20_39_dec);
+       &Xupdate_avx_32_79(\&body_20_39_dec);
+       &Xupdate_avx_32_79(\&body_20_39_dec);
+       &Xupdate_avx_32_79(\&body_20_39_dec);
+       &Xupdate_avx_32_79(\&body_40_59_dec);
+       &Xupdate_avx_32_79(\&body_40_59_dec);
+       &Xupdate_avx_32_79(\&body_40_59_dec);
+       &Xupdate_avx_32_79(\&body_40_59_dec);
+       &Xupdate_avx_32_79(\&body_40_59_dec);
+       &Xupdate_avx_32_79(\&body_20_39_dec);
+       &Xuplast_avx_80(\&body_20_39_dec,".Ldone_dec_avx");     # can jump to "done"
+
+                               $saved_j=$j; @saved_V=@V;
+                               $saved_rx=$rx;
+
+       &Xloop_avx(\&body_20_39_dec);
+       &Xloop_avx(\&body_20_39_dec);
+       &Xloop_avx(\&body_20_39_dec);
+
+       eval(@aes256_dec[-1]);                  # last store
+$code.=<<___;
+       lea     64($in0),$in0
+
+       add     0($ctx),$A                      # update context
+       add     4($ctx),@T[0]
+       add     8($ctx),$C
+       add     12($ctx),$D
+       mov     $A,0($ctx)
+       add     16($ctx),$E
+       mov     @T[0],4($ctx)
+       mov     @T[0],$B                        # magic seed
+       mov     $C,8($ctx)
+       mov     $C,@T[1]
+       mov     $D,12($ctx)
+       xor     $D,@T[1]
+       mov     $E,16($ctx)
+       and     @T[1],@T[0]
+       jmp     .Loop_dec_avx
+
+.Ldone_dec_avx:
+___
+                               $jj=$j=$saved_j; @V=@saved_V;
+                               $rx=$saved_rx;
+
+       &Xtail_avx(\&body_20_39_dec);
+       &Xtail_avx(\&body_20_39_dec);
+       &Xtail_avx(\&body_20_39_dec);
+
+       eval(@aes256_dec[-1]);                  # last store
+$code.=<<___;
+
+       add     0($ctx),$A                      # update context
+       add     4($ctx),@T[0]
+       add     8($ctx),$C
+       mov     $A,0($ctx)
+       add     12($ctx),$D
+       mov     @T[0],4($ctx)
+       add     16($ctx),$E
+       mov     $C,8($ctx)
+       mov     $D,12($ctx)
+       mov     $E,16($ctx)
+       vmovups @X[3],($ivp)                    # write IV
+       vzeroall
+___
+$code.=<<___ if ($win64);
+       movaps  96+0(%rsp),%xmm6
+       movaps  96+16(%rsp),%xmm7
+       movaps  96+32(%rsp),%xmm8
+       movaps  96+48(%rsp),%xmm9
+       movaps  96+64(%rsp),%xmm10
+       movaps  96+80(%rsp),%xmm11
+       movaps  96+96(%rsp),%xmm12
+       movaps  96+112(%rsp),%xmm13
+       movaps  96+128(%rsp),%xmm14
+       movaps  96+144(%rsp),%xmm15
+___
+$code.=<<___;
+       lea     `104+($win64?10*16:0)`(%rsp),%rsi
+       mov     0(%rsi),%r15
+       mov     8(%rsi),%r14
+       mov     16(%rsi),%r13
+       mov     24(%rsi),%r12
+       mov     32(%rsi),%rbp
+       mov     40(%rsi),%rbx
+       lea     48(%rsi),%rsp
+.Lepilogue_dec_avx:
+       ret
+.size  aesni256_cbc_sha1_dec_avx,.-aesni256_cbc_sha1_dec_avx
+___
+                                               }}}
 }
 $code.=<<___;
 .align 64
@@ -1081,11 +1657,180 @@ K_XX_XX:
 .long  0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc     # K_40_59
 .long  0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6     # K_60_79
 .long  0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f     # pbswap mask
+.byte  0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
 
 .asciz "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
 .align 64
 ___
+                                               if ($shaext) {{{
+($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10");
+
+$rounds="%r11d";
+
+($iv,$in,$rndkey0)=map("%xmm$_",(2,14,15));
+@rndkey=("%xmm0","%xmm1");
+$r=0;
+
+my ($BSWAP,$ABCD,$E,$E_,$ABCD_SAVE,$E_SAVE)=map("%xmm$_",(7..12));
+my @MSG=map("%xmm$_",(3..6));
+
+$code.=<<___;
+.type  aesni_cbc_sha1_enc_shaext,\@function,6
+.align 32
+aesni_cbc_sha1_enc_shaext:
+       mov     `($win64?56:8)`(%rsp),$inp      # load 7th argument
+___
+$code.=<<___ if ($win64);
+       lea     `-8-10*16`(%rsp),%rsp
+       movaps  %xmm6,-8-10*16(%rax)
+       movaps  %xmm7,-8-9*16(%rax)
+       movaps  %xmm8,-8-8*16(%rax)
+       movaps  %xmm9,-8-7*16(%rax)
+       movaps  %xmm10,-8-6*16(%rax)
+       movaps  %xmm11,-8-5*16(%rax)
+       movaps  %xmm12,-8-4*16(%rax)
+       movaps  %xmm13,-8-3*16(%rax)
+       movaps  %xmm14,-8-2*16(%rax)
+       movaps  %xmm15,-8-1*16(%rax)
+.Lprologue_shaext:
+___
+$code.=<<___;
+       movdqu  ($ctx),$ABCD
+       movd    16($ctx),$E
+       movdqa  K_XX_XX+0x50(%rip),$BSWAP       # byte-n-word swap
+
+       mov     240($key),$rounds
+       sub     $in0,$out
+       movups  ($key),$rndkey0                 # $key[0]
+       movups  16($key),$rndkey[0]             # forward reference
+       lea     112($key),$key                  # size optimization
+
+       pshufd  \$0b00011011,$ABCD,$ABCD        # flip word order
+       pshufd  \$0b00011011,$E,$E              # flip word order
+       jmp     .Loop_shaext
 
+.align 16
+.Loop_shaext:
+___
+       &$aesenc();
+$code.=<<___;
+       movdqu          ($inp),@MSG[0]
+       movdqa          $E,$E_SAVE              # offload $E
+       pshufb          $BSWAP,@MSG[0]
+       movdqu          0x10($inp),@MSG[1]
+       movdqa          $ABCD,$ABCD_SAVE        # offload $ABCD
+___
+       &$aesenc();
+$code.=<<___;
+       pshufb          $BSWAP,@MSG[1]
+
+       paddd           @MSG[0],$E
+       movdqu          0x20($inp),@MSG[2]
+       lea             0x40($inp),$inp
+       pxor            $E_SAVE,@MSG[0]         # black magic
+___
+       &$aesenc();
+$code.=<<___;
+       pxor            $E_SAVE,@MSG[0]         # black magic
+       movdqa          $ABCD,$E_
+       pshufb          $BSWAP,@MSG[2]
+       sha1rnds4       \$0,$E,$ABCD            # 0-3
+       sha1nexte       @MSG[1],$E_
+___
+       &$aesenc();
+$code.=<<___;
+       sha1msg1        @MSG[1],@MSG[0]
+       movdqu          -0x10($inp),@MSG[3]
+       movdqa          $ABCD,$E
+       pshufb          $BSWAP,@MSG[3]
+___
+       &$aesenc();
+$code.=<<___;
+       sha1rnds4       \$0,$E_,$ABCD           # 4-7
+       sha1nexte       @MSG[2],$E
+       pxor            @MSG[2],@MSG[0]
+       sha1msg1        @MSG[2],@MSG[1]
+___
+       &$aesenc();
+
+for($i=2;$i<20-4;$i++) {
+$code.=<<___;
+       movdqa          $ABCD,$E_
+       sha1rnds4       \$`int($i/5)`,$E,$ABCD  # 8-11
+       sha1nexte       @MSG[3],$E_
+___
+       &$aesenc();
+$code.=<<___;
+       sha1msg2        @MSG[3],@MSG[0]
+       pxor            @MSG[3],@MSG[1]
+       sha1msg1        @MSG[3],@MSG[2]
+___
+       ($E,$E_)=($E_,$E);
+       push(@MSG,shift(@MSG));
+
+       &$aesenc();
+}
+$code.=<<___;
+       movdqa          $ABCD,$E_
+       sha1rnds4       \$3,$E,$ABCD            # 64-67
+       sha1nexte       @MSG[3],$E_
+       sha1msg2        @MSG[3],@MSG[0]
+       pxor            @MSG[3],@MSG[1]
+___
+       &$aesenc();
+$code.=<<___;
+       movdqa          $ABCD,$E
+       sha1rnds4       \$3,$E_,$ABCD           # 68-71
+       sha1nexte       @MSG[0],$E
+       sha1msg2        @MSG[0],@MSG[1]
+___
+       &$aesenc();
+$code.=<<___;
+       movdqa          $E_SAVE,@MSG[0]
+       movdqa          $ABCD,$E_
+       sha1rnds4       \$3,$E,$ABCD            # 72-75
+       sha1nexte       @MSG[1],$E_
+___
+       &$aesenc();
+$code.=<<___;
+       movdqa          $ABCD,$E
+       sha1rnds4       \$3,$E_,$ABCD           # 76-79
+       sha1nexte       $MSG[0],$E
+___
+       while($r<40)    { &$aesenc(); }         # remaining aesenc's
+$code.=<<___;
+       dec             $len
+
+       paddd           $ABCD_SAVE,$ABCD
+       movups          $iv,48($out,$in0)       # write output
+       lea             64($in0),$in0
+       jnz             .Loop_shaext
+
+       pshufd  \$0b00011011,$ABCD,$ABCD
+       pshufd  \$0b00011011,$E,$E
+       movups  $iv,($ivp)                      # write IV
+       movdqu  $ABCD,($ctx)
+       movd    $E,16($ctx)
+___
+$code.=<<___ if ($win64);
+       movaps  -8-10*16(%rax),%xmm6
+       movaps  -8-9*16(%rax),%xmm7
+       movaps  -8-8*16(%rax),%xmm8
+       movaps  -8-7*16(%rax),%xmm9
+       movaps  -8-6*16(%rax),%xmm10
+       movaps  -8-5*16(%rax),%xmm11
+       movaps  -8-4*16(%rax),%xmm12
+       movaps  -8-3*16(%rax),%xmm13
+       movaps  -8-2*16(%rax),%xmm14
+       movaps  -8-1*16(%rax),%xmm15
+       mov     %rax,%rsp
+.Lepilogue_shaext:
+___
+$code.=<<___;
+       ret
+.size  aesni_cbc_sha1_enc_shaext,.-aesni_cbc_sha1_enc_shaext
+___
+                                               }}}
 # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
 #              CONTEXT *context,DISPATCHER_CONTEXT *disp)
 if ($win64) {
@@ -1127,7 +1872,21 @@ ssse3_handler:
        lea     (%rsi,%r10),%r10        # epilogue label
        cmp     %r10,%rbx               # context->Rip>=epilogue label
        jae     .Lcommon_seh_tail
+___
+$code.=<<___ if ($shaext);
+       lea     aesni_cbc_sha1_enc_shaext(%rip),%r10
+       cmp     %r10,%rbx
+       jb      .Lseh_no_shaext
 
+       lea     (%rax),%rsi
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$20,%ecx
+       .long   0xa548f3fc              # cld; rep movsq
+       lea     168(%rax),%rax          # adjust stack pointer
+       jmp     .Lcommon_seh_tail
+.Lseh_no_shaext:
+___
+$code.=<<___;
        lea     96(%rax),%rsi
        lea     512($context),%rdi      # &context.Xmm6
        mov     \$20,%ecx
@@ -1199,6 +1958,11 @@ $code.=<<___ if ($avx);
        .rva    .LSEH_end_aesni_cbc_sha1_enc_avx
        .rva    .LSEH_info_aesni_cbc_sha1_enc_avx
 ___
+$code.=<<___ if ($shaext);
+       .rva    .LSEH_begin_aesni_cbc_sha1_enc_shaext
+       .rva    .LSEH_end_aesni_cbc_sha1_enc_shaext
+       .rva    .LSEH_info_aesni_cbc_sha1_enc_shaext
+___
 $code.=<<___;
 .section       .xdata
 .align 8
@@ -1213,6 +1977,12 @@ $code.=<<___ if ($avx);
        .rva    ssse3_handler
        .rva    .Lprologue_avx,.Lepilogue_avx           # HandlerData[]
 ___
+$code.=<<___ if ($shaext);
+.LSEH_info_aesni_cbc_sha1_enc_shaext:
+       .byte   9,0,0,0
+       .rva    ssse3_handler
+       .rva    .Lprologue_shaext,.Lepilogue_shaext     # HandlerData[]
+___
 }
 
 ####################################################################
@@ -1223,28 +1993,65 @@ sub rex {
 
     $rex|=0x04                 if($dst>=8);
     $rex|=0x01                 if($src>=8);
-    push @opcode,$rex|0x40     if($rex);
+    unshift @opcode,$rex|0x40  if($rex);
+}
+
+sub sha1rnds4 {                # emit reg-reg sha1rnds4 as a raw .byte sequence
+    if ($_[0] =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
+      my @opcode=(0x0f,0x3a,0xcc);
+       rex(\@opcode,$3,$2);                            # REX goes in front if xmm8+ is used
+       push @opcode,0xc0|($2&7)|(($3&7)<<3);           # ModR/M
+       my $c=$1;
+       push @opcode,$c=~/^0/?oct($c):$c;               # imm8; leading 0 => oct() decodes it
+       return ".byte\t".join(',',@opcode);
+    } else {
+       return "sha1rnds4\t".$_[0];                     # anything else passes through as text
+    }
+}
+
+sub sha1op38 {         # emit reg-reg 0f,38-map SHA1 instructions as raw .byte sequences
+    my $instr = shift;
+    my %opcodelet = (
+               "sha1nexte" => 0xc8,
+               "sha1msg1"  => 0xc9,
+               "sha1msg2"  => 0xca     );
+
+    if (defined($opcodelet{$instr}) && $_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) {
+      my @opcode=(0x0f,0x38);
+       rex(\@opcode,$2,$1);                            # REX goes in front if xmm8+ is used
+       push @opcode,$opcodelet{$instr};
+       push @opcode,0xc0|($1&7)|(($2&7)<<3);           # ModR/M
+       return ".byte\t".join(',',@opcode);
+    } else {
+       return $instr."\t".$_[0];                       # unknown mnemonic or operand form: pass through
+    }
 }
 
 sub aesni {
   my $line=shift;
-  my @opcode=(0x66);
+  my @opcode=(0x0f,0x38);      # escape bytes first; REX and 0x66 get prepended below
 
     if ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) {
        my %opcodelet = (
-               "aesenc" => 0xdc,       "aesenclast" => 0xdd
+               "aesenc" => 0xdc,       "aesenclast" => 0xdd,
+               "aesdec" => 0xde,       "aesdeclast" => 0xdf
        );
        return undef if (!defined($opcodelet{$1}));
        rex(\@opcode,$3,$2);
-       push @opcode,0x0f,0x38,$opcodelet{$1};
-       push @opcode,0xc0|($2&7)|(($3&7)<<3);   # ModR/M
+       push @opcode,$opcodelet{$1},0xc0|($2&7)|(($3&7)<<3);    # ModR/M
+       unshift @opcode,0x66;   # ends up in front of the REX byte, if rex() added one
        return ".byte\t".join(',',@opcode);
     }
     return $line;
 }
 
-$code =~ s/\`([^\`]*)\`/eval($1)/gem;
-$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem;
+foreach (split("\n",$code)) {                  # post-process generated code one line at a time
+        s/\`([^\`]*)\`/eval $1/geo;            # evaluate back-ticked Perl expressions
+
+       s/\b(sha1rnds4)\s+(.*)/sha1rnds4($2)/geo                or      # first encoder that matches wins
+       s/\b(sha1[^\s]*)\s+(.*)/sha1op38($1,$2)/geo             or
+       s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/geo;
 
-print $code;
+       print $_,"\n";
+}
 close STDOUT;
diff --git a/crypto/aes/asm/aesni-sha256-x86_64.pl b/crypto/aes/asm/aesni-sha256-x86_64.pl
new file mode 100644 (file)
index 0000000..19b0433
--- /dev/null
@@ -0,0 +1,1708 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# January 2013
+#
+# This is an AESNI-CBC+SHA256 stitch implementation. The idea, as spelled
+# out in http://download.intel.com/design/intarch/papers/323686.pdf, is
+# that since AESNI-CBC encrypt exhibits *very* low instruction-level
+# parallelism, interleaving it with another algorithm allows processor
+# resources to be utilized better and better performance to be achieved.
+# SHA256 instruction sequences(*) are taken from sha512-x86_64.pl and
+# AESNI code is woven into it. As SHA256 dominates execution time,
+# stitch performance does not depend on AES key length. Below are
+# performance numbers in cycles per processed byte, less is better,
+# for standalone AESNI-CBC encrypt, standalone SHA256, and stitched
+# subroutine:
+#
+#               AES-128/-192/-256+SHA256       this(**)gain
+# Sandy Bridge     5.05/6.05/7.05+11.6         13.0    +28%/36%/43%
+# Ivy Bridge       5.05/6.05/7.05+10.3         11.6    +32%/41%/50%
+# Haswell          4.43/5.29/6.19+7.80         8.79    +39%/49%/59%
+# Bulldozer        5.77/6.89/8.00+13.7         13.7    +42%/50%/58%
+#
+# (*)  there are XOP, AVX1 and AVX2 code paths, meaning that
+#      Westmere is left out; this is because the gain was not
+#      estimated high enough to justify the effort;
+# (**) these are EVP-free results, results obtained with 'speed
+#      -evp aes-256-cbc-hmac-sha256' will vary by percent or two;
+
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }  # single argument with a dot is the output file
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;                 # directory this script was invoked from
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.19) + ($1>=2.22);         # 0/1/2 selects generated code paths, >1 adds AVX2
+}
+
+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+          `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.09) + ($1>=2.10);
+}
+
+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+          `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=12);
+}
+
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+       $avx = ($2>=3.0) + ($2>3.0);
+}
+
+$shaext=$avx;  ### set to zero if compiling for 1.0.1
+$avx=1         if (!$shaext && $avx);          # only takes effect once $shaext is zeroed by hand
+
+open OUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!";   # fail loudly if the translator cannot be spawned
+*STDOUT=*OUT;                                  # everything printed from here on is piped through xlate
+
+$func="aesni_cbc_sha256_enc";
+$TABLE="K256";
+$SZ=4;                                         # bytes per hash word
+@ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%eax","%ebx","%ecx","%edx",
+                               "%r8d","%r9d","%r10d","%r11d");
+($T1,$a0,$a1,$a2,$a3)=("%r12d","%r13d","%r14d","%r15d","%esi");       # scalar temporaries
+@Sigma0=( 2,13,22);                            # rotate counts applied to a
+@Sigma1=( 6,11,25);                            # rotate counts applied to e
+@sigma0=( 7,18, 3);                            # two rotate counts, last entry is a shift count
+@sigma1=(17,19,10);                            # two rotate counts, last entry is a shift count
+$rounds=64;
+
+########################################################################
+# void aesni_cbc_sha256_enc(const void *inp,
+#                      void *out,
+#                      size_t length,          /* scaled by 64 on entry, i.e. 64-byte blocks */
+#                      const AES_KEY *key,
+#                      unsigned char *iv,
+#                      SHA256_CTX *ctx,
+#                      const void *in0);       /* 7th argument, fetched from the stack */
+($inp,  $out,  $len,  $key,  $ivp, $ctx, $in0) =
+("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10");
+
+$Tbl="%rbp";
+
+$_inp="16*$SZ+0*8(%rsp)";                      # frame: 16 words of X[i]+K[i], then eight 8-byte slots
+$_out="16*$SZ+1*8(%rsp)";
+$_end="16*$SZ+2*8(%rsp)";
+$_key="16*$SZ+3*8(%rsp)";
+$_ivp="16*$SZ+4*8(%rsp)";
+$_ctx="16*$SZ+5*8(%rsp)";
+$_in0="16*$SZ+6*8(%rsp)";
+$_rsp="16*$SZ+7*8(%rsp)";                      # caller's stack pointer as copied in the prologue
+$framesz=16*$SZ+8*8;
+
+$code=<<___;
+.text
+
+.extern        OPENSSL_ia32cap_P
+.globl $func
+.type  $func,\@abi-omnipotent
+.align 16
+$func:
+___
+                                               if ($avx) {
+$code.=<<___;
+       lea     OPENSSL_ia32cap_P(%rip),%r11
+       mov     \$1,%eax                        # value returned to a probe call
+       cmp     \$0,`$win64?"%rcx":"%rdi"`      # NULL 1st argument means probe only
+       je      .Lprobe
+       mov     0(%r11),%eax                    # capability word 0
+       mov     4(%r11),%r10                    # 64-bit load, capability words 1 and 2
+___
+$code.=<<___ if ($shaext);
+       bt      \$61,%r10                       # check for SHA
+       jc      ${func}_shaext
+___
+$code.=<<___;
+       mov     %r10,%r11
+       shr     \$32,%r11                       # capability word 2
+
+       test    \$`1<<11`,%r10d                 # check for XOP
+       jnz     ${func}_xop
+___
+$code.=<<___ if ($avx>1);
+       and     \$`1<<8|1<<5|1<<3`,%r11d        # check for BMI2+AVX2+BMI1
+       cmp     \$`1<<8|1<<5|1<<3`,%r11d
+       je      ${func}_avx2
+___
+$code.=<<___;
+       and     \$`1<<30`,%eax                  # mask "Intel CPU" bit
+       and     \$`1<<28|1<<9`,%r10d            # mask AVX+SSSE3 bits
+       or      %eax,%r10d
+       cmp     \$`1<<28|1<<9|1<<30`,%r10d
+       je      ${func}_avx
+       ud2                                     # no matching code path, trap
+___
+                                               }
+$code.=<<___;
+       xor     %eax,%eax                       # probe returns 0 when no SIMD path was generated
+       cmp     \$0,`$win64?"%rcx":"%rdi"`
+       je      .Lprobe
+       ud2                                     # real call without a code path, trap
+.Lprobe:
+       ret
+.size  $func,.-$func
+
+.align 64
+.type  $TABLE,\@object
+$TABLE:
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+       .long   0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+       .long   0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+       .long   0,0,0,0,   0,0,0,0,   -1,-1,-1,-1
+       .long   0,0,0,0,   0,0,0,0
+       .asciz  "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.align 64
+___
+
+######################################################################
+# SIMD code paths
+#
+{{{
+($iv,$inout,$roundkey,$temp,
+ $mask10,$mask12,$mask14,$offload)=map("%xmm$_",(8..15));
+
+$aesni_cbc_idx=0;                              # next @aesni_cbc_block entry to be woven in
+@aesni_cbc_block = (                           # one CBC block, one entry per SHA round
+##     &vmovdqu        ($roundkey,"0x00-0x80($inp)");
+##     &vmovdqu        ($inout,($inp));
+##     &mov            ($_inp,$inp);
+
+       '&vpxor         ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0x10-0x80($inp)");',
+
+       '&vpxor         ($inout,$inout,$iv);',
+
+       '&vaesenc       ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0x20-0x80($inp)");',
+
+       '&vaesenc       ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0x30-0x80($inp)");',
+
+       '&vaesenc       ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0x40-0x80($inp)");',
+
+       '&vaesenc       ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0x50-0x80($inp)");',
+
+       '&vaesenc       ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0x60-0x80($inp)");',
+
+       '&vaesenc       ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0x70-0x80($inp)");',
+
+       '&vaesenc       ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0x80-0x80($inp)");',
+
+       '&vaesenc       ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0x90-0x80($inp)");',
+
+       '&vaesenc       ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0xa0-0x80($inp)");',
+
+       '&vaesenclast   ($temp,$inout,$roundkey);'.    # first candidate result, filtered by $mask10 below
+       ' &vaesenc      ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0xb0-0x80($inp)");',
+
+       '&vpand         ($iv,$temp,$mask10);'.
+       ' &vaesenc      ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0xc0-0x80($inp)");',
+
+       '&vaesenclast   ($temp,$inout,$roundkey);'.    # second candidate result, filtered by $mask12 below
+       ' &vaesenc      ($inout,$inout,$roundkey);'.
+       ' &vmovdqu      ($roundkey,"0xd0-0x80($inp)");',
+
+       '&vpand         ($temp,$temp,$mask12);'.
+       ' &vaesenc      ($inout,$inout,$roundkey);'.
+        '&vmovdqu      ($roundkey,"0xe0-0x80($inp)");',
+
+       '&vpor          ($iv,$iv,$temp);'.
+       ' &vaesenclast  ($temp,$inout,$roundkey);'.    # third candidate; masking with $mask14 is left to the caller
+       ' &vmovdqu      ($roundkey,"0x00-0x80($inp)");'
+
+##     &mov            ($inp,$_inp);
+##     &mov            ($out,$_out);
+##     &vpand          ($temp,$temp,$mask14);
+##     &vpor           ($iv,$iv,$temp);
+##     &vmovdqu        ("($out,$inp)",$iv);
+##     &lea            ($inp,"16($inp)");
+);
+
+my $a4=$T1;
+my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+sub AUTOLOAD()         # thunk [simplified] 32-bit style perlasm
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; # called sub name, package prefix stripped, is the mnemonic
+  my $arg = pop;                               # last argument is emitted first
+    $arg = "\$$arg" if ($arg*1 eq $arg);       # plain number becomes an immediate
+    $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";     # remaining operands follow in reverse order
+}
+
+sub body_00_15 () {    # returns one round as a list of 26 snippets for the caller to eval
+       (
+       '($a,$b,$c,$d,$e,$f,$g,$h)=@ROT;'.
+
+       '&ror   ($a0,$Sigma1[2]-$Sigma1[1])',
+       '&mov   ($a,$a1)',
+       '&mov   ($a4,$f)',
+
+       '&xor   ($a0,$e)',
+       '&ror   ($a1,$Sigma0[2]-$Sigma0[1])',
+       '&xor   ($a4,$g)',                      # f^g
+
+       '&ror   ($a0,$Sigma1[1]-$Sigma1[0])',
+       '&xor   ($a1,$a)',
+       '&and   ($a4,$e)',                      # (f^g)&e
+
+       @aesni_cbc_block[$aesni_cbc_idx++].     # each call consumes the next AES-CBC step
+       '&xor   ($a0,$e)',
+       '&add   ($h,$SZ*($i&15)."(%rsp)")',     # h+=X[i]+K[i]
+       '&mov   ($a2,$a)',
+
+       '&ror   ($a1,$Sigma0[1]-$Sigma0[0])',
+       '&xor   ($a4,$g)',                      # Ch(e,f,g)=((f^g)&e)^g
+       '&xor   ($a2,$b)',                      # a^b, b^c in next round
+
+       '&ror   ($a0,$Sigma1[0])',              # Sigma1(e)
+       '&add   ($h,$a4)',                      # h+=Ch(e,f,g)
+       '&and   ($a3,$a2)',                     # (b^c)&(a^b)
+
+       '&xor   ($a1,$a)',
+       '&add   ($h,$a0)',                      # h+=Sigma1(e)
+       '&xor   ($a3,$b)',                      # Maj(a,b,c)=Ch(a^b,c,b)
+
+       '&add   ($d,$h)',                       # d+=h
+       '&ror   ($a1,$Sigma0[0])',              # Sigma0(a)
+       '&add   ($h,$a3)',                      # h+=Maj(a,b,c)
+
+       '&mov   ($a0,$d)',
+       '&add   ($a1,$h);'.                     # h+=Sigma0(a), sum kept in $a1 for next round's mov
+       '($a2,$a3) = ($a3,$a2); unshift(@ROT,pop(@ROT)); $i++;'
+       );
+}
+
+if ($avx) {{
+######################################################################
+# XOP code path
+#
+$code.=<<___;
+.type  ${func}_xop,\@function,6
+.align 64
+${func}_xop:
+.Lxop_shortcut:
+       mov     `($win64?56:8)`(%rsp),$in0      # load 7th parameter
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       mov     %rsp,%r11               # copy %rsp
+       sub     \$`$framesz+$win64*16*10`,%rsp
+       and     \$-64,%rsp              # align stack frame
+
+       shl     \$6,$len
+       sub     $inp,$out               # re-bias
+       sub     $inp,$in0
+       add     $inp,$len               # end of input
+
+       #mov    $inp,$_inp              # saved later
+       mov     $out,$_out
+       mov     $len,$_end
+       #mov    $key,$_key              # remains resident in $inp register
+       mov     $ivp,$_ivp
+       mov     $ctx,$_ctx
+       mov     $in0,$_in0
+       mov     %r11,$_rsp
+___
+$code.=<<___ if ($win64);
+       movaps  %xmm6,`$framesz+16*0`(%rsp)
+       movaps  %xmm7,`$framesz+16*1`(%rsp)
+       movaps  %xmm8,`$framesz+16*2`(%rsp)
+       movaps  %xmm9,`$framesz+16*3`(%rsp)
+       movaps  %xmm10,`$framesz+16*4`(%rsp)
+       movaps  %xmm11,`$framesz+16*5`(%rsp)
+       movaps  %xmm12,`$framesz+16*6`(%rsp)
+       movaps  %xmm13,`$framesz+16*7`(%rsp)
+       movaps  %xmm14,`$framesz+16*8`(%rsp)
+       movaps  %xmm15,`$framesz+16*9`(%rsp)
+___
+$code.=<<___;
+.Lprologue_xop:
+       vzeroall
+
+       mov     $inp,%r12               # borrow $a4
+       lea     0x80($key),$inp         # size optimization, reassign
+       lea     $TABLE+`$SZ*2*$rounds+32`(%rip),%r13    # borrow $a0
+       mov     0xf0-0x80($inp),%r14d   # rounds, borrow $a1
+       mov     $ctx,%r15               # borrow $a2
+       mov     $in0,%rsi               # borrow $a3
+       vmovdqu ($ivp),$iv              # load IV
+       sub     \$9,%r14
+
+       mov     $SZ*0(%r15),$A
+       mov     $SZ*1(%r15),$B
+       mov     $SZ*2(%r15),$C
+       mov     $SZ*3(%r15),$D
+       mov     $SZ*4(%r15),$E
+       mov     $SZ*5(%r15),$F
+       mov     $SZ*6(%r15),$G
+       mov     $SZ*7(%r15),$H
+
+       vmovdqa 0x00(%r13,%r14,8),$mask14
+       vmovdqa 0x10(%r13,%r14,8),$mask12
+       vmovdqa 0x20(%r13,%r14,8),$mask10
+       vmovdqu 0x00-0x80($inp),$roundkey
+       jmp     .Lloop_xop
+___
+                                       if ($SZ==4) {   # SHA256
+    my @X = map("%xmm$_",(0..3));
+    my ($t0,$t1,$t2,$t3) = map("%xmm$_",(4..7));
+
+$code.=<<___;
+.align 16
+.Lloop_xop:
+       vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3
+       vmovdqu 0x00(%rsi,%r12),@X[0]
+       vmovdqu 0x10(%rsi,%r12),@X[1]
+       vmovdqu 0x20(%rsi,%r12),@X[2]
+       vmovdqu 0x30(%rsi,%r12),@X[3]
+       vpshufb $t3,@X[0],@X[0]
+       lea     $TABLE(%rip),$Tbl
+       vpshufb $t3,@X[1],@X[1]
+       vpshufb $t3,@X[2],@X[2]
+       vpaddd  0x00($Tbl),@X[0],$t0
+       vpshufb $t3,@X[3],@X[3]
+       vpaddd  0x20($Tbl),@X[1],$t1
+       vpaddd  0x40($Tbl),@X[2],$t2
+       vpaddd  0x60($Tbl),@X[3],$t3
+       vmovdqa $t0,0x00(%rsp)
+       mov     $A,$a1
+       vmovdqa $t1,0x10(%rsp)
+       mov     $B,$a3
+       vmovdqa $t2,0x20(%rsp)
+       xor     $C,$a3                  # magic
+       vmovdqa $t3,0x30(%rsp)
+       mov     $E,$a0
+       jmp     .Lxop_00_47
+
+.align 16
+.Lxop_00_47:
+       sub     \$-16*2*$SZ,$Tbl        # size optimization
+       vmovdqu (%r12),$inout           # $a4
+       mov     %r12,$_inp              # $a4
+___
+sub XOP_256_00_47 () { # updates @X[0] while interleaving four rounds from $body
+my $j = shift;         # selects the 16-byte stack slot and table offset used at the end
+my $body = shift;      # code ref returning one round's snippet list
+my @X = @_;
+my @insns = (&$body,&$body,&$body,&$body);     # 104 instructions
+
+       &vpalignr       ($t0,@X[1],@X[0],$SZ);  # X[1..4]
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpalignr      ($t3,@X[3],@X[2],$SZ);  # X[9..12]
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vprotd         ($t1,$t0,8*$SZ-$sigma0[1]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpsrld         ($t0,$t0,$sigma0[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpaddd        (@X[0],@X[0],$t3);      # X[0..3] += X[9..12]
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vprotd         ($t2,$t1,$sigma0[1]-$sigma0[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpxor          ($t0,$t0,$t1);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vprotd        ($t3,@X[3],8*$SZ-$sigma1[1]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpxor          ($t0,$t0,$t2);          # sigma0(X[1..4])
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpsrld        ($t2,@X[3],$sigma1[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddd         (@X[0],@X[0],$t0);      # X[0..3] += sigma0(X[1..4])
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vprotd        ($t1,$t3,$sigma1[1]-$sigma1[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpxor         ($t3,$t3,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpxor         ($t3,$t3,$t1);          # sigma1(X[14..15])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpsrldq        ($t3,$t3,8);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddd         (@X[0],@X[0],$t3);      # X[0..1] += sigma1(X[14..15])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vprotd        ($t3,@X[0],8*$SZ-$sigma1[1]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpsrld        ($t2,@X[0],$sigma1[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vprotd        ($t1,$t3,$sigma1[1]-$sigma1[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpxor         ($t3,$t3,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpxor         ($t3,$t3,$t1);          # sigma1(X[16..17])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpslldq        ($t3,$t3,8);            # 22 instructions
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddd         (@X[0],@X[0],$t3);      # X[2..3] += sigma1(X[16..17])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddd         ($t2,@X[0],16*2*$j."($Tbl)");
+         foreach (@insns) { eval; }            # remaining instructions
+       &vmovdqa        (16*$j."(%rsp)",$t2);
+}
+
+    $aesni_cbc_idx=0;
+    for ($i=0,$j=0; $j<4; $j++) {
+       &XOP_256_00_47($j,\&body_00_15,@X);
+       push(@X,shift(@X));                     # rotate(@X)
+    }
+       &mov            ("%r12",$_inp);         # borrow $a4
+       &vpand          ($temp,$temp,$mask14);
+       &mov            ("%r15",$_out);         # borrow $a2
+       &vpor           ($iv,$iv,$temp);
+       &vmovdqu        ("(%r15,%r12)",$iv);    # write output
+       &lea            ("%r12","16(%r12)");    # inp++
+
+       &cmpb   ($SZ-1+16*2*$SZ."($Tbl)",0);
+       &jne    (".Lxop_00_47");
+
+       &vmovdqu        ($inout,"(%r12)");
+       &mov            ($_inp,"%r12");
+
+    $aesni_cbc_idx=0;
+    for ($i=0; $i<16; ) {
+       foreach(body_00_15()) { eval; }
+    }
+                                       }
+$code.=<<___;
+       mov     $_inp,%r12              # borrow $a4
+       mov     $_out,%r13              # borrow $a0
+       mov     $_ctx,%r15              # borrow $a2
+       mov     $_in0,%rsi              # borrow $a3
+
+       vpand   $mask14,$temp,$temp
+       mov     $a1,$A
+       vpor    $temp,$iv,$iv
+       vmovdqu $iv,(%r13,%r12)         # write output
+       lea     16(%r12),%r12           # inp++
+
+       add     $SZ*0(%r15),$A
+       add     $SZ*1(%r15),$B
+       add     $SZ*2(%r15),$C
+       add     $SZ*3(%r15),$D
+       add     $SZ*4(%r15),$E
+       add     $SZ*5(%r15),$F
+       add     $SZ*6(%r15),$G
+       add     $SZ*7(%r15),$H
+
+       cmp     $_end,%r12
+
+       mov     $A,$SZ*0(%r15)
+       mov     $B,$SZ*1(%r15)
+       mov     $C,$SZ*2(%r15)
+       mov     $D,$SZ*3(%r15)
+       mov     $E,$SZ*4(%r15)
+       mov     $F,$SZ*5(%r15)
+       mov     $G,$SZ*6(%r15)
+       mov     $H,$SZ*7(%r15)
+
+       jb      .Lloop_xop
+
+       mov     $_ivp,$ivp
+       mov     $_rsp,%rsi
+       vmovdqu $iv,($ivp)              # output IV
+       vzeroall
+___
+$code.=<<___ if ($win64);
+       movaps  `$framesz+16*0`(%rsp),%xmm6
+       movaps  `$framesz+16*1`(%rsp),%xmm7
+       movaps  `$framesz+16*2`(%rsp),%xmm8
+       movaps  `$framesz+16*3`(%rsp),%xmm9
+       movaps  `$framesz+16*4`(%rsp),%xmm10
+       movaps  `$framesz+16*5`(%rsp),%xmm11
+       movaps  `$framesz+16*6`(%rsp),%xmm12
+       movaps  `$framesz+16*7`(%rsp),%xmm13
+       movaps  `$framesz+16*8`(%rsp),%xmm14
+       movaps  `$framesz+16*9`(%rsp),%xmm15
+___
+$code.=<<___;
+       mov     (%rsi),%r15
+       mov     8(%rsi),%r14
+       mov     16(%rsi),%r13
+       mov     24(%rsi),%r12
+       mov     32(%rsi),%rbp
+       mov     40(%rsi),%rbx
+       lea     48(%rsi),%rsp
+.Lepilogue_xop:
+       ret
+.size  ${func}_xop,.-${func}_xop
+___
+######################################################################
+# AVX+shrd code path
+#
+local *ror = sub { &shrd(@_[0],@_) };
+
+$code.=<<___;
+.type  ${func}_avx,\@function,6
+.align 64
+${func}_avx:
+.Lavx_shortcut:
+       mov     `($win64?56:8)`(%rsp),$in0      # load 7th parameter
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       mov     %rsp,%r11               # copy %rsp
+       sub     \$`$framesz+$win64*16*10`,%rsp
+       and     \$-64,%rsp              # align stack frame
+
+       shl     \$6,$len
+       sub     $inp,$out               # re-bias
+       sub     $inp,$in0
+       add     $inp,$len               # end of input
+
+       #mov    $inp,$_inp              # saved later
+       mov     $out,$_out
+       mov     $len,$_end
+       #mov    $key,$_key              # remains resident in $inp register
+       mov     $ivp,$_ivp
+       mov     $ctx,$_ctx
+       mov     $in0,$_in0
+       mov     %r11,$_rsp
+___
+$code.=<<___ if ($win64);
+       movaps  %xmm6,`$framesz+16*0`(%rsp)
+       movaps  %xmm7,`$framesz+16*1`(%rsp)
+       movaps  %xmm8,`$framesz+16*2`(%rsp)
+       movaps  %xmm9,`$framesz+16*3`(%rsp)
+       movaps  %xmm10,`$framesz+16*4`(%rsp)
+       movaps  %xmm11,`$framesz+16*5`(%rsp)
+       movaps  %xmm12,`$framesz+16*6`(%rsp)
+       movaps  %xmm13,`$framesz+16*7`(%rsp)
+       movaps  %xmm14,`$framesz+16*8`(%rsp)
+       movaps  %xmm15,`$framesz+16*9`(%rsp)
+___
+$code.=<<___;
+.Lprologue_avx:
+       vzeroall
+
+       mov     $inp,%r12               # borrow $a4
+       lea     0x80($key),$inp         # size optimization, reassign
+       lea     $TABLE+`$SZ*2*$rounds+32`(%rip),%r13    # borrow $a0
+       mov     0xf0-0x80($inp),%r14d   # rounds, borrow $a1
+       mov     $ctx,%r15               # borrow $a2
+       mov     $in0,%rsi               # borrow $a3
+       vmovdqu ($ivp),$iv              # load IV
+       sub     \$9,%r14
+
+       mov     $SZ*0(%r15),$A
+       mov     $SZ*1(%r15),$B
+       mov     $SZ*2(%r15),$C
+       mov     $SZ*3(%r15),$D
+       mov     $SZ*4(%r15),$E
+       mov     $SZ*5(%r15),$F
+       mov     $SZ*6(%r15),$G
+       mov     $SZ*7(%r15),$H
+
+       vmovdqa 0x00(%r13,%r14,8),$mask14
+       vmovdqa 0x10(%r13,%r14,8),$mask12
+       vmovdqa 0x20(%r13,%r14,8),$mask10
+       vmovdqu 0x00-0x80($inp),$roundkey
+___
+                                       if ($SZ==4) {   # SHA256
+    my @X = map("%xmm$_",(0..3));
+    my ($t0,$t1,$t2,$t3) = map("%xmm$_",(4..7));
+
+$code.=<<___;
+       jmp     .Lloop_avx
+.align 16
+.Lloop_avx:
+       vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3
+       vmovdqu 0x00(%rsi,%r12),@X[0]
+       vmovdqu 0x10(%rsi,%r12),@X[1]
+       vmovdqu 0x20(%rsi,%r12),@X[2]
+       vmovdqu 0x30(%rsi,%r12),@X[3]
+       vpshufb $t3,@X[0],@X[0]
+       lea     $TABLE(%rip),$Tbl
+       vpshufb $t3,@X[1],@X[1]
+       vpshufb $t3,@X[2],@X[2]
+       vpaddd  0x00($Tbl),@X[0],$t0
+       vpshufb $t3,@X[3],@X[3]
+       vpaddd  0x20($Tbl),@X[1],$t1
+       vpaddd  0x40($Tbl),@X[2],$t2
+       vpaddd  0x60($Tbl),@X[3],$t3
+       vmovdqa $t0,0x00(%rsp)
+       mov     $A,$a1
+       vmovdqa $t1,0x10(%rsp)
+       mov     $B,$a3
+       vmovdqa $t2,0x20(%rsp)
+       xor     $C,$a3                  # magic
+       vmovdqa $t3,0x30(%rsp)
+       mov     $E,$a0
+       jmp     .Lavx_00_47
+
+.align 16
+.Lavx_00_47:
+       sub     \$-16*2*$SZ,$Tbl        # size optimization
+       vmovdqu (%r12),$inout           # $a4
+       mov     %r12,$_inp              # $a4
+___
+sub Xupdate_256_AVX () {       # returns the @X[0] update as a list of 31 snippets to eval
+       (
+       '&vpalignr      ($t0,@X[1],@X[0],$SZ)', # X[1..4]
+        '&vpalignr     ($t3,@X[3],@X[2],$SZ)', # X[9..12]
+       '&vpsrld        ($t2,$t0,$sigma0[0]);',
+        '&vpaddd       (@X[0],@X[0],$t3)',     # X[0..3] += X[9..12]
+       '&vpsrld        ($t3,$t0,$sigma0[2])',
+       '&vpslld        ($t1,$t0,8*$SZ-$sigma0[1]);',
+       '&vpxor         ($t0,$t3,$t2)',
+        '&vpshufd      ($t3,@X[3],0b11111010)',# X[14..15]
+       '&vpsrld        ($t2,$t2,$sigma0[1]-$sigma0[0]);',
+       '&vpxor         ($t0,$t0,$t1)',
+       '&vpslld        ($t1,$t1,$sigma0[1]-$sigma0[0]);',
+       '&vpxor         ($t0,$t0,$t2)',
+        '&vpsrld       ($t2,$t3,$sigma1[2]);',
+       '&vpxor         ($t0,$t0,$t1)',         # sigma0(X[1..4])
+        '&vpsrlq       ($t3,$t3,$sigma1[0]);',
+       '&vpaddd        (@X[0],@X[0],$t0)',     # X[0..3] += sigma0(X[1..4])
+        '&vpxor        ($t2,$t2,$t3);',
+        '&vpsrlq       ($t3,$t3,$sigma1[1]-$sigma1[0])',
+        '&vpxor        ($t2,$t2,$t3)',         # sigma1(X[14..15])
+        '&vpshufd      ($t2,$t2,0b10000100)',
+        '&vpsrldq      ($t2,$t2,8)',
+       '&vpaddd        (@X[0],@X[0],$t2)',     # X[0..1] += sigma1(X[14..15])
+        '&vpshufd      ($t3,@X[0],0b01010000)',# X[16..17]
+        '&vpsrld       ($t2,$t3,$sigma1[2])',
+        '&vpsrlq       ($t3,$t3,$sigma1[0])',
+        '&vpxor        ($t2,$t2,$t3);',
+        '&vpsrlq       ($t3,$t3,$sigma1[1]-$sigma1[0])',
+        '&vpxor        ($t2,$t2,$t3)',
+        '&vpshufd      ($t2,$t2,0b11101000)',
+        '&vpslldq      ($t2,$t2,8)',
+       '&vpaddd        (@X[0],@X[0],$t2)'      # X[2..3] += sigma1(X[16..17])
+       );
+}
+
+sub AVX_256_00_47 () { # updates @X[0] while interleaving four rounds from $body
+my $j = shift;         # selects the 16-byte stack slot and table offset used at the end
+my $body = shift;      # code ref returning one round's snippet list
+my @X = @_;            # eval'ed snippets below resolve @X to this copy
+my @insns = (&$body,&$body,&$body,&$body);     # 104 instructions
+
+       foreach (Xupdate_256_AVX()) {           # 31 instructions
+           eval;                               # one SIMD step, then three scalar round steps
+           eval(shift(@insns));
+           eval(shift(@insns));
+           eval(shift(@insns));
+       }
+       &vpaddd         ($t2,@X[0],16*2*$j."($Tbl)");
+         foreach (@insns) { eval; }            # remaining instructions
+       &vmovdqa        (16*$j."(%rsp)",$t2);
+}
+
+    $aesni_cbc_idx=0;
+    for ($i=0,$j=0; $j<4; $j++) {
+       &AVX_256_00_47($j,\&body_00_15,@X);
+       push(@X,shift(@X));                     # rotate(@X)
+    }
+       &mov            ("%r12",$_inp);         # borrow $a4
+       &vpand          ($temp,$temp,$mask14);
+       &mov            ("%r15",$_out);         # borrow $a2
+       &vpor           ($iv,$iv,$temp);
+       &vmovdqu        ("(%r15,%r12)",$iv);    # write output
+       &lea            ("%r12","16(%r12)");    # inp++
+
+       &cmpb   ($SZ-1+16*2*$SZ."($Tbl)",0);
+       &jne    (".Lavx_00_47");
+
+       &vmovdqu        ($inout,"(%r12)");
+       &mov            ($_inp,"%r12");
+
+    $aesni_cbc_idx=0;
+    for ($i=0; $i<16; ) {
+       foreach(body_00_15()) { eval; }
+    }
+
+                                       }
+$code.=<<___;
+       mov     $_inp,%r12              # borrow $a4
+       mov     $_out,%r13              # borrow $a0
+       mov     $_ctx,%r15              # borrow $a2
+       mov     $_in0,%rsi              # borrow $a3
+
+       vpand   $mask14,$temp,$temp
+       mov     $a1,$A
+       vpor    $temp,$iv,$iv
+       vmovdqu $iv,(%r13,%r12)         # write output
+       lea     16(%r12),%r12           # inp++
+
+       add     $SZ*0(%r15),$A
+       add     $SZ*1(%r15),$B
+       add     $SZ*2(%r15),$C
+       add     $SZ*3(%r15),$D
+       add     $SZ*4(%r15),$E
+       add     $SZ*5(%r15),$F
+       add     $SZ*6(%r15),$G
+       add     $SZ*7(%r15),$H
+
+       cmp     $_end,%r12
+
+       mov     $A,$SZ*0(%r15)
+       mov     $B,$SZ*1(%r15)
+       mov     $C,$SZ*2(%r15)
+       mov     $D,$SZ*3(%r15)
+       mov     $E,$SZ*4(%r15)
+       mov     $F,$SZ*5(%r15)
+       mov     $G,$SZ*6(%r15)
+       mov     $H,$SZ*7(%r15)
+       jb      .Lloop_avx
+
+       mov     $_ivp,$ivp
+       mov     $_rsp,%rsi
+       vmovdqu $iv,($ivp)              # output IV
+       vzeroall
+___
+$code.=<<___ if ($win64);
+       movaps  `$framesz+16*0`(%rsp),%xmm6
+       movaps  `$framesz+16*1`(%rsp),%xmm7
+       movaps  `$framesz+16*2`(%rsp),%xmm8
+       movaps  `$framesz+16*3`(%rsp),%xmm9
+       movaps  `$framesz+16*4`(%rsp),%xmm10
+       movaps  `$framesz+16*5`(%rsp),%xmm11
+       movaps  `$framesz+16*6`(%rsp),%xmm12
+       movaps  `$framesz+16*7`(%rsp),%xmm13
+       movaps  `$framesz+16*8`(%rsp),%xmm14
+       movaps  `$framesz+16*9`(%rsp),%xmm15
+___
+$code.=<<___;
+       mov     (%rsi),%r15
+       mov     8(%rsi),%r14
+       mov     16(%rsi),%r13
+       mov     24(%rsi),%r12
+       mov     32(%rsi),%rbp
+       mov     40(%rsi),%rbx
+       lea     48(%rsi),%rsp
+.Lepilogue_avx:
+       ret
+.size  ${func}_avx,.-${func}_avx
+___
+
+if ($avx>1) {{
+######################################################################
+# AVX2+BMI code path
+#
+my $a5=$SZ==4?"%esi":"%rsi";   # zap $inp 
+my $PUSH8=8*2*$SZ;
+use integer;
+
+# Return the list of instruction strings for one round of the AVX2+BMI
+# path (rorx/andn/lea based). Each list element is evaluated as a unit by
+# the caller; fragments joined with '.' therefore stay together.
+# One entry of @aesni_cbc_block is spliced in per call, and
+# $aesni_cbc_idx is advanced when this sub is called, not when the
+# strings are evaluated. The strings read $i, $base, $SZ and $PUSH8 from
+# the scope in which they are evaluated.
+sub bodyx_00_15 () {
+       # at start $a1 should be zero, $a3 - $b^$c and $a4 copy of $f
+       (
+       '($a,$b,$c,$d,$e,$f,$g,$h)=@ROT;'.
+
+       '&add   ($h,(32*($i/(16/$SZ))+$SZ*($i%(16/$SZ)))%$PUSH8.$base)',    # h+=X[i]+K[i]
+       '&and   ($a4,$e)',              # f&e
+       '&rorx  ($a0,$e,$Sigma1[2])',
+       '&rorx  ($a2,$e,$Sigma1[1])',
+
+       '&lea   ($a,"($a,$a1)")',       # h+=Sigma0(a) from the past
+       '&lea   ($h,"($h,$a4)")',
+       '&andn  ($a4,$e,$g)',           # ~e&g
+       '&xor   ($a0,$a2)',
+
+       '&rorx  ($a1,$e,$Sigma1[0])',
+       '&lea   ($h,"($h,$a4)")',       # h+=Ch(e,f,g)=(e&f)+(~e&g)
+       '&xor   ($a0,$a1)',             # Sigma1(e)
+       '&mov   ($a2,$a)',
+
+       '&rorx  ($a4,$a,$Sigma0[2])',
+       '&lea   ($h,"($h,$a0)")',       # h+=Sigma1(e)
+       '&xor   ($a2,$b)',              # a^b, b^c in next round
+       '&rorx  ($a1,$a,$Sigma0[1])',
+
+       '&rorx  ($a0,$a,$Sigma0[0])',
+       '&lea   ($d,"($d,$h)")',        # d+=h
+       '&and   ($a3,$a2)',             # (b^c)&(a^b)
+       @aesni_cbc_block[$aesni_cbc_idx++].
+       '&xor   ($a1,$a4)',
+
+       '&xor   ($a3,$b)',              # Maj(a,b,c)=Ch(a^b,c,b)
+       '&xor   ($a1,$a0)',             # Sigma0(a)
+       '&lea   ($h,"($h,$a3)");'.      # h+=Maj(a,b,c)
+       '&mov   ($a4,$e)',              # copy of f in future
+
+       # swap the two scratch names, rotate the working variables and
+       # advance the round counter for the next round
+       '($a2,$a3) = ($a3,$a2); unshift(@ROT,pop(@ROT)); $i++;'
+       );
+       # and at the finish one has to $a+=$a1
+}
+
+$code.=<<___;
+.type  ${func}_avx2,\@function,6
+.align 64
+${func}_avx2:
+.Lavx2_shortcut:
+       mov     `($win64?56:8)`(%rsp),$in0      # load 7th parameter
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       mov     %rsp,%r11               # copy %rsp
+       sub     \$`2*$SZ*$rounds+8*8+$win64*16*10`,%rsp
+       and     \$-256*$SZ,%rsp         # align stack frame
+       add     \$`2*$SZ*($rounds-8)`,%rsp
+
+       shl     \$6,$len
+       sub     $inp,$out               # re-bias
+       sub     $inp,$in0
+       add     $inp,$len               # end of input
+
+       #mov    $inp,$_inp              # saved later
+       #mov    $out,$_out              # kept in $offload
+       mov     $len,$_end
+       #mov    $key,$_key              # remains resident in $inp register
+       mov     $ivp,$_ivp
+       mov     $ctx,$_ctx
+       mov     $in0,$_in0
+       mov     %r11,$_rsp
+___
+$code.=<<___ if ($win64);
+       movaps  %xmm6,`$framesz+16*0`(%rsp)
+       movaps  %xmm7,`$framesz+16*1`(%rsp)
+       movaps  %xmm8,`$framesz+16*2`(%rsp)
+       movaps  %xmm9,`$framesz+16*3`(%rsp)
+       movaps  %xmm10,`$framesz+16*4`(%rsp)
+       movaps  %xmm11,`$framesz+16*5`(%rsp)
+       movaps  %xmm12,`$framesz+16*6`(%rsp)
+       movaps  %xmm13,`$framesz+16*7`(%rsp)
+       movaps  %xmm14,`$framesz+16*8`(%rsp)
+       movaps  %xmm15,`$framesz+16*9`(%rsp)
+___
+$code.=<<___;
+.Lprologue_avx2:
+       vzeroall
+
+       mov     $inp,%r13               # borrow $a0
+       vpinsrq \$1,$out,$offload,$offload
+       lea     0x80($key),$inp         # size optimization, reassign
+       lea     $TABLE+`$SZ*2*$rounds+32`(%rip),%r12    # borrow $a4
+       mov     0xf0-0x80($inp),%r14d   # rounds, borrow $a1
+       mov     $ctx,%r15               # borrow $a2
+       mov     $in0,%rsi               # borrow $a3
+       vmovdqu ($ivp),$iv              # load IV
+       lea     -9(%r14),%r14
+
+       vmovdqa 0x00(%r12,%r14,8),$mask14
+       vmovdqa 0x10(%r12,%r14,8),$mask12
+       vmovdqa 0x20(%r12,%r14,8),$mask10
+
+       sub     \$-16*$SZ,%r13          # inp++, size optimization
+       mov     $SZ*0(%r15),$A
+       lea     (%rsi,%r13),%r12        # borrow $a0
+       mov     $SZ*1(%r15),$B
+       cmp     $len,%r13               # $_end
+       mov     $SZ*2(%r15),$C
+       cmove   %rsp,%r12               # next block or random data
+       mov     $SZ*3(%r15),$D
+       mov     $SZ*4(%r15),$E
+       mov     $SZ*5(%r15),$F
+       mov     $SZ*6(%r15),$G
+       mov     $SZ*7(%r15),$H
+       vmovdqu 0x00-0x80($inp),$roundkey
+___
+                                       if ($SZ==4) {   # SHA256
+    my @X = map("%ymm$_",(0..3));
+    my ($t0,$t1,$t2,$t3) = map("%ymm$_",(4..7));
+
+$code.=<<___;
+       jmp     .Loop_avx2
+.align 16
+.Loop_avx2:
+       vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3
+       vmovdqu -16*$SZ+0(%rsi,%r13),%xmm0
+       vmovdqu -16*$SZ+16(%rsi,%r13),%xmm1
+       vmovdqu -16*$SZ+32(%rsi,%r13),%xmm2
+       vmovdqu -16*$SZ+48(%rsi,%r13),%xmm3
+
+       vinserti128     \$1,(%r12),@X[0],@X[0]
+       vinserti128     \$1,16(%r12),@X[1],@X[1]
+        vpshufb        $t3,@X[0],@X[0]
+       vinserti128     \$1,32(%r12),@X[2],@X[2]
+        vpshufb        $t3,@X[1],@X[1]
+       vinserti128     \$1,48(%r12),@X[3],@X[3]
+
+       lea     $TABLE(%rip),$Tbl
+       vpshufb $t3,@X[2],@X[2]
+       lea     -16*$SZ(%r13),%r13
+       vpaddd  0x00($Tbl),@X[0],$t0
+       vpshufb $t3,@X[3],@X[3]
+       vpaddd  0x20($Tbl),@X[1],$t1
+       vpaddd  0x40($Tbl),@X[2],$t2
+       vpaddd  0x60($Tbl),@X[3],$t3
+       vmovdqa $t0,0x00(%rsp)
+       xor     $a1,$a1
+       vmovdqa $t1,0x20(%rsp)
+       lea     -$PUSH8(%rsp),%rsp
+       mov     $B,$a3
+       vmovdqa $t2,0x00(%rsp)
+       xor     $C,$a3                  # magic
+       vmovdqa $t3,0x20(%rsp)
+       mov     $F,$a4
+       sub     \$-16*2*$SZ,$Tbl        # size optimization
+       jmp     .Lavx2_00_47
+
+.align 16
+.Lavx2_00_47:
+       vmovdqu (%r13),$inout
+       vpinsrq \$0,%r13,$offload,$offload
+___
+
+# AVX2 counterpart of the interleaving helper: the instruction strings
+# from four calls to $body are mixed, three at a time, with each string
+# from Xupdate_256_AVX(). %rsp is lowered by $PUSH8 on every even $j,
+# and @X[0] plus the table entry at 16*2*$j($Tbl) is stored to
+# ((32*$j)%$PUSH8)(%rsp).
+#   $j    - slice index; selects the table and stack offsets
+#   $body - code reference returning a list of instruction strings
+#   rest  - becomes the local @X that the evaluated strings refer to
+sub AVX2_256_00_47 () {
+my $j = shift;
+my $body = shift;
+my @X = @_;
+my @insns = (&$body,&$body,&$body,&$body);     # 96 instructions
+my $base = "+2*$PUSH8(%rsp)";                  # read by name from the evaluated strings
+
+       &lea    ("%rsp","-$PUSH8(%rsp)")        if (($j%2)==0);
+       foreach (Xupdate_256_AVX()) {           # 29 instructions
+           eval;
+           eval(shift(@insns));
+           eval(shift(@insns));
+           eval(shift(@insns));
+       }
+       &vpaddd         ($t2,@X[0],16*2*$j."($Tbl)");
+         foreach (@insns) { eval; }            # remaining instructions
+       &vmovdqa        ((32*$j)%$PUSH8."(%rsp)",$t2);
+}
+    $aesni_cbc_idx=0;
+    for ($i=0,$j=0; $j<4; $j++) {
+       &AVX2_256_00_47($j,\&bodyx_00_15,@X);
+       push(@X,shift(@X));                     # rotate(@X)
+    }
+       &vmovq          ("%r13",$offload);      # borrow $a0
+       &vpextrq        ("%r15",$offload,1);    # borrow $a2
+       &vpand          ($temp,$temp,$mask14);
+       &vpor           ($iv,$iv,$temp);
+       &vmovdqu        ("(%r15,%r13)",$iv);    # write output
+       &lea            ("%r13","16(%r13)");    # inp++
+
+       &lea    ($Tbl,16*2*$SZ."($Tbl)");
+       &cmpb   (($SZ-1)."($Tbl)",0);
+       &jne    (".Lavx2_00_47");
+
+       &vmovdqu        ($inout,"(%r13)");
+       &vpinsrq        ($offload,$offload,"%r13",0);
+
+    $aesni_cbc_idx=0;
+    for ($i=0; $i<16; ) {
+       my $base=$i<8?"+$PUSH8(%rsp)":"(%rsp)";
+       foreach(bodyx_00_15()) { eval; }
+    }
+                                       }
+$code.=<<___;
+       vpextrq \$1,$offload,%r12               # $_out, borrow $a4
+       vmovq   $offload,%r13                   # $_inp, borrow $a0
+       mov     `2*$SZ*$rounds+5*8`(%rsp),%r15  # $_ctx, borrow $a2
+       add     $a1,$A
+       lea     `2*$SZ*($rounds-8)`(%rsp),$Tbl
+
+       vpand   $mask14,$temp,$temp
+       vpor    $temp,$iv,$iv
+       vmovdqu $iv,(%r12,%r13)                 # write output
+       lea     16(%r13),%r13
+
+       add     $SZ*0(%r15),$A
+       add     $SZ*1(%r15),$B
+       add     $SZ*2(%r15),$C
+       add     $SZ*3(%r15),$D
+       add     $SZ*4(%r15),$E
+       add     $SZ*5(%r15),$F
+       add     $SZ*6(%r15),$G
+       add     $SZ*7(%r15),$H
+
+       mov     $A,$SZ*0(%r15)
+       mov     $B,$SZ*1(%r15)
+       mov     $C,$SZ*2(%r15)
+       mov     $D,$SZ*3(%r15)
+       mov     $E,$SZ*4(%r15)
+       mov     $F,$SZ*5(%r15)
+       mov     $G,$SZ*6(%r15)
+       mov     $H,$SZ*7(%r15)
+
+       cmp     `$PUSH8+2*8`($Tbl),%r13         # $_end
+       je      .Ldone_avx2
+
+       xor     $a1,$a1
+       mov     $B,$a3
+       mov     $F,$a4
+       xor     $C,$a3                  # magic
+       jmp     .Lower_avx2
+.align 16
+.Lower_avx2:
+       vmovdqu (%r13),$inout
+       vpinsrq \$0,%r13,$offload,$offload
+___
+    # Emit 16 rounds for the .Lower_avx2 loop. The loop header has no
+    # increment because the evaluated round strings advance $i themselves.
+    $aesni_cbc_idx=0;
+    for ($i=0; $i<16; ) {
+       my $base="+16($Tbl)";           # read by name from the evaluated strings
+       foreach(bodyx_00_15()) { eval; }
+       &lea    ($Tbl,"-$PUSH8($Tbl)")  if ($i==8);     # step $Tbl down after eight rounds
+    }
+$code.=<<___;
+       vmovq   $offload,%r13                   # borrow $a0
+       vpextrq \$1,$offload,%r15               # borrow $a2
+       vpand   $mask14,$temp,$temp
+       vpor    $temp,$iv,$iv
+       lea     -$PUSH8($Tbl),$Tbl
+       vmovdqu $iv,(%r15,%r13)                 # write output
+       lea     16(%r13),%r13                   # inp++
+       cmp     %rsp,$Tbl
+       jae     .Lower_avx2
+
+       mov     `2*$SZ*$rounds+5*8`(%rsp),%r15  # $_ctx, borrow $a2
+       lea     16*$SZ(%r13),%r13
+       mov     `2*$SZ*$rounds+6*8`(%rsp),%rsi  # $_in0, borrow $a3
+       add     $a1,$A
+       lea     `2*$SZ*($rounds-8)`(%rsp),%rsp
+
+       add     $SZ*0(%r15),$A
+       add     $SZ*1(%r15),$B
+       add     $SZ*2(%r15),$C
+       add     $SZ*3(%r15),$D
+       add     $SZ*4(%r15),$E
+       add     $SZ*5(%r15),$F
+       add     $SZ*6(%r15),$G
+       lea     (%rsi,%r13),%r12
+       add     $SZ*7(%r15),$H
+
+       cmp     $_end,%r13
+
+       mov     $A,$SZ*0(%r15)
+       cmove   %rsp,%r12               # next block or stale data
+       mov     $B,$SZ*1(%r15)
+       mov     $C,$SZ*2(%r15)
+       mov     $D,$SZ*3(%r15)
+       mov     $E,$SZ*4(%r15)
+       mov     $F,$SZ*5(%r15)
+       mov     $G,$SZ*6(%r15)
+       mov     $H,$SZ*7(%r15)
+
+       jbe     .Loop_avx2
+       lea     (%rsp),$Tbl
+
+.Ldone_avx2:
+       lea     ($Tbl),%rsp
+       mov     $_ivp,$ivp
+       mov     $_rsp,%rsi
+       vmovdqu $iv,($ivp)              # output IV
+       vzeroall
+___
+$code.=<<___ if ($win64);
+       movaps  `$framesz+16*0`(%rsp),%xmm6
+       movaps  `$framesz+16*1`(%rsp),%xmm7
+       movaps  `$framesz+16*2`(%rsp),%xmm8
+       movaps  `$framesz+16*3`(%rsp),%xmm9
+       movaps  `$framesz+16*4`(%rsp),%xmm10
+       movaps  `$framesz+16*5`(%rsp),%xmm11
+       movaps  `$framesz+16*6`(%rsp),%xmm12
+       movaps  `$framesz+16*7`(%rsp),%xmm13
+       movaps  `$framesz+16*8`(%rsp),%xmm14
+       movaps  `$framesz+16*9`(%rsp),%xmm15
+___
+$code.=<<___;
+       mov     (%rsi),%r15
+       mov     8(%rsi),%r14
+       mov     16(%rsi),%r13
+       mov     24(%rsi),%r12
+       mov     32(%rsi),%rbp
+       mov     40(%rsi),%rbx
+       lea     48(%rsi),%rsp
+.Lepilogue_avx2:
+       ret
+.size  ${func}_avx2,.-${func}_avx2
+___
+}}
+}}
+{{
+my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10");
+
+my ($rounds,$Tbl)=("%r11d","%rbx");
+
+my ($iv,$in,$rndkey0)=map("%xmm$_",(6,14,15));
+my @rndkey=("%xmm4","%xmm5");
+my $r=0;
+my $sn=0;
+
+my ($Wi,$ABEF,$CDGH,$TMP,$BSWAP,$ABEF_SAVE,$CDGH_SAVE)=map("%xmm$_",(0..3,7..9));
+my @MSG=map("%xmm$_",(10..13));
+
+# Append the next step of the interleaved AES-CBC encryption to $code.
+# $r counts the steps issued so far: $r/10 selects the 16-byte input
+# block and $r%10 the step within that block.
+#   step 0    - load the input block, xor it with $rndkey0, store the
+#               previous block's result (for every block but the first),
+#               xor the input into $iv and issue the first aesenc
+#   steps 1-8 - load the next round key and issue one aesenc
+#   step 9    - compare $rounds with 11: below it no extra aesenc is
+#               issued, equal to it two are, above it four are; then
+#               aesenclast and a reload of the key at 16-112($key)
+# The two @rndkey registers are swapped after every step, so the key a
+# step loads into $rndkey[1] is consumed as $rndkey[0] by the next one.
+my $aesenc=sub {
+  use integer;
+  my ($n,$k)=($r/10,$r%10);
+    if ($k==0) {
+      $code.=<<___;
+       movups          `16*$n`($in0),$in               # load input
+       xorps           $rndkey0,$in
+___
+      $code.=<<___ if ($n);
+       movups          $iv,`16*($n-1)`($out,$in0)      # write output
+___
+      $code.=<<___;
+       xorps           $in,$iv
+       movups          `32+16*$k-112`($key),$rndkey[1]
+       aesenc          $rndkey[0],$iv
+___
+    } elsif ($k==9) {
+      # each expansion needs its own local label
+      $sn++;
+      $code.=<<___;
+       cmp             \$11,$rounds
+       jb              .Laesenclast$sn
+       movups          `32+16*($k+0)-112`($key),$rndkey[1]
+       aesenc          $rndkey[0],$iv
+       movups          `32+16*($k+1)-112`($key),$rndkey[0]
+       aesenc          $rndkey[1],$iv
+       je              .Laesenclast$sn
+       movups          `32+16*($k+2)-112`($key),$rndkey[1]
+       aesenc          $rndkey[0],$iv
+       movups          `32+16*($k+3)-112`($key),$rndkey[0]
+       aesenc          $rndkey[1],$iv
+.Laesenclast$sn:
+       aesenclast      $rndkey[0],$iv
+       movups          16-112($key),$rndkey[1]         # forward reference
+       nop
+___
+    } else {
+      $code.=<<___;
+       movups          `32+16*$k-112`($key),$rndkey[1]
+       aesenc          $rndkey[0],$iv
+___
+    }
+    $r++;      unshift(@rndkey,pop(@rndkey));
+};
+
+if ($shaext) {
+my $Tbl="%rax";
+
+$code.=<<___;
+.type  ${func}_shaext,\@function,6
+.align 32
+${func}_shaext:
+       mov     `($win64?56:8)`(%rsp),$inp      # load 7th argument
+___
+$code.=<<___ if ($win64);
+       lea     `-8-10*16`(%rsp),%rsp
+       movaps  %xmm6,-8-10*16(%rax)
+       movaps  %xmm7,-8-9*16(%rax)
+       movaps  %xmm8,-8-8*16(%rax)
+       movaps  %xmm9,-8-7*16(%rax)
+       movaps  %xmm10,-8-6*16(%rax)
+       movaps  %xmm11,-8-5*16(%rax)
+       movaps  %xmm12,-8-4*16(%rax)
+       movaps  %xmm13,-8-3*16(%rax)
+       movaps  %xmm14,-8-2*16(%rax)
+       movaps  %xmm15,-8-1*16(%rax)
+.Lprologue_shaext:
+___
+# Function setup and loop head of the SHAEXT path: load the hash state,
+# the first round keys and the caller's IV, rearrange the state into the
+# ABEF/CDGH layout, then load and byte-swap the first message block.
+# The IV has to be fetched from ($ivp) here, as the AVX2 path does;
+# otherwise the first block is chained to whatever $iv (%xmm6) held on
+# entry, while the final IV is still written back through $ivp.
+$code.=<<___;
+       lea             K256+0x80(%rip),$Tbl
+       movdqu          ($ctx),$ABEF            # DCBA
+       movdqu          16($ctx),$CDGH          # HGFE
+       movdqa          0x200-0x80($Tbl),$TMP   # byte swap mask
+
+       mov             240($key),$rounds
+       sub             $in0,$out
+       movups          ($key),$rndkey0         # $key[0]
+       movups          ($ivp),$iv              # load IV
+       movups          16($key),$rndkey[0]     # forward reference
+       lea             112($key),$key          # size optimization
+
+       pshufd          \$0x1b,$ABEF,$Wi        # ABCD
+       pshufd          \$0xb1,$ABEF,$ABEF      # CDAB
+       pshufd          \$0x1b,$CDGH,$CDGH      # EFGH
+       movdqa          $TMP,$BSWAP             # offload
+       palignr         \$8,$CDGH,$ABEF         # ABEF
+       punpcklqdq      $Wi,$CDGH               # CDGH
+
+       jmp     .Loop_shaext
+
+.align 16
+.Loop_shaext:
+       movdqu          ($inp),@MSG[0]
+       movdqu          0x10($inp),@MSG[1]
+       movdqu          0x20($inp),@MSG[2]
+       pshufb          $TMP,@MSG[0]
+       movdqu          0x30($inp),@MSG[3]
+
+       movdqa          0*32-0x80($Tbl),$Wi
+       paddd           @MSG[0],$Wi
+       pshufb          $TMP,@MSG[1]
+       movdqa          $CDGH,$CDGH_SAVE        # offload
+       movdqa          $ABEF,$ABEF_SAVE        # offload
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $ABEF,$CDGH             # 0-3
+       pshufd          \$0x0e,$Wi,$Wi
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $CDGH,$ABEF
+
+       movdqa          1*32-0x80($Tbl),$Wi
+       paddd           @MSG[1],$Wi
+       pshufb          $TMP,@MSG[2]
+       lea             0x40($inp),$inp
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $ABEF,$CDGH             # 4-7
+       pshufd          \$0x0e,$Wi,$Wi
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $CDGH,$ABEF
+
+       movdqa          2*32-0x80($Tbl),$Wi
+       paddd           @MSG[2],$Wi
+       pshufb          $TMP,@MSG[3]
+       sha256msg1      @MSG[1],@MSG[0]
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $ABEF,$CDGH             # 8-11
+       pshufd          \$0x0e,$Wi,$Wi
+       movdqa          @MSG[3],$TMP
+       palignr         \$4,@MSG[2],$TMP
+       paddd           $TMP,@MSG[0]
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $CDGH,$ABEF
+
+       movdqa          3*32-0x80($Tbl),$Wi
+       paddd           @MSG[3],$Wi
+       sha256msg2      @MSG[3],@MSG[0]
+       sha256msg1      @MSG[2],@MSG[1]
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $ABEF,$CDGH             # 12-15
+       pshufd          \$0x0e,$Wi,$Wi
+___
+       &$aesenc();
+$code.=<<___;
+       movdqa          @MSG[0],$TMP
+       palignr         \$4,@MSG[3],$TMP
+       paddd           $TMP,@MSG[1]
+       sha256rnds2     $CDGH,$ABEF
+___
+for($i=4;$i<16-3;$i++) {
+       &$aesenc()      if (($r%10)==0);
+$code.=<<___;
+       movdqa          $i*32-0x80($Tbl),$Wi
+       paddd           @MSG[0],$Wi
+       sha256msg2      @MSG[0],@MSG[1]
+       sha256msg1      @MSG[3],@MSG[2]
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $ABEF,$CDGH             # 16-19...
+       pshufd          \$0x0e,$Wi,$Wi
+       movdqa          @MSG[1],$TMP
+       palignr         \$4,@MSG[0],$TMP
+       paddd           $TMP,@MSG[2]
+___
+       &$aesenc();
+       &$aesenc()      if ($r==19);
+$code.=<<___;
+       sha256rnds2     $CDGH,$ABEF
+___
+       push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+       movdqa          13*32-0x80($Tbl),$Wi
+       paddd           @MSG[0],$Wi
+       sha256msg2      @MSG[0],@MSG[1]
+       sha256msg1      @MSG[3],@MSG[2]
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $ABEF,$CDGH             # 52-55
+       pshufd          \$0x0e,$Wi,$Wi
+       movdqa          @MSG[1],$TMP
+       palignr         \$4,@MSG[0],$TMP
+       paddd           $TMP,@MSG[2]
+___
+       &$aesenc();
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $CDGH,$ABEF
+
+       movdqa          14*32-0x80($Tbl),$Wi
+       paddd           @MSG[1],$Wi
+       sha256msg2      @MSG[1],@MSG[2]
+       movdqa          $BSWAP,$TMP
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $ABEF,$CDGH             # 56-59
+       pshufd          \$0x0e,$Wi,$Wi
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $CDGH,$ABEF
+
+       movdqa          15*32-0x80($Tbl),$Wi
+       paddd           @MSG[2],$Wi
+___
+       &$aesenc();
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $ABEF,$CDGH             # 60-63
+       pshufd          \$0x0e,$Wi,$Wi
+___
+       &$aesenc();
+$code.=<<___;
+       sha256rnds2     $CDGH,$ABEF
+       #pxor           $CDGH,$rndkey0          # black magic
+___
+       while ($r<40)   { &$aesenc(); }         # remaining aesenc's
+$code.=<<___;
+       #xorps          $CDGH,$rndkey0          # black magic
+       paddd           $CDGH_SAVE,$CDGH
+       paddd           $ABEF_SAVE,$ABEF
+
+       dec             $len
+       movups          $iv,48($out,$in0)       # write output
+       lea             64($in0),$in0
+       jnz             .Loop_shaext
+
+       pshufd          \$0xb1,$CDGH,$CDGH      # DCHG
+       pshufd          \$0x1b,$ABEF,$TMP       # FEBA
+       pshufd          \$0xb1,$ABEF,$ABEF      # BAFE
+       punpckhqdq      $CDGH,$ABEF             # DCBA
+       palignr         \$8,$TMP,$CDGH          # HGFE
+
+       movups          $iv,($ivp)              # write IV
+       movdqu          $ABEF,($ctx)
+       movdqu          $CDGH,16($ctx)
+___
+$code.=<<___ if ($win64);
+       movaps  0*16(%rsp),%xmm6
+       movaps  1*16(%rsp),%xmm7
+       movaps  2*16(%rsp),%xmm8
+       movaps  3*16(%rsp),%xmm9
+       movaps  4*16(%rsp),%xmm10
+       movaps  5*16(%rsp),%xmm11
+       movaps  6*16(%rsp),%xmm12
+       movaps  7*16(%rsp),%xmm13
+       movaps  8*16(%rsp),%xmm14
+       movaps  9*16(%rsp),%xmm15
+       lea     8+10*16(%rsp),%rsp
+.Lepilogue_shaext:
+___
+$code.=<<___;
+       ret
+.size  ${func}_shaext,.-${func}_shaext
+___
+}
+}}}}}
+
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64 && $avx) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern        __imp_RtlVirtualUnwind
+.type  se_handler,\@abi-omnipotent
+.align 16
+se_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HanderlData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # prologue label
+       cmp     %r10,%rbx               # context->Rip<prologue label
+       jb      .Lin_prologue
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=epilogue label
+       jae     .Lin_prologue
+___
+$code.=<<___ if ($shaext);
+       lea     aesni_cbc_sha256_enc_shaext(%rip),%r10
+       cmp     %r10,%rbx
+       jb      .Lnot_in_shaext
+
+       lea     (%rax),%rsi
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$20,%ecx
+       .long   0xa548f3fc              # cld; rep movsq
+       lea     168(%rax),%rax          # adjust stack pointer
+       jmp     .Lin_prologue
+.Lnot_in_shaext:
+___
+$code.=<<___ if ($avx>1);
+       lea     .Lavx2_shortcut(%rip),%r10
+       cmp     %r10,%rbx               # context->Rip<avx2_shortcut
+       jb      .Lnot_in_avx2
+
+       and     \$-256*$SZ,%rax
+       add     \$`2*$SZ*($rounds-8)`,%rax
+.Lnot_in_avx2:
+___
+$code.=<<___;
+       mov     %rax,%rsi               # put aside Rsp
+       mov     16*$SZ+7*8(%rax),%rax   # pull $_rsp
+       lea     48(%rax),%rax
+
+       mov     -8(%rax),%rbx
+       mov     -16(%rax),%rbp
+       mov     -24(%rax),%r12
+       mov     -32(%rax),%r13
+       mov     -40(%rax),%r14
+       mov     -48(%rax),%r15
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+       mov     %r12,216($context)      # restore context->R12
+       mov     %r13,224($context)      # restore context->R13
+       mov     %r14,232($context)      # restore context->R14
+       mov     %r15,240($context)      # restore context->R15
+
+       lea     16*$SZ+8*8(%rsi),%rsi   # Xmm6- save area
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$20,%ecx
+       .long   0xa548f3fc              # cld; rep movsq
+
+.Lin_prologue:
+       mov     8(%rax),%rdi
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT)
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
+       mov     %r12,48(%rsp)           # arg7
+       mov     %rcx,56(%rsp)           # arg8, (NULL)
+       call    *__imp_RtlVirtualUnwind(%rip)
+
+       mov     \$1,%eax                # ExceptionContinueSearch
+       add     \$64,%rsp
+       popfq
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+       ret
+.size  se_handler,.-se_handler
+
+.section       .pdata
+       .rva    .LSEH_begin_${func}_xop
+       .rva    .LSEH_end_${func}_xop
+       .rva    .LSEH_info_${func}_xop
+
+       .rva    .LSEH_begin_${func}_avx
+       .rva    .LSEH_end_${func}_avx
+       .rva    .LSEH_info_${func}_avx
+___
+$code.=<<___ if ($avx>1);
+       .rva    .LSEH_begin_${func}_avx2
+       .rva    .LSEH_end_${func}_avx2
+       .rva    .LSEH_info_${func}_avx2
+___
+$code.=<<___ if ($shaext);
+       .rva    .LSEH_begin_${func}_shaext
+       .rva    .LSEH_end_${func}_shaext
+       .rva    .LSEH_info_${func}_shaext
+___
+$code.=<<___;
+.section       .xdata
+.align 8
+.LSEH_info_${func}_xop:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lprologue_xop,.Lepilogue_xop           # HandlerData[]
+
+.LSEH_info_${func}_avx:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lprologue_avx,.Lepilogue_avx           # HandlerData[]
+___
+$code.=<<___ if ($avx>1);
+.LSEH_info_${func}_avx2:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lprologue_avx2,.Lepilogue_avx2         # HandlerData[]
+___
+$code.=<<___ if ($shaext);
+.LSEH_info_${func}_shaext:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lprologue_shaext,.Lepilogue_shaext     # HandlerData[]
+___
+}
+
+####################################################################
+# Prepend a REX prefix byte to an opcode byte list when either register
+# number is 8 or above; the list is left untouched otherwise.
+#   first argument - reference to the opcode byte array, edited in place
+#   $dst           - register number; 8 or above sets bit 0x04 (REX.R)
+#   $src           - register number; 8 or above sets bit 0x01 (REX.B)
+sub rex {
+  my ($bytes,$dst,$src)=@_;
+  my $prefix=0;
+
+    if ($dst>=8) { $prefix|=0x04; }
+    if ($src>=8) { $prefix|=0x01; }
+    unshift(@$bytes,$prefix|0x40)      if($prefix);
+}
+
+{
+  # Final opcode byte, following the 0x0f,0x38 escape, per mnemonic.
+  my %opcodelet = (
+               "sha256rnds2" => 0xcb,
+               "sha256msg1"  => 0xcc,
+               "sha256msg2"  => 0xcd   );
+
+  # Translate one SHA-256 extension instruction into a ".byte" directive.
+  #   $instr - mnemonic as it appears in the generated text
+  #   $args  - operand text following the mnemonic
+  # When the mnemonic is known and the operands are two %xmm registers
+  # the instruction is encoded by hand (optional REX prefix, 0x0f,0x38,
+  # opcode byte, register-direct ModR/M); anything else is returned as
+  # "mnemonic<TAB>operands" unchanged.
+  sub sha256op38 {
+    my ($instr,$args) = @_;
+    my $lastbyte = $opcodelet{$instr};
+
+    if (defined($lastbyte)) {
+      if (my ($src,$dst) = ($args =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/)) {
+       my @opcode=(0x0f,0x38);
+       rex(\@opcode,$dst,$src);
+       push(@opcode,$lastbyte,
+                    0xc0|($src&7)|(($dst&7)<<3));      # ModR/M
+       return ".byte\t".join(',',@opcode);
+      }
+    }
+    return $instr."\t".$args;
+  }
+}
+
+# Replace every `...` span in the generated text with the result of
+# evaluating its contents as Perl.
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+# Hand each sha256* mnemonic and the rest of its line to sha256op38().
+$code =~ s/\b(sha256[^\s]*)\s+(.*)/sha256op38($1,$2)/gem;
+print $code;
+close STDOUT;
index 3dc345b..f67df8c 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 # Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing
 # one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09.
 
+######################################################################
+# Current large-block performance in cycles per byte processed with
+# 128-bit key (less is better).
+#
+#              CBC en-/decrypt CTR     XTS     ECB
+# Westmere     3.77/1.37       1.37    1.52    1.27
+# * Bridge     5.07/0.98       0.99    1.09    0.91
+# Haswell      4.44/0.80       0.97    1.03    0.72
+# Silvermont   5.77/3.56       3.67    4.03    3.46
+# Bulldozer    5.80/0.98       1.05    1.24    0.93
+
 $PREFIX="aesni";       # if $PREFIX is set to "AES", the script
                        # generates drop-in replacement for
                        # crypto/aes/asm/aes-586.pl:-)
@@ -54,8 +65,11 @@ require "x86asm.pl";
 
 &asm_init($ARGV[0],$0);
 
-if ($PREFIX eq "aesni")        { $movekey=*movups; }
-else                   { $movekey=*movups; }
+&external_label("OPENSSL_ia32cap_P");
+&static_label("key_const");
+
+if ($PREFIX eq "aesni")        { $movekey=\&movups; }
+else                   { $movekey=\&movups; }
 
 $len="eax";
 $rounds="ecx";
@@ -170,7 +184,10 @@ sub aesni_generate1        # fully unrolled loop
        {   &aesni_inline_generate1("enc");     }
        else
        {   &call       ("_aesni_encrypt1");    }
+       &pxor   ($rndkey0,$rndkey0);            # clear register bank
+       &pxor   ($rndkey1,$rndkey1);
        &movups (&QWP(0,"eax"),$inout0);
+       &pxor   ($inout0,$inout0);
        &ret    ();
 &function_end_B("${PREFIX}_encrypt");
 
@@ -186,7 +203,10 @@ sub aesni_generate1        # fully unrolled loop
        {   &aesni_inline_generate1("dec");     }
        else
        {   &call       ("_aesni_decrypt1");    }
+       &pxor   ($rndkey0,$rndkey0);            # clear register bank
+       &pxor   ($rndkey1,$rndkey1);
        &movups (&QWP(0,"eax"),$inout0);
+       &pxor   ($inout0,$inout0);
        &ret    ();
 &function_end_B("${PREFIX}_decrypt");
 
@@ -196,37 +216,71 @@ sub aesni_generate1       # fully unrolled loop
 # every *2nd* cycle. Thus 3x interleave was the one providing optimal
 # utilization, i.e. when subroutine's throughput is virtually same as
 # of non-interleaved subroutine [for number of input blocks up to 3].
-# This is why it makes no sense to implement 2x subroutine.
-# aes[enc|dec] latency in next processor generation is 8, but the
-# instructions can be scheduled every cycle. Optimal interleave for
-# new processor is therefore 8x, but it's unfeasible to accommodate it
-# in XMM registers addreassable in 32-bit mode and therefore 6x is
-# used instead...
+# This is why it originally made no sense to implement 2x subroutine.
+# But times change and it became appropriate to spend an extra 192
+# bytes on a 2x subroutine on Atom Silvermont's account. For processors
+# that can schedule aes[enc|dec] every cycle, the optimal interleave
+# factor equals the latency of the corresponding instructions. 8x is
+# optimal for * Bridge, but it's infeasible to accommodate such an
+# implementation in the XMM registers addressable in 32-bit mode, and
+# therefore a maximum of 6x is used instead...
+
+sub aesni_generate2        # emits _aesni_${p}rypt2: two blocks ($inout0,$inout1) processed in lock-step
+{ my $p=shift;             # "enc" or "dec"; spliced into the function, label and aes* instruction names
+
+    &function_begin_B("_aesni_${p}rypt2");
+       &$movekey       ($rndkey0,&QWP(0,$key));        # key schedule entry at offset 0
+       &shl            ($rounds,4);                    # $rounds*=16, matching the 16-byte stride of the key loads
+       &$movekey       ($rndkey1,&QWP(16,$key));       # key schedule entry at offset 16
+       &xorps          ($inout0,$rndkey0);             # block 0 ^= entry 0
+       &pxor           ($inout1,$rndkey0);             # block 1 ^= entry 0
+       &$movekey       ($rndkey0,&QWP(32,$key));       # entry at offset 32, fetched before $key is advanced
+       &lea            ($key,&DWP(32,$key,$rounds));   # $key = $key+32+scaled $rounds
+       &neg            ($rounds);
+       &add            ($rounds,16);                   # $rounds = 16-scaled value; used as index off the advanced $key
+
+    &set_label("${p}2_loop");
+       eval"&aes${p}   ($inout0,$rndkey1)";            # one round on both blocks with $rndkey1
+       eval"&aes${p}   ($inout1,$rndkey1)";
+       &$movekey       ($rndkey1,&QWP(0,$key,$rounds));        # refill $rndkey1 from $key+$rounds
+       &add            ($rounds,32);                   # step the index by two entries; zero ends the loop at jnz
+       eval"&aes${p}   ($inout0,$rndkey0)";            # one round on both blocks with $rndkey0
+       eval"&aes${p}   ($inout1,$rndkey0)";
+       &$movekey       ($rndkey0,&QWP(-16,$key,$rounds));      # refill $rndkey0; -16 offsets the add above
+       &jnz            (&label("${p}2_loop"));         # repeat until the index reaches zero
+    eval"&aes${p}      ($inout0,$rndkey1)";            # remaining regular round with $rndkey1
+    eval"&aes${p}      ($inout1,$rndkey1)";
+    eval"&aes${p}last  ($inout0,$rndkey0)";            # closing aes${p}last round with $rndkey0
+    eval"&aes${p}last  ($inout1,$rndkey0)";
+    &ret();
+    &function_end_B("_aesni_${p}rypt2");
+}
 
 sub aesni_generate3
 { my $p=shift;
 
     &function_begin_B("_aesni_${p}rypt3");
        &$movekey       ($rndkey0,&QWP(0,$key));
-       &shr            ($rounds,1);
+       &shl            ($rounds,4);
        &$movekey       ($rndkey1,&QWP(16,$key));
-       &lea            ($key,&DWP(32,$key));
        &xorps          ($inout0,$rndkey0);
        &pxor           ($inout1,$rndkey0);
        &pxor           ($inout2,$rndkey0);
-       &$movekey       ($rndkey0,&QWP(0,$key));
+       &$movekey       ($rndkey0,&QWP(32,$key));
+       &lea            ($key,&DWP(32,$key,$rounds));
+       &neg            ($rounds);
+       &add            ($rounds,16);
 
     &set_label("${p}3_loop");
        eval"&aes${p}   ($inout0,$rndkey1)";
        eval"&aes${p}   ($inout1,$rndkey1)";
-       &dec            ($rounds);
        eval"&aes${p}   ($inout2,$rndkey1)";
-       &$movekey       ($rndkey1,&QWP(16,$key));
+       &$movekey       ($rndkey1,&QWP(0,$key,$rounds));
+       &add            ($rounds,32);
        eval"&aes${p}   ($inout0,$rndkey0)";
        eval"&aes${p}   ($inout1,$rndkey0)";
-       &lea            ($key,&DWP(32,$key));
        eval"&aes${p}   ($inout2,$rndkey0)";
-       &$movekey       ($rndkey0,&QWP(0,$key));
+       &$movekey       ($rndkey0,&QWP(-16,$key,$rounds));
        &jnz            (&label("${p}3_loop"));
     eval"&aes${p}      ($inout0,$rndkey1)";
     eval"&aes${p}      ($inout1,$rndkey1)";
@@ -248,27 +302,29 @@ sub aesni_generate4
     &function_begin_B("_aesni_${p}rypt4");
        &$movekey       ($rndkey0,&QWP(0,$key));
        &$movekey       ($rndkey1,&QWP(16,$key));
-       &shr            ($rounds,1);
-       &lea            ($key,&DWP(32,$key));
+       &shl            ($rounds,4);
        &xorps          ($inout0,$rndkey0);
        &pxor           ($inout1,$rndkey0);
        &pxor           ($inout2,$rndkey0);
        &pxor           ($inout3,$rndkey0);
-       &$movekey       ($rndkey0,&QWP(0,$key));
+       &$movekey       ($rndkey0,&QWP(32,$key));
+       &lea            ($key,&DWP(32,$key,$rounds));
+       &neg            ($rounds);
+       &data_byte      (0x0f,0x1f,0x40,0x00);
+       &add            ($rounds,16);
 
     &set_label("${p}4_loop");
        eval"&aes${p}   ($inout0,$rndkey1)";
        eval"&aes${p}   ($inout1,$rndkey1)";
-       &dec            ($rounds);
        eval"&aes${p}   ($inout2,$rndkey1)";
        eval"&aes${p}   ($inout3,$rndkey1)";
-       &$movekey       ($rndkey1,&QWP(16,$key));
+       &$movekey       ($rndkey1,&QWP(0,$key,$rounds));
+       &add            ($rounds,32);
        eval"&aes${p}   ($inout0,$rndkey0)";
        eval"&aes${p}   ($inout1,$rndkey0)";
-       &lea            ($key,&DWP(32,$key));
        eval"&aes${p}   ($inout2,$rndkey0)";
        eval"&aes${p}   ($inout3,$rndkey0)";
-       &$movekey       ($rndkey0,&QWP(0,$key));
+       &$movekey       ($rndkey0,&QWP(-16,$key,$rounds));
     &jnz               (&label("${p}4_loop"));
 
     eval"&aes${p}      ($inout0,$rndkey1)";
@@ -289,43 +345,41 @@ sub aesni_generate6
     &function_begin_B("_aesni_${p}rypt6");
     &static_label("_aesni_${p}rypt6_enter");
        &$movekey       ($rndkey0,&QWP(0,$key));
-       &shr            ($rounds,1);
+       &shl            ($rounds,4);
        &$movekey       ($rndkey1,&QWP(16,$key));
-       &lea            ($key,&DWP(32,$key));
        &xorps          ($inout0,$rndkey0);
        &pxor           ($inout1,$rndkey0);     # pxor does better here
-       eval"&aes${p}   ($inout0,$rndkey1)";
        &pxor           ($inout2,$rndkey0);
-       eval"&aes${p}   ($inout1,$rndkey1)";
+       eval"&aes${p}   ($inout0,$rndkey1)";
        &pxor           ($inout3,$rndkey0);
-       &dec            ($rounds);
-       eval"&aes${p}   ($inout2,$rndkey1)";
        &pxor           ($inout4,$rndkey0);
-       eval"&aes${p}   ($inout3,$rndkey1)";
+       eval"&aes${p}   ($inout1,$rndkey1)";
+       &lea            ($key,&DWP(32,$key,$rounds));
+       &neg            ($rounds);
+       eval"&aes${p}   ($inout2,$rndkey1)";
        &pxor           ($inout5,$rndkey0);
-       eval"&aes${p}   ($inout4,$rndkey1)";
-       &$movekey       ($rndkey0,&QWP(0,$key));
-       eval"&aes${p}   ($inout5,$rndkey1)";
-       &jmp            (&label("_aesni_${p}rypt6_enter"));
+       &$movekey       ($rndkey0,&QWP(0,$key,$rounds));
+       &add            ($rounds,16);
+       &jmp            (&label("_aesni_${p}rypt6_inner"));
 
     &set_label("${p}6_loop",16);
        eval"&aes${p}   ($inout0,$rndkey1)";
        eval"&aes${p}   ($inout1,$rndkey1)";
-       &dec            ($rounds);
        eval"&aes${p}   ($inout2,$rndkey1)";
+    &set_label("_aesni_${p}rypt6_inner");
        eval"&aes${p}   ($inout3,$rndkey1)";
        eval"&aes${p}   ($inout4,$rndkey1)";
        eval"&aes${p}   ($inout5,$rndkey1)";
-    &set_label("_aesni_${p}rypt6_enter",16);
-       &$movekey       ($rndkey1,&QWP(16,$key));
+    &set_label("_aesni_${p}rypt6_enter");
+       &$movekey       ($rndkey1,&QWP(0,$key,$rounds));
+       &add            ($rounds,32);
        eval"&aes${p}   ($inout0,$rndkey0)";
        eval"&aes${p}   ($inout1,$rndkey0)";
-       &lea            ($key,&DWP(32,$key));
        eval"&aes${p}   ($inout2,$rndkey0)";
        eval"&aes${p}   ($inout3,$rndkey0)";
        eval"&aes${p}   ($inout4,$rndkey0)";
        eval"&aes${p}   ($inout5,$rndkey0)";
-       &$movekey       ($rndkey0,&QWP(0,$key));
+       &$movekey       ($rndkey0,&QWP(-16,$key,$rounds));
     &jnz               (&label("${p}6_loop"));
 
     eval"&aes${p}      ($inout0,$rndkey1)";
@@ -343,6 +397,8 @@ sub aesni_generate6
     &ret();
     &function_end_B("_aesni_${p}rypt6");
 }
+&aesni_generate2("enc") if ($PREFIX eq "aesni");
+&aesni_generate2("dec");
 &aesni_generate3("enc") if ($PREFIX eq "aesni");
 &aesni_generate3("dec");
 &aesni_generate4("enc") if ($PREFIX eq "aesni");
@@ -446,8 +502,7 @@ if ($PREFIX eq "aesni") {
        &jmp    (&label("ecb_ret"));
 
 &set_label("ecb_enc_two",16);
-       &xorps  ($inout2,$inout2);
-       &call   ("_aesni_encrypt3");
+       &call   ("_aesni_encrypt2");
        &movups (&QWP(0,$out),$inout0);
        &movups (&QWP(0x10,$out),$inout1);
        &jmp    (&label("ecb_ret"));
@@ -547,8 +602,7 @@ if ($PREFIX eq "aesni") {
        &jmp    (&label("ecb_ret"));
 
 &set_label("ecb_dec_two",16);
-       &xorps  ($inout2,$inout2);
-       &call   ("_aesni_decrypt3");
+       &call   ("_aesni_decrypt2");
        &movups (&QWP(0,$out),$inout0);
        &movups (&QWP(0x10,$out),$inout1);
        &jmp    (&label("ecb_ret"));
@@ -568,6 +622,14 @@ if ($PREFIX eq "aesni") {
        &movups (&QWP(0x30,$out),$inout3);
 
 &set_label("ecb_ret");
+       &pxor   ("xmm0","xmm0");                # clear register bank
+       &pxor   ("xmm1","xmm1");
+       &pxor   ("xmm2","xmm2");
+       &pxor   ("xmm3","xmm3");
+       &pxor   ("xmm4","xmm4");
+       &pxor   ("xmm5","xmm5");
+       &pxor   ("xmm6","xmm6");
+       &pxor   ("xmm7","xmm7");
 &function_end("aesni_ecb_encrypt");
 \f
 ######################################################################
@@ -610,11 +672,13 @@ if ($PREFIX eq "aesni") {
        &mov    (&DWP(24,"esp"),$key_);
        &mov    (&DWP(28,"esp"),$key_);
 
-       &shr    ($rounds,1);
+       &shl    ($rounds,4);
+       &mov    ($rounds_,16);
        &lea    ($key_,&DWP(0,$key));
        &movdqa ($inout3,&QWP(0,"esp"));
        &movdqa ($inout0,$ivec);
-       &mov    ($rounds_,$rounds);
+       &lea    ($key,&DWP(32,$key,$rounds));
+       &sub    ($rounds_,$rounds);
        &pshufb ($ivec,$inout3);
 
 &set_label("ccm64_enc_outer");
@@ -625,38 +689,45 @@ if ($PREFIX eq "aesni") {
        &xorps          ($inout0,$rndkey0);
        &$movekey       ($rndkey1,&QWP(16,$key_));
        &xorps          ($rndkey0,$in0);
-       &lea            ($key,&DWP(32,$key_));
        &xorps          ($cmac,$rndkey0);               # cmac^=inp
-       &$movekey       ($rndkey0,&QWP(0,$key));
+       &$movekey       ($rndkey0,&QWP(32,$key_));
 
 &set_label("ccm64_enc2_loop");
        &aesenc         ($inout0,$rndkey1);
-       &dec            ($rounds);
        &aesenc         ($cmac,$rndkey1);
-       &$movekey       ($rndkey1,&QWP(16,$key));
+       &$movekey       ($rndkey1,&QWP(0,$key,$rounds));
+       &add            ($rounds,32);
        &aesenc         ($inout0,$rndkey0);
-       &lea            ($key,&DWP(32,$key));
        &aesenc         ($cmac,$rndkey0);
-       &$movekey       ($rndkey0,&QWP(0,$key));
+       &$movekey       ($rndkey0,&QWP(-16,$key,$rounds));
        &jnz            (&label("ccm64_enc2_loop"));
        &aesenc         ($inout0,$rndkey1);
        &aesenc         ($cmac,$rndkey1);
        &paddq          ($ivec,&QWP(16,"esp"));
+       &dec            ($len);
        &aesenclast     ($inout0,$rndkey0);
        &aesenclast     ($cmac,$rndkey0);
 
-       &dec    ($len);
        &lea    ($inp,&DWP(16,$inp));
        &xorps  ($in0,$inout0);                 # inp^=E(ivec)
        &movdqa ($inout0,$ivec);
        &movups (&QWP(0,$out),$in0);            # save output
-       &lea    ($out,&DWP(16,$out));
        &pshufb ($inout0,$inout3);
+       &lea    ($out,&DWP(16,$out));
        &jnz    (&label("ccm64_enc_outer"));
 
        &mov    ("esp",&DWP(48,"esp"));
        &mov    ($out,&wparam(5));
        &movups (&QWP(0,$out),$cmac);
+
+       &pxor   ("xmm0","xmm0");                # clear register bank
+       &pxor   ("xmm1","xmm1");
+       &pxor   ("xmm2","xmm2");
+       &pxor   ("xmm3","xmm3");
+       &pxor   ("xmm4","xmm4");
+       &pxor   ("xmm5","xmm5");
+       &pxor   ("xmm6","xmm6");
+       &pxor   ("xmm7","xmm7");
 &function_end("aesni_ccm64_encrypt_blocks");
 
 &function_begin("aesni_ccm64_decrypt_blocks");
@@ -700,15 +771,19 @@ if ($PREFIX eq "aesni") {
        {   &aesni_inline_generate1("enc");     }
        else
        {   &call       ("_aesni_encrypt1");    }
+       &shl    ($rounds_,4);
+       &mov    ($rounds,16);
        &movups ($in0,&QWP(0,$inp));            # load inp
        &paddq  ($ivec,&QWP(16,"esp"));
        &lea    ($inp,&QWP(16,$inp));
+       &sub    ($rounds,$rounds_);
+       &lea    ($key,&DWP(32,$key_,$rounds_));
+       &mov    ($rounds_,$rounds);
        &jmp    (&label("ccm64_dec_outer"));
 
 &set_label("ccm64_dec_outer",16);
        &xorps  ($in0,$inout0);                 # inp ^= E(ivec)
        &movdqa ($inout0,$ivec);
-       &mov    ($rounds,$rounds_);
        &movups (&QWP(0,$out),$in0);            # save output
        &lea    ($out,&DWP(16,$out));
        &pshufb ($inout0,$inout3);
@@ -717,34 +792,33 @@ if ($PREFIX eq "aesni") {
        &jz     (&label("ccm64_dec_break"));
 
        &$movekey       ($rndkey0,&QWP(0,$key_));
-       &shr            ($rounds,1);
+       &mov            ($rounds,$rounds_);
        &$movekey       ($rndkey1,&QWP(16,$key_));
        &xorps          ($in0,$rndkey0);
-       &lea            ($key,&DWP(32,$key_));
        &xorps          ($inout0,$rndkey0);
        &xorps          ($cmac,$in0);           # cmac^=out
-       &$movekey       ($rndkey0,&QWP(0,$key));
+       &$movekey       ($rndkey0,&QWP(32,$key_));
 
 &set_label("ccm64_dec2_loop");
        &aesenc         ($inout0,$rndkey1);
-       &dec            ($rounds);
        &aesenc         ($cmac,$rndkey1);
-       &$movekey       ($rndkey1,&QWP(16,$key));
+       &$movekey       ($rndkey1,&QWP(0,$key,$rounds));
+       &add            ($rounds,32);
        &aesenc         ($inout0,$rndkey0);
-       &lea            ($key,&DWP(32,$key));
        &aesenc         ($cmac,$rndkey0);
-       &$movekey       ($rndkey0,&QWP(0,$key));
+       &$movekey       ($rndkey0,&QWP(-16,$key,$rounds));
        &jnz            (&label("ccm64_dec2_loop"));
        &movups         ($in0,&QWP(0,$inp));    # load inp
        &paddq          ($ivec,&QWP(16,"esp"));
        &aesenc         ($inout0,$rndkey1);
        &aesenc         ($cmac,$rndkey1);
-       &lea            ($inp,&QWP(16,$inp));
        &aesenclast     ($inout0,$rndkey0);
        &aesenclast     ($cmac,$rndkey0);
+       &lea            ($inp,&QWP(16,$inp));
        &jmp    (&label("ccm64_dec_outer"));
 
 &set_label("ccm64_dec_break",16);
+       &mov    ($rounds,&DWP(240,$key_));
        &mov    ($key,$key_);
        if ($inline)
        {   &aesni_inline_generate1("enc",$cmac,$in0);  }
@@ -754,6 +828,15 @@ if ($PREFIX eq "aesni") {
        &mov    ("esp",&DWP(48,"esp"));
        &mov    ($out,&wparam(5));
        &movups (&QWP(0,$out),$cmac);
+
+       &pxor   ("xmm0","xmm0");                # clear register bank
+       &pxor   ("xmm1","xmm1");
+       &pxor   ("xmm2","xmm2");
+       &pxor   ("xmm3","xmm3");
+       &pxor   ("xmm4","xmm4");
+       &pxor   ("xmm5","xmm5");
+       &pxor   ("xmm6","xmm6");
+       &pxor   ("xmm7","xmm7");
 &function_end("aesni_ccm64_decrypt_blocks");
 }
 \f
@@ -763,7 +846,7 @@ if ($PREFIX eq "aesni") {
 #                         const char *ivec);
 #
 # Handles only complete blocks, operates on 32-bit counter and
-# does not update *ivec! (see engine/eng_aesni.c for details)
+# does not update *ivec! (see crypto/modes/ctr128.c for details)
 #
 # stack layout:
 #      0       pshufb mask
@@ -810,66 +893,61 @@ if ($PREFIX eq "aesni") {
 
        # compose 2 vectors of 3x32-bit counters
        &bswap  ($rounds_);
-       &pxor   ($rndkey1,$rndkey1);
        &pxor   ($rndkey0,$rndkey0);
+       &pxor   ($rndkey1,$rndkey1);
        &movdqa ($inout0,&QWP(0,"esp"));        # load byte-swap mask
-       &pinsrd ($rndkey1,$rounds_,0);
+       &pinsrd ($rndkey0,$rounds_,0);
        &lea    ($key_,&DWP(3,$rounds_));
-       &pinsrd ($rndkey0,$key_,0);
+       &pinsrd ($rndkey1,$key_,0);
        &inc    ($rounds_);
-       &pinsrd ($rndkey1,$rounds_,1);
+       &pinsrd ($rndkey0,$rounds_,1);
        &inc    ($key_);
-       &pinsrd ($rndkey0,$key_,1);
+       &pinsrd ($rndkey1,$key_,1);
        &inc    ($rounds_);
-       &pinsrd ($rndkey1,$rounds_,2);
+       &pinsrd ($rndkey0,$rounds_,2);
        &inc    ($key_);
-       &pinsrd ($rndkey0,$key_,2);
-       &movdqa (&QWP(48,"esp"),$rndkey1);      # save 1st triplet
-       &pshufb ($rndkey1,$inout0);             # byte swap
-       &movdqa (&QWP(64,"esp"),$rndkey0);      # save 2nd triplet
+       &pinsrd ($rndkey1,$key_,2);
+       &movdqa (&QWP(48,"esp"),$rndkey0);      # save 1st triplet
        &pshufb ($rndkey0,$inout0);             # byte swap
+       &movdqu ($inout4,&QWP(0,$key));         # key[0]
+       &movdqa (&QWP(64,"esp"),$rndkey1);      # save 2nd triplet
+       &pshufb ($rndkey1,$inout0);             # byte swap
 
-       &pshufd ($inout0,$rndkey1,3<<6);        # place counter to upper dword
-       &pshufd ($inout1,$rndkey1,2<<6);
+       &pshufd ($inout0,$rndkey0,3<<6);        # place counter to upper dword
+       &pshufd ($inout1,$rndkey0,2<<6);
        &cmp    ($len,6);
        &jb     (&label("ctr32_tail"));
-       &movdqa (&QWP(32,"esp"),$inout5);       # save counter-less ivec
-       &shr    ($rounds,1);
+       &pxor   ($inout5,$inout4);              # counter-less ivec^key[0]
+       &shl    ($rounds,4);
+       &mov    ($rounds_,16);
+       &movdqa (&QWP(32,"esp"),$inout5);       # save counter-less ivec^key[0]
        &mov    ($key_,$key);                   # backup $key
-       &mov    ($rounds_,$rounds);             # backup $rounds
+       &sub    ($rounds_,$rounds);             # backup twisted $rounds
+       &lea    ($key,&DWP(32,$key,$rounds));
        &sub    ($len,6);
        &jmp    (&label("ctr32_loop6"));
 
 &set_label("ctr32_loop6",16);
-       &pshufd ($inout2,$rndkey1,1<<6);
-       &movdqa ($rndkey1,&QWP(32,"esp"));      # pull counter-less ivec
-       &pshufd ($inout3,$rndkey0,3<<6);
-       &por    ($inout0,$rndkey1);             # merge counter-less ivec
-       &pshufd ($inout4,$rndkey0,2<<6);
-       &por    ($inout1,$rndkey1);
-       &pshufd ($inout5,$rndkey0,1<<6);
-       &por    ($inout2,$rndkey1);
-       &por    ($inout3,$rndkey1);
-       &por    ($inout4,$rndkey1);
-       &por    ($inout5,$rndkey1);
-
-       # inlining _aesni_encrypt6's prologue gives ~4% improvement...
-       &$movekey       ($rndkey0,&QWP(0,$key_));
-       &$movekey       ($rndkey1,&QWP(16,$key_));
-       &lea            ($key,&DWP(32,$key_));
-       &dec            ($rounds);
-       &pxor           ($inout0,$rndkey0);
+       # inlining _aesni_encrypt6's prologue gives ~6% improvement...
+       &pshufd ($inout2,$rndkey0,1<<6);
+       &movdqa ($rndkey0,&QWP(32,"esp"));      # pull counter-less ivec
+       &pshufd ($inout3,$rndkey1,3<<6);
+       &pxor           ($inout0,$rndkey0);     # merge counter-less ivec
+       &pshufd ($inout4,$rndkey1,2<<6);
        &pxor           ($inout1,$rndkey0);
-       &aesenc         ($inout0,$rndkey1);
+       &pshufd ($inout5,$rndkey1,1<<6);
+       &$movekey       ($rndkey1,&QWP(16,$key_));
        &pxor           ($inout2,$rndkey0);
-       &aesenc         ($inout1,$rndkey1);
        &pxor           ($inout3,$rndkey0);
-       &aesenc         ($inout2,$rndkey1);
+       &aesenc         ($inout0,$rndkey1);
        &pxor           ($inout4,$rndkey0);
-       &aesenc         ($inout3,$rndkey1);
        &pxor           ($inout5,$rndkey0);
+       &aesenc         ($inout1,$rndkey1);
+       &$movekey       ($rndkey0,&QWP(32,$key_));
+       &mov            ($rounds,$rounds_);
+       &aesenc         ($inout2,$rndkey1);
+       &aesenc         ($inout3,$rndkey1);
        &aesenc         ($inout4,$rndkey1);
-       &$movekey       ($rndkey0,&QWP(0,$key));
        &aesenc         ($inout5,$rndkey1);
 
        &call           (&label("_aesni_encrypt6_enter"));
@@ -882,12 +960,12 @@ if ($PREFIX eq "aesni") {
        &movups (&QWP(0,$out),$inout0);
        &movdqa ($rndkey0,&QWP(16,"esp"));      # load increment
        &xorps  ($inout2,$rndkey1);
-       &movdqa ($rndkey1,&QWP(48,"esp"));      # load 1st triplet
+       &movdqa ($rndkey1,&QWP(64,"esp"));      # load 2nd triplet
        &movups (&QWP(0x10,$out),$inout1);
        &movups (&QWP(0x20,$out),$inout2);
 
-       &paddd  ($rndkey1,$rndkey0);            # 1st triplet increment
-       &paddd  ($rndkey0,&QWP(64,"esp"));      # 2nd triplet increment
+       &paddd  ($rndkey1,$rndkey0);            # 2nd triplet increment
+       &paddd  ($rndkey0,&QWP(48,"esp"));      # 1st triplet increment
        &movdqa ($inout0,&QWP(0,"esp"));        # load byte swap mask
 
        &movups ($inout1,&QWP(0x30,$inp));
@@ -895,44 +973,44 @@ if ($PREFIX eq "aesni") {
        &xorps  ($inout3,$inout1);
        &movups ($inout1,&QWP(0x50,$inp));
        &lea    ($inp,&DWP(0x60,$inp));
-       &movdqa (&QWP(48,"esp"),$rndkey1);      # save 1st triplet
-       &pshufb ($rndkey1,$inout0);             # byte swap
+       &movdqa (&QWP(48,"esp"),$rndkey0);      # save 1st triplet
+       &pshufb ($rndkey0,$inout0);             # byte swap
        &xorps  ($inout4,$inout2);
        &movups (&QWP(0x30,$out),$inout3);
        &xorps  ($inout5,$inout1);
-       &movdqa (&QWP(64,"esp"),$rndkey0);      # save 2nd triplet
-       &pshufb ($rndkey0,$inout0);             # byte swap
+       &movdqa (&QWP(64,"esp"),$rndkey1);      # save 2nd triplet
+       &pshufb ($rndkey1,$inout0);             # byte swap
        &movups (&QWP(0x40,$out),$inout4);
-       &pshufd ($inout0,$rndkey1,3<<6);
+       &pshufd ($inout0,$rndkey0,3<<6);
        &movups (&QWP(0x50,$out),$inout5);
        &lea    ($out,&DWP(0x60,$out));
 
-       &mov    ($rounds,$rounds_);
-       &pshufd ($inout1,$rndkey1,2<<6);
+       &pshufd ($inout1,$rndkey0,2<<6);
        &sub    ($len,6);
        &jnc    (&label("ctr32_loop6"));
 
        &add    ($len,6);
        &jz     (&label("ctr32_ret"));
+       &movdqu ($inout5,&QWP(0,$key_));
        &mov    ($key,$key_);
-       &lea    ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds
-       &movdqa ($inout5,&QWP(32,"esp"));       # pull count-less ivec
+       &pxor   ($inout5,&QWP(32,"esp"));       # restore count-less ivec
+       &mov    ($rounds,&DWP(240,$key_));      # restore $rounds
 
 &set_label("ctr32_tail");
        &por    ($inout0,$inout5);
        &cmp    ($len,2);
        &jb     (&label("ctr32_one"));
 
-       &pshufd ($inout2,$rndkey1,1<<6);
+       &pshufd ($inout2,$rndkey0,1<<6);
        &por    ($inout1,$inout5);
        &je     (&label("ctr32_two"));
 
-       &pshufd ($inout3,$rndkey0,3<<6);
+       &pshufd ($inout3,$rndkey1,3<<6);
        &por    ($inout2,$inout5);
        &cmp    ($len,4);
        &jb     (&label("ctr32_three"));
 
-       &pshufd ($inout4,$rndkey0,2<<6);
+       &pshufd ($inout4,$rndkey1,2<<6);
        &por    ($inout3,$inout5);
        &je     (&label("ctr32_four"));
 
@@ -970,7 +1048,7 @@ if ($PREFIX eq "aesni") {
        &jmp    (&label("ctr32_ret"));
 
 &set_label("ctr32_two",16);
-       &call   ("_aesni_encrypt3");
+       &call   ("_aesni_encrypt2");
        &movups ($inout3,&QWP(0,$inp));
        &movups ($inout4,&QWP(0x10,$inp));
        &xorps  ($inout0,$inout3);
@@ -1008,6 +1086,17 @@ if ($PREFIX eq "aesni") {
        &movups (&QWP(0x30,$out),$inout3);
 
 &set_label("ctr32_ret");
+       &pxor   ("xmm0","xmm0");                # clear register bank
+       &pxor   ("xmm1","xmm1");
+       &pxor   ("xmm2","xmm2");
+       &pxor   ("xmm3","xmm3");
+       &pxor   ("xmm4","xmm4");
+       &movdqa (&QWP(32,"esp"),"xmm0");        # clear stack
+       &pxor   ("xmm5","xmm5");
+       &movdqa (&QWP(48,"esp"),"xmm0");
+       &pxor   ("xmm6","xmm6");
+       &movdqa (&QWP(64,"esp"),"xmm0");
+       &pxor   ("xmm7","xmm7");
        &mov    ("esp",&DWP(80,"esp"));
 &function_end("aesni_ctr32_encrypt_blocks");
 \f
@@ -1057,8 +1146,10 @@ if ($PREFIX eq "aesni") {
        &sub    ($len,16*6);
        &jc     (&label("xts_enc_short"));
 
-       &shr    ($rounds,1);
-       &mov    ($rounds_,$rounds);
+       &shl    ($rounds,4);
+       &mov    ($rounds_,16);
+       &sub    ($rounds_,$rounds);
+       &lea    ($key,&DWP(32,$key,$rounds));
        &jmp    (&label("xts_enc_loop6"));
 
 &set_label("xts_enc_loop6",16);
@@ -1080,6 +1171,7 @@ if ($PREFIX eq "aesni") {
        &pxor   ($inout5,$tweak);
 
        # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0]
+       &mov    ($rounds,$rounds_);             # restore $rounds
        &movdqu ($inout1,&QWP(16*1,$inp));
         &xorps         ($inout0,$rndkey0);     # input^=rndkey[0]
        &movdqu ($inout2,&QWP(16*2,$inp));
@@ -1096,19 +1188,17 @@ if ($PREFIX eq "aesni") {
        &pxor   ($inout5,$rndkey1);
 
         &$movekey      ($rndkey1,&QWP(16,$key_));
-        &lea           ($key,&DWP(32,$key_));
        &pxor   ($inout1,&QWP(16*1,"esp"));
-        &aesenc        ($inout0,$rndkey1);
        &pxor   ($inout2,&QWP(16*2,"esp"));
-        &aesenc        ($inout1,$rndkey1);
+        &aesenc        ($inout0,$rndkey1);
        &pxor   ($inout3,&QWP(16*3,"esp"));
-        &dec           ($rounds);
-        &aesenc        ($inout2,$rndkey1);
        &pxor   ($inout4,&QWP(16*4,"esp"));
-        &aesenc        ($inout3,$rndkey1);
+        &aesenc        ($inout1,$rndkey1);
        &pxor           ($inout5,$rndkey0);
+        &$movekey      ($rndkey0,&QWP(32,$key_));
+        &aesenc        ($inout2,$rndkey1);
+        &aesenc        ($inout3,$rndkey1);
         &aesenc        ($inout4,$rndkey1);
-        &$movekey      ($rndkey0,&QWP(0,$key));
         &aesenc        ($inout5,$rndkey1);
        &call           (&label("_aesni_encrypt6_enter"));
 
@@ -1135,13 +1225,12 @@ if ($PREFIX eq "aesni") {
        &paddq  ($tweak,$tweak);                # &psllq($tweak,1);
        &pand   ($twres,$twmask);               # isolate carry and residue
        &pcmpgtd($twtmp,$tweak);                # broadcast upper bits
-       &mov    ($rounds,$rounds_);             # restore $rounds
        &pxor   ($tweak,$twres);
 
        &sub    ($len,16*6);
        &jnc    (&label("xts_enc_loop6"));
 
-       &lea    ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds
+       &mov    ($rounds,&DWP(240,$key_));      # restore $rounds
        &mov    ($key,$key_);                   # restore $key
        &mov    ($rounds_,$rounds);
 
@@ -1241,9 +1330,8 @@ if ($PREFIX eq "aesni") {
        &lea    ($inp,&DWP(16*2,$inp));
        &xorps  ($inout0,$inout3);              # input^=tweak
        &xorps  ($inout1,$inout4);
-       &xorps  ($inout2,$inout2);
 
-       &call   ("_aesni_encrypt3");
+       &call   ("_aesni_encrypt2");
 
        &xorps  ($inout0,$inout3);              # output^=tweak
        &xorps  ($inout1,$inout4);
@@ -1350,6 +1438,20 @@ if ($PREFIX eq "aesni") {
        &movups (&QWP(-16,$out),$inout0);       # write output
 
 &set_label("xts_enc_ret");
+       &pxor   ("xmm0","xmm0");                # clear register bank
+       &pxor   ("xmm1","xmm1");
+       &pxor   ("xmm2","xmm2");
+       &movdqa (&QWP(16*0,"esp"),"xmm0");      # clear stack
+       &pxor   ("xmm3","xmm3");
+       &movdqa (&QWP(16*1,"esp"),"xmm0");
+       &pxor   ("xmm4","xmm4");
+       &movdqa (&QWP(16*2,"esp"),"xmm0");
+       &pxor   ("xmm5","xmm5");
+       &movdqa (&QWP(16*3,"esp"),"xmm0");
+       &pxor   ("xmm6","xmm6");
+       &movdqa (&QWP(16*4,"esp"),"xmm0");
+       &pxor   ("xmm7","xmm7");
+       &movdqa (&QWP(16*5,"esp"),"xmm0");
        &mov    ("esp",&DWP(16*7+4,"esp"));     # restore %esp
 &function_end("aesni_xts_encrypt");
 
@@ -1399,8 +1501,10 @@ if ($PREFIX eq "aesni") {
        &sub    ($len,16*6);
        &jc     (&label("xts_dec_short"));
 
-       &shr    ($rounds,1);
-       &mov    ($rounds_,$rounds);
+       &shl    ($rounds,4);
+       &mov    ($rounds_,16);
+       &sub    ($rounds_,$rounds);
+       &lea    ($key,&DWP(32,$key,$rounds));
        &jmp    (&label("xts_dec_loop6"));
 
 &set_label("xts_dec_loop6",16);
@@ -1422,6 +1526,7 @@ if ($PREFIX eq "aesni") {
        &pxor   ($inout5,$tweak);
 
        # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0]
+       &mov    ($rounds,$rounds_);
        &movdqu ($inout1,&QWP(16*1,$inp));
         &xorps         ($inout0,$rndkey0);     # input^=rndkey[0]
        &movdqu ($inout2,&QWP(16*2,$inp));
@@ -1438,19 +1543,17 @@ if ($PREFIX eq "aesni") {
        &pxor   ($inout5,$rndkey1);
 
         &$movekey      ($rndkey1,&QWP(16,$key_));
-        &lea           ($key,&DWP(32,$key_));
        &pxor   ($inout1,&QWP(16*1,"esp"));
-        &aesdec        ($inout0,$rndkey1);
        &pxor   ($inout2,&QWP(16*2,"esp"));
-        &aesdec        ($inout1,$rndkey1);
+        &aesdec        ($inout0,$rndkey1);
        &pxor   ($inout3,&QWP(16*3,"esp"));
-        &dec           ($rounds);
-        &aesdec        ($inout2,$rndkey1);
        &pxor   ($inout4,&QWP(16*4,"esp"));
-        &aesdec        ($inout3,$rndkey1);
+        &aesdec        ($inout1,$rndkey1);
        &pxor           ($inout5,$rndkey0);
+        &$movekey      ($rndkey0,&QWP(32,$key_));
+        &aesdec        ($inout2,$rndkey1);
+        &aesdec        ($inout3,$rndkey1);
         &aesdec        ($inout4,$rndkey1);
-        &$movekey      ($rndkey0,&QWP(0,$key));
         &aesdec        ($inout5,$rndkey1);
        &call           (&label("_aesni_decrypt6_enter"));
 
@@ -1477,13 +1580,12 @@ if ($PREFIX eq "aesni") {
        &paddq  ($tweak,$tweak);                # &psllq($tweak,1);
        &pand   ($twres,$twmask);               # isolate carry and residue
        &pcmpgtd($twtmp,$tweak);                # broadcast upper bits
-       &mov    ($rounds,$rounds_);             # restore $rounds
        &pxor   ($tweak,$twres);
 
        &sub    ($len,16*6);
        &jnc    (&label("xts_dec_loop6"));
 
-       &lea    ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds
+       &mov    ($rounds,&DWP(240,$key_));      # restore $rounds
        &mov    ($key,$key_);                   # restore $key
        &mov    ($rounds_,$rounds);
 
@@ -1584,7 +1686,7 @@ if ($PREFIX eq "aesni") {
        &xorps  ($inout0,$inout3);              # input^=tweak
        &xorps  ($inout1,$inout4);
 
-       &call   ("_aesni_decrypt3");
+       &call   ("_aesni_decrypt2");
 
        &xorps  ($inout0,$inout3);              # output^=tweak
        &xorps  ($inout1,$inout4);
@@ -1712,6 +1814,20 @@ if ($PREFIX eq "aesni") {
        &movups (&QWP(0,$out),$inout0);         # write output
 
 &set_label("xts_dec_ret");
+       &pxor   ("xmm0","xmm0");                # clear register bank
+       &pxor   ("xmm1","xmm1");
+       &pxor   ("xmm2","xmm2");
+       &movdqa (&QWP(16*0,"esp"),"xmm0");      # clear stack
+       &pxor   ("xmm3","xmm3");
+       &movdqa (&QWP(16*1,"esp"),"xmm0");
+       &pxor   ("xmm4","xmm4");
+       &movdqa (&QWP(16*2,"esp"),"xmm0");
+       &pxor   ("xmm5","xmm5");
+       &movdqa (&QWP(16*3,"esp"),"xmm0");
+       &pxor   ("xmm6","xmm6");
+       &movdqa (&QWP(16*4,"esp"),"xmm0");
+       &pxor   ("xmm7","xmm7");
+       &movdqa (&QWP(16*5,"esp"),"xmm0");
        &mov    ("esp",&DWP(16*7+4,"esp"));     # restore %esp
 &function_end("aesni_xts_decrypt");
 }
@@ -1764,6 +1880,7 @@ if ($PREFIX eq "aesni") {
        &add    ($len,16);
        &jnz    (&label("cbc_enc_tail"));
        &movaps ($ivec,$inout0);
+       &pxor   ($inout0,$inout0);
        &jmp    (&label("cbc_ret"));
 
 &set_label("cbc_enc_tail");
@@ -1816,7 +1933,7 @@ if ($PREFIX eq "aesni") {
        &movups (&QWP(0x10,$out),$inout1);
        &lea    ($inp,&DWP(0x60,$inp));
        &movups (&QWP(0x20,$out),$inout2);
-       &mov    ($rounds,$rounds_)              # restore $rounds
+       &mov    ($rounds,$rounds_);             # restore $rounds
        &movups (&QWP(0x30,$out),$inout3);
        &mov    ($key,$key_);                   # restore $key
        &movups (&QWP(0x40,$out),$inout4);
@@ -1827,7 +1944,7 @@ if ($PREFIX eq "aesni") {
        &movaps ($inout0,$inout5);
        &movaps ($ivec,$rndkey0);
        &add    ($len,0x50);
-       &jle    (&label("cbc_dec_tail_collected"));
+       &jle    (&label("cbc_dec_clear_tail_collected"));
        &movups (&QWP(0,$out),$inout0);
        &lea    ($out,&DWP(0x10,$out));
 &set_label("cbc_dec_tail");
@@ -1866,10 +1983,14 @@ if ($PREFIX eq "aesni") {
        &xorps  ($inout4,$rndkey0);
        &movups (&QWP(0,$out),$inout0);
        &movups (&QWP(0x10,$out),$inout1);
+       &pxor   ($inout1,$inout1);
        &movups (&QWP(0x20,$out),$inout2);
+       &pxor   ($inout2,$inout2);
        &movups (&QWP(0x30,$out),$inout3);
+       &pxor   ($inout3,$inout3);
        &lea    ($out,&DWP(0x40,$out));
        &movaps ($inout0,$inout4);
+       &pxor   ($inout4,$inout4);
        &sub    ($len,0x50);
        &jmp    (&label("cbc_dec_tail_collected"));
 
@@ -1884,12 +2005,12 @@ if ($PREFIX eq "aesni") {
        &jmp    (&label("cbc_dec_tail_collected"));
 
 &set_label("cbc_dec_two",16);
-       &xorps  ($inout2,$inout2);
-       &call   ("_aesni_decrypt3");
+       &call   ("_aesni_decrypt2");
        &xorps  ($inout0,$ivec);
        &xorps  ($inout1,$in0);
        &movups (&QWP(0,$out),$inout0);
        &movaps ($inout0,$inout1);
+       &pxor   ($inout1,$inout1);
        &lea    ($out,&DWP(0x10,$out));
        &movaps ($ivec,$in1);
        &sub    ($len,0x20);
@@ -1902,7 +2023,9 @@ if ($PREFIX eq "aesni") {
        &xorps  ($inout2,$in1);
        &movups (&QWP(0,$out),$inout0);
        &movaps ($inout0,$inout2);
+       &pxor   ($inout2,$inout2);
        &movups (&QWP(0x10,$out),$inout1);
+       &pxor   ($inout1,$inout1);
        &lea    ($out,&DWP(0x20,$out));
        &movups ($ivec,&QWP(0x20,$inp));
        &sub    ($len,0x30);
@@ -1918,29 +2041,44 @@ if ($PREFIX eq "aesni") {
        &movups (&QWP(0,$out),$inout0);
        &xorps  ($inout2,$rndkey1);
        &movups (&QWP(0x10,$out),$inout1);
+       &pxor   ($inout1,$inout1);
        &xorps  ($inout3,$rndkey0);
        &movups (&QWP(0x20,$out),$inout2);
+       &pxor   ($inout2,$inout2);
        &lea    ($out,&DWP(0x30,$out));
        &movaps ($inout0,$inout3);
+       &pxor   ($inout3,$inout3);
        &sub    ($len,0x40);
+       &jmp    (&label("cbc_dec_tail_collected"));
 
+&set_label("cbc_dec_clear_tail_collected",16);
+       &pxor   ($inout1,$inout1);
+       &pxor   ($inout2,$inout2);
+       &pxor   ($inout3,$inout3);
+       &pxor   ($inout4,$inout4);
 &set_label("cbc_dec_tail_collected");
        &and    ($len,15);
        &jnz    (&label("cbc_dec_tail_partial"));
        &movups (&QWP(0,$out),$inout0);
+       &pxor   ($rndkey0,$rndkey0);
        &jmp    (&label("cbc_ret"));
 
 &set_label("cbc_dec_tail_partial",16);
        &movaps (&QWP(0,"esp"),$inout0);
+       &pxor   ($rndkey0,$rndkey0);
        &mov    ("ecx",16);
        &mov    ($inp,"esp");
        &sub    ("ecx",$len);
        &data_word(0xA4F3F689);         # rep movsb
+       &movdqa (&QWP(0,"esp"),$inout0);
 
 &set_label("cbc_ret");
        &mov    ("esp",&DWP(16,"esp")); # pull original %esp
        &mov    ($key_,&wparam(4));
+       &pxor   ($inout0,$inout0);
+       &pxor   ($rndkey1,$rndkey1);
        &movups (&QWP(0,$key_),$ivec);  # output IV
+       &pxor   ($ivec,$ivec);
 &set_label("cbc_abort");
 &function_end("${PREFIX}_cbc_encrypt");
 \f
@@ -1957,14 +2095,24 @@ if ($PREFIX eq "aesni") {
 #      $round  rounds
 
 &function_begin_B("_aesni_set_encrypt_key");
+       &push   ("ebp");
+       &push   ("ebx");
        &test   ("eax","eax");
        &jz     (&label("bad_pointer"));
        &test   ($key,$key);
        &jz     (&label("bad_pointer"));
 
+       &call   (&label("pic"));
+&set_label("pic");
+       &blindpop("ebx");
+       &lea    ("ebx",&DWP(&label("key_const")."-".&label("pic"),"ebx"));
+
+       &picmeup("ebp","OPENSSL_ia32cap_P","ebx",&label("key_const"));
        &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey
        &xorps  ("xmm4","xmm4");        # low dword of xmm4 is assumed 0
+       &mov    ("ebp",&DWP(4,"ebp"));
        &lea    ($key,&DWP(16,$key));
+       &and    ("ebp",1<<28|1<<11);    # AVX and XOP bits
        &cmp    ($rounds,256);
        &je     (&label("14rounds"));
        &cmp    ($rounds,192);
@@ -1973,6 +2121,9 @@ if ($PREFIX eq "aesni") {
        &jne    (&label("bad_keybits"));
 
 &set_label("10rounds",16);
+       &cmp            ("ebp",1<<28);
+       &je             (&label("10rounds_alt"));
+
        &mov            ($rounds,9);
        &$movekey       (&QWP(-16,$key),"xmm0");        # round 0
        &aeskeygenassist("xmm1","xmm0",0x01);           # round 1
@@ -1997,8 +2148,8 @@ if ($PREFIX eq "aesni") {
        &call           (&label("key_128"));
        &$movekey       (&QWP(0,$key),"xmm0");
        &mov            (&DWP(80,$key),$rounds);
-       &xor            ("eax","eax");
-       &ret();
+
+       &jmp    (&label("good_key"));
 
 &set_label("key_128",16);
        &$movekey       (&QWP(0,$key),"xmm0");
@@ -2012,10 +2163,78 @@ if ($PREFIX eq "aesni") {
        &xorps          ("xmm0","xmm1");
        &ret();
 
+&set_label("10rounds_alt",16);
+       &movdqa         ("xmm5",&QWP(0x00,"ebx"));
+       &mov            ($rounds,8);
+       &movdqa         ("xmm4",&QWP(0x20,"ebx"));
+       &movdqa         ("xmm2","xmm0");
+       &movdqu         (&QWP(-16,$key),"xmm0");
+
+&set_label("loop_key128");
+       &pshufb         ("xmm0","xmm5");
+       &aesenclast     ("xmm0","xmm4");
+       &pslld          ("xmm4",1);
+       &lea            ($key,&DWP(16,$key));
+
+       &movdqa         ("xmm3","xmm2");
+       &pslldq         ("xmm2",4);
+       &pxor           ("xmm3","xmm2");
+       &pslldq         ("xmm2",4);
+       &pxor           ("xmm3","xmm2");
+       &pslldq         ("xmm2",4);
+       &pxor           ("xmm2","xmm3");
+
+       &pxor           ("xmm0","xmm2");
+       &movdqu         (&QWP(-16,$key),"xmm0");
+       &movdqa         ("xmm2","xmm0");
+
+       &dec            ($rounds);
+       &jnz            (&label("loop_key128"));
+
+       &movdqa         ("xmm4",&QWP(0x30,"ebx"));
+
+       &pshufb         ("xmm0","xmm5");
+       &aesenclast     ("xmm0","xmm4");
+       &pslld          ("xmm4",1);
+
+       &movdqa         ("xmm3","xmm2");
+       &pslldq         ("xmm2",4);
+       &pxor           ("xmm3","xmm2");
+       &pslldq         ("xmm2",4);
+       &pxor           ("xmm3","xmm2");
+       &pslldq         ("xmm2",4);
+       &pxor           ("xmm2","xmm3");
+
+       &pxor           ("xmm0","xmm2");
+       &movdqu         (&QWP(0,$key),"xmm0");
+
+       &movdqa         ("xmm2","xmm0");
+       &pshufb         ("xmm0","xmm5");
+       &aesenclast     ("xmm0","xmm4");
+
+       &movdqa         ("xmm3","xmm2");
+       &pslldq         ("xmm2",4);
+       &pxor           ("xmm3","xmm2");
+       &pslldq         ("xmm2",4);
+       &pxor           ("xmm3","xmm2");
+       &pslldq         ("xmm2",4);
+       &pxor           ("xmm2","xmm3");
+
+       &pxor           ("xmm0","xmm2");
+       &movdqu         (&QWP(16,$key),"xmm0");
+
+       &mov            ($rounds,9);
+       &mov            (&DWP(96,$key),$rounds);
+
+       &jmp    (&label("good_key"));
+
 &set_label("12rounds",16);
        &movq           ("xmm2",&QWP(16,"eax"));        # remaining 1/3 of *userKey
+       &cmp            ("ebp",1<<28);
+       &je             (&label("12rounds_alt"));
+
        &mov            ($rounds,11);
-       &$movekey       (&QWP(-16,$key),"xmm0")         # round 0
+       &$movekey       (&QWP(-16,$key),"xmm0");        # round 0
        &aeskeygenassist("xmm1","xmm2",0x01);           # round 1,2
        &call           (&label("key_192a_cold"));
        &aeskeygenassist("xmm1","xmm2",0x02);           # round 2,3
@@ -2034,8 +2253,8 @@ if ($PREFIX eq "aesni") {
        &call           (&label("key_192b"));
        &$movekey       (&QWP(0,$key),"xmm0");
        &mov            (&DWP(48,$key),$rounds);
-       &xor            ("eax","eax");
-       &ret();
+
+       &jmp    (&label("good_key"));
 
 &set_label("key_192a",16);
        &$movekey       (&QWP(0,$key),"xmm0");
@@ -2065,10 +2284,52 @@ if ($PREFIX eq "aesni") {
        &lea            ($key,&DWP(32,$key));
        &jmp            (&label("key_192b_warm"));
 
+&set_label("12rounds_alt",16);
+       &movdqa         ("xmm5",&QWP(0x10,"ebx"));
+       &movdqa         ("xmm4",&QWP(0x20,"ebx"));
+       &mov            ($rounds,8);
+       &movdqu         (&QWP(-16,$key),"xmm0");
+
+&set_label("loop_key192");
+       &movq           (&QWP(0,$key),"xmm2");
+       &movdqa         ("xmm1","xmm2");
+       &pshufb         ("xmm2","xmm5");
+       &aesenclast     ("xmm2","xmm4");
+       &pslld          ("xmm4",1);
+       &lea            ($key,&DWP(24,$key));
+
+       &movdqa         ("xmm3","xmm0");
+       &pslldq         ("xmm0",4);
+       &pxor           ("xmm3","xmm0");
+       &pslldq         ("xmm0",4);
+       &pxor           ("xmm3","xmm0");
+       &pslldq         ("xmm0",4);
+       &pxor           ("xmm0","xmm3");
+
+       &pshufd         ("xmm3","xmm0",0xff);
+       &pxor           ("xmm3","xmm1");
+       &pslldq         ("xmm1",4);
+       &pxor           ("xmm3","xmm1");
+
+       &pxor           ("xmm0","xmm2");
+       &pxor           ("xmm2","xmm3");
+       &movdqu         (&QWP(-16,$key),"xmm0");
+
+       &dec            ($rounds);
+       &jnz            (&label("loop_key192"));
+
+       &mov    ($rounds,11);
+       &mov    (&DWP(32,$key),$rounds);
+
+       &jmp    (&label("good_key"));
+
 &set_label("14rounds",16);
        &movups         ("xmm2",&QWP(16,"eax"));        # remaining half of *userKey
-       &mov            ($rounds,13);
        &lea            ($key,&DWP(16,$key));
+       &cmp            ("ebp",1<<28);
+       &je             (&label("14rounds_alt"));
+
+       &mov            ($rounds,13);
        &$movekey       (&QWP(-32,$key),"xmm0");        # round 0
        &$movekey       (&QWP(-16,$key),"xmm2");        # round 1
        &aeskeygenassist("xmm1","xmm2",0x01);           # round 2
@@ -2100,7 +2361,8 @@ if ($PREFIX eq "aesni") {
        &$movekey       (&QWP(0,$key),"xmm0");
        &mov            (&DWP(16,$key),$rounds);
        &xor            ("eax","eax");
-       &ret();
+
+       &jmp    (&label("good_key"));
 
 &set_label("key_256a",16);
        &$movekey       (&QWP(0,$key),"xmm2");
@@ -2126,11 +2388,77 @@ if ($PREFIX eq "aesni") {
        &xorps          ("xmm2","xmm1");
        &ret();
 
+&set_label("14rounds_alt",16);
+       &movdqa         ("xmm5",&QWP(0x00,"ebx"));
+       &movdqa         ("xmm4",&QWP(0x20,"ebx"));
+       &mov            ($rounds,7);
+       &movdqu         (&QWP(-32,$key),"xmm0");
+       &movdqa         ("xmm1","xmm2");
+       &movdqu         (&QWP(-16,$key),"xmm2");
+
+&set_label("loop_key256");
+       &pshufb         ("xmm2","xmm5");        # permute bytes per mask in xmm5
+       &aesenclast     ("xmm2","xmm4");        # xmm4 is XORed in as "round key"
+
+       &movdqa         ("xmm3","xmm0");        # xmm0 ^= xmm0<<32 ^ xmm0<<64 ^ xmm0<<96
+       &pslldq         ("xmm0",4);
+       &pxor           ("xmm3","xmm0");
+       &pslldq         ("xmm0",4);
+       &pxor           ("xmm3","xmm0");
+       &pslldq         ("xmm0",4);
+       &pxor           ("xmm0","xmm3");
+       &pslld          ("xmm4",1);             # double every dword of xmm4
+
+       &pxor           ("xmm0","xmm2");
+       &movdqu         (&QWP(0,$key),"xmm0");  # store at 0($key)
+
+       &dec            ($rounds);
+       &jz             (&label("done_key256"));        # exit once $rounds hits 0
+
+       &pshufd         ("xmm2","xmm0",0xff);   # broadcast top dword of xmm0
+       &pxor           ("xmm3","xmm3");        # zero "round key" for aesenclast
+       &aesenclast     ("xmm2","xmm3");
+
+       &movdqa         ("xmm3","xmm1");        # xmm1 ^= xmm1<<32 ^ xmm1<<64 ^ xmm1<<96
+       &pslldq         ("xmm1",4);
+       &pxor           ("xmm3","xmm1");
+       &pslldq         ("xmm1",4);
+       &pxor           ("xmm3","xmm1");
+       &pslldq         ("xmm1",4);
+       &pxor           ("xmm1","xmm3");
+
+       &pxor           ("xmm2","xmm1");
+       &movdqu         (&QWP(16,$key),"xmm2"); # store at 16($key)
+       &lea            ($key,&DWP(32,$key));   # advance $key by 32 bytes
+       &movdqa         ("xmm1","xmm2");
+       &jmp            (&label("loop_key256"));
+
+&set_label("done_key256");
+       &mov            ($rounds,13);
+       &mov            (&DWP(16,$key),$rounds);
+
+&set_label("good_key");
+       &pxor   ("xmm0","xmm0");
+       &pxor   ("xmm1","xmm1");
+       &pxor   ("xmm2","xmm2");
+       &pxor   ("xmm3","xmm3");
+       &pxor   ("xmm4","xmm4");
+       &pxor   ("xmm5","xmm5");
+       &xor    ("eax","eax");
+       &pop    ("ebx");
+       &pop    ("ebp");
+       &ret    ();
+
 &set_label("bad_pointer",4);
        &mov    ("eax",-1);
+       &pop    ("ebx");
+       &pop    ("ebp");
        &ret    ();
 &set_label("bad_keybits",4);
+       &pxor   ("xmm0","xmm0");
        &mov    ("eax",-2);
+       &pop    ("ebx");
+       &pop    ("ebp");
        &ret    ();
 &function_end_B("_aesni_set_encrypt_key");
 
@@ -2152,7 +2480,7 @@ if ($PREFIX eq "aesni") {
        &mov    ($key,&wparam(2));
        &call   ("_aesni_set_encrypt_key");
        &mov    ($key,&wparam(2));
-       &shl    ($rounds,4)     # rounds-1 after _aesni_set_encrypt_key
+       &shl    ($rounds,4);    # rounds-1 after _aesni_set_encrypt_key
        &test   ("eax","eax");
        &jnz    (&label("dec_key_ret"));
        &lea    ("eax",&DWP(16,$key,$rounds));  # end of key schedule
@@ -2180,10 +2508,18 @@ if ($PREFIX eq "aesni") {
        &aesimc         ("xmm0","xmm0");
        &$movekey       (&QWP(0,$key),"xmm0");
 
+       &pxor           ("xmm0","xmm0");
+       &pxor           ("xmm1","xmm1");
        &xor            ("eax","eax");          # return success
 &set_label("dec_key_ret");
        &ret    ();
 &function_end_B("${PREFIX}_set_decrypt_key");
+
+&set_label("key_const",64);
+&data_word(0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d);
+&data_word(0x04070605,0x04070605,0x04070605,0x04070605);
+&data_word(1,1,1,1);
+&data_word(0x1b,0x1b,0x1b,0x1b);
 &asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");
 
 &asm_finish();
index c9270df..25ca574 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 #
 # Further data for other parallelizable modes:
 #
-# CBC decrypt                          1.16    0.93    0.93
-# CTR                                  1.14    0.91    n/a
+# CBC decrypt                          1.16    0.93    0.74
+# CTR                                  1.14    0.91    0.74
 #
 # Well, given 3x column it's probably inappropriate to call the limit
 # asymptotic, if it can be surpassed, isn't it? What happens there?
 
 # April 2011
 #
-# Add aesni_xts_[en|de]crypt. Westmere spends 1.33 cycles processing
-# one byte out of 8KB with 128-bit key, Sandy Bridge - 0.97. Just like
+# Add aesni_xts_[en|de]crypt. Westmere spends 1.25 cycles processing
+# one byte out of 8KB with 128-bit key, Sandy Bridge - 0.90. Just like
 # in CTR mode AES instruction interleave factor was chosen to be 6x.
 
+######################################################################
+# Current large-block performance in cycles per byte processed with
+# 128-bit key (less is better).
+#
+#              CBC en-/decrypt CTR     XTS     ECB
+# Westmere     3.77/1.25       1.25    1.25    1.26
+# * Bridge     5.07/0.74       0.75    0.90    0.85
+# Haswell      4.44/0.63       0.63    0.73    0.63
+# Silvermont   5.75/3.54       3.56    4.12    3.87(*)
+# Bulldozer    5.77/0.70       0.72    0.90    0.70
+#
+# (*)  Atom Silvermont ECB result is suboptimal because of penalties
+#      incurred by operations on %xmm8-15. As ECB is not considered
+#      critical, nothing was done to mitigate the problem.
+
 $PREFIX="aesni";       # if $PREFIX is set to "AES", the script
                        # generates drop-in replacement for
                        # crypto/aes/asm/aes-x86_64.pl:-)
@@ -180,6 +195,7 @@ $movkey = $PREFIX eq "aesni" ? "movups" : "movups";
                ("%rdi","%rsi","%rdx","%rcx");  # Unix order
 
 $code=".text\n";
+$code.=".extern        OPENSSL_ia32cap_P\n";
 
 $rounds="%eax";        # input to and changed by aesni_[en|de]cryptN !!!
 # this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ...
@@ -247,7 +263,10 @@ ${PREFIX}_encrypt:
 ___
        &aesni_generate1("enc",$key,$rounds);
 $code.=<<___;
+        pxor   $rndkey0,$rndkey0       # clear register bank
+        pxor   $rndkey1,$rndkey1
        movups  $inout0,($out)          # output
+        pxor   $inout0,$inout0
        ret
 .size  ${PREFIX}_encrypt,.-${PREFIX}_encrypt
 
@@ -260,7 +279,10 @@ ${PREFIX}_decrypt:
 ___
        &aesni_generate1("dec",$key,$rounds);
 $code.=<<___;
+        pxor   $rndkey0,$rndkey0       # clear register bank
+        pxor   $rndkey1,$rndkey1
        movups  $inout0,($out)          # output
+        pxor   $inout0,$inout0
        ret
 .size  ${PREFIX}_decrypt, .-${PREFIX}_decrypt
 ___
@@ -272,10 +294,49 @@ ___
 # every *2nd* cycle. Thus 3x interleave was the one providing optimal
 # utilization, i.e. when subroutine's throughput is virtually same as
 # of non-interleaved subroutine [for number of input blocks up to 3].
-# This is why it makes no sense to implement 2x subroutine.
-# aes[enc|dec] latency in next processor generation is 8, but the
-# instructions can be scheduled every cycle. Optimal interleave for
-# new processor is therefore 8x...
+# This is why it originally made no sense to implement a 2x subroutine.
+# But times change and it became appropriate to spend an extra 192 bytes
+# on a 2x subroutine on account of Atom Silvermont. For processors that
+# can schedule aes[enc|dec] every cycle, the optimal interleave factor
+# equals the corresponding instruction's latency. 8x is optimal for
+# * Bridge and "super-optimal" for other Intel CPUs...
+
+sub aesni_generate2 {
+my $dir=shift;         # spliced into the labels and aes${dir} mnemonics below
+# Appends _aesni_${dir}rypt2 to $code. It takes in $key and $rounds, which
+# are *not* preserved. $inout[0-1] is cipher/clear text processed in place.
+$code.=<<___;
+.type  _aesni_${dir}rypt2,\@abi-omnipotent
+.align 16
+_aesni_${dir}rypt2:
+       $movkey ($key),$rndkey0
+       shl     \$4,$rounds
+       $movkey 16($key),$rndkey1
+       xorps   $rndkey0,$inout0
+       xorps   $rndkey0,$inout1
+       $movkey 32($key),$rndkey0
+       lea     32($key,$rounds),$key
+       neg     %rax                            # $rounds
+       add     \$16,%rax
+
+.L${dir}_loop2:
+       aes${dir}       $rndkey1,$inout0
+       aes${dir}       $rndkey1,$inout1
+       $movkey         ($key,%rax),$rndkey1
+       add             \$32,%rax
+       aes${dir}       $rndkey0,$inout0
+       aes${dir}       $rndkey0,$inout1
+       $movkey         -16($key,%rax),$rndkey0
+       jnz             .L${dir}_loop2
+
+       aes${dir}       $rndkey1,$inout0
+       aes${dir}       $rndkey1,$inout1
+       aes${dir}last   $rndkey0,$inout0
+       aes${dir}last   $rndkey0,$inout1
+       ret
+.size  _aesni_${dir}rypt2,.-_aesni_${dir}rypt2
+___
+}
 sub aesni_generate3 {
 my $dir=shift;
 # As already mentioned it takes in $key and $rounds, which are *not*
@@ -285,25 +346,26 @@ $code.=<<___;
 .align 16
 _aesni_${dir}rypt3:
        $movkey ($key),$rndkey0
-       shr     \$1,$rounds
+       shl     \$4,$rounds
        $movkey 16($key),$rndkey1
-       lea     32($key),$key
        xorps   $rndkey0,$inout0
        xorps   $rndkey0,$inout1
        xorps   $rndkey0,$inout2
-       $movkey         ($key),$rndkey0
+       $movkey 32($key),$rndkey0
+       lea     32($key,$rounds),$key
+       neg     %rax                            # $rounds
+       add     \$16,%rax
 
 .L${dir}_loop3:
        aes${dir}       $rndkey1,$inout0
        aes${dir}       $rndkey1,$inout1
-       dec             $rounds
        aes${dir}       $rndkey1,$inout2
-       $movkey         16($key),$rndkey1
+       $movkey         ($key,%rax),$rndkey1
+       add             \$32,%rax
        aes${dir}       $rndkey0,$inout0
        aes${dir}       $rndkey0,$inout1
-       lea             32($key),$key
        aes${dir}       $rndkey0,$inout2
-       $movkey         ($key),$rndkey0
+       $movkey         -16($key,%rax),$rndkey0
        jnz             .L${dir}_loop3
 
        aes${dir}       $rndkey1,$inout0
@@ -329,28 +391,30 @@ $code.=<<___;
 .align 16
 _aesni_${dir}rypt4:
        $movkey ($key),$rndkey0
-       shr     \$1,$rounds
+       shl     \$4,$rounds
        $movkey 16($key),$rndkey1
-       lea     32($key),$key
        xorps   $rndkey0,$inout0
        xorps   $rndkey0,$inout1
        xorps   $rndkey0,$inout2
        xorps   $rndkey0,$inout3
-       $movkey ($key),$rndkey0
+       $movkey 32($key),$rndkey0
+       lea     32($key,$rounds),$key
+       neg     %rax                            # $rounds
+       .byte   0x0f,0x1f,0x00
+       add     \$16,%rax
 
 .L${dir}_loop4:
        aes${dir}       $rndkey1,$inout0
        aes${dir}       $rndkey1,$inout1
-       dec             $rounds
        aes${dir}       $rndkey1,$inout2
        aes${dir}       $rndkey1,$inout3
-       $movkey         16($key),$rndkey1
+       $movkey         ($key,%rax),$rndkey1
+       add             \$32,%rax
        aes${dir}       $rndkey0,$inout0
        aes${dir}       $rndkey0,$inout1
-       lea             32($key),$key
        aes${dir}       $rndkey0,$inout2
        aes${dir}       $rndkey0,$inout3
-       $movkey         ($key),$rndkey0
+       $movkey         -16($key,%rax),$rndkey0
        jnz             .L${dir}_loop4
 
        aes${dir}       $rndkey1,$inout0
@@ -374,43 +438,40 @@ $code.=<<___;
 .align 16
 _aesni_${dir}rypt6:
        $movkey         ($key),$rndkey0
-       shr             \$1,$rounds
+       shl             \$4,$rounds
        $movkey         16($key),$rndkey1
-       lea             32($key),$key
        xorps           $rndkey0,$inout0
        pxor            $rndkey0,$inout1
-       aes${dir}       $rndkey1,$inout0
        pxor            $rndkey0,$inout2
+       aes${dir}       $rndkey1,$inout0
+       lea             32($key,$rounds),$key
+       neg             %rax                    # $rounds
        aes${dir}       $rndkey1,$inout1
        pxor            $rndkey0,$inout3
-       aes${dir}       $rndkey1,$inout2
        pxor            $rndkey0,$inout4
-       aes${dir}       $rndkey1,$inout3
+       aes${dir}       $rndkey1,$inout2
        pxor            $rndkey0,$inout5
-       dec             $rounds
-       aes${dir}       $rndkey1,$inout4
-       $movkey         ($key),$rndkey0
-       aes${dir}       $rndkey1,$inout5
+       $movkey         ($key,%rax),$rndkey0
+       add             \$16,%rax
        jmp             .L${dir}_loop6_enter
 .align 16
 .L${dir}_loop6:
        aes${dir}       $rndkey1,$inout0
        aes${dir}       $rndkey1,$inout1
-       dec             $rounds
        aes${dir}       $rndkey1,$inout2
+.L${dir}_loop6_enter:
        aes${dir}       $rndkey1,$inout3
        aes${dir}       $rndkey1,$inout4
        aes${dir}       $rndkey1,$inout5
-.L${dir}_loop6_enter:                          # happens to be 16-byte aligned
-       $movkey         16($key),$rndkey1
+       $movkey         ($key,%rax),$rndkey1
+       add             \$32,%rax
        aes${dir}       $rndkey0,$inout0
        aes${dir}       $rndkey0,$inout1
-       lea             32($key),$key
        aes${dir}       $rndkey0,$inout2
        aes${dir}       $rndkey0,$inout3
        aes${dir}       $rndkey0,$inout4
        aes${dir}       $rndkey0,$inout5
-       $movkey         ($key),$rndkey0
+       $movkey         -16($key,%rax),$rndkey0
        jnz             .L${dir}_loop6
 
        aes${dir}       $rndkey1,$inout0
@@ -438,52 +499,46 @@ $code.=<<___;
 .align 16
 _aesni_${dir}rypt8:
        $movkey         ($key),$rndkey0
-       shr             \$1,$rounds
+       shl             \$4,$rounds
        $movkey         16($key),$rndkey1
-       lea             32($key),$key
        xorps           $rndkey0,$inout0
        xorps           $rndkey0,$inout1
-       aes${dir}       $rndkey1,$inout0
        pxor            $rndkey0,$inout2
-       aes${dir}       $rndkey1,$inout1
        pxor            $rndkey0,$inout3
-       aes${dir}       $rndkey1,$inout2
        pxor            $rndkey0,$inout4
-       aes${dir}       $rndkey1,$inout3
+       lea             32($key,$rounds),$key
+       neg             %rax                    # $rounds
+       aes${dir}       $rndkey1,$inout0
        pxor            $rndkey0,$inout5
-       dec             $rounds
-       aes${dir}       $rndkey1,$inout4
        pxor            $rndkey0,$inout6
-       aes${dir}       $rndkey1,$inout5
+       aes${dir}       $rndkey1,$inout1
        pxor            $rndkey0,$inout7
-       $movkey         ($key),$rndkey0
-       aes${dir}       $rndkey1,$inout6
-       aes${dir}       $rndkey1,$inout7
-       $movkey         16($key),$rndkey1
-       jmp             .L${dir}_loop8_enter
+       $movkey         ($key,%rax),$rndkey0
+       add             \$16,%rax
+       jmp             .L${dir}_loop8_inner
 .align 16
 .L${dir}_loop8:
        aes${dir}       $rndkey1,$inout0
        aes${dir}       $rndkey1,$inout1
-       dec             $rounds
+.L${dir}_loop8_inner:
        aes${dir}       $rndkey1,$inout2
        aes${dir}       $rndkey1,$inout3
        aes${dir}       $rndkey1,$inout4
        aes${dir}       $rndkey1,$inout5
        aes${dir}       $rndkey1,$inout6
        aes${dir}       $rndkey1,$inout7
-       $movkey         16($key),$rndkey1
-.L${dir}_loop8_enter:                          # happens to be 16-byte aligned
+.L${dir}_loop8_enter:
+       $movkey         ($key,%rax),$rndkey1
+       add             \$32,%rax
        aes${dir}       $rndkey0,$inout0
        aes${dir}       $rndkey0,$inout1
-       lea             32($key),$key
        aes${dir}       $rndkey0,$inout2
        aes${dir}       $rndkey0,$inout3
        aes${dir}       $rndkey0,$inout4
        aes${dir}       $rndkey0,$inout5
        aes${dir}       $rndkey0,$inout6
        aes${dir}       $rndkey0,$inout7
-       $movkey         ($key),$rndkey0
+       $movkey         -16($key,%rax),$rndkey0
        jnz             .L${dir}_loop8
 
        aes${dir}       $rndkey1,$inout0
@@ -506,6 +561,8 @@ _aesni_${dir}rypt8:
 .size  _aesni_${dir}rypt8,.-_aesni_${dir}rypt8
 ___
 }
+&aesni_generate2("enc") if ($PREFIX eq "aesni");
+&aesni_generate2("dec");
 &aesni_generate3("enc") if ($PREFIX eq "aesni");
 &aesni_generate3("dec");
 &aesni_generate4("enc") if ($PREFIX eq "aesni");
@@ -528,15 +585,15 @@ aesni_ecb_encrypt:
 ___
 $code.=<<___ if ($win64);
        lea     -0x58(%rsp),%rsp
-       movaps  %xmm6,(%rsp)
+       movaps  %xmm6,(%rsp)            # offload $inout4..7
        movaps  %xmm7,0x10(%rsp)
        movaps  %xmm8,0x20(%rsp)
        movaps  %xmm9,0x30(%rsp)
 .Lecb_enc_body:
 ___
 $code.=<<___;
-       and     \$-16,$len
-       jz      .Lecb_ret
+       and     \$-16,$len              # if ($len<16)
+       jz      .Lecb_ret               # return
 
        mov     240($key),$rounds       # key->rounds
        $movkey ($key),$rndkey0
@@ -545,10 +602,10 @@ $code.=<<___;
        test    %r8d,%r8d               # 5th argument
        jz      .Lecb_decrypt
 #--------------------------- ECB ENCRYPT ------------------------------#
-       cmp     \$0x80,$len
-       jb      .Lecb_enc_tail
+       cmp     \$0x80,$len             # if ($len<8*16)
+       jb      .Lecb_enc_tail          # short input
 
-       movdqu  ($inp),$inout0
+       movdqu  ($inp),$inout0          # load 8 input blocks
        movdqu  0x10($inp),$inout1
        movdqu  0x20($inp),$inout2
        movdqu  0x30($inp),$inout3
@@ -556,14 +613,14 @@ $code.=<<___;
        movdqu  0x50($inp),$inout5
        movdqu  0x60($inp),$inout6
        movdqu  0x70($inp),$inout7
-       lea     0x80($inp),$inp
-       sub     \$0x80,$len
+       lea     0x80($inp),$inp         # $inp+=8*16
+       sub     \$0x80,$len             # $len-=8*16 (can be zero)
        jmp     .Lecb_enc_loop8_enter
 .align 16
 .Lecb_enc_loop8:
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 8 output blocks
        mov     $key_,$key              # restore $key
-       movdqu  ($inp),$inout0
+       movdqu  ($inp),$inout0          # load 8 input blocks
        mov     $rnds_,$rounds          # restore $rounds
        movups  $inout1,0x10($out)
        movdqu  0x10($inp),$inout1
@@ -578,17 +635,17 @@ $code.=<<___;
        movups  $inout6,0x60($out)
        movdqu  0x60($inp),$inout6
        movups  $inout7,0x70($out)
-       lea     0x80($out),$out
+       lea     0x80($out),$out         # $out+=8*16
        movdqu  0x70($inp),$inout7
-       lea     0x80($inp),$inp
+       lea     0x80($inp),$inp         # $inp+=8*16
 .Lecb_enc_loop8_enter:
 
        call    _aesni_encrypt8
 
        sub     \$0x80,$len
-       jnc     .Lecb_enc_loop8
+       jnc     .Lecb_enc_loop8         # loop if $len-=8*16 didn't borrow
 
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 8 output blocks
        mov     $key_,$key              # restore $key
        movups  $inout1,0x10($out)
        mov     $rnds_,$rounds          # restore $rounds
@@ -598,11 +655,11 @@ $code.=<<___;
        movups  $inout5,0x50($out)
        movups  $inout6,0x60($out)
        movups  $inout7,0x70($out)
-       lea     0x80($out),$out
-       add     \$0x80,$len
-       jz      .Lecb_ret
+       lea     0x80($out),$out         # $out+=8*16
+       add     \$0x80,$len             # restore real remaining $len
+       jz      .Lecb_ret               # done if ($len==0)
 
-.Lecb_enc_tail:
+.Lecb_enc_tail:                                # $len is less than 8*16
        movups  ($inp),$inout0
        cmp     \$0x20,$len
        jb      .Lecb_enc_one
@@ -619,8 +676,9 @@ $code.=<<___;
        movups  0x50($inp),$inout5
        je      .Lecb_enc_six
        movdqu  0x60($inp),$inout6
+       xorps   $inout7,$inout7
        call    _aesni_encrypt8
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 7 output blocks
        movups  $inout1,0x10($out)
        movups  $inout2,0x20($out)
        movups  $inout3,0x30($out)
@@ -633,26 +691,25 @@ $code.=<<___;
 ___
        &aesni_generate1("enc",$key,$rounds);
 $code.=<<___;
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store one output block
        jmp     .Lecb_ret
 .align 16
 .Lecb_enc_two:
-       xorps   $inout2,$inout2
-       call    _aesni_encrypt3
-       movups  $inout0,($out)
+       call    _aesni_encrypt2
+       movups  $inout0,($out)          # store 2 output blocks
        movups  $inout1,0x10($out)
        jmp     .Lecb_ret
 .align 16
 .Lecb_enc_three:
        call    _aesni_encrypt3
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 3 output blocks
        movups  $inout1,0x10($out)
        movups  $inout2,0x20($out)
        jmp     .Lecb_ret
 .align 16
 .Lecb_enc_four:
        call    _aesni_encrypt4
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 4 output blocks
        movups  $inout1,0x10($out)
        movups  $inout2,0x20($out)
        movups  $inout3,0x30($out)
@@ -661,7 +718,7 @@ $code.=<<___;
 .Lecb_enc_five:
        xorps   $inout5,$inout5
        call    _aesni_encrypt6
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 5 output blocks
        movups  $inout1,0x10($out)
        movups  $inout2,0x20($out)
        movups  $inout3,0x30($out)
@@ -670,7 +727,7 @@ $code.=<<___;
 .align 16
 .Lecb_enc_six:
        call    _aesni_encrypt6
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 6 output blocks
        movups  $inout1,0x10($out)
        movups  $inout2,0x20($out)
        movups  $inout3,0x30($out)
@@ -680,10 +737,10 @@ $code.=<<___;
 \f#--------------------------- ECB DECRYPT ------------------------------#
 .align 16
 .Lecb_decrypt:
-       cmp     \$0x80,$len
-       jb      .Lecb_dec_tail
+       cmp     \$0x80,$len             # if ($len<8*16)
+       jb      .Lecb_dec_tail          # short input
 
-       movdqu  ($inp),$inout0
+       movdqu  ($inp),$inout0          # load 8 input blocks
        movdqu  0x10($inp),$inout1
        movdqu  0x20($inp),$inout2
        movdqu  0x30($inp),$inout3
@@ -691,14 +748,14 @@ $code.=<<___;
        movdqu  0x50($inp),$inout5
        movdqu  0x60($inp),$inout6
        movdqu  0x70($inp),$inout7
-       lea     0x80($inp),$inp
-       sub     \$0x80,$len
+       lea     0x80($inp),$inp         # $inp+=8*16
+       sub     \$0x80,$len             # $len-=8*16 (can be zero)
        jmp     .Lecb_dec_loop8_enter
 .align 16
 .Lecb_dec_loop8:
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 8 output blocks
        mov     $key_,$key              # restore $key
-       movdqu  ($inp),$inout0
+       movdqu  ($inp),$inout0          # load 8 input blocks
        mov     $rnds_,$rounds          # restore $rounds
        movups  $inout1,0x10($out)
        movdqu  0x10($inp),$inout1
@@ -713,30 +770,38 @@ $code.=<<___;
        movups  $inout6,0x60($out)
        movdqu  0x60($inp),$inout6
        movups  $inout7,0x70($out)
-       lea     0x80($out),$out
+       lea     0x80($out),$out         # $out+=8*16
        movdqu  0x70($inp),$inout7
-       lea     0x80($inp),$inp
+       lea     0x80($inp),$inp         # $inp+=8*16
 .Lecb_dec_loop8_enter:
 
        call    _aesni_decrypt8
 
        $movkey ($key_),$rndkey0
        sub     \$0x80,$len
-       jnc     .Lecb_dec_loop8
+       jnc     .Lecb_dec_loop8         # loop if $len-=8*16 didn't borrow
 
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 8 output blocks
+        pxor   $inout0,$inout0         # clear register bank
        mov     $key_,$key              # restore $key
        movups  $inout1,0x10($out)
+        pxor   $inout1,$inout1
        mov     $rnds_,$rounds          # restore $rounds
        movups  $inout2,0x20($out)
+        pxor   $inout2,$inout2
        movups  $inout3,0x30($out)
+        pxor   $inout3,$inout3
        movups  $inout4,0x40($out)
+        pxor   $inout4,$inout4
        movups  $inout5,0x50($out)
+        pxor   $inout5,$inout5
        movups  $inout6,0x60($out)
+        pxor   $inout6,$inout6
        movups  $inout7,0x70($out)
-       lea     0x80($out),$out
-       add     \$0x80,$len
-       jz      .Lecb_ret
+        pxor   $inout7,$inout7
+       lea     0x80($out),$out         # $out+=8*16
+       add     \$0x80,$len             # restore real remaining $len
+       jz      .Lecb_ret               # done if ($len==0)
 
 .Lecb_dec_tail:
        movups  ($inp),$inout0
@@ -756,71 +821,107 @@ $code.=<<___;
        je      .Lecb_dec_six
        movups  0x60($inp),$inout6
        $movkey ($key),$rndkey0
+       xorps   $inout7,$inout7
        call    _aesni_decrypt8
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 7 output blocks
+        pxor   $inout0,$inout0         # clear register bank
        movups  $inout1,0x10($out)
+        pxor   $inout1,$inout1
        movups  $inout2,0x20($out)
+        pxor   $inout2,$inout2
        movups  $inout3,0x30($out)
+        pxor   $inout3,$inout3
        movups  $inout4,0x40($out)
+        pxor   $inout4,$inout4
        movups  $inout5,0x50($out)
+        pxor   $inout5,$inout5
        movups  $inout6,0x60($out)
+        pxor   $inout6,$inout6
+        pxor   $inout7,$inout7
        jmp     .Lecb_ret
 .align 16
 .Lecb_dec_one:
 ___
        &aesni_generate1("dec",$key,$rounds);
 $code.=<<___;
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store one output block
+        pxor   $inout0,$inout0         # clear register bank
        jmp     .Lecb_ret
 .align 16
 .Lecb_dec_two:
-       xorps   $inout2,$inout2
-       call    _aesni_decrypt3
-       movups  $inout0,($out)
+       call    _aesni_decrypt2
+       movups  $inout0,($out)          # store 2 output blocks
+        pxor   $inout0,$inout0         # clear register bank
        movups  $inout1,0x10($out)
+        pxor   $inout1,$inout1
        jmp     .Lecb_ret
 .align 16
 .Lecb_dec_three:
        call    _aesni_decrypt3
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 3 output blocks
+        pxor   $inout0,$inout0         # clear register bank
        movups  $inout1,0x10($out)
+        pxor   $inout1,$inout1
        movups  $inout2,0x20($out)
+        pxor   $inout2,$inout2
        jmp     .Lecb_ret
 .align 16
 .Lecb_dec_four:
        call    _aesni_decrypt4
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 4 output blocks
+        pxor   $inout0,$inout0         # clear register bank
        movups  $inout1,0x10($out)
+        pxor   $inout1,$inout1
        movups  $inout2,0x20($out)
+        pxor   $inout2,$inout2
        movups  $inout3,0x30($out)
+        pxor   $inout3,$inout3
        jmp     .Lecb_ret
 .align 16
 .Lecb_dec_five:
        xorps   $inout5,$inout5
        call    _aesni_decrypt6
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 5 output blocks
+        pxor   $inout0,$inout0         # clear register bank
        movups  $inout1,0x10($out)
+        pxor   $inout1,$inout1
        movups  $inout2,0x20($out)
+        pxor   $inout2,$inout2
        movups  $inout3,0x30($out)
+        pxor   $inout3,$inout3
        movups  $inout4,0x40($out)
+        pxor   $inout4,$inout4
+        pxor   $inout5,$inout5
        jmp     .Lecb_ret
 .align 16
 .Lecb_dec_six:
        call    _aesni_decrypt6
-       movups  $inout0,($out)
+       movups  $inout0,($out)          # store 6 output blocks
+        pxor   $inout0,$inout0         # clear register bank
        movups  $inout1,0x10($out)
+        pxor   $inout1,$inout1
        movups  $inout2,0x20($out)
+        pxor   $inout2,$inout2
        movups  $inout3,0x30($out)
+        pxor   $inout3,$inout3
        movups  $inout4,0x40($out)
+        pxor   $inout4,$inout4
        movups  $inout5,0x50($out)
+        pxor   $inout5,$inout5
 
 .Lecb_ret:
+       xorps   $rndkey0,$rndkey0       # %xmm0
+       pxor    $rndkey1,$rndkey1
 ___
 $code.=<<___ if ($win64);
        movaps  (%rsp),%xmm6
+       movaps  %xmm0,(%rsp)            # clear stack
        movaps  0x10(%rsp),%xmm7
+       movaps  %xmm0,0x10(%rsp)
        movaps  0x20(%rsp),%xmm8
+       movaps  %xmm0,0x20(%rsp)
        movaps  0x30(%rsp),%xmm9
+       movaps  %xmm0,0x30(%rsp)
        lea     0x58(%rsp),%rsp
 .Lecb_enc_ret:
 ___
@@ -842,7 +943,8 @@ ___
 {
 my $cmac="%r9";        # 6th argument
 
-my $increment="%xmm6";
+my $increment="%xmm9";
+my $iv="%xmm6";
 my $bswap_mask="%xmm7";
 
 $code.=<<___;
@@ -853,10 +955,10 @@ aesni_ccm64_encrypt_blocks:
 ___
 $code.=<<___ if ($win64);
        lea     -0x58(%rsp),%rsp
-       movaps  %xmm6,(%rsp)
-       movaps  %xmm7,0x10(%rsp)
-       movaps  %xmm8,0x20(%rsp)
-       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm6,(%rsp)            # $iv
+       movaps  %xmm7,0x10(%rsp)        # $bswap_mask
+       movaps  %xmm8,0x20(%rsp)        # $in0
+       movaps  %xmm9,0x30(%rsp)        # $increment
 .Lccm64_enc_body:
 ___
 $code.=<<___;
@@ -865,58 +967,68 @@ $code.=<<___;
        movdqa  .Lincrement64(%rip),$increment
        movdqa  .Lbswap_mask(%rip),$bswap_mask
 
-       shr     \$1,$rounds
+       shl     \$4,$rounds
+       mov     \$16,$rnds_
        lea     0($key),$key_
        movdqu  ($cmac),$inout1
        movdqa  $iv,$inout0
-       mov     $rounds,$rnds_
+       lea     32($key,$rounds),$key           # end of key schedule
        pshufb  $bswap_mask,$iv
+       sub     %rax,%r10                       # twisted $rounds
        jmp     .Lccm64_enc_outer
 .align 16
 .Lccm64_enc_outer:
        $movkey ($key_),$rndkey0
-       mov     $rnds_,$rounds
+       mov     %r10,%rax
        movups  ($inp),$in0                     # load inp
 
        xorps   $rndkey0,$inout0                # counter
        $movkey 16($key_),$rndkey1
        xorps   $in0,$rndkey0
-       lea     32($key_),$key
        xorps   $rndkey0,$inout1                # cmac^=inp
-       $movkey ($key),$rndkey0
+       $movkey 32($key_),$rndkey0
 
 .Lccm64_enc2_loop:
        aesenc  $rndkey1,$inout0
-       dec     $rounds
        aesenc  $rndkey1,$inout1
-       $movkey 16($key),$rndkey1
+       $movkey ($key,%rax),$rndkey1
+       add     \$32,%rax
        aesenc  $rndkey0,$inout0
-       lea     32($key),$key
        aesenc  $rndkey0,$inout1
-       $movkey 0($key),$rndkey0
+       $movkey -16($key,%rax),$rndkey0
        jnz     .Lccm64_enc2_loop
        aesenc  $rndkey1,$inout0
        aesenc  $rndkey1,$inout1
        paddq   $increment,$iv
+       dec     $len                            # $len-- ($len is in blocks)
        aesenclast      $rndkey0,$inout0
        aesenclast      $rndkey0,$inout1
 
-       dec     $len
        lea     16($inp),$inp
        xorps   $inout0,$in0                    # inp ^= E(iv)
        movdqa  $iv,$inout0
        movups  $in0,($out)                     # save output
-       lea     16($out),$out
        pshufb  $bswap_mask,$inout0
-       jnz     .Lccm64_enc_outer
+       lea     16($out),$out                   # $out+=16
+       jnz     .Lccm64_enc_outer               # loop if ($len!=0)
 
-       movups  $inout1,($cmac)
+        pxor   $rndkey0,$rndkey0               # clear register bank
+        pxor   $rndkey1,$rndkey1
+        pxor   $inout0,$inout0
+       movups  $inout1,($cmac)                 # store resulting mac
+        pxor   $inout1,$inout1
+        pxor   $in0,$in0
+        pxor   $iv,$iv
 ___
 $code.=<<___ if ($win64);
        movaps  (%rsp),%xmm6
+       movaps  %xmm0,(%rsp)                    # clear stack
        movaps  0x10(%rsp),%xmm7
+       movaps  %xmm0,0x10(%rsp)
        movaps  0x20(%rsp),%xmm8
+       movaps  %xmm0,0x20(%rsp)
        movaps  0x30(%rsp),%xmm9
+       movaps  %xmm0,0x30(%rsp)
        lea     0x58(%rsp),%rsp
 .Lccm64_enc_ret:
 ___
@@ -933,10 +1045,10 @@ aesni_ccm64_decrypt_blocks:
 ___
 $code.=<<___ if ($win64);
        lea     -0x58(%rsp),%rsp
-       movaps  %xmm6,(%rsp)
-       movaps  %xmm7,0x10(%rsp)
-       movaps  %xmm8,0x20(%rsp)
-       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm6,(%rsp)            # $iv
+       movaps  %xmm7,0x10(%rsp)        # $bswap_mask
+       movaps  %xmm8,0x20(%rsp)        # $in0
+       movaps  %xmm9,0x30(%rsp)        # $increment
 .Lccm64_dec_body:
 ___
 $code.=<<___;
@@ -953,63 +1065,77 @@ $code.=<<___;
 ___
        &aesni_generate1("enc",$key,$rounds);
 $code.=<<___;
+       shl     \$4,$rnds_
+       mov     \$16,$rounds
        movups  ($inp),$in0                     # load inp
        paddq   $increment,$iv
-       lea     16($inp),$inp
+       lea     16($inp),$inp                   # $inp+=16
+       sub     %r10,%rax                       # twisted $rounds
+       lea     32($key_,$rnds_),$key           # end of key schedule
+       mov     %rax,%r10
        jmp     .Lccm64_dec_outer
 .align 16
 .Lccm64_dec_outer:
        xorps   $inout0,$in0                    # inp ^= E(iv)
        movdqa  $iv,$inout0
-       mov     $rnds_,$rounds
        movups  $in0,($out)                     # save output
-       lea     16($out),$out
+       lea     16($out),$out                   # $out+=16
        pshufb  $bswap_mask,$inout0
 
-       sub     \$1,$len
-       jz      .Lccm64_dec_break
+       sub     \$1,$len                        # $len-- ($len is in blocks)
+       jz      .Lccm64_dec_break               # if ($len==0) break
 
        $movkey ($key_),$rndkey0
-       shr     \$1,$rounds
+       mov     %r10,%rax
        $movkey 16($key_),$rndkey1
        xorps   $rndkey0,$in0
-       lea     32($key_),$key
        xorps   $rndkey0,$inout0
        xorps   $in0,$inout1                    # cmac^=out
-       $movkey ($key),$rndkey0
-
+       $movkey 32($key_),$rndkey0
+       jmp     .Lccm64_dec2_loop
+.align 16
 .Lccm64_dec2_loop:
        aesenc  $rndkey1,$inout0
-       dec     $rounds
        aesenc  $rndkey1,$inout1
-       $movkey 16($key),$rndkey1
+       $movkey ($key,%rax),$rndkey1
+       add     \$32,%rax
        aesenc  $rndkey0,$inout0
-       lea     32($key),$key
        aesenc  $rndkey0,$inout1
-       $movkey 0($key),$rndkey0
+       $movkey -16($key,%rax),$rndkey0
        jnz     .Lccm64_dec2_loop
-       movups  ($inp),$in0                     # load inp
+       movups  ($inp),$in0                     # load input
        paddq   $increment,$iv
        aesenc  $rndkey1,$inout0
        aesenc  $rndkey1,$inout1
-       lea     16($inp),$inp
        aesenclast      $rndkey0,$inout0
        aesenclast      $rndkey0,$inout1
+       lea     16($inp),$inp                   # $inp+=16
        jmp     .Lccm64_dec_outer
 
 .align 16
 .Lccm64_dec_break:
        #xorps  $in0,$inout1                    # cmac^=out
+       mov     240($key_),$rounds
 ___
        &aesni_generate1("enc",$key_,$rounds,$inout1,$in0);
 $code.=<<___;
-       movups  $inout1,($cmac)
+        pxor   $rndkey0,$rndkey0               # clear register bank
+        pxor   $rndkey1,$rndkey1
+        pxor   $inout0,$inout0
+       movups  $inout1,($cmac)                 # store resulting mac
+        pxor   $inout1,$inout1
+        pxor   $in0,$in0
+        pxor   $iv,$iv
 ___
 $code.=<<___ if ($win64);
        movaps  (%rsp),%xmm6
+       movaps  %xmm0,(%rsp)                    # clear stack
        movaps  0x10(%rsp),%xmm7
+       movaps  %xmm0,0x10(%rsp)
        movaps  0x20(%rsp),%xmm8
+       movaps  %xmm0,0x20(%rsp)
        movaps  0x30(%rsp),%xmm9
+       movaps  %xmm0,0x30(%rsp)
        lea     0x58(%rsp),%rsp
 .Lccm64_dec_ret:
 ___
@@ -1024,285 +1150,567 @@ ___
 #                         const char *ivec);
 #
 # Handles only complete blocks, operates on 32-bit counter and
-# does not update *ivec! (see engine/eng_aesni.c for details)
+# does not update *ivec! (see crypto/modes/ctr128.c for details)
 #
+# Overhaul based on suggestions from Shay Gueron and Vlad Krasnov,
+# http://rt.openssl.org/Ticket/Display.html?id=3021&user=guest&pass=guest.
+# Keywords are full unroll and modulo-schedule counter calculations
+# with zero-round key xor.
 {
-my $reserved = $win64?0:-0x28;
-my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11));
-my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14");
-my $bswap_mask="%xmm15";
+my ($in0,$in1,$in2,$in3,$in4,$in5)=map("%xmm$_",(10..15));
+my ($key0,$ctr)=("${key_}d","${ivp}d");
+my $frame_size = 0x80 + ($win64?160:0);
 
 $code.=<<___;
 .globl aesni_ctr32_encrypt_blocks
 .type  aesni_ctr32_encrypt_blocks,\@function,5
 .align 16
 aesni_ctr32_encrypt_blocks:
+       cmp     \$1,$len
+       jne     .Lctr32_bulk
+
+       # handle single block without allocating stack frame,
+       # useful when handling edges
+       movups  ($ivp),$inout0
+       movups  ($inp),$inout1
+       mov     240($key),%edx                  # key->rounds
+___
+       &aesni_generate1("enc",$key,"%edx");
+$code.=<<___;
+        pxor   $rndkey0,$rndkey0               # clear register bank
+        pxor   $rndkey1,$rndkey1
+       xorps   $inout1,$inout0
+        pxor   $inout1,$inout1
+       movups  $inout0,($out)
+        xorps  $inout0,$inout0
+       jmp     .Lctr32_epilogue
+
+.align 16
+.Lctr32_bulk:
+       lea     (%rsp),%rax
+       push    %rbp
+       sub     \$$frame_size,%rsp
+       and     \$-16,%rsp      # Linux kernel stack can be incorrectly seeded
 ___
 $code.=<<___ if ($win64);
-       lea     -0xc8(%rsp),%rsp
-       movaps  %xmm6,0x20(%rsp)
-       movaps  %xmm7,0x30(%rsp)
-       movaps  %xmm8,0x40(%rsp)
-       movaps  %xmm9,0x50(%rsp)
-       movaps  %xmm10,0x60(%rsp)
-       movaps  %xmm11,0x70(%rsp)
-       movaps  %xmm12,0x80(%rsp)
-       movaps  %xmm13,0x90(%rsp)
-       movaps  %xmm14,0xa0(%rsp)
-       movaps  %xmm15,0xb0(%rsp)
+       movaps  %xmm6,-0xa8(%rax)               # offload everything
+       movaps  %xmm7,-0x98(%rax)
+       movaps  %xmm8,-0x88(%rax)
+       movaps  %xmm9,-0x78(%rax)
+       movaps  %xmm10,-0x68(%rax)
+       movaps  %xmm11,-0x58(%rax)
+       movaps  %xmm12,-0x48(%rax)
+       movaps  %xmm13,-0x38(%rax)
+       movaps  %xmm14,-0x28(%rax)
+       movaps  %xmm15,-0x18(%rax)
 .Lctr32_body:
 ___
 $code.=<<___;
-       cmp     \$1,$len
-       je      .Lctr32_one_shortcut
-
-       movdqu  ($ivp),$ivec
-       movdqa  .Lbswap_mask(%rip),$bswap_mask
-       xor     $rounds,$rounds
-       pextrd  \$3,$ivec,$rnds_                # pull 32-bit counter
-       pinsrd  \$3,$rounds,$ivec               # wipe 32-bit counter
-
+       lea     -8(%rax),%rbp
+
+       # 8 16-byte words on top of stack are counter values
+       # xor-ed with zero-round key
+
+       movdqu  ($ivp),$inout0
+       movdqu  ($key),$rndkey0
+       mov     12($ivp),$ctr                   # counter LSB
+       pxor    $rndkey0,$inout0
+       mov     12($key),$key0                  # 0-round key LSB
+       movdqa  $inout0,0x00(%rsp)              # populate counter block
+       bswap   $ctr
+       movdqa  $inout0,$inout1
+       movdqa  $inout0,$inout2
+       movdqa  $inout0,$inout3
+       movdqa  $inout0,0x40(%rsp)
+       movdqa  $inout0,0x50(%rsp)
+       movdqa  $inout0,0x60(%rsp)
+       mov     %rdx,%r10                       # about to borrow %rdx
+       movdqa  $inout0,0x70(%rsp)
+
+       lea     1($ctr),%rax
+        lea    2($ctr),%rdx
+       bswap   %eax
+        bswap  %edx
+       xor     $key0,%eax
+        xor    $key0,%edx
+       pinsrd  \$3,%eax,$inout1
+       lea     3($ctr),%rax
+       movdqa  $inout1,0x10(%rsp)
+        pinsrd \$3,%edx,$inout2
+       bswap   %eax
+        mov    %r10,%rdx                       # restore %rdx
+        lea    4($ctr),%r10
+        movdqa $inout2,0x20(%rsp)
+       xor     $key0,%eax
+        bswap  %r10d
+       pinsrd  \$3,%eax,$inout3
+        xor    $key0,%r10d
+       movdqa  $inout3,0x30(%rsp)
+       lea     5($ctr),%r9
+        mov    %r10d,0x40+12(%rsp)
+       bswap   %r9d
+        lea    6($ctr),%r10
        mov     240($key),$rounds               # key->rounds
-       bswap   $rnds_
-       pxor    $iv0,$iv0                       # vector of 3 32-bit counters
-       pxor    $iv1,$iv1                       # vector of 3 32-bit counters
-       pinsrd  \$0,$rnds_,$iv0
-       lea     3($rnds_),$key_
-       pinsrd  \$0,$key_,$iv1
-       inc     $rnds_
-       pinsrd  \$1,$rnds_,$iv0
-       inc     $key_
-       pinsrd  \$1,$key_,$iv1
-       inc     $rnds_
-       pinsrd  \$2,$rnds_,$iv0
-       inc     $key_
-       pinsrd  \$2,$key_,$iv1
-       movdqa  $iv0,$reserved(%rsp)
-       pshufb  $bswap_mask,$iv0
-       movdqa  $iv1,`$reserved+0x10`(%rsp)
-       pshufb  $bswap_mask,$iv1
-
-       pshufd  \$`3<<6`,$iv0,$inout0           # place counter to upper dword
-       pshufd  \$`2<<6`,$iv0,$inout1
-       pshufd  \$`1<<6`,$iv0,$inout2
-       cmp     \$6,$len
-       jb      .Lctr32_tail
-       shr     \$1,$rounds
-       mov     $key,$key_                      # backup $key
-       mov     $rounds,$rnds_                  # backup $rounds
-       sub     \$6,$len
+       xor     $key0,%r9d
+        bswap  %r10d
+       mov     %r9d,0x50+12(%rsp)
+        xor    $key0,%r10d
+       lea     7($ctr),%r9
+        mov    %r10d,0x60+12(%rsp)
+       bswap   %r9d
+        mov    OPENSSL_ia32cap_P+4(%rip),%r10d 
+       xor     $key0,%r9d
+        and    \$`1<<26|1<<22`,%r10d           # isolate XSAVE+MOVBE
+       mov     %r9d,0x70+12(%rsp)
+
+       $movkey 0x10($key),$rndkey1
+
+       movdqa  0x40(%rsp),$inout4
+       movdqa  0x50(%rsp),$inout5
+
+       cmp     \$8,$len                # $len is in blocks
+       jb      .Lctr32_tail            # short input if ($len<8)
+
+       sub     \$6,$len                # $len is biased by -6
+       cmp     \$`1<<22`,%r10d         # check for MOVBE without XSAVE
+       je      .Lctr32_6x              # [which denotes Atom Silvermont]
+
+       lea     0x80($key),$key         # size optimization
+       sub     \$2,$len                # $len is biased by -8
+       jmp     .Lctr32_loop8
+
+.align 16
+.Lctr32_6x:
+       shl     \$4,$rounds
+       mov     \$48,$rnds_
+       bswap   $key0
+       lea     32($key,$rounds),$key   # end of key schedule
+       sub     %rax,%r10               # twisted $rounds
        jmp     .Lctr32_loop6
 
 .align 16
 .Lctr32_loop6:
-       pshufd  \$`3<<6`,$iv1,$inout3
-       por     $ivec,$inout0                   # merge counter-less ivec
-        $movkey        ($key_),$rndkey0
-       pshufd  \$`2<<6`,$iv1,$inout4
-       por     $ivec,$inout1
-        $movkey        16($key_),$rndkey1
-       pshufd  \$`1<<6`,$iv1,$inout5
-       por     $ivec,$inout2
-       por     $ivec,$inout3
-        xorps          $rndkey0,$inout0
-       por     $ivec,$inout4
-       por     $ivec,$inout5
-
-       # inline _aesni_encrypt6 and interleave last rounds
-       # with own code...
+        add    \$6,$ctr                # next counter value
+       $movkey -48($key,$rnds_),$rndkey0
+       aesenc  $rndkey1,$inout0
+        mov    $ctr,%eax
+        xor    $key0,%eax
+       aesenc  $rndkey1,$inout1
+        movbe  %eax,`0x00+12`(%rsp)    # store next counter value
+        lea    1($ctr),%eax
+       aesenc  $rndkey1,$inout2
+        xor    $key0,%eax
+        movbe  %eax,`0x10+12`(%rsp)
+       aesenc  $rndkey1,$inout3
+        lea    2($ctr),%eax
+        xor    $key0,%eax
+       aesenc  $rndkey1,$inout4
+        movbe  %eax,`0x20+12`(%rsp)
+        lea    3($ctr),%eax
+       aesenc  $rndkey1,$inout5
+       $movkey -32($key,$rnds_),$rndkey1
+        xor    $key0,%eax
 
-       pxor            $rndkey0,$inout1
+       aesenc  $rndkey0,$inout0
+        movbe  %eax,`0x30+12`(%rsp)
+        lea    4($ctr),%eax
+       aesenc  $rndkey0,$inout1
+        xor    $key0,%eax
+        movbe  %eax,`0x40+12`(%rsp)
+       aesenc  $rndkey0,$inout2
+        lea    5($ctr),%eax
+        xor    $key0,%eax
+       aesenc  $rndkey0,$inout3
+        movbe  %eax,`0x50+12`(%rsp)
+        mov    %r10,%rax               # mov   $rnds_,$rounds
+       aesenc  $rndkey0,$inout4
+       aesenc  $rndkey0,$inout5
+       $movkey -16($key,$rnds_),$rndkey0
+
+       call    .Lenc_loop6
+
+       movdqu  ($inp),$inout6          # load 6 input blocks
+       movdqu  0x10($inp),$inout7
+       movdqu  0x20($inp),$in0
+       movdqu  0x30($inp),$in1
+       movdqu  0x40($inp),$in2
+       movdqu  0x50($inp),$in3
+       lea     0x60($inp),$inp         # $inp+=6*16
+       $movkey -64($key,$rnds_),$rndkey1
+       pxor    $inout0,$inout6         # inp^=E(ctr)
+       movaps  0x00(%rsp),$inout0      # load next counter [xor-ed with 0 round]
+       pxor    $inout1,$inout7
+       movaps  0x10(%rsp),$inout1
+       pxor    $inout2,$in0
+       movaps  0x20(%rsp),$inout2
+       pxor    $inout3,$in1
+       movaps  0x30(%rsp),$inout3
+       pxor    $inout4,$in2
+       movaps  0x40(%rsp),$inout4
+       pxor    $inout5,$in3
+       movaps  0x50(%rsp),$inout5
+       movdqu  $inout6,($out)          # store 6 output blocks
+       movdqu  $inout7,0x10($out)
+       movdqu  $in0,0x20($out)
+       movdqu  $in1,0x30($out)
+       movdqu  $in2,0x40($out)
+       movdqu  $in3,0x50($out)
+       lea     0x60($out),$out         # $out+=6*16
+
+       sub     \$6,$len
+       jnc     .Lctr32_loop6           # loop if $len-=6 didn't borrow
+
+       add     \$6,$len                # restore real remaining $len
+       jz      .Lctr32_done            # done if ($len==0)
+
+       lea     -48($rnds_),$rounds
+       lea     -80($key,$rnds_),$key   # restore $key
+       neg     $rounds
+       shr     \$4,$rounds             # restore $rounds
+       jmp     .Lctr32_tail
+
+.align 32
+.Lctr32_loop8:
+        add            \$8,$ctr                # next counter value
+       movdqa          0x60(%rsp),$inout6
        aesenc          $rndkey1,$inout0
-       lea             32($key_),$key
-       pxor            $rndkey0,$inout2
+        mov            $ctr,%r9d
+       movdqa          0x70(%rsp),$inout7
        aesenc          $rndkey1,$inout1
-        movdqa         .Lincrement32(%rip),$iv1
-       pxor            $rndkey0,$inout3
+        bswap          %r9d
+       $movkey         0x20-0x80($key),$rndkey0
        aesenc          $rndkey1,$inout2
-        movdqa         $reserved(%rsp),$iv0
-       pxor            $rndkey0,$inout4
+        xor            $key0,%r9d
+        nop
        aesenc          $rndkey1,$inout3
-       pxor            $rndkey0,$inout5
-       $movkey         ($key),$rndkey0
-       dec             $rounds
+        mov            %r9d,0x00+12(%rsp)      # store next counter value
+        lea            1($ctr),%r9
        aesenc          $rndkey1,$inout4
        aesenc          $rndkey1,$inout5
-       jmp             .Lctr32_enc_loop6_enter
-.align 16
-.Lctr32_enc_loop6:
+       aesenc          $rndkey1,$inout6
+       aesenc          $rndkey1,$inout7
+       $movkey         0x30-0x80($key),$rndkey1
+___
+for($i=2;$i<8;$i++) {
+my $rndkeyx = ($i&1)?$rndkey1:$rndkey0;
+$code.=<<___;
+        bswap          %r9d
+       aesenc          $rndkeyx,$inout0
+       aesenc          $rndkeyx,$inout1
+        xor            $key0,%r9d
+        .byte          0x66,0x90
+       aesenc          $rndkeyx,$inout2
+       aesenc          $rndkeyx,$inout3
+        mov            %r9d,`0x10*($i-1)`+12(%rsp)
+        lea            $i($ctr),%r9
+       aesenc          $rndkeyx,$inout4
+       aesenc          $rndkeyx,$inout5
+       aesenc          $rndkeyx,$inout6
+       aesenc          $rndkeyx,$inout7
+       $movkey         `0x20+0x10*$i`-0x80($key),$rndkeyx
+___
+}
+$code.=<<___;
+        bswap          %r9d
+       aesenc          $rndkey0,$inout0
+       aesenc          $rndkey0,$inout1
+       aesenc          $rndkey0,$inout2
+        xor            $key0,%r9d
+        movdqu         0x00($inp),$in0         # start loading input
+       aesenc          $rndkey0,$inout3
+        mov            %r9d,0x70+12(%rsp)
+        cmp            \$11,$rounds
+       aesenc          $rndkey0,$inout4
+       aesenc          $rndkey0,$inout5
+       aesenc          $rndkey0,$inout6
+       aesenc          $rndkey0,$inout7
+       $movkey         0xa0-0x80($key),$rndkey0
+
+       jb              .Lctr32_enc_done
+
        aesenc          $rndkey1,$inout0
        aesenc          $rndkey1,$inout1
-       dec             $rounds
        aesenc          $rndkey1,$inout2
        aesenc          $rndkey1,$inout3
        aesenc          $rndkey1,$inout4
        aesenc          $rndkey1,$inout5
-.Lctr32_enc_loop6_enter:
-       $movkey         16($key),$rndkey1
+       aesenc          $rndkey1,$inout6
+       aesenc          $rndkey1,$inout7
+       $movkey         0xb0-0x80($key),$rndkey1
+
        aesenc          $rndkey0,$inout0
        aesenc          $rndkey0,$inout1
-       lea             32($key),$key
        aesenc          $rndkey0,$inout2
        aesenc          $rndkey0,$inout3
        aesenc          $rndkey0,$inout4
        aesenc          $rndkey0,$inout5
-       $movkey         ($key),$rndkey0
-       jnz             .Lctr32_enc_loop6
+       aesenc          $rndkey0,$inout6
+       aesenc          $rndkey0,$inout7
+       $movkey         0xc0-0x80($key),$rndkey0
+       je              .Lctr32_enc_done
 
        aesenc          $rndkey1,$inout0
-        paddd          $iv1,$iv0               # increment counter vector
        aesenc          $rndkey1,$inout1
-        paddd          `$reserved+0x10`(%rsp),$iv1
        aesenc          $rndkey1,$inout2
-        movdqa         $iv0,$reserved(%rsp)    # save counter vector
        aesenc          $rndkey1,$inout3
-        movdqa         $iv1,`$reserved+0x10`(%rsp)
        aesenc          $rndkey1,$inout4
-        pshufb         $bswap_mask,$iv0        # byte swap
        aesenc          $rndkey1,$inout5
-        pshufb         $bswap_mask,$iv1
-
-       aesenclast      $rndkey0,$inout0
-        movups         ($inp),$in0             # load input
-       aesenclast      $rndkey0,$inout1
-        movups         0x10($inp),$in1
-       aesenclast      $rndkey0,$inout2
-        movups         0x20($inp),$in2
-       aesenclast      $rndkey0,$inout3
-        movups         0x30($inp),$in3
-       aesenclast      $rndkey0,$inout4
-        movups         0x40($inp),$rndkey1
-       aesenclast      $rndkey0,$inout5
-        movups         0x50($inp),$rndkey0
-        lea    0x60($inp),$inp
-
-       xorps   $inout0,$in0                    # xor
-        pshufd \$`3<<6`,$iv0,$inout0
-       xorps   $inout1,$in1
-        pshufd \$`2<<6`,$iv0,$inout1
-       movups  $in0,($out)                     # store output
-       xorps   $inout2,$in2
-        pshufd \$`1<<6`,$iv0,$inout2
-       movups  $in1,0x10($out)
-       xorps   $inout3,$in3
-       movups  $in2,0x20($out)
-       xorps   $inout4,$rndkey1
-       movups  $in3,0x30($out)
-       xorps   $inout5,$rndkey0
-       movups  $rndkey1,0x40($out)
-       movups  $rndkey0,0x50($out)
-       lea     0x60($out),$out
-       mov     $rnds_,$rounds
-       sub     \$6,$len
-       jnc     .Lctr32_loop6
+       aesenc          $rndkey1,$inout6
+       aesenc          $rndkey1,$inout7
+       $movkey         0xd0-0x80($key),$rndkey1
 
-       add     \$6,$len
-       jz      .Lctr32_done
-       mov     $key_,$key                      # restore $key
-       lea     1($rounds,$rounds),$rounds      # restore original value
+       aesenc          $rndkey0,$inout0
+       aesenc          $rndkey0,$inout1
+       aesenc          $rndkey0,$inout2
+       aesenc          $rndkey0,$inout3
+       aesenc          $rndkey0,$inout4
+       aesenc          $rndkey0,$inout5
+       aesenc          $rndkey0,$inout6
+       aesenc          $rndkey0,$inout7
+       $movkey         0xe0-0x80($key),$rndkey0
+       jmp             .Lctr32_enc_done
+
+.align 16
+.Lctr32_enc_done:
+       movdqu          0x10($inp),$in1
+       pxor            $rndkey0,$in0           # input^=round[last]
+       movdqu          0x20($inp),$in2
+       pxor            $rndkey0,$in1
+       movdqu          0x30($inp),$in3
+       pxor            $rndkey0,$in2
+       movdqu          0x40($inp),$in4
+       pxor            $rndkey0,$in3
+       movdqu          0x50($inp),$in5
+       pxor            $rndkey0,$in4
+       pxor            $rndkey0,$in5
+       aesenc          $rndkey1,$inout0
+       aesenc          $rndkey1,$inout1
+       aesenc          $rndkey1,$inout2
+       aesenc          $rndkey1,$inout3
+       aesenc          $rndkey1,$inout4
+       aesenc          $rndkey1,$inout5
+       aesenc          $rndkey1,$inout6
+       aesenc          $rndkey1,$inout7
+       movdqu          0x60($inp),$rndkey1     # borrow $rndkey1 for inp[6]
+       lea             0x80($inp),$inp         # $inp+=8*16
+
+       aesenclast      $in0,$inout0            # $inN is inp[N]^round[last]
+       pxor            $rndkey0,$rndkey1       # borrowed $rndkey
+       movdqu          0x70-0x80($inp),$in0
+       aesenclast      $in1,$inout1
+       pxor            $rndkey0,$in0
+       movdqa          0x00(%rsp),$in1         # load next counter block
+       aesenclast      $in2,$inout2
+       aesenclast      $in3,$inout3
+       movdqa          0x10(%rsp),$in2
+       movdqa          0x20(%rsp),$in3
+       aesenclast      $in4,$inout4
+       aesenclast      $in5,$inout5
+       movdqa          0x30(%rsp),$in4
+       movdqa          0x40(%rsp),$in5
+       aesenclast      $rndkey1,$inout6
+       movdqa          0x50(%rsp),$rndkey0
+       $movkey         0x10-0x80($key),$rndkey1#real 1st-round key
+       aesenclast      $in0,$inout7
+
+       movups          $inout0,($out)          # store 8 output blocks
+       movdqa          $in1,$inout0
+       movups          $inout1,0x10($out)
+       movdqa          $in2,$inout1
+       movups          $inout2,0x20($out)
+       movdqa          $in3,$inout2
+       movups          $inout3,0x30($out)
+       movdqa          $in4,$inout3
+       movups          $inout4,0x40($out)
+       movdqa          $in5,$inout4
+       movups          $inout5,0x50($out)
+       movdqa          $rndkey0,$inout5
+       movups          $inout6,0x60($out)
+       movups          $inout7,0x70($out)
+       lea             0x80($out),$out         # $out+=8*16
+
+       sub     \$8,$len
+       jnc     .Lctr32_loop8                   # loop if $len-=8 didn't borrow
+
+       add     \$8,$len                        # restore real remaining $len
+       jz      .Lctr32_done                    # done if ($len==0)
+       lea     -0x80($key),$key
 
 .Lctr32_tail:
-       por     $ivec,$inout0
-       movups  ($inp),$in0
-       cmp     \$2,$len
-       jb      .Lctr32_one
-
-       por     $ivec,$inout1
-       movups  0x10($inp),$in1
-       je      .Lctr32_two
-
-       pshufd  \$`3<<6`,$iv1,$inout3
-       por     $ivec,$inout2
-       movups  0x20($inp),$in2
+       # note that at this point $inout0..5 are populated with
+       # counter values xor-ed with 0-round key 
+       lea     16($key),$key
        cmp     \$4,$len
-       jb      .Lctr32_three
+       jb      .Lctr32_loop3
+       je      .Lctr32_loop4
 
-       pshufd  \$`2<<6`,$iv1,$inout4
-       por     $ivec,$inout3
-       movups  0x30($inp),$in3
-       je      .Lctr32_four
+       # if ($len>4) compute 7 E(counter)
+       shl             \$4,$rounds
+       movdqa          0x60(%rsp),$inout6
+       pxor            $inout7,$inout7
 
-       por     $ivec,$inout4
-       xorps   $inout5,$inout5
+       $movkey         16($key),$rndkey0
+       aesenc          $rndkey1,$inout0
+       aesenc          $rndkey1,$inout1
+       lea             32-16($key,$rounds),$key# prepare for .Lenc_loop8_enter
+       neg             %rax
+       aesenc          $rndkey1,$inout2
+       add             \$16,%rax               # prepare for .Lenc_loop8_enter
+        movups         ($inp),$in0
+       aesenc          $rndkey1,$inout3
+       aesenc          $rndkey1,$inout4
+        movups         0x10($inp),$in1         # pre-load input
+        movups         0x20($inp),$in2
+       aesenc          $rndkey1,$inout5
+       aesenc          $rndkey1,$inout6
+
+       call            .Lenc_loop8_enter
+
+       movdqu  0x30($inp),$in3
+       pxor    $in0,$inout0
+       movdqu  0x40($inp),$in0
+       pxor    $in1,$inout1
+       movdqu  $inout0,($out)                  # store output
+       pxor    $in2,$inout2
+       movdqu  $inout1,0x10($out)
+       pxor    $in3,$inout3
+       movdqu  $inout2,0x20($out)
+       pxor    $in0,$inout4
+       movdqu  $inout3,0x30($out)
+       movdqu  $inout4,0x40($out)
+       cmp     \$6,$len
+       jb      .Lctr32_done                    # $len was 5, stop store
 
-       call    _aesni_encrypt6
+       movups  0x50($inp),$in1
+       xorps   $in1,$inout5
+       movups  $inout5,0x50($out)
+       je      .Lctr32_done                    # $len was 6, stop store
 
-       movups  0x40($inp),$rndkey1
-       xorps   $inout0,$in0
-       xorps   $inout1,$in1
-       movups  $in0,($out)
-       xorps   $inout2,$in2
-       movups  $in1,0x10($out)
-       xorps   $inout3,$in3
-       movups  $in2,0x20($out)
-       xorps   $inout4,$rndkey1
-       movups  $in3,0x30($out)
-       movups  $rndkey1,0x40($out)
-       jmp     .Lctr32_done
-
-.align 16
-.Lctr32_one_shortcut:
-       movups  ($ivp),$inout0
-       movups  ($inp),$in0
-       mov     240($key),$rounds               # key->rounds
-.Lctr32_one:
-___
-       &aesni_generate1("enc",$key,$rounds);
-$code.=<<___;
-       xorps   $inout0,$in0
-       movups  $in0,($out)
-       jmp     .Lctr32_done
+       movups  0x60($inp),$in2
+       xorps   $in2,$inout6
+       movups  $inout6,0x60($out)
+       jmp     .Lctr32_done                    # $len was 7, stop store
 
-.align 16
-.Lctr32_two:
-       xorps   $inout2,$inout2
-       call    _aesni_encrypt3
-       xorps   $inout0,$in0
-       xorps   $inout1,$in1
-       movups  $in0,($out)
-       movups  $in1,0x10($out)
-       jmp     .Lctr32_done
+.align 32
+.Lctr32_loop4:
+       aesenc          $rndkey1,$inout0
+       lea             16($key),$key
+       dec             $rounds
+       aesenc          $rndkey1,$inout1
+       aesenc          $rndkey1,$inout2
+       aesenc          $rndkey1,$inout3
+       $movkey         ($key),$rndkey1
+       jnz             .Lctr32_loop4
+       aesenclast      $rndkey1,$inout0
+       aesenclast      $rndkey1,$inout1
+        movups         ($inp),$in0             # load input
+        movups         0x10($inp),$in1
+       aesenclast      $rndkey1,$inout2
+       aesenclast      $rndkey1,$inout3
+        movups         0x20($inp),$in2
+        movups         0x30($inp),$in3
 
-.align 16
-.Lctr32_three:
-       call    _aesni_encrypt3
-       xorps   $inout0,$in0
-       xorps   $inout1,$in1
-       movups  $in0,($out)
-       xorps   $inout2,$in2
-       movups  $in1,0x10($out)
-       movups  $in2,0x20($out)
-       jmp     .Lctr32_done
+       xorps   $in0,$inout0
+       movups  $inout0,($out)                  # store output
+       xorps   $in1,$inout1
+       movups  $inout1,0x10($out)
+       pxor    $in2,$inout2
+       movdqu  $inout2,0x20($out)
+       pxor    $in3,$inout3
+       movdqu  $inout3,0x30($out)
+       jmp     .Lctr32_done                    # $len was 4, stop store
+
+.align 32
+.Lctr32_loop3:
+       aesenc          $rndkey1,$inout0
+       lea             16($key),$key
+       dec             $rounds
+       aesenc          $rndkey1,$inout1
+       aesenc          $rndkey1,$inout2
+       $movkey         ($key),$rndkey1
+       jnz             .Lctr32_loop3
+       aesenclast      $rndkey1,$inout0
+       aesenclast      $rndkey1,$inout1
+       aesenclast      $rndkey1,$inout2
+
+       movups  ($inp),$in0                     # load input
+       xorps   $in0,$inout0
+       movups  $inout0,($out)                  # store output
+       cmp     \$2,$len
+       jb      .Lctr32_done                    # $len was 1, stop store
 
-.align 16
-.Lctr32_four:
-       call    _aesni_encrypt4
-       xorps   $inout0,$in0
-       xorps   $inout1,$in1
-       movups  $in0,($out)
-       xorps   $inout2,$in2
-       movups  $in1,0x10($out)
-       xorps   $inout3,$in3
-       movups  $in2,0x20($out)
-       movups  $in3,0x30($out)
+       movups  0x10($inp),$in1
+       xorps   $in1,$inout1
+       movups  $inout1,0x10($out)
+       je      .Lctr32_done                    # $len was 2, stop store
+
+       movups  0x20($inp),$in2
+       xorps   $in2,$inout2
+       movups  $inout2,0x20($out)              # $len was 3, stop store
 
 .Lctr32_done:
+       xorps   %xmm0,%xmm0                     # clear register bank
+       xor     $key0,$key0
+       pxor    %xmm1,%xmm1
+       pxor    %xmm2,%xmm2
+       pxor    %xmm3,%xmm3
+       pxor    %xmm4,%xmm4
+       pxor    %xmm5,%xmm5
+___
+$code.=<<___ if (!$win64);
+       pxor    %xmm6,%xmm6
+       pxor    %xmm7,%xmm7
+       movaps  %xmm0,0x00(%rsp)                # clear stack
+       pxor    %xmm8,%xmm8
+       movaps  %xmm0,0x10(%rsp)
+       pxor    %xmm9,%xmm9
+       movaps  %xmm0,0x20(%rsp)
+       pxor    %xmm10,%xmm10
+       movaps  %xmm0,0x30(%rsp)
+       pxor    %xmm11,%xmm11
+       movaps  %xmm0,0x40(%rsp)
+       pxor    %xmm12,%xmm12
+       movaps  %xmm0,0x50(%rsp)
+       pxor    %xmm13,%xmm13
+       movaps  %xmm0,0x60(%rsp)
+       pxor    %xmm14,%xmm14
+       movaps  %xmm0,0x70(%rsp)
+       pxor    %xmm15,%xmm15
 ___
 $code.=<<___ if ($win64);
-       movaps  0x20(%rsp),%xmm6
-       movaps  0x30(%rsp),%xmm7
-       movaps  0x40(%rsp),%xmm8
-       movaps  0x50(%rsp),%xmm9
-       movaps  0x60(%rsp),%xmm10
-       movaps  0x70(%rsp),%xmm11
-       movaps  0x80(%rsp),%xmm12
-       movaps  0x90(%rsp),%xmm13
-       movaps  0xa0(%rsp),%xmm14
-       movaps  0xb0(%rsp),%xmm15
-       lea     0xc8(%rsp),%rsp
-.Lctr32_ret:
+       movaps  -0xa0(%rbp),%xmm6
+       movaps  %xmm0,-0xa0(%rbp)               # clear stack
+       movaps  -0x90(%rbp),%xmm7
+       movaps  %xmm0,-0x90(%rbp)
+       movaps  -0x80(%rbp),%xmm8
+       movaps  %xmm0,-0x80(%rbp)
+       movaps  -0x70(%rbp),%xmm9
+       movaps  %xmm0,-0x70(%rbp)
+       movaps  -0x60(%rbp),%xmm10
+       movaps  %xmm0,-0x60(%rbp)
+       movaps  -0x50(%rbp),%xmm11
+       movaps  %xmm0,-0x50(%rbp)
+       movaps  -0x40(%rbp),%xmm12
+       movaps  %xmm0,-0x40(%rbp)
+       movaps  -0x30(%rbp),%xmm13
+       movaps  %xmm0,-0x30(%rbp)
+       movaps  -0x20(%rbp),%xmm14
+       movaps  %xmm0,-0x20(%rbp)
+       movaps  -0x10(%rbp),%xmm15
+       movaps  %xmm0,-0x10(%rbp)
+       movaps  %xmm0,0x00(%rsp)
+       movaps  %xmm0,0x10(%rsp)
+       movaps  %xmm0,0x20(%rsp)
+       movaps  %xmm0,0x30(%rsp)
+       movaps  %xmm0,0x40(%rsp)
+       movaps  %xmm0,0x50(%rsp)
+       movaps  %xmm0,0x60(%rsp)
+       movaps  %xmm0,0x70(%rsp)
 ___
 $code.=<<___;
+       lea     (%rbp),%rsp
+       pop     %rbp
+.Lctr32_epilogue:
        ret
 .size  aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
 ___
@@ -1317,252 +1725,297 @@ ___
 my @tweak=map("%xmm$_",(10..15));
 my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]);
 my ($key2,$ivp,$len_)=("%r8","%r9","%r9");
-my $frame_size = 0x68 + ($win64?160:0);
+my $frame_size = 0x70 + ($win64?160:0);
 
 $code.=<<___;
 .globl aesni_xts_encrypt
 .type  aesni_xts_encrypt,\@function,6
 .align 16
 aesni_xts_encrypt:
-       lea     -$frame_size(%rsp),%rsp
+       lea     (%rsp),%rax
+       push    %rbp
+       sub     \$$frame_size,%rsp
+       and     \$-16,%rsp      # Linux kernel stack can be incorrectly seeded
 ___
 $code.=<<___ if ($win64);
-       movaps  %xmm6,0x60(%rsp)
-       movaps  %xmm7,0x70(%rsp)
-       movaps  %xmm8,0x80(%rsp)
-       movaps  %xmm9,0x90(%rsp)
-       movaps  %xmm10,0xa0(%rsp)
-       movaps  %xmm11,0xb0(%rsp)
-       movaps  %xmm12,0xc0(%rsp)
-       movaps  %xmm13,0xd0(%rsp)
-       movaps  %xmm14,0xe0(%rsp)
-       movaps  %xmm15,0xf0(%rsp)
+       movaps  %xmm6,-0xa8(%rax)               # offload everything
+       movaps  %xmm7,-0x98(%rax)
+       movaps  %xmm8,-0x88(%rax)
+       movaps  %xmm9,-0x78(%rax)
+       movaps  %xmm10,-0x68(%rax)
+       movaps  %xmm11,-0x58(%rax)
+       movaps  %xmm12,-0x48(%rax)
+       movaps  %xmm13,-0x38(%rax)
+       movaps  %xmm14,-0x28(%rax)
+       movaps  %xmm15,-0x18(%rax)
 .Lxts_enc_body:
 ___
 $code.=<<___;
-       movups  ($ivp),@tweak[5]                # load clear-text tweak
+       lea     -8(%rax),%rbp
+       movups  ($ivp),$inout0                  # load clear-text tweak
        mov     240(%r8),$rounds                # key2->rounds
        mov     240($key),$rnds_                # key1->rounds
 ___
        # generate the tweak
-       &aesni_generate1("enc",$key2,$rounds,@tweak[5]);
+       &aesni_generate1("enc",$key2,$rounds,$inout0);
 $code.=<<___;
+       $movkey ($key),$rndkey0                 # zero round key
        mov     $key,$key_                      # backup $key
        mov     $rnds_,$rounds                  # backup $rounds
+       shl     \$4,$rnds_
        mov     $len,$len_                      # backup $len
        and     \$-16,$len
 
+       $movkey 16($key,$rnds_),$rndkey1        # last round key
+
        movdqa  .Lxts_magic(%rip),$twmask
-       pxor    $twtmp,$twtmp
-       pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
+       movdqa  $inout0,@tweak[5]
+       pshufd  \$0x5f,$inout0,$twres
+       pxor    $rndkey0,$rndkey1
 ___
+    # alternative tweak calculation algorithm is based on suggestions
+    # by Shay Gueron. psrad doesn't conflict with AES-NI instructions
+    # and should help in the future...
     for ($i=0;$i<4;$i++) {
     $code.=<<___;
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
+       movdqa  $twres,$twtmp
+       paddd   $twres,$twres
        movdqa  @tweak[5],@tweak[$i]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
-       pand    $twmask,$twres                  # isolate carry and residue
-       pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
-       pxor    $twres,@tweak[5]
+       psrad   \$31,$twtmp                     # broadcast upper bits
+       paddq   @tweak[5],@tweak[5]
+       pand    $twmask,$twtmp
+       pxor    $rndkey0,@tweak[$i]
+       pxor    $twtmp,@tweak[5]
 ___
     }
 $code.=<<___;
+       movdqa  @tweak[5],@tweak[4]
+       psrad   \$31,$twres
+       paddq   @tweak[5],@tweak[5]
+       pand    $twmask,$twres
+       pxor    $rndkey0,@tweak[4]
+       pxor    $twres,@tweak[5]
+       movaps  $rndkey1,0x60(%rsp)             # save round[0]^round[last]
+
        sub     \$16*6,$len
-       jc      .Lxts_enc_short
+       jc      .Lxts_enc_short                 # if $len-=6*16 borrowed
 
-       shr     \$1,$rounds
-       sub     \$1,$rounds
-       mov     $rounds,$rnds_
+       mov     \$16+96,$rounds
+       lea     32($key_,$rnds_),$key           # end of key schedule
+       sub     %r10,%rax                       # twisted $rounds
+       $movkey 16($key_),$rndkey1
+       mov     %rax,%r10                       # backup twisted $rounds
+       lea     .Lxts_magic(%rip),%r8
        jmp     .Lxts_enc_grandloop
 
-.align 16
+.align 32
 .Lxts_enc_grandloop:
-       pshufd  \$0x13,$twtmp,$twres
-       movdqa  @tweak[5],@tweak[4]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
        movdqu  `16*0`($inp),$inout0            # load input
-       pand    $twmask,$twres                  # isolate carry and residue
+       movdqa  $rndkey0,$twmask
        movdqu  `16*1`($inp),$inout1
-       pxor    $twres,@tweak[5]
-
+       pxor    @tweak[0],$inout0               # input^=tweak^round[0]
        movdqu  `16*2`($inp),$inout2
-       pxor    @tweak[0],$inout0               # input^=tweak
-       movdqu  `16*3`($inp),$inout3
        pxor    @tweak[1],$inout1
-       movdqu  `16*4`($inp),$inout4
+        aesenc         $rndkey1,$inout0
+       movdqu  `16*3`($inp),$inout3
        pxor    @tweak[2],$inout2
-       movdqu  `16*5`($inp),$inout5
-       lea     `16*6`($inp),$inp
+        aesenc         $rndkey1,$inout1
+       movdqu  `16*4`($inp),$inout4
        pxor    @tweak[3],$inout3
-       $movkey         ($key_),$rndkey0
+        aesenc         $rndkey1,$inout2
+       movdqu  `16*5`($inp),$inout5
+       pxor    @tweak[5],$twmask               # round[0]^=tweak[5]
+        movdqa 0x60(%rsp),$twres               # load round[0]^round[last]
        pxor    @tweak[4],$inout4
-       pxor    @tweak[5],$inout5
+        aesenc         $rndkey1,$inout3
+       $movkey 32($key_),$rndkey0
+       lea     `16*6`($inp),$inp
+       pxor    $twmask,$inout5
 
-       # inline _aesni_encrypt6 and interleave first and last rounds
-       # with own code...
-       $movkey         16($key_),$rndkey1
-       pxor            $rndkey0,$inout0
-       pxor            $rndkey0,$inout1
-        movdqa @tweak[0],`16*0`(%rsp)          # put aside tweaks
-       aesenc          $rndkey1,$inout0
-       lea             32($key_),$key
-       pxor            $rndkey0,$inout2
-        movdqa @tweak[1],`16*1`(%rsp)
-       aesenc          $rndkey1,$inout1
-       pxor            $rndkey0,$inout3
-        movdqa @tweak[2],`16*2`(%rsp)
-       aesenc          $rndkey1,$inout2
-       pxor            $rndkey0,$inout4
-        movdqa @tweak[3],`16*3`(%rsp)
-       aesenc          $rndkey1,$inout3
-       pxor            $rndkey0,$inout5
-       $movkey         ($key),$rndkey0
-       dec             $rounds
-        movdqa @tweak[4],`16*4`(%rsp)
+        pxor   $twres,@tweak[0]                # calculate tweaks^round[last]
        aesenc          $rndkey1,$inout4
-        movdqa @tweak[5],`16*5`(%rsp)
+        pxor   $twres,@tweak[1]
+        movdqa @tweak[0],`16*0`(%rsp)          # put aside tweaks^round[last]
        aesenc          $rndkey1,$inout5
-       pxor    $twtmp,$twtmp
-       pcmpgtd @tweak[5],$twtmp
-       jmp             .Lxts_enc_loop6_enter
+       $movkey         48($key_),$rndkey1
+        pxor   $twres,@tweak[2]
 
-.align 16
+       aesenc          $rndkey0,$inout0
+        pxor   $twres,@tweak[3]
+        movdqa @tweak[1],`16*1`(%rsp)
+       aesenc          $rndkey0,$inout1
+        pxor   $twres,@tweak[4]
+        movdqa @tweak[2],`16*2`(%rsp)
+       aesenc          $rndkey0,$inout2
+       aesenc          $rndkey0,$inout3
+        pxor   $twres,$twmask
+        movdqa @tweak[4],`16*4`(%rsp)
+       aesenc          $rndkey0,$inout4
+       aesenc          $rndkey0,$inout5
+       $movkey         64($key_),$rndkey0
+        movdqa $twmask,`16*5`(%rsp)
+       pshufd  \$0x5f,@tweak[5],$twres
+       jmp     .Lxts_enc_loop6
+.align 32
 .Lxts_enc_loop6:
        aesenc          $rndkey1,$inout0
        aesenc          $rndkey1,$inout1
-       dec             $rounds
        aesenc          $rndkey1,$inout2
        aesenc          $rndkey1,$inout3
        aesenc          $rndkey1,$inout4
        aesenc          $rndkey1,$inout5
-.Lxts_enc_loop6_enter:
-       $movkey         16($key),$rndkey1
+       $movkey         -64($key,%rax),$rndkey1
+       add             \$32,%rax
+
        aesenc          $rndkey0,$inout0
        aesenc          $rndkey0,$inout1
-       lea             32($key),$key
        aesenc          $rndkey0,$inout2
        aesenc          $rndkey0,$inout3
        aesenc          $rndkey0,$inout4
        aesenc          $rndkey0,$inout5
-       $movkey         ($key),$rndkey0
+       $movkey         -80($key,%rax),$rndkey0
        jnz             .Lxts_enc_loop6
 
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
+       movdqa  (%r8),$twmask                   # start calculating next tweak
+       movdqa  $twres,$twtmp
+       paddd   $twres,$twres
         aesenc         $rndkey1,$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
+       paddq   @tweak[5],@tweak[5]
+       psrad   \$31,$twtmp
         aesenc         $rndkey1,$inout1
-       pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
+       pand    $twmask,$twtmp
+       $movkey ($key_),@tweak[0]               # load round[0]
         aesenc         $rndkey1,$inout2
-       pxor    $twres,@tweak[5]
         aesenc         $rndkey1,$inout3
         aesenc         $rndkey1,$inout4
+       pxor    $twtmp,@tweak[5]
+       movaps  @tweak[0],@tweak[1]             # copy round[0]
         aesenc         $rndkey1,$inout5
-        $movkey        16($key),$rndkey1
+        $movkey        -64($key),$rndkey1
 
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
-       movdqa  @tweak[5],@tweak[0]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
+       movdqa  $twres,$twtmp
         aesenc         $rndkey0,$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
+       paddd   $twres,$twres
+       pxor    @tweak[5],@tweak[0]
         aesenc         $rndkey0,$inout1
-       pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+       psrad   \$31,$twtmp
+       paddq   @tweak[5],@tweak[5]
         aesenc         $rndkey0,$inout2
-       pxor    $twres,@tweak[5]
         aesenc         $rndkey0,$inout3
+       pand    $twmask,$twtmp
+       movaps  @tweak[1],@tweak[2]
         aesenc         $rndkey0,$inout4
+       pxor    $twtmp,@tweak[5]
+       movdqa  $twres,$twtmp
         aesenc         $rndkey0,$inout5
-        $movkey        32($key),$rndkey0
+        $movkey        -48($key),$rndkey0
 
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
-       movdqa  @tweak[5],@tweak[1]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
+       paddd   $twres,$twres
         aesenc         $rndkey1,$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
+       pxor    @tweak[5],@tweak[1]
+       psrad   \$31,$twtmp
         aesenc         $rndkey1,$inout1
-       pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+       paddq   @tweak[5],@tweak[5]
+       pand    $twmask,$twtmp
         aesenc         $rndkey1,$inout2
-       pxor    $twres,@tweak[5]
         aesenc         $rndkey1,$inout3
+        movdqa @tweak[3],`16*3`(%rsp)
+       pxor    $twtmp,@tweak[5]
         aesenc         $rndkey1,$inout4
+       movaps  @tweak[2],@tweak[3]
+       movdqa  $twres,$twtmp
         aesenc         $rndkey1,$inout5
+        $movkey        -32($key),$rndkey1
 
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
-       movdqa  @tweak[5],@tweak[2]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
-        aesenclast     $rndkey0,$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
-        aesenclast     $rndkey0,$inout1
-       pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
-        aesenclast     $rndkey0,$inout2
-       pxor    $twres,@tweak[5]
-        aesenclast     $rndkey0,$inout3
-        aesenclast     $rndkey0,$inout4
-        aesenclast     $rndkey0,$inout5
-
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
-       movdqa  @tweak[5],@tweak[3]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
-        xorps  `16*0`(%rsp),$inout0            # output^=tweak
-       pand    $twmask,$twres                  # isolate carry and residue
-        xorps  `16*1`(%rsp),$inout1
-       pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+       paddd   $twres,$twres
+        aesenc         $rndkey0,$inout0
+       pxor    @tweak[5],@tweak[2]
+       psrad   \$31,$twtmp
+        aesenc         $rndkey0,$inout1
+       paddq   @tweak[5],@tweak[5]
+       pand    $twmask,$twtmp
+        aesenc         $rndkey0,$inout2
+        aesenc         $rndkey0,$inout3
+        aesenc         $rndkey0,$inout4
+       pxor    $twtmp,@tweak[5]
+       movaps  @tweak[3],@tweak[4]
+        aesenc         $rndkey0,$inout5
+
+       movdqa  $twres,$rndkey0
+       paddd   $twres,$twres
+        aesenc         $rndkey1,$inout0
+       pxor    @tweak[5],@tweak[3]
+       psrad   \$31,$rndkey0
+        aesenc         $rndkey1,$inout1
+       paddq   @tweak[5],@tweak[5]
+       pand    $twmask,$rndkey0
+        aesenc         $rndkey1,$inout2
+        aesenc         $rndkey1,$inout3
+       pxor    $rndkey0,@tweak[5]
+       $movkey         ($key_),$rndkey0
+        aesenc         $rndkey1,$inout4
+        aesenc         $rndkey1,$inout5
+       $movkey         16($key_),$rndkey1
+
+       pxor    @tweak[5],@tweak[4]
+        aesenclast     `16*0`(%rsp),$inout0
+       psrad   \$31,$twres
+       paddq   @tweak[5],@tweak[5]
+        aesenclast     `16*1`(%rsp),$inout1
+        aesenclast     `16*2`(%rsp),$inout2
+       pand    $twmask,$twres
+       mov     %r10,%rax                       # restore $rounds
+        aesenclast     `16*3`(%rsp),$inout3
+        aesenclast     `16*4`(%rsp),$inout4
+        aesenclast     `16*5`(%rsp),$inout5
        pxor    $twres,@tweak[5]
 
-       xorps   `16*2`(%rsp),$inout2
-       movups  $inout0,`16*0`($out)            # write output
-       xorps   `16*3`(%rsp),$inout3
-       movups  $inout1,`16*1`($out)
-       xorps   `16*4`(%rsp),$inout4
-       movups  $inout2,`16*2`($out)
-       xorps   `16*5`(%rsp),$inout5
-       movups  $inout3,`16*3`($out)
-       mov     $rnds_,$rounds                  # restore $rounds
-       movups  $inout4,`16*4`($out)
-       movups  $inout5,`16*5`($out)
-       lea     `16*6`($out),$out
+       lea     `16*6`($out),$out               # $out+=6*16
+       movups  $inout0,`-16*6`($out)           # store 6 output blocks
+       movups  $inout1,`-16*5`($out)
+       movups  $inout2,`-16*4`($out)
+       movups  $inout3,`-16*3`($out)
+       movups  $inout4,`-16*2`($out)
+       movups  $inout5,`-16*1`($out)
        sub     \$16*6,$len
-       jnc     .Lxts_enc_grandloop
+       jnc     .Lxts_enc_grandloop             # loop if $len-=6*16 didn't borrow
 
-       lea     3($rounds,$rounds),$rounds      # restore original value
+       mov     \$16+96,$rounds
+       sub     $rnds_,$rounds
        mov     $key_,$key                      # restore $key
-       mov     $rounds,$rnds_                  # backup $rounds
+       shr     \$4,$rounds                     # restore original value
 
 .Lxts_enc_short:
-       add     \$16*6,$len
-       jz      .Lxts_enc_done
+       # at this point @tweak[0..5] are populated with tweak values
+       mov     $rounds,$rnds_                  # backup $rounds
+       pxor    $rndkey0,@tweak[0]
+       add     \$16*6,$len                     # restore real remaining $len
+       jz      .Lxts_enc_done                  # done if ($len==0)
 
+       pxor    $rndkey0,@tweak[1]
        cmp     \$0x20,$len
-       jb      .Lxts_enc_one
-       je      .Lxts_enc_two
+       jb      .Lxts_enc_one                   # $len is 1*16
+       pxor    $rndkey0,@tweak[2]
+       je      .Lxts_enc_two                   # $len is 2*16
 
+       pxor    $rndkey0,@tweak[3]
        cmp     \$0x40,$len
-       jb      .Lxts_enc_three
-       je      .Lxts_enc_four
-
-       pshufd  \$0x13,$twtmp,$twres
-       movdqa  @tweak[5],@tweak[4]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
-        movdqu ($inp),$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
-        movdqu 16*1($inp),$inout1
-       pxor    $twres,@tweak[5]
+       jb      .Lxts_enc_three                 # $len is 3*16
+       pxor    $rndkey0,@tweak[4]
+       je      .Lxts_enc_four                  # $len is 4*16
 
+       movdqu  ($inp),$inout0                  # $len is 5*16
+       movdqu  16*1($inp),$inout1
        movdqu  16*2($inp),$inout2
        pxor    @tweak[0],$inout0
        movdqu  16*3($inp),$inout3
        pxor    @tweak[1],$inout1
        movdqu  16*4($inp),$inout4
-       lea     16*5($inp),$inp
+       lea     16*5($inp),$inp                 # $inp+=5*16
        pxor    @tweak[2],$inout2
        pxor    @tweak[3],$inout3
        pxor    @tweak[4],$inout4
+       pxor    $inout5,$inout5
 
        call    _aesni_encrypt6
 
@@ -1570,46 +2023,46 @@ $code.=<<___;
        movdqa  @tweak[5],@tweak[0]
        xorps   @tweak[1],$inout1
        xorps   @tweak[2],$inout2
-       movdqu  $inout0,($out)
+       movdqu  $inout0,($out)                  # store 5 output blocks
        xorps   @tweak[3],$inout3
        movdqu  $inout1,16*1($out)
        xorps   @tweak[4],$inout4
        movdqu  $inout2,16*2($out)
        movdqu  $inout3,16*3($out)
        movdqu  $inout4,16*4($out)
-       lea     16*5($out),$out
+       lea     16*5($out),$out                 # $out+=5*16
        jmp     .Lxts_enc_done
 
 .align 16
 .Lxts_enc_one:
        movups  ($inp),$inout0
-       lea     16*1($inp),$inp
+       lea     16*1($inp),$inp                 # inp+=1*16
        xorps   @tweak[0],$inout0
 ___
        &aesni_generate1("enc",$key,$rounds);
 $code.=<<___;
        xorps   @tweak[0],$inout0
        movdqa  @tweak[1],@tweak[0]
-       movups  $inout0,($out)
-       lea     16*1($out),$out
+       movups  $inout0,($out)                  # store one output block
+       lea     16*1($out),$out                 # $out+=1*16
        jmp     .Lxts_enc_done
 
 .align 16
 .Lxts_enc_two:
        movups  ($inp),$inout0
        movups  16($inp),$inout1
-       lea     32($inp),$inp
+       lea     32($inp),$inp                   # $inp+=2*16
        xorps   @tweak[0],$inout0
        xorps   @tweak[1],$inout1
 
-       call    _aesni_encrypt3
+       call    _aesni_encrypt2
 
        xorps   @tweak[0],$inout0
        movdqa  @tweak[2],@tweak[0]
        xorps   @tweak[1],$inout1
-       movups  $inout0,($out)
+       movups  $inout0,($out)                  # store 2 output blocks
        movups  $inout1,16*1($out)
-       lea     16*2($out),$out
+       lea     16*2($out),$out                 # $out+=2*16
        jmp     .Lxts_enc_done
 
 .align 16
@@ -1617,7 +2070,7 @@ $code.=<<___;
        movups  ($inp),$inout0
        movups  16*1($inp),$inout1
        movups  16*2($inp),$inout2
-       lea     16*3($inp),$inp
+       lea     16*3($inp),$inp                 # $inp+=3*16
        xorps   @tweak[0],$inout0
        xorps   @tweak[1],$inout1
        xorps   @tweak[2],$inout2
@@ -1628,10 +2081,10 @@ $code.=<<___;
        movdqa  @tweak[3],@tweak[0]
        xorps   @tweak[1],$inout1
        xorps   @tweak[2],$inout2
-       movups  $inout0,($out)
+       movups  $inout0,($out)                  # store 3 output blocks
        movups  $inout1,16*1($out)
        movups  $inout2,16*2($out)
-       lea     16*3($out),$out
+       lea     16*3($out),$out                 # $out+=3*16
        jmp     .Lxts_enc_done
 
 .align 16
@@ -1641,28 +2094,28 @@ $code.=<<___;
        movups  16*2($inp),$inout2
        xorps   @tweak[0],$inout0
        movups  16*3($inp),$inout3
-       lea     16*4($inp),$inp
+       lea     16*4($inp),$inp                 # $inp+=4*16
        xorps   @tweak[1],$inout1
        xorps   @tweak[2],$inout2
        xorps   @tweak[3],$inout3
 
        call    _aesni_encrypt4
 
-       xorps   @tweak[0],$inout0
-       movdqa  @tweak[5],@tweak[0]
-       xorps   @tweak[1],$inout1
-       xorps   @tweak[2],$inout2
-       movups  $inout0,($out)
-       xorps   @tweak[3],$inout3
-       movups  $inout1,16*1($out)
-       movups  $inout2,16*2($out)
-       movups  $inout3,16*3($out)
-       lea     16*4($out),$out
+       pxor    @tweak[0],$inout0
+       movdqa  @tweak[4],@tweak[0]
+       pxor    @tweak[1],$inout1
+       pxor    @tweak[2],$inout2
+       movdqu  $inout0,($out)                  # store 4 output blocks
+       pxor    @tweak[3],$inout3
+       movdqu  $inout1,16*1($out)
+       movdqu  $inout2,16*2($out)
+       movdqu  $inout3,16*3($out)
+       lea     16*4($out),$out                 # $out+=4*16
        jmp     .Lxts_enc_done
 
 .align 16
 .Lxts_enc_done:
-       and     \$15,$len_
+       and     \$15,$len_                      # see if $len%16 is 0
        jz      .Lxts_enc_ret
        mov     $len_,$len
 
@@ -1689,21 +2142,64 @@ $code.=<<___;
        movups  $inout0,-16($out)
 
 .Lxts_enc_ret:
+       xorps   %xmm0,%xmm0                     # clear register bank
+       pxor    %xmm1,%xmm1
+       pxor    %xmm2,%xmm2
+       pxor    %xmm3,%xmm3
+       pxor    %xmm4,%xmm4
+       pxor    %xmm5,%xmm5
+___
+$code.=<<___ if (!$win64);
+       pxor    %xmm6,%xmm6
+       pxor    %xmm7,%xmm7
+       movaps  %xmm0,0x00(%rsp)                # clear stack
+       pxor    %xmm8,%xmm8
+       movaps  %xmm0,0x10(%rsp)
+       pxor    %xmm9,%xmm9
+       movaps  %xmm0,0x20(%rsp)
+       pxor    %xmm10,%xmm10
+       movaps  %xmm0,0x30(%rsp)
+       pxor    %xmm11,%xmm11
+       movaps  %xmm0,0x40(%rsp)
+       pxor    %xmm12,%xmm12
+       movaps  %xmm0,0x50(%rsp)
+       pxor    %xmm13,%xmm13
+       movaps  %xmm0,0x60(%rsp)
+       pxor    %xmm14,%xmm14
+       pxor    %xmm15,%xmm15
 ___
 $code.=<<___ if ($win64);
-       movaps  0x60(%rsp),%xmm6
-       movaps  0x70(%rsp),%xmm7
-       movaps  0x80(%rsp),%xmm8
-       movaps  0x90(%rsp),%xmm9
-       movaps  0xa0(%rsp),%xmm10
-       movaps  0xb0(%rsp),%xmm11
-       movaps  0xc0(%rsp),%xmm12
-       movaps  0xd0(%rsp),%xmm13
-       movaps  0xe0(%rsp),%xmm14
-       movaps  0xf0(%rsp),%xmm15
+       movaps  -0xa0(%rbp),%xmm6
+       movaps  %xmm0,-0xa0(%rbp)               # clear stack
+       movaps  -0x90(%rbp),%xmm7
+       movaps  %xmm0,-0x90(%rbp)
+       movaps  -0x80(%rbp),%xmm8
+       movaps  %xmm0,-0x80(%rbp)
+       movaps  -0x70(%rbp),%xmm9
+       movaps  %xmm0,-0x70(%rbp)
+       movaps  -0x60(%rbp),%xmm10
+       movaps  %xmm0,-0x60(%rbp)
+       movaps  -0x50(%rbp),%xmm11
+       movaps  %xmm0,-0x50(%rbp)
+       movaps  -0x40(%rbp),%xmm12
+       movaps  %xmm0,-0x40(%rbp)
+       movaps  -0x30(%rbp),%xmm13
+       movaps  %xmm0,-0x30(%rbp)
+       movaps  -0x20(%rbp),%xmm14
+       movaps  %xmm0,-0x20(%rbp)
+       movaps  -0x10(%rbp),%xmm15
+       movaps  %xmm0,-0x10(%rbp)
+       movaps  %xmm0,0x00(%rsp)
+       movaps  %xmm0,0x10(%rsp)
+       movaps  %xmm0,0x20(%rsp)
+       movaps  %xmm0,0x30(%rsp)
+       movaps  %xmm0,0x40(%rsp)
+       movaps  %xmm0,0x50(%rsp)
+       movaps  %xmm0,0x60(%rsp)
 ___
 $code.=<<___;
-       lea     $frame_size(%rsp),%rsp
+       lea     (%rbp),%rsp
+       pop     %rbp
 .Lxts_enc_epilogue:
        ret
 .size  aesni_xts_encrypt,.-aesni_xts_encrypt
@@ -1714,28 +2210,32 @@ $code.=<<___;
 .type  aesni_xts_decrypt,\@function,6
 .align 16
 aesni_xts_decrypt:
-       lea     -$frame_size(%rsp),%rsp
+       lea     (%rsp),%rax
+       push    %rbp
+       sub     \$$frame_size,%rsp
+       and     \$-16,%rsp      # Linux kernel stack can be incorrectly seeded
 ___
 $code.=<<___ if ($win64);
-       movaps  %xmm6,0x60(%rsp)
-       movaps  %xmm7,0x70(%rsp)
-       movaps  %xmm8,0x80(%rsp)
-       movaps  %xmm9,0x90(%rsp)
-       movaps  %xmm10,0xa0(%rsp)
-       movaps  %xmm11,0xb0(%rsp)
-       movaps  %xmm12,0xc0(%rsp)
-       movaps  %xmm13,0xd0(%rsp)
-       movaps  %xmm14,0xe0(%rsp)
-       movaps  %xmm15,0xf0(%rsp)
+       movaps  %xmm6,-0xa8(%rax)               # offload everything
+       movaps  %xmm7,-0x98(%rax)
+       movaps  %xmm8,-0x88(%rax)
+       movaps  %xmm9,-0x78(%rax)
+       movaps  %xmm10,-0x68(%rax)
+       movaps  %xmm11,-0x58(%rax)
+       movaps  %xmm12,-0x48(%rax)
+       movaps  %xmm13,-0x38(%rax)
+       movaps  %xmm14,-0x28(%rax)
+       movaps  %xmm15,-0x18(%rax)
 .Lxts_dec_body:
 ___
 $code.=<<___;
-       movups  ($ivp),@tweak[5]                # load clear-text tweak
+       lea     -8(%rax),%rbp
+       movups  ($ivp),$inout0                  # load clear-text tweak
        mov     240($key2),$rounds              # key2->rounds
        mov     240($key),$rnds_                # key1->rounds
 ___
        # generate the tweak
-       &aesni_generate1("enc",$key2,$rounds,@tweak[5]);
+       &aesni_generate1("enc",$key2,$rounds,$inout0);
 $code.=<<___;
        xor     %eax,%eax                       # if ($len%16) len-=16;
        test    \$15,$len
@@ -1743,219 +2243,256 @@ $code.=<<___;
        shl     \$4,%rax
        sub     %rax,$len
 
+       $movkey ($key),$rndkey0                 # zero round key
        mov     $key,$key_                      # backup $key
        mov     $rnds_,$rounds                  # backup $rounds
+       shl     \$4,$rnds_
        mov     $len,$len_                      # backup $len
        and     \$-16,$len
 
+       $movkey 16($key,$rnds_),$rndkey1        # last round key
+
        movdqa  .Lxts_magic(%rip),$twmask
-       pxor    $twtmp,$twtmp
-       pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
+       movdqa  $inout0,@tweak[5]
+       pshufd  \$0x5f,$inout0,$twres
+       pxor    $rndkey0,$rndkey1
 ___
     for ($i=0;$i<4;$i++) {
     $code.=<<___;
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
+       movdqa  $twres,$twtmp
+       paddd   $twres,$twres
        movdqa  @tweak[5],@tweak[$i]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
-       pand    $twmask,$twres                  # isolate carry and residue
-       pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
-       pxor    $twres,@tweak[5]
+       psrad   \$31,$twtmp                     # broadcast upper bits
+       paddq   @tweak[5],@tweak[5]
+       pand    $twmask,$twtmp
+       pxor    $rndkey0,@tweak[$i]
+       pxor    $twtmp,@tweak[5]
 ___
     }
 $code.=<<___;
+       movdqa  @tweak[5],@tweak[4]
+       psrad   \$31,$twres
+       paddq   @tweak[5],@tweak[5]
+       pand    $twmask,$twres
+       pxor    $rndkey0,@tweak[4]
+       pxor    $twres,@tweak[5]
+       movaps  $rndkey1,0x60(%rsp)             # save round[0]^round[last]
+
        sub     \$16*6,$len
-       jc      .Lxts_dec_short
+       jc      .Lxts_dec_short                 # if $len-=6*16 borrowed
 
-       shr     \$1,$rounds
-       sub     \$1,$rounds
-       mov     $rounds,$rnds_
+       mov     \$16+96,$rounds
+       lea     32($key_,$rnds_),$key           # end of key schedule
+       sub     %r10,%rax                       # twisted $rounds
+       $movkey 16($key_),$rndkey1
+       mov     %rax,%r10                       # backup twisted $rounds
+       lea     .Lxts_magic(%rip),%r8
        jmp     .Lxts_dec_grandloop
 
-.align 16
+.align 32
 .Lxts_dec_grandloop:
-       pshufd  \$0x13,$twtmp,$twres
-       movdqa  @tweak[5],@tweak[4]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
        movdqu  `16*0`($inp),$inout0            # load input
-       pand    $twmask,$twres                  # isolate carry and residue
+       movdqa  $rndkey0,$twmask
        movdqu  `16*1`($inp),$inout1
-       pxor    $twres,@tweak[5]
-
+       pxor    @tweak[0],$inout0               # input^=tweak^round[0]
        movdqu  `16*2`($inp),$inout2
-       pxor    @tweak[0],$inout0               # input^=tweak
-       movdqu  `16*3`($inp),$inout3
        pxor    @tweak[1],$inout1
-       movdqu  `16*4`($inp),$inout4
+        aesdec         $rndkey1,$inout0
+       movdqu  `16*3`($inp),$inout3
        pxor    @tweak[2],$inout2
-       movdqu  `16*5`($inp),$inout5
-       lea     `16*6`($inp),$inp
+        aesdec         $rndkey1,$inout1
+       movdqu  `16*4`($inp),$inout4
        pxor    @tweak[3],$inout3
-       $movkey         ($key_),$rndkey0
+        aesdec         $rndkey1,$inout2
+       movdqu  `16*5`($inp),$inout5
+       pxor    @tweak[5],$twmask               # round[0]^=tweak[5]
+        movdqa 0x60(%rsp),$twres               # load round[0]^round[last]
        pxor    @tweak[4],$inout4
-       pxor    @tweak[5],$inout5
+        aesdec         $rndkey1,$inout3
+       $movkey 32($key_),$rndkey0
+       lea     `16*6`($inp),$inp
+       pxor    $twmask,$inout5
 
-       # inline _aesni_decrypt6 and interleave first and last rounds
-       # with own code...
-       $movkey         16($key_),$rndkey1
-       pxor            $rndkey0,$inout0
-       pxor            $rndkey0,$inout1
-        movdqa @tweak[0],`16*0`(%rsp)          # put aside tweaks
-       aesdec          $rndkey1,$inout0
-       lea             32($key_),$key
-       pxor            $rndkey0,$inout2
-        movdqa @tweak[1],`16*1`(%rsp)
-       aesdec          $rndkey1,$inout1
-       pxor            $rndkey0,$inout3
-        movdqa @tweak[2],`16*2`(%rsp)
-       aesdec          $rndkey1,$inout2
-       pxor            $rndkey0,$inout4
-        movdqa @tweak[3],`16*3`(%rsp)
-       aesdec          $rndkey1,$inout3
-       pxor            $rndkey0,$inout5
-       $movkey         ($key),$rndkey0
-       dec             $rounds
-        movdqa @tweak[4],`16*4`(%rsp)
+        pxor   $twres,@tweak[0]                # calculate tweaks^round[last]
        aesdec          $rndkey1,$inout4
-        movdqa @tweak[5],`16*5`(%rsp)
+        pxor   $twres,@tweak[1]
+        movdqa @tweak[0],`16*0`(%rsp)          # put aside tweaks^last round key
        aesdec          $rndkey1,$inout5
-       pxor    $twtmp,$twtmp
-       pcmpgtd @tweak[5],$twtmp
-       jmp             .Lxts_dec_loop6_enter
+       $movkey         48($key_),$rndkey1
+        pxor   $twres,@tweak[2]
 
-.align 16
+       aesdec          $rndkey0,$inout0
+        pxor   $twres,@tweak[3]
+        movdqa @tweak[1],`16*1`(%rsp)
+       aesdec          $rndkey0,$inout1
+        pxor   $twres,@tweak[4]
+        movdqa @tweak[2],`16*2`(%rsp)
+       aesdec          $rndkey0,$inout2
+       aesdec          $rndkey0,$inout3
+        pxor   $twres,$twmask
+        movdqa @tweak[4],`16*4`(%rsp)
+       aesdec          $rndkey0,$inout4
+       aesdec          $rndkey0,$inout5
+       $movkey         64($key_),$rndkey0
+        movdqa $twmask,`16*5`(%rsp)
+       pshufd  \$0x5f,@tweak[5],$twres
+       jmp     .Lxts_dec_loop6
+.align 32
 .Lxts_dec_loop6:
        aesdec          $rndkey1,$inout0
        aesdec          $rndkey1,$inout1
-       dec             $rounds
        aesdec          $rndkey1,$inout2
        aesdec          $rndkey1,$inout3
        aesdec          $rndkey1,$inout4
        aesdec          $rndkey1,$inout5
-.Lxts_dec_loop6_enter:
-       $movkey         16($key),$rndkey1
+       $movkey         -64($key,%rax),$rndkey1
+       add             \$32,%rax
+
        aesdec          $rndkey0,$inout0
        aesdec          $rndkey0,$inout1
-       lea             32($key),$key
        aesdec          $rndkey0,$inout2
        aesdec          $rndkey0,$inout3
        aesdec          $rndkey0,$inout4
        aesdec          $rndkey0,$inout5
-       $movkey         ($key),$rndkey0
+       $movkey         -80($key,%rax),$rndkey0
        jnz             .Lxts_dec_loop6
 
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
+       movdqa  (%r8),$twmask                   # start calculating next tweak
+       movdqa  $twres,$twtmp
+       paddd   $twres,$twres
         aesdec         $rndkey1,$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
+       paddq   @tweak[5],@tweak[5]
+       psrad   \$31,$twtmp
         aesdec         $rndkey1,$inout1
-       pcmpgtd @tweak[5],$twtmp                # broadcast upper bits
+       pand    $twmask,$twtmp
+       $movkey ($key_),@tweak[0]               # load round[0]
         aesdec         $rndkey1,$inout2
-       pxor    $twres,@tweak[5]
         aesdec         $rndkey1,$inout3
         aesdec         $rndkey1,$inout4
+       pxor    $twtmp,@tweak[5]
+       movaps  @tweak[0],@tweak[1]             # copy round[0]
         aesdec         $rndkey1,$inout5
-        $movkey        16($key),$rndkey1
+        $movkey        -64($key),$rndkey1
 
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
-       movdqa  @tweak[5],@tweak[0]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
+       movdqa  $twres,$twtmp
         aesdec         $rndkey0,$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
+       paddd   $twres,$twres
+       pxor    @tweak[5],@tweak[0]
         aesdec         $rndkey0,$inout1
-       pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+       psrad   \$31,$twtmp
+       paddq   @tweak[5],@tweak[5]
         aesdec         $rndkey0,$inout2
-       pxor    $twres,@tweak[5]
         aesdec         $rndkey0,$inout3
+       pand    $twmask,$twtmp
+       movaps  @tweak[1],@tweak[2]
         aesdec         $rndkey0,$inout4
+       pxor    $twtmp,@tweak[5]
+       movdqa  $twres,$twtmp
         aesdec         $rndkey0,$inout5
-        $movkey        32($key),$rndkey0
+        $movkey        -48($key),$rndkey0
 
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
-       movdqa  @tweak[5],@tweak[1]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
+       paddd   $twres,$twres
         aesdec         $rndkey1,$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
+       pxor    @tweak[5],@tweak[1]
+       psrad   \$31,$twtmp
         aesdec         $rndkey1,$inout1
-       pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+       paddq   @tweak[5],@tweak[5]
+       pand    $twmask,$twtmp
         aesdec         $rndkey1,$inout2
-       pxor    $twres,@tweak[5]
         aesdec         $rndkey1,$inout3
+        movdqa @tweak[3],`16*3`(%rsp)
+       pxor    $twtmp,@tweak[5]
         aesdec         $rndkey1,$inout4
+       movaps  @tweak[2],@tweak[3]
+       movdqa  $twres,$twtmp
         aesdec         $rndkey1,$inout5
+        $movkey        -32($key),$rndkey1
 
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
-       movdqa  @tweak[5],@tweak[2]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
-        aesdeclast     $rndkey0,$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
-        aesdeclast     $rndkey0,$inout1
-       pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
-        aesdeclast     $rndkey0,$inout2
-       pxor    $twres,@tweak[5]
-        aesdeclast     $rndkey0,$inout3
-        aesdeclast     $rndkey0,$inout4
-        aesdeclast     $rndkey0,$inout5
-
-       pshufd  \$0x13,$twtmp,$twres
-       pxor    $twtmp,$twtmp
-       movdqa  @tweak[5],@tweak[3]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
-        xorps  `16*0`(%rsp),$inout0            # output^=tweak
-       pand    $twmask,$twres                  # isolate carry and residue
-        xorps  `16*1`(%rsp),$inout1
-       pcmpgtd @tweak[5],$twtmp                # broadcat upper bits
+       paddd   $twres,$twres
+        aesdec         $rndkey0,$inout0
+       pxor    @tweak[5],@tweak[2]
+       psrad   \$31,$twtmp
+        aesdec         $rndkey0,$inout1
+       paddq   @tweak[5],@tweak[5]
+       pand    $twmask,$twtmp
+        aesdec         $rndkey0,$inout2
+        aesdec         $rndkey0,$inout3
+        aesdec         $rndkey0,$inout4
+       pxor    $twtmp,@tweak[5]
+       movaps  @tweak[3],@tweak[4]
+        aesdec         $rndkey0,$inout5
+
+       movdqa  $twres,$rndkey0
+       paddd   $twres,$twres
+        aesdec         $rndkey1,$inout0
+       pxor    @tweak[5],@tweak[3]
+       psrad   \$31,$rndkey0
+        aesdec         $rndkey1,$inout1
+       paddq   @tweak[5],@tweak[5]
+       pand    $twmask,$rndkey0
+        aesdec         $rndkey1,$inout2
+        aesdec         $rndkey1,$inout3
+       pxor    $rndkey0,@tweak[5]
+       $movkey         ($key_),$rndkey0
+        aesdec         $rndkey1,$inout4
+        aesdec         $rndkey1,$inout5
+       $movkey         16($key_),$rndkey1
+
+       pxor    @tweak[5],@tweak[4]
+        aesdeclast     `16*0`(%rsp),$inout0
+       psrad   \$31,$twres
+       paddq   @tweak[5],@tweak[5]
+        aesdeclast     `16*1`(%rsp),$inout1
+        aesdeclast     `16*2`(%rsp),$inout2
+       pand    $twmask,$twres
+       mov     %r10,%rax                       # restore $rounds
+        aesdeclast     `16*3`(%rsp),$inout3
+        aesdeclast     `16*4`(%rsp),$inout4
+        aesdeclast     `16*5`(%rsp),$inout5
        pxor    $twres,@tweak[5]
 
-       xorps   `16*2`(%rsp),$inout2
-       movups  $inout0,`16*0`($out)            # write output
-       xorps   `16*3`(%rsp),$inout3
-       movups  $inout1,`16*1`($out)
-       xorps   `16*4`(%rsp),$inout4
-       movups  $inout2,`16*2`($out)
-       xorps   `16*5`(%rsp),$inout5
-       movups  $inout3,`16*3`($out)
-       mov     $rnds_,$rounds                  # restore $rounds
-       movups  $inout4,`16*4`($out)
-       movups  $inout5,`16*5`($out)
-       lea     `16*6`($out),$out
+       lea     `16*6`($out),$out               # $out+=6*16
+       movups  $inout0,`-16*6`($out)           # store 6 output blocks
+       movups  $inout1,`-16*5`($out)
+       movups  $inout2,`-16*4`($out)
+       movups  $inout3,`-16*3`($out)
+       movups  $inout4,`-16*2`($out)
+       movups  $inout5,`-16*1`($out)
        sub     \$16*6,$len
-       jnc     .Lxts_dec_grandloop
+       jnc     .Lxts_dec_grandloop             # loop if $len-=6*16 didn't borrow
 
-       lea     3($rounds,$rounds),$rounds      # restore original value
+       mov     \$16+96,$rounds
+       sub     $rnds_,$rounds
        mov     $key_,$key                      # restore $key
-       mov     $rounds,$rnds_                  # backup $rounds
+       shr     \$4,$rounds                     # restore original value
 
 .Lxts_dec_short:
-       add     \$16*6,$len
-       jz      .Lxts_dec_done
+       # at this point @tweak[0..5] are populated with tweak values
+       mov     $rounds,$rnds_                  # backup $rounds
+       pxor    $rndkey0,@tweak[0]
+       pxor    $rndkey0,@tweak[1]
+       add     \$16*6,$len                     # restore real remaining $len
+       jz      .Lxts_dec_done                  # done if ($len==0)
 
+       pxor    $rndkey0,@tweak[2]
        cmp     \$0x20,$len
-       jb      .Lxts_dec_one
-       je      .Lxts_dec_two
+       jb      .Lxts_dec_one                   # $len is 1*16
+       pxor    $rndkey0,@tweak[3]
+       je      .Lxts_dec_two                   # $len is 2*16
 
+       pxor    $rndkey0,@tweak[4]
        cmp     \$0x40,$len
-       jb      .Lxts_dec_three
-       je      .Lxts_dec_four
-
-       pshufd  \$0x13,$twtmp,$twres
-       movdqa  @tweak[5],@tweak[4]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
-        movdqu ($inp),$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
-        movdqu 16*1($inp),$inout1
-       pxor    $twres,@tweak[5]
+       jb      .Lxts_dec_three                 # $len is 3*16
+       je      .Lxts_dec_four                  # $len is 4*16
 
+       movdqu  ($inp),$inout0                  # $len is 5*16
+       movdqu  16*1($inp),$inout1
        movdqu  16*2($inp),$inout2
        pxor    @tweak[0],$inout0
        movdqu  16*3($inp),$inout3
        pxor    @tweak[1],$inout1
        movdqu  16*4($inp),$inout4
-       lea     16*5($inp),$inp
+       lea     16*5($inp),$inp                 # $inp+=5*16
        pxor    @tweak[2],$inout2
        pxor    @tweak[3],$inout3
        pxor    @tweak[4],$inout4
@@ -1965,7 +2502,7 @@ $code.=<<___;
        xorps   @tweak[0],$inout0
        xorps   @tweak[1],$inout1
        xorps   @tweak[2],$inout2
-       movdqu  $inout0,($out)
+       movdqu  $inout0,($out)                  # store 5 output blocks
        xorps   @tweak[3],$inout3
        movdqu  $inout1,16*1($out)
        xorps   @tweak[4],$inout4
@@ -1974,7 +2511,7 @@ $code.=<<___;
        movdqu  $inout3,16*3($out)
         pcmpgtd        @tweak[5],$twtmp
        movdqu  $inout4,16*4($out)
-       lea     16*5($out),$out
+       lea     16*5($out),$out                 # $out+=5*16
         pshufd         \$0x13,$twtmp,@tweak[1] # $twres
        and     \$15,$len_
        jz      .Lxts_dec_ret
@@ -1988,35 +2525,35 @@ $code.=<<___;
 .align 16
 .Lxts_dec_one:
        movups  ($inp),$inout0
-       lea     16*1($inp),$inp
+       lea     16*1($inp),$inp                 # $inp+=1*16
        xorps   @tweak[0],$inout0
 ___
        &aesni_generate1("dec",$key,$rounds);
 $code.=<<___;
        xorps   @tweak[0],$inout0
        movdqa  @tweak[1],@tweak[0]
-       movups  $inout0,($out)
+       movups  $inout0,($out)                  # store one output block
        movdqa  @tweak[2],@tweak[1]
-       lea     16*1($out),$out
+       lea     16*1($out),$out                 # $out+=1*16
        jmp     .Lxts_dec_done
 
 .align 16
 .Lxts_dec_two:
        movups  ($inp),$inout0
        movups  16($inp),$inout1
-       lea     32($inp),$inp
+       lea     32($inp),$inp                   # $inp+=2*16
        xorps   @tweak[0],$inout0
        xorps   @tweak[1],$inout1
 
-       call    _aesni_decrypt3
+       call    _aesni_decrypt2
 
        xorps   @tweak[0],$inout0
        movdqa  @tweak[2],@tweak[0]
        xorps   @tweak[1],$inout1
        movdqa  @tweak[3],@tweak[1]
-       movups  $inout0,($out)
+       movups  $inout0,($out)                  # store 2 output blocks
        movups  $inout1,16*1($out)
-       lea     16*2($out),$out
+       lea     16*2($out),$out                 # $out+=2*16
        jmp     .Lxts_dec_done
 
 .align 16
@@ -2024,7 +2561,7 @@ $code.=<<___;
        movups  ($inp),$inout0
        movups  16*1($inp),$inout1
        movups  16*2($inp),$inout2
-       lea     16*3($inp),$inp
+       lea     16*3($inp),$inp                 # $inp+=3*16
        xorps   @tweak[0],$inout0
        xorps   @tweak[1],$inout1
        xorps   @tweak[2],$inout2
@@ -2034,50 +2571,44 @@ $code.=<<___;
        xorps   @tweak[0],$inout0
        movdqa  @tweak[3],@tweak[0]
        xorps   @tweak[1],$inout1
-       movdqa  @tweak[5],@tweak[1]
+       movdqa  @tweak[4],@tweak[1]
        xorps   @tweak[2],$inout2
-       movups  $inout0,($out)
+       movups  $inout0,($out)                  # store 3 output blocks
        movups  $inout1,16*1($out)
        movups  $inout2,16*2($out)
-       lea     16*3($out),$out
+       lea     16*3($out),$out                 # $out+=3*16
        jmp     .Lxts_dec_done
 
 .align 16
 .Lxts_dec_four:
-       pshufd  \$0x13,$twtmp,$twres
-       movdqa  @tweak[5],@tweak[4]
-       paddq   @tweak[5],@tweak[5]             # psllq 1,$tweak
-        movups ($inp),$inout0
-       pand    $twmask,$twres                  # isolate carry and residue
-        movups 16*1($inp),$inout1
-       pxor    $twres,@tweak[5]
-
+       movups  ($inp),$inout0
+       movups  16*1($inp),$inout1
        movups  16*2($inp),$inout2
        xorps   @tweak[0],$inout0
        movups  16*3($inp),$inout3
-       lea     16*4($inp),$inp
+       lea     16*4($inp),$inp                 # $inp+=4*16
        xorps   @tweak[1],$inout1
        xorps   @tweak[2],$inout2
        xorps   @tweak[3],$inout3
 
        call    _aesni_decrypt4
 
-       xorps   @tweak[0],$inout0
+       pxor    @tweak[0],$inout0
        movdqa  @tweak[4],@tweak[0]
-       xorps   @tweak[1],$inout1
+       pxor    @tweak[1],$inout1
        movdqa  @tweak[5],@tweak[1]
-       xorps   @tweak[2],$inout2
-       movups  $inout0,($out)
-       xorps   @tweak[3],$inout3
-       movups  $inout1,16*1($out)
-       movups  $inout2,16*2($out)
-       movups  $inout3,16*3($out)
-       lea     16*4($out),$out
+       pxor    @tweak[2],$inout2
+       movdqu  $inout0,($out)                  # store 4 output blocks
+       pxor    @tweak[3],$inout3
+       movdqu  $inout1,16*1($out)
+       movdqu  $inout2,16*2($out)
+       movdqu  $inout3,16*3($out)
+       lea     16*4($out),$out                 # $out+=4*16
        jmp     .Lxts_dec_done
 
 .align 16
 .Lxts_dec_done:
-       and     \$15,$len_
+       and     \$15,$len_                      # see if $len%16 is 0
        jz      .Lxts_dec_ret
 .Lxts_dec_done2:
        mov     $len_,$len
@@ -2115,21 +2646,64 @@ $code.=<<___;
        movups  $inout0,($out)
 
 .Lxts_dec_ret:
+       xorps   %xmm0,%xmm0                     # clear register bank
+       pxor    %xmm1,%xmm1
+       pxor    %xmm2,%xmm2
+       pxor    %xmm3,%xmm3
+       pxor    %xmm4,%xmm4
+       pxor    %xmm5,%xmm5
+___
+$code.=<<___ if (!$win64);
+       pxor    %xmm6,%xmm6
+       pxor    %xmm7,%xmm7
+       movaps  %xmm0,0x00(%rsp)                # clear stack
+       pxor    %xmm8,%xmm8
+       movaps  %xmm0,0x10(%rsp)
+       pxor    %xmm9,%xmm9
+       movaps  %xmm0,0x20(%rsp)
+       pxor    %xmm10,%xmm10
+       movaps  %xmm0,0x30(%rsp)
+       pxor    %xmm11,%xmm11
+       movaps  %xmm0,0x40(%rsp)
+       pxor    %xmm12,%xmm12
+       movaps  %xmm0,0x50(%rsp)
+       pxor    %xmm13,%xmm13
+       movaps  %xmm0,0x60(%rsp)
+       pxor    %xmm14,%xmm14
+       pxor    %xmm15,%xmm15
 ___
 $code.=<<___ if ($win64);
-       movaps  0x60(%rsp),%xmm6
-       movaps  0x70(%rsp),%xmm7
-       movaps  0x80(%rsp),%xmm8
-       movaps  0x90(%rsp),%xmm9
-       movaps  0xa0(%rsp),%xmm10
-       movaps  0xb0(%rsp),%xmm11
-       movaps  0xc0(%rsp),%xmm12
-       movaps  0xd0(%rsp),%xmm13
-       movaps  0xe0(%rsp),%xmm14
-       movaps  0xf0(%rsp),%xmm15
+       movaps  -0xa0(%rbp),%xmm6
+       movaps  %xmm0,-0xa0(%rbp)               # clear stack
+       movaps  -0x90(%rbp),%xmm7
+       movaps  %xmm0,-0x90(%rbp)
+       movaps  -0x80(%rbp),%xmm8
+       movaps  %xmm0,-0x80(%rbp)
+       movaps  -0x70(%rbp),%xmm9
+       movaps  %xmm0,-0x70(%rbp)
+       movaps  -0x60(%rbp),%xmm10
+       movaps  %xmm0,-0x60(%rbp)
+       movaps  -0x50(%rbp),%xmm11
+       movaps  %xmm0,-0x50(%rbp)
+       movaps  -0x40(%rbp),%xmm12
+       movaps  %xmm0,-0x40(%rbp)
+       movaps  -0x30(%rbp),%xmm13
+       movaps  %xmm0,-0x30(%rbp)
+       movaps  -0x20(%rbp),%xmm14
+       movaps  %xmm0,-0x20(%rbp)
+       movaps  -0x10(%rbp),%xmm15
+       movaps  %xmm0,-0x10(%rbp)
+       movaps  %xmm0,0x00(%rsp)
+       movaps  %xmm0,0x10(%rsp)
+       movaps  %xmm0,0x20(%rsp)
+       movaps  %xmm0,0x30(%rsp)
+       movaps  %xmm0,0x40(%rsp)
+       movaps  %xmm0,0x50(%rsp)
+       movaps  %xmm0,0x60(%rsp)
 ___
 $code.=<<___;
-       lea     $frame_size(%rsp),%rsp
+       lea     (%rbp),%rsp
+       pop     %rbp
 .Lxts_dec_epilogue:
        ret
 .size  aesni_xts_decrypt,.-aesni_xts_decrypt
@@ -2141,7 +2715,10 @@ ___
 #                          size_t length, const AES_KEY *key,
 #                          unsigned char *ivp,const int enc);
 {
-my $reserved = $win64?0x40:-0x18;      # used in decrypt
+my $frame_size = 0x10 + ($win64?0xa0:0);       # used in decrypt
+my ($iv,$in0,$in1,$in2,$in3,$in4)=map("%xmm$_",(10..15));
+my $inp_=$key_;
+
 $code.=<<___;
 .globl ${PREFIX}_cbc_encrypt
 .type  ${PREFIX}_cbc_encrypt,\@function,6
@@ -2177,7 +2754,11 @@ $code.=<<___;
        jnc     .Lcbc_enc_loop
        add     \$16,$len
        jnz     .Lcbc_enc_tail
+        pxor   $rndkey0,$rndkey0       # clear register bank
+        pxor   $rndkey1,$rndkey1
        movups  $inout0,($ivp)
+        pxor   $inout0,$inout0
+        pxor   $inout1,$inout1
        jmp     .Lcbc_ret
 
 .Lcbc_enc_tail:
@@ -2197,283 +2778,483 @@ $code.=<<___;
 \f#--------------------------- CBC DECRYPT ------------------------------#
 .align 16
 .Lcbc_decrypt:
+       cmp     \$16,$len
+       jne     .Lcbc_decrypt_bulk
+
+       # handle single block without allocating stack frame,
+       # useful in ciphertext stealing mode
+       movdqu  ($inp),$inout0          # load input
+       movdqu  ($ivp),$inout1          # load iv
+       movdqa  $inout0,$inout2         # future iv
+___
+       &aesni_generate1("dec",$key,$rnds_);
+$code.=<<___;
+        pxor   $rndkey0,$rndkey0       # clear register bank
+        pxor   $rndkey1,$rndkey1
+       movdqu  $inout2,($ivp)          # store iv
+       xorps   $inout1,$inout0         # ^=iv
+        pxor   $inout1,$inout1
+       movups  $inout0,($out)          # store output
+        pxor   $inout0,$inout0
+       jmp     .Lcbc_ret
+.align 16
+.Lcbc_decrypt_bulk:
+       lea     (%rsp),%rax
+       push    %rbp
+       sub     \$$frame_size,%rsp
+       and     \$-16,%rsp      # Linux kernel stack can be incorrectly seeded
 ___
 $code.=<<___ if ($win64);
-       lea     -0x58(%rsp),%rsp
-       movaps  %xmm6,(%rsp)
-       movaps  %xmm7,0x10(%rsp)
-       movaps  %xmm8,0x20(%rsp)
-       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm6,0x10(%rsp)
+       movaps  %xmm7,0x20(%rsp)
+       movaps  %xmm8,0x30(%rsp)
+       movaps  %xmm9,0x40(%rsp)
+       movaps  %xmm10,0x50(%rsp)
+       movaps  %xmm11,0x60(%rsp)
+       movaps  %xmm12,0x70(%rsp)
+       movaps  %xmm13,0x80(%rsp)
+       movaps  %xmm14,0x90(%rsp)
+       movaps  %xmm15,0xa0(%rsp)
 .Lcbc_decrypt_body:
 ___
 $code.=<<___;
+       lea     -8(%rax),%rbp
        movups  ($ivp),$iv
        mov     $rnds_,$rounds
-       cmp     \$0x70,$len
+       cmp     \$0x50,$len
        jbe     .Lcbc_dec_tail
-       shr     \$1,$rnds_
-       sub     \$0x70,$len
-       mov     $rnds_,$rounds
-       movaps  $iv,$reserved(%rsp)
+
+       $movkey ($key),$rndkey0
+       movdqu  0x00($inp),$inout0      # load input
+       movdqu  0x10($inp),$inout1
+       movdqa  $inout0,$in0
+       movdqu  0x20($inp),$inout2
+       movdqa  $inout1,$in1
+       movdqu  0x30($inp),$inout3
+       movdqa  $inout2,$in2
+       movdqu  0x40($inp),$inout4
+       movdqa  $inout3,$in3
+       movdqu  0x50($inp),$inout5
+       movdqa  $inout4,$in4
+       mov     OPENSSL_ia32cap_P+4(%rip),%r9d
+       cmp     \$0x70,$len
+       jbe     .Lcbc_dec_six_or_seven
+
+       and     \$`1<<26|1<<22`,%r9d    # isolate XSAVE+MOVBE
+       sub     \$0x50,$len             # $len is biased by -5*16
+       cmp     \$`1<<22`,%r9d          # check for MOVBE without XSAVE
+       je      .Lcbc_dec_loop6_enter   # [which denotes Atom Silvermont]
+       sub     \$0x20,$len             # $len is biased by -7*16
+       lea     0x70($key),$key         # size optimization
        jmp     .Lcbc_dec_loop8_enter
 .align 16
 .Lcbc_dec_loop8:
-       movaps  $rndkey0,$reserved(%rsp)        # save IV
        movups  $inout7,($out)
        lea     0x10($out),$out
 .Lcbc_dec_loop8_enter:
-       $movkey         ($key),$rndkey0
-       movups  ($inp),$inout0                  # load input
-       movups  0x10($inp),$inout1
-       $movkey         16($key),$rndkey1
+       movdqu          0x60($inp),$inout6
+       pxor            $rndkey0,$inout0
+       movdqu          0x70($inp),$inout7
+       pxor            $rndkey0,$inout1
+       $movkey         0x10-0x70($key),$rndkey1
+       pxor            $rndkey0,$inout2
+       xor             $inp_,$inp_
+       cmp             \$0x70,$len     # is there at least 0x60 bytes ahead?
+       pxor            $rndkey0,$inout3
+       pxor            $rndkey0,$inout4
+       pxor            $rndkey0,$inout5
+       pxor            $rndkey0,$inout6
 
-       lea             32($key),$key
-       movdqu  0x20($inp),$inout2
-       xorps           $rndkey0,$inout0
-       movdqu  0x30($inp),$inout3
-       xorps           $rndkey0,$inout1
-       movdqu  0x40($inp),$inout4
        aesdec          $rndkey1,$inout0
-       pxor            $rndkey0,$inout2
-       movdqu  0x50($inp),$inout5
+       pxor            $rndkey0,$inout7
+       $movkey         0x20-0x70($key),$rndkey0
        aesdec          $rndkey1,$inout1
-       pxor            $rndkey0,$inout3
-       movdqu  0x60($inp),$inout6
        aesdec          $rndkey1,$inout2
-       pxor            $rndkey0,$inout4
-       movdqu  0x70($inp),$inout7
        aesdec          $rndkey1,$inout3
-       pxor            $rndkey0,$inout5
-       dec             $rounds
        aesdec          $rndkey1,$inout4
-       pxor            $rndkey0,$inout6
        aesdec          $rndkey1,$inout5
-       pxor            $rndkey0,$inout7
-       $movkey         ($key),$rndkey0
        aesdec          $rndkey1,$inout6
+       setnc           ${inp_}b
+       shl             \$7,$inp_
        aesdec          $rndkey1,$inout7
-       $movkey         16($key),$rndkey1
-
-       call            .Ldec_loop8_enter
+       add             $inp,$inp_
+       $movkey         0x30-0x70($key),$rndkey1
+___
+for($i=1;$i<12;$i++) {
+my $rndkeyx = ($i&1)?$rndkey0:$rndkey1;
+$code.=<<___   if ($i==7);
+       cmp             \$11,$rounds
+___
+$code.=<<___;
+       aesdec          $rndkeyx,$inout0
+       aesdec          $rndkeyx,$inout1
+       aesdec          $rndkeyx,$inout2
+       aesdec          $rndkeyx,$inout3
+       aesdec          $rndkeyx,$inout4
+       aesdec          $rndkeyx,$inout5
+       aesdec          $rndkeyx,$inout6
+       aesdec          $rndkeyx,$inout7
+       $movkey         `0x30+0x10*$i`-0x70($key),$rndkeyx
+___
+$code.=<<___   if ($i<6 || (!($i&1) && $i>7));
+       nop
+___
+$code.=<<___   if ($i==7);
+       jb              .Lcbc_dec_done
+___
+$code.=<<___   if ($i==9);
+       je              .Lcbc_dec_done
+___
+$code.=<<___   if ($i==11);
+       jmp             .Lcbc_dec_done
+___
+}
+$code.=<<___;
+.align 16
+.Lcbc_dec_done:
+       aesdec          $rndkey1,$inout0
+       aesdec          $rndkey1,$inout1
+       pxor            $rndkey0,$iv
+       pxor            $rndkey0,$in0
+       aesdec          $rndkey1,$inout2
+       aesdec          $rndkey1,$inout3
+       pxor            $rndkey0,$in1
+       pxor            $rndkey0,$in2
+       aesdec          $rndkey1,$inout4
+       aesdec          $rndkey1,$inout5
+       pxor            $rndkey0,$in3
+       pxor            $rndkey0,$in4
+       aesdec          $rndkey1,$inout6
+       aesdec          $rndkey1,$inout7
+       movdqu          0x50($inp),$rndkey1
+
+       aesdeclast      $iv,$inout0
+       movdqu          0x60($inp),$iv          # borrow $iv
+       pxor            $rndkey0,$rndkey1
+       aesdeclast      $in0,$inout1
+       pxor            $rndkey0,$iv
+       movdqu          0x70($inp),$rndkey0     # next IV
+       aesdeclast      $in1,$inout2
+       lea             0x80($inp),$inp
+       movdqu          0x00($inp_),$in0
+       aesdeclast      $in2,$inout3
+       aesdeclast      $in3,$inout4
+       movdqu          0x10($inp_),$in1
+       movdqu          0x20($inp_),$in2
+       aesdeclast      $in4,$inout5
+       aesdeclast      $rndkey1,$inout6
+       movdqu          0x30($inp_),$in3
+       movdqu          0x40($inp_),$in4
+       aesdeclast      $iv,$inout7
+       movdqa          $rndkey0,$iv            # return $iv
+       movdqu          0x50($inp_),$rndkey1
+       $movkey         -0x70($key),$rndkey0
+
+       movups          $inout0,($out)          # store output
+       movdqa          $in0,$inout0
+       movups          $inout1,0x10($out)
+       movdqa          $in1,$inout1
+       movups          $inout2,0x20($out)
+       movdqa          $in2,$inout2
+       movups          $inout3,0x30($out)
+       movdqa          $in3,$inout3
+       movups          $inout4,0x40($out)
+       movdqa          $in4,$inout4
+       movups          $inout5,0x50($out)
+       movdqa          $rndkey1,$inout5
+       movups          $inout6,0x60($out)
+       lea             0x70($out),$out
 
-       movups  ($inp),$rndkey1         # re-load input
-       movups  0x10($inp),$rndkey0
-       xorps   $reserved(%rsp),$inout0 # ^= IV
-       xorps   $rndkey1,$inout1
-       movups  0x20($inp),$rndkey1
-       xorps   $rndkey0,$inout2
-       movups  0x30($inp),$rndkey0
-       xorps   $rndkey1,$inout3
-       movups  0x40($inp),$rndkey1
-       xorps   $rndkey0,$inout4
-       movups  0x50($inp),$rndkey0
-       xorps   $rndkey1,$inout5
-       movups  0x60($inp),$rndkey1
-       xorps   $rndkey0,$inout6
-       movups  0x70($inp),$rndkey0     # IV
-       xorps   $rndkey1,$inout7
-       movups  $inout0,($out)
-       movups  $inout1,0x10($out)
-       movups  $inout2,0x20($out)
-       movups  $inout3,0x30($out)
-       mov     $rnds_,$rounds          # restore $rounds
-       movups  $inout4,0x40($out)
-       mov     $key_,$key              # restore $key
-       movups  $inout5,0x50($out)
-       lea     0x80($inp),$inp
-       movups  $inout6,0x60($out)
-       lea     0x70($out),$out
        sub     \$0x80,$len
        ja      .Lcbc_dec_loop8
 
        movaps  $inout7,$inout0
-       movaps  $rndkey0,$iv
+       lea     -0x70($key),$key
        add     \$0x70,$len
-       jle     .Lcbc_dec_tail_collected
-       movups  $inout0,($out)
-       lea     1($rnds_,$rnds_),$rounds
+       jle     .Lcbc_dec_clear_tail_collected
+       movups  $inout7,($out)
+       lea     0x10($out),$out
+       cmp     \$0x50,$len
+       jbe     .Lcbc_dec_tail
+
+       movaps  $in0,$inout0
+.Lcbc_dec_six_or_seven:
+       cmp     \$0x60,$len
+       ja      .Lcbc_dec_seven
+
+       movaps  $inout5,$inout6
+       call    _aesni_decrypt6
+       pxor    $iv,$inout0             # ^= IV
+       movaps  $inout6,$iv
+       pxor    $in0,$inout1
+       movdqu  $inout0,($out)
+       pxor    $in1,$inout2
+       movdqu  $inout1,0x10($out)
+        pxor   $inout1,$inout1         # clear register bank
+       pxor    $in2,$inout3
+       movdqu  $inout2,0x20($out)
+        pxor   $inout2,$inout2
+       pxor    $in3,$inout4
+       movdqu  $inout3,0x30($out)
+        pxor   $inout3,$inout3
+       pxor    $in4,$inout5
+       movdqu  $inout4,0x40($out)
+        pxor   $inout4,$inout4
+       lea     0x50($out),$out
+       movdqa  $inout5,$inout0
+        pxor   $inout5,$inout5
+       jmp     .Lcbc_dec_tail_collected
+
+.align 16
+.Lcbc_dec_seven:
+       movups  0x60($inp),$inout6
+       xorps   $inout7,$inout7
+       call    _aesni_decrypt8
+       movups  0x50($inp),$inout7
+       pxor    $iv,$inout0             # ^= IV
+       movups  0x60($inp),$iv
+       pxor    $in0,$inout1
+       movdqu  $inout0,($out)
+       pxor    $in1,$inout2
+       movdqu  $inout1,0x10($out)
+        pxor   $inout1,$inout1         # clear register bank
+       pxor    $in2,$inout3
+       movdqu  $inout2,0x20($out)
+        pxor   $inout2,$inout2
+       pxor    $in3,$inout4
+       movdqu  $inout3,0x30($out)
+        pxor   $inout3,$inout3
+       pxor    $in4,$inout5
+       movdqu  $inout4,0x40($out)
+        pxor   $inout4,$inout4
+       pxor    $inout7,$inout6
+       movdqu  $inout5,0x50($out)
+        pxor   $inout5,$inout5
+       lea     0x60($out),$out
+       movdqa  $inout6,$inout0
+        pxor   $inout6,$inout6
+        pxor   $inout7,$inout7
+       jmp     .Lcbc_dec_tail_collected
+
+.align 16
+.Lcbc_dec_loop6:
+       movups  $inout5,($out)
+       lea     0x10($out),$out
+       movdqu  0x00($inp),$inout0      # load input
+       movdqu  0x10($inp),$inout1
+       movdqa  $inout0,$in0
+       movdqu  0x20($inp),$inout2
+       movdqa  $inout1,$in1
+       movdqu  0x30($inp),$inout3
+       movdqa  $inout2,$in2
+       movdqu  0x40($inp),$inout4
+       movdqa  $inout3,$in3
+       movdqu  0x50($inp),$inout5
+       movdqa  $inout4,$in4
+.Lcbc_dec_loop6_enter:
+       lea     0x60($inp),$inp
+       movdqa  $inout5,$inout6
+
+       call    _aesni_decrypt6
+
+       pxor    $iv,$inout0             # ^= IV
+       movdqa  $inout6,$iv
+       pxor    $in0,$inout1
+       movdqu  $inout0,($out)
+       pxor    $in1,$inout2
+       movdqu  $inout1,0x10($out)
+       pxor    $in2,$inout3
+       movdqu  $inout2,0x20($out)
+       pxor    $in3,$inout4
+       mov     $key_,$key
+       movdqu  $inout3,0x30($out)
+       pxor    $in4,$inout5
+       mov     $rnds_,$rounds
+       movdqu  $inout4,0x40($out)
+       lea     0x50($out),$out
+       sub     \$0x60,$len
+       ja      .Lcbc_dec_loop6
+
+       movdqa  $inout5,$inout0
+       add     \$0x50,$len
+       jle     .Lcbc_dec_clear_tail_collected
+       movups  $inout5,($out)
        lea     0x10($out),$out
+
 .Lcbc_dec_tail:
        movups  ($inp),$inout0
-       movaps  $inout0,$in0
-       cmp     \$0x10,$len
-       jbe     .Lcbc_dec_one
+       sub     \$0x10,$len
+       jbe     .Lcbc_dec_one           # $len is 1*16 or less
 
        movups  0x10($inp),$inout1
-       movaps  $inout1,$in1
-       cmp     \$0x20,$len
-       jbe     .Lcbc_dec_two
+       movaps  $inout0,$in0
+       sub     \$0x10,$len
+       jbe     .Lcbc_dec_two           # $len is 2*16 or less
 
        movups  0x20($inp),$inout2
-       movaps  $inout2,$in2
-       cmp     \$0x30,$len
-       jbe     .Lcbc_dec_three
+       movaps  $inout1,$in1
+       sub     \$0x10,$len
+       jbe     .Lcbc_dec_three         # $len is 3*16 or less
 
        movups  0x30($inp),$inout3
-       cmp     \$0x40,$len
-       jbe     .Lcbc_dec_four
-
-       movups  0x40($inp),$inout4
-       cmp     \$0x50,$len
-       jbe     .Lcbc_dec_five
-
-       movups  0x50($inp),$inout5
-       cmp     \$0x60,$len
-       jbe     .Lcbc_dec_six
+       movaps  $inout2,$in2
+       sub     \$0x10,$len
+       jbe     .Lcbc_dec_four          # $len is 4*16 or less
 
-       movups  0x60($inp),$inout6
-       movaps  $iv,$reserved(%rsp)     # save IV
-       call    _aesni_decrypt8
-       movups  ($inp),$rndkey1
-       movups  0x10($inp),$rndkey0
-       xorps   $reserved(%rsp),$inout0 # ^= IV
-       xorps   $rndkey1,$inout1
-       movups  0x20($inp),$rndkey1
-       xorps   $rndkey0,$inout2
-       movups  0x30($inp),$rndkey0
-       xorps   $rndkey1,$inout3
-       movups  0x40($inp),$rndkey1
-       xorps   $rndkey0,$inout4
-       movups  0x50($inp),$rndkey0
-       xorps   $rndkey1,$inout5
-       movups  0x60($inp),$iv          # IV
-       xorps   $rndkey0,$inout6
-       movups  $inout0,($out)
-       movups  $inout1,0x10($out)
-       movups  $inout2,0x20($out)
-       movups  $inout3,0x30($out)
-       movups  $inout4,0x40($out)
-       movups  $inout5,0x50($out)
-       lea     0x60($out),$out
-       movaps  $inout6,$inout0
-       sub     \$0x70,$len
+       movups  0x40($inp),$inout4      # $len is 5*16 or less
+       movaps  $inout3,$in3
+       movaps  $inout4,$in4
+       xorps   $inout5,$inout5
+       call    _aesni_decrypt6
+       pxor    $iv,$inout0
+       movaps  $in4,$iv
+       pxor    $in0,$inout1
+       movdqu  $inout0,($out)
+       pxor    $in1,$inout2
+       movdqu  $inout1,0x10($out)
+        pxor   $inout1,$inout1         # clear register bank
+       pxor    $in2,$inout3
+       movdqu  $inout2,0x20($out)
+        pxor   $inout2,$inout2
+       pxor    $in3,$inout4
+       movdqu  $inout3,0x30($out)
+        pxor   $inout3,$inout3
+       lea     0x40($out),$out
+       movdqa  $inout4,$inout0
+        pxor   $inout4,$inout4
+        pxor   $inout5,$inout5
+       sub     \$0x10,$len
        jmp     .Lcbc_dec_tail_collected
+
 .align 16
 .Lcbc_dec_one:
+       movaps  $inout0,$in0
 ___
        &aesni_generate1("dec",$key,$rounds);
 $code.=<<___;
        xorps   $iv,$inout0
        movaps  $in0,$iv
-       sub     \$0x10,$len
        jmp     .Lcbc_dec_tail_collected
 .align 16
 .Lcbc_dec_two:
-       xorps   $inout2,$inout2
-       call    _aesni_decrypt3
-       xorps   $iv,$inout0
-       xorps   $in0,$inout1
-       movups  $inout0,($out)
+       movaps  $inout1,$in1
+       call    _aesni_decrypt2
+       pxor    $iv,$inout0
        movaps  $in1,$iv
-       movaps  $inout1,$inout0
+       pxor    $in0,$inout1
+       movdqu  $inout0,($out)
+       movdqa  $inout1,$inout0
+        pxor   $inout1,$inout1         # clear register bank
        lea     0x10($out),$out
-       sub     \$0x20,$len
        jmp     .Lcbc_dec_tail_collected
 .align 16
 .Lcbc_dec_three:
+       movaps  $inout2,$in2
        call    _aesni_decrypt3
-       xorps   $iv,$inout0
-       xorps   $in0,$inout1
-       movups  $inout0,($out)
-       xorps   $in1,$inout2
-       movups  $inout1,0x10($out)
+       pxor    $iv,$inout0
        movaps  $in2,$iv
-       movaps  $inout2,$inout0
+       pxor    $in0,$inout1
+       movdqu  $inout0,($out)
+       pxor    $in1,$inout2
+       movdqu  $inout1,0x10($out)
+        pxor   $inout1,$inout1         # clear register bank
+       movdqa  $inout2,$inout0
+        pxor   $inout2,$inout2
        lea     0x20($out),$out
-       sub     \$0x30,$len
        jmp     .Lcbc_dec_tail_collected
 .align 16
 .Lcbc_dec_four:
+       movaps  $inout3,$in3
        call    _aesni_decrypt4
-       xorps   $iv,$inout0
-       movups  0x30($inp),$iv
-       xorps   $in0,$inout1
-       movups  $inout0,($out)
-       xorps   $in1,$inout2
-       movups  $inout1,0x10($out)
-       xorps   $in2,$inout3
-       movups  $inout2,0x20($out)
-       movaps  $inout3,$inout0
+       pxor    $iv,$inout0
+       movaps  $in3,$iv
+       pxor    $in0,$inout1
+       movdqu  $inout0,($out)
+       pxor    $in1,$inout2
+       movdqu  $inout1,0x10($out)
+        pxor   $inout1,$inout1         # clear register bank
+       pxor    $in2,$inout3
+       movdqu  $inout2,0x20($out)
+        pxor   $inout2,$inout2
+       movdqa  $inout3,$inout0
+        pxor   $inout3,$inout3
        lea     0x30($out),$out
-       sub     \$0x40,$len
-       jmp     .Lcbc_dec_tail_collected
-.align 16
-.Lcbc_dec_five:
-       xorps   $inout5,$inout5
-       call    _aesni_decrypt6
-       movups  0x10($inp),$rndkey1
-       movups  0x20($inp),$rndkey0
-       xorps   $iv,$inout0
-       xorps   $in0,$inout1
-       xorps   $rndkey1,$inout2
-       movups  0x30($inp),$rndkey1
-       xorps   $rndkey0,$inout3
-       movups  0x40($inp),$iv
-       xorps   $rndkey1,$inout4
-       movups  $inout0,($out)
-       movups  $inout1,0x10($out)
-       movups  $inout2,0x20($out)
-       movups  $inout3,0x30($out)
-       lea     0x40($out),$out
-       movaps  $inout4,$inout0
-       sub     \$0x50,$len
-       jmp     .Lcbc_dec_tail_collected
-.align 16
-.Lcbc_dec_six:
-       call    _aesni_decrypt6
-       movups  0x10($inp),$rndkey1
-       movups  0x20($inp),$rndkey0
-       xorps   $iv,$inout0
-       xorps   $in0,$inout1
-       xorps   $rndkey1,$inout2
-       movups  0x30($inp),$rndkey1
-       xorps   $rndkey0,$inout3
-       movups  0x40($inp),$rndkey0
-       xorps   $rndkey1,$inout4
-       movups  0x50($inp),$iv
-       xorps   $rndkey0,$inout5
-       movups  $inout0,($out)
-       movups  $inout1,0x10($out)
-       movups  $inout2,0x20($out)
-       movups  $inout3,0x30($out)
-       movups  $inout4,0x40($out)
-       lea     0x50($out),$out
-       movaps  $inout5,$inout0
-       sub     \$0x60,$len
        jmp     .Lcbc_dec_tail_collected
+
 .align 16
+.Lcbc_dec_clear_tail_collected:
+       pxor    $inout1,$inout1         # clear register bank
+       pxor    $inout2,$inout2
+       pxor    $inout3,$inout3
+___
+$code.=<<___ if (!$win64);
+       pxor    $inout4,$inout4         # %xmm6..9
+       pxor    $inout5,$inout5
+       pxor    $inout6,$inout6
+       pxor    $inout7,$inout7
+___
+$code.=<<___;
 .Lcbc_dec_tail_collected:
-       and     \$15,$len
        movups  $iv,($ivp)
+       and     \$15,$len
        jnz     .Lcbc_dec_tail_partial
        movups  $inout0,($out)
+       pxor    $inout0,$inout0
        jmp     .Lcbc_dec_ret
 .align 16
 .Lcbc_dec_tail_partial:
-       movaps  $inout0,$reserved(%rsp)
+       movaps  $inout0,(%rsp)
+       pxor    $inout0,$inout0
        mov     \$16,%rcx
        mov     $out,%rdi
        sub     $len,%rcx
-       lea     $reserved(%rsp),%rsi
-       .long   0x9066A4F3      # rep movsb
+       lea     (%rsp),%rsi
+       .long   0x9066A4F3              # rep movsb
+       movdqa  $inout0,(%rsp)
 
 .Lcbc_dec_ret:
+       xorps   $rndkey0,$rndkey0       # %xmm0
+       pxor    $rndkey1,$rndkey1
 ___
 $code.=<<___ if ($win64);
-       movaps  (%rsp),%xmm6
-       movaps  0x10(%rsp),%xmm7
-       movaps  0x20(%rsp),%xmm8
-       movaps  0x30(%rsp),%xmm9
-       lea     0x58(%rsp),%rsp
+       movaps  0x10(%rsp),%xmm6
+       movaps  %xmm0,0x10(%rsp)        # clear stack
+       movaps  0x20(%rsp),%xmm7
+       movaps  %xmm0,0x20(%rsp)
+       movaps  0x30(%rsp),%xmm8
+       movaps  %xmm0,0x30(%rsp)
+       movaps  0x40(%rsp),%xmm9
+       movaps  %xmm0,0x40(%rsp)
+       movaps  0x50(%rsp),%xmm10
+       movaps  %xmm0,0x50(%rsp)
+       movaps  0x60(%rsp),%xmm11
+       movaps  %xmm0,0x60(%rsp)
+       movaps  0x70(%rsp),%xmm12
+       movaps  %xmm0,0x70(%rsp)
+       movaps  0x80(%rsp),%xmm13
+       movaps  %xmm0,0x80(%rsp)
+       movaps  0x90(%rsp),%xmm14
+       movaps  %xmm0,0x90(%rsp)
+       movaps  0xa0(%rsp),%xmm15
+       movaps  %xmm0,0xa0(%rsp)
 ___
 $code.=<<___;
+       lea     (%rbp),%rsp
+       pop     %rbp
 .Lcbc_ret:
        ret
 .size  ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
 ___
 } \f
-# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey,
+# int ${PREFIX}_set_decrypt_key(const unsigned char *inp,
 #                              int bits, AES_KEY *key)
+#
+# input:       $inp    user-supplied key
+#              $bits   $inp length in bits
+#              $key    pointer to key schedule
+# output:      %eax    0 denoting success, -1 or -2 - failure (see C)
+#              *$key   key schedule
+#
 { my ($inp,$bits,$key) = @_4args;
   $bits =~ s/%r/%e/;
 
@@ -2510,7 +3291,9 @@ ${PREFIX}_set_decrypt_key:
 
        $movkey ($key),%xmm0            # inverse middle
        aesimc  %xmm0,%xmm0
+       pxor    %xmm1,%xmm1
        $movkey %xmm0,($inp)
+       pxor    %xmm0,%xmm0
 .Ldec_key_ret:
        add     \$8,%rsp
        ret
@@ -2527,6 +3310,22 @@ ___
 # Agressively optimized in respect to aeskeygenassist's critical path
 # and is contained in %xmm0-5 to meet Win64 ABI requirement.
 #
+# int ${PREFIX}_set_encrypt_key(const unsigned char *inp,
+#                              int bits, AES_KEY * const key);
+#
+# input:       $inp    user-supplied key
+#              $bits   $inp length in bits
+#              $key    pointer to key schedule
+# output:      %eax    0 denoting success, -1 or -2 - failure (see C)
+#              $bits   rounds-1 (used in aesni_set_decrypt_key)
+#              *$key   key schedule
+#              $key    pointer to key schedule (used in
+#                      aesni_set_decrypt_key)
+#
+# Subroutine is frame-less, which means that only volatile registers
+# are used. Note that it's declared "abi-omnipotent", which means that
+# amount of volatile registers is smaller on Windows.
+#
 $code.=<<___;
 .globl ${PREFIX}_set_encrypt_key
 .type  ${PREFIX}_set_encrypt_key,\@abi-omnipotent
@@ -2540,9 +3339,11 @@ __aesni_set_encrypt_key:
        test    $key,$key
        jz      .Lenc_key_ret
 
+       mov     \$`1<<28|1<<11`,%r10d   # AVX and XOP bits
        movups  ($inp),%xmm0            # pull first 128 bits of *userKey
        xorps   %xmm4,%xmm4             # low dword of xmm4 is assumed 0
-       lea     16($key),%rax
+       and     OPENSSL_ia32cap_P+4(%rip),%r10d
+       lea     16($key),%rax           # %rax is used as modifiable copy of $key
        cmp     \$256,$bits
        je      .L14rounds
        cmp     \$192,$bits
@@ -2552,6 +3353,9 @@ __aesni_set_encrypt_key:
 
 .L10rounds:
        mov     \$9,$bits                       # 10 rounds for 128-bit key
+       cmp     \$`1<<28`,%r10d                 # AVX, but no XOP
+       je      .L10rounds_alt
+
        $movkey %xmm0,($key)                    # round 0
        aeskeygenassist \$0x1,%xmm0,%xmm1       # round 1
        call            .Lkey_expansion_128_cold
@@ -2579,9 +3383,79 @@ __aesni_set_encrypt_key:
        jmp     .Lenc_key_ret
 
 .align 16
+.L10rounds_alt:                         # 128-bit key schedule variant that does not use the keygen-assist instruction
+       movdqa  .Lkey_rotate(%rip),%xmm5        # shuffle mask: top dword, rotated by one byte, copied to all lanes
+       mov     \$8,%r10d                       # loop counter: the loop below produces rounds 1-8
+       movdqa  .Lkey_rcon1(%rip),%xmm4         # round constant, starts at 1 in every dword
+       movdqa  %xmm0,%xmm2                     # xmm2 = working copy of the current round key
+       movdqu  %xmm0,($key)                    # round 0 is the user key itself
+       jmp     .Loop_key128
+
+.align 16
+.Loop_key128:
+       pshufb          %xmm5,%xmm0             # rotate the last word and broadcast it
+       aesenclast      %xmm4,%xmm0             # SubBytes + xor with rcon; ShiftRows has no effect on four identical columns
+       pslld           \$1,%xmm4               # double the round constant for the next round
+       lea             16(%rax),%rax           # advance the write pointer by one round key
+
+       movdqa          %xmm2,%xmm3             # next 7 instructions: xmm2 = running xor of its dwords
+       pslldq          \$4,%xmm2
+       pxor            %xmm2,%xmm3
+       pslldq          \$4,%xmm2
+       pxor            %xmm2,%xmm3
+       pslldq          \$4,%xmm2
+       pxor            %xmm3,%xmm2             # xmm2 = (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3)
+
+       pxor            %xmm2,%xmm0             # xmm0 = next round key
+       movdqu          %xmm0,-16(%rax)         # store it in the slot the pointer just moved past
+       movdqa          %xmm0,%xmm2             # keep a copy for the next iteration
+
+       dec     %r10d
+       jnz     .Loop_key128
+
+       movdqa          .Lkey_rcon1b(%rip),%xmm4        # switch to round constant 0x1b for round 9
+
+       pshufb          %xmm5,%xmm0             # round 9: same steps as one loop iteration
+       aesenclast      %xmm4,%xmm0
+       pslld           \$1,%xmm4               # 0x1b doubled = 0x36, the constant used for round 10
+
+       movdqa          %xmm2,%xmm3             # running xor of the round 8 key
+       pslldq          \$4,%xmm2
+       pxor            %xmm2,%xmm3
+       pslldq          \$4,%xmm2
+       pxor            %xmm2,%xmm3
+       pslldq          \$4,%xmm2
+       pxor            %xmm3,%xmm2
+
+       pxor            %xmm2,%xmm0
+       movdqu          %xmm0,(%rax)            # store round 9
+
+       movdqa          %xmm0,%xmm2             # round 10 starts from the round 9 key
+       pshufb          %xmm5,%xmm0
+       aesenclast      %xmm4,%xmm0
+
+       movdqa          %xmm2,%xmm3             # running xor of the round 9 key
+       pslldq          \$4,%xmm2
+       pxor            %xmm2,%xmm3
+       pslldq          \$4,%xmm2
+       pxor            %xmm2,%xmm3
+       pslldq          \$4,%xmm2
+       pxor            %xmm3,%xmm2
+
+       pxor            %xmm2,%xmm0
+       movdqu          %xmm0,16(%rax)          # store round 10
+
+       mov     $bits,96(%rax)  # 240($key)
+       xor     %eax,%eax                       # return 0
+       jmp     .Lenc_key_ret
+
+.align 16
 .L12rounds:
        movq    16($inp),%xmm2                  # remaining 1/3 of *userKey
        mov     \$11,$bits                      # 12 rounds for 192
+       cmp     \$`1<<28`,%r10d                 # AVX, but no XOP
+       je      .L12rounds_alt
+
        $movkey %xmm0,($key)                    # round 0
        aeskeygenassist \$0x1,%xmm2,%xmm1       # round 1,2
        call            .Lkey_expansion_192a_cold
@@ -2605,10 +3479,54 @@ __aesni_set_encrypt_key:
        jmp     .Lenc_key_ret
 
 .align 16
+.L12rounds_alt:                         # 192-bit key schedule variant that does not use the keygen-assist instruction
+       movdqa  .Lkey_rotate192(%rip),%xmm5     # shuffle mask: dword 1, rotated by one byte, copied to all lanes
+       movdqa  .Lkey_rcon1(%rip),%xmm4         # round constant, starts at 1 in every dword
+       mov     \$8,%r10d                       # loop counter: 8 iterations, 24 bytes of schedule each
+       movdqu  %xmm0,($key)                    # first 16 bytes of the schedule are the start of the user key
+       jmp     .Loop_key192
+
+.align 16
+.Loop_key192:
+       movq            %xmm2,0(%rax)           # store the low 64 bits of xmm2, the tail of the current 192-bit chunk
+       movdqa          %xmm2,%xmm1             # keep that tail; xmm2 is overwritten next
+       pshufb          %xmm5,%xmm2             # rotate the chunk's last word and broadcast it
+       aesenclast      %xmm4,%xmm2             # SubBytes + xor with rcon; ShiftRows has no effect on four identical columns
+       pslld           \$1, %xmm4              # double the round constant for the next iteration
+       lea             24(%rax),%rax           # advance the write pointer by 192 bits
+
+       movdqa          %xmm0,%xmm3             # next 7 instructions: xmm0 = running xor of its dwords
+       pslldq          \$4,%xmm0
+       pxor            %xmm0,%xmm3
+       pslldq          \$4,%xmm0
+       pxor            %xmm0,%xmm3
+       pslldq          \$4,%xmm0
+       pxor            %xmm3,%xmm0
+
+       pshufd          \$0xff,%xmm0,%xmm3      # broadcast the top dword of that running xor
+       pxor            %xmm1,%xmm3             # fold the saved tail into it ...
+       pslldq          \$4,%xmm1
+       pxor            %xmm1,%xmm3             # ... again as a running xor
+
+       pxor            %xmm2,%xmm0             # xmm0 = next four schedule words
+       pxor            %xmm3,%xmm2             # low 64 bits of xmm2 = the two words after them (upper half is never stored)
+       movdqu          %xmm0,-16(%rax)         # store the four words right after the 8 bytes written by movq
+
+       dec     %r10d
+       jnz     .Loop_key192
+
+       mov     $bits,32(%rax)  # 240($key)
+       xor     %eax,%eax                       # return 0
+       jmp     .Lenc_key_ret
+
+.align 16
 .L14rounds:
        movups  16($inp),%xmm2                  # remaning half of *userKey
        mov     \$13,$bits                      # 14 rounds for 256
        lea     16(%rax),%rax
+       cmp     \$`1<<28`,%r10d                 # AVX, but no XOP
+       je      .L14rounds_alt
+
        $movkey %xmm0,($key)                    # round 0
        $movkey %xmm2,16($key)                  # round 1
        aeskeygenassist \$0x1,%xmm2,%xmm1       # round 2
@@ -2643,9 +3561,69 @@ __aesni_set_encrypt_key:
        jmp     .Lenc_key_ret
 
 .align 16
+.L14rounds_alt:                         # 256-bit key schedule variant that does not use the keygen-assist instruction
+       movdqa  .Lkey_rotate(%rip),%xmm5        # shuffle mask: top dword, rotated by one byte, copied to all lanes
+       movdqa  .Lkey_rcon1(%rip),%xmm4         # round constant, starts at 1 in every dword
+       mov     \$7,%r10d                       # loop counter: 7 passes through the first half of the loop
+       movdqu  %xmm0,0($key)                   # round 0 = first half of the user key
+       movdqa  %xmm2,%xmm1                     # xmm1 = working copy of the second half
+       movdqu  %xmm2,16($key)                  # round 1 = second half of the user key
+       jmp     .Loop_key256
+
+.align 16
+.Loop_key256:
+       pshufb          %xmm5,%xmm2             # rotate the last word of the previous key and broadcast it
+       aesenclast      %xmm4,%xmm2             # SubBytes + xor with rcon; ShiftRows has no effect on four identical columns
+
+       movdqa          %xmm0,%xmm3             # next 7 instructions: xmm0 = running xor of its dwords
+       pslldq          \$4,%xmm0
+       pxor            %xmm0,%xmm3
+       pslldq          \$4,%xmm0
+       pxor            %xmm0,%xmm3
+       pslldq          \$4,%xmm0
+       pxor            %xmm3,%xmm0
+       pslld           \$1,%xmm4               # double the round constant for the next pass
+
+       pxor            %xmm2,%xmm0             # xmm0 = next even-numbered round key
+       movdqu          %xmm0,(%rax)
+
+       dec     %r10d
+       jz      .Ldone_key256                   # the 7th pass stops after the even-numbered key
+
+       pshufd          \$0xff,%xmm0,%xmm2      # broadcast the last word of the new key, this time without rotation
+       pxor            %xmm3,%xmm3             # all-zero round key for the next instruction
+       aesenclast      %xmm3,%xmm2             # SubBytes only: zero key, and identical columns make ShiftRows a no-op
+
+       movdqa          %xmm1,%xmm3             # next 7 instructions: xmm1 = running xor of its dwords
+       pslldq          \$4,%xmm1
+       pxor            %xmm1,%xmm3
+       pslldq          \$4,%xmm1
+       pxor            %xmm1,%xmm3
+       pslldq          \$4,%xmm1
+       pxor            %xmm3,%xmm1
+
+       pxor            %xmm1,%xmm2             # xmm2 = next odd-numbered round key
+       movdqu          %xmm2,16(%rax)
+       lea             32(%rax),%rax           # advance the write pointer by two round keys
+       movdqa          %xmm2,%xmm1             # keep a copy for the next pass
+
+       jmp     .Loop_key256
+
+.Ldone_key256:
+       mov     $bits,16(%rax)  # 240($key)
+       xor     %eax,%eax                       # return 0
+       jmp     .Lenc_key_ret
+
+.align 16
 .Lbad_keybits:
        mov     \$-2,%rax
 .Lenc_key_ret:
+       pxor    %xmm0,%xmm0             # zero xmm0-xmm5, the registers used by the key expansion, before returning
+       pxor    %xmm1,%xmm1
+       pxor    %xmm2,%xmm2
+       pxor    %xmm3,%xmm3
+       pxor    %xmm4,%xmm4
+       pxor    %xmm5,%xmm5
        add     \$8,%rsp
        ret
 .LSEH_end_set_encrypt_key:
@@ -2733,6 +3711,16 @@ $code.=<<___;
        .long   1,0,0,0
 .Lxts_magic:
        .long   0x87,0,1,0
+.Lincrement1:                           # 16 bytes with only the last byte set to 1 (its user is outside this hunk)
+       .byte   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+.Lkey_rotate:                           # pshufb mask for the 128/256-bit alt key schedules: bytes 13,14,15,12 into every dword
+       .long   0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+.Lkey_rotate192:                        # pshufb mask for the 192-bit alt key schedule: bytes 5,6,7,4 into every dword
+       .long   0x04070605,0x04070605,0x04070605,0x04070605
+.Lkey_rcon1:                            # initial round constant, doubled with pslld after each use
+       .long   1,1,1,1
+.Lkey_rcon1b:                           # round constant 0x1b, loaded by the 128-bit alt path after its 8-round loop
+       .long   0x1b,0x1b,0x1b,0x1b
 
 .asciz  "AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>"
 .align 64
@@ -2791,45 +3779,9 @@ ecb_ccm64_se_handler:
        jmp     .Lcommon_seh_tail
 .size  ecb_ccm64_se_handler,.-ecb_ccm64_se_handler
 
-.type  ctr32_se_handler,\@abi-omnipotent
+.type  ctr_xts_se_handler,\@abi-omnipotent
 .align 16
-ctr32_se_handler:
-       push    %rsi
-       push    %rdi
-       push    %rbx
-       push    %rbp
-       push    %r12
-       push    %r13
-       push    %r14
-       push    %r15
-       pushfq
-       sub     \$64,%rsp
-
-       mov     120($context),%rax      # pull context->Rax
-       mov     248($context),%rbx      # pull context->Rip
-
-       lea     .Lctr32_body(%rip),%r10
-       cmp     %r10,%rbx               # context->Rip<"prologue" label
-       jb      .Lcommon_seh_tail
-
-       mov     152($context),%rax      # pull context->Rsp
-
-       lea     .Lctr32_ret(%rip),%r10
-       cmp     %r10,%rbx
-       jae     .Lcommon_seh_tail
-
-       lea     0x20(%rax),%rsi         # %xmm save area
-       lea     512($context),%rdi      # &context.Xmm6
-       mov     \$20,%ecx               # 10*sizeof(%xmm0)/sizeof(%rax)
-       .long   0xa548f3fc              # cld; rep movsq
-       lea     0xc8(%rax),%rax         # adjust stack pointer
-
-       jmp     .Lcommon_seh_tail
-.size  ctr32_se_handler,.-ctr32_se_handler
-
-.type  xts_se_handler,\@abi-omnipotent
-.align 16
-xts_se_handler:
+ctr_xts_se_handler:
        push    %rsi
        push    %rdi
        push    %rbx
@@ -2859,14 +3811,14 @@ xts_se_handler:
        cmp     %r10,%rbx               # context->Rip>=epilogue label
        jae     .Lcommon_seh_tail
 
-       lea     0x60(%rax),%rsi         # %xmm save area
+       mov     160($context),%rax      # pull context->Rbp
+       lea     -0xa0(%rax),%rsi        # %xmm save area
        lea     512($context),%rdi      # & context.Xmm6
        mov     \$20,%ecx               # 10*sizeof(%xmm0)/sizeof(%rax)
        .long   0xa548f3fc              # cld; rep movsq
-       lea     0x68+160(%rax),%rax     # adjust stack pointer
 
-       jmp     .Lcommon_seh_tail
-.size  xts_se_handler,.-xts_se_handler
+       jmp     .Lcommon_rbp_tail
+.size  ctr_xts_se_handler,.-ctr_xts_se_handler
 ___
 $code.=<<___;
 .type  cbc_se_handler,\@abi-omnipotent
@@ -2886,7 +3838,7 @@ cbc_se_handler:
        mov     152($context),%rax      # pull context->Rsp
        mov     248($context),%rbx      # pull context->Rip
 
-       lea     .Lcbc_decrypt(%rip),%r10
+       lea     .Lcbc_decrypt_bulk(%rip),%r10
        cmp     %r10,%rbx               # context->Rip<"prologue" label
        jb      .Lcommon_seh_tail
 
@@ -2898,11 +3850,16 @@ cbc_se_handler:
        cmp     %r10,%rbx               # context->Rip>="epilogue" label
        jae     .Lcommon_seh_tail
 
-       lea     0(%rax),%rsi            # top of stack
+       lea     16(%rax),%rsi           # %xmm save area
        lea     512($context),%rdi      # &context.Xmm6
-       mov     \$8,%ecx                # 4*sizeof(%xmm0)/sizeof(%rax)
+       mov     \$20,%ecx               # 10*sizeof(%xmm0)/sizeof(%rax)
        .long   0xa548f3fc              # cld; rep movsq
-       lea     0x58(%rax),%rax         # adjust stack pointer
+
+.Lcommon_rbp_tail:
+       mov     160($context),%rax      # pull context->Rbp
+       mov     (%rax),%rbp             # restore saved %rbp
+       lea     8(%rax),%rax            # adjust stack pointer
+       mov     %rbp,160($context)      # restore context->Rbp
        jmp     .Lcommon_seh_tail
 
 .Lrestore_cbc_rax:
@@ -3006,14 +3963,15 @@ $code.=<<___ if ($PREFIX eq "aesni");
        .rva    .Lccm64_dec_body,.Lccm64_dec_ret        # HandlerData[]
 .LSEH_info_ctr32:
        .byte   9,0,0,0
-       .rva    ctr32_se_handler
+       .rva    ctr_xts_se_handler
+       .rva    .Lctr32_body,.Lctr32_epilogue           # HandlerData[]
 .LSEH_info_xts_enc:
        .byte   9,0,0,0
-       .rva    xts_se_handler
+       .rva    ctr_xts_se_handler
        .rva    .Lxts_enc_body,.Lxts_enc_epilogue       # HandlerData[]
 .LSEH_info_xts_dec:
        .byte   9,0,0,0
-       .rva    xts_se_handler
+       .rva    ctr_xts_se_handler
        .rva    .Lxts_dec_body,.Lxts_dec_epilogue       # HandlerData[]
 ___
 $code.=<<___;
@@ -3060,11 +4018,30 @@ sub aesni {
        push @opcode,0xc0|($2&7)|(($3&7)<<3);   # ModR/M
        return ".byte\t".join(',',@opcode);
     }
+    elsif ($line=~/(aes[a-z]+)\s+([0x1-9a-fA-F]*)\(%rsp\),\s*%xmm([0-9]+)/) {
+       my %opcodelet = (
+               "aesenc" => 0xdc,       "aesenclast" => 0xdd,
+               "aesdec" => 0xde,       "aesdeclast" => 0xdf
+       );
+       return undef if (!defined($opcodelet{$1}));
+       my $off = $2;
+       push @opcode,0x44 if ($3>=8);
+       push @opcode,0x0f,0x38,$opcodelet{$1};
+       push @opcode,0x44|(($3&7)<<3),0x24;     # ModR/M
+       push @opcode,($off=~/^0/?oct($off):$off)&0xff;
+       return ".byte\t".join(',',@opcode);
+    }
     return $line;
 }
 
+sub movbe {    # hand-encode "movbe %eax,disp8(%rsp)" as a .byte directive; the argument is the displacement
+       ".byte  0x0f,0x38,0xf1,0x44,0x24,".shift;       # opcode 0f 38 f1, ModR/M 0x44, SIB 0x24, then disp8; value of this expression is returned
+}
+
 $code =~ s/\`([^\`]*)\`/eval($1)/gem;
 $code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem;
+#$code =~ s/\bmovbe\s+%eax/bswap %eax; mov %eax/gm;    # debugging artefact
+$code =~ s/\bmovbe\s+%eax,\s*([0-9]+)\(%rsp\)/movbe($1)/gem;
 
 print $code;
 
diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
new file mode 100755 (executable)
index 0000000..a1891cc
--- /dev/null
@@ -0,0 +1,1942 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for AES instructions as per PowerISA
+# specification version 2.07, first implemented by POWER8 processor.
+# The module is endian-agnostic in the sense that it supports both big-
+# and little-endian cases. Data alignment in parallelizable modes is
+# handled with VSX loads and stores, which implies MSR.VSX flag being
+# set. It should also be noted that ISA specification doesn't prohibit
+# alignment exceptions for these instructions on page boundaries.
+# Initially alignment was handled in pure AltiVec/VMX way [when data
+# is aligned programmatically, which in turn guarantees exception-
+# free execution], but it turned out to hamper performance when vcipher
+# instructions are interleaved. It's reckoned that eventual
+# misalignment penalties at page boundaries are on average lower
+# than additional overhead in pure AltiVec approach.
+
+$flavour = shift;      # first command-line argument selects the target flavour
+
+if ($flavour =~ /64/) {
+       $SIZE_T =8;     # pointer/register size in bytes
+       $LRSAVE =2*$SIZE_T;     # stack offset used below to save the link register
+       $STU    ="stdu";
+       $POP    ="ld";
+       $PUSH   ="std";
+       $UCMP   ="cmpld";
+       $SHL    ="sldi";
+} elsif ($flavour =~ /32/) {
+       $SIZE_T =4;     # pointer/register size in bytes
+       $LRSAVE =$SIZE_T;       # stack offset used below to save the link register
+       $STU    ="stwu";
+       $POP    ="lwz";
+       $PUSH   ="stw";
+       $UCMP   ="cmplw";
+       $SHL    ="slwi";
+} else { die "nonsense $flavour"; }     # flavour must mention either 64 or 32
+
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;      # non-zero only when flavour ends in "le"
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";     # "or", not "||": "||" binds to shift, so open failures went unnoticed
+
+$FRAME=8*$SIZE_T;
+$prefix="aes_p8";
+
+$sp="r1";
+$vrsave="r12";
+
+#########################################################################
+{{{    # Key setup procedures                                          #
+my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
+my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
+my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
+
+$code.=<<___;
+.machine       "any"
+
+.text
+
+.align 7
+rcon:
+.long  0x01000000, 0x01000000, 0x01000000, 0x01000000  ?rev
+.long  0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
+.long  0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
+.long  0,0,0,0                                         ?asis
+Lconsts:
+       mflr    r0      # keep the caller's link register
+       bcl     20,31,\$+4      # branch-and-link to the next insn, LR = its address
+       mflr    $ptr     #vvvvv distance between "." and rcon
+       addi    $ptr,$ptr,-0x48 # 0x40 bytes of table + 2 insns = 0x48
+       mtlr    r0
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+
+.globl .${prefix}_set_encrypt_key
+.align 5
+.${prefix}_set_encrypt_key:
+Lset_encrypt_key:
+       mflr            r11
+       $PUSH           r11,$LRSAVE($sp)
+
+       li              $ptr,-1
+       ${UCMP}i        $inp,0
+       beq-            Lenc_key_abort          # if ($inp==0) return -1;
+       ${UCMP}i        $out,0
+       beq-            Lenc_key_abort          # if ($out==0) return -1;
+       li              $ptr,-2                 # status for an unsupported bit length
+       cmpwi           $bits,128
+       blt-            Lenc_key_abort
+       cmpwi           $bits,256
+       bgt-            Lenc_key_abort
+       andi.           r0,$bits,0x3f           # must be a multiple of 64
+       bne-            Lenc_key_abort
+
+       lis             r0,0xfff0
+       mfspr           $vrsave,256
+       mtspr           256,r0
+
+       bl              Lconsts
+       mtlr            r11
+
+       neg             r9,$inp
+       lvx             $in0,0,$inp
+       addi            $inp,$inp,15            # 15 is not typo
+       lvsr            $key,0,r9               # borrow $key
+       li              r8,0x20
+       cmpwi           $bits,192
+       lvx             $in1,0,$inp
+       le?vspltisb     $mask,0x0f              # borrow $mask
+       lvx             $rcon,0,$ptr
+       le?vxor         $key,$key,$mask         # adjust for byte swap
+       lvx             $mask,r8,$ptr
+       addi            $ptr,$ptr,0x10
+       vperm           $in0,$in0,$in1,$key     # align [and byte swap in LE]
+       li              $cnt,8
+       vxor            $zero,$zero,$zero
+       mtctr           $cnt
+
+       ?lvsr           $outperm,0,$out
+       vspltisb        $outmask,-1
+       lvx             $outhead,0,$out
+       ?vperm          $outmask,$zero,$outmask,$outperm
+
+       blt             Loop128
+       addi            $inp,$inp,8
+       beq             L192
+       addi            $inp,$inp,8
+       b               L256
+
+.align 4
+Loop128:
+       vperm           $key,$in0,$in0,$mask    # rotate-n-splat
+       vsldoi          $tmp,$zero,$in0,12      # >>32
+        vperm          $outtail,$in0,$in0,$outperm     # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+       vcipherlast     $key,$key,$rcon
+        stvx           $stage,0,$out
+        addi           $out,$out,16
+
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+        vadduwm        $rcon,$rcon,$rcon
+       vxor            $in0,$in0,$key
+       bdnz            Loop128
+
+       lvx             $rcon,0,$ptr            # last two round keys
+
+       vperm           $key,$in0,$in0,$mask    # rotate-n-splat
+       vsldoi          $tmp,$zero,$in0,12      # >>32
+        vperm          $outtail,$in0,$in0,$outperm     # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+       vcipherlast     $key,$key,$rcon
+        stvx           $stage,0,$out
+        addi           $out,$out,16
+
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+        vadduwm        $rcon,$rcon,$rcon
+       vxor            $in0,$in0,$key
+
+       vperm           $key,$in0,$in0,$mask    # rotate-n-splat
+       vsldoi          $tmp,$zero,$in0,12      # >>32
+        vperm          $outtail,$in0,$in0,$outperm     # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+       vcipherlast     $key,$key,$rcon
+        stvx           $stage,0,$out
+        addi           $out,$out,16
+
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+       vxor            $in0,$in0,$key
+        vperm          $outtail,$in0,$in0,$outperm     # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+        stvx           $stage,0,$out
+
+       addi            $inp,$out,15            # 15 is not typo
+       addi            $out,$out,0x50
+
+       li              $rounds,10
+       b               Ldone
+
+.align 4
+L192:
+       lvx             $tmp,0,$inp
+       li              $cnt,4
+        vperm          $outtail,$in0,$in0,$outperm     # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+        stvx           $stage,0,$out
+        addi           $out,$out,16
+       vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
+       vspltisb        $key,8                  # borrow $key
+       mtctr           $cnt
+       vsububm         $mask,$mask,$key        # adjust the mask
+
+Loop192:
+       vperm           $key,$in1,$in1,$mask    # rotate-n-splat
+       vsldoi          $tmp,$zero,$in0,12      # >>32
+       vcipherlast     $key,$key,$rcon
+
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+
+        vsldoi         $stage,$zero,$in1,8
+       vspltw          $tmp,$in0,3
+       vxor            $tmp,$tmp,$in1
+       vsldoi          $in1,$zero,$in1,12      # >>32
+        vadduwm        $rcon,$rcon,$rcon
+       vxor            $in1,$in1,$tmp
+       vxor            $in0,$in0,$key
+       vxor            $in1,$in1,$key
+        vsldoi         $stage,$stage,$in0,8
+
+       vperm           $key,$in1,$in1,$mask    # rotate-n-splat
+       vsldoi          $tmp,$zero,$in0,12      # >>32
+        vperm          $outtail,$stage,$stage,$outperm # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+       vcipherlast     $key,$key,$rcon
+        stvx           $stage,0,$out
+        addi           $out,$out,16
+
+        vsldoi         $stage,$in0,$in1,8
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+        vperm          $outtail,$stage,$stage,$outperm # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+        stvx           $stage,0,$out
+        addi           $out,$out,16
+
+       vspltw          $tmp,$in0,3
+       vxor            $tmp,$tmp,$in1
+       vsldoi          $in1,$zero,$in1,12      # >>32
+        vadduwm        $rcon,$rcon,$rcon
+       vxor            $in1,$in1,$tmp
+       vxor            $in0,$in0,$key
+       vxor            $in1,$in1,$key
+        vperm          $outtail,$in0,$in0,$outperm     # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+        stvx           $stage,0,$out
+        addi           $inp,$out,15            # 15 is not typo
+        addi           $out,$out,16
+       bdnz            Loop192
+
+       li              $rounds,12
+       addi            $out,$out,0x20
+       b               Ldone
+
+.align 4
+L256:
+       lvx             $tmp,0,$inp
+       li              $cnt,7
+       li              $rounds,14
+        vperm          $outtail,$in0,$in0,$outperm     # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+        stvx           $stage,0,$out
+        addi           $out,$out,16
+       vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
+       mtctr           $cnt
+
+Loop256:
+       vperm           $key,$in1,$in1,$mask    # rotate-n-splat
+       vsldoi          $tmp,$zero,$in0,12      # >>32
+        vperm          $outtail,$in1,$in1,$outperm     # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+       vcipherlast     $key,$key,$rcon
+        stvx           $stage,0,$out
+        addi           $out,$out,16
+
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in0,$in0,$tmp
+        vadduwm        $rcon,$rcon,$rcon
+       vxor            $in0,$in0,$key
+        vperm          $outtail,$in0,$in0,$outperm     # rotate
+        vsel           $stage,$outhead,$outtail,$outmask
+        vmr            $outhead,$outtail
+        stvx           $stage,0,$out
+        addi           $inp,$out,15            # 15 is not typo
+        addi           $out,$out,16
+       bdz             Ldone
+
+       vspltw          $key,$in0,3             # just splat
+       vsldoi          $tmp,$zero,$in1,12      # >>32
+       vsbox           $key,$key
+
+       vxor            $in1,$in1,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in1,$in1,$tmp
+       vsldoi          $tmp,$zero,$tmp,12      # >>32
+       vxor            $in1,$in1,$tmp
+
+       vxor            $in1,$in1,$key
+       b               Loop256
+
+.align 4
+Ldone:
+       lvx             $in1,0,$inp             # redundant in aligned case
+       vsel            $in1,$outhead,$in1,$outmask
+       stvx            $in1,0,$inp
+       li              $ptr,0                  # success status
+       mtspr           256,$vrsave
+       stw             $rounds,0($out)         # round count, read back at offset 240 by the crypt routines
+
+Lenc_key_abort:
+       mr              r3,$ptr                 # return status
+       blr
+       .long           0
+       .byte           0,12,0x14,1,0,0,3,0
+       .long           0
+.size  .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
+
+.globl .${prefix}_set_decrypt_key
+.align 5
+.${prefix}_set_decrypt_key:
+       $STU            $sp,-$FRAME($sp)
+       mflr            r10
+       $PUSH           r10,$FRAME+$LRSAVE($sp)
+       bl              Lset_encrypt_key        # build the encryption schedule first
+       mtlr            r10
+
+       cmpwi           r3,0
+       bne-            Ldec_key_abort          # pass a non-zero status straight back
+
+       slwi            $cnt,$rounds,4          # rounds*16 bytes
+       subi            $inp,$out,240           # first round key
+       srwi            $rounds,$rounds,1       # rounds/2 swaps are needed
+       add             $out,$inp,$cnt          # last round key
+       mtctr           $rounds
+
+Ldeckey:                                       # swap 16-byte round keys from both ends inwards
+       lwz             r0, 0($inp)
+       lwz             r6, 4($inp)
+       lwz             r7, 8($inp)
+       lwz             r8, 12($inp)
+       addi            $inp,$inp,16
+       lwz             r9, 0($out)
+       lwz             r10,4($out)
+       lwz             r11,8($out)
+       lwz             r12,12($out)
+       stw             r0, 0($out)
+       stw             r6, 4($out)
+       stw             r7, 8($out)
+       stw             r8, 12($out)
+       subi            $out,$out,16
+       stw             r9, -16($inp)
+       stw             r10,-12($inp)
+       stw             r11,-8($inp)
+       stw             r12,-4($inp)
+       bdnz            Ldeckey
+
+       xor             r3,r3,r3                # return value
+Ldec_key_abort:
+       addi            $sp,$sp,$FRAME          # drop the frame allocated on entry
+       blr
+       .long           0
+       .byte           0,12,4,1,0x80,0,3,0
+       .long           0
+.size  .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
+___
+}}}
+#########################################################################
+{{{    # Single block en- and decrypt procedures                       #
+sub gen_block () {     # NOTE: the empty prototype only works because the calls below use the &-form
+my $dir = shift;       # "en" or "de", becomes part of the emitted symbol and label names
+my $n   = $dir eq "de" ? "n" : "";     # turns vcipher into vncipher for the decrypt variant
+my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
+
+$code.=<<___;
+.globl .${prefix}_${dir}crypt
+.align 5
+.${prefix}_${dir}crypt:
+       lwz             $rounds,240($key)       # round count kept at offset 240 of the key schedule
+       lis             r0,0xfc00
+       mfspr           $vrsave,256
+       li              $idx,15                 # 15 is not typo
+       mtspr           256,r0
+
+       lvx             v0,0,$inp
+       neg             r11,$out
+       lvx             v1,$idx,$inp
+       lvsl            v2,0,$inp               # inpperm
+       le?vspltisb     v4,0x0f
+       ?lvsl           v3,0,r11                # outperm
+       le?vxor         v2,v2,v4
+       li              $idx,16
+       vperm           v0,v0,v1,v2             # align [and byte swap in LE]
+       lvx             v1,0,$key
+       ?lvsl           v5,0,$key               # keyperm
+       srwi            $rounds,$rounds,1
+       lvx             v2,$idx,$key
+       addi            $idx,$idx,16
+       subi            $rounds,$rounds,1       # loop count is rounds/2-1, two rounds per pass
+       ?vperm          v1,v1,v2,v5             # align round key
+
+       vxor            v0,v0,v1
+       lvx             v1,$idx,$key
+       addi            $idx,$idx,16
+       mtctr           $rounds
+
+Loop_${dir}c:
+       ?vperm          v2,v2,v1,v5
+       v${n}cipher     v0,v0,v2
+       lvx             v2,$idx,$key
+       addi            $idx,$idx,16
+       ?vperm          v1,v1,v2,v5
+       v${n}cipher     v0,v0,v1
+       lvx             v1,$idx,$key
+       addi            $idx,$idx,16
+       bdnz            Loop_${dir}c
+
+       ?vperm          v2,v2,v1,v5
+       v${n}cipher     v0,v0,v2
+       lvx             v2,$idx,$key
+       ?vperm          v1,v1,v2,v5
+       v${n}cipherlast v0,v0,v1
+
+       vspltisb        v2,-1
+       vxor            v1,v1,v1
+       li              $idx,15                 # 15 is not typo
+       ?vperm          v2,v1,v2,v3             # outmask
+       le?vxor         v3,v3,v4
+       lvx             v1,0,$out               # outhead
+       vperm           v0,v0,v0,v3             # rotate [and byte swap in LE]
+       vsel            v1,v1,v0,v2
+       lvx             v4,$idx,$out
+       stvx            v1,0,$out
+       vsel            v0,v0,v4,v2
+       stvx            v0,$idx,$out
+
+       mtspr           256,$vrsave
+       blr
+       .long           0
+       .byte           0,12,0x14,0,0,0,3,0
+       .long           0
+.size  .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
+___
+}
+&gen_block("en");
+&gen_block("de");
+}}}
+#########################################################################
+{{{    # CBC en- and decrypt procedures                                #
+my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)=            map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
+                                               map("v$_",(4..10));
+$code.=<<___;
+.globl .${prefix}_cbc_encrypt
+.align 5
+.${prefix}_cbc_encrypt:
+       ${UCMP}i        $len,16
+       bltlr-                                  # return at once if len is below one block
+
+       cmpwi           $enc,0                  # test direction
+       lis             r0,0xffe0
+       mfspr           $vrsave,256
+       mtspr           256,r0
+
+       li              $idx,15
+       vxor            $rndkey0,$rndkey0,$rndkey0
+       le?vspltisb     $tmp,0x0f
+
+       lvx             $ivec,0,$ivp            # load [unaligned] iv
+       lvsl            $inpperm,0,$ivp
+       lvx             $inptail,$idx,$ivp
+       le?vxor         $inpperm,$inpperm,$tmp
+       vperm           $ivec,$ivec,$inptail,$inpperm
+
+       neg             r11,$inp
+       ?lvsl           $keyperm,0,$key         # prepare for unaligned key
+       lwz             $rounds,240($key)
+
+       lvsr            $inpperm,0,r11          # prepare for unaligned load
+       lvx             $inptail,0,$inp
+       addi            $inp,$inp,15            # 15 is not typo
+       le?vxor         $inpperm,$inpperm,$tmp
+
+       ?lvsr           $outperm,0,$out         # prepare for unaligned store
+       vspltisb        $outmask,-1
+       lvx             $outhead,0,$out
+       ?vperm          $outmask,$rndkey0,$outmask,$outperm
+       le?vxor         $outperm,$outperm,$tmp
+
+       srwi            $rounds,$rounds,1
+       li              $idx,16
+       subi            $rounds,$rounds,1
+       beq             Lcbc_dec                # direction flag was zero, so decrypt
+
+Lcbc_enc:
+       vmr             $inout,$inptail
+       lvx             $inptail,0,$inp
+       addi            $inp,$inp,16
+       mtctr           $rounds
+       subi            $len,$len,16            # len-=16
+
+       lvx             $rndkey0,0,$key
+        vperm          $inout,$inout,$inptail,$inpperm
+       lvx             $rndkey1,$idx,$key
+       addi            $idx,$idx,16
+       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
+       vxor            $inout,$inout,$rndkey0
+       lvx             $rndkey0,$idx,$key
+       addi            $idx,$idx,16
+       vxor            $inout,$inout,$ivec
+
+Loop_cbc_enc:
+       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
+       vcipher         $inout,$inout,$rndkey1
+       lvx             $rndkey1,$idx,$key
+       addi            $idx,$idx,16
+       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
+       vcipher         $inout,$inout,$rndkey0
+       lvx             $rndkey0,$idx,$key
+       addi            $idx,$idx,16
+       bdnz            Loop_cbc_enc
+
+       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
+       vcipher         $inout,$inout,$rndkey1
+       lvx             $rndkey1,$idx,$key
+       li              $idx,16
+       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
+       vcipherlast     $ivec,$inout,$rndkey0
+       ${UCMP}i        $len,16
+
+       vperm           $tmp,$ivec,$ivec,$outperm
+       vsel            $inout,$outhead,$tmp,$outmask
+       vmr             $outhead,$tmp
+       stvx            $inout,0,$out
+       addi            $out,$out,16
+       bge             Lcbc_enc                # loop while a full block remains
+
+       b               Lcbc_done
+
+.align 4
+Lcbc_dec:
+       ${UCMP}i        $len,128
+       bge             _aesp8_cbc_decrypt8x    # 128 bytes or more take the 8x path
+       vmr             $tmp,$inptail
+       lvx             $inptail,0,$inp
+       addi            $inp,$inp,16
+       mtctr           $rounds
+       subi            $len,$len,16            # len-=16
+
+       lvx             $rndkey0,0,$key
+        vperm          $tmp,$tmp,$inptail,$inpperm
+       lvx             $rndkey1,$idx,$key
+       addi            $idx,$idx,16
+       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
+       vxor            $inout,$tmp,$rndkey0
+       lvx             $rndkey0,$idx,$key
+       addi            $idx,$idx,16
+
+Loop_cbc_dec:
+       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
+       vncipher        $inout,$inout,$rndkey1
+       lvx             $rndkey1,$idx,$key
+       addi            $idx,$idx,16
+       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
+       vncipher        $inout,$inout,$rndkey0
+       lvx             $rndkey0,$idx,$key
+       addi            $idx,$idx,16
+       bdnz            Loop_cbc_dec
+
+       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
+       vncipher        $inout,$inout,$rndkey1
+       lvx             $rndkey1,$idx,$key
+       li              $idx,16
+       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
+       vncipherlast    $inout,$inout,$rndkey0
+       ${UCMP}i        $len,16
+
+       vxor            $inout,$inout,$ivec
+       vmr             $ivec,$tmp              # this ciphertext block is the next iv
+       vperm           $tmp,$inout,$inout,$outperm
+       vsel            $inout,$outhead,$tmp,$outmask
+       vmr             $outhead,$tmp
+       stvx            $inout,0,$out
+       addi            $out,$out,16
+       bge             Lcbc_dec                # loop while a full block remains
+
+Lcbc_done:
+       addi            $out,$out,-1
+       lvx             $inout,0,$out           # redundant in aligned case
+       vsel            $inout,$outhead,$inout,$outmask
+       stvx            $inout,0,$out
+
+       neg             $enc,$ivp               # write [unaligned] iv
+       li              $idx,15                 # 15 is not typo
+       vxor            $rndkey0,$rndkey0,$rndkey0
+       vspltisb        $outmask,-1
+       le?vspltisb     $tmp,0x0f
+       ?lvsl           $outperm,0,$enc
+       ?vperm          $outmask,$rndkey0,$outmask,$outperm
+       le?vxor         $outperm,$outperm,$tmp
+       lvx             $outhead,0,$ivp
+       vperm           $ivec,$ivec,$ivec,$outperm
+       vsel            $inout,$outhead,$ivec,$outmask
+       lvx             $inptail,$idx,$ivp
+       stvx            $inout,0,$ivp
+       vsel            $inout,$ivec,$inptail,$outmask
+       stvx            $inout,$idx,$ivp
+
+       mtspr           256,$vrsave
+       blr
+       .long           0
+       .byte           0,12,0x14,0,0,0,6,0
+       .long           0
+___
+#########################################################################
+{{     # Optimized CBC decrypt procedure                               #
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+    $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
+my $rndkey0="v23";     # v24-v25 rotating buffer for first round keys
+                       # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4);        # aliases with "caller", redundant assignment
+
+$code.=<<___;
+.align 5
+_aesp8_cbc_decrypt8x:
+       $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+       li              r10,`$FRAME+8*16+15`
+       li              r11,`$FRAME+8*16+31`
+       stvx            v20,r10,$sp             # ABI says so
+       addi            r10,r10,32
+       stvx            v21,r11,$sp
+       addi            r11,r11,32
+       stvx            v22,r10,$sp
+       addi            r10,r10,32
+       stvx            v23,r11,$sp
+       addi            r11,r11,32
+       stvx            v24,r10,$sp
+       addi            r10,r10,32
+       stvx            v25,r11,$sp
+       addi            r11,r11,32
+       stvx            v26,r10,$sp
+       addi            r10,r10,32
+       stvx            v27,r11,$sp
+       addi            r11,r11,32
+       stvx            v28,r10,$sp
+       addi            r10,r10,32
+       stvx            v29,r11,$sp
+       addi            r11,r11,32
+       stvx            v30,r10,$sp
+       stvx            v31,r11,$sp
+       li              r0,-1
+       stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
+       li              $x10,0x10
+       $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+       li              $x20,0x20
+       $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+       li              $x30,0x30
+       $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+       li              $x40,0x40
+       $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+       li              $x50,0x50
+       $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+       li              $x60,0x60
+       $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+       li              $x70,0x70
+       mtspr           256,r0
+
+       subi            $rounds,$rounds,3       # -4 in total
+       subi            $len,$len,128           # bias
+
+       lvx             $rndkey0,$x00,$key      # load key schedule
+       lvx             v30,$x10,$key
+       addi            $key,$key,0x20
+       lvx             v31,$x00,$key
+       ?vperm          $rndkey0,$rndkey0,v30,$keyperm
+       addi            $key_,$sp,$FRAME+15
+       mtctr           $rounds
+
+Load_cbc_dec_key:
+       ?vperm          v24,v30,v31,$keyperm
+       lvx             v30,$x10,$key
+       addi            $key,$key,0x20
+       stvx            v24,$x00,$key_          # off-load round[1]
+       ?vperm          v25,v31,v30,$keyperm
+       lvx             v31,$x00,$key
+       stvx            v25,$x10,$key_          # off-load round[2]
+       addi            $key_,$key_,0x20
+       bdnz            Load_cbc_dec_key
+
+       lvx             v26,$x10,$key
+       ?vperm          v24,v30,v31,$keyperm
+       lvx             v27,$x20,$key
+       stvx            v24,$x00,$key_          # off-load round[3]
+       ?vperm          v25,v31,v26,$keyperm
+       lvx             v28,$x30,$key
+       stvx            v25,$x10,$key_          # off-load round[4]
+       addi            $key_,$sp,$FRAME+15     # rewind $key_
+       ?vperm          v26,v26,v27,$keyperm
+       lvx             v29,$x40,$key
+       ?vperm          v27,v27,v28,$keyperm
+       lvx             v30,$x50,$key
+       ?vperm          v28,v28,v29,$keyperm
+       lvx             v31,$x60,$key
+       ?vperm          v29,v29,v30,$keyperm
+       lvx             $out0,$x70,$key         # borrow $out0
+       ?vperm          v30,v30,v31,$keyperm
+       lvx             v24,$x00,$key_          # pre-load round[1]
+       ?vperm          v31,v31,$out0,$keyperm
+       lvx             v25,$x10,$key_          # pre-load round[2]
+
+       #lvx            $inptail,0,$inp         # "caller" already did this
+       #addi           $inp,$inp,15            # 15 is not typo
+       subi            $inp,$inp,15            # undo "caller"
+
+        le?li          $idx,8
+       lvx_u           $in0,$x00,$inp          # load first 8 "words"
+        le?lvsl        $inpperm,0,$idx
+        le?vspltisb    $tmp,0x0f
+       lvx_u           $in1,$x10,$inp
+        le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
+       lvx_u           $in2,$x20,$inp
+        le?vperm       $in0,$in0,$in0,$inpperm
+       lvx_u           $in3,$x30,$inp
+        le?vperm       $in1,$in1,$in1,$inpperm
+       lvx_u           $in4,$x40,$inp
+        le?vperm       $in2,$in2,$in2,$inpperm
+       vxor            $out0,$in0,$rndkey0
+       lvx_u           $in5,$x50,$inp
+        le?vperm       $in3,$in3,$in3,$inpperm
+       vxor            $out1,$in1,$rndkey0
+       lvx_u           $in6,$x60,$inp
+        le?vperm       $in4,$in4,$in4,$inpperm
+       vxor            $out2,$in2,$rndkey0
+       lvx_u           $in7,$x70,$inp
+       addi            $inp,$inp,0x80
+        le?vperm       $in5,$in5,$in5,$inpperm
+       vxor            $out3,$in3,$rndkey0
+        le?vperm       $in6,$in6,$in6,$inpperm
+       vxor            $out4,$in4,$rndkey0
+        le?vperm       $in7,$in7,$in7,$inpperm
+       vxor            $out5,$in5,$rndkey0
+       vxor            $out6,$in6,$rndkey0
+       vxor            $out7,$in7,$rndkey0
+
+       mtctr           $rounds
+       b               Loop_cbc_dec8x
+.align 5
+Loop_cbc_dec8x:
+       vncipher        $out0,$out0,v24
+       vncipher        $out1,$out1,v24
+       vncipher        $out2,$out2,v24
+       vncipher        $out3,$out3,v24
+       vncipher        $out4,$out4,v24
+       vncipher        $out5,$out5,v24
+       vncipher        $out6,$out6,v24
+       vncipher        $out7,$out7,v24
+       lvx             v24,$x20,$key_          # round[3]
+       addi            $key_,$key_,0x20
+
+       vncipher        $out0,$out0,v25
+       vncipher        $out1,$out1,v25
+       vncipher        $out2,$out2,v25
+       vncipher        $out3,$out3,v25
+       vncipher        $out4,$out4,v25
+       vncipher        $out5,$out5,v25
+       vncipher        $out6,$out6,v25
+       vncipher        $out7,$out7,v25
+       lvx             v25,$x10,$key_          # round[4]
+       bdnz            Loop_cbc_dec8x
+
+       subic           $len,$len,128           # $len-=128
+       vncipher        $out0,$out0,v24
+       vncipher        $out1,$out1,v24
+       vncipher        $out2,$out2,v24
+       vncipher        $out3,$out3,v24
+       vncipher        $out4,$out4,v24
+       vncipher        $out5,$out5,v24
+       vncipher        $out6,$out6,v24
+       vncipher        $out7,$out7,v24
+
+       subfe.          r0,r0,r0                # borrow?-1:0
+       vncipher        $out0,$out0,v25
+       vncipher        $out1,$out1,v25
+       vncipher        $out2,$out2,v25
+       vncipher        $out3,$out3,v25
+       vncipher        $out4,$out4,v25
+       vncipher        $out5,$out5,v25
+       vncipher        $out6,$out6,v25
+       vncipher        $out7,$out7,v25
+
+       and             r0,r0,$len
+       vncipher        $out0,$out0,v26
+       vncipher        $out1,$out1,v26
+       vncipher        $out2,$out2,v26
+       vncipher        $out3,$out3,v26
+       vncipher        $out4,$out4,v26
+       vncipher        $out5,$out5,v26
+       vncipher        $out6,$out6,v26
+       vncipher        $out7,$out7,v26
+
+       add             $inp,$inp,r0            # $inp is adjusted in such
+                                               # way that at exit from the
+                                               # loop inX-in7 are loaded
+                                               # with last "words"
+       vncipher        $out0,$out0,v27
+       vncipher        $out1,$out1,v27
+       vncipher        $out2,$out2,v27
+       vncipher        $out3,$out3,v27
+       vncipher        $out4,$out4,v27
+       vncipher        $out5,$out5,v27
+       vncipher        $out6,$out6,v27
+       vncipher        $out7,$out7,v27
+
+       addi            $key_,$sp,$FRAME+15     # rewind $key_
+       vncipher        $out0,$out0,v28
+       vncipher        $out1,$out1,v28
+       vncipher        $out2,$out2,v28
+       vncipher        $out3,$out3,v28
+       vncipher        $out4,$out4,v28
+       vncipher        $out5,$out5,v28
+       vncipher        $out6,$out6,v28
+       vncipher        $out7,$out7,v28
+       lvx             v24,$x00,$key_          # re-pre-load round[1]
+
+       vncipher        $out0,$out0,v29
+       vncipher        $out1,$out1,v29
+       vncipher        $out2,$out2,v29
+       vncipher        $out3,$out3,v29
+       vncipher        $out4,$out4,v29
+       vncipher        $out5,$out5,v29
+       vncipher        $out6,$out6,v29
+       vncipher        $out7,$out7,v29
+       lvx             v25,$x10,$key_          # re-pre-load round[2]
+
+       vncipher        $out0,$out0,v30
+        vxor           $ivec,$ivec,v31         # xor with last round key
+       vncipher        $out1,$out1,v30
+        vxor           $in0,$in0,v31
+       vncipher        $out2,$out2,v30
+        vxor           $in1,$in1,v31
+       vncipher        $out3,$out3,v30
+        vxor           $in2,$in2,v31
+       vncipher        $out4,$out4,v30
+        vxor           $in3,$in3,v31
+       vncipher        $out5,$out5,v30
+        vxor           $in4,$in4,v31
+       vncipher        $out6,$out6,v30
+        vxor           $in5,$in5,v31
+       vncipher        $out7,$out7,v30
+        vxor           $in6,$in6,v31
+
+       vncipherlast    $out0,$out0,$ivec
+       vncipherlast    $out1,$out1,$in0
+        lvx_u          $in0,$x00,$inp          # load next input block
+       vncipherlast    $out2,$out2,$in1
+        lvx_u          $in1,$x10,$inp
+       vncipherlast    $out3,$out3,$in2
+        le?vperm       $in0,$in0,$in0,$inpperm
+        lvx_u          $in2,$x20,$inp
+       vncipherlast    $out4,$out4,$in3
+        le?vperm       $in1,$in1,$in1,$inpperm
+        lvx_u          $in3,$x30,$inp
+       vncipherlast    $out5,$out5,$in4
+        le?vperm       $in2,$in2,$in2,$inpperm
+        lvx_u          $in4,$x40,$inp
+       vncipherlast    $out6,$out6,$in5
+        le?vperm       $in3,$in3,$in3,$inpperm
+        lvx_u          $in5,$x50,$inp
+       vncipherlast    $out7,$out7,$in6
+        le?vperm       $in4,$in4,$in4,$inpperm
+        lvx_u          $in6,$x60,$inp
+       vmr             $ivec,$in7
+        le?vperm       $in5,$in5,$in5,$inpperm
+        lvx_u          $in7,$x70,$inp
+        addi           $inp,$inp,0x80
+
+       le?vperm        $out0,$out0,$out0,$inpperm
+       le?vperm        $out1,$out1,$out1,$inpperm
+       stvx_u          $out0,$x00,$out
+        le?vperm       $in6,$in6,$in6,$inpperm
+        vxor           $out0,$in0,$rndkey0
+       le?vperm        $out2,$out2,$out2,$inpperm
+       stvx_u          $out1,$x10,$out
+        le?vperm       $in7,$in7,$in7,$inpperm
+        vxor           $out1,$in1,$rndkey0
+       le?vperm        $out3,$out3,$out3,$inpperm
+       stvx_u          $out2,$x20,$out
+        vxor           $out2,$in2,$rndkey0
+       le?vperm        $out4,$out4,$out4,$inpperm
+       stvx_u          $out3,$x30,$out
+        vxor           $out3,$in3,$rndkey0
+       le?vperm        $out5,$out5,$out5,$inpperm
+       stvx_u          $out4,$x40,$out
+        vxor           $out4,$in4,$rndkey0
+       le?vperm        $out6,$out6,$out6,$inpperm
+       stvx_u          $out5,$x50,$out
+        vxor           $out5,$in5,$rndkey0
+       le?vperm        $out7,$out7,$out7,$inpperm
+       stvx_u          $out6,$x60,$out
+        vxor           $out6,$in6,$rndkey0
+       stvx_u          $out7,$x70,$out
+       addi            $out,$out,0x80
+        vxor           $out7,$in7,$rndkey0
+
+       mtctr           $rounds
+       beq             Loop_cbc_dec8x          # did $len-=128 borrow?
+
+       addic.          $len,$len,128
+       beq             Lcbc_dec8x_done
+       nop
+       nop
+
+Loop_cbc_dec8x_tail:                           # up to 7 "words" tail...
+       vncipher        $out1,$out1,v24
+       vncipher        $out2,$out2,v24
+       vncipher        $out3,$out3,v24
+       vncipher        $out4,$out4,v24
+       vncipher        $out5,$out5,v24
+       vncipher        $out6,$out6,v24
+       vncipher        $out7,$out7,v24
+       lvx             v24,$x20,$key_          # round[3]
+       addi            $key_,$key_,0x20
+
+       vncipher        $out1,$out1,v25
+       vncipher        $out2,$out2,v25
+       vncipher        $out3,$out3,v25
+       vncipher        $out4,$out4,v25
+       vncipher        $out5,$out5,v25
+       vncipher        $out6,$out6,v25
+       vncipher        $out7,$out7,v25
+       lvx             v25,$x10,$key_          # round[4]
+       bdnz            Loop_cbc_dec8x_tail
+
+       vncipher        $out1,$out1,v24
+       vncipher        $out2,$out2,v24
+       vncipher        $out3,$out3,v24
+       vncipher        $out4,$out4,v24
+       vncipher        $out5,$out5,v24
+       vncipher        $out6,$out6,v24
+       vncipher        $out7,$out7,v24
+
+       vncipher        $out1,$out1,v25
+       vncipher        $out2,$out2,v25
+       vncipher        $out3,$out3,v25
+       vncipher        $out4,$out4,v25
+       vncipher        $out5,$out5,v25
+       vncipher        $out6,$out6,v25
+       vncipher        $out7,$out7,v25
+
+       vncipher        $out1,$out1,v26
+       vncipher        $out2,$out2,v26
+       vncipher        $out3,$out3,v26
+       vncipher        $out4,$out4,v26
+       vncipher        $out5,$out5,v26
+       vncipher        $out6,$out6,v26
+       vncipher        $out7,$out7,v26
+
+       vncipher        $out1,$out1,v27
+       vncipher        $out2,$out2,v27
+       vncipher        $out3,$out3,v27
+       vncipher        $out4,$out4,v27
+       vncipher        $out5,$out5,v27
+       vncipher        $out6,$out6,v27
+       vncipher        $out7,$out7,v27
+
+       vncipher        $out1,$out1,v28
+       vncipher        $out2,$out2,v28
+       vncipher        $out3,$out3,v28
+       vncipher        $out4,$out4,v28
+       vncipher        $out5,$out5,v28
+       vncipher        $out6,$out6,v28
+       vncipher        $out7,$out7,v28
+
+       vncipher        $out1,$out1,v29
+       vncipher        $out2,$out2,v29
+       vncipher        $out3,$out3,v29
+       vncipher        $out4,$out4,v29
+       vncipher        $out5,$out5,v29
+       vncipher        $out6,$out6,v29
+       vncipher        $out7,$out7,v29
+
+       vncipher        $out1,$out1,v30
+        vxor           $ivec,$ivec,v31         # last round key
+       vncipher        $out2,$out2,v30
+        vxor           $in1,$in1,v31
+       vncipher        $out3,$out3,v30
+        vxor           $in2,$in2,v31
+       vncipher        $out4,$out4,v30
+        vxor           $in3,$in3,v31
+       vncipher        $out5,$out5,v30
+        vxor           $in4,$in4,v31
+       vncipher        $out6,$out6,v30
+        vxor           $in5,$in5,v31
+       vncipher        $out7,$out7,v30
+        vxor           $in6,$in6,v31
+
+       cmplwi          $len,32                 # switch($len)
+       blt             Lcbc_dec8x_one
+       nop
+       beq             Lcbc_dec8x_two
+       cmplwi          $len,64
+       blt             Lcbc_dec8x_three
+       nop
+       beq             Lcbc_dec8x_four
+       cmplwi          $len,96
+       blt             Lcbc_dec8x_five
+       nop
+       beq             Lcbc_dec8x_six
+
+Lcbc_dec8x_seven:
+       vncipherlast    $out1,$out1,$ivec
+       vncipherlast    $out2,$out2,$in1
+       vncipherlast    $out3,$out3,$in2
+       vncipherlast    $out4,$out4,$in3
+       vncipherlast    $out5,$out5,$in4
+       vncipherlast    $out6,$out6,$in5
+       vncipherlast    $out7,$out7,$in6
+       vmr             $ivec,$in7
+
+       le?vperm        $out1,$out1,$out1,$inpperm
+       le?vperm        $out2,$out2,$out2,$inpperm
+       stvx_u          $out1,$x00,$out
+       le?vperm        $out3,$out3,$out3,$inpperm
+       stvx_u          $out2,$x10,$out
+       le?vperm        $out4,$out4,$out4,$inpperm
+       stvx_u          $out3,$x20,$out
+       le?vperm        $out5,$out5,$out5,$inpperm
+       stvx_u          $out4,$x30,$out
+       le?vperm        $out6,$out6,$out6,$inpperm
+       stvx_u          $out5,$x40,$out
+       le?vperm        $out7,$out7,$out7,$inpperm
+       stvx_u          $out6,$x50,$out
+       stvx_u          $out7,$x60,$out
+       addi            $out,$out,0x70
+       b               Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_six:
+       vncipherlast    $out2,$out2,$ivec
+       vncipherlast    $out3,$out3,$in2
+       vncipherlast    $out4,$out4,$in3
+       vncipherlast    $out5,$out5,$in4
+       vncipherlast    $out6,$out6,$in5
+       vncipherlast    $out7,$out7,$in6
+       vmr             $ivec,$in7
+
+       le?vperm        $out2,$out2,$out2,$inpperm
+       le?vperm        $out3,$out3,$out3,$inpperm
+       stvx_u          $out2,$x00,$out
+       le?vperm        $out4,$out4,$out4,$inpperm
+       stvx_u          $out3,$x10,$out
+       le?vperm        $out5,$out5,$out5,$inpperm
+       stvx_u          $out4,$x20,$out
+       le?vperm        $out6,$out6,$out6,$inpperm
+       stvx_u          $out5,$x30,$out
+       le?vperm        $out7,$out7,$out7,$inpperm
+       stvx_u          $out6,$x40,$out
+       stvx_u          $out7,$x50,$out
+       addi            $out,$out,0x60
+       b               Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_five:
+       vncipherlast    $out3,$out3,$ivec
+       vncipherlast    $out4,$out4,$in3
+       vncipherlast    $out5,$out5,$in4
+       vncipherlast    $out6,$out6,$in5
+       vncipherlast    $out7,$out7,$in6
+       vmr             $ivec,$in7
+
+       le?vperm        $out3,$out3,$out3,$inpperm
+       le?vperm        $out4,$out4,$out4,$inpperm
+       stvx_u          $out3,$x00,$out
+       le?vperm        $out5,$out5,$out5,$inpperm
+       stvx_u          $out4,$x10,$out
+       le?vperm        $out6,$out6,$out6,$inpperm
+       stvx_u          $out5,$x20,$out
+       le?vperm        $out7,$out7,$out7,$inpperm
+       stvx_u          $out6,$x30,$out
+       stvx_u          $out7,$x40,$out
+       addi            $out,$out,0x50
+       b               Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_four:
+       vncipherlast    $out4,$out4,$ivec
+       vncipherlast    $out5,$out5,$in4
+       vncipherlast    $out6,$out6,$in5
+       vncipherlast    $out7,$out7,$in6
+       vmr             $ivec,$in7
+
+       le?vperm        $out4,$out4,$out4,$inpperm
+       le?vperm        $out5,$out5,$out5,$inpperm
+       stvx_u          $out4,$x00,$out
+       le?vperm        $out6,$out6,$out6,$inpperm
+       stvx_u          $out5,$x10,$out
+       le?vperm        $out7,$out7,$out7,$inpperm
+       stvx_u          $out6,$x20,$out
+       stvx_u          $out7,$x30,$out
+       addi            $out,$out,0x40
+       b               Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_three:
+       vncipherlast    $out5,$out5,$ivec
+       vncipherlast    $out6,$out6,$in5
+       vncipherlast    $out7,$out7,$in6
+       vmr             $ivec,$in7
+
+       le?vperm        $out5,$out5,$out5,$inpperm
+       le?vperm        $out6,$out6,$out6,$inpperm
+       stvx_u          $out5,$x00,$out
+       le?vperm        $out7,$out7,$out7,$inpperm
+       stvx_u          $out6,$x10,$out
+       stvx_u          $out7,$x20,$out
+       addi            $out,$out,0x30
+       b               Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_two:
+       vncipherlast    $out6,$out6,$ivec
+       vncipherlast    $out7,$out7,$in6
+       vmr             $ivec,$in7
+
+       le?vperm        $out6,$out6,$out6,$inpperm
+       le?vperm        $out7,$out7,$out7,$inpperm
+       stvx_u          $out6,$x00,$out
+       stvx_u          $out7,$x10,$out
+       addi            $out,$out,0x20
+       b               Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_one:
+       vncipherlast    $out7,$out7,$ivec
+       vmr             $ivec,$in7
+
+       le?vperm        $out7,$out7,$out7,$inpperm
+       stvx_u          $out7,0,$out
+       addi            $out,$out,0x10
+
+Lcbc_dec8x_done:
+       le?vperm        $ivec,$ivec,$ivec,$inpperm
+       stvx_u          $ivec,0,$ivp            # write [unaligned] iv
+
+       li              r10,`$FRAME+15`
+       li              r11,`$FRAME+31`
+       stvx            $inpperm,r10,$sp        # wipe copies of round keys
+       addi            r10,r10,32
+       stvx            $inpperm,r11,$sp
+       addi            r11,r11,32
+       stvx            $inpperm,r10,$sp
+       addi            r10,r10,32
+       stvx            $inpperm,r11,$sp
+       addi            r11,r11,32
+       stvx            $inpperm,r10,$sp
+       addi            r10,r10,32
+       stvx            $inpperm,r11,$sp
+       addi            r11,r11,32
+       stvx            $inpperm,r10,$sp
+       addi            r10,r10,32
+       stvx            $inpperm,r11,$sp
+       addi            r11,r11,32
+
+       mtspr           256,$vrsave
+       lvx             v20,r10,$sp             # ABI says so
+       addi            r10,r10,32
+       lvx             v21,r11,$sp
+       addi            r11,r11,32
+       lvx             v22,r10,$sp
+       addi            r10,r10,32
+       lvx             v23,r11,$sp
+       addi            r11,r11,32
+       lvx             v24,r10,$sp
+       addi            r10,r10,32
+       lvx             v25,r11,$sp
+       addi            r11,r11,32
+       lvx             v26,r10,$sp
+       addi            r10,r10,32
+       lvx             v27,r11,$sp
+       addi            r11,r11,32
+       lvx             v28,r10,$sp
+       addi            r10,r10,32
+       lvx             v29,r11,$sp
+       addi            r11,r11,32
+       lvx             v30,r10,$sp
+       lvx             v31,r11,$sp
+       $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+       $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+       $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+       $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+       $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+       $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+       addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+       blr
+       .long           0
+       .byte           0,12,0x04,0,0x80,6,6,0
+       .long           0
+.size  .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
+___
+}}     }}}
+
+#########################################################################
+{{{    # CTR procedure[s]                                              #
+# Basic (one block at a time) CTR routine, .${prefix}_ctr32_encrypt_blocks.
+#
+# Register arguments as mapped below:
+#   r3 $inp  input, may be unaligned
+#   r4 $out  output, may be unaligned
+#   r5 $len  length counted in 16-byte blocks; returns at once if below 1
+#   r6 $key  key schedule; the round count is read from offset 240
+#   r7 $ivp  counter block, may be unaligned
+# r9 ($rounds) and r10 ($idx) are scratch here; r8 ($x10) is not touched
+# by this routine.
+#
+# Per block: the counter is run through all rounds but the last, the input
+# block is xored into the last round key, and vcipherlast then produces
+# E(counter) xor input directly.  The counter block has $one added to it
+# with vadduwm after every block.
+# Requests of 8 or more blocks are handed to _aesp8_ctr32_encrypt8x.
+#
+# NOTE(review): lines prefixed with "le?" and instructions prefixed with "?"
+# are presumably endian-specific and resolved by the perlasm post-processing
+# step - confirm against that code, it is outside this section.
+my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)=            map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
+                                               map("v$_",(4..11));
+my $dat=$tmp;                                  # $dat shares v3 with $tmp
+
+$code.=<<___;
+.globl .${prefix}_ctr32_encrypt_blocks
+.align 5
+.${prefix}_ctr32_encrypt_blocks:
+       ${UCMP}i        $len,1
+       bltlr-                                  # no blocks requested, return
+
+       lis             r0,0xfff0               # r0 = 0xfff00000
+       mfspr           $vrsave,256             # keep caller's SPR 256
+       mtspr           256,r0                  # presumably marks v0-v11 live
+
+       li              $idx,15
+       vxor            $rndkey0,$rndkey0,$rndkey0      # zero, used as scratch
+       le?vspltisb     $tmp,0x0f
+
+       lvx             $ivec,0,$ivp            # load [unaligned] iv
+       lvsl            $inpperm,0,$ivp
+       lvx             $inptail,$idx,$ivp
+        vspltisb       $one,1                  # 0x01 in every byte
+       le?vxor         $inpperm,$inpperm,$tmp
+       vperm           $ivec,$ivec,$inptail,$inpperm
+        vsldoi         $one,$rndkey0,$one,1    # 15 zero bytes, then 0x01
+
+       neg             r11,$inp
+       ?lvsl           $keyperm,0,$key         # prepare for unaligned key
+       lwz             $rounds,240($key)       # round count
+
+       lvsr            $inpperm,0,r11          # prepare for unaligned load
+       lvx             $inptail,0,$inp
+       addi            $inp,$inp,15            # 15 is not typo
+       le?vxor         $inpperm,$inpperm,$tmp
+
+       srwi            $rounds,$rounds,1       # loop does two rounds a pass
+       li              $idx,16
+       subi            $rounds,$rounds,1       # last two rounds follow loop
+
+       ${UCMP}i        $len,8
+       bge             _aesp8_ctr32_encrypt8x  # 8 or more blocks: 8x path
+
+       ?lvsr           $outperm,0,$out         # prepare for unaligned store
+       vspltisb        $outmask,-1
+       lvx             $outhead,0,$out
+       ?vperm          $outmask,$rndkey0,$outmask,$outperm
+       le?vxor         $outperm,$outperm,$tmp
+
+       lvx             $rndkey0,0,$key
+       mtctr           $rounds
+       lvx             $rndkey1,$idx,$key
+       addi            $idx,$idx,16
+       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
+       vxor            $inout,$ivec,$rndkey0   # counter xor first round key
+       lvx             $rndkey0,$idx,$key
+       addi            $idx,$idx,16
+       b               Loop_ctr32_enc
+
+.align 5
+Loop_ctr32_enc:
+       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
+       vcipher         $inout,$inout,$rndkey1
+       lvx             $rndkey1,$idx,$key
+       addi            $idx,$idx,16
+       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
+       vcipher         $inout,$inout,$rndkey0
+       lvx             $rndkey0,$idx,$key
+       addi            $idx,$idx,16
+       bdnz            Loop_ctr32_enc
+
+       vadduwm         $ivec,$ivec,$one        # counter for the next block
+        vmr            $dat,$inptail
+        lvx            $inptail,0,$inp
+        addi           $inp,$inp,16
+        subic.         $len,$len,1             # blocks--
+
+       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
+       vcipher         $inout,$inout,$rndkey1
+       lvx             $rndkey1,$idx,$key
+        vperm          $dat,$dat,$inptail,$inpperm     # realigned input block
+        li             $idx,16
+       ?vperm          $rndkey1,$rndkey0,$rndkey1,$keyperm
+        lvx            $rndkey0,0,$key
+       vxor            $dat,$dat,$rndkey1      # last round key
+       vcipherlast     $inout,$inout,$dat      # E(counter) xor input
+
+        lvx            $rndkey1,$idx,$key
+        addi           $idx,$idx,16
+       vperm           $inout,$inout,$inout,$outperm   # rotate for unaligned store
+       vsel            $dat,$outhead,$inout,$outmask   # merge with carried head
+        mtctr          $rounds
+        ?vperm         $rndkey0,$rndkey0,$rndkey1,$keyperm
+       vmr             $outhead,$inout         # carry tail into next store
+        vxor           $inout,$ivec,$rndkey0   # start the next block
+        lvx            $rndkey0,$idx,$key
+        addi           $idx,$idx,16
+       stvx            $dat,0,$out
+       addi            $out,$out,16
+       bne             Loop_ctr32_enc          # tests subic. result from above
+
+       addi            $out,$out,-1
+       lvx             $inout,0,$out           # redundant in aligned case
+       vsel            $inout,$outhead,$inout,$outmask # keep bytes past the output
+       stvx            $inout,0,$out
+
+       mtspr           256,$vrsave             # restore caller's SPR 256
+       blr
+       .long           0
+       .byte           0,12,0x14,0,0,0,6,0
+       .long           0
+___
+#########################################################################
+{{     # Optimized CTR procedure                                       #
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+    $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
+my $rndkey0="v23";     # v24-v25 rotating buffer for first round keys
+                       # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4);        # aliases with "caller", redundant assignment
+my ($two,$three,$four)=($outhead,$outperm,$outmask);
+
+$code.=<<___;
+.align 5
+_aesp8_ctr32_encrypt8x:
+       $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+       li              r10,`$FRAME+8*16+15`
+       li              r11,`$FRAME+8*16+31`
+       stvx            v20,r10,$sp             # ABI says so
+       addi            r10,r10,32
+       stvx            v21,r11,$sp
+       addi            r11,r11,32
+       stvx            v22,r10,$sp
+       addi            r10,r10,32
+       stvx            v23,r11,$sp
+       addi            r11,r11,32
+       stvx            v24,r10,$sp
+       addi            r10,r10,32
+       stvx            v25,r11,$sp
+       addi            r11,r11,32
+       stvx            v26,r10,$sp
+       addi            r10,r10,32
+       stvx            v27,r11,$sp
+       addi            r11,r11,32
+       stvx            v28,r10,$sp
+       addi            r10,r10,32
+       stvx            v29,r11,$sp
+       addi            r11,r11,32
+       stvx            v30,r10,$sp
+       stvx            v31,r11,$sp
+       li              r0,-1
+       stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
+       li              $x10,0x10
+       $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+       li              $x20,0x20
+       $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+       li              $x30,0x30
+       $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+       li              $x40,0x40
+       $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+       li              $x50,0x50
+       $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+       li              $x60,0x60
+       $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+       li              $x70,0x70
+       mtspr           256,r0
+
+       subi            $rounds,$rounds,3       # -4 in total
+
+       lvx             $rndkey0,$x00,$key      # load key schedule
+       lvx             v30,$x10,$key
+       addi            $key,$key,0x20
+       lvx             v31,$x00,$key
+       ?vperm          $rndkey0,$rndkey0,v30,$keyperm
+       addi            $key_,$sp,$FRAME+15
+       mtctr           $rounds
+
+Load_ctr32_enc_key:
+       ?vperm          v24,v30,v31,$keyperm
+       lvx             v30,$x10,$key
+       addi            $key,$key,0x20
+       stvx            v24,$x00,$key_          # off-load round[1]
+       ?vperm          v25,v31,v30,$keyperm
+       lvx             v31,$x00,$key
+       stvx            v25,$x10,$key_          # off-load round[2]
+       addi            $key_,$key_,0x20
+       bdnz            Load_ctr32_enc_key
+
+       lvx             v26,$x10,$key
+       ?vperm          v24,v30,v31,$keyperm
+       lvx             v27,$x20,$key
+       stvx            v24,$x00,$key_          # off-load round[3]
+       ?vperm          v25,v31,v26,$keyperm
+       lvx             v28,$x30,$key
+       stvx            v25,$x10,$key_          # off-load round[4]
+       addi            $key_,$sp,$FRAME+15     # rewind $key_
+       ?vperm          v26,v26,v27,$keyperm
+       lvx             v29,$x40,$key
+       ?vperm          v27,v27,v28,$keyperm
+       lvx             v30,$x50,$key
+       ?vperm          v28,v28,v29,$keyperm
+       lvx             v31,$x60,$key
+       ?vperm          v29,v29,v30,$keyperm
+       lvx             $out0,$x70,$key         # borrow $out0
+       ?vperm          v30,v30,v31,$keyperm
+       lvx             v24,$x00,$key_          # pre-load round[1]
+       ?vperm          v31,v31,$out0,$keyperm
+       lvx             v25,$x10,$key_          # pre-load round[2]
+
+       vadduwm         $two,$one,$one
+       subi            $inp,$inp,15            # undo "caller"
+       $SHL            $len,$len,4
+
+       vadduwm         $out1,$ivec,$one        # counter values ...
+       vadduwm         $out2,$ivec,$two
+       vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
+        le?li          $idx,8
+       vadduwm         $out3,$out1,$two
+       vxor            $out1,$out1,$rndkey0
+        le?lvsl        $inpperm,0,$idx
+       vadduwm         $out4,$out2,$two
+       vxor            $out2,$out2,$rndkey0
+        le?vspltisb    $tmp,0x0f
+       vadduwm         $out5,$out3,$two
+       vxor            $out3,$out3,$rndkey0
+        le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
+       vadduwm         $out6,$out4,$two
+       vxor            $out4,$out4,$rndkey0
+       vadduwm         $out7,$out5,$two
+       vxor            $out5,$out5,$rndkey0
+       vadduwm         $ivec,$out6,$two        # next counter value
+       vxor            $out6,$out6,$rndkey0
+       vxor            $out7,$out7,$rndkey0
+
+       mtctr           $rounds
+       b               Loop_ctr32_enc8x
+.align 5
+Loop_ctr32_enc8x:
+       vcipher         $out0,$out0,v24
+       vcipher         $out1,$out1,v24
+       vcipher         $out2,$out2,v24
+       vcipher         $out3,$out3,v24
+       vcipher         $out4,$out4,v24
+       vcipher         $out5,$out5,v24
+       vcipher         $out6,$out6,v24
+       vcipher         $out7,$out7,v24
+Loop_ctr32_enc8x_middle:
+       lvx             v24,$x20,$key_          # round[3]
+       addi            $key_,$key_,0x20
+
+       vcipher         $out0,$out0,v25
+       vcipher         $out1,$out1,v25
+       vcipher         $out2,$out2,v25
+       vcipher         $out3,$out3,v25
+       vcipher         $out4,$out4,v25
+       vcipher         $out5,$out5,v25
+       vcipher         $out6,$out6,v25
+       vcipher         $out7,$out7,v25
+       lvx             v25,$x10,$key_          # round[4]
+       bdnz            Loop_ctr32_enc8x
+
+       subic           r11,$len,256            # $len-256, borrow $key_
+       vcipher         $out0,$out0,v24
+       vcipher         $out1,$out1,v24
+       vcipher         $out2,$out2,v24
+       vcipher         $out3,$out3,v24
+       vcipher         $out4,$out4,v24
+       vcipher         $out5,$out5,v24
+       vcipher         $out6,$out6,v24
+       vcipher         $out7,$out7,v24
+
+       subfe           r0,r0,r0                # borrow?-1:0
+       vcipher         $out0,$out0,v25
+       vcipher         $out1,$out1,v25
+       vcipher         $out2,$out2,v25
+       vcipher         $out3,$out3,v25
+       vcipher         $out4,$out4,v25
+       vcipher         $out5,$out5,v25
+       vcipher         $out6,$out6,v25
+       vcipher         $out7,$out7,v25
+
+       and             r0,r0,r11
+       addi            $key_,$sp,$FRAME+15     # rewind $key_
+       vcipher         $out0,$out0,v26
+       vcipher         $out1,$out1,v26
+       vcipher         $out2,$out2,v26
+       vcipher         $out3,$out3,v26
+       vcipher         $out4,$out4,v26
+       vcipher         $out5,$out5,v26
+       vcipher         $out6,$out6,v26
+       vcipher         $out7,$out7,v26
+       lvx             v24,$x00,$key_          # re-pre-load round[1]
+
+       subic           $len,$len,129           # $len-=129
+       vcipher         $out0,$out0,v27
+       addi            $len,$len,1             # $len-=128 really
+       vcipher         $out1,$out1,v27
+       vcipher         $out2,$out2,v27
+       vcipher         $out3,$out3,v27
+       vcipher         $out4,$out4,v27
+       vcipher         $out5,$out5,v27
+       vcipher         $out6,$out6,v27
+       vcipher         $out7,$out7,v27
+       lvx             v25,$x10,$key_          # re-pre-load round[2]
+
+       vcipher         $out0,$out0,v28
+        lvx_u          $in0,$x00,$inp          # load input
+       vcipher         $out1,$out1,v28
+        lvx_u          $in1,$x10,$inp
+       vcipher         $out2,$out2,v28
+        lvx_u          $in2,$x20,$inp
+       vcipher         $out3,$out3,v28
+        lvx_u          $in3,$x30,$inp
+       vcipher         $out4,$out4,v28
+        lvx_u          $in4,$x40,$inp
+       vcipher         $out5,$out5,v28
+        lvx_u          $in5,$x50,$inp
+       vcipher         $out6,$out6,v28
+        lvx_u          $in6,$x60,$inp
+       vcipher         $out7,$out7,v28
+        lvx_u          $in7,$x70,$inp
+        addi           $inp,$inp,0x80
+
+       vcipher         $out0,$out0,v29
+        le?vperm       $in0,$in0,$in0,$inpperm
+       vcipher         $out1,$out1,v29
+        le?vperm       $in1,$in1,$in1,$inpperm
+       vcipher         $out2,$out2,v29
+        le?vperm       $in2,$in2,$in2,$inpperm
+       vcipher         $out3,$out3,v29
+        le?vperm       $in3,$in3,$in3,$inpperm
+       vcipher         $out4,$out4,v29
+        le?vperm       $in4,$in4,$in4,$inpperm
+       vcipher         $out5,$out5,v29
+        le?vperm       $in5,$in5,$in5,$inpperm
+       vcipher         $out6,$out6,v29
+        le?vperm       $in6,$in6,$in6,$inpperm
+       vcipher         $out7,$out7,v29
+        le?vperm       $in7,$in7,$in7,$inpperm
+
+       add             $inp,$inp,r0            # $inp is adjusted in such
+                                               # way that at exit from the
+                                               # loop inX-in7 are loaded
+                                               # with last "words"
+       subfe.          r0,r0,r0                # borrow?-1:0
+       vcipher         $out0,$out0,v30
+        vxor           $in0,$in0,v31           # xor with last round key
+       vcipher         $out1,$out1,v30
+        vxor           $in1,$in1,v31
+       vcipher         $out2,$out2,v30
+        vxor           $in2,$in2,v31
+       vcipher         $out3,$out3,v30
+        vxor           $in3,$in3,v31
+       vcipher         $out4,$out4,v30
+        vxor           $in4,$in4,v31
+       vcipher         $out5,$out5,v30
+        vxor           $in5,$in5,v31
+       vcipher         $out6,$out6,v30
+        vxor           $in6,$in6,v31
+       vcipher         $out7,$out7,v30
+        vxor           $in7,$in7,v31
+
+       bne             Lctr32_enc8x_break      # did $len-129 borrow?
+
+       vcipherlast     $in0,$out0,$in0
+       vcipherlast     $in1,$out1,$in1
+        vadduwm        $out1,$ivec,$one        # counter values ...
+       vcipherlast     $in2,$out2,$in2
+        vadduwm        $out2,$ivec,$two
+        vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
+       vcipherlast     $in3,$out3,$in3
+        vadduwm        $out3,$out1,$two
+        vxor           $out1,$out1,$rndkey0
+       vcipherlast     $in4,$out4,$in4
+        vadduwm        $out4,$out2,$two
+        vxor           $out2,$out2,$rndkey0
+       vcipherlast     $in5,$out5,$in5
+        vadduwm        $out5,$out3,$two
+        vxor           $out3,$out3,$rndkey0
+       vcipherlast     $in6,$out6,$in6
+        vadduwm        $out6,$out4,$two
+        vxor           $out4,$out4,$rndkey0
+       vcipherlast     $in7,$out7,$in7
+        vadduwm        $out7,$out5,$two
+        vxor           $out5,$out5,$rndkey0
+       le?vperm        $in0,$in0,$in0,$inpperm
+        vadduwm        $ivec,$out6,$two        # next counter value
+        vxor           $out6,$out6,$rndkey0
+       le?vperm        $in1,$in1,$in1,$inpperm
+        vxor           $out7,$out7,$rndkey0
+       mtctr           $rounds
+
+        vcipher        $out0,$out0,v24
+       stvx_u          $in0,$x00,$out
+       le?vperm        $in2,$in2,$in2,$inpperm
+        vcipher        $out1,$out1,v24
+       stvx_u          $in1,$x10,$out
+       le?vperm        $in3,$in3,$in3,$inpperm
+        vcipher        $out2,$out2,v24
+       stvx_u          $in2,$x20,$out
+       le?vperm        $in4,$in4,$in4,$inpperm
+        vcipher        $out3,$out3,v24
+       stvx_u          $in3,$x30,$out
+       le?vperm        $in5,$in5,$in5,$inpperm
+        vcipher        $out4,$out4,v24
+       stvx_u          $in4,$x40,$out
+       le?vperm        $in6,$in6,$in6,$inpperm
+        vcipher        $out5,$out5,v24
+       stvx_u          $in5,$x50,$out
+       le?vperm        $in7,$in7,$in7,$inpperm
+        vcipher        $out6,$out6,v24
+       stvx_u          $in6,$x60,$out
+        vcipher        $out7,$out7,v24
+       stvx_u          $in7,$x70,$out
+       addi            $out,$out,0x80
+
+       b               Loop_ctr32_enc8x_middle
+
+.align 5
+Lctr32_enc8x_break:                     # tail dispatch: the value of len selects the handler for the remaining 1..8 blocks
+       cmpwi           $len,-0x60      # presumably len is a multiple of 16 at this point -- TODO confirm
+       blt             Lctr32_enc8x_one        # len below -0x60
+       nop
+       beq             Lctr32_enc8x_two        # len equal to -0x60 (reuses flags of the cmpwi above)
+       cmpwi           $len,-0x40
+       blt             Lctr32_enc8x_three      # len between -0x60 and -0x40, exclusive
+       nop
+       beq             Lctr32_enc8x_four       # len equal to -0x40
+       cmpwi           $len,-0x20
+       blt             Lctr32_enc8x_five       # len between -0x40 and -0x20, exclusive
+       nop
+       beq             Lctr32_enc8x_six        # len equal to -0x20
+       cmpwi           $len,0x00
+       blt             Lctr32_enc8x_seven      # len between -0x20 and 0, exclusive; otherwise fall through to the eight-block handler
+
+Lctr32_enc8x_eight:
+       vcipherlast     $out0,$out0,$in0
+       vcipherlast     $out1,$out1,$in1
+       vcipherlast     $out2,$out2,$in2
+       vcipherlast     $out3,$out3,$in3
+       vcipherlast     $out4,$out4,$in4
+       vcipherlast     $out5,$out5,$in5
+       vcipherlast     $out6,$out6,$in6
+       vcipherlast     $out7,$out7,$in7
+
+       le?vperm        $out0,$out0,$out0,$inpperm
+       le?vperm        $out1,$out1,$out1,$inpperm
+       stvx_u          $out0,$x00,$out
+       le?vperm        $out2,$out2,$out2,$inpperm
+       stvx_u          $out1,$x10,$out
+       le?vperm        $out3,$out3,$out3,$inpperm
+       stvx_u          $out2,$x20,$out
+       le?vperm        $out4,$out4,$out4,$inpperm
+       stvx_u          $out3,$x30,$out
+       le?vperm        $out5,$out5,$out5,$inpperm
+       stvx_u          $out4,$x40,$out
+       le?vperm        $out6,$out6,$out6,$inpperm
+       stvx_u          $out5,$x50,$out
+       le?vperm        $out7,$out7,$out7,$inpperm
+       stvx_u          $out6,$x60,$out
+       stvx_u          $out7,$x70,$out
+       addi            $out,$out,0x80
+       b               Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_seven:
+       vcipherlast     $out0,$out0,$in1
+       vcipherlast     $out1,$out1,$in2
+       vcipherlast     $out2,$out2,$in3
+       vcipherlast     $out3,$out3,$in4
+       vcipherlast     $out4,$out4,$in5
+       vcipherlast     $out5,$out5,$in6
+       vcipherlast     $out6,$out6,$in7
+
+       le?vperm        $out0,$out0,$out0,$inpperm
+       le?vperm        $out1,$out1,$out1,$inpperm
+       stvx_u          $out0,$x00,$out
+       le?vperm        $out2,$out2,$out2,$inpperm
+       stvx_u          $out1,$x10,$out
+       le?vperm        $out3,$out3,$out3,$inpperm
+       stvx_u          $out2,$x20,$out
+       le?vperm        $out4,$out4,$out4,$inpperm
+       stvx_u          $out3,$x30,$out
+       le?vperm        $out5,$out5,$out5,$inpperm
+       stvx_u          $out4,$x40,$out
+       le?vperm        $out6,$out6,$out6,$inpperm
+       stvx_u          $out5,$x50,$out
+       stvx_u          $out6,$x60,$out
+       addi            $out,$out,0x70
+       b               Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_six:
+       vcipherlast     $out0,$out0,$in2
+       vcipherlast     $out1,$out1,$in3
+       vcipherlast     $out2,$out2,$in4
+       vcipherlast     $out3,$out3,$in5
+       vcipherlast     $out4,$out4,$in6
+       vcipherlast     $out5,$out5,$in7
+
+       le?vperm        $out0,$out0,$out0,$inpperm
+       le?vperm        $out1,$out1,$out1,$inpperm
+       stvx_u          $out0,$x00,$out
+       le?vperm        $out2,$out2,$out2,$inpperm
+       stvx_u          $out1,$x10,$out
+       le?vperm        $out3,$out3,$out3,$inpperm
+       stvx_u          $out2,$x20,$out
+       le?vperm        $out4,$out4,$out4,$inpperm
+       stvx_u          $out3,$x30,$out
+       le?vperm        $out5,$out5,$out5,$inpperm
+       stvx_u          $out4,$x40,$out
+       stvx_u          $out5,$x50,$out
+       addi            $out,$out,0x60
+       b               Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_five:
+       vcipherlast     $out0,$out0,$in3
+       vcipherlast     $out1,$out1,$in4
+       vcipherlast     $out2,$out2,$in5
+       vcipherlast     $out3,$out3,$in6
+       vcipherlast     $out4,$out4,$in7
+
+       le?vperm        $out0,$out0,$out0,$inpperm
+       le?vperm        $out1,$out1,$out1,$inpperm
+       stvx_u          $out0,$x00,$out
+       le?vperm        $out2,$out2,$out2,$inpperm
+       stvx_u          $out1,$x10,$out
+       le?vperm        $out3,$out3,$out3,$inpperm
+       stvx_u          $out2,$x20,$out
+       le?vperm        $out4,$out4,$out4,$inpperm
+       stvx_u          $out3,$x30,$out
+       stvx_u          $out4,$x40,$out
+       addi            $out,$out,0x50
+       b               Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_four:
+       vcipherlast     $out0,$out0,$in4
+       vcipherlast     $out1,$out1,$in5
+       vcipherlast     $out2,$out2,$in6
+       vcipherlast     $out3,$out3,$in7
+
+       le?vperm        $out0,$out0,$out0,$inpperm
+       le?vperm        $out1,$out1,$out1,$inpperm
+       stvx_u          $out0,$x00,$out
+       le?vperm        $out2,$out2,$out2,$inpperm
+       stvx_u          $out1,$x10,$out
+       le?vperm        $out3,$out3,$out3,$inpperm
+       stvx_u          $out2,$x20,$out
+       stvx_u          $out3,$x30,$out
+       addi            $out,$out,0x40
+       b               Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_three:                     # tail handler: finish and store the last three blocks
+       vcipherlast     $out0,$out0,$in5        # final AES round; in5..in7 presumably hold the last three inputs pre-xored with the last round key -- TODO confirm
+       vcipherlast     $out1,$out1,$in6
+       vcipherlast     $out2,$out2,$in7
+
+       le?vperm        $out0,$out0,$out0,$inpperm
+       le?vperm        $out1,$out1,$out1,$inpperm
+       stvx_u          $out0,$x00,$out         # stores are interleaved with the little-endian byte permutes
+       le?vperm        $out2,$out2,$out2,$inpperm
+       stvx_u          $out1,$x10,$out
+       stvx_u          $out2,$x20,$out
+       addi            $out,$out,0x30          # advance output pointer by three blocks
+       b               Lctr32_enc8x_done       # use this routine's own epilogue, like the other tail handlers
+
+.align 5
+Lctr32_enc8x_two:                       # tail handler: finish and store the last two blocks
+       vcipherlast     $out0,$out0,$in6        # final AES round; in6..in7 presumably hold the last two inputs pre-xored with the last round key -- TODO confirm
+       vcipherlast     $out1,$out1,$in7
+
+       le?vperm        $out0,$out0,$out0,$inpperm
+       le?vperm        $out1,$out1,$out1,$inpperm
+       stvx_u          $out0,$x00,$out
+       stvx_u          $out1,$x10,$out
+       addi            $out,$out,0x20          # advance output pointer by two blocks
+       b               Lctr32_enc8x_done       # use this routine's own epilogue, like the other tail handlers
+
+.align 5
+Lctr32_enc8x_one:
+       vcipherlast     $out0,$out0,$in7
+
+       le?vperm        $out0,$out0,$out0,$inpperm
+       stvx_u          $out0,0,$out
+       addi            $out,$out,0x10
+
+Lctr32_enc8x_done:                      # common exit: scrub stack copies, restore registers, pop frame, return
+       li              r10,`$FRAME+15`         # r10 and r11 index alternating 16-byte stack slots;
+       li              r11,`$FRAME+31`         # each is stepped by 32 after every use
+       stvx            $inpperm,r10,$sp        # wipe copies of round keys (eight stores of inpperm)
+       addi            r10,r10,32
+       stvx            $inpperm,r11,$sp
+       addi            r11,r11,32
+       stvx            $inpperm,r10,$sp
+       addi            r10,r10,32
+       stvx            $inpperm,r11,$sp
+       addi            r11,r11,32
+       stvx            $inpperm,r10,$sp
+       addi            r10,r10,32
+       stvx            $inpperm,r11,$sp
+       addi            r11,r11,32
+       stvx            $inpperm,r10,$sp
+       addi            r10,r10,32
+       stvx            $inpperm,r11,$sp
+       addi            r11,r11,32
+
+       mtspr           256,$vrsave             # write SPR 256 from the vrsave register variable (presumably the caller's value -- TODO confirm)
+       lvx             v20,r10,$sp             # ABI says so: reload v20-v31 from the next twelve slots
+       addi            r10,r10,32
+       lvx             v21,r11,$sp
+       addi            r11,r11,32
+       lvx             v22,r10,$sp
+       addi            r10,r10,32
+       lvx             v23,r11,$sp
+       addi            r11,r11,32
+       lvx             v24,r10,$sp
+       addi            r10,r10,32
+       lvx             v25,r11,$sp
+       addi            r11,r11,32
+       lvx             v26,r10,$sp
+       addi            r10,r10,32
+       lvx             v27,r11,$sp
+       addi            r11,r11,32
+       lvx             v28,r10,$sp
+       addi            r10,r10,32
+       lvx             v29,r11,$sp
+       addi            r11,r11,32
+       lvx             v30,r10,$sp
+       lvx             v31,r11,$sp
+       $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)       # reload r26-r31 from the GPR area at FRAME+21*16
+       $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+       $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+       $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+       $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+       $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+       addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`        # release the stack frame
+       blr
+       .long           0
+       .byte           0,12,0x04,0,0x80,6,6,0
+       .long           0
+.size  .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
+___
+}}     }}}
+
+my $consts=1;                          # true until a line matching Lconsts: has been seen
+foreach(split("\n",$code)) {           # post-process the generated code one line at a time
+        s/\`([^\`]*)\`/eval($1)/geo;   # replace each back-quoted Perl expression with its value
+
+       # constants table endian-specific conversion: .long/.byte lines that end in a ?tag
+       if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
+           my $conv=$3;                # the trailing tag (?inv and ?rev are acted on below)
+           my @bytes=();
+
+           # convert to endian-agnostic format (a flat list of byte values)
+           if ($1 eq "long") {
+             foreach (split(/,\s*/,$2)) {
+               my $l = /^0/?oct:int;   # leading 0 goes through oct(), anything else through int()
+               push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;   # most significant byte first
+             }
+           } else {
+               @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
+           }
+
+           # little-endian conversion (only when the flavour name ends in "le")
+           if ($flavour =~ /le$/o) {
+               SWITCH: for($conv)  {
+                   /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };       # xor every byte with 0xf
+                   /\?rev/ && do   { @bytes=reverse(@bytes);    last; }; # reverse the byte order
+               }
+           }
+
+           #emit the line as a single .byte directive and skip the rest of the loop body
+           print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
+           next;
+       }
+       $consts=0 if (m/Lconsts:/o);    # end of table
+
+       # instructions prefixed with '?' are endian-specific and need
+       # to be adjusted accordingly; only the first matching rule is applied
+       if ($flavour =~ /le$/o) {       # little-endian
+           s/le\?//o           or      # keep little-endian-only lines, dropping the prefix
+           s/be\?/#be#/o       or
+           s/\?lvsr/lvsl/o     or      # lvsr and lvsl trade places
+           s/\?lvsl/lvsr/o     or
+           s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
+           s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
+           s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
+       } else {                        # big-endian
+           s/le\?/#le#/o       or
+           s/be\?//o           or      # keep big-endian-only lines, dropping the prefix
+           s/\?([a-z]+)/$1/o;          # strip the '?' marker, leaving the mnemonic unchanged
+       }
+
+        print $_,"\n";
+}
+
+close STDOUT;
diff --git a/crypto/aes/asm/aest4-sparcv9.pl b/crypto/aes/asm/aest4-sparcv9.pl
new file mode 100644 (file)
index 0000000..536f23b
--- /dev/null
@@ -0,0 +1,919 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
+# <appro@openssl.org>. The module is licensed under 2-clause BSD
+# license. October 2012. All rights reserved.
+# ====================================================================
+
+######################################################################
+# AES for SPARC T4.
+#
+# AES round instructions complete in 3 cycles and can be issued every
+# cycle. It means that round calculations should take 4*rounds cycles,
+# because any given round instruction depends on result of *both*
+# previous instructions:
+#
+#      |0 |1 |2 |3 |4
+#      |01|01|01|
+#         |23|23|23|
+#                  |01|01|...
+#                     |23|...
+#
+# Provided that fxor [with IV] takes 3 cycles to complete, critical
+# path length for CBC encrypt would be 3+4*rounds, or in other words
+# it should process one byte in at least (3+4*rounds)/16 cycles. This
+# estimate doesn't account for "collateral" instructions, such as
+# fetching input from memory, xor-ing it with zero-round key and
+# storing the result. Yet, *measured* performance [for data aligned
+# at 64-bit boundary!] deviates from this equation by less than 0.5%:
+#
+#              128-bit key     192-            256-
+# CBC encrypt  2.70/2.90(*)    3.20/3.40       3.70/3.90
+#                       (*) numbers after slash are for
+#                           misaligned data.
+#
+# Out-of-order execution logic managed to fully overlap "collateral"
+# instructions with those on critical path. Amazing!
+#
+# As with Intel AES-NI, the question is whether it's possible to improve
+# performance of parallelizable modes by interleaving round
+# instructions. Given the round instruction latency and throughput, the
+# optimal interleave factor is 2. But can we expect 2x performance
+# improvement? Well, as round instructions can be issued one per
+# cycle, they don't saturate the 2-way issue pipeline and therefore
+# there is room for "collateral" calculations... Yet, 2x speed-up
+# over CBC encrypt remains unattainable:
+#
+#              128-bit key     192-            256-
+# CBC decrypt  1.64/2.11       1.89/2.37       2.23/2.61
+# CTR          1.64/2.08(*)    1.89/2.33       2.23/2.61
+#                       (*) numbers after slash are for
+#                           misaligned data.
+#
+# Estimates based on amount of instructions under assumption that
+# round instructions are not pairable with any other instruction
+# suggest that latter is the actual case and pipeline runs
+# underutilized. It should be noted that T4 out-of-order execution
+# logic is so capable that performance gain from 2x interleave is
+# not even impressive, ~7-13% over non-interleaved code, largest
+# for 256-bit keys.
+
+# To anchor to something else, software implementation processes
+# one byte in 29 cycles with 128-bit key on same processor. Intel
+# Sandy Bridge encrypts byte in 5.07 cycles in CBC mode and decrypts
+# in 0.93, naturally with AES-NI.
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "sparcv9_modes.pl";
+
+&asm_init(@ARGV);
+
+$::evp=1;      # if $evp is set to 0, script generates module with
+# AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry
+# points. These however are not fully compatible with openssl/aes.h,
+# because they expect AES_KEY to be aligned at 64-bit boundary. When
+# used through EVP, alignment is arranged at EVP layer. Second thing
+# that is arranged by EVP is at least 32-bit alignment of IV.
+
+######################################################################
+# single-round subroutines
+#
+{
+my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));
+
+$code.=<<___ if ($::abibits==64);
+.register      %g2,#scratch
+.register      %g3,#scratch
+
+___
+$code.=<<___;
+.text
+
+.globl aes_t4_encrypt
+.align 32
+aes_t4_encrypt:
+       andcc           $inp, 7, %g1            ! is input aligned?
+       andn            $inp, 7, $inp
+
+       ldx             [$key + 0], %g4
+       ldx             [$key + 8], %g5
+
+       ldx             [$inp + 0], %o4
+       bz,pt           %icc, 1f
+       ldx             [$inp + 8], %o5
+       ldx             [$inp + 16], $inp
+       sll             %g1, 3, %g1
+       sub             %g0, %g1, %o3
+       sllx            %o4, %g1, %o4
+       sllx            %o5, %g1, %g1
+       srlx            %o5, %o3, %o5
+       srlx            $inp, %o3, %o3
+       or              %o5, %o4, %o4
+       or              %o3, %g1, %o5
+1:
+       ld              [$key + 240], $rounds
+       ldd             [$key + 16], %f12
+       ldd             [$key + 24], %f14
+       xor             %g4, %o4, %o4
+       xor             %g5, %o5, %o5
+       movxtod         %o4, %f0
+       movxtod         %o5, %f2
+       srl             $rounds, 1, $rounds
+       ldd             [$key + 32], %f16
+       sub             $rounds, 1, $rounds
+       ldd             [$key + 40], %f18
+       add             $key, 48, $key
+
+.Lenc:
+       aes_eround01    %f12, %f0, %f2, %f4
+       aes_eround23    %f14, %f0, %f2, %f2
+       ldd             [$key + 0], %f12
+       ldd             [$key + 8], %f14
+       sub             $rounds,1,$rounds
+       aes_eround01    %f16, %f4, %f2, %f0
+       aes_eround23    %f18, %f4, %f2, %f2
+       ldd             [$key + 16], %f16
+       ldd             [$key + 24], %f18
+       brnz,pt         $rounds, .Lenc
+       add             $key, 32, $key
+
+       andcc           $out, 7, $tmp           ! is output aligned?
+       aes_eround01    %f12, %f0, %f2, %f4
+       aes_eround23    %f14, %f0, %f2, %f2
+       aes_eround01_l  %f16, %f4, %f2, %f0
+       aes_eround23_l  %f18, %f4, %f2, %f2
+
+       bnz,pn          %icc, 2f
+       nop
+
+       std             %f0, [$out + 0]
+       retl
+       std             %f2, [$out + 8]
+
+2:     alignaddrl      $out, %g0, $out
+       mov             0xff, $mask
+       srl             $mask, $tmp, $mask
+
+       faligndata      %f0, %f0, %f4
+       faligndata      %f0, %f2, %f6
+       faligndata      %f2, %f2, %f8
+
+       stda            %f4, [$out + $mask]0xc0 ! partial store
+       std             %f6, [$out + 8]
+       add             $out, 16, $out
+       orn             %g0, $mask, $mask
+       retl
+       stda            %f8, [$out + $mask]0xc0 ! partial store
+.type  aes_t4_encrypt,#function
+.size  aes_t4_encrypt,.-aes_t4_encrypt
+
+.globl aes_t4_decrypt
+.align 32
+aes_t4_decrypt:
+       andcc           $inp, 7, %g1            ! is input aligned?
+       andn            $inp, 7, $inp
+
+       ldx             [$key + 0], %g4
+       ldx             [$key + 8], %g5
+
+       ldx             [$inp + 0], %o4
+       bz,pt           %icc, 1f
+       ldx             [$inp + 8], %o5
+       ldx             [$inp + 16], $inp
+       sll             %g1, 3, %g1
+       sub             %g0, %g1, %o3
+       sllx            %o4, %g1, %o4
+       sllx            %o5, %g1, %g1
+       srlx            %o5, %o3, %o5
+       srlx            $inp, %o3, %o3
+       or              %o5, %o4, %o4
+       or              %o3, %g1, %o5
+1:
+       ld              [$key + 240], $rounds
+       ldd             [$key + 16], %f12
+       ldd             [$key + 24], %f14
+       xor             %g4, %o4, %o4
+       xor             %g5, %o5, %o5
+       movxtod         %o4, %f0
+       movxtod         %o5, %f2
+       srl             $rounds, 1, $rounds
+       ldd             [$key + 32], %f16
+       sub             $rounds, 1, $rounds
+       ldd             [$key + 40], %f18
+       add             $key, 48, $key
+
+.Ldec:
+       aes_dround01    %f12, %f0, %f2, %f4
+       aes_dround23    %f14, %f0, %f2, %f2
+       ldd             [$key + 0], %f12
+       ldd             [$key + 8], %f14
+       sub             $rounds,1,$rounds
+       aes_dround01    %f16, %f4, %f2, %f0
+       aes_dround23    %f18, %f4, %f2, %f2
+       ldd             [$key + 16], %f16
+       ldd             [$key + 24], %f18
+       brnz,pt         $rounds, .Ldec
+       add             $key, 32, $key
+
+       andcc           $out, 7, $tmp           ! is output aligned?
+       aes_dround01    %f12, %f0, %f2, %f4
+       aes_dround23    %f14, %f0, %f2, %f2
+       aes_dround01_l  %f16, %f4, %f2, %f0
+       aes_dround23_l  %f18, %f4, %f2, %f2
+
+       bnz,pn          %icc, 2f
+       nop
+
+       std             %f0, [$out + 0]
+       retl
+       std             %f2, [$out + 8]
+
+2:     alignaddrl      $out, %g0, $out
+       mov             0xff, $mask
+       srl             $mask, $tmp, $mask
+
+       faligndata      %f0, %f0, %f4
+       faligndata      %f0, %f2, %f6
+       faligndata      %f2, %f2, %f8
+
+       stda            %f4, [$out + $mask]0xc0 ! partial store
+       std             %f6, [$out + 8]
+       add             $out, 16, $out
+       orn             %g0, $mask, $mask
+       retl
+       stda            %f8, [$out + $mask]0xc0 ! partial store
+.type  aes_t4_decrypt,#function
+.size  aes_t4_decrypt,.-aes_t4_decrypt
+___
+}
+
+######################################################################
+# key setup subroutines
+#
+{
+my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
+$code.=<<___;
+.globl aes_t4_set_encrypt_key
+.align 32
+aes_t4_set_encrypt_key:
+.Lset_encrypt_key:
+       and             $inp, 7, $tmp
+       alignaddr       $inp, %g0, $inp
+       cmp             $bits, 192
+       ldd             [$inp + 0], %f0
+       bl,pt           %icc,.L128
+       ldd             [$inp + 8], %f2
+
+       be,pt           %icc,.L192
+       ldd             [$inp + 16], %f4
+       brz,pt          $tmp, .L256aligned
+       ldd             [$inp + 24], %f6
+
+       ldd             [$inp + 32], %f8
+       faligndata      %f0, %f2, %f0
+       faligndata      %f2, %f4, %f2
+       faligndata      %f4, %f6, %f4
+       faligndata      %f6, %f8, %f6
+.L256aligned:
+___
+for ($i=0; $i<6; $i++) {
+    $code.=<<___;
+       std             %f0, [$out + `32*$i+0`]
+       aes_kexpand1    %f0, %f6, $i, %f0
+       std             %f2, [$out + `32*$i+8`]
+       aes_kexpand2    %f2, %f0, %f2
+       std             %f4, [$out + `32*$i+16`]
+       aes_kexpand0    %f4, %f2, %f4
+       std             %f6, [$out + `32*$i+24`]
+       aes_kexpand2    %f6, %f4, %f6
+___
+}
+$code.=<<___;
+       std             %f0, [$out + `32*$i+0`]
+       aes_kexpand1    %f0, %f6, $i, %f0
+       std             %f2, [$out + `32*$i+8`]
+       aes_kexpand2    %f2, %f0, %f2
+       std             %f4, [$out + `32*$i+16`]
+       std             %f6, [$out + `32*$i+24`]
+       std             %f0, [$out + `32*$i+32`]
+       std             %f2, [$out + `32*$i+40`]
+
+       mov             14, $tmp
+       st              $tmp, [$out + 240]
+       retl
+       xor             %o0, %o0, %o0
+
+.align 16
+.L192:
+       brz,pt          $tmp, .L192aligned
+       nop
+
+       ldd             [$inp + 24], %f6
+       faligndata      %f0, %f2, %f0
+       faligndata      %f2, %f4, %f2
+       faligndata      %f4, %f6, %f4
+.L192aligned:
+___
+for ($i=0; $i<7; $i++) {
+    $code.=<<___;
+       std             %f0, [$out + `24*$i+0`]
+       aes_kexpand1    %f0, %f4, $i, %f0
+       std             %f2, [$out + `24*$i+8`]
+       aes_kexpand2    %f2, %f0, %f2
+       std             %f4, [$out + `24*$i+16`]
+       aes_kexpand2    %f4, %f2, %f4
+___
+}
+$code.=<<___;
+       std             %f0, [$out + `24*$i+0`]
+       aes_kexpand1    %f0, %f4, $i, %f0
+       std             %f2, [$out + `24*$i+8`]
+       aes_kexpand2    %f2, %f0, %f2
+       std             %f4, [$out + `24*$i+16`]
+       std             %f0, [$out + `24*$i+24`]
+       std             %f2, [$out + `24*$i+32`]
+
+       mov             12, $tmp
+       st              $tmp, [$out + 240]
+       retl
+       xor             %o0, %o0, %o0
+
+.align 16
+.L128:
+       brz,pt          $tmp, .L128aligned
+       nop
+
+       ldd             [$inp + 16], %f4
+       faligndata      %f0, %f2, %f0
+       faligndata      %f2, %f4, %f2
+.L128aligned:
+___
+for ($i=0; $i<10; $i++) {
+    $code.=<<___;
+       std             %f0, [$out + `16*$i+0`]
+       aes_kexpand1    %f0, %f2, $i, %f0
+       std             %f2, [$out + `16*$i+8`]
+       aes_kexpand2    %f2, %f0, %f2
+___
+}
+$code.=<<___;
+       std             %f0, [$out + `16*$i+0`]
+       std             %f2, [$out + `16*$i+8`]
+
+       mov             10, $tmp
+       st              $tmp, [$out + 240]
+       retl
+       xor             %o0, %o0, %o0
+.type  aes_t4_set_encrypt_key,#function
+.size  aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key
+
+.globl aes_t4_set_decrypt_key
+.align 32
+aes_t4_set_decrypt_key:                        ! build the encrypt schedule, then reverse the order of its 16-byte round keys in place
+       mov             %o7, %o5                ! keep %o7 in %o5 across the call below
+       call            .Lset_encrypt_key
+       nop
+
+       mov             %o5, %o7                ! put the kept value back into %o7
+       sll             $tmp, 4, $inp           ! $tmp is number of rounds
+       add             $tmp, 2, $tmp
+       add             $out, $inp, $inp        ! $inp=$out+16*rounds
+       srl             $tmp, 2, $tmp           ! $tmp=(rounds+2)/4
+
+.Lkey_flip:                                    ! each pass swaps two round keys at the front with two at the back
+       ldd             [$out + 0],  %f0
+       ldd             [$out + 8],  %f2
+       ldd             [$out + 16], %f4
+       ldd             [$out + 24], %f6
+       ldd             [$inp + 0],  %f8
+       ldd             [$inp + 8],  %f10
+       ldd             [$inp - 16], %f12
+       ldd             [$inp - 8],  %f14
+       sub             $tmp, 1, $tmp           ! one fewer pass remaining
+       std             %f0, [$inp + 0]         ! front pair goes to the back, in mirrored order
+       std             %f2, [$inp + 8]
+       std             %f4, [$inp - 16]
+       std             %f6, [$inp - 8]
+       std             %f8, [$out + 0]         ! back pair goes to the front, in mirrored order
+       std             %f10, [$out + 8]
+       std             %f12, [$out + 16]
+       std             %f14, [$out + 24]
+       add             $out, 32, $out          ! front cursor moves forward by two round keys
+       brnz            $tmp, .Lkey_flip
+       sub             $inp, 32, $inp          ! back cursor moves backward by two round keys
+
+       retl
+       xor             %o0, %o0, %o0           ! zero %o0 before returning
+.type  aes_t4_set_decrypt_key,#function
+.size  aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
+___
+}
+
+{{{
+my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
+my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));
+
+$code.=<<___;
+.align 32
+_aes128_encrypt_1x:
+___
+for ($i=0; $i<4; $i++) {
+    $code.=<<___;
+       aes_eround01    %f`16+8*$i+0`, %f0, %f2, %f4
+       aes_eround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_eround01    %f`16+8*$i+4`, %f4, %f2, %f0
+       aes_eround23    %f`16+8*$i+6`, %f4, %f2, %f2
+___
+}
+$code.=<<___;
+       aes_eround01    %f48, %f0, %f2, %f4
+       aes_eround23    %f50, %f0, %f2, %f2
+       aes_eround01_l  %f52, %f4, %f2, %f0
+       retl
+       aes_eround23_l  %f54, %f4, %f2, %f2
+.type  _aes128_encrypt_1x,#function
+.size  _aes128_encrypt_1x,.-_aes128_encrypt_1x
+
+.align 32
+_aes128_encrypt_2x:
+___
+for ($i=0; $i<4; $i++) {
+    $code.=<<___;
+       aes_eround01    %f`16+8*$i+0`, %f0, %f2, %f8
+       aes_eround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_eround01    %f`16+8*$i+0`, %f4, %f6, %f10
+       aes_eround23    %f`16+8*$i+2`, %f4, %f6, %f6
+       aes_eround01    %f`16+8*$i+4`, %f8, %f2, %f0
+       aes_eround23    %f`16+8*$i+6`, %f8, %f2, %f2
+       aes_eround01    %f`16+8*$i+4`, %f10, %f6, %f4
+       aes_eround23    %f`16+8*$i+6`, %f10, %f6, %f6
+___
+}
+$code.=<<___;
+       aes_eround01    %f48, %f0, %f2, %f8
+       aes_eround23    %f50, %f0, %f2, %f2
+       aes_eround01    %f48, %f4, %f6, %f10
+       aes_eround23    %f50, %f4, %f6, %f6
+       aes_eround01_l  %f52, %f8, %f2, %f0
+       aes_eround23_l  %f54, %f8, %f2, %f2
+       aes_eround01_l  %f52, %f10, %f6, %f4
+       retl
+       aes_eround23_l  %f54, %f10, %f6, %f6
+.type  _aes128_encrypt_2x,#function
+.size  _aes128_encrypt_2x,.-_aes128_encrypt_2x
+
+.align 32
+_aes128_loadkey:
+       ldx             [$key + 0], %g4
+       ldx             [$key + 8], %g5
+___
+for ($i=2; $i<22;$i++) {                       # load key schedule
+    $code.=<<___;
+       ldd             [$key + `8*$i`], %f`12+2*$i`
+___
+}
+$code.=<<___;
+       retl
+       nop
+.type  _aes128_loadkey,#function
+.size  _aes128_loadkey,.-_aes128_loadkey
+_aes128_load_enckey=_aes128_loadkey
+_aes128_load_deckey=_aes128_loadkey
+
+___
+
+&alg_cbc_encrypt_implement("aes",128);
+if ($::evp) {
+    &alg_ctr32_implement("aes",128);
+    &alg_xts_implement("aes",128,"en");
+    &alg_xts_implement("aes",128,"de");
+}
+&alg_cbc_decrypt_implement("aes",128);
+
+$code.=<<___;
+.align 32
+_aes128_decrypt_1x:
+___
+for ($i=0; $i<4; $i++) {
+    $code.=<<___;
+       aes_dround01    %f`16+8*$i+0`, %f0, %f2, %f4
+       aes_dround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_dround01    %f`16+8*$i+4`, %f4, %f2, %f0
+       aes_dround23    %f`16+8*$i+6`, %f4, %f2, %f2
+___
+}
+$code.=<<___;
+       aes_dround01    %f48, %f0, %f2, %f4
+       aes_dround23    %f50, %f0, %f2, %f2
+       aes_dround01_l  %f52, %f4, %f2, %f0
+       retl
+       aes_dround23_l  %f54, %f4, %f2, %f2
+.type  _aes128_decrypt_1x,#function
+.size  _aes128_decrypt_1x,.-_aes128_decrypt_1x
+
+.align 32
+_aes128_decrypt_2x:
+___
+for ($i=0; $i<4; $i++) {
+    $code.=<<___;
+       aes_dround01    %f`16+8*$i+0`, %f0, %f2, %f8
+       aes_dround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_dround01    %f`16+8*$i+0`, %f4, %f6, %f10
+       aes_dround23    %f`16+8*$i+2`, %f4, %f6, %f6
+       aes_dround01    %f`16+8*$i+4`, %f8, %f2, %f0
+       aes_dround23    %f`16+8*$i+6`, %f8, %f2, %f2
+       aes_dround01    %f`16+8*$i+4`, %f10, %f6, %f4
+       aes_dround23    %f`16+8*$i+6`, %f10, %f6, %f6
+___
+}
+$code.=<<___;
+       aes_dround01    %f48, %f0, %f2, %f8
+       aes_dround23    %f50, %f0, %f2, %f2
+       aes_dround01    %f48, %f4, %f6, %f10
+       aes_dround23    %f50, %f4, %f6, %f6
+       aes_dround01_l  %f52, %f8, %f2, %f0
+       aes_dround23_l  %f54, %f8, %f2, %f2
+       aes_dround01_l  %f52, %f10, %f6, %f4
+       retl
+       aes_dround23_l  %f54, %f10, %f6, %f6
+.type  _aes128_decrypt_2x,#function
+.size  _aes128_decrypt_2x,.-_aes128_decrypt_2x
+___
+
+$code.=<<___;
+.align 32
+_aes192_encrypt_1x:
+___
+for ($i=0; $i<5; $i++) {
+    $code.=<<___;
+       aes_eround01    %f`16+8*$i+0`, %f0, %f2, %f4
+       aes_eround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_eround01    %f`16+8*$i+4`, %f4, %f2, %f0
+       aes_eround23    %f`16+8*$i+6`, %f4, %f2, %f2
+___
+}
+$code.=<<___;
+       aes_eround01    %f56, %f0, %f2, %f4
+       aes_eround23    %f58, %f0, %f2, %f2
+       aes_eround01_l  %f60, %f4, %f2, %f0
+       retl
+       aes_eround23_l  %f62, %f4, %f2, %f2
+.type  _aes192_encrypt_1x,#function
+.size  _aes192_encrypt_1x,.-_aes192_encrypt_1x
+
+.align 32
+_aes192_encrypt_2x:
+___
+for ($i=0; $i<5; $i++) {
+    $code.=<<___;
+       aes_eround01    %f`16+8*$i+0`, %f0, %f2, %f8
+       aes_eround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_eround01    %f`16+8*$i+0`, %f4, %f6, %f10
+       aes_eround23    %f`16+8*$i+2`, %f4, %f6, %f6
+       aes_eround01    %f`16+8*$i+4`, %f8, %f2, %f0
+       aes_eround23    %f`16+8*$i+6`, %f8, %f2, %f2
+       aes_eround01    %f`16+8*$i+4`, %f10, %f6, %f4
+       aes_eround23    %f`16+8*$i+6`, %f10, %f6, %f6
+___
+}
+$code.=<<___;
+       aes_eround01    %f56, %f0, %f2, %f8
+       aes_eround23    %f58, %f0, %f2, %f2
+       aes_eround01    %f56, %f4, %f6, %f10
+       aes_eround23    %f58, %f4, %f6, %f6
+       aes_eround01_l  %f60, %f8, %f2, %f0
+       aes_eround23_l  %f62, %f8, %f2, %f2
+       aes_eround01_l  %f60, %f10, %f6, %f4
+       retl
+       aes_eround23_l  %f62, %f10, %f6, %f6
+.type  _aes192_encrypt_2x,#function
+.size  _aes192_encrypt_2x,.-_aes192_encrypt_2x
+
+.align 32
+_aes256_encrypt_1x:
+       aes_eround01    %f16, %f0, %f2, %f4
+       aes_eround23    %f18, %f0, %f2, %f2
+       ldd             [$key + 208], %f16
+       ldd             [$key + 216], %f18
+       aes_eround01    %f20, %f4, %f2, %f0
+       aes_eround23    %f22, %f4, %f2, %f2
+       ldd             [$key + 224], %f20
+       ldd             [$key + 232], %f22
+___
+for ($i=1; $i<6; $i++) {
+    $code.=<<___;
+       aes_eround01    %f`16+8*$i+0`, %f0, %f2, %f4
+       aes_eround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_eround01    %f`16+8*$i+4`, %f4, %f2, %f0
+       aes_eround23    %f`16+8*$i+6`, %f4, %f2, %f2
+___
+}
+$code.=<<___;
+       aes_eround01    %f16, %f0, %f2, %f4
+       aes_eround23    %f18, %f0, %f2, %f2
+       ldd             [$key + 16], %f16
+       ldd             [$key + 24], %f18
+       aes_eround01_l  %f20, %f4, %f2, %f0
+       aes_eround23_l  %f22, %f4, %f2, %f2
+       ldd             [$key + 32], %f20
+       retl
+       ldd             [$key + 40], %f22
+.type  _aes256_encrypt_1x,#function
+.size  _aes256_encrypt_1x,.-_aes256_encrypt_1x
+
+.align 32
+_aes256_encrypt_2x:                            ! two blocks per call, states in f0/f2 and f4/f6, f8 and f10 carry the intermediate halves
+       aes_eround01    %f16, %f0, %f2, %f8
+       aes_eround23    %f18, %f0, %f2, %f2
+       aes_eround01    %f16, %f4, %f6, %f10
+       aes_eround23    %f18, %f4, %f6, %f6
+       ldd             [$key + 208], %f16      ! f16/f18 are spent, refill them from schedule offsets 208/216
+       ldd             [$key + 216], %f18
+       aes_eround01    %f20, %f8, %f2, %f0
+       aes_eround23    %f22, %f8, %f2, %f2
+       aes_eround01    %f20, %f10, %f6, %f4
+       aes_eround23    %f22, %f10, %f6, %f6
+       ldd             [$key + 224], %f20      ! f20/f22 are spent, refill them from schedule offsets 224/232
+       ldd             [$key + 232], %f22
+___
+for ($i=1; $i<6; $i++) {                       # five iterations walk key registers %f24..%f62; each key pair is applied to the first block, then the second
+    $code.=<<___;
+       aes_eround01    %f`16+8*$i+0`, %f0, %f2, %f8
+       aes_eround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_eround01    %f`16+8*$i+0`, %f4, %f6, %f10
+       aes_eround23    %f`16+8*$i+2`, %f4, %f6, %f6
+       aes_eround01    %f`16+8*$i+4`, %f8, %f2, %f0
+       aes_eround23    %f`16+8*$i+6`, %f8, %f2, %f2
+       aes_eround01    %f`16+8*$i+4`, %f10, %f6, %f4
+       aes_eround23    %f`16+8*$i+6`, %f10, %f6, %f6
+___
+}
+$code.=<<___;                                  # tail: %f16/%f18 now hold offsets 208/216, %f20/%f22 hold 224/232 and feed the _l forms
+       aes_eround01    %f16, %f0, %f2, %f8
+       aes_eround23    %f18, %f0, %f2, %f2
+       aes_eround01    %f16, %f4, %f6, %f10
+       aes_eround23    %f18, %f4, %f6, %f6
+       ldd             [$key + 16], %f16       ! put offsets 16/24 back into f16/f18, matching what _aes192_loadkey loads there
+       ldd             [$key + 24], %f18
+       aes_eround01_l  %f20, %f8, %f2, %f0
+       aes_eround23_l  %f22, %f8, %f2, %f2
+       aes_eround01_l  %f20, %f10, %f6, %f4
+       aes_eround23_l  %f22, %f10, %f6, %f6
+       ldd             [$key + 32], %f20       ! put offsets 32/40 back into f20/f22
+       retl
+       ldd             [$key + 40], %f22       ! sits in the retl delay slot
+.type  _aes256_encrypt_2x,#function
+.size  _aes256_encrypt_2x,.-_aes256_encrypt_2x
+
+.align 32
+_aes192_loadkey:                               ! loads the schedule at the key pointer into g4/g5 and f16..f62
+       ldx             [$key + 0], %g4         ! first 16 bytes go to integer registers g4/g5
+       ldx             [$key + 8], %g5
+___
+for ($i=2; $i<26;$i++) {                       # load key schedule: bytes 16..207 into %f16..%f62, 8 bytes per even-numbered register
+    $code.=<<___;
+       ldd             [$key + `8*$i`], %f`12+2*$i`
+___
+}
+$code.=<<___;
+       retl
+       nop
+.type  _aes192_loadkey,#function
+.size  _aes192_loadkey,.-_aes192_loadkey
+_aes256_loadkey=_aes192_loadkey                ! every 192/256-bit load entry point is an alias of the routine above
+_aes192_load_enckey=_aes192_loadkey
+_aes192_load_deckey=_aes192_loadkey
+_aes256_load_enckey=_aes192_loadkey
+_aes256_load_deckey=_aes192_loadkey
+___
+
+&alg_cbc_encrypt_implement("aes",256);         # generator subs are defined outside this section; presumably each appends an aes<bits>_t4_* routine to $code -- confirm
+&alg_cbc_encrypt_implement("aes",192);
+if ($::evp) {                                  # CTR32 and XTS code is requested only when $::evp is set
+    &alg_ctr32_implement("aes",256);
+    &alg_xts_implement("aes",256,"en");        # within this section XTS is requested for 256-bit keys only
+    &alg_xts_implement("aes",256,"de");
+    &alg_ctr32_implement("aes",192);
+}
+&alg_cbc_decrypt_implement("aes",192);
+&alg_cbc_decrypt_implement("aes",256);
+
+$code.=<<___;
+.align 32
+_aes256_decrypt_1x:                            ! one block in f0/f2, f4 carries the intermediate half
+       aes_dround01    %f16, %f0, %f2, %f4
+       aes_dround23    %f18, %f0, %f2, %f2
+       ldd             [$key + 208], %f16      ! f16/f18 are spent, refill them from schedule offsets 208/216
+       ldd             [$key + 216], %f18
+       aes_dround01    %f20, %f4, %f2, %f0
+       aes_dround23    %f22, %f4, %f2, %f2
+       ldd             [$key + 224], %f20      ! f20/f22 are spent, refill them from schedule offsets 224/232
+       ldd             [$key + 232], %f22
+___
+for ($i=1; $i<6; $i++) {                       # five iterations walk key registers %f24..%f62, four per iteration
+    $code.=<<___;
+       aes_dround01    %f`16+8*$i+0`, %f0, %f2, %f4
+       aes_dround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_dround01    %f`16+8*$i+4`, %f4, %f2, %f0
+       aes_dround23    %f`16+8*$i+6`, %f4, %f2, %f2
+___
+}
+$code.=<<___;                                  # tail: %f16/%f18 now hold offsets 208/216, %f20/%f22 hold 224/232 and feed the _l forms
+       aes_dround01    %f16, %f0, %f2, %f4
+       aes_dround23    %f18, %f0, %f2, %f2
+       ldd             [$key + 16], %f16       ! put offsets 16/24 back into f16/f18, matching what _aes192_loadkey loads there
+       ldd             [$key + 24], %f18
+       aes_dround01_l  %f20, %f4, %f2, %f0
+       aes_dround23_l  %f22, %f4, %f2, %f2
+       ldd             [$key + 32], %f20       ! put offsets 32/40 back into f20/f22
+       retl
+       ldd             [$key + 40], %f22       ! sits in the retl delay slot
+.type  _aes256_decrypt_1x,#function
+.size  _aes256_decrypt_1x,.-_aes256_decrypt_1x
+
+.align 32
+_aes256_decrypt_2x:                            ! two blocks per call, states in f0/f2 and f4/f6, f8 and f10 carry the intermediate halves
+       aes_dround01    %f16, %f0, %f2, %f8
+       aes_dround23    %f18, %f0, %f2, %f2
+       aes_dround01    %f16, %f4, %f6, %f10
+       aes_dround23    %f18, %f4, %f6, %f6
+       ldd             [$key + 208], %f16      ! f16/f18 are spent, refill them from schedule offsets 208/216
+       ldd             [$key + 216], %f18
+       aes_dround01    %f20, %f8, %f2, %f0
+       aes_dround23    %f22, %f8, %f2, %f2
+       aes_dround01    %f20, %f10, %f6, %f4
+       aes_dround23    %f22, %f10, %f6, %f6
+       ldd             [$key + 224], %f20      ! f20/f22 are spent, refill them from schedule offsets 224/232
+       ldd             [$key + 232], %f22
+___
+for ($i=1; $i<6; $i++) {                       # five iterations walk key registers %f24..%f62; each key pair is applied to the first block, then the second
+    $code.=<<___;
+       aes_dround01    %f`16+8*$i+0`, %f0, %f2, %f8
+       aes_dround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_dround01    %f`16+8*$i+0`, %f4, %f6, %f10
+       aes_dround23    %f`16+8*$i+2`, %f4, %f6, %f6
+       aes_dround01    %f`16+8*$i+4`, %f8, %f2, %f0
+       aes_dround23    %f`16+8*$i+6`, %f8, %f2, %f2
+       aes_dround01    %f`16+8*$i+4`, %f10, %f6, %f4
+       aes_dround23    %f`16+8*$i+6`, %f10, %f6, %f6
+___
+}
+$code.=<<___;                                  # tail: %f16/%f18 now hold offsets 208/216, %f20/%f22 hold 224/232 and feed the _l forms
+       aes_dround01    %f16, %f0, %f2, %f8
+       aes_dround23    %f18, %f0, %f2, %f2
+       aes_dround01    %f16, %f4, %f6, %f10
+       aes_dround23    %f18, %f4, %f6, %f6
+       ldd             [$key + 16], %f16       ! put offsets 16/24 back into f16/f18, matching what _aes192_loadkey loads there
+       ldd             [$key + 24], %f18
+       aes_dround01_l  %f20, %f8, %f2, %f0
+       aes_dround23_l  %f22, %f8, %f2, %f2
+       aes_dround01_l  %f20, %f10, %f6, %f4
+       aes_dround23_l  %f22, %f10, %f6, %f6
+       ldd             [$key + 32], %f20       ! put offsets 32/40 back into f20/f22
+       retl
+       ldd             [$key + 40], %f22       ! sits in the retl delay slot
+.type  _aes256_decrypt_2x,#function
+.size  _aes256_decrypt_2x,.-_aes256_decrypt_2x
+
+.align 32
+_aes192_decrypt_1x:                            ! one block in f0/f2, f4 carries the intermediate half, no key reloads are issued here
+___
+for ($i=0; $i<5; $i++) {                       # five iterations walk key registers %f16..%f54, four per iteration
+    $code.=<<___;
+       aes_dround01    %f`16+8*$i+0`, %f0, %f2, %f4
+       aes_dround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_dround01    %f`16+8*$i+4`, %f4, %f2, %f0
+       aes_dround23    %f`16+8*$i+6`, %f4, %f2, %f2
+___
+}
+$code.=<<___;                                  # remaining key registers %f56..%f62; the _l pair ends the sequence
+       aes_dround01    %f56, %f0, %f2, %f4
+       aes_dround23    %f58, %f0, %f2, %f2
+       aes_dround01_l  %f60, %f4, %f2, %f0
+       retl                                    ! the instruction below sits in the delay slot
+       aes_dround23_l  %f62, %f4, %f2, %f2
+.type  _aes192_decrypt_1x,#function
+.size  _aes192_decrypt_1x,.-_aes192_decrypt_1x
+
+.align 32
+_aes192_decrypt_2x:                            ! two blocks per call, states in f0/f2 and f4/f6, no key reloads are issued here
+___
+for ($i=0; $i<5; $i++) {                       # five iterations walk key registers %f16..%f54; each key pair is applied to the first block, then the second
+    $code.=<<___;
+       aes_dround01    %f`16+8*$i+0`, %f0, %f2, %f8
+       aes_dround23    %f`16+8*$i+2`, %f0, %f2, %f2
+       aes_dround01    %f`16+8*$i+0`, %f4, %f6, %f10
+       aes_dround23    %f`16+8*$i+2`, %f4, %f6, %f6
+       aes_dround01    %f`16+8*$i+4`, %f8, %f2, %f0
+       aes_dround23    %f`16+8*$i+6`, %f8, %f2, %f2
+       aes_dround01    %f`16+8*$i+4`, %f10, %f6, %f4
+       aes_dround23    %f`16+8*$i+6`, %f10, %f6, %f6
+___
+}
+$code.=<<___;                                  # remaining key registers %f56..%f62; the _l forms end the sequence for both blocks
+       aes_dround01    %f56, %f0, %f2, %f8
+       aes_dround23    %f58, %f0, %f2, %f2
+       aes_dround01    %f56, %f4, %f6, %f10
+       aes_dround23    %f58, %f4, %f6, %f6
+       aes_dround01_l  %f60, %f8, %f2, %f0
+       aes_dround23_l  %f62, %f8, %f2, %f2
+       aes_dround01_l  %f60, %f10, %f6, %f4
+       retl                                    ! the instruction below sits in the delay slot
+       aes_dround23_l  %f62, %f10, %f6, %f6
+.type  _aes192_decrypt_2x,#function
+.size  _aes192_decrypt_2x,.-_aes192_decrypt_2x
+___
+}}}
+
+if (!$::evp) {                                 # the AES_* symbols below are emitted only when $::evp is false
+$code.=<<___;
+.global        AES_encrypt
+AES_encrypt=aes_t4_encrypt                     ! plain symbol alias, no wrapper code
+.global        AES_decrypt
+AES_decrypt=aes_t4_decrypt                     ! plain symbol alias, no wrapper code
+.global        AES_set_encrypt_key
+.align 32
+AES_set_encrypt_key:                           ! validates o0/o1/o2, then branches (no link) to aes_t4_set_encrypt_key
+       andcc           %o2, 7, %g0             ! check alignment
+       bnz,a,pn        %icc, 1f
+       mov             -1, %o0                 ! annulled slot: runs only when the branch is taken, result -1
+       brz,a,pn        %o0, 1f                 ! zero in o0 is rejected with -1
+       mov             -1, %o0
+       brz,a,pn        %o2, 1f                 ! zero in o2 is rejected with -1
+       mov             -1, %o0
+       andncc          %o1, 0x1c0, %g0         ! any bit of o1 outside 0x1c0 is rejected with -2
+       bnz,a,pn        %icc, 1f
+       mov             -2, %o0
+       cmp             %o1, 128                ! o1 below 128 is rejected with -2
+       bl,a,pn         %icc, 1f                ! NOTE(review): 320, 384 and 448 pass both o1 checks and reach the callee
+       mov             -2, %o0
+       b               aes_t4_set_encrypt_key  ! arguments are passed through unchanged
+       nop
+1:     retl                                    ! error exit, o0 already holds the status
+       nop
+.type  AES_set_encrypt_key,#function
+.size  AES_set_encrypt_key,.-AES_set_encrypt_key
+
+.global        AES_set_decrypt_key
+.align 32
+AES_set_decrypt_key:                           ! validates o0/o1/o2, then branches (no link) to aes_t4_set_decrypt_key
+       andcc           %o2, 7, %g0             ! check alignment
+       bnz,a,pn        %icc, 1f
+       mov             -1, %o0                 ! annulled slot: runs only when the branch is taken, result -1
+       brz,a,pn        %o0, 1f                 ! zero in o0 is rejected with -1
+       mov             -1, %o0
+       brz,a,pn        %o2, 1f                 ! zero in o2 is rejected with -1
+       mov             -1, %o0
+       andncc          %o1, 0x1c0, %g0         ! any bit of o1 outside 0x1c0 is rejected with -2
+       bnz,a,pn        %icc, 1f
+       mov             -2, %o0
+       cmp             %o1, 128                ! o1 below 128 is rejected with -2
+       bl,a,pn         %icc, 1f                ! NOTE(review): 320, 384 and 448 pass both o1 checks and reach the callee
+       mov             -2, %o0
+       b               aes_t4_set_decrypt_key  ! arguments are passed through unchanged
+       nop
+1:     retl                                    ! error exit, o0 already holds the status
+       nop
+.type  AES_set_decrypt_key,#function
+.size  AES_set_decrypt_key,.-AES_set_decrypt_key
+___
+
+my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));        # the six names map onto %o0..%o5 in this order
+
+$code.=<<___;
+.globl AES_cbc_encrypt
+.align 32
+AES_cbc_encrypt:                               ! dispatcher: picks one of six aesNNN_t4_cbc routines and branches to it
+       ld              [$key + 240], %g1       ! 32-bit word at offset 240 of the key structure (presumably the round count)
+       nop
+       brz             $enc, .Lcbc_decrypt     ! zero in the sixth argument selects the decrypt routines
+       cmp             %g1, 12                 ! delay slot, so the flags are set on both paths
+
+       bl,pt           %icc, aes128_t4_cbc_encrypt     ! word below 12
+       nop
+       be,pn           %icc, aes192_t4_cbc_encrypt     ! word equal to 12
+       nop
+       ba              aes256_t4_cbc_encrypt           ! anything above 12
+       nop
+
+.Lcbc_decrypt:
+       bl,pt           %icc, aes128_t4_cbc_decrypt     ! word below 12
+       nop
+       be,pn           %icc, aes192_t4_cbc_decrypt     ! word equal to 12
+       nop
+       ba              aes256_t4_cbc_decrypt           ! anything above 12
+       nop
+.type  AES_cbc_encrypt,#function
+.size  AES_cbc_encrypt,.-AES_cbc_encrypt
+___
+}
+$code.=<<___;                                  # identification string, then realignment
+.asciz "AES for SPARC T4, David S. Miller, Andy Polyakov"
+.align 4
+___
+
+&emit_assembler();                             # defined outside this section; presumably post-processes and prints $code -- confirm
+
+close STDOUT;
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
new file mode 100755 (executable)
index 0000000..95ebae3
--- /dev/null
@@ -0,0 +1,989 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for ARMv8 AES instructions. The
+# module is endian-agnostic in the sense that it supports both big-
+# and little-endian cases. It also supports both 32- and 64-bit modes
+# of operation. The latter is achieved by limiting the number of
+# registers used to 16, which implies additional NEON load and integer
+# instructions. This has no effect on the mighty Apple A7, where results
+# are literally equal to the theoretical estimates based on AES
+# instruction latencies and issue rates. On Cortex-A53, an in-order
+# execution core, this costs up to 10-15%, which is partially
+# compensated by implementing a dedicated code path for the 128-bit
+# CBC encrypt case. On Cortex-A57, parallelizable mode performance
+# seems to be limited by the sheer number of NEON instructions...
+#
+# Performance in cycles per byte processed with 128-bit key:
+#
+#              CBC enc         CBC dec         CTR
+# Apple A7     2.39            1.20            1.20
+# Cortex-A53   1.32            1.29            1.46
+# Cortex-A57(*)        1.95            0.85            0.93
+# Denver       1.96            0.86            0.80
+#
+# (*)  original 3.64/1.34/1.32 results were for the r0p0 revision
+#      and are still the same even with the updated module.
+
+$flavour = shift;                              # first argument: a flavour string; matching /64/ selects the 64-bit dialect below
+open STDOUT,">".shift;                         # second argument: output file name, STDOUT is redirected to it
+
+$prefix="aes_v8";                              # prefix of the .globl symbols emitted in this module
+
+$code=<<___;
+#include "arm_arch.h"
+
+#if __ARM_MAX_ARCH__>=7
+.text
+___
+$code.=".arch  armv8-a+crypto\n"                       if ($flavour =~ /64/);
+$code.=".arch  armv7-a\n.fpu   neon\n.code     32\n"   if ($flavour !~ /64/);
+               #^^^^^^ this is done to simplify adoption by not depending
+               #       on latest binutils.
+
+# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
+# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
+# maintain both 32- and 64-bit codes within single module and
+# transliterate common code to either flavour with regex voodoo.
+#
+{{{
+my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12"); # x0/w1/x2 are read as inputs; x3 and w12 are written before they are read
+my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
+       $flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));    # non-64 flavours skip q4-q7 (presumably because d8-d15 are callee-saved there)
+
+
+$code.=<<___;
+.align 5
+rcon:                                          // three 16-byte rows, loaded two at first and the third one later
+.long  0x01,0x01,0x01,0x01
+.long  0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d     // rotate-n-splat
+.long  0x1b,0x1b,0x1b,0x1b
+
+.globl ${prefix}_set_encrypt_key
+.type  ${prefix}_set_encrypt_key,%function
+.align 5
+${prefix}_set_encrypt_key:
+.Lenc_key:                                     // local entry label, also the call target of set_decrypt_key
+___
+$code.=<<___   if ($flavour =~ /64/);          # frame record is pushed only for 64-bit flavours
+       stp     x29,x30,[sp,#-16]!
+       add     x29,sp,#0
+___
+$code.=<<___;
+       mov     $ptr,#-1                        // status -1 if either pointer argument is null
+       cmp     $inp,#0
+       b.eq    .Lenc_key_abort
+       cmp     $out,#0
+       b.eq    .Lenc_key_abort
+       mov     $ptr,#-2                        // status -2 unless the bit count is 128, 192 or 256
+       cmp     $bits,#128
+       b.lt    .Lenc_key_abort
+       cmp     $bits,#256
+       b.gt    .Lenc_key_abort
+       tst     $bits,#0x3f
+       b.ne    .Lenc_key_abort
+
+       adr     $ptr,rcon
+       cmp     $bits,#192                      // flags are consumed by the three branches further down
+
+       veor    $zero,$zero,$zero
+       vld1.8  {$in0},[$inp],#16
+       mov     $bits,#8                // reuse $bits
+       vld1.32 {$rcon,$mask},[$ptr],#32
+
+       b.lt    .Loop128
+       b.eq    .L192
+       b       .L256
+
+.align 4
+.Loop128:                                      // eight passes, one 16-byte round key stored per pass
+       vtbl.8  $key,{$in0},$mask
+       vext.8  $tmp,$zero,$in0,#12
+       vst1.32 {$in0},[$out],#16
+       aese    $key,$zero
+       subs    $bits,$bits,#1
+
+       veor    $in0,$in0,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+       veor    $in0,$in0,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+        veor   $key,$key,$rcon
+       veor    $in0,$in0,$tmp
+       vshl.u8 $rcon,$rcon,#1
+       veor    $in0,$in0,$key
+       b.ne    .Loop128
+
+       vld1.32 {$rcon},[$ptr]                  // third table row, the pointer was advanced past the first two
+
+       vtbl.8  $key,{$in0},$mask
+       vext.8  $tmp,$zero,$in0,#12
+       vst1.32 {$in0},[$out],#16
+       aese    $key,$zero
+
+       veor    $in0,$in0,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+       veor    $in0,$in0,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+        veor   $key,$key,$rcon
+       veor    $in0,$in0,$tmp
+       vshl.u8 $rcon,$rcon,#1
+       veor    $in0,$in0,$key
+
+       vtbl.8  $key,{$in0},$mask
+       vext.8  $tmp,$zero,$in0,#12
+       vst1.32 {$in0},[$out],#16
+       aese    $key,$zero
+
+       veor    $in0,$in0,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+       veor    $in0,$in0,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+        veor   $key,$key,$rcon
+       veor    $in0,$in0,$tmp
+       veor    $in0,$in0,$key
+       vst1.32 {$in0},[$out]
+       add     $out,$out,#0x50                 // 11 keys written, pointer was at byte 160, move it to byte 240
+
+       mov     $rounds,#10
+       b       .Ldone
+
+.align 4
+.L192:                                         // 192-bit path: 8 more key bytes are loaded and the mask is lowered by 8 per byte
+       vld1.8  {$in1},[$inp],#8
+       vmov.i8 $key,#8                 // borrow $key
+       vst1.32 {$in0},[$out],#16
+       vsub.i8 $mask,$mask,$key        // adjust the mask
+
+.Loop192:                                      // eight passes, the output pointer advances by 8 then 16 bytes per pass
+       vtbl.8  $key,{$in1},$mask
+       vext.8  $tmp,$zero,$in0,#12
+       vst1.32 {$in1},[$out],#8
+       aese    $key,$zero
+       subs    $bits,$bits,#1
+
+       veor    $in0,$in0,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+       veor    $in0,$in0,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+       veor    $in0,$in0,$tmp
+
+       vdup.32 $tmp,${in0}[3]
+       veor    $tmp,$tmp,$in1
+        veor   $key,$key,$rcon
+       vext.8  $in1,$zero,$in1,#12
+       vshl.u8 $rcon,$rcon,#1
+       veor    $in1,$in1,$tmp
+       veor    $in0,$in0,$key
+       veor    $in1,$in1,$key
+       vst1.32 {$in0},[$out],#16
+       b.ne    .Loop192
+
+       mov     $rounds,#12
+       add     $out,$out,#0x20                 // pointer was at byte 208 (16 plus 8 times 24), move it to byte 240
+       b       .Ldone
+
+.align 4
+.L256:                                         // 256-bit path: the second 16 key bytes are loaded as well
+       vld1.8  {$in1},[$inp]
+       mov     $bits,#7                        // pass counter for the loop below
+       mov     $rounds,#14
+       vst1.32 {$in0},[$out],#16
+
+.Loop256:                                      // each pass stores two 16-byte round keys
+       vtbl.8  $key,{$in1},$mask
+       vext.8  $tmp,$zero,$in0,#12
+       vst1.32 {$in1},[$out],#16
+       aese    $key,$zero
+       subs    $bits,$bits,#1
+
+       veor    $in0,$in0,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+       veor    $in0,$in0,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+        veor   $key,$key,$rcon
+       veor    $in0,$in0,$tmp
+       vshl.u8 $rcon,$rcon,#1
+       veor    $in0,$in0,$key
+       vst1.32 {$in0},[$out],#16
+       b.eq    .Ldone                          // taken on the seventh pass, pointer is then at byte 240
+
+       vdup.32 $key,${in0}[3]          // just splat
+       vext.8  $tmp,$zero,$in1,#12
+       aese    $key,$zero
+
+       veor    $in1,$in1,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+       veor    $in1,$in1,$tmp
+       vext.8  $tmp,$zero,$tmp,#12
+       veor    $in1,$in1,$tmp
+
+       veor    $in1,$in1,$key
+       b       .Loop256
+
+.Ldone:
+       str     $rounds,[$out]                  // round count (10, 12 or 14) lands at byte 240 of the schedule
+       mov     $ptr,#0                         // status 0 on success
+
+.Lenc_key_abort:
+       mov     x0,$ptr                 // return value
+       `"ldr   x29,[sp],#16"           if ($flavour =~ /64/)`
+       ret
+.size  ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key
+
+.globl ${prefix}_set_decrypt_key
+.type  ${prefix}_set_decrypt_key,%function
+.align 5
+${prefix}_set_decrypt_key:
+___
+$code.=<<___   if ($flavour =~ /64/);          # 64-bit flavours push a frame record (x29/x30)
+       stp     x29,x30,[sp,#-16]!
+       add     x29,sp,#0
+___
+$code.=<<___   if ($flavour !~ /64/);          # other flavours save r4 and the return address
+       stmdb   sp!,{r4,lr}
+___
+$code.=<<___;
+       bl      .Lenc_key                       // build the encryption schedule first
+
+       cmp     x0,#0
+       b.ne    .Ldec_key_abort                 // a non-zero status is passed straight back to the caller
+
+       sub     $out,$out,#240          // restore original $out
+       mov     x4,#-16                         // negative stride for the pointer that walks down from the end
+       add     $inp,$out,x12,lsl#4     // end of key schedule
+
+       vld1.32 {v0.16b},[$out]
+       vld1.32 {v1.16b},[$inp]
+       vst1.32 {v0.16b},[$inp],x4
+       vst1.32 {v1.16b},[$out],#16
+
+.Loop_imc:                                     // swap keys from both ends while applying aesimc, moving inward
+       vld1.32 {v0.16b},[$out]
+       vld1.32 {v1.16b},[$inp]
+       aesimc  v0.16b,v0.16b
+       aesimc  v1.16b,v1.16b
+       vst1.32 {v0.16b},[$inp],x4
+       vst1.32 {v1.16b},[$out],#16
+       cmp     $inp,$out
+       b.hi    .Loop_imc                       // repeat while the descending pointer is still above the ascending one
+
+       vld1.32 {v0.16b},[$out]
+       aesimc  v0.16b,v0.16b
+       vst1.32 {v0.16b},[$inp]
+
+       eor     x0,x0,x0                // return value
+.Ldec_key_abort:
+___
+$code.=<<___   if ($flavour !~ /64/);          # restores r4 and returns by loading pc
+       ldmia   sp!,{r4,pc}
+___
+$code.=<<___   if ($flavour =~ /64/);          # pops the frame record and returns
+       ldp     x29,x30,[sp],#16
+       ret
+___
+$code.=<<___;
+.size  ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
+___
+}}}
+{{{
+sub gen_block () {                             # appends one single-block routine to $code; NOTE(review): the empty prototype is bypassed by the &-style calls below
+my $dir = shift;                               # "en" or "de", becomes part of the symbol and label names
+my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc");        # instruction suffixes: aese/aesmc or aesd/aesimc
+my ($inp,$out,$key)=map("x$_",(0..2));
+my $rounds="w3";
+my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3));       # only q0-q2 are bound, the fourth list element is dropped
+
+$code.=<<___;
+.globl ${prefix}_${dir}crypt
+.type  ${prefix}_${dir}crypt,%function
+.align 5
+${prefix}_${dir}crypt:
+       ldr     $rounds,[$key,#240]             // word at byte 240 of the schedule, where the key setup stores the round count
+       vld1.32 {$rndkey0},[$key],#16
+       vld1.8  {$inout},[$inp]
+       sub     $rounds,$rounds,#2
+       vld1.32 {$rndkey1},[$key],#16
+
+.Loop_${dir}c:                                 // two round keys are consumed per pass
+       aes$e   $inout,$rndkey0
+       aes$mc  $inout,$inout
+       vld1.32 {$rndkey0},[$key],#16
+       subs    $rounds,$rounds,#2
+       aes$e   $inout,$rndkey1
+       aes$mc  $inout,$inout
+       vld1.32 {$rndkey1},[$key],#16
+       b.gt    .Loop_${dir}c
+
+       aes$e   $inout,$rndkey0
+       aes$mc  $inout,$inout
+       vld1.32 {$rndkey0},[$key]               // last key of the schedule, applied by the closing xor
+       aes$e   $inout,$rndkey1
+       veor    $inout,$inout,$rndkey0
+
+       vst1.8  {$inout},[$out]
+       ret
+.size  ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
+___
+}
+&gen_block("en");                              # emits ${prefix}_encrypt
+&gen_block("de");                              # emits ${prefix}_decrypt
+}}}
+{{{
+my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5";
+my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12");
+my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
+
+my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
+my ($key4,$key5,$key6,$key7)=("x6","x12","x14",$key);
+
+### q8-q15     preloaded key schedule
+
+$code.=<<___;
+.globl ${prefix}_cbc_encrypt
+.type  ${prefix}_cbc_encrypt,%function
+.align 5
+${prefix}_cbc_encrypt:
+___
+$code.=<<___   if ($flavour =~ /64/);
+       stp     x29,x30,[sp,#-16]!
+       add     x29,sp,#0
+___
+$code.=<<___   if ($flavour !~ /64/);
+       mov     ip,sp
+       stmdb   sp!,{r4-r8,lr}
+       vstmdb  sp!,{d8-d15}            @ ABI specification says so
+       ldmia   ip,{r4-r5}              @ load remaining args
+___
+$code.=<<___;
+       subs    $len,$len,#16
+       mov     $step,#16
+       b.lo    .Lcbc_abort
+       cclr    $step,eq
+
+       cmp     $enc,#0                 // en- or decrypting?
+       ldr     $rounds,[$key,#240]
+       and     $len,$len,#-16
+       vld1.8  {$ivec},[$ivp]
+       vld1.8  {$dat},[$inp],$step
+
+       vld1.32 {q8-q9},[$key]          // load key schedule...
+       sub     $rounds,$rounds,#6
+       add     $key_,$key,x5,lsl#4     // pointer to last 7 round keys
+       sub     $rounds,$rounds,#2
+       vld1.32 {q10-q11},[$key_],#32
+       vld1.32 {q12-q13},[$key_],#32
+       vld1.32 {q14-q15},[$key_],#32
+       vld1.32 {$rndlast},[$key_]
+
+       add     $key_,$key,#32
+       mov     $cnt,$rounds
+       b.eq    .Lcbc_dec
+
+       cmp     $rounds,#2
+       veor    $dat,$dat,$ivec
+       veor    $rndzero_n_last,q8,$rndlast
+       b.eq    .Lcbc_enc128
+
+       vld1.32 {$in0-$in1},[$key_]
+       add     $key_,$key,#16
+       add     $key4,$key,#16*4
+       add     $key5,$key,#16*5
+       aese    $dat,q8
+       aesmc   $dat,$dat
+       add     $key6,$key,#16*6
+       add     $key7,$key,#16*7
+       b       .Lenter_cbc_enc
+
+.align 4
+.Loop_cbc_enc:
+       aese    $dat,q8
+       aesmc   $dat,$dat
+        vst1.8 {$ivec},[$out],#16
+.Lenter_cbc_enc:
+       aese    $dat,q9
+       aesmc   $dat,$dat
+       aese    $dat,$in0
+       aesmc   $dat,$dat
+       vld1.32 {q8},[$key4]
+       cmp     $rounds,#4
+       aese    $dat,$in1
+       aesmc   $dat,$dat
+       vld1.32 {q9},[$key5]
+       b.eq    .Lcbc_enc192
+
+       aese    $dat,q8
+       aesmc   $dat,$dat
+       vld1.32 {q8},[$key6]
+       aese    $dat,q9
+       aesmc   $dat,$dat
+       vld1.32 {q9},[$key7]
+       nop
+
+.Lcbc_enc192:
+       aese    $dat,q8
+       aesmc   $dat,$dat
+        subs   $len,$len,#16
+       aese    $dat,q9
+       aesmc   $dat,$dat
+        cclr   $step,eq
+       aese    $dat,q10
+       aesmc   $dat,$dat
+       aese    $dat,q11
+       aesmc   $dat,$dat
+        vld1.8 {q8},[$inp],$step
+       aese    $dat,q12
+       aesmc   $dat,$dat
+        veor   q8,q8,$rndzero_n_last
+       aese    $dat,q13
+       aesmc   $dat,$dat
+        vld1.32 {q9},[$key_]           // re-pre-load rndkey[1]
+       aese    $dat,q14
+       aesmc   $dat,$dat
+       aese    $dat,q15
+       veor    $ivec,$dat,$rndlast
+       b.hs    .Loop_cbc_enc
+
+       vst1.8  {$ivec},[$out],#16
+       b       .Lcbc_done
+
+.align 5
+.Lcbc_enc128:
+       vld1.32 {$in0-$in1},[$key_]
+       aese    $dat,q8
+       aesmc   $dat,$dat
+       b       .Lenter_cbc_enc128
+.Loop_cbc_enc128:
+       aese    $dat,q8
+       aesmc   $dat,$dat
+        vst1.8 {$ivec},[$out],#16
+.Lenter_cbc_enc128:
+       aese    $dat,q9
+       aesmc   $dat,$dat
+        subs   $len,$len,#16
+       aese    $dat,$in0
+       aesmc   $dat,$dat
+        cclr   $step,eq
+       aese    $dat,$in1
+       aesmc   $dat,$dat
+       aese    $dat,q10
+       aesmc   $dat,$dat
+       aese    $dat,q11
+       aesmc   $dat,$dat
+        vld1.8 {q8},[$inp],$step
+       aese    $dat,q12
+       aesmc   $dat,$dat
+       aese    $dat,q13
+       aesmc   $dat,$dat
+       aese    $dat,q14
+       aesmc   $dat,$dat
+        veor   q8,q8,$rndzero_n_last
+       aese    $dat,q15
+       veor    $ivec,$dat,$rndlast
+       b.hs    .Loop_cbc_enc128
+
+       vst1.8  {$ivec},[$out],#16
+       b       .Lcbc_done
+___
+{
+my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
+$code.=<<___;
+.align 5
+.Lcbc_dec:
+       vld1.8  {$dat2},[$inp],#16
+       subs    $len,$len,#32           // bias
+       add     $cnt,$rounds,#2
+       vorr    $in1,$dat,$dat
+       vorr    $dat1,$dat,$dat
+       vorr    $in2,$dat2,$dat2
+       b.lo    .Lcbc_dec_tail
+
+       vorr    $dat1,$dat2,$dat2
+       vld1.8  {$dat2},[$inp],#16
+       vorr    $in0,$dat,$dat
+       vorr    $in1,$dat1,$dat1
+       vorr    $in2,$dat2,$dat2
+
+.Loop3x_cbc_dec:
+       aesd    $dat0,q8
+       aesimc  $dat0,$dat0
+       aesd    $dat1,q8
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q8
+       aesimc  $dat2,$dat2
+       vld1.32 {q8},[$key_],#16
+       subs    $cnt,$cnt,#2
+       aesd    $dat0,q9
+       aesimc  $dat0,$dat0
+       aesd    $dat1,q9
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q9
+       aesimc  $dat2,$dat2
+       vld1.32 {q9},[$key_],#16
+       b.gt    .Loop3x_cbc_dec
+
+       aesd    $dat0,q8
+       aesimc  $dat0,$dat0
+       aesd    $dat1,q8
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q8
+       aesimc  $dat2,$dat2
+        veor   $tmp0,$ivec,$rndlast
+        subs   $len,$len,#0x30
+        veor   $tmp1,$in0,$rndlast
+        mov.lo x6,$len                 // x6, $cnt, is zero at this point
+       aesd    $dat0,q9
+       aesimc  $dat0,$dat0
+       aesd    $dat1,q9
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q9
+       aesimc  $dat2,$dat2
+        veor   $tmp2,$in1,$rndlast
+        add    $inp,$inp,x6            // $inp is adjusted in such way that
+                                       // at exit from the loop $dat1-$dat2
+                                       // are loaded with last "words"
+        vorr   $ivec,$in2,$in2
+        mov    $key_,$key
+       aesd    $dat0,q12
+       aesimc  $dat0,$dat0
+       aesd    $dat1,q12
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q12
+       aesimc  $dat2,$dat2
+        vld1.8 {$in0},[$inp],#16
+       aesd    $dat0,q13
+       aesimc  $dat0,$dat0
+       aesd    $dat1,q13
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q13
+       aesimc  $dat2,$dat2
+        vld1.8 {$in1},[$inp],#16
+       aesd    $dat0,q14
+       aesimc  $dat0,$dat0
+       aesd    $dat1,q14
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q14
+       aesimc  $dat2,$dat2
+        vld1.8 {$in2},[$inp],#16
+       aesd    $dat0,q15
+       aesd    $dat1,q15
+       aesd    $dat2,q15
+        vld1.32 {q8},[$key_],#16       // re-pre-load rndkey[0]
+        add    $cnt,$rounds,#2
+       veor    $tmp0,$tmp0,$dat0
+       veor    $tmp1,$tmp1,$dat1
+       veor    $dat2,$dat2,$tmp2
+        vld1.32 {q9},[$key_],#16       // re-pre-load rndkey[1]
+       vst1.8  {$tmp0},[$out],#16
+        vorr   $dat0,$in0,$in0
+       vst1.8  {$tmp1},[$out],#16
+        vorr   $dat1,$in1,$in1
+       vst1.8  {$dat2},[$out],#16
+        vorr   $dat2,$in2,$in2
+       b.hs    .Loop3x_cbc_dec
+
+       cmn     $len,#0x30
+       b.eq    .Lcbc_done
+       nop
+
+.Lcbc_dec_tail:
+       aesd    $dat1,q8
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q8
+       aesimc  $dat2,$dat2
+       vld1.32 {q8},[$key_],#16
+       subs    $cnt,$cnt,#2
+       aesd    $dat1,q9
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q9
+       aesimc  $dat2,$dat2
+       vld1.32 {q9},[$key_],#16
+       b.gt    .Lcbc_dec_tail
+
+       aesd    $dat1,q8
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q8
+       aesimc  $dat2,$dat2
+       aesd    $dat1,q9
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q9
+       aesimc  $dat2,$dat2
+       aesd    $dat1,q12
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q12
+       aesimc  $dat2,$dat2
+        cmn    $len,#0x20
+       aesd    $dat1,q13
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q13
+       aesimc  $dat2,$dat2
+        veor   $tmp1,$ivec,$rndlast
+       aesd    $dat1,q14
+       aesimc  $dat1,$dat1
+       aesd    $dat2,q14
+       aesimc  $dat2,$dat2
+        veor   $tmp2,$in1,$rndlast
+       aesd    $dat1,q15
+       aesd    $dat2,q15
+       b.eq    .Lcbc_dec_one
+       veor    $tmp1,$tmp1,$dat1
+       veor    $tmp2,$tmp2,$dat2
+        vorr   $ivec,$in2,$in2
+       vst1.8  {$tmp1},[$out],#16
+       vst1.8  {$tmp2},[$out],#16
+       b       .Lcbc_done
+
+.Lcbc_dec_one:
+       veor    $tmp1,$tmp1,$dat2
+        vorr   $ivec,$in2,$in2
+       vst1.8  {$tmp1},[$out],#16
+
+.Lcbc_done:
+       vst1.8  {$ivec},[$ivp]
+.Lcbc_abort:
+___
+}
+$code.=<<___   if ($flavour !~ /64/);
+       vldmia  sp!,{d8-d15}
+       ldmia   sp!,{r4-r8,pc}
+___
+$code.=<<___   if ($flavour =~ /64/);
+       ldr     x29,[sp],#16
+       ret
+___
+$code.=<<___;
+.size  ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
+___
+}}}
+{{{
+my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
+my ($rounds,$cnt,$key_)=("w5","w6","x7");
+my ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12));
+my $step="x12";                # aliases with $tctr2
+
+my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
+my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
+
+my ($dat,$tmp)=($dat0,$tmp0);
+
+### q8-q15     preloaded key schedule
+
+$code.=<<___;
+.globl ${prefix}_ctr32_encrypt_blocks
+.type  ${prefix}_ctr32_encrypt_blocks,%function
+.align 5
+${prefix}_ctr32_encrypt_blocks:
+___
+$code.=<<___   if ($flavour =~ /64/);
+       stp             x29,x30,[sp,#-16]!
+       add             x29,sp,#0
+___
+$code.=<<___   if ($flavour !~ /64/);
+       mov             ip,sp
+       stmdb           sp!,{r4-r10,lr}
+       vstmdb          sp!,{d8-d15}            @ ABI specification says so
+       ldr             r4, [ip]                @ load remaining arg
+___
+$code.=<<___;
+       ldr             $rounds,[$key,#240]
+
+       ldr             $ctr, [$ivp, #12]
+       vld1.32         {$dat0},[$ivp]
+
+       vld1.32         {q8-q9},[$key]          // load key schedule...
+       sub             $rounds,$rounds,#4
+       mov             $step,#16
+       cmp             $len,#2
+       add             $key_,$key,x5,lsl#4     // pointer to last 5 round keys
+       sub             $rounds,$rounds,#2
+       vld1.32         {q12-q13},[$key_],#32
+       vld1.32         {q14-q15},[$key_],#32
+       vld1.32         {$rndlast},[$key_]
+       add             $key_,$key,#32
+       mov             $cnt,$rounds
+       cclr            $step,lo
+#ifndef __ARMEB__
+       rev             $ctr, $ctr
+#endif
+       vorr            $dat1,$dat0,$dat0
+       add             $tctr1, $ctr, #1
+       vorr            $dat2,$dat0,$dat0
+       add             $ctr, $ctr, #2
+       vorr            $ivec,$dat0,$dat0
+       rev             $tctr1, $tctr1
+       vmov.32         ${dat1}[3],$tctr1
+       b.ls            .Lctr32_tail
+       rev             $tctr2, $ctr
+       sub             $len,$len,#3            // bias
+       vmov.32         ${dat2}[3],$tctr2
+       b               .Loop3x_ctr32
+
+.align 4
+.Loop3x_ctr32:
+       aese            $dat0,q8
+       aesmc           $dat0,$dat0
+       aese            $dat1,q8
+       aesmc           $dat1,$dat1
+       aese            $dat2,q8
+       aesmc           $dat2,$dat2
+       vld1.32         {q8},[$key_],#16
+       subs            $cnt,$cnt,#2
+       aese            $dat0,q9
+       aesmc           $dat0,$dat0
+       aese            $dat1,q9
+       aesmc           $dat1,$dat1
+       aese            $dat2,q9
+       aesmc           $dat2,$dat2
+       vld1.32         {q9},[$key_],#16
+       b.gt            .Loop3x_ctr32
+
+       aese            $dat0,q8
+       aesmc           $tmp0,$dat0
+       aese            $dat1,q8
+       aesmc           $tmp1,$dat1
+        vld1.8         {$in0},[$inp],#16
+        vorr           $dat0,$ivec,$ivec
+       aese            $dat2,q8
+       aesmc           $dat2,$dat2
+        vld1.8         {$in1},[$inp],#16
+        vorr           $dat1,$ivec,$ivec
+       aese            $tmp0,q9
+       aesmc           $tmp0,$tmp0
+       aese            $tmp1,q9
+       aesmc           $tmp1,$tmp1
+        vld1.8         {$in2},[$inp],#16
+        mov            $key_,$key
+       aese            $dat2,q9
+       aesmc           $tmp2,$dat2
+        vorr           $dat2,$ivec,$ivec
+        add            $tctr0,$ctr,#1
+       aese            $tmp0,q12
+       aesmc           $tmp0,$tmp0
+       aese            $tmp1,q12
+       aesmc           $tmp1,$tmp1
+        veor           $in0,$in0,$rndlast
+        add            $tctr1,$ctr,#2
+       aese            $tmp2,q12
+       aesmc           $tmp2,$tmp2
+        veor           $in1,$in1,$rndlast
+        add            $ctr,$ctr,#3
+       aese            $tmp0,q13
+       aesmc           $tmp0,$tmp0
+       aese            $tmp1,q13
+       aesmc           $tmp1,$tmp1
+        veor           $in2,$in2,$rndlast
+        rev            $tctr0,$tctr0
+       aese            $tmp2,q13
+       aesmc           $tmp2,$tmp2
+        vmov.32        ${dat0}[3], $tctr0
+        rev            $tctr1,$tctr1
+       aese            $tmp0,q14
+       aesmc           $tmp0,$tmp0
+       aese            $tmp1,q14
+       aesmc           $tmp1,$tmp1
+        vmov.32        ${dat1}[3], $tctr1
+        rev            $tctr2,$ctr
+       aese            $tmp2,q14
+       aesmc           $tmp2,$tmp2
+        vmov.32        ${dat2}[3], $tctr2
+        subs           $len,$len,#3
+       aese            $tmp0,q15
+       aese            $tmp1,q15
+       aese            $tmp2,q15
+
+       veor            $in0,$in0,$tmp0
+        vld1.32         {q8},[$key_],#16       // re-pre-load rndkey[0]
+       vst1.8          {$in0},[$out],#16
+       veor            $in1,$in1,$tmp1
+        mov            $cnt,$rounds
+       vst1.8          {$in1},[$out],#16
+       veor            $in2,$in2,$tmp2
+        vld1.32         {q9},[$key_],#16       // re-pre-load rndkey[1]
+       vst1.8          {$in2},[$out],#16
+       b.hs            .Loop3x_ctr32
+
+       adds            $len,$len,#3
+       b.eq            .Lctr32_done
+       cmp             $len,#1
+       mov             $step,#16
+       cclr            $step,eq
+
+.Lctr32_tail:
+       aese            $dat0,q8
+       aesmc           $dat0,$dat0
+       aese            $dat1,q8
+       aesmc           $dat1,$dat1
+       vld1.32         {q8},[$key_],#16
+       subs            $cnt,$cnt,#2
+       aese            $dat0,q9
+       aesmc           $dat0,$dat0
+       aese            $dat1,q9
+       aesmc           $dat1,$dat1
+       vld1.32         {q9},[$key_],#16
+       b.gt            .Lctr32_tail
+
+       aese            $dat0,q8
+       aesmc           $dat0,$dat0
+       aese            $dat1,q8
+       aesmc           $dat1,$dat1
+       aese            $dat0,q9
+       aesmc           $dat0,$dat0
+       aese            $dat1,q9
+       aesmc           $dat1,$dat1
+        vld1.8         {$in0},[$inp],$step
+       aese            $dat0,q12
+       aesmc           $dat0,$dat0
+       aese            $dat1,q12
+       aesmc           $dat1,$dat1
+        vld1.8         {$in1},[$inp]
+       aese            $dat0,q13
+       aesmc           $dat0,$dat0
+       aese            $dat1,q13
+       aesmc           $dat1,$dat1
+        veor           $in0,$in0,$rndlast
+       aese            $dat0,q14
+       aesmc           $dat0,$dat0
+       aese            $dat1,q14
+       aesmc           $dat1,$dat1
+        veor           $in1,$in1,$rndlast
+       aese            $dat0,q15
+       aese            $dat1,q15
+
+       cmp             $len,#1
+       veor            $in0,$in0,$dat0
+       veor            $in1,$in1,$dat1
+       vst1.8          {$in0},[$out],#16
+       b.eq            .Lctr32_done
+       vst1.8          {$in1},[$out]
+
+.Lctr32_done:
+___
+$code.=<<___   if ($flavour !~ /64/);
+       vldmia          sp!,{d8-d15}
+       ldmia           sp!,{r4-r10,pc}
+___
+$code.=<<___   if ($flavour =~ /64/);
+       ldr             x29,[sp],#16
+       ret
+___
+$code.=<<___;
+.size  ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
+___
+}}}
+$code.=<<___;
+#endif
+___
+# Post-process the accumulated $code one line at a time and print it; $flavour selects 64-bit or 32-bit ARM assembler syntax.
+if ($flavour =~ /64/) {                        ######## 64-bit code
+    my %opcode = (     # base instruction words for the AES mnemonics; unaes() ORs the register numbers into them
+       "aesd"  =>      0x4e285800,     "aese"  =>      0x4e284800,
+       "aesimc"=>      0x4e287800,     "aesmc" =>      0x4e286800      );
+
+    local *unaes = sub {       # (mnemonic, operands) -> ".inst" line; in this branch only the commented-out rule below refers to it
+       my ($mnemonic,$arg)=@_;
+
+       $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o   &&      # $1,$2 = numbers of the first and second register operand
+       sprintf ".inst\t0x%08x\t//%s %s",
+                       $opcode{$mnemonic}|$1|($2<<5),  # first register number in the low bits, second one shifted left by 5
+                       $mnemonic,$arg;                 # the original instruction text is kept as a trailing comment
+    };
+
+    foreach(split("\n",$code)) {
+       s/\`([^\`]*)\`/eval($1)/geo;    # replace each `...` span with the result of evaluating its contents as Perl
+
+       s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;  # old->new registers: qN becomes vN.16b for N<8, v(N+8).16b otherwise
+       s/@\s/\/\//o;                   # old->new style commentary: the first "@ " becomes "//"
+
+       #s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo     or
+       s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o     or      # cclr reg,cond -> csel reg,<w|x>zr,reg,cond; the "or" chain stops at the first rule that matches
+       s/mov\.([a-z]+)\s+([wx][0-9]+),\s*([wx][0-9]+)/csel     $2,$3,$2,$1/o   or      # mov.cond dst,src -> csel dst,src,dst,cond
+       s/vmov\.i8/movi/o       or      # fix up legacy mnemonics
+       s/vext\.8/ext/o         or
+       s/vrev32\.8/rev32/o     or
+       s/vtst\.8/cmtst/o       or
+       s/vshr/ushr/o           or
+       s/^(\s+)v/$1/o          or      # strip off v prefix
+       s/\bbx\s+lr\b/ret/o;
+
+       # fix up remaining legacy suffixes (these run on every line, independently of the chain above)
+       s/\.[ui]?8//o;                  # drop the first .8/.u8/.i8 suffix
+       m/\],#8/o and s/\.16b/\.8b/go;  # lines containing "],#8": .16b -> .8b
+       s/\.[ui]?32//o and s/\.16b/\.4s/go;     # a removed .32 suffix turns .16b arrangements into .4s
+       s/\.[ui]?64//o and s/\.16b/\.2d/go;     # a removed .64 suffix turns .16b arrangements into .2d
+       s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; # lane reference: .4s[n]/.2d[n] -> .s[n]/.d[n]
+
+       print $_,"\n";
+    }
+} else {                               ######## 32-bit code
+    my %opcode = (     # base instruction words for the AES mnemonics; unaes() ORs the register fields into them
+       "aesd"  =>      0xf3b00340,     "aese"  =>      0xf3b00300,
+       "aesimc"=>      0xf3b003c0,     "aesmc" =>      0xf3b00380      );
+
+    local *unaes = sub {       # (mnemonic, operands) -> ".byte" line encoding the instruction; returns nothing useful if no two registers are found
+       my ($mnemonic,$arg)=@_;
+
+       if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) {
+           my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)     # first register: low 3 bits at bit 13, bit 3 moved to bit 22
+                                        |(($2&7)<<1) |(($2&8)<<2);     # second register: low 3 bits at bit 1, bit 3 moved to bit 5
+           # The word is emitted least-significant byte first, since ARMv7
+           # instructions are always encoded little-endian. The correct solution
+           # is to use the .inst directive, but older assemblers don't implement it:-(
+           sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+                       $word&0xff,($word>>8)&0xff,
+                       ($word>>16)&0xff,($word>>24)&0xff,
+                       $mnemonic,$arg;
+       }
+    };
+
+    sub unvtbl {       # "qD,{qT},qI" -> two vtbl.8 instructions, one per d-register half of qD and qI
+       my $arg=shift;
+
+       $arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o &&
+       sprintf "vtbl.8 d%d,{q%d},d%d\n\t".
+               "vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1; # qN maps to d(2N) and d(2N+1)
+    }
+
+    sub unvdup32 {     # "qD,qS[lane]" -> vdup.32 from the d-register that holds that lane
+       my $arg=shift;
+
+       $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
+       sprintf "vdup.32        q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;      # lanes 0-1 -> d(2S), lanes 2-3 -> d(2S+1)
+    }
+
+    sub unvmov32 {     # "qD[lane],src" -> vmov.32 into the d-register that holds that lane
+       my $arg=shift;
+
+       $arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
+       sprintf "vmov.32        d%d[%d],%s",2*$1+($2>>1),$2&1,$3;       # lanes 0-1 -> d(2D), lanes 2-3 -> d(2D+1)
+    }
+
+    foreach(split("\n",$code)) {
+       s/\`([^\`]*)\`/eval($1)/geo;    # replace each `...` span with the result of evaluating its contents as Perl
+
+       s/\b[wx]([0-9]+)\b/r$1/go;              # new->old registers
+       s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;   # new->old registers (pattern accepts a single-digit register number only)
+       s/\/\/\s?/@ /o;                         # new->old style commentary
+
+       # fix up remaining new-style suffixes
+       s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo    or      # "],#8" on a q-register access becomes a d-register access with writeback
+       s/\],#[0-9]+/]!/o;                      # any other immediate post-increment becomes plain writeback
+
+       s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo      or      # the "or" chain stops at the first rule that matches
+       s/cclr\s+([^,]+),\s*([a-z]+)/mov$2      $1,#0/o or      # cclr reg,cond -> mov<cond> reg,#0
+       s/vtbl\.8\s+(.*)/unvtbl($1)/geo                 or
+       s/vdup\.32\s+(.*)/unvdup32($1)/geo              or
+       s/vmov\.32\s+(.*)/unvmov32($1)/geo              or
+       s/^(\s+)b\./$1b/o                               or      # b.cond -> bcond
+       s/^(\s+)mov\./$1mov/o                           or      # mov.cond -> movcond
+       s/^(\s+)ret/$1bx\tlr/o;
+
+       print $_,"\n";
+    }
+}
+
+close STDOUT or die "error closing STDOUT: $!";        # report a failed final flush/close instead of leaving a truncated file unnoticed
diff --git a/crypto/aes/asm/bsaes-armv7.pl b/crypto/aes/asm/bsaes-armv7.pl
new file mode 100644 (file)
index 0000000..fcc81d1
--- /dev/null
@@ -0,0 +1,2469 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Specific modes and adaptation for Linux kernel by Ard Biesheuvel
+# <ard.biesheuvel@linaro.org>. Permission to use under GPL terms is
+# granted.
+# ====================================================================
+
+# Bit-sliced AES for ARM NEON
+#
+# February 2012.
+#
+# This implementation is a direct adaptation of the bsaes-x86_64 module
+# for ARM NEON, except that this module is endian-neutral [in the sense
+# that it can be compiled for either endianness] by courtesy of vld1.8's
+# neutrality. The initial version doesn't implement an interface to
+# OpenSSL, only low-level primitives and unsupported entry points, just
+# enough to collect performance results, which for a Cortex-A8 core are:
+#
+# encrypt      19.5 cycles per byte processed with 128-bit key
+# decrypt      22.1 cycles per byte processed with 128-bit key
+# key conv.    440  cycles per 128-bit key/0.18 of 8x block
+#
+# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7,
+# which is [much] worse than anticipated (for further details see
+# http://www.openssl.org/~appro/Snapdragon-S4.html).
+#
+# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
+# manages in 20.0 cycles].
+#
+# When comparing to x86_64 results keep in mind that NEON unit is
+# [mostly] single-issue and thus can't [fully] benefit from
+# instruction-level parallelism. And when comparing to aes-armv4
+# results keep in mind key schedule conversion overhead (see
+# bsaes-x86_64.pl for further details)...
+#
+#                                              <appro@openssl.org>
+
+# April-August 2013
+#
+# Add CBC, CTR and XTS subroutines, adapt for kernel use.
+#
+#                                      <ard.biesheuvel@linaro.org>
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}   # consume arguments up to and including the first one shaped like name.ext
+if ($output) { open STDOUT,">$output" or die "can't open $output: $!"; }       # redirect generated code to the file; an unwritable file is now a hard error
+
+my ($inp,$out,$len,$key)=("r0","r1","r2","r3");        # symbolic names for registers r0-r3
+my @XMM=map("q$_",(0..15));                            # names of the sixteen q registers, q0..q15
+
+{
+my ($key,$rounds,$const)=("r4","r5","r6");
+
+sub Dlo()   { my ($qreg)=@_; return ($qreg=~m|q([1]?[0-9])|) ? "d".($1*2) : ""; }      # "qN" -> "d(2N)"; "" when the argument names no q register
+sub Dhi()   { my ($qreg)=@_; return ($qreg=~m|q([1]?[0-9])|) ? "d".($1*2+1) : ""; }    # "qN" -> "d(2N+1)"; "" when the argument names no q register
+
+sub Sbox {
+# Emits the S-box as three stages: input basis change, Inv_GF256,
+# output basis change. Arguments: 8 state registers followed by
+# 8 scratch registers, which are handed to Inv_GF256 unchanged.
+# input in  lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb
+my @state=@_[0..7];
+my @scratch=@_[8..15];
+       &InBasisChange  (@state);
+       &Inv_GF256      (@state[6,5,0,3,7,1,4,2],@scratch);
+       &OutBasisChange (@state[7,1,4,2,6,5,0,3]);
+}
+
+sub InBasisChange {                    # appends the S-box input basis change, a sequence of veor instructions, to $code
+# input in  lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
+my @b=@_[0..7];                                # eight register names; arguments past the eighth are ignored
+$code.=<<___;
+       veor    @b[2], @b[2], @b[1]
+       veor    @b[5], @b[5], @b[6]
+       veor    @b[3], @b[3], @b[0]
+       veor    @b[6], @b[6], @b[2]
+       veor    @b[5], @b[5], @b[0]
+
+       veor    @b[6], @b[6], @b[3]
+       veor    @b[3], @b[3], @b[7]
+       veor    @b[7], @b[7], @b[5]
+       veor    @b[3], @b[3], @b[4]
+       veor    @b[4], @b[4], @b[5]
+
+       veor    @b[2], @b[2], @b[7]
+       veor    @b[3], @b[3], @b[1]
+       veor    @b[1], @b[1], @b[5]
+___
+}
+
+sub OutBasisChange {                   # appends the S-box output basis change, a sequence of veor instructions, to $code
+# input in  lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb
+my @b=@_[0..7];                                # eight register names; arguments past the eighth are ignored
+$code.=<<___;
+       veor    @b[0], @b[0], @b[6]
+       veor    @b[1], @b[1], @b[4]
+       veor    @b[4], @b[4], @b[6]
+       veor    @b[2], @b[2], @b[0]
+       veor    @b[6], @b[6], @b[1]
+
+       veor    @b[1], @b[1], @b[5]
+       veor    @b[5], @b[5], @b[3]
+       veor    @b[3], @b[3], @b[7]
+       veor    @b[7], @b[7], @b[5]
+       veor    @b[2], @b[2], @b[5]
+
+       veor    @b[4], @b[4], @b[7]
+___
+}
+
+sub InvSbox {
+# Emits the inverse S-box as three stages: inverse input basis change,
+# Inv_GF256, inverse output basis change. Arguments: 8 state registers
+# followed by 8 scratch registers, which are handed to Inv_GF256 unchanged.
+# input in lsb         > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
+# output in lsb        > [b0, b1, b6, b4, b2, b7, b3, b5] < msb
+my @state=@_[0..7];
+my @scratch=@_[8..15];
+       &InvInBasisChange       (@state);
+       &Inv_GF256              (@state[5,1,2,6,3,7,0,4],@scratch);
+       &InvOutBasisChange      (@state[3,7,0,4,5,1,2,6]);
+}
+
+sub InvInBasisChange {         # OutBasisChange in reverse (with twist); appends a sequence of veor instructions to $code
+my @b=@_[5,1,2,6,3,7,0,4];     # the eight register arguments are re-indexed before use
+$code.=<<___;
+        veor   @b[1], @b[1], @b[7]
+       veor    @b[4], @b[4], @b[7]
+
+       veor    @b[7], @b[7], @b[5]
+        veor   @b[1], @b[1], @b[3]
+       veor    @b[2], @b[2], @b[5]
+       veor    @b[3], @b[3], @b[7]
+
+       veor    @b[6], @b[6], @b[1]
+       veor    @b[2], @b[2], @b[0]
+        veor   @b[5], @b[5], @b[3]
+       veor    @b[4], @b[4], @b[6]
+       veor    @b[0], @b[0], @b[6]
+       veor    @b[1], @b[1], @b[4]
+___
+}
+
+sub InvOutBasisChange {                # InBasisChange in reverse; appends a sequence of veor instructions to $code
+my @b=@_[2,5,7,3,6,1,0,4];     # the eight register arguments are re-indexed before use
+$code.=<<___;
+       veor    @b[1], @b[1], @b[5]
+       veor    @b[2], @b[2], @b[7]
+
+       veor    @b[3], @b[3], @b[1]
+       veor    @b[4], @b[4], @b[5]
+       veor    @b[7], @b[7], @b[5]
+       veor    @b[3], @b[3], @b[4]
+        veor   @b[5], @b[5], @b[0]
+       veor    @b[3], @b[3], @b[7]
+        veor   @b[6], @b[6], @b[2]
+        veor   @b[2], @b[2], @b[1]
+       veor    @b[6], @b[6], @b[3]
+
+       veor    @b[3], @b[3], @b[0]
+       veor    @b[5], @b[5], @b[6]
+___
+}
+
+sub Mul_GF4 {                          # appends the multiplication code to $code; y0,y1 are only read, x0,x1 receive the result
+#;*************************************************************
+#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0-t1 (8) *
+#;*************************************************************
+my ($x0,$x1,$y0,$y1,$t0,$t1)=@_;
+$code.=<<___;
+       veor    $t0, $y0, $y1
+       vand    $t0, $t0, $x0
+       veor    $x0, $x0, $x1
+       vand    $t1, $x1, $y0
+       vand    $x0, $x0, $y1
+       veor    $x1, $t1, $t0
+       veor    $x0, $x0, $t1
+___
+}
+
+sub Mul_GF4_N {                                # not used, see next subroutine
+# multiply and scale by N; result replaces x0,x1, y0,y1 are only read, t0 is the single temporary
+my ($x0,$x1,$y0,$y1,$t0)=@_;
+$code.=<<___;
+       veor    $t0, $y0, $y1
+       vand    $t0, $t0, $x0
+       veor    $x0, $x0, $x1
+       vand    $x1, $x1, $y0
+       vand    $x0, $x0, $y1
+       veor    $x1, $x1, $x0
+       veor    $x0, $x0, $t0
+___
+}
+
+sub Mul_GF4_N_GF4 {
+# interleaved Mul_GF4_N and Mul_GF4: the first five arguments feed the Mul_GF4_N stream (less indented lines), the last five feed the Mul_GF4 stream, which here needs only one temporary
+my ($x0,$x1,$y0,$y1,$t0,
+    $x2,$x3,$y2,$y3,$t1)=@_;
+$code.=<<___;
+       veor    $t0, $y0, $y1
+        veor   $t1, $y2, $y3
+       vand    $t0, $t0, $x0
+        vand   $t1, $t1, $x2
+       veor    $x0, $x0, $x1
+        veor   $x2, $x2, $x3
+       vand    $x1, $x1, $y0
+        vand   $x3, $x3, $y2
+       vand    $x0, $x0, $y1
+        vand   $x2, $x2, $y3
+       veor    $x1, $x1, $x0
+        veor   $x2, $x2, $x3
+       veor    $x0, $x0, $t0
+        veor   $x3, $x3, $t1
+___
+}
+sub Mul_GF16_2 {                       # emits code that updates both halves x[0..3] and x[4..7] in place using the same multiplier y[0..3]
+my @x=@_[0..7];                                # two 4-register operands
+my @y=@_[8..11];                       # shared multiplier; y[0],y[1] are modified in between and XORed back
+my @t=@_[12..15];                      # four temporaries
+$code.=<<___;
+       veor    @t[0], @x[0], @x[2]
+       veor    @t[1], @x[1], @x[3]
+___
+       &Mul_GF4        (@x[0], @x[1], @y[0], @y[1], @t[2..3]);
+$code.=<<___;
+       veor    @y[0], @y[0], @y[2]
+       veor    @y[1], @y[1], @y[3]
+___
+       Mul_GF4_N_GF4   (@t[0], @t[1], @y[0], @y[1], @t[3],
+                        @x[2], @x[3], @y[2], @y[3], @t[2]);
+$code.=<<___;
+       veor    @x[0], @x[0], @t[0]
+       veor    @x[2], @x[2], @t[0]
+       veor    @x[1], @x[1], @t[1]
+       veor    @x[3], @x[3], @t[1]
+
+       veor    @t[0], @x[4], @x[6]
+       veor    @t[1], @x[5], @x[7]
+___
+       &Mul_GF4_N_GF4  (@t[0], @t[1], @y[0], @y[1], @t[3],
+                        @x[6], @x[7], @y[2], @y[3], @t[2]);
+$code.=<<___;
+       veor    @y[0], @y[0], @y[2]
+       veor    @y[1], @y[1], @y[3]
+___
+       &Mul_GF4        (@x[4], @x[5], @y[0], @y[1], @t[2..3]);        # runs after the second XOR pair above has undone the first one on y[0],y[1]
+$code.=<<___;
+       veor    @x[4], @x[4], @t[0]
+       veor    @x[6], @x[6], @t[0]
+       veor    @x[5], @x[5], @t[1]
+       veor    @x[7], @x[7], @t[1]
+___
+}
+sub Inv_GF256 {                                # appends the inversion code to $code: a fixed veor/vand/vorr/vbsl sequence followed by Mul_GF16_2
+#;********************************************************************
+#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144)       *
+#;********************************************************************
+my @x=@_[0..7];
+my @t=@_[8..11];
+my @s=@_[12..15];
+# direct optimizations from hardware
+$code.=<<___;
+       veor    @t[3], @x[4], @x[6]
+       veor    @t[2], @x[5], @x[7]
+       veor    @t[1], @x[1], @x[3]
+       veor    @s[1], @x[7], @x[6]
+        vmov   @t[0], @t[2]
+       veor    @s[0], @x[0], @x[2]
+
+       vorr    @t[2], @t[2], @t[1]
+       veor    @s[3], @t[3], @t[0]
+       vand    @s[2], @t[3], @s[0]
+       vorr    @t[3], @t[3], @s[0]
+       veor    @s[0], @s[0], @t[1]
+       vand    @t[0], @t[0], @t[1]
+       veor    @t[1], @x[3], @x[2]
+       vand    @s[3], @s[3], @s[0]
+       vand    @s[1], @s[1], @t[1]
+       veor    @t[1], @x[4], @x[5]
+       veor    @s[0], @x[1], @x[0]
+       veor    @t[3], @t[3], @s[1]
+       veor    @t[2], @t[2], @s[1]
+       vand    @s[1], @t[1], @s[0]
+       vorr    @t[1], @t[1], @s[0]
+       veor    @t[3], @t[3], @s[3]
+       veor    @t[0], @t[0], @s[1]
+       veor    @t[2], @t[2], @s[2]
+       veor    @t[1], @t[1], @s[3]
+       veor    @t[0], @t[0], @s[2]
+       vand    @s[0], @x[7], @x[3]
+       veor    @t[1], @t[1], @s[2]
+       vand    @s[1], @x[6], @x[2]
+       vand    @s[2], @x[5], @x[1]
+       vorr    @s[3], @x[4], @x[0]
+       veor    @t[3], @t[3], @s[0]
+       veor    @t[1], @t[1], @s[2]
+       veor    @t[0], @t[0], @s[3]
+       veor    @t[2], @t[2], @s[1]
+
+       @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
+
+       @ new smaller inversion
+
+       vand    @s[2], @t[3], @t[1]
+       vmov    @s[0], @t[0]
+
+       veor    @s[1], @t[2], @s[2]
+       veor    @s[3], @t[0], @s[2]
+       veor    @s[2], @t[0], @s[2]     @ @s[2]=@s[3]
+
+       vbsl    @s[1], @t[1], @t[0]
+       vbsl    @s[3], @t[3], @t[2]
+       veor    @t[3], @t[3], @t[2]
+
+       vbsl    @s[0], @s[1], @s[2]
+       vbsl    @t[0], @s[2], @s[1]
+
+       vand    @s[2], @s[0], @s[3]
+       veor    @t[1], @t[1], @t[0]
+
+       veor    @s[2], @s[2], @t[3]
+___
+# output of the inversion above is in s3, s2, s1, t1
+
+# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3
+
+# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
+       &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]);        # multiplier = (s3,s2,s1,t1), temporaries = (s0,t0,t2,t3)
+
+### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb
+}
+
+# AES linear components
+
+sub ShiftRows {                                # emits: load eight registers through $key (with writeback), XOR each with one x register, then permute bytes via vtbl.8 and $mask back into that x register
+my @x=@_[0..7];                                # registers updated in place
+my @t=@_[8..11];                       # four temporaries, reused for the second group of four loads
+my $mask=pop;                          # last argument: register holding the vtbl.8 index vector
+$code.=<<___;
+       vldmia  $key!, {@t[0]-@t[3]}
+       veor    @t[0], @t[0], @x[0]
+       veor    @t[1], @t[1], @x[1]
+       vtbl.8  `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)`
+       vtbl.8  `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)`
+       vldmia  $key!, {@t[0]}
+       veor    @t[2], @t[2], @x[2]
+       vtbl.8  `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)`
+       vtbl.8  `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)`
+       vldmia  $key!, {@t[1]}
+       veor    @t[3], @t[3], @x[3]
+       vtbl.8  `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)`
+       vtbl.8  `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)`
+       vldmia  $key!, {@t[2]}
+       vtbl.8  `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)`
+       vtbl.8  `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)`
+       vldmia  $key!, {@t[3]}
+       veor    @t[0], @t[0], @x[4]
+       veor    @t[1], @t[1], @x[5]
+       vtbl.8  `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)`
+       vtbl.8  `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)`
+       veor    @t[2], @t[2], @x[6]
+       vtbl.8  `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)`
+       vtbl.8  `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)`
+       veor    @t[3], @t[3], @x[7]
+       vtbl.8  `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)`
+       vtbl.8  `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)`
+       vtbl.8  `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)`
+       vtbl.8  `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)`
+___
+}
+
+sub MixColumns {
+# modified to emit output in order suitable for feeding back to aesenc[last]
+# Arguments: 8 state registers, 8 temporaries, and an optional flag that
+# selects the alternative tail used when called from InvMixColumns.
+my @x=@_[0..7];
+my @t=@_[8..15];
+my $inv=$_[16];        # optional; scalar element access (a one-element slice @_[16] warns under -w)
+$code.=<<___;
+       vext.8  @t[0], @x[0], @x[0], #12        @ x0 <<< 32
+       vext.8  @t[1], @x[1], @x[1], #12
+        veor   @x[0], @x[0], @t[0]             @ x0 ^ (x0 <<< 32)
+       vext.8  @t[2], @x[2], @x[2], #12
+        veor   @x[1], @x[1], @t[1]
+       vext.8  @t[3], @x[3], @x[3], #12
+        veor   @x[2], @x[2], @t[2]
+       vext.8  @t[4], @x[4], @x[4], #12
+        veor   @x[3], @x[3], @t[3]
+       vext.8  @t[5], @x[5], @x[5], #12
+        veor   @x[4], @x[4], @t[4]
+       vext.8  @t[6], @x[6], @x[6], #12
+        veor   @x[5], @x[5], @t[5]
+       vext.8  @t[7], @x[7], @x[7], #12
+        veor   @x[6], @x[6], @t[6]
+
+       veor    @t[1], @t[1], @x[0]
+        veor   @x[7], @x[7], @t[7]
+        vext.8 @x[0], @x[0], @x[0], #8         @ (x0 ^ (x0 <<< 32)) <<< 64)
+       veor    @t[2], @t[2], @x[1]
+       veor    @t[0], @t[0], @x[7]
+       veor    @t[1], @t[1], @x[7]
+        vext.8 @x[1], @x[1], @x[1], #8
+       veor    @t[5], @t[5], @x[4]
+        veor   @x[0], @x[0], @t[0]
+       veor    @t[6], @t[6], @x[5]
+        veor   @x[1], @x[1], @t[1]
+        vext.8 @t[0], @x[4], @x[4], #8
+       veor    @t[4], @t[4], @x[3]
+        vext.8 @t[1], @x[5], @x[5], #8
+       veor    @t[7], @t[7], @x[6]
+        vext.8 @x[4], @x[3], @x[3], #8
+       veor    @t[3], @t[3], @x[2]
+        vext.8 @x[5], @x[7], @x[7], #8
+       veor    @t[4], @t[4], @x[7]
+        vext.8 @x[3], @x[6], @x[6], #8
+       veor    @t[3], @t[3], @x[7]
+        vext.8 @x[6], @x[2], @x[2], #8
+       veor    @x[7], @t[1], @t[5]
+___
+# forward tail: results land in x[2],x[4],x[5],x[3],x[6]
+$code.=<<___ if (!$inv);
+       veor    @x[2], @t[0], @t[4]
+       veor    @x[4], @x[4], @t[3]
+       veor    @x[5], @x[5], @t[7]
+       veor    @x[3], @x[3], @t[6]
+        @ vmov @x[2], @t[0]
+       veor    @x[6], @x[6], @t[2]
+        @ vmov @x[7], @t[1]
+___
+# alternative tail for InvMixColumns: same terms, written to differently numbered x registers
+$code.=<<___ if ($inv);
+       veor    @t[3], @t[3], @x[4]
+       veor    @x[5], @x[5], @t[7]
+       veor    @x[2], @x[3], @t[6]
+       veor    @x[3], @t[0], @t[4]
+       veor    @x[4], @x[6], @t[2]
+       vmov    @x[6], @t[3]
+        @ vmov @x[7], @t[1]
+___
+}
+
+sub InvMixColumns_orig {               # emits four stages, labelled "multiplication by 0x0e/0x0b/0x0d/0x09" below; NOTE(review): the _orig suffix suggests InvMixColumns supersedes it - confirm no callers remain
+my @x=@_[0..7];                                # eight input registers
+my @t=@_[8..15];                       # eight temporaries
+
+$code.=<<___;
+       @ multiplication by 0x0e
+       vext.8  @t[7], @x[7], @x[7], #12
+       vmov    @t[2], @x[2]
+       veor    @x[2], @x[2], @x[5]             @ 2 5
+       veor    @x[7], @x[7], @x[5]             @ 7 5
+       vext.8  @t[0], @x[0], @x[0], #12
+       vmov    @t[5], @x[5]
+       veor    @x[5], @x[5], @x[0]             @ 5 0           [1]
+       veor    @x[0], @x[0], @x[1]             @ 0 1
+       vext.8  @t[1], @x[1], @x[1], #12
+       veor    @x[1], @x[1], @x[2]             @ 1 25
+       veor    @x[0], @x[0], @x[6]             @ 01 6          [2]
+       vext.8  @t[3], @x[3], @x[3], #12
+       veor    @x[1], @x[1], @x[3]             @ 125 3         [4]
+       veor    @x[2], @x[2], @x[0]             @ 25 016        [3]
+       veor    @x[3], @x[3], @x[7]             @ 3 75
+       veor    @x[7], @x[7], @x[6]             @ 75 6          [0]
+       vext.8  @t[6], @x[6], @x[6], #12
+       vmov    @t[4], @x[4]
+       veor    @x[6], @x[6], @x[4]             @ 6 4
+       veor    @x[4], @x[4], @x[3]             @ 4 375         [6]
+       veor    @x[3], @x[3], @x[7]             @ 375 756=36
+       veor    @x[6], @x[6], @t[5]             @ 64 5          [7]
+       veor    @x[3], @x[3], @t[2]             @ 36 2
+       vext.8  @t[5], @t[5], @t[5], #12
+       veor    @x[3], @x[3], @t[4]             @ 362 4         [5]
+___
+                                       my @y = @x[7,5,0,2,1,3,4,6];    # y[n] is the x register tagged [n] in the comments above
+$code.=<<___;
+       @ multiplication by 0x0b
+       veor    @y[1], @y[1], @y[0]
+       veor    @y[0], @y[0], @t[0]
+       vext.8  @t[2], @t[2], @t[2], #12
+       veor    @y[1], @y[1], @t[1]
+       veor    @y[0], @y[0], @t[5]
+       vext.8  @t[4], @t[4], @t[4], #12
+       veor    @y[1], @y[1], @t[6]
+       veor    @y[0], @y[0], @t[7]
+       veor    @t[7], @t[7], @t[6]             @ clobber t[7]
+
+       veor    @y[3], @y[3], @t[0]
+        veor   @y[1], @y[1], @y[0]
+       vext.8  @t[0], @t[0], @t[0], #12
+       veor    @y[2], @y[2], @t[1]
+       veor    @y[4], @y[4], @t[1]
+       vext.8  @t[1], @t[1], @t[1], #12
+       veor    @y[2], @y[2], @t[2]
+       veor    @y[3], @y[3], @t[2]
+       veor    @y[5], @y[5], @t[2]
+       veor    @y[2], @y[2], @t[7]
+       vext.8  @t[2], @t[2], @t[2], #12
+       veor    @y[3], @y[3], @t[3]
+       veor    @y[6], @y[6], @t[3]
+       veor    @y[4], @y[4], @t[3]
+       veor    @y[7], @y[7], @t[4]
+       vext.8  @t[3], @t[3], @t[3], #12
+       veor    @y[5], @y[5], @t[4]
+       veor    @y[7], @y[7], @t[7]
+       veor    @t[7], @t[7], @t[5]             @ clobber t[7] even more
+       veor    @y[3], @y[3], @t[5]
+       veor    @y[4], @y[4], @t[4]
+
+       veor    @y[5], @y[5], @t[7]
+       vext.8  @t[4], @t[4], @t[4], #12
+       veor    @y[6], @y[6], @t[7]
+       veor    @y[4], @y[4], @t[7]
+
+       veor    @t[7], @t[7], @t[5]
+       vext.8  @t[5], @t[5], @t[5], #12
+
+       @ multiplication by 0x0d
+       veor    @y[4], @y[4], @y[7]
+        veor   @t[7], @t[7], @t[6]             @ restore t[7]
+       veor    @y[7], @y[7], @t[4]
+       vext.8  @t[6], @t[6], @t[6], #12
+       veor    @y[2], @y[2], @t[0]
+       veor    @y[7], @y[7], @t[5]
+       vext.8  @t[7], @t[7], @t[7], #12
+       veor    @y[2], @y[2], @t[2]
+
+       veor    @y[3], @y[3], @y[1]
+       veor    @y[1], @y[1], @t[1]
+       veor    @y[0], @y[0], @t[0]
+       veor    @y[3], @y[3], @t[0]
+       veor    @y[1], @y[1], @t[5]
+       veor    @y[0], @y[0], @t[5]
+       vext.8  @t[0], @t[0], @t[0], #12
+       veor    @y[1], @y[1], @t[7]
+       veor    @y[0], @y[0], @t[6]
+       veor    @y[3], @y[3], @y[1]
+       veor    @y[4], @y[4], @t[1]
+       vext.8  @t[1], @t[1], @t[1], #12
+
+       veor    @y[7], @y[7], @t[7]
+       veor    @y[4], @y[4], @t[2]
+       veor    @y[5], @y[5], @t[2]
+       veor    @y[2], @y[2], @t[6]
+       veor    @t[6], @t[6], @t[3]             @ clobber t[6]
+       vext.8  @t[2], @t[2], @t[2], #12
+       veor    @y[4], @y[4], @y[7]
+       veor    @y[3], @y[3], @t[6]
+
+       veor    @y[6], @y[6], @t[6]
+       veor    @y[5], @y[5], @t[5]
+       vext.8  @t[5], @t[5], @t[5], #12
+       veor    @y[6], @y[6], @t[4]
+       vext.8  @t[4], @t[4], @t[4], #12
+       veor    @y[5], @y[5], @t[6]
+       veor    @y[6], @y[6], @t[7]
+       vext.8  @t[7], @t[7], @t[7], #12
+       veor    @t[6], @t[6], @t[3]             @ restore t[6]
+       vext.8  @t[3], @t[3], @t[3], #12
+
+       @ multiplication by 0x09
+       veor    @y[4], @y[4], @y[1]
+       veor    @t[1], @t[1], @y[1]             @ t[1]=y[1]
+       veor    @t[0], @t[0], @t[5]             @ clobber t[0]
+       vext.8  @t[6], @t[6], @t[6], #12
+       veor    @t[1], @t[1], @t[5]
+       veor    @y[3], @y[3], @t[0]
+       veor    @t[0], @t[0], @y[0]             @ t[0]=y[0]
+       veor    @t[1], @t[1], @t[6]
+       veor    @t[6], @t[6], @t[7]             @ clobber t[6]
+       veor    @y[4], @y[4], @t[1]
+       veor    @y[7], @y[7], @t[4]
+       veor    @y[6], @y[6], @t[3]
+       veor    @y[5], @y[5], @t[2]
+       veor    @t[4], @t[4], @y[4]             @ t[4]=y[4]
+       veor    @t[3], @t[3], @y[3]             @ t[3]=y[3]
+       veor    @t[5], @t[5], @y[5]             @ t[5]=y[5]
+       veor    @t[2], @t[2], @y[2]             @ t[2]=y[2]
+       veor    @t[3], @t[3], @t[7]
+       veor    @XMM[5], @t[5], @t[6]
+       veor    @XMM[6], @t[6], @y[6]           @ t[6]=y[6]
+       veor    @XMM[2], @t[2], @t[6]
+       veor    @XMM[7], @t[7], @y[7]           @ t[7]=y[7]
+
+       vmov    @XMM[0], @t[0]
+       vmov    @XMM[1], @t[1]
+       @ vmov  @XMM[2], @t[2]
+       vmov    @XMM[3], @t[3]
+       vmov    @XMM[4], @t[4]
+       @ vmov  @XMM[5], @t[5]
+       @ vmov  @XMM[6], @t[6]
+       @ vmov  @XMM[7], @t[7]
+___
+}
+
+sub InvMixColumns {                    # emits the 05-00-04-00 pre-multiplication below, then reuses MixColumns with its optional flag set
+my @x=@_[0..7];                                # eight state registers, updated in place
+my @t=@_[8..15];                       # eight temporaries
+
+# Thanks to Jussi Kivilinna for providing pointer to
+# the factorisation of the inverse matrix (left) into the
+# forward MixColumns matrix times a sparser matrix (right):
+# | 0e 0b 0d 09 |   | 02 03 01 01 |   | 05 00 04 00 |
+# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
+# | 0d 09 0e 0b |   | 01 01 02 03 |   | 04 00 05 00 |
+# | 0b 0d 09 0e |   | 03 01 01 02 |   | 00 04 00 05 |
+
+$code.=<<___;
+       @ multiplication by 0x05-0x00-0x04-0x00
+       vext.8  @t[0], @x[0], @x[0], #8
+       vext.8  @t[6], @x[6], @x[6], #8
+       vext.8  @t[7], @x[7], @x[7], #8
+       veor    @t[0], @t[0], @x[0]
+       vext.8  @t[1], @x[1], @x[1], #8
+       veor    @t[6], @t[6], @x[6]
+       vext.8  @t[2], @x[2], @x[2], #8
+       veor    @t[7], @t[7], @x[7]
+       vext.8  @t[3], @x[3], @x[3], #8
+       veor    @t[1], @t[1], @x[1]
+       vext.8  @t[4], @x[4], @x[4], #8
+       veor    @t[2], @t[2], @x[2]
+       vext.8  @t[5], @x[5], @x[5], #8
+       veor    @t[3], @t[3], @x[3]
+       veor    @t[4], @t[4], @x[4]
+       veor    @t[5], @t[5], @x[5]
+
+        veor   @x[0], @x[0], @t[6]
+        veor   @x[1], @x[1], @t[6]
+        veor   @x[2], @x[2], @t[0]
+        veor   @x[4], @x[4], @t[2]
+        veor   @x[3], @x[3], @t[1]
+        veor   @x[1], @x[1], @t[7]
+        veor   @x[2], @x[2], @t[7]
+        veor   @x[4], @x[4], @t[6]
+        veor   @x[5], @x[5], @t[3]
+        veor   @x[3], @x[3], @t[6]
+        veor   @x[6], @x[6], @t[4]
+        veor   @x[4], @x[4], @t[7]
+        veor   @x[5], @x[5], @t[7]
+        veor   @x[7], @x[7], @t[5]
+___
+       &MixColumns     (@x,@t,1);      # flipped 2<->3 and 4<->6
+}
+
+sub swapmove {                         # emits: t=((b>>n)^a)&mask; a^=t; b^=(t<<n) - exchanges the bits of a selected by mask with the bits of b located n positions higher
+my ($a,$b,$n,$mask,$t)=@_;             # two data registers, shift count, mask register, one temporary
+$code.=<<___;
+       vshr.u64        $t, $b, #$n
+       veor            $t, $t, $a
+       vand            $t, $t, $mask
+       veor            $a, $a, $t
+       vshl.u64        $t, $t, #$n
+       veor            $b, $b, $t
+___
+}
+sub swapmove2x {                       # two interleaved copies of the swapmove sequence: (a0,b0) uses t0, (a1,b1) uses t1; n and mask are shared
+my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_;
+$code.=<<___;
+       vshr.u64        $t0, $b0, #$n
+        vshr.u64       $t1, $b1, #$n
+       veor            $t0, $t0, $a0
+        veor           $t1, $t1, $a1
+       vand            $t0, $t0, $mask
+        vand           $t1, $t1, $mask
+       veor            $a0, $a0, $t0
+       vshl.u64        $t0, $t0, #$n
+        veor           $a1, $a1, $t1
+        vshl.u64       $t1, $t1, #$n
+       veor            $b0, $b0, $t0
+        veor           $b1, $b1, $t1
+___
+}
+
+sub bitslice {                         # emits three rounds of swapmove2x over the eight registers: shift 1 with mask 0x55, shift 2 with mask 0x33, shift 4 with mask 0x0f
+my @x=reverse(@_[0..7]);               # the eight data registers are processed in reversed argument order
+my ($t0,$t1,$t2,$t3)=@_[8..11];                # t0,t1 hold the masks, t2,t3 are swapmove temporaries
+$code.=<<___;
+       vmov.i8 $t0,#0x55                       @ compose .LBS0
+       vmov.i8 $t1,#0x33                       @ compose .LBS1
+___
+       &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3);
+       &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
+$code.=<<___;
+       vmov.i8 $t0,#0x0f                       @ compose .LBS2
+___
+       &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3);        # $t0 is reloaded with 0x0f before these are emitted; they use $t1 (0x33)
+       &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
+
+       &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3);
+       &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3);
+}
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+
+# define VFP_ABI_PUSH  vstmdb  sp!,{d8-d15}
+# define VFP_ABI_POP   vldmia  sp!,{d8-d15}
+# define VFP_ABI_FRAME 0x40
+#else
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+# define VFP_ABI_FRAME 0
+# define BSAES_ASM_EXTENDED_KEY
+# define XTS_CHAIN_TWEAK
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
+#endif
+
+#ifdef __thumb__
+# define adrl adr
+#endif
+
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
+.text
+.syntax        unified         @ ARMv7-capable assembler is expected to handle this
+#ifdef __thumb2__
+.thumb
+#else
+.code   32
+#endif
+
+@ _bsaes_decrypt8: internal helper working on the eight blocks held in
+@ @XMM[0]-@XMM[7]. The round 0 key is read from $key with post-increment,
+@ $rounds is counted down and $const walks the InvShiftRows constants of
+@ _bsaes_const. @XMM[8]-@XMM[15] are overwritten as scratch.
+@ Callers in this file load r4 (key schedule) and r5 (rounds) before the call.
+@ The closing XOR with the last round key touches the registers in the order
+@ @XMM[6], @XMM[4], @XMM[2], @XMM[7], @XMM[3], @XMM[5], @XMM[0], @XMM[1].
+.type  _bsaes_decrypt8,%function
+.align 4
+_bsaes_decrypt8:
+       adr     $const,_bsaes_decrypt8
+       vldmia  $key!, {@XMM[9]}                @ round 0 key
+       add     $const,$const,#.LM0ISR-_bsaes_decrypt8
+
+       vldmia  $const!, {@XMM[8]}              @ .LM0ISR
+       @ each block is XORed with the round 0 key into a temporary and then
+       @ permuted back into place with vtbl.8 using the .LM0ISR indices
+       veor    @XMM[10], @XMM[0], @XMM[9]      @ xor with round0 key
+       veor    @XMM[11], @XMM[1], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+       veor    @XMM[12], @XMM[2], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+       veor    @XMM[13], @XMM[3], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
+       veor    @XMM[14], @XMM[4], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
+       veor    @XMM[15], @XMM[5], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
+       veor    @XMM[10], @XMM[6], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
+       veor    @XMM[11], @XMM[7], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+        vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+___
+       # convert the eight blocks to bit-sliced form; 8..11 are temporaries
+       &bitslice       (@XMM[0..7, 8..11]);
+$code.=<<___;
+       @ the first pass enters the loop at .Ldec_sbox, so the code emitted
+       @ by ShiftRows at .Ldec_loop is skipped once
+       sub     $rounds,$rounds,#1
+       b       .Ldec_sbox
+.align 4
+.Ldec_loop:
+___
+       &ShiftRows      (@XMM[0..7, 8..12]);
+$code.=".Ldec_sbox:\n";
+       &InvSbox        (@XMM[0..7, 8..15]);
+$code.=<<___;
+       @ leave the loop once the counter borrows, i.e. it was already zero
+       subs    $rounds,$rounds,#1
+       bcc     .Ldec_done
+___
+       # the register order passed here differs from 0..7; the same order is
+       # used for the final bitslice call and by the callers when storing
+       &InvMixColumns  (@XMM[0,1,6,4,2,7,3,5, 8..15]);
+$code.=<<___;
+       @ the conditions below test the flags of the subs above (no visible
+       @ instruction in between sets them): when the counter reached zero,
+       @ $const is stepped by 16 bytes so the second load picks up .LISRM0
+       vldmia  $const, {@XMM[12]}              @ .LISR
+       ite     eq                              @ Thumb2 thing, sanity check in ARM
+       addeq   $const,$const,#0x10
+       bne     .Ldec_loop
+       vldmia  $const, {@XMM[12]}              @ .LISRM0
+       b       .Ldec_loop
+.align 4
+.Ldec_done:
+___
+       # convert back from bit-sliced form, using the permuted register order
+       &bitslice       (@XMM[0,1,6,4,2,7,3,5, 8..11]);
+$code.=<<___;
+       vldmia  $key, {@XMM[8]}                 @ last round key
+       veor    @XMM[6], @XMM[6], @XMM[8]
+       veor    @XMM[4], @XMM[4], @XMM[8]
+       veor    @XMM[2], @XMM[2], @XMM[8]
+       veor    @XMM[7], @XMM[7], @XMM[8]
+       veor    @XMM[3], @XMM[3], @XMM[8]
+       veor    @XMM[5], @XMM[5], @XMM[8]
+       veor    @XMM[0], @XMM[0], @XMM[8]
+       veor    @XMM[1], @XMM[1], @XMM[8]
+       bx      lr
+.size  _bsaes_decrypt8,.-_bsaes_decrypt8
+
+@ _bsaes_const: table of 16-byte vectors used as vtbl.8 index operands.
+@ Code in this file addresses the entries relative to each other (a 0x10
+@ step from .LISR to .LISRM0 and from .LSR to .LSRM0, and label differences
+@ such as .LREVM0SR-.LM0 and .LREVM0SR-.LSR), so the order and the size of
+@ the entries must be kept as they are.
+.type  _bsaes_const,%object
+.align 6
+_bsaes_const:
+.LM0ISR:       @ InvShiftRows constants
+       .quad   0x0a0e0206070b0f03, 0x0004080c0d010509
+.LISR:
+       .quad   0x0504070602010003, 0x0f0e0d0c080b0a09
+.LISRM0:
+       .quad   0x01040b0e0205080f, 0x0306090c00070a0d
+.LM0SR:                @ ShiftRows constants
+       .quad   0x0a0e02060f03070b, 0x0004080c05090d01
+.LSR:
+       .quad   0x0504070600030201, 0x0f0e0d0c0a09080b
+.LSRM0:
+       .quad   0x0304090e00050a0f, 0x01060b0c0207080d
+.LM0:          @ loaded by _bsaes_key_convert
+       .quad   0x02060a0e03070b0f, 0x0004080c0105090d
+.LREVM0SR:     @ loaded by bsaes_ctr32_encrypt_blocks
+       .quad   0x090d01050c000408, 0x03070b0f060a0e02
+.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>"
+.align 6
+.size  _bsaes_const,.-_bsaes_const
+
+@ _bsaes_encrypt8: internal helper working on the eight blocks held in
+@ @XMM[0]-@XMM[7]. The round 0 key is read from $key with post-increment,
+@ $rounds is counted down and $const walks the ShiftRows constants of
+@ _bsaes_const. @XMM[8]-@XMM[15] are overwritten as scratch.
+@ Two further entry points exist:
+@   _bsaes_encrypt8_alt       skips the loads of the round 0 key and of the
+@                             permutation vector, so the caller must provide
+@                             @XMM[9], @XMM[8] and $const itself (done by
+@                             bsaes_ctr32_encrypt_blocks);
+@   _bsaes_encrypt8_bitslice  starts at the bit-slicing step.
+@ The closing XOR with the last round key touches the registers in the order
+@ @XMM[4], @XMM[6], @XMM[3], @XMM[7], @XMM[2], @XMM[5], @XMM[0], @XMM[1].
+.type  _bsaes_encrypt8,%function
+.align 4
+_bsaes_encrypt8:
+       adr     $const,_bsaes_encrypt8
+       vldmia  $key!, {@XMM[9]}                @ round 0 key
+       sub     $const,$const,#_bsaes_encrypt8-.LM0SR
+
+       vldmia  $const!, {@XMM[8]}              @ .LM0SR
+_bsaes_encrypt8_alt:
+       @ each block is XORed with the round 0 key into a temporary and then
+       @ permuted back into place with vtbl.8 using the indices in @XMM[8]
+       veor    @XMM[10], @XMM[0], @XMM[9]      @ xor with round0 key
+       veor    @XMM[11], @XMM[1], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+       veor    @XMM[12], @XMM[2], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+       veor    @XMM[13], @XMM[3], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
+       veor    @XMM[14], @XMM[4], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
+       veor    @XMM[15], @XMM[5], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
+       veor    @XMM[10], @XMM[6], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
+       veor    @XMM[11], @XMM[7], @XMM[9]
+        vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+        vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+        vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+_bsaes_encrypt8_bitslice:
+___
+       # convert the eight blocks to bit-sliced form; 8..11 are temporaries
+       &bitslice       (@XMM[0..7, 8..11]);
+$code.=<<___;
+       @ the first pass enters the loop at .Lenc_sbox, so the code emitted
+       @ by ShiftRows at .Lenc_loop is skipped once
+       sub     $rounds,$rounds,#1
+       b       .Lenc_sbox
+.align 4
+.Lenc_loop:
+___
+       &ShiftRows      (@XMM[0..7, 8..12]);
+$code.=".Lenc_sbox:\n";
+       &Sbox           (@XMM[0..7, 8..15]);
+$code.=<<___;
+       @ leave the loop once the counter borrows, i.e. it was already zero
+       subs    $rounds,$rounds,#1
+       bcc     .Lenc_done
+___
+       # the register order passed here differs from 0..7; the same order is
+       # used for the final bitslice call and by the callers when storing
+       &MixColumns     (@XMM[0,1,4,6,3,7,2,5, 8..15]);
+$code.=<<___;
+       @ the conditions below test the flags of the subs above (no visible
+       @ instruction in between sets them): when the counter reached zero,
+       @ $const is stepped by 16 bytes so the second load picks up .LSRM0
+       vldmia  $const, {@XMM[12]}              @ .LSR
+       ite     eq                              @ Thumb2 thing, sanity check in ARM
+       addeq   $const,$const,#0x10
+       bne     .Lenc_loop
+       vldmia  $const, {@XMM[12]}              @ .LSRM0
+       b       .Lenc_loop
+.align 4
+.Lenc_done:
+___
+       # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb
+       &bitslice       (@XMM[0,1,4,6,3,7,2,5, 8..11]);
+$code.=<<___;
+       vldmia  $key, {@XMM[8]}                 @ last round key
+       veor    @XMM[4], @XMM[4], @XMM[8]
+       veor    @XMM[6], @XMM[6], @XMM[8]
+       veor    @XMM[3], @XMM[3], @XMM[8]
+       veor    @XMM[7], @XMM[7], @XMM[8]
+       veor    @XMM[2], @XMM[2], @XMM[8]
+       veor    @XMM[5], @XMM[5], @XMM[8]
+       veor    @XMM[0], @XMM[0], @XMM[8]
+       veor    @XMM[1], @XMM[1], @XMM[8]
+       bx      lr
+.size  _bsaes_encrypt8,.-_bsaes_encrypt8
+___
+}
+{
+my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6");
+
+# bitslice_key: append to $code a bit-slicing sequence for a key.
+# Arguments 0..7 are the eight registers (used in reversed order as @x),
+# arguments 8..12 are handed on to swapmove/swapmove2x as $bs0..$bs2 and two
+# temporaries (presumably mask and scratch registers; those helpers are
+# defined outside this part of the file).
+# At the places where the commented-out swapmove calls are shown, plain vmov
+# copies are emitted instead.
+# NOTE(review): no call to this sub is visible in this part of the file.
+sub bitslice_key {
+my @x=reverse(@_[0..7]);
+my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12];
+
+       &swapmove       (@x[0,1],1,$bs0,$t2,$t3);
+$code.=<<___;
+       @ &swapmove(@x[2,3],1,$t0,$t2,$t3);
+       vmov    @x[2], @x[0]
+       vmov    @x[3], @x[1]
+___
+       #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
+
+       &swapmove2x     (@x[0,2,1,3],2,$bs1,$t2,$t3);
+$code.=<<___;
+       @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
+       vmov    @x[4], @x[0]
+       vmov    @x[6], @x[2]
+       vmov    @x[5], @x[1]
+       vmov    @x[7], @x[3]
+___
+       # both halves are processed with the same third argument set ($bs2)
+       &swapmove2x     (@x[0,4,1,5],4,$bs2,$t2,$t3);
+       &swapmove2x     (@x[2,6,3,7],4,$bs2,$t2,$t3);
+}
+
+# _bsaes_key_convert: internal helper that rewrites round keys into the
+# bit-sliced layout.
+#   in:  $inp (r4) source round keys, $rounds (r5) round count,
+#        $out (r12) destination
+#   out: the round 0 key stored unchanged apart from the __ARMEL__ byte
+#        swap, then 128 bytes per converted round key; the loop body runs
+#        rounds-1 times. On return $out points past what was written,
+#        @XMM[15] holds the last round key that was loaded (it is not
+#        stored), @XMM[7] holds 0x63 in every byte and $const (r6) points
+#        at .LM0. Callers finish the schedule themselves.
+$code.=<<___;
+.type  _bsaes_key_convert,%function
+.align 4
+_bsaes_key_convert:
+       adr     $const,_bsaes_key_convert
+       vld1.8  {@XMM[7]},  [$inp]!             @ load round 0 key
+       sub     $const,$const,#_bsaes_key_convert-.LM0
+       vld1.8  {@XMM[15]}, [$inp]!             @ load round 1 key
+
+       vmov.i8 @XMM[8],  #0x01                 @ bit masks
+       vmov.i8 @XMM[9],  #0x02
+       vmov.i8 @XMM[10], #0x04
+       vmov.i8 @XMM[11], #0x08
+       vmov.i8 @XMM[12], #0x10
+       vmov.i8 @XMM[13], #0x20
+       vldmia  $const, {@XMM[14]}              @ .LM0
+
+#ifdef __ARMEL__
+       vrev32.8        @XMM[7],  @XMM[7]
+       vrev32.8        @XMM[15], @XMM[15]
+#endif
+       sub     $rounds,$rounds,#1
+       vstmia  $out!, {@XMM[7]}                @ save round 0 key
+       b       .Lkey_loop
+
+.align 4
+.Lkey_loop:
+       @ permute the current round key with the .LM0 indices
+       vtbl.8  `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])`
+       vtbl.8  `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])`
+       @ masks 0x40 and 0x80 are rebuilt on every pass because their
+       @ registers are reused below
+       vmov.i8 @XMM[6],  #0x40
+       vmov.i8 @XMM[15], #0x80
+
+       @ one output register per bit position: every byte becomes all-ones
+       @ when that bit is set in the corresponding key byte
+       vtst.8  @XMM[0], @XMM[7], @XMM[8]
+       vtst.8  @XMM[1], @XMM[7], @XMM[9]
+       vtst.8  @XMM[2], @XMM[7], @XMM[10]
+       vtst.8  @XMM[3], @XMM[7], @XMM[11]
+       vtst.8  @XMM[4], @XMM[7], @XMM[12]
+       vtst.8  @XMM[5], @XMM[7], @XMM[13]
+       vtst.8  @XMM[6], @XMM[7], @XMM[6]
+       vtst.8  @XMM[7], @XMM[7], @XMM[15]
+       vld1.8  {@XMM[15]}, [$inp]!             @ load next round key
+       @ planes 0, 1, 5 and 6 are inverted; these are the bits set in 0x63
+       vmvn    @XMM[0], @XMM[0]                @ "pnot"
+       vmvn    @XMM[1], @XMM[1]
+       vmvn    @XMM[5], @XMM[5]
+       vmvn    @XMM[6], @XMM[6]
+#ifdef __ARMEL__
+       vrev32.8        @XMM[15], @XMM[15]
+#endif
+       subs    $rounds,$rounds,#1
+       vstmia  $out!,{@XMM[0]-@XMM[7]}         @ write bit-sliced round key
+       bne     .Lkey_loop
+
+       vmov.i8 @XMM[7],#0x63                   @ compose .L63
+       @ don't save last round key
+       bx      lr
+.size  _bsaes_key_convert,.-_bsaes_key_convert
+___
+}
+
+if (0) {               # following four functions are unsupported interface
+                       # used for benchmarking...
+$code.=<<___;
+@ bsaes_enc_key_convert: benchmark-only wrapper, not emitted because the
+@ enclosing Perl block is guarded by if (0). It calls _bsaes_key_convert
+@ with the round count read from offset 240 of the key, then stores the
+@ last round key XORed with the 0x63 vector at the end of the schedule.
+.globl bsaes_enc_key_convert
+.type  bsaes_enc_key_convert,%function
+.align 4
+bsaes_enc_key_convert:
+       stmdb   sp!,{r4-r6,lr}
+       vstmdb  sp!,{d8-d15}            @ ABI specification says so
+
+       ldr     r5,[$inp,#240]                  @ pass rounds
+       mov     r4,$inp                         @ pass key
+       mov     r12,$out                        @ pass key schedule
+       bl      _bsaes_key_convert
+       veor    @XMM[7],@XMM[7],@XMM[15]        @ fix up last round key
+       vstmia  r12, {@XMM[7]}                  @ save last round key
+
+       vldmia  sp!,{d8-d15}
+       ldmia   sp!,{r4-r6,pc}
+.size  bsaes_enc_key_convert,.-bsaes_enc_key_convert
+
+@ bsaes_encrypt_128: benchmark-only routine, not emitted because the
+@ enclosing Perl block is guarded by if (0). Each pass loads eight blocks,
+@ calls _bsaes_encrypt8 with a fixed round count of 10 and stores the
+@ results in the register order produced by that helper. The length is
+@ reduced by 0x80 bytes per pass.
+.globl bsaes_encrypt_128
+.type  bsaes_encrypt_128,%function
+.align 4
+bsaes_encrypt_128:
+       stmdb   sp!,{r4-r6,lr}
+       vstmdb  sp!,{d8-d15}            @ ABI specification says so
+.Lenc128_loop:
+       vld1.8  {@XMM[0]-@XMM[1]}, [$inp]!      @ load input
+       vld1.8  {@XMM[2]-@XMM[3]}, [$inp]!
+       mov     r4,$key                         @ pass the key
+       vld1.8  {@XMM[4]-@XMM[5]}, [$inp]!
+       mov     r5,#10                          @ pass rounds
+       vld1.8  {@XMM[6]-@XMM[7]}, [$inp]!
+
+       bl      _bsaes_encrypt8
+
+       vst1.8  {@XMM[0]-@XMM[1]}, [$out]!      @ write output
+       vst1.8  {@XMM[4]}, [$out]!
+       vst1.8  {@XMM[6]}, [$out]!
+       vst1.8  {@XMM[3]}, [$out]!
+       vst1.8  {@XMM[7]}, [$out]!
+       vst1.8  {@XMM[2]}, [$out]!
+       subs    $len,$len,#0x80
+       vst1.8  {@XMM[5]}, [$out]!
+       bhi     .Lenc128_loop
+
+       vldmia  sp!,{d8-d15}
+       ldmia   sp!,{r4-r6,pc}
+.size  bsaes_encrypt_128,.-bsaes_encrypt_128
+
+@ bsaes_dec_key_convert: benchmark-only wrapper, not emitted because the
+@ enclosing Perl block is guarded by if (0). It calls _bsaes_key_convert
+@ with the round count read from offset 240 of the key, stores the last
+@ round key as loaded at the end of the schedule, and rewrites the round 0
+@ key at the start of the schedule XORed with the 0x63 vector.
+.globl bsaes_dec_key_convert
+.type  bsaes_dec_key_convert,%function
+.align 4
+bsaes_dec_key_convert:
+       stmdb   sp!,{r4-r6,lr}
+       vstmdb  sp!,{d8-d15}            @ ABI specification says so
+
+       ldr     r5,[$inp,#240]                  @ pass rounds
+       mov     r4,$inp                         @ pass key
+       mov     r12,$out                        @ pass key schedule
+       bl      _bsaes_key_convert
+       vldmia  $out, {@XMM[6]}
+       vstmia  r12,  {@XMM[15]}                @ save last round key
+       veor    @XMM[7], @XMM[7], @XMM[6]       @ fix up round 0 key
+       vstmia  $out, {@XMM[7]}
+
+       vldmia  sp!,{d8-d15}
+       ldmia   sp!,{r4-r6,pc}
+.size  bsaes_dec_key_convert,.-bsaes_dec_key_convert
+
+@ bsaes_decrypt_128: benchmark-only routine, not emitted because the
+@ enclosing Perl block is guarded by if (0). Each pass loads eight blocks,
+@ calls _bsaes_decrypt8 with a fixed round count of 10 and stores the
+@ results in the register order produced by that helper. The length is
+@ reduced by 0x80 bytes per pass.
+.globl bsaes_decrypt_128
+.type  bsaes_decrypt_128,%function
+.align 4
+bsaes_decrypt_128:
+       stmdb   sp!,{r4-r6,lr}
+       vstmdb  sp!,{d8-d15}            @ ABI specification says so
+.Ldec128_loop:
+       vld1.8  {@XMM[0]-@XMM[1]}, [$inp]!      @ load input
+       vld1.8  {@XMM[2]-@XMM[3]}, [$inp]!
+       mov     r4,$key                         @ pass the key
+       vld1.8  {@XMM[4]-@XMM[5]}, [$inp]!
+       mov     r5,#10                          @ pass rounds
+       vld1.8  {@XMM[6]-@XMM[7]}, [$inp]!
+
+       bl      _bsaes_decrypt8
+
+       vst1.8  {@XMM[0]-@XMM[1]}, [$out]!      @ write output
+       vst1.8  {@XMM[6]}, [$out]!
+       vst1.8  {@XMM[4]}, [$out]!
+       vst1.8  {@XMM[2]}, [$out]!
+       vst1.8  {@XMM[7]}, [$out]!
+       vst1.8  {@XMM[3]}, [$out]!
+       subs    $len,$len,#0x80
+       vst1.8  {@XMM[5]}, [$out]!
+       bhi     .Ldec128_loop
+
+       vldmia  sp!,{d8-d15}
+       ldmia   sp!,{r4-r6,pc}
+.size  bsaes_decrypt_128,.-bsaes_decrypt_128
+___
+}
+{
+my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10));
+my ($keysched)=("sp");
+
+$code.=<<___;
+.extern AES_cbc_encrypt
+.extern AES_decrypt
+
+@ bsaes_cbc_encrypt: CBC decryption, up to eight blocks per call of
+@ _bsaes_decrypt8.
+@ Registers: $inp input, $out output, $len length (bytes on entry, turned
+@ into a count of 16-byte blocks below), $key key, $ivp IV pointer taken
+@ from the first stack argument, $fp scratch/frame pointer, $rounds rounds.
+@ In non-kernel builds, requests below 128 bytes go straight to
+@ AES_cbc_encrypt. The last ciphertext block is carried in @XMM[15] and is
+@ written back through $ivp on exit.
+.global        bsaes_cbc_encrypt
+.type  bsaes_cbc_encrypt,%function
+.align 5
+bsaes_cbc_encrypt:
+#ifndef        __KERNEL__
+       cmp     $len, #128
+#ifndef        __thumb__
+       blo     AES_cbc_encrypt
+#else
+       bhs     1f
+       b       AES_cbc_encrypt
+1:
+#endif
+#endif
+
+       @ it is up to the caller to make sure we are called with enc == 0
+
+       mov     ip, sp
+       stmdb   sp!, {r4-r10, lr}
+       VFP_ABI_PUSH
+       ldr     $ivp, [ip]                      @ IV is 1st arg on the stack
+       mov     $len, $len, lsr#4               @ len in 16 byte blocks
+       sub     sp, #0x10                       @ scratch space to carry over the IV
+       mov     $fp, sp                         @ save sp
+
+       ldr     $rounds, [$key, #240]           @ get # of rounds
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       @ allocate the key schedule on the stack
+       sub     r12, sp, $rounds, lsl#7         @ 128 bytes per inner round key
+       add     r12, #`128-32`                  @ size of bit-sliced key schedule
+
+       @ populate the key schedule
+       mov     r4, $key                        @ pass key
+       mov     r5, $rounds                     @ pass # of rounds
+       mov     sp, r12                         @ sp is $keysched
+       bl      _bsaes_key_convert
+       vldmia  $keysched, {@XMM[6]}
+       vstmia  r12,  {@XMM[15]}                @ save last round key
+       veor    @XMM[7], @XMM[7], @XMM[6]       @ fix up round 0 key
+       vstmia  $keysched, {@XMM[7]}
+#else
+       @ the word at offset 244 of the key acts as a flag: when it reads 1
+       @ the conversion is skipped, otherwise the toggled value is stored
+       @ and the schedule is built at offset 248 of the key
+       ldr     r12, [$key, #244]
+       eors    r12, #1
+       beq     0f
+
+       @ populate the key schedule
+       str     r12, [$key, #244]
+       mov     r4, $key                        @ pass key
+       mov     r5, $rounds                     @ pass # of rounds
+       add     r12, $key, #248                 @ pass key schedule
+       bl      _bsaes_key_convert
+       add     r4, $key, #248
+       vldmia  r4, {@XMM[6]}
+       vstmia  r12, {@XMM[15]}                 @ save last round key
+       veor    @XMM[7], @XMM[7], @XMM[6]       @ fix up round 0 key
+       vstmia  r4, {@XMM[7]}
+
+.align 2
+0:
+#endif
+
+       vld1.8  {@XMM[15]}, [$ivp]              @ load IV
+       b       .Lcbc_dec_loop
+
+.align 4
+.Lcbc_dec_loop:
+       @ main loop: eight blocks per pass; fewer than eight go to the tail
+       subs    $len, $len, #0x8
+       bmi     .Lcbc_dec_loop_finish
+
+       vld1.8  {@XMM[0]-@XMM[1]}, [$inp]!      @ load input
+       vld1.8  {@XMM[2]-@XMM[3]}, [$inp]!
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       mov     r4, $keysched                   @ pass the key
+#else
+       add     r4, $key, #248
+#endif
+       vld1.8  {@XMM[4]-@XMM[5]}, [$inp]!
+       mov     r5, $rounds
+       vld1.8  {@XMM[6]-@XMM[7]}, [$inp]
+       @ rewind to the start of this batch; the ciphertext is read a second
+       @ time after decryption for the chaining XOR
+       sub     $inp, $inp, #0x60
+       vstmia  $fp, {@XMM[15]}                 @ put aside IV
+
+       bl      _bsaes_decrypt8
+
+       @ results arrive in the order 0,1,6,4,2,7,3,5; each is XORed with the
+       @ preceding ciphertext block (the IV for the first one)
+       vldmia  $fp, {@XMM[14]}                 @ reload IV
+       vld1.8  {@XMM[8]-@XMM[9]}, [$inp]!      @ reload input
+       veor    @XMM[0], @XMM[0], @XMM[14]      @ ^= IV
+       vld1.8  {@XMM[10]-@XMM[11]}, [$inp]!
+       veor    @XMM[1], @XMM[1], @XMM[8]
+       veor    @XMM[6], @XMM[6], @XMM[9]
+       vld1.8  {@XMM[12]-@XMM[13]}, [$inp]!
+       veor    @XMM[4], @XMM[4], @XMM[10]
+       veor    @XMM[2], @XMM[2], @XMM[11]
+       vld1.8  {@XMM[14]-@XMM[15]}, [$inp]!
+       veor    @XMM[7], @XMM[7], @XMM[12]
+       vst1.8  {@XMM[0]-@XMM[1]}, [$out]!      @ write output
+       veor    @XMM[3], @XMM[3], @XMM[13]
+       vst1.8  {@XMM[6]}, [$out]!
+       veor    @XMM[5], @XMM[5], @XMM[14]
+       vst1.8  {@XMM[4]}, [$out]!
+       vst1.8  {@XMM[2]}, [$out]!
+       vst1.8  {@XMM[7]}, [$out]!
+       vst1.8  {@XMM[3]}, [$out]!
+       vst1.8  {@XMM[5]}, [$out]!
+
+       b       .Lcbc_dec_loop
+
+.Lcbc_dec_loop_finish:
+       @ tail: undo the subtraction above; 0 means done, otherwise 1..7
+       @ blocks remain. They are loaded one by one while the count is
+       @ compared against 2, 4 and 6 to select the matching handler.
+       adds    $len, $len, #8
+       beq     .Lcbc_dec_done
+
+       vld1.8  {@XMM[0]}, [$inp]!              @ load input
+       cmp     $len, #2
+       blo     .Lcbc_dec_one
+       vld1.8  {@XMM[1]}, [$inp]!
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       mov     r4, $keysched                   @ pass the key
+#else
+       add     r4, $key, #248
+#endif
+       mov     r5, $rounds
+       vstmia  $fp, {@XMM[15]}                 @ put aside IV
+       beq     .Lcbc_dec_two
+       vld1.8  {@XMM[2]}, [$inp]!
+       cmp     $len, #4
+       blo     .Lcbc_dec_three
+       vld1.8  {@XMM[3]}, [$inp]!
+       beq     .Lcbc_dec_four
+       vld1.8  {@XMM[4]}, [$inp]!
+       cmp     $len, #6
+       blo     .Lcbc_dec_five
+       vld1.8  {@XMM[5]}, [$inp]!
+       beq     .Lcbc_dec_six
+       vld1.8  {@XMM[6]}, [$inp]!
+       @ seven blocks: rewind over all of them for the chaining reload
+       sub     $inp, $inp, #0x70
+
+       bl      _bsaes_decrypt8
+
+       vldmia  $fp, {@XMM[14]}                 @ reload IV
+       vld1.8  {@XMM[8]-@XMM[9]}, [$inp]!      @ reload input
+       veor    @XMM[0], @XMM[0], @XMM[14]      @ ^= IV
+       vld1.8  {@XMM[10]-@XMM[11]}, [$inp]!
+       veor    @XMM[1], @XMM[1], @XMM[8]
+       veor    @XMM[6], @XMM[6], @XMM[9]
+       vld1.8  {@XMM[12]-@XMM[13]}, [$inp]!
+       veor    @XMM[4], @XMM[4], @XMM[10]
+       veor    @XMM[2], @XMM[2], @XMM[11]
+       vld1.8  {@XMM[15]}, [$inp]!
+       veor    @XMM[7], @XMM[7], @XMM[12]
+       vst1.8  {@XMM[0]-@XMM[1]}, [$out]!      @ write output
+       veor    @XMM[3], @XMM[3], @XMM[13]
+       vst1.8  {@XMM[6]}, [$out]!
+       vst1.8  {@XMM[4]}, [$out]!
+       vst1.8  {@XMM[2]}, [$out]!
+       vst1.8  {@XMM[7]}, [$out]!
+       vst1.8  {@XMM[3]}, [$out]!
+       b       .Lcbc_dec_done
+.align 4
+.Lcbc_dec_six:
+       @ in every tail handler the last ciphertext block of the batch is
+       @ reloaded into @XMM[15], which is what gets returned as the IV
+       sub     $inp, $inp, #0x60
+       bl      _bsaes_decrypt8
+       vldmia  $fp,{@XMM[14]}                  @ reload IV
+       vld1.8  {@XMM[8]-@XMM[9]}, [$inp]!      @ reload input
+       veor    @XMM[0], @XMM[0], @XMM[14]      @ ^= IV
+       vld1.8  {@XMM[10]-@XMM[11]}, [$inp]!
+       veor    @XMM[1], @XMM[1], @XMM[8]
+       veor    @XMM[6], @XMM[6], @XMM[9]
+       vld1.8  {@XMM[12]}, [$inp]!
+       veor    @XMM[4], @XMM[4], @XMM[10]
+       veor    @XMM[2], @XMM[2], @XMM[11]
+       vld1.8  {@XMM[15]}, [$inp]!
+       veor    @XMM[7], @XMM[7], @XMM[12]
+       vst1.8  {@XMM[0]-@XMM[1]}, [$out]!      @ write output
+       vst1.8  {@XMM[6]}, [$out]!
+       vst1.8  {@XMM[4]}, [$out]!
+       vst1.8  {@XMM[2]}, [$out]!
+       vst1.8  {@XMM[7]}, [$out]!
+       b       .Lcbc_dec_done
+.align 4
+.Lcbc_dec_five:
+       sub     $inp, $inp, #0x50
+       bl      _bsaes_decrypt8
+       vldmia  $fp, {@XMM[14]}                 @ reload IV
+       vld1.8  {@XMM[8]-@XMM[9]}, [$inp]!      @ reload input
+       veor    @XMM[0], @XMM[0], @XMM[14]      @ ^= IV
+       vld1.8  {@XMM[10]-@XMM[11]}, [$inp]!
+       veor    @XMM[1], @XMM[1], @XMM[8]
+       veor    @XMM[6], @XMM[6], @XMM[9]
+       vld1.8  {@XMM[15]}, [$inp]!
+       veor    @XMM[4], @XMM[4], @XMM[10]
+       vst1.8  {@XMM[0]-@XMM[1]}, [$out]!      @ write output
+       veor    @XMM[2], @XMM[2], @XMM[11]
+       vst1.8  {@XMM[6]}, [$out]!
+       vst1.8  {@XMM[4]}, [$out]!
+       vst1.8  {@XMM[2]}, [$out]!
+       b       .Lcbc_dec_done
+.align 4
+.Lcbc_dec_four:
+       sub     $inp, $inp, #0x40
+       bl      _bsaes_decrypt8
+       vldmia  $fp, {@XMM[14]}                 @ reload IV
+       vld1.8  {@XMM[8]-@XMM[9]}, [$inp]!      @ reload input
+       veor    @XMM[0], @XMM[0], @XMM[14]      @ ^= IV
+       vld1.8  {@XMM[10]}, [$inp]!
+       veor    @XMM[1], @XMM[1], @XMM[8]
+       veor    @XMM[6], @XMM[6], @XMM[9]
+       vld1.8  {@XMM[15]}, [$inp]!
+       veor    @XMM[4], @XMM[4], @XMM[10]
+       vst1.8  {@XMM[0]-@XMM[1]}, [$out]!      @ write output
+       vst1.8  {@XMM[6]}, [$out]!
+       vst1.8  {@XMM[4]}, [$out]!
+       b       .Lcbc_dec_done
+.align 4
+.Lcbc_dec_three:
+       sub     $inp, $inp, #0x30
+       bl      _bsaes_decrypt8
+       vldmia  $fp, {@XMM[14]}                 @ reload IV
+       vld1.8  {@XMM[8]-@XMM[9]}, [$inp]!      @ reload input
+       veor    @XMM[0], @XMM[0], @XMM[14]      @ ^= IV
+       vld1.8  {@XMM[15]}, [$inp]!
+       veor    @XMM[1], @XMM[1], @XMM[8]
+       veor    @XMM[6], @XMM[6], @XMM[9]
+       vst1.8  {@XMM[0]-@XMM[1]}, [$out]!      @ write output
+       vst1.8  {@XMM[6]}, [$out]!
+       b       .Lcbc_dec_done
+.align 4
+.Lcbc_dec_two:
+       sub     $inp, $inp, #0x20
+       bl      _bsaes_decrypt8
+       vldmia  $fp, {@XMM[14]}                 @ reload IV
+       vld1.8  {@XMM[8]}, [$inp]!              @ reload input
+       veor    @XMM[0], @XMM[0], @XMM[14]      @ ^= IV
+       vld1.8  {@XMM[15]}, [$inp]!             @ reload input
+       veor    @XMM[1], @XMM[1], @XMM[8]
+       vst1.8  {@XMM[0]-@XMM[1]}, [$out]!      @ write output
+       b       .Lcbc_dec_done
+.align 4
+.Lcbc_dec_one:
+       @ a single block is handed to AES_decrypt with the original key:
+       @ r0 is the rewound input pointer, r1 the IV scratch slot, r2 the key
+       sub     $inp, $inp, #0x10
+       mov     $rounds, $out                   @ save original out pointer
+       mov     $out, $fp                       @ use the iv scratch space as out buffer
+       mov     r2, $key
+       vmov    @XMM[4],@XMM[15]                @ just in case ensure that IV
+       vmov    @XMM[5],@XMM[0]                 @ and input are preserved
+       bl      AES_decrypt
+       vld1.8  {@XMM[0]}, [$fp,:64]            @ load result
+       veor    @XMM[0], @XMM[0], @XMM[4]       @ ^= IV
+       vmov    @XMM[15], @XMM[5]               @ @XMM[5] holds input
+       vst1.8  {@XMM[0]}, [$rounds]            @ write output
+
+.Lcbc_dec_done:
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       @ zero the stack from the key schedule up to $fp, 32 bytes per step
+       vmov.i32        q0, #0
+       vmov.i32        q1, #0
+.Lcbc_dec_bzero:                               @ wipe key schedule [if any]
+       vstmia          $keysched!, {q0-q1}
+       cmp             $keysched, $fp
+       bne             .Lcbc_dec_bzero
+#endif
+
+       mov     sp, $fp
+       add     sp, #0x10                       @ add sp,$fp,#0x10 is no good for thumb
+       vst1.8  {@XMM[15]}, [$ivp]              @ return IV
+       VFP_ABI_POP
+       ldmia   sp!, {r4-r10, pc}
+.size  bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
+___
+}
+{
+my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10)));
+my $const = "r6";      # shared with _bsaes_encrypt8_alt
+my $keysched = "sp";
+
+$code.=<<___;
+@ bsaes_ctr32_encrypt_blocks: CTR mode keystream XOR with a 32-bit counter
+@ held in the last word of the counter block.
+@ Registers: $inp input, $out output, $len number of blocks (it is compared
+@ against 8 and reduced by 8 per pass), $key key, $ctr counter pointer taken
+@ from the first stack argument, $fp scratch/frame pointer, $rounds rounds.
+@ Fewer than eight blocks take .Lctr_enc_short, which calls AES_encrypt
+@ once per block; otherwise eight counters are encrypted per pass through
+@ _bsaes_encrypt8_alt.
+.extern        AES_encrypt
+.global        bsaes_ctr32_encrypt_blocks
+.type  bsaes_ctr32_encrypt_blocks,%function
+.align 5
+bsaes_ctr32_encrypt_blocks:
+       cmp     $len, #8                        @ use plain AES for
+       blo     .Lctr_enc_short                 @ small sizes
+
+       mov     ip, sp
+       stmdb   sp!, {r4-r10, lr}
+       VFP_ABI_PUSH
+       ldr     $ctr, [ip]                      @ ctr is 1st arg on the stack
+       sub     sp, sp, #0x10                   @ scratch space to carry over the ctr
+       mov     $fp, sp                         @ save sp
+
+       ldr     $rounds, [$key, #240]           @ get # of rounds
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       @ allocate the key schedule on the stack
+       sub     r12, sp, $rounds, lsl#7         @ 128 bytes per inner round key
+       add     r12, #`128-32`                  @ size of bit-sliced key schedule
+
+       @ populate the key schedule
+       mov     r4, $key                        @ pass key
+       mov     r5, $rounds                     @ pass # of rounds
+       mov     sp, r12                         @ sp is $keysched
+       bl      _bsaes_key_convert
+       veor    @XMM[7],@XMM[7],@XMM[15]        @ fix up last round key
+       vstmia  r12, {@XMM[7]}                  @ save last round key
+
+       @ $const was left pointing at .LM0 by _bsaes_key_convert
+       vld1.8  {@XMM[0]}, [$ctr]               @ load counter
+       add     $ctr, $const, #.LREVM0SR-.LM0   @ borrow $ctr
+       vldmia  $keysched, {@XMM[4]}            @ load round0 key
+#else
+       @ the word at offset 244 of the key acts as a flag: when it reads 1
+       @ the conversion is skipped, otherwise the toggled value is stored
+       @ and the schedule is built at offset 248 of the key
+       ldr     r12, [$key, #244]
+       eors    r12, #1
+       beq     0f
+
+       @ populate the key schedule
+       str     r12, [$key, #244]
+       mov     r4, $key                        @ pass key
+       mov     r5, $rounds                     @ pass # of rounds
+       add     r12, $key, #248                 @ pass key schedule
+       bl      _bsaes_key_convert
+       veor    @XMM[7],@XMM[7],@XMM[15]        @ fix up last round key
+       vstmia  r12, {@XMM[7]}                  @ save last round key
+
+.align 2
+0:     add     r12, $key, #248
+       vld1.8  {@XMM[0]}, [$ctr]               @ load counter
+       adrl    $ctr, .LREVM0SR                 @ borrow $ctr
+       vldmia  r12, {@XMM[4]}                  @ load round0 key
+       sub     sp, #0x10                       @ place for adjusted round0 key
+#endif
+
+       @ the counter and the round 0 key are byte-swapped within each 32-bit
+       @ word so that the counter can be stepped with vadd.u32
+       vmov.i32        @XMM[8],#1              @ compose 1<<96
+       veor            @XMM[9],@XMM[9],@XMM[9]
+       vrev32.8        @XMM[0],@XMM[0]
+       vext.8          @XMM[8],@XMM[9],@XMM[8],#4
+       vrev32.8        @XMM[4],@XMM[4]
+       vadd.u32        @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96
+       vstmia  $keysched, {@XMM[4]}            @ save adjusted round0 key
+       b       .Lctr_enc_loop
+
+.align 4
+.Lctr_enc_loop:
+       @ derive counters +1..+7 in @XMM[1]-@XMM[7] and +8 in @XMM[10]
+       vadd.u32        @XMM[10], @XMM[8], @XMM[9]      @ compose 3<<96
+       vadd.u32        @XMM[1], @XMM[0], @XMM[8]       @ +1
+       vadd.u32        @XMM[2], @XMM[0], @XMM[9]       @ +2
+       vadd.u32        @XMM[3], @XMM[0], @XMM[10]      @ +3
+       vadd.u32        @XMM[4], @XMM[1], @XMM[10]
+       vadd.u32        @XMM[5], @XMM[2], @XMM[10]
+       vadd.u32        @XMM[6], @XMM[3], @XMM[10]
+       vadd.u32        @XMM[7], @XMM[4], @XMM[10]
+       vadd.u32        @XMM[10], @XMM[5], @XMM[10]     @ next counter
+
+       @ Borrow prologue from _bsaes_encrypt8 to use the opportunity
+       @ to flip byte order in 32-bit counter
+
+       vldmia          $keysched, {@XMM[9]}            @ load round0 key
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, $keysched, #0x10            @ pass next round key
+#else
+       add             r4, $key, #`248+16`
+#endif
+       vldmia          $ctr, {@XMM[8]}                 @ .LREVM0SR
+       mov             r5, $rounds                     @ pass rounds
+       vstmia          $fp, {@XMM[10]}                 @ save next counter
+       sub             $const, $ctr, #.LREVM0SR-.LSR   @ pass constants
+
+       bl              _bsaes_encrypt8_alt
+
+       @ fewer than eight blocks left: finish them one at a time
+       subs            $len, $len, #8
+       blo             .Lctr_enc_loop_done
+
+       @ keystream arrives in the order 0,1,4,6,3,7,2,5; the 1<<96 and 2<<96
+       @ increments are rebuilt here because the call overwrote them
+       vld1.8          {@XMM[8]-@XMM[9]}, [$inp]!      @ load input
+       vld1.8          {@XMM[10]-@XMM[11]}, [$inp]!
+       veor            @XMM[0], @XMM[8]
+       veor            @XMM[1], @XMM[9]
+       vld1.8          {@XMM[12]-@XMM[13]}, [$inp]!
+       veor            @XMM[4], @XMM[10]
+       veor            @XMM[6], @XMM[11]
+       vld1.8          {@XMM[14]-@XMM[15]}, [$inp]!
+       veor            @XMM[3], @XMM[12]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!      @ write output
+       veor            @XMM[7], @XMM[13]
+       veor            @XMM[2], @XMM[14]
+       vst1.8          {@XMM[4]}, [$out]!
+       veor            @XMM[5], @XMM[15]
+       vst1.8          {@XMM[6]}, [$out]!
+       vmov.i32        @XMM[8], #1                     @ compose 1<<96
+       vst1.8          {@XMM[3]}, [$out]!
+       veor            @XMM[9], @XMM[9], @XMM[9]
+       vst1.8          {@XMM[7]}, [$out]!
+       vext.8          @XMM[8], @XMM[9], @XMM[8], #4
+       vst1.8          {@XMM[2]}, [$out]!
+       vadd.u32        @XMM[9],@XMM[8],@XMM[8]         @ compose 2<<96
+       vst1.8          {@XMM[5]}, [$out]!
+       vldmia          $fp, {@XMM[0]}                  @ load counter
+
+       @ this tests the flags of the subs above: loop while blocks remain
+       bne             .Lctr_enc_loop
+       b               .Lctr_enc_done
+
+.align 4
+.Lctr_enc_loop_done:
+       @ 1..7 blocks remain; they are processed one by one in keystream
+       @ register order, with the count compared against 2, 4 and 6
+       add             $len, $len, #8
+       vld1.8          {@XMM[8]}, [$inp]!      @ load input
+       veor            @XMM[0], @XMM[8]
+       vst1.8          {@XMM[0]}, [$out]!      @ write output
+       cmp             $len, #2
+       blo             .Lctr_enc_done
+       vld1.8          {@XMM[9]}, [$inp]!
+       veor            @XMM[1], @XMM[9]
+       vst1.8          {@XMM[1]}, [$out]!
+       beq             .Lctr_enc_done
+       vld1.8          {@XMM[10]}, [$inp]!
+       veor            @XMM[4], @XMM[10]
+       vst1.8          {@XMM[4]}, [$out]!
+       cmp             $len, #4
+       blo             .Lctr_enc_done
+       vld1.8          {@XMM[11]}, [$inp]!
+       veor            @XMM[6], @XMM[11]
+       vst1.8          {@XMM[6]}, [$out]!
+       beq             .Lctr_enc_done
+       vld1.8          {@XMM[12]}, [$inp]!
+       veor            @XMM[3], @XMM[12]
+       vst1.8          {@XMM[3]}, [$out]!
+       cmp             $len, #6
+       blo             .Lctr_enc_done
+       vld1.8          {@XMM[13]}, [$inp]!
+       veor            @XMM[7], @XMM[13]
+       vst1.8          {@XMM[7]}, [$out]!
+       beq             .Lctr_enc_done
+       vld1.8          {@XMM[14]}, [$inp]
+       veor            @XMM[2], @XMM[14]
+       vst1.8          {@XMM[2]}, [$out]!
+
+.Lctr_enc_done:
+       @ with a stack schedule everything from sp up to $fp is zeroed;
+       @ otherwise a single 32-byte store at sp covers the adjusted round 0
+       @ key and the 16-byte counter slot at $fp that follows it
+       vmov.i32        q0, #0
+       vmov.i32        q1, #0
+#ifndef        BSAES_ASM_EXTENDED_KEY
+.Lctr_enc_bzero:                       @ wipe key schedule [if any]
+       vstmia          $keysched!, {q0-q1}
+       cmp             $keysched, $fp
+       bne             .Lctr_enc_bzero
+#else
+       vstmia          $keysched, {q0-q1}
+#endif
+
+       mov     sp, $fp
+       add     sp, #0x10               @ add sp,$fp,#0x10 is no good for thumb
+       VFP_ABI_POP
+       ldmia   sp!, {r4-r10, pc}       @ return
+
+.align 4
+.Lctr_enc_short:
+       @ short path: the arguments are moved to r4-r7, the counter LSW is
+       @ kept in r8, and 0x20 bytes of stack are used: AES_encrypt output at
+       @ sp and a copy of the counter block at sp+0x10
+       ldr     ip, [sp]                @ ctr pointer is passed on stack
+       stmdb   sp!, {r4-r8, lr}
+
+       mov     r4, $inp                @ copy arguments
+       mov     r5, $out
+       mov     r6, $len
+       mov     r7, $key
+       ldr     r8, [ip, #12]           @ load counter LSW
+       vld1.8  {@XMM[1]}, [ip]         @ load whole counter value
+#ifdef __ARMEL__
+       rev     r8, r8
+#endif
+       sub     sp, sp, #0x10
+       vst1.8  {@XMM[1]}, [sp,:64]     @ copy counter value
+       sub     sp, sp, #0x10
+
+.Lctr_enc_short_loop:
+       add     r0, sp, #0x10           @ input counter value
+       mov     r1, sp                  @ output on the stack
+       mov     r2, r7                  @ key
+
+       bl      AES_encrypt
+
+       vld1.8  {@XMM[0]}, [r4]!        @ load input
+       vld1.8  {@XMM[1]}, [sp,:64]     @ load encrypted counter
+       @ offset 0x1c is the last word of the counter copy at sp+0x10
+       add     r8, r8, #1
+#ifdef __ARMEL__
+       rev     r0, r8
+       str     r0, [sp, #0x1c]         @ next counter value
+#else
+       str     r8, [sp, #0x1c]         @ next counter value
+#endif
+       veor    @XMM[0],@XMM[0],@XMM[1]
+       vst1.8  {@XMM[0]}, [r5]!        @ store output
+       subs    r6, r6, #1
+       bne     .Lctr_enc_short_loop
+
+       @ the writeback store zeroes the 0x20 scratch bytes and releases them
+       vmov.i32        q0, #0
+       vmov.i32        q1, #0
+       vstmia          sp!, {q0-q1}
+
+       ldmia   sp!, {r4-r8, pc}
+.size  bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
+___
+}
+{
+######################################################################
+# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len,
+#      const AES_KEY *key1, const AES_KEY *key2,
+#      const unsigned char iv[16]);
+#
+my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3)));
+my $const="r6";                # returned by _bsaes_key_convert
+my $twmask=@XMM[5];
+my @T=@XMM[6..7];
+
+$code.=<<___;
+.globl bsaes_xts_encrypt
+.type  bsaes_xts_encrypt,%function
+.align 4
+bsaes_xts_encrypt:
+       mov     ip, sp                          @ ip = incoming sp, base for the stack arguments read below
+       stmdb   sp!, {r4-r10, lr}               @ 0x20 bytes of saved registers
+       VFP_ABI_PUSH
+       mov     r6, sp                          @ future $fp
+
+       mov     $inp, r0                        @ 1st argument: input pointer
+       mov     $out, r1                        @ 2nd argument: output pointer
+       mov     $len, r2                        @ 3rd argument: length in bytes
+       mov     $key, r3                        @ 4th argument: data key
+
+       sub     r0, sp, #0x10                   @ 0x10 bytes of scratch below the frame
+       bic     r0, #0xf                        @ align at 16 bytes
+       mov     sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+       ldr     r0, [ip]                        @ pointer to input tweak
+#else
+       @ generate initial tweak
+       ldr     r0, [ip, #4]                    @ iv[]
+       mov     r1, sp                          @ result goes to the aligned scratch slot
+       ldr     r2, [ip, #0]                    @ key2
+       bl      AES_encrypt                     @ called with in=iv, out=scratch, key=key2
+       mov     r0,sp                           @ pointer to initial tweak
+#endif
+
+       ldr     $rounds, [$key, #240]           @ get # of rounds
+       mov     $fp, r6                         @ frame pointer leaves r6 before r6 is reused
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       @ allocate the key schedule on the stack
+       sub     r12, sp, $rounds, lsl#7         @ 128 bytes per inner round key
+       @ add   r12, #`128-32`                  @ size of bit-sliced key schedule
+       sub     r12, #`32+16`                   @ place for tweak[9]
+
+       @ populate the key schedule
+       mov     r4, $key                        @ pass key
+       mov     r5, $rounds                     @ pass # of rounds
+       mov     sp, r12                         @ sp = bottom of tweak area, key schedule sits 0x90 above
+       add     r12, #0x90                      @ pass key schedule
+       bl      _bsaes_key_convert
+       veor    @XMM[7], @XMM[7], @XMM[15]      @ fix up last round key
+       vstmia  r12, {@XMM[7]}                  @ save last round key
+#else
+       ldr     r12, [$key, #244]               @ word at key offset 244 (presumably a schedule-converted flag)
+       eors    r12, #1                         @ result is zero only if that word was 1
+       beq     0f                              @ in that case skip the conversion
+
+       str     r12, [$key, #244]               @ write the toggled word back
+       mov     r4, $key                        @ pass key
+       mov     r5, $rounds                     @ pass # of rounds
+       add     r12, $key, #248                 @ pass key schedule
+       bl      _bsaes_key_convert
+       veor    @XMM[7], @XMM[7], @XMM[15]      @ fix up last round key
+       vstmia  r12, {@XMM[7]}
+
+.align 2
+0:     sub     sp, #0x90                       @ place for tweak[9]
+#endif
+
+       vld1.8  {@XMM[8]}, [r0]                 @ initial tweak
+       adr     $magic, .Lxts_magic
+
+       subs    $len, #0x80                     @ the main loop consumes 0x80 bytes (8 blocks) per pass
+       blo     .Lxts_enc_short                 @ fewer than 0x80 bytes of input
+       b       .Lxts_enc_loop
+
+.align 4
+.Lxts_enc_loop:
+       vldmia          $magic, {$twmask}       @ load XTS magic
+       vshr.s64        @T[0], @XMM[8], #63     @ all-ones in every 64-bit half whose top bit is set
+       mov             r0, sp                  @ tweaks are spilled upwards from sp
+       vand            @T[0], @T[0], $twmask   @ keep the magic value only for overflowing halves
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+       vadd.u64        @XMM[$i], @XMM[$i-1], @XMM[$i-1]        @ next tweak: each half shifted left by one
+       vst1.64         {@XMM[$i-1]}, [r0,:128]!        @ spill the previous tweak
+       vswp            `&Dhi("@T[0]")`,`&Dlo("@T[0]")` @ exchange halves of the carry mask
+       vshr.s64        @T[1], @XMM[$i], #63
+       veor            @XMM[$i], @XMM[$i], @T[0]       @ fold the carry mask into the new tweak
+       vand            @T[1], @T[1], $twmask
+___
+       @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+       vld1.8          {@XMM[$i-10]}, [$inp]!  @ load the next input block
+___
+$code.=<<___ if ($i>=11);
+       veor            @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]    @ input block xor its tweak
+___
+}
+$code.=<<___;
+       vadd.u64        @XMM[8], @XMM[15], @XMM[15]     @ tweak for the first block of the next pass
+       vst1.64         {@XMM[15]}, [r0,:128]!
+       vswp            `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+       veor            @XMM[8], @XMM[8], @T[0]
+       vst1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+
+       vld1.8          {@XMM[6]-@XMM[7]}, [$inp]!      @ last two input blocks of this pass
+       veor            @XMM[5], @XMM[5], @XMM[13]
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[6], @XMM[6], @XMM[14]
+       mov             r5, $rounds                     @ pass rounds
+       veor            @XMM[7], @XMM[7], @XMM[15]
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_encrypt8
+
+       vld1.64         {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! @ reload spilled tweaks
+       vld1.64         {@XMM[10]-@XMM[11]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]      @ results are consumed in register order 0,1,4,6,3,7,2,5
+       vld1.64         {@XMM[12]-@XMM[13]}, [r0,:128]!
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[4], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       veor            @XMM[9], @XMM[6], @XMM[11]
+       vld1.64         {@XMM[14]-@XMM[15]}, [r0,:128]!
+       veor            @XMM[10], @XMM[3], @XMM[12]
+       vst1.8          {@XMM[8]-@XMM[9]}, [$out]!
+       veor            @XMM[11], @XMM[7], @XMM[13]
+       veor            @XMM[12], @XMM[2], @XMM[14]
+       vst1.8          {@XMM[10]-@XMM[11]}, [$out]!
+       veor            @XMM[13], @XMM[5], @XMM[15]
+       vst1.8          {@XMM[12]-@XMM[13]}, [$out]!
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+
+       subs            $len, #0x80
+       bpl             .Lxts_enc_loop                  @ repeat while at least 0x80 bytes remain
+
+.Lxts_enc_short:
+       adds            $len, #0x70             @ len is now the remaining byte count minus 0x10
+       bmi             .Lxts_enc_done          @ not even one whole block left
+
+       vldmia          $magic, {$twmask}       @ load XTS magic
+       vshr.s64        @T[0], @XMM[8], #63
+       mov             r0, sp
+       vand            @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+       vadd.u64        @XMM[$i], @XMM[$i-1], @XMM[$i-1]        @ next tweak: each half shifted left by one
+       vst1.64         {@XMM[$i-1]}, [r0,:128]!        @ spill the previous tweak
+       vswp            `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+       vshr.s64        @T[1], @XMM[$i], #63
+       veor            @XMM[$i], @XMM[$i], @T[0]       @ fold the carry mask into the new tweak
+       vand            @T[1], @T[1], $twmask
+___
+       @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+       vld1.8          {@XMM[$i-10]}, [$inp]!  @ load the next input block
+       subs            $len, #0x10
+       bmi             .Lxts_enc_`$i-9`        @ no further whole block: finish with the blocks loaded so far
+___
+$code.=<<___ if ($i>=11);
+       veor            @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]    @ input block xor its tweak
+___
+}
+$code.=<<___;
+       sub             $len, #0x10
+       vst1.64         {@XMM[15]}, [r0,:128]           @ next round tweak
+
+       vld1.8          {@XMM[6]}, [$inp]!              @ seventh input block
+       veor            @XMM[5], @XMM[5], @XMM[13]
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[6], @XMM[6], @XMM[14]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_encrypt8
+
+       vld1.64         {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! @ reload spilled tweaks
+       vld1.64         {@XMM[10]-@XMM[11]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       vld1.64         {@XMM[12]-@XMM[13]}, [r0,:128]!
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[4], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       veor            @XMM[9], @XMM[6], @XMM[11]
+       vld1.64         {@XMM[14]}, [r0,:128]!
+       veor            @XMM[10], @XMM[3], @XMM[12]
+       vst1.8          {@XMM[8]-@XMM[9]}, [$out]!
+       veor            @XMM[11], @XMM[7], @XMM[13]
+       veor            @XMM[12], @XMM[2], @XMM[14]
+       vst1.8          {@XMM[10]-@XMM[11]}, [$out]!
+       vst1.8          {@XMM[12]}, [$out]!             @ seven blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_enc_done
+.align 4
+.Lxts_enc_6:
+       vst1.64         {@XMM[14]}, [r0,:128]           @ next round tweak
+
+       veor            @XMM[4], @XMM[4], @XMM[12]      @ registers 4 and 5 were loaded but not yet tweaked
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[5], @XMM[5], @XMM[13]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_encrypt8
+
+       vld1.64         {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! @ reload spilled tweaks
+       vld1.64         {@XMM[10]-@XMM[11]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       vld1.64         {@XMM[12]-@XMM[13]}, [r0,:128]!
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[4], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       veor            @XMM[9], @XMM[6], @XMM[11]
+       veor            @XMM[10], @XMM[3], @XMM[12]
+       vst1.8          {@XMM[8]-@XMM[9]}, [$out]!
+       veor            @XMM[11], @XMM[7], @XMM[13]
+       vst1.8          {@XMM[10]-@XMM[11]}, [$out]!    @ six blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_enc_done
+
+@ put this in range for both ARM and Thumb mode adr instructions
+.align 5
+.Lxts_magic:
+       .quad   1, 0x87                 @ per-half values xored in when a tweak half overflows during doubling
+
+.align 5
+.Lxts_enc_5:
+       vst1.64         {@XMM[13]}, [r0,:128]           @ next round tweak
+
+       veor            @XMM[3], @XMM[3], @XMM[11]      @ registers 3 and 4 were loaded but not yet tweaked
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[4], @XMM[4], @XMM[12]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_encrypt8
+
+       vld1.64         {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! @ reload spilled tweaks
+       vld1.64         {@XMM[10]-@XMM[11]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       vld1.64         {@XMM[12]}, [r0,:128]!
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[4], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       veor            @XMM[9], @XMM[6], @XMM[11]
+       veor            @XMM[10], @XMM[3], @XMM[12]
+       vst1.8          {@XMM[8]-@XMM[9]}, [$out]!
+       vst1.8          {@XMM[10]}, [$out]!             @ five blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_enc_done
+.align 4
+.Lxts_enc_4:
+       vst1.64         {@XMM[12]}, [r0,:128]           @ next round tweak
+
+       veor            @XMM[2], @XMM[2], @XMM[10]      @ registers 2 and 3 were loaded but not yet tweaked
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[3], @XMM[3], @XMM[11]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_encrypt8
+
+       vld1.64         {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! @ reload spilled tweaks
+       vld1.64         {@XMM[10]-@XMM[11]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[4], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       veor            @XMM[9], @XMM[6], @XMM[11]
+       vst1.8          {@XMM[8]-@XMM[9]}, [$out]!      @ four blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_enc_done
+.align 4
+.Lxts_enc_3:
+       vst1.64         {@XMM[11]}, [r0,:128]           @ next round tweak
+
+       veor            @XMM[1], @XMM[1], @XMM[9]       @ registers 1 and 2 were loaded but not yet tweaked
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[2], @XMM[2], @XMM[10]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_encrypt8
+
+       vld1.64         {@XMM[8]-@XMM[9]}, [r0,:128]!   @ reload spilled tweaks
+       vld1.64         {@XMM[10]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[4], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       vst1.8          {@XMM[8]}, [$out]!              @ three blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_enc_done
+.align 4
+.Lxts_enc_2:
+       vst1.64         {@XMM[10]}, [r0,:128]           @ next round tweak
+
+       veor            @XMM[0], @XMM[0], @XMM[8]       @ registers 0 and 1 were loaded but not yet tweaked
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[1], @XMM[1], @XMM[9]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_encrypt8
+
+       vld1.64         {@XMM[8]-@XMM[9]}, [r0,:128]!   @ reload spilled tweaks
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!      @ two blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_enc_done
+.align 4
+.Lxts_enc_1:
+       mov             r0, sp                          @ input pointer for the single-block call
+       veor            @XMM[0], @XMM[8]                @ block xor tweak
+       mov             r1, sp                          @ output overwrites the same scratch slot
+       vst1.8          {@XMM[0]}, [sp,:128]
+       mov             r2, $key
+       mov             r4, $fp                         @ preserve fp
+
+       bl              AES_encrypt
+
+       vld1.8          {@XMM[0]}, [sp,:128]
+       veor            @XMM[0], @XMM[0], @XMM[8]       @ result xor the same tweak
+       vst1.8          {@XMM[0]}, [$out]!
+       mov             $fp, r4                         @ restore fp
+
+       vmov            @XMM[8], @XMM[9]                @ next round tweak
+
+.Lxts_enc_done:
+#ifndef        XTS_CHAIN_TWEAK
+       adds            $len, #0x10             @ count of leftover bytes, zero for block-aligned input
+       beq             .Lxts_enc_ret
+       sub             r6, $out, #0x10         @ r6 = start of the last block written
+
+.Lxts_enc_steal:
+       ldrb            r0, [$inp], #1          @ next leftover input byte
+       ldrb            r1, [$out, #-0x10]      @ matching byte of the last written block
+       strb            r0, [$out, #-0x10]      @ input byte takes its place
+       strb            r1, [$out], #1          @ displaced byte becomes part of the short final block
+
+       subs            $len, #1
+       bhi             .Lxts_enc_steal
+
+       vld1.8          {@XMM[0]}, [r6]         @ patched block is processed once more
+       mov             r0, sp
+       veor            @XMM[0], @XMM[0], @XMM[8]
+       mov             r1, sp
+       vst1.8          {@XMM[0]}, [sp,:128]
+       mov             r2, $key
+       mov             r4, $fp                 @ preserve fp
+
+       bl              AES_encrypt
+
+       vld1.8          {@XMM[0]}, [sp,:128]
+       veor            @XMM[0], @XMM[0], @XMM[8]
+       vst1.8          {@XMM[0]}, [r6]         @ result replaces the block at r6
+       mov             $fp, r4                 @ restore fp
+#endif
+
+.Lxts_enc_ret:
+       bic             r0, $fp, #0xf           @ 16-byte aligned frame address, upper bound of the wipe
+       vmov.i32        q0, #0
+       vmov.i32        q1, #0
+#ifdef XTS_CHAIN_TWEAK
+       ldr             r1, [$fp, #0x20+VFP_ABI_FRAME]  @ chain tweak
+#endif
+.Lxts_enc_bzero:                               @ wipe key schedule [if any]
+       vstmia          sp!, {q0-q1}            @ zero 32 bytes per pass
+       cmp             sp, r0
+       bne             .Lxts_enc_bzero
+
+       mov             sp, $fp                 @ drop the scratch area
+#ifdef XTS_CHAIN_TWEAK
+       vst1.8          {@XMM[8]}, [r1]         @ hand the next tweak back through the caller pointer
+#endif
+       VFP_ABI_POP
+       ldmia           sp!, {r4-r10, pc}       @ return
+
+.size  bsaes_xts_encrypt,.-bsaes_xts_encrypt
+
+.globl bsaes_xts_decrypt
+.type  bsaes_xts_decrypt,%function
+.align 4
+bsaes_xts_decrypt:
+       mov     ip, sp                          @ ip = incoming sp, base for the stack arguments read below
+       stmdb   sp!, {r4-r10, lr}               @ 0x20 bytes of saved registers
+       VFP_ABI_PUSH
+       mov     r6, sp                          @ future $fp
+
+       mov     $inp, r0                        @ 1st argument: input pointer
+       mov     $out, r1                        @ 2nd argument: output pointer
+       mov     $len, r2                        @ 3rd argument: length in bytes
+       mov     $key, r3                        @ 4th argument: data key
+
+       sub     r0, sp, #0x10                   @ 0x10 bytes of scratch below the frame
+       bic     r0, #0xf                        @ align at 16 bytes
+       mov     sp, r0
+
+#ifdef XTS_CHAIN_TWEAK
+       ldr     r0, [ip]                        @ pointer to input tweak
+#else
+       @ generate initial tweak
+       ldr     r0, [ip, #4]                    @ iv[]
+       mov     r1, sp                          @ result goes to the aligned scratch slot
+       ldr     r2, [ip, #0]                    @ key2
+       bl      AES_encrypt                     @ the tweak is produced with the encrypt primitive here as well
+       mov     r0, sp                          @ pointer to initial tweak
+#endif
+
+       ldr     $rounds, [$key, #240]           @ get # of rounds
+       mov     $fp, r6                         @ frame pointer leaves r6 before r6 is reused
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       @ allocate the key schedule on the stack
+       sub     r12, sp, $rounds, lsl#7         @ 128 bytes per inner round key
+       @ add   r12, #`128-32`                  @ size of bit-sliced key schedule
+       sub     r12, #`32+16`                   @ place for tweak[9]
+
+       @ populate the key schedule
+       mov     r4, $key                        @ pass key
+       mov     r5, $rounds                     @ pass # of rounds
+       mov     sp, r12                         @ sp = bottom of tweak area, key schedule sits 0x90 above
+       add     r12, #0x90                      @ pass key schedule
+       bl      _bsaes_key_convert
+       add     r4, sp, #0x90                   @ r4 = first slot of the converted schedule
+       vldmia  r4, {@XMM[6]}
+       vstmia  r12,  {@XMM[15]}                @ save last round key
+       veor    @XMM[7], @XMM[7], @XMM[6]       @ fix up round 0 key
+       vstmia  r4, {@XMM[7]}
+#else
+       ldr     r12, [$key, #244]               @ word at key offset 244 (presumably a schedule-converted flag)
+       eors    r12, #1                         @ result is zero only if that word was 1
+       beq     0f                              @ in that case skip the conversion
+
+       str     r12, [$key, #244]               @ write the toggled word back
+       mov     r4, $key                        @ pass key
+       mov     r5, $rounds                     @ pass # of rounds
+       add     r12, $key, #248                 @ pass key schedule
+       bl      _bsaes_key_convert
+       add     r4, $key, #248                  @ r4 = first slot of the converted schedule
+       vldmia  r4, {@XMM[6]}
+       vstmia  r12,  {@XMM[15]}                @ save last round key
+       veor    @XMM[7], @XMM[7], @XMM[6]       @ fix up round 0 key
+       vstmia  r4, {@XMM[7]}
+
+.align 2
+0:     sub     sp, #0x90                       @ place for tweak[9]
+#endif
+       vld1.8  {@XMM[8]}, [r0]                 @ initial tweak
+       adr     $magic, .Lxts_magic
+
+       tst     $len, #0xf                      @ if not multiple of 16
+       it      ne                              @ Thumb2 thing, sanity check in ARM
+       subne   $len, #0x10                     @ subtract another 16 bytes
+       subs    $len, #0x80                     @ the main loop consumes 0x80 bytes (8 blocks) per pass
+
+       blo     .Lxts_dec_short                 @ fewer than 0x80 bytes for the bulk path
+       b       .Lxts_dec_loop
+
+.align 4
+.Lxts_dec_loop:
+       vldmia          $magic, {$twmask}       @ load XTS magic
+       vshr.s64        @T[0], @XMM[8], #63     @ all-ones in every 64-bit half whose top bit is set
+       mov             r0, sp                  @ tweaks are spilled upwards from sp
+       vand            @T[0], @T[0], $twmask   @ keep the magic value only for overflowing halves
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+       vadd.u64        @XMM[$i], @XMM[$i-1], @XMM[$i-1]        @ next tweak: each half shifted left by one
+       vst1.64         {@XMM[$i-1]}, [r0,:128]!        @ spill the previous tweak
+       vswp            `&Dhi("@T[0]")`,`&Dlo("@T[0]")` @ exchange halves of the carry mask
+       vshr.s64        @T[1], @XMM[$i], #63
+       veor            @XMM[$i], @XMM[$i], @T[0]       @ fold the carry mask into the new tweak
+       vand            @T[1], @T[1], $twmask
+___
+       @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+       vld1.8          {@XMM[$i-10]}, [$inp]!  @ load the next input block
+___
+$code.=<<___ if ($i>=11);
+       veor            @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]    @ input block xor its tweak
+___
+}
+$code.=<<___;
+       vadd.u64        @XMM[8], @XMM[15], @XMM[15]     @ tweak for the first block of the next pass
+       vst1.64         {@XMM[15]}, [r0,:128]!
+       vswp            `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+       veor            @XMM[8], @XMM[8], @T[0]
+       vst1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+
+       vld1.8          {@XMM[6]-@XMM[7]}, [$inp]!      @ last two input blocks of this pass
+       veor            @XMM[5], @XMM[5], @XMM[13]
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[6], @XMM[6], @XMM[14]
+       mov             r5, $rounds                     @ pass rounds
+       veor            @XMM[7], @XMM[7], @XMM[15]
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_decrypt8
+
+       vld1.64         {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! @ reload spilled tweaks
+       vld1.64         {@XMM[10]-@XMM[11]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]      @ results are consumed in register order 0,1,6,4,2,7,3,5
+       vld1.64         {@XMM[12]-@XMM[13]}, [r0,:128]!
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[6], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       veor            @XMM[9], @XMM[4], @XMM[11]
+       vld1.64         {@XMM[14]-@XMM[15]}, [r0,:128]!
+       veor            @XMM[10], @XMM[2], @XMM[12]
+       vst1.8          {@XMM[8]-@XMM[9]}, [$out]!
+       veor            @XMM[11], @XMM[7], @XMM[13]
+       veor            @XMM[12], @XMM[3], @XMM[14]
+       vst1.8          {@XMM[10]-@XMM[11]}, [$out]!
+       veor            @XMM[13], @XMM[5], @XMM[15]
+       vst1.8          {@XMM[12]-@XMM[13]}, [$out]!
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+
+       subs            $len, #0x80
+       bpl             .Lxts_dec_loop                  @ repeat while the biased length stays non-negative
+
+.Lxts_dec_short:
+       adds            $len, #0x70             @ undo all but 0x10 of the 0x80 bias
+       bmi             .Lxts_dec_done          @ no whole block left for this path
+
+       vldmia          $magic, {$twmask}       @ load XTS magic
+       vshr.s64        @T[0], @XMM[8], #63
+       mov             r0, sp
+       vand            @T[0], @T[0], $twmask
+___
+for($i=9;$i<16;$i++) {
+$code.=<<___;
+       vadd.u64        @XMM[$i], @XMM[$i-1], @XMM[$i-1]        @ next tweak: each half shifted left by one
+       vst1.64         {@XMM[$i-1]}, [r0,:128]!        @ spill the previous tweak
+       vswp            `&Dhi("@T[0]")`,`&Dlo("@T[0]")`
+       vshr.s64        @T[1], @XMM[$i], #63
+       veor            @XMM[$i], @XMM[$i], @T[0]       @ fold the carry mask into the new tweak
+       vand            @T[1], @T[1], $twmask
+___
+       @T=reverse(@T);
+
+$code.=<<___ if ($i>=10);
+       vld1.8          {@XMM[$i-10]}, [$inp]!  @ load the next input block
+       subs            $len, #0x10
+       bmi             .Lxts_dec_`$i-9`        @ no further whole block: finish with the blocks loaded so far
+___
+$code.=<<___ if ($i>=11);
+       veor            @XMM[$i-11], @XMM[$i-11], @XMM[$i-3]    @ input block xor its tweak
+___
+}
+$code.=<<___;
+       sub             $len, #0x10
+       vst1.64         {@XMM[15]}, [r0,:128]           @ next round tweak
+
+       vld1.8          {@XMM[6]}, [$inp]!              @ seventh input block
+       veor            @XMM[5], @XMM[5], @XMM[13]
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[6], @XMM[6], @XMM[14]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_decrypt8
+
+       vld1.64         {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! @ reload spilled tweaks
+       vld1.64         {@XMM[10]-@XMM[11]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       vld1.64         {@XMM[12]-@XMM[13]}, [r0,:128]!
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[6], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       veor            @XMM[9], @XMM[4], @XMM[11]
+       vld1.64         {@XMM[14]}, [r0,:128]!
+       veor            @XMM[10], @XMM[2], @XMM[12]
+       vst1.8          {@XMM[8]-@XMM[9]}, [$out]!
+       veor            @XMM[11], @XMM[7], @XMM[13]
+       veor            @XMM[12], @XMM[3], @XMM[14]
+       vst1.8          {@XMM[10]-@XMM[11]}, [$out]!
+       vst1.8          {@XMM[12]}, [$out]!             @ seven blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_dec_done
+.align 4
+.Lxts_dec_6:
+       vst1.64         {@XMM[14]}, [r0,:128]           @ next round tweak
+
+       veor            @XMM[4], @XMM[4], @XMM[12]      @ registers 4 and 5 were loaded but not yet tweaked
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[5], @XMM[5], @XMM[13]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_decrypt8
+
+       vld1.64         {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! @ reload spilled tweaks
+       vld1.64         {@XMM[10]-@XMM[11]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       vld1.64         {@XMM[12]-@XMM[13]}, [r0,:128]!
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[6], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       veor            @XMM[9], @XMM[4], @XMM[11]
+       veor            @XMM[10], @XMM[2], @XMM[12]
+       vst1.8          {@XMM[8]-@XMM[9]}, [$out]!
+       veor            @XMM[11], @XMM[7], @XMM[13]
+       vst1.8          {@XMM[10]-@XMM[11]}, [$out]!    @ six blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_dec_done
+.align 4
+.Lxts_dec_5:
+       vst1.64         {@XMM[13]}, [r0,:128]           @ next round tweak
+
+       veor            @XMM[3], @XMM[3], @XMM[11]      @ registers 3 and 4 were loaded but not yet tweaked
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[4], @XMM[4], @XMM[12]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_decrypt8
+
+       vld1.64         {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! @ reload spilled tweaks
+       vld1.64         {@XMM[10]-@XMM[11]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       vld1.64         {@XMM[12]}, [r0,:128]!
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[6], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       veor            @XMM[9], @XMM[4], @XMM[11]
+       veor            @XMM[10], @XMM[2], @XMM[12]
+       vst1.8          {@XMM[8]-@XMM[9]}, [$out]!
+       vst1.8          {@XMM[10]}, [$out]!             @ five blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_dec_done
+.align 4
+.Lxts_dec_4:
+       vst1.64         {@XMM[12]}, [r0,:128]           @ next round tweak
+
+       veor            @XMM[2], @XMM[2], @XMM[10]      @ registers 2 and 3 were loaded but not yet tweaked
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[3], @XMM[3], @XMM[11]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_decrypt8
+
+       vld1.64         {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! @ reload spilled tweaks
+       vld1.64         {@XMM[10]-@XMM[11]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[6], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       veor            @XMM[9], @XMM[4], @XMM[11]
+       vst1.8          {@XMM[8]-@XMM[9]}, [$out]!      @ four blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_dec_done
+.align 4
+.Lxts_dec_3:
+       vst1.64         {@XMM[11]}, [r0,:128]           @ next round tweak
+
+       veor            @XMM[1], @XMM[1], @XMM[9]       @ registers 1 and 2 were loaded but not yet tweaked
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[2], @XMM[2], @XMM[10]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_decrypt8
+
+       vld1.64         {@XMM[8]-@XMM[9]}, [r0,:128]!   @ reload spilled tweaks
+       vld1.64         {@XMM[10]}, [r0,:128]!
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       veor            @XMM[8], @XMM[6], @XMM[10]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!
+       vst1.8          {@XMM[8]}, [$out]!              @ three blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_dec_done
+.align 4
+.Lxts_dec_2:
+       vst1.64         {@XMM[10]}, [r0,:128]           @ next round tweak
+
+       veor            @XMM[0], @XMM[0], @XMM[8]       @ registers 0 and 1 were loaded but not yet tweaked
+#ifndef        BSAES_ASM_EXTENDED_KEY
+       add             r4, sp, #0x90                   @ pass key schedule
+#else
+       add             r4, $key, #248                  @ pass key schedule
+#endif
+       veor            @XMM[1], @XMM[1], @XMM[9]
+       mov             r5, $rounds                     @ pass rounds
+       mov             r0, sp                          @ rewind to the first spilled tweak
+
+       bl              _bsaes_decrypt8
+
+       vld1.64         {@XMM[8]-@XMM[9]}, [r0,:128]!   @ reload spilled tweaks
+       veor            @XMM[0], @XMM[0], @XMM[ 8]
+       veor            @XMM[1], @XMM[1], @XMM[ 9]
+       vst1.8          {@XMM[0]-@XMM[1]}, [$out]!      @ two blocks written in total
+
+       vld1.64         {@XMM[8]}, [r0,:128]            @ next round tweak
+       b               .Lxts_dec_done
+.align 4
+.Lxts_dec_1:
+       mov             r0, sp                          @ input pointer for the single-block call
+       veor            @XMM[0], @XMM[8]                @ block xor tweak
+       mov             r1, sp                          @ output overwrites the same scratch slot
+       vst1.8          {@XMM[0]}, [sp,:128]
+       mov             r2, $key
+       mov             r4, $fp                         @ preserve fp
+       mov             r5, $magic                      @ preserve magic
+
+       bl              AES_decrypt
+
+       vld1.8          {@XMM[0]}, [sp,:128]
+       veor            @XMM[0], @XMM[0], @XMM[8]       @ result xor the same tweak
+       vst1.8          {@XMM[0]}, [$out]!
+       mov             $fp, r4                         @ restore fp
+       mov             $magic, r5                      @ restore magic, still needed for the stealing path
+
+       vmov            @XMM[8], @XMM[9]                @ next round tweak
+
+.Lxts_dec_done:
+#ifndef        XTS_CHAIN_TWEAK
+       adds            $len, #0x10             @ count of leftover bytes, zero for block-aligned input
+       beq             .Lxts_dec_ret
+
+       @ calculate one round of extra tweak for the stolen ciphertext
+       vldmia          $magic, {$twmask}
+       vshr.s64        @XMM[6], @XMM[8], #63
+       vand            @XMM[6], @XMM[6], $twmask
+       vadd.u64        @XMM[9], @XMM[8], @XMM[8]
+       vswp            `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")`
+       veor            @XMM[9], @XMM[9], @XMM[6]
+
+       @ perform the final decryption with the last tweak value
+       vld1.8          {@XMM[0]}, [$inp]!
+       mov             r0, sp
+       veor            @XMM[0], @XMM[0], @XMM[9]
+       mov             r1, sp
+       vst1.8          {@XMM[0]}, [sp,:128]
+       mov             r2, $key
+       mov             r4, $fp                 @ preserve fp
+
+       bl              AES_decrypt
+
+       vld1.8          {@XMM[0]}, [sp,:128]
+       veor            @XMM[0], @XMM[0], @XMM[9]
+       vst1.8          {@XMM[0]}, [$out]       @ stored without advancing the output pointer
+
+       mov             r6, $out                @ r6 = start of the block just written
+.Lxts_dec_steal:
+       ldrb            r1, [$out]              @ byte of the block just written
+       ldrb            r0, [$inp], #1          @ next leftover input byte
+       strb            r1, [$out, #0x10]       @ written byte moves into the short final block
+       strb            r0, [$out], #1          @ input byte takes its place
+
+       subs            $len, #1
+       bhi             .Lxts_dec_steal
+
+       vld1.8          {@XMM[0]}, [r6]         @ patched block is processed with the earlier tweak
+       mov             r0, sp
+       veor            @XMM[0], @XMM[8]
+       mov             r1, sp
+       vst1.8          {@XMM[0]}, [sp,:128]
+       mov             r2, $key
+
+       bl              AES_decrypt
+
+       vld1.8          {@XMM[0]}, [sp,:128]
+       veor            @XMM[0], @XMM[0], @XMM[8]
+       vst1.8          {@XMM[0]}, [r6]         @ result replaces the block at r6
+       mov             $fp, r4                 @ r4 was set before the first call; assumes AES_decrypt preserves it
+#endif
+
+.Lxts_dec_ret:
+       bic             r0, $fp, #0xf           @ 16-byte aligned frame address, upper bound of the wipe
+       vmov.i32        q0, #0
+       vmov.i32        q1, #0
+#ifdef XTS_CHAIN_TWEAK
+       ldr             r1, [$fp, #0x20+VFP_ABI_FRAME]  @ chain tweak
+#endif
+.Lxts_dec_bzero:                               @ wipe key schedule [if any]
+       vstmia          sp!, {q0-q1}            @ zero 32 bytes per pass
+       cmp             sp, r0
+       bne             .Lxts_dec_bzero
+
+       mov             sp, $fp                 @ drop the scratch area
+#ifdef XTS_CHAIN_TWEAK
+       vst1.8          {@XMM[8]}, [r1]         @ hand the next tweak back through the caller pointer
+#endif
+       VFP_ABI_POP
+       ldmia           sp!, {r4-r10, pc}       @ return
+
+.size  bsaes_xts_decrypt,.-bsaes_xts_decrypt
+___
+}
+$code.=<<___;
+#endif
+___
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;  # replace every backtick-quoted expression in the generated text with its evaluated value
+
+open SELF,$0;                          # re-read this script to copy its leading comment block to the output
+while(<SELF>) {
+       next if (/^#!/);                # skip the interpreter line
+        last if (!s/^#/@/ and !/^$/);  # a leading '#' becomes '@'; stop at the first line that is neither a comment nor empty
+        print;
+}
+close SELF;
+
+print $code;                           # emit the generated assembly on STDOUT
+
+close STDOUT;
index 41b90f0..3f7d33c 100644 (file)
@@ -38,8 +38,9 @@
 #              Emilia's        this(*)         difference
 #
 # Core 2       9.30            8.69            +7%
-# Nehalem(**)  7.63            6.98            +9%
-# Atom         17.1            17.4            -2%(***)
+# Nehalem(**)  7.63            6.88            +11%
+# Atom         17.1            16.4            +4%
+# Silvermont   -               12.9
 #
 # (*)  Comparison is not completely fair, because "this" is ECB,
 #      i.e. no extra processing such as counter values calculation
 # (**) Results were collected on Westmere, which is considered to
 #      be equivalent to Nehalem for this code.
 #
-# (***)        Slowdown on Atom is rather strange per se, because original
-#      implementation has a number of 9+-bytes instructions, which
-#      are bad for Atom front-end, and which I eliminated completely.
-#      In attempt to address deterioration sbox() was tested in FP
-#      SIMD "domain" (movaps instead of movdqa, xorps instead of
-#      pxor, etc.). While it resulted in nominal 4% improvement on
-#      Atom, it hurted Westmere by more than 2x factor.
-#
 # As for key schedule conversion subroutine. Interface to OpenSSL
 # relies on per-invocation on-the-fly conversion. This naturally
 # has impact on performance, especially for short inputs. Conversion
@@ -67,7 +60,7 @@
 #              conversion      conversion/8x block
 # Core 2       240             0.22
 # Nehalem      180             0.20
-# Atom         430             0.19
+# Atom         430             0.20
 #
 # The ratio values mean that 128-byte blocks will be processed
 # 16-18% slower, 256-byte blocks - 9-10%, 384-byte blocks - 6-7%,
 # Add decryption procedure. Performance in CPU cycles spent to decrypt
 # one byte out of 4096-byte buffer with 128-bit key is:
 #
-# Core 2       9.83
-# Nehalem      7.74
-# Atom         19.0
+# Core 2       9.98
+# Nehalem      7.80
+# Atom         17.9
+# Silvermont   14.0
 #
 # November 2011.
 #
@@ -434,21 +428,21 @@ my $mask=pop;
 $code.=<<___;
        pxor    0x00($key),@x[0]
        pxor    0x10($key),@x[1]
-       pshufb  $mask,@x[0]
        pxor    0x20($key),@x[2]
-       pshufb  $mask,@x[1]
        pxor    0x30($key),@x[3]
-       pshufb  $mask,@x[2]
+       pshufb  $mask,@x[0]
+       pshufb  $mask,@x[1]
        pxor    0x40($key),@x[4]
-       pshufb  $mask,@x[3]
        pxor    0x50($key),@x[5]
-       pshufb  $mask,@x[4]
+       pshufb  $mask,@x[2]
+       pshufb  $mask,@x[3]
        pxor    0x60($key),@x[6]
-       pshufb  $mask,@x[5]
        pxor    0x70($key),@x[7]
+       pshufb  $mask,@x[4]
+       pshufb  $mask,@x[5]
        pshufb  $mask,@x[6]
-       lea     0x80($key),$key
        pshufb  $mask,@x[7]
+       lea     0x80($key),$key
 ___
 }
 
@@ -820,18 +814,18 @@ _bsaes_encrypt8:
        movdqa  0x50($const), @XMM[8]   # .LM0SR
        pxor    @XMM[9], @XMM[0]        # xor with round0 key
        pxor    @XMM[9], @XMM[1]
-        pshufb @XMM[8], @XMM[0]
        pxor    @XMM[9], @XMM[2]
-        pshufb @XMM[8], @XMM[1]
        pxor    @XMM[9], @XMM[3]
-        pshufb @XMM[8], @XMM[2]
+        pshufb @XMM[8], @XMM[0]
+        pshufb @XMM[8], @XMM[1]
        pxor    @XMM[9], @XMM[4]
-        pshufb @XMM[8], @XMM[3]
        pxor    @XMM[9], @XMM[5]
-        pshufb @XMM[8], @XMM[4]
+        pshufb @XMM[8], @XMM[2]
+        pshufb @XMM[8], @XMM[3]
        pxor    @XMM[9], @XMM[6]
-        pshufb @XMM[8], @XMM[5]
        pxor    @XMM[9], @XMM[7]
+        pshufb @XMM[8], @XMM[4]
+        pshufb @XMM[8], @XMM[5]
         pshufb @XMM[8], @XMM[6]
         pshufb @XMM[8], @XMM[7]
 _bsaes_encrypt8_bitslice:
@@ -884,18 +878,18 @@ _bsaes_decrypt8:
        movdqa  -0x30($const), @XMM[8]  # .LM0ISR
        pxor    @XMM[9], @XMM[0]        # xor with round0 key
        pxor    @XMM[9], @XMM[1]
-        pshufb @XMM[8], @XMM[0]
        pxor    @XMM[9], @XMM[2]
-        pshufb @XMM[8], @XMM[1]
        pxor    @XMM[9], @XMM[3]
-        pshufb @XMM[8], @XMM[2]
+        pshufb @XMM[8], @XMM[0]
+        pshufb @XMM[8], @XMM[1]
        pxor    @XMM[9], @XMM[4]
-        pshufb @XMM[8], @XMM[3]
        pxor    @XMM[9], @XMM[5]
-        pshufb @XMM[8], @XMM[4]
+        pshufb @XMM[8], @XMM[2]
+        pshufb @XMM[8], @XMM[3]
        pxor    @XMM[9], @XMM[6]
-        pshufb @XMM[8], @XMM[5]
        pxor    @XMM[9], @XMM[7]
+        pshufb @XMM[8], @XMM[4]
+        pshufb @XMM[8], @XMM[5]
         pshufb @XMM[8], @XMM[6]
         pshufb @XMM[8], @XMM[7]
 ___
@@ -1937,21 +1931,21 @@ $code.=<<___;
        movdqa  -0x10(%r11), @XMM[8]    # .LSWPUPM0SR
        pxor    @XMM[9], @XMM[0]        # xor with round0 key
        pxor    @XMM[9], @XMM[1]
-        pshufb @XMM[8], @XMM[0]
        pxor    @XMM[9], @XMM[2]
-        pshufb @XMM[8], @XMM[1]
        pxor    @XMM[9], @XMM[3]
-        pshufb @XMM[8], @XMM[2]
+        pshufb @XMM[8], @XMM[0]
+        pshufb @XMM[8], @XMM[1]
        pxor    @XMM[9], @XMM[4]
-        pshufb @XMM[8], @XMM[3]
        pxor    @XMM[9], @XMM[5]
-        pshufb @XMM[8], @XMM[4]
+        pshufb @XMM[8], @XMM[2]
+        pshufb @XMM[8], @XMM[3]
        pxor    @XMM[9], @XMM[6]
-        pshufb @XMM[8], @XMM[5]
        pxor    @XMM[9], @XMM[7]
+        pshufb @XMM[8], @XMM[4]
+        pshufb @XMM[8], @XMM[5]
         pshufb @XMM[8], @XMM[6]
-       lea     .LBS0(%rip), %r11       # constants table
         pshufb @XMM[8], @XMM[7]
+       lea     .LBS0(%rip), %r11       # constants table
        mov     %ebx,%r10d              # pass rounds
 
        call    _bsaes_encrypt8_bitslice
diff --git a/crypto/aes/asm/vpaes-ppc.pl b/crypto/aes/asm/vpaes-ppc.pl
new file mode 100644 (file)
index 0000000..7fda60e
--- /dev/null
@@ -0,0 +1,1512 @@
+#!/usr/bin/env perl
+
+######################################################################
+## Constant-time SSSE3 AES core implementation.
+## version 0.1
+##
+## By Mike Hamburg (Stanford University), 2009
+## Public domain.
+##
+## For details see http://shiftleft.org/papers/vector_aes/ and
+## http://crypto.stanford.edu/vpaes/.
+
+# CBC encrypt/decrypt performance in cycles per byte processed with
+# 128-bit key.
+#
+#              aes-ppc.pl              this
+# G4e          35.5/52.1/(23.8)        11.9(*)/15.4
+# POWER6       42.7/54.3/(28.2)        63.0/92.8(**)
+# POWER7       32.3/42.9/(18.4)        18.5/23.3
+#
+# (*)  This is ~10% worse than reported in paper. The reason is
+#      twofold. This module doesn't make any assumption about
+#      key schedule (or data for that matter) alignment and handles
+#      it in-line. Secondly it, being transliterated from
+#      vpaes-x86_64.pl, relies on "nested inversion" better suited
+#      for Intel CPUs.
+# (**) Inadequate POWER6 performance is due to astronomic AltiVec
+#      latency, 9 cycles per simple logical operation.
+
+$flavour = shift;                      # first argument; must contain "64" or "32"
+
+if ($flavour =~ /64/) {
+       $SIZE_T =8;                     # register/pointer size in bytes
+       $LRSAVE =2*$SIZE_T;             # LR is stored at $FRAME+$LRSAVE by the public entry points
+       $STU    ="stdu";                # store-with-update, used to allocate the stack frame
+       $POP    ="ld";
+       $PUSH   ="std";
+       $UCMP   ="cmpld";
+} elsif ($flavour =~ /32/) {
+       $SIZE_T =4;
+       $LRSAVE =$SIZE_T;
+       $STU    ="stwu";
+       $POP    ="lwz";
+       $PUSH   ="stw";
+       $UCMP   ="cmplw";
+} else { die "nonsense $flavour"; }
+
+$sp="r1";
+$FRAME=6*$SIZE_T+13*16;        # 12*16 for v20-v31 offload, +16 for stvx alignment slack and the vrsave word
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;                         # directory part of this script's path
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or               # translator next to this script ...
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or  # ... or under ../../perlasm
+die "can't locate ppc-xlate.pl";
+# Use "or", not "||": "||" binds to the (always true) command string, so die could never fire.
+open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
+
+$code.=<<___;
+.machine       "any"
+
+.text
+
+.align 7       # totally strategic alignment
+_vpaes_consts:
+Lk_mc_forward: # mc_forward
+       .long   0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c  ?inv
+       .long   0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300  ?inv
+       .long   0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704  ?inv
+       .long   0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08  ?inv
+Lk_mc_backward:        # mc_backward
+       .long   0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e  ?inv
+       .long   0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a  ?inv
+       .long   0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506  ?inv
+       .long   0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102  ?inv
+Lk_sr:         # sr
+       .long   0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f  ?inv
+       .long   0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b  ?inv
+       .long   0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07  ?inv
+       .long   0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603  ?inv
+
+##
+## "Hot" constants
+##
+Lk_inv:                # inv, inva
+       .long   0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704  ?rev
+       .long   0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03  ?rev
+Lk_ipt:                # input transform (lo, hi)
+       .long   0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca  ?rev
+       .long   0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd  ?rev
+Lk_sbo:                # sbou, sbot
+       .long   0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15  ?rev
+       .long   0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e  ?rev
+Lk_sb1:                # sb1u, sb1t
+       .long   0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b  ?rev
+       .long   0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5  ?rev
+Lk_sb2:                # sb2u, sb2t
+       .long   0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2  ?rev
+       .long   0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e  ?rev
+
+##
+##  Decryption stuff
+##
+Lk_dipt:       # decryption input transform
+       .long   0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15  ?rev
+       .long   0x00650560, 0xe683e386, 0x94f191f4, 0x72177712  ?rev
+Lk_dsbo:       # decryption sbox final output
+       .long   0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7  ?rev
+       .long   0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca  ?rev
+Lk_dsb9:       # decryption sbox output *9*u, *9*t
+       .long   0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca  ?rev
+       .long   0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72  ?rev
+Lk_dsbd:       # decryption sbox output *D*u, *D*t
+       .long   0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5  ?rev
+       .long   0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129  ?rev
+Lk_dsbb:       # decryption sbox output *B*u, *B*t
+       .long   0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660  ?rev
+       .long   0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3  ?rev
+Lk_dsbe:       # decryption sbox output *E*u, *E*t
+       .long   0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222  ?rev
+       .long   0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794  ?rev
+
+##
+##  Key schedule constants
+##
+Lk_dksd:       # decryption key schedule: invskew x*D
+       .long   0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007  ?rev
+       .long   0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f  ?rev
+Lk_dksb:       # decryption key schedule: invskew x*B
+       .long   0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603  ?rev
+       .long   0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9  ?rev
+Lk_dkse:       # decryption key schedule: invskew x*E + 0x63
+       .long   0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553  ?rev
+       .long   0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd  ?rev
+Lk_dks9:       # decryption key schedule: invskew x*9
+       .long   0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a  ?rev
+       .long   0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b  ?rev
+
+Lk_rcon:       # rcon
+       .long   0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70  ?asis
+Lk_s63:
+       .long   0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b  ?asis
+
+Lk_opt:                # output transform
+       .long   0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7  ?rev
+       .long   0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1  ?rev
+Lk_deskew:     # deskew tables: inverts the sbox's "skew"
+       .long   0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d  ?rev
+       .long   0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128  ?rev
+.align 5
+Lconsts:                       # returns the address of _vpaes_consts in r12, clobbers r0
+       mflr    r0              # keep the caller's return address
+       bcl     20,31,\$+4      # branch-and-link to the next instruction, LR = its address
+       mflr    r12     #vvvvv distance between "." and _vpaes_consts
+       addi    r12,r12,-0x308  # r12 = _vpaes_consts (0x300 bytes of tables + 8)
+       mtlr    r0              # put the return address back
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+.asciz  "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)"
+.align 6
+___
+\f
+my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31));     # v26-v31: unaligned load/store helpers
+{
+my ($inp,$out,$key) = map("r$_",(3..5));       # r3-r5: input, output and key pointers
+
+my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15));    # v10-v15: loaded by both preheat routines
+my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19));                    # v16-v19: loaded by _vpaes_encrypt_preheat
+my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23));      # v16-v23: loaded by _vpaes_decrypt_preheat (overlaps the encrypt set)
+
+$code.=<<___;
+##
+##  _vpaes_encrypt_preheat
+##
+##  Points r12 at _vpaes_consts (PC-relative via Lconsts) and loads
+##  v7 = 0, v8 = 0x04..04, v9 = 0x0f..0f, v10-v19 = encryption tables.
+##
+.align 4
+_vpaes_encrypt_preheat:
+       mflr    r8                      # bl below overwrites LR, park it in r8
+       bl      Lconsts                 # r12 = _vpaes_consts
+       mtlr    r8
+       li      r11, 0xc0               # Lk_inv
+       li      r10, 0xd0
+       li      r9,  0xe0               # Lk_ipt
+       li      r8,  0xf0
+       vxor    v7, v7, v7              # 0x00..00
+       vspltisb        v8,4            # 0x04..04
+       vspltisb        v9,0x0f         # 0x0f..0f
+       lvx     $invlo, r12, r11
+       li      r11, 0x100              # Lk_sbo
+       lvx     $invhi, r12, r10
+       li      r10, 0x110
+       lvx     $iptlo, r12, r9
+       li      r9,  0x120              # Lk_sb1
+       lvx     $ipthi, r12, r8
+       li      r8,  0x130
+       lvx     $sbou, r12, r11
+       li      r11, 0x140              # Lk_sb2
+       lvx     $sbot, r12, r10
+       li      r10, 0x150
+       lvx     $sb1u, r12, r9
+       lvx     $sb1t, r12, r8
+       lvx     $sb2u, r12, r11
+       lvx     $sb2t, r12, r10
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+
+##
+##  _vpaes_encrypt_core
+##
+##  AES-encrypt v0.
+##
+##  Inputs:
+##     v0 = input block
+##     r12, v7-v19 as set by _vpaes_encrypt_preheat
+##     r5 = scheduled keys, v31 = key alignment permute
+##
+##  Output in v0
+##  Clobbers  v1-v6, r8-r11, CTR
+##  (x86 mnemonics in the right-hand comments show the code this was ported from)
+##
+.align 5
+_vpaes_encrypt_core:
+       lwz     r8, 240($key)           # pull rounds
+       li      r9, 16                  # offset of the next key quadword
+       lvx     v5, 0, $key             # vmovdqu       (%r9),  %xmm5           # round0 key
+       li      r11, 0x10               # running row offset into Lk_mc_forward
+       lvx     v6, r9, $key            # quadword following the round0 key
+       addi    r9, r9, 16
+       ?vperm  v5, v5, v6, $keyperm    # align round key
+       addi    r10, r11, 0x40          # same row in Lk_mc_backward
+       vsrb    v1, v0, v8              # vpsrlb        \$4,    %xmm0,  %xmm0
+       vperm   v0, $iptlo, $iptlo, v0  # vpshufb       %xmm1,  %xmm2,  %xmm1
+       vperm   v1, $ipthi, $ipthi, v1  # vpshufb       %xmm0,  %xmm3,  %xmm2
+       vxor    v0, v0, v5              # vpxor %xmm5,  %xmm1,  %xmm0
+       vxor    v0, v0, v1              # vpxor %xmm2,  %xmm0,  %xmm0
+       mtctr   r8                      # CTR = number of rounds
+       b       Lenc_entry
+
+.align 4
+Lenc_loop:
+       # middle of middle round
+       vperm   v4, $sb1t, v7, v2       # vpshufb       %xmm2,  %xmm13, %xmm4   # 4 = sb1u
+       lvx     v1, r12, r11            # vmovdqa       -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
+       addi    r11, r11, 16
+       vperm   v0, $sb1u, v7, v3       # vpshufb       %xmm3,  %xmm12, %xmm0   # 0 = sb1t
+       vxor    v4, v4, v5              # vpxor         %xmm5,  %xmm4,  %xmm4   # 4 = sb1u + k
+       andi.   r11, r11, 0x30          # and           \$0x30, %r11    # ... mod 4
+       vperm   v5, $sb2t, v7, v2       # vpshufb       %xmm2,  %xmm15, %xmm5   # 4 = sb2u
+       vxor    v0, v0, v4              # vpxor         %xmm4,  %xmm0,  %xmm0   # 0 = A
+       vperm   v2, $sb2u, v7, v3       # vpshufb       %xmm3,  %xmm14, %xmm2   # 2 = sb2t
+       lvx     v4, r12, r10            # vmovdqa       (%r11,%r10), %xmm4      # .Lk_mc_backward[]
+       addi    r10, r11, 0x40
+       vperm   v3, v0, v7, v1          # vpshufb       %xmm1,  %xmm0,  %xmm3   # 0 = B
+       vxor    v2, v2, v5              # vpxor         %xmm5,  %xmm2,  %xmm2   # 2 = 2A
+       vperm   v0, v0, v7, v4          # vpshufb       %xmm4,  %xmm0,  %xmm0   # 3 = D
+       vxor    v3, v3, v2              # vpxor         %xmm2,  %xmm3,  %xmm3   # 0 = 2A+B
+       vperm   v4, v3, v7, v1          # vpshufb       %xmm1,  %xmm3,  %xmm4   # 0 = 2B+C
+       vxor    v0, v0, v3              # vpxor         %xmm3,  %xmm0,  %xmm0   # 3 = 2A+B+D
+       vxor    v0, v0, v4              # vpxor         %xmm4,  %xmm0, %xmm0    # 0 = 2A+3B+C+D
+
+Lenc_entry:
+       # top of round
+       vsrb    v1, v0, v8              # vpsrlb        \$4,    %xmm0,  %xmm0   # 1 = i
+       vperm   v5, $invhi, $invhi, v0  # vpshufb       %xmm1,  %xmm11, %xmm5   # 2 = a/k
+       vxor    v0, v0, v1              # vpxor         %xmm0,  %xmm1,  %xmm1   # 0 = j
+       vperm   v3, $invlo, $invlo, v1  # vpshufb       %xmm0,  %xmm10, %xmm3   # 3 = 1/i
+       vperm   v4, $invlo, $invlo, v0  # vpshufb       %xmm1,  %xmm10, %xmm4   # 4 = 1/j
+       vand    v0, v0, v9              # mask j down to its low nibble
+       vxor    v3, v3, v5              # vpxor         %xmm5,  %xmm3,  %xmm3   # 3 = iak = 1/i + a/k
+       vxor    v4, v4, v5              # vpxor         %xmm5,  %xmm4,  %xmm4   # 4 = jak = 1/j + a/k
+       vperm   v2, $invlo, v7, v3      # vpshufb       %xmm3,  %xmm10, %xmm2   # 2 = 1/iak
+       vmr     v5, v6                  # previously loaded key quadword becomes the low half
+       lvx     v6, r9, $key            # vmovdqu       (%r9), %xmm5
+       vperm   v3, $invlo, v7, v4      # vpshufb       %xmm4,  %xmm10, %xmm3   # 3 = 1/jak
+       addi    r9, r9, 16
+       vxor    v2, v2, v0              # vpxor         %xmm1,  %xmm2,  %xmm2   # 2 = io
+       ?vperm  v5, v5, v6, $keyperm    # align round key
+       vxor    v3, v3, v1              # vpxor         %xmm0,  %xmm3,  %xmm3   # 3 = jo
+       bdnz    Lenc_loop
+
+       # middle of last round
+       addi    r10, r11, 0x80
+                                       # vmovdqa       -0x60(%r10), %xmm4      # 3 : sbou      .Lk_sbo
+                                       # vmovdqa       -0x50(%r10), %xmm0      # 0 : sbot      .Lk_sbo+16
+       vperm   v4, $sbou, v7, v2       # vpshufb       %xmm2,  %xmm4,  %xmm4   # 4 = sbou
+       lvx     v1, r12, r10            # vmovdqa       0x40(%r11,%r10), %xmm1  # .Lk_sr[]
+       vperm   v0, $sbot, v7, v3       # vpshufb       %xmm3,  %xmm0,  %xmm0   # 0 = sb1t
+       vxor    v4, v4, v5              # vpxor         %xmm5,  %xmm4,  %xmm4   # 4 = sb1u + k
+       vxor    v0, v0, v4              # vpxor         %xmm4,  %xmm0,  %xmm0   # 0 = A
+       vperm   v0, v0, v7, v1          # vpshufb       %xmm1,  %xmm0,  %xmm0
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+
+.globl .vpaes_encrypt
+.align 5
+.vpaes_encrypt:                        # r3 = input block, r4 = output block, r5 = key schedule
+       $STU    $sp,-$FRAME($sp)        # allocate the stack frame
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       mflr    r6
+       mfspr   r7, 256                 # save vrsave
+       stvx    v20,r10,$sp             # spill v20-v31 through two alternating offset registers
+       addi    r10,r10,32
+       stvx    v21,r11,$sp
+       addi    r11,r11,32
+       stvx    v22,r10,$sp
+       addi    r10,r10,32
+       stvx    v23,r11,$sp
+       addi    r11,r11,32
+       stvx    v24,r10,$sp
+       addi    r10,r10,32
+       stvx    v25,r11,$sp
+       addi    r11,r11,32
+       stvx    v26,r10,$sp
+       addi    r10,r10,32
+       stvx    v27,r11,$sp
+       addi    r11,r11,32
+       stvx    v28,r10,$sp
+       addi    r10,r10,32
+       stvx    v29,r11,$sp
+       addi    r11,r11,32
+       stvx    v30,r10,$sp
+       stvx    v31,r11,$sp
+       stw     r7,`$FRAME-4`($sp)      # save vrsave
+       li      r0, -1
+       $PUSH   r6,`$FRAME+$LRSAVE`($sp)
+       mtspr   256, r0                 # preserve all AltiVec registers
+
+       bl      _vpaes_encrypt_preheat
+
+       ?lvsl   $inpperm, 0, $inp       # prepare for unaligned access
+       lvx     v0, 0, $inp
+       addi    $inp, $inp, 15          # 15 is not a typo: lvx ignores the low 4 address bits
+        ?lvsr  $outperm, 0, $out
+       ?lvsl   $keyperm, 0, $key       # prepare for unaligned access
+        vnor   $outmask, v7, v7        # 0xff..ff
+       lvx     $inptail, 0, $inp       # redundant in aligned case
+        ?vperm $outmask, v7, $outmask, $outperm
+        lvx    $outhead, 0, $out
+       ?vperm  v0, v0, $inptail, $inpperm
+
+       bl      _vpaes_encrypt_core
+
+       vperm   v0, v0, v0, $outperm    # rotate right/left
+       vsel    v1, $outhead, v0, $outmask      # merge result with bytes already at the destination
+       vmr     $outhead, v0
+       stvx    v1, 0, $out
+       addi    $out, $out, 15          # 15 is not a typo
+       ########
+
+       lvx     v1, 0, $out             # redundant in aligned case
+       vsel    v1, $outhead, v1, $outmask
+       stvx    v1, 0, $out
+
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       mtlr    r6                      # r6 still holds the return address
+       mtspr   256, r7                 # restore vrsave
+       lvx     v20,r10,$sp             # reload v20-v31
+       addi    r10,r10,32
+       lvx     v21,r11,$sp
+       addi    r11,r11,32
+       lvx     v22,r10,$sp
+       addi    r10,r10,32
+       lvx     v23,r11,$sp
+       addi    r11,r11,32
+       lvx     v24,r10,$sp
+       addi    r10,r10,32
+       lvx     v25,r11,$sp
+       addi    r11,r11,32
+       lvx     v26,r10,$sp
+       addi    r10,r10,32
+       lvx     v27,r11,$sp
+       addi    r11,r11,32
+       lvx     v28,r10,$sp
+       addi    r10,r10,32
+       lvx     v29,r11,$sp
+       addi    r11,r11,32
+       lvx     v30,r10,$sp
+       lvx     v31,r11,$sp
+       addi    $sp,$sp,$FRAME          # release the stack frame
+       blr
+       .long   0
+       .byte   0,12,0x04,1,0x80,0,3,0
+       .long   0
+.size  .vpaes_encrypt,.-.vpaes_encrypt
+
+.align 4
+_vpaes_decrypt_preheat:                # r12 = _vpaes_consts, v7-v9 = masks, v10-v23 = decryption tables
+       mflr    r8                      # bl below overwrites LR, park it in r8
+       bl      Lconsts                 # r12 = _vpaes_consts
+       mtlr    r8
+       li      r11, 0xc0               # Lk_inv
+       li      r10, 0xd0
+       li      r9,  0x160              # Lk_dipt
+       li      r8,  0x170
+       vxor    v7, v7, v7              # 0x00..00
+       vspltisb        v8,4            # 0x04..04
+       vspltisb        v9,0x0f         # 0x0f..0f
+       lvx     $invlo, r12, r11
+       li      r11, 0x180              # Lk_dsbo
+       lvx     $invhi, r12, r10
+       li      r10, 0x190
+       lvx     $iptlo, r12, r9
+       li      r9,  0x1a0              # Lk_dsb9
+       lvx     $ipthi, r12, r8
+       li      r8,  0x1b0
+       lvx     $sbou, r12, r11
+       li      r11, 0x1c0              # Lk_dsbd
+       lvx     $sbot, r12, r10
+       li      r10, 0x1d0
+       lvx     $sb9u, r12, r9
+       li      r9,  0x1e0              # Lk_dsbb
+       lvx     $sb9t, r12, r8
+       li      r8,  0x1f0
+       lvx     $sbdu, r12, r11
+       li      r11, 0x200              # Lk_dsbe
+       lvx     $sbdt, r12, r10
+       li      r10, 0x210
+       lvx     $sbbu, r12, r9
+       lvx     $sbbt, r12, r8
+       lvx     $sbeu, r12, r11
+       lvx     $sbet, r12, r10
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+
+##
+##  Decryption core
+##
+##  Same API as encryption core, but expects _vpaes_decrypt_preheat constants (v7-v23).
+##
+.align 4
+_vpaes_decrypt_core:
+       lwz     r8, 240($key)           # pull rounds
+       li      r9, 16                  # offset of the next key quadword
+       lvx     v5, 0, $key             # vmovdqu       (%r9),  %xmm4           # round0 key
+       li      r11, 0x30               # running row offset into Lk_mc_forward, stepped downwards
+       lvx     v6, r9, $key            # quadword following the round0 key
+       addi    r9, r9, 16
+       ?vperm  v5, v5, v6, $keyperm    # align round key
+       vsrb    v1, v0, v8              # vpsrlb        \$4,    %xmm0,  %xmm0
+       vperm   v0, $iptlo, $iptlo, v0  # vpshufb       %xmm1,  %xmm2,  %xmm2
+       vperm   v1, $ipthi, $ipthi, v1  # vpshufb       %xmm0,  %xmm1,  %xmm0
+       vxor    v0, v0, v5              # vpxor %xmm4,  %xmm2,  %xmm2
+       vxor    v0, v0, v1              # vpxor %xmm2,  %xmm0,  %xmm0
+       mtctr   r8                      # CTR = number of rounds
+       b       Ldec_entry
+
+.align 4
+Ldec_loop:
+#
+#  Inverse mix columns
+#
+       lvx     v0, r12, r11            # v5 and v0 are flipped
+                                       # vmovdqa       -0x20(%r10),%xmm4               # 4 : sb9u
+                                       # vmovdqa       -0x10(%r10),%xmm1               # 0 : sb9t
+       vperm   v4, $sb9u, v7, v2       # vpshufb       %xmm2,  %xmm4,  %xmm4           # 4 = sb9u
+       subi    r11, r11, 16
+       vperm   v1, $sb9t, v7, v3       # vpshufb       %xmm3,  %xmm1,  %xmm1           # 0 = sb9t
+       andi.   r11, r11, 0x30          # wrap the row offset, mod 4 rows
+       vxor    v5, v5, v4              # vpxor         %xmm4,  %xmm0,  %xmm0
+                                       # vmovdqa       0x00(%r10),%xmm4                # 4 : sbdu
+       vxor    v5, v5, v1              # vpxor         %xmm1,  %xmm0,  %xmm0           # 0 = ch
+                                       # vmovdqa       0x10(%r10),%xmm1                # 0 : sbdt
+
+       vperm   v4, $sbdu, v7, v2       # vpshufb       %xmm2,  %xmm4,  %xmm4           # 4 = sbdu
+       vperm   v5, v5, v7, v0          # vpshufb       %xmm5,  %xmm0,  %xmm0           # MC ch
+       vperm   v1, $sbdt, v7, v3       # vpshufb       %xmm3,  %xmm1,  %xmm1           # 0 = sbdt
+       vxor    v5, v5, v4              # vpxor         %xmm4,  %xmm0,  %xmm0           # 4 = ch
+                                       # vmovdqa       0x20(%r10),     %xmm4           # 4 : sbbu
+       vxor    v5, v5, v1              # vpxor         %xmm1,  %xmm0,  %xmm0           # 0 = ch
+                                       # vmovdqa       0x30(%r10),     %xmm1           # 0 : sbbt
+
+       vperm   v4, $sbbu, v7, v2       # vpshufb       %xmm2,  %xmm4,  %xmm4           # 4 = sbbu
+       vperm   v5, v5, v7, v0          # vpshufb       %xmm5,  %xmm0,  %xmm0           # MC ch
+       vperm   v1, $sbbt, v7, v3       # vpshufb       %xmm3,  %xmm1,  %xmm1           # 0 = sbbt
+       vxor    v5, v5, v4              # vpxor         %xmm4,  %xmm0,  %xmm0           # 4 = ch
+                                       # vmovdqa       0x40(%r10),     %xmm4           # 4 : sbeu
+       vxor    v5, v5, v1              # vpxor         %xmm1,  %xmm0,  %xmm0           # 0 = ch
+                                       # vmovdqa       0x50(%r10),     %xmm1           # 0 : sbet
+
+       vperm   v4, $sbeu, v7, v2       # vpshufb       %xmm2,  %xmm4,  %xmm4           # 4 = sbeu
+       vperm   v5, v5, v7, v0          # vpshufb       %xmm5,  %xmm0,  %xmm0           # MC ch
+       vperm   v1, $sbet, v7, v3       # vpshufb       %xmm3,  %xmm1,  %xmm1           # 0 = sbet
+       vxor    v0, v5, v4              # vpxor         %xmm4,  %xmm0,  %xmm0           # 4 = ch
+       vxor    v0, v0, v1              # vpxor         %xmm1,  %xmm0,  %xmm0           # 0 = ch
+
+Ldec_entry:
+       # top of round
+       vsrb    v1, v0, v8              # vpsrlb        \$4,    %xmm0,  %xmm0   # 1 = i
+       vperm   v2, $invhi, $invhi, v0  # vpshufb       %xmm1,  %xmm11, %xmm2   # 2 = a/k
+       vxor    v0, v0, v1              # vpxor         %xmm0,  %xmm1,  %xmm1   # 0 = j
+       vperm   v3, $invlo, $invlo, v1  # vpshufb       %xmm0,  %xmm10, %xmm3   # 3 = 1/i
+       vperm   v4, $invlo, $invlo, v0  # vpshufb       %xmm1,  %xmm10, %xmm4   # 4 = 1/j
+       vand    v0, v0, v9              # mask j down to its low nibble
+       vxor    v3, v3, v2              # vpxor         %xmm2,  %xmm3,  %xmm3   # 3 = iak = 1/i + a/k
+       vxor    v4, v4, v2              # vpxor         %xmm2,  %xmm4,  %xmm4   # 4 = jak = 1/j + a/k
+       vperm   v2, $invlo, v7, v3      # vpshufb       %xmm3,  %xmm10, %xmm2   # 2 = 1/iak
+       vmr     v5, v6                  # previously loaded key quadword becomes the low half
+       lvx     v6, r9, $key            # vmovdqu       (%r9),  %xmm0
+       vperm   v3, $invlo, v7, v4      # vpshufb       %xmm4,  %xmm10, %xmm3   # 3 = 1/jak
+       addi    r9, r9, 16
+       vxor    v2, v2, v0              # vpxor         %xmm1,  %xmm2,  %xmm2   # 2 = io
+       ?vperm  v5, v5, v6, $keyperm    # align round key
+       vxor    v3, v3, v1              # vpxor         %xmm0,  %xmm3,  %xmm3   # 3 = jo
+       bdnz    Ldec_loop
+
+       # middle of last round
+       addi    r10, r11, 0x80
+                                       # vmovdqa       0x60(%r10),     %xmm4   # 3 : sbou
+       vperm   v4, $sbou, v7, v2       # vpshufb       %xmm2,  %xmm4,  %xmm4   # 4 = sbou
+                                       # vmovdqa       0x70(%r10),     %xmm1   # 0 : sbot
+       lvx     v2, r12, r10            # vmovdqa       -0x160(%r11),   %xmm2   # .Lk_sr-.Lk_dsbd=-0x160
+       vperm   v1, $sbot, v7, v3       # vpshufb       %xmm3,  %xmm1,  %xmm1   # 0 = sb1t
+       vxor    v4, v4, v5              # vpxor         %xmm0,  %xmm4,  %xmm4   # 4 = sb1u + k
+       vxor    v0, v1, v4              # vpxor         %xmm4,  %xmm1,  %xmm0   # 0 = A
+       vperm   v0, v0, v7, v2          # vpshufb       %xmm2,  %xmm0,  %xmm0
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+
+.globl .vpaes_decrypt
+.align 5
+.vpaes_decrypt:                        # r3 = input block, r4 = output block, r5 = key schedule
+       $STU    $sp,-$FRAME($sp)        # allocate the stack frame
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       mflr    r6
+       mfspr   r7, 256                 # save vrsave
+       stvx    v20,r10,$sp             # spill v20-v31 through two alternating offset registers
+       addi    r10,r10,32
+       stvx    v21,r11,$sp
+       addi    r11,r11,32
+       stvx    v22,r10,$sp
+       addi    r10,r10,32
+       stvx    v23,r11,$sp
+       addi    r11,r11,32
+       stvx    v24,r10,$sp
+       addi    r10,r10,32
+       stvx    v25,r11,$sp
+       addi    r11,r11,32
+       stvx    v26,r10,$sp
+       addi    r10,r10,32
+       stvx    v27,r11,$sp
+       addi    r11,r11,32
+       stvx    v28,r10,$sp
+       addi    r10,r10,32
+       stvx    v29,r11,$sp
+       addi    r11,r11,32
+       stvx    v30,r10,$sp
+       stvx    v31,r11,$sp
+       stw     r7,`$FRAME-4`($sp)      # save vrsave
+       li      r0, -1
+       $PUSH   r6,`$FRAME+$LRSAVE`($sp)
+       mtspr   256, r0                 # preserve all AltiVec registers
+
+       bl      _vpaes_decrypt_preheat
+
+       ?lvsl   $inpperm, 0, $inp       # prepare for unaligned access
+       lvx     v0, 0, $inp
+       addi    $inp, $inp, 15          # 15 is not a typo: lvx ignores the low 4 address bits
+        ?lvsr  $outperm, 0, $out
+       ?lvsl   $keyperm, 0, $key       # prepare for unaligned access
+        vnor   $outmask, v7, v7        # 0xff..ff
+       lvx     $inptail, 0, $inp       # redundant in aligned case
+        ?vperm $outmask, v7, $outmask, $outperm
+        lvx    $outhead, 0, $out
+       ?vperm  v0, v0, $inptail, $inpperm
+
+       bl      _vpaes_decrypt_core
+
+       vperm   v0, v0, v0, $outperm    # rotate right/left
+       vsel    v1, $outhead, v0, $outmask      # merge result with bytes already at the destination
+       vmr     $outhead, v0
+       stvx    v1, 0, $out
+       addi    $out, $out, 15          # 15 is not a typo
+       ########
+
+       lvx     v1, 0, $out             # redundant in aligned case
+       vsel    v1, $outhead, v1, $outmask
+       stvx    v1, 0, $out
+
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       mtlr    r6                      # r6 still holds the return address
+       mtspr   256, r7                 # restore vrsave
+       lvx     v20,r10,$sp             # reload v20-v31
+       addi    r10,r10,32
+       lvx     v21,r11,$sp
+       addi    r11,r11,32
+       lvx     v22,r10,$sp
+       addi    r10,r10,32
+       lvx     v23,r11,$sp
+       addi    r11,r11,32
+       lvx     v24,r10,$sp
+       addi    r10,r10,32
+       lvx     v25,r11,$sp
+       addi    r11,r11,32
+       lvx     v26,r10,$sp
+       addi    r10,r10,32
+       lvx     v27,r11,$sp
+       addi    r11,r11,32
+       lvx     v28,r10,$sp
+       addi    r10,r10,32
+       lvx     v29,r11,$sp
+       addi    r11,r11,32
+       lvx     v30,r10,$sp
+       lvx     v31,r11,$sp
+       addi    $sp,$sp,$FRAME          # release the stack frame
+       blr
+       .long   0
+       .byte   0,12,0x04,1,0x80,0,3,0
+       .long   0
+.size  .vpaes_decrypt,.-.vpaes_decrypt
+
+##
+##  vpaes_cbc_encrypt
+##
+##  CBC-mode entry point.  As used below: r5 = length in bytes (only whole
+##  16-byte blocks are processed; anything under 16 returns at once),
+##  r6 = key schedule, r7 = iv (read on entry, rewritten on exit, may be
+##  unaligned), r8 = direction (zero selects the decrypt loop).
+##  The input and output pointers are named by Perl variables declared
+##  before this section (presumably r3 and r4 - confirm there).
+##
+##  v20-v31, r30, r31, LR and VRSAVE are saved on entry and restored on exit.
+##
+.globl .vpaes_cbc_encrypt
+.align 5
+.vpaes_cbc_encrypt:
+       ${UCMP}i r5,16
+       bltlr-                          # fewer than 16 bytes: nothing to do
+
+       $STU    $sp,-`($FRAME+2*$SIZE_T)`($sp)
+       mflr    r0
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       mfspr   r12, 256
+       stvx    v20,r10,$sp
+       addi    r10,r10,32
+       stvx    v21,r11,$sp
+       addi    r11,r11,32
+       stvx    v22,r10,$sp
+       addi    r10,r10,32
+       stvx    v23,r11,$sp
+       addi    r11,r11,32
+       stvx    v24,r10,$sp
+       addi    r10,r10,32
+       stvx    v25,r11,$sp
+       addi    r11,r11,32
+       stvx    v26,r10,$sp
+       addi    r10,r10,32
+       stvx    v27,r11,$sp
+       addi    r11,r11,32
+       stvx    v28,r10,$sp
+       addi    r10,r10,32
+       stvx    v29,r11,$sp
+       addi    r11,r11,32
+       stvx    v30,r10,$sp
+       stvx    v31,r11,$sp
+       stw     r12,`$FRAME-4`($sp)     # save vrsave
+       $PUSH   r30,`$FRAME+$SIZE_T*0`($sp)
+       $PUSH   r31,`$FRAME+$SIZE_T*1`($sp)
+       li      r9, -16
+       $PUSH   r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
+
+       and     r30, r5, r9             # copy length&-16
+       mr      r5, r6                  # copy pointer to key
+       mr      r31, r7                 # copy pointer to iv
+       # NOTE(review): nothing since the length compare at entry has touched
+       # cr0, and the lt case already left through bltlr-, so this branch
+       # looks unreachable - confirm before relying on it.
+       blt     Lcbc_abort
+       cmpwi   r8, 0                   # test direction
+       li      r6, -1
+       mr      r7, r12                 # copy vrsave
+       mtspr   256, r6                 # preserve all AltiVec registers
+
+       lvx     v24, 0, r31             # load [potentially unaligned] iv
+       li      r9, 15
+       ?lvsl   $inpperm, 0, r31
+       lvx     v25, r9, r31
+       ?vperm  v24, v24, v25, $inpperm
+
+       neg     r8, $inp                # prepare for unaligned access
+        vxor   v7, v7, v7
+       ?lvsl   $keyperm, 0, $key
+        ?lvsr  $outperm, 0, $out
+       ?lvsr   $inpperm, 0, r8         # -$inp
+        vnor   $outmask, v7, v7        # 0xff..ff
+       lvx     $inptail, 0, $inp
+        ?vperm $outmask, v7, $outmask, $outperm
+       addi    $inp, $inp, 15          # 15 is not a typo
+        lvx    $outhead, 0, $out
+
+       beq     Lcbc_decrypt            # cr0 still holds the direction test
+
+       bl      _vpaes_encrypt_preheat
+       li      r0, 16
+
+Lcbc_enc_loop:
+       vmr     v0, $inptail
+       lvx     $inptail, 0, $inp
+       addi    $inp, $inp, 16
+       ?vperm  v0, v0, $inptail, $inpperm
+       vxor    v0, v0, v24             # ^= iv
+
+       bl      _vpaes_encrypt_core
+
+       vmr     v24, v0                 # put aside iv
+       sub.    r30, r30, r0            # len -= 16
+       vperm   v0, v0, v0, $outperm    # rotate right/left
+       vsel    v1, $outhead, v0, $outmask
+       vmr     $outhead, v0
+       stvx    v1, 0, $out
+       addi    $out, $out, 16
+       bne     Lcbc_enc_loop
+
+       b       Lcbc_done
+
+.align 5
+Lcbc_decrypt:
+       bl      _vpaes_decrypt_preheat
+       li      r0, 16
+
+Lcbc_dec_loop:
+       vmr     v0, $inptail
+       lvx     $inptail, 0, $inp
+       addi    $inp, $inp, 16
+       ?vperm  v0, v0, $inptail, $inpperm
+       vmr     v25, v0                 # put aside input
+
+       bl      _vpaes_decrypt_core
+
+       vxor    v0, v0, v24             # ^= iv
+       vmr     v24, v25                # this ciphertext block is the next iv
+       sub.    r30, r30, r0            # len -= 16
+       vperm   v0, v0, v0, $outperm    # rotate right/left
+       vsel    v1, $outhead, v0, $outmask
+       vmr     $outhead, v0
+       stvx    v1, 0, $out
+       addi    $out, $out, 16
+       bne     Lcbc_dec_loop
+
+Lcbc_done:
+       addi    $out, $out, -1
+       lvx     v1, 0, $out             # redundant in aligned case
+       vsel    v1, $outhead, v1, $outmask
+       stvx    v1, 0, $out
+
+       neg     r8, r31                 # write [potentially unaligned] iv
+       ?lvsl   $outperm, 0, r8
+       li      r6, 15
+       vnor    $outmask, v7, v7        # 0xff..ff
+       ?vperm  $outmask, v7, $outmask, $outperm
+       lvx     $outhead, 0, r31
+       vperm   v24, v24, v24, $outperm # rotate right/left
+       vsel    v0, $outhead, v24, $outmask
+       lvx     v1, r6, r31
+       stvx    v0, 0, r31
+       vsel    v1, v24, v1, $outmask
+       stvx    v1, r6, r31
+
+       mtspr   256, r7                 # restore vrsave
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       lvx     v20,r10,$sp
+       addi    r10,r10,32
+       lvx     v21,r11,$sp
+       addi    r11,r11,32
+       lvx     v22,r10,$sp
+       addi    r10,r10,32
+       lvx     v23,r11,$sp
+       addi    r11,r11,32
+       lvx     v24,r10,$sp
+       addi    r10,r10,32
+       lvx     v25,r11,$sp
+       addi    r11,r11,32
+       lvx     v26,r10,$sp
+       addi    r10,r10,32
+       lvx     v27,r11,$sp
+       addi    r11,r11,32
+       lvx     v28,r10,$sp
+       addi    r10,r10,32
+       lvx     v29,r11,$sp
+       addi    r11,r11,32
+       lvx     v30,r10,$sp
+       lvx     v31,r11,$sp
+Lcbc_abort:
+       $POP    r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
+       $POP    r30,`$FRAME+$SIZE_T*0`($sp)
+       $POP    r31,`$FRAME+$SIZE_T*1`($sp)
+       mtlr    r0
+       addi    $sp,$sp,`$FRAME+$SIZE_T*2`
+       blr
+       .long   0
+       .byte   0,12,0x04,1,0x80,2,6,0
+       .long   0
+.size  .vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt
+___
+}\f
+{
+my ($inp,$bits,$out)=map("r$_",(3..5));
+my $dir="cr1";
+my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24));
+
+$code.=<<___;
+########################################################
+##                                                    ##
+##                  AES key schedule                  ##
+##                                                    ##
+########################################################
+##
+##  _vpaes_key_preheat
+##
+##  Loads the key-schedule constant tables into vector registers.
+##  Every table load is indexed off r12, which is expected to hold the
+##  table base after the call to Lconsts (defined outside this section).
+##  LR is parked in r8 around that call; r8-r11 are then reused as
+##  table offsets.
+##
+##  On return: v8 = 0x04 in every byte, v9 = zero, v10/v11 = Lk_inv,
+##  v12/v13 = Lk_ipt, v14/v15 = Lk_sb1, v16-v23 = Lk_dksd, Lk_dksb,
+##  Lk_dkse and Lk_dks9, v24 = Lk_rcon, v25 = Lk_mc_forward[0],
+##  v26 = Lks63.
+##
+.align 4
+_vpaes_key_preheat:
+       mflr    r8                      # bl below overwrites LR
+       bl      Lconsts
+       mtlr    r8
+       li      r11, 0xc0               # Lk_inv
+       li      r10, 0xd0
+       li      r9,  0xe0               # L_ipt
+       li      r8,  0xf0
+
+       vspltisb        v8,4            # 0x04..04
+       vxor    v9,v9,v9                # 0x00..00
+       lvx     $invlo, r12, r11        # Lk_inv
+       li      r11, 0x120
+       lvx     $invhi, r12, r10
+       li      r10, 0x130
+       lvx     $iptlo, r12, r9         # Lk_ipt
+       li      r9, 0x220
+       lvx     $ipthi, r12, r8
+       li      r8, 0x230
+
+       lvx     v14, r12, r11           # Lk_sb1
+       li      r11, 0x240
+       lvx     v15, r12, r10
+       li      r10, 0x250
+
+       lvx     v16, r12, r9            # Lk_dksd
+       li      r9, 0x260
+       lvx     v17, r12, r8
+       li      r8, 0x270
+       lvx     v18, r12, r11           # Lk_dksb
+       li      r11, 0x280
+       lvx     v19, r12, r10
+       li      r10, 0x290
+       lvx     v20, r12, r9            # Lk_dkse
+       li      r9, 0x2a0
+       lvx     v21, r12, r8
+       li      r8, 0x2b0
+       lvx     v22, r12, r11           # Lk_dks9
+       lvx     v23, r12, r10
+
+       lvx     v24, r12, r9            # Lk_rcon
+       lvx     v25, 0, r12             # Lk_mc_forward[0]
+       lvx     v26, r12, r8            # Lks63
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+
+##
+##  _vpaes_schedule_core
+##
+##  Shared body of the key schedule, entered from vpaes_set_encrypt_key
+##  and vpaes_set_decrypt_key.  The callers set the direction in cr1
+##  (eq = encrypt, ne = decrypt) and seed r8; LR is kept in r7 for the
+##  whole routine because every helper is reached through bl.
+##
+##  r3 = user key (may be unaligned), r4 = key size in bits,
+##  r5 = output pointer into the key schedule (may be unaligned).
+##  r10 is pointed at the table at r12+0x80 (Lk_sr in the x86 reference)
+##  and r8 indexes into it.
+##
+##  The right-hand comments quote the x86_64 instructions this code was
+##  ported from; v0-v7 stand in for xmm0-xmm7.
+##
+.align 4
+_vpaes_schedule_core:
+       mflr    r7                      # restored at Lschedule_mangle_done
+
+       bl      _vpaes_key_preheat      # load the tables
+
+       #lvx    v0, 0, $inp             # vmovdqu       (%rdi), %xmm0           # load key (unaligned)
+       neg     r8, $inp                # prepare for unaligned access
+       lvx     v0, 0, $inp
+       addi    $inp, $inp, 15          # 15 is not typo
+       ?lvsr   $inpperm, 0, r8         # -$inp
+       lvx     v6, 0, $inp             # v6 serves as inptail
+       addi    $inp, $inp, 8
+       ?vperm  v0, v0, v6, $inpperm
+
+       # input transform
+       vmr     v3, v0                  # vmovdqa       %xmm0,  %xmm3
+       bl      _vpaes_schedule_transform
+       vmr     v7, v0                  # vmovdqa       %xmm0,  %xmm7
+
+       bne     $dir, Lschedule_am_decrypting
+
+       # encrypting, output zeroth round key after transform
+       li      r8, 0x30                # mov   \$0x30,%r8d
+       addi    r10, r12, 0x80          # lea   .Lk_sr(%rip),%r10
+
+       ?lvsr   $outperm, 0, $out       # prepare for unaligned access
+       vnor    $outmask, v9, v9        # 0xff..ff
+       lvx     $outhead, 0, $out
+       ?vperm  $outmask, v9, $outmask, $outperm
+
+       #stvx   v0, 0, $out             # vmovdqu       %xmm0,  (%rdx)
+       vperm   v1, v0, v0, $outperm    # rotate right/left
+       vsel    v2, $outhead, v1, $outmask
+       vmr     $outhead, v1
+       stvx    v2, 0, $out
+       b       Lschedule_go
+
+Lschedule_am_decrypting:
+       srwi    r8, $bits, 1            # shr   \$1,%r8d
+       andi.   r8, r8, 32              # and   \$32,%r8d
+       xori    r8, r8, 32              # xor   \$32,%r8d       # nbits==192?0:32
+       addi    r10, r12, 0x80          # lea   .Lk_sr(%rip),%r10
+       # decrypting, output zeroth round key after shiftrows
+       lvx     v1, r8, r10             # vmovdqa       (%r8,%r10),     %xmm1
+       vperm   v4, v3, v3, v1          # vpshufb       %xmm1,  %xmm3,  %xmm3
+
+       neg     r0, $out                # prepare for unaligned access
+       ?lvsl   $outperm, 0, r0
+       addi    $out, $out, 15          # 15 is not typo
+       vnor    $outmask, v9, v9        # 0xff..ff
+       lvx     $outhead, 0, $out
+       ?vperm  $outmask, $outmask, v9, $outperm
+
+       #stvx   v4, 0, $out             # vmovdqu       %xmm3,  (%rdx)
+       vperm   v4, v4, v4, $outperm    # rotate right/left
+       vsel    v2, $outhead, v4, $outmask
+       vmr     $outhead, v4
+       stvx    v2, 0, $out
+       xori    r8, r8, 0x30            # xor   \$0x30, %r8
+
+Lschedule_go:
+       cmplwi  $bits, 192              # cmp   \$192,  %esi
+       bgt     Lschedule_256
+       beq     Lschedule_192
+       # 128: fall through
+
+##
+##  .schedule_128
+##
+##  128-bit specific part of key schedule.
+##
+##  This schedule is really simple, because all its parts
+##  are accomplished by the subroutines.
+##
+Lschedule_128:
+       li      r0, 10                  # mov   \$10, %esi
+       mtctr   r0
+
+Loop_schedule_128:
+       bl      _vpaes_schedule_round
+       bdz     Lschedule_mangle_last   # dec   %esi
+       bl      _vpaes_schedule_mangle  # write output
+       b       Loop_schedule_128
+
+##
+##  .aes_schedule_192
+##
+##  192-bit specific part of key schedule.
+##
+##  The main body of this schedule is the same as the 128-bit
+##  schedule, but with more smearing.  The long, high side is
+##  stored in %xmm7 as before, and the short, low side is in
+##  the high bits of %xmm6.
+##
+##  This schedule is somewhat nastier, however, because each
+##  round produces 192 bits of key material, or 1.5 round keys.
+##  Therefore, on each cycle we do 2 rounds and produce 3 round
+##  keys.
+##
+.align 4
+Lschedule_192:
+       li      r0, 4                   # mov   \$4,    %esi
+       lvx     v0, 0, $inp
+       ?vperm  v0, v6, v0, $inpperm
+       ?vsldoi v0, v3, v0, 8           # vmovdqu       8(%rdi),%xmm0           # load key part 2 (very unaligned)
+       bl      _vpaes_schedule_transform       # input transform
+       ?vsldoi v6, v0, v9, 8
+       ?vsldoi v6, v9, v6, 8           # clobber "low" side with zeros
+       mtctr   r0
+
+Loop_schedule_192:
+       bl      _vpaes_schedule_round
+       ?vsldoi v0, v6, v0, 8           # vpalignr      \$8,%xmm6,%xmm0,%xmm0
+       bl      _vpaes_schedule_mangle  # save key n
+       bl      _vpaes_schedule_192_smear
+       bl      _vpaes_schedule_mangle  # save key n+1
+       bl      _vpaes_schedule_round
+       bdz     Lschedule_mangle_last   # dec   %esi
+       bl      _vpaes_schedule_mangle  # save key n+2
+       bl      _vpaes_schedule_192_smear
+       b       Loop_schedule_192
+
+##
+##  .aes_schedule_256
+##
+##  256-bit specific part of key schedule.
+##
+##  The structure here is very similar to the 128-bit
+##  schedule, but with an additional "low side" in
+##  %xmm6.  The low side's rounds are the same as the
+##  high side's, except no rcon and no rotation.
+##
+.align 4
+Lschedule_256:
+       li      r0, 7                   # mov   \$7, %esi
+       addi    $inp, $inp, 8
+       lvx     v0, 0, $inp             # vmovdqu       16(%rdi),%xmm0          # load key part 2 (unaligned)
+       ?vperm  v0, v6, v0, $inpperm
+       bl      _vpaes_schedule_transform       # input transform
+       mtctr   r0
+
+Loop_schedule_256:
+       bl      _vpaes_schedule_mangle  # output low result
+       vmr     v6, v0                  # vmovdqa       %xmm0,  %xmm6           # save cur_lo in xmm6
+
+       # high round
+       bl      _vpaes_schedule_round
+       bdz     Lschedule_mangle_last   # dec   %esi
+       bl      _vpaes_schedule_mangle  
+
+       # low round. swap xmm7 and xmm6
+       ?vspltw v0, v0, 3               # vpshufd       \$0xFF, %xmm0,  %xmm0
+       vmr     v5, v7                  # vmovdqa       %xmm7,  %xmm5
+       vmr     v7, v6                  # vmovdqa       %xmm6,  %xmm7
+       bl      _vpaes_schedule_low_round
+       vmr     v7, v5                  # vmovdqa       %xmm5,  %xmm7
+       
+       b       Loop_schedule_256
+##
+##  .aes_schedule_mangle_last
+##
+##  Mangler for last round of key schedule
+##  Mangles %xmm0
+##    when encrypting, outputs out(%xmm0) ^ 63
+##    when decrypting, outputs unskew(%xmm0)
+##
+##  Always called right before return... jumps to cleanup and exits
+##
+.align 4
+Lschedule_mangle_last:
+       # schedule last round key from xmm0
+       li      r11, 0x2e0              # lea   .Lk_deskew(%rip),%r11
+       li      r9,  0x2f0
+       bne     $dir, Lschedule_mangle_last_dec
+
+       # encrypting
+       lvx     v1, r8, r10             # vmovdqa       (%r8,%r10),%xmm1
+       li      r11, 0x2c0              # lea           .Lk_opt(%rip),  %r11    # prepare to output transform
+       li      r9,  0x2d0              # prepare to output transform
+       vperm   v0, v0, v0, v1          # vpshufb       %xmm1,  %xmm0,  %xmm0   # output permute
+
+       lvx     $iptlo, r11, r12        # reload $ipt
+       lvx     $ipthi, r9, r12
+       addi    $out, $out, 16          # add   \$16,   %rdx
+       vxor    v0, v0, v26             # vpxor         .Lk_s63(%rip),  %xmm0,  %xmm0
+       bl      _vpaes_schedule_transform       # output transform
+
+       #stvx   v0, r0, $out            # vmovdqu       %xmm0,  (%rdx)          # save last key
+       vperm   v0, v0, v0, $outperm    # rotate right/left
+       vsel    v2, $outhead, v0, $outmask
+       vmr     $outhead, v0
+       stvx    v2, 0, $out
+
+       addi    $out, $out, 15          # 15 is not typo
+       lvx     v1, 0, $out             # redundant in aligned case
+       vsel    v1, $outhead, v1, $outmask
+       stvx    v1, 0, $out
+       b       Lschedule_mangle_done
+
+.align 4
+Lschedule_mangle_last_dec:
+       lvx     $iptlo, r11, r12        # reload $ipt
+       lvx     $ipthi, r9,  r12
+       addi    $out, $out, -16         # add   \$-16,  %rdx 
+       vxor    v0, v0, v26             # vpxor .Lk_s63(%rip),  %xmm0,  %xmm0
+       bl      _vpaes_schedule_transform       # output transform
+
+       #stvx   v0, r0, $out            # vmovdqu       %xmm0,  (%rdx)          # save last key
+       vperm   v0, v0, v0, $outperm    # rotate right/left
+       vsel    v2, $outhead, v0, $outmask
+       vmr     $outhead, v0
+       stvx    v2, 0, $out
+
+       addi    $out, $out, -15         # -15 is not typo
+       lvx     v1, 0, $out             # redundant in aligned case
+       vsel    v1, $outhead, v1, $outmask
+       stvx    v1, 0, $out
+
+Lschedule_mangle_done:
+       mtlr    r7
+       # cleanup: zero the working vector registers v0-v7
+       vxor    v0, v0, v0              # vpxor         %xmm0,  %xmm0,  %xmm0
+       vxor    v1, v1, v1              # vpxor         %xmm1,  %xmm1,  %xmm1
+       vxor    v2, v2, v2              # vpxor         %xmm2,  %xmm2,  %xmm2
+       vxor    v3, v3, v3              # vpxor         %xmm3,  %xmm3,  %xmm3
+       vxor    v4, v4, v4              # vpxor         %xmm4,  %xmm4,  %xmm4
+       vxor    v5, v5, v5              # vpxor         %xmm5,  %xmm5,  %xmm5
+       vxor    v6, v6, v6              # vpxor         %xmm6,  %xmm6,  %xmm6
+       vxor    v7, v7, v7              # vpxor         %xmm7,  %xmm7,  %xmm7
+
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+
+##
+##  .aes_schedule_192_smear
+##
+##  Smear the short, low side in the 192-bit key schedule.
+##
+##  Inputs:
+##    %xmm7: high side, b  a  x  y
+##    %xmm6:  low side, d  c  0  0
+##    %xmm13: 0
+##
+##  Outputs:
+##    %xmm6: b+c+d  b+c  0  0
+##    %xmm0: b+c+d  b+c  b  a
+##
+##  PPC port: the register names above are those of the x86 reference.
+##  This code uses v7, v6 and v0 in their place and takes its zeros
+##  from v9; v1 is used as scratch.
+##
+.align 4
+_vpaes_schedule_192_smear:
+       ?vspltw v0, v7, 3
+       ?vsldoi v1, v9, v6, 12          # vpshufd       \$0x80, %xmm6,  %xmm1   # d c 0 0 -> c 0 0 0
+       ?vsldoi v0, v7, v0, 8           # vpshufd       \$0xFE, %xmm7,  %xmm0   # b a _ _ -> b b b a
+       vxor    v6, v6, v1              # vpxor         %xmm1,  %xmm6,  %xmm6   # -> c+d c 0 0
+       vxor    v6, v6, v0              # vpxor         %xmm0,  %xmm6,  %xmm6   # -> b+c+d b+c b a
+       vmr     v0, v6
+       ?vsldoi v6, v6, v9, 8
+       ?vsldoi v6, v9, v6, 8           # clobber low side with zeros
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+
+##
+##  .aes_schedule_round
+##
+##  Runs one main round of the key schedule on %xmm0, %xmm7
+##
+##  Specifically, runs subbytes on the high dword of %xmm0
+##  then rotates it by one byte and xors into the low dword of
+##  %xmm7.
+##
+##  Adds rcon from low byte of %xmm8, then rotates %xmm8 for
+##  next rcon.
+##
+##  Smears the dwords of %xmm7 by xoring the low into the
+##  second low, result into third, result into highest.
+##
+##  Returns results in %xmm7 = %xmm0.
+##  Clobbers %xmm1-%xmm4, %r11.
+##
+##  PPC port: the register names above are those of the x86 reference.
+##  Here rcon lives in v24, v9 supplies zeros, v8 is the shift count
+##  for vsrb, v10/v11 and v14/v15 are the lookup tables and v26 is the
+##  0x63 constant, all as set up by _vpaes_key_preheat.  The result is
+##  left in both v0 and v7; v1-v4 are clobbered and no general-purpose
+##  register is touched.
+##
+##  _vpaes_schedule_low_round is a second entry point that skips the
+##  rcon and rotate steps.
+##
+.align 4
+_vpaes_schedule_round:
+       # extract rcon from xmm8
+       #vxor   v4, v4, v4              # vpxor         %xmm4,  %xmm4,  %xmm4
+       ?vsldoi v1, $rcon, v9, 15       # vpalignr      \$15,   %xmm8,  %xmm4,  %xmm1
+       ?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr      \$15,   %xmm8,  %xmm8,  %xmm8
+       vxor    v7, v7, v1              # vpxor         %xmm1,  %xmm7,  %xmm7
+
+       # rotate
+       ?vspltw v0, v0, 3               # vpshufd       \$0xFF, %xmm0,  %xmm0
+       ?vsldoi v0, v0, v0, 1           # vpalignr      \$1,    %xmm0,  %xmm0,  %xmm0
+
+       # fall through...
+
+       # low round: same as high round, but no rotation and no rcon.
+_vpaes_schedule_low_round:
+       # smear xmm7
+       ?vsldoi v1, v9, v7, 12          # vpslldq       \$4,    %xmm7,  %xmm1
+       vxor    v7, v7, v1              # vpxor         %xmm1,  %xmm7,  %xmm7
+       vspltisb        v1, 0x0f        # 0x0f..0f
+       ?vsldoi v4, v9, v7, 8           # vpslldq       \$8,    %xmm7,  %xmm4
+
+       # subbytes
+       vand    v1, v1, v0              # vpand         %xmm9,  %xmm0,  %xmm1           # 0 = k
+       vsrb    v0, v0, v8              # vpsrlb        \$4,    %xmm0,  %xmm0           # 1 = i
+        vxor   v7, v7, v4              # vpxor         %xmm4,  %xmm7,  %xmm7
+       vperm   v2, $invhi, v9, v1      # vpshufb       %xmm1,  %xmm11, %xmm2           # 2 = a/k
+       vxor    v1, v1, v0              # vpxor         %xmm0,  %xmm1,  %xmm1           # 0 = j
+       vperm   v3, $invlo, v9, v0      # vpshufb       %xmm0,  %xmm10, %xmm3           # 3 = 1/i
+       vxor    v3, v3, v2              # vpxor         %xmm2,  %xmm3,  %xmm3           # 3 = iak = 1/i + a/k
+       vperm   v4, $invlo, v9, v1      # vpshufb       %xmm1,  %xmm10, %xmm4           # 4 = 1/j
+        vxor   v7, v7, v26             # vpxor         .Lk_s63(%rip),  %xmm7,  %xmm7
+       vperm   v3, $invlo, v9, v3      # vpshufb       %xmm3,  %xmm10, %xmm3           # 2 = 1/iak
+       vxor    v4, v4, v2              # vpxor         %xmm2,  %xmm4,  %xmm4           # 4 = jak = 1/j + a/k
+       vperm   v2, $invlo, v9, v4      # vpshufb       %xmm4,  %xmm10, %xmm2           # 3 = 1/jak
+       vxor    v3, v3, v1              # vpxor         %xmm1,  %xmm3,  %xmm3           # 2 = io
+       vxor    v2, v2, v0              # vpxor         %xmm0,  %xmm2,  %xmm2           # 3 = jo
+       vperm   v4, v15, v9, v3         # vpshufb       %xmm3,  %xmm13, %xmm4           # 4 = sbou
+       vperm   v1, v14, v9, v2         # vpshufb       %xmm2,  %xmm12, %xmm1           # 0 = sb1t
+       vxor    v1, v1, v4              # vpxor         %xmm4,  %xmm1,  %xmm1           # 0 = sbox output
+
+       # add in smeared stuff
+       vxor    v0, v1, v7              # vpxor         %xmm7,  %xmm1,  %xmm0
+       vxor    v7, v1, v7              # vmovdqa       %xmm0,  %xmm7
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+
+##
+##  .aes_schedule_transform
+##
+##  Linear-transform %xmm0 according to tables at (%r11)
+##
+##  Requires that %xmm9 = 0x0F0F... as in preheat
+##  Output in %xmm0
+##  Clobbers %xmm2
+##
+##  PPC port: the lines above describe the x86 reference.  Here the
+##  two table halves are taken from registers v12 (lo) and v13 (hi)
+##  rather than from memory, callers reload those registers to pick a
+##  different table, and v8 supplies the shift count for vsrb.  Input
+##  and output are v0; v2 is clobbered.  The low-nibble mask of the
+##  reference is commented out - presumably vperm with both sources
+##  equal makes it unnecessary; confirm against the ISA.
+##
+.align 4
+_vpaes_schedule_transform:
+       #vand   v1, v0, v9              # vpand         %xmm9,  %xmm0,  %xmm1
+       vsrb    v2, v0, v8              # vpsrlb        \$4,    %xmm0,  %xmm0
+                                       # vmovdqa       (%r11), %xmm2   # lo
+       vperm   v0, $iptlo, $iptlo, v0  # vpshufb       %xmm1,  %xmm2,  %xmm2
+                                       # vmovdqa       16(%r11),       %xmm1 # hi
+       vperm   v2, $ipthi, $ipthi, v2  # vpshufb       %xmm0,  %xmm1,  %xmm0
+       vxor    v0, v0, v2              # vpxor         %xmm2,  %xmm0,  %xmm0
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+
+##
+##  .aes_schedule_mangle
+##
+##  Mangle xmm0 from (basis-transformed) standard version
+##  to our version.
+##
+##  On encrypt,
+##    xor with 0x63
+##    multiply by circulant 0,1,1,1
+##    apply shiftrows transform
+##
+##  On decrypt,
+##    xor with 0x63
+##    multiply by "inverse mixcolumns" circulant E,B,D,9
+##    deskew
+##    apply shiftrows transform
+##
+##
+##  Writes out to (%rdx), and increments or decrements it
+##  Keeps track of round number mod 4 in %r8
+##  Preserves xmm0
+##  Clobbers xmm1-xmm5
+##
+##  PPC port: the register names above are those of the x86 reference.
+##  Here the direction is read from cr1 (ne = decrypt), the output
+##  pointer is r5 and moves by 16 per call (up when encrypting, down
+##  when decrypting), r8 is stepped by -16 modulo 0x40 and indexes the
+##  table at r10, and v25 holds the mc_forward permutation.  v0 is
+##  preserved; v1-v4 and the running output-head register are
+##  overwritten.  The store goes through the perm/select sequence so
+##  that an unaligned output pointer works.
+##
+.align 4
+_vpaes_schedule_mangle:
+       #vmr    v4, v0                  # vmovdqa       %xmm0,  %xmm4   # save xmm0 for later
+                                       # vmovdqa       .Lk_mc_forward(%rip),%xmm5
+       bne     $dir, Lschedule_mangle_dec
+
+       # encrypting
+       vxor    v4, v0, v26             # vpxor .Lk_s63(%rip),  %xmm0,  %xmm4
+       addi    $out, $out, 16          # add   \$16,   %rdx
+       vperm   v4, v4, v4, v25         # vpshufb       %xmm5,  %xmm4,  %xmm4
+       vperm   v1, v4, v4, v25         # vpshufb       %xmm5,  %xmm4,  %xmm1
+       vperm   v3, v1, v1, v25         # vpshufb       %xmm5,  %xmm1,  %xmm3
+       vxor    v4, v4, v1              # vpxor         %xmm1,  %xmm4,  %xmm4
+       lvx     v1, r8, r10             # vmovdqa       (%r8,%r10),     %xmm1
+       vxor    v3, v3, v4              # vpxor         %xmm4,  %xmm3,  %xmm3
+
+       vperm   v3, v3, v3, v1          # vpshufb       %xmm1,  %xmm3,  %xmm3
+       addi    r8, r8, -16             # add   \$-16,  %r8
+       andi.   r8, r8, 0x30            # and   \$0x30, %r8
+
+       #stvx   v3, 0, $out             # vmovdqu       %xmm3,  (%rdx)
+       vperm   v1, v3, v3, $outperm    # rotate right/left
+       vsel    v2, $outhead, v1, $outmask
+       vmr     $outhead, v1
+       stvx    v2, 0, $out
+       blr
+
+.align 4
+Lschedule_mangle_dec:
+       # inverse mix columns
+                                       # lea   .Lk_dksd(%rip),%r11
+       vsrb    v1, v0, v8              # vpsrlb        \$4,    %xmm4,  %xmm1   # 1 = hi
+       #and    v4, v0, v9              # vpand         %xmm9,  %xmm4,  %xmm4   # 4 = lo
+
+                                       # vmovdqa       0x00(%r11),     %xmm2
+       vperm   v2, v16, v16, v0        # vpshufb       %xmm4,  %xmm2,  %xmm2
+                                       # vmovdqa       0x10(%r11),     %xmm3
+       vperm   v3, v17, v17, v1        # vpshufb       %xmm1,  %xmm3,  %xmm3
+       vxor    v3, v3, v2              # vpxor         %xmm2,  %xmm3,  %xmm3
+       vperm   v3, v3, v9, v25         # vpshufb       %xmm5,  %xmm3,  %xmm3
+
+                                       # vmovdqa       0x20(%r11),     %xmm2
+       vperm   v2, v18, v18, v0        # vpshufb       %xmm4,  %xmm2,  %xmm2
+       vxor    v2, v2, v3              # vpxor         %xmm3,  %xmm2,  %xmm2
+                                       # vmovdqa       0x30(%r11),     %xmm3
+       vperm   v3, v19, v19, v1        # vpshufb       %xmm1,  %xmm3,  %xmm3
+       vxor    v3, v3, v2              # vpxor         %xmm2,  %xmm3,  %xmm3
+       vperm   v3, v3, v9, v25         # vpshufb       %xmm5,  %xmm3,  %xmm3
+
+                                       # vmovdqa       0x40(%r11),     %xmm2
+       vperm   v2, v20, v20, v0        # vpshufb       %xmm4,  %xmm2,  %xmm2
+       vxor    v2, v2, v3              # vpxor         %xmm3,  %xmm2,  %xmm2
+                                       # vmovdqa       0x50(%r11),     %xmm3
+       vperm   v3, v21, v21, v1        # vpshufb       %xmm1,  %xmm3,  %xmm3
+       vxor    v3, v3, v2              # vpxor         %xmm2,  %xmm3,  %xmm3
+
+                                       # vmovdqa       0x60(%r11),     %xmm2
+       vperm   v2, v22, v22, v0        # vpshufb       %xmm4,  %xmm2,  %xmm2
+       vperm   v3, v3, v9, v25         # vpshufb       %xmm5,  %xmm3,  %xmm3
+                                       # vmovdqa       0x70(%r11),     %xmm4
+       vperm   v4, v23, v23, v1        # vpshufb       %xmm1,  %xmm4,  %xmm4
+       lvx     v1, r8, r10             # vmovdqa       (%r8,%r10),     %xmm1
+       vxor    v2, v2, v3              # vpxor         %xmm3,  %xmm2,  %xmm2
+       vxor    v3, v4, v2              # vpxor         %xmm2,  %xmm4,  %xmm3
+
+       addi    $out, $out, -16         # add   \$-16,  %rdx
+
+       vperm   v3, v3, v3, v1          # vpshufb       %xmm1,  %xmm3,  %xmm3
+       addi    r8, r8, -16             # add   \$-16,  %r8
+       andi.   r8, r8, 0x30            # and   \$0x30, %r8
+
+       #stvx   v3, 0, $out             # vmovdqu       %xmm3,  (%rdx)
+       vperm   v1, v3, v3, $outperm    # rotate right/left
+       vsel    v2, $outhead, v1, $outmask
+       vmr     $outhead, v1
+       stvx    v2, 0, $out
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+
+##
+##  vpaes_set_encrypt_key
+##
+##  r3 = user key, r4 = key size in bits, r5 = key schedule to fill in.
+##  Stores the round count (bits/32 + 6) as a 32-bit word at offset 240
+##  of the output, sets cr1 to eq to select the encrypt direction,
+##  seeds r8 with 0x30 and runs _vpaes_schedule_core.  Always returns 0
+##  in r3.
+##
+##  v20-v31 are saved on entry and restored on exit.  VRSAVE is written
+##  to the frame but restored from r6, so r6 must survive the call.
+##
+.globl .vpaes_set_encrypt_key
+.align 5
+.vpaes_set_encrypt_key:
+       $STU    $sp,-$FRAME($sp)
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       mflr    r0
+       mfspr   r6, 256                 # save vrsave
+       stvx    v20,r10,$sp
+       addi    r10,r10,32
+       stvx    v21,r11,$sp
+       addi    r11,r11,32
+       stvx    v22,r10,$sp
+       addi    r10,r10,32
+       stvx    v23,r11,$sp
+       addi    r11,r11,32
+       stvx    v24,r10,$sp
+       addi    r10,r10,32
+       stvx    v25,r11,$sp
+       addi    r11,r11,32
+       stvx    v26,r10,$sp
+       addi    r10,r10,32
+       stvx    v27,r11,$sp
+       addi    r11,r11,32
+       stvx    v28,r10,$sp
+       addi    r10,r10,32
+       stvx    v29,r11,$sp
+       addi    r11,r11,32
+       stvx    v30,r10,$sp
+       stvx    v31,r11,$sp
+       stw     r6,`$FRAME-4`($sp)      # save vrsave
+       li      r7, -1
+       $PUSH   r0, `$FRAME+$LRSAVE`($sp)
+       mtspr   256, r7                 # preserve all AltiVec registers
+
+       srwi    r9, $bits, 5            # shr   \$5,%eax
+       addi    r9, r9, 6               # add   \$5,%eax        # x86 reference adds 5; this port adds 6
+       stw     r9, 240($out)           # mov   %eax,240(%rdx)  # AES_KEY->rounds = nbits/32+6 in this port
+
+       cmplw   $dir, $bits, $bits      # set encrypt direction
+       li      r8, 0x30                # mov   \$0x30,%r8d
+       bl      _vpaes_schedule_core
+
+       $POP    r0, `$FRAME+$LRSAVE`($sp)
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       mtspr   256, r6                 # restore vrsave
+       mtlr    r0
+       xor     r3, r3, r3              # return value 0
+       lvx     v20,r10,$sp
+       addi    r10,r10,32
+       lvx     v21,r11,$sp
+       addi    r11,r11,32
+       lvx     v22,r10,$sp
+       addi    r10,r10,32
+       lvx     v23,r11,$sp
+       addi    r11,r11,32
+       lvx     v24,r10,$sp
+       addi    r10,r10,32
+       lvx     v25,r11,$sp
+       addi    r11,r11,32
+       lvx     v26,r10,$sp
+       addi    r10,r10,32
+       lvx     v27,r11,$sp
+       addi    r11,r11,32
+       lvx     v28,r10,$sp
+       addi    r10,r10,32
+       lvx     v29,r11,$sp
+       addi    r11,r11,32
+       lvx     v30,r10,$sp
+       lvx     v31,r11,$sp
+       addi    $sp,$sp,$FRAME
+       blr
+       .long   0
+       .byte   0,12,0x04,1,0x80,0,3,0
+       .long   0
+.size  .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key
+
+##
+##  vpaes_set_decrypt_key
+##
+##  r3 = user key, r4 = key size in bits, r5 = key schedule to fill in.
+##  Stores the round count (bits/32 + 6) as a 32-bit word at offset 240
+##  of the output, then advances the output pointer by 16 * rounds so
+##  that the schedule core, which steps the pointer down by 16 per key
+##  in the decrypt direction, fills the schedule from the far end.
+##  cr1 is set to ne (for any non-zero bit count) to select decrypt and
+##  r8 is seeded with 0 for 192-bit keys, 32 otherwise.  Always returns
+##  0 in r3.
+##
+##  v20-v31 are saved on entry and restored on exit.  VRSAVE is written
+##  to the frame but restored from r6, so r6 must survive the call.
+##
+.globl .vpaes_set_decrypt_key
+.align 4
+.vpaes_set_decrypt_key:
+       $STU    $sp,-$FRAME($sp)
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       mflr    r0
+       mfspr   r6, 256                 # save vrsave
+       stvx    v20,r10,$sp
+       addi    r10,r10,32
+       stvx    v21,r11,$sp
+       addi    r11,r11,32
+       stvx    v22,r10,$sp
+       addi    r10,r10,32
+       stvx    v23,r11,$sp
+       addi    r11,r11,32
+       stvx    v24,r10,$sp
+       addi    r10,r10,32
+       stvx    v25,r11,$sp
+       addi    r11,r11,32
+       stvx    v26,r10,$sp
+       addi    r10,r10,32
+       stvx    v27,r11,$sp
+       addi    r11,r11,32
+       stvx    v28,r10,$sp
+       addi    r10,r10,32
+       stvx    v29,r11,$sp
+       addi    r11,r11,32
+       stvx    v30,r10,$sp
+       stvx    v31,r11,$sp
+       stw     r6,`$FRAME-4`($sp)      # save vrsave
+       li      r7, -1
+       $PUSH   r0, `$FRAME+$LRSAVE`($sp)
+       mtspr   256, r7                 # preserve all AltiVec registers
+
+       srwi    r9, $bits, 5            # shr   \$5,%eax
+       addi    r9, r9, 6               # add   \$5,%eax        # x86 reference adds 5; this port adds 6
+       stw     r9, 240($out)           # mov   %eax,240(%rdx)  # AES_KEY->rounds = nbits/32+6 in this port
+
+       slwi    r9, r9, 4               # shl   \$4,%eax
+       add     $out, $out, r9          # lea   (%rdx,%rax),%rdx
+
+       cmplwi  $dir, $bits, 0          # set decrypt direction
+       srwi    r8, $bits, 1            # shr   \$1,%r8d
+       andi.   r8, r8, 32              # and   \$32,%r8d
+       xori    r8, r8, 32              # xor   \$32,%r8d       # nbits==192?0:32
+       bl      _vpaes_schedule_core
+
+       $POP    r0,  `$FRAME+$LRSAVE`($sp)
+       li      r10,`15+6*$SIZE_T`
+       li      r11,`31+6*$SIZE_T`
+       mtspr   256, r6                 # restore vrsave
+       mtlr    r0
+       xor     r3, r3, r3              # return value 0
+       lvx     v20,r10,$sp
+       addi    r10,r10,32
+       lvx     v21,r11,$sp
+       addi    r11,r11,32
+       lvx     v22,r10,$sp
+       addi    r10,r10,32
+       lvx     v23,r11,$sp
+       addi    r11,r11,32
+       lvx     v24,r10,$sp
+       addi    r10,r10,32
+       lvx     v25,r11,$sp
+       addi    r11,r11,32
+       lvx     v26,r10,$sp
+       addi    r10,r10,32
+       lvx     v27,r11,$sp
+       addi    r11,r11,32
+       lvx     v28,r10,$sp
+       addi    r10,r10,32
+       lvx     v29,r11,$sp
+       addi    r11,r11,32
+       lvx     v30,r10,$sp
+       lvx     v31,r11,$sp
+       addi    $sp,$sp,$FRAME
+       blr
+       .long   0
+       .byte   0,12,0x04,1,0x80,0,3,0
+       .long   0
+.size  .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
+___
+}
+
+# Post-process the accumulated $code one line at a time and print the
+# result.  $consts stays true until the "Lconsts:" label has been seen,
+# i.e. while the constant tables are still being emitted; $flavour is
+# set earlier in the script and a value ending in "le" selects the
+# little-endian rewrites.
+my $consts=1;
+foreach  (split("\n",$code)) {
+       # replace every `...` span with the result of evaluating it as Perl
+       s/\`([^\`]*)\`/eval $1/geo;
+
+       # constants table endian-specific conversion
+       # (applies to ".long a, b, ... ?tag" lines inside the table)
+       if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) {
+           my $conv=$2;
+           my @bytes=();
+
+           # convert to endian-agnostic format
+           # (each 32-bit value is split into four bytes, most significant
+           # first; values with a leading 0 go through oct, which also
+           # handles the 0x prefix)
+           foreach (split(/,\s+/,$1)) {
+               my $l = /^0/?oct:int;
+               push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
+           }
+
+           # little-endian conversion
+           # ?inv xors every byte with 0xf, ?rev reverses the byte order;
+           # any other tag leaves the bytes untouched
+           if ($flavour =~ /le$/o) {
+               SWITCH: for($conv)  {
+                   /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
+                   /\?rev/ && do   { @bytes=reverse(@bytes);    last; }; 
+               }
+           }
+
+           #emit
+           print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
+           next;
+       }
+       $consts=0 if (m/Lconsts:/o);    # end of table
+
+       # instructions prefixed with '?' are endian-specific and need
+       # to be adjusted accordingly...
+       # Little-endian: at most one rewrite fires per line (the chain
+       # short-circuits on the first match): ?lvsr and ?lvsl trade
+       # places, ?vperm swaps its two source operands, ?vsldoi swaps its
+       # sources and emits the shift as the literal text "16-n", and
+       # ?vspltw emits its index as "3-n".  Big-endian: the '?' is
+       # simply dropped.
+       if ($flavour =~ /le$/o) {       # little-endian
+           s/\?lvsr/lvsl/o or
+           s/\?lvsl/lvsr/o or
+           s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
+           s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
+           s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
+       } else {                        # big-endian
+           s/\?([a-z]+)/$1/o;
+       }
+
+       print $_,"\n";
+}
+
+close STDOUT;
index 1533e2c..2ba149c 100644 (file)
 #
 #              aes-586.pl              vpaes-x86.pl
 #
-# Core 2(**)   29.1/42.3/18.3          22.0/25.6(***)
-# Nehalem      27.9/40.4/18.1          10.3/12.0
-# Atom         102./119./60.1          64.5/85.3(***)
+# Core 2(**)   28.1/41.4/18.3          21.9/25.2(***)
+# Nehalem      27.9/40.4/18.1          10.2/11.9
+# Atom         70.7/92.1/60.1          61.1/75.4(***)
+# Silvermont   45.4/62.9/24.1          49.2/61.1(***)
 #
 # (*)  "Hyper-threading" in the context refers rather to cache shared
 #      among multiple cores, than to specifically Intel HTT. As vast
@@ -40,8 +41,8 @@
 # (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe.
 #
 # (***)        Less impressive improvement on Core 2 and Atom is due to slow
-#      pshufb, yet it's respectable +32%/65%  improvement on Core 2
-#      and +58%/40% on Atom (as implied, over "hyper-threading-safe"
+#      pshufb, yet it's respectable +28%/64%  improvement on Core 2
+#      and +15% on Atom (as implied, over "hyper-threading-safe"
 #      code path).
 #
 #                                              <appro@openssl.org>
@@ -183,35 +184,35 @@ $k_dsbo=0x2c0;            # decryption sbox final output
        &movdqa ("xmm1","xmm6")
        &movdqa ("xmm2",&QWP($k_ipt,$const));
        &pandn  ("xmm1","xmm0");
-       &movdqu ("xmm5",&QWP(0,$key));
-       &psrld  ("xmm1",4);
        &pand   ("xmm0","xmm6");
+       &movdqu ("xmm5",&QWP(0,$key));
        &pshufb ("xmm2","xmm0");
        &movdqa ("xmm0",&QWP($k_ipt+16,$const));
-       &pshufb ("xmm0","xmm1");
        &pxor   ("xmm2","xmm5");
-       &pxor   ("xmm0","xmm2");
+       &psrld  ("xmm1",4);
        &add    ($key,16);
+       &pshufb ("xmm0","xmm1");
        &lea    ($base,&DWP($k_mc_backward,$const));
+       &pxor   ("xmm0","xmm2");
        &jmp    (&label("enc_entry"));
 
 
 &set_label("enc_loop",16);
        # middle of middle round
        &movdqa ("xmm4",&QWP($k_sb1,$const));   # 4 : sb1u
-       &pshufb ("xmm4","xmm2");                # 4 = sb1u
-       &pxor   ("xmm4","xmm5");                # 4 = sb1u + k
        &movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sb1t
+       &pshufb ("xmm4","xmm2");                # 4 = sb1u
        &pshufb ("xmm0","xmm3");                # 0 = sb1t
-       &pxor   ("xmm0","xmm4");                # 0 = A
+       &pxor   ("xmm4","xmm5");                # 4 = sb1u + k
        &movdqa ("xmm5",&QWP($k_sb2,$const));   # 4 : sb2u
-       &pshufb ("xmm5","xmm2");                # 4 = sb2u
+       &pxor   ("xmm0","xmm4");                # 0 = A
        &movdqa ("xmm1",&QWP(-0x40,$base,$magic));# .Lk_mc_forward[]
+       &pshufb ("xmm5","xmm2");                # 4 = sb2u
        &movdqa ("xmm2",&QWP($k_sb2+16,$const));# 2 : sb2t
-       &pshufb ("xmm2","xmm3");                # 2 = sb2t
-       &pxor   ("xmm2","xmm5");                # 2 = 2A
        &movdqa ("xmm4",&QWP(0,$base,$magic));  # .Lk_mc_backward[]
+       &pshufb ("xmm2","xmm3");                # 2 = sb2t
        &movdqa ("xmm3","xmm0");                # 3 = A
+       &pxor   ("xmm2","xmm5");                # 2 = 2A
        &pshufb ("xmm0","xmm1");                # 0 = B
        &add    ($key,16);                      # next key
        &pxor   ("xmm0","xmm2");                # 0 = 2A+B
@@ -220,30 +221,30 @@ $k_dsbo=0x2c0;            # decryption sbox final output
        &pxor   ("xmm3","xmm0");                # 3 = 2A+B+D
        &pshufb ("xmm0","xmm1");                # 0 = 2B+C
        &and    ($magic,0x30);                  # ... mod 4
-       &pxor   ("xmm0","xmm3");                # 0 = 2A+3B+C+D
        &sub    ($round,1);                     # nr--
+       &pxor   ("xmm0","xmm3");                # 0 = 2A+3B+C+D
 
 &set_label("enc_entry");
        # top of round
        &movdqa ("xmm1","xmm6");                # 1 : i
+       &movdqa ("xmm5",&QWP($k_inv+16,$const));# 2 : a/k
        &pandn  ("xmm1","xmm0");                # 1 = i<<4
        &psrld  ("xmm1",4);                     # 1 = i
        &pand   ("xmm0","xmm6");                # 0 = k
-       &movdqa ("xmm5",&QWP($k_inv+16,$const));# 2 : a/k
        &pshufb ("xmm5","xmm0");                # 2 = a/k
-       &pxor   ("xmm0","xmm1");                # 0 = j
        &movdqa ("xmm3","xmm7");                # 3 : 1/i
+       &pxor   ("xmm0","xmm1");                # 0 = j
        &pshufb ("xmm3","xmm1");                # 3 = 1/i
-       &pxor   ("xmm3","xmm5");                # 3 = iak = 1/i + a/k
        &movdqa ("xmm4","xmm7");                # 4 : 1/j
+       &pxor   ("xmm3","xmm5");                # 3 = iak = 1/i + a/k
        &pshufb ("xmm4","xmm0");                # 4 = 1/j
-       &pxor   ("xmm4","xmm5");                # 4 = jak = 1/j + a/k
        &movdqa ("xmm2","xmm7");                # 2 : 1/iak
+       &pxor   ("xmm4","xmm5");                # 4 = jak = 1/j + a/k
        &pshufb ("xmm2","xmm3");                # 2 = 1/iak
-       &pxor   ("xmm2","xmm0");                # 2 = io
        &movdqa ("xmm3","xmm7");                # 3 : 1/jak
-       &movdqu ("xmm5",&QWP(0,$key));
+       &pxor   ("xmm2","xmm0");                # 2 = io
        &pshufb ("xmm3","xmm4");                # 3 = 1/jak
+       &movdqu ("xmm5",&QWP(0,$key));
        &pxor   ("xmm3","xmm1");                # 3 = jo
        &jnz    (&label("enc_loop"));
 
@@ -265,8 +266,8 @@ $k_dsbo=0x2c0;              # decryption sbox final output
 ##  Same API as encryption core.
 ##
 &function_begin_B("_vpaes_decrypt_core");
-       &mov    ($round,&DWP(240,$key));
        &lea    ($base,&DWP($k_dsbd,$const));
+       &mov    ($round,&DWP(240,$key));
        &movdqa ("xmm1","xmm6");
        &movdqa ("xmm2",&QWP($k_dipt-$k_dsbd,$base));
        &pandn  ("xmm1","xmm0");
@@ -292,62 +293,61 @@ $k_dsbo=0x2c0;            # decryption sbox final output
 ##  Inverse mix columns
 ##
        &movdqa ("xmm4",&QWP(-0x20,$base));     # 4 : sb9u
+       &movdqa ("xmm1",&QWP(-0x10,$base));     # 0 : sb9t
        &pshufb ("xmm4","xmm2");                # 4 = sb9u
-       &pxor   ("xmm4","xmm0");
-       &movdqa ("xmm0",&QWP(-0x10,$base));     # 0 : sb9t
-       &pshufb ("xmm0","xmm3");                # 0 = sb9t
-       &pxor   ("xmm0","xmm4");                # 0 = ch
-       &add    ($key,16);                      # next round key
-
-       &pshufb ("xmm0","xmm5");                # MC ch
+       &pshufb ("xmm1","xmm3");                # 0 = sb9t
+       &pxor   ("xmm0","xmm4");
        &movdqa ("xmm4",&QWP(0,$base));         # 4 : sbdu
-       &pshufb ("xmm4","xmm2");                # 4 = sbdu
-       &pxor   ("xmm4","xmm0");                # 4 = ch
-       &movdqa ("xmm0",&QWP(0x10,$base));      # 0 : sbdt
-       &pshufb ("xmm0","xmm3");                # 0 = sbdt
-       &pxor   ("xmm0","xmm4");                # 0 = ch
-       &sub    ($round,1);                     # nr--
+       &pxor   ("xmm0","xmm1");                # 0 = ch
+       &movdqa ("xmm1",&QWP(0x10,$base));      # 0 : sbdt
 
+       &pshufb ("xmm4","xmm2");                # 4 = sbdu
        &pshufb ("xmm0","xmm5");                # MC ch
+       &pshufb ("xmm1","xmm3");                # 0 = sbdt
+       &pxor   ("xmm0","xmm4");                # 4 = ch
        &movdqa ("xmm4",&QWP(0x20,$base));      # 4 : sbbu
-       &pshufb ("xmm4","xmm2");                # 4 = sbbu
-       &pxor   ("xmm4","xmm0");                # 4 = ch
-       &movdqa ("xmm0",&QWP(0x30,$base));      # 0 : sbbt
-       &pshufb ("xmm0","xmm3");                # 0 = sbbt
-       &pxor   ("xmm0","xmm4");                # 0 = ch
+       &pxor   ("xmm0","xmm1");                # 0 = ch
+       &movdqa ("xmm1",&QWP(0x30,$base));      # 0 : sbbt
 
+       &pshufb ("xmm4","xmm2");                # 4 = sbbu
        &pshufb ("xmm0","xmm5");                # MC ch
+       &pshufb ("xmm1","xmm3");                # 0 = sbbt
+       &pxor   ("xmm0","xmm4");                # 4 = ch
        &movdqa ("xmm4",&QWP(0x40,$base));      # 4 : sbeu
-       &pshufb ("xmm4","xmm2");                # 4 = sbeu
-       &pxor   ("xmm4","xmm0");                # 4 = ch
-       &movdqa ("xmm0",&QWP(0x50,$base));      # 0 : sbet
-       &pshufb ("xmm0","xmm3");                # 0 = sbet
-       &pxor   ("xmm0","xmm4");                # 0 = ch
+       &pxor   ("xmm0","xmm1");                # 0 = ch
+       &movdqa ("xmm1",&QWP(0x50,$base));      # 0 : sbet
 
+       &pshufb ("xmm4","xmm2");                # 4 = sbeu
+       &pshufb ("xmm0","xmm5");                # MC ch
+       &pshufb ("xmm1","xmm3");                # 0 = sbet
+       &pxor   ("xmm0","xmm4");                # 4 = ch
+       &add    ($key,16);                      # next round key
        &palignr("xmm5","xmm5",12);
+       &pxor   ("xmm0","xmm1");                # 0 = ch
+       &sub    ($round,1);                     # nr--
 
 &set_label("dec_entry");
        # top of round
        &movdqa ("xmm1","xmm6");                # 1 : i
+       &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
        &pandn  ("xmm1","xmm0");                # 1 = i<<4
-       &psrld  ("xmm1",4);                     # 1 = i
        &pand   ("xmm0","xmm6");                # 0 = k
-       &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
+       &psrld  ("xmm1",4);                     # 1 = i
        &pshufb ("xmm2","xmm0");                # 2 = a/k
-       &pxor   ("xmm0","xmm1");                # 0 = j
        &movdqa ("xmm3","xmm7");                # 3 : 1/i
+       &pxor   ("xmm0","xmm1");                # 0 = j
        &pshufb ("xmm3","xmm1");                # 3 = 1/i
-       &pxor   ("xmm3","xmm2");                # 3 = iak = 1/i + a/k
        &movdqa ("xmm4","xmm7");                # 4 : 1/j
+       &pxor   ("xmm3","xmm2");                # 3 = iak = 1/i + a/k
        &pshufb ("xmm4","xmm0");                # 4 = 1/j
        &pxor   ("xmm4","xmm2");                # 4 = jak = 1/j + a/k
        &movdqa ("xmm2","xmm7");                # 2 : 1/iak
        &pshufb ("xmm2","xmm3");                # 2 = 1/iak
-       &pxor   ("xmm2","xmm0");                # 2 = io
        &movdqa ("xmm3","xmm7");                # 3 : 1/jak
+       &pxor   ("xmm2","xmm0");                # 2 = io
        &pshufb ("xmm3","xmm4");                # 3 = 1/jak
-       &pxor   ("xmm3","xmm1");                # 3 = jo
        &movdqu ("xmm0",&QWP(0,$key));
+       &pxor   ("xmm3","xmm1");                # 3 = jo
        &jnz    (&label("dec_loop"));
 
        # middle of last round
@@ -542,12 +542,12 @@ $k_dsbo=0x2c0;            # decryption sbox final output
 ##    %xmm0: b+c+d  b+c  b  a
 ##
 &function_begin_B("_vpaes_schedule_192_smear");
-       &pshufd ("xmm0","xmm6",0x80);           # d c 0 0 -> c 0 0 0
-       &pxor   ("xmm6","xmm0");                # -> c+d c 0 0
+       &pshufd ("xmm1","xmm6",0x80);           # d c 0 0 -> c 0 0 0
        &pshufd ("xmm0","xmm7",0xFE);           # b a _ _ -> b b b a
+       &pxor   ("xmm6","xmm1");                # -> c+d c 0 0
+       &pxor   ("xmm1","xmm1");
        &pxor   ("xmm6","xmm0");                # -> b+c+d b+c b a
        &movdqa ("xmm0","xmm6");
-       &pxor   ("xmm1","xmm1");
        &movhlps("xmm6","xmm1");                # clobber low side with zeros
        &ret    ();
 &function_end_B("_vpaes_schedule_192_smear");
index bd7f45b..f2ef318 100644 (file)
 #
 #              aes-x86_64.pl           vpaes-x86_64.pl
 #
-# Core 2(**)   30.5/43.7/14.3          21.8/25.7(***)
-# Nehalem      30.5/42.2/14.6           9.8/11.8
-# Atom         63.9/79.0/32.1          64.0/84.8(***)
+# Core 2(**)   29.6/41.1/14.3          21.9/25.2(***)
+# Nehalem      29.6/40.3/14.6          10.0/11.8
+# Atom         57.3/74.2/32.1          60.9/77.2(***)
+# Silvermont   52.7/64.0/19.5          48.8/60.8(***)
 #
 # (*)  "Hyper-threading" in the context refers rather to cache shared
 #      among multiple cores, than to specifically Intel HTT. As vast
@@ -40,7 +41,7 @@
 # (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe.
 #
 # (***)        Less impressive improvement on Core 2 and Atom is due to slow
-#      pshufb, yet it's respectable +40%/78% improvement on Core 2
+#      pshufb, yet it's respectable +36%/62% improvement on Core 2
 #      (as implied, over "hyper-threading-safe" code path).
 #
 #                                              <appro@openssl.org>
@@ -95,8 +96,8 @@ _vpaes_encrypt_core:
        movdqa  .Lk_ipt+16(%rip), %xmm0 # ipthi
        pshufb  %xmm1,  %xmm0
        pxor    %xmm5,  %xmm2
-       pxor    %xmm2,  %xmm0
        add     \$16,   %r9
+       pxor    %xmm2,  %xmm0
        lea     .Lk_mc_backward(%rip),%r10
        jmp     .Lenc_entry
 
@@ -104,19 +105,19 @@ _vpaes_encrypt_core:
 .Lenc_loop:
        # middle of middle round
        movdqa  %xmm13, %xmm4   # 4 : sb1u
-       pshufb  %xmm2,  %xmm4   # 4 = sb1u
-       pxor    %xmm5,  %xmm4   # 4 = sb1u + k
        movdqa  %xmm12, %xmm0   # 0 : sb1t
+       pshufb  %xmm2,  %xmm4   # 4 = sb1u
        pshufb  %xmm3,  %xmm0   # 0 = sb1t
-       pxor    %xmm4,  %xmm0   # 0 = A
+       pxor    %xmm5,  %xmm4   # 4 = sb1u + k
        movdqa  %xmm15, %xmm5   # 4 : sb2u
-       pshufb  %xmm2,  %xmm5   # 4 = sb2u
+       pxor    %xmm4,  %xmm0   # 0 = A
        movdqa  -0x40(%r11,%r10), %xmm1         # .Lk_mc_forward[]
+       pshufb  %xmm2,  %xmm5   # 4 = sb2u
+       movdqa  (%r11,%r10), %xmm4              # .Lk_mc_backward[]
        movdqa  %xmm14, %xmm2   # 2 : sb2t
        pshufb  %xmm3,  %xmm2   # 2 = sb2t
-       pxor    %xmm5,  %xmm2   # 2 = 2A
-       movdqa  (%r11,%r10), %xmm4              # .Lk_mc_backward[]
        movdqa  %xmm0,  %xmm3   # 3 = A
+       pxor    %xmm5,  %xmm2   # 2 = 2A
        pshufb  %xmm1,  %xmm0   # 0 = B
        add     \$16,   %r9     # next key
        pxor    %xmm2,  %xmm0   # 0 = 2A+B
@@ -125,30 +126,30 @@ _vpaes_encrypt_core:
        pxor    %xmm0,  %xmm3   # 3 = 2A+B+D
        pshufb  %xmm1,  %xmm0   # 0 = 2B+C
        and     \$0x30, %r11    # ... mod 4
-       pxor    %xmm3,  %xmm0   # 0 = 2A+3B+C+D
        sub     \$1,%rax        # nr--
+       pxor    %xmm3,  %xmm0   # 0 = 2A+3B+C+D
 
 .Lenc_entry:
        # top of round
        movdqa  %xmm9,  %xmm1   # 1 : i
+       movdqa  %xmm11, %xmm5   # 2 : a/k
        pandn   %xmm0,  %xmm1   # 1 = i<<4
        psrld   \$4,    %xmm1   # 1 = i
        pand    %xmm9,  %xmm0   # 0 = k
-       movdqa  %xmm11, %xmm5   # 2 : a/k
        pshufb  %xmm0,  %xmm5   # 2 = a/k
-       pxor    %xmm1,  %xmm0   # 0 = j
        movdqa  %xmm10, %xmm3   # 3 : 1/i
+       pxor    %xmm1,  %xmm0   # 0 = j
        pshufb  %xmm1,  %xmm3   # 3 = 1/i
-       pxor    %xmm5,  %xmm3   # 3 = iak = 1/i + a/k
        movdqa  %xmm10, %xmm4   # 4 : 1/j
+       pxor    %xmm5,  %xmm3   # 3 = iak = 1/i + a/k
        pshufb  %xmm0,  %xmm4   # 4 = 1/j
-       pxor    %xmm5,  %xmm4   # 4 = jak = 1/j + a/k
        movdqa  %xmm10, %xmm2   # 2 : 1/iak
+       pxor    %xmm5,  %xmm4   # 4 = jak = 1/j + a/k
        pshufb  %xmm3,  %xmm2   # 2 = 1/iak
-       pxor    %xmm0,  %xmm2   # 2 = io
        movdqa  %xmm10, %xmm3   # 3 : 1/jak
-       movdqu  (%r9),  %xmm5
+       pxor    %xmm0,  %xmm2   # 2 = io
        pshufb  %xmm4,  %xmm3   # 3 = 1/jak
+       movdqu  (%r9),  %xmm5
        pxor    %xmm1,  %xmm3   # 3 = jo
        jnz     .Lenc_loop
 
@@ -201,62 +202,61 @@ _vpaes_decrypt_core:
 ##  Inverse mix columns
 ##
        movdqa  -0x20(%r10),%xmm4       # 4 : sb9u
+       movdqa  -0x10(%r10),%xmm1       # 0 : sb9t
        pshufb  %xmm2,  %xmm4           # 4 = sb9u
-       pxor    %xmm0,  %xmm4
-       movdqa  -0x10(%r10),%xmm0       # 0 : sb9t
-       pshufb  %xmm3,  %xmm0           # 0 = sb9t
-       pxor    %xmm4,  %xmm0           # 0 = ch
-       add     \$16, %r9               # next round key
-
-       pshufb  %xmm5,  %xmm0           # MC ch
+       pshufb  %xmm3,  %xmm1           # 0 = sb9t
+       pxor    %xmm4,  %xmm0
        movdqa  0x00(%r10),%xmm4        # 4 : sbdu
+       pxor    %xmm1,  %xmm0           # 0 = ch
+       movdqa  0x10(%r10),%xmm1        # 0 : sbdt
+
        pshufb  %xmm2,  %xmm4           # 4 = sbdu
-       pxor    %xmm0,  %xmm4           # 4 = ch
-       movdqa  0x10(%r10),%xmm0        # 0 : sbdt
-       pshufb  %xmm3,  %xmm0           # 0 = sbdt
-       pxor    %xmm4,  %xmm0           # 0 = ch
-       sub     \$1,%rax                # nr--
-       
        pshufb  %xmm5,  %xmm0           # MC ch
+       pshufb  %xmm3,  %xmm1           # 0 = sbdt
+       pxor    %xmm4,  %xmm0           # 4 = ch
        movdqa  0x20(%r10),%xmm4        # 4 : sbbu
+       pxor    %xmm1,  %xmm0           # 0 = ch
+       movdqa  0x30(%r10),%xmm1        # 0 : sbbt
+
        pshufb  %xmm2,  %xmm4           # 4 = sbbu
-       pxor    %xmm0,  %xmm4           # 4 = ch
-       movdqa  0x30(%r10),%xmm0        # 0 : sbbt
-       pshufb  %xmm3,  %xmm0           # 0 = sbbt
-       pxor    %xmm4,  %xmm0           # 0 = ch
-       
        pshufb  %xmm5,  %xmm0           # MC ch
+       pshufb  %xmm3,  %xmm1           # 0 = sbbt
+       pxor    %xmm4,  %xmm0           # 4 = ch
        movdqa  0x40(%r10),%xmm4        # 4 : sbeu
-       pshufb  %xmm2,  %xmm4           # 4 = sbeu
-       pxor    %xmm0,  %xmm4           # 4 = ch
-       movdqa  0x50(%r10),%xmm0        # 0 : sbet
-       pshufb  %xmm3,  %xmm0           # 0 = sbet
-       pxor    %xmm4,  %xmm0           # 0 = ch
+       pxor    %xmm1,  %xmm0           # 0 = ch
+       movdqa  0x50(%r10),%xmm1        # 0 : sbet
 
+       pshufb  %xmm2,  %xmm4           # 4 = sbeu
+       pshufb  %xmm5,  %xmm0           # MC ch
+       pshufb  %xmm3,  %xmm1           # 0 = sbet
+       pxor    %xmm4,  %xmm0           # 4 = ch
+       add     \$16, %r9               # next round key
        palignr \$12,   %xmm5,  %xmm5
-       
+       pxor    %xmm1,  %xmm0           # 0 = ch
+       sub     \$1,%rax                # nr--
+
 .Ldec_entry:
        # top of round
        movdqa  %xmm9,  %xmm1   # 1 : i
        pandn   %xmm0,  %xmm1   # 1 = i<<4
+       movdqa  %xmm11, %xmm2   # 2 : a/k
        psrld   \$4,    %xmm1   # 1 = i
        pand    %xmm9,  %xmm0   # 0 = k
-       movdqa  %xmm11, %xmm2   # 2 : a/k
        pshufb  %xmm0,  %xmm2   # 2 = a/k
-       pxor    %xmm1,  %xmm0   # 0 = j
        movdqa  %xmm10, %xmm3   # 3 : 1/i
+       pxor    %xmm1,  %xmm0   # 0 = j
        pshufb  %xmm1,  %xmm3   # 3 = 1/i
-       pxor    %xmm2,  %xmm3   # 3 = iak = 1/i + a/k
        movdqa  %xmm10, %xmm4   # 4 : 1/j
+       pxor    %xmm2,  %xmm3   # 3 = iak = 1/i + a/k
        pshufb  %xmm0,  %xmm4   # 4 = 1/j
        pxor    %xmm2,  %xmm4   # 4 = jak = 1/j + a/k
        movdqa  %xmm10, %xmm2   # 2 : 1/iak
        pshufb  %xmm3,  %xmm2   # 2 = 1/iak
-       pxor    %xmm0,  %xmm2   # 2 = io
        movdqa  %xmm10, %xmm3   # 3 : 1/jak
+       pxor    %xmm0,  %xmm2   # 2 = io
        pshufb  %xmm4,  %xmm3   # 3 = 1/jak
-       pxor    %xmm1,  %xmm3   # 3 = jo
        movdqu  (%r9),  %xmm0
+       pxor    %xmm1,  %xmm3   # 3 = jo
        jnz     .Ldec_loop
 
        # middle of last round
@@ -464,12 +464,12 @@ _vpaes_schedule_core:
 .type  _vpaes_schedule_192_smear,\@abi-omnipotent
 .align 16
 _vpaes_schedule_192_smear:
-       pshufd  \$0x80, %xmm6,  %xmm0   # d c 0 0 -> c 0 0 0
-       pxor    %xmm0,  %xmm6           # -> c+d c 0 0
+       pshufd  \$0x80, %xmm6,  %xmm1   # d c 0 0 -> c 0 0 0
        pshufd  \$0xFE, %xmm7,  %xmm0   # b a _ _ -> b b b a
+       pxor    %xmm1,  %xmm6           # -> c+d c 0 0
+       pxor    %xmm1,  %xmm1
        pxor    %xmm0,  %xmm6           # -> b+c+d b+c b a
        movdqa  %xmm6,  %xmm0
-       pxor    %xmm1,  %xmm1
        movhlps %xmm1,  %xmm6           # clobber low side with zeros
        ret
 .size  _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
diff --git a/crypto/arm64cpuid.S b/crypto/arm64cpuid.S
new file mode 100644 (file)
index 0000000..4778ac1
--- /dev/null
@@ -0,0 +1,46 @@
+#include "arm_arch.h"
+
+.text
+.arch  armv8-a+crypto
+
+.align 5
+.global        _armv7_neon_probe
+.type  _armv7_neon_probe,%function
+_armv7_neon_probe:
+       orr     v15.16b, v15.16b, v15.16b
+       ret
+.size  _armv7_neon_probe,.-_armv7_neon_probe
+
+.global        _armv7_tick
+.type  _armv7_tick,%function
+_armv7_tick:
+       mrs     x0, CNTVCT_EL0
+       ret
+.size  _armv7_tick,.-_armv7_tick
+
+.global        _armv8_aes_probe
+.type  _armv8_aes_probe,%function
+_armv8_aes_probe:
+       aese    v0.16b, v0.16b
+       ret
+.size  _armv8_aes_probe,.-_armv8_aes_probe
+
+.global        _armv8_sha1_probe
+.type  _armv8_sha1_probe,%function
+_armv8_sha1_probe:
+       sha1h   s0, s0
+       ret
+.size  _armv8_sha1_probe,.-_armv8_sha1_probe
+
+.global        _armv8_sha256_probe
+.type  _armv8_sha256_probe,%function
+_armv8_sha256_probe:
+       sha256su0       v0.4s, v0.4s
+       ret
+.size  _armv8_sha256_probe,.-_armv8_sha256_probe
+.global        _armv8_pmull_probe
+.type  _armv8_pmull_probe,%function
+_armv8_pmull_probe:
+       pmull   v0.1q, v0.1d, v0.1d
+       ret
+.size  _armv8_pmull_probe,.-_armv8_pmull_probe
index b654371..9d6e588 100644 (file)
 #    define __ARMEL__
 #   endif
 #  elif defined(__GNUC__)
+#   if   defined(__aarch64__)
+#    define __ARM_ARCH__ 8
+#    if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
+#     define __ARMEB__
+#    else
+#     define __ARMEL__
+#    endif
   /*
    * Why doesn't gcc define __ARM_ARCH__? Instead it defines
    * bunch of below macros. See all_architectires[] table in
    * gcc/config/arm/arm.c. On a side note it defines
    * __ARMEL__/__ARMEB__ for little-/big-endian.
    */
-#   if   defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)     || \
+#   elif defined(__ARM_ARCH)
+#    define __ARM_ARCH__ __ARM_ARCH
+#   elif defined(__ARM_ARCH_8A__)
+#    define __ARM_ARCH__ 8
+#   elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)     || \
         defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__)     || \
         defined(__ARM_ARCH_7EM__)
 #    define __ARM_ARCH__ 7
 #  include <openssl/fipssyms.h>
 # endif
 
+# if !defined(__ARM_MAX_ARCH__)
+#  define __ARM_MAX_ARCH__ __ARM_ARCH__
+# endif
+
+# if __ARM_MAX_ARCH__<__ARM_ARCH__
+#  error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__"
+# elif __ARM_MAX_ARCH__!=__ARM_ARCH__
+#  if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__)
+#   error "can't build universal big-endian binary"
+#  endif
+# endif
+
 # if !__ASSEMBLER__
 extern unsigned int OPENSSL_armcap_P;
-
-#  define ARMV7_NEON      (1<<0)
-#  define ARMV7_TICK      (1<<1)
 # endif
 
+# define ARMV7_NEON      (1<<0)
+# define ARMV7_TICK      (1<<1)
+# define ARMV8_AES       (1<<2)
+# define ARMV8_SHA1      (1<<3)
+# define ARMV8_SHA256    (1<<4)
+# define ARMV8_PMULL     (1<<5)
+
 #endif
index 28522ea..356fa15 100644 (file)
@@ -7,8 +7,18 @@
 
 #include "arm_arch.h"
 
-unsigned int OPENSSL_armcap_P;
+unsigned int OPENSSL_armcap_P = 0;
 
+#if __ARM_MAX_ARCH__<7
+void OPENSSL_cpuid_setup(void)
+{
+}
+
+unsigned long OPENSSL_rdtsc(void)
+{
+    return 0;
+}
+#else
 static sigset_t all_masked;
 
 static sigjmp_buf ill_jmp;
@@ -22,9 +32,13 @@ static void ill_handler(int sig)
  * ARM compilers support inline assembler...
  */
 void _armv7_neon_probe(void);
-unsigned int _armv7_tick(void);
+void _armv8_aes_probe(void);
+void _armv8_sha1_probe(void);
+void _armv8_sha256_probe(void);
+void _armv8_pmull_probe(void);
+unsigned long _armv7_tick(void);
 
-unsigned int OPENSSL_rdtsc(void)
+unsigned long OPENSSL_rdtsc(void)
 {
     if (OPENSSL_armcap_P & ARMV7_TICK)
         return _armv7_tick();
@@ -32,9 +46,44 @@ unsigned int OPENSSL_rdtsc(void)
         return 0;
 }
 
-#if defined(__GNUC__) && __GNUC__>=2
+/*
+ * Use a weak reference to getauxval() so we can use it if it is available but
+ * don't break the build if it is not.
+ */
+# if defined(__GNUC__) && __GNUC__>=2
 void OPENSSL_cpuid_setup(void) __attribute__ ((constructor));
-#endif
+extern unsigned long getauxval(unsigned long type) __attribute__ ((weak));
+# else
+static unsigned long (*getauxval) (unsigned long) = NULL;
+# endif
+
+/*
+ * ARM puts the feature bits for Crypto Extensions in AT_HWCAP2, whereas
+ * AArch64 used AT_HWCAP.
+ */
+# if defined(__arm__) || defined (__arm)
+#  define HWCAP                  16
+                                  /* AT_HWCAP */
+#  define HWCAP_NEON             (1 << 12)
+
+#  define HWCAP_CE               26
+                                  /* AT_HWCAP2 */
+#  define HWCAP_CE_AES           (1 << 0)
+#  define HWCAP_CE_PMULL         (1 << 1)
+#  define HWCAP_CE_SHA1          (1 << 2)
+#  define HWCAP_CE_SHA256        (1 << 3)
+# elif defined(__aarch64__)
+#  define HWCAP                  16
+                                  /* AT_HWCAP */
+#  define HWCAP_NEON             (1 << 1)
+
+#  define HWCAP_CE               HWCAP
+#  define HWCAP_CE_AES           (1 << 3)
+#  define HWCAP_CE_PMULL         (1 << 4)
+#  define HWCAP_CE_SHA1          (1 << 5)
+#  define HWCAP_CE_SHA256        (1 << 6)
+# endif
+
 void OPENSSL_cpuid_setup(void)
 {
     char *e;
@@ -47,7 +96,7 @@ void OPENSSL_cpuid_setup(void)
     trigger = 1;
 
     if ((e = getenv("OPENSSL_armcap"))) {
-        OPENSSL_armcap_P = strtoul(e, NULL, 0);
+        OPENSSL_armcap_P = (unsigned int)strtoul(e, NULL, 0);
         return;
     }
 
@@ -67,9 +116,42 @@ void OPENSSL_cpuid_setup(void)
     sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
     sigaction(SIGILL, &ill_act, &ill_oact);
 
-    if (sigsetjmp(ill_jmp, 1) == 0) {
+    if (getauxval != NULL) {
+        if (getauxval(HWCAP) & HWCAP_NEON) {
+            unsigned long hwcap = getauxval(HWCAP_CE);
+
+            OPENSSL_armcap_P |= ARMV7_NEON;
+
+            if (hwcap & HWCAP_CE_AES)
+                OPENSSL_armcap_P |= ARMV8_AES;
+
+            if (hwcap & HWCAP_CE_PMULL)
+                OPENSSL_armcap_P |= ARMV8_PMULL;
+
+            if (hwcap & HWCAP_CE_SHA1)
+                OPENSSL_armcap_P |= ARMV8_SHA1;
+
+            if (hwcap & HWCAP_CE_SHA256)
+                OPENSSL_armcap_P |= ARMV8_SHA256;
+        }
+    } else if (sigsetjmp(ill_jmp, 1) == 0) {
         _armv7_neon_probe();
         OPENSSL_armcap_P |= ARMV7_NEON;
+        if (sigsetjmp(ill_jmp, 1) == 0) {
+            _armv8_pmull_probe();
+            OPENSSL_armcap_P |= ARMV8_PMULL | ARMV8_AES;
+        } else if (sigsetjmp(ill_jmp, 1) == 0) {
+            _armv8_aes_probe();
+            OPENSSL_armcap_P |= ARMV8_AES;
+        }
+        if (sigsetjmp(ill_jmp, 1) == 0) {
+            _armv8_sha1_probe();
+            OPENSSL_armcap_P |= ARMV8_SHA1;
+        }
+        if (sigsetjmp(ill_jmp, 1) == 0) {
+            _armv8_sha256_probe();
+            OPENSSL_armcap_P |= ARMV8_SHA256;
+        }
     }
     if (sigsetjmp(ill_jmp, 1) == 0) {
         _armv7_tick();
@@ -79,3 +161,4 @@ void OPENSSL_cpuid_setup(void)
     sigaction(SIGILL, &ill_oact, NULL);
     sigprocmask(SIG_SETMASK, &oset, NULL);
 }
+#endif
index 2d618de..65010ae 100644 (file)
@@ -4,20 +4,6 @@
 .code  32
 
 .align 5
-.global        _armv7_neon_probe
-.type  _armv7_neon_probe,%function
-_armv7_neon_probe:
-       .word   0xf26ee1fe      @ vorr  q15,q15,q15
-       .word   0xe12fff1e      @ bx    lr
-.size  _armv7_neon_probe,.-_armv7_neon_probe
-
-.global        _armv7_tick
-.type  _armv7_tick,%function
-_armv7_tick:
-       mrc     p15,0,r0,c9,c13,0
-       .word   0xe12fff1e      @ bx    lr
-.size  _armv7_tick,.-_armv7_tick
-
 .global        OPENSSL_atomic_add
 .type  OPENSSL_atomic_add,%function
 OPENSSL_atomic_add:
@@ -28,7 +14,7 @@ OPENSSL_atomic_add:
        cmp     r2,#0
        bne     .Ladd
        mov     r0,r3
-       .word   0xe12fff1e      @ bx    lr
+       bx      lr
 #else
        stmdb   sp!,{r4-r6,lr}
        ldr     r2,.Lspinlock
@@ -81,62 +67,131 @@ OPENSSL_cleanse:
        adds    r1,r1,#4
        bne     .Little
 .Lcleanse_done:
+#if __ARM_ARCH__>=5
+       bx      lr
+#else
        tst     lr,#1
        moveq   pc,lr
        .word   0xe12fff1e      @ bx    lr
+#endif
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
 
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
+.align 5
+.global        _armv7_neon_probe
+.type  _armv7_neon_probe,%function
+_armv7_neon_probe:
+       vorr    q0,q0,q0
+       bx      lr
+.size  _armv7_neon_probe,.-_armv7_neon_probe
+
+.global        _armv7_tick
+.type  _armv7_tick,%function
+_armv7_tick:
+       mrrc    p15,1,r0,r1,c14         @ CNTVCT
+       bx      lr
+.size  _armv7_tick,.-_armv7_tick
+
+.global        _armv8_aes_probe
+.type  _armv8_aes_probe,%function
+_armv8_aes_probe:
+       .byte   0x00,0x03,0xb0,0xf3     @ aese.8        q0,q0
+       bx      lr
+.size  _armv8_aes_probe,.-_armv8_aes_probe
+
+.global        _armv8_sha1_probe
+.type  _armv8_sha1_probe,%function
+_armv8_sha1_probe:
+       .byte   0x40,0x0c,0x00,0xf2     @ sha1c.32      q0,q0,q0
+       bx      lr
+.size  _armv8_sha1_probe,.-_armv8_sha1_probe
+
+.global        _armv8_sha256_probe
+.type  _armv8_sha256_probe,%function
+_armv8_sha256_probe:
+       .byte   0x40,0x0c,0x00,0xf3     @ sha256h.32    q0,q0,q0
+       bx      lr
+.size  _armv8_sha256_probe,.-_armv8_sha256_probe
+.global        _armv8_pmull_probe
+.type  _armv8_pmull_probe,%function
+_armv8_pmull_probe:
+       .byte   0x00,0x0e,0xa0,0xf2     @ vmull.p64     q0,d0,d0
+       bx      lr
+.size  _armv8_pmull_probe,.-_armv8_pmull_probe
+#endif
+
 .global        OPENSSL_wipe_cpu
 .type  OPENSSL_wipe_cpu,%function
 OPENSSL_wipe_cpu:
+#if __ARM_MAX_ARCH__>=7
        ldr     r0,.LOPENSSL_armcap
        adr     r1,.LOPENSSL_armcap
        ldr     r0,[r1,r0]
+#endif
        eor     r2,r2,r2
        eor     r3,r3,r3
        eor     ip,ip,ip
+#if __ARM_MAX_ARCH__>=7
        tst     r0,#1
        beq     .Lwipe_done
-       .word   0xf3000150      @ veor    q0, q0, q0
-       .word   0xf3022152      @ veor    q1, q1, q1
-       .word   0xf3044154      @ veor    q2, q2, q2
-       .word   0xf3066156      @ veor    q3, q3, q3
-       .word   0xf34001f0      @ veor    q8, q8, q8
-       .word   0xf34221f2      @ veor    q9, q9, q9
-       .word   0xf34441f4      @ veor    q10, q10, q10
-       .word   0xf34661f6      @ veor    q11, q11, q11
-       .word   0xf34881f8      @ veor    q12, q12, q12
-       .word   0xf34aa1fa      @ veor    q13, q13, q13
-       .word   0xf34cc1fc      @ veor    q14, q14, q14
-       .word   0xf34ee1fe      @ veor    q15, q15, q15
+       veor    q0, q0, q0
+       veor    q1, q1, q1
+       veor    q2, q2, q2
+       veor    q3, q3, q3
+       veor    q8, q8, q8
+       veor    q9, q9, q9
+       veor    q10, q10, q10
+       veor    q11, q11, q11
+       veor    q12, q12, q12
+       veor    q13, q13, q13
+       veor    q14, q14, q14
+       veor    q15, q15, q15
 .Lwipe_done:
+#endif
        mov     r0,sp
+#if __ARM_ARCH__>=5
+       bx      lr
+#else
        tst     lr,#1
        moveq   pc,lr
        .word   0xe12fff1e      @ bx    lr
+#endif
 .size  OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
 
 .global        OPENSSL_instrument_bus
 .type  OPENSSL_instrument_bus,%function
 OPENSSL_instrument_bus:
        eor     r0,r0,r0
+#if __ARM_ARCH__>=5
+       bx      lr
+#else
        tst     lr,#1
        moveq   pc,lr
        .word   0xe12fff1e      @ bx    lr
+#endif
 .size  OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
 
 .global        OPENSSL_instrument_bus2
 .type  OPENSSL_instrument_bus2,%function
 OPENSSL_instrument_bus2:
        eor     r0,r0,r0
+#if __ARM_ARCH__>=5
+       bx      lr
+#else
        tst     lr,#1
        moveq   pc,lr
        .word   0xe12fff1e      @ bx    lr
+#endif
 .size  OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
 
 .align 5
+#if __ARM_MAX_ARCH__>=7
 .LOPENSSL_armcap:
 .word  OPENSSL_armcap_P-.LOPENSSL_armcap
+#endif
 #if __ARM_ARCH__>=6
 .align 5
 #else
index d774f78..330fe81 100644 (file)
@@ -176,7 +176,7 @@ a_gentm.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
 a_gentm.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
 a_gentm.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
 a_gentm.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-a_gentm.o: ../cryptlib.h ../o_time.h a_gentm.c
+a_gentm.o: ../cryptlib.h ../o_time.h a_gentm.c asn1_locl.h
 a_i2d_fp.o: ../../e_os.h ../../include/openssl/asn1.h
 a_i2d_fp.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
 a_i2d_fp.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
@@ -277,6 +277,7 @@ a_time.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
 a_time.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 a_time.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
 a_time.o: ../../include/openssl/symhacks.h ../cryptlib.h ../o_time.h a_time.c
+a_time.o: asn1_locl.h
 a_type.o: ../../e_os.h ../../include/openssl/asn1.h
 a_type.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
 a_type.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
@@ -293,7 +294,7 @@ a_utctm.o: ../../include/openssl/err.h ../../include/openssl/lhash.h
 a_utctm.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
 a_utctm.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
 a_utctm.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-a_utctm.o: ../cryptlib.h ../o_time.h a_utctm.c
+a_utctm.o: ../cryptlib.h ../o_time.h a_utctm.c asn1_locl.h
 a_utf8.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
 a_utf8.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
 a_utf8.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
index 8b3ef71..fa76dca 100644 (file)
@@ -65,6 +65,7 @@
 #include "cryptlib.h"
 #include "o_time.h"
 #include <openssl/asn1.h>
+#include "asn1_locl.h"
 
 #if 0
 
@@ -117,7 +118,7 @@ ASN1_GENERALIZEDTIME *d2i_ASN1_GENERALIZEDTIME(ASN1_GENERALIZEDTIME **a,
 
 #endif
 
-int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d)
+int asn1_generalizedtime_to_tm(struct tm *tm, const ASN1_GENERALIZEDTIME *d)
 {
     static const int min[9] = { 0, 0, 1, 1, 0, 0, 0, 0, 0 };
     static const int max[9] = { 99, 99, 12, 31, 23, 59, 59, 12, 59 };
@@ -139,6 +140,8 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d)
     for (i = 0; i < 7; i++) {
         if ((i == 6) && ((a[o] == 'Z') || (a[o] == '+') || (a[o] == '-'))) {
             i++;
+            if (tm)
+                tm->tm_sec = 0;
             break;
         }
         if ((a[o] < '0') || (a[o] > '9'))
@@ -155,6 +158,31 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d)
 
         if ((n < min[i]) || (n > max[i]))
             goto err;
+        if (tm) {
+            switch (i) {
+            case 0:
+                tm->tm_year = n * 100 - 1900;
+                break;
+            case 1:
+                tm->tm_year += n;
+                break;
+            case 2:
+                tm->tm_mon = n - 1;
+                break;
+            case 3:
+                tm->tm_mday = n;
+                break;
+            case 4:
+                tm->tm_hour = n;
+                break;
+            case 5:
+                tm->tm_min = n;
+                break;
+            case 6:
+                tm->tm_sec = n;
+                break;
+            }
+        }
     }
     /*
      * Optional fractional seconds: decimal point followed by one or more
@@ -174,6 +202,7 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d)
     if (a[o] == 'Z')
         o++;
     else if ((a[o] == '+') || (a[o] == '-')) {
+        int offsign = a[o] == '-' ? -1 : 1, offset = 0;
         o++;
         if (o + 4 > l)
             goto err;
@@ -187,9 +216,17 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d)
             n = (n * 10) + a[o] - '0';
             if ((n < min[i]) || (n > max[i]))
                 goto err;
+            if (tm) {
+                if (i == 7)
+                    offset = n * 3600;
+                else if (i == 8)
+                    offset += n * 60;
+            }
             o++;
         }
-    } else {
+        if (offset && !OPENSSL_gmtime_adj(tm, 0, offset * offsign))
+            return 0;
+    } else if (a[o]) {
         /* Missing time zone information. */
         goto err;
     }
@@ -198,6 +235,11 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d)
     return (0);
 }
 
+/*
+ * Syntax check of a GeneralizedTime value.  Thin wrapper that calls
+ * asn1_generalizedtime_to_tm() with a NULL tm, so the "if (tm)" guarded
+ * stores in that function are skipped, and returns its result unchanged.
+ */
+int ASN1_GENERALIZEDTIME_check(const ASN1_GENERALIZEDTIME *d)
+{
+    return asn1_generalizedtime_to_tm(NULL, d);
+}
+
 int ASN1_GENERALIZEDTIME_set_string(ASN1_GENERALIZEDTIME *s, const char *str)
 {
     ASN1_GENERALIZEDTIME t;
index c81f0de..fcb2d56 100644 (file)
@@ -66,6 +66,7 @@
 #include "cryptlib.h"
 #include "o_time.h"
 #include <openssl/asn1t.h>
+#include "asn1_locl.h"
 
 IMPLEMENT_ASN1_MSTRING(ASN1_TIME, B_ASN1_TIME)
 
@@ -196,3 +197,32 @@ int ASN1_TIME_set_string(ASN1_TIME *s, const char *str)
 
     return 1;
 }
+
+/*
+ * Convert an ASN1_TIME into a broken-down struct tm.
+ *
+ * A NULL t means "now": the current time() is converted with
+ * OPENSSL_gmtime().  Otherwise the conversion is delegated according to
+ * t->type (UTCTIME or GENERALIZEDTIME).
+ *
+ * Returns 1 on success and 0 on failure, including when t->type is neither
+ * of the two supported types; in the delegated cases the helper's return
+ * value is passed through.
+ */
+static int asn1_time_to_tm(struct tm *tm, const ASN1_TIME *t)
+{
+    if (t == NULL) {
+        time_t now_t;
+        time(&now_t);
+        if (OPENSSL_gmtime(&now_t, tm))
+            return 1;
+        return 0;
+    }
+
+    if (t->type == V_ASN1_UTCTIME)
+        return asn1_utctime_to_tm(tm, t);
+    else if (t->type == V_ASN1_GENERALIZEDTIME)
+        return asn1_generalizedtime_to_tm(tm, t);
+
+    /* Any other string type cannot be interpreted as a time. */
+    return 0;
+}
+
+/*
+ * Compute the difference between two ASN1_TIME values.
+ *
+ * Both times are first converted with asn1_time_to_tm(); a NULL from or to
+ * is converted from the current time there.  Returns 0 if either conversion
+ * fails, otherwise the result of OPENSSL_gmtime_diff(), which receives pday
+ * and psec as its output arguments.
+ */
+int ASN1_TIME_diff(int *pday, int *psec,
+                   const ASN1_TIME *from, const ASN1_TIME *to)
+{
+    struct tm tm_from, tm_to;
+    if (!asn1_time_to_tm(&tm_from, from))
+        return 0;
+    if (!asn1_time_to_tm(&tm_to, to))
+        return 0;
+    return OPENSSL_gmtime_diff(pday, psec, &tm_from, &tm_to);
+}
index 179de6d..724a10b 100644 (file)
@@ -61,6 +61,7 @@
 #include "cryptlib.h"
 #include "o_time.h"
 #include <openssl/asn1.h>
+#include "asn1_locl.h"
 
 #if 0
 int i2d_ASN1_UTCTIME(ASN1_UTCTIME *a, unsigned char **pp)
@@ -109,7 +110,7 @@ ASN1_UTCTIME *d2i_ASN1_UTCTIME(ASN1_UTCTIME **a, unsigned char **pp,
 
 #endif
 
-int ASN1_UTCTIME_check(ASN1_UTCTIME *d)
+int asn1_utctime_to_tm(struct tm *tm, const ASN1_UTCTIME *d)
 {
     static const int min[8] = { 0, 1, 1, 0, 0, 0, 0, 0 };
     static const int max[8] = { 99, 12, 31, 23, 59, 59, 12, 59 };
@@ -127,6 +128,8 @@ int ASN1_UTCTIME_check(ASN1_UTCTIME *d)
     for (i = 0; i < 6; i++) {
         if ((i == 5) && ((a[o] == 'Z') || (a[o] == '+') || (a[o] == '-'))) {
             i++;
+            if (tm)
+                tm->tm_sec = 0;
             break;
         }
         if ((a[o] < '0') || (a[o] > '9'))
@@ -143,10 +146,33 @@ int ASN1_UTCTIME_check(ASN1_UTCTIME *d)
 
         if ((n < min[i]) || (n > max[i]))
             goto err;
+        if (tm) {
+            switch (i) {
+            case 0:
+                tm->tm_year = n < 50 ? n + 100 : n;
+                break;
+            case 1:
+                tm->tm_mon = n - 1;
+                break;
+            case 2:
+                tm->tm_mday = n;
+                break;
+            case 3:
+                tm->tm_hour = n;
+                break;
+            case 4:
+                tm->tm_min = n;
+                break;
+            case 5:
+                tm->tm_sec = n;
+                break;
+            }
+        }
     }
     if (a[o] == 'Z')
         o++;
     else if ((a[o] == '+') || (a[o] == '-')) {
+        int offsign = a[o] == '-' ? -1 : 1, offset = 0;
         o++;
         if (o + 4 > l)
             goto err;
@@ -160,12 +186,25 @@ int ASN1_UTCTIME_check(ASN1_UTCTIME *d)
             n = (n * 10) + a[o] - '0';
             if ((n < min[i]) || (n > max[i]))
                 goto err;
+            if (tm) {
+                if (i == 6)
+                    offset = n * 3600;
+                else if (i == 7)
+                    offset += n * 60;
+            }
             o++;
         }
+        if (offset && !OPENSSL_gmtime_adj(tm, 0, offset * offsign))
+            return 0;
     }
-    return (o == l);
+    return o == l;
  err:
-    return (0);
+    return 0;
+}
+
+/*
+ * Syntax check of a UTCTime value.  Thin wrapper that calls
+ * asn1_utctime_to_tm() with a NULL tm, so the "if (tm)" guarded stores in
+ * that function are skipped, and returns its result unchanged.
+ */
+int ASN1_UTCTIME_check(const ASN1_UTCTIME *d)
+{
+    return asn1_utctime_to_tm(NULL, d);
 }
 
 int ASN1_UTCTIME_set_string(ASN1_UTCTIME *s, const char *str)
@@ -249,43 +288,26 @@ ASN1_UTCTIME *ASN1_UTCTIME_adj(ASN1_UTCTIME *s, time_t t,
 
 int ASN1_UTCTIME_cmp_time_t(const ASN1_UTCTIME *s, time_t t)
 {
-    struct tm *tm;
-    struct tm data;
-    int offset;
-    int year;
-
-#define g2(p) (((p)[0]-'0')*10+(p)[1]-'0')
-
-    if (s->data[12] == 'Z')
-        offset = 0;
-    else {
-        offset = g2(s->data + 13) * 60 + g2(s->data + 15);
-        if (s->data[12] == '-')
-            offset = -offset;
-    }
+    struct tm stm, ttm;
+    int day, sec;
 
-    t -= offset * 60;           /* FIXME: may overflow in extreme cases */
+    /*
+     * NB: -1, 0 and 1 are all valid comparison results, so -2 is used to
+     * indicate an error (s cannot be converted, or a time helper fails).
+     */
+    if (!asn1_utctime_to_tm(&stm, s))
+        return -2;
 
-    tm = OPENSSL_gmtime(&t, &data);
-    /*
-     * NB: -1, 0, 1 already valid return values so use -2 to indicate error.
-     */
-    if (tm == NULL)
+    if (!OPENSSL_gmtime(&t, &ttm))
         return -2;
 
-#define return_cmp(a,b) if ((a)<(b)) return -1; else if ((a)>(b)) return 1
-    year = g2(s->data);
-    if (year < 50)
-        year += 100;
-    return_cmp(year, tm->tm_year);
-    return_cmp(g2(s->data + 2) - 1, tm->tm_mon);
-    return_cmp(g2(s->data + 4), tm->tm_mday);
-    return_cmp(g2(s->data + 6), tm->tm_hour);
-    return_cmp(g2(s->data + 8), tm->tm_min);
-    return_cmp(g2(s->data + 10), tm->tm_sec);
-#undef g2
-#undef return_cmp
+    /* Difference taken from t (ttm) to s (stm); its sign is the result. */
+    if (!OPENSSL_gmtime_diff(&day, &sec, &ttm, &stm))
+        return -2;
 
+    /* The day count decides first; seconds only break a same-day tie. */
+    if (day > 0)
+        return 1;
+    if (day < 0)
+        return -1;
+    if (sec > 0)
+        return 1;
+    if (sec < 0)
+        return -1;
     return 0;
 }
 
index 45f3f40..5389c04 100644 (file)
@@ -68,6 +68,7 @@
 extern const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[];
 extern const EVP_PKEY_ASN1_METHOD dsa_asn1_meths[];
 extern const EVP_PKEY_ASN1_METHOD dh_asn1_meth;
+extern const EVP_PKEY_ASN1_METHOD dhx_asn1_meth;
 extern const EVP_PKEY_ASN1_METHOD eckey_asn1_meth;
 extern const EVP_PKEY_ASN1_METHOD hmac_asn1_meth;
 extern const EVP_PKEY_ASN1_METHOD cmac_asn1_meth;
@@ -92,7 +93,10 @@ static const EVP_PKEY_ASN1_METHOD *standard_methods[] = {
     &eckey_asn1_meth,
 #endif
     &hmac_asn1_meth,
-    &cmac_asn1_meth
+    &cmac_asn1_meth,
+#ifndef OPENSSL_NO_DH
+    &dhx_asn1_meth
+#endif
 };
 
 typedef int sk_cmp_fn_type(const char *const *a, const char *const *b);
@@ -460,3 +464,21 @@ void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth,
 {
     ameth->pkey_ctrl = pkey_ctrl;
 }
+
+/*
+ * Install the item_verify and item_sign callbacks of an ASN.1 public key
+ * method.  Both pointers are stored as given (no NULL checks), replacing
+ * whatever ameth held before.
+ */
+void EVP_PKEY_asn1_set_item(EVP_PKEY_ASN1_METHOD *ameth,
+                            int (*item_verify) (EVP_MD_CTX *ctx,
+                                                const ASN1_ITEM *it,
+                                                void *asn,
+                                                X509_ALGOR *a,
+                                                ASN1_BIT_STRING *sig,
+                                                EVP_PKEY *pkey),
+                            int (*item_sign) (EVP_MD_CTX *ctx,
+                                              const ASN1_ITEM *it,
+                                              void *asn,
+                                              X509_ALGOR *alg1,
+                                              X509_ALGOR *alg2,
+                                              ASN1_BIT_STRING *sig))
+{
+    ameth->item_sign = item_sign;
+    ameth->item_verify = item_verify;
+}
index 39b7833..68e791f 100644 (file)
@@ -207,13 +207,13 @@ typedef struct asn1_const_ctx_st {
 # define ASN1_OBJECT_FLAG_CRITICAL        0x02/* critical x509v3 object id */
 # define ASN1_OBJECT_FLAG_DYNAMIC_STRINGS 0x04/* internal use */
 # define ASN1_OBJECT_FLAG_DYNAMIC_DATA    0x08/* internal use */
-typedef struct asn1_object_st {
+struct asn1_object_st {
     const char *sn, *ln;
     int nid;
     int length;
     const unsigned char *data;  /* data remains const after init */
     int flags;                  /* Should we free this one */
-} ASN1_OBJECT;
+};
 
 # define ASN1_STRING_FLAG_BITS_LEFT 0x08/* Set if 0x07 has bits left value */
 /*
@@ -843,7 +843,7 @@ int ASN1_INTEGER_cmp(const ASN1_INTEGER *x, const ASN1_INTEGER *y);
 
 DECLARE_ASN1_FUNCTIONS(ASN1_ENUMERATED)
 
-int ASN1_UTCTIME_check(ASN1_UTCTIME *a);
+int ASN1_UTCTIME_check(const ASN1_UTCTIME *a);
 ASN1_UTCTIME *ASN1_UTCTIME_set(ASN1_UTCTIME *s, time_t t);
 ASN1_UTCTIME *ASN1_UTCTIME_adj(ASN1_UTCTIME *s, time_t t,
                                int offset_day, long offset_sec);
@@ -853,13 +853,15 @@ int ASN1_UTCTIME_cmp_time_t(const ASN1_UTCTIME *s, time_t t);
 time_t ASN1_UTCTIME_get(const ASN1_UTCTIME *s);
 # endif
 
-int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *a);
+int ASN1_GENERALIZEDTIME_check(const ASN1_GENERALIZEDTIME *a);
 ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_set(ASN1_GENERALIZEDTIME *s,
                                                time_t t);
 ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_adj(ASN1_GENERALIZEDTIME *s,
                                                time_t t, int offset_day,
                                                long offset_sec);
 int ASN1_GENERALIZEDTIME_set_string(ASN1_GENERALIZEDTIME *s, const char *str);
+int ASN1_TIME_diff(int *pday, int *psec,
+                   const ASN1_TIME *from, const ASN1_TIME *to);
 
 DECLARE_ASN1_FUNCTIONS(ASN1_OCTET_STRING)
 ASN1_OCTET_STRING *ASN1_OCTET_STRING_dup(const ASN1_OCTET_STRING *a);
index 9f5ed84..4c004fa 100644 (file)
@@ -59,6 +59,9 @@
 
 /* Internal ASN1 structures and functions: not for application use */
 
+int asn1_utctime_to_tm(struct tm *tm, const ASN1_UTCTIME *d);
+int asn1_generalizedtime_to_tm(struct tm *tm, const ASN1_GENERALIZEDTIME *d);
+
 /* ASN1 print context structure */
 
 struct asn1_pctx_st {
index 4e7c45d..8aab551 100644 (file)
@@ -228,6 +228,21 @@ int X509_print_ex(BIO *bp, X509 *x, unsigned long nmflags,
         }
     }
 
+    if (!(cflag & X509_FLAG_NO_IDS)) {
+        if (ci->issuerUID) {
+            if (BIO_printf(bp, "%8sIssuer Unique ID: ", "") <= 0)
+                goto err;
+            if (!X509_signature_dump(bp, ci->issuerUID, 12))
+                goto err;
+        }
+        if (ci->subjectUID) {
+            if (BIO_printf(bp, "%8sSubject Unique ID: ", "") <= 0)
+                goto err;
+            if (!X509_signature_dump(bp, ci->subjectUID, 12))
+                goto err;
+        }
+    }
+
     if (!(cflag & X509_FLAG_NO_EXTENSIONS))
         X509V3_extensions_print(bp, "X509v3 extensions",
                                 ci->extensions, cflag, 8);
index e258c71..0279503 100644 (file)
@@ -58,8 +58,8 @@
 
 #include <stdio.h>
 #include "cryptlib.h"
-#include "asn1_locl.h"
 #include <openssl/asn1t.h>
+#include "asn1_locl.h"
 #include <openssl/x509.h>
 #include <openssl/x509v3.h>
 
@@ -341,6 +341,8 @@ ASN1_SEQUENCE_ref(X509_CRL, crl_cb, CRYPTO_LOCK_X509_CRL) = {
 
 IMPLEMENT_ASN1_FUNCTIONS(X509_REVOKED)
 
+IMPLEMENT_ASN1_DUP_FUNCTION(X509_REVOKED)
+
 IMPLEMENT_ASN1_FUNCTIONS(X509_CRL_INFO)
 
 IMPLEMENT_ASN1_FUNCTIONS(X509_CRL)
index f56e837..5f266a2 100644 (file)
@@ -208,3 +208,23 @@ int i2d_X509_AUX(X509 *a, unsigned char **pp)
         length += i2d_X509_CERT_AUX(a->aux, pp);
     return length;
 }
+
+/*
+ * Encode the certificate's cert_info with i2d_X509_CINF() and return its
+ * result.  The enc.modified flag is set first.
+ * NOTE(review): presumably this forces a fresh encoding instead of reuse of
+ * a cached one - confirm against the ASN.1 encoding-cache code.
+ */
+int i2d_re_X509_tbs(X509 *x, unsigned char **pp)
+{
+    x->cert_info->enc.modified = 1;
+    return i2d_X509_CINF(x->cert_info, pp);
+}
+
+/*
+ * Hand out the certificate's signature and signature algorithm.
+ *
+ * Each output argument is optional: a NULL psig or palg is simply skipped.
+ * The pointers stored are the certificate's own members (x->signature and
+ * x->sig_alg), assigned directly; nothing is copied here.
+ */
+void X509_get0_signature(ASN1_BIT_STRING **psig, X509_ALGOR **palg,
+                         const X509 *x)
+{
+    if (psig)
+        *psig = x->signature;
+    if (palg)
+        *palg = x->sig_alg;
+}
+
+/*
+ * Return the NID that OBJ_obj2nid() reports for the certificate's
+ * signature algorithm object (x->sig_alg->algorithm).
+ */
+int X509_get_signature_nid(const X509 *x)
+{
+    return OBJ_obj2nid(x->sig_alg->algorithm);
+}
index 3293c72..ed8e521 100644 (file)
@@ -182,3 +182,28 @@ int BIO_dump_indent(BIO *bp, const char *s, int len, int indent)
 {
     return BIO_dump_indent_cb(write_bio, bp, s, len, indent);
 }
+
+/*
+ * Print datalen bytes of data to out as two-digit upper-case hex values
+ * separated by ':' (no separator after the last byte).
+ *
+ * After every `width` bytes a newline is written, and each continuation
+ * line is preceded by `indent` spaces.  The first line is not indented and
+ * no trailing newline is written.  A width of 0 means "never wrap"; it
+ * previously led to a modulo by zero.
+ *
+ * Always returns 1, also when datalen < 1 (nothing is printed then).
+ * BIO_printf() failures are not reported.
+ */
+int BIO_hex_string(BIO *out, int indent, int width, unsigned char *data,
+                   int datalen)
+{
+    int i, j = 0;
+
+    if (datalen < 1)
+        return 1;
+
+    /*
+     * Guard the modulo below: with width == datalen the column counter j
+     * never returns to 0 while printing, so no line break is emitted.
+     */
+    if (width == 0)
+        width = datalen;
+
+    /* All bytes but the last one are followed by a ':' separator. */
+    for (i = 0; i < datalen - 1; i++) {
+        /* j == 0 after the first byte means a new line was just started */
+        if (i && !j)
+            BIO_printf(out, "%*s", indent, "");
+
+        BIO_printf(out, "%02X:", data[i]);
+
+        j = (j + 1) % width;
+        if (!j)
+            BIO_printf(out, "\n");
+    }
+
+    /* Last byte: same indentation rule, but no trailing separator. */
+    if (i && !j)
+        BIO_printf(out, "%*s", indent, "");
+    BIO_printf(out, "%02X", data[datalen - 1]);
+    return 1;
+}
index bda882c..5bad0a2 100644 (file)
@@ -225,13 +225,17 @@ int BIO_get_port(const char *str, unsigned short *port_ptr)
 int BIO_sock_error(int sock)
 {
     int j, i;
-    int size;
+    union {
+        size_t s;
+        int i;
+    } size;
 
 # if defined(OPENSSL_SYS_BEOS_R5)
     return 0;
 # endif
 
-    size = sizeof(int);
+    /* heuristic way to adapt for platforms that expect 64-bit optlen */
+    size.s = 0, size.i = sizeof(j);
     /*
      * Note: under Windows the third parameter is of type (char *) whereas
      * under other systems it is (void *) if you don't have a cast it will
index d583cc1..f78796b 100644 (file)
@@ -174,6 +174,7 @@ extern "C" {
 
 # define BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT   45/* Next DTLS handshake timeout
                                               * to adjust socket timeouts */
+# define BIO_CTRL_DGRAM_SET_DONT_FRAG      48
 
 # define BIO_CTRL_DGRAM_GET_MTU_OVERHEAD   49
 
@@ -725,6 +726,9 @@ int BIO_dump_indent(BIO *b, const char *bytes, int len, int indent);
 int BIO_dump_fp(FILE *fp, const char *s, int len);
 int BIO_dump_indent_fp(FILE *fp, const char *s, int len, int indent);
 # endif
+int BIO_hex_string(BIO *out, int indent, int width, unsigned char *data,
+                   int datalen);
+
 struct hostent *BIO_gethostbyname(const char *name);
 /*-
  * We might want a thread-safe interface too:
@@ -761,8 +765,8 @@ int BIO_dgram_sctp_wait_for_dry(BIO *b);
 int BIO_dgram_sctp_msg_waiting(BIO *b);
 # endif
 BIO *BIO_new_fd(int fd, int close_flag);
-BIO *BIO_new_connect(char *host_port);
-BIO *BIO_new_accept(char *host_port);
+BIO *BIO_new_connect(const char *host_port);
+BIO *BIO_new_accept(const char *host_port);
 
 int BIO_new_bio_pair(BIO **bio1, size_t writebuf1,
                      BIO **bio2, size_t writebuf2);
index 6dd6162..d9007aa 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/bio/bio_err.c */
 /* ====================================================================
- * Copyright (c) 1999-2011 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2015 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
index d08292c..4a5e39b 100644 (file)
@@ -445,7 +445,7 @@ static int acpt_puts(BIO *bp, const char *str)
     return (ret);
 }
 
-BIO *BIO_new_accept(char *str)
+BIO *BIO_new_accept(const char *str)
 {
     BIO *ret;
 
index 6a5e8de..42d0aff 100644 (file)
@@ -585,7 +585,7 @@ static int conn_puts(BIO *bp, const char *str)
     return (ret);
 }
 
-BIO *BIO_new_connect(char *str)
+BIO *BIO_new_connect(const char *str)
 {
     BIO *ret;
 
index e3e3dd0..7fcd831 100644 (file)
@@ -65,7 +65,7 @@
 #include <openssl/bio.h>
 #ifndef OPENSSL_NO_DGRAM
 
-# if defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VMS)
+# if defined(OPENSSL_SYS_VMS)
 #  include <sys/timeb.h>
 # endif
 
 #  define IP_MTU      14        /* linux is lame */
 # endif
 
+# if OPENSSL_USE_IPV6 && !defined(IPPROTO_IPV6)
+#  define IPPROTO_IPV6 41       /* windows is lame */
+# endif
+
 # if defined(__FreeBSD__) && defined(IN6_IS_ADDR_V4MAPPED)
 /* Standard definition causes type-punning problems. */
 #  undef IN6_IS_ADDR_V4MAPPED
@@ -496,8 +500,8 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr)
     int *ip;
     struct sockaddr *to = NULL;
     bio_dgram_data *data = NULL;
-# if defined(OPENSSL_SYS_LINUX) && (defined(IP_MTU_DISCOVER) || defined(IP_MTU))
     int sockopt_val = 0;
+# if defined(OPENSSL_SYS_LINUX) && (defined(IP_MTU_DISCOVER) || defined(IP_MTU))
     socklen_t sockopt_len;      /* assume that system supporting IP_MTU is
                                  * modern enough to define socklen_t */
     socklen_t addr_len;
@@ -882,6 +886,61 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr)
             ret = 0;
         break;
 # endif
+    case BIO_CTRL_DGRAM_SET_DONT_FRAG:
+        /*
+         * Switch the "don't fragment" behaviour of the socket on (num != 0)
+         * or off, using whichever socket option the platform offers for the
+         * peer's address family.  ret is the setsockopt() result on success
+         * and -1 when the call fails, when no suitable option is available
+         * at compile time, or when the address family is not handled.
+         */
+        sockopt_val = num ? 1 : 0;
+
+        switch (data->peer.sa.sa_family) {
+        case AF_INET:
+# if defined(IP_DONTFRAG)
+            if ((ret = setsockopt(b->num, IPPROTO_IP, IP_DONTFRAG,
+                                  &sockopt_val, sizeof(sockopt_val))) < 0) {
+                perror("setsockopt");
+                ret = -1;
+            }
+# elif defined(OPENSSL_SYS_LINUX) && defined(IP_MTU_DISCOVER) && defined (IP_PMTUDISC_PROBE)
+            if ((sockopt_val = num ? IP_PMTUDISC_PROBE : IP_PMTUDISC_DONT),
+                (ret = setsockopt(b->num, IPPROTO_IP, IP_MTU_DISCOVER,
+                                  &sockopt_val, sizeof(sockopt_val))) < 0) {
+                perror("setsockopt");
+                ret = -1;
+            }
+# elif defined(OPENSSL_SYS_WINDOWS) && defined(IP_DONTFRAGMENT)
+            if ((ret = setsockopt(b->num, IPPROTO_IP, IP_DONTFRAGMENT,
+                                  (const char *)&sockopt_val,
+                                  sizeof(sockopt_val))) < 0) {
+                perror("setsockopt");
+                ret = -1;
+            }
+# else
+            ret = -1;
+# endif
+            break;
+# if OPENSSL_USE_IPV6
+        case AF_INET6:
+#  if defined(IPV6_DONTFRAG)
+            if ((ret = setsockopt(b->num, IPPROTO_IPV6, IPV6_DONTFRAG,
+                                  (const void *)&sockopt_val,
+                                  sizeof(sockopt_val))) < 0) {
+                perror("setsockopt");
+                ret = -1;
+            }
+#  elif defined(OPENSSL_SYS_LINUX) && defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_PROBE)
+            /*
+             * The guard tests the same macros the code below uses
+             * (IPV6_MTU_DISCOVER and the IP_PMTUDISC_* values), mirroring
+             * the AF_INET fallback above.
+             */
+            if ((sockopt_val = num ? IP_PMTUDISC_PROBE : IP_PMTUDISC_DONT),
+                (ret = setsockopt(b->num, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
+                                  &sockopt_val, sizeof(sockopt_val))) < 0) {
+                perror("setsockopt");
+                ret = -1;
+            }
+#  else
+            ret = -1;
+#  endif
+            break;
+# endif
+        default:
+            ret = -1;
+            break;
+        }
+        break;
     case BIO_CTRL_DGRAM_GET_MTU_OVERHEAD:
         ret = dgram_get_mtu_overhead(data);
         break;
@@ -1995,11 +2054,22 @@ int BIO_dgram_non_fatal_error(int err)
 
 static void get_current_time(struct timeval *t)
 {
-# ifdef OPENSSL_SYS_WIN32
-    struct _timeb tb;
-    _ftime(&tb);
-    t->tv_sec = (long)tb.time;
-    t->tv_usec = (long)tb.millitm * 1000;
+# if defined(_WIN32)
+    SYSTEMTIME st;
+    union {
+        unsigned __int64 ul;
+        FILETIME ft;
+    } now;
+
+    GetSystemTime(&st);
+    SystemTimeToFileTime(&st, &now.ft);
+#  ifdef  __MINGW32__
+    now.ul -= 116444736000000000ULL;
+#  else
+    now.ul -= 116444736000000000UI64; /* re-bias to 1/1/1970 */
+#  endif
+    t->tv_sec = (long)(now.ul / 10000000);
+    t->tv_usec = ((int)(now.ul % 10000000)) / 10;
 # elif defined(OPENSSL_SYS_VMS)
     struct timeb tb;
     ftime(&tb);
index ccef578..5f4e344 100644 (file)
 
 #if defined(OPENSSL_NO_POSIX_IO)
 /*
- * One can argue that one should implement dummy placeholder for
- * BIO_s_fd here...
+ * Dummy placeholder for BIO_s_fd...
  */
+/* OPENSSL_NO_POSIX_IO stub: no fd BIO can be created, always returns NULL. */
+BIO *BIO_new_fd(int fd, int close_flag)
+{
+    return NULL;
+}
+
+/* OPENSSL_NO_POSIX_IO stub: ignores err and always returns 0. */
+int BIO_fd_non_fatal_error(int err)
+{
+    return 0;
+}
+
+/* OPENSSL_NO_POSIX_IO stub: ignores i and always returns 0. */
+int BIO_fd_should_retry(int i)
+{
+    return 0;
+}
+
+/* OPENSSL_NO_POSIX_IO stub: there is no fd method table, returns NULL. */
+BIO_METHOD *BIO_s_fd(void)
+{
+    return NULL;
+}
 #else
 /*
  * As for unconditional usage of "UPLINK" interface in this module.
index 3d0158c..215855e 100644 (file)
@@ -77,6 +77,12 @@ sparcv9a-mont.s:     asm/sparcv9a-mont.pl
        $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@
 sparcv9-mont.s:                asm/sparcv9-mont.pl
        $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@
+vis3-mont.s:           asm/vis3-mont.pl
+       $(PERL) asm/vis3-mont.pl $(CFLAGS) > $@
+sparct4-mont.S:        asm/sparct4-mont.pl
+       $(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@
+sparcv9-gf2m.S:        asm/sparcv9-gf2m.pl
+       $(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@
 
 bn-mips3.o:    asm/mips3.s
        @if [ "$(CC)" = "gcc" ]; then \
@@ -102,8 +108,10 @@ x86_64-mont5.s:    asm/x86_64-mont5.pl
        $(PERL) asm/x86_64-mont5.pl $(PERLASM_SCHEME) > $@
 x86_64-gf2m.s: asm/x86_64-gf2m.pl
        $(PERL) asm/x86_64-gf2m.pl $(PERLASM_SCHEME) > $@
-modexp512-x86_64.s:    asm/modexp512-x86_64.pl
-       $(PERL) asm/modexp512-x86_64.pl $(PERLASM_SCHEME) > $@
+rsaz-x86_64.s: asm/rsaz-x86_64.pl
+       $(PERL) asm/rsaz-x86_64.pl $(PERLASM_SCHEME) > $@
+rsaz-avx2.s:   asm/rsaz-avx2.pl 
+       $(PERL) asm/rsaz-avx2.pl $(PERLASM_SCHEME) > $@
 
 bn-ia64.s:     asm/ia64.S
        $(CC) $(CFLAGS) -E asm/ia64.S > $@
@@ -125,14 +133,15 @@ ppc-mont.s:       asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@
 ppc64-mont.s:  asm/ppc64-mont.pl;$(PERL) asm/ppc64-mont.pl $(PERLASM_SCHEME) $@
 
 alpha-mont.s:  asm/alpha-mont.pl
-       (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \
+       (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \
        $(PERL) asm/alpha-mont.pl > $$preproc && \
-       $(CC) -E $$preproc > $@ && rm $$preproc)
+       $(CC) -E -P $$preproc > $@ && rm $$preproc)
 
 # GNU make "catch all"
-%-mont.s:      asm/%-mont.pl;  $(PERL) $< $(PERLASM_SCHEME) $@
+%-mont.S:      asm/%-mont.pl;  $(PERL) $< $(PERLASM_SCHEME) $@
 %-gf2m.S:      asm/%-gf2m.pl;  $(PERL) $< $(PERLASM_SCHEME) $@
 
+armv4-mont.o:  armv4-mont.S
 armv4-gf2m.o:  armv4-gf2m.S
 
 files:
@@ -244,6 +253,7 @@ bn_exp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
 bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
 bn_exp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_exp.c bn_lcl.h
+bn_exp.o: rsaz_exp.h
 bn_exp2.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
 bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
 bn_exp2.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
index c52e0b7..8f529c9 100644 (file)
 # length, more for longer keys. Even though NEON 1x1 multiplication
 # runs in even less cycles, ~30, improvement is measurable only on
 # longer keys. One has to optimize code elsewhere to get NEON glow...
+#
+# April 2014
+#
+# Double bn_GF2m_mul_2x2 performance by using algorithm from paper
+# referred below, which improves ECDH and ECDSA verify benchmarks
+# by 18-40%.
+#
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Polynomial Multiplication on ARM Processors using the NEON Engine.
+# 
+# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
 
 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
-sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
-sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }
-sub Q()     { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; }
-
 $code=<<___;
 #include "arm_arch.h"
 
 .text
 .code  32
-
-#if __ARM_ARCH__>=7
-.fpu   neon
-
-.type  mul_1x1_neon,%function
-.align 5
-mul_1x1_neon:
-       vshl.u64        `&Dlo("q1")`,d16,#8     @ q1-q3 are slided $a
-       vmull.p8        `&Q("d0")`,d16,d17      @ a·bb
-       vshl.u64        `&Dlo("q2")`,d16,#16
-       vmull.p8        q1,`&Dlo("q1")`,d17     @ a<<8·bb
-       vshl.u64        `&Dlo("q3")`,d16,#24
-       vmull.p8        q2,`&Dlo("q2")`,d17     @ a<<16·bb
-       vshr.u64        `&Dlo("q1")`,#8
-       vmull.p8        q3,`&Dlo("q3")`,d17     @ a<<24·bb
-       vshl.u64        `&Dhi("q1")`,#24
-       veor            d0,`&Dlo("q1")`
-       vshr.u64        `&Dlo("q2")`,#16
-       veor            d0,`&Dhi("q1")`
-       vshl.u64        `&Dhi("q2")`,#16
-       veor            d0,`&Dlo("q2")`
-       vshr.u64        `&Dlo("q3")`,#24
-       veor            d0,`&Dhi("q2")`
-       vshl.u64        `&Dhi("q3")`,#8
-       veor            d0,`&Dlo("q3")`
-       veor            d0,`&Dhi("q3")`
-       bx      lr
-.size  mul_1x1_neon,.-mul_1x1_neon
-#endif
 ___
 ################
 # private interface to mul_1x1_ialu
@@ -159,56 +137,17 @@ ___
 # void bn_GF2m_mul_2x2(BN_ULONG *r,
 #      BN_ULONG a1,BN_ULONG a0,
 #      BN_ULONG b1,BN_ULONG b0);       # r[3..0]=a1a0·b1b0
-
-($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23));
-
+{
 $code.=<<___;
 .global        bn_GF2m_mul_2x2
 .type  bn_GF2m_mul_2x2,%function
 .align 5
 bn_GF2m_mul_2x2:
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
        ldr     r12,.LOPENSSL_armcap
 .Lpic: ldr     r12,[pc,r12]
        tst     r12,#1
-       beq     .Lialu
-
-       veor    $A1,$A1
-       vmov.32 $B1,r3,r3               @ two copies of b1
-       vmov.32 ${A1}[0],r1             @ a1
-
-       veor    $A0,$A0
-       vld1.32 ${B0}[],[sp,:32]        @ two copies of b0
-       vmov.32 ${A0}[0],r2             @ a0
-       mov     r12,lr
-
-       vmov    d16,$A1
-       vmov    d17,$B1
-       bl      mul_1x1_neon            @ a1·b1
-       vmov    $A1B1,d0
-
-       vmov    d16,$A0
-       vmov    d17,$B0
-       bl      mul_1x1_neon            @ a0·b0
-       vmov    $A0B0,d0
-
-       veor    d16,$A0,$A1
-       veor    d17,$B0,$B1
-       veor    $A0,$A0B0,$A1B1
-       bl      mul_1x1_neon            @ (a0+a1)·(b0+b1)
-
-       veor    d0,$A0                  @ (a0+a1)·(b0+b1)-a0·b0-a1·b1
-       vshl.u64 d1,d0,#32
-       vshr.u64 d0,d0,#32
-       veor    $A0B0,d1
-       veor    $A1B1,d0
-       vst1.32 {${A0B0}[0]},[r0,:32]!
-       vst1.32 {${A0B0}[1]},[r0,:32]!
-       vst1.32 {${A1B1}[0]},[r0,:32]!
-       vst1.32 {${A1B1}[1]},[r0,:32]
-       bx      r12
-.align 4
-.Lialu:
+       bne     .LNEON
 #endif
 ___
 $ret="r10";    # reassigned 1st argument
@@ -260,8 +199,72 @@ $code.=<<___;
        moveq   pc,lr                   @ be binary compatible with V4, yet
        bx      lr                      @ interoperable with Thumb ISA:-)
 #endif
+___
+}
+{
+my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
+my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
+.align 5
+.LNEON:
+       ldr             r12, [sp]               @ 5th argument
+       vmov.32         $a, r2, r1
+       vmov.32         $b, r12, r3
+       vmov.i64        $k48, #0x0000ffffffffffff
+       vmov.i64        $k32, #0x00000000ffffffff
+       vmov.i64        $k16, #0x000000000000ffff
+
+       vext.8          $t0#lo, $a, $a, #1      @ A1
+       vmull.p8        $t0, $t0#lo, $b         @ F = A1*B
+       vext.8          $r#lo, $b, $b, #1       @ B1
+       vmull.p8        $r, $a, $r#lo           @ E = A*B1
+       vext.8          $t1#lo, $a, $a, #2      @ A2
+       vmull.p8        $t1, $t1#lo, $b         @ H = A2*B
+       vext.8          $t3#lo, $b, $b, #2      @ B2
+       vmull.p8        $t3, $a, $t3#lo         @ G = A*B2
+       vext.8          $t2#lo, $a, $a, #3      @ A3
+       veor            $t0, $t0, $r            @ L = E + F
+       vmull.p8        $t2, $t2#lo, $b         @ J = A3*B
+       vext.8          $r#lo, $b, $b, #3       @ B3
+       veor            $t1, $t1, $t3           @ M = G + H
+       vmull.p8        $r, $a, $r#lo           @ I = A*B3
+       veor            $t0#lo, $t0#lo, $t0#hi  @ t0 = (L) (P0 + P1) << 8
+       vand            $t0#hi, $t0#hi, $k48
+       vext.8          $t3#lo, $b, $b, #4      @ B4
+       veor            $t1#lo, $t1#lo, $t1#hi  @ t1 = (M) (P2 + P3) << 16
+       vand            $t1#hi, $t1#hi, $k32
+       vmull.p8        $t3, $a, $t3#lo         @ K = A*B4
+       veor            $t2, $t2, $r            @ N = I + J
+       veor            $t0#lo, $t0#lo, $t0#hi
+       veor            $t1#lo, $t1#lo, $t1#hi
+       veor            $t2#lo, $t2#lo, $t2#hi  @ t2 = (N) (P4 + P5) << 24
+       vand            $t2#hi, $t2#hi, $k16
+       vext.8          $t0, $t0, $t0, #15
+       veor            $t3#lo, $t3#lo, $t3#hi  @ t3 = (K) (P6 + P7) << 32
+       vmov.i64        $t3#hi, #0
+       vext.8          $t1, $t1, $t1, #14
+       veor            $t2#lo, $t2#lo, $t2#hi
+       vmull.p8        $r, $a, $b              @ D = A*B
+       vext.8          $t3, $t3, $t3, #12
+       vext.8          $t2, $t2, $t2, #13
+       veor            $t0, $t0, $t1
+       veor            $t2, $t2, $t3
+       veor            $r, $r, $t0
+       veor            $r, $r, $t2
+
+       vst1.32         {$r}, [r0]
+       ret             @ bx lr
+#endif
+___
+}
+$code.=<<___;
 .size  bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
 .align 5
 .LOPENSSL_armcap:
 .word  OPENSSL_armcap_P-(.Lpic+8)
@@ -269,10 +272,18 @@ $code.=<<___;
 .asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
 .align 5
 
+#if __ARM_MAX_ARCH__>=7
 .comm  OPENSSL_armcap_P,4,4
+#endif
 ___
 
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;    # make it possible to compile with -march=armv4
-print $code;
+foreach (split("\n",$code)) {
+       s/\`([^\`]*)\`/eval $1/geo;
+
+       s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo       or
+       s/\bret\b/bx    lr/go           or
+       s/\bbx\s+lr\b/.word\t0xe12fff1e/go;    # make it possible to compile with -march=armv4
+
+       print $_,"\n";
+}
 close STDOUT;   # enforce flush
index f78a8b5..1d330e9 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 # than 1/2KB. Windows CE port would be trivial, as it's exclusively
 # about decorations, ABI and instruction syntax are identical.
 
+# November 2013
+#
+# Add NEON code path, which handles lengths divisible by 8. RSA/DSA
+# performance improvement on Cortex-A8 is ~45-100% depending on key
+# length, more for longer keys. On Cortex-A15 the span is ~10-105%.
+# On Snapdragon S4 improvement was measured to vary from ~70% to
+# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is
+# rather because original integer-only code seems to perform
+# suboptimally on S4. Situation on Cortex-A9 is unfortunately
+# different. It's being looked into, but the trouble is that
+# performance for vectors longer than 256 bits is actually a couple
+# of percent worse than for integer-only code. The code is chosen
+# for execution on all NEON-capable processors, because gain on
+# others outweighs the marginal loss on Cortex-A9.
+
 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
@@ -52,16 +67,40 @@ $_n0="$num,#14*4";
 $_num="$num,#15*4";    $_bpend=$_num;
 
 $code=<<___;
+#include "arm_arch.h"
+
 .text
+.code  32
+
+#if __ARM_MAX_ARCH__>=7
+.align 5
+.LOPENSSL_armcap:
+.word  OPENSSL_armcap_P-bn_mul_mont
+#endif
 
 .global        bn_mul_mont
 .type  bn_mul_mont,%function
 
-.align 2
+.align 5
 bn_mul_mont:
+       ldr     ip,[sp,#4]              @ load num
        stmdb   sp!,{r0,r2}             @ sp points at argument block
-       ldr     $num,[sp,#3*4]          @ load num
-       cmp     $num,#2
+#if __ARM_MAX_ARCH__>=7
+       tst     ip,#7
+       bne     .Lialu
+       adr     r0,bn_mul_mont
+       ldr     r2,.LOPENSSL_armcap
+       ldr     r0,[r0,r2]
+       tst     r0,#1                   @ NEON available?
+       ldmia   sp, {r0,r2}
+       beq     .Lialu
+       add     sp,sp,#8
+       b       bn_mul8x_mont_neon
+.align 4
+.Lialu:
+#endif
+       cmp     ip,#2
+       mov     $num,ip                 @ load num
        movlt   r0,#0
        addlt   sp,sp,#2*4
        blt     .Labrt
@@ -191,14 +230,447 @@ bn_mul_mont:
        ldmia   sp!,{r4-r12,lr}         @ restore registers
        add     sp,sp,#2*4              @ skip over {r0,r2}
        mov     r0,#1
-.Labrt:        tst     lr,#1
+.Labrt:
+#if __ARM_ARCH__>=5
+       ret                             @ bx lr
+#else
+       tst     lr,#1
        moveq   pc,lr                   @ be binary compatible with V4, yet
        bx      lr                      @ interoperable with Thumb ISA:-)
+#endif
 .size  bn_mul_mont,.-bn_mul_mont
-.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+___
+{
+sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
+sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }
+
+my ($A0,$A1,$A2,$A3)=map("d$_",(0..3));
+my ($N0,$N1,$N2,$N3)=map("d$_",(4..7));
+my ($Z,$Temp)=("q4","q5");
+my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13));
+my ($Bi,$Ni,$M0)=map("d$_",(28..31));
+my $zero=&Dlo($Z);
+my $temp=&Dlo($Temp);
+
+my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
+my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
+.type  bn_mul8x_mont_neon,%function
+.align 5
+bn_mul8x_mont_neon:
+       mov     ip,sp
+       stmdb   sp!,{r4-r11}
+       vstmdb  sp!,{d8-d15}            @ ABI specification says so
+       ldmia   ip,{r4-r5}              @ load rest of parameter block
+
+       sub             $toutptr,sp,#16
+       vld1.32         {${Bi}[0]}, [$bptr,:32]!
+       sub             $toutptr,$toutptr,$num,lsl#4
+       vld1.32         {$A0-$A3},  [$aptr]!            @ can't specify :32 :-(
+       and             $toutptr,$toutptr,#-64
+       vld1.32         {${M0}[0]}, [$n0,:32]
+       mov             sp,$toutptr                     @ alloca
+       veor            $zero,$zero,$zero
+       subs            $inner,$num,#8
+       vzip.16         $Bi,$zero
+
+       vmull.u32       $A0xB,$Bi,${A0}[0]
+       vmull.u32       $A1xB,$Bi,${A0}[1]
+       vmull.u32       $A2xB,$Bi,${A1}[0]
+       vshl.i64        $temp,`&Dhi("$A0xB")`,#16
+       vmull.u32       $A3xB,$Bi,${A1}[1]
+
+       vadd.u64        $temp,$temp,`&Dlo("$A0xB")`
+       veor            $zero,$zero,$zero
+       vmul.u32        $Ni,$temp,$M0
+
+       vmull.u32       $A4xB,$Bi,${A2}[0]
+        vld1.32        {$N0-$N3}, [$nptr]!
+       vmull.u32       $A5xB,$Bi,${A2}[1]
+       vmull.u32       $A6xB,$Bi,${A3}[0]
+       vzip.16         $Ni,$zero
+       vmull.u32       $A7xB,$Bi,${A3}[1]
+
+       bne     .LNEON_1st
+
+       @ special case for num=8, everything is in register bank...
+
+       vmlal.u32       $A0xB,$Ni,${N0}[0]
+       sub             $outer,$num,#1
+       vmlal.u32       $A1xB,$Ni,${N0}[1]
+       vmlal.u32       $A2xB,$Ni,${N1}[0]
+       vmlal.u32       $A3xB,$Ni,${N1}[1]
+
+       vmlal.u32       $A4xB,$Ni,${N2}[0]
+       vmov            $Temp,$A0xB
+       vmlal.u32       $A5xB,$Ni,${N2}[1]
+       vmov            $A0xB,$A1xB
+       vmlal.u32       $A6xB,$Ni,${N3}[0]
+       vmov            $A1xB,$A2xB
+       vmlal.u32       $A7xB,$Ni,${N3}[1]
+       vmov            $A2xB,$A3xB
+       vmov            $A3xB,$A4xB
+       vshr.u64        $temp,$temp,#16
+       vmov            $A4xB,$A5xB
+       vmov            $A5xB,$A6xB
+       vadd.u64        $temp,$temp,`&Dhi("$Temp")`
+       vmov            $A6xB,$A7xB
+       veor            $A7xB,$A7xB
+       vshr.u64        $temp,$temp,#16
+
+       b       .LNEON_outer8
+
+.align 4
+.LNEON_outer8:
+       vld1.32         {${Bi}[0]}, [$bptr,:32]!
+       veor            $zero,$zero,$zero
+       vzip.16         $Bi,$zero
+       vadd.u64        `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
+
+       vmlal.u32       $A0xB,$Bi,${A0}[0]
+       vmlal.u32       $A1xB,$Bi,${A0}[1]
+       vmlal.u32       $A2xB,$Bi,${A1}[0]
+       vshl.i64        $temp,`&Dhi("$A0xB")`,#16
+       vmlal.u32       $A3xB,$Bi,${A1}[1]
+
+       vadd.u64        $temp,$temp,`&Dlo("$A0xB")`
+       veor            $zero,$zero,$zero
+       subs            $outer,$outer,#1
+       vmul.u32        $Ni,$temp,$M0
+
+       vmlal.u32       $A4xB,$Bi,${A2}[0]
+       vmlal.u32       $A5xB,$Bi,${A2}[1]
+       vmlal.u32       $A6xB,$Bi,${A3}[0]
+       vzip.16         $Ni,$zero
+       vmlal.u32       $A7xB,$Bi,${A3}[1]
+
+       vmlal.u32       $A0xB,$Ni,${N0}[0]
+       vmlal.u32       $A1xB,$Ni,${N0}[1]
+       vmlal.u32       $A2xB,$Ni,${N1}[0]
+       vmlal.u32       $A3xB,$Ni,${N1}[1]
+
+       vmlal.u32       $A4xB,$Ni,${N2}[0]
+       vmov            $Temp,$A0xB
+       vmlal.u32       $A5xB,$Ni,${N2}[1]
+       vmov            $A0xB,$A1xB
+       vmlal.u32       $A6xB,$Ni,${N3}[0]
+       vmov            $A1xB,$A2xB
+       vmlal.u32       $A7xB,$Ni,${N3}[1]
+       vmov            $A2xB,$A3xB
+       vmov            $A3xB,$A4xB
+       vshr.u64        $temp,$temp,#16
+       vmov            $A4xB,$A5xB
+       vmov            $A5xB,$A6xB
+       vadd.u64        $temp,$temp,`&Dhi("$Temp")`
+       vmov            $A6xB,$A7xB
+       veor            $A7xB,$A7xB
+       vshr.u64        $temp,$temp,#16
+
+       bne     .LNEON_outer8
+
+       vadd.u64        `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
+       mov             $toutptr,sp
+       vshr.u64        $temp,`&Dlo("$A0xB")`,#16
+       mov             $inner,$num
+       vadd.u64        `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp
+       add             $tinptr,sp,#16
+       vshr.u64        $temp,`&Dhi("$A0xB")`,#16
+       vzip.16         `&Dlo("$A0xB")`,`&Dhi("$A0xB")`
+
+       b       .LNEON_tail2
+
+.align 4
+.LNEON_1st:
+       vmlal.u32       $A0xB,$Ni,${N0}[0]
+        vld1.32        {$A0-$A3}, [$aptr]!
+       vmlal.u32       $A1xB,$Ni,${N0}[1]
+       subs            $inner,$inner,#8
+       vmlal.u32       $A2xB,$Ni,${N1}[0]
+       vmlal.u32       $A3xB,$Ni,${N1}[1]
+
+       vmlal.u32       $A4xB,$Ni,${N2}[0]
+        vld1.32        {$N0-$N1}, [$nptr]!
+       vmlal.u32       $A5xB,$Ni,${N2}[1]
+        vst1.64        {$A0xB-$A1xB}, [$toutptr,:256]!
+       vmlal.u32       $A6xB,$Ni,${N3}[0]
+       vmlal.u32       $A7xB,$Ni,${N3}[1]
+        vst1.64        {$A2xB-$A3xB}, [$toutptr,:256]!
+
+       vmull.u32       $A0xB,$Bi,${A0}[0]
+        vld1.32        {$N2-$N3}, [$nptr]!
+       vmull.u32       $A1xB,$Bi,${A0}[1]
+        vst1.64        {$A4xB-$A5xB}, [$toutptr,:256]!
+       vmull.u32       $A2xB,$Bi,${A1}[0]
+       vmull.u32       $A3xB,$Bi,${A1}[1]
+        vst1.64        {$A6xB-$A7xB}, [$toutptr,:256]!
+
+       vmull.u32       $A4xB,$Bi,${A2}[0]
+       vmull.u32       $A5xB,$Bi,${A2}[1]
+       vmull.u32       $A6xB,$Bi,${A3}[0]
+       vmull.u32       $A7xB,$Bi,${A3}[1]
+
+       bne     .LNEON_1st
+
+       vmlal.u32       $A0xB,$Ni,${N0}[0]
+       add             $tinptr,sp,#16
+       vmlal.u32       $A1xB,$Ni,${N0}[1]
+       sub             $aptr,$aptr,$num,lsl#2          @ rewind $aptr
+       vmlal.u32       $A2xB,$Ni,${N1}[0]
+        vld1.64        {$Temp}, [sp,:128]
+       vmlal.u32       $A3xB,$Ni,${N1}[1]
+       sub             $outer,$num,#1
+
+       vmlal.u32       $A4xB,$Ni,${N2}[0]
+       vst1.64         {$A0xB-$A1xB}, [$toutptr,:256]!
+       vmlal.u32       $A5xB,$Ni,${N2}[1]
+       vshr.u64        $temp,$temp,#16
+        vld1.64        {$A0xB},       [$tinptr, :128]!
+       vmlal.u32       $A6xB,$Ni,${N3}[0]
+       vst1.64         {$A2xB-$A3xB}, [$toutptr,:256]!
+       vmlal.u32       $A7xB,$Ni,${N3}[1]
+
+       vst1.64         {$A4xB-$A5xB}, [$toutptr,:256]!
+       vadd.u64        $temp,$temp,`&Dhi("$Temp")`
+       veor            $Z,$Z,$Z
+       vst1.64         {$A6xB-$A7xB}, [$toutptr,:256]!
+        vld1.64        {$A1xB-$A2xB}, [$tinptr, :256]!
+       vst1.64         {$Z},          [$toutptr,:128]
+       vshr.u64        $temp,$temp,#16
+
+       b               .LNEON_outer
+
+.align 4
+.LNEON_outer:
+       vld1.32         {${Bi}[0]}, [$bptr,:32]!
+       sub             $nptr,$nptr,$num,lsl#2          @ rewind $nptr
+       vld1.32         {$A0-$A3},  [$aptr]!
+       veor            $zero,$zero,$zero
+       mov             $toutptr,sp
+       vzip.16         $Bi,$zero
+       sub             $inner,$num,#8
+       vadd.u64        `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
+
+       vmlal.u32       $A0xB,$Bi,${A0}[0]
+        vld1.64        {$A3xB-$A4xB},[$tinptr,:256]!
+       vmlal.u32       $A1xB,$Bi,${A0}[1]
+       vmlal.u32       $A2xB,$Bi,${A1}[0]
+        vld1.64        {$A5xB-$A6xB},[$tinptr,:256]!
+       vmlal.u32       $A3xB,$Bi,${A1}[1]
+
+       vshl.i64        $temp,`&Dhi("$A0xB")`,#16
+       veor            $zero,$zero,$zero
+       vadd.u64        $temp,$temp,`&Dlo("$A0xB")`
+        vld1.64        {$A7xB},[$tinptr,:128]!
+       vmul.u32        $Ni,$temp,$M0
+
+       vmlal.u32       $A4xB,$Bi,${A2}[0]
+        vld1.32        {$N0-$N3}, [$nptr]!
+       vmlal.u32       $A5xB,$Bi,${A2}[1]
+       vmlal.u32       $A6xB,$Bi,${A3}[0]
+       vzip.16         $Ni,$zero
+       vmlal.u32       $A7xB,$Bi,${A3}[1]
+
+.LNEON_inner:
+       vmlal.u32       $A0xB,$Ni,${N0}[0]
+        vld1.32        {$A0-$A3}, [$aptr]!
+       vmlal.u32       $A1xB,$Ni,${N0}[1]
+        subs           $inner,$inner,#8
+       vmlal.u32       $A2xB,$Ni,${N1}[0]
+       vmlal.u32       $A3xB,$Ni,${N1}[1]
+       vst1.64         {$A0xB-$A1xB}, [$toutptr,:256]!
+
+       vmlal.u32       $A4xB,$Ni,${N2}[0]
+        vld1.64        {$A0xB},       [$tinptr, :128]!
+       vmlal.u32       $A5xB,$Ni,${N2}[1]
+       vst1.64         {$A2xB-$A3xB}, [$toutptr,:256]!
+       vmlal.u32       $A6xB,$Ni,${N3}[0]
+        vld1.64        {$A1xB-$A2xB}, [$tinptr, :256]!
+       vmlal.u32       $A7xB,$Ni,${N3}[1]
+       vst1.64         {$A4xB-$A5xB}, [$toutptr,:256]!
+
+       vmlal.u32       $A0xB,$Bi,${A0}[0]
+        vld1.64        {$A3xB-$A4xB}, [$tinptr, :256]!
+       vmlal.u32       $A1xB,$Bi,${A0}[1]
+       vst1.64         {$A6xB-$A7xB}, [$toutptr,:256]!
+       vmlal.u32       $A2xB,$Bi,${A1}[0]
+        vld1.64        {$A5xB-$A6xB}, [$tinptr, :256]!
+       vmlal.u32       $A3xB,$Bi,${A1}[1]
+        vld1.32        {$N0-$N3}, [$nptr]!
+
+       vmlal.u32       $A4xB,$Bi,${A2}[0]
+        vld1.64        {$A7xB},       [$tinptr, :128]!
+       vmlal.u32       $A5xB,$Bi,${A2}[1]
+       vmlal.u32       $A6xB,$Bi,${A3}[0]
+       vmlal.u32       $A7xB,$Bi,${A3}[1]
+
+       bne     .LNEON_inner
+
+       vmlal.u32       $A0xB,$Ni,${N0}[0]
+       add             $tinptr,sp,#16
+       vmlal.u32       $A1xB,$Ni,${N0}[1]
+       sub             $aptr,$aptr,$num,lsl#2          @ rewind $aptr
+       vmlal.u32       $A2xB,$Ni,${N1}[0]
+        vld1.64        {$Temp}, [sp,:128]
+       vmlal.u32       $A3xB,$Ni,${N1}[1]
+       subs            $outer,$outer,#1
+
+       vmlal.u32       $A4xB,$Ni,${N2}[0]
+       vst1.64         {$A0xB-$A1xB}, [$toutptr,:256]!
+       vmlal.u32       $A5xB,$Ni,${N2}[1]
+        vld1.64        {$A0xB},       [$tinptr, :128]!
+       vshr.u64        $temp,$temp,#16
+       vst1.64         {$A2xB-$A3xB}, [$toutptr,:256]!
+       vmlal.u32       $A6xB,$Ni,${N3}[0]
+        vld1.64        {$A1xB-$A2xB}, [$tinptr, :256]!
+       vmlal.u32       $A7xB,$Ni,${N3}[1]
+
+       vst1.64         {$A4xB-$A5xB}, [$toutptr,:256]!
+       vadd.u64        $temp,$temp,`&Dhi("$Temp")`
+       vst1.64         {$A6xB-$A7xB}, [$toutptr,:256]!
+       vshr.u64        $temp,$temp,#16
+
+       bne     .LNEON_outer
+
+       mov             $toutptr,sp
+       mov             $inner,$num
+
+.LNEON_tail:
+       vadd.u64        `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
+       vld1.64         {$A3xB-$A4xB}, [$tinptr, :256]!
+       vshr.u64        $temp,`&Dlo("$A0xB")`,#16
+       vadd.u64        `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp
+       vld1.64         {$A5xB-$A6xB}, [$tinptr, :256]!
+       vshr.u64        $temp,`&Dhi("$A0xB")`,#16
+       vld1.64         {$A7xB},       [$tinptr, :128]!
+       vzip.16         `&Dlo("$A0xB")`,`&Dhi("$A0xB")`
+
+.LNEON_tail2:
+       vadd.u64        `&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp
+       vst1.32         {`&Dlo("$A0xB")`[0]}, [$toutptr, :32]!
+       vshr.u64        $temp,`&Dlo("$A1xB")`,#16
+       vadd.u64        `&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp
+       vshr.u64        $temp,`&Dhi("$A1xB")`,#16
+       vzip.16         `&Dlo("$A1xB")`,`&Dhi("$A1xB")`
+
+       vadd.u64        `&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp
+       vst1.32         {`&Dlo("$A1xB")`[0]}, [$toutptr, :32]!
+       vshr.u64        $temp,`&Dlo("$A2xB")`,#16
+       vadd.u64        `&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp
+       vshr.u64        $temp,`&Dhi("$A2xB")`,#16
+       vzip.16         `&Dlo("$A2xB")`,`&Dhi("$A2xB")`
+
+       vadd.u64        `&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp
+       vst1.32         {`&Dlo("$A2xB")`[0]}, [$toutptr, :32]!
+       vshr.u64        $temp,`&Dlo("$A3xB")`,#16
+       vadd.u64        `&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp
+       vshr.u64        $temp,`&Dhi("$A3xB")`,#16
+       vzip.16         `&Dlo("$A3xB")`,`&Dhi("$A3xB")`
+
+       vadd.u64        `&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp
+       vst1.32         {`&Dlo("$A3xB")`[0]}, [$toutptr, :32]!
+       vshr.u64        $temp,`&Dlo("$A4xB")`,#16
+       vadd.u64        `&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp
+       vshr.u64        $temp,`&Dhi("$A4xB")`,#16
+       vzip.16         `&Dlo("$A4xB")`,`&Dhi("$A4xB")`
+
+       vadd.u64        `&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp
+       vst1.32         {`&Dlo("$A4xB")`[0]}, [$toutptr, :32]!
+       vshr.u64        $temp,`&Dlo("$A5xB")`,#16
+       vadd.u64        `&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp
+       vshr.u64        $temp,`&Dhi("$A5xB")`,#16
+       vzip.16         `&Dlo("$A5xB")`,`&Dhi("$A5xB")`
+
+       vadd.u64        `&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp
+       vst1.32         {`&Dlo("$A5xB")`[0]}, [$toutptr, :32]!
+       vshr.u64        $temp,`&Dlo("$A6xB")`,#16
+       vadd.u64        `&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp
+       vld1.64         {$A0xB}, [$tinptr, :128]!
+       vshr.u64        $temp,`&Dhi("$A6xB")`,#16
+       vzip.16         `&Dlo("$A6xB")`,`&Dhi("$A6xB")`
+
+       vadd.u64        `&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp
+       vst1.32         {`&Dlo("$A6xB")`[0]}, [$toutptr, :32]!
+       vshr.u64        $temp,`&Dlo("$A7xB")`,#16
+       vadd.u64        `&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp
+       vld1.64         {$A1xB-$A2xB},  [$tinptr, :256]!
+       vshr.u64        $temp,`&Dhi("$A7xB")`,#16
+       vzip.16         `&Dlo("$A7xB")`,`&Dhi("$A7xB")`
+       subs            $inner,$inner,#8
+       vst1.32         {`&Dlo("$A7xB")`[0]}, [$toutptr, :32]!
+
+       bne     .LNEON_tail
+
+       vst1.32 {${temp}[0]}, [$toutptr, :32]           @ top-most bit
+       sub     $nptr,$nptr,$num,lsl#2                  @ rewind $nptr
+       subs    $aptr,sp,#0                             @ clear carry flag
+       add     $bptr,sp,$num,lsl#2
+
+.LNEON_sub:
+       ldmia   $aptr!, {r4-r7}
+       ldmia   $nptr!, {r8-r11}
+       sbcs    r8, r4,r8
+       sbcs    r9, r5,r9
+       sbcs    r10,r6,r10
+       sbcs    r11,r7,r11
+       teq     $aptr,$bptr                             @ preserves carry
+       stmia   $rptr!, {r8-r11}
+       bne     .LNEON_sub
+
+       ldr     r10, [$aptr]                            @ load top-most bit
+       veor    q0,q0,q0
+       sub     r11,$bptr,sp                            @ this is num*4
+       veor    q1,q1,q1
+       mov     $aptr,sp
+       sub     $rptr,$rptr,r11                         @ rewind $rptr
+       mov     $nptr,$bptr                             @ second 3/4th of frame
+       sbcs    r10,r10,#0                              @ result is carry flag
+
+.LNEON_copy_n_zap:
+       ldmia   $aptr!, {r4-r7}
+       ldmia   $rptr,  {r8-r11}
+       movcc   r8, r4
+       vst1.64 {q0-q1}, [$nptr,:256]!                  @ wipe
+       movcc   r9, r5
+       movcc   r10,r6
+       vst1.64 {q0-q1}, [$nptr,:256]!                  @ wipe
+       movcc   r11,r7
+       ldmia   $aptr, {r4-r7}
+       stmia   $rptr!, {r8-r11}
+       sub     $aptr,$aptr,#16
+       ldmia   $rptr, {r8-r11}
+       movcc   r8, r4
+       vst1.64 {q0-q1}, [$aptr,:256]!                  @ wipe
+       movcc   r9, r5
+       movcc   r10,r6
+       vst1.64 {q0-q1}, [$nptr,:256]!                  @ wipe
+       movcc   r11,r7
+       teq     $aptr,$bptr                             @ preserves carry
+       stmia   $rptr!, {r8-r11}
+       bne     .LNEON_copy_n_zap
+
+       sub     sp,ip,#96
+        vldmia  sp!,{d8-d15}
+        ldmia   sp!,{r4-r11}
+       ret                                             @ bx lr
+.size  bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
+#endif
+___
+}
+$code.=<<___;
+.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
 .align 2
+#if __ARM_MAX_ARCH__>=7
+.comm  OPENSSL_armcap_P,4,4
+#endif
 ___
 
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;   # make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx  lr/gm;
 print $code;
 close STDOUT;
index caae04e..a33cdf4 100644 (file)
@@ -46,7 +46,7 @@
 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
 #
-$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
+$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
 
 if ($flavour =~ /64|n32/i) {
        $PTR_ADD="dadd";        # incidentally works even on n32
index 215c9a7..acafde5 100644 (file)
@@ -48,7 +48,7 @@
 # has to content with 40-85% improvement depending on benchmark and
 # key length, more for longer keys.
 
-$flavour = shift;
+$flavour = shift || "o32";
 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
diff --git a/crypto/bn/asm/mips3.s b/crypto/bn/asm/mips3.s
new file mode 100644 (file)
index 0000000..dca4105
--- /dev/null
@@ -0,0 +1,2201 @@
+.rdata
+.asciiz        "mips3.s, Version 1.1"
+.asciiz        "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+
+/*
+ * ====================================================================
+ * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+ * project.
+ *
+ * Rights for redistribution and usage in source and binary forms are
+ * granted according to the OpenSSL license. Warranty of any kind is
+ * disclaimed.
+ * ====================================================================
+ */
+
+/*
+ * This is my modest contribution to the OpenSSL project (see
+ * http://www.openssl.org/ for more information about it) and is
+ * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c
+ * module. For updates see http://fy.chalmers.se/~appro/hpe/.
+ *
+ * The module is designed to work with either of the "new" MIPS ABI(5),
+ * namely N32 or N64, offered by IRIX 6.x. It's not meant to work under
+ * IRIX 5.x not only because it doesn't support new ABIs but also
+ * because 5.x kernels put R4x00 CPU into 32-bit mode and all those
+ * 64-bit instructions (daddu, dmultu, etc.) found below will only
+ * cause an illegal instruction exception:-(
+ *
+ * In addition the code depends on preprocessor flags set up by MIPSpro
+ * compiler driver (either as or cc) and therefore (probably?) can't be
+ * compiled by the GNU assembler. GNU C driver manages fine though...
+ * I mean as long as -mmips-as is specified or is the default option,
+ * because then it simply invokes /usr/bin/as which in turn takes
+ * perfect care of the preprocessor definitions. Another neat feature
+ * offered by the MIPSpro assembler is an optimization pass. This gave
+ * me the opportunity to have the code looking more regular as all those
+ * architecture dependent instruction rescheduling details were left to
+ * the assembler. Cool, huh?
+ *
+ * Performance improvement is astonishing! 'apps/openssl speed rsa dsa'
+ * goes way over 3 times faster!
+ *
+ *                                     <appro@fy.chalmers.se>
+ */
+#include <asm.h>
+#include <regdef.h>
+
+#if _MIPS_ISA>=4
+#define        MOVNZ(cond,dst,src)     \
+       movn    dst,src,cond
+#else
+#define        MOVNZ(cond,dst,src)     \
+       .set    noreorder;      \
+       bnezl   cond,.+8;       \
+       move    dst,src;        \
+       .set    reorder
+#endif
+
+.text
+
+.set   noat
+.set   reorder
+
+#define        MINUS4  v1
+
+.align 5
+LEAF(bn_mul_add_words)
+       .set    noreorder
+       bgtzl   a2,.L_bn_mul_add_words_proceed
+       ld      t0,0(a1)
+       jr      ra
+       move    v0,zero
+       .set    reorder
+
+.L_bn_mul_add_words_proceed:
+       li      MINUS4,-4
+       and     ta0,a2,MINUS4
+       move    v0,zero
+       beqz    ta0,.L_bn_mul_add_words_tail
+
+.L_bn_mul_add_words_loop:
+       dmultu  t0,a3
+       ld      t1,0(a0)
+       ld      t2,8(a1)
+       ld      t3,8(a0)
+       ld      ta0,16(a1)
+       ld      ta1,16(a0)
+       daddu   t1,v0
+       sltu    v0,t1,v0        /* All manuals say it "compares 32-bit
+                                * values", but it seems to work fine
+                                * even on 64-bit registers. */
+       mflo    AT
+       mfhi    t0
+       daddu   t1,AT
+       daddu   v0,t0
+       sltu    AT,t1,AT
+       sd      t1,0(a0)
+       daddu   v0,AT
+
+       dmultu  t2,a3
+       ld      ta2,24(a1)
+       ld      ta3,24(a0)
+       daddu   t3,v0
+       sltu    v0,t3,v0
+       mflo    AT
+       mfhi    t2
+       daddu   t3,AT
+       daddu   v0,t2
+       sltu    AT,t3,AT
+       sd      t3,8(a0)
+       daddu   v0,AT
+
+       dmultu  ta0,a3
+       subu    a2,4
+       PTR_ADD a0,32
+       PTR_ADD a1,32
+       daddu   ta1,v0
+       sltu    v0,ta1,v0
+       mflo    AT
+       mfhi    ta0
+       daddu   ta1,AT
+       daddu   v0,ta0
+       sltu    AT,ta1,AT
+       sd      ta1,-16(a0)
+       daddu   v0,AT
+
+
+       dmultu  ta2,a3
+       and     ta0,a2,MINUS4
+       daddu   ta3,v0
+       sltu    v0,ta3,v0
+       mflo    AT
+       mfhi    ta2
+       daddu   ta3,AT
+       daddu   v0,ta2
+       sltu    AT,ta3,AT
+       sd      ta3,-8(a0)
+       daddu   v0,AT
+       .set    noreorder
+       bgtzl   ta0,.L_bn_mul_add_words_loop
+       ld      t0,0(a1)
+
+       bnezl   a2,.L_bn_mul_add_words_tail
+       ld      t0,0(a1)
+       .set    reorder
+
+.L_bn_mul_add_words_return:
+       jr      ra
+
+.L_bn_mul_add_words_tail:
+       dmultu  t0,a3
+       ld      t1,0(a0)
+       subu    a2,1
+       daddu   t1,v0
+       sltu    v0,t1,v0
+       mflo    AT
+       mfhi    t0
+       daddu   t1,AT
+       daddu   v0,t0
+       sltu    AT,t1,AT
+       sd      t1,0(a0)
+       daddu   v0,AT
+       beqz    a2,.L_bn_mul_add_words_return
+
+       ld      t0,8(a1)
+       dmultu  t0,a3
+       ld      t1,8(a0)
+       subu    a2,1
+       daddu   t1,v0
+       sltu    v0,t1,v0
+       mflo    AT
+       mfhi    t0
+       daddu   t1,AT
+       daddu   v0,t0
+       sltu    AT,t1,AT
+       sd      t1,8(a0)
+       daddu   v0,AT
+       beqz    a2,.L_bn_mul_add_words_return
+
+       ld      t0,16(a1)
+       dmultu  t0,a3
+       ld      t1,16(a0)
+       daddu   t1,v0
+       sltu    v0,t1,v0
+       mflo    AT
+       mfhi    t0
+       daddu   t1,AT
+       daddu   v0,t0
+       sltu    AT,t1,AT
+       sd      t1,16(a0)
+       daddu   v0,AT
+       jr      ra
+END(bn_mul_add_words)
+
+.align 5
+LEAF(bn_mul_words)
+       .set    noreorder
+       bgtzl   a2,.L_bn_mul_words_proceed
+       ld      t0,0(a1)
+       jr      ra
+       move    v0,zero
+       .set    reorder
+
+.L_bn_mul_words_proceed:
+       li      MINUS4,-4
+       and     ta0,a2,MINUS4
+       move    v0,zero
+       beqz    ta0,.L_bn_mul_words_tail
+
+.L_bn_mul_words_loop:
+       dmultu  t0,a3
+       ld      t2,8(a1)
+       ld      ta0,16(a1)
+       ld      ta2,24(a1)
+       mflo    AT
+       mfhi    t0
+       daddu   v0,AT
+       sltu    t1,v0,AT
+       sd      v0,0(a0)
+       daddu   v0,t1,t0
+
+       dmultu  t2,a3
+       subu    a2,4
+       PTR_ADD a0,32
+       PTR_ADD a1,32
+       mflo    AT
+       mfhi    t2
+       daddu   v0,AT
+       sltu    t3,v0,AT
+       sd      v0,-24(a0)
+       daddu   v0,t3,t2
+
+       dmultu  ta0,a3
+       mflo    AT
+       mfhi    ta0
+       daddu   v0,AT
+       sltu    ta1,v0,AT
+       sd      v0,-16(a0)
+       daddu   v0,ta1,ta0
+
+
+       dmultu  ta2,a3
+       and     ta0,a2,MINUS4
+       mflo    AT
+       mfhi    ta2
+       daddu   v0,AT
+       sltu    ta3,v0,AT
+       sd      v0,-8(a0)
+       daddu   v0,ta3,ta2
+       .set    noreorder
+       bgtzl   ta0,.L_bn_mul_words_loop
+       ld      t0,0(a1)
+
+       bnezl   a2,.L_bn_mul_words_tail
+       ld      t0,0(a1)
+       .set    reorder
+
+.L_bn_mul_words_return:
+       jr      ra
+
+.L_bn_mul_words_tail:
+       dmultu  t0,a3
+       subu    a2,1
+       mflo    AT
+       mfhi    t0
+       daddu   v0,AT
+       sltu    t1,v0,AT
+       sd      v0,0(a0)
+       daddu   v0,t1,t0
+       beqz    a2,.L_bn_mul_words_return
+
+       ld      t0,8(a1)
+       dmultu  t0,a3
+       subu    a2,1
+       mflo    AT
+       mfhi    t0
+       daddu   v0,AT
+       sltu    t1,v0,AT
+       sd      v0,8(a0)
+       daddu   v0,t1,t0
+       beqz    a2,.L_bn_mul_words_return
+
+       ld      t0,16(a1)
+       dmultu  t0,a3
+       mflo    AT
+       mfhi    t0
+       daddu   v0,AT
+       sltu    t1,v0,AT
+       sd      v0,16(a0)
+       daddu   v0,t1,t0
+       jr      ra
+END(bn_mul_words)
+
+.align 5
+LEAF(bn_sqr_words)
+       .set    noreorder
+       bgtzl   a2,.L_bn_sqr_words_proceed
+       ld      t0,0(a1)
+       jr      ra
+       move    v0,zero
+       .set    reorder
+
+.L_bn_sqr_words_proceed:
+       li      MINUS4,-4
+       and     ta0,a2,MINUS4
+       move    v0,zero
+       beqz    ta0,.L_bn_sqr_words_tail
+
+.L_bn_sqr_words_loop:
+       dmultu  t0,t0
+       ld      t2,8(a1)
+       ld      ta0,16(a1)
+       ld      ta2,24(a1)
+       mflo    t1
+       mfhi    t0
+       sd      t1,0(a0)
+       sd      t0,8(a0)
+
+       dmultu  t2,t2
+       subu    a2,4
+       PTR_ADD a0,64
+       PTR_ADD a1,32
+       mflo    t3
+       mfhi    t2
+       sd      t3,-48(a0)
+       sd      t2,-40(a0)
+
+       dmultu  ta0,ta0
+       mflo    ta1
+       mfhi    ta0
+       sd      ta1,-32(a0)
+       sd      ta0,-24(a0)
+
+
+       dmultu  ta2,ta2
+       and     ta0,a2,MINUS4
+       mflo    ta3
+       mfhi    ta2
+       sd      ta3,-16(a0)
+       sd      ta2,-8(a0)
+
+       .set    noreorder
+       bgtzl   ta0,.L_bn_sqr_words_loop
+       ld      t0,0(a1)
+
+       bnezl   a2,.L_bn_sqr_words_tail
+       ld      t0,0(a1)
+       .set    reorder
+
+.L_bn_sqr_words_return:
+       move    v0,zero
+       jr      ra
+
+.L_bn_sqr_words_tail:
+       dmultu  t0,t0
+       subu    a2,1
+       mflo    t1
+       mfhi    t0
+       sd      t1,0(a0)
+       sd      t0,8(a0)
+       beqz    a2,.L_bn_sqr_words_return
+
+       ld      t0,8(a1)
+       dmultu  t0,t0
+       subu    a2,1
+       mflo    t1
+       mfhi    t0
+       sd      t1,16(a0)
+       sd      t0,24(a0)
+       beqz    a2,.L_bn_sqr_words_return
+
+       ld      t0,16(a1)
+       dmultu  t0,t0
+       mflo    t1
+       mfhi    t0
+       sd      t1,32(a0)
+       sd      t0,40(a0)
+       jr      ra
+END(bn_sqr_words)
+
+.align 5        /* bn_add_words: add two arrays of 64-bit words with carry.
+                 * Register use as seen below: a0 = destination pointer,
+                 * a1 and a2 = source pointers, a3 = number of words.
+                 * The carry out of the last word is returned in v0;
+                 * v0 is 0 when a3 <= 0.
+                 */
+LEAF(bn_add_words)
+       .set    noreorder
+       bgtzl   a3,.L_bn_add_words_proceed      /* a3 > 0: do the work */
+       ld      t0,0(a1)        /* branch-likely delay slot: the first a1
+                                * word is preloaded only when the branch
+                                * is taken */
+       jr      ra
+       move    v0,zero         /* delay slot: return 0 for a3 <= 0 */
+       .set    reorder
+
+.L_bn_add_words_proceed:
+       li      MINUS4,-4       /* MINUS4 is a register alias defined outside this chunk */
+       and     AT,a3,MINUS4    /* AT = a3 with its two low bits cleared */
+       move    v0,zero         /* carry = 0 */
+       beqz    AT,.L_bn_add_words_tail /* fewer than 4 words: tail only */
+
+.L_bn_add_words_loop:   /* four words per pass; t0 already holds 0(a1) */
+       ld      ta0,0(a2)
+       subu    a3,4
+       ld      t1,8(a1)
+       and     AT,a3,MINUS4    /* AT != 0 while another group of 4 remains */
+       ld      t2,16(a1)
+       PTR_ADD a2,32
+       ld      t3,24(a1)
+       PTR_ADD a0,32
+       ld      ta1,-24(a2)     /* pointers already advanced, hence negative offsets */
+       PTR_ADD a1,32
+       ld      ta2,-16(a2)
+       ld      ta3,-8(a2)
+       daddu   ta0,t0          /* word 0: sum of the two source words */
+       sltu    t8,ta0,t0       /* t8 = carry out of that sum */
+       daddu   t0,ta0,v0       /* add the incoming carry */
+       sltu    v0,t0,ta0       /* carry out of the carry addition */
+       sd      t0,-32(a0)
+       daddu   v0,t8           /* combined carry into the next word */
+
+       daddu   ta1,t1          /* word 1, same pattern */
+       sltu    t9,ta1,t1
+       daddu   t1,ta1,v0
+       sltu    v0,t1,ta1
+       sd      t1,-24(a0)
+       daddu   v0,t9
+
+       daddu   ta2,t2          /* word 2 */
+       sltu    t8,ta2,t2
+       daddu   t2,ta2,v0
+       sltu    v0,t2,ta2
+       sd      t2,-16(a0)
+       daddu   v0,t8
+       /* word 3 */
+       daddu   ta3,t3
+       sltu    t9,ta3,t3
+       daddu   t3,ta3,v0
+       sltu    v0,t3,ta3
+       sd      t3,-8(a0)
+       daddu   v0,t9
+       /* loop control; each delay-slot load runs only if its branch is taken */
+       .set    noreorder
+       bgtzl   AT,.L_bn_add_words_loop
+       ld      t0,0(a1)        /* preload first word of the next group */
+
+       bnezl   a3,.L_bn_add_words_tail /* 1..3 words left over */
+       ld      t0,0(a1)        /* preload first tail word */
+       .set    reorder
+
+.L_bn_add_words_return:
+       jr      ra
+
+.L_bn_add_words_tail:   /* up to three remaining words; t0 already loaded */
+       ld      ta0,0(a2)
+       daddu   ta0,t0
+       subu    a3,1
+       sltu    t8,ta0,t0
+       daddu   t0,ta0,v0
+       sltu    v0,t0,ta0
+       sd      t0,0(a0)
+       daddu   v0,t8
+       beqz    a3,.L_bn_add_words_return
+
+       ld      t1,8(a1)
+       ld      ta1,8(a2)
+       daddu   ta1,t1
+       subu    a3,1
+       sltu    t9,ta1,t1
+       daddu   t1,ta1,v0
+       sltu    v0,t1,ta1
+       sd      t1,8(a0)
+       daddu   v0,t9
+       beqz    a3,.L_bn_add_words_return
+
+       ld      t2,16(a1)       /* third tail word: no count check follows */
+       ld      ta2,16(a2)
+       daddu   ta2,t2
+       sltu    t8,ta2,t2
+       daddu   t2,ta2,v0
+       sltu    v0,t2,ta2
+       sd      t2,16(a0)
+       daddu   v0,t8
+       jr      ra
+END(bn_add_words)
+
+.align 5        /* bn_sub_words: subtract two arrays of 64-bit words.
+                 * Register use as seen below: a0 = destination pointer,
+                 * a1 = minuend pointer, a2 = subtrahend pointer,
+                 * a3 = number of words. v0 carries the borrow between
+                 * words and is the return value; it is 0 when a3 <= 0.
+                 * NOTE(review): MOVNZ is a macro defined outside this
+                 * chunk; MOVNZ(c,d,s) presumably performs d = s when
+                 * c != 0 -- confirm against its definition.
+                 */
+LEAF(bn_sub_words)
+       .set    noreorder
+       bgtzl   a3,.L_bn_sub_words_proceed      /* a3 > 0: do the work */
+       ld      t0,0(a1)        /* branch-likely delay slot: runs only
+                                * when the branch is taken */
+       jr      ra
+       move    v0,zero         /* delay slot: return 0 for a3 <= 0 */
+       .set    reorder
+
+.L_bn_sub_words_proceed:
+       li      MINUS4,-4       /* MINUS4 is a register alias defined outside this chunk */
+       and     AT,a3,MINUS4    /* AT = a3 with its two low bits cleared */
+       move    v0,zero         /* borrow = 0 */
+       beqz    AT,.L_bn_sub_words_tail /* fewer than 4 words: tail only */
+
+.L_bn_sub_words_loop:   /* four words per pass; t0 already holds 0(a1) */
+       ld      ta0,0(a2)
+       subu    a3,4
+       ld      t1,8(a1)
+       and     AT,a3,MINUS4    /* AT != 0 while another group of 4 remains */
+       ld      t2,16(a1)
+       PTR_ADD a2,32
+       ld      t3,24(a1)
+       PTR_ADD a0,32
+       ld      ta1,-24(a2)     /* pointers already advanced, hence negative offsets */
+       PTR_ADD a1,32
+       ld      ta2,-16(a2)
+       ld      ta3,-8(a2)
+       sltu    t8,t0,ta0       /* word 0: t8 = borrow of the plain difference */
+       dsubu   t0,ta0          /* t0 = difference of the two source words */
+       dsubu   ta0,t0,v0       /* subtract the incoming borrow */
+       sd      ta0,-32(a0)
+       MOVNZ   (t0,v0,t8)      /* update v0 from t8 depending on t0 (see header note) */
+
+       sltu    t9,t1,ta1       /* word 1, same pattern */
+       dsubu   t1,ta1
+       dsubu   ta1,t1,v0
+       sd      ta1,-24(a0)
+       MOVNZ   (t1,v0,t9)
+
+
+       sltu    t8,t2,ta2       /* word 2 */
+       dsubu   t2,ta2
+       dsubu   ta2,t2,v0
+       sd      ta2,-16(a0)
+       MOVNZ   (t2,v0,t8)
+
+       sltu    t9,t3,ta3       /* word 3 */
+       dsubu   t3,ta3
+       dsubu   ta3,t3,v0
+       sd      ta3,-8(a0)
+       MOVNZ   (t3,v0,t9)
+
+       .set    noreorder
+       bgtzl   AT,.L_bn_sub_words_loop
+       ld      t0,0(a1)        /* preload first word of the next group */
+
+       bnezl   a3,.L_bn_sub_words_tail /* 1..3 words left over */
+       ld      t0,0(a1)        /* preload first tail word */
+       .set    reorder
+
+.L_bn_sub_words_return:
+       jr      ra
+
+.L_bn_sub_words_tail:   /* up to three remaining words; t0 already loaded */
+       ld      ta0,0(a2)
+       subu    a3,1
+       sltu    t8,t0,ta0
+       dsubu   t0,ta0
+       dsubu   ta0,t0,v0
+       MOVNZ   (t0,v0,t8)
+       sd      ta0,0(a0)
+       beqz    a3,.L_bn_sub_words_return
+
+       ld      t1,8(a1)
+       subu    a3,1
+       ld      ta1,8(a2)
+       sltu    t9,t1,ta1
+       dsubu   t1,ta1
+       dsubu   ta1,t1,v0
+       MOVNZ   (t1,v0,t9)
+       sd      ta1,8(a0)
+       beqz    a3,.L_bn_sub_words_return
+
+       ld      t2,16(a1)       /* third tail word: no count check follows */
+       ld      ta2,16(a2)
+       sltu    t8,t2,ta2
+       dsubu   t2,ta2
+       dsubu   ta2,t2,v0
+       MOVNZ   (t2,v0,t8)
+       sd      ta2,16(a0)
+       jr      ra
+END(bn_sub_words)
+
+#undef MINUS4   /* drop the register alias used by the 4-word loops above */
+
+.align 5        /* bn_div_3_words: quotient estimate built on bn_div_words.
+                 * Register use as seen below: a0 = pointer into a word
+                 * array (words at offsets 0, -8 and -16 are read),
+                 * a1 and a2 = two word values. Returns in v0 either -1
+                 * (when the word at 0(a0) equals a2) or the quotient
+                 * from bn_div_words after the correction loop below.
+                 * NOTE(review): presumably the three-word quotient
+                 * estimation step of a long division -- confirm
+                 * against the C caller.
+                 */
+LEAF(bn_div_3_words)
+       .set    reorder
+       move    a3,a0           /* we know that bn_div_words doesn't
+                                * touch a3, ta2, ta3 and preserves a2
+                                * so that we can save two arguments
+                                * and return address in registers
+                                * instead of stack:-)
+                                */
+       ld      a0,(a3)         /* high dividend word */
+       move    ta2,a1          /* keep the a1 argument across the call */
+       ld      a1,-8(a3)       /* low dividend word */
+       bne     a0,a2,.L_bn_div_3_words_proceed
+       li      v0,-1           /* high word equals a2: return all ones */
+       jr      ra
+.L_bn_div_3_words_proceed:
+       move    ta3,ra          /* save return address in a register */
+       bal     bn_div_words    /* v0 = quotient, v1 = remainder of a0:a1 / a2 */
+       move    ra,ta3
+       dmultu  ta2,v0          /* 128-bit product of saved a1 and the quotient */
+       ld      t2,-16(a3)      /* third (lowest) word of the array */
+       move    ta0,zero
+       mfhi    t1              /* t1:t0 = that product */
+       mflo    t0
+       sltu    t8,t1,v1        /* t8 = product high word below remainder */
+.L_bn_div_3_words_inner_loop:
+       bnez    t8,.L_bn_div_3_words_inner_loop_done
+       sgeu    AT,t2,t0        /* AT = (t2 >= t0) ... */
+       seq     t9,t1,v1
+       and     AT,t9           /* ... && (t1 == v1): estimate is good */
+       sltu    t3,t0,ta2       /* borrow for the 128-bit subtraction below */
+       daddu   v1,a2           /* remainder += a2 */
+       dsubu   t1,t3           /* t1:t0 -= ta2 */
+       dsubu   t0,ta2
+       sltu    t8,t1,v1
+       sltu    ta0,v1,a2       /* remainder addition wrapped around */
+       or      t8,ta0          /* either condition ends the loop next pass */
+       .set    noreorder
+       beqzl   AT,.L_bn_div_3_words_inner_loop
+       dsubu   v0,1            /* delay slot, only when looping: quotient-- */
+       .set    reorder
+.L_bn_div_3_words_inner_loop_done:
+       jr      ra
+END(bn_div_3_words)
+
+.align 5        /* bn_div_words: divide the 128-bit value a0:a1 by a2.
+                 * Returns the quotient in v0 and leaves the remainder
+                 * in v1. a2 is restored before returning; a3, ta2 and
+                 * ta3 are not written (bn_div_3_words relies on this).
+                 * A zero divisor returns v0 = -1; dividend bits lost
+                 * during normalization raise 'break 6'.
+                 * The quotient is produced as two 32-bit digits, each
+                 * estimated with ddivu and then corrected downwards.
+                 */
+LEAF(bn_div_words)
+       .set    noreorder
+       bnezl   a2,.L_bn_div_words_proceed
+       move    v1,zero         /* delay slot, only when a2 != 0 */
+       jr      ra
+       li      v0,-1           /* I'd rather signal div-by-zero
+                                * which can be done with 'break 7' */
+
+.L_bn_div_words_proceed:
+       bltz    a2,.L_bn_div_words_body /* top bit already set: no shift needed */
+       move    t9,v1           /* delay slot: shift count t9 = 0 */
+       dsll    a2,1            /* shift a2 left until its top bit is set */
+       bgtz    a2,.-4          /* loops back to the dsll above */
+       addu    t9,1            /* delay slot: count each shift */
+
+       .set    reorder
+       negu    t1,t9           /* shift amount for the complementary bits */
+       li      t2,-1
+       dsll    t2,t1           /* mask of the a0 bits about to be shifted out */
+       and     t2,a0
+       dsrl    AT,a1,t1        /* a1 bits that move up into a0 */
+       .set    noreorder
+       bnezl   t2,.+8          /* break runs only if masked bits are nonzero */
+       break   6               /* signal overflow */
+       .set    reorder
+       dsll    a0,t9           /* a0:a1 <<= t9 */
+       dsll    a1,t9
+       or      a0,AT
+
+#define        QT      ta0     /* current 32-bit quotient digit */
+#define        HH      ta1     /* upper 32 bits of the running high word */
+#define        DH      v1      /* upper 32 bits of the normalized divisor */
+.L_bn_div_words_body:
+       dsrl    DH,a2,32
+       sgeu    AT,a0,a2
+       .set    noreorder
+       bnezl   AT,.+8
+       dsubu   a0,a2           /* delay slot, only if a0 >= a2: a0 -= a2 */
+       .set    reorder
+
+       li      QT,-1
+       dsrl    HH,a0,32
+       dsrl    QT,32   /* q=0xffffffff */
+       beq     DH,HH,.L_bn_div_words_skip_div1 /* keep q = 0xffffffff when HH == DH */
+       ddivu   zero,a0,DH
+       mflo    QT              /* otherwise q = a0 / DH */
+.L_bn_div_words_skip_div1:
+       dmultu  a2,QT           /* t1:t0 = divisor * q */
+       dsll    t3,a0,32
+       dsrl    AT,a1,32
+       or      t3,AT           /* t3 = (a0 << 32) | (a1 >> 32) */
+       mflo    t0
+       mfhi    t1
+.L_bn_div_words_inner_loop1:    /* while HH:t3 < t1:t0: q--, t1:t0 -= a2 */
+       sltu    t2,t3,t0
+       seq     t8,HH,t1
+       sltu    AT,HH,t1
+       and     t2,t8
+       sltu    v0,t0,a2        /* borrow for the subtraction below */
+       or      AT,t2
+       .set    noreorder
+       beqz    AT,.L_bn_div_words_inner_loop1_done
+       dsubu   t1,v0           /* delay slot: executed on both paths */
+       dsubu   t0,a2
+       b       .L_bn_div_words_inner_loop1
+       dsubu   QT,1            /* delay slot: q-- */
+       .set    reorder
+.L_bn_div_words_inner_loop1_done:
+
+       dsll    a1,32           /* bring the low 32 dividend bits to the top */
+       dsubu   a0,t3,t0        /* partial remainder */
+       dsll    v0,QT,32        /* first digit becomes the upper quotient half */
+
+       li      QT,-1
+       dsrl    HH,a0,32
+       dsrl    QT,32   /* q=0xffffffff */
+       beq     DH,HH,.L_bn_div_words_skip_div2
+       ddivu   zero,a0,DH
+       mflo    QT
+.L_bn_div_words_skip_div2:
+#undef DH
+       dmultu  a2,QT           /* v1 is reused as a scratch register from here on */
+       dsll    t3,a0,32
+       dsrl    AT,a1,32
+       or      t3,AT
+       mflo    t0
+       mfhi    t1
+.L_bn_div_words_inner_loop2:    /* same correction for the second digit */
+       sltu    t2,t3,t0
+       seq     t8,HH,t1
+       sltu    AT,HH,t1
+       and     t2,t8
+       sltu    v1,t0,a2
+       or      AT,t2
+       .set    noreorder
+       beqz    AT,.L_bn_div_words_inner_loop2_done
+       dsubu   t1,v1           /* delay slot: executed on both paths */
+       dsubu   t0,a2
+       b       .L_bn_div_words_inner_loop2
+       dsubu   QT,1            /* delay slot: q-- */
+       .set    reorder
+.L_bn_div_words_inner_loop2_done:      
+#undef HH
+
+       dsubu   a0,t3,t0        /* final remainder, still shifted by t9 */
+       or      v0,QT           /* combine both quotient digits */
+       dsrl    v1,a0,t9        /* v1 contains remainder if anybody wants it */
+       dsrl    a2,t9           /* restore a2 */
+       jr      ra
+#undef QT
+END(bn_div_words)
+
+#define        a_0     t0      /* a_0..a_3 / b_0..b_3: low four words of each operand, kept in temporaries */
+#define        a_1     t1
+#define        a_2     t2
+#define        a_3     t3
+#define        b_0     ta0
+#define        b_1     ta1
+#define        b_2     ta2
+#define        b_3     ta3
+
+#define        a_4     s0      /* s0..s5 are saved and restored by bn_mul_comba8 on its stack frame */
+#define        a_5     s2
+#define        a_6     s4
+#define        a_7     a1      /* once we load a[7] we don't need a anymore */
+#define        b_4     s1
+#define        b_5     s3
+#define        b_6     s5
+#define        b_7     a2      /* once we load b[7] we don't need b anymore */
+
+#define        t_1     t8      /* low word of the current product (from mflo) */
+#define        t_2     t9      /* high word of the current product (from mfhi) */
+
+#define        c_1     v0      /* c_1..c_3: three-word column accumulator whose roles rotate per result word */
+#define        c_2     v1
+#define        c_3     a3
+
+#define        FRAME_SIZE      48      /* six 8-byte slots, one per saved register s0..s5 */
+
+.align 5        /* bn_mul_comba8: 8x8-word column-wise multiplication.
+                 * Register use as seen below: a0 = result pointer
+                 * (16 words are stored, r[0]..r[15]), a1 and a2 =
+                 * pointers to the two 8-word operands. No value is
+                 * returned. s0..s5 are saved on a FRAME_SIZE-byte
+                 * frame and restored before return.
+                 * Each "mul_add_c(a,b,lo,mid,hi)" group below adds the
+                 * 128-bit product a*b into the three-word accumulator
+                 * lo/mid/hi: sltu captures the carry of each daddu.
+                 * The first group of a column initialises the new top
+                 * word with sltu instead of adding to it.
+                 */
+LEAF(bn_mul_comba8)
+       .set    noreorder
+       PTR_SUB sp,FRAME_SIZE
+       .frame  sp,FRAME_SIZE,ra        /* must match the stack adjustment above */
+       .set    reorder
+       ld      a_0,0(a1)       /* If compiled with -mips3 option on
+                                * R5000 box assembler barks on this
+                                * line with "shouldn't have mult/div
+                                * as last instruction in bb (R10K
+                                * bug)" warning. If anybody out there
+                                * has a clue about how to circumvent
+                                * this do send me a note.
+                                *              <appro@fy.chalmers.se>
+                                */
+       ld      b_0,0(a2)
+       ld      a_1,8(a1)
+       ld      a_2,16(a1)
+       ld      a_3,24(a1)
+       ld      b_1,8(a2)
+       ld      b_2,16(a2)
+       ld      b_3,24(a2)
+       dmultu  a_0,b_0         /* mul_add_c(a[0],b[0],c1,c2,c3); */
+       sd      s0,0(sp)        /* save s0..s5 while the multiply is in flight */
+       sd      s1,8(sp)
+       sd      s2,16(sp)
+       sd      s3,24(sp)
+       sd      s4,32(sp)
+       sd      s5,40(sp)
+       mflo    c_1
+       mfhi    c_2
+
+       dmultu  a_0,b_1         /* mul_add_c(a[0],b[1],c2,c3,c1); */
+       ld      a_4,32(a1)      /* load the upper operand words between multiplies */
+       ld      a_5,40(a1)
+       ld      a_6,48(a1)
+       ld      a_7,56(a1)
+       ld      b_4,32(a2)
+       ld      b_5,40(a2)
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   c_3,t_2,AT
+       dmultu  a_1,b_0         /* mul_add_c(a[1],b[0],c2,c3,c1); */
+       ld      b_6,48(a2)
+       ld      b_7,56(a2)
+       sd      c_1,0(a0)       /* r[0]=c1; */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    c_1,c_3,t_2
+       sd      c_2,8(a0)       /* r[1]=c2; */
+
+       dmultu  a_2,b_0         /* mul_add_c(a[2],b[0],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       dmultu  a_1,b_1         /* mul_add_c(a[1],b[1],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    c_2,c_1,t_2
+       dmultu  a_0,b_2         /* mul_add_c(a[0],b[2],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,16(a0)      /* r[2]=c3; */
+
+       dmultu  a_0,b_3         /* mul_add_c(a[0],b[3],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    c_3,c_2,t_2
+       dmultu  a_1,b_2         /* mul_add_c(a[1],b[2],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_2,b_1         /* mul_add_c(a[2],b[1],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_3,b_0         /* mul_add_c(a[3],b[0],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       sd      c_1,24(a0)      /* r[3]=c1; */
+
+       dmultu  a_4,b_0         /* mul_add_c(a[4],b[0],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    c_1,c_3,t_2
+       dmultu  a_3,b_1         /* mul_add_c(a[3],b[1],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_2,b_2         /* mul_add_c(a[2],b[2],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_1,b_3         /* mul_add_c(a[1],b[3],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_0,b_4         /* mul_add_c(a[0],b[4],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       sd      c_2,32(a0)      /* r[4]=c2; */
+
+       dmultu  a_0,b_5         /* mul_add_c(a[0],b[5],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    c_2,c_1,t_2
+       dmultu  a_1,b_4         /* mul_add_c(a[1],b[4],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_2,b_3         /* mul_add_c(a[2],b[3],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_3,b_2         /* mul_add_c(a[3],b[2],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_4,b_1         /* mul_add_c(a[4],b[1],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_5,b_0         /* mul_add_c(a[5],b[0],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,40(a0)      /* r[5]=c3; */
+
+       dmultu  a_6,b_0         /* mul_add_c(a[6],b[0],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    c_3,c_2,t_2
+       dmultu  a_5,b_1         /* mul_add_c(a[5],b[1],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_4,b_2         /* mul_add_c(a[4],b[2],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_3,b_3         /* mul_add_c(a[3],b[3],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_2,b_4         /* mul_add_c(a[2],b[4],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_1,b_5         /* mul_add_c(a[1],b[5],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_0,b_6         /* mul_add_c(a[0],b[6],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       sd      c_1,48(a0)      /* r[6]=c1; */
+
+       dmultu  a_0,b_7         /* mul_add_c(a[0],b[7],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    c_1,c_3,t_2
+       dmultu  a_1,b_6         /* mul_add_c(a[1],b[6],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_2,b_5         /* mul_add_c(a[2],b[5],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_3,b_4         /* mul_add_c(a[3],b[4],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_4,b_3         /* mul_add_c(a[4],b[3],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_5,b_2         /* mul_add_c(a[5],b[2],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_6,b_1         /* mul_add_c(a[6],b[1],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_7,b_0         /* mul_add_c(a[7],b[0],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       sd      c_2,56(a0)      /* r[7]=c2; */
+
+       dmultu  a_7,b_1         /* mul_add_c(a[7],b[1],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    c_2,c_1,t_2
+       dmultu  a_6,b_2         /* mul_add_c(a[6],b[2],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_5,b_3         /* mul_add_c(a[5],b[3],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_4,b_4         /* mul_add_c(a[4],b[4],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_3,b_5         /* mul_add_c(a[3],b[5],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_2,b_6         /* mul_add_c(a[2],b[6],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_1,b_7         /* mul_add_c(a[1],b[7],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,64(a0)      /* r[8]=c3; */
+
+       dmultu  a_2,b_7         /* mul_add_c(a[2],b[7],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    c_3,c_2,t_2
+       dmultu  a_3,b_6         /* mul_add_c(a[3],b[6],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_4,b_5         /* mul_add_c(a[4],b[5],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_5,b_4         /* mul_add_c(a[5],b[4],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_6,b_3         /* mul_add_c(a[6],b[3],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_7,b_2         /* mul_add_c(a[7],b[2],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       sd      c_1,72(a0)      /* r[9]=c1; */
+
+       dmultu  a_7,b_3         /* mul_add_c(a[7],b[3],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    c_1,c_3,t_2
+       dmultu  a_6,b_4         /* mul_add_c(a[6],b[4],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_5,b_5         /* mul_add_c(a[5],b[5],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_4,b_6         /* mul_add_c(a[4],b[6],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_3,b_7         /* mul_add_c(a[3],b[7],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       sd      c_2,80(a0)      /* r[10]=c2; */
+
+       dmultu  a_4,b_7         /* mul_add_c(a[4],b[7],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    c_2,c_1,t_2
+       dmultu  a_5,b_6         /* mul_add_c(a[5],b[6],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_6,b_5         /* mul_add_c(a[6],b[5],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_7,b_4         /* mul_add_c(a[7],b[4],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,88(a0)      /* r[11]=c3; */
+
+       dmultu  a_7,b_5         /* mul_add_c(a[7],b[5],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    c_3,c_2,t_2
+       dmultu  a_6,b_6         /* mul_add_c(a[6],b[6],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_5,b_7         /* mul_add_c(a[5],b[7],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       sd      c_1,96(a0)      /* r[12]=c1; */
+
+       dmultu  a_6,b_7         /* mul_add_c(a[6],b[7],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    c_1,c_3,t_2
+       dmultu  a_7,b_6         /* mul_add_c(a[7],b[6],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       sd      c_2,104(a0)     /* r[13]=c2; */
+
+       dmultu  a_7,b_7         /* mul_add_c(a[7],b[7],c3,c1,c2); */
+       ld      s0,0(sp)        /* restore s0..s5 while the last multiply runs */
+       ld      s1,8(sp)
+       ld      s2,16(sp)
+       ld      s3,24(sp)
+       ld      s4,32(sp)
+       ld      s5,40(sp)
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sd      c_3,112(a0)     /* r[14]=c3; */
+       sd      c_1,120(a0)     /* r[15]=c1; */
+
+       PTR_ADD sp,FRAME_SIZE   /* release the frame */
+
+       jr      ra
+END(bn_mul_comba8)
+
+.align 5        /* bn_mul_comba4: 4x4-word column-wise multiplication.
+                 * Register use as seen below: a0 = result pointer
+                 * (8 words are stored at offsets 0..56), a1 and a2 =
+                 * pointers to the two 4-word operands. Only temporaries
+                 * are used, so there is no stack frame. Each
+                 * "mul_add_c(a,b,lo,mid,hi)" group adds the 128-bit
+                 * product a*b into the three-word accumulator
+                 * lo/mid/hi, with sltu capturing the carry of each daddu.
+                 */
+LEAF(bn_mul_comba4)
+       .set    reorder
+       ld      a_0,0(a1)
+       ld      b_0,0(a2)
+       ld      a_1,8(a1)
+       ld      a_2,16(a1)
+       dmultu  a_0,b_0         /* mul_add_c(a[0],b[0],c1,c2,c3); */
+       ld      a_3,24(a1)      /* remaining loads are issued after the first multiply */
+       ld      b_1,8(a2)
+       ld      b_2,16(a2)
+       ld      b_3,24(a2)
+       mflo    c_1
+       mfhi    c_2
+       sd      c_1,0(a0)       /* result word 0 */
+
+       dmultu  a_0,b_1         /* mul_add_c(a[0],b[1],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   c_3,t_2,AT      /* first use of c_3: initialised, not accumulated */
+       dmultu  a_1,b_0         /* mul_add_c(a[1],b[0],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    c_1,c_3,t_2     /* c_1 restarts as the carry out of c_3 */
+       sd      c_2,8(a0)       /* result word 1 */
+
+       dmultu  a_2,b_0         /* mul_add_c(a[2],b[0],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       dmultu  a_1,b_1         /* mul_add_c(a[1],b[1],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    c_2,c_1,t_2
+       dmultu  a_0,b_2         /* mul_add_c(a[0],b[2],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,16(a0)      /* result word 2 */
+
+       dmultu  a_0,b_3         /* mul_add_c(a[0],b[3],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    c_3,c_2,t_2
+       dmultu  a_1,b_2         /* mul_add_c(a[1],b[2],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_2,b_1         /* mul_add_c(a[2],b[1],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_3,b_0         /* mul_add_c(a[3],b[0],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       sd      c_1,24(a0)      /* result word 3 */
+
+       dmultu  a_3,b_1         /* mul_add_c(a[3],b[1],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    c_1,c_3,t_2
+       dmultu  a_2,b_2         /* mul_add_c(a[2],b[2],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_1,b_3         /* mul_add_c(a[1],b[3],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       sd      c_2,32(a0)      /* result word 4 */
+
+       dmultu  a_2,b_3         /* mul_add_c(a[2],b[3],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    c_2,c_1,t_2
+       dmultu  a_3,b_2         /* mul_add_c(a[3],b[2],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,40(a0)      /* result word 5 */
+
+       dmultu  a_3,b_3         /* mul_add_c(a[3],b[3],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2         /* last column: no further carry is tracked */
+       sd      c_1,48(a0)      /* result word 6 */
+       sd      c_2,56(a0)      /* result word 7 */
+
+       jr      ra
+END(bn_mul_comba4)
+
+#undef a_4      /* re-map a[4..7] for the squaring routine that follows */
+#undef a_5
+#undef a_6
+#undef a_7
+#define        a_4     b_0     /* the squaring code loads only from a1, so the b_* temporaries can hold a[4..7] */
+#define        a_5     b_1
+#define        a_6     b_2
+#define        a_7     b_3
+
+.align 5
+LEAF(bn_sqr_comba8)    /* squares the 8 words a[0..7] read from 0..56(a1) and stores the 16 result words at 0..120(a0); "b[]" in the per-product comments is the same array a[] */
+       .set    reorder
+       ld      a_0,0(a1)
+       ld      a_1,8(a1)
+       ld      a_2,16(a1)
+       ld      a_3,24(a1)
+       /* result word 0: low half of a[0]*a[0]; the high half starts the carry chain in c_2 */
+       dmultu  a_0,a_0         /* mul_add_c(a[0],b[0],c1,c2,c3); */
+       ld      a_4,32(a1)      /* remaining loads sit between dmultu and mflo - presumably to overlap with the multiply; a_4..a_7 use the b_0..b_3 registers */
+       ld      a_5,40(a1)
+       ld      a_6,48(a1)
+       ld      a_7,56(a1)
+       mflo    c_1
+       mfhi    c_2
+       sd      c_1,0(a0)
+       /* result word 1: 2*a[0]*a[1]; mul_add_c2 doubles the 128-bit product t_2:t_1 before accumulating */
+       dmultu  a_0,a_1         /* mul_add_c2(a[0],b[1],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_1,t_2,zero    /* c_1 = bit 63 of the high word, i.e. the bit shifted out by the doubling below */
+       dsll    t_2,1
+       slt     a2,t_1,zero     /* a2 is used as scratch: bit 63 of the low word */
+       daddu   t_2,a2          /* ...moved into bit 0 of the doubled high word */
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1      /* AT = carry out of the low-word addition */
+       daddu   c_3,t_2,AT      /* NOTE(review): t_2+AT can wrap when the doubled high word is all ones (here and at every later "daddu t_2,AT" of a doubled product) and that carry is not propagated; looks like the pattern fixed upstream for CVE-2014-3570 - confirm */
+       sd      c_2,8(a0)
+       /* result word 2: 2*a[2]*a[0] + a[1]*a[1]; the first product of a column overwrites the third accumulator instead of adding to it */
+       dmultu  a_2,a_0         /* mul_add_c2(a[2],b[0],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_2,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_1,a_1         /* mul_add_c(a[1],b[1],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,16(a0)
+       /* result word 3: 2*a[0]*a[3] + 2*a[1]*a[2] */
+       dmultu  a_0,a_3         /* mul_add_c2(a[0],b[3],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_3,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_1,a_2         /* mul_add_c2(a[1],b[2],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero     /* later products of a column add the shifted-out bit to the third accumulator via AT */
+       daddu   c_3,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       sd      c_1,24(a0)
+       /* result word 4: 2*a[4]*a[0] + 2*a[3]*a[1] + a[2]*a[2] */
+       dmultu  a_4,a_0         /* mul_add_c2(a[4],b[0],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_1,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_3,a_1         /* mul_add_c2(a[3],b[1],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_1,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_2,a_2         /* mul_add_c(a[2],b[2],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       sd      c_2,32(a0)
+       /* result word 5: 2*a[0]*a[5] + 2*a[1]*a[4] + 2*a[2]*a[3] */
+       dmultu  a_0,a_5         /* mul_add_c2(a[0],b[5],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_2,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_1,a_4         /* mul_add_c2(a[1],b[4],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_2,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_2,a_3         /* mul_add_c2(a[2],b[3],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_2,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,40(a0)
+       /* result word 6: 2*a[6]*a[0] + 2*a[5]*a[1] + 2*a[4]*a[2] + a[3]*a[3] */
+       dmultu  a_6,a_0         /* mul_add_c2(a[6],b[0],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_3,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_5,a_1         /* mul_add_c2(a[5],b[1],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_3,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_4,a_2         /* mul_add_c2(a[4],b[2],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_3,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_3,a_3         /* mul_add_c(a[3],b[3],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       sd      c_1,48(a0)
+       /* result word 7: 2*a[0]*a[7] + 2*a[1]*a[6] + 2*a[2]*a[5] + 2*a[3]*a[4] */
+       dmultu  a_0,a_7         /* mul_add_c2(a[0],b[7],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_1,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_1,a_6         /* mul_add_c2(a[1],b[6],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_1,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_2,a_5         /* mul_add_c2(a[2],b[5],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_1,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_3,a_4         /* mul_add_c2(a[3],b[4],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_1,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       sd      c_2,56(a0)
+       /* result word 8: 2*a[7]*a[1] + 2*a[6]*a[2] + 2*a[5]*a[3] + a[4]*a[4] */
+       dmultu  a_7,a_1         /* mul_add_c2(a[7],b[1],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_2,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_6,a_2         /* mul_add_c2(a[6],b[2],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_2,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_5,a_3         /* mul_add_c2(a[5],b[3],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_2,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_4,a_4         /* mul_add_c(a[4],b[4],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,64(a0)
+       /* result word 9: 2*a[2]*a[7] + 2*a[3]*a[6] + 2*a[4]*a[5] */
+       dmultu  a_2,a_7         /* mul_add_c2(a[2],b[7],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_3,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_3,a_6         /* mul_add_c2(a[3],b[6],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_3,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_4,a_5         /* mul_add_c2(a[4],b[5],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_3,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       sd      c_1,72(a0)
+       /* result word 10: 2*a[7]*a[3] + 2*a[6]*a[4] + a[5]*a[5] */
+       dmultu  a_7,a_3         /* mul_add_c2(a[7],b[3],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_1,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_6,a_4         /* mul_add_c2(a[6],b[4],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_1,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_5,a_5         /* mul_add_c(a[5],b[5],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       sd      c_2,80(a0)
+       /* result word 11: 2*a[4]*a[7] + 2*a[5]*a[6] */
+       dmultu  a_4,a_7         /* mul_add_c2(a[4],b[7],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_2,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_5,a_6         /* mul_add_c2(a[5],b[6],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero
+       daddu   c_2,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,88(a0)
+       /* result word 12: 2*a[7]*a[5] + a[6]*a[6] */
+       dmultu  a_7,a_5         /* mul_add_c2(a[7],b[5],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_3,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_6,a_6         /* mul_add_c(a[6],b[6],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       sd      c_1,96(a0)
+       /* result word 13: 2*a[6]*a[7] */
+       dmultu  a_6,a_7         /* mul_add_c2(a[6],b[7],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_1,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       sd      c_2,104(a0)
+       /* result words 14 and 15: a[7]*a[7]; no carry is computed out of the final high-word add */
+       dmultu  a_7,a_7         /* mul_add_c(a[7],b[7],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sd      c_3,112(a0)
+       sd      c_1,120(a0)
+
+       jr      ra
+END(bn_sqr_comba8)
+
+.align 5
+LEAF(bn_sqr_comba4)    /* squares the 4 words a[0..3] read from 0..24(a1) and stores the 8 result words at 0..56(a0); "b[]" in the per-product comments is the same array a[] */
+       .set    reorder
+       ld      a_0,0(a1)
+       ld      a_1,8(a1)
+       ld      a_2,16(a1)
+       ld      a_3,24(a1)
+       dmultu  a_0,a_0         /* mul_add_c(a[0],b[0],c1,c2,c3); */
+       mflo    c_1
+       mfhi    c_2
+       sd      c_1,0(a0)
+       /* result word 1: 2*a[0]*a[1]; mul_add_c2 doubles the 128-bit product t_2:t_1 before accumulating */
+       dmultu  a_0,a_1         /* mul_add_c2(a[0],b[1],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_1,t_2,zero    /* c_1 = bit 63 of the high word, i.e. the bit shifted out by the doubling below */
+       dsll    t_2,1
+       slt     a2,t_1,zero     /* a2 is used as scratch: bit 63 of the low word */
+       daddu   t_2,a2          /* ...moved into bit 0 of the doubled high word */
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1      /* AT = carry out of the low-word addition */
+       daddu   c_3,t_2,AT      /* NOTE(review): t_2+AT can wrap when the doubled high word is all ones (here and at every later "daddu t_2,AT" of a doubled product) and that carry is not propagated; looks like the pattern fixed upstream for CVE-2014-3570 - confirm */
+       sd      c_2,8(a0)
+       /* result word 2: 2*a[2]*a[0] + a[1]*a[1]; the first product of a column overwrites the third accumulator instead of adding to it */
+       dmultu  a_2,a_0         /* mul_add_c2(a[2],b[0],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_2,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       dmultu  a_1,a_1         /* mul_add_c(a[1],b[1],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,16(a0)
+       /* result word 3: 2*a[0]*a[3] + 2*a[1]*a[2] */
+       dmultu  a_0,a_3         /* mul_add_c2(a[0],b[3],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_3,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       dmultu  a_1,a_2         /* mul_add_c2(a[1],b[2],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       slt     AT,t_2,zero     /* later products of a column add the shifted-out bit to the third accumulator via AT */
+       daddu   c_3,AT
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sltu    AT,c_2,t_2
+       daddu   c_3,AT
+       sd      c_1,24(a0)
+       /* result word 4: 2*a[3]*a[1] + a[2]*a[2] */
+       dmultu  a_3,a_1         /* mul_add_c2(a[3],b[1],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_1,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       dmultu  a_2,a_2         /* mul_add_c(a[2],b[2],c2,c3,c1); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_2,t_1
+       sltu    AT,c_2,t_1
+       daddu   t_2,AT
+       daddu   c_3,t_2
+       sltu    AT,c_3,t_2
+       daddu   c_1,AT
+       sd      c_2,32(a0)
+       /* result word 5: 2*a[2]*a[3] */
+       dmultu  a_2,a_3         /* mul_add_c2(a[2],b[3],c3,c1,c2); */
+       mflo    t_1
+       mfhi    t_2
+       slt     c_2,t_2,zero
+       dsll    t_2,1
+       slt     a2,t_1,zero
+       daddu   t_2,a2
+       dsll    t_1,1
+       daddu   c_3,t_1
+       sltu    AT,c_3,t_1
+       daddu   t_2,AT
+       daddu   c_1,t_2
+       sltu    AT,c_1,t_2
+       daddu   c_2,AT
+       sd      c_3,40(a0)
+       /* result words 6 and 7: a[3]*a[3]; no carry is computed out of the final high-word add */
+       dmultu  a_3,a_3         /* mul_add_c(a[3],b[3],c1,c2,c3); */
+       mflo    t_1
+       mfhi    t_2
+       daddu   c_1,t_1
+       sltu    AT,c_1,t_1
+       daddu   t_2,AT
+       daddu   c_2,t_2
+       sd      c_1,48(a0)
+       sd      c_2,56(a0)
+
+       jr      ra
+END(bn_sqr_comba4)
diff --git a/crypto/bn/asm/modexp512-x86_64.pl b/crypto/bn/asm/modexp512-x86_64.pl
deleted file mode 100644 (file)
index bfd6e97..0000000
+++ /dev/null
@@ -1,1497 +0,0 @@
-#!/usr/bin/env perl
-#
-# Copyright (c) 2010-2011 Intel Corp.
-#   Author: Vinodh.Gopal@intel.com
-#           Jim Guilford
-#           Erdinc.Ozturk@intel.com
-#           Maxim.Perminov@intel.com
-#
-# More information about algorithm used can be found at:
-#   http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf
-#
-# ====================================================================
-# Copyright (c) 2011 The OpenSSL Project.  All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in
-#    the documentation and/or other materials provided with the
-#    distribution.
-#
-# 3. All advertising materials mentioning features or use of this
-#    software must display the following acknowledgment:
-#    "This product includes software developed by the OpenSSL Project
-#    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
-#
-# 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
-#    endorse or promote products derived from this software without
-#    prior written permission. For written permission, please contact
-#    licensing@OpenSSL.org.
-#
-# 5. Products derived from this software may not be called "OpenSSL"
-#    nor may "OpenSSL" appear in their names without prior written
-#    permission of the OpenSSL Project.
-#
-# 6. Redistributions of any form whatsoever must retain the following
-#    acknowledgment:
-#    "This product includes software developed by the OpenSSL Project
-#    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
-#
-# THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
-# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
-# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-# OF THE POSSIBILITY OF SUCH DAMAGE.
-# ====================================================================
-
-$flavour = shift;
-$output  = shift;
-if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
-my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
-die "can't locate x86_64-xlate.pl";
-
-open OUT,"| \"$^X\" $xlate $flavour $output";
-*STDOUT=*OUT;
-
-use strict;
-my $code=".text\n\n";
-my $m=0;
-
-#
-# Define x512 macros
-#
-
-#MULSTEP_512_ADD       MACRO   x7, x6, x5, x4, x3, x2, x1, x0, dst, src1, src2, add_src, tmp1, tmp2
-#
-# uses rax, rdx, and args
-sub MULSTEP_512_ADD
-{
- my ($x, $DST, $SRC2, $ASRC, $OP, $TMP)=@_;
- my @X=@$x;    # make a copy
-$code.=<<___;
-        mov    (+8*0)($SRC2), %rax
-        mul    $OP                     # rdx:rax = %OP * [0]
-        mov    ($ASRC), $X[0]
-        add    %rax, $X[0]
-        adc    \$0, %rdx
-        mov    $X[0], $DST
-___
-for(my $i=1;$i<8;$i++) {
-$code.=<<___;
-        mov    %rdx, $TMP
-
-        mov    (+8*$i)($SRC2), %rax
-        mul    $OP                     # rdx:rax = %OP * [$i]
-        mov    (+8*$i)($ASRC), $X[$i]
-        add    %rax, $X[$i]
-        adc    \$0, %rdx
-        add    $TMP, $X[$i]
-        adc    \$0, %rdx
-___
-}
-$code.=<<___;
-        mov    %rdx, $X[0]
-___
-}
-
-#MULSTEP_512   MACRO   x7, x6, x5, x4, x3, x2, x1, x0, dst, src2, src1_val, tmp
-#
-# uses rax, rdx, and args
-sub MULSTEP_512
-{
- my ($x, $DST, $SRC2, $OP, $TMP)=@_;
- my @X=@$x;    # make a copy
-$code.=<<___;
-        mov    (+8*0)($SRC2), %rax
-        mul    $OP                     # rdx:rax = %OP * [0]
-        add    %rax, $X[0]
-        adc    \$0, %rdx
-        mov    $X[0], $DST
-___
-for(my $i=1;$i<8;$i++) {
-$code.=<<___;
-        mov    %rdx, $TMP
-
-        mov    (+8*$i)($SRC2), %rax
-        mul    $OP                     # rdx:rax = %OP * [$i]
-        add    %rax, $X[$i]
-        adc    \$0, %rdx
-        add    $TMP, $X[$i]
-        adc    \$0, %rdx
-___
-}
-$code.=<<___;
-        mov    %rdx, $X[0]
-___
-}
-
-#
-# Swizzle Macros
-#
-
-# macro to copy data from flat space to swizzled table
-#MACRO swizzle pDst, pSrc, tmp1, tmp2
-# pDst and pSrc are modified
-sub swizzle
-{
- my ($pDst, $pSrc, $cnt, $d0)=@_;
-$code.=<<___;
-        mov    \$8, $cnt
-loop_$m:
-        mov    ($pSrc), $d0
-        mov    $d0#w, ($pDst)
-        shr    \$16, $d0
-        mov    $d0#w, (+64*1)($pDst)
-        shr    \$16, $d0
-        mov    $d0#w, (+64*2)($pDst)
-        shr    \$16, $d0
-        mov    $d0#w, (+64*3)($pDst)
-        lea    8($pSrc), $pSrc
-        lea    64*4($pDst), $pDst
-        dec    $cnt
-        jnz    loop_$m
-___
-
- $m++;
-}
-
-# macro to copy data from swizzled table to  flat space
-#MACRO unswizzle       pDst, pSrc, tmp*3
-sub unswizzle
-{
- my ($pDst, $pSrc, $cnt, $d0, $d1)=@_;
-$code.=<<___;
-        mov    \$4, $cnt
-loop_$m:
-        movzxw (+64*3+256*0)($pSrc), $d0
-        movzxw (+64*3+256*1)($pSrc), $d1
-        shl    \$16, $d0
-        shl    \$16, $d1
-        mov    (+64*2+256*0)($pSrc), $d0#w
-        mov    (+64*2+256*1)($pSrc), $d1#w
-        shl    \$16, $d0
-        shl    \$16, $d1
-        mov    (+64*1+256*0)($pSrc), $d0#w
-        mov    (+64*1+256*1)($pSrc), $d1#w
-        shl    \$16, $d0
-        shl    \$16, $d1
-        mov    (+64*0+256*0)($pSrc), $d0#w
-        mov    (+64*0+256*1)($pSrc), $d1#w
-        mov    $d0, (+8*0)($pDst)
-        mov    $d1, (+8*1)($pDst)
-        lea    256*2($pSrc), $pSrc
-        lea    8*2($pDst), $pDst
-        sub    \$1, $cnt
-        jnz    loop_$m
-___
-
- $m++;
-}
-
-#
-# Data Structures
-#
-
-# Reduce Data
-#
-#
-# Offset  Value
-# 0C0     Carries
-# 0B8     X2[10]
-# 0B0     X2[9]
-# 0A8     X2[8]
-# 0A0     X2[7]
-# 098     X2[6]
-# 090     X2[5]
-# 088     X2[4]
-# 080     X2[3]
-# 078     X2[2]
-# 070     X2[1]
-# 068     X2[0]
-# 060     X1[12]  P[10]
-# 058     X1[11]  P[9]  Z[8]
-# 050     X1[10]  P[8]  Z[7]
-# 048     X1[9]   P[7]  Z[6]
-# 040     X1[8]   P[6]  Z[5]
-# 038     X1[7]   P[5]  Z[4]
-# 030     X1[6]   P[4]  Z[3]
-# 028     X1[5]   P[3]  Z[2]
-# 020     X1[4]   P[2]  Z[1]
-# 018     X1[3]   P[1]  Z[0]
-# 010     X1[2]   P[0]  Y[2]
-# 008     X1[1]   Q[1]  Y[1]
-# 000     X1[0]   Q[0]  Y[0]
-
-my $X1_offset           =  0;                  # 13 qwords
-my $X2_offset           =  $X1_offset + 13*8;                  # 11 qwords
-my $Carries_offset      =  $X2_offset + 11*8;                  # 1 qword
-my $Q_offset            =  0;                  # 2 qwords
-my $P_offset            =  $Q_offset + 2*8;                    # 11 qwords
-my $Y_offset            =  0;                  # 3 qwords
-my $Z_offset            =  $Y_offset + 3*8;                    # 9 qwords
-
-my $Red_Data_Size       =  $Carries_offset + 1*8;                      # (25 qwords)
-
-#
-# Stack Frame
-#
-#
-# offset       value
-# ...          <old stack contents>
-# ...
-# 280          Garray
-
-# 278          tmp16[15]
-# ...          ...
-# 200          tmp16[0]
-
-# 1F8          tmp[7]
-# ...          ...
-# 1C0          tmp[0]
-
-# 1B8          GT[7]
-# ...          ...
-# 180          GT[0]
-
-# 178          Reduce Data
-# ...          ...
-# 0B8          Reduce Data
-# 0B0          reserved
-# 0A8          reserved
-# 0A0          reserved
-# 098          reserved
-# 090          reserved
-# 088          reduce result addr
-# 080          exp[8]
-
-# ...
-# 048          exp[1]
-# 040          exp[0]
-
-# 038          reserved
-# 030          loop_idx
-# 028          pg
-# 020          i
-# 018          pData   ; arg 4
-# 010          pG      ; arg 2
-# 008          pResult ; arg 1
-# 000          rsp     ; stack pointer before subtract
-
-my $rsp_offset          =  0;
-my $pResult_offset      =  8*1 + $rsp_offset;
-my $pG_offset           =  8*1 + $pResult_offset;
-my $pData_offset        =  8*1 + $pG_offset;
-my $i_offset            =  8*1 + $pData_offset;
-my $pg_offset           =  8*1 + $i_offset;
-my $loop_idx_offset     =  8*1 + $pg_offset;
-my $reserved1_offset    =  8*1 + $loop_idx_offset;
-my $exp_offset          =  8*1 + $reserved1_offset;
-my $red_result_addr_offset=  8*9 + $exp_offset;
-my $reserved2_offset    =  8*1 + $red_result_addr_offset;
-my $Reduce_Data_offset  =  8*5 + $reserved2_offset;
-my $GT_offset           =  $Red_Data_Size + $Reduce_Data_offset;
-my $tmp_offset          =  8*8 + $GT_offset;
-my $tmp16_offset        =  8*8 + $tmp_offset;
-my $garray_offset       =  8*16 + $tmp16_offset;
-my $mem_size            =  8*8*32 + $garray_offset;
-
-#
-# Offsets within Reduce Data
-#
-#
-#      struct MODF_2FOLD_MONT_512_C1_DATA {
-#      UINT64 t[8][8];
-#      UINT64 m[8];
-#      UINT64 m1[8]; /* 2^768 % m */
-#      UINT64 m2[8]; /* 2^640 % m */
-#      UINT64 k1[2]; /* (- 1/m) % 2^128 */
-#      };
-
-my $T                   =  0;
-my $M                   =  512;                        # = 8 * 8 * 8
-my $M1                  =  576;                        # = 8 * 8 * 9 /* += 8 * 8 */
-my $M2                  =  640;                        # = 8 * 8 * 10 /* += 8 * 8 */
-my $K1                  =  704;                        # = 8 * 8 * 11 /* += 8 * 8 */
-
-#
-#   FUNCTIONS
-#
-
-{{{
-#
-# MULADD_128x512 : Function to multiply 128-bits (2 qwords) by 512-bits (8 qwords)
-#                       and add 512-bits (8 qwords)
-#                       to get 640 bits (10 qwords)
-# Input: 128-bit mul source: [rdi+8*1], rbp
-#        512-bit mul source: [rsi+8*n]
-#        512-bit add source: r15, r14, ..., r9, r8
-# Output: r9, r8, r15, r14, r13, r12, r11, r10, [rcx+8*1], [rcx+8*0]
-# Clobbers all regs except: rcx, rsi, rdi
-$code.=<<___;
-.type  MULADD_128x512,\@abi-omnipotent
-.align 16
-MULADD_128x512:
-___
-       &MULSTEP_512([map("%r$_",(8..15))], "(+8*0)(%rcx)", "%rsi", "%rbp", "%rbx");
-$code.=<<___;
-        mov    (+8*1)(%rdi), %rbp
-___
-       &MULSTEP_512([map("%r$_",(9..15,8))], "(+8*1)(%rcx)", "%rsi", "%rbp", "%rbx");
-$code.=<<___;
-        ret
-.size  MULADD_128x512,.-MULADD_128x512
-___
-}}}
-
-{{{
-#MULADD_256x512        MACRO   pDst, pA, pB, OP, TMP, X7, X6, X5, X4, X3, X2, X1, X0
-#
-# Inputs: pDst: Destination  (768 bits, 12 qwords)
-#         pA:   Multiplicand (1024 bits, 16 qwords)
-#         pB:   Multiplicand (512 bits, 8 qwords)
-# Dst = Ah * B + Al
-# where Ah is (in qwords) A[15:12] (256 bits) and Al is A[7:0] (512 bits)
-# Results in X3 X2 X1 X0 X7 X6 X5 X4 Dst[3:0]
-# Uses registers: arguments, RAX, RDX
-sub MULADD_256x512
-{
- my ($pDst, $pA, $pB, $OP, $TMP, $X)=@_;
-$code.=<<___;
-       mov     (+8*12)($pA), $OP
-___
-       &MULSTEP_512_ADD($X, "(+8*0)($pDst)", $pB, $pA, $OP, $TMP);
-       push(@$X,shift(@$X));
-
-$code.=<<___;
-        mov    (+8*13)($pA), $OP
-___
-       &MULSTEP_512($X, "(+8*1)($pDst)", $pB, $OP, $TMP);
-       push(@$X,shift(@$X));
-
-$code.=<<___;
-        mov    (+8*14)($pA), $OP
-___
-       &MULSTEP_512($X, "(+8*2)($pDst)", $pB, $OP, $TMP);
-       push(@$X,shift(@$X));
-
-$code.=<<___;
-        mov    (+8*15)($pA), $OP
-___
-       &MULSTEP_512($X, "(+8*3)($pDst)", $pB, $OP, $TMP);
-       push(@$X,shift(@$X));
-}
-
-#
-# mont_reduce(UINT64 *x,  /* 1024 bits, 16 qwords */
-#             UINT64 *m,  /*  512 bits,  8 qwords */
-#             MODF_2FOLD_MONT_512_C1_DATA *data,
-#             UINT64 *r)  /*  512 bits,  8 qwords */
-# Input:  x (number to be reduced): tmp16 (Implicit)
-#         m (modulus):              [pM]  (Implicit)
-#         data (reduce data):       [pData] (Implicit)
-# Output: r (result):               Address in [red_res_addr]
-#         result also in: r9, r8, r15, r14, r13, r12, r11, r10
-
-my @X=map("%r$_",(8..15));
-
-$code.=<<___;
-.type  mont_reduce,\@abi-omnipotent
-.align 16
-mont_reduce:
-___
-
-my $STACK_DEPTH         =  8;
-       #
-       # X1 = Xh * M1 + Xl
-$code.=<<___;
-        lea    (+$Reduce_Data_offset+$X1_offset+$STACK_DEPTH)(%rsp), %rdi                      # pX1 (Dst) 769 bits, 13 qwords
-        mov    (+$pData_offset+$STACK_DEPTH)(%rsp), %rsi                       # pM1 (Bsrc) 512 bits, 8 qwords
-        add    \$$M1, %rsi
-        lea    (+$tmp16_offset+$STACK_DEPTH)(%rsp), %rcx                       # X (Asrc) 1024 bits, 16 qwords
-
-___
-
-       &MULADD_256x512("%rdi", "%rcx", "%rsi", "%rbp", "%rbx", \@X);   # rotates @X 4 times
-       # results in r11, r10, r9, r8, r15, r14, r13, r12, X1[3:0]
-
-$code.=<<___;
-        xor    %rax, %rax
-       # X1 += xl
-        add    (+8*8)(%rcx), $X[4]
-        adc    (+8*9)(%rcx), $X[5]
-        adc    (+8*10)(%rcx), $X[6]
-        adc    (+8*11)(%rcx), $X[7]
-        adc    \$0, %rax
-       # X1 is now rax, r11-r8, r15-r12, tmp16[3:0]
-
-       #
-       # check for carry ;; carry stored in rax
-        mov    $X[4], (+8*8)(%rdi)                     # rdi points to X1
-        mov    $X[5], (+8*9)(%rdi)
-        mov    $X[6], %rbp
-        mov    $X[7], (+8*11)(%rdi)
-
-        mov    %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp)
-
-        mov    (+8*0)(%rdi), $X[4]
-        mov    (+8*1)(%rdi), $X[5]
-        mov    (+8*2)(%rdi), $X[6]
-        mov    (+8*3)(%rdi), $X[7]
-
-       # X1 is now stored in: X1[11], rbp, X1[9:8], r15-r8
-       # rdi -> X1
-       # rsi -> M1
-
-       #
-       # X2 = Xh * M2 + Xl
-       # do first part (X2 = Xh * M2)
-        add    \$8*10, %rdi                    # rdi -> pXh ; 128 bits, 2 qwords
-                               #        Xh is actually { [rdi+8*1], rbp }
-        add    \$`$M2-$M1`, %rsi                       # rsi -> M2
-        lea    (+$Reduce_Data_offset+$X2_offset+$STACK_DEPTH)(%rsp), %rcx                      # rcx -> pX2 ; 641 bits, 11 qwords
-___
-       unshift(@X,pop(@X));    unshift(@X,pop(@X));
-$code.=<<___;
-
-        call   MULADD_128x512                  # args in rcx, rdi / rbp, rsi, r15-r8
-       # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0]
-        mov    (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rax
-
-       # X2 += Xl
-        add    (+8*8-8*10)(%rdi), $X[6]                # (-8*10) is to adjust rdi -> Xh to Xl
-        adc    (+8*9-8*10)(%rdi), $X[7]
-        mov    $X[6], (+8*8)(%rcx)
-        mov    $X[7], (+8*9)(%rcx)
-
-        adc    %rax, %rax
-        mov    %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp)
-
-        lea    (+$Reduce_Data_offset+$Q_offset+$STACK_DEPTH)(%rsp), %rdi                       # rdi -> pQ ; 128 bits, 2 qwords
-        add    \$`$K1-$M2`, %rsi                       # rsi -> pK1 ; 128 bits, 2 qwords
-
-       # MUL_128x128t128       rdi, rcx, rsi   ; Q = X2 * K1 (bottom half)
-       # B1:B0 = rsi[1:0] = K1[1:0]
-       # A1:A0 = rcx[1:0] = X2[1:0]
-       # Result = rdi[1],rbp = Q[1],rbp
-        mov    (%rsi), %r8                     # B0
-        mov    (+8*1)(%rsi), %rbx                      # B1
-
-        mov    (%rcx), %rax                    # A0
-        mul    %r8                     # B0
-        mov    %rax, %rbp
-        mov    %rdx, %r9
-
-        mov    (+8*1)(%rcx), %rax                      # A1
-        mul    %r8                     # B0
-        add    %rax, %r9
-
-        mov    (%rcx), %rax                    # A0
-        mul    %rbx                    # B1
-        add    %rax, %r9
-
-        mov    %r9, (+8*1)(%rdi)
-       # end MUL_128x128t128
-
-        sub    \$`$K1-$M`, %rsi
-
-        mov    (%rcx), $X[6]
-        mov    (+8*1)(%rcx), $X[7]                     # r9:r8 = X2[1:0]
-
-        call   MULADD_128x512                  # args in rcx, rdi / rbp, rsi, r15-r8
-       # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0]
-
-       # load first half of m to rdx, rdi, rbx, rax
-       # moved this here for efficiency
-        mov    (+8*0)(%rsi), %rax
-        mov    (+8*1)(%rsi), %rbx
-        mov    (+8*2)(%rsi), %rdi
-        mov    (+8*3)(%rsi), %rdx
-
-       # continue with reduction
-        mov    (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rbp
-
-        add    (+8*8)(%rcx), $X[6]
-        adc    (+8*9)(%rcx), $X[7]
-
-       #accumulate the final carry to rbp
-        adc    %rbp, %rbp
-
-       # Add in overflow corrections: R = (X2>>128) += T[overflow]
-       # R = {r9, r8, r15, r14, ..., r10}
-        shl    \$3, %rbp
-        mov    (+$pData_offset+$STACK_DEPTH)(%rsp), %rcx                       # rsi -> Data (and points to T)
-        add    %rcx, %rbp                      # pT ; 512 bits, 8 qwords, spread out
-
-       # rsi will be used to generate a mask after the addition
-        xor    %rsi, %rsi
-
-        add    (+8*8*0)(%rbp), $X[0]
-        adc    (+8*8*1)(%rbp), $X[1]
-        adc    (+8*8*2)(%rbp), $X[2]
-        adc    (+8*8*3)(%rbp), $X[3]
-        adc    (+8*8*4)(%rbp), $X[4]
-        adc    (+8*8*5)(%rbp), $X[5]
-        adc    (+8*8*6)(%rbp), $X[6]
-        adc    (+8*8*7)(%rbp), $X[7]
-
-       # if there is a carry:  rsi = 0xFFFFFFFFFFFFFFFF
-       # if carry is clear:    rsi = 0x0000000000000000
-        sbb    \$0, %rsi
-
-       # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m
-        and    %rsi, %rax
-        and    %rsi, %rbx
-        and    %rsi, %rdi
-        and    %rsi, %rdx
-
-        mov    \$1, %rbp
-        sub    %rax, $X[0]
-        sbb    %rbx, $X[1]
-        sbb    %rdi, $X[2]
-        sbb    %rdx, $X[3]
-
-       # if there is a borrow:         rbp = 0
-       # if there is no borrow:        rbp = 1
-       # this is used to save the borrows in between the first half and the 2nd half of the subtraction of m
-        sbb    \$0, %rbp
-
-       #load second half of m to rdx, rdi, rbx, rax
-
-        add    \$$M, %rcx
-        mov    (+8*4)(%rcx), %rax
-        mov    (+8*5)(%rcx), %rbx
-        mov    (+8*6)(%rcx), %rdi
-        mov    (+8*7)(%rcx), %rdx
-
-       # use the rsi mask as before
-       # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m
-        and    %rsi, %rax
-        and    %rsi, %rbx
-        and    %rsi, %rdi
-        and    %rsi, %rdx
-
-       # if rbp = 0, there was a borrow before, it is moved to the carry flag
-       # if rbp = 1, there was not a borrow before, carry flag is cleared
-        sub    \$1, %rbp
-
-        sbb    %rax, $X[4]
-        sbb    %rbx, $X[5]
-        sbb    %rdi, $X[6]
-        sbb    %rdx, $X[7]
-
-       # write R back to memory
-
-        mov    (+$red_result_addr_offset+$STACK_DEPTH)(%rsp), %rsi
-        mov    $X[0], (+8*0)(%rsi)
-        mov    $X[1], (+8*1)(%rsi)
-        mov    $X[2], (+8*2)(%rsi)
-        mov    $X[3], (+8*3)(%rsi)
-        mov    $X[4], (+8*4)(%rsi)
-        mov    $X[5], (+8*5)(%rsi)
-        mov    $X[6], (+8*6)(%rsi)
-        mov    $X[7], (+8*7)(%rsi)
-
-        ret
-.size  mont_reduce,.-mont_reduce
-___
-}}}
-
-{{{
-#MUL_512x512   MACRO   pDst, pA, pB, x7, x6, x5, x4, x3, x2, x1, x0, tmp*2
-#
-# Inputs: pDst: Destination  (1024 bits, 16 qwords)
-#         pA:   Multiplicand (512 bits, 8 qwords)
-#         pB:   Multiplicand (512 bits, 8 qwords)
-# Uses registers rax, rdx, args
-#   B operand in [pB] and also in x7...x0
-sub MUL_512x512
-{
- my ($pDst, $pA, $pB, $x, $OP, $TMP, $pDst_o)=@_;
- my ($pDst,  $pDst_o) = ($pDst =~ m/([^+]*)\+?(.*)?/);
- my @X=@$x;    # make a copy
-
-$code.=<<___;
-        mov    (+8*0)($pA), $OP
-
-        mov    $X[0], %rax
-        mul    $OP                     # rdx:rax = %OP * [0]
-        mov    %rax, (+$pDst_o+8*0)($pDst)
-        mov    %rdx, $X[0]
-___
-for(my $i=1;$i<8;$i++) {
-$code.=<<___;
-        mov    $X[$i], %rax
-        mul    $OP                     # rdx:rax = %OP * [$i]
-        add    %rax, $X[$i-1]
-        adc    \$0, %rdx
-        mov    %rdx, $X[$i]
-___
-}
-
-for(my $i=1;$i<8;$i++) {
-$code.=<<___;
-        mov    (+8*$i)($pA), $OP
-___
-
-       &MULSTEP_512(\@X, "(+$pDst_o+8*$i)($pDst)", $pB, $OP, $TMP);
-       push(@X,shift(@X));
-}
-
-$code.=<<___;
-        mov    $X[0], (+$pDst_o+8*8)($pDst)
-        mov    $X[1], (+$pDst_o+8*9)($pDst)
-        mov    $X[2], (+$pDst_o+8*10)($pDst)
-        mov    $X[3], (+$pDst_o+8*11)($pDst)
-        mov    $X[4], (+$pDst_o+8*12)($pDst)
-        mov    $X[5], (+$pDst_o+8*13)($pDst)
-        mov    $X[6], (+$pDst_o+8*14)($pDst)
-        mov    $X[7], (+$pDst_o+8*15)($pDst)
-___
-}
-
-#
-# mont_mul_a3b : subroutine to compute (Src1 * Src2) % M (all 512-bits)
-# Input:  src1: Address of source 1: rdi
-#         src2: Address of source 2: rsi
-# Output: dst:  Address of destination: [red_res_addr]
-#    src2 and result also in: r9, r8, r15, r14, r13, r12, r11, r10
-# Temp:   Clobbers [tmp16], all registers
-$code.=<<___;
-.type  mont_mul_a3b,\@abi-omnipotent
-.align 16
-mont_mul_a3b:
-       #
-       # multiply tmp = src1 * src2
-       # For multiply: dst = rcx, src1 = rdi, src2 = rsi
-       # stack depth is extra 8 from call
-___
-       &MUL_512x512("%rsp+$tmp16_offset+8", "%rdi", "%rsi", [map("%r$_",(10..15,8..9))], "%rbp", "%rbx");
-$code.=<<___;
-       #
-       # Dst = tmp % m
-       # Call reduce(tmp, m, data, dst)
-
-       # tail recursion optimization: jmp to mont_reduce and return from there
-        jmp    mont_reduce
-       # call  mont_reduce
-       # ret
-.size  mont_mul_a3b,.-mont_mul_a3b
-___
-}}}
-
-{{{
-#SQR_512 MACRO pDest, pA, x7, x6, x5, x4, x3, x2, x1, x0, tmp*4
-#
-# Input in memory [pA] and also in x7...x0
-# Uses all argument registers plus rax and rdx
-#
-# This version computes all of the off-diagonal terms into memory,
-# and then it adds in the diagonal terms
-
-sub SQR_512
-{
- my ($pDst, $pA, $x, $A, $tmp, $x7, $x6, $pDst_o)=@_;
- my ($pDst,  $pDst_o) = ($pDst =~ m/([^+]*)\+?(.*)?/);
- my @X=@$x;    # make a copy
-$code.=<<___;
-       # ------------------
-       # first pass 01...07
-       # ------------------
-        mov    $X[0], $A
-
-        mov    $X[1],%rax
-        mul    $A
-        mov    %rax, (+$pDst_o+8*1)($pDst)
-___
-for(my $i=2;$i<8;$i++) {
-$code.=<<___;
-        mov    %rdx, $X[$i-2]
-        mov    $X[$i],%rax
-        mul    $A
-        add    %rax, $X[$i-2]
-        adc    \$0, %rdx
-___
-}
-$code.=<<___;
-        mov    %rdx, $x7
-
-        mov    $X[0], (+$pDst_o+8*2)($pDst)
-
-       # ------------------
-       # second pass 12...17
-       # ------------------
-
-        mov    (+8*1)($pA), $A
-
-        mov    (+8*2)($pA),%rax
-        mul    $A
-        add    %rax, $X[1]
-        adc    \$0, %rdx
-        mov    $X[1], (+$pDst_o+8*3)($pDst)
-
-        mov    %rdx, $X[0]
-        mov    (+8*3)($pA),%rax
-        mul    $A
-        add    %rax, $X[2]
-        adc    \$0, %rdx
-        add    $X[0], $X[2]
-        adc    \$0, %rdx
-        mov    $X[2], (+$pDst_o+8*4)($pDst)
-
-        mov    %rdx, $X[0]
-        mov    (+8*4)($pA),%rax
-        mul    $A
-        add    %rax, $X[3]
-        adc    \$0, %rdx
-        add    $X[0], $X[3]
-        adc    \$0, %rdx
-
-        mov    %rdx, $X[0]
-        mov    (+8*5)($pA),%rax
-        mul    $A
-        add    %rax, $X[4]
-        adc    \$0, %rdx
-        add    $X[0], $X[4]
-        adc    \$0, %rdx
-
-        mov    %rdx, $X[0]
-        mov    $X[6],%rax
-        mul    $A
-        add    %rax, $X[5]
-        adc    \$0, %rdx
-        add    $X[0], $X[5]
-        adc    \$0, %rdx
-
-        mov    %rdx, $X[0]
-        mov    $X[7],%rax
-        mul    $A
-        add    %rax, $x7
-        adc    \$0, %rdx
-        add    $X[0], $x7
-        adc    \$0, %rdx
-
-        mov    %rdx, $X[1]
-
-       # ------------------
-       # third pass 23...27
-       # ------------------
-        mov    (+8*2)($pA), $A
-
-        mov    (+8*3)($pA),%rax
-        mul    $A
-        add    %rax, $X[3]
-        adc    \$0, %rdx
-        mov    $X[3], (+$pDst_o+8*5)($pDst)
-
-        mov    %rdx, $X[0]
-        mov    (+8*4)($pA),%rax
-        mul    $A
-        add    %rax, $X[4]
-        adc    \$0, %rdx
-        add    $X[0], $X[4]
-        adc    \$0, %rdx
-        mov    $X[4], (+$pDst_o+8*6)($pDst)
-
-        mov    %rdx, $X[0]
-        mov    (+8*5)($pA),%rax
-        mul    $A
-        add    %rax, $X[5]
-        adc    \$0, %rdx
-        add    $X[0], $X[5]
-        adc    \$0, %rdx
-
-        mov    %rdx, $X[0]
-        mov    $X[6],%rax
-        mul    $A
-        add    %rax, $x7
-        adc    \$0, %rdx
-        add    $X[0], $x7
-        adc    \$0, %rdx
-
-        mov    %rdx, $X[0]
-        mov    $X[7],%rax
-        mul    $A
-        add    %rax, $X[1]
-        adc    \$0, %rdx
-        add    $X[0], $X[1]
-        adc    \$0, %rdx
-
-        mov    %rdx, $X[2]
-
-       # ------------------
-       # fourth pass 34...37
-       # ------------------
-
-        mov    (+8*3)($pA), $A
-
-        mov    (+8*4)($pA),%rax
-        mul    $A
-        add    %rax, $X[5]
-        adc    \$0, %rdx
-        mov    $X[5], (+$pDst_o+8*7)($pDst)
-
-        mov    %rdx, $X[0]
-        mov    (+8*5)($pA),%rax
-        mul    $A
-        add    %rax, $x7
-        adc    \$0, %rdx
-        add    $X[0], $x7
-        adc    \$0, %rdx
-        mov    $x7, (+$pDst_o+8*8)($pDst)
-
-        mov    %rdx, $X[0]
-        mov    $X[6],%rax
-        mul    $A
-        add    %rax, $X[1]
-        adc    \$0, %rdx
-        add    $X[0], $X[1]
-        adc    \$0, %rdx
-
-        mov    %rdx, $X[0]
-        mov    $X[7],%rax
-        mul    $A
-        add    %rax, $X[2]
-        adc    \$0, %rdx
-        add    $X[0], $X[2]
-        adc    \$0, %rdx
-
-        mov    %rdx, $X[5]
-
-       # ------------------
-       # fifth pass 45...47
-       # ------------------
-        mov    (+8*4)($pA), $A
-
-        mov    (+8*5)($pA),%rax
-        mul    $A
-        add    %rax, $X[1]
-        adc    \$0, %rdx
-        mov    $X[1], (+$pDst_o+8*9)($pDst)
-
-        mov    %rdx, $X[0]
-        mov    $X[6],%rax
-        mul    $A
-        add    %rax, $X[2]
-        adc    \$0, %rdx
-        add    $X[0], $X[2]
-        adc    \$0, %rdx
-        mov    $X[2], (+$pDst_o+8*10)($pDst)
-
-        mov    %rdx, $X[0]
-        mov    $X[7],%rax
-        mul    $A
-        add    %rax, $X[5]
-        adc    \$0, %rdx
-        add    $X[0], $X[5]
-        adc    \$0, %rdx
-
-        mov    %rdx, $X[1]
-
-       # ------------------
-       # sixth pass 56...57
-       # ------------------
-        mov    (+8*5)($pA), $A
-
-        mov    $X[6],%rax
-        mul    $A
-        add    %rax, $X[5]
-        adc    \$0, %rdx
-        mov    $X[5], (+$pDst_o+8*11)($pDst)
-
-        mov    %rdx, $X[0]
-        mov    $X[7],%rax
-        mul    $A
-        add    %rax, $X[1]
-        adc    \$0, %rdx
-        add    $X[0], $X[1]
-        adc    \$0, %rdx
-        mov    $X[1], (+$pDst_o+8*12)($pDst)
-
-        mov    %rdx, $X[2]
-
-       # ------------------
-       # seventh pass 67
-       # ------------------
-        mov    $X[6], $A
-
-        mov    $X[7],%rax
-        mul    $A
-        add    %rax, $X[2]
-        adc    \$0, %rdx
-        mov    $X[2], (+$pDst_o+8*13)($pDst)
-
-        mov    %rdx, (+$pDst_o+8*14)($pDst)
-
-       # start finalize (add   in squares, and double off-terms)
-        mov    (+$pDst_o+8*1)($pDst), $X[0]
-        mov    (+$pDst_o+8*2)($pDst), $X[1]
-        mov    (+$pDst_o+8*3)($pDst), $X[2]
-        mov    (+$pDst_o+8*4)($pDst), $X[3]
-        mov    (+$pDst_o+8*5)($pDst), $X[4]
-        mov    (+$pDst_o+8*6)($pDst), $X[5]
-
-        mov    (+8*3)($pA), %rax
-        mul    %rax
-        mov    %rax, $x6
-        mov    %rdx, $X[6]
-
-        add    $X[0], $X[0]
-        adc    $X[1], $X[1]
-        adc    $X[2], $X[2]
-        adc    $X[3], $X[3]
-        adc    $X[4], $X[4]
-        adc    $X[5], $X[5]
-        adc    \$0, $X[6]
-
-        mov    (+8*0)($pA), %rax
-        mul    %rax
-        mov    %rax, (+$pDst_o+8*0)($pDst)
-        mov    %rdx, $A
-
-        mov    (+8*1)($pA), %rax
-        mul    %rax
-
-        add    $A, $X[0]
-        adc    %rax, $X[1]
-        adc    \$0, %rdx
-
-        mov    %rdx, $A
-        mov    $X[0], (+$pDst_o+8*1)($pDst)
-        mov    $X[1], (+$pDst_o+8*2)($pDst)
-
-        mov    (+8*2)($pA), %rax
-        mul    %rax
-
-        add    $A, $X[2]
-        adc    %rax, $X[3]
-        adc    \$0, %rdx
-
-        mov    %rdx, $A
-
-        mov    $X[2], (+$pDst_o+8*3)($pDst)
-        mov    $X[3], (+$pDst_o+8*4)($pDst)
-
-        xor    $tmp, $tmp
-        add    $A, $X[4]
-        adc    $x6, $X[5]
-        adc    \$0, $tmp
-
-        mov    $X[4], (+$pDst_o+8*5)($pDst)
-        mov    $X[5], (+$pDst_o+8*6)($pDst)
-
-       # %%tmp has 0/1 in column 7
-       # %%A6 has a full value in column 7
-
-        mov    (+$pDst_o+8*7)($pDst), $X[0]
-        mov    (+$pDst_o+8*8)($pDst), $X[1]
-        mov    (+$pDst_o+8*9)($pDst), $X[2]
-        mov    (+$pDst_o+8*10)($pDst), $X[3]
-        mov    (+$pDst_o+8*11)($pDst), $X[4]
-        mov    (+$pDst_o+8*12)($pDst), $X[5]
-        mov    (+$pDst_o+8*13)($pDst), $x6
-        mov    (+$pDst_o+8*14)($pDst), $x7
-
-        mov    $X[7], %rax
-        mul    %rax
-        mov    %rax, $X[7]
-        mov    %rdx, $A
-
-        add    $X[0], $X[0]
-        adc    $X[1], $X[1]
-        adc    $X[2], $X[2]
-        adc    $X[3], $X[3]
-        adc    $X[4], $X[4]
-        adc    $X[5], $X[5]
-        adc    $x6, $x6
-        adc    $x7, $x7
-        adc    \$0, $A
-
-        add    $tmp, $X[0]
-
-        mov    (+8*4)($pA), %rax
-        mul    %rax
-
-        add    $X[6], $X[0]
-        adc    %rax, $X[1]
-        adc    \$0, %rdx
-
-        mov    %rdx, $tmp
-
-        mov    $X[0], (+$pDst_o+8*7)($pDst)
-        mov    $X[1], (+$pDst_o+8*8)($pDst)
-
-        mov    (+8*5)($pA), %rax
-        mul    %rax
-
-        add    $tmp, $X[2]
-        adc    %rax, $X[3]
-        adc    \$0, %rdx
-
-        mov    %rdx, $tmp
-
-        mov    $X[2], (+$pDst_o+8*9)($pDst)
-        mov    $X[3], (+$pDst_o+8*10)($pDst)
-
-        mov    (+8*6)($pA), %rax
-        mul    %rax
-
-        add    $tmp, $X[4]
-        adc    %rax, $X[5]
-        adc    \$0, %rdx
-
-        mov    $X[4], (+$pDst_o+8*11)($pDst)
-        mov    $X[5], (+$pDst_o+8*12)($pDst)
-
-        add    %rdx, $x6
-        adc    $X[7], $x7
-        adc    \$0, $A
-
-        mov    $x6, (+$pDst_o+8*13)($pDst)
-        mov    $x7, (+$pDst_o+8*14)($pDst)
-        mov    $A, (+$pDst_o+8*15)($pDst)
-___
-}
-
-#
-# sqr_reduce: subroutine to compute Result = reduce(Result * Result)
-#
-# input and result also in: r9, r8, r15, r14, r13, r12, r11, r10
-#
-$code.=<<___;
-.type  sqr_reduce,\@abi-omnipotent
-.align 16
-sqr_reduce:
-        mov    (+$pResult_offset+8)(%rsp), %rcx
-___
-       &SQR_512("%rsp+$tmp16_offset+8", "%rcx", [map("%r$_",(10..15,8..9))], "%rbx", "%rbp", "%rsi", "%rdi");
-$code.=<<___;
-       # tail recursion optimization: jmp to mont_reduce and return from there
-        jmp    mont_reduce
-       # call  mont_reduce
-       # ret
-.size  sqr_reduce,.-sqr_reduce
-___
-}}}
-
-#
-# MAIN FUNCTION
-#
-
-#mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */
-#           UINT64 *g,   /* 512 bits, 8 qwords */
-#           UINT64 *exp, /* 512 bits, 8 qwords */
-#           struct mod_ctx_512 *data)
-
-# window size = 5
-# table size = 2^5 = 32
-#table_entries equ     32
-#table_size    equ     table_entries * 8
-$code.=<<___;
-.globl mod_exp_512
-.type  mod_exp_512,\@function,4
-mod_exp_512:
-        push   %rbp
-        push   %rbx
-        push   %r12
-        push   %r13
-        push   %r14
-        push   %r15
-
-       # adjust stack down and then align it with cache boundary
-        mov    %rsp, %r8
-        sub    \$$mem_size, %rsp
-        and    \$-64, %rsp
-
-       # store previous stack pointer and arguments
-        mov    %r8, (+$rsp_offset)(%rsp)
-        mov    %rdi, (+$pResult_offset)(%rsp)
-        mov    %rsi, (+$pG_offset)(%rsp)
-        mov    %rcx, (+$pData_offset)(%rsp)
-.Lbody:
-       # transform g into montgomery space
-       # GT = reduce(g * C2) = reduce(g * (2^256))
-       # reduce expects to have the input in [tmp16]
-        pxor   %xmm4, %xmm4
-        movdqu (+16*0)(%rsi), %xmm0
-        movdqu (+16*1)(%rsi), %xmm1
-        movdqu (+16*2)(%rsi), %xmm2
-        movdqu (+16*3)(%rsi), %xmm3
-        movdqa %xmm4, (+$tmp16_offset+16*0)(%rsp)
-        movdqa %xmm4, (+$tmp16_offset+16*1)(%rsp)
-        movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp)
-        movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp)
-        movdqa %xmm0, (+$tmp16_offset+16*2)(%rsp)
-        movdqa %xmm1, (+$tmp16_offset+16*3)(%rsp)
-        movdqa %xmm2, (+$tmp16_offset+16*4)(%rsp)
-        movdqa %xmm3, (+$tmp16_offset+16*5)(%rsp)
-
-       # load pExp before rdx gets blown away
-        movdqu (+16*0)(%rdx), %xmm0
-        movdqu (+16*1)(%rdx), %xmm1
-        movdqu (+16*2)(%rdx), %xmm2
-        movdqu (+16*3)(%rdx), %xmm3
-
-        lea    (+$GT_offset)(%rsp), %rbx
-        mov    %rbx, (+$red_result_addr_offset)(%rsp)
-        call   mont_reduce
-
-       # Initialize tmp = C
-        lea    (+$tmp_offset)(%rsp), %rcx
-        xor    %rax, %rax
-        mov    %rax, (+8*0)(%rcx)
-        mov    %rax, (+8*1)(%rcx)
-        mov    %rax, (+8*3)(%rcx)
-        mov    %rax, (+8*4)(%rcx)
-        mov    %rax, (+8*5)(%rcx)
-        mov    %rax, (+8*6)(%rcx)
-        mov    %rax, (+8*7)(%rcx)
-        mov    %rax, (+$exp_offset+8*8)(%rsp)
-        movq   \$1, (+8*2)(%rcx)
-
-        lea    (+$garray_offset)(%rsp), %rbp
-        mov    %rcx, %rsi                      # pTmp
-        mov    %rbp, %rdi                      # Garray[][0]
-___
-
-       &swizzle("%rdi", "%rcx", "%rax", "%rbx");
-
-       # for (rax = 31; rax != 0; rax--) {
-       #     tmp = reduce(tmp * G)
-       #     swizzle(pg, tmp);
-       #     pg += 2; }
-$code.=<<___;
-        mov    \$31, %rax
-        mov    %rax, (+$i_offset)(%rsp)
-        mov    %rbp, (+$pg_offset)(%rsp)
-       # rsi -> pTmp
-        mov    %rsi, (+$red_result_addr_offset)(%rsp)
-        mov    (+8*0)(%rsi), %r10
-        mov    (+8*1)(%rsi), %r11
-        mov    (+8*2)(%rsi), %r12
-        mov    (+8*3)(%rsi), %r13
-        mov    (+8*4)(%rsi), %r14
-        mov    (+8*5)(%rsi), %r15
-        mov    (+8*6)(%rsi), %r8
-        mov    (+8*7)(%rsi), %r9
-init_loop:
-        lea    (+$GT_offset)(%rsp), %rdi
-        call   mont_mul_a3b
-        lea    (+$tmp_offset)(%rsp), %rsi
-        mov    (+$pg_offset)(%rsp), %rbp
-        add    \$2, %rbp
-        mov    %rbp, (+$pg_offset)(%rsp)
-        mov    %rsi, %rcx                      # rcx = rsi = addr of tmp
-___
-
-       &swizzle("%rbp", "%rcx", "%rax", "%rbx");
-$code.=<<___;
-        mov    (+$i_offset)(%rsp), %rax
-        sub    \$1, %rax
-        mov    %rax, (+$i_offset)(%rsp)
-        jne    init_loop
-
-       #
-       # Copy exponent onto stack
-        movdqa %xmm0, (+$exp_offset+16*0)(%rsp)
-        movdqa %xmm1, (+$exp_offset+16*1)(%rsp)
-        movdqa %xmm2, (+$exp_offset+16*2)(%rsp)
-        movdqa %xmm3, (+$exp_offset+16*3)(%rsp)
-
-
-       #
-       # Do exponentiation
-       # Initialize result to G[exp{511:507}]
-        mov    (+$exp_offset+62)(%rsp), %eax
-        mov    %rax, %rdx
-        shr    \$11, %rax
-        and    \$0x07FF, %edx
-        mov    %edx, (+$exp_offset+62)(%rsp)
-        lea    (+$garray_offset)(%rsp,%rax,2), %rsi
-        mov    (+$pResult_offset)(%rsp), %rdx
-___
-
-       &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax");
-
-       #
-       # Loop variables
-       # rcx = [loop_idx] = index: 510-5 to 0 by 5
-$code.=<<___;
-        movq   \$505, (+$loop_idx_offset)(%rsp)
-
-        mov    (+$pResult_offset)(%rsp), %rcx
-        mov    %rcx, (+$red_result_addr_offset)(%rsp)
-        mov    (+8*0)(%rcx), %r10
-        mov    (+8*1)(%rcx), %r11
-        mov    (+8*2)(%rcx), %r12
-        mov    (+8*3)(%rcx), %r13
-        mov    (+8*4)(%rcx), %r14
-        mov    (+8*5)(%rcx), %r15
-        mov    (+8*6)(%rcx), %r8
-        mov    (+8*7)(%rcx), %r9
-        jmp    sqr_2
-
-main_loop_a3b:
-        call   sqr_reduce
-        call   sqr_reduce
-        call   sqr_reduce
-sqr_2:
-        call   sqr_reduce
-        call   sqr_reduce
-
-       #
-       # Do multiply, first look up proper value in Garray
-        mov    (+$loop_idx_offset)(%rsp), %rcx                 # bit index
-        mov    %rcx, %rax
-        shr    \$4, %rax                       # rax is word pointer
-        mov    (+$exp_offset)(%rsp,%rax,2), %edx
-        and    \$15, %rcx
-        shrq   %cl, %rdx
-        and    \$0x1F, %rdx
-
-        lea    (+$garray_offset)(%rsp,%rdx,2), %rsi
-        lea    (+$tmp_offset)(%rsp), %rdx
-        mov    %rdx, %rdi
-___
-
-       &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax");
-       # rdi = tmp = pG
-
-       #
-       # Call mod_mul_a1(pDst,  pSrc1, pSrc2, pM, pData)
-       #                 result result pG     M   Data
-$code.=<<___;
-        mov    (+$pResult_offset)(%rsp), %rsi
-        call   mont_mul_a3b
-
-       #
-       # finish loop
-        mov    (+$loop_idx_offset)(%rsp), %rcx
-        sub    \$5, %rcx
-        mov    %rcx, (+$loop_idx_offset)(%rsp)
-        jge    main_loop_a3b
-
-       #
-
-end_main_loop_a3b:
-       # transform result out of Montgomery space
-       # result = reduce(result)
-        mov    (+$pResult_offset)(%rsp), %rdx
-        pxor   %xmm4, %xmm4
-        movdqu (+16*0)(%rdx), %xmm0
-        movdqu (+16*1)(%rdx), %xmm1
-        movdqu (+16*2)(%rdx), %xmm2
-        movdqu (+16*3)(%rdx), %xmm3
-        movdqa %xmm4, (+$tmp16_offset+16*4)(%rsp)
-        movdqa %xmm4, (+$tmp16_offset+16*5)(%rsp)
-        movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp)
-        movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp)
-        movdqa %xmm0, (+$tmp16_offset+16*0)(%rsp)
-        movdqa %xmm1, (+$tmp16_offset+16*1)(%rsp)
-        movdqa %xmm2, (+$tmp16_offset+16*2)(%rsp)
-        movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp)
-        call   mont_reduce
-
-       # If result > m, subract m
-       # load result into r15:r8
-        mov    (+$pResult_offset)(%rsp), %rax
-        mov    (+8*0)(%rax), %r8
-        mov    (+8*1)(%rax), %r9
-        mov    (+8*2)(%rax), %r10
-        mov    (+8*3)(%rax), %r11
-        mov    (+8*4)(%rax), %r12
-        mov    (+8*5)(%rax), %r13
-        mov    (+8*6)(%rax), %r14
-        mov    (+8*7)(%rax), %r15
-
-       # subtract m
-        mov    (+$pData_offset)(%rsp), %rbx
-        add    \$$M, %rbx
-
-        sub    (+8*0)(%rbx), %r8
-        sbb    (+8*1)(%rbx), %r9
-        sbb    (+8*2)(%rbx), %r10
-        sbb    (+8*3)(%rbx), %r11
-        sbb    (+8*4)(%rbx), %r12
-        sbb    (+8*5)(%rbx), %r13
-        sbb    (+8*6)(%rbx), %r14
-        sbb    (+8*7)(%rbx), %r15
-
-       # if Carry is clear, replace result with difference
-        mov    (+8*0)(%rax), %rsi
-        mov    (+8*1)(%rax), %rdi
-        mov    (+8*2)(%rax), %rcx
-        mov    (+8*3)(%rax), %rdx
-        cmovnc %r8, %rsi
-        cmovnc %r9, %rdi
-        cmovnc %r10, %rcx
-        cmovnc %r11, %rdx
-        mov    %rsi, (+8*0)(%rax)
-        mov    %rdi, (+8*1)(%rax)
-        mov    %rcx, (+8*2)(%rax)
-        mov    %rdx, (+8*3)(%rax)
-
-        mov    (+8*4)(%rax), %rsi
-        mov    (+8*5)(%rax), %rdi
-        mov    (+8*6)(%rax), %rcx
-        mov    (+8*7)(%rax), %rdx
-        cmovnc %r12, %rsi
-        cmovnc %r13, %rdi
-        cmovnc %r14, %rcx
-        cmovnc %r15, %rdx
-        mov    %rsi, (+8*4)(%rax)
-        mov    %rdi, (+8*5)(%rax)
-        mov    %rcx, (+8*6)(%rax)
-        mov    %rdx, (+8*7)(%rax)
-
-        mov    (+$rsp_offset)(%rsp), %rsi
-        mov    0(%rsi),%r15
-        mov    8(%rsi),%r14
-        mov    16(%rsi),%r13
-        mov    24(%rsi),%r12
-        mov    32(%rsi),%rbx
-        mov    40(%rsi),%rbp
-        lea    48(%rsi),%rsp
-.Lepilogue:
-        ret
-.size mod_exp_512, . - mod_exp_512
-___
-
-if ($win64) {
-# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
-#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
-my $rec="%rcx";
-my $frame="%rdx";
-my $context="%r8";
-my $disp="%r9";
-
-$code.=<<___;
-.extern        __imp_RtlVirtualUnwind
-.type  mod_exp_512_se_handler,\@abi-omnipotent
-.align 16
-mod_exp_512_se_handler:
-       push    %rsi
-       push    %rdi
-       push    %rbx
-       push    %rbp
-       push    %r12
-       push    %r13
-       push    %r14
-       push    %r15
-       pushfq
-       sub     \$64,%rsp
-
-       mov     120($context),%rax      # pull context->Rax
-       mov     248($context),%rbx      # pull context->Rip
-
-       lea     .Lbody(%rip),%r10
-       cmp     %r10,%rbx               # context->Rip<prologue label
-       jb      .Lin_prologue
-
-       mov     152($context),%rax      # pull context->Rsp
-
-       lea     .Lepilogue(%rip),%r10
-       cmp     %r10,%rbx               # context->Rip>=epilogue label
-       jae     .Lin_prologue
-
-       mov     $rsp_offset(%rax),%rax  # pull saved Rsp
-
-       mov     32(%rax),%rbx
-       mov     40(%rax),%rbp
-       mov     24(%rax),%r12
-       mov     16(%rax),%r13
-       mov     8(%rax),%r14
-       mov     0(%rax),%r15
-       lea     48(%rax),%rax
-       mov     %rbx,144($context)      # restore context->Rbx
-       mov     %rbp,160($context)      # restore context->Rbp
-       mov     %r12,216($context)      # restore context->R12
-       mov     %r13,224($context)      # restore context->R13
-       mov     %r14,232($context)      # restore context->R14
-       mov     %r15,240($context)      # restore context->R15
-
-.Lin_prologue:
-       mov     8(%rax),%rdi
-       mov     16(%rax),%rsi
-       mov     %rax,152($context)      # restore context->Rsp
-       mov     %rsi,168($context)      # restore context->Rsi
-       mov     %rdi,176($context)      # restore context->Rdi
-
-       mov     40($disp),%rdi          # disp->ContextRecord
-       mov     $context,%rsi           # context
-       mov     \$154,%ecx              # sizeof(CONTEXT)
-       .long   0xa548f3fc              # cld; rep movsq
-
-       mov     $disp,%rsi
-       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
-       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
-       mov     0(%rsi),%r8             # arg3, disp->ControlPc
-       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
-       mov     40(%rsi),%r10           # disp->ContextRecord
-       lea     56(%rsi),%r11           # &disp->HandlerData
-       lea     24(%rsi),%r12           # &disp->EstablisherFrame
-       mov     %r10,32(%rsp)           # arg5
-       mov     %r11,40(%rsp)           # arg6
-       mov     %r12,48(%rsp)           # arg7
-       mov     %rcx,56(%rsp)           # arg8, (NULL)
-       call    *__imp_RtlVirtualUnwind(%rip)
-
-       mov     \$1,%eax                # ExceptionContinueSearch
-       add     \$64,%rsp
-       popfq
-       pop     %r15
-       pop     %r14
-       pop     %r13
-       pop     %r12
-       pop     %rbp
-       pop     %rbx
-       pop     %rdi
-       pop     %rsi
-       ret
-.size  mod_exp_512_se_handler,.-mod_exp_512_se_handler
-
-.section       .pdata
-.align 4
-       .rva    .LSEH_begin_mod_exp_512
-       .rva    .LSEH_end_mod_exp_512
-       .rva    .LSEH_info_mod_exp_512
-
-.section       .xdata
-.align 8
-.LSEH_info_mod_exp_512:
-       .byte   9,0,0,0
-       .rva    mod_exp_512_se_handler
-___
-}
-
-sub reg_part {
-my ($reg,$conv)=@_;
-    if ($reg =~ /%r[0-9]+/)    { $reg .= $conv; }
-    elsif ($conv eq "b")       { $reg =~ s/%[er]([^x]+)x?/%$1l/;       }
-    elsif ($conv eq "w")       { $reg =~ s/%[er](.+)/%$1/;             }
-    elsif ($conv eq "d")       { $reg =~ s/%[er](.+)/%e$1/;            }
-    return $reg;
-}
-
-$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/(\(\+[^)]+\))/eval $1/gem;
-print $code;
-close STDOUT;
index f9b6992..da69c6a 100644 (file)
@@ -325,6 +325,7 @@ Lcopy:                              ; copy or in-place refresh
        .long   0
        .byte   0,12,4,0,0x80,12,6,0
        .long   0
+.size  .bn_mul_mont_int,.-.bn_mul_mont_int
 
 .asciz  "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>"
 ___
index 1249ce2..04df1fe 100644 (file)
@@ -392,6 +392,7 @@ $data=<<EOF;
        .long   0
        .byte   0,12,0x14,0,0,0,2,0
        .long   0
+.size  .bn_sqr_comba4,.-.bn_sqr_comba4
 
 #
 #      NOTE:   The following label name should be changed to
@@ -819,6 +820,7 @@ $data=<<EOF;
        .long   0
        .byte   0,12,0x14,0,0,0,2,0
        .long   0
+.size  .bn_sqr_comba8,.-.bn_sqr_comba8
 
 #
 #      NOTE:   The following label name should be changed to
@@ -972,6 +974,7 @@ $data=<<EOF;
        .long   0
        .byte   0,12,0x14,0,0,0,3,0
        .long   0
+.size  .bn_mul_comba4,.-.bn_mul_comba4
 
 #
 #      NOTE:   The following label name should be changed to
@@ -1510,6 +1513,7 @@ $data=<<EOF;
        .long   0
        .byte   0,12,0x14,0,0,0,3,0
        .long   0
+.size  .bn_mul_comba8,.-.bn_mul_comba8
 
 #
 #      NOTE:   The following label name should be changed to
@@ -1560,6 +1564,7 @@ Lppcasm_sub_adios:
        .long   0
        .byte   0,12,0x14,0,0,0,4,0
        .long   0
+.size  .bn_sub_words,.-.bn_sub_words
 
 #
 #      NOTE:   The following label name should be changed to
@@ -1605,6 +1610,7 @@ Lppcasm_add_adios:
        .long   0
        .byte   0,12,0x14,0,0,0,4,0
        .long   0
+.size  .bn_add_words,.-.bn_add_words
 
 #
 #      NOTE:   The following label name should be changed to
@@ -1720,6 +1726,7 @@ Lppcasm_div9:
        .long   0
        .byte   0,12,0x14,0,0,0,3,0
        .long   0
+.size  .bn_div_words,.-.bn_div_words
 
 #
 #      NOTE:   The following label name should be changed to
@@ -1761,6 +1768,7 @@ Lppcasm_sqr_adios:
        .long   0
        .byte   0,12,0x14,0,0,0,3,0
        .long   0
+.size  .bn_sqr_words,.-.bn_sqr_words
 
 #
 #      NOTE:   The following label name should be changed to
@@ -1866,6 +1874,7 @@ Lppcasm_mw_OVER:
        .long   0
        .byte   0,12,0x14,0,0,0,4,0
        .long   0
+.size  bn_mul_words,.-bn_mul_words
 
 #
 #      NOTE:   The following label name should be changed to
@@ -1991,6 +2000,7 @@ Lppcasm_maw_adios:
        .long   0
        .byte   0,12,0x14,0,0,0,4,0
        .long   0
+.size  .bn_mul_add_words,.-.bn_mul_add_words
        .align  4
 EOF
 $data =~ s/\`([^\`]*)\`/eval $1/gem;
index a14e769..68e3733 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 # others alternative would be to break dependence on upper halves of
 # GPRs by sticking to 32-bit integer operations...
 
+# December 2012
+
+# Remove above mentioned dependence on GPRs' upper halves in 32-bit
+# build. No signal masking overhead, but integer instructions are
+# *more* numerous... It's still "universally" faster than 32-bit
+# ppc-mont.pl, but improvement coefficient is not as impressive
+# for longer keys...
+
 $flavour = shift;
 
 if ($flavour =~ /32/) {
@@ -110,6 +118,9 @@ $tp="r10";
 $j="r11";
 $i="r12";
 # non-volatile registers
+$c1="r19";
+$n1="r20";
+$a1="r21";
 $nap_d="r22";  # interleaved ap and np in double format
 $a0="r23";     # ap[0]
 $t0="r24";     # temporary registers
@@ -180,8 +191,8 @@ $T3a="f30"; $T3b="f31";
 #              .                               .
 #              +-------------------------------+
 #              .                               .
-#   -12*size_t +-------------------------------+
-#              | 10 saved gpr, r22-r31         |
+#   -13*size_t +-------------------------------+
+#              | 13 saved gpr, r19-r31         |
 #              .                               .
 #              .                               .
 #   -12*8      +-------------------------------+
@@ -215,6 +226,9 @@ $code=<<___;
        mr      $i,$sp
        $STUX   $sp,$sp,$tp     ; alloca
 
+       $PUSH   r19,`-12*8-13*$SIZE_T`($i)
+       $PUSH   r20,`-12*8-12*$SIZE_T`($i)
+       $PUSH   r21,`-12*8-11*$SIZE_T`($i)
        $PUSH   r22,`-12*8-10*$SIZE_T`($i)
        $PUSH   r23,`-12*8-9*$SIZE_T`($i)
        $PUSH   r24,`-12*8-8*$SIZE_T`($i)
@@ -237,40 +251,26 @@ $code=<<___;
        stfd    f29,`-3*8`($i)
        stfd    f30,`-2*8`($i)
        stfd    f31,`-1*8`($i)
-___
-$code.=<<___ if ($SIZE_T==8);
-       ld      $a0,0($ap)      ; pull ap[0] value
-       ld      $n0,0($n0)      ; pull n0[0] value
-       ld      $t3,0($bp)      ; bp[0]
-___
-$code.=<<___ if ($SIZE_T==4);
-       mr      $t1,$n0
-       lwz     $a0,0($ap)      ; pull ap[0,1] value
-       lwz     $t0,4($ap)
-       lwz     $n0,0($t1)      ; pull n0[0,1] value
-       lwz     $t1,4($t1)
-       lwz     $t3,0($bp)      ; bp[0,1]
-       lwz     $t2,4($bp)
-       insrdi  $a0,$t0,32,0
-       insrdi  $n0,$t1,32,0
-       insrdi  $t3,$t2,32,0
-___
-$code.=<<___;
+
        addi    $tp,$sp,`$FRAME+$TRANSFER+8+64`
        li      $i,-64
        add     $nap_d,$tp,$num
        and     $nap_d,$nap_d,$i        ; align to 64 bytes
-\f
-       mulld   $t7,$a0,$t3     ; ap[0]*bp[0]
        ; nap_d is off by 1, because it's used with stfdu/lfdu
        addi    $nap_d,$nap_d,-8
        srwi    $j,$num,`3+1`   ; counter register, num/2
-       mulld   $t7,$t7,$n0     ; tp[0]*n0
        addi    $j,$j,-1
        addi    $tp,$sp,`$FRAME+$TRANSFER-8`
        li      $carry,0
        mtctr   $j
+___
+\f
+$code.=<<___ if ($SIZE_T==8);
+       ld      $a0,0($ap)              ; pull ap[0] value
+       ld      $t3,0($bp)              ; bp[0]
+       ld      $n0,0($n0)              ; pull n0[0] value
 
+       mulld   $t7,$a0,$t3             ; ap[0]*bp[0]
        ; transfer bp[0] to FPU as 4x16-bit values
        extrdi  $t0,$t3,16,48
        extrdi  $t1,$t3,16,32
@@ -280,6 +280,8 @@ $code.=<<___;
        std     $t1,`$FRAME+8`($sp)
        std     $t2,`$FRAME+16`($sp)
        std     $t3,`$FRAME+24`($sp)
+
+       mulld   $t7,$t7,$n0             ; tp[0]*n0
        ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
        extrdi  $t4,$t7,16,48
        extrdi  $t5,$t7,16,32
@@ -289,21 +291,61 @@ $code.=<<___;
        std     $t5,`$FRAME+40`($sp)
        std     $t6,`$FRAME+48`($sp)
        std     $t7,`$FRAME+56`($sp)
-___
-$code.=<<___ if ($SIZE_T==8);
-       lwz     $t0,4($ap)              ; load a[j] as 32-bit word pair
-       lwz     $t1,0($ap)
-       lwz     $t2,12($ap)             ; load a[j+1] as 32-bit word pair
+
+       extrdi  $t0,$a0,32,32           ; lwz   $t0,4($ap)
+       extrdi  $t1,$a0,32,0            ; lwz   $t1,0($ap)
+       lwz     $t2,12($ap)             ; load a[1] as 32-bit word pair
        lwz     $t3,8($ap)
-       lwz     $t4,4($np)              ; load n[j] as 32-bit word pair
+       lwz     $t4,4($np)              ; load n[0] as 32-bit word pair
        lwz     $t5,0($np)
-       lwz     $t6,12($np)             ; load n[j+1] as 32-bit word pair
+       lwz     $t6,12($np)             ; load n[1] as 32-bit word pair
        lwz     $t7,8($np)
 ___
 $code.=<<___ if ($SIZE_T==4);
-       lwz     $t0,0($ap)              ; load a[j..j+3] as 32-bit word pairs
-       lwz     $t1,4($ap)
-       lwz     $t2,8($ap)
+       lwz     $a0,0($ap)              ; pull ap[0,1] value
+       mr      $n1,$n0
+       lwz     $a1,4($ap)
+       li      $c1,0
+       lwz     $t1,0($bp)              ; bp[0,1]
+       lwz     $t3,4($bp)
+       lwz     $n0,0($n1)              ; pull n0[0,1] value
+       lwz     $n1,4($n1)
+
+       mullw   $t4,$a0,$t1             ; mulld ap[0]*bp[0]
+       mulhwu  $t5,$a0,$t1
+       mullw   $t6,$a1,$t1
+       mullw   $t7,$a0,$t3
+       add     $t5,$t5,$t6
+       add     $t5,$t5,$t7
+       ; transfer bp[0] to FPU as 4x16-bit values
+       extrwi  $t0,$t1,16,16
+       extrwi  $t1,$t1,16,0
+       extrwi  $t2,$t3,16,16
+       extrwi  $t3,$t3,16,0
+       std     $t0,`$FRAME+0`($sp)     ; yes, std in 32-bit build
+       std     $t1,`$FRAME+8`($sp)
+       std     $t2,`$FRAME+16`($sp)
+       std     $t3,`$FRAME+24`($sp)
+
+       mullw   $t0,$t4,$n0             ; mulld tp[0]*n0
+       mulhwu  $t1,$t4,$n0
+       mullw   $t2,$t5,$n0
+       mullw   $t3,$t4,$n1
+       add     $t1,$t1,$t2
+       add     $t1,$t1,$t3
+       ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
+       extrwi  $t4,$t0,16,16
+       extrwi  $t5,$t0,16,0
+       extrwi  $t6,$t1,16,16
+       extrwi  $t7,$t1,16,0
+       std     $t4,`$FRAME+32`($sp)    ; yes, std in 32-bit build
+       std     $t5,`$FRAME+40`($sp)
+       std     $t6,`$FRAME+48`($sp)
+       std     $t7,`$FRAME+56`($sp)
+
+       mr      $t0,$a0                 ; lwz   $t0,0($ap)
+       mr      $t1,$a1                 ; lwz   $t1,4($ap)
+       lwz     $t2,8($ap)              ; load a[j..j+3] as 32-bit word pairs
        lwz     $t3,12($ap)
        lwz     $t4,0($np)              ; load n[j..j+3] as 32-bit word pairs
        lwz     $t5,4($np)
@@ -319,7 +361,7 @@ $code.=<<___;
        lfd     $nb,`$FRAME+40`($sp)
        lfd     $nc,`$FRAME+48`($sp)
        lfd     $nd,`$FRAME+56`($sp)
-       std     $t0,`$FRAME+64`($sp)
+       std     $t0,`$FRAME+64`($sp)    ; yes, std even in 32-bit build
        std     $t1,`$FRAME+72`($sp)
        std     $t2,`$FRAME+80`($sp)
        std     $t3,`$FRAME+88`($sp)
@@ -441,7 +483,7 @@ $code.=<<___ if ($SIZE_T==4);
        lwz     $t7,12($np)
 ___
 $code.=<<___;
-       std     $t0,`$FRAME+64`($sp)
+       std     $t0,`$FRAME+64`($sp)    ; yes, std even in 32-bit build
        std     $t1,`$FRAME+72`($sp)
        std     $t2,`$FRAME+80`($sp)
        std     $t3,`$FRAME+88`($sp)
@@ -449,6 +491,9 @@ $code.=<<___;
        std     $t5,`$FRAME+104`($sp)
        std     $t6,`$FRAME+112`($sp)
        std     $t7,`$FRAME+120`($sp)
+___
+if ($SIZE_T==8 or $flavour =~ /osx/) {
+$code.=<<___;
        ld      $t0,`$FRAME+0`($sp)
        ld      $t1,`$FRAME+8`($sp)
        ld      $t2,`$FRAME+16`($sp)
@@ -457,6 +502,20 @@ $code.=<<___;
        ld      $t5,`$FRAME+40`($sp)
        ld      $t6,`$FRAME+48`($sp)
        ld      $t7,`$FRAME+56`($sp)
+___
+} else {
+$code.=<<___;
+       lwz     $t1,`$FRAME+0`($sp)
+       lwz     $t0,`$FRAME+4`($sp)
+       lwz     $t3,`$FRAME+8`($sp)
+       lwz     $t2,`$FRAME+12`($sp)
+       lwz     $t5,`$FRAME+16`($sp)
+       lwz     $t4,`$FRAME+20`($sp)
+       lwz     $t7,`$FRAME+24`($sp)
+       lwz     $t6,`$FRAME+28`($sp)
+___
+}
+$code.=<<___;
        lfd     $A0,`$FRAME+64`($sp)
        lfd     $A1,`$FRAME+72`($sp)
        lfd     $A2,`$FRAME+80`($sp)
@@ -488,7 +547,9 @@ $code.=<<___;
        fmadd   $T0b,$A0,$bb,$dotb
        stfd    $A2,24($nap_d)          ; save a[j+1] in double format
        stfd    $A3,32($nap_d)
-
+___
+if ($SIZE_T==8 or $flavour =~ /osx/) {
+$code.=<<___;
        fmadd   $T1a,$A0,$bc,$T1a
        fmadd   $T1b,$A0,$bd,$T1b
        fmadd   $T2a,$A1,$bc,$T2a
@@ -561,11 +622,123 @@ $code.=<<___;
        stfd    $T3b,`$FRAME+56`($sp)
         std    $t0,8($tp)              ; tp[j-1]
         stdu   $t4,16($tp)             ; tp[j]
+___
+} else {
+$code.=<<___;
+       fmadd   $T1a,$A0,$bc,$T1a
+       fmadd   $T1b,$A0,$bd,$T1b
+        addc   $t0,$t0,$carry
+        adde   $t1,$t1,$c1
+        srwi   $carry,$t0,16
+       fmadd   $T2a,$A1,$bc,$T2a
+       fmadd   $T2b,$A1,$bd,$T2b
+       stfd    $N0,40($nap_d)          ; save n[j] in double format
+       stfd    $N1,48($nap_d)
+        srwi   $c1,$t1,16
+        insrwi $carry,$t1,16,0
+       fmadd   $T3a,$A2,$bc,$T3a
+       fmadd   $T3b,$A2,$bd,$T3b
+        addc   $t2,$t2,$carry
+        adde   $t3,$t3,$c1
+        srwi   $carry,$t2,16
+       fmul    $dota,$A3,$bc
+       fmul    $dotb,$A3,$bd
+       stfd    $N2,56($nap_d)          ; save n[j+1] in double format
+       stfdu   $N3,64($nap_d)
+        insrwi $t0,$t2,16,0            ; 0..31 bits
+        srwi   $c1,$t3,16
+        insrwi $carry,$t3,16,0
+
+       fmadd   $T1a,$N1,$na,$T1a
+       fmadd   $T1b,$N1,$nb,$T1b
+        lwz    $t3,`$FRAME+32`($sp)    ; permuted $t1
+        lwz    $t2,`$FRAME+36`($sp)    ; permuted $t0
+        addc   $t4,$t4,$carry
+        adde   $t5,$t5,$c1
+        srwi   $carry,$t4,16
+       fmadd   $T2a,$N2,$na,$T2a
+       fmadd   $T2b,$N2,$nb,$T2b
+        srwi   $c1,$t5,16
+        insrwi $carry,$t5,16,0
+       fmadd   $T3a,$N3,$na,$T3a
+       fmadd   $T3b,$N3,$nb,$T3b
+        addc   $t6,$t6,$carry
+        adde   $t7,$t7,$c1
+        srwi   $carry,$t6,16
+       fmadd   $T0a,$N0,$na,$T0a
+       fmadd   $T0b,$N0,$nb,$T0b
+        insrwi $t4,$t6,16,0            ; 32..63 bits
+        srwi   $c1,$t7,16
+        insrwi $carry,$t7,16,0
+
+       fmadd   $T1a,$N0,$nc,$T1a
+       fmadd   $T1b,$N0,$nd,$T1b
+        lwz    $t7,`$FRAME+40`($sp)    ; permuted $t3
+        lwz    $t6,`$FRAME+44`($sp)    ; permuted $t2
+        addc   $t2,$t2,$carry
+        adde   $t3,$t3,$c1
+        srwi   $carry,$t2,16
+       fmadd   $T2a,$N1,$nc,$T2a
+       fmadd   $T2b,$N1,$nd,$T2b
+        stw    $t0,12($tp)             ; tp[j-1]
+        stw    $t4,8($tp)
+        srwi   $c1,$t3,16
+        insrwi $carry,$t3,16,0
+       fmadd   $T3a,$N2,$nc,$T3a
+       fmadd   $T3b,$N2,$nd,$T3b
+        lwz    $t1,`$FRAME+48`($sp)    ; permuted $t5
+        lwz    $t0,`$FRAME+52`($sp)    ; permuted $t4
+        addc   $t6,$t6,$carry
+        adde   $t7,$t7,$c1
+        srwi   $carry,$t6,16
+       fmadd   $dota,$N3,$nc,$dota
+       fmadd   $dotb,$N3,$nd,$dotb
+        insrwi $t2,$t6,16,0            ; 64..95 bits
+        srwi   $c1,$t7,16
+        insrwi $carry,$t7,16,0
+
+       fctid   $T0a,$T0a
+       fctid   $T0b,$T0b
+        lwz    $t5,`$FRAME+56`($sp)    ; permuted $t7
+        lwz    $t4,`$FRAME+60`($sp)    ; permuted $t6
+        addc   $t0,$t0,$carry
+        adde   $t1,$t1,$c1
+        srwi   $carry,$t0,16
+       fctid   $T1a,$T1a
+       fctid   $T1b,$T1b
+        srwi   $c1,$t1,16
+        insrwi $carry,$t1,16,0
+       fctid   $T2a,$T2a
+       fctid   $T2b,$T2b
+        addc   $t4,$t4,$carry
+        adde   $t5,$t5,$c1
+        srwi   $carry,$t4,16
+       fctid   $T3a,$T3a
+       fctid   $T3b,$T3b
+        insrwi $t0,$t4,16,0            ; 96..127 bits
+        srwi   $c1,$t5,16
+        insrwi $carry,$t5,16,0
+
+       stfd    $T0a,`$FRAME+0`($sp)
+       stfd    $T0b,`$FRAME+8`($sp)
+       stfd    $T1a,`$FRAME+16`($sp)
+       stfd    $T1b,`$FRAME+24`($sp)
+       stfd    $T2a,`$FRAME+32`($sp)
+       stfd    $T2b,`$FRAME+40`($sp)
+       stfd    $T3a,`$FRAME+48`($sp)
+       stfd    $T3b,`$FRAME+56`($sp)
+        stw    $t2,20($tp)             ; tp[j]
+        stwu   $t0,16($tp)
+___
+}
+$code.=<<___;
        bdnz-   L1st
 \f
        fctid   $dota,$dota
        fctid   $dotb,$dotb
-
+___
+if ($SIZE_T==8 or $flavour =~ /osx/) {
+$code.=<<___;
        ld      $t0,`$FRAME+0`($sp)
        ld      $t1,`$FRAME+8`($sp)
        ld      $t2,`$FRAME+16`($sp)
@@ -611,33 +784,117 @@ $code.=<<___;
        insrdi  $t6,$t7,48,0
        srdi    $ovf,$t7,48
        std     $t6,8($tp)              ; tp[num-1]
+___
+} else {
+$code.=<<___;
+       lwz     $t1,`$FRAME+0`($sp)
+       lwz     $t0,`$FRAME+4`($sp)
+       lwz     $t3,`$FRAME+8`($sp)
+       lwz     $t2,`$FRAME+12`($sp)
+       lwz     $t5,`$FRAME+16`($sp)
+       lwz     $t4,`$FRAME+20`($sp)
+       lwz     $t7,`$FRAME+24`($sp)
+       lwz     $t6,`$FRAME+28`($sp)
+       stfd    $dota,`$FRAME+64`($sp)
+       stfd    $dotb,`$FRAME+72`($sp)
 
+       addc    $t0,$t0,$carry
+       adde    $t1,$t1,$c1
+       srwi    $carry,$t0,16
+       insrwi  $carry,$t1,16,0
+       srwi    $c1,$t1,16
+       addc    $t2,$t2,$carry
+       adde    $t3,$t3,$c1
+       srwi    $carry,$t2,16
+        insrwi $t0,$t2,16,0            ; 0..31 bits
+       insrwi  $carry,$t3,16,0
+       srwi    $c1,$t3,16
+       addc    $t4,$t4,$carry
+       adde    $t5,$t5,$c1
+       srwi    $carry,$t4,16
+       insrwi  $carry,$t5,16,0
+       srwi    $c1,$t5,16
+       addc    $t6,$t6,$carry
+       adde    $t7,$t7,$c1
+       srwi    $carry,$t6,16
+        insrwi $t4,$t6,16,0            ; 32..63 bits
+       insrwi  $carry,$t7,16,0
+       srwi    $c1,$t7,16
+        stw    $t0,12($tp)             ; tp[j-1]
+        stw    $t4,8($tp)
+
+       lwz     $t3,`$FRAME+32`($sp)    ; permuted $t1
+       lwz     $t2,`$FRAME+36`($sp)    ; permuted $t0
+       lwz     $t7,`$FRAME+40`($sp)    ; permuted $t3
+       lwz     $t6,`$FRAME+44`($sp)    ; permuted $t2
+       lwz     $t1,`$FRAME+48`($sp)    ; permuted $t5
+       lwz     $t0,`$FRAME+52`($sp)    ; permuted $t4
+       lwz     $t5,`$FRAME+56`($sp)    ; permuted $t7
+       lwz     $t4,`$FRAME+60`($sp)    ; permuted $t6
+
+       addc    $t2,$t2,$carry
+       adde    $t3,$t3,$c1
+       srwi    $carry,$t2,16
+       insrwi  $carry,$t3,16,0
+       srwi    $c1,$t3,16
+       addc    $t6,$t6,$carry
+       adde    $t7,$t7,$c1
+       srwi    $carry,$t6,16
+        insrwi $t2,$t6,16,0            ; 64..95 bits
+       insrwi  $carry,$t7,16,0
+       srwi    $c1,$t7,16
+       addc    $t0,$t0,$carry
+       adde    $t1,$t1,$c1
+       srwi    $carry,$t0,16
+       insrwi  $carry,$t1,16,0
+       srwi    $c1,$t1,16
+       addc    $t4,$t4,$carry
+       adde    $t5,$t5,$c1
+       srwi    $carry,$t4,16
+        insrwi $t0,$t4,16,0            ; 96..127 bits
+       insrwi  $carry,$t5,16,0
+       srwi    $c1,$t5,16
+        stw    $t2,20($tp)             ; tp[j]
+        stwu   $t0,16($tp)
+
+       lwz     $t7,`$FRAME+64`($sp)
+       lwz     $t6,`$FRAME+68`($sp)
+       lwz     $t5,`$FRAME+72`($sp)
+       lwz     $t4,`$FRAME+76`($sp)
+
+       addc    $t6,$t6,$carry
+       adde    $t7,$t7,$c1
+       srwi    $carry,$t6,16
+       insrwi  $carry,$t7,16,0
+       srwi    $c1,$t7,16
+       addc    $t4,$t4,$carry
+       adde    $t5,$t5,$c1
+
+       insrwi  $t6,$t4,16,0
+       srwi    $t4,$t4,16
+       insrwi  $t4,$t5,16,0
+       srwi    $ovf,$t5,16
+       stw     $t6,12($tp)             ; tp[num-1]
+       stw     $t4,8($tp)
+___
+}
+$code.=<<___;
        slwi    $t7,$num,2
        subf    $nap_d,$t7,$nap_d       ; rewind pointer
 \f
        li      $i,8                    ; i=1
 .align 5
 Louter:
-___
-$code.=<<___ if ($SIZE_T==8);
-       ldx     $t3,$bp,$i      ; bp[i]
-___
-$code.=<<___ if ($SIZE_T==4);
-       add     $t0,$bp,$i
-       lwz     $t3,0($t0)              ; bp[i,i+1]
-       lwz     $t0,4($t0)
-       insrdi  $t3,$t0,32,0
-___
-$code.=<<___;
-       ld      $t6,`$FRAME+$TRANSFER+8`($sp)   ; tp[0]
-       mulld   $t7,$a0,$t3     ; ap[0]*bp[i]
-
        addi    $tp,$sp,`$FRAME+$TRANSFER`
-       add     $t7,$t7,$t6     ; ap[0]*bp[i]+tp[0]
        li      $carry,0
-       mulld   $t7,$t7,$n0     ; tp[0]*n0
        mtctr   $j
+___
+$code.=<<___ if ($SIZE_T==8);
+       ldx     $t3,$bp,$i              ; bp[i]
 
+       ld      $t6,`$FRAME+$TRANSFER+8`($sp)   ; tp[0]
+       mulld   $t7,$a0,$t3             ; ap[0]*bp[i]
+       add     $t7,$t7,$t6             ; ap[0]*bp[i]+tp[0]
        ; transfer bp[i] to FPU as 4x16-bit values
        extrdi  $t0,$t3,16,48
        extrdi  $t1,$t3,16,32
@@ -647,6 +904,8 @@ $code.=<<___;
        std     $t1,`$FRAME+8`($sp)
        std     $t2,`$FRAME+16`($sp)
        std     $t3,`$FRAME+24`($sp)
+
+       mulld   $t7,$t7,$n0             ; tp[0]*n0
        ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
        extrdi  $t4,$t7,16,48
        extrdi  $t5,$t7,16,32
@@ -656,7 +915,50 @@ $code.=<<___;
        std     $t5,`$FRAME+40`($sp)
        std     $t6,`$FRAME+48`($sp)
        std     $t7,`$FRAME+56`($sp)
+___
+$code.=<<___ if ($SIZE_T==4);
+       add     $t0,$bp,$i
+       li      $c1,0
+       lwz     $t1,0($t0)              ; bp[i,i+1]
+       lwz     $t3,4($t0)
+
+       mullw   $t4,$a0,$t1             ; ap[0]*bp[i]
+       lwz     $t0,`$FRAME+$TRANSFER+8+4`($sp) ; tp[0]
+       mulhwu  $t5,$a0,$t1
+       lwz     $t2,`$FRAME+$TRANSFER+8`($sp)   ; tp[0]
+       mullw   $t6,$a1,$t1
+       mullw   $t7,$a0,$t3
+       add     $t5,$t5,$t6
+       add     $t5,$t5,$t7
+       addc    $t4,$t4,$t0             ; ap[0]*bp[i]+tp[0]
+       adde    $t5,$t5,$t2
+       ; transfer bp[i] to FPU as 4x16-bit values
+       extrwi  $t0,$t1,16,16
+       extrwi  $t1,$t1,16,0
+       extrwi  $t2,$t3,16,16
+       extrwi  $t3,$t3,16,0
+       std     $t0,`$FRAME+0`($sp)     ; yes, std in 32-bit build
+       std     $t1,`$FRAME+8`($sp)
+       std     $t2,`$FRAME+16`($sp)
+       std     $t3,`$FRAME+24`($sp)
 
+       mullw   $t0,$t4,$n0             ; mulld tp[0]*n0
+       mulhwu  $t1,$t4,$n0
+       mullw   $t2,$t5,$n0
+       mullw   $t3,$t4,$n1
+       add     $t1,$t1,$t2
+       add     $t1,$t1,$t3
+       ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
+       extrwi  $t4,$t0,16,16
+       extrwi  $t5,$t0,16,0
+       extrwi  $t6,$t1,16,16
+       extrwi  $t7,$t1,16,0
+       std     $t4,`$FRAME+32`($sp)    ; yes, std in 32-bit build
+       std     $t5,`$FRAME+40`($sp)
+       std     $t6,`$FRAME+48`($sp)
+       std     $t7,`$FRAME+56`($sp)
+___
+$code.=<<___;
        lfd     $A0,8($nap_d)           ; load a[j] in double format
        lfd     $A1,16($nap_d)
        lfd     $A2,24($nap_d)          ; load a[j+1] in double format
@@ -769,7 +1071,9 @@ Linner:
        fmul    $dotb,$A3,$bd
         lfd    $A2,24($nap_d)          ; load a[j+1] in double format
         lfd    $A3,32($nap_d)
-
+___
+if ($SIZE_T==8 or $flavour =~ /osx/) {
+$code.=<<___;
        fmadd   $T1a,$N1,$na,$T1a
        fmadd   $T1b,$N1,$nb,$T1b
         ld     $t0,`$FRAME+0`($sp)
@@ -856,10 +1160,131 @@ $code.=<<___;
         addze  $carry,$carry
         std    $t3,-16($tp)            ; tp[j-1]
         std    $t5,-8($tp)             ; tp[j]
+___
+} else {
+$code.=<<___;
+       fmadd   $T1a,$N1,$na,$T1a
+       fmadd   $T1b,$N1,$nb,$T1b
+        lwz    $t1,`$FRAME+0`($sp)
+        lwz    $t0,`$FRAME+4`($sp)
+       fmadd   $T2a,$N2,$na,$T2a
+       fmadd   $T2b,$N2,$nb,$T2b
+        lwz    $t3,`$FRAME+8`($sp)
+        lwz    $t2,`$FRAME+12`($sp)
+       fmadd   $T3a,$N3,$na,$T3a
+       fmadd   $T3b,$N3,$nb,$T3b
+        lwz    $t5,`$FRAME+16`($sp)
+        lwz    $t4,`$FRAME+20`($sp)
+        addc   $t0,$t0,$carry
+        adde   $t1,$t1,$c1
+        srwi   $carry,$t0,16
+       fmadd   $T0a,$N0,$na,$T0a
+       fmadd   $T0b,$N0,$nb,$T0b
+        lwz    $t7,`$FRAME+24`($sp)
+        lwz    $t6,`$FRAME+28`($sp)
+        srwi   $c1,$t1,16
+        insrwi $carry,$t1,16,0
+
+       fmadd   $T1a,$N0,$nc,$T1a
+       fmadd   $T1b,$N0,$nd,$T1b
+        addc   $t2,$t2,$carry
+        adde   $t3,$t3,$c1
+        srwi   $carry,$t2,16
+       fmadd   $T2a,$N1,$nc,$T2a
+       fmadd   $T2b,$N1,$nd,$T2b
+        insrwi $t0,$t2,16,0            ; 0..31 bits
+        srwi   $c1,$t3,16
+        insrwi $carry,$t3,16,0
+       fmadd   $T3a,$N2,$nc,$T3a
+       fmadd   $T3b,$N2,$nd,$T3b
+        lwz    $t2,12($tp)             ; tp[j]
+        lwz    $t3,8($tp)
+        addc   $t4,$t4,$carry
+        adde   $t5,$t5,$c1
+        srwi   $carry,$t4,16
+       fmadd   $dota,$N3,$nc,$dota
+       fmadd   $dotb,$N3,$nd,$dotb
+        srwi   $c1,$t5,16
+        insrwi $carry,$t5,16,0
+
+       fctid   $T0a,$T0a
+        addc   $t6,$t6,$carry
+        adde   $t7,$t7,$c1
+        srwi   $carry,$t6,16
+       fctid   $T0b,$T0b
+        insrwi $t4,$t6,16,0            ; 32..63 bits
+        srwi   $c1,$t7,16
+        insrwi $carry,$t7,16,0
+       fctid   $T1a,$T1a
+        addc   $t0,$t0,$t2
+        adde   $t4,$t4,$t3
+        lwz    $t3,`$FRAME+32`($sp)    ; permuted $t1
+        lwz    $t2,`$FRAME+36`($sp)    ; permuted $t0
+       fctid   $T1b,$T1b
+        addze  $carry,$carry
+        addze  $c1,$c1
+        stw    $t0,4($tp)              ; tp[j-1]
+        stw    $t4,0($tp)
+       fctid   $T2a,$T2a
+        addc   $t2,$t2,$carry
+        adde   $t3,$t3,$c1
+        srwi   $carry,$t2,16
+        lwz    $t7,`$FRAME+40`($sp)    ; permuted $t3
+        lwz    $t6,`$FRAME+44`($sp)    ; permuted $t2
+       fctid   $T2b,$T2b
+        srwi   $c1,$t3,16
+        insrwi $carry,$t3,16,0
+        lwz    $t1,`$FRAME+48`($sp)    ; permuted $t5
+        lwz    $t0,`$FRAME+52`($sp)    ; permuted $t4
+       fctid   $T3a,$T3a
+        addc   $t6,$t6,$carry
+        adde   $t7,$t7,$c1
+        srwi   $carry,$t6,16
+        lwz    $t5,`$FRAME+56`($sp)    ; permuted $t7
+        lwz    $t4,`$FRAME+60`($sp)    ; permuted $t6
+       fctid   $T3b,$T3b
+
+        insrwi $t2,$t6,16,0            ; 64..95 bits
+       insrwi  $carry,$t7,16,0
+       srwi    $c1,$t7,16
+        lwz    $t6,20($tp)
+        lwzu   $t7,16($tp)
+       addc    $t0,$t0,$carry
+        stfd   $T0a,`$FRAME+0`($sp)
+       adde    $t1,$t1,$c1
+       srwi    $carry,$t0,16
+        stfd   $T0b,`$FRAME+8`($sp)
+       insrwi  $carry,$t1,16,0
+       srwi    $c1,$t1,16
+       addc    $t4,$t4,$carry
+        stfd   $T1a,`$FRAME+16`($sp)
+       adde    $t5,$t5,$c1
+       srwi    $carry,$t4,16
+        insrwi $t0,$t4,16,0            ; 96..127 bits
+        stfd   $T1b,`$FRAME+24`($sp)
+       insrwi  $carry,$t5,16,0
+       srwi    $c1,$t5,16
+
+       addc    $t2,$t2,$t6
+        stfd   $T2a,`$FRAME+32`($sp)
+       adde    $t0,$t0,$t7
+        stfd   $T2b,`$FRAME+40`($sp)
+       addze   $carry,$carry
+        stfd   $T3a,`$FRAME+48`($sp)
+       addze   $c1,$c1
+        stfd   $T3b,`$FRAME+56`($sp)
+        stw    $t2,-4($tp)             ; tp[j]
+        stw    $t0,-8($tp)
+___
+}
+$code.=<<___;
        bdnz-   Linner
 \f
        fctid   $dota,$dota
        fctid   $dotb,$dotb
+___
+if ($SIZE_T==8 or $flavour =~ /osx/) {
+$code.=<<___;
        ld      $t0,`$FRAME+0`($sp)
        ld      $t1,`$FRAME+8`($sp)
        ld      $t2,`$FRAME+16`($sp)
@@ -926,7 +1351,116 @@ $code.=<<___;
        insrdi  $t6,$t7,48,0
        srdi    $ovf,$t7,48
        std     $t6,0($tp)              ; tp[num-1]
+___
+} else {
+$code.=<<___;
+       lwz     $t1,`$FRAME+0`($sp)
+       lwz     $t0,`$FRAME+4`($sp)
+       lwz     $t3,`$FRAME+8`($sp)
+       lwz     $t2,`$FRAME+12`($sp)
+       lwz     $t5,`$FRAME+16`($sp)
+       lwz     $t4,`$FRAME+20`($sp)
+       lwz     $t7,`$FRAME+24`($sp)
+       lwz     $t6,`$FRAME+28`($sp)
+       stfd    $dota,`$FRAME+64`($sp)
+       stfd    $dotb,`$FRAME+72`($sp)
 
+       addc    $t0,$t0,$carry
+       adde    $t1,$t1,$c1
+       srwi    $carry,$t0,16
+       insrwi  $carry,$t1,16,0
+       srwi    $c1,$t1,16
+       addc    $t2,$t2,$carry
+       adde    $t3,$t3,$c1
+       srwi    $carry,$t2,16
+        insrwi $t0,$t2,16,0            ; 0..31 bits
+        lwz    $t2,12($tp)             ; tp[j]
+       insrwi  $carry,$t3,16,0
+       srwi    $c1,$t3,16
+        lwz    $t3,8($tp)
+       addc    $t4,$t4,$carry
+       adde    $t5,$t5,$c1
+       srwi    $carry,$t4,16
+       insrwi  $carry,$t5,16,0
+       srwi    $c1,$t5,16
+       addc    $t6,$t6,$carry
+       adde    $t7,$t7,$c1
+       srwi    $carry,$t6,16
+        insrwi $t4,$t6,16,0            ; 32..63 bits
+       insrwi  $carry,$t7,16,0
+       srwi    $c1,$t7,16
+
+       addc    $t0,$t0,$t2
+       adde    $t4,$t4,$t3
+       addze   $carry,$carry
+       addze   $c1,$c1
+        stw    $t0,4($tp)              ; tp[j-1]
+        stw    $t4,0($tp)
+
+       lwz     $t3,`$FRAME+32`($sp)    ; permuted $t1
+       lwz     $t2,`$FRAME+36`($sp)    ; permuted $t0
+       lwz     $t7,`$FRAME+40`($sp)    ; permuted $t3
+       lwz     $t6,`$FRAME+44`($sp)    ; permuted $t2
+       lwz     $t1,`$FRAME+48`($sp)    ; permuted $t5
+       lwz     $t0,`$FRAME+52`($sp)    ; permuted $t4
+       lwz     $t5,`$FRAME+56`($sp)    ; permuted $t7
+       lwz     $t4,`$FRAME+60`($sp)    ; permuted $t6
+
+       addc    $t2,$t2,$carry
+       adde    $t3,$t3,$c1
+       srwi    $carry,$t2,16
+       insrwi  $carry,$t3,16,0
+       srwi    $c1,$t3,16
+       addc    $t6,$t6,$carry
+       adde    $t7,$t7,$c1
+       srwi    $carry,$t6,16
+        insrwi $t2,$t6,16,0            ; 64..95 bits
+        lwz    $t6,20($tp)
+       insrwi  $carry,$t7,16,0
+       srwi    $c1,$t7,16
+        lwzu   $t7,16($tp)
+       addc    $t0,$t0,$carry
+       adde    $t1,$t1,$c1
+       srwi    $carry,$t0,16
+       insrwi  $carry,$t1,16,0
+       srwi    $c1,$t1,16
+       addc    $t4,$t4,$carry
+       adde    $t5,$t5,$c1
+       srwi    $carry,$t4,16
+        insrwi $t0,$t4,16,0            ; 96..127 bits
+       insrwi  $carry,$t5,16,0
+       srwi    $c1,$t5,16
+
+       addc    $t2,$t2,$t6
+       adde    $t0,$t0,$t7
+        lwz    $t7,`$FRAME+64`($sp)
+        lwz    $t6,`$FRAME+68`($sp)
+       addze   $carry,$carry
+       addze   $c1,$c1
+        lwz    $t5,`$FRAME+72`($sp)
+        lwz    $t4,`$FRAME+76`($sp)
+
+       addc    $t6,$t6,$carry
+       adde    $t7,$t7,$c1
+        stw    $t2,-4($tp)             ; tp[j]
+        stw    $t0,-8($tp)
+       addc    $t6,$t6,$ovf
+       addze   $t7,$t7
+       srwi    $carry,$t6,16
+       insrwi  $carry,$t7,16,0
+       srwi    $c1,$t7,16
+       addc    $t4,$t4,$carry
+       adde    $t5,$t5,$c1
+
+       insrwi  $t6,$t4,16,0
+       srwi    $t4,$t4,16
+       insrwi  $t4,$t5,16,0
+       srwi    $ovf,$t5,16
+       stw     $t6,4($tp)              ; tp[num-1]
+       stw     $t4,0($tp)
+___
+}
+$code.=<<___;
        slwi    $t7,$num,2
        addi    $i,$i,8
        subf    $nap_d,$t7,$nap_d       ; rewind pointer
@@ -994,14 +1528,14 @@ $code.=<<___ if ($SIZE_T==4);
        mtctr   $j
 
 .align 4
-Lsub:  ld      $t0,8($tp)      ; load tp[j..j+3] in 64-bit word order
-       ldu     $t2,16($tp)
+Lsub:  lwz     $t0,12($tp)     ; load tp[j..j+3] in 64-bit word order
+       lwz     $t1,8($tp)
+       lwz     $t2,20($tp)
+       lwzu    $t3,16($tp)
        lwz     $t4,4($np)      ; load np[j..j+3] in 32-bit word order
        lwz     $t5,8($np)
        lwz     $t6,12($np)
        lwzu    $t7,16($np)
-       extrdi  $t1,$t0,32,0
-       extrdi  $t3,$t2,32,0
        subfe   $t4,$t4,$t0     ; tp[j]-np[j]
         stw    $t0,4($ap)      ; save tp[j..j+3] in 32-bit word order
        subfe   $t5,$t5,$t1     ; tp[j+1]-np[j+1]
@@ -1052,6 +1586,9 @@ ___
 $code.=<<___;
        $POP    $i,0($sp)
        li      r3,1    ; signal "handled"
+       $POP    r19,`-12*8-13*$SIZE_T`($i)
+       $POP    r20,`-12*8-12*$SIZE_T`($i)
+       $POP    r21,`-12*8-11*$SIZE_T`($i)
        $POP    r22,`-12*8-10*$SIZE_T`($i)
        $POP    r23,`-12*8-9*$SIZE_T`($i)
        $POP    r24,`-12*8-8*$SIZE_T`($i)
@@ -1077,8 +1614,9 @@ $code.=<<___;
        mr      $sp,$i
        blr
        .long   0
-       .byte   0,12,4,0,0x8c,10,6,0
+       .byte   0,12,4,0,0x8c,13,6,0
        .long   0
+.size  .$fname,.-.$fname
 
 .asciz  "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
 ___
diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl
new file mode 100755 (executable)
index 0000000..3b6ccf8
--- /dev/null
@@ -0,0 +1,1898 @@
+#!/usr/bin/env perl
+
+##############################################################################
+#                                                                            #
+#  Copyright (c) 2012, Intel Corporation                                     #
+#                                                                            #
+#  All rights reserved.                                                      #
+#                                                                            #
+#  Redistribution and use in source and binary forms, with or without        #
+#  modification, are permitted provided that the following conditions are    #
+#  met:                                                                      #
+#                                                                            #
+#  *  Redistributions of source code must retain the above copyright         #
+#     notice, this list of conditions and the following disclaimer.          #
+#                                                                            #
+#  *  Redistributions in binary form must reproduce the above copyright      #
+#     notice, this list of conditions and the following disclaimer in the    #
+#     documentation and/or other materials provided with the                 #
+#     distribution.                                                          #
+#                                                                            #
+#  *  Neither the name of the Intel Corporation nor the names of its         #
+#     contributors may be used to endorse or promote products derived from   #
+#     this software without specific prior written permission.               #
+#                                                                            #
+#                                                                            #
+#  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY          #
+#  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         #
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        #
+#  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            #
+#  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
+#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       #
+#  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        #
+#  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    #
+#  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      #
+#  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        #
+#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              #
+#                                                                            #
+##############################################################################
+# Developers and authors:                                                    #
+# Shay Gueron (1, 2), and Vlad Krasnov (1)                                   #
+# (1) Intel Corporation, Israel Development Center, Haifa, Israel            #
+# (2) University of Haifa, Israel                                            #
+##############################################################################
+# Reference:                                                                 #
+# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular             #
+#     Exponentiation,  Using Advanced Vector Instructions Architectures",    #
+#     F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369,   #
+#     pp. 119-135, 2012. Springer-Verlag Berlin Heidelberg 2012              #
+# [2] S. Gueron: "Efficient Software Implementations of Modular              #
+#     Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012).  #
+# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE         #
+#     Proceedings of 9th International Conference on Information Technology: #
+#     New Generations (ITNG 2012), pp.821-823 (2012)                         #
+# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis    #
+#     resistant 1024-bit modular exponentiation, for optimizing RSA2048      #
+#     on AVX2 capable x86_64 platforms",                                     #
+#     http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest#
+##############################################################################
+#
+# +13% improvement over original submission by <appro@openssl.org>
+#
+# rsa2048 sign/sec     OpenSSL 1.0.1   scalar(*)       this
+# 2.3GHz Haswell       621             765/+23%        1113/+79%
+# 2.3GHz Broadwell(**) 688             1200(***)/+74%  1120/+63%
+#
+# (*)  if system doesn't support AVX2, for reference purposes;
+# (**) scaled to 2.3GHz to simplify comparison;
+# (***)        scalar AD*X code is faster than AVX2 and is preferred code
+#      path for Broadwell;
+
+$flavour = shift;              # 1st argument: perlasm flavour
+$output  = shift;              # 2nd argument: output file name
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }  # lone arg with '.' is the output
+# Win64 is selected by a nasm/masm/mingw64 flavour or by an output name ending in .asm.
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+# Locate x86_64-xlate.pl next to this script, else under ../../perlasm/.
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+# Probe the toolchain: $avx counts version thresholds met (code below needs $avx>1).
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.19) + ($1>=2.22);
+       $addx = ($1>=2.23);
+}
+# Win64 with NASM: same probe against the `nasm -v` banner.
+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+           `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.09) + ($1>=2.10);
+       $addx = ($1>=2.10);
+}
+# Win64 with MASM: same probe against the major version printed by ml64.
+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+           `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=11);
+       $addx = ($1>=11);
+}
+# clang: the major version may have several digits (10.0 and later), not only 3-9.
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([0-9]+)\.([0-9]+)/) {
+       my $ver = $2 + $3/100.0;        # 3.1->3.01, 3.10->3.10
+       $avx = ($ver>=3.0) + ($ver>=3.01);
+       $addx = ($ver>=3.03);
+}
+# Send everything printed to STDOUT through the translator; quote its path, fail loudly.
+open OUT,"| \"$^X\" \"$xlate\" $flavour $output" or die "can't call $xlate: $!";
+*STDOUT = *OUT;
+
+if ($avx>1) {{{
+{ # void AMS_WW(
+my $rp="%rdi"; # BN_ULONG *rp,
+my $ap="%rsi"; # const BN_ULONG *ap,
+my $np="%rdx"; # const BN_ULONG *np,
+my $n0="%ecx"; # const BN_ULONG n0,
+my $rep="%r8d";        # int repeat);
+
+# The registers that hold the accumulated redundant result
+# The AMM works on 1024 bit operands, and redundant word size is 29
+# Therefore: ceil(1024/29)/4 = 9
+my $ACC0="%ymm0";
+my $ACC1="%ymm1";
+my $ACC2="%ymm2";
+my $ACC3="%ymm3";
+my $ACC4="%ymm4";
+my $ACC5="%ymm5";
+my $ACC6="%ymm6";
+my $ACC7="%ymm7";
+my $ACC8="%ymm8";
+my $ACC9="%ymm9";
+# Registers that hold the broadcasted words of bp, currently used
+my $B1="%ymm10";
+my $B2="%ymm11";
+# Registers that hold the broadcasted words of Y, currently used
+my $Y1="%ymm12";
+my $Y2="%ymm13";
+# Helper registers
+my $TEMP1="%ymm14";
+my $AND_MASK="%ymm15";
+# alu registers that hold the first words of the ACC
+my $r0="%r9";
+my $r1="%r10";
+my $r2="%r11";
+my $r3="%r12";
+
+my $i="%r14d";                 # loop counter
+my $tmp = "%r15";
+
+my $FrameSize=32*18+32*8;      # place for A^2 and 2*A
+
+my $aap=$r0;
+my $tp0="%rbx";
+my $tp1=$r3;
+my $tpa=$tmp;
+
+$np="%r13";                    # reassigned argument
+
+$code.=<<___;
+.text
+
+.globl rsaz_1024_sqr_avx2
+.type  rsaz_1024_sqr_avx2,\@function,5
+.align 64
+rsaz_1024_sqr_avx2:            # 702 cycles, 14% faster than rsaz_1024_mul_avx2
+       lea     (%rsp), %rax
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       lea     -0xa8(%rsp),%rsp
+       vmovaps %xmm6,-0xd8(%rax)
+       vmovaps %xmm7,-0xc8(%rax)
+       vmovaps %xmm8,-0xb8(%rax)
+       vmovaps %xmm9,-0xa8(%rax)
+       vmovaps %xmm10,-0x98(%rax)
+       vmovaps %xmm11,-0x88(%rax)
+       vmovaps %xmm12,-0x78(%rax)
+       vmovaps %xmm13,-0x68(%rax)
+       vmovaps %xmm14,-0x58(%rax)
+       vmovaps %xmm15,-0x48(%rax)
+.Lsqr_1024_body:
+___
+$code.=<<___;
+       mov     %rax,%rbp
+       mov     %rdx, $np                       # reassigned argument
+       sub     \$$FrameSize, %rsp
+       mov     $np, $tmp
+       sub     \$-128, $rp                     # size optimization
+       sub     \$-128, $ap
+       sub     \$-128, $np
+
+       and     \$4095, $tmp                    # see if $np crosses page
+       add     \$32*10, $tmp
+       shr     \$12, $tmp
+       vpxor   $ACC9,$ACC9,$ACC9
+       jz      .Lsqr_1024_no_n_copy
+
+       # unaligned 256-bit load that crosses page boundary can
+       # cause >2x performance degradation here, so if $np does
+       # cross page boundary, copy it to stack and make sure stack
+       # frame doesn't...
+       sub             \$32*10,%rsp
+       vmovdqu         32*0-128($np), $ACC0
+       and             \$-2048, %rsp
+       vmovdqu         32*1-128($np), $ACC1
+       vmovdqu         32*2-128($np), $ACC2
+       vmovdqu         32*3-128($np), $ACC3
+       vmovdqu         32*4-128($np), $ACC4
+       vmovdqu         32*5-128($np), $ACC5
+       vmovdqu         32*6-128($np), $ACC6
+       vmovdqu         32*7-128($np), $ACC7
+       vmovdqu         32*8-128($np), $ACC8
+       lea             $FrameSize+128(%rsp),$np
+       vmovdqu         $ACC0, 32*0-128($np)
+       vmovdqu         $ACC1, 32*1-128($np)
+       vmovdqu         $ACC2, 32*2-128($np)
+       vmovdqu         $ACC3, 32*3-128($np)
+       vmovdqu         $ACC4, 32*4-128($np)
+       vmovdqu         $ACC5, 32*5-128($np)
+       vmovdqu         $ACC6, 32*6-128($np)
+       vmovdqu         $ACC7, 32*7-128($np)
+       vmovdqu         $ACC8, 32*8-128($np)
+       vmovdqu         $ACC9, 32*9-128($np)    # $ACC9 is zero
+
+.Lsqr_1024_no_n_copy:
+       and             \$-1024, %rsp
+
+       vmovdqu         32*1-128($ap), $ACC1
+       vmovdqu         32*2-128($ap), $ACC2
+       vmovdqu         32*3-128($ap), $ACC3
+       vmovdqu         32*4-128($ap), $ACC4
+       vmovdqu         32*5-128($ap), $ACC5
+       vmovdqu         32*6-128($ap), $ACC6
+       vmovdqu         32*7-128($ap), $ACC7
+       vmovdqu         32*8-128($ap), $ACC8
+
+       lea     192(%rsp), $tp0                 # 64+128=192
+       vpbroadcastq    .Land_mask(%rip), $AND_MASK
+       jmp     .LOOP_GRANDE_SQR_1024
+
+.align 32
+.LOOP_GRANDE_SQR_1024:
+       lea     32*18+128(%rsp), $aap           # size optimization
+       lea     448(%rsp), $tp1                 # 64+128+256=448
+
+       # the squaring is performed as described in Variant B of
+       # "Speeding up Big-Number Squaring", so start by calculating
+       # the A*2=A+A vector
+       vpaddq          $ACC1, $ACC1, $ACC1
+        vpbroadcastq   32*0-128($ap), $B1
+       vpaddq          $ACC2, $ACC2, $ACC2
+       vmovdqa         $ACC1, 32*0-128($aap)
+       vpaddq          $ACC3, $ACC3, $ACC3
+       vmovdqa         $ACC2, 32*1-128($aap)
+       vpaddq          $ACC4, $ACC4, $ACC4
+       vmovdqa         $ACC3, 32*2-128($aap)
+       vpaddq          $ACC5, $ACC5, $ACC5
+       vmovdqa         $ACC4, 32*3-128($aap)
+       vpaddq          $ACC6, $ACC6, $ACC6
+       vmovdqa         $ACC5, 32*4-128($aap)
+       vpaddq          $ACC7, $ACC7, $ACC7
+       vmovdqa         $ACC6, 32*5-128($aap)
+       vpaddq          $ACC8, $ACC8, $ACC8
+       vmovdqa         $ACC7, 32*6-128($aap)
+       vpxor           $ACC9, $ACC9, $ACC9
+       vmovdqa         $ACC8, 32*7-128($aap)
+
+       vpmuludq        32*0-128($ap), $B1, $ACC0
+        vpbroadcastq   32*1-128($ap), $B2
+        vmovdqu        $ACC9, 32*9-192($tp0)   # zero upper half
+       vpmuludq        $B1, $ACC1, $ACC1
+        vmovdqu        $ACC9, 32*10-448($tp1)
+       vpmuludq        $B1, $ACC2, $ACC2
+        vmovdqu        $ACC9, 32*11-448($tp1)
+       vpmuludq        $B1, $ACC3, $ACC3
+        vmovdqu        $ACC9, 32*12-448($tp1)
+       vpmuludq        $B1, $ACC4, $ACC4
+        vmovdqu        $ACC9, 32*13-448($tp1)
+       vpmuludq        $B1, $ACC5, $ACC5
+        vmovdqu        $ACC9, 32*14-448($tp1)
+       vpmuludq        $B1, $ACC6, $ACC6
+        vmovdqu        $ACC9, 32*15-448($tp1)
+       vpmuludq        $B1, $ACC7, $ACC7
+        vmovdqu        $ACC9, 32*16-448($tp1)
+       vpmuludq        $B1, $ACC8, $ACC8
+        vpbroadcastq   32*2-128($ap), $B1
+        vmovdqu        $ACC9, 32*17-448($tp1)
+
+       mov     $ap, $tpa
+       mov     \$4, $i
+       jmp     .Lsqr_entry_1024
+___
+$TEMP0=$Y1;
+$TEMP2=$Y2;
+$code.=<<___;
+.align 32
+.LOOP_SQR_1024:
+        vpbroadcastq   32*1-128($tpa), $B2
+       vpmuludq        32*0-128($ap), $B1, $ACC0
+       vpaddq          32*0-192($tp0), $ACC0, $ACC0
+       vpmuludq        32*0-128($aap), $B1, $ACC1
+       vpaddq          32*1-192($tp0), $ACC1, $ACC1
+       vpmuludq        32*1-128($aap), $B1, $ACC2
+       vpaddq          32*2-192($tp0), $ACC2, $ACC2
+       vpmuludq        32*2-128($aap), $B1, $ACC3
+       vpaddq          32*3-192($tp0), $ACC3, $ACC3
+       vpmuludq        32*3-128($aap), $B1, $ACC4
+       vpaddq          32*4-192($tp0), $ACC4, $ACC4
+       vpmuludq        32*4-128($aap), $B1, $ACC5
+       vpaddq          32*5-192($tp0), $ACC5, $ACC5
+       vpmuludq        32*5-128($aap), $B1, $ACC6
+       vpaddq          32*6-192($tp0), $ACC6, $ACC6
+       vpmuludq        32*6-128($aap), $B1, $ACC7
+       vpaddq          32*7-192($tp0), $ACC7, $ACC7
+       vpmuludq        32*7-128($aap), $B1, $ACC8
+        vpbroadcastq   32*2-128($tpa), $B1
+       vpaddq          32*8-192($tp0), $ACC8, $ACC8
+.Lsqr_entry_1024:
+       vmovdqu         $ACC0, 32*0-192($tp0)
+       vmovdqu         $ACC1, 32*1-192($tp0)
+
+       vpmuludq        32*1-128($ap), $B2, $TEMP0
+       vpaddq          $TEMP0, $ACC2, $ACC2
+       vpmuludq        32*1-128($aap), $B2, $TEMP1
+       vpaddq          $TEMP1, $ACC3, $ACC3
+       vpmuludq        32*2-128($aap), $B2, $TEMP2
+       vpaddq          $TEMP2, $ACC4, $ACC4
+       vpmuludq        32*3-128($aap), $B2, $TEMP0
+       vpaddq          $TEMP0, $ACC5, $ACC5
+       vpmuludq        32*4-128($aap), $B2, $TEMP1
+       vpaddq          $TEMP1, $ACC6, $ACC6
+       vpmuludq        32*5-128($aap), $B2, $TEMP2
+       vpaddq          $TEMP2, $ACC7, $ACC7
+       vpmuludq        32*6-128($aap), $B2, $TEMP0
+       vpaddq          $TEMP0, $ACC8, $ACC8
+       vpmuludq        32*7-128($aap), $B2, $ACC0
+        vpbroadcastq   32*3-128($tpa), $B2
+       vpaddq          32*9-192($tp0), $ACC0, $ACC0
+
+       vmovdqu         $ACC2, 32*2-192($tp0)
+       vmovdqu         $ACC3, 32*3-192($tp0)
+
+       vpmuludq        32*2-128($ap), $B1, $TEMP2
+       vpaddq          $TEMP2, $ACC4, $ACC4
+       vpmuludq        32*2-128($aap), $B1, $TEMP0
+       vpaddq          $TEMP0, $ACC5, $ACC5
+       vpmuludq        32*3-128($aap), $B1, $TEMP1
+       vpaddq          $TEMP1, $ACC6, $ACC6
+       vpmuludq        32*4-128($aap), $B1, $TEMP2
+       vpaddq          $TEMP2, $ACC7, $ACC7
+       vpmuludq        32*5-128($aap), $B1, $TEMP0
+       vpaddq          $TEMP0, $ACC8, $ACC8
+       vpmuludq        32*6-128($aap), $B1, $TEMP1
+       vpaddq          $TEMP1, $ACC0, $ACC0
+       vpmuludq        32*7-128($aap), $B1, $ACC1
+        vpbroadcastq   32*4-128($tpa), $B1
+       vpaddq          32*10-448($tp1), $ACC1, $ACC1
+
+       vmovdqu         $ACC4, 32*4-192($tp0)
+       vmovdqu         $ACC5, 32*5-192($tp0)
+
+       vpmuludq        32*3-128($ap), $B2, $TEMP0
+       vpaddq          $TEMP0, $ACC6, $ACC6
+       vpmuludq        32*3-128($aap), $B2, $TEMP1
+       vpaddq          $TEMP1, $ACC7, $ACC7
+       vpmuludq        32*4-128($aap), $B2, $TEMP2
+       vpaddq          $TEMP2, $ACC8, $ACC8
+       vpmuludq        32*5-128($aap), $B2, $TEMP0
+       vpaddq          $TEMP0, $ACC0, $ACC0
+       vpmuludq        32*6-128($aap), $B2, $TEMP1
+       vpaddq          $TEMP1, $ACC1, $ACC1
+       vpmuludq        32*7-128($aap), $B2, $ACC2
+        vpbroadcastq   32*5-128($tpa), $B2
+       vpaddq          32*11-448($tp1), $ACC2, $ACC2   
+
+       vmovdqu         $ACC6, 32*6-192($tp0)
+       vmovdqu         $ACC7, 32*7-192($tp0)
+
+       vpmuludq        32*4-128($ap), $B1, $TEMP0
+       vpaddq          $TEMP0, $ACC8, $ACC8
+       vpmuludq        32*4-128($aap), $B1, $TEMP1
+       vpaddq          $TEMP1, $ACC0, $ACC0
+       vpmuludq        32*5-128($aap), $B1, $TEMP2
+       vpaddq          $TEMP2, $ACC1, $ACC1
+       vpmuludq        32*6-128($aap), $B1, $TEMP0
+       vpaddq          $TEMP0, $ACC2, $ACC2
+       vpmuludq        32*7-128($aap), $B1, $ACC3
+        vpbroadcastq   32*6-128($tpa), $B1
+       vpaddq          32*12-448($tp1), $ACC3, $ACC3
+
+       vmovdqu         $ACC8, 32*8-192($tp0)
+       vmovdqu         $ACC0, 32*9-192($tp0)
+       lea             8($tp0), $tp0
+
+       vpmuludq        32*5-128($ap), $B2, $TEMP2
+       vpaddq          $TEMP2, $ACC1, $ACC1
+       vpmuludq        32*5-128($aap), $B2, $TEMP0
+       vpaddq          $TEMP0, $ACC2, $ACC2
+       vpmuludq        32*6-128($aap), $B2, $TEMP1
+       vpaddq          $TEMP1, $ACC3, $ACC3
+       vpmuludq        32*7-128($aap), $B2, $ACC4
+        vpbroadcastq   32*7-128($tpa), $B2
+       vpaddq          32*13-448($tp1), $ACC4, $ACC4
+
+       vmovdqu         $ACC1, 32*10-448($tp1)
+       vmovdqu         $ACC2, 32*11-448($tp1)
+
+       vpmuludq        32*6-128($ap), $B1, $TEMP0
+       vpaddq          $TEMP0, $ACC3, $ACC3
+       vpmuludq        32*6-128($aap), $B1, $TEMP1
+        vpbroadcastq   32*8-128($tpa), $ACC0           # borrow $ACC0 for $B1
+       vpaddq          $TEMP1, $ACC4, $ACC4
+       vpmuludq        32*7-128($aap), $B1, $ACC5
+        vpbroadcastq   32*0+8-128($tpa), $B1           # for next iteration
+       vpaddq          32*14-448($tp1), $ACC5, $ACC5
+
+       vmovdqu         $ACC3, 32*12-448($tp1)
+       vmovdqu         $ACC4, 32*13-448($tp1)
+       lea             8($tpa), $tpa
+
+       vpmuludq        32*7-128($ap), $B2, $TEMP0
+       vpaddq          $TEMP0, $ACC5, $ACC5
+       vpmuludq        32*7-128($aap), $B2, $ACC6
+       vpaddq          32*15-448($tp1), $ACC6, $ACC6
+
+       vpmuludq        32*8-128($ap), $ACC0, $ACC7
+       vmovdqu         $ACC5, 32*14-448($tp1)
+       vpaddq          32*16-448($tp1), $ACC7, $ACC7
+       vmovdqu         $ACC6, 32*15-448($tp1)
+       vmovdqu         $ACC7, 32*16-448($tp1)
+       lea             8($tp1), $tp1
+
+       dec     $i        
+       jnz     .LOOP_SQR_1024
+___
+$ZERO = $ACC9;
+$TEMP0 = $B1;
+$TEMP2 = $B2;
+$TEMP3 = $Y1;
+$TEMP4 = $Y2;
+$code.=<<___;
+       #we need to fix indexes 32-39 to avoid overflow
+       vmovdqu         32*8(%rsp), $ACC8               # 32*8-192($tp0),
+       vmovdqu         32*9(%rsp), $ACC1               # 32*9-192($tp0)
+       vmovdqu         32*10(%rsp), $ACC2              # 32*10-192($tp0)
+       lea             192(%rsp), $tp0                 # 64+128=192
+
+       vpsrlq          \$29, $ACC8, $TEMP1
+       vpand           $AND_MASK, $ACC8, $ACC8
+       vpsrlq          \$29, $ACC1, $TEMP2
+       vpand           $AND_MASK, $ACC1, $ACC1
+
+       vpermq          \$0x93, $TEMP1, $TEMP1
+       vpxor           $ZERO, $ZERO, $ZERO
+       vpermq          \$0x93, $TEMP2, $TEMP2
+
+       vpblendd        \$3, $ZERO, $TEMP1, $TEMP0
+       vpblendd        \$3, $TEMP1, $TEMP2, $TEMP1
+       vpaddq          $TEMP0, $ACC8, $ACC8
+       vpblendd        \$3, $TEMP2, $ZERO, $TEMP2
+       vpaddq          $TEMP1, $ACC1, $ACC1
+       vpaddq          $TEMP2, $ACC2, $ACC2
+       vmovdqu         $ACC1, 32*9-192($tp0)
+       vmovdqu         $ACC2, 32*10-192($tp0)
+
+       mov     (%rsp), %rax
+       mov     8(%rsp), $r1
+       mov     16(%rsp), $r2
+       mov     24(%rsp), $r3
+       vmovdqu 32*1(%rsp), $ACC1
+       vmovdqu 32*2-192($tp0), $ACC2
+       vmovdqu 32*3-192($tp0), $ACC3
+       vmovdqu 32*4-192($tp0), $ACC4
+       vmovdqu 32*5-192($tp0), $ACC5
+       vmovdqu 32*6-192($tp0), $ACC6
+       vmovdqu 32*7-192($tp0), $ACC7
+
+       mov     %rax, $r0
+       imull   $n0, %eax
+       and     \$0x1fffffff, %eax
+       vmovd   %eax, $Y1
+
+       mov     %rax, %rdx
+       imulq   -128($np), %rax
+        vpbroadcastq   $Y1, $Y1
+       add     %rax, $r0
+       mov     %rdx, %rax
+       imulq   8-128($np), %rax
+       shr     \$29, $r0
+       add     %rax, $r1
+       mov     %rdx, %rax
+       imulq   16-128($np), %rax
+       add     $r0, $r1
+       add     %rax, $r2
+       imulq   24-128($np), %rdx
+       add     %rdx, $r3
+
+       mov     $r1, %rax
+       imull   $n0, %eax
+       and     \$0x1fffffff, %eax
+
+       mov \$9, $i
+       jmp .LOOP_REDUCE_1024
+
+.align 32
+.LOOP_REDUCE_1024:
+       vmovd   %eax, $Y2
+       vpbroadcastq    $Y2, $Y2
+
+       vpmuludq        32*1-128($np), $Y1, $TEMP0
+        mov    %rax, %rdx
+        imulq  -128($np), %rax
+       vpaddq          $TEMP0, $ACC1, $ACC1
+        add    %rax, $r1
+       vpmuludq        32*2-128($np), $Y1, $TEMP1
+        mov    %rdx, %rax
+        imulq  8-128($np), %rax
+       vpaddq          $TEMP1, $ACC2, $ACC2
+       vpmuludq        32*3-128($np), $Y1, $TEMP2
+        .byte  0x67
+        add    %rax, $r2
+        .byte  0x67
+        mov    %rdx, %rax
+        imulq  16-128($np), %rax
+        shr    \$29, $r1
+       vpaddq          $TEMP2, $ACC3, $ACC3
+       vpmuludq        32*4-128($np), $Y1, $TEMP0
+        add    %rax, $r3
+        add    $r1, $r2
+       vpaddq          $TEMP0, $ACC4, $ACC4
+       vpmuludq        32*5-128($np), $Y1, $TEMP1
+        mov    $r2, %rax
+        imull  $n0, %eax
+       vpaddq          $TEMP1, $ACC5, $ACC5
+       vpmuludq        32*6-128($np), $Y1, $TEMP2
+        and    \$0x1fffffff, %eax
+       vpaddq          $TEMP2, $ACC6, $ACC6
+       vpmuludq        32*7-128($np), $Y1, $TEMP0
+       vpaddq          $TEMP0, $ACC7, $ACC7
+       vpmuludq        32*8-128($np), $Y1, $TEMP1
+        vmovd  %eax, $Y1
+        #vmovdqu       32*1-8-128($np), $TEMP2         # moved below
+       vpaddq          $TEMP1, $ACC8, $ACC8
+        #vmovdqu       32*2-8-128($np), $TEMP0         # moved below
+        vpbroadcastq   $Y1, $Y1
+
+       vpmuludq        32*1-8-128($np), $Y2, $TEMP2    # see above
+       vmovdqu         32*3-8-128($np), $TEMP1
+        mov    %rax, %rdx
+        imulq  -128($np), %rax
+       vpaddq          $TEMP2, $ACC1, $ACC1
+       vpmuludq        32*2-8-128($np), $Y2, $TEMP0    # see above
+       vmovdqu         32*4-8-128($np), $TEMP2
+        add    %rax, $r2
+        mov    %rdx, %rax
+        imulq  8-128($np), %rax
+       vpaddq          $TEMP0, $ACC2, $ACC2
+        add    $r3, %rax
+        shr    \$29, $r2
+       vpmuludq        $Y2, $TEMP1, $TEMP1
+       vmovdqu         32*5-8-128($np), $TEMP0
+        add    $r2, %rax
+       vpaddq          $TEMP1, $ACC3, $ACC3
+       vpmuludq        $Y2, $TEMP2, $TEMP2
+       vmovdqu         32*6-8-128($np), $TEMP1
+        .byte  0x67
+        mov    %rax, $r3
+        imull  $n0, %eax
+       vpaddq          $TEMP2, $ACC4, $ACC4
+       vpmuludq        $Y2, $TEMP0, $TEMP0
+       .byte   0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00    # vmovdqu               32*7-8-128($np), $TEMP2
+        and    \$0x1fffffff, %eax
+       vpaddq          $TEMP0, $ACC5, $ACC5
+       vpmuludq        $Y2, $TEMP1, $TEMP1
+       vmovdqu         32*8-8-128($np), $TEMP0
+       vpaddq          $TEMP1, $ACC6, $ACC6
+       vpmuludq        $Y2, $TEMP2, $TEMP2
+       vmovdqu         32*9-8-128($np), $ACC9
+        vmovd  %eax, $ACC0                     # borrow ACC0 for Y2
+        imulq  -128($np), %rax
+       vpaddq          $TEMP2, $ACC7, $ACC7
+       vpmuludq        $Y2, $TEMP0, $TEMP0
+        vmovdqu        32*1-16-128($np), $TEMP1
+        vpbroadcastq   $ACC0, $ACC0
+       vpaddq          $TEMP0, $ACC8, $ACC8
+       vpmuludq        $Y2, $ACC9, $ACC9
+        vmovdqu        32*2-16-128($np), $TEMP2
+        add    %rax, $r3
+
+___
+($ACC0,$Y2)=($Y2,$ACC0);
+$code.=<<___;
+        vmovdqu        32*1-24-128($np), $ACC0
+       vpmuludq        $Y1, $TEMP1, $TEMP1
+       vmovdqu         32*3-16-128($np), $TEMP0
+       vpaddq          $TEMP1, $ACC1, $ACC1
+        vpmuludq       $Y2, $ACC0, $ACC0
+       vpmuludq        $Y1, $TEMP2, $TEMP2
+       .byte   0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff    # vmovdqu               32*4-16-128($np), $TEMP1
+        vpaddq         $ACC1, $ACC0, $ACC0
+       vpaddq          $TEMP2, $ACC2, $ACC2
+       vpmuludq        $Y1, $TEMP0, $TEMP0
+       vmovdqu         32*5-16-128($np), $TEMP2
+        .byte  0x67
+        vmovq          $ACC0, %rax
+        vmovdqu        $ACC0, (%rsp)           # transfer $r0-$r3
+       vpaddq          $TEMP0, $ACC3, $ACC3
+       vpmuludq        $Y1, $TEMP1, $TEMP1
+       vmovdqu         32*6-16-128($np), $TEMP0
+       vpaddq          $TEMP1, $ACC4, $ACC4
+       vpmuludq        $Y1, $TEMP2, $TEMP2
+       vmovdqu         32*7-16-128($np), $TEMP1
+       vpaddq          $TEMP2, $ACC5, $ACC5
+       vpmuludq        $Y1, $TEMP0, $TEMP0
+       vmovdqu         32*8-16-128($np), $TEMP2
+       vpaddq          $TEMP0, $ACC6, $ACC6
+       vpmuludq        $Y1, $TEMP1, $TEMP1
+        shr    \$29, $r3
+       vmovdqu         32*9-16-128($np), $TEMP0
+        add    $r3, %rax
+       vpaddq          $TEMP1, $ACC7, $ACC7
+       vpmuludq        $Y1, $TEMP2, $TEMP2
+        #vmovdqu       32*2-24-128($np), $TEMP1        # moved below
+        mov    %rax, $r0
+        imull  $n0, %eax
+       vpaddq          $TEMP2, $ACC8, $ACC8
+       vpmuludq        $Y1, $TEMP0, $TEMP0
+        and    \$0x1fffffff, %eax
+        vmovd  %eax, $Y1
+        vmovdqu        32*3-24-128($np), $TEMP2
+       .byte   0x67
+       vpaddq          $TEMP0, $ACC9, $ACC9
+        vpbroadcastq   $Y1, $Y1
+
+       vpmuludq        32*2-24-128($np), $Y2, $TEMP1   # see above
+       vmovdqu         32*4-24-128($np), $TEMP0
+        mov    %rax, %rdx
+        imulq  -128($np), %rax
+        mov    8(%rsp), $r1
+       vpaddq          $TEMP1, $ACC2, $ACC1
+       vpmuludq        $Y2, $TEMP2, $TEMP2
+       vmovdqu         32*5-24-128($np), $TEMP1
+        add    %rax, $r0
+        mov    %rdx, %rax
+        imulq  8-128($np), %rax
+        .byte  0x67
+        shr    \$29, $r0
+        mov    16(%rsp), $r2
+       vpaddq          $TEMP2, $ACC3, $ACC2
+       vpmuludq        $Y2, $TEMP0, $TEMP0
+       vmovdqu         32*6-24-128($np), $TEMP2
+        add    %rax, $r1
+        mov    %rdx, %rax
+        imulq  16-128($np), %rax
+       vpaddq          $TEMP0, $ACC4, $ACC3
+       vpmuludq        $Y2, $TEMP1, $TEMP1
+       vmovdqu         32*7-24-128($np), $TEMP0
+        imulq  24-128($np), %rdx               # future $r3
+        add    %rax, $r2
+        lea    ($r0,$r1), %rax
+       vpaddq          $TEMP1, $ACC5, $ACC4
+       vpmuludq        $Y2, $TEMP2, $TEMP2
+       vmovdqu         32*8-24-128($np), $TEMP1
+        mov    %rax, $r1
+        imull  $n0, %eax
+       vpmuludq        $Y2, $TEMP0, $TEMP0
+       vpaddq          $TEMP2, $ACC6, $ACC5
+       vmovdqu         32*9-24-128($np), $TEMP2
+        and    \$0x1fffffff, %eax
+       vpaddq          $TEMP0, $ACC7, $ACC6
+       vpmuludq        $Y2, $TEMP1, $TEMP1
+        add    24(%rsp), %rdx
+       vpaddq          $TEMP1, $ACC8, $ACC7
+       vpmuludq        $Y2, $TEMP2, $TEMP2
+       vpaddq          $TEMP2, $ACC9, $ACC8
+        vmovq  $r3, $ACC9
+        mov    %rdx, $r3
+
+       dec     $i
+       jnz     .LOOP_REDUCE_1024
+___
+($ACC0,$Y2)=($Y2,$ACC0);
+$code.=<<___;
+       lea     448(%rsp), $tp1                 # size optimization
+       vpaddq  $ACC9, $Y2, $ACC0
+       vpxor   $ZERO, $ZERO, $ZERO
+
+       vpaddq          32*9-192($tp0), $ACC0, $ACC0
+       vpaddq          32*10-448($tp1), $ACC1, $ACC1
+       vpaddq          32*11-448($tp1), $ACC2, $ACC2
+       vpaddq          32*12-448($tp1), $ACC3, $ACC3
+       vpaddq          32*13-448($tp1), $ACC4, $ACC4
+       vpaddq          32*14-448($tp1), $ACC5, $ACC5
+       vpaddq          32*15-448($tp1), $ACC6, $ACC6
+       vpaddq          32*16-448($tp1), $ACC7, $ACC7
+       vpaddq          32*17-448($tp1), $ACC8, $ACC8
+
+       vpsrlq          \$29, $ACC0, $TEMP1
+       vpand           $AND_MASK, $ACC0, $ACC0
+       vpsrlq          \$29, $ACC1, $TEMP2
+       vpand           $AND_MASK, $ACC1, $ACC1
+       vpsrlq          \$29, $ACC2, $TEMP3
+       vpermq          \$0x93, $TEMP1, $TEMP1
+       vpand           $AND_MASK, $ACC2, $ACC2
+       vpsrlq          \$29, $ACC3, $TEMP4
+       vpermq          \$0x93, $TEMP2, $TEMP2
+       vpand           $AND_MASK, $ACC3, $ACC3
+       vpermq          \$0x93, $TEMP3, $TEMP3
+
+       vpblendd        \$3, $ZERO, $TEMP1, $TEMP0
+       vpermq          \$0x93, $TEMP4, $TEMP4
+       vpblendd        \$3, $TEMP1, $TEMP2, $TEMP1
+       vpaddq          $TEMP0, $ACC0, $ACC0
+       vpblendd        \$3, $TEMP2, $TEMP3, $TEMP2
+       vpaddq          $TEMP1, $ACC1, $ACC1
+       vpblendd        \$3, $TEMP3, $TEMP4, $TEMP3
+       vpaddq          $TEMP2, $ACC2, $ACC2
+       vpblendd        \$3, $TEMP4, $ZERO, $TEMP4
+       vpaddq          $TEMP3, $ACC3, $ACC3
+       vpaddq          $TEMP4, $ACC4, $ACC4
+
+       vpsrlq          \$29, $ACC0, $TEMP1
+       vpand           $AND_MASK, $ACC0, $ACC0
+       vpsrlq          \$29, $ACC1, $TEMP2
+       vpand           $AND_MASK, $ACC1, $ACC1
+       vpsrlq          \$29, $ACC2, $TEMP3
+       vpermq          \$0x93, $TEMP1, $TEMP1
+       vpand           $AND_MASK, $ACC2, $ACC2
+       vpsrlq          \$29, $ACC3, $TEMP4
+       vpermq          \$0x93, $TEMP2, $TEMP2
+       vpand           $AND_MASK, $ACC3, $ACC3
+       vpermq          \$0x93, $TEMP3, $TEMP3
+
+       vpblendd        \$3, $ZERO, $TEMP1, $TEMP0
+       vpermq          \$0x93, $TEMP4, $TEMP4
+       vpblendd        \$3, $TEMP1, $TEMP2, $TEMP1
+       vpaddq          $TEMP0, $ACC0, $ACC0
+       vpblendd        \$3, $TEMP2, $TEMP3, $TEMP2
+       vpaddq          $TEMP1, $ACC1, $ACC1
+       vmovdqu         $ACC0, 32*0-128($rp)
+       vpblendd        \$3, $TEMP3, $TEMP4, $TEMP3
+       vpaddq          $TEMP2, $ACC2, $ACC2
+       vmovdqu         $ACC1, 32*1-128($rp)
+       vpblendd        \$3, $TEMP4, $ZERO, $TEMP4
+       vpaddq          $TEMP3, $ACC3, $ACC3
+       vmovdqu         $ACC2, 32*2-128($rp)
+       vpaddq          $TEMP4, $ACC4, $ACC4
+       vmovdqu         $ACC3, 32*3-128($rp)
+___
+$TEMP5=$ACC0;
+$code.=<<___;
+       vpsrlq          \$29, $ACC4, $TEMP1
+       vpand           $AND_MASK, $ACC4, $ACC4
+       vpsrlq          \$29, $ACC5, $TEMP2
+       vpand           $AND_MASK, $ACC5, $ACC5
+       vpsrlq          \$29, $ACC6, $TEMP3
+       vpermq          \$0x93, $TEMP1, $TEMP1
+       vpand           $AND_MASK, $ACC6, $ACC6
+       vpsrlq          \$29, $ACC7, $TEMP4
+       vpermq          \$0x93, $TEMP2, $TEMP2
+       vpand           $AND_MASK, $ACC7, $ACC7
+       vpsrlq          \$29, $ACC8, $TEMP5
+       vpermq          \$0x93, $TEMP3, $TEMP3
+       vpand           $AND_MASK, $ACC8, $ACC8
+       vpermq          \$0x93, $TEMP4, $TEMP4
+
+       vpblendd        \$3, $ZERO, $TEMP1, $TEMP0
+       vpermq          \$0x93, $TEMP5, $TEMP5
+       vpblendd        \$3, $TEMP1, $TEMP2, $TEMP1
+       vpaddq          $TEMP0, $ACC4, $ACC4
+       vpblendd        \$3, $TEMP2, $TEMP3, $TEMP2
+       vpaddq          $TEMP1, $ACC5, $ACC5
+       vpblendd        \$3, $TEMP3, $TEMP4, $TEMP3
+       vpaddq          $TEMP2, $ACC6, $ACC6
+       vpblendd        \$3, $TEMP4, $TEMP5, $TEMP4
+       vpaddq          $TEMP3, $ACC7, $ACC7
+       vpaddq          $TEMP4, $ACC8, $ACC8
+     
+       vpsrlq          \$29, $ACC4, $TEMP1
+       vpand           $AND_MASK, $ACC4, $ACC4
+       vpsrlq          \$29, $ACC5, $TEMP2
+       vpand           $AND_MASK, $ACC5, $ACC5
+       vpsrlq          \$29, $ACC6, $TEMP3
+       vpermq          \$0x93, $TEMP1, $TEMP1
+       vpand           $AND_MASK, $ACC6, $ACC6
+       vpsrlq          \$29, $ACC7, $TEMP4
+       vpermq          \$0x93, $TEMP2, $TEMP2
+       vpand           $AND_MASK, $ACC7, $ACC7
+       vpsrlq          \$29, $ACC8, $TEMP5
+       vpermq          \$0x93, $TEMP3, $TEMP3
+       vpand           $AND_MASK, $ACC8, $ACC8
+       vpermq          \$0x93, $TEMP4, $TEMP4
+
+       vpblendd        \$3, $ZERO, $TEMP1, $TEMP0
+       vpermq          \$0x93, $TEMP5, $TEMP5
+       vpblendd        \$3, $TEMP1, $TEMP2, $TEMP1
+       vpaddq          $TEMP0, $ACC4, $ACC4
+       vpblendd        \$3, $TEMP2, $TEMP3, $TEMP2
+       vpaddq          $TEMP1, $ACC5, $ACC5
+       vmovdqu         $ACC4, 32*4-128($rp)
+       vpblendd        \$3, $TEMP3, $TEMP4, $TEMP3
+       vpaddq          $TEMP2, $ACC6, $ACC6
+       vmovdqu         $ACC5, 32*5-128($rp)
+       vpblendd        \$3, $TEMP4, $TEMP5, $TEMP4
+       vpaddq          $TEMP3, $ACC7, $ACC7
+       vmovdqu         $ACC6, 32*6-128($rp)
+       vpaddq          $TEMP4, $ACC8, $ACC8
+       vmovdqu         $ACC7, 32*7-128($rp)
+       vmovdqu         $ACC8, 32*8-128($rp)
+
+       mov     $rp, $ap
+       dec     $rep
+       jne     .LOOP_GRANDE_SQR_1024
+
+       vzeroall
+       mov     %rbp, %rax
+___
+$code.=<<___ if ($win64);
+       movaps  -0xd8(%rax),%xmm6
+       movaps  -0xc8(%rax),%xmm7
+       movaps  -0xb8(%rax),%xmm8
+       movaps  -0xa8(%rax),%xmm9
+       movaps  -0x98(%rax),%xmm10
+       movaps  -0x88(%rax),%xmm11
+       movaps  -0x78(%rax),%xmm12
+       movaps  -0x68(%rax),%xmm13
+       movaps  -0x58(%rax),%xmm14
+       movaps  -0x48(%rax),%xmm15
+___
+$code.=<<___;
+       mov     -48(%rax),%r15
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp             # restore %rsp
+.Lsqr_1024_epilogue:
+       ret
+.size  rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
+___
+}
+
+# Generator for rsaz_1024_mul_avx2: register naming followed by the
+# function prologue. The trailing comments on the first lines spell out
+# the C prototype of the emitted routine.
+{ # void AMM_WW(
+my $rp="%rdi"; # BN_ULONG *rp,
+my $ap="%rsi"; # const BN_ULONG *ap,
+my $bp="%rdx"; # const BN_ULONG *bp,
+my $np="%rcx"; # const BN_ULONG *np,
+my $n0="%r8d"; # unsigned int n0);
+
+# The registers that hold the accumulated redundant result
+# The AMM works on 1024 bit operands, and redundant word size is 29
+# Therefore: ceil(1024/29)/4 = 9
+my $ACC0="%ymm0";
+my $ACC1="%ymm1";
+my $ACC2="%ymm2";
+my $ACC3="%ymm3";
+my $ACC4="%ymm4";
+my $ACC5="%ymm5";
+my $ACC6="%ymm6";
+my $ACC7="%ymm7";
+my $ACC8="%ymm8";
+my $ACC9="%ymm9";
+
+# Registers that hold the broadcasted words of multiplier, currently used
+my $Bi="%ymm10";
+my $Yi="%ymm11";
+
+# Helper registers
+my $TEMP0=$ACC0;       # alias: shares %ymm0 with $ACC0
+my $TEMP1="%ymm12";
+my $TEMP2="%ymm13";
+my $ZERO="%ymm14";
+my $AND_MASK="%ymm15";
+
+# alu registers that hold the first words of the ACC
+my $r0="%r9";
+my $r1="%r10";
+my $r2="%r11";
+my $r3="%r12";
+
+my $i="%r14d";         # loop counter
+my $tmp="%r15";        # scratch for the page-crossing checks
+
+$bp="%r13";    # reassigned argument
+
+# Prologue: remember the entry %rsp in %rax, then push the six
+# callee-saved general purpose registers (48 bytes).
+$code.=<<___;
+.globl rsaz_1024_mul_avx2
+.type  rsaz_1024_mul_avx2,\@function,5
+.align 64
+rsaz_1024_mul_avx2:
+       lea     (%rsp), %rax
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+# Win64 only: reserve 0xa8 more bytes and save %xmm6-%xmm15 there.
+# Offsets are relative to the entry %rsp held in %rax; -0xd8 equals
+# the 48 bytes of pushes plus the 0xa8 bytes just reserved.
+$code.=<<___ if ($win64);
+       vzeroupper
+       lea     -0xa8(%rsp),%rsp
+       vmovaps %xmm6,-0xd8(%rax)
+       vmovaps %xmm7,-0xc8(%rax)
+       vmovaps %xmm8,-0xb8(%rax)
+       vmovaps %xmm9,-0xa8(%rax)
+       vmovaps %xmm10,-0x98(%rax)
+       vmovaps %xmm11,-0x88(%rax)
+       vmovaps %xmm12,-0x78(%rax)
+       vmovaps %xmm13,-0x68(%rax)
+       vmovaps %xmm14,-0x58(%rax)
+       vmovaps %xmm15,-0x48(%rax)
+.Lmul_1024_body:
+___
+# Body of rsaz_1024_mul_avx2: frame setup, optional copy of the modulus
+# to the stack, and the main loop. The loop runs 9 times; every pass
+# consumes four 64-bit slots of the multiplier (offsets 0, 8, 16 and 24
+# from $bp). For each slot it adds slot*a and y*n to the accumulators,
+# where y is the low 29 bits of (running low word * n0). The lowest four
+# words are tracked in the scalar registers $r0-$r3 and handed over
+# through the 32 bytes at (%rsp). At the end of a pass every accumulator
+# moves down by one register.
+#
+# Carry correction of $ACC3 is applied to all four of its 29-bit digits
+# in every pass, see (*) after the loop.
+$code.=<<___;
+       mov     %rax,%rbp
+       vzeroall
+       mov     %rdx, $bp       # reassigned argument
+       sub     \$64,%rsp
+
+       # unaligned 256-bit load that crosses page boundary can
+       # cause severe performance degradation here, so if $ap does
+       # cross page boundary, swap it with $bp [meaning that caller
+       # is advised to lay down $ap and $bp next to each other, so
+       # that only one can cross page boundary].
+       .byte   0x67,0x67
+       mov     $ap, $tmp
+       and     \$4095, $tmp
+       add     \$32*10, $tmp
+       shr     \$12, $tmp      # non-zero if the 320 bytes spill into the next page
+       mov     $ap, $tmp       # mov leaves the flags from shr intact
+       cmovnz  $bp, $ap
+       cmovnz  $tmp, $bp
+
+       mov     $np, $tmp
+       sub     \$-128,$ap      # size optimization
+       sub     \$-128,$np
+       sub     \$-128,$rp
+
+       and     \$4095, $tmp    # see if $np crosses page
+       add     \$32*10, $tmp
+       .byte   0x67,0x67
+       shr     \$12, $tmp
+       jz      .Lmul_1024_no_n_copy
+
+       # unaligned 256-bit load that crosses page boundary can
+       # cause severe performance degradation here, so if $np does
+       # cross page boundary, copy it to stack and make sure stack
+       # frame doesn't...
+       sub             \$32*10,%rsp
+       vmovdqu         32*0-128($np), $ACC0
+       and             \$-512, %rsp
+       vmovdqu         32*1-128($np), $ACC1
+       vmovdqu         32*2-128($np), $ACC2
+       vmovdqu         32*3-128($np), $ACC3
+       vmovdqu         32*4-128($np), $ACC4
+       vmovdqu         32*5-128($np), $ACC5
+       vmovdqu         32*6-128($np), $ACC6
+       vmovdqu         32*7-128($np), $ACC7
+       vmovdqu         32*8-128($np), $ACC8
+       lea             64+128(%rsp),$np
+       vmovdqu         $ACC0, 32*0-128($np)
+       vpxor           $ACC0, $ACC0, $ACC0
+       vmovdqu         $ACC1, 32*1-128($np)
+       vpxor           $ACC1, $ACC1, $ACC1
+       vmovdqu         $ACC2, 32*2-128($np)
+       vpxor           $ACC2, $ACC2, $ACC2
+       vmovdqu         $ACC3, 32*3-128($np)
+       vpxor           $ACC3, $ACC3, $ACC3
+       vmovdqu         $ACC4, 32*4-128($np)
+       vpxor           $ACC4, $ACC4, $ACC4
+       vmovdqu         $ACC5, 32*5-128($np)
+       vpxor           $ACC5, $ACC5, $ACC5
+       vmovdqu         $ACC6, 32*6-128($np)
+       vpxor           $ACC6, $ACC6, $ACC6
+       vmovdqu         $ACC7, 32*7-128($np)
+       vpxor           $ACC7, $ACC7, $ACC7
+       vmovdqu         $ACC8, 32*8-128($np)
+       vmovdqa         $ACC0, $ACC8
+       vmovdqu         $ACC9, 32*9-128($np)    # $ACC9 is zero after vzeroall
+.Lmul_1024_no_n_copy:
+       and     \$-64,%rsp
+
+       mov     ($bp), %rbx
+       vpbroadcastq ($bp), $Bi
+       vmovdqu $ACC0, (%rsp)                   # clear top of stack
+       xor     $r0, $r0
+       .byte   0x67
+       xor     $r1, $r1
+       xor     $r2, $r2
+       xor     $r3, $r3
+
+       # Broadcast the first quad of the table so that every lane holds
+       # the 29-bit mask; the loop below masks all four digits of the
+       # corrected accumulator and must not see the table's last quad.
+       vpbroadcastq    .Land_mask(%rip), $AND_MASK
+       mov     \$9, $i
+       vmovdqu $ACC9, 32*9-128($rp)            # $ACC9 is zero after vzeroall
+       jmp     .Loop_mul_1024
+
+.align 32
+.Loop_mul_1024:
+       # ---- multiplier slot at offset 0: a*b and first n*y ----
+       # The interleaved "correct" instructions strip the bits above 29
+       # from every lane of the fourth accumulator and re-add them one
+       # digit higher.
+        vpsrlq         \$29, $ACC3, $ACC9              # correct $ACC3(*)
+       mov     %rbx, %rax
+       imulq   -128($ap), %rax
+       add     $r0, %rax
+       mov     %rbx, $r1
+       imulq   8-128($ap), $r1
+       add     8(%rsp), $r1
+
+       mov     %rax, $r0
+       imull   $n0, %eax
+       and     \$0x1fffffff, %eax
+
+        mov    %rbx, $r2
+        imulq  16-128($ap), $r2
+        add    16(%rsp), $r2
+
+        mov    %rbx, $r3
+        imulq  24-128($ap), $r3
+        add    24(%rsp), $r3
+       vpmuludq        32*1-128($ap),$Bi,$TEMP0
+        vmovd          %eax, $Yi
+       vpaddq          $TEMP0,$ACC1,$ACC1
+       vpmuludq        32*2-128($ap),$Bi,$TEMP1
+        vpbroadcastq   $Yi, $Yi
+       vpaddq          $TEMP1,$ACC2,$ACC2
+       vpmuludq        32*3-128($ap),$Bi,$TEMP2
+        vpand          $AND_MASK, $ACC3, $ACC3         # correct $ACC3
+       vpaddq          $TEMP2,$ACC3,$ACC3
+       vpmuludq        32*4-128($ap),$Bi,$TEMP0
+       vpaddq          $TEMP0,$ACC4,$ACC4
+       vpmuludq        32*5-128($ap),$Bi,$TEMP1
+       vpaddq          $TEMP1,$ACC5,$ACC5
+       vpmuludq        32*6-128($ap),$Bi,$TEMP2
+       vpaddq          $TEMP2,$ACC6,$ACC6
+       vpmuludq        32*7-128($ap),$Bi,$TEMP0
+        vpermq         \$0x93, $ACC9, $ACC9            # correct $ACC3
+       vpaddq          $TEMP0,$ACC7,$ACC7
+       vpmuludq        32*8-128($ap),$Bi,$TEMP1
+        vpbroadcastq   8($bp), $Bi
+       vpaddq          $TEMP1,$ACC8,$ACC8
+
+       mov     %rax,%rdx
+       imulq   -128($np),%rax
+       add     %rax,$r0
+       mov     %rdx,%rax
+       imulq   8-128($np),%rax
+       add     %rax,$r1
+       mov     %rdx,%rax
+       imulq   16-128($np),%rax
+       add     %rax,$r2
+       shr     \$29, $r0
+       imulq   24-128($np),%rdx
+       add     %rdx,$r3
+       add     $r0, $r1
+
+       vpmuludq        32*1-128($np),$Yi,$TEMP2
+        vmovq          $Bi, %rbx
+       vpaddq          $TEMP2,$ACC1,$ACC1
+       vpmuludq        32*2-128($np),$Yi,$TEMP0
+       vpaddq          $TEMP0,$ACC2,$ACC2
+       vpmuludq        32*3-128($np),$Yi,$TEMP1
+       vpaddq          $TEMP1,$ACC3,$ACC3
+       vpmuludq        32*4-128($np),$Yi,$TEMP2
+       vpaddq          $TEMP2,$ACC4,$ACC4
+       vpmuludq        32*5-128($np),$Yi,$TEMP0
+       vpaddq          $TEMP0,$ACC5,$ACC5
+       vpmuludq        32*6-128($np),$Yi,$TEMP1
+       vpaddq          $TEMP1,$ACC6,$ACC6
+       vpmuludq        32*7-128($np),$Yi,$TEMP2
+       # carries of lanes 0-2, now sitting in lanes 1-3, stay in the
+       # same accumulator; $TEMP1 is free at this point
+        vpblendd       \$3, $ZERO, $ACC9, $TEMP1       # correct $ACC3
+       vpaddq          $TEMP2,$ACC7,$ACC7
+       vpmuludq        32*8-128($np),$Yi,$TEMP0
+        vpaddq         $TEMP1, $ACC3, $ACC3            # correct $ACC3
+       vpaddq          $TEMP0,$ACC8,$ACC8
+
+       # ---- multiplier slot at offset 8 (vector loads shifted by -8) ----
+       mov     %rbx, %rax
+       imulq   -128($ap),%rax
+       add     %rax,$r1
+        vmovdqu        -8+32*1-128($ap),$TEMP1
+       mov     %rbx, %rax
+       imulq   8-128($ap),%rax
+       add     %rax,$r2
+        vmovdqu        -8+32*2-128($ap),$TEMP2
+
+       mov     $r1, %rax
+       # carry of lane 3, rotated into lane 0, belongs to the lowest
+       # digit of the next accumulator
+        vpblendd       \$0xfc, $ZERO, $ACC9, $ACC9     # correct $ACC3
+       imull   $n0, %eax
+        vpaddq         $ACC9,$ACC4,$ACC4               # correct $ACC3
+       and     \$0x1fffffff, %eax
+
+        imulq  16-128($ap),%rbx
+        add    %rbx,$r3
+       vpmuludq        $Bi,$TEMP1,$TEMP1
+        vmovd          %eax, $Yi
+       vmovdqu         -8+32*3-128($ap),$TEMP0
+       vpaddq          $TEMP1,$ACC1,$ACC1
+       vpmuludq        $Bi,$TEMP2,$TEMP2
+        vpbroadcastq   $Yi, $Yi
+       vmovdqu         -8+32*4-128($ap),$TEMP1
+       vpaddq          $TEMP2,$ACC2,$ACC2
+       vpmuludq        $Bi,$TEMP0,$TEMP0
+       vmovdqu         -8+32*5-128($ap),$TEMP2
+       vpaddq          $TEMP0,$ACC3,$ACC3
+       vpmuludq        $Bi,$TEMP1,$TEMP1
+       vmovdqu         -8+32*6-128($ap),$TEMP0
+       vpaddq          $TEMP1,$ACC4,$ACC4
+       vpmuludq        $Bi,$TEMP2,$TEMP2
+       vmovdqu         -8+32*7-128($ap),$TEMP1
+       vpaddq          $TEMP2,$ACC5,$ACC5
+       vpmuludq        $Bi,$TEMP0,$TEMP0
+       vmovdqu         -8+32*8-128($ap),$TEMP2
+       vpaddq          $TEMP0,$ACC6,$ACC6
+       vpmuludq        $Bi,$TEMP1,$TEMP1
+       vmovdqu         -8+32*9-128($ap),$ACC9
+       vpaddq          $TEMP1,$ACC7,$ACC7
+       vpmuludq        $Bi,$TEMP2,$TEMP2
+       vpaddq          $TEMP2,$ACC8,$ACC8
+       vpmuludq        $Bi,$ACC9,$ACC9
+        vpbroadcastq   16($bp), $Bi
+
+       mov     %rax,%rdx
+       imulq   -128($np),%rax
+       add     %rax,$r1
+        vmovdqu        -8+32*1-128($np),$TEMP0
+       mov     %rdx,%rax
+       imulq   8-128($np),%rax
+       add     %rax,$r2
+        vmovdqu        -8+32*2-128($np),$TEMP1
+       shr     \$29, $r1
+       imulq   16-128($np),%rdx
+       add     %rdx,$r3
+       add     $r1, $r2
+
+       vpmuludq        $Yi,$TEMP0,$TEMP0
+        vmovq          $Bi, %rbx
+       vmovdqu         -8+32*3-128($np),$TEMP2
+       vpaddq          $TEMP0,$ACC1,$ACC1
+       vpmuludq        $Yi,$TEMP1,$TEMP1
+       vmovdqu         -8+32*4-128($np),$TEMP0
+       vpaddq          $TEMP1,$ACC2,$ACC2
+       vpmuludq        $Yi,$TEMP2,$TEMP2
+       vmovdqu         -8+32*5-128($np),$TEMP1
+       vpaddq          $TEMP2,$ACC3,$ACC3
+       vpmuludq        $Yi,$TEMP0,$TEMP0
+       vmovdqu         -8+32*6-128($np),$TEMP2
+       vpaddq          $TEMP0,$ACC4,$ACC4
+       vpmuludq        $Yi,$TEMP1,$TEMP1
+       vmovdqu         -8+32*7-128($np),$TEMP0
+       vpaddq          $TEMP1,$ACC5,$ACC5
+       vpmuludq        $Yi,$TEMP2,$TEMP2
+       vmovdqu         -8+32*8-128($np),$TEMP1
+       vpaddq          $TEMP2,$ACC6,$ACC6
+       vpmuludq        $Yi,$TEMP0,$TEMP0
+       vmovdqu         -8+32*9-128($np),$TEMP2
+       vpaddq          $TEMP0,$ACC7,$ACC7
+       vpmuludq        $Yi,$TEMP1,$TEMP1
+       vpaddq          $TEMP1,$ACC8,$ACC8
+       vpmuludq        $Yi,$TEMP2,$TEMP2
+       vpaddq          $TEMP2,$ACC9,$ACC9
+
+       # ---- multiplier slot at offset 16 (vector loads shifted by -16) ----
+        vmovdqu        -16+32*1-128($ap),$TEMP0
+       mov     %rbx,%rax
+       imulq   -128($ap),%rax
+       add     $r2,%rax
+
+        vmovdqu        -16+32*2-128($ap),$TEMP1
+       mov     %rax,$r2
+       imull   $n0, %eax
+       and     \$0x1fffffff, %eax
+
+        imulq  8-128($ap),%rbx
+        add    %rbx,$r3
+       vpmuludq        $Bi,$TEMP0,$TEMP0
+        vmovd          %eax, $Yi
+       vmovdqu         -16+32*3-128($ap),$TEMP2
+       vpaddq          $TEMP0,$ACC1,$ACC1
+       vpmuludq        $Bi,$TEMP1,$TEMP1
+        vpbroadcastq   $Yi, $Yi
+       vmovdqu         -16+32*4-128($ap),$TEMP0
+       vpaddq          $TEMP1,$ACC2,$ACC2
+       vpmuludq        $Bi,$TEMP2,$TEMP2
+       vmovdqu         -16+32*5-128($ap),$TEMP1
+       vpaddq          $TEMP2,$ACC3,$ACC3
+       vpmuludq        $Bi,$TEMP0,$TEMP0
+       vmovdqu         -16+32*6-128($ap),$TEMP2
+       vpaddq          $TEMP0,$ACC4,$ACC4
+       vpmuludq        $Bi,$TEMP1,$TEMP1
+       vmovdqu         -16+32*7-128($ap),$TEMP0
+       vpaddq          $TEMP1,$ACC5,$ACC5
+       vpmuludq        $Bi,$TEMP2,$TEMP2
+       vmovdqu         -16+32*8-128($ap),$TEMP1
+       vpaddq          $TEMP2,$ACC6,$ACC6
+       vpmuludq        $Bi,$TEMP0,$TEMP0
+       vmovdqu         -16+32*9-128($ap),$TEMP2
+       vpaddq          $TEMP0,$ACC7,$ACC7
+       vpmuludq        $Bi,$TEMP1,$TEMP1
+       vpaddq          $TEMP1,$ACC8,$ACC8
+       vpmuludq        $Bi,$TEMP2,$TEMP2
+        vpbroadcastq   24($bp), $Bi
+       vpaddq          $TEMP2,$ACC9,$ACC9
+
+        vmovdqu        -16+32*1-128($np),$TEMP0
+       mov     %rax,%rdx
+       imulq   -128($np),%rax
+       add     %rax,$r2
+        vmovdqu        -16+32*2-128($np),$TEMP1
+       imulq   8-128($np),%rdx
+       add     %rdx,$r3
+       shr     \$29, $r2
+
+       vpmuludq        $Yi,$TEMP0,$TEMP0
+        vmovq          $Bi, %rbx
+       vmovdqu         -16+32*3-128($np),$TEMP2
+       vpaddq          $TEMP0,$ACC1,$ACC1
+       vpmuludq        $Yi,$TEMP1,$TEMP1
+       vmovdqu         -16+32*4-128($np),$TEMP0
+       vpaddq          $TEMP1,$ACC2,$ACC2
+       vpmuludq        $Yi,$TEMP2,$TEMP2
+       vmovdqu         -16+32*5-128($np),$TEMP1
+       vpaddq          $TEMP2,$ACC3,$ACC3
+       vpmuludq        $Yi,$TEMP0,$TEMP0
+       vmovdqu         -16+32*6-128($np),$TEMP2
+       vpaddq          $TEMP0,$ACC4,$ACC4
+       vpmuludq        $Yi,$TEMP1,$TEMP1
+       vmovdqu         -16+32*7-128($np),$TEMP0
+       vpaddq          $TEMP1,$ACC5,$ACC5
+       vpmuludq        $Yi,$TEMP2,$TEMP2
+       vmovdqu         -16+32*8-128($np),$TEMP1
+       vpaddq          $TEMP2,$ACC6,$ACC6
+       vpmuludq        $Yi,$TEMP0,$TEMP0
+       vmovdqu         -16+32*9-128($np),$TEMP2
+       vpaddq          $TEMP0,$ACC7,$ACC7
+       vpmuludq        $Yi,$TEMP1,$TEMP1
+        vmovdqu        -24+32*1-128($ap),$TEMP0
+       vpaddq          $TEMP1,$ACC8,$ACC8
+       vpmuludq        $Yi,$TEMP2,$TEMP2
+        vmovdqu        -24+32*2-128($ap),$TEMP1
+       vpaddq          $TEMP2,$ACC9,$ACC9
+
+       # ---- multiplier slot at offset 24 (vector loads shifted by -24) ----
+       add     $r2, $r3
+       imulq   -128($ap),%rbx
+       add     %rbx,$r3
+
+       mov     $r3, %rax
+       imull   $n0, %eax
+       and     \$0x1fffffff, %eax
+
+       vpmuludq        $Bi,$TEMP0,$TEMP0
+        vmovd          %eax, $Yi
+       vmovdqu         -24+32*3-128($ap),$TEMP2
+       vpaddq          $TEMP0,$ACC1,$ACC1
+       vpmuludq        $Bi,$TEMP1,$TEMP1
+        vpbroadcastq   $Yi, $Yi
+       vmovdqu         -24+32*4-128($ap),$TEMP0
+       vpaddq          $TEMP1,$ACC2,$ACC2
+       vpmuludq        $Bi,$TEMP2,$TEMP2
+       vmovdqu         -24+32*5-128($ap),$TEMP1
+       vpaddq          $TEMP2,$ACC3,$ACC3
+       vpmuludq        $Bi,$TEMP0,$TEMP0
+       vmovdqu         -24+32*6-128($ap),$TEMP2
+       vpaddq          $TEMP0,$ACC4,$ACC4
+       vpmuludq        $Bi,$TEMP1,$TEMP1
+       vmovdqu         -24+32*7-128($ap),$TEMP0
+       vpaddq          $TEMP1,$ACC5,$ACC5
+       vpmuludq        $Bi,$TEMP2,$TEMP2
+       vmovdqu         -24+32*8-128($ap),$TEMP1
+       vpaddq          $TEMP2,$ACC6,$ACC6
+       vpmuludq        $Bi,$TEMP0,$TEMP0
+       vmovdqu         -24+32*9-128($ap),$TEMP2
+       vpaddq          $TEMP0,$ACC7,$ACC7
+       vpmuludq        $Bi,$TEMP1,$TEMP1
+       vpaddq          $TEMP1,$ACC8,$ACC8
+       vpmuludq        $Bi,$TEMP2,$TEMP2
+        vpbroadcastq   32($bp), $Bi
+       vpaddq          $TEMP2,$ACC9,$ACC9
+        add            \$32, $bp                       # $bp++
+
+       vmovdqu         -24+32*1-128($np),$TEMP0
+       imulq   -128($np),%rax
+       add     %rax,$r3
+       shr     \$29, $r3
+
+       # the additions below also move every accumulator down by one
+       # register for the next pass
+       vmovdqu         -24+32*2-128($np),$TEMP1
+       vpmuludq        $Yi,$TEMP0,$TEMP0
+        vmovq          $Bi, %rbx
+       vmovdqu         -24+32*3-128($np),$TEMP2
+       vpaddq          $TEMP0,$ACC1,$ACC0              # $ACC0==$TEMP0
+       vpmuludq        $Yi,$TEMP1,$TEMP1
+        vmovdqu        $ACC0, (%rsp)                   # transfer $r0-$r3
+       vpaddq          $TEMP1,$ACC2,$ACC1
+       vmovdqu         -24+32*4-128($np),$TEMP0
+       vpmuludq        $Yi,$TEMP2,$TEMP2
+       vmovdqu         -24+32*5-128($np),$TEMP1
+       vpaddq          $TEMP2,$ACC3,$ACC2
+       vpmuludq        $Yi,$TEMP0,$TEMP0
+       vmovdqu         -24+32*6-128($np),$TEMP2
+       vpaddq          $TEMP0,$ACC4,$ACC3
+       vpmuludq        $Yi,$TEMP1,$TEMP1
+       vmovdqu         -24+32*7-128($np),$TEMP0
+       vpaddq          $TEMP1,$ACC5,$ACC4
+       vpmuludq        $Yi,$TEMP2,$TEMP2
+       vmovdqu         -24+32*8-128($np),$TEMP1
+       vpaddq          $TEMP2,$ACC6,$ACC5
+       vpmuludq        $Yi,$TEMP0,$TEMP0
+       vmovdqu         -24+32*9-128($np),$TEMP2
+        mov    $r3, $r0
+       vpaddq          $TEMP0,$ACC7,$ACC6
+       vpmuludq        $Yi,$TEMP1,$TEMP1
+        add    (%rsp), $r0
+       vpaddq          $TEMP1,$ACC8,$ACC7
+       vpmuludq        $Yi,$TEMP2,$TEMP2
+        vmovq  $r3, $TEMP1
+       vpaddq          $TEMP2,$ACC9,$ACC8
+
+       dec     $i
+       jnz     .Loop_mul_1024
+___
+
+# (*)  Original implementation was correcting ACC1-ACC3 for overflow
+#      after 7 loop runs, or after 28 iterations, or 56 additions.
+#      But as we underutilize resources, it's possible to correct in
+#      each iteration with marginal performance loss. But then, as
+#      we do it in each iteration, we can correct less digits, and
+#      avoid performance penalties completely.
+#      All four digits of ACC3 are corrected: the carries of the three
+#      lower digits stay within ACC3, the carry of the most significant
+#      digit is added to the least significant digit of ACC4. Leaving
+#      the most significant digit uncorrected lets it overflow.
+
+# Tail of rsaz_1024_mul_avx2: two carry-propagation passes over the low
+# five accumulators, two over the high five, store of the nine result
+# vectors and the epilogue. The multiplier registers are no longer
+# needed after the loop, so they are reused as temporaries here.
+$TEMP0 = $ACC9;
+$TEMP3 = $Bi;
+$TEMP4 = $Yi;
+$code.=<<___;
+       vpermq          \$0, $AND_MASK, $AND_MASK       # replicate lane 0 of the mask into all lanes
+       vpaddq          (%rsp), $TEMP1, $ACC0   # low words saved at (%rsp) plus the value moved from the scalar side
+
+       # pass 1: split off the bits above 29 in each lane, rotate them
+       # one lane up (vpermq 0x93), splice the wrapped-around lane into
+       # the next register (vpblendd 3) and add
+       vpsrlq          \$29, $ACC0, $TEMP1
+       vpand           $AND_MASK, $ACC0, $ACC0
+       vpsrlq          \$29, $ACC1, $TEMP2
+       vpand           $AND_MASK, $ACC1, $ACC1
+       vpsrlq          \$29, $ACC2, $TEMP3
+       vpermq          \$0x93, $TEMP1, $TEMP1
+       vpand           $AND_MASK, $ACC2, $ACC2
+       vpsrlq          \$29, $ACC3, $TEMP4
+       vpermq          \$0x93, $TEMP2, $TEMP2
+       vpand           $AND_MASK, $ACC3, $ACC3
+
+       vpblendd        \$3, $ZERO, $TEMP1, $TEMP0
+       vpermq          \$0x93, $TEMP3, $TEMP3
+       vpblendd        \$3, $TEMP1, $TEMP2, $TEMP1
+       vpermq          \$0x93, $TEMP4, $TEMP4
+       vpaddq          $TEMP0, $ACC0, $ACC0
+       vpblendd        \$3, $TEMP2, $TEMP3, $TEMP2
+       vpaddq          $TEMP1, $ACC1, $ACC1
+       vpblendd        \$3, $TEMP3, $TEMP4, $TEMP3
+       vpaddq          $TEMP2, $ACC2, $ACC2
+       vpblendd        \$3, $TEMP4, $ZERO, $TEMP4
+       vpaddq          $TEMP3, $ACC3, $ACC3
+       vpaddq          $TEMP4, $ACC4, $ACC4
+
+       # pass 2: same sequence again to absorb carries produced by pass 1
+       vpsrlq          \$29, $ACC0, $TEMP1
+       vpand           $AND_MASK, $ACC0, $ACC0
+       vpsrlq          \$29, $ACC1, $TEMP2
+       vpand           $AND_MASK, $ACC1, $ACC1
+       vpsrlq          \$29, $ACC2, $TEMP3
+       vpermq          \$0x93, $TEMP1, $TEMP1
+       vpand           $AND_MASK, $ACC2, $ACC2
+       vpsrlq          \$29, $ACC3, $TEMP4
+       vpermq          \$0x93, $TEMP2, $TEMP2
+       vpand           $AND_MASK, $ACC3, $ACC3
+       vpermq          \$0x93, $TEMP3, $TEMP3
+
+       vpblendd        \$3, $ZERO, $TEMP1, $TEMP0
+       vpermq          \$0x93, $TEMP4, $TEMP4
+       vpblendd        \$3, $TEMP1, $TEMP2, $TEMP1
+       vpaddq          $TEMP0, $ACC0, $ACC0
+       vpblendd        \$3, $TEMP2, $TEMP3, $TEMP2
+       vpaddq          $TEMP1, $ACC1, $ACC1
+       vpblendd        \$3, $TEMP3, $TEMP4, $TEMP3
+       vpaddq          $TEMP2, $ACC2, $ACC2
+       vpblendd        \$3, $TEMP4, $ZERO, $TEMP4
+       vpaddq          $TEMP3, $ACC3, $ACC3
+       vpaddq          $TEMP4, $ACC4, $ACC4
+
+       vmovdqu         $ACC0, 0-128($rp)
+       vmovdqu         $ACC1, 32-128($rp)
+       vmovdqu         $ACC2, 64-128($rp)
+       vmovdqu         $ACC3, 96-128($rp)
+___
+
+# $ACC0 has just been stored, so its register is free to serve as a
+# fifth temporary for the upper half.
+$TEMP5=$ACC0;
+$code.=<<___;
+       # pass 1 over the upper accumulators; the carry out of the last
+       # register is not propagated any further
+       vpsrlq          \$29, $ACC4, $TEMP1
+       vpand           $AND_MASK, $ACC4, $ACC4
+       vpsrlq          \$29, $ACC5, $TEMP2
+       vpand           $AND_MASK, $ACC5, $ACC5
+       vpsrlq          \$29, $ACC6, $TEMP3
+       vpermq          \$0x93, $TEMP1, $TEMP1
+       vpand           $AND_MASK, $ACC6, $ACC6
+       vpsrlq          \$29, $ACC7, $TEMP4
+       vpermq          \$0x93, $TEMP2, $TEMP2
+       vpand           $AND_MASK, $ACC7, $ACC7
+       vpsrlq          \$29, $ACC8, $TEMP5
+       vpermq          \$0x93, $TEMP3, $TEMP3
+       vpand           $AND_MASK, $ACC8, $ACC8
+       vpermq          \$0x93, $TEMP4, $TEMP4
+
+       vpblendd        \$3, $ZERO, $TEMP1, $TEMP0
+       vpermq          \$0x93, $TEMP5, $TEMP5
+       vpblendd        \$3, $TEMP1, $TEMP2, $TEMP1
+       vpaddq          $TEMP0, $ACC4, $ACC4
+       vpblendd        \$3, $TEMP2, $TEMP3, $TEMP2
+       vpaddq          $TEMP1, $ACC5, $ACC5
+       vpblendd        \$3, $TEMP3, $TEMP4, $TEMP3
+       vpaddq          $TEMP2, $ACC6, $ACC6
+       vpblendd        \$3, $TEMP4, $TEMP5, $TEMP4
+       vpaddq          $TEMP3, $ACC7, $ACC7
+       vpaddq          $TEMP4, $ACC8, $ACC8
+
+       # pass 2 over the upper accumulators
+       vpsrlq          \$29, $ACC4, $TEMP1
+       vpand           $AND_MASK, $ACC4, $ACC4
+       vpsrlq          \$29, $ACC5, $TEMP2
+       vpand           $AND_MASK, $ACC5, $ACC5
+       vpsrlq          \$29, $ACC6, $TEMP3
+       vpermq          \$0x93, $TEMP1, $TEMP1
+       vpand           $AND_MASK, $ACC6, $ACC6
+       vpsrlq          \$29, $ACC7, $TEMP4
+       vpermq          \$0x93, $TEMP2, $TEMP2
+       vpand           $AND_MASK, $ACC7, $ACC7
+       vpsrlq          \$29, $ACC8, $TEMP5
+       vpermq          \$0x93, $TEMP3, $TEMP3
+       vpand           $AND_MASK, $ACC8, $ACC8
+       vpermq          \$0x93, $TEMP4, $TEMP4
+
+       vpblendd        \$3, $ZERO, $TEMP1, $TEMP0
+       vpermq          \$0x93, $TEMP5, $TEMP5
+       vpblendd        \$3, $TEMP1, $TEMP2, $TEMP1
+       vpaddq          $TEMP0, $ACC4, $ACC4
+       vpblendd        \$3, $TEMP2, $TEMP3, $TEMP2
+       vpaddq          $TEMP1, $ACC5, $ACC5
+       vpblendd        \$3, $TEMP3, $TEMP4, $TEMP3
+       vpaddq          $TEMP2, $ACC6, $ACC6
+       vpblendd        \$3, $TEMP4, $TEMP5, $TEMP4
+       vpaddq          $TEMP3, $ACC7, $ACC7
+       vpaddq          $TEMP4, $ACC8, $ACC8
+
+       vmovdqu         $ACC4, 128-128($rp)
+       vmovdqu         $ACC5, 160-128($rp)    
+       vmovdqu         $ACC6, 192-128($rp)
+       vmovdqu         $ACC7, 224-128($rp)
+       vmovdqu         $ACC8, 256-128($rp)
+       vzeroupper
+
+       mov     %rbp, %rax      # %rbp holds the entry %rsp saved by the prologue
+___
+# Win64 only: reload %xmm6-%xmm15 from the offsets used by the prologue.
+$code.=<<___ if ($win64);
+       movaps  -0xd8(%rax),%xmm6
+       movaps  -0xc8(%rax),%xmm7
+       movaps  -0xb8(%rax),%xmm8
+       movaps  -0xa8(%rax),%xmm9
+       movaps  -0x98(%rax),%xmm10
+       movaps  -0x88(%rax),%xmm11
+       movaps  -0x78(%rax),%xmm12
+       movaps  -0x68(%rax),%xmm13
+       movaps  -0x58(%rax),%xmm14
+       movaps  -0x48(%rax),%xmm15
+___
+# Reload the six pushed registers relative to the entry %rsp and return.
+$code.=<<___;
+       mov     -48(%rax),%r15
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp             # restore %rsp
+.Lmul_1024_epilogue:
+       ret
+.size  rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
+___
+}
+# Generators for rsaz_1024_red2norm_avx2 and rsaz_1024_norm2red_avx2,
+# the converters between the 29-bit-per-quadword layout used by the
+# AVX2 routines and sixteen plain 64-bit words. Both routines are fully
+# unrolled by the Perl loops below; @T is a pool of four scratch
+# registers (%r8-%r11) that is rotated as the code is emitted.
+{
+my ($out,$inp) = $win64 ? ("%rcx","%rdx") : ("%rdi","%rsi");
+my @T = map("%r$_",(8..11));
+
+$code.=<<___;
+.globl rsaz_1024_red2norm_avx2
+.type  rsaz_1024_red2norm_avx2,\@abi-omnipotent
+.align 32
+rsaz_1024_red2norm_avx2:
+       sub     \$-128,$inp     # size optimization
+       xor     %rax,%rax
+___
+
+# $i counts output 64-bit words, $j counts input 29-bit digits.
+# %rax carries the bits spilled over from the previous output word.
+for ($j=0,$i=0; $i<16; $i++) {
+    my $k=0;
+    while (29*$j<64*($i+1)) {  # load data till boundary
+       $code.="        mov     `8*$j-128`($inp), @T[0]\n";
+       $j++; $k++; push(@T,shift(@T));
+    }
+    $l=$k;                     # number of digits loaded for this word
+    while ($k>1) {             # shift loaded data but last value
+       $code.="        shl     \$`29*($j-$k)`,@T[-$k]\n";
+       $k--;
+    }
+    # The last digit straddles the word boundary: one copy is shifted
+    # left into this word, the other right to form the spill-over.
+    $code.=<<___;              # shift last value
+       mov     @T[-1], @T[0]
+       shl     \$`29*($j-1)`, @T[-1]
+       shr     \$`-29*($j-1)`, @T[0]
+___
+    while ($l) {               # accumulate all values
+       $code.="        add     @T[-$l], %rax\n";
+       $l--;
+    }
+       $code.=<<___;
+       adc     \$0, @T[0]      # consume eventual carry
+       mov     %rax, 8*$i($out)
+       mov     @T[0], %rax
+___
+    push(@T,shift(@T));
+}
+$code.=<<___;
+       ret
+.size  rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2
+
+.globl rsaz_1024_norm2red_avx2
+.type  rsaz_1024_norm2red_avx2,\@abi-omnipotent
+.align 32
+rsaz_1024_norm2red_avx2:
+       sub     \$-128,$out     # size optimization
+       mov     ($inp),@T[0]
+       mov     \$0x1fffffff,%eax
+___
+# $i counts input 64-bit words, $j counts output 29-bit digits.
+# @T[0] holds the current input word, @T[1] the next one (zero after
+# the last word) so that shrd can pull in the bits of a straddling digit.
+for ($j=0,$i=0; $i<16; $i++) {
+    $code.="   mov     `8*($i+1)`($inp),@T[1]\n"       if ($i<15);
+    $code.="   xor     @T[1],@T[1]\n"                  if ($i==15);
+    my $k=1;
+    while (29*($j+1)<64*($i+1)) {      # digits lying entirely inside the current word
+       $code.=<<___;
+       mov     @T[0],@T[-$k]
+       shr     \$`29*$j`,@T[-$k]
+       and     %rax,@T[-$k]                            # &0x1fffffff
+       mov     @T[-$k],`8*$j-128`($out)
+___
+       $j++; $k++;
+    }
+    # digit that crosses into the next input word
+    $code.=<<___;
+       shrd    \$`29*$j`,@T[1],@T[0]
+       and     %rax,@T[0]
+       mov     @T[0],`8*$j-128`($out)
+___
+    $j++;
+    push(@T,shift(@T));
+}
+# After the final rotation @T[0] names the register cleared by the xor
+# above, so the four stores below write zeros after the last digit.
+$code.=<<___;
+       mov     @T[0],`8*$j-128`($out)                  # zero
+       mov     @T[0],`8*($j+1)-128`($out)
+       mov     @T[0],`8*($j+2)-128`($out)
+       mov     @T[0],`8*($j+3)-128`($out)
+       ret
+.size  rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2
+___
+}
+# Generators for rsaz_1024_scatter5_avx2 and rsaz_1024_gather5_avx2,
+# which store an operand into, and fetch it back from, a table indexed
+# by $power. Each table entry consists of nine 16-byte pieces laid out
+# 512 bytes apart, at byte offset 16*power within each 512-byte row.
+{
+my ($out,$inp,$power) = $win64 ? ("%rcx","%rdx","%r8d") : ("%rdi","%rsi","%edx");
+
+$code.=<<___;
+.globl rsaz_1024_scatter5_avx2
+.type  rsaz_1024_scatter5_avx2,\@abi-omnipotent
+.align 32
+rsaz_1024_scatter5_avx2:
+       vzeroupper
+       vmovdqu .Lscatter_permd(%rip),%ymm5
+       shl     \$4,$power      # 16 bytes per table column
+       lea     ($out,$power),$out
+       mov     \$9,%eax        # nine 32-byte input vectors
+       jmp     .Loop_scatter_1024
+
+.align 32
+.Loop_scatter_1024:
+       vmovdqu         ($inp),%ymm0
+       lea             32($inp),$inp
+       vpermd          %ymm0,%ymm5,%ymm0       # pack the low dword of each quadword into the low 128 bits
+       vmovdqu         %xmm0,($out)
+       lea             16*32($out),$out        # next 512-byte row
+       dec     %eax
+       jnz     .Loop_scatter_1024
+
+       vzeroupper
+       ret
+.size  rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2
+
+.globl rsaz_1024_gather5_avx2
+.type  rsaz_1024_gather5_avx2,\@abi-omnipotent
+.align 32
+rsaz_1024_gather5_avx2:
+___
+# Win64 only: reserve 0xa8 bytes and save %xmm6-%xmm15. The sequence is
+# emitted as raw bytes; the intended instruction is in each trailing
+# comment. %rax = %rsp-0x88, and the new %rsp = %rax-0x20.
+$code.=<<___ if ($win64);
+       lea     -0x88(%rsp),%rax
+       vzeroupper
+.LSEH_begin_rsaz_1024_gather5:
+       # I can't trust assembler to use specific encoding:-(
+       .byte   0x48,0x8d,0x60,0xe0             #lea    -0x20(%rax),%rsp
+       .byte   0xc5,0xf8,0x29,0x70,0xe0        #vmovaps %xmm6,-0x20(%rax)
+       .byte   0xc5,0xf8,0x29,0x78,0xf0        #vmovaps %xmm7,-0x10(%rax)
+       .byte   0xc5,0x78,0x29,0x40,0x00        #vmovaps %xmm8,0(%rax)
+       .byte   0xc5,0x78,0x29,0x48,0x10        #vmovaps %xmm9,0x10(%rax)
+       .byte   0xc5,0x78,0x29,0x50,0x20        #vmovaps %xmm10,0x20(%rax)
+       .byte   0xc5,0x78,0x29,0x58,0x30        #vmovaps %xmm11,0x30(%rax)
+       .byte   0xc5,0x78,0x29,0x60,0x40        #vmovaps %xmm12,0x40(%rax)
+       .byte   0xc5,0x78,0x29,0x68,0x50        #vmovaps %xmm13,0x50(%rax)
+       .byte   0xc5,0x78,0x29,0x70,0x60        #vmovaps %xmm14,0x60(%rax)
+       .byte   0xc5,0x78,0x29,0x78,0x70        #vmovaps %xmm15,0x70(%rax)
+___
+# Selection: power is split into a 64-byte line number (power>>2) and a
+# 16-byte offset inside that line (power&3). Eight byte masks are
+# broadcast from a sliding window over .Lgather_table, whose only
+# non-zero byte is at index 8; exactly one of %xmm8-%xmm15 is therefore
+# all-ones, the one matching the line number. Every pass of the loop
+# reads all eight 64-byte-spaced candidates of a row and combines them
+# under these masks.
+$code.=<<___;
+       lea     .Lgather_table(%rip),%r11
+       mov     $power,%eax
+       and     \$3,$power
+       shr     \$2,%eax                        # cache line number
+       shl     \$4,$power                      # offset within cache line
+
+       vmovdqu         -32(%r11),%ymm7         # .Lgather_permd
+       vpbroadcastb    8(%r11,%rax), %xmm8
+       vpbroadcastb    7(%r11,%rax), %xmm9
+       vpbroadcastb    6(%r11,%rax), %xmm10
+       vpbroadcastb    5(%r11,%rax), %xmm11
+       vpbroadcastb    4(%r11,%rax), %xmm12
+       vpbroadcastb    3(%r11,%rax), %xmm13
+       vpbroadcastb    2(%r11,%rax), %xmm14
+       vpbroadcastb    1(%r11,%rax), %xmm15
+
+       lea     64($inp,$power),$inp
+       mov     \$64,%r11                       # size optimization
+       mov     \$9,%eax
+       jmp     .Loop_gather_1024
+
+.align 32
+.Loop_gather_1024:
+       vpand           -64($inp),              %xmm8,%xmm0
+       vpand           ($inp),                 %xmm9,%xmm1
+       vpand           64($inp),               %xmm10,%xmm2
+       vpand           ($inp,%r11,2),          %xmm11,%xmm3
+        vpor                                   %xmm0,%xmm1,%xmm1
+       vpand           64($inp,%r11,2),        %xmm12,%xmm4
+        vpor                                   %xmm2,%xmm3,%xmm3
+       vpand           ($inp,%r11,4),          %xmm13,%xmm5
+        vpor                                   %xmm1,%xmm3,%xmm3
+       vpand           64($inp,%r11,4),        %xmm14,%xmm6
+        vpor                                   %xmm4,%xmm5,%xmm5
+       vpand           -128($inp,%r11,8),      %xmm15,%xmm2
+       lea             ($inp,%r11,8),$inp      # advance by 512 bytes to the next row
+        vpor                                   %xmm3,%xmm5,%xmm5
+        vpor                                   %xmm2,%xmm6,%xmm6
+        vpor                                   %xmm5,%xmm6,%xmm6
+       vpermd          %ymm6,%ymm7,%ymm6       # spread the four dwords back out, one per quadword
+       vmovdqu         %ymm6,($out)
+       lea             32($out),$out
+       dec     %eax
+       jnz     .Loop_gather_1024
+
+       vpxor   %ymm0,%ymm0,%ymm0
+       vmovdqu %ymm0,($out)                    # tenth output vector is written as zero
+       vzeroupper
+___
+# Win64 only: reload %xmm6-%xmm15 and release the 0xa8-byte frame.
+$code.=<<___ if ($win64);
+       movaps  (%rsp),%xmm6
+       movaps  0x10(%rsp),%xmm7
+       movaps  0x20(%rsp),%xmm8
+       movaps  0x30(%rsp),%xmm9
+       movaps  0x40(%rsp),%xmm10
+       movaps  0x50(%rsp),%xmm11
+       movaps  0x60(%rsp),%xmm12
+       movaps  0x70(%rsp),%xmm13
+       movaps  0x80(%rsp),%xmm14
+       movaps  0x90(%rsp),%xmm15
+       lea     0xa8(%rsp),%rsp
+.LSEH_end_rsaz_1024_gather5:
+___
+$code.=<<___;
+       ret
+.size  rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
+___
+}
+
+$code.=<<___;  # rsaz_avx2_eligible: returns bit 5 of OPENSSL_ia32cap_P[2] as 0/1
+.extern        OPENSSL_ia32cap_P
+.globl rsaz_avx2_eligible
+.type  rsaz_avx2_eligible,\@abi-omnipotent
+.align 32
+rsaz_avx2_eligible:
+       mov     OPENSSL_ia32cap_P+8(%rip),%eax  # third dword of the capability vector
+___
+$code.=<<___   if ($addx);     # only when the assembler can emit the MULX/ADX code
+       mov     \$`1<<8|1<<19`,%ecx
+       mov     \$0,%edx
+       and     %eax,%ecx
+       cmp     \$`1<<8|1<<19`,%ecx     # check for BMI2+AD*X
+       cmove   %edx,%eax               # both present: clear %eax so 0 is returned
+___
+$code.=<<___;
+       and     \$`1<<5`,%eax           # keep only bit 5 (presumably the AVX2 flag)
+       shr     \$5,%eax                # ... and return it as 0 or 1
+       ret
+.size  rsaz_avx2_eligible,.-rsaz_avx2_eligible
+
+.align 64
+.Land_mask:
+       .quad   0x1fffffff,0x1fffffff,0x1fffffff,-1
+.Lscatter_permd:
+       .long   0,2,4,6,7,7,7,7
+.Lgather_permd:
+       .long   0,7,1,7,2,7,3,7
+.Lgather_table:
+       .byte   0,0,0,0,0,0,0,0, 0xff,0,0,0,0,0,0,0
+.align 64
+___
+
+if ($win64) {          # Win64 only: SEH handler plus .pdata/.xdata unwind tables
+$rec="%rcx";           # registers holding the four handler arguments
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___
+.extern        __imp_RtlVirtualUnwind
+.type  rsaz_se_handler,\@abi-omnipotent
+.align 16
+rsaz_se_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp               # room for the RtlVirtualUnwind stack arguments
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # prologue label
+       cmp     %r10,%rbx               # context->Rip<prologue label
+       jb      .Lcommon_seh_tail
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=epilogue label
+       jae     .Lcommon_seh_tail
+
+       mov     160($context),%rax      # pull context->Rbp
+
+       mov     -48(%rax),%r15          # reload the six registers saved below the frame
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       mov     %r15,240($context)      # ... and write them back into the context record
+       mov     %r14,232($context)
+       mov     %r13,224($context)
+       mov     %r12,216($context)
+       mov     %rbp,160($context)
+       mov     %rbx,144($context)
+
+       lea     -0xd8(%rax),%rsi        # %xmm save area
+       lea     512($context),%rdi      # & context.Xmm6
+       mov     \$20,%ecx               # 10*sizeof(%xmm0)/sizeof(%rax)
+       .long   0xa548f3fc              # cld; rep movsq
+
+.Lcommon_seh_tail:
+       mov     8(%rax),%rdi
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT)/sizeof(%rax)
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
+       mov     %r12,48(%rsp)           # arg7
+       mov     %rcx,56(%rsp)           # arg8, (NULL)
+       call    *__imp_RtlVirtualUnwind(%rip)
+
+       mov     \$1,%eax                # ExceptionContinueSearch
+       add     \$64,%rsp
+       popfq
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+       ret
+.size  rsaz_se_handler,.-rsaz_se_handler
+
+.section       .pdata
+.align 4
+       .rva    .LSEH_begin_rsaz_1024_sqr_avx2
+       .rva    .LSEH_end_rsaz_1024_sqr_avx2
+       .rva    .LSEH_info_rsaz_1024_sqr_avx2
+
+       .rva    .LSEH_begin_rsaz_1024_mul_avx2
+       .rva    .LSEH_end_rsaz_1024_mul_avx2
+       .rva    .LSEH_info_rsaz_1024_mul_avx2
+
+       .rva    .LSEH_begin_rsaz_1024_gather5
+       .rva    .LSEH_end_rsaz_1024_gather5
+       .rva    .LSEH_info_rsaz_1024_gather5
+.section       .xdata
+.align 8
+.LSEH_info_rsaz_1024_sqr_avx2:
+       .byte   9,0,0,0
+       .rva    rsaz_se_handler
+       .rva    .Lsqr_1024_body,.Lsqr_1024_epilogue     # HandlerData[0], HandlerData[1]
+.LSEH_info_rsaz_1024_mul_avx2:
+       .byte   9,0,0,0
+       .rva    rsaz_se_handler
+       .rva    .Lmul_1024_body,.Lmul_1024_epilogue     # HandlerData[0], HandlerData[1]
+.LSEH_info_rsaz_1024_gather5:
+       .byte   0x01,0x33,0x16,0x00
+       .byte   0x36,0xf8,0x09,0x00     #vmovaps 0x90(rsp),xmm15
+       .byte   0x31,0xe8,0x08,0x00     #vmovaps 0x80(rsp),xmm14
+       .byte   0x2c,0xd8,0x07,0x00     #vmovaps 0x70(rsp),xmm13
+       .byte   0x27,0xc8,0x06,0x00     #vmovaps 0x60(rsp),xmm12
+       .byte   0x22,0xb8,0x05,0x00     #vmovaps 0x50(rsp),xmm11
+       .byte   0x1d,0xa8,0x04,0x00     #vmovaps 0x40(rsp),xmm10
+       .byte   0x18,0x98,0x03,0x00     #vmovaps 0x30(rsp),xmm9
+       .byte   0x13,0x88,0x02,0x00     #vmovaps 0x20(rsp),xmm8
+       .byte   0x0e,0x78,0x01,0x00     #vmovaps 0x10(rsp),xmm7
+       .byte   0x09,0x68,0x00,0x00     #vmovaps 0x00(rsp),xmm6
+       .byte   0x04,0x01,0x15,0x00     #sub    rsp,0xa8
+___
+}
+
+foreach (split("\n",$code)) {          # post-process and emit one line at a time
+       s/\`([^\`]*)\`/eval($1)/ge;     # evaluate every back-quoted expression
+       FIXUP: {                        # at most one fix-up per line: the first that matches
+               last FIXUP if s/\b(sh[rl]d?\s+\$)(-?[0-9]+)/$1.$2%64/ge;
+               last FIXUP if s/\b(vmov[dq])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go;
+               last FIXUP if s/\b(vmovdqu)\b(.+)%x%ymm([0-9]+)/$1$2%xmm$3/go;
+               last FIXUP if s/\b(vpinsr[qd])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go;
+               last FIXUP if s/\b(vpextr[qd])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go;
+               s/\b(vpbroadcast[qd]\s+)%ymm([0-9]+)/$1%xmm$2/go;
+       }
+       print $_,"\n";
+}
+
+}}} else {{{
+print <<___;   # assembler is too old
+.text
+
+.globl rsaz_avx2_eligible
+.type  rsaz_avx2_eligible,\@abi-omnipotent
+rsaz_avx2_eligible:
+       xor     %eax,%eax       # always answer 0
+       ret
+.size  rsaz_avx2_eligible,.-rsaz_avx2_eligible
+
+.globl rsaz_1024_sqr_avx2
+.globl rsaz_1024_mul_avx2
+.globl rsaz_1024_norm2red_avx2
+.globl rsaz_1024_red2norm_avx2
+.globl rsaz_1024_scatter5_avx2
+.globl rsaz_1024_gather5_avx2
+.type  rsaz_1024_sqr_avx2,\@abi-omnipotent
+rsaz_1024_sqr_avx2:            # all six entry points share the one stub below
+rsaz_1024_mul_avx2:
+rsaz_1024_norm2red_avx2:
+rsaz_1024_red2norm_avx2:
+rsaz_1024_scatter5_avx2:
+rsaz_1024_gather5_avx2:
+       .byte   0x0f,0x0b       # ud2
+       ret
+.size  rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
+___
+}}}
+
+close STDOUT or die "error closing STDOUT: $!";        # do not silently accept truncated output
diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl
new file mode 100755 (executable)
index 0000000..3bd45db
--- /dev/null
@@ -0,0 +1,2144 @@
+#!/usr/bin/env perl
+
+##############################################################################
+#                                                                            #
+#  Copyright (c) 2012, Intel Corporation                                     #
+#                                                                            #
+#  All rights reserved.                                                      #
+#                                                                            #
+#  Redistribution and use in source and binary forms, with or without        #
+#  modification, are permitted provided that the following conditions are    #
+#  met:                                                                      #
+#                                                                            #
+#  *  Redistributions of source code must retain the above copyright         #
+#     notice, this list of conditions and the following disclaimer.          #
+#                                                                            #
+#  *  Redistributions in binary form must reproduce the above copyright      #
+#     notice, this list of conditions and the following disclaimer in the    #
+#     documentation and/or other materials provided with the                 #
+#     distribution.                                                          #
+#                                                                            #
+#  *  Neither the name of the Intel Corporation nor the names of its         #
+#     contributors may be used to endorse or promote products derived from   #
+#     this software without specific prior written permission.               #
+#                                                                            #
+#                                                                            #
+#  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY          #
+#  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         #
+#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        #
+#  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            #
+#  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #
+#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       #
+#  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        #
+#  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    #
+#  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      #
+#  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        #
+#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              #
+#                                                                            #
+##############################################################################
+# Developers and authors:                                                    #
+# Shay Gueron (1, 2), and Vlad Krasnov (1)                                   #
+# (1) Intel Architecture Group, Microprocessor and Chipset Development,      #
+#     Israel Development Center, Haifa, Israel                               #
+# (2) University of Haifa                                                    #
+##############################################################################
+# Reference:                                                                 #
+# [1] S. Gueron, "Efficient Software Implementations of Modular              #
+#     Exponentiation", http://eprint.iacr.org/2011/239                       #
+# [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring".             #
+#     IEEE Proceedings of 9th International Conference on Information        #
+#     Technology: New Generations (ITNG 2012), 821-823 (2012).               #
+# [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation#
+#     Journal of Cryptographic Engineering 2:31-43 (2012).                   #
+# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis    #
+#     resistant 512-bit and 1024-bit modular exponentiation for optimizing   #
+#     RSA1024 and RSA2048 on x86_64 platforms",                              #
+#     http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest#
+##############################################################################
+
+# While the original submission covers 512- and 1024-bit exponentiation,
+# this module is limited to the 512-bit version only (and as such
+# accelerates RSA1024 sign). This is because the improvement for longer
+# keys is not high enough to justify the effort; the highest measured
+# was ~5% on Westmere. [This is relative to OpenSSL 1.0.2, which was
+# still upcoming at the time of this writing!] Nor does this module
+# implement a "monolithic" complete exponentiation jumbo-subroutine; it
+# adheres to a more modular mixture of C and assembly. And it's optimized
+# even for processors other than the Intel Core family (see table below
+# for improvement coefficients).
+#                                              <appro@openssl.org>
+#
+# RSA1024 sign/sec     this/original   |this/rsax(*)   this/fips(*)
+#                      ----------------+---------------------------
+# Opteron              +13%            |+5%            +20%
+# Bulldozer            -0%             |-1%            +10%
+# P4                   +11%            |+7%            +8%
+# Westmere             +5%             |+14%           +17%
+# Sandy Bridge         +2%             |+12%           +29%
+# Ivy Bridge           +1%             |+11%           +35%
+# Haswell(**)          -0%             |+12%           +39%
+# Atom                 +13%            |+11%           +4%
+# VIA Nano             +70%            |+9%            +25%
+#
+# (*)  rsax engine and fips numbers are presented for reference
+#      purposes;
+# (**) MULX was attempted, but found to give only marginal improvement;
+
+$flavour = shift;                      # target flavour, or the output file if it contains a dot
+$output  = shift;                      # output file name
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory this script lives in
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!";
+*STDOUT=*OUT;                          # everything printed is piped through the translator
+
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $addx = ($1>=2.23);             # emit the MULX/ADCX/ADOX path for GNU as 2.23+
+}
+
+if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+           `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $addx = ($1>=2.10);             # ... or NASM 2.10+
+}
+
+if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+           `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $addx = ($1>=12);               # ... or ml64 version 12+
+}
+
+if (!$addx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([0-9]+)\.([0-9]+)/) {
+       my $ver = $2 + $3/100.0;        # 3.1->3.01, 3.10->3.10
+       $addx = ($ver>=3.03);           # ... or clang 3.3+ (multi-digit majors now match too)
+}
+
+($out, $inp, $mod) = ("%rdi", "%rsi", "%rbp"); # common internal API
+{
+my ($out,$inp,$mod,$n0,$times) = ("%rdi","%rsi","%rdx","%rcx","%r8d");        # the five arguments
+
+$code.=<<___;
+.text
+
+.extern        OPENSSL_ia32cap_P
+
+.globl rsaz_512_sqr
+.type  rsaz_512_sqr,\@function,5
+.align 32
+rsaz_512_sqr:                          # 25-29% faster than rsaz_512_mul
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       subq    \$128+24, %rsp          # 128-byte product buffer + 24 bytes of spill slots
+.Lsqr_body:
+       movq    $mod, %rbp              # common argument
+       movq    ($inp), %rdx            # a[0]
+       movq    8($inp), %rax           # a[1]
+       movq    $n0, 128(%rsp)          # spill n0
+___
+$code.=<<___ if ($addx);
+       movl    \$0x80100,%r11d
+       andl    OPENSSL_ia32cap_P+8(%rip),%r11d
+       cmpl    \$0x80100,%r11d         # check for MULX and ADO/CX
+       je      .Loop_sqrx
+___
+$code.=<<___;
+       jmp     .Loop_sqr
+
+.align 32
+.Loop_sqr:
+       movl    $times,128+8(%rsp)      # spill the loop counter
+#first iteration: a[0]*a[1..7] and a[0]^2, words 0-1 of the square stored
+       movq    %rdx, %rbx
+       mulq    %rdx
+       movq    %rax, %r8
+       movq    16($inp), %rax
+       movq    %rdx, %r9
+
+       mulq    %rbx
+       addq    %rax, %r9
+       movq    24($inp), %rax
+       movq    %rdx, %r10
+       adcq    \$0, %r10
+
+       mulq    %rbx
+       addq    %rax, %r10
+       movq    32($inp), %rax
+       movq    %rdx, %r11
+       adcq    \$0, %r11
+
+       mulq    %rbx
+       addq    %rax, %r11
+       movq    40($inp), %rax
+       movq    %rdx, %r12
+       adcq    \$0, %r12
+
+       mulq    %rbx
+       addq    %rax, %r12
+       movq    48($inp), %rax
+       movq    %rdx, %r13
+       adcq    \$0, %r13
+
+       mulq    %rbx
+       addq    %rax, %r13
+       movq    56($inp), %rax
+       movq    %rdx, %r14
+       adcq    \$0, %r14
+
+       mulq    %rbx
+       addq    %rax, %r14
+       movq    %rbx, %rax
+       movq    %rdx, %r15
+       adcq    \$0, %r15
+
+       addq    %r8, %r8                #shlq   \$1, %r8
+       movq    %r9, %rcx
+       adcq    %r9, %r9                #shld   \$1, %r8, %r9
+
+       mulq    %rax
+       movq    %rax, (%rsp)
+       addq    %rdx, %r8
+       adcq    \$0, %r9
+
+       movq    %r8, 8(%rsp)
+       shrq    \$63, %rcx
+
+#second iteration: a[1]*a[2..7] and a[1]^2, words 2-3 stored
+       movq    8($inp), %r8
+       movq    16($inp), %rax
+       mulq    %r8
+       addq    %rax, %r10
+       movq    24($inp), %rax
+       movq    %rdx, %rbx
+       adcq    \$0, %rbx
+
+       mulq    %r8
+       addq    %rax, %r11
+       movq    32($inp), %rax
+       adcq    \$0, %rdx
+       addq    %rbx, %r11
+       movq    %rdx, %rbx
+       adcq    \$0, %rbx
+
+       mulq    %r8
+       addq    %rax, %r12
+       movq    40($inp), %rax
+       adcq    \$0, %rdx
+       addq    %rbx, %r12
+       movq    %rdx, %rbx
+       adcq    \$0, %rbx
+
+       mulq    %r8
+       addq    %rax, %r13
+       movq    48($inp), %rax
+       adcq    \$0, %rdx
+       addq    %rbx, %r13
+       movq    %rdx, %rbx
+       adcq    \$0, %rbx
+
+       mulq    %r8
+       addq    %rax, %r14
+       movq    56($inp), %rax
+       adcq    \$0, %rdx
+       addq    %rbx, %r14
+       movq    %rdx, %rbx
+       adcq    \$0, %rbx
+
+       mulq    %r8
+       addq    %rax, %r15
+       movq    %r8, %rax
+       adcq    \$0, %rdx
+       addq    %rbx, %r15
+       movq    %rdx, %r8
+       movq    %r10, %rdx
+       adcq    \$0, %r8
+
+       add     %rdx, %rdx
+       lea     (%rcx,%r10,2), %r10     #shld   \$1, %rcx, %r10
+       movq    %r11, %rbx
+       adcq    %r11, %r11              #shld   \$1, %r10, %r11
+
+       mulq    %rax
+       addq    %rax, %r9
+       adcq    %rdx, %r10
+       adcq    \$0, %r11
+
+       movq    %r9, 16(%rsp)
+       movq    %r10, 24(%rsp)
+       shrq    \$63, %rbx
+       
+#third iteration: a[2]*a[3..7] and a[2]^2, words 4-5 stored
+       movq    16($inp), %r9   
+       movq    24($inp), %rax
+       mulq    %r9
+       addq    %rax, %r12
+       movq    32($inp), %rax
+       movq    %rdx, %rcx
+       adcq    \$0, %rcx
+
+       mulq    %r9
+       addq    %rax, %r13
+       movq    40($inp), %rax
+       adcq    \$0, %rdx
+       addq    %rcx, %r13
+       movq    %rdx, %rcx
+       adcq    \$0, %rcx
+
+       mulq    %r9
+       addq    %rax, %r14
+       movq    48($inp), %rax
+       adcq    \$0, %rdx
+       addq    %rcx, %r14
+       movq    %rdx, %rcx
+       adcq    \$0, %rcx
+
+       mulq    %r9
+        movq   %r12, %r10
+        lea    (%rbx,%r12,2), %r12     #shld   \$1, %rbx, %r12
+       addq    %rax, %r15
+       movq    56($inp), %rax
+       adcq    \$0, %rdx
+       addq    %rcx, %r15
+       movq    %rdx, %rcx
+       adcq    \$0, %rcx
+
+       mulq    %r9
+        shrq   \$63, %r10
+       addq    %rax, %r8
+       movq    %r9, %rax
+       adcq    \$0, %rdx
+       addq    %rcx, %r8
+       movq    %rdx, %r9
+       adcq    \$0, %r9
+
+       movq    %r13, %rcx
+       leaq    (%r10,%r13,2), %r13     #shld   \$1, %r12, %r13
+
+       mulq    %rax
+       addq    %rax, %r11
+       adcq    %rdx, %r12
+       adcq    \$0, %r13
+
+       movq    %r11, 32(%rsp)
+       movq    %r12, 40(%rsp)
+       shrq    \$63, %rcx
+
+#fourth iteration: a[3]*a[4..7] and a[3]^2, words 6-7 stored
+       movq    24($inp), %r10
+       movq    32($inp), %rax
+       mulq    %r10
+       addq    %rax, %r14
+       movq    40($inp), %rax
+       movq    %rdx, %rbx
+       adcq    \$0, %rbx
+
+       mulq    %r10
+       addq    %rax, %r15
+       movq    48($inp), %rax
+       adcq    \$0, %rdx
+       addq    %rbx, %r15
+       movq    %rdx, %rbx
+       adcq    \$0, %rbx
+
+       mulq    %r10
+        movq   %r14, %r12
+        leaq   (%rcx,%r14,2), %r14     #shld   \$1, %rcx, %r14
+       addq    %rax, %r8
+       movq    56($inp), %rax
+       adcq    \$0, %rdx
+       addq    %rbx, %r8
+       movq    %rdx, %rbx
+       adcq    \$0, %rbx
+
+       mulq    %r10
+        shrq   \$63, %r12
+       addq    %rax, %r9
+       movq    %r10, %rax
+       adcq    \$0, %rdx
+       addq    %rbx, %r9
+       movq    %rdx, %r10
+       adcq    \$0, %r10
+
+       movq    %r15, %rbx
+       leaq    (%r12,%r15,2),%r15      #shld   \$1, %r14, %r15
+
+       mulq    %rax
+       addq    %rax, %r13
+       adcq    %rdx, %r14
+       adcq    \$0, %r15
+
+       movq    %r13, 48(%rsp)
+       movq    %r14, 56(%rsp)
+       shrq    \$63, %rbx
+
+#fifth iteration: a[4]*a[5..7] and a[4]^2, words 8-9 stored
+       movq    32($inp), %r11
+       movq    40($inp), %rax
+       mulq    %r11
+       addq    %rax, %r8
+       movq    48($inp), %rax
+       movq    %rdx, %rcx
+       adcq    \$0, %rcx
+
+       mulq    %r11
+       addq    %rax, %r9
+       movq    56($inp), %rax
+       adcq    \$0, %rdx
+        movq   %r8, %r12
+        leaq   (%rbx,%r8,2), %r8       #shld   \$1, %rbx, %r8
+       addq    %rcx, %r9
+       movq    %rdx, %rcx
+       adcq    \$0, %rcx
+
+       mulq    %r11
+        shrq   \$63, %r12
+       addq    %rax, %r10
+       movq    %r11, %rax
+       adcq    \$0, %rdx
+       addq    %rcx, %r10
+       movq    %rdx, %r11
+       adcq    \$0, %r11
+
+       movq    %r9, %rcx
+       leaq    (%r12,%r9,2), %r9       #shld   \$1, %r8, %r9
+
+       mulq    %rax
+       addq    %rax, %r15
+       adcq    %rdx, %r8
+       adcq    \$0, %r9
+
+       movq    %r15, 64(%rsp)
+       movq    %r8, 72(%rsp)
+       shrq    \$63, %rcx
+
+#sixth iteration: a[5]*a[6..7] and a[5]^2, words 10-11 stored
+       movq    40($inp), %r12
+       movq    48($inp), %rax
+       mulq    %r12
+       addq    %rax, %r10
+       movq    56($inp), %rax
+       movq    %rdx, %rbx
+       adcq    \$0, %rbx
+
+       mulq    %r12
+       addq    %rax, %r11
+       movq    %r12, %rax
+        movq   %r10, %r15
+        leaq   (%rcx,%r10,2), %r10     #shld   \$1, %rcx, %r10
+       adcq    \$0, %rdx
+        shrq   \$63, %r15
+       addq    %rbx, %r11
+       movq    %rdx, %r12
+       adcq    \$0, %r12
+
+       movq    %r11, %rbx
+       leaq    (%r15,%r11,2), %r11     #shld   \$1, %r10, %r11
+
+       mulq    %rax
+       addq    %rax, %r9
+       adcq    %rdx, %r10
+       adcq    \$0, %r11
+
+       movq    %r9, 80(%rsp)
+       movq    %r10, 88(%rsp)
+
+#seventh iteration: a[6]*a[7] and a[6]^2, words 12-13 stored
+       movq    48($inp), %r13
+       movq    56($inp), %rax
+       mulq    %r13
+       addq    %rax, %r12
+       movq    %r13, %rax
+       movq    %rdx, %r13
+       adcq    \$0, %r13
+
+       xorq    %r14, %r14
+       shlq    \$1, %rbx
+       adcq    %r12, %r12              #shld   \$1, %rbx, %r12
+       adcq    %r13, %r13              #shld   \$1, %r12, %r13
+       adcq    %r14, %r14              #shld   \$1, %r13, %r14
+
+       mulq    %rax
+       addq    %rax, %r11
+       adcq    %rdx, %r12
+       adcq    \$0, %r13
+
+       movq    %r11, 96(%rsp)
+       movq    %r12, 104(%rsp)
+
+#eighth iteration: a[7]^2, words 14-15 stored
+       movq    56($inp), %rax
+       mulq    %rax
+       addq    %rax, %r13
+       adcq    \$0, %rdx
+
+       addq    %rdx, %r14
+
+       movq    %r13, 112(%rsp)
+       movq    %r14, 120(%rsp)
+
+       movq    (%rsp), %r8             # reload words 0-7 of the square
+       movq    8(%rsp), %r9
+       movq    16(%rsp), %r10
+       movq    24(%rsp), %r11
+       movq    32(%rsp), %r12
+       movq    40(%rsp), %r13
+       movq    48(%rsp), %r14
+       movq    56(%rsp), %r15
+
+       call    __rsaz_512_reduce
+
+       addq    64(%rsp), %r8           # add words 8-15 of the square
+       adcq    72(%rsp), %r9
+       adcq    80(%rsp), %r10
+       adcq    88(%rsp), %r11
+       adcq    96(%rsp), %r12
+       adcq    104(%rsp), %r13
+       adcq    112(%rsp), %r14
+       adcq    120(%rsp), %r15
+       sbbq    %rcx, %rcx              # 0 or -1 from the final carry
+
+       call    __rsaz_512_subtract
+
+       movq    %r8, %rdx               # preload a[0] and a[1] for the next pass
+       movq    %r9, %rax
+       movl    128+8(%rsp), $times
+       movq    $out, $inp              # the next pass squares the result
+
+       decl    $times
+       jnz     .Loop_sqr
+___
+if ($addx) {   # MULX/ADCX/ADOX flavour of the squaring loop
+$code.=<<___;
+       jmp     .Lsqr_tail
+
+.align 32
+.Loop_sqrx:
+       movl    $times,128+8(%rsp)
+       movq    $out, %xmm0             # off-load
+       movq    %rbp, %xmm1             # off-load
+#first iteration: a[0]*a[1..7] and a[0]^2, words 0-1 of the square stored
+       mulx    %rax, %r8, %r9
+
+       mulx    16($inp), %rcx, %r10
+       xor     %rbp, %rbp              # cf=0, of=0
+
+       mulx    24($inp), %rax, %r11
+       adcx    %rcx, %r9
+
+       mulx    32($inp), %rcx, %r12
+       adcx    %rax, %r10
+
+       mulx    40($inp), %rax, %r13
+       adcx    %rcx, %r11
+
+       .byte   0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00    # mulx  48($inp), %rcx, %r14
+       adcx    %rax, %r12
+       adcx    %rcx, %r13
+
+       .byte   0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00    # mulx  56($inp), %rax, %r15
+       adcx    %rax, %r14
+       adcx    %rbp, %r15              # %rbp is 0
+
+       mov     %r9, %rcx
+       shld    \$1, %r8, %r9
+       shl     \$1, %r8
+
+       xor     %ebp, %ebp
+       mulx    %rdx, %rax, %rdx
+       adcx    %rdx, %r8
+        mov    8($inp), %rdx
+       adcx    %rbp, %r9
+
+       mov     %rax, (%rsp)
+       mov     %r8, 8(%rsp)
+
+#second iteration: a[1]*a[2..7] and a[1]^2, words 2-3 stored
+       mulx    16($inp), %rax, %rbx
+       adox    %rax, %r10
+       adcx    %rbx, %r11
+
+       .byte   0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00    # mulx  24($inp), $out, %r8
+       adox    $out, %r11
+       adcx    %r8, %r12
+
+       mulx    32($inp), %rax, %rbx
+       adox    %rax, %r12
+       adcx    %rbx, %r13
+
+       mulx    40($inp), $out, %r8
+       adox    $out, %r13
+       adcx    %r8, %r14
+
+       .byte   0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00    # mulx  48($inp), %rax, %rbx
+       adox    %rax, %r14
+       adcx    %rbx, %r15
+
+       .byte   0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00    # mulx  56($inp), $out, %r8
+       adox    $out, %r15
+       adcx    %rbp, %r8
+       adox    %rbp, %r8
+
+       mov     %r11, %rbx
+       shld    \$1, %r10, %r11
+       shld    \$1, %rcx, %r10
+
+       xor     %ebp,%ebp
+       mulx    %rdx, %rax, %rcx
+        mov    16($inp), %rdx
+       adcx    %rax, %r9
+       adcx    %rcx, %r10
+       adcx    %rbp, %r11
+
+       mov     %r9, 16(%rsp)
+       .byte   0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00         # mov   %r10, 24(%rsp)
+       
+#third iteration: a[2]*a[3..7] and a[2]^2, words 4-5 stored
+       .byte   0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00    # mulx  24($inp), $out, %r9
+       adox    $out, %r12
+       adcx    %r9, %r13
+
+       mulx    32($inp), %rax, %rcx
+       adox    %rax, %r13
+       adcx    %rcx, %r14
+
+       mulx    40($inp), $out, %r9
+       adox    $out, %r14
+       adcx    %r9, %r15
+
+       .byte   0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00    # mulx  48($inp), %rax, %rcx
+       adox    %rax, %r15
+       adcx    %rcx, %r8
+
+       .byte   0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00    # mulx  56($inp), $out, %r9
+       adox    $out, %r8
+       adcx    %rbp, %r9
+       adox    %rbp, %r9
+
+       mov     %r13, %rcx
+       shld    \$1, %r12, %r13
+       shld    \$1, %rbx, %r12
+
+       xor     %ebp, %ebp
+       mulx    %rdx, %rax, %rdx
+       adcx    %rax, %r11
+       adcx    %rdx, %r12
+        mov    24($inp), %rdx
+       adcx    %rbp, %r13
+
+       mov     %r11, 32(%rsp)
+       .byte   0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00         # mov   %r12, 40(%rsp)
+       
+#fourth iteration: a[3]*a[4..7] and a[3]^2, words 6-7 stored
+       .byte   0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00    # mulx  32($inp), %rax, %rbx
+       adox    %rax, %r14
+       adcx    %rbx, %r15
+
+       mulx    40($inp), $out, %r10
+       adox    $out, %r15
+       adcx    %r10, %r8
+
+       mulx    48($inp), %rax, %rbx
+       adox    %rax, %r8
+       adcx    %rbx, %r9
+
+       mulx    56($inp), $out, %r10
+       adox    $out, %r9
+       adcx    %rbp, %r10
+       adox    %rbp, %r10
+
+       .byte   0x66
+       mov     %r15, %rbx
+       shld    \$1, %r14, %r15
+       shld    \$1, %rcx, %r14
+
+       xor     %ebp, %ebp
+       mulx    %rdx, %rax, %rdx
+       adcx    %rax, %r13
+       adcx    %rdx, %r14
+        mov    32($inp), %rdx
+       adcx    %rbp, %r15
+
+       mov     %r13, 48(%rsp)
+       mov     %r14, 56(%rsp)
+       
+#fifth iteration: a[4]*a[5..7] and a[4]^2, words 8-9 stored
+       .byte   0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00    # mulx  40($inp), $out, %r11
+       adox    $out, %r8
+       adcx    %r11, %r9
+
+       mulx    48($inp), %rax, %rcx
+       adox    %rax, %r9
+       adcx    %rcx, %r10
+
+       mulx    56($inp), $out, %r11
+       adox    $out, %r10
+       adcx    %rbp, %r11
+       adox    %rbp, %r11
+
+       mov     %r9, %rcx
+       shld    \$1, %r8, %r9
+       shld    \$1, %rbx, %r8
+
+       xor     %ebp, %ebp
+       mulx    %rdx, %rax, %rdx
+       adcx    %rax, %r15
+       adcx    %rdx, %r8
+        mov    40($inp), %rdx
+       adcx    %rbp, %r9
+
+       mov     %r15, 64(%rsp)
+       mov     %r8, 72(%rsp)
+       
+#sixth iteration: a[5]*a[6..7] and a[5]^2, words 10-11 stored
+       .byte   0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00    # mulx  48($inp), %rax, %rbx
+       adox    %rax, %r10
+       adcx    %rbx, %r11
+
+       .byte   0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00    # mulx  56($inp), $out, %r12
+       adox    $out, %r11
+       adcx    %rbp, %r12
+       adox    %rbp, %r12
+
+       mov     %r11, %rbx
+       shld    \$1, %r10, %r11
+       shld    \$1, %rcx, %r10
+
+       xor     %ebp, %ebp
+       mulx    %rdx, %rax, %rdx
+       adcx    %rax, %r9
+       adcx    %rdx, %r10
+        mov    48($inp), %rdx
+       adcx    %rbp, %r11
+
+       mov     %r9, 80(%rsp)
+       mov     %r10, 88(%rsp)
+
+#seventh iteration: a[6]*a[7] and a[6]^2, words 12-13 stored
+       .byte   0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00    # mulx  56($inp), %rax, %r13
+       adox    %rax, %r12
+       adox    %rbp, %r13
+
+       xor     %r14, %r14
+       shld    \$1, %r13, %r14
+       shld    \$1, %r12, %r13
+       shld    \$1, %rbx, %r12
+
+       xor     %ebp, %ebp
+       mulx    %rdx, %rax, %rdx
+       adcx    %rax, %r11
+       adcx    %rdx, %r12
+        mov    56($inp), %rdx
+       adcx    %rbp, %r13
+
+       .byte   0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00         # mov   %r11, 96(%rsp)
+       .byte   0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00         # mov   %r12, 104(%rsp)
+
+#eighth iteration: a[7]^2, words 14-15 stored
+       mulx    %rdx, %rax, %rdx
+       adox    %rax, %r13
+       adox    %rbp, %rdx
+
+       .byte   0x66
+       add     %rdx, %r14
+
+       movq    %r13, 112(%rsp)
+       movq    %r14, 120(%rsp)
+       movq    %xmm0, $out
+       movq    %xmm1, %rbp
+
+       movq    128(%rsp), %rdx         # pull $n0
+       movq    (%rsp), %r8
+       movq    8(%rsp), %r9
+       movq    16(%rsp), %r10
+       movq    24(%rsp), %r11
+       movq    32(%rsp), %r12
+       movq    40(%rsp), %r13
+       movq    48(%rsp), %r14
+       movq    56(%rsp), %r15
+
+       call    __rsaz_512_reducex
+
+       addq    64(%rsp), %r8           # add words 8-15 of the square
+       adcq    72(%rsp), %r9
+       adcq    80(%rsp), %r10
+       adcq    88(%rsp), %r11
+       adcq    96(%rsp), %r12
+       adcq    104(%rsp), %r13
+       adcq    112(%rsp), %r14
+       adcq    120(%rsp), %r15
+       sbbq    %rcx, %rcx              # 0 or -1 from the final carry
+
+       call    __rsaz_512_subtract
+
+       movq    %r8, %rdx               # preload a[0] and a[1] for the next pass
+       movq    %r9, %rax
+       movl    128+8(%rsp), $times
+       movq    $out, $inp              # the next pass squares the result
+
+       decl    $times
+       jnz     .Loop_sqrx
+
+.Lsqr_tail:
+___
+}
+$code.=<<___;
+
+       leaq    128+24+48(%rsp), %rax   # stack pointer as it was on entry (frame + 6 pushes)
+       movq    -48(%rax), %r15         # restore the registers pushed in the prologue
+       movq    -40(%rax), %r14
+       movq    -32(%rax), %r13
+       movq    -24(%rax), %r12
+       movq    -16(%rax), %rbp
+       movq    -8(%rax), %rbx
+       leaq    (%rax), %rsp
+.Lsqr_epilogue:
+       ret
+.size  rsaz_512_sqr,.-rsaz_512_sqr
+___
+}
+{
+# rsaz_512_mul(out, ap, bp, mod, n0)
+#
+# Builds the 16-word product of the 8-word operands at ap and bp in the
+# stack scratch area, runs the lower half through __rsaz_512_reduce (or
+# __rsaz_512_reducex), adds the upper half and passes the sum plus a
+# carry mask to __rsaz_512_subtract, which stores the 8-word result at
+# out.
+# NOTE(review): this looks like a 512-bit Montgomery multiplication with
+# n0 being the usual per-modulus constant - confirm against the C caller.
+#
+# Stack frame: 128 bytes of product scratch at (%rsp) and n0 at
+# 128(%rsp).  The six pushed registers sit directly above the 128+24
+# byte area, which is why the epilogue addresses them relative to
+# 128+24+48(%rsp).
+my ($out,$ap,$bp,$mod,$n0) = ("%rdi","%rsi","%rdx","%rcx","%r8");
+$code.=<<___;
+.globl rsaz_512_mul
+.type  rsaz_512_mul,\@function,5
+.align 32
+rsaz_512_mul:
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       subq    \$128+24, %rsp
+.Lmul_body:
+       movq    $out, %xmm0             # off-load arguments
+       movq    $mod, %xmm1
+       movq    $n0, 128(%rsp)
+___
+# Run-time dispatch, emitted only when the assembler supports the
+# instructions: take the .Lmulx path if both capability bits are set.
+$code.=<<___ if ($addx);
+       movl    \$0x80100,%r11d
+       andl    OPENSSL_ia32cap_P+8(%rip),%r11d
+       cmpl    \$0x80100,%r11d         # check for MULX and ADO/CX
+       je      .Lmulx
+___
+$code.=<<___;
+       movq    ($bp), %rbx             # pass b[0]
+       movq    $bp, %rbp               # pass argument
+       call    __rsaz_512_mul
+
+       movq    %xmm0, $out
+       movq    %xmm1, %rbp
+
+       # lower half of the product is the input of the reduction
+       movq    (%rsp), %r8
+       movq    8(%rsp), %r9
+       movq    16(%rsp), %r10
+       movq    24(%rsp), %r11
+       movq    32(%rsp), %r12
+       movq    40(%rsp), %r13
+       movq    48(%rsp), %r14
+       movq    56(%rsp), %r15
+
+       call    __rsaz_512_reduce
+___
+$code.=<<___ if ($addx);
+       jmp     .Lmul_tail
+
+.align 32
+.Lmulx:
+       # bp lives in %rdx, so the pointer is copied to %rbp before %rdx
+       # is overwritten with b[0]
+       movq    $bp, %rbp               # pass argument
+       movq    ($bp), %rdx             # pass b[0]
+       call    __rsaz_512_mulx
+
+       movq    %xmm0, $out
+       movq    %xmm1, %rbp
+
+       movq    128(%rsp), %rdx         # pull $n0
+       movq    (%rsp), %r8
+       movq    8(%rsp), %r9
+       movq    16(%rsp), %r10
+       movq    24(%rsp), %r11
+       movq    32(%rsp), %r12
+       movq    40(%rsp), %r13
+       movq    48(%rsp), %r14
+       movq    56(%rsp), %r15
+
+       call    __rsaz_512_reducex
+.Lmul_tail:
+___
+$code.=<<___;
+       # add the upper half of the product to the reduced lower half;
+       # sbbq turns the final carry into an all-ones or all-zero mask
+       addq    64(%rsp), %r8
+       adcq    72(%rsp), %r9
+       adcq    80(%rsp), %r10
+       adcq    88(%rsp), %r11
+       adcq    96(%rsp), %r12
+       adcq    104(%rsp), %r13
+       adcq    112(%rsp), %r14
+       adcq    120(%rsp), %r15
+       sbbq    %rcx, %rcx
+
+       call    __rsaz_512_subtract
+
+       # restore callee-saved registers and drop the frame
+       leaq    128+24+48(%rsp), %rax
+       movq    -48(%rax), %r15
+       movq    -40(%rax), %r14
+       movq    -32(%rax), %r13
+       movq    -24(%rax), %r12
+       movq    -16(%rax), %rbp
+       movq    -8(%rax), %rbx
+       leaq    (%rax), %rsp
+.Lmul_epilogue:
+       ret
+.size  rsaz_512_mul,.-rsaz_512_mul
+___
+}
+{
+# rsaz_512_mul_gather4(out, ap, bp, mod, n0, pwr)
+#
+# Same overall flow as rsaz_512_mul (multiply, reduce the lower half, add
+# the upper half, conditional subtract into out), except that the second
+# operand is gathered word by word from a table at bp while the
+# multiplication is in progress, and the multiplication itself is
+# open-coded here instead of calling __rsaz_512_mul(x).
+#
+# Table layout as read by this code: the low 32 bits of word 0 are at
+# bp + 4*pwr, the high 32 bits 64 bytes further on, and each following
+# word is another 128 bytes further on.
+#
+# NOTE(review): every table address depends on pwr.  If pwr is derived
+# from secret data (e.g. exponent bits) the access pattern is observable
+# through the cache - confirm against the caller.
+my ($out,$ap,$bp,$mod,$n0,$pwr) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d");
+$code.=<<___;
+.globl rsaz_512_mul_gather4
+.type  rsaz_512_mul_gather4,\@function,6
+.align 32
+rsaz_512_mul_gather4:
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       mov     $pwr, $pwr
+       subq    \$128+24, %rsp
+.Lmul_gather4_body:
+___
+$code.=<<___ if ($addx);
+       movl    \$0x80100,%r11d
+       andl    OPENSSL_ia32cap_P+8(%rip),%r11d
+       cmpl    \$0x80100,%r11d         # check for MULX and ADO/CX
+       je      .Lmulx_gather
+___
+$code.=<<___;
+       # assemble b[0] in %rbx from its two 32-bit halves
+       movl    64($bp,$pwr,4), %eax
+       movq    $out, %xmm0             # off-load arguments
+       movl    ($bp,$pwr,4), %ebx
+       movq    $mod, %xmm1
+       movq    $n0, 128(%rsp)
+
+       shlq    \$32, %rax
+       or      %rax, %rbx
+       movq    ($ap), %rax
+        movq   8($ap), %rcx
+        leaq   128($bp,$pwr,4), %rbp
+       mulq    %rbx                    # 0 iteration
+       movq    %rax, (%rsp)
+       movq    %rcx, %rax
+       movq    %rdx, %r8
+
+       # the next table word is gathered into %xmm4 in between the
+       # multiplications of the current row
+       mulq    %rbx
+        movd   (%rbp), %xmm4
+       addq    %rax, %r8
+       movq    16($ap), %rax
+       movq    %rdx, %r9
+       adcq    \$0, %r9
+
+       mulq    %rbx
+        movd   64(%rbp), %xmm5
+       addq    %rax, %r9
+       movq    24($ap), %rax
+       movq    %rdx, %r10
+       adcq    \$0, %r10
+
+       mulq    %rbx
+        pslldq \$4, %xmm5
+       addq    %rax, %r10
+       movq    32($ap), %rax
+       movq    %rdx, %r11
+       adcq    \$0, %r11
+
+       mulq    %rbx
+        por    %xmm5, %xmm4
+       addq    %rax, %r11
+       movq    40($ap), %rax
+       movq    %rdx, %r12
+       adcq    \$0, %r12
+
+       mulq    %rbx
+       addq    %rax, %r12
+       movq    48($ap), %rax
+       movq    %rdx, %r13
+       adcq    \$0, %r13
+
+       mulq    %rbx
+        leaq   128(%rbp), %rbp
+       addq    %rax, %r13
+       movq    56($ap), %rax
+       movq    %rdx, %r14
+       adcq    \$0, %r14
+       
+       mulq    %rbx
+        movq   %xmm4, %rbx
+       addq    %rax, %r14
+        movq   ($ap), %rax
+       movq    %rdx, %r15
+       adcq    \$0, %r15
+
+       # %rdi (out is parked in %xmm0) becomes the store pointer for the
+       # remaining seven rows
+       leaq    8(%rsp), %rdi
+       movl    \$7, %ecx
+       jmp     .Loop_mul_gather
+
+.align 32
+.Loop_mul_gather:
+       mulq    %rbx
+       addq    %rax, %r8
+       movq    8($ap), %rax
+       movq    %r8, (%rdi)
+       movq    %rdx, %r8
+       adcq    \$0, %r8
+
+       mulq    %rbx
+        movd   (%rbp), %xmm4
+       addq    %rax, %r9
+       movq    16($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r9, %r8
+       movq    %rdx, %r9
+       adcq    \$0, %r9
+
+       mulq    %rbx
+        movd   64(%rbp), %xmm5
+       addq    %rax, %r10
+       movq    24($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r10, %r9
+       movq    %rdx, %r10
+       adcq    \$0, %r10
+
+       mulq    %rbx
+        pslldq \$4, %xmm5
+       addq    %rax, %r11
+       movq    32($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r11, %r10
+       movq    %rdx, %r11
+       adcq    \$0, %r11
+
+       mulq    %rbx
+        por    %xmm5, %xmm4
+       addq    %rax, %r12
+       movq    40($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r12, %r11
+       movq    %rdx, %r12
+       adcq    \$0, %r12
+
+       mulq    %rbx
+       addq    %rax, %r13
+       movq    48($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r13, %r12
+       movq    %rdx, %r13
+       adcq    \$0, %r13
+
+       mulq    %rbx
+       addq    %rax, %r14
+       movq    56($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r14, %r13
+       movq    %rdx, %r14
+       adcq    \$0, %r14
+
+       mulq    %rbx
+        movq   %xmm4, %rbx
+       addq    %rax, %r15
+        movq   ($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r15, %r14
+       movq    %rdx, %r15      
+       adcq    \$0, %r15
+
+       leaq    128(%rbp), %rbp
+       leaq    8(%rdi), %rdi
+
+       decl    %ecx
+       jnz     .Loop_mul_gather
+
+       # flush the upper half of the product
+       movq    %r8, (%rdi)
+       movq    %r9, 8(%rdi)
+       movq    %r10, 16(%rdi)
+       movq    %r11, 24(%rdi)
+       movq    %r12, 32(%rdi)
+       movq    %r13, 40(%rdi)
+       movq    %r14, 48(%rdi)
+       movq    %r15, 56(%rdi)
+
+       movq    %xmm0, $out
+       movq    %xmm1, %rbp
+
+       movq    (%rsp), %r8
+       movq    8(%rsp), %r9
+       movq    16(%rsp), %r10
+       movq    24(%rsp), %r11
+       movq    32(%rsp), %r12
+       movq    40(%rsp), %r13
+       movq    48(%rsp), %r14
+       movq    56(%rsp), %r15
+
+       call    __rsaz_512_reduce
+___
+$code.=<<___ if ($addx);
+       jmp     .Lmul_gather_tail
+
+.align 32
+.Lmulx_gather:
+       # same gather, but b[0] is assembled in %rdx, the implicit mulx
+       # source operand
+       mov     64($bp,$pwr,4), %eax
+       movq    $out, %xmm0             # off-load arguments
+       lea     128($bp,$pwr,4), %rbp
+       mov     ($bp,$pwr,4), %edx
+       movq    $mod, %xmm1
+       mov     $n0, 128(%rsp)
+
+       shl     \$32, %rax
+       or      %rax, %rdx
+       mulx    ($ap), %rbx, %r8        # 0 iteration
+       mov     %rbx, (%rsp)
+       xor     %edi, %edi              # cf=0, of=0
+
+       mulx    8($ap), %rax, %r9
+        movd   (%rbp), %xmm4
+
+       mulx    16($ap), %rbx, %r10
+        movd   64(%rbp), %xmm5
+       adcx    %rax, %r8
+
+       mulx    24($ap), %rax, %r11
+        pslldq \$4, %xmm5
+       adcx    %rbx, %r9
+
+       mulx    32($ap), %rbx, %r12
+        por    %xmm5, %xmm4
+       adcx    %rax, %r10
+
+       mulx    40($ap), %rax, %r13
+       adcx    %rbx, %r11
+
+       mulx    48($ap), %rbx, %r14
+        lea    128(%rbp), %rbp
+       adcx    %rax, %r12
+       
+       mulx    56($ap), %rax, %r15
+        movq   %xmm4, %rdx
+       adcx    %rbx, %r13
+       adcx    %rax, %r14
+       mov     %r8, %rbx
+       adcx    %rdi, %r15              # %rdi is 0
+
+       # %rcx counts -7..-1 and doubles as the negative store index
+       mov     \$-7, %rcx
+       jmp     .Loop_mulx_gather
+
+.align 32
+.Loop_mulx_gather:
+       mulx    ($ap), %rax, %r8
+       adcx    %rax, %rbx
+       adox    %r9, %r8
+
+       mulx    8($ap), %rax, %r9
+       .byte   0x66,0x0f,0x6e,0xa5,0x00,0x00,0x00,0x00         # movd  (%rbp), %xmm4
+       adcx    %rax, %r8
+       adox    %r10, %r9
+
+       mulx    16($ap), %rax, %r10
+        movd   64(%rbp), %xmm5
+        lea    128(%rbp), %rbp
+       adcx    %rax, %r9
+       adox    %r11, %r10
+
+       .byte   0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00    # mulx  24($ap), %rax, %r11
+        pslldq \$4, %xmm5
+        por    %xmm5, %xmm4
+       adcx    %rax, %r10
+       adox    %r12, %r11
+
+       mulx    32($ap), %rax, %r12
+       adcx    %rax, %r11
+       adox    %r13, %r12
+
+       mulx    40($ap), %rax, %r13
+       adcx    %rax, %r12
+       adox    %r14, %r13
+
+       .byte   0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00    # mulx  48($ap), %rax, %r14
+       adcx    %rax, %r13
+       adox    %r15, %r14
+
+       mulx    56($ap), %rax, %r15
+        movq   %xmm4, %rdx
+        mov    %rbx, 64(%rsp,%rcx,8)
+       adcx    %rax, %r14
+       adox    %rdi, %r15
+       mov     %r8, %rbx
+       adcx    %rdi, %r15              # cf=0
+
+       inc     %rcx                    # of=0
+       jnz     .Loop_mulx_gather
+
+       mov     %r8, 64(%rsp)
+       mov     %r9, 64+8(%rsp)
+       mov     %r10, 64+16(%rsp)
+       mov     %r11, 64+24(%rsp)
+       mov     %r12, 64+32(%rsp)
+       mov     %r13, 64+40(%rsp)
+       mov     %r14, 64+48(%rsp)
+       mov     %r15, 64+56(%rsp)
+
+       movq    %xmm0, $out
+       movq    %xmm1, %rbp
+
+       mov     128(%rsp), %rdx         # pull $n0
+       mov     (%rsp), %r8
+       mov     8(%rsp), %r9
+       mov     16(%rsp), %r10
+       mov     24(%rsp), %r11
+       mov     32(%rsp), %r12
+       mov     40(%rsp), %r13
+       mov     48(%rsp), %r14
+       mov     56(%rsp), %r15
+
+       call    __rsaz_512_reducex
+
+.Lmul_gather_tail:
+___
+$code.=<<___;
+       # add the upper half of the product; sbbq turns the final carry
+       # into the mask consumed by __rsaz_512_subtract
+       addq    64(%rsp), %r8
+       adcq    72(%rsp), %r9
+       adcq    80(%rsp), %r10
+       adcq    88(%rsp), %r11
+       adcq    96(%rsp), %r12
+       adcq    104(%rsp), %r13
+       adcq    112(%rsp), %r14
+       adcq    120(%rsp), %r15
+       sbbq    %rcx, %rcx
+
+       call    __rsaz_512_subtract
+
+       leaq    128+24+48(%rsp), %rax
+       movq    -48(%rax), %r15
+       movq    -40(%rax), %r14
+       movq    -32(%rax), %r13
+       movq    -24(%rax), %r12
+       movq    -16(%rax), %rbp
+       movq    -8(%rax), %rbx
+       leaq    (%rax), %rsp
+.Lmul_gather4_epilogue:
+       ret
+.size  rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
+___
+}
+{
+# rsaz_512_mul_scatter4(out, ap, mod, n0, tbl, pwr)
+#
+# Multiplies the 8-word value at out by the one at ap (out is passed to
+# the multiplication helper as the second operand), reduces the product
+# like rsaz_512_mul does, lets __rsaz_512_subtract store the result back
+# at out, and finally scatters the same eight words into the table at
+# tbl + 4*pwr: each word is split into 32-bit halves written 64 bytes
+# apart, i.e. the layout rsaz_512_mul_gather4 reads.
+#
+# NOTE(review): \$inp is not declared in this block; it resolves to a
+# variable defined outside this chunk - presumably %rsi, which the
+# __rsaz_512_subtract header lists as preserved.  TODO confirm.
+my ($out,$ap,$mod,$n0,$tbl,$pwr) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d");
+$code.=<<___;
+.globl rsaz_512_mul_scatter4
+.type  rsaz_512_mul_scatter4,\@function,6
+.align 32
+rsaz_512_mul_scatter4:
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       mov     $pwr, $pwr
+       subq    \$128+24, %rsp
+.Lmul_scatter4_body:
+       leaq    ($tbl,$pwr,4), $tbl
+       movq    $out, %xmm0             # off-load arguments
+       movq    $mod, %xmm1
+       movq    $tbl, %xmm2
+       movq    $n0, 128(%rsp)
+
+       movq    $out, %rbp
+___
+$code.=<<___ if ($addx);
+       movl    \$0x80100,%r11d
+       andl    OPENSSL_ia32cap_P+8(%rip),%r11d
+       cmpl    \$0x80100,%r11d         # check for MULX and ADO/CX
+       je      .Lmulx_scatter
+___
+$code.=<<___;
+       movq    ($out),%rbx             # pass b[0]
+       call    __rsaz_512_mul
+
+       movq    %xmm0, $out
+       movq    %xmm1, %rbp
+
+       movq    (%rsp), %r8
+       movq    8(%rsp), %r9
+       movq    16(%rsp), %r10
+       movq    24(%rsp), %r11
+       movq    32(%rsp), %r12
+       movq    40(%rsp), %r13
+       movq    48(%rsp), %r14
+       movq    56(%rsp), %r15
+
+       call    __rsaz_512_reduce
+___
+$code.=<<___ if ($addx);
+       jmp     .Lmul_scatter_tail
+       
+.align 32
+.Lmulx_scatter:
+       movq    ($out), %rdx            # pass b[0]
+       call    __rsaz_512_mulx
+
+       movq    %xmm0, $out
+       movq    %xmm1, %rbp
+
+       movq    128(%rsp), %rdx         # pull $n0
+       movq    (%rsp), %r8
+       movq    8(%rsp), %r9
+       movq    16(%rsp), %r10
+       movq    24(%rsp), %r11
+       movq    32(%rsp), %r12
+       movq    40(%rsp), %r13
+       movq    48(%rsp), %r14
+       movq    56(%rsp), %r15
+
+       call    __rsaz_512_reducex
+
+.Lmul_scatter_tail:
+___
+$code.=<<___;
+       addq    64(%rsp), %r8
+       adcq    72(%rsp), %r9
+       adcq    80(%rsp), %r10
+       adcq    88(%rsp), %r11
+       adcq    96(%rsp), %r12
+       adcq    104(%rsp), %r13
+       adcq    112(%rsp), %r14
+       adcq    120(%rsp), %r15
+       # movq leaves the flags alone, so the carry of the addition
+       # chain is still intact for the sbbq below
+       movq    %xmm2, $inp
+       sbbq    %rcx, %rcx
+
+       call    __rsaz_512_subtract
+
+       # low halves go to the even 64-byte rows, high halves to the odd
+       movl    %r8d, 64*0($inp)        # scatter
+       shrq    \$32, %r8
+       movl    %r9d, 64*2($inp)
+       shrq    \$32, %r9
+       movl    %r10d, 64*4($inp)
+       shrq    \$32, %r10
+       movl    %r11d, 64*6($inp)
+       shrq    \$32, %r11
+       movl    %r12d, 64*8($inp)
+       shrq    \$32, %r12
+       movl    %r13d, 64*10($inp)
+       shrq    \$32, %r13
+       movl    %r14d, 64*12($inp)
+       shrq    \$32, %r14
+       movl    %r15d, 64*14($inp)
+       shrq    \$32, %r15
+       movl    %r8d, 64*1($inp)
+       movl    %r9d, 64*3($inp)
+       movl    %r10d, 64*5($inp)
+       movl    %r11d, 64*7($inp)
+       movl    %r12d, 64*9($inp)
+       movl    %r13d, 64*11($inp)
+       movl    %r14d, 64*13($inp)
+       movl    %r15d, 64*15($inp)
+
+       leaq    128+24+48(%rsp), %rax
+       movq    -48(%rax), %r15
+       movq    -40(%rax), %r14
+       movq    -32(%rax), %r13
+       movq    -24(%rax), %r12
+       movq    -16(%rax), %rbp
+       movq    -8(%rax), %rbx
+       leaq    (%rax), %rsp
+.Lmul_scatter4_epilogue:
+       ret
+.size  rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
+___
+}
+{
+# rsaz_512_mul_by_one(out, inp, mod, n0)
+#
+# Loads the 8-word value at inp into %r8-%r15, runs it through
+# __rsaz_512_reduce (or __rsaz_512_reducex) and stores the eight result
+# words at out.  No multiplication and no final conditional subtraction
+# is performed here.
+# NOTE(review): presumably used to convert a value out of Montgomery
+# representation (a Montgomery multiplication by 1) - confirm against
+# the caller.
+my ($out,$inp,$mod,$n0) = ("%rdi","%rsi","%rdx","%rcx");
+$code.=<<___;
+.globl rsaz_512_mul_by_one
+.type  rsaz_512_mul_by_one,\@function,4
+.align 32
+rsaz_512_mul_by_one:
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       subq    \$128+24, %rsp
+.Lmul_by_one_body:
+___
+# The capability word is fetched early; it is only tested after the
+# operand loads below.
+$code.=<<___ if ($addx);
+       movl    OPENSSL_ia32cap_P+8(%rip),%eax
+___
+$code.=<<___;
+       movq    $mod, %rbp      # reassign argument
+       movq    $n0, 128(%rsp)
+
+       movq    ($inp), %r8
+       pxor    %xmm0, %xmm0
+       movq    8($inp), %r9
+       movq    16($inp), %r10
+       movq    24($inp), %r11
+       movq    32($inp), %r12
+       movq    40($inp), %r13
+       movq    48($inp), %r14
+       movq    56($inp), %r15
+
+       # clear the first 112 bytes of the scratch area
+       movdqa  %xmm0, (%rsp)
+       movdqa  %xmm0, 16(%rsp)
+       movdqa  %xmm0, 32(%rsp)
+       movdqa  %xmm0, 48(%rsp)
+       movdqa  %xmm0, 64(%rsp)
+       movdqa  %xmm0, 80(%rsp)
+       movdqa  %xmm0, 96(%rsp)
+___
+$code.=<<___ if ($addx);
+       andl    \$0x80100,%eax
+       cmpl    \$0x80100,%eax          # check for MULX and ADO/CX
+       je      .Lby_one_callx
+___
+$code.=<<___;
+       call    __rsaz_512_reduce
+___
+$code.=<<___ if ($addx);
+       jmp     .Lby_one_tail
+.align 32
+.Lby_one_callx:
+       movq    128(%rsp), %rdx         # pull $n0
+       call    __rsaz_512_reducex
+.Lby_one_tail:
+___
+$code.=<<___;
+       movq    %r8, ($out)
+       movq    %r9, 8($out)
+       movq    %r10, 16($out)
+       movq    %r11, 24($out)
+       movq    %r12, 32($out)
+       movq    %r13, 40($out)
+       movq    %r14, 48($out)
+       movq    %r15, 56($out)
+
+       leaq    128+24+48(%rsp), %rax
+       movq    -48(%rax), %r15
+       movq    -40(%rax), %r14
+       movq    -32(%rax), %r13
+       movq    -24(%rax), %r12
+       movq    -16(%rax), %rbp
+       movq    -8(%rax), %rbx
+       leaq    (%rax), %rsp
+.Lmul_by_one_epilogue:
+       ret
+.size  rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
+___
+}
+{      # __rsaz_512_reduce
+       #
+       # Eight-pass word-by-word reduction of the value in %r8-%r15 by
+       # the 8-word modulus at %rbp.  Each pass multiplies the modulus
+       # by %rbx = (lowest word) * n0, accumulates the product and
+       # shifts the register window down by one word.
+       #
+       # input:        %r8-%r15, %rbp - mod, 128(%rsp) - n0
+       #               (offset as seen by the caller; inside this
+       #               routine the return address moves it to
+       #               128+8(%rsp))
+       # output:       %r8-%r15
+       # clobbers:     everything except %rbp and %rdi
+$code.=<<___;
+.type  __rsaz_512_reduce,\@abi-omnipotent
+.align 32
+__rsaz_512_reduce:
+       movq    %r8, %rbx
+       imulq   128+8(%rsp), %rbx
+       movq    0(%rbp), %rax
+       movl    \$8, %ecx
+       jmp     .Lreduction_loop
+
+.align 32
+.Lreduction_loop:
+       # negq sets the carry flag exactly when the old %r8 is non-zero.
+       # NOTE(review): this stands in for adding the low product word
+       # to %r8, presumably because that sum is zero modulo 2^64 by the
+       # choice of n0 - confirm.
+       mulq    %rbx
+       movq    8(%rbp), %rax
+       negq    %r8
+       movq    %rdx, %r8
+       adcq    \$0, %r8
+
+       mulq    %rbx
+       addq    %rax, %r9
+       movq    16(%rbp), %rax
+       adcq    \$0, %rdx
+       addq    %r9, %r8
+       movq    %rdx, %r9
+       adcq    \$0, %r9
+
+       mulq    %rbx
+       addq    %rax, %r10
+       movq    24(%rbp), %rax
+       adcq    \$0, %rdx
+       addq    %r10, %r9
+       movq    %rdx, %r10
+       adcq    \$0, %r10
+
+       mulq    %rbx
+       addq    %rax, %r11
+       movq    32(%rbp), %rax
+       adcq    \$0, %rdx
+       addq    %r11, %r10
+        movq   128+8(%rsp), %rsi
+       #movq   %rdx, %r11
+       #adcq   \$0, %r11
+       adcq    \$0, %rdx
+       movq    %rdx, %r11
+
+       # the new lowest word in %r8 is final at this point, so the
+       # multiplier for the next pass is computed ahead of time in %rsi
+       mulq    %rbx
+       addq    %rax, %r12
+       movq    40(%rbp), %rax
+       adcq    \$0, %rdx
+        imulq  %r8, %rsi
+       addq    %r12, %r11
+       movq    %rdx, %r12
+       adcq    \$0, %r12
+
+       mulq    %rbx
+       addq    %rax, %r13
+       movq    48(%rbp), %rax
+       adcq    \$0, %rdx
+       addq    %r13, %r12
+       movq    %rdx, %r13
+       adcq    \$0, %r13
+
+       mulq    %rbx
+       addq    %rax, %r14
+       movq    56(%rbp), %rax
+       adcq    \$0, %rdx
+       addq    %r14, %r13
+       movq    %rdx, %r14
+       adcq    \$0, %r14
+
+       mulq    %rbx
+        movq   %rsi, %rbx
+       addq    %rax, %r15
+        movq   0(%rbp), %rax
+       adcq    \$0, %rdx
+       addq    %r15, %r14
+       movq    %rdx, %r15
+       adcq    \$0, %r15
+
+       decl    %ecx
+       jne     .Lreduction_loop
+
+       ret
+.size  __rsaz_512_reduce,.-__rsaz_512_reduce
+___
+}
+if ($addx) {
+       # __rsaz_512_reducex
+       #
+       # MULX/ADCX/ADOX flavour of __rsaz_512_reduce: eight passes, each
+       # multiplying the modulus at %rbp by %rdx and folding the product
+       # into %r8-%r15 with two independent carry chains (CF via adcx,
+       # OF via adox).
+       #
+       # input:        %r8-%r15, %rbp - mod, %rdx - n0 (preloaded by
+       #               the caller), 128(%rsp) - n0 (caller's offset;
+       #               read here as 128+8(%rsp))
+       # output:       %r8-%r15
+       # clobbers:     everything except %rbp and %rdi
+$code.=<<___;
+.type  __rsaz_512_reducex,\@abi-omnipotent
+.align 32
+__rsaz_512_reducex:
+       #movq   128+8(%rsp), %rdx               # pull $n0
+       imulq   %r8, %rdx
+       xorq    %rsi, %rsi                      # cf=0,of=0
+       movl    \$8, %ecx
+       jmp     .Lreduction_loopx
+
+.align 32
+.Lreduction_loopx:
+       # the low product word is added to the old %r8 only for the
+       # carry it produces; the sum itself (in %rax) is discarded
+       mov     %r8, %rbx
+       mulx    0(%rbp), %rax, %r8
+       adcx    %rbx, %rax
+       adox    %r9, %r8
+
+       mulx    8(%rbp), %rax, %r9
+       adcx    %rax, %r8
+       adox    %r10, %r9
+
+       mulx    16(%rbp), %rbx, %r10
+       adcx    %rbx, %r9
+       adox    %r11, %r10
+
+       mulx    24(%rbp), %rbx, %r11
+       adcx    %rbx, %r10
+       adox    %r12, %r11
+
+       .byte   0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00    # mulx  32(%rbp), %rbx, %r12
+        mov    %rdx, %rax
+        mov    %r8, %rdx
+       adcx    %rbx, %r11
+       adox    %r13, %r12
+
+       # multiplier for the next pass: low word of (new %r8) * n0 goes
+       # to %rbx; the high word lands in %rdx and is overwritten right
+       # away when the current multiplier is restored from %rax
+        mulx   128+8(%rsp), %rbx, %rdx
+        mov    %rax, %rdx
+
+       mulx    40(%rbp), %rax, %r13
+       adcx    %rax, %r12
+       adox    %r14, %r13
+
+       .byte   0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00    # mulx  48(%rbp), %rax, %r14
+       adcx    %rax, %r13
+       adox    %r15, %r14
+
+       mulx    56(%rbp), %rax, %r15
+        mov    %rbx, %rdx
+       adcx    %rax, %r14
+       adox    %rsi, %r15                      # %rsi is 0
+       adcx    %rsi, %r15                      # cf=0
+
+       decl    %ecx                            # of=0
+       jne     .Lreduction_loopx
+
+       ret
+.size  __rsaz_512_reducex,.-__rsaz_512_reducex
+___
+}
+{      # __rsaz_512_subtract
+       #
+       # Branch-free conditional subtraction of the modulus.  The value
+       # in %r8-%r15 is first stored at out; then the two's complement
+       # of the modulus (negq on word 0, notq on words 1-7), ANDed with
+       # the mask, is added to it and the sum is stored at out again.
+       # With an all-ones mask this yields value - mod, with a zero
+       # mask the value is left unchanged.
+       # NOTE(review): the negq/notq split equals the full negation only
+       # while word 0 of the modulus is non-zero - presumably guaranteed
+       # by an odd modulus; confirm.
+       #
+       # input: %r8-%r15, %rdi - $out, %rbp - $mod, %rcx - mask
+       # output: the 8-word result at $out, also left in %r8-%r15
+       # clobbers: everything but %rdi, %rsi and %rbp
+$code.=<<___;
+.type  __rsaz_512_subtract,\@abi-omnipotent
+.align 32
+__rsaz_512_subtract:
+       movq    %r8, ($out)
+       movq    %r9, 8($out)
+       movq    %r10, 16($out)
+       movq    %r11, 24($out)
+       movq    %r12, 32($out)
+       movq    %r13, 40($out)
+       movq    %r14, 48($out)
+       movq    %r15, 56($out)
+
+       movq    0($mod), %r8
+       movq    8($mod), %r9
+       negq    %r8
+       notq    %r9
+       andq    %rcx, %r8
+       movq    16($mod), %r10
+       andq    %rcx, %r9
+       notq    %r10
+       movq    24($mod), %r11
+       andq    %rcx, %r10
+       notq    %r11
+       movq    32($mod), %r12
+       andq    %rcx, %r11
+       notq    %r12
+       movq    40($mod), %r13
+       andq    %rcx, %r12
+       notq    %r13
+       movq    48($mod), %r14
+       andq    %rcx, %r13
+       notq    %r14
+       movq    56($mod), %r15
+       andq    %rcx, %r14
+       notq    %r15
+       andq    %rcx, %r15
+
+       addq    ($out), %r8
+       adcq    8($out), %r9
+       adcq    16($out), %r10
+       adcq    24($out), %r11
+       adcq    32($out), %r12
+       adcq    40($out), %r13
+       adcq    48($out), %r14
+       adcq    56($out), %r15
+
+       movq    %r8, ($out)
+       movq    %r9, 8($out)
+       movq    %r10, 16($out)
+       movq    %r11, 24($out)
+       movq    %r12, 32($out)
+       movq    %r13, 40($out)
+       movq    %r14, 48($out)
+       movq    %r15, 56($out)
+
+       ret
+.size  __rsaz_512_subtract,.-__rsaz_512_subtract
+___
+}
+{      # __rsaz_512_mul
+       #
+       # Schoolbook 8x8-word multiplication: one row for b[0] followed
+       # by seven passes of .Loop_mul, each fetching the next word of
+       # bp, adding a*b[i] to the running sum in %r8-%r15 and storing
+       # one finished low word.
+       #
+       # input: %rsi - ap, %rbp - bp, %rbx - b[0] (preloaded by caller)
+       # output: 16-word product starting at 8(%rsp) as seen from
+       #         inside this routine, i.e. at (%rsp) in the caller's
+       #         frame once the return address is popped
+       # clobbers: everything
+my ($ap,$bp) = ("%rsi","%rbp");
+$code.=<<___;
+.type  __rsaz_512_mul,\@abi-omnipotent
+.align 32
+__rsaz_512_mul:
+       leaq    8(%rsp), %rdi
+
+       movq    ($ap), %rax
+       mulq    %rbx
+       movq    %rax, (%rdi)
+       movq    8($ap), %rax
+       movq    %rdx, %r8
+
+       mulq    %rbx
+       addq    %rax, %r8
+       movq    16($ap), %rax
+       movq    %rdx, %r9
+       adcq    \$0, %r9
+
+       mulq    %rbx
+       addq    %rax, %r9
+       movq    24($ap), %rax
+       movq    %rdx, %r10
+       adcq    \$0, %r10
+
+       mulq    %rbx
+       addq    %rax, %r10
+       movq    32($ap), %rax
+       movq    %rdx, %r11
+       adcq    \$0, %r11
+
+       mulq    %rbx
+       addq    %rax, %r11
+       movq    40($ap), %rax
+       movq    %rdx, %r12
+       adcq    \$0, %r12
+
+       mulq    %rbx
+       addq    %rax, %r12
+       movq    48($ap), %rax
+       movq    %rdx, %r13
+       adcq    \$0, %r13
+
+       mulq    %rbx
+       addq    %rax, %r13
+       movq    56($ap), %rax
+       movq    %rdx, %r14
+       adcq    \$0, %r14
+       
+       mulq    %rbx
+       addq    %rax, %r14
+        movq   ($ap), %rax
+       movq    %rdx, %r15
+       adcq    \$0, %r15
+
+       leaq    8($bp), $bp
+       leaq    8(%rdi), %rdi
+
+       movl    \$7, %ecx
+       jmp     .Loop_mul
+
+.align 32
+.Loop_mul:
+       movq    ($bp), %rbx
+       mulq    %rbx
+       addq    %rax, %r8
+       movq    8($ap), %rax
+       movq    %r8, (%rdi)
+       movq    %rdx, %r8
+       adcq    \$0, %r8
+
+       mulq    %rbx
+       addq    %rax, %r9
+       movq    16($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r9, %r8
+       movq    %rdx, %r9
+       adcq    \$0, %r9
+
+       mulq    %rbx
+       addq    %rax, %r10
+       movq    24($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r10, %r9
+       movq    %rdx, %r10
+       adcq    \$0, %r10
+
+       mulq    %rbx
+       addq    %rax, %r11
+       movq    32($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r11, %r10
+       movq    %rdx, %r11
+       adcq    \$0, %r11
+
+       mulq    %rbx
+       addq    %rax, %r12
+       movq    40($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r12, %r11
+       movq    %rdx, %r12
+       adcq    \$0, %r12
+
+       mulq    %rbx
+       addq    %rax, %r13
+       movq    48($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r13, %r12
+       movq    %rdx, %r13
+       adcq    \$0, %r13
+
+       mulq    %rbx
+       addq    %rax, %r14
+       movq    56($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r14, %r13
+       movq    %rdx, %r14
+        leaq   8($bp), $bp
+       adcq    \$0, %r14
+
+       mulq    %rbx
+       addq    %rax, %r15
+        movq   ($ap), %rax
+       adcq    \$0, %rdx
+       addq    %r15, %r14
+       movq    %rdx, %r15      
+       adcq    \$0, %r15
+
+       leaq    8(%rdi), %rdi
+
+       decl    %ecx
+       jnz     .Loop_mul
+
+       # flush the upper eight words of the product
+       movq    %r8, (%rdi)
+       movq    %r9, 8(%rdi)
+       movq    %r10, 16(%rdi)
+       movq    %r11, 24(%rdi)
+       movq    %r12, 32(%rdi)
+       movq    %r13, 40(%rdi)
+       movq    %r14, 48(%rdi)
+       movq    %r15, 56(%rdi)
+
+       ret
+.size  __rsaz_512_mul,.-__rsaz_512_mul
+___
+}
+if ($addx) {
+       # __rsaz_512_mulx
+       #
+       # MULX/ADCX/ADOX flavour of __rsaz_512_mul.  The row for b[0] is
+       # computed up front, .Loop_mulx then runs six times (%rcx counts
+       # -6..-1) while prefetching the next word of bp into %rdx, and
+       # the row for the last word is unrolled after the loop.
+       #
+       # input: %rsi - ap, %rbp - bp, %rdx - b[0] (preloaded by caller)
+       # output: 16-word product starting at 8(%rsp) as seen from
+       #         inside this routine, i.e. at (%rsp) in the caller's
+       #         frame once the return address is popped
+       # clobbers: everything
+my ($ap,$bp,$zero) = ("%rsi","%rbp","%rdi");
+$code.=<<___;
+.type  __rsaz_512_mulx,\@abi-omnipotent
+.align 32
+__rsaz_512_mulx:
+       mulx    ($ap), %rbx, %r8        # initial %rdx preloaded by caller
+       mov     \$-6, %rcx
+
+       mulx    8($ap), %rax, %r9
+       movq    %rbx, 8(%rsp)
+
+       mulx    16($ap), %rbx, %r10
+       adc     %rax, %r8
+
+       mulx    24($ap), %rax, %r11
+       adc     %rbx, %r9
+
+       mulx    32($ap), %rbx, %r12
+       adc     %rax, %r10
+
+       mulx    40($ap), %rax, %r13
+       adc     %rbx, %r11
+
+       mulx    48($ap), %rbx, %r14
+       adc     %rax, %r12
+
+       mulx    56($ap), %rax, %r15
+        mov    8($bp), %rdx
+       adc     %rbx, %r13
+       adc     %rax, %r14
+       adc     \$0, %r15
+
+       xor     $zero, $zero            # cf=0,of=0
+       jmp     .Loop_mulx
+
+.align 32
+.Loop_mulx:
+       movq    %r8, %rbx
+       mulx    ($ap), %rax, %r8
+       adcx    %rax, %rbx
+       adox    %r9, %r8
+
+       mulx    8($ap), %rax, %r9
+       adcx    %rax, %r8
+       adox    %r10, %r9
+
+       mulx    16($ap), %rax, %r10
+       adcx    %rax, %r9
+       adox    %r11, %r10
+
+       mulx    24($ap), %rax, %r11
+       adcx    %rax, %r10
+       adox    %r12, %r11
+
+       .byte   0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00       # mulx  32($ap), %rax, %r12
+       adcx    %rax, %r11
+       adox    %r13, %r12
+
+       mulx    40($ap), %rax, %r13
+       adcx    %rax, %r12
+       adox    %r14, %r13
+
+       mulx    48($ap), %rax, %r14
+       adcx    %rax, %r13
+       adox    %r15, %r14
+
+       # fetch the next word of bp and store the finished low word;
+       # the negative counter in %rcx indexes both accesses
+       mulx    56($ap), %rax, %r15
+        movq   64($bp,%rcx,8), %rdx
+        movq   %rbx, 8+64-8(%rsp,%rcx,8)
+       adcx    %rax, %r14
+       adox    $zero, %r15
+       adcx    $zero, %r15             # cf=0
+
+       inc     %rcx                    # of=0
+       jnz     .Loop_mulx
+
+       # last row: same sequence as the loop body, without a prefetch
+       movq    %r8, %rbx
+       mulx    ($ap), %rax, %r8
+       adcx    %rax, %rbx
+       adox    %r9, %r8
+
+       .byte   0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00    # mulx  8($ap), %rax, %r9
+       adcx    %rax, %r8
+       adox    %r10, %r9
+
+       .byte   0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00    # mulx  16($ap), %rax, %r10
+       adcx    %rax, %r9
+       adox    %r11, %r10
+
+       mulx    24($ap), %rax, %r11
+       adcx    %rax, %r10
+       adox    %r12, %r11
+
+       mulx    32($ap), %rax, %r12
+       adcx    %rax, %r11
+       adox    %r13, %r12
+
+       mulx    40($ap), %rax, %r13
+       adcx    %rax, %r12
+       adox    %r14, %r13
+
+       .byte   0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00    # mulx  48($ap), %rax, %r14
+       adcx    %rax, %r13
+       adox    %r15, %r14
+
+       .byte   0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00    # mulx  56($ap), %rax, %r15
+       adcx    %rax, %r14
+       adox    $zero, %r15
+       adcx    $zero, %r15
+
+       mov     %rbx, 8+64-8(%rsp)
+       mov     %r8, 8+64(%rsp)
+       mov     %r9, 8+64+8(%rsp)
+       mov     %r10, 8+64+16(%rsp)
+       mov     %r11, 8+64+24(%rsp)
+       mov     %r12, 8+64+32(%rsp)
+       mov     %r13, 8+64+40(%rsp)
+       mov     %r14, 8+64+48(%rsp)
+       mov     %r15, 8+64+56(%rsp)
+
+       ret
+.size  __rsaz_512_mulx,.-__rsaz_512_mulx
+___
+}
+{
+# rsaz_512_scatter4(out, inp, power) and rsaz_512_gather4(out, inp, power)
+#
+# Stand-alone table helpers.  scatter4 copies the eight 64-bit words at
+# inp into the table at out + 4*power; gather4 does the reverse from
+# inp + 4*power into out.  In the table every word occupies two 32-bit
+# slots 64 bytes apart (low half first) and consecutive words are 128
+# bytes apart.
+#
+# Both routines are declared abi-omnipotent, so the argument registers
+# are picked here per platform instead of by the translator.
+#
+# NOTE(review): the gather address depends on power; if power is derived
+# from secret data the access pattern is observable through the cache -
+# confirm against the caller.
+my ($out,$inp,$power)= $win64 ? ("%rcx","%rdx","%r8d") : ("%rdi","%rsi","%edx");
+$code.=<<___;
+.globl rsaz_512_scatter4
+.type  rsaz_512_scatter4,\@abi-omnipotent
+.align 16
+rsaz_512_scatter4:
+       leaq    ($out,$power,4), $out
+       movl    \$8, %r9d
+       jmp     .Loop_scatter
+.align 16
+.Loop_scatter:
+       movq    ($inp), %rax
+       leaq    8($inp), $inp
+       movl    %eax, ($out)
+       shrq    \$32, %rax
+       movl    %eax, 64($out)
+       leaq    128($out), $out
+       decl    %r9d
+       jnz     .Loop_scatter
+       ret
+.size  rsaz_512_scatter4,.-rsaz_512_scatter4
+
+.globl rsaz_512_gather4
+.type  rsaz_512_gather4,\@abi-omnipotent
+.align 16
+rsaz_512_gather4:
+       leaq    ($inp,$power,4), $inp
+       movl    \$8, %r9d
+       jmp     .Loop_gather
+.align 16
+.Loop_gather:
+       movl    ($inp), %eax
+       movl    64($inp), %r8d
+       leaq    128($inp), $inp
+       shlq    \$32, %r8
+       or      %r8, %rax
+       movq    %rax, ($out)
+       leaq    8($out), $out
+       decl    %r9d
+       jnz     .Loop_gather
+       ret
+.size  rsaz_512_gather4,.-rsaz_512_gather4
+___
+}
+
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+# Win64 structured-exception support, emitted only for Win64 targets.
+#
+# se_handler is the language-specific handler shared by all five public
+# rsaz_512_* entry points that set up the 128+24 byte frame.  Each
+# function passes two image-relative labels as HandlerData[]: the end of
+# its prologue (.L*_body) and its epilogue (.L*_epilogue).  Depending on
+# where context->Rip lies relative to them the handler
+#   - before the body label:     takes the frame base from context->Rax;
+#   - at or after the epilogue:  takes it from context->Rsp as is;
+#   - in between:                steps over the 128+24+48 byte frame and
+#                                restores rbx, rbp and r12-r15 from the
+#                                six save slots into the context.
+# It then restores Rsp/Rsi/Rdi in the context, copies the context to
+# disp->ContextRecord, calls RtlVirtualUnwind and returns
+# ExceptionContinueSearch.
+# NOTE(review): the 8(%rax)/16(%rax) slots read at .Lcommon_seh_tail are
+# presumably where the translator-generated Win64 prologue parks
+# %rdi/%rsi, and the .LSEH_begin_*/.LSEH_end_* labels are presumably
+# emitted by the translator too - neither is visible here; confirm.
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern        __imp_RtlVirtualUnwind
+.type  se_handler,\@abi-omnipotent
+.align 16
+se_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # end of prologue label
+       cmp     %r10,%rbx               # context->Rip<end of prologue label
+       jb      .Lcommon_seh_tail
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=epilogue label
+       jae     .Lcommon_seh_tail
+
+       lea     128+24+48(%rax),%rax    # step over scratch area and save slots
+
+       mov     -8(%rax),%rbx
+       mov     -16(%rax),%rbp
+       mov     -24(%rax),%r12
+       mov     -32(%rax),%r13
+       mov     -40(%rax),%r14
+       mov     -48(%rax),%r15
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+       mov     %r12,216($context)      # restore context->R12
+       mov     %r13,224($context)      # restore context->R13
+       mov     %r14,232($context)      # restore context->R14
+       mov     %r15,240($context)      # restore context->R15
+
+.Lcommon_seh_tail:
+       mov     8(%rax),%rdi
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT)
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
+       mov     %r12,48(%rsp)           # arg7
+       mov     %rcx,56(%rsp)           # arg8, (NULL)
+       call    *__imp_RtlVirtualUnwind(%rip)
+
+       mov     \$1,%eax                # ExceptionContinueSearch
+       add     \$64,%rsp
+       popfq
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+       ret
+.size  se_handler,.-se_handler
+
+.section       .pdata
+.align 4
+       .rva    .LSEH_begin_rsaz_512_sqr
+       .rva    .LSEH_end_rsaz_512_sqr
+       .rva    .LSEH_info_rsaz_512_sqr
+
+       .rva    .LSEH_begin_rsaz_512_mul
+       .rva    .LSEH_end_rsaz_512_mul
+       .rva    .LSEH_info_rsaz_512_mul
+
+       .rva    .LSEH_begin_rsaz_512_mul_gather4
+       .rva    .LSEH_end_rsaz_512_mul_gather4
+       .rva    .LSEH_info_rsaz_512_mul_gather4
+
+       .rva    .LSEH_begin_rsaz_512_mul_scatter4
+       .rva    .LSEH_end_rsaz_512_mul_scatter4
+       .rva    .LSEH_info_rsaz_512_mul_scatter4
+
+       .rva    .LSEH_begin_rsaz_512_mul_by_one
+       .rva    .LSEH_end_rsaz_512_mul_by_one
+       .rva    .LSEH_info_rsaz_512_mul_by_one
+
+.section       .xdata
+.align 8
+.LSEH_info_rsaz_512_sqr:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lsqr_body,.Lsqr_epilogue                       # HandlerData[]
+.LSEH_info_rsaz_512_mul:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lmul_body,.Lmul_epilogue                       # HandlerData[]
+.LSEH_info_rsaz_512_mul_gather4:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lmul_gather4_body,.Lmul_gather4_epilogue       # HandlerData[]
+.LSEH_info_rsaz_512_mul_scatter4:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lmul_scatter4_body,.Lmul_scatter4_epilogue     # HandlerData[]
+.LSEH_info_rsaz_512_mul_by_one:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lmul_by_one_body,.Lmul_by_one_epilogue         # HandlerData[]
+___
+}
+
+# Expand every backtick-quoted Perl expression embedded in the generated
+# text, then emit the result.
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+print $code;
+# Output is buffered, so a failed or short write may only be reported
+# when the handle is closed; fail loudly instead of silently leaving
+# truncated assembly behind.
+close STDOUT or die "error closing STDOUT: $!";
diff --git a/crypto/bn/asm/sparct4-mont.pl b/crypto/bn/asm/sparct4-mont.pl
new file mode 100755 (executable)
index 0000000..71b4500
--- /dev/null
@@ -0,0 +1,1222 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
+# <appro@openssl.org>. The module is licensed under 2-clause BSD
+# license. November 2012. All rights reserved.
+# ====================================================================
+
+######################################################################
+# Montgomery squaring-n-multiplication module for SPARC T4.
+#
+# The module consists of three parts:
+#
+# 1) collection of "single-op" subroutines that perform single
+#    operation, Montgomery squaring or multiplication, on 512-,
+#    1024-, 1536- and 2048-bit operands;
+# 2) collection of "multi-op" subroutines that perform 5 squaring and
+#    1 multiplication operations on operands of above lengths;
+# 3) fall-back and helper VIS3 subroutines.
+#
+# RSA sign is dominated by multi-op subroutine, while RSA verify and
+# DSA - by single-op. Special note about 4096-bit RSA verify result.
+# Operands are too long for dedicated hardware and it's handled by
+# VIS3 code, which is why you don't see any improvement. It's surely
+# possible to improve it [by deploying 'mpmul' instruction], maybe in
+# the future...
+#
+# Performance improvement.
+#
+# 64-bit process, VIS3:
+#                   sign    verify    sign/s verify/s
+# rsa 1024 bits 0.000628s 0.000028s   1592.4  35434.4
+# rsa 2048 bits 0.003282s 0.000106s    304.7   9438.3
+# rsa 4096 bits 0.025866s 0.000340s     38.7   2940.9
+# dsa 1024 bits 0.000301s 0.000332s   3323.7   3013.9
+# dsa 2048 bits 0.001056s 0.001233s    946.9    810.8
+#
+# 64-bit process, this module:
+#                   sign    verify    sign/s verify/s
+# rsa 1024 bits 0.000256s 0.000016s   3904.4  61411.9
+# rsa 2048 bits 0.000946s 0.000029s   1056.8  34292.7
+# rsa 4096 bits 0.005061s 0.000340s    197.6   2940.5
+# dsa 1024 bits 0.000176s 0.000195s   5674.7   5130.5
+# dsa 2048 bits 0.000296s 0.000354s   3383.2   2827.6
+#
+######################################################################
+# 32-bit process, VIS3:
+#                   sign    verify    sign/s verify/s
+# rsa 1024 bits 0.000665s 0.000028s   1504.8  35233.3
+# rsa 2048 bits 0.003349s 0.000106s    298.6   9433.4
+# rsa 4096 bits 0.025959s 0.000341s     38.5   2934.8
+# dsa 1024 bits 0.000320s 0.000341s   3123.3   2929.6
+# dsa 2048 bits 0.001101s 0.001260s    908.2    793.4
+#
+# 32-bit process, this module:
+#                   sign    verify    sign/s verify/s
+# rsa 1024 bits 0.000301s 0.000017s   3317.1  60240.0
+# rsa 2048 bits 0.001034s 0.000030s    966.9  33812.7
+# rsa 4096 bits 0.005244s 0.000341s    190.7   2935.4
+# dsa 1024 bits 0.000201s 0.000205s   4976.1   4879.2
+# dsa 2048 bits 0.000328s 0.000360s   3051.1   2774.2
+#
+# 32-bit code is prone to performance degradation as interrupt rate
+# dispatched to CPU executing the code grows. This is because in
+# standard process of handling interrupt in 32-bit process context
+# upper halves of most integer registers used as input or output are
+# zeroed. This renders result invalid, and operation has to be re-run.
+# If CPU is "bothered" with timer interrupts only, the penalty is
+# hardly measurable. But in order to mitigate this problem for higher
+# interrupt rates contemporary Linux kernel recognizes biased stack
+# even in 32-bit process context and preserves full register contents.
+# See http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=517ffce4e1a03aea979fe3a18a3dd1761a24fafb
+# for details.
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;        # $dir = directory part of this script's own path (with trailing separator)
+push(@INC,"${dir}","${dir}../../perlasm");     # search the script directory and ../../perlasm for the helper below
+require "sparcv9_modes.pl";                    # NOTE(review): nothing visible in this part of the file calls into it - confirm it is still needed
+
+$code.=<<___;  # start of the generated assembly: header include, scratch-register declarations, .text section, PIC thunk
+#include "sparc_arch.h"
+
+#ifdef __arch64__
+.register      %g2,#scratch
+.register      %g3,#scratch
+#endif
+
+.section       ".text",#alloc,#execinstr
+
+#ifdef __PIC__
+SPARC_PIC_THUNK(%g1)
+#endif
+___
+
+########################################################################
+# Register assignment used with the mont[mul|sqr] instructions.
+# The authoritative description is the "Oracle SPARC Architecture 2011" manual at
+# http://www.oracle.com/technetwork/server-storage/sun-sparc-enterprise/documentation/.
+# Each of the four lists below holds exactly 32 register names.
+my @R=map(sprintf("%%f%d",2*$_),(0..11,30,31,12..29));
+my @N=((map("%l$_",(0..7)),map("%o$_",(0..5)))x2,map("%l$_",(0..3)));
+my @A=(@N[0..13],@R[14..$#R]);
+my @B=((map("%i$_",(0..5)),map("%l$_",(0..7)))x2,map("%o$_",(0..3)));
+\f
+########################################################################
+# int bn_mul_mont_t4_$NUM(u64 *rp,const u64 *ap,const u64 *bp,
+#                        const u64 *np,const BN_ULONG *n0);
+# Appends one such routine to $code; the emitted routine returns 1 on success and 0 on its abort paths.
+sub generate_bn_mul_mont_t4() {        # called as &generate_bn_mul_mont_t4($n), which bypasses the empty prototype
+my $NUM=shift;                         # operand size in 64-bit limbs (memory offsets below are $i*8)
+my ($rp,$ap,$bp,$np,$sentinel)=map("%g$_",(1..5));     # %g1..%g5: globals stay addressable across the save/restore pairs below
+
+$code.=<<___;  # prologue: build $sentinel, open the first window, park arguments in globals, load *n0 into %f60
+.globl bn_mul_mont_t4_$NUM
+.align 32
+bn_mul_mont_t4_$NUM:
+#ifdef __arch64__
+       mov     0,$sentinel
+       mov     -128,%g4
+#elif defined(SPARCV9_64BIT_STACK)
+       SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
+       ld      [%g1+0],%g1     ! OPENSSL_sparcv9_P[0]
+       mov     -2047,%g4
+       and     %g1,SPARCV9_64BIT_STACK,%g1
+       movrz   %g1,0,%g4
+       mov     -1,$sentinel
+       add     %g4,-128,%g4
+#else
+       mov     -1,$sentinel
+       mov     -128,%g4
+#endif
+       sllx    $sentinel,32,$sentinel
+       save    %sp,%g4,%sp
+#ifndef        __arch64__
+       save    %sp,-128,%sp    ! warm it up
+       save    %sp,-128,%sp
+       save    %sp,-128,%sp
+       save    %sp,-128,%sp
+       save    %sp,-128,%sp
+       save    %sp,-128,%sp
+       restore
+       restore
+       restore
+       restore
+       restore
+       restore
+#endif
+       and     %sp,1,%g4
+       or      $sentinel,%fp,%fp
+       or      %g4,$sentinel,$sentinel
+
+       ! copy arguments to global registers
+       mov     %i0,$rp
+       mov     %i1,$ap
+       mov     %i2,$bp
+       mov     %i3,$np
+       ld      [%i4+0],%f1     ! load *n0
+       ld      [%i4+4],%f0
+       fsrc2   %f0,%f60
+___
+\f
+# load ap[$NUM]: the first 14 limbs go to integer registers of a fresh window, any further limbs to FP registers
+$code.=<<___;  # every new window gets the sentinel bits OR-ed into its %fp
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for($i=0; $i<14 && $i<$NUM; $i++) {    # NB: $i is the file-global, not a lexical
+my $lo=$i<13?@A[$i+1]:"%o7";           # scratch for the word at offset +0: the next limb's register, or %o7 for the 14th limb
+$code.=<<___;  # the 32-bit word at +4 becomes the upper half of the 64-bit limb, the word at +0 the lower half
+       ld      [$ap+$i*8+0],$lo
+       ld      [$ap+$i*8+4],@A[$i]
+       sllx    @A[$i],32,@A[$i]
+       or      $lo,@A[$i],@A[$i]
+___
+}
+for(; $i<$NUM; $i++) {                 # limbs 14 and up: assembled in one of %f0..%f7 and copied to @A[$i]
+my ($hi,$lo)=("%f".2*($i%4),"%f".(2*($i%4)+1));
+$code.=<<___;
+       ld      [$ap+$i*8+0],$lo
+       ld      [$ap+$i*8+4],$hi
+       fsrc2   $hi,@A[$i]
+___
+}
+# load np[$NUM]: spread over three further windows (limbs 0-13, 14-27, 28 and up), all three are always opened
+$code.=<<___;
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for($i=0; $i<14 && $i<$NUM; $i++) {
+my $lo=$i<13?@N[$i+1]:"%o7";           # same scratch scheme as for ap[]
+$code.=<<___;
+       ld      [$np+$i*8+0],$lo
+       ld      [$np+$i*8+4],@N[$i]
+       sllx    @N[$i],32,@N[$i]
+       or      $lo,@N[$i],@N[$i]
+___
+}
+$code.=<<___;
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for(; $i<28 && $i<$NUM; $i++) {
+my $lo=$i<27?@N[$i+1]:"%o7";
+$code.=<<___;
+       ld      [$np+$i*8+0],$lo
+       ld      [$np+$i*8+4],@N[$i]
+       sllx    @N[$i],32,@N[$i]
+       or      $lo,@N[$i],@N[$i]
+___
+}
+$code.=<<___;
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for(; $i<$NUM; $i++) {
+my $lo=($i<$NUM-1)?@N[$i+1]:"%o7";
+$code.=<<___;
+       ld      [$np+$i*8+0],$lo
+       ld      [$np+$i*8+4],@N[$i]
+       sllx    @N[$i],32,@N[$i]
+       or      $lo,@N[$i],@N[$i]
+___
+}
+$code.=<<___;  # identical ap and bp pointers: skip loading bp and take the squaring path at .Lmsquare_$NUM
+       cmp     $ap,$bp
+       be      SIZE_T_CC,.Lmsquare_$NUM
+       nop
+___
+\f
+# load bp[$NUM]: two more windows (limbs 0-13, then 14 and up)
+$code.=<<___;
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for($i=0; $i<14 && $i<$NUM; $i++) {
+my $lo=$i<13?@B[$i+1]:"%o7";
+$code.=<<___;
+       ld      [$bp+$i*8+0],$lo
+       ld      [$bp+$i*8+4],@B[$i]
+       sllx    @B[$i],32,@B[$i]
+       or      $lo,@B[$i],@B[$i]
+___
+}
+$code.=<<___;
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for(; $i<$NUM; $i++) {
+my $lo=($i<$NUM-1)?@B[$i+1]:"%o7";
+$code.=<<___;
+       ld      [$bp+$i*8+0],$lo
+       ld      [$bp+$i*8+4],@B[$i]
+       sllx    @B[$i],32,@B[$i]
+       or      $lo,@B[$i],@B[$i]
+___
+}
+# magic: the hand-encoded montmul instruction, then validity checks while unwinding five of the seven windows
+$code.=<<___;  # abort if %fcc3 is unordered or, in 32-bit builds, if the sentinel bits vanished from any window's %fp
+       .word   0x81b02920+$NUM-1       ! montmul       $NUM-1
+.Lmresume_$NUM:
+       fbu,pn  %fcc3,.Lmabort_$NUM
+#ifndef        __arch64__
+       and     %fp,$sentinel,$sentinel
+       brz,pn  $sentinel,.Lmabort_$NUM
+#endif
+       nop
+#ifdef __arch64__
+       restore
+       restore
+       restore
+       restore
+       restore
+#else
+       restore;                and     %fp,$sentinel,$sentinel
+       restore;                and     %fp,$sentinel,$sentinel
+       restore;                and     %fp,$sentinel,$sentinel
+       restore;                and     %fp,$sentinel,$sentinel
+        brz,pn $sentinel,.Lmabort1_$NUM
+       restore
+#endif
+___
+\f
+# save tp[$NUM]: limbs held in integer registers are first moved to FP registers, then everything is stored to rp
+for($i=0; $i<14 && $i<$NUM; $i++) {    # done while still in the window that holds @A[0..13]
+$code.=<<___;
+       movxtod @A[$i],@R[$i]
+___
+}
+$code.=<<___;  # leave that window; 32-bit builds make a final sentinel check and return 0 through .Lmdone_$NUM if it fails
+#ifdef __arch64__
+       restore
+#else
+        and    %fp,$sentinel,$sentinel
+       restore
+        and    $sentinel,1,%o7
+        and    %fp,$sentinel,$sentinel
+        srl    %fp,0,%fp               ! just in case?
+        or     %o7,$sentinel,$sentinel
+       brz,a,pn $sentinel,.Lmdone_$NUM
+       mov     0,%i0           ! return failure
+#endif
+___
+for($i=0; $i<12 && $i<$NUM; $i++) {    # @R[0..11] are %f0..%f22, stored as two 32-bit halves
+@R[$i] =~ /%f([0-9]+)/;
+my $lo = "%f".($1+1);                  # odd-numbered partner register, written to offset +0
+$code.=<<___;
+       st      $lo,[$rp+$i*8+0]
+       st      @R[$i],[$rp+$i*8+4]
+___
+}
+for(; $i<$NUM; $i++) {                 # remaining limbs are copied into one of %f0..%f7 first; loop exits with $i==$NUM
+my ($hi,$lo)=("%f".2*($i%4),"%f".(2*($i%4)+1));
+$code.=<<___;
+       fsrc2   @R[$i],$hi
+       st      $lo,[$rp+$i*8+0]
+       st      $hi,[$rp+$i*8+4]
+___
+}
+$code.=<<___;  # epilogue, abort paths (unwind remaining windows, return 0) and the squaring variant
+       mov     1,%i0           ! return success
+.Lmdone_$NUM:
+       ret
+       restore
+
+.Lmabort_$NUM:
+       restore
+       restore
+       restore
+       restore
+       restore
+.Lmabort1_$NUM:
+       restore
+
+       mov     0,%i0           ! return failure
+       ret
+       restore
+
+.align 32
+.Lmsquare_$NUM:
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+       .word   0x81b02940+$NUM-1       ! montsqr       $NUM-1
+       ba      .Lmresume_$NUM
+       nop
+.type  bn_mul_mont_t4_$NUM, #function
+.size  bn_mul_mont_t4_$NUM, .-bn_mul_mont_t4_$NUM
+___
+}
+
+for my $limbs (8,16,24,32) {   # 512..2048-bit operands; lexical counter, because the generator uses the global $i as scratch
+       &generate_bn_mul_mont_t4($limbs);
+}
+\f
+########################################################################
+# load_ccr(table reg, index reg, mask reg [, no_wr]): emit code that adds (index&3)*8 to the table pointer and sets mask to 1<<((index>>2)&7); the mask is also written to %ccr unless no_wr is true. Clobbers %o4 and %o5.
+sub load_ccr {
+my ($base,$idx,$bit,$no_wr)=@_;
+$code.=<<___;
+       srl     $idx,   2,      %o4
+       and     $idx,   3,      %o5
+       and     %o4,    7,      %o4
+       sll     %o5,    3,      %o5     ! offset within first cache line
+       add     %o5,    $base,  $base   ! of the pwrtbl
+       or      %g0,    1,      %o5
+       sll     %o5,    %o4,    $bit
+___
+$code.=<<___   unless ($no_wr);
+       wr      $bit,   %g0,    %ccr
+___
+}
+sub load_b_pair {      # emit code that fetches two 64-bit values ($B0, $B1) from the table and advances the table pointer by 16*32 bytes
+my ($pwrtbl,$B0,$B1)=@_;       # $pwrtbl: register holding the table pointer; $B0/$B1: destination registers; %o4/%o5 are clobbered
+
+$code.=<<___;  # all 8 candidates per value (32 bytes apart) are loaded; conditional moves on %icc/%xcc flags keep one. NOTE(review): presumably relies on the one-hot %ccr from load_ccr - confirm
+       ldx     [$pwrtbl+0*32], $B0
+       ldx     [$pwrtbl+8*32], $B1
+       ldx     [$pwrtbl+1*32], %o4
+       ldx     [$pwrtbl+9*32], %o5
+       movvs   %icc,   %o4,    $B0
+       ldx     [$pwrtbl+2*32], %o4
+       movvs   %icc,   %o5,    $B1
+       ldx     [$pwrtbl+10*32],%o5
+       move    %icc,   %o4,    $B0
+       ldx     [$pwrtbl+3*32], %o4
+       move    %icc,   %o5,    $B1
+       ldx     [$pwrtbl+11*32],%o5
+       movneg  %icc,   %o4,    $B0
+       ldx     [$pwrtbl+4*32], %o4
+       movneg  %icc,   %o5,    $B1
+       ldx     [$pwrtbl+12*32],%o5
+       movcs   %xcc,   %o4,    $B0
+       ldx     [$pwrtbl+5*32],%o4
+       movcs   %xcc,   %o5,    $B1
+       ldx     [$pwrtbl+13*32],%o5
+       movvs   %xcc,   %o4,    $B0
+       ldx     [$pwrtbl+6*32], %o4
+       movvs   %xcc,   %o5,    $B1
+       ldx     [$pwrtbl+14*32],%o5
+       move    %xcc,   %o4,    $B0
+       ldx     [$pwrtbl+7*32], %o4
+       move    %xcc,   %o5,    $B1
+       ldx     [$pwrtbl+15*32],%o5
+       movneg  %xcc,   %o4,    $B0
+       add     $pwrtbl,16*32,  $pwrtbl
+       movneg  %xcc,   %o5,    $B1
+___
+}
+sub load_b {           # single-value variant of load_b_pair: fetches one 64-bit value and advances the table pointer by 8*32 bytes
+my ($pwrtbl,$Bi)=@_;   # $pwrtbl: table pointer register; $Bi: destination register; any further arguments are ignored; %o4/%o5 are clobbered
+
+$code.=<<___;  # entry 0 is the default; each conditional move tests a different %icc/%xcc flag and may replace it with entries 1..7
+       ldx     [$pwrtbl+0*32], $Bi
+       ldx     [$pwrtbl+1*32], %o4
+       ldx     [$pwrtbl+2*32], %o5
+       movvs   %icc,   %o4,    $Bi
+       ldx     [$pwrtbl+3*32], %o4
+       move    %icc,   %o5,    $Bi
+       ldx     [$pwrtbl+4*32], %o5
+       movneg  %icc,   %o4,    $Bi
+       ldx     [$pwrtbl+5*32], %o4
+       movcs   %xcc,   %o5,    $Bi
+       ldx     [$pwrtbl+6*32], %o5
+       movvs   %xcc,   %o4,    $Bi
+       ldx     [$pwrtbl+7*32], %o4
+       move    %xcc,   %o5,    $Bi
+       add     $pwrtbl,8*32,   $pwrtbl
+       movneg  %xcc,   %o4,    $Bi
+___
+}
+\f
+########################################################################
+# int bn_pwr5_mont_t4_$NUM(u64 *tp,const u64 *np,const BN_ULONG *n0,
+#                         const u64 *pwrtbl,int pwr,int stride);
+# Appends one such routine to $code: each pass of .Lstride_$NUM does 5 montsqr and 1 montmul by a table entry; returns 1 on success, 0 on abort.
+sub generate_bn_pwr5_mont_t4() {       # called as &generate_bn_pwr5_mont_t4($n), which bypasses the empty prototype
+my $NUM=shift;                         # operand size in 64-bit limbs
+my ($tp,$np,$pwrtbl,$pwr,$sentinel)=map("%g$_",(1..5));        # %g1..%g5: globals stay addressable across the save/restore pairs below
+
+$code.=<<___;  # prologue as in bn_mul_mont_t4_*; additionally packs stride (upper 32 bits) and pwr (lower 32 bits) into $pwr
+.globl bn_pwr5_mont_t4_$NUM
+.align 32
+bn_pwr5_mont_t4_$NUM:
+#ifdef __arch64__
+       mov     0,$sentinel
+       mov     -128,%g4
+#elif defined(SPARCV9_64BIT_STACK)
+       SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
+       ld      [%g1+0],%g1     ! OPENSSL_sparcv9_P[0]
+       mov     -2047,%g4
+       and     %g1,SPARCV9_64BIT_STACK,%g1
+       movrz   %g1,0,%g4
+       mov     -1,$sentinel
+       add     %g4,-128,%g4
+#else
+       mov     -1,$sentinel
+       mov     -128,%g4
+#endif
+       sllx    $sentinel,32,$sentinel
+       save    %sp,%g4,%sp
+#ifndef        __arch64__
+       save    %sp,-128,%sp    ! warm it up
+       save    %sp,-128,%sp
+       save    %sp,-128,%sp
+       save    %sp,-128,%sp
+       save    %sp,-128,%sp
+       save    %sp,-128,%sp
+       restore
+       restore
+       restore
+       restore
+       restore
+       restore
+#endif
+       and     %sp,1,%g4
+       or      $sentinel,%fp,%fp
+       or      %g4,$sentinel,$sentinel
+
+       ! copy arguments to global registers
+       mov     %i0,$tp
+       mov     %i1,$np
+       ld      [%i2+0],%f1     ! load *n0
+       ld      [%i2+4],%f0
+       mov     %i3,$pwrtbl
+       srl     %i4,%g0,%i4     ! pack last arguments
+       sllx    %i5,32,$pwr
+       or      %i4,$pwr,$pwr
+       fsrc2   %f0,%f60
+___
+\f
+# load tp[$NUM]: whole 64-bit limbs, the first 14 into integer registers, the rest into FP registers
+$code.=<<___;  # every new window gets the sentinel bits OR-ed into its %fp
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for($i=0; $i<14 && $i<$NUM; $i++) {    # NB: $i is the file-global, not a lexical
+$code.=<<___;
+       ldx     [$tp+$i*8],@A[$i]
+___
+}
+for(; $i<$NUM; $i++) {                 # @A[14..] are FP registers, hence ldd instead of ldx
+$code.=<<___;
+       ldd     [$tp+$i*8],@A[$i]
+___
+}
+# load np[$NUM]: spread over three further windows (limbs 0-13, 14-27, 28 and up), all three are always opened
+$code.=<<___;
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for($i=0; $i<14 && $i<$NUM; $i++) {
+$code.=<<___;
+       ldx     [$np+$i*8],@N[$i]
+___
+}
+$code.=<<___;
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for(; $i<28 && $i<$NUM; $i++) {
+$code.=<<___;
+       ldx     [$np+$i*8],@N[$i]
+___
+}
+$code.=<<___;
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for(; $i<$NUM; $i++) {
+$code.=<<___;
+       ldx     [$np+$i*8],@N[$i]
+___
+}
+# load pwrtbl[pwr]: take 5 off the stride counter, shift pwr right by what is left and select a table entry from the result
+$code.=<<___;  # %o4 = stride-5, %o5 = pwr>>(stride-5), %o7 = table pointer; $pwr is re-packed with the reduced stride
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+
+       srlx    $pwr,   32,     %o4             ! unpack $pwr
+       srl     $pwr,   %g0,    %o5
+       sub     %o4,    5,      %o4
+       mov     $pwrtbl,        %o7
+       sllx    %o4,    32,     $pwr            ! re-pack $pwr
+       or      %o5,    $pwr,   $pwr
+       srl     %o5,    %o4,    %o5
+___
+       &load_ccr("%o7","%o5","%o4");   # first pass: %ccr is written right away
+$code.=<<___;
+       b       .Lstride_$NUM
+       nop
+.align 16
+.Lstride_$NUM:
+___
+for($i=0; $i<14 && $i<$NUM; $i+=2) {   # first 14 limbs of the selected entry, into the current window
+       &load_b_pair("%o7",@B[$i],@B[$i+1]);
+}
+$code.=<<___;
+       save    %sp,-128,%sp;           or      $sentinel,%fp,%fp
+___
+for(; $i<$NUM; $i+=2) {                # remaining limbs; after the save the table pointer is reached as %i7
+       &load_b_pair("%i7",@B[$i],@B[$i+1]);
+}
+$code.=<<___;  # prepare the selection for the next pass: same unpack/re-pack sequence, table pointer reset in %i7
+       srax    $pwr,   32,     %o4             ! unpack $pwr
+       srl     $pwr,   %g0,    %o5
+       sub     %o4,    5,      %o4
+       mov     $pwrtbl,        %i7
+       sllx    %o4,    32,     $pwr            ! re-pack $pwr
+       or      %o5,    $pwr,   $pwr
+       srl     %o5,    %o4,    %o5
+___
+       &load_ccr("%i7","%o5","%o4",1); # mask only, kept in %o4; %ccr itself is written after the squarings below
+\f
+# magic: five hand-encoded montsqr instructions followed by one montmul, each with the same validity checks
+for($i=0; $i<5; $i++) {
+$code.=<<___;
+       .word   0x81b02940+$NUM-1       ! montsqr       $NUM-1
+       fbu,pn  %fcc3,.Labort_$NUM
+#ifndef        __arch64__
+       and     %fp,$sentinel,$sentinel
+       brz,pn  $sentinel,.Labort_$NUM
+#endif
+       nop
+___
+}
+$code.=<<___;  # install the prepared mask, multiply, then loop back to .Lstride_$NUM while the stride counter is non-negative
+       wr      %o4,    %g0,    %ccr
+       .word   0x81b02920+$NUM-1       ! montmul       $NUM-1
+       fbu,pn  %fcc3,.Labort_$NUM
+#ifndef        __arch64__
+       and     %fp,$sentinel,$sentinel
+       brz,pn  $sentinel,.Labort_$NUM
+#endif
+
+       srax    $pwr,   32,     %o4
+#ifdef __arch64__
+       brgez   %o4,.Lstride_$NUM
+       restore
+       restore
+       restore
+       restore
+       restore
+#else
+       brgez   %o4,.Lstride_$NUM
+       restore;                and     %fp,$sentinel,$sentinel
+       restore;                and     %fp,$sentinel,$sentinel
+       restore;                and     %fp,$sentinel,$sentinel
+       restore;                and     %fp,$sentinel,$sentinel
+        brz,pn $sentinel,.Labort1_$NUM
+       restore
+#endif
+___
+\f
+# save tp[$NUM]: limbs held in integer registers are first moved to FP registers, then everything is stored back to tp
+for($i=0; $i<14 && $i<$NUM; $i++) {    # done while still in the window that holds @A[0..13]
+$code.=<<___;
+       movxtod @A[$i],@R[$i]
+___
+}
+$code.=<<___;  # leave that window; 32-bit builds make a final sentinel check and return 0 through .Ldone_$NUM if it fails
+#ifdef __arch64__
+       restore
+#else
+        and    %fp,$sentinel,$sentinel
+       restore
+        and    $sentinel,1,%o7
+        and    %fp,$sentinel,$sentinel
+        srl    %fp,0,%fp               ! just in case?
+        or     %o7,$sentinel,$sentinel
+       brz,a,pn $sentinel,.Ldone_$NUM
+       mov     0,%i0           ! return failure
+#endif
+___
+for($i=0; $i<$NUM; $i++) {             # 64-bit FP stores of every limb; loop exits with $i==$NUM
+$code.=<<___;
+       std     @R[$i],[$tp+$i*8]
+___
+}
+$code.=<<___;  # epilogue and abort paths (unwind the remaining windows, return 0)
+       mov     1,%i0           ! return success
+.Ldone_$NUM:
+       ret
+       restore
+
+.Labort_$NUM:
+       restore
+       restore
+       restore
+       restore
+       restore
+.Labort1_$NUM:
+       restore
+
+       mov     0,%i0           ! return failure
+       ret
+       restore
+.type  bn_pwr5_mont_t4_$NUM, #function
+.size  bn_pwr5_mont_t4_$NUM, .-bn_pwr5_mont_t4_$NUM
+___
+}
+
+for my $limbs (8,16,24,32) {   # 512..2048-bit operands; lexical counter, because the generator uses the global $i as scratch
+       &generate_bn_pwr5_mont_t4($limbs);
+}
+\f
+{
+########################################################################
+# Fall-back subroutines
+#
+# copy of bn_mul_mont_vis3 adjusted for vectors of 64-bit values
+#
+($n0,$m0,$m1,$lo0,$hi0, $lo1,$hi1,$aj,$alo,$nj,$nlo,$tj)=
+       (map("%g$_",(1..5)),map("%o$_",(0..5,7)));
+
+# int bn_mul_mont(
+$rp="%o0";     # u64 *rp,
+$ap="%o1";     # const u64 *ap,
+$bp="%o2";     # const u64 *bp,
+$np="%o3";     # const u64 *np,
+$n0p="%o4";    # const BN_ULONG *n0,
+$num="%o5";    # int num);     # caller ensures that num is >=3
+$code.=<<___;
+.globl bn_mul_mont_t4
+.align 32
+bn_mul_mont_t4:
+       add     %sp,    STACK_BIAS,     %g4     ! real top of stack
+       sll     $num,   3,      $num            ! size in bytes
+       add     $num,   63,     %g1
+       andn    %g1,    63,     %g1             ! buffer size rounded up to 64 bytes
+       sub     %g4,    %g1,    %g1
+       andn    %g1,    63,     %g1             ! align at 64 byte
+       sub     %g1,    STACK_FRAME,    %g1     ! new top of stack
+       sub     %g1,    %g4,    %g1
+
+       save    %sp,    %g1,    %sp
+___
+#      +-------------------------------+<----- %sp
+#      .                               .
+#      +-------------------------------+<----- aligned at 64 bytes
+#      | __int64 tmp[0]                |
+#      +-------------------------------+
+#      .                               .
+#      .                               .
+#      +-------------------------------+<----- aligned at 64 bytes
+#      .                               .
+($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5));
+($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz)=map("%l$_",(0..7));
+($ovf,$i)=($t0,$t1);
+$code.=<<___;
+       ld      [$n0p+0],       $t0     ! pull n0[0..1] value
+       ld      [$n0p+4],       $t1
+       add     %sp, STACK_BIAS+STACK_FRAME, $tp
+       ldx     [$bp+0],        $m0     ! m0=bp[0]
+       sllx    $t1,    32,     $n0
+       add     $bp,    8,      $bp
+       or      $t0,    $n0,    $n0
+\f
+       ldx     [$ap+0],        $aj     ! ap[0]
+
+       mulx    $aj,    $m0,    $lo0    ! ap[0]*bp[0]
+       umulxhi $aj,    $m0,    $hi0
+
+       ldx     [$ap+8],        $aj     ! ap[1]
+       add     $ap,    16,     $ap
+       ldx     [$np+0],        $nj     ! np[0]
+
+       mulx    $lo0,   $n0,    $m1     ! "tp[0]"*n0
+
+       mulx    $aj,    $m0,    $alo    ! ap[1]*bp[0]
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+
+       mulx    $nj,    $m1,    $lo1    ! np[0]*m1
+       umulxhi $nj,    $m1,    $hi1
+
+       ldx     [$np+8],        $nj     ! np[1]
+
+       addcc   $lo0,   $lo1,   $lo1
+       add     $np,    16,     $np
+       addxc   %g0,    $hi1,   $hi1
+
+       mulx    $nj,    $m1,    $nlo    ! np[1]*m1
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+\f
+       ba      .L1st
+       sub     $num,   24,     $cnt    ! cnt=num-3
+
+.align 16
+.L1st:
+       addcc   $alo,   $hi0,   $lo0
+       addxc   $aj,    %g0,    $hi0
+
+       ldx     [$ap+0],        $aj     ! ap[j]
+       addcc   $nlo,   $hi1,   $lo1
+       add     $ap,    8,      $ap
+       addxc   $nj,    %g0,    $hi1    ! nhi=nj
+
+       ldx     [$np+0],        $nj     ! np[j]
+       mulx    $aj,    $m0,    $alo    ! ap[j]*bp[0]
+       add     $np,    8,      $np
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+
+       mulx    $nj,    $m1,    $nlo    ! np[j]*m1
+       addcc   $lo0,   $lo1,   $lo1    ! np[j]*m1+ap[j]*bp[0]
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+       addxc   %g0,    $hi1,   $hi1
+       stxa    $lo1,   [$tp]0xe2       ! tp[j-1]
+       add     $tp,    8,      $tp     ! tp++
+
+       brnz,pt $cnt,   .L1st
+       sub     $cnt,   8,      $cnt    ! j--
+!.L1st
+       addcc   $alo,   $hi0,   $lo0
+       addxc   $aj,    %g0,    $hi0    ! ahi=aj
+
+       addcc   $nlo,   $hi1,   $lo1
+       addxc   $nj,    %g0,    $hi1
+       addcc   $lo0,   $lo1,   $lo1    ! np[j]*m1+ap[j]*bp[0]
+       addxc   %g0,    $hi1,   $hi1
+       stxa    $lo1,   [$tp]0xe2       ! tp[j-1]
+       add     $tp,    8,      $tp
+
+       addcc   $hi0,   $hi1,   $hi1
+       addxc   %g0,    %g0,    $ovf    ! upmost overflow bit
+       stxa    $hi1,   [$tp]0xe2
+       add     $tp,    8,      $tp
+\f
+       ba      .Louter
+       sub     $num,   16,     $i      ! i=num-2
+
+.align 16
+.Louter:
+       ldx     [$bp+0],        $m0     ! m0=bp[i]
+       add     $bp,    8,      $bp
+
+       sub     $ap,    $num,   $ap     ! rewind
+       sub     $np,    $num,   $np
+       sub     $tp,    $num,   $tp
+
+       ldx     [$ap+0],        $aj     ! ap[0]
+       ldx     [$np+0],        $nj     ! np[0]
+
+       mulx    $aj,    $m0,    $lo0    ! ap[0]*bp[i]
+       ldx     [$tp],          $tj     ! tp[0]
+       umulxhi $aj,    $m0,    $hi0
+       ldx     [$ap+8],        $aj     ! ap[1]
+       addcc   $lo0,   $tj,    $lo0    ! ap[0]*bp[i]+tp[0]
+       mulx    $aj,    $m0,    $alo    ! ap[1]*bp[i]
+       addxc   %g0,    $hi0,   $hi0
+       mulx    $lo0,   $n0,    $m1     ! tp[0]*n0
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+       mulx    $nj,    $m1,    $lo1    ! np[0]*m1
+       add     $ap,    16,     $ap
+       umulxhi $nj,    $m1,    $hi1
+       ldx     [$np+8],        $nj     ! np[1]
+       add     $np,    16,     $np
+       addcc   $lo1,   $lo0,   $lo1
+       mulx    $nj,    $m1,    $nlo    ! np[1]*m1
+       addxc   %g0,    $hi1,   $hi1
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+\f
+       ba      .Linner
+       sub     $num,   24,     $cnt    ! cnt=num-3
+.align 16
+.Linner:
+       addcc   $alo,   $hi0,   $lo0
+       ldx     [$tp+8],        $tj     ! tp[j]
+       addxc   $aj,    %g0,    $hi0    ! ahi=aj
+       ldx     [$ap+0],        $aj     ! ap[j]
+       add     $ap,    8,      $ap
+       addcc   $nlo,   $hi1,   $lo1
+       mulx    $aj,    $m0,    $alo    ! ap[j]*bp[i]
+       addxc   $nj,    %g0,    $hi1    ! nhi=nj
+       ldx     [$np+0],        $nj     ! np[j]
+       add     $np,    8,      $np
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+       addcc   $lo0,   $tj,    $lo0    ! ap[j]*bp[i]+tp[j]
+       mulx    $nj,    $m1,    $nlo    ! np[j]*m1
+       addxc   %g0,    $hi0,   $hi0
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+       addcc   $lo1,   $lo0,   $lo1    ! np[j]*m1+ap[j]*bp[i]+tp[j]
+       addxc   %g0,    $hi1,   $hi1
+       stx     $lo1,   [$tp]           ! tp[j-1]
+       add     $tp,    8,      $tp
+       brnz,pt $cnt,   .Linner
+       sub     $cnt,   8,      $cnt
+!.Linner
+       ldx     [$tp+8],        $tj     ! tp[j]
+       addcc   $alo,   $hi0,   $lo0
+       addxc   $aj,    %g0,    $hi0    ! ahi=aj
+       addcc   $lo0,   $tj,    $lo0    ! ap[j]*bp[i]+tp[j]
+       addxc   %g0,    $hi0,   $hi0
+
+       addcc   $nlo,   $hi1,   $lo1
+       addxc   $nj,    %g0,    $hi1    ! nhi=nj
+       addcc   $lo1,   $lo0,   $lo1    ! np[j]*m1+ap[j]*bp[i]+tp[j]
+       addxc   %g0,    $hi1,   $hi1
+       stx     $lo1,   [$tp]           ! tp[j-1]
+
+       subcc   %g0,    $ovf,   %g0     ! move upmost overflow to CCR.xcc
+       addxccc $hi1,   $hi0,   $hi1
+       addxc   %g0,    %g0,    $ovf
+       stx     $hi1,   [$tp+8]
+       add     $tp,    16,     $tp
+
+       brnz,pt $i,     .Louter
+       sub     $i,     8,      $i
+\f
+       sub     $ap,    $num,   $ap     ! rewind
+       sub     $np,    $num,   $np
+       sub     $tp,    $num,   $tp
+       ba      .Lsub
+       subcc   $num,   8,      $cnt    ! cnt=num-1 and clear CCR.xcc
+
+.align 16
+.Lsub:
+       ldx     [$tp],          $tj
+       add     $tp,    8,      $tp
+       ldx     [$np+0],        $nj
+       add     $np,    8,      $np
+       subccc  $tj,    $nj,    $t2     ! tp[j]-np[j]
+       srlx    $tj,    32,     $tj
+       srlx    $nj,    32,     $nj
+       subccc  $tj,    $nj,    $t3
+       add     $rp,    8,      $rp
+       st      $t2,    [$rp-4]         ! reverse order
+       st      $t3,    [$rp-8]
+       brnz,pt $cnt,   .Lsub
+       sub     $cnt,   8,      $cnt
+
+       sub     $np,    $num,   $np     ! rewind
+       sub     $tp,    $num,   $tp
+       sub     $rp,    $num,   $rp
+
+       subc    $ovf,   %g0,    $ovf    ! handle upmost overflow bit
+       and     $tp,    $ovf,   $ap
+       andn    $rp,    $ovf,   $np
+       or      $np,    $ap,    $ap     ! ap=borrow?tp:rp
+       ba      .Lcopy
+       sub     $num,   8,      $cnt
+
+.align 16
+.Lcopy:                                        ! copy or in-place refresh
+       ldx     [$ap+0],        $t2
+       add     $ap,    8,      $ap
+       stx     %g0,    [$tp]           ! zap
+       add     $tp,    8,      $tp
+       stx     $t2,    [$rp+0]
+       add     $rp,    8,      $rp
+       brnz    $cnt,   .Lcopy
+       sub     $cnt,   8,      $cnt
+
+       mov     1,      %o0
+       ret
+       restore
+.type  bn_mul_mont_t4, #function
+.size  bn_mul_mont_t4, .-bn_mul_mont_t4
+___
+\f
+# int bn_mul_mont_gather5(
+$rp="%o0";     # u64 *rp,
+$ap="%o1";     # const u64 *ap,
+$bp="%o2";     # const u64 *pwrtbl,
+$np="%o3";     # const u64 *np,
+$n0p="%o4";    # const BN_ULONG *n0,
+$num="%o5";    # int num,      # caller ensures that num is >=3
+               # int power);
+$code.=<<___;
+.globl bn_mul_mont_gather5_t4
+.align 32
+bn_mul_mont_gather5_t4:
+       add     %sp,    STACK_BIAS,     %g4     ! real top of stack
+       sll     $num,   3,      $num            ! size in bytes
+       add     $num,   63,     %g1
+       andn    %g1,    63,     %g1             ! buffer size rounded up to 64 bytes
+       sub     %g4,    %g1,    %g1
+       andn    %g1,    63,     %g1             ! align at 64 byte
+       sub     %g1,    STACK_FRAME,    %g1     ! new top of stack
+       sub     %g1,    %g4,    %g1
+       LDPTR   [%sp+STACK_7thARG],     %g4     ! load power, 7th argument
+
+       save    %sp,    %g1,    %sp
+___
+#      +-------------------------------+<----- %sp
+#      .                               .
+#      +-------------------------------+<----- aligned at 64 bytes
+#      | __int64 tmp[0]                |
+#      +-------------------------------+
+#      .                               .
+#      .                               .
+#      +-------------------------------+<----- aligned at 64 bytes
+#      .                               .
+($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5));
+($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz,$ccr)=map("%l$_",(0..7));
+($ovf,$i)=($t0,$t1);
+       &load_ccr($bp,"%g4",$ccr);
+       &load_b($bp,$m0,"%o7");         # m0=bp[0]
+
+$code.=<<___;
+       ld      [$n0p+0],       $t0     ! pull n0[0..1] value
+       ld      [$n0p+4],       $t1
+       add     %sp, STACK_BIAS+STACK_FRAME, $tp
+       sllx    $t1,    32,     $n0
+       or      $t0,    $n0,    $n0
+\f
+       ldx     [$ap+0],        $aj     ! ap[0]
+
+       mulx    $aj,    $m0,    $lo0    ! ap[0]*bp[0]
+       umulxhi $aj,    $m0,    $hi0
+
+       ldx     [$ap+8],        $aj     ! ap[1]
+       add     $ap,    16,     $ap
+       ldx     [$np+0],        $nj     ! np[0]
+
+       mulx    $lo0,   $n0,    $m1     ! "tp[0]"*n0
+
+       mulx    $aj,    $m0,    $alo    ! ap[1]*bp[0]
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+
+       mulx    $nj,    $m1,    $lo1    ! np[0]*m1
+       umulxhi $nj,    $m1,    $hi1
+
+       ldx     [$np+8],        $nj     ! np[1]
+
+       addcc   $lo0,   $lo1,   $lo1
+       add     $np,    16,     $np
+       addxc   %g0,    $hi1,   $hi1
+
+       mulx    $nj,    $m1,    $nlo    ! np[1]*m1
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+\f
+       ba      .L1st_g5
+       sub     $num,   24,     $cnt    ! cnt=num-3
+
+.align 16
+.L1st_g5:
+       addcc   $alo,   $hi0,   $lo0
+       addxc   $aj,    %g0,    $hi0
+
+       ldx     [$ap+0],        $aj     ! ap[j]
+       addcc   $nlo,   $hi1,   $lo1
+       add     $ap,    8,      $ap
+       addxc   $nj,    %g0,    $hi1    ! nhi=nj
+
+       ldx     [$np+0],        $nj     ! np[j]
+       mulx    $aj,    $m0,    $alo    ! ap[j]*bp[0]
+       add     $np,    8,      $np
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+
+       mulx    $nj,    $m1,    $nlo    ! np[j]*m1
+       addcc   $lo0,   $lo1,   $lo1    ! np[j]*m1+ap[j]*bp[0]
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+       addxc   %g0,    $hi1,   $hi1
+       stxa    $lo1,   [$tp]0xe2       ! tp[j-1]
+       add     $tp,    8,      $tp     ! tp++
+
+       brnz,pt $cnt,   .L1st_g5
+       sub     $cnt,   8,      $cnt    ! j--
+!.L1st_g5
+       addcc   $alo,   $hi0,   $lo0
+       addxc   $aj,    %g0,    $hi0    ! ahi=aj
+
+       addcc   $nlo,   $hi1,   $lo1
+       addxc   $nj,    %g0,    $hi1
+       addcc   $lo0,   $lo1,   $lo1    ! np[j]*m1+ap[j]*bp[0]
+       addxc   %g0,    $hi1,   $hi1
+       stxa    $lo1,   [$tp]0xe2       ! tp[j-1]
+       add     $tp,    8,      $tp
+
+       addcc   $hi0,   $hi1,   $hi1
+       addxc   %g0,    %g0,    $ovf    ! upmost overflow bit
+       stxa    $hi1,   [$tp]0xe2
+       add     $tp,    8,      $tp
+\f
+       ba      .Louter_g5
+       sub     $num,   16,     $i      ! i=num-2
+
+.align 16
+.Louter_g5:
+       wr      $ccr,   %g0,    %ccr
+___
+       &load_b($bp,$m0);               # m0=bp[i]
+$code.=<<___;
+       sub     $ap,    $num,   $ap     ! rewind
+       sub     $np,    $num,   $np
+       sub     $tp,    $num,   $tp
+
+       ldx     [$ap+0],        $aj     ! ap[0]
+       ldx     [$np+0],        $nj     ! np[0]
+
+       mulx    $aj,    $m0,    $lo0    ! ap[0]*bp[i]
+       ldx     [$tp],          $tj     ! tp[0]
+       umulxhi $aj,    $m0,    $hi0
+       ldx     [$ap+8],        $aj     ! ap[1]
+       addcc   $lo0,   $tj,    $lo0    ! ap[0]*bp[i]+tp[0]
+       mulx    $aj,    $m0,    $alo    ! ap[1]*bp[i]
+       addxc   %g0,    $hi0,   $hi0
+       mulx    $lo0,   $n0,    $m1     ! tp[0]*n0
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+       mulx    $nj,    $m1,    $lo1    ! np[0]*m1
+       add     $ap,    16,     $ap
+       umulxhi $nj,    $m1,    $hi1
+       ldx     [$np+8],        $nj     ! np[1]
+       add     $np,    16,     $np
+       addcc   $lo1,   $lo0,   $lo1
+       mulx    $nj,    $m1,    $nlo    ! np[1]*m1
+       addxc   %g0,    $hi1,   $hi1
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+\f
+       ba      .Linner_g5
+       sub     $num,   24,     $cnt    ! cnt=num-3
+.align 16
+.Linner_g5:
+       addcc   $alo,   $hi0,   $lo0
+       ldx     [$tp+8],        $tj     ! tp[j]
+       addxc   $aj,    %g0,    $hi0    ! ahi=aj
+       ldx     [$ap+0],        $aj     ! ap[j]
+       add     $ap,    8,      $ap
+       addcc   $nlo,   $hi1,   $lo1
+       mulx    $aj,    $m0,    $alo    ! ap[j]*bp[i]
+       addxc   $nj,    %g0,    $hi1    ! nhi=nj
+       ldx     [$np+0],        $nj     ! np[j]
+       add     $np,    8,      $np
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+       addcc   $lo0,   $tj,    $lo0    ! ap[j]*bp[i]+tp[j]
+       mulx    $nj,    $m1,    $nlo    ! np[j]*m1
+       addxc   %g0,    $hi0,   $hi0
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+       addcc   $lo1,   $lo0,   $lo1    ! np[j]*m1+ap[j]*bp[i]+tp[j]
+       addxc   %g0,    $hi1,   $hi1
+       stx     $lo1,   [$tp]           ! tp[j-1]
+       add     $tp,    8,      $tp
+       brnz,pt $cnt,   .Linner_g5
+       sub     $cnt,   8,      $cnt
+!.Linner_g5
+       ldx     [$tp+8],        $tj     ! tp[j]
+       addcc   $alo,   $hi0,   $lo0
+       addxc   $aj,    %g0,    $hi0    ! ahi=aj
+       addcc   $lo0,   $tj,    $lo0    ! ap[j]*bp[i]+tp[j]
+       addxc   %g0,    $hi0,   $hi0
+
+       addcc   $nlo,   $hi1,   $lo1
+       addxc   $nj,    %g0,    $hi1    ! nhi=nj
+       addcc   $lo1,   $lo0,   $lo1    ! np[j]*m1+ap[j]*bp[i]+tp[j]
+       addxc   %g0,    $hi1,   $hi1
+       stx     $lo1,   [$tp]           ! tp[j-1]
+
+       subcc   %g0,    $ovf,   %g0     ! move upmost overflow to CCR.xcc
+       addxccc $hi1,   $hi0,   $hi1
+       addxc   %g0,    %g0,    $ovf
+       stx     $hi1,   [$tp+8]
+       add     $tp,    16,     $tp
+
+       brnz,pt $i,     .Louter_g5
+       sub     $i,     8,      $i
+\f
+       sub     $ap,    $num,   $ap     ! rewind
+       sub     $np,    $num,   $np
+       sub     $tp,    $num,   $tp
+       ba      .Lsub_g5
+       subcc   $num,   8,      $cnt    ! cnt=num-1 and clear CCR.xcc
+
+.align 16
+.Lsub_g5:
+       ldx     [$tp],          $tj
+       add     $tp,    8,      $tp
+       ldx     [$np+0],        $nj
+       add     $np,    8,      $np
+       subccc  $tj,    $nj,    $t2     ! tp[j]-np[j]
+       srlx    $tj,    32,     $tj
+       srlx    $nj,    32,     $nj
+       subccc  $tj,    $nj,    $t3
+       add     $rp,    8,      $rp
+       st      $t2,    [$rp-4]         ! reverse order
+       st      $t3,    [$rp-8]
+       brnz,pt $cnt,   .Lsub_g5
+       sub     $cnt,   8,      $cnt
+
+       sub     $np,    $num,   $np     ! rewind
+       sub     $tp,    $num,   $tp
+       sub     $rp,    $num,   $rp
+
+       subc    $ovf,   %g0,    $ovf    ! handle upmost overflow bit
+       and     $tp,    $ovf,   $ap
+       andn    $rp,    $ovf,   $np
+       or      $np,    $ap,    $ap     ! ap=borrow?tp:rp
+       ba      .Lcopy_g5
+       sub     $num,   8,      $cnt
+
+.align 16
+.Lcopy_g5:                             ! copy or in-place refresh
+       ldx     [$ap+0],        $t2
+       add     $ap,    8,      $ap
+       stx     %g0,    [$tp]           ! zap
+       add     $tp,    8,      $tp
+       stx     $t2,    [$rp+0]
+       add     $rp,    8,      $rp
+       brnz    $cnt,   .Lcopy_g5
+       sub     $cnt,   8,      $cnt
+
+       mov     1,      %o0
+       ret
+       restore
+.type  bn_mul_mont_gather5_t4, #function
+.size  bn_mul_mont_gather5_t4, .-bn_mul_mont_gather5_t4
+___
+}
+\f
+$code.=<<___;
+.globl bn_flip_t4
+.align 32
+bn_flip_t4:
+.Loop_flip:
+       ld      [%o1+0],        %o4
+       sub     %o2,    1,      %o2
+       ld      [%o1+4],        %o5
+       add     %o1,    8,      %o1
+       st      %o5,    [%o0+0]
+       st      %o4,    [%o0+4]
+       brnz    %o2,    .Loop_flip
+       add     %o0,    8,      %o0
+       retl
+       nop
+.type  bn_flip_t4, #function
+.size  bn_flip_t4, .-bn_flip_t4
+
+.globl bn_flip_n_scatter5_t4
+.align 32
+bn_flip_n_scatter5_t4:
+       sll     %o3,    3,      %o3
+       srl     %o1,    1,      %o1
+       add     %o3,    %o2,    %o2     ! &pwrtbl[pwr]
+       sub     %o1,    1,      %o1
+.Loop_flip_n_scatter5:
+       ld      [%o0+0],        %o4     ! inp[i]
+       ld      [%o0+4],        %o5
+       add     %o0,    8,      %o0
+       sllx    %o5,    32,     %o5
+       or      %o4,    %o5,    %o5
+       stx     %o5,    [%o2]
+       add     %o2,    32*8,   %o2
+       brnz    %o1,    .Loop_flip_n_scatter5
+       sub     %o1,    1,      %o1
+       retl
+       nop
+.type  bn_flip_n_scatter5_t4, #function
+.size  bn_flip_n_scatter5_t4, .-bn_flip_n_scatter5_t4
+
+.globl bn_gather5_t4
+.align 32
+bn_gather5_t4:
+___
+       &load_ccr("%o2","%o3","%g1");
+$code.=<<___;
+       sub     %o1,    1,      %o1
+.Loop_gather5:
+___
+       &load_b("%o2","%g1");
+$code.=<<___;
+       stx     %g1,    [%o0]
+       add     %o0,    8,      %o0
+       brnz    %o1,    .Loop_gather5
+       sub     %o1,    1,      %o1
+
+       retl
+       nop
+.type  bn_gather5_t4, #function
+.size  bn_gather5_t4, .-bn_gather5_t4
+
+.asciz "Montgomery Multiplication for SPARC T4, David S. Miller, Andy Polyakov"
+.align 4
+___
+
+&emit_assembler();
+
+close STDOUT;
diff --git a/crypto/bn/asm/sparcv9-gf2m.pl b/crypto/bn/asm/sparcv9-gf2m.pl
new file mode 100644 (file)
index 0000000..ab94cd9
--- /dev/null
@@ -0,0 +1,190 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# October 2012
+#
+# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
+# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
+# the time being... Except that it has two code paths: one suitable
+# for all SPARCv9 processors and one for VIS3-capable ones. Former
+# delivers ~25-45% more, more for longer keys, heaviest DH and DSA
+# verify operations on venerable UltraSPARC II. On T4 VIS3 code is
+# ~100-230% faster than gcc-generated code and ~35-90% faster than
+# the pure SPARCv9 code path.
+
+$locals=16*8;
+
+$tab="%l0";
+
+@T=("%g2","%g3");
+@i=("%g4","%g5");
+
+($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
+($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;
+
+$code.=<<___;
+#include <sparc_arch.h>
+
+#ifdef __arch64__
+.register      %g2,#scratch
+.register      %g3,#scratch
+#endif
+
+#ifdef __PIC__
+SPARC_PIC_THUNK(%g1)
+#endif
+
+.globl bn_GF2m_mul_2x2
+.align 16
+bn_GF2m_mul_2x2:
+        SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
+        ld     [%g1+0],%g1                     ! OPENSSL_sparcv9cap_P[0]
+
+        andcc  %g1, SPARCV9_VIS3, %g0
+        bz,pn  %icc,.Lsoftware
+        nop
+
+       sllx    %o1, 32, %o1
+       sllx    %o3, 32, %o3
+       or      %o2, %o1, %o1
+       or      %o4, %o3, %o3
+       .word   0x95b262ab                      ! xmulx   %o1, %o3, %o2
+       .word   0x99b262cb                      ! xmulxhi %o1, %o3, %o4
+       srlx    %o2, 32, %o1                    ! 13 cycles later
+       st      %o2, [%o0+0]
+       st      %o1, [%o0+4]
+       srlx    %o4, 32, %o3
+       st      %o4, [%o0+8]
+       retl
+       st      %o3, [%o0+12]
+
+.align 16
+.Lsoftware:
+       save    %sp,-STACK_FRAME-$locals,%sp
+
+       sllx    %i1,32,$a
+       mov     -1,$a12
+       sllx    %i3,32,$b
+       or      %i2,$a,$a
+       srlx    $a12,1,$a48                     ! 0x7fff...
+       or      %i4,$b,$b
+       srlx    $a12,2,$a12                     ! 0x3fff...
+       add     %sp,STACK_BIAS+STACK_FRAME,$tab
+
+       sllx    $a,2,$a4
+       mov     $a,$a1
+       sllx    $a,1,$a2
+
+       srax    $a4,63,@i[1]                    ! broadcast 61st bit
+       and     $a48,$a4,$a4                    ! (a<<2)&0x7fff...
+       srlx    $a48,2,$a48
+       srax    $a2,63,@i[0]                    ! broadcast 62nd bit
+       and     $a12,$a2,$a2                    ! (a<<1)&0x3fff...
+       srax    $a1,63,$lo                      ! broadcast 63rd bit
+       and     $a48,$a1,$a1                    ! (a<<0)&0x1fff...
+
+       sllx    $a1,3,$a8
+       and     $b,$lo,$lo
+       and     $b,@i[0],@i[0]
+       and     $b,@i[1],@i[1]
+
+       stx     %g0,[$tab+0*8]                  ! tab[0]=0
+       xor     $a1,$a2,$a12
+       stx     $a1,[$tab+1*8]                  ! tab[1]=a1
+       stx     $a2,[$tab+2*8]                  ! tab[2]=a2
+        xor    $a4,$a8,$a48
+       stx     $a12,[$tab+3*8]                 ! tab[3]=a1^a2
+        xor    $a4,$a1,$a1
+
+       stx     $a4,[$tab+4*8]                  ! tab[4]=a4
+       xor     $a4,$a2,$a2
+       stx     $a1,[$tab+5*8]                  ! tab[5]=a1^a4
+       xor     $a4,$a12,$a12
+       stx     $a2,[$tab+6*8]                  ! tab[6]=a2^a4
+        xor    $a48,$a1,$a1
+       stx     $a12,[$tab+7*8]                 ! tab[7]=a1^a2^a4
+        xor    $a48,$a2,$a2
+
+       stx     $a8,[$tab+8*8]                  ! tab[8]=a8
+       xor     $a48,$a12,$a12
+       stx     $a1,[$tab+9*8]                  ! tab[9]=a1^a8
+        xor    $a4,$a1,$a1
+       stx     $a2,[$tab+10*8]                 ! tab[10]=a2^a8
+        xor    $a4,$a2,$a2
+       stx     $a12,[$tab+11*8]                ! tab[11]=a1^a2^a8
+
+       xor     $a4,$a12,$a12
+       stx     $a48,[$tab+12*8]                ! tab[12]=a4^a8
+        srlx   $lo,1,$hi
+       stx     $a1,[$tab+13*8]                 ! tab[13]=a1^a4^a8
+        sllx   $lo,63,$lo
+       stx     $a2,[$tab+14*8]                 ! tab[14]=a2^a4^a8
+        srlx   @i[0],2,@T[0]
+       stx     $a12,[$tab+15*8]                ! tab[15]=a1^a2^a4^a8
+
+       sllx    @i[0],62,$a1
+        sllx   $b,3,@i[0]
+       srlx    @i[1],3,@T[1]
+        and    @i[0],`0xf<<3`,@i[0]
+       sllx    @i[1],61,$a2
+        ldx    [$tab+@i[0]],@i[0]
+        srlx   $b,4-3,@i[1]
+       xor     @T[0],$hi,$hi
+        and    @i[1],`0xf<<3`,@i[1]
+       xor     $a1,$lo,$lo
+        ldx    [$tab+@i[1]],@i[1]
+       xor     @T[1],$hi,$hi
+
+       xor     @i[0],$lo,$lo
+       srlx    $b,8-3,@i[0]
+        xor    $a2,$lo,$lo
+       and     @i[0],`0xf<<3`,@i[0]
+___
+for($n=1;$n<14;$n++) {
+$code.=<<___;
+       sllx    @i[1],`$n*4`,@T[0]
+       ldx     [$tab+@i[0]],@i[0]
+       srlx    @i[1],`64-$n*4`,@T[1]
+       xor     @T[0],$lo,$lo
+       srlx    $b,`($n+2)*4`-3,@i[1]
+       xor     @T[1],$hi,$hi
+       and     @i[1],`0xf<<3`,@i[1]
+___
+       push(@i,shift(@i)); push(@T,shift(@T));
+}
+$code.=<<___;
+       sllx    @i[1],`$n*4`,@T[0]
+       ldx     [$tab+@i[0]],@i[0]
+       srlx    @i[1],`64-$n*4`,@T[1]
+       xor     @T[0],$lo,$lo
+
+       sllx    @i[0],`($n+1)*4`,@T[0]
+        xor    @T[1],$hi,$hi
+       srlx    @i[0],`64-($n+1)*4`,@T[1]
+       xor     @T[0],$lo,$lo
+       xor     @T[1],$hi,$hi
+
+       srlx    $lo,32,%i1
+       st      $lo,[%i0+0]
+       st      %i1,[%i0+4]
+       srlx    $hi,32,%i2
+       st      $hi,[%i0+8]
+       st      %i2,[%i0+12]
+
+       ret
+       restore
+.type  bn_GF2m_mul_2x2,#function
+.size  bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
+.asciz "GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
+.align 4
+___
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+print $code;
+close STDOUT;
diff --git a/crypto/bn/asm/vis3-mont.pl b/crypto/bn/asm/vis3-mont.pl
new file mode 100644 (file)
index 0000000..263ac02
--- /dev/null
@@ -0,0 +1,373 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# October 2012.
+#
+# SPARCv9 VIS3 Montgomery multiplication procedure suitable for T3 and
+# onward. There are three new instructions used here: umulxhi,
+# addxc[cc] and initializing store. On T3 RSA private key operations
+# are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key
+# lengths. This is without dedicated squaring procedure. On T4
+# corresponding coefficients are 1.47/2.10/2.80/2.90x, which is mostly
+# for reference purposes, because T4 has dedicated Montgomery
+# multiplication and squaring *instructions* that deliver even more.
+
+$bits=32;
+for (@ARGV)     { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
+if ($bits==64)  { $bias=2047; $frame=192; }
+else            { $bias=0;    $frame=112; }
+
+$code.=<<___ if ($bits==64);
+.register      %g2,#scratch
+.register      %g3,#scratch
+___
+$code.=<<___;
+.section       ".text",#alloc,#execinstr
+___
+
+($n0,$m0,$m1,$lo0,$hi0, $lo1,$hi1,$aj,$alo,$nj,$nlo,$tj)=
+       (map("%g$_",(1..5)),map("%o$_",(0..5,7)));
+
+# int bn_mul_mont(
+$rp="%o0";     # BN_ULONG *rp,
+$ap="%o1";     # const BN_ULONG *ap,
+$bp="%o2";     # const BN_ULONG *bp,
+$np="%o3";     # const BN_ULONG *np,
+$n0p="%o4";    # const BN_ULONG *n0,
+$num="%o5";    # int num);     # caller ensures that num is even
+                               # and >=6
+$code.=<<___;
+.globl bn_mul_mont_vis3
+.align 32
+bn_mul_mont_vis3:
+       add     %sp,    $bias,  %g4     ! real top of stack
+       sll     $num,   2,      $num    ! size in bytes
+       add     $num,   63,     %g5
+       andn    %g5,    63,     %g5     ! buffer size rounded up to 64 bytes
+       add     %g5,    %g5,    %g1
+       add     %g5,    %g1,    %g1     ! 3*buffer size
+       sub     %g4,    %g1,    %g1
+       andn    %g1,    63,     %g1     ! align at 64 byte
+       sub     %g1,    $frame, %g1     ! new top of stack
+       sub     %g1,    %g4,    %g1
+
+       save    %sp,    %g1,    %sp
+___
+\f
+#      +-------------------------------+<----- %sp
+#      .                               .
+#      +-------------------------------+<----- aligned at 64 bytes
+#      | __int64 tmp[0]                |
+#      +-------------------------------+
+#      .                               .
+#      .                               .
+#      +-------------------------------+<----- aligned at 64 bytes
+#      | __int64 ap[1..0]              |       converted ap[]
+#      +-------------------------------+
+#      | __int64 np[1..0]              |       converted np[]
+#      +-------------------------------+
+#      | __int64 ap[3..2]              |
+#      .                               .
+#      .                               .
+#      +-------------------------------+
+($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5));
+($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz,$anp)=map("%l$_",(0..7));
+($ovf,$i)=($t0,$t1);
+$code.=<<___;
+       ld      [$n0p+0],       $t0     ! pull n0[0..1] value
+       add     %sp, $bias+$frame, $tp
+       ld      [$n0p+4],       $t1
+       add     $tp,    %g5,    $anp
+       ld      [$bp+0],        $t2     ! m0=bp[0]
+       sllx    $t1,    32,     $n0
+       ld      [$bp+4],        $t3
+       or      $t0,    $n0,    $n0
+       add     $bp,    8,      $bp
+\f
+       ld      [$ap+0],        $t0     ! ap[0]
+       sllx    $t3,    32,     $m0
+       ld      [$ap+4],        $t1
+       or      $t2,    $m0,    $m0
+
+       ld      [$ap+8],        $t2     ! ap[1]
+       sllx    $t1,    32,     $aj
+       ld      [$ap+12],       $t3
+       or      $t0,    $aj,    $aj
+       add     $ap,    16,     $ap
+       stx     $aj,    [$anp]          ! converted ap[0]
+
+       mulx    $aj,    $m0,    $lo0    ! ap[0]*bp[0]
+       umulxhi $aj,    $m0,    $hi0
+
+       ld      [$np+0],        $t0     ! np[0]
+       sllx    $t3,    32,     $aj
+       ld      [$np+4],        $t1
+       or      $t2,    $aj,    $aj
+
+       ld      [$np+8],        $t2     ! np[1]
+       sllx    $t1,    32,     $nj
+       ld      [$np+12],       $t3
+       or      $t0, $nj,       $nj
+       add     $np,    16,     $np
+       stx     $nj,    [$anp+8]        ! converted np[0]
+
+       mulx    $lo0,   $n0,    $m1     ! "tp[0]"*n0
+       stx     $aj,    [$anp+16]       ! converted ap[1]
+
+       mulx    $aj,    $m0,    $alo    ! ap[1]*bp[0]
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+
+       mulx    $nj,    $m1,    $lo1    ! np[0]*m1
+       umulxhi $nj,    $m1,    $hi1
+
+       sllx    $t3,    32,     $nj
+       or      $t2,    $nj,    $nj
+       stx     $nj,    [$anp+24]       ! converted np[1]
+       add     $anp,   32,     $anp
+
+       addcc   $lo0,   $lo1,   $lo1
+       addxc   %g0,    $hi1,   $hi1
+
+       mulx    $nj,    $m1,    $nlo    ! np[1]*m1
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+\f
+       ba      .L1st
+       sub     $num,   24,     $cnt    ! cnt=num-3
+
+.align 16
+.L1st:
+       ld      [$ap+0],        $t0     ! ap[j]
+       addcc   $alo,   $hi0,   $lo0
+       ld      [$ap+4],        $t1
+       addxc   $aj,    %g0,    $hi0
+
+       sllx    $t1,    32,     $aj
+       add     $ap,    8,      $ap
+       or      $t0,    $aj,    $aj
+       stx     $aj,    [$anp]          ! converted ap[j]
+
+       ld      [$np+0],        $t2     ! np[j]
+       addcc   $nlo,   $hi1,   $lo1
+       ld      [$np+4],        $t3
+       addxc   $nj,    %g0,    $hi1    ! nhi=nj
+
+       sllx    $t3,    32,     $nj
+       add     $np,    8,      $np
+       mulx    $aj,    $m0,    $alo    ! ap[j]*bp[0]
+       or      $t2,    $nj,    $nj
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+       stx     $nj,    [$anp+8]        ! converted np[j]
+       add     $anp,   16,     $anp    ! anp++
+
+       mulx    $nj,    $m1,    $nlo    ! np[j]*m1
+       addcc   $lo0,   $lo1,   $lo1    ! np[j]*m1+ap[j]*bp[0]
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+       addxc   %g0,    $hi1,   $hi1
+       stx     $lo1,   [$tp]           ! tp[j-1]
+       add     $tp,    8,      $tp     ! tp++
+
+       brnz,pt $cnt,   .L1st
+       sub     $cnt,   8,      $cnt    ! j--
+!.L1st
+       addcc   $alo,   $hi0,   $lo0
+       addxc   $aj,    %g0,    $hi0    ! ahi=aj
+
+       addcc   $nlo,   $hi1,   $lo1
+       addxc   $nj,    %g0,    $hi1
+       addcc   $lo0,   $lo1,   $lo1    ! np[j]*m1+ap[j]*bp[0]
+       addxc   %g0,    $hi1,   $hi1
+       stx     $lo1,   [$tp]           ! tp[j-1]
+       add     $tp,    8,      $tp
+
+       addcc   $hi0,   $hi1,   $hi1
+       addxc   %g0,    %g0,    $ovf    ! upmost overflow bit
+       stx     $hi1,   [$tp]
+       add     $tp,    8,      $tp
+\f
+       ba      .Louter
+       sub     $num,   16,     $i      ! i=num-2
+
+.align 16
+.Louter:
+       ld      [$bp+0],        $t2     ! m0=bp[i]
+       ld      [$bp+4],        $t3
+
+       sub     $anp,   $num,   $anp    ! rewind
+       sub     $tp,    $num,   $tp
+       sub     $anp,   $num,   $anp
+
+       add     $bp,    8,      $bp
+       sllx    $t3,    32,     $m0
+       ldx     [$anp+0],       $aj     ! ap[0]
+       or      $t2,    $m0,    $m0
+       ldx     [$anp+8],       $nj     ! np[0]
+
+       mulx    $aj,    $m0,    $lo0    ! ap[0]*bp[i]
+       ldx     [$tp],          $tj     ! tp[0]
+       umulxhi $aj,    $m0,    $hi0
+       ldx     [$anp+16],      $aj     ! ap[1]
+       addcc   $lo0,   $tj,    $lo0    ! ap[0]*bp[i]+tp[0]
+       mulx    $aj,    $m0,    $alo    ! ap[1]*bp[i]
+       addxc   %g0,    $hi0,   $hi0
+       mulx    $lo0,   $n0,    $m1     ! tp[0]*n0
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+       mulx    $nj,    $m1,    $lo1    ! np[0]*m1
+       umulxhi $nj,    $m1,    $hi1
+       ldx     [$anp+24],      $nj     ! np[1]
+       add     $anp,   32,     $anp
+       addcc   $lo1,   $lo0,   $lo1
+       mulx    $nj,    $m1,    $nlo    ! np[1]*m1
+       addxc   %g0,    $hi1,   $hi1
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+\f
+       ba      .Linner
+       sub     $num,   24,     $cnt    ! cnt=num-3
+.align 16
+.Linner:
+       addcc   $alo,   $hi0,   $lo0
+       ldx     [$tp+8],        $tj     ! tp[j]
+       addxc   $aj,    %g0,    $hi0    ! ahi=aj
+       ldx     [$anp+0],       $aj     ! ap[j]
+       addcc   $nlo,   $hi1,   $lo1
+       mulx    $aj,    $m0,    $alo    ! ap[j]*bp[i]
+       addxc   $nj,    %g0,    $hi1    ! nhi=nj
+       ldx     [$anp+8],       $nj     ! np[j]
+       add     $anp,   16,     $anp
+       umulxhi $aj,    $m0,    $aj     ! ahi=aj
+       addcc   $lo0,   $tj,    $lo0    ! ap[j]*bp[i]+tp[j]
+       mulx    $nj,    $m1,    $nlo    ! np[j]*m1
+       addxc   %g0,    $hi0,   $hi0
+       umulxhi $nj,    $m1,    $nj     ! nhi=nj
+       addcc   $lo1,   $lo0,   $lo1    ! np[j]*m1+ap[j]*bp[i]+tp[j]
+       addxc   %g0,    $hi1,   $hi1
+       stx     $lo1,   [$tp]           ! tp[j-1]
+       add     $tp,    8,      $tp
+       brnz,pt $cnt,   .Linner
+       sub     $cnt,   8,      $cnt
+!.Linner
+       ldx     [$tp+8],        $tj     ! tp[j]
+       addcc   $alo,   $hi0,   $lo0
+       addxc   $aj,    %g0,    $hi0    ! ahi=aj
+       addcc   $lo0,   $tj,    $lo0    ! ap[j]*bp[i]+tp[j]
+       addxc   %g0,    $hi0,   $hi0
+
+       addcc   $nlo,   $hi1,   $lo1
+       addxc   $nj,    %g0,    $hi1    ! nhi=nj
+       addcc   $lo1,   $lo0,   $lo1    ! np[j]*m1+ap[j]*bp[i]+tp[j]
+       addxc   %g0,    $hi1,   $hi1
+       stx     $lo1,   [$tp]           ! tp[j-1]
+
+       subcc   %g0,    $ovf,   %g0     ! move upmost overflow to CCR.xcc
+       addxccc $hi1,   $hi0,   $hi1
+       addxc   %g0,    %g0,    $ovf
+       stx     $hi1,   [$tp+8]
+       add     $tp,    16,     $tp
+
+       brnz,pt $i,     .Louter
+       sub     $i,     8,      $i
+\f
+       sub     $anp,   $num,   $anp    ! rewind
+       sub     $tp,    $num,   $tp
+       sub     $anp,   $num,   $anp
+       ba      .Lsub
+       subcc   $num,   8,      $cnt    ! cnt=num-1 and clear CCR.xcc
+
+.align 16
+.Lsub:
+       ldx     [$tp],          $tj
+       add     $tp,    8,      $tp
+       ldx     [$anp+8],       $nj
+       add     $anp,   16,     $anp
+       subccc  $tj,    $nj,    $t2     ! tp[j]-np[j]
+       srlx    $tj,    32,     $tj
+       srlx    $nj,    32,     $nj
+       subccc  $tj,    $nj,    $t3
+       add     $rp,    8,      $rp
+       st      $t2,    [$rp-4]         ! reverse order
+       st      $t3,    [$rp-8]
+       brnz,pt $cnt,   .Lsub
+       sub     $cnt,   8,      $cnt
+
+       sub     $anp,   $num,   $anp    ! rewind
+       sub     $tp,    $num,   $tp
+       sub     $anp,   $num,   $anp
+       sub     $rp,    $num,   $rp
+
+       subc    $ovf,   %g0,    $ovf    ! handle upmost overflow bit
+       and     $tp,    $ovf,   $ap
+       andn    $rp,    $ovf,   $np
+       or      $np,    $ap,    $ap     ! ap=borrow?tp:rp
+       ba      .Lcopy
+       sub     $num,   8,      $cnt
+
+.align 16
+.Lcopy:                                        ! copy or in-place refresh
+       ld      [$ap+0],        $t2
+       ld      [$ap+4],        $t3
+       add     $ap,    8,      $ap
+       stx     %g0,    [$tp]           ! zap
+       add     $tp,    8,      $tp
+       stx     %g0,    [$anp]          ! zap
+       stx     %g0,    [$anp+8]
+       add     $anp,   16,     $anp
+       st      $t3,    [$rp+0]         ! flip order
+       st      $t2,    [$rp+4]
+       add     $rp,    8,      $rp
+       brnz    $cnt,   .Lcopy
+       sub     $cnt,   8,      $cnt
+
+       mov     1,      %o0
+       ret
+       restore
+.type  bn_mul_mont_vis3, #function
+.size  bn_mul_mont_vis3, .-bn_mul_mont_vis3
+.asciz  "Montgomery Multiplication for SPARCv9 VIS3, CRYPTOGAMS by <appro\@openssl.org>"
+.align 4
+___
+\f
+# Purpose of these subroutines is to explicitly encode VIS instructions,
+# so that one can compile the module without having to specify VIS
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# Idea is to reserve for option to produce "universal" binary and let
+# programmer detect if current CPU is VIS capable at run-time.
+sub unvis3 {
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
+my ($ref,$opf);
+my %visopf = ( "addxc"         => 0x011,
+               "addxccc"       => 0x013,
+               "umulxhi"       => 0x016        );
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rd";
+
+    if ($opf=$visopf{$mnemonic}) {
+       foreach ($rs1,$rs2,$rd) {
+           return $ref if (!/%([goli])([0-9])/);
+           $_=$bias{$1}+$2;
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+
+foreach (split("\n",$code)) {
+       s/\`([^\`]*)\`/eval $1/ge;
+
+       s/\b(umulxhi|addxc[c]{0,2})\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
+               &unvis3($1,$2,$3,$4)
+        /ge;
+
+       print $_,"\n";
+}
+
+close STDOUT;
index 9c5074b..d548886 100644 (file)
@@ -55,7 +55,7 @@
  *    machine.
  */
 
-# ifdef _WIN64
+# if defined(_WIN64) || !defined(__LP64__)
 #  define BN_ULONG unsigned long long
 # else
 #  define BN_ULONG unsigned long
@@ -63,7 +63,6 @@
 
 # undef mul
 # undef mul_add
-# undef sqr
 
 /*-
  * "m"(a), "+m"(r)      is the way to favor DirectPath µ-code;
@@ -99,8 +98,8 @@
                 : "cc");                \
         (r)=carry, carry=high;          \
         } while (0)
-
-# define sqr(r0,r1,a)                    \
+# undef sqr
+# define sqr(r0,r1,a)                   \
         asm ("mulq %2"                  \
                 : "=a"(r0),"=d"(r1)     \
                 : "a"(a)                \
@@ -204,20 +203,22 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
 BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                       int n)
 {
-    BN_ULONG ret = 0, i = 0;
+    BN_ULONG ret;
+    size_t i = 0;
 
     if (n <= 0)
         return 0;
 
-    asm volatile ("       subq    %2,%2           \n"
+    asm volatile ("       subq    %0,%0           \n" /* clear carry */
+                  "       jmp     1f              \n"
                   ".p2align 4                     \n"
                   "1:     movq    (%4,%2,8),%0    \n"
                   "       adcq    (%5,%2,8),%0    \n"
                   "       movq    %0,(%3,%2,8)    \n"
-                  "       leaq    1(%2),%2        \n"
+                  "       lea     1(%2),%2        \n"
                   "       loop    1b              \n"
-                  "       sbbq    %0,%0           \n":"=&a" (ret), "+c"(n),
-                  "=&r"(i)
+                  "       sbbq    %0,%0           \n":"=&r" (ret), "+c"(n),
+                  "+r"(i)
                   :"r"(rp), "r"(ap), "r"(bp)
                   :"cc", "memory");
 
@@ -228,20 +229,22 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
 BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                       int n)
 {
-    BN_ULONG ret = 0, i = 0;
+    BN_ULONG ret;
+    size_t i = 0;
 
     if (n <= 0)
         return 0;
 
-    asm volatile ("       subq    %2,%2           \n"
+    asm volatile ("       subq    %0,%0           \n" /* clear borrow */
+                  "       jmp     1f              \n"
                   ".p2align 4                     \n"
                   "1:     movq    (%4,%2,8),%0    \n"
                   "       sbbq    (%5,%2,8),%0    \n"
                   "       movq    %0,(%3,%2,8)    \n"
-                  "       leaq    1(%2),%2        \n"
+                  "       lea     1(%2),%2        \n"
                   "       loop    1b              \n"
-                  "       sbbq    %0,%0           \n":"=&a" (ret), "+c"(n),
-                  "=&r"(i)
+                  "       sbbq    %0,%0           \n":"=&r" (ret), "+c"(n),
+                  "+r"(i)
                   :"r"(rp), "r"(ap), "r"(bp)
                   :"cc", "memory");
 
@@ -313,55 +316,58 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
  */
 # if 0
 /* original macros are kept for reference purposes */
-#  define mul_add_c(a,b,c0,c1,c2) {       \
-        BN_ULONG ta=(a),tb=(b);         \
-        t1 = ta * tb;                   \
-        t2 = BN_UMULT_HIGH(ta,tb);      \
-        c0 += t1; t2 += (c0<t1)?1:0;    \
-        c1 += t2; c2 += (c1<t2)?1:0;    \
-        }
-
-#  define mul_add_c2(a,b,c0,c1,c2) {      \
-        BN_ULONG ta=(a),tb=(b),t0;      \
-        t1 = BN_UMULT_HIGH(ta,tb);      \
-        t0 = ta * tb;                   \
-        c0 += t0; t2 = t1+((c0<t0)?1:0);\
-        c1 += t2; c2 += (c1<t2)?1:0;    \
-        c0 += t0; t1 += (c0<t0)?1:0;    \
-        c1 += t1; c2 += (c1<t1)?1:0;    \
-        }
+#  define mul_add_c(a,b,c0,c1,c2)       do {    \
+        BN_ULONG ta = (a), tb = (b);            \
+        BN_ULONG lo, hi;                        \
+        BN_UMULT_LOHI(lo,hi,ta,tb);             \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
+
+#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
+        BN_ULONG ta = (a), tb = (b);            \
+        BN_ULONG lo, hi, tt;                    \
+        BN_UMULT_LOHI(lo,hi,ta,tb);             \
+        c0 += lo; tt = hi+((c0<lo)?1:0);        \
+        c1 += tt; c2 += (c1<tt)?1:0;            \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
+
+#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
+        BN_ULONG ta = (a)[i];                   \
+        BN_ULONG lo, hi;                        \
+        BN_UMULT_LOHI(lo,hi,ta,ta);             \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
 # else
-#  define mul_add_c(a,b,c0,c1,c2) do {    \
+#  define mul_add_c(a,b,c0,c1,c2) do {  \
+        BN_ULONG t1,t2;                 \
         asm ("mulq %3"                  \
                 : "=a"(t1),"=d"(t2)     \
                 : "a"(a),"m"(b)         \
                 : "cc");                \
-        asm ("addq %2,%0; adcq %3,%1"   \
-                : "+r"(c0),"+d"(t2)     \
-                : "a"(t1),"g"(0)        \
-                : "cc");                \
-        asm ("addq %2,%0; adcq %3,%1"   \
-                : "+r"(c1),"+r"(c2)     \
-                : "d"(t2),"g"(0)        \
-                : "cc");                \
+        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+                : "+r"(c0),"+r"(c1),"+r"(c2)            \
+                : "r"(t1),"r"(t2),"g"(0)                \
+                : "cc");                                \
         } while (0)
 
-#  define sqr_add_c(a,i,c0,c1,c2) do {    \
+#  define sqr_add_c(a,i,c0,c1,c2) do {  \
+        BN_ULONG t1,t2;                 \
         asm ("mulq %2"                  \
                 : "=a"(t1),"=d"(t2)     \
                 : "a"(a[i])             \
                 : "cc");                \
-        asm ("addq %2,%0; adcq %3,%1"   \
-                : "+r"(c0),"+d"(t2)     \
-                : "a"(t1),"g"(0)        \
-                : "cc");                \
-        asm ("addq %2,%0; adcq %3,%1"   \
-                : "+r"(c1),"+r"(c2)     \
-                : "d"(t2),"g"(0)        \
-                : "cc");                \
+        asm ("addq %3,%0; adcq %4,%1; adcq %5,%2"       \
+                : "+r"(c0),"+r"(c1),"+r"(c2)            \
+                : "r"(t1),"r"(t2),"g"(0)                \
+                : "cc");                                \
         } while (0)
 
-#  define mul_add_c2(a,b,c0,c1,c2) do {   \
+#  define mul_add_c2(a,b,c0,c1,c2) do { \
+        BN_ULONG t1,t2;                 \
         asm ("mulq %3"                  \
                 : "=a"(t1),"=d"(t2)     \
                 : "a"(a),"m"(b)         \
@@ -382,7 +388,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
 
 void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 {
-    BN_ULONG t1, t2;
     BN_ULONG c1, c2, c3;
 
     c1 = 0;
@@ -486,7 +491,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 
 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 {
-    BN_ULONG t1, t2;
     BN_ULONG c1, c2, c3;
 
     c1 = 0;
@@ -526,7 +530,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 
 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
 {
-    BN_ULONG t1, t2;
     BN_ULONG c1, c2, c3;
 
     c1 = 0;
@@ -602,7 +605,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
 
 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
 {
-    BN_ULONG t1, t2;
     BN_ULONG c1, c2, c3;
 
     c1 = 0;
index 17fb94c..2989b58 100755 (executable)
 # to *initial* version of this module from 2005 is ~0%/30%/40%/45%
 # for 512-/1024-/2048-/4096-bit RSA *sign* benchmarks respectively.
 
+# June 2013.
+#
+# Optimize reduction in squaring procedure and improve 1024+-bit RSA
+# sign performance by 10-16% on Intel Sandy Bridge and later
+# (virtually the same on non-Intel processors).
+
+# August 2013.
+#
+# Add MULX/ADOX/ADCX code path.
+
 $flavour = shift;
 $output  = shift;
 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
@@ -43,6 +53,21 @@ die "can't locate x86_64-xlate.pl";
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
 
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $addx = ($1>=2.23);
+}
+
+if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+           `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $addx = ($1>=2.10);
+}
+
+if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+           `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $addx = ($1>=12);
+}
+
 # int bn_mul_mont(
 $rp="%rdi";    # BN_ULONG *rp,
 $ap="%rsi";    # const BN_ULONG *ap,
@@ -61,6 +86,8 @@ $m1="%rbp";
 $code=<<___;
 .text
 
+.extern        OPENSSL_ia32cap_P
+
 .globl bn_mul_mont
 .type  bn_mul_mont,\@function,6
 .align 16
@@ -69,9 +96,16 @@ bn_mul_mont:
        jnz     .Lmul_enter
        cmp     \$8,${num}d
        jb      .Lmul_enter
+___
+$code.=<<___ if ($addx);
+       mov     OPENSSL_ia32cap_P+8(%rip),%r11d
+___
+$code.=<<___;
        cmp     $ap,$bp
        jne     .Lmul4x_enter
-       jmp     .Lsqr4x_enter
+       test    \$7,${num}d
+       jz      .Lsqr8x_enter
+       jmp     .Lmul4x_enter
 
 .align 16
 .Lmul_enter:
@@ -227,7 +261,7 @@ $code.=<<___;
 
        lea     1($i),$i                # i++
        cmp     $num,$i
-       jl      .Louter
+       jb      .Louter
 
        xor     $i,$i                   # i=0 and clear CF!
        mov     (%rsp),%rax             # tp[0]
@@ -280,6 +314,13 @@ $code.=<<___;
 .align 16
 bn_mul4x_mont:
 .Lmul4x_enter:
+___
+$code.=<<___ if ($addx);
+       and     \$0x80100,%r11d
+       cmp     \$0x80100,%r11d
+       je      .Lmulx4x_enter
+___
+$code.=<<___;
        push    %rbx
        push    %rbp
        push    %r12
@@ -401,7 +442,7 @@ $code.=<<___;
        mov     $N[1],-32(%rsp,$j,8)    # tp[j-1]
        mov     %rdx,$N[0]
        cmp     $num,$j
-       jl      .L1st4x
+       jb      .L1st4x
 
        mulq    $m0                     # ap[j]*bp[0]
        add     %rax,$A[0]
@@ -549,7 +590,7 @@ $code.=<<___;
        mov     $N[1],-32(%rsp,$j,8)    # tp[j-1]
        mov     %rdx,$N[0]
        cmp     $num,$j
-       jl      .Linner4x
+       jb      .Linner4x
 
        mulq    $m0                     # ap[j]*bp[i]
        add     %rax,$A[0]
@@ -595,7 +636,7 @@ $code.=<<___;
        mov     $N[1],(%rsp,$j,8)       # store upmost overflow bit
 
        cmp     $num,$i
-       jl      .Louter4x
+       jb      .Louter4x
 ___
 {
 my @ri=("%rax","%rdx",$m0,$m1);
@@ -688,25 +729,30 @@ ___
 }}}
 \f{{{
 ######################################################################
-# void bn_sqr4x_mont(
+# void bn_sqr8x_mont(
 my $rptr="%rdi";       # const BN_ULONG *rptr,
 my $aptr="%rsi";       # const BN_ULONG *aptr,
 my $bptr="%rdx";       # not used
 my $nptr="%rcx";       # const BN_ULONG *nptr,
 my $n0  ="%r8";                # const BN_ULONG *n0);
-my $num ="%r9";                # int num, has to be divisible by 4 and
-                       # not less than 8
+my $num ="%r9";                # int num, has to be divisible by 8
 
 my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
 my @A0=("%r10","%r11");
 my @A1=("%r12","%r13");
 my ($a0,$a1,$ai)=("%r14","%r15","%rbx");
 
+$code.=<<___   if ($addx);
+.extern        bn_sqrx8x_internal              # see x86_64-mont5 module
+___
 $code.=<<___;
-.type  bn_sqr4x_mont,\@function,6
-.align 16
-bn_sqr4x_mont:
-.Lsqr4x_enter:
+.extern        bn_sqr8x_internal               # see x86_64-mont5 module
+
+.type  bn_sqr8x_mont,\@function,6
+.align 32
+bn_sqr8x_mont:
+.Lsqr8x_enter:
+       mov     %rsp,%rax
        push    %rbx
        push    %rbp
        push    %r12
@@ -714,787 +760,445 @@ bn_sqr4x_mont:
        push    %r14
        push    %r15
 
+       mov     ${num}d,%r10d
        shl     \$3,${num}d             # convert $num to bytes
-       xor     %r10,%r10
-       mov     %rsp,%r11               # put aside %rsp
-       sub     $num,%r10               # -$num
-       mov     ($n0),$n0               # *n0
-       lea     -72(%rsp,%r10,2),%rsp   # alloca(frame+2*$num)
-       and     \$-1024,%rsp            # minimize TLB usage
-       ##############################################################
-       # Stack layout
-       #
-       # +0    saved $num, used in reduction section
-       # +8    &t[2*$num], used in reduction section
-       # +32   saved $rptr
-       # +40   saved $nptr
-       # +48   saved *n0
-       # +56   saved %rsp
-       # +64   t[2*$num]
-       #
-       mov     $rptr,32(%rsp)          # save $rptr
-       mov     $nptr,40(%rsp)
-       mov     $n0,  48(%rsp)
-       mov     %r11, 56(%rsp)          # save original %rsp
-.Lsqr4x_body:
+       shl     \$3+2,%r10              # 4*$num
+       neg     $num
+
        ##############################################################
-       # Squaring part:
-       #
-       # a) multiply-n-add everything but a[i]*a[i];
-       # b) shift result of a) by 1 to the left and accumulate
-       #    a[i]*a[i] products;
+       # Ensure that the stack frame doesn't alias with $aptr modulo
+       # 4096. This is done to allow the memory disambiguation logic
+       # to do its job.
        #
-       lea     32(%r10),$i             # $i=-($num-32)
-       lea     ($aptr,$num),$aptr      # end of a[] buffer, ($aptr,$i)=&ap[2]
-
-       mov     $num,$j                 # $j=$num
-
-                                       # comments apply to $num==8 case
-       mov     -32($aptr,$i),$a0       # a[0]
-       lea     64(%rsp,$num,2),$tptr   # end of tp[] buffer, &tp[2*$num]
-       mov     -24($aptr,$i),%rax      # a[1]
-       lea     -32($tptr,$i),$tptr     # end of tp[] window, &tp[2*$num-"$i"]
-       mov     -16($aptr,$i),$ai       # a[2]
-       mov     %rax,$a1
-
-       mul     $a0                     # a[1]*a[0]
-       mov     %rax,$A0[0]             # a[1]*a[0]
-        mov    $ai,%rax                # a[2]
-       mov     %rdx,$A0[1]
-       mov     $A0[0],-24($tptr,$i)    # t[1]
-
-       xor     $A0[0],$A0[0]
-       mul     $a0                     # a[2]*a[0]
-       add     %rax,$A0[1]
-        mov    $ai,%rax
-       adc     %rdx,$A0[0]
-       mov     $A0[1],-16($tptr,$i)    # t[2]
-
-       lea     -16($i),$j              # j=-16
-
-
-        mov    8($aptr,$j),$ai         # a[3]
-       mul     $a1                     # a[2]*a[1]
-       mov     %rax,$A1[0]             # a[2]*a[1]+t[3]
-        mov    $ai,%rax
-       mov     %rdx,$A1[1]
-
-       xor     $A0[1],$A0[1]
-       add     $A1[0],$A0[0]
-        lea    16($j),$j
-       adc     \$0,$A0[1]
-       mul     $a0                     # a[3]*a[0]
-       add     %rax,$A0[0]             # a[3]*a[0]+a[2]*a[1]+t[3]
-        mov    $ai,%rax
-       adc     %rdx,$A0[1]
-       mov     $A0[0],-8($tptr,$j)     # t[3]
-       jmp     .Lsqr4x_1st
+       lea     -64(%rsp,$num,4),%r11
+       mov     ($n0),$n0               # *n0
+       sub     $aptr,%r11
+       and     \$4095,%r11
+       cmp     %r11,%r10
+       jb      .Lsqr8x_sp_alt
+       sub     %r11,%rsp               # align with $aptr
+       lea     -64(%rsp,$num,4),%rsp   # alloca(frame+4*$num)
+       jmp     .Lsqr8x_sp_done
+
+.align 32
+.Lsqr8x_sp_alt:
+       lea     4096-64(,$num,4),%r10   # 4096-frame-4*$num
+       lea     -64(%rsp,$num,4),%rsp   # alloca(frame+4*$num)
+       sub     %r10,%r11
+       mov     \$0,%r10
+       cmovc   %r10,%r11
+       sub     %r11,%rsp
+.Lsqr8x_sp_done:
+       and     \$-64,%rsp
+       mov     $num,%r10       
+       neg     $num
+
+       lea     64(%rsp,$num,2),%r11    # copy of modulus
+       mov     $n0,  32(%rsp)
+       mov     %rax, 40(%rsp)          # save original %rsp
+.Lsqr8x_body:
+
+       mov     $num,$i
+       movq    %r11, %xmm2             # save pointer to modulus copy
+       shr     \$3+2,$i
+       mov     OPENSSL_ia32cap_P+8(%rip),%eax
+       jmp     .Lsqr8x_copy_n
+
+.align 32
+.Lsqr8x_copy_n:
+       movq    8*0($nptr),%xmm0
+       movq    8*1($nptr),%xmm1
+       movq    8*2($nptr),%xmm3
+       movq    8*3($nptr),%xmm4
+       lea     8*4($nptr),$nptr
+       movdqa  %xmm0,16*0(%r11)
+       movdqa  %xmm1,16*1(%r11)
+       movdqa  %xmm3,16*2(%r11)
+       movdqa  %xmm4,16*3(%r11)
+       lea     16*4(%r11),%r11
+       dec     $i
+       jnz     .Lsqr8x_copy_n
 
-.align 16
-.Lsqr4x_1st:
-        mov    ($aptr,$j),$ai          # a[4]
-       xor     $A1[0],$A1[0]
-       mul     $a1                     # a[3]*a[1]
-       add     %rax,$A1[1]             # a[3]*a[1]+t[4]
-        mov    $ai,%rax
-       adc     %rdx,$A1[0]
-
-       xor     $A0[0],$A0[0]
-       add     $A1[1],$A0[1]
-       adc     \$0,$A0[0]
-       mul     $a0                     # a[4]*a[0]
-       add     %rax,$A0[1]             # a[4]*a[0]+a[3]*a[1]+t[4]
-        mov    $ai,%rax                # a[3]
-       adc     %rdx,$A0[0]
-       mov     $A0[1],($tptr,$j)       # t[4]
-
-
-        mov    8($aptr,$j),$ai         # a[5]
-       xor     $A1[1],$A1[1]
-       mul     $a1                     # a[4]*a[3]
-       add     %rax,$A1[0]             # a[4]*a[3]+t[5]
-        mov    $ai,%rax
-       adc     %rdx,$A1[1]
-
-       xor     $A0[1],$A0[1]
-       add     $A1[0],$A0[0]
-       adc     \$0,$A0[1]
-       mul     $a0                     # a[5]*a[2]
-       add     %rax,$A0[0]             # a[5]*a[2]+a[4]*a[3]+t[5]
-        mov    $ai,%rax
-       adc     %rdx,$A0[1]
-       mov     $A0[0],8($tptr,$j)      # t[5]
-
-        mov    16($aptr,$j),$ai        # a[6]
-       xor     $A1[0],$A1[0]
-       mul     $a1                     # a[5]*a[3]
-       add     %rax,$A1[1]             # a[5]*a[3]+t[6]
-        mov    $ai,%rax
-       adc     %rdx,$A1[0]
-
-       xor     $A0[0],$A0[0]
-       add     $A1[1],$A0[1]
-       adc     \$0,$A0[0]
-       mul     $a0                     # a[6]*a[2]
-       add     %rax,$A0[1]             # a[6]*a[2]+a[5]*a[3]+t[6]
-        mov    $ai,%rax                # a[3]
-       adc     %rdx,$A0[0]
-       mov     $A0[1],16($tptr,$j)     # t[6]
-
-
-        mov    24($aptr,$j),$ai        # a[7]
-       xor     $A1[1],$A1[1]
-       mul     $a1                     # a[6]*a[5]
-       add     %rax,$A1[0]             # a[6]*a[5]+t[7]
-        mov    $ai,%rax
-       adc     %rdx,$A1[1]
-
-       xor     $A0[1],$A0[1]
-       add     $A1[0],$A0[0]
-        lea    32($j),$j
-       adc     \$0,$A0[1]
-       mul     $a0                     # a[7]*a[4]
-       add     %rax,$A0[0]             # a[7]*a[4]+a[6]*a[5]+t[6]
-        mov    $ai,%rax
-       adc     %rdx,$A0[1]
-       mov     $A0[0],-8($tptr,$j)     # t[7]
-
-       cmp     \$0,$j
-       jne     .Lsqr4x_1st
-
-       xor     $A1[0],$A1[0]
-       add     $A0[1],$A1[1]
-       adc     \$0,$A1[0]
-       mul     $a1                     # a[7]*a[5]
-       add     %rax,$A1[1]
-       adc     %rdx,$A1[0]
-
-       mov     $A1[1],($tptr)          # t[8]
-       lea     16($i),$i
-       mov     $A1[0],8($tptr)         # t[9]
-       jmp     .Lsqr4x_outer
+       pxor    %xmm0,%xmm0
+       movq    $rptr,%xmm1             # save $rptr
+       movq    %r10, %xmm3             # -$num
+___
+$code.=<<___ if ($addx);
+       and     \$0x80100,%eax
+       cmp     \$0x80100,%eax
+       jne     .Lsqr8x_nox
 
-.align 16
-.Lsqr4x_outer:                         # comments apply to $num==6 case
-       mov     -32($aptr,$i),$a0       # a[0]
-       lea     64(%rsp,$num,2),$tptr   # end of tp[] buffer, &tp[2*$num]
-       mov     -24($aptr,$i),%rax      # a[1]
-       lea     -32($tptr,$i),$tptr     # end of tp[] window, &tp[2*$num-"$i"]
-       mov     -16($aptr,$i),$ai       # a[2]
-       mov     %rax,$a1
-
-       mov     -24($tptr,$i),$A0[0]    # t[1]
-       xor     $A0[1],$A0[1]
-       mul     $a0                     # a[1]*a[0]
-       add     %rax,$A0[0]             # a[1]*a[0]+t[1]
-        mov    $ai,%rax                # a[2]
-       adc     %rdx,$A0[1]
-       mov     $A0[0],-24($tptr,$i)    # t[1]
-
-       xor     $A0[0],$A0[0]
-       add     -16($tptr,$i),$A0[1]    # a[2]*a[0]+t[2]
-       adc     \$0,$A0[0]
-       mul     $a0                     # a[2]*a[0]
-       add     %rax,$A0[1]
-        mov    $ai,%rax
-       adc     %rdx,$A0[0]
-       mov     $A0[1],-16($tptr,$i)    # t[2]
-
-       lea     -16($i),$j              # j=-16
-       xor     $A1[0],$A1[0]
-
-
-        mov    8($aptr,$j),$ai         # a[3]
-       xor     $A1[1],$A1[1]
-       add     8($tptr,$j),$A1[0]
-       adc     \$0,$A1[1]
-       mul     $a1                     # a[2]*a[1]
-       add     %rax,$A1[0]             # a[2]*a[1]+t[3]
-        mov    $ai,%rax
-       adc     %rdx,$A1[1]
-
-       xor     $A0[1],$A0[1]
-       add     $A1[0],$A0[0]
-       adc     \$0,$A0[1]
-       mul     $a0                     # a[3]*a[0]
-       add     %rax,$A0[0]             # a[3]*a[0]+a[2]*a[1]+t[3]
-        mov    $ai,%rax
-       adc     %rdx,$A0[1]
-       mov     $A0[0],8($tptr,$j)      # t[3]
-
-       lea     16($j),$j
-       jmp     .Lsqr4x_inner
+       call    bn_sqrx8x_internal      # see x86_64-mont5 module
 
-.align 16
-.Lsqr4x_inner:
-        mov    ($aptr,$j),$ai          # a[4]
-       xor     $A1[0],$A1[0]
-       add     ($tptr,$j),$A1[1]
-       adc     \$0,$A1[0]
-       mul     $a1                     # a[3]*a[1]
-       add     %rax,$A1[1]             # a[3]*a[1]+t[4]
-        mov    $ai,%rax
-       adc     %rdx,$A1[0]
-
-       xor     $A0[0],$A0[0]
-       add     $A1[1],$A0[1]
-       adc     \$0,$A0[0]
-       mul     $a0                     # a[4]*a[0]
-       add     %rax,$A0[1]             # a[4]*a[0]+a[3]*a[1]+t[4]
-        mov    $ai,%rax                # a[3]
-       adc     %rdx,$A0[0]
-       mov     $A0[1],($tptr,$j)       # t[4]
-
-        mov    8($aptr,$j),$ai         # a[5]
-       xor     $A1[1],$A1[1]
-       add     8($tptr,$j),$A1[0]
-       adc     \$0,$A1[1]
-       mul     $a1                     # a[4]*a[3]
-       add     %rax,$A1[0]             # a[4]*a[3]+t[5]
-        mov    $ai,%rax
-       adc     %rdx,$A1[1]
-
-       xor     $A0[1],$A0[1]
-       add     $A1[0],$A0[0]
-       lea     16($j),$j               # j++
-       adc     \$0,$A0[1]
-       mul     $a0                     # a[5]*a[2]
-       add     %rax,$A0[0]             # a[5]*a[2]+a[4]*a[3]+t[5]
-        mov    $ai,%rax
-       adc     %rdx,$A0[1]
-       mov     $A0[0],-8($tptr,$j)     # t[5], "preloaded t[1]" below
-
-       cmp     \$0,$j
-       jne     .Lsqr4x_inner
-
-       xor     $A1[0],$A1[0]
-       add     $A0[1],$A1[1]
-       adc     \$0,$A1[0]
-       mul     $a1                     # a[5]*a[3]
-       add     %rax,$A1[1]
-       adc     %rdx,$A1[0]
-
-       mov     $A1[1],($tptr)          # t[6], "preloaded t[2]" below
-       mov     $A1[0],8($tptr)         # t[7], "preloaded t[3]" below
-
-       add     \$16,$i
-       jnz     .Lsqr4x_outer
-
-                                       # comments apply to $num==4 case
-       mov     -32($aptr),$a0          # a[0]
-       lea     64(%rsp,$num,2),$tptr   # end of tp[] buffer, &tp[2*$num]
-       mov     -24($aptr),%rax         # a[1]
-       lea     -32($tptr,$i),$tptr     # end of tp[] window, &tp[2*$num-"$i"]
-       mov     -16($aptr),$ai          # a[2]
-       mov     %rax,$a1
-
-       xor     $A0[1],$A0[1]
-       mul     $a0                     # a[1]*a[0]
-       add     %rax,$A0[0]             # a[1]*a[0]+t[1], preloaded t[1]
-        mov    $ai,%rax                # a[2]
-       adc     %rdx,$A0[1]
-       mov     $A0[0],-24($tptr)       # t[1]
-
-       xor     $A0[0],$A0[0]
-       add     $A1[1],$A0[1]           # a[2]*a[0]+t[2], preloaded t[2]
-       adc     \$0,$A0[0]
-       mul     $a0                     # a[2]*a[0]
-       add     %rax,$A0[1]
-        mov    $ai,%rax
-       adc     %rdx,$A0[0]
-       mov     $A0[1],-16($tptr)       # t[2]
-
-        mov    -8($aptr),$ai           # a[3]
-       mul     $a1                     # a[2]*a[1]
-       add     %rax,$A1[0]             # a[2]*a[1]+t[3], preloaded t[3]
-        mov    $ai,%rax
-       adc     \$0,%rdx
-
-       xor     $A0[1],$A0[1]
-       add     $A1[0],$A0[0]
-        mov    %rdx,$A1[1]
-       adc     \$0,$A0[1]
-       mul     $a0                     # a[3]*a[0]
-       add     %rax,$A0[0]             # a[3]*a[0]+a[2]*a[1]+t[3]
-        mov    $ai,%rax
-       adc     %rdx,$A0[1]
-       mov     $A0[0],-8($tptr)        # t[3]
-
-       xor     $A1[0],$A1[0]
-       add     $A0[1],$A1[1]
-       adc     \$0,$A1[0]
-       mul     $a1                     # a[3]*a[1]
-       add     %rax,$A1[1]
-        mov    -16($aptr),%rax         # a[2]
-       adc     %rdx,$A1[0]
-
-       mov     $A1[1],($tptr)          # t[4]
-       mov     $A1[0],8($tptr)         # t[5]
-
-       mul     $ai                     # a[2]*a[3]
+       pxor    %xmm0,%xmm0
+       lea     48(%rsp),%rax
+       lea     64(%rsp,$num,2),%rdx
+       shr     \$3+2,$num
+       mov     40(%rsp),%rsi           # fetch saved original %rsp
+       jmp     .Lsqr8x_zero
+
+.align 32
+.Lsqr8x_nox:
 ___
-{
-my ($shift,$carry)=($a0,$a1);
-my @S=(@A1,$ai,$n0);
 $code.=<<___;
-        add    \$16,$i
-        xor    $shift,$shift
-        sub    $num,$i                 # $i=16-$num
-        xor    $carry,$carry
-
-       add     $A1[0],%rax             # t[5]
-       adc     \$0,%rdx
-       mov     %rax,8($tptr)           # t[5]
-       mov     %rdx,16($tptr)          # t[6]
-       mov     $carry,24($tptr)        # t[7]
-
-        mov    -16($aptr,$i),%rax      # a[0]
-       lea     64(%rsp,$num,2),$tptr
-        xor    $A0[0],$A0[0]           # t[0]
-        mov    -24($tptr,$i,2),$A0[1]  # t[1]
-
-       lea     ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
-       shr     \$63,$A0[0]
-       lea     ($j,$A0[1],2),$S[1]     # t[2*i+1]<<1 |
-       shr     \$63,$A0[1]
-       or      $A0[0],$S[1]            # | t[2*i]>>63
-        mov    -16($tptr,$i,2),$A0[0]  # t[2*i+2]      # prefetch
-       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
-       mul     %rax                    # a[i]*a[i]
-       neg     $carry                  # mov $carry,cf
-        mov    -8($tptr,$i,2),$A0[1]   # t[2*i+2+1]    # prefetch
-       adc     %rax,$S[0]
-        mov    -8($aptr,$i),%rax       # a[i+1]        # prefetch
-       mov     $S[0],-32($tptr,$i,2)
-       adc     %rdx,$S[1]
-
-       lea     ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
-        mov    $S[1],-24($tptr,$i,2)
-        sbb    $carry,$carry           # mov cf,$carry
-       shr     \$63,$A0[0]
-       lea     ($j,$A0[1],2),$S[3]     # t[2*i+1]<<1 |
-       shr     \$63,$A0[1]
-       or      $A0[0],$S[3]            # | t[2*i]>>63
-        mov    0($tptr,$i,2),$A0[0]    # t[2*i+2]      # prefetch
-       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
-       mul     %rax                    # a[i]*a[i]
-       neg     $carry                  # mov $carry,cf
-        mov    8($tptr,$i,2),$A0[1]    # t[2*i+2+1]    # prefetch
-       adc     %rax,$S[2]
-        mov    0($aptr,$i),%rax        # a[i+1]        # prefetch
-       mov     $S[2],-16($tptr,$i,2)
-       adc     %rdx,$S[3]
-       lea     16($i),$i
-       mov     $S[3],-40($tptr,$i,2)
-       sbb     $carry,$carry           # mov cf,$carry
-       jmp     .Lsqr4x_shift_n_add
+       call    bn_sqr8x_internal       # see x86_64-mont5 module
 
-.align 16
-.Lsqr4x_shift_n_add:
-       lea     ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
-       shr     \$63,$A0[0]
-       lea     ($j,$A0[1],2),$S[1]     # t[2*i+1]<<1 |
-       shr     \$63,$A0[1]
-       or      $A0[0],$S[1]            # | t[2*i]>>63
-        mov    -16($tptr,$i,2),$A0[0]  # t[2*i+2]      # prefetch
-       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
-       mul     %rax                    # a[i]*a[i]
-       neg     $carry                  # mov $carry,cf
-        mov    -8($tptr,$i,2),$A0[1]   # t[2*i+2+1]    # prefetch
-       adc     %rax,$S[0]
-        mov    -8($aptr,$i),%rax       # a[i+1]        # prefetch
-       mov     $S[0],-32($tptr,$i,2)
-       adc     %rdx,$S[1]
-
-       lea     ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
-        mov    $S[1],-24($tptr,$i,2)
-        sbb    $carry,$carry           # mov cf,$carry
-       shr     \$63,$A0[0]
-       lea     ($j,$A0[1],2),$S[3]     # t[2*i+1]<<1 |
-       shr     \$63,$A0[1]
-       or      $A0[0],$S[3]            # | t[2*i]>>63
-        mov    0($tptr,$i,2),$A0[0]    # t[2*i+2]      # prefetch
-       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
-       mul     %rax                    # a[i]*a[i]
-       neg     $carry                  # mov $carry,cf
-        mov    8($tptr,$i,2),$A0[1]    # t[2*i+2+1]    # prefetch
-       adc     %rax,$S[2]
-        mov    0($aptr,$i),%rax        # a[i+1]        # prefetch
-       mov     $S[2],-16($tptr,$i,2)
-       adc     %rdx,$S[3]
-
-       lea     ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
-        mov    $S[3],-8($tptr,$i,2)
-        sbb    $carry,$carry           # mov cf,$carry
-       shr     \$63,$A0[0]
-       lea     ($j,$A0[1],2),$S[1]     # t[2*i+1]<<1 |
-       shr     \$63,$A0[1]
-       or      $A0[0],$S[1]            # | t[2*i]>>63
-        mov    16($tptr,$i,2),$A0[0]   # t[2*i+2]      # prefetch
-       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
-       mul     %rax                    # a[i]*a[i]
-       neg     $carry                  # mov $carry,cf
-        mov    24($tptr,$i,2),$A0[1]   # t[2*i+2+1]    # prefetch
-       adc     %rax,$S[0]
-        mov    8($aptr,$i),%rax        # a[i+1]        # prefetch
-       mov     $S[0],0($tptr,$i,2)
-       adc     %rdx,$S[1]
-
-       lea     ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
-        mov    $S[1],8($tptr,$i,2)
-        sbb    $carry,$carry           # mov cf,$carry
-       shr     \$63,$A0[0]
-       lea     ($j,$A0[1],2),$S[3]     # t[2*i+1]<<1 |
-       shr     \$63,$A0[1]
-       or      $A0[0],$S[3]            # | t[2*i]>>63
-        mov    32($tptr,$i,2),$A0[0]   # t[2*i+2]      # prefetch
-       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
-       mul     %rax                    # a[i]*a[i]
-       neg     $carry                  # mov $carry,cf
-        mov    40($tptr,$i,2),$A0[1]   # t[2*i+2+1]    # prefetch
-       adc     %rax,$S[2]
-        mov    16($aptr,$i),%rax       # a[i+1]        # prefetch
-       mov     $S[2],16($tptr,$i,2)
-       adc     %rdx,$S[3]
-       mov     $S[3],24($tptr,$i,2)
-       sbb     $carry,$carry           # mov cf,$carry
-       add     \$32,$i
-       jnz     .Lsqr4x_shift_n_add
-
-       lea     ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
-       shr     \$63,$A0[0]
-       lea     ($j,$A0[1],2),$S[1]     # t[2*i+1]<<1 |
-       shr     \$63,$A0[1]
-       or      $A0[0],$S[1]            # | t[2*i]>>63
-        mov    -16($tptr),$A0[0]       # t[2*i+2]      # prefetch
-       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
-       mul     %rax                    # a[i]*a[i]
-       neg     $carry                  # mov $carry,cf
-        mov    -8($tptr),$A0[1]        # t[2*i+2+1]    # prefetch
-       adc     %rax,$S[0]
-        mov    -8($aptr),%rax          # a[i+1]        # prefetch
-       mov     $S[0],-32($tptr)
-       adc     %rdx,$S[1]
-
-       lea     ($shift,$A0[0],2),$S[2] # t[2*i]<<1|shift
-        mov    $S[1],-24($tptr)
-        sbb    $carry,$carry           # mov cf,$carry
-       shr     \$63,$A0[0]
-       lea     ($j,$A0[1],2),$S[3]     # t[2*i+1]<<1 |
-       shr     \$63,$A0[1]
-       or      $A0[0],$S[3]            # | t[2*i]>>63
-       mul     %rax                    # a[i]*a[i]
-       neg     $carry                  # mov $carry,cf
-       adc     %rax,$S[2]
-       adc     %rdx,$S[3]
-       mov     $S[2],-16($tptr)
-       mov     $S[3],-8($tptr)
-___
-}\f
-##############################################################
-# Montgomery reduction part, "word-by-word" algorithm.
-#
-{
-my ($topbit,$nptr)=("%rbp",$aptr);
-my ($m0,$m1)=($a0,$a1);
-my @Ni=("%rbx","%r9");
-$code.=<<___;
-       mov     40(%rsp),$nptr          # restore $nptr
-       mov     48(%rsp),$n0            # restore *n0
-       xor     $j,$j
-       mov     $num,0(%rsp)            # save $num
-       sub     $num,$j                 # $j=-$num
-        mov    64(%rsp),$A0[0]         # t[0]          # modsched #
-        mov    $n0,$m0                 #               # modsched #
-       lea     64(%rsp,$num,2),%rax    # end of t[] buffer
-       lea     64(%rsp,$num),$tptr     # end of t[] window
-       mov     %rax,8(%rsp)            # save end of t[] buffer
-       lea     ($nptr,$num),$nptr      # end of n[] buffer
-       xor     $topbit,$topbit         # $topbit=0
-
-       mov     0($nptr,$j),%rax        # n[0]          # modsched #
-       mov     8($nptr,$j),$Ni[1]      # n[1]          # modsched #
-        imulq  $A0[0],$m0              # m0=t[0]*n0    # modsched #
-        mov    %rax,$Ni[0]             #               # modsched #
-       jmp     .Lsqr4x_mont_outer
+       pxor    %xmm0,%xmm0
+       lea     48(%rsp),%rax
+       lea     64(%rsp,$num,2),%rdx
+       shr     \$3+2,$num
+       mov     40(%rsp),%rsi           # fetch saved original %rsp
+       jmp     .Lsqr8x_zero
+
+.align 32
+.Lsqr8x_zero:
+       movdqa  %xmm0,16*0(%rax)        # wipe t
+       movdqa  %xmm0,16*1(%rax)
+       movdqa  %xmm0,16*2(%rax)
+       movdqa  %xmm0,16*3(%rax)
+       lea     16*4(%rax),%rax
+       movdqa  %xmm0,16*0(%rdx)        # wipe n
+       movdqa  %xmm0,16*1(%rdx)
+       movdqa  %xmm0,16*2(%rdx)
+       movdqa  %xmm0,16*3(%rdx)
+       lea     16*4(%rdx),%rdx
+       dec     $num
+       jnz     .Lsqr8x_zero
 
-.align 16
-.Lsqr4x_mont_outer:
-       xor     $A0[1],$A0[1]
-       mul     $m0                     # n[0]*m0
-       add     %rax,$A0[0]             # n[0]*m0+t[0]
-        mov    $Ni[1],%rax
-       adc     %rdx,$A0[1]
-       mov     $n0,$m1
+       mov     \$1,%rax
+       mov     -48(%rsi),%r15
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp
+.Lsqr8x_epilogue:
+       ret
+.size  bn_sqr8x_mont,.-bn_sqr8x_mont
+___
+}}}
+\f
+if ($addx) {{{
+my $bp="%rdx"; # original value
 
-       xor     $A0[0],$A0[0]
-       add     8($tptr,$j),$A0[1]
-       adc     \$0,$A0[0]
-       mul     $m0                     # n[1]*m0
-       add     %rax,$A0[1]             # n[1]*m0+t[1]
-        mov    $Ni[0],%rax
-       adc     %rdx,$A0[0]
-
-       imulq   $A0[1],$m1
-
-       mov     16($nptr,$j),$Ni[0]     # n[2]
-       xor     $A1[1],$A1[1]
-       add     $A0[1],$A1[0]
-       adc     \$0,$A1[1]
-       mul     $m1                     # n[0]*m1
-       add     %rax,$A1[0]             # n[0]*m1+"t[1]"
-        mov    $Ni[0],%rax
-       adc     %rdx,$A1[1]
-       mov     $A1[0],8($tptr,$j)      # "t[1]"
-
-       xor     $A0[1],$A0[1]
-       add     16($tptr,$j),$A0[0]
-       adc     \$0,$A0[1]
-       mul     $m0                     # n[2]*m0
-       add     %rax,$A0[0]             # n[2]*m0+t[2]
-        mov    $Ni[1],%rax
-       adc     %rdx,$A0[1]
-
-       mov     24($nptr,$j),$Ni[1]     # n[3]
-       xor     $A1[0],$A1[0]
-       add     $A0[0],$A1[1]
-       adc     \$0,$A1[0]
-       mul     $m1                     # n[1]*m1
-       add     %rax,$A1[1]             # n[1]*m1+"t[2]"
-        mov    $Ni[1],%rax
-       adc     %rdx,$A1[0]
-       mov     $A1[1],16($tptr,$j)     # "t[2]"
-
-       xor     $A0[0],$A0[0]
-       add     24($tptr,$j),$A0[1]
-       lea     32($j),$j
-       adc     \$0,$A0[0]
-       mul     $m0                     # n[3]*m0
-       add     %rax,$A0[1]             # n[3]*m0+t[3]
-        mov    $Ni[0],%rax
-       adc     %rdx,$A0[0]
-       jmp     .Lsqr4x_mont_inner
+$code.=<<___;
+.type  bn_mulx4x_mont,\@function,6
+.align 32
+bn_mulx4x_mont:
+.Lmulx4x_enter:
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
 
-.align 16
-.Lsqr4x_mont_inner:
-       mov     ($nptr,$j),$Ni[0]       # n[4]
-       xor     $A1[1],$A1[1]
-       add     $A0[1],$A1[0]
-       adc     \$0,$A1[1]
-       mul     $m1                     # n[2]*m1
-       add     %rax,$A1[0]             # n[2]*m1+"t[3]"
-        mov    $Ni[0],%rax
-       adc     %rdx,$A1[1]
-       mov     $A1[0],-8($tptr,$j)     # "t[3]"
-
-       xor     $A0[1],$A0[1]
-       add     ($tptr,$j),$A0[0]
-       adc     \$0,$A0[1]
-       mul     $m0                     # n[4]*m0
-       add     %rax,$A0[0]             # n[4]*m0+t[4]
-        mov    $Ni[1],%rax
-       adc     %rdx,$A0[1]
-
-       mov     8($nptr,$j),$Ni[1]      # n[5]
-       xor     $A1[0],$A1[0]
-       add     $A0[0],$A1[1]
-       adc     \$0,$A1[0]
-       mul     $m1                     # n[3]*m1
-       add     %rax,$A1[1]             # n[3]*m1+"t[4]"
-        mov    $Ni[1],%rax
-       adc     %rdx,$A1[0]
-       mov     $A1[1],($tptr,$j)       # "t[4]"
-
-       xor     $A0[0],$A0[0]
-       add     8($tptr,$j),$A0[1]
-       adc     \$0,$A0[0]
-       mul     $m0                     # n[5]*m0
-       add     %rax,$A0[1]             # n[5]*m0+t[5]
-        mov    $Ni[0],%rax
-       adc     %rdx,$A0[0]
-
-
-       mov     16($nptr,$j),$Ni[0]     # n[6]
-       xor     $A1[1],$A1[1]
-       add     $A0[1],$A1[0]
-       adc     \$0,$A1[1]
-       mul     $m1                     # n[4]*m1
-       add     %rax,$A1[0]             # n[4]*m1+"t[5]"
-        mov    $Ni[0],%rax
-       adc     %rdx,$A1[1]
-       mov     $A1[0],8($tptr,$j)      # "t[5]"
-
-       xor     $A0[1],$A0[1]
-       add     16($tptr,$j),$A0[0]
-       adc     \$0,$A0[1]
-       mul     $m0                     # n[6]*m0
-       add     %rax,$A0[0]             # n[6]*m0+t[6]
-        mov    $Ni[1],%rax
-       adc     %rdx,$A0[1]
-
-       mov     24($nptr,$j),$Ni[1]     # n[7]
-       xor     $A1[0],$A1[0]
-       add     $A0[0],$A1[1]
-       adc     \$0,$A1[0]
-       mul     $m1                     # n[5]*m1
-       add     %rax,$A1[1]             # n[5]*m1+"t[6]"
-        mov    $Ni[1],%rax
-       adc     %rdx,$A1[0]
-       mov     $A1[1],16($tptr,$j)     # "t[6]"
-
-       xor     $A0[0],$A0[0]
-       add     24($tptr,$j),$A0[1]
-       lea     32($j),$j
-       adc     \$0,$A0[0]
-       mul     $m0                     # n[7]*m0
-       add     %rax,$A0[1]             # n[7]*m0+t[7]
-        mov    $Ni[0],%rax
-       adc     %rdx,$A0[0]
-       cmp     \$0,$j
-       jne     .Lsqr4x_mont_inner
-
-        sub    0(%rsp),$j              # $j=-$num      # modsched #
-        mov    $n0,$m0                 #               # modsched #
-
-       xor     $A1[1],$A1[1]
-       add     $A0[1],$A1[0]
-       adc     \$0,$A1[1]
-       mul     $m1                     # n[6]*m1
-       add     %rax,$A1[0]             # n[6]*m1+"t[7]"
-       mov     $Ni[1],%rax
-       adc     %rdx,$A1[1]
-       mov     $A1[0],-8($tptr)        # "t[7]"
-
-       xor     $A0[1],$A0[1]
-       add     ($tptr),$A0[0]          # +t[8]
-       adc     \$0,$A0[1]
-        mov    0($nptr,$j),$Ni[0]      # n[0]          # modsched #
-       add     $topbit,$A0[0]
-       adc     \$0,$A0[1]
-
-        imulq  16($tptr,$j),$m0        # m0=t[0]*n0    # modsched #
-       xor     $A1[0],$A1[0]
-        mov    8($nptr,$j),$Ni[1]      # n[1]          # modsched #
-       add     $A0[0],$A1[1]
-        mov    16($tptr,$j),$A0[0]     # t[0]          # modsched #
-       adc     \$0,$A1[0]
-       mul     $m1                     # n[7]*m1
-       add     %rax,$A1[1]             # n[7]*m1+"t[8]"
-        mov    $Ni[0],%rax             #               # modsched #
-       adc     %rdx,$A1[0]
-       mov     $A1[1],($tptr)          # "t[8]"
-
-       xor     $topbit,$topbit
-       add     8($tptr),$A1[0]         # +t[9]
-       adc     $topbit,$topbit
-       add     $A0[1],$A1[0]
-       lea     16($tptr),$tptr         # "t[$num]>>128"
-       adc     \$0,$topbit
-       mov     $A1[0],-8($tptr)        # "t[9]"
-       cmp     8(%rsp),$tptr           # are we done?
-       jb      .Lsqr4x_mont_outer
-
-       mov     0(%rsp),$num            # restore $num
-       mov     $topbit,($tptr)         # save $topbit
+       shl     \$3,${num}d             # convert $num to bytes
+       .byte   0x67
+       xor     %r10,%r10
+       sub     $num,%r10               # -$num
+       mov     ($n0),$n0               # *n0
+       lea     -72(%rsp,%r10),%rsp     # alloca(frame+$num+8)
+       lea     ($bp,$num),%r10
+       and     \$-128,%rsp
+       ##############################################################
+       # Stack layout
+       # +0    num
+       # +8    off-loaded &b[i]
+       # +16   end of b[num]
+       # +24   saved n0
+       # +32   saved rp
+       # +40   saved %rsp
+       # +48   inner counter
+       # +56
+       # +64   tmp[num+1]
+       #
+       mov     $num,0(%rsp)            # save $num
+       shr     \$5,$num
+       mov     %r10,16(%rsp)           # end of b[num]
+       sub     \$1,$num
+       mov     $n0, 24(%rsp)           # save *n0
+       mov     $rp, 32(%rsp)           # save $rp
+       mov     %rax,40(%rsp)           # save original %rsp
+       mov     $num,48(%rsp)           # inner counter
+       jmp     .Lmulx4x_body
+
+.align 32
+.Lmulx4x_body:
 ___
-}\f
-##############################################################
-# Post-condition, 4x unrolled copy from bn_mul_mont
-#
-{
-my ($tptr,$nptr)=("%rbx",$aptr);
-my @ri=("%rax","%rdx","%r10","%r11");
+my ($aptr, $bptr, $nptr, $tptr, $mi,  $bi,  $zero, $num)=
+   ("%rsi","%rdi","%rcx","%rbx","%r8","%r9","%rbp","%rax");
+my $rptr=$bptr;
 $code.=<<___;
-       mov     64(%rsp,$num),@ri[0]    # tp[0]
-       lea     64(%rsp,$num),$tptr     # upper half of t[2*$num] holds result
-       mov     40(%rsp),$nptr          # restore $nptr
-       shr     \$5,$num                # num/4
-       mov     8($tptr),@ri[1]         # t[1]
-       xor     $i,$i                   # i=0 and clear CF!
-
-       mov     32(%rsp),$rptr          # restore $rptr
-       sub     0($nptr),@ri[0]
-       mov     16($tptr),@ri[2]        # t[2]
-       mov     24($tptr),@ri[3]        # t[3]
-       sbb     8($nptr),@ri[1]
-       lea     -1($num),$j             # j=num/4-1
-       jmp     .Lsqr4x_sub
-.align 16
-.Lsqr4x_sub:
-       mov     @ri[0],0($rptr,$i,8)    # rp[i]=tp[i]-np[i]
-       mov     @ri[1],8($rptr,$i,8)    # rp[i]=tp[i]-np[i]
-       sbb     16($nptr,$i,8),@ri[2]
-       mov     32($tptr,$i,8),@ri[0]   # tp[i+1]
-       mov     40($tptr,$i,8),@ri[1]
-       sbb     24($nptr,$i,8),@ri[3]
-       mov     @ri[2],16($rptr,$i,8)   # rp[i]=tp[i]-np[i]
-       mov     @ri[3],24($rptr,$i,8)   # rp[i]=tp[i]-np[i]
-       sbb     32($nptr,$i,8),@ri[0]
-       mov     48($tptr,$i,8),@ri[2]
-       mov     56($tptr,$i,8),@ri[3]
-       sbb     40($nptr,$i,8),@ri[1]
-       lea     4($i),$i                # i++
-       dec     $j                      # doesn't affect CF!
-       jnz     .Lsqr4x_sub
-
-       mov     @ri[0],0($rptr,$i,8)    # rp[i]=tp[i]-np[i]
-       mov     32($tptr,$i,8),@ri[0]   # load overflow bit
-       sbb     16($nptr,$i,8),@ri[2]
-       mov     @ri[1],8($rptr,$i,8)    # rp[i]=tp[i]-np[i]
-       sbb     24($nptr,$i,8),@ri[3]
-       mov     @ri[2],16($rptr,$i,8)   # rp[i]=tp[i]-np[i]
-
-       sbb     \$0,@ri[0]              # handle upmost overflow bit
-       mov     @ri[3],24($rptr,$i,8)   # rp[i]=tp[i]-np[i]
-       xor     $i,$i                   # i=0
-       and     @ri[0],$tptr
-       not     @ri[0]
-       mov     $rptr,$nptr
-       and     @ri[0],$nptr
-       lea     -1($num),$j
-       or      $nptr,$tptr             # tp=borrow?tp:rp
+       lea     8($bp),$bptr
+       mov     ($bp),%rdx              # b[0], $bp==%rdx actually
+       lea     64+32(%rsp),$tptr
+       mov     %rdx,$bi
+
+       mulx    0*8($aptr),$mi,%rax     # a[0]*b[0]
+       mulx    1*8($aptr),%r11,%r14    # a[1]*b[0]
+       add     %rax,%r11
+       mov     $bptr,8(%rsp)           # off-load &b[i]
+       mulx    2*8($aptr),%r12,%r13    # ...
+       adc     %r14,%r12
+       adc     \$0,%r13
+
+       mov     $mi,$bptr               # borrow $bptr
+       imulq   24(%rsp),$mi            # "t[0]"*n0
+       xor     $zero,$zero             # cf=0, of=0
+
+       mulx    3*8($aptr),%rax,%r14
+        mov    $mi,%rdx
+       lea     4*8($aptr),$aptr
+       adcx    %rax,%r13
+       adcx    $zero,%r14              # cf=0
+
+       mulx    0*8($nptr),%rax,%r10
+       adcx    %rax,$bptr              # discarded
+       adox    %r11,%r10
+       mulx    1*8($nptr),%rax,%r11
+       adcx    %rax,%r10
+       adox    %r12,%r11
+       .byte   0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00    # mulx  2*8($nptr),%rax,%r12
+       mov     48(%rsp),$bptr          # counter value
+       mov     %r10,-4*8($tptr)
+       adcx    %rax,%r11
+       adox    %r13,%r12
+       mulx    3*8($nptr),%rax,%r15
+        mov    $bi,%rdx
+       mov     %r11,-3*8($tptr)
+       adcx    %rax,%r12
+       adox    $zero,%r15              # of=0
+       lea     4*8($nptr),$nptr
+       mov     %r12,-2*8($tptr)
+
+       jmp     .Lmulx4x_1st
+
+.align 32
+.Lmulx4x_1st:
+       adcx    $zero,%r15              # cf=0, modulo-scheduled
+       mulx    0*8($aptr),%r10,%rax    # a[4]*b[0]
+       adcx    %r14,%r10
+       mulx    1*8($aptr),%r11,%r14    # a[5]*b[0]
+       adcx    %rax,%r11
+       mulx    2*8($aptr),%r12,%rax    # ...
+       adcx    %r14,%r12
+       mulx    3*8($aptr),%r13,%r14
+        .byte  0x67,0x67
+        mov    $mi,%rdx
+       adcx    %rax,%r13
+       adcx    $zero,%r14              # cf=0
+       lea     4*8($aptr),$aptr
+       lea     4*8($tptr),$tptr
+
+       adox    %r15,%r10
+       mulx    0*8($nptr),%rax,%r15
+       adcx    %rax,%r10
+       adox    %r15,%r11
+       mulx    1*8($nptr),%rax,%r15
+       adcx    %rax,%r11
+       adox    %r15,%r12
+       mulx    2*8($nptr),%rax,%r15
+       mov     %r10,-5*8($tptr)
+       adcx    %rax,%r12
+       mov     %r11,-4*8($tptr)
+       adox    %r15,%r13
+       mulx    3*8($nptr),%rax,%r15
+        mov    $bi,%rdx
+       mov     %r12,-3*8($tptr)
+       adcx    %rax,%r13
+       adox    $zero,%r15
+       lea     4*8($nptr),$nptr
+       mov     %r13,-2*8($tptr)
+
+       dec     $bptr                   # of=0, pass cf
+       jnz     .Lmulx4x_1st
+
+       mov     0(%rsp),$num            # load num
+       mov     8(%rsp),$bptr           # re-load &b[i]
+       adc     $zero,%r15              # modulo-scheduled
+       add     %r15,%r14
+       sbb     %r15,%r15               # top-most carry
+       mov     %r14,-1*8($tptr)
+       jmp     .Lmulx4x_outer
+
+.align 32
+.Lmulx4x_outer:
+       mov     ($bptr),%rdx            # b[i]
+       lea     8($bptr),$bptr          # b++
+       sub     $num,$aptr              # rewind $aptr
+       mov     %r15,($tptr)            # save top-most carry
+       lea     64+4*8(%rsp),$tptr
+       sub     $num,$nptr              # rewind $nptr
+
+       mulx    0*8($aptr),$mi,%r11     # a[0]*b[i]
+       xor     %ebp,%ebp               # xor   $zero,$zero     # cf=0, of=0
+       mov     %rdx,$bi
+       mulx    1*8($aptr),%r14,%r12    # a[1]*b[i]
+       adox    -4*8($tptr),$mi
+       adcx    %r14,%r11
+       mulx    2*8($aptr),%r15,%r13    # ...
+       adox    -3*8($tptr),%r11
+       adcx    %r15,%r12
+       adox    $zero,%r12
+       adcx    $zero,%r13
+
+       mov     $bptr,8(%rsp)           # off-load &b[i]
+       .byte   0x67
+       mov     $mi,%r15
+       imulq   24(%rsp),$mi            # "t[0]"*n0
+       xor     %ebp,%ebp               # xor   $zero,$zero     # cf=0, of=0
+
+       mulx    3*8($aptr),%rax,%r14
+        mov    $mi,%rdx
+       adox    -2*8($tptr),%r12
+       adcx    %rax,%r13
+       adox    -1*8($tptr),%r13
+       adcx    $zero,%r14
+       lea     4*8($aptr),$aptr
+       adox    $zero,%r14
+
+       mulx    0*8($nptr),%rax,%r10
+       adcx    %rax,%r15               # discarded
+       adox    %r11,%r10
+       mulx    1*8($nptr),%rax,%r11
+       adcx    %rax,%r10
+       adox    %r12,%r11
+       mulx    2*8($nptr),%rax,%r12
+       mov     %r10,-4*8($tptr)
+       adcx    %rax,%r11
+       adox    %r13,%r12
+       mulx    3*8($nptr),%rax,%r15
+        mov    $bi,%rdx
+       mov     %r11,-3*8($tptr)
+       lea     4*8($nptr),$nptr
+       adcx    %rax,%r12
+       adox    $zero,%r15              # of=0
+       mov     48(%rsp),$bptr          # counter value
+       mov     %r12,-2*8($tptr)
+
+       jmp     .Lmulx4x_inner
+
+.align 32
+.Lmulx4x_inner:
+       mulx    0*8($aptr),%r10,%rax    # a[4]*b[i]
+       adcx    $zero,%r15              # cf=0, modulo-scheduled
+       adox    %r14,%r10
+       mulx    1*8($aptr),%r11,%r14    # a[5]*b[i]
+       adcx    0*8($tptr),%r10
+       adox    %rax,%r11
+       mulx    2*8($aptr),%r12,%rax    # ...
+       adcx    1*8($tptr),%r11
+       adox    %r14,%r12
+       mulx    3*8($aptr),%r13,%r14
+        mov    $mi,%rdx
+       adcx    2*8($tptr),%r12
+       adox    %rax,%r13
+       adcx    3*8($tptr),%r13
+       adox    $zero,%r14              # of=0
+       lea     4*8($aptr),$aptr
+       lea     4*8($tptr),$tptr
+       adcx    $zero,%r14              # cf=0
+
+       adox    %r15,%r10
+       mulx    0*8($nptr),%rax,%r15
+       adcx    %rax,%r10
+       adox    %r15,%r11
+       mulx    1*8($nptr),%rax,%r15
+       adcx    %rax,%r11
+       adox    %r15,%r12
+       mulx    2*8($nptr),%rax,%r15
+       mov     %r10,-5*8($tptr)
+       adcx    %rax,%r12
+       adox    %r15,%r13
+       mulx    3*8($nptr),%rax,%r15
+        mov    $bi,%rdx
+       mov     %r11,-4*8($tptr)
+       mov     %r12,-3*8($tptr)
+       adcx    %rax,%r13
+       adox    $zero,%r15
+       lea     4*8($nptr),$nptr
+       mov     %r13,-2*8($tptr)
+
+       dec     $bptr                   # of=0, pass cf
+       jnz     .Lmulx4x_inner
+
+       mov     0(%rsp),$num            # load num
+       mov     8(%rsp),$bptr           # re-load &b[i]
+       adc     $zero,%r15              # modulo-scheduled
+       sub     0*8($tptr),$zero        # pull top-most carry
+       adc     %r15,%r14
+       mov     -8($nptr),$mi
+       sbb     %r15,%r15               # top-most carry
+       mov     %r14,-1*8($tptr)
+
+       cmp     16(%rsp),$bptr
+       jne     .Lmulx4x_outer
+
+       sub     %r14,$mi                # compare top-most words
+       sbb     $mi,$mi
+       or      $mi,%r15
+
+       neg     $num
+       xor     %rdx,%rdx
+       mov     32(%rsp),$rptr          # restore rp
+       lea     64(%rsp),$tptr
 
        pxor    %xmm0,%xmm0
-       lea     64(%rsp,$num,8),$nptr
-       movdqu  ($tptr),%xmm1
-       lea     ($nptr,$num,8),$nptr
-       movdqa  %xmm0,64(%rsp)          # zap lower half of temporary vector
-       movdqa  %xmm0,($nptr)           # zap upper half of temporary vector
-       movdqu  %xmm1,($rptr)
-       jmp     .Lsqr4x_copy
-.align 16
-.Lsqr4x_copy:                          # copy or in-place refresh
-       movdqu  16($tptr,$i),%xmm2
-       movdqu  32($tptr,$i),%xmm1
-       movdqa  %xmm0,80(%rsp,$i)       # zap lower half of temporary vector
-       movdqa  %xmm0,96(%rsp,$i)       # zap lower half of temporary vector
-       movdqa  %xmm0,16($nptr,$i)      # zap upper half of temporary vector
-       movdqa  %xmm0,32($nptr,$i)      # zap upper half of temporary vector
-       movdqu  %xmm2,16($rptr,$i)
-       movdqu  %xmm1,32($rptr,$i)
-       lea     32($i),$i
-       dec     $j
-       jnz     .Lsqr4x_copy
-
-       movdqu  16($tptr,$i),%xmm2
-       movdqa  %xmm0,80(%rsp,$i)       # zap lower half of temporary vector
-       movdqa  %xmm0,16($nptr,$i)      # zap upper half of temporary vector
-       movdqu  %xmm2,16($rptr,$i)
-___
-}
-$code.=<<___;
-       mov     56(%rsp),%rsi           # restore %rsp
+       mov     0*8($nptr,$num),%r8
+       mov     1*8($nptr,$num),%r9
+       neg     %r8
+       jmp     .Lmulx4x_sub_entry
+
+.align 32
+.Lmulx4x_sub:
+       mov     0*8($nptr,$num),%r8
+       mov     1*8($nptr,$num),%r9
+       not     %r8
+.Lmulx4x_sub_entry:
+       mov     2*8($nptr,$num),%r10
+       not     %r9
+       and     %r15,%r8
+       mov     3*8($nptr,$num),%r11
+       not     %r10
+       and     %r15,%r9
+       not     %r11
+       and     %r15,%r10
+       and     %r15,%r11
+
+       neg     %rdx                    # mov %rdx,%cf
+       adc     0*8($tptr),%r8
+       adc     1*8($tptr),%r9
+       movdqa  %xmm0,($tptr)
+       adc     2*8($tptr),%r10
+       adc     3*8($tptr),%r11
+       movdqa  %xmm0,16($tptr)
+       lea     4*8($tptr),$tptr
+       sbb     %rdx,%rdx               # mov %cf,%rdx
+
+       mov     %r8,0*8($rptr)
+       mov     %r9,1*8($rptr)
+       mov     %r10,2*8($rptr)
+       mov     %r11,3*8($rptr)
+       lea     4*8($rptr),$rptr
+
+       add     \$32,$num
+       jnz     .Lmulx4x_sub
+
+       mov     40(%rsp),%rsi           # restore %rsp
        mov     \$1,%rax
-       mov     0(%rsi),%r15
-       mov     8(%rsi),%r14
-       mov     16(%rsi),%r13
-       mov     24(%rsi),%r12
-       mov     32(%rsi),%rbp
-       mov     40(%rsi),%rbx
-       lea     48(%rsi),%rsp
-.Lsqr4x_epilogue:
+       mov     -48(%rsi),%r15
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp
+.Lmulx4x_epilogue:
        ret
-.size  bn_sqr4x_mont,.-bn_sqr4x_mont
+.size  bn_mulx4x_mont,.-bn_mulx4x_mont
 ___
 }}}
 $code.=<<___;
@@ -1581,18 +1285,22 @@ sqr_handler:
        mov     120($context),%rax      # pull context->Rax
        mov     248($context),%rbx      # pull context->Rip
 
-       lea     .Lsqr4x_body(%rip),%r10
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # end of prologue label
        cmp     %r10,%rbx               # context->Rip<.Lsqr_body
        jb      .Lcommon_seh_tail
 
        mov     152($context),%rax      # pull context->Rsp
 
-       lea     .Lsqr4x_epilogue(%rip),%r10
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
        cmp     %r10,%rbx               # context->Rip>=.Lsqr_epilogue
        jae     .Lcommon_seh_tail
 
-       mov     56(%rax),%rax           # pull saved stack pointer
-       lea     48(%rax),%rax
+       mov     40(%rax),%rax           # pull saved stack pointer
 
        mov     -8(%rax),%rbx
        mov     -16(%rax),%rbp
@@ -1657,10 +1365,16 @@ sqr_handler:
        .rva    .LSEH_end_bn_mul4x_mont
        .rva    .LSEH_info_bn_mul4x_mont
 
-       .rva    .LSEH_begin_bn_sqr4x_mont
-       .rva    .LSEH_end_bn_sqr4x_mont
-       .rva    .LSEH_info_bn_sqr4x_mont
-
+       .rva    .LSEH_begin_bn_sqr8x_mont
+       .rva    .LSEH_end_bn_sqr8x_mont
+       .rva    .LSEH_info_bn_sqr8x_mont
+___
+$code.=<<___ if ($addx);
+       .rva    .LSEH_begin_bn_mulx4x_mont
+       .rva    .LSEH_end_bn_mulx4x_mont
+       .rva    .LSEH_info_bn_mulx4x_mont
+___
+$code.=<<___;
 .section       .xdata
 .align 8
 .LSEH_info_bn_mul_mont:
@@ -1671,9 +1385,16 @@ sqr_handler:
        .byte   9,0,0,0
        .rva    mul_handler
        .rva    .Lmul4x_body,.Lmul4x_epilogue   # HandlerData[]
-.LSEH_info_bn_sqr4x_mont:
+.LSEH_info_bn_sqr8x_mont:
+       .byte   9,0,0,0
+       .rva    sqr_handler
+       .rva    .Lsqr8x_body,.Lsqr8x_epilogue   # HandlerData[]
+___
+$code.=<<___ if ($addx);
+.LSEH_info_bn_mulx4x_mont:
        .byte   9,0,0,0
        .rva    sqr_handler
+       .rva    .Lmulx4x_body,.Lmulx4x_epilogue # HandlerData[]
 ___
 }
 
index dae0fe2..820de3d 100755 (executable)
 # is implemented, so that scatter-/gathering can be tuned without
 # bn_exp.c modifications.
 
+# August 2013.
+#
+# Add MULX/AD*X code paths and additional interfaces to optimize for
+# the branch prediction unit. For input lengths that are multiples of 8
+# the np argument is not just the modulus value, but the modulus
+# interleaved with 0. This is to optimize the post-condition...
+
 $flavour = shift;
 $output  = shift;
 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
@@ -31,6 +38,21 @@ die "can't locate x86_64-xlate.pl";
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
 
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $addx = ($1>=2.23);
+}
+
+if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+           `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $addx = ($1>=2.10);
+}
+
+if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+           `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $addx = ($1>=12);
+}
+
 # int bn_mul_mont_gather5(
 $rp="%rdi";    # BN_ULONG *rp,
 $ap="%rsi";    # const BN_ULONG *ap,
@@ -53,19 +75,25 @@ $m1="%rbp";
 $code=<<___;
 .text
 
+.extern        OPENSSL_ia32cap_P
+
 .globl bn_mul_mont_gather5
 .type  bn_mul_mont_gather5,\@function,6
 .align 64
 bn_mul_mont_gather5:
-       test    \$3,${num}d
+       test    \$7,${num}d
        jnz     .Lmul_enter
-       cmp     \$8,${num}d
-       jb      .Lmul_enter
+___
+$code.=<<___ if ($addx);
+       mov     OPENSSL_ia32cap_P+8(%rip),%r11d
+___
+$code.=<<___;
        jmp     .Lmul4x_enter
 
 .align 16
 .Lmul_enter:
        mov     ${num}d,${num}d
+       mov     %rsp,%rax
        mov     `($win64?56:8)`(%rsp),%r10d     # load 7th argument
        push    %rbx
        push    %rbp
@@ -78,10 +106,8 @@ $code.=<<___ if ($win64);
        lea     -0x28(%rsp),%rsp
        movaps  %xmm6,(%rsp)
        movaps  %xmm7,0x10(%rsp)
-.Lmul_alloca:
 ___
 $code.=<<___;
-       mov     %rsp,%rax
        lea     2($num),%r11
        neg     %r11
        lea     (%rsp,%r11,8),%rsp      # tp=alloca(8*(num+2))
@@ -287,7 +313,7 @@ $code.=<<___;
 
        lea     1($i),$i                # i++
        cmp     $num,$i
-       jl      .Louter
+       jb      .Louter
 
        xor     $i,$i                   # i=0 and clear CF!
        mov     (%rsp),%rax             # tp[0]
@@ -323,18 +349,17 @@ $code.=<<___;
        mov     \$1,%rax
 ___
 $code.=<<___ if ($win64);
-       movaps  (%rsi),%xmm6
-       movaps  0x10(%rsi),%xmm7
-       lea     0x28(%rsi),%rsi
+       movaps  -88(%rsi),%xmm6
+       movaps  -72(%rsi),%xmm7
 ___
 $code.=<<___;
-       mov     (%rsi),%r15
-       mov     8(%rsi),%r14
-       mov     16(%rsi),%r13
-       mov     24(%rsi),%r12
-       mov     32(%rsi),%rbp
-       mov     40(%rsi),%rbx
-       lea     48(%rsi),%rsp
+       mov     -48(%rsi),%r15
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp
 .Lmul_epilogue:
        ret
 .size  bn_mul_mont_gather5,.-bn_mul_mont_gather5
@@ -344,11 +369,18 @@ my @A=("%r10","%r11");
 my @N=("%r13","%rdi");
 $code.=<<___;
 .type  bn_mul4x_mont_gather5,\@function,6
-.align 16
+.align 32
 bn_mul4x_mont_gather5:
 .Lmul4x_enter:
-       mov     ${num}d,${num}d
-       mov     `($win64?56:8)`(%rsp),%r10d     # load 7th argument
+___
+$code.=<<___ if ($addx);
+       and     \$0x80100,%r11d
+       cmp     \$0x80100,%r11d
+       je      .Lmulx4x_enter
+___
+$code.=<<___;
+       .byte   0x67
+       mov     %rsp,%rax
        push    %rbx
        push    %rbp
        push    %r12
@@ -360,23 +392,78 @@ $code.=<<___ if ($win64);
        lea     -0x28(%rsp),%rsp
        movaps  %xmm6,(%rsp)
        movaps  %xmm7,0x10(%rsp)
-.Lmul4x_alloca:
 ___
 $code.=<<___;
-       mov     %rsp,%rax
-       lea     4($num),%r11
-       neg     %r11
-       lea     (%rsp,%r11,8),%rsp      # tp=alloca(8*(num+4))
-       and     \$-1024,%rsp            # minimize TLB usage
-
-       mov     %rax,8(%rsp,$num,8)     # tp[num+1]=%rsp
+       .byte   0x67
+       mov     ${num}d,%r10d
+       shl     \$3,${num}d
+       shl     \$3+2,%r10d             # 4*$num
+       neg     $num                    # -$num
+
+       ##############################################################
+       # Ensure that the stack frame doesn't alias with $aptr+4*$num
+       # modulo 4096, which covers ret[num], am[num] and n[2*num]
+       # (see bn_exp.c). This is done to allow the memory disambiguation
+       # logic to do its magic. [An excessive frame is allocated in order
+       # to allow bn_from_mont8x to clear it.]
+       #
+       lea     -64(%rsp,$num,2),%r11
+       sub     $ap,%r11
+       and     \$4095,%r11
+       cmp     %r11,%r10
+       jb      .Lmul4xsp_alt
+       sub     %r11,%rsp               # align with $ap
+       lea     -64(%rsp,$num,2),%rsp   # alloca(128+num*8)
+       jmp     .Lmul4xsp_done
+
+.align 32
+.Lmul4xsp_alt:
+       lea     4096-64(,$num,2),%r10
+       lea     -64(%rsp,$num,2),%rsp   # alloca(128+num*8)
+       sub     %r10,%r11
+       mov     \$0,%r10
+       cmovc   %r10,%r11
+       sub     %r11,%rsp
+.Lmul4xsp_done:
+       and     \$-64,%rsp
+       neg     $num
+
+       mov     %rax,40(%rsp)
 .Lmul4x_body:
-       mov     $rp,16(%rsp,$num,8)     # tp[num+2]=$rp
-       mov     %rdx,%r12               # reassign $bp
+
+       call    mul4x_internal
+
+       mov     40(%rsp),%rsi           # restore %rsp
+       mov     \$1,%rax
+___
+$code.=<<___ if ($win64);
+       movaps  -88(%rsi),%xmm6
+       movaps  -72(%rsi),%xmm7
+___
+$code.=<<___;
+       mov     -48(%rsi),%r15
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp
+.Lmul4x_epilogue:
+       ret
+.size  bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
+
+.type  mul4x_internal,\@abi-omnipotent
+.align 32
+mul4x_internal:
+       shl     \$5,$num
+       mov     `($win64?56:8)`(%rax),%r10d     # load 7th argument
+       lea     256(%rdx,$num),%r13
+       shr     \$5,$num                # restore $num
 ___
                $bp="%r12";
                $STRIDE=2**5*8;         # 5 is "window size"
                $N=$STRIDE/4;           # should match cache line size
+               $tp=$i;
 $code.=<<___;
        mov     %r10,%r11
        shr     \$`log($N/8)/log(2)`,%r10
@@ -384,459 +471,2776 @@ $code.=<<___;
        not     %r10
        lea     .Lmagic_masks(%rip),%rax
        and     \$`2**5/($N/8)-1`,%r10  # 5 is "window size"
-       lea     96($bp,%r11,8),$bp      # pointer within 1st cache line
+       lea     96(%rdx,%r11,8),$bp     # pointer within 1st cache line
        movq    0(%rax,%r10,8),%xmm4    # set of masks denoting which
        movq    8(%rax,%r10,8),%xmm5    # cache line contains element
+       add     \$7,%r11
        movq    16(%rax,%r10,8),%xmm6   # denoted by 7th argument
        movq    24(%rax,%r10,8),%xmm7
+       and     \$7,%r11
 
        movq    `0*$STRIDE/4-96`($bp),%xmm0
+       lea     $STRIDE($bp),$tp        # borrow $tp
        movq    `1*$STRIDE/4-96`($bp),%xmm1
        pand    %xmm4,%xmm0
        movq    `2*$STRIDE/4-96`($bp),%xmm2
        pand    %xmm5,%xmm1
        movq    `3*$STRIDE/4-96`($bp),%xmm3
        pand    %xmm6,%xmm2
+       .byte   0x67
        por     %xmm1,%xmm0
+       movq    `0*$STRIDE/4-96`($tp),%xmm1
+       .byte   0x67
        pand    %xmm7,%xmm3
+       .byte   0x67
        por     %xmm2,%xmm0
-       lea     $STRIDE($bp),$bp
+       movq    `1*$STRIDE/4-96`($tp),%xmm2
+       .byte   0x67
+       pand    %xmm4,%xmm1
+       .byte   0x67
        por     %xmm3,%xmm0
+       movq    `2*$STRIDE/4-96`($tp),%xmm3
 
        movq    %xmm0,$m0               # m0=bp[0]
+       movq    `3*$STRIDE/4-96`($tp),%xmm0
+       mov     %r13,16+8(%rsp)         # save end of b[num]
+       mov     $rp, 56+8(%rsp)         # save $rp
+
        mov     ($n0),$n0               # pull n0[0] value
        mov     ($ap),%rax
-
-       xor     $i,$i                   # i=0
-       xor     $j,$j                   # j=0
-
-       movq    `0*$STRIDE/4-96`($bp),%xmm0
-       movq    `1*$STRIDE/4-96`($bp),%xmm1
-       pand    %xmm4,%xmm0
-       movq    `2*$STRIDE/4-96`($bp),%xmm2
-       pand    %xmm5,%xmm1
+       lea     ($ap,$num),$ap          # end of a[num]
+       neg     $num
 
        mov     $n0,$m1
        mulq    $m0                     # ap[0]*bp[0]
        mov     %rax,$A[0]
        mov     ($np),%rax
 
-       movq    `3*$STRIDE/4-96`($bp),%xmm3
-       pand    %xmm6,%xmm2
-       por     %xmm1,%xmm0
-       pand    %xmm7,%xmm3
+       pand    %xmm5,%xmm2
+       pand    %xmm6,%xmm3
+       por     %xmm2,%xmm1
 
        imulq   $A[0],$m1               # "tp[0]"*n0
+       ##############################################################
+       # $tp is chosen so that writing to the top-most element of the
+       # vector occurs just "above" references to the powers table,
+       # "above" modulo cache-line size, which effectively precludes
+       # the possibility of memory disambiguation logic failure when
+       # accessing the table.
+       # 
+       lea     64+8(%rsp,%r11,8),$tp
        mov     %rdx,$A[1]
 
-       por     %xmm2,%xmm0
-       lea     $STRIDE($bp),$bp
-       por     %xmm3,%xmm0
+       pand    %xmm7,%xmm0
+       por     %xmm3,%xmm1
+       lea     2*$STRIDE($bp),$bp
+       por     %xmm1,%xmm0
 
        mulq    $m1                     # np[0]*m1
        add     %rax,$A[0]              # discarded
-       mov     8($ap),%rax
+       mov     8($ap,$num),%rax
        adc     \$0,%rdx
        mov     %rdx,$N[1]
 
        mulq    $m0
        add     %rax,$A[1]
-       mov     8($np),%rax
+       mov     16*1($np),%rax          # interleaved with 0, therefore 16*n
        adc     \$0,%rdx
        mov     %rdx,$A[0]
 
        mulq    $m1
        add     %rax,$N[1]
-       mov     16($ap),%rax
+       mov     16($ap,$num),%rax
        adc     \$0,%rdx
        add     $A[1],$N[1]
-       lea     4($j),$j                # j++
+       lea     4*8($num),$j            # j=4
+       lea     16*4($np),$np
        adc     \$0,%rdx
-       mov     $N[1],(%rsp)
+       mov     $N[1],($tp)
        mov     %rdx,$N[0]
        jmp     .L1st4x
-.align 16
+
+.align 32
 .L1st4x:
        mulq    $m0                     # ap[j]*bp[0]
        add     %rax,$A[0]
-       mov     -16($np,$j,8),%rax
+       mov     -16*2($np),%rax
+       lea     32($tp),$tp
        adc     \$0,%rdx
        mov     %rdx,$A[1]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[0]
-       mov     -8($ap,$j,8),%rax
+       mov     -8($ap,$j),%rax
        adc     \$0,%rdx
        add     $A[0],$N[0]             # np[j]*m1+ap[j]*bp[0]
        adc     \$0,%rdx
-       mov     $N[0],-24(%rsp,$j,8)    # tp[j-1]
+       mov     $N[0],-24($tp)          # tp[j-1]
        mov     %rdx,$N[1]
 
        mulq    $m0                     # ap[j]*bp[0]
        add     %rax,$A[1]
-       mov     -8($np,$j,8),%rax
+       mov     -16*1($np),%rax
        adc     \$0,%rdx
        mov     %rdx,$A[0]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[1]
-       mov     ($ap,$j,8),%rax
+       mov     ($ap,$j),%rax
        adc     \$0,%rdx
        add     $A[1],$N[1]             # np[j]*m1+ap[j]*bp[0]
        adc     \$0,%rdx
-       mov     $N[1],-16(%rsp,$j,8)    # tp[j-1]
+       mov     $N[1],-16($tp)          # tp[j-1]
        mov     %rdx,$N[0]
 
        mulq    $m0                     # ap[j]*bp[0]
        add     %rax,$A[0]
-       mov     ($np,$j,8),%rax
+       mov     16*0($np),%rax
        adc     \$0,%rdx
        mov     %rdx,$A[1]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[0]
-       mov     8($ap,$j,8),%rax
+       mov     8($ap,$j),%rax
        adc     \$0,%rdx
        add     $A[0],$N[0]             # np[j]*m1+ap[j]*bp[0]
        adc     \$0,%rdx
-       mov     $N[0],-8(%rsp,$j,8)     # tp[j-1]
+       mov     $N[0],-8($tp)           # tp[j-1]
        mov     %rdx,$N[1]
 
        mulq    $m0                     # ap[j]*bp[0]
        add     %rax,$A[1]
-       mov     8($np,$j,8),%rax
+       mov     16*1($np),%rax
        adc     \$0,%rdx
-       lea     4($j),$j                # j++
        mov     %rdx,$A[0]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[1]
-       mov     -16($ap,$j,8),%rax
+       mov     16($ap,$j),%rax
        adc     \$0,%rdx
        add     $A[1],$N[1]             # np[j]*m1+ap[j]*bp[0]
+       lea     16*4($np),$np
        adc     \$0,%rdx
-       mov     $N[1],-32(%rsp,$j,8)    # tp[j-1]
+       mov     $N[1],($tp)             # tp[j-1]
        mov     %rdx,$N[0]
-       cmp     $num,$j
-       jl      .L1st4x
+
+       add     \$32,$j                 # j+=4
+       jnz     .L1st4x
 
        mulq    $m0                     # ap[j]*bp[0]
        add     %rax,$A[0]
-       mov     -16($np,$j,8),%rax
+       mov     -16*2($np),%rax
+       lea     32($tp),$tp
        adc     \$0,%rdx
        mov     %rdx,$A[1]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[0]
-       mov     -8($ap,$j,8),%rax
+       mov     -8($ap),%rax
        adc     \$0,%rdx
        add     $A[0],$N[0]             # np[j]*m1+ap[j]*bp[0]
        adc     \$0,%rdx
-       mov     $N[0],-24(%rsp,$j,8)    # tp[j-1]
+       mov     $N[0],-24($tp)          # tp[j-1]
        mov     %rdx,$N[1]
 
        mulq    $m0                     # ap[j]*bp[0]
        add     %rax,$A[1]
-       mov     -8($np,$j,8),%rax
+       mov     -16*1($np),%rax
        adc     \$0,%rdx
        mov     %rdx,$A[0]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[1]
-       mov     ($ap),%rax              # ap[0]
+       mov     ($ap,$num),%rax         # ap[0]
        adc     \$0,%rdx
        add     $A[1],$N[1]             # np[j]*m1+ap[j]*bp[0]
        adc     \$0,%rdx
-       mov     $N[1],-16(%rsp,$j,8)    # tp[j-1]
+       mov     $N[1],-16($tp)          # tp[j-1]
        mov     %rdx,$N[0]
 
        movq    %xmm0,$m0               # bp[1]
+       lea     ($np,$num,2),$np        # rewind $np
 
        xor     $N[1],$N[1]
        add     $A[0],$N[0]
        adc     \$0,$N[1]
-       mov     $N[0],-8(%rsp,$j,8)
-       mov     $N[1],(%rsp,$j,8)       # store upmost overflow bit
+       mov     $N[0],-8($tp)
 
-       lea     1($i),$i                # i++
-.align 4
-.Louter4x:
-       xor     $j,$j                   # j=0
-       movq    `0*$STRIDE/4-96`($bp),%xmm0
-       movq    `1*$STRIDE/4-96`($bp),%xmm1
-       pand    %xmm4,%xmm0
-       movq    `2*$STRIDE/4-96`($bp),%xmm2
-       pand    %xmm5,%xmm1
+       jmp     .Louter4x
 
-       mov     (%rsp),$A[0]
+.align 32
+.Louter4x:
+       mov     ($tp,$num),$A[0]
        mov     $n0,$m1
        mulq    $m0                     # ap[0]*bp[i]
        add     %rax,$A[0]              # ap[0]*bp[i]+tp[0]
        mov     ($np),%rax
        adc     \$0,%rdx
 
+       movq    `0*$STRIDE/4-96`($bp),%xmm0
+       movq    `1*$STRIDE/4-96`($bp),%xmm1
+       pand    %xmm4,%xmm0
+       movq    `2*$STRIDE/4-96`($bp),%xmm2
+       pand    %xmm5,%xmm1
        movq    `3*$STRIDE/4-96`($bp),%xmm3
-       pand    %xmm6,%xmm2
-       por     %xmm1,%xmm0
-       pand    %xmm7,%xmm3
 
        imulq   $A[0],$m1               # tp[0]*n0
+       .byte   0x67
        mov     %rdx,$A[1]
+       mov     $N[1],($tp)             # store upmost overflow bit
 
+       pand    %xmm6,%xmm2
+       por     %xmm1,%xmm0
+       pand    %xmm7,%xmm3
        por     %xmm2,%xmm0
+       lea     ($tp,$num),$tp          # rewind $tp
        lea     $STRIDE($bp),$bp
        por     %xmm3,%xmm0
 
        mulq    $m1                     # np[0]*m1
        add     %rax,$A[0]              # "$N[0]", discarded
-       mov     8($ap),%rax
+       mov     8($ap,$num),%rax
        adc     \$0,%rdx
        mov     %rdx,$N[1]
 
        mulq    $m0                     # ap[j]*bp[i]
        add     %rax,$A[1]
-       mov     8($np),%rax
+       mov     16*1($np),%rax          # interleaved with 0, therefore 16*n
        adc     \$0,%rdx
-       add     8(%rsp),$A[1]           # +tp[1]
+       add     8($tp),$A[1]            # +tp[1]
        adc     \$0,%rdx
        mov     %rdx,$A[0]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[1]
-       mov     16($ap),%rax
+       mov     16($ap,$num),%rax
        adc     \$0,%rdx
        add     $A[1],$N[1]             # np[j]*m1+ap[j]*bp[i]+tp[j]
-       lea     4($j),$j                # j+=2
+       lea     4*8($num),$j            # j=4
+       lea     16*4($np),$np
        adc     \$0,%rdx
        mov     %rdx,$N[0]
        jmp     .Linner4x
-.align 16
+
+.align 32
 .Linner4x:
        mulq    $m0                     # ap[j]*bp[i]
        add     %rax,$A[0]
-       mov     -16($np,$j,8),%rax
+       mov     -16*2($np),%rax
        adc     \$0,%rdx
-       add     -16(%rsp,$j,8),$A[0]    # ap[j]*bp[i]+tp[j]
+       add     16($tp),$A[0]           # ap[j]*bp[i]+tp[j]
+       lea     32($tp),$tp
        adc     \$0,%rdx
        mov     %rdx,$A[1]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[0]
-       mov     -8($ap,$j,8),%rax
+       mov     -8($ap,$j),%rax
        adc     \$0,%rdx
        add     $A[0],$N[0]
        adc     \$0,%rdx
-       mov     $N[1],-32(%rsp,$j,8)    # tp[j-1]
+       mov     $N[1],-32($tp)          # tp[j-1]
        mov     %rdx,$N[1]
 
        mulq    $m0                     # ap[j]*bp[i]
        add     %rax,$A[1]
-       mov     -8($np,$j,8),%rax
+       mov     -16*1($np),%rax
        adc     \$0,%rdx
-       add     -8(%rsp,$j,8),$A[1]
+       add     -8($tp),$A[1]
        adc     \$0,%rdx
        mov     %rdx,$A[0]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[1]
-       mov     ($ap,$j,8),%rax
+       mov     ($ap,$j),%rax
        adc     \$0,%rdx
        add     $A[1],$N[1]
        adc     \$0,%rdx
-       mov     $N[0],-24(%rsp,$j,8)    # tp[j-1]
+       mov     $N[0],-24($tp)          # tp[j-1]
        mov     %rdx,$N[0]
 
        mulq    $m0                     # ap[j]*bp[i]
        add     %rax,$A[0]
-       mov     ($np,$j,8),%rax
+       mov     16*0($np),%rax
        adc     \$0,%rdx
-       add     (%rsp,$j,8),$A[0]       # ap[j]*bp[i]+tp[j]
+       add     ($tp),$A[0]             # ap[j]*bp[i]+tp[j]
        adc     \$0,%rdx
        mov     %rdx,$A[1]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[0]
-       mov     8($ap,$j,8),%rax
+       mov     8($ap,$j),%rax
        adc     \$0,%rdx
        add     $A[0],$N[0]
        adc     \$0,%rdx
-       mov     $N[1],-16(%rsp,$j,8)    # tp[j-1]
+       mov     $N[1],-16($tp)          # tp[j-1]
        mov     %rdx,$N[1]
 
        mulq    $m0                     # ap[j]*bp[i]
        add     %rax,$A[1]
-       mov     8($np,$j,8),%rax
+       mov     16*1($np),%rax
        adc     \$0,%rdx
-       add     8(%rsp,$j,8),$A[1]
+       add     8($tp),$A[1]
        adc     \$0,%rdx
-       lea     4($j),$j                # j++
        mov     %rdx,$A[0]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[1]
-       mov     -16($ap,$j,8),%rax
+       mov     16($ap,$j),%rax
        adc     \$0,%rdx
        add     $A[1],$N[1]
+       lea     16*4($np),$np
        adc     \$0,%rdx
-       mov     $N[0],-40(%rsp,$j,8)    # tp[j-1]
+       mov     $N[0],-8($tp)           # tp[j-1]
        mov     %rdx,$N[0]
-       cmp     $num,$j
-       jl      .Linner4x
+
+       add     \$32,$j                 # j+=4
+       jnz     .Linner4x
 
        mulq    $m0                     # ap[j]*bp[i]
        add     %rax,$A[0]
-       mov     -16($np,$j,8),%rax
+       mov     -16*2($np),%rax
        adc     \$0,%rdx
-       add     -16(%rsp,$j,8),$A[0]    # ap[j]*bp[i]+tp[j]
+       add     16($tp),$A[0]           # ap[j]*bp[i]+tp[j]
+       lea     32($tp),$tp
        adc     \$0,%rdx
        mov     %rdx,$A[1]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[0]
-       mov     -8($ap,$j,8),%rax
+       mov     -8($ap),%rax
        adc     \$0,%rdx
        add     $A[0],$N[0]
        adc     \$0,%rdx
-       mov     $N[1],-32(%rsp,$j,8)    # tp[j-1]
+       mov     $N[1],-32($tp)          # tp[j-1]
        mov     %rdx,$N[1]
 
        mulq    $m0                     # ap[j]*bp[i]
        add     %rax,$A[1]
-       mov     -8($np,$j,8),%rax
+       mov     $m1,%rax
+       mov     -16*1($np),$m1
        adc     \$0,%rdx
-       add     -8(%rsp,$j,8),$A[1]
+       add     -8($tp),$A[1]
        adc     \$0,%rdx
-       lea     1($i),$i                # i++
        mov     %rdx,$A[0]
 
        mulq    $m1                     # np[j]*m1
        add     %rax,$N[1]
-       mov     ($ap),%rax              # ap[0]
+       mov     ($ap,$num),%rax         # ap[0]
        adc     \$0,%rdx
        add     $A[1],$N[1]
        adc     \$0,%rdx
-       mov     $N[0],-24(%rsp,$j,8)    # tp[j-1]
+       mov     $N[0],-24($tp)          # tp[j-1]
        mov     %rdx,$N[0]
 
        movq    %xmm0,$m0               # bp[i+1]
-       mov     $N[1],-16(%rsp,$j,8)    # tp[j-1]
+       mov     $N[1],-16($tp)          # tp[j-1]
+       lea     ($np,$num,2),$np        # rewind $np
 
        xor     $N[1],$N[1]
        add     $A[0],$N[0]
        adc     \$0,$N[1]
-       add     (%rsp,$num,8),$N[0]     # pull upmost overflow bit
-       adc     \$0,$N[1]
-       mov     $N[0],-8(%rsp,$j,8)
-       mov     $N[1],(%rsp,$j,8)       # store upmost overflow bit
+       add     ($tp),$N[0]             # pull upmost overflow bit
+       adc     \$0,$N[1]               # upmost overflow bit
+       mov     $N[0],-8($tp)
 
-       cmp     $num,$i
-       jl      .Louter4x
+       cmp     16+8(%rsp),$bp
+       jb      .Louter4x
 ___
-{
-my @ri=("%rax","%rdx",$m0,$m1);
+if (1) {
 $code.=<<___;
-       mov     16(%rsp,$num,8),$rp     # restore $rp
-       mov     0(%rsp),@ri[0]          # tp[0]
-       pxor    %xmm0,%xmm0
-       mov     8(%rsp),@ri[1]          # tp[1]
-       shr     \$2,$num                # num/=4
-       lea     (%rsp),$ap              # borrow ap for tp
-       xor     $i,$i                   # i=0 and clear CF!
-
-       sub     0($np),@ri[0]
-       mov     16($ap),@ri[2]          # tp[2]
-       mov     24($ap),@ri[3]          # tp[3]
-       sbb     8($np),@ri[1]
-       lea     -1($num),$j             # j=num/4-1
+       sub     $N[0],$m1               # compare top-most words
+       adc     $j,$j                   # $j is zero
+       or      $j,$N[1]
+       xor     \$1,$N[1]
+       lea     ($tp,$num),%rbx         # tptr in .sqr4x_sub
+       lea     ($np,$N[1],8),%rbp      # nptr in .sqr4x_sub
+       mov     %r9,%rcx
+       sar     \$3+2,%rcx              # cf=0
+       mov     56+8(%rsp),%rdi         # rptr in .sqr4x_sub
+       jmp     .Lsqr4x_sub
+___
+} else {
+my @ri=("%rax",$bp,$m0,$m1);
+my $rp="%rdx";
+$code.=<<___
+       xor     \$1,$N[1]
+       lea     ($tp,$num),$tp          # rewind $tp
+       sar     \$5,$num                # cf=0
+       lea     ($np,$N[1],8),$np
+       mov     56+8(%rsp),$rp          # restore $rp
        jmp     .Lsub4x
-.align 16
+
+.align 32
 .Lsub4x:
-       mov     @ri[0],0($rp,$i,8)      # rp[i]=tp[i]-np[i]
-       mov     @ri[1],8($rp,$i,8)      # rp[i]=tp[i]-np[i]
-       sbb     16($np,$i,8),@ri[2]
-       mov     32($ap,$i,8),@ri[0]     # tp[i+1]
-       mov     40($ap,$i,8),@ri[1]
-       sbb     24($np,$i,8),@ri[3]
-       mov     @ri[2],16($rp,$i,8)     # rp[i]=tp[i]-np[i]
-       mov     @ri[3],24($rp,$i,8)     # rp[i]=tp[i]-np[i]
-       sbb     32($np,$i,8),@ri[0]
-       mov     48($ap,$i,8),@ri[2]
-       mov     56($ap,$i,8),@ri[3]
-       sbb     40($np,$i,8),@ri[1]
-       lea     4($i),$i                # i++
-       dec     $j                      # doesnn't affect CF!
+       .byte   0x66
+       mov     8*0($tp),@ri[0]
+       mov     8*1($tp),@ri[1]
+       .byte   0x66
+       sbb     16*0($np),@ri[0]
+       mov     8*2($tp),@ri[2]
+       sbb     16*1($np),@ri[1]
+       mov     3*8($tp),@ri[3]
+       lea     4*8($tp),$tp
+       sbb     16*2($np),@ri[2]
+       mov     @ri[0],8*0($rp)
+       sbb     16*3($np),@ri[3]
+       lea     16*4($np),$np
+       mov     @ri[1],8*1($rp)
+       mov     @ri[2],8*2($rp)
+       mov     @ri[3],8*3($rp)
+       lea     8*4($rp),$rp
+
+       inc     $num
        jnz     .Lsub4x
 
-       mov     @ri[0],0($rp,$i,8)      # rp[i]=tp[i]-np[i]
-       mov     32($ap,$i,8),@ri[0]     # load overflow bit
-       sbb     16($np,$i,8),@ri[2]
-       mov     @ri[1],8($rp,$i,8)      # rp[i]=tp[i]-np[i]
-       sbb     24($np,$i,8),@ri[3]
-       mov     @ri[2],16($rp,$i,8)     # rp[i]=tp[i]-np[i]
+       ret
+___
+}
+$code.=<<___;
+.size  mul4x_internal,.-mul4x_internal
+___
+}}}
+\f{{{
+######################################################################
+# void bn_power5(
+my $rptr="%rdi";       # BN_ULONG *rptr,
+my $aptr="%rsi";       # const BN_ULONG *aptr,
+my $bptr="%rdx";       # const void *table,
+my $nptr="%rcx";       # const BN_ULONG *nptr,
+my $n0  ="%r8";                # const BN_ULONG *n0);
+my $num ="%r9";                # int num, has to be divisible by 8
+                       # int pwr 
+
+my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
+my @A0=("%r10","%r11");
+my @A1=("%r12","%r13");
+my ($a0,$a1,$ai)=("%r14","%r15","%rbx");
 
-       sbb     \$0,@ri[0]              # handle upmost overflow bit
-       mov     @ri[3],24($rp,$i,8)     # rp[i]=tp[i]-np[i]
-       xor     $i,$i                   # i=0
-       and     @ri[0],$ap
-       not     @ri[0]
-       mov     $rp,$np
-       and     @ri[0],$np
-       lea     -1($num),$j
-       or      $np,$ap                 # ap=borrow?tp:rp
+# bn_power5 entry point.  The body below calls __bn_sqr8x_internal five
+# times in a row and then mul4x_internal once; %rax is set to 1 before
+# returning.  When $addx is set, the routine first checks
+# OPENSSL_ia32cap_P and branches to .Lpowerx5_enter if both bits of
+# 0x80100 are present.
+$code.=<<___;
+.globl bn_power5
+.type  bn_power5,\@function,6
+.align 32
+bn_power5:
+___
+$code.=<<___ if ($addx);
+       mov     OPENSSL_ia32cap_P+8(%rip),%r11d
+       and     \$0x80100,%r11d
+       cmp     \$0x80100,%r11d         # both capability bits required
+       je      .Lpowerx5_enter
+___
+$code.=<<___;
+       mov     %rsp,%rax               # original %rsp, stored at 40(%rsp) below
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___ if ($win64);
+       lea     -0x28(%rsp),%rsp
+       movaps  %xmm6,(%rsp)            # ends up at -88(original %rsp)
+       movaps  %xmm7,0x10(%rsp)        # ends up at -72(original %rsp)
+___
+$code.=<<___;
+       mov     ${num}d,%r10d
+       shl     \$3,${num}d             # convert $num to bytes
+       shl     \$3+2,%r10d             # 4*$num
+       neg     $num
+       mov     ($n0),$n0               # *n0
+
+       ##############################################################
+       # ensure that stack frame doesn't alias with $aptr+4*$num
+       # modulo 4096, which covers ret[num], am[num] and n[2*num]
+       # (see bn_exp.c). this is done to allow the memory
+       # disambiguation logic to do its magic.
+       #
+       lea     -64(%rsp,$num,2),%r11
+       sub     $aptr,%r11
+       and     \$4095,%r11
+       cmp     %r11,%r10
+       jb      .Lpwr_sp_alt
+       sub     %r11,%rsp               # align with $aptr
+       lea     -64(%rsp,$num,2),%rsp   # alloca(frame+2*$num)
+       jmp     .Lpwr_sp_done
+
+.align 32
+.Lpwr_sp_alt:
+       lea     4096-64(,$num,2),%r10   # 4096-frame-2*$num
+       lea     -64(%rsp,$num,2),%rsp   # alloca(frame+2*$num)
+       sub     %r10,%r11
+       mov     \$0,%r10
+       cmovc   %r10,%r11
+       sub     %r11,%rsp
+.Lpwr_sp_done:
+       and     \$-64,%rsp              # round %rsp down to a 64-byte boundary
+       mov     $num,%r10               # $num is still negated here, so %r10=-$num
+       neg     $num                    # $num is positive again
+
+       ##############################################################
+       # Stack layout
+       #
+       # +0    saved $num, used in reduction section
+       # +8    &t[2*$num], used in reduction section
+       # +32   saved *n0
+       # +40   saved %rsp
+       # +48   t[2*$num]
+       #
+       mov     $n0,  32(%rsp)
+       mov     %rax, 40(%rsp)          # save original %rsp
+.Lpower5_body:
+       movq    $rptr,%xmm1             # save $rptr
+       movq    $nptr,%xmm2             # save $nptr
+       movq    %r10, %xmm3             # -$num
+       movq    $bptr,%xmm4             # save $bptr
+
+       call    __bn_sqr8x_internal     # five squarings in a row
+       call    __bn_sqr8x_internal
+       call    __bn_sqr8x_internal
+       call    __bn_sqr8x_internal
+       call    __bn_sqr8x_internal
+
+       movq    %xmm2,$nptr             # reload $nptr
+       movq    %xmm4,$bptr             # reload $bptr
+       mov     $aptr,$rptr             # NOTE(review): relies on what the squaring tail leaves in $aptr - confirm
+       mov     40(%rsp),%rax           # original %rsp
+       lea     32(%rsp),$n0            # address of the saved *n0
+
+       call    mul4x_internal
+
+       mov     40(%rsp),%rsi           # restore %rsp
+       mov     \$1,%rax
+___
+$code.=<<___ if ($win64);
+       movaps  -88(%rsi),%xmm6         # undo the Win64-only save in the prologue
+       movaps  -72(%rsi),%xmm7
+___
+$code.=<<___;
+       mov     -48(%rsi),%r15
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp
+.Lpower5_epilogue:
+       ret
+.size  bn_power5,.-bn_power5
+
+.globl bn_sqr8x_internal
+.hidden        bn_sqr8x_internal
+.type  bn_sqr8x_internal,\@abi-omnipotent
+.align 32
+bn_sqr8x_internal:
+__bn_sqr8x_internal:
+       ##############################################################
+       # Squaring part:
+       #
+       # a) multiply-n-add everything but a[i]*a[i];
+       # b) shift result of a) by 1 to the left and accumulate
+       #    a[i]*a[i] products;
+       #
+       ##############################################################
+       #                                                     a[1]a[0]
+       #                                                 a[2]a[0]
+       #                                             a[3]a[0]
+       #                                             a[2]a[1]
+       #                                         a[4]a[0]
+       #                                         a[3]a[1]
+       #                                     a[5]a[0]
+       #                                     a[4]a[1]
+       #                                     a[3]a[2]
+       #                                 a[6]a[0]
+       #                                 a[5]a[1]
+       #                                 a[4]a[2]
+       #                             a[7]a[0]
+       #                             a[6]a[1]
+       #                             a[5]a[2]
+       #                             a[4]a[3]
+       #                         a[7]a[1]
+       #                         a[6]a[2]
+       #                         a[5]a[3]
+       #                     a[7]a[2]
+       #                     a[6]a[3]
+       #                     a[5]a[4]
+       #                 a[7]a[3]
+       #                 a[6]a[4]
+       #             a[7]a[4]
+       #             a[6]a[5]
+       #         a[7]a[5]
+       #     a[7]a[6]
+       #                                                     a[1]a[0]
+       #                                                 a[2]a[0]
+       #                                             a[3]a[0]
+       #                                         a[4]a[0]
+       #                                     a[5]a[0]
+       #                                 a[6]a[0]
+       #                             a[7]a[0]
+       #                                             a[2]a[1]
+       #                                         a[3]a[1]
+       #                                     a[4]a[1]
+       #                                 a[5]a[1]
+       #                             a[6]a[1]
+       #                         a[7]a[1]
+       #                                     a[3]a[2]
+       #                                 a[4]a[2]
+       #                             a[5]a[2]
+       #                         a[6]a[2]
+       #                     a[7]a[2]
+       #                             a[4]a[3]
+       #                         a[5]a[3]
+       #                     a[6]a[3]
+       #                 a[7]a[3]
+       #                     a[5]a[4]
+       #                 a[6]a[4]
+       #             a[7]a[4]
+       #             a[6]a[5]
+       #         a[7]a[5]
+       #     a[7]a[6]
+       #                                                         a[0]a[0]
+       #                                                 a[1]a[1]
+       #                                         a[2]a[2]
+       #                                 a[3]a[3]
+       #                         a[4]a[4]
+       #                 a[5]a[5]
+       #         a[6]a[6]
+       # a[7]a[7]
+
+       lea     32(%r10),$i             # $i=-($num-32)
+       lea     ($aptr,$num),$aptr      # end of a[] buffer, ($aptr,$i)=&ap[2]
+
+       mov     $num,$j                 # $j=$num
+
+                                       # comments apply to $num==8 case
+       mov     -32($aptr,$i),$a0       # a[0]
+       lea     48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
+       mov     -24($aptr,$i),%rax      # a[1]
+       lea     -32($tptr,$i),$tptr     # end of tp[] window, &tp[2*$num-"$i"]
+       mov     -16($aptr,$i),$ai       # a[2]
+       mov     %rax,$a1
+
+       mul     $a0                     # a[1]*a[0]
+       mov     %rax,$A0[0]             # a[1]*a[0]
+        mov    $ai,%rax                # a[2]
+       mov     %rdx,$A0[1]
+       mov     $A0[0],-24($tptr,$i)    # t[1]
+
+       mul     $a0                     # a[2]*a[0]
+       add     %rax,$A0[1]
+        mov    $ai,%rax
+       adc     \$0,%rdx
+       mov     $A0[1],-16($tptr,$i)    # t[2]
+       mov     %rdx,$A0[0]
+
+
+        mov    -8($aptr,$i),$ai        # a[3]
+       mul     $a1                     # a[2]*a[1]
+       mov     %rax,$A1[0]             # a[2]*a[1]+t[3]
+        mov    $ai,%rax
+       mov     %rdx,$A1[1]
+
+        lea    ($i),$j
+       mul     $a0                     # a[3]*a[0]
+       add     %rax,$A0[0]             # a[3]*a[0]+a[2]*a[1]+t[3]
+        mov    $ai,%rax
+       mov     %rdx,$A0[1]
+       adc     \$0,$A0[1]
+       add     $A1[0],$A0[0]
+       adc     \$0,$A0[1]
+       mov     $A0[0],-8($tptr,$j)     # t[3]
+       jmp     .Lsqr4x_1st
+
+.align 32
+.Lsqr4x_1st:
+        mov    ($aptr,$j),$ai          # a[4]
+       mul     $a1                     # a[3]*a[1]
+       add     %rax,$A1[1]             # a[3]*a[1]+t[4]
+        mov    $ai,%rax
+       mov     %rdx,$A1[0]
+       adc     \$0,$A1[0]
+
+       mul     $a0                     # a[4]*a[0]
+       add     %rax,$A0[1]             # a[4]*a[0]+a[3]*a[1]+t[4]
+        mov    $ai,%rax                # a[3]
+        mov    8($aptr,$j),$ai         # a[5]
+       mov     %rdx,$A0[0]
+       adc     \$0,$A0[0]
+       add     $A1[1],$A0[1]
+       adc     \$0,$A0[0]
+
+
+       mul     $a1                     # a[4]*a[3]
+       add     %rax,$A1[0]             # a[4]*a[3]+t[5]
+        mov    $ai,%rax
+        mov    $A0[1],($tptr,$j)       # t[4]
+       mov     %rdx,$A1[1]
+       adc     \$0,$A1[1]
+
+       mul     $a0                     # a[5]*a[2]
+       add     %rax,$A0[0]             # a[5]*a[2]+a[4]*a[3]+t[5]
+        mov    $ai,%rax
+        mov    16($aptr,$j),$ai        # a[6]
+       mov     %rdx,$A0[1]
+       adc     \$0,$A0[1]
+       add     $A1[0],$A0[0]
+       adc     \$0,$A0[1]
+
+       mul     $a1                     # a[5]*a[3]
+       add     %rax,$A1[1]             # a[5]*a[3]+t[6]
+        mov    $ai,%rax
+        mov    $A0[0],8($tptr,$j)      # t[5]
+       mov     %rdx,$A1[0]
+       adc     \$0,$A1[0]
+
+       mul     $a0                     # a[6]*a[2]
+       add     %rax,$A0[1]             # a[6]*a[2]+a[5]*a[3]+t[6]
+        mov    $ai,%rax                # a[3]
+        mov    24($aptr,$j),$ai        # a[7]
+       mov     %rdx,$A0[0]
+       adc     \$0,$A0[0]
+       add     $A1[1],$A0[1]
+       adc     \$0,$A0[0]
+
+
+       mul     $a1                     # a[6]*a[5]
+       add     %rax,$A1[0]             # a[6]*a[5]+t[7]
+        mov    $ai,%rax
+        mov    $A0[1],16($tptr,$j)     # t[6]
+       mov     %rdx,$A1[1]
+       adc     \$0,$A1[1]
+        lea    32($j),$j
+
+       mul     $a0                     # a[7]*a[4]
+       add     %rax,$A0[0]             # a[7]*a[4]+a[6]*a[5]+t[6]
+        mov    $ai,%rax
+       mov     %rdx,$A0[1]
+       adc     \$0,$A0[1]
+       add     $A1[0],$A0[0]
+       adc     \$0,$A0[1]
+       mov     $A0[0],-8($tptr,$j)     # t[7]
+
+       cmp     \$0,$j
+       jne     .Lsqr4x_1st
+
+       mul     $a1                     # a[7]*a[5]
+       add     %rax,$A1[1]
+       lea     16($i),$i
+       adc     \$0,%rdx
+       add     $A0[1],$A1[1]
+       adc     \$0,%rdx
+
+       mov     $A1[1],($tptr)          # t[8]
+       mov     %rdx,$A1[0]
+       mov     %rdx,8($tptr)           # t[9]
+       jmp     .Lsqr4x_outer
+
+.align 32
+.Lsqr4x_outer:                         # comments apply to $num==6 case
+       mov     -32($aptr,$i),$a0       # a[0]
+       lea     48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
+       mov     -24($aptr,$i),%rax      # a[1]
+       lea     -32($tptr,$i),$tptr     # end of tp[] window, &tp[2*$num-"$i"]
+       mov     -16($aptr,$i),$ai       # a[2]
+       mov     %rax,$a1
+
+       mul     $a0                     # a[1]*a[0]
+       mov     -24($tptr,$i),$A0[0]    # t[1]
+       add     %rax,$A0[0]             # a[1]*a[0]+t[1]
+        mov    $ai,%rax                # a[2]
+       adc     \$0,%rdx
+       mov     $A0[0],-24($tptr,$i)    # t[1]
+       mov     %rdx,$A0[1]
+
+       mul     $a0                     # a[2]*a[0]
+       add     %rax,$A0[1]
+        mov    $ai,%rax
+       adc     \$0,%rdx
+       add     -16($tptr,$i),$A0[1]    # a[2]*a[0]+t[2]
+       mov     %rdx,$A0[0]
+       adc     \$0,$A0[0]
+       mov     $A0[1],-16($tptr,$i)    # t[2]
+
+       xor     $A1[0],$A1[0]
+
+        mov    -8($aptr,$i),$ai        # a[3]
+       mul     $a1                     # a[2]*a[1]
+       add     %rax,$A1[0]             # a[2]*a[1]+t[3]
+        mov    $ai,%rax
+       adc     \$0,%rdx
+       add     -8($tptr,$i),$A1[0]
+       mov     %rdx,$A1[1]
+       adc     \$0,$A1[1]
+
+       mul     $a0                     # a[3]*a[0]
+       add     %rax,$A0[0]             # a[3]*a[0]+a[2]*a[1]+t[3]
+        mov    $ai,%rax
+       adc     \$0,%rdx
+       add     $A1[0],$A0[0]
+       mov     %rdx,$A0[1]
+       adc     \$0,$A0[1]
+       mov     $A0[0],-8($tptr,$i)     # t[3]
+
+       lea     ($i),$j
+       jmp     .Lsqr4x_inner
+
+.align 32
+.Lsqr4x_inner:
+        mov    ($aptr,$j),$ai          # a[4]
+       mul     $a1                     # a[3]*a[1]
+       add     %rax,$A1[1]             # a[3]*a[1]+t[4]
+        mov    $ai,%rax
+       mov     %rdx,$A1[0]
+       adc     \$0,$A1[0]
+       add     ($tptr,$j),$A1[1]
+       adc     \$0,$A1[0]
+
+       .byte   0x67
+       mul     $a0                     # a[4]*a[0]
+       add     %rax,$A0[1]             # a[4]*a[0]+a[3]*a[1]+t[4]
+        mov    $ai,%rax                # a[3]
+        mov    8($aptr,$j),$ai         # a[5]
+       mov     %rdx,$A0[0]
+       adc     \$0,$A0[0]
+       add     $A1[1],$A0[1]
+       adc     \$0,$A0[0]
+
+       mul     $a1                     # a[4]*a[3]
+       add     %rax,$A1[0]             # a[4]*a[3]+t[5]
+       mov     $A0[1],($tptr,$j)       # t[4]
+        mov    $ai,%rax
+       mov     %rdx,$A1[1]
+       adc     \$0,$A1[1]
+       add     8($tptr,$j),$A1[0]
+       lea     16($j),$j               # j++
+       adc     \$0,$A1[1]
+
+       mul     $a0                     # a[5]*a[2]
+       add     %rax,$A0[0]             # a[5]*a[2]+a[4]*a[3]+t[5]
+        mov    $ai,%rax
+       adc     \$0,%rdx
+       add     $A1[0],$A0[0]
+       mov     %rdx,$A0[1]
+       adc     \$0,$A0[1]
+       mov     $A0[0],-8($tptr,$j)     # t[5], "preloaded t[1]" below
+
+       cmp     \$0,$j
+       jne     .Lsqr4x_inner
+
+       .byte   0x67
+       mul     $a1                     # a[5]*a[3]
+       add     %rax,$A1[1]
+       adc     \$0,%rdx
+       add     $A0[1],$A1[1]
+       adc     \$0,%rdx
+
+       mov     $A1[1],($tptr)          # t[6], "preloaded t[2]" below
+       mov     %rdx,$A1[0]
+       mov     %rdx,8($tptr)           # t[7], "preloaded t[3]" below
+
+       add     \$16,$i
+       jnz     .Lsqr4x_outer
+
+                                       # comments apply to $num==4 case
+       mov     -32($aptr),$a0          # a[0]
+       lea     48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
+       mov     -24($aptr),%rax         # a[1]
+       lea     -32($tptr,$i),$tptr     # end of tp[] window, &tp[2*$num-"$i"]
+       mov     -16($aptr),$ai          # a[2]
+       mov     %rax,$a1
+
+       mul     $a0                     # a[1]*a[0]
+       add     %rax,$A0[0]             # a[1]*a[0]+t[1], preloaded t[1]
+        mov    $ai,%rax                # a[2]
+       mov     %rdx,$A0[1]
+       adc     \$0,$A0[1]
+
+       mul     $a0                     # a[2]*a[0]
+       add     %rax,$A0[1]
+        mov    $ai,%rax
+        mov    $A0[0],-24($tptr)       # t[1]
+       mov     %rdx,$A0[0]
+       adc     \$0,$A0[0]
+       add     $A1[1],$A0[1]           # a[2]*a[0]+t[2], preloaded t[2]
+        mov    -8($aptr),$ai           # a[3]
+       adc     \$0,$A0[0]
+
+       mul     $a1                     # a[2]*a[1]
+       add     %rax,$A1[0]             # a[2]*a[1]+t[3], preloaded t[3]
+        mov    $ai,%rax
+        mov    $A0[1],-16($tptr)       # t[2]
+       mov     %rdx,$A1[1]
+       adc     \$0,$A1[1]
+
+       mul     $a0                     # a[3]*a[0]
+       add     %rax,$A0[0]             # a[3]*a[0]+a[2]*a[1]+t[3]
+        mov    $ai,%rax
+       mov     %rdx,$A0[1]
+       adc     \$0,$A0[1]
+       add     $A1[0],$A0[0]
+       adc     \$0,$A0[1]
+       mov     $A0[0],-8($tptr)        # t[3]
+
+       mul     $a1                     # a[3]*a[1]
+       add     %rax,$A1[1]
+        mov    -16($aptr),%rax         # a[2]
+       adc     \$0,%rdx
+       add     $A0[1],$A1[1]
+       adc     \$0,%rdx
+
+       mov     $A1[1],($tptr)          # t[4]
+       mov     %rdx,$A1[0]
+       mov     %rdx,8($tptr)           # t[5]
+
+       mul     $ai                     # a[2]*a[3]
+___
+{
+my ($shift,$carry)=($a0,$a1);
+my @S=(@A1,$ai,$n0);
+$code.=<<___;
+        add    \$16,$i
+        xor    $shift,$shift
+        sub    $num,$i                 # $i=16-$num
+        xor    $carry,$carry
+
+       add     $A1[0],%rax             # t[5]
+       adc     \$0,%rdx
+       mov     %rax,8($tptr)           # t[5]
+       mov     %rdx,16($tptr)          # t[6]
+       mov     $carry,24($tptr)        # t[7]
+
+        mov    -16($aptr,$i),%rax      # a[0]
+       lea     48+8(%rsp),$tptr
+        xor    $A0[0],$A0[0]           # t[0]
+        mov    8($tptr),$A0[1]         # t[1]
+
+       lea     ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
+       shr     \$63,$A0[0]
+       lea     ($j,$A0[1],2),$S[1]     # t[2*i+1]<<1 |
+       shr     \$63,$A0[1]
+       or      $A0[0],$S[1]            # | t[2*i]>>63
+        mov    16($tptr),$A0[0]        # t[2*i+2]      # prefetch
+       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
+       mul     %rax                    # a[i]*a[i]
+       neg     $carry                  # mov $carry,cf
+        mov    24($tptr),$A0[1]        # t[2*i+2+1]    # prefetch
+       adc     %rax,$S[0]
+        mov    -8($aptr,$i),%rax       # a[i+1]        # prefetch
+       mov     $S[0],($tptr)
+       adc     %rdx,$S[1]
+
+       lea     ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
+        mov    $S[1],8($tptr)
+        sbb    $carry,$carry           # mov cf,$carry
+       shr     \$63,$A0[0]
+       lea     ($j,$A0[1],2),$S[3]     # t[2*i+1]<<1 |
+       shr     \$63,$A0[1]
+       or      $A0[0],$S[3]            # | t[2*i]>>63
+        mov    32($tptr),$A0[0]        # t[2*i+2]      # prefetch
+       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
+       mul     %rax                    # a[i]*a[i]
+       neg     $carry                  # mov $carry,cf
+        mov    40($tptr),$A0[1]        # t[2*i+2+1]    # prefetch
+       adc     %rax,$S[2]
+        mov    0($aptr,$i),%rax        # a[i+1]        # prefetch
+       mov     $S[2],16($tptr)
+       adc     %rdx,$S[3]
+       lea     16($i),$i
+       mov     $S[3],24($tptr)
+       sbb     $carry,$carry           # mov cf,$carry
+       lea     64($tptr),$tptr
+       jmp     .Lsqr4x_shift_n_add
+
+.align 32
+.Lsqr4x_shift_n_add:
+       lea     ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
+       shr     \$63,$A0[0]
+       lea     ($j,$A0[1],2),$S[1]     # t[2*i+1]<<1 |
+       shr     \$63,$A0[1]
+       or      $A0[0],$S[1]            # | t[2*i]>>63
+        mov    -16($tptr),$A0[0]       # t[2*i+2]      # prefetch
+       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
+       mul     %rax                    # a[i]*a[i]
+       neg     $carry                  # mov $carry,cf
+        mov    -8($tptr),$A0[1]        # t[2*i+2+1]    # prefetch
+       adc     %rax,$S[0]
+        mov    -8($aptr,$i),%rax       # a[i+1]        # prefetch
+       mov     $S[0],-32($tptr)
+       adc     %rdx,$S[1]
+
+       lea     ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
+        mov    $S[1],-24($tptr)
+        sbb    $carry,$carry           # mov cf,$carry
+       shr     \$63,$A0[0]
+       lea     ($j,$A0[1],2),$S[3]     # t[2*i+1]<<1 |
+       shr     \$63,$A0[1]
+       or      $A0[0],$S[3]            # | t[2*i]>>63
+        mov    0($tptr),$A0[0]         # t[2*i+2]      # prefetch
+       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
+       mul     %rax                    # a[i]*a[i]
+       neg     $carry                  # mov $carry,cf
+        mov    8($tptr),$A0[1]         # t[2*i+2+1]    # prefetch
+       adc     %rax,$S[2]
+        mov    0($aptr,$i),%rax        # a[i+1]        # prefetch
+       mov     $S[2],-16($tptr)
+       adc     %rdx,$S[3]
+
+       lea     ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
+        mov    $S[3],-8($tptr)
+        sbb    $carry,$carry           # mov cf,$carry
+       shr     \$63,$A0[0]
+       lea     ($j,$A0[1],2),$S[1]     # t[2*i+1]<<1 |
+       shr     \$63,$A0[1]
+       or      $A0[0],$S[1]            # | t[2*i]>>63
+        mov    16($tptr),$A0[0]        # t[2*i+2]      # prefetch
+       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
+       mul     %rax                    # a[i]*a[i]
+       neg     $carry                  # mov $carry,cf
+        mov    24($tptr),$A0[1]        # t[2*i+2+1]    # prefetch
+       adc     %rax,$S[0]
+        mov    8($aptr,$i),%rax        # a[i+1]        # prefetch
+       mov     $S[0],0($tptr)
+       adc     %rdx,$S[1]
+
+       lea     ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift
+        mov    $S[1],8($tptr)
+        sbb    $carry,$carry           # mov cf,$carry
+       shr     \$63,$A0[0]
+       lea     ($j,$A0[1],2),$S[3]     # t[2*i+1]<<1 |
+       shr     \$63,$A0[1]
+       or      $A0[0],$S[3]            # | t[2*i]>>63
+        mov    32($tptr),$A0[0]        # t[2*i+2]      # prefetch
+       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
+       mul     %rax                    # a[i]*a[i]
+       neg     $carry                  # mov $carry,cf
+        mov    40($tptr),$A0[1]        # t[2*i+2+1]    # prefetch
+       adc     %rax,$S[2]
+        mov    16($aptr,$i),%rax       # a[i+1]        # prefetch
+       mov     $S[2],16($tptr)
+       adc     %rdx,$S[3]
+       mov     $S[3],24($tptr)
+       sbb     $carry,$carry           # mov cf,$carry
+       lea     64($tptr),$tptr
+       add     \$32,$i
+       jnz     .Lsqr4x_shift_n_add
+
+       lea     ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift
+       .byte   0x67
+       shr     \$63,$A0[0]
+       lea     ($j,$A0[1],2),$S[1]     # t[2*i+1]<<1 |
+       shr     \$63,$A0[1]
+       or      $A0[0],$S[1]            # | t[2*i]>>63
+        mov    -16($tptr),$A0[0]       # t[2*i+2]      # prefetch
+       mov     $A0[1],$shift           # shift=t[2*i+1]>>63
+       mul     %rax                    # a[i]*a[i]
+       neg     $carry                  # mov $carry,cf
+        mov    -8($tptr),$A0[1]        # t[2*i+2+1]    # prefetch
+       adc     %rax,$S[0]
+        mov    -8($aptr),%rax          # a[i+1]        # prefetch
+       mov     $S[0],-32($tptr)
+       adc     %rdx,$S[1]
+
+       lea     ($shift,$A0[0],2),$S[2] # t[2*i]<<1|shift
+        mov    $S[1],-24($tptr)
+        sbb    $carry,$carry           # mov cf,$carry
+       shr     \$63,$A0[0]
+       lea     ($j,$A0[1],2),$S[3]     # t[2*i+1]<<1 |
+       shr     \$63,$A0[1]
+       or      $A0[0],$S[3]            # | t[2*i]>>63
+       mul     %rax                    # a[i]*a[i]
+       neg     $carry                  # mov $carry,cf
+       adc     %rax,$S[2]
+       adc     %rdx,$S[3]
+       mov     $S[2],-16($tptr)
+       mov     $S[3],-8($tptr)
+___
+}\f
+######################################################################
+# Montgomery reduction part, "word-by-word" algorithm.
+#
+# This new path is inspired by multiple submissions from Intel, by
+# Shay Gueron, Vlad Krasnov, Erdinc Ozturk, James Guilford,
+# Vinodh Gopal...
+{
+my ($nptr,$tptr,$carry,$m0)=("%rbp","%rdi","%rsi","%rbx");
 
-       movdqu  ($ap),%xmm1
-       movdqa  %xmm0,(%rsp)
-       movdqu  %xmm1,($rp)
-       jmp     .Lcopy4x
-.align 16
-.Lcopy4x:                                      # copy or in-place refresh
-       movdqu  16($ap,$i),%xmm2
-       movdqu  32($ap,$i),%xmm1
-       movdqa  %xmm0,16(%rsp,$i)
-       movdqu  %xmm2,16($rp,$i)
-       movdqa  %xmm0,32(%rsp,$i)
-       movdqu  %xmm1,32($rp,$i)
-       lea     32($i),$i
-       dec     $j
-       jnz     .Lcopy4x
-
-       shl     \$2,$num
-       movdqu  16($ap,$i),%xmm2
-       movdqa  %xmm0,16(%rsp,$i)
-       movdqu  %xmm2,16($rp,$i)
+$code.=<<___;
+       movq    %xmm2,$nptr
+sqr8x_reduction:
+       xor     %rax,%rax
+       lea     ($nptr,$num,2),%rcx     # end of n[]
+       lea     48+8(%rsp,$num,2),%rdx  # end of t[] buffer
+       mov     %rcx,0+8(%rsp)
+       lea     48+8(%rsp,$num),$tptr   # end of initial t[] window
+       mov     %rdx,8+8(%rsp)
+       neg     $num
+       jmp     .L8x_reduction_loop
+
+.align 32
+.L8x_reduction_loop:
+       lea     ($tptr,$num),$tptr      # start of current t[] window
+       .byte   0x66
+       mov     8*0($tptr),$m0
+       mov     8*1($tptr),%r9
+       mov     8*2($tptr),%r10
+       mov     8*3($tptr),%r11
+       mov     8*4($tptr),%r12
+       mov     8*5($tptr),%r13
+       mov     8*6($tptr),%r14
+       mov     8*7($tptr),%r15
+       mov     %rax,(%rdx)             # store top-most carry bit
+       lea     8*8($tptr),$tptr
+
+       .byte   0x67
+       mov     $m0,%r8
+       imulq   32+8(%rsp),$m0          # n0*a[0]
+       mov     16*0($nptr),%rax        # n[0]
+       mov     \$8,%ecx
+       jmp     .L8x_reduce
+
+.align 32
+.L8x_reduce:
+       mulq    $m0
+        mov    16*1($nptr),%rax        # n[1]
+       neg     %r8
+       mov     %rdx,%r8
+       adc     \$0,%r8
+
+       mulq    $m0
+       add     %rax,%r9
+        mov    16*2($nptr),%rax
+       adc     \$0,%rdx
+       add     %r9,%r8
+        mov    $m0,48-8+8(%rsp,%rcx,8) # put aside n0*a[i]
+       mov     %rdx,%r9
+       adc     \$0,%r9
+
+       mulq    $m0
+       add     %rax,%r10
+        mov    16*3($nptr),%rax
+       adc     \$0,%rdx
+       add     %r10,%r9
+        mov    32+8(%rsp),$carry       # pull n0, borrow $carry
+       mov     %rdx,%r10
+       adc     \$0,%r10
+
+       mulq    $m0
+       add     %rax,%r11
+        mov    16*4($nptr),%rax
+       adc     \$0,%rdx
+        imulq  %r8,$carry              # modulo-scheduled
+       add     %r11,%r10
+       mov     %rdx,%r11
+       adc     \$0,%r11
+
+       mulq    $m0
+       add     %rax,%r12
+        mov    16*5($nptr),%rax
+       adc     \$0,%rdx
+       add     %r12,%r11
+       mov     %rdx,%r12
+       adc     \$0,%r12
+
+       mulq    $m0
+       add     %rax,%r13
+        mov    16*6($nptr),%rax
+       adc     \$0,%rdx
+       add     %r13,%r12
+       mov     %rdx,%r13
+       adc     \$0,%r13
+
+       mulq    $m0
+       add     %rax,%r14
+        mov    16*7($nptr),%rax
+       adc     \$0,%rdx
+       add     %r14,%r13
+       mov     %rdx,%r14
+       adc     \$0,%r14
+
+       mulq    $m0
+        mov    $carry,$m0              # n0*a[i]
+       add     %rax,%r15
+        mov    16*0($nptr),%rax        # n[0]
+       adc     \$0,%rdx
+       add     %r15,%r14
+       mov     %rdx,%r15
+       adc     \$0,%r15
+
+       dec     %ecx
+       jnz     .L8x_reduce
+
+       lea     16*8($nptr),$nptr
+       xor     %rax,%rax
+       mov     8+8(%rsp),%rdx          # pull end of t[]
+       cmp     0+8(%rsp),$nptr         # end of n[]?
+       jae     .L8x_no_tail
+
+       .byte   0x66
+       add     8*0($tptr),%r8
+       adc     8*1($tptr),%r9
+       adc     8*2($tptr),%r10
+       adc     8*3($tptr),%r11
+       adc     8*4($tptr),%r12
+       adc     8*5($tptr),%r13
+       adc     8*6($tptr),%r14
+       adc     8*7($tptr),%r15
+       sbb     $carry,$carry           # top carry
+
+       mov     48+56+8(%rsp),$m0       # pull n0*a[0]
+       mov     \$8,%ecx
+       mov     16*0($nptr),%rax
+       jmp     .L8x_tail
+
+.align 32
+.L8x_tail:
+       mulq    $m0
+       add     %rax,%r8
+        mov    16*1($nptr),%rax
+        mov    %r8,($tptr)             # save result
+       mov     %rdx,%r8
+       adc     \$0,%r8
+
+       mulq    $m0
+       add     %rax,%r9
+        mov    16*2($nptr),%rax
+       adc     \$0,%rdx
+       add     %r9,%r8
+        lea    8($tptr),$tptr          # $tptr++
+       mov     %rdx,%r9
+       adc     \$0,%r9
+
+       mulq    $m0
+       add     %rax,%r10
+        mov    16*3($nptr),%rax
+       adc     \$0,%rdx
+       add     %r10,%r9
+       mov     %rdx,%r10
+       adc     \$0,%r10
+
+       mulq    $m0
+       add     %rax,%r11
+        mov    16*4($nptr),%rax
+       adc     \$0,%rdx
+       add     %r11,%r10
+       mov     %rdx,%r11
+       adc     \$0,%r11
+
+       mulq    $m0
+       add     %rax,%r12
+        mov    16*5($nptr),%rax
+       adc     \$0,%rdx
+       add     %r12,%r11
+       mov     %rdx,%r12
+       adc     \$0,%r12
+
+       mulq    $m0
+       add     %rax,%r13
+        mov    16*6($nptr),%rax
+       adc     \$0,%rdx
+       add     %r13,%r12
+       mov     %rdx,%r13
+       adc     \$0,%r13
+
+       mulq    $m0
+       add     %rax,%r14
+        mov    16*7($nptr),%rax
+       adc     \$0,%rdx
+       add     %r14,%r13
+       mov     %rdx,%r14
+       adc     \$0,%r14
+
+       mulq    $m0
+        mov    48-16+8(%rsp,%rcx,8),$m0# pull n0*a[i]
+       add     %rax,%r15
+       adc     \$0,%rdx
+       add     %r15,%r14
+        mov    16*0($nptr),%rax        # pull n[0]
+       mov     %rdx,%r15
+       adc     \$0,%r15
+
+       dec     %ecx
+       jnz     .L8x_tail
+
+       lea     16*8($nptr),$nptr
+       mov     8+8(%rsp),%rdx          # pull end of t[]
+       cmp     0+8(%rsp),$nptr         # end of n[]?
+       jae     .L8x_tail_done          # break out of loop
+
+        mov    48+56+8(%rsp),$m0       # pull n0*a[0]
+       neg     $carry
+        mov    8*0($nptr),%rax         # pull n[0]
+       adc     8*0($tptr),%r8
+       adc     8*1($tptr),%r9
+       adc     8*2($tptr),%r10
+       adc     8*3($tptr),%r11
+       adc     8*4($tptr),%r12
+       adc     8*5($tptr),%r13
+       adc     8*6($tptr),%r14
+       adc     8*7($tptr),%r15
+       sbb     $carry,$carry           # top carry
+
+       mov     \$8,%ecx
+       jmp     .L8x_tail
+
+.align 32
+.L8x_tail_done:
+       add     (%rdx),%r8              # can this overflow?
+       xor     %rax,%rax
+
+       neg     $carry
+.L8x_no_tail:
+       adc     8*0($tptr),%r8
+       adc     8*1($tptr),%r9
+       adc     8*2($tptr),%r10
+       adc     8*3($tptr),%r11
+       adc     8*4($tptr),%r12
+       adc     8*5($tptr),%r13
+       adc     8*6($tptr),%r14
+       adc     8*7($tptr),%r15
+       adc     \$0,%rax                # top-most carry
+        mov    -16($nptr),%rcx         # np[num-1]
+        xor    $carry,$carry
+
+       movq    %xmm2,$nptr             # restore $nptr
+
+       mov     %r8,8*0($tptr)          # store top 512 bits
+       mov     %r9,8*1($tptr)
+        movq   %xmm3,$num              # $num is %r9, can't be moved upwards
+       mov     %r10,8*2($tptr)
+       mov     %r11,8*3($tptr)
+       mov     %r12,8*4($tptr)
+       mov     %r13,8*5($tptr)
+       mov     %r14,8*6($tptr)
+       mov     %r15,8*7($tptr)
+       lea     8*8($tptr),$tptr
+
+       cmp     %rdx,$tptr              # end of t[]?
+       jb      .L8x_reduction_loop
+___
+}\f
+##############################################################
+# Post-condition, 4x unrolled
+#
+{
+my ($tptr,$nptr)=("%rbx","%rbp");
+$code.=<<___;
+       #xor    %rsi,%rsi               # %rsi was $carry above
+       sub     %r15,%rcx               # compare top-most words
+       lea     (%rdi,$num),$tptr       # %rdi was $tptr above
+       adc     %rsi,%rsi
+       mov     $num,%rcx
+       or      %rsi,%rax
+       movq    %xmm1,$rptr             # restore $rptr
+       xor     \$1,%rax
+       movq    %xmm1,$aptr             # prepare for back-to-back call
+       lea     ($nptr,%rax,8),$nptr
+       sar     \$3+2,%rcx              # cf=0
+       jmp     .Lsqr4x_sub
+
+.align 32
+.Lsqr4x_sub:
+       .byte   0x66
+       mov     8*0($tptr),%r12
+       mov     8*1($tptr),%r13
+       sbb     16*0($nptr),%r12
+       mov     8*2($tptr),%r14
+       sbb     16*1($nptr),%r13
+       mov     8*3($tptr),%r15
+       lea     8*4($tptr),$tptr
+       sbb     16*2($nptr),%r14
+       mov     %r12,8*0($rptr)
+       sbb     16*3($nptr),%r15
+       lea     16*4($nptr),$nptr
+       mov     %r13,8*1($rptr)
+       mov     %r14,8*2($rptr)
+       mov     %r15,8*3($rptr)
+       lea     8*4($rptr),$rptr
+
+       inc     %rcx                    # pass %cf
+       jnz     .Lsqr4x_sub
 ___
 }
 $code.=<<___;
-       mov     8(%rsp,$num,8),%rsi     # restore %rsp
-       mov     \$1,%rax
+       mov     $num,%r10               # prepare for back-to-back call
+       neg     $num                    # restore $num  
+       ret
+.size  bn_sqr8x_internal,.-bn_sqr8x_internal
+___
+{
+$code.=<<___;
+.globl bn_from_montgomery
+.type  bn_from_montgomery,\@abi-omnipotent
+.align 32
+bn_from_montgomery:                    # dispatcher: only the 8x path exists
+       testl   \$7,`($win64?"48(%rsp)":"%r9d")`        # low 3 bits of 6th argument clear?
+       jz      bn_from_mont8x          # yes: tail-jump, callee returns to our caller
+       xor     %eax,%eax               # no: return 0, nothing has been written
+       ret
+.size  bn_from_montgomery,.-bn_from_montgomery
+
+.type  bn_from_mont8x,\@function,6
+.align 32
+bn_from_mont8x:                        # copy a[] into t[], zero upper half, run one reduction
+       .byte   0x67
+       mov     %rsp,%rax               # entry %rsp, stored at 40(%rsp) below
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
 ___
 $code.=<<___ if ($win64);
-       movaps  (%rsi),%xmm6
-       movaps  0x10(%rsi),%xmm7
-       lea     0x28(%rsi),%rsi
+       lea     -0x28(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)        # NOTE(review): epilogue below never reloads xmm6/7 - confirm they stay untouched
 ___
 $code.=<<___;
-       mov     (%rsi),%r15
-       mov     8(%rsi),%r14
-       mov     16(%rsi),%r13
-       mov     24(%rsi),%r12
-       mov     32(%rsi),%rbp
-       mov     40(%rsi),%rbx
-       lea     48(%rsi),%rsp
-.Lmul4x_epilogue:
+       .byte   0x67
+       mov     ${num}d,%r10d
+       shl     \$3,${num}d             # convert $num to bytes
+       shl     \$3+2,%r10d             # 4*$num
+       neg     $num                    # -$num
+       mov     ($n0),$n0               # *n0
+
+       ##############################################################
+       # ensure that stack frame doesn't alias with $aptr+4*$num
+       # modulo 4096, which covers ret[num], am[num] and n[2*num]
+       # (see bn_exp.c). this is done to allow memory disambiguation
+       # logic to do its magic.
+       #
+       lea     -64(%rsp,$num,2),%r11
+       sub     $aptr,%r11
+       and     \$4095,%r11             # offset from $aptr modulo 4096
+       cmp     %r11,%r10
+       jb      .Lfrom_sp_alt
+       sub     %r11,%rsp               # align with $aptr
+       lea     -64(%rsp,$num,2),%rsp   # alloca(frame+2*$num)
+       jmp     .Lfrom_sp_done
+
+.align 32
+.Lfrom_sp_alt:
+       lea     4096-64(,$num,2),%r10   # 4096-frame-2*$num
+       lea     -64(%rsp,$num,2),%rsp   # alloca(frame+2*$num)
+       sub     %r10,%r11
+       mov     \$0,%r10
+       cmovc   %r10,%r11               # adjustment becomes 0 if the sub borrowed
+       sub     %r11,%rsp
+.Lfrom_sp_done:
+       and     \$-64,%rsp              # 64-byte align the frame
+       mov     $num,%r10               # -$num, parked in %xmm3 below
+       neg     $num                    # back to positive byte count
+
+       ##############################################################
+       # Stack layout
+       #
+       # +0    saved $num, used in reduction section
+       # +8    &t[2*$num], used in reduction section
+       # +32   saved *n0
+       # +40   saved %rsp
+       # +48   t[2*$num]
+       #
+       mov     $n0,  32(%rsp)
+       mov     %rax, 40(%rsp)          # save original %rsp
+.Lfrom_body:
+       mov     $num,%r11               # bytes of a[] left to copy
+       lea     48(%rsp),%rax           # start of t[]
+       pxor    %xmm0,%xmm0             # zero source for clearing
+       jmp     .Lmul_by_1
+
+.align 32
+.Lmul_by_1:                            # lower half of t[] = a[], upper half = 0, 64 bytes per pass
+       movdqu  ($aptr),%xmm1
+       movdqu  16($aptr),%xmm2
+       movdqu  32($aptr),%xmm3
+       movdqa  %xmm0,(%rax,$num)
+       movdqu  48($aptr),%xmm4
+       movdqa  %xmm0,16(%rax,$num)
+       .byte   0x48,0x8d,0xb6,0x40,0x00,0x00,0x00      # lea   64($aptr),$aptr
+       movdqa  %xmm1,(%rax)
+       movdqa  %xmm0,32(%rax,$num)
+       movdqa  %xmm2,16(%rax)
+       movdqa  %xmm0,48(%rax,$num)
+       movdqa  %xmm3,32(%rax)
+       movdqa  %xmm4,48(%rax)
+       lea     64(%rax),%rax
+       sub     \$64,%r11
+       jnz     .Lmul_by_1
+
+       movq    $rptr,%xmm1             # park $rptr
+       movq    $nptr,%xmm2             # park $nptr
+       .byte   0x67
+       mov     $nptr,%rbp              # sqr8x_reduction addresses n[] through %rbp
+       movq    %r10, %xmm3             # -num
+___
+$code.=<<___ if ($addx);
+       mov     OPENSSL_ia32cap_P+8(%rip),%r11d
+       and     \$0x80100,%r11d         # presumably the BMI2 and ADX capability bits - confirm
+       cmp     \$0x80100,%r11d
+       jne     .Lfrom_mont_nox         # either bit clear: use sqr8x_reduction instead
+
+       lea     (%rax,$num),$rptr
+       call    sqrx8x_reduction
+
+       pxor    %xmm0,%xmm0
+       lea     48(%rsp),%rax
+       mov     40(%rsp),%rsi           # fetch original %rsp, restored in the epilogue
+       jmp     .Lfrom_mont_zero
+
+.align 32
+.Lfrom_mont_nox:
+___
+$code.=<<___;
+       call    sqr8x_reduction
+
+       pxor    %xmm0,%xmm0
+       lea     48(%rsp),%rax
+       mov     40(%rsp),%rsi           # fetch original %rsp, restored in the epilogue
+       jmp     .Lfrom_mont_zero
+
+.align 32
+.Lfrom_mont_zero:                      # overwrite the t[] scratch area with zeros, 64 bytes per pass
+       movdqa  %xmm0,16*0(%rax)
+       movdqa  %xmm0,16*1(%rax)
+       movdqa  %xmm0,16*2(%rax)
+       movdqa  %xmm0,16*3(%rax)
+       lea     16*4(%rax),%rax
+       sub     \$32,$num               # assumes $num holds the byte length again - confirm
+       jnz     .Lfrom_mont_zero
+
+       mov     \$1,%rax                # return 1
+       mov     -48(%rsi),%r15
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp             # restore %rsp
+.Lfrom_epilogue:
        ret
-.size  bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
+.size  bn_from_mont8x,.-bn_from_mont8x
 ___
+}
 }}}
+\f
+if ($addx) {{{
+my $bp="%rdx"; # restore original value
+
+$code.=<<___;
+.type  bn_mulx4x_mont_gather5,\@function,6
+.align 32
+bn_mulx4x_mont_gather5:                # frame setup around mulx4x_internal
+.Lmulx4x_enter:
+       .byte   0x67
+       mov     %rsp,%rax               # entry %rsp, stored at 40(%rsp) below
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___ if ($win64);
+       lea     -0x28(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+___
+$code.=<<___;
+       .byte   0x67
+       mov     ${num}d,%r10d
+       shl     \$3,${num}d             # convert $num to bytes
+       shl     \$3+2,%r10d             # 4*$num
+       neg     $num                    # -$num
+       mov     ($n0),$n0               # *n0
+
+       ##############################################################
+       # ensure that stack frame doesn't alias with $ap+4*$num
+       # modulo 4096, which covers a[num], ret[num] and n[2*num]
+       # (see bn_exp.c). this is done to allow memory disambiguation
+       # logic to do its magic. [excessive frame is allocated in order
+       # to allow bn_from_mont8x to clear it.]
+       #
+       lea     -64(%rsp,$num,2),%r11
+       sub     $ap,%r11
+       and     \$4095,%r11
+       cmp     %r11,%r10
+       jb      .Lmulx4xsp_alt
+       sub     %r11,%rsp               # align with $ap
+       lea     -64(%rsp,$num,2),%rsp   # alloca(frame+2*$num)
+       jmp     .Lmulx4xsp_done
+
+.align 32
+.Lmulx4xsp_alt:
+       lea     4096-64(,$num,2),%r10   # 4096-frame-2*$num
+       lea     -64(%rsp,$num,2),%rsp   # alloca(frame+2*$num)
+       sub     %r10,%r11
+       mov     \$0,%r10
+       cmovc   %r10,%r11               # adjustment becomes 0 if the sub borrowed
+       sub     %r11,%rsp
+.Lmulx4xsp_done:       
+       and     \$-64,%rsp              # ensure alignment
+       ##############################################################
+       # Stack layout
+       # +0    -num
+       # +8    off-loaded &b[i]
+       # +16   end of b[num]
+       # +24   inner counter
+       # +32   saved n0
+       # +40   saved %rsp
+       # +48
+       # +56   saved rp
+       # +64   tmp[num+1]
+       #
+       mov     $n0, 32(%rsp)           # save *n0
+       mov     %rax,40(%rsp)           # save original %rsp
+.Lmulx4x_body:
+       call    mulx4x_internal
+
+       mov     40(%rsp),%rsi           # fetch original %rsp, restored below
+       mov     \$1,%rax                # return 1
+___
+$code.=<<___ if ($win64);
+       movaps  -88(%rsi),%xmm6
+       movaps  -72(%rsi),%xmm7
+___
+$code.=<<___;
+       mov     -48(%rsi),%r15
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp             # restore %rsp
+.Lmulx4x_epilogue:
+       ret
+.size  bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5
+
+.type  mulx4x_internal,\@abi-omnipotent
+.align 32
+mulx4x_internal:                       # shared body, called by bn_mulx4x_mont_gather5 and bn_powerx5
+       .byte   0x4c,0x89,0x8c,0x24,0x08,0x00,0x00,0x00 # mov   $num,8(%rsp)            # save -$num
+       .byte   0x67
+       neg     $num                    # restore $num
+       shl     \$5,$num                # x32: consecutive b[] words sit 256 bytes apart
+       lea     256($bp,$num),%r13
+       shr     \$5+5,$num              # /1024: number of 4-limb groups
+       mov     `($win64?56:8)`(%rax),%r10d     # load 7th argument
+       sub     \$1,$num                # first group is handled ahead of the loops
+       mov     %r13,16+8(%rsp)         # end of b[num]
+       mov     $num,24+8(%rsp)         # inner counter
+       mov     $rp, 56+8(%rsp)         # save $rp
+___
+my ($aptr, $bptr, $nptr, $tptr, $mi,  $bi,  $zero, $num)=
+   ("%rsi","%rdi","%rcx","%rbx","%r8","%r9","%rbp","%rax");
+my $rptr=$bptr;
+my $STRIDE=2**5*8;             # 5 is "window size"
+my $N=$STRIDE/4;               # should match cache line size
+$code.=<<___;
+       mov     %r10,%r11
+       shr     \$`log($N/8)/log(2)`,%r10
+       and     \$`$N/8-1`,%r11
+       not     %r10
+       lea     .Lmagic_masks(%rip),%rax
+       and     \$`2**5/($N/8)-1`,%r10  # 5 is "window size"
+       lea     96($bp,%r11,8),$bptr    # pointer within 1st cache line
+       movq    0(%rax,%r10,8),%xmm4    # set of masks denoting which
+       movq    8(%rax,%r10,8),%xmm5    # cache line contains element
+       add     \$7,%r11
+       movq    16(%rax,%r10,8),%xmm6   # denoted by 7th argument
+       movq    24(%rax,%r10,8),%xmm7
+       and     \$7,%r11                # 0..7, offsets $tptr in the lea below
+
+       movq    `0*$STRIDE/4-96`($bptr),%xmm0
+       lea     $STRIDE($bptr),$tptr    # borrow $tptr
+       movq    `1*$STRIDE/4-96`($bptr),%xmm1
+       pand    %xmm4,%xmm0
+       movq    `2*$STRIDE/4-96`($bptr),%xmm2
+       pand    %xmm5,%xmm1
+       movq    `3*$STRIDE/4-96`($bptr),%xmm3
+       pand    %xmm6,%xmm2
+       por     %xmm1,%xmm0
+       movq    `0*$STRIDE/4-96`($tptr),%xmm1
+       pand    %xmm7,%xmm3
+       por     %xmm2,%xmm0
+       movq    `1*$STRIDE/4-96`($tptr),%xmm2
+       por     %xmm3,%xmm0
+       .byte   0x67,0x67
+       pand    %xmm4,%xmm1
+       movq    `2*$STRIDE/4-96`($tptr),%xmm3
+
+       movq    %xmm0,%rdx              # bp[0]
+       movq    `3*$STRIDE/4-96`($tptr),%xmm0
+       lea     2*$STRIDE($bptr),$bptr  # next &b[i]
+       pand    %xmm5,%xmm2
+       .byte   0x67,0x67
+       pand    %xmm6,%xmm3
+       ##############################################################
+       # $tptr is chosen so that writing to top-most element of the
+       # vector occurs just "above" references to powers table,
+       # "above" modulo cache-line size, which effectively precludes
+       # possibility of memory disambiguation logic failure when
+       # accessing the table.
+       # 
+       lea     64+8*4+8(%rsp,%r11,8),$tptr
+
+       mov     %rdx,$bi                # keep b[0], %rdx alternates between b[i] and m
+       mulx    0*8($aptr),$mi,%rax     # a[0]*b[0]
+       mulx    1*8($aptr),%r11,%r12    # a[1]*b[0]
+       add     %rax,%r11
+       mulx    2*8($aptr),%rax,%r13    # ...
+       adc     %rax,%r12
+       adc     \$0,%r13
+       mulx    3*8($aptr),%rax,%r14
+
+       mov     $mi,%r15
+       imulq   32+8(%rsp),$mi          # "t[0]"*n0
+       xor     $zero,$zero             # cf=0, of=0
+       mov     $mi,%rdx
+
+       por     %xmm2,%xmm1
+       pand    %xmm7,%xmm0
+       por     %xmm3,%xmm1
+       mov     $bptr,8+8(%rsp)         # off-load &b[i]
+       por     %xmm1,%xmm0             # %xmm0 = gathered bp[1], consumed after the first pass
+
+       .byte   0x48,0x8d,0xb6,0x20,0x00,0x00,0x00      # lea   4*8($aptr),$aptr
+       adcx    %rax,%r13
+       adcx    $zero,%r14              # cf=0
+
+       mulx    0*16($nptr),%rax,%r10   # n[] limbs are read 16 bytes apart
+       adcx    %rax,%r15               # discarded
+       adox    %r11,%r10
+       mulx    1*16($nptr),%rax,%r11
+       adcx    %rax,%r10
+       adox    %r12,%r11
+       mulx    2*16($nptr),%rax,%r12
+       mov     24+8(%rsp),$bptr        # counter value
+       .byte   0x66
+       mov     %r10,-8*4($tptr)
+       adcx    %rax,%r11
+       adox    %r13,%r12
+       mulx    3*16($nptr),%rax,%r15
+        .byte  0x67,0x67
+        mov    $bi,%rdx
+       mov     %r11,-8*3($tptr)
+       adcx    %rax,%r12
+       adox    $zero,%r15              # of=0
+       .byte   0x48,0x8d,0x89,0x40,0x00,0x00,0x00      # lea   4*16($nptr),$nptr
+       mov     %r12,-8*2($tptr)
+       #jmp    .Lmulx4x_1st
+.align 32
+.Lmulx4x_1st:                          # first pass: a[]*b[0] plus m*n[], four limbs per iteration
+       adcx    $zero,%r15              # cf=0, modulo-scheduled
+       mulx    0*8($aptr),%r10,%rax    # a[4]*b[0]
+       adcx    %r14,%r10
+       mulx    1*8($aptr),%r11,%r14    # a[5]*b[0]
+       adcx    %rax,%r11
+       mulx    2*8($aptr),%r12,%rax    # ...
+       adcx    %r14,%r12
+       mulx    3*8($aptr),%r13,%r14
+        .byte  0x67,0x67
+        mov    $mi,%rdx                # switch multiplier to m for the n[] products
+       adcx    %rax,%r13
+       adcx    $zero,%r14              # cf=0
+       lea     4*8($aptr),$aptr
+       lea     4*8($tptr),$tptr
+
+       adox    %r15,%r10
+       mulx    0*16($nptr),%rax,%r15   # n[] limbs are read 16 bytes apart
+       adcx    %rax,%r10
+       adox    %r15,%r11
+       mulx    1*16($nptr),%rax,%r15
+       adcx    %rax,%r11
+       adox    %r15,%r12
+       mulx    2*16($nptr),%rax,%r15
+       mov     %r10,-5*8($tptr)
+       adcx    %rax,%r12
+       mov     %r11,-4*8($tptr)
+       adox    %r15,%r13
+       mulx    3*16($nptr),%rax,%r15
+        mov    $bi,%rdx                # back to b[0] for the next iteration
+       mov     %r12,-3*8($tptr)
+       adcx    %rax,%r13
+       adox    $zero,%r15
+       lea     4*16($nptr),$nptr
+       mov     %r13,-2*8($tptr)
+
+       dec     $bptr                   # of=0, pass cf
+       jnz     .Lmulx4x_1st
+
+       mov     8(%rsp),$num            # load -num
+       movq    %xmm0,%rdx              # bp[1]
+       adc     $zero,%r15              # modulo-scheduled
+       lea     ($aptr,$num),$aptr      # rewind $aptr
+       add     %r15,%r14
+       mov     8+8(%rsp),$bptr         # re-load &b[i]
+       adc     $zero,$zero             # top-most carry
+       mov     %r14,-1*8($tptr)
+       jmp     .Lmulx4x_outer
+
+.align 32
+.Lmulx4x_outer:                        # one pass per b[i]: first four limbs, then .Lmulx4x_inner
+       mov     $zero,($tptr)           # save top-most carry
+       lea     4*8($tptr,$num),$tptr   # rewind $tptr
+       mulx    0*8($aptr),$mi,%r11     # a[0]*b[i]
+       xor     $zero,$zero             # cf=0, of=0
+       mov     %rdx,$bi                # keep b[i], %rdx alternates between b[i] and m
+       mulx    1*8($aptr),%r14,%r12    # a[1]*b[i]
+       adox    -4*8($tptr),$mi         # +t[0]
+       adcx    %r14,%r11
+       mulx    2*8($aptr),%r15,%r13    # ...
+       adox    -3*8($tptr),%r11
+       adcx    %r15,%r12
+       mulx    3*8($aptr),%rdx,%r14
+       adox    -2*8($tptr),%r12
+       adcx    %rdx,%r13
+       lea     ($nptr,$num,2),$nptr    # rewind $nptr
+       lea     4*8($aptr),$aptr
+       adox    -1*8($tptr),%r13
+       adcx    $zero,%r14
+       adox    $zero,%r14
+
+       .byte   0x67
+       mov     $mi,%r15
+       imulq   32+8(%rsp),$mi          # "t[0]"*n0
+
+       movq    `0*$STRIDE/4-96`($bptr),%xmm0   # start gathering the next b[] word
+       .byte   0x67,0x67
+       mov     $mi,%rdx
+       movq    `1*$STRIDE/4-96`($bptr),%xmm1
+       .byte   0x67
+       pand    %xmm4,%xmm0             # masks keep one of the four loads
+       movq    `2*$STRIDE/4-96`($bptr),%xmm2
+       .byte   0x67
+       pand    %xmm5,%xmm1
+       movq    `3*$STRIDE/4-96`($bptr),%xmm3
+       add     \$$STRIDE,$bptr         # next &b[i]
+       .byte   0x67
+       pand    %xmm6,%xmm2
+       por     %xmm1,%xmm0
+       pand    %xmm7,%xmm3
+       xor     $zero,$zero             # cf=0, of=0
+       mov     $bptr,8+8(%rsp)         # off-load &b[i]
+
+       mulx    0*16($nptr),%rax,%r10   # n[] limbs are read 16 bytes apart
+       adcx    %rax,%r15               # discarded
+       adox    %r11,%r10
+       mulx    1*16($nptr),%rax,%r11
+       adcx    %rax,%r10
+       adox    %r12,%r11
+       mulx    2*16($nptr),%rax,%r12
+       adcx    %rax,%r11
+       adox    %r13,%r12
+       mulx    3*16($nptr),%rax,%r15
+        mov    $bi,%rdx                # back to b[i] for the inner loop
+        por    %xmm2,%xmm0
+       mov     24+8(%rsp),$bptr        # counter value
+       mov     %r10,-8*4($tptr)
+        por    %xmm3,%xmm0             # gather complete, read after the inner loop
+       adcx    %rax,%r12
+       mov     %r11,-8*3($tptr)
+       adox    $zero,%r15              # of=0
+       mov     %r12,-8*2($tptr)
+       lea     4*16($nptr),$nptr
+       jmp     .Lmulx4x_inner
+
+.align 32
+.Lmulx4x_inner:                        # t[] += a[]*b[i] + m*n[], four limbs per iteration
+       mulx    0*8($aptr),%r10,%rax    # a[4]*b[i]
+       adcx    $zero,%r15              # cf=0, modulo-scheduled
+       adox    %r14,%r10
+       mulx    1*8($aptr),%r11,%r14    # a[5]*b[i]
+       adcx    0*8($tptr),%r10
+       adox    %rax,%r11
+       mulx    2*8($aptr),%r12,%rax    # ...
+       adcx    1*8($tptr),%r11
+       adox    %r14,%r12
+       mulx    3*8($aptr),%r13,%r14
+        mov    $mi,%rdx                # switch multiplier to m for the n[] products
+       adcx    2*8($tptr),%r12
+       adox    %rax,%r13
+       adcx    3*8($tptr),%r13
+       adox    $zero,%r14              # of=0
+       lea     4*8($aptr),$aptr
+       lea     4*8($tptr),$tptr
+       adcx    $zero,%r14              # cf=0
+
+       adox    %r15,%r10
+       mulx    0*16($nptr),%rax,%r15   # n[] limbs are read 16 bytes apart
+       adcx    %rax,%r10
+       adox    %r15,%r11
+       mulx    1*16($nptr),%rax,%r15
+       adcx    %rax,%r11
+       adox    %r15,%r12
+       mulx    2*16($nptr),%rax,%r15
+       mov     %r10,-5*8($tptr)
+       adcx    %rax,%r12
+       adox    %r15,%r13
+       mov     %r11,-4*8($tptr)
+       mulx    3*16($nptr),%rax,%r15
+        mov    $bi,%rdx                # back to b[i] for the next iteration
+       lea     4*16($nptr),$nptr
+       mov     %r12,-3*8($tptr)
+       adcx    %rax,%r13
+       adox    $zero,%r15
+       mov     %r13,-2*8($tptr)
+
+       dec     $bptr                   # of=0, pass cf
+       jnz     .Lmulx4x_inner
+
+       mov     0+8(%rsp),$num          # load -num
+       movq    %xmm0,%rdx              # bp[i+1]
+       adc     $zero,%r15              # modulo-scheduled
+       sub     0*8($tptr),$bptr        # pull top-most carry to %cf
+       mov     8+8(%rsp),$bptr         # re-load &b[i]
+       mov     16+8(%rsp),%r10         # end of b[num]
+       adc     %r15,%r14
+       lea     ($aptr,$num),$aptr      # rewind $aptr
+       adc     $zero,$zero             # top-most carry
+       mov     %r14,-1*8($tptr)
+
+       cmp     %r10,$bptr
+       jb      .Lmulx4x_outer          # more b[] words to process
+
+       mov     -16($nptr),%r10         # last n[] limb, $nptr is past the end here
+       xor     %r15,%r15
+       sub     %r14,%r10               # compare top-most words
+       adc     %r15,%r15               # %r15 = borrow of that compare
+       or      %r15,$zero
+       xor     \$1,$zero
+       lea     ($tptr,$num),%rdi       # rewind $tptr
+       lea     ($nptr,$num,2),$nptr    # rewind $nptr
+       .byte   0x67,0x67
+       sar     \$3+2,$num              # cf=0
+       lea     ($nptr,$zero,8),%rbp    # n[] base, moved up 8 bytes when $zero is 1
+       mov     56+8(%rsp),%rdx         # restore rp
+       mov     $num,%rcx
+       jmp     .Lsqrx4x_sub            # common post-condition
+.size  mulx4x_internal,.-mulx4x_internal
+___
+}\f{
+######################################################################
+# void bn_power5(
+my $rptr="%rdi";       # BN_ULONG *rptr,
+my $aptr="%rsi";       # const BN_ULONG *aptr,
+my $bptr="%rdx";       # const void *table,
+my $nptr="%rcx";       # const BN_ULONG *nptr,
+my $n0  ="%r8";                # const BN_ULONG *n0,
+my $num ="%r9";                # int num, has to be divisible by 8
+                       # int pwr);
+
+my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
+my @A0=("%r10","%r11");
+my @A1=("%r12","%r13");
+my ($a0,$a1,$ai)=("%r14","%r15","%rbx");
+
+$code.=<<___;
+.type  bn_powerx5,\@function,6
+.align 32
+bn_powerx5:                            # five squarings, then one table multiplication
+.Lpowerx5_enter:
+       .byte   0x67
+       mov     %rsp,%rax               # entry %rsp, stored at 40(%rsp) below
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___ if ($win64);
+       lea     -0x28(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+___
+$code.=<<___;
+       .byte   0x67
+       mov     ${num}d,%r10d
+       shl     \$3,${num}d             # convert $num to bytes
+       shl     \$3+2,%r10d             # 4*$num
+       neg     $num                    # -$num
+       mov     ($n0),$n0               # *n0
+
+       ##############################################################
+       # ensure that stack frame doesn't alias with $aptr+4*$num
+       # modulo 4096, which covers ret[num], am[num] and n[2*num]
+       # (see bn_exp.c). this is done to allow memory disambiguation
+       # logic to do its magic.
+       #
+       lea     -64(%rsp,$num,2),%r11
+       sub     $aptr,%r11
+       and     \$4095,%r11
+       cmp     %r11,%r10
+       jb      .Lpwrx_sp_alt
+       sub     %r11,%rsp               # align with $aptr
+       lea     -64(%rsp,$num,2),%rsp   # alloca(frame+2*$num)
+       jmp     .Lpwrx_sp_done
+
+.align 32
+.Lpwrx_sp_alt:
+       lea     4096-64(,$num,2),%r10   # 4096-frame-2*$num
+       lea     -64(%rsp,$num,2),%rsp   # alloca(frame+2*$num)
+       sub     %r10,%r11
+       mov     \$0,%r10
+       cmovc   %r10,%r11               # adjustment becomes 0 if the sub borrowed
+       sub     %r11,%rsp
+.Lpwrx_sp_done:
+       and     \$-64,%rsp              # 64-byte align the frame
+       mov     $num,%r10               # -$num, parked in %xmm3 below
+       neg     $num                    # back to positive byte count
+
+       ##############################################################
+       # Stack layout
+       #
+       # +0    saved $num, used in reduction section
+       # +8    &t[2*$num], used in reduction section
+       # +16   intermediate carry bit
+       # +24   top-most carry bit, used in reduction section
+       # +32   saved *n0
+       # +40   saved %rsp
+       # +48   t[2*$num]
+       #
+       pxor    %xmm0,%xmm0
+       movq    $rptr,%xmm1             # save $rptr
+       movq    $nptr,%xmm2             # save $nptr
+       movq    %r10, %xmm3             # -$num
+       movq    $bptr,%xmm4             # save table pointer
+       mov     $n0,  32(%rsp)
+       mov     %rax, 40(%rsp)          # save original %rsp
+.Lpowerx5_body:
+
+       call    __bn_sqrx8x_internal    # five back-to-back calls, presumably one squaring each - confirm
+       call    __bn_sqrx8x_internal
+       call    __bn_sqrx8x_internal
+       call    __bn_sqrx8x_internal
+       call    __bn_sqrx8x_internal
+
+       mov     %r10,$num               # -num
+       mov     $aptr,$rptr
+       movq    %xmm2,$nptr             # reload n[] pointer
+       movq    %xmm4,$bptr             # reload table pointer
+       mov     40(%rsp),%rax           # original %rsp, mulx4x_internal reads the 7th argument off it
+
+       call    mulx4x_internal
+
+       mov     40(%rsp),%rsi           # fetch original %rsp, restored below
+       mov     \$1,%rax                # return 1
+___
+$code.=<<___ if ($win64);
+       movaps  -88(%rsi),%xmm6
+       movaps  -72(%rsi),%xmm7
+___
+$code.=<<___;
+       mov     -48(%rsi),%r15
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp             # restore %rsp
+.Lpowerx5_epilogue:
+       ret
+.size  bn_powerx5,.-bn_powerx5
+
+.globl bn_sqrx8x_internal
+.hidden        bn_sqrx8x_internal
+.type  bn_sqrx8x_internal,\@abi-omnipotent
+.align 32
+bn_sqrx8x_internal:
+__bn_sqrx8x_internal:
+       ##################################################################
+       # Squaring part:
+       #
+       # a) multiply-n-add everything but a[i]*a[i];
+       # b) shift result of a) by 1 to the left and accumulate
+       #    a[i]*a[i] products;
+       #
+       ##################################################################
+       # a[7]a[7]a[6]a[6]a[5]a[5]a[4]a[4]a[3]a[3]a[2]a[2]a[1]a[1]a[0]a[0]
+       #                                                     a[1]a[0]
+       #                                                 a[2]a[0]
+       #                                             a[3]a[0]
+       #                                             a[2]a[1]
+       #                                         a[3]a[1]
+       #                                     a[3]a[2]
+       #
+       #                                         a[4]a[0]
+       #                                     a[5]a[0]
+       #                                 a[6]a[0]
+       #                             a[7]a[0]
+       #                                     a[4]a[1]
+       #                                 a[5]a[1]
+       #                             a[6]a[1]
+       #                         a[7]a[1]
+       #                                 a[4]a[2]
+       #                             a[5]a[2]
+       #                         a[6]a[2]
+       #                     a[7]a[2]
+       #                             a[4]a[3]
+       #                         a[5]a[3]
+       #                     a[6]a[3]
+       #                 a[7]a[3]
+       #
+       #                     a[5]a[4]
+       #                 a[6]a[4]
+       #             a[7]a[4]
+       #             a[6]a[5]
+       #         a[7]a[5]
+       #     a[7]a[6]
+       # a[7]a[7]a[6]a[6]a[5]a[5]a[4]a[4]a[3]a[3]a[2]a[2]a[1]a[1]a[0]a[0]
+___
+{
+my ($zero,$carry)=("%rbp","%rcx");
+my $aaptr=$zero;
+$code.=<<___;
+       lea     48+8(%rsp),$tptr
+       lea     ($aptr,$num),$aaptr
+       mov     $num,0+8(%rsp)                  # save $num
+       mov     $aaptr,8+8(%rsp)                # save end of $aptr
+       jmp     .Lsqr8x_zero_start
+
+.align 32
+.byte  0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
+.Lsqrx8x_zero:
+       .byte   0x3e
+       movdqa  %xmm0,0*8($tptr)
+       movdqa  %xmm0,2*8($tptr)
+       movdqa  %xmm0,4*8($tptr)
+       movdqa  %xmm0,6*8($tptr)
+.Lsqr8x_zero_start:                    # aligned at 32
+       movdqa  %xmm0,8*8($tptr)
+       movdqa  %xmm0,10*8($tptr)
+       movdqa  %xmm0,12*8($tptr)
+       movdqa  %xmm0,14*8($tptr)
+       lea     16*8($tptr),$tptr
+       sub     \$64,$num
+       jnz     .Lsqrx8x_zero
+
+       mov     0*8($aptr),%rdx         # a[0], modulo-scheduled
+       #xor    %r9,%r9                 # t[1], ex-$num, zero already
+       xor     %r10,%r10
+       xor     %r11,%r11
+       xor     %r12,%r12
+       xor     %r13,%r13
+       xor     %r14,%r14
+       xor     %r15,%r15
+       lea     48+8(%rsp),$tptr        # rewind $tptr to where the zeroing pass started
+       xor     $zero,$zero             # cf=0, of=0
+       jmp     .Lsqrx8x_outer_loop
+
+.align 32
+.Lsqrx8x_outer_loop:
+       mulx    1*8($aptr),%r8,%rax     # a[1]*a[0]
+       adcx    %r9,%r8                 # a[1]*a[0]+=t[1]
+       adox    %rax,%r10
+       mulx    2*8($aptr),%r9,%rax     # a[2]*a[0]
+       adcx    %r10,%r9
+       adox    %rax,%r11
+       .byte   0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00    # mulx  3*8($aptr),%r10,%rax    # ...
+       adcx    %r11,%r10
+       adox    %rax,%r12
+       .byte   0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00    # mulx  4*8($aptr),%r11,%rax
+       adcx    %r12,%r11
+       adox    %rax,%r13
+       mulx    5*8($aptr),%r12,%rax
+       adcx    %r13,%r12
+       adox    %rax,%r14
+       mulx    6*8($aptr),%r13,%rax
+       adcx    %r14,%r13
+       adox    %r15,%rax
+       mulx    7*8($aptr),%r14,%r15
+        mov    1*8($aptr),%rdx         # a[1]
+       adcx    %rax,%r14
+       adox    $zero,%r15
+       adc     8*8($tptr),%r15
+       mov     %r8,1*8($tptr)          # t[1]
+       mov     %r9,2*8($tptr)          # t[2]
+       sbb     $carry,$carry           # mov %cf,$carry
+       xor     $zero,$zero             # cf=0, of=0
+
+
+       mulx    2*8($aptr),%r8,%rbx     # a[2]*a[1]
+       mulx    3*8($aptr),%r9,%rax     # a[3]*a[1]
+       adcx    %r10,%r8
+       adox    %rbx,%r9
+       mulx    4*8($aptr),%r10,%rbx    # ...
+       adcx    %r11,%r9
+       adox    %rax,%r10
+       .byte   0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00    # mulx  5*8($aptr),%r11,%rax
+       adcx    %r12,%r10
+       adox    %rbx,%r11
+       .byte   0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00    # mulx  6*8($aptr),%r12,%rbx
+       adcx    %r13,%r11
+       adox    %r14,%r12
+       .byte   0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00    # mulx  7*8($aptr),%r13,%r14
+        mov    2*8($aptr),%rdx         # a[2]
+       adcx    %rax,%r12
+       adox    %rbx,%r13
+       adcx    %r15,%r13
+       adox    $zero,%r14              # of=0
+       adcx    $zero,%r14              # cf=0
+
+       mov     %r8,3*8($tptr)          # t[3]
+       mov     %r9,4*8($tptr)          # t[4]
+
+       mulx    3*8($aptr),%r8,%rbx     # a[3]*a[2]
+       mulx    4*8($aptr),%r9,%rax     # a[4]*a[2]
+       adcx    %r10,%r8
+       adox    %rbx,%r9
+       mulx    5*8($aptr),%r10,%rbx    # ...
+       adcx    %r11,%r9
+       adox    %rax,%r10
+       .byte   0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00    # mulx  6*8($aptr),%r11,%rax
+       adcx    %r12,%r10
+       adox    %r13,%r11
+       .byte   0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00    # mulx  7*8($aptr),%r12,%r13
+       .byte   0x3e
+        mov    3*8($aptr),%rdx         # a[3]
+       adcx    %rbx,%r11
+       adox    %rax,%r12
+       adcx    %r14,%r12
+       mov     %r8,5*8($tptr)          # t[5]
+       mov     %r9,6*8($tptr)          # t[6]
+        mulx   4*8($aptr),%r8,%rax     # a[4]*a[3]
+       adox    $zero,%r13              # of=0
+       adcx    $zero,%r13              # cf=0
+
+       mulx    5*8($aptr),%r9,%rbx     # a[5]*a[3]
+       adcx    %r10,%r8
+       adox    %rax,%r9
+       mulx    6*8($aptr),%r10,%rax    # ...
+       adcx    %r11,%r9
+       adox    %r12,%r10
+       mulx    7*8($aptr),%r11,%r12
+        mov    4*8($aptr),%rdx         # a[4]
+        mov    5*8($aptr),%r14         # a[5]
+       adcx    %rbx,%r10
+       adox    %rax,%r11
+        mov    6*8($aptr),%r15         # a[6]
+       adcx    %r13,%r11
+       adox    $zero,%r12              # of=0
+       adcx    $zero,%r12              # cf=0
+
+       mov     %r8,7*8($tptr)          # t[7]
+       mov     %r9,8*8($tptr)          # t[8]
+
+       mulx    %r14,%r9,%rax           # a[5]*a[4]
+        mov    7*8($aptr),%r8          # a[7]
+       adcx    %r10,%r9
+       mulx    %r15,%r10,%rbx          # a[6]*a[4]
+       adox    %rax,%r10
+       adcx    %r11,%r10
+       mulx    %r8,%r11,%rax           # a[7]*a[4]
+        mov    %r14,%rdx               # a[5]
+       adox    %rbx,%r11
+       adcx    %r12,%r11
+       #adox   $zero,%rax              # of=0
+       adcx    $zero,%rax              # cf=0
+
+       mulx    %r15,%r14,%rbx          # a[6]*a[5]
+       mulx    %r8,%r12,%r13           # a[7]*a[5]
+        mov    %r15,%rdx               # a[6]
+        lea    8*8($aptr),$aptr
+       adcx    %r14,%r11
+       adox    %rbx,%r12
+       adcx    %rax,%r12
+       adox    $zero,%r13
+
+       .byte   0x67,0x67
+       mulx    %r8,%r8,%r14            # a[7]*a[6]
+       adcx    %r8,%r13
+       adcx    $zero,%r14
+
+       cmp     8+8(%rsp),$aptr
+       je      .Lsqrx8x_outer_break
+
+       neg     $carry                  # mov $carry,%cf
+       mov     \$-8,%rcx
+       mov     $zero,%r15
+       mov     8*8($tptr),%r8
+       adcx    9*8($tptr),%r9          # +=t[9]
+       adcx    10*8($tptr),%r10        # ...
+       adcx    11*8($tptr),%r11
+       adc     12*8($tptr),%r12
+       adc     13*8($tptr),%r13
+       adc     14*8($tptr),%r14
+       adc     15*8($tptr),%r15
+       lea     ($aptr),$aaptr
+       lea     2*64($tptr),$tptr
+       sbb     %rax,%rax               # mov %cf,$carry
+
+       mov     -64($aptr),%rdx         # a[0]
+       mov     %rax,16+8(%rsp)         # offload $carry
+       mov     $tptr,24+8(%rsp)
+
+       #lea    8*8($tptr),$tptr        # see 2*8*8($tptr) above
+       xor     %eax,%eax               # cf=0, of=0
+       jmp     .Lsqrx8x_loop
+
+.align 32
+.Lsqrx8x_loop:
+       mov     %r8,%rbx
+       mulx    0*8($aaptr),%rax,%r8    # a[8]*a[i]
+       adcx    %rax,%rbx               # +=t[8]
+       adox    %r9,%r8
+
+       mulx    1*8($aaptr),%rax,%r9    # ...
+       adcx    %rax,%r8
+       adox    %r10,%r9
+
+       mulx    2*8($aaptr),%rax,%r10
+       adcx    %rax,%r9
+       adox    %r11,%r10
+
+       mulx    3*8($aaptr),%rax,%r11
+       adcx    %rax,%r10
+       adox    %r12,%r11
+
+       .byte   0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00    # mulx  4*8($aaptr),%rax,%r12
+       adcx    %rax,%r11
+       adox    %r13,%r12
+
+       mulx    5*8($aaptr),%rax,%r13
+       adcx    %rax,%r12
+       adox    %r14,%r13
+
+       mulx    6*8($aaptr),%rax,%r14
+        mov    %rbx,($tptr,%rcx,8)     # store t[8+i]
+        mov    \$0,%ebx
+       adcx    %rax,%r13
+       adox    %r15,%r14
+
+       .byte   0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00    # mulx  7*8($aaptr),%rax,%r15
+        mov    8($aptr,%rcx,8),%rdx    # a[i]
+       adcx    %rax,%r14
+       adox    %rbx,%r15               # %rbx is 0, of=0
+       adcx    %rbx,%r15               # cf=0
+
+       .byte   0x67
+       inc     %rcx                    # of=0
+       jnz     .Lsqrx8x_loop
+
+       lea     8*8($aaptr),$aaptr
+       mov     \$-8,%rcx
+       cmp     8+8(%rsp),$aaptr        # done?
+       je      .Lsqrx8x_break
+
+       sub     16+8(%rsp),%rbx         # mov 16(%rsp),%cf
+       .byte   0x66
+       mov     -64($aptr),%rdx
+       adcx    0*8($tptr),%r8
+       adcx    1*8($tptr),%r9
+       adc     2*8($tptr),%r10
+       adc     3*8($tptr),%r11
+       adc     4*8($tptr),%r12
+       adc     5*8($tptr),%r13
+       adc     6*8($tptr),%r14
+       adc     7*8($tptr),%r15
+       lea     8*8($tptr),$tptr
+       .byte   0x67
+       sbb     %rax,%rax               # mov %cf,%rax
+       xor     %ebx,%ebx               # cf=0, of=0
+       mov     %rax,16+8(%rsp)         # offload carry
+       jmp     .Lsqrx8x_loop
+
+.align 32
+.Lsqrx8x_break:
+       sub     16+8(%rsp),%r8          # consume last carry
+       mov     24+8(%rsp),$carry       # initial $tptr, borrow $carry
+       mov     0*8($aptr),%rdx         # a[8], modulo-scheduled
+       xor     %ebp,%ebp               # xor   $zero,$zero
+       mov     %r8,0*8($tptr)
+       cmp     $carry,$tptr            # cf=0, of=0
+       je      .Lsqrx8x_outer_loop
+
+       mov     %r9,1*8($tptr)
+        mov    1*8($carry),%r9
+       mov     %r10,2*8($tptr)
+        mov    2*8($carry),%r10
+       mov     %r11,3*8($tptr)
+        mov    3*8($carry),%r11
+       mov     %r12,4*8($tptr)
+        mov    4*8($carry),%r12
+       mov     %r13,5*8($tptr)
+        mov    5*8($carry),%r13
+       mov     %r14,6*8($tptr)
+        mov    6*8($carry),%r14
+       mov     %r15,7*8($tptr)
+        mov    7*8($carry),%r15
+       mov     $carry,$tptr
+       jmp     .Lsqrx8x_outer_loop
+
+.align 32
+.Lsqrx8x_outer_break:
+       mov     %r9,9*8($tptr)          # t[9]
+        movq   %xmm3,%rcx              # -$num
+       mov     %r10,10*8($tptr)        # ...
+       mov     %r11,11*8($tptr)
+       mov     %r12,12*8($tptr)
+       mov     %r13,13*8($tptr)
+       mov     %r14,14*8($tptr)
+___
+}\f{
+my $i="%rcx";
+$code.=<<___;
+       lea     48+8(%rsp),$tptr
+       mov     ($aptr,$i),%rdx         # a[0]
+
+       mov     8($tptr),$A0[1]         # t[1]
+       xor     $A0[0],$A0[0]           # t[0], of=0, cf=0
+       mov     0+8(%rsp),$num          # restore $num
+       adox    $A0[1],$A0[1]
+        mov    16($tptr),$A1[0]        # t[2]  # prefetch
+        mov    24($tptr),$A1[1]        # t[3]  # prefetch
+       #jmp    .Lsqrx4x_shift_n_add    # happens to be aligned
+
+.align 32
+.Lsqrx4x_shift_n_add:
+       mulx    %rdx,%rax,%rbx
+        adox   $A1[0],$A1[0]
+       adcx    $A0[0],%rax
+        .byte  0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 # mov   8($aptr,$i),%rdx        # a[i+1]        # prefetch
+        .byte  0x4c,0x8b,0x97,0x20,0x00,0x00,0x00      # mov   32($tptr),$A0[0]        # t[2*i+4]      # prefetch
+        adox   $A1[1],$A1[1]
+       adcx    $A0[1],%rbx
+        mov    40($tptr),$A0[1]                # t[2*i+4+1]    # prefetch
+       mov     %rax,0($tptr)
+       mov     %rbx,8($tptr)
+
+       mulx    %rdx,%rax,%rbx
+        adox   $A0[0],$A0[0]
+       adcx    $A1[0],%rax
+        mov    16($aptr,$i),%rdx       # a[i+2]        # prefetch
+        mov    48($tptr),$A1[0]        # t[2*i+6]      # prefetch
+        adox   $A0[1],$A0[1]
+       adcx    $A1[1],%rbx
+        mov    56($tptr),$A1[1]        # t[2*i+6+1]    # prefetch
+       mov     %rax,16($tptr)
+       mov     %rbx,24($tptr)
+
+       mulx    %rdx,%rax,%rbx
+        adox   $A1[0],$A1[0]
+       adcx    $A0[0],%rax
+        mov    24($aptr,$i),%rdx       # a[i+3]        # prefetch
+        lea    32($i),$i
+        mov    64($tptr),$A0[0]        # t[2*i+8]      # prefetch
+        adox   $A1[1],$A1[1]
+       adcx    $A0[1],%rbx
+        mov    72($tptr),$A0[1]        # t[2*i+8+1]    # prefetch
+       mov     %rax,32($tptr)
+       mov     %rbx,40($tptr)
+
+       mulx    %rdx,%rax,%rbx
+        adox   $A0[0],$A0[0]
+       adcx    $A1[0],%rax
+       jrcxz   .Lsqrx4x_shift_n_add_break
+        .byte  0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 # mov   0($aptr,$i),%rdx        # a[i+4]        # prefetch
+        adox   $A0[1],$A0[1]
+       adcx    $A1[1],%rbx
+        mov    80($tptr),$A1[0]        # t[2*i+10]     # prefetch
+        mov    88($tptr),$A1[1]        # t[2*i+10+1]   # prefetch
+       mov     %rax,48($tptr)
+       mov     %rbx,56($tptr)
+       lea     64($tptr),$tptr
+       nop
+       jmp     .Lsqrx4x_shift_n_add
+
+.align 32
+.Lsqrx4x_shift_n_add_break:
+       adcx    $A1[1],%rbx
+       mov     %rax,48($tptr)
+       mov     %rbx,56($tptr)
+       lea     64($tptr),$tptr         # end of t[] buffer
+___
+}\f
+######################################################################
+# Montgomery reduction part, "word-by-word" algorithm.
+#
+# This new path is inspired by multiple submissions from Intel, by
+# Shay Gueron, Vlad Krasnov, Erdinc Ozturk, James Guilford,
+# Vinodh Gopal...
+{
+my ($nptr,$carry,$m0)=("%rbp","%rsi","%rdx");
+
+$code.=<<___;
+       movq    %xmm2,$nptr
+sqrx8x_reduction:
+       xor     %eax,%eax               # initial top-most carry bit
+       mov     32+8(%rsp),%rbx         # n0
+       mov     48+8(%rsp),%rdx         # "%r8", 8*0($tptr)
+       lea     -128($nptr,$num,2),%rcx # end of n[]
+       #lea    48+8(%rsp,$num,2),$tptr # end of t[] buffer
+       mov     %rcx, 0+8(%rsp)         # save end of n[]
+       mov     $tptr,8+8(%rsp)         # save end of t[]
+
+       lea     48+8(%rsp),$tptr                # initial t[] window
+       jmp     .Lsqrx8x_reduction_loop
+
+.align 32
+.Lsqrx8x_reduction_loop:
+       mov     8*1($tptr),%r9
+       mov     8*2($tptr),%r10
+       mov     8*3($tptr),%r11
+       mov     8*4($tptr),%r12
+       mov     %rdx,%r8
+       imulq   %rbx,%rdx               # n0*a[i]
+       mov     8*5($tptr),%r13
+       mov     8*6($tptr),%r14
+       mov     8*7($tptr),%r15
+       mov     %rax,24+8(%rsp)         # store top-most carry bit
+
+       lea     8*8($tptr),$tptr
+       xor     $carry,$carry           # cf=0,of=0
+       mov     \$-8,%rcx
+       jmp     .Lsqrx8x_reduce
+
+.align 32
+.Lsqrx8x_reduce:
+       mov     %r8, %rbx
+       mulx    16*0($nptr),%rax,%r8    # n[0]
+       adcx    %rbx,%rax               # discarded
+       adox    %r9,%r8
+
+       mulx    16*1($nptr),%rbx,%r9    # n[1]
+       adcx    %rbx,%r8
+       adox    %r10,%r9
+
+       mulx    16*2($nptr),%rbx,%r10
+       adcx    %rbx,%r9
+       adox    %r11,%r10
+
+       mulx    16*3($nptr),%rbx,%r11
+       adcx    %rbx,%r10
+       adox    %r12,%r11
+
+       .byte   0xc4,0x62,0xe3,0xf6,0xa5,0x40,0x00,0x00,0x00    # mulx  16*4($nptr),%rbx,%r12
+        mov    %rdx,%rax
+        mov    %r8,%rdx
+       adcx    %rbx,%r11
+       adox    %r13,%r12
+
+        mulx   32+8(%rsp),%rbx,%rdx    # %rdx discarded
+        mov    %rax,%rdx
+        mov    %rax,64+48+8(%rsp,%rcx,8)       # put aside n0*a[i]
+
+       mulx    16*5($nptr),%rax,%r13
+       adcx    %rax,%r12
+       adox    %r14,%r13
+
+       mulx    16*6($nptr),%rax,%r14
+       adcx    %rax,%r13
+       adox    %r15,%r14
+
+       mulx    16*7($nptr),%rax,%r15
+        mov    %rbx,%rdx
+       adcx    %rax,%r14
+       adox    $carry,%r15             # $carry is 0
+       adcx    $carry,%r15             # cf=0
+
+       .byte   0x67,0x67,0x67
+       inc     %rcx                    # of=0
+       jnz     .Lsqrx8x_reduce
+
+       mov     $carry,%rax             # xor   %rax,%rax
+       cmp     0+8(%rsp),$nptr         # end of n[]?
+       jae     .Lsqrx8x_no_tail
+
+       mov     48+8(%rsp),%rdx         # pull n0*a[0]
+       add     8*0($tptr),%r8
+       lea     16*8($nptr),$nptr
+       mov     \$-8,%rcx
+       adcx    8*1($tptr),%r9
+       adcx    8*2($tptr),%r10
+       adc     8*3($tptr),%r11
+       adc     8*4($tptr),%r12
+       adc     8*5($tptr),%r13
+       adc     8*6($tptr),%r14
+       adc     8*7($tptr),%r15
+       lea     8*8($tptr),$tptr
+       sbb     %rax,%rax               # top carry
+
+       xor     $carry,$carry           # of=0, cf=0
+       mov     %rax,16+8(%rsp)
+       jmp     .Lsqrx8x_tail
+
+.align 32
+.Lsqrx8x_tail:
+       mov     %r8,%rbx
+       mulx    16*0($nptr),%rax,%r8
+       adcx    %rax,%rbx
+       adox    %r9,%r8
+
+       mulx    16*1($nptr),%rax,%r9
+       adcx    %rax,%r8
+       adox    %r10,%r9
+
+       mulx    16*2($nptr),%rax,%r10
+       adcx    %rax,%r9
+       adox    %r11,%r10
+
+       mulx    16*3($nptr),%rax,%r11
+       adcx    %rax,%r10
+       adox    %r12,%r11
+
+       .byte   0xc4,0x62,0xfb,0xf6,0xa5,0x40,0x00,0x00,0x00    # mulx  16*4($nptr),%rax,%r12
+       adcx    %rax,%r11
+       adox    %r13,%r12
+
+       mulx    16*5($nptr),%rax,%r13
+       adcx    %rax,%r12
+       adox    %r14,%r13
+
+       mulx    16*6($nptr),%rax,%r14
+       adcx    %rax,%r13
+       adox    %r15,%r14
+
+       mulx    16*7($nptr),%rax,%r15
+        mov    72+48+8(%rsp,%rcx,8),%rdx       # pull n0*a[i]
+       adcx    %rax,%r14
+       adox    $carry,%r15
+        mov    %rbx,($tptr,%rcx,8)     # save result
+        mov    %r8,%rbx
+       adcx    $carry,%r15             # cf=0
+
+       inc     %rcx                    # of=0
+       jnz     .Lsqrx8x_tail
+
+       cmp     0+8(%rsp),$nptr         # end of n[]?
+       jae     .Lsqrx8x_tail_done      # break out of loop
+
+       sub     16+8(%rsp),$carry       # mov 16(%rsp),%cf
+        mov    48+8(%rsp),%rdx         # pull n0*a[0]
+        lea    16*8($nptr),$nptr
+       adc     8*0($tptr),%r8
+       adc     8*1($tptr),%r9
+       adc     8*2($tptr),%r10
+       adc     8*3($tptr),%r11
+       adc     8*4($tptr),%r12
+       adc     8*5($tptr),%r13
+       adc     8*6($tptr),%r14
+       adc     8*7($tptr),%r15
+       lea     8*8($tptr),$tptr
+       sbb     %rax,%rax
+       sub     \$8,%rcx                # mov   \$-8,%rcx
+
+       xor     $carry,$carry           # of=0, cf=0
+       mov     %rax,16+8(%rsp)
+       jmp     .Lsqrx8x_tail
+
+.align 32
+.Lsqrx8x_tail_done:
+       add     24+8(%rsp),%r8          # can this overflow?
+       mov     $carry,%rax             # xor   %rax,%rax
+
+       sub     16+8(%rsp),$carry       # mov 16(%rsp),%cf
+.Lsqrx8x_no_tail:                      # %cf is 0 if jumped here
+       adc     8*0($tptr),%r8
+        movq   %xmm3,%rcx
+       adc     8*1($tptr),%r9
+        mov    16*7($nptr),$carry
+        movq   %xmm2,$nptr             # restore $nptr
+       adc     8*2($tptr),%r10
+       adc     8*3($tptr),%r11
+       adc     8*4($tptr),%r12
+       adc     8*5($tptr),%r13
+       adc     8*6($tptr),%r14
+       adc     8*7($tptr),%r15
+       adc     %rax,%rax               # top-most carry
+
+       mov     32+8(%rsp),%rbx         # n0
+       mov     8*8($tptr,%rcx),%rdx    # modulo-scheduled "%r8"
+
+       mov     %r8,8*0($tptr)          # store top 512 bits
+        lea    8*8($tptr),%r8          # borrow %r8
+       mov     %r9,8*1($tptr)
+       mov     %r10,8*2($tptr)
+       mov     %r11,8*3($tptr)
+       mov     %r12,8*4($tptr)
+       mov     %r13,8*5($tptr)
+       mov     %r14,8*6($tptr)
+       mov     %r15,8*7($tptr)
+
+       lea     8*8($tptr,%rcx),$tptr   # start of current t[] window
+       cmp     8+8(%rsp),%r8           # end of t[]?
+       jb      .Lsqrx8x_reduction_loop
+___
+}\f
+##############################################################
+# Post-condition, 4x unrolled
+#
+{
+my ($rptr,$nptr)=("%rdx","%rbp");
+my @ri=map("%r$_",(10..13));
+my @ni=map("%r$_",(14..15));
+$code.=<<___;
+       xor     %rbx,%rbx
+       sub     %r15,%rsi               # compare top-most words
+       adc     %rbx,%rbx
+       mov     %rcx,%r10               # -$num
+       .byte   0x67
+       or      %rbx,%rax
+       .byte   0x67
+       mov     %rcx,%r9                # -$num
+       xor     \$1,%rax
+       sar     \$3+2,%rcx              # cf=0
+       #lea    48+8(%rsp,%r9),$tptr
+       lea     ($nptr,%rax,8),$nptr
+       movq    %xmm1,$rptr             # restore $rptr
+       movq    %xmm1,$aptr             # prepare for back-to-back call
+       jmp     .Lsqrx4x_sub
+
+.align 32
+.Lsqrx4x_sub:
+       .byte   0x66
+       mov     8*0($tptr),%r12
+       mov     8*1($tptr),%r13
+       sbb     16*0($nptr),%r12
+       mov     8*2($tptr),%r14
+       sbb     16*1($nptr),%r13
+       mov     8*3($tptr),%r15
+       lea     8*4($tptr),$tptr
+       sbb     16*2($nptr),%r14
+       mov     %r12,8*0($rptr)
+       sbb     16*3($nptr),%r15
+       lea     16*4($nptr),$nptr
+       mov     %r13,8*1($rptr)
+       mov     %r14,8*2($rptr)
+       mov     %r15,8*3($rptr)
+       lea     8*4($rptr),$rptr
+
+       inc     %rcx
+       jnz     .Lsqrx4x_sub
+___
+}
+$code.=<<___;
+       neg     %r9                     # restore $num
 
+       ret
+.size  bn_sqrx8x_internal,.-bn_sqrx8x_internal
+___
+}}}
 {
-my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
-                               ("%rdi","%rsi","%rdx","%rcx"); # Unix order
+my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%edx","%r8", "%r9d") : # Win64 order
+                               ("%rdi","%esi","%rdx","%ecx");  # Unix order
 my $out=$inp;
 my $STRIDE=2**5*8;
 my $N=$STRIDE/4;
 
 $code.=<<___;
+.globl bn_get_bits5
+.type  bn_get_bits5,\@abi-omnipotent
+.align 16
+bn_get_bits5:                          # returns in %eax the 5 bits found at bit offset $num in the buffer at $inp
+       lea     0($inp),%r10            # %r10 = $inp, default base
+       lea     1($inp),%r11            # %r11 = $inp+1, base shifted by one byte
+       mov     $num,%ecx
+       shr     \$4,$num                # $num = index of the 16-bit word
+       and     \$15,%ecx               # %ecx = bit offset inside that word
+       lea     -8(%ecx),%eax           # bit offset as seen from the byte-shifted base
+       cmp     \$11,%ecx
+       cmova   %r11,%r10               # offset above 11: 5 bits would run past the word, ...
+       cmova   %eax,%ecx               # ... so switch to $inp+1 and offset-8
+       movzw   (%r10,$num,2),%eax      # load 16 bits, zero-extended
+       shrl    %cl,%eax                # move the wanted bits down to bit 0
+       and     \$31,%eax               # keep 5 bits
+       ret
+.size  bn_get_bits5,.-bn_get_bits5
+
 .globl bn_scatter5
 .type  bn_scatter5,\@abi-omnipotent
 .align 16
@@ -868,13 +3272,13 @@ $code.=<<___ if ($win64);
        .byte   0x0f,0x29,0x7c,0x24,0x10        #movdqa %xmm7,0x10(%rsp)
 ___
 $code.=<<___;
-       mov     $idx,%r11
+       mov     $idx,%r11d
        shr     \$`log($N/8)/log(2)`,$idx
        and     \$`$N/8-1`,%r11
        not     $idx
        lea     .Lmagic_masks(%rip),%rax
        and     \$`2**5/($N/8)-1`,$idx  # 5 is "window size"
-       lea     96($tbl,%r11,8),$tbl    # pointer within 1st cache line
+       lea     128($tbl,%r11,8),$tbl   # pointer within 1st cache line
        movq    0(%rax,$idx,8),%xmm4    # set of masks denoting which
        movq    8(%rax,$idx,8),%xmm5    # cache line contains element
        movq    16(%rax,$idx,8),%xmm6   # denoted by 7th argument
@@ -882,15 +3286,16 @@ $code.=<<___;
        jmp     .Lgather
 .align 16
 .Lgather:
-       movq    `0*$STRIDE/4-96`($tbl),%xmm0
-       movq    `1*$STRIDE/4-96`($tbl),%xmm1
+       movq    `0*$STRIDE/4-128`($tbl),%xmm0
+       movq    `1*$STRIDE/4-128`($tbl),%xmm1
        pand    %xmm4,%xmm0
-       movq    `2*$STRIDE/4-96`($tbl),%xmm2
+       movq    `2*$STRIDE/4-128`($tbl),%xmm2
        pand    %xmm5,%xmm1
-       movq    `3*$STRIDE/4-96`($tbl),%xmm3
+       movq    `3*$STRIDE/4-128`($tbl),%xmm3
        pand    %xmm6,%xmm2
        por     %xmm1,%xmm0
        pand    %xmm7,%xmm3
+       .byte   0x67,0x67
        por     %xmm2,%xmm0
        lea     $STRIDE($tbl),$tbl
        por     %xmm3,%xmm0
@@ -954,26 +3359,27 @@ mul_handler:
        cmp     %r10,%rbx               # context->Rip<end of prologue label
        jb      .Lcommon_seh_tail
 
-       lea     `40+48`(%rax),%rax
-
-       mov     4(%r11),%r10d           # HandlerData[1]
-       lea     (%rsi,%r10),%r10        # end of alloca label
-       cmp     %r10,%rbx               # context->Rip<end of alloca label
-       jb      .Lcommon_seh_tail
-
        mov     152($context),%rax      # pull context->Rsp
 
-       mov     8(%r11),%r10d           # HandlerData[2]
+       mov     4(%r11),%r10d           # HandlerData[1]
        lea     (%rsi,%r10),%r10        # epilogue label
        cmp     %r10,%rbx               # context->Rip>=epilogue label
        jae     .Lcommon_seh_tail
 
+       lea     .Lmul_epilogue(%rip),%r10
+       cmp     %r10,%rbx
+       jb      .Lbody_40
+
        mov     192($context),%r10      # pull $num
        mov     8(%rax,%r10,8),%rax     # pull saved stack pointer
+       jmp     .Lbody_proceed
+
+.Lbody_40:
+       mov     40(%rax),%rax           # pull saved stack pointer
+.Lbody_proceed:
 
-       movaps  (%rax),%xmm0
-       movaps  16(%rax),%xmm1
-       lea     `40+48`(%rax),%rax
+       movaps  -88(%rax),%xmm0
+       movaps  -72(%rax),%xmm1
 
        mov     -8(%rax),%rbx
        mov     -16(%rax),%rbp
@@ -1040,6 +3446,24 @@ mul_handler:
        .rva    .LSEH_end_bn_mul4x_mont_gather5
        .rva    .LSEH_info_bn_mul4x_mont_gather5
 
+       .rva    .LSEH_begin_bn_power5
+       .rva    .LSEH_end_bn_power5
+       .rva    .LSEH_info_bn_power5
+
+       .rva    .LSEH_begin_bn_from_mont8x
+       .rva    .LSEH_end_bn_from_mont8x
+       .rva    .LSEH_info_bn_from_mont8x
+___
+$code.=<<___ if ($addx);
+       .rva    .LSEH_begin_bn_mulx4x_mont_gather5
+       .rva    .LSEH_end_bn_mulx4x_mont_gather5
+       .rva    .LSEH_info_bn_mulx4x_mont_gather5
+
+       .rva    .LSEH_begin_bn_powerx5
+       .rva    .LSEH_end_bn_powerx5
+       .rva    .LSEH_info_bn_powerx5
+___
+$code.=<<___;
        .rva    .LSEH_begin_bn_gather5
        .rva    .LSEH_end_bn_gather5
        .rva    .LSEH_info_bn_gather5
@@ -1049,12 +3473,36 @@ mul_handler:
 .LSEH_info_bn_mul_mont_gather5:
        .byte   9,0,0,0
        .rva    mul_handler
-       .rva    .Lmul_alloca,.Lmul_body,.Lmul_epilogue          # HandlerData[]
+       .rva    .Lmul_body,.Lmul_epilogue               # HandlerData[]
 .align 8
 .LSEH_info_bn_mul4x_mont_gather5:
        .byte   9,0,0,0
        .rva    mul_handler
-       .rva    .Lmul4x_alloca,.Lmul4x_body,.Lmul4x_epilogue    # HandlerData[]
+       .rva    .Lmul4x_body,.Lmul4x_epilogue           # HandlerData[]
+.align 8
+.LSEH_info_bn_power5:
+       .byte   9,0,0,0
+       .rva    mul_handler
+       .rva    .Lpower5_body,.Lpower5_epilogue         # HandlerData[]
+.align 8
+.LSEH_info_bn_from_mont8x:
+       .byte   9,0,0,0
+       .rva    mul_handler
+       .rva    .Lfrom_body,.Lfrom_epilogue             # HandlerData[]
+___
+$code.=<<___ if ($addx);
+.align 8
+.LSEH_info_bn_mulx4x_mont_gather5:
+       .byte   9,0,0,0
+       .rva    mul_handler
+       .rva    .Lmulx4x_body,.Lmulx4x_epilogue         # HandlerData[]
+.align 8
+.LSEH_info_bn_powerx5:
+       .byte   9,0,0,0
+       .rva    mul_handler
+       .rva    .Lpowerx5_body,.Lpowerx5_epilogue       # HandlerData[]
+___
+$code.=<<___;
 .align 8
 .LSEH_info_bn_gather5:
         .byte   0x01,0x0d,0x05,0x00
index 47d8c71..5696965 100644 (file)
@@ -256,24 +256,6 @@ extern "C" {
 #  define BN_HEX_FMT2     "%08X"
 # endif
 
-/*
- * 2011-02-22 SMS. In various places, a size_t variable or a type cast to
- * size_t was used to perform integer-only operations on pointers.  This
- * failed on VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t
- * is still only 32 bits.  What's needed in these cases is an integer type
- * with the same size as a pointer, which size_t is not certain to be. The
- * only fix here is VMS-specific.
- */
-# if defined(OPENSSL_SYS_VMS)
-#  if __INITIAL_POINTER_SIZE == 64
-#   define PTR_SIZE_INT long long
-#  else                         /* __INITIAL_POINTER_SIZE == 64 */
-#   define PTR_SIZE_INT int
-#  endif                        /* __INITIAL_POINTER_SIZE == 64 [else] */
-# else                          /* defined(OPENSSL_SYS_VMS) */
-#  define PTR_SIZE_INT size_t
-# endif                         /* defined(OPENSSL_SYS_VMS) [else] */
-
 # define BN_DEFAULT_BITS 1280
 
 # define BN_FLG_MALLOCED         0x01
index 114acf3..03a33cf 100644 (file)
@@ -489,121 +489,144 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  * c=(c2,c1,c0)
  */
 
+# ifdef BN_LLONG
 /*
- * Keep in mind that carrying into high part of multiplication result
- * can not overflow, because it cannot be all-ones.
+ * Keep in mind that additions to multiplication result can not
+ * overflow, because its high half cannot be all-ones.
  */
-# ifdef BN_LLONG
-#  define mul_add_c(a,b,c0,c1,c2) \
-        t=(BN_ULLONG)a*b; \
-        t1=(BN_ULONG)Lw(t); \
-        t2=(BN_ULONG)Hw(t); \
-        c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
-        c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
-
-#  define mul_add_c2(a,b,c0,c1,c2) \
-        t=(BN_ULLONG)a*b; \
-        tt=(t+t)&BN_MASK; \
-        if (tt < t) c2++; \
-        t1=(BN_ULONG)Lw(tt); \
-        t2=(BN_ULONG)Hw(tt); \
-        c0=(c0+t1)&BN_MASK2;  \
-        if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
-        c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
-
-#  define sqr_add_c(a,i,c0,c1,c2) \
-        t=(BN_ULLONG)a[i]*a[i]; \
-        t1=(BN_ULONG)Lw(t); \
-        t2=(BN_ULONG)Hw(t); \
-        c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
-        c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+#  define mul_add_c(a,b,c0,c1,c2)       do {    \
+        BN_ULONG hi;                            \
+        BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
+        t += c0;                /* no carry */  \
+        c0 = (BN_ULONG)Lw(t);                   \
+        hi = (BN_ULONG)Hw(t);                   \
+        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        } while(0)
+
+#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
+        BN_ULONG hi;                            \
+        BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
+        BN_ULLONG tt = t+c0;    /* no carry */  \
+        c0 = (BN_ULONG)Lw(tt);                  \
+        hi = (BN_ULONG)Hw(tt);                  \
+        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        t += c0;                /* no carry */  \
+        c0 = (BN_ULONG)Lw(t);                   \
+        hi = (BN_ULONG)Hw(t);                   \
+        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        } while(0)
+
+#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
+        BN_ULONG hi;                            \
+        BN_ULLONG t = (BN_ULLONG)a[i]*a[i];     \
+        t += c0;                /* no carry */  \
+        c0 = (BN_ULONG)Lw(t);                   \
+        hi = (BN_ULONG)Hw(t);                   \
+        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        } while(0)
 
 #  define sqr_add_c2(a,i,j,c0,c1,c2) \
         mul_add_c2((a)[i],(a)[j],c0,c1,c2)
 
 # elif defined(BN_UMULT_LOHI)
-
-#  define mul_add_c(a,b,c0,c1,c2) {       \
-        BN_ULONG ta=(a),tb=(b);         \
-        BN_UMULT_LOHI(t1,t2,ta,tb);     \
-        c0 += t1; t2 += (c0<t1)?1:0;    \
-        c1 += t2; c2 += (c1<t2)?1:0;    \
-        }
-
-#  define mul_add_c2(a,b,c0,c1,c2) {      \
-        BN_ULONG ta=(a),tb=(b),t0;      \
-        BN_UMULT_LOHI(t0,t1,ta,tb);     \
-        c0 += t0; t2 = t1+((c0<t0)?1:0);\
-        c1 += t2; c2 += (c1<t2)?1:0;    \
-        c0 += t0; t1 += (c0<t0)?1:0;    \
-        c1 += t1; c2 += (c1<t1)?1:0;    \
-        }
-
-#  define sqr_add_c(a,i,c0,c1,c2) {       \
-        BN_ULONG ta=(a)[i];             \
-        BN_UMULT_LOHI(t1,t2,ta,ta);     \
-        c0 += t1; t2 += (c0<t1)?1:0;    \
-        c1 += t2; c2 += (c1<t2)?1:0;    \
-        }
+/*
+ * Keep in mind that additions to hi can not overflow, because
+ * the high word of a multiplication result cannot be all-ones.
+ */
+#  define mul_add_c(a,b,c0,c1,c2)       do {    \
+        BN_ULONG ta = (a), tb = (b);            \
+        BN_ULONG lo, hi;                        \
+        BN_UMULT_LOHI(lo,hi,ta,tb);             \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
+
+#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
+        BN_ULONG ta = (a), tb = (b);            \
+        BN_ULONG lo, hi, tt;                    \
+        BN_UMULT_LOHI(lo,hi,ta,tb);             \
+        c0 += lo; tt = hi+((c0<lo)?1:0);        \
+        c1 += tt; c2 += (c1<tt)?1:0;            \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
+
+#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
+        BN_ULONG ta = (a)[i];                   \
+        BN_ULONG lo, hi;                        \
+        BN_UMULT_LOHI(lo,hi,ta,ta);             \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
 
 #  define sqr_add_c2(a,i,j,c0,c1,c2)    \
         mul_add_c2((a)[i],(a)[j],c0,c1,c2)
 
 # elif defined(BN_UMULT_HIGH)
-
-#  define mul_add_c(a,b,c0,c1,c2) {       \
-        BN_ULONG ta=(a),tb=(b);         \
-        t1 = ta * tb;                   \
-        t2 = BN_UMULT_HIGH(ta,tb);      \
-        c0 += t1; t2 += (c0<t1)?1:0;    \
-        c1 += t2; c2 += (c1<t2)?1:0;    \
-        }
-
-#  define mul_add_c2(a,b,c0,c1,c2) {      \
-        BN_ULONG ta=(a),tb=(b),t0;      \
-        t1 = BN_UMULT_HIGH(ta,tb);      \
-        t0 = ta * tb;                   \
-        c0 += t0; t2 = t1+((c0<t0)?1:0);\
-        c1 += t2; c2 += (c1<t2)?1:0;    \
-        c0 += t0; t1 += (c0<t0)?1:0;    \
-        c1 += t1; c2 += (c1<t1)?1:0;    \
-        }
-
-#  define sqr_add_c(a,i,c0,c1,c2) {       \
-        BN_ULONG ta=(a)[i];             \
-        t1 = ta * ta;                   \
-        t2 = BN_UMULT_HIGH(ta,ta);      \
-        c0 += t1; t2 += (c0<t1)?1:0;    \
-        c1 += t2; c2 += (c1<t2)?1:0;    \
-        }
+/*
+ * Keep in mind that additions to hi can not overflow, because
+ * the high word of a multiplication result cannot be all-ones.
+ */
+#  define mul_add_c(a,b,c0,c1,c2)       do {    \
+        BN_ULONG ta = (a), tb = (b);            \
+        BN_ULONG lo = ta * tb;                  \
+        BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
+
+#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
+        BN_ULONG ta = (a), tb = (b), tt;        \
+        BN_ULONG lo = ta * tb;                  \
+        BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
+        c0 += lo; tt = hi + ((c0<lo)?1:0);      \
+        c1 += tt; c2 += (c1<tt)?1:0;            \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
+
+#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
+        BN_ULONG ta = (a)[i];                   \
+        BN_ULONG lo = ta * ta;                  \
+        BN_ULONG hi = BN_UMULT_HIGH(ta,ta);     \
+        c0 += lo; hi += (c0<lo)?1:0;            \
+        c1 += hi; c2 += (c1<hi)?1:0;            \
+        } while(0)
 
 #  define sqr_add_c2(a,i,j,c0,c1,c2)      \
         mul_add_c2((a)[i],(a)[j],c0,c1,c2)
 
 # else                          /* !BN_LLONG */
-#  define mul_add_c(a,b,c0,c1,c2) \
-        t1=LBITS(a); t2=HBITS(a); \
-        bl=LBITS(b); bh=HBITS(b); \
-        mul64(t1,t2,bl,bh); \
-        c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
-        c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
-
-#  define mul_add_c2(a,b,c0,c1,c2) \
-        t1=LBITS(a); t2=HBITS(a); \
-        bl=LBITS(b); bh=HBITS(b); \
-        mul64(t1,t2,bl,bh); \
-        if (t2 & BN_TBIT) c2++; \
-        t2=(t2+t2)&BN_MASK2; \
-        if (t1 & BN_TBIT) t2++; \
-        t1=(t1+t1)&BN_MASK2; \
-        c0=(c0+t1)&BN_MASK2;  \
-        if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
-        c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
-
-#  define sqr_add_c(a,i,c0,c1,c2) \
-        sqr64(t1,t2,(a)[i]); \
-        c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
-        c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+/*
+ * Keep in mind that additions to hi can not overflow, because
+ * the high word of a multiplication result cannot be all-ones.
+ */
+#  define mul_add_c(a,b,c0,c1,c2)       do {    \
+        BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
+        BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
+        mul64(lo,hi,bl,bh);                     \
+        c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
+        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        } while(0)
+
+#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
+        BN_ULONG tt;                            \
+        BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
+        BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
+        mul64(lo,hi,bl,bh);                     \
+        tt = hi;                                \
+        c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
+        c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
+        c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
+        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        } while(0)
+
+#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
+        BN_ULONG lo, hi;                        \
+        sqr64(lo,hi,(a)[i]);                    \
+        c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
+        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
+        } while(0)
 
 #  define sqr_add_c2(a,i,j,c0,c1,c2) \
         mul_add_c2((a)[i],(a)[j],c0,c1,c2)
@@ -611,12 +634,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
 
 void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 {
-# ifdef BN_LLONG
-    BN_ULLONG t;
-# else
-    BN_ULONG bl, bh;
-# endif
-    BN_ULONG t1, t2;
     BN_ULONG c1, c2, c3;
 
     c1 = 0;
@@ -720,12 +737,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 
 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 {
-# ifdef BN_LLONG
-    BN_ULLONG t;
-# else
-    BN_ULONG bl, bh;
-# endif
-    BN_ULONG t1, t2;
     BN_ULONG c1, c2, c3;
 
     c1 = 0;
@@ -765,12 +776,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 
 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
 {
-# ifdef BN_LLONG
-    BN_ULLONG t, tt;
-# else
-    BN_ULONG bl, bh;
-# endif
-    BN_ULONG t1, t2;
     BN_ULONG c1, c2, c3;
 
     c1 = 0;
@@ -846,12 +851,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
 
 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
 {
-# ifdef BN_LLONG
-    BN_ULLONG t, tt;
-# else
-    BN_ULONG bl, bh;
-# endif
-    BN_ULONG t1, t2;
     BN_ULONG c1, c2, c3;
 
     c1 = 0;
index 27146c8..24afdd6 100644 (file)
 # ifndef alloca
 #  define alloca(s) __builtin_alloca((s))
 # endif
+#elif defined(__sun)
+# include <alloca.h>
+#endif
+
+#include "rsaz_exp.h"
+
+#undef SPARC_T4_MONT
+#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
+# include "sparc_arch.h"
+extern unsigned int OPENSSL_sparcv9cap_P[];
+# define SPARC_T4_MONT
 #endif
 
 /* maximum precomputation table size for *variable* sliding windows */
@@ -464,6 +475,23 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     wstart = bits - 1;          /* The top bit of the window */
     wend = 0;                   /* The bottom bit of the window */
 
+#if 1                           /* by Shay Gueron's suggestion */
+    j = m->top;                 /* borrow j */
+    if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
+        if (bn_wexpand(r, j) == NULL)
+            goto err;
+        /* 2^(top*BN_BITS2) - m */
+        r->d[0] = (0 - m->d[0]) & BN_MASK2;
+        for (i = 1; i < j; i++)
+            r->d[i] = (~m->d[i]) & BN_MASK2;
+        r->top = j;
+        /*
+         * Upper words will be zero if the corresponding words of 'm' were
+         * 0xfff[...], so decrement r->top accordingly.
+         */
+        bn_correct_top(r);
+    } else
+#endif
     if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
         goto err;
     for (;;) {
@@ -515,6 +543,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
         if (wstart < 0)
             break;
     }
+#if defined(SPARC_T4_MONT)
+    if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
+        j = mont->N.top;        /* borrow j */
+        val[0]->d[0] = 1;       /* borrow val[0] */
+        for (i = 1; i < j; i++)
+            val[0]->d[i] = 0;
+        val[0]->top = j;
+        if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx))
+            goto err;
+    } else
+#endif
     if (!BN_from_montgomery(rr, r, mont, ctx))
         goto err;
     ret = 1;
@@ -526,6 +565,27 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     return (ret);
 }
 
+#if defined(SPARC_T4_MONT)
+static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
+{                       /* read one word of |a| starting at bit |bitpos| */
+    BN_ULONG ret = 0;           /* stays 0 if the start word is out of range */
+    int wordpos;
+
+    wordpos = bitpos / BN_BITS2; /* word holding the first requested bit */
+    bitpos %= BN_BITS2;         /* bit offset inside that word */
+    if (wordpos >= 0 && wordpos < a->top) {
+        ret = a->d[wordpos] & BN_MASK2;
+        if (bitpos) {           /* unaligned: also take next word */
+            ret >>= bitpos;
+            if (++wordpos < a->top) /* else upper bits stay zero */
+                ret |= a->d[wordpos] << (BN_BITS2 - bitpos);
+        }
+    }
+
+    return ret & BN_MASK2;
+}
+#endif
+
 /*
  * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
  * layout so that accessing any of these table values shows the same access
@@ -594,6 +654,9 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     int powerbufLen = 0;
     unsigned char *powerbuf = NULL;
     BIGNUM tmp, am;
+#if defined(SPARC_T4_MONT)
+    unsigned int t4 = 0;
+#endif
 
     bn_check_top(a);
     bn_check_top(p);
@@ -626,21 +689,62 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
             goto err;
     }
 
+#ifdef RSAZ_ENABLED
+    /*
+     * If the size of the operands allows it, perform the optimized
+     * RSAZ exponentiation. For further information see
+     * crypto/bn/rsaz_exp.c and accompanying assembly modules.
+     */
+    if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
+        && rsaz_avx2_eligible()) {
+        if (NULL == bn_wexpand(rr, 16))
+            goto err;
+        RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
+                               mont->n0[0]);
+        rr->top = 16;
+        rr->neg = 0;
+        bn_correct_top(rr);
+        ret = 1;
+        goto err;
+    } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
+        if (NULL == bn_wexpand(rr, 8))
+            goto err;
+        RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
+        rr->top = 8;
+        rr->neg = 0;
+        bn_correct_top(rr);
+        ret = 1;
+        goto err;
+    }
+#endif
+
     /* Get the window size to use with size of p. */
     window = BN_window_bits_for_ctime_exponent_size(bits);
+#if defined(SPARC_T4_MONT)
+    if (window >= 5 && (top & 15) == 0 && top <= 64 &&
+        (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) ==
+        (CFR_MONTMUL | CFR_MONTSQR) && (t4 = OPENSSL_sparcv9cap_P[0]))
+        window = 5;
+    else
+#endif
 #if defined(OPENSSL_BN_ASM_MONT5)
-    if (window == 6 && bits <= 1024)
-        window = 5;             /* ~5% improvement of 2048-bit RSA sign */
+    if (window >= 5) {
+        window = 5;             /* ~5% improvement for RSA2048 sign, and even
+                                 * for RSA4096 */
+        if ((top & 7) == 0)
+            powerbufLen += 2 * top * sizeof(m->d[0]);
+    }
 #endif
+    (void)0;
 
     /*
      * Allocate a buffer large enough to hold all of the pre-computed powers
      * of am, am itself and tmp.
      */
     numPowers = 1 << window;
-    powerbufLen = sizeof(m->d[0]) * (top * numPowers +
-                                     ((2 * top) >
-                                      numPowers ? (2 * top) : numPowers));
+    powerbufLen += sizeof(m->d[0]) * (top * numPowers +
+                                      ((2 * top) >
+                                       numPowers ? (2 * top) : numPowers));
 #ifdef alloca
     if (powerbufLen < 3072)
         powerbufFree =
@@ -670,15 +774,17 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     tmp.flags = am.flags = BN_FLG_STATIC_DATA;
 
     /* prepare a^0 in Montgomery domain */
-#if 1
+#if 1                           /* by Shay Gueron's suggestion */
+    if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
+        /* 2^(top*BN_BITS2) - m */
+        tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
+        for (i = 1; i < top; i++)
+            tmp.d[i] = (~m->d[i]) & BN_MASK2;
+        tmp.top = top;
+    } else
+#endif
     if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
         goto err;
-#else
-    tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */
-    for (i = 1; i < top; i++)
-        tmp.d[i] = (~m->d[i]) & BN_MASK2;
-    tmp.top = top;
-#endif
 
     /* prepare a^1 in Montgomery domain */
     if (a->neg || BN_ucmp(a, m) >= 0) {
@@ -689,6 +795,138 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     } else if (!BN_to_montgomery(&am, a, mont, ctx))
         goto err;
 
+#if defined(SPARC_T4_MONT)
+    if (t4) {
+        typedef int (*bn_pwr5_mont_f) (BN_ULONG *tp, const BN_ULONG *np,
+                                       const BN_ULONG *n0, const void *table,
+                                       int power, int bits);
+        int bn_pwr5_mont_t4_8(BN_ULONG *tp, const BN_ULONG *np,
+                              const BN_ULONG *n0, const void *table,
+                              int power, int bits);
+        int bn_pwr5_mont_t4_16(BN_ULONG *tp, const BN_ULONG *np,
+                               const BN_ULONG *n0, const void *table,
+                               int power, int bits);
+        int bn_pwr5_mont_t4_24(BN_ULONG *tp, const BN_ULONG *np,
+                               const BN_ULONG *n0, const void *table,
+                               int power, int bits);
+        int bn_pwr5_mont_t4_32(BN_ULONG *tp, const BN_ULONG *np,
+                               const BN_ULONG *n0, const void *table,
+                               int power, int bits);
+        static const bn_pwr5_mont_f pwr5_funcs[4] = {
+            bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16,
+            bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32
+        };
+        bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top / 16 - 1];
+
+        typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap,
+                                      const void *bp, const BN_ULONG *np,
+                                      const BN_ULONG *n0);
+        int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const void *bp,
+                             const BN_ULONG *np, const BN_ULONG *n0);
+        int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap,
+                              const void *bp, const BN_ULONG *np,
+                              const BN_ULONG *n0);
+        int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap,
+                              const void *bp, const BN_ULONG *np,
+                              const BN_ULONG *n0);
+        int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap,
+                              const void *bp, const BN_ULONG *np,
+                              const BN_ULONG *n0);
+        static const bn_mul_mont_f mul_funcs[4] = {
+            bn_mul_mont_t4_8, bn_mul_mont_t4_16,
+            bn_mul_mont_t4_24, bn_mul_mont_t4_32
+        };
+        bn_mul_mont_f mul_worker = mul_funcs[top / 16 - 1];
+
+        void bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap,
+                              const void *bp, const BN_ULONG *np,
+                              const BN_ULONG *n0, int num);
+        void bn_mul_mont_t4(BN_ULONG *rp, const BN_ULONG *ap,
+                            const void *bp, const BN_ULONG *np,
+                            const BN_ULONG *n0, int num);
+        void bn_mul_mont_gather5_t4(BN_ULONG *rp, const BN_ULONG *ap,
+                                    const void *table, const BN_ULONG *np,
+                                    const BN_ULONG *n0, int num, int power);
+        void bn_flip_n_scatter5_t4(const BN_ULONG *inp, size_t num,
+                                   void *table, size_t power);
+        void bn_gather5_t4(BN_ULONG *out, size_t num,
+                           void *table, size_t power);
+        void bn_flip_t4(BN_ULONG *dst, BN_ULONG *src, size_t num);
+
+        BN_ULONG *np = mont->N.d, *n0 = mont->n0;
+        int stride = 5 * (6 - (top / 16 - 1)); /* multiple of 5, but less
+                                                * than 32 */
+
+        /*
+         * BN_to_montgomery can contaminate words above .top [in
+         * BN_DEBUG[_DEBUG] build]...
+         */
+        for (i = am.top; i < top; i++)
+            am.d[i] = 0;
+        for (i = tmp.top; i < top; i++)
+            tmp.d[i] = 0;
+
+        bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 0);
+        bn_flip_n_scatter5_t4(am.d, top, powerbuf, 1);
+        if (!(*mul_worker) (tmp.d, am.d, am.d, np, n0) &&
+            !(*mul_worker) (tmp.d, am.d, am.d, np, n0))
+            bn_mul_mont_vis3(tmp.d, am.d, am.d, np, n0, top);
+        bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 2);
+
+        for (i = 3; i < 32; i++) {
+            /* Calculate a^i = a^(i-1) * a */
+            if (!(*mul_worker) (tmp.d, tmp.d, am.d, np, n0) &&
+                !(*mul_worker) (tmp.d, tmp.d, am.d, np, n0))
+                bn_mul_mont_vis3(tmp.d, tmp.d, am.d, np, n0, top);
+            bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, i);
+        }
+
+        /* switch to 64-bit domain */
+        np = alloca(top * sizeof(BN_ULONG));
+        top /= 2;
+        bn_flip_t4(np, mont->N.d, top);
+
+        bits--;
+        for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
+            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+        bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
+
+        /*
+         * Scan the exponent one window at a time starting from the most
+         * significant bits.
+         */
+        while (bits >= 0) {
+            if (bits < stride)
+                stride = bits + 1;
+            bits -= stride;
+            wvalue = bn_get_bits(p, bits + 1);
+
+            if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
+                continue;
+            /* retry once and fall back */
+            if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
+                continue;
+
+            bits += stride - 5;
+            wvalue >>= stride - 5;
+            wvalue &= 31;
+            bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
+            bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
+            bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
+            bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
+            bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
+            bn_mul_mont_gather5_t4(tmp.d, tmp.d, powerbuf, np, n0, top,
+                                   wvalue);
+        }
+
+        bn_flip_t4(tmp.d, tmp.d, top);
+        top *= 2;
+        /* back to 32-bit domain */
+        tmp.top = top;
+        bn_correct_top(&tmp);
+        OPENSSL_cleanse(np, top * sizeof(BN_ULONG));
+    } else
+#endif
 #if defined(OPENSSL_BN_ASM_MONT5)
     if (window == 5 && top > 1) {
         /*
@@ -707,8 +945,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
         void bn_scatter5(const BN_ULONG *inp, size_t num,
                          void *table, size_t power);
         void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
+        void bn_power5(BN_ULONG *rp, const BN_ULONG *ap,
+                       const void *table, const BN_ULONG *np,
+                       const BN_ULONG *n0, int num, int power);
+        int bn_get_bits5(const BN_ULONG *ap, int off);
+        int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
+                               const BN_ULONG *not_used, const BN_ULONG *np,
+                               const BN_ULONG *n0, int num);
 
-        BN_ULONG *np = mont->N.d, *n0 = mont->n0;
+        BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
 
         /*
          * BN_to_montgomery can contaminate words above .top [in
@@ -719,6 +964,12 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
         for (i = tmp.top; i < top; i++)
             tmp.d[i] = 0;
 
+        if (top & 7)
+            np2 = np;
+        else
+            for (np2 = am.d + top, i = 0; i < top; i++)
+                np2[2 * i] = np[i];
+
         bn_scatter5(tmp.d, top, powerbuf, 0);
         bn_scatter5(am.d, am.top, powerbuf, 1);
         bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
@@ -727,7 +978,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
 # if 0
         for (i = 3; i < 32; i++) {
             /* Calculate a^i = a^(i-1) * a */
-            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
+            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
             bn_scatter5(tmp.d, top, powerbuf, i);
         }
 # else
@@ -738,7 +989,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
         }
         for (i = 3; i < 8; i += 2) {
             int j;
-            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
+            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
             bn_scatter5(tmp.d, top, powerbuf, i);
             for (j = 2 * i; j < 32; j *= 2) {
                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
@@ -746,13 +997,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
             }
         }
         for (; i < 16; i += 2) {
-            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
+            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
             bn_scatter5(tmp.d, top, powerbuf, i);
             bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
             bn_scatter5(tmp.d, top, powerbuf, 2 * i);
         }
         for (; i < 32; i += 2) {
-            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
+            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
             bn_scatter5(tmp.d, top, powerbuf, i);
         }
 # endif
@@ -765,20 +1016,34 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
          * Scan the exponent one window at a time starting from the most
          * significant bits.
          */
-        while (bits >= 0) {
-            for (wvalue = 0, i = 0; i < 5; i++, bits--)
-                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
+        if (top & 7)
+            while (bits >= 0) {
+                for (wvalue = 0, i = 0; i < 5; i++, bits--)
+                    wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
 
-            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
-            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
-            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
-            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
-            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
-            bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
+                bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
+                bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
+                bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
+                bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
+                bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
+                bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
+                                    wvalue);
+        } else {
+            while (bits >= 0) {
+                wvalue = bn_get_bits5(p->d, bits - 4);
+                bits -= 5;
+                bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
+            }
         }
 
+        ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
         tmp.top = top;
         bn_correct_top(&tmp);
+        if (ret) {
+            if (!BN_copy(rr, &tmp))
+                ret = 0;
+            goto err;           /* non-zero ret means it's not error */
+        }
     } else
 #endif
     {
@@ -844,6 +1109,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     }
 
     /* Convert the final result from montgomery to standard format */
+#if defined(SPARC_T4_MONT)
+    if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
+        am.d[0] = 1;            /* borrow am */
+        for (i = 1; i < top; i++)
+            am.d[i] = 0;
+        if (!BN_mod_mul_montgomery(rr, &tmp, &am, mont, ctx))
+            goto err;
+    } else
+#endif
     if (!BN_from_montgomery(rr, &tmp, mont, ctx))
         goto err;
     ret = 1;
index a0ba8de..cfa1c7c 100644 (file)
@@ -450,8 +450,7 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
             d0 = p[k] % BN_BITS2;
             d1 = BN_BITS2 - d0;
             z[n] ^= (zz << d0);
-            tmp_ulong = zz >> d1;
-            if (d0 && tmp_ulong)
+            if (d0 && (tmp_ulong = zz >> d1))
                 z[n + 1] ^= tmp_ulong;
         }
 
index 904a723..00f4f09 100644 (file)
@@ -204,6 +204,24 @@ extern "C" {
 # define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL        (32)/* 32 */
 # define BN_MONT_CTX_SET_SIZE_WORD               (64)/* 32 */
 
+/*
+ * 2011-02-22 SMS. In various places, a size_t variable or a type cast to
+ * size_t was used to perform integer-only operations on pointers.  This
+ * failed on VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t
+ * is still only 32 bits.  What's needed in these cases is an integer type
+ * with the same size as a pointer, which size_t is not certain to be. The
+ * only fix here is VMS-specific.
+ */
+# if defined(OPENSSL_SYS_VMS)
+#  if __INITIAL_POINTER_SIZE == 64
+#   define PTR_SIZE_INT long long
+#  else                         /* __INITIAL_POINTER_SIZE == 64 */
+#   define PTR_SIZE_INT int
+#  endif                        /* __INITIAL_POINTER_SIZE == 64 [else] */
+# elif !defined(PTR_SIZE_INT)   /* defined(OPENSSL_SYS_VMS) */
+#  define PTR_SIZE_INT size_t
+# endif                         /* defined(OPENSSL_SYS_VMS) [else] */
+
 # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
 /*
  * BN_UMULT_HIGH section.
@@ -295,6 +313,15 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
              : "r"(a), "r"(b));
 #    endif
 #   endif
+#  elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG)
+#   if defined(__GNUC__) && __GNUC__>=2
+#    define BN_UMULT_HIGH(a,b)   ({      \
+        register BN_ULONG ret;          \
+        asm ("umulh     %0,%1,%2"       \
+             : "=r"(ret)                \
+             : "r"(a), "r"(b));         \
+        ret;                    })
+#   endif
 #  endif                        /* cpu */
 # endif                         /* OPENSSL_NO_ASM */
 
index 06662c5..470d5da 100644 (file)
@@ -1042,7 +1042,6 @@ int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx)
 int test_mod_exp_mont5(BIO *bp, BN_CTX *ctx)
 {
     BIGNUM *a, *p, *m, *d, *e;
-
     BN_MONT_CTX *mont;
 
     a = BN_new();
@@ -1050,7 +1049,6 @@ int test_mod_exp_mont5(BIO *bp, BN_CTX *ctx)
     m = BN_new();
     d = BN_new();
     e = BN_new();
-
     mont = BN_MONT_CTX_new();
 
     BN_bntest_rand(m, 1024, 0, 1); /* must be odd for montgomery */
@@ -1099,6 +1097,7 @@ int test_mod_exp_mont5(BIO *bp, BN_CTX *ctx)
         fprintf(stderr, "Modular exponentiation test failed!\n");
         return 0;
     }
+    BN_MONT_CTX_free(mont);
     BN_free(a);
     BN_free(p);
     BN_free(m);
diff --git a/crypto/bn/rsaz_exp.c b/crypto/bn/rsaz_exp.c
new file mode 100644 (file)
index 0000000..c54c6fe
--- /dev/null
@@ -0,0 +1,346 @@
+/*****************************************************************************
+*                                                                            *
+*  Copyright (c) 2012, Intel Corporation                                     *
+*                                                                            *
+*  All rights reserved.                                                      *
+*                                                                            *
+*  Redistribution and use in source and binary forms, with or without        *
+*  modification, are permitted provided that the following conditions are    *
+*  met:                                                                      *
+*                                                                            *
+*  *  Redistributions of source code must retain the above copyright         *
+*     notice, this list of conditions and the following disclaimer.          *
+*                                                                            *
+*  *  Redistributions in binary form must reproduce the above copyright      *
+*     notice, this list of conditions and the following disclaimer in the    *
+*     documentation and/or other materials provided with the                 *
+*     distribution.                                                          *
+*                                                                            *
+*  *  Neither the name of the Intel Corporation nor the names of its         *
+*     contributors may be used to endorse or promote products derived from   *
+*     this software without specific prior written permission.               *
+*                                                                            *
+*                                                                            *
+*  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY          *
+*  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE         *
+*  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR        *
+*  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR            *
+*  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     *
+*  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,       *
+*  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR        *
+*  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF    *
+*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING      *
+*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS        *
+*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              *
+*                                                                            *
+******************************************************************************
+* Developers and authors:                                                    *
+* Shay Gueron (1, 2), and Vlad Krasnov (1)                                   *
+* (1) Intel Corporation, Israel Development Center, Haifa, Israel            *
+* (2) University of Haifa, Israel                                            *
+*****************************************************************************/
+
+#include "rsaz_exp.h"
+
+#ifdef RSAZ_ENABLED
+
+/*
+ * See crypto/bn/asm/rsaz-avx2.pl for further details.
+ */
+void rsaz_1024_norm2red_avx2(void *red, const void *norm);
+void rsaz_1024_mul_avx2(void *ret, const void *a, const void *b,
+                        const void *n, BN_ULONG k);
+void rsaz_1024_sqr_avx2(void *ret, const void *a, const void *n, BN_ULONG k,
+                        int cnt);
+void rsaz_1024_scatter5_avx2(void *tbl, const void *val, int i);
+void rsaz_1024_gather5_avx2(void *val, const void *tbl, int i);
+void rsaz_1024_red2norm_avx2(void *norm, const void *red);
+
+#if defined(__GNUC__)
+# define ALIGN64        __attribute__((aligned(64)))
+#elif defined(_MSC_VER)
+# define ALIGN64        __declspec(align(64))
+#elif defined(__SUNPRO_C)
+# define ALIGN64
+# pragma align 64(one,two80)
+#else
+/* not fatal, might hurt performance a little */
+# define ALIGN64
+#endif
+
+ALIGN64 static const BN_ULONG one[40] = {
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+ALIGN64 static const BN_ULONG two80[40] = {
+    0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
+                            const BN_ULONG base_norm[16],
+                            const BN_ULONG exponent[16],
+                            const BN_ULONG m_norm[16], const BN_ULONG RR[16],
+                            BN_ULONG k0)
+{
+    unsigned char storage[320 * 3 + 32 * 9 * 16 + 64]; /* 5.5KB */
+    unsigned char *p_str = storage + (64 - ((size_t)storage % 64));
+    unsigned char *a_inv, *m, *result;
+    unsigned char *table_s = p_str + 320 * 3;
+    unsigned char *R2 = table_s; /* borrow */
+    int index;
+    int wvalue;
+
+    if ((((size_t)p_str & 4095) + 320) >> 12) {
+        result = p_str;
+        a_inv = p_str + 320;
+        m = p_str + 320 * 2;    /* should not cross page */
+    } else {
+        m = p_str;              /* should not cross page */
+        result = p_str + 320;
+        a_inv = p_str + 320 * 2;
+    }
+
+    rsaz_1024_norm2red_avx2(m, m_norm);
+    rsaz_1024_norm2red_avx2(a_inv, base_norm);
+    rsaz_1024_norm2red_avx2(R2, RR);
+
+    rsaz_1024_mul_avx2(R2, R2, R2, m, k0);
+    rsaz_1024_mul_avx2(R2, R2, two80, m, k0);
+
+    /* table[0] = 1 */
+    rsaz_1024_mul_avx2(result, R2, one, m, k0);
+    /* table[1] = a_inv^1 */
+    rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0);
+
+    rsaz_1024_scatter5_avx2(table_s, result, 0);
+    rsaz_1024_scatter5_avx2(table_s, a_inv, 1);
+
+    /* table[2] = a_inv^2 */
+    rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 2);
+#if 0
+    /* this is almost 2x smaller and less than 1% slower */
+    for (index = 3; index < 32; index++) {
+        rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+        rsaz_1024_scatter5_avx2(table_s, result, index);
+    }
+#else
+    /* table[4] = a_inv^4 */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 4);
+    /* table[8] = a_inv^8 */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 8);
+    /* table[16] = a_inv^16 */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 16);
+    /* table[17] = a_inv^17 */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 17);
+
+    /* table[3] */
+    rsaz_1024_gather5_avx2(result, table_s, 2);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 3);
+    /* table[6] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 6);
+    /* table[12] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 12);
+    /* table[24] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 24);
+    /* table[25] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 25);
+
+    /* table[5] */
+    rsaz_1024_gather5_avx2(result, table_s, 4);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 5);
+    /* table[10] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 10);
+    /* table[20] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 20);
+    /* table[21] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 21);
+
+    /* table[7] */
+    rsaz_1024_gather5_avx2(result, table_s, 6);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 7);
+    /* table[14] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 14);
+    /* table[28] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 28);
+    /* table[29] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 29);
+
+    /* table[9] */
+    rsaz_1024_gather5_avx2(result, table_s, 8);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 9);
+    /* table[18] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 18);
+    /* table[19] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 19);
+
+    /* table[11] */
+    rsaz_1024_gather5_avx2(result, table_s, 10);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 11);
+    /* table[22] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 22);
+    /* table[23] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 23);
+
+    /* table[13] */
+    rsaz_1024_gather5_avx2(result, table_s, 12);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 13);
+    /* table[26] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 26);
+    /* table[27] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 27);
+
+    /* table[15] */
+    rsaz_1024_gather5_avx2(result, table_s, 14);
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 15);
+    /* table[30] */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 1);
+    rsaz_1024_scatter5_avx2(table_s, result, 30);
+    /* table[31] */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    rsaz_1024_scatter5_avx2(table_s, result, 31);
+#endif
+
+    /* load first window */
+    p_str = (unsigned char *)exponent;
+    wvalue = p_str[127] >> 3;
+    rsaz_1024_gather5_avx2(result, table_s, wvalue);
+
+    index = 1014;
+
+    while (index > -1) {        /* loop for the remaining 127 windows */
+
+        rsaz_1024_sqr_avx2(result, result, m, k0, 5);
+
+        wvalue = *((unsigned short *)&p_str[index / 8]);
+        wvalue = (wvalue >> (index % 8)) & 31;
+        index -= 5;
+
+        rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */
+        rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+    }
+
+    /* square four times */
+    rsaz_1024_sqr_avx2(result, result, m, k0, 4);
+
+    wvalue = p_str[0] & 15;
+
+    rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */
+    rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
+
+    /* from Montgomery */
+    rsaz_1024_mul_avx2(result, result, one, m, k0);
+
+    rsaz_1024_red2norm_avx2(result_norm, result);
+
+    OPENSSL_cleanse(storage, sizeof(storage));
+}
+
+/*
+ * See crypto/bn/asm/rsaz-x86_64.pl for further details.
+ */
+void rsaz_512_mul(void *ret, const void *a, const void *b, const void *n,
+                  BN_ULONG k);
+void rsaz_512_mul_scatter4(void *ret, const void *a, const void *n,
+                           BN_ULONG k, const void *tbl, unsigned int power);
+void rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl,
+                          const void *n, BN_ULONG k, unsigned int power);
+void rsaz_512_mul_by_one(void *ret, const void *a, const void *n, BN_ULONG k);
+void rsaz_512_sqr(void *ret, const void *a, const void *n, BN_ULONG k,
+                  int cnt);
+void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power);
+void rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power);
+
+void RSAZ_512_mod_exp(BN_ULONG result[8],
+                      const BN_ULONG base[8], const BN_ULONG exponent[8],
+                      const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8])
+{
+    unsigned char storage[16 * 8 * 8 + 64 * 2 + 64]; /* 1.2KB incl. slack */
+    unsigned char *table = storage + (64 - ((size_t)storage % 64));
+    BN_ULONG *a_inv = (BN_ULONG *)(table + 16 * 8 * 8);
+    BN_ULONG *temp = (BN_ULONG *)(table + 16 * 8 * 8 + 8 * 8);
+    unsigned char *p_str = (unsigned char *)exponent;
+    int index;
+    unsigned int wvalue;
+
+    /* table[0] = 1_inv, built as 0 - m[0] followed by ~m[1..7] */
+    temp[0] = 0 - m[0];
+    temp[1] = ~m[1];
+    temp[2] = ~m[2];
+    temp[3] = ~m[3];
+    temp[4] = ~m[4];
+    temp[5] = ~m[5];
+    temp[6] = ~m[6];
+    temp[7] = ~m[7];
+    rsaz_512_scatter4(table, temp, 0);
+
+    /* table[1] = a_inv^1, i.e. base multiplied by RR (mod m) */
+    rsaz_512_mul(a_inv, base, RR, m, k0);
+    rsaz_512_scatter4(table, a_inv, 1);
+
+    /* table[2] = a_inv^2 */
+    rsaz_512_sqr(temp, a_inv, m, k0, 1);
+    rsaz_512_scatter4(table, temp, 2);
+
+    for (index = 3; index < 16; index++)
+        rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index);
+
+    /* load first window: byte 63 of the exponent, high nibble first */
+    wvalue = p_str[63];
+
+    rsaz_512_gather4(temp, table, wvalue >> 4);
+    rsaz_512_sqr(temp, temp, m, k0, 4);
+    rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0xf);
+
+    for (index = 62; index >= 0; index--) {
+        wvalue = p_str[index];
+
+        rsaz_512_sqr(temp, temp, m, k0, 4);
+        rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue >> 4);
+
+        rsaz_512_sqr(temp, temp, m, k0, 4);
+        rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0x0f);
+    }
+
+    /* from Montgomery */
+    rsaz_512_mul_by_one(result, temp, m, k0);
+
+    OPENSSL_cleanse(storage, sizeof(storage));
+}
+
+#else
+
+# if defined(PEDANTIC) || defined(__DECC) || defined(__clang__)
+static void *dummy = &dummy;
+# endif
+
+#endif
diff --git a/crypto/bn/rsaz_exp.h b/crypto/bn/rsaz_exp.h
new file mode 100644 (file)
index 0000000..33361de
--- /dev/null
@@ -0,0 +1,56 @@
+/******************************************************************************
+* Copyright(c) 2012, Intel Corp.
+* Developers and authors:
+* Shay Gueron (1, 2), and Vlad Krasnov (1)
+* (1) Intel Corporation, Israel Development Center, Haifa, Israel
+* (2) University of Haifa, Israel
+******************************************************************************
+* LICENSE:
+* This submission to OpenSSL is to be made available under the OpenSSL
+* license, and only to the OpenSSL project, in order to allow integration
+* into the publicly distributed code.
+* The use of this code, or portions of this code, or concepts embedded in
+* this code, or modification of this code and/or algorithm(s) in it, or the
+* use of this code for any other purpose than stated above, requires special
+* licensing.
+******************************************************************************
+* DISCLAIMER:
+* THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS AND THE COPYRIGHT OWNERS
+* ``AS IS''. ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS OR THE COPYRIGHT
+* OWNERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.
+******************************************************************************/
+
+#ifndef RSAZ_EXP_H
+# define RSAZ_EXP_H
+
+# undef RSAZ_ENABLED
+# if defined(OPENSSL_BN_ASM_MONT) && \
+        (defined(__x86_64) || defined(__x86_64__) || \
+         defined(_M_AMD64) || defined(_M_X64))
+#  define RSAZ_ENABLED
+
+#  include <openssl/bn.h>
+
+void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16],
+                            const BN_ULONG base_norm[16],
+                            const BN_ULONG exponent[16],
+                            const BN_ULONG m_norm[16], const BN_ULONG RR[16],
+                            BN_ULONG k0);
+int rsaz_avx2_eligible(void);   /* (void): a real prototype, not K&R style */
+
+void RSAZ_512_mod_exp(BN_ULONG result[8],
+                      const BN_ULONG base_norm[8], const BN_ULONG exponent[8],
+                      const BN_ULONG m_norm[8], BN_ULONG k0,
+                      const BN_ULONG RR[8]);
+
+# endif                         /* OPENSSL_BN_ASM_MONT on x86_64 */
+
+#endif                          /* RSAZ_EXP_H */
index fdde3d7..ebc5ab4 100644 (file)
 #include "cryptlib.h"
 #include <openssl/buffer.h>
 
+size_t BUF_strnlen(const char *str, size_t maxlen)
+{
+    size_t len = 0;
+
+    /* Count bytes up to the first NUL, never examining more than maxlen. */
+    while (len < maxlen && str[len] != '\0')
+        len++;
+    return len;
+}
+
 char *BUF_strdup(const char *str)
 {
     if (str == NULL)
@@ -74,6 +83,8 @@ char *BUF_strndup(const char *str, size_t siz)
     if (str == NULL)
         return (NULL);
 
+    siz = BUF_strnlen(str, siz);
+
     ret = OPENSSL_malloc(siz + 1);
     if (ret == NULL) {
         BUFerr(BUF_F_BUF_STRNDUP, ERR_R_MALLOC_FAILURE);
index 632df93..c343dd7 100644 (file)
@@ -84,6 +84,7 @@ BUF_MEM *BUF_MEM_new(void);
 void BUF_MEM_free(BUF_MEM *a);
 int BUF_MEM_grow(BUF_MEM *str, size_t len);
 int BUF_MEM_grow_clean(BUF_MEM *str, size_t len);
+size_t BUF_strnlen(const char *str, size_t maxlen);
 char *BUF_strdup(const char *str);
 char *BUF_strndup(const char *str, size_t siz);
 void *BUF_memdup(const void *data, size_t siz);
index 228f1dc..ab1225e 100644 (file)
@@ -48,6 +48,8 @@ cmll-x86.s:   asm/cmll-x86.pl ../perlasm/x86asm.pl
        $(PERL) asm/cmll-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
 cmll-x86_64.s:  asm/cmll-x86_64.pl
        $(PERL) asm/cmll-x86_64.pl $(PERLASM_SCHEME) > $@
+cmllt4-sparcv9.s: asm/cmllt4-sparcv9.pl ../perlasm/sparcv9_modes.pl
+       $(PERL) asm/cmllt4-sparcv9.pl $(CFLAGS) > $@
 
 files:
        $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
index 9f4b82f..d94f46b 100644 (file)
@@ -72,7 +72,7 @@ my $i=@_[0];
 my $seed=defined(@_[1])?@_[1]:0;
 my $scale=$seed<0?-8:8;
 my $j=($i&1)*2;
-my $s0=@S[($j)%4],$s1=@S[($j+1)%4],$s2=@S[($j+2)%4],$s3=@S[($j+3)%4];
+my ($s0,$s1,$s2,$s3)=(@S[($j)%4],@S[($j+1)%4],@S[($j+2)%4],@S[($j+3)%4]);
 
 $code.=<<___;
        xor     $s0,$t0                         # t0^=key[0]
@@ -409,7 +409,7 @@ Camellia_Ekeygen:
        push    %r15
 .Lkey_prologue:
 
-       mov     %rdi,$keyend            # put away arguments, keyBitLength
+       mov     %edi,${keyend}d         # put away arguments, keyBitLength
        mov     %rdx,$out               # keyTable
 
        mov     0(%rsi),@S[0]           # load 0-127 bits
diff --git a/crypto/camellia/asm/cmllt4-sparcv9.pl b/crypto/camellia/asm/cmllt4-sparcv9.pl
new file mode 100644 (file)
index 0000000..a813168
--- /dev/null
@@ -0,0 +1,929 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
+# <appro@openssl.org>. The module is licensed under 2-clause BSD
+# license. October 2012. All rights reserved.
+# ====================================================================
+
+######################################################################
+# Camellia for SPARC T4.
+#
+# As with AES below results [for aligned data] are virtually identical
+# to critical path lengths for 3-cycle instruction latency:
+#
+#              128-bit key     192/256-
+# CBC encrypt  4.14/4.21(*)    5.46/5.52
+#                       (*) numbers after slash are for
+#                           misaligned data.
+#
+# As with Intel AES-NI, question is if it's possible to improve
+# performance of parallelizable modes by interleaving round
+# instructions. In Camellia every instruction is dependent on
+# previous, which means that there is place for 2 additional ones
+# in between two dependent. Can we expect 3x performance improvement?
+# At least one can argue that it should be possible to break 2x
+# barrier... For some reason not even 2x appears to be possible:
+#
+#              128-bit key     192/256-
+# CBC decrypt  2.21/2.74       2.99/3.40
+# CTR          2.15/2.68(*)    2.93/3.34
+#                       (*) numbers after slash are for
+#                           misaligned data.
+#
+# This is for 2x interleave. But compared to 1x interleave CBC decrypt
+# improved by ... 0% for 128-bit key, and 11% for 192/256-bit one.
+# So that out-of-order execution logic can take non-interleaved code
+# to 1.87x, but can't take 2x interleaved one any further. There
+# surely is some explanation... As result 3x interleave was not even
+# attempted. Instead an effort was made to share specific modes
+# implementations with AES module (therefore sparcv9_modes.pl).
+#
+# To anchor to something else, software C implementation processes
+# one byte in 38 cycles with 128-bit key on same processor.
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "sparcv9_modes.pl";
+
+&asm_init(@ARGV);
+
+$::evp=1;      # if $evp is set to 0, script generates module with
+# Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt
+# entry points. These are fully compatible with openssl/camellia.h.
+
+######################################################################
+# single-round subroutines
+#
+{
+my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));
+
+$code=<<___;
+.text
+
+.globl cmll_t4_encrypt
+.align 32
+cmll_t4_encrypt:
+       andcc           $inp, 7, %g1            ! is input aligned?
+       andn            $inp, 7, $inp
+
+       ldx             [$key + 0], %g4
+       ldx             [$key + 8], %g5
+
+       ldx             [$inp + 0], %o4
+       bz,pt           %icc, 1f
+       ldx             [$inp + 8], %o5
+       ldx             [$inp + 16], $inp
+       sll             %g1, 3, %g1
+       sub             %g0, %g1, %o3
+       sllx            %o4, %g1, %o4
+       sllx            %o5, %g1, %g1
+       srlx            %o5, %o3, %o5
+       srlx            $inp, %o3, %o3
+       or              %o5, %o4, %o4
+       or              %o3, %g1, %o5
+1:
+       ld              [$key + 272], $rounds   ! grandRounds, 3 or 4
+       ldd             [$key + 16], %f12
+       ldd             [$key + 24], %f14
+       xor             %g4, %o4, %o4
+       xor             %g5, %o5, %o5
+       ldd             [$key + 32], %f16
+       ldd             [$key + 40], %f18
+       movxtod         %o4, %f0
+       movxtod         %o5, %f2
+       ldd             [$key + 48], %f20
+       ldd             [$key + 56], %f22
+       sub             $rounds, 1, $rounds
+       ldd             [$key + 64], %f24
+       ldd             [$key + 72], %f26
+       add             $key, 80, $key
+
+.Lenc:
+       camellia_f      %f12, %f2, %f0, %f2
+       ldd             [$key + 0], %f12
+       sub             $rounds,1,$rounds
+       camellia_f      %f14, %f0, %f2, %f0
+       ldd             [$key + 8], %f14
+       camellia_f      %f16, %f2, %f0, %f2
+       ldd             [$key + 16], %f16
+       camellia_f      %f18, %f0, %f2, %f0
+       ldd             [$key + 24], %f18
+       camellia_f      %f20, %f2, %f0, %f2
+       ldd             [$key + 32], %f20
+       camellia_f      %f22, %f0, %f2, %f0
+       ldd             [$key + 40], %f22
+       camellia_fl     %f24, %f0, %f0
+       ldd             [$key + 48], %f24
+       camellia_fli    %f26, %f2, %f2
+       ldd             [$key + 56], %f26
+       brnz,pt         $rounds, .Lenc
+       add             $key, 64, $key
+
+       andcc           $out, 7, $tmp           ! is output aligned?
+       camellia_f      %f12, %f2, %f0, %f2
+       camellia_f      %f14, %f0, %f2, %f0
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f18, %f0, %f2, %f0
+       camellia_f      %f20, %f2, %f0, %f4
+       camellia_f      %f22, %f0, %f4, %f2
+       fxor            %f24, %f4, %f0
+       fxor            %f26, %f2, %f2
+
+       bnz,pn          %icc, 2f
+       nop
+
+       std             %f0, [$out + 0]
+       retl
+       std             %f2, [$out + 8]
+
+2:     alignaddrl      $out, %g0, $out
+       mov             0xff, $mask
+       srl             $mask, $tmp, $mask
+
+       faligndata      %f0, %f0, %f4
+       faligndata      %f0, %f2, %f6
+       faligndata      %f2, %f2, %f8
+
+       stda            %f4, [$out + $mask]0xc0 ! partial store
+       std             %f6, [$out + 8]
+       add             $out, 16, $out
+       orn             %g0, $mask, $mask
+       retl
+       stda            %f8, [$out + $mask]0xc0 ! partial store
+.type  cmll_t4_encrypt,#function
+.size  cmll_t4_encrypt,.-cmll_t4_encrypt
+
+.globl cmll_t4_decrypt
+.align 32
+cmll_t4_decrypt:
+       ld              [$key + 272], $rounds   ! grandRounds, 3 or 4
+       andcc           $inp, 7, %g1            ! is input aligned?
+       andn            $inp, 7, $inp
+
+       sll             $rounds, 6, $rounds
+       add             $rounds, $key, $key
+
+       ldx             [$inp + 0], %o4
+       bz,pt           %icc, 1f
+       ldx             [$inp + 8], %o5
+       ldx             [$inp + 16], $inp
+       sll             %g1, 3, %g1
+       sub             %g0, %g1, %g4
+       sllx            %o4, %g1, %o4
+       sllx            %o5, %g1, %g1
+       srlx            %o5, %g4, %o5
+       srlx            $inp, %g4, %g4
+       or              %o5, %o4, %o4
+       or              %g4, %g1, %o5
+1:
+       ldx             [$key + 0], %g4
+       ldx             [$key + 8], %g5
+       ldd             [$key - 8], %f12
+       ldd             [$key - 16], %f14
+       xor             %g4, %o4, %o4
+       xor             %g5, %o5, %o5
+       ldd             [$key - 24], %f16
+       ldd             [$key - 32], %f18
+       movxtod         %o4, %f0
+       movxtod         %o5, %f2
+       ldd             [$key - 40], %f20
+       ldd             [$key - 48], %f22
+       sub             $rounds, 64, $rounds
+       ldd             [$key - 56], %f24
+       ldd             [$key - 64], %f26
+       sub             $key, 64, $key
+
+.Ldec:
+       camellia_f      %f12, %f2, %f0, %f2
+       ldd             [$key - 8], %f12
+       sub             $rounds, 64, $rounds
+       camellia_f      %f14, %f0, %f2, %f0
+       ldd             [$key - 16], %f14
+       camellia_f      %f16, %f2, %f0, %f2
+       ldd             [$key - 24], %f16
+       camellia_f      %f18, %f0, %f2, %f0
+       ldd             [$key - 32], %f18
+       camellia_f      %f20, %f2, %f0, %f2
+       ldd             [$key - 40], %f20
+       camellia_f      %f22, %f0, %f2, %f0
+       ldd             [$key - 48], %f22
+       camellia_fl     %f24, %f0, %f0
+       ldd             [$key - 56], %f24
+       camellia_fli    %f26, %f2, %f2
+       ldd             [$key - 64], %f26
+       brnz,pt         $rounds, .Ldec
+       sub             $key, 64, $key
+
+       andcc           $out, 7, $tmp           ! is output aligned?
+       camellia_f      %f12, %f2, %f0, %f2
+       camellia_f      %f14, %f0, %f2, %f0
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f18, %f0, %f2, %f0
+       camellia_f      %f20, %f2, %f0, %f4
+       camellia_f      %f22, %f0, %f4, %f2
+       fxor            %f26, %f4, %f0
+       fxor            %f24, %f2, %f2
+
+       bnz,pn          %icc, 2f
+       nop
+
+       std             %f0, [$out + 0]
+       retl
+       std             %f2, [$out + 8]
+
+2:     alignaddrl      $out, %g0, $out
+       mov             0xff, $mask
+       srl             $mask, $tmp, $mask
+
+       faligndata      %f0, %f0, %f4
+       faligndata      %f0, %f2, %f6
+       faligndata      %f2, %f2, %f8
+
+       stda            %f4, [$out + $mask]0xc0 ! partial store
+       std             %f6, [$out + 8]
+       add             $out, 16, $out
+       orn             %g0, $mask, $mask
+       retl
+       stda            %f8, [$out + $mask]0xc0 ! partial store
+.type  cmll_t4_decrypt,#function
+.size  cmll_t4_decrypt,.-cmll_t4_decrypt
+___
+}
+
+######################################################################
+# key setup subroutines
+#
+{
+sub ROTL128 {
+  my $rot = shift;
+
+       "srlx   %o4, 64-$rot, %g4\n\t".
+       "sllx   %o4, $rot, %o4\n\t".
+       "srlx   %o5, 64-$rot, %g5\n\t".
+       "sllx   %o5, $rot, %o5\n\t".
+       "or     %o4, %g5, %o4\n\t".
+       "or     %o5, %g4, %o5";
+}
+
+my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
+$code.=<<___;
+.globl cmll_t4_set_key
+.align 32
+cmll_t4_set_key:
+       and             $inp, 7, $tmp
+       alignaddr       $inp, %g0, $inp
+       cmp             $bits, 192
+       ldd             [$inp + 0], %f0
+       bl,pt           %icc,.L128
+       ldd             [$inp + 8], %f2
+
+       be,pt           %icc,.L192
+       ldd             [$inp + 16], %f4
+
+       brz,pt          $tmp, .L256aligned
+       ldd             [$inp + 24], %f6
+
+       ldd             [$inp + 32], %f8
+       faligndata      %f0, %f2, %f0
+       faligndata      %f2, %f4, %f2
+       faligndata      %f4, %f6, %f4
+       b               .L256aligned
+       faligndata      %f6, %f8, %f6
+
+.align 16
+.L192:
+       brz,a,pt        $tmp, .L256aligned
+       fnot2           %f4, %f6
+
+       ldd             [$inp + 24], %f6
+       nop
+       faligndata      %f0, %f2, %f0
+       faligndata      %f2, %f4, %f2
+       faligndata      %f4, %f6, %f4
+       fnot2           %f4, %f6
+
+.L256aligned:
+       std             %f0, [$out + 0]         ! k[0, 1]
+       fsrc2           %f0, %f28
+       std             %f2, [$out + 8]         ! k[2, 3]
+       fsrc2           %f2, %f30
+       fxor            %f4, %f0, %f0
+       b               .L128key
+       fxor            %f6, %f2, %f2
+
+.align 16
+.L128:
+       brz,pt          $tmp, .L128aligned
+       nop
+
+       ldd             [$inp + 16], %f4
+       nop
+       faligndata      %f0, %f2, %f0
+       faligndata      %f2, %f4, %f2
+
+.L128aligned:
+       std             %f0, [$out + 0]         ! k[0, 1]
+       fsrc2           %f0, %f28
+       std             %f2, [$out + 8]         ! k[2, 3]
+       fsrc2           %f2, %f30
+
+.L128key:
+       mov             %o7, %o5
+1:     call            .+8
+       add             %o7, SIGMA-1b, %o4
+       mov             %o5, %o7
+
+       ldd             [%o4 + 0], %f16
+       ldd             [%o4 + 8], %f18
+       ldd             [%o4 + 16], %f20
+       ldd             [%o4 + 24], %f22
+
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f18, %f0, %f2, %f0
+       fxor            %f28, %f0, %f0
+       fxor            %f30, %f2, %f2
+       camellia_f      %f20, %f2, %f0, %f2
+       camellia_f      %f22, %f0, %f2, %f0
+
+       bge,pn          %icc, .L256key
+       nop
+       std     %f0, [$out + 0x10]      ! k[ 4,  5]
+       std     %f2, [$out + 0x18]      ! k[ 6,  7]
+
+       movdtox %f0, %o4
+       movdtox %f2, %o5
+       `&ROTL128(15)`
+       stx     %o4, [$out + 0x30]      ! k[12, 13]
+       stx     %o5, [$out + 0x38]      ! k[14, 15]
+       `&ROTL128(15)`
+       stx     %o4, [$out + 0x40]      ! k[16, 17]
+       stx     %o5, [$out + 0x48]      ! k[18, 19]
+       `&ROTL128(15)`
+       stx     %o4, [$out + 0x60]      ! k[24, 25]
+       `&ROTL128(15)`
+       stx     %o4, [$out + 0x70]      ! k[28, 29]
+       stx     %o5, [$out + 0x78]      ! k[30, 31]
+       `&ROTL128(34)`
+       stx     %o4, [$out + 0xa0]      ! k[40, 41]
+       stx     %o5, [$out + 0xa8]      ! k[42, 43]
+       `&ROTL128(17)`
+       stx     %o4, [$out + 0xc0]      ! k[48, 49]
+       stx     %o5, [$out + 0xc8]      ! k[50, 51]
+
+       movdtox %f28, %o4               ! k[ 0,  1]
+       movdtox %f30, %o5               ! k[ 2,  3]
+       `&ROTL128(15)`
+       stx     %o4, [$out + 0x20]      ! k[ 8,  9]
+       stx     %o5, [$out + 0x28]      ! k[10, 11]
+       `&ROTL128(30)`
+       stx     %o4, [$out + 0x50]      ! k[20, 21]
+       stx     %o5, [$out + 0x58]      ! k[22, 23]
+       `&ROTL128(15)`
+       stx     %o5, [$out + 0x68]      ! k[26, 27]
+       `&ROTL128(17)`
+       stx     %o4, [$out + 0x80]      ! k[32, 33]
+       stx     %o5, [$out + 0x88]      ! k[34, 35]
+       `&ROTL128(17)`
+       stx     %o4, [$out + 0x90]      ! k[36, 37]
+       stx     %o5, [$out + 0x98]      ! k[38, 39]
+       `&ROTL128(17)`
+       stx     %o4, [$out + 0xb0]      ! k[44, 45]
+       stx     %o5, [$out + 0xb8]      ! k[46, 47]
+
+       mov             3, $tmp
+       st              $tmp, [$out + 0x110]
+       retl
+       xor             %o0, %o0, %o0
+
+.align 16
+.L256key:
+       ldd             [%o4 + 32], %f24
+       ldd             [%o4 + 40], %f26
+
+       std             %f0, [$out + 0x30]      ! k[12, 13]
+       std             %f2, [$out + 0x38]      ! k[14, 15]
+
+       fxor            %f4, %f0, %f0
+       fxor            %f6, %f2, %f2
+       camellia_f      %f24, %f2, %f0, %f2
+       camellia_f      %f26, %f0, %f2, %f0
+
+       std     %f0, [$out + 0x10]      ! k[ 4,  5]
+       std     %f2, [$out + 0x18]      ! k[ 6,  7]
+
+       movdtox %f0, %o4
+       movdtox %f2, %o5
+       `&ROTL128(30)`
+       stx     %o4, [$out + 0x50]      ! k[20, 21]
+       stx     %o5, [$out + 0x58]      ! k[22, 23]
+       `&ROTL128(30)`
+       stx     %o4, [$out + 0xa0]      ! k[40, 41]
+       stx     %o5, [$out + 0xa8]      ! k[42, 43]
+       `&ROTL128(51)`
+       stx     %o4, [$out + 0x100]     ! k[64, 65]
+       stx     %o5, [$out + 0x108]     ! k[66, 67]
+
+       movdtox %f4, %o4                ! k[ 8,  9]
+       movdtox %f6, %o5                ! k[10, 11]
+       `&ROTL128(15)`
+       stx     %o4, [$out + 0x20]      ! k[ 8,  9]
+       stx     %o5, [$out + 0x28]      ! k[10, 11]
+       `&ROTL128(15)`
+       stx     %o4, [$out + 0x40]      ! k[16, 17]
+       stx     %o5, [$out + 0x48]      ! k[18, 19]
+       `&ROTL128(30)`
+       stx     %o4, [$out + 0x90]      ! k[36, 37]
+       stx     %o5, [$out + 0x98]      ! k[38, 39]
+       `&ROTL128(34)`
+       stx     %o4, [$out + 0xd0]      ! k[52, 53]
+       stx     %o5, [$out + 0xd8]      ! k[54, 55]
+       ldx     [$out + 0x30], %o4      ! k[12, 13]
+       ldx     [$out + 0x38], %o5      ! k[14, 15]
+       `&ROTL128(15)`
+       stx     %o4, [$out + 0x30]      ! k[12, 13]
+       stx     %o5, [$out + 0x38]      ! k[14, 15]
+       `&ROTL128(30)`
+       stx     %o4, [$out + 0x70]      ! k[28, 29]
+       stx     %o5, [$out + 0x78]      ! k[30, 31]
+       srlx    %o4, 32, %g4
+       srlx    %o5, 32, %g5
+       st      %o4, [$out + 0xc0]      ! k[48]
+       st      %g5, [$out + 0xc4]      ! k[49]
+       st      %o5, [$out + 0xc8]      ! k[50]
+       st      %g4, [$out + 0xcc]      ! k[51]
+       `&ROTL128(49)`
+       stx     %o4, [$out + 0xe0]      ! k[56, 57]
+       stx     %o5, [$out + 0xe8]      ! k[58, 59]
+
+       movdtox %f28, %o4               ! k[ 0,  1]
+       movdtox %f30, %o5               ! k[ 2,  3]
+       `&ROTL128(45)`
+       stx     %o4, [$out + 0x60]      ! k[24, 25]
+       stx     %o5, [$out + 0x68]      ! k[26, 27]
+       `&ROTL128(15)`
+       stx     %o4, [$out + 0x80]      ! k[32, 33]
+       stx     %o5, [$out + 0x88]      ! k[34, 35]
+       `&ROTL128(17)`
+       stx     %o4, [$out + 0xb0]      ! k[44, 45]
+       stx     %o5, [$out + 0xb8]      ! k[46, 47]
+       `&ROTL128(34)`
+       stx     %o4, [$out + 0xf0]      ! k[60, 61]
+       stx     %o5, [$out + 0xf8]      ! k[62, 63]
+
+       mov             4, $tmp
+       st              $tmp, [$out + 0x110]
+       retl
+       xor             %o0, %o0, %o0
+.type  cmll_t4_set_key,#function
+.size  cmll_t4_set_key,.-cmll_t4_set_key
+.align 32
+SIGMA:
+       .long   0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
+       .long   0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
+       .long   0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
+.type  SIGMA,#object
+.size  SIGMA,.-SIGMA
+.asciz "Camellia for SPARC T4, David S. Miller, Andy Polyakov"
+___
+}
+
+{{{
+my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
+my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));
+
+# _cmll128_load_enckey: preload the encryption key schedule into registers.
+# The two 64-bit words at offsets 0 and 8 from $key go to the integer
+# registers %g4/%g5; the 24 doublewords at offsets 16..200 go to %f16..%f62
+# in ascending order.  _cmll256_load_enckey is an alias for the same code.
+$code.=<<___;
+.align 32
+_cmll128_load_enckey:
+       ldx             [$key + 0], %g4
+       ldx             [$key + 8], %g5
+___
+for ($i=2; $i<26;$i++) {                       # doublewords 2..25 -> %f16..%f62
+    $code.=<<___;
+       ldd             [$key + `8*$i`], %f`12+2*$i`
+___
+}
+$code.=<<___;
+       retl
+       nop
+.type  _cmll128_load_enckey,#function
+.size  _cmll128_load_enckey,.-_cmll128_load_enckey
+_cmll256_load_enckey=_cmll128_load_enckey
+
+.align 32
+! Decryption key loaders (two entry points sharing one tail).
+! The 256-bit entry fetches the doublewords at offsets 64 and 72 into
+! %f62 and %f60, then advances the key pointer by 64 in the branch delay
+! slot before joining the common tail.  The 128-bit entry fetches offsets
+! 0 and 8 into %f60 and %f62 and falls through.
+! The common tail fills %f58 down to %f16 from offsets 16..184 (descending
+! register order) and finally loads offsets 192 and 200 into %g4 and %g5,
+! all relative to the key pointer as it stands on entry to the tail.
+_cmll256_load_deckey:
+       ldd             [$key + 64], %f62
+       ldd             [$key + 72], %f60
+       b               .Load_deckey
+       add             $key, 64, $key
+_cmll128_load_deckey:
+       ldd             [$key + 0], %f60
+       ldd             [$key + 8], %f62
+.Load_deckey:
+___
+for ($i=2; $i<24;$i++) {                       # doublewords 2..23 -> %f58..%f16
+    $code.=<<___;
+       ldd             [$key + `8*$i`], %f`62-2*$i`
+___
+}
+$code.=<<___;
+       ldx             [$key + 192], %g4
+       retl
+       ldx             [$key + 200], %g5
+.type  _cmll256_load_deckey,#function
+.size  _cmll256_load_deckey,.-_cmll256_load_deckey
+
+.align 32
+! One-block core for the 128-bit key schedule.  The block lives in the
+! register pair %f0:%f2 and the schedule is taken from %f16..%f62.
+! Runs 18 rounds with an FL / FL-inverse layer after rounds 6 and 12, then
+! xors the result with %f60 and %f62.  The last two rounds are routed
+! through %f4 so that the two halves come out exchanged.
+! No input xor is done here; presumably the caller applies it - TODO confirm.
+! The decrypt entry point is an alias for this same code (see below).
+_cmll128_encrypt_1x:
+___
+for ($i=0; $i<3; $i++) {                       # three groups of rounds
+    $code.=<<___;
+       camellia_f      %f`16+16*$i+0`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+2`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+4`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+6`, %f0, %f2, %f0
+___
+$code.=<<___ if ($i<2);
+       camellia_f      %f`16+16*$i+8`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+10`, %f0, %f2, %f0
+       camellia_fl     %f`16+16*$i+12`, %f0,      %f0
+       camellia_fli    %f`16+16*$i+14`, %f2,      %f2
+___
+}
+$code.=<<___;
+       camellia_f      %f56, %f2, %f0, %f4
+       camellia_f      %f58, %f0, %f4, %f2
+       fxor            %f60, %f4, %f0
+       retl
+       fxor            %f62, %f2, %f2
+.type  _cmll128_encrypt_1x,#function
+.size  _cmll128_encrypt_1x,.-_cmll128_encrypt_1x
+_cmll128_decrypt_1x=_cmll128_encrypt_1x
+
+.align 32
+! Two-block core for the 128-bit key schedule.  Same round sequence as the
+! one-block routine, issued alternately for two independent blocks held in
+! %f0:%f2 and %f4:%f6.  Each schedule register is used once per block.
+! The last two rounds use %f8 and %f10 as temporaries before the closing
+! xor with %f60 and %f62.  The decrypt entry point is an alias (see below).
+_cmll128_encrypt_2x:
+___
+for ($i=0; $i<3; $i++) {                       # three groups of rounds
+    $code.=<<___;
+       camellia_f      %f`16+16*$i+0`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+0`, %f6, %f4, %f6
+       camellia_f      %f`16+16*$i+2`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+2`, %f4, %f6, %f4
+       camellia_f      %f`16+16*$i+4`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+4`, %f6, %f4, %f6
+       camellia_f      %f`16+16*$i+6`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+6`, %f4, %f6, %f4
+___
+$code.=<<___ if ($i<2);
+       camellia_f      %f`16+16*$i+8`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+8`, %f6, %f4, %f6
+       camellia_f      %f`16+16*$i+10`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+10`, %f4, %f6, %f4
+       camellia_fl     %f`16+16*$i+12`, %f0,      %f0
+       camellia_fl     %f`16+16*$i+12`, %f4,      %f4
+       camellia_fli    %f`16+16*$i+14`, %f2,      %f2
+       camellia_fli    %f`16+16*$i+14`, %f6,      %f6
+___
+}
+$code.=<<___;
+       camellia_f      %f56, %f2, %f0, %f8
+       camellia_f      %f56, %f6, %f4, %f10
+       camellia_f      %f58, %f0, %f8, %f2
+       camellia_f      %f58, %f4, %f10, %f6
+       fxor            %f60, %f8, %f0
+       fxor            %f60, %f10, %f4
+       fxor            %f62, %f2, %f2
+       retl
+       fxor            %f62, %f6, %f6
+.type  _cmll128_encrypt_2x,#function
+.size  _cmll128_encrypt_2x,.-_cmll128_encrypt_2x
+_cmll128_decrypt_2x=_cmll128_encrypt_2x
+
+.align 32
+! One-block core for the 256-bit key schedule (block in %f0:%f2).
+! 24 rounds and three FL / FL-inverse layers in total.  The schedule does
+! not fit into %f16..%f62 at once, so %f16..%f30 are used twice:
+!   - with their contents on entry for the first six rounds and layer,
+!     being refilled from key offsets 208..264 as soon as each pair has
+!     been consumed;
+!   - with the refilled contents for the final six rounds and output xor.
+! During that tail the same registers are refilled from key offsets 16..72
+! (the last load sits in the delay slot of the return).
+_cmll256_encrypt_1x:
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f18, %f0, %f2, %f0
+       ldd             [$key + 208], %f16
+       ldd             [$key + 216], %f18
+       camellia_f      %f20, %f2, %f0, %f2
+       camellia_f      %f22, %f0, %f2, %f0
+       ldd             [$key + 224], %f20
+       ldd             [$key + 232], %f22
+       camellia_f      %f24, %f2, %f0, %f2
+       camellia_f      %f26, %f0, %f2, %f0
+       ldd             [$key + 240], %f24
+       ldd             [$key + 248], %f26
+       camellia_fl     %f28, %f0, %f0
+       camellia_fli    %f30, %f2, %f2
+       ldd             [$key + 256], %f28
+       ldd             [$key + 264], %f30
+___
+for ($i=1; $i<3; $i++) {                       # middle groups, keys in %f32..%f62
+    $code.=<<___;
+       camellia_f      %f`16+16*$i+0`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+2`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+4`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+6`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+8`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+10`, %f0, %f2, %f0
+       camellia_fl     %f`16+16*$i+12`, %f0,      %f0
+       camellia_fli    %f`16+16*$i+14`, %f2,      %f2
+___
+}
+$code.=<<___;
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f18, %f0, %f2, %f0
+       ldd             [$key + 16], %f16
+       ldd             [$key + 24], %f18
+       camellia_f      %f20, %f2, %f0, %f2
+       camellia_f      %f22, %f0, %f2, %f0
+       ldd             [$key + 32], %f20
+       ldd             [$key + 40], %f22
+       camellia_f      %f24, %f2, %f0, %f4
+       camellia_f      %f26, %f0, %f4, %f2
+       ldd             [$key + 48], %f24
+       ldd             [$key + 56], %f26
+       fxor            %f28, %f4, %f0
+       fxor            %f30, %f2, %f2
+       ldd             [$key + 64], %f28
+       retl
+       ldd             [$key + 72], %f30
+.type  _cmll256_encrypt_1x,#function
+.size  _cmll256_encrypt_1x,.-_cmll256_encrypt_1x
+
+.align 32
+! Two-block core for the 256-bit key schedule.  Same sequence as the
+! one-block 256-bit routine, issued alternately for two independent blocks
+! in %f0:%f2 and %f4:%f6.  %f16..%f30 are refilled from key offsets
+! 208..264 after first use and from offsets 16..72 during the tail.
+! %f8 and %f10 are temporaries for the last two rounds.
+_cmll256_encrypt_2x:
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f16, %f6, %f4, %f6
+       camellia_f      %f18, %f0, %f2, %f0
+       camellia_f      %f18, %f4, %f6, %f4
+       ldd             [$key + 208], %f16
+       ldd             [$key + 216], %f18
+       camellia_f      %f20, %f2, %f0, %f2
+       camellia_f      %f20, %f6, %f4, %f6
+       camellia_f      %f22, %f0, %f2, %f0
+       camellia_f      %f22, %f4, %f6, %f4
+       ldd             [$key + 224], %f20
+       ldd             [$key + 232], %f22
+       camellia_f      %f24, %f2, %f0, %f2
+       camellia_f      %f24, %f6, %f4, %f6
+       camellia_f      %f26, %f0, %f2, %f0
+       camellia_f      %f26, %f4, %f6, %f4
+       ldd             [$key + 240], %f24
+       ldd             [$key + 248], %f26
+       camellia_fl     %f28, %f0, %f0
+       camellia_fl     %f28, %f4, %f4
+       camellia_fli    %f30, %f2, %f2
+       camellia_fli    %f30, %f6, %f6
+       ldd             [$key + 256], %f28
+       ldd             [$key + 264], %f30
+___
+for ($i=1; $i<3; $i++) {                       # middle groups, keys in %f32..%f62
+    $code.=<<___;
+       camellia_f      %f`16+16*$i+0`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+0`, %f6, %f4, %f6
+       camellia_f      %f`16+16*$i+2`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+2`, %f4, %f6, %f4
+       camellia_f      %f`16+16*$i+4`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+4`, %f6, %f4, %f6
+       camellia_f      %f`16+16*$i+6`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+6`, %f4, %f6, %f4
+       camellia_f      %f`16+16*$i+8`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+8`, %f6, %f4, %f6
+       camellia_f      %f`16+16*$i+10`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+10`, %f4, %f6, %f4
+       camellia_fl     %f`16+16*$i+12`, %f0,      %f0
+       camellia_fl     %f`16+16*$i+12`, %f4,      %f4
+       camellia_fli    %f`16+16*$i+14`, %f2,      %f2
+       camellia_fli    %f`16+16*$i+14`, %f6,      %f6
+___
+}
+$code.=<<___;
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f16, %f6, %f4, %f6
+       camellia_f      %f18, %f0, %f2, %f0
+       camellia_f      %f18, %f4, %f6, %f4
+       ldd             [$key + 16], %f16
+       ldd             [$key + 24], %f18
+       camellia_f      %f20, %f2, %f0, %f2
+       camellia_f      %f20, %f6, %f4, %f6
+       camellia_f      %f22, %f0, %f2, %f0
+       camellia_f      %f22, %f4, %f6, %f4
+       ldd             [$key + 32], %f20
+       ldd             [$key + 40], %f22
+       camellia_f      %f24, %f2, %f0, %f8
+       camellia_f      %f24, %f6, %f4, %f10
+       camellia_f      %f26, %f0, %f8, %f2
+       camellia_f      %f26, %f4, %f10, %f6
+       ldd             [$key + 48], %f24
+       ldd             [$key + 56], %f26
+       fxor            %f28, %f8, %f0
+       fxor            %f28, %f10, %f4
+       fxor            %f30, %f2, %f2
+       fxor            %f30, %f6, %f6
+       ldd             [$key + 64], %f28
+       retl
+       ldd             [$key + 72], %f30
+.type  _cmll256_encrypt_2x,#function
+.size  _cmll256_encrypt_2x,.-_cmll256_encrypt_2x
+
+.align 32
+! One-block decryption core for the 256-bit key schedule (block in
+! %f0:%f2).  Mirrors the 256-bit encryption core, but walks the schedule
+! downwards: after first use %f16..%f30 are refilled from key offsets
+! -8 down to -64, and during the tail they are refilled from offsets 184
+! down to 128.  The closing xor uses %f30 for the first half and %f28 for
+! the second.
+! The negative offsets are consistent with the 256-bit decrypt key loader
+! having advanced the key pointer by 64; that the caller keeps the advanced
+! pointer is an assumption - TODO confirm against the callers.
+_cmll256_decrypt_1x:
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f18, %f0, %f2, %f0
+       ldd             [$key - 8], %f16
+       ldd             [$key - 16], %f18
+       camellia_f      %f20, %f2, %f0, %f2
+       camellia_f      %f22, %f0, %f2, %f0
+       ldd             [$key - 24], %f20
+       ldd             [$key - 32], %f22
+       camellia_f      %f24, %f2, %f0, %f2
+       camellia_f      %f26, %f0, %f2, %f0
+       ldd             [$key - 40], %f24
+       ldd             [$key - 48], %f26
+       camellia_fl     %f28, %f0, %f0
+       camellia_fli    %f30, %f2, %f2
+       ldd             [$key - 56], %f28
+       ldd             [$key - 64], %f30
+___
+for ($i=1; $i<3; $i++) {                       # middle groups, keys in %f32..%f62
+    $code.=<<___;
+       camellia_f      %f`16+16*$i+0`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+2`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+4`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+6`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+8`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+10`, %f0, %f2, %f0
+       camellia_fl     %f`16+16*$i+12`, %f0,      %f0
+       camellia_fli    %f`16+16*$i+14`, %f2,      %f2
+___
+}
+$code.=<<___;
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f18, %f0, %f2, %f0
+       ldd             [$key + 184], %f16
+       ldd             [$key + 176], %f18
+       camellia_f      %f20, %f2, %f0, %f2
+       camellia_f      %f22, %f0, %f2, %f0
+       ldd             [$key + 168], %f20
+       ldd             [$key + 160], %f22
+       camellia_f      %f24, %f2, %f0, %f4
+       camellia_f      %f26, %f0, %f4, %f2
+       ldd             [$key + 152], %f24
+       ldd             [$key + 144], %f26
+       fxor            %f30, %f4, %f0
+       fxor            %f28, %f2, %f2
+       ldd             [$key + 136], %f28
+       retl
+       ldd             [$key + 128], %f30
+.type  _cmll256_decrypt_1x,#function
+.size  _cmll256_decrypt_1x,.-_cmll256_decrypt_1x
+
+.align 32
+! Two-block decryption core for the 256-bit key schedule.  Same sequence
+! as the one-block 256-bit decryption routine, issued alternately for two
+! independent blocks in %f0:%f2 and %f4:%f6.  %f16..%f30 are refilled from
+! key offsets -8 down to -64 after first use and from offsets 184 down to
+! 128 during the tail.  %f8 and %f10 are temporaries for the last two
+! rounds; the closing xor uses %f30 for the first halves and %f28 for the
+! second halves.
+_cmll256_decrypt_2x:
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f16, %f6, %f4, %f6
+       camellia_f      %f18, %f0, %f2, %f0
+       camellia_f      %f18, %f4, %f6, %f4
+       ldd             [$key - 8], %f16
+       ldd             [$key - 16], %f18
+       camellia_f      %f20, %f2, %f0, %f2
+       camellia_f      %f20, %f6, %f4, %f6
+       camellia_f      %f22, %f0, %f2, %f0
+       camellia_f      %f22, %f4, %f6, %f4
+       ldd             [$key - 24], %f20
+       ldd             [$key - 32], %f22
+       camellia_f      %f24, %f2, %f0, %f2
+       camellia_f      %f24, %f6, %f4, %f6
+       camellia_f      %f26, %f0, %f2, %f0
+       camellia_f      %f26, %f4, %f6, %f4
+       ldd             [$key - 40], %f24
+       ldd             [$key - 48], %f26
+       camellia_fl     %f28, %f0, %f0
+       camellia_fl     %f28, %f4, %f4
+       camellia_fli    %f30, %f2, %f2
+       camellia_fli    %f30, %f6, %f6
+       ldd             [$key - 56], %f28
+       ldd             [$key - 64], %f30
+___
+for ($i=1; $i<3; $i++) {                       # middle groups, keys in %f32..%f62
+    $code.=<<___;
+       camellia_f      %f`16+16*$i+0`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+0`, %f6, %f4, %f6
+       camellia_f      %f`16+16*$i+2`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+2`, %f4, %f6, %f4
+       camellia_f      %f`16+16*$i+4`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+4`, %f6, %f4, %f6
+       camellia_f      %f`16+16*$i+6`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+6`, %f4, %f6, %f4
+       camellia_f      %f`16+16*$i+8`, %f2, %f0, %f2
+       camellia_f      %f`16+16*$i+8`, %f6, %f4, %f6
+       camellia_f      %f`16+16*$i+10`, %f0, %f2, %f0
+       camellia_f      %f`16+16*$i+10`, %f4, %f6, %f4
+       camellia_fl     %f`16+16*$i+12`, %f0,      %f0
+       camellia_fl     %f`16+16*$i+12`, %f4,      %f4
+       camellia_fli    %f`16+16*$i+14`, %f2,      %f2
+       camellia_fli    %f`16+16*$i+14`, %f6,      %f6
+___
+}
+$code.=<<___;
+       camellia_f      %f16, %f2, %f0, %f2
+       camellia_f      %f16, %f6, %f4, %f6
+       camellia_f      %f18, %f0, %f2, %f0
+       camellia_f      %f18, %f4, %f6, %f4
+       ldd             [$key + 184], %f16
+       ldd             [$key + 176], %f18
+       camellia_f      %f20, %f2, %f0, %f2
+       camellia_f      %f20, %f6, %f4, %f6
+       camellia_f      %f22, %f0, %f2, %f0
+       camellia_f      %f22, %f4, %f6, %f4
+       ldd             [$key + 168], %f20
+       ldd             [$key + 160], %f22
+       camellia_f      %f24, %f2, %f0, %f8
+       camellia_f      %f24, %f6, %f4, %f10
+       camellia_f      %f26, %f0, %f8, %f2
+       camellia_f      %f26, %f4, %f10, %f6
+       ldd             [$key + 152], %f24
+       ldd             [$key + 144], %f26
+       fxor            %f30, %f8, %f0
+       fxor            %f30, %f10, %f4
+       fxor            %f28, %f2, %f2
+       fxor            %f28, %f6, %f6
+       ldd             [$key + 136], %f28
+       retl
+       ldd             [$key + 128], %f30
+.type  _cmll256_decrypt_2x,#function
+.size  _cmll256_decrypt_2x,.-_cmll256_decrypt_2x
+___
+
+&alg_cbc_encrypt_implement("cmll",128);
+&alg_cbc_encrypt_implement("cmll",256);
+
+&alg_cbc_decrypt_implement("cmll",128);
+&alg_cbc_decrypt_implement("cmll",256);
+
+if ($::evp) {
+    &alg_ctr32_implement("cmll",128);
+    &alg_ctr32_implement("cmll",256);
+}
+}}}
+
+# Emitted only when $::evp is false: export the generic Camellia_* symbols
+# on top of the cmll_t4_* / cmllNNN_t4_* routines.
+if (!$::evp) {
+$code.=<<___;
+.global        Camellia_encrypt
+Camellia_encrypt=cmll_t4_encrypt
+.global        Camellia_decrypt
+Camellia_decrypt=cmll_t4_decrypt
+.global        Camellia_set_key
+.align 32
+! Argument checks, then a plain branch to cmll_t4_set_key (the return
+! address register is left untouched, so that routine returns to our caller).
+!   %o2 not a multiple of 8      -> return -1
+!   %o0 or %o2 equal to zero     -> return -1
+!   %o1 has bits outside 0x1c0   -> return -2
+!   %o1 below 128                -> return -2
+! Presumably %o0 is the user key, %o1 the key length in bits and %o2 the
+! key schedule - TODO confirm against the C prototype.
+! NOTE(review): the two length tests together also let 320, 384 and 448
+! through; how cmll_t4_set_key treats those is not visible here.
+Camellia_set_key:
+       andcc           %o2, 7, %g0             ! double-check alignment
+       bnz,a,pn        %icc, 1f
+       mov             -1, %o0
+       brz,a,pn        %o0, 1f
+       mov             -1, %o0
+       brz,a,pn        %o2, 1f
+       mov             -1, %o0
+       andncc          %o1, 0x1c0, %g0
+       bnz,a,pn        %icc, 1f
+       mov             -2, %o0
+       cmp             %o1, 128
+       bl,a,pn         %icc, 1f
+       mov             -2, %o0
+       b               cmll_t4_set_key
+       nop
+1:     retl
+       nop
+.type  Camellia_set_key,#function
+.size  Camellia_set_key,.-Camellia_set_key
+___
+
+# The dispatcher below executes no save instruction, so its arguments are
+# named through the %o registers here.
+my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));
+
+$code.=<<___;
+.globl Camellia_cbc_encrypt
+.align 32
+! Dispatch on the 32-bit word at key offset 272 (0x110) and on the enc
+! argument.  The value 3 selects the cmll128 routines, any other value the
+! cmll256 ones; enc equal to zero selects decryption.  The cmp sits in the
+! delay slot of the brz, so the condition codes are set on both paths.
+! cmll_t4_set_key stores 3 or 4 at that same offset.
+Camellia_cbc_encrypt:
+       ld              [$key + 272], %g1
+       nop
+       brz             $enc, .Lcbc_decrypt
+       cmp             %g1, 3
+
+       be,pt           %icc, cmll128_t4_cbc_encrypt
+       nop
+       ba              cmll256_t4_cbc_encrypt
+       nop
+
+.Lcbc_decrypt:
+       be,pt           %icc, cmll128_t4_cbc_decrypt
+       nop
+       ba              cmll256_t4_cbc_decrypt
+       nop
+.type  Camellia_cbc_encrypt,#function
+.size  Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
+___
+}
+
+&emit_assembler();
+
+close STDOUT;
index 7c4ad41..b0f0829 100644 (file)
 
+/*
+ * ROTL(a,n): rotate a left by n bits, keeping the left-shifted part within
+ * 32 bits.  Three variants:
+ *  - WIN32 + MSVC: the _lrotl intrinsic;
+ *  - PEDANTIC: the right-shift count is masked with 31, so n == 0 shifts by
+ *    0 rather than by 32;
+ *  - default: the right-shift count is 32-(n) unmasked, i.e. 32 when n == 0
+ *    (NOTE(review): that is undefined in C if a is a 32-bit type - callers
+ *    presumably never pass n == 0; confirm).
+ */
 #if defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)
 # define ROTL(a,n)     (_lrotl(a,n))
+#elif defined(PEDANTIC)
+# define ROTL(a,n)     ((((a)<<(n))&0xffffffffL)|((a)>>((32-(n))&31)))
 #else
 # define ROTL(a,n)     ((((a)<<(n))&0xffffffffL)|((a)>>(32-(n))))
 #endif
index b124b5d..6f3a832 100644 (file)
@@ -19,10 +19,10 @@ APPS=
 LIB=$(TOP)/libcrypto.a
 LIBSRC= cms_lib.c cms_asn1.c cms_att.c cms_io.c cms_smime.c cms_err.c \
        cms_sd.c cms_dd.c cms_cd.c cms_env.c cms_enc.c cms_ess.c \
-       cms_pwri.c
+       cms_pwri.c cms_kari.c
 LIBOBJ= cms_lib.o cms_asn1.o cms_att.o cms_io.o cms_smime.o cms_err.o \
        cms_sd.o cms_dd.o cms_cd.o cms_env.o cms_enc.o cms_ess.o \
-       cms_pwri.o
+       cms_pwri.o cms_kari.o
 
 SRC= $(LIBSRC)
 
@@ -220,20 +220,39 @@ cms_io.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
 cms_io.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
 cms_io.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h cms.h
 cms_io.o: cms_io.c cms_lcl.h
+cms_kari.o: ../../e_os.h ../../include/openssl/aes.h
+cms_kari.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h
+cms_kari.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
+cms_kari.o: ../../include/openssl/cms.h ../../include/openssl/conf.h
+cms_kari.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
+cms_kari.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
+cms_kari.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
+cms_kari.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
+cms_kari.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+cms_kari.o: ../../include/openssl/opensslconf.h
+cms_kari.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+cms_kari.o: ../../include/openssl/pem.h ../../include/openssl/pem2.h
+cms_kari.o: ../../include/openssl/pkcs7.h ../../include/openssl/rand.h
+cms_kari.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+cms_kari.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+cms_kari.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+cms_kari.o: ../../include/openssl/x509v3.h ../asn1/asn1_locl.h ../cryptlib.h
+cms_kari.o: cms_kari.c cms_lcl.h
 cms_lib.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h
 cms_lib.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
-cms_lib.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-cms_lib.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
-cms_lib.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-cms_lib.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-cms_lib.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-cms_lib.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
-cms_lib.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pem.h
-cms_lib.o: ../../include/openssl/pem2.h ../../include/openssl/pkcs7.h
-cms_lib.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
-cms_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-cms_lib.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h cms.h
-cms_lib.o: cms_lcl.h cms_lib.c
+cms_lib.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
+cms_lib.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
+cms_lib.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
+cms_lib.o: ../../include/openssl/err.h ../../include/openssl/evp.h
+cms_lib.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+cms_lib.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+cms_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+cms_lib.o: ../../include/openssl/pem.h ../../include/openssl/pem2.h
+cms_lib.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
+cms_lib.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
+cms_lib.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
+cms_lib.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
+cms_lib.o: cms.h cms_lcl.h cms_lib.c
 cms_pwri.o: ../../e_os.h ../../include/openssl/aes.h
 cms_pwri.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h
 cms_pwri.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
@@ -283,4 +302,4 @@ cms_smime.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
 cms_smime.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
 cms_smime.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
 cms_smime.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
-cms_smime.o: ../cryptlib.h cms_lcl.h cms_smime.c
+cms_smime.o: ../asn1/asn1_locl.h ../cryptlib.h cms_lcl.h cms_smime.c
index a2281ed..e6c7f96 100644 (file)
@@ -72,9 +72,12 @@ typedef struct CMS_RevocationInfoChoice_st CMS_RevocationInfoChoice;
 typedef struct CMS_RecipientInfo_st CMS_RecipientInfo;
 typedef struct CMS_ReceiptRequest_st CMS_ReceiptRequest;
 typedef struct CMS_Receipt_st CMS_Receipt;
+typedef struct CMS_RecipientEncryptedKey_st CMS_RecipientEncryptedKey;
+typedef struct CMS_OtherKeyAttribute_st CMS_OtherKeyAttribute;
 
 DECLARE_STACK_OF(CMS_SignerInfo)
 DECLARE_STACK_OF(GENERAL_NAMES)
+DECLARE_STACK_OF(CMS_RecipientEncryptedKey)
 DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo)
 DECLARE_ASN1_FUNCTIONS(CMS_ReceiptRequest)
 DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo)
@@ -82,6 +85,7 @@ DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo)
 # define CMS_SIGNERINFO_ISSUER_SERIAL    0
 # define CMS_SIGNERINFO_KEYIDENTIFIER    1
 
+# define CMS_RECIPINFO_NONE              -1
 # define CMS_RECIPINFO_TRANS             0
 # define CMS_RECIPINFO_AGREE             1
 # define CMS_RECIPINFO_KEK               2
@@ -111,6 +115,7 @@ DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo)
 # define CMS_REUSE_DIGEST                0x8000
 # define CMS_USE_KEYID                   0x10000
 # define CMS_DEBUG_DECRYPT               0x20000
+# define CMS_KEY_PARAM                   0x40000
 
 const ASN1_OBJECT *CMS_get0_type(CMS_ContentInfo *cms);
 
@@ -189,6 +194,7 @@ int CMS_decrypt_set1_password(CMS_ContentInfo *cms,
 
 STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms);
 int CMS_RecipientInfo_type(CMS_RecipientInfo *ri);
+EVP_PKEY_CTX *CMS_RecipientInfo_get0_pkey_ctx(CMS_RecipientInfo *ri);
 CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher);
 CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms,
                                            X509 *recip, unsigned int flags);
@@ -234,6 +240,7 @@ CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms,
                                                const EVP_CIPHER *kekciph);
 
 int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri);
+int CMS_RecipientInfo_encrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri);
 
 int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out,
                    unsigned int flags);
@@ -256,6 +263,8 @@ int CMS_SignedData_init(CMS_ContentInfo *cms);
 CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms,
                                 X509 *signer, EVP_PKEY *pk, const EVP_MD *md,
                                 unsigned int flags);
+EVP_PKEY_CTX *CMS_SignerInfo_get0_pkey_ctx(CMS_SignerInfo *si);
+EVP_MD_CTX *CMS_SignerInfo_get0_md_ctx(CMS_SignerInfo *si);
 STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms);
 
 void CMS_SignerInfo_set1_signer_cert(CMS_SignerInfo *si, X509 *signer);
@@ -268,6 +277,7 @@ int CMS_set1_signers_certs(CMS_ContentInfo *cms, STACK_OF(X509) *certs,
 void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk,
                               X509 **signer, X509_ALGOR **pdig,
                               X509_ALGOR **psig);
+ASN1_OCTET_STRING *CMS_SignerInfo_get0_signature(CMS_SignerInfo *si);
 int CMS_SignerInfo_sign(CMS_SignerInfo *si);
 int CMS_SignerInfo_verify(CMS_SignerInfo *si);
 int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain);
@@ -331,8 +341,37 @@ void CMS_ReceiptRequest_get0_values(CMS_ReceiptRequest *rr,
                                     int *pallorfirst,
                                     STACK_OF(GENERAL_NAMES) **plist,
                                     STACK_OF(GENERAL_NAMES) **prto);
-
 # endif
+int CMS_RecipientInfo_kari_get0_alg(CMS_RecipientInfo *ri,
+                                    X509_ALGOR **palg,
+                                    ASN1_OCTET_STRING **pukm);
+STACK_OF(CMS_RecipientEncryptedKey)
+*CMS_RecipientInfo_kari_get0_reks(CMS_RecipientInfo *ri);
+
+int CMS_RecipientInfo_kari_get0_orig_id(CMS_RecipientInfo *ri,
+                                        X509_ALGOR **pubalg,
+                                        ASN1_BIT_STRING **pubkey,
+                                        ASN1_OCTET_STRING **keyid,
+                                        X509_NAME **issuer,
+                                        ASN1_INTEGER **sno);
+
+int CMS_RecipientInfo_kari_orig_id_cmp(CMS_RecipientInfo *ri, X509 *cert);
+
+int CMS_RecipientEncryptedKey_get0_id(CMS_RecipientEncryptedKey *rek,
+                                      ASN1_OCTET_STRING **keyid,
+                                      ASN1_GENERALIZEDTIME **tm,
+                                      CMS_OtherKeyAttribute **other,
+                                      X509_NAME **issuer, ASN1_INTEGER **sno);
+int CMS_RecipientEncryptedKey_cert_cmp(CMS_RecipientEncryptedKey *rek,
+                                       X509 *cert);
+int CMS_RecipientInfo_kari_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pk);
+EVP_CIPHER_CTX *CMS_RecipientInfo_kari_get0_ctx(CMS_RecipientInfo *ri);
+int CMS_RecipientInfo_kari_decrypt(CMS_ContentInfo *cms,
+                                   CMS_RecipientInfo *ri,
+                                   CMS_RecipientEncryptedKey *rek);
+
+int CMS_SharedInfo_encode(unsigned char **pder, X509_ALGOR *kekalg,
+                          ASN1_OCTET_STRING *ukm, int keylen);
 
 /* BEGIN ERROR CODES */
 /*
@@ -377,6 +416,7 @@ void ERR_load_CMS_strings(void);
 # define CMS_F_CMS_ENVELOPEDDATA_CREATE                   124
 # define CMS_F_CMS_ENVELOPEDDATA_INIT_BIO                 125
 # define CMS_F_CMS_ENVELOPED_DATA_INIT                    126
+# define CMS_F_CMS_ENV_ASN1_CTRL                          171
 # define CMS_F_CMS_FINAL                                  127
 # define CMS_F_CMS_GET0_CERTIFICATE_CHOICES               128
 # define CMS_F_CMS_GET0_CONTENT                           129
@@ -388,6 +428,12 @@ void ERR_load_CMS_strings(void);
 # define CMS_F_CMS_RECEIPTREQUEST_CREATE0                 159
 # define CMS_F_CMS_RECEIPT_VERIFY                         160
 # define CMS_F_CMS_RECIPIENTINFO_DECRYPT                  134
+# define CMS_F_CMS_RECIPIENTINFO_ENCRYPT                  169
+# define CMS_F_CMS_RECIPIENTINFO_KARI_ENCRYPT             178
+# define CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ALG            175
+# define CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ORIG_ID        173
+# define CMS_F_CMS_RECIPIENTINFO_KARI_GET0_REKS           172
+# define CMS_F_CMS_RECIPIENTINFO_KARI_ORIG_ID_CMP         174
 # define CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT            135
 # define CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT            136
 # define CMS_F_CMS_RECIPIENTINFO_KEKRI_GET0_ID            137
@@ -401,6 +447,9 @@ void ERR_load_CMS_strings(void);
 # define CMS_F_CMS_RECIPIENTINFO_SET0_KEY                 144
 # define CMS_F_CMS_RECIPIENTINFO_SET0_PASSWORD            168
 # define CMS_F_CMS_RECIPIENTINFO_SET0_PKEY                145
+# define CMS_F_CMS_SD_ASN1_CTRL                           170
+# define CMS_F_CMS_SET1_IAS                               176
+# define CMS_F_CMS_SET1_KEYID                             177
 # define CMS_F_CMS_SET1_SIGNERIDENTIFIER                  146
 # define CMS_F_CMS_SET_DETACHED                           147
 # define CMS_F_CMS_SIGN                                   148
@@ -452,6 +501,7 @@ void ERR_load_CMS_strings(void);
 # define CMS_R_NOT_A_SIGNED_RECEIPT                       165
 # define CMS_R_NOT_ENCRYPTED_DATA                         122
 # define CMS_R_NOT_KEK                                    123
+# define CMS_R_NOT_KEY_AGREEMENT                          181
 # define CMS_R_NOT_KEY_TRANSPORT                          124
 # define CMS_R_NOT_PWRI                                   177
 # define CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE            125
index f9f267a..81a3407 100644 (file)
@@ -97,6 +97,8 @@ static int cms_si_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it,
             EVP_PKEY_free(si->pkey);
         if (si->signer)
             X509_free(si->signer);
+        if (si->pctx)
+            EVP_MD_CTX_cleanup(&si->mctx);
     }
     return 1;
 }
@@ -164,10 +166,21 @@ ASN1_CHOICE(CMS_KeyAgreeRecipientIdentifier) = {
   ASN1_IMP(CMS_KeyAgreeRecipientIdentifier, d.rKeyId, CMS_RecipientKeyIdentifier, 0)
 } ASN1_CHOICE_END(CMS_KeyAgreeRecipientIdentifier)
 
-ASN1_SEQUENCE(CMS_RecipientEncryptedKey) = {
+/*
+ * ASN1 callback for CMS_RecipientEncryptedKey. On ASN1_OP_FREE_POST the
+ * pkey member is released if it is set; every other operation is a no-op.
+ * Always returns 1.
+ */
+static int cms_rek_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it,
+                      void *exarg)
+{
+    CMS_RecipientEncryptedKey *entry;
+
+    if (operation != ASN1_OP_FREE_POST)
+        return 1;
+
+    entry = (CMS_RecipientEncryptedKey *)*pval;
+    if (entry->pkey != NULL)
+        EVP_PKEY_free(entry->pkey);
+    return 1;
+}
+
+ASN1_SEQUENCE_cb(CMS_RecipientEncryptedKey, cms_rek_cb) = {
         ASN1_SIMPLE(CMS_RecipientEncryptedKey, rid, CMS_KeyAgreeRecipientIdentifier),
         ASN1_SIMPLE(CMS_RecipientEncryptedKey, encryptedKey, ASN1_OCTET_STRING)
-} ASN1_SEQUENCE_END(CMS_RecipientEncryptedKey)
+} ASN1_SEQUENCE_END_cb(CMS_RecipientEncryptedKey, CMS_RecipientEncryptedKey)
 
 ASN1_SEQUENCE(CMS_OriginatorPublicKey) = {
   ASN1_SIMPLE(CMS_OriginatorPublicKey, algorithm, X509_ALGOR),
@@ -180,13 +193,29 @@ ASN1_CHOICE(CMS_OriginatorIdentifierOrKey) = {
   ASN1_IMP(CMS_OriginatorIdentifierOrKey, d.originatorKey, CMS_OriginatorPublicKey, 1)
 } ASN1_CHOICE_END(CMS_OriginatorIdentifierOrKey)
 
-ASN1_SEQUENCE(CMS_KeyAgreeRecipientInfo) = {
+/*
+ * ASN1 callback for CMS_KeyAgreeRecipientInfo. It manages the two members
+ * that are not part of the ASN1 template: the cipher context "ctx" and the
+ * public key context "pctx". Always returns 1.
+ */
+static int cms_kari_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it,
+                       void *exarg)
+{
+    CMS_KeyAgreeRecipientInfo *info = (CMS_KeyAgreeRecipientInfo *)*pval;
+
+    switch (operation) {
+    case ASN1_OP_NEW_POST:
+        /* Fresh structure: set up the cipher context and allow wrap modes */
+        EVP_CIPHER_CTX_init(&info->ctx);
+        EVP_CIPHER_CTX_set_flags(&info->ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW);
+        info->pctx = NULL;
+        break;
+
+    case ASN1_OP_FREE_POST:
+        /* Structure is going away: release both contexts */
+        if (info->pctx != NULL)
+            EVP_PKEY_CTX_free(info->pctx);
+        EVP_CIPHER_CTX_cleanup(&info->ctx);
+        break;
+
+    default:
+        break;
+    }
+    return 1;
+}
+
+ASN1_SEQUENCE_cb(CMS_KeyAgreeRecipientInfo, cms_kari_cb) = {
         ASN1_SIMPLE(CMS_KeyAgreeRecipientInfo, version, LONG),
         ASN1_EXP(CMS_KeyAgreeRecipientInfo, originator, CMS_OriginatorIdentifierOrKey, 0),
         ASN1_EXP_OPT(CMS_KeyAgreeRecipientInfo, ukm, ASN1_OCTET_STRING, 1),
         ASN1_SIMPLE(CMS_KeyAgreeRecipientInfo, keyEncryptionAlgorithm, X509_ALGOR),
         ASN1_SEQUENCE_OF(CMS_KeyAgreeRecipientInfo, recipientEncryptedKeys, CMS_RecipientEncryptedKey)
-} ASN1_SEQUENCE_END(CMS_KeyAgreeRecipientInfo)
+} ASN1_SEQUENCE_END_cb(CMS_KeyAgreeRecipientInfo, CMS_KeyAgreeRecipientInfo)
 
 ASN1_SEQUENCE(CMS_KEKIdentifier) = {
         ASN1_SIMPLE(CMS_KEKIdentifier, keyIdentifier, ASN1_OCTET_STRING),
@@ -225,6 +254,8 @@ static int cms_ri_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it,
                 EVP_PKEY_free(ktri->pkey);
             if (ktri->recip)
                 X509_free(ktri->recip);
+            if (ktri->pctx)
+                EVP_PKEY_CTX_free(ktri->pctx);
         } else if (ri->type == CMS_RECIPINFO_KEK) {
             CMS_KEKRecipientInfo *kekri = ri->d.kekri;
             if (kekri->key) {
@@ -379,3 +410,50 @@ ASN1_SEQUENCE(CMS_Receipt) = {
   ASN1_SIMPLE(CMS_Receipt, signedContentIdentifier, ASN1_OCTET_STRING),
   ASN1_SIMPLE(CMS_Receipt, originatorSignatureValue, ASN1_OCTET_STRING)
 } ASN1_SEQUENCE_END(CMS_Receipt)
+
+/*
+ * Utilities to encode the CMS_SharedInfo structure used during key
+ * derivation.
+ */
+
+typedef struct {
+    X509_ALGOR *keyInfo; /* algorithm identifier; CMS_SharedInfo_encode() sets it to kekalg */
+    ASN1_OCTET_STRING *entityUInfo; /* optional; CMS_SharedInfo_encode() sets it to ukm */
+    ASN1_OCTET_STRING *suppPubInfo; /* CMS_SharedInfo_encode() stores a 4 byte length here */
+} CMS_SharedInfo;
+
+ASN1_SEQUENCE(CMS_SharedInfo) = {
+  ASN1_SIMPLE(CMS_SharedInfo, keyInfo, X509_ALGOR),
+  ASN1_EXP_OPT(CMS_SharedInfo, entityUInfo, ASN1_OCTET_STRING, 0), /* tag 0 */
+  ASN1_EXP_OPT(CMS_SharedInfo, suppPubInfo, ASN1_OCTET_STRING, 2), /* tag 2 */
+} ASN1_SEQUENCE_END(CMS_SharedInfo)
+
+/*
+ * DER encode a CMS_SharedInfo built from the supplied pieces.
+ *
+ * pder    passed straight to ASN1_item_i2d() as its output argument
+ * kekalg  becomes the keyInfo field
+ * ukm     becomes the entityUInfo field
+ * keylen  multiplied by 8 and stored big-endian in a 4 byte suppPubInfo
+ *
+ * Returns whatever ASN1_item_i2d() returns. Nothing is allocated here other
+ * than by ASN1_item_i2d(): the structure and length buffer live on the
+ * stack.
+ */
+int CMS_SharedInfo_encode(unsigned char **pder, X509_ALGOR *kekalg,
+                          ASN1_OCTET_STRING *ukm, int keylen)
+{
+    /* Lets a CMS_SharedInfo pointer be handed over as an ASN1_VALUE pointer */
+    union {
+        CMS_SharedInfo *pecsi;
+        ASN1_VALUE *a;
+    } alias = {
+        NULL
+    };
+    CMS_SharedInfo info;
+    ASN1_OCTET_STRING lenstr;
+    unsigned char lenbuf[4];
+
+    /* Scale the length by 8, then serialise it most significant byte first */
+    keylen <<= 3;
+    lenbuf[3] = keylen & 0xff;
+    lenbuf[2] = (keylen >> 8) & 0xff;
+    lenbuf[1] = (keylen >> 16) & 0xff;
+    lenbuf[0] = (keylen >> 24) & 0xff;
+
+    lenstr.type = V_ASN1_OCTET_STRING;
+    lenstr.flags = 0;
+    lenstr.data = lenbuf;
+    lenstr.length = 4;
+
+    info.keyInfo = kekalg;
+    info.entityUInfo = ukm;
+    info.suppPubInfo = &lenstr;
+
+    alias.pecsi = &info;
+    return ASN1_item_i2d(alias.a, pder, ASN1_ITEM_rptr(CMS_SharedInfo));
+}
index 1c3046c..93c06cb 100644 (file)
@@ -100,6 +100,36 @@ static CMS_EnvelopedData *cms_enveloped_data_init(CMS_ContentInfo *cms)
     return cms_get0_enveloped(cms);
 }
 
+/*
+ * Call the ASN1_PKEY_CTRL_CMS_ENVELOPE control of the key belonging to a
+ * key transport or key agreement RecipientInfo; "cmd" is handed to the
+ * control unchanged.
+ *
+ * Returns 1 if the control succeeds or the key has no control method, and 0
+ * if the control fails, no key can be found for a key agreement recipient,
+ * or the RecipientInfo is of any other type.
+ */
+int cms_env_asn1_ctrl(CMS_RecipientInfo *ri, int cmd)
+{
+    EVP_PKEY *key;
+    int rc;
+
+    switch (ri->type) {
+    case CMS_RECIPINFO_TRANS:
+        key = ri->d.ktri->pkey;
+        break;
+
+    case CMS_RECIPINFO_AGREE:
+        /* The key is only reachable through the public key context */
+        if (ri->d.kari->pctx == NULL)
+            return 0;
+        key = EVP_PKEY_CTX_get0_pkey(ri->d.kari->pctx);
+        if (key == NULL)
+            return 0;
+        break;
+
+    default:
+        return 0;
+    }
+
+    /* Nothing to call: treated as success */
+    if (key->ameth == NULL || key->ameth->pkey_ctrl == NULL)
+        return 1;
+
+    rc = key->ameth->pkey_ctrl(key, ASN1_PKEY_CTRL_CMS_ENVELOPE, cmd, ri);
+    if (rc > 0)
+        return 1;
+
+    if (rc == -2) {
+        CMSerr(CMS_F_CMS_ENV_ASN1_CTRL,
+               CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE);
+    } else {
+        CMSerr(CMS_F_CMS_ENV_ASN1_CTRL, CMS_R_CTRL_FAILURE);
+    }
+    return 0;
+}
+
 STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms)
 {
     CMS_EnvelopedData *env;
@@ -114,6 +144,15 @@ int CMS_RecipientInfo_type(CMS_RecipientInfo *ri)
     return ri->type;
 }
 
+/*
+ * Return the public key context stored in a key transport or key agreement
+ * RecipientInfo (which may itself be NULL), or NULL for any other type.
+ */
+EVP_PKEY_CTX *CMS_RecipientInfo_get0_pkey_ctx(CMS_RecipientInfo *ri)
+{
+    switch (ri->type) {
+    case CMS_RECIPINFO_TRANS:
+        return ri->d.ktri->pctx;
+
+    case CMS_RECIPINFO_AGREE:
+        return ri->d.kari->pctx;
+
+    default:
+        return NULL;
+    }
+}
+
 CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher)
 {
     CMS_ContentInfo *cms;
@@ -137,19 +176,63 @@ CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher)
 
 /* Key Transport Recipient Info (KTRI) routines */
 
+/* Initialise a ktri based on passed certificate and key */
+
+static int cms_RecipientInfo_ktri_init(CMS_RecipientInfo *ri, X509 *recip,
+                                       EVP_PKEY *pk, unsigned int flags)
+{
+    CMS_KeyTransRecipientInfo *info;
+    int rid_kind;
+
+    info = M_ASN1_new_of(CMS_KeyTransRecipientInfo);
+    ri->d.ktri = info;
+    if (info == NULL)
+        return 0;
+    ri->type = CMS_RECIPINFO_TRANS;
+
+    /* Key identifier recipients are version 2, issuer and serial version 0 */
+    if ((flags & CMS_USE_KEYID) != 0) {
+        rid_kind = CMS_RECIPINFO_KEYIDENTIFIER;
+        info->version = 2;
+    } else {
+        rid_kind = CMS_RECIPINFO_ISSUER_SERIAL;
+        info->version = 0;
+    }
+
+    /*
+     * Not a typo: RecipientIdentifier and SignerIdentifier are the same
+     * structure.
+     */
+    if (!cms_set1_SignerIdentifier(info->rid, recip, rid_kind))
+        return 0;
+
+    /* The structure keeps its own reference to the certificate and key */
+    CRYPTO_add(&recip->references, 1, CRYPTO_LOCK_X509);
+    CRYPTO_add(&pk->references, 1, CRYPTO_LOCK_EVP_PKEY);
+    info->recip = recip;
+    info->pkey = pk;
+
+    /* Without CMS_KEY_PARAM the algorithm control is invoked right away */
+    if ((flags & CMS_KEY_PARAM) == 0)
+        return cms_env_asn1_ctrl(ri, 0) ? 1 : 0;
+
+    /* With CMS_KEY_PARAM only an encrypt context is prepared for now */
+    info->pctx = EVP_PKEY_CTX_new(info->pkey, NULL);
+    if (info->pctx == NULL)
+        return 0;
+    if (EVP_PKEY_encrypt_init(info->pctx) <= 0)
+        return 0;
+    return 1;
+}
+
 /*
- * Add a recipient certificate. For now only handle key transport. If we ever
- * handle key agreement will need updating.
+ * Add a recipient certificate using appropriate type of RecipientInfo
  */
 
 CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms,
                                            X509 *recip, unsigned int flags)
 {
     CMS_RecipientInfo *ri = NULL;
-    CMS_KeyTransRecipientInfo *ktri;
     CMS_EnvelopedData *env;
     EVP_PKEY *pk = NULL;
-    int i, type;
     env = cms_get0_enveloped(cms);
     if (!env)
         goto err;
@@ -159,59 +242,36 @@ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms,
     if (!ri)
         goto merr;
 
-    /* Initialize and add key transport recipient info */
-
-    ri->d.ktri = M_ASN1_new_of(CMS_KeyTransRecipientInfo);
-    if (!ri->d.ktri)
-        goto merr;
-    ri->type = CMS_RECIPINFO_TRANS;
-
-    ktri = ri->d.ktri;
-
-    X509_check_purpose(recip, -1, -1);
     pk = X509_get_pubkey(recip);
     if (!pk) {
         CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, CMS_R_ERROR_GETTING_PUBLIC_KEY);
         goto err;
     }
-    CRYPTO_add(&recip->references, 1, CRYPTO_LOCK_X509);
-    ktri->pkey = pk;
-    ktri->recip = recip;
 
-    if (flags & CMS_USE_KEYID) {
-        ktri->version = 2;
-        if (env->version < 2)
-            env->version = 2;
-        type = CMS_RECIPINFO_KEYIDENTIFIER;
-    } else {
-        ktri->version = 0;
-        type = CMS_RECIPINFO_ISSUER_SERIAL;
-    }
+    switch (cms_pkey_get_ri_type(pk)) {
 
-    /*
-     * Not a typo: RecipientIdentifier and SignerIdentifier are the same
-     * structure.
-     */
+    case CMS_RECIPINFO_TRANS:
+        if (!cms_RecipientInfo_ktri_init(ri, recip, pk, flags))
+            goto err;
+        break;
 
-    if (!cms_set1_SignerIdentifier(ktri->rid, recip, type))
+    case CMS_RECIPINFO_AGREE:
+        if (!cms_RecipientInfo_kari_init(ri, recip, pk, flags))
+            goto err;
+        break;
+
+    default:
+        CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT,
+               CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE);
         goto err;
 
-    if (pk->ameth && pk->ameth->pkey_ctrl) {
-        i = pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_ENVELOPE, 0, ri);
-        if (i == -2) {
-            CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT,
-                   CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE);
-            goto err;
-        }
-        if (i <= 0) {
-            CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, CMS_R_CTRL_FAILURE);
-            goto err;
-        }
     }
 
     if (!sk_CMS_RecipientInfo_push(env->recipientInfos, ri))
         goto merr;
 
+    EVP_PKEY_free(pk);
+
     return ri;
 
  merr:
@@ -219,6 +279,8 @@ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms,
  err:
     if (ri)
         M_ASN1_free_of(ri, CMS_RecipientInfo);
+    if (pk)
+        EVP_PKEY_free(pk);
     return NULL;
 
 }
@@ -288,7 +350,7 @@ static int cms_RecipientInfo_ktri_encrypt(CMS_ContentInfo *cms,
 {
     CMS_KeyTransRecipientInfo *ktri;
     CMS_EncryptedContentInfo *ec;
-    EVP_PKEY_CTX *pctx = NULL;
+    EVP_PKEY_CTX *pctx;
     unsigned char *ek = NULL;
     size_t eklen;
 
@@ -301,12 +363,19 @@ static int cms_RecipientInfo_ktri_encrypt(CMS_ContentInfo *cms,
     ktri = ri->d.ktri;
     ec = cms->d.envelopedData->encryptedContentInfo;
 
-    pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL);
-    if (!pctx)
-        return 0;
+    pctx = ktri->pctx;
 
-    if (EVP_PKEY_encrypt_init(pctx) <= 0)
-        goto err;
+    if (pctx) {
+        if (!cms_env_asn1_ctrl(ri, 0))
+            goto err;
+    } else {
+        pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL);
+        if (!pctx)
+            return 0;
+
+        if (EVP_PKEY_encrypt_init(pctx) <= 0)
+            goto err;
+    }
 
     if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_ENCRYPT,
                           EVP_PKEY_CTRL_CMS_ENCRYPT, 0, ri) <= 0) {
@@ -333,8 +402,10 @@ static int cms_RecipientInfo_ktri_encrypt(CMS_ContentInfo *cms,
     ret = 1;
 
  err:
-    if (pctx)
+    if (pctx) {
         EVP_PKEY_CTX_free(pctx);
+        ktri->pctx = NULL;
+    }
     if (ek)
         OPENSSL_free(ek);
     return ret;
@@ -347,7 +418,7 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms,
                                           CMS_RecipientInfo *ri)
 {
     CMS_KeyTransRecipientInfo *ktri = ri->d.ktri;
-    EVP_PKEY_CTX *pctx = NULL;
+    EVP_PKEY *pkey = ktri->pkey;
     unsigned char *ek = NULL;
     size_t eklen;
     int ret = 0;
@@ -359,20 +430,23 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms,
         return 0;
     }
 
-    pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL);
-    if (!pctx)
+    ktri->pctx = EVP_PKEY_CTX_new(pkey, NULL);
+    if (!ktri->pctx)
         return 0;
 
-    if (EVP_PKEY_decrypt_init(pctx) <= 0)
+    if (EVP_PKEY_decrypt_init(ktri->pctx) <= 0)
         goto err;
 
-    if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_DECRYPT,
+    if (!cms_env_asn1_ctrl(ri, 1))
+        goto err;
+
+    if (EVP_PKEY_CTX_ctrl(ktri->pctx, -1, EVP_PKEY_OP_DECRYPT,
                           EVP_PKEY_CTRL_CMS_DECRYPT, 0, ri) <= 0) {
         CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, CMS_R_CTRL_ERROR);
         goto err;
     }
 
-    if (EVP_PKEY_decrypt(pctx, NULL, &eklen,
+    if (EVP_PKEY_decrypt(ktri->pctx, NULL, &eklen,
                          ktri->encryptedKey->data,
                          ktri->encryptedKey->length) <= 0)
         goto err;
@@ -384,7 +458,7 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms,
         goto err;
     }
 
-    if (EVP_PKEY_decrypt(pctx, ek, &eklen,
+    if (EVP_PKEY_decrypt(ktri->pctx, ek, &eklen,
                          ktri->encryptedKey->data,
                          ktri->encryptedKey->length) <= 0) {
         CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, CMS_R_CMS_LIB);
@@ -402,8 +476,10 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms,
     ec->keylen = eklen;
 
  err:
-    if (pctx)
-        EVP_PKEY_CTX_free(pctx);
+    if (ktri->pctx) {
+        EVP_PKEY_CTX_free(ktri->pctx);
+        ktri->pctx = NULL;
+    }
     if (!ret && ek)
         OPENSSL_free(ek);
 
@@ -745,12 +821,99 @@ int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri)
     }
 }
 
+/*
+ * Dispatch to the encrypt routine matching the RecipientInfo type and
+ * return its result. An unsupported type raises
+ * CMS_R_UNSUPPORTED_RECIPIENT_TYPE and returns 0.
+ */
+int CMS_RecipientInfo_encrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri)
+{
+    const int kind = ri->type;
+
+    if (kind == CMS_RECIPINFO_TRANS)
+        return cms_RecipientInfo_ktri_encrypt(cms, ri);
+
+    if (kind == CMS_RECIPINFO_AGREE)
+        return cms_RecipientInfo_kari_encrypt(cms, ri);
+
+    if (kind == CMS_RECIPINFO_KEK)
+        return cms_RecipientInfo_kekri_encrypt(cms, ri);
+
+    if (kind == CMS_RECIPINFO_PASS)
+        return cms_RecipientInfo_pwri_crypt(cms, ri, 1);
+
+    CMSerr(CMS_F_CMS_RECIPIENTINFO_ENCRYPT,
+           CMS_R_UNSUPPORTED_RECIPIENT_TYPE);
+    return 0;
+}
+
+/* Check structures and fixup version numbers (if necessary) */
+
+static void cms_env_set_originfo_version(CMS_EnvelopedData *env)
+{
+    CMS_OriginatorInfo *orig = env->originatorInfo;
+    int idx;
+
+    /* No originatorInfo: nothing here can influence the version */
+    if (!orig)
+        return;
+
+    /* "Other" certificates force version 4, v2 attribute certs at least 3 */
+    for (idx = 0; idx < sk_CMS_CertificateChoices_num(orig->certificates);
+         idx++) {
+        CMS_CertificateChoices *cert =
+            sk_CMS_CertificateChoices_value(orig->certificates, idx);
+
+        if (cert->type == CMS_CERTCHOICE_OTHER) {
+            env->version = 4;
+            return;
+        }
+        if (cert->type == CMS_CERTCHOICE_V2ACERT && env->version < 3)
+            env->version = 3;
+    }
+
+    /* "Other" revocation info also forces version 4 */
+    for (idx = 0; idx < sk_CMS_RevocationInfoChoice_num(orig->crls); idx++) {
+        CMS_RevocationInfoChoice *rev =
+            sk_CMS_RevocationInfoChoice_value(orig->crls, idx);
+
+        if (rev->type == CMS_REVCHOICE_OTHER) {
+            env->version = 4;
+            return;
+        }
+    }
+}
+
+/*
+ * Work out the EnvelopedData version from its contents and store it in
+ * env->version. A version of 4 or more is left untouched. Otherwise:
+ *  - originatorInfo certificates/CRLs may raise it to 3 or 4 (see
+ *    cms_env_set_originfo_version());
+ *  - any password or "other" RecipientInfo gives version 3;
+ *  - any RecipientInfo that is not a version 0 key transport one, or the
+ *    presence of originatorInfo or unprotectedAttrs, gives version 2;
+ *  - anything else is version 0.
+ */
+static void cms_env_set_version(CMS_EnvelopedData *env)
+{
+    int i;
+    CMS_RecipientInfo *ri;
+
+    /*
+     * Can't set version higher than 4 so if 4 or more already nothing to do.
+     */
+    if (env->version >= 4)
+        return;
+
+    cms_env_set_originfo_version(env);
+
+    if (env->version >= 3)
+        return;
+
+    for (i = 0; i < sk_CMS_RecipientInfo_num(env->recipientInfos); i++) {
+        ri = sk_CMS_RecipientInfo_value(env->recipientInfos, i);
+        if (ri->type == CMS_RECIPINFO_PASS || ri->type == CMS_RECIPINFO_OTHER) {
+            env->version = 3;
+            return;
+        } else if (ri->type != CMS_RECIPINFO_TRANS
+                   || ri->d.ktri->version != 0) {
+            env->version = 2;
+        }
+    }
+    /*
+     * originatorInfo or unprotectedAttrs also require version 2. This check
+     * has to come before the fall back to version 0 below, otherwise the
+     * value assigned here would be overwritten straight away.
+     */
+    if (env->originatorInfo || env->unprotectedAttrs)
+        env->version = 2;
+    if (env->version == 2)
+        return;
+    env->version = 0;
+}
+
 BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms)
 {
     CMS_EncryptedContentInfo *ec;
     STACK_OF(CMS_RecipientInfo) *rinfos;
     CMS_RecipientInfo *ri;
-    int i, r, ok = 0;
+    int i, ok = 0;
     BIO *ret;
 
     /* Get BIO first to set up key */
@@ -769,32 +932,13 @@ BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms)
 
     for (i = 0; i < sk_CMS_RecipientInfo_num(rinfos); i++) {
         ri = sk_CMS_RecipientInfo_value(rinfos, i);
-
-        switch (ri->type) {
-        case CMS_RECIPINFO_TRANS:
-            r = cms_RecipientInfo_ktri_encrypt(cms, ri);
-            break;
-
-        case CMS_RECIPINFO_KEK:
-            r = cms_RecipientInfo_kekri_encrypt(cms, ri);
-            break;
-
-        case CMS_RECIPINFO_PASS:
-            r = cms_RecipientInfo_pwri_crypt(cms, ri, 1);
-            break;
-
-        default:
-            CMSerr(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO,
-                   CMS_R_UNSUPPORTED_RECIPIENT_TYPE);
-            goto err;
-        }
-
-        if (r <= 0) {
+        if (CMS_RecipientInfo_encrypt(cms, ri) <= 0) {
             CMSerr(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO,
                    CMS_R_ERROR_SETTING_RECIPIENTINFO);
             goto err;
         }
     }
+    cms_env_set_version(cms->d.envelopedData);
 
     ok = 1;
 
@@ -812,3 +956,19 @@ BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms)
     return NULL;
 
 }
+
+/*
+ * Get RecipientInfo type (if any) supported by a key (public or private). To
+ * retain compatibility with previous behaviour if the ctrl value isn't
+ * supported we assume key transport.
+ */
+int cms_pkey_get_ri_type(EVP_PKEY *pk)
+{
+    int ri_type;
+
+    /* No control method at all: fall back to key transport */
+    if (pk->ameth == NULL || pk->ameth->pkey_ctrl == NULL)
+        return CMS_RECIPINFO_TRANS;
+
+    /* Control not supported or failed: fall back to key transport */
+    if (pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_RI_TYPE, 0, &ri_type) <= 0)
+        return CMS_RECIPINFO_TRANS;
+
+    return ri_type;
+}
index faf2fcc..15572ea 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/cms/cms_err.c */
 /* ====================================================================
- * Copyright (c) 1999-2009 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2013 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -110,6 +110,7 @@ static ERR_STRING_DATA CMS_str_functs[] = {
     {ERR_FUNC(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO),
      "cms_EnvelopedData_init_bio"},
     {ERR_FUNC(CMS_F_CMS_ENVELOPED_DATA_INIT), "CMS_ENVELOPED_DATA_INIT"},
+    {ERR_FUNC(CMS_F_CMS_ENV_ASN1_CTRL), "cms_env_asn1_ctrl"},
     {ERR_FUNC(CMS_F_CMS_FINAL), "CMS_final"},
     {ERR_FUNC(CMS_F_CMS_GET0_CERTIFICATE_CHOICES),
      "CMS_GET0_CERTIFICATE_CHOICES"},
@@ -124,6 +125,17 @@ static ERR_STRING_DATA CMS_str_functs[] = {
      "CMS_ReceiptRequest_create0"},
     {ERR_FUNC(CMS_F_CMS_RECEIPT_VERIFY), "cms_Receipt_verify"},
     {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_DECRYPT), "CMS_RecipientInfo_decrypt"},
+    {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_ENCRYPT), "CMS_RecipientInfo_encrypt"},
+    {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_ENCRYPT),
+     "cms_RecipientInfo_kari_encrypt"},
+    {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ALG),
+     "CMS_RecipientInfo_kari_get0_alg"},
+    {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ORIG_ID),
+     "CMS_RecipientInfo_kari_get0_orig_id"},
+    {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_REKS),
+     "CMS_RecipientInfo_kari_get0_reks"},
+    {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_ORIG_ID_CMP),
+     "CMS_RecipientInfo_kari_orig_id_cmp"},
     {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT),
      "CMS_RECIPIENTINFO_KEKRI_DECRYPT"},
     {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT),
@@ -150,6 +162,9 @@ static ERR_STRING_DATA CMS_str_functs[] = {
      "CMS_RecipientInfo_set0_password"},
     {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_PKEY),
      "CMS_RecipientInfo_set0_pkey"},
+    {ERR_FUNC(CMS_F_CMS_SD_ASN1_CTRL), "CMS_SD_ASN1_CTRL"},
+    {ERR_FUNC(CMS_F_CMS_SET1_IAS), "cms_set1_ias"},
+    {ERR_FUNC(CMS_F_CMS_SET1_KEYID), "cms_set1_keyid"},
     {ERR_FUNC(CMS_F_CMS_SET1_SIGNERIDENTIFIER), "cms_set1_SignerIdentifier"},
     {ERR_FUNC(CMS_F_CMS_SET_DETACHED), "CMS_set_detached"},
     {ERR_FUNC(CMS_F_CMS_SIGN), "CMS_sign"},
@@ -221,6 +236,7 @@ static ERR_STRING_DATA CMS_str_reasons[] = {
     {ERR_REASON(CMS_R_NOT_A_SIGNED_RECEIPT), "not a signed receipt"},
     {ERR_REASON(CMS_R_NOT_ENCRYPTED_DATA), "not encrypted data"},
     {ERR_REASON(CMS_R_NOT_KEK), "not kek"},
+    {ERR_REASON(CMS_R_NOT_KEY_AGREEMENT), "not key agreement"},
     {ERR_REASON(CMS_R_NOT_KEY_TRANSPORT), "not key transport"},
     {ERR_REASON(CMS_R_NOT_PWRI), "not pwri"},
     {ERR_REASON(CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE),
diff --git a/crypto/cms/cms_kari.c b/crypto/cms/cms_kari.c
new file mode 100644 (file)
index 0000000..2cfcdb2
--- /dev/null
@@ -0,0 +1,465 @@
+/* crypto/cms/cms_kari.c */
+/*
+ * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project.
+ */
+/* ====================================================================
+ * Copyright (c) 2013 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#include "cryptlib.h"
+#include <openssl/asn1t.h>
+#include <openssl/pem.h>
+#include <openssl/x509v3.h>
+#include <openssl/err.h>
+#include <openssl/cms.h>
+#include <openssl/rand.h>
+#include <openssl/aes.h>
+#include "cms_lcl.h"
+#include "asn1_locl.h"
+
+DECLARE_ASN1_ITEM(CMS_KeyAgreeRecipientInfo)
+DECLARE_ASN1_ITEM(CMS_RecipientEncryptedKey)
+DECLARE_ASN1_ITEM(CMS_OriginatorPublicKey)
+DECLARE_ASN1_ITEM(CMS_RecipientKeyIdentifier)
+
+/* Key Agreement Recipient Info (KARI) routines */
+
+/*
+ * Retrieve the key encryption algorithm and user keying material of a key
+ * agreement RecipientInfo. Either output pointer may be NULL if the value
+ * is not wanted. Returns 1 on success, or 0 (with an error raised) if "ri"
+ * is not a key agreement RecipientInfo.
+ */
+int CMS_RecipientInfo_kari_get0_alg(CMS_RecipientInfo *ri,
+                                    X509_ALGOR **palg,
+                                    ASN1_OCTET_STRING **pukm)
+{
+    CMS_KeyAgreeRecipientInfo *kari;
+
+    if (ri->type != CMS_RECIPINFO_AGREE) {
+        CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ALG,
+               CMS_R_NOT_KEY_AGREEMENT);
+        return 0;
+    }
+
+    kari = ri->d.kari;
+    if (palg != NULL)
+        *palg = kari->keyEncryptionAlgorithm;
+    if (pukm != NULL)
+        *pukm = kari->ukm;
+    return 1;
+}
+
+/* Retrieve recipient encrypted keys from a kari */
+
+STACK_OF(CMS_RecipientEncryptedKey)
+*CMS_RecipientInfo_kari_get0_reks(CMS_RecipientInfo *ri)
+{
+    /* Only key agreement recipients carry a list of encrypted keys */
+    if (ri->type == CMS_RECIPINFO_AGREE)
+        return ri->d.kari->recipientEncryptedKeys;
+
+    CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_REKS,
+           CMS_R_NOT_KEY_AGREEMENT);
+    return NULL;
+}
+
+/*
+ * Retrieve the originator identification of a key agreement RecipientInfo.
+ * All non-NULL output pointers are first set to NULL; then only those that
+ * match the form actually used (issuer and serial number, key identifier,
+ * or public key) are filled in.
+ *
+ * Returns 1 on success, 0 if "ri" is not a key agreement RecipientInfo (an
+ * error is raised) or the originator has an unrecognised type.
+ */
+int CMS_RecipientInfo_kari_get0_orig_id(CMS_RecipientInfo *ri,
+                                        X509_ALGOR **pubalg,
+                                        ASN1_BIT_STRING **pubkey,
+                                        ASN1_OCTET_STRING **keyid,
+                                        X509_NAME **issuer,
+                                        ASN1_INTEGER **sno)
+{
+    CMS_OriginatorIdentifierOrKey *orig;
+
+    if (ri->type != CMS_RECIPINFO_AGREE) {
+        CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ORIG_ID,
+               CMS_R_NOT_KEY_AGREEMENT);
+        return 0;
+    }
+    orig = ri->d.kari->originator;
+
+    /* Start with every requested output cleared */
+    if (pubalg != NULL)
+        *pubalg = NULL;
+    if (pubkey != NULL)
+        *pubkey = NULL;
+    if (keyid != NULL)
+        *keyid = NULL;
+    if (issuer != NULL)
+        *issuer = NULL;
+    if (sno != NULL)
+        *sno = NULL;
+
+    if (orig->type == CMS_OIK_ISSUER_SERIAL) {
+        if (issuer != NULL)
+            *issuer = orig->d.issuerAndSerialNumber->issuer;
+        if (sno != NULL)
+            *sno = orig->d.issuerAndSerialNumber->serialNumber;
+        return 1;
+    }
+
+    if (orig->type == CMS_OIK_KEYIDENTIFIER) {
+        if (keyid != NULL)
+            *keyid = orig->d.subjectKeyIdentifier;
+        return 1;
+    }
+
+    if (orig->type == CMS_OIK_PUBKEY) {
+        if (pubalg != NULL)
+            *pubalg = orig->d.originatorKey->algorithm;
+        if (pubkey != NULL)
+            *pubkey = orig->d.originatorKey->publicKey;
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Compare the originator identifier of a key agreement RecipientInfo with
+ * a certificate. Returns the result of cms_ias_cert_cmp() or
+ * cms_keyid_cert_cmp() depending on the identifier form, -1 if the
+ * originator uses any other form, and -2 (with an error raised) if "ri" is
+ * not a key agreement RecipientInfo.
+ */
+int CMS_RecipientInfo_kari_orig_id_cmp(CMS_RecipientInfo *ri, X509 *cert)
+{
+    CMS_OriginatorIdentifierOrKey *orig;
+
+    if (ri->type != CMS_RECIPINFO_AGREE) {
+        CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_ORIG_ID_CMP,
+               CMS_R_NOT_KEY_AGREEMENT);
+        return -2;
+    }
+
+    orig = ri->d.kari->originator;
+    if (orig->type == CMS_OIK_KEYIDENTIFIER)
+        return cms_keyid_cert_cmp(orig->d.subjectKeyIdentifier, cert);
+    if (orig->type == CMS_OIK_ISSUER_SERIAL)
+        return cms_ias_cert_cmp(orig->d.issuerAndSerialNumber, cert);
+    return -1;
+}
+
+/*
+ * Retrieve the recipient identifier of a RecipientEncryptedKey. For the
+ * issuer and serial number form "issuer" and "sno" are filled in; for the
+ * key identifier form "keyid", "tm" and "other" are. Outputs belonging to
+ * the form not in use are set to NULL. Any output pointer may be NULL.
+ *
+ * Returns 1 on success, or 0 (leaving all outputs untouched) if the
+ * identifier has an unrecognised type.
+ */
+int CMS_RecipientEncryptedKey_get0_id(CMS_RecipientEncryptedKey *rek,
+                                      ASN1_OCTET_STRING **keyid,
+                                      ASN1_GENERALIZEDTIME **tm,
+                                      CMS_OtherKeyAttribute **other,
+                                      X509_NAME **issuer, ASN1_INTEGER **sno)
+{
+    CMS_KeyAgreeRecipientIdentifier *id = rek->rid;
+    const int by_ias = (id->type == CMS_REK_ISSUER_SERIAL);
+
+    if (!by_ias && id->type != CMS_REK_KEYIDENTIFIER)
+        return 0;
+
+    /* Clear everything that was asked for, then fill in what applies */
+    if (keyid != NULL)
+        *keyid = NULL;
+    if (tm != NULL)
+        *tm = NULL;
+    if (other != NULL)
+        *other = NULL;
+    if (issuer != NULL)
+        *issuer = NULL;
+    if (sno != NULL)
+        *sno = NULL;
+
+    if (by_ias) {
+        if (issuer != NULL)
+            *issuer = id->d.issuerAndSerialNumber->issuer;
+        if (sno != NULL)
+            *sno = id->d.issuerAndSerialNumber->serialNumber;
+    } else {
+        if (keyid != NULL)
+            *keyid = id->d.rKeyId->subjectKeyIdentifier;
+        if (tm != NULL)
+            *tm = id->d.rKeyId->date;
+        if (other != NULL)
+            *other = id->d.rKeyId->other;
+    }
+    return 1;
+}
+
+/*
+ * Compare the recipient identifier of a RecipientEncryptedKey with a
+ * certificate. Returns the result of cms_ias_cert_cmp() or
+ * cms_keyid_cert_cmp() depending on the identifier form, or -1 if the form
+ * is not recognised.
+ */
+int CMS_RecipientEncryptedKey_cert_cmp(CMS_RecipientEncryptedKey *rek,
+                                       X509 *cert)
+{
+    CMS_KeyAgreeRecipientIdentifier *id = rek->rid;
+
+    if (id->type == CMS_REK_KEYIDENTIFIER)
+        return cms_keyid_cert_cmp(id->d.rKeyId->subjectKeyIdentifier, cert);
+    if (id->type == CMS_REK_ISSUER_SERIAL)
+        return cms_ias_cert_cmp(id->d.issuerAndSerialNumber, cert);
+    return -1;
+}
+
+/*
+ * Replace the public key context of a key agreement RecipientInfo with a
+ * new one created from "pk" and initialised for key derivation. Any
+ * existing context is freed first. If "pk" is NULL the old context is just
+ * freed and 1 is returned.
+ *
+ * Note that ri->d.kari is used without checking ri->type, so the caller
+ * must pass a key agreement RecipientInfo.
+ *
+ * Returns 1 on success, 0 if the context cannot be created or initialised
+ * (kari->pctx is left NULL in that case).
+ */
+int CMS_RecipientInfo_kari_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pk)
+{
+    EVP_PKEY_CTX *pctx;
+    CMS_KeyAgreeRecipientInfo *kari = ri->d.kari;
+    if (kari->pctx) {
+        EVP_PKEY_CTX_free(kari->pctx);
+        kari->pctx = NULL;
+    }
+    if (!pk)
+        return 1;
+    pctx = EVP_PKEY_CTX_new(pk, NULL);
+    /*
+     * EVP_PKEY_derive_init() reports failure with zero or a negative value,
+     * so a plain "!" test would accept a negative return as success.
+     */
+    if (!pctx || EVP_PKEY_derive_init(pctx) <= 0)
+        goto err;
+    kari->pctx = pctx;
+    return 1;
+ err:
+    if (pctx)
+        EVP_PKEY_CTX_free(pctx);
+    return 0;
+}
+
+/*
+ * Return a pointer to the cipher context embedded in a key agreement
+ * RecipientInfo, or NULL if "ri" is of any other type.
+ */
+EVP_CIPHER_CTX *CMS_RecipientInfo_kari_get0_ctx(CMS_RecipientInfo *ri)
+{
+    if (ri->type != CMS_RECIPINFO_AGREE)
+        return NULL;
+    return &ri->d.kari->ctx;
+}
+
+/*
+ * Derive KEK and decrypt/encrypt with it to produce either the original CEK
+ * or the encrypted CEK.
+ */
+
+static int cms_kek_cipher(unsigned char **pout, size_t *poutlen,
+                          const unsigned char *in, size_t inlen,
+                          CMS_KeyAgreeRecipientInfo *kari, int enc)
+{
+    /* Key encryption key, sized for the largest supported cipher key */
+    unsigned char kek[EVP_MAX_KEY_LENGTH];
+    size_t keklen = EVP_CIPHER_CTX_key_length(&kari->ctx);
+    unsigned char *buf = NULL;
+    int buflen;
+    int ok = 0;
+
+    if (keklen > EVP_MAX_KEY_LENGTH)
+        return 0;
+
+    /*
+     * Each step only runs if the previous one succeeded: derive the KEK,
+     * load it into the cipher context, ask for the output length (NULL
+     * output buffer), allocate, then do the real operation.
+     */
+    if (EVP_PKEY_derive(kari->pctx, kek, &keklen) > 0
+        && EVP_CipherInit_ex(&kari->ctx, NULL, NULL, kek, NULL, enc)
+        && EVP_CipherUpdate(&kari->ctx, NULL, &buflen, in, inlen)
+        && (buf = OPENSSL_malloc(buflen)) != NULL
+        && EVP_CipherUpdate(&kari->ctx, buf, &buflen, in, inlen)) {
+        *pout = buf;
+        *poutlen = (size_t)buflen;
+        ok = 1;
+    }
+
+    /* Wipe the KEK and drop both contexts whatever the outcome */
+    OPENSSL_cleanse(kek, keklen);
+    if (!ok && buf != NULL)
+        OPENSSL_free(buf);
+    EVP_CIPHER_CTX_cleanup(&kari->ctx);
+    EVP_PKEY_CTX_free(kari->pctx);
+    kari->pctx = NULL;
+    return ok;
+}
+
+/*
+ * Decrypt the content encryption key held in "rek" using the key agreement
+ * RecipientInfo "ri" and install it as the key of the enveloped data's
+ * encrypted content info. Any key already stored there is cleansed and
+ * freed first. Returns 1 on success, 0 on failure.
+ */
+int CMS_RecipientInfo_kari_decrypt(CMS_ContentInfo *cms,
+                                   CMS_RecipientInfo *ri,
+                                   CMS_RecipientEncryptedKey *rek)
+{
+    CMS_EncryptedContentInfo *ec;
+    unsigned char *wrapped = rek->encryptedKey->data;
+    size_t wrappedlen = rek->encryptedKey->length;
+    unsigned char *cek = NULL;
+    size_t ceklen;
+
+    /* Set up all parameters to derive the KEK, then try to decrypt the CEK */
+    if (!cms_env_asn1_ctrl(ri, 1)
+        || !cms_kek_cipher(&cek, &ceklen, wrapped, wrappedlen,
+                           ri->d.kari, 0)) {
+        if (cek != NULL)
+            OPENSSL_free(cek);
+        return 0;
+    }
+
+    ec = cms->d.envelopedData->encryptedContentInfo;
+    if (ec->key != NULL) {
+        OPENSSL_cleanse(ec->key, ec->keylen);
+        OPENSSL_free(ec->key);
+    }
+    /* The content info now owns the recovered key */
+    ec->key = cek;
+    ec->keylen = ceklen;
+    return 1;
+}
+
+/*
+ * Create ephemeral key and initialise context based on it.
+ *
+ * A key is generated from a context created on |pk|; a second context,
+ * created on the generated key and initialised for derivation, is stored in
+ * kari->pctx. The local pointer to the generated key is released on every
+ * path. Returns 1 on success, 0 on failure.
+ */
+static int cms_kari_create_ephemeral_key(CMS_KeyAgreeRecipientInfo *kari,
+                                         EVP_PKEY *pk)
+{
+    EVP_PKEY *ephemeral = NULL;
+    EVP_PKEY_CTX *ctx = EVP_PKEY_CTX_new(pk, NULL);
+    int ok = 0;
+
+    if (ctx == NULL
+        || EVP_PKEY_keygen_init(ctx) <= 0
+        || EVP_PKEY_keygen(ctx, &ephemeral) <= 0)
+        goto done;
+    /* Swap the keygen context for one created on the generated key */
+    EVP_PKEY_CTX_free(ctx);
+    ctx = EVP_PKEY_CTX_new(ephemeral, NULL);
+    if (ctx == NULL || EVP_PKEY_derive_init(ctx) <= 0)
+        goto done;
+    kari->pctx = ctx;
+    ok = 1;
+ done:
+    if (!ok && ctx)
+        EVP_PKEY_CTX_free(ctx);
+    if (ephemeral)
+        EVP_PKEY_free(ephemeral);
+    return ok;
+}
+
+/*
+ * Initialise a kari based on passed certificate and key.
+ *
+ * Allocates the KeyAgreeRecipientInfo of |ri| with a single
+ * RecipientEncryptedKey that identifies |recip| (by subject key identifier
+ * when CMS_USE_KEYID is set in |flags|, otherwise by issuer and serial
+ * number), creates the ephemeral key context and stores |pk| in the entry
+ * after incrementing its reference count. Returns 1 on success, 0 on
+ * failure; structures already attached to |ri| are not released here.
+ */
+
+int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip,
+                                EVP_PKEY *pk, unsigned int flags)
+{
+    CMS_KeyAgreeRecipientInfo *kari;
+    CMS_RecipientEncryptedKey *rek = NULL;
+
+    ri->d.kari = M_ASN1_new_of(CMS_KeyAgreeRecipientInfo);
+    if (!ri->d.kari)
+        return 0;
+    ri->type = CMS_RECIPINFO_AGREE;
+
+    kari = ri->d.kari;
+    kari->version = 3;
+
+    rek = M_ASN1_new_of(CMS_RecipientEncryptedKey);
+    /*
+     * A NULL element can be pushed onto a stack successfully, so the
+     * allocation has to be checked before the push: otherwise rek->rid
+     * below would be dereferenced through a NULL pointer.
+     */
+    if (rek == NULL)
+        return 0;
+    if (!sk_CMS_RecipientEncryptedKey_push(kari->recipientEncryptedKeys, rek)) {
+        M_ASN1_free_of(rek, CMS_RecipientEncryptedKey);
+        return 0;
+    }
+
+    /* From here on |rek| is reachable through kari->recipientEncryptedKeys */
+    if (flags & CMS_USE_KEYID) {
+        rek->rid->type = CMS_REK_KEYIDENTIFIER;
+        rek->rid->d.rKeyId = M_ASN1_new_of(CMS_RecipientKeyIdentifier);
+        if (rek->rid->d.rKeyId == NULL)
+            return 0;
+        if (!cms_set1_keyid(&rek->rid->d.rKeyId->subjectKeyIdentifier, recip))
+            return 0;
+    } else {
+        rek->rid->type = CMS_REK_ISSUER_SERIAL;
+        if (!cms_set1_ias(&rek->rid->d.issuerAndSerialNumber, recip))
+            return 0;
+    }
+
+    /* Create ephemeral key */
+    if (!cms_kari_create_ephemeral_key(kari, pk))
+        return 0;
+
+    CRYPTO_add(&pk->references, 1, CRYPTO_LOCK_EVP_PKEY);
+    rek->pkey = pk;
+    return 1;
+}
+
+/*
+ * Make sure the cipher context of |kari| has a key wrap cipher set.
+ *
+ * If a cipher is already set, it is accepted only when the context is in
+ * wrap mode (returns 1, otherwise 0). If none is set, one is chosen from the
+ * content encryption |cipher| and the result of EVP_EncryptInit_ex() is
+ * returned.
+ */
+static int cms_wrap_init(CMS_KeyAgreeRecipientInfo *kari,
+                         const EVP_CIPHER *cipher)
+{
+    EVP_CIPHER_CTX *wctx = &kari->ctx;
+    int cek_keylen = EVP_CIPHER_key_length(cipher);
+    const EVP_CIPHER *wrap = EVP_CIPHER_CTX_cipher(wctx);
+
+    /* If a suitable wrap algorithm is already set nothing to do */
+    if (wrap != NULL)
+        return EVP_CIPHER_CTX_mode(wctx) == EVP_CIPH_WRAP_MODE;
+
+    /*
+     * Pick a cipher based on content encryption cipher. If it is DES3 use
+     * DES3 wrap otherwise use AES wrap similar to key size.
+     */
+    if (EVP_CIPHER_type(cipher) == NID_des_ede3_cbc)
+        wrap = EVP_des_ede3_wrap();
+    else if (cek_keylen > 24)
+        wrap = EVP_aes_256_wrap();
+    else if (cek_keylen > 16)
+        wrap = EVP_aes_192_wrap();
+    else
+        wrap = EVP_aes_128_wrap();
+    return EVP_EncryptInit_ex(wctx, wrap, NULL, NULL, NULL);
+}
+
+/*
+ * Encrypt content key in key agreement recipient info.
+ *
+ * For every RecipientEncryptedKey of |ri| this sets the entry's public key
+ * as the peer, derives a KEK and stores the wrapped content encryption key
+ * in the entry's encryptedKey. Returns 1 on success and 0 on error,
+ * including when |ri| is not a key agreement RecipientInfo.
+ */
+
+int cms_RecipientInfo_kari_encrypt(CMS_ContentInfo *cms,
+                                   CMS_RecipientInfo *ri)
+{
+    CMS_KeyAgreeRecipientInfo *kari;
+    CMS_EncryptedContentInfo *ec;
+    CMS_RecipientEncryptedKey *rek;
+    STACK_OF(CMS_RecipientEncryptedKey) *reks;
+    int i;
+
+    if (ri->type != CMS_RECIPINFO_AGREE) {
+        CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_ENCRYPT, CMS_R_NOT_KEY_AGREEMENT);
+        return 0;
+    }
+    kari = ri->d.kari;
+    reks = kari->recipientEncryptedKeys;
+    ec = cms->d.envelopedData->encryptedContentInfo;
+    /* Initialise wrap algorithm parameters */
+    if (!cms_wrap_init(kari, ec->cipher))
+        return 0;
+    /*
+     * If no originator key is set up, initialise for an ephemeral key: the
+     * public key ASN1 structure will set the actual public key value.
+     */
+    if (kari->originator->type == -1) {
+        CMS_OriginatorIdentifierOrKey *oik = kari->originator;
+        oik->type = CMS_OIK_PUBKEY;
+        oik->d.originatorKey = M_ASN1_new_of(CMS_OriginatorPublicKey);
+        if (!oik->d.originatorKey)
+            return 0;
+    }
+    /* Initialise KDF algorithm */
+    if (!cms_env_asn1_ctrl(ri, 0))
+        return 0;
+    /*
+     * For each rek, derive KEK, encrypt CEK.
+     * NOTE(review): cms_kek_cipher() frees kari->pctx and sets it to NULL
+     * before returning, so a second iteration would pass a NULL context to
+     * EVP_PKEY_derive_set_peer() - confirm that only one rek is expected.
+     */
+    for (i = 0; i < sk_CMS_RecipientEncryptedKey_num(reks); i++) {
+        unsigned char *enckey;
+        size_t enckeylen;
+        rek = sk_CMS_RecipientEncryptedKey_value(reks, i);
+        if (EVP_PKEY_derive_set_peer(kari->pctx, rek->pkey) <= 0)
+            return 0;
+        if (!cms_kek_cipher(&enckey, &enckeylen, ec->key, ec->keylen,
+                            kari, 1))
+            return 0;
+        /* The wrapped key buffer is handed over to rek->encryptedKey */
+        ASN1_STRING_set0(rek->encryptedKey, enckey, enckeylen);
+    }
+
+    return 1;
+
+}
index 4f4c4c7..20f2c25 100644 (file)
@@ -84,11 +84,9 @@ typedef struct CMS_KeyTransRecipientInfo_st CMS_KeyTransRecipientInfo;
 typedef struct CMS_OriginatorPublicKey_st CMS_OriginatorPublicKey;
 typedef struct CMS_OriginatorIdentifierOrKey_st CMS_OriginatorIdentifierOrKey;
 typedef struct CMS_KeyAgreeRecipientInfo_st CMS_KeyAgreeRecipientInfo;
-typedef struct CMS_OtherKeyAttribute_st CMS_OtherKeyAttribute;
 typedef struct CMS_RecipientKeyIdentifier_st CMS_RecipientKeyIdentifier;
 typedef struct CMS_KeyAgreeRecipientIdentifier_st
     CMS_KeyAgreeRecipientIdentifier;
-typedef struct CMS_RecipientEncryptedKey_st CMS_RecipientEncryptedKey;
 typedef struct CMS_KEKIdentifier_st CMS_KEKIdentifier;
 typedef struct CMS_KEKRecipientInfo_st CMS_KEKRecipientInfo;
 typedef struct CMS_PasswordRecipientInfo_st CMS_PasswordRecipientInfo;
@@ -138,6 +136,9 @@ struct CMS_SignerInfo_st {
     /* Signing certificate and key */
     X509 *signer;
     EVP_PKEY *pkey;
+    /* Digest and public key context for alternative parameters */
+    EVP_MD_CTX mctx;
+    EVP_PKEY_CTX *pctx;
 };
 
 struct CMS_SignerIdentifier_st {
@@ -194,6 +195,8 @@ struct CMS_KeyTransRecipientInfo_st {
     /* Recipient Key and cert */
     X509 *recip;
     EVP_PKEY *pkey;
+    /* Public key context for this operation */
+    EVP_PKEY_CTX *pctx;
 };
 
 struct CMS_KeyAgreeRecipientInfo_st {
@@ -202,6 +205,10 @@ struct CMS_KeyAgreeRecipientInfo_st {
     ASN1_OCTET_STRING *ukm;
     X509_ALGOR *keyEncryptionAlgorithm;
     STACK_OF(CMS_RecipientEncryptedKey) *recipientEncryptedKeys;
+    /* Public key context associated with current operation */
+    EVP_PKEY_CTX *pctx;
+    /* Cipher context for CEK wrapping */
+    EVP_CIPHER_CTX ctx;
 };
 
 struct CMS_OriginatorIdentifierOrKey_st {
@@ -221,6 +228,8 @@ struct CMS_OriginatorPublicKey_st {
 struct CMS_RecipientEncryptedKey_st {
     CMS_KeyAgreeRecipientIdentifier *rid;
     ASN1_OCTET_STRING *encryptedKey;
+    /* Public key associated with this recipient */
+    EVP_PKEY *pkey;
 };
 
 struct CMS_KeyAgreeRecipientIdentifier_st {
@@ -394,6 +403,13 @@ DECLARE_ASN1_ALLOC_FUNCTIONS(CMS_IssuerAndSerialNumber)
 # define CMS_RECIPINFO_ISSUER_SERIAL     0
 # define CMS_RECIPINFO_KEYIDENTIFIER     1
 
+# define CMS_REK_ISSUER_SERIAL           0
+# define CMS_REK_KEYIDENTIFIER           1
+
+# define CMS_OIK_ISSUER_SERIAL           0
+# define CMS_OIK_KEYIDENTIFIER           1
+# define CMS_OIK_PUBKEY                  2
+
 BIO *cms_content_bio(CMS_ContentInfo *cms);
 
 CMS_ContentInfo *cms_Data_create(void);
@@ -420,6 +436,11 @@ BIO *cms_DigestAlgorithm_init_bio(X509_ALGOR *digestAlgorithm);
 int cms_DigestAlgorithm_find_ctx(EVP_MD_CTX *mctx, BIO *chain,
                                  X509_ALGOR *mdalg);
 
+int cms_ias_cert_cmp(CMS_IssuerAndSerialNumber *ias, X509 *cert);
+int cms_keyid_cert_cmp(ASN1_OCTET_STRING *keyid, X509 *cert);
+int cms_set1_ias(CMS_IssuerAndSerialNumber **pias, X509 *cert);
+int cms_set1_keyid(ASN1_OCTET_STRING **pkeyid, X509 *cert);
+
 BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec);
 BIO *cms_EncryptedData_init_bio(CMS_ContentInfo *cms);
 int cms_EncryptedContent_init(CMS_EncryptedContentInfo *ec,
@@ -432,6 +453,13 @@ ASN1_OCTET_STRING *cms_encode_Receipt(CMS_SignerInfo *si);
 
 BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms);
 CMS_EnvelopedData *cms_get0_enveloped(CMS_ContentInfo *cms);
+int cms_env_asn1_ctrl(CMS_RecipientInfo *ri, int cmd);
+int cms_pkey_get_ri_type(EVP_PKEY *pk);
+/* KARI routines */
+int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip,
+                                EVP_PKEY *pk, unsigned int flags);
+int cms_RecipientInfo_kari_encrypt(CMS_ContentInfo *cms,
+                                   CMS_RecipientInfo *ri);
 
 /* PWRI routines */
 int cms_RecipientInfo_pwri_crypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri,
index e938461..d6cb60d 100644 (file)
@@ -53,7 +53,7 @@
  */
 
 #include <openssl/asn1t.h>
-#include <openssl/x509.h>
+#include <openssl/x509v3.h>
 #include <openssl/err.h>
 #include <openssl/pem.h>
 #include <openssl/bio.h>
@@ -593,3 +593,60 @@ STACK_OF(X509_CRL) *CMS_get1_crls(CMS_ContentInfo *cms)
     }
     return crls;
 }
+
+/*
+ * Compare |ias| with the issuer name and serial number of |cert|. A non-zero
+ * issuer name comparison is returned as is; otherwise the result of the
+ * serial number comparison is returned.
+ */
+int cms_ias_cert_cmp(CMS_IssuerAndSerialNumber *ias, X509 *cert)
+{
+    int diff = X509_NAME_cmp(ias->issuer, X509_get_issuer_name(cert));
+
+    if (diff == 0)
+        diff = ASN1_INTEGER_cmp(ias->serialNumber,
+                                X509_get_serialNumber(cert));
+    return diff;
+}
+
+/*
+ * Compare |keyid| with the subject key identifier of |cert|. Returns -1 when
+ * the certificate has no key identifier, otherwise the result of
+ * ASN1_OCTET_STRING_cmp().
+ */
+int cms_keyid_cert_cmp(ASN1_OCTET_STRING *keyid, X509 *cert)
+{
+    /*
+     * Called for its side effects only; presumably this fills in the cached
+     * cert->skid field read below - TODO confirm.
+     */
+    X509_check_purpose(cert, -1, -1);
+    return cert->skid ? ASN1_OCTET_STRING_cmp(keyid, cert->skid) : -1;
+}
+
+/*
+ * Build a new IssuerAndSerialNumber from the issuer name and serial number
+ * of |cert| and store it in *pias, freeing any previous value. *pias is left
+ * untouched on failure. Returns 1 on success; on any failure raises
+ * ERR_R_MALLOC_FAILURE and returns 0.
+ */
+int cms_set1_ias(CMS_IssuerAndSerialNumber **pias, X509 *cert)
+{
+    CMS_IssuerAndSerialNumber *fresh;
+
+    fresh = M_ASN1_new_of(CMS_IssuerAndSerialNumber);
+    if (fresh != NULL
+        && X509_NAME_set(&fresh->issuer, X509_get_issuer_name(cert))
+        && ASN1_STRING_copy(fresh->serialNumber,
+                            X509_get_serialNumber(cert))) {
+        /* Fully built: replace whatever the caller had before */
+        if (*pias)
+            M_ASN1_free_of(*pias, CMS_IssuerAndSerialNumber);
+        *pias = fresh;
+        return 1;
+    }
+
+    if (fresh)
+        M_ASN1_free_of(fresh, CMS_IssuerAndSerialNumber);
+    CMSerr(CMS_F_CMS_SET1_IAS, ERR_R_MALLOC_FAILURE);
+    return 0;
+}
+
+/*
+ * Store a copy of the subject key identifier of |cert| in *pkeyid, freeing
+ * any previous value. Returns 1 on success. Returns 0 with
+ * CMS_R_CERTIFICATE_HAS_NO_KEYID when the certificate has no key identifier,
+ * or with ERR_R_MALLOC_FAILURE when the copy cannot be made; *pkeyid is left
+ * untouched in both cases.
+ */
+int cms_set1_keyid(ASN1_OCTET_STRING **pkeyid, X509 *cert)
+{
+    ASN1_OCTET_STRING *copy;
+
+    X509_check_purpose(cert, -1, -1);
+    if (cert->skid == NULL) {
+        CMSerr(CMS_F_CMS_SET1_KEYID, CMS_R_CERTIFICATE_HAS_NO_KEYID);
+        return 0;
+    }
+
+    copy = ASN1_STRING_dup(cert->skid);
+    if (copy == NULL) {
+        CMSerr(CMS_F_CMS_SET1_KEYID, ERR_R_MALLOC_FAILURE);
+        return 0;
+    }
+
+    if (*pkeyid != NULL)
+        ASN1_OCTET_STRING_free(*pkeyid);
+    *pkeyid = copy;
+    return 1;
+}
index 6daa262..721ffd5 100644 (file)
@@ -55,6 +55,7 @@
 #include "cryptlib.h"
 #include <openssl/asn1t.h>
 #include <openssl/pem.h>
+#include <openssl/x509.h>
 #include <openssl/x509v3.h>
 #include <openssl/err.h>
 #include <openssl/cms.h>
@@ -197,27 +198,13 @@ int cms_set1_SignerIdentifier(CMS_SignerIdentifier *sid, X509 *cert, int type)
 {
     switch (type) {
     case CMS_SIGNERINFO_ISSUER_SERIAL:
-        sid->d.issuerAndSerialNumber =
-            M_ASN1_new_of(CMS_IssuerAndSerialNumber);
-        if (!sid->d.issuerAndSerialNumber)
-            goto merr;
-        if (!X509_NAME_set(&sid->d.issuerAndSerialNumber->issuer,
-                           X509_get_issuer_name(cert)))
-            goto merr;
-        if (!ASN1_STRING_copy(sid->d.issuerAndSerialNumber->serialNumber,
-                              X509_get_serialNumber(cert)))
-            goto merr;
+        if (!cms_set1_ias(&sid->d.issuerAndSerialNumber, cert))
+            return 0;
         break;
 
     case CMS_SIGNERINFO_KEYIDENTIFIER:
-        if (!cert->skid) {
-            CMSerr(CMS_F_CMS_SET1_SIGNERIDENTIFIER,
-                   CMS_R_CERTIFICATE_HAS_NO_KEYID);
+        if (!cms_set1_keyid(&sid->d.subjectKeyIdentifier, cert))
             return 0;
-        }
-        sid->d.subjectKeyIdentifier = ASN1_STRING_dup(cert->skid);
-        if (!sid->d.subjectKeyIdentifier)
-            goto merr;
         break;
 
     default:
@@ -228,11 +215,6 @@ int cms_set1_SignerIdentifier(CMS_SignerIdentifier *sid, X509 *cert, int type)
     sid->type = type;
 
     return 1;
-
- merr:
-    CMSerr(CMS_F_CMS_SET1_SIGNERIDENTIFIER, ERR_R_MALLOC_FAILURE);
-    return 0;
-
 }
 
 int cms_SignerIdentifier_get0_signer_id(CMS_SignerIdentifier *sid,
@@ -255,23 +237,32 @@ int cms_SignerIdentifier_get0_signer_id(CMS_SignerIdentifier *sid,
 
 int cms_SignerIdentifier_cert_cmp(CMS_SignerIdentifier *sid, X509 *cert)
 {
-    int ret;
-    if (sid->type == CMS_SIGNERINFO_ISSUER_SERIAL) {
-        ret = X509_NAME_cmp(sid->d.issuerAndSerialNumber->issuer,
-                            X509_get_issuer_name(cert));
-        if (ret)
-            return ret;
-        return ASN1_INTEGER_cmp(sid->d.issuerAndSerialNumber->serialNumber,
-                                X509_get_serialNumber(cert));
-    } else if (sid->type == CMS_SIGNERINFO_KEYIDENTIFIER) {
-        X509_check_purpose(cert, -1, -1);
-        if (!cert->skid)
-            return -1;
-        return ASN1_OCTET_STRING_cmp(sid->d.subjectKeyIdentifier, cert->skid);
-    } else
+    if (sid->type == CMS_SIGNERINFO_ISSUER_SERIAL)
+        return cms_ias_cert_cmp(sid->d.issuerAndSerialNumber, cert);
+    else if (sid->type == CMS_SIGNERINFO_KEYIDENTIFIER)
+        return cms_keyid_cert_cmp(sid->d.subjectKeyIdentifier, cert);
+    else
         return -1;
 }
 
+/*
+ * Invoke the ASN1_PKEY_CTRL_CMS_SIGN control of the signer's key method with
+ * |cmd| and |si|. Returns 1 when the key has no pkey_ctrl hook or the hook
+ * returns a positive value. Otherwise raises
+ * CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE (hook returned -2) or
+ * CMS_R_CTRL_FAILURE (any other non-positive value) and returns 0.
+ */
+static int cms_sd_asn1_ctrl(CMS_SignerInfo *si, int cmd)
+{
+    EVP_PKEY *key = si->pkey;
+    int rc;
+
+    if (key->ameth == NULL || key->ameth->pkey_ctrl == NULL)
+        return 1;
+
+    rc = key->ameth->pkey_ctrl(key, ASN1_PKEY_CTRL_CMS_SIGN, cmd, si);
+    if (rc > 0)
+        return 1;
+
+    if (rc == -2)
+        CMSerr(CMS_F_CMS_SD_ASN1_CTRL, CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE);
+    else
+        CMSerr(CMS_F_CMS_SD_ASN1_CTRL, CMS_R_CTRL_FAILURE);
+    return 0;
+}
+
 CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms,
                                 X509 *signer, EVP_PKEY *pk, const EVP_MD *md,
                                 unsigned int flags)
@@ -298,6 +289,8 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms,
 
     si->pkey = pk;
     si->signer = signer;
+    EVP_MD_CTX_init(&si->mctx);
+    si->pctx = NULL;
 
     if (flags & CMS_USE_KEYID) {
         si->version = 3;
@@ -350,19 +343,8 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms,
         }
     }
 
-    if (pk->ameth && pk->ameth->pkey_ctrl) {
-        i = pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_SIGN, 0, si);
-        if (i == -2) {
-            CMSerr(CMS_F_CMS_ADD1_SIGNER,
-                   CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE);
-            goto err;
-        }
-        if (i <= 0) {
-            CMSerr(CMS_F_CMS_ADD1_SIGNER, CMS_R_CTRL_FAILURE);
-            goto err;
-        }
-    }
-
+    if (!(flags & CMS_KEY_PARAM) && !cms_sd_asn1_ctrl(si, 0))
+        goto err;
     if (!(flags & CMS_NOATTR)) {
         /*
          * Initialialize signed attributes strutucture so other attributes
@@ -386,7 +368,8 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms,
         if (flags & CMS_REUSE_DIGEST) {
             if (!cms_copy_messageDigest(cms, si))
                 goto err;
-            if (!(flags & CMS_PARTIAL) && !CMS_SignerInfo_sign(si))
+            if (!(flags & (CMS_PARTIAL | CMS_KEY_PARAM)) &&
+                !CMS_SignerInfo_sign(si))
                 goto err;
         }
     }
@@ -397,6 +380,20 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms,
             goto merr;
     }
 
+    if (flags & CMS_KEY_PARAM) {
+        if (flags & CMS_NOATTR) {
+            si->pctx = EVP_PKEY_CTX_new(si->pkey, NULL);
+            if (!si->pctx)
+                goto err;
+            if (EVP_PKEY_sign_init(si->pctx) <= 0)
+                goto err;
+            if (EVP_PKEY_CTX_set_signature_md(si->pctx, md) <= 0)
+                goto err;
+        } else if (EVP_DigestSignInit(&si->mctx, &si->pctx, md, NULL, pk) <=
+                   0)
+            goto err;
+    }
+
     if (!sd->signerInfos)
         sd->signerInfos = sk_CMS_SignerInfo_new_null();
     if (!sd->signerInfos || !sk_CMS_SignerInfo_push(sd->signerInfos, si))
@@ -443,6 +440,16 @@ static int cms_add1_signingTime(CMS_SignerInfo *si, ASN1_TIME *t)
 
 }
 
+/*
+ * Return the public key context stored in |si|. The pointer is returned as
+ * is and may be NULL when no context has been set.
+ */
+EVP_PKEY_CTX *CMS_SignerInfo_get0_pkey_ctx(CMS_SignerInfo *si)
+{
+    return si->pctx;
+}
+
+/* Return a pointer to the digest context embedded in |si|; never NULL. */
+EVP_MD_CTX *CMS_SignerInfo_get0_md_ctx(CMS_SignerInfo *si)
+{
+    return &si->mctx;
+}
+
 STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms)
 {
     CMS_SignedData *sd;
@@ -561,11 +568,17 @@ void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk,
         *psig = si->signatureAlgorithm;
 }
 
+/* Return the signature field of |si| without copying it. */
+ASN1_OCTET_STRING *CMS_SignerInfo_get0_signature(CMS_SignerInfo *si)
+{
+    return si->signature;
+}
+
 static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms,
                                        CMS_SignerInfo *si, BIO *chain)
 {
     EVP_MD_CTX mctx;
     int r = 0;
+    EVP_PKEY_CTX *pctx = NULL;
     EVP_MD_CTX_init(&mctx);
 
     if (!si->pkey) {
@@ -575,6 +588,9 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms,
 
     if (!cms_DigestAlgorithm_find_ctx(&mctx, chain, si->digestAlgorithm))
         goto err;
+    /* Set SignerInfo algortihm details if we used custom parametsr */
+    if (si->pctx && !cms_sd_asn1_ctrl(si, 0))
+        goto err;
 
     /*
      * If any signed attributes calculate and add messageDigest attribute
@@ -596,6 +612,23 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms,
             goto err;
         if (!CMS_SignerInfo_sign(si))
             goto err;
+    } else if (si->pctx) {
+        unsigned char *sig;
+        size_t siglen;
+        unsigned char md[EVP_MAX_MD_SIZE];
+        unsigned int mdlen;
+        pctx = si->pctx;
+        if (!EVP_DigestFinal_ex(&mctx, md, &mdlen))
+            goto err;
+        siglen = EVP_PKEY_size(si->pkey);
+        sig = OPENSSL_malloc(siglen);
+        if (!sig) {
+            CMSerr(CMS_F_CMS_SIGNERINFO_CONTENT_SIGN, ERR_R_MALLOC_FAILURE);
+            goto err;
+        }
+        if (EVP_PKEY_sign(pctx, sig, &siglen, md, mdlen) <= 0)
+            goto err;
+        ASN1_STRING_set0(si->signature, sig, siglen);
     } else {
         unsigned char *sig;
         unsigned int siglen;
@@ -616,6 +649,8 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms,
 
  err:
     EVP_MD_CTX_cleanup(&mctx);
+    if (pctx)
+        EVP_PKEY_CTX_free(pctx);
     return r;
 
 }
@@ -637,7 +672,7 @@ int cms_SignedData_final(CMS_ContentInfo *cms, BIO *chain)
 
 int CMS_SignerInfo_sign(CMS_SignerInfo *si)
 {
-    EVP_MD_CTX mctx;
+    EVP_MD_CTX *mctx = &si->mctx;
     EVP_PKEY_CTX *pctx;
     unsigned char *abuf = NULL;
     int alen;
@@ -648,15 +683,18 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si)
     if (md == NULL)
         return 0;
 
-    EVP_MD_CTX_init(&mctx);
-
     if (CMS_signed_get_attr_by_NID(si, NID_pkcs9_signingTime, -1) < 0) {
         if (!cms_add1_signingTime(si, NULL))
             goto err;
     }
 
-    if (EVP_DigestSignInit(&mctx, &pctx, md, NULL, si->pkey) <= 0)
-        goto err;
+    if (si->pctx)
+        pctx = si->pctx;
+    else {
+        EVP_MD_CTX_init(mctx);
+        if (EVP_DigestSignInit(mctx, &pctx, md, NULL, si->pkey) <= 0)
+            goto err;
+    }
 
     if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_SIGN,
                           EVP_PKEY_CTRL_CMS_SIGN, 0, si) <= 0) {
@@ -668,15 +706,15 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si)
                          ASN1_ITEM_rptr(CMS_Attributes_Sign));
     if (!abuf)
         goto err;
-    if (EVP_DigestSignUpdate(&mctx, abuf, alen) <= 0)
+    if (EVP_DigestSignUpdate(mctx, abuf, alen) <= 0)
         goto err;
-    if (EVP_DigestSignFinal(&mctx, NULL, &siglen) <= 0)
+    if (EVP_DigestSignFinal(mctx, NULL, &siglen) <= 0)
         goto err;
     OPENSSL_free(abuf);
     abuf = OPENSSL_malloc(siglen);
     if (!abuf)
         goto err;
-    if (EVP_DigestSignFinal(&mctx, abuf, &siglen) <= 0)
+    if (EVP_DigestSignFinal(mctx, abuf, &siglen) <= 0)
         goto err;
 
     if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_SIGN,
@@ -685,7 +723,7 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si)
         goto err;
     }
 
-    EVP_MD_CTX_cleanup(&mctx);
+    EVP_MD_CTX_cleanup(mctx);
 
     ASN1_STRING_set0(si->signature, abuf, siglen);
 
@@ -694,15 +732,14 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si)
  err:
     if (abuf)
         OPENSSL_free(abuf);
-    EVP_MD_CTX_cleanup(&mctx);
+    EVP_MD_CTX_cleanup(mctx);
     return 0;
 
 }
 
 int CMS_SignerInfo_verify(CMS_SignerInfo *si)
 {
-    EVP_MD_CTX mctx;
-    EVP_PKEY_CTX *pctx;
+    EVP_MD_CTX *mctx = &si->mctx;
     unsigned char *abuf = NULL;
     int alen, r = -1;
     const EVP_MD *md = NULL;
@@ -715,26 +752,29 @@ int CMS_SignerInfo_verify(CMS_SignerInfo *si)
     md = EVP_get_digestbyobj(si->digestAlgorithm->algorithm);
     if (md == NULL)
         return -1;
-    EVP_MD_CTX_init(&mctx);
-    if (EVP_DigestVerifyInit(&mctx, &pctx, md, NULL, si->pkey) <= 0)
+    EVP_MD_CTX_init(mctx);
+    if (EVP_DigestVerifyInit(mctx, &si->pctx, md, NULL, si->pkey) <= 0)
+        goto err;
+
+    if (!cms_sd_asn1_ctrl(si, 1))
         goto err;
 
     alen = ASN1_item_i2d((ASN1_VALUE *)si->signedAttrs, &abuf,
                          ASN1_ITEM_rptr(CMS_Attributes_Verify));
     if (!abuf)
         goto err;
-    r = EVP_DigestVerifyUpdate(&mctx, abuf, alen);
+    r = EVP_DigestVerifyUpdate(mctx, abuf, alen);
     OPENSSL_free(abuf);
     if (r <= 0) {
         r = -1;
         goto err;
     }
-    r = EVP_DigestVerifyFinal(&mctx,
+    r = EVP_DigestVerifyFinal(mctx,
                               si->signature->data, si->signature->length);
     if (r <= 0)
         CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY, CMS_R_VERIFICATION_FAILURE);
  err:
-    EVP_MD_CTX_cleanup(&mctx);
+    EVP_MD_CTX_cleanup(mctx);
     return r;
 }
 
@@ -773,7 +813,10 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain)
 {
     ASN1_OCTET_STRING *os = NULL;
     EVP_MD_CTX mctx;
+    EVP_PKEY_CTX *pkctx = NULL;
     int r = -1;
+    unsigned char mval[EVP_MAX_MD_SIZE];
+    unsigned int mlen;
     EVP_MD_CTX_init(&mctx);
     /* If we have any signed attributes look for messageDigest value */
     if (CMS_signed_get_attr_count(si) >= 0) {
@@ -790,16 +833,15 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain)
     if (!cms_DigestAlgorithm_find_ctx(&mctx, chain, si->digestAlgorithm))
         goto err;
 
+    if (EVP_DigestFinal_ex(&mctx, mval, &mlen) <= 0) {
+        CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT,
+               CMS_R_UNABLE_TO_FINALIZE_CONTEXT);
+        goto err;
+    }
+
     /* If messageDigest found compare it */
 
     if (os) {
-        unsigned char mval[EVP_MAX_MD_SIZE];
-        unsigned int mlen;
-        if (EVP_DigestFinal_ex(&mctx, mval, &mlen) <= 0) {
-            CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT,
-                   CMS_R_UNABLE_TO_FINALIZE_CONTEXT);
-            goto err;
-        }
         if (mlen != (unsigned int)os->length) {
             CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT,
                    CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH);
@@ -813,8 +855,17 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain)
         } else
             r = 1;
     } else {
-        r = EVP_VerifyFinal(&mctx, si->signature->data,
-                            si->signature->length, si->pkey);
+        const EVP_MD *md = EVP_MD_CTX_md(&mctx);
+        pkctx = EVP_PKEY_CTX_new(si->pkey, NULL);
+        if (EVP_PKEY_verify_init(pkctx) <= 0)
+            goto err;
+        if (EVP_PKEY_CTX_set_signature_md(pkctx, md) <= 0)
+            goto err;
+        si->pctx = pkctx;
+        if (!cms_sd_asn1_ctrl(si, 1))
+            goto err;
+        r = EVP_PKEY_verify(pkctx, si->signature->data,
+                            si->signature->length, mval, mlen);
         if (r <= 0) {
             CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT,
                    CMS_R_VERIFICATION_FAILURE);
@@ -823,6 +874,8 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain)
     }
 
  err:
+    if (pkctx)
+        EVP_PKEY_CTX_free(pkctx);
     EVP_MD_CTX_cleanup(&mctx);
     return r;
 
index 8b37560..5522a37 100644 (file)
@@ -59,6 +59,7 @@
 #include <openssl/err.h>
 #include <openssl/cms.h>
 #include "cms_lcl.h"
+#include "asn1_locl.h"
 
 static int cms_copy_content(BIO *out, BIO *in, unsigned int flags)
 {
@@ -373,7 +374,7 @@ int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs,
         tmpin = BIO_new_mem_buf(ptr, len);
         if (tmpin == NULL) {
             CMSerr(CMS_F_CMS_VERIFY, ERR_R_MALLOC_FAILURE);
-            return 0;
+            goto err2;
         }
     } else
         tmpin = dcont;
@@ -404,6 +405,7 @@ int CMS_verify(CMS_ContentInfo *cms, STACK_OF(X509) *certs,
     else
         BIO_free_all(cmsbio);
 
+ err2:
     if (cms_certs)
         sk_X509_pop_free(cms_certs, X509_free);
     if (crls)
@@ -567,25 +569,63 @@ CMS_ContentInfo *CMS_encrypt(STACK_OF(X509) *certs, BIO *data,
     return NULL;
 }
 
+/*
+ * Try to decrypt with |pk| the first RecipientEncryptedKey of |ri| that
+ * matches |cert|. Returns 1 when decryption succeeds, -1 when a matching
+ * entry is found but decryption fails, and 0 when |cert| is NULL or no entry
+ * matches.
+ */
+static int cms_kari_set1_pkey(CMS_ContentInfo *cms, CMS_RecipientInfo *ri,
+                              EVP_PKEY *pk, X509 *cert)
+{
+    STACK_OF(CMS_RecipientEncryptedKey) *reks =
+        CMS_RecipientInfo_kari_get0_reks(ri);
+    int idx;
+
+    if (cert == NULL)
+        return 0;
+
+    for (idx = 0; idx < sk_CMS_RecipientEncryptedKey_num(reks); idx++) {
+        CMS_RecipientEncryptedKey *cand =
+            sk_CMS_RecipientEncryptedKey_value(reks, idx);
+        int ok;
+
+        if (CMS_RecipientEncryptedKey_cert_cmp(cand, cert) != 0)
+            continue;
+        /* |pk| is set on |ri| only for the duration of the attempt */
+        CMS_RecipientInfo_kari_set0_pkey(ri, pk);
+        ok = CMS_RecipientInfo_kari_decrypt(cms, ri, cand);
+        CMS_RecipientInfo_kari_set0_pkey(ri, NULL);
+        return ok > 0 ? 1 : -1;
+    }
+    return 0;
+}
+
 int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert)
 {
     STACK_OF(CMS_RecipientInfo) *ris;
     CMS_RecipientInfo *ri;
-    int i, r;
-    int debug = 0, ri_match = 0;
+    int i, r, ri_type;
+    int debug = 0, match_ri = 0;
     ris = CMS_get0_RecipientInfos(cms);
     if (ris)
         debug = cms->d.envelopedData->encryptedContentInfo->debug;
+    ri_type = cms_pkey_get_ri_type(pk);
+    if (ri_type == CMS_RECIPINFO_NONE) {
+        CMSerr(CMS_F_CMS_DECRYPT_SET1_PKEY,
+               CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE);
+        return 0;
+    }
+
     for (i = 0; i < sk_CMS_RecipientInfo_num(ris); i++) {
         ri = sk_CMS_RecipientInfo_value(ris, i);
-        if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_TRANS)
+        if (CMS_RecipientInfo_type(ri) != ri_type)
             continue;
-        ri_match = 1;
+        match_ri = 1;
+        if (ri_type == CMS_RECIPINFO_AGREE) {
+            r = cms_kari_set1_pkey(cms, ri, pk, cert);
+            if (r > 0)
+                return 1;
+            if (r < 0)
+                return 0;
+        }
         /*
          * If we have a cert try matching RecipientInfo otherwise try them
          * all.
          */
-        if (!cert || (CMS_RecipientInfo_ktri_cert_cmp(ri, cert) == 0)) {
+        else if (!cert || !CMS_RecipientInfo_ktri_cert_cmp(ri, cert)) {
             CMS_RecipientInfo_set0_pkey(ri, pk);
             r = CMS_RecipientInfo_decrypt(cms, ri);
             CMS_RecipientInfo_set0_pkey(ri, NULL);
@@ -613,7 +653,7 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert)
         }
     }
     /* If no cert and not debugging always return success */
-    if (ri_match && !cert && !debug) {
+    if (match_ri && !cert && !debug) {
         ERR_clear_error();
         return 1;
     }
index c654a5c..ca0e3cc 100644 (file)
@@ -653,7 +653,7 @@ const char *CRYPTO_get_lock_name(int type)
         defined(__x86_64) || defined(__x86_64__) || \
         defined(_M_AMD64) || defined(_M_X64)
 
-unsigned int OPENSSL_ia32cap_P[2];
+extern unsigned int OPENSSL_ia32cap_P[4];
 unsigned long *OPENSSL_ia32cap_loc(void)
 {
     if (sizeof(long) == 4)
@@ -663,6 +663,9 @@ unsigned long *OPENSSL_ia32cap_loc(void)
          * is 32-bit.
          */
         OPENSSL_ia32cap_P[1] = 0;
+
+    OPENSSL_ia32cap_P[2] = 0;
+
     return (unsigned long *)OPENSSL_ia32cap_P;
 }
 
@@ -676,7 +679,7 @@ typedef unsigned long long IA32CAP;
 void OPENSSL_cpuid_setup(void)
 {
     static int trigger = 0;
-    IA32CAP OPENSSL_ia32_cpuid(void);
+    IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
     IA32CAP vec;
     char *env;
 
@@ -694,9 +697,23 @@ void OPENSSL_cpuid_setup(void)
             vec = strtoul(env + off, NULL, 0);
 #  endif
         if (off)
-            vec = OPENSSL_ia32_cpuid() & ~vec;
+            vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P) & ~vec;
+        else if (env[0] == ':')
+            vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
+
+        OPENSSL_ia32cap_P[2] = 0;
+        if ((env = strchr(env, ':'))) {
+            unsigned int vecx;
+            env++;
+            off = (env[0] == '~') ? 1 : 0;
+            vecx = strtoul(env + off, NULL, 0);
+            if (off)
+                OPENSSL_ia32cap_P[2] &= ~vecx;
+            else
+                OPENSSL_ia32cap_P[2] = vecx;
+        }
     } else
-        vec = OPENSSL_ia32_cpuid();
+        vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
 
     /*
      * |(1<<10) sets a reserved bit to signal that variable
@@ -706,6 +723,8 @@ void OPENSSL_cpuid_setup(void)
     OPENSSL_ia32cap_P[0] = (unsigned int)vec | (1 << 10);
     OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32);
 }
+# else
+unsigned int OPENSSL_ia32cap_P[4];
 # endif
 
 #else
@@ -857,8 +876,12 @@ void OPENSSL_showfatal(const char *fmta, ...)
     if ((h = GetStdHandle(STD_ERROR_HANDLE)) != NULL &&
         GetFileType(h) != FILE_TYPE_UNKNOWN) {
         /* must be console application */
+        int len;
+        DWORD out;
+
         va_start(ap, fmta);
-        vfprintf(stderr, fmta, ap);
+        len = _vsnprintf((char *)buf, sizeof(buf), fmta, ap);
+        WriteFile(h, buf, len < 0 ? sizeof(buf) : (DWORD) len, &out, NULL);
         va_end(ap);
         return;
     }
@@ -965,7 +988,9 @@ void OpenSSLDie(const char *file, int line, const char *assertion)
     /*
      * Win32 abort() customarily shows a dialog, but we just did that...
      */
+# if !defined(_WIN32_WCE)
     raise(SIGABRT);
+# endif
     _exit(3);
 #endif
 }
index a136f4b..1423cac 100644 (file)
@@ -214,7 +214,7 @@ $! The contents of these variables are copied from the LIBOBJ variable in the
 $! corresponding Makefile from each corresponding subdirectory, with .o stripped
 $! and spaces replaced with commas.
 $ LIB_ = "cryptlib,mem,mem_dbg,cversion,ex_data,cpt_err,ebcdic,"+ -
-       "uid,o_time,o_str,o_dir,o_fips.c,o_init,fips_ers,mem_clr"
+       "uid,o_time,o_str,o_dir,o_fips,o_init,fips_ers,mem_clr"
 $ LIB_OBJECTS = "o_names,obj_dat,obj_lib,obj_err,obj_xref"
 $ LIB_MD2 = "md2_dgst,md2_one"
 $ LIB_MD4 = "md4_dgst,md4_one"
@@ -231,17 +231,19 @@ $ LIB_DES = "set_key,ecb_enc,cbc_enc,"+ -
        "des_enc,fcrypt_b,"+ -
        "fcrypt,xcbc_enc,rpc_enc,cbc_cksm,"+ -
        "ede_cbcm_enc,des_old,des_old2,read2pwd"
+$ LIB_AES = "aes_misc,aes_ecb,aes_cfb,aes_ofb,aes_ctr,aes_ige,aes_wrap,"+ -
+       "aes_core,aes_cbc"
 $ LIB_RC2 = "rc2_ecb,rc2_skey,rc2_cbc,rc2cfb64,rc2ofb64"
 $ LIB_RC4 = "rc4_enc,rc4_skey,rc4_utl"
 $ LIB_RC5 = "rc5_skey,rc5_ecb,rc5_enc,rc5cfb64,rc5ofb64"
 $ LIB_IDEA = "i_cbc,i_cfb64,i_ofb64,i_ecb,i_skey"
 $ LIB_BF = "bf_skey,bf_ecb,bf_enc,bf_cfb64,bf_ofb64"
 $ LIB_CAST = "c_skey,c_ecb,c_enc,c_cfb64,c_ofb64"
-$ LIB_CAMELLIA = "cmll_ecb,cmll_ofb,cmll_cfb,cmll_ctr,cmll_utl,"+ -
-       "camellia,cmll_misc,cmll_cbc"
+$ LIB_CAMELLIA = "cmll_ecb,cmll_ofb,cmll_cfb,cmll_ctr,"+ -
+       "cmll_utl,camellia,cmll_misc,cmll_cbc"
 $ LIB_SEED = "seed,seed_ecb,seed_cbc,seed_cfb,seed_ofb"
 $ LIB_MODES = "cbc128,ctr128,cts128,cfb128,ofb128,gcm128,"+ -
-       "ccm128,xts128"
+       "ccm128,xts128,wrap128"
 $ LIB_BN_ASM = "[.asm]vms.mar,vms-helper"
 $ IF F$TRNLNM("OPENSSL_NO_ASM") .OR. ARCH .NES. "VAX" THEN -
      LIB_BN_ASM = "bn_asm"
@@ -263,8 +265,8 @@ $ LIB_DSA = "dsa_gen,dsa_key,dsa_lib,dsa_asn1,dsa_vrf,dsa_sign,"+ -
        "dsa_err,dsa_ossl,dsa_depr,dsa_ameth,dsa_pmeth,dsa_prn"
 $ LIB_ECDSA = "ecs_lib,ecs_asn1,ecs_ossl,ecs_sign,ecs_vrf,ecs_err"
 $ LIB_DH = "dh_asn1,dh_gen,dh_key,dh_lib,dh_check,dh_err,dh_depr,"+ -
-       "dh_ameth,dh_pmeth,dh_prn"
-$ LIB_ECDH = "ech_lib,ech_ossl,ech_key,ech_err"
+       "dh_ameth,dh_pmeth,dh_prn,dh_rfc5114,dh_kdf"
+$ LIB_ECDH = "ech_lib,ech_ossl,ech_key,ech_err,ech_kdf"
 $ LIB_DSO = "dso_dl,dso_dlfcn,dso_err,dso_lib,dso_null,"+ -
        "dso_openssl,dso_win32,dso_vms,dso_beos"
 $ LIB_ENGINE = "eng_err,eng_lib,eng_list,eng_init,eng_ctrl,"+ -
@@ -272,9 +274,7 @@ $ LIB_ENGINE = "eng_err,eng_lib,eng_list,eng_init,eng_ctrl,"+ -
        "tb_rsa,tb_dsa,tb_ecdsa,tb_dh,tb_ecdh,tb_rand,tb_store,"+ -
        "tb_cipher,tb_digest,tb_pkmeth,tb_asnmth,"+ -
        "eng_openssl,eng_cnf,eng_dyn,eng_cryptodev,"+ -
-       "eng_rsax,eng_rdrand"
-$ LIB_AES = "aes_misc,aes_ecb,aes_cfb,aes_ofb,aes_ctr,aes_ige,aes_wrap,"+ -
-       "aes_core,aes_cbc"
+       "eng_rdrand"
 $ LIB_BUFFER = "buffer,buf_str,buf_err"
 $ LIB_BIO = "bio_lib,bio_cb,bio_err,"+ -
        "bss_mem,bss_null,bss_fd,"+ -
@@ -298,8 +298,8 @@ $ LIB_EVP_2 = "m_null,m_md2,m_md4,m_md5,m_sha,m_sha1,m_wp," + -
        "bio_md,bio_b64,bio_enc,evp_err,e_null,"+ -
        "c_all,c_allc,c_alld,evp_lib,bio_ok,"+-
        "evp_pkey,evp_pbe,p5_crpt,p5_crpt2"
-$ LIB_EVP_3 = "e_old,pmeth_lib,pmeth_fn,pmeth_gn,m_sigver,evp_fips,"+ -
-       "e_aes_cbc_hmac_sha1,e_rc4_hmac_md5"
+$ LIB_EVP_3 = "e_old,pmeth_lib,pmeth_fn,pmeth_gn,m_sigver,"+ -
+       "e_aes_cbc_hmac_sha1,e_aes_cbc_hmac_sha256,e_rc4_hmac_md5"
 $ LIB_ASN1 = "a_object,a_bitstr,a_utctm,a_gentm,a_time,a_int,a_octet,"+ -
        "a_print,a_type,a_set,a_dup,a_d2i_fp,a_i2d_fp,"+ -
        "a_enum,a_utf8,a_sign,a_digest,a_verify,a_mbstr,a_strex,"+ -
@@ -326,7 +326,7 @@ $ LIB_X509V3 = "v3_bcons,v3_bitst,v3_conf,v3_extku,v3_ia5,v3_lib,"+ -
        "v3_int,v3_enum,v3_sxnet,v3_cpols,v3_crld,v3_purp,v3_info,"+ -
        "v3_ocsp,v3_akeya,v3_pmaps,v3_pcons,v3_ncons,v3_pcia,v3_pci,"+ -
        "pcy_cache,pcy_node,pcy_data,pcy_map,pcy_tree,pcy_lib,"+ -
-       "v3_asid,v3_addr"
+       "v3_asid,v3_addr,v3_scts"
 $ LIB_CONF = "conf_err,conf_lib,conf_api,conf_def,conf_mod,conf_mall,conf_sap"
 $ LIB_TXT_DB = "txt_db"
 $ LIB_PKCS7 = "pk7_asn1,pk7_lib,pkcs7err,pk7_doit,pk7_smime,pk7_attr,"+ -
@@ -343,7 +343,7 @@ $ LIB_UI = "ui_err,ui_lib,ui_openssl,ui_util"+LIB_UI_COMPAT
 $ LIB_KRB5 = "krb5_asn"
 $ LIB_CMS = "cms_lib,cms_asn1,cms_att,cms_io,cms_smime,cms_err,"+ -
        "cms_sd,cms_dd,cms_cd,cms_env,cms_enc,cms_ess,"+ -
-       "cms_pwri"
+       "cms_pwri,cms_kari"
 $ LIB_PQUEUE = "pqueue"
 $ LIB_TS = "ts_err,ts_req_utils,ts_req_print,ts_rsp_utils,ts_rsp_print,"+ -
        "ts_rsp_sign,ts_rsp_verify,ts_verify_ctx,ts_lib,ts_conf,"+ -
@@ -1141,7 +1141,7 @@ $ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS
 $ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX,FOUNDCR"
 $ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. ""
 $ THEN
-$     IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + ","
+$     IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + ","
 $     CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS
 $ ENDIF
 $!
index 9e6f50d..bfff699 100644 (file)
@@ -68,7 +68,11 @@ const char *SSLeay_version(int t)
         return OPENSSL_VERSION_TEXT;
     if (t == SSLEAY_BUILT_ON) {
 #ifdef DATE
+# ifdef OPENSSL_USE_BUILD_DATE
         return (DATE);
+# else
+        return ("built on: reproducible build, date unspecified");
+# endif
 #else
         return ("built on: date not available");
 #endif
index fbc77c1..8b5166c 100644 (file)
@@ -61,6 +61,8 @@ des: des.o cbc3_enc.o lib
 
 des_enc-sparc.S:       asm/des_enc.m4
        m4 -B 8192 asm/des_enc.m4 > des_enc-sparc.S
+dest4-sparcv9.s:       asm/dest4-sparcv9.pl
+       $(PERL) asm/dest4-sparcv9.pl $(CFLAGS) > $@
 
 des-586.s:     asm/des-586.pl ../perlasm/x86asm.pl ../perlasm/cbc.pl
        $(PERL) asm/des-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@
index 5b5f39c..bd6a7dd 100644 (file)
@@ -25,6 +25,7 @@ $small_footprint=1 if (grep(/\-DOPENSSL_SMALL_FOOTPRINT/,@ARGV));
 # the folded loop is only 3% slower than unrolled, but >7 times smaller
 
 &public_label("DES_SPtrans");
+&static_label("des_sptrans");
 
 &DES_encrypt_internal();
 &DES_decrypt_internal();
@@ -158,7 +159,7 @@ sub DES_encrypt
        &call   (&label("pic_point"));
        &set_label("pic_point");
        &blindpop($trans);
-       &lea    ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans));
+       &lea    ($trans,&DWP(&label("des_sptrans")."-".&label("pic_point"),$trans));
 
        &mov(   "ecx",  &wparam(1)      );
 
@@ -315,6 +316,7 @@ sub FP_new
 sub DES_SPtrans
        {
        &set_label("DES_SPtrans",64);
+       &set_label("des_sptrans");
        &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802);
        &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002);
        &data_word(0x00080802, 0x02080800, 0x02080000, 0x00000802);
index 3280595..dda08e1 100644 (file)
@@ -46,6 +46,8 @@
 .ident "des_enc.m4 2.1"
 .file  "des_enc-sparc.S"
 
+#include <openssl/opensslconf.h>
+
 #if defined(__SUNPRO_C) && defined(__sparcv9)
 # define ABI64  /* They've said -xarch=v9 at command line */
 #elif defined(__GNUC__) && defined(__arch64__)
diff --git a/crypto/des/asm/dest4-sparcv9.pl b/crypto/des/asm/dest4-sparcv9.pl
new file mode 100644 (file)
index 0000000..1dc6024
--- /dev/null
@@ -0,0 +1,617 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by David S. Miller <davem@davemloft.net> and Andy Polyakov
+# <appro@openssl.org>. The module is licensed under 2-clause BSD
+# license. March 2013. All rights reserved.
+# ====================================================================
+
+######################################################################
+# DES for SPARC T4.
+#
+# As with other hardware-assisted ciphers CBC encrypt results [for
+# aligned data] are virtually identical to critical path lengths:
+#
+#              DES             Triple-DES
+# CBC encrypt  4.14/4.15(*)    11.7/11.7
+# CBC decrypt  1.77/4.11(**)   6.42/7.47
+#
+#                       (*)    numbers after slash are for
+#                              misaligned data;
+#                       (**)   this is result for largest
+#                              block size, unlike all other
+#                              cases smaller blocks results
+#                              are better[?];
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "sparcv9_modes.pl";
+
+&asm_init(@ARGV);
+
+$code.=<<___ if ($::abibits==64);
+.register       %g2,#scratch
+.register       %g3,#scratch
+___
+
+$code.=<<___;
+.text
+___
+
+{ my ($inp,$out)=("%o0","%o1");
+
+$code.=<<___;
+.align 32
+.globl des_t4_key_expand
+.type  des_t4_key_expand,#function
+des_t4_key_expand:
+       andcc           $inp, 0x7, %g0
+       alignaddr       $inp, %g0, $inp
+       bz,pt           %icc, 1f
+       ldd             [$inp + 0x00], %f0
+       ldd             [$inp + 0x08], %f2
+       faligndata      %f0, %f2, %f0
+1:     des_kexpand     %f0, 0, %f0
+       des_kexpand     %f0, 1, %f2
+       std             %f0, [$out + 0x00]
+       des_kexpand     %f2, 3, %f6
+       std             %f2, [$out + 0x08]
+       des_kexpand     %f2, 2, %f4
+       des_kexpand     %f6, 3, %f10
+       std             %f6, [$out + 0x18]
+       des_kexpand     %f6, 2, %f8
+       std             %f4, [$out + 0x10]
+       des_kexpand     %f10, 3, %f14
+       std             %f10, [$out + 0x28]
+       des_kexpand     %f10, 2, %f12
+       std             %f8, [$out + 0x20]
+       des_kexpand     %f14, 1, %f16
+       std             %f14, [$out + 0x38]
+       des_kexpand     %f16, 3, %f20
+       std             %f12, [$out + 0x30]
+       des_kexpand     %f16, 2, %f18
+       std             %f16, [$out + 0x40]
+       des_kexpand     %f20, 3, %f24
+       std             %f20, [$out + 0x50]
+       des_kexpand     %f20, 2, %f22
+       std             %f18, [$out + 0x48]
+       des_kexpand     %f24, 3, %f28
+       std             %f24, [$out + 0x60]
+       des_kexpand     %f24, 2, %f26
+       std             %f22, [$out + 0x58]
+       des_kexpand     %f28, 1, %f30
+       std             %f28, [$out + 0x70]
+       std             %f26, [$out + 0x68]
+       retl
+       std             %f30, [$out + 0x78]
+.size  des_t4_key_expand,.-des_t4_key_expand
+___
+}
+{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
+  my ($ileft,$iright,$omask) = map("%g$_",(1..3));
+
+$code.=<<___;
+.globl des_t4_cbc_encrypt
+.align 32
+des_t4_cbc_encrypt:
+       cmp             $len, 0
+       be,pn           $::size_t_cc, .Lcbc_abort
+       nop
+       ld              [$ivec + 0], %f0        ! load ivec
+       ld              [$ivec + 4], %f1
+
+       and             $inp, 7, $ileft
+       andn            $inp, 7, $inp
+       sll             $ileft, 3, $ileft
+       mov             0xff, $omask
+       prefetch        [$inp], 20
+       prefetch        [$inp + 63], 20
+       sub             %g0, $ileft, $iright
+       and             $out, 7, %g4
+       alignaddrl      $out, %g0, $out
+       srl             $omask, %g4, $omask
+       srlx            $len, 3, $len
+       movrz           %g4, 0, $omask
+       prefetch        [$out], 22
+
+       ldd             [$key + 0x00], %f4      ! load key schedule
+       ldd             [$key + 0x08], %f6
+       ldd             [$key + 0x10], %f8
+       ldd             [$key + 0x18], %f10
+       ldd             [$key + 0x20], %f12
+       ldd             [$key + 0x28], %f14
+       ldd             [$key + 0x30], %f16
+       ldd             [$key + 0x38], %f18
+       ldd             [$key + 0x40], %f20
+       ldd             [$key + 0x48], %f22
+       ldd             [$key + 0x50], %f24
+       ldd             [$key + 0x58], %f26
+       ldd             [$key + 0x60], %f28
+       ldd             [$key + 0x68], %f30
+       ldd             [$key + 0x70], %f32
+       ldd             [$key + 0x78], %f34
+
+.Ldes_cbc_enc_loop:
+       ldx             [$inp + 0], %g4
+       brz,pt          $ileft, 4f
+       nop
+
+       ldx             [$inp + 8], %g5
+       sllx            %g4, $ileft, %g4
+       srlx            %g5, $iright, %g5
+       or              %g5, %g4, %g4
+4:
+       movxtod         %g4, %f2
+       prefetch        [$inp + 8+63], 20
+       add             $inp, 8, $inp
+       fxor            %f2, %f0, %f0           ! ^= ivec
+       prefetch        [$out + 63], 22
+
+       des_ip          %f0, %f0
+       des_round       %f4, %f6, %f0, %f0
+       des_round       %f8, %f10, %f0, %f0
+       des_round       %f12, %f14, %f0, %f0
+       des_round       %f16, %f18, %f0, %f0
+       des_round       %f20, %f22, %f0, %f0
+       des_round       %f24, %f26, %f0, %f0
+       des_round       %f28, %f30, %f0, %f0
+       des_round       %f32, %f34, %f0, %f0
+       des_iip         %f0, %f0
+
+       brnz,pn         $omask, 2f
+       sub             $len, 1, $len
+
+       std             %f0, [$out + 0]
+       brnz,pt         $len, .Ldes_cbc_enc_loop
+       add             $out, 8, $out
+
+       st              %f0, [$ivec + 0]        ! write out ivec
+       retl
+       st              %f1, [$ivec + 4]
+.Lcbc_abort:
+       retl
+       nop
+
+.align 16
+2:     ldxa            [$inp]0x82, %g4         ! avoid read-after-write hazard
+                                               ! and ~4x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f2           ! handle unaligned output
+
+       stda            %f2, [$out + $omask]0xc0        ! partial store
+       add             $out, 8, $out
+       orn             %g0, $omask, $omask
+       stda            %f2, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .Ldes_cbc_enc_loop+4
+       orn             %g0, $omask, $omask
+
+       st              %f0, [$ivec + 0]        ! write out ivec
+       retl
+       st              %f1, [$ivec + 4]
+.type  des_t4_cbc_encrypt,#function
+.size  des_t4_cbc_encrypt,.-des_t4_cbc_encrypt
+
+.globl des_t4_cbc_decrypt
+.align 32
+des_t4_cbc_decrypt:
+       cmp             $len, 0
+       be,pn           $::size_t_cc, .Lcbc_abort
+       nop
+       ld              [$ivec + 0], %f2        ! load ivec
+       ld              [$ivec + 4], %f3
+
+       and             $inp, 7, $ileft
+       andn            $inp, 7, $inp
+       sll             $ileft, 3, $ileft
+       mov             0xff, $omask
+       prefetch        [$inp], 20
+       prefetch        [$inp + 63], 20
+       sub             %g0, $ileft, $iright
+       and             $out, 7, %g4
+       alignaddrl      $out, %g0, $out
+       srl             $omask, %g4, $omask
+       srlx            $len, 3, $len
+       movrz           %g4, 0, $omask
+       prefetch        [$out], 22
+
+       ldd             [$key + 0x78], %f4      ! load key schedule
+       ldd             [$key + 0x70], %f6
+       ldd             [$key + 0x68], %f8
+       ldd             [$key + 0x60], %f10
+       ldd             [$key + 0x58], %f12
+       ldd             [$key + 0x50], %f14
+       ldd             [$key + 0x48], %f16
+       ldd             [$key + 0x40], %f18
+       ldd             [$key + 0x38], %f20
+       ldd             [$key + 0x30], %f22
+       ldd             [$key + 0x28], %f24
+       ldd             [$key + 0x20], %f26
+       ldd             [$key + 0x18], %f28
+       ldd             [$key + 0x10], %f30
+       ldd             [$key + 0x08], %f32
+       ldd             [$key + 0x00], %f34
+
+.Ldes_cbc_dec_loop:
+       ldx             [$inp + 0], %g4
+       brz,pt          $ileft, 4f
+       nop
+
+       ldx             [$inp + 8], %g5
+       sllx            %g4, $ileft, %g4
+       srlx            %g5, $iright, %g5
+       or              %g5, %g4, %g4
+4:
+       movxtod         %g4, %f0
+       prefetch        [$inp + 8+63], 20
+       add             $inp, 8, $inp
+       prefetch        [$out + 63], 22
+
+       des_ip          %f0, %f0
+       des_round       %f4, %f6, %f0, %f0
+       des_round       %f8, %f10, %f0, %f0
+       des_round       %f12, %f14, %f0, %f0
+       des_round       %f16, %f18, %f0, %f0
+       des_round       %f20, %f22, %f0, %f0
+       des_round       %f24, %f26, %f0, %f0
+       des_round       %f28, %f30, %f0, %f0
+       des_round       %f32, %f34, %f0, %f0
+       des_iip         %f0, %f0
+
+       fxor            %f2, %f0, %f0           ! ^= ivec
+       movxtod         %g4, %f2
+
+       brnz,pn         $omask, 2f
+       sub             $len, 1, $len
+
+       std             %f0, [$out + 0]
+       brnz,pt         $len, .Ldes_cbc_dec_loop
+       add             $out, 8, $out
+
+       st              %f2, [$ivec + 0]        ! write out ivec
+       retl
+       st              %f3, [$ivec + 4]
+
+.align 16
+2:     ldxa            [$inp]0x82, %g4         ! avoid read-after-write hazard
+                                               ! and ~4x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f0           ! handle unaligned output
+
+       stda            %f0, [$out + $omask]0xc0        ! partial store
+       add             $out, 8, $out
+       orn             %g0, $omask, $omask
+       stda            %f0, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .Ldes_cbc_dec_loop+4
+       orn             %g0, $omask, $omask
+
+       st              %f2, [$ivec + 0]        ! write out ivec
+       retl
+       st              %f3, [$ivec + 4]
+.type  des_t4_cbc_decrypt,#function
+.size  des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
+___
+
+# One might wonder why there are back-to-back des_iip/des_ip pairs
+# between EDE passes. Indeed, aren't they inverses of each other?
+# They almost are. The outcome of the pair is the 32-bit words being
+# swapped in the target register. Consider a des_iip/des_ip pair as a
+# way to perform the required swap; it's actually the fastest way here.
+
+$code.=<<___;
+.globl des_t4_ede3_cbc_encrypt
+.align 32
+des_t4_ede3_cbc_encrypt:
+       cmp             $len, 0
+       be,pn           $::size_t_cc, .Lcbc_abort
+       nop
+       ld              [$ivec + 0], %f0        ! load ivec
+       ld              [$ivec + 4], %f1
+
+       and             $inp, 7, $ileft
+       andn            $inp, 7, $inp
+       sll             $ileft, 3, $ileft
+       mov             0xff, $omask
+       prefetch        [$inp], 20
+       prefetch        [$inp + 63], 20
+       sub             %g0, $ileft, $iright
+       and             $out, 7, %g4
+       alignaddrl      $out, %g0, $out
+       srl             $omask, %g4, $omask
+       srlx            $len, 3, $len
+       movrz           %g4, 0, $omask
+       prefetch        [$out], 22
+
+       ldd             [$key + 0x00], %f4      ! load key schedule
+       ldd             [$key + 0x08], %f6
+       ldd             [$key + 0x10], %f8
+       ldd             [$key + 0x18], %f10
+       ldd             [$key + 0x20], %f12
+       ldd             [$key + 0x28], %f14
+       ldd             [$key + 0x30], %f16
+       ldd             [$key + 0x38], %f18
+       ldd             [$key + 0x40], %f20
+       ldd             [$key + 0x48], %f22
+       ldd             [$key + 0x50], %f24
+       ldd             [$key + 0x58], %f26
+       ldd             [$key + 0x60], %f28
+       ldd             [$key + 0x68], %f30
+       ldd             [$key + 0x70], %f32
+       ldd             [$key + 0x78], %f34
+
+.Ldes_ede3_cbc_enc_loop:
+       ldx             [$inp + 0], %g4
+       brz,pt          $ileft, 4f
+       nop
+
+       ldx             [$inp + 8], %g5
+       sllx            %g4, $ileft, %g4
+       srlx            %g5, $iright, %g5
+       or              %g5, %g4, %g4
+4:
+       movxtod         %g4, %f2
+       prefetch        [$inp + 8+63], 20
+       add             $inp, 8, $inp
+       fxor            %f2, %f0, %f0           ! ^= ivec
+       prefetch        [$out + 63], 22
+
+       des_ip          %f0, %f0
+       des_round       %f4, %f6, %f0, %f0
+       des_round       %f8, %f10, %f0, %f0
+       des_round       %f12, %f14, %f0, %f0
+       des_round       %f16, %f18, %f0, %f0
+       ldd             [$key + 0x100-0x08], %f36
+       ldd             [$key + 0x100-0x10], %f38
+       des_round       %f20, %f22, %f0, %f0
+       ldd             [$key + 0x100-0x18], %f40
+       ldd             [$key + 0x100-0x20], %f42
+       des_round       %f24, %f26, %f0, %f0
+       ldd             [$key + 0x100-0x28], %f44
+       ldd             [$key + 0x100-0x30], %f46
+       des_round       %f28, %f30, %f0, %f0
+       ldd             [$key + 0x100-0x38], %f48
+       ldd             [$key + 0x100-0x40], %f50
+       des_round       %f32, %f34, %f0, %f0
+       ldd             [$key + 0x100-0x48], %f52
+       ldd             [$key + 0x100-0x50], %f54
+       des_iip         %f0, %f0
+
+       ldd             [$key + 0x100-0x58], %f56
+       ldd             [$key + 0x100-0x60], %f58
+       des_ip          %f0, %f0
+       ldd             [$key + 0x100-0x68], %f60
+       ldd             [$key + 0x100-0x70], %f62
+       des_round       %f36, %f38, %f0, %f0
+       ldd             [$key + 0x100-0x78], %f36
+       ldd             [$key + 0x100-0x80], %f38
+       des_round       %f40, %f42, %f0, %f0
+       des_round       %f44, %f46, %f0, %f0
+       des_round       %f48, %f50, %f0, %f0
+       ldd             [$key + 0x100+0x00], %f40
+       ldd             [$key + 0x100+0x08], %f42
+       des_round       %f52, %f54, %f0, %f0
+       ldd             [$key + 0x100+0x10], %f44
+       ldd             [$key + 0x100+0x18], %f46
+       des_round       %f56, %f58, %f0, %f0
+       ldd             [$key + 0x100+0x20], %f48
+       ldd             [$key + 0x100+0x28], %f50
+       des_round       %f60, %f62, %f0, %f0
+       ldd             [$key + 0x100+0x30], %f52
+       ldd             [$key + 0x100+0x38], %f54
+       des_round       %f36, %f38, %f0, %f0
+       ldd             [$key + 0x100+0x40], %f56
+       ldd             [$key + 0x100+0x48], %f58
+       des_iip         %f0, %f0
+
+       ldd             [$key + 0x100+0x50], %f60
+       ldd             [$key + 0x100+0x58], %f62
+       des_ip          %f0, %f0
+       ldd             [$key + 0x100+0x60], %f36
+       ldd             [$key + 0x100+0x68], %f38
+       des_round       %f40, %f42, %f0, %f0
+       ldd             [$key + 0x100+0x70], %f40
+       ldd             [$key + 0x100+0x78], %f42
+       des_round       %f44, %f46, %f0, %f0
+       des_round       %f48, %f50, %f0, %f0
+       des_round       %f52, %f54, %f0, %f0
+       des_round       %f56, %f58, %f0, %f0
+       des_round       %f60, %f62, %f0, %f0
+       des_round       %f36, %f38, %f0, %f0
+       des_round       %f40, %f42, %f0, %f0
+       des_iip         %f0, %f0
+
+       brnz,pn         $omask, 2f
+       sub             $len, 1, $len
+
+       std             %f0, [$out + 0]
+       brnz,pt         $len, .Ldes_ede3_cbc_enc_loop
+       add             $out, 8, $out
+
+       st              %f0, [$ivec + 0]        ! write out ivec
+       retl
+       st              %f1, [$ivec + 4]
+
+.align 16
+2:     ldxa            [$inp]0x82, %g4         ! avoid read-after-write hazard
+                                               ! and ~2x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f2           ! handle unaligned output
+
+       stda            %f2, [$out + $omask]0xc0        ! partial store
+       add             $out, 8, $out
+       orn             %g0, $omask, $omask
+       stda            %f2, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .Ldes_ede3_cbc_enc_loop+4
+       orn             %g0, $omask, $omask
+
+       st              %f0, [$ivec + 0]        ! write out ivec
+       retl
+       st              %f1, [$ivec + 4]
+.type  des_t4_ede3_cbc_encrypt,#function
+.size  des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt
+
+.globl des_t4_ede3_cbc_decrypt
+.align 32
+des_t4_ede3_cbc_decrypt:
+       cmp             $len, 0
+       be,pn           $::size_t_cc, .Lcbc_abort
+       nop
+       ld              [$ivec + 0], %f2        ! load ivec
+       ld              [$ivec + 4], %f3
+
+       and             $inp, 7, $ileft
+       andn            $inp, 7, $inp
+       sll             $ileft, 3, $ileft
+       mov             0xff, $omask
+       prefetch        [$inp], 20
+       prefetch        [$inp + 63], 20
+       sub             %g0, $ileft, $iright
+       and             $out, 7, %g4
+       alignaddrl      $out, %g0, $out
+       srl             $omask, %g4, $omask
+       srlx            $len, 3, $len
+       movrz           %g4, 0, $omask
+       prefetch        [$out], 22
+
+       ldd             [$key + 0x100+0x78], %f4        ! load key schedule
+       ldd             [$key + 0x100+0x70], %f6
+       ldd             [$key + 0x100+0x68], %f8
+       ldd             [$key + 0x100+0x60], %f10
+       ldd             [$key + 0x100+0x58], %f12
+       ldd             [$key + 0x100+0x50], %f14
+       ldd             [$key + 0x100+0x48], %f16
+       ldd             [$key + 0x100+0x40], %f18
+       ldd             [$key + 0x100+0x38], %f20
+       ldd             [$key + 0x100+0x30], %f22
+       ldd             [$key + 0x100+0x28], %f24
+       ldd             [$key + 0x100+0x20], %f26
+       ldd             [$key + 0x100+0x18], %f28
+       ldd             [$key + 0x100+0x10], %f30
+       ldd             [$key + 0x100+0x08], %f32
+       ldd             [$key + 0x100+0x00], %f34
+
+.Ldes_ede3_cbc_dec_loop:
+       ldx             [$inp + 0], %g4
+       brz,pt          $ileft, 4f
+       nop
+
+       ldx             [$inp + 8], %g5
+       sllx            %g4, $ileft, %g4
+       srlx            %g5, $iright, %g5
+       or              %g5, %g4, %g4
+4:
+       movxtod         %g4, %f0
+       prefetch        [$inp + 8+63], 20
+       add             $inp, 8, $inp
+       prefetch        [$out + 63], 22
+
+       des_ip          %f0, %f0
+       des_round       %f4, %f6, %f0, %f0
+       des_round       %f8, %f10, %f0, %f0
+       des_round       %f12, %f14, %f0, %f0
+       des_round       %f16, %f18, %f0, %f0
+       ldd             [$key + 0x80+0x00], %f36
+       ldd             [$key + 0x80+0x08], %f38
+       des_round       %f20, %f22, %f0, %f0
+       ldd             [$key + 0x80+0x10], %f40
+       ldd             [$key + 0x80+0x18], %f42
+       des_round       %f24, %f26, %f0, %f0
+       ldd             [$key + 0x80+0x20], %f44
+       ldd             [$key + 0x80+0x28], %f46
+       des_round       %f28, %f30, %f0, %f0
+       ldd             [$key + 0x80+0x30], %f48
+       ldd             [$key + 0x80+0x38], %f50
+       des_round       %f32, %f34, %f0, %f0
+       ldd             [$key + 0x80+0x40], %f52
+       ldd             [$key + 0x80+0x48], %f54
+       des_iip         %f0, %f0
+
+       ldd             [$key + 0x80+0x50], %f56
+       ldd             [$key + 0x80+0x58], %f58
+       des_ip          %f0, %f0
+       ldd             [$key + 0x80+0x60], %f60
+       ldd             [$key + 0x80+0x68], %f62
+       des_round       %f36, %f38, %f0, %f0
+       ldd             [$key + 0x80+0x70], %f36
+       ldd             [$key + 0x80+0x78], %f38
+       des_round       %f40, %f42, %f0, %f0
+       des_round       %f44, %f46, %f0, %f0
+       des_round       %f48, %f50, %f0, %f0
+       ldd             [$key + 0x80-0x08], %f40
+       ldd             [$key + 0x80-0x10], %f42
+       des_round       %f52, %f54, %f0, %f0
+       ldd             [$key + 0x80-0x18], %f44
+       ldd             [$key + 0x80-0x20], %f46
+       des_round       %f56, %f58, %f0, %f0
+       ldd             [$key + 0x80-0x28], %f48
+       ldd             [$key + 0x80-0x30], %f50
+       des_round       %f60, %f62, %f0, %f0
+       ldd             [$key + 0x80-0x38], %f52
+       ldd             [$key + 0x80-0x40], %f54
+       des_round       %f36, %f38, %f0, %f0
+       ldd             [$key + 0x80-0x48], %f56
+       ldd             [$key + 0x80-0x50], %f58
+       des_iip         %f0, %f0
+
+       ldd             [$key + 0x80-0x58], %f60
+       ldd             [$key + 0x80-0x60], %f62
+       des_ip          %f0, %f0
+       ldd             [$key + 0x80-0x68], %f36
+       ldd             [$key + 0x80-0x70], %f38
+       des_round       %f40, %f42, %f0, %f0
+       ldd             [$key + 0x80-0x78], %f40
+       ldd             [$key + 0x80-0x80], %f42
+       des_round       %f44, %f46, %f0, %f0
+       des_round       %f48, %f50, %f0, %f0
+       des_round       %f52, %f54, %f0, %f0
+       des_round       %f56, %f58, %f0, %f0
+       des_round       %f60, %f62, %f0, %f0
+       des_round       %f36, %f38, %f0, %f0
+       des_round       %f40, %f42, %f0, %f0
+       des_iip         %f0, %f0
+
+       fxor            %f2, %f0, %f0           ! ^= ivec
+       movxtod         %g4, %f2
+
+       brnz,pn         $omask, 2f
+       sub             $len, 1, $len
+
+       std             %f0, [$out + 0]
+       brnz,pt         $len, .Ldes_ede3_cbc_dec_loop
+       add             $out, 8, $out
+
+       st              %f2, [$ivec + 0]        ! write out ivec
+       retl
+       st              %f3, [$ivec + 4]
+
+.align 16
+2:     ldxa            [$inp]0x82, %g4         ! avoid read-after-write hazard
+                                               ! and ~3x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f0           ! handle unaligned output
+
+       stda            %f0, [$out + $omask]0xc0        ! partial store
+       add             $out, 8, $out
+       orn             %g0, $omask, $omask
+       stda            %f0, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .Ldes_ede3_cbc_dec_loop+4
+       orn             %g0, $omask, $omask
+
+       st              %f2, [$ivec + 0]        ! write out ivec
+       retl
+       st              %f3, [$ivec + 4]
+.type  des_t4_ede3_cbc_decrypt,#function
+.size  des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
+___
+}
+$code.=<<___;
+.asciz  "DES for SPARC T4, David S. Miller, Andy Polyakov"
+.align  4
+___
+
+&emit_assembler();
+
+close STDOUT;
index 1a8e41d..23ea9d3 100644 (file)
                                 } \
                         }
 
-# if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)) || defined(__ICC)
+# if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER))
 #  define ROTATE(a,n)     (_lrotr(a,n))
+# elif defined(__ICC)
+#  define ROTATE(a,n)     (_rotr(a,n))
 # elif defined(__GNUC__) && __GNUC__>=2 && !defined(__STRICT_ANSI__) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
 #  if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
 #   define ROTATE(a,n)   ({ register unsigned int ret;   \
index 16ba0a9..514a706 100644 (file)
 # include <sys/ioctl.h>
 #endif
 
-#if defined(OPENSSL_SYS_MSDOS) && !defined(__CYGWIN32__) && !defined(OPENSSL_SYS_WINCE)
+#if defined(OPENSSL_SYS_MSDOS) && !defined(OPENSSL_SYS_WINCE)
 # include <conio.h>
 # define fgets(a,b,c) noecho_fgets(a,b,c)
 #endif
index 6d574f4..46fa5ac 100644 (file)
@@ -18,9 +18,9 @@ APPS=
 
 LIB=$(TOP)/libcrypto.a
 LIBSRC= dh_asn1.c dh_gen.c dh_key.c dh_lib.c dh_check.c dh_err.c dh_depr.c \
-       dh_ameth.c dh_pmeth.c dh_prn.c
+       dh_ameth.c dh_pmeth.c dh_prn.c dh_rfc5114.c dh_kdf.c
 LIBOBJ= dh_asn1.o dh_gen.o dh_key.o dh_lib.o dh_check.o dh_err.o dh_depr.o \
-       dh_ameth.o dh_pmeth.o dh_prn.o
+       dh_ameth.o dh_pmeth.o dh_prn.o dh_rfc5114.o dh_kdf.o
 
 SRC= $(LIBSRC)
 
@@ -80,13 +80,13 @@ clean:
 
 dh_ameth.o: ../../e_os.h ../../include/openssl/asn1.h
 dh_ameth.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
-dh_ameth.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
-dh_ameth.o: ../../include/openssl/dh.h ../../include/openssl/e_os2.h
-dh_ameth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
-dh_ameth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-dh_ameth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-dh_ameth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-dh_ameth.o: ../../include/openssl/opensslconf.h
+dh_ameth.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h
+dh_ameth.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h
+dh_ameth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
+dh_ameth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
+dh_ameth.o: ../../include/openssl/err.h ../../include/openssl/evp.h
+dh_ameth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+dh_ameth.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
 dh_ameth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 dh_ameth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
 dh_ameth.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
@@ -134,6 +134,19 @@ dh_gen.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
 dh_gen.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
 dh_gen.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
 dh_gen.o: ../cryptlib.h dh_gen.c
+dh_kdf.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
+dh_kdf.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h
+dh_kdf.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h
+dh_kdf.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
+dh_kdf.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
+dh_kdf.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
+dh_kdf.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+dh_kdf.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+dh_kdf.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+dh_kdf.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+dh_kdf.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+dh_kdf.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+dh_kdf.o: dh_kdf.c
 dh_key.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
 dh_key.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
 dh_key.o: ../../include/openssl/dh.h ../../include/openssl/e_os2.h
@@ -160,11 +173,12 @@ dh_pmeth.o: ../../e_os.h ../../include/openssl/asn1.h
 dh_pmeth.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
 dh_pmeth.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
 dh_pmeth.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h
-dh_pmeth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
-dh_pmeth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
-dh_pmeth.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-dh_pmeth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-dh_pmeth.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+dh_pmeth.o: ../../include/openssl/dsa.h ../../include/openssl/e_os2.h
+dh_pmeth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
+dh_pmeth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
+dh_pmeth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
+dh_pmeth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+dh_pmeth.o: ../../include/openssl/opensslconf.h
 dh_pmeth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 dh_pmeth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
 dh_pmeth.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
@@ -180,3 +194,11 @@ dh_prn.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
 dh_prn.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 dh_prn.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
 dh_prn.o: ../../include/openssl/symhacks.h ../cryptlib.h dh_prn.c
+dh_rfc5114.o: ../../e_os.h ../../include/openssl/bio.h
+dh_rfc5114.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
+dh_rfc5114.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h
+dh_rfc5114.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
+dh_rfc5114.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
+dh_rfc5114.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+dh_rfc5114.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
+dh_rfc5114.o: ../../include/openssl/symhacks.h ../cryptlib.h dh_rfc5114.c
index 4cbaa97..0502f1a 100644 (file)
@@ -167,6 +167,9 @@ struct dh_st {
 # define DH_CHECK_P_NOT_SAFE_PRIME       0x02
 # define DH_UNABLE_TO_CHECK_GENERATOR    0x04
 # define DH_NOT_SUITABLE_GENERATOR       0x08
+# define DH_CHECK_Q_NOT_PRIME            0x10
+# define DH_CHECK_INVALID_Q_VALUE        0x20
+# define DH_CHECK_INVALID_J_VALUE        0x40
 
 /* DH_check_pub_key error codes */
 # define DH_CHECK_PUBKEY_TOO_SMALL       0x01
@@ -217,8 +220,11 @@ int DH_check(const DH *dh, int *codes);
 int DH_check_pub_key(const DH *dh, const BIGNUM *pub_key, int *codes);
 int DH_generate_key(DH *dh);
 int DH_compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh);
+int DH_compute_key_padded(unsigned char *key, const BIGNUM *pub_key, DH *dh);
 DH *d2i_DHparams(DH **a, const unsigned char **pp, long length);
 int i2d_DHparams(const DH *a, unsigned char **pp);
+DH *d2i_DHxparams(DH **a, const unsigned char **pp, long length);
+int i2d_DHxparams(const DH *a, unsigned char **pp);
 # ifndef OPENSSL_NO_FP_API
 int DHparams_print_fp(FILE *fp, const DH *x);
 # endif
@@ -228,16 +234,109 @@ int DHparams_print(BIO *bp, const DH *x);
 int DHparams_print(char *bp, const DH *x);
 # endif
 
+/* RFC 5114 parameters */
+DH *DH_get_1024_160(void);
+DH *DH_get_2048_224(void);
+DH *DH_get_2048_256(void);
+
+/* RFC2631 KDF */
+int DH_KDF_X9_42(unsigned char *out, size_t outlen,
+                 const unsigned char *Z, size_t Zlen,
+                 ASN1_OBJECT *key_oid,
+                 const unsigned char *ukm, size_t ukmlen, const EVP_MD *md);
+
 # define EVP_PKEY_CTX_set_dh_paramgen_prime_len(ctx, len) \
         EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DH, EVP_PKEY_OP_PARAMGEN, \
                         EVP_PKEY_CTRL_DH_PARAMGEN_PRIME_LEN, len, NULL)
 
+# define EVP_PKEY_CTX_set_dh_paramgen_subprime_len(ctx, len) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DH, EVP_PKEY_OP_PARAMGEN, \
+                        EVP_PKEY_CTRL_DH_PARAMGEN_SUBPRIME_LEN, len, NULL)
+
+# define EVP_PKEY_CTX_set_dh_paramgen_type(ctx, typ) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DH, EVP_PKEY_OP_PARAMGEN, \
+                        EVP_PKEY_CTRL_DH_PARAMGEN_TYPE, typ, NULL)
+
 # define EVP_PKEY_CTX_set_dh_paramgen_generator(ctx, gen) \
         EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DH, EVP_PKEY_OP_PARAMGEN, \
                         EVP_PKEY_CTRL_DH_PARAMGEN_GENERATOR, gen, NULL)
 
+# define EVP_PKEY_CTX_set_dh_rfc5114(ctx, gen) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, EVP_PKEY_OP_PARAMGEN, \
+                        EVP_PKEY_CTRL_DH_RFC5114, gen, NULL)
+
+# define EVP_PKEY_CTX_set_dhx_rfc5114(ctx, gen) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, EVP_PKEY_OP_PARAMGEN, \
+                        EVP_PKEY_CTRL_DH_RFC5114, gen, NULL)
+
+# define EVP_PKEY_CTX_set_dh_kdf_type(ctx, kdf) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_DH_KDF_TYPE, kdf, NULL)
+
+# define EVP_PKEY_CTX_get_dh_kdf_type(ctx) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_DH_KDF_TYPE, -2, NULL)
+
+# define EVP_PKEY_CTX_set0_dh_kdf_oid(ctx, oid) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_DH_KDF_OID, 0, (void *)oid)
+
+# define EVP_PKEY_CTX_get0_dh_kdf_oid(ctx, poid) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_GET_DH_KDF_OID, 0, (void *)poid)
+
+# define EVP_PKEY_CTX_set_dh_kdf_md(ctx, md) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_DH_KDF_MD, 0, (void *)md)
+
+# define EVP_PKEY_CTX_get_dh_kdf_md(ctx, pmd) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_GET_DH_KDF_MD, 0, (void *)pmd)
+
+# define EVP_PKEY_CTX_set_dh_kdf_outlen(ctx, len) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_DH_KDF_OUTLEN, len, NULL)
+
+# define EVP_PKEY_CTX_get_dh_kdf_outlen(ctx, plen) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \
+                                EVP_PKEY_OP_DERIVE, \
+                        EVP_PKEY_CTRL_GET_DH_KDF_OUTLEN, 0, (void *)plen)
+
+# define EVP_PKEY_CTX_set0_dh_kdf_ukm(ctx, p, plen) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_DH_KDF_UKM, plen, (void *)p)
+
+# define EVP_PKEY_CTX_get0_dh_kdf_ukm(ctx, p) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_GET_DH_KDF_UKM, 0, (void *)p)
+
 # define EVP_PKEY_CTRL_DH_PARAMGEN_PRIME_LEN     (EVP_PKEY_ALG_CTRL + 1)
 # define EVP_PKEY_CTRL_DH_PARAMGEN_GENERATOR     (EVP_PKEY_ALG_CTRL + 2)
+# define EVP_PKEY_CTRL_DH_RFC5114                (EVP_PKEY_ALG_CTRL + 3)
+# define EVP_PKEY_CTRL_DH_PARAMGEN_SUBPRIME_LEN  (EVP_PKEY_ALG_CTRL + 4)
+# define EVP_PKEY_CTRL_DH_PARAMGEN_TYPE          (EVP_PKEY_ALG_CTRL + 5)
+# define EVP_PKEY_CTRL_DH_KDF_TYPE               (EVP_PKEY_ALG_CTRL + 6)
+# define EVP_PKEY_CTRL_DH_KDF_MD                 (EVP_PKEY_ALG_CTRL + 7)
+# define EVP_PKEY_CTRL_GET_DH_KDF_MD             (EVP_PKEY_ALG_CTRL + 8)
+# define EVP_PKEY_CTRL_DH_KDF_OUTLEN             (EVP_PKEY_ALG_CTRL + 9)
+# define EVP_PKEY_CTRL_GET_DH_KDF_OUTLEN         (EVP_PKEY_ALG_CTRL + 10)
+# define EVP_PKEY_CTRL_DH_KDF_UKM                (EVP_PKEY_ALG_CTRL + 11)
+# define EVP_PKEY_CTRL_GET_DH_KDF_UKM            (EVP_PKEY_ALG_CTRL + 12)
+# define EVP_PKEY_CTRL_DH_KDF_OID                (EVP_PKEY_ALG_CTRL + 13)
+# define EVP_PKEY_CTRL_GET_DH_KDF_OID            (EVP_PKEY_ALG_CTRL + 14)
+
+/* KDF types */
+# define EVP_PKEY_DH_KDF_NONE                            1
+# define EVP_PKEY_DH_KDF_X9_42                           2
 
 /* BEGIN ERROR CODES */
 /*
@@ -252,6 +351,9 @@ void ERR_load_DH_strings(void);
 # define DH_F_COMPUTE_KEY                                 102
 # define DH_F_DHPARAMS_PRINT_FP                           101
 # define DH_F_DH_BUILTIN_GENPARAMS                        106
+# define DH_F_DH_CMS_DECRYPT                              117
+# define DH_F_DH_CMS_SET_PEERKEY                          118
+# define DH_F_DH_CMS_SET_SHARED_INFO                      119
 # define DH_F_DH_COMPUTE_KEY                              114
 # define DH_F_DH_GENERATE_KEY                             115
 # define DH_F_DH_GENERATE_PARAMETERS_EX                   116
@@ -273,6 +375,7 @@ void ERR_load_DH_strings(void);
 # define DH_R_BN_ERROR                                    106
 # define DH_R_DECODE_ERROR                                104
 # define DH_R_INVALID_PUBKEY                              102
+# define DH_R_KDF_PARAMETER_ERROR                         112
 # define DH_R_KEYS_NOT_SET                                108
 # define DH_R_KEY_SIZE_TOO_SMALL                          110
 # define DH_R_MODULUS_TOO_LARGE                           103
@@ -280,6 +383,8 @@ void ERR_load_DH_strings(void);
 # define DH_R_NO_PARAMETERS_SET                           107
 # define DH_R_NO_PRIVATE_VALUE                            100
 # define DH_R_PARAMETER_ENCODING_ERROR                    105
+# define DH_R_PEER_KEY_ERROR                              113
+# define DH_R_SHARED_INFO_ERROR                           114
 
 #ifdef  __cplusplus
 }
index 873eb2e..ac72468 100644 (file)
 #include <openssl/dh.h>
 #include <openssl/bn.h>
 #include "asn1_locl.h"
+#ifndef OPENSSL_NO_CMS
+# include <openssl/cms.h>
+#endif
+
+extern const EVP_PKEY_ASN1_METHOD dhx_asn1_meth;
+
+/*
+ * i2d/d2i like DH parameter functions which use the appropriate routine for
+ * PKCS#3 DH or X9.42 DH.
+ */
+
+static DH *d2i_dhp(const EVP_PKEY *pkey, const unsigned char **pp,
+                   long length)
+{
+    /* Keys bound to the X9.42 method use the DHX decoder, others PKCS#3 */
+    const int is_x942 = (pkey->ameth == &dhx_asn1_meth);
+
+    return is_x942 ? d2i_DHxparams(NULL, pp, length)
+                   : d2i_DHparams(NULL, pp, length);
+}
+
+static int i2d_dhp(const EVP_PKEY *pkey, const DH *a, unsigned char **pp)
+{
+    /* Keys bound to the X9.42 method use the DHX encoder, others PKCS#3 */
+    const int is_x942 = (pkey->ameth == &dhx_asn1_meth);
+
+    return is_x942 ? i2d_DHxparams(a, pp) : i2d_DHparams(a, pp);
+}
 
 static void int_dh_free(EVP_PKEY *pkey)
 {
@@ -94,7 +119,7 @@ static int dh_pub_decode(EVP_PKEY *pkey, X509_PUBKEY *pubkey)
     pm = pstr->data;
     pmlen = pstr->length;
 
-    if (!(dh = d2i_DHparams(NULL, &pm, pmlen))) {
+    if (!(dh = d2i_dhp(pkey, &pm, pmlen))) {
         DHerr(DH_F_DH_PUB_DECODE, DH_R_DECODE_ERROR);
         goto err;
     }
@@ -111,7 +136,7 @@ static int dh_pub_decode(EVP_PKEY *pkey, X509_PUBKEY *pubkey)
     }
 
     ASN1_INTEGER_free(public_key);
-    EVP_PKEY_assign_DH(pkey, dh);
+    EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, dh);
     return 1;
 
  err:
@@ -139,7 +164,7 @@ static int dh_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey)
         DHerr(DH_F_DH_PUB_ENCODE, ERR_R_MALLOC_FAILURE);
         goto err;
     }
-    str->length = i2d_DHparams(dh, &str->data);
+    str->length = i2d_dhp(pkey, dh, &str->data);
     if (str->length <= 0) {
         DHerr(DH_F_DH_PUB_ENCODE, ERR_R_MALLOC_FAILURE);
         goto err;
@@ -159,7 +184,7 @@ static int dh_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey)
         goto err;
     }
 
-    if (X509_PUBKEY_set0_param(pk, OBJ_nid2obj(EVP_PKEY_DH),
+    if (X509_PUBKEY_set0_param(pk, OBJ_nid2obj(pkey->ameth->pkey_id),
                                ptype, str, penc, penclen))
         return 1;
 
@@ -204,7 +229,7 @@ static int dh_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8)
     pstr = pval;
     pm = pstr->data;
     pmlen = pstr->length;
-    if (!(dh = d2i_DHparams(NULL, &pm, pmlen)))
+    if (!(dh = d2i_dhp(pkey, &pm, pmlen)))
         goto decerr;
     /* We have parameters now set private key */
     if (!(dh->priv_key = ASN1_INTEGER_to_BN(privkey, NULL))) {
@@ -215,7 +240,7 @@ static int dh_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8)
     if (!DH_generate_key(dh))
         goto dherr;
 
-    EVP_PKEY_assign_DH(pkey, dh);
+    EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, dh);
 
     ASN1_STRING_clear_free(privkey);
 
@@ -243,7 +268,7 @@ static int dh_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey)
         goto err;
     }
 
-    params->length = i2d_DHparams(pkey->pkey.dh, &params->data);
+    params->length = i2d_dhp(pkey, pkey->pkey.dh, &params->data);
     if (params->length <= 0) {
         DHerr(DH_F_DH_PRIV_ENCODE, ERR_R_MALLOC_FAILURE);
         goto err;
@@ -263,7 +288,7 @@ static int dh_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey)
     ASN1_STRING_clear_free(prkey);
     prkey = NULL;
 
-    if (!PKCS8_pkey_set0(p8, OBJ_nid2obj(NID_dhKeyAgreement), 0,
+    if (!PKCS8_pkey_set0(p8, OBJ_nid2obj(pkey->ameth->pkey_id), 0,
                          V_ASN1_SEQUENCE, params, dp, dplen))
         goto err;
 
@@ -292,17 +317,17 @@ static int dh_param_decode(EVP_PKEY *pkey,
                            const unsigned char **pder, int derlen)
 {
     DH *dh;
-    if (!(dh = d2i_DHparams(NULL, pder, derlen))) {
+    if (!(dh = d2i_dhp(pkey, pder, derlen))) {
         DHerr(DH_F_DH_PARAM_DECODE, ERR_R_DH_LIB);
         return 0;
     }
-    EVP_PKEY_assign_DH(pkey, dh);
+    EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, dh);
     return 1;
 }
 
 static int dh_param_encode(const EVP_PKEY *pkey, unsigned char **pder)
 {
-    return i2d_DHparams(pkey->pkey.dh, pder);
+    return i2d_dhp(pkey, pkey->pkey.dh, pder);
 }
 
 static int do_dh_print(BIO *bp, const DH *x, int indent,
@@ -334,15 +359,18 @@ static int do_dh_print(BIO *bp, const DH *x, int indent,
     }
 
     update_buflen(x->g, &buf_len);
+    update_buflen(x->q, &buf_len);
+    update_buflen(x->j, &buf_len);
+    update_buflen(x->counter, &buf_len);
     update_buflen(pub_key, &buf_len);
     update_buflen(priv_key, &buf_len);
 
     if (ptype == 2)
-        ktype = "PKCS#3 DH Private-Key";
+        ktype = "DH Private-Key";
     else if (ptype == 1)
-        ktype = "PKCS#3 DH Public-Key";
+        ktype = "DH Public-Key";
     else
-        ktype = "PKCS#3 DH Parameters";
+        ktype = "DH Parameters";
 
     m = OPENSSL_malloc(buf_len + 10);
     if (m == NULL) {
@@ -364,6 +392,29 @@ static int do_dh_print(BIO *bp, const DH *x, int indent,
         goto err;
     if (!ASN1_bn_print(bp, "generator:", x->g, m, indent))
         goto err;
+    if (x->q && !ASN1_bn_print(bp, "subgroup order:", x->q, m, indent))
+        goto err;
+    if (x->j && !ASN1_bn_print(bp, "subgroup factor:", x->j, m, indent))
+        goto err;
+    if (x->seed) {
+        int i;
+        BIO_indent(bp, indent, 128);
+        BIO_puts(bp, "seed:");
+        for (i = 0; i < x->seedlen; i++) {
+            if ((i % 15) == 0) {
+                if (BIO_puts(bp, "\n") <= 0
+                    || !BIO_indent(bp, indent + 4, 128))
+                    goto err;
+            }
+            if (BIO_printf(bp, "%02x%s", x->seed[i],
+                           ((i + 1) == x->seedlen) ? "" : ":") <= 0)
+                goto err;
+        }
+        if (BIO_write(bp, "\n", 1) <= 0)
+            return (0);
+    }
+    if (x->counter && !ASN1_bn_print(bp, "counter:", x->counter, m, indent))
+        goto err;
     if (x->length != 0) {
         BIO_indent(bp, indent, 128);
         if (BIO_printf(bp, "recommended-private-length: %d bits\n",
@@ -396,29 +447,76 @@ static int dh_cmp_parameters(const EVP_PKEY *a, const EVP_PKEY *b)
     if (BN_cmp(a->pkey.dh->p, b->pkey.dh->p) ||
         BN_cmp(a->pkey.dh->g, b->pkey.dh->g))
         return 0;
-    else
-        return 1;
+    else if (a->ameth == &dhx_asn1_meth) {
+        if (BN_cmp(a->pkey.dh->q, b->pkey.dh->q))
+            return 0;
+    }
+    return 1;
 }
 
-static int dh_copy_parameters(EVP_PKEY *to, const EVP_PKEY *from)
+static int int_dh_bn_cpy(BIGNUM **dst, const BIGNUM *src)
 {
     BIGNUM *a;
+    if (src) {
+        a = BN_dup(src);
+        if (!a)
+            return 0;
+    } else
+        a = NULL;
+    if (*dst)
+        BN_free(*dst);
+    *dst = a;
+    return 1;
+}
 
-    if ((a = BN_dup(from->pkey.dh->p)) == NULL)
+static int int_dh_param_copy(DH *to, const DH *from, int is_x942)
+{
+    if (is_x942 == -1)
+        is_x942 = ! !from->q;
+    if (!int_dh_bn_cpy(&to->p, from->p))
         return 0;
-    if (to->pkey.dh->p != NULL)
-        BN_free(to->pkey.dh->p);
-    to->pkey.dh->p = a;
-
-    if ((a = BN_dup(from->pkey.dh->g)) == NULL)
+    if (!int_dh_bn_cpy(&to->g, from->g))
         return 0;
-    if (to->pkey.dh->g != NULL)
-        BN_free(to->pkey.dh->g);
-    to->pkey.dh->g = a;
-
+    if (is_x942) {
+        if (!int_dh_bn_cpy(&to->q, from->q))
+            return 0;
+        if (!int_dh_bn_cpy(&to->j, from->j))
+            return 0;
+        if (to->seed) {
+            OPENSSL_free(to->seed);
+            to->seed = NULL;
+            to->seedlen = 0;
+        }
+        if (from->seed) {
+            to->seed = BUF_memdup(from->seed, from->seedlen);
+            if (!to->seed)
+                return 0;
+            to->seedlen = from->seedlen;
+        }
+    } else
+        to->length = from->length;
     return 1;
 }
 
+DH *DHparams_dup(DH *dh)
+{
+    DH *copy = DH_new();
+
+    if (copy == NULL)
+        return NULL;
+    /* -1 asks the copier to choose X9.42 or PKCS#3 handling from dh->q */
+    if (int_dh_param_copy(copy, dh, -1))
+        return copy;
+    DH_free(copy);
+    return NULL;
+}
+
+static int dh_copy_parameters(EVP_PKEY *to, const EVP_PKEY *from)
+{
+    /* The source key's method decides whether the X9.42 fields are copied */
+    int is_x942 = (from->ameth == &dhx_asn1_meth);
+
+    return int_dh_param_copy(to->pkey.dh, from->pkey.dh, is_x942);
+}
+
 static int dh_missing_parameters(const EVP_PKEY *a)
 {
     if (!a->pkey.dh->p || !a->pkey.dh->g)
@@ -459,6 +557,33 @@ int DHparams_print(BIO *bp, const DH *x)
     return do_dh_print(bp, x, 4, NULL, 0);
 }
 
+#ifndef OPENSSL_NO_CMS
+static int dh_cms_decrypt(CMS_RecipientInfo *ri);
+static int dh_cms_encrypt(CMS_RecipientInfo *ri);
+#endif
+
+static int dh_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
+{
+#ifndef OPENSSL_NO_CMS
+    /* CMS envelope handling: arg1 selects decrypt (1) or encrypt (0) */
+    if (op == ASN1_PKEY_CTRL_CMS_ENVELOPE) {
+        if (arg1 == 1)
+            return dh_cms_decrypt(arg2);
+        if (arg1 == 0)
+            return dh_cms_encrypt(arg2);
+        return -2;
+    }
+
+    /* Report the recipient info type through the int that arg2 points at */
+    if (op == ASN1_PKEY_CTRL_CMS_RI_TYPE) {
+        *(int *)arg2 = CMS_RECIPINFO_AGREE;
+        return 1;
+    }
+#endif
+    /* Every other operation is answered with -2 */
+    return -2;
+}
+
 const EVP_PKEY_ASN1_METHOD dh_asn1_meth = {
     EVP_PKEY_DH,
     EVP_PKEY_DH,
@@ -490,3 +615,343 @@ const EVP_PKEY_ASN1_METHOD dh_asn1_meth = {
     int_dh_free,
     0
 };
+
+const EVP_PKEY_ASN1_METHOD dhx_asn1_meth = { /* ASN.1 method table for X9.42 DH keys */
+    EVP_PKEY_DHX,
+    EVP_PKEY_DHX,
+    0,
+
+    "X9.42 DH",
+    "OpenSSL X9.42 DH method",
+
+    dh_pub_decode, /* the dh_* callbacks branch on pkey->ameth where the encoding differs */
+    dh_pub_encode,
+    dh_pub_cmp,
+    dh_public_print,
+
+    dh_priv_decode,
+    dh_priv_encode,
+    dh_private_print,
+
+    int_dh_size,
+    dh_bits,
+
+    dh_param_decode,
+    dh_param_encode,
+    dh_missing_parameters,
+    dh_copy_parameters,
+    dh_cmp_parameters,
+    dh_param_print,
+    0,
+
+    int_dh_free,
+    dh_pkey_ctrl /* handles the CMS envelope and recipient-info-type controls */
+};
+
+#ifndef OPENSSL_NO_CMS
+
+/*
+ * Build the originator's public key from CMS key agreement data and install
+ * it as the peer key of |pctx|.
+ *
+ * |alg| must carry the dhpublicnumber OID. The domain parameters are copied
+ * from the key already held by |pctx| (which must be of type EVP_PKEY_DHX)
+ * and |pubkey| supplies the DER encoded INTEGER public value.
+ *
+ * Returns 1 on success and 0 on any failure.
+ */
+static int dh_cms_set_peerkey(EVP_PKEY_CTX *pctx,
+                              X509_ALGOR *alg, ASN1_BIT_STRING *pubkey)
+{
+    ASN1_OBJECT *aoid;
+    int atype;
+    void *aval;
+    ASN1_INTEGER *public_key = NULL;
+    int rv = 0;
+    EVP_PKEY *pkpeer = NULL, *pk = NULL;
+    DH *dhpeer = NULL;
+    const unsigned char *p;
+    int plen;
+
+    X509_ALGOR_get0(&aoid, &atype, &aval, alg);
+    if (OBJ_obj2nid(aoid) != NID_dhpublicnumber)
+        goto err;
+    /*
+     * Only absent parameters allowed in RFC XXXX.
+     * NOTE(review): as written this test only rejects an explicit NULL
+     * parameter; any other parameter type is let through. Behaviour is
+     * deliberately left unchanged here - confirm the intended strictness.
+     */
+    if (atype != V_ASN1_UNDEF && atype == V_ASN1_NULL)
+        goto err;
+
+    pk = EVP_PKEY_CTX_get0_pkey(pctx);
+    if (!pk)
+        goto err;
+    if (pk->type != EVP_PKEY_DHX)
+        goto err;
+    /* Get parameters from parent key */
+    dhpeer = DHparams_dup(pk->pkey.dh);
+    /* DHparams_dup() returns NULL on allocation failure: do not deref it */
+    if (!dhpeer)
+        goto err;
+    /* Locate the encoded public value */
+    plen = ASN1_STRING_length(pubkey);
+    p = ASN1_STRING_data(pubkey);
+    if (!p || !plen)
+        goto err;
+
+    if (!(public_key = d2i_ASN1_INTEGER(NULL, &p, plen))) {
+        DHerr(DH_F_DH_CMS_SET_PEERKEY, DH_R_DECODE_ERROR);
+        goto err;
+    }
+
+    /* We have parameters now set public key */
+    if (!(dhpeer->pub_key = ASN1_INTEGER_to_BN(public_key, NULL))) {
+        DHerr(DH_F_DH_CMS_SET_PEERKEY, DH_R_BN_DECODE_ERROR);
+        goto err;
+    }
+
+    pkpeer = EVP_PKEY_new();
+    if (!pkpeer)
+        goto err;
+    EVP_PKEY_assign(pkpeer, pk->ameth->pkey_id, dhpeer);
+    /* pkpeer now references dhpeer; clear it so err: does not free it twice */
+    dhpeer = NULL;
+    if (EVP_PKEY_derive_set_peer(pctx, pkpeer) > 0)
+        rv = 1;
+ err:
+    if (public_key)
+        ASN1_INTEGER_free(public_key);
+    if (pkpeer)
+        EVP_PKEY_free(pkpeer);
+    if (dhpeer)
+        DH_free(dhpeer);
+    return rv;
+}
+
+/*
+ * Configure |pctx| for X9.42 key derivation from the key encryption
+ * algorithm stored in the key agreement recipient info |ri|, and initialise
+ * the recipient's key wrap cipher context.
+ *
+ * The KDF type is set to X9.42 with SHA1, the output length to the wrap
+ * cipher's key length, the KDF OID to the wrap cipher's OID, and the UKM
+ * (if present) is duplicated and handed to |pctx|.
+ *
+ * Returns 1 on success and 0 on any failure.
+ */
+static int dh_cms_set_shared_info(EVP_PKEY_CTX *pctx, CMS_RecipientInfo *ri)
+{
+    int rv = 0;
+
+    X509_ALGOR *alg, *kekalg = NULL;
+    ASN1_OCTET_STRING *ukm;
+    const unsigned char *p;
+    unsigned char *dukm = NULL;
+    size_t dukmlen = 0;
+    int keylen, plen;
+    const EVP_CIPHER *kekcipher;
+    EVP_CIPHER_CTX *kekctx;
+
+    if (!CMS_RecipientInfo_kari_get0_alg(ri, &alg, &ukm))
+        goto err;
+
+    /*
+     * For DH we only have one OID permissible. If ever any more get defined
+     * we will need something cleverer.
+     */
+    if (OBJ_obj2nid(alg->algorithm) != NID_id_smime_alg_ESDH) {
+        DHerr(DH_F_DH_CMS_SET_SHARED_INFO, DH_R_KDF_PARAMETER_ERROR);
+        goto err;
+    }
+
+    if (EVP_PKEY_CTX_set_dh_kdf_type(pctx, EVP_PKEY_DH_KDF_X9_42) <= 0)
+        goto err;
+
+    if (EVP_PKEY_CTX_set_dh_kdf_md(pctx, EVP_sha1()) <= 0)
+        goto err;
+
+    /*
+     * The AlgorithmIdentifier parameter is optional, so it can be absent in
+     * a received message: reject that instead of dereferencing NULL.
+     */
+    if (alg->parameter == NULL || alg->parameter->type != V_ASN1_SEQUENCE)
+        goto err;
+
+    /* The parameter wraps the encoding of the key wrap AlgorithmIdentifier */
+    p = alg->parameter->value.sequence->data;
+    plen = alg->parameter->value.sequence->length;
+    kekalg = d2i_X509_ALGOR(NULL, &p, plen);
+    if (!kekalg)
+        goto err;
+    kekctx = CMS_RecipientInfo_kari_get0_ctx(ri);
+    if (!kekctx)
+        goto err;
+    kekcipher = EVP_get_cipherbyobj(kekalg->algorithm);
+    if (!kekcipher || EVP_CIPHER_mode(kekcipher) != EVP_CIPH_WRAP_MODE)
+        goto err;
+    if (!EVP_EncryptInit_ex(kekctx, kekcipher, NULL, NULL, NULL))
+        goto err;
+    if (EVP_CIPHER_asn1_to_param(kekctx, kekalg->parameter) <= 0)
+        goto err;
+
+    keylen = EVP_CIPHER_CTX_key_length(kekctx);
+    if (EVP_PKEY_CTX_set_dh_kdf_outlen(pctx, keylen) <= 0)
+        goto err;
+    /* Use OBJ_nid2obj to ensure we use built in OID that isn't freed */
+    if (EVP_PKEY_CTX_set0_dh_kdf_oid(pctx,
+                                     OBJ_nid2obj(EVP_CIPHER_type(kekcipher)))
+        <= 0)
+        goto err;
+
+    if (ukm) {
+        dukmlen = ASN1_STRING_length(ukm);
+        dukm = BUF_memdup(ASN1_STRING_data(ukm), dukmlen);
+        if (!dukm)
+            goto err;
+    }
+
+    if (EVP_PKEY_CTX_set0_dh_kdf_ukm(pctx, dukm, dukmlen) <= 0)
+        goto err;
+    /* The copy has been handed to pctx: make sure err: does not free it */
+    dukm = NULL;
+
+    rv = 1;
+ err:
+    if (kekalg)
+        X509_ALGOR_free(kekalg);
+    if (dukm)
+        OPENSSL_free(dukm);
+    return rv;
+}
+
+/*
+ * Prepare the key agreement context of |ri| for decryption: install the
+ * originator's public key as peer key when none is set yet, then set up the
+ * derivation parameters. Returns 1 on success, 0 on failure.
+ */
+static int dh_cms_decrypt(CMS_RecipientInfo *ri)
+{
+    EVP_PKEY_CTX *pctx = CMS_RecipientInfo_get0_pkey_ctx(ri);
+
+    if (pctx == NULL)
+        return 0;
+
+    /* See if we need to set peer key */
+    if (EVP_PKEY_CTX_get0_peerkey(pctx) == NULL) {
+        X509_ALGOR *alg;
+        ASN1_BIT_STRING *pubkey;
+
+        /* alg and pubkey are only inspected once the lookup has succeeded */
+        if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &alg, &pubkey,
+                                                 NULL, NULL, NULL)
+            || alg == NULL || pubkey == NULL)
+            return 0;
+        if (dh_cms_set_peerkey(pctx, alg, pubkey) == 0) {
+            DHerr(DH_F_DH_CMS_DECRYPT, DH_R_PEER_KEY_ERROR);
+            return 0;
+        }
+    }
+
+    /* Set DH derivation parameters and initialise unwrap context */
+    if (dh_cms_set_shared_info(pctx, ri) != 0)
+        return 1;
+
+    DHerr(DH_F_DH_CMS_DECRYPT, DH_R_SHARED_INFO_ERROR);
+    return 0;
+}
+
+/*
+ * Fill in the key agreement recipient info |ri| for encryption.
+ *
+ * If the originator fields are still unset, the public key of the key held
+ * by the recipient's EVP_PKEY_CTX is encoded into them. The KDF is then
+ * forced to X9.42 with SHA1 (any other explicit choice is an error), the
+ * key wrap cipher's OID, key length and the UKM are passed to the context,
+ * and the key encryption AlgorithmIdentifier is set to id-smime-alg-ESDH
+ * with the wrap AlgorithmIdentifier encoded as its parameter.
+ *
+ * Returns 1 on success and 0 on any failure.
+ */
+static int dh_cms_encrypt(CMS_RecipientInfo *ri)
+{
+    EVP_PKEY_CTX *pctx;
+    EVP_PKEY *pkey;
+    EVP_CIPHER_CTX *ctx;
+    int keylen;
+    X509_ALGOR *talg, *wrap_alg = NULL;
+    ASN1_OBJECT *aoid;
+    ASN1_BIT_STRING *pubkey;
+    ASN1_STRING *wrap_str;
+    ASN1_OCTET_STRING *ukm;
+    unsigned char *penc = NULL, *dukm = NULL;
+    int penclen;
+    size_t dukmlen = 0;
+    int rv = 0;
+    int kdf_type, wrap_nid;
+    const EVP_MD *kdf_md;
+    pctx = CMS_RecipientInfo_get0_pkey_ctx(ri);
+    if (!pctx)
+        return 0;
+    /* Get ephemeral key */
+    pkey = EVP_PKEY_CTX_get0_pkey(pctx);
+    if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &talg, &pubkey,
+                                             NULL, NULL, NULL))
+        goto err;
+    X509_ALGOR_get0(&aoid, NULL, NULL, talg);
+    /* Is everything uninitialised? */
+    if (aoid == OBJ_nid2obj(NID_undef)) {
+        ASN1_INTEGER *pubk;
+        pubk = BN_to_ASN1_INTEGER(pkey->pkey.dh->pub_key, NULL);
+        if (!pubk)
+            goto err;
+        /* Set the key */
+
+        penclen = i2d_ASN1_INTEGER(pubk, &penc);
+        ASN1_INTEGER_free(pubk);
+        if (penclen <= 0)
+            goto err;
+        ASN1_STRING_set0(pubkey, penc, penclen);
+        pubkey->flags &= ~(ASN1_STRING_FLAG_BITS_LEFT | 0x07);
+        pubkey->flags |= ASN1_STRING_FLAG_BITS_LEFT;
+
+        /* pubkey now holds the buffer: keep err: from freeing it */
+        penc = NULL;
+        X509_ALGOR_set0(talg, OBJ_nid2obj(NID_dhpublicnumber),
+                        V_ASN1_UNDEF, NULL);
+    }
+
+    /* See if custom parameters set */
+    kdf_type = EVP_PKEY_CTX_get_dh_kdf_type(pctx);
+    if (kdf_type <= 0)
+        goto err;
+    if (!EVP_PKEY_CTX_get_dh_kdf_md(pctx, &kdf_md))
+        goto err;
+
+    if (kdf_type == EVP_PKEY_DH_KDF_NONE) {
+        kdf_type = EVP_PKEY_DH_KDF_X9_42;
+        if (EVP_PKEY_CTX_set_dh_kdf_type(pctx, kdf_type) <= 0)
+            goto err;
+    } else if (kdf_type != EVP_PKEY_DH_KDF_X9_42)
+        /* Unknown KDF */
+        goto err;
+    if (kdf_md == NULL) {
+        /* Only SHA1 supported */
+        kdf_md = EVP_sha1();
+        if (EVP_PKEY_CTX_set_dh_kdf_md(pctx, kdf_md) <= 0)
+            goto err;
+    } else if (EVP_MD_type(kdf_md) != NID_sha1)
+        /* Unsupported digest */
+        goto err;
+
+    if (!CMS_RecipientInfo_kari_get0_alg(ri, &talg, &ukm))
+        goto err;
+
+    /* Get wrap NID */
+    ctx = CMS_RecipientInfo_kari_get0_ctx(ri);
+    /* Same guard as dh_cms_set_shared_info(): no cipher context, no wrap */
+    if (!ctx)
+        goto err;
+    wrap_nid = EVP_CIPHER_CTX_type(ctx);
+    if (EVP_PKEY_CTX_set0_dh_kdf_oid(pctx, OBJ_nid2obj(wrap_nid)) <= 0)
+        goto err;
+    keylen = EVP_CIPHER_CTX_key_length(ctx);
+
+    /* Package wrap algorithm in an AlgorithmIdentifier */
+
+    wrap_alg = X509_ALGOR_new();
+    if (!wrap_alg)
+        goto err;
+    wrap_alg->algorithm = OBJ_nid2obj(wrap_nid);
+    wrap_alg->parameter = ASN1_TYPE_new();
+    if (!wrap_alg->parameter)
+        goto err;
+    if (EVP_CIPHER_param_to_asn1(ctx, wrap_alg->parameter) <= 0)
+        goto err;
+    /* Drop the parameter again if the cipher did not put anything in it */
+    if (ASN1_TYPE_get(wrap_alg->parameter) == NID_undef) {
+        ASN1_TYPE_free(wrap_alg->parameter);
+        wrap_alg->parameter = NULL;
+    }
+
+    if (EVP_PKEY_CTX_set_dh_kdf_outlen(pctx, keylen) <= 0)
+        goto err;
+
+    if (ukm) {
+        dukmlen = ASN1_STRING_length(ukm);
+        dukm = BUF_memdup(ASN1_STRING_data(ukm), dukmlen);
+        if (!dukm)
+            goto err;
+    }
+
+    if (EVP_PKEY_CTX_set0_dh_kdf_ukm(pctx, dukm, dukmlen) <= 0)
+        goto err;
+    /* The copy has been handed to pctx: make sure err: does not free it */
+    dukm = NULL;
+
+    /*
+     * Now need to wrap encoding of wrap AlgorithmIdentifier into parameter
+     * of another AlgorithmIdentifier.
+     */
+    penc = NULL;
+    penclen = i2d_X509_ALGOR(wrap_alg, &penc);
+    if (!penc || !penclen)
+        goto err;
+    wrap_str = ASN1_STRING_new();
+    if (!wrap_str)
+        goto err;
+    ASN1_STRING_set0(wrap_str, penc, penclen);
+    penc = NULL;
+    X509_ALGOR_set0(talg, OBJ_nid2obj(NID_id_smime_alg_ESDH),
+                    V_ASN1_SEQUENCE, wrap_str);
+
+    rv = 1;
+
+ err:
+    if (penc)
+        OPENSSL_free(penc);
+    if (wrap_alg)
+        X509_ALGOR_free(wrap_alg);
+    /* Still set only when handing the UKM copy to pctx failed */
+    if (dukm)
+        OPENSSL_free(dukm);
+    return rv;
+}
+
+#endif
index e6ee3cf..f470214 100644 (file)
@@ -89,7 +89,101 @@ ASN1_SEQUENCE_cb(DHparams, dh_cb) = {
 
 IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(DH, DHparams, DHparams)
 
-DH *DHparams_dup(DH *dh)
+/*
+ * Internal only structures for handling X9.42 DH: this gets translated to or
+ * from a DH structure straight away.
+ */
+
+typedef struct {
+    ASN1_BIT_STRING *seed;
+    BIGNUM *counter;
+} int_dhvparams;
+
+typedef struct {
+    BIGNUM *p;
+    BIGNUM *q;
+    BIGNUM *g;
+    BIGNUM *j;
+    int_dhvparams *vparams;
+} int_dhx942_dh;
+
+ASN1_SEQUENCE(DHvparams) = {
+        ASN1_SIMPLE(int_dhvparams, seed, ASN1_BIT_STRING),
+        ASN1_SIMPLE(int_dhvparams, counter, BIGNUM)
+} ASN1_SEQUENCE_END_name(int_dhvparams, DHvparams)
+
+ASN1_SEQUENCE(DHxparams) = {
+        ASN1_SIMPLE(int_dhx942_dh, p, BIGNUM),
+        ASN1_SIMPLE(int_dhx942_dh, g, BIGNUM),
+        ASN1_SIMPLE(int_dhx942_dh, q, BIGNUM),
+        ASN1_OPT(int_dhx942_dh, j, BIGNUM),
+        ASN1_OPT(int_dhx942_dh, vparams, DHvparams),
+} ASN1_SEQUENCE_END_name(int_dhx942_dh, DHxparams)
+
+int_dhx942_dh *d2i_int_dhx(int_dhx942_dh **a,
+                           const unsigned char **pp, long length);
+int i2d_int_dhx(const int_dhx942_dh *a, unsigned char **pp);
+
+IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(int_dhx942_dh, DHxparams, int_dhx)
+
+/* Application level function: read in X9.42 DH parameters into DH structure */
+
+DH *d2i_DHxparams(DH **a, const unsigned char **pp, long length)
 {
-    return ASN1_item_dup(ASN1_ITEM_rptr(DHparams), dh);
+    int_dhx942_dh *dhx = NULL;
+    DH *dh = NULL;
+    dh = DH_new();
+    if (!dh)
+        return NULL;
+    dhx = d2i_int_dhx(NULL, pp, length);
+    if (!dhx) {
+        DH_free(dh);
+        return NULL;
+    }
+
+    if (a) {
+        if (*a)
+            DH_free(*a);
+        *a = dh;
+    }
+
+    dh->p = dhx->p;
+    dh->q = dhx->q;
+    dh->g = dhx->g;
+    dh->j = dhx->j;
+
+    if (dhx->vparams) {
+        dh->seed = dhx->vparams->seed->data;
+        dh->seedlen = dhx->vparams->seed->length;
+        dh->counter = dhx->vparams->counter;
+        dhx->vparams->seed->data = NULL;
+        ASN1_BIT_STRING_free(dhx->vparams->seed);
+        OPENSSL_free(dhx->vparams);
+        dhx->vparams = NULL;
+    }
+
+    OPENSSL_free(dhx);
+    return dh;
+}
+
+int i2d_DHxparams(const DH *dh, unsigned char **pp)
+{
+    int_dhx942_dh dhx;
+    int_dhvparams dhv;
+    ASN1_BIT_STRING bs;
+    dhx.p = dh->p;
+    dhx.g = dh->g;
+    dhx.q = dh->q;
+    dhx.j = dh->j;
+    if (dh->counter && dh->seed && dh->seedlen > 0) {
+        bs.flags = ASN1_STRING_FLAG_BITS_LEFT;
+        bs.data = dh->seed;
+        bs.length = dh->seedlen;
+        dhv.seed = &bs;
+        dhv.counter = dh->counter;
+        dhx.vparams = &dhv;
+    } else
+        dhx.vparams = NULL;
+
+    return i2d_int_dhx(&dhx, pp);
 }
index c39ed97..347467c 100644 (file)
@@ -76,17 +76,43 @@ int DH_check(const DH *dh, int *ret)
     int ok = 0;
     BN_CTX *ctx = NULL;
     BN_ULONG l;
-    BIGNUM *q = NULL;
+    BIGNUM *t1 = NULL, *t2 = NULL;
 
     *ret = 0;
     ctx = BN_CTX_new();
     if (ctx == NULL)
         goto err;
-    q = BN_new();
-    if (q == NULL)
+    BN_CTX_start(ctx);
+    t1 = BN_CTX_get(ctx);
+    if (t1 == NULL)
+        goto err;
+    t2 = BN_CTX_get(ctx);
+    if (t2 == NULL)
         goto err;
 
-    if (BN_is_word(dh->g, DH_GENERATOR_2)) {
+    if (dh->q) {
+        if (BN_cmp(dh->g, BN_value_one()) <= 0)
+            *ret |= DH_NOT_SUITABLE_GENERATOR;
+        else if (BN_cmp(dh->g, dh->p) >= 0)
+            *ret |= DH_NOT_SUITABLE_GENERATOR;
+        else {
+            /* Check g^q == 1 mod p */
+            if (!BN_mod_exp(t1, dh->g, dh->q, dh->p, ctx))
+                goto err;
+            if (!BN_is_one(t1))
+                *ret |= DH_NOT_SUITABLE_GENERATOR;
+        }
+        if (!BN_is_prime_ex(dh->q, BN_prime_checks, ctx, NULL))
+            *ret |= DH_CHECK_Q_NOT_PRIME;
+        /* Check p == 1 mod q  i.e. q divides p - 1 */
+        if (!BN_div(t1, t2, dh->p, dh->q, ctx))
+            goto err;
+        if (!BN_is_one(t2))
+            *ret |= DH_CHECK_INVALID_Q_VALUE;
+        if (dh->j && BN_cmp(dh->j, t1))
+            *ret |= DH_CHECK_INVALID_J_VALUE;
+
+    } else if (BN_is_word(dh->g, DH_GENERATOR_2)) {
         l = BN_mod_word(dh->p, 24);
         if (l != 11)
             *ret |= DH_NOT_SUITABLE_GENERATOR;
@@ -107,18 +133,18 @@ int DH_check(const DH *dh, int *ret)
 
     if (!BN_is_prime_ex(dh->p, BN_prime_checks, ctx, NULL))
         *ret |= DH_CHECK_P_NOT_PRIME;
-    else {
-        if (!BN_rshift1(q, dh->p))
+    else if (!dh->q) {
+        if (!BN_rshift1(t1, dh->p))
             goto err;
-        if (!BN_is_prime_ex(q, BN_prime_checks, ctx, NULL))
+        if (!BN_is_prime_ex(t1, BN_prime_checks, ctx, NULL))
             *ret |= DH_CHECK_P_NOT_SAFE_PRIME;
     }
     ok = 1;
  err:
-    if (ctx != NULL)
+    if (ctx != NULL) {
+        BN_CTX_end(ctx);
         BN_CTX_free(ctx);
-    if (q != NULL)
-        BN_free(q);
+    }
     return (ok);
 }
 
index 6ed5eb7..b890cca 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/dh/dh_err.c */
 /* ====================================================================
- * Copyright (c) 1999-2011 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2013 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -73,6 +73,9 @@ static ERR_STRING_DATA DH_str_functs[] = {
     {ERR_FUNC(DH_F_COMPUTE_KEY), "COMPUTE_KEY"},
     {ERR_FUNC(DH_F_DHPARAMS_PRINT_FP), "DHparams_print_fp"},
     {ERR_FUNC(DH_F_DH_BUILTIN_GENPARAMS), "DH_BUILTIN_GENPARAMS"},
+    {ERR_FUNC(DH_F_DH_CMS_DECRYPT), "DH_CMS_DECRYPT"},
+    {ERR_FUNC(DH_F_DH_CMS_SET_PEERKEY), "DH_CMS_SET_PEERKEY"},
+    {ERR_FUNC(DH_F_DH_CMS_SET_SHARED_INFO), "DH_CMS_SET_SHARED_INFO"},
     {ERR_FUNC(DH_F_DH_COMPUTE_KEY), "DH_compute_key"},
     {ERR_FUNC(DH_F_DH_GENERATE_KEY), "DH_generate_key"},
     {ERR_FUNC(DH_F_DH_GENERATE_PARAMETERS_EX), "DH_generate_parameters_ex"},
@@ -96,6 +99,7 @@ static ERR_STRING_DATA DH_str_reasons[] = {
     {ERR_REASON(DH_R_BN_ERROR), "bn error"},
     {ERR_REASON(DH_R_DECODE_ERROR), "decode error"},
     {ERR_REASON(DH_R_INVALID_PUBKEY), "invalid public key"},
+    {ERR_REASON(DH_R_KDF_PARAMETER_ERROR), "kdf parameter error"},
     {ERR_REASON(DH_R_KEYS_NOT_SET), "keys not set"},
     {ERR_REASON(DH_R_KEY_SIZE_TOO_SMALL), "key size too small"},
     {ERR_REASON(DH_R_MODULUS_TOO_LARGE), "modulus too large"},
@@ -103,6 +107,8 @@ static ERR_STRING_DATA DH_str_reasons[] = {
     {ERR_REASON(DH_R_NO_PARAMETERS_SET), "no parameters set"},
     {ERR_REASON(DH_R_NO_PRIVATE_VALUE), "no private value"},
     {ERR_REASON(DH_R_PARAMETER_ENCODING_ERROR), "parameter encoding error"},
+    {ERR_REASON(DH_R_PEER_KEY_ERROR), "peer key error"},
+    {ERR_REASON(DH_R_SHARED_INFO_ERROR), "shared info error"},
     {0, NULL}
 };
 
diff --git a/crypto/dh/dh_kdf.c b/crypto/dh/dh_kdf.c
new file mode 100644 (file)
index 0000000..a882cb2
--- /dev/null
@@ -0,0 +1,187 @@
+/* crypto/dh/dh_kdf.c */
+/*
+ * Written by Stephen Henson for the OpenSSL project.
+ */
+/* ====================================================================
+ * Copyright (c) 2013 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#include <string.h>
+#include <openssl/dh.h>
+#include <openssl/evp.h>
+#include <openssl/asn1.h>
+#include <openssl/cms.h>
+
+/* Key derivation from X9.42/RFC2631 */
+
+#define DH_KDF_MAX      (1L << 30)
+
+/*
+ * Step over one ASN.1 header of the expected universal tag, advancing *pp
+ * and reducing *plen by the number of bytes consumed. For an OBJECT the
+ * content octets are consumed as well; for every other tag *pp is left at
+ * the first content octet. Returns 1 on success, 0 on a parse error or a
+ * tag/class mismatch (in which case *pp and *plen are untouched).
+ */
+
+static int skip_asn1(unsigned char **pp, long *plen, int exptag)
+{
+    const unsigned char *cur = *pp;
+    long content_len;
+    int hdr, tag, xclass;
+
+    hdr = ASN1_get_object(&cur, &content_len, &tag, &xclass, *plen);
+    /* The error bit is tested first so tag/xclass are only read if valid */
+    if ((hdr & 0x80) != 0 || tag != exptag || xclass != V_ASN1_UNIVERSAL)
+        return 0;
+
+    if (tag == V_ASN1_OBJECT)
+        cur += content_len;
+
+    *plen -= cur - *pp;
+    *pp = (unsigned char *)cur;
+    return 1;
+}
+
+/*
+ * DER-encode the DH shared info structure into a buffer returned in *pder
+ * and report, in *pctr, where the 4-byte counter sits inside that buffer
+ * so callers can patch the counter in place instead of re-encoding.
+ * Returns the encoded length, or 0 on failure.
+ */
+
+static int dh_sharedinfo_encode(unsigned char **pder, unsigned char **pctr,
+                                ASN1_OBJECT *key_oid, size_t outlen,
+                                const unsigned char *ukm, size_t ukmlen)
+{
+    /* Placeholder counter: a recognisable pattern to verify the offset */
+    static unsigned char magic[4] = { 0xF3, 0x17, 0x22, 0x53 };
+    ASN1_OCTET_STRING magic_oct, ukm_oct;
+    ASN1_OCTET_STRING *ukm_ptr = NULL;
+    ASN1_TYPE magic_type;
+    X509_ALGOR wrap_alg;
+    unsigned char *cursor;
+    long remaining;
+    int enclen;
+
+    if (ukmlen > DH_KDF_MAX || outlen > DH_KDF_MAX)
+        return 0;
+
+    /* Algorithm identifier: key OID with the counter as its parameter */
+    magic_oct.type = V_ASN1_OCTET_STRING;
+    magic_oct.flags = 0;
+    magic_oct.length = 4;
+    magic_oct.data = magic;
+    magic_type.type = V_ASN1_OCTET_STRING;
+    magic_type.value.octet_string = &magic_oct;
+    wrap_alg.algorithm = key_oid;
+    wrap_alg.parameter = &magic_type;
+
+    /* The user key material is optional */
+    if (ukm != NULL) {
+        ukm_oct.type = V_ASN1_OCTET_STRING;
+        ukm_oct.flags = 0;
+        ukm_oct.data = (unsigned char *)ukm;
+        ukm_oct.length = ukmlen;
+        ukm_ptr = &ukm_oct;
+    }
+
+    enclen = CMS_SharedInfo_encode(pder, &wrap_alg, ukm_ptr, outlen);
+    if (enclen <= 0)
+        return 0;
+
+    /*
+     * Walk outer SEQUENCE -> inner SEQUENCE -> OBJECT -> OCTET STRING
+     * header; the cursor then rests on the counter's content octets.
+     */
+    cursor = *pder;
+    remaining = enclen;
+    if (!skip_asn1(&cursor, &remaining, V_ASN1_SEQUENCE)
+        || !skip_asn1(&cursor, &remaining, V_ASN1_SEQUENCE)
+        || !skip_asn1(&cursor, &remaining, V_ASN1_OBJECT)
+        || !skip_asn1(&cursor, &remaining, V_ASN1_OCTET_STRING))
+        return 0;
+
+    if (CRYPTO_memcmp(cursor, magic, 4) != 0)
+        return 0;
+
+    *pctr = cursor;
+    return enclen;
+}
+
+/*
+ * X9.42/RFC2631 key derivation.
+ *
+ * Fills out[0..outlen) with successive digests of Z followed by the DER
+ * shared info, whose embedded 4-byte big-endian counter is rewritten in
+ * place for each block, starting at 1. The final block is truncated when
+ * outlen is not a multiple of the digest size.
+ *
+ * key_oid, ukm and ukmlen are passed on to the shared info encoding; ukm
+ * may be NULL. Returns 1 on success and 0 on any failure.
+ */
+int DH_KDF_X9_42(unsigned char *out, size_t outlen,
+                 const unsigned char *Z, size_t Zlen,
+                 ASN1_OBJECT *key_oid,
+                 const unsigned char *ukm, size_t ukmlen, const EVP_MD *md)
+{
+    EVP_MD_CTX mctx;
+    int rv = 0;
+    unsigned int i;
+    size_t mdlen;
+    int mdsize;
+    unsigned char *der = NULL, *ctr;
+    int derlen;
+    if (Zlen > DH_KDF_MAX)
+        return 0;
+    /*
+     * EVP_MD_size() returns an int; a non-positive result converted
+     * straight to size_t would become a bogus block length, so reject it
+     * before anything is set up.
+     */
+    mdsize = EVP_MD_size(md);
+    if (mdsize <= 0)
+        return 0;
+    mdlen = (size_t)mdsize;
+    EVP_MD_CTX_init(&mctx);
+    derlen = dh_sharedinfo_encode(&der, &ctr, key_oid, outlen, ukm, ukmlen);
+    if (derlen == 0)
+        goto err;
+    for (i = 1;; i++) {
+        unsigned char mtmp[EVP_MAX_MD_SIZE];
+        /* A failed init must not be followed by update/final calls */
+        if (!EVP_DigestInit_ex(&mctx, md, NULL))
+            goto err;
+        if (!EVP_DigestUpdate(&mctx, Z, Zlen))
+            goto err;
+        /* Patch the block counter into the encoded shared info */
+        ctr[3] = i & 0xFF;
+        ctr[2] = (i >> 8) & 0xFF;
+        ctr[1] = (i >> 16) & 0xFF;
+        ctr[0] = (i >> 24) & 0xFF;
+        if (!EVP_DigestUpdate(&mctx, der, derlen))
+            goto err;
+        if (outlen >= mdlen) {
+            /* Whole block fits: write the digest directly to the output */
+            if (!EVP_DigestFinal(&mctx, out, NULL))
+                goto err;
+            outlen -= mdlen;
+            if (outlen == 0)
+                break;
+            out += mdlen;
+        } else {
+            /* Last, partial block: digest to scratch and copy the prefix */
+            if (!EVP_DigestFinal(&mctx, mtmp, NULL))
+                goto err;
+            memcpy(out, mtmp, outlen);
+            OPENSSL_cleanse(mtmp, mdlen);
+            break;
+        }
+    }
+    rv = 1;
+ err:
+    if (der)
+        OPENSSL_free(der);
+    EVP_MD_CTX_cleanup(&mctx);
+    return rv;
+}
index 9e1d8e5..1d80fb2 100644 (file)
@@ -94,6 +94,20 @@ int DH_compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh)
     return dh->meth->compute_key(key, pub_key, dh);
 }
 
+/*
+ * Same computation as DH_compute_key(), but the result is left-padded
+ * with zero bytes up to BN_num_bytes(dh->p). Non-positive results from
+ * the method's compute_key are returned unchanged.
+ */
+int DH_compute_key_padded(unsigned char *key, const BIGNUM *pub_key, DH *dh)
+{
+    int secret_len, missing;
+
+    secret_len = dh->meth->compute_key(key, pub_key, dh);
+    if (secret_len <= 0)
+        return secret_len;
+
+    missing = BN_num_bytes(dh->p) - secret_len;
+    if (missing > 0) {
+        /* Shift the secret to the end and zero the vacated prefix */
+        memmove(key + missing, key, secret_len);
+        memset(key, 0, missing);
+    }
+    return secret_len + missing;
+}
+
 static DH_METHOD dh_ossl = {
     "OpenSSL DH Method",
     generate_key,
index 65bc388..b58e3fa 100644 (file)
 #include <openssl/evp.h>
 #include <openssl/dh.h>
 #include <openssl/bn.h>
+#ifndef OPENSSL_NO_DSA
+# include <openssl/dsa.h>
+#endif
+#include <openssl/objects.h>
 #include "evp_locl.h"
 
 /* DH pkey context structure */
@@ -72,9 +76,23 @@ typedef struct {
     int prime_len;
     int generator;
     int use_dsa;
+    int subprime_len;
+    /* message digest used for parameter generation */
+    const EVP_MD *md;
+    int rfc5114_param;
     /* Keygen callback info */
     int gentmp[2];
-    /* message digest */
+    /* KDF (if any) to use for DH */
+    char kdf_type;
+    /* OID to use for KDF */
+    ASN1_OBJECT *kdf_oid;
+    /* Message digest to use for key derivation */
+    const EVP_MD *kdf_md;
+    /* User key material */
+    unsigned char *kdf_ukm;
+    size_t kdf_ukmlen;
+    /* KDF output length */
+    size_t kdf_outlen;
 } DH_PKEY_CTX;
 
 static int pkey_dh_init(EVP_PKEY_CTX *ctx)
@@ -84,8 +102,18 @@ static int pkey_dh_init(EVP_PKEY_CTX *ctx)
     if (!dctx)
         return 0;
     dctx->prime_len = 1024;
+    dctx->subprime_len = -1;
     dctx->generator = 2;
     dctx->use_dsa = 0;
+    dctx->md = NULL;
+    dctx->rfc5114_param = 0;
+
+    dctx->kdf_type = EVP_PKEY_DH_KDF_NONE;
+    dctx->kdf_oid = NULL;
+    dctx->kdf_md = NULL;
+    dctx->kdf_ukm = NULL;
+    dctx->kdf_ukmlen = 0;
+    dctx->kdf_outlen = 0;
 
     ctx->data = dctx;
     ctx->keygen_info = dctx->gentmp;
@@ -102,16 +130,35 @@ static int pkey_dh_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src)
     sctx = src->data;
     dctx = dst->data;
     dctx->prime_len = sctx->prime_len;
+    dctx->subprime_len = sctx->subprime_len;
     dctx->generator = sctx->generator;
     dctx->use_dsa = sctx->use_dsa;
+    dctx->md = sctx->md;
+    dctx->rfc5114_param = sctx->rfc5114_param;
+
+    dctx->kdf_type = sctx->kdf_type;
+    dctx->kdf_oid = OBJ_dup(sctx->kdf_oid);
+    if (!dctx->kdf_oid)
+        return 0;
+    dctx->kdf_md = sctx->kdf_md;
+    if (dctx->kdf_ukm) {
+        dctx->kdf_ukm = BUF_memdup(sctx->kdf_ukm, sctx->kdf_ukmlen);
+        dctx->kdf_ukmlen = sctx->kdf_ukmlen;
+    }
+    dctx->kdf_outlen = sctx->kdf_outlen;
     return 1;
 }
 
+/*
+ * Free the DH_PKEY_CTX attached to ctx, together with the KDF user key
+ * material and KDF OID it holds, if any. A NULL ctx->data is ignored.
+ */
 static void pkey_dh_cleanup(EVP_PKEY_CTX *ctx)
 {
     DH_PKEY_CTX *dctx = ctx->data;
-    if (dctx)
+    if (dctx) {
+        if (dctx->kdf_ukm)
+            OPENSSL_free(dctx->kdf_ukm);
+        if (dctx->kdf_oid)
+            ASN1_OBJECT_free(dctx->kdf_oid);
         OPENSSL_free(dctx);
+    }
 }
 
 static int pkey_dh_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
@@ -124,14 +171,89 @@ static int pkey_dh_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
         dctx->prime_len = p1;
         return 1;
 
+    case EVP_PKEY_CTRL_DH_PARAMGEN_SUBPRIME_LEN:
+        if (dctx->use_dsa == 0)
+            return -2;
+        dctx->subprime_len = p1;
+        return 1;
+
     case EVP_PKEY_CTRL_DH_PARAMGEN_GENERATOR:
+        if (dctx->use_dsa)
+            return -2;
         dctx->generator = p1;
         return 1;
 
+    case EVP_PKEY_CTRL_DH_PARAMGEN_TYPE:
+#ifdef OPENSSL_NO_DSA
+        if (p1 != 0)
+            return -2;
+#else
+        if (p1 < 0 || p1 > 2)
+            return -2;
+#endif
+        dctx->use_dsa = p1;
+        return 1;
+
+    case EVP_PKEY_CTRL_DH_RFC5114:
+        if (p1 < 1 || p1 > 3)
+            return -2;
+        dctx->rfc5114_param = p1;
+        return 1;
+
     case EVP_PKEY_CTRL_PEER_KEY:
         /* Default behaviour is OK */
         return 1;
 
+    case EVP_PKEY_CTRL_DH_KDF_TYPE:
+        if (p1 == -2)
+            return dctx->kdf_type;
+        if (p1 != EVP_PKEY_DH_KDF_NONE && p1 != EVP_PKEY_DH_KDF_X9_42)
+            return -2;
+        dctx->kdf_type = p1;
+        return 1;
+
+    case EVP_PKEY_CTRL_DH_KDF_MD:
+        dctx->kdf_md = p2;
+        return 1;
+
+    case EVP_PKEY_CTRL_GET_DH_KDF_MD:
+        *(const EVP_MD **)p2 = dctx->kdf_md;
+        return 1;
+
+    case EVP_PKEY_CTRL_DH_KDF_OUTLEN:
+        if (p1 <= 0)
+            return -2;
+        dctx->kdf_outlen = (size_t)p1;
+        return 1;
+
+    case EVP_PKEY_CTRL_GET_DH_KDF_OUTLEN:
+        *(int *)p2 = dctx->kdf_outlen;
+        return 1;
+
+    case EVP_PKEY_CTRL_DH_KDF_UKM:
+        if (dctx->kdf_ukm)
+            OPENSSL_free(dctx->kdf_ukm);
+        dctx->kdf_ukm = p2;
+        if (p2)
+            dctx->kdf_ukmlen = p1;
+        else
+            dctx->kdf_ukmlen = 0;
+        return 1;
+
+    case EVP_PKEY_CTRL_GET_DH_KDF_UKM:
+        *(unsigned char **)p2 = dctx->kdf_ukm;
+        return dctx->kdf_ukmlen;
+
+    case EVP_PKEY_CTRL_DH_KDF_OID:
+        if (dctx->kdf_oid)
+            ASN1_OBJECT_free(dctx->kdf_oid);
+        dctx->kdf_oid = p2;
+        return 1;
+
+    case EVP_PKEY_CTRL_GET_DH_KDF_OID:
+        *(ASN1_OBJECT **)p2 = dctx->kdf_oid;
+        return 1;
+
     default:
         return -2;
 
@@ -146,30 +268,139 @@ static int pkey_dh_ctrl_str(EVP_PKEY_CTX *ctx,
         len = atoi(value);
         return EVP_PKEY_CTX_set_dh_paramgen_prime_len(ctx, len);
     }
+    if (!strcmp(type, "dh_rfc5114")) {
+        DH_PKEY_CTX *dctx = ctx->data;
+        int len;
+        len = atoi(value);
+        if (len < 0 || len > 3)
+            return -2;
+        dctx->rfc5114_param = len;
+        return 1;
+    }
     if (!strcmp(type, "dh_paramgen_generator")) {
         int len;
         len = atoi(value);
         return EVP_PKEY_CTX_set_dh_paramgen_generator(ctx, len);
     }
+    if (!strcmp(type, "dh_paramgen_subprime_len")) {
+        int len;
+        len = atoi(value);
+        return EVP_PKEY_CTX_set_dh_paramgen_subprime_len(ctx, len);
+    }
+    if (!strcmp(type, "dh_paramgen_type")) {
+        int typ;
+        typ = atoi(value);
+        return EVP_PKEY_CTX_set_dh_paramgen_type(ctx, typ);
+    }
     return -2;
 }
 
+#ifndef OPENSSL_NO_DSA
+
+extern int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits,
+                                const EVP_MD *evpmd,
+                                const unsigned char *seed_in, size_t seed_len,
+                                unsigned char *seed_out, int *counter_ret,
+                                unsigned long *h_ret, BN_GENCB *cb);
+
+extern int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N,
+                                 const EVP_MD *evpmd,
+                                 const unsigned char *seed_in,
+                                 size_t seed_len, int idx,
+                                 unsigned char *seed_out, int *counter_ret,
+                                 unsigned long *h_ret, BN_GENCB *cb);
+
+/*
+ * Generate parameters with one of the DSA parameter generators, selected
+ * by dctx->use_dsa (1: dsa_builtin_paramgen, 2: dsa_builtin_paramgen2).
+ * An unset subprime length or digest is defaulted from the prime length.
+ * Returns a new DSA holding the parameters, or NULL on failure.
+ */
+static DSA *dsa_dh_generate(DH_PKEY_CTX *dctx, BN_GENCB *pcb)
+{
+    DSA *params;
+    const EVP_MD *digest = dctx->md;
+    int pbits = dctx->prime_len;
+    int qbits = dctx->subprime_len;
+    int ok = 0;
+
+    if (dctx->use_dsa > 2)
+        return NULL;
+    params = DSA_new();
+    if (params == NULL)
+        return NULL;
+
+    /* Defaults: 256-bit q and SHA-256 from 2048 bits up, else 160/SHA-1 */
+    if (qbits == -1)
+        qbits = (pbits >= 2048) ? 256 : 160;
+    if (digest == NULL)
+        digest = (pbits >= 2048) ? EVP_sha256() : EVP_sha1();
+
+    switch (dctx->use_dsa) {
+    case 1:
+        ok = dsa_builtin_paramgen(params, pbits, qbits, digest,
+                                  NULL, 0, NULL, NULL, NULL, pcb);
+        break;
+    case 2:
+        ok = dsa_builtin_paramgen2(params, pbits, qbits, digest,
+                                   NULL, 0, -1, NULL, NULL, NULL, pcb);
+        break;
+    default:
+        /* No generator selected: ok stays 0 and is handled below */
+        break;
+    }
+
+    if (ok <= 0) {
+        DSA_free(params);
+        return NULL;
+    }
+    return params;
+}
+
+#endif
+
 static int pkey_dh_paramgen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey)
 {
     DH *dh = NULL;
     DH_PKEY_CTX *dctx = ctx->data;
     BN_GENCB *pcb, cb;
     int ret;
+    if (dctx->rfc5114_param) {
+        switch (dctx->rfc5114_param) {
+        case 1:
+            dh = DH_get_1024_160();
+            break;
+
+        case 2:
+            dh = DH_get_2048_224();
+            break;
+
+        case 3:
+            dh = DH_get_2048_256();
+            break;
+
+        default:
+            return -2;
+        }
+        EVP_PKEY_assign(pkey, EVP_PKEY_DHX, dh);
+        return 1;
+    }
+
     if (ctx->pkey_gencb) {
         pcb = &cb;
         evp_pkey_set_cb_translate(pcb, ctx);
     } else
         pcb = NULL;
+#ifndef OPENSSL_NO_DSA
+    if (dctx->use_dsa) {
+        DSA *dsa_dh;
+        dsa_dh = dsa_dh_generate(dctx, pcb);
+        if (!dsa_dh)
+            return 0;
+        dh = DSA_dup_DH(dsa_dh);
+        DSA_free(dsa_dh);
+        if (!dh)
+            return 0;
+        EVP_PKEY_assign(pkey, EVP_PKEY_DHX, dh);
+        return 1;
+    }
+#endif
     dh = DH_new();
     if (!dh)
         return 0;
     ret = DH_generate_parameters_ex(dh,
                                     dctx->prime_len, dctx->generator, pcb);
+
     if (ret)
         EVP_PKEY_assign_DH(pkey, dh);
     else
@@ -187,7 +418,7 @@ static int pkey_dh_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey)
     dh = DH_new();
     if (!dh)
         return 0;
-    EVP_PKEY_assign_DH(pkey, dh);
+    EVP_PKEY_assign(pkey, ctx->pmeth->pkey_id, dh);
     /* Note: if error return, pkey is freed by parent routine */
     if (!EVP_PKEY_copy_parameters(pkey, ctx->pkey))
         return 0;
@@ -198,21 +429,96 @@ static int pkey_dh_derive(EVP_PKEY_CTX *ctx, unsigned char *key,
                           size_t *keylen)
 {
     int ret;
+    DH *dh;
+    DH_PKEY_CTX *dctx = ctx->data;
+    BIGNUM *dhpub;
     if (!ctx->pkey || !ctx->peerkey) {
         DHerr(DH_F_PKEY_DH_DERIVE, DH_R_KEYS_NOT_SET);
         return 0;
     }
-    ret = DH_compute_key(key, ctx->peerkey->pkey.dh->pub_key,
-                         ctx->pkey->pkey.dh);
-    if (ret < 0)
+    dh = ctx->pkey->pkey.dh;
+    dhpub = ctx->peerkey->pkey.dh->pub_key;
+    if (dctx->kdf_type == EVP_PKEY_DH_KDF_NONE) {
+        if (key == NULL) {
+            *keylen = DH_size(dh);
+            return 1;
+        }
+        ret = DH_compute_key(key, dhpub, dh);
+        if (ret < 0)
+            return ret;
+        *keylen = ret;
+        return 1;
+    } else if (dctx->kdf_type == EVP_PKEY_DH_KDF_X9_42) {
+        unsigned char *Z = NULL;
+        size_t Zlen = 0;
+        if (!dctx->kdf_outlen || !dctx->kdf_oid)
+            return 0;
+        if (key == NULL) {
+            *keylen = dctx->kdf_outlen;
+            return 1;
+        }
+        if (*keylen != dctx->kdf_outlen)
+            return 0;
+        ret = 0;
+        Zlen = DH_size(dh);
+        Z = OPENSSL_malloc(Zlen);
+        if (!Z) {
+            goto err;
+        }
+        if (DH_compute_key_padded(Z, dhpub, dh) <= 0)
+            goto err;
+        if (!DH_KDF_X9_42(key, *keylen, Z, Zlen, dctx->kdf_oid,
+                          dctx->kdf_ukm, dctx->kdf_ukmlen, dctx->kdf_md))
+            goto err;
+        *keylen = dctx->kdf_outlen;
+        ret = 1;
+ err:
+        if (Z) {
+            OPENSSL_cleanse(Z, Zlen);
+            OPENSSL_free(Z);
+        }
         return ret;
-    *keylen = ret;
+    }
     return 1;
 }
 
+/*
+ * Method table for EVP_PKEY_DH keys. Entries set to 0 leave that slot
+ * without a handler. The second entry (presumably the flags field -
+ * confirm against the EVP_PKEY_METHOD definition) no longer carries
+ * EVP_PKEY_FLAG_AUTOARGLEN: pkey_dh_derive now answers the key == NULL
+ * length query itself.
+ */
 const EVP_PKEY_METHOD dh_pkey_meth = {
     EVP_PKEY_DH,
-    EVP_PKEY_FLAG_AUTOARGLEN,
+    0,
+    pkey_dh_init,
+    pkey_dh_copy,
+    pkey_dh_cleanup,
+
+    0,
+    pkey_dh_paramgen,
+
+    0,
+    pkey_dh_keygen,
+
+    0,
+    0,
+
+    0,
+    0,
+
+    0, 0,
+
+    0, 0, 0, 0,
+
+    0, 0,
+
+    0, 0,
+
+    0,
+    pkey_dh_derive,
+
+    pkey_dh_ctrl,
+    pkey_dh_ctrl_str
+};
+
+const EVP_PKEY_METHOD dhx_pkey_meth = {
+    EVP_PKEY_DHX,
+    0,
     pkey_dh_init,
     pkey_dh_copy,
     pkey_dh_cleanup,
diff --git a/crypto/dh/dh_rfc5114.c b/crypto/dh/dh_rfc5114.c
new file mode 100644 (file)
index 0000000..e96e2aa
--- /dev/null
@@ -0,0 +1,285 @@
+/*
+ * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project
+ * 2011.
+ */
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com).  This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include <openssl/dh.h>
+#include <openssl/bn.h>
+
+/*
+ * DH parameters from RFC5114. Each value is stored as an array of BN_ULONG
+ * words, least significant word first.
+ */
+
+#if BN_BITS2 == 64
+static const BN_ULONG dh1024_160_p[] = {
+    0xDF1FB2BC2E4A4371ULL, 0xE68CFDA76D4DA708ULL, 0x45BF37DF365C1A65ULL,
+    0xA151AF5F0DC8B4BDULL, 0xFAA31A4FF55BCCC0ULL, 0x4EFFD6FAE5644738ULL,
+    0x98488E9C219A7372ULL, 0xACCBDD7D90C4BD70ULL, 0x24975C3CD49B83BFULL,
+    0x13ECB4AEA9061123ULL, 0x9838EF1E2EE652C0ULL, 0x6073E28675A23D18ULL,
+    0x9A6A9DCA52D23B61ULL, 0x52C99FBCFB06A3C6ULL, 0xDE92DE5EAE5D54ECULL,
+    0xB10B8F96A080E01DULL
+};
+
+static const BN_ULONG dh1024_160_g[] = {
+    0x855E6EEB22B3B2E5ULL, 0x858F4DCEF97C2A24ULL, 0x2D779D5918D08BC8ULL,
+    0xD662A4D18E73AFA3ULL, 0x1DBF0A0169B6A28AULL, 0xA6A24C087A091F53ULL,
+    0x909D0D2263F80A76ULL, 0xD7FBD7D3B9A92EE1ULL, 0x5E91547F9E2749F4ULL,
+    0x160217B4B01B886AULL, 0x777E690F5504F213ULL, 0x266FEA1E5C41564BULL,
+    0xD6406CFF14266D31ULL, 0xF8104DD258AC507FULL, 0x6765A442EFB99905ULL,
+    0xA4D1CBD5C3FD3412ULL
+};
+
+static const BN_ULONG dh1024_160_q[] = {
+    0x64B7CB9D49462353ULL, 0x81A8DF278ABA4E7DULL, 0x00000000F518AA87ULL
+};
+
+static const BN_ULONG dh2048_224_p[] = {
+    0x0AC4DFFE0C10E64FULL, 0xCF9DE5384E71B81CULL, 0x7EF363E2FFA31F71ULL,
+    0xE3FB73C16B8E75B9ULL, 0xC9B53DCF4BA80A29ULL, 0x23F10B0E16E79763ULL,
+    0xC52172E413042E9BULL, 0xBE60E69CC928B2B9ULL, 0x80CD86A1B9E587E8ULL,
+    0x315D75E198C641A4ULL, 0xCDF93ACC44328387ULL, 0x15987D9ADC0A486DULL,
+    0x7310F7121FD5A074ULL, 0x278273C7DE31EFDCULL, 0x1602E714415D9330ULL,
+    0x81286130BC8985DBULL, 0xB3BF8A3170918836ULL, 0x6A00E0A0B9C49708ULL,
+    0xC6BA0B2C8BBC27BEULL, 0xC9F98D11ED34DBF6ULL, 0x7AD5B7D0B6C12207ULL,
+    0xD91E8FEF55B7394BULL, 0x9037C9EDEFDA4DF8ULL, 0x6D3F8152AD6AC212ULL,
+    0x1DE6B85A1274A0A6ULL, 0xEB3D688A309C180EULL, 0xAF9A3C407BA1DF15ULL,
+    0xE6FA141DF95A56DBULL, 0xB54B1597B61D0A75ULL, 0xA20D64E5683B9FD1ULL,
+    0xD660FAA79559C51FULL, 0xAD107E1E9123A9D0ULL
+};
+
+static const BN_ULONG dh2048_224_g[] = {
+    0x84B890D3191F2BFAULL, 0x81BC087F2A7065B3ULL, 0x19C418E1F6EC0179ULL,
+    0x7B5A0F1C71CFFF4CULL, 0xEDFE72FE9B6AA4BDULL, 0x81E1BCFE94B30269ULL,
+    0x566AFBB48D6C0191ULL, 0xB539CCE3409D13CDULL, 0x6AA21E7F5F2FF381ULL,
+    0xD9E263E4770589EFULL, 0x10E183EDD19963DDULL, 0xB70A8137150B8EEBULL,
+    0x051AE3D428C8F8ACULL, 0xBB77A86F0C1AB15BULL, 0x6E3025E316A330EFULL,
+    0x19529A45D6F83456ULL, 0xF180EB34118E98D1ULL, 0xB5F6C6B250717CBEULL,
+    0x09939D54DA7460CDULL, 0xE247150422EA1ED4ULL, 0xB8A762D0521BC98AULL,
+    0xF4D027275AC1348BULL, 0xC17669101999024AULL, 0xBE5E9001A8D66AD7ULL,
+    0xC57DB17C620A8652ULL, 0xAB739D7700C29F52ULL, 0xDD921F01A70C4AFAULL,
+    0xA6824A4E10B9A6F0ULL, 0x74866A08CFE4FFE3ULL, 0x6CDEBE7B89998CAFULL,
+    0x9DF30B5C8FFDAC50ULL, 0xAC4032EF4F2D9AE3ULL
+};
+
+static const BN_ULONG dh2048_224_q[] = {
+    0xBF389A99B36371EBULL, 0x1F80535A4738CEBCULL, 0xC58D93FE99717710ULL,
+    0x00000000801C0D34ULL
+};
+
+static const BN_ULONG dh2048_256_p[] = {
+    0xDB094AE91E1A1597ULL, 0x693877FAD7EF09CAULL, 0x6116D2276E11715FULL,
+    0xA4B54330C198AF12ULL, 0x75F26375D7014103ULL, 0xC3A3960A54E710C3ULL,
+    0xDED4010ABD0BE621ULL, 0xC0B857F689962856ULL, 0xB3CA3F7971506026ULL,
+    0x1CCACB83E6B486F6ULL, 0x67E144E514056425ULL, 0xF6A167B5A41825D9ULL,
+    0x3AD8347796524D8EULL, 0xF13C6D9A51BFA4ABULL, 0x2D52526735488A0EULL,
+    0xB63ACAE1CAA6B790ULL, 0x4FDB70C581B23F76ULL, 0xBC39A0BF12307F5CULL,
+    0xB941F54EB1E59BB8ULL, 0x6C5BFC11D45F9088ULL, 0x22E0B1EF4275BF7BULL,
+    0x91F9E6725B4758C0ULL, 0x5A8A9D306BCF67EDULL, 0x209E0C6497517ABDULL,
+    0x3BF4296D830E9A7CULL, 0x16C3D91134096FAAULL, 0xFAF7DF4561B2AA30ULL,
+    0xE00DF8F1D61957D4ULL, 0x5D2CEED4435E3B00ULL, 0x8CEEF608660DD0F2ULL,
+    0xFFBBD19C65195999ULL, 0x87A8E61DB4B6663CULL
+};
+
+static const BN_ULONG dh2048_256_g[] = {
+    0x664B4C0F6CC41659ULL, 0x5E2327CFEF98C582ULL, 0xD647D148D4795451ULL,
+    0x2F63078490F00EF8ULL, 0x184B523D1DB246C3ULL, 0xC7891428CDC67EB6ULL,
+    0x7FD028370DF92B52ULL, 0xB3353BBB64E0EC37ULL, 0xECD06E1557CD0915ULL,
+    0xB7D2BBD2DF016199ULL, 0xC8484B1E052588B9ULL, 0xDB2A3B7313D3FE14ULL,
+    0xD052B985D182EA0AULL, 0xA4BD1BFFE83B9C80ULL, 0xDFC967C1FB3F2E55ULL,
+    0xB5045AF2767164E1ULL, 0x1D14348F6F2F9193ULL, 0x64E67982428EBC83ULL,
+    0x8AC376D282D6ED38ULL, 0x777DE62AAAB8A862ULL, 0xDDF463E5E9EC144BULL,
+    0x0196F931C77A57F2ULL, 0xA55AE31341000A65ULL, 0x901228F8C28CBB18ULL,
+    0xBC3773BF7E8C6F62ULL, 0xBE3A6C1B0C6B47B1ULL, 0xFF4FED4AAC0BB555ULL,
+    0x10DBC15077BE463FULL, 0x07F4793A1A0BA125ULL, 0x4CA7B18F21EF2054ULL,
+    0x2E77506660EDBD48ULL, 0x3FB32C9B73134D0BULL
+};
+
+static const BN_ULONG dh2048_256_q[] = {
+    0xA308B0FE64F5FBD3ULL, 0x99B1A47D1EB3750BULL, 0xB447997640129DA2ULL,
+    0x8CF83642A709A097ULL
+};
+
+#elif BN_BITS2 == 32
+
+static const BN_ULONG dh1024_160_p[] = {
+    0x2E4A4371, 0xDF1FB2BC, 0x6D4DA708, 0xE68CFDA7, 0x365C1A65, 0x45BF37DF,
+    0x0DC8B4BD, 0xA151AF5F, 0xF55BCCC0, 0xFAA31A4F, 0xE5644738, 0x4EFFD6FA,
+    0x219A7372, 0x98488E9C, 0x90C4BD70, 0xACCBDD7D, 0xD49B83BF, 0x24975C3C,
+    0xA9061123, 0x13ECB4AE, 0x2EE652C0, 0x9838EF1E, 0x75A23D18, 0x6073E286,
+    0x52D23B61, 0x9A6A9DCA, 0xFB06A3C6, 0x52C99FBC, 0xAE5D54EC, 0xDE92DE5E,
+    0xA080E01D, 0xB10B8F96
+};
+
+static const BN_ULONG dh1024_160_g[] = {
+    0x22B3B2E5, 0x855E6EEB, 0xF97C2A24, 0x858F4DCE, 0x18D08BC8, 0x2D779D59,
+    0x8E73AFA3, 0xD662A4D1, 0x69B6A28A, 0x1DBF0A01, 0x7A091F53, 0xA6A24C08,
+    0x63F80A76, 0x909D0D22, 0xB9A92EE1, 0xD7FBD7D3, 0x9E2749F4, 0x5E91547F,
+    0xB01B886A, 0x160217B4, 0x5504F213, 0x777E690F, 0x5C41564B, 0x266FEA1E,
+    0x14266D31, 0xD6406CFF, 0x58AC507F, 0xF8104DD2, 0xEFB99905, 0x6765A442,
+    0xC3FD3412, 0xA4D1CBD5
+};
+
+static const BN_ULONG dh1024_160_q[] = {
+    0x49462353, 0x64B7CB9D, 0x8ABA4E7D, 0x81A8DF27, 0xF518AA87
+};
+
+static const BN_ULONG dh2048_224_p[] = {
+    0x0C10E64F, 0x0AC4DFFE, 0x4E71B81C, 0xCF9DE538, 0xFFA31F71, 0x7EF363E2,
+    0x6B8E75B9, 0xE3FB73C1, 0x4BA80A29, 0xC9B53DCF, 0x16E79763, 0x23F10B0E,
+    0x13042E9B, 0xC52172E4, 0xC928B2B9, 0xBE60E69C, 0xB9E587E8, 0x80CD86A1,
+    0x98C641A4, 0x315D75E1, 0x44328387, 0xCDF93ACC, 0xDC0A486D, 0x15987D9A,
+    0x1FD5A074, 0x7310F712, 0xDE31EFDC, 0x278273C7, 0x415D9330, 0x1602E714,
+    0xBC8985DB, 0x81286130, 0x70918836, 0xB3BF8A31, 0xB9C49708, 0x6A00E0A0,
+    0x8BBC27BE, 0xC6BA0B2C, 0xED34DBF6, 0xC9F98D11, 0xB6C12207, 0x7AD5B7D0,
+    0x55B7394B, 0xD91E8FEF, 0xEFDA4DF8, 0x9037C9ED, 0xAD6AC212, 0x6D3F8152,
+    0x1274A0A6, 0x1DE6B85A, 0x309C180E, 0xEB3D688A, 0x7BA1DF15, 0xAF9A3C40,
+    0xF95A56DB, 0xE6FA141D, 0xB61D0A75, 0xB54B1597, 0x683B9FD1, 0xA20D64E5,
+    0x9559C51F, 0xD660FAA7, 0x9123A9D0, 0xAD107E1E
+};
+
+static const BN_ULONG dh2048_224_g[] = {
+    0x191F2BFA, 0x84B890D3, 0x2A7065B3, 0x81BC087F, 0xF6EC0179, 0x19C418E1,
+    0x71CFFF4C, 0x7B5A0F1C, 0x9B6AA4BD, 0xEDFE72FE, 0x94B30269, 0x81E1BCFE,
+    0x8D6C0191, 0x566AFBB4, 0x409D13CD, 0xB539CCE3, 0x5F2FF381, 0x6AA21E7F,
+    0x770589EF, 0xD9E263E4, 0xD19963DD, 0x10E183ED, 0x150B8EEB, 0xB70A8137,
+    0x28C8F8AC, 0x051AE3D4, 0x0C1AB15B, 0xBB77A86F, 0x16A330EF, 0x6E3025E3,
+    0xD6F83456, 0x19529A45, 0x118E98D1, 0xF180EB34, 0x50717CBE, 0xB5F6C6B2,
+    0xDA7460CD, 0x09939D54, 0x22EA1ED4, 0xE2471504, 0x521BC98A, 0xB8A762D0,
+    0x5AC1348B, 0xF4D02727, 0x1999024A, 0xC1766910, 0xA8D66AD7, 0xBE5E9001,
+    0x620A8652, 0xC57DB17C, 0x00C29F52, 0xAB739D77, 0xA70C4AFA, 0xDD921F01,
+    0x10B9A6F0, 0xA6824A4E, 0xCFE4FFE3, 0x74866A08, 0x89998CAF, 0x6CDEBE7B,
+    0x8FFDAC50, 0x9DF30B5C, 0x4F2D9AE3, 0xAC4032EF
+};
+
+static const BN_ULONG dh2048_224_q[] = {
+    0xB36371EB, 0xBF389A99, 0x4738CEBC, 0x1F80535A, 0x99717710, 0xC58D93FE,
+    0x801C0D34
+};
+
+static const BN_ULONG dh2048_256_p[] = {
+    0x1E1A1597, 0xDB094AE9, 0xD7EF09CA, 0x693877FA, 0x6E11715F, 0x6116D227,
+    0xC198AF12, 0xA4B54330, 0xD7014103, 0x75F26375, 0x54E710C3, 0xC3A3960A,
+    0xBD0BE621, 0xDED4010A, 0x89962856, 0xC0B857F6, 0x71506026, 0xB3CA3F79,
+    0xE6B486F6, 0x1CCACB83, 0x14056425, 0x67E144E5, 0xA41825D9, 0xF6A167B5,
+    0x96524D8E, 0x3AD83477, 0x51BFA4AB, 0xF13C6D9A, 0x35488A0E, 0x2D525267,
+    0xCAA6B790, 0xB63ACAE1, 0x81B23F76, 0x4FDB70C5, 0x12307F5C, 0xBC39A0BF,
+    0xB1E59BB8, 0xB941F54E, 0xD45F9088, 0x6C5BFC11, 0x4275BF7B, 0x22E0B1EF,
+    0x5B4758C0, 0x91F9E672, 0x6BCF67ED, 0x5A8A9D30, 0x97517ABD, 0x209E0C64,
+    0x830E9A7C, 0x3BF4296D, 0x34096FAA, 0x16C3D911, 0x61B2AA30, 0xFAF7DF45,
+    0xD61957D4, 0xE00DF8F1, 0x435E3B00, 0x5D2CEED4, 0x660DD0F2, 0x8CEEF608,
+    0x65195999, 0xFFBBD19C, 0xB4B6663C, 0x87A8E61D
+};
+
+static const BN_ULONG dh2048_256_g[] = {
+    0x6CC41659, 0x664B4C0F, 0xEF98C582, 0x5E2327CF, 0xD4795451, 0xD647D148,
+    0x90F00EF8, 0x2F630784, 0x1DB246C3, 0x184B523D, 0xCDC67EB6, 0xC7891428,
+    0x0DF92B52, 0x7FD02837, 0x64E0EC37, 0xB3353BBB, 0x57CD0915, 0xECD06E15,
+    0xDF016199, 0xB7D2BBD2, 0x052588B9, 0xC8484B1E, 0x13D3FE14, 0xDB2A3B73,
+    0xD182EA0A, 0xD052B985, 0xE83B9C80, 0xA4BD1BFF, 0xFB3F2E55, 0xDFC967C1,
+    0x767164E1, 0xB5045AF2, 0x6F2F9193, 0x1D14348F, 0x428EBC83, 0x64E67982,
+    0x82D6ED38, 0x8AC376D2, 0xAAB8A862, 0x777DE62A, 0xE9EC144B, 0xDDF463E5,
+    0xC77A57F2, 0x0196F931, 0x41000A65, 0xA55AE313, 0xC28CBB18, 0x901228F8,
+    0x7E8C6F62, 0xBC3773BF, 0x0C6B47B1, 0xBE3A6C1B, 0xAC0BB555, 0xFF4FED4A,
+    0x77BE463F, 0x10DBC150, 0x1A0BA125, 0x07F4793A, 0x21EF2054, 0x4CA7B18F,
+    0x60EDBD48, 0x2E775066, 0x73134D0B, 0x3FB32C9B
+};
+
+static const BN_ULONG dh2048_256_q[] = { /* 32-bit words, low word first */
+    0x64F5FBD3, 0xA308B0FE, 0x1EB3750B, 0x99B1A47D, 0x40129DA2, 0xB4479976,
+    0xA709A097, 0x8CF83642
+};
+
+#else
+# error "unsupported BN_BITS2"
+#endif
+
+/*
+ * Macro to make a BIGNUM from static data.
+ *
+ * make_dh_bn(x) defines a static const BIGNUM named _bignum_<x> whose word
+ * pointer refers directly to the array x (no copy is made; the cast drops
+ * the array's const qualifier).  The initialiser supplies, in order: the
+ * word pointer, two word counts that are both the full length of x in
+ * BN_ULONG units, a zero, and BN_FLG_STATIC_DATA.
+ * NOTE(review): the positions presumably correspond to the d, top, dmax,
+ * neg and flags members of BIGNUM - confirm against bn.h.
+ */
+
+#define make_dh_bn(x) static const BIGNUM _bignum_##x = { (BN_ULONG *) x, \
+                        sizeof(x)/sizeof(BN_ULONG),\
+                        sizeof(x)/sizeof(BN_ULONG),\
+                        0, BN_FLG_STATIC_DATA }
+
+/*
+ * Macro to make a DH structure from BIGNUM data. NB: although just copying
+ * the BIGNUM static pointers would be more efficient we can't as they get
+ * wiped using BN_clear_free() when DH_free() is called.
+ *
+ * make_dh(x) defines the function DH *DH_get_<x>(void).  The function wraps
+ * the static word arrays dh<x>_p, dh<x>_q and dh<x>_g in static BIGNUMs via
+ * make_dh_bn(), allocates a new DH with DH_new() and stores BN_dup() copies
+ * of the three values in dh->p, dh->g and dh->q.
+ *
+ * Returns the new DH, or NULL if DH_new() fails or any of the three copies
+ * fails (in the latter case the partially filled DH is released with
+ * DH_free() first).
+ */
+
+#define make_dh(x) \
+DH * DH_get_##x(void) \
+        { \
+        DH *dh; \
+        make_dh_bn(dh##x##_p); \
+        make_dh_bn(dh##x##_q); \
+        make_dh_bn(dh##x##_g); \
+        dh = DH_new(); \
+        if (!dh) \
+                return NULL; \
+        dh->p = BN_dup(&_bignum_dh##x##_p); \
+        dh->g = BN_dup(&_bignum_dh##x##_g); \
+        dh->q = BN_dup(&_bignum_dh##x##_q); \
+        if (!dh->p || !dh->q || !dh->g) \
+                { \
+                DH_free(dh); \
+                return NULL; \
+                } \
+        return dh; \
+        }
+
+make_dh(1024_160) /* defines DH_get_1024_160() */
+make_dh(2048_224) /* defines DH_get_2048_224() */
+make_dh(2048_256) /* defines DH_get_2048_256() */
index 5a4ee9a..c9dd76b 100644 (file)
@@ -96,6 +96,8 @@ static int MS_CALLBACK cb(int p, int n, BN_GENCB *arg);
 static const char rnd_seed[] =
     "string to make the random number generator think it has entropy";
 
+static int run_rfc5114_tests(void);
+
 int main(int argc, char *argv[])
 {
     BN_GENCB _cb;
@@ -199,6 +201,8 @@ int main(int argc, char *argv[])
         ret = 1;
     } else
         ret = 0;
+    if (!run_rfc5114_tests())
+        ret = 1;
  err:
     ERR_print_errors_fp(stderr);
 
@@ -238,4 +242,323 @@ static int MS_CALLBACK cb(int p, int n, BN_GENCB *arg)
 # endif
     return 1;
 }
+
+/* Test data from RFC 5114 */
+
+static const unsigned char dhtest_1024_160_xA[] = {
+    0xB9, 0xA3, 0xB3, 0xAE, 0x8F, 0xEF, 0xC1, 0xA2, 0x93, 0x04, 0x96, 0x50,
+    0x70, 0x86, 0xF8, 0x45, 0x5D, 0x48, 0x94, 0x3E
+};
+
+static const unsigned char dhtest_1024_160_yA[] = {
+    0x2A, 0x85, 0x3B, 0x3D, 0x92, 0x19, 0x75, 0x01, 0xB9, 0x01, 0x5B, 0x2D,
+    0xEB, 0x3E, 0xD8, 0x4F, 0x5E, 0x02, 0x1D, 0xCC, 0x3E, 0x52, 0xF1, 0x09,
+    0xD3, 0x27, 0x3D, 0x2B, 0x75, 0x21, 0x28, 0x1C, 0xBA, 0xBE, 0x0E, 0x76,
+    0xFF, 0x57, 0x27, 0xFA, 0x8A, 0xCC, 0xE2, 0x69, 0x56, 0xBA, 0x9A, 0x1F,
+    0xCA, 0x26, 0xF2, 0x02, 0x28, 0xD8, 0x69, 0x3F, 0xEB, 0x10, 0x84, 0x1D,
+    0x84, 0xA7, 0x36, 0x00, 0x54, 0xEC, 0xE5, 0xA7, 0xF5, 0xB7, 0xA6, 0x1A,
+    0xD3, 0xDF, 0xB3, 0xC6, 0x0D, 0x2E, 0x43, 0x10, 0x6D, 0x87, 0x27, 0xDA,
+    0x37, 0xDF, 0x9C, 0xCE, 0x95, 0xB4, 0x78, 0x75, 0x5D, 0x06, 0xBC, 0xEA,
+    0x8F, 0x9D, 0x45, 0x96, 0x5F, 0x75, 0xA5, 0xF3, 0xD1, 0xDF, 0x37, 0x01,
+    0x16, 0x5F, 0xC9, 0xE5, 0x0C, 0x42, 0x79, 0xCE, 0xB0, 0x7F, 0x98, 0x95,
+    0x40, 0xAE, 0x96, 0xD5, 0xD8, 0x8E, 0xD7, 0x76
+};
+
+static const unsigned char dhtest_1024_160_xB[] = {
+    0x93, 0x92, 0xC9, 0xF9, 0xEB, 0x6A, 0x7A, 0x6A, 0x90, 0x22, 0xF7, 0xD8,
+    0x3E, 0x72, 0x23, 0xC6, 0x83, 0x5B, 0xBD, 0xDA
+};
+
+static const unsigned char dhtest_1024_160_yB[] = {
+    0x71, 0x7A, 0x6C, 0xB0, 0x53, 0x37, 0x1F, 0xF4, 0xA3, 0xB9, 0x32, 0x94,
+    0x1C, 0x1E, 0x56, 0x63, 0xF8, 0x61, 0xA1, 0xD6, 0xAD, 0x34, 0xAE, 0x66,
+    0x57, 0x6D, 0xFB, 0x98, 0xF6, 0xC6, 0xCB, 0xF9, 0xDD, 0xD5, 0xA5, 0x6C,
+    0x78, 0x33, 0xF6, 0xBC, 0xFD, 0xFF, 0x09, 0x55, 0x82, 0xAD, 0x86, 0x8E,
+    0x44, 0x0E, 0x8D, 0x09, 0xFD, 0x76, 0x9E, 0x3C, 0xEC, 0xCD, 0xC3, 0xD3,
+    0xB1, 0xE4, 0xCF, 0xA0, 0x57, 0x77, 0x6C, 0xAA, 0xF9, 0x73, 0x9B, 0x6A,
+    0x9F, 0xEE, 0x8E, 0x74, 0x11, 0xF8, 0xD6, 0xDA, 0xC0, 0x9D, 0x6A, 0x4E,
+    0xDB, 0x46, 0xCC, 0x2B, 0x5D, 0x52, 0x03, 0x09, 0x0E, 0xAE, 0x61, 0x26,
+    0x31, 0x1E, 0x53, 0xFD, 0x2C, 0x14, 0xB5, 0x74, 0xE6, 0xA3, 0x10, 0x9A,
+    0x3D, 0xA1, 0xBE, 0x41, 0xBD, 0xCE, 0xAA, 0x18, 0x6F, 0x5C, 0xE0, 0x67,
+    0x16, 0xA2, 0xB6, 0xA0, 0x7B, 0x3C, 0x33, 0xFE
+};
+
+static const unsigned char dhtest_1024_160_Z[] = {
+    0x5C, 0x80, 0x4F, 0x45, 0x4D, 0x30, 0xD9, 0xC4, 0xDF, 0x85, 0x27, 0x1F,
+    0x93, 0x52, 0x8C, 0x91, 0xDF, 0x6B, 0x48, 0xAB, 0x5F, 0x80, 0xB3, 0xB5,
+    0x9C, 0xAA, 0xC1, 0xB2, 0x8F, 0x8A, 0xCB, 0xA9, 0xCD, 0x3E, 0x39, 0xF3,
+    0xCB, 0x61, 0x45, 0x25, 0xD9, 0x52, 0x1D, 0x2E, 0x64, 0x4C, 0x53, 0xB8,
+    0x07, 0xB8, 0x10, 0xF3, 0x40, 0x06, 0x2F, 0x25, 0x7D, 0x7D, 0x6F, 0xBF,
+    0xE8, 0xD5, 0xE8, 0xF0, 0x72, 0xE9, 0xB6, 0xE9, 0xAF, 0xDA, 0x94, 0x13,
+    0xEA, 0xFB, 0x2E, 0x8B, 0x06, 0x99, 0xB1, 0xFB, 0x5A, 0x0C, 0xAC, 0xED,
+    0xDE, 0xAE, 0xAD, 0x7E, 0x9C, 0xFB, 0xB3, 0x6A, 0xE2, 0xB4, 0x20, 0x83,
+    0x5B, 0xD8, 0x3A, 0x19, 0xFB, 0x0B, 0x5E, 0x96, 0xBF, 0x8F, 0xA4, 0xD0,
+    0x9E, 0x34, 0x55, 0x25, 0x16, 0x7E, 0xCD, 0x91, 0x55, 0x41, 0x6F, 0x46,
+    0xF4, 0x08, 0xED, 0x31, 0xB6, 0x3C, 0x6E, 0x6D
+};
+
+static const unsigned char dhtest_2048_224_xA[] = {
+    0x22, 0xE6, 0x26, 0x01, 0xDB, 0xFF, 0xD0, 0x67, 0x08, 0xA6, 0x80, 0xF7,
+    0x47, 0xF3, 0x61, 0xF7, 0x6D, 0x8F, 0x4F, 0x72, 0x1A, 0x05, 0x48, 0xE4,
+    0x83, 0x29, 0x4B, 0x0C
+};
+
+static const unsigned char dhtest_2048_224_yA[] = {
+    0x1B, 0x3A, 0x63, 0x45, 0x1B, 0xD8, 0x86, 0xE6, 0x99, 0xE6, 0x7B, 0x49,
+    0x4E, 0x28, 0x8B, 0xD7, 0xF8, 0xE0, 0xD3, 0x70, 0xBA, 0xDD, 0xA7, 0xA0,
+    0xEF, 0xD2, 0xFD, 0xE7, 0xD8, 0xF6, 0x61, 0x45, 0xCC, 0x9F, 0x28, 0x04,
+    0x19, 0x97, 0x5E, 0xB8, 0x08, 0x87, 0x7C, 0x8A, 0x4C, 0x0C, 0x8E, 0x0B,
+    0xD4, 0x8D, 0x4A, 0x54, 0x01, 0xEB, 0x1E, 0x87, 0x76, 0xBF, 0xEE, 0xE1,
+    0x34, 0xC0, 0x38, 0x31, 0xAC, 0x27, 0x3C, 0xD9, 0xD6, 0x35, 0xAB, 0x0C,
+    0xE0, 0x06, 0xA4, 0x2A, 0x88, 0x7E, 0x3F, 0x52, 0xFB, 0x87, 0x66, 0xB6,
+    0x50, 0xF3, 0x80, 0x78, 0xBC, 0x8E, 0xE8, 0x58, 0x0C, 0xEF, 0xE2, 0x43,
+    0x96, 0x8C, 0xFC, 0x4F, 0x8D, 0xC3, 0xDB, 0x08, 0x45, 0x54, 0x17, 0x1D,
+    0x41, 0xBF, 0x2E, 0x86, 0x1B, 0x7B, 0xB4, 0xD6, 0x9D, 0xD0, 0xE0, 0x1E,
+    0xA3, 0x87, 0xCB, 0xAA, 0x5C, 0xA6, 0x72, 0xAF, 0xCB, 0xE8, 0xBD, 0xB9,
+    0xD6, 0x2D, 0x4C, 0xE1, 0x5F, 0x17, 0xDD, 0x36, 0xF9, 0x1E, 0xD1, 0xEE,
+    0xDD, 0x65, 0xCA, 0x4A, 0x06, 0x45, 0x5C, 0xB9, 0x4C, 0xD4, 0x0A, 0x52,
+    0xEC, 0x36, 0x0E, 0x84, 0xB3, 0xC9, 0x26, 0xE2, 0x2C, 0x43, 0x80, 0xA3,
+    0xBF, 0x30, 0x9D, 0x56, 0x84, 0x97, 0x68, 0xB7, 0xF5, 0x2C, 0xFD, 0xF6,
+    0x55, 0xFD, 0x05, 0x3A, 0x7E, 0xF7, 0x06, 0x97, 0x9E, 0x7E, 0x58, 0x06,
+    0xB1, 0x7D, 0xFA, 0xE5, 0x3A, 0xD2, 0xA5, 0xBC, 0x56, 0x8E, 0xBB, 0x52,
+    0x9A, 0x7A, 0x61, 0xD6, 0x8D, 0x25, 0x6F, 0x8F, 0xC9, 0x7C, 0x07, 0x4A,
+    0x86, 0x1D, 0x82, 0x7E, 0x2E, 0xBC, 0x8C, 0x61, 0x34, 0x55, 0x31, 0x15,
+    0xB7, 0x0E, 0x71, 0x03, 0x92, 0x0A, 0xA1, 0x6D, 0x85, 0xE5, 0x2B, 0xCB,
+    0xAB, 0x8D, 0x78, 0x6A, 0x68, 0x17, 0x8F, 0xA8, 0xFF, 0x7C, 0x2F, 0x5C,
+    0x71, 0x64, 0x8D, 0x6F
+};
+
+static const unsigned char dhtest_2048_224_xB[] = {
+    0x4F, 0xF3, 0xBC, 0x96, 0xC7, 0xFC, 0x6A, 0x6D, 0x71, 0xD3, 0xB3, 0x63,
+    0x80, 0x0A, 0x7C, 0xDF, 0xEF, 0x6F, 0xC4, 0x1B, 0x44, 0x17, 0xEA, 0x15,
+    0x35, 0x3B, 0x75, 0x90
+};
+
+static const unsigned char dhtest_2048_224_yB[] = {
+    0x4D, 0xCE, 0xE9, 0x92, 0xA9, 0x76, 0x2A, 0x13, 0xF2, 0xF8, 0x38, 0x44,
+    0xAD, 0x3D, 0x77, 0xEE, 0x0E, 0x31, 0xC9, 0x71, 0x8B, 0x3D, 0xB6, 0xC2,
+    0x03, 0x5D, 0x39, 0x61, 0x18, 0x2C, 0x3E, 0x0B, 0xA2, 0x47, 0xEC, 0x41,
+    0x82, 0xD7, 0x60, 0xCD, 0x48, 0xD9, 0x95, 0x99, 0x97, 0x06, 0x22, 0xA1,
+    0x88, 0x1B, 0xBA, 0x2D, 0xC8, 0x22, 0x93, 0x9C, 0x78, 0xC3, 0x91, 0x2C,
+    0x66, 0x61, 0xFA, 0x54, 0x38, 0xB2, 0x07, 0x66, 0x22, 0x2B, 0x75, 0xE2,
+    0x4C, 0x2E, 0x3A, 0xD0, 0xC7, 0x28, 0x72, 0x36, 0x12, 0x95, 0x25, 0xEE,
+    0x15, 0xB5, 0xDD, 0x79, 0x98, 0xAA, 0x04, 0xC4, 0xA9, 0x69, 0x6C, 0xAC,
+    0xD7, 0x17, 0x20, 0x83, 0xA9, 0x7A, 0x81, 0x66, 0x4E, 0xAD, 0x2C, 0x47,
+    0x9E, 0x44, 0x4E, 0x4C, 0x06, 0x54, 0xCC, 0x19, 0xE2, 0x8D, 0x77, 0x03,
+    0xCE, 0xE8, 0xDA, 0xCD, 0x61, 0x26, 0xF5, 0xD6, 0x65, 0xEC, 0x52, 0xC6,
+    0x72, 0x55, 0xDB, 0x92, 0x01, 0x4B, 0x03, 0x7E, 0xB6, 0x21, 0xA2, 0xAC,
+    0x8E, 0x36, 0x5D, 0xE0, 0x71, 0xFF, 0xC1, 0x40, 0x0A, 0xCF, 0x07, 0x7A,
+    0x12, 0x91, 0x3D, 0xD8, 0xDE, 0x89, 0x47, 0x34, 0x37, 0xAB, 0x7B, 0xA3,
+    0x46, 0x74, 0x3C, 0x1B, 0x21, 0x5D, 0xD9, 0xC1, 0x21, 0x64, 0xA7, 0xE4,
+    0x05, 0x31, 0x18, 0xD1, 0x99, 0xBE, 0xC8, 0xEF, 0x6F, 0xC5, 0x61, 0x17,
+    0x0C, 0x84, 0xC8, 0x7D, 0x10, 0xEE, 0x9A, 0x67, 0x4A, 0x1F, 0xA8, 0xFF,
+    0xE1, 0x3B, 0xDF, 0xBA, 0x1D, 0x44, 0xDE, 0x48, 0x94, 0x6D, 0x68, 0xDC,
+    0x0C, 0xDD, 0x77, 0x76, 0x35, 0xA7, 0xAB, 0x5B, 0xFB, 0x1E, 0x4B, 0xB7,
+    0xB8, 0x56, 0xF9, 0x68, 0x27, 0x73, 0x4C, 0x18, 0x41, 0x38, 0xE9, 0x15,
+    0xD9, 0xC3, 0x00, 0x2E, 0xBC, 0xE5, 0x31, 0x20, 0x54, 0x6A, 0x7E, 0x20,
+    0x02, 0x14, 0x2B, 0x6C
+};
+
+static const unsigned char dhtest_2048_224_Z[] = {
+    0x34, 0xD9, 0xBD, 0xDC, 0x1B, 0x42, 0x17, 0x6C, 0x31, 0x3F, 0xEA, 0x03,
+    0x4C, 0x21, 0x03, 0x4D, 0x07, 0x4A, 0x63, 0x13, 0xBB, 0x4E, 0xCD, 0xB3,
+    0x70, 0x3F, 0xFF, 0x42, 0x45, 0x67, 0xA4, 0x6B, 0xDF, 0x75, 0x53, 0x0E,
+    0xDE, 0x0A, 0x9D, 0xA5, 0x22, 0x9D, 0xE7, 0xD7, 0x67, 0x32, 0x28, 0x6C,
+    0xBC, 0x0F, 0x91, 0xDA, 0x4C, 0x3C, 0x85, 0x2F, 0xC0, 0x99, 0xC6, 0x79,
+    0x53, 0x1D, 0x94, 0xC7, 0x8A, 0xB0, 0x3D, 0x9D, 0xEC, 0xB0, 0xA4, 0xE4,
+    0xCA, 0x8B, 0x2B, 0xB4, 0x59, 0x1C, 0x40, 0x21, 0xCF, 0x8C, 0xE3, 0xA2,
+    0x0A, 0x54, 0x1D, 0x33, 0x99, 0x40, 0x17, 0xD0, 0x20, 0x0A, 0xE2, 0xC9,
+    0x51, 0x6E, 0x2F, 0xF5, 0x14, 0x57, 0x79, 0x26, 0x9E, 0x86, 0x2B, 0x0F,
+    0xB4, 0x74, 0xA2, 0xD5, 0x6D, 0xC3, 0x1E, 0xD5, 0x69, 0xA7, 0x70, 0x0B,
+    0x4C, 0x4A, 0xB1, 0x6B, 0x22, 0xA4, 0x55, 0x13, 0x53, 0x1E, 0xF5, 0x23,
+    0xD7, 0x12, 0x12, 0x07, 0x7B, 0x5A, 0x16, 0x9B, 0xDE, 0xFF, 0xAD, 0x7A,
+    0xD9, 0x60, 0x82, 0x84, 0xC7, 0x79, 0x5B, 0x6D, 0x5A, 0x51, 0x83, 0xB8,
+    0x70, 0x66, 0xDE, 0x17, 0xD8, 0xD6, 0x71, 0xC9, 0xEB, 0xD8, 0xEC, 0x89,
+    0x54, 0x4D, 0x45, 0xEC, 0x06, 0x15, 0x93, 0xD4, 0x42, 0xC6, 0x2A, 0xB9,
+    0xCE, 0x3B, 0x1C, 0xB9, 0x94, 0x3A, 0x1D, 0x23, 0xA5, 0xEA, 0x3B, 0xCF,
+    0x21, 0xA0, 0x14, 0x71, 0xE6, 0x7E, 0x00, 0x3E, 0x7F, 0x8A, 0x69, 0xC7,
+    0x28, 0xBE, 0x49, 0x0B, 0x2F, 0xC8, 0x8C, 0xFE, 0xB9, 0x2D, 0xB6, 0xA2,
+    0x15, 0xE5, 0xD0, 0x3C, 0x17, 0xC4, 0x64, 0xC9, 0xAC, 0x1A, 0x46, 0xE2,
+    0x03, 0xE1, 0x3F, 0x95, 0x29, 0x95, 0xFB, 0x03, 0xC6, 0x9D, 0x3C, 0xC4,
+    0x7F, 0xCB, 0x51, 0x0B, 0x69, 0x98, 0xFF, 0xD3, 0xAA, 0x6D, 0xE7, 0x3C,
+    0xF9, 0xF6, 0x38, 0x69
+};
+
+static const unsigned char dhtest_2048_256_xA[] = {
+    0x08, 0x81, 0x38, 0x2C, 0xDB, 0x87, 0x66, 0x0C, 0x6D, 0xC1, 0x3E, 0x61,
+    0x49, 0x38, 0xD5, 0xB9, 0xC8, 0xB2, 0xF2, 0x48, 0x58, 0x1C, 0xC5, 0xE3,
+    0x1B, 0x35, 0x45, 0x43, 0x97, 0xFC, 0xE5, 0x0E
+};
+
+static const unsigned char dhtest_2048_256_yA[] = {
+    0x2E, 0x93, 0x80, 0xC8, 0x32, 0x3A, 0xF9, 0x75, 0x45, 0xBC, 0x49, 0x41,
+    0xDE, 0xB0, 0xEC, 0x37, 0x42, 0xC6, 0x2F, 0xE0, 0xEC, 0xE8, 0x24, 0xA6,
+    0xAB, 0xDB, 0xE6, 0x6C, 0x59, 0xBE, 0xE0, 0x24, 0x29, 0x11, 0xBF, 0xB9,
+    0x67, 0x23, 0x5C, 0xEB, 0xA3, 0x5A, 0xE1, 0x3E, 0x4E, 0xC7, 0x52, 0xBE,
+    0x63, 0x0B, 0x92, 0xDC, 0x4B, 0xDE, 0x28, 0x47, 0xA9, 0xC6, 0x2C, 0xB8,
+    0x15, 0x27, 0x45, 0x42, 0x1F, 0xB7, 0xEB, 0x60, 0xA6, 0x3C, 0x0F, 0xE9,
+    0x15, 0x9F, 0xCC, 0xE7, 0x26, 0xCE, 0x7C, 0xD8, 0x52, 0x3D, 0x74, 0x50,
+    0x66, 0x7E, 0xF8, 0x40, 0xE4, 0x91, 0x91, 0x21, 0xEB, 0x5F, 0x01, 0xC8,
+    0xC9, 0xB0, 0xD3, 0xD6, 0x48, 0xA9, 0x3B, 0xFB, 0x75, 0x68, 0x9E, 0x82,
+    0x44, 0xAC, 0x13, 0x4A, 0xF5, 0x44, 0x71, 0x1C, 0xE7, 0x9A, 0x02, 0xDC,
+    0xC3, 0x42, 0x26, 0x68, 0x47, 0x80, 0xDD, 0xDC, 0xB4, 0x98, 0x59, 0x41,
+    0x06, 0xC3, 0x7F, 0x5B, 0xC7, 0x98, 0x56, 0x48, 0x7A, 0xF5, 0xAB, 0x02,
+    0x2A, 0x2E, 0x5E, 0x42, 0xF0, 0x98, 0x97, 0xC1, 0xA8, 0x5A, 0x11, 0xEA,
+    0x02, 0x12, 0xAF, 0x04, 0xD9, 0xB4, 0xCE, 0xBC, 0x93, 0x7C, 0x3C, 0x1A,
+    0x3E, 0x15, 0xA8, 0xA0, 0x34, 0x2E, 0x33, 0x76, 0x15, 0xC8, 0x4E, 0x7F,
+    0xE3, 0xB8, 0xB9, 0xB8, 0x7F, 0xB1, 0xE7, 0x3A, 0x15, 0xAF, 0x12, 0xA3,
+    0x0D, 0x74, 0x6E, 0x06, 0xDF, 0xC3, 0x4F, 0x29, 0x0D, 0x79, 0x7C, 0xE5,
+    0x1A, 0xA1, 0x3A, 0xA7, 0x85, 0xBF, 0x66, 0x58, 0xAF, 0xF5, 0xE4, 0xB0,
+    0x93, 0x00, 0x3C, 0xBE, 0xAF, 0x66, 0x5B, 0x3C, 0x2E, 0x11, 0x3A, 0x3A,
+    0x4E, 0x90, 0x52, 0x69, 0x34, 0x1D, 0xC0, 0x71, 0x14, 0x26, 0x68, 0x5F,
+    0x4E, 0xF3, 0x7E, 0x86, 0x8A, 0x81, 0x26, 0xFF, 0x3F, 0x22, 0x79, 0xB5,
+    0x7C, 0xA6, 0x7E, 0x29
+};
+
+static const unsigned char dhtest_2048_256_xB[] = {
+    0x7D, 0x62, 0xA7, 0xE3, 0xEF, 0x36, 0xDE, 0x61, 0x7B, 0x13, 0xD1, 0xAF,
+    0xB8, 0x2C, 0x78, 0x0D, 0x83, 0xA2, 0x3B, 0xD4, 0xEE, 0x67, 0x05, 0x64,
+    0x51, 0x21, 0xF3, 0x71, 0xF5, 0x46, 0xA5, 0x3D
+};
+
+static const unsigned char dhtest_2048_256_yB[] = {
+    0x57, 0x5F, 0x03, 0x51, 0xBD, 0x2B, 0x1B, 0x81, 0x74, 0x48, 0xBD, 0xF8,
+    0x7A, 0x6C, 0x36, 0x2C, 0x1E, 0x28, 0x9D, 0x39, 0x03, 0xA3, 0x0B, 0x98,
+    0x32, 0xC5, 0x74, 0x1F, 0xA2, 0x50, 0x36, 0x3E, 0x7A, 0xCB, 0xC7, 0xF7,
+    0x7F, 0x3D, 0xAC, 0xBC, 0x1F, 0x13, 0x1A, 0xDD, 0x8E, 0x03, 0x36, 0x7E,
+    0xFF, 0x8F, 0xBB, 0xB3, 0xE1, 0xC5, 0x78, 0x44, 0x24, 0x80, 0x9B, 0x25,
+    0xAF, 0xE4, 0xD2, 0x26, 0x2A, 0x1A, 0x6F, 0xD2, 0xFA, 0xB6, 0x41, 0x05,
+    0xCA, 0x30, 0xA6, 0x74, 0xE0, 0x7F, 0x78, 0x09, 0x85, 0x20, 0x88, 0x63,
+    0x2F, 0xC0, 0x49, 0x23, 0x37, 0x91, 0xAD, 0x4E, 0xDD, 0x08, 0x3A, 0x97,
+    0x8B, 0x88, 0x3E, 0xE6, 0x18, 0xBC, 0x5E, 0x0D, 0xD0, 0x47, 0x41, 0x5F,
+    0x2D, 0x95, 0xE6, 0x83, 0xCF, 0x14, 0x82, 0x6B, 0x5F, 0xBE, 0x10, 0xD3,
+    0xCE, 0x41, 0xC6, 0xC1, 0x20, 0xC7, 0x8A, 0xB2, 0x00, 0x08, 0xC6, 0x98,
+    0xBF, 0x7F, 0x0B, 0xCA, 0xB9, 0xD7, 0xF4, 0x07, 0xBE, 0xD0, 0xF4, 0x3A,
+    0xFB, 0x29, 0x70, 0xF5, 0x7F, 0x8D, 0x12, 0x04, 0x39, 0x63, 0xE6, 0x6D,
+    0xDD, 0x32, 0x0D, 0x59, 0x9A, 0xD9, 0x93, 0x6C, 0x8F, 0x44, 0x13, 0x7C,
+    0x08, 0xB1, 0x80, 0xEC, 0x5E, 0x98, 0x5C, 0xEB, 0xE1, 0x86, 0xF3, 0xD5,
+    0x49, 0x67, 0x7E, 0x80, 0x60, 0x73, 0x31, 0xEE, 0x17, 0xAF, 0x33, 0x80,
+    0xA7, 0x25, 0xB0, 0x78, 0x23, 0x17, 0xD7, 0xDD, 0x43, 0xF5, 0x9D, 0x7A,
+    0xF9, 0x56, 0x8A, 0x9B, 0xB6, 0x3A, 0x84, 0xD3, 0x65, 0xF9, 0x22, 0x44,
+    0xED, 0x12, 0x09, 0x88, 0x21, 0x93, 0x02, 0xF4, 0x29, 0x24, 0xC7, 0xCA,
+    0x90, 0xB8, 0x9D, 0x24, 0xF7, 0x1B, 0x0A, 0xB6, 0x97, 0x82, 0x3D, 0x7D,
+    0xEB, 0x1A, 0xFF, 0x5B, 0x0E, 0x8E, 0x4A, 0x45, 0xD4, 0x9F, 0x7F, 0x53,
+    0x75, 0x7E, 0x19, 0x13
+};
+
+static const unsigned char dhtest_2048_256_Z[] = {
+    0x86, 0xC7, 0x0B, 0xF8, 0xD0, 0xBB, 0x81, 0xBB, 0x01, 0x07, 0x8A, 0x17,
+    0x21, 0x9C, 0xB7, 0xD2, 0x72, 0x03, 0xDB, 0x2A, 0x19, 0xC8, 0x77, 0xF1,
+    0xD1, 0xF1, 0x9F, 0xD7, 0xD7, 0x7E, 0xF2, 0x25, 0x46, 0xA6, 0x8F, 0x00,
+    0x5A, 0xD5, 0x2D, 0xC8, 0x45, 0x53, 0xB7, 0x8F, 0xC6, 0x03, 0x30, 0xBE,
+    0x51, 0xEA, 0x7C, 0x06, 0x72, 0xCA, 0xC1, 0x51, 0x5E, 0x4B, 0x35, 0xC0,
+    0x47, 0xB9, 0xA5, 0x51, 0xB8, 0x8F, 0x39, 0xDC, 0x26, 0xDA, 0x14, 0xA0,
+    0x9E, 0xF7, 0x47, 0x74, 0xD4, 0x7C, 0x76, 0x2D, 0xD1, 0x77, 0xF9, 0xED,
+    0x5B, 0xC2, 0xF1, 0x1E, 0x52, 0xC8, 0x79, 0xBD, 0x95, 0x09, 0x85, 0x04,
+    0xCD, 0x9E, 0xEC, 0xD8, 0xA8, 0xF9, 0xB3, 0xEF, 0xBD, 0x1F, 0x00, 0x8A,
+    0xC5, 0x85, 0x30, 0x97, 0xD9, 0xD1, 0x83, 0x7F, 0x2B, 0x18, 0xF7, 0x7C,
+    0xD7, 0xBE, 0x01, 0xAF, 0x80, 0xA7, 0xC7, 0xB5, 0xEA, 0x3C, 0xA5, 0x4C,
+    0xC0, 0x2D, 0x0C, 0x11, 0x6F, 0xEE, 0x3F, 0x95, 0xBB, 0x87, 0x39, 0x93,
+    0x85, 0x87, 0x5D, 0x7E, 0x86, 0x74, 0x7E, 0x67, 0x6E, 0x72, 0x89, 0x38,
+    0xAC, 0xBF, 0xF7, 0x09, 0x8E, 0x05, 0xBE, 0x4D, 0xCF, 0xB2, 0x40, 0x52,
+    0xB8, 0x3A, 0xEF, 0xFB, 0x14, 0x78, 0x3F, 0x02, 0x9A, 0xDB, 0xDE, 0x7F,
+    0x53, 0xFA, 0xE9, 0x20, 0x84, 0x22, 0x40, 0x90, 0xE0, 0x07, 0xCE, 0xE9,
+    0x4D, 0x4B, 0xF2, 0xBA, 0xCE, 0x9F, 0xFD, 0x4B, 0x57, 0xD2, 0xAF, 0x7C,
+    0x72, 0x4D, 0x0C, 0xAA, 0x19, 0xBF, 0x05, 0x01, 0xF6, 0xF1, 0x7B, 0x4A,
+    0xA1, 0x0F, 0x42, 0x5E, 0x3E, 0xA7, 0x60, 0x80, 0xB4, 0xB9, 0xD6, 0xB3,
+    0xCE, 0xFE, 0xA1, 0x15, 0xB2, 0xCE, 0xB8, 0x78, 0x9B, 0xB8, 0xA3, 0xB0,
+    0xEA, 0x87, 0xFE, 0xBE, 0x63, 0xB6, 0xC8, 0xF8, 0x46, 0xEC, 0x6D, 0xB0,
+    0xC2, 0x6C, 0x5D, 0x7C
+};
+
+typedef struct {                /* one test set for run_rfc5114_tests() */
+    DH *(*get_param) (void);    /* returns a new DH with group parameters */
+    const unsigned char *xA;    /* party A private key, fed to BN_bin2bn() */
+    size_t xA_len;              /* length of xA in bytes */
+    const unsigned char *yA;    /* party A public key, fed to BN_bin2bn() */
+    size_t yA_len;              /* length of yA in bytes */
+    const unsigned char *xB;    /* party B private key, fed to BN_bin2bn() */
+    size_t xB_len;              /* length of xB in bytes */
+    const unsigned char *yB;    /* party B public key, fed to BN_bin2bn() */
+    size_t yB_len;              /* length of yB in bytes */
+    const unsigned char *Z;     /* expected output of DH_compute_key() */
+    size_t Z_len;               /* length of Z; must equal DH_size() */
+} rfc5114_td;
+
+# define make_rfc5114_td(pre) { /* rfc5114_td initialiser for group <pre> */ \
+        DH_get_##pre, \
+        dhtest_##pre##_xA, sizeof(dhtest_##pre##_xA), \
+        dhtest_##pre##_yA, sizeof(dhtest_##pre##_yA), \
+        dhtest_##pre##_xB, sizeof(dhtest_##pre##_xB), \
+        dhtest_##pre##_yB, sizeof(dhtest_##pre##_yB), \
+        dhtest_##pre##_Z, sizeof(dhtest_##pre##_Z) \
+        }
+
+static const rfc5114_td rfctd[] = { /* sets walked by run_rfc5114_tests() */
+        make_rfc5114_td(1024_160),
+        make_rfc5114_td(2048_224),
+        make_rfc5114_td(2048_256)
+};
+
+/*
+ * Known-answer tests for the DH parameter sets listed in rfctd.
+ *
+ * For each set two DH objects are created with the group parameters and
+ * loaded with the fixed key pairs of party A and party B.  The shared
+ * secret is then derived from both sides and compared with the expected
+ * value Z.
+ *
+ * Returns 1 if every set passes and 0 on the first failure, after printing
+ * a diagnostic to stderr.  Everything allocated for the failing set is
+ * released before returning.
+ */
+static int run_rfc5114_tests(void)
+{
+    int i;
+    int lenA, lenB;
+    /* Function scope (not loop scope) so the error paths can release them */
+    DH *dhA = NULL, *dhB = NULL;
+    unsigned char *Z1 = NULL, *Z2 = NULL;
+
+    for (i = 0; i < (int)(sizeof(rfctd) / sizeof(rfc5114_td)); i++) {
+        const rfc5114_td *td = rfctd + i;
+
+        /* Set up DH structures setting key components */
+        dhA = td->get_param();
+        dhB = td->get_param();
+        if (!dhA || !dhB)
+            goto bad_err;
+
+        dhA->priv_key = BN_bin2bn(td->xA, td->xA_len, NULL);
+        dhA->pub_key = BN_bin2bn(td->yA, td->yA_len, NULL);
+
+        dhB->priv_key = BN_bin2bn(td->xB, td->xB_len, NULL);
+        dhB->pub_key = BN_bin2bn(td->yB, td->yB_len, NULL);
+
+        if (!dhA->priv_key || !dhA->pub_key
+            || !dhB->priv_key || !dhB->pub_key)
+            goto bad_err;
+
+        if ((td->Z_len != (size_t)DH_size(dhA))
+            || (td->Z_len != (size_t)DH_size(dhB)))
+            goto err;
+
+        Z1 = OPENSSL_malloc(DH_size(dhA));
+        Z2 = OPENSSL_malloc(DH_size(dhB));
+        if (!Z1 || !Z2)
+            goto bad_err;
+
+        /*
+         * Work out shared secrets using both sides and compare with expected
+         * values.  DH_compute_key() reports failure with a negative value,
+         * so the result has to be tested with <= 0 rather than with '!'.
+         */
+        lenA = DH_compute_key(Z1, dhB->pub_key, dhA);
+        if (lenA <= 0)
+            goto bad_err;
+        lenB = DH_compute_key(Z2, dhA->pub_key, dhB);
+        if (lenB <= 0)
+            goto bad_err;
+
+        /*
+         * Check the length first: a secret shorter than Z_len would leave
+         * the tail of the buffer uninitialised for memcmp().
+         */
+        if ((size_t)lenA != td->Z_len || memcmp(Z1, td->Z, td->Z_len))
+            goto err;
+        if ((size_t)lenB != td->Z_len || memcmp(Z2, td->Z, td->Z_len))
+            goto err;
+
+        printf("RFC5114 parameter test %d OK\n", i + 1);
+
+        DH_free(dhA);
+        DH_free(dhB);
+        OPENSSL_free(Z1);
+        OPENSSL_free(Z2);
+        /* Reset so a later error path never frees this set a second time */
+        dhA = dhB = NULL;
+        Z1 = Z2 = NULL;
+    }
+    return 1;
+ bad_err:
+    fprintf(stderr, "Initialisation error RFC5114 set %d\n", i + 1);
+    ERR_print_errors_fp(stderr);
+    goto cleanup;
+ err:
+    fprintf(stderr, "Test failed RFC5114 set %d\n", i + 1);
+ cleanup:
+    if (dhA)
+        DH_free(dhA);
+    if (dhB)
+        DH_free(dhB);
+    if (Z1)
+        OPENSSL_free(Z1);
+    if (Z2)
+        OPENSSL_free(Z2);
+    return 0;
+}
+
 #endif
index a2f0ee7..545358f 100644 (file)
@@ -287,6 +287,7 @@ void ERR_load_DSA_strings(void);
 # define DSA_F_DO_DSA_PRINT                               104
 # define DSA_F_DSAPARAMS_PRINT                            100
 # define DSA_F_DSAPARAMS_PRINT_FP                         101
+# define DSA_F_DSA_BUILTIN_PARAMGEN2                      126
 # define DSA_F_DSA_DO_SIGN                                112
 # define DSA_F_DSA_DO_VERIFY                              113
 # define DSA_F_DSA_GENERATE_KEY                           124
@@ -316,12 +317,14 @@ void ERR_load_DSA_strings(void);
 # define DSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE                100
 # define DSA_R_DECODE_ERROR                               104
 # define DSA_R_INVALID_DIGEST_TYPE                        106
+# define DSA_R_INVALID_PARAMETERS                         112
 # define DSA_R_MISSING_PARAMETERS                         101
 # define DSA_R_MODULUS_TOO_LARGE                          103
 # define DSA_R_NEED_NEW_SETUP_VALUES                      110
 # define DSA_R_NON_FIPS_DSA_METHOD                        111
 # define DSA_R_NO_PARAMETERS_SET                          107
 # define DSA_R_PARAMETER_ENCODING_ERROR                   105
+# define DSA_R_Q_NOT_PRIME                                113
 
 #ifdef  __cplusplus
 }
index a2840ea..2a5cd71 100644 (file)
@@ -601,10 +601,14 @@ static int dsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
             X509_ALGOR_set0(alg2, OBJ_nid2obj(snid), V_ASN1_UNDEF, 0);
         }
         return 1;
+
+    case ASN1_PKEY_CTRL_CMS_RI_TYPE:
+        *(int *)arg2 = CMS_RECIPINFO_NONE;
+        return 1;
 #endif
 
     case ASN1_PKEY_CTRL_DEFAULT_MD_NID:
-        *(int *)arg2 = NID_sha1;
+        *(int *)arg2 = NID_sha256;
         return 2;
 
     default:
index 746f5df..f5ddc66 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/dsa/dsa_err.c */
 /* ====================================================================
- * Copyright (c) 1999-2011 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2013 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -74,6 +74,7 @@ static ERR_STRING_DATA DSA_str_functs[] = {
     {ERR_FUNC(DSA_F_DO_DSA_PRINT), "DO_DSA_PRINT"},
     {ERR_FUNC(DSA_F_DSAPARAMS_PRINT), "DSAparams_print"},
     {ERR_FUNC(DSA_F_DSAPARAMS_PRINT_FP), "DSAparams_print_fp"},
+    {ERR_FUNC(DSA_F_DSA_BUILTIN_PARAMGEN2), "DSA_BUILTIN_PARAMGEN2"},
     {ERR_FUNC(DSA_F_DSA_DO_SIGN), "DSA_do_sign"},
     {ERR_FUNC(DSA_F_DSA_DO_VERIFY), "DSA_do_verify"},
     {ERR_FUNC(DSA_F_DSA_GENERATE_KEY), "DSA_generate_key"},
@@ -107,12 +108,14 @@ static ERR_STRING_DATA DSA_str_reasons[] = {
      "data too large for key size"},
     {ERR_REASON(DSA_R_DECODE_ERROR), "decode error"},
     {ERR_REASON(DSA_R_INVALID_DIGEST_TYPE), "invalid digest type"},
+    {ERR_REASON(DSA_R_INVALID_PARAMETERS), "invalid parameters"},
     {ERR_REASON(DSA_R_MISSING_PARAMETERS), "missing parameters"},
     {ERR_REASON(DSA_R_MODULUS_TOO_LARGE), "modulus too large"},
     {ERR_REASON(DSA_R_NEED_NEW_SETUP_VALUES), "need new setup values"},
     {ERR_REASON(DSA_R_NON_FIPS_DSA_METHOD), "non fips dsa method"},
     {ERR_REASON(DSA_R_NO_PARAMETERS_SET), "no parameters set"},
     {ERR_REASON(DSA_R_PARAMETER_ENCODING_ERROR), "parameter encoding error"},
+    {ERR_REASON(DSA_R_Q_NOT_PRIME), "q not prime"},
     {0, NULL}
 };
 
index d686ab0..5a328aa 100644 (file)
@@ -86,6 +86,8 @@
 # include "dsa_locl.h"
 
 # ifdef OPENSSL_FIPS
+/* Workaround bug in prototype: rename the fips.h declaration out of the way */
+#  define fips_dsa_builtin_paramgen2 fips_dsa_paramgen_bad
 #  include <openssl/fips.h>
 # endif
 
@@ -383,4 +385,371 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits,
         BN_MONT_CTX_free(mont);
     return ok;
 }
+
+# ifdef OPENSSL_FIPS
+#  undef fips_dsa_builtin_paramgen2 /* drop the rename made before fips.h */
+extern int fips_dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N,
+                                      const EVP_MD *evpmd,
+                                      const unsigned char *seed_in,
+                                      size_t seed_len, int idx,
+                                      unsigned char *seed_out,
+                                      int *counter_ret, unsigned long *h_ret,
+                                      BN_GENCB *cb);
+# endif /* OPENSSL_FIPS */
+
+/*
+ * This is a parameter generation algorithm for the DSA2 algorithm as
+ * described in FIPS 186-3.
+ */
+
+int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N,
+                          const EVP_MD *evpmd, const unsigned char *seed_in,
+                          size_t seed_len, int idx, unsigned char *seed_out,
+                          int *counter_ret, unsigned long *h_ret,
+                          BN_GENCB *cb)
+{
+    int ok = -1;
+    unsigned char *seed = NULL, *seed_tmp = NULL;
+    unsigned char md[EVP_MAX_MD_SIZE];
+    int mdsize;
+    BIGNUM *r0, *W, *X, *c, *test;
+    BIGNUM *g = NULL, *q = NULL, *p = NULL;
+    BN_MONT_CTX *mont = NULL;
+    int i, k, n = 0, m = 0, qsize = N >> 3;
+    int counter = 0;
+    int r = 0;
+    BN_CTX *ctx = NULL;
+    EVP_MD_CTX mctx;
+    unsigned int h = 2;
+
+# ifdef OPENSSL_FIPS
+
+    if (FIPS_mode())
+        return fips_dsa_builtin_paramgen2(ret, L, N, evpmd,
+                                          seed_in, seed_len, idx,
+                                          seed_out, counter_ret, h_ret, cb);
+# endif
+
+    EVP_MD_CTX_init(&mctx);
+
+    if (evpmd == NULL) {
+        if (N == 160)
+            evpmd = EVP_sha1();
+        else if (N == 224)
+            evpmd = EVP_sha224();
+        else
+            evpmd = EVP_sha256();
+    }
+
+    mdsize = EVP_MD_size(evpmd);
+    /* If only doing unverifiable g generation we don't need a seed */
+    if (!ret->p || !ret->q || idx >= 0) {
+        if (seed_len == 0)
+            seed_len = mdsize;
+
+        seed = OPENSSL_malloc(seed_len);
+
+        if (seed_out)
+            seed_tmp = seed_out;
+        else
+            seed_tmp = OPENSSL_malloc(seed_len);
+
+        if (!seed || !seed_tmp)
+            goto err;
+
+        if (seed_in)
+            memcpy(seed, seed_in, seed_len);
+
+    }
+
+    if ((ctx = BN_CTX_new()) == NULL)
+        goto err;
+
+    if ((mont = BN_MONT_CTX_new()) == NULL)
+        goto err;
+
+    BN_CTX_start(ctx);
+    r0 = BN_CTX_get(ctx);
+    g = BN_CTX_get(ctx);
+    W = BN_CTX_get(ctx);
+    X = BN_CTX_get(ctx);
+    c = BN_CTX_get(ctx);
+    test = BN_CTX_get(ctx);
+
+    /* if p, q already supplied generate g only */
+    if (ret->p && ret->q) {
+        p = ret->p;
+        q = ret->q;
+        if (idx >= 0)
+            memcpy(seed_tmp, seed, seed_len);
+        goto g_only;
+    } else {
+        p = BN_CTX_get(ctx);
+        q = BN_CTX_get(ctx);
+    }
+
+    if (!BN_lshift(test, BN_value_one(), L - 1))
+        goto err;
+    for (;;) {
+        for (;;) {              /* find q */
+            unsigned char *pmd;
+            /* step 1 */
+            if (!BN_GENCB_call(cb, 0, m++))
+                goto err;
+
+            if (!seed_in) {
+                if (RAND_pseudo_bytes(seed, seed_len) < 0)
+                    goto err;
+            }
+            /* step 2 */
+            if (!EVP_Digest(seed, seed_len, md, NULL, evpmd, NULL))
+                goto err;
+            /* Take least significant bits of md */
+            if (mdsize > qsize)
+                pmd = md + mdsize - qsize;
+            else
+                pmd = md;
+
+            if (mdsize < qsize)
+                memset(md + mdsize, 0, qsize - mdsize);
+
+            /* step 3 */
+            pmd[0] |= 0x80;
+            pmd[qsize - 1] |= 0x01;
+            if (!BN_bin2bn(pmd, qsize, q))
+                goto err;
+
+            /* step 4 */
+            r = BN_is_prime_fasttest_ex(q, DSS_prime_checks, ctx,
+                                        seed_in ? 1 : 0, cb);
+            if (r > 0)
+                break;
+            if (r != 0)
+                goto err;
+            /* Provided seed didn't produce a prime: error */
+            if (seed_in) {
+                ok = 0;
+                DSAerr(DSA_F_DSA_BUILTIN_PARAMGEN2, DSA_R_Q_NOT_PRIME);
+                goto err;
+            }
+
+            /* do a callback call */
+            /* step 5 */
+        }
+        /* Copy seed to seed_out before we mess with it */
+        if (seed_out)
+            memcpy(seed_out, seed, seed_len);
+
+        if (!BN_GENCB_call(cb, 2, 0))
+            goto err;
+        if (!BN_GENCB_call(cb, 3, 0))
+            goto err;
+
+        /* step 6 */
+        counter = 0;
+        /* "offset = 1" */
+
+        n = (L - 1) / (mdsize << 3);
+
+        for (;;) {
+            if ((counter != 0) && !BN_GENCB_call(cb, 0, counter))
+                goto err;
+
+            /* step 7 */
+            BN_zero(W);
+            /* now 'seed' contains "SEED + offset - 1" */
+            for (k = 0; k <= n; k++) {
+                /*
+                 * obtain "SEED + offset + k" by incrementing:
+                 */
+                for (i = seed_len - 1; i >= 0; i--) {
+                    seed[i]++;
+                    if (seed[i] != 0)
+                        break;
+                }
+
+                if (!EVP_Digest(seed, seed_len, md, NULL, evpmd, NULL))
+                    goto err;
+
+                /* step 8 */
+                if (!BN_bin2bn(md, mdsize, r0))
+                    goto err;
+                if (!BN_lshift(r0, r0, (mdsize << 3) * k))
+                    goto err;
+                if (!BN_add(W, W, r0))
+                    goto err;
+            }
+
+            /* more of step 8 */
+            if (!BN_mask_bits(W, L - 1))
+                goto err;
+            if (!BN_copy(X, W))
+                goto err;
+            if (!BN_add(X, X, test))
+                goto err;
+
+            /* step 9 */
+            if (!BN_lshift1(r0, q))
+                goto err;
+            if (!BN_mod(c, X, r0, ctx))
+                goto err;
+            if (!BN_sub(r0, c, BN_value_one()))
+                goto err;
+            if (!BN_sub(p, X, r0))
+                goto err;
+
+            /* step 10 */
+            if (BN_cmp(p, test) >= 0) {
+                /* step 11 */
+                r = BN_is_prime_fasttest_ex(p, DSS_prime_checks, ctx, 1, cb);
+                if (r > 0)
+                    goto end;   /* found it */
+                if (r != 0)
+                    goto err;
+            }
+
+            /* step 13 */
+            counter++;
+            /* "offset = offset + n + 1" */
+
+            /* step 14 */
+            if (counter >= (int)(4 * L))
+                break;
+        }
+        if (seed_in) {
+            ok = 0;
+            DSAerr(DSA_F_DSA_BUILTIN_PARAMGEN2, DSA_R_INVALID_PARAMETERS);
+            goto err;
+        }
+    }
+ end:
+    if (!BN_GENCB_call(cb, 2, 1))
+        goto err;
+
+ g_only:
+
+    /* We now need to generate g */
+    /* Set r0=(p-1)/q */
+    if (!BN_sub(test, p, BN_value_one()))
+        goto err;
+    if (!BN_div(r0, NULL, test, q, ctx))
+        goto err;
+
+    if (idx < 0) {
+        if (!BN_set_word(test, h))
+            goto err;
+    } else
+        h = 1;
+    if (!BN_MONT_CTX_set(mont, p, ctx))
+        goto err;
+
+    for (;;) {
+        static const unsigned char ggen[4] = { 0x67, 0x67, 0x65, 0x6e };
+        if (idx >= 0) {
+            md[0] = idx & 0xff;
+            md[1] = (h >> 8) & 0xff;
+            md[2] = h & 0xff;
+            if (!EVP_DigestInit_ex(&mctx, evpmd, NULL))
+                goto err;
+            if (!EVP_DigestUpdate(&mctx, seed_tmp, seed_len))
+                goto err;
+            if (!EVP_DigestUpdate(&mctx, ggen, sizeof(ggen)))
+                goto err;
+            if (!EVP_DigestUpdate(&mctx, md, 3))
+                goto err;
+            if (!EVP_DigestFinal_ex(&mctx, md, NULL))
+                goto err;
+            if (!BN_bin2bn(md, mdsize, test))
+                goto err;
+        }
+        /* g=test^r0%p */
+        if (!BN_mod_exp_mont(g, test, r0, p, ctx, mont))
+            goto err;
+        if (!BN_is_one(g))
+            break;
+        if (idx < 0 && !BN_add(test, test, BN_value_one()))
+            goto err;
+        h++;
+        if (idx >= 0 && h > 0xffff)
+            goto err;
+    }
+
+    if (!BN_GENCB_call(cb, 3, 1))
+        goto err;
+
+    ok = 1;
+ err:
+    if (ok == 1) {
+        if (p != ret->p) {
+            if (ret->p)
+                BN_free(ret->p);
+            ret->p = BN_dup(p);
+        }
+        if (q != ret->q) {
+            if (ret->q)
+                BN_free(ret->q);
+            ret->q = BN_dup(q);
+        }
+        if (ret->g)
+            BN_free(ret->g);
+        ret->g = BN_dup(g);
+        if (ret->p == NULL || ret->q == NULL || ret->g == NULL) {
+            ok = -1;
+            goto err;
+        }
+        if (counter_ret != NULL)
+            *counter_ret = counter;
+        if (h_ret != NULL)
+            *h_ret = h;
+    }
+    if (seed)
+        OPENSSL_free(seed);
+    if (seed_out != seed_tmp)
+        OPENSSL_free(seed_tmp);
+    if (ctx) {
+        BN_CTX_end(ctx);
+        BN_CTX_free(ctx);
+    }
+    if (mont != NULL)
+        BN_MONT_CTX_free(mont);
+    EVP_MD_CTX_cleanup(&mctx);
+    return ok;
+}
+
+/*
+ * Check the generator g of the DSA parameters in |dsa|.
+ *
+ * g is accepted when 1 < g < p and g^q mod p == 1.
+ *
+ * Returns 1 if g passes both checks, 0 if it fails either of them, and -1
+ * if a BN_CTX/BN_MONT_CTX allocation or a bignum operation fails.
+ */
+int dsa_paramgen_check_g(DSA *dsa)
+{
+    BN_CTX *ctx;
+    BIGNUM *tmp;
+    BN_MONT_CTX *mont = NULL;
+    int rv = -1;
+
+    ctx = BN_CTX_new();
+    if (ctx == NULL)
+        return -1;
+    BN_CTX_start(ctx);
+    /*
+     * Range check: g must lie strictly between 1 and p.  Leave through the
+     * common cleanup path so that BN_CTX_end()/BN_CTX_free() are called; a
+     * direct return at this point would leak ctx.
+     */
+    if (BN_cmp(dsa->g, BN_value_one()) <= 0 ||
+        BN_cmp(dsa->g, dsa->p) >= 0) {
+        rv = 0;
+        goto err;
+    }
+    tmp = BN_CTX_get(ctx);
+    if (tmp == NULL)
+        goto err;
+    if ((mont = BN_MONT_CTX_new()) == NULL)
+        goto err;
+    if (!BN_MONT_CTX_set(mont, dsa->p, ctx))
+        goto err;
+    /* Work out g^q mod p */
+    if (!BN_mod_exp_mont(tmp, dsa->g, dsa->q, dsa->p, ctx, mont))
+        goto err;
+    /* BN_cmp() returns 0 on equality, so g is valid iff g^q mod p == 1 */
+    if (!BN_cmp(tmp, BN_value_one()))
+        rv = 1;
+    else
+        rv = 0;
+ err:
+    BN_CTX_end(ctx);
+    if (mont)
+        BN_MONT_CTX_free(mont);
+    BN_CTX_free(ctx);
+    return rv;
+}
 #endif
index f32ee96..9c23c3e 100644 (file)
@@ -59,3 +59,11 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits,
                          size_t seed_len, unsigned char *seed_out,
                          int *counter_ret, unsigned long *h_ret,
                          BN_GENCB *cb);
+
+int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N,
+                          const EVP_MD *evpmd, const unsigned char *seed_in,
+                          size_t seed_len, int idx, unsigned char *seed_out,
+                          int *counter_ret, unsigned long *h_ret,
+                          BN_GENCB *cb);
+
+int dsa_paramgen_check_g(DSA *dsa);
index 6edb26d..f0ec8fa 100644 (file)
@@ -398,11 +398,7 @@ static int dsa_do_verify(const unsigned char *dgst, int dgst_len,
     ret = (BN_ucmp(&u1, sig->r) == 0);
 
  err:
-    /*
-     * XXX: surely this is wrong - if ret is 0, it just didn't verify; there
-     * is no error in BN. Test should be ret == -1 (Ben)
-     */
-    if (ret != 1)
+    if (ret < 0)
         DSAerr(DSA_F_DSA_DO_VERIFY, ERR_R_BN_LIB);
     if (ctx != NULL)
         BN_CTX_free(ctx);
index 0d480f6..42b8bb0 100644 (file)
@@ -197,6 +197,10 @@ static int pkey_dsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
         dctx->md = p2;
         return 1;
 
+    case EVP_PKEY_CTRL_GET_MD:
+        *(const EVP_MD **)p2 = dctx->md;
+        return 1;
+
     case EVP_PKEY_CTRL_DIGESTINIT:
     case EVP_PKEY_CTRL_PKCS7_SIGN:
     case EVP_PKEY_CTRL_CMS_SIGN:
index 54c943d..c65234e 100644 (file)
@@ -633,7 +633,6 @@ static int win32_pathbyaddr(void *addr, char *path, int sz)
     CREATETOOLHELP32SNAPSHOT create_snap;
     CLOSETOOLHELP32SNAPSHOT close_snap;
     MODULE32 module_first, module_next;
-    int len;
 
     if (addr == NULL) {
         union {
@@ -694,25 +693,29 @@ static int win32_pathbyaddr(void *addr, char *path, int sz)
             return WideCharToMultiByte(CP_ACP, 0, me32.szExePath, -1,
                                        path, sz, NULL, NULL);
 #  else
-            len = (int)wcslen(me32.szExePath);
-            if (sz <= 0)
-                return len + 1;
-            if (len >= sz)
-                len = sz - 1;
-            for (i = 0; i < len; i++)
-                path[i] = (char)me32.szExePath[i];
-            path[len++] = 0;
-            return len;
+            {
+                int i, len = (int)wcslen(me32.szExePath);
+                if (sz <= 0)
+                    return len + 1;
+                if (len >= sz)
+                    len = sz - 1;
+                for (i = 0; i < len; i++)
+                    path[i] = (char)me32.szExePath[i];
+                path[len++] = 0;
+                return len;
+            }
 #  endif
 # else
-            len = (int)strlen(me32.szExePath);
-            if (sz <= 0)
-                return len + 1;
-            if (len >= sz)
-                len = sz - 1;
-            memcpy(path, me32.szExePath, len);
-            path[len++] = 0;
-            return len;
+            {
+                int len = (int)strlen(me32.szExePath);
+                if (sz <= 0)
+                    return len + 1;
+                if (len >= sz)
+                    len = sz - 1;
+                memcpy(path, me32.szExePath, len);
+                path[len++] = 0;
+                return len;
+            }
 # endif
         }
     } while ((*module_next) (hModuleSnap, &me32));
index 4b7652c..fd6df92 100644 (file)
@@ -3,7 +3,7 @@
 #ifndef CHARSET_EBCDIC
 
 # include <openssl/e_os2.h>
-# if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX)
+# if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX) || defined(__clang__)
 static void *dummy = &dummy;
 # endif
 
index 2753b28..359ef4e 100644 (file)
@@ -11,6 +11,8 @@ MAKEFILE=     Makefile
 AR=            ar r
 
 CFLAGS= $(INCLUDES) $(CFLAG)
+ASFLAGS= $(INCLUDES) $(ASFLAG)
+AFLAGS= $(ASFLAGS)
 
 GENERAL=Makefile
 TEST=ectest.c
@@ -27,7 +29,7 @@ LIBOBJ=       ec_lib.o ecp_smpl.o ecp_mont.o ecp_nist.o ec_cvt.o ec_mult.o\
        ec_err.o ec_curve.o ec_check.o ec_print.o ec_asn1.o ec_key.o\
        ec2_smpl.o ec2_mult.o ec_ameth.o ec_pmeth.o eck_prn.o \
        ecp_nistp224.o ecp_nistp256.o ecp_nistp521.o ecp_nistputil.o \
-       ecp_oct.o ec2_oct.o ec_oct.o
+       ecp_oct.o ec2_oct.o ec_oct.o $(EC_ASM)
 
 SRC= $(LIBSRC)
 
@@ -46,6 +48,12 @@ lib: $(LIBOBJ)
        $(RANLIB) $(LIB) || echo Never mind.
        @touch lib
 
+ecp_nistz256-x86_64.s: asm/ecp_nistz256-x86_64.pl
+       $(PERL) asm/ecp_nistz256-x86_64.pl $(PERLASM_SCHEME) > $@
+
+ecp_nistz256-avx2.s:   asm/ecp_nistz256-avx2.pl
+       $(PERL) asm/ecp_nistz256-avx2.pl $(PERLASM_SCHEME) > $@
+
 files:
        $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
 
@@ -110,14 +118,14 @@ ec2_smpl.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 ec2_smpl.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
 ec2_smpl.o: ../../include/openssl/symhacks.h ec2_smpl.c ec_lcl.h
 ec_ameth.o: ../../e_os.h ../../include/openssl/asn1.h
-ec_ameth.o: ../../include/openssl/bio.h ../../include/openssl/bn.h
-ec_ameth.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h
-ec_ameth.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-ec_ameth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
-ec_ameth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-ec_ameth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-ec_ameth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-ec_ameth.o: ../../include/openssl/opensslconf.h
+ec_ameth.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
+ec_ameth.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
+ec_ameth.o: ../../include/openssl/cms.h ../../include/openssl/crypto.h
+ec_ameth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
+ec_ameth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
+ec_ameth.o: ../../include/openssl/err.h ../../include/openssl/evp.h
+ec_ameth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+ec_ameth.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
 ec_ameth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 ec_ameth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
 ec_ameth.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
@@ -198,18 +206,19 @@ ec_oct.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
 ec_oct.o: ../../include/openssl/symhacks.h ec_lcl.h ec_oct.c
 ec_pmeth.o: ../../e_os.h ../../include/openssl/asn1.h
 ec_pmeth.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
-ec_pmeth.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
-ec_pmeth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
-ec_pmeth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
-ec_pmeth.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-ec_pmeth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-ec_pmeth.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+ec_pmeth.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
+ec_pmeth.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
+ec_pmeth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
+ec_pmeth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
+ec_pmeth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
+ec_pmeth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+ec_pmeth.o: ../../include/openssl/opensslconf.h
 ec_pmeth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 ec_pmeth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
 ec_pmeth.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
 ec_pmeth.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
 ec_pmeth.o: ../../include/openssl/x509_vfy.h ../cryptlib.h ../evp/evp_locl.h
-ec_pmeth.o: ec_pmeth.c
+ec_pmeth.o: ec_lcl.h ec_pmeth.c
 ec_print.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
 ec_print.o: ../../include/openssl/bn.h ../../include/openssl/crypto.h
 ec_print.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
diff --git a/crypto/ec/asm/ecp_nistz256-avx2.pl b/crypto/ec/asm/ecp_nistz256-avx2.pl
new file mode 100755 (executable)
index 0000000..4c220aa
--- /dev/null
@@ -0,0 +1,2093 @@
+#!/usr/bin/env perl
+
+##############################################################################
+#                                                                            #
+# Copyright 2014 Intel Corporation                                           #
+#                                                                            #
+# Licensed under the Apache License, Version 2.0 (the "License");            #
+# you may not use this file except in compliance with the License.           #
+# You may obtain a copy of the License at                                    #
+#                                                                            #
+#    http://www.apache.org/licenses/LICENSE-2.0                              #
+#                                                                            #
+# Unless required by applicable law or agreed to in writing, software        #
+# distributed under the License is distributed on an "AS IS" BASIS,          #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   #
+# See the License for the specific language governing permissions and        #
+# limitations under the License.                                             #
+#                                                                            #
+##############################################################################
+#                                                                            #
+#  Developers and authors:                                                   #
+#  Shay Gueron (1, 2), and Vlad Krasnov (1)                                  #
+#  (1) Intel Corporation, Israel Development Center                          #
+#  (2) University of Haifa                                                   #
+#  Reference:                                                                #
+#  S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with#
+#                           256 Bit Primes"                                  #
+#                                                                            #
+##############################################################################
+
+# Command line: [flavour] output.  If the first argument contains a dot it
+# is taken to be the output file name and no flavour is set.
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+# Win64 is selected by a nasm/masm/mingw64 flavour or a .asm output file.
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+# Locate the x86_64-xlate.pl translator, first next to this script and then
+# in ../../perlasm relative to it.
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+# Everything printed to STDOUT from here on is piped through the translator.
+# Fail loudly if the pipe cannot be opened instead of silently producing no
+# output.
+open OUT,"| \"$^X\" $xlate $flavour $output"
+    or die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+# Probe the assembler/compiler version.  $avx counts how many of the two
+# version thresholds are met (0, 1 or 2); $addx is true when the second,
+# separate threshold is met.  The code below is only emitted when $avx>=2.
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.19) + ($1>=2.22);
+       $addx = ($1>=2.23);
+}
+
+if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+           `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.09) + ($1>=2.10);
+       $addx = ($1>=2.10);
+}
+
+if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+           `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=11);
+       $addx = ($1>=12);
+}
+
+if (!$addx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) {
+       my $ver = $2 + $3/100.0;        # 3.1->3.01, 3.10->3.10
+       $avx = ($ver>=3.0) + ($ver>=3.01);
+       $addx = ($ver>=3.03);
+}
+
+if ($avx>=2) {{
+$digit_size = "\$29";
+$n_digits = "\$9";
+
+$code.=<<___;
+.text
+
+.align 64
+.LAVX2_AND_MASK:
+.LAVX2_POLY:
+.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff
+.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff
+.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff
+.quad 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff
+.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000
+.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000
+.quad 0x00040000, 0x00040000, 0x00040000, 0x00040000
+.quad 0x1fe00000, 0x1fe00000, 0x1fe00000, 0x1fe00000
+.quad 0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff
+
+.LAVX2_POLY_x2:
+.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC
+.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC
+.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC
+.quad 0x400007FC, 0x400007FC, 0x400007FC, 0x400007FC
+.quad 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE
+.quad 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE
+.quad 0x400FFFFE, 0x400FFFFE, 0x400FFFFE, 0x400FFFFE
+.quad 0x7F7FFFFE, 0x7F7FFFFE, 0x7F7FFFFE, 0x7F7FFFFE
+.quad 0x03FFFFFC, 0x03FFFFFC, 0x03FFFFFC, 0x03FFFFFC
+
+.LAVX2_POLY_x8:
+.quad 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8
+.quad 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8
+.quad 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8
+.quad 0x80000FF8, 0x80000FF8, 0x80000FF8, 0x80000FF8
+.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC
+.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC
+.quad 0x801FFFFC, 0x801FFFFC, 0x801FFFFC, 0x801FFFFC
+.quad 0xFEFFFFFC, 0xFEFFFFFC, 0xFEFFFFFC, 0xFEFFFFFC
+.quad 0x07FFFFF8, 0x07FFFFF8, 0x07FFFFF8, 0x07FFFFF8
+
+.LONE:
+.quad 0x00000020, 0x00000020, 0x00000020, 0x00000020
+.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000
+.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000
+.quad 0x1fffc000, 0x1fffc000, 0x1fffc000, 0x1fffc000
+.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff
+.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff
+.quad 0x1f7fffff, 0x1f7fffff, 0x1f7fffff, 0x1f7fffff
+.quad 0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff
+.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000
+
+# RR = 2^266 mod p in AVX2 format, to transform from the native OpenSSL
+# Montgomery form (*2^256) to our format (*2^261)
+
+.LTO_MONT_AVX2:
+.quad 0x00000400, 0x00000400, 0x00000400, 0x00000400
+.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000
+.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000
+.quad 0x1ff80000, 0x1ff80000, 0x1ff80000, 0x1ff80000
+.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff
+.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff
+.quad 0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff
+.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff
+.quad 0x00000003, 0x00000003, 0x00000003, 0x00000003
+
+.LFROM_MONT_AVX2:
+.quad 0x00000001, 0x00000001, 0x00000001, 0x00000001
+.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000
+.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000
+.quad 0x1ffffe00, 0x1ffffe00, 0x1ffffe00, 0x1ffffe00
+.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff
+.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff
+.quad 0x1ffbffff, 0x1ffbffff, 0x1ffbffff, 0x1ffbffff
+.quad 0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff
+.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000
+
+.LIntOne:
+.long 1,1,1,1,1,1,1,1
+___
+
+{
+# This function receives a pointer to an array of four affine points
+# (X, Y, <1>) and rearranges the data for AVX2 execution, while
+# converting it to 2^29 radix redundant form
+
+my ($X0,$X1,$X2,$X3, $Y0,$Y1,$Y2,$Y3,
+    $T0,$T1,$T2,$T3, $T4,$T5,$T6,$T7)=map("%ymm$_",(0..15));
+
+$code.=<<___;
+.globl ecp_nistz256_avx2_transpose_convert
+.type  ecp_nistz256_avx2_transpose_convert,\@function,2
+.align 64
+ecp_nistz256_avx2_transpose_convert:
+       vzeroupper
+___
+$code.=<<___   if ($win64);
+       lea     -8-16*10(%rsp), %rsp
+       vmovaps %xmm6, -8-16*10(%rax)
+       vmovaps %xmm7, -8-16*9(%rax)
+       vmovaps %xmm8, -8-16*8(%rax)
+       vmovaps %xmm9, -8-16*7(%rax)
+       vmovaps %xmm10, -8-16*6(%rax)
+       vmovaps %xmm11, -8-16*5(%rax)
+       vmovaps %xmm12, -8-16*4(%rax)
+       vmovaps %xmm13, -8-16*3(%rax)
+       vmovaps %xmm14, -8-16*2(%rax)
+       vmovaps %xmm15, -8-16*1(%rax)
+___
+$code.=<<___;
+       # Load the data
+       vmovdqa         32*0(%rsi), $X0
+       lea             112(%rsi), %rax         # size optimization
+       vmovdqa         32*1(%rsi), $Y0
+       lea             .LAVX2_AND_MASK(%rip), %rdx
+       vmovdqa         32*2(%rsi), $X1
+       vmovdqa         32*3(%rsi), $Y1
+       vmovdqa         32*4-112(%rax), $X2
+       vmovdqa         32*5-112(%rax), $Y2
+       vmovdqa         32*6-112(%rax), $X3
+       vmovdqa         32*7-112(%rax), $Y3
+
+       # Transpose X and Y independently
+       vpunpcklqdq     $X1, $X0, $T0           # T0 = [B2 A2 B0 A0]
+       vpunpcklqdq     $X3, $X2, $T1           # T1 = [D2 C2 D0 C0]
+       vpunpckhqdq     $X1, $X0, $T2           # T2 = [B3 A3 B1 A1]
+       vpunpckhqdq     $X3, $X2, $T3           # T3 = [D3 C3 D1 C1]
+
+       vpunpcklqdq     $Y1, $Y0, $T4
+       vpunpcklqdq     $Y3, $Y2, $T5
+       vpunpckhqdq     $Y1, $Y0, $T6
+       vpunpckhqdq     $Y3, $Y2, $T7
+
+       vperm2i128      \$0x20, $T1, $T0, $X0   # X0 = [D0 C0 B0 A0]
+       vperm2i128      \$0x20, $T3, $T2, $X1   # X1 = [D1 C1 B1 A1]
+       vperm2i128      \$0x31, $T1, $T0, $X2   # X2 = [D2 C2 B2 A2]
+       vperm2i128      \$0x31, $T3, $T2, $X3   # X3 = [D3 C3 B3 A3]
+
+       vperm2i128      \$0x20, $T5, $T4, $Y0
+       vperm2i128      \$0x20, $T7, $T6, $Y1
+       vperm2i128      \$0x31, $T5, $T4, $Y2
+       vperm2i128      \$0x31, $T7, $T6, $Y3
+       vmovdqa         (%rdx), $T7
+
+       vpand           (%rdx), $X0, $T0        # out[0] = in[0] & mask;
+       vpsrlq          \$29, $X0, $X0
+       vpand           $T7, $X0, $T1           # out[1] = (in[0] >> shift) & mask;
+       vpsrlq          \$29, $X0, $X0
+       vpsllq          \$6, $X1, $T2
+       vpxor           $X0, $T2, $T2
+       vpand           $T7, $T2, $T2           # out[2] = ((in[0] >> (shift*2)) ^ (in[1] << (64-shift*2))) & mask;
+       vpsrlq          \$23, $X1, $X1
+       vpand           $T7, $X1, $T3           # out[3] = (in[1] >> ((shift*3)%64)) & mask;
+       vpsrlq          \$29, $X1, $X1
+       vpsllq          \$12, $X2, $T4
+       vpxor           $X1, $T4, $T4
+       vpand           $T7, $T4, $T4           # out[4] = ((in[1] >> ((shift*4)%64)) ^ (in[2] << (64*2-shift*4))) & mask;
+       vpsrlq          \$17, $X2, $X2
+       vpand           $T7, $X2, $T5           # out[5] = (in[2] >> ((shift*5)%64)) & mask;
+       vpsrlq          \$29, $X2, $X2
+       vpsllq          \$18, $X3, $T6
+       vpxor           $X2, $T6, $T6
+       vpand           $T7, $T6, $T6           # out[6] = ((in[2] >> ((shift*6)%64)) ^ (in[3] << (64*3-shift*6))) & mask;
+       vpsrlq          \$11, $X3, $X3
+        vmovdqa        $T0, 32*0(%rdi)
+        lea            112(%rdi), %rax         # size optimization
+       vpand           $T7, $X3, $T0           # out[7] = (in[3] >> ((shift*7)%64)) & mask;
+       vpsrlq          \$29, $X3, $X3          # out[8] = (in[3] >> ((shift*8)%64)) & mask;
+
+       vmovdqa         $T1, 32*1(%rdi)
+       vmovdqa         $T2, 32*2(%rdi)
+       vmovdqa         $T3, 32*3(%rdi)
+       vmovdqa         $T4, 32*4-112(%rax)
+       vmovdqa         $T5, 32*5-112(%rax)
+       vmovdqa         $T6, 32*6-112(%rax)
+       vmovdqa         $T0, 32*7-112(%rax)
+       vmovdqa         $X3, 32*8-112(%rax)
+       lea             448(%rdi), %rax         # size optimization
+
+       vpand           $T7, $Y0, $T0           # out[0] = in[0] & mask;
+       vpsrlq          \$29, $Y0, $Y0
+       vpand           $T7, $Y0, $T1           # out[1] = (in[0] >> shift) & mask;
+       vpsrlq          \$29, $Y0, $Y0
+       vpsllq          \$6, $Y1, $T2
+       vpxor           $Y0, $T2, $T2
+       vpand           $T7, $T2, $T2           # out[2] = ((in[0] >> (shift*2)) ^ (in[1] << (64-shift*2))) & mask;
+       vpsrlq          \$23, $Y1, $Y1
+       vpand           $T7, $Y1, $T3           # out[3] = (in[1] >> ((shift*3)%64)) & mask;
+       vpsrlq          \$29, $Y1, $Y1
+       vpsllq          \$12, $Y2, $T4
+       vpxor           $Y1, $T4, $T4
+       vpand           $T7, $T4, $T4           # out[4] = ((in[1] >> ((shift*4)%64)) ^ (in[2] << (64*2-shift*4))) & mask;
+       vpsrlq          \$17, $Y2, $Y2
+       vpand           $T7, $Y2, $T5           # out[5] = (in[2] >> ((shift*5)%64)) & mask;
+       vpsrlq          \$29, $Y2, $Y2
+       vpsllq          \$18, $Y3, $T6
+       vpxor           $Y2, $T6, $T6
+       vpand           $T7, $T6, $T6           # out[6] = ((in[2] >> ((shift*6)%64)) ^ (in[3] << (64*3-shift*6))) & mask;
+       vpsrlq          \$11, $Y3, $Y3
+        vmovdqa        $T0, 32*9-448(%rax)
+       vpand           $T7, $Y3, $T0           # out[7] = (in[3] >> ((shift*7)%64)) & mask;
+       vpsrlq          \$29, $Y3, $Y3          # out[8] = (in[3] >> ((shift*8)%64)) & mask;
+
+       vmovdqa         $T1, 32*10-448(%rax)
+       vmovdqa         $T2, 32*11-448(%rax)
+       vmovdqa         $T3, 32*12-448(%rax)
+       vmovdqa         $T4, 32*13-448(%rax)
+       vmovdqa         $T5, 32*14-448(%rax)
+       vmovdqa         $T6, 32*15-448(%rax)
+       vmovdqa         $T0, 32*16-448(%rax)
+       vmovdqa         $Y3, 32*17-448(%rax)
+
+       vzeroupper
+___
+$code.=<<___   if ($win64);
+       movaps  16*0(%rsp), %xmm6
+       movaps  16*1(%rsp), %xmm7
+       movaps  16*2(%rsp), %xmm8
+       movaps  16*3(%rsp), %xmm9
+       movaps  16*4(%rsp), %xmm10
+       movaps  16*5(%rsp), %xmm11
+       movaps  16*6(%rsp), %xmm12
+       movaps  16*7(%rsp), %xmm13
+       movaps  16*8(%rsp), %xmm14
+       movaps  16*9(%rsp), %xmm15
+       lea     8+16*10(%rsp), %rsp
+___
+$code.=<<___;
+       ret
+.size  ecp_nistz256_avx2_transpose_convert,.-ecp_nistz256_avx2_transpose_convert
+___
+}
+{
+################################################################################
+# This function receives a pointer to an array of four AVX2 formatted points
+# (X, Y, Z), converts the data to normal representation, and rearranges the data
+
+my ($D0,$D1,$D2,$D3, $D4,$D5,$D6,$D7, $D8)=map("%ymm$_",(0..8));
+my ($T0,$T1,$T2,$T3, $T4,$T5,$T6)=map("%ymm$_",(9..15));
+
+$code.=<<___;
+
+.globl ecp_nistz256_avx2_convert_transpose_back
+.type  ecp_nistz256_avx2_convert_transpose_back,\@function,2
+.align 32
+ecp_nistz256_avx2_convert_transpose_back:
+       vzeroupper
+___
+$code.=<<___   if ($win64);
+       lea     -8-16*10(%rsp), %rsp
+       vmovaps %xmm6, -8-16*10(%rax)
+       vmovaps %xmm7, -8-16*9(%rax)
+       vmovaps %xmm8, -8-16*8(%rax)
+       vmovaps %xmm9, -8-16*7(%rax)
+       vmovaps %xmm10, -8-16*6(%rax)
+       vmovaps %xmm11, -8-16*5(%rax)
+       vmovaps %xmm12, -8-16*4(%rax)
+       vmovaps %xmm13, -8-16*3(%rax)
+       vmovaps %xmm14, -8-16*2(%rax)
+       vmovaps %xmm15, -8-16*1(%rax)
+___
+$code.=<<___;
+       mov     \$3, %ecx
+
+.Lconv_loop:
+       vmovdqa         32*0(%rsi), $D0
+       lea             160(%rsi), %rax         # size optimization
+       vmovdqa         32*1(%rsi), $D1
+       vmovdqa         32*2(%rsi), $D2
+       vmovdqa         32*3(%rsi), $D3
+       vmovdqa         32*4-160(%rax), $D4
+       vmovdqa         32*5-160(%rax), $D5
+       vmovdqa         32*6-160(%rax), $D6
+       vmovdqa         32*7-160(%rax), $D7
+       vmovdqa         32*8-160(%rax), $D8
+
+       vpsllq          \$29, $D1, $D1
+       vpsllq          \$58, $D2, $T0
+       vpaddq          $D1, $D0, $D0
+       vpaddq          $T0, $D0, $D0           # out[0] = (in[0]) ^ (in[1] << shift*1) ^ (in[2] << shift*2);
+
+       vpsrlq          \$6, $D2, $D2
+       vpsllq          \$23, $D3, $D3
+       vpsllq          \$52, $D4, $T1
+       vpaddq          $D2, $D3, $D3
+       vpaddq          $D3, $T1, $D1           # out[1] = (in[2] >> (64*1-shift*2)) ^ (in[3] << shift*3%64) ^ (in[4] << shift*4%64);
+
+       vpsrlq          \$12, $D4, $D4
+       vpsllq          \$17, $D5, $D5
+       vpsllq          \$46, $D6, $T2
+       vpaddq          $D4, $D5, $D5
+       vpaddq          $D5, $T2, $D2           # out[2] = (in[4] >> (64*2-shift*4)) ^ (in[5] << shift*5%64) ^ (in[6] << shift*6%64);
+
+       vpsrlq          \$18, $D6, $D6
+       vpsllq          \$11, $D7, $D7
+       vpsllq          \$40, $D8, $T3
+       vpaddq          $D6, $D7, $D7
+       vpaddq          $D7, $T3, $D3           # out[3] = (in[6] >> (64*3-shift*6)) ^ (in[7] << shift*7%64) ^ (in[8] << shift*8%64);
+
+       vpunpcklqdq     $D1, $D0, $T0           # T0 = [B2 A2 B0 A0]
+       vpunpcklqdq     $D3, $D2, $T1           # T1 = [D2 C2 D0 C0]
+       vpunpckhqdq     $D1, $D0, $T2           # T2 = [B3 A3 B1 A1]
+       vpunpckhqdq     $D3, $D2, $T3           # T3 = [D3 C3 D1 C1]
+
+       vperm2i128      \$0x20, $T1, $T0, $D0   # X0 = [D0 C0 B0 A0]
+       vperm2i128      \$0x20, $T3, $T2, $D1   # X1 = [D1 C1 B1 A1]
+       vperm2i128      \$0x31, $T1, $T0, $D2   # X2 = [D2 C2 B2 A2]
+       vperm2i128      \$0x31, $T3, $T2, $D3   # X3 = [D3 C3 B3 A3]
+
+       vmovdqa         $D0, 32*0(%rdi)
+       vmovdqa         $D1, 32*3(%rdi)
+       vmovdqa         $D2, 32*6(%rdi)
+       vmovdqa         $D3, 32*9(%rdi)
+
+       lea             32*9(%rsi), %rsi
+       lea             32*1(%rdi), %rdi
+
+       dec     %ecx
+       jnz     .Lconv_loop
+
+       vzeroupper
+___
+$code.=<<___   if ($win64);
+       movaps  16*0(%rsp), %xmm6
+       movaps  16*1(%rsp), %xmm7
+       movaps  16*2(%rsp), %xmm8
+       movaps  16*3(%rsp), %xmm9
+       movaps  16*4(%rsp), %xmm10
+       movaps  16*5(%rsp), %xmm11
+       movaps  16*6(%rsp), %xmm12
+       movaps  16*7(%rsp), %xmm13
+       movaps  16*8(%rsp), %xmm14
+       movaps  16*9(%rsp), %xmm15
+       lea     8+16*10(%rsp), %rsp
+___
+$code.=<<___;
+       ret
+.size  ecp_nistz256_avx2_convert_transpose_back,.-ecp_nistz256_avx2_convert_transpose_back
+___
+}
+{
+my ($r_ptr,$a_ptr,$b_ptr,$itr)=("%rdi","%rsi","%rdx","%ecx");
+my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4,$ACC5,$ACC6,$ACC7,$ACC8)=map("%ymm$_",(0..8));
+my ($B,$Y,$T0,$AND_MASK,$OVERFLOW)=map("%ymm$_",(9..13));
+
+# Returns the assembly text for one carry pass over the accumulators
+# ACC0..ACC8: for each of ACC0..ACC7 the bits above $digit_size are shifted
+# out into $T0, the accumulator is masked with $AND_MASK, and $T0 is added
+# to the next accumulator.  ACC8 itself is left unmasked (the final vpand is
+# emitted commented out).
+sub NORMALIZE {
+    return <<___;
+       vpsrlq          $digit_size, $ACC0, $T0
+       vpand           $AND_MASK, $ACC0, $ACC0
+       vpaddq          $T0, $ACC1, $ACC1
+
+       vpsrlq          $digit_size, $ACC1, $T0
+       vpand           $AND_MASK, $ACC1, $ACC1
+       vpaddq          $T0, $ACC2, $ACC2
+
+       vpsrlq          $digit_size, $ACC2, $T0
+       vpand           $AND_MASK, $ACC2, $ACC2
+       vpaddq          $T0, $ACC3, $ACC3
+
+       vpsrlq          $digit_size, $ACC3, $T0
+       vpand           $AND_MASK, $ACC3, $ACC3
+       vpaddq          $T0, $ACC4, $ACC4
+
+       vpsrlq          $digit_size, $ACC4, $T0
+       vpand           $AND_MASK, $ACC4, $ACC4
+       vpaddq          $T0, $ACC5, $ACC5
+
+       vpsrlq          $digit_size, $ACC5, $T0
+       vpand           $AND_MASK, $ACC5, $ACC5
+       vpaddq          $T0, $ACC6, $ACC6
+
+       vpsrlq          $digit_size, $ACC6, $T0
+       vpand           $AND_MASK, $ACC6, $ACC6
+       vpaddq          $T0, $ACC7, $ACC7
+
+       vpsrlq          $digit_size, $ACC7, $T0
+       vpand           $AND_MASK, $ACC7, $ACC7
+       vpaddq          $T0, $ACC8, $ACC8
+       #vpand          $AND_MASK, $ACC8, $ACC8
+___
+}
+
+# Returns the assembly text that writes ACC0..ACC8 to nine consecutive
+# 32-byte slots starting at (%rdi).  Slots 4..8 are addressed relative to
+# %rax = %rdi+160, which the emitted code marks as a size optimization.
+sub STORE {
+    return <<___;
+       vmovdqa         $ACC0, 32*0(%rdi)
+       lea             160(%rdi), %rax         # size optimization
+       vmovdqa         $ACC1, 32*1(%rdi)
+       vmovdqa         $ACC2, 32*2(%rdi)
+       vmovdqa         $ACC3, 32*3(%rdi)
+       vmovdqa         $ACC4, 32*4-160(%rax)
+       vmovdqa         $ACC5, 32*5-160(%rax)
+       vmovdqa         $ACC6, 32*6-160(%rax)
+       vmovdqa         $ACC7, 32*7-160(%rax)
+       vmovdqa         $ACC8, 32*8-160(%rax)
+___
+}
+
+$code.=<<___;                  # local helper routines follow (none of them is declared .globl in this heredoc)
+.type  avx2_normalize,\@abi-omnipotent
+.align 32
+avx2_normalize:                # carry-propagates ACC0..ACC7 in registers; nothing is written to memory
+       vpsrlq          $digit_size, $ACC0, $T0
+       vpand           $AND_MASK, $ACC0, $ACC0
+       vpaddq          $T0, $ACC1, $ACC1
+# pattern for every limb: T0 = limb >> digit_size, limb = limb AND mask, next limb += T0
+       vpsrlq          $digit_size, $ACC1, $T0
+       vpand           $AND_MASK, $ACC1, $ACC1
+       vpaddq          $T0, $ACC2, $ACC2
+
+       vpsrlq          $digit_size, $ACC2, $T0
+       vpand           $AND_MASK, $ACC2, $ACC2
+       vpaddq          $T0, $ACC3, $ACC3
+
+       vpsrlq          $digit_size, $ACC3, $T0
+       vpand           $AND_MASK, $ACC3, $ACC3
+       vpaddq          $T0, $ACC4, $ACC4
+
+       vpsrlq          $digit_size, $ACC4, $T0
+       vpand           $AND_MASK, $ACC4, $ACC4
+       vpaddq          $T0, $ACC5, $ACC5
+
+       vpsrlq          $digit_size, $ACC5, $T0
+       vpand           $AND_MASK, $ACC5, $ACC5
+       vpaddq          $T0, $ACC6, $ACC6
+
+       vpsrlq          $digit_size, $ACC6, $T0
+       vpand           $AND_MASK, $ACC6, $ACC6
+       vpaddq          $T0, $ACC7, $ACC7
+# last step: the carry out of ACC7 lands in ACC8, which is left unmasked (its masking is commented out below)
+       vpsrlq          $digit_size, $ACC7, $T0
+       vpand           $AND_MASK, $ACC7, $ACC7
+       vpaddq          $T0, $ACC8, $ACC8
+       #vpand          $AND_MASK, $ACC8, $ACC8
+
+       ret
+.size  avx2_normalize,.-avx2_normalize
+# avx2_normalize_n_store: carry-propagates ACC0..ACC7 and stores each limb to the buffer at rdi once it is final
+.type  avx2_normalize_n_store,\@abi-omnipotent
+.align 32
+avx2_normalize_n_store:
+       vpsrlq          $digit_size, $ACC0, $T0
+       vpand           $AND_MASK, $ACC0, $ACC0
+       vpaddq          $T0, $ACC1, $ACC1
+# the stores (indented one extra column) are interleaved with the carry chain; rax = rdi+160 keeps later displacements short
+       vpsrlq          $digit_size, $ACC1, $T0
+       vpand           $AND_MASK, $ACC1, $ACC1
+        vmovdqa        $ACC0, 32*0(%rdi)
+        lea            160(%rdi), %rax         # size optimization
+       vpaddq          $T0, $ACC2, $ACC2
+
+       vpsrlq          $digit_size, $ACC2, $T0
+       vpand           $AND_MASK, $ACC2, $ACC2
+        vmovdqa        $ACC1, 32*1(%rdi)
+       vpaddq          $T0, $ACC3, $ACC3
+
+       vpsrlq          $digit_size, $ACC3, $T0
+       vpand           $AND_MASK, $ACC3, $ACC3
+        vmovdqa        $ACC2, 32*2(%rdi)
+       vpaddq          $T0, $ACC4, $ACC4
+
+       vpsrlq          $digit_size, $ACC4, $T0
+       vpand           $AND_MASK, $ACC4, $ACC4
+        vmovdqa        $ACC3, 32*3(%rdi)
+       vpaddq          $T0, $ACC5, $ACC5
+
+       vpsrlq          $digit_size, $ACC5, $T0
+       vpand           $AND_MASK, $ACC5, $ACC5
+        vmovdqa        $ACC4, 32*4-160(%rax)
+       vpaddq          $T0, $ACC6, $ACC6
+
+       vpsrlq          $digit_size, $ACC6, $T0
+       vpand           $AND_MASK, $ACC6, $ACC6
+        vmovdqa        $ACC5, 32*5-160(%rax)
+       vpaddq          $T0, $ACC7, $ACC7
+# last step: ACC8 receives the final carry and is stored unmasked (its masking is commented out below)
+       vpsrlq          $digit_size, $ACC7, $T0
+       vpand           $AND_MASK, $ACC7, $ACC7
+        vmovdqa        $ACC6, 32*6-160(%rax)
+       vpaddq          $T0, $ACC8, $ACC8
+       #vpand          $AND_MASK, $ACC8, $ACC8
+        vmovdqa        $ACC7, 32*7-160(%rax)
+        vmovdqa        $ACC8, 32*8-160(%rax)
+
+       ret
+.size  avx2_normalize_n_store,.-avx2_normalize_n_store
+
+################################################################################
+# void avx2_mul_x4(void* RESULTx4, void *Ax4, void *Bx4);
+.type  avx2_mul_x4,\@abi-omnipotent
+.align 32
+avx2_mul_x4:
+       lea     .LAVX2_POLY(%rip), %rax
+# The product is accumulated in ACC0..ACC8 and left there; this routine does not write through rdi.
+       vpxor   $ACC0, $ACC0, $ACC0
+       vpxor   $ACC1, $ACC1, $ACC1
+       vpxor   $ACC2, $ACC2, $ACC2
+       vpxor   $ACC3, $ACC3, $ACC3
+       vpxor   $ACC4, $ACC4, $ACC4
+       vpxor   $ACC5, $ACC5, $ACC5
+       vpxor   $ACC6, $ACC6, $ACC6
+       vpxor   $ACC7, $ACC7, $ACC7
+# ymm14 and ymm15 cache entries 7 and 8 of the .LAVX2_POLY table for the reduction step
+       vmovdqa 32*7(%rax), %ymm14
+       vmovdqa 32*8(%rax), %ymm15
+# one loop pass per 32-byte limb vector of B; the -512 bias on A is undone by the +512 in every operand below
+       mov     $n_digits, $itr
+       lea     -512($a_ptr), $a_ptr    # strategic bias to control u-op density
+       jmp     .Lavx2_mul_x4_loop
+
+.align 32
+.Lavx2_mul_x4_loop:
+       vmovdqa         32*0($b_ptr), $B
+       lea             32*1($b_ptr), $b_ptr
+# accumulate B times A[0..7]; Y = ACC0 AND mask (low digit of the running sum) drives the reduction
+       vpmuludq        32*0+512($a_ptr), $B, $T0
+       vpmuludq        32*1+512($a_ptr), $B, $OVERFLOW # borrow $OVERFLOW
+       vpaddq          $T0, $ACC0, $ACC0
+       vpmuludq        32*2+512($a_ptr), $B, $T0
+       vpaddq          $OVERFLOW, $ACC1, $ACC1
+        vpand          $AND_MASK, $ACC0, $Y
+       vpmuludq        32*3+512($a_ptr), $B, $OVERFLOW
+       vpaddq          $T0, $ACC2, $ACC2
+       vpmuludq        32*4+512($a_ptr), $B, $T0
+       vpaddq          $OVERFLOW, $ACC3, $ACC3
+       vpmuludq        32*5+512($a_ptr), $B, $OVERFLOW
+       vpaddq          $T0, $ACC4, $ACC4
+       vpmuludq        32*6+512($a_ptr), $B, $T0
+       vpaddq          $OVERFLOW, $ACC5, $ACC5
+       vpmuludq        32*7+512($a_ptr), $B, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC6
+# reduction step: add Y times the polynomial limbs, fold the carry out of limb 0, move every accumulator down one limb
+       # Skip some multiplications, optimizing for the constant poly (Y*mask is reused for limbs 0..2, Y<<18 for limb 6; presumably matches .LAVX2_POLY -- verify)
+       vpmuludq        $AND_MASK, $Y, $T0
+        vpaddq         $OVERFLOW, $ACC7, $ACC7
+        vpmuludq       32*8+512($a_ptr), $B, $ACC8
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       .byte           0x67                    # NOTE(review): prefix byte for the next instruction, presumably padding -- confirm
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $OVERFLOW
+       .byte           0x67
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        %ymm14, $Y, $T0
+       vpaddq          $OVERFLOW, $ACC6, $ACC5
+       vpmuludq        %ymm15, $Y, $OVERFLOW
+       vpaddq          $T0, $ACC7, $ACC6
+       vpaddq          $OVERFLOW, $ACC8, $ACC7
+
+       dec     $itr
+       jnz     .Lavx2_mul_x4_loop
+# ACC8 was folded into ACC7 by the last shift-down; clear it
+       vpxor   $ACC8, $ACC8, $ACC8
+
+       ret
+.size  avx2_mul_x4,.-avx2_mul_x4
+
+# Function optimized for the constant 1
+################################################################################
+# void avx2_mul_by1_x4(void* RESULTx4, void *Ax4);
+.type  avx2_mul_by1_x4,\@abi-omnipotent
+.align 32
+avx2_mul_by1_x4:
+       lea     .LAVX2_POLY(%rip), %rax
+# The result is left in ACC0..ACC8; this routine does not write through rdi.
+       vpxor   $ACC0, $ACC0, $ACC0
+       vpxor   $ACC1, $ACC1, $ACC1
+       vpxor   $ACC2, $ACC2, $ACC2
+       vpxor   $ACC3, $ACC3, $ACC3
+       vpxor   $ACC4, $ACC4, $ACC4
+       vpxor   $ACC5, $ACC5, $ACC5
+       vpxor   $ACC6, $ACC6, $ACC6
+       vpxor   $ACC7, $ACC7, $ACC7
+       vpxor   $ACC8, $ACC8, $ACC8
+# only entries 3 and 7 of the .LONE table are loaded; the other limbs of the constant are applied with shifts and the mask below
+       vmovdqa 32*3+.LONE(%rip), %ymm14
+       vmovdqa 32*7+.LONE(%rip), %ymm15
+# one loop pass per 32-byte limb vector of A
+       mov     $n_digits, $itr
+       jmp     .Lavx2_mul_by1_x4_loop
+
+.align 32
+.Lavx2_mul_by1_x4_loop:
+       vmovdqa         32*0($a_ptr), $B
+       .byte           0x48,0x8d,0xb6,0x20,0,0,0       # lea   32*1($a_ptr), $a_ptr
+# B times the constant: B<<5 into ACC0, B*ymm14 into ACC3, B*mask into ACC4..ACC6, minus B<<23 in ACC6, B*ymm15 into ACC7
+       vpsllq          \$5, $B, $OVERFLOW
+       vpmuludq        %ymm14, $B, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC3
+       .byte           0x67
+       vpmuludq        $AND_MASK, $B, $T0
+       vpand           $AND_MASK, $ACC0, $Y
+       vpaddq          $T0, $ACC4, $ACC4
+       vpaddq          $T0, $ACC5, $ACC5
+       vpaddq          $T0, $ACC6, $ACC6
+       vpsllq          \$23, $B, $T0
+
+       .byte           0x67,0x67
+       vpmuludq        %ymm15, $B, $OVERFLOW
+       vpsubq          $T0, $ACC6, $ACC6
+# reduction step: add Y times the polynomial limbs, fold the carry out of limb 0, move every accumulator down one limb
+       vpmuludq        $AND_MASK, $Y, $T0
+       vpaddq          $OVERFLOW, $ACC7, $ACC7
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       .byte           0x67,0x67
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $OVERFLOW
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        32*7(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC6, $ACC5
+       vpaddq          $T0, $ACC7, $ACC6
+       vpmuludq        32*8(%rax), $Y, $ACC7
+# ACC8 is never written inside the loop, so it stays zero and the top product above goes straight into ACC7
+       dec     $itr
+       jnz     .Lavx2_mul_by1_x4_loop
+
+       ret
+.size  avx2_mul_by1_x4,.-avx2_mul_by1_x4
+
+################################################################################
+# void avx2_sqr_x4(void* RESULTx4, void *Ax4); rcx = scratch for eight 32-byte vectors (there is no B operand)
+.type  avx2_sqr_x4,\@abi-omnipotent
+.align 32
+avx2_sqr_x4:
+       lea             .LAVX2_POLY(%rip), %rax
+# The square is left in ACC0..ACC8; this routine does not write through rdi.
+       vmovdqa         32*7(%rax), %ymm14
+       vmovdqa         32*8(%rax), %ymm15
+# B = A[0]; ACC1..ACC8 = A[1..8], doubled in place and saved as 2*A[1..8] in scratch slots 0..7 at rcx
+       vmovdqa         32*0($a_ptr), $B
+       vmovdqa         32*1($a_ptr), $ACC1
+       vmovdqa         32*2($a_ptr), $ACC2
+       vmovdqa         32*3($a_ptr), $ACC3
+       vmovdqa         32*4($a_ptr), $ACC4
+       vmovdqa         32*5($a_ptr), $ACC5
+       vmovdqa         32*6($a_ptr), $ACC6
+       vmovdqa         32*7($a_ptr), $ACC7
+       vpaddq          $ACC1, $ACC1, $ACC1     # 2*$ACC0..7
+       vmovdqa         32*8($a_ptr), $ACC8
+       vpaddq          $ACC2, $ACC2, $ACC2
+       vmovdqa         $ACC1, 32*0(%rcx)
+       vpaddq          $ACC3, $ACC3, $ACC3
+       vmovdqa         $ACC2, 32*1(%rcx)
+       vpaddq          $ACC4, $ACC4, $ACC4
+       vmovdqa         $ACC3, 32*2(%rcx)
+       vpaddq          $ACC5, $ACC5, $ACC5
+       vmovdqa         $ACC4, 32*3(%rcx)
+       vpaddq          $ACC6, $ACC6, $ACC6
+       vmovdqa         $ACC5, 32*4(%rcx)
+       vpaddq          $ACC7, $ACC7, $ACC7
+       vmovdqa         $ACC6, 32*5(%rcx)
+       vpaddq          $ACC8, $ACC8, $ACC8
+       vmovdqa         $ACC7, 32*6(%rcx)
+       vmovdqa         $ACC8, 32*7(%rcx)
+# Pass k: limb A[k-1] times itself and times the doubled higher limbs, followed by one reduction step
+       #itr            1
+       vpmuludq        $B, $B, $ACC0
+       vpmuludq        $B, $ACC1, $ACC1
+        vpand          $AND_MASK, $ACC0, $Y
+       vpmuludq        $B, $ACC2, $ACC2
+       vpmuludq        $B, $ACC3, $ACC3
+       vpmuludq        $B, $ACC4, $ACC4
+       vpmuludq        $B, $ACC5, $ACC5
+       vpmuludq        $B, $ACC6, $ACC6
+        vpmuludq       $AND_MASK, $Y, $T0
+       vpmuludq        $B, $ACC7, $ACC7
+       vpmuludq        $B, $ACC8, $ACC8
+        vmovdqa        32*1($a_ptr), $B
+# reduction step: T0 = Y*mask feeds limbs 0..2, the carry out of limb 0 is folded in, accumulators move down one limb
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $T0
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        %ymm14, $Y, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC5
+       vpmuludq        %ymm15, $Y, $T0
+       vpaddq          $OVERFLOW, $ACC7, $ACC6
+       vpaddq          $T0, $ACC8, $ACC7
+
+       #itr            2
+       vpmuludq        $B, $B, $OVERFLOW
+        vpand          $AND_MASK, $ACC0, $Y
+       vpmuludq        32*1(%rcx), $B, $T0
+       vpaddq          $OVERFLOW, $ACC1, $ACC1
+       vpmuludq        32*2(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC2, $ACC2
+       vpmuludq        32*3(%rcx), $B, $T0
+       vpaddq          $OVERFLOW, $ACC3, $ACC3
+       vpmuludq        32*4(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC4, $ACC4
+       vpmuludq        32*5(%rcx), $B, $T0
+       vpaddq          $OVERFLOW, $ACC5, $ACC5
+       vpmuludq        32*6(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC6
+# reduction step as in pass 1; B is reloaded with A[2] for pass 3
+       vpmuludq        $AND_MASK, $Y, $T0
+        vpaddq         $OVERFLOW, $ACC7, $ACC7
+        vpmuludq       32*7(%rcx), $B, $ACC8
+        vmovdqa        32*2($a_ptr), $B
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $T0
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        %ymm14, $Y, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC5
+       vpmuludq        %ymm15, $Y, $T0
+       vpaddq          $OVERFLOW, $ACC7, $ACC6
+       vpaddq          $T0, $ACC8, $ACC7
+
+       #itr            3
+       vpmuludq        $B, $B, $T0
+        vpand          $AND_MASK, $ACC0, $Y
+       vpmuludq        32*2(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC2, $ACC2
+       vpmuludq        32*3(%rcx), $B, $T0
+       vpaddq          $OVERFLOW, $ACC3, $ACC3
+       vpmuludq        32*4(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC4, $ACC4
+       vpmuludq        32*5(%rcx), $B, $T0
+       vpaddq          $OVERFLOW, $ACC5, $ACC5
+       vpmuludq        32*6(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC6
+
+       vpmuludq        $AND_MASK, $Y, $T0
+        vpaddq         $OVERFLOW, $ACC7, $ACC7
+        vpmuludq       32*7(%rcx), $B, $ACC8
+        vmovdqa        32*3($a_ptr), $B
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $T0
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        %ymm14, $Y, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC5
+       vpmuludq        %ymm15, $Y, $T0
+        vpand          $AND_MASK, $ACC0, $Y
+       vpaddq          $OVERFLOW, $ACC7, $ACC6
+       vpaddq          $T0, $ACC8, $ACC7
+# Y for pass 4 was already computed at the end of pass 3; passes 5..9 rely on the same hoisting
+       #itr            4
+       vpmuludq        $B, $B, $OVERFLOW
+       vpmuludq        32*3(%rcx), $B, $T0
+       vpaddq          $OVERFLOW, $ACC3, $ACC3
+       vpmuludq        32*4(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC4, $ACC4
+       vpmuludq        32*5(%rcx), $B, $T0
+       vpaddq          $OVERFLOW, $ACC5, $ACC5
+       vpmuludq        32*6(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC6
+
+       vpmuludq        $AND_MASK, $Y, $T0
+        vpaddq         $OVERFLOW, $ACC7, $ACC7
+        vpmuludq       32*7(%rcx), $B, $ACC8
+        vmovdqa        32*4($a_ptr), $B
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $T0
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        %ymm14, $Y, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC5
+       vpmuludq        %ymm15, $Y, $T0
+        vpand          $AND_MASK, $ACC0, $Y
+       vpaddq          $OVERFLOW, $ACC7, $ACC6
+       vpaddq          $T0, $ACC8, $ACC7
+
+       #itr            5
+       vpmuludq        $B, $B, $T0
+       vpmuludq        32*4(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC4, $ACC4
+       vpmuludq        32*5(%rcx), $B, $T0
+       vpaddq          $OVERFLOW, $ACC5, $ACC5
+       vpmuludq        32*6(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC6
+
+       vpmuludq        $AND_MASK, $Y, $T0
+        vpaddq         $OVERFLOW, $ACC7, $ACC7
+        vpmuludq       32*7(%rcx), $B, $ACC8
+        vmovdqa        32*5($a_ptr), $B
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3+.LAVX2_POLY(%rip), $Y, $T0         # same address as 32*3(%rax), since rax still holds .LAVX2_POLY
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $T0
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        %ymm14, $Y, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC5
+       vpmuludq        %ymm15, $Y, $T0
+        vpand          $AND_MASK, $ACC0, $Y
+       vpaddq          $OVERFLOW, $ACC7, $ACC6
+       vpaddq          $T0, $ACC8, $ACC7
+
+       #itr            6
+       vpmuludq        $B, $B, $OVERFLOW
+       vpmuludq        32*5(%rcx), $B, $T0
+       vpaddq          $OVERFLOW, $ACC5, $ACC5
+       vpmuludq        32*6(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC6
+
+       vpmuludq        $AND_MASK, $Y, $T0
+        vpaddq         $OVERFLOW, $ACC7, $ACC7
+        vpmuludq       32*7(%rcx), $B, $ACC8
+        vmovdqa        32*6($a_ptr), $B
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $T0
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        %ymm14, $Y, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC5
+       vpmuludq        %ymm15, $Y, $T0
+        vpand          $AND_MASK, $ACC0, $Y
+       vpaddq          $OVERFLOW, $ACC7, $ACC6
+       vpaddq          $T0, $ACC8, $ACC7
+
+       #itr            7
+       vpmuludq        $B, $B, $T0
+       vpmuludq        32*6(%rcx), $B, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC6
+
+       vpmuludq        $AND_MASK, $Y, $T0
+        vpaddq         $OVERFLOW, $ACC7, $ACC7
+        vpmuludq       32*7(%rcx), $B, $ACC8
+        vmovdqa        32*7($a_ptr), $B
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $T0
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        %ymm14, $Y, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC5
+       vpmuludq        %ymm15, $Y, $T0
+        vpand          $AND_MASK, $ACC0, $Y
+       vpaddq          $OVERFLOW, $ACC7, $ACC6
+       vpaddq          $T0, $ACC8, $ACC7
+
+       #itr            8
+       vpmuludq        $B, $B, $OVERFLOW
+# only two products remain in this pass: A[7]*A[7] above and A[7] times the saved 2*A[8] below
+       vpmuludq        $AND_MASK, $Y, $T0
+        vpaddq         $OVERFLOW, $ACC7, $ACC7
+        vpmuludq       32*7(%rcx), $B, $ACC8
+        vmovdqa        32*8($a_ptr), $B
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $T0
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        %ymm14, $Y, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC5
+       vpmuludq        %ymm15, $Y, $T0
+        vpand          $AND_MASK, $ACC0, $Y
+       vpaddq          $OVERFLOW, $ACC7, $ACC6
+       vpaddq          $T0, $ACC8, $ACC7
+
+       #itr            9
+       vpmuludq        $B, $B, $ACC8
+# last pass: A[8]*A[8] went into the top limb above; one final reduction step follows
+       vpmuludq        $AND_MASK, $Y, $T0
+       vpaddq          $T0, $ACC0, $OVERFLOW
+       vpsrlq          $digit_size, $OVERFLOW, $OVERFLOW
+       vpaddq          $T0, $ACC1, $ACC0
+       vpaddq          $T0, $ACC2, $ACC1
+       vpmuludq        32*3(%rax), $Y, $T0
+       vpaddq          $OVERFLOW, $ACC0, $ACC0
+       vpaddq          $T0, $ACC3, $ACC2
+       vmovdqa         $ACC4, $ACC3
+       vpsllq          \$18, $Y, $T0
+       vmovdqa         $ACC5, $ACC4
+       vpmuludq        %ymm14, $Y, $OVERFLOW
+       vpaddq          $T0, $ACC6, $ACC5
+       vpmuludq        %ymm15, $Y, $T0
+       vpaddq          $OVERFLOW, $ACC7, $ACC6
+       vpaddq          $T0, $ACC8, $ACC7
+# ACC8 was folded into ACC7 by the last shift-down; clear it
+       vpxor           $ACC8, $ACC8, $ACC8
+
+       ret
+.size  avx2_sqr_x4,.-avx2_sqr_x4
+
+################################################################################
+# void avx2_sub_x4(void* RESULTx4, void *Ax4, void *Bx4);
+.type  avx2_sub_x4,\@abi-omnipotent
+.align 32
+avx2_sub_x4:
+       vmovdqa 32*0($a_ptr), $ACC0
+       lea     160($a_ptr), $a_ptr
+       lea     .LAVX2_POLY_x8+128(%rip), %rax
+       lea     128($b_ptr), $b_ptr
+       vmovdqa 32*1-160($a_ptr), $ACC1
+       vmovdqa 32*2-160($a_ptr), $ACC2
+       vmovdqa 32*3-160($a_ptr), $ACC3
+       vmovdqa 32*4-160($a_ptr), $ACC4
+       vmovdqa 32*5-160($a_ptr), $ACC5
+       vmovdqa 32*6-160($a_ptr), $ACC6
+       vmovdqa 32*7-160($a_ptr), $ACC7
+       vmovdqa 32*8-160($a_ptr), $ACC8
+# add the nine limbs of the .LAVX2_POLY_x8 table first (presumably a multiple of the modulus that keeps the subtraction below non-negative -- TODO confirm)
+       vpaddq  32*0-128(%rax), $ACC0, $ACC0
+       vpaddq  32*1-128(%rax), $ACC1, $ACC1
+       vpaddq  32*2-128(%rax), $ACC2, $ACC2
+       vpaddq  32*3-128(%rax), $ACC3, $ACC3
+       vpaddq  32*4-128(%rax), $ACC4, $ACC4
+       vpaddq  32*5-128(%rax), $ACC5, $ACC5
+       vpaddq  32*6-128(%rax), $ACC6, $ACC6
+       vpaddq  32*7-128(%rax), $ACC7, $ACC7
+       vpaddq  32*8-128(%rax), $ACC8, $ACC8
+# limb-wise subtraction of B with no carry or borrow handling; the result is left in ACC0..ACC8 and rdi is not written
+       vpsubq  32*0-128($b_ptr), $ACC0, $ACC0
+       vpsubq  32*1-128($b_ptr), $ACC1, $ACC1
+       vpsubq  32*2-128($b_ptr), $ACC2, $ACC2
+       vpsubq  32*3-128($b_ptr), $ACC3, $ACC3
+       vpsubq  32*4-128($b_ptr), $ACC4, $ACC4
+       vpsubq  32*5-128($b_ptr), $ACC5, $ACC5
+       vpsubq  32*6-128($b_ptr), $ACC6, $ACC6
+       vpsubq  32*7-128($b_ptr), $ACC7, $ACC7
+       vpsubq  32*8-128($b_ptr), $ACC8, $ACC8
+# on return the A and B pointer registers have been advanced by 160 and 128 bytes, and rax is clobbered
+       ret
+.size  avx2_sub_x4,.-avx2_sub_x4
+# avx2_select_n_store: per 64-bit lane, keep ACC0..ACC8 or substitute the vector at rsi or at rdx, then store to rdi
+.type  avx2_select_n_store,\@abi-omnipotent
+.align 32
+avx2_select_n_store:
+       vmovdqa `8+32*9*8`(%rsp), $Y
+       vpor    `8+32*9*8+32`(%rsp), $Y, $Y
+# Y = first mask OR second mask, read from the caller frame (the extra 8 skips the return address); lanes with either mask set are cleared in ACC
+       vpandn  $ACC0, $Y, $ACC0
+       vpandn  $ACC1, $Y, $ACC1
+       vpandn  $ACC2, $Y, $ACC2
+       vpandn  $ACC3, $Y, $ACC3
+       vpandn  $ACC4, $Y, $ACC4
+       vpandn  $ACC5, $Y, $ACC5
+       vpandn  $ACC6, $Y, $ACC6
+       vmovdqa `8+32*9*8+32`(%rsp), $B
+       vpandn  $ACC7, $Y, $ACC7
+       vpandn  `8+32*9*8`(%rsp), $B, $B
+       vpandn  $ACC8, $Y, $ACC8
+# B = first mask AND NOT second mask: those lanes take the nine limbs of the vector at rsi
+       vpand   32*0(%rsi), $B, $T0
+       lea     160(%rsi), %rax
+       vpand   32*1(%rsi), $B, $Y
+       vpxor   $T0, $ACC0, $ACC0
+       vpand   32*2(%rsi), $B, $T0
+       vpxor   $Y, $ACC1, $ACC1
+       vpand   32*3(%rsi), $B, $Y
+       vpxor   $T0, $ACC2, $ACC2
+       vpand   32*4-160(%rax), $B, $T0
+       vpxor   $Y, $ACC3, $ACC3
+       vpand   32*5-160(%rax), $B, $Y
+       vpxor   $T0, $ACC4, $ACC4
+       vpand   32*6-160(%rax), $B, $T0
+       vpxor   $Y, $ACC5, $ACC5
+       vpand   32*7-160(%rax), $B, $Y
+       vpxor   $T0, $ACC6, $ACC6
+       vpand   32*8-160(%rax), $B, $T0
+       vmovdqa `8+32*9*8+32`(%rsp), $B
+       vpxor   $Y, $ACC7, $ACC7
+# B = second mask: those lanes take the nine limbs of the vector at rdx
+       vpand   32*0(%rdx), $B, $Y
+       lea     160(%rdx), %rax
+       vpxor   $T0, $ACC8, $ACC8
+       vpand   32*1(%rdx), $B, $T0
+       vpxor   $Y, $ACC0, $ACC0
+       vpand   32*2(%rdx), $B, $Y
+       vpxor   $T0, $ACC1, $ACC1
+       vpand   32*3(%rdx), $B, $T0
+       vpxor   $Y, $ACC2, $ACC2
+       vpand   32*4-160(%rax), $B, $Y
+       vpxor   $T0, $ACC3, $ACC3
+       vpand   32*5-160(%rax), $B, $T0
+       vpxor   $Y, $ACC4, $ACC4
+       vpand   32*6-160(%rax), $B, $Y
+       vpxor   $T0, $ACC5, $ACC5
+       vpand   32*7-160(%rax), $B, $T0
+       vpxor   $Y, $ACC6, $ACC6
+       vpand   32*8-160(%rax), $B, $Y
+       vpxor   $T0, $ACC7, $ACC7
+       vpxor   $Y, $ACC8, $ACC8
+       `&STORE`
+
+       ret
+.size  avx2_select_n_store,.-avx2_select_n_store
+___
+$code.=<<___   if (0);                         # inlined: this text is never appended; the point-add code doubles ACC0..ACC8 in place instead
+################################################################################
+# void avx2_mul_by2_x4(void* RESULTx4, void *Ax4);
+.type  avx2_mul_by2_x4,\@abi-omnipotent
+.align 32
+avx2_mul_by2_x4:
+       vmovdqa 32*0($a_ptr), $ACC0
+       lea     160($a_ptr), %rax
+       vmovdqa 32*1($a_ptr), $ACC1
+       vmovdqa 32*2($a_ptr), $ACC2
+       vmovdqa 32*3($a_ptr), $ACC3
+       vmovdqa 32*4-160(%rax), $ACC4
+       vmovdqa 32*5-160(%rax), $ACC5
+       vmovdqa 32*6-160(%rax), $ACC6
+       vmovdqa 32*7-160(%rax), $ACC7
+       vmovdqa 32*8-160(%rax), $ACC8
+# limb-wise doubling with no carry propagation; the result is left in ACC0..ACC8
+       vpaddq  $ACC0, $ACC0, $ACC0
+       vpaddq  $ACC1, $ACC1, $ACC1
+       vpaddq  $ACC2, $ACC2, $ACC2
+       vpaddq  $ACC3, $ACC3, $ACC3
+       vpaddq  $ACC4, $ACC4, $ACC4
+       vpaddq  $ACC5, $ACC5, $ACC5
+       vpaddq  $ACC6, $ACC6, $ACC6
+       vpaddq  $ACC7, $ACC7, $ACC7
+       vpaddq  $ACC8, $ACC8, $ACC8
+
+       ret
+.size  avx2_mul_by2_x4,.-avx2_mul_by2_x4
+___
+my ($r_ptr_in,$a_ptr_in,$b_ptr_in)=("%rdi","%rsi","%rdx");   # registers in which the three pointer arguments arrive
+my ($r_ptr,$a_ptr,$b_ptr)=("%r8","%r9","%r10");              # redeclared: code generated from here on keeps the pointers in r8..r10, leaving rdi/rsi/rdx free for helper calls
+
+$code.=<<___;
+################################################################################
+# void ecp_nistz256_avx2_point_add_affine_x4(void* RESULTx4, void *Ax4, void *Bx4);
+.globl ecp_nistz256_avx2_point_add_affine_x4
+.type  ecp_nistz256_avx2_point_add_affine_x4,\@function,3
+.align 32
+ecp_nistz256_avx2_point_add_affine_x4:
+       mov     %rsp, %rax
+       push    %rbp
+       vzeroupper
+___
+$code.=<<___   if ($win64);
+       lea     -16*10(%rsp), %rsp
+       vmovaps %xmm6, -8-16*10(%rax)
+       vmovaps %xmm7, -8-16*9(%rax)
+       vmovaps %xmm8, -8-16*8(%rax)
+       vmovaps %xmm9, -8-16*7(%rax)
+       vmovaps %xmm10, -8-16*6(%rax)
+       vmovaps %xmm11, -8-16*5(%rax)
+       vmovaps %xmm12, -8-16*4(%rax)
+       vmovaps %xmm13, -8-16*3(%rax)
+       vmovaps %xmm14, -8-16*2(%rax)
+       vmovaps %xmm15, -8-16*1(%rax)
+___
+$code.=<<___;
+       lea     -8(%rax), %rbp
+
+# Result + 32*0 = Result.X
+# Result + 32*9 = Result.Y
+# Result + 32*18 = Result.Z
+
+# A + 32*0 = A.X
+# A + 32*9 = A.Y
+# A + 32*18 = A.Z
+
+# B + 32*0 = B.X
+# B + 32*9 = B.Y
+
+       sub     \$`32*9*8+32*2+32*8`, %rsp
+       and     \$-64, %rsp
+
+       mov     $r_ptr_in, $r_ptr
+       mov     $a_ptr_in, $a_ptr
+       mov     $b_ptr_in, $b_ptr
+
+       vmovdqa 32*0($a_ptr_in), %ymm0
+       vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK
+       vpxor   %ymm1, %ymm1, %ymm1
+       lea     256($a_ptr_in), %rax            # size optimization
+       vpor    32*1($a_ptr_in), %ymm0, %ymm0
+       vpor    32*2($a_ptr_in), %ymm0, %ymm0
+       vpor    32*3($a_ptr_in), %ymm0, %ymm0
+       vpor    32*4-256(%rax), %ymm0, %ymm0
+       lea     256(%rax), %rcx                 # size optimization
+       vpor    32*5-256(%rax), %ymm0, %ymm0
+       vpor    32*6-256(%rax), %ymm0, %ymm0
+       vpor    32*7-256(%rax), %ymm0, %ymm0
+       vpor    32*8-256(%rax), %ymm0, %ymm0
+       vpor    32*9-256(%rax), %ymm0, %ymm0
+       vpor    32*10-256(%rax), %ymm0, %ymm0
+       vpor    32*11-256(%rax), %ymm0, %ymm0
+       vpor    32*12-512(%rcx), %ymm0, %ymm0
+       vpor    32*13-512(%rcx), %ymm0, %ymm0
+       vpor    32*14-512(%rcx), %ymm0, %ymm0
+       vpor    32*15-512(%rcx), %ymm0, %ymm0
+       vpor    32*16-512(%rcx), %ymm0, %ymm0
+       vpor    32*17-512(%rcx), %ymm0, %ymm0
+       vpcmpeqq %ymm1, %ymm0, %ymm0
+       vmovdqa %ymm0, `32*9*8`(%rsp)
+
+       vpxor   %ymm1, %ymm1, %ymm1
+       vmovdqa 32*0($b_ptr), %ymm0
+       lea     256($b_ptr), %rax               # size optimization
+       vpor    32*1($b_ptr), %ymm0, %ymm0
+       vpor    32*2($b_ptr), %ymm0, %ymm0
+       vpor    32*3($b_ptr), %ymm0, %ymm0
+       vpor    32*4-256(%rax), %ymm0, %ymm0
+       lea     256(%rax), %rcx                 # size optimization
+       vpor    32*5-256(%rax), %ymm0, %ymm0
+       vpor    32*6-256(%rax), %ymm0, %ymm0
+       vpor    32*7-256(%rax), %ymm0, %ymm0
+       vpor    32*8-256(%rax), %ymm0, %ymm0
+       vpor    32*9-256(%rax), %ymm0, %ymm0
+       vpor    32*10-256(%rax), %ymm0, %ymm0
+       vpor    32*11-256(%rax), %ymm0, %ymm0
+       vpor    32*12-512(%rcx), %ymm0, %ymm0
+       vpor    32*13-512(%rcx), %ymm0, %ymm0
+       vpor    32*14-512(%rcx), %ymm0, %ymm0
+       vpor    32*15-512(%rcx), %ymm0, %ymm0
+       vpor    32*16-512(%rcx), %ymm0, %ymm0
+       vpor    32*17-512(%rcx), %ymm0, %ymm0
+       vpcmpeqq %ymm1, %ymm0, %ymm0
+       vmovdqa %ymm0, `32*9*8+32`(%rsp)
+
+       #       Z1^2 = Z1*Z1
+       lea     `32*9*2`($a_ptr), %rsi
+       lea     `32*9*2`(%rsp), %rdi
+       lea     `32*9*8+32*2`(%rsp), %rcx       # temporary vector
+       call    avx2_sqr_x4
+       call    avx2_normalize_n_store
+
+       #       U2 = X2*Z1^2
+       lea     `32*9*0`($b_ptr), %rsi
+       lea     `32*9*2`(%rsp), %rdx
+       lea     `32*9*0`(%rsp), %rdi
+       call    avx2_mul_x4
+       #call   avx2_normalize
+       `&STORE`
+
+       #       S2 = Z1*Z1^2 = Z1^3
+       lea     `32*9*2`($a_ptr), %rsi
+       lea     `32*9*2`(%rsp), %rdx
+       lea     `32*9*1`(%rsp), %rdi
+       call    avx2_mul_x4
+       call    avx2_normalize_n_store
+
+       #       S2 = S2*Y2 = Y2*Z1^3
+       lea     `32*9*1`($b_ptr), %rsi
+       lea     `32*9*1`(%rsp), %rdx
+       lea     `32*9*1`(%rsp), %rdi
+       call    avx2_mul_x4
+       call    avx2_normalize_n_store
+
+       #       H = U2 - U1 = U2 - X1
+       lea     `32*9*0`(%rsp), %rsi
+       lea     `32*9*0`($a_ptr), %rdx
+       lea     `32*9*3`(%rsp), %rdi
+       call    avx2_sub_x4
+       call    avx2_normalize_n_store
+
+       #       R = S2 - S1 = S2 - Y1
+       lea     `32*9*1`(%rsp), %rsi
+       lea     `32*9*1`($a_ptr), %rdx
+       lea     `32*9*4`(%rsp), %rdi
+       call    avx2_sub_x4
+       call    avx2_normalize_n_store
+
+       #       Z3 = H*Z1*Z2
+       lea     `32*9*3`(%rsp), %rsi
+       lea     `32*9*2`($a_ptr), %rdx
+       lea     `32*9*2`($r_ptr), %rdi
+       call    avx2_mul_x4
+       call    avx2_normalize
+
+       lea     .LONE(%rip), %rsi
+       lea     `32*9*2`($a_ptr), %rdx
+       call    avx2_select_n_store
+
+       #       R^2 = R^2
+       lea     `32*9*4`(%rsp), %rsi
+       lea     `32*9*6`(%rsp), %rdi
+       lea     `32*9*8+32*2`(%rsp), %rcx       # temporary vector
+       call    avx2_sqr_x4
+       call    avx2_normalize_n_store
+
+       #       H^2 = H^2
+       lea     `32*9*3`(%rsp), %rsi
+       lea     `32*9*5`(%rsp), %rdi
+       call    avx2_sqr_x4
+       call    avx2_normalize_n_store
+
+       #       H^3 = H^2*H
+       lea     `32*9*3`(%rsp), %rsi
+       lea     `32*9*5`(%rsp), %rdx
+       lea     `32*9*7`(%rsp), %rdi
+       call    avx2_mul_x4
+       call    avx2_normalize_n_store
+
+       #       U2 = U1*H^2
+       lea     `32*9*0`($a_ptr), %rsi
+       lea     `32*9*5`(%rsp), %rdx
+       lea     `32*9*0`(%rsp), %rdi
+       call    avx2_mul_x4
+       #call   avx2_normalize
+       `&STORE`
+
+       #       Hsqr = U2*2
+       #lea    32*9*0(%rsp), %rsi
+       #lea    32*9*5(%rsp), %rdi
+       #call   avx2_mul_by2_x4
+
+       vpaddq  $ACC0, $ACC0, $ACC0     # inlined avx2_mul_by2_x4
+       lea     `32*9*5`(%rsp), %rdi
+       vpaddq  $ACC1, $ACC1, $ACC1
+       vpaddq  $ACC2, $ACC2, $ACC2
+       vpaddq  $ACC3, $ACC3, $ACC3
+       vpaddq  $ACC4, $ACC4, $ACC4
+       vpaddq  $ACC5, $ACC5, $ACC5
+       vpaddq  $ACC6, $ACC6, $ACC6
+       vpaddq  $ACC7, $ACC7, $ACC7
+       vpaddq  $ACC8, $ACC8, $ACC8
+       call    avx2_normalize_n_store
+
+       #       X3 = R^2 - H^3
+       #lea    32*9*6(%rsp), %rsi
+       #lea    32*9*7(%rsp), %rdx
+       #lea    32*9*5(%rsp), %rcx
+       #lea    32*9*0($r_ptr), %rdi
+       #call   avx2_sub_x4
+       #NORMALIZE
+       #STORE
+
+       #       X3 = X3 - U2*2
+       #lea    32*9*0($r_ptr), %rsi
+       #lea    32*9*0($r_ptr), %rdi
+       #call   avx2_sub_x4
+       #NORMALIZE
+       #STORE
+
+       lea     `32*9*6+128`(%rsp), %rsi
+       lea     .LAVX2_POLY_x2+128(%rip), %rax
+       lea     `32*9*7+128`(%rsp), %rdx
+       lea     `32*9*5+128`(%rsp), %rcx
+       lea     `32*9*0`($r_ptr), %rdi
+
+       vmovdqa 32*0-128(%rsi), $ACC0
+       vmovdqa 32*1-128(%rsi), $ACC1
+       vmovdqa 32*2-128(%rsi), $ACC2
+       vmovdqa 32*3-128(%rsi), $ACC3
+       vmovdqa 32*4-128(%rsi), $ACC4
+       vmovdqa 32*5-128(%rsi), $ACC5
+       vmovdqa 32*6-128(%rsi), $ACC6
+       vmovdqa 32*7-128(%rsi), $ACC7
+       vmovdqa 32*8-128(%rsi), $ACC8
+
+       vpaddq  32*0-128(%rax), $ACC0, $ACC0
+       vpaddq  32*1-128(%rax), $ACC1, $ACC1
+       vpaddq  32*2-128(%rax), $ACC2, $ACC2
+       vpaddq  32*3-128(%rax), $ACC3, $ACC3
+       vpaddq  32*4-128(%rax), $ACC4, $ACC4
+       vpaddq  32*5-128(%rax), $ACC5, $ACC5
+       vpaddq  32*6-128(%rax), $ACC6, $ACC6
+       vpaddq  32*7-128(%rax), $ACC7, $ACC7
+       vpaddq  32*8-128(%rax), $ACC8, $ACC8
+
+       vpsubq  32*0-128(%rdx), $ACC0, $ACC0
+       vpsubq  32*1-128(%rdx), $ACC1, $ACC1
+       vpsubq  32*2-128(%rdx), $ACC2, $ACC2
+       vpsubq  32*3-128(%rdx), $ACC3, $ACC3
+       vpsubq  32*4-128(%rdx), $ACC4, $ACC4
+       vpsubq  32*5-128(%rdx), $ACC5, $ACC5
+       vpsubq  32*6-128(%rdx), $ACC6, $ACC6
+       vpsubq  32*7-128(%rdx), $ACC7, $ACC7
+       vpsubq  32*8-128(%rdx), $ACC8, $ACC8
+
+       vpsubq  32*0-128(%rcx), $ACC0, $ACC0
+       vpsubq  32*1-128(%rcx), $ACC1, $ACC1
+       vpsubq  32*2-128(%rcx), $ACC2, $ACC2
+       vpsubq  32*3-128(%rcx), $ACC3, $ACC3
+       vpsubq  32*4-128(%rcx), $ACC4, $ACC4
+       vpsubq  32*5-128(%rcx), $ACC5, $ACC5
+       vpsubq  32*6-128(%rcx), $ACC6, $ACC6
+       vpsubq  32*7-128(%rcx), $ACC7, $ACC7
+       vpsubq  32*8-128(%rcx), $ACC8, $ACC8
+       call    avx2_normalize
+
+       lea     32*0($b_ptr), %rsi
+       lea     32*0($a_ptr), %rdx
+       call    avx2_select_n_store
+
+       #       H = U2 - X3
+       lea     `32*9*0`(%rsp), %rsi
+       lea     `32*9*0`($r_ptr), %rdx
+       lea     `32*9*3`(%rsp), %rdi
+       call    avx2_sub_x4
+       call    avx2_normalize_n_store
+
+       #
+       lea     `32*9*3`(%rsp), %rsi
+       lea     `32*9*4`(%rsp), %rdx
+       lea     `32*9*3`(%rsp), %rdi
+       call    avx2_mul_x4
+       call    avx2_normalize_n_store
+
+       #
+       lea     `32*9*7`(%rsp), %rsi
+       lea     `32*9*1`($a_ptr), %rdx
+       lea     `32*9*1`(%rsp), %rdi
+       call    avx2_mul_x4
+       call    avx2_normalize_n_store
+
+       #
+       lea     `32*9*3`(%rsp), %rsi
+       lea     `32*9*1`(%rsp), %rdx
+       lea     `32*9*1`($r_ptr), %rdi
+       call    avx2_sub_x4
+       call    avx2_normalize
+
+       lea     32*9($b_ptr), %rsi
+       lea     32*9($a_ptr), %rdx
+       call    avx2_select_n_store
+
+       #lea    32*9*0($r_ptr), %rsi
+       #lea    32*9*0($r_ptr), %rdi
+       #call   avx2_mul_by1_x4
+       #NORMALIZE
+       #STORE
+
+       lea     `32*9*1`($r_ptr), %rsi
+       lea     `32*9*1`($r_ptr), %rdi
+       call    avx2_mul_by1_x4
+       call    avx2_normalize_n_store
+
+       vzeroupper
+___
+# Win64 epilogue: reload the callee-saved registers xmm6-xmm15 from their
+# rbp-relative save slots before the frame is torn down. AT&T operand order is
+# "source, destination", so the memory operand must come first; with the
+# operands the other way round the clobbered registers are written over the
+# saved copies and never restored.
+# NOTE(review): the matching prologue is outside this view; the slots are
+# assumed to be the ones ecp_nistz256_avx2_point_add_affines_x4 uses - confirm.
+$code.=<<___   if ($win64);
+       movaps  -16*10(%rbp), %xmm6
+       movaps  -16*9(%rbp), %xmm7
+       movaps  -16*8(%rbp), %xmm8
+       movaps  -16*7(%rbp), %xmm9
+       movaps  -16*6(%rbp), %xmm10
+       movaps  -16*5(%rbp), %xmm11
+       movaps  -16*4(%rbp), %xmm12
+       movaps  -16*3(%rbp), %xmm13
+       movaps  -16*2(%rbp), %xmm14
+       movaps  -16*1(%rbp), %xmm15
+___
+$code.=<<___;
+       mov     %rbp, %rsp              # drop the aligned scratch frame
+       pop     %rbp
+       ret
+.size  ecp_nistz256_avx2_point_add_affine_x4,.-ecp_nistz256_avx2_point_add_affine_x4
+
+################################################################################
+# void ecp_nistz256_avx2_point_add_affines_x4(void* RESULTx4, void *Ax4, void *Bx4);
+#
+# Adds the affine points held in Ax4 and Bx4 (X and Y only, 9 vectors of
+# 32 bytes per coordinate) and writes X, Y and Z to RESULTx4.
+# NOTE(review): x4 presumably means four independent points, one per 64-bit
+# lane of every ymm vector - confirm against the transpose/convert helpers.
+.globl ecp_nistz256_avx2_point_add_affines_x4
+.type  ecp_nistz256_avx2_point_add_affines_x4,\@function,3
+.align 32
+ecp_nistz256_avx2_point_add_affines_x4:
+       mov     %rsp, %rax              # rax = stack pointer at entry
+       push    %rbp
+       vzeroupper
+___
+# Win64 only: spill xmm6-xmm15 into the 160 bytes directly below the saved rbp.
+$code.=<<___   if ($win64);
+       lea     -16*10(%rsp), %rsp
+       vmovaps %xmm6, -8-16*10(%rax)
+       vmovaps %xmm7, -8-16*9(%rax)
+       vmovaps %xmm8, -8-16*8(%rax)
+       vmovaps %xmm9, -8-16*7(%rax)
+       vmovaps %xmm10, -8-16*6(%rax)
+       vmovaps %xmm11, -8-16*5(%rax)
+       vmovaps %xmm12, -8-16*4(%rax)
+       vmovaps %xmm13, -8-16*3(%rax)
+       vmovaps %xmm14, -8-16*2(%rax)
+       vmovaps %xmm15, -8-16*1(%rax)
+___
+$code.=<<___;
+       lea     -8(%rax), %rbp          # rbp -> saved rbp; the epilogue unwinds from here
+
+# Result + 32*0 = Result.X
+# Result + 32*9 = Result.Y
+# Result + 32*18 = Result.Z
+
+# A + 32*0 = A.X
+# A + 32*9 = A.Y
+
+# B + 32*0 = B.X
+# B + 32*9 = B.Y
+
+       # Scratch frame: eight temporaries of 9 vectors each, two infinity
+       # masks, and a further 32*8 bytes that start with the temporary vector
+       # handed to avx2_sqr_x4. The frame is aligned down to 64 bytes.
+       sub     \$`32*9*8+32*2+32*8`, %rsp
+       and     \$-64, %rsp
+
+       mov     $r_ptr_in, $r_ptr
+       mov     $a_ptr_in, $a_ptr
+       mov     $b_ptr_in, $b_ptr
+
+       # Infinity mask for A: OR together all 18 vectors of A.X and A.Y and
+       # compare the result with zero, which yields all-ones in every 64-bit
+       # lane whose coordinates are all zero. Stored at offset 32*9*8 of the
+       # scratch frame.
+       vmovdqa 32*0($a_ptr_in), %ymm0
+       vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK
+       vpxor   %ymm1, %ymm1, %ymm1
+       lea     256($a_ptr_in), %rax            # size optimization
+       vpor    32*1($a_ptr_in), %ymm0, %ymm0
+       vpor    32*2($a_ptr_in), %ymm0, %ymm0
+       vpor    32*3($a_ptr_in), %ymm0, %ymm0
+       vpor    32*4-256(%rax), %ymm0, %ymm0
+       lea     256(%rax), %rcx                 # size optimization
+       vpor    32*5-256(%rax), %ymm0, %ymm0
+       vpor    32*6-256(%rax), %ymm0, %ymm0
+       vpor    32*7-256(%rax), %ymm0, %ymm0
+       vpor    32*8-256(%rax), %ymm0, %ymm0
+       vpor    32*9-256(%rax), %ymm0, %ymm0
+       vpor    32*10-256(%rax), %ymm0, %ymm0
+       vpor    32*11-256(%rax), %ymm0, %ymm0
+       vpor    32*12-512(%rcx), %ymm0, %ymm0
+       vpor    32*13-512(%rcx), %ymm0, %ymm0
+       vpor    32*14-512(%rcx), %ymm0, %ymm0
+       vpor    32*15-512(%rcx), %ymm0, %ymm0
+       vpor    32*16-512(%rcx), %ymm0, %ymm0
+       vpor    32*17-512(%rcx), %ymm0, %ymm0
+       vpcmpeqq %ymm1, %ymm0, %ymm0
+       vmovdqa %ymm0, `32*9*8`(%rsp)
+
+       # Same test for B; its mask goes into the next 32-byte slot.
+       vpxor   %ymm1, %ymm1, %ymm1
+       vmovdqa 32*0($b_ptr), %ymm0
+       lea     256($b_ptr), %rax               # size optimization
+       vpor    32*1($b_ptr), %ymm0, %ymm0
+       vpor    32*2($b_ptr), %ymm0, %ymm0
+       vpor    32*3($b_ptr), %ymm0, %ymm0
+       vpor    32*4-256(%rax), %ymm0, %ymm0
+       lea     256(%rax), %rcx                 # size optimization
+       vpor    32*5-256(%rax), %ymm0, %ymm0
+       vpor    32*6-256(%rax), %ymm0, %ymm0
+       vpor    32*7-256(%rax), %ymm0, %ymm0
+       vpor    32*8-256(%rax), %ymm0, %ymm0
+       vpor    32*9-256(%rax), %ymm0, %ymm0
+       vpor    32*10-256(%rax), %ymm0, %ymm0
+       vpor    32*11-256(%rax), %ymm0, %ymm0
+       vpor    32*12-512(%rcx), %ymm0, %ymm0
+       vpor    32*13-512(%rcx), %ymm0, %ymm0
+       vpor    32*14-512(%rcx), %ymm0, %ymm0
+       vpor    32*15-512(%rcx), %ymm0, %ymm0
+       vpor    32*16-512(%rcx), %ymm0, %ymm0
+       vpor    32*17-512(%rcx), %ymm0, %ymm0
+       vpcmpeqq %ymm1, %ymm0, %ymm0
+       vmovdqa %ymm0, `32*9*8+32`(%rsp)
+
+       #       H = U2 - U1 = X2 - X1   (kept in scratch slot 3)
+       lea     `32*9*0`($b_ptr), %rsi
+       lea     `32*9*0`($a_ptr), %rdx
+       lea     `32*9*3`(%rsp), %rdi
+       call    avx2_sub_x4
+       call    avx2_normalize_n_store
+
+       #       R = S2 - S1 = Y2 - Y1   (kept in scratch slot 4)
+       lea     `32*9*1`($b_ptr), %rsi
+       lea     `32*9*1`($a_ptr), %rdx
+       lea     `32*9*4`(%rsp), %rdi
+       call    avx2_sub_x4
+       call    avx2_normalize_n_store
+
+       #       Z3 = H*Z1*Z2 = H
+       lea     `32*9*3`(%rsp), %rsi
+       lea     `32*9*2`($r_ptr), %rdi
+       call    avx2_mul_by1_x4
+       call    avx2_normalize
+
+       #       Inline select on the accumulators: with mask = maskA | maskB,
+       #       ACC = (ACC & ~mask) ^ (.LONE & mask), so every lane in which
+       #       either input was flagged as infinity gets .LONE as its Z3.
+       vmovdqa `32*9*8`(%rsp), $B
+       vpor    `32*9*8+32`(%rsp), $B, $B
+
+       vpandn  $ACC0, $B, $ACC0
+       lea     .LONE+128(%rip), %rax
+       vpandn  $ACC1, $B, $ACC1
+       vpandn  $ACC2, $B, $ACC2
+       vpandn  $ACC3, $B, $ACC3
+       vpandn  $ACC4, $B, $ACC4
+       vpandn  $ACC5, $B, $ACC5
+       vpandn  $ACC6, $B, $ACC6
+       vpandn  $ACC7, $B, $ACC7
+
+       vpand   32*0-128(%rax), $B, $T0
+        vpandn $ACC8, $B, $ACC8
+       vpand   32*1-128(%rax), $B, $Y
+       vpxor   $T0, $ACC0, $ACC0
+       vpand   32*2-128(%rax), $B, $T0
+       vpxor   $Y, $ACC1, $ACC1
+       vpand   32*3-128(%rax), $B, $Y
+       vpxor   $T0, $ACC2, $ACC2
+       vpand   32*4-128(%rax), $B, $T0
+       vpxor   $Y, $ACC3, $ACC3
+       vpand   32*5-128(%rax), $B, $Y
+       vpxor   $T0, $ACC4, $ACC4
+       vpand   32*6-128(%rax), $B, $T0
+       vpxor   $Y, $ACC5, $ACC5
+       vpand   32*7-128(%rax), $B, $Y
+       vpxor   $T0, $ACC6, $ACC6
+       vpand   32*8-128(%rax), $B, $T0
+       vpxor   $Y, $ACC7, $ACC7
+       vpxor   $T0, $ACC8, $ACC8
+       `&STORE`
+
+       #       R^2 = R*R               (slot 4 -> slot 6)
+       lea     `32*9*4`(%rsp), %rsi
+       lea     `32*9*6`(%rsp), %rdi
+       lea     `32*9*8+32*2`(%rsp), %rcx       # temporary vector
+       call    avx2_sqr_x4
+       call    avx2_normalize_n_store
+
+       #       H^2 = H*H               (slot 3 -> slot 5)
+       lea     `32*9*3`(%rsp), %rsi
+       lea     `32*9*5`(%rsp), %rdi
+       call    avx2_sqr_x4
+       call    avx2_normalize_n_store
+
+       #       H^3 = H^2*H             (-> slot 7)
+       lea     `32*9*3`(%rsp), %rsi
+       lea     `32*9*5`(%rsp), %rdx
+       lea     `32*9*7`(%rsp), %rdi
+       call    avx2_mul_x4
+       call    avx2_normalize_n_store
+
+       #       U2 = U1*H^2             (-> slot 0, stored without normalizing)
+       lea     `32*9*0`($a_ptr), %rsi
+       lea     `32*9*5`(%rsp), %rdx
+       lea     `32*9*0`(%rsp), %rdi
+       call    avx2_mul_x4
+       #call   avx2_normalize
+       `&STORE`
+
+       #       Hsqr = U2*2             (overwrites H^2 in slot 5)
+       #lea    32*9*0(%rsp), %rsi
+       #lea    32*9*5(%rsp), %rdi
+       #call   avx2_mul_by2_x4
+
+       vpaddq  $ACC0, $ACC0, $ACC0     # inlined avx2_mul_by2_x4
+       lea     `32*9*5`(%rsp), %rdi
+       vpaddq  $ACC1, $ACC1, $ACC1
+       vpaddq  $ACC2, $ACC2, $ACC2
+       vpaddq  $ACC3, $ACC3, $ACC3
+       vpaddq  $ACC4, $ACC4, $ACC4
+       vpaddq  $ACC5, $ACC5, $ACC5
+       vpaddq  $ACC6, $ACC6, $ACC6
+       vpaddq  $ACC7, $ACC7, $ACC7
+       vpaddq  $ACC8, $ACC8, $ACC8
+       call    avx2_normalize_n_store
+
+       #       X3 = R^2 - H^3
+       #lea    32*9*6(%rsp), %rsi
+       #lea    32*9*7(%rsp), %rdx
+       #lea    32*9*5(%rsp), %rcx
+       #lea    32*9*0($r_ptr), %rdi
+       #call   avx2_sub_x4
+       #NORMALIZE
+       #STORE
+
+       #       X3 = X3 - U2*2
+       #lea    32*9*0($r_ptr), %rsi
+       #lea    32*9*0($r_ptr), %rdi
+       #call   avx2_sub_x4
+       #NORMALIZE
+       #STORE
+
+       #       Both subtractions above are fused into one pass:
+       #       X3 = R^2 + .LAVX2_POLY_x2 - H^3 - 2*U2
+       #       NOTE(review): the added constant presumably keeps every limb
+       #       non-negative ahead of the two subtractions - confirm.
+       lea     `32*9*6+128`(%rsp), %rsi
+       lea     .LAVX2_POLY_x2+128(%rip), %rax
+       lea     `32*9*7+128`(%rsp), %rdx
+       lea     `32*9*5+128`(%rsp), %rcx
+       lea     `32*9*0`($r_ptr), %rdi
+
+       vmovdqa 32*0-128(%rsi), $ACC0
+       vmovdqa 32*1-128(%rsi), $ACC1
+       vmovdqa 32*2-128(%rsi), $ACC2
+       vmovdqa 32*3-128(%rsi), $ACC3
+       vmovdqa 32*4-128(%rsi), $ACC4
+       vmovdqa 32*5-128(%rsi), $ACC5
+       vmovdqa 32*6-128(%rsi), $ACC6
+       vmovdqa 32*7-128(%rsi), $ACC7
+       vmovdqa 32*8-128(%rsi), $ACC8
+
+       vpaddq  32*0-128(%rax), $ACC0, $ACC0
+       vpaddq  32*1-128(%rax), $ACC1, $ACC1
+       vpaddq  32*2-128(%rax), $ACC2, $ACC2
+       vpaddq  32*3-128(%rax), $ACC3, $ACC3
+       vpaddq  32*4-128(%rax), $ACC4, $ACC4
+       vpaddq  32*5-128(%rax), $ACC5, $ACC5
+       vpaddq  32*6-128(%rax), $ACC6, $ACC6
+       vpaddq  32*7-128(%rax), $ACC7, $ACC7
+       vpaddq  32*8-128(%rax), $ACC8, $ACC8
+
+       vpsubq  32*0-128(%rdx), $ACC0, $ACC0
+       vpsubq  32*1-128(%rdx), $ACC1, $ACC1
+       vpsubq  32*2-128(%rdx), $ACC2, $ACC2
+       vpsubq  32*3-128(%rdx), $ACC3, $ACC3
+       vpsubq  32*4-128(%rdx), $ACC4, $ACC4
+       vpsubq  32*5-128(%rdx), $ACC5, $ACC5
+       vpsubq  32*6-128(%rdx), $ACC6, $ACC6
+       vpsubq  32*7-128(%rdx), $ACC7, $ACC7
+       vpsubq  32*8-128(%rdx), $ACC8, $ACC8
+
+       vpsubq  32*0-128(%rcx), $ACC0, $ACC0
+       vpsubq  32*1-128(%rcx), $ACC1, $ACC1
+       vpsubq  32*2-128(%rcx), $ACC2, $ACC2
+       vpsubq  32*3-128(%rcx), $ACC3, $ACC3
+       vpsubq  32*4-128(%rcx), $ACC4, $ACC4
+       vpsubq  32*5-128(%rcx), $ACC5, $ACC5
+       vpsubq  32*6-128(%rcx), $ACC6, $ACC6
+       vpsubq  32*7-128(%rcx), $ACC7, $ACC7
+       vpsubq  32*8-128(%rcx), $ACC8, $ACC8
+       call    avx2_normalize
+
+       #       NOTE(review): avx2_select_n_store presumably uses the two
+       #       infinity masks to pick B.X (rsi) or A.X (rdx) in place of the
+       #       computed X3 before storing to rdi - confirm in the helper.
+       lea     32*0($b_ptr), %rsi
+       lea     32*0($a_ptr), %rdx
+       call    avx2_select_n_store
+
+       #       H = U2 - X3             (slot 3 is reused; the old H is dead)
+       lea     `32*9*0`(%rsp), %rsi
+       lea     `32*9*0`($r_ptr), %rdx
+       lea     `32*9*3`(%rsp), %rdi
+       call    avx2_sub_x4
+       call    avx2_normalize_n_store
+
+       #       H = H*R
+       lea     `32*9*3`(%rsp), %rsi
+       lea     `32*9*4`(%rsp), %rdx
+       lea     `32*9*3`(%rsp), %rdi
+       call    avx2_mul_x4
+       call    avx2_normalize_n_store
+
+       #       S2 = S1 * H^3           (-> slot 1)
+       lea     `32*9*7`(%rsp), %rsi
+       lea     `32*9*1`($a_ptr), %rdx
+       lea     `32*9*1`(%rsp), %rdi
+       call    avx2_mul_x4
+       call    avx2_normalize_n_store
+
+       #       Y3 = (U2 - X3)*R - S1*H^3
+       lea     `32*9*3`(%rsp), %rsi
+       lea     `32*9*1`(%rsp), %rdx
+       lea     `32*9*1`($r_ptr), %rdi
+       call    avx2_sub_x4
+       call    avx2_normalize
+
+       #       Same select as for X3, now offered B.Y and A.Y
+       lea     32*9($b_ptr), %rsi
+       lea     32*9($a_ptr), %rdx
+       call    avx2_select_n_store
+
+       #lea    32*9*0($r_ptr), %rsi
+       #lea    32*9*0($r_ptr), %rdi
+       #call   avx2_mul_by1_x4
+       #NORMALIZE
+       #STORE
+
+       #       Run the stored Y3 through avx2_mul_by1_x4 in place.
+       #       NOTE(review): only Y3 gets this pass, the X3 variant above is
+       #       commented out - confirm that this asymmetry is intended.
+       lea     `32*9*1`($r_ptr), %rsi
+       lea     `32*9*1`($r_ptr), %rdi
+       call    avx2_mul_by1_x4
+       call    avx2_normalize_n_store
+
+       vzeroupper
+___
+# Win64 epilogue: reload the callee-saved registers xmm6-xmm15 from the slots
+# the prologue filled (-8-16*N relative to the entry rsp, i.e. -16*N relative
+# to rbp). AT&T operand order is "source, destination", so the memory operand
+# must come first; with the operands the other way round the clobbered
+# registers are written over the saved copies and never restored.
+$code.=<<___   if ($win64);
+       movaps  -16*10(%rbp), %xmm6
+       movaps  -16*9(%rbp), %xmm7
+       movaps  -16*8(%rbp), %xmm8
+       movaps  -16*7(%rbp), %xmm9
+       movaps  -16*6(%rbp), %xmm10
+       movaps  -16*5(%rbp), %xmm11
+       movaps  -16*4(%rbp), %xmm12
+       movaps  -16*3(%rbp), %xmm13
+       movaps  -16*2(%rbp), %xmm14
+       movaps  -16*1(%rbp), %xmm15
+___
+$code.=<<___;
+       mov     %rbp, %rsp              # drop the aligned scratch frame
+       pop     %rbp
+       ret
+.size  ecp_nistz256_avx2_point_add_affines_x4,.-ecp_nistz256_avx2_point_add_affines_x4
+
+################################################################################
+# void ecp_nistz256_avx2_to_mont(void* RESULTx4, void *Ax4);
+#
+# Multiplies Ax4 by the constant table .LTO_MONT_AVX2 using avx2_mul_x4 and
+# stores the normalized product through avx2_normalize_n_store.
+# NOTE(review): rdi and rsi are passed on untouched, so the helpers presumably
+# read rsi/rdx and write to rdi - confirm in avx2_mul_x4.
+.globl ecp_nistz256_avx2_to_mont
+.type  ecp_nistz256_avx2_to_mont,\@function,2
+.align 32
+ecp_nistz256_avx2_to_mont:
+       vzeroupper
+___
+# NOTE(review): %rax is never set in this function. The Win64 saves below
+# presumably rely on the prologue generated by x86_64-xlate.pl leaving the
+# entry %rsp in %rax; with that value the slots coincide with the
+# 16*N(%rsp) offsets the epilogue reloads from - confirm.
+$code.=<<___   if ($win64);
+       lea     -8-16*10(%rsp), %rsp
+       vmovaps %xmm6, -8-16*10(%rax)
+       vmovaps %xmm7, -8-16*9(%rax)
+       vmovaps %xmm8, -8-16*8(%rax)
+       vmovaps %xmm9, -8-16*7(%rax)
+       vmovaps %xmm10, -8-16*6(%rax)
+       vmovaps %xmm11, -8-16*5(%rax)
+       vmovaps %xmm12, -8-16*4(%rax)
+       vmovaps %xmm13, -8-16*3(%rax)
+       vmovaps %xmm14, -8-16*2(%rax)
+       vmovaps %xmm15, -8-16*1(%rax)
+___
+$code.=<<___;
+       vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK
+       lea     .LTO_MONT_AVX2(%rip), %rdx      # second multiplicand
+       call    avx2_mul_x4
+       call    avx2_normalize_n_store
+
+       vzeroupper
+___
+# Win64 only: reload xmm6-xmm15 and release the save area.
+$code.=<<___   if ($win64);
+       movaps  16*0(%rsp), %xmm6
+       movaps  16*1(%rsp), %xmm7
+       movaps  16*2(%rsp), %xmm8
+       movaps  16*3(%rsp), %xmm9
+       movaps  16*4(%rsp), %xmm10
+       movaps  16*5(%rsp), %xmm11
+       movaps  16*6(%rsp), %xmm12
+       movaps  16*7(%rsp), %xmm13
+       movaps  16*8(%rsp), %xmm14
+       movaps  16*9(%rsp), %xmm15
+       lea     8+16*10(%rsp), %rsp
+___
+$code.=<<___;
+       ret
+.size  ecp_nistz256_avx2_to_mont,.-ecp_nistz256_avx2_to_mont
+
+################################################################################
+# void ecp_nistz256_avx2_from_mont(void* RESULTx4, void *Ax4);
+#
+# Multiplies Ax4 by the constant table .LFROM_MONT_AVX2 using avx2_mul_x4 and
+# stores the normalized product through avx2_normalize_n_store.
+# NOTE(review): rdi and rsi are passed on untouched, so the helpers presumably
+# read rsi/rdx and write to rdi - confirm in avx2_mul_x4.
+.globl ecp_nistz256_avx2_from_mont
+.type  ecp_nistz256_avx2_from_mont,\@function,2
+.align 32
+ecp_nistz256_avx2_from_mont:
+       vzeroupper
+___
+# NOTE(review): %rax is never set in this function. The Win64 saves below
+# presumably rely on the prologue generated by x86_64-xlate.pl leaving the
+# entry %rsp in %rax; with that value the slots coincide with the
+# 16*N(%rsp) offsets the epilogue reloads from - confirm.
+$code.=<<___   if ($win64);
+       lea     -8-16*10(%rsp), %rsp
+       vmovaps %xmm6, -8-16*10(%rax)
+       vmovaps %xmm7, -8-16*9(%rax)
+       vmovaps %xmm8, -8-16*8(%rax)
+       vmovaps %xmm9, -8-16*7(%rax)
+       vmovaps %xmm10, -8-16*6(%rax)
+       vmovaps %xmm11, -8-16*5(%rax)
+       vmovaps %xmm12, -8-16*4(%rax)
+       vmovaps %xmm13, -8-16*3(%rax)
+       vmovaps %xmm14, -8-16*2(%rax)
+       vmovaps %xmm15, -8-16*1(%rax)
+___
+$code.=<<___;
+       vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK
+       lea     .LFROM_MONT_AVX2(%rip), %rdx    # second multiplicand
+       call    avx2_mul_x4
+       call    avx2_normalize_n_store
+
+       vzeroupper
+___
+# Win64 only: reload xmm6-xmm15 and release the save area.
+$code.=<<___   if ($win64);
+       movaps  16*0(%rsp), %xmm6
+       movaps  16*1(%rsp), %xmm7
+       movaps  16*2(%rsp), %xmm8
+       movaps  16*3(%rsp), %xmm9
+       movaps  16*4(%rsp), %xmm10
+       movaps  16*5(%rsp), %xmm11
+       movaps  16*6(%rsp), %xmm12
+       movaps  16*7(%rsp), %xmm13
+       movaps  16*8(%rsp), %xmm14
+       movaps  16*9(%rsp), %xmm15
+       lea     8+16*10(%rsp), %rsp
+___
+$code.=<<___;
+       ret
+.size  ecp_nistz256_avx2_from_mont,.-ecp_nistz256_avx2_from_mont
+
+################################################################################
+# void ecp_nistz256_avx2_set1(void* RESULTx4);
+#
+# Copies the nine 32-byte vectors of the .LONE table to RESULTx4. The stores
+# are aligned moves, so RESULTx4 has to be 32-byte aligned. Only ymm0-ymm5 are
+# used, which is why no Win64 register save is needed here.
+.globl ecp_nistz256_avx2_set1
+.type  ecp_nistz256_avx2_set1,\@function,1
+.align 32
+ecp_nistz256_avx2_set1:
+       lea     .LONE+128(%rip), %rax   # bias both pointers by 128 so that all
+       lea     128(%rdi), %rdi         # nine offsets fit in a signed byte
+       vzeroupper
+       vmovdqa 32*0-128(%rax), %ymm0
+       vmovdqa 32*1-128(%rax), %ymm1
+       vmovdqa 32*2-128(%rax), %ymm2
+       vmovdqa 32*3-128(%rax), %ymm3
+       vmovdqa 32*4-128(%rax), %ymm4
+       vmovdqa 32*5-128(%rax), %ymm5
+       vmovdqa %ymm0, 32*0-128(%rdi)
+       vmovdqa 32*6-128(%rax), %ymm0   # ymm0-ymm2 are reused for vectors 6-8
+       vmovdqa %ymm1, 32*1-128(%rdi)
+       vmovdqa 32*7-128(%rax), %ymm1
+       vmovdqa %ymm2, 32*2-128(%rdi)
+       vmovdqa 32*8-128(%rax), %ymm2
+       vmovdqa %ymm3, 32*3-128(%rdi)
+       vmovdqa %ymm4, 32*4-128(%rdi)
+       vmovdqa %ymm5, 32*5-128(%rdi)
+       vmovdqa %ymm0, 32*6-128(%rdi)
+       vmovdqa %ymm1, 32*7-128(%rdi)
+       vmovdqa %ymm2, 32*8-128(%rdi)
+
+       vzeroupper
+       ret
+.size  ecp_nistz256_avx2_set1,.-ecp_nistz256_avx2_set1
+___
+}
+{
+################################################################################
+# void ecp_nistz256_avx2_multi_select_w7(void* RESULT, void *in,
+#                          int index0, int index1, int index2, int index3);
+#
+# Selects one 64-byte entry from each of four consecutive 64-entry tables at
+# `in` (table k starts at byte offset 32*64*2*k) and writes the four entries
+# back to back to RESULT. Every entry of every table is read and masked on each
+# call, so the memory access pattern does not depend on the index values.
+# An index of 0 matches no entry and leaves the corresponding output all-zero.
+################################################################################
+
+my ($val,$in_t,$index0,$index1,$index2,$index3)=("%rdi","%rsi","%edx","%ecx","%r8d","%r9d");
+my ($INDEX0,$INDEX1,$INDEX2,$INDEX3)=map("%ymm$_",(0..3));
+my ($R0a,$R0b,$R1a,$R1b,$R2a,$R2b,$R3a,$R3b)=map("%ymm$_",(4..11));
+my ($M0,$T0,$T1,$TMP0)=map("%ymm$_",(12..15));
+
+$code.=<<___;
+.globl ecp_nistz256_avx2_multi_select_w7
+.type  ecp_nistz256_avx2_multi_select_w7,\@function,6
+.align 32
+ecp_nistz256_avx2_multi_select_w7:
+       vzeroupper
+___
+# NOTE(review): %rax is never set before the Win64 saves below; they presumably
+# rely on the prologue generated by x86_64-xlate.pl leaving the entry %rsp in
+# %rax - confirm.
+$code.=<<___   if ($win64);
+       lea     -8-16*10(%rsp), %rsp
+       vmovaps %xmm6, -8-16*10(%rax)
+       vmovaps %xmm7, -8-16*9(%rax)
+       vmovaps %xmm8, -8-16*8(%rax)
+       vmovaps %xmm9, -8-16*7(%rax)
+       vmovaps %xmm10, -8-16*6(%rax)
+       vmovaps %xmm11, -8-16*5(%rax)
+       vmovaps %xmm12, -8-16*4(%rax)
+       vmovaps %xmm13, -8-16*3(%rax)
+       vmovaps %xmm14, -8-16*2(%rax)
+       vmovaps %xmm15, -8-16*1(%rax)
+___
+$code.=<<___;
+       lea     .LIntOne(%rip), %rax
+
+       vmovd   $index0, %xmm0
+       vmovd   $index1, %xmm1
+       vmovd   $index2, %xmm2
+       vmovd   $index3, %xmm3
+
+       vpxor   $R0a, $R0a, $R0a
+       vpxor   $R0b, $R0b, $R0b
+       vpxor   $R1a, $R1a, $R1a
+       vpxor   $R1b, $R1b, $R1b
+       vpxor   $R2a, $R2a, $R2a
+       vpxor   $R2b, $R2b, $R2b
+       vpxor   $R3a, $R3a, $R3a
+       vpxor   $R3b, $R3b, $R3b
+       vmovdqa (%rax), $M0
+
+       # Broadcast each index to all eight dwords: the selector operand is
+       # the freshly zeroed accumulator, so every lane picks dword 0.
+       vpermd  $INDEX0, $R0a, $INDEX0
+       vpermd  $INDEX1, $R0a, $INDEX1
+       vpermd  $INDEX2, $R0a, $INDEX2
+       vpermd  $INDEX3, $R0a, $INDEX3
+
+       mov     \$64, %ecx              # loop count; index1 was already copied out of ecx
+       lea     112($val), $val         # size optimization
+       jmp     .Lmulti_select_loop_avx2
+
+# INDEX=0, corresponds to the point at infty (0,0)
+# Each pass compares the running counter with the four indices and folds the
+# current entry of each table into its accumulator under the resulting mask.
+.align 32
+.Lmulti_select_loop_avx2:
+       vpcmpeqd        $INDEX0, $M0, $TMP0
+
+       vmovdqa         `32*0+32*64*2*0`($in_t), $T0
+       vmovdqa         `32*1+32*64*2*0`($in_t), $T1
+       vpand           $TMP0, $T0, $T0
+       vpand           $TMP0, $T1, $T1
+       vpxor           $T0, $R0a, $R0a
+       vpxor           $T1, $R0b, $R0b
+
+       vpcmpeqd        $INDEX1, $M0, $TMP0
+
+       vmovdqa         `32*0+32*64*2*1`($in_t), $T0
+       vmovdqa         `32*1+32*64*2*1`($in_t), $T1
+       vpand           $TMP0, $T0, $T0
+       vpand           $TMP0, $T1, $T1
+       vpxor           $T0, $R1a, $R1a
+       vpxor           $T1, $R1b, $R1b
+
+       vpcmpeqd        $INDEX2, $M0, $TMP0
+
+       vmovdqa         `32*0+32*64*2*2`($in_t), $T0
+       vmovdqa         `32*1+32*64*2*2`($in_t), $T1
+       vpand           $TMP0, $T0, $T0
+       vpand           $TMP0, $T1, $T1
+       vpxor           $T0, $R2a, $R2a
+       vpxor           $T1, $R2b, $R2b
+
+       vpcmpeqd        $INDEX3, $M0, $TMP0
+
+       vmovdqa         `32*0+32*64*2*3`($in_t), $T0
+       vmovdqa         `32*1+32*64*2*3`($in_t), $T1
+       vpand           $TMP0, $T0, $T0
+       vpand           $TMP0, $T1, $T1
+       vpxor           $T0, $R3a, $R3a
+       vpxor           $T1, $R3b, $R3b
+
+       vpaddd          (%rax), $M0, $M0        # increment
+       lea             32*2($in_t), $in_t
+
+        dec    %ecx
+       jnz     .Lmulti_select_loop_avx2
+
+       # Unaligned stores: RESULT needs no particular alignment.
+       vmovdqu $R0a, 32*0-112($val)
+       vmovdqu $R0b, 32*1-112($val)
+       vmovdqu $R1a, 32*2-112($val)
+       vmovdqu $R1b, 32*3-112($val)
+       vmovdqu $R2a, 32*4-112($val)
+       vmovdqu $R2b, 32*5-112($val)
+       vmovdqu $R3a, 32*6-112($val)
+       vmovdqu $R3b, 32*7-112($val)
+
+       vzeroupper
+___
+# Win64 only: reload xmm6-xmm15 and release the save area.
+$code.=<<___   if ($win64);
+       movaps  16*0(%rsp), %xmm6
+       movaps  16*1(%rsp), %xmm7
+       movaps  16*2(%rsp), %xmm8
+       movaps  16*3(%rsp), %xmm9
+       movaps  16*4(%rsp), %xmm10
+       movaps  16*5(%rsp), %xmm11
+       movaps  16*6(%rsp), %xmm12
+       movaps  16*7(%rsp), %xmm13
+       movaps  16*8(%rsp), %xmm14
+       movaps  16*9(%rsp), %xmm15
+       lea     8+16*10(%rsp), %rsp
+___
+$code.=<<___;
+       ret
+.size  ecp_nistz256_avx2_multi_select_w7,.-ecp_nistz256_avx2_multi_select_w7
+
+# int ecp_nistz_avx2_eligible(void);
+# Returns bit 5 of the 32-bit word at byte offset 8 of OPENSSL_ia32cap_P,
+# i.e. 1 or 0.
+# NOTE(review): that bit is presumably the AVX2 feature flag copied from
+# CPUID leaf 7 EBX - confirm against the capability-vector documentation.
+.extern        OPENSSL_ia32cap_P
+.globl ecp_nistz_avx2_eligible
+.type  ecp_nistz_avx2_eligible,\@abi-omnipotent
+.align 32
+ecp_nistz_avx2_eligible:
+       mov     OPENSSL_ia32cap_P+8(%rip),%eax
+       shr     \$5,%eax
+       and     \$1,%eax
+       ret
+.size  ecp_nistz_avx2_eligible,.-ecp_nistz_avx2_eligible
+___
+}
+}} else {{     # assembler is too old
+# Fallback for assemblers without AVX2 support: every AVX2 entry point still
+# exists as a symbol, but all of them share one body that executes ud2, and
+# ecp_nistz_avx2_eligible always returns 0.
+$code.=<<___;
+.text
+
+.globl ecp_nistz256_avx2_transpose_convert
+.globl ecp_nistz256_avx2_convert_transpose_back
+.globl ecp_nistz256_avx2_point_add_affine_x4
+.globl ecp_nistz256_avx2_point_add_affines_x4
+.globl ecp_nistz256_avx2_to_mont
+.globl ecp_nistz256_avx2_from_mont
+.globl ecp_nistz256_avx2_set1
+.globl ecp_nistz256_avx2_multi_select_w7
+.type  ecp_nistz256_avx2_multi_select_w7,\@abi-omnipotent
+ecp_nistz256_avx2_transpose_convert:
+ecp_nistz256_avx2_convert_transpose_back:
+ecp_nistz256_avx2_point_add_affine_x4:
+ecp_nistz256_avx2_point_add_affines_x4:
+ecp_nistz256_avx2_to_mont:
+ecp_nistz256_avx2_from_mont:
+ecp_nistz256_avx2_set1:
+ecp_nistz256_avx2_multi_select_w7:
+       .byte   0x0f,0x0b       # ud2
+       ret
+.size  ecp_nistz256_avx2_multi_select_w7,.-ecp_nistz256_avx2_multi_select_w7
+
+.globl ecp_nistz_avx2_eligible
+.type  ecp_nistz_avx2_eligible,\@abi-omnipotent
+ecp_nistz_avx2_eligible:
+       xor     %eax,%eax       # report "not eligible"
+       ret
+.size  ecp_nistz_avx2_eligible,.-ecp_nistz_avx2_eligible
+___
+}}
+
+# Emit the accumulated text line by line. Every `...` span is replaced by the
+# result of evaluating its contents as Perl first, which is how constant
+# expressions such as `32*9*1` and helper calls such as `&STORE` get expanded.
+foreach (split("\n",$code)) {
+       s/\`([^\`]*)\`/eval($1)/geo;
+
+       print $_,"\n";
+}
+
+close STDOUT;
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
new file mode 100755 (executable)
index 0000000..84379fc
--- /dev/null
@@ -0,0 +1,2997 @@
+#!/usr/bin/env perl
+
+##############################################################################
+#                                                                            #
+# Copyright 2014 Intel Corporation                                           #
+#                                                                            #
+# Licensed under the Apache License, Version 2.0 (the "License");            #
+# you may not use this file except in compliance with the License.           #
+# You may obtain a copy of the License at                                    #
+#                                                                            #
+#    http://www.apache.org/licenses/LICENSE-2.0                              #
+#                                                                            #
+# Unless required by applicable law or agreed to in writing, software        #
+# distributed under the License is distributed on an "AS IS" BASIS,          #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   #
+# See the License for the specific language governing permissions and        #
+# limitations under the License.                                             #
+#                                                                            #
+##############################################################################
+#                                                                            #
+#  Developers and authors:                                                   #
+#  Shay Gueron (1, 2), and Vlad Krasnov (1)                                  #
+#  (1) Intel Corporation, Israel Development Center                          #
+#  (2) University of Haifa                                                   #
+#  Reference:                                                                #
+#  S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with#
+#                           256 Bit Primes"                                  #
+#                                                                            #
+##############################################################################
+
+# Further optimization by <appro@openssl.org>:
+#
+#              this/original   with/without -DECP_NISTZ256_ASM(*)
+# Opteron      +12-49%         +110-150%
+# Bulldozer    +14-45%         +175-210%
+# P4           +18-46%         n/a :-(
+# Westmere     +12-34%         +80-87%
+# Sandy Bridge +9-35%          +110-120%
+# Ivy Bridge   +9-35%          +110-125%
+# Haswell      +8-37%          +140-160%
+# Broadwell    +18-58%         +145-210%
+# Atom         +15-50%         +130-180%
+# VIA Nano     +43-160%        +300-480%
+#
+# (*)  "without -DECP_NISTZ256_ASM" refers to build with
+#      "enable-ec_nistp_64_gcc_128";
+#
+# Ranges denote minimum and maximum improvement coefficients depending
+# on benchmark. Lower coefficients are for ECDSA sign, relatively fastest
+# server-side operation. Keep in mind that +100% means 2x improvement.
+
+# Command line: [flavour] output. When the first argument contains a dot it is
+# taken to be the output file name and the flavour is left undefined.
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+# Win64 conventions apply to the nasm/masm/mingw64 flavours and to any output
+# file whose name ends in .asm.
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+# Look for the translator next to this script first, then in ../../perlasm.
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+# Everything printed from here on is piped through the translator. Stop right
+# away if the pipe cannot be started instead of carrying on with an unopened
+# handle.
+open OUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+# Probe the toolchain to decide which instruction-set extensions the generated
+# code may use. $avx ends up as 0, 1 or 2 and $addx as 0 or 1; both stay
+# undefined when no probe matches. Each later probe runs only while $addx is
+# still false.
+# NOTE(review): GNU as and NASM versions are compared as decimal numbers, so
+# for example 2.9 compares greater than 2.19 - confirm this is acceptable for
+# the assembler versions that have to be supported.
+
+# GNU assembler, queried through the C compiler driver.
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.19) + ($1>=2.22);
+       $addx = ($1>=2.23);
+}
+
+# NASM, Win64 builds only.
+if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+           `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.09) + ($1>=2.10);
+       $addx = ($1>=2.10);
+}
+
+# Microsoft ml64, Win64 builds only; only the major version is inspected.
+if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+           `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=11);
+       $addx = ($1>=12);
+}
+
+# clang / LLVM-based compilers; the minor version is scaled to two decimals.
+if (!$addx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) {
+       my $ver = $2 + $3/100.0;        # 3.1->3.01, 3.10->3.10
+       $avx = ($ver>=3.0) + ($ver>=3.01);
+       $addx = ($ver>=3.03);
+}
+
+$code.=<<___;
+.text
+.extern        OPENSSL_ia32cap_P
+
+# The polynomial, i.e. the P-256 prime P = 2^256 - 2^224 + 2^192 + 2^96 - 1,
+# stored as four 64-bit words, least significant word first
+.align 64
+.Lpoly:
+.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001
+
+# 2^512 mod P precomputed for NIST P256 polynomial
+.LRR:
+.quad 0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd
+
+# Eight 32-bit copies each of 1, 2 and 3
+# NOTE(review): presumably index counters/increments for the table-select
+# routines later in this file - confirm at the points of use.
+.LOne:
+.long 1,1,1,1,1,1,1,1
+.LTwo:
+.long 2,2,2,2,2,2,2,2
+.LThree:
+.long 3,3,3,3,3,3,3,3
+# 2^256 mod P, i.e. the value 1 in Montgomery form
+.LONE_mont:
+.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe
+___
+
+{
+################################################################################
+# void ecp_nistz256_mul_by_2(uint64_t res[4], uint64_t a[4]);
+#
+# res = 2*a mod P, with P the constant at .Lpoly. The doubled value is kept as
+# a 257-bit quantity (t4 holds the top bit) and P is subtracted from it; the
+# difference is used unless that subtraction borrows, so a fully reduced input
+# always produces a fully reduced result. Looking only at the carry out of
+# bit 255 is not enough, because it leaves sums in [P, 2^256) unreduced.
+
+my ($a0,$a1,$a2,$a3)=map("%r$_",(8..11));
+my ($t0,$t1,$t2,$t3,$t4)=("%rax","%rdx","%rcx","%r12","%r13");
+my ($r_ptr,$a_ptr,$b_ptr)=("%rdi","%rsi","%rdx");
+
+$code.=<<___;
+
+.globl ecp_nistz256_mul_by_2
+.type  ecp_nistz256_mul_by_2,\@function,2
+.align 64
+ecp_nistz256_mul_by_2:
+       push    %r12
+       push    %r13
+
+       mov     8*0($a_ptr), $a0
+       xor     $t4, $t4                # t4 will receive bit 256 of the sum
+       mov     8*1($a_ptr), $a1
+       add     $a0, $a0                # a0:a3+a0:a3
+       mov     8*2($a_ptr), $a2
+       adc     $a1, $a1
+       mov     8*3($a_ptr), $a3
+       lea     .Lpoly(%rip), $a_ptr
+        mov    $a0, $t0                # t0:t3 keep the unreduced sum
+       adc     $a2, $a2
+       adc     $a3, $a3
+        mov    $a1, $t1
+       adc     \$0, $t4
+
+       sub     8*0($a_ptr), $a0        # tentatively subtract P
+        mov    $a2, $t2
+       sbb     8*1($a_ptr), $a1
+       sbb     8*2($a_ptr), $a2
+        mov    $a3, $t3
+       sbb     8*3($a_ptr), $a3
+       sbb     \$0, $t4                # CF is set only if the 257-bit sum was below P
+
+       cmovc   $t0, $a0                # sum below P: keep it as it is
+       cmovc   $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovc   $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovc   $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       pop     %r13
+       pop     %r12
+       ret
+.size  ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2
+
+################################################################################
+# void ecp_nistz256_div_by_2(uint64_t res[4], uint64_t a[4]);
+#
+# res = a/2 mod P. a+P is computed unconditionally as a 257-bit value (t4 holds
+# the carry); when a is even the original a and a zero carry are selected
+# instead, and the chosen value is then shifted right by one bit. The choice is
+# made with conditional moves, not branches.
+.globl ecp_nistz256_div_by_2
+.type  ecp_nistz256_div_by_2,\@function,2
+.align 32
+ecp_nistz256_div_by_2:
+       push    %r12
+       push    %r13
+
+       mov     8*0($a_ptr), $a0
+       mov     8*1($a_ptr), $a1
+       mov     8*2($a_ptr), $a2
+        mov    $a0, $t0                # t0:t3 keep the original a
+       mov     8*3($a_ptr), $a3
+       lea     .Lpoly(%rip), $a_ptr
+
+        mov    $a1, $t1
+       xor     $t4, $t4
+       add     8*0($a_ptr), $a0        # a0:a3 = a + P
+        mov    $a2, $t2
+       adc     8*1($a_ptr), $a1
+       adc     8*2($a_ptr), $a2
+        mov    $a3, $t3
+       adc     8*3($a_ptr), $a3
+       adc     \$0, $t4                # t4 = carry out of a + P
+       xor     $a_ptr, $a_ptr          # borrow $a_ptr
+       test    \$1, $t0                # ZF set when the original a is even
+
+       cmovz   $t0, $a0                # even: fall back to a itself ...
+       cmovz   $t1, $a1
+       cmovz   $t2, $a2
+       cmovz   $t3, $a3
+       cmovz   $a_ptr, $t4             # ... and clear the carry
+
+       mov     $a1, $t0                # a0:a3>>1
+       shr     \$1, $a0
+       shl     \$63, $t0
+       mov     $a2, $t1
+       shr     \$1, $a1
+       or      $t0, $a0
+       shl     \$63, $t1
+       mov     $a3, $t2
+       shr     \$1, $a2
+       or      $t1, $a1
+       shl     \$63, $t2
+       shr     \$1, $a3
+       shl     \$63, $t4               # the carry becomes the top bit of the result
+       or      $t2, $a2
+       or      $t4, $a3
+
+       mov     $a0, 8*0($r_ptr)
+       mov     $a1, 8*1($r_ptr)
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       pop     %r13
+       pop     %r12
+       ret
+.size  ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2
+
+################################################################################
+# void ecp_nistz256_mul_by_3(uint64_t res[4], uint64_t a[4]);
+#
+# Computes res = 3*a modulo the prime held at .Lpoly, evaluated as (a+a)+a.
+# After each of the two additions the modulus is subtracted across all five
+# words (four limbs plus the carry word) and the subtraction is undone only
+# when it borrows.  Selecting on the carry out of 2^256 alone would leave
+# sums that fit in 256 bits but are not below the modulus unreduced.
+# The immediates -1 and 0 stand in for .Lpoly[0] and .Lpoly[2].
+.globl ecp_nistz256_mul_by_3
+.type  ecp_nistz256_mul_by_3,\@function,2
+.align 32
+ecp_nistz256_mul_by_3:
+       push    %r12
+       push    %r13
+
+       mov     8*0($a_ptr), $a0
+       xor     $t4, $t4
+       mov     8*1($a_ptr), $a1
+       add     $a0, $a0                # a0:a3+a0:a3
+       mov     8*2($a_ptr), $a2
+       adc     $a1, $a1
+       mov     8*3($a_ptr), $a3
+        mov    $a0, $t0                # t0:t3 snapshot the unreduced sum
+       adc     $a2, $a2
+       adc     $a3, $a3
+        mov    $a1, $t1
+       adc     \$0, $t4                # t4 = carry out of the 256-bit sum
+
+       sub     \$-1, $a0               # t4:a3:a0 -= .Lpoly
+        mov    $a2, $t2
+       sbb     .Lpoly+8*1(%rip), $a1
+       sbb     \$0, $a2
+        mov    $a3, $t3
+       sbb     .Lpoly+8*3(%rip), $a3
+       sbb     \$0, $t4                # CF=1 iff 2*a < .Lpoly
+
+       cmovc   $t0, $a0                # borrow: keep the unreduced sum
+       cmovc   $t1, $a1
+       cmovc   $t2, $a2
+       cmovc   $t3, $a3
+
+       xor     $t4, $t4
+       add     8*0($a_ptr), $a0        # a0:a3+=a_ptr[0:3]
+       adc     8*1($a_ptr), $a1
+        mov    $a0, $t0
+       adc     8*2($a_ptr), $a2
+       adc     8*3($a_ptr), $a3
+        mov    $a1, $t1
+       adc     \$0, $t4
+
+       sub     \$-1, $a0               # t4:a3:a0 -= .Lpoly
+        mov    $a2, $t2
+       sbb     .Lpoly+8*1(%rip), $a1
+       sbb     \$0, $a2
+        mov    $a3, $t3
+       sbb     .Lpoly+8*3(%rip), $a3
+       sbb     \$0, $t4                # CF=1 iff the sum < .Lpoly
+
+       cmovc   $t0, $a0                # borrow: keep the unreduced sum
+       cmovc   $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovc   $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovc   $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       pop %r13
+       pop %r12
+       ret
+.size  ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3
+
+################################################################################
+# void ecp_nistz256_add(uint64_t res[4], uint64_t a[4], uint64_t b[4]);
+#
+# Computes res = a + b modulo the prime held at .Lpoly.  The modulus is
+# subtracted from the 257-bit sum (four limbs plus the carry word) and the
+# subtraction is undone only when it borrows.  This keeps the result fully
+# reduced for fully reduced inputs; selecting on the carry out of 2^256
+# alone would leave sums that fit in 256 bits but are not below the modulus
+# unreduced.
+.globl ecp_nistz256_add
+.type  ecp_nistz256_add,\@function,3
+.align 32
+ecp_nistz256_add:
+       push    %r12
+       push    %r13
+
+       mov     8*0($a_ptr), $a0
+       xor     $t4, $t4
+       mov     8*1($a_ptr), $a1
+       mov     8*2($a_ptr), $a2
+       mov     8*3($a_ptr), $a3
+       lea     .Lpoly(%rip), $a_ptr    # a is loaded; reuse a_ptr for the modulus
+
+       add     8*0($b_ptr), $a0        # a0:a3 += b[0:3]
+       adc     8*1($b_ptr), $a1
+        mov    $a0, $t0                # t0:t3 snapshot the unreduced sum
+       adc     8*2($b_ptr), $a2
+       adc     8*3($b_ptr), $a3
+        mov    $a1, $t1
+       adc     \$0, $t4                # t4 = carry out of the 256-bit sum
+
+       sub     8*0($a_ptr), $a0        # t4:a3:a0 -= .Lpoly
+        mov    $a2, $t2
+       sbb     8*1($a_ptr), $a1
+       sbb     8*2($a_ptr), $a2
+        mov    $a3, $t3
+       sbb     8*3($a_ptr), $a3
+       sbb     \$0, $t4                # CF=1 iff a+b < .Lpoly
+
+       cmovc   $t0, $a0                # borrow: keep the unreduced sum
+       cmovc   $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovc   $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovc   $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       pop %r13
+       pop %r12
+       ret
+.size  ecp_nistz256_add,.-ecp_nistz256_add
+
+################################################################################
+# void ecp_nistz256_sub(uint64_t res[4], uint64_t a[4], uint64_t b[4]);
+#
+# Computes the four-limb difference a - b.  When the subtraction borrows,
+# the constant at .Lpoly is added back; otherwise the raw difference is
+# stored unchanged.
+.globl ecp_nistz256_sub
+.type  ecp_nistz256_sub,\@function,3
+.align 32
+ecp_nistz256_sub:
+       push    %r12
+       push    %r13
+
+       mov     8*0($a_ptr), $a0
+       xor     $t4, $t4
+       mov     8*1($a_ptr), $a1
+       mov     8*2($a_ptr), $a2
+       mov     8*3($a_ptr), $a3
+       lea     .Lpoly(%rip), $a_ptr
+
+       # a0:a3 -= b[0:3]; the final sbb turns t4 (zero so far) into
+       # all-ones when the subtraction borrowed and leaves it zero otherwise
+       sub     8*0($b_ptr), $a0
+       sbb     8*1($b_ptr), $a1
+        mov    $a0, $t0
+       sbb     8*2($b_ptr), $a2
+       sbb     8*3($b_ptr), $a3
+        mov    $a1, $t1
+       sbb     \$0, $t4
+
+       # t0:t3 snapshot the raw difference while .Lpoly is added to a0:a3
+       add     8*0($a_ptr), $a0
+        mov    $a2, $t2
+       adc     8*1($a_ptr), $a1
+       adc     8*2($a_ptr), $a2
+        mov    $a3, $t3
+       adc     8*3($a_ptr), $a3
+       test    $t4, $t4
+
+       # t4 == 0 means there was no borrow: restore the raw difference
+       cmovz   $t0, $a0
+       cmovz   $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovz   $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovz   $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       pop %r13
+       pop %r12
+       ret
+.size  ecp_nistz256_sub,.-ecp_nistz256_sub
+
+################################################################################
+# void ecp_nistz256_neg(uint64_t res[4], uint64_t a[4]);
+#
+# Computes 0 - a over four limbs.  When that subtraction borrows (any
+# non-zero input) the constant at .Lpoly is added back; a zero input
+# produces no borrow and is stored as zero.
+.globl ecp_nistz256_neg
+.type  ecp_nistz256_neg,\@function,2
+.align 32
+ecp_nistz256_neg:
+       push    %r12
+       push    %r13
+
+       # start from an all-zero minuend and a zero borrow word
+       xor     $a0, $a0
+       xor     $a1, $a1
+       xor     $a2, $a2
+       xor     $a3, $a3
+       xor     $t4, $t4
+
+       # a0:a3 = 0 - a; t4 becomes all-ones when the subtraction borrowed
+       sub     8*0($a_ptr), $a0
+       sbb     8*1($a_ptr), $a1
+       sbb     8*2($a_ptr), $a2
+        mov    $a0, $t0
+       sbb     8*3($a_ptr), $a3
+       lea     .Lpoly(%rip), $a_ptr
+        mov    $a1, $t1
+       sbb     \$0, $t4
+
+       # t0:t3 snapshot the raw difference while .Lpoly is added to a0:a3
+       add     8*0($a_ptr), $a0
+        mov    $a2, $t2
+       adc     8*1($a_ptr), $a1
+       adc     8*2($a_ptr), $a2
+        mov    $a3, $t3
+       adc     8*3($a_ptr), $a3
+       test    $t4, $t4
+
+       # t4 == 0 means there was no borrow: restore the raw difference
+       cmovz   $t0, $a0
+       cmovz   $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovz   $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovz   $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       pop %r13
+       pop %r12
+       ret
+.size  ecp_nistz256_neg,.-ecp_nistz256_neg
+___
+}
+{
+# Register assignment for the Montgomery multiplication/squaring routines
+# emitted from this scope.  Several names share a register: $t2 and $b_ptr
+# are both %rbx, $t3 and $b_org are both %rdx, and $poly1/$poly3 are
+# $acc6/$acc7 (%r14/%r15).  $b_org is copied to $b_ptr before the helpers
+# run (presumably because %rdx is an implicit operand of mulq/mulx).
+my ($r_ptr,$a_ptr,$b_org,$b_ptr)=("%rdi","%rsi","%rdx","%rbx");
+my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("%r$_",(8..15));
+my ($t0,$t1,$t2,$t3,$t4)=("%rcx","%rbp","%rbx","%rdx","%rax");
+my ($poly1,$poly3)=($acc6,$acc7);
+
+$code.=<<___;
+################################################################################
+# void ecp_nistz256_to_mont(
+#   uint64_t res[4],
+#   uint64_t in[4]);
+#
+# Loads the address of the constant .LRR as the second multiplicand and
+# jumps into ecp_nistz256_mul_mont at .Lmul_mont, so the result is
+# mul_mont(in, .LRR).
+.globl ecp_nistz256_to_mont
+.type  ecp_nistz256_to_mont,\@function,2
+.align 32
+ecp_nistz256_to_mont:
+___
+$code.=<<___   if ($addx);
+       # .Lmul_mont lies after the capability probe in ecp_nistz256_mul_mont,
+       # so the masked capability word has to be placed in ecx here as well
+       mov     \$0x80100, %ecx
+       and     OPENSSL_ia32cap_P+8(%rip), %ecx
+___
+$code.=<<___;
+       lea     .LRR(%rip), $b_org
+       jmp     .Lmul_mont
+.size  ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
+
+################################################################################
+# void ecp_nistz256_mul_mont(
+#   uint64_t res[4],
+#   uint64_t a[4],
+#   uint64_t b[4]);
+#
+# Public entry point: saves the callee-saved registers used by the helpers,
+# loads a[0..3] and b[0], and calls either the mulq-based helper or, when
+# the build emits it and the capability test passes, the mulx-based one.
+# The helper stores the result itself.
+
+.globl ecp_nistz256_mul_mont
+.type  ecp_nistz256_mul_mont,\@function,3
+.align 32
+ecp_nistz256_mul_mont:
+___
+$code.=<<___   if ($addx);
+       # NOTE(review): 0x80100 selects bits 8 and 19 of the capability word
+       # at OPENSSL_ia32cap_P+8, presumably the BMI2 and ADX flags - confirm
+       mov     \$0x80100, %ecx
+       and     OPENSSL_ia32cap_P+8(%rip), %ecx
+___
+$code.=<<___;
+.Lmul_mont:
+       push    %rbp
+       push    %rbx
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___   if ($addx);
+       # both masked bits must be set to take the mulx/adcx/adox path
+       cmp     \$0x80100, %ecx
+       je      .Lmul_montx
+___
+$code.=<<___;
+       # the mulq helper expects b[0] in rax and a[0..3] in acc1..acc4
+       mov     $b_org, $b_ptr
+       mov     8*0($b_org), %rax
+       mov     8*0($a_ptr), $acc1
+       mov     8*1($a_ptr), $acc2
+       mov     8*2($a_ptr), $acc3
+       mov     8*3($a_ptr), $acc4
+
+       call    __ecp_nistz256_mul_montq
+___
+$code.=<<___   if ($addx);
+       jmp     .Lmul_mont_done
+
+.align 32
+.Lmul_montx:
+       # the mulx helper expects b[0] in rdx and a[0..3] in acc1..acc4; it
+       # addresses a[] with a +128 displacement to undo the bias applied here
+       mov     $b_org, $b_ptr
+       mov     8*0($b_org), %rdx
+       mov     8*0($a_ptr), $acc1
+       mov     8*1($a_ptr), $acc2
+       mov     8*2($a_ptr), $acc3
+       mov     8*3($a_ptr), $acc4
+       lea     -128($a_ptr), $a_ptr    # control u-op density
+
+       call    __ecp_nistz256_mul_montx
+___
+$code.=<<___;
+.Lmul_mont_done:
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbx
+       pop     %rbp
+       ret
+.size  ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
+
+# Internal mulq-based Montgomery multiplication helper.
+# On entry: rax = b[0], acc1..acc4 = a[0..3], a_ptr and b_ptr address a[]
+# and b[], r_ptr addresses the result.  Each multiplication by one word of
+# b is followed by a reduction step that retires the lowest accumulator
+# word; the retired register is zeroed and reused as the new top word.
+# The four result limbs are written through r_ptr.
+.type  __ecp_nistz256_mul_montq,\@abi-omnipotent
+.align 32
+__ecp_nistz256_mul_montq:
+       ########################################################################
+       # Multiply a by b[0]
+       # (the two .Lpoly words needed later are loaded between the multiplies)
+       mov     %rax, $t1
+       mulq    $acc1
+       mov     .Lpoly+8*1(%rip),$poly1
+       mov     %rax, $acc0
+       mov     $t1, %rax
+       mov     %rdx, $acc1
+
+       mulq    $acc2
+       mov     .Lpoly+8*3(%rip),$poly3
+       add     %rax, $acc1
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $acc2
+
+       mulq    $acc3
+       add     %rax, $acc2
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $acc3
+
+       mulq    $acc4
+       add     %rax, $acc3
+        mov    $acc0, %rax
+       adc     \$0, %rdx
+       xor     $acc5, $acc5
+       mov     %rdx, $acc4
+
+       ########################################################################
+       # First reduction step
+       # Basically now we want to multiply acc[0] by p256,
+       # and add the result to the acc.
+       # Due to the special form of p256 we do some optimizations
+       #
+       # acc[0] x p256[0..1] = acc[0] x 2^96 - acc[0]
+       # then we add acc[0] and get acc[0] x 2^96
+
+       # rax still holds acc[0]; mulq yields acc[0] x .Lpoly[3] for the
+       # upper words while the shl/shr pair forms acc[0] x 2^96
+       mov     $acc0, $t1
+       shl     \$32, $acc0
+       mulq    $poly3
+       shr     \$32, $t1
+       add     $acc0, $acc1            # +=acc[0]<<96
+       adc     $t1, $acc2
+       adc     %rax, $acc3
+        mov    8*1($b_ptr), %rax
+       adc     %rdx, $acc4
+       adc     \$0, $acc5
+       xor     $acc0, $acc0
+
+       ########################################################################
+       # Multiply by b[1]
+       # (t0 carries the high half of each partial product into the next word)
+       mov     %rax, $t1
+       mulq    8*0($a_ptr)
+       add     %rax, $acc1
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    8*1($a_ptr)
+       add     $t0, $acc2
+       adc     \$0, %rdx
+       add     %rax, $acc2
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    8*2($a_ptr)
+       add     $t0, $acc3
+       adc     \$0, %rdx
+       add     %rax, $acc3
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    8*3($a_ptr)
+       add     $t0, $acc4
+       adc     \$0, %rdx
+       add     %rax, $acc4
+        mov    $acc1, %rax
+       adc     %rdx, $acc5
+       adc     \$0, $acc0
+
+       ########################################################################
+       # Second reduction step
+       mov     $acc1, $t1
+       shl     \$32, $acc1
+       mulq    $poly3
+       shr     \$32, $t1
+       add     $acc1, $acc2
+       adc     $t1, $acc3
+       adc     %rax, $acc4
+        mov    8*2($b_ptr), %rax
+       adc     %rdx, $acc5
+       adc     \$0, $acc0
+       xor     $acc1, $acc1
+
+       ########################################################################
+       # Multiply by b[2]
+       mov     %rax, $t1
+       mulq    8*0($a_ptr)
+       add     %rax, $acc2
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    8*1($a_ptr)
+       add     $t0, $acc3
+       adc     \$0, %rdx
+       add     %rax, $acc3
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    8*2($a_ptr)
+       add     $t0, $acc4
+       adc     \$0, %rdx
+       add     %rax, $acc4
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    8*3($a_ptr)
+       add     $t0, $acc5
+       adc     \$0, %rdx
+       add     %rax, $acc5
+        mov    $acc2, %rax
+       adc     %rdx, $acc0
+       adc     \$0, $acc1
+
+       ########################################################################
+       # Third reduction step
+       mov     $acc2, $t1
+       shl     \$32, $acc2
+       mulq    $poly3
+       shr     \$32, $t1
+       add     $acc2, $acc3
+       adc     $t1, $acc4
+       adc     %rax, $acc5
+        mov    8*3($b_ptr), %rax
+       adc     %rdx, $acc0
+       adc     \$0, $acc1
+       xor     $acc2, $acc2
+
+       ########################################################################
+       # Multiply by b[3]
+       mov     %rax, $t1
+       mulq    8*0($a_ptr)
+       add     %rax, $acc3
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    8*1($a_ptr)
+       add     $t0, $acc4
+       adc     \$0, %rdx
+       add     %rax, $acc4
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    8*2($a_ptr)
+       add     $t0, $acc5
+       adc     \$0, %rdx
+       add     %rax, $acc5
+       mov     $t1, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    8*3($a_ptr)
+       add     $t0, $acc0
+       adc     \$0, %rdx
+       add     %rax, $acc0
+        mov    $acc3, %rax
+       adc     %rdx, $acc1
+       adc     \$0, $acc2
+
+       ########################################################################
+       # Final reduction step
+       # (t0/t1 snapshot the two low result words as soon as they are final)
+       mov     $acc3, $t1
+       shl     \$32, $acc3
+       mulq    $poly3
+       shr     \$32, $t1
+       add     $acc3, $acc4
+       adc     $t1, $acc5
+        mov    $acc4, $t0
+       adc     %rax, $acc0
+       adc     %rdx, $acc1
+        mov    $acc5, $t1
+       adc     \$0, $acc2
+
+       ########################################################################
+       # Branch-less conditional subtraction of P
+       # The borrow is propagated through the fifth word (acc2), so CF=1
+       # means the five-word value was below P and the snapshot is restored.
+       sub     \$-1, $acc4             # .Lpoly[0]
+        mov    $acc0, $t2
+       sbb     $poly1, $acc5           # .Lpoly[1]
+       sbb     \$0, $acc0              # .Lpoly[2]
+        mov    $acc1, $t3
+       sbb     $poly3, $acc1           # .Lpoly[3]
+       sbb     \$0, $acc2
+
+       cmovc   $t0, $acc4
+       cmovc   $t1, $acc5
+       mov     $acc4, 8*0($r_ptr)
+       cmovc   $t2, $acc0
+       mov     $acc5, 8*1($r_ptr)
+       cmovc   $t3, $acc1
+       mov     $acc0, 8*2($r_ptr)
+       mov     $acc1, 8*3($r_ptr)
+
+       ret
+.size  __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
+
+################################################################################
+# void ecp_nistz256_sqr_mont(
+#   uint64_t res[4],
+#   uint64_t a[4]);
+#
+# Public entry point: saves the callee-saved registers used by the helpers,
+# loads a[0..3], and calls either the mulq-based squaring helper or, when
+# the build emits it and the capability test passes, the mulx-based one.
+# The helper stores the result itself.
+
+# we optimize the square according to S.Gueron and V.Krasnov,
+# "Speeding up Big-Number Squaring"
+.globl ecp_nistz256_sqr_mont
+.type  ecp_nistz256_sqr_mont,\@function,2
+.align 32
+ecp_nistz256_sqr_mont:
+___
+$code.=<<___   if ($addx);
+       # NOTE(review): 0x80100 selects bits 8 and 19 of the capability word
+       # at OPENSSL_ia32cap_P+8, presumably the BMI2 and ADX flags - confirm
+       mov     \$0x80100, %ecx
+       and     OPENSSL_ia32cap_P+8(%rip), %ecx
+___
+$code.=<<___;
+       push    %rbp
+       push    %rbx
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___   if ($addx);
+       # both masked bits must be set to take the mulx/adcx/adox path
+       cmp     \$0x80100, %ecx
+       je      .Lsqr_montx
+___
+$code.=<<___;
+       # the mulq helper expects a[0] in rax and a[1..3] in acc6, acc7, acc0
+       mov     8*0($a_ptr), %rax
+       mov     8*1($a_ptr), $acc6
+       mov     8*2($a_ptr), $acc7
+       mov     8*3($a_ptr), $acc0
+
+       call    __ecp_nistz256_sqr_montq
+___
+$code.=<<___   if ($addx);
+       jmp     .Lsqr_mont_done
+
+.align 32
+.Lsqr_montx:
+       # the mulx helper expects a[0] in rdx and a[1..3] in acc6, acc7, acc0;
+       # it addresses a[] with a +128 displacement to undo the bias applied here
+       mov     8*0($a_ptr), %rdx
+       mov     8*1($a_ptr), $acc6
+       mov     8*2($a_ptr), $acc7
+       mov     8*3($a_ptr), $acc0
+       lea     -128($a_ptr), $a_ptr    # control u-op density
+
+       call    __ecp_nistz256_sqr_montx
+___
+$code.=<<___;
+.Lsqr_mont_done:
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbx
+       pop     %rbp
+       ret
+.size  ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
+
+# Internal mulq-based Montgomery squaring helper.
+# On entry: rax = a[0], acc6 = a[1], acc7 = a[2], acc0 = a[3], a_ptr
+# addresses a[], r_ptr addresses the result.  The off-diagonal products are
+# computed once and doubled, the squares of the individual limbs are added,
+# and the low four words are then retired by four reduction iterations
+# before the upper half is accumulated and conditionally reduced.
+.type  __ecp_nistz256_sqr_montq,\@abi-omnipotent
+.align 32
+__ecp_nistz256_sqr_montq:
+       mov     %rax, $acc5
+       mulq    $acc6                   # a[1]*a[0]
+       mov     %rax, $acc1
+       mov     $acc7, %rax
+       mov     %rdx, $acc2
+
+       mulq    $acc5                   # a[0]*a[2]
+       add     %rax, $acc2
+       mov     $acc0, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $acc3
+
+       mulq    $acc5                   # a[0]*a[3]
+       add     %rax, $acc3
+        mov    $acc7, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $acc4
+
+       #################################
+       mulq    $acc6                   # a[1]*a[2]
+       add     %rax, $acc3
+       mov     $acc0, %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t1
+
+       mulq    $acc6                   # a[1]*a[3]
+       add     %rax, $acc4
+        mov    $acc0, %rax
+       adc     \$0, %rdx
+       add     $t1, $acc4
+       mov     %rdx, $acc5
+       adc     \$0, $acc5
+
+       #################################
+       mulq    $acc7                   # a[2]*a[3]
+       xor     $acc7, $acc7
+       add     %rax, $acc5
+        mov    8*0($a_ptr), %rax
+       mov     %rdx, $acc6
+       adc     \$0, $acc6
+
+       # double the off-diagonal sum; acc7 (zeroed above) receives the bit
+       # shifted out of acc6
+       add     $acc1, $acc1            # acc1:6<<1
+       adc     $acc2, $acc2
+       adc     $acc3, $acc3
+       adc     $acc4, $acc4
+       adc     $acc5, $acc5
+       adc     $acc6, $acc6
+       adc     \$0, $acc7
+
+       # add the squares a[i]*a[i]; each limb is reloaded from memory into
+       # rax and multiplied by itself
+       mulq    %rax
+       mov     %rax, $acc0
+       mov     8*1($a_ptr), %rax
+       mov     %rdx, $t0
+
+       mulq    %rax
+       add     $t0, $acc1
+       adc     %rax, $acc2
+       mov     8*2($a_ptr), %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    %rax
+       add     $t0, $acc3
+       adc     %rax, $acc4
+       mov     8*3($a_ptr), %rax
+       adc     \$0, %rdx
+       mov     %rdx, $t0
+
+       mulq    %rax
+       add     $t0, $acc5
+       adc     %rax, $acc6
+        mov    $acc0, %rax
+       adc     %rdx, $acc7
+
+       # a[] is not read again: a_ptr and t1 now hold .Lpoly[1] and .Lpoly[3]
+       mov     .Lpoly+8*1(%rip), $a_ptr
+       mov     .Lpoly+8*3(%rip), $t1
+
+       ##########################################
+       # Now the reduction
+       # First iteration
+       # (rdx collects the word that spills above acc3 and is moved into the
+       # just-retired register at the start of the next iteration)
+       mov     $acc0, $t0
+       shl     \$32, $acc0
+       mulq    $t1
+       shr     \$32, $t0
+       add     $acc0, $acc1            # +=acc[0]<<96
+       adc     $t0, $acc2
+       adc     %rax, $acc3
+        mov    $acc1, %rax
+       adc     \$0, %rdx
+
+       ##########################################
+       # Second iteration
+       mov     $acc1, $t0
+       shl     \$32, $acc1
+       mov     %rdx, $acc0
+       mulq    $t1
+       shr     \$32, $t0
+       add     $acc1, $acc2
+       adc     $t0, $acc3
+       adc     %rax, $acc0
+        mov    $acc2, %rax
+       adc     \$0, %rdx
+
+       ##########################################
+       # Third iteration
+       mov     $acc2, $t0
+       shl     \$32, $acc2
+       mov     %rdx, $acc1
+       mulq    $t1
+       shr     \$32, $t0
+       add     $acc2, $acc3
+       adc     $t0, $acc0
+       adc     %rax, $acc1
+        mov    $acc3, %rax
+       adc     \$0, %rdx
+
+       ###########################################
+       # Last iteration
+       mov     $acc3, $t0
+       shl     \$32, $acc3
+       mov     %rdx, $acc2
+       mulq    $t1
+       shr     \$32, $t0
+       add     $acc3, $acc0
+       adc     $t0, $acc1
+       adc     %rax, $acc2
+       adc     \$0, %rdx
+       xor     $acc3, $acc3
+
+       ############################################
+       # Add the rest of the acc
+       # (acc0..acc2 are reused as snapshots of the unreduced words and acc3
+       # becomes the fifth, carry word)
+       add     $acc0, $acc4
+       adc     $acc1, $acc5
+        mov    $acc4, $acc0
+       adc     $acc2, $acc6
+       adc     %rdx, $acc7
+        mov    $acc5, $acc1
+       adc     \$0, $acc3
+
+       # branch-less conditional subtraction of .Lpoly: the borrow runs
+       # through the carry word, and CF=1 restores the snapshots
+       sub     \$-1, $acc4             # .Lpoly[0]
+        mov    $acc6, $acc2
+       sbb     $a_ptr, $acc5           # .Lpoly[1]
+       sbb     \$0, $acc6              # .Lpoly[2]
+        mov    $acc7, $t0
+       sbb     $t1, $acc7              # .Lpoly[3]
+       sbb     \$0, $acc3
+
+       cmovc   $acc0, $acc4
+       cmovc   $acc1, $acc5
+       mov     $acc4, 8*0($r_ptr)
+       cmovc   $acc2, $acc6
+       mov     $acc5, 8*1($r_ptr)
+       cmovc   $t0, $acc7
+       mov     $acc6, 8*2($r_ptr)
+       mov     $acc7, 8*3($r_ptr)
+
+       ret
+.size  __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
+___
+
+# The mulx/adcx/adox helpers below are emitted only when $addx is set.
+if ($addx) {
+$code.=<<___;
+# Internal mulx-based Montgomery multiplication helper.
+# On entry: rdx = b[0], acc1..acc4 = a[0..3], b_ptr addresses b[], a_ptr
+# addresses a[] biased by -128 (hence the +128 displacements), r_ptr
+# addresses the result.  In the b[1..3] rounds adcx and adox run two
+# independent carry chains (CF and OF) over the interleaved low and high
+# product halves.
+.type  __ecp_nistz256_mul_montx,\@abi-omnipotent
+.align 32
+__ecp_nistz256_mul_montx:
+       ########################################################################
+       # Multiply by b[0]
+       # (poly1 temporarily holds the shift count 32 for shlx/shrx; it is
+       # reloaded with .Lpoly[1] before the final subtraction)
+       mulx    $acc1, $acc0, $acc1
+       mulx    $acc2, $t0, $acc2
+       mov     \$32, $poly1
+       xor     $acc5, $acc5            # cf=0
+       mulx    $acc3, $t1, $acc3
+       mov     .Lpoly+8*3(%rip), $poly3
+       adc     $t0, $acc1
+       mulx    $acc4, $t0, $acc4
+        mov    $acc0, %rdx
+       adc     $t1, $acc2
+        shlx   $poly1,$acc0,$t1
+       adc     $t0, $acc3
+        shrx   $poly1,$acc0,$t0
+       adc     \$0, $acc4
+
+       ########################################################################
+       # First reduction step
+       # (t1:t0 were prepared above as acc[0] shifted left by 96 bits; rdx
+       # holds acc[0] for the multiplication by .Lpoly[3])
+       add     $t1, $acc1
+       adc     $t0, $acc2
+
+       mulx    $poly3, $t0, $t1
+        mov    8*1($b_ptr), %rdx
+       adc     $t0, $acc3
+       adc     $t1, $acc4
+       adc     \$0, $acc5
+       xor     $acc0, $acc0            # $acc0=0,cf=0,of=0
+
+       ########################################################################
+       # Multiply by b[1]
+       mulx    8*0+128($a_ptr), $t0, $t1
+       adcx    $t0, $acc1
+       adox    $t1, $acc2
+
+       mulx    8*1+128($a_ptr), $t0, $t1
+       adcx    $t0, $acc2
+       adox    $t1, $acc3
+
+       mulx    8*2+128($a_ptr), $t0, $t1
+       adcx    $t0, $acc3
+       adox    $t1, $acc4
+
+       mulx    8*3+128($a_ptr), $t0, $t1
+        mov    $acc1, %rdx
+       adcx    $t0, $acc4
+        shlx   $poly1, $acc1, $t0
+       adox    $t1, $acc5
+        shrx   $poly1, $acc1, $t1
+
+       # fold both pending carries (CF and OF) into the new top word
+       adcx    $acc0, $acc5
+       adox    $acc0, $acc0
+       adc     \$0, $acc0
+
+       ########################################################################
+       # Second reduction step
+       add     $t0, $acc2
+       adc     $t1, $acc3
+
+       mulx    $poly3, $t0, $t1
+        mov    8*2($b_ptr), %rdx
+       adc     $t0, $acc4
+       adc     $t1, $acc5
+       adc     \$0, $acc0
+       xor     $acc1 ,$acc1            # $acc1=0,cf=0,of=0
+
+       ########################################################################
+       # Multiply by b[2]
+       mulx    8*0+128($a_ptr), $t0, $t1
+       adcx    $t0, $acc2
+       adox    $t1, $acc3
+
+       mulx    8*1+128($a_ptr), $t0, $t1
+       adcx    $t0, $acc3
+       adox    $t1, $acc4
+
+       mulx    8*2+128($a_ptr), $t0, $t1
+       adcx    $t0, $acc4
+       adox    $t1, $acc5
+
+       mulx    8*3+128($a_ptr), $t0, $t1
+        mov    $acc2, %rdx
+       adcx    $t0, $acc5
+        shlx   $poly1, $acc2, $t0
+       adox    $t1, $acc0
+        shrx   $poly1, $acc2, $t1
+
+       adcx    $acc1, $acc0
+       adox    $acc1, $acc1
+       adc     \$0, $acc1
+
+       ########################################################################
+       # Third reduction step
+       add     $t0, $acc3
+       adc     $t1, $acc4
+
+       mulx    $poly3, $t0, $t1
+        mov    8*3($b_ptr), %rdx
+       adc     $t0, $acc5
+       adc     $t1, $acc0
+       adc     \$0, $acc1
+       xor     $acc2, $acc2            # $acc2=0,cf=0,of=0
+
+       ########################################################################
+       # Multiply by b[3]
+       mulx    8*0+128($a_ptr), $t0, $t1
+       adcx    $t0, $acc3
+       adox    $t1, $acc4
+
+       mulx    8*1+128($a_ptr), $t0, $t1
+       adcx    $t0, $acc4
+       adox    $t1, $acc5
+
+       mulx    8*2+128($a_ptr), $t0, $t1
+       adcx    $t0, $acc5
+       adox    $t1, $acc0
+
+       mulx    8*3+128($a_ptr), $t0, $t1
+        mov    $acc3, %rdx
+       adcx    $t0, $acc0
+        shlx   $poly1, $acc3, $t0
+       adox    $t1, $acc1
+        shrx   $poly1, $acc3, $t1
+
+       adcx    $acc2, $acc1
+       adox    $acc2, $acc2
+       adc     \$0, $acc2
+
+       ########################################################################
+       # Fourth reduction step
+       # (t2/t3 snapshot the two low result words; b_ptr and rdx, which
+       # share their registers, are no longer needed at this point)
+       add     $t0, $acc4
+       adc     $t1, $acc5
+
+       mulx    $poly3, $t0, $t1
+        mov    $acc4, $t2
+       mov     .Lpoly+8*1(%rip), $poly1
+       adc     $t0, $acc0
+        mov    $acc5, $t3
+       adc     $t1, $acc1
+       adc     \$0, $acc2
+
+       ########################################################################
+       # Branch-less conditional subtraction of P
+       # The xor clears CF so the chain can start with sbb; the borrow runs
+       # through the fifth word (acc2) and CF=1 restores the snapshots.
+       xor     %eax, %eax
+        mov    $acc0, $t0
+       sbb     \$-1, $acc4             # .Lpoly[0]
+       sbb     $poly1, $acc5           # .Lpoly[1]
+       sbb     \$0, $acc0              # .Lpoly[2]
+        mov    $acc1, $t1
+       sbb     $poly3, $acc1           # .Lpoly[3]
+       sbb     \$0, $acc2
+
+       cmovc   $t2, $acc4
+       cmovc   $t3, $acc5
+       mov     $acc4, 8*0($r_ptr)
+       cmovc   $t0, $acc0
+       mov     $acc5, 8*1($r_ptr)
+       cmovc   $t1, $acc1
+       mov     $acc0, 8*2($r_ptr)
+       mov     $acc1, 8*3($r_ptr)
+
+       ret
+.size  __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
+
+# Internal mulx-based Montgomery squaring helper.
+# On entry: rdx = a[0], acc6 = a[1], acc7 = a[2], acc0 = a[3], a_ptr
+# addresses a[] biased by -128 (hence the +128 displacements), r_ptr
+# addresses the result.  The off-diagonal products are summed first; their
+# doubling (adcx chain) is interleaved with the addition of the squared
+# limbs (adox chain), followed by four reduction steps and a conditional
+# subtraction of .Lpoly.
+.type  __ecp_nistz256_sqr_montx,\@abi-omnipotent
+.align 32
+__ecp_nistz256_sqr_montx:
+       mulx    $acc6, $acc1, $acc2     # a[0]*a[1]
+       mulx    $acc7, $t0, $acc3       # a[0]*a[2]
+       xor     %eax, %eax
+       adc     $t0, $acc2
+       mulx    $acc0, $t1, $acc4       # a[0]*a[3]
+        mov    $acc6, %rdx
+       adc     $t1, $acc3
+       adc     \$0, $acc4
+       xor     $acc5, $acc5            # $acc5=0,cf=0,of=0
+
+       #################################
+       mulx    $acc7, $t0, $t1         # a[1]*a[2]
+       adcx    $t0, $acc3
+       adox    $t1, $acc4
+
+       mulx    $acc0, $t0, $t1         # a[1]*a[3]
+        mov    $acc7, %rdx
+       adcx    $t0, $acc4
+       adox    $t1, $acc5
+       adc     \$0, $acc5
+
+       #################################
+       mulx    $acc0, $t0, $acc6       # a[2]*a[3]
+        mov    8*0+128($a_ptr), %rdx
+       xor     $acc7, $acc7            # $acc7=0,cf=0,of=0
+        adcx   $acc1, $acc1            # acc1:6<<1
+       adox    $t0, $acc5
+        adcx   $acc2, $acc2
+       adox    $acc7, $acc6            # of=0
+
+       # squares of the limbs: each limb is reloaded into rdx and multiplied
+       # by itself, the halves being added on the adox chain
+       # NOTE(review): 0x67 is the address-size prefix byte; presumably it
+       # is emitted here as instruction padding - confirm the intent
+       mulx    %rdx, $acc0, $t1
+       mov     8*1+128($a_ptr), %rdx
+        adcx   $acc3, $acc3
+       adox    $t1, $acc1
+        adcx   $acc4, $acc4
+       mulx    %rdx, $t0, $t4
+       mov     8*2+128($a_ptr), %rdx
+        adcx   $acc5, $acc5
+       adox    $t0, $acc2
+        adcx   $acc6, $acc6
+       .byte   0x67
+       mulx    %rdx, $t0, $t1
+       mov     8*3+128($a_ptr), %rdx
+       adox    $t4, $acc3
+        adcx   $acc7, $acc7
+       adox    $t0, $acc4
+        mov    \$32, $a_ptr
+       adox    $t1, $acc5
+       .byte   0x67,0x67
+       mulx    %rdx, $t0, $t4
+        mov    $acc0, %rdx
+       adox    $t0, $acc6
+        shlx   $a_ptr, $acc0, $t0
+       adox    $t4, $acc7
+        shrx   $a_ptr, $acc0, $t4
+        mov    .Lpoly+8*3(%rip), $t1
+
+       # a_ptr now holds the shift count 32 and t1 holds .Lpoly[3]; every
+       # step adds the retired word shifted left by 96 bits plus its product
+       # with .Lpoly[3], whose high half lands in the retired register
+       # reduction step 1
+       add     $t0, $acc1
+       adc     $t4, $acc2
+
+       mulx    $t1, $t0, $acc0
+        mov    $acc1, %rdx
+       adc     $t0, $acc3
+        shlx   $a_ptr, $acc1, $t0
+       adc     \$0, $acc0
+        shrx   $a_ptr, $acc1, $t4
+
+       # reduction step 2
+       add     $t0, $acc2
+       adc     $t4, $acc3
+
+       mulx    $t1, $t0, $acc1
+        mov    $acc2, %rdx
+       adc     $t0, $acc0
+        shlx   $a_ptr, $acc2, $t0
+       adc     \$0, $acc1
+        shrx   $a_ptr, $acc2, $t4
+
+       # reduction step 3
+       add     $t0, $acc3
+       adc     $t4, $acc0
+
+       mulx    $t1, $t0, $acc2
+        mov    $acc3, %rdx
+       adc     $t0, $acc1
+        shlx   $a_ptr, $acc3, $t0
+       adc     \$0, $acc2
+        shrx   $a_ptr, $acc3, $t4
+
+       # reduction step 4
+       add     $t0, $acc0
+       adc     $t4, $acc1
+
+       mulx    $t1, $t0, $acc3
+       adc     $t0, $acc2
+       adc     \$0, $acc3
+
+       # acc0..acc3 are reused as snapshots of the unreduced words once they
+       # have been added in; t3 becomes the fifth, carry word
+       xor     $t3, $t3                # cf=0
+       adc     $acc0, $acc4            # accumulate upper half
+        mov    .Lpoly+8*1(%rip), $a_ptr
+       adc     $acc1, $acc5
+        mov    $acc4, $acc0
+       adc     $acc2, $acc6
+       adc     $acc3, $acc7
+        mov    $acc5, $acc1
+       adc     \$0, $t3
+
+       # branch-less conditional subtraction of .Lpoly: the borrow runs
+       # through the carry word, and CF=1 restores the snapshots
+       xor     %eax, %eax              # cf=0
+       sbb     \$-1, $acc4             # .Lpoly[0]
+        mov    $acc6, $acc2
+       sbb     $a_ptr, $acc5           # .Lpoly[1]
+       sbb     \$0, $acc6              # .Lpoly[2]
+        mov    $acc7, $acc3
+       sbb     $t1, $acc7              # .Lpoly[3]
+       sbb     \$0, $t3
+
+       cmovc   $acc0, $acc4
+       cmovc   $acc1, $acc5
+       mov     $acc4, 8*0($r_ptr)
+       cmovc   $acc2, $acc6
+       mov     $acc5, 8*1($r_ptr)
+       cmovc   $acc3, $acc7
+       mov     $acc6, 8*2($r_ptr)
+       mov     $acc7, 8*3($r_ptr)
+
+       ret
+.size  __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
+___
+}
+}
+{
+# Register assignment for ecp_nistz256_from_mont.  $t1 and $t2 live in
+# %r12/%r13, which the routine saves and restores itself.
+my ($r_ptr,$in_ptr)=("%rdi","%rsi");
+my ($acc0,$acc1,$acc2,$acc3)=map("%r$_",(8..11));
+my ($t0,$t1,$t2)=("%rcx","%r12","%r13");
+
+$code.=<<___;
+################################################################################
+# void ecp_nistz256_from_mont(
+#   uint64_t res[4],
+#   uint64_t in[4]);
+# This one performs Montgomery multiplication by 1, so we only need the reduction
+#
+# The four input limbs go through four reduction iterations, each retiring
+# the lowest word, followed by a branch-less conditional subtraction of
+# .Lpoly.  t1 and t2 hold .Lpoly[1] and .Lpoly[3].
+
+.globl ecp_nistz256_from_mont
+.type  ecp_nistz256_from_mont,\@function,2
+.align 32
+ecp_nistz256_from_mont:
+       push    %r12
+       push    %r13
+
+       mov     8*0($in_ptr), %rax
+       mov     .Lpoly+8*3(%rip), $t2
+       mov     8*1($in_ptr), $acc1
+       mov     8*2($in_ptr), $acc2
+       mov     8*3($in_ptr), $acc3
+       mov     %rax, $acc0
+       mov     .Lpoly+8*1(%rip), $t1
+
+       #########################################
+       # First iteration
+       # (adds the retired word shifted left by 96 bits plus its product
+       # with .Lpoly[3]; rdx collects the word that spills above acc3)
+       mov     %rax, $t0
+       shl     \$32, $acc0
+       mulq    $t2
+       shr     \$32, $t0
+       add     $acc0, $acc1
+       adc     $t0, $acc2
+       adc     %rax, $acc3
+        mov    $acc1, %rax
+       adc     \$0, %rdx
+
+       #########################################
+       # Second iteration
+       mov     $acc1, $t0
+       shl     \$32, $acc1
+       mov     %rdx, $acc0
+       mulq    $t2
+       shr     \$32, $t0
+       add     $acc1, $acc2
+       adc     $t0, $acc3
+       adc     %rax, $acc0
+        mov    $acc2, %rax
+       adc     \$0, %rdx
+
+       ##########################################
+       # Third iteration
+       mov     $acc2, $t0
+       shl     \$32, $acc2
+       mov     %rdx, $acc1
+       mulq    $t2
+       shr     \$32, $t0
+       add     $acc2, $acc3
+       adc     $t0, $acc0
+       adc     %rax, $acc1
+        mov    $acc3, %rax
+       adc     \$0, %rdx
+
+       ###########################################
+       # Last iteration
+       # (the input has been fully consumed, so in_ptr is reused as a
+       # scratch register for the snapshot of the second result word)
+       mov     $acc3, $t0
+       shl     \$32, $acc3
+       mov     %rdx, $acc2
+       mulq    $t2
+       shr     \$32, $t0
+       add     $acc3, $acc0
+       adc     $t0, $acc1
+        mov    $acc0, $t0
+       adc     %rax, $acc2
+        mov    $acc1, $in_ptr
+       adc     \$0, %rdx
+
+       ###########################################
+       # Branch-less conditional subtraction
+       # The value is acc0, acc1, acc2, rdx (low to high).  The last sbb
+       # turns the borrow into t2 = 0 or all-ones and sets ZF accordingly:
+       # cmovnz restores the snapshots of the three low words on borrow.
+       # For the top word the roles are swapped: acc3 holds the unsubtracted
+       # copy and rdx the subtracted one, so cmovz selects rdx on no borrow.
+       sub     \$-1, $acc0
+        mov    $acc2, %rax
+       sbb     $t1, $acc1
+       sbb     \$0, $acc2
+        mov    %rdx, $acc3
+       sbb     $t2, %rdx
+       sbb     $t2, $t2
+
+       cmovnz  $t0, $acc0
+       cmovnz  $in_ptr, $acc1
+       mov     $acc0, 8*0($r_ptr)
+       cmovnz  %rax, $acc2
+       mov     $acc1, 8*1($r_ptr)
+       cmovz   %rdx, $acc3
+       mov     $acc2, 8*2($r_ptr)
+       mov     $acc3, 8*3($r_ptr)
+
+       pop     %r13
+       pop     %r12
+       ret
+.size  ecp_nistz256_from_mont,.-ecp_nistz256_from_mont
+___
+}
+{
+# Register map for the SSE2 table-lookup routines in this block.
+# The three arguments arrive in %rcx/%rdx/%r8d when $win64 is set and in
+# %rdi/%rsi/%edx otherwise.
+my ($val,$in_t,$index)=$win64?("%rcx","%rdx","%r8d"):("%rdi","%rsi","%edx");
+# %xmm0-7: per-lane increment, broadcast index, and up to six accumulators.
+my ($ONE,$INDEX,$Ra,$Rb,$Rc,$Rd,$Re,$Rf)=map("%xmm$_",(0..7));
+# %xmm8-15: running entry counter, up to six table words, and the compare mask.
+my ($M0,$T0a,$T0b,$T0c,$T0d,$T0e,$T0f,$TMP0)=map("%xmm$_",(8..15));
+
+$code.=<<___;
+################################################################################
+# void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index);
+#
+# Copies one 96-byte entry of the table at in_t into val. All 16 entries are
+# read and each one is masked by an equality test against index, so neither
+# branches nor load addresses depend on index. The entry counter starts from
+# the .LOne constant (presumably 1 in every lane - confirm at its definition),
+# and an index that matches no entry stores all-zero bytes. The table is read
+# with aligned loads (movdqa); val is written with unaligned stores (movdqu).
+.globl ecp_nistz256_select_w5
+.type  ecp_nistz256_select_w5,\@abi-omnipotent
+.align 32
+ecp_nistz256_select_w5:
+___
+# When AVX2 code is being generated ($avx>1), test bit 5 of the third 32-bit
+# word of OPENSSL_ia32cap_P at run time and divert to .Lavx2_select_w5 if set.
+$code.=<<___   if ($avx>1);
+       mov     OPENSSL_ia32cap_P+8(%rip), %eax
+       test    \$`1<<5`, %eax
+       jnz     .Lavx2_select_w5
+___
+# Win64 only: open a 0xa8-byte stack frame and save %xmm6-%xmm15 into it.
+# The instructions after the SEH label are emitted as raw bytes; the trailing
+# comment on each line gives the mnemonic.
+$code.=<<___   if ($win64);
+       lea     -0x88(%rsp), %rax
+.LSEH_begin_ecp_nistz256_select_w5:
+       .byte   0x48,0x8d,0x60,0xe0             #lea    -0x20(%rax), %rsp
+       .byte   0x0f,0x29,0x70,0xe0             #movaps %xmm6, -0x20(%rax)
+       .byte   0x0f,0x29,0x78,0xf0             #movaps %xmm7, -0x10(%rax)
+       .byte   0x44,0x0f,0x29,0x00             #movaps %xmm8, 0(%rax)
+       .byte   0x44,0x0f,0x29,0x48,0x10        #movaps %xmm9, 0x10(%rax)
+       .byte   0x44,0x0f,0x29,0x50,0x20        #movaps %xmm10, 0x20(%rax)
+       .byte   0x44,0x0f,0x29,0x58,0x30        #movaps %xmm11, 0x30(%rax)
+       .byte   0x44,0x0f,0x29,0x60,0x40        #movaps %xmm12, 0x40(%rax)
+       .byte   0x44,0x0f,0x29,0x68,0x50        #movaps %xmm13, 0x50(%rax)
+       .byte   0x44,0x0f,0x29,0x70,0x60        #movaps %xmm14, 0x60(%rax)
+       .byte   0x44,0x0f,0x29,0x78,0x70        #movaps %xmm15, 0x70(%rax)
+___
+$code.=<<___;
+       movdqa  .LOne(%rip), $ONE
+       movd    $index, $INDEX
+
+       # clear the six 128-bit accumulators
+       pxor    $Ra, $Ra
+       pxor    $Rb, $Rb
+       pxor    $Rc, $Rc
+       pxor    $Rd, $Rd
+       pxor    $Re, $Re
+       pxor    $Rf, $Rf
+
+       # counter starts at the .LOne value; index is replicated to all lanes
+       movdqa  $ONE, $M0
+       pshufd  \$0, $INDEX, $INDEX
+
+       # 16 table entries to scan
+       mov     \$16, %rax
+.Lselect_loop_sse_w5:
+
+       # mask = all-ones in lanes where the counter equals index; counter += ONE
+       movdqa  $M0, $TMP0
+       paddd   $ONE, $M0
+       pcmpeqd $INDEX, $TMP0
+
+       # fetch the whole 96-byte entry and step in_t to the next one
+       movdqa  16*0($in_t), $T0a
+       movdqa  16*1($in_t), $T0b
+       movdqa  16*2($in_t), $T0c
+       movdqa  16*3($in_t), $T0d
+       movdqa  16*4($in_t), $T0e
+       movdqa  16*5($in_t), $T0f
+       lea 16*6($in_t), $in_t
+
+       # keep the entry only under the mask and OR it into the accumulators
+       pand    $TMP0, $T0a
+       pand    $TMP0, $T0b
+       por     $T0a, $Ra
+       pand    $TMP0, $T0c
+       por     $T0b, $Rb
+       pand    $TMP0, $T0d
+       por     $T0c, $Rc
+       pand    $TMP0, $T0e
+       por     $T0d, $Rd
+       pand    $TMP0, $T0f
+       por     $T0e, $Re
+       por     $T0f, $Rf
+
+       dec     %rax
+       jnz     .Lselect_loop_sse_w5
+
+       # write the 96 selected bytes to val
+       movdqu  $Ra, 16*0($val)
+       movdqu  $Rb, 16*1($val)
+       movdqu  $Rc, 16*2($val)
+       movdqu  $Rd, 16*3($val)
+       movdqu  $Re, 16*4($val)
+       movdqu  $Rf, 16*5($val)
+___
+# Win64 only: restore %xmm6-%xmm15 and release the 0xa8-byte frame.
+$code.=<<___   if ($win64);
+       movaps  (%rsp), %xmm6
+       movaps  0x10(%rsp), %xmm7
+       movaps  0x20(%rsp), %xmm8
+       movaps  0x30(%rsp), %xmm9
+       movaps  0x40(%rsp), %xmm10
+       movaps  0x50(%rsp), %xmm11
+       movaps  0x60(%rsp), %xmm12
+       movaps  0x70(%rsp), %xmm13
+       movaps  0x80(%rsp), %xmm14
+       movaps  0x90(%rsp), %xmm15
+       lea     0xa8(%rsp), %rsp
+.LSEH_end_ecp_nistz256_select_w5:
+___
+$code.=<<___;
+       ret
+.size  ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
+
+################################################################################
+# void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index);
+#
+# Copies one 64-byte entry of the table at in_t into val. All 64 entries are
+# read and each one is masked by an equality test against index, so neither
+# branches nor load addresses depend on index. The entry counter starts from
+# the .LOne constant (presumably 1 in every lane - confirm at its definition),
+# and an index that matches no entry stores all-zero bytes. The table is read
+# with aligned loads (movdqa); val is written with unaligned stores (movdqu).
+.globl ecp_nistz256_select_w7
+.type  ecp_nistz256_select_w7,\@abi-omnipotent
+.align 32
+ecp_nistz256_select_w7:
+___
+# When AVX2 code is being generated ($avx>1), test bit 5 of the third 32-bit
+# word of OPENSSL_ia32cap_P at run time and divert to .Lavx2_select_w7 if set.
+$code.=<<___   if ($avx>1);
+       mov     OPENSSL_ia32cap_P+8(%rip), %eax
+       test    \$`1<<5`, %eax
+       jnz     .Lavx2_select_w7
+___
+# Win64 only: open a 0xa8-byte stack frame and save %xmm6-%xmm15 into it.
+# The instructions after the SEH label are emitted as raw bytes; the trailing
+# comment on each line gives the mnemonic.
+$code.=<<___   if ($win64);
+       lea     -0x88(%rsp), %rax
+.LSEH_begin_ecp_nistz256_select_w7:
+       .byte   0x48,0x8d,0x60,0xe0             #lea    -0x20(%rax), %rsp
+       .byte   0x0f,0x29,0x70,0xe0             #movaps %xmm6, -0x20(%rax)
+       .byte   0x0f,0x29,0x78,0xf0             #movaps %xmm7, -0x10(%rax)
+       .byte   0x44,0x0f,0x29,0x00             #movaps %xmm8, 0(%rax)
+       .byte   0x44,0x0f,0x29,0x48,0x10        #movaps %xmm9, 0x10(%rax)
+       .byte   0x44,0x0f,0x29,0x50,0x20        #movaps %xmm10, 0x20(%rax)
+       .byte   0x44,0x0f,0x29,0x58,0x30        #movaps %xmm11, 0x30(%rax)
+       .byte   0x44,0x0f,0x29,0x60,0x40        #movaps %xmm12, 0x40(%rax)
+       .byte   0x44,0x0f,0x29,0x68,0x50        #movaps %xmm13, 0x50(%rax)
+       .byte   0x44,0x0f,0x29,0x70,0x60        #movaps %xmm14, 0x60(%rax)
+       .byte   0x44,0x0f,0x29,0x78,0x70        #movaps %xmm15, 0x70(%rax)
+___
+$code.=<<___;
+       movdqa  .LOne(%rip), $M0
+       movd    $index, $INDEX
+
+       # clear the four 128-bit accumulators
+       pxor    $Ra, $Ra
+       pxor    $Rb, $Rb
+       pxor    $Rc, $Rc
+       pxor    $Rd, $Rd
+
+       # per-lane increment is the same .LOne value; index goes to all lanes
+       movdqa  $M0, $ONE
+       pshufd  \$0, $INDEX, $INDEX
+       # 64 table entries to scan
+       mov     \$64, %rax
+
+.Lselect_loop_sse_w7:
+       # mask = all-ones in lanes where the counter equals index; counter += ONE.
+       # The loads of the 64-byte entry are interleaved with the compare.
+       movdqa  $M0, $TMP0
+       paddd   $ONE, $M0
+       movdqa  16*0($in_t), $T0a
+       movdqa  16*1($in_t), $T0b
+       pcmpeqd $INDEX, $TMP0
+       movdqa  16*2($in_t), $T0c
+       movdqa  16*3($in_t), $T0d
+       lea     16*4($in_t), $in_t
+
+       # keep the entry only under the mask and OR it into the accumulators;
+       # the prefetch touches memory 255 bytes past the already advanced in_t
+       pand    $TMP0, $T0a
+       pand    $TMP0, $T0b
+       por     $T0a, $Ra
+       pand    $TMP0, $T0c
+       por     $T0b, $Rb
+       pand    $TMP0, $T0d
+       por     $T0c, $Rc
+       prefetcht0      255($in_t)
+       por     $T0d, $Rd
+
+       dec     %rax
+       jnz     .Lselect_loop_sse_w7
+
+       # write the 64 selected bytes to val
+       movdqu  $Ra, 16*0($val)
+       movdqu  $Rb, 16*1($val)
+       movdqu  $Rc, 16*2($val)
+       movdqu  $Rd, 16*3($val)
+___
+# Win64 only: restore %xmm6-%xmm15 and release the 0xa8-byte frame.
+$code.=<<___   if ($win64);
+       movaps  (%rsp), %xmm6
+       movaps  0x10(%rsp), %xmm7
+       movaps  0x20(%rsp), %xmm8
+       movaps  0x30(%rsp), %xmm9
+       movaps  0x40(%rsp), %xmm10
+       movaps  0x50(%rsp), %xmm11
+       movaps  0x60(%rsp), %xmm12
+       movaps  0x70(%rsp), %xmm13
+       movaps  0x80(%rsp), %xmm14
+       movaps  0x90(%rsp), %xmm15
+       lea     0xa8(%rsp), %rsp
+.LSEH_end_ecp_nistz256_select_w7:
+___
+$code.=<<___;
+       ret
+.size  ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
+___
+}
+# AVX2 flavour of select_w5, generated only when $avx>1. The symbol carries no
+# .globl directive; it is entered through the .Lavx2_select_w5 label.
+if ($avx>1) {
+# Arguments: %rcx/%rdx/%r8d when $win64 is set, %rdi/%rsi/%edx otherwise.
+my ($val,$in_t,$index)=$win64?("%rcx","%rdx","%r8d"):("%rdi","%rsi","%edx");
+# %ymm0-4: counter step, broadcast index, three 256-bit accumulators.
+my ($TWO,$INDEX,$Ra,$Rb,$Rc)=map("%ymm$_",(0..4));
+# %ymm5-9 and %ymm10-14: counter, three table words and compare mask for the
+# first and second entry handled in each loop iteration.
+my ($M0,$T0a,$T0b,$T0c,$TMP0)=map("%ymm$_",(5..9));
+my ($M1,$T1a,$T1b,$T1c,$TMP1)=map("%ymm$_",(10..14));
+
+$code.=<<___;
+################################################################################
+# void ecp_nistz256_avx2_select_w5(uint64_t *val, uint64_t *in_t, int index);
+#
+# Copies one 96-byte entry of the table at in_t into val. The loop runs 8
+# times and examines two entries per pass (16 in total); every entry is loaded
+# and masked by an equality test against index, so neither branches nor load
+# addresses depend on index. Counters start from .LOne and .LTwo and advance
+# by .LTwo (constants defined elsewhere; presumably 1 and 2 in every lane).
+.type  ecp_nistz256_avx2_select_w5,\@abi-omnipotent
+.align 32
+ecp_nistz256_avx2_select_w5:
+.Lavx2_select_w5:
+       vzeroupper
+___
+# Win64 only: open a 0xa8-byte stack frame and save %xmm6-%xmm15 into it.
+# The instructions after the SEH label are emitted as raw bytes; the trailing
+# comment on each line gives the mnemonic.
+$code.=<<___   if ($win64);
+       lea     -0x88(%rsp), %rax
+.LSEH_begin_ecp_nistz256_avx2_select_w5:
+       .byte   0x48,0x8d,0x60,0xe0             #lea    -0x20(%rax), %rsp
+       .byte   0xc5,0xf8,0x29,0x70,0xe0        #vmovaps %xmm6, -0x20(%rax)
+       .byte   0xc5,0xf8,0x29,0x78,0xf0        #vmovaps %xmm7, -0x10(%rax)
+       .byte   0xc5,0x78,0x29,0x40,0x00        #vmovaps %xmm8, 0(%rax)
+       .byte   0xc5,0x78,0x29,0x48,0x10        #vmovaps %xmm9, 0x10(%rax)
+       .byte   0xc5,0x78,0x29,0x50,0x20        #vmovaps %xmm10, 0x20(%rax)
+       .byte   0xc5,0x78,0x29,0x58,0x30        #vmovaps %xmm11, 0x30(%rax)
+       .byte   0xc5,0x78,0x29,0x60,0x40        #vmovaps %xmm12, 0x40(%rax)
+       .byte   0xc5,0x78,0x29,0x68,0x50        #vmovaps %xmm13, 0x50(%rax)
+       .byte   0xc5,0x78,0x29,0x70,0x60        #vmovaps %xmm14, 0x60(%rax)
+       .byte   0xc5,0x78,0x29,0x78,0x70        #vmovaps %xmm15, 0x70(%rax)
+___
+$code.=<<___;
+       vmovdqa .LTwo(%rip), $TWO
+
+       # clear the three 256-bit accumulators
+       vpxor   $Ra, $Ra, $Ra
+       vpxor   $Rb, $Rb, $Rb
+       vpxor   $Rc, $Rc, $Rc
+
+       vmovdqa .LOne(%rip), $M0
+       vmovdqa .LTwo(%rip), $M1
+
+       # index is moved into the low lane of INDEX (xmm1 is its low half) and
+       # replicated to all lanes by permuting with the all-zero Ra as selector
+       vmovd   $index, %xmm1
+       vpermd  $INDEX, $Ra, $INDEX
+
+       # 8 passes, two 96-byte entries each
+       mov     \$8, %rax
+.Lselect_loop_avx2_w5:
+
+       vmovdqa 32*0($in_t), $T0a
+       vmovdqa 32*1($in_t), $T0b
+       vmovdqa 32*2($in_t), $T0c
+
+       vmovdqa 32*3($in_t), $T1a
+       vmovdqa 32*4($in_t), $T1b
+       vmovdqa 32*5($in_t), $T1c
+
+       # masks = all-ones in lanes where the respective counter equals index
+       vpcmpeqd        $INDEX, $M0, $TMP0
+       vpcmpeqd        $INDEX, $M1, $TMP1
+
+       vpaddd  $TWO, $M0, $M0
+       vpaddd  $TWO, $M1, $M1
+       lea     32*6($in_t), $in_t
+
+       vpand   $TMP0, $T0a, $T0a
+       vpand   $TMP0, $T0b, $T0b
+       vpand   $TMP0, $T0c, $T0c
+       vpand   $TMP1, $T1a, $T1a
+       vpand   $TMP1, $T1b, $T1b
+       vpand   $TMP1, $T1c, $T1c
+
+       # merge the masked entries into the accumulators with XOR
+       vpxor   $T0a, $Ra, $Ra
+       vpxor   $T0b, $Rb, $Rb
+       vpxor   $T0c, $Rc, $Rc
+       vpxor   $T1a, $Ra, $Ra
+       vpxor   $T1b, $Rb, $Rb
+       vpxor   $T1c, $Rc, $Rc
+
+       dec %rax
+       jnz .Lselect_loop_avx2_w5
+
+       # write the 96 selected bytes to val
+       vmovdqu $Ra, 32*0($val)
+       vmovdqu $Rb, 32*1($val)
+       vmovdqu $Rc, 32*2($val)
+       vzeroupper
+___
+# Win64 only: restore %xmm6-%xmm15 and release the 0xa8-byte frame.
+$code.=<<___   if ($win64);
+       movaps  (%rsp), %xmm6
+       movaps  0x10(%rsp), %xmm7
+       movaps  0x20(%rsp), %xmm8
+       movaps  0x30(%rsp), %xmm9
+       movaps  0x40(%rsp), %xmm10
+       movaps  0x50(%rsp), %xmm11
+       movaps  0x60(%rsp), %xmm12
+       movaps  0x70(%rsp), %xmm13
+       movaps  0x80(%rsp), %xmm14
+       movaps  0x90(%rsp), %xmm15
+       lea     0xa8(%rsp), %rsp
+.LSEH_end_ecp_nistz256_avx2_select_w5:
+___
+$code.=<<___;
+       ret
+.size  ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
+___
+}
+# AVX2 flavour of select_w7. With $avx>1 the real routine is generated (as an
+# exported symbol that is also reachable through .Lavx2_select_w7); otherwise
+# the else-branch at the end emits a stub of the same name that executes ud2.
+if ($avx>1) {
+# Arguments: %rcx/%rdx/%r8d when $win64 is set, %rdi/%rsi/%edx otherwise.
+my ($val,$in_t,$index)=$win64?("%rcx","%rdx","%r8d"):("%rdi","%rsi","%edx");
+# %ymm0-3: counter step, broadcast index, two 256-bit accumulators.
+my ($THREE,$INDEX,$Ra,$Rb)=map("%ymm$_",(0..3));
+# Three register groups (counter, two table words, compare mask), one for each
+# of the three entries handled per loop iteration.
+my ($M0,$T0a,$T0b,$TMP0)=map("%ymm$_",(4..7));
+my ($M1,$T1a,$T1b,$TMP1)=map("%ymm$_",(8..11));
+my ($M2,$T2a,$T2b,$TMP2)=map("%ymm$_",(12..15));
+
+$code.=<<___;
+
+################################################################################
+# void ecp_nistz256_avx2_select_w7(uint64_t *val, uint64_t *in_t, int index);
+#
+# Copies one 64-byte entry of the table at in_t into val. The loop runs 21
+# times and examines three entries per pass; one more entry is examined after
+# the loop, 64 in total. Every entry is loaded and masked by an equality test
+# against index, so neither branches nor load addresses depend on index.
+# Counters start from .LOne, .LTwo and .LThree and advance by .LThree
+# (constants defined elsewhere; presumably 1, 2 and 3 in every lane).
+.globl ecp_nistz256_avx2_select_w7
+.type  ecp_nistz256_avx2_select_w7,\@abi-omnipotent
+.align 32
+ecp_nistz256_avx2_select_w7:
+.Lavx2_select_w7:
+       vzeroupper
+___
+# Win64 only: open a 0xa8-byte stack frame and save %xmm6-%xmm15 into it.
+# The instructions after the SEH label are emitted as raw bytes; the trailing
+# comment on each line gives the mnemonic.
+$code.=<<___   if ($win64);
+       lea     -0x88(%rsp), %rax
+.LSEH_begin_ecp_nistz256_avx2_select_w7:
+       .byte   0x48,0x8d,0x60,0xe0             #lea    -0x20(%rax), %rsp
+       .byte   0xc5,0xf8,0x29,0x70,0xe0        #vmovaps %xmm6, -0x20(%rax)
+       .byte   0xc5,0xf8,0x29,0x78,0xf0        #vmovaps %xmm7, -0x10(%rax)
+       .byte   0xc5,0x78,0x29,0x40,0x00        #vmovaps %xmm8, 0(%rax)
+       .byte   0xc5,0x78,0x29,0x48,0x10        #vmovaps %xmm9, 0x10(%rax)
+       .byte   0xc5,0x78,0x29,0x50,0x20        #vmovaps %xmm10, 0x20(%rax)
+       .byte   0xc5,0x78,0x29,0x58,0x30        #vmovaps %xmm11, 0x30(%rax)
+       .byte   0xc5,0x78,0x29,0x60,0x40        #vmovaps %xmm12, 0x40(%rax)
+       .byte   0xc5,0x78,0x29,0x68,0x50        #vmovaps %xmm13, 0x50(%rax)
+       .byte   0xc5,0x78,0x29,0x70,0x60        #vmovaps %xmm14, 0x60(%rax)
+       .byte   0xc5,0x78,0x29,0x78,0x70        #vmovaps %xmm15, 0x70(%rax)
+___
+$code.=<<___;
+       vmovdqa .LThree(%rip), $THREE
+
+       # clear the two 256-bit accumulators
+       vpxor   $Ra, $Ra, $Ra
+       vpxor   $Rb, $Rb, $Rb
+
+       vmovdqa .LOne(%rip), $M0
+       vmovdqa .LTwo(%rip), $M1
+       vmovdqa .LThree(%rip), $M2
+
+       # index is moved into the low lane of INDEX (xmm1 is its low half) and
+       # replicated to all lanes by permuting with the all-zero Ra as selector
+       vmovd   $index, %xmm1
+       vpermd  $INDEX, $Ra, $INDEX
+       # Skip index = 0, because it is implicitly the point at infinity
+
+       # 21 passes, three 64-byte entries each
+       mov     \$21, %rax
+.Lselect_loop_avx2_w7:
+
+       vmovdqa 32*0($in_t), $T0a
+       vmovdqa 32*1($in_t), $T0b
+
+       vmovdqa 32*2($in_t), $T1a
+       vmovdqa 32*3($in_t), $T1b
+
+       vmovdqa 32*4($in_t), $T2a
+       vmovdqa 32*5($in_t), $T2b
+
+       # masks = all-ones in lanes where the respective counter equals index
+       vpcmpeqd        $INDEX, $M0, $TMP0
+       vpcmpeqd        $INDEX, $M1, $TMP1
+       vpcmpeqd        $INDEX, $M2, $TMP2
+
+       vpaddd  $THREE, $M0, $M0
+       vpaddd  $THREE, $M1, $M1
+       vpaddd  $THREE, $M2, $M2
+       lea     32*6($in_t), $in_t
+
+       vpand   $TMP0, $T0a, $T0a
+       vpand   $TMP0, $T0b, $T0b
+       vpand   $TMP1, $T1a, $T1a
+       vpand   $TMP1, $T1b, $T1b
+       vpand   $TMP2, $T2a, $T2a
+       vpand   $TMP2, $T2b, $T2b
+
+       # merge the masked entries into the accumulators with XOR
+       vpxor   $T0a, $Ra, $Ra
+       vpxor   $T0b, $Rb, $Rb
+       vpxor   $T1a, $Ra, $Ra
+       vpxor   $T1b, $Rb, $Rb
+       vpxor   $T2a, $Ra, $Ra
+       vpxor   $T2b, $Rb, $Rb
+
+       dec %rax
+       jnz .Lselect_loop_avx2_w7
+
+
+       # one entry is left after 21*3 = 63; handle it with the first counter
+       vmovdqa 32*0($in_t), $T0a
+       vmovdqa 32*1($in_t), $T0b
+
+       vpcmpeqd        $INDEX, $M0, $TMP0
+
+       vpand   $TMP0, $T0a, $T0a
+       vpand   $TMP0, $T0b, $T0b
+
+       vpxor   $T0a, $Ra, $Ra
+       vpxor   $T0b, $Rb, $Rb
+
+       # write the 64 selected bytes to val
+       vmovdqu $Ra, 32*0($val)
+       vmovdqu $Rb, 32*1($val)
+       vzeroupper
+___
+# Win64 only: restore %xmm6-%xmm15 and release the 0xa8-byte frame.
+$code.=<<___   if ($win64);
+       movaps  (%rsp), %xmm6
+       movaps  0x10(%rsp), %xmm7
+       movaps  0x20(%rsp), %xmm8
+       movaps  0x30(%rsp), %xmm9
+       movaps  0x40(%rsp), %xmm10
+       movaps  0x50(%rsp), %xmm11
+       movaps  0x60(%rsp), %xmm12
+       movaps  0x70(%rsp), %xmm13
+       movaps  0x80(%rsp), %xmm14
+       movaps  0x90(%rsp), %xmm15
+       lea     0xa8(%rsp), %rsp
+.LSEH_end_ecp_nistz256_avx2_select_w7:
+___
+$code.=<<___;
+       ret
+.size  ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
+___
+} else {
+# No AVX2 code generation: still define the exported symbol, but make it trap
+# (ud2) if it is ever called.
+$code.=<<___;
+.globl ecp_nistz256_avx2_select_w7
+.type  ecp_nistz256_avx2_select_w7,\@function,3
+.align 32
+ecp_nistz256_avx2_select_w7:
+       .byte   0x0f,0x0b       # ud2
+       ret
+.size  ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
+___
+}
+{{{
+########################################################################
+# This block implements higher level point_double, point_add and
+# point_add_affine. The key to performance in this case is to allow
+# out-of-order execution logic to overlap computations from next step
+# with tail processing from current step. By using tailored calling
+# sequence we minimize inter-step overhead to give processor better
+# shot at overlapping operations...
+#
+# You will notice that input data is copied to stack. Trouble is that
+# there are no registers to spare for holding original pointers, and
+# reloading the pointers would create undesired dependencies on
+# effective address calculation paths. In other words, this too is done
+# to favour out-of-order execution logic.
+#                                              <appro@openssl.org>
+
+# Register map shared by the point_double/point_add generators and their
+# helpers: result pointer, first-operand pointer, the second-operand pointer as
+# originally passed (%rdx), and the register the code actually uses for it.
+my ($r_ptr,$a_ptr,$b_org,$b_ptr)=("%rdi","%rsi","%rdx","%rbx");
+# Eight 64-bit working registers, %r8..%r15.
+my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("%r$_",(8..15));
+# Scratch registers. NOTE(review): $t3 and $t4 both alias $acc4 here; the
+# helper blocks that use them re-declare both locally before doing so.
+my ($t0,$t1,$t2,$t3,$t4)=("%rax","%rbp","%rcx",$acc4,$acc4);
+# Registers expected to hold limbs 1 and 3 of the modulus (gen_double loads
+# them from .Lpoly+8*1 and .Lpoly+8*3).
+my ($poly1,$poly3)=($acc6,$acc7);
+
+# load_for_mul(a, b, src0): returns the assembly text that sets up registers
+# for a following __ecp_nistz256_mul_mont call.
+#   a, b  - memory operands given as strings, e.g. "32(%rsp)"
+#   src0  - register that receives the first 64-bit word of b
+# The emitted code loads the four words of a into $acc1..$acc4, points $b_ptr
+# at b and points $a_ptr at a, displaced by -128 unless src0 is "%rax"
+# (gen_double applies the same 128-byte displacement for its "x" flavour).
+# It is invoked from inside heredocs via backticks with a leading &, which
+# bypasses the empty prototype.
+sub load_for_mul () {
+my ($a,$b,$src0) = @_;
+my $bias = $src0 eq "%rax" ? 0 : -128;
+
+"      mov     $b, $src0
+       lea     $b, $b_ptr
+       mov     8*0+$a, $acc1
+       mov     8*1+$a, $acc2
+       lea     $bias+$a, $a_ptr
+       mov     8*2+$a, $acc3
+       mov     8*3+$a, $acc4"
+}
+
+# load_for_sqr(a, src0): returns the assembly text that sets up registers for
+# a following __ecp_nistz256_sqr_mont call.
+#   a     - memory operand given as a string, e.g. "0(%rsp)"
+#   src0  - register that receives the first 64-bit word of a
+# The remaining three words go to $acc6, $acc7 and $acc0, and $a_ptr is
+# pointed at a, displaced by -128 unless src0 is "%rax".
+# It is invoked from inside heredocs via backticks with a leading &, which
+# bypasses the empty prototype.
+sub load_for_sqr () {
+my ($a,$src0) = @_;
+my $bias = $src0 eq "%rax" ? 0 : -128;
+
+"      mov     8*0+$a, $src0
+       mov     8*1+$a, $acc6
+       lea     $bias+$a, $a_ptr
+       mov     8*2+$a, $acc7
+       mov     8*3+$a, $acc0"
+}
+
+                                                                       {
+########################################################################
+# operate in 4-5-0-1 "name space" that matches multiplication output
+#
+# Local register names for the helpers below: the 256-bit operand lives in
+# $a0..$a3 (= $acc4,$acc5,$acc0,$acc1); $t3 and $t4 are re-declared here so
+# that, together with the outer $t0..$t2, there are five distinct scratch
+# registers.
+my ($a0,$a1,$a2,$a3,$t3,$t4)=($acc4,$acc5,$acc0,$acc1,$acc2,$acc3);
+
+$code.=<<___;
+# __ecp_nistz256_add_toq: a0:a3 = (a0:a3 + b) mod p, where b is the four
+# words at b_ptr. The result is left in a0:a3 and also stored at r_ptr.
+# The modulus p is taken to have limbs (2^64-1, poly1, 0, poly3); poly1 and
+# poly3 must already be in their registers on entry.
+#
+# The carry out of the 256-bit addition is kept as a fifth limb and p is
+# subtracted from the full 257-bit sum. The unreduced sum is restored only
+# when that subtraction borrows, i.e. when the sum is below p. This gives a
+# fully reduced result for every pair of inputs that are themselves below p,
+# including sums that land in the range from p up to 2^256-1.
+.type  __ecp_nistz256_add_toq,\@abi-omnipotent
+.align 32
+__ecp_nistz256_add_toq:
+       xor     $t4, $t4
+       add     8*0($b_ptr), $a0
+       adc     8*1($b_ptr), $a1
+        mov    $a0, $t0
+       adc     8*2($b_ptr), $a2
+       adc     8*3($b_ptr), $a3
+        mov    $a1, $t1
+       adc     \$0, $t4                # fifth limb = carry out of the sum
+
+       sub     \$-1, $a0               # subtract p, keeping sum copy in t0:t3
+        mov    $a2, $t2
+       sbb     $poly1, $a1
+       sbb     \$0, $a2
+        mov    $a3, $t3
+       sbb     $poly3, $a3
+       sbb     \$0, $t4                # CF set <=> 257-bit sum is below p
+
+       cmovc   $t0, $a0                # sum was below p: take it unreduced
+       cmovc   $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovc   $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovc   $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       ret
+.size  __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq
+
+# __ecp_nistz256_sub_fromq: a0:a3 = (a0:a3 - b) mod p, where b is the four
+# words at b_ptr. The result is left in a0:a3 and also stored at r_ptr.
+# The modulus p is taken to have limbs (2^64-1, poly1, 0, poly3); poly1 and
+# poly3 must already be in their registers on entry. Presumably both operands
+# are expected to be below p - confirm against the callers.
+.type  __ecp_nistz256_sub_fromq,\@abi-omnipotent
+.align 32
+__ecp_nistz256_sub_fromq:
+       # plain 256-bit subtraction; t4 becomes all-ones if it borrowed
+       sub     8*0($b_ptr), $a0
+       sbb     8*1($b_ptr), $a1
+        mov    $a0, $t0
+       sbb     8*2($b_ptr), $a2
+       sbb     8*3($b_ptr), $a3
+        mov    $a1, $t1
+       sbb     $t4, $t4
+
+       # a0:a3 = difference + p, with the raw difference kept in t0:t3
+       add     \$-1, $a0
+        mov    $a2, $t2
+       adc     $poly1, $a1
+       adc     \$0, $a2
+        mov    $a3, $t3
+       adc     $poly3, $a3
+       test    $t4, $t4
+
+       # no borrow: take the raw difference back
+       cmovz   $t0, $a0
+       cmovz   $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovz   $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovz   $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       ret
+.size  __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq
+
+# __ecp_nistz256_subq: a0:a3 = (t0:t3 - a0:a3) mod p, entirely in registers.
+# Nothing is written to memory; r_ptr is not used here. The modulus p is
+# taken to have limbs (2^64-1, poly1, 0, poly3); poly1 and poly3 must already
+# be in their registers on entry. t0:t3 are clobbered. Presumably both
+# operands are expected to be below p - confirm against the callers.
+.type  __ecp_nistz256_subq,\@abi-omnipotent
+.align 32
+__ecp_nistz256_subq:
+       # t0:t3 -= a0:a3, raw difference copied to a0:a3; t4 = all-ones on borrow
+       sub     $a0, $t0
+       sbb     $a1, $t1
+        mov    $t0, $a0
+       sbb     $a2, $t2
+       sbb     $a3, $t3
+        mov    $t1, $a1
+       sbb     $t4, $t4
+
+       # t0:t3 = difference + p
+       add     \$-1, $t0
+        mov    $t2, $a2
+       adc     $poly1, $t1
+       adc     \$0, $t2
+        mov    $t3, $a3
+       adc     $poly3, $t3
+       test    $t4, $t4
+
+       # borrow: replace the raw difference by difference + p
+       cmovnz  $t0, $a0
+       cmovnz  $t1, $a1
+       cmovnz  $t2, $a2
+       cmovnz  $t3, $a3
+
+       ret
+.size  __ecp_nistz256_subq,.-__ecp_nistz256_subq
+
+# __ecp_nistz256_mul_by_2q: a0:a3 = (2 * a0:a3) mod p. The result is left in
+# a0:a3 and also stored at r_ptr. The modulus p is taken to have limbs
+# (2^64-1, poly1, 0, poly3); poly1 and poly3 must already be in their
+# registers on entry.
+#
+# The carry out of the 256-bit doubling is kept as a fifth limb and p is
+# subtracted from the full 257-bit value. The unreduced value is restored only
+# when that subtraction borrows, i.e. when the doubled value is below p. This
+# gives a fully reduced result for every input below p, including doubled
+# values that land in the range from p up to 2^256-1.
+.type  __ecp_nistz256_mul_by_2q,\@abi-omnipotent
+.align 32
+__ecp_nistz256_mul_by_2q:
+       xor     $t4, $t4
+       add     $a0, $a0                # a0:a3+a0:a3
+       adc     $a1, $a1
+        mov    $a0, $t0
+       adc     $a2, $a2
+       adc     $a3, $a3
+        mov    $a1, $t1
+       adc     \$0, $t4                # fifth limb = carry out of the doubling
+
+       sub     \$-1, $a0               # subtract p, keeping a copy in t0:t3
+        mov    $a2, $t2
+       sbb     $poly1, $a1
+       sbb     \$0, $a2
+        mov    $a3, $t3
+       sbb     $poly3, $a3
+       sbb     \$0, $t4                # CF set <=> 257-bit value is below p
+
+       cmovc   $t0, $a0                # value was below p: take it unreduced
+       cmovc   $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovc   $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovc   $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       ret
+.size  __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q
+___
+                                                                       }
+# gen_double(flavour): appends one complete point-doubling routine to $code.
+#
+# Any flavour other than "x" produces the exported ecp_nistz256_point_double,
+# with %rax as the first-word register for the multiplication helpers and no
+# pointer displacement. If $addx is set, that entry point first masks the
+# third word of OPENSSL_ia32cap_P with 0x80100 and jumps to .Lpoint_doublex
+# when both bits are set (presumably the BMI2 and ADX feature bits - confirm).
+# Flavour "x" produces the non-exported ecp_nistz256_point_doublex, with %rdx
+# as the first-word register and a 128-byte pointer displacement.
+#
+# The flavour string is also appended to the names of the called helpers
+# (__ecp_nistz256_mul_mont, _sqr_mont, _add_to, _sub_from, _sub, _mul_by_2).
+#
+# Generated code: arguments are the result pointer ($r_ptr) and the input
+# point pointer ($a_ptr); coordinates are 32 bytes each at offsets 0x00, 0x20
+# and 0x40. The three output coordinate pointers are parked in %xmm0-%xmm2.
+sub gen_double () {
+    my $x = shift;
+    my ($src0,$sfx,$bias);
+    # Stack slots, as byte offsets from %rsp (32 bytes each).
+    my ($S,$M,$Zsqr,$in_x,$tmp0)=map(32*$_,(0..4));
+
+    if ($x ne "x") {
+       $src0 = "%rax";
+       $sfx  = "";
+       $bias = 0;
+
+$code.=<<___;
+.globl ecp_nistz256_point_double
+.type  ecp_nistz256_point_double,\@function,2
+.align 32
+ecp_nistz256_point_double:
+___
+$code.=<<___   if ($addx);
+       mov     \$0x80100, %ecx
+       and     OPENSSL_ia32cap_P+8(%rip), %ecx
+       cmp     \$0x80100, %ecx
+       je      .Lpoint_doublex
+___
+    } else {
+       $src0 = "%rdx";
+       $sfx  = "x";
+       $bias = 128;
+
+$code.=<<___;
+.type  ecp_nistz256_point_doublex,\@function,2
+.align 32
+ecp_nistz256_point_doublex:
+.Lpoint_doublex:
+___
+    }
+$code.=<<___;
+       push    %rbp
+       push    %rbx
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       sub     \$32*5+8, %rsp
+
+       movdqu  0x00($a_ptr), %xmm0             # copy  *(P256_POINT *)$a_ptr.x
+       mov     $a_ptr, $b_ptr                  # backup copy
+       movdqu  0x10($a_ptr), %xmm1
+        mov    0x20+8*0($a_ptr), $acc4         # load in_y in "4-5-0-1" order
+        mov    0x20+8*1($a_ptr), $acc5
+        mov    0x20+8*2($a_ptr), $acc0
+        mov    0x20+8*3($a_ptr), $acc1
+        mov    .Lpoly+8*1(%rip), $poly1
+        mov    .Lpoly+8*3(%rip), $poly3
+       movdqa  %xmm0, $in_x(%rsp)
+       movdqa  %xmm1, $in_x+0x10(%rsp)
+       # park the three output coordinate pointers in xmm0..xmm2
+       lea     0x20($r_ptr), $acc2
+       lea     0x40($r_ptr), $acc3
+       movq    $r_ptr, %xmm0
+       movq    $acc2, %xmm1
+       movq    $acc3, %xmm2
+
+       lea     $S(%rsp), $r_ptr
+       call    __ecp_nistz256_mul_by_2$x       # p256_mul_by_2(S, in_y);
+
+       mov     0x40+8*0($a_ptr), $src0
+       mov     0x40+8*1($a_ptr), $acc6
+       mov     0x40+8*2($a_ptr), $acc7
+       mov     0x40+8*3($a_ptr), $acc0
+       lea     0x40-$bias($a_ptr), $a_ptr
+       lea     $Zsqr(%rsp), $r_ptr
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(Zsqr, in_z);
+
+       `&load_for_sqr("$S(%rsp)", "$src0")`
+       lea     $S(%rsp), $r_ptr
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(S, S);
+
+       mov     0x20($b_ptr), $src0             # $b_ptr is still valid
+       mov     0x40+8*0($b_ptr), $acc1
+       mov     0x40+8*1($b_ptr), $acc2
+       mov     0x40+8*2($b_ptr), $acc3
+       mov     0x40+8*3($b_ptr), $acc4
+       lea     0x40-$bias($b_ptr), $a_ptr
+       lea     0x20($b_ptr), $b_ptr
+       movq    %xmm2, $r_ptr
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(res_z, in_z, in_y);
+       call    __ecp_nistz256_mul_by_2$x       # p256_mul_by_2(res_z, res_z);
+
+       mov     $in_x+8*0(%rsp), $acc4          # "4-5-0-1" order
+       mov     $in_x+8*1(%rsp), $acc5
+       lea     $Zsqr(%rsp), $b_ptr
+       mov     $in_x+8*2(%rsp), $acc0
+       mov     $in_x+8*3(%rsp), $acc1
+       lea     $M(%rsp), $r_ptr
+       call    __ecp_nistz256_add_to$x         # p256_add(M, in_x, Zsqr);
+
+       mov     $in_x+8*0(%rsp), $acc4          # "4-5-0-1" order
+       mov     $in_x+8*1(%rsp), $acc5
+       lea     $Zsqr(%rsp), $b_ptr
+       mov     $in_x+8*2(%rsp), $acc0
+       mov     $in_x+8*3(%rsp), $acc1
+       lea     $Zsqr(%rsp), $r_ptr
+       call    __ecp_nistz256_sub_from$x       # p256_sub(Zsqr, in_x, Zsqr);
+
+       `&load_for_sqr("$S(%rsp)", "$src0")`
+       movq    %xmm1, $r_ptr
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(res_y, S);
+___
+{      
+######## ecp_nistz256_div_by_2(res_y, res_y); ##########################
+# operate in 4-5-6-7 "name space" that matches squaring output
+#
+# Inlined halving: p is added to the value, the sum is kept only if the
+# original low word was odd, and the (up to 257-bit) result is shifted right
+# by one bit and stored at $r_ptr.
+# In this scope $poly1/$poly3 name $a_ptr and the outer $t1 (%rbp).
+# NOTE(review): this assumes the preceding sqr_mont call leaves the modulus
+# limbs in those two registers - confirm against __ecp_nistz256_sqr_mont.
+my ($poly1,$poly3)=($a_ptr,$t1);
+my ($a0,$a1,$a2,$a3,$t3,$t4,$t1)=($acc4,$acc5,$acc6,$acc7,$acc0,$acc1,$acc2);
+
+$code.=<<___;
+       xor     $t4, $t4
+       mov     $a0, $t0
+       add     \$-1, $a0
+       mov     $a1, $t1
+       adc     $poly1, $a1
+       mov     $a2, $t2
+       adc     \$0, $a2
+       mov     $a3, $t3
+       adc     $poly3, $a3
+       adc     \$0, $t4
+       xor     $a_ptr, $a_ptr          # borrow $a_ptr
+       test    \$1, $t0
+
+       cmovz   $t0, $a0
+       cmovz   $t1, $a1
+       cmovz   $t2, $a2
+       cmovz   $t3, $a3
+       cmovz   $a_ptr, $t4
+
+       mov     $a1, $t0                # a0:a3>>1
+       shr     \$1, $a0
+       shl     \$63, $t0
+       mov     $a2, $t1
+       shr     \$1, $a1
+       or      $t0, $a0
+       shl     \$63, $t1
+       mov     $a3, $t2
+       shr     \$1, $a2
+       or      $t1, $a1
+       shl     \$63, $t2
+       mov     $a0, 8*0($r_ptr)
+       shr     \$1, $a3
+       mov     $a1, 8*1($r_ptr)
+       shl     \$63, $t4
+       or      $t2, $a2
+       or      $t4, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+___
+}
+$code.=<<___;
+       `&load_for_mul("$M(%rsp)", "$Zsqr(%rsp)", "$src0")`
+       lea     $M(%rsp), $r_ptr
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(M, M, Zsqr);
+
+       # tmp0 = 2*M, also left in registers; adding M to it below gives 3*M
+       lea     $tmp0(%rsp), $r_ptr
+       call    __ecp_nistz256_mul_by_2$x
+
+       lea     $M(%rsp), $b_ptr
+       lea     $M(%rsp), $r_ptr
+       call    __ecp_nistz256_add_to$x         # p256_mul_by_3(M, M);
+
+       `&load_for_mul("$S(%rsp)", "$in_x(%rsp)", "$src0")`
+       lea     $S(%rsp), $r_ptr
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(S, S, in_x);
+
+       lea     $tmp0(%rsp), $r_ptr
+       call    __ecp_nistz256_mul_by_2$x       # p256_mul_by_2(tmp0, S);
+
+       `&load_for_sqr("$M(%rsp)", "$src0")`
+       movq    %xmm0, $r_ptr
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(res_x, M);
+
+       lea     $tmp0(%rsp), $b_ptr
+       mov     $acc6, $acc0                    # harmonize sqr output and sub input
+       mov     $acc7, $acc1
+       mov     $a_ptr, $poly1
+       mov     $t1, $poly3
+       call    __ecp_nistz256_sub_from$x       # p256_sub(res_x, res_x, tmp0);
+
+       mov     $S+8*0(%rsp), $t0
+       mov     $S+8*1(%rsp), $t1
+       mov     $S+8*2(%rsp), $t2
+       mov     $S+8*3(%rsp), $acc2             # "4-5-0-1" order
+       lea     $S(%rsp), $r_ptr
+       call    __ecp_nistz256_sub$x            # p256_sub(S, S, res_x);
+
+       # The xor below sets ZF and nothing up to the call changes flags, so
+       # both cmovz instructions always move. Net effect: the difference is
+       # written back to S and also placed in the mul_mont input registers.
+       mov     $M(%rsp), $src0
+       lea     $M(%rsp), $b_ptr
+       mov     $acc4, $acc6                    # harmonize sub output and mul input
+       xor     %ecx, %ecx
+       mov     $acc4, $S+8*0(%rsp)             # have to save:-(       
+       mov     $acc5, $acc2
+       mov     $acc5, $S+8*1(%rsp)
+       cmovz   $acc0, $acc3
+       mov     $acc0, $S+8*2(%rsp)
+       lea     $S-$bias(%rsp), $a_ptr
+       cmovz   $acc1, $acc4
+       mov     $acc1, $S+8*3(%rsp)
+       mov     $acc6, $acc1
+       lea     $S(%rsp), $r_ptr
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(S, S, M);
+
+       movq    %xmm1, $b_ptr
+       movq    %xmm1, $r_ptr
+       call    __ecp_nistz256_sub_from$x       # p256_sub(res_y, S, res_y);
+
+       add     \$32*5+8, %rsp
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbx
+       pop     %rbp
+       ret
+.size  ecp_nistz256_point_double$sfx,.-ecp_nistz256_point_double$sfx
+___
+}
+# Emit the exported routine, built on the helpers whose names end in "q".
+&gen_double("q");
+
+sub gen_add () {
+    my $x = shift;
+    my ($src0,$sfx,$bias);
+    my ($H,$Hsqr,$R,$Rsqr,$Hcub,
+       $U1,$U2,$S1,$S2,
+       $res_x,$res_y,$res_z,
+       $in1_x,$in1_y,$in1_z,
+       $in2_x,$in2_y,$in2_z)=map(32*$_,(0..17));
+    my ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr);
+
+    if ($x ne "x") {
+       $src0 = "%rax";
+       $sfx  = "";
+       $bias = 0;
+
+$code.=<<___;
+.globl ecp_nistz256_point_add
+.type  ecp_nistz256_point_add,\@function,3
+.align 32
+ecp_nistz256_point_add:
+___
+$code.=<<___   if ($addx);
+       mov     \$0x80100, %ecx
+       and     OPENSSL_ia32cap_P+8(%rip), %ecx
+       cmp     \$0x80100, %ecx
+       je      .Lpoint_addx
+___
+    } else {
+       $src0 = "%rdx";
+       $sfx  = "x";
+       $bias = 128;
+
+$code.=<<___;
+.type  ecp_nistz256_point_addx,\@function,3
+.align 32
+ecp_nistz256_point_addx:
+.Lpoint_addx:
+___
+    }
+$code.=<<___;
+       push    %rbp
+       push    %rbx
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       sub     \$32*18+8, %rsp
+
+       movdqu  0x00($a_ptr), %xmm0             # copy  *(P256_POINT *)$a_ptr
+       movdqu  0x10($a_ptr), %xmm1
+       movdqu  0x20($a_ptr), %xmm2
+       movdqu  0x30($a_ptr), %xmm3
+       movdqu  0x40($a_ptr), %xmm4
+       movdqu  0x50($a_ptr), %xmm5
+       mov     $a_ptr, $b_ptr                  # reassign
+       mov     $b_org, $a_ptr                  # reassign
+       movdqa  %xmm0, $in1_x(%rsp)
+       movdqa  %xmm1, $in1_x+0x10(%rsp)
+       por     %xmm0, %xmm1
+       movdqa  %xmm2, $in1_y(%rsp)
+       movdqa  %xmm3, $in1_y+0x10(%rsp)
+       por     %xmm2, %xmm3
+       movdqa  %xmm4, $in1_z(%rsp)
+       movdqa  %xmm5, $in1_z+0x10(%rsp)
+       por     %xmm1, %xmm3
+
+       movdqu  0x00($a_ptr), %xmm0             # copy  *(P256_POINT *)$b_ptr
+        pshufd \$0xb1, %xmm3, %xmm5
+       movdqu  0x10($a_ptr), %xmm1
+       movdqu  0x20($a_ptr), %xmm2
+        por    %xmm3, %xmm5
+       movdqu  0x30($a_ptr), %xmm3
+        mov    0x40+8*0($a_ptr), $src0         # load original in2_z
+        mov    0x40+8*1($a_ptr), $acc6
+        mov    0x40+8*2($a_ptr), $acc7
+        mov    0x40+8*3($a_ptr), $acc0
+       movdqa  %xmm0, $in2_x(%rsp)
+        pshufd \$0x1e, %xmm5, %xmm4
+       movdqa  %xmm1, $in2_x+0x10(%rsp)
+       por     %xmm0, %xmm1
+        movq   $r_ptr, %xmm0                   # save $r_ptr
+       movdqa  %xmm2, $in2_y(%rsp)
+       movdqa  %xmm3, $in2_y+0x10(%rsp)
+       por     %xmm2, %xmm3
+        por    %xmm4, %xmm5
+        pxor   %xmm4, %xmm4
+       por     %xmm1, %xmm3
+
+       lea     0x40-$bias($a_ptr), $a_ptr      # $a_ptr is still valid
+        mov    $src0, $in2_z+8*0(%rsp)         # make in2_z copy
+        mov    $acc6, $in2_z+8*1(%rsp)
+        mov    $acc7, $in2_z+8*2(%rsp)
+        mov    $acc0, $in2_z+8*3(%rsp)
+       lea     $Z2sqr(%rsp), $r_ptr            # Z2^2
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(Z2sqr, in2_z);
+
+       pcmpeqd %xmm4, %xmm5
+       pshufd  \$0xb1, %xmm3, %xmm4
+       por     %xmm3, %xmm4
+       pshufd  \$0, %xmm5, %xmm5               # in1infty
+       pshufd  \$0x1e, %xmm4, %xmm3
+       por     %xmm3, %xmm4
+       pxor    %xmm3, %xmm3
+       pcmpeqd %xmm3, %xmm4
+       pshufd  \$0, %xmm4, %xmm4               # in2infty
+        mov    0x40+8*0($b_ptr), $src0         # load original in1_z
+        mov    0x40+8*1($b_ptr), $acc6
+        mov    0x40+8*2($b_ptr), $acc7
+        mov    0x40+8*3($b_ptr), $acc0
+
+       lea     0x40-$bias($b_ptr), $a_ptr
+       lea     $Z1sqr(%rsp), $r_ptr            # Z1^2
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(Z1sqr, in1_z);
+
+       `&load_for_mul("$Z2sqr(%rsp)", "$in2_z(%rsp)", "$src0")`
+       lea     $S1(%rsp), $r_ptr               # S1 = Z2^3
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(S1, Z2sqr, in2_z);
+
+       `&load_for_mul("$Z1sqr(%rsp)", "$in1_z(%rsp)", "$src0")`
+       lea     $S2(%rsp), $r_ptr               # S2 = Z1^3
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(S2, Z1sqr, in1_z);
+
+       `&load_for_mul("$S1(%rsp)", "$in1_y(%rsp)", "$src0")`
+       lea     $S1(%rsp), $r_ptr               # S1 = Y1*Z2^3
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(S1, S1, in1_y);
+
+       `&load_for_mul("$S2(%rsp)", "$in2_y(%rsp)", "$src0")`
+       lea     $S2(%rsp), $r_ptr               # S2 = Y2*Z1^3
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(S2, S2, in2_y);
+
+       lea     $S1(%rsp), $b_ptr
+       lea     $R(%rsp), $r_ptr                # R = S2 - S1
+       call    __ecp_nistz256_sub_from$x       # p256_sub(R, S2, S1);
+
+       or      $acc5, $acc4                    # see if result is zero
+       movdqa  %xmm4, %xmm2
+       or      $acc0, $acc4
+       or      $acc1, $acc4
+       por     %xmm5, %xmm2                    # in1infty || in2infty
+       movq    $acc4, %xmm3
+
+       `&load_for_mul("$Z2sqr(%rsp)", "$in1_x(%rsp)", "$src0")`
+       lea     $U1(%rsp), $r_ptr               # U1 = X1*Z2^2
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(U1, in1_x, Z2sqr);
+
+       `&load_for_mul("$Z1sqr(%rsp)", "$in2_x(%rsp)", "$src0")`
+       lea     $U2(%rsp), $r_ptr               # U2 = X2*Z1^2
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(U2, in2_x, Z1sqr);
+
+       lea     $U1(%rsp), $b_ptr
+       lea     $H(%rsp), $r_ptr                # H = U2 - U1
+       call    __ecp_nistz256_sub_from$x       # p256_sub(H, U2, U1);
+
+       or      $acc5, $acc4                    # see if result is zero
+       or      $acc0, $acc4
+       or      $acc1, $acc4
+
+       .byte   0x3e                            # predict taken
+       jnz     .Ladd_proceed$x                 # is_equal(U1,U2)?
+       movq    %xmm2, $acc0
+       movq    %xmm3, $acc1
+       test    $acc0, $acc0
+       jnz     .Ladd_proceed$x                 # (in1infty || in2infty)?
+       test    $acc1, $acc1
+       jz      .Ladd_proceed$x                 # is_equal(S1,S2)?
+
+       movq    %xmm0, $r_ptr                   # restore $r_ptr
+       pxor    %xmm0, %xmm0
+       movdqu  %xmm0, 0x00($r_ptr)
+       movdqu  %xmm0, 0x10($r_ptr)
+       movdqu  %xmm0, 0x20($r_ptr)
+       movdqu  %xmm0, 0x30($r_ptr)
+       movdqu  %xmm0, 0x40($r_ptr)
+       movdqu  %xmm0, 0x50($r_ptr)
+       jmp     .Ladd_done$x
+
+.align 32
+.Ladd_proceed$x:
+       `&load_for_sqr("$R(%rsp)", "$src0")`
+       lea     $Rsqr(%rsp), $r_ptr             # R^2
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(Rsqr, R);
+
+       `&load_for_mul("$H(%rsp)", "$in1_z(%rsp)", "$src0")`
+       lea     $res_z(%rsp), $r_ptr            # Z3 = H*Z1*Z2
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(res_z, H, in1_z);
+
+       `&load_for_sqr("$H(%rsp)", "$src0")`
+       lea     $Hsqr(%rsp), $r_ptr             # H^2
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(Hsqr, H);
+
+       `&load_for_mul("$res_z(%rsp)", "$in2_z(%rsp)", "$src0")`
+       lea     $res_z(%rsp), $r_ptr            # Z3 = H*Z1*Z2
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(res_z, res_z, in2_z);
+
+       `&load_for_mul("$Hsqr(%rsp)", "$H(%rsp)", "$src0")`
+       lea     $Hcub(%rsp), $r_ptr             # H^3
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(Hcub, Hsqr, H);
+
+       `&load_for_mul("$Hsqr(%rsp)", "$U1(%rsp)", "$src0")`
+       lea     $U2(%rsp), $r_ptr               # U1*H^2
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(U2, U1, Hsqr);
+___
+{
+#######################################################################
+# operate in 4-5-0-1 "name space" that matches multiplication output
+#
+my ($acc0,$acc1,$acc2,$acc3,$t3,$t4)=($acc4,$acc5,$acc0,$acc1,$acc2,$acc3);
+my ($poly1, $poly3)=($acc6,$acc7);
+
+$code.=<<___;
+       #lea    $U2(%rsp), $a_ptr
+       #lea    $Hsqr(%rsp), $r_ptr     # 2*U1*H^2
+       #call   __ecp_nistz256_mul_by_2 # ecp_nistz256_mul_by_2(Hsqr, U2);
+
+       add     $acc0, $acc0            # a0:a3+a0:a3
+       lea     $Rsqr(%rsp), $a_ptr
+       adc     $acc1, $acc1
+        mov    $acc0, $t0
+       adc     $acc2, $acc2
+       adc     $acc3, $acc3
+        mov    $acc1, $t1
+       sbb     $t4, $t4
+
+       sub     \$-1, $acc0
+        mov    $acc2, $t2
+       sbb     $poly1, $acc1
+       sbb     \$0, $acc2
+        mov    $acc3, $t3
+       sbb     $poly3, $acc3
+       test    $t4, $t4
+
+       cmovz   $t0, $acc0
+       mov     8*0($a_ptr), $t0
+       cmovz   $t1, $acc1
+       mov     8*1($a_ptr), $t1
+       cmovz   $t2, $acc2
+       mov     8*2($a_ptr), $t2
+       cmovz   $t3, $acc3
+       mov     8*3($a_ptr), $t3
+
+       call    __ecp_nistz256_sub$x            # p256_sub(res_x, Rsqr, Hsqr);
+
+       lea     $Hcub(%rsp), $b_ptr
+       lea     $res_x(%rsp), $r_ptr
+       call    __ecp_nistz256_sub_from$x       # p256_sub(res_x, res_x, Hcub);
+
+       mov     $U2+8*0(%rsp), $t0
+       mov     $U2+8*1(%rsp), $t1
+       mov     $U2+8*2(%rsp), $t2
+       mov     $U2+8*3(%rsp), $t3
+       lea     $res_y(%rsp), $r_ptr
+
+       call    __ecp_nistz256_sub$x            # p256_sub(res_y, U2, res_x);
+
+       mov     $acc0, 8*0($r_ptr)              # save the result, as
+       mov     $acc1, 8*1($r_ptr)              # __ecp_nistz256_sub doesn't
+       mov     $acc2, 8*2($r_ptr)
+       mov     $acc3, 8*3($r_ptr)
+___
+}
+$code.=<<___;
+       `&load_for_mul("$S1(%rsp)", "$Hcub(%rsp)", "$src0")`
+       lea     $S2(%rsp), $r_ptr
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(S2, S1, Hcub);
+
+       `&load_for_mul("$R(%rsp)", "$res_y(%rsp)", "$src0")`
+       lea     $res_y(%rsp), $r_ptr
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(res_y, R, res_y);
+
+       lea     $S2(%rsp), $b_ptr
+       lea     $res_y(%rsp), $r_ptr
+       call    __ecp_nistz256_sub_from$x       # p256_sub(res_y, res_y, S2);
+
+       movq    %xmm0, $r_ptr           # restore $r_ptr
+
+       movdqa  %xmm5, %xmm0            # copy_conditional(res_z, in2_z, in1infty);
+       movdqa  %xmm5, %xmm1
+       pandn   $res_z(%rsp), %xmm0
+       movdqa  %xmm5, %xmm2
+       pandn   $res_z+0x10(%rsp), %xmm1
+       movdqa  %xmm5, %xmm3
+       pand    $in2_z(%rsp), %xmm2
+       pand    $in2_z+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+
+       movdqa  %xmm4, %xmm0            # copy_conditional(res_z, in1_z, in2infty);
+       movdqa  %xmm4, %xmm1
+       pandn   %xmm2, %xmm0
+       movdqa  %xmm4, %xmm2
+       pandn   %xmm3, %xmm1
+       movdqa  %xmm4, %xmm3
+       pand    $in1_z(%rsp), %xmm2
+       pand    $in1_z+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+       movdqu  %xmm2, 0x40($r_ptr)
+       movdqu  %xmm3, 0x50($r_ptr)
+
+       movdqa  %xmm5, %xmm0            # copy_conditional(res_x, in2_x, in1infty);
+       movdqa  %xmm5, %xmm1
+       pandn   $res_x(%rsp), %xmm0
+       movdqa  %xmm5, %xmm2
+       pandn   $res_x+0x10(%rsp), %xmm1
+       movdqa  %xmm5, %xmm3
+       pand    $in2_x(%rsp), %xmm2
+       pand    $in2_x+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+
+       movdqa  %xmm4, %xmm0            # copy_conditional(res_x, in1_x, in2infty);
+       movdqa  %xmm4, %xmm1
+       pandn   %xmm2, %xmm0
+       movdqa  %xmm4, %xmm2
+       pandn   %xmm3, %xmm1
+       movdqa  %xmm4, %xmm3
+       pand    $in1_x(%rsp), %xmm2
+       pand    $in1_x+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+       movdqu  %xmm2, 0x00($r_ptr)
+       movdqu  %xmm3, 0x10($r_ptr)
+
+       movdqa  %xmm5, %xmm0            # copy_conditional(res_y, in2_y, in1infty);
+       movdqa  %xmm5, %xmm1
+       pandn   $res_y(%rsp), %xmm0
+       movdqa  %xmm5, %xmm2
+       pandn   $res_y+0x10(%rsp), %xmm1
+       movdqa  %xmm5, %xmm3
+       pand    $in2_y(%rsp), %xmm2
+       pand    $in2_y+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+
+       movdqa  %xmm4, %xmm0            # copy_conditional(res_y, in1_y, in2infty);
+       movdqa  %xmm4, %xmm1
+       pandn   %xmm2, %xmm0
+       movdqa  %xmm4, %xmm2
+       pandn   %xmm3, %xmm1
+       movdqa  %xmm4, %xmm3
+       pand    $in1_y(%rsp), %xmm2
+       pand    $in1_y+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+       movdqu  %xmm2, 0x20($r_ptr)
+       movdqu  %xmm3, 0x30($r_ptr)
+
+.Ladd_done$x:
+       add     \$32*18+8, %rsp
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbx
+       pop     %rbp
+       ret
+.size  ecp_nistz256_point_add$sfx,.-ecp_nistz256_point_add$sfx
+___
+}
+&gen_add("q");
+
+########################################################################
+# gen_add_affine($x) appends the code for ecp_nistz256_point_add_affine
+# to $code.
+#
+# Any argument other than "x" produces the exported entry point; when
+# $addx is set that entry point first masks OPENSSL_ia32cap_P+8 with
+# 0x80100 and jumps to .Lpoint_add_affinex if both bits are present.
+# The argument "x" produces the local ecp_nistz256_point_add_affinex
+# flavour, which calls the __ecp_nistz256_*x helpers instead.
+#
+sub gen_add_affine () {
+    my $x = shift;
+    my ($src0,$sfx,$bias);
+    # Fifteen 32-byte stack slots, expressed as offsets from %rsp; the
+    # prologue below reserves 32*15+8 bytes for them.
+    my ($U2,$S2,$H,$R,$Hsqr,$Hcub,$Rsqr,
+       $res_x,$res_y,$res_z,
+       $in1_x,$in1_y,$in1_z,
+       $in2_x,$in2_y)=map(32*$_,(0..14));
+    # Z1^2 shares the stack slot of S2.
+    my $Z1sqr = $S2;
+
+    # Flavour-specific settings: $src0 is the register name handed to
+    # load_for_mul/load_for_sqr, $sfx is appended to the symbol name in
+    # the closing .size directive, and $bias is subtracted from the
+    # addresses placed in $a_ptr ahead of the sqr/mul calls.
+    if ($x ne "x") {
+       $src0 = "%rax";
+       $sfx  = "";
+       $bias = 0;
+
+$code.=<<___;
+.globl ecp_nistz256_point_add_affine
+.type  ecp_nistz256_point_add_affine,\@function,3
+.align 32
+ecp_nistz256_point_add_affine:
+___
+$code.=<<___   if ($addx);
+       mov     \$0x80100, %ecx
+       and     OPENSSL_ia32cap_P+8(%rip), %ecx
+       cmp     \$0x80100, %ecx
+       je      .Lpoint_add_affinex
+___
+    } else {
+       $src0 = "%rdx";
+       $sfx  = "x";
+       $bias = 128;
+
+$code.=<<___;
+.type  ecp_nistz256_point_add_affinex,\@function,3
+.align 32
+ecp_nistz256_point_add_affinex:
+.Lpoint_add_affinex:
+___
+    }
+# Common body: save callee-saved registers, copy both inputs to the
+# stack while OR-folding their words into the in1infty/in2infty masks,
+# then run the field-arithmetic sequence annotated in the emitted text.
+$code.=<<___;
+       push    %rbp
+       push    %rbx
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       sub     \$32*15+8, %rsp
+
+       movdqu  0x00($a_ptr), %xmm0     # copy  *(P256_POINT *)$a_ptr
+       mov     $b_org, $b_ptr          # reassign
+       movdqu  0x10($a_ptr), %xmm1
+       movdqu  0x20($a_ptr), %xmm2
+       movdqu  0x30($a_ptr), %xmm3
+       movdqu  0x40($a_ptr), %xmm4
+       movdqu  0x50($a_ptr), %xmm5
+        mov    0x40+8*0($a_ptr), $src0 # load original in1_z
+        mov    0x40+8*1($a_ptr), $acc6
+        mov    0x40+8*2($a_ptr), $acc7
+        mov    0x40+8*3($a_ptr), $acc0
+       movdqa  %xmm0, $in1_x(%rsp)
+       movdqa  %xmm1, $in1_x+0x10(%rsp)
+       por     %xmm0, %xmm1
+       movdqa  %xmm2, $in1_y(%rsp)
+       movdqa  %xmm3, $in1_y+0x10(%rsp)
+       por     %xmm2, %xmm3
+       movdqa  %xmm4, $in1_z(%rsp)
+       movdqa  %xmm5, $in1_z+0x10(%rsp)
+       por     %xmm1, %xmm3
+
+       movdqu  0x00($b_ptr), %xmm0     # copy  *(P256_POINT_AFFINE *)$b_ptr
+        pshufd \$0xb1, %xmm3, %xmm5
+       movdqu  0x10($b_ptr), %xmm1
+       movdqu  0x20($b_ptr), %xmm2
+        por    %xmm3, %xmm5
+       movdqu  0x30($b_ptr), %xmm3
+       movdqa  %xmm0, $in2_x(%rsp)
+        pshufd \$0x1e, %xmm5, %xmm4
+       movdqa  %xmm1, $in2_x+0x10(%rsp)
+       por     %xmm0, %xmm1
+        movq   $r_ptr, %xmm0           # save $r_ptr
+       movdqa  %xmm2, $in2_y(%rsp)
+       movdqa  %xmm3, $in2_y+0x10(%rsp)
+       por     %xmm2, %xmm3
+        por    %xmm4, %xmm5
+        pxor   %xmm4, %xmm4
+       por     %xmm1, %xmm3
+
+       lea     0x40-$bias($a_ptr), $a_ptr      # $a_ptr is still valid
+       lea     $Z1sqr(%rsp), $r_ptr            # Z1^2
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(Z1sqr, in1_z);
+
+       pcmpeqd %xmm4, %xmm5
+       pshufd  \$0xb1, %xmm3, %xmm4
+        mov    0x00($b_ptr), $src0             # $b_ptr is still valid
+        #lea   0x00($b_ptr), $b_ptr
+        mov    $acc4, $acc1                    # harmonize sqr output and mul input
+       por     %xmm3, %xmm4
+       pshufd  \$0, %xmm5, %xmm5               # in1infty
+       pshufd  \$0x1e, %xmm4, %xmm3
+        mov    $acc5, $acc2
+       por     %xmm3, %xmm4
+       pxor    %xmm3, %xmm3
+        mov    $acc6, $acc3
+       pcmpeqd %xmm3, %xmm4
+       pshufd  \$0, %xmm4, %xmm4               # in2infty
+
+       lea     $Z1sqr-$bias(%rsp), $a_ptr
+       mov     $acc7, $acc4
+       lea     $U2(%rsp), $r_ptr               # U2 = X2*Z1^2
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(U2, Z1sqr, in2_x);
+
+       lea     $in1_x(%rsp), $b_ptr
+       lea     $H(%rsp), $r_ptr                # H = U2 - U1
+       call    __ecp_nistz256_sub_from$x       # p256_sub(H, U2, in1_x);
+
+       `&load_for_mul("$Z1sqr(%rsp)", "$in1_z(%rsp)", "$src0")`
+       lea     $S2(%rsp), $r_ptr               # S2 = Z1^3
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(S2, Z1sqr, in1_z);
+
+       `&load_for_mul("$H(%rsp)", "$in1_z(%rsp)", "$src0")`
+       lea     $res_z(%rsp), $r_ptr            # Z3 = H*Z1*Z2
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(res_z, H, in1_z);
+
+       `&load_for_mul("$S2(%rsp)", "$in2_y(%rsp)", "$src0")`
+       lea     $S2(%rsp), $r_ptr               # S2 = Y2*Z1^3
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(S2, S2, in2_y);
+
+       lea     $in1_y(%rsp), $b_ptr
+       lea     $R(%rsp), $r_ptr                # R = S2 - S1
+       call    __ecp_nistz256_sub_from$x       # p256_sub(R, S2, in1_y);
+
+       `&load_for_sqr("$H(%rsp)", "$src0")`
+       lea     $Hsqr(%rsp), $r_ptr             # H^2
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(Hsqr, H);
+
+       `&load_for_sqr("$R(%rsp)", "$src0")`
+       lea     $Rsqr(%rsp), $r_ptr             # R^2
+       call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(Rsqr, R);
+
+       `&load_for_mul("$H(%rsp)", "$Hsqr(%rsp)", "$src0")`
+       lea     $Hcub(%rsp), $r_ptr             # H^3
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(Hcub, Hsqr, H);
+
+       `&load_for_mul("$Hsqr(%rsp)", "$in1_x(%rsp)", "$src0")`
+       lea     $U2(%rsp), $r_ptr               # U1*H^2
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(U2, in1_x, Hsqr);
+___
+{
+#######################################################################
+# operate in 4-5-0-1 "name space" that matches multiplication output
+#
+# The lexical remap below is confined to this bare block, so the inlined
+# doubling can consume the product straight from the registers in which
+# the preceding mul_mont call left it.
+my ($acc0,$acc1,$acc2,$acc3,$t3,$t4)=($acc4,$acc5,$acc0,$acc1,$acc2,$acc3);
+my ($poly1, $poly3)=($acc6,$acc7);
+
+$code.=<<___;
+       #lea    $U2(%rsp), $a_ptr
+       #lea    $Hsqr(%rsp), $r_ptr     # 2*U1*H^2
+       #call   __ecp_nistz256_mul_by_2 # ecp_nistz256_mul_by_2(Hsqr, U2);
+
+       add     $acc0, $acc0            # a0:a3+a0:a3
+       lea     $Rsqr(%rsp), $a_ptr
+       adc     $acc1, $acc1
+        mov    $acc0, $t0
+       adc     $acc2, $acc2
+       adc     $acc3, $acc3
+        mov    $acc1, $t1
+       sbb     $t4, $t4
+
+       sub     \$-1, $acc0
+        mov    $acc2, $t2
+       sbb     $poly1, $acc1
+       sbb     \$0, $acc2
+        mov    $acc3, $t3
+       sbb     $poly3, $acc3
+       test    $t4, $t4
+
+       cmovz   $t0, $acc0
+       mov     8*0($a_ptr), $t0
+       cmovz   $t1, $acc1
+       mov     8*1($a_ptr), $t1
+       cmovz   $t2, $acc2
+       mov     8*2($a_ptr), $t2
+       cmovz   $t3, $acc3
+       mov     8*3($a_ptr), $t3
+
+       call    __ecp_nistz256_sub$x            # p256_sub(res_x, Rsqr, Hsqr);
+
+       lea     $Hcub(%rsp), $b_ptr
+       lea     $res_x(%rsp), $r_ptr
+       call    __ecp_nistz256_sub_from$x       # p256_sub(res_x, res_x, Hcub);
+
+       mov     $U2+8*0(%rsp), $t0
+       mov     $U2+8*1(%rsp), $t1
+       mov     $U2+8*2(%rsp), $t2
+       mov     $U2+8*3(%rsp), $t3
+       lea     $H(%rsp), $r_ptr
+
+       call    __ecp_nistz256_sub$x            # p256_sub(H, U2, res_x);
+
+       mov     $acc0, 8*0($r_ptr)              # save the result, as
+       mov     $acc1, 8*1($r_ptr)              # __ecp_nistz256_sub doesn't
+       mov     $acc2, 8*2($r_ptr)
+       mov     $acc3, 8*3($r_ptr)
+___
+}
+# Finish res_y, then write the result to the saved $r_ptr, masking each
+# coordinate with the in1infty (%xmm5) and in2infty (%xmm4) selectors.
+$code.=<<___;
+       `&load_for_mul("$Hcub(%rsp)", "$in1_y(%rsp)", "$src0")`
+       lea     $S2(%rsp), $r_ptr
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(S2, Hcub, in1_y);
+
+       `&load_for_mul("$H(%rsp)", "$R(%rsp)", "$src0")`
+       lea     $H(%rsp), $r_ptr
+       call    __ecp_nistz256_mul_mont$x       # p256_mul_mont(H, H, R);
+
+       lea     $S2(%rsp), $b_ptr
+       lea     $res_y(%rsp), $r_ptr
+       call    __ecp_nistz256_sub_from$x       # p256_sub(res_y, H, S2);
+
+       movq    %xmm0, $r_ptr           # restore $r_ptr
+
+       movdqa  %xmm5, %xmm0            # copy_conditional(res_z, ONE, in1infty);
+       movdqa  %xmm5, %xmm1
+       pandn   $res_z(%rsp), %xmm0
+       movdqa  %xmm5, %xmm2
+       pandn   $res_z+0x10(%rsp), %xmm1
+       movdqa  %xmm5, %xmm3
+       pand    .LONE_mont(%rip), %xmm2
+       pand    .LONE_mont+0x10(%rip), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+
+       movdqa  %xmm4, %xmm0            # copy_conditional(res_z, in1_z, in2infty);
+       movdqa  %xmm4, %xmm1
+       pandn   %xmm2, %xmm0
+       movdqa  %xmm4, %xmm2
+       pandn   %xmm3, %xmm1
+       movdqa  %xmm4, %xmm3
+       pand    $in1_z(%rsp), %xmm2
+       pand    $in1_z+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+       movdqu  %xmm2, 0x40($r_ptr)
+       movdqu  %xmm3, 0x50($r_ptr)
+
+       movdqa  %xmm5, %xmm0            # copy_conditional(res_x, in2_x, in1infty);
+       movdqa  %xmm5, %xmm1
+       pandn   $res_x(%rsp), %xmm0
+       movdqa  %xmm5, %xmm2
+       pandn   $res_x+0x10(%rsp), %xmm1
+       movdqa  %xmm5, %xmm3
+       pand    $in2_x(%rsp), %xmm2
+       pand    $in2_x+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+
+       movdqa  %xmm4, %xmm0            # copy_conditional(res_x, in1_x, in2infty);
+       movdqa  %xmm4, %xmm1
+       pandn   %xmm2, %xmm0
+       movdqa  %xmm4, %xmm2
+       pandn   %xmm3, %xmm1
+       movdqa  %xmm4, %xmm3
+       pand    $in1_x(%rsp), %xmm2
+       pand    $in1_x+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+       movdqu  %xmm2, 0x00($r_ptr)
+       movdqu  %xmm3, 0x10($r_ptr)
+
+       movdqa  %xmm5, %xmm0            # copy_conditional(res_y, in2_y, in1infty);
+       movdqa  %xmm5, %xmm1
+       pandn   $res_y(%rsp), %xmm0
+       movdqa  %xmm5, %xmm2
+       pandn   $res_y+0x10(%rsp), %xmm1
+       movdqa  %xmm5, %xmm3
+       pand    $in2_y(%rsp), %xmm2
+       pand    $in2_y+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+
+       movdqa  %xmm4, %xmm0            # copy_conditional(res_y, in1_y, in2infty);
+       movdqa  %xmm4, %xmm1
+       pandn   %xmm2, %xmm0
+       movdqa  %xmm4, %xmm2
+       pandn   %xmm3, %xmm1
+       movdqa  %xmm4, %xmm3
+       pand    $in1_y(%rsp), %xmm2
+       pand    $in1_y+0x10(%rsp), %xmm3
+       por     %xmm0, %xmm2
+       por     %xmm1, %xmm3
+       movdqu  %xmm2, 0x20($r_ptr)
+       movdqu  %xmm3, 0x30($r_ptr)
+
+       add     \$32*15+8, %rsp
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbx
+       pop     %rbp
+       ret
+.size  ecp_nistz256_point_add_affine$sfx,.-ecp_nistz256_point_add_affine$sfx
+___
+}
+# Emit the non-"x" flavour (any argument other than "x" selects it).
+&gen_add_affine("q");
+
+########################################################################
+# AD*X magic
+#
+# Everything below is generated only when $addx is set: four local
+# helper routines carrying an "x" suffix (__ecp_nistz256_add_tox,
+# __ecp_nistz256_sub_fromx, __ecp_nistz256_subx and
+# __ecp_nistz256_mul_by_2x), followed by the "x" flavours of
+# gen_double, gen_add and gen_add_affine.
+#
+# Each helper opens with "xor $t4,$t4", which clears both $t4 and the
+# carry flag, so the adc/sbb chain that follows starts without a carry
+# in.  The carry/borrow out of that chain is captured in bit 0 of $t4
+# and moved back into the carry flag by "bt \$0,$t4" to drive the final
+# cmov selection between the raw and the corrected result.
+#
+if ($addx) {                                                           {
+########################################################################
+# operate in 4-5-0-1 "name space" that matches multiplication output
+#
+# This remap is lexically scoped to the inner bare block, which ends
+# right after the heredoc below.
+my ($a0,$a1,$a2,$a3,$t3,$t4)=($acc4,$acc5,$acc0,$acc1,$acc2,$acc3);
+
+$code.=<<___;
+.type  __ecp_nistz256_add_tox,\@abi-omnipotent
+.align 32
+__ecp_nistz256_add_tox:
+       xor     $t4, $t4
+       adc     8*0($b_ptr), $a0
+       adc     8*1($b_ptr), $a1
+        mov    $a0, $t0
+       adc     8*2($b_ptr), $a2
+       adc     8*3($b_ptr), $a3
+        mov    $a1, $t1
+       adc     \$0, $t4
+
+       xor     $t3, $t3
+       sbb     \$-1, $a0
+        mov    $a2, $t2
+       sbb     $poly1, $a1
+       sbb     \$0, $a2
+        mov    $a3, $t3
+       sbb     $poly3, $a3
+
+       bt      \$0, $t4
+       cmovnc  $t0, $a0
+       cmovnc  $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovnc  $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovnc  $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       ret
+.size  __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
+
+.type  __ecp_nistz256_sub_fromx,\@abi-omnipotent
+.align 32
+__ecp_nistz256_sub_fromx:
+       xor     $t4, $t4
+       sbb     8*0($b_ptr), $a0
+       sbb     8*1($b_ptr), $a1
+        mov    $a0, $t0
+       sbb     8*2($b_ptr), $a2
+       sbb     8*3($b_ptr), $a3
+        mov    $a1, $t1
+       sbb     \$0, $t4
+
+       xor     $t3, $t3
+       adc     \$-1, $a0
+        mov    $a2, $t2
+       adc     $poly1, $a1
+       adc     \$0, $a2
+        mov    $a3, $t3
+       adc     $poly3, $a3
+
+       bt      \$0, $t4
+       cmovnc  $t0, $a0
+       cmovnc  $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovnc  $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovnc  $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       ret
+.size  __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
+
+.type  __ecp_nistz256_subx,\@abi-omnipotent
+.align 32
+__ecp_nistz256_subx:
+       xor     $t4, $t4
+       sbb     $a0, $t0
+       sbb     $a1, $t1
+        mov    $t0, $a0
+       sbb     $a2, $t2
+       sbb     $a3, $t3
+        mov    $t1, $a1
+       sbb     \$0, $t4
+
+       xor     $a3 ,$a3
+       adc     \$-1, $t0
+        mov    $t2, $a2
+       adc     $poly1, $t1
+       adc     \$0, $t2
+        mov    $t3, $a3
+       adc     $poly3, $t3
+
+       bt      \$0, $t4
+       cmovc   $t0, $a0
+       cmovc   $t1, $a1
+       cmovc   $t2, $a2
+       cmovc   $t3, $a3
+
+       ret
+.size  __ecp_nistz256_subx,.-__ecp_nistz256_subx
+
+.type  __ecp_nistz256_mul_by_2x,\@abi-omnipotent
+.align 32
+__ecp_nistz256_mul_by_2x:
+       xor     $t4, $t4
+       adc     $a0, $a0                # a0:a3+a0:a3
+       adc     $a1, $a1
+        mov    $a0, $t0
+       adc     $a2, $a2
+       adc     $a3, $a3
+        mov    $a1, $t1
+       adc     \$0, $t4
+
+       xor     $t3, $t3
+       sbb     \$-1, $a0
+        mov    $a2, $t2
+       sbb     $poly1, $a1
+       sbb     \$0, $a2
+        mov    $a3, $t3
+       sbb     $poly3, $a3
+
+       bt      \$0, $t4
+       cmovnc  $t0, $a0
+       cmovnc  $t1, $a1
+       mov     $a0, 8*0($r_ptr)
+       cmovnc  $t2, $a2
+       mov     $a1, 8*1($r_ptr)
+       cmovnc  $t3, $a3
+       mov     $a2, 8*2($r_ptr)
+       mov     $a3, 8*3($r_ptr)
+
+       ret
+.size  __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
+___
+                                                                       }
+# Instantiate the "x" flavours of the point routines; they call the
+# *x helpers emitted above.
+&gen_double("x");
+&gen_add("x");
+&gen_add_affine("x");
+}
+}}}
+
+# Replace every backtick-quoted span in $code with the result of
+# eval'ing its contents as Perl (this is what expands the embedded
+# &load_for_mul(...) / &load_for_sqr(...) calls), then write the
+# finished text to STDOUT.
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+print $code;
+close STDOUT;
index c4e7aea..6d3178f 100644 (file)
@@ -240,6 +240,12 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator,
  */
 const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group);
 
+/** Returns the montgomery data for order(Generator)
+ *  \param  group  EC_GROUP object
+ *  \return the currently used montgomery data (possibly NULL).
+*/
+BN_MONT_CTX *EC_GROUP_get_mont_data(const EC_GROUP *group);
+
 /** Gets the order of a EC_GROUP
  *  \param  group  EC_GROUP object
  *  \param  order  BIGNUM to which the order is copied
@@ -404,6 +410,9 @@ typedef struct {
  */
 size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems);
 
+const char *EC_curve_nid2nist(int nid);
+int EC_curve_nist2nid(const char *name);
+
 /********************************************************************/
 /*                    EC_POINT functions                            */
 /********************************************************************/
@@ -986,10 +995,78 @@ int EC_KEY_print_fp(FILE *fp, const EC_KEY *key, int off);
 # endif
 
 # define EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx, nid) \
-        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, EVP_PKEY_OP_PARAMGEN, \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_PARAMGEN|EVP_PKEY_OP_KEYGEN, \
                                 EVP_PKEY_CTRL_EC_PARAMGEN_CURVE_NID, nid, NULL)
 
+# define EVP_PKEY_CTX_set_ec_param_enc(ctx, flag) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_PARAMGEN|EVP_PKEY_OP_KEYGEN, \
+                                EVP_PKEY_CTRL_EC_PARAM_ENC, flag, NULL)
+
+# define EVP_PKEY_CTX_set_ecdh_cofactor_mode(ctx, flag) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_EC_ECDH_COFACTOR, flag, NULL)
+
+# define EVP_PKEY_CTX_get_ecdh_cofactor_mode(ctx) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_EC_ECDH_COFACTOR, -2, NULL)
+
+# define EVP_PKEY_CTX_set_ecdh_kdf_type(ctx, kdf) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_EC_KDF_TYPE, kdf, NULL)
+
+# define EVP_PKEY_CTX_get_ecdh_kdf_type(ctx) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_EC_KDF_TYPE, -2, NULL)
+
+/*
+ * ECDH KDF controls, all issued for EVP_PKEY_OP_DERIVE on EVP_PKEY_EC.
+ * Pointer-valued macro arguments are parenthesised before the (void *)
+ * cast so that an expression argument (e.g. "cond ? a : b") is cast as a
+ * whole rather than only its first operand.
+ */
+
+/* Pass |md| as the pointer argument of EVP_PKEY_CTRL_EC_KDF_MD. */
+# define EVP_PKEY_CTX_set_ecdh_kdf_md(ctx, md) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_EC_KDF_MD, 0, (void *)(md))
+
+/* Pass |pmd| as the pointer argument of EVP_PKEY_CTRL_GET_EC_KDF_MD. */
+# define EVP_PKEY_CTX_get_ecdh_kdf_md(ctx, pmd) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_GET_EC_KDF_MD, 0, (void *)(pmd))
+
+/* Pass |len| as the integer argument of EVP_PKEY_CTRL_EC_KDF_OUTLEN. */
+# define EVP_PKEY_CTX_set_ecdh_kdf_outlen(ctx, len) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_EC_KDF_OUTLEN, len, NULL)
+
+/* Pass |plen| as the pointer argument of EVP_PKEY_CTRL_GET_EC_KDF_OUTLEN. */
+# define EVP_PKEY_CTX_get_ecdh_kdf_outlen(ctx, plen) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_DERIVE, \
+                        EVP_PKEY_CTRL_GET_EC_KDF_OUTLEN, 0, (void *)(plen))
+
+/* Pass |plen| and |p| as the integer/pointer arguments of EVP_PKEY_CTRL_EC_KDF_UKM. */
+# define EVP_PKEY_CTX_set0_ecdh_kdf_ukm(ctx, p, plen) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_EC_KDF_UKM, plen, (void *)(p))
+
+/* Pass |p| as the pointer argument of EVP_PKEY_CTRL_GET_EC_KDF_UKM. */
+# define EVP_PKEY_CTX_get0_ecdh_kdf_ukm(ctx, p) \
+        EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \
+                                EVP_PKEY_OP_DERIVE, \
+                                EVP_PKEY_CTRL_GET_EC_KDF_UKM, 0, (void *)(p))
+
 # define EVP_PKEY_CTRL_EC_PARAMGEN_CURVE_NID             (EVP_PKEY_ALG_CTRL + 1)
+# define EVP_PKEY_CTRL_EC_PARAM_ENC                      (EVP_PKEY_ALG_CTRL + 2)
+# define EVP_PKEY_CTRL_EC_ECDH_COFACTOR                  (EVP_PKEY_ALG_CTRL + 3)
+# define EVP_PKEY_CTRL_EC_KDF_TYPE                       (EVP_PKEY_ALG_CTRL + 4)
+# define EVP_PKEY_CTRL_EC_KDF_MD                         (EVP_PKEY_ALG_CTRL + 5)
+# define EVP_PKEY_CTRL_GET_EC_KDF_MD                     (EVP_PKEY_ALG_CTRL + 6)
+# define EVP_PKEY_CTRL_EC_KDF_OUTLEN                     (EVP_PKEY_ALG_CTRL + 7)
+# define EVP_PKEY_CTRL_GET_EC_KDF_OUTLEN                 (EVP_PKEY_ALG_CTRL + 8)
+# define EVP_PKEY_CTRL_EC_KDF_UKM                        (EVP_PKEY_ALG_CTRL + 9)
+# define EVP_PKEY_CTRL_GET_EC_KDF_UKM                    (EVP_PKEY_ALG_CTRL + 10)
+/* KDF types */
+# define EVP_PKEY_ECDH_KDF_NONE                          1
+# define EVP_PKEY_ECDH_KDF_X9_62                         2
 
 /* BEGIN ERROR CODES */
 /*
@@ -1007,6 +1084,8 @@ void ERR_load_EC_strings(void);
 # define EC_F_D2I_ECPKPARAMETERS                          145
 # define EC_F_D2I_ECPRIVATEKEY                            146
 # define EC_F_DO_EC_KEY_PRINT                             221
+# define EC_F_ECDH_CMS_DECRYPT                            238
+# define EC_F_ECDH_CMS_SET_SHARED_INFO                    239
 # define EC_F_ECKEY_PARAM2TYPE                            223
 # define EC_F_ECKEY_PARAM_DECODE                          212
 # define EC_F_ECKEY_PRIV_DECODE                           213
@@ -1018,6 +1097,12 @@ void ERR_load_EC_strings(void);
 # define EC_F_ECPARAMETERS_PRINT_FP                       148
 # define EC_F_ECPKPARAMETERS_PRINT                        149
 # define EC_F_ECPKPARAMETERS_PRINT_FP                     150
+# define EC_F_ECP_NISTZ256_GET_AFFINE                     240
+# define EC_F_ECP_NISTZ256_MULT_PRECOMPUTE                243
+# define EC_F_ECP_NISTZ256_POINTS_MUL                     241
+# define EC_F_ECP_NISTZ256_PRE_COMP_NEW                   244
+# define EC_F_ECP_NISTZ256_SET_WORDS                      245
+# define EC_F_ECP_NISTZ256_WINDOWED_MUL                   242
 # define EC_F_ECP_NIST_MOD_192                            203
 # define EC_F_ECP_NIST_MOD_224                            204
 # define EC_F_ECP_NIST_MOD_256                            205
@@ -1157,6 +1242,7 @@ void ERR_load_EC_strings(void);
 # define EC_R_INVALID_COMPRESSED_POINT                    110
 # define EC_R_INVALID_COMPRESSION_BIT                     109
 # define EC_R_INVALID_CURVE                               141
+# define EC_R_INVALID_DIGEST                              151
 # define EC_R_INVALID_DIGEST_TYPE                         138
 # define EC_R_INVALID_ENCODING                            102
 # define EC_R_INVALID_FIELD                               103
@@ -1165,6 +1251,7 @@ void ERR_load_EC_strings(void);
 # define EC_R_INVALID_PENTANOMIAL_BASIS                   132
 # define EC_R_INVALID_PRIVATE_KEY                         123
 # define EC_R_INVALID_TRINOMIAL_BASIS                     137
+# define EC_R_KDF_PARAMETER_ERROR                         148
 # define EC_R_KEYS_NOT_SET                                140
 # define EC_R_MISSING_PARAMETERS                          124
 # define EC_R_MISSING_PRIVATE_KEY                         125
@@ -1175,9 +1262,11 @@ void ERR_load_EC_strings(void);
 # define EC_R_NO_FIELD_MOD                                133
 # define EC_R_NO_PARAMETERS_SET                           139
 # define EC_R_PASSED_NULL_PARAMETER                       134
+# define EC_R_PEER_KEY_ERROR                              149
 # define EC_R_PKPARAMETERS2GROUP_FAILURE                  127
 # define EC_R_POINT_AT_INFINITY                           106
 # define EC_R_POINT_IS_NOT_ON_CURVE                       107
+# define EC_R_SHARED_INFO_ERROR                           150
 # define EC_R_SLOT_FULL                                   108
 # define EC_R_UNDEFINED_GENERATOR                         113
 # define EC_R_UNDEFINED_ORDER                             128
index 5cefb5a..83e208c 100644 (file)
 #ifndef OPENSSL_NO_CMS
 # include <openssl/cms.h>
 #endif
+#include <openssl/asn1t.h>
 #include "asn1_locl.h"
 
+static int ecdh_cms_decrypt(CMS_RecipientInfo *ri);
+static int ecdh_cms_encrypt(CMS_RecipientInfo *ri);
+
 static int eckey_param2type(int *pptype, void **ppval, EC_KEY *ec_key)
 {
     const EC_GROUP *group;
@@ -580,10 +584,21 @@ static int ec_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
             X509_ALGOR_set0(alg2, OBJ_nid2obj(snid), V_ASN1_UNDEF, 0);
         }
         return 1;
+
+    case ASN1_PKEY_CTRL_CMS_ENVELOPE:
+        if (arg1 == 1)
+            return ecdh_cms_decrypt(arg2);
+        else if (arg1 == 0)
+            return ecdh_cms_encrypt(arg2);
+        return -2;
+
+    case ASN1_PKEY_CTRL_CMS_RI_TYPE:
+        *(int *)arg2 = CMS_RECIPINFO_AGREE;
+        return 1;
 #endif
 
     case ASN1_PKEY_CTRL_DEFAULT_MD_NID:
-        *(int *)arg2 = NID_sha1;
+        *(int *)arg2 = NID_sha256;
         return 2;
 
     default:
@@ -625,3 +640,326 @@ const EVP_PKEY_ASN1_METHOD eckey_asn1_meth = {
     old_ec_priv_decode,
     old_ec_priv_encode
 };
+
+#ifndef OPENSSL_NO_CMS
+
+/*
+ * Build the peer EC public key from a CMS originator public key and install
+ * it in |pctx| as the peer key for derivation.
+ *
+ * |alg| must carry the id-ecPublicKey OID. When its parameters are absent
+ * (V_ASN1_UNDEF) or NULL, the group is copied from the key already attached
+ * to |pctx|; otherwise the parameters are decoded with eckey_type2param().
+ * |pubkey| holds the encoded public point and must be non-empty.
+ *
+ * Returns 1 on success and 0 on any failure. No error is queued here; the
+ * caller reports the failure.
+ */
+static int ecdh_cms_set_peerkey(EVP_PKEY_CTX *pctx,
+                                X509_ALGOR *alg, ASN1_BIT_STRING *pubkey)
+{
+    ASN1_OBJECT *aoid;
+    int atype;
+    void *aval;
+    int rv = 0;
+    EVP_PKEY *pkpeer = NULL;
+    EC_KEY *ecpeer = NULL;
+    const unsigned char *p;
+    int plen;
+    X509_ALGOR_get0(&aoid, &atype, &aval, alg);
+    if (OBJ_obj2nid(aoid) != NID_X9_62_id_ecPublicKey)
+        goto err;
+    /* If absent parameters get group from main key */
+    if (atype == V_ASN1_UNDEF || atype == V_ASN1_NULL) {
+        const EC_GROUP *grp;
+        EVP_PKEY *pk;
+        pk = EVP_PKEY_CTX_get0_pkey(pctx);
+        if (!pk)
+            goto err;
+        grp = EC_KEY_get0_group(pk->pkey.ec);
+        ecpeer = EC_KEY_new();
+        if (!ecpeer)
+            goto err;
+        if (!EC_KEY_set_group(ecpeer, grp))
+            goto err;
+    } else {
+        ecpeer = eckey_type2param(atype, aval);
+        if (!ecpeer)
+            goto err;
+    }
+    /* We have parameters now set public key */
+    plen = ASN1_STRING_length(pubkey);
+    p = ASN1_STRING_data(pubkey);
+    if (!p || !plen)
+        goto err;
+    if (!o2i_ECPublicKey(&ecpeer, &p, plen))
+        goto err;
+    pkpeer = EVP_PKEY_new();
+    if (!pkpeer)
+        goto err;
+    /* Do not carry on with a keyless EVP_PKEY if the assignment fails */
+    if (!EVP_PKEY_set1_EC_KEY(pkpeer, ecpeer))
+        goto err;
+    if (EVP_PKEY_derive_set_peer(pctx, pkpeer) > 0)
+        rv = 1;
+ err:
+    /* Local references are dropped on both the success and failure paths */
+    if (ecpeer)
+        EC_KEY_free(ecpeer);
+    if (pkpeer)
+        EVP_PKEY_free(pkpeer);
+    return rv;
+}
+
+/*
+ * Configure the ECDH KDF settings of |pctx| from the combined KDF NID
+ * |eckdf_nid|: cofactor mode, X9.62 KDF type and KDF digest.
+ * Returns 1 on success, 0 if the NID is unknown or any setter fails.
+ */
+static int ecdh_cms_set_kdf_param(EVP_PKEY_CTX *pctx, int eckdf_nid)
+{
+    int scheme_nid, digest_nid, use_cofactor;
+    const EVP_MD *digest;
+
+    /* Split the combined NID into its digest and DH scheme components */
+    if (eckdf_nid == NID_undef
+        || !OBJ_find_sigid_algs(eckdf_nid, &digest_nid, &scheme_nid))
+        return 0;
+
+    switch (scheme_nid) {
+    case NID_dh_std_kdf:
+        use_cofactor = 0;
+        break;
+    case NID_dh_cofactor_kdf:
+        use_cofactor = 1;
+        break;
+    default:
+        return 0;
+    }
+
+    if (EVP_PKEY_CTX_set_ecdh_cofactor_mode(pctx, use_cofactor) <= 0
+        || EVP_PKEY_CTX_set_ecdh_kdf_type(pctx, EVP_PKEY_ECDH_KDF_X9_62) <= 0)
+        return 0;
+
+    digest = EVP_get_digestbynid(digest_nid);
+    if (digest == NULL)
+        return 0;
+
+    return EVP_PKEY_CTX_set_ecdh_kdf_md(pctx, digest) > 0;
+}
+
+/*
+ * Set up ECDH key derivation and the key-wrap cipher context for |ri|.
+ *
+ * The key-encryption algorithm of |ri| selects the KDF (applied to |pctx|
+ * through ecdh_cms_set_kdf_param()). Its parameter must be a SEQUENCE holding
+ * the nested key-wrap AlgorithmIdentifier, which is decoded and used to
+ * initialise the recipient's cipher context. The cipher's key length becomes
+ * the KDF output length, and the encoded shared info is passed to |pctx| as
+ * the KDF UKM.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static int ecdh_cms_set_shared_info(EVP_PKEY_CTX *pctx, CMS_RecipientInfo *ri)
+{
+    int rv = 0;
+
+    X509_ALGOR *alg, *kekalg = NULL;
+    ASN1_OCTET_STRING *ukm;
+    const unsigned char *p;
+    unsigned char *der = NULL;
+    int plen, keylen;
+    const EVP_CIPHER *kekcipher;
+    EVP_CIPHER_CTX *kekctx;
+
+    if (!CMS_RecipientInfo_kari_get0_alg(ri, &alg, &ukm))
+        return 0;
+
+    if (!ecdh_cms_set_kdf_param(pctx, OBJ_obj2nid(alg->algorithm))) {
+        ECerr(EC_F_ECDH_CMS_SET_SHARED_INFO, EC_R_KDF_PARAMETER_ERROR);
+        return 0;
+    }
+
+    /*
+     * AlgorithmIdentifier parameters are OPTIONAL, so a message may omit
+     * them: reject that case instead of dereferencing a NULL pointer.
+     */
+    if (alg->parameter == NULL || alg->parameter->type != V_ASN1_SEQUENCE)
+        return 0;
+
+    /* The SEQUENCE content is the DER of the key-wrap AlgorithmIdentifier */
+    p = alg->parameter->value.sequence->data;
+    plen = alg->parameter->value.sequence->length;
+    kekalg = d2i_X509_ALGOR(NULL, &p, plen);
+    if (!kekalg)
+        goto err;
+    kekctx = CMS_RecipientInfo_kari_get0_ctx(ri);
+    if (!kekctx)
+        goto err;
+    /* Only wrap-mode ciphers are accepted */
+    kekcipher = EVP_get_cipherbyobj(kekalg->algorithm);
+    if (!kekcipher || EVP_CIPHER_mode(kekcipher) != EVP_CIPH_WRAP_MODE)
+        goto err;
+    if (!EVP_EncryptInit_ex(kekctx, kekcipher, NULL, NULL, NULL))
+        goto err;
+    if (EVP_CIPHER_asn1_to_param(kekctx, kekalg->parameter) <= 0)
+        goto err;
+
+    keylen = EVP_CIPHER_CTX_key_length(kekctx);
+    if (EVP_PKEY_CTX_set_ecdh_kdf_outlen(pctx, keylen) <= 0)
+        goto err;
+
+    plen = CMS_SharedInfo_encode(&der, kekalg, ukm, keylen);
+
+    /* Treat a negative encoder result as failure too, not only zero */
+    if (plen <= 0)
+        goto err;
+
+    if (EVP_PKEY_CTX_set0_ecdh_kdf_ukm(pctx, der, plen) <= 0)
+        goto err;
+    /* pctx has taken the buffer; make sure it is not freed below */
+    der = NULL;
+
+    rv = 1;
+ err:
+    if (kekalg)
+        X509_ALGOR_free(kekalg);
+    if (der)
+        OPENSSL_free(der);
+    return rv;
+}
+
+/*
+ * Prepare the key-agreement context of |ri| for decryption: install the
+ * originator's public key as peer key when none is set yet, then configure
+ * the derivation parameters and the unwrap cipher context.
+ * Returns 1 on success, 0 on failure.
+ */
+static int ecdh_cms_decrypt(CMS_RecipientInfo *ri)
+{
+    EVP_PKEY_CTX *pkctx = CMS_RecipientInfo_get0_pkey_ctx(ri);
+
+    if (pkctx == NULL)
+        return 0;
+
+    /* A peer key may already be present; only fetch it when missing */
+    if (EVP_PKEY_CTX_get0_peerkey(pkctx) == NULL) {
+        X509_ALGOR *orig_alg;
+        ASN1_BIT_STRING *orig_pub;
+
+        if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &orig_alg, &orig_pub,
+                                                 NULL, NULL, NULL)
+            || orig_alg == NULL || orig_pub == NULL)
+            return 0;
+        if (!ecdh_cms_set_peerkey(pkctx, orig_alg, orig_pub)) {
+            ECerr(EC_F_ECDH_CMS_DECRYPT, EC_R_PEER_KEY_ERROR);
+            return 0;
+        }
+    }
+
+    /* ECDH derivation parameters plus unwrap context initialisation */
+    if (ecdh_cms_set_shared_info(pkctx, ri))
+        return 1;
+
+    ECerr(EC_F_ECDH_CMS_DECRYPT, EC_R_SHARED_INFO_ERROR);
+    return 0;
+}
+
+/*
+ * Prepare the key-agreement recipient info |ri| for encryption.
+ *
+ * Steps, in order:
+ *  - if the originator key algorithm is still unset, encode the public key
+ *    of the key attached to the recipient's EVP_PKEY_CTX into the originator
+ *    public key field and mark the algorithm as id-ecPublicKey;
+ *  - default the KDF to X9.62 (any KDF type already set is rejected) and the
+ *    KDF digest to SHA-1 when none is set;
+ *  - wrap the key-wrap cipher of the recipient's cipher context in an
+ *    AlgorithmIdentifier, use its key length as the KDF output length and
+ *    pass the encoded shared info to the context as UKM;
+ *  - store the DER of that AlgorithmIdentifier as the SEQUENCE parameter of
+ *    the key-encryption algorithm, whose OID is the combined
+ *    KDF + cofactor + digest NID.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+static int ecdh_cms_encrypt(CMS_RecipientInfo *ri)
+{
+    EVP_PKEY_CTX *pctx;
+    EVP_PKEY *pkey;
+    EVP_CIPHER_CTX *ctx;
+    int keylen;
+    X509_ALGOR *talg, *wrap_alg = NULL;
+    ASN1_OBJECT *aoid;
+    ASN1_BIT_STRING *pubkey;
+    ASN1_STRING *wrap_str;
+    ASN1_OCTET_STRING *ukm;
+    unsigned char *penc = NULL;
+    int penclen;
+    int rv = 0;
+    int ecdh_nid, kdf_type, kdf_nid, wrap_nid;
+    const EVP_MD *kdf_md;
+    pctx = CMS_RecipientInfo_get0_pkey_ctx(ri);
+    if (!pctx)
+        return 0;
+    /* Get ephemeral key */
+    pkey = EVP_PKEY_CTX_get0_pkey(pctx);
+    if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &talg, &pubkey,
+                                             NULL, NULL, NULL))
+        goto err;
+    /* Same sanity check as the decrypt path before touching either one */
+    if (!talg || !pubkey)
+        goto err;
+    X509_ALGOR_get0(&aoid, NULL, NULL, talg);
+    /* Is everything uninitialised? */
+    if (aoid == OBJ_nid2obj(NID_undef)) {
+        EC_KEY *eckey;
+        unsigned char *p;
+
+        /* The key is only needed on this path, so it is checked here */
+        if (!pkey)
+            goto err;
+        eckey = pkey->pkey.ec;
+
+        /* Set the key */
+        penclen = i2o_ECPublicKey(eckey, NULL);
+        if (penclen <= 0)
+            goto err;
+        penc = OPENSSL_malloc(penclen);
+        if (!penc)
+            goto err;
+        p = penc;
+        penclen = i2o_ECPublicKey(eckey, &p);
+        if (penclen <= 0)
+            goto err;
+        ASN1_STRING_set0(pubkey, penc, penclen);
+        /* Clear the unused-bits count but keep the BITS_LEFT marker set */
+        pubkey->flags &= ~(ASN1_STRING_FLAG_BITS_LEFT | 0x07);
+        pubkey->flags |= ASN1_STRING_FLAG_BITS_LEFT;
+
+        /* pubkey holds the buffer now; do not free it at err */
+        penc = NULL;
+        X509_ALGOR_set0(talg, OBJ_nid2obj(NID_X9_62_id_ecPublicKey),
+                        V_ASN1_UNDEF, NULL);
+    }
+
+    /* See if custom parameters set */
+    kdf_type = EVP_PKEY_CTX_get_ecdh_kdf_type(pctx);
+    if (kdf_type <= 0)
+        goto err;
+    if (!EVP_PKEY_CTX_get_ecdh_kdf_md(pctx, &kdf_md))
+        goto err;
+    ecdh_nid = EVP_PKEY_CTX_get_ecdh_cofactor_mode(pctx);
+    if (ecdh_nid < 0)
+        goto err;
+    else if (ecdh_nid == 0)
+        ecdh_nid = NID_dh_std_kdf;
+    else if (ecdh_nid == 1)
+        ecdh_nid = NID_dh_cofactor_kdf;
+
+    if (kdf_type == EVP_PKEY_ECDH_KDF_NONE) {
+        kdf_type = EVP_PKEY_ECDH_KDF_X9_62;
+        if (EVP_PKEY_CTX_set_ecdh_kdf_type(pctx, kdf_type) <= 0)
+            goto err;
+    } else
+        /* Unknown KDF */
+        goto err;
+    if (kdf_md == NULL) {
+        /* Fixme later for better MD */
+        kdf_md = EVP_sha1();
+        if (EVP_PKEY_CTX_set_ecdh_kdf_md(pctx, kdf_md) <= 0)
+            goto err;
+    }
+
+    /* From here on talg is the key-encryption algorithm of ri */
+    if (!CMS_RecipientInfo_kari_get0_alg(ri, &talg, &ukm))
+        goto err;
+
+    /* Lookup NID for KDF+cofactor+digest */
+
+    if (!OBJ_find_sigid_by_algs(&kdf_nid, EVP_MD_type(kdf_md), ecdh_nid))
+        goto err;
+    /* Get wrap NID */
+    ctx = CMS_RecipientInfo_kari_get0_ctx(ri);
+    if (!ctx)
+        goto err;
+    wrap_nid = EVP_CIPHER_CTX_type(ctx);
+    keylen = EVP_CIPHER_CTX_key_length(ctx);
+
+    /* Package wrap algorithm in an AlgorithmIdentifier */
+
+    wrap_alg = X509_ALGOR_new();
+    if (!wrap_alg)
+        goto err;
+    wrap_alg->algorithm = OBJ_nid2obj(wrap_nid);
+    wrap_alg->parameter = ASN1_TYPE_new();
+    if (!wrap_alg->parameter)
+        goto err;
+    if (EVP_CIPHER_param_to_asn1(ctx, wrap_alg->parameter) <= 0)
+        goto err;
+    /* Omit the parameter entirely when the cipher produced none */
+    if (ASN1_TYPE_get(wrap_alg->parameter) == NID_undef) {
+        ASN1_TYPE_free(wrap_alg->parameter);
+        wrap_alg->parameter = NULL;
+    }
+
+    if (EVP_PKEY_CTX_set_ecdh_kdf_outlen(pctx, keylen) <= 0)
+        goto err;
+
+    penclen = CMS_SharedInfo_encode(&penc, wrap_alg, ukm, keylen);
+
+    /* Treat a negative encoder result as failure too, not only zero */
+    if (penclen <= 0)
+        goto err;
+
+    if (EVP_PKEY_CTX_set0_ecdh_kdf_ukm(pctx, penc, penclen) <= 0)
+        goto err;
+    /* pctx has taken the buffer */
+    penc = NULL;
+
+    /*
+     * Now need to wrap encoding of wrap AlgorithmIdentifier into parameter
+     * of another AlgorithmIdentifier.
+     */
+    penclen = i2d_X509_ALGOR(wrap_alg, &penc);
+    if (penclen <= 0 || !penc)
+        goto err;
+    wrap_str = ASN1_STRING_new();
+    if (!wrap_str)
+        goto err;
+    ASN1_STRING_set0(wrap_str, penc, penclen);
+    penc = NULL;
+    /* On failure wrap_str has not been consumed, so release it here */
+    if (!X509_ALGOR_set0(talg, OBJ_nid2obj(kdf_nid),
+                         V_ASN1_SEQUENCE, wrap_str)) {
+        ASN1_STRING_free(wrap_str);
+        goto err;
+    }
+
+    rv = 1;
+
+ err:
+    if (penc)
+        OPENSSL_free(penc);
+    if (wrap_alg)
+        X509_ALGOR_free(wrap_alg);
+    return rv;
+}
+
+#endif
index e4bcb5d..6dbe9d8 100644 (file)
  *
  */
 
+#include <string.h>
 #include "ec_lcl.h"
 #include <openssl/err.h>
 #include <openssl/obj_mac.h>
 #include <openssl/opensslconf.h>
 
+#ifdef OPENSSL_FIPS
+# include <openssl/fips.h>
+#endif
+
 typedef struct {
     int field_type,             /* either NID_X9_62_prime_field or
                                  * NID_X9_62_characteristic_two_field */
@@ -2282,6 +2287,554 @@ static const struct {
 
 #endif
 
+/*
+ * These curves were added by Annie Yousar <a.yousar@informatik.hu-berlin.de>.
+ * For the definition of RFC 5639 curves see
+ * http://www.ietf.org/rfc/rfc5639.txt These curves are verifiably generated
+ * at random; nevertheless the seed is omitted as a parameter because the
+ * generation mechanism is different from those defined in ANSI X9.62.
+ */
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 20 * 6];
+} _EC_brainpoolP160r1 = {
+    {
+        NID_X9_62_prime_field, 0, 20, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0xC7, 0xAD,
+        0x95, 0xB3, 0xD8, 0x13, 0x95, 0x15, 0x62, 0x0F,
+        /* a */
+        0x34, 0x0E, 0x7B, 0xE2, 0xA2, 0x80, 0xEB, 0x74, 0xE2, 0xBE, 0x61, 0xBA,
+        0xDA, 0x74, 0x5D, 0x97, 0xE8, 0xF7, 0xC3, 0x00,
+        /* b */
+        0x1E, 0x58, 0x9A, 0x85, 0x95, 0x42, 0x34, 0x12, 0x13, 0x4F, 0xAA, 0x2D,
+        0xBD, 0xEC, 0x95, 0xC8, 0xD8, 0x67, 0x5E, 0x58,
+        /* x */
+        0xBE, 0xD5, 0xAF, 0x16, 0xEA, 0x3F, 0x6A, 0x4F, 0x62, 0x93, 0x8C, 0x46,
+        0x31, 0xEB, 0x5A, 0xF7, 0xBD, 0xBC, 0xDB, 0xC3,
+        /* y */
+        0x16, 0x67, 0xCB, 0x47, 0x7A, 0x1A, 0x8E, 0xC3, 0x38, 0xF9, 0x47, 0x41,
+        0x66, 0x9C, 0x97, 0x63, 0x16, 0xDA, 0x63, 0x21,
+        /* order */
+        0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0x59, 0x91,
+        0xD4, 0x50, 0x29, 0x40, 0x9E, 0x60, 0xFC, 0x09
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 20 * 6];
+} _EC_brainpoolP160t1 = {
+    {
+        NID_X9_62_prime_field, 0, 20, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0xC7, 0xAD,
+        0x95, 0xB3, 0xD8, 0x13, 0x95, 0x15, 0x62, 0x0F,
+        /* a */
+        0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0xC7, 0xAD,
+        0x95, 0xB3, 0xD8, 0x13, 0x95, 0x15, 0x62, 0x0C,
+        /* b */
+        0x7A, 0x55, 0x6B, 0x6D, 0xAE, 0x53, 0x5B, 0x7B, 0x51, 0xED, 0x2C, 0x4D,
+        0x7D, 0xAA, 0x7A, 0x0B, 0x5C, 0x55, 0xF3, 0x80,
+        /* x */
+        0xB1, 0x99, 0xB1, 0x3B, 0x9B, 0x34, 0xEF, 0xC1, 0x39, 0x7E, 0x64, 0xBA,
+        0xEB, 0x05, 0xAC, 0xC2, 0x65, 0xFF, 0x23, 0x78,
+        /* y */
+        0xAD, 0xD6, 0x71, 0x8B, 0x7C, 0x7C, 0x19, 0x61, 0xF0, 0x99, 0x1B, 0x84,
+        0x24, 0x43, 0x77, 0x21, 0x52, 0xC9, 0xE0, 0xAD,
+        /* order */
+        0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0x59, 0x91,
+        0xD4, 0x50, 0x29, 0x40, 0x9E, 0x60, 0xFC, 0x09
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 24 * 6];
+} _EC_brainpoolP192r1 = {
+    {
+        NID_X9_62_prime_field, 0, 24, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x30,
+        0x93, 0xD1, 0x8D, 0xB7, 0x8F, 0xCE, 0x47, 0x6D, 0xE1, 0xA8, 0x62, 0x97,
+        /* a */
+        0x6A, 0x91, 0x17, 0x40, 0x76, 0xB1, 0xE0, 0xE1, 0x9C, 0x39, 0xC0, 0x31,
+        0xFE, 0x86, 0x85, 0xC1, 0xCA, 0xE0, 0x40, 0xE5, 0xC6, 0x9A, 0x28, 0xEF,
+        /* b */
+        0x46, 0x9A, 0x28, 0xEF, 0x7C, 0x28, 0xCC, 0xA3, 0xDC, 0x72, 0x1D, 0x04,
+        0x4F, 0x44, 0x96, 0xBC, 0xCA, 0x7E, 0xF4, 0x14, 0x6F, 0xBF, 0x25, 0xC9,
+        /* x */
+        0xC0, 0xA0, 0x64, 0x7E, 0xAA, 0xB6, 0xA4, 0x87, 0x53, 0xB0, 0x33, 0xC5,
+        0x6C, 0xB0, 0xF0, 0x90, 0x0A, 0x2F, 0x5C, 0x48, 0x53, 0x37, 0x5F, 0xD6,
+        /* y */
+        0x14, 0xB6, 0x90, 0x86, 0x6A, 0xBD, 0x5B, 0xB8, 0x8B, 0x5F, 0x48, 0x28,
+        0xC1, 0x49, 0x00, 0x02, 0xE6, 0x77, 0x3F, 0xA2, 0xFA, 0x29, 0x9B, 0x8F,
+        /* order */
+        0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x2F,
+        0x9E, 0x9E, 0x91, 0x6B, 0x5B, 0xE8, 0xF1, 0x02, 0x9A, 0xC4, 0xAC, 0xC1
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 24 * 6];
+} _EC_brainpoolP192t1 = {
+    {
+        NID_X9_62_prime_field, 0, 24, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x30,
+        0x93, 0xD1, 0x8D, 0xB7, 0x8F, 0xCE, 0x47, 0x6D, 0xE1, 0xA8, 0x62, 0x97,
+        /* a */
+        0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x30,
+        0x93, 0xD1, 0x8D, 0xB7, 0x8F, 0xCE, 0x47, 0x6D, 0xE1, 0xA8, 0x62, 0x94,
+        /* b */
+        0x13, 0xD5, 0x6F, 0xFA, 0xEC, 0x78, 0x68, 0x1E, 0x68, 0xF9, 0xDE, 0xB4,
+        0x3B, 0x35, 0xBE, 0xC2, 0xFB, 0x68, 0x54, 0x2E, 0x27, 0x89, 0x7B, 0x79,
+        /* x */
+        0x3A, 0xE9, 0xE5, 0x8C, 0x82, 0xF6, 0x3C, 0x30, 0x28, 0x2E, 0x1F, 0xE7,
+        0xBB, 0xF4, 0x3F, 0xA7, 0x2C, 0x44, 0x6A, 0xF6, 0xF4, 0x61, 0x81, 0x29,
+        /* y */
+        0x09, 0x7E, 0x2C, 0x56, 0x67, 0xC2, 0x22, 0x3A, 0x90, 0x2A, 0xB5, 0xCA,
+        0x44, 0x9D, 0x00, 0x84, 0xB7, 0xE5, 0xB3, 0xDE, 0x7C, 0xCC, 0x01, 0xC9,
+        /* order */
+        0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x2F,
+        0x9E, 0x9E, 0x91, 0x6B, 0x5B, 0xE8, 0xF1, 0x02, 0x9A, 0xC4, 0xAC, 0xC1
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 28 * 6];
+} _EC_brainpoolP224r1 = {
+    {
+        NID_X9_62_prime_field, 0, 28, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25,
+        0x75, 0xD1, 0xD7, 0x87, 0xB0, 0x9F, 0x07, 0x57, 0x97, 0xDA, 0x89, 0xF5,
+        0x7E, 0xC8, 0xC0, 0xFF,
+        /* a */
+        0x68, 0xA5, 0xE6, 0x2C, 0xA9, 0xCE, 0x6C, 0x1C, 0x29, 0x98, 0x03, 0xA6,
+        0xC1, 0x53, 0x0B, 0x51, 0x4E, 0x18, 0x2A, 0xD8, 0xB0, 0x04, 0x2A, 0x59,
+        0xCA, 0xD2, 0x9F, 0x43,
+        /* b */
+        0x25, 0x80, 0xF6, 0x3C, 0xCF, 0xE4, 0x41, 0x38, 0x87, 0x07, 0x13, 0xB1,
+        0xA9, 0x23, 0x69, 0xE3, 0x3E, 0x21, 0x35, 0xD2, 0x66, 0xDB, 0xB3, 0x72,
+        0x38, 0x6C, 0x40, 0x0B,
+        /* x */
+        0x0D, 0x90, 0x29, 0xAD, 0x2C, 0x7E, 0x5C, 0xF4, 0x34, 0x08, 0x23, 0xB2,
+        0xA8, 0x7D, 0xC6, 0x8C, 0x9E, 0x4C, 0xE3, 0x17, 0x4C, 0x1E, 0x6E, 0xFD,
+        0xEE, 0x12, 0xC0, 0x7D,
+        /* y */
+        0x58, 0xAA, 0x56, 0xF7, 0x72, 0xC0, 0x72, 0x6F, 0x24, 0xC6, 0xB8, 0x9E,
+        0x4E, 0xCD, 0xAC, 0x24, 0x35, 0x4B, 0x9E, 0x99, 0xCA, 0xA3, 0xF6, 0xD3,
+        0x76, 0x14, 0x02, 0xCD,
+        /* order */
+        0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25,
+        0x75, 0xD0, 0xFB, 0x98, 0xD1, 0x16, 0xBC, 0x4B, 0x6D, 0xDE, 0xBC, 0xA3,
+        0xA5, 0xA7, 0x93, 0x9F
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 28 * 6];
+} _EC_brainpoolP224t1 = {
+    {
+        NID_X9_62_prime_field, 0, 28, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25,
+        0x75, 0xD1, 0xD7, 0x87, 0xB0, 0x9F, 0x07, 0x57, 0x97, 0xDA, 0x89, 0xF5,
+        0x7E, 0xC8, 0xC0, 0xFF,
+        /* a */
+        0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25,
+        0x75, 0xD1, 0xD7, 0x87, 0xB0, 0x9F, 0x07, 0x57, 0x97, 0xDA, 0x89, 0xF5,
+        0x7E, 0xC8, 0xC0, 0xFC,
+        /* b */
+        0x4B, 0x33, 0x7D, 0x93, 0x41, 0x04, 0xCD, 0x7B, 0xEF, 0x27, 0x1B, 0xF6,
+        0x0C, 0xED, 0x1E, 0xD2, 0x0D, 0xA1, 0x4C, 0x08, 0xB3, 0xBB, 0x64, 0xF1,
+        0x8A, 0x60, 0x88, 0x8D,
+        /* x */
+        0x6A, 0xB1, 0xE3, 0x44, 0xCE, 0x25, 0xFF, 0x38, 0x96, 0x42, 0x4E, 0x7F,
+        0xFE, 0x14, 0x76, 0x2E, 0xCB, 0x49, 0xF8, 0x92, 0x8A, 0xC0, 0xC7, 0x60,
+        0x29, 0xB4, 0xD5, 0x80,
+        /* y */
+        0x03, 0x74, 0xE9, 0xF5, 0x14, 0x3E, 0x56, 0x8C, 0xD2, 0x3F, 0x3F, 0x4D,
+        0x7C, 0x0D, 0x4B, 0x1E, 0x41, 0xC8, 0xCC, 0x0D, 0x1C, 0x6A, 0xBD, 0x5F,
+        0x1A, 0x46, 0xDB, 0x4C,
+        /* order */
+        0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25,
+        0x75, 0xD0, 0xFB, 0x98, 0xD1, 0x16, 0xBC, 0x4B, 0x6D, 0xDE, 0xBC, 0xA3,
+        0xA5, 0xA7, 0x93, 0x9F
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 32 * 6];
+} _EC_brainpoolP256r1 = {
+    {
+        NID_X9_62_prime_field, 0, 32, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90,
+        0x9D, 0x83, 0x8D, 0x72, 0x6E, 0x3B, 0xF6, 0x23, 0xD5, 0x26, 0x20, 0x28,
+        0x20, 0x13, 0x48, 0x1D, 0x1F, 0x6E, 0x53, 0x77,
+        /* a */
+        0x7D, 0x5A, 0x09, 0x75, 0xFC, 0x2C, 0x30, 0x57, 0xEE, 0xF6, 0x75, 0x30,
+        0x41, 0x7A, 0xFF, 0xE7, 0xFB, 0x80, 0x55, 0xC1, 0x26, 0xDC, 0x5C, 0x6C,
+        0xE9, 0x4A, 0x4B, 0x44, 0xF3, 0x30, 0xB5, 0xD9,
+        /* b */
+        0x26, 0xDC, 0x5C, 0x6C, 0xE9, 0x4A, 0x4B, 0x44, 0xF3, 0x30, 0xB5, 0xD9,
+        0xBB, 0xD7, 0x7C, 0xBF, 0x95, 0x84, 0x16, 0x29, 0x5C, 0xF7, 0xE1, 0xCE,
+        0x6B, 0xCC, 0xDC, 0x18, 0xFF, 0x8C, 0x07, 0xB6,
+        /* x */
+        0x8B, 0xD2, 0xAE, 0xB9, 0xCB, 0x7E, 0x57, 0xCB, 0x2C, 0x4B, 0x48, 0x2F,
+        0xFC, 0x81, 0xB7, 0xAF, 0xB9, 0xDE, 0x27, 0xE1, 0xE3, 0xBD, 0x23, 0xC2,
+        0x3A, 0x44, 0x53, 0xBD, 0x9A, 0xCE, 0x32, 0x62,
+        /* y */
+        0x54, 0x7E, 0xF8, 0x35, 0xC3, 0xDA, 0xC4, 0xFD, 0x97, 0xF8, 0x46, 0x1A,
+        0x14, 0x61, 0x1D, 0xC9, 0xC2, 0x77, 0x45, 0x13, 0x2D, 0xED, 0x8E, 0x54,
+        0x5C, 0x1D, 0x54, 0xC7, 0x2F, 0x04, 0x69, 0x97,
+        /* order */
+        0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90,
+        0x9D, 0x83, 0x8D, 0x71, 0x8C, 0x39, 0x7A, 0xA3, 0xB5, 0x61, 0xA6, 0xF7,
+        0x90, 0x1E, 0x0E, 0x82, 0x97, 0x48, 0x56, 0xA7
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 32 * 6];
+} _EC_brainpoolP256t1 = {
+    {
+        NID_X9_62_prime_field, 0, 32, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90,
+        0x9D, 0x83, 0x8D, 0x72, 0x6E, 0x3B, 0xF6, 0x23, 0xD5, 0x26, 0x20, 0x28,
+        0x20, 0x13, 0x48, 0x1D, 0x1F, 0x6E, 0x53, 0x77,
+        /* a */
+        0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90,
+        0x9D, 0x83, 0x8D, 0x72, 0x6E, 0x3B, 0xF6, 0x23, 0xD5, 0x26, 0x20, 0x28,
+        0x20, 0x13, 0x48, 0x1D, 0x1F, 0x6E, 0x53, 0x74,
+        /* b */
+        0x66, 0x2C, 0x61, 0xC4, 0x30, 0xD8, 0x4E, 0xA4, 0xFE, 0x66, 0xA7, 0x73,
+        0x3D, 0x0B, 0x76, 0xB7, 0xBF, 0x93, 0xEB, 0xC4, 0xAF, 0x2F, 0x49, 0x25,
+        0x6A, 0xE5, 0x81, 0x01, 0xFE, 0xE9, 0x2B, 0x04,
+        /* x */
+        0xA3, 0xE8, 0xEB, 0x3C, 0xC1, 0xCF, 0xE7, 0xB7, 0x73, 0x22, 0x13, 0xB2,
+        0x3A, 0x65, 0x61, 0x49, 0xAF, 0xA1, 0x42, 0xC4, 0x7A, 0xAF, 0xBC, 0x2B,
+        0x79, 0xA1, 0x91, 0x56, 0x2E, 0x13, 0x05, 0xF4,
+        /* y */
+        0x2D, 0x99, 0x6C, 0x82, 0x34, 0x39, 0xC5, 0x6D, 0x7F, 0x7B, 0x22, 0xE1,
+        0x46, 0x44, 0x41, 0x7E, 0x69, 0xBC, 0xB6, 0xDE, 0x39, 0xD0, 0x27, 0x00,
+        0x1D, 0xAB, 0xE8, 0xF3, 0x5B, 0x25, 0xC9, 0xBE,
+        /* order */
+        0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90,
+        0x9D, 0x83, 0x8D, 0x71, 0x8C, 0x39, 0x7A, 0xA3, 0xB5, 0x61, 0xA6, 0xF7,
+        0x90, 0x1E, 0x0E, 0x82, 0x97, 0x48, 0x56, 0xA7
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 40 * 6];
+} _EC_brainpoolP320r1 = {
+    {
+        NID_X9_62_prime_field, 0, 40, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E,
+        0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA6, 0xF6, 0xF4, 0x0D, 0xEF,
+        0x4F, 0x92, 0xB9, 0xEC, 0x78, 0x93, 0xEC, 0x28, 0xFC, 0xD4, 0x12, 0xB1,
+        0xF1, 0xB3, 0x2E, 0x27,
+        /* a */
+        0x3E, 0xE3, 0x0B, 0x56, 0x8F, 0xBA, 0xB0, 0xF8, 0x83, 0xCC, 0xEB, 0xD4,
+        0x6D, 0x3F, 0x3B, 0xB8, 0xA2, 0xA7, 0x35, 0x13, 0xF5, 0xEB, 0x79, 0xDA,
+        0x66, 0x19, 0x0E, 0xB0, 0x85, 0xFF, 0xA9, 0xF4, 0x92, 0xF3, 0x75, 0xA9,
+        0x7D, 0x86, 0x0E, 0xB4,
+        /* b */
+        0x52, 0x08, 0x83, 0x94, 0x9D, 0xFD, 0xBC, 0x42, 0xD3, 0xAD, 0x19, 0x86,
+        0x40, 0x68, 0x8A, 0x6F, 0xE1, 0x3F, 0x41, 0x34, 0x95, 0x54, 0xB4, 0x9A,
+        0xCC, 0x31, 0xDC, 0xCD, 0x88, 0x45, 0x39, 0x81, 0x6F, 0x5E, 0xB4, 0xAC,
+        0x8F, 0xB1, 0xF1, 0xA6,
+        /* x */
+        0x43, 0xBD, 0x7E, 0x9A, 0xFB, 0x53, 0xD8, 0xB8, 0x52, 0x89, 0xBC, 0xC4,
+        0x8E, 0xE5, 0xBF, 0xE6, 0xF2, 0x01, 0x37, 0xD1, 0x0A, 0x08, 0x7E, 0xB6,
+        0xE7, 0x87, 0x1E, 0x2A, 0x10, 0xA5, 0x99, 0xC7, 0x10, 0xAF, 0x8D, 0x0D,
+        0x39, 0xE2, 0x06, 0x11,
+        /* y */
+        0x14, 0xFD, 0xD0, 0x55, 0x45, 0xEC, 0x1C, 0xC8, 0xAB, 0x40, 0x93, 0x24,
+        0x7F, 0x77, 0x27, 0x5E, 0x07, 0x43, 0xFF, 0xED, 0x11, 0x71, 0x82, 0xEA,
+        0xA9, 0xC7, 0x78, 0x77, 0xAA, 0xAC, 0x6A, 0xC7, 0xD3, 0x52, 0x45, 0xD1,
+        0x69, 0x2E, 0x8E, 0xE1,
+        /* order */
+        0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E,
+        0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA5, 0xB6, 0x8F, 0x12, 0xA3,
+        0x2D, 0x48, 0x2E, 0xC7, 0xEE, 0x86, 0x58, 0xE9, 0x86, 0x91, 0x55, 0x5B,
+        0x44, 0xC5, 0x93, 0x11
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 40 * 6];
+} _EC_brainpoolP320t1 = {
+    {
+        NID_X9_62_prime_field, 0, 40, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E,
+        0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA6, 0xF6, 0xF4, 0x0D, 0xEF,
+        0x4F, 0x92, 0xB9, 0xEC, 0x78, 0x93, 0xEC, 0x28, 0xFC, 0xD4, 0x12, 0xB1,
+        0xF1, 0xB3, 0x2E, 0x27,
+        /* a */
+        0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E,
+        0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA6, 0xF6, 0xF4, 0x0D, 0xEF,
+        0x4F, 0x92, 0xB9, 0xEC, 0x78, 0x93, 0xEC, 0x28, 0xFC, 0xD4, 0x12, 0xB1,
+        0xF1, 0xB3, 0x2E, 0x24,
+        /* b */
+        0xA7, 0xF5, 0x61, 0xE0, 0x38, 0xEB, 0x1E, 0xD5, 0x60, 0xB3, 0xD1, 0x47,
+        0xDB, 0x78, 0x20, 0x13, 0x06, 0x4C, 0x19, 0xF2, 0x7E, 0xD2, 0x7C, 0x67,
+        0x80, 0xAA, 0xF7, 0x7F, 0xB8, 0xA5, 0x47, 0xCE, 0xB5, 0xB4, 0xFE, 0xF4,
+        0x22, 0x34, 0x03, 0x53,
+        /* x */
+        0x92, 0x5B, 0xE9, 0xFB, 0x01, 0xAF, 0xC6, 0xFB, 0x4D, 0x3E, 0x7D, 0x49,
+        0x90, 0x01, 0x0F, 0x81, 0x34, 0x08, 0xAB, 0x10, 0x6C, 0x4F, 0x09, 0xCB,
+        0x7E, 0xE0, 0x78, 0x68, 0xCC, 0x13, 0x6F, 0xFF, 0x33, 0x57, 0xF6, 0x24,
+        0xA2, 0x1B, 0xED, 0x52,
+        /* y */
+        0x63, 0xBA, 0x3A, 0x7A, 0x27, 0x48, 0x3E, 0xBF, 0x66, 0x71, 0xDB, 0xEF,
+        0x7A, 0xBB, 0x30, 0xEB, 0xEE, 0x08, 0x4E, 0x58, 0xA0, 0xB0, 0x77, 0xAD,
+        0x42, 0xA5, 0xA0, 0x98, 0x9D, 0x1E, 0xE7, 0x1B, 0x1B, 0x9B, 0xC0, 0x45,
+        0x5F, 0xB0, 0xD2, 0xC3,
+        /* order */
+        0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E,
+        0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA5, 0xB6, 0x8F, 0x12, 0xA3,
+        0x2D, 0x48, 0x2E, 0xC7, 0xEE, 0x86, 0x58, 0xE9, 0x86, 0x91, 0x55, 0x5B,
+        0x44, 0xC5, 0x93, 0x11
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 48 * 6];
+} _EC_brainpoolP384r1 = {
+    {
+        NID_X9_62_prime_field, 0, 48, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E,
+        0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB4,
+        0x12, 0xB1, 0xDA, 0x19, 0x7F, 0xB7, 0x11, 0x23, 0xAC, 0xD3, 0xA7, 0x29,
+        0x90, 0x1D, 0x1A, 0x71, 0x87, 0x47, 0x00, 0x13, 0x31, 0x07, 0xEC, 0x53,
+        /* a */
+        0x7B, 0xC3, 0x82, 0xC6, 0x3D, 0x8C, 0x15, 0x0C, 0x3C, 0x72, 0x08, 0x0A,
+        0xCE, 0x05, 0xAF, 0xA0, 0xC2, 0xBE, 0xA2, 0x8E, 0x4F, 0xB2, 0x27, 0x87,
+        0x13, 0x91, 0x65, 0xEF, 0xBA, 0x91, 0xF9, 0x0F, 0x8A, 0xA5, 0x81, 0x4A,
+        0x50, 0x3A, 0xD4, 0xEB, 0x04, 0xA8, 0xC7, 0xDD, 0x22, 0xCE, 0x28, 0x26,
+        /* b */
+        0x04, 0xA8, 0xC7, 0xDD, 0x22, 0xCE, 0x28, 0x26, 0x8B, 0x39, 0xB5, 0x54,
+        0x16, 0xF0, 0x44, 0x7C, 0x2F, 0xB7, 0x7D, 0xE1, 0x07, 0xDC, 0xD2, 0xA6,
+        0x2E, 0x88, 0x0E, 0xA5, 0x3E, 0xEB, 0x62, 0xD5, 0x7C, 0xB4, 0x39, 0x02,
+        0x95, 0xDB, 0xC9, 0x94, 0x3A, 0xB7, 0x86, 0x96, 0xFA, 0x50, 0x4C, 0x11,
+        /* x */
+        0x1D, 0x1C, 0x64, 0xF0, 0x68, 0xCF, 0x45, 0xFF, 0xA2, 0xA6, 0x3A, 0x81,
+        0xB7, 0xC1, 0x3F, 0x6B, 0x88, 0x47, 0xA3, 0xE7, 0x7E, 0xF1, 0x4F, 0xE3,
+        0xDB, 0x7F, 0xCA, 0xFE, 0x0C, 0xBD, 0x10, 0xE8, 0xE8, 0x26, 0xE0, 0x34,
+        0x36, 0xD6, 0x46, 0xAA, 0xEF, 0x87, 0xB2, 0xE2, 0x47, 0xD4, 0xAF, 0x1E,
+        /* y */
+        0x8A, 0xBE, 0x1D, 0x75, 0x20, 0xF9, 0xC2, 0xA4, 0x5C, 0xB1, 0xEB, 0x8E,
+        0x95, 0xCF, 0xD5, 0x52, 0x62, 0xB7, 0x0B, 0x29, 0xFE, 0xEC, 0x58, 0x64,
+        0xE1, 0x9C, 0x05, 0x4F, 0xF9, 0x91, 0x29, 0x28, 0x0E, 0x46, 0x46, 0x21,
+        0x77, 0x91, 0x81, 0x11, 0x42, 0x82, 0x03, 0x41, 0x26, 0x3C, 0x53, 0x15,
+        /* order */
+        0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E,
+        0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB3,
+        0x1F, 0x16, 0x6E, 0x6C, 0xAC, 0x04, 0x25, 0xA7, 0xCF, 0x3A, 0xB6, 0xAF,
+        0x6B, 0x7F, 0xC3, 0x10, 0x3B, 0x88, 0x32, 0x02, 0xE9, 0x04, 0x65, 0x65
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 48 * 6];
+} _EC_brainpoolP384t1 = {
+    {
+        NID_X9_62_prime_field, 0, 48, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E,
+        0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB4,
+        0x12, 0xB1, 0xDA, 0x19, 0x7F, 0xB7, 0x11, 0x23, 0xAC, 0xD3, 0xA7, 0x29,
+        0x90, 0x1D, 0x1A, 0x71, 0x87, 0x47, 0x00, 0x13, 0x31, 0x07, 0xEC, 0x53,
+        /* a */
+        0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E,
+        0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB4,
+        0x12, 0xB1, 0xDA, 0x19, 0x7F, 0xB7, 0x11, 0x23, 0xAC, 0xD3, 0xA7, 0x29,
+        0x90, 0x1D, 0x1A, 0x71, 0x87, 0x47, 0x00, 0x13, 0x31, 0x07, 0xEC, 0x50,
+        /* b */
+        0x7F, 0x51, 0x9E, 0xAD, 0xA7, 0xBD, 0xA8, 0x1B, 0xD8, 0x26, 0xDB, 0xA6,
+        0x47, 0x91, 0x0F, 0x8C, 0x4B, 0x93, 0x46, 0xED, 0x8C, 0xCD, 0xC6, 0x4E,
+        0x4B, 0x1A, 0xBD, 0x11, 0x75, 0x6D, 0xCE, 0x1D, 0x20, 0x74, 0xAA, 0x26,
+        0x3B, 0x88, 0x80, 0x5C, 0xED, 0x70, 0x35, 0x5A, 0x33, 0xB4, 0x71, 0xEE,
+        /* x */
+        0x18, 0xDE, 0x98, 0xB0, 0x2D, 0xB9, 0xA3, 0x06, 0xF2, 0xAF, 0xCD, 0x72,
+        0x35, 0xF7, 0x2A, 0x81, 0x9B, 0x80, 0xAB, 0x12, 0xEB, 0xD6, 0x53, 0x17,
+        0x24, 0x76, 0xFE, 0xCD, 0x46, 0x2A, 0xAB, 0xFF, 0xC4, 0xFF, 0x19, 0x1B,
+        0x94, 0x6A, 0x5F, 0x54, 0xD8, 0xD0, 0xAA, 0x2F, 0x41, 0x88, 0x08, 0xCC,
+        /* y */
+        0x25, 0xAB, 0x05, 0x69, 0x62, 0xD3, 0x06, 0x51, 0xA1, 0x14, 0xAF, 0xD2,
+        0x75, 0x5A, 0xD3, 0x36, 0x74, 0x7F, 0x93, 0x47, 0x5B, 0x7A, 0x1F, 0xCA,
+        0x3B, 0x88, 0xF2, 0xB6, 0xA2, 0x08, 0xCC, 0xFE, 0x46, 0x94, 0x08, 0x58,
+        0x4D, 0xC2, 0xB2, 0x91, 0x26, 0x75, 0xBF, 0x5B, 0x9E, 0x58, 0x29, 0x28,
+        /* order */
+        0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E,
+        0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB3,
+        0x1F, 0x16, 0x6E, 0x6C, 0xAC, 0x04, 0x25, 0xA7, 0xCF, 0x3A, 0xB6, 0xAF,
+        0x6B, 0x7F, 0xC3, 0x10, 0x3B, 0x88, 0x32, 0x02, 0xE9, 0x04, 0x65, 0x65
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 64 * 6];
+} _EC_brainpoolP512r1 = {
+    {
+        NID_X9_62_prime_field, 0, 64, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE,
+        0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E,
+        0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x71, 0x7D, 0x4D, 0x9B, 0x00,
+        0x9B, 0xC6, 0x68, 0x42, 0xAE, 0xCD, 0xA1, 0x2A, 0xE6, 0xA3, 0x80, 0xE6,
+        0x28, 0x81, 0xFF, 0x2F, 0x2D, 0x82, 0xC6, 0x85, 0x28, 0xAA, 0x60, 0x56,
+        0x58, 0x3A, 0x48, 0xF3,
+        /* a */
+        0x78, 0x30, 0xA3, 0x31, 0x8B, 0x60, 0x3B, 0x89, 0xE2, 0x32, 0x71, 0x45,
+        0xAC, 0x23, 0x4C, 0xC5, 0x94, 0xCB, 0xDD, 0x8D, 0x3D, 0xF9, 0x16, 0x10,
+        0xA8, 0x34, 0x41, 0xCA, 0xEA, 0x98, 0x63, 0xBC, 0x2D, 0xED, 0x5D, 0x5A,
+        0xA8, 0x25, 0x3A, 0xA1, 0x0A, 0x2E, 0xF1, 0xC9, 0x8B, 0x9A, 0xC8, 0xB5,
+        0x7F, 0x11, 0x17, 0xA7, 0x2B, 0xF2, 0xC7, 0xB9, 0xE7, 0xC1, 0xAC, 0x4D,
+        0x77, 0xFC, 0x94, 0xCA,
+        /* b */
+        0x3D, 0xF9, 0x16, 0x10, 0xA8, 0x34, 0x41, 0xCA, 0xEA, 0x98, 0x63, 0xBC,
+        0x2D, 0xED, 0x5D, 0x5A, 0xA8, 0x25, 0x3A, 0xA1, 0x0A, 0x2E, 0xF1, 0xC9,
+        0x8B, 0x9A, 0xC8, 0xB5, 0x7F, 0x11, 0x17, 0xA7, 0x2B, 0xF2, 0xC7, 0xB9,
+        0xE7, 0xC1, 0xAC, 0x4D, 0x77, 0xFC, 0x94, 0xCA, 0xDC, 0x08, 0x3E, 0x67,
+        0x98, 0x40, 0x50, 0xB7, 0x5E, 0xBA, 0xE5, 0xDD, 0x28, 0x09, 0xBD, 0x63,
+        0x80, 0x16, 0xF7, 0x23,
+        /* x */
+        0x81, 0xAE, 0xE4, 0xBD, 0xD8, 0x2E, 0xD9, 0x64, 0x5A, 0x21, 0x32, 0x2E,
+        0x9C, 0x4C, 0x6A, 0x93, 0x85, 0xED, 0x9F, 0x70, 0xB5, 0xD9, 0x16, 0xC1,
+        0xB4, 0x3B, 0x62, 0xEE, 0xF4, 0xD0, 0x09, 0x8E, 0xFF, 0x3B, 0x1F, 0x78,
+        0xE2, 0xD0, 0xD4, 0x8D, 0x50, 0xD1, 0x68, 0x7B, 0x93, 0xB9, 0x7D, 0x5F,
+        0x7C, 0x6D, 0x50, 0x47, 0x40, 0x6A, 0x5E, 0x68, 0x8B, 0x35, 0x22, 0x09,
+        0xBC, 0xB9, 0xF8, 0x22,
+        /* y */
+        0x7D, 0xDE, 0x38, 0x5D, 0x56, 0x63, 0x32, 0xEC, 0xC0, 0xEA, 0xBF, 0xA9,
+        0xCF, 0x78, 0x22, 0xFD, 0xF2, 0x09, 0xF7, 0x00, 0x24, 0xA5, 0x7B, 0x1A,
+        0xA0, 0x00, 0xC5, 0x5B, 0x88, 0x1F, 0x81, 0x11, 0xB2, 0xDC, 0xDE, 0x49,
+        0x4A, 0x5F, 0x48, 0x5E, 0x5B, 0xCA, 0x4B, 0xD8, 0x8A, 0x27, 0x63, 0xAE,
+        0xD1, 0xCA, 0x2B, 0x2F, 0xA8, 0xF0, 0x54, 0x06, 0x78, 0xCD, 0x1E, 0x0F,
+        0x3A, 0xD8, 0x08, 0x92,
+        /* order */
+        0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE,
+        0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E,
+        0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x70, 0x55, 0x3E, 0x5C, 0x41,
+        0x4C, 0xA9, 0x26, 0x19, 0x41, 0x86, 0x61, 0x19, 0x7F, 0xAC, 0x10, 0x47,
+        0x1D, 0xB1, 0xD3, 0x81, 0x08, 0x5D, 0xDA, 0xDD, 0xB5, 0x87, 0x96, 0x82,
+        0x9C, 0xA9, 0x00, 0x69
+    }
+};
+
+static const struct {
+    EC_CURVE_DATA h;
+    unsigned char data[0 + 64 * 6];
+} _EC_brainpoolP512t1 = {
+    {
+        NID_X9_62_prime_field, 0, 64, 1
+    },
+    {
+        /* no seed */
+        /* p */
+        0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE,
+        0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E,
+        0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x71, 0x7D, 0x4D, 0x9B, 0x00,
+        0x9B, 0xC6, 0x68, 0x42, 0xAE, 0xCD, 0xA1, 0x2A, 0xE6, 0xA3, 0x80, 0xE6,
+        0x28, 0x81, 0xFF, 0x2F, 0x2D, 0x82, 0xC6, 0x85, 0x28, 0xAA, 0x60, 0x56,
+        0x58, 0x3A, 0x48, 0xF3,
+        /* a */
+        0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE,
+        0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E,
+        0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x71, 0x7D, 0x4D, 0x9B, 0x00,
+        0x9B, 0xC6, 0x68, 0x42, 0xAE, 0xCD, 0xA1, 0x2A, 0xE6, 0xA3, 0x80, 0xE6,
+        0x28, 0x81, 0xFF, 0x2F, 0x2D, 0x82, 0xC6, 0x85, 0x28, 0xAA, 0x60, 0x56,
+        0x58, 0x3A, 0x48, 0xF0,
+        /* b */
+        0x7C, 0xBB, 0xBC, 0xF9, 0x44, 0x1C, 0xFA, 0xB7, 0x6E, 0x18, 0x90, 0xE4,
+        0x68, 0x84, 0xEA, 0xE3, 0x21, 0xF7, 0x0C, 0x0B, 0xCB, 0x49, 0x81, 0x52,
+        0x78, 0x97, 0x50, 0x4B, 0xEC, 0x3E, 0x36, 0xA6, 0x2B, 0xCD, 0xFA, 0x23,
+        0x04, 0x97, 0x65, 0x40, 0xF6, 0x45, 0x00, 0x85, 0xF2, 0xDA, 0xE1, 0x45,
+        0xC2, 0x25, 0x53, 0xB4, 0x65, 0x76, 0x36, 0x89, 0x18, 0x0E, 0xA2, 0x57,
+        0x18, 0x67, 0x42, 0x3E,
+        /* x */
+        0x64, 0x0E, 0xCE, 0x5C, 0x12, 0x78, 0x87, 0x17, 0xB9, 0xC1, 0xBA, 0x06,
+        0xCB, 0xC2, 0xA6, 0xFE, 0xBA, 0x85, 0x84, 0x24, 0x58, 0xC5, 0x6D, 0xDE,
+        0x9D, 0xB1, 0x75, 0x8D, 0x39, 0xC0, 0x31, 0x3D, 0x82, 0xBA, 0x51, 0x73,
+        0x5C, 0xDB, 0x3E, 0xA4, 0x99, 0xAA, 0x77, 0xA7, 0xD6, 0x94, 0x3A, 0x64,
+        0xF7, 0xA3, 0xF2, 0x5F, 0xE2, 0x6F, 0x06, 0xB5, 0x1B, 0xAA, 0x26, 0x96,
+        0xFA, 0x90, 0x35, 0xDA,
+        /* y */
+        0x5B, 0x53, 0x4B, 0xD5, 0x95, 0xF5, 0xAF, 0x0F, 0xA2, 0xC8, 0x92, 0x37,
+        0x6C, 0x84, 0xAC, 0xE1, 0xBB, 0x4E, 0x30, 0x19, 0xB7, 0x16, 0x34, 0xC0,
+        0x11, 0x31, 0x15, 0x9C, 0xAE, 0x03, 0xCE, 0xE9, 0xD9, 0x93, 0x21, 0x84,
+        0xBE, 0xEF, 0x21, 0x6B, 0xD7, 0x1D, 0xF2, 0xDA, 0xDF, 0x86, 0xA6, 0x27,
+        0x30, 0x6E, 0xCF, 0xF9, 0x6D, 0xBB, 0x8B, 0xAC, 0xE1, 0x98, 0xB6, 0x1E,
+        0x00, 0xF8, 0xB3, 0x32,
+        /* order */
+        0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE,
+        0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E,
+        0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x70, 0x55, 0x3E, 0x5C, 0x41,
+        0x4C, 0xA9, 0x26, 0x19, 0x41, 0x86, 0x61, 0x19, 0x7F, 0xAC, 0x10, 0x47,
+        0x1D, 0xB1, 0xD3, 0x81, 0x08, 0x5D, 0xDA, 0xDD, 0xB5, 0x87, 0x96, 0x82,
+        0x9C, 0xA9, 0x00, 0x69
+    }
+};
+
 typedef struct _ec_list_element_st {
     int nid;
     const EC_CURVE_DATA *data;
@@ -2343,13 +2896,15 @@ static const ec_list_element curve_list[] = {
      "X9.62 curve over a 239 bit prime field"},
     {NID_X9_62_prime239v3, &_EC_X9_62_PRIME_239V3.h, 0,
      "X9.62 curve over a 239 bit prime field"},
-#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
-    {NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, EC_GFp_nistp256_method,
-     "X9.62/SECG curve over a 256 bit prime field"},
+    {NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h,
+#if defined(ECP_NISTZ256_ASM)
+     EC_GFp_nistz256_method,
+#elif !defined(OPENSSL_NO_EC_NISTP_64_GCC_128)
+     EC_GFp_nistp256_method,
 #else
-    {NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, 0,
-     "X9.62/SECG curve over a 256 bit prime field"},
+     0,
 #endif
+     "X9.62/SECG curve over a 256 bit prime field"},
 #ifndef OPENSSL_NO_EC2M
     /* characteristic two field curves */
     /* NIST/SECG curves */
@@ -2460,6 +3015,35 @@ static const ec_list_element curve_list[] = {
      "\n\tIPSec/IKE/Oakley curve #4 over a 185 bit binary field.\n"
      "\tNot suitable for ECDSA.\n\tQuestionable extension field!"},
 #endif
+    /* brainpool curves */
+    {NID_brainpoolP160r1, &_EC_brainpoolP160r1.h, 0,
+     "RFC 5639 curve over a 160 bit prime field"},
+    {NID_brainpoolP160t1, &_EC_brainpoolP160t1.h, 0,
+     "RFC 5639 curve over a 160 bit prime field"},
+    {NID_brainpoolP192r1, &_EC_brainpoolP192r1.h, 0,
+     "RFC 5639 curve over a 192 bit prime field"},
+    {NID_brainpoolP192t1, &_EC_brainpoolP192t1.h, 0,
+     "RFC 5639 curve over a 192 bit prime field"},
+    {NID_brainpoolP224r1, &_EC_brainpoolP224r1.h, 0,
+     "RFC 5639 curve over a 224 bit prime field"},
+    {NID_brainpoolP224t1, &_EC_brainpoolP224t1.h, 0,
+     "RFC 5639 curve over a 224 bit prime field"},
+    {NID_brainpoolP256r1, &_EC_brainpoolP256r1.h, 0,
+     "RFC 5639 curve over a 256 bit prime field"},
+    {NID_brainpoolP256t1, &_EC_brainpoolP256t1.h, 0,
+     "RFC 5639 curve over a 256 bit prime field"},
+    {NID_brainpoolP320r1, &_EC_brainpoolP320r1.h, 0,
+     "RFC 5639 curve over a 320 bit prime field"},
+    {NID_brainpoolP320t1, &_EC_brainpoolP320t1.h, 0,
+     "RFC 5639 curve over a 320 bit prime field"},
+    {NID_brainpoolP384r1, &_EC_brainpoolP384r1.h, 0,
+     "RFC 5639 curve over a 384 bit prime field"},
+    {NID_brainpoolP384t1, &_EC_brainpoolP384t1.h, 0,
+     "RFC 5639 curve over a 384 bit prime field"},
+    {NID_brainpoolP512r1, &_EC_brainpoolP512r1.h, 0,
+     "RFC 5639 curve over a 512 bit prime field"},
+    {NID_brainpoolP512t1, &_EC_brainpoolP512t1.h, 0,
+     "RFC 5639 curve over a 512 bit prime field"},
 };
 
 #define curve_list_length (sizeof(curve_list)/sizeof(ec_list_element))
@@ -2578,6 +3162,10 @@ EC_GROUP *EC_GROUP_new_by_curve_name(int nid)
     size_t i;
     EC_GROUP *ret = NULL;
 
+#ifdef OPENSSL_FIPS
+    if (FIPS_mode())
+        return FIPS_ec_group_new_by_curve_name(nid);
+#endif
     if (nid <= 0)
         return NULL;
 
@@ -2613,3 +3201,48 @@ size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems)
 
     return curve_list_length;
 }
+
+/* Functions to translate between common NIST curve names and NIDs */
+
+typedef struct {
+    const char *name;           /* NIST name of the curve, e.g. "P-256" */
+    int nid;                    /* NID paired with that name */
+} EC_NIST_NAME;
+/* Name/NID pairs; scanned front to back by the two lookup functions below. */
+static EC_NIST_NAME nist_curves[] = {
+    {"B-163", NID_sect163r2},
+    {"B-233", NID_sect233r1},
+    {"B-283", NID_sect283r1},
+    {"B-409", NID_sect409r1},
+    {"B-571", NID_sect571r1},
+    {"K-163", NID_sect163k1},
+    {"K-233", NID_sect233k1},
+    {"K-283", NID_sect283k1},
+    {"K-409", NID_sect409k1},
+    {"K-571", NID_sect571k1},
+    {"P-192", NID_X9_62_prime192v1},
+    {"P-224", NID_secp224r1},
+    {"P-256", NID_X9_62_prime256v1},
+    {"P-384", NID_secp384r1},
+    {"P-521", NID_secp521r1}
+};
+/* Returns the NIST name listed for |nid|, or NULL if nist_curves has none. */
+const char *EC_curve_nid2nist(int nid)
+{
+    size_t i;
+    for (i = 0; i < sizeof(nist_curves) / sizeof(EC_NIST_NAME); i++) {
+        if (nist_curves[i].nid == nid)
+            return nist_curves[i].name; /* the table's string, not a copy */
+    }
+    return NULL;
+}
+/* Returns the NID listed for NIST name |name|, or NID_undef if unlisted. */
+int EC_curve_nist2nid(const char *name)
+{
+    size_t i;
+    for (i = 0; i < sizeof(nist_curves) / sizeof(EC_NIST_NAME); i++) {
+        if (!strcmp(nist_curves[i].name, name)) /* name not NULL-checked */
+            return nist_curves[i].nid;
+    }
+    return NID_undef;
+}
index 487d727..5a832ba 100644 (file)
 #include <openssl/err.h>
 #include "ec_lcl.h"
 
+#ifdef OPENSSL_FIPS
+# include <openssl/fips.h>
+#endif
+
 EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a,
                                  const BIGNUM *b, BN_CTX *ctx)
 {
     const EC_METHOD *meth;
     EC_GROUP *ret;
 
+#ifdef OPENSSL_FIPS
+    if (FIPS_mode())
+        return FIPS_ec_group_new_curve_gfp(p, a, b, ctx);
+#endif
 #if defined(OPENSSL_BN_ASM_MONT)
     /*
      * This might appear controversial, but the fact is that generic
@@ -152,6 +160,10 @@ EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a,
     const EC_METHOD *meth;
     EC_GROUP *ret;
 
+# ifdef OPENSSL_FIPS
+    if (FIPS_mode())
+        return FIPS_ec_group_new_curve_gf2m(p, a, b, ctx);
+# endif
     meth = EC_GF2m_simple_method();
 
     ret = EC_GROUP_new(meth);
index 58eae7c..6fe5baa 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/ec/ec_err.c */
 /* ====================================================================
- * Copyright (c) 1999-2011 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2015 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -76,6 +76,8 @@ static ERR_STRING_DATA EC_str_functs[] = {
     {ERR_FUNC(EC_F_D2I_ECPKPARAMETERS), "d2i_ECPKParameters"},
     {ERR_FUNC(EC_F_D2I_ECPRIVATEKEY), "d2i_ECPrivateKey"},
     {ERR_FUNC(EC_F_DO_EC_KEY_PRINT), "DO_EC_KEY_PRINT"},
+    {ERR_FUNC(EC_F_ECDH_CMS_DECRYPT), "ECDH_CMS_DECRYPT"},
+    {ERR_FUNC(EC_F_ECDH_CMS_SET_SHARED_INFO), "ECDH_CMS_SET_SHARED_INFO"},
     {ERR_FUNC(EC_F_ECKEY_PARAM2TYPE), "ECKEY_PARAM2TYPE"},
     {ERR_FUNC(EC_F_ECKEY_PARAM_DECODE), "ECKEY_PARAM_DECODE"},
     {ERR_FUNC(EC_F_ECKEY_PRIV_DECODE), "ECKEY_PRIV_DECODE"},
@@ -87,6 +89,13 @@ static ERR_STRING_DATA EC_str_functs[] = {
     {ERR_FUNC(EC_F_ECPARAMETERS_PRINT_FP), "ECParameters_print_fp"},
     {ERR_FUNC(EC_F_ECPKPARAMETERS_PRINT), "ECPKParameters_print"},
     {ERR_FUNC(EC_F_ECPKPARAMETERS_PRINT_FP), "ECPKParameters_print_fp"},
+    {ERR_FUNC(EC_F_ECP_NISTZ256_GET_AFFINE), "ecp_nistz256_get_affine"},
+    {ERR_FUNC(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE),
+     "ecp_nistz256_mult_precompute"},
+    {ERR_FUNC(EC_F_ECP_NISTZ256_POINTS_MUL), "ecp_nistz256_points_mul"},
+    {ERR_FUNC(EC_F_ECP_NISTZ256_PRE_COMP_NEW), "ecp_nistz256_pre_comp_new"},
+    {ERR_FUNC(EC_F_ECP_NISTZ256_SET_WORDS), "ecp_nistz256_set_words"},
+    {ERR_FUNC(EC_F_ECP_NISTZ256_WINDOWED_MUL), "ecp_nistz256_windowed_mul"},
     {ERR_FUNC(EC_F_ECP_NIST_MOD_192), "ECP_NIST_MOD_192"},
     {ERR_FUNC(EC_F_ECP_NIST_MOD_224), "ECP_NIST_MOD_224"},
     {ERR_FUNC(EC_F_ECP_NIST_MOD_256), "ECP_NIST_MOD_256"},
@@ -271,6 +280,7 @@ static ERR_STRING_DATA EC_str_reasons[] = {
     {ERR_REASON(EC_R_INVALID_COMPRESSED_POINT), "invalid compressed point"},
     {ERR_REASON(EC_R_INVALID_COMPRESSION_BIT), "invalid compression bit"},
     {ERR_REASON(EC_R_INVALID_CURVE), "invalid curve"},
+    {ERR_REASON(EC_R_INVALID_DIGEST), "invalid digest"},
     {ERR_REASON(EC_R_INVALID_DIGEST_TYPE), "invalid digest type"},
     {ERR_REASON(EC_R_INVALID_ENCODING), "invalid encoding"},
     {ERR_REASON(EC_R_INVALID_FIELD), "invalid field"},
@@ -279,6 +289,7 @@ static ERR_STRING_DATA EC_str_reasons[] = {
     {ERR_REASON(EC_R_INVALID_PENTANOMIAL_BASIS), "invalid pentanomial basis"},
     {ERR_REASON(EC_R_INVALID_PRIVATE_KEY), "invalid private key"},
     {ERR_REASON(EC_R_INVALID_TRINOMIAL_BASIS), "invalid trinomial basis"},
+    {ERR_REASON(EC_R_KDF_PARAMETER_ERROR), "kdf parameter error"},
     {ERR_REASON(EC_R_KEYS_NOT_SET), "keys not set"},
     {ERR_REASON(EC_R_MISSING_PARAMETERS), "missing parameters"},
     {ERR_REASON(EC_R_MISSING_PRIVATE_KEY), "missing private key"},
@@ -290,10 +301,12 @@ static ERR_STRING_DATA EC_str_reasons[] = {
     {ERR_REASON(EC_R_NO_FIELD_MOD), "no field mod"},
     {ERR_REASON(EC_R_NO_PARAMETERS_SET), "no parameters set"},
     {ERR_REASON(EC_R_PASSED_NULL_PARAMETER), "passed null parameter"},
+    {ERR_REASON(EC_R_PEER_KEY_ERROR), "peer key error"},
     {ERR_REASON(EC_R_PKPARAMETERS2GROUP_FAILURE),
      "pkparameters2group failure"},
     {ERR_REASON(EC_R_POINT_AT_INFINITY), "point at infinity"},
     {ERR_REASON(EC_R_POINT_IS_NOT_ON_CURVE), "point is not on curve"},
+    {ERR_REASON(EC_R_SHARED_INFO_ERROR), "shared info error"},
     {ERR_REASON(EC_R_SLOT_FULL), "slot full"},
     {ERR_REASON(EC_R_UNDEFINED_GENERATOR), "undefined generator"},
     {ERR_REASON(EC_R_UNDEFINED_ORDER), "undefined order"},
index d79ed1e..969fd14 100644 (file)
@@ -212,6 +212,13 @@ struct ec_group_st {
     BIGNUM order, cofactor;
     int curve_name;             /* optional NID for named curve */
     int asn1_flag;              /* flag to control the asn1 encoding */
+    /*
+     * Kludge: upper bit of asn1_flag is used to denote structure
+     * version. If set, then last field is present. This is done
+     * for interoperation with FIPS code.
+     */
+#define EC_GROUP_ASN1_FLAG_MASK 0x7fffffff
+#define EC_GROUP_VERSION(p) (p->asn1_flag&~EC_GROUP_ASN1_FLAG_MASK)
     point_conversion_form_t asn1_form;
     unsigned char *seed;        /* optional seed for parameters (appears in
                                  * ASN1) */
@@ -252,6 +259,7 @@ struct ec_group_st {
     /* method-specific */
     int (*field_mod_func) (BIGNUM *, const BIGNUM *, const BIGNUM *,
                            BN_CTX *);
+    BN_MONT_CTX *mont_data;     /* data for ECDSA inverse */
 } /* EC_GROUP */ ;
 
 struct ec_key_st {
@@ -541,3 +549,20 @@ void ec_GFp_nistp_points_make_affine_internal(size_t num, void *point_array,
 void ec_GFp_nistp_recode_scalar_bits(unsigned char *sign,
                                      unsigned char *digit, unsigned char in);
 #endif
+int ec_precompute_mont_data(EC_GROUP *);
+
+#ifdef ECP_NISTZ256_ASM
+/** Returns GFp methods using montgomery multiplication, with x86-64 optimized
+ * P256. See http://eprint.iacr.org/2013/816.
+ *  \return  EC_METHOD object
+ */
+const EC_METHOD *EC_GFp_nistz256_method(void);
+#endif
+
+#ifdef OPENSSL_FIPS
+EC_GROUP *FIPS_ec_group_new_curve_gfp(const BIGNUM *p, const BIGNUM *a,
+                                      const BIGNUM *b, BN_CTX *ctx);
+EC_GROUP *FIPS_ec_group_new_curve_gf2m(const BIGNUM *p, const BIGNUM *a,
+                                       const BIGNUM *b, BN_CTX *ctx);
+EC_GROUP *FIPS_ec_group_new_by_curve_name(int nid);
+#endif
index e227520..3ffa112 100644 (file)
@@ -94,13 +94,14 @@ EC_GROUP *EC_GROUP_new(const EC_METHOD *meth)
     ret->meth = meth;
 
     ret->extra_data = NULL;
+    ret->mont_data = NULL;
 
     ret->generator = NULL;
     BN_init(&ret->order);
     BN_init(&ret->cofactor);
 
     ret->curve_name = 0;
-    ret->asn1_flag = 0;
+    ret->asn1_flag = ~EC_GROUP_ASN1_FLAG_MASK;
     ret->asn1_form = POINT_CONVERSION_UNCOMPRESSED;
 
     ret->seed = NULL;
@@ -124,6 +125,9 @@ void EC_GROUP_free(EC_GROUP *group)
 
     EC_EX_DATA_free_all_data(&group->extra_data);
 
+    if (EC_GROUP_VERSION(group) && group->mont_data)
+        BN_MONT_CTX_free(group->mont_data);
+
     if (group->generator != NULL)
         EC_POINT_free(group->generator);
     BN_free(&group->order);
@@ -147,6 +151,9 @@ void EC_GROUP_clear_free(EC_GROUP *group)
 
     EC_EX_DATA_clear_free_all_data(&group->extra_data);
 
+    if (EC_GROUP_VERSION(group) && group->mont_data)
+        BN_MONT_CTX_free(group->mont_data);
+
     if (group->generator != NULL)
         EC_POINT_clear_free(group->generator);
     BN_clear_free(&group->order);
@@ -189,6 +196,22 @@ int EC_GROUP_copy(EC_GROUP *dest, const EC_GROUP *src)
             return 0;
     }
 
+    if (EC_GROUP_VERSION(src) && src->mont_data != NULL) {
+        if (dest->mont_data == NULL) {
+            dest->mont_data = BN_MONT_CTX_new();
+            if (dest->mont_data == NULL)
+                return 0;
+        }
+        if (!BN_MONT_CTX_copy(dest->mont_data, src->mont_data))
+            return 0;
+    } else {
+        /* src has no mont_data to copy: drop any that dest still holds */
+        if (EC_GROUP_VERSION(dest) && dest->mont_data != NULL) {
+            BN_MONT_CTX_free(dest->mont_data);
+            dest->mont_data = NULL;
+        }
+    }
+
     if (src->generator != NULL) {
         if (dest->generator == NULL) {
             dest->generator = EC_POINT_new(dest);
@@ -295,6 +318,13 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator,
     } else
         BN_zero(&group->cofactor);
 
+    /*
+     * We ignore the return value because some groups have an order with
+     * factors of two, which makes the Montgomery setup fail.
+     * |group->mont_data| will be NULL in this case.
+     */
+    ec_precompute_mont_data(group);
+
     return 1;
 }
 
@@ -303,6 +333,11 @@ const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group)
     return group->generator;
 }
 
+/* Returns group->mont_data; NULL if the asn1_flag version bit is clear. */
+BN_MONT_CTX *EC_GROUP_get_mont_data(const EC_GROUP *group)
+{
+    return EC_GROUP_VERSION(group) ? group->mont_data : NULL;
+}
 int EC_GROUP_get_order(const EC_GROUP *group, BIGNUM *order, BN_CTX *ctx)
 {
     if (!BN_copy(order, &group->order))
@@ -332,12 +367,13 @@ int EC_GROUP_get_curve_name(const EC_GROUP *group)
 
 void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag)
 {
-    group->asn1_flag = flag;
+    group->asn1_flag &= ~EC_GROUP_ASN1_FLAG_MASK;
+    group->asn1_flag |= flag & EC_GROUP_ASN1_FLAG_MASK;
 }
 
 int EC_GROUP_get_asn1_flag(const EC_GROUP *group)
 {
-    return group->asn1_flag;
+    return group->asn1_flag & EC_GROUP_ASN1_FLAG_MASK;
 }
 
 void EC_GROUP_set_point_conversion_form(EC_GROUP *group,
@@ -1057,3 +1093,42 @@ int EC_GROUP_have_precompute_mult(const EC_GROUP *group)
         return 0;               /* cannot tell whether precomputation has
                                  * been performed */
 }
+
+/*
+ * ec_precompute_mont_data rebuilds |group->mont_data| from |group->order|.
+ * It returns one on success and zero on any failure.
+ */
+int ec_precompute_mont_data(EC_GROUP *group)
+{
+    BN_CTX *ctx = BN_CTX_new(); /* checked for NULL further down */
+    int ret = 0;
+    /* Version bit clear: fail without touching |group->mont_data|. */
+    if (!EC_GROUP_VERSION(group))
+        goto err;
+    /* Free any previous context first, so later failures leave it NULL. */
+    if (group->mont_data) {
+        BN_MONT_CTX_free(group->mont_data);
+        group->mont_data = NULL;
+    }
+
+    if (ctx == NULL)
+        goto err;
+
+    group->mont_data = BN_MONT_CTX_new();
+    if (!group->mont_data)
+        goto err;
+
+    if (!BN_MONT_CTX_set(group->mont_data, &group->order, ctx)) {
+        BN_MONT_CTX_free(group->mont_data);
+        group->mont_data = NULL;
+        goto err;
+    }
+
+    ret = 1;
+
+ err:
+    /* Reached on success and on failure; only |ctx| is released here. */
+    if (ctx)
+        BN_CTX_free(ctx);
+    return ret;
+}
index c189d3f..b767490 100644 (file)
@@ -61,6 +61,7 @@
 #include <openssl/asn1t.h>
 #include <openssl/x509.h>
 #include <openssl/ec.h>
+#include "ec_lcl.h"
 #include <openssl/ecdsa.h>
 #include <openssl/evp.h>
 #include "evp_locl.h"
@@ -72,6 +73,19 @@ typedef struct {
     EC_GROUP *gen_group;
     /* message digest */
     const EVP_MD *md;
+    /* Duplicate key if custom cofactor needed */
+    EC_KEY *co_key;
+    /* Cofactor mode */
+    signed char cofactor_mode;
+    /* KDF (if any) to use for ECDH */
+    char kdf_type;
+    /* Message digest to use for key derivation */
+    const EVP_MD *kdf_md;
+    /* User key material */
+    unsigned char *kdf_ukm;
+    size_t kdf_ukmlen;
+    /* KDF output length */
+    size_t kdf_outlen;
 } EC_PKEY_CTX;
 
 static int pkey_ec_init(EVP_PKEY_CTX *ctx)
@@ -83,6 +97,14 @@ static int pkey_ec_init(EVP_PKEY_CTX *ctx)
     dctx->gen_group = NULL;
     dctx->md = NULL;
 
+    dctx->cofactor_mode = -1;
+    dctx->co_key = NULL;
+    dctx->kdf_type = EVP_PKEY_ECDH_KDF_NONE;
+    dctx->kdf_md = NULL;
+    dctx->kdf_outlen = 0;
+    dctx->kdf_ukm = NULL;
+    dctx->kdf_ukmlen = 0;
+
     ctx->data = dctx;
 
     return 1;
@@ -101,6 +123,22 @@ static int pkey_ec_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src)
             return 0;
     }
     dctx->md = sctx->md;
+
+    if (sctx->co_key) {
+        dctx->co_key = EC_KEY_dup(sctx->co_key);
+        if (!dctx->co_key)
+            return 0;
+    }
+    dctx->kdf_type = sctx->kdf_type;
+    dctx->kdf_md = sctx->kdf_md;
+    dctx->kdf_outlen = sctx->kdf_outlen;
+    if (sctx->kdf_ukm) {
+        dctx->kdf_ukm = BUF_memdup(sctx->kdf_ukm, sctx->kdf_ukmlen);
+        if (!dctx->kdf_ukm)
+            return 0;
+    } else
+        dctx->kdf_ukm = NULL;
+    dctx->kdf_ukmlen = sctx->kdf_ukmlen;
     return 1;
 }
 
@@ -110,6 +148,10 @@ static void pkey_ec_cleanup(EVP_PKEY_CTX *ctx)
     if (dctx) {
         if (dctx->gen_group)
             EC_GROUP_free(dctx->gen_group);
+        if (dctx->co_key)
+            EC_KEY_free(dctx->co_key);
+        if (dctx->kdf_ukm)
+            OPENSSL_free(dctx->kdf_ukm);
         OPENSSL_free(dctx);
     }
 }
@@ -168,18 +210,21 @@ static int pkey_ec_derive(EVP_PKEY_CTX *ctx, unsigned char *key,
     int ret;
     size_t outlen;
     const EC_POINT *pubkey = NULL;
+    EC_KEY *eckey;
+    EC_PKEY_CTX *dctx = ctx->data;
     if (!ctx->pkey || !ctx->peerkey) {
         ECerr(EC_F_PKEY_EC_DERIVE, EC_R_KEYS_NOT_SET);
         return 0;
     }
 
+    eckey = dctx->co_key ? dctx->co_key : ctx->pkey->pkey.ec;
+
     if (!key) {
         const EC_GROUP *group;
-        group = EC_KEY_get0_group(ctx->pkey->pkey.ec);
+        group = EC_KEY_get0_group(eckey);
         *keylen = (EC_GROUP_get_degree(group) + 7) / 8;
         return 1;
     }
-
     pubkey = EC_KEY_get0_public_key(ctx->peerkey->pkey.ec);
 
     /*
@@ -189,12 +234,48 @@ static int pkey_ec_derive(EVP_PKEY_CTX *ctx, unsigned char *key,
 
     outlen = *keylen;
 
-    ret = ECDH_compute_key(key, outlen, pubkey, ctx->pkey->pkey.ec, 0);
-    if (ret < 0)
-        return ret;
+    ret = ECDH_compute_key(key, outlen, pubkey, eckey, 0);
+    if (ret <= 0)
+        return 0;
     *keylen = ret;
     return 1;
 }
+/* Derive: plain pkey_ec_derive(), or its output run through ECDH_KDF_X9_62. */
+static int pkey_ec_kdf_derive(EVP_PKEY_CTX *ctx,
+                              unsigned char *key, size_t *keylen)
+{
+    EC_PKEY_CTX *dctx = ctx->data;
+    unsigned char *ktmp = NULL;
+    size_t ktmplen;
+    int rv = 0;
+    if (dctx->kdf_type == EVP_PKEY_ECDH_KDF_NONE)
+        return pkey_ec_derive(ctx, key, keylen); /* no KDF configured */
+    if (!key) {                 /* size query: report the KDF output length */
+        *keylen = dctx->kdf_outlen;
+        return 1;
+    }
+    if (*keylen != dctx->kdf_outlen) /* caller's length must match exactly */
+        return 0;
+    if (!pkey_ec_derive(ctx, NULL, &ktmplen)) /* NULL key: length only */
+        return 0;
+    ktmp = OPENSSL_malloc(ktmplen);
+    if (!ktmp)
+        return 0;
+    if (!pkey_ec_derive(ctx, ktmp, &ktmplen))
+        goto err;
+    /* Pass the ECDH output, the UKM and kdf_md to the KDF to fill |key| */
+    if (!ECDH_KDF_X9_62(key, *keylen, ktmp, ktmplen,
+                        dctx->kdf_ukm, dctx->kdf_ukmlen, dctx->kdf_md))
+        goto err;
+    rv = 1;
+
+ err:
+    if (ktmp) {
+        OPENSSL_cleanse(ktmp, ktmplen); /* scrub the ECDH output */
+        OPENSSL_free(ktmp);
+    }
+    return rv;
+}
 #endif
 
 static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
@@ -213,6 +294,90 @@ static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
         dctx->gen_group = group;
         return 1;
 
+    case EVP_PKEY_CTRL_EC_PARAM_ENC:
+        if (!dctx->gen_group) {
+            ECerr(EC_F_PKEY_EC_CTRL, EC_R_NO_PARAMETERS_SET);
+            return 0;
+        }
+        EC_GROUP_set_asn1_flag(dctx->gen_group, p1);
+        return 1;
+
+#ifndef OPENSSL_NO_ECDH
+    case EVP_PKEY_CTRL_EC_ECDH_COFACTOR:
+        if (p1 == -2) {
+            if (dctx->cofactor_mode != -1)
+                return dctx->cofactor_mode;
+            else {
+                EC_KEY *ec_key = ctx->pkey->pkey.ec;
+                return EC_KEY_get_flags(ec_key) & EC_FLAG_COFACTOR_ECDH ? 1 :
+                    0;
+            }
+        } else if (p1 < -1 || p1 > 1)
+            return -2;
+        dctx->cofactor_mode = p1;
+        if (p1 != -1) {
+            EC_KEY *ec_key = ctx->pkey->pkey.ec;
+            if (!ec_key->group)
+                return -2;
+            /* If cofactor is 1 cofactor mode does nothing */
+            if (BN_is_one(&ec_key->group->cofactor))
+                return 1;
+            if (!dctx->co_key) {
+                dctx->co_key = EC_KEY_dup(ec_key);
+                if (!dctx->co_key)
+                    return 0;
+            }
+            if (p1)
+                EC_KEY_set_flags(dctx->co_key, EC_FLAG_COFACTOR_ECDH);
+            else
+                EC_KEY_clear_flags(dctx->co_key, EC_FLAG_COFACTOR_ECDH);
+        } else if (dctx->co_key) {
+            EC_KEY_free(dctx->co_key);
+            dctx->co_key = NULL;
+        }
+        return 1;
+#endif
+
+    case EVP_PKEY_CTRL_EC_KDF_TYPE:
+        if (p1 == -2)
+            return dctx->kdf_type;
+        if (p1 != EVP_PKEY_ECDH_KDF_NONE && p1 != EVP_PKEY_ECDH_KDF_X9_62)
+            return -2;
+        dctx->kdf_type = p1;
+        return 1;
+
+    case EVP_PKEY_CTRL_EC_KDF_MD:
+        dctx->kdf_md = p2;
+        return 1;
+
+    case EVP_PKEY_CTRL_GET_EC_KDF_MD:
+        *(const EVP_MD **)p2 = dctx->kdf_md;
+        return 1;
+
+    case EVP_PKEY_CTRL_EC_KDF_OUTLEN:
+        if (p1 <= 0)
+            return -2;
+        dctx->kdf_outlen = (size_t)p1;
+        return 1;
+
+    case EVP_PKEY_CTRL_GET_EC_KDF_OUTLEN:
+        *(int *)p2 = dctx->kdf_outlen;
+        return 1;
+
+    case EVP_PKEY_CTRL_EC_KDF_UKM:
+        if (dctx->kdf_ukm)
+            OPENSSL_free(dctx->kdf_ukm);
+        dctx->kdf_ukm = p2;
+        if (p2)
+            dctx->kdf_ukmlen = p1;
+        else
+            dctx->kdf_ukmlen = 0;
+        return 1;
+
+    case EVP_PKEY_CTRL_GET_EC_KDF_UKM:
+        *(unsigned char **)p2 = dctx->kdf_ukm;
+        return dctx->kdf_ukmlen;
+
     case EVP_PKEY_CTRL_MD:
         if (EVP_MD_type((const EVP_MD *)p2) != NID_sha1 &&
             EVP_MD_type((const EVP_MD *)p2) != NID_ecdsa_with_SHA1 &&
@@ -226,6 +391,10 @@ static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
         dctx->md = p2;
         return 1;
 
+    case EVP_PKEY_CTRL_GET_MD:
+        *(const EVP_MD **)p2 = dctx->md;
+        return 1;
+
     case EVP_PKEY_CTRL_PEER_KEY:
         /* Default behaviour is OK */
     case EVP_PKEY_CTRL_DIGESTINIT:
@@ -244,7 +413,9 @@ static int pkey_ec_ctrl_str(EVP_PKEY_CTX *ctx,
 {
     if (!strcmp(type, "ec_paramgen_curve")) {
         int nid;
-        nid = OBJ_sn2nid(value);
+        nid = EC_curve_nist2nid(value);
+        if (nid == NID_undef)
+            nid = OBJ_sn2nid(value);
         if (nid == NID_undef)
             nid = OBJ_ln2nid(value);
         if (nid == NID_undef) {
@@ -252,7 +423,28 @@ static int pkey_ec_ctrl_str(EVP_PKEY_CTX *ctx,
             return 0;
         }
         return EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx, nid);
+    } else if (!strcmp(type, "ec_param_enc")) {
+        int param_enc;
+        if (!strcmp(value, "explicit"))
+            param_enc = 0;
+        else if (!strcmp(value, "named_curve"))
+            param_enc = OPENSSL_EC_NAMED_CURVE;
+        else
+            return -2;
+        return EVP_PKEY_CTX_set_ec_param_enc(ctx, param_enc);
+    } else if (!strcmp(type, "ecdh_kdf_md")) {
+        const EVP_MD *md;
+        if (!(md = EVP_get_digestbyname(value))) {
+            ECerr(EC_F_PKEY_EC_CTRL_STR, EC_R_INVALID_DIGEST);
+            return 0;
+        }
+        return EVP_PKEY_CTX_set_ecdh_kdf_md(ctx, md);
+    } else if (!strcmp(type, "ecdh_cofactor_mode")) {
+        int co_mode;
+        co_mode = atoi(value);
+        return EVP_PKEY_CTX_set_ecdh_cofactor_mode(ctx, co_mode);
     }
+
     return -2;
 }
 
@@ -279,7 +471,8 @@ static int pkey_ec_paramgen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey)
 static int pkey_ec_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey)
 {
     EC_KEY *ec = NULL;
-    if (ctx->pkey == NULL) {
+    EC_PKEY_CTX *dctx = ctx->data;
+    if (ctx->pkey == NULL && dctx->gen_group == NULL) {
         ECerr(EC_F_PKEY_EC_KEYGEN, EC_R_NO_PARAMETERS_SET);
         return 0;
     }
@@ -287,9 +480,14 @@ static int pkey_ec_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey)
     if (!ec)
         return 0;
     EVP_PKEY_assign_EC_KEY(pkey, ec);
-    /* Note: if error return, pkey is freed by parent routine */
-    if (!EVP_PKEY_copy_parameters(pkey, ctx->pkey))
-        return 0;
+    if (ctx->pkey) {
+        /* Note: if error return, pkey is freed by parent routine */
+        if (!EVP_PKEY_copy_parameters(pkey, ctx->pkey))
+            return 0;
+    } else {
+        if (!EC_KEY_set_group(ec, dctx->gen_group))
+            return 0;
+    }
     return EC_KEY_generate_key(pkey->pkey.ec);
 }
 
@@ -322,7 +520,7 @@ const EVP_PKEY_METHOD ec_pkey_meth = {
 
     0,
 #ifndef OPENSSL_NO_ECDH
-    pkey_ec_derive,
+    pkey_ec_kdf_derive,
 #else
     0,
 #endif
index 5ef12ec..df9b37a 100644 (file)
@@ -171,6 +171,7 @@ int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off)
     if (EC_GROUP_get_asn1_flag(x)) {
         /* the curve parameter are given by an asn1 OID */
         int nid;
+        const char *nname;
 
         if (!BIO_indent(bp, off, 128))
             goto err;
@@ -183,6 +184,13 @@ int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off)
             goto err;
         if (BIO_printf(bp, "\n") <= 0)
             goto err;
+        nname = EC_curve_nid2nist(nid);
+        if (nname) {
+            if (!BIO_indent(bp, off, 128))
+                goto err;
+            if (BIO_printf(bp, "NIST CURVE: %s\n", nname) <= 0)
+                goto err;
+        }
     } else {
         /* explicit parameters */
         int is_char_two = 0;
index cc76345..360b9a3 100644 (file)
@@ -1282,11 +1282,11 @@ static void point_add(felem x3, felem y3, felem z3,
     felem_scalar128(tmp2, 2);
     /* tmp2[i] < 17*2^121 */
     felem_diff128(tmp, tmp2);
-    /*-
-     * tmp[i] < 2^127 - 2^69 + 17*2^122
-     *        = 2^126 - 2^122 - 2^6 - 2^2 - 1
-     *        < 2^127
-     */
+        /*-
+         * tmp[i] < 2^127 - 2^69 + 17*2^122
+         *        = 2^126 - 2^122 - 2^6 - 2^2 - 1
+         *        < 2^127
+         */
     felem_reduce(y_out, tmp);
 
     copy_conditional(x_out, x2, z1_is_zero);
diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c
new file mode 100644 (file)
index 0000000..ca44d0a
--- /dev/null
@@ -0,0 +1,1521 @@
+/******************************************************************************
+ *                                                                            *
+ * Copyright 2014 Intel Corporation                                           *
+ *                                                                            *
+ * Licensed under the Apache License, Version 2.0 (the "License");            *
+ * you may not use this file except in compliance with the License.           *
+ * You may obtain a copy of the License at                                    *
+ *                                                                            *
+ *    http://www.apache.org/licenses/LICENSE-2.0                              *
+ *                                                                            *
+ * Unless required by applicable law or agreed to in writing, software        *
+ * distributed under the License is distributed on an "AS IS" BASIS,          *
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   *
+ * See the License for the specific language governing permissions and        *
+ * limitations under the License.                                             *
+ *                                                                            *
+ ******************************************************************************
+ *                                                                            *
+ * Developers and authors:                                                    *
+ * Shay Gueron (1, 2), and Vlad Krasnov (1)                                   *
+ * (1) Intel Corporation, Israel Development Center                           *
+ * (2) University of Haifa                                                    *
+ * Reference:                                                                 *
+ * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with *
+ *                          256 Bit Primes"                                   *
+ *                                                                            *
+ ******************************************************************************/
+
+#include <string.h>
+
+#include <openssl/bn.h>
+#include <openssl/err.h>
+#include <openssl/ec.h>
+#include "cryptlib.h"
+
+#include "ec_lcl.h"
+
+#if BN_BITS2 != 64
+# define TOBN(hi,lo)    lo,hi
+#else
+# define TOBN(hi,lo)    ((BN_ULONG)hi<<32|lo)
+#endif
+
+#if defined(__GNUC__)
+# define ALIGN32        __attribute((aligned(32)))
+#elif defined(_MSC_VER)
+# define ALIGN32        __declspec(align(32))
+#else
+# define ALIGN32
+#endif
+
+/*
+ * Round |p| up to the next multiple of |N| bytes. An already aligned |p|
+ * is still advanced by |N|, so callers must over-allocate by at least |N|
+ * bytes. Arguments are parenthesized so that expression arguments such as
+ * `buf + 1` expand correctly.
+ */
+#define ALIGNPTR(p,N)   ((unsigned char *)(p)+(N)-(size_t)(p)%(N))
+#define P256_LIMBS      (256/BN_BITS2)
+
+typedef unsigned short u16;
+
+/*
+ * A point held as three field elements X, Y and Z of P256_LIMBS words each.
+ * The point-addition routines in this file treat a point whose X and Y
+ * words are all zero as the point at infinity.
+ */
+typedef struct {
+    BN_ULONG X[P256_LIMBS];
+    BN_ULONG Y[P256_LIMBS];
+    BN_ULONG Z[P256_LIMBS];
+} P256_POINT;
+
+/*
+ * A point held as two field elements X and Y only (no Z coordinate).
+ * In this representation the point at infinity is encoded as (0,0).
+ */
+typedef struct {
+    BN_ULONG X[P256_LIMBS];
+    BN_ULONG Y[P256_LIMBS];
+} P256_POINT_AFFINE;
+
+typedef P256_POINT_AFFINE PRECOMP256_ROW[64];
+
+/* structure for precomputed multiples of the generator */
+typedef struct ec_pre_comp_st {
+    const EC_GROUP *group;      /* Parent EC_GROUP object */
+    size_t w;                   /* Window size */
+    /*
+     * Constant time access to the X and Y coordinates of the pre-computed
+     * generator multiples, in the Montgomery domain. Pre-calculated
+     * multiples are stored in affine form.
+     */
+    PRECOMP256_ROW *precomp;
+    void *precomp_storage;      /* Unaligned allocation backing |precomp| */
+    int references;             /* presumably a reference count - confirm */
+} EC_PRE_COMP;
+
+/* Functions implemented in assembly */
+/* Modular mul by 2: res = 2*a mod P */
+void ecp_nistz256_mul_by_2(BN_ULONG res[P256_LIMBS],
+                           const BN_ULONG a[P256_LIMBS]);
+/* Modular div by 2: res = a/2 mod P */
+void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS],
+                           const BN_ULONG a[P256_LIMBS]);
+/* Modular mul by 3: res = 3*a mod P */
+void ecp_nistz256_mul_by_3(BN_ULONG res[P256_LIMBS],
+                           const BN_ULONG a[P256_LIMBS]);
+/* Modular add: res = a+b mod P   */
+void ecp_nistz256_add(BN_ULONG res[P256_LIMBS],
+                      const BN_ULONG a[P256_LIMBS],
+                      const BN_ULONG b[P256_LIMBS]);
+/* Modular sub: res = a-b mod P   */
+void ecp_nistz256_sub(BN_ULONG res[P256_LIMBS],
+                      const BN_ULONG a[P256_LIMBS],
+                      const BN_ULONG b[P256_LIMBS]);
+/* Modular neg: res = -a mod P    */
+void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]);
+/* Montgomery mul: res = a*b*2^-256 mod P */
+void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS],
+                           const BN_ULONG a[P256_LIMBS],
+                           const BN_ULONG b[P256_LIMBS]);
+/* Montgomery sqr: res = a*a*2^-256 mod P */
+void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS],
+                           const BN_ULONG a[P256_LIMBS]);
+/* Convert a number from Montgomery domain, by multiplying with 1 */
+void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS],
+                            const BN_ULONG in[P256_LIMBS]);
+/* Convert a number to Montgomery domain, by multiplying with 2^512 mod P*/
+void ecp_nistz256_to_mont(BN_ULONG res[P256_LIMBS],
+                          const BN_ULONG in[P256_LIMBS]);
+/* Functions that perform constant time access to the precomputed tables */
+void ecp_nistz256_select_w5(P256_POINT * val,
+                            const P256_POINT * in_t, int index);
+void ecp_nistz256_select_w7(P256_POINT_AFFINE * val,
+                            const P256_POINT_AFFINE * in_t, int index);
+
+/*
+ * One converted into the Montgomery domain. With 32-bit limbs TOBN() emits
+ * the low half first, so the most significant word, ONE[7], is zero.
+ */
+static const BN_ULONG ONE[P256_LIMBS] = {
+    TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000),
+    TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe)
+};
+
+static void *ecp_nistz256_pre_comp_dup(void *);
+static void ecp_nistz256_pre_comp_free(void *);
+static void ecp_nistz256_pre_comp_clear_free(void *);
+static EC_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group);
+
+/* Precomputed tables for the default generator */
+#include "ecp_nistz256_table.c"
+
+/*
+ * Recode a 6-bit window to a signed digit, see ecp_nistputil.c for details.
+ * The result packs the digit magnitude in the bits above bit 0 and the sign
+ * flag in bit 0; callers use |ret >> 1| as the table index and negate the
+ * selected point when |ret & 1| is set. The sequence is branch-free.
+ * |in| is expected to be below 64 (callers mask it to six bits).
+ */
+static unsigned int _booth_recode_w5(unsigned int in)
+{
+    unsigned int s, d;
+
+    s = ~((in >> 5) - 1);       /* all ones if bit 5 of |in| is set, else 0 */
+    d = (1 << 6) - in - 1;      /* 63 - in */
+    d = (d & s) | (in & ~s);    /* 63 - in when s is set, otherwise in */
+    d = (d >> 1) + (d & 1);     /* halve, rounding up */
+
+    return (d << 1) + (s & 1);
+}
+
+/*
+ * Same recoding as _booth_recode_w5() but for an 8-bit window: the digit
+ * magnitude is returned in the bits above bit 0 and the sign flag in bit 0.
+ * |in| is expected to be below 256.
+ */
+static unsigned int _booth_recode_w7(unsigned int in)
+{
+    unsigned int s, d;
+
+    s = ~((in >> 7) - 1);       /* all ones if bit 7 of |in| is set, else 0 */
+    d = (1 << 8) - in - 1;      /* 255 - in */
+    d = (d & s) | (in & ~s);    /* 255 - in when s is set, otherwise in */
+    d = (d >> 1) + (d & 1);     /* halve, rounding up */
+
+    return (d << 1) + (s & 1);
+}
+
+/*
+ * Branch-free conditional copy: when |move| is 1, |dst| is overwritten with
+ * |src|; when |move| is 0, |dst| is left unchanged. |move| must be 0 or 1.
+ */
+static void copy_conditional(BN_ULONG dst[P256_LIMBS],
+                             const BN_ULONG src[P256_LIMBS], BN_ULONG move)
+{
+    BN_ULONG mask1 = -move;     /* all ones when move == 1, zero otherwise */
+    BN_ULONG mask2 = ~mask1;
+
+    dst[0] = (src[0] & mask1) ^ (dst[0] & mask2);
+    dst[1] = (src[1] & mask1) ^ (dst[1] & mask2);
+    dst[2] = (src[2] & mask1) ^ (dst[2] & mask2);
+    dst[3] = (src[3] & mask1) ^ (dst[3] & mask2);
+    if (P256_LIMBS == 8) {      /* 32-bit limbs: four more words */
+        dst[4] = (src[4] & mask1) ^ (dst[4] & mask2);
+        dst[5] = (src[5] & mask1) ^ (dst[5] & mask2);
+        dst[6] = (src[6] & mask1) ^ (dst[6] & mask2);
+        dst[7] = (src[7] & mask1) ^ (dst[7] & mask2);
+    }
+}
+
+/* Returns 1 if |in| is zero and 0 otherwise, without branching. */
+static BN_ULONG is_zero(BN_ULONG in)
+{
+    in |= (0 - in);             /* top bit becomes set iff in != 0 */
+    in = ~in;
+    in &= BN_MASK2;
+    in >>= BN_BITS2 - 1;        /* move the top bit down to bit 0 */
+    return in;
+}
+
+/*
+ * Returns 1 if the P256_LIMBS-word values |a| and |b| are identical and 0
+ * otherwise. All words are always examined; there is no early exit.
+ */
+static BN_ULONG is_equal(const BN_ULONG a[P256_LIMBS],
+                         const BN_ULONG b[P256_LIMBS])
+{
+    BN_ULONG res;
+
+    /* Accumulate the XOR differences of every word pair. */
+    res = a[0] ^ b[0];
+    res |= a[1] ^ b[1];
+    res |= a[2] ^ b[2];
+    res |= a[3] ^ b[3];
+    if (P256_LIMBS == 8) {
+        res |= a[4] ^ b[4];
+        res |= a[5] ^ b[5];
+        res |= a[6] ^ b[6];
+        res |= a[7] ^ b[7];
+    }
+
+    return is_zero(res);
+}
+
+/*
+ * Returns 1 if |a| equals ONE (one in the Montgomery domain), 0 otherwise.
+ *
+ * With 32-bit limbs only words 0..6 are compared and a[7] is never read:
+ * ONE[7] is zero there, and ecp_nistz256_is_affine_G() only calls this
+ * after checking that the value has exactly seven words in use.
+ */
+static BN_ULONG is_one(const BN_ULONG a[P256_LIMBS])
+{
+    BN_ULONG res;
+
+    res = a[0] ^ ONE[0];
+    res |= a[1] ^ ONE[1];
+    res |= a[2] ^ ONE[2];
+    res |= a[3] ^ ONE[3];
+    if (P256_LIMBS == 8) {
+        res |= a[4] ^ ONE[4];
+        res |= a[5] ^ ONE[5];
+        res |= a[6] ^ ONE[6];
+    }
+
+    return is_zero(res);
+}
+
+/*
+ * Copy the P256_LIMBS words in |words| into the BIGNUM |a|, expanding |a|
+ * if needed, and then normalize its length with bn_correct_top().
+ * |words| is only read. Returns 1 on success, or 0 with an error queued if
+ * |a| cannot be expanded.
+ */
+static int ecp_nistz256_set_words(BIGNUM *a,
+                                  const BN_ULONG words[P256_LIMBS])
+{
+    if (bn_wexpand(a, P256_LIMBS) == NULL) {
+        ECerr(EC_F_ECP_NISTZ256_SET_WORDS, ERR_R_MALLOC_FAILURE);
+        return 0;
+    }
+    memcpy(a->d, words, sizeof(BN_ULONG) * P256_LIMBS);
+    a->top = P256_LIMBS;
+    bn_correct_top(a);
+    return 1;
+}
+
+#ifndef ECP_NISTZ256_REFERENCE_IMPLEMENTATION
+void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a);
+void ecp_nistz256_point_add(P256_POINT *r,
+                            const P256_POINT *a, const P256_POINT *b);
+void ecp_nistz256_point_add_affine(P256_POINT *r,
+                                   const P256_POINT *a,
+                                   const P256_POINT_AFFINE *b);
+#else
+/*
+ * Point double: r = 2*a
+ * Reference C implementation built from the Montgomery-domain field
+ * primitives declared above. Trailing comments give the value held after
+ * each step in terms of the input coordinates (X, Y, Z).
+ */
+static void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a)
+{
+    BN_ULONG S[P256_LIMBS];
+    BN_ULONG M[P256_LIMBS];
+    BN_ULONG Zsqr[P256_LIMBS];
+    BN_ULONG tmp0[P256_LIMBS];
+
+    const BN_ULONG *in_x = a->X;
+    const BN_ULONG *in_y = a->Y;
+    const BN_ULONG *in_z = a->Z;
+
+    BN_ULONG *res_x = r->X;
+    BN_ULONG *res_y = r->Y;
+    BN_ULONG *res_z = r->Z;
+
+    ecp_nistz256_mul_by_2(S, in_y);             /* S = 2*Y */
+
+    ecp_nistz256_sqr_mont(Zsqr, in_z);          /* Zsqr = Z^2 */
+
+    ecp_nistz256_sqr_mont(S, S);                /* S = 4*Y^2 */
+
+    ecp_nistz256_mul_mont(res_z, in_z, in_y);
+    ecp_nistz256_mul_by_2(res_z, res_z);        /* Z3 = 2*Y*Z */
+
+    ecp_nistz256_add(M, in_x, Zsqr);            /* M = X + Z^2 */
+    ecp_nistz256_sub(Zsqr, in_x, Zsqr);         /* Zsqr = X - Z^2 */
+
+    ecp_nistz256_sqr_mont(res_y, S);
+    ecp_nistz256_div_by_2(res_y, res_y);        /* res_y = 8*Y^4 */
+
+    ecp_nistz256_mul_mont(M, M, Zsqr);
+    ecp_nistz256_mul_by_3(M, M);                /* M = 3*(X+Z^2)*(X-Z^2) */
+
+    ecp_nistz256_mul_mont(S, S, in_x);          /* S = 4*X*Y^2 */
+    ecp_nistz256_mul_by_2(tmp0, S);             /* tmp0 = 2*S */
+
+    ecp_nistz256_sqr_mont(res_x, M);
+
+    ecp_nistz256_sub(res_x, res_x, tmp0);       /* X3 = M^2 - 2*S */
+    ecp_nistz256_sub(S, S, res_x);
+
+    ecp_nistz256_mul_mont(S, S, M);
+    ecp_nistz256_sub(res_y, S, res_y);          /* Y3 = M*(S-X3) - 8*Y^4 */
+}
+
+/*
+ * Point addition: r = a+b
+ * Reference C implementation. Either input may be the point at infinity
+ * (encoded with all X and Y words zero), in which case the other input is
+ * returned. Equal inputs are forwarded to ecp_nistz256_point_double() and
+ * opposite inputs yield an all-zero result; that branch is not constant
+ * time. Results are staged in local buffers and copied to |r| at the end.
+ */
+static void ecp_nistz256_point_add(P256_POINT *r,
+                                   const P256_POINT *a, const P256_POINT *b)
+{
+    BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS];
+    BN_ULONG U1[P256_LIMBS], S1[P256_LIMBS];
+    BN_ULONG Z1sqr[P256_LIMBS];
+    BN_ULONG Z2sqr[P256_LIMBS];
+    BN_ULONG H[P256_LIMBS], R[P256_LIMBS];
+    BN_ULONG Hsqr[P256_LIMBS];
+    BN_ULONG Rsqr[P256_LIMBS];
+    BN_ULONG Hcub[P256_LIMBS];
+
+    BN_ULONG res_x[P256_LIMBS];
+    BN_ULONG res_y[P256_LIMBS];
+    BN_ULONG res_z[P256_LIMBS];
+
+    BN_ULONG in1infty, in2infty;
+
+    const BN_ULONG *in1_x = a->X;
+    const BN_ULONG *in1_y = a->Y;
+    const BN_ULONG *in1_z = a->Z;
+
+    const BN_ULONG *in2_x = b->X;
+    const BN_ULONG *in2_y = b->Y;
+    const BN_ULONG *in2_z = b->Z;
+
+    /* We encode infinity as (0,0), which is not on the curve,
+     * so it is OK. */
+    in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
+                in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
+    if (P256_LIMBS == 8)
+        in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
+                     in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
+
+    in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
+                in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
+    if (P256_LIMBS == 8)
+        in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] |
+                     in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]);
+
+    /* Collapse the OR of all words into a 0/1 flag. */
+    in1infty = is_zero(in1infty);
+    in2infty = is_zero(in2infty);
+
+    ecp_nistz256_sqr_mont(Z2sqr, in2_z);        /* Z2^2 */
+    ecp_nistz256_sqr_mont(Z1sqr, in1_z);        /* Z1^2 */
+
+    ecp_nistz256_mul_mont(S1, Z2sqr, in2_z);    /* S1 = Z2^3 */
+    ecp_nistz256_mul_mont(S2, Z1sqr, in1_z);    /* S2 = Z1^3 */
+
+    ecp_nistz256_mul_mont(S1, S1, in1_y);       /* S1 = Y1*Z2^3 */
+    ecp_nistz256_mul_mont(S2, S2, in2_y);       /* S2 = Y2*Z1^3 */
+    ecp_nistz256_sub(R, S2, S1);                /* R = S2 - S1 */
+
+    ecp_nistz256_mul_mont(U1, in1_x, Z2sqr);    /* U1 = X1*Z2^2 */
+    ecp_nistz256_mul_mont(U2, in2_x, Z1sqr);    /* U2 = X2*Z1^2 */
+    ecp_nistz256_sub(H, U2, U1);                /* H = U2 - U1 */
+
+    /*
+     * This should not happen during sign/ecdh, so no constant time violation
+     */
+    if (is_equal(U1, U2) && !in1infty && !in2infty) {
+        if (is_equal(S1, S2)) {
+            /* Same point: the addition formulas do not apply, double. */
+            ecp_nistz256_point_double(r, a);
+            return;
+        } else {
+            /* Opposite points: the sum is infinity, encoded as all zero. */
+            memset(r, 0, sizeof(*r));
+            return;
+        }
+    }
+
+    ecp_nistz256_sqr_mont(Rsqr, R);             /* R^2 */
+    ecp_nistz256_mul_mont(res_z, H, in1_z);     /* H*Z1 */
+    ecp_nistz256_sqr_mont(Hsqr, H);             /* H^2 */
+    ecp_nistz256_mul_mont(res_z, res_z, in2_z); /* Z3 = H*Z1*Z2 */
+    ecp_nistz256_mul_mont(Hcub, Hsqr, H);       /* H^3 */
+
+    ecp_nistz256_mul_mont(U2, U1, Hsqr);        /* U1*H^2 */
+    ecp_nistz256_mul_by_2(Hsqr, U2);            /* 2*U1*H^2 */
+
+    ecp_nistz256_sub(res_x, Rsqr, Hsqr);
+    ecp_nistz256_sub(res_x, res_x, Hcub);       /* X3 = R^2 - 2*U1*H^2 - H^3 */
+
+    ecp_nistz256_sub(res_y, U2, res_x);
+
+    ecp_nistz256_mul_mont(S2, S1, Hcub);
+    ecp_nistz256_mul_mont(res_y, R, res_y);
+    ecp_nistz256_sub(res_y, res_y, S2);         /* Y3 = R*(U1*H^2-X3) - S1*H^3 */
+
+    /* If a is infinity the result is b; if b is infinity the result is a. */
+    copy_conditional(res_x, in2_x, in1infty);
+    copy_conditional(res_y, in2_y, in1infty);
+    copy_conditional(res_z, in2_z, in1infty);
+
+    copy_conditional(res_x, in1_x, in2infty);
+    copy_conditional(res_y, in1_y, in2infty);
+    copy_conditional(res_z, in1_z, in2infty);
+
+    memcpy(r->X, res_x, sizeof(res_x));
+    memcpy(r->Y, res_y, sizeof(res_y));
+    memcpy(r->Z, res_z, sizeof(res_z));
+}
+
+/*
+ * Point addition when b is known to be affine: r = a+b
+ * Reference C implementation. Because |b| has no Z coordinate, the U1 and
+ * S1 terms of the general formula are simply a's X and Y. Either input may
+ * be infinity (all X and Y words zero). Unlike ecp_nistz256_point_add(),
+ * this routine has no special case for equal or opposite inputs.
+ */
+static void ecp_nistz256_point_add_affine(P256_POINT *r,
+                                          const P256_POINT *a,
+                                          const P256_POINT_AFFINE *b)
+{
+    BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS];
+    BN_ULONG Z1sqr[P256_LIMBS];
+    BN_ULONG H[P256_LIMBS], R[P256_LIMBS];
+    BN_ULONG Hsqr[P256_LIMBS];
+    BN_ULONG Rsqr[P256_LIMBS];
+    BN_ULONG Hcub[P256_LIMBS];
+
+    BN_ULONG res_x[P256_LIMBS];
+    BN_ULONG res_y[P256_LIMBS];
+    BN_ULONG res_z[P256_LIMBS];
+
+    BN_ULONG in1infty, in2infty;
+
+    const BN_ULONG *in1_x = a->X;
+    const BN_ULONG *in1_y = a->Y;
+    const BN_ULONG *in1_z = a->Z;
+
+    const BN_ULONG *in2_x = b->X;
+    const BN_ULONG *in2_y = b->Y;
+
+    /*
+     * In affine representation we encode infty as (0,0), which is not on the
+     * curve, so it is OK
+     */
+    in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
+                in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
+    if (P256_LIMBS == 8)
+        in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
+                     in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
+
+    in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
+                in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
+    if (P256_LIMBS == 8)
+        in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] |
+                     in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]);
+
+    /* Collapse the OR of all words into a 0/1 flag. */
+    in1infty = is_zero(in1infty);
+    in2infty = is_zero(in2infty);
+
+    ecp_nistz256_sqr_mont(Z1sqr, in1_z);        /* Z1^2 */
+
+    ecp_nistz256_mul_mont(U2, in2_x, Z1sqr);    /* U2 = X2*Z1^2 */
+    ecp_nistz256_sub(H, U2, in1_x);             /* H = U2 - U1 */
+
+    ecp_nistz256_mul_mont(S2, Z1sqr, in1_z);    /* S2 = Z1^3 */
+
+    ecp_nistz256_mul_mont(res_z, H, in1_z);     /* Z3 = H*Z1*Z2 */
+
+    ecp_nistz256_mul_mont(S2, S2, in2_y);       /* S2 = Y2*Z1^3 */
+    ecp_nistz256_sub(R, S2, in1_y);             /* R = S2 - S1 */
+
+    ecp_nistz256_sqr_mont(Hsqr, H);             /* H^2 */
+    ecp_nistz256_sqr_mont(Rsqr, R);             /* R^2 */
+    ecp_nistz256_mul_mont(Hcub, Hsqr, H);       /* H^3 */
+
+    ecp_nistz256_mul_mont(U2, in1_x, Hsqr);     /* U1*H^2 */
+    ecp_nistz256_mul_by_2(Hsqr, U2);            /* 2*U1*H^2 */
+
+    ecp_nistz256_sub(res_x, Rsqr, Hsqr);
+    ecp_nistz256_sub(res_x, res_x, Hcub);       /* X3 = R^2 - 2*U1*H^2 - H^3 */
+    ecp_nistz256_sub(H, U2, res_x);
+
+    ecp_nistz256_mul_mont(S2, in1_y, Hcub);
+    ecp_nistz256_mul_mont(H, H, R);
+    ecp_nistz256_sub(res_y, H, S2);             /* Y3 = R*(U1*H^2-X3) - S1*H^3 */
+
+    /* If a is infinity the result is b; if b is infinity the result is a. */
+    copy_conditional(res_x, in2_x, in1infty);
+    copy_conditional(res_x, in1_x, in2infty);
+
+    copy_conditional(res_y, in2_y, in1infty);
+    copy_conditional(res_y, in1_y, in2infty);
+
+    /* b carries no Z word, so ONE is used for Z when the result is b. */
+    copy_conditional(res_z, ONE, in1infty);
+    copy_conditional(res_z, in1_z, in2infty);
+
+    memcpy(r->X, res_x, sizeof(res_x));
+    memcpy(r->Y, res_y, sizeof(res_y));
+    memcpy(r->Z, res_z, sizeof(res_z));
+}
+#endif
+
+/*
+ * r = in^-1 mod p
+ * Computed with a fixed sequence of Montgomery squarings and
+ * multiplications, so the operation count does not depend on |in|.
+ * The result is staged in a local buffer before being copied to |r|.
+ */
+static void ecp_nistz256_mod_inverse(BN_ULONG r[P256_LIMBS],
+                                     const BN_ULONG in[P256_LIMBS])
+{
+    /*
+     * The prime p is ffffffff 00000001 00000000 00000000 00000000 ffffffff
+     * ffffffff ffffffff. We use Fermat's little theorem, raising |in| to
+     * the exponent p-2. The pN variables below hold in^(2^N - 1).
+     */
+    BN_ULONG p2[P256_LIMBS];
+    BN_ULONG p4[P256_LIMBS];
+    BN_ULONG p8[P256_LIMBS];
+    BN_ULONG p16[P256_LIMBS];
+    BN_ULONG p32[P256_LIMBS];
+    BN_ULONG res[P256_LIMBS];
+    int i;
+
+    ecp_nistz256_sqr_mont(res, in);
+    ecp_nistz256_mul_mont(p2, res, in);         /* in^0x3 */
+
+    ecp_nistz256_sqr_mont(res, p2);
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(p4, res, p2);         /* in^0xf */
+
+    ecp_nistz256_sqr_mont(res, p4);
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(p8, res, p4);         /* in^0xff */
+
+    ecp_nistz256_sqr_mont(res, p8);
+    for (i = 0; i < 7; i++)
+        ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(p16, res, p8);        /* in^0xffff */
+
+    ecp_nistz256_sqr_mont(res, p16);
+    for (i = 0; i < 15; i++)
+        ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(p32, res, p16);       /* in^0xffffffff */
+
+    /* Exponent so far: ffffffff 00000001 */
+    ecp_nistz256_sqr_mont(res, p32);
+    for (i = 0; i < 31; i++)
+        ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(res, res, in);
+
+    /* Shift in three zero words, then append ffffffff */
+    for (i = 0; i < 32 * 4; i++)
+        ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(res, res, p32);
+
+    /* Append ffffffff */
+    for (i = 0; i < 32; i++)
+        ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(res, res, p32);
+
+    /* The final word fffffffd is appended as ffff, ff, f, binary 11, 01 */
+    for (i = 0; i < 16; i++)
+        ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(res, res, p16);
+
+    for (i = 0; i < 8; i++)
+        ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(res, res, p8);
+
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(res, res, p4);
+
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(res, res, p2);
+
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_sqr_mont(res, res);
+    ecp_nistz256_mul_mont(res, res, in);
+
+    memcpy(r, res, sizeof(res));
+}
+
+/*
+ * ecp_nistz256_bignum_to_field_elem copies the contents of |in| to |out| and
+ * returns one if it fits. Otherwise it returns zero and leaves |out|
+ * untouched. Words of |out| beyond the length of |in| are set to zero.
+ */
+static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS],
+                                             const BIGNUM *in)
+{
+    if (in->top > P256_LIMBS)
+        return 0;
+
+    memset(out, 0, sizeof(BN_ULONG) * P256_LIMBS);
+    /*
+     * Skip the copy when there are no words: |in->d| is not guaranteed to
+     * be non-NULL for an empty BIGNUM, and memcpy() requires valid pointers
+     * even for a zero length.
+     */
+    if (in->top > 0)
+        memcpy(out, in->d, sizeof(BN_ULONG) * in->top);
+    return 1;
+}
+
+/*
+ * r = sum(scalar[i]*point[i])
+ *
+ * Multiplies |num| points by their scalars and accumulates the sum in |r|,
+ * using Booth-recoded windows of 5 bits and, per point, a table of the
+ * multiples 1*P .. 16*P. Scalars that are negative or wider than 256 bits
+ * are first reduced modulo the group order (not in constant time).
+ *
+ * Returns 1 on success. Returns 0 with an error queued when |num| is not
+ * positive or is too large for the buffers to be sized safely, when an
+ * allocation or scalar reduction fails, or when a point coordinate does
+ * not fit in P256_LIMBS words.
+ *
+ * NOTE(review): BN_CTX_get() is used here without a BN_CTX_start();
+ * presumably the caller has already opened a BN_CTX frame - confirm.
+ */
+static int ecp_nistz256_windowed_mul(const EC_GROUP *group,
+                                      P256_POINT *r,
+                                      const BIGNUM **scalar,
+                                      const EC_POINT **point,
+                                      int num, BN_CTX *ctx)
+{
+
+    int i, j, ret = 0;
+    unsigned int index;
+    unsigned char (*p_str)[33] = NULL;
+    const unsigned int window_size = 5;
+    const unsigned int mask = (1 << (window_size + 1)) - 1;
+    unsigned int wvalue;
+    BN_ULONG tmp[P256_LIMBS];
+    ALIGN32 P256_POINT h;
+    const BIGNUM **scalars = NULL;
+    P256_POINT (*table)[16] = NULL;
+    void *table_storage = NULL;
+    /*
+     * Upper bound for any byte count handed to OPENSSL_malloc() below.
+     * NOTE(review): capped at the positive int range because
+     * OPENSSL_malloc() is believed to take an int-sized length in this
+     * code base - confirm.
+     */
+    const size_t max_alloc = (size_t)(~0U >> 1);
+
+    /*
+     * Reject counts for which the size computations below would overflow
+     * (the table is the largest of the three buffers), and non-positive
+     * counts, for which p_str[0] and table[0] would not exist.
+     */
+    if (num <= 0
+        || (size_t)num > (max_alloc - 64) / (16 * sizeof(P256_POINT))) {
+        ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, ERR_R_MALLOC_FAILURE);
+        goto err;
+    }
+
+    if ((table_storage =
+         OPENSSL_malloc(num * 16 * sizeof(P256_POINT) + 64)) == NULL
+        || (p_str =
+            OPENSSL_malloc(num * 33 * sizeof(unsigned char))) == NULL
+        || (scalars = OPENSSL_malloc(num * sizeof(BIGNUM *))) == NULL) {
+        ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, ERR_R_MALLOC_FAILURE);
+        goto err;
+    } else {
+        /* The extra 64 bytes allocated above leave room for this. */
+        table = (void *)ALIGNPTR(table_storage, 64);
+    }
+
+    for (i = 0; i < num; i++) {
+        P256_POINT *row = table[i];
+
+        /* This is an unusual input, we don't guarantee constant-timeness. */
+        if ((BN_num_bits(scalar[i]) > 256) || BN_is_negative(scalar[i])) {
+            BIGNUM *mod;
+
+            if ((mod = BN_CTX_get(ctx)) == NULL)
+                goto err;
+            if (!BN_nnmod(mod, scalar[i], &group->order, ctx)) {
+                ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, ERR_R_BN_LIB);
+                goto err;
+            }
+            scalars[i] = mod;
+        } else
+            scalars[i] = scalar[i];
+
+        /* Serialize the scalar into p_str[i], least significant byte first. */
+        for (j = 0; j < scalars[i]->top * BN_BYTES; j += BN_BYTES) {
+            BN_ULONG d = scalars[i]->d[j / BN_BYTES];
+
+            p_str[i][j + 0] = d & 0xff;
+            p_str[i][j + 1] = (d >> 8) & 0xff;
+            p_str[i][j + 2] = (d >> 16) & 0xff;
+            p_str[i][j + 3] = (d >>= 24) & 0xff;
+            if (BN_BYTES == 8) {
+                d >>= 8;
+                p_str[i][j + 4] = d & 0xff;
+                p_str[i][j + 5] = (d >> 8) & 0xff;
+                p_str[i][j + 6] = (d >> 16) & 0xff;
+                p_str[i][j + 7] = (d >> 24) & 0xff;
+            }
+        }
+        /* Zero-pad up to and including the 33rd byte. */
+        for (; j < 33; j++)
+            p_str[i][j] = 0;
+
+        /* table[0] is implicitly (0,0,0) (the point at infinity),
+         * therefore it is not stored. All other values are actually
+         * stored with an offset of -1 in table.
+         */
+
+        if (!ecp_nistz256_bignum_to_field_elem(row[1 - 1].X, &point[i]->X)
+            || !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Y, &point[i]->Y)
+            || !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Z, &point[i]->Z)) {
+            ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, EC_R_COORDINATES_OUT_OF_RANGE);
+            goto err;
+        }
+
+        /* Fill in 2*P .. 16*P from entries computed earlier in the list. */
+        ecp_nistz256_point_double(&row[ 2 - 1], &row[ 1 - 1]);
+        ecp_nistz256_point_add   (&row[ 3 - 1], &row[ 2 - 1], &row[1 - 1]);
+        ecp_nistz256_point_double(&row[ 4 - 1], &row[ 2 - 1]);
+        ecp_nistz256_point_double(&row[ 6 - 1], &row[ 3 - 1]);
+        ecp_nistz256_point_double(&row[ 8 - 1], &row[ 4 - 1]);
+        ecp_nistz256_point_double(&row[12 - 1], &row[ 6 - 1]);
+        ecp_nistz256_point_add   (&row[ 5 - 1], &row[ 4 - 1], &row[1 - 1]);
+        ecp_nistz256_point_add   (&row[ 7 - 1], &row[ 6 - 1], &row[1 - 1]);
+        ecp_nistz256_point_add   (&row[ 9 - 1], &row[ 8 - 1], &row[1 - 1]);
+        ecp_nistz256_point_add   (&row[13 - 1], &row[12 - 1], &row[1 - 1]);
+        ecp_nistz256_point_double(&row[14 - 1], &row[ 7 - 1]);
+        ecp_nistz256_point_double(&row[10 - 1], &row[ 5 - 1]);
+        ecp_nistz256_point_add   (&row[15 - 1], &row[14 - 1], &row[1 - 1]);
+        ecp_nistz256_point_add   (&row[11 - 1], &row[10 - 1], &row[1 - 1]);
+        ecp_nistz256_point_add   (&row[16 - 1], &row[15 - 1], &row[1 - 1]);
+    }
+
+    index = 255;
+
+    /* The top window of the first scalar initializes the accumulator. */
+    wvalue = p_str[0][(index - 1) / 8];
+    wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+
+    ecp_nistz256_select_w5(r, table[0], _booth_recode_w5(wvalue) >> 1);
+
+    while (index >= 5) {
+        /* On the first pass scalar 0 has already been consumed above. */
+        for (i = (index == 255 ? 1 : 0); i < num; i++) {
+            unsigned int off = (index - 1) / 8;
+
+            wvalue = p_str[i][off] | p_str[i][off + 1] << 8;
+            wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+
+            wvalue = _booth_recode_w5(wvalue);
+
+            ecp_nistz256_select_w5(&h, table[i], wvalue >> 1);
+
+            /* Negate the selected point when the recoded sign bit is set. */
+            ecp_nistz256_neg(tmp, h.Y);
+            copy_conditional(h.Y, tmp, (wvalue & 1));
+
+            ecp_nistz256_point_add(r, r, &h);
+        }
+
+        index -= window_size;
+
+        /* Shift the accumulator by one window: r = 2^5 * r. */
+        ecp_nistz256_point_double(r, r);
+        ecp_nistz256_point_double(r, r);
+        ecp_nistz256_point_double(r, r);
+        ecp_nistz256_point_double(r, r);
+        ecp_nistz256_point_double(r, r);
+    }
+
+    /* Final window */
+    for (i = 0; i < num; i++) {
+        wvalue = p_str[i][0];
+        wvalue = (wvalue << 1) & mask;
+
+        wvalue = _booth_recode_w5(wvalue);
+
+        ecp_nistz256_select_w5(&h, table[i], wvalue >> 1);
+
+        ecp_nistz256_neg(tmp, h.Y);
+        copy_conditional(h.Y, tmp, wvalue & 1);
+
+        ecp_nistz256_point_add(r, r, &h);
+    }
+
+    ret = 1;
+ err:
+    if (table_storage)
+        OPENSSL_free(table_storage);
+    if (p_str)
+        OPENSSL_free(p_str);
+    if (scalars)
+        OPENSSL_free(scalars);
+    return ret;
+}
+
+/*
+ * Coordinates of G, for which we have precomputed tables.
+ * ecp_nistz256_is_affine_G() compares these word-for-word against the
+ * generator's stored X and Y (presumably Montgomery-domain values, since Z
+ * is compared against ONE - confirm).
+ */
+static const BN_ULONG def_xG[P256_LIMBS] = {
+    TOBN(0x79e730d4, 0x18a9143c), TOBN(0x75ba95fc, 0x5fedb601),
+    TOBN(0x79fb732b, 0x77622510), TOBN(0x18905f76, 0xa53755c6)
+};
+
+static const BN_ULONG def_yG[P256_LIMBS] = {
+    TOBN(0xddf25357, 0xce95560a), TOBN(0x8b4ab8e4, 0xba19e45c),
+    TOBN(0xd2e88688, 0xdd21f325), TOBN(0x8571ff18, 0x25885d85)
+};
+
+/*
+ * ecp_nistz256_is_affine_G reports whether |generator| is the standard
+ * P-256 generator held in affine form: X and Y must match def_xG and
+ * def_yG, and Z must be ONE. Returns one on a match and zero otherwise.
+ */
+static int ecp_nistz256_is_affine_G(const EC_POINT *generator)
+{
+    /* Z uses one word fewer with 32-bit limbs, where ONE[7] is zero. */
+    const int z_words = P256_LIMBS - P256_LIMBS / 8;
+
+    if (generator->X.top != P256_LIMBS)
+        return 0;
+    if (generator->Y.top != P256_LIMBS)
+        return 0;
+    if (generator->Z.top != z_words)
+        return 0;
+    if (!is_equal(generator->X.d, def_xG))
+        return 0;
+    if (!is_equal(generator->Y.d, def_yG))
+        return 0;
+    return is_one(generator->Z.d) ? 1 : 0;
+}
+
+/*
+ * Build a table of precomputed generator multiples and attach it to
+ * |group| as extra data. Any EC_PRE_COMP already attached is discarded
+ * first. Nothing is computed when the generator is the standard P-256
+ * generator, for which static tables exist. If |ctx| is NULL a temporary
+ * BN_CTX is created and released before returning.
+ * Returns 1 on success and 0 on error.
+ */
+static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx)
+{
+    /*
+     * We precompute a table for a Booth encoded exponent (wNAF) based
+     * computation. Each table holds 64 values for safe access, with an
+     * implicit value of infinity at index zero. We use window of size 7, and
+     * therefore require ceil(256/7) = 37 tables.
+     */
+    BIGNUM *order;
+    EC_POINT *P = NULL, *T = NULL;
+    const EC_POINT *generator;
+    EC_PRE_COMP *pre_comp;
+    BN_CTX *new_ctx = NULL;
+    int i, j, k, ret = 0;
+    size_t w;
+
+    PRECOMP256_ROW *preComputedTable = NULL;
+    unsigned char *precomp_storage = NULL;
+
+    /* if there is an old EC_PRE_COMP object, throw it away */
+    EC_EX_DATA_free_data(&group->extra_data, ecp_nistz256_pre_comp_dup,
+                         ecp_nistz256_pre_comp_free,
+                         ecp_nistz256_pre_comp_clear_free);
+
+    generator = EC_GROUP_get0_generator(group);
+    if (generator == NULL) {
+        ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, EC_R_UNDEFINED_GENERATOR);
+        return 0;
+    }
+
+    if (ecp_nistz256_is_affine_G(generator)) {
+        /*
+         * No need to calculate tables for the standard generator because we
+         * have them statically.
+         */
+        return 1;
+    }
+
+    if ((pre_comp = ecp_nistz256_pre_comp_new(group)) == NULL)
+        return 0;
+
+    if (ctx == NULL) {
+        ctx = new_ctx = BN_CTX_new();
+        if (ctx == NULL)
+            goto err;
+    }
+
+    BN_CTX_start(ctx);
+    order = BN_CTX_get(ctx);
+
+    if (order == NULL)
+        goto err;
+
+    /* The order is fetched only to verify that it is known (non-zero). */
+    if (!EC_GROUP_get_order(group, order, ctx))
+        goto err;
+
+    if (BN_is_zero(order)) {
+        ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, EC_R_UNKNOWN_ORDER);
+        goto err;
+    }
+
+    w = 7;
+
+    /* 37 rows of 64 affine points, plus 64 spare bytes for alignment. */
+    if ((precomp_storage =
+         OPENSSL_malloc(37 * 64 * sizeof(P256_POINT_AFFINE) + 64)) == NULL) {
+        ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, ERR_R_MALLOC_FAILURE);
+        goto err;
+    } else {
+        preComputedTable = (void *)ALIGNPTR(precomp_storage, 64);
+    }
+
+    P = EC_POINT_new(group);
+    T = EC_POINT_new(group);
+    if (P == NULL || T == NULL)
+        goto err;
+
+    /*
+     * The zero entry is implicitly infinity, and we skip it, storing other
+     * values with -1 offset.
+     */
+    if (!EC_POINT_copy(T, generator))
+        goto err;
+
+    /*
+     * On entry to iteration k, T holds (k+1)*G. Entry [j][k] receives that
+     * value doubled 7*j times, i.e. (k+1) * 2^(7*j) * G.
+     */
+    for (k = 0; k < 64; k++) {
+        if (!EC_POINT_copy(P, T))
+            goto err;
+        for (j = 0; j < 37; j++) {
+            /*
+             * It would be faster to use EC_POINTs_make_affine and
+             * make multiple points affine at the same time.
+             */
+            if (!EC_POINT_make_affine(group, P, ctx))
+                goto err;
+            if (!ecp_nistz256_bignum_to_field_elem(preComputedTable[j][k].X,
+                                                   &P->X) ||
+                !ecp_nistz256_bignum_to_field_elem(preComputedTable[j][k].Y,
+                                                   &P->Y)) {
+                ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE,
+                      EC_R_COORDINATES_OUT_OF_RANGE);
+                goto err;
+            }
+            for (i = 0; i < 7; i++) {
+                if (!EC_POINT_dbl(group, P, P, ctx))
+                    goto err;
+            }
+        }
+        if (!EC_POINT_add(group, T, T, generator, ctx))
+            goto err;
+    }
+
+    pre_comp->group = group;
+    pre_comp->w = w;
+    pre_comp->precomp = preComputedTable;
+    pre_comp->precomp_storage = precomp_storage;
+
+    /* The buffer now belongs to pre_comp; do not free it separately below. */
+    precomp_storage = NULL;
+
+    if (!EC_EX_DATA_set_data(&group->extra_data, pre_comp,
+                             ecp_nistz256_pre_comp_dup,
+                             ecp_nistz256_pre_comp_free,
+                             ecp_nistz256_pre_comp_clear_free)) {
+        goto err;
+    }
+
+    /* pre_comp has been handed to the group; skip the cleanup below. */
+    pre_comp = NULL;
+
+    ret = 1;
+
+ err:
+    if (ctx != NULL)
+        BN_CTX_end(ctx);
+    BN_CTX_free(new_ctx);
+
+    if (pre_comp)
+        ecp_nistz256_pre_comp_free(pre_comp);
+    if (precomp_storage)
+        OPENSSL_free(precomp_storage);
+    if (P)
+        EC_POINT_free(P);
+    if (T)
+        EC_POINT_free(T);
+    return ret;
+}
+
+/*
+ * Note that by default ECP_NISTZ256_AVX2 is undefined. While it's great
+ * code processing 4 points in parallel, the corresponding serial operation
+ * is several times slower, because it uses 29x29=58-bit multiplication
+ * as opposed to 64x64=128-bit in the integer-only scalar case. As a result
+ * it doesn't provide a *significant* performance improvement. Note that
+ * just defining ECP_NISTZ256_AVX2 is not sufficient to make it work;
+ * you'd also need to compile the asm/ecp_nistz256-avx.pl module.
+ */
+#if defined(ECP_NISTZ256_AVX2)
+/* The AVX2 path requires an x86-64 target and a compiler with ALIGN32. */
+# if !(defined(__x86_64) || defined(__x86_64__) || \
+       defined(_M_AMD64) || defined(_M_X64)) || \
+     !(defined(__GNUC__) || defined(_MSC_VER)) /* this is for ALIGN32 */
+#  undef ECP_NISTZ256_AVX2
+# else
+/* Constant time access, loading four values, from four consecutive tables */
+void ecp_nistz256_avx2_select_w7(P256_POINT_AFFINE * val,
+                                 const P256_POINT_AFFINE * in_t, int index);
+void ecp_nistz256_avx2_multi_select_w7(void *result, const void *in, int index0,
+                                       int index1, int index2, int index3);
+void ecp_nistz256_avx2_transpose_convert(void *RESULTx4, const void *in);
+void ecp_nistz256_avx2_convert_transpose_back(void *result, const void *Ax4);
+void ecp_nistz256_avx2_point_add_affine_x4(void *RESULTx4, const void *Ax4,
+                                           const void *Bx4);
+void ecp_nistz256_avx2_point_add_affines_x4(void *RESULTx4, const void *Ax4,
+                                            const void *Bx4);
+void ecp_nistz256_avx2_to_mont(void *RESULTx4, const void *Ax4);
+void ecp_nistz256_avx2_from_mont(void *RESULTx4, const void *Ax4);
+void ecp_nistz256_avx2_set1(void *RESULTx4);
+int ecp_nistz_avx2_eligible(void);
+
+/*
+ * Recode one 8-bit window value |in| into a sign flag and a digit.
+ *
+ * If the top bit of |in| is set, the value is first replaced by
+ * (255 - in) and *sign is set to 1; otherwise it is used as-is and *sign
+ * is 0. The digit is then that value halved, rounding up. The selection
+ * is done with a mask instead of a branch.
+ */
+static void booth_recode_w7(unsigned char *sign,
+                            unsigned char *digit, unsigned char in)
+{
+    /* 0xff when bit 7 of |in| is set, 0x00 otherwise */
+    const unsigned char neg_mask = ~((in >> 7) - 1);
+    /* 255 - in, truncated to a byte */
+    const unsigned char complemented = (1 << 8) - in - 1;
+    unsigned char magnitude;
+
+    magnitude = (complemented & neg_mask) | (in & ~neg_mask);
+    magnitude = (magnitude >> 1) + (magnitude & 1);
+
+    *sign = neg_mask & 1;
+    *digit = magnitude;
+}
+
+/*
+ * ecp_nistz256_avx2_mul_g performs multiplication by G, using only the
+ * precomputed table. It does 4 affine point additions in parallel,
+ * significantly speeding up point multiplication for a fixed value.
+ *
+ * r                 receives the result.
+ * p_str             33-byte scalar buffer; bytes are combined with the
+ *                   lower index as the less significant byte.
+ * preComputedTable  rows 0..36 are read; each row holds 64 affine points.
+ *
+ * The scalar is consumed in 37 windows of window_size (7) bits. Nine
+ * batches of four windows go through the x4 AVX2 routines, the last
+ * window is handled serially, and the four lanes are then added to it.
+ *
+ * Window bytes are assembled with shifts (as the non-AVX2 path does)
+ * instead of being read through a u16 pointer cast, which avoids
+ * misaligned and type-punned loads from the unsigned char buffer.
+ */
+static void ecp_nistz256_avx2_mul_g(P256_POINT *r,
+                                    unsigned char p_str[33],
+                                    const P256_POINT_AFFINE(*preComputedTable)[64])
+{
+    const unsigned int window_size = 7;
+    const unsigned int mask = (1 << (window_size + 1)) - 1;
+    unsigned int wvalue;
+    /* Using 4 windows at a time */
+    unsigned char sign0, digit0;
+    unsigned char sign1, digit1;
+    unsigned char sign2, digit2;
+    unsigned char sign3, digit3;
+    unsigned int index = 0;
+    BN_ULONG tmp[P256_LIMBS];
+    int i;
+
+    ALIGN32 BN_ULONG aX4[4 * 9 * 3] = { 0 };
+    ALIGN32 BN_ULONG bX4[4 * 9 * 2] = { 0 };
+    /*
+     * NOTE(review): both arrays are sized with P256_LIMBS, but only
+     * elements 0..3 (one per lane) are used in this function.
+     */
+    ALIGN32 P256_POINT_AFFINE point_arr[P256_LIMBS];
+    ALIGN32 P256_POINT res_point_arr[P256_LIMBS];
+
+    /* Initial four windows */
+    wvalue = p_str[0] | p_str[1] << 8;
+    wvalue = (wvalue << 1) & mask;
+    index += window_size;
+    booth_recode_w7(&sign0, &digit0, wvalue);
+    wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+    wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+    index += window_size;
+    booth_recode_w7(&sign1, &digit1, wvalue);
+    wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+    wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+    index += window_size;
+    booth_recode_w7(&sign2, &digit2, wvalue);
+    wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+    wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+    index += window_size;
+    booth_recode_w7(&sign3, &digit3, wvalue);
+
+    ecp_nistz256_avx2_multi_select_w7(point_arr, preComputedTable[0],
+                                      digit0, digit1, digit2, digit3);
+
+    /* Replace Y by its negation in each lane whose sign flag is set */
+    ecp_nistz256_neg(tmp, point_arr[0].Y);
+    copy_conditional(point_arr[0].Y, tmp, sign0);
+    ecp_nistz256_neg(tmp, point_arr[1].Y);
+    copy_conditional(point_arr[1].Y, tmp, sign1);
+    ecp_nistz256_neg(tmp, point_arr[2].Y);
+    copy_conditional(point_arr[2].Y, tmp, sign2);
+    ecp_nistz256_neg(tmp, point_arr[3].Y);
+    copy_conditional(point_arr[3].Y, tmp, sign3);
+
+    ecp_nistz256_avx2_transpose_convert(aX4, point_arr);
+    ecp_nistz256_avx2_to_mont(aX4, aX4);
+    ecp_nistz256_avx2_to_mont(&aX4[4 * 9], &aX4[4 * 9]);
+    ecp_nistz256_avx2_set1(&aX4[4 * 9 * 2]);
+
+    /* Second batch of four windows, table rows starting at 4 */
+    wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+    wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+    index += window_size;
+    booth_recode_w7(&sign0, &digit0, wvalue);
+    wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+    wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+    index += window_size;
+    booth_recode_w7(&sign1, &digit1, wvalue);
+    wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+    wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+    index += window_size;
+    booth_recode_w7(&sign2, &digit2, wvalue);
+    wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+    wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+    index += window_size;
+    booth_recode_w7(&sign3, &digit3, wvalue);
+
+    ecp_nistz256_avx2_multi_select_w7(point_arr, preComputedTable[4 * 1],
+                                      digit0, digit1, digit2, digit3);
+
+    ecp_nistz256_neg(tmp, point_arr[0].Y);
+    copy_conditional(point_arr[0].Y, tmp, sign0);
+    ecp_nistz256_neg(tmp, point_arr[1].Y);
+    copy_conditional(point_arr[1].Y, tmp, sign1);
+    ecp_nistz256_neg(tmp, point_arr[2].Y);
+    copy_conditional(point_arr[2].Y, tmp, sign2);
+    ecp_nistz256_neg(tmp, point_arr[3].Y);
+    copy_conditional(point_arr[3].Y, tmp, sign3);
+
+    ecp_nistz256_avx2_transpose_convert(bX4, point_arr);
+    ecp_nistz256_avx2_to_mont(bX4, bX4);
+    ecp_nistz256_avx2_to_mont(&bX4[4 * 9], &bX4[4 * 9]);
+    /* Optimized when both inputs are affine */
+    ecp_nistz256_avx2_point_add_affines_x4(aX4, aX4, bX4);
+
+    /* Remaining seven batches: table rows 8, 12, ..., 32 */
+    for (i = 2; i < 9; i++) {
+        wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+        wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+        index += window_size;
+        booth_recode_w7(&sign0, &digit0, wvalue);
+        wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+        wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+        index += window_size;
+        booth_recode_w7(&sign1, &digit1, wvalue);
+        wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+        wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+        index += window_size;
+        booth_recode_w7(&sign2, &digit2, wvalue);
+        wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+        wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+        index += window_size;
+        booth_recode_w7(&sign3, &digit3, wvalue);
+
+        ecp_nistz256_avx2_multi_select_w7(point_arr,
+                                          preComputedTable[4 * i],
+                                          digit0, digit1, digit2, digit3);
+
+        ecp_nistz256_neg(tmp, point_arr[0].Y);
+        copy_conditional(point_arr[0].Y, tmp, sign0);
+        ecp_nistz256_neg(tmp, point_arr[1].Y);
+        copy_conditional(point_arr[1].Y, tmp, sign1);
+        ecp_nistz256_neg(tmp, point_arr[2].Y);
+        copy_conditional(point_arr[2].Y, tmp, sign2);
+        ecp_nistz256_neg(tmp, point_arr[3].Y);
+        copy_conditional(point_arr[3].Y, tmp, sign3);
+
+        ecp_nistz256_avx2_transpose_convert(bX4, point_arr);
+        ecp_nistz256_avx2_to_mont(bX4, bX4);
+        ecp_nistz256_avx2_to_mont(&bX4[4 * 9], &bX4[4 * 9]);
+
+        ecp_nistz256_avx2_point_add_affine_x4(aX4, aX4, bX4);
+    }
+
+    ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 0], &aX4[4 * 9 * 0]);
+    ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 1], &aX4[4 * 9 * 1]);
+    ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 2], &aX4[4 * 9 * 2]);
+
+    ecp_nistz256_avx2_convert_transpose_back(res_point_arr, aX4);
+    /* Last window is performed serially */
+    wvalue = p_str[(index - 1) / 8] | p_str[(index - 1) / 8 + 1] << 8;
+    wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+    booth_recode_w7(&sign0, &digit0, wvalue);
+    ecp_nistz256_avx2_select_w7((P256_POINT_AFFINE *) r,
+                                preComputedTable[36], digit0);
+    ecp_nistz256_neg(tmp, r->Y);
+    copy_conditional(r->Y, tmp, sign0);
+    memcpy(r->Z, ONE, sizeof(ONE));
+    /* Sum the four windows */
+    ecp_nistz256_point_add(r, r, &res_point_arr[0]);
+    ecp_nistz256_point_add(r, r, &res_point_arr[1]);
+    ecp_nistz256_point_add(r, r, &res_point_arr[2]);
+    ecp_nistz256_point_add(r, r, &res_point_arr[3]);
+}
+# endif
+#endif
+
+/*
+ * Set |out| from the affine point |in|.
+ *
+ * The X and Y words of |in| are copied into local buffers, wrapped in
+ * stack BIGNUMs flagged BN_FLG_STATIC_DATA, and handed to
+ * EC_POINT_set_affine_coordinates_GFp(), whose result is returned.
+ */
+static int ecp_nistz256_set_from_affine(EC_POINT *out, const EC_GROUP *group,
+                                        const P256_POINT_AFFINE *in,
+                                        BN_CTX *ctx)
+{
+    BN_ULONG words_x[P256_LIMBS], words_y[P256_LIMBS];
+    BIGNUM bn_x, bn_y;
+
+    memcpy(words_x, in->X, sizeof(words_x));
+    memcpy(words_y, in->Y, sizeof(words_y));
+
+    /* Non-owning BIGNUM view over the X words */
+    bn_x.d = words_x;
+    bn_x.top = P256_LIMBS;
+    bn_x.dmax = P256_LIMBS;
+    bn_x.neg = 0;
+    bn_x.flags = BN_FLG_STATIC_DATA;
+
+    /* Non-owning BIGNUM view over the Y words */
+    bn_y.d = words_y;
+    bn_y.top = P256_LIMBS;
+    bn_y.dmax = P256_LIMBS;
+    bn_y.neg = 0;
+    bn_y.flags = BN_FLG_STATIC_DATA;
+
+    return EC_POINT_set_affine_coordinates_GFp(group, out, &bn_x, &bn_y, ctx);
+}
+
+/*
+ * r = scalar*G + sum(scalars[i]*points[i])
+ *
+ * group    curve; its method must match that of r and of every points[j].
+ * r        receives the result.
+ * scalar   multiplier for the generator, or NULL to skip that term.
+ * num      number of entries in points[] / scalars[].
+ * ctx      BN_CTX to use; when NULL a temporary one is created and freed.
+ *
+ * Returns 1 on success and 0 on failure. If scalar is NULL and num is 0
+ * the result of EC_POINT_set_to_infinity() is returned directly.
+ */
+static int ecp_nistz256_points_mul(const EC_GROUP *group,
+                                   EC_POINT *r,
+                                   const BIGNUM *scalar,
+                                   size_t num,
+                                   const EC_POINT *points[],
+                                   const BIGNUM *scalars[], BN_CTX *ctx)
+{
+    int i = 0, ret = 0, no_precomp_for_generator = 0, p_is_infinity = 0;
+    size_t j;
+    unsigned char p_str[33] = { 0 };
+    const PRECOMP256_ROW *preComputedTable = NULL;
+    const EC_PRE_COMP *pre_comp = NULL;
+    const EC_POINT *generator = NULL;
+    unsigned int index = 0;
+    BN_CTX *new_ctx = NULL;
+    const BIGNUM **new_scalars = NULL;
+    const EC_POINT **new_points = NULL;
+    const unsigned int window_size = 7;
+    const unsigned int mask = (1 << (window_size + 1)) - 1;
+    unsigned int wvalue;
+    /*
+     * p accumulates the result and t is scratch; each can be accessed as
+     * a P256_POINT or as a P256_POINT_AFFINE.
+     */
+    ALIGN32 union {
+        P256_POINT p;
+        P256_POINT_AFFINE a;
+    } t, p;
+    BIGNUM *tmp_scalar;
+
+    if (group->meth != r->meth) {
+        ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_INCOMPATIBLE_OBJECTS);
+        return 0;
+    }
+
+    if ((scalar == NULL) && (num == 0))
+        return EC_POINT_set_to_infinity(group, r);
+
+    for (j = 0; j < num; j++) {
+        if (group->meth != points[j]->meth) {
+            ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_INCOMPATIBLE_OBJECTS);
+            return 0;
+        }
+    }
+
+    if (ctx == NULL) {
+        ctx = new_ctx = BN_CTX_new();
+        if (ctx == NULL)
+            goto err;
+    }
+
+    BN_CTX_start(ctx);
+
+    if (scalar) {
+        generator = EC_GROUP_get0_generator(group);
+        if (generator == NULL) {
+            ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_UNDEFINED_GENERATOR);
+            goto err;
+        }
+
+        /* look if we can use precomputed multiples of generator */
+        pre_comp =
+            EC_EX_DATA_get_data(group->extra_data, ecp_nistz256_pre_comp_dup,
+                                ecp_nistz256_pre_comp_free,
+                                ecp_nistz256_pre_comp_clear_free);
+
+        if (pre_comp) {
+            /*
+             * If there is a precomputed table for the generator, check that
+             * it was generated with the same generator.
+             */
+            EC_POINT *pre_comp_generator = EC_POINT_new(group);
+            if (pre_comp_generator == NULL)
+                goto err;
+
+            if (!ecp_nistz256_set_from_affine
+                (pre_comp_generator, group, pre_comp->precomp[0], ctx)) {
+                EC_POINT_free(pre_comp_generator);
+                goto err;
+            }
+
+            if (0 == EC_POINT_cmp(group, generator, pre_comp_generator, ctx))
+                preComputedTable = (const PRECOMP256_ROW *)pre_comp->precomp;
+
+            EC_POINT_free(pre_comp_generator);
+        }
+
+        if (preComputedTable == NULL && ecp_nistz256_is_affine_G(generator)) {
+            /*
+             * If there is no precomputed data, but the generator
+             * is the default, a hardcoded table of precomputed
+             * data is used. This is because applications, such as
+             * Apache, do not use EC_KEY_precompute_mult.
+             */
+            preComputedTable = (const PRECOMP256_ROW *)ecp_nistz256_precomputed;
+        }
+
+        if (preComputedTable) {
+            /*
+             * Scalars that are negative or wider than 256 bits are first
+             * reduced modulo the group order.
+             */
+            if ((BN_num_bits(scalar) > 256)
+                || BN_is_negative(scalar)) {
+                if ((tmp_scalar = BN_CTX_get(ctx)) == NULL)
+                    goto err;
+
+                if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
+                    ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_BN_LIB);
+                    goto err;
+                }
+                scalar = tmp_scalar;
+            }
+
+            /*
+             * Serialise the scalar words into p_str, least significant
+             * byte first; the upper four bytes of each word are written
+             * only when BN_BYTES is 8.
+             */
+            for (i = 0; i < scalar->top * BN_BYTES; i += BN_BYTES) {
+                BN_ULONG d = scalar->d[i / BN_BYTES];
+
+                p_str[i + 0] = d & 0xff;
+                p_str[i + 1] = (d >> 8) & 0xff;
+                p_str[i + 2] = (d >> 16) & 0xff;
+                p_str[i + 3] = (d >>= 24) & 0xff;
+                if (BN_BYTES == 8) {
+                    d >>= 8;
+                    p_str[i + 4] = d & 0xff;
+                    p_str[i + 5] = (d >> 8) & 0xff;
+                    p_str[i + 6] = (d >> 16) & 0xff;
+                    p_str[i + 7] = (d >> 24) & 0xff;
+                }
+            }
+
+            /* Zero the remaining bytes up to the 33-byte buffer size */
+            for (; i < 33; i++)
+                p_str[i] = 0;
+
+#if defined(ECP_NISTZ256_AVX2)
+            if (ecp_nistz_avx2_eligible()) {
+                ecp_nistz256_avx2_mul_g(&p.p, p_str, preComputedTable);
+            } else
+#endif
+            {
+                /* First window */
+                wvalue = (p_str[0] << 1) & mask;
+                index += window_size;
+
+                wvalue = _booth_recode_w7(wvalue);
+
+                ecp_nistz256_select_w7(&p.a, preComputedTable[0], wvalue >> 1);
+
+                /*
+                 * p.p.Z is used as scratch for the negated Y here; it is
+                 * overwritten with ONE right after.
+                 */
+                ecp_nistz256_neg(p.p.Z, p.p.Y);
+                copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
+
+                memcpy(p.p.Z, ONE, sizeof(ONE));
+
+                /* Windows 1..36: one table row per window, added into p */
+                for (i = 1; i < 37; i++) {
+                    unsigned int off = (index - 1) / 8;
+                    wvalue = p_str[off] | p_str[off + 1] << 8;
+                    wvalue = (wvalue >> ((index - 1) % 8)) & mask;
+                    index += window_size;
+
+                    wvalue = _booth_recode_w7(wvalue);
+
+                    ecp_nistz256_select_w7(&t.a,
+                                           preComputedTable[i], wvalue >> 1);
+
+                    /* t.p.Z serves as scratch for the negated Y */
+                    ecp_nistz256_neg(t.p.Z, t.a.Y);
+                    copy_conditional(t.a.Y, t.p.Z, wvalue & 1);
+
+                    ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
+                }
+            }
+        } else {
+            /* No usable table: p holds nothing yet */
+            p_is_infinity = 1;
+            no_precomp_for_generator = 1;
+        }
+    } else
+        p_is_infinity = 1;
+
+    if (no_precomp_for_generator) {
+        /*
+         * Without a precomputed table for the generator, it has to be
+         * handled like a normal point.
+         */
+        new_scalars = OPENSSL_malloc((num + 1) * sizeof(BIGNUM *));
+        if (!new_scalars) {
+            ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
+            goto err;
+        }
+
+        new_points = OPENSSL_malloc((num + 1) * sizeof(EC_POINT *));
+        if (!new_points) {
+            ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
+            goto err;
+        }
+
+        /* Copy the caller's arrays and append (scalar, generator) */
+        memcpy(new_scalars, scalars, num * sizeof(BIGNUM *));
+        new_scalars[num] = scalar;
+        memcpy(new_points, points, num * sizeof(EC_POINT *));
+        new_points[num] = generator;
+
+        scalars = new_scalars;
+        points = new_points;
+        num++;
+    }
+
+    if (num) {
+        /*
+         * If p holds no generator term, the windowed result is written
+         * straight into p; otherwise it goes to t and is added to p.
+         */
+        P256_POINT *out = &t.p;
+        if (p_is_infinity)
+            out = &p.p;
+
+        if (!ecp_nistz256_windowed_mul(group, out, scalars, points, num, ctx))
+            goto err;
+
+        if (!p_is_infinity)
+            ecp_nistz256_point_add(&p.p, &p.p, out);
+    }
+
+    /* Not constant-time, but we're only operating on the public output. */
+    if (!ecp_nistz256_set_words(&r->X, p.p.X) ||
+        !ecp_nistz256_set_words(&r->Y, p.p.Y) ||
+        !ecp_nistz256_set_words(&r->Z, p.p.Z)) {
+        goto err;
+    }
+    r->Z_is_one = is_one(p.p.Z) & 1;
+
+    ret = 1;
+
+err:
+    /* Shared cleanup for the success path and every "goto err" */
+    if (ctx)
+        BN_CTX_end(ctx);
+    BN_CTX_free(new_ctx);
+    if (new_points)
+        OPENSSL_free(new_points);
+    if (new_scalars)
+        OPENSSL_free(new_scalars);
+    return ret;
+}
+
+/*
+ * Compute the affine coordinates of |point|.
+ *
+ * x and y are optional outputs; a NULL pointer skips that coordinate.
+ * With zi the value produced by ecp_nistz256_mod_inverse() for Z, the
+ * outputs are X*zi^2 and Y*zi^3, each passed through
+ * ecp_nistz256_from_mont() before being stored. ctx is not used.
+ *
+ * Returns 1 on success. Returns 0 if the point is at infinity, if a
+ * coordinate cannot be converted to a field element, or if storing an
+ * output fails.
+ */
+static int ecp_nistz256_get_affine(const EC_GROUP *group,
+                                   const EC_POINT *point,
+                                   BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
+{
+    BN_ULONG in_x[P256_LIMBS], in_y[P256_LIMBS], in_z[P256_LIMBS];
+    BN_ULONG zi_sq[P256_LIMBS];
+    BN_ULONG zi_pow[P256_LIMBS];    /* zi first, later zi^3 */
+    BN_ULONG aff_x[P256_LIMBS];
+    BN_ULONG aff_y[P256_LIMBS];
+    BN_ULONG out_x[P256_LIMBS], out_y[P256_LIMBS];
+
+    if (EC_POINT_is_at_infinity(group, point)) {
+        ECerr(EC_F_ECP_NISTZ256_GET_AFFINE, EC_R_POINT_AT_INFINITY);
+        return 0;
+    }
+
+    if (!ecp_nistz256_bignum_to_field_elem(in_x, &point->X) ||
+        !ecp_nistz256_bignum_to_field_elem(in_y, &point->Y) ||
+        !ecp_nistz256_bignum_to_field_elem(in_z, &point->Z)) {
+        ECerr(EC_F_ECP_NISTZ256_GET_AFFINE, EC_R_COORDINATES_OUT_OF_RANGE);
+        return 0;
+    }
+
+    ecp_nistz256_mod_inverse(zi_pow, in_z);
+    ecp_nistz256_sqr_mont(zi_sq, zi_pow);
+    ecp_nistz256_mul_mont(aff_x, zi_sq, in_x);
+
+    if (x != NULL) {
+        ecp_nistz256_from_mont(out_x, aff_x);
+        if (!ecp_nistz256_set_words(x, out_x))
+            return 0;
+    }
+
+    if (y != NULL) {
+        /* zi * zi^2, computed in place */
+        ecp_nistz256_mul_mont(zi_pow, zi_pow, zi_sq);
+        ecp_nistz256_mul_mont(aff_y, zi_pow, in_y);
+        ecp_nistz256_from_mont(out_y, aff_y);
+        if (!ecp_nistz256_set_words(y, out_y))
+            return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Allocate an EC_PRE_COMP bound to |group|, with no table attached, a
+ * window width of 6 and a reference count of 1.
+ *
+ * Returns NULL if |group| is NULL or if allocation fails; the latter
+ * also raises ERR_R_MALLOC_FAILURE.
+ */
+static EC_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group)
+{
+    EC_PRE_COMP *pc;
+
+    if (group == NULL)
+        return NULL;
+
+    pc = (EC_PRE_COMP *)OPENSSL_malloc(sizeof(EC_PRE_COMP));
+    if (pc == NULL) {
+        ECerr(EC_F_ECP_NISTZ256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
+
+    pc->references = 1;
+    pc->group = group;
+    pc->w = 6;                  /* default */
+    pc->precomp_storage = NULL;
+    pc->precomp = NULL;
+
+    return pc;
+}
+
+/*
+ * "Duplicate" an EC_PRE_COMP by adding one reference and returning the
+ * same pointer; no copy is made.
+ */
+static void *ecp_nistz256_pre_comp_dup(void *src_)
+{
+    EC_PRE_COMP *shared = src_;
+
+    /* these objects never change, so sharing them is enough */
+    CRYPTO_add(&shared->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
+
+    return src_;
+}
+
+/*
+ * Drop one reference to an EC_PRE_COMP. When the count is no longer
+ * positive, free the table storage (if any) and the object itself.
+ * A NULL argument is ignored.
+ */
+static void ecp_nistz256_pre_comp_free(void *pre_)
+{
+    EC_PRE_COMP *pc = pre_;
+    int remaining;
+
+    if (pc == NULL)
+        return;
+
+    remaining = CRYPTO_add(&pc->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
+    if (remaining > 0)
+        return;                 /* still referenced elsewhere */
+
+    if (pc->precomp_storage != NULL)
+        OPENSSL_free(pc->precomp_storage);
+
+    OPENSSL_free(pc);
+}
+
+/*
+ * Like ecp_nistz256_pre_comp_free(), but the table and the EC_PRE_COMP
+ * structure are wiped with OPENSSL_cleanse() before being freed.
+ * A NULL argument is ignored.
+ */
+static void ecp_nistz256_pre_comp_clear_free(void *pre_)
+{
+    EC_PRE_COMP *pc = pre_;
+    int remaining;
+
+    if (pc == NULL)
+        return;
+
+    remaining = CRYPTO_add(&pc->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
+    if (remaining > 0)
+        return;                 /* still referenced elsewhere */
+
+    if (pc->precomp_storage != NULL) {
+        /*
+         * NOTE(review): the byte count is derived from pc->w; confirm it
+         * matches the size allocated where precomp_storage is set.
+         */
+        OPENSSL_cleanse(pc->precomp,
+                        32 * sizeof(unsigned char) * (1 << pc->w) * 2 * 37);
+        OPENSSL_free(pc->precomp_storage);
+    }
+    OPENSSL_cleanse(pc, sizeof *pc);
+    OPENSSL_free(pc);
+}
+
+/*
+ * Report whether precomputed generator multiples are available for
+ * |group|: returns 1 if the generator is the default one or if
+ * precomputation data is attached to the group, 0 otherwise.
+ */
+static int ecp_nistz256_window_have_precompute_mult(const EC_GROUP *group)
+{
+    const EC_POINT *g = EC_GROUP_get0_generator(group);
+
+    /* There is a hard-coded table for the default generator. */
+    if (g != NULL && ecp_nistz256_is_affine_G(g))
+        return 1;
+
+    /* Otherwise look for data stored on the group itself. */
+    if (EC_EX_DATA_get_data(group->extra_data, ecp_nistz256_pre_comp_dup,
+                            ecp_nistz256_pre_comp_free,
+                            ecp_nistz256_pre_comp_clear_free) != NULL)
+        return 1;
+
+    return 0;
+}
+
+/*
+ * Return the EC_METHOD for this implementation.
+ *
+ * The table is a function-local static const object, so every call
+ * returns the same pointer. Most slots reuse the generic ec_GFp_simple_*
+ * and ec_GFp_mont_* routines; the ecp_nistz256_* entries are the
+ * functions supplied by this file.
+ */
+const EC_METHOD *EC_GFp_nistz256_method(void)
+{
+    /* Positional initializer: the order must follow the EC_METHOD layout */
+    static const EC_METHOD ret = {
+        EC_FLAGS_DEFAULT_OCT,
+        NID_X9_62_prime_field,
+        ec_GFp_mont_group_init,
+        ec_GFp_mont_group_finish,
+        ec_GFp_mont_group_clear_finish,
+        ec_GFp_mont_group_copy,
+        ec_GFp_mont_group_set_curve,
+        ec_GFp_simple_group_get_curve,
+        ec_GFp_simple_group_get_degree,
+        ec_GFp_simple_group_check_discriminant,
+        ec_GFp_simple_point_init,
+        ec_GFp_simple_point_finish,
+        ec_GFp_simple_point_clear_finish,
+        ec_GFp_simple_point_copy,
+        ec_GFp_simple_point_set_to_infinity,
+        ec_GFp_simple_set_Jprojective_coordinates_GFp,
+        ec_GFp_simple_get_Jprojective_coordinates_GFp,
+        ec_GFp_simple_point_set_affine_coordinates,
+        ecp_nistz256_get_affine,
+        0, 0, 0,
+        ec_GFp_simple_add,
+        ec_GFp_simple_dbl,
+        ec_GFp_simple_invert,
+        ec_GFp_simple_is_at_infinity,
+        ec_GFp_simple_is_on_curve,
+        ec_GFp_simple_cmp,
+        ec_GFp_simple_make_affine,
+        ec_GFp_simple_points_make_affine,
+        ecp_nistz256_points_mul,                    /* mul */
+        ecp_nistz256_mult_precompute,               /* precompute_mult */
+        ecp_nistz256_window_have_precompute_mult,   /* have_precompute_mult */
+        ec_GFp_mont_field_mul,
+        ec_GFp_mont_field_sqr,
+        0,                                          /* field_div */
+        ec_GFp_mont_field_encode,
+        ec_GFp_mont_field_decode,
+        ec_GFp_mont_field_set_to_one
+    };
+
+    return &ret;
+}
diff --git a/crypto/ec/ecp_nistz256_table.c b/crypto/ec/ecp_nistz256_table.c
new file mode 100644 (file)
index 0000000..216d024
--- /dev/null
@@ -0,0 +1,9533 @@
+/*
+ * This is the precomputed constant time access table for the code in
+ * ecp_montp256.c, for the default generator. The table consists of 37
+ * subtables, each subtable contains 64 affine points. The affine points are
+ * encoded as eight uint64's, four for the x coordinate and four for the y.
+ * Both values are in little-endian order. There are 37 tables because a
+ * signed, 6-bit wNAF form of the scalar is used and ceil(256/(6 + 1)) = 37.
+ * Within each table there are 64 values because the 6-bit wNAF value can
+ * take 64 values, ignoring the sign bit, which is implemented by performing
+ * a negation of the affine point when required. We would like to align it
+ * to 2MB in order to increase the chances of using a large page but that
+ * appears to lead to invalid ELF files being produced.
+ */
+
+#if defined(__GNUC__)
+__attribute((aligned(4096)))
+#elif defined(_MSC_VER)
+__declspec(align(4096))
+#elif defined(__SUNPRO_C)
+# pragma align 4096(ecp_nistz256_precomputed)
+#endif
+static const BN_ULONG ecp_nistz256_precomputed[37][64 *
+                                                   sizeof(P256_POINT_AFFINE) /
+                                                   sizeof(BN_ULONG)] = {
+    {TOBN(0x79e730d4, 0x18a9143c), TOBN(0x75ba95fc, 0x5fedb601),
+     TOBN(0x79fb732b, 0x77622510), TOBN(0x18905f76, 0xa53755c6),
+     TOBN(0xddf25357, 0xce95560a), TOBN(0x8b4ab8e4, 0xba19e45c),
+     TOBN(0xd2e88688, 0xdd21f325), TOBN(0x8571ff18, 0x25885d85),
+     TOBN(0x850046d4, 0x10ddd64d), TOBN(0xaa6ae3c1, 0xa433827d),
+     TOBN(0x73220503, 0x8d1490d9), TOBN(0xf6bb32e4, 0x3dcf3a3b),
+     TOBN(0x2f3648d3, 0x61bee1a5), TOBN(0x152cd7cb, 0xeb236ff8),
+     TOBN(0x19a8fb0e, 0x92042dbe), TOBN(0x78c57751, 0x0a5b8a3b),
+     TOBN(0xffac3f90, 0x4eebc127), TOBN(0xb027f84a, 0x087d81fb),
+     TOBN(0x66ad77dd, 0x87cbbc98), TOBN(0x26936a3f, 0xb6ff747e),
+     TOBN(0xb04c5c1f, 0xc983a7eb), TOBN(0x583e47ad, 0x0861fe1a),
+     TOBN(0x78820831, 0x1a2ee98e), TOBN(0xd5f06a29, 0xe587cc07),
+     TOBN(0x74b0b50d, 0x46918dcc), TOBN(0x4650a6ed, 0xc623c173),
+     TOBN(0x0cdaacac, 0xe8100af2), TOBN(0x577362f5, 0x41b0176b),
+     TOBN(0x2d96f24c, 0xe4cbaba6), TOBN(0x17628471, 0xfad6f447),
+     TOBN(0x6b6c36de, 0xe5ddd22e), TOBN(0x84b14c39, 0x4c5ab863),
+     TOBN(0xbe1b8aae, 0xc45c61f5), TOBN(0x90ec649a, 0x94b9537d),
+     TOBN(0x941cb5aa, 0xd076c20c), TOBN(0xc9079605, 0x890523c8),
+     TOBN(0xeb309b4a, 0xe7ba4f10), TOBN(0x73c568ef, 0xe5eb882b),
+     TOBN(0x3540a987, 0x7e7a1f68), TOBN(0x73a076bb, 0x2dd1e916),
+     TOBN(0x40394737, 0x3e77664a), TOBN(0x55ae744f, 0x346cee3e),
+     TOBN(0xd50a961a, 0x5b17a3ad), TOBN(0x13074b59, 0x54213673),
+     TOBN(0x93d36220, 0xd377e44b), TOBN(0x299c2b53, 0xadff14b5),
+     TOBN(0xf424d44c, 0xef639f11), TOBN(0xa4c9916d, 0x4a07f75f),
+     TOBN(0x0746354e, 0xa0173b4f), TOBN(0x2bd20213, 0xd23c00f7),
+     TOBN(0xf43eaab5, 0x0c23bb08), TOBN(0x13ba5119, 0xc3123e03),
+     TOBN(0x2847d030, 0x3f5b9d4d), TOBN(0x6742f2f2, 0x5da67bdd),
+     TOBN(0xef933bdc, 0x77c94195), TOBN(0xeaedd915, 0x6e240867),
+     TOBN(0x27f14cd1, 0x9499a78f), TOBN(0x462ab5c5, 0x6f9b3455),
+     TOBN(0x8f90f02a, 0xf02cfc6b), TOBN(0xb763891e, 0xb265230d),
+     TOBN(0xf59da3a9, 0x532d4977), TOBN(0x21e3327d, 0xcf9eba15),
+     TOBN(0x123c7b84, 0xbe60bbf0), TOBN(0x56ec12f2, 0x7706df76),
+     TOBN(0x75c96e8f, 0x264e20e8), TOBN(0xabe6bfed, 0x59a7a841),
+     TOBN(0x2cc09c04, 0x44c8eb00), TOBN(0xe05b3080, 0xf0c4e16b),
+     TOBN(0x1eb7777a, 0xa45f3314), TOBN(0x56af7bed, 0xce5d45e3),
+     TOBN(0x2b6e019a, 0x88b12f1a), TOBN(0x086659cd, 0xfd835f9b),
+     TOBN(0x2c18dbd1, 0x9dc21ec8), TOBN(0x98f9868a, 0x0fcf8139),
+     TOBN(0x737d2cd6, 0x48250b49), TOBN(0xcc61c947, 0x24b3428f),
+     TOBN(0x0c2b4078, 0x80dd9e76), TOBN(0xc43a8991, 0x383fbe08),
+     TOBN(0x5f7d2d65, 0x779be5d2), TOBN(0x78719a54, 0xeb3b4ab5),
+     TOBN(0xea7d260a, 0x6245e404), TOBN(0x9de40795, 0x6e7fdfe0),
+     TOBN(0x1ff3a415, 0x8dac1ab5), TOBN(0x3e7090f1, 0x649c9073),
+     TOBN(0x1a768561, 0x2b944e88), TOBN(0x250f939e, 0xe57f61c8),
+     TOBN(0x0c0daa89, 0x1ead643d), TOBN(0x68930023, 0xe125b88e),
+     TOBN(0x04b71aa7, 0xd2697768), TOBN(0xabdedef5, 0xca345a33),
+     TOBN(0x2409d29d, 0xee37385e), TOBN(0x4ee1df77, 0xcb83e156),
+     TOBN(0x0cac12d9, 0x1cbb5b43), TOBN(0x170ed2f6, 0xca895637),
+     TOBN(0x28228cfa, 0x8ade6d66), TOBN(0x7ff57c95, 0x53238aca),
+     TOBN(0xccc42563, 0x4b2ed709), TOBN(0x0e356769, 0x856fd30d),
+     TOBN(0xbcbcd43f, 0x559e9811), TOBN(0x738477ac, 0x5395b759),
+     TOBN(0x35752b90, 0xc00ee17f), TOBN(0x68748390, 0x742ed2e3),
+     TOBN(0x7cd06422, 0xbd1f5bc1), TOBN(0xfbc08769, 0xc9e7b797),
+     TOBN(0xa242a35b, 0xb0cf664a), TOBN(0x126e48f7, 0x7f9707e3),
+     TOBN(0x1717bf54, 0xc6832660), TOBN(0xfaae7332, 0xfd12c72e),
+     TOBN(0x27b52db7, 0x995d586b), TOBN(0xbe29569e, 0x832237c2),
+     TOBN(0xe8e4193e, 0x2a65e7db), TOBN(0x152706dc, 0x2eaa1bbb),
+     TOBN(0x72bcd8b7, 0xbc60055b), TOBN(0x03cc23ee, 0x56e27e4b),
+     TOBN(0xee337424, 0xe4819370), TOBN(0xe2aa0e43, 0x0ad3da09),
+     TOBN(0x40b8524f, 0x6383c45d), TOBN(0xd7663554, 0x42a41b25),
+     TOBN(0x64efa6de, 0x778a4797), TOBN(0x2042170a, 0x7079adf4),
+     TOBN(0x808b0b65, 0x0bc6fb80), TOBN(0x5882e075, 0x3ffe2e6b),
+     TOBN(0xd5ef2f7c, 0x2c83f549), TOBN(0x54d63c80, 0x9103b723),
+     TOBN(0xf2f11bd6, 0x52a23f9b), TOBN(0x3670c319, 0x4b0b6587),
+     TOBN(0x55c4623b, 0xb1580e9e), TOBN(0x64edf7b2, 0x01efe220),
+     TOBN(0x97091dcb, 0xd53c5c9d), TOBN(0xf17624b6, 0xac0a177b),
+     TOBN(0xb0f13975, 0x2cfe2dff), TOBN(0xc1a35c0a, 0x6c7a574e),
+     TOBN(0x227d3146, 0x93e79987), TOBN(0x0575bf30, 0xe89cb80e),
+     TOBN(0x2f4e247f, 0x0d1883bb), TOBN(0xebd51226, 0x3274c3d0),
+     TOBN(0x5f3e51c8, 0x56ada97a), TOBN(0x4afc964d, 0x8f8b403e),
+     TOBN(0xa6f247ab, 0x412e2979), TOBN(0x675abd1b, 0x6f80ebda),
+     TOBN(0x66a2bd72, 0x5e485a1d), TOBN(0x4b2a5caf, 0x8f4f0b3c),
+     TOBN(0x2626927f, 0x1b847bba), TOBN(0x6c6fc7d9, 0x0502394d),
+     TOBN(0xfea912ba, 0xa5659ae8), TOBN(0x68363aba, 0x25e1a16e),
+     TOBN(0xb8842277, 0x752c41ac), TOBN(0xfe545c28, 0x2897c3fc),
+     TOBN(0x2d36e9e7, 0xdc4c696b), TOBN(0x5806244a, 0xfba977c5),
+     TOBN(0x85665e9b, 0xe39508c1), TOBN(0xf720ee25, 0x6d12597b),
+     TOBN(0x8a979129, 0xd2337a31), TOBN(0x5916868f, 0x0f862bdc),
+     TOBN(0x048099d9, 0x5dd283ba), TOBN(0xe2d1eeb6, 0xfe5bfb4e),
+     TOBN(0x82ef1c41, 0x7884005d), TOBN(0xa2d4ec17, 0xffffcbae),
+     TOBN(0x9161c53f, 0x8aa95e66), TOBN(0x5ee104e1, 0xc5fee0d0),
+     TOBN(0x562e4cec, 0xc135b208), TOBN(0x74e1b265, 0x4783f47d),
+     TOBN(0x6d2a506c, 0x5a3f3b30), TOBN(0xecead9f4, 0xc16762fc),
+     TOBN(0xf29dd4b2, 0xe286e5b9), TOBN(0x1b0fadc0, 0x83bb3c61),
+     TOBN(0x7a75023e, 0x7fac29a4), TOBN(0xc086d5f1, 0xc9477fa3),
+     TOBN(0x0fc61135, 0x2f6f3076), TOBN(0xc99ffa23, 0xe3912a9a),
+     TOBN(0x6a0b0685, 0xd2f8ba3d), TOBN(0xfdc777e8, 0xe93358a4),
+     TOBN(0x94a787bb, 0x35415f04), TOBN(0x640c2d6a, 0x4d23fea4),
+     TOBN(0x9de917da, 0x153a35b5), TOBN(0x793e8d07, 0x5d5cd074),
+     TOBN(0xf4f87653, 0x2de45068), TOBN(0x37c7a7e8, 0x9e2e1f6e),
+     TOBN(0xd0825fa2, 0xa3584069), TOBN(0xaf2cea7c, 0x1727bf42),
+     TOBN(0x0360a4fb, 0x9e4785a9), TOBN(0xe5fda49c, 0x27299f4a),
+     TOBN(0x48068e13, 0x71ac2f71), TOBN(0x83d0687b, 0x9077666f),
+     TOBN(0x6d3883b2, 0x15d02819), TOBN(0x6d0d7550, 0x40dd9a35),
+     TOBN(0x61d7cbf9, 0x1d2b469f), TOBN(0xf97b232f, 0x2efc3115),
+     TOBN(0xa551d750, 0xb24bcbc7), TOBN(0x11ea4949, 0x88a1e356),
+     TOBN(0x7669f031, 0x93cb7501), TOBN(0x595dc55e, 0xca737b8a),
+     TOBN(0xa4a319ac, 0xd837879f), TOBN(0x6fc1b49e, 0xed6b67b0),
+     TOBN(0xe3959933, 0x32f1f3af), TOBN(0x966742eb, 0x65432a2e),
+     TOBN(0x4b8dc9fe, 0xb4966228), TOBN(0x96cc6312, 0x43f43950),
+     TOBN(0x12068859, 0xc9b731ee), TOBN(0x7b948dc3, 0x56f79968),
+     TOBN(0x61e4ad32, 0xed1f8008), TOBN(0xe6c9267a, 0xd8b17538),
+     TOBN(0x1ac7c5eb, 0x857ff6fb), TOBN(0x994baaa8, 0x55f2fb10),
+     TOBN(0x84cf14e1, 0x1d248018), TOBN(0x5a39898b, 0x628ac508),
+     TOBN(0x14fde97b, 0x5fa944f5), TOBN(0xed178030, 0xd12e5ac7),
+     TOBN(0x042c2af4, 0x97e2feb4), TOBN(0xd36a42d7, 0xaebf7313),
+     TOBN(0x49d2c9eb, 0x084ffdd7), TOBN(0x9f8aa54b, 0x2ef7c76a),
+     TOBN(0x9200b7ba, 0x09895e70), TOBN(0x3bd0c66f, 0xddb7fb58),
+     TOBN(0x2d97d108, 0x78eb4cbb), TOBN(0x2d431068, 0xd84bde31),
+     TOBN(0x4b523eb7, 0x172ccd1f), TOBN(0x7323cb28, 0x30a6a892),
+     TOBN(0x97082ec0, 0xcfe153eb), TOBN(0xe97f6b6a, 0xf2aadb97),
+     TOBN(0x1d3d393e, 0xd1a83da1), TOBN(0xa6a7f9c7, 0x804b2a68),
+     TOBN(0x4a688b48, 0x2d0cb71e), TOBN(0xa9b4cc5f, 0x40585278),
+     TOBN(0x5e5db46a, 0xcb66e132), TOBN(0xf1be963a, 0x0d925880),
+     TOBN(0x944a7027, 0x0317b9e2), TOBN(0xe266f959, 0x48603d48),
+     TOBN(0x98db6673, 0x5c208899), TOBN(0x90472447, 0xa2fb18a3),
+     TOBN(0x8a966939, 0x777c619f), TOBN(0x3798142a, 0x2a3be21b),
+     TOBN(0xb4241cb1, 0x3298b343), TOBN(0xa3a14e49, 0xb44f65a1),
+     TOBN(0xc5f4d6cd, 0x3ac77acd), TOBN(0xd0288cb5, 0x52b6fc3c),
+     TOBN(0xd5cc8c2f, 0x1c040abc), TOBN(0xb675511e, 0x06bf9b4a),
+     TOBN(0xd667da37, 0x9b3aa441), TOBN(0x460d45ce, 0x51601f72),
+     TOBN(0xe2f73c69, 0x6755ff89), TOBN(0xdd3cf7e7, 0x473017e6),
+     TOBN(0x8ef5689d, 0x3cf7600d), TOBN(0x948dc4f8, 0xb1fc87b4),
+     TOBN(0xd9e9fe81, 0x4ea53299), TOBN(0x2d921ca2, 0x98eb6028),
+     TOBN(0xfaecedfd, 0x0c9803fc), TOBN(0xf38ae891, 0x4d7b4745),
+     TOBN(0xd8c5fccf, 0xc5e3a3d8), TOBN(0xbefd904c, 0x4079dfbf),
+     TOBN(0xbc6d6a58, 0xfead0197), TOBN(0x39227077, 0x695532a4),
+     TOBN(0x09e23e6d, 0xdbef42f5), TOBN(0x7e449b64, 0x480a9908),
+     TOBN(0x7b969c1a, 0xad9a2e40), TOBN(0x6231d792, 0x9591c2a4),
+     TOBN(0x87151456, 0x0f664534), TOBN(0x85ceae7c, 0x4b68f103),
+     TOBN(0xac09c4ae, 0x65578ab9), TOBN(0x33ec6868, 0xf044b10c),
+     TOBN(0x6ac4832b, 0x3a8ec1f1), TOBN(0x5509d128, 0x5847d5ef),
+     TOBN(0xf909604f, 0x763f1574), TOBN(0xb16c4303, 0xc32f63c4),
+     TOBN(0xb6ab2014, 0x7ca23cd3), TOBN(0xcaa7a5c6, 0xa391849d),
+     TOBN(0x5b0673a3, 0x75678d94), TOBN(0xc982ddd4, 0xdd303e64),
+     TOBN(0xfd7b000b, 0x5db6f971), TOBN(0xbba2cb1f, 0x6f876f92),
+     TOBN(0xc77332a3, 0x3c569426), TOBN(0xa159100c, 0x570d74f8),
+     TOBN(0xfd16847f, 0xdec67ef5), TOBN(0x742ee464, 0x233e76b7),
+     TOBN(0x0b8e4134, 0xefc2b4c8), TOBN(0xca640b86, 0x42a3e521),
+     TOBN(0x653a0190, 0x8ceb6aa9), TOBN(0x313c300c, 0x547852d5),
+     TOBN(0x24e4ab12, 0x6b237af7), TOBN(0x2ba90162, 0x8bb47af8),
+     TOBN(0x3d5e58d6, 0xa8219bb7), TOBN(0xc691d0bd, 0x1b06c57f),
+     TOBN(0x0ae4cb10, 0xd257576e), TOBN(0x3569656c, 0xd54a3dc3),
+     TOBN(0xe5ebaebd, 0x94cda03a), TOBN(0x934e82d3, 0x162bfe13),
+     TOBN(0x450ac0ba, 0xe251a0c6), TOBN(0x480b9e11, 0xdd6da526),
+     TOBN(0x00467bc5, 0x8cce08b5), TOBN(0xb636458c, 0x7f178d55),
+     TOBN(0xc5748bae, 0xa677d806), TOBN(0x2763a387, 0xdfa394eb),
+     TOBN(0xa12b448a, 0x7d3cebb6), TOBN(0xe7adda3e, 0x6f20d850),
+     TOBN(0xf63ebce5, 0x1558462c), TOBN(0x58b36143, 0x620088a8),
+     TOBN(0x8a2cc3ca, 0x4d63c0ee), TOBN(0x51233117, 0x0fe948ce),
+     TOBN(0x7463fd85, 0x222ef33b), TOBN(0xadf0c7dc, 0x7c603d6c),
+     TOBN(0x0ec32d3b, 0xfe7765e5), TOBN(0xccaab359, 0xbf380409),
+     TOBN(0xbdaa84d6, 0x8e59319c), TOBN(0xd9a4c280, 0x9c80c34d),
+     TOBN(0xa9d89488, 0xa059c142), TOBN(0x6f5ae714, 0xff0b9346),
+     TOBN(0x068f237d, 0x16fb3664), TOBN(0x5853e4c4, 0x363186ac),
+     TOBN(0xe2d87d23, 0x63c52f98), TOBN(0x2ec4a766, 0x81828876),
+     TOBN(0x47b864fa, 0xe14e7b1c), TOBN(0x0c0bc0e5, 0x69192408),
+     TOBN(0xe4d7681d, 0xb82e9f3e), TOBN(0x83200f0b, 0xdf25e13c),
+     TOBN(0x8909984c, 0x66f27280), TOBN(0x462d7b00, 0x75f73227),
+     TOBN(0xd90ba188, 0xf2651798), TOBN(0x74c6e18c, 0x36ab1c34),
+     TOBN(0xab256ea3, 0x5ef54359), TOBN(0x03466612, 0xd1aa702f),
+     TOBN(0x624d6049, 0x2ed22e91), TOBN(0x6fdfe0b5, 0x6f072822),
+     TOBN(0xeeca1115, 0x39ce2271), TOBN(0x98100a4f, 0xdb01614f),
+     TOBN(0xb6b0daa2, 0xa35c628f), TOBN(0xb6f94d2e, 0xc87e9a47),
+     TOBN(0xc6773259, 0x1d57d9ce), TOBN(0xf70bfeec, 0x03884a7b),
+     TOBN(0x5fb35ccf, 0xed2bad01), TOBN(0xa155cbe3, 0x1da6a5c7),
+     TOBN(0xc2e2594c, 0x30a92f8f), TOBN(0x649c89ce, 0x5bfafe43),
+     TOBN(0xd158667d, 0xe9ff257a), TOBN(0x9b359611, 0xf32c50ae),
+     TOBN(0x4b00b20b, 0x906014cf), TOBN(0xf3a8cfe3, 0x89bc7d3d),
+     TOBN(0x4ff23ffd, 0x248a7d06), TOBN(0x80c5bfb4, 0x878873fa),
+     TOBN(0xb7d9ad90, 0x05745981), TOBN(0x179c85db, 0x3db01994),
+     TOBN(0xba41b062, 0x61a6966c), TOBN(0x4d82d052, 0xeadce5a8),
+     TOBN(0x9e91cd3b, 0xa5e6a318), TOBN(0x47795f4f, 0x95b2dda0),
+     TOBN(0xecfd7c1f, 0xd55a897c), TOBN(0x009194ab, 0xb29110fb),
+     TOBN(0x5f0e2046, 0xe381d3b0), TOBN(0x5f3425f6, 0xa98dd291),
+     TOBN(0xbfa06687, 0x730d50da), TOBN(0x0423446c, 0x4b083b7f),
+     TOBN(0x397a247d, 0xd69d3417), TOBN(0xeb629f90, 0x387ba42a),
+     TOBN(0x1ee426cc, 0xd5cd79bf), TOBN(0x0032940b, 0x946c6e18),
+     TOBN(0x1b1e8ae0, 0x57477f58), TOBN(0xe94f7d34, 0x6d823278),
+     TOBN(0xc747cb96, 0x782ba21a), TOBN(0xc5254469, 0xf72b33a5),
+     TOBN(0x772ef6de, 0xc7f80c81), TOBN(0xd73acbfe, 0x2cd9e6b5),
+     TOBN(0x4075b5b1, 0x49ee90d9), TOBN(0x785c339a, 0xa06e9eba),
+     TOBN(0xa1030d5b, 0xabf825e0), TOBN(0xcec684c3, 0xa42931dc),
+     TOBN(0x42ab62c9, 0xc1586e63), TOBN(0x45431d66, 0x5ab43f2b),
+     TOBN(0x57c8b2c0, 0x55f7835d), TOBN(0x033da338, 0xc1b7f865),
+     TOBN(0x283c7513, 0xcaa76097), TOBN(0x0a624fa9, 0x36c83906),
+     TOBN(0x6b20afec, 0x715af2c7), TOBN(0x4b969974, 0xeba78bfd),
+     TOBN(0x220755cc, 0xd921d60e), TOBN(0x9b944e10, 0x7baeca13),
+     TOBN(0x04819d51, 0x5ded93d4), TOBN(0x9bbff86e, 0x6dddfd27),
+     TOBN(0x6b344130, 0x77adc612), TOBN(0xa7496529, 0xbbd803a0),
+     TOBN(0x1a1baaa7, 0x6d8805bd), TOBN(0xc8403902, 0x470343ad),
+     TOBN(0x39f59f66, 0x175adff1), TOBN(0x0b26d7fb, 0xb7d8c5b7),
+     TOBN(0xa875f5ce, 0x529d75e3), TOBN(0x85efc7e9, 0x41325cc2),
+     TOBN(0x21950b42, 0x1ff6acd3), TOBN(0xffe70484, 0x53dc6909),
+     TOBN(0xff4cd0b2, 0x28766127), TOBN(0xabdbe608, 0x4fb7db2b),
+     TOBN(0x837c9228, 0x5e1109e8), TOBN(0x26147d27, 0xf4645b5a),
+     TOBN(0x4d78f592, 0xf7818ed8), TOBN(0xd394077e, 0xf247fa36),
+     TOBN(0x0fb9c2d0, 0x488c171a), TOBN(0xa78bfbaa, 0x13685278),
+     TOBN(0xedfbe268, 0xd5b1fa6a), TOBN(0x0dceb8db, 0x2b7eaba7),
+     TOBN(0xbf9e8089, 0x9ae2b710), TOBN(0xefde7ae6, 0xa4449c96),
+     TOBN(0x43b7716b, 0xcc143a46), TOBN(0xd7d34194, 0xc3628c13),
+     TOBN(0x508cec1c, 0x3b3f64c9), TOBN(0xe20bc0ba, 0x1e5edf3f),
+     TOBN(0xda1deb85, 0x2f4318d4), TOBN(0xd20ebe0d, 0x5c3fa443),
+     TOBN(0x370b4ea7, 0x73241ea3), TOBN(0x61f1511c, 0x5e1a5f65),
+     TOBN(0x99a5e23d, 0x82681c62), TOBN(0xd731e383, 0xa2f54c2d),
+     TOBN(0x2692f36e, 0x83445904), TOBN(0x2e0ec469, 0xaf45f9c0),
+     TOBN(0x905a3201, 0xc67528b7), TOBN(0x88f77f34, 0xd0e5e542),
+     TOBN(0xf67a8d29, 0x5864687c), TOBN(0x23b92eae, 0x22df3562),
+     TOBN(0x5c27014b, 0x9bbec39e), TOBN(0x7ef2f226, 0x9c0f0f8d),
+     TOBN(0x97359638, 0x546c4d8d), TOBN(0x5f9c3fc4, 0x92f24679),
+     TOBN(0x912e8bed, 0xa8c8acd9), TOBN(0xec3a318d, 0x306634b0),
+     TOBN(0x80167f41, 0xc31cb264), TOBN(0x3db82f6f, 0x522113f2),
+     TOBN(0xb155bcd2, 0xdcafe197), TOBN(0xfba1da59, 0x43465283),
+     TOBN(0xa0425b8e, 0xb212cf53), TOBN(0x4f2e512e, 0xf8557c5f),
+     TOBN(0xc1286ff9, 0x25c4d56c), TOBN(0xbb8a0fea, 0xee26c851),
+     TOBN(0xc28f70d2, 0xe7d6107e), TOBN(0x7ee0c444, 0xe76265aa),
+     TOBN(0x3df277a4, 0x1d1936b1), TOBN(0x1a556e3f, 0xea9595eb),
+     TOBN(0x258bbbf9, 0xe7305683), TOBN(0x31eea5bf, 0x07ef5be6),
+     TOBN(0x0deb0e4a, 0x46c814c1), TOBN(0x5cee8449, 0xa7b730dd),
+     TOBN(0xeab495c5, 0xa0182bde), TOBN(0xee759f87, 0x9e27a6b4),
+     TOBN(0xc2cf6a68, 0x80e518ca), TOBN(0x25e8013f, 0xf14cf3f4),
+     TOBN(0x8fc44140, 0x7e8d7a14), TOBN(0xbb1ff3ca, 0x9556f36a),
+     TOBN(0x6a844385, 0x14600044), TOBN(0xba3f0c4a, 0x7451ae63),
+     TOBN(0xdfcac25b, 0x1f9af32a), TOBN(0x01e0db86, 0xb1f2214b),
+     TOBN(0x4e9a5bc2, 0xa4b596ac), TOBN(0x83927681, 0x026c2c08),
+     TOBN(0x3ec832e7, 0x7acaca28), TOBN(0x1bfeea57, 0xc7385b29),
+     TOBN(0x068212e3, 0xfd1eaf38), TOBN(0xc1329830, 0x6acf8ccc),
+     TOBN(0xb909f2db, 0x2aac9e59), TOBN(0x5748060d, 0xb661782a),
+     TOBN(0xc5ab2632, 0xc79b7a01), TOBN(0xda44c6c6, 0x00017626),
+     TOBN(0xf26c00e8, 0xa7ea82f0), TOBN(0x99cac80d, 0xe4299aaf),
+     TOBN(0xd66fe3b6, 0x7ed78be1), TOBN(0x305f725f, 0x648d02cd),
+     TOBN(0x33ed1bc4, 0x623fb21b), TOBN(0xfa70533e, 0x7a6319ad),
+     TOBN(0x17ab562d, 0xbe5ffb3e), TOBN(0x06374994, 0x56674741),
+     TOBN(0x69d44ed6, 0x5c46aa8e), TOBN(0x2100d5d3, 0xa8d063d1),
+     TOBN(0xcb9727ea, 0xa2d17c36), TOBN(0x4c2bab1b, 0x8add53b7),
+     TOBN(0xa084e90c, 0x15426704), TOBN(0x778afcd3, 0xa837ebea),
+     TOBN(0x6651f701, 0x7ce477f8), TOBN(0xa0624998, 0x46fb7a8b),
+     TOBN(0xdc1e6828, 0xed8a6e19), TOBN(0x33fc2336, 0x4189d9c7),
+     TOBN(0x026f8fe2, 0x671c39bc), TOBN(0xd40c4ccd, 0xbc6f9915),
+     TOBN(0xafa135bb, 0xf80e75ca), TOBN(0x12c651a0, 0x22adff2c),
+     TOBN(0xc40a04bd, 0x4f51ad96), TOBN(0x04820109, 0xbbe4e832),
+     TOBN(0x3667eb1a, 0x7f4c04cc), TOBN(0x59556621, 0xa9404f84),
+     TOBN(0x71cdf653, 0x7eceb50a), TOBN(0x994a44a6, 0x9b8335fa),
+     TOBN(0xd7faf819, 0xdbeb9b69), TOBN(0x473c5680, 0xeed4350d),
+     TOBN(0xb6658466, 0xda44bba2), TOBN(0x0d1bc780, 0x872bdbf3),
+     TOBN(0xe535f175, 0xa1962f91), TOBN(0x6ed7e061, 0xed58f5a7),
+     TOBN(0x177aa4c0, 0x2089a233), TOBN(0x0dbcb03a, 0xe539b413),
+     TOBN(0xe3dc424e, 0xbb32e38e), TOBN(0x6472e5ef, 0x6806701e),
+     TOBN(0xdd47ff98, 0x814be9ee), TOBN(0x6b60cfff, 0x35ace009),
+     TOBN(0xb8d3d931, 0x9ff91fe5), TOBN(0x039c4800, 0xf0518eed),
+     TOBN(0x95c37632, 0x9182cb26), TOBN(0x0763a434, 0x82fc568d),
+     TOBN(0x707c04d5, 0x383e76ba), TOBN(0xac98b930, 0x824e8197),
+     TOBN(0x92bf7c8f, 0x91230de0), TOBN(0x90876a01, 0x40959b70),
+     TOBN(0xdb6d96f3, 0x05968b80), TOBN(0x380a0913, 0x089f73b9),
+     TOBN(0x7da70b83, 0xc2c61e01), TOBN(0x95fb8394, 0x569b38c7),
+     TOBN(0x9a3c6512, 0x80edfe2f), TOBN(0x8f726bb9, 0x8faeaf82),
+     TOBN(0x8010a4a0, 0x78424bf8), TOBN(0x29672044, 0x0e844970)}
+    ,
+    {TOBN(0x63c5cb81, 0x7a2ad62a), TOBN(0x7ef2b6b9, 0xac62ff54),
+     TOBN(0x3749bba4, 0xb3ad9db5), TOBN(0xad311f2c, 0x46d5a617),
+     TOBN(0xb77a8087, 0xc2ff3b6d), TOBN(0xb46feaf3, 0x367834ff),
+     TOBN(0xf8aa266d, 0x75d6b138), TOBN(0xfa38d320, 0xec008188),
+     TOBN(0x486d8ffa, 0x696946fc), TOBN(0x50fbc6d8, 0xb9cba56d),
+     TOBN(0x7e3d423e, 0x90f35a15), TOBN(0x7c3da195, 0xc0dd962c),
+     TOBN(0xe673fdb0, 0x3cfd5d8b), TOBN(0x0704b7c2, 0x889dfca5),
+     TOBN(0xf6ce581f, 0xf52305aa), TOBN(0x399d49eb, 0x914d5e53),
+     TOBN(0x380a496d, 0x6ec293cd), TOBN(0x733dbda7, 0x8e7051f5),
+     TOBN(0x037e388d, 0xb849140a), TOBN(0xee4b32b0, 0x5946dbf6),
+     TOBN(0xb1c4fda9, 0xcae368d1), TOBN(0x5001a7b0, 0xfdb0b2f3),
+     TOBN(0x6df59374, 0x2e3ac46e), TOBN(0x4af675f2, 0x39b3e656),
+     TOBN(0x44e38110, 0x39949296), TOBN(0x5b63827b, 0x361db1b5),
+     TOBN(0x3e5323ed, 0x206eaff5), TOBN(0x942370d2, 0xc21f4290),
+     TOBN(0xf2caaf2e, 0xe0d985a1), TOBN(0x192cc64b, 0x7239846d),
+     TOBN(0x7c0b8f47, 0xae6312f8), TOBN(0x7dc61f91, 0x96620108),
+     TOBN(0xb830fb5b, 0xc2da7de9), TOBN(0xd0e643df, 0x0ff8d3be),
+     TOBN(0x31ee77ba, 0x188a9641), TOBN(0x4e8aa3aa, 0xbcf6d502),
+     TOBN(0xf9fb6532, 0x9a49110f), TOBN(0xd18317f6, 0x2dd6b220),
+     TOBN(0x7e3ced41, 0x52c3ea5a), TOBN(0x0d296a14, 0x7d579c4a),
+     TOBN(0x35d6a53e, 0xed4c3717), TOBN(0x9f8240cf, 0x3d0ed2a3),
+     TOBN(0x8c0d4d05, 0xe5543aa5), TOBN(0x45d5bbfb, 0xdd33b4b4),
+     TOBN(0xfa04cc73, 0x137fd28e), TOBN(0x862ac6ef, 0xc73b3ffd),
+     TOBN(0x403ff9f5, 0x31f51ef2), TOBN(0x34d5e0fc, 0xbc73f5a2),
+     TOBN(0xf2526820, 0x08913f4f), TOBN(0xea20ed61, 0xeac93d95),
+     TOBN(0x51ed38b4, 0x6ca6b26c), TOBN(0x8662dcbc, 0xea4327b0),
+     TOBN(0x6daf295c, 0x725d2aaa), TOBN(0xbad2752f, 0x8e52dcda),
+     TOBN(0x2210e721, 0x0b17dacc), TOBN(0xa37f7912, 0xd51e8232),
+     TOBN(0x4f7081e1, 0x44cc3add), TOBN(0xd5ffa1d6, 0x87be82cf),
+     TOBN(0x89890b6c, 0x0edd6472), TOBN(0xada26e1a, 0x3ed17863),
+     TOBN(0x276f2715, 0x63483caa), TOBN(0xe6924cd9, 0x2f6077fd),
+     TOBN(0x05a7fe98, 0x0a466e3c), TOBN(0xf1c794b0, 0xb1902d1f),
+     TOBN(0xe5213688, 0x82a8042c), TOBN(0xd931cfaf, 0xcd278298),
+     TOBN(0x069a0ae0, 0xf597a740), TOBN(0x0adbb3f3, 0xeb59107c),
+     TOBN(0x983e951e, 0x5eaa8eb8), TOBN(0xe663a8b5, 0x11b48e78),
+     TOBN(0x1631cc0d, 0x8a03f2c5), TOBN(0x7577c11e, 0x11e271e2),
+     TOBN(0x33b2385c, 0x08369a90), TOBN(0x2990c59b, 0x190eb4f8),
+     TOBN(0x819a6145, 0xc68eac80), TOBN(0x7a786d62, 0x2ec4a014),
+     TOBN(0x33faadbe, 0x20ac3a8d), TOBN(0x31a21781, 0x5aba2d30),
+     TOBN(0x209d2742, 0xdba4f565), TOBN(0xdb2ce9e3, 0x55aa0fbb),
+     TOBN(0x8cef334b, 0x168984df), TOBN(0xe81dce17, 0x33879638),
+     TOBN(0xf6e6949c, 0x263720f0), TOBN(0x5c56feaf, 0xf593cbec),
+     TOBN(0x8bff5601, 0xfde58c84), TOBN(0x74e24117, 0x2eccb314),
+     TOBN(0xbcf01b61, 0x4c9a8a78), TOBN(0xa233e35e, 0x544c9868),
+     TOBN(0xb3156bf3, 0x8bd7aff1), TOBN(0x1b5ee4cb, 0x1d81b146),
+     TOBN(0x7ba1ac41, 0xd628a915), TOBN(0x8f3a8f9c, 0xfd89699e),
+     TOBN(0x7329b9c9, 0xa0748be7), TOBN(0x1d391c95, 0xa92e621f),
+     TOBN(0xe51e6b21, 0x4d10a837), TOBN(0xd255f53a, 0x4947b435),
+     TOBN(0x07669e04, 0xf1788ee3), TOBN(0xc14f27af, 0xa86938a2),
+     TOBN(0x8b47a334, 0xe93a01c0), TOBN(0xff627438, 0xd9366808),
+     TOBN(0x7a0985d8, 0xca2a5965), TOBN(0x3d9a5542, 0xd6e9b9b3),
+     TOBN(0xc23eb80b, 0x4cf972e8), TOBN(0x5c1c33bb, 0x4fdf72fd),
+     TOBN(0x0c4a58d4, 0x74a86108), TOBN(0xf8048a8f, 0xee4c5d90),
+     TOBN(0xe3c7c924, 0xe86d4c80), TOBN(0x28c889de, 0x056a1e60),
+     TOBN(0x57e2662e, 0xb214a040), TOBN(0xe8c48e98, 0x37e10347),
+     TOBN(0x87742862, 0x80ac748a), TOBN(0xf1c24022, 0x186b06f2),
+     TOBN(0xac2dd4c3, 0x5f74040a), TOBN(0x409aeb71, 0xfceac957),
+     TOBN(0x4fbad782, 0x55c4ec23), TOBN(0xb359ed61, 0x8a7b76ec),
+     TOBN(0x12744926, 0xed6f4a60), TOBN(0xe21e8d7f, 0x4b912de3),
+     TOBN(0xe2575a59, 0xfc705a59), TOBN(0x72f1d4de, 0xed2dbc0e),
+     TOBN(0x3d2b24b9, 0xeb7926b8), TOBN(0xbff88cb3, 0xcdbe5509),
+     TOBN(0xd0f399af, 0xe4dd640b), TOBN(0x3c5fe130, 0x2f76ed45),
+     TOBN(0x6f3562f4, 0x3764fb3d), TOBN(0x7b5af318, 0x3151b62d),
+     TOBN(0xd5bd0bc7, 0xd79ce5f3), TOBN(0xfdaf6b20, 0xec66890f),
+     TOBN(0x735c67ec, 0x6063540c), TOBN(0x50b259c2, 0xe5f9cb8f),
+     TOBN(0xb8734f9a, 0x3f99c6ab), TOBN(0xf8cc13d5, 0xa3a7bc85),
+     TOBN(0x80c1b305, 0xc5217659), TOBN(0xfe5364d4, 0x4ec12a54),
+     TOBN(0xbd87045e, 0x681345fe), TOBN(0x7f8efeb1, 0x582f897f),
+     TOBN(0xe8cbf1e5, 0xd5923359), TOBN(0xdb0cea9d, 0x539b9fb0),
+     TOBN(0x0c5b34cf, 0x49859b98), TOBN(0x5e583c56, 0xa4403cc6),
+     TOBN(0x11fc1a2d, 0xd48185b7), TOBN(0xc93fbc7e, 0x6e521787),
+     TOBN(0x47e7a058, 0x05105b8b), TOBN(0x7b4d4d58, 0xdb8260c8),
+     TOBN(0xe33930b0, 0x46eb842a), TOBN(0x8e844a9a, 0x7bdae56d),
+     TOBN(0x34ef3a9e, 0x13f7fdfc), TOBN(0xb3768f82, 0x636ca176),
+     TOBN(0x2821f4e0, 0x4e09e61c), TOBN(0x414dc3a1, 0xa0c7cddc),
+     TOBN(0xd5379437, 0x54945fcd), TOBN(0x151b6eef, 0xb3555ff1),
+     TOBN(0xb31bd613, 0x6339c083), TOBN(0x39ff8155, 0xdfb64701),
+     TOBN(0x7c3388d2, 0xe29604ab), TOBN(0x1e19084b, 0xa6b10442),
+     TOBN(0x17cf54c0, 0xeccd47ef), TOBN(0x89693385, 0x4a5dfb30),
+     TOBN(0x69d023fb, 0x47daf9f6), TOBN(0x9222840b, 0x7d91d959),
+     TOBN(0x439108f5, 0x803bac62), TOBN(0x0b7dd91d, 0x379bd45f),
+     TOBN(0xd651e827, 0xca63c581), TOBN(0x5c5d75f6, 0x509c104f),
+     TOBN(0x7d5fc738, 0x1f2dc308), TOBN(0x20faa7bf, 0xd98454be),
+     TOBN(0x95374bee, 0xa517b031), TOBN(0xf036b9b1, 0x642692ac),
+     TOBN(0xc5106109, 0x39842194), TOBN(0xb7e2353e, 0x49d05295),
+     TOBN(0xfc8c1d5c, 0xefb42ee0), TOBN(0xe04884eb, 0x08ce811c),
+     TOBN(0xf1f75d81, 0x7419f40e), TOBN(0x5b0ac162, 0xa995c241),
+     TOBN(0x120921bb, 0xc4c55646), TOBN(0x713520c2, 0x8d33cf97),
+     TOBN(0xb4a65a5c, 0xe98c5100), TOBN(0x6cec871d, 0x2ddd0f5a),
+     TOBN(0x251f0b7f, 0x9ba2e78b), TOBN(0x224a8434, 0xce3a2a5f),
+     TOBN(0x26827f61, 0x25f5c46f), TOBN(0x6a22bedc, 0x48545ec0),
+     TOBN(0x25ae5fa0, 0xb1bb5cdc), TOBN(0xd693682f, 0xfcb9b98f),
+     TOBN(0x32027fe8, 0x91e5d7d3), TOBN(0xf14b7d17, 0x73a07678),
+     TOBN(0xf88497b3, 0xc0dfdd61), TOBN(0xf7c2eec0, 0x2a8c4f48),
+     TOBN(0xaa5573f4, 0x3756e621), TOBN(0xc013a240, 0x1825b948),
+     TOBN(0x1c03b345, 0x63878572), TOBN(0xa0472bea, 0x653a4184),
+     TOBN(0xf4222e27, 0x0ac69a80), TOBN(0x34096d25, 0xf51e54f6),
+     TOBN(0x00a648cb, 0x8fffa591), TOBN(0x4e87acdc, 0x69b6527f),
+     TOBN(0x0575e037, 0xe285ccb4), TOBN(0x188089e4, 0x50ddcf52),
+     TOBN(0xaa96c9a8, 0x870ff719), TOBN(0x74a56cd8, 0x1fc7e369),
+     TOBN(0x41d04ee2, 0x1726931a), TOBN(0x0bbbb2c8, 0x3660ecfd),
+     TOBN(0xa6ef6de5, 0x24818e18), TOBN(0xe421cc51, 0xe7d57887),
+     TOBN(0xf127d208, 0xbea87be6), TOBN(0x16a475d3, 0xb1cdd682),
+     TOBN(0x9db1b684, 0x439b63f7), TOBN(0x5359b3db, 0xf0f113b6),
+     TOBN(0xdfccf1de, 0x8bf06e31), TOBN(0x1fdf8f44, 0xdd383901),
+     TOBN(0x10775cad, 0x5017e7d2), TOBN(0xdfc3a597, 0x58d11eef),
+     TOBN(0x6ec9c8a0, 0xb1ecff10), TOBN(0xee6ed6cc, 0x28400549),
+     TOBN(0xb5ad7bae, 0x1b4f8d73), TOBN(0x61b4f11d, 0xe00aaab9),
+     TOBN(0x7b32d69b, 0xd4eff2d7), TOBN(0x88ae6771, 0x4288b60f),
+     TOBN(0x159461b4, 0x37a1e723), TOBN(0x1f3d4789, 0x570aae8c),
+     TOBN(0x869118c0, 0x7f9871da), TOBN(0x35fbda78, 0xf635e278),
+     TOBN(0x738f3641, 0xe1541dac), TOBN(0x6794b13a, 0xc0dae45f),
+     TOBN(0x065064ac, 0x09cc0917), TOBN(0x27c53729, 0xc68540fd),
+     TOBN(0x0d2d4c8e, 0xef227671), TOBN(0xd23a9f80, 0xa1785a04),
+     TOBN(0x98c59528, 0x52650359), TOBN(0xfa09ad01, 0x74a1acad),
+     TOBN(0x082d5a29, 0x0b55bf5c), TOBN(0xa40f1c67, 0x419b8084),
+     TOBN(0x3a5c752e, 0xdcc18770), TOBN(0x4baf1f2f, 0x8825c3a5),
+     TOBN(0xebd63f74, 0x21b153ed), TOBN(0xa2383e47, 0xb2f64723),
+     TOBN(0xe7bf620a, 0x2646d19a), TOBN(0x56cb44ec, 0x03c83ffd),
+     TOBN(0xaf7267c9, 0x4f6be9f1), TOBN(0x8b2dfd7b, 0xc06bb5e9),
+     TOBN(0xb87072f2, 0xa672c5c7), TOBN(0xeacb11c8, 0x0d53c5e2),
+     TOBN(0x22dac29d, 0xff435932), TOBN(0x37bdb99d, 0x4408693c),
+     TOBN(0xf6e62fb6, 0x2899c20f), TOBN(0x3535d512, 0x447ece24),
+     TOBN(0xfbdc6b88, 0xff577ce3), TOBN(0x726693bd, 0x190575f2),
+     TOBN(0x6772b0e5, 0xab4b35a2), TOBN(0x1d8b6001, 0xf5eeaacf),
+     TOBN(0x728f7ce4, 0x795b9580), TOBN(0x4a20ed2a, 0x41fb81da),
+     TOBN(0x9f685cd4, 0x4fec01e6), TOBN(0x3ed7ddcc, 0xa7ff50ad),
+     TOBN(0x460fd264, 0x0c2d97fd), TOBN(0x3a241426, 0xeb82f4f9),
+     TOBN(0x17d1df2c, 0x6a8ea820), TOBN(0xb2b50d3b, 0xf22cc254),
+     TOBN(0x03856cba, 0xb7291426), TOBN(0x87fd26ae, 0x04f5ee39),
+     TOBN(0x9cb696cc, 0x02bee4ba), TOBN(0x53121804, 0x06820fd6),
+     TOBN(0xa5dfc269, 0x0212e985), TOBN(0x666f7ffa, 0x160f9a09),
+     TOBN(0xc503cd33, 0xbccd9617), TOBN(0x365dede4, 0xba7730a3),
+     TOBN(0x798c6355, 0x5ddb0786), TOBN(0xa6c3200e, 0xfc9cd3bc),
+     TOBN(0x060ffb2c, 0xe5e35efd), TOBN(0x99a4e25b, 0x5555a1c1),
+     TOBN(0x11d95375, 0xf70b3751), TOBN(0x0a57354a, 0x160e1bf6),
+     TOBN(0xecb3ae4b, 0xf8e4b065), TOBN(0x07a834c4, 0x2e53022b),
+     TOBN(0x1cd300b3, 0x8692ed96), TOBN(0x16a6f792, 0x61ee14ec),
+     TOBN(0x8f1063c6, 0x6a8649ed), TOBN(0xfbcdfcfe, 0x869f3e14),
+     TOBN(0x2cfb97c1, 0x00a7b3ec), TOBN(0xcea49b3c, 0x7130c2f1),
+     TOBN(0x462d044f, 0xe9d96488), TOBN(0x4b53d52e, 0x8182a0c1),
+     TOBN(0x84b6ddd3, 0x0391e9e9), TOBN(0x80ab7b48, 0xb1741a09),
+     TOBN(0xec0e15d4, 0x27d3317f), TOBN(0x8dfc1ddb, 0x1a64671e),
+     TOBN(0x93cc5d5f, 0xd49c5b92), TOBN(0xc995d53d, 0x3674a331),
+     TOBN(0x302e41ec, 0x090090ae), TOBN(0x2278a0cc, 0xedb06830),
+     TOBN(0x1d025932, 0xfbc99690), TOBN(0x0c32fbd2, 0xb80d68da),
+     TOBN(0xd79146da, 0xf341a6c1), TOBN(0xae0ba139, 0x1bef68a0),
+     TOBN(0xc6b8a563, 0x8d774b3a), TOBN(0x1cf307bd, 0x880ba4d7),
+     TOBN(0xc033bdc7, 0x19803511), TOBN(0xa9f97b3b, 0x8888c3be),
+     TOBN(0x3d68aebc, 0x85c6d05e), TOBN(0xc3b88a9d, 0x193919eb),
+     TOBN(0x2d300748, 0xc48b0ee3), TOBN(0x7506bc7c, 0x07a746c1),
+     TOBN(0xfc48437c, 0x6e6d57f3), TOBN(0x5bd71587, 0xcfeaa91a),
+     TOBN(0xa4ed0408, 0xc1bc5225), TOBN(0xd0b946db, 0x2719226d),
+     TOBN(0x109ecd62, 0x758d2d43), TOBN(0x75c8485a, 0x2751759b),
+     TOBN(0xb0b75f49, 0x9ce4177a), TOBN(0x4fa61a1e, 0x79c10c3d),
+     TOBN(0xc062d300, 0xa167fcd7), TOBN(0x4df3874c, 0x750f0fa8),
+     TOBN(0x29ae2cf9, 0x83dfedc9), TOBN(0xf8437134, 0x8d87631a),
+     TOBN(0xaf571711, 0x7429c8d2), TOBN(0x18d15867, 0x146d9272),
+     TOBN(0x83053ecf, 0x69769bb7), TOBN(0xc55eb856, 0xc479ab82),
+     TOBN(0x5ef7791c, 0x21b0f4b2), TOBN(0xaa5956ba, 0x3d491525),
+     TOBN(0x407a96c2, 0x9fe20eba), TOBN(0xf27168bb, 0xe52a5ad3),
+     TOBN(0x43b60ab3, 0xbf1d9d89), TOBN(0xe45c51ef, 0x710e727a),
+     TOBN(0xdfca5276, 0x099b4221), TOBN(0x8dc6407c, 0x2557a159),
+     TOBN(0x0ead8335, 0x91035895), TOBN(0x0a9db957, 0x9c55dc32),
+     TOBN(0xe40736d3, 0xdf61bc76), TOBN(0x13a619c0, 0x3f778cdb),
+     TOBN(0x6dd921a4, 0xc56ea28f), TOBN(0x76a52433, 0x2fa647b4),
+     TOBN(0x23591891, 0xac5bdc5d), TOBN(0xff4a1a72, 0xbac7dc01),
+     TOBN(0x9905e261, 0x62df8453), TOBN(0x3ac045df, 0xe63b265f),
+     TOBN(0x8a3f341b, 0xad53dba7), TOBN(0x8ec269cc, 0x837b625a),
+     TOBN(0xd71a2782, 0x3ae31189), TOBN(0x8fb4f9a3, 0x55e96120),
+     TOBN(0x804af823, 0xff9875cf), TOBN(0x23224f57, 0x5d442a9b),
+     TOBN(0x1c4d3b9e, 0xecc62679), TOBN(0x91da22fb, 0xa0e7ddb1),
+     TOBN(0xa370324d, 0x6c04a661), TOBN(0x9710d3b6, 0x5e376d17),
+     TOBN(0xed8c98f0, 0x3044e357), TOBN(0xc364ebbe, 0x6422701c),
+     TOBN(0x347f5d51, 0x7733d61c), TOBN(0xd55644b9, 0xcea826c3),
+     TOBN(0x80c6e0ad, 0x55a25548), TOBN(0x0aa7641d, 0x844220a7),
+     TOBN(0x1438ec81, 0x31810660), TOBN(0x9dfa6507, 0xde4b4043),
+     TOBN(0x10b515d8, 0xcc3e0273), TOBN(0x1b6066dd, 0x28d8cfb2),
+     TOBN(0xd3b04591, 0x9c9efebd), TOBN(0x425d4bdf, 0xa21c1ff4),
+     TOBN(0x5fe5af19, 0xd57607d3), TOBN(0xbbf773f7, 0x54481084),
+     TOBN(0x8435bd69, 0x94b03ed1), TOBN(0xd9ad1de3, 0x634cc546),
+     TOBN(0x2cf423fc, 0x00e420ca), TOBN(0xeed26d80, 0xa03096dd),
+     TOBN(0xd7f60be7, 0xa4db09d2), TOBN(0xf47f569d, 0x960622f7),
+     TOBN(0xe5925fd7, 0x7296c729), TOBN(0xeff2db26, 0x26ca2715),
+     TOBN(0xa6fcd014, 0xb913e759), TOBN(0x53da4786, 0x8ff4de93),
+     TOBN(0x14616d79, 0xc32068e1), TOBN(0xb187d664, 0xccdf352e),
+     TOBN(0xf7afb650, 0x1dc90b59), TOBN(0x8170e943, 0x7daa1b26),
+     TOBN(0xc8e3bdd8, 0x700c0a84), TOBN(0x6e8d345f, 0x6482bdfa),
+     TOBN(0x84cfbfa1, 0xc5c5ea50), TOBN(0xd3baf14c, 0x67960681),
+     TOBN(0x26398403, 0x0dd50942), TOBN(0xe4b7839c, 0x4716a663),
+     TOBN(0xd5f1f794, 0xe7de6dc0), TOBN(0x5cd0f4d4, 0x622aa7ce),
+     TOBN(0x5295f3f1, 0x59acfeec), TOBN(0x8d933552, 0x953e0607),
+     TOBN(0xc7db8ec5, 0x776c5722), TOBN(0xdc467e62, 0x2b5f290c),
+     TOBN(0xd4297e70, 0x4ff425a9), TOBN(0x4be924c1, 0x0cf7bb72),
+     TOBN(0x0d5dc5ae, 0xa1892131), TOBN(0x8bf8a8e3, 0xa705c992),
+     TOBN(0x73a0b064, 0x7a305ac5), TOBN(0x00c9ca4e, 0x9a8c77a8),
+     TOBN(0x5dfee80f, 0x83774bdd), TOBN(0x63131602, 0x85734485),
+     TOBN(0xa1b524ae, 0x914a69a9), TOBN(0xebc2ffaf, 0xd4e300d7),
+     TOBN(0x52c93db7, 0x7cfa46a5), TOBN(0x71e6161f, 0x21653b50),
+     TOBN(0x3574fc57, 0xa4bc580a), TOBN(0xc09015dd, 0xe1bc1253),
+     TOBN(0x4b7b47b2, 0xd174d7aa), TOBN(0x4072d8e8, 0xf3a15d04),
+     TOBN(0xeeb7d47f, 0xd6fa07ed), TOBN(0x6f2b9ff9, 0xedbdafb1),
+     TOBN(0x18c51615, 0x3760fe8a), TOBN(0x7a96e6bf, 0xf06c6c13),
+     TOBN(0x4d7a0410, 0x0ea2d071), TOBN(0xa1914e9b, 0x0be2a5ce),
+     TOBN(0x5726e357, 0xd8a3c5cf), TOBN(0x1197ecc3, 0x2abb2b13),
+     TOBN(0x6c0d7f7f, 0x31ae88dd), TOBN(0x15b20d1a, 0xfdbb3efe),
+     TOBN(0xcd06aa26, 0x70584039), TOBN(0x2277c969, 0xa7dc9747),
+     TOBN(0xbca69587, 0x7855d815), TOBN(0x899ea238, 0x5188b32a),
+     TOBN(0x37d9228b, 0x760c1c9d), TOBN(0xc7efbb11, 0x9b5c18da),
+     TOBN(0x7f0d1bc8, 0x19f6dbc5), TOBN(0x4875384b, 0x07e6905b),
+     TOBN(0xc7c50baa, 0x3ba8cd86), TOBN(0xb0ce40fb, 0xc2905de0),
+     TOBN(0x70840673, 0x7a231952), TOBN(0xa912a262, 0xcf43de26),
+     TOBN(0x9c38ddcc, 0xeb5b76c1), TOBN(0x746f5285, 0x26fc0ab4),
+     TOBN(0x52a63a50, 0xd62c269f), TOBN(0x60049c55, 0x99458621),
+     TOBN(0xe7f48f82, 0x3c2f7c9e), TOBN(0x6bd99043, 0x917d5cf3),
+     TOBN(0xeb1317a8, 0x8701f469), TOBN(0xbd3fe2ed, 0x9a449fe0),
+     TOBN(0x421e79ca, 0x12ef3d36), TOBN(0x9ee3c36c, 0x3e7ea5de),
+     TOBN(0xe48198b5, 0xcdff36f7), TOBN(0xaff4f967, 0xc6b82228),
+     TOBN(0x15e19dd0, 0xc47adb7e), TOBN(0x45699b23, 0x032e7dfa),
+     TOBN(0x40680c8b, 0x1fae026a), TOBN(0x5a347a48, 0x550dbf4d),
+     TOBN(0xe652533b, 0x3cef0d7d), TOBN(0xd94f7b18, 0x2bbb4381),
+     TOBN(0x838752be, 0x0e80f500), TOBN(0x8e6e2488, 0x9e9c9bfb),
+     TOBN(0xc9751697, 0x16caca6a), TOBN(0x866c49d8, 0x38531ad9),
+     TOBN(0xc917e239, 0x7151ade1), TOBN(0x2d016ec1, 0x6037c407),
+     TOBN(0xa407ccc9, 0x00eac3f9), TOBN(0x835f6280, 0xe2ed4748),
+     TOBN(0xcc54c347, 0x1cc98e0d), TOBN(0x0e969937, 0xdcb572eb),
+     TOBN(0x1b16c8e8, 0x8f30c9cb), TOBN(0xa606ae75, 0x373c4661),
+     TOBN(0x47aa689b, 0x35502cab), TOBN(0xf89014ae, 0x4d9bb64f),
+     TOBN(0x202f6a9c, 0x31c71f7b), TOBN(0x01f95aa3, 0x296ffe5c),
+     TOBN(0x5fc06014, 0x53cec3a3), TOBN(0xeb991237, 0x5f498a45),
+     TOBN(0xae9a935e, 0x5d91ba87), TOBN(0xc6ac6281, 0x0b564a19),
+     TOBN(0x8a8fe81c, 0x3bd44e69), TOBN(0x7c8b467f, 0x9dd11d45),
+     TOBN(0xf772251f, 0xea5b8e69), TOBN(0xaeecb3bd, 0xc5b75fbc),
+     TOBN(0x1aca3331, 0x887ff0e5), TOBN(0xbe5d49ff, 0x19f0a131),
+     TOBN(0x582c13aa, 0xe5c8646f), TOBN(0xdbaa12e8, 0x20e19980),
+     TOBN(0x8f40f31a, 0xf7abbd94), TOBN(0x1f13f5a8, 0x1dfc7663),
+     TOBN(0x5d81f1ee, 0xaceb4fc0), TOBN(0x36256002, 0x5e6f0f42),
+     TOBN(0x4b67d6d7, 0x751370c8), TOBN(0x2608b698, 0x03e80589),
+     TOBN(0xcfc0d2fc, 0x05268301), TOBN(0xa6943d39, 0x40309212),
+     TOBN(0x192a90c2, 0x1fd0e1c2), TOBN(0xb209f113, 0x37f1dc76),
+     TOBN(0xefcc5e06, 0x97bf1298), TOBN(0xcbdb6730, 0x219d639e),
+     TOBN(0xd009c116, 0xb81e8c6f), TOBN(0xa3ffdde3, 0x1a7ce2e5),
+     TOBN(0xc53fbaaa, 0xa914d3ba), TOBN(0x836d500f, 0x88df85ee),
+     TOBN(0xd98dc71b, 0x66ee0751), TOBN(0x5a3d7005, 0x714516fd),
+     TOBN(0x21d3634d, 0x39eedbba), TOBN(0x35cd2e68, 0x0455a46d),
+     TOBN(0xc8cafe65, 0xf9d7eb0c), TOBN(0xbda3ce9e, 0x00cefb3e),
+     TOBN(0xddc17a60, 0x2c9cf7a4), TOBN(0x01572ee4, 0x7bcb8773),
+     TOBN(0xa92b2b01, 0x8c7548df), TOBN(0x732fd309, 0xa84600e3),
+     TOBN(0xe22109c7, 0x16543a40), TOBN(0x9acafd36, 0xfede3c6c),
+     TOBN(0xfb206852, 0x6824e614), TOBN(0x2a4544a9, 0xda25dca0),
+     TOBN(0x25985262, 0x91d60b06), TOBN(0x281b7be9, 0x28753545),
+     TOBN(0xec667b1a, 0x90f13b27), TOBN(0x33a83aff, 0x940e2eb4),
+     TOBN(0x80009862, 0xd5d721d5), TOBN(0x0c3357a3, 0x5bd3a182),
+     TOBN(0x27f3a83b, 0x7aa2cda4), TOBN(0xb58ae74e, 0xf6f83085),
+     TOBN(0x2a911a81, 0x2e6dad6b), TOBN(0xde286051, 0xf43d6c5b),
+     TOBN(0x4bdccc41, 0xf996c4d8), TOBN(0xe7312ec0, 0x0ae1e24e)}
+    ,
+    {TOBN(0xf8d112e7, 0x6e6485b3), TOBN(0x4d3e24db, 0x771c52f8),
+     TOBN(0x48e3ee41, 0x684a2f6d), TOBN(0x7161957d, 0x21d95551),
+     TOBN(0x19631283, 0xcdb12a6c), TOBN(0xbf3fa882, 0x2e50e164),
+     TOBN(0xf6254b63, 0x3166cc73), TOBN(0x3aefa7ae, 0xaee8cc38),
+     TOBN(0x79b0fe62, 0x3b36f9fd), TOBN(0x26543b23, 0xfde19fc0),
+     TOBN(0x136e64a0, 0x958482ef), TOBN(0x23f63771, 0x9b095825),
+     TOBN(0x14cfd596, 0xb6a1142e), TOBN(0x5ea6aac6, 0x335aac0b),
+     TOBN(0x86a0e8bd, 0xf3081dd5), TOBN(0x5fb89d79, 0x003dc12a),
+     TOBN(0xf615c33a, 0xf72e34d4), TOBN(0x0bd9ea40, 0x110eec35),
+     TOBN(0x1c12bc5b, 0xc1dea34e), TOBN(0x686584c9, 0x49ae4699),
+     TOBN(0x13ad95d3, 0x8c97b942), TOBN(0x4609561a, 0x4e5c7562),
+     TOBN(0x9e94a4ae, 0xf2737f89), TOBN(0xf57594c6, 0x371c78b6),
+     TOBN(0x0f0165fc, 0xe3779ee3), TOBN(0xe00e7f9d, 0xbd495d9e),
+     TOBN(0x1fa4efa2, 0x20284e7a), TOBN(0x4564bade, 0x47ac6219),
+     TOBN(0x90e6312a, 0xc4708e8e), TOBN(0x4f5725fb, 0xa71e9adf),
+     TOBN(0xe95f55ae, 0x3d684b9f), TOBN(0x47f7ccb1, 0x1e94b415),
+     TOBN(0x7322851b, 0x8d946581), TOBN(0xf0d13133, 0xbdf4a012),
+     TOBN(0xa3510f69, 0x6584dae0), TOBN(0x03a7c171, 0x3c9f6c6d),
+     TOBN(0x5be97f38, 0xe475381a), TOBN(0xca1ba422, 0x85823334),
+     TOBN(0xf83cc5c7, 0x0be17dda), TOBN(0x158b1494, 0x0b918c0f),
+     TOBN(0xda3a77e5, 0x522e6b69), TOBN(0x69c908c3, 0xbbcd6c18),
+     TOBN(0x1f1b9e48, 0xd924fd56), TOBN(0x37c64e36, 0xaa4bb3f7),
+     TOBN(0x5a4fdbdf, 0xee478d7d), TOBN(0xba75c8bc, 0x0193f7a0),
+     TOBN(0x84bc1e84, 0x56cd16df), TOBN(0x1fb08f08, 0x46fad151),
+     TOBN(0x8a7cabf9, 0x842e9f30), TOBN(0xa331d4bf, 0x5eab83af),
+     TOBN(0xd272cfba, 0x017f2a6a), TOBN(0x27560abc, 0x83aba0e3),
+     TOBN(0x94b83387, 0x0e3a6b75), TOBN(0x25c6aea2, 0x6b9f50f5),
+     TOBN(0x803d691d, 0xb5fdf6d0), TOBN(0x03b77509, 0xe6333514),
+     TOBN(0x36178903, 0x61a341c1), TOBN(0x3604dc60, 0x0cfd6142),
+     TOBN(0x022295eb, 0x8533316c), TOBN(0x3dbde4ac, 0x44af2922),
+     TOBN(0x898afc5d, 0x1c7eef69), TOBN(0x58896805, 0xd14f4fa1),
+     TOBN(0x05002160, 0x203c21ca), TOBN(0x6f0d1f30, 0x40ef730b),
+     TOBN(0x8e8c44d4, 0x196224f8), TOBN(0x75a4ab95, 0x374d079d),
+     TOBN(0x79085ecc, 0x7d48f123), TOBN(0x56f04d31, 0x1bf65ad8),
+     TOBN(0xe220bf1c, 0xbda602b2), TOBN(0x73ee1742, 0xf9612c69),
+     TOBN(0x76008fc8, 0x084fd06b), TOBN(0x4000ef9f, 0xf11380d1),
+     TOBN(0x48201b4b, 0x12cfe297), TOBN(0x3eee129c, 0x292f74e5),
+     TOBN(0xe1fe114e, 0xc9e874e8), TOBN(0x899b055c, 0x92c5fc41),
+     TOBN(0x4e477a64, 0x3a39c8cf), TOBN(0x82f09efe, 0x78963cc9),
+     TOBN(0x6fd3fd8f, 0xd333f863), TOBN(0x85132b2a, 0xdc949c63),
+     TOBN(0x7e06a3ab, 0x516eb17b), TOBN(0x73bec06f, 0xd2c7372b),
+     TOBN(0xe4f74f55, 0xba896da6), TOBN(0xbb4afef8, 0x8e9eb40f),
+     TOBN(0x2d75bec8, 0xe61d66b0), TOBN(0x02bda4b4, 0xef29300b),
+     TOBN(0x8bbaa8de, 0x026baa5a), TOBN(0xff54befd, 0xa07f4440),
+     TOBN(0xbd9b8b1d, 0xbe7a2af3), TOBN(0xec51caa9, 0x4fb74a72),
+     TOBN(0xb9937a4b, 0x63879697), TOBN(0x7c9a9d20, 0xec2687d5),
+     TOBN(0x1773e44f, 0x6ef5f014), TOBN(0x8abcf412, 0xe90c6900),
+     TOBN(0x387bd022, 0x8142161e), TOBN(0x50393755, 0xfcb6ff2a),
+     TOBN(0x9813fd56, 0xed6def63), TOBN(0x53cf6482, 0x7d53106c),
+     TOBN(0x991a35bd, 0x431f7ac1), TOBN(0xf1e274dd, 0x63e65faf),
+     TOBN(0xf63ffa3c, 0x44cc7880), TOBN(0x411a426b, 0x7c256981),
+     TOBN(0xb698b9fd, 0x93a420e0), TOBN(0x89fdddc0, 0xae53f8fe),
+     TOBN(0x766e0722, 0x32398baa), TOBN(0x205fee42, 0x5cfca031),
+     TOBN(0xa49f5341, 0x7a029cf2), TOBN(0xa88c68b8, 0x4023890d),
+     TOBN(0xbc275041, 0x7337aaa8), TOBN(0x9ed364ad, 0x0eb384f4),
+     TOBN(0xe0816f85, 0x29aba92f), TOBN(0x2e9e1941, 0x04e38a88),
+     TOBN(0x57eef44a, 0x3dafd2d5), TOBN(0x35d1fae5, 0x97ed98d8),
+     TOBN(0x50628c09, 0x2307f9b1), TOBN(0x09d84aae, 0xd6cba5c6),
+     TOBN(0x67071bc7, 0x88aaa691), TOBN(0x2dea57a9, 0xafe6cb03),
+     TOBN(0xdfe11bb4, 0x3d78ac01), TOBN(0x7286418c, 0x7fd7aa51),
+     TOBN(0xfabf7709, 0x77f7195a), TOBN(0x8ec86167, 0xadeb838f),
+     TOBN(0xea1285a8, 0xbb4f012d), TOBN(0xd6883503, 0x9a3eab3f),
+     TOBN(0xee5d24f8, 0x309004c2), TOBN(0xa96e4b76, 0x13ffe95e),
+     TOBN(0x0cdffe12, 0xbd223ea4), TOBN(0x8f5c2ee5, 0xb6739a53),
+     TOBN(0x5cb4aaa5, 0xdd968198), TOBN(0xfa131c52, 0x72413a6c),
+     TOBN(0x53d46a90, 0x9536d903), TOBN(0xb270f0d3, 0x48606d8e),
+     TOBN(0x518c7564, 0xa053a3bc), TOBN(0x088254b7, 0x1a86caef),
+     TOBN(0xb3ba8cb4, 0x0ab5efd0), TOBN(0x5c59900e, 0x4605945d),
+     TOBN(0xecace1dd, 0xa1887395), TOBN(0x40960f36, 0x932a65de),
+     TOBN(0x9611ff5c, 0x3aa95529), TOBN(0xc58215b0, 0x7c1e5a36),
+     TOBN(0xd48c9b58, 0xf0e1a524), TOBN(0xb406856b, 0xf590dfb8),
+     TOBN(0xc7605e04, 0x9cd95662), TOBN(0x0dd036ee, 0xa33ecf82),
+     TOBN(0xa50171ac, 0xc33156b3), TOBN(0xf09d24ea, 0x4a80172e),
+     TOBN(0x4e1f72c6, 0x76dc8eef), TOBN(0xe60caadc, 0x5e3d44ee),
+     TOBN(0x006ef8a6, 0x979b1d8f), TOBN(0x60908a1c, 0x97788d26),
+     TOBN(0x6e08f95b, 0x266feec0), TOBN(0x618427c2, 0x22e8c94e),
+     TOBN(0x3d613339, 0x59145a65), TOBN(0xcd9bc368, 0xfa406337),
+     TOBN(0x82d11be3, 0x2d8a52a0), TOBN(0xf6877b27, 0x97a1c590),
+     TOBN(0x837a819b, 0xf5cbdb25), TOBN(0x2a4fd1d8, 0xde090249),
+     TOBN(0x622a7de7, 0x74990e5f), TOBN(0x840fa5a0, 0x7945511b),
+     TOBN(0x30b974be, 0x6558842d), TOBN(0x70df8c64, 0x17f3d0a6),
+     TOBN(0x7c803520, 0x7542e46d), TOBN(0x7251fe7f, 0xe4ecc823),
+     TOBN(0xe59134cb, 0x5e9aac9a), TOBN(0x11bb0934, 0xf0045d71),
+     TOBN(0x53e5d9b5, 0xdbcb1d4e), TOBN(0x8d97a905, 0x92defc91),
+     TOBN(0xfe289327, 0x7946d3f9), TOBN(0xe132bd24, 0x07472273),
+     TOBN(0xeeeb510c, 0x1eb6ae86), TOBN(0x777708c5, 0xf0595067),
+     TOBN(0x18e2c8cd, 0x1297029e), TOBN(0x2c61095c, 0xbbf9305e),
+     TOBN(0xe466c258, 0x6b85d6d9), TOBN(0x8ac06c36, 0xda1ea530),
+     TOBN(0xa365dc39, 0xa1304668), TOBN(0xe4a9c885, 0x07f89606),
+     TOBN(0x65a4898f, 0xacc7228d), TOBN(0x3e2347ff, 0x84ca8303),
+     TOBN(0xa5f6fb77, 0xea7d23a3), TOBN(0x2fac257d, 0x672a71cd),
+     TOBN(0x6908bef8, 0x7e6a44d3), TOBN(0x8ff87566, 0x891d3d7a),
+     TOBN(0xe58e90b3, 0x6b0cf82e), TOBN(0x6438d246, 0x2615b5e7),
+     TOBN(0x07b1f8fc, 0x669c145a), TOBN(0xb0d8b2da, 0x36f1e1cb),
+     TOBN(0x54d5dadb, 0xd9184c4d), TOBN(0x3dbb18d5, 0xf93d9976),
+     TOBN(0x0a3e0f56, 0xd1147d47), TOBN(0x2afa8c8d, 0xa0a48609),
+     TOBN(0x275353e8, 0xbc36742c), TOBN(0x898f427e, 0xeea0ed90),
+     TOBN(0x26f4947e, 0x3e477b00), TOBN(0x8ad8848a, 0x308741e3),
+     TOBN(0x6c703c38, 0xd74a2a46), TOBN(0x5e3e05a9, 0x9ba17ba2),
+     TOBN(0xc1fa6f66, 0x4ab9a9e4), TOBN(0x474a2d9a, 0x3841d6ec),
+     TOBN(0x871239ad, 0x653ae326), TOBN(0x14bcf72a, 0xa74cbb43),
+     TOBN(0x8737650e, 0x20d4c083), TOBN(0x3df86536, 0x110ed4af),
+     TOBN(0xd2d86fe7, 0xb53ca555), TOBN(0x688cb00d, 0xabd5d538),
+     TOBN(0xcf81bda3, 0x1ad38468), TOBN(0x7ccfe3cc, 0xf01167b6),
+     TOBN(0xcf4f47e0, 0x6c4c1fe6), TOBN(0x557e1f1a, 0x298bbb79),
+     TOBN(0xf93b974f, 0x30d45a14), TOBN(0x174a1d2d, 0x0baf97c4),
+     TOBN(0x7a003b30, 0xc51fbf53), TOBN(0xd8940991, 0xee68b225),
+     TOBN(0x5b0aa7b7, 0x1c0f4173), TOBN(0x975797c9, 0xa20a7153),
+     TOBN(0x26e08c07, 0xe3533d77), TOBN(0xd7222e6a, 0x2e341c99),
+     TOBN(0x9d60ec3d, 0x8d2dc4ed), TOBN(0xbdfe0d8f, 0x7c476cf8),
+     TOBN(0x1fe59ab6, 0x1d056605), TOBN(0xa9ea9df6, 0x86a8551f),
+     TOBN(0x8489941e, 0x47fb8d8c), TOBN(0xfeb874eb, 0x4a7f1b10),
+     TOBN(0xfe5fea86, 0x7ee0d98f), TOBN(0x201ad34b, 0xdbf61864),
+     TOBN(0x45d8fe47, 0x37c031d4), TOBN(0xd5f49fae, 0x795f0822),
+     TOBN(0xdb0fb291, 0xc7f4a40c), TOBN(0x2e69d9c1, 0x730ddd92),
+     TOBN(0x754e1054, 0x49d76987), TOBN(0x8a24911d, 0x7662db87),
+     TOBN(0x61fc1810, 0x60a71676), TOBN(0xe852d1a8, 0xf66a8ad1),
+     TOBN(0x172bbd65, 0x6417231e), TOBN(0x0d6de7bd, 0x3babb11f),
+     TOBN(0x6fde6f88, 0xc8e347f8), TOBN(0x1c587547, 0x9bd99cc3),
+     TOBN(0x78e54ed0, 0x34076950), TOBN(0x97f0f334, 0x796e83ba),
+     TOBN(0xe4dbe1ce, 0x4924867a), TOBN(0xbd5f51b0, 0x60b84917),
+     TOBN(0x37530040, 0x3cb09a79), TOBN(0xdb3fe0f8, 0xff1743d8),
+     TOBN(0xed7894d8, 0x556fa9db), TOBN(0xfa262169, 0x23412fbf),
+     TOBN(0x563be0db, 0xba7b9291), TOBN(0x6ca8b8c0, 0x0c9fb234),
+     TOBN(0xed406aa9, 0xbd763802), TOBN(0xc21486a0, 0x65303da1),
+     TOBN(0x61ae291e, 0xc7e62ec4), TOBN(0x622a0492, 0xdf99333e),
+     TOBN(0x7fd80c9d, 0xbb7a8ee0), TOBN(0xdc2ed3bc, 0x6c01aedb),
+     TOBN(0x35c35a12, 0x08be74ec), TOBN(0xd540cb1a, 0x469f671f),
+     TOBN(0xd16ced4e, 0xcf84f6c7), TOBN(0x8561fb9c, 0x2d090f43),
+     TOBN(0x7e693d79, 0x6f239db4), TOBN(0xa736f928, 0x77bd0d94),
+     TOBN(0x07b4d929, 0x2c1950ee), TOBN(0xda177543, 0x56dc11b3),
+     TOBN(0xa5dfbbaa, 0x7a6a878e), TOBN(0x1c70cb29, 0x4decb08a),
+     TOBN(0xfba28c8b, 0x6f0f7c50), TOBN(0xa8eba2b8, 0x854dcc6d),
+     TOBN(0x5ff8e89a, 0x36b78642), TOBN(0x070c1c8e, 0xf6873adf),
+     TOBN(0xbbd3c371, 0x6484d2e4), TOBN(0xfb78318f, 0x0d414129),
+     TOBN(0x2621a39c, 0x6ad93b0b), TOBN(0x979d74c2, 0xa9e917f7),
+     TOBN(0xfc195647, 0x61fb0428), TOBN(0x4d78954a, 0xbee624d4),
+     TOBN(0xb94896e0, 0xb8ae86fd), TOBN(0x6667ac0c, 0xc91c8b13),
+     TOBN(0x9f180512, 0x43bcf832), TOBN(0xfbadf8b7, 0xa0010137),
+     TOBN(0xc69b4089, 0xb3ba8aa7), TOBN(0xfac4bacd, 0xe687ce85),
+     TOBN(0x9164088d, 0x977eab40), TOBN(0x51f4c5b6, 0x2760b390),
+     TOBN(0xd238238f, 0x340dd553), TOBN(0x358566c3, 0xdb1d31c9),
+     TOBN(0x3a5ad69e, 0x5068f5ff), TOBN(0xf31435fc, 0xdaff6b06),
+     TOBN(0xae549a5b, 0xd6debff0), TOBN(0x59e5f0b7, 0x75e01331),
+     TOBN(0x5d492fb8, 0x98559acf), TOBN(0x96018c2e, 0x4db79b50),
+     TOBN(0x55f4a48f, 0x609f66aa), TOBN(0x1943b3af, 0x4900a14f),
+     TOBN(0xc22496df, 0x15a40d39), TOBN(0xb2a44684, 0x4c20f7c5),
+     TOBN(0x76a35afa, 0x3b98404c), TOBN(0xbec75725, 0xff5d1b77),
+     TOBN(0xb67aa163, 0xbea06444), TOBN(0x27e95bb2, 0xf724b6f2),
+     TOBN(0x3c20e3e9, 0xd238c8ab), TOBN(0x1213754e, 0xddd6ae17),
+     TOBN(0x8c431020, 0x716e0f74), TOBN(0x6679c82e, 0xffc095c2),
+     TOBN(0x2eb3adf4, 0xd0ac2932), TOBN(0x2cc970d3, 0x01bb7a76),
+     TOBN(0x70c71f2f, 0x740f0e66), TOBN(0x545c616b, 0x2b6b23cc),
+     TOBN(0x4528cfcb, 0xb40a8bd7), TOBN(0xff839633, 0x2ab27722),
+     TOBN(0x049127d9, 0x025ac99a), TOBN(0xd314d4a0, 0x2b63e33b),
+     TOBN(0xc8c310e7, 0x28d84519), TOBN(0x0fcb8983, 0xb3bc84ba),
+     TOBN(0x2cc52261, 0x38634818), TOBN(0x501814f4, 0xb44c2e0b),
+     TOBN(0xf7e181aa, 0x54dfdba3), TOBN(0xcfd58ff0, 0xe759718c),
+     TOBN(0xf90cdb14, 0xd3b507a8), TOBN(0x57bd478e, 0xc50bdad8),
+     TOBN(0x29c197e2, 0x50e5f9aa), TOBN(0x4db6eef8, 0xe40bc855),
+     TOBN(0x2cc8f21a, 0xd1fc0654), TOBN(0xc71cc963, 0x81269d73),
+     TOBN(0xecfbb204, 0x077f49f9), TOBN(0xdde92571, 0xca56b793),
+     TOBN(0x9abed6a3, 0xf97ad8f7), TOBN(0xe6c19d3f, 0x924de3bd),
+     TOBN(0x8dce92f4, 0xa140a800), TOBN(0x85f44d1e, 0x1337af07),
+     TOBN(0x5953c08b, 0x09d64c52), TOBN(0xa1b5e49f, 0xf5df9749),
+     TOBN(0x336a8fb8, 0x52735f7d), TOBN(0xb332b6db, 0x9add676b),
+     TOBN(0x558b88a0, 0xb4511aa4), TOBN(0x09788752, 0xdbd5cc55),
+     TOBN(0x16b43b9c, 0xd8cd52bd), TOBN(0x7f0bc5a0, 0xc2a2696b),
+     TOBN(0x146e12d4, 0xc11f61ef), TOBN(0x9ce10754, 0x3a83e79e),
+     TOBN(0x08ec73d9, 0x6cbfca15), TOBN(0x09ff29ad, 0x5b49653f),
+     TOBN(0xe31b72bd, 0xe7da946e), TOBN(0xebf9eb3b, 0xee80a4f2),
+     TOBN(0xd1aabd08, 0x17598ce4), TOBN(0x18b5fef4, 0x53f37e80),
+     TOBN(0xd5d5cdd3, 0x5958cd79), TOBN(0x3580a1b5, 0x1d373114),
+     TOBN(0xa36e4c91, 0xfa935726), TOBN(0xa38c534d, 0xef20d760),
+     TOBN(0x7088e40a, 0x2ff5845b), TOBN(0xe5bb40bd, 0xbd78177f),
+     TOBN(0x4f06a7a8, 0x857f9920), TOBN(0xe3cc3e50, 0xe968f05d),
+     TOBN(0x1d68b7fe, 0xe5682d26), TOBN(0x5206f76f, 0xaec7f87c),
+     TOBN(0x41110530, 0x041951ab), TOBN(0x58ec52c1, 0xd4b5a71a),
+     TOBN(0xf3488f99, 0x0f75cf9a), TOBN(0xf411951f, 0xba82d0d5),
+     TOBN(0x27ee75be, 0x618895ab), TOBN(0xeae060d4, 0x6d8aab14),
+     TOBN(0x9ae1df73, 0x7fb54dc2), TOBN(0x1f3e391b, 0x25963649),
+     TOBN(0x242ec32a, 0xfe055081), TOBN(0x5bd450ef, 0x8491c9bd),
+     TOBN(0x367efc67, 0x981eb389), TOBN(0xed7e1928, 0x3a0550d5),
+     TOBN(0x362e776b, 0xab3ce75c), TOBN(0xe890e308, 0x1f24c523),
+     TOBN(0xb961b682, 0xfeccef76), TOBN(0x8b8e11f5, 0x8bba6d92),
+     TOBN(0x8f2ccc4c, 0x2b2375c4), TOBN(0x0d7f7a52, 0xe2f86cfa),
+     TOBN(0xfd94d30a, 0x9efe5633), TOBN(0x2d8d246b, 0x5451f934),
+     TOBN(0x2234c6e3, 0x244e6a00), TOBN(0xde2b5b0d, 0xddec8c50),
+     TOBN(0x2ce53c5a, 0xbf776f5b), TOBN(0x6f724071, 0x60357b05),
+     TOBN(0xb2593717, 0x71bf3f7a), TOBN(0x87d2501c, 0x440c4a9f),
+     TOBN(0x440552e1, 0x87b05340), TOBN(0xb7bf7cc8, 0x21624c32),
+     TOBN(0x4155a6ce, 0x22facddb), TOBN(0x5a4228cb, 0x889837ef),
+     TOBN(0xef87d6d6, 0xfd4fd671), TOBN(0xa233687e, 0xc2daa10e),
+     TOBN(0x75622244, 0x03c0eb96), TOBN(0x7632d184, 0x8bf19be6),
+     TOBN(0x05d0f8e9, 0x40735ff4), TOBN(0x3a3e6e13, 0xc00931f1),
+     TOBN(0x31ccde6a, 0xdafe3f18), TOBN(0xf381366a, 0xcfe51207),
+     TOBN(0x24c222a9, 0x60167d92), TOBN(0x62f9d6f8, 0x7529f18c),
+     TOBN(0x412397c0, 0x0353b114), TOBN(0x334d89dc, 0xef808043),
+     TOBN(0xd9ec63ba, 0x2a4383ce), TOBN(0xcec8e937, 0x5cf92ba0),
+     TOBN(0xfb8b4288, 0xc8be74c0), TOBN(0x67d6912f, 0x105d4391),
+     TOBN(0x7b996c46, 0x1b913149), TOBN(0x36aae2ef, 0x3a4e02da),
+     TOBN(0xb68aa003, 0x972de594), TOBN(0x284ec70d, 0x4ec6d545),
+     TOBN(0xf3d2b2d0, 0x61391d54), TOBN(0x69c5d5d6, 0xfe114e92),
+     TOBN(0xbe0f00b5, 0xb4482dff), TOBN(0xe1596fa5, 0xf5bf33c5),
+     TOBN(0x10595b56, 0x96a71cba), TOBN(0x944938b2, 0xfdcadeb7),
+     TOBN(0xa282da4c, 0xfccd8471), TOBN(0x98ec05f3, 0x0d37bfe1),
+     TOBN(0xe171ce1b, 0x0698304a), TOBN(0x2d691444, 0x21bdf79b),
+     TOBN(0xd0cd3b74, 0x1b21dec1), TOBN(0x712ecd8b, 0x16a15f71),
+     TOBN(0x8d4c00a7, 0x00fd56e1), TOBN(0x02ec9692, 0xf9527c18),
+     TOBN(0x21c44937, 0x4a3e42e1), TOBN(0x9176fbab, 0x1392ae0a),
+     TOBN(0x8726f1ba, 0x44b7b618), TOBN(0xb4d7aae9, 0xf1de491c),
+     TOBN(0xf91df7b9, 0x07b582c0), TOBN(0x7e116c30, 0xef60aa3a),
+     TOBN(0x99270f81, 0x466265d7), TOBN(0xb15b6fe2, 0x4df7adf0),
+     TOBN(0xfe33b2d3, 0xf9738f7f), TOBN(0x48553ab9, 0xd6d70f95),
+     TOBN(0x2cc72ac8, 0xc21e94db), TOBN(0x795ac38d, 0xbdc0bbee),
+     TOBN(0x0a1be449, 0x2e40478f), TOBN(0x81bd3394, 0x052bde55),
+     TOBN(0x63c8dbe9, 0x56b3c4f2), TOBN(0x017a99cf, 0x904177cc),
+     TOBN(0x947bbddb, 0x4d010fc1), TOBN(0xacf9b00b, 0xbb2c9b21),
+     TOBN(0x2970bc8d, 0x47173611), TOBN(0x1a4cbe08, 0xac7d756f),
+     TOBN(0x06d9f4aa, 0x67d541a2), TOBN(0xa3e8b689, 0x59c2cf44),
+     TOBN(0xaad066da, 0x4d88f1dd), TOBN(0xc604f165, 0x7ad35dea),
+     TOBN(0x7edc0720, 0x4478ca67), TOBN(0xa10dfae0, 0xba02ce06),
+     TOBN(0xeceb1c76, 0xaf36f4e4), TOBN(0x994b2292, 0xaf3f8f48),
+     TOBN(0xbf9ed77b, 0x77c8a68c), TOBN(0x74f544ea, 0x51744c9d),
+     TOBN(0x82d05bb9, 0x8113a757), TOBN(0x4ef2d2b4, 0x8a9885e4),
+     TOBN(0x1e332be5, 0x1aa7865f), TOBN(0x22b76b18, 0x290d1a52),
+     TOBN(0x308a2310, 0x44351683), TOBN(0x9d861896, 0xa3f22840),
+     TOBN(0x5959ddcd, 0x841ed947), TOBN(0x0def0c94, 0x154b73bf),
+     TOBN(0xf0105417, 0x4c7c15e0), TOBN(0x539bfb02, 0x3a277c32),
+     TOBN(0xe699268e, 0xf9dccf5f), TOBN(0x9f5796a5, 0x0247a3bd),
+     TOBN(0x8b839de8, 0x4f157269), TOBN(0xc825c1e5, 0x7a30196b),
+     TOBN(0x6ef0aabc, 0xdc8a5a91), TOBN(0xf4a8ce6c, 0x498b7fe6),
+     TOBN(0x1cce35a7, 0x70cbac78), TOBN(0x83488e9b, 0xf6b23958),
+     TOBN(0x0341a070, 0xd76cb011), TOBN(0xda6c9d06, 0xae1b2658),
+     TOBN(0xb701fb30, 0xdd648c52), TOBN(0x994ca02c, 0x52fb9fd1),
+     TOBN(0x06933117, 0x6f563086), TOBN(0x3d2b8100, 0x17856bab),
+     TOBN(0xe89f48c8, 0x5963a46e), TOBN(0x658ab875, 0xa99e61c7),
+     TOBN(0x6e296f87, 0x4b8517b4), TOBN(0x36c4fcdc, 0xfc1bc656),
+     TOBN(0xde5227a1, 0xa3906def), TOBN(0x9fe95f57, 0x62418945),
+     TOBN(0x20c91e81, 0xfdd96cde), TOBN(0x5adbe47e, 0xda4480de),
+     TOBN(0xa009370f, 0x396de2b6), TOBN(0x98583d4b, 0xf0ecc7bd),
+     TOBN(0xf44f6b57, 0xe51d0672), TOBN(0x03d6b078, 0x556b1984),
+     TOBN(0x27dbdd93, 0xb0b64912), TOBN(0x9b3a3434, 0x15687b09),
+     TOBN(0x0dba6461, 0x51ec20a9), TOBN(0xec93db7f, 0xff28187c),
+     TOBN(0x00ff8c24, 0x66e48bdd), TOBN(0x2514f2f9, 0x11ccd78e),
+     TOBN(0xeba11f4f, 0xe1250603), TOBN(0x8a22cd41, 0x243fa156),
+     TOBN(0xa4e58df4, 0xb283e4c6), TOBN(0x78c29859, 0x8b39783f),
+     TOBN(0x5235aee2, 0xa5259809), TOBN(0xc16284b5, 0x0e0227dd),
+     TOBN(0xa5f57916, 0x1338830d), TOBN(0x6d4b8a6b, 0xd2123fca),
+     TOBN(0x236ea68a, 0xf9c546f8), TOBN(0xc1d36873, 0xfa608d36),
+     TOBN(0xcd76e495, 0x8d436d13), TOBN(0xd4d9c221, 0x8fb080af),
+     TOBN(0x665c1728, 0xe8ad3fb5), TOBN(0xcf1ebe4d, 0xb3d572e0),
+     TOBN(0xa7a8746a, 0x584c5e20), TOBN(0x267e4ea1, 0xb9dc7035),
+     TOBN(0x593a15cf, 0xb9548c9b), TOBN(0x5e6e2135, 0x4bd012f3),
+     TOBN(0xdf31cc6a, 0x8c8f936e), TOBN(0x8af84d04, 0xb5c241dc),
+     TOBN(0x63990a6f, 0x345efb86), TOBN(0x6fef4e61, 0xb9b962cb)}
+    ,
+    {TOBN(0xf6368f09, 0x25722608), TOBN(0x131260db, 0x131cf5c6),
+     TOBN(0x40eb353b, 0xfab4f7ac), TOBN(0x85c78880, 0x37eee829),
+     TOBN(0x4c1581ff, 0xc3bdf24e), TOBN(0x5bff75cb, 0xf5c3c5a8),
+     TOBN(0x35e8c83f, 0xa14e6f40), TOBN(0xb81d1c0f, 0x0295e0ca),
+     TOBN(0xfcde7cc8, 0xf43a730f), TOBN(0xe89b6f3c, 0x33ab590e),
+     TOBN(0xc823f529, 0xad03240b), TOBN(0x82b79afe, 0x98bea5db),
+     TOBN(0x568f2856, 0x962fe5de), TOBN(0x0c590adb, 0x60c591f3),
+     TOBN(0x1fc74a14, 0x4a28a858), TOBN(0x3b662498, 0xb3203f4c),
+     TOBN(0x91e3cf0d, 0x6c39765a), TOBN(0xa2db3acd, 0xac3cca0b),
+     TOBN(0x288f2f08, 0xcb953b50), TOBN(0x2414582c, 0xcf43cf1a),
+     TOBN(0x8dec8bbc, 0x60eee9a8), TOBN(0x54c79f02, 0x729aa042),
+     TOBN(0xd81cd5ec, 0x6532f5d5), TOBN(0xa672303a, 0xcf82e15f),
+     TOBN(0x376aafa8, 0x719c0563), TOBN(0xcd8ad2dc, 0xbc5fc79f),
+     TOBN(0x303fdb9f, 0xcb750cd3), TOBN(0x14ff052f, 0x4418b08e),
+     TOBN(0xf75084cf, 0x3e2d6520), TOBN(0x7ebdf0f8, 0x144ed509),
+     TOBN(0xf43bf0f2, 0xd3f25b98), TOBN(0x86ad71cf, 0xa354d837),
+     TOBN(0xb827fe92, 0x26f43572), TOBN(0xdfd3ab5b, 0x5d824758),
+     TOBN(0x315dd23a, 0x539094c1), TOBN(0x85c0e37a, 0x66623d68),
+     TOBN(0x575c7972, 0x7be19ae0), TOBN(0x616a3396, 0xdf0d36b5),
+     TOBN(0xa1ebb3c8, 0x26b1ff7e), TOBN(0x635b9485, 0x140ad453),
+     TOBN(0x92bf3cda, 0xda430c0b), TOBN(0x4702850e, 0x3a96dac6),
+     TOBN(0xc91cf0a5, 0x15ac326a), TOBN(0x95de4f49, 0xab8c25e4),
+     TOBN(0xb01bad09, 0xe265c17c), TOBN(0x24e45464, 0x087b3881),
+     TOBN(0xd43e583c, 0xe1fac5ca), TOBN(0xe17cb318, 0x6ead97a6),
+     TOBN(0x6cc39243, 0x74dcec46), TOBN(0x33cfc02d, 0x54c2b73f),
+     TOBN(0x82917844, 0xf26cd99c), TOBN(0x8819dd95, 0xd1773f89),
+     TOBN(0x09572aa6, 0x0871f427), TOBN(0x8e0cf365, 0xf6f01c34),
+     TOBN(0x7fa52988, 0xbff1f5af), TOBN(0x4eb357ea, 0xe75e8e50),
+     TOBN(0xd9d0c8c4, 0x868af75d), TOBN(0xd7325cff, 0x45c8c7ea),
+     TOBN(0xab471996, 0xcc81ecb0), TOBN(0xff5d55f3, 0x611824ed),
+     TOBN(0xbe314541, 0x1977a0ee), TOBN(0x5085c4c5, 0x722038c6),
+     TOBN(0x2d5335bf, 0xf94bb495), TOBN(0x894ad8a6, 0xc8e2a082),
+     TOBN(0x5c3e2341, 0xada35438), TOBN(0xf4a9fc89, 0x049b8c4e),
+     TOBN(0xbeeb355a, 0x9f17cf34), TOBN(0x3f311e0e, 0x6c91fe10),
+     TOBN(0xc2d20038, 0x92ab9891), TOBN(0x257bdcc1, 0x3e8ce9a9),
+     TOBN(0x1b2d9789, 0x88c53bee), TOBN(0x927ce89a, 0xcdba143a),
+     TOBN(0xb0a32cca, 0x523db280), TOBN(0x5c889f8a, 0x50d43783),
+     TOBN(0x503e04b3, 0x4897d16f), TOBN(0x8cdb6e78, 0x08f5f2e8),
+     TOBN(0x6ab91cf0, 0x179c8e74), TOBN(0xd8874e52, 0x48211d60),
+     TOBN(0xf948d4d5, 0xea851200), TOBN(0x4076d41e, 0xe6f9840a),
+     TOBN(0xc20e263c, 0x47b517ea), TOBN(0x79a448fd, 0x30685e5e),
+     TOBN(0xe55f6f78, 0xf90631a0), TOBN(0x88a790b1, 0xa79e6346),
+     TOBN(0x62160c7d, 0x80969fe8), TOBN(0x54f92fd4, 0x41491bb9),
+     TOBN(0xa6645c23, 0x5c957526), TOBN(0xf44cc5ae, 0xbea3ce7b),
+     TOBN(0xf7628327, 0x8b1e68b7), TOBN(0xc731ad7a, 0x303f29d3),
+     TOBN(0xfe5a9ca9, 0x57d03ecb), TOBN(0x96c0d50c, 0x41bc97a7),
+     TOBN(0xc4669fe7, 0x9b4f7f24), TOBN(0xfdd781d8, 0x3d9967ef),
+     TOBN(0x7892c7c3, 0x5d2c208d), TOBN(0x8bf64f7c, 0xae545cb3),
+     TOBN(0xc01f862c, 0x467be912), TOBN(0xf4c85ee9, 0xc73d30cc),
+     TOBN(0x1fa6f4be, 0x6ab83ec7), TOBN(0xa07a3c1c, 0x4e3e3cf9),
+     TOBN(0x87f8ef45, 0x0c00beb3), TOBN(0x30e2c2b3, 0x000d4c3e),
+     TOBN(0x1aa00b94, 0xfe08bf5b), TOBN(0x32c133aa, 0x9224ef52),
+     TOBN(0x38df16bb, 0x32e5685d), TOBN(0x68a9e069, 0x58e6f544),
+     TOBN(0x495aaff7, 0xcdc5ebc6), TOBN(0xf894a645, 0x378b135f),
+     TOBN(0xf316350a, 0x09e27ecf), TOBN(0xeced201e, 0x58f7179d),
+     TOBN(0x2eec273c, 0xe97861ba), TOBN(0x47ec2cae, 0xd693be2e),
+     TOBN(0xfa4c97c4, 0xf68367ce), TOBN(0xe4f47d0b, 0xbe5a5755),
+     TOBN(0x17de815d, 0xb298a979), TOBN(0xd7eca659, 0xc177dc7d),
+     TOBN(0x20fdbb71, 0x49ded0a3), TOBN(0x4cb2aad4, 0xfb34d3c5),
+     TOBN(0x2cf31d28, 0x60858a33), TOBN(0x3b6873ef, 0xa24aa40f),
+     TOBN(0x540234b2, 0x2c11bb37), TOBN(0x2d0366dd, 0xed4c74a3),
+     TOBN(0xf9a968da, 0xeec5f25d), TOBN(0x36601068, 0x67b63142),
+     TOBN(0x07cd6d2c, 0x68d7b6d4), TOBN(0xa8f74f09, 0x0c842942),
+     TOBN(0xe2751404, 0x7768b1ee), TOBN(0x4b5f7e89, 0xfe62aee4),
+     TOBN(0xc6a77177, 0x89070d26), TOBN(0xa1f28e4e, 0xdd1c8bc7),
+     TOBN(0xea5f4f06, 0x469e1f17), TOBN(0x78fc242a, 0xfbdb78e0),
+     TOBN(0xc9c7c592, 0x8b0588f1), TOBN(0xb6b7a0fd, 0x1535921e),
+     TOBN(0xcc5bdb91, 0xbde5ae35), TOBN(0xb42c485e, 0x12ff1864),
+     TOBN(0xa1113e13, 0xdbab98aa), TOBN(0xde9d469b, 0xa17b1024),
+     TOBN(0x23f48b37, 0xc0462d3a), TOBN(0x3752e537, 0x7c5c078d),
+     TOBN(0xe3a86add, 0x15544eb9), TOBN(0xf013aea7, 0x80fba279),
+     TOBN(0x8b5bb76c, 0xf22001b5), TOBN(0xe617ba14, 0xf02891ab),
+     TOBN(0xd39182a6, 0x936219d3), TOBN(0x5ce1f194, 0xae51cb19),
+     TOBN(0xc78f8598, 0xbf07a74c), TOBN(0x6d7158f2, 0x22cbf1bc),
+     TOBN(0x3b846b21, 0xe300ce18), TOBN(0x35fba630, 0x2d11275d),
+     TOBN(0x5fe25c36, 0xa0239b9b), TOBN(0xd8beb35d, 0xdf05d940),
+     TOBN(0x4db02bb0, 0x1f7e320d), TOBN(0x0641c364, 0x6da320ea),
+     TOBN(0x6d95fa5d, 0x821389a3), TOBN(0x92699748, 0x8fcd8e3d),
+     TOBN(0x316fef17, 0xceb6c143), TOBN(0x67fcb841, 0xd933762b),
+     TOBN(0xbb837e35, 0x118b17f8), TOBN(0x4b92552f, 0x9fd24821),
+     TOBN(0xae6bc70e, 0x46aca793), TOBN(0x1cf0b0e4, 0xe579311b),
+     TOBN(0x8dc631be, 0x5802f716), TOBN(0x099bdc6f, 0xbddbee4d),
+     TOBN(0xcc352bb2, 0x0caf8b05), TOBN(0xf74d505a, 0x72d63df2),
+     TOBN(0xb9876d4b, 0x91c4f408), TOBN(0x1ce18473, 0x9e229b2d),
+     TOBN(0x49507597, 0x83abdb4a), TOBN(0x850fbcb6, 0xdee84b18),
+     TOBN(0x6325236e, 0x609e67dc), TOBN(0x04d831d9, 0x9336c6d8),
+     TOBN(0x8deaae3b, 0xfa12d45d), TOBN(0xe425f8ce, 0x4746e246),
+     TOBN(0x8004c175, 0x24f5f31e), TOBN(0xaca16d8f, 0xad62c3b7),
+     TOBN(0x0dc15a6a, 0x9152f934), TOBN(0xf1235e5d, 0xed0e12c1),
+     TOBN(0xc33c06ec, 0xda477dac), TOBN(0x76be8732, 0xb2ea0006),
+     TOBN(0xcf3f7831, 0x0c0cd313), TOBN(0x3c524553, 0xa614260d),
+     TOBN(0x31a756f8, 0xcab22d15), TOBN(0x03ee10d1, 0x77827a20),
+     TOBN(0xd1e059b2, 0x1994ef20), TOBN(0x2a653b69, 0x638ae318),
+     TOBN(0x70d5eb58, 0x2f699010), TOBN(0x279739f7, 0x09f5f84a),
+     TOBN(0x5da4663c, 0x8b799336), TOBN(0xfdfdf14d, 0x203c37eb),
+     TOBN(0x32d8a9dc, 0xa1dbfb2d), TOBN(0xab40cff0, 0x77d48f9b),
+     TOBN(0xc018b383, 0xd20b42d5), TOBN(0xf9a810ef, 0x9f78845f),
+     TOBN(0x40af3753, 0xbdba9df0), TOBN(0xb90bdcfc, 0x131dfdf9),
+     TOBN(0x18720591, 0xf01ab782), TOBN(0xc823f211, 0x6af12a88),
+     TOBN(0xa51b80f3, 0x0dc14401), TOBN(0xde248f77, 0xfb2dfbe3),
+     TOBN(0xef5a44e5, 0x0cafe751), TOBN(0x73997c9c, 0xd4dcd221),
+     TOBN(0x32fd86d1, 0xde854024), TOBN(0xd5b53adc, 0xa09b84bb),
+     TOBN(0x008d7a11, 0xdcedd8d1), TOBN(0x406bd1c8, 0x74b32c84),
+     TOBN(0x5d4472ff, 0x05dde8b1), TOBN(0x2e25f2cd, 0xfce2b32f),
+     TOBN(0xbec0dd5e, 0x29dfc254), TOBN(0x4455fcf6, 0x2b98b267),
+     TOBN(0x0b4d43a5, 0xc72df2ad), TOBN(0xea70e6be, 0x48a75397),
+     TOBN(0x2aad6169, 0x5820f3bf), TOBN(0xf410d2dd, 0x9e37f68f),
+     TOBN(0x70fb7dba, 0x7be5ac83), TOBN(0x636bb645, 0x36ec3eec),
+     TOBN(0x27104ea3, 0x9754e21c), TOBN(0xbc87a3e6, 0x8d63c373),
+     TOBN(0x483351d7, 0x4109db9a), TOBN(0x0fa724e3, 0x60134da7),
+     TOBN(0x9ff44c29, 0xb0720b16), TOBN(0x2dd0cf13, 0x06aceead),
+     TOBN(0x5942758c, 0xe26929a6), TOBN(0x96c5db92, 0xb766a92b),
+     TOBN(0xcec7d4c0, 0x5f18395e), TOBN(0xd3f22744, 0x1f80d032),
+     TOBN(0x7a68b37a, 0xcb86075b), TOBN(0x074764dd, 0xafef92db),
+     TOBN(0xded1e950, 0x7bc7f389), TOBN(0xc580c850, 0xb9756460),
+     TOBN(0xaeeec2a4, 0x7da48157), TOBN(0x3f0b4e7f, 0x82c587b3),
+     TOBN(0x231c6de8, 0xa9f19c53), TOBN(0x5717bd73, 0x6974e34e),
+     TOBN(0xd9e1d216, 0xf1508fa9), TOBN(0x9f112361, 0xdadaa124),
+     TOBN(0x80145e31, 0x823b7348), TOBN(0x4dd8f0d5, 0xac634069),
+     TOBN(0xe3d82fc7, 0x2297c258), TOBN(0x276fcfee, 0x9cee7431),
+     TOBN(0x8eb61b5e, 0x2bc0aea9), TOBN(0x4f668fd5, 0xde329431),
+     TOBN(0x03a32ab1, 0x38e4b87e), TOBN(0xe1374517, 0x73d0ef0b),
+     TOBN(0x1a46f7e6, 0x853ac983), TOBN(0xc3bdf42e, 0x68e78a57),
+     TOBN(0xacf20785, 0x2ea96dd1), TOBN(0xa10649b9, 0xf1638460),
+     TOBN(0xf2369f0b, 0x879fbbed), TOBN(0x0ff0ae86, 0xda9d1869),
+     TOBN(0x5251d759, 0x56766f45), TOBN(0x4984d8c0, 0x2be8d0fc),
+     TOBN(0x7ecc95a6, 0xd21008f0), TOBN(0x29bd54a0, 0x3a1a1c49),
+     TOBN(0xab9828c5, 0xd26c50f3), TOBN(0x32c0087c, 0x51d0d251),
+     TOBN(0x9bac3ce6, 0x0c1cdb26), TOBN(0xcd94d947, 0x557ca205),
+     TOBN(0x1b1bd598, 0x9db1fdcd), TOBN(0x0eda0108, 0xa3d8b149),
+     TOBN(0x95066610, 0x56152fcc), TOBN(0xc2f037e6, 0xe7192b33),
+     TOBN(0xdeffb41a, 0xc92e05a4), TOBN(0x1105f6c2, 0xc2f6c62e),
+     TOBN(0x68e73500, 0x8733913c), TOBN(0xcce86163, 0x3f3adc40),
+     TOBN(0xf407a942, 0x38a278e9), TOBN(0xd13c1b9d, 0x2ab21292),
+     TOBN(0x93ed7ec7, 0x1c74cf5c), TOBN(0x8887dc48, 0xf1a4c1b4),
+     TOBN(0x3830ff30, 0x4b3a11f1), TOBN(0x358c5a3c, 0x58937cb6),
+     TOBN(0x027dc404, 0x89022829), TOBN(0x40e93977, 0x3b798f79),
+     TOBN(0x90ad3337, 0x38be6ead), TOBN(0x9c23f6bc, 0xf34c0a5d),
+     TOBN(0xd1711a35, 0xfbffd8bb), TOBN(0x60fcfb49, 0x1949d3dd),
+     TOBN(0x09c8ef4b, 0x7825d93a), TOBN(0x24233cff, 0xa0a8c968),
+     TOBN(0x67ade46c, 0xe6d982af), TOBN(0xebb6bf3e, 0xe7544d7c),
+     TOBN(0xd6b9ba76, 0x3d8bd087), TOBN(0x46fe382d, 0x4dc61280),
+     TOBN(0xbd39a7e8, 0xb5bdbd75), TOBN(0xab381331, 0xb8f228fe),
+     TOBN(0x0709a77c, 0xce1c4300), TOBN(0x6a247e56, 0xf337ceac),
+     TOBN(0x8f34f21b, 0x636288be), TOBN(0x9dfdca74, 0xc8a7c305),
+     TOBN(0x6decfd1b, 0xea919e04), TOBN(0xcdf2688d, 0x8e1991f8),
+     TOBN(0xe607df44, 0xd0f8a67e), TOBN(0xd985df4b, 0x0b58d010),
+     TOBN(0x57f834c5, 0x0c24f8f4), TOBN(0xe976ef56, 0xa0bf01ae),
+     TOBN(0x536395ac, 0xa1c32373), TOBN(0x351027aa, 0x734c0a13),
+     TOBN(0xd2f1b5d6, 0x5e6bd5bc), TOBN(0x2b539e24, 0x223debed),
+     TOBN(0xd4994cec, 0x0eaa1d71), TOBN(0x2a83381d, 0x661dcf65),
+     TOBN(0x5f1aed2f, 0x7b54c740), TOBN(0x0bea3fa5, 0xd6dda5ee),
+     TOBN(0x9d4fb684, 0x36cc6134), TOBN(0x8eb9bbf3, 0xc0a443dd),
+     TOBN(0xfc500e2e, 0x383b7d2a), TOBN(0x7aad621c, 0x5b775257),
+     TOBN(0x69284d74, 0x0a8f7cc0), TOBN(0xe820c2ce, 0x07562d65),
+     TOBN(0xbf9531b9, 0x499758ee), TOBN(0x73e95ca5, 0x6ee0cc2d),
+     TOBN(0xf61790ab, 0xfbaf50a5), TOBN(0xdf55e76b, 0x684e0750),
+     TOBN(0xec516da7, 0xf176b005), TOBN(0x575553bb, 0x7a2dddc7),
+     TOBN(0x37c87ca3, 0x553afa73), TOBN(0x315f3ffc, 0x4d55c251),
+     TOBN(0xe846442a, 0xaf3e5d35), TOBN(0x61b91149, 0x6495ff28),
+     TOBN(0x23cc95d3, 0xfa326dc3), TOBN(0x1df4da1f, 0x18fc2cea),
+     TOBN(0x24bf9adc, 0xd0a37d59), TOBN(0xb6710053, 0x320d6e1e),
+     TOBN(0x96f9667e, 0x618344d1), TOBN(0xcc7ce042, 0xa06445af),
+     TOBN(0xa02d8514, 0xd68dbc3a), TOBN(0x4ea109e4, 0x280b5a5b),
+     TOBN(0x5741a7ac, 0xb40961bf), TOBN(0x4ada5937, 0x6aa56bfa),
+     TOBN(0x7feb9145, 0x02b765d1), TOBN(0x561e97be, 0xe6ad1582),
+     TOBN(0xbbc4a5b6, 0xda3982f5), TOBN(0x0c2659ed, 0xb546f468),
+     TOBN(0xb8e7e6aa, 0x59612d20), TOBN(0xd83dfe20, 0xac19e8e0),
+     TOBN(0x8530c45f, 0xb835398c), TOBN(0x6106a8bf, 0xb38a41c2),
+     TOBN(0x21e8f9a6, 0x35f5dcdb), TOBN(0x39707137, 0xcae498ed),
+     TOBN(0x70c23834, 0xd8249f00), TOBN(0x9f14b58f, 0xab2537a0),
+     TOBN(0xd043c365, 0x5f61c0c2), TOBN(0xdc5926d6, 0x09a194a7),
+     TOBN(0xddec0339, 0x8e77738a), TOBN(0xd07a63ef, 0xfba46426),
+     TOBN(0x2e58e79c, 0xee7f6e86), TOBN(0xe59b0459, 0xff32d241),
+     TOBN(0xc5ec84e5, 0x20fa0338), TOBN(0x97939ac8, 0xeaff5ace),
+     TOBN(0x0310a4e3, 0xb4a38313), TOBN(0x9115fba2, 0x8f9d9885),
+     TOBN(0x8dd710c2, 0x5fadf8c3), TOBN(0x66be38a2, 0xce19c0e2),
+     TOBN(0xd42a279c, 0x4cfe5022), TOBN(0x597bb530, 0x0e24e1b8),
+     TOBN(0x3cde86b7, 0xc153ca7f), TOBN(0xa8d30fb3, 0x707d63bd),
+     TOBN(0xac905f92, 0xbd60d21e), TOBN(0x98e7ffb6, 0x7b9a54ab),
+     TOBN(0xd7147df8, 0xe9726a30), TOBN(0xb5e216ff, 0xafce3533),
+     TOBN(0xb550b799, 0x2ff1ec40), TOBN(0x6b613b87, 0xa1e953fd),
+     TOBN(0x87b88dba, 0x792d5610), TOBN(0x2ee1270a, 0xa190fbe1),
+     TOBN(0x02f4e2dc, 0x2ef581da), TOBN(0x016530e4, 0xeff82a95),
+     TOBN(0xcbb93dfd, 0x8fd6ee89), TOBN(0x16d3d986, 0x46848fff),
+     TOBN(0x600eff24, 0x1da47adf), TOBN(0x1b9754a0, 0x0ad47a71),
+     TOBN(0x8f9266df, 0x70c33b98), TOBN(0xaadc87ae, 0xdf34186e),
+     TOBN(0x0d2ce8e1, 0x4ad24132), TOBN(0x8a47cbfc, 0x19946eba),
+     TOBN(0x47feeb66, 0x62b5f3af), TOBN(0xcefab561, 0x0abb3734),
+     TOBN(0x449de60e, 0x19f35cb1), TOBN(0x39f8db14, 0x157f0eb9),
+     TOBN(0xffaecc5b, 0x3c61bfd6), TOBN(0xa5a4d41d, 0x41216703),
+     TOBN(0x7f8fabed, 0x224e1cc2), TOBN(0x0d5a8186, 0x871ad953),
+     TOBN(0xf10774f7, 0xd22da9a9), TOBN(0x45b8a678, 0xcc8a9b0d),
+     TOBN(0xd9c2e722, 0xbdc32cff), TOBN(0xbf71b5f5, 0x337202a5),
+     TOBN(0x95c57f2f, 0x69fc4db9), TOBN(0xb6dad34c, 0x765d01e1),
+     TOBN(0x7e0bd13f, 0xcb904635), TOBN(0x61751253, 0x763a588c),
+     TOBN(0xd85c2997, 0x81af2c2d), TOBN(0xc0f7d9c4, 0x81b9d7da),
+     TOBN(0x838a34ae, 0x08533e8d), TOBN(0x15c4cb08, 0x311d8311),
+     TOBN(0x97f83285, 0x8e121e14), TOBN(0xeea7dc1e, 0x85000a5f),
+     TOBN(0x0c6059b6, 0x5d256274), TOBN(0xec9beace, 0xb95075c0),
+     TOBN(0x173daad7, 0x1df97828), TOBN(0xbf851cb5, 0xa8937877),
+     TOBN(0xb083c594, 0x01646f3c), TOBN(0x3bad30cf, 0x50c6d352),
+     TOBN(0xfeb2b202, 0x496bbcea), TOBN(0x3cf9fd4f, 0x18a1e8ba),
+     TOBN(0xd26de7ff, 0x1c066029), TOBN(0x39c81e9e, 0x4e9ed4f8),
+     TOBN(0xd8be0cb9, 0x7b390d35), TOBN(0x01df2bbd, 0x964aab27),
+     TOBN(0x3e8c1a65, 0xc3ef64f8), TOBN(0x567291d1, 0x716ed1dd),
+     TOBN(0x95499c6c, 0x5f5406d3), TOBN(0x71fdda39, 0x5ba8e23f),
+     TOBN(0xcfeb320e, 0xd5096ece), TOBN(0xbe7ba92b, 0xca66dd16),
+     TOBN(0x4608d36b, 0xc6fb5a7d), TOBN(0xe3eea15a, 0x6d2dd0e0),
+     TOBN(0x75b0a3eb, 0x8f97a36a), TOBN(0xf59814cc, 0x1c83de1e),
+     TOBN(0x56c9c5b0, 0x1c33c23f), TOBN(0xa96c1da4, 0x6faa4136),
+     TOBN(0x46bf2074, 0xde316551), TOBN(0x3b866e7b, 0x1f756c8f),
+     TOBN(0x727727d8, 0x1495ed6b), TOBN(0xb2394243, 0xb682dce7),
+     TOBN(0x8ab8454e, 0x758610f3), TOBN(0xc243ce84, 0x857d72a4),
+     TOBN(0x7b320d71, 0xdbbf370f), TOBN(0xff9afa37, 0x78e0f7ca),
+     TOBN(0x0119d1e0, 0xea7b523f), TOBN(0xb997f8cb, 0x058c7d42),
+     TOBN(0x285bcd2a, 0x37bbb184), TOBN(0x51dcec49, 0xa45d1fa6),
+     TOBN(0x6ade3b64, 0xe29634cb), TOBN(0x080c94a7, 0x26b86ef1),
+     TOBN(0xba583db1, 0x2283fbe3), TOBN(0x902bddc8, 0x5a9315ed),
+     TOBN(0x07c1ccb3, 0x86964bec), TOBN(0x78f4eacf, 0xb6258301),
+     TOBN(0x4bdf3a49, 0x56f90823), TOBN(0xba0f5080, 0x741d777b),
+     TOBN(0x091d71c3, 0xf38bf760), TOBN(0x9633d50f, 0x9b625b02),
+     TOBN(0x03ecb743, 0xb8c9de61), TOBN(0xb4751254, 0x5de74720),
+     TOBN(0x9f9defc9, 0x74ce1cb2), TOBN(0x774a4f6a, 0x00bd32ef),
+     TOBN(0xaca385f7, 0x73848f22), TOBN(0x53dad716, 0xf3f8558e),
+     TOBN(0xab7b34b0, 0x93c471f9), TOBN(0xf530e069, 0x19644bc7),
+     TOBN(0x3d9fb1ff, 0xdd59d31a), TOBN(0x4382e0df, 0x08daa795),
+     TOBN(0x165c6f4b, 0xd5cc88d7), TOBN(0xeaa392d5, 0x4a18c900),
+     TOBN(0x94203c67, 0x648024ee), TOBN(0x188763f2, 0x8c2fabcd),
+     TOBN(0xa80f87ac, 0xbbaec835), TOBN(0x632c96e0, 0xf29d8d54),
+     TOBN(0x29b0a60e, 0x4c00a95e), TOBN(0x2ef17f40, 0xe011e9fa),
+     TOBN(0xf6c0e1d1, 0x15b77223), TOBN(0xaaec2c62, 0x14b04e32),
+     TOBN(0xd35688d8, 0x3d84e58c), TOBN(0x2af5094c, 0x958571db),
+     TOBN(0x4fff7e19, 0x760682a6), TOBN(0x4cb27077, 0xe39a407c),
+     TOBN(0x0f59c547, 0x4ff0e321), TOBN(0x169f34a6, 0x1b34c8ff),
+     TOBN(0x2bff1096, 0x52bc1ba7), TOBN(0xa25423b7, 0x83583544),
+     TOBN(0x5d55d5d5, 0x0ac8b782), TOBN(0xff6622ec, 0x2db3c892),
+     TOBN(0x48fce741, 0x6b8bb642), TOBN(0x31d6998c, 0x69d7e3dc),
+     TOBN(0xdbaf8004, 0xcadcaed0), TOBN(0x801b0142, 0xd81d053c),
+     TOBN(0x94b189fc, 0x59630ec6), TOBN(0x120e9934, 0xaf762c8e),
+     TOBN(0x53a29aa4, 0xfdc6a404), TOBN(0x19d8e01e, 0xa1909948),
+     TOBN(0x3cfcabf1, 0xd7e89681), TOBN(0x3321a50d, 0x4e132d37),
+     TOBN(0xd0496863, 0xe9a86111), TOBN(0x8c0cde61, 0x06a3bc65),
+     TOBN(0xaf866c49, 0xfc9f8eef), TOBN(0x2066350e, 0xff7f5141),
+     TOBN(0x4f8a4689, 0xe56ddfbd), TOBN(0xea1b0c07, 0xfe32983a),
+     TOBN(0x2b317462, 0x873cb8cb), TOBN(0x658deddc, 0x2d93229f),
+     TOBN(0x65efaf4d, 0x0f64ef58), TOBN(0xfe43287d, 0x730cc7a8),
+     TOBN(0xaebc0c72, 0x3d047d70), TOBN(0x92efa539, 0xd92d26c9),
+     TOBN(0x06e78457, 0x94b56526), TOBN(0x415cb80f, 0x0961002d),
+     TOBN(0x89e5c565, 0x76dcb10f), TOBN(0x8bbb6982, 0xff9259fe),
+     TOBN(0x4fe8795b, 0x9abc2668), TOBN(0xb5d4f534, 0x1e678fb1),
+     TOBN(0x6601f3be, 0x7b7da2b9), TOBN(0x98da59e2, 0xa13d6805),
+     TOBN(0x190d8ea6, 0x01799a52), TOBN(0xa20cec41, 0xb86d2952),
+     TOBN(0x3062ffb2, 0x7fff2a7c), TOBN(0x741b32e5, 0x79f19d37),
+     TOBN(0xf80d8181, 0x4eb57d47), TOBN(0x7a2d0ed4, 0x16aef06b),
+     TOBN(0x09735fb0, 0x1cecb588), TOBN(0x1641caaa, 0xc6061f5b)}
+    ,
+    {TOBN(0x7f99824f, 0x20151427), TOBN(0x206828b6, 0x92430206),
+     TOBN(0xaa9097d7, 0xe1112357), TOBN(0xacf9a2f2, 0x09e414ec),
+     TOBN(0xdbdac9da, 0x27915356), TOBN(0x7e0734b7, 0x001efee3),
+     TOBN(0x54fab5bb, 0xd2b288e2), TOBN(0x4c630fc4, 0xf62dd09c),
+     TOBN(0x8537107a, 0x1ac2703b), TOBN(0xb49258d8, 0x6bc857b5),
+     TOBN(0x57df14de, 0xbcdaccd1), TOBN(0x24ab68d7, 0xc4ae8529),
+     TOBN(0x7ed8b5d4, 0x734e59d0), TOBN(0x5f8740c8, 0xc495cc80),
+     TOBN(0x84aedd5a, 0x291db9b3), TOBN(0x80b360f8, 0x4fb995be),
+     TOBN(0xae915f5d, 0x5fa067d1), TOBN(0x4134b57f, 0x9668960c),
+     TOBN(0xbd3656d6, 0xa48edaac), TOBN(0xdac1e3e4, 0xfc1d7436),
+     TOBN(0x674ff869, 0xd81fbb26), TOBN(0x449ed3ec, 0xb26c33d4),
+     TOBN(0x85138705, 0xd94203e8), TOBN(0xccde538b, 0xbeeb6f4a),
+     TOBN(0x55d5c68d, 0xa61a76fa), TOBN(0x598b441d, 0xca1554dc),
+     TOBN(0xd39923b9, 0x773b279c), TOBN(0x33331d3c, 0x36bf9efc),
+     TOBN(0x2d4c848e, 0x298de399), TOBN(0xcfdb8e77, 0xa1a27f56),
+     TOBN(0x94c855ea, 0x57b8ab70), TOBN(0xdcdb9dae, 0x6f7879ba),
+     TOBN(0x7bdff8c2, 0x019f2a59), TOBN(0xb3ce5bb3, 0xcb4fbc74),
+     TOBN(0xea907f68, 0x8a9173dd), TOBN(0x6cd3d0d3, 0x95a75439),
+     TOBN(0x92ecc4d6, 0xefed021c), TOBN(0x09a9f9b0, 0x6a77339a),
+     TOBN(0x87ca6b15, 0x7188c64a), TOBN(0x10c29968, 0x44899158),
+     TOBN(0x5859a229, 0xed6e82ef), TOBN(0x16f338e3, 0x65ebaf4e),
+     TOBN(0x0cd31387, 0x5ead67ae), TOBN(0x1c73d228, 0x54ef0bb4),
+     TOBN(0x4cb55131, 0x74a5c8c7), TOBN(0x01cd2970, 0x7f69ad6a),
+     TOBN(0xa04d00dd, 0xe966f87e), TOBN(0xd96fe447, 0x0b7b0321),
+     TOBN(0x342ac06e, 0x88fbd381), TOBN(0x02cd4a84, 0x5c35a493),
+     TOBN(0xe8fa89de, 0x54f1bbcd), TOBN(0x341d6367, 0x2575ed4c),
+     TOBN(0xebe357fb, 0xd238202b), TOBN(0x600b4d1a, 0xa984ead9),
+     TOBN(0xc35c9f44, 0x52436ea0), TOBN(0x96fe0a39, 0xa370751b),
+     TOBN(0x4c4f0736, 0x7f636a38), TOBN(0x9f943fb7, 0x0e76d5cb),
+     TOBN(0xb03510ba, 0xa8b68b8b), TOBN(0xc246780a, 0x9ed07a1f),
+     TOBN(0x3c051415, 0x6d549fc2), TOBN(0xc2953f31, 0x607781ca),
+     TOBN(0x955e2c69, 0xd8d95413), TOBN(0xb300fadc, 0x7bd282e3),
+     TOBN(0x81fe7b50, 0x87e9189f), TOBN(0xdb17375c, 0xf42dda27),
+     TOBN(0x22f7d896, 0xcf0a5904), TOBN(0xa0e57c5a, 0xebe348e6),
+     TOBN(0xa61011d3, 0xf40e3c80), TOBN(0xb1189321, 0x8db705c5),
+     TOBN(0x4ed9309e, 0x50fedec3), TOBN(0xdcf14a10, 0x4d6d5c1d),
+     TOBN(0x056c265b, 0x55691342), TOBN(0xe8e08504, 0x91049dc7),
+     TOBN(0x131329f5, 0xc9bae20a), TOBN(0x96c8b3e8, 0xd9dccdb4),
+     TOBN(0x8c5ff838, 0xfb4ee6b4), TOBN(0xfc5a9aeb, 0x41e8ccf0),
+     TOBN(0x7417b764, 0xfae050c6), TOBN(0x0953c3d7, 0x00452080),
+     TOBN(0x21372682, 0x38dfe7e8), TOBN(0xea417e15, 0x2bb79d4b),
+     TOBN(0x59641f1c, 0x76e7cf2d), TOBN(0x271e3059, 0xea0bcfcc),
+     TOBN(0x624c7dfd, 0x7253ecbd), TOBN(0x2f552e25, 0x4fca6186),
+     TOBN(0xcbf84ecd, 0x4d866e9c), TOBN(0x73967709, 0xf68d4610),
+     TOBN(0xa14b1163, 0xc27901b4), TOBN(0xfd9236e0, 0x899b8bf3),
+     TOBN(0x42b091ec, 0xcbc6da0a), TOBN(0xbb1dac6f, 0x5ad1d297),
+     TOBN(0x80e61d53, 0xa91cf76e), TOBN(0x4110a412, 0xd31f1ee7),
+     TOBN(0x2d87c3ba, 0x13efcf77), TOBN(0x1f374bb4, 0xdf450d76),
+     TOBN(0x5e78e2f2, 0x0d188dab), TOBN(0xe3968ed0, 0xf4b885ef),
+     TOBN(0x46c0568e, 0x7314570f), TOBN(0x31616338, 0x01170521),
+     TOBN(0x18e1e7e2, 0x4f0c8afe), TOBN(0x4caa75ff, 0xdeea78da),
+     TOBN(0x82db67f2, 0x7c5d8a51), TOBN(0x36a44d86, 0x6f505370),
+     TOBN(0xd72c5bda, 0x0333974f), TOBN(0x5db516ae, 0x27a70146),
+     TOBN(0x34705281, 0x210ef921), TOBN(0xbff17a8f, 0x0c9c38e5),
+     TOBN(0x78f4814e, 0x12476da1), TOBN(0xc1e16613, 0x33c16980),
+     TOBN(0x9e5b386f, 0x424d4bca), TOBN(0x4c274e87, 0xc85740de),
+     TOBN(0xb6a9b88d, 0x6c2f5226), TOBN(0x14d1b944, 0x550d7ca8),
+     TOBN(0x580c85fc, 0x1fc41709), TOBN(0xc1da368b, 0x54c6d519),
+     TOBN(0x2b0785ce, 0xd5113cf7), TOBN(0x0670f633, 0x5a34708f),
+     TOBN(0x46e23767, 0x15cc3f88), TOBN(0x1b480cfa, 0x50c72c8f),
+     TOBN(0x20288602, 0x4147519a), TOBN(0xd0981eac, 0x26b372f0),
+     TOBN(0xa9d4a7ca, 0xa785ebc8), TOBN(0xd953c50d, 0xdbdf58e9),
+     TOBN(0x9d6361cc, 0xfd590f8f), TOBN(0x72e9626b, 0x44e6c917),
+     TOBN(0x7fd96110, 0x22eb64cf), TOBN(0x863ebb7e, 0x9eb288f3),
+     TOBN(0x6e6ab761, 0x6aca8ee7), TOBN(0x97d10b39, 0xd7b40358),
+     TOBN(0x1687d377, 0x1e5feb0d), TOBN(0xc83e50e4, 0x8265a27a),
+     TOBN(0x8f75a9fe, 0xc954b313), TOBN(0xcc2e8f47, 0x310d1f61),
+     TOBN(0xf5ba81c5, 0x6557d0e0), TOBN(0x25f9680c, 0x3eaf6207),
+     TOBN(0xf95c6609, 0x4354080b), TOBN(0x5225bfa5, 0x7bf2fe1c),
+     TOBN(0xc5c004e2, 0x5c7d98fa), TOBN(0x3561bf1c, 0x019aaf60),
+     TOBN(0x5e6f9f17, 0xba151474), TOBN(0xdec2f934, 0xb04f6eca),
+     TOBN(0x64e368a1, 0x269acb1e), TOBN(0x1332d9e4, 0x0cdda493),
+     TOBN(0x60d6cf69, 0xdf23de05), TOBN(0x66d17da2, 0x009339a0),
+     TOBN(0x9fcac985, 0x0a693923), TOBN(0xbcf057fc, 0xed7c6a6d),
+     TOBN(0xc3c5c8c5, 0xf0b5662c), TOBN(0x25318dd8, 0xdcba4f24),
+     TOBN(0x60e8cb75, 0x082b69ff), TOBN(0x7c23b3ee, 0x1e728c01),
+     TOBN(0x15e10a0a, 0x097e4403), TOBN(0xcb3d0a86, 0x19854665),
+     TOBN(0x88d8e211, 0xd67d4826), TOBN(0xb39af66e, 0x0b9d2839),
+     TOBN(0xa5f94588, 0xbd475ca8), TOBN(0xe06b7966, 0xc077b80b),
+     TOBN(0xfedb1485, 0xda27c26c), TOBN(0xd290d33a, 0xfe0fd5e0),
+     TOBN(0xa40bcc47, 0xf34fb0fa), TOBN(0xb4760cc8, 0x1fb1ab09),
+     TOBN(0x8fca0993, 0xa273bfe3), TOBN(0x13e4fe07, 0xf70b213c),
+     TOBN(0x3bcdb992, 0xfdb05163), TOBN(0x8c484b11, 0x0c2b19b6),
+     TOBN(0x1acb815f, 0xaaf2e3e2), TOBN(0xc6905935, 0xb89ff1b4),
+     TOBN(0xb2ad6f9d, 0x586e74e1), TOBN(0x488883ad, 0x67b80484),
+     TOBN(0x758aa2c7, 0x369c3ddb), TOBN(0x8ab74e69, 0x9f9afd31),
+     TOBN(0x10fc2d28, 0x5e21beb1), TOBN(0x3484518a, 0x318c42f9),
+     TOBN(0x377427dc, 0x53cf40c3), TOBN(0x9de0781a, 0x391bc1d9),
+     TOBN(0x8faee858, 0x693807e1), TOBN(0xa3865327, 0x4e81ccc7),
+     TOBN(0x02c30ff2, 0x6f835b84), TOBN(0xb604437b, 0x0d3d38d4),
+     TOBN(0xb3fc8a98, 0x5ca1823d), TOBN(0xb82f7ec9, 0x03be0324),
+     TOBN(0xee36d761, 0xcf684a33), TOBN(0x5a01df0e, 0x9f29bf7d),
+     TOBN(0x686202f3, 0x1306583d), TOBN(0x05b10da0, 0x437c622e),
+     TOBN(0xbf9aaa0f, 0x076a7bc8), TOBN(0x25e94efb, 0x8f8f4e43),
+     TOBN(0x8a35c9b7, 0xfa3dc26d), TOBN(0xe0e5fb93, 0x96ff03c5),
+     TOBN(0xa77e3843, 0xebc394ce), TOBN(0xcede6595, 0x8361de60),
+     TOBN(0xd27c22f6, 0xa1993545), TOBN(0xab01cc36, 0x24d671ba),
+     TOBN(0x63fa2877, 0xa169c28e), TOBN(0x925ef904, 0x2eb08376),
+     TOBN(0x3b2fa3cf, 0x53aa0b32), TOBN(0xb27beb5b, 0x71c49d7a),
+     TOBN(0xb60e1834, 0xd105e27f), TOBN(0xd6089788, 0x4f68570d),
+     TOBN(0x23094ce0, 0xd6fbc2ac), TOBN(0x738037a1, 0x815ff551),
+     TOBN(0xda73b1bb, 0x6bef119c), TOBN(0xdcf6c430, 0xeef506ba),
+     TOBN(0x00e4fe7b, 0xe3ef104a), TOBN(0xebdd9a2c, 0x0a065628),
+     TOBN(0x853a81c3, 0x8792043e), TOBN(0x22ad6ece, 0xb3b59108),
+     TOBN(0x9fb813c0, 0x39cd297d), TOBN(0x8ec7e16e, 0x05bda5d9),
+     TOBN(0x2834797c, 0x0d104b96), TOBN(0xcc11a2e7, 0x7c511510),
+     TOBN(0x96ca5a53, 0x96ee6380), TOBN(0x054c8655, 0xcea38742),
+     TOBN(0xb5946852, 0xd54dfa7d), TOBN(0x97c422e7, 0x1f4ab207),
+     TOBN(0xbf907509, 0x0c22b540), TOBN(0x2cde42aa, 0xb7c267d4),
+     TOBN(0xba18f9ed, 0x5ab0d693), TOBN(0x3ba62aa6, 0x6e4660d9),
+     TOBN(0xb24bf97b, 0xab9ea96a), TOBN(0x5d039642, 0xe3b60e32),
+     TOBN(0x4e6a4506, 0x7c4d9bd5), TOBN(0x666c5b9e, 0x7ed4a6a4),
+     TOBN(0xfa3fdcd9, 0x8edbd7cc), TOBN(0x4660bb87, 0xc6ccd753),
+     TOBN(0x9ae90820, 0x21e6b64f), TOBN(0x8a56a713, 0xb36bfb3f),
+     TOBN(0xabfce096, 0x5726d47f), TOBN(0x9eed01b2, 0x0b1a9a7f),
+     TOBN(0x30e9cad4, 0x4eb74a37), TOBN(0x7b2524cc, 0x53e9666d),
+     TOBN(0x6a29683b, 0x8f4b002f), TOBN(0xc2200d7a, 0x41f4fc20),
+     TOBN(0xcf3af47a, 0x3a338acc), TOBN(0x6539a4fb, 0xe7128975),
+     TOBN(0xcec31c14, 0xc33c7fcf), TOBN(0x7eb6799b, 0xc7be322b),
+     TOBN(0x119ef4e9, 0x6646f623), TOBN(0x7b7a26a5, 0x54d7299b),
+     TOBN(0xcb37f08d, 0x403f46f2), TOBN(0x94b8fc43, 0x1a0ec0c7),
+     TOBN(0xbb8514e3, 0xc332142f), TOBN(0xf3ed2c33, 0xe80d2a7a),
+     TOBN(0x8d2080af, 0xb639126c), TOBN(0xf7b6be60, 0xe3553ade),
+     TOBN(0x3950aa9f, 0x1c7e2b09), TOBN(0x847ff958, 0x6410f02b),
+     TOBN(0x877b7cf5, 0x678a31b0), TOBN(0xd50301ae, 0x3998b620),
+     TOBN(0x734257c5, 0xc00fb396), TOBN(0xf9fb18a0, 0x04e672a6),
+     TOBN(0xff8bd8eb, 0xe8758851), TOBN(0x1e64e4c6, 0x5d99ba44),
+     TOBN(0x4b8eaedf, 0x7dfd93b7), TOBN(0xba2f2a98, 0x04e76b8c),
+     TOBN(0x7d790cba, 0xe8053433), TOBN(0xc8e725a0, 0x3d2c9585),
+     TOBN(0x58c5c476, 0xcdd8f5ed), TOBN(0xd106b952, 0xefa9fe1d),
+     TOBN(0x3c5c775b, 0x0eff13a9), TOBN(0x242442ba, 0xe057b930),
+     TOBN(0xe9f458d4, 0xc9b70cbd), TOBN(0x69b71448, 0xa3cdb89a),
+     TOBN(0x41ee46f6, 0x0e2ed742), TOBN(0x573f1045, 0x40067493),
+     TOBN(0xb1e154ff, 0x9d54c304), TOBN(0x2ad0436a, 0x8d3a7502),
+     TOBN(0xee4aaa2d, 0x431a8121), TOBN(0xcd38b3ab, 0x886f11ed),
+     TOBN(0x57d49ea6, 0x034a0eb7), TOBN(0xd2b773bd, 0xf7e85e58),
+     TOBN(0x4a559ac4, 0x9b5c1f14), TOBN(0xc444be1a, 0x3e54df2b),
+     TOBN(0x13aad704, 0xeda41891), TOBN(0xcd927bec, 0x5eb5c788),
+     TOBN(0xeb3c8516, 0xe48c8a34), TOBN(0x1b7ac812, 0x4b546669),
+     TOBN(0x1815f896, 0x594df8ec), TOBN(0x87c6a79c, 0x79227865),
+     TOBN(0xae02a2f0, 0x9b56ddbd), TOBN(0x1339b5ac, 0x8a2f1cf3),
+     TOBN(0xf2b569c7, 0x839dff0d), TOBN(0xb0b9e864, 0xfee9a43d),
+     TOBN(0x4ff8ca41, 0x77bb064e), TOBN(0x145a2812, 0xfd249f63),
+     TOBN(0x3ab7beac, 0xf86f689a), TOBN(0x9bafec27, 0x01d35f5e),
+     TOBN(0x28054c65, 0x4265aa91), TOBN(0xa4b18304, 0x035efe42),
+     TOBN(0x6887b0e6, 0x9639dec7), TOBN(0xf4b8f6ad, 0x3d52aea5),
+     TOBN(0xfb9293cc, 0x971a8a13), TOBN(0x3f159e5d, 0x4c934d07),
+     TOBN(0x2c50e9b1, 0x09acbc29), TOBN(0x08eb65e6, 0x7154d129),
+     TOBN(0x4feff589, 0x30b75c3e), TOBN(0x0bb82fe2, 0x94491c93),
+     TOBN(0xd8ac377a, 0x89af62bb), TOBN(0xd7b51490, 0x9685e49f),
+     TOBN(0xabca9a7b, 0x04497f19), TOBN(0x1b35ed0a, 0x1a7ad13f),
+     TOBN(0x6b601e21, 0x3ec86ed6), TOBN(0xda91fcb9, 0xce0c76f1),
+     TOBN(0x9e28507b, 0xd7ab27e1), TOBN(0x7c19a555, 0x63945b7b),
+     TOBN(0x6b43f0a1, 0xaafc9827), TOBN(0x443b4fbd, 0x3aa55b91),
+     TOBN(0x962b2e65, 0x6962c88f), TOBN(0x139da8d4, 0xce0db0ca),
+     TOBN(0xb93f05dd, 0x1b8d6c4f), TOBN(0x779cdff7, 0x180b9824),
+     TOBN(0xbba23fdd, 0xae57c7b7), TOBN(0x345342f2, 0x1b932522),
+     TOBN(0xfd9c80fe, 0x556d4aa3), TOBN(0xa03907ba, 0x6525bb61),
+     TOBN(0x38b010e1, 0xff218933), TOBN(0xc066b654, 0xaa52117b),
+     TOBN(0x8e141920, 0x94f2e6ea), TOBN(0x66a27dca, 0x0d32f2b2),
+     TOBN(0x69c7f993, 0x048b3717), TOBN(0xbf5a989a, 0xb178ae1c),
+     TOBN(0x49fa9058, 0x564f1d6b), TOBN(0x27ec6e15, 0xd31fde4e),
+     TOBN(0x4cce0373, 0x7276e7fc), TOBN(0x64086d79, 0x89d6bf02),
+     TOBN(0x5a72f046, 0x4ccdd979), TOBN(0x909c3566, 0x47775631),
+     TOBN(0x1c07bc6b, 0x75dd7125), TOBN(0xb4c6bc97, 0x87a0428d),
+     TOBN(0x507ece52, 0xfdeb6b9d), TOBN(0xfca56512, 0xb2c95432),
+     TOBN(0x15d97181, 0xd0e8bd06), TOBN(0x384dd317, 0xc6bb46ea),
+     TOBN(0x5441ea20, 0x3952b624), TOBN(0xbcf70dee, 0x4e7dc2fb),
+     TOBN(0x372b016e, 0x6628e8c3), TOBN(0x07a0d667, 0xb60a7522),
+     TOBN(0xcf05751b, 0x0a344ee2), TOBN(0x0ec09a48, 0x118bdeec),
+     TOBN(0x6e4b3d4e, 0xd83dce46), TOBN(0x43a6316d, 0x99d2fc6e),
+     TOBN(0xa99d8989, 0x56cf044c), TOBN(0x7c7f4454, 0xae3e5fb7),
+     TOBN(0xb2e6b121, 0xfbabbe92), TOBN(0x281850fb, 0xe1330076),
+     TOBN(0x093581ec, 0x97890015), TOBN(0x69b1dded, 0x75ff77f5),
+     TOBN(0x7cf0b18f, 0xab105105), TOBN(0x953ced31, 0xa89ccfef),
+     TOBN(0x3151f85f, 0xeb914009), TOBN(0x3c9f1b87, 0x88ed48ad),
+     TOBN(0xc9aba1a1, 0x4a7eadcb), TOBN(0x928e7501, 0x522e71cf),
+     TOBN(0xeaede727, 0x3a2e4f83), TOBN(0x467e10d1, 0x1ce3bbd3),
+     TOBN(0xf3442ac3, 0xb955dcf0), TOBN(0xba96307d, 0xd3d5e527),
+     TOBN(0xf763a10e, 0xfd77f474), TOBN(0x5d744bd0, 0x6a6e1ff0),
+     TOBN(0xd287282a, 0xa777899e), TOBN(0xe20eda8f, 0xd03f3cde),
+     TOBN(0x6a7e75bb, 0x50b07d31), TOBN(0x0b7e2a94, 0x6f379de4),
+     TOBN(0x31cb64ad, 0x19f593cf), TOBN(0x7b1a9e4f, 0x1e76ef1d),
+     TOBN(0xe18c9c9d, 0xb62d609c), TOBN(0x439bad6d, 0xe779a650),
+     TOBN(0x219d9066, 0xe032f144), TOBN(0x1db632b8, 0xe8b2ec6a),
+     TOBN(0xff0d0fd4, 0xfda12f78), TOBN(0x56fb4c2d, 0x2a25d265),
+     TOBN(0x5f4e2ee1, 0x255a03f1), TOBN(0x61cd6af2, 0xe96af176),
+     TOBN(0xe0317ba8, 0xd068bc97), TOBN(0x927d6bab, 0x264b988e),
+     TOBN(0xa18f07e0, 0xe90fb21e), TOBN(0x00fd2b80, 0xbba7fca1),
+     TOBN(0x20387f27, 0x95cd67b5), TOBN(0x5b89a4e7, 0xd39707f7),
+     TOBN(0x8f83ad3f, 0x894407ce), TOBN(0xa0025b94, 0x6c226132),
+     TOBN(0xc79563c7, 0xf906c13b), TOBN(0x5f548f31, 0x4e7bb025),
+     TOBN(0x2b4c6b8f, 0xeac6d113), TOBN(0xa67e3f9c, 0x0e813c76),
+     TOBN(0x3982717c, 0x3fe1f4b9), TOBN(0x58865819, 0x26d8050e),
+     TOBN(0x99f3640c, 0xf7f06f20), TOBN(0xdc610216, 0x2a66ebc2),
+     TOBN(0x52f2c175, 0x767a1e08), TOBN(0x05660e1a, 0x5999871b),
+     TOBN(0x6b0f1762, 0x6d3c4693), TOBN(0xf0e7d627, 0x37ed7bea),
+     TOBN(0xc51758c7, 0xb75b226d), TOBN(0x40a88628, 0x1f91613b),
+     TOBN(0x889dbaa7, 0xbbb38ce0), TOBN(0xe0404b65, 0xbddcad81),
+     TOBN(0xfebccd3a, 0x8bc9671f), TOBN(0xfbf9a357, 0xee1f5375),
+     TOBN(0x5dc169b0, 0x28f33398), TOBN(0xb07ec11d, 0x72e90f65),
+     TOBN(0xae7f3b4a, 0xfaab1eb1), TOBN(0xd970195e, 0x5f17538a),
+     TOBN(0x52b05cbe, 0x0181e640), TOBN(0xf5debd62, 0x2643313d),
+     TOBN(0x76148154, 0x5df31f82), TOBN(0x23e03b33, 0x3a9e13c5),
+     TOBN(0xff758949, 0x4fde0c1f), TOBN(0xbf8a1abe, 0xe5b6ec20),
+     TOBN(0x702278fb, 0x87e1db6c), TOBN(0xc447ad7a, 0x35ed658f),
+     TOBN(0x48d4aa38, 0x03d0ccf2), TOBN(0x80acb338, 0x819a7c03),
+     TOBN(0x9bc7c89e, 0x6e17cecc), TOBN(0x46736b8b, 0x03be1d82),
+     TOBN(0xd65d7b60, 0xc0432f96), TOBN(0xddebe7a3, 0xdeb5442f),
+     TOBN(0x79a25307, 0x7dff69a2), TOBN(0x37a56d94, 0x02cf3122),
+     TOBN(0x8bab8aed, 0xf2350d0a), TOBN(0x13c3f276, 0x037b0d9a),
+     TOBN(0xc664957c, 0x44c65cae), TOBN(0x88b44089, 0xc2e71a88),
+     TOBN(0xdb88e5a3, 0x5cb02664), TOBN(0x5d4c0bf1, 0x8686c72e),
+     TOBN(0xea3d9b62, 0xa682d53e), TOBN(0x9b605ef4, 0x0b2ad431),
+     TOBN(0x71bac202, 0xc69645d0), TOBN(0xa115f03a, 0x6a1b66e7),
+     TOBN(0xfe2c563a, 0x158f4dc4), TOBN(0xf715b3a0, 0x4d12a78c),
+     TOBN(0x8f7f0a48, 0xd413213a), TOBN(0x2035806d, 0xc04becdb),
+     TOBN(0xecd34a99, 0x5d8587f5), TOBN(0x4d8c3079, 0x9f6d3a71),
+     TOBN(0x1b2a2a67, 0x8d95a8f6), TOBN(0xc58c9d7d, 0xf2110d0d),
+     TOBN(0xdeee81d5, 0xcf8fba3f), TOBN(0xa42be3c0, 0x0c7cdf68),
+     TOBN(0x2126f742, 0xd43b5eaa), TOBN(0x054a0766, 0xdfa59b85),
+     TOBN(0x9d0d5e36, 0x126bfd45), TOBN(0xa1f8fbd7, 0x384f8a8f),
+     TOBN(0x317680f5, 0xd563fccc), TOBN(0x48ca5055, 0xf280a928),
+     TOBN(0xe00b81b2, 0x27b578cf), TOBN(0x10aad918, 0x2994a514),
+     TOBN(0xd9e07b62, 0xb7bdc953), TOBN(0x9f0f6ff2, 0x5bc086dd),
+     TOBN(0x09d1ccff, 0x655eee77), TOBN(0x45475f79, 0x5bef7df1),
+     TOBN(0x3faa28fa, 0x86f702cc), TOBN(0x92e60905, 0x0f021f07),
+     TOBN(0xe9e62968, 0x7f8fa8c6), TOBN(0xbd71419a, 0xf036ea2c),
+     TOBN(0x171ee1cc, 0x6028da9a), TOBN(0x5352fe1a, 0xc251f573),
+     TOBN(0xf8ff236e, 0x3fa997f4), TOBN(0xd831b6c9, 0xa5749d5f),
+     TOBN(0x7c872e1d, 0xe350e2c2), TOBN(0xc56240d9, 0x1e0ce403),
+     TOBN(0xf9deb077, 0x6974f5cb), TOBN(0x7d50ba87, 0x961c3728),
+     TOBN(0xd6f89426, 0x5a3a2518), TOBN(0xcf817799, 0xc6303d43),
+     TOBN(0x510a0471, 0x619e5696), TOBN(0xab049ff6, 0x3a5e307b),
+     TOBN(0xe4cdf9b0, 0xfeb13ec7), TOBN(0xd5e97117, 0x9d8ff90c),
+     TOBN(0xf6f64d06, 0x9afa96af), TOBN(0x00d0bf5e, 0x9d2012a2),
+     TOBN(0xe63f301f, 0x358bcdc0), TOBN(0x07689e99, 0x0a9d47f8),
+     TOBN(0x1f689e2f, 0x4f43d43a), TOBN(0x4d542a16, 0x90920904),
+     TOBN(0xaea293d5, 0x9ca0a707), TOBN(0xd061fe45, 0x8ac68065),
+     TOBN(0x1033bf1b, 0x0090008c), TOBN(0x29749558, 0xc08a6db6),
+     TOBN(0x74b5fc59, 0xc1d5d034), TOBN(0xf712e9f6, 0x67e215e0),
+     TOBN(0xfd520cbd, 0x860200e6), TOBN(0x0229acb4, 0x3ea22588),
+     TOBN(0x9cd1e14c, 0xfff0c82e), TOBN(0x87684b62, 0x59c69e73),
+     TOBN(0xda85e61c, 0x96ccb989), TOBN(0x2d5dbb02, 0xa3d06493),
+     TOBN(0xf22ad33a, 0xe86b173c), TOBN(0xe8e41ea5, 0xa79ff0e3),
+     TOBN(0x01d2d725, 0xdd0d0c10), TOBN(0x31f39088, 0x032d28f9),
+     TOBN(0x7b3f71e1, 0x7829839e), TOBN(0x0cf691b4, 0x4502ae58),
+     TOBN(0xef658dbd, 0xbefc6115), TOBN(0xa5cd6ee5, 0xb3ab5314),
+     TOBN(0x206c8d7b, 0x5f1d2347), TOBN(0x794645ba, 0x4cc2253a),
+     TOBN(0xd517d8ff, 0x58389e08), TOBN(0x4fa20dee, 0x9f847288),
+     TOBN(0xeba072d8, 0xd797770a), TOBN(0x7360c91d, 0xbf429e26),
+     TOBN(0x7200a3b3, 0x80af8279), TOBN(0x6a1c9150, 0x82dadce3),
+     TOBN(0x0ee6d3a7, 0xc35d8794), TOBN(0x042e6558, 0x0356bae5),
+     TOBN(0x9f59698d, 0x643322fd), TOBN(0x9379ae15, 0x50a61967),
+     TOBN(0x64b9ae62, 0xfcc9981e), TOBN(0xaed3d631, 0x6d2934c6),
+     TOBN(0x2454b302, 0x5e4e65eb), TOBN(0xab09f647, 0xf9950428)}
+    ,
+    {TOBN(0xb2083a12, 0x22248acc), TOBN(0x1f6ec0ef, 0x3264e366),
+     TOBN(0x5659b704, 0x5afdee28), TOBN(0x7a823a40, 0xe6430bb5),
+     TOBN(0x24592a04, 0xe1900a79), TOBN(0xcde09d4a, 0xc9ee6576),
+     TOBN(0x52b6463f, 0x4b5ea54a), TOBN(0x1efe9ed3, 0xd3ca65a7),
+     TOBN(0xe27a6dbe, 0x305406dd), TOBN(0x8eb7dc7f, 0xdd5d1957),
+     TOBN(0xf54a6876, 0x387d4d8f), TOBN(0x9c479409, 0xc7762de4),
+     TOBN(0xbe4d5b5d, 0x99b30778), TOBN(0x25380c56, 0x6e793682),
+     TOBN(0x602d37f3, 0xdac740e3), TOBN(0x140deabe, 0x1566e4ae),
+     TOBN(0x4481d067, 0xafd32acf), TOBN(0xd8f0fcca, 0xe1f71ccf),
+     TOBN(0xd208dd0c, 0xb596f2da), TOBN(0xd049d730, 0x9aad93f9),
+     TOBN(0xc79f263d, 0x42ab580e), TOBN(0x09411bb1, 0x23f707b4),
+     TOBN(0x8cfde1ff, 0x835e0eda), TOBN(0x72707490, 0x90f03402),
+     TOBN(0xeaee6126, 0xc49a861e), TOBN(0x024f3b65, 0xe14f0d06),
+     TOBN(0x51a3f1e8, 0xc69bfc17), TOBN(0xc3c3a8e9, 0xa7686381),
+     TOBN(0x3400752c, 0xb103d4c8), TOBN(0x02bc4613, 0x9218b36b),
+     TOBN(0xc67f75eb, 0x7651504a), TOBN(0xd6848b56, 0xd02aebfa),
+     TOBN(0xbd9802e6, 0xc30fa92b), TOBN(0x5a70d96d, 0x9a552784),
+     TOBN(0x9085c4ea, 0x3f83169b), TOBN(0xfa9423bb, 0x06908228),
+     TOBN(0x2ffebe12, 0xfe97a5b9), TOBN(0x85da6049, 0x71b99118),
+     TOBN(0x9cbc2f7f, 0x63178846), TOBN(0xfd96bc70, 0x9153218e),
+     TOBN(0x958381db, 0x1782269b), TOBN(0xae34bf79, 0x2597e550),
+     TOBN(0xbb5c6064, 0x5f385153), TOBN(0x6f0e96af, 0xe3088048),
+     TOBN(0xbf6a0215, 0x77884456), TOBN(0xb3b5688c, 0x69310ea7),
+     TOBN(0x17c94295, 0x04fad2de), TOBN(0xe020f0e5, 0x17896d4d),
+     TOBN(0x730ba0ab, 0x0976505f), TOBN(0x567f6813, 0x095e2ec5),
+     TOBN(0x47062010, 0x6331ab71), TOBN(0x72cfa977, 0x41d22b9f),
+     TOBN(0x33e55ead, 0x8a2373da), TOBN(0xa8d0d5f4, 0x7ba45a68),
+     TOBN(0xba1d8f9c, 0x03029d15), TOBN(0x8f34f1cc, 0xfc55b9f3),
+     TOBN(0xcca4428d, 0xbbe5a1a9), TOBN(0x8187fd5f, 0x3126bd67),
+     TOBN(0x0036973a, 0x48105826), TOBN(0xa39b6663, 0xb8bd61a0),
+     TOBN(0x6d42deef, 0x2d65a808), TOBN(0x4969044f, 0x94636b19),
+     TOBN(0xf611ee47, 0xdd5d564c), TOBN(0x7b2f3a49, 0xd2873077),
+     TOBN(0x94157d45, 0x300eb294), TOBN(0x2b2a656e, 0x169c1494),
+     TOBN(0xc000dd76, 0xd3a47aa9), TOBN(0xa2864e4f, 0xa6243ea4),
+     TOBN(0x82716c47, 0xdb89842e), TOBN(0x12dfd7d7, 0x61479fb7),
+     TOBN(0x3b9a2c56, 0xe0b2f6dc), TOBN(0x46be862a, 0xd7f85d67),
+     TOBN(0x03b0d8dd, 0x0f82b214), TOBN(0x460c34f9, 0xf103cbc6),
+     TOBN(0xf32e5c03, 0x18d79e19), TOBN(0x8b8888ba, 0xa84117f8),
+     TOBN(0x8f3c37dc, 0xc0722677), TOBN(0x10d21be9, 0x1c1c0f27),
+     TOBN(0xd47c8468, 0xe0f7a0c6), TOBN(0x9bf02213, 0xadecc0e0),
+     TOBN(0x0baa7d12, 0x42b48b99), TOBN(0x1bcb665d, 0x48424096),
+     TOBN(0x8b847cd6, 0xebfb5cfb), TOBN(0x87c2ae56, 0x9ad4d10d),
+     TOBN(0xf1cbb122, 0x0de36726), TOBN(0xe7043c68, 0x3fdfbd21),
+     TOBN(0x4bd0826a, 0x4e79d460), TOBN(0x11f5e598, 0x4bd1a2cb),
+     TOBN(0x97554160, 0xb7fe7b6e), TOBN(0x7d16189a, 0x400a3fb2),
+     TOBN(0xd73e9bea, 0xe328ca1e), TOBN(0x0dd04b97, 0xe793d8cc),
+     TOBN(0xa9c83c9b, 0x506db8cc), TOBN(0x5cd47aae, 0xcf38814c),
+     TOBN(0x26fc430d, 0xb64b45e6), TOBN(0x079b5499, 0xd818ea84),
+     TOBN(0xebb01102, 0xc1c24a3b), TOBN(0xca24e568, 0x1c161c1a),
+     TOBN(0x103eea69, 0x36f00a4a), TOBN(0x9ad76ee8, 0x76176c7b),
+     TOBN(0x97451fc2, 0x538e0ff7), TOBN(0x94f89809, 0x6604b3b0),
+     TOBN(0x6311436e, 0x3249cfd7), TOBN(0x27b4a7bd, 0x41224f69),
+     TOBN(0x03b5d21a, 0xe0ac2941), TOBN(0x279b0254, 0xc2d31937),
+     TOBN(0x3307c052, 0xcac992d0), TOBN(0x6aa7cb92, 0xefa8b1f3),
+     TOBN(0x5a182580, 0x0d37c7a5), TOBN(0x13380c37, 0x342d5422),
+     TOBN(0x92ac2d66, 0xd5d2ef92), TOBN(0x035a70c9, 0x030c63c6),
+     TOBN(0xc16025dd, 0x4ce4f152), TOBN(0x1f419a71, 0xf9df7c06),
+     TOBN(0x6d5b2214, 0x91e4bb14), TOBN(0xfc43c6cc, 0x839fb4ce),
+     TOBN(0x49f06591, 0x925d6b2d), TOBN(0x4b37d9d3, 0x62186598),
+     TOBN(0x8c54a971, 0xd01b1629), TOBN(0xe1a9c29f, 0x51d50e05),
+     TOBN(0x5109b785, 0x71ba1861), TOBN(0x48b22d5c, 0xd0c8f93d),
+     TOBN(0xe8fa84a7, 0x8633bb93), TOBN(0x53fba6ba, 0x5aebbd08),
+     TOBN(0x7ff27df3, 0xe5eea7d8), TOBN(0x521c8796, 0x68ca7158),
+     TOBN(0xb9d5133b, 0xce6f1a05), TOBN(0x2d50cd53, 0xfd0ebee4),
+     TOBN(0xc82115d6, 0xc5a3ef16), TOBN(0x993eff9d, 0xba079221),
+     TOBN(0xe4da2c5e, 0x4b5da81c), TOBN(0x9a89dbdb, 0x8033fd85),
+     TOBN(0x60819ebf, 0x2b892891), TOBN(0x53902b21, 0x5d14a4d5),
+     TOBN(0x6ac35051, 0xd7fda421), TOBN(0xcc6ab885, 0x61c83284),
+     TOBN(0x14eba133, 0xf74cff17), TOBN(0x240aaa03, 0xecb813f2),
+     TOBN(0xcfbb6540, 0x6f665bee), TOBN(0x084b1fe4, 0xa425ad73),
+     TOBN(0x009d5d16, 0xd081f6a6), TOBN(0x35304fe8, 0xeef82c90),
+     TOBN(0xf20346d5, 0xaa9eaa22), TOBN(0x0ada9f07, 0xac1c91e3),
+     TOBN(0xa6e21678, 0x968a6144), TOBN(0x54c1f77c, 0x07b31a1e),
+     TOBN(0xd6bb787e, 0x5781fbe1), TOBN(0x61bd2ee0, 0xe31f1c4a),
+     TOBN(0xf25aa1e9, 0x781105fc), TOBN(0x9cf2971f, 0x7b2f8e80),
+     TOBN(0x26d15412, 0xcdff919b), TOBN(0x01db4ebe, 0x34bc896e),
+     TOBN(0x7d9b3e23, 0xb40df1cf), TOBN(0x59337373, 0x94e971b4),
+     TOBN(0xbf57bd14, 0x669cf921), TOBN(0x865daedf, 0x0c1a1064),
+     TOBN(0x3eb70bd3, 0x83279125), TOBN(0xbc3d5b9f, 0x34ecdaab),
+     TOBN(0x91e3ed7e, 0x5f755caf), TOBN(0x49699f54, 0xd41e6f02),
+     TOBN(0x185770e1, 0xd4a7a15b), TOBN(0x08f3587a, 0xeaac87e7),
+     TOBN(0x352018db, 0x473133ea), TOBN(0x674ce719, 0x04fd30fc),
+     TOBN(0x7b8d9835, 0x088b3e0e), TOBN(0x7a0356a9, 0x5d0d47a1),
+     TOBN(0x9d9e7659, 0x6474a3c4), TOBN(0x61ea48a7, 0xff66966c),
+     TOBN(0x30417758, 0x0f3e4834), TOBN(0xfdbb21c2, 0x17a9afcb),
+     TOBN(0x756fa17f, 0x2f9a67b3), TOBN(0x2a6b2421, 0xa245c1a8),
+     TOBN(0x64be2794, 0x4af02291), TOBN(0xade465c6, 0x2a5804fe),
+     TOBN(0x8dffbd39, 0xa6f08fd7), TOBN(0xc4efa84c, 0xaa14403b),
+     TOBN(0xa1b91b2a, 0x442b0f5c), TOBN(0xb748e317, 0xcf997736),
+     TOBN(0x8d1b62bf, 0xcee90e16), TOBN(0x907ae271, 0x0b2078c0),
+     TOBN(0xdf31534b, 0x0c9bcddd), TOBN(0x043fb054, 0x39adce83),
+     TOBN(0x99031043, 0xd826846a), TOBN(0x61a9c0d6, 0xb144f393),
+     TOBN(0xdab48046, 0x47718427), TOBN(0xdf17ff9b, 0x6e830f8b),
+     TOBN(0x408d7ee8, 0xe49a1347), TOBN(0x6ac71e23, 0x91c1d4ae),
+     TOBN(0xc8cbb9fd, 0x1defd73c), TOBN(0x19840657, 0xbbbbfec5),
+     TOBN(0x39db1cb5, 0x9e7ef8ea), TOBN(0x78aa8296, 0x64105f30),
+     TOBN(0xa3d9b7f0, 0xa3738c29), TOBN(0x0a2f235a, 0xbc3250a3),
+     TOBN(0x55e506f6, 0x445e4caf), TOBN(0x0974f73d, 0x33475f7a),
+     TOBN(0xd37dbba3, 0x5ba2f5a8), TOBN(0x542c6e63, 0x6af40066),
+     TOBN(0x26d99b53, 0xc5d73e2c), TOBN(0x06060d7d, 0x6c3ca33e),
+     TOBN(0xcdbef1c2, 0x065fef4a), TOBN(0x77e60f7d, 0xfd5b92e3),
+     TOBN(0xd7c549f0, 0x26708350), TOBN(0x201b3ad0, 0x34f121bf),
+     TOBN(0x5fcac2a1, 0x0334fc14), TOBN(0x8a9a9e09, 0x344552f6),
+     TOBN(0x7dd8a1d3, 0x97653082), TOBN(0x5fc0738f, 0x79d4f289),
+     TOBN(0x787d244d, 0x17d2d8c3), TOBN(0xeffc6345, 0x70830684),
+     TOBN(0x5ddb96dd, 0xe4f73ae5), TOBN(0x8efb14b1, 0x172549a5),
+     TOBN(0x6eb73eee, 0x2245ae7a), TOBN(0xbca4061e, 0xea11f13e),
+     TOBN(0xb577421d, 0x30b01f5d), TOBN(0xaa688b24, 0x782e152c),
+     TOBN(0x67608e71, 0xbd3502ba), TOBN(0x4ef41f24, 0xb4de75a0),
+     TOBN(0xb08dde5e, 0xfd6125e5), TOBN(0xde484825, 0xa409543f),
+     TOBN(0x1f198d98, 0x65cc2295), TOBN(0x428a3771, 0x6e0edfa2),
+     TOBN(0x4f9697a2, 0xadf35fc7), TOBN(0x01a43c79, 0xf7cac3c7),
+     TOBN(0xb05d7059, 0x0fd3659a), TOBN(0x8927f30c, 0xbb7f2d9a),
+     TOBN(0x4023d1ac, 0x8cf984d3), TOBN(0x32125ed3, 0x02897a45),
+     TOBN(0xfb572dad, 0x3d414205), TOBN(0x73000ef2, 0xe3fa82a9),
+     TOBN(0x4c0868e9, 0xf10a5581), TOBN(0x5b61fc67, 0x6b0b3ca5),
+     TOBN(0xc1258d5b, 0x7cae440c), TOBN(0x21c08b41, 0x402b7531),
+     TOBN(0xf61a8955, 0xde932321), TOBN(0x3568faf8, 0x2d1408af),
+     TOBN(0x71b15e99, 0x9ecf965b), TOBN(0xf14ed248, 0xe917276f),
+     TOBN(0xc6f4caa1, 0x820cf9e2), TOBN(0x681b20b2, 0x18d83c7e),
+     TOBN(0x6cde738d, 0xc6c01120), TOBN(0x71db0813, 0xae70e0db),
+     TOBN(0x95fc0644, 0x74afe18c), TOBN(0x34619053, 0x129e2be7),
+     TOBN(0x80615cea, 0xdb2a3b15), TOBN(0x0a49a19e, 0xdb4c7073),
+     TOBN(0x0e1b84c8, 0x8fd2d367), TOBN(0xd74bf462, 0x033fb8aa),
+     TOBN(0x889f6d65, 0x533ef217), TOBN(0x7158c7e4, 0xc3ca2e87),
+     TOBN(0xfb670dfb, 0xdc2b4167), TOBN(0x75910a01, 0x844c257f),
+     TOBN(0xf336bf07, 0xcf88577d), TOBN(0x22245250, 0xe45e2ace),
+     TOBN(0x2ed92e8d, 0x7ca23d85), TOBN(0x29f8be4c, 0x2b812f58),
+     TOBN(0xdd9ebaa7, 0x076fe12b), TOBN(0x3f2400cb, 0xae1537f9),
+     TOBN(0x1aa93528, 0x17bdfb46), TOBN(0xc0f98430, 0x67883b41),
+     TOBN(0x5590ede1, 0x0170911d), TOBN(0x7562f5bb, 0x34d4b17f),
+     TOBN(0xe1fa1df2, 0x1826b8d2), TOBN(0xb40b796a, 0x6bd80d59),
+     TOBN(0xd65bf197, 0x3467ba92), TOBN(0x8c9b46db, 0xf70954b0),
+     TOBN(0x97c8a0f3, 0x0e78f15d), TOBN(0xa8f3a69a, 0x85a4c961),
+     TOBN(0x4242660f, 0x61e4ce9b), TOBN(0xbf06aab3, 0x6ea6790c),
+     TOBN(0xc6706f8e, 0xec986416), TOBN(0x9e56dec1, 0x9a9fc225),
+     TOBN(0x527c46f4, 0x9a9898d9), TOBN(0xd799e77b, 0x5633cdef),
+     TOBN(0x24eacc16, 0x7d9e4297), TOBN(0xabb61cea, 0x6b1cb734),
+     TOBN(0xbee2e8a7, 0xf778443c), TOBN(0x3bb42bf1, 0x29de2fe6),
+     TOBN(0xcbed86a1, 0x3003bb6f), TOBN(0xd3918e6c, 0xd781cdf6),
+     TOBN(0x4bee3271, 0x9a5103f1), TOBN(0x5243efc6, 0xf50eac06),
+     TOBN(0xb8e122cb, 0x6adcc119), TOBN(0x1b7faa84, 0xc0b80a08),
+     TOBN(0x32c3d1bd, 0x6dfcd08c), TOBN(0x129dec4e, 0x0be427de),
+     TOBN(0x98ab679c, 0x1d263c83), TOBN(0xafc83cb7, 0xcef64eff),
+     TOBN(0x85eb6088, 0x2fa6be76), TOBN(0x892585fb, 0x1328cbfe),
+     TOBN(0xc154d3ed, 0xcf618dda), TOBN(0xc44f601b, 0x3abaf26e),
+     TOBN(0x7bf57d0b, 0x2be1fdfd), TOBN(0xa833bd2d, 0x21137fee),
+     TOBN(0x9353af36, 0x2db591a8), TOBN(0xc76f26dc, 0x5562a056),
+     TOBN(0x1d87e47d, 0x3fdf5a51), TOBN(0x7afb5f93, 0x55c9cab0),
+     TOBN(0x91bbf58f, 0x89e0586e), TOBN(0x7c72c018, 0x0d843709),
+     TOBN(0xa9a5aafb, 0x99b5c3dc), TOBN(0xa48a0f1d, 0x3844aeb0),
+     TOBN(0x7178b7dd, 0xb667e482), TOBN(0x453985e9, 0x6e23a59a),
+     TOBN(0x4a54c860, 0x01b25dd8), TOBN(0x0dd37f48, 0xfb897c8a),
+     TOBN(0x5f8aa610, 0x0ea90cd9), TOBN(0xc8892c68, 0x16d5830d),
+     TOBN(0xeb4befc0, 0xef514ca5), TOBN(0x478eb679, 0xe72c9ee6),
+     TOBN(0x9bca20da, 0xdbc40d5f), TOBN(0xf015de21, 0xdde4f64a),
+     TOBN(0xaa6a4de0, 0xeaf4b8a5), TOBN(0x68cfd9ca, 0x4bc60e32),
+     TOBN(0x668a4b01, 0x7fd15e70), TOBN(0xd9f0694a, 0xf27dc09d),
+     TOBN(0xf6c3cad5, 0xba708bcd), TOBN(0x5cd2ba69, 0x5bb95c2a),
+     TOBN(0xaa28c1d3, 0x33c0a58f), TOBN(0x23e274e3, 0xabc77870),
+     TOBN(0x44c3692d, 0xdfd20a4a), TOBN(0x091c5fd3, 0x81a66653),
+     TOBN(0x6c0bb691, 0x09a0757d), TOBN(0x9072e8b9, 0x667343ea),
+     TOBN(0x31d40eb0, 0x80848bec), TOBN(0x95bd480a, 0x79fd36cc),
+     TOBN(0x01a77c61, 0x65ed43f5), TOBN(0xafccd127, 0x2e0d40bf),
+     TOBN(0xeccfc82d, 0x1cc1884b), TOBN(0xc85ac201, 0x5d4753b4),
+     TOBN(0xc7a6caac, 0x658e099f), TOBN(0xcf46369e, 0x04b27390),
+     TOBN(0xe2e7d049, 0x506467ea), TOBN(0x481b63a2, 0x37cdeccc),
+     TOBN(0x4029abd8, 0xed80143a), TOBN(0x28bfe3c7, 0xbcb00b88),
+     TOBN(0x3bec1009, 0x0643d84a), TOBN(0x885f3668, 0xabd11041),
+     TOBN(0xdb02432c, 0xf83a34d6), TOBN(0x32f7b360, 0x719ceebe),
+     TOBN(0xf06c7837, 0xdad1fe7a), TOBN(0x60a157a9, 0x5441a0b0),
+     TOBN(0x704970e9, 0xe2d47550), TOBN(0xcd2bd553, 0x271b9020),
+     TOBN(0xff57f82f, 0x33e24a0b), TOBN(0x9cbee23f, 0xf2565079),
+     TOBN(0x16353427, 0xeb5f5825), TOBN(0x276feec4, 0xe948d662),
+     TOBN(0xd1b62bc6, 0xda10032b), TOBN(0x718351dd, 0xf0e72a53),
+     TOBN(0x93452076, 0x2420e7ba), TOBN(0x96368fff, 0x3a00118d),
+     TOBN(0x00ce2d26, 0x150a49e4), TOBN(0x0c28b636, 0x3f04706b),
+     TOBN(0xbad65a46, 0x58b196d0), TOBN(0x6c8455fc, 0xec9f8b7c),
+     TOBN(0xe90c895f, 0x2d71867e), TOBN(0x5c0be31b, 0xedf9f38c),
+     TOBN(0x2a37a15e, 0xd8f6ec04), TOBN(0x239639e7, 0x8cd85251),
+     TOBN(0xd8975315, 0x9c7c4c6b), TOBN(0x603aa3c0, 0xd7409af7),
+     TOBN(0xb8d53d0c, 0x007132fb), TOBN(0x68d12af7, 0xa6849238),
+     TOBN(0xbe0607e7, 0xbf5d9279), TOBN(0x9aa50055, 0xaada74ce),
+     TOBN(0xe81079cb, 0xba7e8ccb), TOBN(0x610c71d1, 0xa5f4ff5e),
+     TOBN(0x9e2ee1a7, 0x5aa07093), TOBN(0xca84004b, 0xa75da47c),
+     TOBN(0x074d3951, 0x3de75401), TOBN(0xf938f756, 0xbb311592),
+     TOBN(0x96197618, 0x00a43421), TOBN(0x39a25362, 0x07bc78c8),
+     TOBN(0x278f710a, 0x0a171276), TOBN(0xb28446ea, 0x8d1a8f08),
+     TOBN(0x184781bf, 0xe3b6a661), TOBN(0x7751cb1d, 0xe6d279f7),
+     TOBN(0xf8ff95d6, 0xc59eb662), TOBN(0x186d90b7, 0x58d3dea7),
+     TOBN(0x0e4bb6c1, 0xdfb4f754), TOBN(0x5c5cf56b, 0x2b2801dc),
+     TOBN(0xc561e452, 0x1f54564d), TOBN(0xb4fb8c60, 0xf0dd7f13),
+     TOBN(0xf8849630, 0x33ff98c7), TOBN(0x9619fffa, 0xcf17769c),
+     TOBN(0xf8090bf6, 0x1bfdd80a), TOBN(0x14d9a149, 0x422cfe63),
+     TOBN(0xb354c360, 0x6f6df9ea), TOBN(0xdbcf770d, 0x218f17ea),
+     TOBN(0x207db7c8, 0x79eb3480), TOBN(0x213dbda8, 0x559b6a26),
+     TOBN(0xac4c200b, 0x29fc81b3), TOBN(0xebc3e09f, 0x171d87c1),
+     TOBN(0x91799530, 0x1481aa9e), TOBN(0x051b92e1, 0x92e114fa),
+     TOBN(0xdf8f92e9, 0xecb5537f), TOBN(0x44b1b2cc, 0x290c7483),
+     TOBN(0xa711455a, 0x2adeb016), TOBN(0x964b6856, 0x81a10c2c),
+     TOBN(0x4f159d99, 0xcec03623), TOBN(0x05532225, 0xef3271ea),
+     TOBN(0xb231bea3, 0xc5ee4849), TOBN(0x57a54f50, 0x7094f103),
+     TOBN(0x3e2d421d, 0x9598b352), TOBN(0xe865a49c, 0x67412ab4),
+     TOBN(0xd2998a25, 0x1cc3a912), TOBN(0x5d092808, 0x0c74d65d),
+     TOBN(0x73f45908, 0x4088567a), TOBN(0xeb6b280e, 0x1f214a61),
+     TOBN(0x8c9adc34, 0xcaf0c13d), TOBN(0x39d12938, 0xf561fb80),
+     TOBN(0xb2dc3a5e, 0xbc6edfb4), TOBN(0x7485b1b1, 0xfe4d210e),
+     TOBN(0x062e0400, 0xe186ae72), TOBN(0x91e32d5c, 0x6eeb3b88),
+     TOBN(0x6df574d7, 0x4be59224), TOBN(0xebc88ccc, 0x716d55f3),
+     TOBN(0x26c2e6d0, 0xcad6ed33), TOBN(0xc6e21e7d, 0x0d3e8b10),
+     TOBN(0x2cc5840e, 0x5bcc36bb), TOBN(0x9292445e, 0x7da74f69),
+     TOBN(0x8be8d321, 0x4e5193a8), TOBN(0x3ec23629, 0x8df06413),
+     TOBN(0xc7e9ae85, 0xb134defa), TOBN(0x6073b1d0, 0x1bb2d475),
+     TOBN(0xb9ad615e, 0x2863c00d), TOBN(0x9e29493d, 0x525f4ac4),
+     TOBN(0xc32b1dea, 0x4e9acf4f), TOBN(0x3e1f01c8, 0xa50db88d),
+     TOBN(0xb05d70ea, 0x04da916c), TOBN(0x714b0d0a, 0xd865803e),
+     TOBN(0x4bd493fc, 0x9920cb5e), TOBN(0x5b44b1f7, 0x92c7a3ac),
+     TOBN(0xa2a77293, 0xbcec9235), TOBN(0x5ee06e87, 0xcd378553),
+     TOBN(0xceff8173, 0xda621607), TOBN(0x2bb03e4c, 0x99f5d290),
+     TOBN(0x2945106a, 0xa6f734ac), TOBN(0xb5056604, 0xd25c4732),
+     TOBN(0x5945920c, 0xe079afee), TOBN(0x686e17a0, 0x6789831f),
+     TOBN(0x5966bee8, 0xb74a5ae5), TOBN(0x38a673a2, 0x1e258d46),
+     TOBN(0xbd1cc1f2, 0x83141c95), TOBN(0x3b2ecf4f, 0x0e96e486),
+     TOBN(0xcd3aa896, 0x74e5fc78), TOBN(0x415ec10c, 0x2482fa7a),
+     TOBN(0x15234419, 0x80503380), TOBN(0x513d917a, 0xd314b392),
+     TOBN(0xb0b52f4e, 0x63caecae), TOBN(0x07bf22ad, 0x2dc7780b),
+     TOBN(0xe761e8a1, 0xe4306839), TOBN(0x1b3be962, 0x5dd7feaa),
+     TOBN(0x4fe728de, 0x74c778f1), TOBN(0xf1fa0bda, 0x5e0070f6),
+     TOBN(0x85205a31, 0x6ec3f510), TOBN(0x2c7e4a14, 0xd2980475),
+     TOBN(0xde3c19c0, 0x6f30ebfd), TOBN(0xdb1c1f38, 0xd4b7e644),
+     TOBN(0xfe291a75, 0x5dce364a), TOBN(0xb7b22a3c, 0x058f5be3),
+     TOBN(0x2cd2c302, 0x37fea38c), TOBN(0x2930967a, 0x2e17be17),
+     TOBN(0x87f009de, 0x0c061c65), TOBN(0xcb014aac, 0xedc6ed44),
+     TOBN(0x49bd1cb4, 0x3bafb1eb), TOBN(0x81bd8b5c, 0x282d3688),
+     TOBN(0x1cdab87e, 0xf01a17af), TOBN(0x21f37ac4, 0xe710063b),
+     TOBN(0x5a6c5676, 0x42fc8193), TOBN(0xf4753e70, 0x56a6015c),
+     TOBN(0x020f795e, 0xa15b0a44), TOBN(0x8f37c8d7, 0x8958a958),
+     TOBN(0x63b7e89b, 0xa4b675b5), TOBN(0xb4fb0c0c, 0x0fc31aea),
+     TOBN(0xed95e639, 0xa7ff1f2e), TOBN(0x9880f5a3, 0x619614fb),
+     TOBN(0xdeb6ff02, 0x947151ab), TOBN(0x5bc5118c, 0xa868dcdb),
+     TOBN(0xd8da2055, 0x4c20cea5), TOBN(0xcac2776e, 0x14c4d69a),
+     TOBN(0xcccb22c1, 0x622d599b), TOBN(0xa4ddb653, 0x68a9bb50),
+     TOBN(0x2c4ff151, 0x1b4941b4), TOBN(0xe1ff19b4, 0x6efba588),
+     TOBN(0x35034363, 0xc48345e0), TOBN(0x45542e3d, 0x1e29dfc4),
+     TOBN(0xf197cb91, 0x349f7aed), TOBN(0x3b2b5a00, 0x8fca8420),
+     TOBN(0x7c175ee8, 0x23aaf6d8), TOBN(0x54dcf421, 0x35af32b6),
+     TOBN(0x0ba14307, 0x27d6561e), TOBN(0x879d5ee4, 0xd175b1e2),
+     TOBN(0xc7c43673, 0x99807db5), TOBN(0x77a54455, 0x9cd55bcd),
+     TOBN(0xe6c2ff13, 0x0105c072), TOBN(0x18f7a99f, 0x8dda7da4),
+     TOBN(0x4c301820, 0x0e2d35c1), TOBN(0x06a53ca0, 0xd9cc6c82),
+     TOBN(0xaa21cc1e, 0xf1aa1d9e), TOBN(0x32414334, 0x4a75b1e8),
+     TOBN(0x2a6d1328, 0x0ebe9fdc), TOBN(0x16bd173f, 0x98a4755a),
+     TOBN(0xfbb9b245, 0x2133ffd9), TOBN(0x39a8b2f1, 0x830f1a20),
+     TOBN(0x484bc97d, 0xd5a1f52a), TOBN(0xd6aebf56, 0xa40eddf8),
+     TOBN(0x32257acb, 0x76ccdac6), TOBN(0xaf4d36ec, 0x1586ff27),
+     TOBN(0x8eaa8863, 0xf8de7dd1), TOBN(0x0045d5cf, 0x88647c16)}
+    ,
+    {TOBN(0xa6f3d574, 0xc005979d), TOBN(0xc2072b42, 0x6a40e350),
+     TOBN(0xfca5c156, 0x8de2ecf9), TOBN(0xa8c8bf5b, 0xa515344e),
+     TOBN(0x97aee555, 0x114df14a), TOBN(0xd4374a4d, 0xfdc5ec6b),
+     TOBN(0x754cc28f, 0x2ca85418), TOBN(0x71cb9e27, 0xd3c41f78),
+     TOBN(0x89105079, 0x03605c39), TOBN(0xf0843d9e, 0xa142c96c),
+     TOBN(0xf3744934, 0x16923684), TOBN(0x732caa2f, 0xfa0a2893),
+     TOBN(0xb2e8c270, 0x61160170), TOBN(0xc32788cc, 0x437fbaa3),
+     TOBN(0x39cd818e, 0xa6eda3ac), TOBN(0xe2e94239, 0x9e2b2e07),
+     TOBN(0x6967d39b, 0x0260e52a), TOBN(0xd42585cc, 0x90653325),
+     TOBN(0x0d9bd605, 0x21ca7954), TOBN(0x4fa20877, 0x81ed57b3),
+     TOBN(0x60c1eff8, 0xe34a0bbe), TOBN(0x56b0040c, 0x84f6ef64),
+     TOBN(0x28be2b24, 0xb1af8483), TOBN(0xb2278163, 0xf5531614),
+     TOBN(0x8df27545, 0x5922ac1c), TOBN(0xa7b3ef5c, 0xa52b3f63),
+     TOBN(0x8e77b214, 0x71de57c4), TOBN(0x31682c10, 0x834c008b),
+     TOBN(0xc76824f0, 0x4bd55d31), TOBN(0xb6d1c086, 0x17b61c71),
+     TOBN(0x31db0903, 0xc2a5089d), TOBN(0x9c092172, 0x184e5d3f),
+     TOBN(0xdd7ced5b, 0xc00cc638), TOBN(0x1a2015eb, 0x61278fc2),
+     TOBN(0x2e8e5288, 0x6a37f8d6), TOBN(0xc457786f, 0xe79933ad),
+     TOBN(0xb3fe4cce, 0x2c51211a), TOBN(0xad9b10b2, 0x24c20498),
+     TOBN(0x90d87a4f, 0xd28db5e5), TOBN(0x698cd105, 0x3aca2fc3),
+     TOBN(0x4f112d07, 0xe91b536d), TOBN(0xceb982f2, 0x9eba09d6),
+     TOBN(0x3c157b2c, 0x197c396f), TOBN(0xe23c2d41, 0x7b66eb24),
+     TOBN(0x480c57d9, 0x3f330d37), TOBN(0xb3a4c8a1, 0x79108deb),
+     TOBN(0x702388de, 0xcb199ce5), TOBN(0x0b019211, 0xb944a8d4),
+     TOBN(0x24f2a692, 0x840bb336), TOBN(0x7c353bdc, 0xa669fa7b),
+     TOBN(0xda20d6fc, 0xdec9c300), TOBN(0x625fbe2f, 0xa13a4f17),
+     TOBN(0xa2b1b61a, 0xdbc17328), TOBN(0x008965bf, 0xa9515621),
+     TOBN(0x49690939, 0xc620ff46), TOBN(0x182dd27d, 0x8717e91c),
+     TOBN(0x5ace5035, 0xea6c3997), TOBN(0x54259aaa, 0xc2610bef),
+     TOBN(0xef18bb3f, 0x3c80dd39), TOBN(0x6910b95b, 0x5fc3fa39),
+     TOBN(0xfce2f510, 0x43e09aee), TOBN(0xced56c9f, 0xa7675665),
+     TOBN(0x10e265ac, 0xd872db61), TOBN(0x6982812e, 0xae9fce69),
+     TOBN(0x29be11c6, 0xce800998), TOBN(0x72bb1752, 0xb90360d9),
+     TOBN(0x2c193197, 0x5a4ad590), TOBN(0x2ba2f548, 0x9fc1dbc0),
+     TOBN(0x7fe4eebb, 0xe490ebe0), TOBN(0x12a0a4cd, 0x7fae11c0),
+     TOBN(0x7197cf81, 0xe903ba37), TOBN(0xcf7d4aa8, 0xde1c6dd8),
+     TOBN(0x92af6bf4, 0x3fd5684c), TOBN(0x2b26eecf, 0x80360aa1),
+     TOBN(0xbd960f30, 0x00546a82), TOBN(0x407b3c43, 0xf59ad8fe),
+     TOBN(0x86cae5fe, 0x249c82ba), TOBN(0x9e0faec7, 0x2463744c),
+     TOBN(0x87f551e8, 0x94916272), TOBN(0x033f9344, 0x6ceb0615),
+     TOBN(0x1e5eb0d1, 0x8be82e84), TOBN(0x89967f0e, 0x7a582fef),
+     TOBN(0xbcf687d5, 0xa6e921fa), TOBN(0xdfee4cf3, 0xd37a09ba),
+     TOBN(0x94f06965, 0xb493c465), TOBN(0x638b9a1c, 0x7635c030),
+     TOBN(0x76667864, 0x66f05e9f), TOBN(0xccaf6808, 0xc04da725),
+     TOBN(0xca2eb690, 0x768fccfc), TOBN(0xf402d37d, 0xb835b362),
+     TOBN(0x0efac0d0, 0xe2fdfcce), TOBN(0xefc9cdef, 0xb638d990),
+     TOBN(0x2af12b72, 0xd1669a8b), TOBN(0x33c536bc, 0x5774ccbd),
+     TOBN(0x30b21909, 0xfb34870e), TOBN(0xc38fa2f7, 0x7df25aca),
+     TOBN(0x74c5f02b, 0xbf81f3f5), TOBN(0x0525a5ae, 0xaf7e4581),
+     TOBN(0x88d2aaba, 0x433c54ae), TOBN(0xed9775db, 0x806a56c5),
+     TOBN(0xd320738a, 0xc0edb37d), TOBN(0x25fdb6ee, 0x66cc1f51),
+     TOBN(0xac661d17, 0x10600d76), TOBN(0x931ec1f3, 0xbdd1ed76),
+     TOBN(0x65c11d62, 0x19ee43f1), TOBN(0x5cd57c3e, 0x60829d97),
+     TOBN(0xd26c91a3, 0x984be6e8), TOBN(0xf08d9309, 0x8b0c53bd),
+     TOBN(0x94bc9e5b, 0xc016e4ea), TOBN(0xd3916839, 0x11d43d2b),
+     TOBN(0x886c5ad7, 0x73701155), TOBN(0xe0377626, 0x20b00715),
+     TOBN(0x7f01c9ec, 0xaa80ba59), TOBN(0x3083411a, 0x68538e51),
+     TOBN(0x970370f1, 0xe88128af), TOBN(0x625cc3db, 0x91dec14b),
+     TOBN(0xfef9666c, 0x01ac3107), TOBN(0xb2a8d577, 0xd5057ac3),
+     TOBN(0xb0f26299, 0x92be5df7), TOBN(0xf579c8e5, 0x00353924),
+     TOBN(0xb8fa3d93, 0x1341ed7a), TOBN(0x4223272c, 0xa7b59d49),
+     TOBN(0x3dcb1947, 0x83b8c4a4), TOBN(0x4e413c01, 0xed1302e4),
+     TOBN(0x6d999127, 0xe17e44ce), TOBN(0xee86bf75, 0x33b3adfb),
+     TOBN(0xf6902fe6, 0x25aa96ca), TOBN(0xb73540e4, 0xe5aae47d),
+     TOBN(0x32801d7b, 0x1b4a158c), TOBN(0xe571c99e, 0x27e2a369),
+     TOBN(0x40cb76c0, 0x10d9f197), TOBN(0xc308c289, 0x3167c0ae),
+     TOBN(0xa6ef9dd3, 0xeb7958f2), TOBN(0xa7226dfc, 0x300879b1),
+     TOBN(0x6cd0b362, 0x7edf0636), TOBN(0x4efbce6c, 0x7bc37eed),
+     TOBN(0x75f92a05, 0x8d699021), TOBN(0x586d4c79, 0x772566e3),
+     TOBN(0x378ca5f1, 0x761ad23a), TOBN(0x650d86fc, 0x1465a8ac),
+     TOBN(0x7a4ed457, 0x842ba251), TOBN(0x6b65e3e6, 0x42234933),
+     TOBN(0xaf1543b7, 0x31aad657), TOBN(0xa4cefe98, 0xcbfec369),
+     TOBN(0xb587da90, 0x9f47befb), TOBN(0x6562e9fb, 0x41312d13),
+     TOBN(0xa691ea59, 0xeff1cefe), TOBN(0xcc30477a, 0x05fc4cf6),
+     TOBN(0xa1632461, 0x0b0ffd3d), TOBN(0xa1f16f3b, 0x5b355956),
+     TOBN(0x5b148d53, 0x4224ec24), TOBN(0xdc834e7b, 0xf977012a),
+     TOBN(0x7bfc5e75, 0xb2c69dbc), TOBN(0x3aa77a29, 0x03c3da6c),
+     TOBN(0xde0df03c, 0xca910271), TOBN(0xcbd5ca4a, 0x7806dc55),
+     TOBN(0xe1ca5807, 0x6db476cb), TOBN(0xfde15d62, 0x5f37a31e),
+     TOBN(0xf49af520, 0xf41af416), TOBN(0x96c5c5b1, 0x7d342db5),
+     TOBN(0x155c43b7, 0xeb4ceb9b), TOBN(0x2e993010, 0x4e77371a),
+     TOBN(0x1d2987da, 0x675d43af), TOBN(0xef2bc1c0, 0x8599fd72),
+     TOBN(0x96894b7b, 0x9342f6b2), TOBN(0x201eadf2, 0x7c8e71f0),
+     TOBN(0xf3479d9f, 0x4a1f3efc), TOBN(0xe0f8a742, 0x702a9704),
+     TOBN(0xeafd44b6, 0xb3eba40c), TOBN(0xf9739f29, 0xc1c1e0d0),
+     TOBN(0x0091471a, 0x619d505e), TOBN(0xc15f9c96, 0x9d7c263e),
+     TOBN(0x5be47285, 0x83afbe33), TOBN(0xa3b6d6af, 0x04f1e092),
+     TOBN(0xe76526b9, 0x751a9d11), TOBN(0x2ec5b26d, 0x9a4ae4d2),
+     TOBN(0xeb66f4d9, 0x02f6fb8d), TOBN(0x4063c561, 0x96912164),
+     TOBN(0xeb7050c1, 0x80ef3000), TOBN(0x288d1c33, 0xeaa5b3f0),
+     TOBN(0xe87c68d6, 0x07806fd8), TOBN(0xb2f7f9d5, 0x4bbbf50f),
+     TOBN(0x25972f3a, 0xac8d6627), TOBN(0xf8547774, 0x10e8c13b),
+     TOBN(0xcc50ef6c, 0x872b4a60), TOBN(0xab2a34a4, 0x4613521b),
+     TOBN(0x39c5c190, 0x983e15d1), TOBN(0x61dde5df, 0x59905512),
+     TOBN(0xe417f621, 0x9f2275f3), TOBN(0x0750c8b6, 0x451d894b),
+     TOBN(0x75b04ab9, 0x78b0bdaa), TOBN(0x3bfd9fd4, 0x458589bd),
+     TOBN(0xf1013e30, 0xee9120b6), TOBN(0x2b51af93, 0x23a4743e),
+     TOBN(0xea96ffae, 0x48d14d9e), TOBN(0x71dc0dbe, 0x698a1d32),
+     TOBN(0x914962d2, 0x0180cca4), TOBN(0x1ae60677, 0xc3568963),
+     TOBN(0x8cf227b1, 0x437bc444), TOBN(0xc650c83b, 0xc9962c7a),
+     TOBN(0x23c2c7dd, 0xfe7ccfc4), TOBN(0xf925c89d, 0x1b929d48),
+     TOBN(0x4460f74b, 0x06783c33), TOBN(0xac2c8d49, 0xa590475a),
+     TOBN(0xfb40b407, 0xb807bba0), TOBN(0x9d1e362d, 0x69ff8f3a),
+     TOBN(0xa33e9681, 0xcbef64a4), TOBN(0x67ece5fa, 0x332fb4b2),
+     TOBN(0x6900a99b, 0x739f10e3), TOBN(0xc3341ca9, 0xff525925),
+     TOBN(0xee18a626, 0xa9e2d041), TOBN(0xa5a83685, 0x29580ddd),
+     TOBN(0xf3470c81, 0x9d7de3cd), TOBN(0xedf02586, 0x2062cf9c),
+     TOBN(0xf43522fa, 0xc010edb0), TOBN(0x30314135, 0x13a4b1ae),
+     TOBN(0xc792e02a, 0xdb22b94b), TOBN(0x993d8ae9, 0xa1eaa45b),
+     TOBN(0x8aad6cd3, 0xcd1e1c63), TOBN(0x89529ca7, 0xc5ce688a),
+     TOBN(0x2ccee3aa, 0xe572a253), TOBN(0xe02b6438, 0x02a21efb),
+     TOBN(0xa7091b6e, 0xc9430358), TOBN(0x06d1b1fa, 0x9d7db504),
+     TOBN(0x58846d32, 0xc4744733), TOBN(0x40517c71, 0x379f9e34),
+     TOBN(0x2f65655f, 0x130ef6ca), TOBN(0x526e4488, 0xf1f3503f),
+     TOBN(0x8467bd17, 0x7ee4a976), TOBN(0x1d9dc913, 0x921363d1),
+     TOBN(0xd8d24c33, 0xb069e041), TOBN(0x5eb5da0a, 0x2cdf7f51),
+     TOBN(0x1c0f3cb1, 0x197b994f), TOBN(0x3c95a6c5, 0x2843eae9),
+     TOBN(0x7766ffc9, 0xa6097ea5), TOBN(0x7bea4093, 0xd723b867),
+     TOBN(0xb48e1f73, 0x4db378f9), TOBN(0x70025b00, 0xe37b77ac),
+     TOBN(0x943dc8e7, 0xaf24ad46), TOBN(0xb98a15ac, 0x16d00a85),
+     TOBN(0x3adc38ba, 0x2743b004), TOBN(0xb1c7f4f7, 0x334415ee),
+     TOBN(0xea43df8f, 0x1e62d05a), TOBN(0x32618905, 0x9d76a3b6),
+     TOBN(0x2fbd0bb5, 0xa23a0f46), TOBN(0x5bc971db, 0x6a01918c),
+     TOBN(0x7801d94a, 0xb4743f94), TOBN(0xb94df65e, 0x676ae22b),
+     TOBN(0xaafcbfab, 0xaf95894c), TOBN(0x7b9bdc07, 0x276b2241),
+     TOBN(0xeaf98362, 0x5bdda48b), TOBN(0x5977faf2, 0xa3fcb4df),
+     TOBN(0xbed042ef, 0x052c4b5b), TOBN(0x9fe87f71, 0x067591f0),
+     TOBN(0xc89c73ca, 0x22f24ec7), TOBN(0x7d37fa9e, 0xe64a9f1b),
+     TOBN(0x2710841a, 0x15562627), TOBN(0x2c01a613, 0xc243b034),
+     TOBN(0x1d135c56, 0x2bc68609), TOBN(0xc2ca1715, 0x8b03f1f6),
+     TOBN(0xc9966c2d, 0x3eb81d82), TOBN(0xc02abf4a, 0x8f6df13e),
+     TOBN(0x77b34bd7, 0x8f72b43b), TOBN(0xaff6218f, 0x360c82b0),
+     TOBN(0x0aa5726c, 0x8d55b9d2), TOBN(0xdc0adbe9, 0x99e9bffb),
+     TOBN(0x9097549c, 0xefb9e72a), TOBN(0x16755712, 0x9dfb3111),
+     TOBN(0xdd8bf984, 0xf26847f9), TOBN(0xbcb8e387, 0xdfb30cb7),
+     TOBN(0xc1fd32a7, 0x5171ef9c), TOBN(0x977f3fc7, 0x389b363f),
+     TOBN(0x116eaf2b, 0xf4babda0), TOBN(0xfeab68bd, 0xf7113c8e),
+     TOBN(0xd1e3f064, 0xb7def526), TOBN(0x1ac30885, 0xe0b3fa02),
+     TOBN(0x1c5a6e7b, 0x40142d9d), TOBN(0x839b5603, 0x30921c0b),
+     TOBN(0x48f301fa, 0x36a116a3), TOBN(0x380e1107, 0xcfd9ee6d),
+     TOBN(0x7945ead8, 0x58854be1), TOBN(0x4111c12e, 0xcbd4d49d),
+     TOBN(0xece3b1ec, 0x3a29c2ef), TOBN(0x6356d404, 0x8d3616f5),
+     TOBN(0x9f0d6a8f, 0x594d320e), TOBN(0x0989316d, 0xf651ccd2),
+     TOBN(0x6c32117a, 0x0f8fdde4), TOBN(0x9abe5cc5, 0xa26a9bbc),
+     TOBN(0xcff560fb, 0x9723f671), TOBN(0x21b2a12d, 0x7f3d593c),
+     TOBN(0xe4cb18da, 0x24ba0696), TOBN(0x186e2220, 0xc3543384),
+     TOBN(0x722f64e0, 0x88312c29), TOBN(0x94282a99, 0x17dc7752),
+     TOBN(0x62467bbf, 0x5a85ee89), TOBN(0xf435c650, 0xf10076a0),
+     TOBN(0xc9ff1539, 0x43b3a50b), TOBN(0x7132130c, 0x1a53efbc),
+     TOBN(0x31bfe063, 0xf7b0c5b7), TOBN(0xb0179a7d, 0x4ea994cc),
+     TOBN(0x12d064b3, 0xc85f455b), TOBN(0x47259328, 0x8f6e0062),
+     TOBN(0xf64e590b, 0xb875d6d9), TOBN(0x22dd6225, 0xad92bcc7),
+     TOBN(0xb658038e, 0xb9c3bd6d), TOBN(0x00cdb0d6, 0xfbba27c8),
+     TOBN(0x0c681337, 0x1062c45d), TOBN(0xd8515b8c, 0x2d33407d),
+     TOBN(0xcb8f699e, 0x8cbb5ecf), TOBN(0x8c4347f8, 0xc608d7d8),
+     TOBN(0x2c11850a, 0xbb3e00db), TOBN(0x20a8dafd, 0xecb49d19),
+     TOBN(0xbd781480, 0x45ee2f40), TOBN(0x75e354af, 0x416b60cf),
+     TOBN(0xde0b58a1, 0x8d49a8c4), TOBN(0xe40e94e2, 0xfa359536),
+     TOBN(0xbd4fa59f, 0x62accd76), TOBN(0x05cf466a, 0x8c762837),
+     TOBN(0xb5abda99, 0x448c277b), TOBN(0x5a9e01bf, 0x48b13740),
+     TOBN(0x9d457798, 0x326aad8d), TOBN(0xbdef4954, 0xc396f7e7),
+     TOBN(0x6fb274a2, 0xc253e292), TOBN(0x2800bf0a, 0x1cfe53e7),
+     TOBN(0x22426d31, 0x44438fd4), TOBN(0xef233923, 0x5e259f9a),
+     TOBN(0x4188503c, 0x03f66264), TOBN(0x9e5e7f13, 0x7f9fdfab),
+     TOBN(0x565eb76c, 0x5fcc1aba), TOBN(0xea632548, 0x59b5bff8),
+     TOBN(0x5587c087, 0xaab6d3fa), TOBN(0x92b639ea, 0x6ce39c1b),
+     TOBN(0x0706e782, 0x953b135c), TOBN(0x7308912e, 0x425268ef),
+     TOBN(0x599e92c7, 0x090e7469), TOBN(0x83b90f52, 0x9bc35e75),
+     TOBN(0x4750b3d0, 0x244975b3), TOBN(0xf3a44358, 0x11965d72),
+     TOBN(0x179c6774, 0x9c8dc751), TOBN(0xff18cdfe, 0xd23d9ff0),
+     TOBN(0xc4013833, 0x2028e247), TOBN(0x96e280e2, 0xf3bfbc79),
+     TOBN(0xf60417bd, 0xd0880a84), TOBN(0x263c9f3d, 0x2a568151),
+     TOBN(0x36be15b3, 0x2d2ce811), TOBN(0x846dc0c2, 0xf8291d21),
+     TOBN(0x5cfa0ecb, 0x789fcfdb), TOBN(0x45a0beed, 0xd7535b9a),
+     TOBN(0xec8e9f07, 0x96d69af1), TOBN(0x31a7c5b8, 0x599ab6dc),
+     TOBN(0xd36d45ef, 0xf9e2e09f), TOBN(0x3cf49ef1, 0xdcee954b),
+     TOBN(0x6be34cf3, 0x086cff9b), TOBN(0x88dbd491, 0x39a3360f),
+     TOBN(0x1e96b8cc, 0x0dbfbd1d), TOBN(0xc1e5f7bf, 0xcb7e2552),
+     TOBN(0x0547b214, 0x28819d98), TOBN(0xc770dd9c, 0x7aea9dcb),
+     TOBN(0xaef0d4c7, 0x041d68c8), TOBN(0xcc2b9818, 0x13cb9ba8),
+     TOBN(0x7fc7bc76, 0xfe86c607), TOBN(0x6b7b9337, 0x502a9a95),
+     TOBN(0x1948dc27, 0xd14dab63), TOBN(0x249dd198, 0xdae047be),
+     TOBN(0xe8356584, 0xa981a202), TOBN(0x3531dd18, 0x3a893387),
+     TOBN(0x1be11f90, 0xc85c7209), TOBN(0x93d2fe1e, 0xe2a52b5a),
+     TOBN(0x8225bfe2, 0xec6d6b97), TOBN(0x9cf6d6f4, 0xbd0aa5de),
+     TOBN(0x911459cb, 0x54779f5f), TOBN(0x5649cddb, 0x86aeb1f3),
+     TOBN(0x32133579, 0x3f26ce5a), TOBN(0xc289a102, 0x550f431e),
+     TOBN(0x559dcfda, 0x73b84c6f), TOBN(0x84973819, 0xee3ac4d7),
+     TOBN(0xb51e55e6, 0xf2606a82), TOBN(0xe25f7061, 0x90f2fb57),
+     TOBN(0xacef6c2a, 0xb1a4e37c), TOBN(0x864e359d, 0x5dcf2706),
+     TOBN(0x479e6b18, 0x7ce57316), TOBN(0x2cab2500, 0x3a96b23d),
+     TOBN(0xed489862, 0x8ef16df7), TOBN(0x2056538c, 0xef3758b5),
+     TOBN(0xa7df865e, 0xf15d3101), TOBN(0x80c5533a, 0x61b553d7),
+     TOBN(0x366e1997, 0x4ed14294), TOBN(0x6620741f, 0xb3c0bcd6),
+     TOBN(0x21d1d9c4, 0xedc45418), TOBN(0x005b859e, 0xc1cc4a9d),
+     TOBN(0xdf01f630, 0xa1c462f0), TOBN(0x15d06cf3, 0xf26820c7),
+     TOBN(0x9f7f24ee, 0x3484be47), TOBN(0x2ff33e96, 0x4a0c902f),
+     TOBN(0x00bdf457, 0x5a0bc453), TOBN(0x2378dfaf, 0x1aa238db),
+     TOBN(0x272420ec, 0x856720f2), TOBN(0x2ad9d95b, 0x96797291),
+     TOBN(0xd1242cc6, 0x768a1558), TOBN(0x2e287f8b, 0x5cc86aa8),
+     TOBN(0x796873d0, 0x990cecaa), TOBN(0xade55f81, 0x675d4080),
+     TOBN(0x2645eea3, 0x21f0cd84), TOBN(0x7a1efa0f, 0xb4e17d02),
+     TOBN(0xf6858420, 0x037cc061), TOBN(0x682e05f0, 0xd5d43e12),
+     TOBN(0x59c36994, 0x27218710), TOBN(0x85cbba4d, 0x3f7cd2fc),
+     TOBN(0x726f9729, 0x7a3cd22a), TOBN(0x9f8cd5dc, 0x4a628397),
+     TOBN(0x17b93ab9, 0xc23165ed), TOBN(0xff5f5dbf, 0x122823d4),
+     TOBN(0xc1e4e4b5, 0x654a446d), TOBN(0xd1a9496f, 0x677257ba),
+     TOBN(0x6387ba94, 0xde766a56), TOBN(0x23608bc8, 0x521ec74a),
+     TOBN(0x16a522d7, 0x6688c4d4), TOBN(0x9d6b4282, 0x07373abd),
+     TOBN(0xa62f07ac, 0xb42efaa3), TOBN(0xf73e00f7, 0xe3b90180),
+     TOBN(0x36175fec, 0x49421c3e), TOBN(0xc4e44f9b, 0x3dcf2678),
+     TOBN(0x76df436b, 0x7220f09f), TOBN(0x172755fb, 0x3aa8b6cf),
+     TOBN(0xbab89d57, 0x446139cc), TOBN(0x0a0a6e02, 0x5fe0208f),
+     TOBN(0xcdbb63e2, 0x11e5d399), TOBN(0x33ecaa12, 0xa8977f0b),
+     TOBN(0x59598b21, 0xf7c42664), TOBN(0xb3e91b32, 0xab65d08a),
+     TOBN(0x035822ee, 0xf4502526), TOBN(0x1dcf0176, 0x720a82a9),
+     TOBN(0x50f8598f, 0x3d589e02), TOBN(0xdf0478ff, 0xb1d63d2c),
+     TOBN(0x8b8068bd, 0x1571cd07), TOBN(0x30c3aa4f, 0xd79670cd),
+     TOBN(0x25e8fd4b, 0x941ade7f), TOBN(0x3d1debdc, 0x32790011),
+     TOBN(0x65b6dcbd, 0x3a3f9ff0), TOBN(0x282736a4, 0x793de69c),
+     TOBN(0xef69a0c3, 0xd41d3bd3), TOBN(0xb533b8c9, 0x07a26bde),
+     TOBN(0xe2801d97, 0xdb2edf9f), TOBN(0xdc4a8269, 0xe1877af0),
+     TOBN(0x6c1c5851, 0x3d590dbe), TOBN(0x84632f6b, 0xee4e9357),
+     TOBN(0xd36d36b7, 0x79b33374), TOBN(0xb46833e3, 0x9bbca2e6),
+     TOBN(0x37893913, 0xf7fc0586), TOBN(0x385315f7, 0x66bf4719),
+     TOBN(0x72c56293, 0xb31855dc), TOBN(0xd1416d4e, 0x849061fe),
+     TOBN(0xbeb3ab78, 0x51047213), TOBN(0x447f6e61, 0xf040c996),
+     TOBN(0xd06d310d, 0x638b1d0c), TOBN(0xe28a413f, 0xbad1522e),
+     TOBN(0x685a76cb, 0x82003f86), TOBN(0x610d07f7, 0x0bcdbca3),
+     TOBN(0x6ff66021, 0x9ca4c455), TOBN(0x7df39b87, 0xcea10eec),
+     TOBN(0xb9255f96, 0xe22db218), TOBN(0x8cc6d9eb, 0x08a34c44),
+     TOBN(0xcd4ffb86, 0x859f9276), TOBN(0x8fa15eb2, 0x50d07335),
+     TOBN(0xdf553845, 0xcf2c24b5), TOBN(0x89f66a9f, 0x52f9c3ba),
+     TOBN(0x8f22b5b9, 0xe4a7ceb3), TOBN(0xaffef809, 0x0e134686),
+     TOBN(0x3e53e1c6, 0x8eb8fac2), TOBN(0x93c1e4eb, 0x28aec98e),
+     TOBN(0xb6b91ec5, 0x32a43bcb), TOBN(0x2dbfa947, 0xb2d74a51),
+     TOBN(0xe065d190, 0xca84bad7), TOBN(0xfb13919f, 0xad58e65c),
+     TOBN(0x3c41718b, 0xf1cb6e31), TOBN(0x688969f0, 0x06d05c3f),
+     TOBN(0xd4f94ce7, 0x21264d45), TOBN(0xfdfb65e9, 0x7367532b),
+     TOBN(0x5b1be8b1, 0x0945a39d), TOBN(0x229f789c, 0x2b8baf3b),
+     TOBN(0xd8f41f3e, 0x6f49f15d), TOBN(0x678ce828, 0x907f0792),
+     TOBN(0xc69ace82, 0xfca6e867), TOBN(0x106451ae, 0xd01dcc89),
+     TOBN(0x1bb4f7f0, 0x19fc32d2), TOBN(0x64633dfc, 0xb00c52d2),
+     TOBN(0x8f13549a, 0xad9ea445), TOBN(0x99a3bf50, 0xfb323705),
+     TOBN(0x0c9625a2, 0x534d4dbc), TOBN(0x45b8f1d1, 0xc2a2fea3),
+     TOBN(0x76ec21a1, 0xa530fc1a), TOBN(0x4bac9c2a, 0x9e5bd734),
+     TOBN(0x5996d76a, 0x7b4e3587), TOBN(0x0045cdee, 0x1182d9e3),
+     TOBN(0x1aee24b9, 0x1207f13d), TOBN(0x66452e97, 0x97345a41),
+     TOBN(0x16e5b054, 0x9f950cd0), TOBN(0x9cc72fb1, 0xd7fdd075),
+     TOBN(0x6edd61e7, 0x66249663), TOBN(0xde4caa4d, 0xf043cccb),
+     TOBN(0x11b1f57a, 0x55c7ac17), TOBN(0x779cbd44, 0x1a85e24d),
+     TOBN(0x78030f86, 0xe46081e7), TOBN(0xfd4a6032, 0x8e20f643),
+     TOBN(0xcc7a6488, 0x0a750c0f), TOBN(0x39bacfe3, 0x4e548e83),
+     TOBN(0x3d418c76, 0x0c110f05), TOBN(0x3e4daa4c, 0xb1f11588),
+     TOBN(0x2733e7b5, 0x5ffc69ff), TOBN(0x46f147bc, 0x92053127),
+     TOBN(0x885b2434, 0xd722df94), TOBN(0x6a444f65, 0xe6fc6b7c)}
+    ,
+    {TOBN(0x7a1a465a, 0xc3f16ea8), TOBN(0x115a461d, 0xb2f1d11c),
+     TOBN(0x4767dd95, 0x6c68a172), TOBN(0x3392f2eb, 0xd13a4698),
+     TOBN(0xc7a99ccd, 0xe526cdc7), TOBN(0x8e537fdc, 0x22292b81),
+     TOBN(0x76d8cf69, 0xa6d39198), TOBN(0xffc5ff43, 0x2446852d),
+     TOBN(0x97b14f7e, 0xa90567e6), TOBN(0x513257b7, 0xb6ae5cb7),
+     TOBN(0x85454a3c, 0x9f10903d), TOBN(0xd8d2c9ad, 0x69bc3724),
+     TOBN(0x38da9324, 0x6b29cb44), TOBN(0xb540a21d, 0x77c8cbac),
+     TOBN(0x9bbfe435, 0x01918e42), TOBN(0xfffa707a, 0x56c3614e),
+     TOBN(0x0ce4e3f1, 0xd4e353b7), TOBN(0x062d8a14, 0xef46b0a0),
+     TOBN(0x6408d5ab, 0x574b73fd), TOBN(0xbc41d1c9, 0xd3273ffd),
+     TOBN(0x3538e1e7, 0x6be77800), TOBN(0x71fe8b37, 0xc5655031),
+     TOBN(0x1cd91621, 0x6b9b331a), TOBN(0xad825d0b, 0xbb388f73),
+     TOBN(0x56c2e05b, 0x1cb76219), TOBN(0x0ec0bf91, 0x71567e7e),
+     TOBN(0xe7076f86, 0x61c4c910), TOBN(0xd67b085b, 0xbabc04d9),
+     TOBN(0x9fb90459, 0x5e93a96a), TOBN(0x7526c1ea, 0xfbdc249a),
+     TOBN(0x0d44d367, 0xecdd0bb7), TOBN(0x95399917, 0x9dc0d695),
+     TOBN(0x61360ee9, 0x9e240d18), TOBN(0x057cdcac, 0xb4b94466),
+     TOBN(0xe7667cd1, 0x2fe5325c), TOBN(0x1fa297b5, 0x21974e3b),
+     TOBN(0xfa4081e7, 0xdb083d76), TOBN(0x31993be6, 0xf206bd15),
+     TOBN(0x8949269b, 0x14c19f8c), TOBN(0x21468d72, 0xa9d92357),
+     TOBN(0x2ccbc583, 0xa4c506ec), TOBN(0x957ed188, 0xd1acfe97),
+     TOBN(0x8baed833, 0x12f1aea2), TOBN(0xef2a6cb4, 0x8325362d),
+     TOBN(0x130dde42, 0x8e195c43), TOBN(0xc842025a, 0x0e6050c6),
+     TOBN(0x2da972a7, 0x08686a5d), TOBN(0xb52999a1, 0xe508b4a8),
+     TOBN(0xd9f090b9, 0x10a5a8bd), TOBN(0xca91d249, 0x096864da),
+     TOBN(0x8e6a93be, 0x3f67dbc1), TOBN(0xacae6fba, 0xf5f4764c),
+     TOBN(0x1563c6e0, 0xd21411a0), TOBN(0x28fa787f, 0xda0a4ad8),
+     TOBN(0xd524491c, 0x908c8030), TOBN(0x1257ba0e, 0x4c795f07),
+     TOBN(0x83f49167, 0xceca9754), TOBN(0x426d2cf6, 0x4b7939a0),
+     TOBN(0x2555e355, 0x723fd0bf), TOBN(0xa96e6d06, 0xc4f144e2),
+     TOBN(0x4768a8dd, 0x87880e61), TOBN(0x15543815, 0xe508e4d5),
+     TOBN(0x09d7e772, 0xb1b65e15), TOBN(0x63439dd6, 0xac302fa0),
+     TOBN(0xb93f802f, 0xc14e35c2), TOBN(0x71735b7c, 0x4341333c),
+     TOBN(0x03a25104, 0x16d4f362), TOBN(0x3f4d069b, 0xbf433c8e),
+     TOBN(0x0d83ae01, 0xf78f5a7c), TOBN(0x50a8ffbe, 0x7c4eed07),
+     TOBN(0xc74f8906, 0x76e10f83), TOBN(0x7d080966, 0x9ddaf8e1),
+     TOBN(0xb11df8e1, 0x698e04cc), TOBN(0x877be203, 0x169005c8),
+     TOBN(0x32749e8c, 0x4f3c6179), TOBN(0x2dbc9d0a, 0x7853fc05),
+     TOBN(0x187d4f93, 0x9454d937), TOBN(0xe682ce9d, 0xb4800e1b),
+     TOBN(0xa9129ad8, 0x165e68e8), TOBN(0x0fe29735, 0xbe7f785b),
+     TOBN(0x5303f40c, 0x5b9e02b7), TOBN(0xa37c9692, 0x35ee04e8),
+     TOBN(0x5f46cc20, 0x34d6632b), TOBN(0x55ef72b2, 0x96ac545b),
+     TOBN(0xabec5c1f, 0x7b91b062), TOBN(0x0a79e1c7, 0xbb33e821),
+     TOBN(0xbb04b428, 0x3a9f4117), TOBN(0x0de1f28f, 0xfd2a475a),
+     TOBN(0x31019ccf, 0x3a4434b4), TOBN(0xa3458111, 0x1a7954dc),
+     TOBN(0xa9dac80d, 0xe34972a7), TOBN(0xb043d054, 0x74f6b8dd),
+     TOBN(0x021c319e, 0x11137b1a), TOBN(0x00a754ce, 0xed5cc03f),
+     TOBN(0x0aa2c794, 0xcbea5ad4), TOBN(0x093e67f4, 0x70c015b6),
+     TOBN(0x72cdfee9, 0xc97e3f6b), TOBN(0xc10bcab4, 0xb6da7461),
+     TOBN(0x3b02d2fc, 0xb59806b9), TOBN(0x85185e89, 0xa1de6f47),
+     TOBN(0x39e6931f, 0x0eb6c4d4), TOBN(0x4d4440bd, 0xd4fa5b04),
+     TOBN(0x5418786e, 0x34be7eb8), TOBN(0x6380e521, 0x9d7259bc),
+     TOBN(0x20ac0351, 0xd598d710), TOBN(0x272c4166, 0xcb3a4da4),
+     TOBN(0xdb82fe1a, 0xca71de1f), TOBN(0x746e79f2, 0xd8f54b0f),
+     TOBN(0x6e7fc736, 0x4b573e9b), TOBN(0x75d03f46, 0xfd4b5040),
+     TOBN(0x5c1cc36d, 0x0b98d87b), TOBN(0x513ba3f1, 0x1f472da1),
+     TOBN(0x79d0af26, 0xabb177dd), TOBN(0xf82ab568, 0x7891d564),
+     TOBN(0x2b6768a9, 0x72232173), TOBN(0xefbb3bb0, 0x8c1f6619),
+     TOBN(0xb29c11db, 0xa6d18358), TOBN(0x519e2797, 0xb0916d3a),
+     TOBN(0xd4dc18f0, 0x9188e290), TOBN(0x648e86e3, 0x98b0ca7f),
+     TOBN(0x859d3145, 0x983c38b5), TOBN(0xb14f176c, 0x637abc8b),
+     TOBN(0x2793fb9d, 0xcaff7be6), TOBN(0xebe5a55f, 0x35a66a5a),
+     TOBN(0x7cec1dcd, 0x9f87dc59), TOBN(0x7c595cd3, 0xfbdbf560),
+     TOBN(0x5b543b22, 0x26eb3257), TOBN(0x69080646, 0xc4c935fd),
+     TOBN(0x7f2e4403, 0x81e9ede3), TOBN(0x243c3894, 0xcaf6df0a),
+     TOBN(0x7c605bb1, 0x1c073b11), TOBN(0xcd06a541, 0xba6a4a62),
+     TOBN(0x29168949, 0x49d4e2e5), TOBN(0x33649d07, 0x4af66880),
+     TOBN(0xbfc0c885, 0xe9a85035), TOBN(0xb4e52113, 0xfc410f4b),
+     TOBN(0xdca3b706, 0x78a6513b), TOBN(0x92ea4a2a, 0x9edb1943),
+     TOBN(0x02642216, 0xdb6e2dd8), TOBN(0x9b45d0b4, 0x9fd57894),
+     TOBN(0x114e70db, 0xc69d11ae), TOBN(0x1477dd19, 0x4c57595f),
+     TOBN(0xbc2208b4, 0xec77c272), TOBN(0x95c5b4d7, 0xdb68f59c),
+     TOBN(0xb8c4fc63, 0x42e532b7), TOBN(0x386ba422, 0x9ae35290),
+     TOBN(0xfb5dda42, 0xd201ecbc), TOBN(0x2353dc8b, 0xa0e38fd6),
+     TOBN(0x9a0b85ea, 0x68f7e978), TOBN(0x96ec5682, 0x2ad6d11f),
+     TOBN(0x5e279d6c, 0xe5f6886d), TOBN(0xd3fe03cd, 0x3cb1914d),
+     TOBN(0xfe541fa4, 0x7ea67c77), TOBN(0x952bd2af, 0xe3ea810c),
+     TOBN(0x791fef56, 0x8d01d374), TOBN(0xa3a1c621, 0x0f11336e),
+     TOBN(0x5ad0d5a9, 0xc7ec6d79), TOBN(0xff7038af, 0x3225c342),
+     TOBN(0x003c6689, 0xbc69601b), TOBN(0x25059bc7, 0x45e8747d),
+     TOBN(0xfa4965b2, 0xf2086fbf), TOBN(0xf6840ea6, 0x86916078),
+     TOBN(0xd7ac7620, 0x70081d6c), TOBN(0xe600da31, 0xb5328645),
+     TOBN(0x01916f63, 0x529b8a80), TOBN(0xe80e4858, 0x2d7d6f3e),
+     TOBN(0x29eb0fe8, 0xd664ca7c), TOBN(0xf017637b, 0xe7b43b0c),
+     TOBN(0x9a75c806, 0x76cb2566), TOBN(0x8f76acb1, 0xb24892d9),
+     TOBN(0x7ae7b9cc, 0x1f08fe45), TOBN(0x19ef7329, 0x6a4907d8),
+     TOBN(0x2db4ab71, 0x5f228bf0), TOBN(0xf3cdea39, 0x817032d7),
+     TOBN(0x0b1f482e, 0xdcabe3c0), TOBN(0x3baf76b4, 0xbb86325c),
+     TOBN(0xd49065e0, 0x10089465), TOBN(0x3bab5d29, 0x8e77c596),
+     TOBN(0x7636c3a6, 0x193dbd95), TOBN(0xdef5d294, 0xb246e499),
+     TOBN(0xb22c58b9, 0x286b2475), TOBN(0xa0b93939, 0xcd80862b),
+     TOBN(0x3002c83a, 0xf0992388), TOBN(0x6de01f9b, 0xeacbe14c),
+     TOBN(0x6aac688e, 0xadd70482), TOBN(0x708de92a, 0x7b4a4e8a),
+     TOBN(0x75b6dd73, 0x758a6eef), TOBN(0xea4bf352, 0x725b3c43),
+     TOBN(0x10041f2c, 0x87912868), TOBN(0xb1b1be95, 0xef09297a),
+     TOBN(0x19ae23c5, 0xa9f3860a), TOBN(0xc4f0f839, 0x515dcf4b),
+     TOBN(0x3c7ecca3, 0x97f6306a), TOBN(0x744c44ae, 0x68a3a4b0),
+     TOBN(0x69cd13a0, 0xb3a1d8a2), TOBN(0x7cad0a1e, 0x5256b578),
+     TOBN(0xea653fcd, 0x33791d9e), TOBN(0x9cc2a05d, 0x74b2e05f),
+     TOBN(0x73b391dc, 0xfd7affa2), TOBN(0xddb7091e, 0xb6b05442),
+     TOBN(0xc71e27bf, 0x8538a5c6), TOBN(0x195c63dd, 0x89abff17),
+     TOBN(0xfd315285, 0x1b71e3da), TOBN(0x9cbdfda7, 0xfa680fa0),
+     TOBN(0x9db876ca, 0x849d7eab), TOBN(0xebe2764b, 0x3c273271),
+     TOBN(0x663357e3, 0xf208dcea), TOBN(0x8c5bd833, 0x565b1b70),
+     TOBN(0xccc3b4f5, 0x9837fc0d), TOBN(0x9b641ba8, 0xa79cf00f),
+     TOBN(0x7428243d, 0xdfdf3990), TOBN(0x83a594c4, 0x020786b1),
+     TOBN(0xb712451a, 0x526c4502), TOBN(0x9d39438e, 0x6adb3f93),
+     TOBN(0xfdb261e3, 0xe9ff0ccd), TOBN(0x80344e3c, 0xe07af4c3),
+     TOBN(0x75900d7c, 0x2fa4f126), TOBN(0x08a3b865, 0x5c99a232),
+     TOBN(0x2478b6bf, 0xdb25e0c3), TOBN(0x482cc2c2, 0x71db2edf),
+     TOBN(0x37df7e64, 0x5f321bb8), TOBN(0x8a93821b, 0x9a8005b4),
+     TOBN(0x3fa2f10c, 0xcc8c1958), TOBN(0x0d332218, 0x2c269d0a),
+     TOBN(0x20ab8119, 0xe246b0e6), TOBN(0xb39781e4, 0xd349fd17),
+     TOBN(0xd293231e, 0xb31aa100), TOBN(0x4b779c97, 0xbb032168),
+     TOBN(0x4b3f19e1, 0xc8470500), TOBN(0x45b7efe9, 0x0c4c869d),
+     TOBN(0xdb84f38a, 0xa1a6bbcc), TOBN(0x3b59cb15, 0xb2fddbc1),
+     TOBN(0xba5514df, 0x3fd165e8), TOBN(0x499fd6a9, 0x061f8811),
+     TOBN(0x72cd1fe0, 0xbfef9f00), TOBN(0x120a4bb9, 0x79ad7e8a),
+     TOBN(0xf2ffd095, 0x5f4a5ac5), TOBN(0xcfd174f1, 0x95a7a2f0),
+     TOBN(0xd42301ba, 0x9d17baf1), TOBN(0xd2fa487a, 0x77f22089),
+     TOBN(0x9cb09efe, 0xb1dc77e1), TOBN(0xe9566939, 0x21c99682),
+     TOBN(0x8c546901, 0x6c6067bb), TOBN(0xfd378574, 0x61c24456),
+     TOBN(0x2b6a6cbe, 0x81796b33), TOBN(0x62d550f6, 0x58e87f8b),
+     TOBN(0x1b763e1c, 0x7f1b01b4), TOBN(0x4b93cfea, 0x1b1b5e12),
+     TOBN(0xb9345238, 0x1d531696), TOBN(0x57201c00, 0x88cdde69),
+     TOBN(0xdde92251, 0x9a86afc7), TOBN(0xe3043895, 0xbd35cea8),
+     TOBN(0x7608c1e1, 0x8555970d), TOBN(0x8267dfa9, 0x2535935e),
+     TOBN(0xd4c60a57, 0x322ea38b), TOBN(0xe0bf7977, 0x804ef8b5),
+     TOBN(0x1a0dab28, 0xc06fece4), TOBN(0xd405991e, 0x94e7b49d),
+     TOBN(0xc542b6d2, 0x706dab28), TOBN(0xcb228da3, 0xa91618fb),
+     TOBN(0x224e4164, 0x107d1cea), TOBN(0xeb9fdab3, 0xd0f5d8f1),
+     TOBN(0xc02ba386, 0x0d6e41cd), TOBN(0x676a72c5, 0x9b1f7146),
+     TOBN(0xffd6dd98, 0x4d6cb00b), TOBN(0xcef9c5ca, 0xde2e8d7c),
+     TOBN(0xa1bbf5d7, 0x641c7936), TOBN(0x1b95b230, 0xee8f772e),
+     TOBN(0xf765a92e, 0xe8ac25b1), TOBN(0xceb04cfc, 0x3a18b7c6),
+     TOBN(0x27944cef, 0x0acc8966), TOBN(0xcbb3c957, 0x434c1004),
+     TOBN(0x9c9971a1, 0xa43ff93c), TOBN(0x5bc2db17, 0xa1e358a9),
+     TOBN(0x45b4862e, 0xa8d9bc82), TOBN(0x70ebfbfb, 0x2201e052),
+     TOBN(0xafdf64c7, 0x92871591), TOBN(0xea5bcae6, 0xb42d0219),
+     TOBN(0xde536c55, 0x2ad8f03c), TOBN(0xcd6c3f4d, 0xa76aa33c),
+     TOBN(0xbeb5f623, 0x0bca6de3), TOBN(0xdd20dd99, 0xb1e706fd),
+     TOBN(0x90b3ff9d, 0xac9059d4), TOBN(0x2d7b2902, 0x7ccccc4e),
+     TOBN(0x8a090a59, 0xce98840f), TOBN(0xa5d947e0, 0x8410680a),
+     TOBN(0x49ae346a, 0x923379a5), TOBN(0x7dbc84f9, 0xb28a3156),
+     TOBN(0xfd40d916, 0x54a1aff2), TOBN(0xabf318ba, 0x3a78fb9b),
+     TOBN(0x50152ed8, 0x3029f95e), TOBN(0x9fc1dd77, 0xc58ad7fa),
+     TOBN(0x5fa57915, 0x13595c17), TOBN(0xb9504668, 0x8f62b3a9),
+     TOBN(0x907b5b24, 0xff3055b0), TOBN(0x2e995e35, 0x9a84f125),
+     TOBN(0x87dacf69, 0x7e9bbcfb), TOBN(0x95d0c1d6, 0xe86d96e3),
+     TOBN(0x65726e3c, 0x2d95a75c), TOBN(0x2c3c9001, 0xacd27f21),
+     TOBN(0x1deab561, 0x6c973f57), TOBN(0x108b7e2c, 0xa5221643),
+     TOBN(0x5fee9859, 0xc4ef79d4), TOBN(0xbd62b88a, 0x40d4b8c6),
+     TOBN(0xb4dd29c4, 0x197c75d6), TOBN(0x266a6df2, 0xb7076feb),
+     TOBN(0x9512d0ea, 0x4bf2df11), TOBN(0x1320c24f, 0x6b0cc9ec),
+     TOBN(0x6bb1e0e1, 0x01a59596), TOBN(0x8317c5bb, 0xeff9aaac),
+     TOBN(0x65bb405e, 0x385aa6c9), TOBN(0x613439c1, 0x8f07988f),
+     TOBN(0xd730049f, 0x16a66e91), TOBN(0xe97f2820, 0xfa1b0e0d),
+     TOBN(0x4131e003, 0x304c28ea), TOBN(0x820ab732, 0x526bac62),
+     TOBN(0xb2ac9ef9, 0x28714423), TOBN(0x54ecfffa, 0xadb10cb2),
+     TOBN(0x8781476e, 0xf886a4cc), TOBN(0x4b2c87b5, 0xdb2f8d49),
+     TOBN(0xe857cd20, 0x0a44295d), TOBN(0x707d7d21, 0x58c6b044),
+     TOBN(0xae8521f9, 0xf596757c), TOBN(0x87448f03, 0x67b2b714),
+     TOBN(0x13a9bc45, 0x5ebcd58d), TOBN(0x79bcced9, 0x9122d3c1),
+     TOBN(0x3c644247, 0x9e076642), TOBN(0x0cf22778, 0x2df4767d),
+     TOBN(0x5e61aee4, 0x71d444b6), TOBN(0x211236bf, 0xc5084a1d),
+     TOBN(0x7e15bc9a, 0x4fd3eaf6), TOBN(0x68df2c34, 0xab622bf5),
+     TOBN(0x9e674f0f, 0x59bf4f36), TOBN(0xf883669b, 0xd7f34d73),
+     TOBN(0xc48ac1b8, 0x31497b1d), TOBN(0x323b925d, 0x5106703b),
+     TOBN(0x22156f42, 0x74082008), TOBN(0xeffc521a, 0xc8482bcb),
+     TOBN(0x5c6831bf, 0x12173479), TOBN(0xcaa2528f, 0xc4739490),
+     TOBN(0x84d2102a, 0x8f1b3c4d), TOBN(0xcf64dfc1, 0x2d9bec0d),
+     TOBN(0x433febad, 0x78a546ef), TOBN(0x1f621ec3, 0x7b73cef1),
+     TOBN(0x6aecd627, 0x37338615), TOBN(0x162082ab, 0x01d8edf6),
+     TOBN(0x833a8119, 0x19e86b66), TOBN(0x6023a251, 0xd299b5db),
+     TOBN(0xf5bb0c3a, 0xbbf04b89), TOBN(0x6735eb69, 0xae749a44),
+     TOBN(0xd0e058c5, 0x4713de3b), TOBN(0xfdf2593e, 0x2c3d4ccd),
+     TOBN(0x1b8f414e, 0xfdd23667), TOBN(0xdd52aaca, 0xfa2015ee),
+     TOBN(0x3e31b517, 0xbd9625ff), TOBN(0x5ec9322d, 0x8db5918c),
+     TOBN(0xbc73ac85, 0xa96f5294), TOBN(0x82aa5bf3, 0x61a0666a),
+     TOBN(0x49755810, 0xbf08ac42), TOBN(0xd21cdfd5, 0x891cedfc),
+     TOBN(0x918cb57b, 0x67f8be10), TOBN(0x365d1a7c, 0x56ffa726),
+     TOBN(0x2435c504, 0x6532de93), TOBN(0xc0fc5e10, 0x2674cd02),
+     TOBN(0x6e51fcf8, 0x9cbbb142), TOBN(0x1d436e5a, 0xafc50692),
+     TOBN(0x766bffff, 0x3fbcae22), TOBN(0x3148c2fd, 0xfd55d3b8),
+     TOBN(0x52c7fdc9, 0x233222fa), TOBN(0x89ff1092, 0xe419fb6b),
+     TOBN(0x3cd6db99, 0x25254977), TOBN(0x2e85a161, 0x1cf12ca7),
+     TOBN(0xadd2547c, 0xdc810bc9), TOBN(0xea3f458f, 0x9d257c22),
+     TOBN(0x642c1fbe, 0x27d6b19b), TOBN(0xed07e6b5, 0x140481a6),
+     TOBN(0x6ada1d42, 0x86d2e0f8), TOBN(0xe5920122, 0x0e8a9fd5),
+     TOBN(0x02c936af, 0x708c1b49), TOBN(0x60f30fee, 0x2b4bfaff),
+     TOBN(0x6637ad06, 0x858e6a61), TOBN(0xce4c7767, 0x3fd374d0),
+     TOBN(0x39d54b2d, 0x7188defb), TOBN(0xa8c9d250, 0xf56a6b66),
+     TOBN(0x58fc0f5e, 0xb24fe1dc), TOBN(0x9eaf9dee, 0x6b73f24c),
+     TOBN(0xa90d588b, 0x33650705), TOBN(0xde5b62c5, 0xaf2ec729),
+     TOBN(0x5c72cfae, 0xd3c2b36e), TOBN(0x868c19d5, 0x034435da),
+     TOBN(0x88605f93, 0xe17ee145), TOBN(0xaa60c4ee, 0x77a5d5b1),
+     TOBN(0xbcf5bfd2, 0x3b60c472), TOBN(0xaf4ef13c, 0xeb1d3049),
+     TOBN(0x373f44fc, 0xe13895c9), TOBN(0xf29b382f, 0x0cbc9822),
+     TOBN(0x1bfcb853, 0x73efaef6), TOBN(0xcf56ac9c, 0xa8c96f40),
+     TOBN(0xd7adf109, 0x7a191e24), TOBN(0x98035f44, 0xbf8a8dc2),
+     TOBN(0xf40a71b9, 0x1e750c84), TOBN(0xc57f7b0c, 0x5dc6c469),
+     TOBN(0x49a0e79c, 0x6fbc19c1), TOBN(0x6b0f5889, 0xa48ebdb8),
+     TOBN(0x5d3fd084, 0xa07c4e9f), TOBN(0xc3830111, 0xab27de14),
+     TOBN(0x0e4929fe, 0x33e08dcc), TOBN(0xf4a5ad24, 0x40bb73a3),
+     TOBN(0xde86c2bf, 0x490f97ca), TOBN(0x288f09c6, 0x67a1ce18),
+     TOBN(0x364bb886, 0x1844478d), TOBN(0x7840fa42, 0xceedb040),
+     TOBN(0x1269fdd2, 0x5a631b37), TOBN(0x94761f1e, 0xa47c8b7d),
+     TOBN(0xfc0c2e17, 0x481c6266), TOBN(0x85e16ea2, 0x3daa5fa7),
+     TOBN(0xccd86033, 0x92491048), TOBN(0x0c2f6963, 0xf4d402d7),
+     TOBN(0x6336f7df, 0xdf6a865c), TOBN(0x0a2a463c, 0xb5c02a87),
+     TOBN(0xb0e29be7, 0xbf2f12ee), TOBN(0xf0a22002, 0x66bad988),
+     TOBN(0x27f87e03, 0x9123c1d7), TOBN(0x21669c55, 0x328a8c98),
+     TOBN(0x186b9803, 0x92f14529), TOBN(0xd3d056cc, 0x63954df3),
+     TOBN(0x2f03fd58, 0x175a46f6), TOBN(0x63e34ebe, 0x11558558),
+     TOBN(0xe13fedee, 0x5b80cfa5), TOBN(0xe872a120, 0xd401dbd1),
+     TOBN(0x52657616, 0xe8a9d667), TOBN(0xbc8da4b6, 0xe08d6693),
+     TOBN(0x370fb9bb, 0x1b703e75), TOBN(0x6773b186, 0xd4338363),
+     TOBN(0x18dad378, 0xecef7bff), TOBN(0xaac787ed, 0x995677da),
+     TOBN(0x4801ea8b, 0x0437164b), TOBN(0xf430ad20, 0x73fe795e),
+     TOBN(0xb164154d, 0x8ee5eb73), TOBN(0x0884ecd8, 0x108f7c0e),
+     TOBN(0x0e6ec096, 0x5f520698), TOBN(0x640631fe, 0x44f7b8d9),
+     TOBN(0x92fd34fc, 0xa35a68b9), TOBN(0x9c5a4b66, 0x4d40cf4e),
+     TOBN(0x949454bf, 0x80b6783d), TOBN(0x80e701fe, 0x3a320a10),
+     TOBN(0x8d1a564a, 0x1a0a39b2), TOBN(0x1436d53d, 0x320587db),
+     TOBN(0xf5096e6d, 0x6556c362), TOBN(0xbc23a3c0, 0xe2455d7e),
+     TOBN(0x3a7aee54, 0x807230f9), TOBN(0x9ba1cfa6, 0x22ae82fd),
+     TOBN(0x833a057a, 0x99c5d706), TOBN(0x8be85f4b, 0x842315c9),
+     TOBN(0xd083179a, 0x66a72f12), TOBN(0x2fc77d5d, 0xcdcc73cd),
+     TOBN(0x22b88a80, 0x5616ee30), TOBN(0xfb09548f, 0xe7ab1083),
+     TOBN(0x8ad6ab0d, 0x511270cd), TOBN(0x61f6c57a, 0x6924d9ab),
+     TOBN(0xa0f7bf72, 0x90aecb08), TOBN(0x849f87c9, 0x0df784a4),
+     TOBN(0x27c79c15, 0xcfaf1d03), TOBN(0xbbf9f675, 0xc463face),
+     TOBN(0x91502c65, 0x765ba543), TOBN(0x18ce3cac, 0x42ea60dd),
+     TOBN(0xe5cee6ac, 0x6e43ecb3), TOBN(0x63e4e910, 0x68f2aeeb),
+     TOBN(0x26234fa3, 0xc85932ee), TOBN(0x96883e8b, 0x4c90c44d),
+     TOBN(0x29b9e738, 0xa18a50f6), TOBN(0xbfc62b2a, 0x3f0420df),
+     TOBN(0xd22a7d90, 0x6d3e1fa9), TOBN(0x17115618, 0xfe05b8a3),
+     TOBN(0x2a0c9926, 0xbb2b9c01), TOBN(0xc739fcc6, 0xe07e76a2),
+     TOBN(0x540e9157, 0x165e439a), TOBN(0x06353a62, 0x6a9063d8),
+     TOBN(0x84d95594, 0x61e927a3), TOBN(0x013b9b26, 0xe2e0be7f),
+     TOBN(0x4feaec3b, 0x973497f1), TOBN(0x15c0f94e, 0x093ebc2d),
+     TOBN(0x6af5f227, 0x33af0583), TOBN(0x0c2af206, 0xc61f3340),
+     TOBN(0xd25dbdf1, 0x4457397c), TOBN(0x2e8ed017, 0xcabcbae0),
+     TOBN(0xe3010938, 0xc2815306), TOBN(0xbaa99337, 0xe8c6cd68),
+     TOBN(0x08513182, 0x3b0ec7de), TOBN(0x1e1b822b, 0x58df05df),
+     TOBN(0x5c14842f, 0xa5c3b683), TOBN(0x98fe977e, 0x3eba34ce),
+     TOBN(0xfd2316c2, 0x0d5e8873), TOBN(0xe48d839a, 0xbd0d427d),
+     TOBN(0x495b2218, 0x623fc961), TOBN(0x24ee56e7, 0xb46fba5e),
+     TOBN(0x9184a55b, 0x91e4de58), TOBN(0xa7488ca5, 0xdfdea288),
+     TOBN(0xa723862e, 0xa8dcc943), TOBN(0x92d762b2, 0x849dc0fc),
+     TOBN(0x3c444a12, 0x091ff4a9), TOBN(0x581113fa, 0x0cada274),
+     TOBN(0xb9de0a45, 0x30d8eae2), TOBN(0x5e0fcd85, 0xdf6b41ea),
+     TOBN(0x6233ea68, 0xc094dbb5), TOBN(0xb77d062e, 0xd968d410),
+     TOBN(0x3e719bbc, 0x58b3002d), TOBN(0x68e7dd3d, 0x3dc49d58),
+     TOBN(0x8d825740, 0x013a5e58), TOBN(0x21311747, 0x3c9e3c1b),
+     TOBN(0x0cb0a2a7, 0x7c99b6ab), TOBN(0x5c48a3b3, 0xc2f888f2)}
+    ,
+    {TOBN(0xc7913e91, 0x991724f3), TOBN(0x5eda799c, 0x39cbd686),
+     TOBN(0xddb595c7, 0x63d4fc1e), TOBN(0x6b63b80b, 0xac4fed54),
+     TOBN(0x6ea0fc69, 0x7e5fb516), TOBN(0x737708ba, 0xd0f1c964),
+     TOBN(0x9628745f, 0x11a92ca5), TOBN(0x61f37958, 0x9a86967a),
+     TOBN(0x9af39b2c, 0xaa665072), TOBN(0x78322fa4, 0xefd324ef),
+     TOBN(0x3d153394, 0xc327bd31), TOBN(0x81d5f271, 0x3129dab0),
+     TOBN(0xc72e0c42, 0xf48027f5), TOBN(0xaa40cdbc, 0x8536e717),
+     TOBN(0xf45a657a, 0x2d369d0f), TOBN(0xb03bbfc4, 0xea7f74e6),
+     TOBN(0x46a8c418, 0x0d738ded), TOBN(0x6f1a5bb0, 0xe0de5729),
+     TOBN(0xf10230b9, 0x8ba81675), TOBN(0x32c6f30c, 0x112b33d4),
+     TOBN(0x7559129d, 0xd8fffb62), TOBN(0x6a281b47, 0xb459bf05),
+     TOBN(0x77c1bd3a, 0xfa3b6776), TOBN(0x0709b380, 0x7829973a),
+     TOBN(0x8c26b232, 0xa3326505), TOBN(0x38d69272, 0xee1d41bf),
+     TOBN(0x0459453e, 0xffe32afa), TOBN(0xce8143ad, 0x7cb3ea87),
+     TOBN(0x932ec1fa, 0x7e6ab666), TOBN(0x6cd2d230, 0x22286264),
+     TOBN(0x459a46fe, 0x6736f8ed), TOBN(0x50bf0d00, 0x9eca85bb),
+     TOBN(0x0b825852, 0x877a21ec), TOBN(0x300414a7, 0x0f537a94),
+     TOBN(0x3f1cba40, 0x21a9a6a2), TOBN(0x50824eee, 0x76943c00),
+     TOBN(0xa0dbfcec, 0xf83cba5d), TOBN(0xf9538148, 0x93b4f3c0),
+     TOBN(0x61744162, 0x48f24dd7), TOBN(0x5322d64d, 0xe4fb09dd),
+     TOBN(0x57447384, 0x3d9325f3), TOBN(0xa9bef2d0, 0xf371cb84),
+     TOBN(0x77d2188b, 0xa61e36c5), TOBN(0xbbd6a7d7, 0xc602df72),
+     TOBN(0xba3aa902, 0x8f61bc0b), TOBN(0xf49085ed, 0x6ed0b6a1),
+     TOBN(0x8bc625d6, 0xae6e8298), TOBN(0x832b0b1d, 0xa2e9c01d),
+     TOBN(0xa337c447, 0xf1f0ced1), TOBN(0x800cc793, 0x9492dd2b),
+     TOBN(0x4b93151d, 0xbea08efa), TOBN(0x820cf3f8, 0xde0a741e),
+     TOBN(0xff1982dc, 0x1c0f7d13), TOBN(0xef921960, 0x84dde6ca),
+     TOBN(0x1ad7d972, 0x45f96ee3), TOBN(0x319c8dbe, 0x29dea0c7),
+     TOBN(0xd3ea3871, 0x7b82b99b), TOBN(0x75922d4d, 0x470eb624),
+     TOBN(0x8f66ec54, 0x3b95d466), TOBN(0x66e673cc, 0xbee1e346),
+     TOBN(0x6afe67c4, 0xb5f2b89a), TOBN(0x3de9c1e6, 0x290e5cd3),
+     TOBN(0x8c278bb6, 0x310a2ada), TOBN(0x420fa384, 0x0bdb323b),
+     TOBN(0x0ae1d63b, 0x0eb919b0), TOBN(0xd74ee51d, 0xa74b9620),
+     TOBN(0x395458d0, 0xa674290c), TOBN(0x324c930f, 0x4620a510),
+     TOBN(0x2d1f4d19, 0xfbac27d4), TOBN(0x4086e8ca, 0x9bedeeac),
+     TOBN(0x0cdd211b, 0x9b679ab8), TOBN(0x5970167d, 0x7090fec4),
+     TOBN(0x3420f2c9, 0xfaf1fc63), TOBN(0x616d333a, 0x328c8bb4),
+     TOBN(0x7d65364c, 0x57f1fe4a), TOBN(0x9343e877, 0x55e5c73a),
+     TOBN(0x5795176b, 0xe970e78c), TOBN(0xa36ccebf, 0x60533627),
+     TOBN(0xfc7c7380, 0x09cdfc1b), TOBN(0xb39a2afe, 0xb3fec326),
+     TOBN(0xb7ff1ba1, 0x6224408a), TOBN(0xcc856e92, 0x247cfc5e),
+     TOBN(0x01f102e7, 0xc18bc493), TOBN(0x4613ab74, 0x2091c727),
+     TOBN(0xaa25e89c, 0xc420bf2b), TOBN(0x00a53176, 0x90337ec2),
+     TOBN(0xd2be9f43, 0x7d025fc7), TOBN(0x3316fb85, 0x6e6fe3dc),
+     TOBN(0x27520af5, 0x9ac50814), TOBN(0xfdf95e78, 0x9a8e4223),
+     TOBN(0xb7e7df2a, 0x56bec5a0), TOBN(0xf7022f7d, 0xdf159e5d),
+     TOBN(0x93eeeab1, 0xcac1fe8f), TOBN(0x8040188c, 0x37451168),
+     TOBN(0x7ee8aa8a, 0xd967dce6), TOBN(0xfa0e79e7, 0x3abc9299),
+     TOBN(0x67332cfc, 0x2064cfd1), TOBN(0x339c31de, 0xb0651934),
+     TOBN(0x719b28d5, 0x2a3bcbea), TOBN(0xee74c82b, 0x9d6ae5c6),
+     TOBN(0x0927d05e, 0xbaf28ee6), TOBN(0x82cecf2c, 0x9d719028),
+     TOBN(0x0b0d353e, 0xddb30289), TOBN(0xfe4bb977, 0xfddb2e29),
+     TOBN(0xbb5bb990, 0x640bfd9e), TOBN(0xd226e277, 0x82f62108),
+     TOBN(0x4bf00985, 0x02ffdd56), TOBN(0x7756758a, 0x2ca1b1b5),
+     TOBN(0xc32b62a3, 0x5285fe91), TOBN(0xedbc546a, 0x8c9cd140),
+     TOBN(0x1e47a013, 0xaf5cb008), TOBN(0xbca7e720, 0x073ce8f2),
+     TOBN(0xe10b2ab8, 0x17a91cae), TOBN(0xb89aab65, 0x08e27f63),
+     TOBN(0x7b3074a7, 0xdba3ddf9), TOBN(0x1c20ce09, 0x330c2972),
+     TOBN(0x6b9917b4, 0x5fcf7e33), TOBN(0xe6793743, 0x945ceb42),
+     TOBN(0x18fc2215, 0x5c633d19), TOBN(0xad1adb3c, 0xc7485474),
+     TOBN(0x646f9679, 0x6424c49b), TOBN(0xf888dfe8, 0x67c241c9),
+     TOBN(0xe12d4b93, 0x24f68b49), TOBN(0x9a6b62d8, 0xa571df20),
+     TOBN(0x81b4b26d, 0x179483cb), TOBN(0x666f9632, 0x9511fae2),
+     TOBN(0xd281b3e4, 0xd53aa51f), TOBN(0x7f96a765, 0x7f3dbd16),
+     TOBN(0xa7f8b5bf, 0x074a30ce), TOBN(0xd7f52107, 0x005a32e6),
+     TOBN(0x6f9e0907, 0x50237ed4), TOBN(0x2f21da47, 0x8096fa2b),
+     TOBN(0xf3e19cb4, 0xeec863a0), TOBN(0xd18f77fd, 0x9527620a),
+     TOBN(0x9505c81c, 0x407c1cf8), TOBN(0x9998db4e, 0x1b6ec284),
+     TOBN(0x7e3389e5, 0xc247d44d), TOBN(0x12507141, 0x3f4f3d80),
+     TOBN(0xd4ba0110, 0x4a78a6c7), TOBN(0x312874a0, 0x767720be),
+     TOBN(0xded059a6, 0x75944370), TOBN(0xd6123d90, 0x3b2c0bdd),
+     TOBN(0xa56b717b, 0x51c108e3), TOBN(0x9bb7940e, 0x070623e9),
+     TOBN(0x794e2d59, 0x84ac066c), TOBN(0xf5954a92, 0xe68c69a0),
+     TOBN(0x28c52458, 0x4fd99dcc), TOBN(0x60e639fc, 0xb1012517),
+     TOBN(0xc2e60125, 0x7de79248), TOBN(0xe9ef6404, 0xf12fc6d7),
+     TOBN(0x4c4f2808, 0x2a3b5d32), TOBN(0x865ad32e, 0xc768eb8a),
+     TOBN(0xac02331b, 0x13fb70b6), TOBN(0x037b44c1, 0x95599b27),
+     TOBN(0x1a860fc4, 0x60bd082c), TOBN(0xa2e25745, 0xc980cd01),
+     TOBN(0xee3387a8, 0x1da0263e), TOBN(0x931bfb95, 0x2d10f3d6),
+     TOBN(0x5b687270, 0xa1f24a32), TOBN(0xf140e65d, 0xca494b86),
+     TOBN(0x4f4ddf91, 0xb2f1ac7a), TOBN(0xf99eaabb, 0x760fee27),
+     TOBN(0x57f4008a, 0x49c228e5), TOBN(0x090be440, 0x1cf713bb),
+     TOBN(0xac91fbe4, 0x5004f022), TOBN(0xd838c2c2, 0x569e1af6),
+     TOBN(0xd6c7d20b, 0x0f1daaa5), TOBN(0xaa063ac1, 0x1bbb02c0),
+     TOBN(0x0938a422, 0x59558a78), TOBN(0x5343c669, 0x8435da2f),
+     TOBN(0x96f67b18, 0x034410dc), TOBN(0x7cc1e424, 0x84510804),
+     TOBN(0x86a1543f, 0x16dfbb7d), TOBN(0x921fa942, 0x5b5bd592),
+     TOBN(0x9dcccb6e, 0xb33dd03c), TOBN(0x8581ddd9, 0xb843f51e),
+     TOBN(0x54935fcb, 0x81d73c9e), TOBN(0x6d07e979, 0x0a5e97ab),
+     TOBN(0x4dc7b30a, 0xcf3a6bab), TOBN(0x147ab1f3, 0x170bee11),
+     TOBN(0x0aaf8e3d, 0x9fafdee4), TOBN(0xfab3dbcb, 0x538a8b95),
+     TOBN(0x405df4b3, 0x6ef13871), TOBN(0xf1f4e9cb, 0x088d5a49),
+     TOBN(0x9bcd24d3, 0x66b33f1d), TOBN(0x3b97b820, 0x5ce445c0),
+     TOBN(0xe2926549, 0xba93ff61), TOBN(0xd9c341ce, 0x4dafe616),
+     TOBN(0xfb30a76e, 0x16efb6f3), TOBN(0xdf24b8ca, 0x605b953c),
+     TOBN(0x8bd52afe, 0xc2fffb9f), TOBN(0xbbac5ff7, 0xe19d0b96),
+     TOBN(0x43c01b87, 0x459afccd), TOBN(0x6bd45143, 0xb7432652),
+     TOBN(0x84734530, 0x55b5d78e), TOBN(0x81088fdb, 0x1554ba7d),
+     TOBN(0xada0a52c, 0x1e269375), TOBN(0xf9f037c4, 0x2dc5ec10),
+     TOBN(0xc0660607, 0x94bfbc11), TOBN(0xc0a630bb, 0xc9c40d2f),
+     TOBN(0x5efc797e, 0xab64c31e), TOBN(0xffdb1dab, 0x74507144),
+     TOBN(0xf6124287, 0x1ca6790c), TOBN(0xe9609d81, 0xe69bf1bf),
+     TOBN(0xdb898595, 0x00d24fc9), TOBN(0x9c750333, 0xe51fb417),
+     TOBN(0x51830a91, 0xfef7bbde), TOBN(0x0ce67dc8, 0x945f585c),
+     TOBN(0x9a730ed4, 0x4763eb50), TOBN(0x24a0e221, 0xc1ab0d66),
+     TOBN(0x643b6393, 0x648748f3), TOBN(0x1982daa1, 0x6d3c6291),
+     TOBN(0x6f00a9f7, 0x8bbc5549), TOBN(0x7a1783e1, 0x7f36384e),
+     TOBN(0xe8346323, 0xde977f50), TOBN(0x91ab688d, 0xb245502a),
+     TOBN(0x331ab6b5, 0x6d0bdd66), TOBN(0x0a6ef32e, 0x64b71229),
+     TOBN(0x1028150e, 0xfe7c352f), TOBN(0x27e04350, 0xce7b39d3),
+     TOBN(0x2a3c8acd, 0xc1070c82), TOBN(0xfb2034d3, 0x80c9feef),
+     TOBN(0x2d729621, 0x709f3729), TOBN(0x8df290bf, 0x62cb4549),
+     TOBN(0x02f99f33, 0xfc2e4326), TOBN(0x3b30076d, 0x5eddf032),
+     TOBN(0xbb21f8cf, 0x0c652fb5), TOBN(0x314fb49e, 0xed91cf7b),
+     TOBN(0xa013eca5, 0x2f700750), TOBN(0x2b9e3c23, 0x712a4575),
+     TOBN(0xe5355557, 0xaf30fbb0), TOBN(0x1ada3516, 0x7c77e771),
+     TOBN(0x45f6ecb2, 0x7b135670), TOBN(0xe85d19df, 0x7cfc202e),
+     TOBN(0x0f1b50c7, 0x58d1be9f), TOBN(0x5ebf2c0a, 0xead2e344),
+     TOBN(0x1531fe4e, 0xabc199c9), TOBN(0xc7032592, 0x56bab0ae),
+     TOBN(0x16ab2e48, 0x6c1fec54), TOBN(0x0f87fda8, 0x04280188),
+     TOBN(0xdc9f46fc, 0x609e4a74), TOBN(0x2a44a143, 0xba667f91),
+     TOBN(0xbc3d8b95, 0xb4d83436), TOBN(0xa01e4bd0, 0xc7bd2958),
+     TOBN(0x7b182932, 0x73483c90), TOBN(0xa79c6aa1, 0xa7c7b598),
+     TOBN(0xbf3983c6, 0xeaaac07e), TOBN(0x8f18181e, 0x96e0d4e6),
+     TOBN(0x8553d37c, 0x051af62b), TOBN(0xe9a998eb, 0x0bf94496),
+     TOBN(0xe0844f9f, 0xb0d59aa1), TOBN(0x983fd558, 0xe6afb813),
+     TOBN(0x9670c0ca, 0x65d69804), TOBN(0x732b22de, 0x6ea5ff2d),
+     TOBN(0xd7640ba9, 0x5fd8623b), TOBN(0x9f619163, 0xa6351782),
+     TOBN(0x0bfc27ee, 0xacee5043), TOBN(0xae419e73, 0x2eb10f02),
+     TOBN(0x19c028d1, 0x8943fb05), TOBN(0x71f01cf7, 0xff13aa2a),
+     TOBN(0x7790737e, 0x8887a132), TOBN(0x67513309, 0x66318410),
+     TOBN(0x9819e8a3, 0x7ddb795e), TOBN(0xfecb8ef5, 0xdad100b2),
+     TOBN(0x59f74a22, 0x3021926a), TOBN(0xb7c28a49, 0x6f9b4c1c),
+     TOBN(0xed1a733f, 0x912ad0ab), TOBN(0x42a910af, 0x01a5659c),
+     TOBN(0x3842c6e0, 0x7bd68cab), TOBN(0x2b57fa38, 0x76d70ac8),
+     TOBN(0x8a6707a8, 0x3c53aaeb), TOBN(0x62c1c510, 0x65b4db18),
+     TOBN(0x8de2c1fb, 0xb2d09dc7), TOBN(0xc3dfed12, 0x266bd23b),
+     TOBN(0x927d039b, 0xd5b27db6), TOBN(0x2fb2f0f1, 0x103243da),
+     TOBN(0xf855a07b, 0x80be7399), TOBN(0xed9327ce, 0x1f9f27a8),
+     TOBN(0xa0bd99c7, 0x729bdef7), TOBN(0x2b67125e, 0x28250d88),
+     TOBN(0x784b26e8, 0x8670ced7), TOBN(0xe3dfe41f, 0xc31bd3b4),
+     TOBN(0x9e353a06, 0xbcc85cbc), TOBN(0x302e2909, 0x60178a9d),
+     TOBN(0x860abf11, 0xa6eac16e), TOBN(0x76447000, 0xaa2b3aac),
+     TOBN(0x46ff9d19, 0x850afdab), TOBN(0x35bdd6a5, 0xfdb2d4c1),
+     TOBN(0xe82594b0, 0x7e5c9ce9), TOBN(0x0f379e53, 0x20af346e),
+     TOBN(0x608b31e3, 0xbc65ad4a), TOBN(0x710c6b12, 0x267c4826),
+     TOBN(0x51c966f9, 0x71954cf1), TOBN(0xb1cec793, 0x0d0aa215),
+     TOBN(0x1f155989, 0x86bd23a8), TOBN(0xae2ff99c, 0xf9452e86),
+     TOBN(0xd8dd953c, 0x340ceaa2), TOBN(0x26355275, 0x2e2e9333),
+     TOBN(0x15d4e5f9, 0x8586f06d), TOBN(0xd6bf94a8, 0xf7cab546),
+     TOBN(0x33c59a0a, 0xb76a9af0), TOBN(0x52740ab3, 0xba095af7),
+     TOBN(0xc444de8a, 0x24389ca0), TOBN(0xcc6f9863, 0x706da0cb),
+     TOBN(0xb5a741a7, 0x6b2515cf), TOBN(0x71c41601, 0x9585c749),
+     TOBN(0x78350d4f, 0xe683de97), TOBN(0x31d61524, 0x63d0b5f5),
+     TOBN(0x7a0cc5e1, 0xfbce090b), TOBN(0xaac927ed, 0xfbcb2a5b),
+     TOBN(0xe920de49, 0x20d84c35), TOBN(0x8c06a0b6, 0x22b4de26),
+     TOBN(0xd34dd58b, 0xafe7ddf3), TOBN(0x55851fed, 0xc1e6e55b),
+     TOBN(0xd1395616, 0x960696e7), TOBN(0x940304b2, 0x5f22705f),
+     TOBN(0x6f43f861, 0xb0a2a860), TOBN(0xcf121282, 0x0e7cc981),
+     TOBN(0x12186212, 0x0ab64a96), TOBN(0x09215b9a, 0xb789383c),
+     TOBN(0x311eb305, 0x37387c09), TOBN(0xc5832fce, 0xf03ee760),
+     TOBN(0x30358f58, 0x32f7ea19), TOBN(0xe01d3c34, 0x91d53551),
+     TOBN(0x1ca5ee41, 0xda48ea80), TOBN(0x34e71e8e, 0xcf4fa4c1),
+     TOBN(0x312abd25, 0x7af1e1c7), TOBN(0xe3afcdeb, 0x2153f4a5),
+     TOBN(0x9d5c84d7, 0x00235e9a), TOBN(0x0308d3f4, 0x8c4c836f),
+     TOBN(0xc0a66b04, 0x89332de5), TOBN(0x610dd399, 0x89e566ef),
+     TOBN(0xf8eea460, 0xd1ac1635), TOBN(0x84cbb3fb, 0x20a2c0df),
+     TOBN(0x40afb488, 0xe74a48c5), TOBN(0x29738198, 0xd326b150),
+     TOBN(0x2a17747f, 0xa6d74081), TOBN(0x60ea4c05, 0x55a26214),
+     TOBN(0x53514bb4, 0x1f88c5fe), TOBN(0xedd64567, 0x7e83426c),
+     TOBN(0xd5d6cbec, 0x96460b25), TOBN(0xa12fd0ce, 0x68dc115e),
+     TOBN(0xc5bc3ed2, 0x697840ea), TOBN(0x969876a8, 0xa6331e31),
+     TOBN(0x60c36217, 0x472ff580), TOBN(0xf4229705, 0x4ad41393),
+     TOBN(0x4bd99ef0, 0xa03b8b92), TOBN(0x501c7317, 0xc144f4f6),
+     TOBN(0x159009b3, 0x18464945), TOBN(0x6d5e594c, 0x74c5c6be),
+     TOBN(0x2d587011, 0x321a3660), TOBN(0xd1e184b1, 0x3898d022),
+     TOBN(0x5ba04752, 0x4c6a7e04), TOBN(0x47fa1e2b, 0x45550b65),
+     TOBN(0x9419daf0, 0x48c0a9a5), TOBN(0x66362953, 0x7c243236),
+     TOBN(0xcd0744b1, 0x5cb12a88), TOBN(0x561b6f9a, 0x2b646188),
+     TOBN(0x599415a5, 0x66c2c0c0), TOBN(0xbe3f0859, 0x0f83f09a),
+     TOBN(0x9141c5be, 0xb92041b8), TOBN(0x01ae38c7, 0x26477d0d),
+     TOBN(0xca8b71f3, 0xd12c7a94), TOBN(0xfab5b31f, 0x765c70db),
+     TOBN(0x76ae7492, 0x487443e9), TOBN(0x8595a310, 0x990d1349),
+     TOBN(0xf8dbeda8, 0x7d460a37), TOBN(0x7f7ad082, 0x1e45a38f),
+     TOBN(0xed1d4db6, 0x1059705a), TOBN(0xa3dd492a, 0xe6b9c697),
+     TOBN(0x4b92ee3a, 0x6eb38bd5), TOBN(0xbab2609d, 0x67cc0bb7),
+     TOBN(0x7fc4fe89, 0x6e70ee82), TOBN(0xeff2c56e, 0x13e6b7e3),
+     TOBN(0x9b18959e, 0x34d26fca), TOBN(0x2517ab66, 0x889d6b45),
+     TOBN(0xf167b4e0, 0xbdefdd4f), TOBN(0x69958465, 0xf366e401),
+     TOBN(0x5aa368ab, 0xa73bbec0), TOBN(0x12148709, 0x7b240c21),
+     TOBN(0x378c3233, 0x18969006), TOBN(0xcb4d73ce, 0xe1fe53d1),
+     TOBN(0x5f50a80e, 0x130c4361), TOBN(0xd67f5951, 0x7ef5212b),
+     TOBN(0xf145e21e, 0x9e70c72e), TOBN(0xb2e52e29, 0x5566d2fb),
+     TOBN(0x44eaba4a, 0x032397f5), TOBN(0x5e56937b, 0x7e31a7de),
+     TOBN(0x68dcf517, 0x456c61e1), TOBN(0xbc2e954a, 0xa8b0a388),
+     TOBN(0xe3552fa7, 0x60a8b755), TOBN(0x03442dae, 0x73ad0cde),
+     TOBN(0x37ffe747, 0xceb26210), TOBN(0x983545e8, 0x787baef9),
+     TOBN(0x8b8c8535, 0x86a3de31), TOBN(0xc621dbcb, 0xfacd46db),
+     TOBN(0x82e442e9, 0x59266fbb), TOBN(0xa3514c37, 0x339d471c),
+     TOBN(0x3a11b771, 0x62cdad96), TOBN(0xf0cb3b3c, 0xecf9bdf0),
+     TOBN(0x3fcbdbce, 0x478e2135), TOBN(0x7547b5cf, 0xbda35342),
+     TOBN(0xa97e81f1, 0x8a677af6), TOBN(0xc8c2bf83, 0x28817987),
+     TOBN(0xdf07eaaf, 0x45580985), TOBN(0xc68d1f05, 0xc93b45cb),
+     TOBN(0x106aa2fe, 0xc77b4cac), TOBN(0x4c1d8afc, 0x04a7ae86),
+     TOBN(0xdb41c3fd, 0x9eb45ab2), TOBN(0x5b234b5b, 0xd4b22e74),
+     TOBN(0xda253dec, 0xf215958a), TOBN(0x67e0606e, 0xa04edfa0),
+     TOBN(0xabbbf070, 0xef751b11), TOBN(0xf352f175, 0xf6f06dce),
+     TOBN(0xdfc4b6af, 0x6839f6b4), TOBN(0x53ddf9a8, 0x9959848e),
+     TOBN(0xda49c379, 0xc21520b0), TOBN(0x90864ff0, 0xdbd5d1b6),
+     TOBN(0x2f055d23, 0x5f49c7f7), TOBN(0xe51e4e6a, 0xa796b2d8),
+     TOBN(0xc361a67f, 0x5c9dc340), TOBN(0x5ad53c37, 0xbca7c620),
+     TOBN(0xda1d6588, 0x32c756d0), TOBN(0xad60d911, 0x8bb67e13),
+     TOBN(0xd6c47bdf, 0x0eeec8c6), TOBN(0x4a27fec1, 0x078a1821),
+     TOBN(0x081f7415, 0xc3099524), TOBN(0x8effdf0b, 0x82cd8060),
+     TOBN(0xdb70ec1c, 0x65842df8), TOBN(0x8821b358, 0xd319a901),
+     TOBN(0x72ee56ee, 0xde42b529), TOBN(0x5bb39592, 0x236e4286),
+     TOBN(0xd1183316, 0xfd6f7140), TOBN(0xf9fadb5b, 0xbd8e81f7),
+     TOBN(0x701d5e0c, 0x5a02d962), TOBN(0xfdee4dbf, 0x1b601324),
+     TOBN(0xbed17407, 0x35d7620e), TOBN(0x04e3c2c3, 0xf48c0012),
+     TOBN(0x9ee29da7, 0x3455449a), TOBN(0x562cdef4, 0x91a836c4),
+     TOBN(0x8f682a5f, 0x47701097), TOBN(0x617125d8, 0xff88d0c2),
+     TOBN(0x948fda24, 0x57bb86dd), TOBN(0x348abb8f, 0x289f7286),
+     TOBN(0xeb10eab5, 0x99d94bbd), TOBN(0xd51ba28e, 0x4684d160),
+     TOBN(0xabe0e51c, 0x30c8f41a), TOBN(0x66588b45, 0x13254f4a),
+     TOBN(0x147ebf01, 0xfad097a5), TOBN(0x49883ea8, 0x610e815d),
+     TOBN(0xe44d60ba, 0x8a11de56), TOBN(0xa970de6e, 0x827a7a6d),
+     TOBN(0x2be41424, 0x5e17fc19), TOBN(0xd833c657, 0x01214057),
+     TOBN(0x1375813b, 0x363e723f), TOBN(0x6820bb88, 0xe6a52e9b),
+     TOBN(0x7e7f6970, 0xd875d56a), TOBN(0xd6a0a9ac, 0x51fbf6bf),
+     TOBN(0x54ba8790, 0xa3083c12), TOBN(0xebaeb23d, 0x6ae7eb64),
+     TOBN(0xa8685c3a, 0xb99a907a), TOBN(0xf1e74550, 0x026bf40b),
+     TOBN(0x7b73a027, 0xc802cd9e), TOBN(0x9a8a927c, 0x4fef4635),
+     TOBN(0xe1b6f60c, 0x08191224), TOBN(0xc4126ebb, 0xde4ec091),
+     TOBN(0xe1dff4dc, 0x4ae38d84), TOBN(0xde3f57db, 0x4f2ef985),
+     TOBN(0x34964337, 0xd446a1dd), TOBN(0x7bf217a0, 0x859e77f6),
+     TOBN(0x8ff10527, 0x8e1d13f5), TOBN(0xa304ef03, 0x74eeae27),
+     TOBN(0xfc6f5e47, 0xd19dfa5a), TOBN(0xdb007de3, 0x7fad982b),
+     TOBN(0x28205ad1, 0x613715f5), TOBN(0x251e6729, 0x7889529e),
+     TOBN(0x72705184, 0x1ae98e78), TOBN(0xf818537d, 0x271cac32),
+     TOBN(0xc8a15b7e, 0xb7f410f5), TOBN(0xc474356f, 0x81f62393),
+     TOBN(0x92dbdc5a, 0xc242316b), TOBN(0xabe060ac, 0xdbf4aff5),
+     TOBN(0x6e8c38fe, 0x909a8ec6), TOBN(0x43e514e5, 0x6116cb94),
+     TOBN(0x2078fa38, 0x07d784f9), TOBN(0x1161a880, 0xf4b5b357),
+     TOBN(0x5283ce79, 0x13adea3d), TOBN(0x0756c3e6, 0xcc6a910b),
+     TOBN(0x60bcfe01, 0xaaa79697), TOBN(0x04a73b29, 0x56391db1),
+     TOBN(0xdd8dad47, 0x189b45a0), TOBN(0xbfac0dd0, 0x48d5b8d9),
+     TOBN(0x34ab3af5, 0x7d3d2ec2), TOBN(0x6fa2fc2d, 0x207bd3af),
+     TOBN(0x9ff40092, 0x66550ded), TOBN(0x719b3e87, 0x1fd5b913),
+     TOBN(0xa573a496, 0x6d17fbc7), TOBN(0x0cd1a70a, 0x73d2b24e),
+     TOBN(0x34e2c5ca, 0xb2676937), TOBN(0xe7050b06, 0xbf669f21),
+     TOBN(0xfbe948b6, 0x1ede9046), TOBN(0xa0530051, 0x97662659),
+     TOBN(0x58cbd4ed, 0xf10124c5), TOBN(0xde2646e4, 0xdd6c06c8),
+     TOBN(0x332f8108, 0x8cad38c0), TOBN(0x471b7e90, 0x6bd68ae2),
+     TOBN(0x56ac3fb2, 0x0d8e27a3), TOBN(0xb54660db, 0x136b4b0d),
+     TOBN(0x123a1e11, 0xa6fd8de4), TOBN(0x44dbffea, 0xa37799ef),
+     TOBN(0x4540b977, 0xce6ac17c), TOBN(0x495173a8, 0xaf60acef)}
+    ,
+    {TOBN(0x9ebb284d, 0x391c2a82), TOBN(0xbcdd4863, 0x158308e8),
+     TOBN(0x006f16ec, 0x83f1edca), TOBN(0xa13e2c37, 0x695dc6c8),
+     TOBN(0x2ab756f0, 0x4a057a87), TOBN(0xa8765500, 0xa6b48f98),
+     TOBN(0x4252face, 0x68651c44), TOBN(0xa52b540b, 0xe1765e02),
+     TOBN(0x4f922fc5, 0x16a0d2bb), TOBN(0x0d5cc16c, 0x1a623499),
+     TOBN(0x9241cf3a, 0x57c62c8b), TOBN(0x2f5e6961, 0xfd1b667f),
+     TOBN(0x5c15c70b, 0xf5a01797), TOBN(0x3d20b44d, 0x60956192),
+     TOBN(0x04911b37, 0x071fdb52), TOBN(0xf648f916, 0x8d6f0f7b),
+     TOBN(0x6dc1acaf, 0xe60b7cf7), TOBN(0x25860a50, 0x84a9d869),
+     TOBN(0x56fc6f09, 0xe7ba8ac4), TOBN(0x828c5bd0, 0x6148d29e),
+     TOBN(0xac6b435e, 0xdc55ae5f), TOBN(0xa527f56c, 0xc0117411),
+     TOBN(0x94d5045e, 0xfd24342c), TOBN(0x2c4c0a35, 0x70b67c0d),
+     TOBN(0x027cc8b8, 0xfac61d9a), TOBN(0x7d25e062, 0xe3c6fe8a),
+     TOBN(0xe08805bf, 0xe5bff503), TOBN(0x13271e6c, 0x6ff632f7),
+     TOBN(0x55dca6c0, 0x232f76a5), TOBN(0x8957c32d, 0x701ef426),
+     TOBN(0xee728bcb, 0xa10a5178), TOBN(0x5ea60411, 0xb62c5173),
+     TOBN(0xfc4e964e, 0xd0b8892b), TOBN(0x9ea17683, 0x9301bb74),
+     TOBN(0x6265c5ae, 0xfcc48626), TOBN(0xe60cf82e, 0xbb3e9102),
+     TOBN(0x57adf797, 0xd4df5531), TOBN(0x235b59a1, 0x8deeefe2),
+     TOBN(0x60adcf58, 0x3f306eb1), TOBN(0x105c2753, 0x3d09492d),
+     TOBN(0x4090914b, 0xb5def996), TOBN(0x1cb69c83, 0x233dd1e7),
+     TOBN(0xc1e9c1d3, 0x9b3d5e76), TOBN(0x1f3338ed, 0xfccf6012),
+     TOBN(0xb1e95d0d, 0x2f5378a8), TOBN(0xacf4c2c7, 0x2f00cd21),
+     TOBN(0x6e984240, 0xeb5fe290), TOBN(0xd66c038d, 0x248088ae),
+     TOBN(0x804d264a, 0xf94d70cf), TOBN(0xbdb802ef, 0x7314bf7e),
+     TOBN(0x8fb54de2, 0x4333ed02), TOBN(0x740461e0, 0x285635d9),
+     TOBN(0x4113b2c8, 0x365e9383), TOBN(0xea762c83, 0x3fdef652),
+     TOBN(0x4eec6e2e, 0x47b956c1), TOBN(0xa3d814be, 0x65620fa4),
+     TOBN(0x9ad5462b, 0xb4d8bc50), TOBN(0x181c0b16, 0xa9195770),
+     TOBN(0xebd4fe1c, 0x78412a68), TOBN(0xae0341bc, 0xc0dff48c),
+     TOBN(0xb6bc45cf, 0x7003e866), TOBN(0xf11a6dea, 0x8a24a41b),
+     TOBN(0x5407151a, 0xd04c24c2), TOBN(0x62c9d27d, 0xda5b7b68),
+     TOBN(0x2e964235, 0x88cceff6), TOBN(0x8594c54f, 0x8b07ed69),
+     TOBN(0x1578e73c, 0xc84d0d0d), TOBN(0x7b4e1055, 0xff532868),
+     TOBN(0xa348c0d5, 0xb5ec995a), TOBN(0xbf4b9d55, 0x14289a54),
+     TOBN(0x9ba155a6, 0x58fbd777), TOBN(0x186ed7a8, 0x1a84491d),
+     TOBN(0xd4992b30, 0x614c0900), TOBN(0xda98d121, 0xbd00c24b),
+     TOBN(0x7f534dc8, 0x7ec4bfa1), TOBN(0x4a5ff674, 0x37dc34bc),
+     TOBN(0x68c196b8, 0x1d7ea1d7), TOBN(0x38cf2893, 0x80a6d208),
+     TOBN(0xfd56cd09, 0xe3cbbd6e), TOBN(0xec72e27e, 0x4205a5b6),
+     TOBN(0x15ea68f5, 0xa44f77f7), TOBN(0x7aa5f9fd, 0xb43c52bc),
+     TOBN(0x86ff676f, 0x94f0e609), TOBN(0xa4cde963, 0x2e2d432b),
+     TOBN(0x8cafa0c0, 0xeee470af), TOBN(0x84137d0e, 0x8a3f5ec8),
+     TOBN(0xebb40411, 0xfaa31231), TOBN(0xa239c13f, 0x6f7f7ccf),
+     TOBN(0x32865719, 0xa8afd30b), TOBN(0x86798328, 0x8a826dce),
+     TOBN(0xdf04e891, 0xc4a8fbe0), TOBN(0xbb6b6e1b, 0xebf56ad3),
+     TOBN(0x0a695b11, 0x471f1ff0), TOBN(0xd76c3389, 0xbe15baf0),
+     TOBN(0x018edb95, 0xbe96c43e), TOBN(0xf2beaaf4, 0x90794158),
+     TOBN(0x152db09e, 0xc3076a27), TOBN(0x5e82908e, 0xe416545d),
+     TOBN(0xa2c41272, 0x356d6f2e), TOBN(0xdc9c9642, 0x31fd74e1),
+     TOBN(0x66ceb88d, 0x519bf615), TOBN(0xe29ecd76, 0x05a2274e),
+     TOBN(0x3a0473c4, 0xbf5e2fa0), TOBN(0x6b6eb671, 0x64284e67),
+     TOBN(0xe8b97932, 0xb88756dd), TOBN(0xed4e8652, 0xf17e3e61),
+     TOBN(0xc2dd1499, 0x3ee1c4a4), TOBN(0xc0aaee17, 0x597f8c0e),
+     TOBN(0x15c4edb9, 0x6c168af3), TOBN(0x6563c7bf, 0xb39ae875),
+     TOBN(0xadfadb6f, 0x20adb436), TOBN(0xad55e8c9, 0x9a042ac0),
+     TOBN(0x975a1ed8, 0xb76da1f5), TOBN(0x10dfa466, 0xa58acb94),
+     TOBN(0x8dd7f7e3, 0xac060282), TOBN(0x6813e66a, 0x572a051e),
+     TOBN(0xb4ccae1e, 0x350cb901), TOBN(0xb653d656, 0x50cb7822),
+     TOBN(0x42484710, 0xdfab3b87), TOBN(0xcd7ee537, 0x9b670fd0),
+     TOBN(0x0a50b12e, 0x523b8bf6), TOBN(0x8009eb5b, 0x8f910c1b),
+     TOBN(0xf535af82, 0x4a167588), TOBN(0x0f835f9c, 0xfb2a2abd),
+     TOBN(0xf59b2931, 0x2afceb62), TOBN(0xc797df2a, 0x169d383f),
+     TOBN(0xeb3f5fb0, 0x66ac02b0), TOBN(0x029d4c6f, 0xdaa2d0ca),
+     TOBN(0xd4059bc1, 0xafab4bc5), TOBN(0x833f5c6f, 0x56783247),
+     TOBN(0xb5346630, 0x8d2d3605), TOBN(0x83387891, 0xd34d8433),
+     TOBN(0xd973b30f, 0xadd9419a), TOBN(0xbcca1099, 0xafe3fce8),
+     TOBN(0x08178315, 0x0809aac6), TOBN(0x01b7f21a, 0x540f0f11),
+     TOBN(0x65c29219, 0x909523c8), TOBN(0xa62f648f, 0xa3a1c741),
+     TOBN(0x88598d4f, 0x60c9e55a), TOBN(0xbce9141b, 0x0e4f347a),
+     TOBN(0x9af97d84, 0x35f9b988), TOBN(0x0210da62, 0x320475b6),
+     TOBN(0x3c076e22, 0x9191476c), TOBN(0x7520dbd9, 0x44fc7834),
+     TOBN(0x6a6b2cfe, 0xc1ab1bbd), TOBN(0xef8a65be, 0xdc650938),
+     TOBN(0x72855540, 0x805d7bc4), TOBN(0xda389396, 0xed11fdfd),
+     TOBN(0xa9d5bd36, 0x74660876), TOBN(0x11d67c54, 0xb45dff35),
+     TOBN(0x6af7d148, 0xa4f5da94), TOBN(0xbb8d4c3f, 0xc0bbeb31),
+     TOBN(0x87a7ebd1, 0xe0a1b12a), TOBN(0x1e4ef88d, 0x770ba95f),
+     TOBN(0x8c33345c, 0xdc2ae9cb), TOBN(0xcecf1276, 0x01cc8403),
+     TOBN(0x687c012e, 0x1b39b80f), TOBN(0xfd90d0ad, 0x35c33ba4),
+     TOBN(0xa3ef5a67, 0x5c9661c2), TOBN(0x368fc88e, 0xe017429e),
+     TOBN(0xd30c6761, 0x196a2fa2), TOBN(0x931b9817, 0xbd5b312e),
+     TOBN(0xba01000c, 0x72f54a31), TOBN(0xa203d2c8, 0x66eaa541),
+     TOBN(0xf2abdee0, 0x98939db3), TOBN(0xe37d6c2c, 0x3e606c02),
+     TOBN(0xf2921574, 0x521ff643), TOBN(0x2781b3c4, 0xd7e2fca3),
+     TOBN(0x664300b0, 0x7850ec06), TOBN(0xac5a38b9, 0x7d3a10cf),
+     TOBN(0x9233188d, 0xe34ab39d), TOBN(0xe77057e4, 0x5072cbb9),
+     TOBN(0xbcf0c042, 0xb59e78df), TOBN(0x4cfc91e8, 0x1d97de52),
+     TOBN(0x4661a26c, 0x3ee0ca4a), TOBN(0x5620a4c1, 0xfb8507bc),
+     TOBN(0x4b44d4aa, 0x049f842c), TOBN(0xceabc5d5, 0x1540e82b),
+     TOBN(0x306710fd, 0x15c6f156), TOBN(0xbe5ae52b, 0x63db1d72),
+     TOBN(0x06f1e7e6, 0x334957f1), TOBN(0x57e388f0, 0x31144a70),
+     TOBN(0xfb69bb2f, 0xdf96447b), TOBN(0x0f78ebd3, 0x73e38a12),
+     TOBN(0xb8222605, 0x2b7ce542), TOBN(0xe6d4ce99, 0x7472bde1),
+     TOBN(0x53e16ebe, 0x09d2f4da), TOBN(0x180ff42e, 0x53b92b2e),
+     TOBN(0xc59bcc02, 0x2c34a1c6), TOBN(0x3803d6f9, 0x422c46c2),
+     TOBN(0x18aff74f, 0x5c14a8a2), TOBN(0x55aebf80, 0x10a08b28),
+     TOBN(0x66097d58, 0x7135593f), TOBN(0x32e6eff7, 0x2be570cd),
+     TOBN(0x584e6a10, 0x2a8c860d), TOBN(0xcd185890, 0xa2eb4163),
+     TOBN(0x7ceae99d, 0x6d97e134), TOBN(0xd42c6b70, 0xdd8447ce),
+     TOBN(0x59ddbb4a, 0xb8c50273), TOBN(0x03c612df, 0x3cf34e1e),
+     TOBN(0x84b9ca15, 0x04b6c5a0), TOBN(0x35216f39, 0x18f0e3a3),
+     TOBN(0x3ec2d2bc, 0xbd986c00), TOBN(0x8bf546d9, 0xd19228fe),
+     TOBN(0xd1c655a4, 0x4cd623c3), TOBN(0x366ce718, 0x502b8e5a),
+     TOBN(0x2cfc84b4, 0xeea0bfe7), TOBN(0xe01d5cee, 0xcf443e8e),
+     TOBN(0x8ec045d9, 0x036520f8), TOBN(0xdfb3c3d1, 0x92d40e98),
+     TOBN(0x0bac4cce, 0xcc559a04), TOBN(0x35eccae5, 0x240ea6b1),
+     TOBN(0x180b32db, 0xf8a5a0ac), TOBN(0x547972a5, 0xeb699700),
+     TOBN(0xa3765801, 0xca26bca0), TOBN(0x57e09d0e, 0xa647f25a),
+     TOBN(0xb956970e, 0x2fdd23cc), TOBN(0xb80288bc, 0x5682e971),
+     TOBN(0xe6e6d91e, 0x9ae86ebc), TOBN(0x0564c83f, 0x8c9f1939),
+     TOBN(0x551932a2, 0x39560368), TOBN(0xe893752b, 0x049c28e2),
+     TOBN(0x0b03cee5, 0xa6a158c3), TOBN(0xe12d656b, 0x04964263),
+     TOBN(0x4b47554e, 0x63e3bc1d), TOBN(0xc719b6a2, 0x45044ff7),
+     TOBN(0x4f24d30a, 0xe48daa07), TOBN(0xa3f37556, 0xc8c1edc3),
+     TOBN(0x9a47bf76, 0x0700d360), TOBN(0xbb1a1824, 0x822ae4e2),
+     TOBN(0x22e275a3, 0x89f1fb4c), TOBN(0x72b1aa23, 0x9968c5f5),
+     TOBN(0xa75feaca, 0xbe063f64), TOBN(0x9b392f43, 0xbce47a09),
+     TOBN(0xd4241509, 0x1ad07aca), TOBN(0x4b0c591b, 0x8d26cd0f),
+     TOBN(0x2d42ddfd, 0x92f1169a), TOBN(0x63aeb1ac, 0x4cbf2392),
+     TOBN(0x1de9e877, 0x0691a2af), TOBN(0xebe79af7, 0xd98021da),
+     TOBN(0xcfdf2a4e, 0x40e50acf), TOBN(0xf0a98ad7, 0xaf01d665),
+     TOBN(0xefb640bf, 0x1831be1f), TOBN(0x6fe8bd2f, 0x80e9ada0),
+     TOBN(0x94c103a1, 0x6cafbc91), TOBN(0x170f8759, 0x8308e08c),
+     TOBN(0x5de2d2ab, 0x9780ff4f), TOBN(0x666466bc, 0x45b201f2),
+     TOBN(0x58af2010, 0xf5b343bc), TOBN(0x0f2e400a, 0xf2f142fe),
+     TOBN(0x3483bfde, 0xa85f4bdf), TOBN(0xf0b1d093, 0x03bfeaa9),
+     TOBN(0x2ea01b95, 0xc7081603), TOBN(0xe943e4c9, 0x3dba1097),
+     TOBN(0x47be92ad, 0xb438f3a6), TOBN(0x00bb7742, 0xe5bf6636),
+     TOBN(0x136b7083, 0x824297b4), TOBN(0x9d0e5580, 0x5584455f),
+     TOBN(0xab48cedc, 0xf1c7d69e), TOBN(0x53a9e481, 0x2a256e76),
+     TOBN(0x0402b0e0, 0x65eb2413), TOBN(0xdadbbb84, 0x8fc407a7),
+     TOBN(0xa65cd5a4, 0x8d7f5492), TOBN(0x21d44293, 0x74bae294),
+     TOBN(0x66917ce6, 0x3b5f1cc4), TOBN(0x37ae52ea, 0xce872e62),
+     TOBN(0xbb087b72, 0x2905f244), TOBN(0x12077086, 0x1e6af74f),
+     TOBN(0x4b644e49, 0x1058edea), TOBN(0x827510e3, 0xb638ca1d),
+     TOBN(0x8cf2b704, 0x6038591c), TOBN(0xffc8b47a, 0xfe635063),
+     TOBN(0x3ae220e6, 0x1b4d5e63), TOBN(0xbd864742, 0x9d961b4b),
+     TOBN(0x610c107e, 0x9bd16bed), TOBN(0x4270352a, 0x1127147b),
+     TOBN(0x7d17ffe6, 0x64cfc50e), TOBN(0x50dee01a, 0x1e36cb42),
+     TOBN(0x068a7622, 0x35dc5f9a), TOBN(0x9a08d536, 0xdf53f62c),
+     TOBN(0x4ed71457, 0x6be5f7de), TOBN(0xd93006f8, 0xc2263c9e),
+     TOBN(0xe073694c, 0xcacacb36), TOBN(0x2ff7a5b4, 0x3ae118ab),
+     TOBN(0x3cce53f1, 0xcd871236), TOBN(0xf156a39d, 0xc2aa6d52),
+     TOBN(0x9cc5f271, 0xb198d76d), TOBN(0xbc615b6f, 0x81383d39),
+     TOBN(0xa54538e8, 0xde3eee6b), TOBN(0x58c77538, 0xab910d91),
+     TOBN(0x31e5bdbc, 0x58d278bd), TOBN(0x3cde4adf, 0xb963acae),
+     TOBN(0xb1881fd2, 0x5302169c), TOBN(0x8ca60fa0, 0xa989ed8b),
+     TOBN(0xa1999458, 0xff96a0ee), TOBN(0xc1141f03, 0xac6c283d),
+     TOBN(0x7677408d, 0x6dfafed3), TOBN(0x33a01653, 0x39661588),
+     TOBN(0x3c9c15ec, 0x0b726fa0), TOBN(0x090cfd93, 0x6c9b56da),
+     TOBN(0xe34f4bae, 0xa3c40af5), TOBN(0x3469eadb, 0xd21129f1),
+     TOBN(0xcc51674a, 0x1e207ce8), TOBN(0x1e293b24, 0xc83b1ef9),
+     TOBN(0x17173d13, 0x1e6c0bb4), TOBN(0x19004695, 0x90776d35),
+     TOBN(0xe7980e34, 0x6de6f922), TOBN(0x873554cb, 0xf4dd9a22),
+     TOBN(0x0316c627, 0xcbf18a51), TOBN(0x4d93651b, 0x3032c081),
+     TOBN(0x207f2771, 0x3946834d), TOBN(0x2c08d7b4, 0x30cdbf80),
+     TOBN(0x137a4fb4, 0x86df2a61), TOBN(0xa1ed9c07, 0xecf7b4a2),
+     TOBN(0xb2e460e2, 0x7bd042ff), TOBN(0xb7f5e2fa, 0x5f62f5ec),
+     TOBN(0x7aa6ec6b, 0xcc2423b7), TOBN(0x75ce0a7f, 0xba63eea7),
+     TOBN(0x67a45fb1, 0xf250a6e1), TOBN(0x93bc919c, 0xe53cdc9f),
+     TOBN(0x9271f56f, 0x871942df), TOBN(0x2372ff6f, 0x7859ad66),
+     TOBN(0x5f4c2b96, 0x33cb1a78), TOBN(0xe3e29101, 0x5838aa83),
+     TOBN(0xa7ed1611, 0xe4e8110c), TOBN(0x2a2d70d5, 0x330198ce),
+     TOBN(0xbdf132e8, 0x6720efe0), TOBN(0xe61a8962, 0x66a471bf),
+     TOBN(0x796d3a85, 0x825808bd), TOBN(0x51dc3cb7, 0x3fd6e902),
+     TOBN(0x643c768a, 0x916219d1), TOBN(0x36cd7685, 0xa2ad7d32),
+     TOBN(0xe3db9d05, 0xb22922a4), TOBN(0x6494c87e, 0xdba29660),
+     TOBN(0xf0ac91df, 0xbcd2ebc7), TOBN(0x4deb57a0, 0x45107f8d),
+     TOBN(0x42271f59, 0xc3d12a73), TOBN(0x5f71687c, 0xa5c2c51d),
+     TOBN(0xcb1f50c6, 0x05797bcb), TOBN(0x29ed0ed9, 0xd6d34eb0),
+     TOBN(0xe5fe5b47, 0x4683c2eb), TOBN(0x4956eeb5, 0x97447c46),
+     TOBN(0x5b163a43, 0x71207167), TOBN(0x93fa2fed, 0x0248c5ef),
+     TOBN(0x67930af2, 0x31f63950), TOBN(0xa77797c1, 0x14caa2c9),
+     TOBN(0x526e80ee, 0x27ac7e62), TOBN(0xe1e6e626, 0x58b28aec),
+     TOBN(0x636178b0, 0xb3c9fef0), TOBN(0xaf7752e0, 0x6d5f90be),
+     TOBN(0x94ecaf18, 0xeece51cf), TOBN(0x2864d0ed, 0xca806e1f),
+     TOBN(0x6de2e383, 0x97c69134), TOBN(0x5a42c316, 0xeb291293),
+     TOBN(0xc7779219, 0x6a60bae0), TOBN(0xa24de346, 0x6b7599d1),
+     TOBN(0x49d374aa, 0xb75d4941), TOBN(0x98900586, 0x2d501ff0),
+     TOBN(0x9f16d40e, 0xeb7974cf), TOBN(0x1033860b, 0xcdd8c115),
+     TOBN(0xb6c69ac8, 0x2094cec3), TOBN(0x9976fb88, 0x403b770c),
+     TOBN(0x1dea026c, 0x4859590d), TOBN(0xb6acbb46, 0x8562d1fd),
+     TOBN(0x7cd6c461, 0x44569d85), TOBN(0xc3190a36, 0x97f0891d),
+     TOBN(0xc6f53195, 0x48d5a17d), TOBN(0x7d919966, 0xd749abc8),
+     TOBN(0x65104837, 0xdd1c8a20), TOBN(0x7e5410c8, 0x2f683419),
+     TOBN(0x958c3ca8, 0xbe94022e), TOBN(0x605c3197, 0x6145dac2),
+     TOBN(0x3fc07501, 0x01683d54), TOBN(0x1d7127c5, 0x595b1234),
+     TOBN(0x10b8f87c, 0x9481277f), TOBN(0x677db2a8, 0xe65a1adb),
+     TOBN(0xec2fccaa, 0xddce3345), TOBN(0x2a6811b7, 0x012a4350),
+     TOBN(0x96760ff1, 0xac598bdc), TOBN(0x054d652a, 0xd1bf4128),
+     TOBN(0x0a1151d4, 0x92a21005), TOBN(0xad7f3971, 0x33110fdf),
+     TOBN(0x8c95928c, 0x1960100f), TOBN(0x6c91c825, 0x7bf03362),
+     TOBN(0xc8c8b2a2, 0xce309f06), TOBN(0xfdb27b59, 0xca27204b),
+     TOBN(0xd223eaa5, 0x0848e32e), TOBN(0xb93e4b2e, 0xe7bfaf1e),
+     TOBN(0xc5308ae6, 0x44aa3ded), TOBN(0x317a666a, 0xc015d573),
+     TOBN(0xc888ce23, 0x1a979707), TOBN(0xf141c1e6, 0x0d5c4958),
+     TOBN(0xb53b7de5, 0x61906373), TOBN(0x858dbade, 0xeb999595),
+     TOBN(0x8cbb47b2, 0xa59e5c36), TOBN(0x660318b3, 0xdcf4e842),
+     TOBN(0xbd161ccd, 0x12ba4b7a), TOBN(0xf399daab, 0xf8c8282a),
+     TOBN(0x1587633a, 0xeeb2130d), TOBN(0xa465311a, 0xda38dd7d),
+     TOBN(0x5f75eec8, 0x64d3779b), TOBN(0x3c5d0476, 0xad64c171),
+     TOBN(0x87410371, 0x2a914428), TOBN(0x8096a891, 0x90e2fc29),
+     TOBN(0xd3d2ae9d, 0x23b3ebc2), TOBN(0x90bdd6db, 0xa580cfd6),
+     TOBN(0x52dbb7f3, 0xc5b01f6c), TOBN(0xe68eded4, 0xe102a2dc),
+     TOBN(0x17785b77, 0x99eb6df0), TOBN(0x26c3cc51, 0x7386b779),
+     TOBN(0x345ed988, 0x6417a48e), TOBN(0xe990b4e4, 0x07d6ef31),
+     TOBN(0x0f456b7e, 0x2586abba), TOBN(0x239ca6a5, 0x59c96e9a),
+     TOBN(0xe327459c, 0xe2eb4206), TOBN(0x3a4c3313, 0xa002b90a),
+     TOBN(0x2a114806, 0xf6a3f6fb), TOBN(0xad5cad2f, 0x85c251dd),
+     TOBN(0x92c1f613, 0xf5a784d3), TOBN(0xec7bfacf, 0x349766d5),
+     TOBN(0x04b3cd33, 0x3e23cb3b), TOBN(0x3979fe84, 0xc5a64b2d),
+     TOBN(0x192e2720, 0x7e589106), TOBN(0xa60c43d1, 0xa15b527f),
+     TOBN(0x2dae9082, 0xbe7cf3a6), TOBN(0xcc86ba92, 0xbc967274),
+     TOBN(0xf28a2ce8, 0xaea0a8a9), TOBN(0x404ca6d9, 0x6ee988b3),
+     TOBN(0xfd7e9c5d, 0x005921b8), TOBN(0xf56297f1, 0x44e79bf9),
+     TOBN(0xa163b460, 0x0d75ddc2), TOBN(0x30b23616, 0xa1f2be87),
+     TOBN(0x4b070d21, 0xbfe50e2b), TOBN(0x7ef8cfd0, 0xe1bfede1),
+     TOBN(0xadba0011, 0x2aac4ae0), TOBN(0x2a3e7d01, 0xb9ebd033),
+     TOBN(0x995277ec, 0xe38d9d1c), TOBN(0xb500249e, 0x9c5d2de3),
+     TOBN(0x8912b820, 0xf13ca8c9), TOBN(0xc8798114, 0x877793af),
+     TOBN(0x19e6125d, 0xec3f1dec), TOBN(0x07b1f040, 0x911178da),
+     TOBN(0xd93ededa, 0x904a6738), TOBN(0x55187a5a, 0x0bebedcd),
+     TOBN(0xf7d04722, 0xeb329d41), TOBN(0xf449099e, 0xf170b391),
+     TOBN(0xfd317a69, 0xca99f828), TOBN(0x50c3db2b, 0x34a4976d),
+     TOBN(0xe9ba7784, 0x3757b392), TOBN(0x326caefd, 0xaa3ca05a),
+     TOBN(0x78e5293b, 0xf1e593d4), TOBN(0x7842a937, 0x0d98fd13),
+     TOBN(0xe694bf96, 0x5f96b10d), TOBN(0x373a9df6, 0x06a8cd05),
+     TOBN(0x997d1e51, 0xe8f0c7fc), TOBN(0x1d019790, 0x63fd972e),
+     TOBN(0x0064d858, 0x5499fb32), TOBN(0x7b67bad9, 0x77a8aeb7),
+     TOBN(0x1d3eb977, 0x2d08eec5), TOBN(0x5fc047a6, 0xcbabae1d),
+     TOBN(0x0577d159, 0xe54a64bb), TOBN(0x8862201b, 0xc43497e4),
+     TOBN(0xad6b4e28, 0x2ce0608d), TOBN(0x8b687b7d, 0x0b167aac),
+     TOBN(0x6ed4d367, 0x8b2ecfa9), TOBN(0x24dfe62d, 0xa90c3c38),
+     TOBN(0xa1862e10, 0x3fe5c42b), TOBN(0x1ca73dca, 0xd5732a9f),
+     TOBN(0x35f038b7, 0x76bb87ad), TOBN(0x674976ab, 0xf242b81f),
+     TOBN(0x4f2bde7e, 0xb0fd90cd), TOBN(0x6efc172e, 0xa7fdf092),
+     TOBN(0x3806b69b, 0x92222f1f), TOBN(0x5a2459ca, 0x6cf7ae70),
+     TOBN(0x6789f69c, 0xa85217ee), TOBN(0x5f232b5e, 0xe3dc85ac),
+     TOBN(0x660e3ec5, 0x48e9e516), TOBN(0x124b4e47, 0x3197eb31),
+     TOBN(0x10a0cb13, 0xaafcca23), TOBN(0x7bd63ba4, 0x8213224f),
+     TOBN(0xaffad7cc, 0x290a7f4f), TOBN(0x6b409c9e, 0x0286b461),
+     TOBN(0x58ab809f, 0xffa407af), TOBN(0xc3122eed, 0xc68ac073),
+     TOBN(0x17bf9e50, 0x4ef24d7e), TOBN(0x5d929794, 0x3e2a5811),
+     TOBN(0x519bc867, 0x02902e01), TOBN(0x76bba5da, 0x39c8a851),
+     TOBN(0xe9f9669c, 0xda94951e), TOBN(0x4b6af58d, 0x66b8d418),
+     TOBN(0xfa321074, 0x17d426a4), TOBN(0xc78e66a9, 0x9dde6027),
+     TOBN(0x0516c083, 0x4a53b964), TOBN(0xfc659d38, 0xff602330),
+     TOBN(0x0ab55e5c, 0x58c5c897), TOBN(0x985099b2, 0x838bc5df),
+     TOBN(0x061d9efc, 0xc52fc238), TOBN(0x712b2728, 0x6ac1da3f),
+     TOBN(0xfb658149, 0x9283fe08), TOBN(0x4954ac94, 0xb8aaa2f7),
+     TOBN(0x85c0ada4, 0x7fb2e74f), TOBN(0xee8ba98e, 0xb89926b0),
+     TOBN(0xe4f9d37d, 0x23d1af5b), TOBN(0x14ccdbf9, 0xba9b015e),
+     TOBN(0xb674481b, 0x7bfe7178), TOBN(0x4e1debae, 0x65405868),
+     TOBN(0x061b2821, 0xc48c867d), TOBN(0x69c15b35, 0x513b30ea),
+     TOBN(0x3b4a1666, 0x36871088), TOBN(0xe5e29f5d, 0x1220b1ff),
+     TOBN(0x4b82bb35, 0x233d9f4d), TOBN(0x4e076333, 0x18cdc675)}
+    ,
+    {TOBN(0x0d53f5c7, 0xa3e6fced), TOBN(0xe8cbbdd5, 0xf45fbdeb),
+     TOBN(0xf85c01df, 0x13339a70), TOBN(0x0ff71880, 0x142ceb81),
+     TOBN(0x4c4e8774, 0xbd70437a), TOBN(0x5fb32891, 0xba0bda6a),
+     TOBN(0x1cdbebd2, 0xf18bd26e), TOBN(0x2f9526f1, 0x03a9d522),
+     TOBN(0x40ce3051, 0x92c4d684), TOBN(0x8b04d725, 0x7612efcd),
+     TOBN(0xb9dcda36, 0x6f9cae20), TOBN(0x0edc4d24, 0xf058856c),
+     TOBN(0x64f2e6bf, 0x85427900), TOBN(0x3de81295, 0xdc09dfea),
+     TOBN(0xd41b4487, 0x379bf26c), TOBN(0x50b62c6d, 0x6df135a9),
+     TOBN(0xd4f8e3b4, 0xc72dfe67), TOBN(0xc416b0f6, 0x90e19fdf),
+     TOBN(0x18b9098d, 0x4c13bd35), TOBN(0xac11118a, 0x15b8cb9e),
+     TOBN(0xf598a318, 0xf0062841), TOBN(0xbfe0602f, 0x89f356f4),
+     TOBN(0x7ae3637e, 0x30177a0c), TOBN(0x34097747, 0x61136537),
+     TOBN(0x0db2fb5e, 0xd005832a), TOBN(0x5f5efd3b, 0x91042e4f),
+     TOBN(0x8c4ffdc6, 0xed70f8ca), TOBN(0xe4645d0b, 0xb52da9cc),
+     TOBN(0x9596f58b, 0xc9001d1f), TOBN(0x52c8f0bc, 0x4e117205),
+     TOBN(0xfd4aa0d2, 0xe398a084), TOBN(0x815bfe3a, 0x104f49de),
+     TOBN(0x97e5443f, 0x23885e5f), TOBN(0xf72f8f99, 0xe8433aab),
+     TOBN(0xbd00b154, 0xe4d4e604), TOBN(0xd0b35e6a, 0xe5e173ff),
+     TOBN(0x57b2a048, 0x9164722d), TOBN(0x3e3c665b, 0x88761ec8),
+     TOBN(0x6bdd1397, 0x3da83832), TOBN(0x3c8b1a1e, 0x73dafe3b),
+     TOBN(0x4497ace6, 0x54317cac), TOBN(0xbe600ab9, 0x521771b3),
+     TOBN(0xb42e409e, 0xb0dfe8b8), TOBN(0x386a67d7, 0x3942310f),
+     TOBN(0x25548d8d, 0x4431cc28), TOBN(0xa7cff142, 0x985dc524),
+     TOBN(0x4d60f5a1, 0x93c4be32), TOBN(0x83ebd5c8, 0xd071c6e1),
+     TOBN(0xba3a80a7, 0xb1fd2b0b), TOBN(0x9b3ad396, 0x5bec33e8),
+     TOBN(0xb3868d61, 0x79743fb3), TOBN(0xcfd169fc, 0xfdb462fa),
+     TOBN(0xd3b499d7, 0x9ce0a6af), TOBN(0x55dc1cf1, 0xe42d3ff8),
+     TOBN(0x04fb9e6c, 0xc6c3e1b2), TOBN(0x47e6961d, 0x6f69a474),
+     TOBN(0x54eb3acc, 0xe548b37b), TOBN(0xb38e7542, 0x84d40549),
+     TOBN(0x8c3daa51, 0x7b341b4f), TOBN(0x2f6928ec, 0x690bf7fa),
+     TOBN(0x0496b323, 0x86ce6c41), TOBN(0x01be1c55, 0x10adadcd),
+     TOBN(0xc04e67e7, 0x4bb5faf9), TOBN(0x3cbaf678, 0xe15c9985),
+     TOBN(0x8cd12145, 0x50ca4247), TOBN(0xba1aa47a, 0xe7dd30aa),
+     TOBN(0x2f81ddf1, 0xe58fee24), TOBN(0x03452936, 0xeec9b0e8),
+     TOBN(0x8bdc3b81, 0x243aea96), TOBN(0x9a2919af, 0x15c3d0e5),
+     TOBN(0x9ea640ec, 0x10948361), TOBN(0x5ac86d5b, 0x6e0bcccf),
+     TOBN(0xf892d918, 0xc36cf440), TOBN(0xaed3e837, 0xc939719c),
+     TOBN(0xb07b08d2, 0xc0218b64), TOBN(0x6f1bcbba, 0xce9790dd),
+     TOBN(0x4a84d6ed, 0x60919b8e), TOBN(0xd8900791, 0x8ac1f9eb),
+     TOBN(0xf84941aa, 0x0dd5daef), TOBN(0xb22fe40a, 0x67fd62c5),
+     TOBN(0x97e15ba2, 0x157f2db3), TOBN(0xbda2fc8f, 0x8e28ca9c),
+     TOBN(0x5d050da4, 0x37b9f454), TOBN(0x3d57eb57, 0x2379d72e),
+     TOBN(0xe9b5eba2, 0xfb5ee997), TOBN(0x01648ca2, 0xe11538ca),
+     TOBN(0x32bb76f6, 0xf6327974), TOBN(0x338f14b8, 0xff3f4bb7),
+     TOBN(0x524d226a, 0xd7ab9a2d), TOBN(0x9c00090d, 0x7dfae958),
+     TOBN(0x0ba5f539, 0x8751d8c2), TOBN(0x8afcbcdd, 0x3ab8262d),
+     TOBN(0x57392729, 0xe99d043b), TOBN(0xef51263b, 0xaebc943a),
+     TOBN(0x9feace93, 0x20862935), TOBN(0x639efc03, 0xb06c817b),
+     TOBN(0x1fe054b3, 0x66b4be7a), TOBN(0x3f25a9de, 0x84a37a1e),
+     TOBN(0xf39ef1ad, 0x78d75cd9), TOBN(0xd7b58f49, 0x5062c1b5),
+     TOBN(0x6f74f9a9, 0xff563436), TOBN(0xf718ff29, 0xe8af51e7),
+     TOBN(0x5234d313, 0x15e97fec), TOBN(0xb6a8e2b1, 0x292f1c0a),
+     TOBN(0xa7f53aa8, 0x327720c1), TOBN(0x956ca322, 0xba092cc8),
+     TOBN(0x8f03d64a, 0x28746c4d), TOBN(0x51fe1782, 0x66d0d392),
+     TOBN(0xd19b34db, 0x3c832c80), TOBN(0x60dccc5c, 0x6da2e3b4),
+     TOBN(0x245dd62e, 0x0a104ccc), TOBN(0xa7ab1de1, 0x620b21fd),
+     TOBN(0xb293ae0b, 0x3893d123), TOBN(0xf7b75783, 0xb15ee71c),
+     TOBN(0x5aa3c614, 0x42a9468b), TOBN(0xd686123c, 0xdb15d744),
+     TOBN(0x8c616891, 0xa7ab4116), TOBN(0x6fcd72c8, 0xa4e6a459),
+     TOBN(0xac219110, 0x77e5fad7), TOBN(0xfb6a20e7, 0x704fa46b),
+     TOBN(0xe839be7d, 0x341d81dc), TOBN(0xcddb6889, 0x32148379),
+     TOBN(0xda6211a1, 0xf7026ead), TOBN(0xf3b2575f, 0xf4d1cc5e),
+     TOBN(0x40cfc8f6, 0xa7a73ae6), TOBN(0x83879a5e, 0x61d5b483),
+     TOBN(0xc5acb1ed, 0x41a50ebc), TOBN(0x59a60cc8, 0x3c07d8fa),
+     TOBN(0x1b73bdce, 0xb1876262), TOBN(0x2b0d79f0, 0x12af4ee9),
+     TOBN(0x8bcf3b0b, 0xd46e1d07), TOBN(0x17d6af9d, 0xe45d152f),
+     TOBN(0x73520461, 0x6d736451), TOBN(0x43cbbd97, 0x56b0bf5a),
+     TOBN(0xb0833a5b, 0xd5999b9d), TOBN(0x702614f0, 0xeb72e398),
+     TOBN(0x0aadf01a, 0x59c3e9f8), TOBN(0x40200e77, 0xce6b3d16),
+     TOBN(0xda22bdd3, 0xdeddafad), TOBN(0x76dedaf4, 0x310d72e1),
+     TOBN(0x49ef807c, 0x4bc2e88f), TOBN(0x6ba81291, 0x146dd5a5),
+     TOBN(0xa1a4077a, 0x7d8d59e9), TOBN(0x87b6a2e7, 0x802db349),
+     TOBN(0xd5679997, 0x1b4e598e), TOBN(0xf499ef1f, 0x06fe4b1d),
+     TOBN(0x3978d3ae, 0xfcb267c5), TOBN(0xb582b557, 0x235786d0),
+     TOBN(0x32b3b2ca, 0x1715cb07), TOBN(0x4c3de6a2, 0x8480241d),
+     TOBN(0x63b5ffed, 0xcb571ecd), TOBN(0xeaf53900, 0xed2fe9a9),
+     TOBN(0xdec98d4a, 0xc3b81990), TOBN(0x1cb83722, 0x9e0cc8fe),
+     TOBN(0xfe0b0491, 0xd2b427b9), TOBN(0x0f2386ac, 0xe983a66c),
+     TOBN(0x930c4d1e, 0xb3291213), TOBN(0xa2f82b2e, 0x59a62ae4),
+     TOBN(0x77233853, 0xf93e89e3), TOBN(0x7f8063ac, 0x11777c7f),
+     TOBN(0xff0eb567, 0x59ad2877), TOBN(0x6f454642, 0x9865c754),
+     TOBN(0xe6fe701a, 0x236e9a84), TOBN(0xc586ef16, 0x06e40fc3),
+     TOBN(0x3f62b6e0, 0x24bafad9), TOBN(0xc8b42bd2, 0x64da906a),
+     TOBN(0xc98e1eb4, 0xda3276a0), TOBN(0x30d0e5fc, 0x06cbf852),
+     TOBN(0x1b6b2ae1, 0xe8b4dfd4), TOBN(0xd754d5c7, 0x8301cbac),
+     TOBN(0x66097629, 0x112a39ac), TOBN(0xf86b5999, 0x93ba4ab9),
+     TOBN(0x26c9dea7, 0x99f9d581), TOBN(0x0473b1a8, 0xc2fafeaa),
+     TOBN(0x1469af55, 0x3b2505a5), TOBN(0x227d16d7, 0xd6a43323),
+     TOBN(0x3316f73c, 0xad3d97f9), TOBN(0x52bf3bb5, 0x1f137455),
+     TOBN(0x953eafeb, 0x09954e7c), TOBN(0xa721dfed, 0xdd732411),
+     TOBN(0xb4929821, 0x141d4579), TOBN(0x3411321c, 0xaa3bd435),
+     TOBN(0xafb355aa, 0x17fa6015), TOBN(0xb4e7ef4a, 0x18e42f0e),
+     TOBN(0x604ac97c, 0x59371000), TOBN(0xe1c48c70, 0x7f759c18),
+     TOBN(0x3f62ecc5, 0xa5db6b65), TOBN(0x0a78b173, 0x38a21495),
+     TOBN(0x6be1819d, 0xbcc8ad94), TOBN(0x70dc04f6, 0xd89c3400),
+     TOBN(0x462557b4, 0xa6b4840a), TOBN(0x544c6ade, 0x60bd21c0),
+     TOBN(0x6a00f24e, 0x907a544b), TOBN(0xa7520dcb, 0x313da210),
+     TOBN(0xfe939b75, 0x11e4994b), TOBN(0x918b6ba6, 0xbc275d70),
+     TOBN(0xd3e5e0fc, 0x644be892), TOBN(0x707a9816, 0xfdaf6c42),
+     TOBN(0x60145567, 0xf15c13fe), TOBN(0x4818ebaa, 0xe130a54a),
+     TOBN(0x28aad3ad, 0x58d2f767), TOBN(0xdc5267fd, 0xd7e7c773),
+     TOBN(0x4919cc88, 0xc3afcc98), TOBN(0xaa2e6ab0, 0x2db8cd4b),
+     TOBN(0xd46fec04, 0xd0c63eaa), TOBN(0xa1cb92c5, 0x19ffa832),
+     TOBN(0x678dd178, 0xe43a631f), TOBN(0xfb5ae1cd, 0x3dc788b3),
+     TOBN(0x68b4fb90, 0x6e77de04), TOBN(0x7992bcf0, 0xf06dbb97),
+     TOBN(0x896e6a13, 0xc417c01d), TOBN(0x8d96332c, 0xb956be01),
+     TOBN(0x902fc93a, 0x413aa2b9), TOBN(0x99a4d915, 0xfc98c8a5),
+     TOBN(0x52c29407, 0x565f1137), TOBN(0x4072690f, 0x21e4f281),
+     TOBN(0x36e607cf, 0x02ff6072), TOBN(0xa47d2ca9, 0x8ad98cdc),
+     TOBN(0xbf471d1e, 0xf5f56609), TOBN(0xbcf86623, 0xf264ada0),
+     TOBN(0xb70c0687, 0xaa9e5cb6), TOBN(0xc98124f2, 0x17401c6c),
+     TOBN(0x8189635f, 0xd4a61435), TOBN(0xd28fb8af, 0xa9d98ea6),
+     TOBN(0xb9a67c2a, 0x40c251f8), TOBN(0x88cd5d87, 0xa2da44be),
+     TOBN(0x437deb96, 0xe09b5423), TOBN(0x150467db, 0x64287dc1),
+     TOBN(0xe161debb, 0xcdabb839), TOBN(0xa79e9742, 0xf1839a3e),
+     TOBN(0xbb8dd3c2, 0x652d202b), TOBN(0x7b3e67f7, 0xe9f97d96),
+     TOBN(0x5aa5d78f, 0xb1cb6ac9), TOBN(0xffa13e8e, 0xca1d0d45),
+     TOBN(0x369295dd, 0x2ba5bf95), TOBN(0xd68bd1f8, 0x39aff05e),
+     TOBN(0xaf0d86f9, 0x26d783f2), TOBN(0x543a59b3, 0xfc3aafc1),
+     TOBN(0x3fcf81d2, 0x7b7da97c), TOBN(0xc990a056, 0xd25dee46),
+     TOBN(0x3e6775b8, 0x519cce2c), TOBN(0xfc9af71f, 0xae13d863),
+     TOBN(0x774a4a6f, 0x47c1605c), TOBN(0x46ba4245, 0x2fd205e8),
+     TOBN(0xa06feea4, 0xd3fd524d), TOBN(0x1e724641, 0x6de1acc2),
+     TOBN(0xf53816f1, 0x334e2b42), TOBN(0x49e5918e, 0x922f0024),
+     TOBN(0x439530b6, 0x65c7322d), TOBN(0xcf12cc01, 0xb3c1b3fb),
+     TOBN(0xc70b0186, 0x0172f685), TOBN(0xb915ee22, 0x1b58391d),
+     TOBN(0x9afdf03b, 0xa317db24), TOBN(0x87dec659, 0x17b8ffc4),
+     TOBN(0x7f46597b, 0xe4d3d050), TOBN(0x80a1c1ed, 0x006500e7),
+     TOBN(0x84902a96, 0x78bf030e), TOBN(0xfb5e9c9a, 0x50560148),
+     TOBN(0x6dae0a92, 0x63362426), TOBN(0xdcaeecf4, 0xa9e30c40),
+     TOBN(0xc0d887bb, 0x518d0c6b), TOBN(0x99181152, 0xcb985b9d),
+     TOBN(0xad186898, 0xef7bc381), TOBN(0x18168ffb, 0x9ee46201),
+     TOBN(0x9a04cdaa, 0x2502753c), TOBN(0xbb279e26, 0x51407c41),
+     TOBN(0xeacb03aa, 0xf23564e5), TOBN(0x18336582, 0x71e61016),
+     TOBN(0x8684b8c4, 0xeb809877), TOBN(0xb336e18d, 0xea0e672e),
+     TOBN(0xefb601f0, 0x34ee5867), TOBN(0x2733edbe, 0x1341cfd1),
+     TOBN(0xb15e809a, 0x26025c3c), TOBN(0xe6e981a6, 0x9350df88),
+     TOBN(0x92376237, 0x8502fd8e), TOBN(0x4791f216, 0x0c12be9b),
+     TOBN(0xb7256789, 0x25f02425), TOBN(0xec863194, 0x7a974443),
+     TOBN(0x7c0ce882, 0xfb41cc52), TOBN(0xc266ff7e, 0xf25c07f2),
+     TOBN(0x3d4da8c3, 0x017025f3), TOBN(0xefcf628c, 0xfb9579b4),
+     TOBN(0x5c4d0016, 0x1f3716ec), TOBN(0x9c27ebc4, 0x6801116e),
+     TOBN(0x5eba0ea1, 0x1da1767e), TOBN(0xfe151452, 0x47004c57),
+     TOBN(0x3ace6df6, 0x8c2373b7), TOBN(0x75c3dffe, 0x5dbc37ac),
+     TOBN(0x3dc32a73, 0xddc925fc), TOBN(0xb679c841, 0x2f65ee0b),
+     TOBN(0x715a3295, 0x451cbfeb), TOBN(0xd9889768, 0xf76e9a29),
+     TOBN(0xec20ce7f, 0xb28ad247), TOBN(0xe99146c4, 0x00894d79),
+     TOBN(0x71457d7c, 0x9f5e3ea7), TOBN(0x097b2662, 0x38030031),
+     TOBN(0xdb7f6ae6, 0xcf9f82a8), TOBN(0x319decb9, 0x438f473a),
+     TOBN(0xa63ab386, 0x283856c3), TOBN(0x13e3172f, 0xb06a361b),
+     TOBN(0x2959f8dc, 0x7d5a006c), TOBN(0x2dbc27c6, 0x75fba752),
+     TOBN(0xc1227ab2, 0x87c22c9e), TOBN(0x06f61f75, 0x71a268b2),
+     TOBN(0x1b6bb971, 0x04779ce2), TOBN(0xaca83812, 0x0aadcb1d),
+     TOBN(0x297ae0bc, 0xaeaab2d5), TOBN(0xa5c14ee7, 0x5bfb9f13),
+     TOBN(0xaa00c583, 0xf17a62c7), TOBN(0x39eb962c, 0x173759f6),
+     TOBN(0x1eeba1d4, 0x86c9a88f), TOBN(0x0ab6c37a, 0xdf016c5e),
+     TOBN(0xa2a147db, 0xa28a0749), TOBN(0x246c20d6, 0xee519165),
+     TOBN(0x5068d1b1, 0xd3810715), TOBN(0xb1e7018c, 0x748160b9),
+     TOBN(0x03f5b1fa, 0xf380ff62), TOBN(0xef7fb1dd, 0xf3cb2c1e),
+     TOBN(0xeab539a8, 0xfc91a7da), TOBN(0x83ddb707, 0xf3f9b561),
+     TOBN(0xc550e211, 0xfe7df7a4), TOBN(0xa7cd07f2, 0x063f6f40),
+     TOBN(0xb0de3635, 0x2976879c), TOBN(0xb5f83f85, 0xe55741da),
+     TOBN(0x4ea9d25e, 0xf3d8ac3d), TOBN(0x6fe2066f, 0x62819f02),
+     TOBN(0x4ab2b9c2, 0xcef4a564), TOBN(0x1e155d96, 0x5ffa2de3),
+     TOBN(0x0eb0a19b, 0xc3a72d00), TOBN(0x4037665b, 0x8513c31b),
+     TOBN(0x2fb2b6bf, 0x04c64637), TOBN(0x45c34d6e, 0x08cdc639),
+     TOBN(0x56f1e10f, 0xf01fd796), TOBN(0x4dfb8101, 0xfe3667b8),
+     TOBN(0xe0eda253, 0x9021d0c0), TOBN(0x7a94e9ff, 0x8a06c6ab),
+     TOBN(0x2d3bb0d9, 0xbb9aa882), TOBN(0xea20e4e5, 0xec05fd10),
+     TOBN(0xed7eeb5f, 0x1a1ca64e), TOBN(0x2fa6b43c, 0xc6327cbd),
+     TOBN(0xb577e3cf, 0x3aa91121), TOBN(0x8c6bd5ea, 0x3a34079b),
+     TOBN(0xd7e5ba39, 0x60e02fc0), TOBN(0xf16dd2c3, 0x90141bf8),
+     TOBN(0xb57276d9, 0x80101b98), TOBN(0x760883fd, 0xb82f0f66),
+     TOBN(0x89d7de75, 0x4bc3eff3), TOBN(0x03b60643, 0x5dc2ab40),
+     TOBN(0xcd6e53df, 0xe05beeac), TOBN(0xf2f1e862, 0xbc3325cd),
+     TOBN(0xdd0f7921, 0x774f03c3), TOBN(0x97ca7221, 0x4552cc1b),
+     TOBN(0x5a0d6afe, 0x1cd19f72), TOBN(0xa20915dc, 0xf183fbeb),
+     TOBN(0x9fda4b40, 0x832c403c), TOBN(0x32738edd, 0xbe425442),
+     TOBN(0x469a1df6, 0xb5eccf1a), TOBN(0x4b5aff42, 0x28bbe1f0),
+     TOBN(0x31359d7f, 0x570dfc93), TOBN(0xa18be235, 0xf0088628),
+     TOBN(0xa5b30fba, 0xb00ed3a9), TOBN(0x34c61374, 0x73cdf8be),
+     TOBN(0x2c5c5f46, 0xabc56797), TOBN(0x5cecf93d, 0xb82a8ae2),
+     TOBN(0x7d3dbe41, 0xa968fbf0), TOBN(0xd23d4583, 0x1a5c7f3d),
+     TOBN(0xf28f69a0, 0xc087a9c7), TOBN(0xc2d75471, 0x474471ca),
+     TOBN(0x36ec9f4a, 0x4eb732ec), TOBN(0x6c943bbd, 0xb1ca6bed),
+     TOBN(0xd64535e1, 0xf2457892), TOBN(0x8b84a8ea, 0xf7e2ac06),
+     TOBN(0xe0936cd3, 0x2499dd5f), TOBN(0x12053d7e, 0x0ed04e57),
+     TOBN(0x4bdd0076, 0xe4305d9d), TOBN(0x34a527b9, 0x1f67f0a2),
+     TOBN(0xe79a4af0, 0x9cec46ea), TOBN(0xb15347a1, 0x658b9bc7),
+     TOBN(0x6bd2796f, 0x35af2f75), TOBN(0xac957990, 0x4051c435),
+     TOBN(0x2669dda3, 0xc33a655d), TOBN(0x5d503c2e, 0x88514aa3),
+     TOBN(0xdfa11337, 0x3753dd41), TOBN(0x3f054673, 0x0b754f78),
+     TOBN(0xbf185677, 0x496125bd), TOBN(0xfb0023c8, 0x3775006c),
+     TOBN(0xfa0f072f, 0x3a037899), TOBN(0x4222b6eb, 0x0e4aea57),
+     TOBN(0x3dde5e76, 0x7866d25a), TOBN(0xb6eb04f8, 0x4837aa6f),
+     TOBN(0x5315591a, 0x2cf1cdb8), TOBN(0x6dfb4f41, 0x2d4e683c),
+     TOBN(0x7e923ea4, 0x48ee1f3a), TOBN(0x9604d9f7, 0x05a2afd5),
+     TOBN(0xbe1d4a33, 0x40ea4948), TOBN(0x5b45f1f4, 0xb44cbd2f),
+     TOBN(0x5faf8376, 0x4acc757e), TOBN(0xa7cf9ab8, 0x63d68ff7),
+     TOBN(0x8ad62f69, 0xdf0e404b), TOBN(0xd65f33c2, 0x12bdafdf),
+     TOBN(0xc365de15, 0xa377b14e), TOBN(0x6bf5463b, 0x8e39f60c),
+     TOBN(0x62030d2d, 0x2ce68148), TOBN(0xd95867ef, 0xe6f843a8),
+     TOBN(0xd39a0244, 0xef5ab017), TOBN(0x0bd2d8c1, 0x4ab55d12),
+     TOBN(0xc9503db3, 0x41639169), TOBN(0x2d4e25b0, 0xf7660c8a),
+     TOBN(0x760cb3b5, 0xe224c5d7), TOBN(0xfa3baf8c, 0x68616919),
+     TOBN(0x9fbca113, 0x8d142552), TOBN(0x1ab18bf1, 0x7669ebf5),
+     TOBN(0x55e6f53e, 0x9bdf25dd), TOBN(0x04cc0bf3, 0xcb6cd154),
+     TOBN(0x595bef49, 0x95e89080), TOBN(0xfe9459a8, 0x104a9ac1),
+     TOBN(0xad2d89ca, 0xcce9bb32), TOBN(0xddea65e1, 0xf7de8285),
+     TOBN(0x62ed8c35, 0xb351bd4b), TOBN(0x4150ff36, 0x0c0e19a7),
+     TOBN(0x86e3c801, 0x345f4e47), TOBN(0x3bf21f71, 0x203a266c),
+     TOBN(0x7ae110d4, 0x855b1f13), TOBN(0x5d6aaf6a, 0x07262517),
+     TOBN(0x1e0f12e1, 0x813d28f1), TOBN(0x6000e11d, 0x7ad7a523),
+     TOBN(0xc7d8deef, 0xc744a17b), TOBN(0x1e990b48, 0x14c05a00),
+     TOBN(0x68fddaee, 0x93e976d5), TOBN(0x696241d1, 0x46610d63),
+     TOBN(0xb204e7c3, 0x893dda88), TOBN(0x8bccfa65, 0x6a3a6946),
+     TOBN(0xb59425b4, 0xc5cd1411), TOBN(0x701b4042, 0xff3658b1),
+     TOBN(0xe3e56bca, 0x4784cf93), TOBN(0x27de5f15, 0x8fe68d60),
+     TOBN(0x4ab9cfce, 0xf8d53f19), TOBN(0xddb10311, 0xa40a730d),
+     TOBN(0x6fa73cd1, 0x4eee0a8a), TOBN(0xfd548748, 0x5249719d),
+     TOBN(0x49d66316, 0xa8123ef0), TOBN(0x73c32db4, 0xe7f95438),
+     TOBN(0x2e2ed209, 0x0d9e7854), TOBN(0xf98a9329, 0x9d9f0507),
+     TOBN(0xc5d33cf6, 0x0c6aa20a), TOBN(0x9a32ba14, 0x75279bb2),
+     TOBN(0x7e3202cb, 0x774a7307), TOBN(0x64ed4bc4, 0xe8c42dbd),
+     TOBN(0xc20f1a06, 0xd4caed0d), TOBN(0xb8021407, 0x171d22b3),
+     TOBN(0xd426ca04, 0xd13268d7), TOBN(0x92377007, 0x25f4d126),
+     TOBN(0x4204cbc3, 0x71f21a85), TOBN(0x18461b7a, 0xf82369ba),
+     TOBN(0xc0c07d31, 0x3fc858f9), TOBN(0x5deb5a50, 0xe2bab569),
+     TOBN(0xd5959d46, 0xd5eea89e), TOBN(0xfdff8424, 0x08437f4b),
+     TOBN(0xf21071e4, 0x3cfe254f), TOBN(0x72417696, 0x95468321),
+     TOBN(0x5d8288b9, 0x102cae3e), TOBN(0x2d143e3d, 0xf1965dff),
+     TOBN(0x00c9a376, 0xa078d847), TOBN(0x6fc0da31, 0x26028731),
+     TOBN(0xa2baeadf, 0xe45083a2), TOBN(0x66bc7218, 0x5e5b4bcd),
+     TOBN(0x2c826442, 0xd04b8e7f), TOBN(0xc19f5451, 0x6c4b586b),
+     TOBN(0x60182c49, 0x5b7eeed5), TOBN(0xd9954ecd, 0x7aa9dfa1),
+     TOBN(0xa403a8ec, 0xc73884ad), TOBN(0x7fb17de2, 0x9bb39041),
+     TOBN(0x694b64c5, 0xabb020e8), TOBN(0x3d18c184, 0x19c4eec7),
+     TOBN(0x9c4673ef, 0x1c4793e5), TOBN(0xc7b8aeb5, 0x056092e6),
+     TOBN(0x3aa1ca43, 0xf0f8c16b), TOBN(0x224ed5ec, 0xd679b2f6),
+     TOBN(0x0d56eeaf, 0x55a205c9), TOBN(0xbfe115ba, 0x4b8e028b),
+     TOBN(0x97e60849, 0x3927f4fe), TOBN(0xf91fbf94, 0x759aa7c5),
+     TOBN(0x985af769, 0x6be90a51), TOBN(0xc1277b78, 0x78ccb823),
+     TOBN(0x395b656e, 0xe7a75952), TOBN(0x00df7de0, 0x928da5f5),
+     TOBN(0x09c23175, 0x4ca4454f), TOBN(0x4ec971f4, 0x7aa2d3c1),
+     TOBN(0x45c3c507, 0xe75d9ccc), TOBN(0x63b7be8a, 0x3dc90306),
+     TOBN(0x37e09c66, 0x5db44bdc), TOBN(0x50d60da1, 0x6841c6a2),
+     TOBN(0x6f9b65ee, 0x08df1b12), TOBN(0x38734879, 0x7ff089df),
+     TOBN(0x9c331a66, 0x3fe8013d), TOBN(0x017f5de9, 0x5f42fcc8),
+     TOBN(0x43077866, 0xe8e57567), TOBN(0xc9f781ce, 0xf9fcdb18),
+     TOBN(0x38131dda, 0x9b12e174), TOBN(0x25d84aa3, 0x8a03752a),
+     TOBN(0x45e09e09, 0x4d0c0ce2), TOBN(0x1564008b, 0x92bebba5),
+     TOBN(0xf7e8ad31, 0xa87284c7), TOBN(0xb7c4b46c, 0x97e7bbaa),
+     TOBN(0x3e22a7b3, 0x97acf4ec), TOBN(0x0426c400, 0x5ea8b640),
+     TOBN(0x5e3295a6, 0x4e969285), TOBN(0x22aabc59, 0xa6a45670),
+     TOBN(0xb929714c, 0x5f5942bc), TOBN(0x9a6168bd, 0xfa3182ed),
+     TOBN(0x2216a665, 0x104152ba), TOBN(0x46908d03, 0xb6926368)}
+    ,
+    {TOBN(0xa9f5d874, 0x5a1251fb), TOBN(0x967747a8, 0xc72725c7),
+     TOBN(0x195c33e5, 0x31ffe89e), TOBN(0x609d210f, 0xe964935e),
+     TOBN(0xcafd6ca8, 0x2fe12227), TOBN(0xaf9b5b96, 0x0426469d),
+     TOBN(0x2e9ee04c, 0x5693183c), TOBN(0x1084a333, 0xc8146fef),
+     TOBN(0x96649933, 0xaed1d1f7), TOBN(0x566eaff3, 0x50563090),
+     TOBN(0x345057f0, 0xad2e39cf), TOBN(0x148ff65b, 0x1f832124),
+     TOBN(0x042e89d4, 0xcf94cf0d), TOBN(0x319bec84, 0x520c58b3),
+     TOBN(0x2a267626, 0x5361aa0d), TOBN(0xc86fa302, 0x8fbc87ad),
+     TOBN(0xfc83d2ab, 0x5c8b06d5), TOBN(0xb1a785a2, 0xfe4eac46),
+     TOBN(0xb99315bc, 0x846f7779), TOBN(0xcf31d816, 0xef9ea505),
+     TOBN(0x2391fe6a, 0x15d7dc85), TOBN(0x2f132b04, 0xb4016b33),
+     TOBN(0x29547fe3, 0x181cb4c7), TOBN(0xdb66d8a6, 0x650155a1),
+     TOBN(0x6b66d7e1, 0xadc1696f), TOBN(0x98ebe593, 0x0acd72d0),
+     TOBN(0x65f24550, 0xcc1b7435), TOBN(0xce231393, 0xb4b9a5ec),
+     TOBN(0x234a22d4, 0xdb067df9), TOBN(0x98dda095, 0xcaff9b00),
+     TOBN(0x1bbc75a0, 0x6100c9c1), TOBN(0x1560a9c8, 0x939cf695),
+     TOBN(0xcf006d3e, 0x99e0925f), TOBN(0x2dd74a96, 0x6322375a),
+     TOBN(0xc58b446a, 0xb56af5ba), TOBN(0x50292683, 0xe0b9b4f1),
+     TOBN(0xe2c34cb4, 0x1aeaffa3), TOBN(0x8b17203f, 0x9b9587c1),
+     TOBN(0x6d559207, 0xead1350c), TOBN(0x2b66a215, 0xfb7f9604),
+     TOBN(0x0850325e, 0xfe51bf74), TOBN(0x9c4f579e, 0x5e460094),
+     TOBN(0x5c87b92a, 0x76da2f25), TOBN(0x889de4e0, 0x6febef33),
+     TOBN(0x6900ec06, 0x646083ce), TOBN(0xbe2a0335, 0xbfe12773),
+     TOBN(0xadd1da35, 0xc5344110), TOBN(0x757568b7, 0xb802cd20),
+     TOBN(0x75559779, 0x00f7e6c8), TOBN(0x38e8b94f, 0x0facd2f0),
+     TOBN(0xfea1f3af, 0x03fde375), TOBN(0x5e11a1d8, 0x75881dfc),
+     TOBN(0xb3a6b02e, 0xc1e2f2ef), TOBN(0x193d2bbb, 0xc605a6c5),
+     TOBN(0x325ffeee, 0x339a0b2d), TOBN(0x27b6a724, 0x9e0c8846),
+     TOBN(0xe4050f1c, 0xf1c367ca), TOBN(0x9bc85a9b, 0xc90fbc7d),
+     TOBN(0xa373c4a2, 0xe1a11032), TOBN(0xb64232b7, 0xad0393a9),
+     TOBN(0xf5577eb0, 0x167dad29), TOBN(0x1604f301, 0x94b78ab2),
+     TOBN(0x0baa94af, 0xe829348b), TOBN(0x77fbd8dd, 0x41654342),
+     TOBN(0xdab50ea5, 0xb964e39a), TOBN(0xd4c29e3c, 0xd0d3c76e),
+     TOBN(0x80dae67c, 0x56d11964), TOBN(0x7307a8bf, 0xe5ffcc2f),
+     TOBN(0x65bbc1aa, 0x91708c3b), TOBN(0xa151e62c, 0x28bf0eeb),
+     TOBN(0x6cb53381, 0x6fa34db7), TOBN(0x5139e05c, 0xa29403a8),
+     TOBN(0x6ff651b4, 0x94a7cd2e), TOBN(0x5671ffd1, 0x0699336c),
+     TOBN(0x6f5fd2cc, 0x979a896a), TOBN(0x11e893a8, 0xd8148cef),
+     TOBN(0x988906a1, 0x65cf7b10), TOBN(0x81b67178, 0xc50d8485),
+     TOBN(0x7c0deb35, 0x8a35b3de), TOBN(0x423ac855, 0xc1d29799),
+     TOBN(0xaf580d87, 0xdac50b74), TOBN(0x28b2b89f, 0x5869734c),
+     TOBN(0x99a3b936, 0x874e28fb), TOBN(0xbb2c9190, 0x25f3f73a),
+     TOBN(0x199f6918, 0x84a9d5b7), TOBN(0x7ebe2325, 0x7e770374),
+     TOBN(0xf442e107, 0x0738efe2), TOBN(0xcf9f3f56, 0xcf9082d2),
+     TOBN(0x719f69e1, 0x09618708), TOBN(0xcc9e8364, 0xc183f9b1),
+     TOBN(0xec203a95, 0x366a21af), TOBN(0x6aec5d6d, 0x068b141f),
+     TOBN(0xee2df78a, 0x994f04e9), TOBN(0xb39ccae8, 0x271245b0),
+     TOBN(0xb875a4a9, 0x97e43f4f), TOBN(0x507dfe11, 0xdb2cea98),
+     TOBN(0x4fbf81cb, 0x489b03e9), TOBN(0xdb86ec5b, 0x6ec414fa),
+     TOBN(0xfad444f9, 0xf51b3ae5), TOBN(0xca7d33d6, 0x1914e3fe),
+     TOBN(0xa9c32f5c, 0x0ae6c4d0), TOBN(0xa9ca1d1e, 0x73969568),
+     TOBN(0x98043c31, 0x1aa7467e), TOBN(0xe832e75c, 0xe21b5ac6),
+     TOBN(0x314b7aea, 0x5232123d), TOBN(0x08307c8c, 0x65ae86db),
+     TOBN(0x06e7165c, 0xaa4668ed), TOBN(0xb170458b, 0xb4d3ec39),
+     TOBN(0x4d2e3ec6, 0xc19bb986), TOBN(0xc5f34846, 0xae0304ed),
+     TOBN(0x917695a0, 0x6c9f9722), TOBN(0x6c7f7317, 0x4cab1c0a),
+     TOBN(0x6295940e, 0x9d6d2e8b), TOBN(0xd318b8c1, 0x549f7c97),
+     TOBN(0x22453204, 0x97713885), TOBN(0x468d834b, 0xa8a440fe),
+     TOBN(0xd81fe5b2, 0xbfba796e), TOBN(0x152364db, 0x6d71f116),
+     TOBN(0xbb8c7c59, 0xb5b66e53), TOBN(0x0b12c61b, 0x2641a192),
+     TOBN(0x31f14802, 0xfcf0a7fd), TOBN(0x42fd0789, 0x5488b01e),
+     TOBN(0x71d78d6d, 0x9952b498), TOBN(0x8eb572d9, 0x07ac5201),
+     TOBN(0xe0a2a44c, 0x4d194a88), TOBN(0xd2b63fd9, 0xba017e66),
+     TOBN(0x78efc6c8, 0xf888aefc), TOBN(0xb76f6bda, 0x4a881a11),
+     TOBN(0x187f314b, 0xb46c2397), TOBN(0x004cf566, 0x5ded2819),
+     TOBN(0xa9ea5704, 0x38764d34), TOBN(0xbba45217, 0x78084709),
+     TOBN(0x06474571, 0x1171121e), TOBN(0xad7b7eb1, 0xe7c9b671),
+     TOBN(0xdacfbc40, 0x730f7507), TOBN(0x178cd8c6, 0xc7ad7bd1),
+     TOBN(0xbf0be101, 0xb2a67238), TOBN(0x3556d367, 0xaf9c14f2),
+     TOBN(0x104b7831, 0xa5662075), TOBN(0x58ca59bb, 0x79d9e60a),
+     TOBN(0x4bc45392, 0xa569a73b), TOBN(0x517a52e8, 0x5698f6c9),
+     TOBN(0x85643da5, 0xaeadd755), TOBN(0x1aed0cd5, 0x2a581b84),
+     TOBN(0xb9b4ff84, 0x80af1372), TOBN(0x244c3113, 0xf1ba5d1f),
+     TOBN(0x2a5dacbe, 0xf5f98d31), TOBN(0x2c3323e8, 0x4375bc2a),
+     TOBN(0x17a3ab4a, 0x5594b1dd), TOBN(0xa1928bfb, 0xceb4797e),
+     TOBN(0xe83af245, 0xe4886a19), TOBN(0x8979d546, 0x72b5a74a),
+     TOBN(0xa0f726bc, 0x19f9e967), TOBN(0xd9d03152, 0xe8fbbf4e),
+     TOBN(0xcfd6f51d, 0xb7707d40), TOBN(0x633084d9, 0x63f6e6e0),
+     TOBN(0xedcd9cdc, 0x55667eaf), TOBN(0x73b7f92b, 0x2e44d56f),
+     TOBN(0xfb2e39b6, 0x4e962b14), TOBN(0x7d408f6e, 0xf671fcbf),
+     TOBN(0xcc634ddc, 0x164a89bb), TOBN(0x74a42bb2, 0x3ef3bd05),
+     TOBN(0x1280dbb2, 0x428decbb), TOBN(0x6103f6bb, 0x402c8596),
+     TOBN(0xfa2bf581, 0x355a5752), TOBN(0x562f96a8, 0x00946674),
+     TOBN(0x4e4ca16d, 0x6da0223b), TOBN(0xfe47819f, 0x28d3aa25),
+     TOBN(0x9eea3075, 0xf8dfcf8a), TOBN(0xa284f0aa, 0x95669825),
+     TOBN(0xb3fca250, 0x867d3fd8), TOBN(0x20757b5f, 0x269d691e),
+     TOBN(0xf2c24020, 0x93b8a5de), TOBN(0xd3f93359, 0xebc06da6),
+     TOBN(0x1178293e, 0xb2739c33), TOBN(0xd2a3e770, 0xbcd686e5),
+     TOBN(0xa76f49f4, 0xcd941534), TOBN(0x0d37406b, 0xe3c71c0e),
+     TOBN(0x172d9397, 0x3b97f7e3), TOBN(0xec17e239, 0xbd7fd0de),
+     TOBN(0xe3290551, 0x6f496ba2), TOBN(0x6a693172, 0x36ad50e7),
+     TOBN(0xc4e539a2, 0x83e7eff5), TOBN(0x752737e7, 0x18e1b4cf),
+     TOBN(0xa2f7932c, 0x68af43ee), TOBN(0x5502468e, 0x703d00bd),
+     TOBN(0xe5dc978f, 0x2fb061f5), TOBN(0xc9a1904a, 0x28c815ad),
+     TOBN(0xd3af538d, 0x470c56a4), TOBN(0x159abc5f, 0x193d8ced),
+     TOBN(0x2a37245f, 0x20108ef3), TOBN(0xfa17081e, 0x223f7178),
+     TOBN(0x27b0fb2b, 0x10c8c0f5), TOBN(0x2102c3ea, 0x40650547),
+     TOBN(0x594564df, 0x8ac3bfa7), TOBN(0x98102033, 0x509dad96),
+     TOBN(0x6989643f, 0xf1d18a13), TOBN(0x35eebd91, 0xd7fc5af0),
+     TOBN(0x078d096a, 0xfaeaafd8), TOBN(0xb7a89341, 0xdef3de98),
+     TOBN(0x2a206e8d, 0xecf2a73a), TOBN(0x066a6397, 0x8e551994),
+     TOBN(0x3a6a088a, 0xb98d53a2), TOBN(0x0ce7c67c, 0x2d1124aa),
+     TOBN(0x48cec671, 0x759a113c), TOBN(0xe3b373d3, 0x4f6f67fa),
+     TOBN(0x5455d479, 0xfd36727b), TOBN(0xe5a428ee, 0xa13c0d81),
+     TOBN(0xb853dbc8, 0x1c86682b), TOBN(0xb78d2727, 0xb8d02b2a),
+     TOBN(0xaaf69bed, 0x8ebc329a), TOBN(0xdb6b40b3, 0x293b2148),
+     TOBN(0xe42ea77d, 0xb8c4961f), TOBN(0xb1a12f7c, 0x20e5e0ab),
+     TOBN(0xa0ec5274, 0x79e8b05e), TOBN(0x68027391, 0xfab60a80),
+     TOBN(0x6bfeea5f, 0x16b1bd5e), TOBN(0xf957e420, 0x4de30ad3),
+     TOBN(0xcbaf664e, 0x6a353b9e), TOBN(0x5c873312, 0x26d14feb),
+     TOBN(0x4e87f98c, 0xb65f57cb), TOBN(0xdb60a621, 0x5e0cdd41),
+     TOBN(0x67c16865, 0xa6881440), TOBN(0x1093ef1a, 0x46ab52aa),
+     TOBN(0xc095afb5, 0x3f4ece64), TOBN(0x6a6bb02e, 0x7604551a),
+     TOBN(0x55d44b4e, 0x0b26b8cd), TOBN(0xe5f9a999, 0xf971268a),
+     TOBN(0xc08ec425, 0x11a7de84), TOBN(0x83568095, 0xfda469dd),
+     TOBN(0x737bfba1, 0x6c6c90a2), TOBN(0x1cb9c4a0, 0xbe229831),
+     TOBN(0x93bccbba, 0xbb2eec64), TOBN(0xa0c23b64, 0xda03adbe),
+     TOBN(0x5f7aa00a, 0xe0e86ac4), TOBN(0x470b941e, 0xfc1401e6),
+     TOBN(0x5ad8d679, 0x9df43574), TOBN(0x4ccfb8a9, 0x0f65d810),
+     TOBN(0x1bce80e3, 0xaa7fbd81), TOBN(0x273291ad, 0x9508d20a),
+     TOBN(0xf5c4b46b, 0x42a92806), TOBN(0x810684ec, 0xa86ab44a),
+     TOBN(0x4591640b, 0xca0bc9f8), TOBN(0xb5efcdfc, 0x5c4b6054),
+     TOBN(0x16fc8907, 0x6e9edd12), TOBN(0xe29d0b50, 0xd4d792f9),
+     TOBN(0xa45fd01c, 0x9b03116d), TOBN(0x85035235, 0xc81765a4),
+     TOBN(0x1fe2a9b2, 0xb4b4b67c), TOBN(0xc1d10df0, 0xe8020604),
+     TOBN(0x9d64abfc, 0xbc8058d8), TOBN(0x8943b9b2, 0x712a0fbb),
+     TOBN(0x90eed914, 0x3b3def04), TOBN(0x85ab3aa2, 0x4ce775ff),
+     TOBN(0x605fd4ca, 0x7bbc9040), TOBN(0x8b34a564, 0xe2c75dfb),
+     TOBN(0x41ffc94a, 0x10358560), TOBN(0x2d8a5072, 0x9e5c28aa),
+     TOBN(0xe915a0fc, 0x4cc7eb15), TOBN(0xe9efab05, 0x8f6d0f5d),
+     TOBN(0xdbab47a9, 0xd19e9b91), TOBN(0x8cfed745, 0x0276154c),
+     TOBN(0x154357ae, 0x2cfede0d), TOBN(0x520630df, 0x19f5a4ef),
+     TOBN(0x25759f7c, 0xe382360f), TOBN(0xb6db05c9, 0x88bf5857),
+     TOBN(0x2917d61d, 0x6c58d46c), TOBN(0x14f8e491, 0xfd20cb7a),
+     TOBN(0xb68a727a, 0x11c20340), TOBN(0x0386f86f, 0xaf7ccbb6),
+     TOBN(0x5c8bc6cc, 0xfee09a20), TOBN(0x7d76ff4a, 0xbb7eea35),
+     TOBN(0xa7bdebe7, 0xdb15be7a), TOBN(0x67a08054, 0xd89f0302),
+     TOBN(0x56bf0ea9, 0xc1193364), TOBN(0xc8244467, 0x62837ebe),
+     TOBN(0x32bd8e8b, 0x20d841b8), TOBN(0x127a0548, 0xdbb8a54f),
+     TOBN(0x83dd4ca6, 0x63b20236), TOBN(0x87714718, 0x203491fa),
+     TOBN(0x4dabcaaa, 0xaa8a5288), TOBN(0x91cc0c8a, 0xaf23a1c9),
+     TOBN(0x34c72c6a, 0x3f220e0c), TOBN(0xbcc20bdf, 0x1232144a),
+     TOBN(0x6e2f42da, 0xa20ede1b), TOBN(0xc441f00c, 0x74a00515),
+     TOBN(0xbf46a5b6, 0x734b8c4b), TOBN(0x57409503, 0x7b56c9a4),
+     TOBN(0x9f735261, 0xe4585d45), TOBN(0x9231faed, 0x6734e642),
+     TOBN(0x1158a176, 0xbe70ee6c), TOBN(0x35f1068d, 0x7c3501bf),
+     TOBN(0x6beef900, 0xa2d26115), TOBN(0x649406f2, 0xef0afee3),
+     TOBN(0x3f43a60a, 0xbc2420a1), TOBN(0x509002a7, 0xd5aee4ac),
+     TOBN(0xb46836a5, 0x3ff3571b), TOBN(0x24f98b78, 0x837927c1),
+     TOBN(0x6254256a, 0x4533c716), TOBN(0xf27abb0b, 0xd07ee196),
+     TOBN(0xd7cf64fc, 0x5c6d5bfd), TOBN(0x6915c751, 0xf0cd7a77),
+     TOBN(0xd9f59012, 0x8798f534), TOBN(0x772b0da8, 0xf81d8b5f),
+     TOBN(0x1244260c, 0x2e03fa69), TOBN(0x36cf0e3a, 0x3be1a374),
+     TOBN(0x6e7c1633, 0xef06b960), TOBN(0xa71a4c55, 0x671f90f6),
+     TOBN(0x7a941251, 0x33c673db), TOBN(0xc0bea510, 0x73e8c131),
+     TOBN(0x61a8a699, 0xd4f6c734), TOBN(0x25e78c88, 0x341ed001),
+     TOBN(0x5c18acf8, 0x8e2f7d90), TOBN(0xfdbf33d7, 0x77be32cd),
+     TOBN(0x0a085cd7, 0xd2eb5ee9), TOBN(0x2d702cfb, 0xb3201115),
+     TOBN(0xb6e0ebdb, 0x85c88ce8), TOBN(0x23a3ce3c, 0x1e01d617),
+     TOBN(0x3041618e, 0x567333ac), TOBN(0x9dd0fd8f, 0x157edb6b),
+     TOBN(0x27f74702, 0xb57872b8), TOBN(0x2ef26b4f, 0x657d5fe1),
+     TOBN(0x95426f0a, 0x57cf3d40), TOBN(0x847e2ad1, 0x65a6067a),
+     TOBN(0xd474d9a0, 0x09996a74), TOBN(0x16a56acd, 0x2a26115c),
+     TOBN(0x02a615c3, 0xd16f4d43), TOBN(0xcc3fc965, 0xaadb85b7),
+     TOBN(0x386bda73, 0xce07d1b0), TOBN(0xd82910c2, 0x58ad4178),
+     TOBN(0x124f82cf, 0xcd2617f4), TOBN(0xcc2f5e8d, 0xef691770),
+     TOBN(0x82702550, 0xb8c30ccc), TOBN(0x7b856aea, 0x1a8e575a),
+     TOBN(0xbb822fef, 0xb1ab9459), TOBN(0x085928bc, 0xec24e38e),
+     TOBN(0x5d0402ec, 0xba8f4b4d), TOBN(0xc07cd4ba, 0x00b4d58b),
+     TOBN(0x5d8dffd5, 0x29227e7a), TOBN(0x61d44d0c, 0x31bf386f),
+     TOBN(0xe486dc2b, 0x135e6f4d), TOBN(0x680962eb, 0xe79410ef),
+     TOBN(0xa61bd343, 0xf10088b5), TOBN(0x6aa76076, 0xe2e28686),
+     TOBN(0x80463d11, 0x8fb98871), TOBN(0xcb26f5c3, 0xbbc76aff),
+     TOBN(0xd4ab8edd, 0xfbe03614), TOBN(0xc8eb579b, 0xc0cf2dee),
+     TOBN(0xcc004c15, 0xc93bae41), TOBN(0x46fbae5d, 0x3aeca3b2),
+     TOBN(0x671235cf, 0x0f1e9ab1), TOBN(0xadfba934, 0x9ec285c1),
+     TOBN(0x88ded013, 0xf216c980), TOBN(0xc8ac4fb8, 0xf79e0bc1),
+     TOBN(0xa29b89c6, 0xfb97a237), TOBN(0xb697b780, 0x9922d8e7),
+     TOBN(0x3142c639, 0xddb945b5), TOBN(0x447b06c7, 0xe094c3a9),
+     TOBN(0xcdcb3642, 0x72266c90), TOBN(0x633aad08, 0xa9385046),
+     TOBN(0xa36c936b, 0xb57c6477), TOBN(0x871f8b64, 0xe94dbcc6),
+     TOBN(0x28d0fb62, 0xa591a67b), TOBN(0x9d40e081, 0xc1d926f5),
+     TOBN(0x3111eaf6, 0xf2d84b5a), TOBN(0x228993f9, 0xa565b644),
+     TOBN(0x0ccbf592, 0x2c83188b), TOBN(0xf87b30ab, 0x3df3e197),
+     TOBN(0xb8658b31, 0x7642bca8), TOBN(0x1a032d7f, 0x52800f17),
+     TOBN(0x051dcae5, 0x79bf9445), TOBN(0xeba6b8ee, 0x54a2e253),
+     TOBN(0x5c8b9cad, 0xd4485692), TOBN(0x84bda40e, 0x8986e9be),
+     TOBN(0xd16d16a4, 0x2f0db448), TOBN(0x8ec80050, 0xa14d4188),
+     TOBN(0xb2b26107, 0x98fa7aaa), TOBN(0x41209ee4, 0xf073aa4e),
+     TOBN(0xf1570359, 0xf2d6b19b), TOBN(0xcbe6868c, 0xfc577caf),
+     TOBN(0x186c4bdc, 0x32c04dd3), TOBN(0xa6c35fae, 0xcfeee397),
+     TOBN(0xb4a1b312, 0xf086c0cf), TOBN(0xe0a5ccc6, 0xd9461fe2),
+     TOBN(0xc32278aa, 0x1536189f), TOBN(0x1126c55f, 0xba6df571),
+     TOBN(0x0f71a602, 0xb194560e), TOBN(0x8b2d7405, 0x324bd6e1),
+     TOBN(0x8481939e, 0x3738be71), TOBN(0xb5090b1a, 0x1a4d97a9),
+     TOBN(0x116c65a3, 0xf05ba915), TOBN(0x21863ad3, 0xaae448aa),
+     TOBN(0xd24e2679, 0xa7aae5d3), TOBN(0x7076013d, 0x0de5c1c4),
+     TOBN(0x2d50f8ba, 0xbb05b629), TOBN(0x73c1abe2, 0x6e66efbb),
+     TOBN(0xefd4b422, 0xf2488af7), TOBN(0xe4105d02, 0x663ba575),
+     TOBN(0x7eb60a8b, 0x53a69457), TOBN(0x62210008, 0xc945973b),
+     TOBN(0xfb255478, 0x77a50ec6), TOBN(0xbf0392f7, 0x0a37a72c),
+     TOBN(0xa0a7a19c, 0x4be18e7a), TOBN(0x90d8ea16, 0x25b1e0af),
+     TOBN(0x7582a293, 0xef953f57), TOBN(0x90a64d05, 0xbdc5465a),
+     TOBN(0xca79c497, 0xe2510717), TOBN(0x560dbb7c, 0x18cb641f),
+     TOBN(0x1d8e3286, 0x4b66abfb), TOBN(0xd26f52e5, 0x59030900),
+     TOBN(0x1ee3f643, 0x5584941a), TOBN(0x6d3b3730, 0x569f5958),
+     TOBN(0x9ff2a62f, 0x4789dba5), TOBN(0x91fcb815, 0x72b5c9b7),
+     TOBN(0xf446cb7d, 0x6c8f9a0e), TOBN(0x48f625c1, 0x39b7ecb5),
+     TOBN(0xbabae801, 0x1c6219b8), TOBN(0xe7a562d9, 0x28ac2f23),
+     TOBN(0xe1b48732, 0x26e20588), TOBN(0x06ee1cad, 0x775af051),
+     TOBN(0xda29ae43, 0xfaff79f7), TOBN(0xc141a412, 0x652ee9e0),
+     TOBN(0x1e127f6f, 0x195f4bd0), TOBN(0x29c6ab4f, 0x072f34f8),
+     TOBN(0x7b7c1477, 0x30448112), TOBN(0x82b51af1, 0xe4a38656),
+     TOBN(0x2bf2028a, 0x2f315010), TOBN(0xc9a4a01f, 0x6ea88cd4),
+     TOBN(0xf63e95d8, 0x257e5818), TOBN(0xdd8efa10, 0xb4519b16),
+     TOBN(0xed8973e0, 0x0da910bf), TOBN(0xed49d077, 0x5c0fe4a9),
+     TOBN(0xac3aac5e, 0xb7caee1e), TOBN(0x1033898d, 0xa7f4da57),
+     TOBN(0x42145c0e, 0x5c6669b9), TOBN(0x42daa688, 0xc1aa2aa0),
+     TOBN(0x629cc15c, 0x1a1d885a), TOBN(0x25572ec0, 0xf4b76817),
+     TOBN(0x8312e435, 0x9c8f8f28), TOBN(0x8107f8cd, 0x81965490),
+     TOBN(0x516ff3a3, 0x6fa6110c), TOBN(0x74fb1eb1, 0xfb93561f),
+     TOBN(0x6c0c9047, 0x8457522b), TOBN(0xcfd32104, 0x6bb8bdc6),
+     TOBN(0x2d6884a2, 0xcc80ad57), TOBN(0x7c27fc35, 0x86a9b637),
+     TOBN(0x3461baed, 0xadf4e8cd), TOBN(0x1d56251a, 0x617242f0),
+     TOBN(0x0b80d209, 0xc955bef4), TOBN(0xdf02cad2, 0x06adb047),
+     TOBN(0xf0d7cb91, 0x5ec74fee), TOBN(0xd2503375, 0x1111ba44),
+     TOBN(0x9671755e, 0xdf53cb36), TOBN(0x54dcb612, 0x3368551b),
+     TOBN(0x66d69aac, 0xc8a025a4), TOBN(0x6be946c6, 0xe77ef445),
+     TOBN(0x719946d1, 0xa995e094), TOBN(0x65e848f6, 0xe51e04d8),
+     TOBN(0xe62f3300, 0x6a1e3113), TOBN(0x1541c7c1, 0x501de503),
+     TOBN(0x4daac9fa, 0xf4acfade), TOBN(0x0e585897, 0x44cd0b71),
+     TOBN(0x544fd869, 0x0a51cd77), TOBN(0x60fc20ed, 0x0031016d),
+     TOBN(0x58b404ec, 0xa4276867), TOBN(0x46f6c3cc, 0x34f34993),
+     TOBN(0x477ca007, 0xc636e5bd), TOBN(0x8018f5e5, 0x7c458b47),
+     TOBN(0xa1202270, 0xe47b668f), TOBN(0xcef48ccd, 0xee14f203),
+     TOBN(0x23f98bae, 0x62ff9b4d), TOBN(0x55acc035, 0xc589eddd),
+     TOBN(0x3fe712af, 0x64db4444), TOBN(0x19e9d634, 0xbecdd480),
+     TOBN(0xe08bc047, 0xa930978a), TOBN(0x2dbf24ec, 0xa1280733),
+     TOBN(0x3c0ae38c, 0x2cd706b2), TOBN(0x5b012a5b, 0x359017b9),
+     TOBN(0x3943c38c, 0x72e0f5ae), TOBN(0x786167ea, 0x57176fa3),
+     TOBN(0xe5f9897d, 0x594881dc), TOBN(0x6b5efad8, 0xcfb820c1),
+     TOBN(0xb2179093, 0xd55018de), TOBN(0x39ad7d32, 0x0bac56ce),
+     TOBN(0xb55122e0, 0x2cfc0e81), TOBN(0x117c4661, 0xf6d89daa),
+     TOBN(0x362d01e1, 0xcb64fa09), TOBN(0x6a309b4e, 0x3e9c4ddd),
+     TOBN(0xfa979fb7, 0xabea49b1), TOBN(0xb4b1d27d, 0x10e2c6c5),
+     TOBN(0xbd61c2c4, 0x23afde7a), TOBN(0xeb6614f8, 0x9786d358),
+     TOBN(0x4a5d816b, 0x7f6f7459), TOBN(0xe431a44f, 0x09360e7b),
+     TOBN(0x8c27a032, 0xc309914c), TOBN(0xcea5d68a, 0xcaede3d8),
+     TOBN(0x3668f665, 0x3a0a3f95), TOBN(0x89369416, 0x7ceba27b),
+     TOBN(0x89981fad, 0xe4728fe9), TOBN(0x7102c8a0, 0x8a093562),
+     TOBN(0xbb80310e, 0x235d21c8), TOBN(0x505e55d1, 0xbefb7f7b),
+     TOBN(0xa0a90811, 0x12958a67), TOBN(0xd67e106a, 0x4d851fef),
+     TOBN(0xb84011a9, 0x431dd80e), TOBN(0xeb7c7cca, 0x73306cd9),
+     TOBN(0x20fadd29, 0xd1b3b730), TOBN(0x83858b5b, 0xfe37b3d3),
+     TOBN(0xbf4cd193, 0xb6251d5c), TOBN(0x1cca1fd3, 0x1352d952),
+     TOBN(0xc66157a4, 0x90fbc051), TOBN(0x7990a638, 0x89b98636),}
+    ,
+    {TOBN(0xe5aa692a, 0x87dec0e1), TOBN(0x010ded8d, 0xf7b39d00),
+     TOBN(0x7b1b80c8, 0x54cfa0b5), TOBN(0x66beb876, 0xa0f8ea28),
+     TOBN(0x50d7f531, 0x3476cd0e), TOBN(0xa63d0e65, 0xb08d3949),
+     TOBN(0x1a09eea9, 0x53479fc6), TOBN(0x82ae9891, 0xf499e742),
+     TOBN(0xab58b910, 0x5ca7d866), TOBN(0x582967e2, 0x3adb3b34),
+     TOBN(0x89ae4447, 0xcceac0bc), TOBN(0x919c667c, 0x7bf56af5),
+     TOBN(0x9aec17b1, 0x60f5dcd7), TOBN(0xec697b9f, 0xddcaadbc),
+     TOBN(0x0b98f341, 0x463467f5), TOBN(0xb187f1f7, 0xa967132f),
+     TOBN(0x90fe7a1d, 0x214aeb18), TOBN(0x1506af3c, 0x741432f7),
+     TOBN(0xbb5565f9, 0xe591a0c4), TOBN(0x10d41a77, 0xb44f1bc3),
+     TOBN(0xa09d65e4, 0xa84bde96), TOBN(0x42f060d8, 0xf20a6a1c),
+     TOBN(0x652a3bfd, 0xf27f9ce7), TOBN(0xb6bdb65c, 0x3b3d739f),
+     TOBN(0xeb5ddcb6, 0xec7fae9f), TOBN(0x995f2714, 0xefb66e5a),
+     TOBN(0xdee95d8e, 0x69445d52), TOBN(0x1b6c2d46, 0x09e27620),
+     TOBN(0x32621c31, 0x8129d716), TOBN(0xb03909f1, 0x0958c1aa),
+     TOBN(0x8c468ef9, 0x1af4af63), TOBN(0x162c429f, 0xfba5cdf6),
+     TOBN(0x2f682343, 0x753b9371), TOBN(0x29cab45a, 0x5f1f9cd7),
+     TOBN(0x571623ab, 0xb245db96), TOBN(0xc507db09, 0x3fd79999),
+     TOBN(0x4e2ef652, 0xaf036c32), TOBN(0x86f0cc78, 0x05018e5c),
+     TOBN(0xc10a73d4, 0xab8be350), TOBN(0x6519b397, 0x7e826327),
+     TOBN(0xe8cb5eef, 0x9c053df7), TOBN(0x8de25b37, 0xb300ea6f),
+     TOBN(0xdb03fa92, 0xc849cffb), TOBN(0x242e43a7, 0xe84169bb),
+     TOBN(0xe4fa51f4, 0xdd6f958e), TOBN(0x6925a77f, 0xf4445a8d),
+     TOBN(0xe6e72a50, 0xe90d8949), TOBN(0xc66648e3, 0x2b1f6390),
+     TOBN(0xb2ab1957, 0x173e460c), TOBN(0x1bbbce75, 0x30704590),
+     TOBN(0xc0a90dbd, 0xdb1c7162), TOBN(0x505e399e, 0x15cdd65d),
+     TOBN(0x68434dcb, 0x57797ab7), TOBN(0x60ad35ba, 0x6a2ca8e8),
+     TOBN(0x4bfdb1e0, 0xde3336c1), TOBN(0xbbef99eb, 0xd8b39015),
+     TOBN(0x6c3b96f3, 0x1711ebec), TOBN(0x2da40f1f, 0xce98fdc4),
+     TOBN(0xb99774d3, 0x57b4411f), TOBN(0x87c8bdf4, 0x15b65bb6),
+     TOBN(0xda3a89e3, 0xc2eef12d), TOBN(0xde95bb9b, 0x3c7471f3),
+     TOBN(0x600f225b, 0xd812c594), TOBN(0x54907c5d, 0x2b75a56b),
+     TOBN(0xa93cc5f0, 0x8db60e35), TOBN(0x743e3cd6, 0xfa833319),
+     TOBN(0x7dad5c41, 0xf81683c9), TOBN(0x70c1e7d9, 0x9c34107e),
+     TOBN(0x0edc4a39, 0xa6be0907), TOBN(0x36d47035, 0x86d0b7d3),
+     TOBN(0x8c76da03, 0x272bfa60), TOBN(0x0b4a07ea, 0x0f08a414),
+     TOBN(0x699e4d29, 0x45c1dd53), TOBN(0xcadc5898, 0x231debb5),
+     TOBN(0xdf49fcc7, 0xa77f00e0), TOBN(0x93057bbf, 0xa73e5a0e),
+     TOBN(0x2f8b7ecd, 0x027a4cd1), TOBN(0x114734b3, 0xc614011a),
+     TOBN(0xe7a01db7, 0x67677c68), TOBN(0x89d9be5e, 0x7e273f4f),
+     TOBN(0xd225cb2e, 0x089808ef), TOBN(0xf1f7a27d, 0xd59e4107),
+     TOBN(0x53afc761, 0x8211b9c9), TOBN(0x0361bc67, 0xe6819159),
+     TOBN(0x2a865d0b, 0x7f071426), TOBN(0x6a3c1810, 0xe7072567),
+     TOBN(0x3e3bca1e, 0x0d6bcabd), TOBN(0xa1b02bc1, 0x408591bc),
+     TOBN(0xe0deee59, 0x31fba239), TOBN(0xf47424d3, 0x98bd91d1),
+     TOBN(0x0f8886f4, 0x071a3c1d), TOBN(0x3f7d41e8, 0xa819233b),
+     TOBN(0x708623c2, 0xcf6eb998), TOBN(0x86bb49af, 0x609a287f),
+     TOBN(0x942bb249, 0x63c90762), TOBN(0x0ef6eea5, 0x55a9654b),
+     TOBN(0x5f6d2d72, 0x36f5defe), TOBN(0xfa9922dc, 0x56f99176),
+     TOBN(0x6c8c5ece, 0xf78ce0c7), TOBN(0x7b44589d, 0xbe09b55e),
+     TOBN(0xe11b3bca, 0x9ea83770), TOBN(0xd7fa2c7f, 0x2ab71547),
+     TOBN(0x2a3dd6fa, 0x2a1ddcc0), TOBN(0x09acb430, 0x5a7b7707),
+     TOBN(0x4add4a2e, 0x649d4e57), TOBN(0xcd53a2b0, 0x1917526e),
+     TOBN(0xc5262330, 0x20b44ac4), TOBN(0x4028746a, 0xbaa2c31d),
+     TOBN(0x51318390, 0x64291d4c), TOBN(0xbf48f151, 0xee5ad909),
+     TOBN(0xcce57f59, 0x7b185681), TOBN(0x7c3ac1b0, 0x4854d442),
+     TOBN(0x65587dc3, 0xc093c171), TOBN(0xae7acb24, 0x24f42b65),
+     TOBN(0x5a338adb, 0x955996cb), TOBN(0xc8e65675, 0x6051f91b),
+     TOBN(0x66711fba, 0x28b8d0b1), TOBN(0x15d74137, 0xb6c10a90),
+     TOBN(0x70cdd7eb, 0x3a232a80), TOBN(0xc9e2f07f, 0x6191ed24),
+     TOBN(0xa80d1db6, 0xf79588c0), TOBN(0xfa52fc69, 0xb55768cc),
+     TOBN(0x0b4df1ae, 0x7f54438a), TOBN(0x0cadd1a7, 0xf9b46a4f),
+     TOBN(0xb40ea6b3, 0x1803dd6f), TOBN(0x488e4fa5, 0x55eaae35),
+     TOBN(0x9f047d55, 0x382e4e16), TOBN(0xc9b5b7e0, 0x2f6e0c98),
+     TOBN(0x6b1bd2d3, 0x95762649), TOBN(0xa9604ee7, 0xc7aea3f6),
+     TOBN(0x3646ff27, 0x6dc6f896), TOBN(0x9bf0e7f5, 0x2860bad1),
+     TOBN(0x2d92c821, 0x7cb44b92), TOBN(0xa2f5ce63, 0xaea9c182),
+     TOBN(0xd0a2afb1, 0x9154a5fd), TOBN(0x482e474c, 0x95801da6),
+     TOBN(0xc19972d0, 0xb611c24b), TOBN(0x1d468e65, 0x60a8f351),
+     TOBN(0xeb758069, 0x7bcf6421), TOBN(0xec9dd0ee, 0x88fbc491),
+     TOBN(0x5b59d2bf, 0x956c2e32), TOBN(0x73dc6864, 0xdcddf94e),
+     TOBN(0xfd5e2321, 0xbcee7665), TOBN(0xa7b4f8ef, 0x5e9a06c4),
+     TOBN(0xfba918dd, 0x7280f855), TOBN(0xbbaac260, 0x8baec688),
+     TOBN(0xa3b3f00f, 0x33400f42), TOBN(0x3d2dba29, 0x66f2e6e4),
+     TOBN(0xb6f71a94, 0x98509375), TOBN(0x8f33031f, 0xcea423cc),
+     TOBN(0x009b8dd0, 0x4807e6fb), TOBN(0x5163cfe5, 0x5cdb954c),
+     TOBN(0x03cc8f17, 0xcf41c6e8), TOBN(0xf1f03c2a, 0x037b925c),
+     TOBN(0xc39c19cc, 0x66d2427c), TOBN(0x823d24ba, 0x7b6c18e4),
+     TOBN(0x32ef9013, 0x901f0b4f), TOBN(0x684360f1, 0xf8941c2e),
+     TOBN(0x0ebaff52, 0x2c28092e), TOBN(0x7891e4e3, 0x256c932f),
+     TOBN(0x51264319, 0xac445e3d), TOBN(0x553432e7, 0x8ea74381),
+     TOBN(0xe6eeaa69, 0x67e9c50a), TOBN(0x27ced284, 0x62e628c7),
+     TOBN(0x3f96d375, 0x7a4afa57), TOBN(0xde0a14c3, 0xe484c150),
+     TOBN(0x364a24eb, 0x38bd9923), TOBN(0x1df18da0, 0xe5177422),
+     TOBN(0x174e8f82, 0xd8d38a9b), TOBN(0x2e97c600, 0xe7de1391),
+     TOBN(0xc5709850, 0xa1c175dd), TOBN(0x969041a0, 0x32ae5035),
+     TOBN(0xcbfd533b, 0x76a2086b), TOBN(0xd6bba71b, 0xd7c2e8fe),
+     TOBN(0xb2d58ee6, 0x099dfb67), TOBN(0x3a8b342d, 0x064a85d9),
+     TOBN(0x3bc07649, 0x522f9be3), TOBN(0x690c075b, 0xdf1f49a8),
+     TOBN(0x80e1aee8, 0x3854ec42), TOBN(0x2a7dbf44, 0x17689dc7),
+     TOBN(0xc004fc0e, 0x3faf4078), TOBN(0xb2f02e9e, 0xdf11862c),
+     TOBN(0xf10a5e0f, 0xa0a1b7b3), TOBN(0x30aca623, 0x8936ec80),
+     TOBN(0xf83cbf05, 0x02f40d9a), TOBN(0x4681c468, 0x2c318a4d),
+     TOBN(0x98575618, 0x0e9c2674), TOBN(0xbe79d046, 0x1847092e),
+     TOBN(0xaf1e480a, 0x78bd01e0), TOBN(0x6dd359e4, 0x72a51db9),
+     TOBN(0x62ce3821, 0xe3afbab6), TOBN(0xc5cee5b6, 0x17733199),
+     TOBN(0xe08b30d4, 0x6ffd9fbb), TOBN(0x6e5bc699, 0x36c610b7),
+     TOBN(0xf343cff2, 0x9ce262cf), TOBN(0xca2e4e35, 0x68b914c1),
+     TOBN(0x011d64c0, 0x16de36c5), TOBN(0xe0b10fdd, 0x42e2b829),
+     TOBN(0x78942981, 0x6685aaf8), TOBN(0xe7511708, 0x230ede97),
+     TOBN(0x671ed8fc, 0x3b922bf8), TOBN(0xe4d8c0a0, 0x4c29b133),
+     TOBN(0x87eb1239, 0x3b6e99c4), TOBN(0xaff3974c, 0x8793beba),
+     TOBN(0x03749405, 0x2c18df9b), TOBN(0xc5c3a293, 0x91007139),
+     TOBN(0x6a77234f, 0xe37a0b95), TOBN(0x02c29a21, 0xb661c96b),
+     TOBN(0xc3aaf1d6, 0x141ecf61), TOBN(0x9195509e, 0x3bb22f53),
+     TOBN(0x29597404, 0x22d51357), TOBN(0x1b083822, 0x537bed60),
+     TOBN(0xcd7d6e35, 0xe07289f0), TOBN(0x1f94c48c, 0x6dd86eff),
+     TOBN(0xc8bb1f82, 0xeb0f9cfa), TOBN(0x9ee0b7e6, 0x1b2eb97d),
+     TOBN(0x5a52fe2e, 0x34d74e31), TOBN(0xa352c310, 0x3bf79ab6),
+     TOBN(0x97ff6c5a, 0xabfeeb8f), TOBN(0xbfbe8fef, 0xf5c97305),
+     TOBN(0xd6081ce6, 0xa7904608), TOBN(0x1f812f3a, 0xc4fca249),
+     TOBN(0x9b24bc9a, 0xb9e5e200), TOBN(0x91022c67, 0x38012ee8),
+     TOBN(0xe83d9c5d, 0x30a713a1), TOBN(0x4876e3f0, 0x84ef0f93),
+     TOBN(0xc9777029, 0xc1fbf928), TOBN(0xef7a6bb3, 0xbce7d2a4),
+     TOBN(0xb8067228, 0xdfa2a659), TOBN(0xd5cd3398, 0xd877a48f),
+     TOBN(0xbea4fd8f, 0x025d0f3f), TOBN(0xd67d2e35, 0x2eae7c2b),
+     TOBN(0x184de7d7, 0xcc5f4394), TOBN(0xb5551b5c, 0x4536e142),
+     TOBN(0x2e89b212, 0xd34aa60a), TOBN(0x14a96fea, 0xf50051d5),
+     TOBN(0x4e21ef74, 0x0d12bb0b), TOBN(0xc522f020, 0x60b9677e),
+     TOBN(0x8b12e467, 0x2df7731d), TOBN(0x39f80382, 0x7b326d31),
+     TOBN(0xdfb8630c, 0x39024a94), TOBN(0xaacb96a8, 0x97319452),
+     TOBN(0xd68a3961, 0xeda3867c), TOBN(0x0c58e2b0, 0x77c4ffca),
+     TOBN(0x3d545d63, 0x4da919fa), TOBN(0xef79b69a, 0xf15e2289),
+     TOBN(0x54bc3d3d, 0x808bab10), TOBN(0xc8ab3007, 0x45f82c37),
+     TOBN(0xc12738b6, 0x7c4a658a), TOBN(0xb3c47639, 0x40e72182),
+     TOBN(0x3b77be46, 0x8798e44f), TOBN(0xdc047df2, 0x17a7f85f),
+     TOBN(0x2439d4c5, 0x5e59d92d), TOBN(0xcedca475, 0xe8e64d8d),
+     TOBN(0xa724cd0d, 0x87ca9b16), TOBN(0x35e4fd59, 0xa5540dfe),
+     TOBN(0xf8c1ff18, 0xe4bcf6b1), TOBN(0x856d6285, 0x295018fa),
+     TOBN(0x433f665c, 0x3263c949), TOBN(0xa6a76dd6, 0xa1f21409),
+     TOBN(0x17d32334, 0xcc7b4f79), TOBN(0xa1d03122, 0x06720e4a),
+     TOBN(0xadb6661d, 0x81d9bed5), TOBN(0xf0d6fb02, 0x11db15d1),
+     TOBN(0x7fd11ad5, 0x1fb747d2), TOBN(0xab50f959, 0x3033762b),
+     TOBN(0x2a7e711b, 0xfbefaf5a), TOBN(0xc7393278, 0x3fef2bbf),
+     TOBN(0xe29fa244, 0x0df6f9be), TOBN(0x9092757b, 0x71efd215),
+     TOBN(0xee60e311, 0x4f3d6fd9), TOBN(0x338542d4, 0x0acfb78b),
+     TOBN(0x44a23f08, 0x38961a0f), TOBN(0x1426eade, 0x986987ca),
+     TOBN(0x36e6ee2e, 0x4a863cc6), TOBN(0x48059420, 0x628b8b79),
+     TOBN(0x30303ad8, 0x7396e1de), TOBN(0x5c8bdc48, 0x38c5aad1),
+     TOBN(0x3e40e11f, 0x5c8f5066), TOBN(0xabd6e768, 0x8d246bbd),
+     TOBN(0x68aa40bb, 0x23330a01), TOBN(0xd23f5ee4, 0xc34eafa0),
+     TOBN(0x3bbee315, 0x5de02c21), TOBN(0x18dd4397, 0xd1d8dd06),
+     TOBN(0x3ba1939a, 0x122d7b44), TOBN(0xe6d3b40a, 0xa33870d6),
+     TOBN(0x8e620f70, 0x1c4fe3f8), TOBN(0xf6bba1a5, 0xd3a50cbf),
+     TOBN(0x4a78bde5, 0xcfc0aee0), TOBN(0x847edc46, 0xc08c50bd),
+     TOBN(0xbaa2439c, 0xad63c9b2), TOBN(0xceb4a728, 0x10fc2acb),
+     TOBN(0xa419e40e, 0x26da033d), TOBN(0x6cc3889d, 0x03e02683),
+     TOBN(0x1cd28559, 0xfdccf725), TOBN(0x0fd7e0f1, 0x8d13d208),
+     TOBN(0x01b9733b, 0x1f0df9d4), TOBN(0x8cc2c5f3, 0xa2b5e4f3),
+     TOBN(0x43053bfa, 0x3a304fd4), TOBN(0x8e87665c, 0x0a9f1aa7),
+     TOBN(0x087f29ec, 0xd73dc965), TOBN(0x15ace455, 0x3e9023db),
+     TOBN(0x2370e309, 0x2bce28b4), TOBN(0xf9723442, 0xb6b1e84a),
+     TOBN(0xbeee662e, 0xb72d9f26), TOBN(0xb19396de, 0xf0e47109),
+     TOBN(0x85b1fa73, 0xe13289d0), TOBN(0x436cf77e, 0x54e58e32),
+     TOBN(0x0ec833b3, 0xe990ef77), TOBN(0x7373e3ed, 0x1b11fc25),
+     TOBN(0xbe0eda87, 0x0fc332ce), TOBN(0xced04970, 0x8d7ea856),
+     TOBN(0xf85ff785, 0x7e977ca0), TOBN(0xb66ee8da, 0xdfdd5d2b),
+     TOBN(0xf5e37950, 0x905af461), TOBN(0x587b9090, 0x966d487c),
+     TOBN(0x6a198a1b, 0x32ba0127), TOBN(0xa7720e07, 0x141615ac),
+     TOBN(0xa23f3499, 0x996ef2f2), TOBN(0xef5f64b4, 0x470bcb3d),
+     TOBN(0xa526a962, 0x92b8c559), TOBN(0x0c14aac0, 0x69740a0f),
+     TOBN(0x0d41a9e3, 0xa6bdc0a5), TOBN(0x97d52106, 0x9c48aef4),
+     TOBN(0xcf16bd30, 0x3e7c253b), TOBN(0xcc834b1a, 0x47fdedc1),
+     TOBN(0x7362c6e5, 0x373aab2e), TOBN(0x264ed85e, 0xc5f590ff),
+     TOBN(0x7a46d9c0, 0x66d41870), TOBN(0xa50c20b1, 0x4787ba09),
+     TOBN(0x185e7e51, 0xe3d44635), TOBN(0xb3b3e080, 0x31e2d8dc),
+     TOBN(0xbed1e558, 0xa179e9d9), TOBN(0x2daa3f79, 0x74a76781),
+     TOBN(0x4372baf2, 0x3a40864f), TOBN(0x46900c54, 0x4fe75cb5),
+     TOBN(0xb95f171e, 0xf76765d0), TOBN(0x4ad726d2, 0x95c87502),
+     TOBN(0x2ec769da, 0x4d7c99bd), TOBN(0x5e2ddd19, 0xc36cdfa8),
+     TOBN(0xc22117fc, 0xa93e6dea), TOBN(0xe8a2583b, 0x93771123),
+     TOBN(0xbe2f6089, 0xfa08a3a2), TOBN(0x4809d5ed, 0x8f0e1112),
+     TOBN(0x3b414aa3, 0xda7a095e), TOBN(0x9049acf1, 0x26f5aadd),
+     TOBN(0x78d46a4d, 0x6be8b84a), TOBN(0xd66b1963, 0xb732b9b3),
+     TOBN(0x5c2ac2a0, 0xde6e9555), TOBN(0xcf52d098, 0xb5bd8770),
+     TOBN(0x15a15fa6, 0x0fd28921), TOBN(0x56ccb81e, 0x8b27536d),
+     TOBN(0x0f0d8ab8, 0x9f4ccbb8), TOBN(0xed5f44d2, 0xdb221729),
+     TOBN(0x43141988, 0x00bed10c), TOBN(0xc94348a4, 0x1d735b8b),
+     TOBN(0x79f3e9c4, 0x29ef8479), TOBN(0x4c13a4e3, 0x614c693f),
+     TOBN(0x32c9af56, 0x8e143a14), TOBN(0xbc517799, 0xe29ac5c4),
+     TOBN(0x05e17992, 0x2774856f), TOBN(0x6e52fb05, 0x6c1bf55f),
+     TOBN(0xaeda4225, 0xe4f19e16), TOBN(0x70f4728a, 0xaf5ccb26),
+     TOBN(0x5d2118d1, 0xb2947f22), TOBN(0xc827ea16, 0x281d6fb9),
+     TOBN(0x8412328d, 0x8cf0eabd), TOBN(0x45ee9fb2, 0x03ef9dcf),
+     TOBN(0x8e700421, 0xbb937d63), TOBN(0xdf8ff2d5, 0xcc4b37a6),
+     TOBN(0xa4c0d5b2, 0x5ced7b68), TOBN(0x6537c1ef, 0xc7308f59),
+     TOBN(0x25ce6a26, 0x3b37f8e8), TOBN(0x170e9a9b, 0xdeebc6ce),
+     TOBN(0xdd037952, 0x8728d72c), TOBN(0x445b0e55, 0x850154bc),
+     TOBN(0x4b7d0e06, 0x83a7337b), TOBN(0x1e3416d4, 0xffecf249),
+     TOBN(0x24840eff, 0x66a2b71f), TOBN(0xd0d9a50a, 0xb37cc26d),
+     TOBN(0xe2198150, 0x6fe28ef7), TOBN(0x3cc5ef16, 0x23324c7f),
+     TOBN(0x220f3455, 0x769b5263), TOBN(0xe2ade2f1, 0xa10bf475),
+     TOBN(0x28cd20fa, 0x458d3671), TOBN(0x1549722c, 0x2dc4847b),
+     TOBN(0x6dd01e55, 0x591941e3), TOBN(0x0e6fbcea, 0x27128ccb),
+     TOBN(0xae1a1e6b, 0x3bef0262), TOBN(0xfa8c472c, 0x8f54e103),
+     TOBN(0x7539c0a8, 0x72c052ec), TOBN(0xd7b27369, 0x5a3490e9),
+     TOBN(0x143fe1f1, 0x71684349), TOBN(0x36b4722e, 0x32e19b97),
+     TOBN(0xdc059227, 0x90980aff), TOBN(0x175c9c88, 0x9e13d674),
+     TOBN(0xa7de5b22, 0x6e6bfdb1), TOBN(0x5ea5b7b2, 0xbedb4b46),
+     TOBN(0xd5570191, 0xd34a6e44), TOBN(0xfcf60d2e, 0xa24ff7e6),
+     TOBN(0x614a392d, 0x677819e1), TOBN(0x7be74c7e, 0xaa5a29e8),
+     TOBN(0xab50fece, 0x63c85f3f), TOBN(0xaca2e2a9, 0x46cab337),
+     TOBN(0x7f700388, 0x122a6fe3), TOBN(0xdb69f703, 0x882a04a8),
+     TOBN(0x9a77935d, 0xcf7aed57), TOBN(0xdf16207c, 0x8d91c86f),
+     TOBN(0x2fca49ab, 0x63ed9998), TOBN(0xa3125c44, 0xa77ddf96),
+     TOBN(0x05dd8a86, 0x24344072), TOBN(0xa023dda2, 0xfec3fb56),
+     TOBN(0x421b41fc, 0x0c743032), TOBN(0x4f2120c1, 0x5e438639),
+     TOBN(0xfb7cae51, 0xc83c1b07), TOBN(0xb2370caa, 0xcac2171a),
+     TOBN(0x2eb2d962, 0x6cc820fb), TOBN(0x59feee5c, 0xb85a44bf),
+     TOBN(0x94620fca, 0x5b6598f0), TOBN(0x6b922cae, 0x7e314051),
+     TOBN(0xff8745ad, 0x106bed4e), TOBN(0x546e71f5, 0xdfa1e9ab),
+     TOBN(0x935c1e48, 0x1ec29487), TOBN(0x9509216c, 0x4d936530),
+     TOBN(0xc7ca3067, 0x85c9a2db), TOBN(0xd6ae5152, 0x6be8606f),
+     TOBN(0x09dbcae6, 0xe14c651d), TOBN(0xc9536e23, 0x9bc32f96),
+     TOBN(0xa90535a9, 0x34521b03), TOBN(0xf39c526c, 0x878756ff),
+     TOBN(0x383172ec, 0x8aedf03c), TOBN(0x20a8075e, 0xefe0c034),
+     TOBN(0xf22f9c62, 0x64026422), TOBN(0x8dd10780, 0x24b9d076),
+     TOBN(0x944c742a, 0x3bef2950), TOBN(0x55b9502e, 0x88a2b00b),
+     TOBN(0xa59e14b4, 0x86a09817), TOBN(0xa39dd3ac, 0x47bb4071),
+     TOBN(0x55137f66, 0x3be0592f), TOBN(0x07fcafd4, 0xc9e63f5b),
+     TOBN(0x963652ee, 0x346eb226), TOBN(0x7dfab085, 0xec2facb7),
+     TOBN(0x273bf2b8, 0x691add26), TOBN(0x30d74540, 0xf2b46c44),
+     TOBN(0x05e8e73e, 0xf2c2d065), TOBN(0xff9b8a00, 0xd42eeac9),
+     TOBN(0x2fcbd205, 0x97209d22), TOBN(0xeb740ffa, 0xde14ea2c),
+     TOBN(0xc71ff913, 0xa8aef518), TOBN(0x7bfc74bb, 0xfff4cfa2),
+     TOBN(0x1716680c, 0xb6b36048), TOBN(0x121b2cce, 0x9ef79af1),
+     TOBN(0xbff3c836, 0xa01eb3d3), TOBN(0x50eb1c6a, 0x5f79077b),
+     TOBN(0xa48c32d6, 0xa004bbcf), TOBN(0x47a59316, 0x7d64f61d),
+     TOBN(0x6068147f, 0x93102016), TOBN(0x12c5f654, 0x94d12576),
+     TOBN(0xefb071a7, 0xc9bc6b91), TOBN(0x7c2da0c5, 0x6e23ea95),
+     TOBN(0xf4fd45b6, 0xd4a1dd5d), TOBN(0x3e7ad9b6, 0x9122b13c),
+     TOBN(0x342ca118, 0xe6f57a48), TOBN(0x1c2e94a7, 0x06f8288f),
+     TOBN(0x99e68f07, 0x5a97d231), TOBN(0x7c80de97, 0x4d838758),
+     TOBN(0xbce0f5d0, 0x05872727), TOBN(0xbe5d95c2, 0x19c4d016),
+     TOBN(0x921d5cb1, 0x9c2492ee), TOBN(0x42192dc1, 0x404d6fb3),
+     TOBN(0x4c84dcd1, 0x32f988d3), TOBN(0xde26d61f, 0xa17b8e85),
+     TOBN(0xc466dcb6, 0x137c7408), TOBN(0x9a38d7b6, 0x36a266da),
+     TOBN(0x7ef5cb06, 0x83bebf1b), TOBN(0xe5cdcbbf, 0x0fd014e3),
+     TOBN(0x30aa376d, 0xf65965a0), TOBN(0x60fe88c2, 0xebb3e95e),
+     TOBN(0x33fd0b61, 0x66ee6f20), TOBN(0x8827dcdb, 0x3f41f0a0),
+     TOBN(0xbf8a9d24, 0x0c56c690), TOBN(0x40265dad, 0xddb7641d),
+     TOBN(0x522b05bf, 0x3a6b662b), TOBN(0x466d1dfe, 0xb1478c9b),
+     TOBN(0xaa616962, 0x1484469b), TOBN(0x0db60549, 0x02df8f9f),
+     TOBN(0xc37bca02, 0x3cb8bf51), TOBN(0x5effe346, 0x21371ce8),
+     TOBN(0xe8f65264, 0xff112c32), TOBN(0x8a9c736d, 0x7b971fb2),
+     TOBN(0xa4f19470, 0x7b75080d), TOBN(0xfc3f2c5a, 0x8839c59b),
+     TOBN(0x1d6c777e, 0x5aeb49c2), TOBN(0xf3db034d, 0xda1addfe),
+     TOBN(0xd76fee5a, 0x5535affc), TOBN(0x0853ac70, 0xb92251fd),
+     TOBN(0x37e3d594, 0x8b2a29d5), TOBN(0x28f1f457, 0x4de00ddb),
+     TOBN(0x8083c1b5, 0xf42c328b), TOBN(0xd8ef1d8f, 0xe493c73b),
+     TOBN(0x96fb6260, 0x41dc61bd), TOBN(0xf74e8a9d, 0x27ee2f8a),
+     TOBN(0x7c605a80, 0x2c946a5d), TOBN(0xeed48d65, 0x3839ccfd),
+     TOBN(0x9894344f, 0x3a29467a), TOBN(0xde81e949, 0xc51eba6d),
+     TOBN(0xdaea066b, 0xa5e5c2f2), TOBN(0x3fc8a614, 0x08c8c7b3),
+     TOBN(0x7adff88f, 0x06d0de9f), TOBN(0xbbc11cf5, 0x3b75ce0a),
+     TOBN(0x9fbb7acc, 0xfbbc87d5), TOBN(0xa1458e26, 0x7badfde2)}
+    ,
+    {TOBN(0x1cb43668, 0xe039c256), TOBN(0x5f26fb8b, 0x7c17fd5d),
+     TOBN(0xeee426af, 0x79aa062b), TOBN(0x072002d0, 0xd78fbf04),
+     TOBN(0x4c9ca237, 0xe84fb7e3), TOBN(0xb401d8a1, 0x0c82133d),
+     TOBN(0xaaa52592, 0x6d7e4181), TOBN(0xe9430833, 0x73dbb152),
+     TOBN(0xf92dda31, 0xbe24319a), TOBN(0x03f7d28b, 0xe095a8e7),
+     TOBN(0xa52fe840, 0x98782185), TOBN(0x276ddafe, 0x29c24dbc),
+     TOBN(0x80cd5496, 0x1d7a64eb), TOBN(0xe4360889, 0x7f1dbe42),
+     TOBN(0x2f81a877, 0x8438d2d5), TOBN(0x7e4d52a8, 0x85169036),
+     TOBN(0x19e3d5b1, 0x1d59715d), TOBN(0xc7eaa762, 0xd788983e),
+     TOBN(0xe5a730b0, 0xabf1f248), TOBN(0xfbab8084, 0xfae3fd83),
+     TOBN(0x65e50d21, 0x53765b2f), TOBN(0xbdd4e083, 0xfa127f3d),
+     TOBN(0x9cf3c074, 0x397b1b10), TOBN(0x59f8090c, 0xb1b59fd3),
+     TOBN(0x7b15fd9d, 0x615faa8f), TOBN(0x8fa1eb40, 0x968554ed),
+     TOBN(0x7bb4447e, 0x7aa44882), TOBN(0x2bb2d0d1, 0x029fff32),
+     TOBN(0x075e2a64, 0x6caa6d2f), TOBN(0x8eb879de, 0x22e7351b),
+     TOBN(0xbcd5624e, 0x9a506c62), TOBN(0x218eaef0, 0xa87e24dc),
+     TOBN(0x37e56847, 0x44ddfa35), TOBN(0x9ccfc5c5, 0xdab3f747),
+     TOBN(0x9ac1df3f, 0x1ee96cf4), TOBN(0x0c0571a1, 0x3b480b8f),
+     TOBN(0x2fbeb3d5, 0x4b3a7b3c), TOBN(0x35c03669, 0x5dcdbb99),
+     TOBN(0x52a0f5dc, 0xb2415b3a), TOBN(0xd57759b4, 0x4413ed9a),
+     TOBN(0x1fe647d8, 0x3d30a2c5), TOBN(0x0857f77e, 0xf78a81dc),
+     TOBN(0x11d5a334, 0x131a4a9b), TOBN(0xc0a94af9, 0x29d393f5),
+     TOBN(0xbc3a5c0b, 0xdaa6ec1a), TOBN(0xba9fe493, 0x88d2d7ed),
+     TOBN(0xbb4335b4, 0xbb614797), TOBN(0x991c4d68, 0x72f83533),
+     TOBN(0x53258c28, 0xd2f01cb3), TOBN(0x93d6eaa3, 0xd75db0b1),
+     TOBN(0x419a2b0d, 0xe87d0db4), TOBN(0xa1e48f03, 0xd8fe8493),
+     TOBN(0xf747faf6, 0xc508b23a), TOBN(0xf137571a, 0x35d53549),
+     TOBN(0x9f5e58e2, 0xfcf9b838), TOBN(0xc7186cee, 0xa7fd3cf5),
+     TOBN(0x77b868ce, 0xe978a1d3), TOBN(0xe3a68b33, 0x7ab92d04),
+     TOBN(0x51029794, 0x87a5b862), TOBN(0x5f0606c3, 0x3a61d41d),
+     TOBN(0x2814be27, 0x6f9326f1), TOBN(0x2f521c14, 0xc6fe3c2e),
+     TOBN(0x17464d7d, 0xacdf7351), TOBN(0x10f5f9d3, 0x777f7e44),
+     TOBN(0xce8e616b, 0x269fb37d), TOBN(0xaaf73804, 0x7de62de5),
+     TOBN(0xaba11175, 0x4fdd4153), TOBN(0x515759ba, 0x3770b49b),
+     TOBN(0x8b09ebf8, 0xaa423a61), TOBN(0x592245a1, 0xcd41fb92),
+     TOBN(0x1cba8ec1, 0x9b4c8936), TOBN(0xa87e91e3, 0xaf36710e),
+     TOBN(0x1fd84ce4, 0x3d34a2e3), TOBN(0xee3759ce, 0xb43b5d61),
+     TOBN(0x895bc78c, 0x619186c7), TOBN(0xf19c3809, 0xcbb9725a),
+     TOBN(0xc0be21aa, 0xde744b1f), TOBN(0xa7d222b0, 0x60f8056b),
+     TOBN(0x74be6157, 0xb23efe11), TOBN(0x6fab2b4f, 0x0cd68253),
+     TOBN(0xad33ea5f, 0x4bf1d725), TOBN(0x9c1d8ee2, 0x4f6c950f),
+     TOBN(0x544ee78a, 0xa377af06), TOBN(0x54f489bb, 0x94a113e1),
+     TOBN(0x8f11d634, 0x992fb7e8), TOBN(0x0169a7aa, 0xa2a44347),
+     TOBN(0x1d49d4af, 0x95020e00), TOBN(0x95945722, 0xe08e120b),
+     TOBN(0xb6e33878, 0xa4d32282), TOBN(0xe36e029d, 0x48020ae7),
+     TOBN(0xe05847fb, 0x37a9b750), TOBN(0xf876812c, 0xb29e3819),
+     TOBN(0x84ad138e, 0xd23a17f0), TOBN(0x6d7b4480, 0xf0b3950e),
+     TOBN(0xdfa8aef4, 0x2fd67ae0), TOBN(0x8d3eea24, 0x52333af6),
+     TOBN(0x0d052075, 0xb15d5acc), TOBN(0xc6d9c79f, 0xbd815bc4),
+     TOBN(0x8dcafd88, 0xdfa36cf2), TOBN(0x908ccbe2, 0x38aa9070),
+     TOBN(0x638722c4, 0xba35afce), TOBN(0x5a3da8b0, 0xfd6abf0b),
+     TOBN(0x2dce252c, 0xc9c335c1), TOBN(0x84e7f0de, 0x65aa799b),
+     TOBN(0x2101a522, 0xb99a72cb), TOBN(0x06de6e67, 0x87618016),
+     TOBN(0x5ff8c7cd, 0xe6f3653e), TOBN(0x0a821ab5, 0xc7a6754a),
+     TOBN(0x7e3fa52b, 0x7cb0b5a2), TOBN(0xa7fb121c, 0xc9048790),
+     TOBN(0x1a725020, 0x06ce053a), TOBN(0xb490a31f, 0x04e929b0),
+     TOBN(0xe17be47d, 0x62dd61ad), TOBN(0x781a961c, 0x6be01371),
+     TOBN(0x1063bfd3, 0xdae3cbba), TOBN(0x35647406, 0x7f73c9ba),
+     TOBN(0xf50e957b, 0x2736a129), TOBN(0xa6313702, 0xed13f256),
+     TOBN(0x9436ee65, 0x3a19fcc5), TOBN(0xcf2bdb29, 0xe7a4c8b6),
+     TOBN(0xb06b1244, 0xc5f95cd8), TOBN(0xda8c8af0, 0xf4ab95f4),
+     TOBN(0x1bae59c2, 0xb9e5836d), TOBN(0x07d51e7e, 0x3acffffc),
+     TOBN(0x01e15e6a, 0xc2ccbcda), TOBN(0x3bc1923f, 0x8528c3e0),
+     TOBN(0x43324577, 0xa49fead4), TOBN(0x61a1b884, 0x2aa7a711),
+     TOBN(0xf9a86e08, 0x700230ef), TOBN(0x0af585a1, 0xbd19adf8),
+     TOBN(0x7645f361, 0xf55ad8f2), TOBN(0x6e676223, 0x46c3614c),
+     TOBN(0x23cb257c, 0x4e774d3f), TOBN(0x82a38513, 0xac102d1b),
+     TOBN(0x9bcddd88, 0x7b126aa5), TOBN(0xe716998b, 0xeefd3ee4),
+     TOBN(0x4239d571, 0xfb167583), TOBN(0xdd011c78, 0xd16c8f8a),
+     TOBN(0x271c2895, 0x69a27519), TOBN(0x9ce0a3b7, 0xd2d64b6a),
+     TOBN(0x8c977289, 0xd5ec6738), TOBN(0xa3b49f9a, 0x8840ef6b),
+     TOBN(0x808c14c9, 0x9a453419), TOBN(0x5c00295b, 0x0cf0a2d5),
+     TOBN(0x524414fb, 0x1d4bcc76), TOBN(0xb07691d2, 0x459a88f1),
+     TOBN(0x77f43263, 0xf70d110f), TOBN(0x64ada5e0, 0xb7abf9f3),
+     TOBN(0xafd0f94e, 0x5b544cf5), TOBN(0xb4a13a15, 0xfd2713fe),
+     TOBN(0xb99b7d6e, 0x250c74f4), TOBN(0x097f2f73, 0x20324e45),
+     TOBN(0x994b37d8, 0xaffa8208), TOBN(0xc3c31b0b, 0xdc29aafc),
+     TOBN(0x3da74651, 0x7a3a607f), TOBN(0xd8e1b8c1, 0xfe6955d6),
+     TOBN(0x716e1815, 0xc8418682), TOBN(0x541d487f, 0x7dc91d97),
+     TOBN(0x48a04669, 0xc6996982), TOBN(0xf39cab15, 0x83a6502e),
+     TOBN(0x025801a0, 0xe68db055), TOBN(0xf3569758, 0xba3338d5),
+     TOBN(0xb0c8c0aa, 0xee2afa84), TOBN(0x4f6985d3, 0xfb6562d1),
+     TOBN(0x351f1f15, 0x132ed17a), TOBN(0x510ed0b4, 0xc04365fe),
+     TOBN(0xa3f98138, 0xe5b1f066), TOBN(0xbc9d95d6, 0x32df03dc),
+     TOBN(0xa83ccf6e, 0x19abd09e), TOBN(0x0b4097c1, 0x4ff17edb),
+     TOBN(0x58a5c478, 0xd64a06ce), TOBN(0x2ddcc3fd, 0x544a58fd),
+     TOBN(0xd449503d, 0x9e8153b8), TOBN(0x3324fd02, 0x7774179b),
+     TOBN(0xaf5d47c8, 0xdbd9120c), TOBN(0xeb860162, 0x34fa94db),
+     TOBN(0x5817bdd1, 0x972f07f4), TOBN(0xe5579e2e, 0xd27bbceb),
+     TOBN(0x86847a1f, 0x5f11e5a6), TOBN(0xb39ed255, 0x7c3cf048),
+     TOBN(0xe1076417, 0xa2f62e55), TOBN(0x6b9ab38f, 0x1bcf82a2),
+     TOBN(0x4bb7c319, 0x7aeb29f9), TOBN(0xf6d17da3, 0x17227a46),
+     TOBN(0xab53ddbd, 0x0f968c00), TOBN(0xa03da7ec, 0x000c880b),
+     TOBN(0x7b239624, 0x6a9ad24d), TOBN(0x612c0401, 0x01ec60d0),
+     TOBN(0x70d10493, 0x109f5df1), TOBN(0xfbda4030, 0x80af7550),
+     TOBN(0x30b93f95, 0xc6b9a9b3), TOBN(0x0c74ec71, 0x007d9418),
+     TOBN(0x94175564, 0x6edb951f), TOBN(0x5f4a9d78, 0x7f22c282),
+     TOBN(0xb7870895, 0xb38d1196), TOBN(0xbc593df3, 0xa228ce7c),
+     TOBN(0xc78c5bd4, 0x6af3641a), TOBN(0x7802200b, 0x3d9b3dcc),
+     TOBN(0x0dc73f32, 0x8be33304), TOBN(0x847ed87d, 0x61ffb79a),
+     TOBN(0xf85c974e, 0x6d671192), TOBN(0x1e14100a, 0xde16f60f),
+     TOBN(0x45cb0d5a, 0x95c38797), TOBN(0x18923bba, 0x9b022da4),
+     TOBN(0xef2be899, 0xbbe7e86e), TOBN(0x4a1510ee, 0x216067bf),
+     TOBN(0xd98c8154, 0x84d5ce3e), TOBN(0x1af777f0, 0xf92a2b90),
+     TOBN(0x9fbcb400, 0x4ef65724), TOBN(0x3e04a4c9, 0x3c0ca6fe),
+     TOBN(0xfb3e2cb5, 0x55002994), TOBN(0x1f3a93c5, 0x5363ecab),
+     TOBN(0x1fe00efe, 0x3923555b), TOBN(0x744bedd9, 0x1e1751ea),
+     TOBN(0x3fb2db59, 0x6ab69357), TOBN(0x8dbd7365, 0xf5e6618b),
+     TOBN(0x99d53099, 0xdf1ea40e), TOBN(0xb3f24a0b, 0x57d61e64),
+     TOBN(0xd088a198, 0x596eb812), TOBN(0x22c8361b, 0x5762940b),
+     TOBN(0x66f01f97, 0xf9c0d95c), TOBN(0x88461172, 0x8e43cdae),
+     TOBN(0x11599a7f, 0xb72b15c3), TOBN(0x135a7536, 0x420d95cc),
+     TOBN(0x2dcdf0f7, 0x5f7ae2f6), TOBN(0x15fc6e1d, 0xd7fa6da2),
+     TOBN(0x81ca829a, 0xd1d441b6), TOBN(0x84c10cf8, 0x04a106b6),
+     TOBN(0xa9b26c95, 0xa73fbbd0), TOBN(0x7f24e0cb, 0x4d8f6ee8),
+     TOBN(0x48b45937, 0x1e25a043), TOBN(0xf8a74fca, 0x036f3dfe),
+     TOBN(0x1ed46585, 0xc9f84296), TOBN(0x7fbaa8fb, 0x3bc278b0),
+     TOBN(0xa8e96cd4, 0x6c4fcbd0), TOBN(0x940a1202, 0x73b60a5f),
+     TOBN(0x34aae120, 0x55a4aec8), TOBN(0x550e9a74, 0xdbd742f0),
+     TOBN(0x794456d7, 0x228c68ab), TOBN(0x492f8868, 0xa4e25ec6),
+     TOBN(0x682915ad, 0xb2d8f398), TOBN(0xf13b51cc, 0x5b84c953),
+     TOBN(0xcda90ab8, 0x5bb917d6), TOBN(0x4b615560, 0x4ea3dee1),
+     TOBN(0x578b4e85, 0x0a52c1c8), TOBN(0xeab1a695, 0x20b75fc4),
+     TOBN(0x60c14f3c, 0xaa0bb3c6), TOBN(0x220f448a, 0xb8216094),
+     TOBN(0x4fe7ee31, 0xb0e63d34), TOBN(0xf4600572, 0xa9e54fab),
+     TOBN(0xc0493334, 0xd5e7b5a4), TOBN(0x8589fb92, 0x06d54831),
+     TOBN(0xaa70f5cc, 0x6583553a), TOBN(0x0879094a, 0xe25649e5),
+     TOBN(0xcc904507, 0x10044652), TOBN(0xebb0696d, 0x02541c4f),
+     TOBN(0x5a171fde, 0xb9718710), TOBN(0x38f1bed8, 0xf374a9f5),
+     TOBN(0xc8c582e1, 0xba39bdc1), TOBN(0xfc457b0a, 0x908cc0ce),
+     TOBN(0x9a187fd4, 0x883841e2), TOBN(0x8ec25b39, 0x38725381),
+     TOBN(0x2553ed05, 0x96f84395), TOBN(0x095c7661, 0x6f6c6897),
+     TOBN(0x917ac85c, 0x4bdc5610), TOBN(0xb2885fe4, 0x179eb301),
+     TOBN(0x5fc65547, 0x8b78bdcc), TOBN(0x4a9fc893, 0xe59e4699),
+     TOBN(0xbb7ff0cd, 0x3ce299af), TOBN(0x195be9b3, 0xadf38b20),
+     TOBN(0x6a929c87, 0xd38ddb8f), TOBN(0x55fcc99c, 0xb21a51b9),
+     TOBN(0x2b695b4c, 0x721a4593), TOBN(0xed1e9a15, 0x768eaac2),
+     TOBN(0xfb63d71c, 0x7489f914), TOBN(0xf98ba31c, 0x78118910),
+     TOBN(0x80291373, 0x9b128eb4), TOBN(0x7801214e, 0xd448af4a),
+     TOBN(0xdbd2e22b, 0x55418dd3), TOBN(0xeffb3c0d, 0xd3998242),
+     TOBN(0xdfa6077c, 0xc7bf3827), TOBN(0xf2165bcb, 0x47f8238f),
+     TOBN(0xfe37cf68, 0x8564d554), TOBN(0xe5f825c4, 0x0a81fb98),
+     TOBN(0x43cc4f67, 0xffed4d6f), TOBN(0xbc609578, 0xb50a34b0),
+     TOBN(0x8aa8fcf9, 0x5041faf1), TOBN(0x5659f053, 0x651773b6),
+     TOBN(0xe87582c3, 0x6044d63b), TOBN(0xa6089409, 0x0cdb0ca0),
+     TOBN(0x8c993e0f, 0xbfb2bcf6), TOBN(0xfc64a719, 0x45985cfc),
+     TOBN(0x15c4da80, 0x83dbedba), TOBN(0x804ae112, 0x2be67df7),
+     TOBN(0xda4c9658, 0xa23defde), TOBN(0x12002ddd, 0x5156e0d3),
+     TOBN(0xe68eae89, 0x5dd21b96), TOBN(0x8b99f28b, 0xcf44624d),
+     TOBN(0x0ae00808, 0x1ec8897a), TOBN(0xdd0a9303, 0x6712f76e),
+     TOBN(0x96237522, 0x4e233de4), TOBN(0x192445b1, 0x2b36a8a5),
+     TOBN(0xabf9ff74, 0x023993d9), TOBN(0x21f37bf4, 0x2aad4a8f),
+     TOBN(0x340a4349, 0xf8bd2bbd), TOBN(0x1d902cd9, 0x4868195d),
+     TOBN(0x3d27bbf1, 0xe5fdb6f1), TOBN(0x7a5ab088, 0x124f9f1c),
+     TOBN(0xc466ab06, 0xf7a09e03), TOBN(0x2f8a1977, 0x31f2c123),
+     TOBN(0xda355dc7, 0x041b6657), TOBN(0xcb840d12, 0x8ece2a7c),
+     TOBN(0xb600ad9f, 0x7db32675), TOBN(0x78fea133, 0x07a06f1b),
+     TOBN(0x5d032269, 0xb31f6094), TOBN(0x07753ef5, 0x83ec37aa),
+     TOBN(0x03485aed, 0x9c0bea78), TOBN(0x41bb3989, 0xbc3f4524),
+     TOBN(0x09403761, 0x697f726d), TOBN(0x6109beb3, 0xdf394820),
+     TOBN(0x804111ea, 0x3b6d1145), TOBN(0xb6271ea9, 0xa8582654),
+     TOBN(0x619615e6, 0x24e66562), TOBN(0xa2554945, 0xd7b6ad9c),
+     TOBN(0xd9c4985e, 0x99bfe35f), TOBN(0x9770ccc0, 0x7b51cdf6),
+     TOBN(0x7c327013, 0x92881832), TOBN(0x8777d45f, 0x286b26d1),
+     TOBN(0x9bbeda22, 0xd847999d), TOBN(0x03aa33b6, 0xc3525d32),
+     TOBN(0x4b7b96d4, 0x28a959a1), TOBN(0xbb3786e5, 0x31e5d234),
+     TOBN(0xaeb5d3ce, 0x6961f247), TOBN(0x20aa85af, 0x02f93d3f),
+     TOBN(0x9cd1ad3d, 0xd7a7ae4f), TOBN(0xbf6688f0, 0x781adaa8),
+     TOBN(0xb1b40e86, 0x7469cead), TOBN(0x1904c524, 0x309fca48),
+     TOBN(0x9b7312af, 0x4b54bbc7), TOBN(0xbe24bf8f, 0x593affa2),
+     TOBN(0xbe5e0790, 0xbd98764b), TOBN(0xa0f45f17, 0xa26e299e),
+     TOBN(0x4af0d2c2, 0x6b8fe4c7), TOBN(0xef170db1, 0x8ae8a3e6),
+     TOBN(0x0e8d61a0, 0x29e0ccc1), TOBN(0xcd53e87e, 0x60ad36ca),
+     TOBN(0x328c6623, 0xc8173822), TOBN(0x7ee1767d, 0xa496be55),
+     TOBN(0x89f13259, 0x648945af), TOBN(0x9e45a5fd, 0x25c8009c),
+     TOBN(0xaf2febd9, 0x1f61ab8c), TOBN(0x43f6bc86, 0x8a275385),
+     TOBN(0x87792348, 0xf2142e79), TOBN(0x17d89259, 0xc6e6238a),
+     TOBN(0x7536d2f6, 0x4a839d9b), TOBN(0x1f428fce, 0x76a1fbdc),
+     TOBN(0x1c109601, 0x0db06dfe), TOBN(0xbfc16bc1, 0x50a3a3cc),
+     TOBN(0xf9cbd9ec, 0x9b30f41b), TOBN(0x5b5da0d6, 0x00138cce),
+     TOBN(0xec1d0a48, 0x56ef96a7), TOBN(0xb47eb848, 0x982bf842),
+     TOBN(0x66deae32, 0xec3f700d), TOBN(0x4e43c42c, 0xaa1181e0),
+     TOBN(0xa1d72a31, 0xd1a4aa2a), TOBN(0x440d4668, 0xc004f3ce),
+     TOBN(0x0d6a2d3b, 0x45fe8a7a), TOBN(0x820e52e2, 0xfb128365),
+     TOBN(0x29ac5fcf, 0x25e51b09), TOBN(0x180cd2bf, 0x2023d159),
+     TOBN(0xa9892171, 0xa1ebf90e), TOBN(0xf97c4c87, 0x7c132181),
+     TOBN(0x9f1dc724, 0xc03dbb7e), TOBN(0xae043765, 0x018cbbe4),
+     TOBN(0xfb0b2a36, 0x0767d153), TOBN(0xa8e2f4d6, 0x249cbaeb),
+     TOBN(0x172a5247, 0xd95ea168), TOBN(0x1758fada, 0x2970764a),
+     TOBN(0xac803a51, 0x1d978169), TOBN(0x299cfe2e, 0xde77e01b),
+     TOBN(0x652a1e17, 0xb0a98927), TOBN(0x2e26e1d1, 0x20014495),
+     TOBN(0x7ae0af9f, 0x7175b56a), TOBN(0xc2e22a80, 0xd64b9f95),
+     TOBN(0x4d0ff9fb, 0xd90a060a), TOBN(0x496a27db, 0xbaf38085),
+     TOBN(0x32305401, 0xda776bcf), TOBN(0xb8cdcef6, 0x725f209e),
+     TOBN(0x61ba0f37, 0x436a0bba), TOBN(0x263fa108, 0x76860049),
+     TOBN(0x92beb98e, 0xda3542cf), TOBN(0xa2d4d14a, 0xd5849538),
+     TOBN(0x989b9d68, 0x12e9a1bc), TOBN(0x61d9075c, 0x5f6e3268),
+     TOBN(0x352c6aa9, 0x99ace638), TOBN(0xde4e4a55, 0x920f43ff),
+     TOBN(0xe5e4144a, 0xd673c017), TOBN(0x667417ae, 0x6f6e05ea),
+     TOBN(0x613416ae, 0xdcd1bd56), TOBN(0x5eb36201, 0x86693711),
+     TOBN(0x2d7bc504, 0x3a1aa914), TOBN(0x175a1299, 0x76dc5975),
+     TOBN(0xe900e0f2, 0x3fc8125c), TOBN(0x569ef68c, 0x11198875),
+     TOBN(0x9012db63, 0x63a113b4), TOBN(0xe3bd3f56, 0x98835766),
+     TOBN(0xa5c94a52, 0x76412dea), TOBN(0xad9e2a09, 0xaa735e5c),
+     TOBN(0x405a984c, 0x508b65e9), TOBN(0xbde4a1d1, 0x6df1a0d1),
+     TOBN(0x1a9433a1, 0xdfba80da), TOBN(0xe9192ff9, 0x9440ad2e),
+     TOBN(0x9f649696, 0x5099fe92), TOBN(0x25ddb65c, 0x0b27a54a),
+     TOBN(0x178279dd, 0xc590da61), TOBN(0x5479a999, 0xfbde681a),
+     TOBN(0xd0e84e05, 0x013fe162), TOBN(0xbe11dc92, 0x632d471b),
+     TOBN(0xdf0b0c45, 0xfc0e089f), TOBN(0x04fb15b0, 0x4c144025),
+     TOBN(0xa61d5fc2, 0x13c99927), TOBN(0xa033e9e0, 0x3de2eb35),
+     TOBN(0xf8185d5c, 0xb8dacbb4), TOBN(0x9a88e265, 0x8644549d),
+     TOBN(0xf717af62, 0x54671ff6), TOBN(0x4bd4241b, 0x5fa58603),
+     TOBN(0x06fba40b, 0xe67773c0), TOBN(0xc1d933d2, 0x6a2847e9),
+     TOBN(0xf4f5acf3, 0x689e2c70), TOBN(0x92aab0e7, 0x46bafd31),
+     TOBN(0x798d76aa, 0x3473f6e5), TOBN(0xcc6641db, 0x93141934),
+     TOBN(0xcae27757, 0xd31e535e), TOBN(0x04cc43b6, 0x87c2ee11),
+     TOBN(0x8d1f9675, 0x2e029ffa), TOBN(0xc2150672, 0xe4cc7a2c),
+     TOBN(0x3b03c1e0, 0x8d68b013), TOBN(0xa9d6816f, 0xedf298f3),
+     TOBN(0x1bfbb529, 0xa2804464), TOBN(0x95a52fae, 0x5db22125),
+     TOBN(0x55b32160, 0x0e1cb64e), TOBN(0x004828f6, 0x7e7fc9fe),
+     TOBN(0x13394b82, 0x1bb0fb93), TOBN(0xb6293a2d, 0x35f1a920),
+     TOBN(0xde35ef21, 0xd145d2d9), TOBN(0xbe6225b3, 0xbb8fa603),
+     TOBN(0x00fc8f6b, 0x32cf252d), TOBN(0xa28e52e6, 0x117cf8c2),
+     TOBN(0x9d1dc89b, 0x4c371e6d), TOBN(0xcebe0675, 0x36ef0f28),
+     TOBN(0x5de05d09, 0xa4292f81), TOBN(0xa8303593, 0x353e3083),
+     TOBN(0xa1715b0a, 0x7e37a9bb), TOBN(0x8c56f61e, 0x2b8faec3),
+     TOBN(0x52507431, 0x33c9b102), TOBN(0x0130cefc, 0xa44431f0),
+     TOBN(0x56039fa0, 0xbd865cfb), TOBN(0x4b03e578, 0xbc5f1dd7),
+     TOBN(0x40edf2e4, 0xbabe7224), TOBN(0xc752496d, 0x3a1988f6),
+     TOBN(0xd1572d3b, 0x564beb6b), TOBN(0x0db1d110, 0x39a1c608),
+     TOBN(0x568d1934, 0x16f60126), TOBN(0x05ae9668, 0xf354af33),
+     TOBN(0x19de6d37, 0xc92544f2), TOBN(0xcc084353, 0xa35837d5),
+     TOBN(0xcbb6869c, 0x1a514ece), TOBN(0xb633e728, 0x2e1d1066),
+     TOBN(0xf15dd69f, 0x936c581c), TOBN(0x96e7b8ce, 0x7439c4f9),
+     TOBN(0x5e676f48, 0x2e448a5b), TOBN(0xb2ca7d5b, 0xfd916bbb),
+     TOBN(0xd55a2541, 0xf5024025), TOBN(0x47bc5769, 0xe4c2d937),
+     TOBN(0x7d31b92a, 0x0362189f), TOBN(0x83f3086e, 0xef7816f9),
+     TOBN(0xf9f46d94, 0xb587579a), TOBN(0xec2d22d8, 0x30e76c5f),
+     TOBN(0x27d57461, 0xb000ffcf), TOBN(0xbb7e65f9, 0x364ffc2c),
+     TOBN(0x7c7c9477, 0x6652a220), TOBN(0x61618f89, 0xd696c981),
+     TOBN(0x5021701d, 0x89effff3), TOBN(0xf2c8ff8e, 0x7c314163),
+     TOBN(0x2da413ad, 0x8efb4d3e), TOBN(0x937b5adf, 0xce176d95),
+     TOBN(0x22867d34, 0x2a67d51c), TOBN(0x262b9b10, 0x18eb3ac9),
+     TOBN(0x4e314fe4, 0xc43ff28b), TOBN(0x76476627, 0x6a664e7a),
+     TOBN(0x3e90e40b, 0xb7a565c2), TOBN(0x8588993a, 0xc1acf831),
+     TOBN(0xd7b501d6, 0x8f938829), TOBN(0x996627ee, 0x3edd7d4c),
+     TOBN(0x37d44a62, 0x90cd34c7), TOBN(0xa8327499, 0xf3833e8d),
+     TOBN(0x2e18917d, 0x4bf50353), TOBN(0x85dd726b, 0x556765fb),
+     TOBN(0x54fe65d6, 0x93d5ab66), TOBN(0x3ddbaced, 0x915c25fe),
+     TOBN(0xa799d9a4, 0x12f22e85), TOBN(0xe2a24867, 0x6d06f6bc),
+     TOBN(0xf4f1ee56, 0x43ca1637), TOBN(0xfda2828b, 0x61ece30a),
+     TOBN(0x758c1a3e, 0xa2dee7a6), TOBN(0xdcde2f3c, 0x734b2284),
+     TOBN(0xaba445d2, 0x4eaba6ad), TOBN(0x35aaf668, 0x76cee0a7),
+     TOBN(0x7e0b04a9, 0xe5aa049a), TOBN(0xe74083ad, 0x91103e84),
+     TOBN(0xbeb183ce, 0x40afecc3), TOBN(0x6b89de9f, 0xea043f7a),}
+    ,
+    {TOBN(0x0e299d23, 0xfe67ba66), TOBN(0x91450760, 0x93cf2f34),
+     TOBN(0xf45b5ea9, 0x97fcf913), TOBN(0x5be00843, 0x8bd7ddda),
+     TOBN(0x358c3e05, 0xd53ff04d), TOBN(0xbf7ccdc3, 0x5de91ef7),
+     TOBN(0xad684dbf, 0xb69ec1a0), TOBN(0x367e7cf2, 0x801fd997),
+     TOBN(0x0ca1f3b7, 0xb0dc8595), TOBN(0x27de4608, 0x9f1d9f2e),
+     TOBN(0x1af3bf39, 0xbadd82a7), TOBN(0x79356a79, 0x65862448),
+     TOBN(0xc0602345, 0xf5f9a052), TOBN(0x1a8b0f89, 0x139a42f9),
+     TOBN(0xb53eee42, 0x844d40fc), TOBN(0x93b0bfe5, 0x4e5b6368),
+     TOBN(0x5434dd02, 0xc024789c), TOBN(0x90dca9ea, 0x41b57bfc),
+     TOBN(0x8aa898e2, 0x243398df), TOBN(0xf607c834, 0x894a94bb),
+     TOBN(0xbb07be97, 0xc2c99b76), TOBN(0x6576ba67, 0x18c29302),
+     TOBN(0x3d79efcc, 0xe703a88c), TOBN(0xf259ced7, 0xb6a0d106),
+     TOBN(0x0f893a5d, 0xc8de610b), TOBN(0xe8c515fb, 0x67e223ce),
+     TOBN(0x7774bfa6, 0x4ead6dc5), TOBN(0x89d20f95, 0x925c728f),
+     TOBN(0x7a1e0966, 0x098583ce), TOBN(0xa2eedb94, 0x93f2a7d7),
+     TOBN(0x1b282097, 0x4c304d4a), TOBN(0x0842e3da, 0xc077282d),
+     TOBN(0xe4d972a3, 0x3b9e2d7b), TOBN(0x7cc60b27, 0xc48218ff),
+     TOBN(0x8fc70838, 0x84149d91), TOBN(0x5c04346f, 0x2f461ecc),
+     TOBN(0xebe9fdf2, 0x614650a9), TOBN(0x5e35b537, 0xc1f666ac),
+     TOBN(0x645613d1, 0x88babc83), TOBN(0x88cace3a, 0xc5e1c93e),
+     TOBN(0x209ca375, 0x3de92e23), TOBN(0xccb03cc8, 0x5fbbb6e3),
+     TOBN(0xccb90f03, 0xd7b1487e), TOBN(0xfa9c2a38, 0xc710941f),
+     TOBN(0x756c3823, 0x6724ceed), TOBN(0x3a902258, 0x192d0323),
+     TOBN(0xb150e519, 0xea5e038e), TOBN(0xdcba2865, 0xc7427591),
+     TOBN(0xe549237f, 0x78890732), TOBN(0xc443bef9, 0x53fcb4d9),
+     TOBN(0x9884d8a6, 0xeb3480d6), TOBN(0x8a35b6a1, 0x3048b186),
+     TOBN(0xb4e44716, 0x65e9a90a), TOBN(0x45bf380d, 0x653006c0),
+     TOBN(0x8f3f820d, 0x4fe9ae3b), TOBN(0x244a35a0, 0x979a3b71),
+     TOBN(0xa1010e9d, 0x74cd06ff), TOBN(0x9c17c7df, 0xaca3eeac),
+     TOBN(0x74c86cd3, 0x8063aa2b), TOBN(0x8595c4b3, 0x734614ff),
+     TOBN(0xa3de00ca, 0x990f62cc), TOBN(0xd9bed213, 0xca0c3be5),
+     TOBN(0x7886078a, 0xdf8ce9f5), TOBN(0xddb27ce3, 0x5cd44444),
+     TOBN(0xed374a66, 0x58926ddd), TOBN(0x138b2d49, 0x908015b8),
+     TOBN(0x886c6579, 0xde1f7ab8), TOBN(0x888b9aa0, 0xc3020b7a),
+     TOBN(0xd3ec034e, 0x3a96e355), TOBN(0xba65b0b8, 0xf30fbe9a),
+     TOBN(0x064c8e50, 0xff21367a), TOBN(0x1f508ea4, 0x0b04b46e),
+     TOBN(0x98561a49, 0x747c866c), TOBN(0xbbb1e5fe, 0x0518a062),
+     TOBN(0x20ff4e8b, 0xecdc3608), TOBN(0x7f55cded, 0x20184027),
+     TOBN(0x8d73ec95, 0xf38c85f0), TOBN(0x5b589fdf, 0x8bc3b8c3),
+     TOBN(0xbe95dd98, 0x0f12b66f), TOBN(0xf5bd1a09, 0x0e338e01),
+     TOBN(0x65163ae5, 0x5e915918), TOBN(0x6158d6d9, 0x86f8a46b),
+     TOBN(0x8466b538, 0xeeebf99c), TOBN(0xca8761f6, 0xbca477ef),
+     TOBN(0xaf3449c2, 0x9ebbc601), TOBN(0xef3b0f41, 0xe0c3ae2f),
+     TOBN(0xaa6c577d, 0x5de63752), TOBN(0xe9166601, 0x64682a51),
+     TOBN(0x5a3097be, 0xfc15aa1e), TOBN(0x40d12548, 0xb54b0745),
+     TOBN(0x5bad4706, 0x519a5f12), TOBN(0xed03f717, 0xa439dee6),
+     TOBN(0x0794bb6c, 0x4a02c499), TOBN(0xf725083d, 0xcffe71d2),
+     TOBN(0x2cad7519, 0x0f3adcaf), TOBN(0x7f68ea1c, 0x43729310),
+     TOBN(0xe747c8c7, 0xb7ffd977), TOBN(0xec104c35, 0x80761a22),
+     TOBN(0x8395ebaf, 0x5a3ffb83), TOBN(0xfb3261f4, 0xe4b63db7),
+     TOBN(0x53544960, 0xd883e544), TOBN(0x13520d70, 0x8cc2eeb8),
+     TOBN(0x08f6337b, 0xd3d65f99), TOBN(0x83997db2, 0x781cf95b),
+     TOBN(0xce6ff106, 0x0dbd2c01), TOBN(0x4f8eea6b, 0x1f9ce934),
+     TOBN(0x546f7c4b, 0x0e993921), TOBN(0x6236a324, 0x5e753fc7),
+     TOBN(0x65a41f84, 0xa16022e9), TOBN(0x0c18d878, 0x43d1dbb2),
+     TOBN(0x73c55640, 0x2d4cef9c), TOBN(0xa0428108, 0x70444c74),
+     TOBN(0x68e4f15e, 0x9afdfb3c), TOBN(0x49a56143, 0x5bdfb6df),
+     TOBN(0xa9bc1bd4, 0x5f823d97), TOBN(0xbceb5970, 0xea111c2a),
+     TOBN(0x366b455f, 0xb269bbc4), TOBN(0x7cd85e1e, 0xe9bc5d62),
+     TOBN(0xc743c41c, 0x4f18b086), TOBN(0xa4b40990, 0x95294fb9),
+     TOBN(0x9c7c581d, 0x26ee8382), TOBN(0xcf17dcc5, 0x359d638e),
+     TOBN(0xee8273ab, 0xb728ae3d), TOBN(0x1d112926, 0xf821f047),
+     TOBN(0x11498477, 0x50491a74), TOBN(0x687fa761, 0xfde0dfb9),
+     TOBN(0x2c258022, 0x7ea435ab), TOBN(0x6b8bdb94, 0x91ce7e3f),
+     TOBN(0x4c5b5dc9, 0x3bf834aa), TOBN(0x04371819, 0x4f6c7e4b),
+     TOBN(0xc284e00a, 0x3736bcad), TOBN(0x0d881118, 0x21ae8f8d),
+     TOBN(0xf9cf0f82, 0xf48c8e33), TOBN(0xa11fd075, 0xa1bf40db),
+     TOBN(0xdceab0de, 0xdc2733e5), TOBN(0xc560a8b5, 0x8e986bd7),
+     TOBN(0x48dd1fe2, 0x3929d097), TOBN(0x3885b290, 0x92f188f1),
+     TOBN(0x0f2ae613, 0xda6fcdac), TOBN(0x9054303e, 0xb662a46c),
+     TOBN(0xb6871e44, 0x0738042a), TOBN(0x98e6a977, 0xbdaf6449),
+     TOBN(0xd8bc0650, 0xd1c9df1b), TOBN(0xef3d6451, 0x36e098f9),
+     TOBN(0x03fbae82, 0xb6d72d28), TOBN(0x77ca9db1, 0xf5d84080),
+     TOBN(0x8a112cff, 0xa58efc1c), TOBN(0x518d761c, 0xc564cb4a),
+     TOBN(0x69b5740e, 0xf0d1b5ce), TOBN(0x717039cc, 0xe9eb1785),
+     TOBN(0x3fe29f90, 0x22f53382), TOBN(0x8e54ba56, 0x6bc7c95c),
+     TOBN(0x9c806d8a, 0xf7f91d0f), TOBN(0x3b61b0f1, 0xa82a5728),
+     TOBN(0x4640032d, 0x94d76754), TOBN(0x273eb5de, 0x47d834c6),
+     TOBN(0x2988abf7, 0x7b4e4d53), TOBN(0xb7ce66bf, 0xde401777),
+     TOBN(0x9fba6b32, 0x715071b3), TOBN(0x82413c24, 0xad3a1a98),
+     TOBN(0x5b7fc8c4, 0xe0e8ad93), TOBN(0xb5679aee, 0x5fab868d),
+     TOBN(0xb1f9d2fa, 0x2b3946f3), TOBN(0x458897dc, 0x5685b50a),
+     TOBN(0x1e98c930, 0x89d0caf3), TOBN(0x39564c5f, 0x78642e92),
+     TOBN(0x1b77729a, 0x0dbdaf18), TOBN(0xf9170722, 0x579e82e6),
+     TOBN(0x680c0317, 0xe4515fa5), TOBN(0xf85cff84, 0xfb0c790f),
+     TOBN(0xc7a82aab, 0x6d2e0765), TOBN(0x7446bca9, 0x35c82b32),
+     TOBN(0x5de607aa, 0x6d63184f), TOBN(0x7c1a46a8, 0x262803a6),
+     TOBN(0xd218313d, 0xaebe8035), TOBN(0x92113ffd, 0xc73c51f8),
+     TOBN(0x4b38e083, 0x12e7e46c), TOBN(0x69d0a37a, 0x56126bd5),
+     TOBN(0xfb3f324b, 0x73c07e04), TOBN(0xa0c22f67, 0x8fda7267),
+     TOBN(0x8f2c0051, 0x4d2c7d8f), TOBN(0xbc45ced3, 0xcbe2cae5),
+     TOBN(0xe1c6cf07, 0xa8f0f277), TOBN(0xbc392312, 0x1eb99a98),
+     TOBN(0x75537b7e, 0x3cc8ac85), TOBN(0x8d725f57, 0xdd02753b),
+     TOBN(0xfd05ff64, 0xb737df2f), TOBN(0x55fe8712, 0xf6d2531d),
+     TOBN(0x57ce04a9, 0x6ab6b01c), TOBN(0x69a02a89, 0x7cd93724),
+     TOBN(0x4f82ac35, 0xcf86699b), TOBN(0x8242d3ad, 0x9cb4b232),
+     TOBN(0x713d0f65, 0xd62105e5), TOBN(0xbb222bfa, 0x2d29be61),
+     TOBN(0xf2f9a79e, 0x6cfbef09), TOBN(0xfc24d8d3, 0xd5d6782f),
+     TOBN(0x5db77085, 0xd4129967), TOBN(0xdb81c3cc, 0xdc3c2a43),
+     TOBN(0x9d655fc0, 0x05d8d9a3), TOBN(0x3f5d057a, 0x54298026),
+     TOBN(0x1157f56d, 0x88c54694), TOBN(0xb26baba5, 0x9b09573e),
+     TOBN(0x2cab03b0, 0x22adffd1), TOBN(0x60a412c8, 0xdd69f383),
+     TOBN(0xed76e98b, 0x54b25039), TOBN(0xd4ee67d3, 0x687e714d),
+     TOBN(0x87739648, 0x7b00b594), TOBN(0xce419775, 0xc9ef709b),
+     TOBN(0x40f76f85, 0x1c203a40), TOBN(0x30d352d6, 0xeafd8f91),
+     TOBN(0xaf196d3d, 0x95578dd2), TOBN(0xea4bb3d7, 0x77cc3f3d),
+     TOBN(0x42a5bd03, 0xb98e782b), TOBN(0xac958c40, 0x0624920d),
+     TOBN(0xb838134c, 0xfc56fcc8), TOBN(0x86ec4ccf, 0x89572e5e),
+     TOBN(0x69c43526, 0x9be47be0), TOBN(0x323b7dd8, 0xcb28fea1),
+     TOBN(0xfa5538ba, 0x3a6c67e5), TOBN(0xef921d70, 0x1d378e46),
+     TOBN(0xf92961fc, 0x3c4b880e), TOBN(0x3f6f914e, 0x98940a67),
+     TOBN(0xa990eb0a, 0xfef0ff39), TOBN(0xa6c2920f, 0xf0eeff9c),
+     TOBN(0xca804166, 0x51b8d9a3), TOBN(0x42531bc9, 0x0ffb0db1),
+     TOBN(0x72ce4718, 0xaa82e7ce), TOBN(0x6e199913, 0xdf574741),
+     TOBN(0xd5f1b13d, 0xd5d36946), TOBN(0x8255dc65, 0xf68f0194),
+     TOBN(0xdc9df4cd, 0x8710d230), TOBN(0x3453c20f, 0x138c1988),
+     TOBN(0x9af98dc0, 0x89a6ef01), TOBN(0x4dbcc3f0, 0x9857df85),
+     TOBN(0x34805601, 0x5c1ad924), TOBN(0x40448da5, 0xd0493046),
+     TOBN(0xf629926d, 0x4ee343e2), TOBN(0x6343f1bd, 0x90e8a301),
+     TOBN(0xefc93491, 0x40815b3f), TOBN(0xf882a423, 0xde8f66fb),
+     TOBN(0x3a12d5f4, 0xe7db9f57), TOBN(0x7dfba38a, 0x3c384c27),
+     TOBN(0x7a904bfd, 0x6fc660b1), TOBN(0xeb6c5db3, 0x2773b21c),
+     TOBN(0xc350ee66, 0x1cdfe049), TOBN(0x9baac0ce, 0x44540f29),
+     TOBN(0xbc57b6ab, 0xa5ec6aad), TOBN(0x167ce8c3, 0x0a7c1baa),
+     TOBN(0xb23a03a5, 0x53fb2b56), TOBN(0x6ce141e7, 0x4e057f78),
+     TOBN(0x796525c3, 0x89e490d9), TOBN(0x0bc95725, 0xa31a7e75),
+     TOBN(0x1ec56791, 0x1220fd06), TOBN(0x716e3a3c, 0x408b0bd6),
+     TOBN(0x31cd6bf7, 0xe8ebeba9), TOBN(0xa7326ca6, 0xbee6b670),
+     TOBN(0x3d9f851c, 0xcd090c43), TOBN(0x561e8f13, 0xf12c3988),
+     TOBN(0x50490b6a, 0x904b7be4), TOBN(0x61690ce1, 0x0410737b),
+     TOBN(0x299e9a37, 0x0f009052), TOBN(0x258758f0, 0xf026092e),
+     TOBN(0x9fa255f3, 0xfdfcdc0f), TOBN(0xdbc9fb1f, 0xc0e1bcd2),
+     TOBN(0x35f9dd6e, 0x24651840), TOBN(0xdca45a84, 0xa5c59abc),
+     TOBN(0x103d396f, 0xecca4938), TOBN(0x4532da0a, 0xb97b3f29),
+     TOBN(0xc4135ea5, 0x1999a6bf), TOBN(0x3aa9505a, 0x5e6bf2ee),
+     TOBN(0xf77cef06, 0x3f5be093), TOBN(0x97d1a0f8, 0xa943152e),
+     TOBN(0x2cb0ebba, 0x2e1c21dd), TOBN(0xf41b29fc, 0x2c6797c4),
+     TOBN(0xc6e17321, 0xb300101f), TOBN(0x4422b0e9, 0xd0d79a89),
+     TOBN(0x49e4901c, 0x92f1bfc4), TOBN(0x06ab1f8f, 0xe1e10ed9),
+     TOBN(0x84d35577, 0xdb2926b8), TOBN(0xca349d39, 0x356e8ec2),
+     TOBN(0x70b63d32, 0x343bf1a9), TOBN(0x8fd3bd28, 0x37d1a6b1),
+     TOBN(0x0454879c, 0x316865b4), TOBN(0xee959ff6, 0xc458efa2),
+     TOBN(0x0461dcf8, 0x9706dc3f), TOBN(0x737db0e2, 0x164e4b2e),
+     TOBN(0x09262680, 0x2f8843c8), TOBN(0x54498bbc, 0x7745e6f6),
+     TOBN(0x359473fa, 0xa29e24af), TOBN(0xfcc3c454, 0x70aa87a1),
+     TOBN(0xfd2c4bf5, 0x00573ace), TOBN(0xb65b514e, 0x28dd1965),
+     TOBN(0xe46ae7cf, 0x2193e393), TOBN(0x60e9a4e1, 0xf5444d97),
+     TOBN(0xe7594e96, 0x00ff38ed), TOBN(0x43d84d2f, 0x0a0e0f02),
+     TOBN(0x8b6db141, 0xee398a21), TOBN(0xb88a56ae, 0xe3bcc5be),
+     TOBN(0x0a1aa52f, 0x373460ea), TOBN(0x20da1a56, 0x160bb19b),
+     TOBN(0xfb54999d, 0x65bf0384), TOBN(0x71a14d24, 0x5d5a180e),
+     TOBN(0xbc44db7b, 0x21737b04), TOBN(0xd84fcb18, 0x01dd8e92),
+     TOBN(0x80de937b, 0xfa44b479), TOBN(0x53505499, 0x5c98fd4f),
+     TOBN(0x1edb12ab, 0x28f08727), TOBN(0x4c58b582, 0xa5f3ef53),
+     TOBN(0xbfb236d8, 0x8327f246), TOBN(0xc3a3bfaa, 0x4d7df320),
+     TOBN(0xecd96c59, 0xb96024f2), TOBN(0xfc293a53, 0x7f4e0433),
+     TOBN(0x5341352b, 0x5acf6e10), TOBN(0xc50343fd, 0xafe652c3),
+     TOBN(0x4af3792d, 0x18577a7f), TOBN(0xe1a4c617, 0xaf16823d),
+     TOBN(0x9b26d0cd, 0x33425d0a), TOBN(0x306399ed, 0x9b7bc47f),
+     TOBN(0x2a792f33, 0x706bb20b), TOBN(0x31219614, 0x98111055),
+     TOBN(0x864ec064, 0x87f5d28b), TOBN(0x11392d91, 0x962277fd),
+     TOBN(0xb5aa7942, 0xbb6aed5f), TOBN(0x080094dc, 0x47e799d9),
+     TOBN(0x4afa588c, 0x208ba19b), TOBN(0xd3e7570f, 0x8512f284),
+     TOBN(0xcbae64e6, 0x02f5799a), TOBN(0xdeebe7ef, 0x514b9492),
+     TOBN(0x30300f98, 0xe5c298ff), TOBN(0x17f561be, 0x3678361f),
+     TOBN(0xf52ff312, 0x98cb9a16), TOBN(0x6233c3bc, 0x5562d490),
+     TOBN(0x7bfa15a1, 0x92e3a2cb), TOBN(0x961bcfd1, 0xe6365119),
+     TOBN(0x3bdd29bf, 0x2c8c53b1), TOBN(0x739704df, 0x822844ba),
+     TOBN(0x7dacfb58, 0x7e7b754b), TOBN(0x23360791, 0xa806c9b9),
+     TOBN(0xe7eb88c9, 0x23504452), TOBN(0x2983e996, 0x852c1783),
+     TOBN(0xdd4ae529, 0x958d881d), TOBN(0x026bae03, 0x262c7b3c),
+     TOBN(0x3a6f9193, 0x960b52d1), TOBN(0xd0980f90, 0x92696cfb),
+     TOBN(0x4c1f428c, 0xd5f30851), TOBN(0x94dfed27, 0x2a4f6630),
+     TOBN(0x4df53772, 0xfc5d48a4), TOBN(0xdd2d5a2f, 0x933260ce),
+     TOBN(0x574115bd, 0xd44cc7a5), TOBN(0x4ba6b20d, 0xbd12533a),
+     TOBN(0x30e93cb8, 0x243057c9), TOBN(0x794c486a, 0x14de320e),
+     TOBN(0xe925d4ce, 0xf21496e4), TOBN(0xf951d198, 0xec696331),
+     TOBN(0x9810e2de, 0x3e8d812f), TOBN(0xd0a47259, 0x389294ab),
+     TOBN(0x513ba2b5, 0x0e3bab66), TOBN(0x462caff5, 0xabad306f),
+     TOBN(0xe2dc6d59, 0xaf04c49e), TOBN(0x1aeb8750, 0xe0b84b0b),
+     TOBN(0xc034f12f, 0x2f7d0ca2), TOBN(0x6d2e8128, 0xe06acf2f),
+     TOBN(0x801f4f83, 0x21facc2f), TOBN(0xa1170c03, 0xf40ef607),
+     TOBN(0xfe0a1d4f, 0x7805a99c), TOBN(0xbde56a36, 0xcc26aba5),
+     TOBN(0x5b1629d0, 0x35531f40), TOBN(0xac212c2b, 0x9afa6108),
+     TOBN(0x30a06bf3, 0x15697be5), TOBN(0x6f0545dc, 0x2c63c7c1),
+     TOBN(0x5d8cb842, 0x7ccdadaf), TOBN(0xd52e379b, 0xac7015bb),
+     TOBN(0xc4f56147, 0xf462c23e), TOBN(0xd44a4298, 0x46bc24b0),
+     TOBN(0xbc73d23a, 0xe2856d4f), TOBN(0x61cedd8c, 0x0832bcdf),
+     TOBN(0x60953556, 0x99f241d7), TOBN(0xee4adbd7, 0x001a349d),
+     TOBN(0x0b35bf6a, 0xaa89e491), TOBN(0x7f0076f4, 0x136f7546),
+     TOBN(0xd19a18ba, 0x9264da3d), TOBN(0x6eb2d2cd, 0x62a7a28b),
+     TOBN(0xcdba941f, 0x8761c971), TOBN(0x1550518b, 0xa3be4a5d),
+     TOBN(0xd0e8e2f0, 0x57d0b70c), TOBN(0xeea8612e, 0xcd133ba3),
+     TOBN(0x814670f0, 0x44416aec), TOBN(0x424db6c3, 0x30775061),
+     TOBN(0xd96039d1, 0x16213fd1), TOBN(0xc61e7fa5, 0x18a3478f),
+     TOBN(0xa805bdcc, 0xcb0c5021), TOBN(0xbdd6f3a8, 0x0cc616dd),
+     TOBN(0x06009667, 0x5d97f7e2), TOBN(0x31db0fc1, 0xaf0bf4b6),
+     TOBN(0x23680ed4, 0x5491627a), TOBN(0xb99a3c66, 0x7d741fb1),
+     TOBN(0xe9bb5f55, 0x36b1ff92), TOBN(0x29738577, 0x512b388d),
+     TOBN(0xdb8a2ce7, 0x50fcf263), TOBN(0x385346d4, 0x6c4f7b47),
+     TOBN(0xbe86c5ef, 0x31631f9e), TOBN(0xbf91da21, 0x03a57a29),
+     TOBN(0xc3b1f796, 0x7b23f821), TOBN(0x0f7d00d2, 0x770db354),
+     TOBN(0x8ffc6c3b, 0xd8fe79da), TOBN(0xcc5e8c40, 0xd525c996),
+     TOBN(0x4640991d, 0xcfff632a), TOBN(0x64d97e8c, 0x67112528),
+     TOBN(0xc232d973, 0x02f1cd1e), TOBN(0xce87eacb, 0x1dd212a4),
+     TOBN(0x6e4c8c73, 0xe69802f7), TOBN(0x12ef0290, 0x1fffddbd),
+     TOBN(0x941ec74e, 0x1bcea6e2), TOBN(0xd0b54024, 0x3cb92cbb),
+     TOBN(0x809fb9d4, 0x7e8f9d05), TOBN(0x3bf16159, 0xf2992aae),
+     TOBN(0xad40f279, 0xf8a7a838), TOBN(0x11aea631, 0x05615660),
+     TOBN(0xbf52e6f1, 0xa01f6fa1), TOBN(0xef046995, 0x3dc2aec9),
+     TOBN(0x785dbec9, 0xd8080711), TOBN(0xe1aec60a, 0x9fdedf76),
+     TOBN(0xece797b5, 0xfa21c126), TOBN(0xc66e898f, 0x05e52732),
+     TOBN(0x39bb69c4, 0x08811fdb), TOBN(0x8bfe1ef8, 0x2fc7f082),
+     TOBN(0xc8e7a393, 0x174f4138), TOBN(0xfba8ad1d, 0xd58d1f98),
+     TOBN(0xbc21d0ce, 0xbfd2fd5b), TOBN(0x0b839a82, 0x6ee60d61),
+     TOBN(0xaacf7658, 0xafd22253), TOBN(0xb526bed8, 0xaae396b3),
+     TOBN(0xccc1bbc2, 0x38564464), TOBN(0x9e3ff947, 0x8c45bc73),
+     TOBN(0xcde9bca3, 0x58188a78), TOBN(0x138b8ee0, 0xd73bf8f7),
+     TOBN(0x5c7e234c, 0x4123c489), TOBN(0x66e69368, 0xfa643297),
+     TOBN(0x0629eeee, 0x39a15fa3), TOBN(0x95fab881, 0xa9e2a927),
+     TOBN(0xb2497007, 0xeafbb1e1), TOBN(0xd75c9ce6, 0xe75b7a93),
+     TOBN(0x3558352d, 0xefb68d78), TOBN(0xa2f26699, 0x223f6396),
+     TOBN(0xeb911ecf, 0xe469b17a), TOBN(0x62545779, 0xe72d3ec2),
+     TOBN(0x8ea47de7, 0x82cb113f), TOBN(0xebe4b086, 0x4e1fa98d),
+     TOBN(0xec2d5ed7, 0x8cdfedb1), TOBN(0xa535c077, 0xfe211a74),
+     TOBN(0x9678109b, 0x11d244c5), TOBN(0xf17c8bfb, 0xbe299a76),
+     TOBN(0xb651412e, 0xfb11fbc4), TOBN(0xea0b5482, 0x94ab3f65),
+     TOBN(0xd8dffd95, 0x0cf78243), TOBN(0x2e719e57, 0xce0361d4),
+     TOBN(0x9007f085, 0x304ddc5b), TOBN(0x095e8c6d, 0x4daba2ea),
+     TOBN(0x5a33cdb4, 0x3f9d28a9), TOBN(0x85b95cd8, 0xe2283003),
+     TOBN(0xbcd6c819, 0xb9744733), TOBN(0x29c5f538, 0xfc7f5783),
+     TOBN(0x6c49b2fa, 0xd59038e4), TOBN(0x68349cc1, 0x3bbe1018),
+     TOBN(0xcc490c1d, 0x21830ee5), TOBN(0x36f9c4ee, 0xe9bfa297),
+     TOBN(0x58fd7294, 0x48de1a94), TOBN(0xaadb13a8, 0x4e8f2cdc),
+     TOBN(0x515eaaa0, 0x81313dba), TOBN(0xc76bb468, 0xc2152dd8),
+     TOBN(0x357f8d75, 0xa653dbf8), TOBN(0xe4d8c4d1, 0xb14ac143),
+     TOBN(0xbdb8e675, 0xb055cb40), TOBN(0x898f8e7b, 0x977b5167),
+     TOBN(0xecc65651, 0xb82fb863), TOBN(0x56544814, 0x6d88f01f),
+     TOBN(0xb0928e95, 0x263a75a9), TOBN(0xcfb6836f, 0x1a22fcda),
+     TOBN(0x651d14db, 0x3f3bd37c), TOBN(0x1d3837fb, 0xb6ad4664),
+     TOBN(0x7c5fb538, 0xff4f94ab), TOBN(0x7243c712, 0x6d7fb8f2),
+     TOBN(0xef13d60c, 0xa85c5287), TOBN(0x18cfb7c7, 0x4bb8dd1b),
+     TOBN(0x82f9bfe6, 0x72908219), TOBN(0x35c4592b, 0x9d5144ab),
+     TOBN(0x52734f37, 0x9cf4b42f), TOBN(0x6bac55e7, 0x8c60ddc4),
+     TOBN(0xb5cd811e, 0x94dea0f6), TOBN(0x259ecae4, 0xe18cc1a3),
+     TOBN(0x6a0e836e, 0x15e660f8), TOBN(0x6c639ea6, 0x0e02bff2),
+     TOBN(0x8721b8cb, 0x7e1026fd), TOBN(0x9e73b50b, 0x63261942),
+     TOBN(0xb8c70974, 0x77f01da3), TOBN(0x1839e6a6, 0x8268f57f),
+     TOBN(0x571b9415, 0x5150b805), TOBN(0x1892389e, 0xf92c7097),
+     TOBN(0x8d69c18e, 0x4a084b95), TOBN(0x7014c512, 0xbe5b495c),
+     TOBN(0x4780db36, 0x1b07523c), TOBN(0x2f6219ce, 0x2c1c64fa),
+     TOBN(0xc38b81b0, 0x602c105a), TOBN(0xab4f4f20, 0x5dc8e360),
+     TOBN(0x20d3c982, 0xcf7d62d2), TOBN(0x1f36e29d, 0x23ba8150),
+     TOBN(0x48ae0bf0, 0x92763f9e), TOBN(0x7a527e6b, 0x1d3a7007),
+     TOBN(0xb4a89097, 0x581a85e3), TOBN(0x1f1a520f, 0xdc158be5),
+     TOBN(0xf98db37d, 0x167d726e), TOBN(0x8802786e, 0x1113e862)}
+    ,
+    {TOBN(0xefb2149e, 0x36f09ab0), TOBN(0x03f163ca, 0x4a10bb5b),
+     TOBN(0xd0297045, 0x06e20998), TOBN(0x56f0af00, 0x1b5a3bab),
+     TOBN(0x7af4cfec, 0x70880e0d), TOBN(0x7332a66f, 0xbe3d913f),
+     TOBN(0x32e6c84a, 0x7eceb4bd), TOBN(0xedc4a79a, 0x9c228f55),
+     TOBN(0xc37c7dd0, 0xc55c4496), TOBN(0xa6a96357, 0x25bbabd2),
+     TOBN(0x5b7e63f2, 0xadd7f363), TOBN(0x9dce3782, 0x2e73f1df),
+     TOBN(0xe1e5a16a, 0xb2b91f71), TOBN(0xe4489823, 0x5ba0163c),
+     TOBN(0xf2759c32, 0xf6e515ad), TOBN(0xa5e2f1f8, 0x8615eecf),
+     TOBN(0x74519be7, 0xabded551), TOBN(0x03d358b8, 0xc8b74410),
+     TOBN(0x4d00b10b, 0x0e10d9a9), TOBN(0x6392b0b1, 0x28da52b7),
+     TOBN(0x6744a298, 0x0b75c904), TOBN(0xc305b0ae, 0xa8f7f96c),
+     TOBN(0x042e421d, 0x182cf932), TOBN(0xf6fc5d50, 0x9e4636ca),
+     TOBN(0x795847c9, 0xd64cc78c), TOBN(0x6c50621b, 0x9b6cb27b),
+     TOBN(0x07099bf8, 0xdf8022ab), TOBN(0x48f862eb, 0xc04eda1d),
+     TOBN(0xd12732ed, 0xe1603c16), TOBN(0x19a80e0f, 0x5c9a9450),
+     TOBN(0xe2257f54, 0xb429b4fc), TOBN(0x66d3b2c6, 0x45460515),
+     TOBN(0x6ca4f87e, 0x822e37be), TOBN(0x73f237b4, 0x253bda4e),
+     TOBN(0xf747f3a2, 0x41190aeb), TOBN(0xf06fa36f, 0x804cf284),
+     TOBN(0x0a6bbb6e, 0xfc621c12), TOBN(0x5d624b64, 0x40b80ec6),
+     TOBN(0x4b072425, 0x7ba556f3), TOBN(0x7fa0c354, 0x3e2d20a8),
+     TOBN(0xe921fa31, 0xe3229d41), TOBN(0xa929c652, 0x94531bd4),
+     TOBN(0x84156027, 0xa6d38209), TOBN(0xf3d69f73, 0x6bdb97bd),
+     TOBN(0x8906d19a, 0x16833631), TOBN(0x68a34c2e, 0x03d51be3),
+     TOBN(0xcb59583b, 0x0e511cd8), TOBN(0x99ce6bfd, 0xfdc132a8),
+     TOBN(0x3facdaaa, 0xffcdb463), TOBN(0x658bbc1a, 0x34a38b08),
+     TOBN(0x12a801f8, 0xf1a9078d), TOBN(0x1567bcf9, 0x6ab855de),
+     TOBN(0xe08498e0, 0x3572359b), TOBN(0xcf0353e5, 0x8659e68b),
+     TOBN(0xbb86e9c8, 0x7d23807c), TOBN(0xbc08728d, 0x2198e8a2),
+     TOBN(0x8de2b7bc, 0x453cadd6), TOBN(0x203900a7, 0xbc0bc1f8),
+     TOBN(0xbcd86e47, 0xa6abd3af), TOBN(0x911cac12, 0x8502effb),
+     TOBN(0x2d550242, 0xec965469), TOBN(0x0e9f7692, 0x29e0017e),
+     TOBN(0x633f078f, 0x65979885), TOBN(0xfb87d449, 0x4cf751ef),
+     TOBN(0xe1790e4b, 0xfc25419a), TOBN(0x36467203, 0x4bff3cfd),
+     TOBN(0xc8db6386, 0x25b6e83f), TOBN(0x6cc69f23, 0x6cad6fd2),
+     TOBN(0x0219e45a, 0x6bc68bb9), TOBN(0xe43d79b6, 0x297f7334),
+     TOBN(0x7d445368, 0x465dc97c), TOBN(0x4b9eea32, 0x2a0b949a),
+     TOBN(0x1b96c6ba, 0x6102d021), TOBN(0xeaafac78, 0x2f4461ea),
+     TOBN(0xd4b85c41, 0xc49f19a8), TOBN(0x275c28e4, 0xcf538875),
+     TOBN(0x35451a9d, 0xdd2e54e0), TOBN(0x6991adb5, 0x0605618b),
+     TOBN(0x5b8b4bcd, 0x7b36cd24), TOBN(0x372a4f8c, 0x56f37216),
+     TOBN(0xc890bd73, 0xa6a5da60), TOBN(0x6f083da0, 0xdc4c9ff0),
+     TOBN(0xf4e14d94, 0xf0536e57), TOBN(0xf9ee1eda, 0xaaec8243),
+     TOBN(0x571241ec, 0x8bdcf8e7), TOBN(0xa5db8271, 0x0b041e26),
+     TOBN(0x9a0b9a99, 0xe3fff040), TOBN(0xcaaf21dd, 0x7c271202),
+     TOBN(0xb4e2b2e1, 0x4f0dd2e8), TOBN(0xe77e7c4f, 0x0a377ac7),
+     TOBN(0x69202c3f, 0x0d7a2198), TOBN(0xf759b7ff, 0x28200eb8),
+     TOBN(0xc87526ed, 0xdcfe314e), TOBN(0xeb84c524, 0x53d5cf99),
+     TOBN(0xb1b52ace, 0x515138b6), TOBN(0x5aa7ff8c, 0x23fca3f4),
+     TOBN(0xff0b13c3, 0xb9791a26), TOBN(0x960022da, 0xcdd58b16),
+     TOBN(0xdbd55c92, 0x57aad2de), TOBN(0x3baaaaa3, 0xf30fe619),
+     TOBN(0x9a4b2346, 0x0d881efd), TOBN(0x506416c0, 0x46325e2a),
+     TOBN(0x91381e76, 0x035c18d4), TOBN(0xb3bb68be, 0xf27817b0),
+     TOBN(0x15bfb8bf, 0x5116f937), TOBN(0x7c64a586, 0xc1268943),
+     TOBN(0x71e25cc3, 0x8419a2c8), TOBN(0x9fd6b0c4, 0x8335f463),
+     TOBN(0x4bf0ba3c, 0xe8ee0e0e), TOBN(0x6f6fba60, 0x298c21fa),
+     TOBN(0x57d57b39, 0xae66bee0), TOBN(0x292d5130, 0x22672544),
+     TOBN(0xf451105d, 0xbab093b3), TOBN(0x012f59b9, 0x02839986),
+     TOBN(0x8a915802, 0x3474a89c), TOBN(0x048c919c, 0x2de03e97),
+     TOBN(0xc476a2b5, 0x91071cd5), TOBN(0x791ed89a, 0x034970a5),
+     TOBN(0x89bd9042, 0xe1b7994b), TOBN(0x8eaf5179, 0xa1057ffd),
+     TOBN(0x6066e2a2, 0xd551ee10), TOBN(0x87a8f1d8, 0x727e09a6),
+     TOBN(0x00d08bab, 0x2c01148d), TOBN(0x6da8e4f1, 0x424f33fe),
+     TOBN(0x466d17f0, 0xcf9a4e71), TOBN(0xff502010, 0x3bf5cb19),
+     TOBN(0xdccf97d8, 0xd062ecc0), TOBN(0x80c0d9af, 0x81d80ac4),
+     TOBN(0xe87771d8, 0x033f2876), TOBN(0xb0186ec6, 0x7d5cc3db),
+     TOBN(0x58e8bb80, 0x3bc9bc1d), TOBN(0x4d1395cc, 0x6f6ef60e),
+     TOBN(0xa73c62d6, 0x186244a0), TOBN(0x918e5f23, 0x110a5b53),
+     TOBN(0xed4878ca, 0x741b7eab), TOBN(0x3038d71a, 0xdbe03e51),
+     TOBN(0x840204b7, 0xa93c3246), TOBN(0x21ab6069, 0xa0b9b4cd),
+     TOBN(0xf5fa6e2b, 0xb1d64218), TOBN(0x1de6ad0e, 0xf3d56191),
+     TOBN(0x570aaa88, 0xff1929c7), TOBN(0xc6df4c6b, 0x640e87b5),
+     TOBN(0xde8a74f2, 0xc65f0ccc), TOBN(0x8b972fd5, 0xe6f6cc01),
+     TOBN(0x3fff36b6, 0x0b846531), TOBN(0xba7e45e6, 0x10a5e475),
+     TOBN(0x84a1d10e, 0x4145b6c5), TOBN(0xf1f7f91a, 0x5e046d9d),
+     TOBN(0x0317a692, 0x44de90d7), TOBN(0x951a1d4a, 0xf199c15e),
+     TOBN(0x91f78046, 0xc9d73deb), TOBN(0x74c82828, 0xfab8224f),
+     TOBN(0xaa6778fc, 0xe7560b90), TOBN(0xb4073e61, 0xa7e824ce),
+     TOBN(0xff0d693c, 0xd642eba8), TOBN(0x7ce2e57a, 0x5dccef38),
+     TOBN(0x89c2c789, 0x1df1ad46), TOBN(0x83a06922, 0x098346fd),
+     TOBN(0x2d715d72, 0xda2fc177), TOBN(0x7b6dd71d, 0x85b6cf1d),
+     TOBN(0xc60a6d0a, 0x73fa9cb0), TOBN(0xedd3992e, 0x328bf5a9),
+     TOBN(0xc380ddd0, 0x832c8c82), TOBN(0xd182d410, 0xa2a0bf50),
+     TOBN(0x7d9d7438, 0xd9a528db), TOBN(0xe8b1a0e9, 0xcaf53994),
+     TOBN(0xddd6e5fe, 0x0e19987c), TOBN(0xacb8df03, 0x190b059d),
+     TOBN(0x53703a32, 0x8300129f), TOBN(0x1f637662, 0x68c43bfd),
+     TOBN(0xbcbd1913, 0x00e54051), TOBN(0x812fcc62, 0x7bf5a8c5),
+     TOBN(0x3f969d5f, 0x29fb85da), TOBN(0x72f4e00a, 0x694759e8),
+     TOBN(0x426b6e52, 0x790726b7), TOBN(0x617bbc87, 0x3bdbb209),
+     TOBN(0x511f8bb9, 0x97aee317), TOBN(0x812a4096, 0xe81536a8),
+     TOBN(0x137dfe59, 0x3ac09b9b), TOBN(0x0682238f, 0xba8c9a7a),
+     TOBN(0x7072ead6, 0xaeccb4bd), TOBN(0x6a34e9aa, 0x692ba633),
+     TOBN(0xc82eaec2, 0x6fff9d33), TOBN(0xfb753512, 0x1d4d2b62),
+     TOBN(0x1a0445ff, 0x1d7aadab), TOBN(0x65d38260, 0xd5f6a67c),
+     TOBN(0x6e62fb08, 0x91cfb26f), TOBN(0xef1e0fa5, 0x5c7d91d6),
+     TOBN(0x47e7c7ba, 0x33db72cd), TOBN(0x017cbc09, 0xfa7c74b2),
+     TOBN(0x3c931590, 0xf50a503c), TOBN(0xcac54f60, 0x616baa42),
+     TOBN(0x9b6cd380, 0xb2369f0f), TOBN(0x97d3a70d, 0x23c76151),
+     TOBN(0x5f9dd6fc, 0x9862a9c6), TOBN(0x044c4ab2, 0x12312f51),
+     TOBN(0x035ea0fd, 0x834a2ddc), TOBN(0x49e6b862, 0xcc7b826d),
+     TOBN(0xb03d6883, 0x62fce490), TOBN(0x62f2497a, 0xb37e36e9),
+     TOBN(0x04b005b6, 0xc6458293), TOBN(0x36bb5276, 0xe8d10af7),
+     TOBN(0xacf2dc13, 0x8ee617b8), TOBN(0x470d2d35, 0xb004b3d4),
+     TOBN(0x06790832, 0xfeeb1b77), TOBN(0x2bb75c39, 0x85657f9c),
+     TOBN(0xd70bd4ed, 0xc0f60004), TOBN(0xfe797ecc, 0x219b018b),
+     TOBN(0x9b5bec2a, 0x753aebcc), TOBN(0xdaf9f3dc, 0xc939eca5),
+     TOBN(0xd6bc6833, 0xd095ad09), TOBN(0x98abdd51, 0xdaa4d2fc),
+     TOBN(0xd9840a31, 0x8d168be5), TOBN(0xcf7c10e0, 0x2325a23c),
+     TOBN(0xa5c02aa0, 0x7e6ecfaf), TOBN(0x2462e7e6, 0xb5bfdf18),
+     TOBN(0xab2d8a8b, 0xa0cc3f12), TOBN(0x68dd485d, 0xbc672a29),
+     TOBN(0x72039752, 0x596f2cd3), TOBN(0x5d3eea67, 0xa0cf3d8d),
+     TOBN(0x810a1a81, 0xe6602671), TOBN(0x8f144a40, 0x14026c0c),
+     TOBN(0xbc753a6d, 0x76b50f85), TOBN(0xc4dc21e8, 0x645cd4a4),
+     TOBN(0xc5262dea, 0x521d0378), TOBN(0x802b8e0e, 0x05011c6f),
+     TOBN(0x1ba19cbb, 0x0b4c19ea), TOBN(0x21db64b5, 0xebf0aaec),
+     TOBN(0x1f394ee9, 0x70342f9d), TOBN(0x93a10aee, 0x1bc44a14),
+     TOBN(0xa7eed31b, 0x3efd0baa), TOBN(0x6e7c824e, 0x1d154e65),
+     TOBN(0xee23fa81, 0x9966e7ee), TOBN(0x64ec4aa8, 0x05b7920d),
+     TOBN(0x2d44462d, 0x2d90aad4), TOBN(0xf44dd195, 0xdf277ad5),
+     TOBN(0x8d6471f1, 0xbb46b6a1), TOBN(0x1e65d313, 0xfd885090),
+     TOBN(0x33a800f5, 0x13a977b4), TOBN(0xaca9d721, 0x0797e1ef),
+     TOBN(0x9a5a85a0, 0xfcff6a17), TOBN(0x9970a3f3, 0x1eca7cee),
+     TOBN(0xbb9f0d6b, 0xc9504be3), TOBN(0xe0c504be, 0xadd24ee2),
+     TOBN(0x7e09d956, 0x77fcc2f4), TOBN(0xef1a5227, 0x65bb5fc4),
+     TOBN(0x145d4fb1, 0x8b9286aa), TOBN(0x66fd0c5d, 0x6649028b),
+     TOBN(0x98857ceb, 0x1bf4581c), TOBN(0xe635e186, 0xaca7b166),
+     TOBN(0x278ddd22, 0x659722ac), TOBN(0xa0903c4c, 0x1db68007),
+     TOBN(0x366e4589, 0x48f21402), TOBN(0x31b49c14, 0xb96abda2),
+     TOBN(0x329c4b09, 0xe0403190), TOBN(0x97197ca3, 0xd29f43fe),
+     TOBN(0x8073dd1e, 0x274983d8), TOBN(0xda1a3bde, 0x55717c8f),
+     TOBN(0xfd3d4da2, 0x0361f9d1), TOBN(0x1332d081, 0x4c7de1ce),
+     TOBN(0x9b7ef7a3, 0xaa6d0e10), TOBN(0x17db2e73, 0xf54f1c4a),
+     TOBN(0xaf3dffae, 0x4cd35567), TOBN(0xaaa2f406, 0xe56f4e71),
+     TOBN(0x8966759e, 0x7ace3fc7), TOBN(0x9594eacf, 0x45a8d8c6),
+     TOBN(0x8de3bd8b, 0x91834e0e), TOBN(0xafe4ca53, 0x548c0421),
+     TOBN(0xfdd7e856, 0xe6ee81c6), TOBN(0x8f671beb, 0x6b891a3a),
+     TOBN(0xf7a58f2b, 0xfae63829), TOBN(0x9ab186fb, 0x9c11ac9f),
+     TOBN(0x8d6eb369, 0x10b5be76), TOBN(0x046b7739, 0xfb040bcd),
+     TOBN(0xccb4529f, 0xcb73de88), TOBN(0x1df0fefc, 0xcf26be03),
+     TOBN(0xad7757a6, 0xbcfcd027), TOBN(0xa8786c75, 0xbb3165ca),
+     TOBN(0xe9db1e34, 0x7e99a4d9), TOBN(0x99ee86df, 0xb06c504b),
+     TOBN(0x5b7c2ddd, 0xc15c9f0a), TOBN(0xdf87a734, 0x4295989e),
+     TOBN(0x59ece47c, 0x03d08fda), TOBN(0xb074d3dd, 0xad5fc702),
+     TOBN(0x20407903, 0x51a03776), TOBN(0x2bb1f77b, 0x2a608007),
+     TOBN(0x25c58f4f, 0xe1153185), TOBN(0xe6df62f6, 0x766e6447),
+     TOBN(0xefb3d1be, 0xed51275a), TOBN(0x5de47dc7, 0x2f0f483f),
+     TOBN(0x7932d98e, 0x97c2bedf), TOBN(0xd5c11927, 0x0219f8a1),
+     TOBN(0x9d751200, 0xa73a294e), TOBN(0x5f88434a, 0x9dc20172),
+     TOBN(0xd28d9fd3, 0xa26f506a), TOBN(0xa890cd31, 0x9d1dcd48),
+     TOBN(0x0aebaec1, 0x70f4d3b4), TOBN(0xfd1a1369, 0x0ffc8d00),
+     TOBN(0xb9d9c240, 0x57d57838), TOBN(0x45929d26, 0x68bac361),
+     TOBN(0x5a2cd060, 0x25b15ca6), TOBN(0x4b3c83e1, 0x6e474446),
+     TOBN(0x1aac7578, 0xee1e5134), TOBN(0xa418f5d6, 0xc91e2f41),
+     TOBN(0x6936fc8a, 0x213ed68b), TOBN(0x860ae7ed, 0x510a5224),
+     TOBN(0x63660335, 0xdef09b53), TOBN(0x641b2897, 0xcd79c98d),
+     TOBN(0x29bd38e1, 0x01110f35), TOBN(0x79c26f42, 0x648b1937),
+     TOBN(0x64dae519, 0x9d9164f4), TOBN(0xd85a2310, 0x0265c273),
+     TOBN(0x7173dd5d, 0x4b07e2b1), TOBN(0xd144c4cb, 0x8d9ea221),
+     TOBN(0xe8b04ea4, 0x1105ab14), TOBN(0x92dda542, 0xfe80d8f1),
+     TOBN(0xe9982fa8, 0xcf03dce6), TOBN(0x8b5ea965, 0x1a22cffc),
+     TOBN(0xf7f4ea7f, 0x3fad88c4), TOBN(0x62db773e, 0x6a5ba95c),
+     TOBN(0xd20f02fb, 0x93f24567), TOBN(0xfd46c69a, 0x315257ca),
+     TOBN(0x0ac74cc7, 0x8bcab987), TOBN(0x46f31c01, 0x5ceca2f5),
+     TOBN(0x40aedb59, 0x888b219e), TOBN(0xe50ecc37, 0xe1fccd02),
+     TOBN(0x1bcd9dad, 0x911f816c), TOBN(0x583cc1ec, 0x8db9b00c),
+     TOBN(0xf3cd2e66, 0xa483bf11), TOBN(0xfa08a6f5, 0xb1b2c169),
+     TOBN(0xf375e245, 0x4be9fa28), TOBN(0x99a7ffec, 0x5b6d011f),
+     TOBN(0x6a3ebddb, 0xc4ae62da), TOBN(0x6cea00ae, 0x374aef5d),
+     TOBN(0xab5fb98d, 0x9d4d05bc), TOBN(0x7cba1423, 0xd560f252),
+     TOBN(0x49b2cc21, 0x208490de), TOBN(0x1ca66ec3, 0xbcfb2879),
+     TOBN(0x7f1166b7, 0x1b6fb16f), TOBN(0xfff63e08, 0x65fe5db3),
+     TOBN(0xb8345abe, 0x8b2610be), TOBN(0xb732ed80, 0x39de3df4),
+     TOBN(0x0e24ed50, 0x211c32b4), TOBN(0xd10d8a69, 0x848ff27d),
+     TOBN(0xc1074398, 0xed4de248), TOBN(0xd7cedace, 0x10488927),
+     TOBN(0xa4aa6bf8, 0x85673e13), TOBN(0xb46bae91, 0x6daf30af),
+     TOBN(0x07088472, 0xfcef7ad8), TOBN(0x61151608, 0xd4b35e97),
+     TOBN(0xbcfe8f26, 0xdde29986), TOBN(0xeb84c4c7, 0xd5a34c79),
+     TOBN(0xc1eec55c, 0x164e1214), TOBN(0x891be86d, 0xa147bb03),
+     TOBN(0x9fab4d10, 0x0ba96835), TOBN(0xbf01e9b8, 0xa5c1ae9f),
+     TOBN(0x6b4de139, 0xb186ebc0), TOBN(0xd5c74c26, 0x85b91bca),
+     TOBN(0x5086a99c, 0xc2d93854), TOBN(0xeed62a7b, 0xa7a9dfbc),
+     TOBN(0x8778ed6f, 0x76b7618a), TOBN(0xbff750a5, 0x03b66062),
+     TOBN(0x4cb7be22, 0xb65186db), TOBN(0x369dfbf0, 0xcc3a6d13),
+     TOBN(0xc7dab26c, 0x7191a321), TOBN(0x9edac3f9, 0x40ed718e),
+     TOBN(0xbc142b36, 0xd0cfd183), TOBN(0xc8af82f6, 0x7c991693),
+     TOBN(0xb3d1e4d8, 0x97ce0b2a), TOBN(0xe6d7c87f, 0xc3a55cdf),
+     TOBN(0x35846b95, 0x68b81afe), TOBN(0x018d12af, 0xd3c239d8),
+     TOBN(0x2b2c6208, 0x01206e15), TOBN(0xe0e42453, 0xa3b882c6),
+     TOBN(0x854470a3, 0xa50162d5), TOBN(0x08157478, 0x7017a62a),
+     TOBN(0x18bd3fb4, 0x820357c7), TOBN(0x992039ae, 0x6f1458ad),
+     TOBN(0x9a1df3c5, 0x25b44aa1), TOBN(0x2d780357, 0xed3d5281),
+     TOBN(0x58cf7e4d, 0xc77ad4d4), TOBN(0xd49a7998, 0xf9df4fc4),
+     TOBN(0x4465a8b5, 0x1d71205e), TOBN(0xa0ee0ea6, 0x649254aa),
+     TOBN(0x4b5eeecf, 0xab7bd771), TOBN(0x6c873073, 0x35c262b9),
+     TOBN(0xdc5bd648, 0x3c9d61e7), TOBN(0x233d6d54, 0x321460d2),
+     TOBN(0xd20c5626, 0xfc195bcc), TOBN(0x25445958, 0x04d78b63),
+     TOBN(0xe03fcb3d, 0x17ec8ef3), TOBN(0x54b690d1, 0x46b8f781),
+     TOBN(0x82fa2c8a, 0x21230646), TOBN(0xf51aabb9, 0x084f418c),
+     TOBN(0xff4fbec1, 0x1a30ba43), TOBN(0x6a5acf73, 0x743c9df7),
+     TOBN(0x1da2b357, 0xd635b4d5), TOBN(0xc3de68dd, 0xecd5c1da),
+     TOBN(0xa689080b, 0xd61af0dd), TOBN(0xdea5938a, 0xd665bf99),
+     TOBN(0x0231d71a, 0xfe637294), TOBN(0x01968aa6, 0xa5a81cd8),
+     TOBN(0x11252d50, 0x048e63b5), TOBN(0xc446bc52, 0x6ca007e9),
+     TOBN(0xef8c50a6, 0x96d6134b), TOBN(0x9361fbf5, 0x9e09a05c),
+     TOBN(0xf17f85a6, 0xdca3291a), TOBN(0xb178d548, 0xff251a21),
+     TOBN(0x87f6374b, 0xa4df3915), TOBN(0x566ce1bf, 0x2fd5d608),
+     TOBN(0x425cba4d, 0x7de35102), TOBN(0x6b745f8f, 0x58c5d5e2),
+     TOBN(0x88402af6, 0x63122edf), TOBN(0x3190f9ed, 0x3b989a89),
+     TOBN(0x4ad3d387, 0xebba3156), TOBN(0xef385ad9, 0xc7c469a5),
+     TOBN(0xb08281de, 0x3f642c29), TOBN(0x20be0888, 0x910ffb88),
+     TOBN(0xf353dd4a, 0xd5292546), TOBN(0x3f1627de, 0x8377a262),
+     TOBN(0xa5faa013, 0xeefcd638), TOBN(0x8f3bf626, 0x74cc77c3),
+     TOBN(0x32618f65, 0xa348f55e), TOBN(0x5787c0dc, 0x9fefeb9e),
+     TOBN(0xf1673aa2, 0xd9a23e44), TOBN(0x88dfa993, 0x4e10690d),
+     TOBN(0x1ced1b36, 0x2bf91108), TOBN(0x9193ceca, 0x3af48649),
+     TOBN(0xfb34327d, 0x2d738fc5), TOBN(0x6697b037, 0x975fee6c),
+     TOBN(0x2f485da0, 0xc04079a5), TOBN(0x2cdf5735, 0x2feaa1ac),
+     TOBN(0x76944420, 0xbd55659e), TOBN(0x7973e32b, 0x4376090c),
+     TOBN(0x86bb4fe1, 0x163b591a), TOBN(0x10441aed, 0xc196f0ca),
+     TOBN(0x3b431f4a, 0x045ad915), TOBN(0x6c11b437, 0xa4afacb1),
+     TOBN(0x30b0c7db, 0x71fdbbd8), TOBN(0xb642931f, 0xeda65acd),
+     TOBN(0x4baae6e8, 0x9c92b235), TOBN(0xa73bbd0e, 0x6b3993a1),
+     TOBN(0xd06d60ec, 0x693dd031), TOBN(0x03cab91b, 0x7156881c),
+     TOBN(0xd615862f, 0x1db3574b), TOBN(0x485b0185, 0x64bb061a),
+     TOBN(0x27434988, 0xa0181e06), TOBN(0x2cd61ad4, 0xc1c0c757),
+     TOBN(0x3effed5a, 0x2ff9f403), TOBN(0x8dc98d8b, 0x62239029),
+     TOBN(0x2206021e, 0x1f17b70d), TOBN(0xafbec0ca, 0xbf510015),
+     TOBN(0x9fed7164, 0x80130dfa), TOBN(0x306dc2b5, 0x8a02dcf5),
+     TOBN(0x48f06620, 0xfeb10fc0), TOBN(0x78d1e1d5, 0x5a57cf51),
+     TOBN(0xadef8c5a, 0x192ef710), TOBN(0x88afbd4b, 0x3b7431f9),
+     TOBN(0x7e1f7407, 0x64250c9e), TOBN(0x6e31318d, 0xb58bec07),
+     TOBN(0xfd4fc4b8, 0x24f89b4e), TOBN(0x65a5dd88, 0x48c36a2a),
+     TOBN(0x4f1eccff, 0xf024baa7), TOBN(0x22a21cf2, 0xcba94650),
+     TOBN(0x95d29dee, 0x42a554f7), TOBN(0x828983a5, 0x002ec4ba),
+     TOBN(0x8112a1f7, 0x8badb73d), TOBN(0x79ea8897, 0xa27c1839),
+     TOBN(0x8969a5a7, 0xd065fd83), TOBN(0xf49af791, 0xb262a0bc),
+     TOBN(0xfcdea8b6, 0xaf2b5127), TOBN(0x10e913e1, 0x564c2dbc),
+     TOBN(0x51239d14, 0xbc21ef51), TOBN(0xe51c3ceb, 0x4ce57292),
+     TOBN(0x795ff068, 0x47bbcc3b), TOBN(0x86b46e1e, 0xbd7e11e6),
+     TOBN(0x0ea6ba23, 0x80041ef4), TOBN(0xd72fe505, 0x6262342e),
+     TOBN(0x8abc6dfd, 0x31d294d4), TOBN(0xbbe017a2, 0x1278c2c9),
+     TOBN(0xb1fcfa09, 0xb389328a), TOBN(0x322fbc62, 0xd01771b5),
+     TOBN(0x04c0d063, 0x60b045bf), TOBN(0xdb652edc, 0x10e52d01),
+     TOBN(0x50ef932c, 0x03ec6627), TOBN(0xde1b3b2d, 0xc1ee50e3),
+     TOBN(0x5ab7bdc5, 0xdc37a90d), TOBN(0xfea67213, 0x31e33a96),
+     TOBN(0x6482b5cb, 0x4f2999aa), TOBN(0x38476cc6, 0xb8cbf0dd),
+     TOBN(0x93ebfacb, 0x173405bb), TOBN(0x15cdafe7, 0xe52369ec),
+     TOBN(0xd42d5ba4, 0xd935b7db), TOBN(0x648b6004, 0x1c99a4cd),
+     TOBN(0x785101bd, 0xa3b5545b), TOBN(0x4bf2c38a, 0x9dd67faf),
+     TOBN(0xb1aadc63, 0x4442449c), TOBN(0xe0e9921a, 0x33ad4fb8),
+     TOBN(0x5c552313, 0xaa686d82), TOBN(0xdee635fa, 0x465d866c),
+     TOBN(0xbc3c224a, 0x18ee6e8a), TOBN(0xeed748a6, 0xed42e02f),
+     TOBN(0xe70f930a, 0xd474cd08), TOBN(0x774ea6ec, 0xfff24adf),
+     TOBN(0x03e2de1c, 0xf3480d4a), TOBN(0xf0d8edc7, 0xbc8acf1a),
+     TOBN(0xf23e3303, 0x68295a9c), TOBN(0xfadd5f68, 0xc546a97d),
+     TOBN(0x895597ad, 0x96f8acb1), TOBN(0xbddd49d5, 0x671bdae2),
+     TOBN(0x16fcd528, 0x21dd43f4), TOBN(0xa5a45412, 0x6619141a)}
+    ,
+    {TOBN(0x8ce9b6bf, 0xc360e25a), TOBN(0xe6425195, 0x075a1a78),
+     TOBN(0x9dc756a8, 0x481732f4), TOBN(0x83c0440f, 0x5432b57a),
+     TOBN(0xc670b3f1, 0xd720281f), TOBN(0x2205910e, 0xd135e051),
+     TOBN(0xded14b0e, 0xdb052be7), TOBN(0x697b3d27, 0xc568ea39),
+     TOBN(0x2e599b9a, 0xfb3ff9ed), TOBN(0x28c2e0ab, 0x17f6515c),
+     TOBN(0x1cbee4fd, 0x474da449), TOBN(0x071279a4, 0x4f364452),
+     TOBN(0x97abff66, 0x01fbe855), TOBN(0x3ee394e8, 0x5fda51c4),
+     TOBN(0x190385f6, 0x67597c0b), TOBN(0x6e9fccc6, 0xa27ee34b),
+     TOBN(0x0b89de93, 0x14092ebb), TOBN(0xf17256bd, 0x428e240c),
+     TOBN(0xcf89a7f3, 0x93d2f064), TOBN(0x4f57841e, 0xe1ed3b14),
+     TOBN(0x4ee14405, 0xe708d855), TOBN(0x856aae72, 0x03f1c3d0),
+     TOBN(0xc8e5424f, 0xbdd7eed5), TOBN(0x3333e4ef, 0x73ab4270),
+     TOBN(0x3bc77ade, 0xdda492f8), TOBN(0xc11a3aea, 0x78297205),
+     TOBN(0x5e89a3e7, 0x34931b4c), TOBN(0x17512e2e, 0x9f5694bb),
+     TOBN(0x5dc349f3, 0x177bf8b6), TOBN(0x232ea4ba, 0x08c7ff3e),
+     TOBN(0x9c4f9d16, 0xf511145d), TOBN(0xccf109a3, 0x33b379c3),
+     TOBN(0xe75e7a88, 0xa1f25897), TOBN(0x7ac6961f, 0xa1b5d4d8),
+     TOBN(0xe3e10773, 0x08f3ed5c), TOBN(0x208a54ec, 0x0a892dfb),
+     TOBN(0xbe826e19, 0x78660710), TOBN(0x0cf70a97, 0x237df2c8),
+     TOBN(0x418a7340, 0xed704da5), TOBN(0xa3eeb9a9, 0x08ca33fd),
+     TOBN(0x49d96233, 0x169bca96), TOBN(0x04d286d4, 0x2da6aafb),
+     TOBN(0xc09606ec, 0xa0c2fa94), TOBN(0x8869d0d5, 0x23ff0fb3),
+     TOBN(0xa99937e5, 0xd0150d65), TOBN(0xa92e2503, 0x240c14c9),
+     TOBN(0x656bf945, 0x108e2d49), TOBN(0x152a733a, 0xa2f59e2b),
+     TOBN(0xb4323d58, 0x8434a920), TOBN(0xc0af8e93, 0x622103c5),
+     TOBN(0x667518ef, 0x938dbf9a), TOBN(0xa1843073, 0x83a9cdf2),
+     TOBN(0x350a94aa, 0x5447ab80), TOBN(0xe5e5a325, 0xc75a3d61),
+     TOBN(0x74ba507f, 0x68411a9e), TOBN(0x10581fc1, 0x594f70c5),
+     TOBN(0x60e28570, 0x80eb24a9), TOBN(0x7bedfb4d, 0x488e0cfd),
+     TOBN(0x721ebbd7, 0xc259cdb8), TOBN(0x0b0da855, 0xbc6390a9),
+     TOBN(0x2b4d04db, 0xde314c70), TOBN(0xcdbf1fbc, 0x6c32e846),
+     TOBN(0x33833eab, 0xb162fc9e), TOBN(0x9939b48b, 0xb0dd3ab7),
+     TOBN(0x5aaa98a7, 0xcb0c9c8c), TOBN(0x75105f30, 0x81c4375c),
+     TOBN(0xceee5057, 0x5ef1c90f), TOBN(0xb31e065f, 0xc23a17bf),
+     TOBN(0x5364d275, 0xd4b6d45a), TOBN(0xd363f3ad, 0x62ec8996),
+     TOBN(0xb5d21239, 0x4391c65b), TOBN(0x84564765, 0xebb41b47),
+     TOBN(0x20d18ecc, 0x37107c78), TOBN(0xacff3b6b, 0x570c2a66),
+     TOBN(0x22f975d9, 0x9bd0d845), TOBN(0xef0a0c46, 0xba178fa0),
+     TOBN(0x1a419651, 0x76b6028e), TOBN(0xc49ec674, 0x248612d4),
+     TOBN(0x5b6ac4f2, 0x7338af55), TOBN(0x06145e62, 0x7bee5a36),
+     TOBN(0x33e95d07, 0xe75746b5), TOBN(0x1c1e1f6d, 0xc40c78be),
+     TOBN(0x967833ef, 0x222ff8e2), TOBN(0x4bedcf6a, 0xb49180ad),
+     TOBN(0x6b37e9c1, 0x3d7a4c8a), TOBN(0x2748887c, 0x6ddfe760),
+     TOBN(0xf7055123, 0xaa3a5bbc), TOBN(0x954ff225, 0x7bbb8e74),
+     TOBN(0xc42b8ab1, 0x97c3dfb9), TOBN(0x55a549b0, 0xcf168154),
+     TOBN(0xad6748e7, 0xc1b50692), TOBN(0x2775780f, 0x6fc5cbcb),
+     TOBN(0x4eab80b8, 0xe1c9d7c8), TOBN(0x8c69dae1, 0x3fdbcd56),
+     TOBN(0x47e6b4fb, 0x9969eace), TOBN(0x002f1085, 0xa705cb5a),
+     TOBN(0x4e23ca44, 0x6d3fea55), TOBN(0xb4ae9c86, 0xf4810568),
+     TOBN(0x47bfb91b, 0x2a62f27d), TOBN(0x60deb4c9, 0xd9bac28c),
+     TOBN(0xa892d894, 0x7de6c34c), TOBN(0x4ee68259, 0x4494587d),
+     TOBN(0x914ee14e, 0x1a3f8a5b), TOBN(0xbb113eaa, 0x28700385),
+     TOBN(0x81ca03b9, 0x2115b4c9), TOBN(0x7c163d38, 0x8908cad1),
+     TOBN(0xc912a118, 0xaa18179a), TOBN(0xe09ed750, 0x886e3081),
+     TOBN(0xa676e3fa, 0x26f516ca), TOBN(0x753cacf7, 0x8e732f91),
+     TOBN(0x51592aea, 0x833da8b4), TOBN(0xc626f42f, 0x4cbea8aa),
+     TOBN(0xef9dc899, 0xa7b56eaf), TOBN(0x00c0e52c, 0x34ef7316),
+     TOBN(0x5b1e4e24, 0xfe818a86), TOBN(0x9d31e20d, 0xc538be47),
+     TOBN(0x22eb932d, 0x3ed68974), TOBN(0xe44bbc08, 0x7c4e87c4),
+     TOBN(0x4121086e, 0x0dde9aef), TOBN(0x8e6b9cff, 0x134f4345),
+     TOBN(0x96892c1f, 0x711b0eb9), TOBN(0xb905f2c8, 0x780ab954),
+     TOBN(0xace26309, 0xa20792db), TOBN(0xec8ac9b3, 0x0684e126),
+     TOBN(0x486ad8b6, 0xb40a2447), TOBN(0x60121fc1, 0x9fe3fb24),
+     TOBN(0x5626fccf, 0x1a8e3b3f), TOBN(0x4e568622, 0x6ad1f394),
+     TOBN(0xda7aae0d, 0x196aa5a1), TOBN(0xe0df8c77, 0x1041b5fb),
+     TOBN(0x451465d9, 0x26b318b7), TOBN(0xc29b6e55, 0x7ab136e9),
+     TOBN(0x2c2ab48b, 0x71148463), TOBN(0xb5738de3, 0x64454a76),
+     TOBN(0x54ccf9a0, 0x5a03abe4), TOBN(0x377c0296, 0x0427d58e),
+     TOBN(0x73f5f0b9, 0x2bb39c1f), TOBN(0x14373f2c, 0xe608d8c5),
+     TOBN(0xdcbfd314, 0x00fbb805), TOBN(0xdf18fb20, 0x83afdcfb),
+     TOBN(0x81a57f42, 0x42b3523f), TOBN(0xe958532d, 0x87f650fb),
+     TOBN(0xaa8dc8b6, 0x8b0a7d7c), TOBN(0x1b75dfb7, 0x150166be),
+     TOBN(0x90e4f7c9, 0x2d7d1413), TOBN(0x67e2d6b5, 0x9834f597),
+     TOBN(0x4fd4f4f9, 0xa808c3e8), TOBN(0xaf8237e0, 0xd5281ec1),
+     TOBN(0x25ab5fdc, 0x84687cee), TOBN(0xc5ded6b1, 0xa5b26c09),
+     TOBN(0x8e4a5aec, 0xc8ea7650), TOBN(0x23b73e5c, 0x14cc417f),
+     TOBN(0x2bfb4318, 0x3037bf52), TOBN(0xb61e6db5, 0x78c725d7),
+     TOBN(0x8efd4060, 0xbbb3e5d7), TOBN(0x2e014701, 0xdbac488e),
+     TOBN(0xac75cf9a, 0x360aa449), TOBN(0xb70cfd05, 0x79634d08),
+     TOBN(0xa591536d, 0xfffb15ef), TOBN(0xb2c37582, 0xd07c106c),
+     TOBN(0xb4293fdc, 0xf50225f9), TOBN(0xc52e175c, 0xb0e12b03),
+     TOBN(0xf649c3ba, 0xd0a8bf64), TOBN(0x745a8fef, 0xeb8ae3c6),
+     TOBN(0x30d7e5a3, 0x58321bc3), TOBN(0xb1732be7, 0x0bc4df48),
+     TOBN(0x1f217993, 0xe9ea5058), TOBN(0xf7a71cde, 0x3e4fd745),
+     TOBN(0x86cc533e, 0x894c5bbb), TOBN(0x6915c7d9, 0x69d83082),
+     TOBN(0xa6aa2d05, 0x5815c244), TOBN(0xaeeee592, 0x49b22ce5),
+     TOBN(0x89e39d13, 0x78135486), TOBN(0x3a275c1f, 0x16b76f2f),
+     TOBN(0xdb6bcc1b, 0xe036e8f5), TOBN(0x4df69b21, 0x5e4709f5),
+     TOBN(0xa188b250, 0x2d0f39aa), TOBN(0x622118bb, 0x15a85947),
+     TOBN(0x2ebf520f, 0xfde0f4fa), TOBN(0xa40e9f29, 0x4860e539),
+     TOBN(0x7b6a51eb, 0x22b57f0f), TOBN(0x849a33b9, 0x7e80644a),
+     TOBN(0x50e5d16f, 0x1cf095fe), TOBN(0xd754b54e, 0xec55f002),
+     TOBN(0x5cfbbb22, 0x236f4a98), TOBN(0x0b0c59e9, 0x066800bb),
+     TOBN(0x4ac69a8f, 0x5a9a7774), TOBN(0x2b33f804, 0xd6bec948),
+     TOBN(0xb3729295, 0x32e6c466), TOBN(0x68956d0f, 0x4e599c73),
+     TOBN(0xa47a249f, 0x155c31cc), TOBN(0x24d80f0d, 0xe1ce284e),
+     TOBN(0xcd821dfb, 0x988baf01), TOBN(0xe6331a7d, 0xdbb16647),
+     TOBN(0x1eb8ad33, 0x094cb960), TOBN(0x593cca38, 0xc91bbca5),
+     TOBN(0x384aac8d, 0x26567456), TOBN(0x40fa0309, 0xc04b6490),
+     TOBN(0x97834cd6, 0xdab6c8f6), TOBN(0x68a7318d, 0x3f91e55f),
+     TOBN(0xa00fd04e, 0xfc4d3157), TOBN(0xb56f8ab2, 0x2bf3bdea),
+     TOBN(0x014f5648, 0x4fa57172), TOBN(0x948c5860, 0x450abdb3),
+     TOBN(0x342b5df0, 0x0ebd4f08), TOBN(0x3e5168cd, 0x0e82938e),
+     TOBN(0x7aedc1ce, 0xb0df5dd0), TOBN(0x6bbbc6d9, 0xe5732516),
+     TOBN(0xc7bfd486, 0x605daaa6), TOBN(0x46fd72b7, 0xbb9a6c9e),
+     TOBN(0xe4847fb1, 0xa124fb89), TOBN(0x75959cbd, 0xa2d8ffbc),
+     TOBN(0x42579f65, 0xc8a588ee), TOBN(0x368c92e6, 0xb80b499d),
+     TOBN(0xea4ef6cd, 0x999a5df1), TOBN(0xaa73bb7f, 0x936fe604),
+     TOBN(0xf347a70d, 0x6457d188), TOBN(0x86eda86b, 0x8b7a388b),
+     TOBN(0xb7cdff06, 0x0ccd6013), TOBN(0xbeb1b6c7, 0xd0053fb2),
+     TOBN(0x0b022387, 0x99240a9f), TOBN(0x1bbb384f, 0x776189b2),
+     TOBN(0x8695e71e, 0x9066193a), TOBN(0x2eb50097, 0x06ffac7e),
+     TOBN(0x0654a9c0, 0x4a7d2caa), TOBN(0x6f3fb3d1, 0xa5aaa290),
+     TOBN(0x835db041, 0xff476e8f), TOBN(0x540b8b0b, 0xc42295e4),
+     TOBN(0xa5c73ac9, 0x05e214f5), TOBN(0x9a74075a, 0x56a0b638),
+     TOBN(0x2e4b1090, 0xce9e680b), TOBN(0x57a5b479, 0x6b8d9afa),
+     TOBN(0x0dca48e7, 0x26bfe65c), TOBN(0x097e391c, 0x7290c307),
+     TOBN(0x683c462e, 0x6669e72e), TOBN(0xf505be1e, 0x062559ac),
+     TOBN(0x5fbe3ea1, 0xe3a3035a), TOBN(0x6431ebf6, 0x9cd50da8),
+     TOBN(0xfd169d5c, 0x1f6407f2), TOBN(0x8d838a95, 0x60fce6b8),
+     TOBN(0x2a2bfa7f, 0x650006f0), TOBN(0xdfd7dad3, 0x50c0fbb2),
+     TOBN(0x92452495, 0xccf9ad96), TOBN(0x183bf494, 0xd95635f9),
+     TOBN(0x02d5df43, 0x4a7bd989), TOBN(0x505385cc, 0xa5431095),
+     TOBN(0xdd98e67d, 0xfd43f53e), TOBN(0xd61e1a6c, 0x500c34a9),
+     TOBN(0x5a4b46c6, 0x4a8a3d62), TOBN(0x8469c4d0, 0x247743d2),
+     TOBN(0x2bb3a13d, 0x88f7e433), TOBN(0x62b23a10, 0x01be5849),
+     TOBN(0xe83596b4, 0xa63d1a4c), TOBN(0x454e7fea, 0x7d183f3e),
+     TOBN(0x643fce61, 0x17afb01c), TOBN(0x4e65e5e6, 0x1c4c3638),
+     TOBN(0x41d85ea1, 0xef74c45b), TOBN(0x2cfbfa66, 0xae328506),
+     TOBN(0x98b078f5, 0x3ada7da9), TOBN(0xd985fe37, 0xec752fbb),
+     TOBN(0xeece68fe, 0x5a0148b4), TOBN(0x6f9a55c7, 0x2d78136d),
+     TOBN(0x232dccc4, 0xd2b729ce), TOBN(0xa27e0dfd, 0x90aafbc4),
+     TOBN(0x96474452, 0x12b4603e), TOBN(0xa876c551, 0x6b706d14),
+     TOBN(0xdf145fcf, 0x69a9d412), TOBN(0xe2ab75b7, 0x2d479c34),
+     TOBN(0x12df9a76, 0x1a23ff97), TOBN(0xc6138992, 0x5d359d10),
+     TOBN(0x6e51c7ae, 0xfa835f22), TOBN(0x69a79cb1, 0xc0fcc4d9),
+     TOBN(0xf57f350d, 0x594cc7e1), TOBN(0x3079ca63, 0x3350ab79),
+     TOBN(0x226fb614, 0x9aff594a), TOBN(0x35afec02, 0x6d59a62b),
+     TOBN(0x9bee46f4, 0x06ed2c6e), TOBN(0x58da1735, 0x7d939a57),
+     TOBN(0x44c50402, 0x8fd1797e), TOBN(0xd8853e7c, 0x5ccea6ca),
+     TOBN(0x4065508d, 0xa35fcd5f), TOBN(0x8965df8c, 0x495ccaeb),
+     TOBN(0x0f2da850, 0x12e1a962), TOBN(0xee471b94, 0xc1cf1cc4),
+     TOBN(0xcef19bc8, 0x0a08fb75), TOBN(0x704958f5, 0x81de3591),
+     TOBN(0x2867f8b2, 0x3aef4f88), TOBN(0x8d749384, 0xea9f9a5f),
+     TOBN(0x1b385537, 0x8c9049f4), TOBN(0x5be948f3, 0x7b92d8b6),
+     TOBN(0xd96f725d, 0xb6e2bd6b), TOBN(0x37a222bc, 0x958c454d),
+     TOBN(0xe7c61abb, 0x8809bf61), TOBN(0x46f07fbc, 0x1346f18d),
+     TOBN(0xfb567a7a, 0xe87c0d1c), TOBN(0x84a461c8, 0x7ef3d07a),
+     TOBN(0x0a5adce6, 0xd9278d98), TOBN(0x24d94813, 0x9dfc73e1),
+     TOBN(0x4f3528b6, 0x054321c3), TOBN(0x2e03fdde, 0x692ea706),
+     TOBN(0x10e60619, 0x47b533c0), TOBN(0x1a8bc73f, 0x2ca3c055),
+     TOBN(0xae58d4b2, 0x1bb62b8f), TOBN(0xb2045a73, 0x584a24e3),
+     TOBN(0x3ab3d5af, 0xbd76e195), TOBN(0x478dd1ad, 0x6938a810),
+     TOBN(0x6ffab393, 0x6ee3d5cb), TOBN(0xdfb693db, 0x22b361e4),
+     TOBN(0xf9694496, 0x51dbf1a7), TOBN(0xcab4b4ef, 0x08a2e762),
+     TOBN(0xe8c92f25, 0xd39bba9a), TOBN(0x850e61bc, 0xf1464d96),
+     TOBN(0xb7e830e3, 0xdc09508b), TOBN(0xfaf6d2cf, 0x74317655),
+     TOBN(0x72606ceb, 0xdf690355), TOBN(0x48bb92b3, 0xd0c3ded6),
+     TOBN(0x65b75484, 0x5c7cf892), TOBN(0xf6cd7ac9, 0xd5d5f01f),
+     TOBN(0xc2c30a59, 0x96401d69), TOBN(0x91268650, 0xed921878),
+     TOBN(0x380bf913, 0xb78c558f), TOBN(0x43c0baeb, 0xc8afdaa9),
+     TOBN(0x377f61d5, 0x54f169d3), TOBN(0xf8da07e3, 0xae5ff20b),
+     TOBN(0xb676c49d, 0xa8a90ea8), TOBN(0x81c1ff2b, 0x83a29b21),
+     TOBN(0x383297ac, 0x2ad8d276), TOBN(0x3001122f, 0xba89f982),
+     TOBN(0xe1d794be, 0x6718e448), TOBN(0x246c1482, 0x7c3e6e13),
+     TOBN(0x56646ef8, 0x5d26b5ef), TOBN(0x80f5091e, 0x88069cdd),
+     TOBN(0xc5992e2f, 0x724bdd38), TOBN(0x02e915b4, 0x8471e8c7),
+     TOBN(0x96ff320a, 0x0d0ff2a9), TOBN(0xbf886487, 0x4384d1a0),
+     TOBN(0xbbe1e6a6, 0xc93f72d6), TOBN(0xd5f75d12, 0xcad800ea),
+     TOBN(0xfa40a09f, 0xe7acf117), TOBN(0x32c8cdd5, 0x7581a355),
+     TOBN(0x74221992, 0x7023c499), TOBN(0xa8afe5d7, 0x38ec3901),
+     TOBN(0x5691afcb, 0xa90e83f0), TOBN(0x41bcaa03, 0x0b8f8eac),
+     TOBN(0xe38b5ff9, 0x8d2668d5), TOBN(0x0715281a, 0x7ad81965),
+     TOBN(0x1bc8fc7c, 0x03c6ce11), TOBN(0xcbbee6e2, 0x8b650436),
+     TOBN(0x06b00fe8, 0x0cdb9808), TOBN(0x17d6e066, 0xfe3ed315),
+     TOBN(0x2e9d38c6, 0x4d0b5018), TOBN(0xab8bfd56, 0x844dcaef),
+     TOBN(0x42894a59, 0x513aed8b), TOBN(0xf77f3b6d, 0x314bd07a),
+     TOBN(0xbbdecb8f, 0x8e42b582), TOBN(0xf10e2fa8, 0xd2390fe6),
+     TOBN(0xefb95022, 0x62a2f201), TOBN(0x4d59ea50, 0x50ee32b0),
+     TOBN(0xd87f7728, 0x6da789a8), TOBN(0xcf98a2cf, 0xf79492c4),
+     TOBN(0xf9577239, 0x720943c2), TOBN(0xba044cf5, 0x3990b9d0),
+     TOBN(0x5aa8e823, 0x95f2884a), TOBN(0x834de6ed, 0x0278a0af),
+     TOBN(0xc8e1ee9a, 0x5f25bd12), TOBN(0x9259ceaa, 0x6f7ab271),
+     TOBN(0x7e6d97a2, 0x77d00b76), TOBN(0x5c0c6eea, 0xa437832a),
+     TOBN(0x5232c20f, 0x5606b81d), TOBN(0xabd7b375, 0x0d991ee5),
+     TOBN(0x4d2bfe35, 0x8632d951), TOBN(0x78f85146, 0x98ed9364),
+     TOBN(0x951873f0, 0xf30c3282), TOBN(0x0da8ac80, 0xa789230b),
+     TOBN(0x3ac7789c, 0x5398967f), TOBN(0xa69b8f7f, 0xbdda0fb5),
+     TOBN(0xe5db7717, 0x6add8545), TOBN(0x1b71cb66, 0x72c49b66),
+     TOBN(0xd8560739, 0x68421d77), TOBN(0x03840fe8, 0x83e3afea),
+     TOBN(0xb391dad5, 0x1ec69977), TOBN(0xae243fb9, 0x307f6726),
+     TOBN(0xc88ac87b, 0xe8ca160c), TOBN(0x5174cced, 0x4ce355f4),
+     TOBN(0x98a35966, 0xe58ba37d), TOBN(0xfdcc8da2, 0x7817335d),
+     TOBN(0x5b752830, 0x83fbc7bf), TOBN(0x68e419d4, 0xd9c96984),
+     TOBN(0x409a39f4, 0x02a40380), TOBN(0x88940faf, 0x1fe977bc),
+     TOBN(0xc640a94b, 0x8f8edea6), TOBN(0x1e22cd17, 0xed11547d),
+     TOBN(0xe28568ce, 0x59ffc3e2), TOBN(0x60aa1b55, 0xc1dee4e7),
+     TOBN(0xc67497c8, 0x837cb363), TOBN(0x06fb438a, 0x105a2bf2),
+     TOBN(0x30357ec4, 0x500d8e20), TOBN(0x1ad9095d, 0x0670db10),
+     TOBN(0x7f589a05, 0xc73b7cfd), TOBN(0xf544607d, 0x880d6d28),
+     TOBN(0x17ba93b1, 0xa20ef103), TOBN(0xad859130, 0x6ba6577b),
+     TOBN(0x65c91cf6, 0x6fa214a0), TOBN(0xd7d49c6c, 0x27990da5),
+     TOBN(0xecd9ec8d, 0x20bb569d), TOBN(0xbd4b2502, 0xeeffbc33),
+     TOBN(0x2056ca5a, 0x6bed0467), TOBN(0x7916a1f7, 0x5b63728c),
+     TOBN(0xd4f9497d, 0x53a4f566), TOBN(0x89734664, 0x97b56810),
+     TOBN(0xf8e1da74, 0x0494a621), TOBN(0x82546a93, 0x8d011c68),
+     TOBN(0x1f3acb19, 0xc61ac162), TOBN(0x52f8fa9c, 0xabad0d3e),
+     TOBN(0x15356523, 0xb4b7ea43), TOBN(0x5a16ad61, 0xae608125),
+     TOBN(0xb0bcb87f, 0x4faed184), TOBN(0x5f236b1d, 0x5029f45f),
+     TOBN(0xd42c7607, 0x0bc6b1fc), TOBN(0xc644324e, 0x68aefce3),
+     TOBN(0x8e191d59, 0x5c5d8446), TOBN(0xc0208077, 0x13ae1979),
+     TOBN(0xadcaee55, 0x3ba59cc7), TOBN(0x20ed6d6b, 0xa2cb81ba),
+     TOBN(0x0952ba19, 0xb6efcffc), TOBN(0x60f12d68, 0x97c0b87c),
+     TOBN(0x4ee2c7c4, 0x9caa30bc), TOBN(0x767238b7, 0x97fbff4e),
+     TOBN(0xebc73921, 0x501b5d92), TOBN(0x3279e3df, 0xc2a37737),
+     TOBN(0x9fc12bc8, 0x6d197543), TOBN(0xfa94dc6f, 0x0a40db4e),
+     TOBN(0x7392b41a, 0x530ccbbd), TOBN(0x87c82146, 0xea823525),
+     TOBN(0xa52f984c, 0x05d98d0c), TOBN(0x2ae57d73, 0x5ef6974c),
+     TOBN(0x9377f7bf, 0x3042a6dd), TOBN(0xb1a007c0, 0x19647a64),
+     TOBN(0xfaa9079a, 0x0cca9767), TOBN(0x3d81a25b, 0xf68f72d5),
+     TOBN(0x752067f8, 0xff81578e), TOBN(0x78622150, 0x9045447d),
+     TOBN(0xc0c22fcf, 0x0505aa6f), TOBN(0x1030f0a6, 0x6bed1c77),
+     TOBN(0x31f29f15, 0x1f0bd739), TOBN(0x2d7989c7, 0xe6debe85),
+     TOBN(0x5c070e72, 0x8e677e98), TOBN(0x0a817bd3, 0x06e81fd5),
+     TOBN(0xc110d830, 0xb0f2ac95), TOBN(0x48d0995a, 0xab20e64e),
+     TOBN(0x0f3e00e1, 0x7729cd9a), TOBN(0x2a570c20, 0xdd556946),
+     TOBN(0x912dbcfd, 0x4e86214d), TOBN(0x2d014ee2, 0xcf615498),
+     TOBN(0x55e2b1e6, 0x3530d76e), TOBN(0xc5135ae4, 0xfd0fd6d1),
+     TOBN(0x0066273a, 0xd4f3049f), TOBN(0xbb8e9893, 0xe7087477),
+     TOBN(0x2dba1ddb, 0x14c6e5fd), TOBN(0xdba37886, 0x51f57e6c),
+     TOBN(0x5aaee0a6, 0x5a72f2cf), TOBN(0x1208bfbf, 0x7bea5642),
+     TOBN(0xf5c6aa3b, 0x67872c37), TOBN(0xd726e083, 0x43f93224),
+     TOBN(0x1854daa5, 0x061f1658), TOBN(0xc0016df1, 0xdf0cd2b3),
+     TOBN(0xc2a3f23e, 0x833d50de), TOBN(0x73b681d2, 0xbbbd3017),
+     TOBN(0x2f046dc4, 0x3ac343c0), TOBN(0x9c847e7d, 0x85716421),
+     TOBN(0xe1e13c91, 0x0917eed4), TOBN(0x3fc9eebd, 0x63a1b9c6),
+     TOBN(0x0f816a72, 0x7fe02299), TOBN(0x6335ccc2, 0x294f3319),
+     TOBN(0x3820179f, 0x4745c5be), TOBN(0xe647b782, 0x922f066e),
+     TOBN(0xc22e49de, 0x02cafb8a), TOBN(0x299bc2ff, 0xfcc2eccc),
+     TOBN(0x9a8feea2, 0x6e0e8282), TOBN(0xa627278b, 0xfe893205),
+     TOBN(0xa7e19733, 0x7933e47b), TOBN(0xf4ff6b13, 0x2e766402),
+     TOBN(0xa4d8be0a, 0x98440d9f), TOBN(0x658f5c2f, 0x38938808),
+     TOBN(0x90b75677, 0xc95b3b3e), TOBN(0xfa044269, 0x3137b6ff),
+     TOBN(0x077b039b, 0x43c47c29), TOBN(0xcca95dd3, 0x8a6445b2),
+     TOBN(0x0b498ba4, 0x2333fc4c), TOBN(0x274f8e68, 0xf736a1b1),
+     TOBN(0x6ca348fd, 0x5f1d4b2e), TOBN(0x24d3be78, 0xa8f10199),
+     TOBN(0x8535f858, 0xca14f530), TOBN(0xa6e7f163, 0x5b982e51),
+     TOBN(0x847c8512, 0x36e1bf62), TOBN(0xf6a7c58e, 0x03448418),
+     TOBN(0x583f3703, 0xf9374ab6), TOBN(0x864f9195, 0x6e564145),
+     TOBN(0x33bc3f48, 0x22526d50), TOBN(0x9f323c80, 0x1262a496),
+     TOBN(0xaa97a7ae, 0x3f046a9a), TOBN(0x70da183e, 0xdf8a039a),
+     TOBN(0x5b68f71c, 0x52aa0ba6), TOBN(0x9be0fe51, 0x21459c2d),
+     TOBN(0xc1e17eb6, 0xcbc613e5), TOBN(0x33131d55, 0x497ea61c),
+     TOBN(0x2f69d39e, 0xaf7eded5), TOBN(0x73c2f434, 0xde6af11b),
+     TOBN(0x4ca52493, 0xa4a375fa), TOBN(0x5f06787c, 0xb833c5c2),
+     TOBN(0x814e091f, 0x3e6e71cf), TOBN(0x76451f57, 0x8b746666)}
+    ,
+    {TOBN(0x80f9bdef, 0x694db7e0), TOBN(0xedca8787, 0xb9fcddc6),
+     TOBN(0x51981c34, 0x03b8dce1), TOBN(0x4274dcf1, 0x70e10ba1),
+     TOBN(0xf72743b8, 0x6def6d1a), TOBN(0xd25b1670, 0xebdb1866),
+     TOBN(0xc4491e8c, 0x050c6f58), TOBN(0x2be2b2ab, 0x87fbd7f5),
+     TOBN(0x3e0e5c9d, 0xd111f8ec), TOBN(0xbcc33f8d, 0xb7c4e760),
+     TOBN(0x702f9a91, 0xbd392a51), TOBN(0x7da4a795, 0xc132e92d),
+     TOBN(0x1a0b0ae3, 0x0bb1151b), TOBN(0x54febac8, 0x02e32251),
+     TOBN(0xea3a5082, 0x694e9e78), TOBN(0xe58ffec1, 0xe4fe40b8),
+     TOBN(0xf85592fc, 0xd1e0cf9e), TOBN(0xdea75f0d, 0xc0e7b2e8),
+     TOBN(0xc04215cf, 0xc135584e), TOBN(0x174fc727, 0x2f57092a),
+     TOBN(0xe7277877, 0xeb930bea), TOBN(0x504caccb, 0x5eb02a5a),
+     TOBN(0xf9fe08f7, 0xf5241b9b), TOBN(0xe7fb62f4, 0x8d5ca954),
+     TOBN(0xfbb8349d, 0x29c4120b), TOBN(0x9f94391f, 0xc0d0d915),
+     TOBN(0xc4074fa7, 0x5410ba51), TOBN(0xa66adbf6, 0x150a5911),
+     TOBN(0xc164543c, 0x34bfca38), TOBN(0xe0f27560, 0xb9e1ccfc),
+     TOBN(0x99da0f53, 0xe820219c), TOBN(0xe8234498, 0xc6b4997a),
+     TOBN(0xcfb88b76, 0x9d4c5423), TOBN(0x9e56eb10, 0xb0521c49),
+     TOBN(0x418e0b5e, 0xbe8700a1), TOBN(0x00cbaad6, 0xf93cb58a),
+     TOBN(0xe923fbde, 0xd92a5e67), TOBN(0xca4979ac, 0x1f347f11),
+     TOBN(0x89162d85, 0x6bc0585b), TOBN(0xdd6254af, 0xac3c70e3),
+     TOBN(0x7b23c513, 0x516e19e4), TOBN(0x56e2e847, 0xc5c4d593),
+     TOBN(0x9f727d73, 0x5ce71ef6), TOBN(0x5b6304a6, 0xf79a44c5),
+     TOBN(0x6638a736, 0x3ab7e433), TOBN(0x1adea470, 0xfe742f83),
+     TOBN(0xe054b854, 0x5b7fc19f), TOBN(0xf935381a, 0xba1d0698),
+     TOBN(0x546eab2d, 0x799e9a74), TOBN(0x96239e0e, 0xa949f729),
+     TOBN(0xca274c6b, 0x7090055a), TOBN(0x835142c3, 0x9020c9b0),
+     TOBN(0xa405667a, 0xa2e8807f), TOBN(0x29f2c085, 0x1aa3d39e),
+     TOBN(0xcc555d64, 0x42fc72f5), TOBN(0xe856e0e7, 0xfbeacb3c),
+     TOBN(0xb5504f9d, 0x918e4936), TOBN(0x65035ef6, 0xb2513982),
+     TOBN(0x0553a0c2, 0x6f4d9cb9), TOBN(0x6cb10d56, 0xbea85509),
+     TOBN(0x48d957b7, 0xa242da11), TOBN(0x16a4d3dd, 0x672b7268),
+     TOBN(0x3d7e637c, 0x8502a96b), TOBN(0x27c7032b, 0x730d463b),
+     TOBN(0xbdc02b18, 0xe4136a14), TOBN(0xbacf969d, 0x678e32bf),
+     TOBN(0xc98d89a3, 0xdd9c3c03), TOBN(0x7b92420a, 0x23becc4f),
+     TOBN(0xd4b41f78, 0xc64d565c), TOBN(0x9f969d00, 0x10f28295),
+     TOBN(0xec7f7f76, 0xb13d051a), TOBN(0x08945e1e, 0xa92da585),
+     TOBN(0x55366b7d, 0x5846426f), TOBN(0xe7d09e89, 0x247d441d),
+     TOBN(0x510b404d, 0x736fbf48), TOBN(0x7fa003d0, 0xe784bd7d),
+     TOBN(0x25f7614f, 0x17fd9596), TOBN(0x49e0e0a1, 0x35cb98db),
+     TOBN(0x2c65957b, 0x2e83a76a), TOBN(0x5d40da8d, 0xcddbe0f8),
+     TOBN(0xf2b8c405, 0x050bad24), TOBN(0x8918426d, 0xc2aa4823),
+     TOBN(0x2aeab3dd, 0xa38365a7), TOBN(0x72031717, 0x7c91b690),
+     TOBN(0x8b00d699, 0x60a94120), TOBN(0x478a255d, 0xe99eaeec),
+     TOBN(0xbf656a5f, 0x6f60aafd), TOBN(0xdfd7cb75, 0x5dee77b3),
+     TOBN(0x37f68bb4, 0xa595939d), TOBN(0x03556479, 0x28740217),
+     TOBN(0x8e740e7c, 0x84ad7612), TOBN(0xd89bc843, 0x9044695f),
+     TOBN(0xf7f3da5d, 0x85a9184d), TOBN(0x562563bb, 0x9fc0b074),
+     TOBN(0x06d2e6aa, 0xf88a888e), TOBN(0x612d8643, 0x161fbe7c),
+     TOBN(0x465edba7, 0xf64085e7), TOBN(0xb230f304, 0x29aa8511),
+     TOBN(0x53388426, 0xcda2d188), TOBN(0x90885735, 0x4b666649),
+     TOBN(0x6f02ff9a, 0x652f54f6), TOBN(0x65c82294, 0x5fae2bf0),
+     TOBN(0x7816ade0, 0x62f5eee3), TOBN(0xdcdbdf43, 0xfcc56d70),
+     TOBN(0x9fb3bba3, 0x54530bb2), TOBN(0xbde3ef77, 0xcb0869ea),
+     TOBN(0x89bc9046, 0x0b431163), TOBN(0x4d03d7d2, 0xe4819a35),
+     TOBN(0x33ae4f9e, 0x43b6a782), TOBN(0x216db307, 0x9c88a686),
+     TOBN(0x91dd88e0, 0x00ffedd9), TOBN(0xb280da9f, 0x12bd4840),
+     TOBN(0x32a7cb8a, 0x1635e741), TOBN(0xfe14008a, 0x78be02a7),
+     TOBN(0x3fafb334, 0x1b7ae030), TOBN(0x7fd508e7, 0x5add0ce9),
+     TOBN(0x72c83219, 0xd607ad51), TOBN(0x0f229c0a, 0x8d40964a),
+     TOBN(0x1be2c336, 0x1c878da2), TOBN(0xe0c96742, 0xeab2ab86),
+     TOBN(0x458f8691, 0x3e538cd7), TOBN(0xa7001f6c, 0x8e08ad53),
+     TOBN(0x52b8c6e6, 0xbf5d15ff), TOBN(0x548234a4, 0x011215dd),
+     TOBN(0xff5a9d2d, 0x3d5b4045), TOBN(0xb0ffeeb6, 0x4a904190),
+     TOBN(0x55a3aca4, 0x48607f8b), TOBN(0x8cbd665c, 0x30a0672a),
+     TOBN(0x87f834e0, 0x42583068), TOBN(0x02da2aeb, 0xf3f6e683),
+     TOBN(0x6b763e5d, 0x05c12248), TOBN(0x7230378f, 0x65a8aefc),
+     TOBN(0x93bd80b5, 0x71e8e5ca), TOBN(0x53ab041c, 0xb3b62524),
+     TOBN(0x1b860513, 0x6c9c552e), TOBN(0xe84d402c, 0xd5524e66),
+     TOBN(0xa37f3573, 0xf37f5937), TOBN(0xeb0f6c7d, 0xd1e4fca5),
+     TOBN(0x2965a554, 0xac8ab0fc), TOBN(0x17fbf56c, 0x274676ac),
+     TOBN(0x2e2f6bd9, 0xacf7d720), TOBN(0x41fc8f88, 0x10224766),
+     TOBN(0x517a14b3, 0x85d53bef), TOBN(0xdae327a5, 0x7d76a7d1),
+     TOBN(0x6ad0a065, 0xc4818267), TOBN(0x33aa189b, 0x37c1bbc1),
+     TOBN(0x64970b52, 0x27392a92), TOBN(0x21699a1c, 0x2d1535ea),
+     TOBN(0xcd20779c, 0xc2d7a7fd), TOBN(0xe3186059, 0x99c83cf2),
+     TOBN(0x9b69440b, 0x72c0b8c7), TOBN(0xa81497d7, 0x7b9e0e4d),
+     TOBN(0x515d5c89, 0x1f5f82dc), TOBN(0x9a7f67d7, 0x6361079e),
+     TOBN(0xa8da81e3, 0x11a35330), TOBN(0xe44990c4, 0x4b18be1b),
+     TOBN(0xc7d5ed95, 0xaf103e59), TOBN(0xece8aba7, 0x8dac9261),
+     TOBN(0xbe82b099, 0x9394b8d3), TOBN(0x6830f09a, 0x16adfe83),
+     TOBN(0x250a29b4, 0x88172d01), TOBN(0x8b20bd65, 0xcaff9e02),
+     TOBN(0xb8a7661e, 0xe8a6329a), TOBN(0x4520304d, 0xd3fce920),
+     TOBN(0xae45da1f, 0x2b47f7ef), TOBN(0xe07f5288, 0x5bffc540),
+     TOBN(0xf7997009, 0x3464f874), TOBN(0x2244c2cd, 0xa6fa1f38),
+     TOBN(0x43c41ac1, 0x94d7d9b1), TOBN(0x5bafdd82, 0xc82e7f17),
+     TOBN(0xdf0614c1, 0x5fda0fca), TOBN(0x74b043a7, 0xa8ae37ad),
+     TOBN(0x3ba6afa1, 0x9e71734c), TOBN(0x15d5437e, 0x9c450f2e),
+     TOBN(0x4a5883fe, 0x67e242b1), TOBN(0x5143bdc2, 0x2c1953c2),
+     TOBN(0x542b8b53, 0xfc5e8920), TOBN(0x363bf9a8, 0x9a9cee08),
+     TOBN(0x02375f10, 0xc3486e08), TOBN(0x2037543b, 0x8c5e70d2),
+     TOBN(0x7109bccc, 0x625640b4), TOBN(0xcbc1051e, 0x8bc62c3b),
+     TOBN(0xf8455fed, 0x803f26ea), TOBN(0x6badceab, 0xeb372424),
+     TOBN(0xa2a9ce7c, 0x6b53f5f9), TOBN(0x64246595, 0x1b176d99),
+     TOBN(0xb1298d36, 0xb95c081b), TOBN(0x53505bb8, 0x1d9a9ee6),
+     TOBN(0x3f6f9e61, 0xf2ba70b0), TOBN(0xd07e16c9, 0x8afad453),
+     TOBN(0x9f1694bb, 0xe7eb4a6a), TOBN(0xdfebced9, 0x3cb0bc8e),
+     TOBN(0x92d3dcdc, 0x53868c8b), TOBN(0x174311a2, 0x386107a6),
+     TOBN(0x4109e07c, 0x689b4e64), TOBN(0x30e4587f, 0x2df3dcb6),
+     TOBN(0x841aea31, 0x0811b3b2), TOBN(0x6144d41d, 0x0cce43ea),
+     TOBN(0x464c4581, 0x2a9a7803), TOBN(0xd03d371f, 0x3e158930),
+     TOBN(0xc676d7f2, 0xb1f3390b), TOBN(0x9f7a1b8c, 0xa5b61272),
+     TOBN(0x4ebebfc9, 0xc2e127a9), TOBN(0x4602500c, 0x5dd997bf),
+     TOBN(0x7f09771c, 0x4711230f), TOBN(0x058eb37c, 0x020f09c1),
+     TOBN(0xab693d4b, 0xfee5e38b), TOBN(0x9289eb1f, 0x4653cbc0),
+     TOBN(0xbecf46ab, 0xd51b9cf5), TOBN(0xd2aa9c02, 0x9f0121af),
+     TOBN(0x36aaf7d2, 0xe90dc274), TOBN(0x909e4ea0, 0x48b95a3c),
+     TOBN(0xe6b70496, 0x6f32dbdb), TOBN(0x672188a0, 0x8b030b3e),
+     TOBN(0xeeffe5b3, 0xcfb617e2), TOBN(0x87e947de, 0x7c82709e),
+     TOBN(0xa44d2b39, 0x1770f5a7), TOBN(0xe4d4d791, 0x0e44eb82),
+     TOBN(0x42e69d1e, 0x3f69712a), TOBN(0xbf11c4d6, 0xac6a820e),
+     TOBN(0xb5e7f3e5, 0x42c4224c), TOBN(0xd6b4e81c, 0x449d941c),
+     TOBN(0x5d72bd16, 0x5450e878), TOBN(0x6a61e28a, 0xee25ac54),
+     TOBN(0x33272094, 0xe6f1cd95), TOBN(0x7512f30d, 0x0d18673f),
+     TOBN(0x32f7a4ca, 0x5afc1464), TOBN(0x2f095656, 0x6bbb977b),
+     TOBN(0x586f47ca, 0xa8226200), TOBN(0x02c868ad, 0x1ac07369),
+     TOBN(0x4ef2b845, 0xc613acbe), TOBN(0x43d7563e, 0x0386054c),
+     TOBN(0x54da9dc7, 0xab952578), TOBN(0xb5423df2, 0x26e84d0b),
+     TOBN(0xa8b64eeb, 0x9b872042), TOBN(0xac205782, 0x5990f6df),
+     TOBN(0x4ff696eb, 0x21f4c77a), TOBN(0x1a79c3e4, 0xaab273af),
+     TOBN(0x29bc922e, 0x9436b3f1), TOBN(0xff807ef8, 0xd6d9a27a),
+     TOBN(0x82acea3d, 0x778f22a0), TOBN(0xfb10b2e8, 0x5b5e7469),
+     TOBN(0xc0b16980, 0x2818ee7d), TOBN(0x011afff4, 0xc91c1a2f),
+     TOBN(0x95a6d126, 0xad124418), TOBN(0x31c081a5, 0xe72e295f),
+     TOBN(0x36bb283a, 0xf2f4db75), TOBN(0xd115540f, 0x7acef462),
+     TOBN(0xc7f3a8f8, 0x33f6746c), TOBN(0x21e46f65, 0xfea990ca),
+     TOBN(0x915fd5c5, 0xcaddb0a9), TOBN(0xbd41f016, 0x78614555),
+     TOBN(0x346f4434, 0x426ffb58), TOBN(0x80559436, 0x14dbc204),
+     TOBN(0xf3dd20fe, 0x5a969b7f), TOBN(0x9d59e956, 0xe899a39a),
+     TOBN(0xf1b0971c, 0x8ad4cf4b), TOBN(0x03448860, 0x2ffb8fb8),
+     TOBN(0xf071ac3c, 0x65340ba4), TOBN(0x408d0596, 0xb27fd758),
+     TOBN(0xe7c78ea4, 0x98c364b0), TOBN(0xa4aac4a5, 0x051e8ab5),
+     TOBN(0xb9e1d560, 0x485d9002), TOBN(0x9acd518a, 0x88844455),
+     TOBN(0xe4ca688f, 0xd06f56c0), TOBN(0xa48af70d, 0xdf027972),
+     TOBN(0x691f0f04, 0x5e9a609d), TOBN(0xa9dd82cd, 0xee61270e),
+     TOBN(0x8903ca63, 0xa0ef18d3), TOBN(0x9fb7ee35, 0x3d6ca3bd),
+     TOBN(0xa7b4a09c, 0xabf47d03), TOBN(0x4cdada01, 0x1c67de8e),
+     TOBN(0x52003749, 0x9355a244), TOBN(0xe77fd2b6, 0x4f2151a9),
+     TOBN(0x695d6cf6, 0x66b4efcb), TOBN(0xc5a0cacf, 0xda2cfe25),
+     TOBN(0x104efe5c, 0xef811865), TOBN(0xf52813e8, 0x9ea5cc3d),
+     TOBN(0x855683dc, 0x40b58dbc), TOBN(0x0338ecde, 0x175fcb11),
+     TOBN(0xf9a05637, 0x74921592), TOBN(0xb4f1261d, 0xb9bb9d31),
+     TOBN(0x551429b7, 0x4e9c5459), TOBN(0xbe182e6f, 0x6ea71f53),
+     TOBN(0xd3a3b07c, 0xdfc50573), TOBN(0x9ba1afda, 0x62be8d44),
+     TOBN(0x9bcfd2cb, 0x52ab65d3), TOBN(0xdf11d547, 0xa9571802),
+     TOBN(0x099403ee, 0x02a2404a), TOBN(0x497406f4, 0x21088a71),
+     TOBN(0x99479409, 0x5004ae71), TOBN(0xbdb42078, 0xa812c362),
+     TOBN(0x2b72a30f, 0xd8828442), TOBN(0x283add27, 0xfcb5ed1c),
+     TOBN(0xf7c0e200, 0x66a40015), TOBN(0x3e3be641, 0x08b295ef),
+     TOBN(0xac127dc1, 0xe038a675), TOBN(0x729deff3, 0x8c5c6320),
+     TOBN(0xb7df8fd4, 0xa90d2c53), TOBN(0x9b74b0ec, 0x681e7cd3),
+     TOBN(0x5cb5a623, 0xdab407e5), TOBN(0xcdbd3615, 0x76b340c6),
+     TOBN(0xa184415a, 0x7d28392c), TOBN(0xc184c1d8, 0xe96f7830),
+     TOBN(0xc3204f19, 0x81d3a80f), TOBN(0xfde0c841, 0xc8e02432),
+     TOBN(0x78203b3e, 0x8149e0c1), TOBN(0x5904bdbb, 0x08053a73),
+     TOBN(0x30fc1dd1, 0x101b6805), TOBN(0x43c223bc, 0x49aa6d49),
+     TOBN(0x9ed67141, 0x7a174087), TOBN(0x311469a0, 0xd5997008),
+     TOBN(0xb189b684, 0x5e43fc61), TOBN(0xf3282375, 0xe0d3ab57),
+     TOBN(0x4fa34b67, 0xb1181da8), TOBN(0x621ed0b2, 0x99ee52b8),
+     TOBN(0x9b178de1, 0xad990676), TOBN(0xd51de67b, 0x56d54065),
+     TOBN(0x2a2c27c4, 0x7538c201), TOBN(0x33856ec8, 0x38a40f5c),
+     TOBN(0x2522fc15, 0xbe6cdcde), TOBN(0x1e603f33, 0x9f0c6f89),
+     TOBN(0x7994edc3, 0x103e30a6), TOBN(0x033a00db, 0x220c853e),
+     TOBN(0xd3cfa409, 0xf7bb7fd7), TOBN(0x70f8781e, 0x462d18f6),
+     TOBN(0xbbd82980, 0x687fe295), TOBN(0x6eef4c32, 0x595669f3),
+     TOBN(0x86a9303b, 0x2f7e85c3), TOBN(0x5fce4621, 0x71988f9b),
+     TOBN(0x5b935bf6, 0xc138acb5), TOBN(0x30ea7d67, 0x25661212),
+     TOBN(0xef1eb5f4, 0xe51ab9a2), TOBN(0x0587c98a, 0xae067c78),
+     TOBN(0xb3ce1b3c, 0x77ca9ca6), TOBN(0x2a553d4d, 0x54b5f057),
+     TOBN(0xc7898236, 0x4da29ec2), TOBN(0xdbdd5d13, 0xb9c57316),
+     TOBN(0xc57d6e6b, 0x2cd80d47), TOBN(0x80b460cf, 0xfe9e7391),
+     TOBN(0x98648cab, 0xf963c31e), TOBN(0x67f9f633, 0xcc4d32fd),
+     TOBN(0x0af42a9d, 0xfdf7c687), TOBN(0x55f292a3, 0x0b015ea7),
+     TOBN(0x89e468b2, 0xcd21ab3d), TOBN(0xe504f022, 0xc393d392),
+     TOBN(0xab21e1d4, 0xa5013af9), TOBN(0xe3283f78, 0xc2c28acb),
+     TOBN(0xf38b35f6, 0x226bf99f), TOBN(0xe8354274, 0x0e291e69),
+     TOBN(0x61673a15, 0xb20c162d), TOBN(0xc101dc75, 0xb04fbdbe),
+     TOBN(0x8323b4c2, 0x255bd617), TOBN(0x6c969693, 0x6c2a9154),
+     TOBN(0xc6e65860, 0x62679387), TOBN(0x8e01db0c, 0xb8c88e23),
+     TOBN(0x33c42873, 0x893a5559), TOBN(0x7630f04b, 0x47a3e149),
+     TOBN(0xb5d80805, 0xddcf35f8), TOBN(0x582ca080, 0x77dfe732),
+     TOBN(0x2c7156e1, 0x0b1894a0), TOBN(0x92034001, 0xd81c68c0),
+     TOBN(0xed225d00, 0xc8b115b5), TOBN(0x237f9c22, 0x83b907f2),
+     TOBN(0x0ea2f32f, 0x4470e2c0), TOBN(0xb725f7c1, 0x58be4e95),
+     TOBN(0x0f1dcafa, 0xb1ae5463), TOBN(0x59ed5187, 0x1ba2fc04),
+     TOBN(0xf6e0f316, 0xd0115d4d), TOBN(0x5180b12f, 0xd3691599),
+     TOBN(0x157e32c9, 0x527f0a41), TOBN(0x7b0b081d, 0xa8e0ecc0),
+     TOBN(0x6dbaaa8a, 0xbf4f0dd0), TOBN(0x99b289c7, 0x4d252696),
+     TOBN(0x79b7755e, 0xdbf864fe), TOBN(0x6974e2b1, 0x76cad3ab),
+     TOBN(0x35dbbee2, 0x06ddd657), TOBN(0xe7cbdd11, 0x2ff3a96d),
+     TOBN(0x88381968, 0x076be758), TOBN(0x2d737e72, 0x08c91f5d),
+     TOBN(0x5f83ab62, 0x86ec3776), TOBN(0x98aa649d, 0x945fa7a1),
+     TOBN(0xf477ec37, 0x72ef0933), TOBN(0x66f52b1e, 0x098c17b1),
+     TOBN(0x9eec58fb, 0xd803738b), TOBN(0x91aaade7, 0xe4e86aa4),
+     TOBN(0x6b1ae617, 0xa5b51492), TOBN(0x63272121, 0xbbc45974),
+     TOBN(0x7e0e28f0, 0x862c5129), TOBN(0x0a8f79a9, 0x3321a4a0),
+     TOBN(0xe26d1664, 0x5041c88f), TOBN(0x0571b805, 0x53233e3a),
+     TOBN(0xd1b0ccde, 0xc9520711), TOBN(0x55a9e4ed, 0x3c8b84bf),
+     TOBN(0x9426bd39, 0xa1fef314), TOBN(0x4f5f638e, 0x6eb93f2b),
+     TOBN(0xba2a1ed3, 0x2bf9341b), TOBN(0xd63c1321, 0x4d42d5a9),
+     TOBN(0xd2964a89, 0x316dc7c5), TOBN(0xd1759606, 0xca511851),
+     TOBN(0xd8a9201f, 0xf9e6ed35), TOBN(0xb7b5ee45, 0x6736925a),
+     TOBN(0x0a83fbbc, 0x99581af7), TOBN(0x3076bc40, 0x64eeb051),
+     TOBN(0x5511c98c, 0x02dec312), TOBN(0x270de898, 0x238dcb78),
+     TOBN(0x2cf4cf9c, 0x539c08c9), TOBN(0xa70cb65e, 0x38d3b06e),
+     TOBN(0xb12ec10e, 0xcfe57bbd), TOBN(0x82c7b656, 0x35a0c2b5),
+     TOBN(0xddc7d5cd, 0x161c67bd), TOBN(0xe32e8985, 0xae3a32cc),
+     TOBN(0x7aba9444, 0xd11a5529), TOBN(0xe964ed02, 0x2427fa1a),
+     TOBN(0x1528392d, 0x24a1770a), TOBN(0xa152ce2c, 0x12c72fcd),
+     TOBN(0x714553a4, 0x8ec07649), TOBN(0x18b4c290, 0x459dd453),
+     TOBN(0xea32b714, 0x7b64b110), TOBN(0xb871bfa5, 0x2e6f07a2),
+     TOBN(0xb67112e5, 0x9e2e3c9b), TOBN(0xfbf250e5, 0x44aa90f6),
+     TOBN(0xf77aedb8, 0xbd539006), TOBN(0x3b0cdf9a, 0xd172a66f),
+     TOBN(0xedf69fea, 0xf8c51187), TOBN(0x05bb67ec, 0x741e4da7),
+     TOBN(0x47df0f32, 0x08114345), TOBN(0x56facb07, 0xbb9792b1),
+     TOBN(0xf3e007e9, 0x8f6229e4), TOBN(0x62d103f4, 0x526fba0f),
+     TOBN(0x4f33bef7, 0xb0339d79), TOBN(0x9841357b, 0xb59bfec1),
+     TOBN(0xfa8dbb59, 0xc34e6705), TOBN(0xc3c7180b, 0x7fdaa84c),
+     TOBN(0xf95872fc, 0xa4108537), TOBN(0x8750cc3b, 0x932a3e5a),
+     TOBN(0xb61cc69d, 0xb7275d7d), TOBN(0xffa0168b, 0x2e59b2e9),
+     TOBN(0xca032abc, 0x6ecbb493), TOBN(0x1d86dbd3, 0x2c9082d8),
+     TOBN(0xae1e0b67, 0xe28ef5ba), TOBN(0x2c9a4699, 0xcb18e169),
+     TOBN(0x0ecd0e33, 0x1e6bbd20), TOBN(0x571b360e, 0xaf5e81d2),
+     TOBN(0xcd9fea58, 0x101c1d45), TOBN(0x6651788e, 0x18880452),
+     TOBN(0xa9972635, 0x1f8dd446), TOBN(0x44bed022, 0xe37281d0),
+     TOBN(0x094b2b2d, 0x33da525d), TOBN(0xf193678e, 0x13144fd8),
+     TOBN(0xb8ab5ba4, 0xf4c1061d), TOBN(0x4343b5fa, 0xdccbe0f4),
+     TOBN(0xa8702371, 0x63812713), TOBN(0x47bf6d2d, 0xf7611d93),
+     TOBN(0x46729b8c, 0xbd21e1d7), TOBN(0x7484d4e0, 0xd629e77d),
+     TOBN(0x830e6eea, 0x60dbac1f), TOBN(0x23d8c484, 0xda06a2f7),
+     TOBN(0x896714b0, 0x50ca535b), TOBN(0xdc8d3644, 0xebd97a9b),
+     TOBN(0x106ef9fa, 0xb12177b4), TOBN(0xf79bf464, 0x534d5d9c),
+     TOBN(0x2537a349, 0xa6ab360b), TOBN(0xc7c54253, 0xa00c744f),
+     TOBN(0xb3c7a047, 0xe5911a76), TOBN(0x61ffa5c8, 0x647f1ee7),
+     TOBN(0x15aed36f, 0x8f56ab42), TOBN(0x6a0d41b0, 0xa3ff9ac9),
+     TOBN(0x68f469f5, 0xcc30d357), TOBN(0xbe9adf81, 0x6b72be96),
+     TOBN(0x1cd926fe, 0x903ad461), TOBN(0x7e89e38f, 0xcaca441b),
+     TOBN(0xf0f82de5, 0xfacf69d4), TOBN(0x363b7e76, 0x4775344c),
+     TOBN(0x6894f312, 0xb2e36d04), TOBN(0x3c6cb4fe, 0x11d1c9a5),
+     TOBN(0x85d9c339, 0x4008e1f2), TOBN(0x5e9a85ea, 0x249f326c),
+     TOBN(0xdc35c60a, 0x678c5e06), TOBN(0xc08b944f, 0x9f86fba9),
+     TOBN(0xde40c02c, 0x89f71f0f), TOBN(0xad8f3e31, 0xff3da3c0),
+     TOBN(0x3ea5096b, 0x42125ded), TOBN(0x13879cbf, 0xa7379183),
+     TOBN(0x6f4714a5, 0x6b306a0b), TOBN(0x359c2ea6, 0x67646c5e),
+     TOBN(0xfacf8943, 0x07726368), TOBN(0x07a58935, 0x65ff431e),
+     TOBN(0x24d661d1, 0x68754ab0), TOBN(0x801fce1d, 0x6f429a76),
+     TOBN(0xc068a85f, 0xa58ce769), TOBN(0xedc35c54, 0x5d5eca2b),
+     TOBN(0xea31276f, 0xa3f660d1), TOBN(0xa0184ebe, 0xb8fc7167),
+     TOBN(0x0f20f21a, 0x1d8db0ae), TOBN(0xd96d095f, 0x56c35e12),
+     TOBN(0xedf402b5, 0xf8c2a25b), TOBN(0x1bb772b9, 0x059204b6),
+     TOBN(0x50cbeae2, 0x19b4e34c), TOBN(0x93109d80, 0x3fa0845a),
+     TOBN(0x54f7ccf7, 0x8ef59fb5), TOBN(0x3b438fe2, 0x88070963),
+     TOBN(0x9e28c659, 0x31f3ba9b), TOBN(0x9cc31b46, 0xead9da92),
+     TOBN(0x3c2f0ba9, 0xb733aa5f), TOBN(0xdece47cb, 0xf05af235),
+     TOBN(0xf8e3f715, 0xa2ac82a5), TOBN(0xc97ba641, 0x2203f18a),
+     TOBN(0xc3af5504, 0x09c11060), TOBN(0x56ea2c05, 0x46af512d),
+     TOBN(0xfac28daf, 0xf3f28146), TOBN(0x87fab43a, 0x959ef494),}
+    ,
+    {TOBN(0x09891641, 0xd4c5105f), TOBN(0x1ae80f8e, 0x6d7fbd65),
+     TOBN(0x9d67225f, 0xbee6bdb0), TOBN(0x3b433b59, 0x7fc4d860),
+     TOBN(0x44e66db6, 0x93e85638), TOBN(0xf7b59252, 0xe3e9862f),
+     TOBN(0xdb785157, 0x665c32ec), TOBN(0x702fefd7, 0xae362f50),
+     TOBN(0x3754475d, 0x0fefb0c3), TOBN(0xd48fb56b, 0x46d7c35d),
+     TOBN(0xa070b633, 0x363798a4), TOBN(0xae89f3d2, 0x8fdb98e6),
+     TOBN(0x970b89c8, 0x6363d14c), TOBN(0x89817521, 0x67abd27d),
+     TOBN(0x9bf7d474, 0x44d5a021), TOBN(0xb3083baf, 0xcac72aee),
+     TOBN(0x389741de, 0xbe949a44), TOBN(0x638e9388, 0x546a4fa5),
+     TOBN(0x3fe6419c, 0xa0047bdc), TOBN(0x7047f648, 0xaaea57ca),
+     TOBN(0x54e48a90, 0x41fbab17), TOBN(0xda8e0b28, 0x576bdba2),
+     TOBN(0xe807eebc, 0xc72afddc), TOBN(0x07d3336d, 0xf42577bf),
+     TOBN(0x62a8c244, 0xbfe20925), TOBN(0x91c19ac3, 0x8fdce867),
+     TOBN(0x5a96a5d5, 0xdd387063), TOBN(0x61d587d4, 0x21d324f6),
+     TOBN(0xe87673a2, 0xa37173ea), TOBN(0x23848008, 0x53778b65),
+     TOBN(0x10f8441e, 0x05bab43e), TOBN(0xfa11fe12, 0x4621efbe),
+     TOBN(0x047b772e, 0x81685d7b), TOBN(0x23f27d81, 0xbf34a976),
+     TOBN(0xc27608e2, 0x915f48ef), TOBN(0x3b0b43fa, 0xa521d5c3),
+     TOBN(0x7613fb26, 0x63ca7284), TOBN(0x7f5729b4, 0x1d4db837),
+     TOBN(0x87b14898, 0x583b526b), TOBN(0x00b732a6, 0xbbadd3d1),
+     TOBN(0x8e02f426, 0x2048e396), TOBN(0x436b50b6, 0x383d9de4),
+     TOBN(0xf78d3481, 0x471e85ad), TOBN(0x8b01ea6a, 0xd005c8d6),
+     TOBN(0xd3c7afee, 0x97015c07), TOBN(0x46cdf1a9, 0x4e3ba2ae),
+     TOBN(0x7a42e501, 0x83d3a1d2), TOBN(0xd54b5268, 0xb541dff4),
+     TOBN(0x3f24cf30, 0x4e23e9bc), TOBN(0x4387f816, 0x126e3624),
+     TOBN(0x26a46a03, 0x3b0b6d61), TOBN(0xaf1bc845, 0x8b2d777c),
+     TOBN(0x25c401ba, 0x527de79c), TOBN(0x0e1346d4, 0x4261bbb6),
+     TOBN(0x4b96c44b, 0x287b4bc7), TOBN(0x658493c7, 0x5254562f),
+     TOBN(0x23f949fe, 0xb8a24a20), TOBN(0x17ebfed1, 0xf52ca53f),
+     TOBN(0x9b691bbe, 0xbcfb4853), TOBN(0x5617ff6b, 0x6278a05d),
+     TOBN(0x241b34c5, 0xe3c99ebd), TOBN(0xfc64242e, 0x1784156a),
+     TOBN(0x4206482f, 0x695d67df), TOBN(0xb967ce0e, 0xee27c011),
+     TOBN(0x65db3751, 0x21c80b5d), TOBN(0x2e7a563c, 0xa31ecca0),
+     TOBN(0xe56ffc4e, 0x5238a07e), TOBN(0x3d6c2966, 0x32ced854),
+     TOBN(0xe99d7d1a, 0xaf70b885), TOBN(0xafc3bad9, 0x2d686459),
+     TOBN(0x9c78bf46, 0x0cc8ba5b), TOBN(0x5a439519, 0x18955aa3),
+     TOBN(0xf8b517a8, 0x5fe4e314), TOBN(0xe60234d0, 0xfcb8906f),
+     TOBN(0xffe542ac, 0xf2061b23), TOBN(0x287e191f, 0x6b4cb59c),
+     TOBN(0x21857ddc, 0x09d877d8), TOBN(0x1c23478c, 0x14678941),
+     TOBN(0xbbf0c056, 0xb6e05ea4), TOBN(0x82da4b53, 0xb01594fe),
+     TOBN(0xf7526791, 0xfadb8608), TOBN(0x049e832d, 0x7b74cdf6),
+     TOBN(0xa43581cc, 0xc2b90a34), TOBN(0x73639eb8, 0x9360b10c),
+     TOBN(0x4fba331f, 0xe1e4a71b), TOBN(0x6ffd6b93, 0x8072f919),
+     TOBN(0x6e53271c, 0x65679032), TOBN(0x67206444, 0xf14272ce),
+     TOBN(0xc0f734a3, 0xb2335834), TOBN(0x9526205a, 0x90ef6860),
+     TOBN(0xcb8be717, 0x04e2bb0d), TOBN(0x2418871e, 0x02f383fa),
+     TOBN(0xd7177681, 0x4082c157), TOBN(0xcc914ad0, 0x29c20073),
+     TOBN(0xf186c1eb, 0xe587e728), TOBN(0x6fdb3c22, 0x61bcd5fd),
+     TOBN(0x30d014a6, 0xf2f9f8e9), TOBN(0x963ece23, 0x4fec49d2),
+     TOBN(0x862025c5, 0x9605a8d9), TOBN(0x39874445, 0x19f8929a),
+     TOBN(0x01b6ff65, 0x12bf476a), TOBN(0x598a64d8, 0x09cf7d91),
+     TOBN(0xd7ec7749, 0x93be56ca), TOBN(0x10899785, 0xcbb33615),
+     TOBN(0xb8a092fd, 0x02eee3ad), TOBN(0xa86b3d35, 0x30145270),
+     TOBN(0x323d98c6, 0x8512b675), TOBN(0x4b8bc785, 0x62ebb40f),
+     TOBN(0x7d301f54, 0x413f9cde), TOBN(0xa5e4fb4f, 0x2bab5664),
+     TOBN(0x1d2b252d, 0x1cbfec23), TOBN(0xfcd576bb, 0xe177120d),
+     TOBN(0x04427d3e, 0x83731a34), TOBN(0x2bb9028e, 0xed836e8e),
+     TOBN(0xb36acff8, 0xb612ca7c), TOBN(0xb88fe5ef, 0xd3d9c73a),
+     TOBN(0xbe2a6bc6, 0xedea4eb3), TOBN(0x43b93133, 0x488eec77),
+     TOBN(0xf41ff566, 0xb17106e1), TOBN(0x469e9172, 0x654efa32),
+     TOBN(0xb4480f04, 0x41c23fa3), TOBN(0xb4712eb0, 0xc1989a2e),
+     TOBN(0x3ccbba0f, 0x93a29ca7), TOBN(0x6e205c14, 0xd619428c),
+     TOBN(0x90db7957, 0xb3641686), TOBN(0x0432691d, 0x45ac8b4e),
+     TOBN(0x07a759ac, 0xf64e0350), TOBN(0x0514d89c, 0x9c972517),
+     TOBN(0x1701147f, 0xa8e67fc3), TOBN(0x9e2e0b8b, 0xab2085be),
+     TOBN(0xd5651824, 0xac284e57), TOBN(0x890d4325, 0x74893664),
+     TOBN(0x8a7c5e6e, 0xc55e68a3), TOBN(0xbf12e90b, 0x4339c85a),
+     TOBN(0x31846b85, 0xf922b655), TOBN(0x9a54ce4d, 0x0bf4d700),
+     TOBN(0xd7f4e83a, 0xf1a14295), TOBN(0x916f955c, 0xb285d4f9),
+     TOBN(0xe57bb0e0, 0x99ffdaba), TOBN(0x28a43034, 0xeab0d152),
+     TOBN(0x0a36ffa2, 0xb8a9cef8), TOBN(0x5517407e, 0xb9ec051a),
+     TOBN(0x9c796096, 0xea68e672), TOBN(0x853db5fb, 0xfb3c77fb),
+     TOBN(0x21474ba9, 0xe864a51a), TOBN(0x6c267699, 0x6e8a1b8b),
+     TOBN(0x7c823626, 0x94120a28), TOBN(0xe61e9a48, 0x8383a5db),
+     TOBN(0x7dd75003, 0x9f84216d), TOBN(0xab020d07, 0xad43cd85),
+     TOBN(0x9437ae48, 0xda12c659), TOBN(0x6449c2eb, 0xe65452ad),
+     TOBN(0xcc7c4c1c, 0x2cf9d7c1), TOBN(0x1320886a, 0xee95e5ab),
+     TOBN(0xbb7b9056, 0xbeae170c), TOBN(0xc8a5b250, 0xdbc0d662),
+     TOBN(0x4ed81432, 0xc11d2303), TOBN(0x7da66912, 0x1f03769f),
+     TOBN(0x3ac7a5fd, 0x84539828), TOBN(0x14dada94, 0x3bccdd02),
+     TOBN(0x8b84c321, 0x7ef6b0d1), TOBN(0x52a9477a, 0x7c933f22),
+     TOBN(0x5ef6728a, 0xfd440b82), TOBN(0x5c3bd859, 0x6ce4bd5e),
+     TOBN(0x918b80f5, 0xf22c2d3e), TOBN(0x368d5040, 0xb7bb6cc5),
+     TOBN(0xb66142a1, 0x2695a11c), TOBN(0x60ac583a, 0xeb19ea70),
+     TOBN(0x317cbb98, 0x0eab2437), TOBN(0x8cc08c55, 0x5e2654c8),
+     TOBN(0xfe2d6520, 0xe6d8307f), TOBN(0xe9f147f3, 0x57428993),
+     TOBN(0x5f9c7d14, 0xd2fd6cf1), TOBN(0xa3ecd064, 0x2d4fcbb0),
+     TOBN(0xad83fef0, 0x8e7341f7), TOBN(0x643f23a0, 0x3a63115c),
+     TOBN(0xd38a78ab, 0xe65ab743), TOBN(0xbf7c75b1, 0x35edc89c),
+     TOBN(0x3dd8752e, 0x530df568), TOBN(0xf85c4a76, 0xe308c682),
+     TOBN(0x4c9955b2, 0xe68acf37), TOBN(0xa544df3d, 0xab32af85),
+     TOBN(0x4b8ec3f5, 0xa25cf493), TOBN(0x4d8f2764, 0x1a622feb),
+     TOBN(0x7bb4f7aa, 0xf0dcbc49), TOBN(0x7de551f9, 0x70bbb45b),
+     TOBN(0xcfd0f3e4, 0x9f2ca2e5), TOBN(0xece58709, 0x1f5c76ef),
+     TOBN(0x32920edd, 0x167d79ae), TOBN(0x039df8a2, 0xfa7d7ec1),
+     TOBN(0xf46206c0, 0xbb30af91), TOBN(0x1ff5e2f5, 0x22676b59),
+     TOBN(0x11f4a039, 0x6ea51d66), TOBN(0x506c1445, 0x807d7a26),
+     TOBN(0x60da5705, 0x755a9b24), TOBN(0x8fc8cc32, 0x1f1a319e),
+     TOBN(0x83642d4d, 0x9433d67d), TOBN(0x7fa5cb8f, 0x6a7dd296),
+     TOBN(0x576591db, 0x9b7bde07), TOBN(0x13173d25, 0x419716fb),
+     TOBN(0xea30599d, 0xd5b340ff), TOBN(0xfc6b5297, 0xb0fe76c5),
+     TOBN(0x1c6968c8, 0xab8f5adc), TOBN(0xf723c7f5, 0x901c928d),
+     TOBN(0x4203c321, 0x9773d402), TOBN(0xdf7c6aa3, 0x1b51dd47),
+     TOBN(0x3d49e37a, 0x552be23c), TOBN(0x57febee8, 0x0b5a6e87),
+     TOBN(0xc5ecbee4, 0x7bd8e739), TOBN(0x79d44994, 0xae63bf75),
+     TOBN(0x168bd00f, 0x38fb8923), TOBN(0x75d48ee4, 0xd0533130),
+     TOBN(0x554f77aa, 0xdb5cdf33), TOBN(0x3396e896, 0x3c696769),
+     TOBN(0x2fdddbf2, 0xd3fd674e), TOBN(0xbbb8f6ee, 0x99d0e3e5),
+     TOBN(0x51b90651, 0xcbae2f70), TOBN(0xefc4bc05, 0x93aaa8eb),
+     TOBN(0x8ecd8689, 0xdd1df499), TOBN(0x1aee99a8, 0x22f367a5),
+     TOBN(0x95d485b9, 0xae8274c5), TOBN(0x6c14d445, 0x7d30b39c),
+     TOBN(0xbafea90b, 0xbcc1ef81), TOBN(0x7c5f317a, 0xa459a2ed),
+     TOBN(0x01211075, 0x4ef44227), TOBN(0xa17bed6e, 0xdc20f496),
+     TOBN(0x0cdfe424, 0x819853cd), TOBN(0x13793298, 0xf71e2ce7),
+     TOBN(0x3c1f3078, 0xdbbe307b), TOBN(0x6dd1c20e, 0x76ee9936),
+     TOBN(0x23ee4b57, 0x423caa20), TOBN(0x4ac3793b, 0x8efb840e),
+     TOBN(0x934438eb, 0xed1f8ca0), TOBN(0x3e546658, 0x4ebb25a2),
+     TOBN(0xc415af0e, 0xc069896f), TOBN(0xc13eddb0, 0x9a5aa43d),
+     TOBN(0x7a04204f, 0xd49eb8f6), TOBN(0xd0d5bdfc, 0xd74f1670),
+     TOBN(0x3697e286, 0x56fc0558), TOBN(0x10207371, 0x01cebade),
+     TOBN(0x5f87e690, 0x0647a82b), TOBN(0x908e0ed4, 0x8f40054f),
+     TOBN(0xa9f633d4, 0x79853803), TOBN(0x8ed13c9a, 0x4a28b252),
+     TOBN(0x3e2ef676, 0x1f460f64), TOBN(0x53930b9b, 0x36d06336),
+     TOBN(0x347073ac, 0x8fc4979b), TOBN(0x84380e0e, 0x5ecd5597),
+     TOBN(0xe3b22c6b, 0xc4fe3c39), TOBN(0xba4a8153, 0x6c7bebdf),
+     TOBN(0xf23ab6b7, 0x25693459), TOBN(0x53bc3770, 0x14922b11),
+     TOBN(0x4645c8ab, 0x5afc60db), TOBN(0xaa022355, 0x20b9f2a3),
+     TOBN(0x52a2954c, 0xce0fc507), TOBN(0x8c2731bb, 0x7ce1c2e7),
+     TOBN(0xf39608ab, 0x18a0339d), TOBN(0xac7a658d, 0x3735436c),
+     TOBN(0xb22c2b07, 0xcd992b4f), TOBN(0x4e83daec, 0xf40dcfd4),
+     TOBN(0x8a34c7be, 0x2f39ea3e), TOBN(0xef0c005f, 0xb0a56d2e),
+     TOBN(0x62731f6a, 0x6edd8038), TOBN(0x5721d740, 0x4e3cb075),
+     TOBN(0x1ea41511, 0xfbeeee1b), TOBN(0xd1ef5e73, 0xef1d0c05),
+     TOBN(0x42feefd1, 0x73c07d35), TOBN(0xe530a00a, 0x8a329493),
+     TOBN(0x5d55b7fe, 0xf15ebfb0), TOBN(0x549de03c, 0xd322491a),
+     TOBN(0xf7b5f602, 0x745b3237), TOBN(0x3632a3a2, 0x1ab6e2b6),
+     TOBN(0x0d3bba89, 0x0ef59f78), TOBN(0x0dfc6443, 0xc9e52b9a),
+     TOBN(0x1dc79699, 0x72631447), TOBN(0xef033917, 0xb3be20b1),
+     TOBN(0x0c92735d, 0xb1383948), TOBN(0xc1fc29a2, 0xc0dd7d7d),
+     TOBN(0x6485b697, 0x403ed068), TOBN(0x13bfaab3, 0xaac93bdc),
+     TOBN(0x410dc6a9, 0x0deeaf52), TOBN(0xb003fb02, 0x4c641c15),
+     TOBN(0x1384978c, 0x5bc504c4), TOBN(0x37640487, 0x864a6a77),
+     TOBN(0x05991bc6, 0x222a77da), TOBN(0x62260a57, 0x5e47eb11),
+     TOBN(0xc7af6613, 0xf21b432c), TOBN(0x22f3acc9, 0xab4953e9),
+     TOBN(0x52934922, 0x8e41d155), TOBN(0x4d024568, 0x3ac059ef),
+     TOBN(0xb0201755, 0x4d884411), TOBN(0xce8055cf, 0xa59a178f),
+     TOBN(0xcd77d1af, 0xf6204549), TOBN(0xa0a00a3e, 0xc7066759),
+     TOBN(0x471071ef, 0x0272c229), TOBN(0x009bcf6b, 0xd3c4b6b0),
+     TOBN(0x2a2638a8, 0x22305177), TOBN(0xd51d59df, 0x41645bbf),
+     TOBN(0xa81142fd, 0xc0a7a3c0), TOBN(0xa17eca6d, 0x4c7063ee),
+     TOBN(0x0bb887ed, 0x60d9dcec), TOBN(0xd6d28e51, 0x20ad2455),
+     TOBN(0xebed6308, 0xa67102ba), TOBN(0x042c3114, 0x8bffa408),
+     TOBN(0xfd099ac5, 0x8aa68e30), TOBN(0x7a6a3d7c, 0x1483513e),
+     TOBN(0xffcc6b75, 0xba2d8f0c), TOBN(0x54dacf96, 0x1e78b954),
+     TOBN(0xf645696f, 0xa4a9af89), TOBN(0x3a411940, 0x06ac98ec),
+     TOBN(0x41b8b3f6, 0x22a67a20), TOBN(0x2d0b1e0f, 0x99dec626),
+     TOBN(0x27c89192, 0x40be34e8), TOBN(0xc7162b37, 0x91907f35),
+     TOBN(0x90188ec1, 0xa956702b), TOBN(0xca132f7d, 0xdf93769c),
+     TOBN(0x3ece44f9, 0x0e2025b4), TOBN(0x67aaec69, 0x0c62f14c),
+     TOBN(0xad741418, 0x22e3cc11), TOBN(0xcf9b75c3, 0x7ff9a50e),
+     TOBN(0x02fa2b16, 0x4d348272), TOBN(0xbd99d61a, 0x9959d56d),
+     TOBN(0xbc4f19db, 0x18762916), TOBN(0xcc7cce50, 0x49c1ac80),
+     TOBN(0x4d59ebaa, 0xd846bd83), TOBN(0x8775a9dc, 0xa9202849),
+     TOBN(0x07ec4ae1, 0x6e1f4ca9), TOBN(0x27eb5875, 0xba893f11),
+     TOBN(0x00284d51, 0x662cc565), TOBN(0x82353a6b, 0x0db4138d),
+     TOBN(0xd9c7aaaa, 0xaa32a594), TOBN(0xf5528b5e, 0xa5669c47),
+     TOBN(0xf3220231, 0x2f23c5ff), TOBN(0xe3e8147a, 0x6affa3a1),
+     TOBN(0xfb423d5c, 0x202ddda0), TOBN(0x3d6414ac, 0x6b871bd4),
+     TOBN(0x586f82e1, 0xa51a168a), TOBN(0xb712c671, 0x48ae5448),
+     TOBN(0x9a2e4bd1, 0x76233eb8), TOBN(0x0188223a, 0x78811ca9),
+     TOBN(0x553c5e21, 0xf7c18de1), TOBN(0x7682e451, 0xb27bb286),
+     TOBN(0x3ed036b3, 0x0e51e929), TOBN(0xf487211b, 0xec9cb34f),
+     TOBN(0x0d094277, 0x0c24efc8), TOBN(0x0349fd04, 0xbef737a4),
+     TOBN(0x6d1c9dd2, 0x514cdd28), TOBN(0x29c135ff, 0x30da9521),
+     TOBN(0xea6e4508, 0xf78b0b6f), TOBN(0x176f5dd2, 0x678c143c),
+     TOBN(0x08148418, 0x4be21e65), TOBN(0x27f7525c, 0xe7df38c4),
+     TOBN(0x1fb70e09, 0x748ab1a4), TOBN(0x9cba50a0, 0x5efe4433),
+     TOBN(0x7846c7a6, 0x15f75af2), TOBN(0x2a7c2c57, 0x5ee73ea8),
+     TOBN(0x42e566a4, 0x3f0a449a), TOBN(0x45474c3b, 0xad90fc3d),
+     TOBN(0x7447be3d, 0x8b61d057), TOBN(0x3e9d1cf1, 0x3a4ec092),
+     TOBN(0x1603e453, 0xf380a6e6), TOBN(0x0b86e431, 0x9b1437c2),
+     TOBN(0x7a4173f2, 0xef29610a), TOBN(0x8fa729a7, 0xf03d57f7),
+     TOBN(0x3e186f6e, 0x6c9c217e), TOBN(0xbe1d3079, 0x91919524),
+     TOBN(0x92a62a70, 0x153d4fb1), TOBN(0x32ed3e34, 0xd68c2f71),
+     TOBN(0xd785027f, 0x9eb1a8b7), TOBN(0xbc37eb77, 0xc5b22fe8),
+     TOBN(0x466b34f0, 0xb9d6a191), TOBN(0x008a89af, 0x9a05f816),
+     TOBN(0x19b028fb, 0x7d42c10a), TOBN(0x7fe8c92f, 0x49b3f6b8),
+     TOBN(0x58907cc0, 0xa5a0ade3), TOBN(0xb3154f51, 0x559d1a7c),
+     TOBN(0x5066efb6, 0xd9790ed6), TOBN(0xa77a0cbc, 0xa6aa793b),
+     TOBN(0x1a915f3c, 0x223e042e), TOBN(0x1c5def04, 0x69c5874b),
+     TOBN(0x0e830078, 0x73b6c1da), TOBN(0x55cf85d2, 0xfcd8557a),
+     TOBN(0x0f7c7c76, 0x0460f3b1), TOBN(0x87052acb, 0x46e58063),
+     TOBN(0x09212b80, 0x907eae66), TOBN(0x3cb068e0, 0x4d721c89),
+     TOBN(0xa87941ae, 0xdd45ac1c), TOBN(0xde8d5c0d, 0x0daa0dbb),
+     TOBN(0xda421fdc, 0xe3502e6e), TOBN(0xc8944201, 0x4d89a084),
+     TOBN(0x7307ba5e, 0xf0c24bfb), TOBN(0xda212beb, 0x20bde0ef),
+     TOBN(0xea2da24b, 0xf82ce682), TOBN(0x058d3816, 0x07f71fe4),
+     TOBN(0x35a02462, 0x5ffad8de), TOBN(0xcd7b05dc, 0xaadcefab),
+     TOBN(0xd442f8ed, 0x1d9f54ec), TOBN(0x8be3d618, 0xb2d3b5ca),
+     TOBN(0xe2220ed0, 0xe06b2ce2), TOBN(0x82699a5f, 0x1b0da4c0),
+     TOBN(0x3ff106f5, 0x71c0c3a7), TOBN(0x8f580f5a, 0x0d34180c),
+     TOBN(0x4ebb120e, 0x22d7d375), TOBN(0x5e5782cc, 0xe9513675),
+     TOBN(0x2275580c, 0x99c82a70), TOBN(0xe8359fbf, 0x15ea8c4c),
+     TOBN(0x53b48db8, 0x7b415e70), TOBN(0xaacf2240, 0x100c6014),
+     TOBN(0x9faaccf5, 0xe4652f1d), TOBN(0xbd6fdd2a, 0xd56157b2),
+     TOBN(0xa4f4fb1f, 0x6261ec50), TOBN(0x244e55ad, 0x476bcd52),
+     TOBN(0x881c9305, 0x047d320b), TOBN(0x1ca983d5, 0x6181263f),
+     TOBN(0x354e9a44, 0x278fb8ee), TOBN(0xad2dbc0f, 0x396e4964),
+     TOBN(0x723f3aa2, 0x9268b3de), TOBN(0x0d1ca29a, 0xe6e0609a),
+     TOBN(0x794866aa, 0x6cf44252), TOBN(0x0b59f3e3, 0x01af87ed),
+     TOBN(0xe234e5ff, 0x7f4a6c51), TOBN(0xa8768fd2, 0x61dc2f7e),
+     TOBN(0xdafc7332, 0x0a94d81f), TOBN(0xd7f84282, 0x06938ce1),
+     TOBN(0xae0b3c0e, 0x0546063e), TOBN(0x7fbadcb2, 0x5d61abc6),
+     TOBN(0xd5d7a2c9, 0x369ac400), TOBN(0xa5978d09, 0xae67d10c),
+     TOBN(0x290f211e, 0x4f85eaac), TOBN(0xe61e2ad1, 0xfacac681),
+     TOBN(0xae125225, 0x388384cd), TOBN(0xa7fb68e9, 0xccfde30f),
+     TOBN(0x7a59b936, 0x3daed4c2), TOBN(0x80a9aa40, 0x2606f789),
+     TOBN(0xb40c1ea5, 0xf6a6d90a), TOBN(0x948364d3, 0x514d5885),
+     TOBN(0x062ebc60, 0x70985182), TOBN(0xa6db5b0e, 0x33310895),
+     TOBN(0x64a12175, 0xe329c2f5), TOBN(0xc5f25bd2, 0x90ea237e),
+     TOBN(0x7915c524, 0x2d0a4c23), TOBN(0xeb5d26e4, 0x6bb3cc52),
+     TOBN(0x369a9116, 0xc09e2c92), TOBN(0x0c527f92, 0xcf182cf8),
+     TOBN(0x9e591938, 0x2aede0ac), TOBN(0xb2922208, 0x6cc34939),
+     TOBN(0x3c9d8962, 0x99a34361), TOBN(0x3c81836d, 0xc1905fe6),
+     TOBN(0x4bfeb57f, 0xa001ec5a), TOBN(0xe993f5bb, 0xa0dc5dba),
+     TOBN(0x47884109, 0x724a1380), TOBN(0x8a0369ab, 0x32fe9a04),
+     TOBN(0xea068d60, 0x8c927db8), TOBN(0xbf5f37cf, 0x94655741),
+     TOBN(0x47d402a2, 0x04b6c7ea), TOBN(0x4551c295, 0x6af259cb),
+     TOBN(0x698b71e7, 0xed77ee8b), TOBN(0xbddf7bd0, 0xf309d5c7),
+     TOBN(0x6201c22c, 0x34e780ca), TOBN(0xab04f7d8, 0x4c295ef4),
+     TOBN(0x1c947294, 0x4313a8ce), TOBN(0xe532e4ac, 0x92ca4cfe),
+     TOBN(0x89738f80, 0xd0a7a97a), TOBN(0xec088c88, 0xa580fd5b),
+     TOBN(0x612b1ecc, 0x42ce9e51), TOBN(0x8f9840fd, 0xb25fdd2a),
+     TOBN(0x3cda78c0, 0x01e7f839), TOBN(0x546b3d3a, 0xece05480),
+     TOBN(0x271719a9, 0x80d30916), TOBN(0x45497107, 0x584c20c4),
+     TOBN(0xaf8f9478, 0x5bc78608), TOBN(0x28c7d484, 0x277e2a4c),
+     TOBN(0xfce01767, 0x88a2ffe4), TOBN(0xdc506a35, 0x28e169a5),
+     TOBN(0x0ea10861, 0x7af9c93a), TOBN(0x1ed24361, 0x03fa0e08),
+     TOBN(0x96eaaa92, 0xa3d694e7), TOBN(0xc0f43b4d, 0xef50bc74),
+     TOBN(0xce6aa58c, 0x64114db4), TOBN(0x8218e8ea, 0x7c000fd4),
+     TOBN(0xac815dfb, 0x185f8844), TOBN(0xcd7e90cb, 0x1557abfb),
+     TOBN(0x23d16655, 0xafbfecdf), TOBN(0x80f3271f, 0x085cac4a),
+     TOBN(0x7fc39aa7, 0xd0e62f47), TOBN(0x88d519d1, 0x460a48e5),
+     TOBN(0x59559ac4, 0xd28f101e), TOBN(0x7981d9e9, 0xca9ae816),
+     TOBN(0x5c38652c, 0x9ac38203), TOBN(0x86eaf87f, 0x57657fe5),
+     TOBN(0x568fc472, 0xe21f5416), TOBN(0x2afff39c, 0xe7e597b5),
+     TOBN(0x3adbbb07, 0x256d4eab), TOBN(0x22598692, 0x8285ab89),
+     TOBN(0x35f8112a, 0x041caefe), TOBN(0x95df02e3, 0xa5064c8b),
+     TOBN(0x4d63356e, 0xc7004bf3), TOBN(0x230a08f4, 0xdb83c7de),
+     TOBN(0xca27b270, 0x8709a7b7), TOBN(0x0d1c4cc4, 0xcb9abd2d),
+     TOBN(0x8a0bc66e, 0x7550fee8), TOBN(0x369cd4c7, 0x9cf7247e),
+     TOBN(0x75562e84, 0x92b5b7e7), TOBN(0x8fed0da0, 0x5802af7b),
+     TOBN(0x6a7091c2, 0xe48fb889), TOBN(0x26882c13, 0x7b8a9d06),
+     TOBN(0xa2498663, 0x1b82a0e2), TOBN(0x844ed736, 0x3518152d),
+     TOBN(0x282f476f, 0xd86e27c7), TOBN(0xa04edaca, 0x04afefdc),
+     TOBN(0x8b256ebc, 0x6119e34d), TOBN(0x56a413e9, 0x0787d78b),}
+    ,
+    {TOBN(0x82ee061d, 0x5a74be50), TOBN(0xe41781c4, 0xdea16ff5),
+     TOBN(0xe0b0c81e, 0x99bfc8a2), TOBN(0x624f4d69, 0x0b547e2d),
+     TOBN(0x3a83545d, 0xbdcc9ae4), TOBN(0x2573dbb6, 0x409b1e8e),
+     TOBN(0x482960c4, 0xa6c93539), TOBN(0xf01059ad, 0x5ae18798),
+     TOBN(0x715c9f97, 0x3112795f), TOBN(0xe8244437, 0x984e6ee1),
+     TOBN(0x55cb4858, 0xecb66bcd), TOBN(0x7c136735, 0xabaffbee),
+     TOBN(0x54661595, 0x5dbec38e), TOBN(0x51c0782c, 0x388ad153),
+     TOBN(0x9ba4c53a, 0xc6e0952f), TOBN(0x27e6782a, 0x1b21dfa8),
+     TOBN(0x682f903d, 0x4ed2dbc2), TOBN(0x0eba59c8, 0x7c3b2d83),
+     TOBN(0x8e9dc84d, 0x9c7e9335), TOBN(0x5f9b21b0, 0x0eb226d7),
+     TOBN(0xe33bd394, 0xaf267bae), TOBN(0xaa86cc25, 0xbe2e15ae),
+     TOBN(0x4f0bf67d, 0x6a8ec500), TOBN(0x5846aa44, 0xf9630658),
+     TOBN(0xfeb09740, 0xe2c2bf15), TOBN(0x627a2205, 0xa9e99704),
+     TOBN(0xec8d73d0, 0xc2fbc565), TOBN(0x223eed8f, 0xc20c8de8),
+     TOBN(0x1ee32583, 0xa8363b49), TOBN(0x1a0b6cb9, 0xc9c2b0a6),
+     TOBN(0x49f7c3d2, 0x90dbc85c), TOBN(0xa8dfbb97, 0x1ef4c1ac),
+     TOBN(0xafb34d4c, 0x65c7c2ab), TOBN(0x1d4610e7, 0xe2c5ea84),
+     TOBN(0x893f6d1b, 0x973c4ab5), TOBN(0xa3cdd7e9, 0x945ba5c4),
+     TOBN(0x60514983, 0x064417ee), TOBN(0x1459b23c, 0xad6bdf2b),
+     TOBN(0x23b2c341, 0x5cf726c3), TOBN(0x3a829635, 0x32d6354a),
+     TOBN(0x294f901f, 0xab192c18), TOBN(0xec5fcbfe, 0x7030164f),
+     TOBN(0xe2e2fcb7, 0xe2246ba6), TOBN(0x1e7c88b3, 0x221a1a0c),
+     TOBN(0x72c7dd93, 0xc92d88c5), TOBN(0x41c2148e, 0x1106fb59),
+     TOBN(0x547dd4f5, 0xa0f60f14), TOBN(0xed9b52b2, 0x63960f31),
+     TOBN(0x6c8349eb, 0xb0a5b358), TOBN(0xb154c5c2, 0x9e7e2ed6),
+     TOBN(0xcad5eccf, 0xeda462db), TOBN(0xf2d6dbe4, 0x2de66b69),
+     TOBN(0x426aedf3, 0x8665e5b2), TOBN(0x488a8513, 0x7b7f5723),
+     TOBN(0x15cc43b3, 0x8bcbb386), TOBN(0x27ad0af3, 0xd791d879),
+     TOBN(0xc16c236e, 0x846e364f), TOBN(0x7f33527c, 0xdea50ca0),
+     TOBN(0xc4810775, 0x0926b86d), TOBN(0x6c2a3609, 0x0598e70c),
+     TOBN(0xa6755e52, 0xf024e924), TOBN(0xe0fa07a4, 0x9db4afca),
+     TOBN(0x15c3ce7d, 0x66831790), TOBN(0x5b4ef350, 0xa6cbb0d6),
+     TOBN(0x2c4aafc4, 0xb6205969), TOBN(0x42563f02, 0xf6c7854f),
+     TOBN(0x016aced5, 0x1d983b48), TOBN(0xfeb356d8, 0x99949755),
+     TOBN(0x8c2a2c81, 0xd1a39bd7), TOBN(0x8f44340f, 0xe6934ae9),
+     TOBN(0x148cf91c, 0x447904da), TOBN(0x7340185f, 0x0f51a926),
+     TOBN(0x2f8f00fb, 0x7409ab46), TOBN(0x057e78e6, 0x80e289b2),
+     TOBN(0x03e5022c, 0xa888e5d1), TOBN(0x3c87111a, 0x9dede4e2),
+     TOBN(0x5b9b0e1c, 0x7809460b), TOBN(0xe751c852, 0x71c9abc7),
+     TOBN(0x8b944e28, 0xc7cc1dc9), TOBN(0x4f201ffa, 0x1d3cfa08),
+     TOBN(0x02fc905c, 0x3e6721ce), TOBN(0xd52d70da, 0xd0b3674c),
+     TOBN(0x5dc2e5ca, 0x18810da4), TOBN(0xa984b273, 0x5c69dd99),
+     TOBN(0x63b92527, 0x84de5ca4), TOBN(0x2f1c9872, 0xc852dec4),
+     TOBN(0x18b03593, 0xc2e3de09), TOBN(0x19d70b01, 0x9813dc2f),
+     TOBN(0x42806b2d, 0xa6dc1d29), TOBN(0xd3030009, 0xf871e144),
+     TOBN(0xa1feb333, 0xaaf49276), TOBN(0xb5583b9e, 0xc70bc04b),
+     TOBN(0x1db0be78, 0x95695f20), TOBN(0xfc841811, 0x89d012b5),
+     TOBN(0x6409f272, 0x05f61643), TOBN(0x40d34174, 0xd5883128),
+     TOBN(0xd79196f5, 0x67419833), TOBN(0x6059e252, 0x863b7b08),
+     TOBN(0x84da1817, 0x1c56700c), TOBN(0x5758ee56, 0xb28d3ec4),
+     TOBN(0x7da2771d, 0x013b0ea6), TOBN(0xfddf524b, 0x54c5e9b9),
+     TOBN(0x7df4faf8, 0x24305d80), TOBN(0x58f5c1bf, 0x3a97763f),
+     TOBN(0xa5af37f1, 0x7c696042), TOBN(0xd4cba22c, 0x4a2538de),
+     TOBN(0x211cb995, 0x9ea42600), TOBN(0xcd105f41, 0x7b069889),
+     TOBN(0xb1e1cf19, 0xddb81e74), TOBN(0x472f2d89, 0x5157b8ca),
+     TOBN(0x086fb008, 0xee9db885), TOBN(0x365cd570, 0x0f26d131),
+     TOBN(0x284b02bb, 0xa2be7053), TOBN(0xdcbbf7c6, 0x7ab9a6d6),
+     TOBN(0x4425559c, 0x20f7a530), TOBN(0x961f2dfa, 0x188767c8),
+     TOBN(0xe2fd9435, 0x70dc80c4), TOBN(0x104d6b63, 0xf0784120),
+     TOBN(0x7f592bc1, 0x53567122), TOBN(0xf6bc1246, 0xf688ad77),
+     TOBN(0x05214c05, 0x0f15dde9), TOBN(0xa47a76a8, 0x0d5f2b82),
+     TOBN(0xbb254d30, 0x62e82b62), TOBN(0x11a05fe0, 0x3ec955ee),
+     TOBN(0x7eaff46e, 0x9d529b36), TOBN(0x55ab1301, 0x8f9e3df6),
+     TOBN(0xc463e371, 0x99317698), TOBN(0xfd251438, 0xccda47ad),
+     TOBN(0xca9c3547, 0x23d695ea), TOBN(0x48ce626e, 0x16e589b5),
+     TOBN(0x6b5b64c7, 0xb187d086), TOBN(0xd02e1794, 0xb2207948),
+     TOBN(0x8b58e98f, 0x7198111d), TOBN(0x90ca6305, 0xdcf9c3cc),
+     TOBN(0x5691fe72, 0xf34089b0), TOBN(0x60941af1, 0xfc7c80ff),
+     TOBN(0xa09bc0a2, 0x22eb51e5), TOBN(0xc0bb7244, 0xaa9cf09a),
+     TOBN(0x36a8077f, 0x80159f06), TOBN(0x8b5c989e, 0xdddc560e),
+     TOBN(0x19d2f316, 0x512e1f43), TOBN(0x02eac554, 0xad08ff62),
+     TOBN(0x012ab84c, 0x07d20b4e), TOBN(0x37d1e115, 0xd6d4e4e1),
+     TOBN(0xb6443e1a, 0xab7b19a8), TOBN(0xf08d067e, 0xdef8cd45),
+     TOBN(0x63adf3e9, 0x685e03da), TOBN(0xcf15a10e, 0x4792b916),
+     TOBN(0xf44bcce5, 0xb738a425), TOBN(0xebe131d5, 0x9636b2fd),
+     TOBN(0x94068841, 0x7850d605), TOBN(0x09684eaa, 0xb40d749d),
+     TOBN(0x8c3c669c, 0x72ba075b), TOBN(0x89f78b55, 0xba469015),
+     TOBN(0x5706aade, 0x3e9f8ba8), TOBN(0x6d8bd565, 0xb32d7ed7),
+     TOBN(0x25f4e63b, 0x805f08d6), TOBN(0x7f48200d, 0xc3bcc1b5),
+     TOBN(0x4e801968, 0xb025d847), TOBN(0x74afac04, 0x87cbe0a8),
+     TOBN(0x43ed2c2b, 0x7e63d690), TOBN(0xefb6bbf0, 0x0223cdb8),
+     TOBN(0x4fec3cae, 0x2884d3fe), TOBN(0x065ecce6, 0xd75e25a4),
+     TOBN(0x6c2294ce, 0x69f79071), TOBN(0x0d9a8e5f, 0x044b8666),
+     TOBN(0x5009f238, 0x17b69d8f), TOBN(0x3c29f8fe, 0xc5dfdaf7),
+     TOBN(0x9067528f, 0xebae68c4), TOBN(0x5b385632, 0x30c5ba21),
+     TOBN(0x540df119, 0x1fdd1aec), TOBN(0xcf37825b, 0xcfba4c78),
+     TOBN(0x77eff980, 0xbeb11454), TOBN(0x40a1a991, 0x60c1b066),
+     TOBN(0xe8018980, 0xf889a1c7), TOBN(0xb9c52ae9, 0x76c24be0),
+     TOBN(0x05fbbcce, 0x45650ef4), TOBN(0xae000f10, 0x8aa29ac7),
+     TOBN(0x884b7172, 0x4f04c470), TOBN(0x7cd4fde2, 0x19bb5c25),
+     TOBN(0x6477b22a, 0xe8840869), TOBN(0xa8868859, 0x5fbd0686),
+     TOBN(0xf23cc02e, 0x1116dfba), TOBN(0x76cd563f, 0xd87d7776),
+     TOBN(0xe2a37598, 0xa9d82abf), TOBN(0x5f188ccb, 0xe6c170f5),
+     TOBN(0x81682200, 0x5066b087), TOBN(0xda22c212, 0xc7155ada),
+     TOBN(0x151e5d3a, 0xfbddb479), TOBN(0x4b606b84, 0x6d715b99),
+     TOBN(0x4a73b54b, 0xf997cb2e), TOBN(0x9a1bfe43, 0x3ecd8b66),
+     TOBN(0x1c312809, 0x2a67d48a), TOBN(0xcd6a671e, 0x031fa9e2),
+     TOBN(0xbec3312a, 0x0e43a34a), TOBN(0x1d935639, 0x55ef47d3),
+     TOBN(0x5ea02489, 0x8fea73ea), TOBN(0x8247b364, 0xa035afb2),
+     TOBN(0xb58300a6, 0x5265b54c), TOBN(0x3286662f, 0x722c7148),
+     TOBN(0xb77fd76b, 0xb4ec4c20), TOBN(0xf0a12fa7, 0x0f3fe3fd),
+     TOBN(0xf845bbf5, 0x41d8c7e8), TOBN(0xe4d969ca, 0x5ec10aa8),
+     TOBN(0x4c0053b7, 0x43e232a3), TOBN(0xdc7a3fac, 0x37f8a45a),
+     TOBN(0x3c4261c5, 0x20d81c8f), TOBN(0xfd4b3453, 0xb00eab00),
+     TOBN(0x76d48f86, 0xd36e3062), TOBN(0x626c5277, 0xa143ff02),
+     TOBN(0x538174de, 0xaf76f42e), TOBN(0x2267aa86, 0x6407ceac),
+     TOBN(0xfad76351, 0x72e572d5), TOBN(0xab861af7, 0xba7330eb),
+     TOBN(0xa0a1c8c7, 0x418d8657), TOBN(0x988821cb, 0x20289a52),
+     TOBN(0x79732522, 0xcccc18ad), TOBN(0xaadf3f8d, 0xf1a6e027),
+     TOBN(0xf7382c93, 0x17c2354d), TOBN(0x5ce1680c, 0xd818b689),
+     TOBN(0x359ebbfc, 0xd9ecbee9), TOBN(0x4330689c, 0x1cae62ac),
+     TOBN(0xb55ce5b4, 0xc51ac38a), TOBN(0x7921dfea, 0xfe238ee8),
+     TOBN(0x3972bef8, 0x271d1ca5), TOBN(0x3e423bc7, 0xe8aabd18),
+     TOBN(0x57b09f3f, 0x44a3e5e3), TOBN(0x5da886ae, 0x7b444d66),
+     TOBN(0x68206634, 0xa9964375), TOBN(0x356a2fa3, 0x699cd0ff),
+     TOBN(0xaf0faa24, 0xdba515e9), TOBN(0x536e1f5c, 0xb321d79a),
+     TOBN(0xd3b9913a, 0x5c04e4ea), TOBN(0xd549dcfe, 0xd6f11513),
+     TOBN(0xee227bf5, 0x79fd1d94), TOBN(0x9f35afee, 0xb43f2c67),
+     TOBN(0xd2638d24, 0xf1314f53), TOBN(0x62baf948, 0xcabcd822),
+     TOBN(0x5542de29, 0x4ef48db0), TOBN(0xb3eb6a04, 0xfc5f6bb2),
+     TOBN(0x23c110ae, 0x1208e16a), TOBN(0x1a4d15b5, 0xf8363e24),
+     TOBN(0x30716844, 0x164be00b), TOBN(0xa8e24824, 0xf6f4690d),
+     TOBN(0x548773a2, 0x90b170cf), TOBN(0xa1bef331, 0x42f191f4),
+     TOBN(0x70f418d0, 0x9247aa97), TOBN(0xea06028e, 0x48be9147),
+     TOBN(0xe13122f3, 0xdbfb894e), TOBN(0xbe9b79f6, 0xce274b18),
+     TOBN(0x85a49de5, 0xca58aadf), TOBN(0x24957758, 0x11487351),
+     TOBN(0x111def61, 0xbb939099), TOBN(0x1d6a974a, 0x26d13694),
+     TOBN(0x4474b4ce, 0xd3fc253b), TOBN(0x3a1485e6, 0x4c5db15e),
+     TOBN(0xe79667b4, 0x147c15b4), TOBN(0xe34f553b, 0x7bc61301),
+     TOBN(0x032b80f8, 0x17094381), TOBN(0x55d8bafd, 0x723eaa21),
+     TOBN(0x5a987995, 0xf1c0e74e), TOBN(0x5a9b292e, 0xebba289c),
+     TOBN(0x413cd4b2, 0xeb4c8251), TOBN(0x98b5d243, 0xd162db0a),
+     TOBN(0xbb47bf66, 0x68342520), TOBN(0x08d68949, 0xbaa862d1),
+     TOBN(0x11f349c7, 0xe906abcd), TOBN(0x454ce985, 0xed7bf00e),
+     TOBN(0xacab5c9e, 0xb55b803b), TOBN(0xb03468ea, 0x31e3c16d),
+     TOBN(0x5c24213d, 0xd273bf12), TOBN(0x211538eb, 0x71587887),
+     TOBN(0x198e4a2f, 0x731dea2d), TOBN(0xd5856cf2, 0x74ed7b2a),
+     TOBN(0x86a632eb, 0x13a664fe), TOBN(0x932cd909, 0xbda41291),
+     TOBN(0x850e95d4, 0xc0c4ddc0), TOBN(0xc0f422f8, 0x347fc2c9),
+     TOBN(0xe68cbec4, 0x86076bcb), TOBN(0xf9e7c0c0, 0xcd6cd286),
+     TOBN(0x65994ddb, 0x0f5f27ca), TOBN(0xe85461fb, 0xa80d59ff),
+     TOBN(0xff05481a, 0x66601023), TOBN(0xc665427a, 0xfc9ebbfb),
+     TOBN(0xb0571a69, 0x7587fd52), TOBN(0x935289f8, 0x8d49efce),
+     TOBN(0x61becc60, 0xea420688), TOBN(0xb22639d9, 0x13a786af),
+     TOBN(0x1a8e6220, 0x361ecf90), TOBN(0x001f23e0, 0x25506463),
+     TOBN(0xe4ae9b5d, 0x0a5c2b79), TOBN(0xebc9cdad, 0xd8149db5),
+     TOBN(0xb33164a1, 0x934aa728), TOBN(0x750eb00e, 0xae9b60f3),
+     TOBN(0x5a91615b, 0x9b9cfbfd), TOBN(0x97015cbf, 0xef45f7f6),
+     TOBN(0xb462c4a5, 0xbf5151df), TOBN(0x21adcc41, 0xb07118f2),
+     TOBN(0xd60c545b, 0x043fa42c), TOBN(0xfc21aa54, 0xe96be1ab),
+     TOBN(0xe84bc32f, 0x4e51ea80), TOBN(0x3dae45f0, 0x259b5d8d),
+     TOBN(0xbb73c7eb, 0xc38f1b5e), TOBN(0xe405a74a, 0xe8ae617d),
+     TOBN(0xbb1ae9c6, 0x9f1c56bd), TOBN(0x8c176b98, 0x49f196a4),
+     TOBN(0xc448f311, 0x6875092b), TOBN(0xb5afe3de, 0x9f976033),
+     TOBN(0xa8dafd49, 0x145813e5), TOBN(0x687fc4d9, 0xe2b34226),
+     TOBN(0xf2dfc92d, 0x4c7ff57f), TOBN(0x004e3fc1, 0x401f1b46),
+     TOBN(0x5afddab6, 0x1430c9ab), TOBN(0x0bdd41d3, 0x2238e997),
+     TOBN(0xf0947430, 0x418042ae), TOBN(0x71f9adda, 0xcdddc4cb),
+     TOBN(0x7090c016, 0xc52dd907), TOBN(0xd9bdf44d, 0x29e2047f),
+     TOBN(0xe6f1fe80, 0x1b1011a6), TOBN(0xb63accbc, 0xd9acdc78),
+     TOBN(0xcfc7e235, 0x1272a95b), TOBN(0x0c667717, 0xa6276ac8),
+     TOBN(0x3c0d3709, 0xe2d7eef7), TOBN(0x5add2b06, 0x9a685b3e),
+     TOBN(0x363ad32d, 0x14ea5d65), TOBN(0xf8e01f06, 0x8d7dd506),
+     TOBN(0xc9ea2213, 0x75b4aac6), TOBN(0xed2a2bf9, 0x0d353466),
+     TOBN(0x439d79b5, 0xe9d3a7c3), TOBN(0x8e0ee5a6, 0x81b7f34b),
+     TOBN(0xcf3dacf5, 0x1dc4ba75), TOBN(0x1d3d1773, 0xeb3310c7),
+     TOBN(0xa8e67112, 0x7747ae83), TOBN(0x31f43160, 0x197d6b40),
+     TOBN(0x0521ccee, 0xcd961400), TOBN(0x67246f11, 0xf6535768),
+     TOBN(0x702fcc5a, 0xef0c3133), TOBN(0x247cc45d, 0x7e16693b),
+     TOBN(0xfd484e49, 0xc729b749), TOBN(0x522cef7d, 0xb218320f),
+     TOBN(0xe56ef405, 0x59ab93b3), TOBN(0x225fba11, 0x9f181071),
+     TOBN(0x33bd6595, 0x15330ed0), TOBN(0xc4be69d5, 0x1ddb32f7),
+     TOBN(0x264c7668, 0x0448087c), TOBN(0xac30903f, 0x71432dae),
+     TOBN(0x3851b266, 0x00f9bf47), TOBN(0x400ed311, 0x6cdd6d03),
+     TOBN(0x045e79fe, 0xf8fd2424), TOBN(0xfdfd974a, 0xfa6da98b),
+     TOBN(0x45c9f641, 0x0c1e673a), TOBN(0x76f2e733, 0x5b2c5168),
+     TOBN(0x1adaebb5, 0x2a601753), TOBN(0xb286514c, 0xc57c2d49),
+     TOBN(0xd8769670, 0x1e0bfd24), TOBN(0x950c547e, 0x04478922),
+     TOBN(0xd1d41969, 0xe5d32bfe), TOBN(0x30bc1472, 0x750d6c3e),
+     TOBN(0x8f3679fe, 0xe0e27f3a), TOBN(0x8f64a7dc, 0xa4a6ee0c),
+     TOBN(0x2fe59937, 0x633dfb1f), TOBN(0xea82c395, 0x977f2547),
+     TOBN(0xcbdfdf1a, 0x661ea646), TOBN(0xc7ccc591, 0xb9085451),
+     TOBN(0x82177962, 0x81761e13), TOBN(0xda57596f, 0x9196885c),
+     TOBN(0xbc17e849, 0x28ffbd70), TOBN(0x1e6e0a41, 0x2671d36f),
+     TOBN(0x61ae872c, 0x4152fcf5), TOBN(0x441c87b0, 0x9e77e754),
+     TOBN(0xd0799dd5, 0xa34dff09), TOBN(0x766b4e44, 0x88a6b171),
+     TOBN(0xdc06a512, 0x11f1c792), TOBN(0xea02ae93, 0x4be35c3e),
+     TOBN(0xe5ca4d6d, 0xe90c469e), TOBN(0x4df4368e, 0x56e4ff5c),
+     TOBN(0x7817acab, 0x4baef62e), TOBN(0x9f5a2202, 0xa85b91e8),
+     TOBN(0x9666ebe6, 0x6ce57610), TOBN(0x32ad31f3, 0xf73bfe03),
+     TOBN(0x628330a4, 0x25bcf4d6), TOBN(0xea950593, 0x515056e6),
+     TOBN(0x59811c89, 0xe1332156), TOBN(0xc89cf1fe, 0x8c11b2d7),
+     TOBN(0x75b63913, 0x04e60cc0), TOBN(0xce811e8d, 0x4625d375),
+     TOBN(0x030e43fc, 0x2d26e562), TOBN(0xfbb30b4b, 0x608d36a0),
+     TOBN(0x634ff82c, 0x48528118), TOBN(0x7c6fe085, 0xcd285911),
+     TOBN(0x7f2830c0, 0x99358f28), TOBN(0x2e60a95e, 0x665e6c09),
+     TOBN(0x08407d3d, 0x9b785dbf), TOBN(0x530889ab, 0xa759bce7),
+     TOBN(0xf228e0e6, 0x52f61239), TOBN(0x2b6d1461, 0x6879be3c),
+     TOBN(0xe6902c04, 0x51a7bbf7), TOBN(0x30ad99f0, 0x76f24a64),
+     TOBN(0x66d9317a, 0x98bc6da0), TOBN(0xf4f877f3, 0xcb596ac0),
+     TOBN(0xb05ff62d, 0x4c44f119), TOBN(0x4555f536, 0xe9b77416),
+     TOBN(0xc7c0d059, 0x8caed63b), TOBN(0x0cd2b7ce, 0xc358b2a9),
+     TOBN(0x3f33287b, 0x46945fa3), TOBN(0xf8785b20, 0xd67c8791),
+     TOBN(0xc54a7a61, 0x9637bd08), TOBN(0x54d4598c, 0x18be79d7),
+     TOBN(0x889e5acb, 0xc46d7ce1), TOBN(0x9a515bb7, 0x8b085877),
+     TOBN(0xfac1a03d, 0x0b7a5050), TOBN(0x7d3e738a, 0xf2926035),
+     TOBN(0x861cc2ce, 0x2a6cb0eb), TOBN(0x6f2e2955, 0x8f7adc79),
+     TOBN(0x61c4d451, 0x33016376), TOBN(0xd9fd2c80, 0x5ad59090),
+     TOBN(0xe5a83738, 0xb2b836a1), TOBN(0x855b41a0, 0x7c0d6622),
+     TOBN(0x186fe317, 0x7cc19af1), TOBN(0x6465c1ff, 0xfdd99acb),
+     TOBN(0x46e5c23f, 0x6974b99e), TOBN(0x75a7cf8b, 0xa2717cbe),
+     TOBN(0x4d2ebc3f, 0x062be658), TOBN(0x094b4447, 0x5f209c98),
+     TOBN(0x4af285ed, 0xb940cb5a), TOBN(0x6706d792, 0x7cc82f10),
+     TOBN(0xc8c8776c, 0x030526fa), TOBN(0xfa8e6f76, 0xa0da9140),
+     TOBN(0x77ea9d34, 0x591ee4f0), TOBN(0x5f46e337, 0x40274166),
+     TOBN(0x1bdf98bb, 0xea671457), TOBN(0xd7c08b46, 0x862a1fe2),
+     TOBN(0x46cc303c, 0x1c08ad63), TOBN(0x99543440, 0x4c845e7b),
+     TOBN(0x1b8fbdb5, 0x48f36bf7), TOBN(0x5b82c392, 0x8c8273a7),
+     TOBN(0x08f712c4, 0x928435d5), TOBN(0x071cf0f1, 0x79330380),
+     TOBN(0xc74c2d24, 0xa8da054a), TOBN(0xcb0e7201, 0x43c46b5c),
+     TOBN(0x0ad7337a, 0xc0b7eff3), TOBN(0x8552225e, 0xc5e48b3c),
+     TOBN(0xe6f78b0c, 0x73f13a5f), TOBN(0x5e70062e, 0x82349cbe),
+     TOBN(0x6b8d5048, 0xe7073969), TOBN(0x392d2a29, 0xc33cb3d2),
+     TOBN(0xee4f727c, 0x4ecaa20f), TOBN(0xa068c99e, 0x2ccde707),
+     TOBN(0xfcd5651f, 0xb87a2913), TOBN(0xea3e3c15, 0x3cc252f0),
+     TOBN(0x777d92df, 0x3b6cd3e4), TOBN(0x7a414143, 0xc5a732e7),
+     TOBN(0xa895951a, 0xa71ff493), TOBN(0xfe980c92, 0xbbd37cf6),
+     TOBN(0x45bd5e64, 0xdecfeeff), TOBN(0x910dc2a9, 0xa44c43e9),
+     TOBN(0xcb403f26, 0xcca9f54d), TOBN(0x928bbdfb, 0x9303f6db),
+     TOBN(0x3c37951e, 0xa9eee67c), TOBN(0x3bd61a52, 0xf79961c3),
+     TOBN(0x09a238e6, 0x395c9a79), TOBN(0x6940ca2d, 0x61eb352d),
+     TOBN(0x7d1e5c5e, 0xc1875631), TOBN(0x1e19742c, 0x1e1b20d1),
+     TOBN(0x4633d908, 0x23fc2e6e), TOBN(0xa76e29a9, 0x08959149),
+     TOBN(0x61069d9c, 0x84ed7da5), TOBN(0x0baa11cf, 0x5dbcad51),
+     TOBN(0xd01eec64, 0x961849da), TOBN(0x93b75f1f, 0xaf3d8c28),
+     TOBN(0x57bc4f9f, 0x1ca2ee44), TOBN(0x5a26322d, 0x00e00558),
+     TOBN(0x1888d658, 0x61a023ef), TOBN(0x1d72aab4, 0xb9e5246e),
+     TOBN(0xa9a26348, 0xe5563ec0), TOBN(0xa0971963, 0xc3439a43),
+     TOBN(0x567dd54b, 0xadb9b5b7), TOBN(0x73fac1a1, 0xc45a524b),
+     TOBN(0x8fe97ef7, 0xfe38e608), TOBN(0x608748d2, 0x3f384f48),
+     TOBN(0xb0571794, 0xc486094f), TOBN(0x869254a3, 0x8bf3a8d6),
+     TOBN(0x148a8dd1, 0x310b0e25), TOBN(0x99ab9f3f, 0x9aa3f7d8),
+     TOBN(0x0927c68a, 0x6706c02e), TOBN(0x22b5e76c, 0x69790e6c),
+     TOBN(0x6c325260, 0x6c71376c), TOBN(0x53a57690, 0x09ef6657),
+     TOBN(0x8d63f852, 0xedffcf3a), TOBN(0xb4d2ed04, 0x3c0a6f55),
+     TOBN(0xdb3aa8de, 0x12519b9e), TOBN(0x5d38e9c4, 0x1e0a569a),
+     TOBN(0x871528bf, 0x303747e2), TOBN(0xa208e77c, 0xf5b5c18d),
+     TOBN(0x9d129c88, 0xca6bf923), TOBN(0xbcbf197f, 0xbf02839f),
+     TOBN(0x9b9bf030, 0x27323194), TOBN(0x3b055a8b, 0x339ca59d),
+     TOBN(0xb46b2312, 0x0f669520), TOBN(0x19789f1f, 0x497e5f24),
+     TOBN(0x9c499468, 0xaaf01801), TOBN(0x72ee1190, 0x8b69d59c),
+     TOBN(0x8bd39595, 0xacf4c079), TOBN(0x3ee11ece, 0x8e0cd048),
+     TOBN(0xebde86ec, 0x1ed66f18), TOBN(0x225d906b, 0xd61fce43),
+     TOBN(0x5cab07d6, 0xe8bed74d), TOBN(0x16e4617f, 0x27855ab7),
+     TOBN(0x6568aadd, 0xb2fbc3dd), TOBN(0xedb5484f, 0x8aeddf5b),
+     TOBN(0x878f20e8, 0x6dcf2fad), TOBN(0x3516497c, 0x615f5699),}
+    ,
+    {TOBN(0xef0a3fec, 0xfa181e69), TOBN(0x9ea02f81, 0x30d69a98),
+     TOBN(0xb2e9cf8e, 0x66eab95d), TOBN(0x520f2beb, 0x24720021),
+     TOBN(0x621c540a, 0x1df84361), TOBN(0x12037721, 0x71fa6d5d),
+     TOBN(0x6e3c7b51, 0x0ff5f6ff), TOBN(0x817a069b, 0xabb2bef3),
+     TOBN(0x83572fb6, 0xb294cda6), TOBN(0x6ce9bf75, 0xb9039f34),
+     TOBN(0x20e012f0, 0x095cbb21), TOBN(0xa0aecc1b, 0xd063f0da),
+     TOBN(0x57c21c3a, 0xf02909e5), TOBN(0xc7d59ecf, 0x48ce9cdc),
+     TOBN(0x2732b844, 0x8ae336f8), TOBN(0x056e3723, 0x3f4f85f4),
+     TOBN(0x8a10b531, 0x89e800ca), TOBN(0x50fe0c17, 0x145208fd),
+     TOBN(0x9e43c0d3, 0xb714ba37), TOBN(0x427d200e, 0x34189acc),
+     TOBN(0x05dee24f, 0xe616e2c0), TOBN(0x9c25f4c8, 0xee1854c1),
+     TOBN(0x4d3222a5, 0x8f342a73), TOBN(0x0807804f, 0xa027c952),
+     TOBN(0xc222653a, 0x4f0d56f3), TOBN(0x961e4047, 0xca28b805),
+     TOBN(0x2c03f8b0, 0x4a73434b), TOBN(0x4c966787, 0xab712a19),
+     TOBN(0xcc196c42, 0x864fee42), TOBN(0xc1be93da, 0x5b0ece5c),
+     TOBN(0xa87d9f22, 0xc131c159), TOBN(0x2bb6d593, 0xdce45655),
+     TOBN(0x22c49ec9, 0xb809b7ce), TOBN(0x8a41486b, 0xe2c72c2c),
+     TOBN(0x813b9420, 0xfea0bf36), TOBN(0xb3d36ee9, 0xa66dac69),
+     TOBN(0x6fddc08a, 0x328cc987), TOBN(0x0a3bcd2c, 0x3a326461),
+     TOBN(0x7103c49d, 0xd810dbba), TOBN(0xf9d81a28, 0x4b78a4c4),
+     TOBN(0x3de865ad, 0xe4d55941), TOBN(0xdedafa5e, 0x30384087),
+     TOBN(0x6f414abb, 0x4ef18b9b), TOBN(0x9ee9ea42, 0xfaee5268),
+     TOBN(0x260faa16, 0x37a55a4a), TOBN(0xeb19a514, 0x015f93b9),
+     TOBN(0x51d7ebd2, 0x9e9c3598), TOBN(0x523fc56d, 0x1932178e),
+     TOBN(0x501d070c, 0xb98fe684), TOBN(0xd60fbe9a, 0x124a1458),
+     TOBN(0xa45761c8, 0x92bc6b3f), TOBN(0xf5384858, 0xfe6f27cb),
+     TOBN(0x4b0271f7, 0xb59e763b), TOBN(0x3d4606a9, 0x5b5a8e5e),
+     TOBN(0x1eda5d9b, 0x05a48292), TOBN(0xda7731d0, 0xe6fec446),
+     TOBN(0xa3e33693, 0x90d45871), TOBN(0xe9764040, 0x06166d8d),
+     TOBN(0xb5c33682, 0x89a90403), TOBN(0x4bd17983, 0x72f1d637),
+     TOBN(0xa616679e, 0xd5d2c53a), TOBN(0x5ec4bcd8, 0xfdcf3b87),
+     TOBN(0xae6d7613, 0xb66a694e), TOBN(0x7460fc76, 0xe3fc27e5),
+     TOBN(0x70469b82, 0x95caabee), TOBN(0xde024ca5, 0x889501e3),
+     TOBN(0x6bdadc06, 0x076ed265), TOBN(0x0cb1236b, 0x5a0ef8b2),
+     TOBN(0x4065ddbf, 0x0972ebf9), TOBN(0xf1dd3875, 0x22aca432),
+     TOBN(0xa88b97cf, 0x744aff76), TOBN(0xd1359afd, 0xfe8e3d24),
+     TOBN(0x52a3ba2b, 0x91502cf3), TOBN(0x2c3832a8, 0x084db75d),
+     TOBN(0x04a12ddd, 0xde30b1c9), TOBN(0x7802eabc, 0xe31fd60c),
+     TOBN(0x33707327, 0xa37fddab), TOBN(0x65d6f2ab, 0xfaafa973),
+     TOBN(0x3525c5b8, 0x11e6f91a), TOBN(0x76aeb0c9, 0x5f46530b),
+     TOBN(0xe8815ff6, 0x2f93a675), TOBN(0xa6ec9684, 0x05f48679),
+     TOBN(0x6dcbb556, 0x358ae884), TOBN(0x0af61472, 0xe19e3873),
+     TOBN(0x72334372, 0xa5f696be), TOBN(0xc65e57ea, 0x6f22fb70),
+     TOBN(0x268da30c, 0x946cea90), TOBN(0x136a8a87, 0x65681b2a),
+     TOBN(0xad5e81dc, 0x0f9f44d4), TOBN(0xf09a6960, 0x2c46585a),
+     TOBN(0xd1649164, 0xc447d1b1), TOBN(0x3b4b36c8, 0x879dc8b1),
+     TOBN(0x20d4177b, 0x3b6b234c), TOBN(0x096a2505, 0x1730d9d0),
+     TOBN(0x0611b9b8, 0xef80531d), TOBN(0xba904b3b, 0x64bb495d),
+     TOBN(0x1192d9d4, 0x93a3147a), TOBN(0x9f30a5dc, 0x9a565545),
+     TOBN(0x90b1f9cb, 0x6ef07212), TOBN(0x29958546, 0x0d87fc13),
+     TOBN(0xd3323eff, 0xc17db9ba), TOBN(0xcb18548c, 0xcb1644a8),
+     TOBN(0x18a306d4, 0x4f49ffbc), TOBN(0x28d658f1, 0x4c2e8684),
+     TOBN(0x44ba60cd, 0xa99f8c71), TOBN(0x67b7abdb, 0x4bf742ff),
+     TOBN(0x66310f9c, 0x914b3f99), TOBN(0xae430a32, 0xf412c161),
+     TOBN(0x1e6776d3, 0x88ace52f), TOBN(0x4bc0fa24, 0x52d7067d),
+     TOBN(0x03c286aa, 0x8f07cd1b), TOBN(0x4cb8f38c, 0xa985b2c1),
+     TOBN(0x83ccbe80, 0x8c3bff36), TOBN(0x005a0bd2, 0x5263e575),
+     TOBN(0x460d7dda, 0x259bdcd1), TOBN(0x4a1c5642, 0xfa5cab6b),
+     TOBN(0x2b7bdbb9, 0x9fe4fc88), TOBN(0x09418e28, 0xcc97bbb5),
+     TOBN(0xd8274fb4, 0xa12321ae), TOBN(0xb137007d, 0x5c87b64e),
+     TOBN(0x80531fe1, 0xc63c4962), TOBN(0x50541e89, 0x981fdb25),
+     TOBN(0xdc1291a1, 0xfd4c2b6b), TOBN(0xc0693a17, 0xa6df4fca),
+     TOBN(0xb2c4604e, 0x0117f203), TOBN(0x245f1963, 0x0a99b8d0),
+     TOBN(0xaedc20aa, 0xc6212c44), TOBN(0xb1ed4e56, 0x520f52a8),
+     TOBN(0xfe48f575, 0xf8547be3), TOBN(0x0a7033cd, 0xa9e45f98),
+     TOBN(0x4b45d3a9, 0x18c50100), TOBN(0xb2a6cd6a, 0xa61d41da),
+     TOBN(0x60bbb4f5, 0x57933c6b), TOBN(0xa7538ebd, 0x2b0d7ffc),
+     TOBN(0x9ea3ab8d, 0x8cd626b6), TOBN(0x8273a484, 0x3601625a),
+     TOBN(0x88859845, 0x0168e508), TOBN(0x8cbc9bb2, 0x99a94abd),
+     TOBN(0x713ac792, 0xfab0a671), TOBN(0xa3995b19, 0x6c9ebffc),
+     TOBN(0xe711668e, 0x1239e152), TOBN(0x56892558, 0xbbb8dff4),
+     TOBN(0x8bfc7dab, 0xdbf17963), TOBN(0x5b59fe5a, 0xb3de1253),
+     TOBN(0x7e3320eb, 0x34a9f7ae), TOBN(0xe5e8cf72, 0xd751efe4),
+     TOBN(0x7ea003bc, 0xd9be2f37), TOBN(0xc0f551a0, 0xb6c08ef7),
+     TOBN(0x56606268, 0x038f6725), TOBN(0x1dd38e35, 0x6d92d3b6),
+     TOBN(0x07dfce7c, 0xc3cbd686), TOBN(0x4e549e04, 0x651c5da8),
+     TOBN(0x4058f93b, 0x08b19340), TOBN(0xc2fae6f4, 0xcac6d89d),
+     TOBN(0x4bad8a8c, 0x8f159cc7), TOBN(0x0ddba4b3, 0xcb0b601c),
+     TOBN(0xda4fc7b5, 0x1dd95f8c), TOBN(0x1d163cd7, 0xcea5c255),
+     TOBN(0x30707d06, 0x274a8c4c), TOBN(0x79d9e008, 0x2802e9ce),
+     TOBN(0x02a29ebf, 0xe6ddd505), TOBN(0x37064e74, 0xb50bed1a),
+     TOBN(0x3f6bae65, 0xa7327d57), TOBN(0x3846f5f1, 0xf83920bc),
+     TOBN(0x87c37491, 0x60df1b9b), TOBN(0x4cfb2895, 0x2d1da29f),
+     TOBN(0x10a478ca, 0x4ed1743c), TOBN(0x390c6030, 0x3edd47c6),
+     TOBN(0x8f3e5312, 0x8c0a78de), TOBN(0xccd02bda, 0x1e85df70),
+     TOBN(0xd6c75c03, 0xa61b6582), TOBN(0x0762921c, 0xfc0eebd1),
+     TOBN(0xd34d0823, 0xd85010c0), TOBN(0xd73aaacb, 0x0044cf1f),
+     TOBN(0xfb4159bb, 0xa3b5e78a), TOBN(0x2287c7f7, 0xe5826f3f),
+     TOBN(0x4aeaf742, 0x580b1a01), TOBN(0xf080415d, 0x60423b79),
+     TOBN(0xe12622cd, 0xa7dea144), TOBN(0x49ea4996, 0x59d62472),
+     TOBN(0xb42991ef, 0x571f3913), TOBN(0x0610f214, 0xf5b25a8a),
+     TOBN(0x47adc585, 0x30b79e8f), TOBN(0xf90e3df6, 0x07a065a2),
+     TOBN(0x5d0a5deb, 0x43e2e034), TOBN(0x53fb5a34, 0x444024aa),
+     TOBN(0xa8628c68, 0x6b0c9f7f), TOBN(0x9c69c29c, 0xac563656),
+     TOBN(0x5a231feb, 0xbace47b6), TOBN(0xbdce0289, 0x9ea5a2ec),
+     TOBN(0x05da1fac, 0x9463853e), TOBN(0x96812c52, 0x509e78aa),
+     TOBN(0xd3fb5771, 0x57151692), TOBN(0xeb2721f8, 0xd98e1c44),
+     TOBN(0xc0506087, 0x32399be1), TOBN(0xda5a5511, 0xd979d8b8),
+     TOBN(0x737ed55d, 0xc6f56780), TOBN(0xe20d3004, 0x0dc7a7f4),
+     TOBN(0x02ce7301, 0xf5941a03), TOBN(0x91ef5215, 0xed30f83a),
+     TOBN(0x28727fc1, 0x4092d85f), TOBN(0x72d223c6, 0x5c49e41a),
+     TOBN(0xa7cf30a2, 0xba6a4d81), TOBN(0x7c086209, 0xb030d87d),
+     TOBN(0x04844c7d, 0xfc588b09), TOBN(0x728cd499, 0x5874bbb0),
+     TOBN(0xcc1281ee, 0xe84c0495), TOBN(0x0769b5ba, 0xec31958f),
+     TOBN(0x665c228b, 0xf99c2471), TOBN(0xf2d8a11b, 0x191eb110),
+     TOBN(0x4594f494, 0xd36d7024), TOBN(0x482ded8b, 0xcdcb25a1),
+     TOBN(0xc958a9d8, 0xdadd4885), TOBN(0x7004477e, 0xf1d2b547),
+     TOBN(0x0a45f6ef, 0x2a0af550), TOBN(0x4fc739d6, 0x2f8d6351),
+     TOBN(0x75cdaf27, 0x786f08a9), TOBN(0x8700bb26, 0x42c2737f),
+     TOBN(0x855a7141, 0x1c4e2670), TOBN(0x810188c1, 0x15076fef),
+     TOBN(0xc251d0c9, 0xabcd3297), TOBN(0xae4c8967, 0xf48108eb),
+     TOBN(0xbd146de7, 0x18ceed30), TOBN(0xf9d4f07a, 0xc986bced),
+     TOBN(0x5ad98ed5, 0x83fa1e08), TOBN(0x7780d33e, 0xbeabd1fb),
+     TOBN(0xe330513c, 0x903b1196), TOBN(0xba11de9e, 0xa47bc8c4),
+     TOBN(0x684334da, 0x02c2d064), TOBN(0x7ecf360d, 0xa48de23b),
+     TOBN(0x57a1b474, 0x0a9089d8), TOBN(0xf28fa439, 0xff36734c),
+     TOBN(0xf2a482cb, 0xea4570b3), TOBN(0xee65d68b, 0xa5ebcee9),
+     TOBN(0x988d0036, 0xb9694cd5), TOBN(0x53edd0e9, 0x37885d32),
+     TOBN(0xe37e3307, 0xbeb9bc6d), TOBN(0xe9abb907, 0x9f5c6768),
+     TOBN(0x4396ccd5, 0x51f2160f), TOBN(0x2500888c, 0x47336da6),
+     TOBN(0x383f9ed9, 0x926fce43), TOBN(0x809dd1c7, 0x04da2930),
+     TOBN(0x30f6f596, 0x8a4cb227), TOBN(0x0d700c7f, 0x73a56b38),
+     TOBN(0x1825ea33, 0xab64a065), TOBN(0xaab9b735, 0x1338df80),
+     TOBN(0x1516100d, 0x9b63f57f), TOBN(0x2574395a, 0x27a6a634),
+     TOBN(0xb5560fb6, 0x700a1acd), TOBN(0xe823fd73, 0xfd999681),
+     TOBN(0xda915d1f, 0x6cb4e1ba), TOBN(0x0d030118, 0x6ebe00a3),
+     TOBN(0x744fb0c9, 0x89fca8cd), TOBN(0x970d01db, 0xf9da0e0b),
+     TOBN(0x0ad8c564, 0x7931d76f), TOBN(0xb15737bf, 0xf659b96a),
+     TOBN(0xdc9933e8, 0xa8b484e7), TOBN(0xb2fdbdf9, 0x7a26dec7),
+     TOBN(0x2349e9a4, 0x9f1f0136), TOBN(0x7860368e, 0x70fddddb),
+     TOBN(0xd93d2c1c, 0xf9ad3e18), TOBN(0x6d6c5f17, 0x689f4e79),
+     TOBN(0x7a544d91, 0xb24ff1b6), TOBN(0x3e12a5eb, 0xfe16cd8c),
+     TOBN(0x543574e9, 0xa56b872f), TOBN(0xa1ad550c, 0xfcf68ea2),
+     TOBN(0x689e37d2, 0x3f560ef7), TOBN(0x8c54b9ca, 0xc9d47a8b),
+     TOBN(0x46d40a4a, 0x088ac342), TOBN(0xec450c7c, 0x1576c6d0),
+     TOBN(0xb589e31c, 0x1f9689e9), TOBN(0xdacf2602, 0xb8781718),
+     TOBN(0xa89237c6, 0xc8cb6b42), TOBN(0x1326fc93, 0xb96ef381),
+     TOBN(0x55d56c6d, 0xb5f07825), TOBN(0xacba2eea, 0x7449e22d),
+     TOBN(0x74e0887a, 0x633c3000), TOBN(0xcb6cd172, 0xd7cbcf71),
+     TOBN(0x309e81de, 0xc36cf1be), TOBN(0x07a18a6d, 0x60ae399b),
+     TOBN(0xb36c2679, 0x9edce57e), TOBN(0x52b892f4, 0xdf001d41),
+     TOBN(0xd884ae5d, 0x16a1f2c6), TOBN(0x9b329424, 0xefcc370a),
+     TOBN(0x3120daf2, 0xbd2e21df), TOBN(0x55298d2d, 0x02470a99),
+     TOBN(0x0b78af6c, 0xa05db32e), TOBN(0x5c76a331, 0x601f5636),
+     TOBN(0xaae861ff, 0xf8a4f29c), TOBN(0x70dc9240, 0xd68f8d49),
+     TOBN(0x960e649f, 0x81b1321c), TOBN(0x3d2c801b, 0x8792e4ce),
+     TOBN(0xf479f772, 0x42521876), TOBN(0x0bed93bc, 0x416c79b1),
+     TOBN(0xa67fbc05, 0x263e5bc9), TOBN(0x01e8e630, 0x521db049),
+     TOBN(0x76f26738, 0xc6f3431e), TOBN(0xe609cb02, 0xe3267541),
+     TOBN(0xb10cff2d, 0x818c877c), TOBN(0x1f0e75ce, 0x786a13cb),
+     TOBN(0xf4fdca64, 0x1158544d), TOBN(0x5d777e89, 0x6cb71ed0),
+     TOBN(0x3c233737, 0xa9aa4755), TOBN(0x7b453192, 0xe527ab40),
+     TOBN(0xdb59f688, 0x39f05ffe), TOBN(0x8f4f4be0, 0x6d82574e),
+     TOBN(0xcce3450c, 0xee292d1b), TOBN(0xaa448a12, 0x61ccd086),
+     TOBN(0xabce91b3, 0xf7914967), TOBN(0x4537f09b, 0x1908a5ed),
+     TOBN(0xa812421e, 0xf51042e7), TOBN(0xfaf5cebc, 0xec0b3a34),
+     TOBN(0x730ffd87, 0x4ca6b39a), TOBN(0x70fb72ed, 0x02efd342),
+     TOBN(0xeb4735f9, 0xd75c8edb), TOBN(0xc11f2157, 0xc278aa51),
+     TOBN(0xc459f635, 0xbf3bfebf), TOBN(0x3a1ff0b4, 0x6bd9601f),
+     TOBN(0xc9d12823, 0xc420cb73), TOBN(0x3e9af3e2, 0x3c2915a3),
+     TOBN(0xe0c82c72, 0xb41c3440), TOBN(0x175239e5, 0xe3039a5f),
+     TOBN(0xe1084b8a, 0x558795a3), TOBN(0x328d0a1d, 0xd01e5c60),
+     TOBN(0x0a495f2e, 0xd3788a04), TOBN(0x25d8ff16, 0x66c11a9f),
+     TOBN(0xf5155f05, 0x9ed692d6), TOBN(0x954fa107, 0x4f425fe4),
+     TOBN(0xd16aabf2, 0xe98aaa99), TOBN(0x90cd8ba0, 0x96b0f88a),
+     TOBN(0x957f4782, 0xc154026a), TOBN(0x54ee0734, 0x52af56d2),
+     TOBN(0xbcf89e54, 0x45b4147a), TOBN(0x3d102f21, 0x9a52816c),
+     TOBN(0x6808517e, 0x39b62e77), TOBN(0x92e25421, 0x69169ad8),
+     TOBN(0xd721d871, 0xbb608558), TOBN(0x60e4ebae, 0xf6d4ff9b),
+     TOBN(0x0ba10819, 0x41f2763e), TOBN(0xca2e45be, 0x51ee3247),
+     TOBN(0x66d172ec, 0x2bfd7a5f), TOBN(0x528a8f2f, 0x74d0b12d),
+     TOBN(0xe17f1e38, 0xdabe70dc), TOBN(0x1d5d7316, 0x9f93983c),
+     TOBN(0x51b2184a, 0xdf423e31), TOBN(0xcb417291, 0xaedb1a10),
+     TOBN(0x2054ca93, 0x625bcab9), TOBN(0x54396860, 0xa98998f0),
+     TOBN(0x4e53f6c4, 0xa54ae57e), TOBN(0x0ffeb590, 0xee648e9d),
+     TOBN(0xfbbdaadc, 0x6afaf6bc), TOBN(0xf88ae796, 0xaa3bfb8a),
+     TOBN(0x209f1d44, 0xd2359ed9), TOBN(0xac68dd03, 0xf3544ce2),
+     TOBN(0xf378da47, 0xfd51e569), TOBN(0xe1abd860, 0x2cc80097),
+     TOBN(0x23ca18d9, 0x343b6e3a), TOBN(0x480797e8, 0xb40a1bae),
+     TOBN(0xd1f0c717, 0x533f3e67), TOBN(0x44896970, 0x06e6cdfc),
+     TOBN(0x8ca21055, 0x52a82e8d), TOBN(0xb2caf785, 0x78460cdc),
+     TOBN(0x4c1b7b62, 0xe9037178), TOBN(0xefc09d2c, 0xdb514b58),
+     TOBN(0x5f2df9ee, 0x9113be5c), TOBN(0x2fbda78f, 0xb3f9271c),
+     TOBN(0xe09a81af, 0x8f83fc54), TOBN(0x06b13866, 0x8afb5141),
+     TOBN(0x38f6480f, 0x43e3865d), TOBN(0x72dd77a8, 0x1ddf47d9),
+     TOBN(0xf2a8e971, 0x4c205ff7), TOBN(0x46d449d8, 0x9d088ad8),
+     TOBN(0x926619ea, 0x185d706f), TOBN(0xe47e02eb, 0xc7dd7f62),
+     TOBN(0xe7f120a7, 0x8cbc2031), TOBN(0xc18bef00, 0x998d4ac9),
+     TOBN(0x18f37a9c, 0x6bdf22da), TOBN(0xefbc432f, 0x90dc82df),
+     TOBN(0xc52cef8e, 0x5d703651), TOBN(0x82887ba0, 0xd99881a5),
+     TOBN(0x7cec9dda, 0xb920ec1d), TOBN(0xd0d7e8c3, 0xec3e8d3b),
+     TOBN(0x445bc395, 0x4ca88747), TOBN(0xedeaa2e0, 0x9fd53535),
+     TOBN(0x461b1d93, 0x6cc87475), TOBN(0xd92a52e2, 0x6d2383bd),
+     TOBN(0xfabccb59, 0xd7903546), TOBN(0x6111a761, 0x3d14b112),
+     TOBN(0x0ae584fe, 0xb3d5f612), TOBN(0x5ea69b8d, 0x60e828ec),
+     TOBN(0x6c078985, 0x54087030), TOBN(0x649cab04, 0xac4821fe),
+     TOBN(0x25ecedcf, 0x8bdce214), TOBN(0xb5622f72, 0x86af7361),
+     TOBN(0x0e1227aa, 0x7038b9e2), TOBN(0xd0efb273, 0xac20fa77),
+     TOBN(0x817ff88b, 0x79df975b), TOBN(0x856bf286, 0x1999503e),
+     TOBN(0xb4d5351f, 0x5038ec46), TOBN(0x740a52c5, 0xfc42af6e),
+     TOBN(0x2e38bb15, 0x2cbb1a3f), TOBN(0xc3eb99fe, 0x17a83429),
+     TOBN(0xca4fcbf1, 0xdd66bb74), TOBN(0x880784d6, 0xcde5e8fc),
+     TOBN(0xddc84c1c, 0xb4e7a0be), TOBN(0x8780510d, 0xbd15a72f),
+     TOBN(0x44bcf1af, 0x81ec30e1), TOBN(0x141e50a8, 0x0a61073e),
+     TOBN(0x0d955718, 0x47be87ae), TOBN(0x68a61417, 0xf76a4372),
+     TOBN(0xf57e7e87, 0xc607c3d3), TOBN(0x043afaf8, 0x5252f332),
+     TOBN(0xcc14e121, 0x1552a4d2), TOBN(0xb6dee692, 0xbb4d4ab4),
+     TOBN(0xb6ab74c8, 0xa03816a4), TOBN(0x84001ae4, 0x6f394a29),
+     TOBN(0x5bed8344, 0xd795fb45), TOBN(0x57326e7d, 0xb79f55a5),
+     TOBN(0xc9533ce0, 0x4accdffc), TOBN(0x53473caf, 0x3993fa04),
+     TOBN(0x7906eb93, 0xa13df4c8), TOBN(0xa73e51f6, 0x97cbe46f),
+     TOBN(0xd1ab3ae1, 0x0ae4ccf8), TOBN(0x25614508, 0x8a5b3dbc),
+     TOBN(0x61eff962, 0x11a71b27), TOBN(0xdf71412b, 0x6bb7fa39),
+     TOBN(0xb31ba6b8, 0x2bd7f3ef), TOBN(0xb0b9c415, 0x69180d29),
+     TOBN(0xeec14552, 0x014cdde5), TOBN(0x702c624b, 0x227b4bbb),
+     TOBN(0x2b15e8c2, 0xd3e988f3), TOBN(0xee3bcc6d, 0xa4f7fd04),
+     TOBN(0x9d00822a, 0x42ac6c85), TOBN(0x2db0cea6, 0x1df9f2b7),
+     TOBN(0xd7cad2ab, 0x42de1e58), TOBN(0x346ed526, 0x2d6fbb61),
+     TOBN(0xb3962995, 0x1a2faf09), TOBN(0x2fa8a580, 0x7c25612e),
+     TOBN(0x30ae04da, 0x7cf56490), TOBN(0x75662908, 0x0eea3961),
+     TOBN(0x3609f5c5, 0x3d080847), TOBN(0xcb081d39, 0x5241d4f6),
+     TOBN(0xb4fb3810, 0x77961a63), TOBN(0xc20c5984, 0x2abb66fc),
+     TOBN(0x3d40aa7c, 0xf902f245), TOBN(0x9cb12736, 0x4e536b1e),
+     TOBN(0x5eda24da, 0x99b3134f), TOBN(0xafbd9c69, 0x5cd011af),
+     TOBN(0x9a16e30a, 0xc7088c7d), TOBN(0x5ab65710, 0x3207389f),
+     TOBN(0x1b09547f, 0xe7407a53), TOBN(0x2322f9d7, 0x4fdc6eab),
+     TOBN(0xc0f2f22d, 0x7430de4d), TOBN(0x19382696, 0xe68ca9a9),
+     TOBN(0x17f1eff1, 0x918e5868), TOBN(0xe3b5b635, 0x586f4204),
+     TOBN(0x146ef980, 0x3fbc4341), TOBN(0x359f2c80, 0x5b5eed4e),
+     TOBN(0x9f35744e, 0x7482e41d), TOBN(0x9a9ac3ec, 0xf3b224c2),
+     TOBN(0x9161a6fe, 0x91fc50ae), TOBN(0x89ccc66b, 0xc613fa7c),
+     TOBN(0x89268b14, 0xc732f15a), TOBN(0x7cd6f4e2, 0xb467ed03),
+     TOBN(0xfbf79869, 0xce56b40e), TOBN(0xf93e094c, 0xc02dde98),
+     TOBN(0xefe0c3a8, 0xedee2cd7), TOBN(0x90f3ffc0, 0xb268fd42),
+     TOBN(0x81a7fd56, 0x08241aed), TOBN(0x95ab7ad8, 0x00b1afe8),
+     TOBN(0x40127056, 0x3e310d52), TOBN(0xd3ffdeb1, 0x09d9fc43),
+     TOBN(0xc8f85c91, 0xd11a8594), TOBN(0x2e74d258, 0x31cf6db8),
+     TOBN(0x829c7ca3, 0x02b5dfd0), TOBN(0xe389cfbe, 0x69143c86),
+     TOBN(0xd01b6405, 0x941768d8), TOBN(0x45103995, 0x03bf825d),
+     TOBN(0xcc4ee166, 0x56cd17e2), TOBN(0xbea3c283, 0xba037e79),
+     TOBN(0x4e1ac06e, 0xd9a47520), TOBN(0xfbfe18aa, 0xaf852404),
+     TOBN(0x5615f8e2, 0x8087648a), TOBN(0x7301e47e, 0xb9d150d9),
+     TOBN(0x79f9f9dd, 0xb299b977), TOBN(0x76697a7b, 0xa5b78314),
+     TOBN(0x10d67468, 0x7d7c90e7), TOBN(0x7afffe03, 0x937210b5),
+     TOBN(0x5aef3e4b, 0x28c22cee), TOBN(0xefb0ecd8, 0x09fd55ae),
+     TOBN(0x4cea7132, 0x0d2a5d6a), TOBN(0x9cfb5fa1, 0x01db6357),
+     TOBN(0x395e0b57, 0xf36e1ac5), TOBN(0x008fa9ad, 0x36cafb7d),
+     TOBN(0x8f6cdf70, 0x5308c4db), TOBN(0x51527a37, 0x95ed2477),
+     TOBN(0xba0dee30, 0x5bd21311), TOBN(0x6ed41b22, 0x909c90d7),
+     TOBN(0xc5f6b758, 0x7c8696d3), TOBN(0x0db8eaa8, 0x3ce83a80),
+     TOBN(0xd297fe37, 0xb24b4b6f), TOBN(0xfe58afe8, 0x522d1f0d),
+     TOBN(0x97358736, 0x8c98dbd9), TOBN(0x6bc226ca, 0x9454a527),
+     TOBN(0xa12b384e, 0xce53c2d0), TOBN(0x779d897d, 0x5e4606da),
+     TOBN(0xa53e47b0, 0x73ec12b0), TOBN(0x462dbbba, 0x5756f1ad),
+     TOBN(0x69fe09f2, 0xcafe37b6), TOBN(0x273d1ebf, 0xecce2e17),
+     TOBN(0x8ac1d538, 0x3cf607fd), TOBN(0x8035f7ff, 0x12e10c25),}
+    ,
+    {TOBN(0x854d34c7, 0x7e6c5520), TOBN(0xc27df9ef, 0xdcb9ea58),
+     TOBN(0x405f2369, 0xd686666d), TOBN(0x29d1febf, 0x0417aa85),
+     TOBN(0x9846819e, 0x93470afe), TOBN(0x3e6a9669, 0xe2a27f9e),
+     TOBN(0x24d008a2, 0xe31e6504), TOBN(0xdba7cecf, 0x9cb7680a),
+     TOBN(0xecaff541, 0x338d6e43), TOBN(0x56f7dd73, 0x4541d5cc),
+     TOBN(0xb5d426de, 0x96bc88ca), TOBN(0x48d94f6b, 0x9ed3a2c3),
+     TOBN(0x6354a3bb, 0x2ef8279c), TOBN(0xd575465b, 0x0b1867f2),
+     TOBN(0xef99b0ff, 0x95225151), TOBN(0xf3e19d88, 0xf94500d8),
+     TOBN(0x92a83268, 0xe32dd620), TOBN(0x913ec99f, 0x627849a2),
+     TOBN(0xedd8fdfa, 0x2c378882), TOBN(0xaf96f33e, 0xee6f8cfe),
+     TOBN(0xc06737e5, 0xdc3fa8a5), TOBN(0x236bb531, 0xb0b03a1d),
+     TOBN(0x33e59f29, 0x89f037b0), TOBN(0x13f9b5a7, 0xd9a12a53),
+     TOBN(0x0d0df6ce, 0x51efb310), TOBN(0xcb5b2eb4, 0x958df5be),
+     TOBN(0xd6459e29, 0x36158e59), TOBN(0x82aae2b9, 0x1466e336),
+     TOBN(0xfb658a39, 0x411aa636), TOBN(0x7152ecc5, 0xd4c0a933),
+     TOBN(0xf10c758a, 0x49f026b7), TOBN(0xf4837f97, 0xcb09311f),
+     TOBN(0xddfb02c4, 0xc753c45f), TOBN(0x18ca81b6, 0xf9c840fe),
+     TOBN(0x846fd09a, 0xb0f8a3e6), TOBN(0xb1162add, 0xe7733dbc),
+     TOBN(0x7070ad20, 0x236e3ab6), TOBN(0xf88cdaf5, 0xb2a56326),
+     TOBN(0x05fc8719, 0x997cbc7a), TOBN(0x442cd452, 0x4b665272),
+     TOBN(0x7807f364, 0xb71698f5), TOBN(0x6ba418d2, 0x9f7b605e),
+     TOBN(0xfd20b00f, 0xa03b2cbb), TOBN(0x883eca37, 0xda54386f),
+     TOBN(0xff0be43f, 0xf3437f24), TOBN(0xe910b432, 0xa48bb33c),
+     TOBN(0x4963a128, 0x329df765), TOBN(0xac1dd556, 0xbe2fe6f7),
+     TOBN(0x557610f9, 0x24a0a3fc), TOBN(0x38e17bf4, 0xe881c3f9),
+     TOBN(0x6ba84faf, 0xed0dac99), TOBN(0xd4a222c3, 0x59eeb918),
+     TOBN(0xc79c1dbe, 0x13f542b6), TOBN(0x1fc65e0d, 0xe425d457),
+     TOBN(0xeffb754f, 0x1debb779), TOBN(0x638d8fd0, 0x9e08af60),
+     TOBN(0x994f523a, 0x626332d5), TOBN(0x7bc38833, 0x5561bb44),
+     TOBN(0x005ed4b0, 0x3d845ea2), TOBN(0xd39d3ee1, 0xc2a1f08a),
+     TOBN(0x6561fdd3, 0xe7676b0d), TOBN(0x620e35ff, 0xfb706017),
+     TOBN(0x36ce424f, 0xf264f9a8), TOBN(0xc4c3419f, 0xda2681f7),
+     TOBN(0xfb6afd2f, 0x69beb6e8), TOBN(0x3a50b993, 0x6d700d03),
+     TOBN(0xc840b2ad, 0x0c83a14f), TOBN(0x573207be, 0x54085bef),
+     TOBN(0x5af882e3, 0x09fe7e5b), TOBN(0x957678a4, 0x3b40a7e1),
+     TOBN(0x172d4bdd, 0x543056e2), TOBN(0x9c1b26b4, 0x0df13c0a),
+     TOBN(0x1c30861c, 0xf405ff06), TOBN(0xebac86bd, 0x486e828b),
+     TOBN(0xe791a971, 0x636933fc), TOBN(0x50e7c2be, 0x7aeee947),
+     TOBN(0xc3d4a095, 0xfa90d767), TOBN(0xae60eb7b, 0xe670ab7b),
+     TOBN(0x17633a64, 0x397b056d), TOBN(0x93a21f33, 0x105012aa),
+     TOBN(0x663c370b, 0xabb88643), TOBN(0x91df36d7, 0x22e21599),
+     TOBN(0x183ba835, 0x8b761671), TOBN(0x381eea1d, 0x728f3bf1),
+     TOBN(0xb9b2f1ba, 0x39966e6c), TOBN(0x7c464a28, 0xe7295492),
+     TOBN(0x0fd5f70a, 0x09b26b7f), TOBN(0xa9aba1f9, 0xfbe009df),
+     TOBN(0x857c1f22, 0x369b87ad), TOBN(0x3c00e5d9, 0x32fca556),
+     TOBN(0x1ad74cab, 0x90b06466), TOBN(0xa7112386, 0x550faaf2),
+     TOBN(0x7435e198, 0x6d9bd5f5), TOBN(0x2dcc7e38, 0x59c3463f),
+     TOBN(0xdc7df748, 0xca7bd4b2), TOBN(0x13cd4c08, 0x9dec2f31),
+     TOBN(0x0d3b5df8, 0xe3237710), TOBN(0x0dadb26e, 0xcbd2f7b0),
+     TOBN(0x9f5966ab, 0xe4aa082b), TOBN(0x666ec8de, 0x350e966e),
+     TOBN(0x1bfd1ed5, 0xee524216), TOBN(0xcd93c59b, 0x41dab0b6),
+     TOBN(0x658a8435, 0xd186d6ba), TOBN(0x1b7d34d2, 0x159d1195),
+     TOBN(0x5936e460, 0x22caf46b), TOBN(0x6a45dd8f, 0x9a96fe4f),
+     TOBN(0xf7925434, 0xb98f474e), TOBN(0x41410412, 0x0053ef15),
+     TOBN(0x71cf8d12, 0x41de97bf), TOBN(0xb8547b61, 0xbd80bef4),
+     TOBN(0xb47d3970, 0xc4db0037), TOBN(0xf1bcd328, 0xfef20dff),
+     TOBN(0x31a92e09, 0x10caad67), TOBN(0x1f591960, 0x5531a1e1),
+     TOBN(0x3bb852e0, 0x5f4fc840), TOBN(0x63e297ca, 0x93a72c6c),
+     TOBN(0x3c2b0b2e, 0x49abad67), TOBN(0x6ec405fc, 0xed3db0d9),
+     TOBN(0xdc14a530, 0x7fef1d40), TOBN(0xccd19846, 0x280896fc),
+     TOBN(0x00f83176, 0x9bb81648), TOBN(0xd69eb485, 0x653120d0),
+     TOBN(0xd17d75f4, 0x4ccabc62), TOBN(0x34a07f82, 0xb749fcb1),
+     TOBN(0x2c3af787, 0xbbfb5554), TOBN(0xb06ed4d0, 0x62e283f8),
+     TOBN(0x5722889f, 0xa19213a0), TOBN(0x162b085e, 0xdcf3c7b4),
+     TOBN(0xbcaecb31, 0xe0dd3eca), TOBN(0xc6237fbc, 0xe52f13a5),
+     TOBN(0xcc2b6b03, 0x27bac297), TOBN(0x2ae1cac5, 0xb917f54a),
+     TOBN(0x474807d4, 0x7845ae4f), TOBN(0xfec7dd92, 0xce5972e0),
+     TOBN(0xc3bd2541, 0x1d7915bb), TOBN(0x66f85dc4, 0xd94907ca),
+     TOBN(0xd981b888, 0xbdbcf0ca), TOBN(0xd75f5da6, 0xdf279e9f),
+     TOBN(0x128bbf24, 0x7054e934), TOBN(0x3c6ff6e5, 0x81db134b),
+     TOBN(0x795b7cf4, 0x047d26e4), TOBN(0xf370f7b8, 0x5049ec37),
+     TOBN(0xc6712d4d, 0xced945af), TOBN(0xdf30b5ec, 0x095642bc),
+     TOBN(0x9b034c62, 0x4896246e), TOBN(0x5652c016, 0xee90bbd1),
+     TOBN(0xeb38636f, 0x87fedb73), TOBN(0x5e32f847, 0x0135a613),
+     TOBN(0x0703b312, 0xcf933c83), TOBN(0xd05bb76e, 0x1a7f47e6),
+     TOBN(0x825e4f0c, 0x949c2415), TOBN(0x569e5622, 0x7250d6f8),
+     TOBN(0xbbe9eb3a, 0x6568013e), TOBN(0x8dbd203f, 0x22f243fc),
+     TOBN(0x9dbd7694, 0xb342734a), TOBN(0x8f6d12f8, 0x46afa984),
+     TOBN(0xb98610a2, 0xc9eade29), TOBN(0xbab4f323, 0x47dd0f18),
+     TOBN(0x5779737b, 0x671c0d46), TOBN(0x10b6a7c6, 0xd3e0a42a),
+     TOBN(0xfb19ddf3, 0x3035b41c), TOBN(0xd336343f, 0x99c45895),
+     TOBN(0x61fe4938, 0x54c857e5), TOBN(0xc4d506be, 0xae4e57d5),
+     TOBN(0x3cd8c8cb, 0xbbc33f75), TOBN(0x7281f08a, 0x9262c77d),
+     TOBN(0x083f4ea6, 0xf11a2823), TOBN(0x8895041e, 0x9fba2e33),
+     TOBN(0xfcdfea49, 0x9c438edf), TOBN(0x7678dcc3, 0x91edba44),
+     TOBN(0xf07b3b87, 0xe2ba50f0), TOBN(0xc13888ef, 0x43948c1b),
+     TOBN(0xc2135ad4, 0x1140af42), TOBN(0x8e5104f3, 0x926ed1a7),
+     TOBN(0xf24430cb, 0x88f6695f), TOBN(0x0ce0637b, 0x6d73c120),
+     TOBN(0xb2db01e6, 0xfe631e8f), TOBN(0x1c5563d7, 0xd7bdd24b),
+     TOBN(0x8daea3ba, 0x369ad44f), TOBN(0x000c81b6, 0x8187a9f9),
+     TOBN(0x5f48a951, 0xaae1fd9a), TOBN(0xe35626c7, 0x8d5aed8a),
+     TOBN(0x20952763, 0x0498c622), TOBN(0x76d17634, 0x773aa504),
+     TOBN(0x36d90dda, 0xeb300f7a), TOBN(0x9dcf7dfc, 0xedb5e801),
+     TOBN(0x645cb268, 0x74d5244c), TOBN(0xa127ee79, 0x348e3aa2),
+     TOBN(0x488acc53, 0x575f1dbb), TOBN(0x95037e85, 0x80e6161e),
+     TOBN(0x57e59283, 0x292650d0), TOBN(0xabe67d99, 0x14938216),
+     TOBN(0x3c7f944b, 0x3f8e1065), TOBN(0xed908cb6, 0x330e8924),
+     TOBN(0x08ee8fd5, 0x6f530136), TOBN(0x2227b7d5, 0xd7ffc169),
+     TOBN(0x4f55c893, 0xb5cd6dd5), TOBN(0x82225e11, 0xa62796e8),
+     TOBN(0x5c6cead1, 0xcb18e12c), TOBN(0x4381ae0c, 0x84f5a51a),
+     TOBN(0x345913d3, 0x7fafa4c8), TOBN(0x3d918082, 0x0491aac0),
+     TOBN(0x9347871f, 0x3e69264c), TOBN(0xbea9dd3c, 0xb4f4f0cd),
+     TOBN(0xbda5d067, 0x3eadd3e7), TOBN(0x0033c1b8, 0x0573bcd8),
+     TOBN(0x25589379, 0x5da2486c), TOBN(0xcb89ee5b, 0x86abbee7),
+     TOBN(0x8fe0a8f3, 0x22532e5d), TOBN(0xb6410ff0, 0x727dfc4c),
+     TOBN(0x619b9d58, 0x226726db), TOBN(0x5ec25669, 0x7a2b2dc7),
+     TOBN(0xaf4d2e06, 0x4c3beb01), TOBN(0x852123d0, 0x7acea556),
+     TOBN(0x0e9470fa, 0xf783487a), TOBN(0x75a7ea04, 0x5664b3eb),
+     TOBN(0x4ad78f35, 0x6798e4ba), TOBN(0x9214e6e5, 0xc7d0e091),
+     TOBN(0xc420b488, 0xb1290403), TOBN(0x64049e0a, 0xfc295749),
+     TOBN(0x03ef5af1, 0x3ae9841f), TOBN(0xdbe4ca19, 0xb0b662a6),
+     TOBN(0x46845c5f, 0xfa453458), TOBN(0xf8dabf19, 0x10b66722),
+     TOBN(0xb650f0aa, 0xcce2793b), TOBN(0x71db851e, 0xc5ec47c1),
+     TOBN(0x3eb78f3e, 0x3b234fa9), TOBN(0xb0c60f35, 0xfc0106ce),
+     TOBN(0x05427121, 0x774eadbd), TOBN(0x25367faf, 0xce323863),
+     TOBN(0x7541b5c9, 0xcd086976), TOBN(0x4ff069e2, 0xdc507ad1),
+     TOBN(0x74145256, 0x8776e667), TOBN(0x6e76142c, 0xb23c6bb5),
+     TOBN(0xdbf30712, 0x1b3a8a87), TOBN(0x60e7363e, 0x98450836),
+     TOBN(0x5741450e, 0xb7366d80), TOBN(0xe4ee14ca, 0x4837dbdf),
+     TOBN(0xa765eb9b, 0x69d4316f), TOBN(0x04548dca, 0x8ef43825),
+     TOBN(0x9c9f4e4c, 0x5ae888eb), TOBN(0x733abb51, 0x56e9ac99),
+     TOBN(0xdaad3c20, 0xba6ac029), TOBN(0x9b8dd3d3, 0x2ba3e38e),
+     TOBN(0xa9bb4c92, 0x0bc5d11a), TOBN(0xf20127a7, 0x9c5f88a3),
+     TOBN(0x4f52b06e, 0x161d3cb8), TOBN(0x26c1ff09, 0x6afaf0a6),
+     TOBN(0x32670d2f, 0x7189e71f), TOBN(0xc6438748, 0x5ecf91e7),
+     TOBN(0x15758e57, 0xdb757a21), TOBN(0x427d09f8, 0x290a9ce5),
+     TOBN(0x846a308f, 0x38384a7a), TOBN(0xaac3acb4, 0xb0732b99),
+     TOBN(0x9e941009, 0x17845819), TOBN(0x95cba111, 0xa7ce5e03),
+     TOBN(0x6f3d4f7f, 0xb00009c4), TOBN(0xb8396c27, 0x8ff28b5f),
+     TOBN(0xb1a9ae43, 0x1c97975d), TOBN(0x9d7ba8af, 0xe5d9fed5),
+     TOBN(0x338cf09f, 0x34f485b6), TOBN(0xbc0ddacc, 0x64122516),
+     TOBN(0xa450da12, 0x05d471fe), TOBN(0x4c3a6250, 0x628dd8c9),
+     TOBN(0x69c7d103, 0xd1295837), TOBN(0xa2893e50, 0x3807eb2f),
+     TOBN(0xd6e1e1de, 0xbdb41491), TOBN(0xc630745b, 0x5e138235),
+     TOBN(0xc892109e, 0x48661ae1), TOBN(0x8d17e7eb, 0xea2b2674),
+     TOBN(0x00ec0f87, 0xc328d6b5), TOBN(0x6d858645, 0xf079ff9e),
+     TOBN(0x6cdf243e, 0x19115ead), TOBN(0x1ce1393e, 0x4bac4fcf),
+     TOBN(0x2c960ed0, 0x9c29f25b), TOBN(0x59be4d8e, 0x9d388a05),
+     TOBN(0x0d46e06c, 0xd0def72b), TOBN(0xb923db5d, 0xe0342748),
+     TOBN(0xf7d3aacd, 0x936d4a3d), TOBN(0x558519cc, 0x0b0b099e),
+     TOBN(0x3ea8ebf8, 0x827097ef), TOBN(0x259353db, 0xd054f55d),
+     TOBN(0x84c89abc, 0x6d2ed089), TOBN(0x5c548b69, 0x8e096a7c),
+     TOBN(0xd587f616, 0x994b995d), TOBN(0x4d1531f6, 0xa5845601),
+     TOBN(0x792ab31e, 0x451fd9f0), TOBN(0xc8b57bb2, 0x65adf6ca),
+     TOBN(0x68440fcb, 0x1cd5ad73), TOBN(0xb9c860e6, 0x6144da4f),
+     TOBN(0x2ab286aa, 0x8462beb8), TOBN(0xcc6b8fff, 0xef46797f),
+     TOBN(0xac820da4, 0x20c8a471), TOBN(0x69ae05a1, 0x77ff7faf),
+     TOBN(0xb9163f39, 0xbfb5da77), TOBN(0xbd03e590, 0x2c73ab7a),
+     TOBN(0x7e862b5e, 0xb2940d9e), TOBN(0x3c663d86, 0x4b9af564),
+     TOBN(0xd8309031, 0xbde3033d), TOBN(0x298231b2, 0xd42c5bc6),
+     TOBN(0x42090d2c, 0x552ad093), TOBN(0xa4799d1c, 0xff854695),
+     TOBN(0x0a88b5d6, 0xd31f0d00), TOBN(0xf8b40825, 0xa2f26b46),
+     TOBN(0xec29b1ed, 0xf1bd7218), TOBN(0xd491c53b, 0x4b24c86e),
+     TOBN(0xd2fe588f, 0x3395ea65), TOBN(0x6f3764f7, 0x4456ef15),
+     TOBN(0xdb43116d, 0xcdc34800), TOBN(0xcdbcd456, 0xc1e33955),
+     TOBN(0xefdb5540, 0x74ab286b), TOBN(0x948c7a51, 0xd18c5d7c),
+     TOBN(0xeb81aa37, 0x7378058e), TOBN(0x41c746a1, 0x04411154),
+     TOBN(0xa10c73bc, 0xfb828ac7), TOBN(0x6439be91, 0x9d972b29),
+     TOBN(0x4bf3b4b0, 0x43a2fbad), TOBN(0x39e6dadf, 0x82b5e840),
+     TOBN(0x4f716408, 0x6397bd4c), TOBN(0x0f7de568, 0x7f1eeccb),
+     TOBN(0x5865c5a1, 0xd2ffbfc1), TOBN(0xf74211fa, 0x4ccb6451),
+     TOBN(0x66368a88, 0xc0b32558), TOBN(0x5b539dc2, 0x9ad7812e),
+     TOBN(0x579483d0, 0x2f3af6f6), TOBN(0x52132078, 0x99934ece),
+     TOBN(0x50b9650f, 0xdcc9e983), TOBN(0xca989ec9, 0xaee42b8a),
+     TOBN(0x6a44c829, 0xd6f62f99), TOBN(0x8f06a309, 0x4c2a7c0c),
+     TOBN(0x4ea2b3a0, 0x98a0cb0a), TOBN(0x5c547b70, 0xbeee8364),
+     TOBN(0x461d40e1, 0x682afe11), TOBN(0x9e0fc77a, 0x7b41c0a8),
+     TOBN(0x79e4aefd, 0xe20d5d36), TOBN(0x2916e520, 0x32dd9f63),
+     TOBN(0xf59e52e8, 0x3f883faf), TOBN(0x396f9639, 0x2b868d35),
+     TOBN(0xc902a9df, 0x4ca19881), TOBN(0x0fc96822, 0xdb2401a6),
+     TOBN(0x41237587, 0x66f1c68d), TOBN(0x10fc6de3, 0xfb476c0d),
+     TOBN(0xf8b6b579, 0x841f5d90), TOBN(0x2ba8446c, 0xfa24f44a),
+     TOBN(0xa237b920, 0xef4a9975), TOBN(0x60bb6004, 0x2330435f),
+     TOBN(0xd6f4ab5a, 0xcfb7e7b5), TOBN(0xb2ac5097, 0x83435391),
+     TOBN(0xf036ee2f, 0xb0d1ea67), TOBN(0xae779a6a, 0x74c56230),
+     TOBN(0x59bff8c8, 0xab838ae6), TOBN(0xcd83ca99, 0x9b38e6f0),
+     TOBN(0xbb27bef5, 0xe33deed3), TOBN(0xe6356f6f, 0x001892a8),
+     TOBN(0xbf3be6cc, 0x7adfbd3e), TOBN(0xaecbc81c, 0x33d1ac9d),
+     TOBN(0xe4feb909, 0xe6e861dc), TOBN(0x90a247a4, 0x53f5f801),
+     TOBN(0x01c50acb, 0x27346e57), TOBN(0xce29242e, 0x461acc1b),
+     TOBN(0x04dd214a, 0x2f998a91), TOBN(0x271ee9b1, 0xd4baf27b),
+     TOBN(0x7e3027d1, 0xe8c26722), TOBN(0x21d1645c, 0x1820dce5),
+     TOBN(0x086f242c, 0x7501779c), TOBN(0xf0061407, 0xfa0e8009),
+     TOBN(0xf23ce477, 0x60187129), TOBN(0x05bbdedb, 0x0fde9bd0),
+     TOBN(0x682f4832, 0x25d98473), TOBN(0xf207fe85, 0x5c658427),
+     TOBN(0xb6fdd7ba, 0x4166ffa1), TOBN(0x0c314056, 0x9eed799d),
+     TOBN(0x0db8048f, 0x4107e28f), TOBN(0x74ed3871, 0x41216840),
+     TOBN(0x74489f8f, 0x56a3c06e), TOBN(0x1e1c005b, 0x12777134),
+     TOBN(0xdb332a73, 0xf37ec3c3), TOBN(0xc65259bd, 0xdd59eba0),
+     TOBN(0x2291709c, 0xdb4d3257), TOBN(0x9a793b25, 0xbd389390),
+     TOBN(0xf39fe34b, 0xe43756f0), TOBN(0x2f76bdce, 0x9afb56c9),
+     TOBN(0x9f37867a, 0x61208b27), TOBN(0xea1d4307, 0x089972c3),
+     TOBN(0x8c595330, 0x8bdf623a), TOBN(0x5f5accda, 0x8441fb7d),
+     TOBN(0xfafa9418, 0x32ddfd95), TOBN(0x6ad40c5a, 0x0fde9be7),
+     TOBN(0x43faba89, 0xaeca8709), TOBN(0xc64a7cf1, 0x2c248a9d),
+     TOBN(0x16620252, 0x72637a76), TOBN(0xaee1c791, 0x22b8d1bb),
+     TOBN(0xf0f798fd, 0x21a843b2), TOBN(0x56e4ed4d, 0x8d005cb1),
+     TOBN(0x355f7780, 0x1f0d8abe), TOBN(0x197b04cf, 0x34522326),
+     TOBN(0x41f9b31f, 0xfd42c13f), TOBN(0x5ef7feb2, 0xb40f933d),
+     TOBN(0x27326f42, 0x5d60bad4), TOBN(0x027ecdb2, 0x8c92cf89),
+     TOBN(0x04aae4d1, 0x4e3352fe), TOBN(0x08414d2f, 0x73591b90),
+     TOBN(0x5ed6124e, 0xb7da7d60), TOBN(0xb985b931, 0x4d13d4ec),
+     TOBN(0xa592d3ab, 0x96bf36f9), TOBN(0x012dbed5, 0xbbdf51df),
+     TOBN(0xa57963c0, 0xdf6c177d), TOBN(0x010ec869, 0x87ca29cf),
+     TOBN(0xba1700f6, 0xbf926dff), TOBN(0x7c9fdbd1, 0xf4bf6bc2),
+     TOBN(0xdc18dc8f, 0x64da11f5), TOBN(0xa6074b7a, 0xd938ae75),
+     TOBN(0x14270066, 0xe84f44a4), TOBN(0x99998d38, 0xd27b954e),
+     TOBN(0xc1be8ab2, 0xb4f38e9a), TOBN(0x8bb55bbf, 0x15c01016),
+     TOBN(0xf73472b4, 0x0ea2ab30), TOBN(0xd365a340, 0xf73d68dd),
+     TOBN(0xc01a7168, 0x19c2e1eb), TOBN(0x32f49e37, 0x34061719),
+     TOBN(0xb73c57f1, 0x01d8b4d6), TOBN(0x03c8423c, 0x26b47700),
+     TOBN(0x321d0bc8, 0xa4d8826a), TOBN(0x6004213c, 0x4bc0e638),
+     TOBN(0xf78c64a1, 0xc1c06681), TOBN(0x16e0a16f, 0xef018e50),
+     TOBN(0x31cbdf91, 0xdb42b2b3), TOBN(0xf8f4ffce, 0xe0d36f58),
+     TOBN(0xcdcc71cd, 0x4cc5e3e0), TOBN(0xd55c7cfa, 0xa129e3e0),
+     TOBN(0xccdb6ba0, 0x0fb2cbf1), TOBN(0x6aba0005, 0xc4bce3cb),
+     TOBN(0x501cdb30, 0xd232cfc4), TOBN(0x9ddcf12e, 0xd58a3cef),
+     TOBN(0x02d2cf9c, 0x87e09149), TOBN(0xdc5d7ec7, 0x2c976257),
+     TOBN(0x6447986e, 0x0b50d7dd), TOBN(0x88fdbaf7, 0x807f112a),
+     TOBN(0x58c9822a, 0xb00ae9f6), TOBN(0x6abfb950, 0x6d3d27e0),
+     TOBN(0xd0a74487, 0x8a429f4f), TOBN(0x0649712b, 0xdb516609),
+     TOBN(0xb826ba57, 0xe769b5df), TOBN(0x82335df2, 0x1fc7aaf2),
+     TOBN(0x2389f067, 0x5c93d995), TOBN(0x59ac367a, 0x68677be6),
+     TOBN(0xa77985ff, 0x21d9951b), TOBN(0x038956fb, 0x85011cce),
+     TOBN(0x608e48cb, 0xbb734e37), TOBN(0xc08c0bf2, 0x2be5b26f),
+     TOBN(0x17bbdd3b, 0xf9b1a0d9), TOBN(0xeac7d898, 0x10483319),
+     TOBN(0xc95c4baf, 0xbc1a6dea), TOBN(0xfdd0e2bf, 0x172aafdb),
+     TOBN(0x40373cbc, 0x8235c41a), TOBN(0x14303f21, 0xfb6f41d5),
+     TOBN(0xba063621, 0x0408f237), TOBN(0xcad3b09a, 0xecd2d1ed),
+     TOBN(0x4667855a, 0x52abb6a2), TOBN(0xba9157dc, 0xaa8b417b),
+     TOBN(0xfe7f3507, 0x4f013efb), TOBN(0x1b112c4b, 0xaa38c4a2),
+     TOBN(0xa1406a60, 0x9ba64345), TOBN(0xe53cba33, 0x6993c80b),
+     TOBN(0x45466063, 0xded40d23), TOBN(0x3d5f1f4d, 0x54908e25),
+     TOBN(0x9ebefe62, 0x403c3c31), TOBN(0x274ea0b5, 0x0672a624),
+     TOBN(0xff818d99, 0x451d1b71), TOBN(0x80e82643, 0x8f79cf79),
+     TOBN(0xa165df13, 0x73ce37f5), TOBN(0xa744ef4f, 0xfe3a21fd),
+     TOBN(0x73f1e7f5, 0xcf551396), TOBN(0xc616898e, 0x868c676b),
+     TOBN(0x671c28c7, 0x8c442c36), TOBN(0xcfe5e558, 0x5e0a317d),
+     TOBN(0x1242d818, 0x7051f476), TOBN(0x56fad2a6, 0x14f03442),
+     TOBN(0x262068bc, 0x0a44d0f6), TOBN(0xdfa2cd6e, 0xce6edf4e),
+     TOBN(0x0f43813a, 0xd15d1517), TOBN(0x61214cb2, 0x377d44f5),
+     TOBN(0xd399aa29, 0xc639b35f), TOBN(0x42136d71, 0x54c51c19),
+     TOBN(0x9774711b, 0x08417221), TOBN(0x0a5546b3, 0x52545a57),
+     TOBN(0x80624c41, 0x1150582d), TOBN(0x9ec5c418, 0xfbc555bc),
+     TOBN(0x2c87dcad, 0x771849f1), TOBN(0xb0c932c5, 0x01d7bf6f),
+     TOBN(0x6aa5cd3e, 0x89116eb2), TOBN(0xd378c25a, 0x51ca7bd3),
+     TOBN(0xc612a0da, 0x9e6e3e31), TOBN(0x0417a54d, 0xb68ad5d0),
+     TOBN(0x00451e4a, 0x22c6edb8), TOBN(0x9fbfe019, 0xb42827ce),
+     TOBN(0x2fa92505, 0xba9384a2), TOBN(0x21b8596e, 0x64ad69c1),
+     TOBN(0x8f4fcc49, 0x983b35a6), TOBN(0xde093760, 0x72754672),
+     TOBN(0x2f14ccc8, 0xf7bffe6d), TOBN(0x27566bff, 0x5d94263d),
+     TOBN(0xb5b4e9c6, 0x2df3ec30), TOBN(0x94f1d7d5, 0x3e6ea6ba),
+     TOBN(0x97b7851a, 0xaaca5e9b), TOBN(0x518aa521, 0x56713b97),
+     TOBN(0x3357e8c7, 0x150a61f6), TOBN(0x7842e7e2, 0xec2c2b69),
+     TOBN(0x8dffaf65, 0x6868a548), TOBN(0xd963bd82, 0xe068fc81),
+     TOBN(0x64da5c8b, 0x65917733), TOBN(0x927090ff, 0x7b247328),}
+    ,
+    {TOBN(0x214bc9a7, 0xd298c241), TOBN(0xe3b697ba, 0x56807cfd),
+     TOBN(0xef1c7802, 0x4564eadb), TOBN(0xdde8cdcf, 0xb48149c5),
+     TOBN(0x946bf0a7, 0x5a4d2604), TOBN(0x27154d7f, 0x6c1538af),
+     TOBN(0x95cc9230, 0xde5b1fcc), TOBN(0xd88519e9, 0x66864f82),
+     TOBN(0xb828dd1a, 0x7cb1282c), TOBN(0xa08d7626, 0xbe46973a),
+     TOBN(0x6baf8d40, 0xe708d6b2), TOBN(0x72571fa1, 0x4daeb3f3),
+     TOBN(0x85b1732f, 0xf22dfd98), TOBN(0x87ab01a7, 0x0087108d),
+     TOBN(0xaaaafea8, 0x5988207a), TOBN(0xccc832f8, 0x69f00755),
+     TOBN(0x964d950e, 0x36ff3bf0), TOBN(0x8ad20f6f, 0xf0b34638),
+     TOBN(0x4d9177b3, 0xb5d7585f), TOBN(0xcf839760, 0xef3f019f),
+     TOBN(0x582fc5b3, 0x8288c545), TOBN(0x2f8e4e9b, 0x13116bd1),
+     TOBN(0xf91e1b2f, 0x332120ef), TOBN(0xcf568724, 0x2a17dd23),
+     TOBN(0x488f1185, 0xca8d9d1a), TOBN(0xadf2c77d, 0xd987ded2),
+     TOBN(0x5f3039f0, 0x60c46124), TOBN(0xe5d70b75, 0x71e095f4),
+     TOBN(0x82d58650, 0x6260e70f), TOBN(0x39d75ea7, 0xf750d105),
+     TOBN(0x8cf3d0b1, 0x75bac364), TOBN(0xf3a7564d, 0x21d01329),
+     TOBN(0x182f04cd, 0x2f52d2a7), TOBN(0x4fde149a, 0xe2df565a),
+     TOBN(0xb80c5eec, 0xa79fb2f7), TOBN(0xab491d7b, 0x22ddc897),
+     TOBN(0x99d76c18, 0xc6312c7f), TOBN(0xca0d5f3d, 0x6aa41a57),
+     TOBN(0x71207325, 0xd15363a0), TOBN(0xe82aa265, 0xbeb252c2),
+     TOBN(0x94ab4700, 0xec3128c2), TOBN(0x6c76d862, 0x8e383f49),
+     TOBN(0xdc36b150, 0xc03024eb), TOBN(0xfb439477, 0x53daac69),
+     TOBN(0xfc68764a, 0x8dc79623), TOBN(0x5b86995d, 0xb440fbb2),
+     TOBN(0xd66879bf, 0xccc5ee0d), TOBN(0x05228942, 0x95aa8bd3),
+     TOBN(0xb51a40a5, 0x1e6a75c1), TOBN(0x24327c76, 0x0ea7d817),
+     TOBN(0x06630182, 0x07774597), TOBN(0xd6fdbec3, 0x97fa7164),
+     TOBN(0x20c99dfb, 0x13c90f48), TOBN(0xd6ac5273, 0x686ef263),
+     TOBN(0xc6a50bdc, 0xfef64eeb), TOBN(0xcd87b281, 0x86fdfc32),
+     TOBN(0xb24aa43e, 0x3fcd3efc), TOBN(0xdd26c034, 0xb8088e9a),
+     TOBN(0xa5ef4dc9, 0xbd3d46ea), TOBN(0xa2f99d58, 0x8a4c6a6f),
+     TOBN(0xddabd355, 0x2f1da46c), TOBN(0x72c3f8ce, 0x1afacdd1),
+     TOBN(0xd90c4eee, 0x92d40578), TOBN(0xd28bb41f, 0xca623b94),
+     TOBN(0x50fc0711, 0x745edc11), TOBN(0x9dd9ad7d, 0x3dc87558),
+     TOBN(0xce6931fb, 0xb49d1e64), TOBN(0x6c77a0a2, 0xc98bd0f9),
+     TOBN(0x62b9a629, 0x6baf7cb1), TOBN(0xcf065f91, 0xccf72d22),
+     TOBN(0x7203cce9, 0x79639071), TOBN(0x09ae4885, 0xf9cb732f),
+     TOBN(0x5e7c3bec, 0xee8314f3), TOBN(0x1c068aed, 0xdbea298f),
+     TOBN(0x08d381f1, 0x7c80acec), TOBN(0x03b56be8, 0xe330495b),
+     TOBN(0xaeffb8f2, 0x9222882d), TOBN(0x95ff38f6, 0xc4af8bf7),
+     TOBN(0x50e32d35, 0x1fc57d8c), TOBN(0x6635be52, 0x17b444f0),
+     TOBN(0x04d15276, 0xa5177900), TOBN(0x4e1dbb47, 0xf6858752),
+     TOBN(0x5b475622, 0xc615796c), TOBN(0xa6fa0387, 0x691867bf),
+     TOBN(0xed7f5d56, 0x2844c6d0), TOBN(0xc633cf9b, 0x03a2477d),
+     TOBN(0xf6be5c40, 0x2d3721d6), TOBN(0xaf312eb7, 0xe9fd68e6),
+     TOBN(0x242792d2, 0xe7417ce1), TOBN(0xff42bc71, 0x970ee7f5),
+     TOBN(0x1ff4dc6d, 0x5c67a41e), TOBN(0x77709b7b, 0x20882a58),
+     TOBN(0x3554731d, 0xbe217f2c), TOBN(0x2af2a8cd, 0x5bb72177),
+     TOBN(0x58eee769, 0x591dd059), TOBN(0xbb2930c9, 0x4bba6477),
+     TOBN(0x863ee047, 0x7d930cfc), TOBN(0x4c262ad1, 0x396fd1f4),
+     TOBN(0xf4765bc8, 0x039af7e1), TOBN(0x2519834b, 0x5ba104f6),
+     TOBN(0x7cd61b4c, 0xd105f961), TOBN(0xa5415da5, 0xd63bca54),
+     TOBN(0x778280a0, 0x88a1f17c), TOBN(0xc4968949, 0x2329512c),
+     TOBN(0x174a9126, 0xcecdaa7a), TOBN(0xfc8c7e0e, 0x0b13247b),
+     TOBN(0x29c110d2, 0x3484c1c4), TOBN(0xf8eb8757, 0x831dfc3b),
+     TOBN(0x022f0212, 0xc0067452), TOBN(0x3f6f69ee, 0x7b9b926c),
+     TOBN(0x09032da0, 0xef42daf4), TOBN(0x79f00ade, 0x83f80de4),
+     TOBN(0x6210db71, 0x81236c97), TOBN(0x74f7685b, 0x3ee0781f),
+     TOBN(0x4df7da7b, 0xa3e41372), TOBN(0x2aae38b1, 0xb1a1553e),
+     TOBN(0x1688e222, 0xf6dd9d1b), TOBN(0x57695448, 0x5b8b6487),
+     TOBN(0x478d2127, 0x4b2edeaa), TOBN(0xb2818fa5, 0x1e85956a),
+     TOBN(0x1e6addda, 0xf176f2c0), TOBN(0x01ca4604, 0xe2572658),
+     TOBN(0x0a404ded, 0x85342ffb), TOBN(0x8cf60f96, 0x441838d6),
+     TOBN(0x9bbc691c, 0xc9071c4a), TOBN(0xfd588744, 0x34442803),
+     TOBN(0x97101c85, 0x809c0d81), TOBN(0xa7fb754c, 0x8c456f7f),
+     TOBN(0xc95f3c5c, 0xd51805e1), TOBN(0xab4ccd39, 0xb299dca8),
+     TOBN(0x3e03d20b, 0x47eaf500), TOBN(0xfa3165c1, 0xd7b80893),
+     TOBN(0x005e8b54, 0xe160e552), TOBN(0xdc4972ba, 0x9019d11f),
+     TOBN(0x21a6972e, 0x0c9a4a7a), TOBN(0xa52c258f, 0x37840fd7),
+     TOBN(0xf8559ff4, 0xc1e99d81), TOBN(0x08e1a7d6, 0xa3c617c0),
+     TOBN(0xb398fd43, 0x248c6ba7), TOBN(0x6ffedd91, 0xd1283794),
+     TOBN(0x8a6a59d2, 0xd629d208), TOBN(0xa9d141d5, 0x3490530e),
+     TOBN(0x42f6fc18, 0x38505989), TOBN(0x09bf250d, 0x479d94ee),
+     TOBN(0x223ad3b1, 0xb3822790), TOBN(0x6c5926c0, 0x93b8971c),
+     TOBN(0x609efc7e, 0x75f7fa62), TOBN(0x45d66a6d, 0x1ec2d989),
+     TOBN(0x4422d663, 0x987d2792), TOBN(0x4a73caad, 0x3eb31d2b),
+     TOBN(0xf06c2ac1, 0xa32cb9e6), TOBN(0xd9445c5f, 0x91aeba84),
+     TOBN(0x6af7a1d5, 0xaf71013f), TOBN(0xe68216e5, 0x0bedc946),
+     TOBN(0xf4cba30b, 0xd27370a0), TOBN(0x7981afbf, 0x870421cc),
+     TOBN(0x02496a67, 0x9449f0e1), TOBN(0x86cfc4be, 0x0a47edae),
+     TOBN(0x3073c936, 0xb1feca22), TOBN(0xf5694612, 0x03f8f8fb),
+     TOBN(0xd063b723, 0x901515ea), TOBN(0x4c6c77a5, 0x749cf038),
+     TOBN(0x6361e360, 0xab9e5059), TOBN(0x596cf171, 0xa76a37c0),
+     TOBN(0x800f53fa, 0x6530ae7a), TOBN(0x0f5e631e, 0x0792a7a6),
+     TOBN(0x5cc29c24, 0xefdb81c9), TOBN(0xa269e868, 0x3f9c40ba),
+     TOBN(0xec14f9e1, 0x2cb7191e), TOBN(0x78ea1bd8, 0xe5b08ea6),
+     TOBN(0x3c65aa9b, 0x46332bb9), TOBN(0x84cc22b3, 0xbf80ce25),
+     TOBN(0x0098e9e9, 0xd49d5bf1), TOBN(0xcd4ec1c6, 0x19087da4),
+     TOBN(0x3c9d07c5, 0xaef6e357), TOBN(0x839a0268, 0x9f8f64b8),
+     TOBN(0xc5e9eb62, 0xc6d8607f), TOBN(0x759689f5, 0x6aa995e4),
+     TOBN(0x70464669, 0xbbb48317), TOBN(0x921474bf, 0xe402417d),
+     TOBN(0xcabe135b, 0x2a354c8c), TOBN(0xd51e52d2, 0x812fa4b5),
+     TOBN(0xec741096, 0x53311fe8), TOBN(0x4f774535, 0xb864514b),
+     TOBN(0xbcadd671, 0x5bde48f8), TOBN(0xc9703873, 0x2189bc7d),
+     TOBN(0x5d45299e, 0xc709ee8a), TOBN(0xd1287ee2, 0x845aaff8),
+     TOBN(0x7d1f8874, 0xdb1dbf1f), TOBN(0xea46588b, 0x990c88d6),
+     TOBN(0x60ba649a, 0x84368313), TOBN(0xd5fdcbce, 0x60d543ae),
+     TOBN(0x90b46d43, 0x810d5ab0), TOBN(0x6739d8f9, 0x04d7e5cc),
+     TOBN(0x021c1a58, 0x0d337c33), TOBN(0x00a61162, 0x68e67c40),
+     TOBN(0x95ef413b, 0x379f0a1f), TOBN(0xfe126605, 0xe9e2ab95),
+     TOBN(0x67578b85, 0x2f5f199c), TOBN(0xf5c00329, 0x2cb84913),
+     TOBN(0xf7956430, 0x37577dd8), TOBN(0x83b82af4, 0x29c5fe88),
+     TOBN(0x9c1bea26, 0xcdbdc132), TOBN(0x589fa086, 0x9c04339e),
+     TOBN(0x033e9538, 0xb13799df), TOBN(0x85fa8b21, 0xd295d034),
+     TOBN(0xdf17f73f, 0xbd9ddcca), TOBN(0xf32bd122, 0xddb66334),
+     TOBN(0x55ef88a7, 0x858b044c), TOBN(0x1f0d69c2, 0x5aa9e397),
+     TOBN(0x55fd9cc3, 0x40d85559), TOBN(0xc774df72, 0x7785ddb2),
+     TOBN(0x5dcce9f6, 0xd3bd2e1c), TOBN(0xeb30da20, 0xa85dfed0),
+     TOBN(0x5ed7f5bb, 0xd3ed09c4), TOBN(0x7d42a35c, 0x82a9c1bd),
+     TOBN(0xcf3de995, 0x9890272d), TOBN(0x75f3432a, 0x3e713a10),
+     TOBN(0x5e13479f, 0xe28227b8), TOBN(0xb8561ea9, 0xfefacdc8),
+     TOBN(0xa6a297a0, 0x8332aafd), TOBN(0x9b0d8bb5, 0x73809b62),
+     TOBN(0xd2fa1cfd, 0x0c63036f), TOBN(0x7a16eb55, 0xbd64bda8),
+     TOBN(0x3f5cf5f6, 0x78e62ddc), TOBN(0x2267c454, 0x07fd752b),
+     TOBN(0x5e361b6b, 0x5e437bbe), TOBN(0x95c59501, 0x8354e075),
+     TOBN(0xec725f85, 0xf2b254d9), TOBN(0x844b617d, 0x2cb52b4e),
+     TOBN(0xed8554f5, 0xcf425fb5), TOBN(0xab67703e, 0x2af9f312),
+     TOBN(0x4cc34ec1, 0x3cf48283), TOBN(0xb09daa25, 0x9c8a705e),
+     TOBN(0xd1e9d0d0, 0x5b7d4f84), TOBN(0x4df6ef64, 0xdb38929d),
+     TOBN(0xe16b0763, 0xaa21ba46), TOBN(0xc6b1d178, 0xa293f8fb),
+     TOBN(0x0ff5b602, 0xd520aabf), TOBN(0x94d671bd, 0xc339397a),
+     TOBN(0x7c7d98cf, 0x4f5792fa), TOBN(0x7c5e0d67, 0x11215261),
+     TOBN(0x9b19a631, 0xa7c5a6d4), TOBN(0xc8511a62, 0x7a45274d),
+     TOBN(0x0c16621c, 0xa5a60d99), TOBN(0xf7fbab88, 0xcf5e48cb),
+     TOBN(0xab1e6ca2, 0xf7ddee08), TOBN(0x83bd08ce, 0xe7867f3c),
+     TOBN(0xf7e48e8a, 0x2ac13e27), TOBN(0x4494f6df, 0x4eb1a9f5),
+     TOBN(0xedbf84eb, 0x981f0a62), TOBN(0x49badc32, 0x536438f0),
+     TOBN(0x50bea541, 0x004f7571), TOBN(0xbac67d10, 0xdf1c94ee),
+     TOBN(0x253d73a1, 0xb727bc31), TOBN(0xb3d01cf2, 0x30686e28),
+     TOBN(0x51b77b1b, 0x55fd0b8b), TOBN(0xa099d183, 0xfeec3173),
+     TOBN(0x202b1fb7, 0x670e72b7), TOBN(0xadc88b33, 0xa8e1635f),
+     TOBN(0x34e8216a, 0xf989d905), TOBN(0xc2e68d20, 0x29b58d01),
+     TOBN(0x11f81c92, 0x6fe55a93), TOBN(0x15f1462a, 0x8f296f40),
+     TOBN(0x1915d375, 0xea3d62f2), TOBN(0xa17765a3, 0x01c8977d),
+     TOBN(0x7559710a, 0xe47b26f6), TOBN(0xe0bd29c8, 0x535077a5),
+     TOBN(0x615f976d, 0x08d84858), TOBN(0x370dfe85, 0x69ced5c1),
+     TOBN(0xbbc7503c, 0xa734fa56), TOBN(0xfbb9f1ec, 0x91ac4574),
+     TOBN(0x95d7ec53, 0x060dd7ef), TOBN(0xeef2dacd, 0x6e657979),
+     TOBN(0x54511af3, 0xe2a08235), TOBN(0x1e324aa4, 0x1f4aea3d),
+     TOBN(0x550e7e71, 0xe6e67671), TOBN(0xbccd5190, 0xbf52faf7),
+     TOBN(0xf880d316, 0x223cc62a), TOBN(0x0d402c7e, 0x2b32eb5d),
+     TOBN(0xa40bc039, 0x306a5a3b), TOBN(0x4e0a41fd, 0x96783a1b),
+     TOBN(0xa1e8d39a, 0x0253cdd4), TOBN(0x6480be26, 0xc7388638),
+     TOBN(0xee365e1d, 0x2285f382), TOBN(0x188d8d8f, 0xec0b5c36),
+     TOBN(0x34ef1a48, 0x1f0f4d82), TOBN(0x1a8f43e1, 0xa487d29a),
+     TOBN(0x8168226d, 0x77aefb3a), TOBN(0xf69a751e, 0x1e72c253),
+     TOBN(0x8e04359a, 0xe9594df1), TOBN(0x475ffd7d, 0xd14c0467),
+     TOBN(0xb5a2c2b1, 0x3844e95c), TOBN(0x85caf647, 0xdd12ef94),
+     TOBN(0x1ecd2a9f, 0xf1063d00), TOBN(0x1dd2e229, 0x23843311),
+     TOBN(0x38f0e09d, 0x73d17244), TOBN(0x3ede7746, 0x8fc653f1),
+     TOBN(0xae4459f5, 0xdc20e21c), TOBN(0x00db2ffa, 0x6a8599ea),
+     TOBN(0x11682c39, 0x30cfd905), TOBN(0x4934d074, 0xa5c112a6),
+     TOBN(0xbdf063c5, 0x568bfe95), TOBN(0x779a440a, 0x016c441a),
+     TOBN(0x0c23f218, 0x97d6fbdc), TOBN(0xd3a5cd87, 0xe0776aac),
+     TOBN(0xcee37f72, 0xd712e8db), TOBN(0xfb28c70d, 0x26f74e8d),
+     TOBN(0xffe0c728, 0xb61301a0), TOBN(0xa6282168, 0xd3724354),
+     TOBN(0x7ff4cb00, 0x768ffedc), TOBN(0xc51b3088, 0x03b02de9),
+     TOBN(0xa5a8147c, 0x3902dda5), TOBN(0x35d2f706, 0xfe6973b4),
+     TOBN(0x5ac2efcf, 0xc257457e), TOBN(0x933f48d4, 0x8700611b),
+     TOBN(0xc365af88, 0x4912beb2), TOBN(0x7f5a4de6, 0x162edf94),
+     TOBN(0xc646ba7c, 0x0c32f34b), TOBN(0x632c6af3, 0xb2091074),
+     TOBN(0x58d4f2e3, 0x753e43a9), TOBN(0x70e1d217, 0x24d4e23f),
+     TOBN(0xb24bf729, 0xafede6a6), TOBN(0x7f4a94d8, 0x710c8b60),
+     TOBN(0xaad90a96, 0x8d4faa6a), TOBN(0xd9ed0b32, 0xb066b690),
+     TOBN(0x52fcd37b, 0x78b6dbfd), TOBN(0x0b64615e, 0x8bd2b431),
+     TOBN(0x228e2048, 0xcfb9fad5), TOBN(0xbeaa386d, 0x240b76bd),
+     TOBN(0x2d6681c8, 0x90dad7bc), TOBN(0x3e553fc3, 0x06d38f5e),
+     TOBN(0xf27cdb9b, 0x9d5f9750), TOBN(0x3e85c52a, 0xd28c5b0e),
+     TOBN(0x190795af, 0x5247c39b), TOBN(0x547831eb, 0xbddd6828),
+     TOBN(0xf327a227, 0x4a82f424), TOBN(0x36919c78, 0x7e47f89d),
+     TOBN(0xe4783919, 0x43c7392c), TOBN(0xf101b9aa, 0x2316fefe),
+     TOBN(0xbcdc9e9c, 0x1c5009d2), TOBN(0xfb55ea13, 0x9cd18345),
+     TOBN(0xf5b5e231, 0xa3ce77c7), TOBN(0xde6b4527, 0xd2f2cb3d),
+     TOBN(0x10f6a333, 0x9bb26f5f), TOBN(0x1e85db8e, 0x044d85b6),
+     TOBN(0xc3697a08, 0x94197e54), TOBN(0x65e18cc0, 0xa7cb4ea8),
+     TOBN(0xa38c4f50, 0xa471fe6e), TOBN(0xf031747a, 0x2f13439c),
+     TOBN(0x53c4a6ba, 0xc007318b), TOBN(0xa8da3ee5, 0x1deccb3d),
+     TOBN(0x0555b31c, 0x558216b1), TOBN(0x90c7810c, 0x2f79e6c2),
+     TOBN(0x9b669f4d, 0xfe8eed3c), TOBN(0x70398ec8, 0xe0fac126),
+     TOBN(0xa96a449e, 0xf701b235), TOBN(0x0ceecdb3, 0xeb94f395),
+     TOBN(0x285fc368, 0xd0cb7431), TOBN(0x0d37bb52, 0x16a18c64),
+     TOBN(0x05110d38, 0xb880d2dd), TOBN(0xa60f177b, 0x65930d57),
+     TOBN(0x7da34a67, 0xf36235f5), TOBN(0x47f5e17c, 0x183816b9),
+     TOBN(0xc7664b57, 0xdb394af4), TOBN(0x39ba215d, 0x7036f789),
+     TOBN(0x46d2ca0e, 0x2f27b472), TOBN(0xc42647ee, 0xf73a84b7),
+     TOBN(0x44bc7545, 0x64488f1d), TOBN(0xaa922708, 0xf4cf85d5),
+     TOBN(0x721a01d5, 0x53e4df63), TOBN(0x649c0c51, 0x5db46ced),
+     TOBN(0x6bf0d64e, 0x3cffcb6c), TOBN(0xe3bf93fe, 0x50f71d96),
+     TOBN(0x75044558, 0xbcc194a0), TOBN(0x16ae3372, 0x6afdc554),
+     TOBN(0xbfc01adf, 0x5ca48f3f), TOBN(0x64352f06, 0xe22a9b84),
+     TOBN(0xcee54da1, 0xc1099e4a), TOBN(0xbbda54e8, 0xfa1b89c0),
+     TOBN(0x166a3df5, 0x6f6e55fb), TOBN(0x1ca44a24, 0x20176f88),
+     TOBN(0x936afd88, 0xdfb7b5ff), TOBN(0xe34c2437, 0x8611d4a0),
+     TOBN(0x7effbb75, 0x86142103), TOBN(0x6704ba1b, 0x1f34fc4d),
+     TOBN(0x7c2a468f, 0x10c1b122), TOBN(0x36b3a610, 0x8c6aace9),
+     TOBN(0xabfcc0a7, 0x75a0d050), TOBN(0x066f9197, 0x3ce33e32),
+     TOBN(0xce905ef4, 0x29fe09be), TOBN(0x89ee25ba, 0xa8376351),
+     TOBN(0x2a3ede22, 0xfd29dc76), TOBN(0x7fd32ed9, 0x36f17260),
+     TOBN(0x0cadcf68, 0x284b4126), TOBN(0x63422f08, 0xa7951fc8),
+     TOBN(0x562b24f4, 0x0807e199), TOBN(0xfe9ce5d1, 0x22ad4490),
+     TOBN(0xc2f51b10, 0x0db2b1b4), TOBN(0xeb3613ff, 0xe4541d0d),
+     TOBN(0xbd2c4a05, 0x2680813b), TOBN(0x527aa55d, 0x561b08d6),
+     TOBN(0xa9f8a40e, 0xa7205558), TOBN(0xe3eea56f, 0x243d0bec),
+     TOBN(0x7b853817, 0xa0ff58b3), TOBN(0xb67d3f65, 0x1a69e627),
+     TOBN(0x0b76bbb9, 0xa869b5d6), TOBN(0xa3afeb82, 0x546723ed),
+     TOBN(0x5f24416d, 0x3e554892), TOBN(0x8413b53d, 0x430e2a45),
+     TOBN(0x99c56aee, 0x9032a2a0), TOBN(0x09432bf6, 0xeec367b1),
+     TOBN(0x552850c6, 0xdaf0ecc1), TOBN(0x49ebce55, 0x5bc92048),
+     TOBN(0xdfb66ba6, 0x54811307), TOBN(0x1b84f797, 0x6f298597),
+     TOBN(0x79590481, 0x8d1d7a0d), TOBN(0xd9fabe03, 0x3a6fa556),
+     TOBN(0xa40f9c59, 0xba9e5d35), TOBN(0xcb1771c1, 0xf6247577),
+     TOBN(0x542a47ca, 0xe9a6312b), TOBN(0xa34b3560, 0x552dd8c5),
+     TOBN(0xfdf94de0, 0x0d794716), TOBN(0xd46124a9, 0x9c623094),
+     TOBN(0x56b7435d, 0x68afe8b4), TOBN(0x27f20540, 0x6c0d8ea1),
+     TOBN(0x12b77e14, 0x73186898), TOBN(0xdbc3dd46, 0x7479490f),
+     TOBN(0x951a9842, 0xc03b0c05), TOBN(0x8b1b3bb3, 0x7921bc96),
+     TOBN(0xa573b346, 0x2b202e0a), TOBN(0x77e4665d, 0x47254d56),
+     TOBN(0x08b70dfc, 0xd23e3984), TOBN(0xab86e8bc, 0xebd14236),
+     TOBN(0xaa3e07f8, 0x57114ba7), TOBN(0x5ac71689, 0xab0ef4f2),
+     TOBN(0x88fca384, 0x0139d9af), TOBN(0x72733f88, 0x76644af0),
+     TOBN(0xf122f72a, 0x65d74f4a), TOBN(0x13931577, 0xa5626c7a),
+     TOBN(0xd5b5d9eb, 0x70f8d5a4), TOBN(0x375adde7, 0xd7bbb228),
+     TOBN(0x31e88b86, 0x0c1c0b32), TOBN(0xd1f568c4, 0x173edbaa),
+     TOBN(0x1592fc83, 0x5459df02), TOBN(0x2beac0fb, 0x0fcd9a7e),
+     TOBN(0xb0a6fdb8, 0x1b473b0a), TOBN(0xe3224c6f, 0x0fe8fc48),
+     TOBN(0x680bd00e, 0xe87edf5b), TOBN(0x30385f02, 0x20e77cf5),
+     TOBN(0xe9ab98c0, 0x4d42d1b2), TOBN(0x72d191d2, 0xd3816d77),
+     TOBN(0x1564daca, 0x0917d9e5), TOBN(0x394eab59, 0x1f8fed7f),
+     TOBN(0xa209aa8d, 0x7fbb3896), TOBN(0x5564f3b9, 0xbe6ac98e),
+     TOBN(0xead21d05, 0xd73654ef), TOBN(0x68d1a9c4, 0x13d78d74),
+     TOBN(0x61e01708, 0x6d4973a0), TOBN(0x83da3500, 0x46e6d32a),
+     TOBN(0x6a3dfca4, 0x68ae0118), TOBN(0xa1b9a4c9, 0xd02da069),
+     TOBN(0x0b2ff9c7, 0xebab8302), TOBN(0x98af07c3, 0x944ba436),
+     TOBN(0x85997326, 0x995f0f9f), TOBN(0x467fade0, 0x71b58bc6),
+     TOBN(0x47e4495a, 0xbd625a2b), TOBN(0xfdd2d01d, 0x33c3b8cd),
+     TOBN(0x2c38ae28, 0xc693f9fa), TOBN(0x48622329, 0x348f7999),
+     TOBN(0x97bf738e, 0x2161f583), TOBN(0x15ee2fa7, 0x565e8cc9),
+     TOBN(0xa1a5c845, 0x5777e189), TOBN(0xcc10bee0, 0x456f2829),
+     TOBN(0x8ad95c56, 0xda762bd5), TOBN(0x152e2214, 0xe9d91da8),
+     TOBN(0x975b0e72, 0x7cb23c74), TOBN(0xfd5d7670, 0xa90c66df),
+     TOBN(0xb5b5b8ad, 0x225ffc53), TOBN(0xab6dff73, 0xfaded2ae),
+     TOBN(0xebd56781, 0x6f4cbe9d), TOBN(0x0ed8b249, 0x6a574bd7),
+     TOBN(0x41c246fe, 0x81a881fa), TOBN(0x91564805, 0xc3db9c70),
+     TOBN(0xd7c12b08, 0x5b862809), TOBN(0x1facd1f1, 0x55858d7b),
+     TOBN(0x7693747c, 0xaf09e92a), TOBN(0x3b69dcba, 0x189a425f),
+     TOBN(0x0be28e9f, 0x967365ef), TOBN(0x57300eb2, 0xe801f5c9),
+     TOBN(0x93b8ac6a, 0xd583352f), TOBN(0xa2cf1f89, 0xcd05b2b7),
+     TOBN(0x7c0c9b74, 0x4dcc40cc), TOBN(0xfee38c45, 0xada523fb),
+     TOBN(0xb49a4dec, 0x1099cc4d), TOBN(0x325c377f, 0x69f069c6),
+     TOBN(0xe12458ce, 0x476cc9ff), TOBN(0x580e0b6c, 0xc6d4cb63),
+     TOBN(0xd561c8b7, 0x9072289b), TOBN(0x0377f264, 0xa619e6da),
+     TOBN(0x26685362, 0x88e591a5), TOBN(0xa453a7bd, 0x7523ca2b),
+     TOBN(0x8a9536d2, 0xc1df4533), TOBN(0xc8e50f2f, 0xbe972f79),
+     TOBN(0xd433e50f, 0x6d3549cf), TOBN(0x6f33696f, 0xfacd665e),
+     TOBN(0x695bfdac, 0xce11fcb4), TOBN(0x810ee252, 0xaf7c9860),
+     TOBN(0x65450fe1, 0x7159bb2c), TOBN(0xf7dfbebe, 0x758b357b),
+     TOBN(0x2b057e74, 0xd69fea72), TOBN(0xd485717a, 0x92731745),}
+    ,
+    {TOBN(0x896c42e8, 0xee36860c), TOBN(0xdaf04dfd, 0x4113c22d),
+     TOBN(0x1adbb7b7, 0x44104213), TOBN(0xe5fd5fa1, 0x1fd394ea),
+     TOBN(0x68235d94, 0x1a4e0551), TOBN(0x6772cfbe, 0x18d10151),
+     TOBN(0x276071e3, 0x09984523), TOBN(0xe4e879de, 0x5a56ba98),
+     TOBN(0xaaafafb0, 0x285b9491), TOBN(0x01a0be88, 0x1e4c705e),
+     TOBN(0xff1d4f5d, 0x2ad9caab), TOBN(0x6e349a4a, 0xc37a233f),
+     TOBN(0xcf1c1246, 0x4a1c6a16), TOBN(0xd99e6b66, 0x29383260),
+     TOBN(0xea3d4366, 0x5f6d5471), TOBN(0x36974d04, 0xff8cc89b),
+     TOBN(0xc26c49a1, 0xcfe89d80), TOBN(0xb42c026d, 0xda9c8371),
+     TOBN(0xca6c013a, 0xdad066d2), TOBN(0xfb8f7228, 0x56a4f3ee),
+     TOBN(0x08b579ec, 0xd850935b), TOBN(0x34c1a74c, 0xd631e1b3),
+     TOBN(0xcb5fe596, 0xac198534), TOBN(0x39ff21f6, 0xe1f24f25),
+     TOBN(0x27f29e14, 0x8f929057), TOBN(0x7a64ae06, 0xc0c853df),
+     TOBN(0x256cd183, 0x58e9c5ce), TOBN(0x9d9cce82, 0xded092a5),
+     TOBN(0xcc6e5979, 0x6e93b7c7), TOBN(0xe1e47092, 0x31bb9e27),
+     TOBN(0xb70b3083, 0xaa9e29a0), TOBN(0xbf181a75, 0x3785e644),
+     TOBN(0xf53f2c65, 0x8ead09f7), TOBN(0x1335e1d5, 0x9780d14d),
+     TOBN(0x69cc20e0, 0xcd1b66bc), TOBN(0x9b670a37, 0xbbe0bfc8),
+     TOBN(0xce53dc81, 0x28efbeed), TOBN(0x0c74e77c, 0x8326a6e5),
+     TOBN(0x3604e0d2, 0xb88e9a63), TOBN(0xbab38fca, 0x13dc2248),
+     TOBN(0x8ed6e8c8, 0x5c0a3f1e), TOBN(0xbcad2492, 0x7c87c37f),
+     TOBN(0xfdfb62bb, 0x9ee3b78d), TOBN(0xeba8e477, 0xcbceba46),
+     TOBN(0x37d38cb0, 0xeeaede4b), TOBN(0x0bc498e8, 0x7976deb6),
+     TOBN(0xb2944c04, 0x6b6147fb), TOBN(0x8b123f35, 0xf71f9609),
+     TOBN(0xa155dcc7, 0xde79dc24), TOBN(0xf1168a32, 0x558f69cd),
+     TOBN(0xbac21595, 0x0d1850df), TOBN(0x15c8295b, 0xb204c848),
+     TOBN(0xf661aa36, 0x7d8184ff), TOBN(0xc396228e, 0x30447bdb),
+     TOBN(0x11cd5143, 0xbde4a59e), TOBN(0xe3a26e3b, 0x6beab5e6),
+     TOBN(0xd3b3a13f, 0x1402b9d0), TOBN(0x573441c3, 0x2c7bc863),
+     TOBN(0x4b301ec4, 0x578c3e6e), TOBN(0xc26fc9c4, 0x0adaf57e),
+     TOBN(0x96e71bfd, 0x7493cea3), TOBN(0xd05d4b3f, 0x1af81456),
+     TOBN(0xdaca2a8a, 0x6a8c608f), TOBN(0x53ef07f6, 0x0725b276),
+     TOBN(0x07a5fbd2, 0x7824fc56), TOBN(0x34675218, 0x13289077),
+     TOBN(0x5bf69fd5, 0xe0c48349), TOBN(0xa613ddd3, 0xb6aa7875),
+     TOBN(0x7f78c19c, 0x5450d866), TOBN(0x46f4409c, 0x8f84a481),
+     TOBN(0x9f1d1928, 0x90fce239), TOBN(0x016c4168, 0xb2ce44b9),
+     TOBN(0xbae023f0, 0xc7435978), TOBN(0xb152c888, 0x20e30e19),
+     TOBN(0x9c241645, 0xe3fa6faf), TOBN(0x735d95c1, 0x84823e60),
+     TOBN(0x03197573, 0x03955317), TOBN(0x0b4b02a9, 0xf03b4995),
+     TOBN(0x076bf559, 0x70274600), TOBN(0x32c5cc53, 0xaaf57508),
+     TOBN(0xe8af6d1f, 0x60624129), TOBN(0xb7bc5d64, 0x9a5e2b5e),
+     TOBN(0x3814b048, 0x5f082d72), TOBN(0x76f267f2, 0xce19677a),
+     TOBN(0x626c630f, 0xb36eed93), TOBN(0x55230cd7, 0x3bf56803),
+     TOBN(0x78837949, 0xce2736a0), TOBN(0x0d792d60, 0xaa6c55f1),
+     TOBN(0x0318dbfd, 0xd5c7c5d2), TOBN(0xb38f8da7, 0x072b342d),
+     TOBN(0x3569bddc, 0x7b8de38a), TOBN(0xf25b5887, 0xa1c94842),
+     TOBN(0xb2d5b284, 0x2946ad60), TOBN(0x854f29ad, 0xe9d1707e),
+     TOBN(0xaa5159dc, 0x2c6a4509), TOBN(0x899f94c0, 0x57189837),
+     TOBN(0xcf6adc51, 0xf4a55b03), TOBN(0x261762de, 0x35e3b2d5),
+     TOBN(0x4cc43012, 0x04827b51), TOBN(0xcd22a113, 0xc6021442),
+     TOBN(0xce2fd61a, 0x247c9569), TOBN(0x59a50973, 0xd152beca),
+     TOBN(0x6c835a11, 0x63a716d4), TOBN(0xc26455ed, 0x187dedcf),
+     TOBN(0x27f536e0, 0x49ce89e7), TOBN(0x18908539, 0xcc890cb5),
+     TOBN(0x308909ab, 0xd83c2aa1), TOBN(0xecd3142b, 0x1ab73bd3),
+     TOBN(0x6a85bf59, 0xb3f5ab84), TOBN(0x3c320a68, 0xf2bea4c6),
+     TOBN(0xad8dc538, 0x6da4541f), TOBN(0xeaf34eb0, 0xb7c41186),
+     TOBN(0x1c780129, 0x977c97c4), TOBN(0x5ff9beeb, 0xc57eb9fa),
+     TOBN(0xa24d0524, 0xc822c478), TOBN(0xfd8eec2a, 0x461cd415),
+     TOBN(0xfbde194e, 0xf027458c), TOBN(0xb4ff5319, 0x1d1be115),
+     TOBN(0x63f874d9, 0x4866d6f4), TOBN(0x35c75015, 0xb21ad0c9),
+     TOBN(0xa6b5c9d6, 0x46ac49d2), TOBN(0x42c77c0b, 0x83137aa9),
+     TOBN(0x24d000fc, 0x68225a38), TOBN(0x0f63cfc8, 0x2fe1e907),
+     TOBN(0x22d1b01b, 0xc6441f95), TOBN(0x7d38f719, 0xec8e448f),
+     TOBN(0x9b33fa5f, 0x787fb1ba), TOBN(0x94dcfda1, 0x190158df),
+     TOBN(0xc47cb339, 0x5f6d4a09), TOBN(0x6b4f355c, 0xee52b826),
+     TOBN(0x3d100f5d, 0xf51b930a), TOBN(0xf4512fac, 0x9f668f69),
+     TOBN(0x546781d5, 0x206c4c74), TOBN(0xd021d4d4, 0xcb4d2e48),
+     TOBN(0x494a54c2, 0xca085c2d), TOBN(0xf1dbaca4, 0x520850a8),
+     TOBN(0x63c79326, 0x490a1aca), TOBN(0xcb64dd9c, 0x41526b02),
+     TOBN(0xbb772591, 0xa2979258), TOBN(0x3f582970, 0x48d97846),
+     TOBN(0xd66b70d1, 0x7c213ba7), TOBN(0xc28febb5, 0xe8a0ced4),
+     TOBN(0x6b911831, 0xc10338c1), TOBN(0x0d54e389, 0xbf0126f3),
+     TOBN(0x7048d460, 0x4af206ee), TOBN(0x786c88f6, 0x77e97cb9),
+     TOBN(0xd4375ae1, 0xac64802e), TOBN(0x469bcfe1, 0xd53ec11c),
+     TOBN(0xfc9b340d, 0x47062230), TOBN(0xe743bb57, 0xc5b4a3ac),
+     TOBN(0xfe00b4aa, 0x59ef45ac), TOBN(0x29a4ef23, 0x59edf188),
+     TOBN(0x40242efe, 0xb483689b), TOBN(0x2575d3f6, 0x513ac262),
+     TOBN(0xf30037c8, 0x0ca6db72), TOBN(0xc9fcce82, 0x98864be2),
+     TOBN(0x84a112ff, 0x0149362d), TOBN(0x95e57582, 0x1c4ae971),
+     TOBN(0x1fa4b1a8, 0x945cf86c), TOBN(0x4525a734, 0x0b024a2f),
+     TOBN(0xe76c8b62, 0x8f338360), TOBN(0x483ff593, 0x28edf32b),
+     TOBN(0x67e8e90a, 0x298b1aec), TOBN(0x9caab338, 0x736d9a21),
+     TOBN(0x5c09d2fd, 0x66892709), TOBN(0x2496b4dc, 0xb55a1d41),
+     TOBN(0x93f5fb1a, 0xe24a4394), TOBN(0x08c75049, 0x6fa8f6c1),
+     TOBN(0xcaead1c2, 0xc905d85f), TOBN(0xe9d7f790, 0x0733ae57),
+     TOBN(0x24c9a65c, 0xf07cdd94), TOBN(0x7389359c, 0xa4b55931),
+     TOBN(0xf58709b7, 0x367e45f7), TOBN(0x1f203067, 0xcb7e7adc),
+     TOBN(0x82444bff, 0xc7b72818), TOBN(0x07303b35, 0xbaac8033),
+     TOBN(0x1e1ee4e4, 0xd13b7ea1), TOBN(0xe6489b24, 0xe0e74180),
+     TOBN(0xa5f2c610, 0x7e70ef70), TOBN(0xa1655412, 0xbdd10894),
+     TOBN(0x555ebefb, 0x7af4194e), TOBN(0x533c1c3c, 0x8e89bd9c),
+     TOBN(0x735b9b57, 0x89895856), TOBN(0x15fb3cd2, 0x567f5c15),
+     TOBN(0x057fed45, 0x526f09fd), TOBN(0xe8a4f10c, 0x8128240a),
+     TOBN(0x9332efc4, 0xff2bfd8d), TOBN(0x214e77a0, 0xbd35aa31),
+     TOBN(0x32896d73, 0x14faa40e), TOBN(0x767867ec, 0x01e5f186),
+     TOBN(0xc9adf8f1, 0x17a1813e), TOBN(0xcb6cda78, 0x54741795),
+     TOBN(0xb7521b6d, 0x349d51aa), TOBN(0xf56b5a9e, 0xe3c7b8e9),
+     TOBN(0xc6f1e5c9, 0x32a096df), TOBN(0x083667c4, 0xa3635024),
+     TOBN(0x365ea135, 0x18087f2f), TOBN(0xf1b8eaac, 0xd136e45d),
+     TOBN(0xc8a0e484, 0x73aec989), TOBN(0xd75a324b, 0x142c9259),
+     TOBN(0xb7b4d001, 0x01dae185), TOBN(0x45434e0b, 0x9b7a94bc),
+     TOBN(0xf54339af, 0xfbd8cb0b), TOBN(0xdcc4569e, 0xe98ef49e),
+     TOBN(0x7789318a, 0x09a51299), TOBN(0x81b4d206, 0xb2b025d8),
+     TOBN(0xf64aa418, 0xfae85792), TOBN(0x3e50258f, 0xacd7baf7),
+     TOBN(0xdce84cdb, 0x2996864b), TOBN(0xa2e67089, 0x1f485fa4),
+     TOBN(0xb28b2bb6, 0x534c6a5a), TOBN(0x31a7ec6b, 0xc94b9d39),
+     TOBN(0x1d217766, 0xd6bc20da), TOBN(0x4acdb5ec, 0x86761190),
+     TOBN(0x68726328, 0x73701063), TOBN(0x4d24ee7c, 0x2128c29b),
+     TOBN(0xc072ebd3, 0xa19fd868), TOBN(0x612e481c, 0xdb8ddd3b),
+     TOBN(0xb4e1d754, 0x1a64d852), TOBN(0x00ef95ac, 0xc4c6c4ab),
+     TOBN(0x1536d2ed, 0xaa0a6c46), TOBN(0x61294086, 0x43774790),
+     TOBN(0x54af25e8, 0x343fda10), TOBN(0x9ff9d98d, 0xfd25d6f2),
+     TOBN(0x0746af7c, 0x468b8835), TOBN(0x977a31cb, 0x730ecea7),
+     TOBN(0xa5096b80, 0xc2cf4a81), TOBN(0xaa986833, 0x6458c37a),
+     TOBN(0x6af29bf3, 0xa6bd9d34), TOBN(0x6a62fe9b, 0x33c5d854),
+     TOBN(0x50e6c304, 0xb7133b5e), TOBN(0x04b60159, 0x7d6e6848),
+     TOBN(0x4cd296df, 0x5579bea4), TOBN(0x10e35ac8, 0x5ceedaf1),
+     TOBN(0x04c4c5fd, 0xe3bcc5b1), TOBN(0x95f9ee8a, 0x89412cf9),
+     TOBN(0x2c9459ee, 0x82b6eb0f), TOBN(0x2e845765, 0x95c2aadd),
+     TOBN(0x774a84ae, 0xd327fcfe), TOBN(0xd8c93722, 0x0368d476),
+     TOBN(0x0dbd5748, 0xf83e8a3b), TOBN(0xa579aa96, 0x8d2495f3),
+     TOBN(0x535996a0, 0xae496e9b), TOBN(0x07afbfe9, 0xb7f9bcc2),
+     TOBN(0x3ac1dc6d, 0x5b7bd293), TOBN(0x3b592cff, 0x7022323d),
+     TOBN(0xba0deb98, 0x9c0a3e76), TOBN(0x18e78e9f, 0x4b197acb),
+     TOBN(0x211cde10, 0x296c36ef), TOBN(0x7ee89672, 0x82c4da77),
+     TOBN(0xb617d270, 0xa57836da), TOBN(0xf0cd9c31, 0x9cb7560b),
+     TOBN(0x01fdcbf7, 0xe455fe90), TOBN(0x3fb53cbb, 0x7e7334f3),
+     TOBN(0x781e2ea4, 0x4e7de4ec), TOBN(0x8adab3ad, 0x0b384fd0),
+     TOBN(0x129eee2f, 0x53d64829), TOBN(0x7a471e17, 0xa261492b),
+     TOBN(0xe4f9adb9, 0xe4cb4a2c), TOBN(0x3d359f6f, 0x97ba2c2d),
+     TOBN(0x346c6786, 0x0aacd697), TOBN(0x92b444c3, 0x75c2f8a8),
+     TOBN(0xc79fa117, 0xd85df44e), TOBN(0x56782372, 0x398ddf31),
+     TOBN(0x60e690f2, 0xbbbab3b8), TOBN(0x4851f8ae, 0x8b04816b),
+     TOBN(0xc72046ab, 0x9c92e4d2), TOBN(0x518c74a1, 0x7cf3136b),
+     TOBN(0xff4eb50a, 0xf9877d4c), TOBN(0x14578d90, 0xa919cabb),
+     TOBN(0x8218f8c4, 0xac5eb2b6), TOBN(0xa3ccc547, 0x542016e4),
+     TOBN(0x025bf48e, 0x327f8349), TOBN(0xf3e97346, 0xf43cb641),
+     TOBN(0xdc2bafdf, 0x500f1085), TOBN(0x57167876, 0x2f063055),
+     TOBN(0x5bd914b9, 0x411925a6), TOBN(0x7c078d48, 0xa1123de5),
+     TOBN(0xee6bf835, 0x182b165d), TOBN(0xb11b5e5b, 0xba519727),
+     TOBN(0xe33ea76c, 0x1eea7b85), TOBN(0x2352b461, 0x92d4f85e),
+     TOBN(0xf101d334, 0xafe115bb), TOBN(0xfabc1294, 0x889175a3),
+     TOBN(0x7f6bcdc0, 0x5233f925), TOBN(0xe0a802db, 0xe77fec55),
+     TOBN(0xbdb47b75, 0x8069b659), TOBN(0x1c5e12de, 0xf98fbd74),
+     TOBN(0x869c58c6, 0x4b8457ee), TOBN(0xa5360f69, 0x4f7ea9f7),
+     TOBN(0xe576c09f, 0xf460b38f), TOBN(0x6b70d548, 0x22b7fb36),
+     TOBN(0x3fd237f1, 0x3bfae315), TOBN(0x33797852, 0xcbdff369),
+     TOBN(0x97df25f5, 0x25b516f9), TOBN(0x46f388f2, 0xba38ad2d),
+     TOBN(0x656c4658, 0x89d8ddbb), TOBN(0x8830b26e, 0x70f38ee8),
+     TOBN(0x4320fd5c, 0xde1212b0), TOBN(0xc34f30cf, 0xe4a2edb2),
+     TOBN(0xabb131a3, 0x56ab64b8), TOBN(0x7f77f0cc, 0xd99c5d26),
+     TOBN(0x66856a37, 0xbf981d94), TOBN(0x19e76d09, 0x738bd76e),
+     TOBN(0xe76c8ac3, 0x96238f39), TOBN(0xc0a482be, 0xa830b366),
+     TOBN(0xb7b8eaff, 0x0b4eb499), TOBN(0x8ecd83bc, 0x4bfb4865),
+     TOBN(0x971b2cb7, 0xa2f3776f), TOBN(0xb42176a4, 0xf4b88adf),
+     TOBN(0xb9617df5, 0xbe1fa446), TOBN(0x8b32d508, 0xcd031bd2),
+     TOBN(0x1c6bd47d, 0x53b618c0), TOBN(0xc424f46c, 0x6a227923),
+     TOBN(0x7303ffde, 0xdd92d964), TOBN(0xe9712878, 0x71b5abf2),
+     TOBN(0x8f48a632, 0xf815561d), TOBN(0x85f48ff5, 0xd3c055d1),
+     TOBN(0x222a1427, 0x7525684f), TOBN(0xd0d841a0, 0x67360cc3),
+     TOBN(0x4245a926, 0x0b9267c6), TOBN(0xc78913f1, 0xcf07f863),
+     TOBN(0xaa844c8e, 0x4d0d9e24), TOBN(0xa42ad522, 0x3d5f9017),
+     TOBN(0xbd371749, 0xa2c989d5), TOBN(0x928292df, 0xe1f5e78e),
+     TOBN(0x493b383e, 0x0a1ea6da), TOBN(0x5136fd8d, 0x13aee529),
+     TOBN(0x860c44b1, 0xf2c34a99), TOBN(0x3b00aca4, 0xbf5855ac),
+     TOBN(0xabf6aaa0, 0xfaaf37be), TOBN(0x65f43682, 0x2a53ec08),
+     TOBN(0x1d9a5801, 0xa11b12e1), TOBN(0x78a7ab2c, 0xe20ed475),
+     TOBN(0x0de1067e, 0x9a41e0d5), TOBN(0x30473f5f, 0x305023ea),
+     TOBN(0xdd3ae09d, 0x169c7d97), TOBN(0x5cd5baa4, 0xcfaef9cd),
+     TOBN(0x5cd7440b, 0x65a44803), TOBN(0xdc13966a, 0x47f364de),
+     TOBN(0x077b2be8, 0x2b8357c1), TOBN(0x0cb1b4c5, 0xe9d57c2a),
+     TOBN(0x7a4ceb32, 0x05ff363e), TOBN(0xf310fa4d, 0xca35a9ef),
+     TOBN(0xdbb7b352, 0xf97f68c6), TOBN(0x0c773b50, 0x0b02cf58),
+     TOBN(0xea2e4821, 0x3c1f96d9), TOBN(0xffb357b0, 0xeee01815),
+     TOBN(0xb9c924cd, 0xe0f28039), TOBN(0x0b36c95a, 0x46a3fbe4),
+     TOBN(0x1faaaea4, 0x5e46db6c), TOBN(0xcae575c3, 0x1928aaff),
+     TOBN(0x7f671302, 0xa70dab86), TOBN(0xfcbd12a9, 0x71c58cfc),
+     TOBN(0xcbef9acf, 0xbee0cb92), TOBN(0x573da0b9, 0xf8c1b583),
+     TOBN(0x4752fcfe, 0x0d41d550), TOBN(0xe7eec0e3, 0x2155cffe),
+     TOBN(0x0fc39fcb, 0x545ae248), TOBN(0x522cb8d1, 0x8065f44e),
+     TOBN(0x263c962a, 0x70cbb96c), TOBN(0xe034362a, 0xbcd124a9),
+     TOBN(0xf120db28, 0x3c2ae58d), TOBN(0xb9a38d49, 0xfef6d507),
+     TOBN(0xb1fd2a82, 0x1ff140fd), TOBN(0xbd162f30, 0x20aee7e0),
+     TOBN(0x4e17a5d4, 0xcb251949), TOBN(0x2aebcb83, 0x4f7e1c3d),
+     TOBN(0x608eb25f, 0x937b0527), TOBN(0xf42e1e47, 0xeb7d9997),
+     TOBN(0xeba699c4, 0xb8a53a29), TOBN(0x1f921c71, 0xe091b536),
+     TOBN(0xcce29e7b, 0x5b26bbd5), TOBN(0x7a8ef5ed, 0x3b61a680),
+     TOBN(0xe5ef8043, 0xba1f1c7e), TOBN(0x16ea8217, 0x18158dda),
+     TOBN(0x01778a2b, 0x599ff0f9), TOBN(0x68a923d7, 0x8104fc6b),
+     TOBN(0x5bfa44df, 0xda694ff3), TOBN(0x4f7199db, 0xf7667f12),
+     TOBN(0xc06d8ff6, 0xe46f2a79), TOBN(0x08b5dead, 0xe9f8131d),
+     TOBN(0x02519a59, 0xabb4ce7c), TOBN(0xc4f710bc, 0xb42aec3e),
+     TOBN(0x3d77b057, 0x78bde41a), TOBN(0x6474bf80, 0xb4186b5a),
+     TOBN(0x048b3f67, 0x88c65741), TOBN(0xc64519de, 0x03c7c154),
+     TOBN(0xdf073846, 0x0edfcc4f), TOBN(0x319aa737, 0x48f1aa6b),
+     TOBN(0x8b9f8a02, 0xca909f77), TOBN(0x90258139, 0x7580bfef),
+     TOBN(0xd8bfd3ca, 0xc0c22719), TOBN(0xc60209e4, 0xc9ca151e),
+     TOBN(0x7a744ab5, 0xd9a1a69c), TOBN(0x6de5048b, 0x14937f8f),
+     TOBN(0x171938d8, 0xe115ac04), TOBN(0x7df70940, 0x1c6b16d2),
+     TOBN(0xa6aeb663, 0x7f8e94e7), TOBN(0xc130388e, 0x2a2cf094),
+     TOBN(0x1850be84, 0x77f54e6e), TOBN(0x9f258a72, 0x65d60fe5),
+     TOBN(0xff7ff0c0, 0x6c9146d6), TOBN(0x039aaf90, 0xe63a830b),
+     TOBN(0x38f27a73, 0x9460342f), TOBN(0x4703148c, 0x3f795f8a),
+     TOBN(0x1bb5467b, 0x9681a97e), TOBN(0x00931ba5, 0xecaeb594),
+     TOBN(0xcdb6719d, 0x786f337c), TOBN(0xd9c01cd2, 0xe704397d),
+     TOBN(0x0f4a3f20, 0x555c2fef), TOBN(0x00452509, 0x7c0af223),
+     TOBN(0x54a58047, 0x84db8e76), TOBN(0x3bacf1aa, 0x93c8aa06),
+     TOBN(0x11ca957c, 0xf7919422), TOBN(0x50641053, 0x78cdaa40),
+     TOBN(0x7a303874, 0x9f7144ae), TOBN(0x170c963f, 0x43d4acfd),
+     TOBN(0x5e148149, 0x58ddd3ef), TOBN(0xa7bde582, 0x9e72dba8),
+     TOBN(0x0769da8b, 0x6fa68750), TOBN(0xfa64e532, 0x572e0249),
+     TOBN(0xfcaadf9d, 0x2619ad31), TOBN(0x87882daa, 0xa7b349cd),
+     TOBN(0x9f6eb731, 0x6c67a775), TOBN(0xcb10471a, 0xefc5d0b1),
+     TOBN(0xb433750c, 0xe1b806b2), TOBN(0x19c5714d, 0x57b1ae7e),
+     TOBN(0xc0dc8b7b, 0xed03fd3f), TOBN(0xdd03344f, 0x31bc194e),
+     TOBN(0xa66c52a7, 0x8c6320b5), TOBN(0x8bc82ce3, 0xd0b6fd93),
+     TOBN(0xf8e13501, 0xb35f1341), TOBN(0xe53156dd, 0x25a43e42),
+     TOBN(0xd3adf27e, 0x4daeb85c), TOBN(0xb81d8379, 0xbbeddeb5),
+     TOBN(0x1b0b546e, 0x2e435867), TOBN(0x9020eb94, 0xeba5dd60),
+     TOBN(0x37d91161, 0x8210cb9d), TOBN(0x4c596b31, 0x5c91f1cf),
+     TOBN(0xb228a90f, 0x0e0b040d), TOBN(0xbaf02d82, 0x45ff897f),
+     TOBN(0x2aac79e6, 0x00fa6122), TOBN(0x24828817, 0x8e36f557),
+     TOBN(0xb9521d31, 0x113ec356), TOBN(0x9e48861e, 0x15eff1f8),
+     TOBN(0x2aa1d412, 0xe0d41715), TOBN(0x71f86203, 0x53f131b8),
+     TOBN(0xf60da8da, 0x3fd19408), TOBN(0x4aa716dc, 0x278d9d99),
+     TOBN(0x394531f7, 0xa8c51c90), TOBN(0xb560b0e8, 0xf59db51c),
+     TOBN(0xa28fc992, 0xfa34bdad), TOBN(0xf024fa14, 0x9cd4f8bd),
+     TOBN(0x5cf530f7, 0x23a9d0d3), TOBN(0x615ca193, 0xe28c9b56),
+     TOBN(0x6d2a483d, 0x6f73c51e), TOBN(0xa4cb2412, 0xea0dc2dd),
+     TOBN(0x50663c41, 0x1eb917ff), TOBN(0x3d3a74cf, 0xeade299e),
+     TOBN(0x29b3990f, 0x4a7a9202), TOBN(0xa9bccf59, 0xa7b15c3d),
+     TOBN(0x66a3ccdc, 0xa5df9208), TOBN(0x48027c14, 0x43f2f929),
+     TOBN(0xd385377c, 0x40b557f0), TOBN(0xe001c366, 0xcd684660),
+     TOBN(0x1b18ed6b, 0xe2183a27), TOBN(0x879738d8, 0x63210329),
+     TOBN(0xa687c74b, 0xbda94882), TOBN(0xd1bbcc48, 0xa684b299),
+     TOBN(0xaf6f1112, 0x863b3724), TOBN(0x6943d1b4, 0x2c8ce9f8),
+     TOBN(0xe044a3bb, 0x098cafb4), TOBN(0x27ed2310, 0x60d48caf),
+     TOBN(0x542b5675, 0x3a31b84d), TOBN(0xcbf3dd50, 0xfcddbed7),
+     TOBN(0x25031f16, 0x41b1d830), TOBN(0xa7ec851d, 0xcb0c1e27),
+     TOBN(0xac1c8fe0, 0xb5ae75db), TOBN(0xb24c7557, 0x08c52120),
+     TOBN(0x57f811dc, 0x1d4636c3), TOBN(0xf8436526, 0x681a9939),
+     TOBN(0x1f6bc6d9, 0x9c81adb3), TOBN(0x840f8ac3, 0x5b7d80d4),
+     TOBN(0x731a9811, 0xf4387f1a), TOBN(0x7c501cd3, 0xb5156880),
+     TOBN(0xa5ca4a07, 0xdfe68867), TOBN(0xf123d8f0, 0x5fcea120),
+     TOBN(0x1fbb0e71, 0xd607039e), TOBN(0x2b70e215, 0xcd3a4546),
+     TOBN(0x32d2f01d, 0x53324091), TOBN(0xb796ff08, 0x180ab19b),
+     TOBN(0x32d87a86, 0x3c57c4aa), TOBN(0x2aed9caf, 0xb7c49a27),
+     TOBN(0x9fb35eac, 0x31630d98), TOBN(0x338e8cdf, 0x5c3e20a3),
+     TOBN(0x80f16182, 0x66cde8db), TOBN(0x4e159980, 0x2d72fd36),
+     TOBN(0xd7b8f13b, 0x9b6e5072), TOBN(0xf5213907, 0x3b7b5dc1),
+     TOBN(0x4d431f1d, 0x8ce4396e), TOBN(0x37a1a680, 0xa7ed2142),
+     TOBN(0xbf375696, 0xd01aaf6b), TOBN(0xaa1c0c54, 0xe63aab66),
+     TOBN(0x3014368b, 0x4ed80940), TOBN(0x67e6d056, 0x7a6fcedd),
+     TOBN(0x7c208c49, 0xca97579f), TOBN(0xfe3d7a81, 0xa23597f6),
+     TOBN(0x5e203202, 0x7e096ae2), TOBN(0xb1f3e1e7, 0x24b39366),
+     TOBN(0x26da26f3, 0x2fdcdffc), TOBN(0x79422f1d, 0x6097be83),}
+    ,
+    {TOBN(0x263a2cfb, 0x9db3b381), TOBN(0x9c3a2dee, 0xd4df0a4b),
+     TOBN(0x728d06e9, 0x7d04e61f), TOBN(0x8b1adfbc, 0x42449325),
+     TOBN(0x6ec1d939, 0x7e053a1b), TOBN(0xee2be5c7, 0x66daf707),
+     TOBN(0x80ba1e14, 0x810ac7ab), TOBN(0xdd2ae778, 0xf530f174),
+     TOBN(0x0435d97a, 0x205b9d8b), TOBN(0x6eb8f064, 0x056756d4),
+     TOBN(0xd5e88a8b, 0xb6f8210e), TOBN(0x070ef12d, 0xec9fd9ea),
+     TOBN(0x4d849505, 0x3bcc876a), TOBN(0x12a75338, 0xa7404ce3),
+     TOBN(0xd22b49e1, 0xb8a1db5e), TOBN(0xec1f2051, 0x14bfa5ad),
+     TOBN(0xadbaeb79, 0xb6828f36), TOBN(0x9d7a0258, 0x01bd5b9e),
+     TOBN(0xeda01e0d, 0x1e844b0c), TOBN(0x4b625175, 0x887edfc9),
+     TOBN(0x14109fdd, 0x9669b621), TOBN(0x88a2ca56, 0xf6f87b98),
+     TOBN(0xfe2eb788, 0x170df6bc), TOBN(0x0cea06f4, 0xffa473f9),
+     TOBN(0x43ed81b5, 0xc4e83d33), TOBN(0xd9f35879, 0x5efd488b),
+     TOBN(0x164a620f, 0x9deb4d0f), TOBN(0xc6927bdb, 0xac6a7394),
+     TOBN(0x45c28df7, 0x9f9e0f03), TOBN(0x2868661e, 0xfcd7e1a9),
+     TOBN(0x7cf4e8d0, 0xffa348f1), TOBN(0x6bd4c284, 0x398538e0),
+     TOBN(0x2618a091, 0x289a8619), TOBN(0xef796e60, 0x6671b173),
+     TOBN(0x664e46e5, 0x9090c632), TOBN(0xa38062d4, 0x1e66f8fb),
+     TOBN(0x6c744a20, 0x0573274e), TOBN(0xd07b67e4, 0xa9271394),
+     TOBN(0x391223b2, 0x6bdc0e20), TOBN(0xbe2d93f1, 0xeb0a05a7),
+     TOBN(0xf23e2e53, 0x3f36d141), TOBN(0xe84bb3d4, 0x4dfca442),
+     TOBN(0xb804a48d, 0x6b7c023a), TOBN(0x1e16a8fa, 0x76431c3b),
+     TOBN(0x1b5452ad, 0xddd472e0), TOBN(0x7d405ee7, 0x0d1ee127),
+     TOBN(0x50fc6f1d, 0xffa27599), TOBN(0x351ac53c, 0xbf391b35),
+     TOBN(0x7efa14b8, 0x4444896b), TOBN(0x64974d2f, 0xf94027fb),
+     TOBN(0xefdcd0e8, 0xde84487d), TOBN(0x8c45b260, 0x2b48989b),
+     TOBN(0xa8fcbbc2, 0xd8463487), TOBN(0xd1b2b3f7, 0x3fbc476c),
+     TOBN(0x21d005b7, 0xc8f443c0), TOBN(0x518f2e67, 0x40c0139c),
+     TOBN(0x56036e8c, 0x06d75fc1), TOBN(0x2dcf7bb7, 0x3249a89f),
+     TOBN(0x81dd1d3d, 0xe245e7dd), TOBN(0xf578dc4b, 0xebd6e2a7),
+     TOBN(0x4c028903, 0xdf2ce7a0), TOBN(0xaee36288, 0x9c39afac),
+     TOBN(0xdc847c31, 0x146404ab), TOBN(0x6304c0d8, 0xa4e97818),
+     TOBN(0xae51dca2, 0xa91f6791), TOBN(0x2abe4190, 0x9baa9efc),
+     TOBN(0xd9d2e2f4, 0x559c7ac1), TOBN(0xe82f4b51, 0xfc9f773a),
+     TOBN(0xa7713027, 0x4073e81c), TOBN(0xc0276fac, 0xfbb596fc),
+     TOBN(0x1d819fc9, 0xa684f70c), TOBN(0x29b47fdd, 0xc9f7b1e0),
+     TOBN(0x358de103, 0x459b1940), TOBN(0xec881c59, 0x5b013e93),
+     TOBN(0x51574c93, 0x49532ad3), TOBN(0x2db1d445, 0xb37b46de),
+     TOBN(0xc6445b87, 0xdf239fd8), TOBN(0xc718af75, 0x151d24ee),
+     TOBN(0xaea1c4a4, 0xf43c6259), TOBN(0x40c0e5d7, 0x70be02f7),
+     TOBN(0x6a4590f4, 0x721b33f2), TOBN(0x2124f1fb, 0xfedf04ea),
+     TOBN(0xf8e53cde, 0x9745efe7), TOBN(0xe7e10432, 0x65f046d9),
+     TOBN(0xc3fca28e, 0xe4d0c7e6), TOBN(0x847e339a, 0x87253b1b),
+     TOBN(0x9b595348, 0x3743e643), TOBN(0xcb6a0a0b, 0x4fd12fc5),
+     TOBN(0xfb6836c3, 0x27d02dcc), TOBN(0x5ad00982, 0x7a68bcc2),
+     TOBN(0x1b24b44c, 0x005e912d), TOBN(0xcc83d20f, 0x811fdcfe),
+     TOBN(0x36527ec1, 0x666fba0c), TOBN(0x69948197, 0x14754635),
+     TOBN(0xfcdcb1a8, 0x556da9c2), TOBN(0xa5934267, 0x81a732b2),
+     TOBN(0xec1214ed, 0xa714181d), TOBN(0x609ac13b, 0x6067b341),
+     TOBN(0xff4b4c97, 0xa545df1f), TOBN(0xa1240501, 0x34d2076b),
+     TOBN(0x6efa0c23, 0x1409ca97), TOBN(0x254cc1a8, 0x20638c43),
+     TOBN(0xd4e363af, 0xdcfb46cd), TOBN(0x62c2adc3, 0x03942a27),
+     TOBN(0xc67b9df0, 0x56e46483), TOBN(0xa55abb20, 0x63736356),
+     TOBN(0xab93c098, 0xc551bc52), TOBN(0x382b49f9, 0xb15fe64b),
+     TOBN(0x9ec221ad, 0x4dff8d47), TOBN(0x79caf615, 0x437df4d6),
+     TOBN(0x5f13dc64, 0xbb456509), TOBN(0xe4c589d9, 0x191f0714),
+     TOBN(0x27b6a8ab, 0x3fd40e09), TOBN(0xe455842e, 0x77313ea9),
+     TOBN(0x8b51d1e2, 0x1f55988b), TOBN(0x5716dd73, 0x062bbbfc),
+     TOBN(0x633c11e5, 0x4e8bf3de), TOBN(0x9a0e77b6, 0x1b85be3b),
+     TOBN(0x56510729, 0x0911cca6), TOBN(0x27e76495, 0xefa6590f),
+     TOBN(0xe4ac8b33, 0x070d3aab), TOBN(0x2643672b, 0x9a2cd5e5),
+     TOBN(0x52eff79b, 0x1cfc9173), TOBN(0x665ca49b, 0x90a7c13f),
+     TOBN(0x5a8dda59, 0xb3efb998), TOBN(0x8a5b922d, 0x052f1341),
+     TOBN(0xae9ebbab, 0x3cf9a530), TOBN(0x35986e7b, 0xf56da4d7),
+     TOBN(0x3a636b5c, 0xff3513cc), TOBN(0xbb0cf8ba, 0x3198f7dd),
+     TOBN(0xb8d40522, 0x41f16f86), TOBN(0x760575d8, 0xde13a7bf),
+     TOBN(0x36f74e16, 0x9f7aa181), TOBN(0x163a3ecf, 0xf509ed1c),
+     TOBN(0x6aead61f, 0x3c40a491), TOBN(0x158c95fc, 0xdfe8fcaa),
+     TOBN(0xa3991b6e, 0x13cda46f), TOBN(0x79482415, 0x342faed0),
+     TOBN(0xf3ba5bde, 0x666b5970), TOBN(0x1d52e6bc, 0xb26ab6dd),
+     TOBN(0x768ba1e7, 0x8608dd3d), TOBN(0x4930db2a, 0xea076586),
+     TOBN(0xd9575714, 0xe7dc1afa), TOBN(0x1fc7bf7d, 0xf7c58817),
+     TOBN(0x6b47accd, 0xd9eee96c), TOBN(0x0ca277fb, 0xe58cec37),
+     TOBN(0x113fe413, 0xe702c42a), TOBN(0xdd1764ee, 0xc47cbe51),
+     TOBN(0x041e7cde, 0x7b3ed739), TOBN(0x50cb7459, 0x5ce9e1c0),
+     TOBN(0x35568513, 0x2925b212), TOBN(0x7cff95c4, 0x001b081c),
+     TOBN(0x63ee4cbd, 0x8088b454), TOBN(0xdb7f32f7, 0x9a9e0c8a),
+     TOBN(0xb377d418, 0x6b2447cb), TOBN(0xe3e982aa, 0xd370219b),
+     TOBN(0x06ccc1e4, 0xc2a2a593), TOBN(0x72c36865, 0x0773f24f),
+     TOBN(0xa13b4da7, 0x95859423), TOBN(0x8bbf1d33, 0x75040c8f),
+     TOBN(0x726f0973, 0xda50c991), TOBN(0x48afcd5b, 0x822d6ee2),
+     TOBN(0xe5fc718b, 0x20fd7771), TOBN(0xb9e8e77d, 0xfd0807a1),
+     TOBN(0x7f5e0f44, 0x99a7703d), TOBN(0x6972930e, 0x618e36f3),
+     TOBN(0x2b7c77b8, 0x23807bbe), TOBN(0xe5b82405, 0xcb27ff50),
+     TOBN(0xba8b8be3, 0xbd379062), TOBN(0xd64b7a1d, 0x2dce4a92),
+     TOBN(0x040a73c5, 0xb2952e37), TOBN(0x0a9e252e, 0xd438aeca),
+     TOBN(0xdd43956b, 0xc39d3bcb), TOBN(0x1a31ca00, 0xb32b2d63),
+     TOBN(0xd67133b8, 0x5c417a18), TOBN(0xd08e4790, 0x2ef442c8),
+     TOBN(0x98cb1ae9, 0x255c0980), TOBN(0x4bd86381, 0x2b4a739f),
+     TOBN(0x5a5c31e1, 0x1e4a45a1), TOBN(0x1e5d55fe, 0x9cb0db2f),
+     TOBN(0x74661b06, 0x8ff5cc29), TOBN(0x026b389f, 0x0eb8a4f4),
+     TOBN(0x536b21a4, 0x58848c24), TOBN(0x2e5bf8ec, 0x81dc72b0),
+     TOBN(0x03c187d0, 0xad886aac), TOBN(0x5c16878a, 0xb771b645),
+     TOBN(0xb07dfc6f, 0xc74045ab), TOBN(0x2c6360bf, 0x7800caed),
+     TOBN(0x24295bb5, 0xb9c972a3), TOBN(0xc9e6f88e, 0x7c9a6dba),
+     TOBN(0x90ffbf24, 0x92a79aa6), TOBN(0xde29d50a, 0x41c26ac2),
+     TOBN(0x9f0af483, 0xd309cbe6), TOBN(0x5b020d8a, 0xe0bced4f),
+     TOBN(0x606e986d, 0xb38023e3), TOBN(0xad8f2c9d, 0x1abc6933),
+     TOBN(0x19292e1d, 0xe7400e93), TOBN(0xfe3e18a9, 0x52be5e4d),
+     TOBN(0xe8e9771d, 0x2e0680bf), TOBN(0x8c5bec98, 0xc54db063),
+     TOBN(0x2af9662a, 0x74a55d1f), TOBN(0xe3fbf28f, 0x046f66d8),
+     TOBN(0xa3a72ab4, 0xd4dc4794), TOBN(0x09779f45, 0x5c7c2dd8),
+     TOBN(0xd893bdaf, 0xc3d19d8d), TOBN(0xd5a75094, 0x57d6a6df),
+     TOBN(0x8cf8fef9, 0x952e6255), TOBN(0x3da67cfb, 0xda9a8aff),
+     TOBN(0x4c23f62a, 0x2c160dcd), TOBN(0x34e6c5e3, 0x8f90eaef),
+     TOBN(0x35865519, 0xa9a65d5a), TOBN(0x07c48aae, 0x8fd38a3d),
+     TOBN(0xb7e7aeda, 0x50068527), TOBN(0x2c09ef23, 0x1c90936a),
+     TOBN(0x31ecfeb6, 0xe879324c), TOBN(0xa0871f6b, 0xfb0ec938),
+     TOBN(0xb1f0fb68, 0xd84d835d), TOBN(0xc90caf39, 0x861dc1e6),
+     TOBN(0x12e5b046, 0x7594f8d7), TOBN(0x26897ae2, 0x65012b92),
+     TOBN(0xbcf68a08, 0xa4d6755d), TOBN(0x403ee41c, 0x0991fbda),
+     TOBN(0x733e343e, 0x3bbf17e8), TOBN(0xd2c7980d, 0x679b3d65),
+     TOBN(0x33056232, 0xd2e11305), TOBN(0x966be492, 0xf3c07a6f),
+     TOBN(0x6a8878ff, 0xbb15509d), TOBN(0xff221101, 0x0a9b59a4),
+     TOBN(0x6c9f564a, 0xabe30129), TOBN(0xc6f2c940, 0x336e64cf),
+     TOBN(0x0fe75262, 0x8b0c8022), TOBN(0xbe0267e9, 0x6ae8db87),
+     TOBN(0x22e192f1, 0x93bc042b), TOBN(0xf085b534, 0xb237c458),
+     TOBN(0xa0d192bd, 0x832c4168), TOBN(0x7a76e9e3, 0xbdf6271d),
+     TOBN(0x52a882fa, 0xb88911b5), TOBN(0xc85345e4, 0xb4db0eb5),
+     TOBN(0xa3be02a6, 0x81a7c3ff), TOBN(0x51889c8c, 0xf0ec0469),
+     TOBN(0x9d031369, 0xa5e829e5), TOBN(0xcbb4c6fc, 0x1607aa41),
+     TOBN(0x75ac59a6, 0x241d84c1), TOBN(0xc043f2bf, 0x8829e0ee),
+     TOBN(0x82a38f75, 0x8ea5e185), TOBN(0x8bda40b9, 0xd87cbd9f),
+     TOBN(0x9e65e75e, 0x2d8fc601), TOBN(0x3d515f74, 0xa35690b3),
+     TOBN(0x534acf4f, 0xda79e5ac), TOBN(0x68b83b3a, 0x8630215f),
+     TOBN(0x5c748b2e, 0xd085756e), TOBN(0xb0317258, 0xe5d37cb2),
+     TOBN(0x6735841a, 0xc5ccc2c4), TOBN(0x7d7dc96b, 0x3d9d5069),
+     TOBN(0xa147e410, 0xfd1754bd), TOBN(0x65296e94, 0xd399ddd5),
+     TOBN(0xf6b5b2d0, 0xbc8fa5bc), TOBN(0x8a5ead67, 0x500c277b),
+     TOBN(0x214625e6, 0xdfa08a5d), TOBN(0x51fdfedc, 0x959cf047),
+     TOBN(0x6bc9430b, 0x289fca32), TOBN(0xe36ff0cf, 0x9d9bdc3f),
+     TOBN(0x2fe187cb, 0x58ea0ede), TOBN(0xed66af20, 0x5a900b3f),
+     TOBN(0x00e0968b, 0x5fa9f4d6), TOBN(0x2d4066ce, 0x37a362e7),
+     TOBN(0xa99a9748, 0xbd07e772), TOBN(0x710989c0, 0x06a4f1d0),
+     TOBN(0xd5dedf35, 0xce40cbd8), TOBN(0xab55c5f0, 0x1743293d),
+     TOBN(0x766f1144, 0x8aa24e2c), TOBN(0x94d874f8, 0x605fbcb4),
+     TOBN(0xa365f0e8, 0xa518001b), TOBN(0xee605eb6, 0x9d04ef0f),
+     TOBN(0x5a3915cd, 0xba8d4d25), TOBN(0x44c0e1b8, 0xb5113472),
+     TOBN(0xcbb024e8, 0x8b6740dc), TOBN(0x89087a53, 0xee1d4f0c),
+     TOBN(0xa88fa05c, 0x1fc4e372), TOBN(0x8bf395cb, 0xaf8b3af2),
+     TOBN(0x1e71c9a1, 0xdeb8568b), TOBN(0xa35daea0, 0x80fb3d32),
+     TOBN(0xe8b6f266, 0x2cf8fb81), TOBN(0x6d51afe8, 0x9490696a),
+     TOBN(0x81beac6e, 0x51803a19), TOBN(0xe3d24b7f, 0x86219080),
+     TOBN(0x727cfd9d, 0xdf6f463c), TOBN(0x8c6865ca, 0x72284ee8),
+     TOBN(0x32c88b7d, 0xb743f4ef), TOBN(0x3793909b, 0xe7d11dce),
+     TOBN(0xd398f922, 0x2ff2ebe8), TOBN(0x2c70ca44, 0xe5e49796),
+     TOBN(0xdf4d9929, 0xcb1131b1), TOBN(0x7826f298, 0x25888e79),
+     TOBN(0x4d3a112c, 0xf1d8740a), TOBN(0x00384cb6, 0x270afa8b),
+     TOBN(0xcb64125b, 0x3ab48095), TOBN(0x3451c256, 0x62d05106),
+     TOBN(0xd73d577d, 0xa4955845), TOBN(0x39570c16, 0xbf9f4433),
+     TOBN(0xd7dfaad3, 0xadecf263), TOBN(0xf1c3d8d1, 0xdc76e102),
+     TOBN(0x5e774a58, 0x54c6a836), TOBN(0xdad4b672, 0x3e92d47b),
+     TOBN(0xbe7e990f, 0xf0d796a0), TOBN(0x5fc62478, 0xdf0e8b02),
+     TOBN(0x8aae8bf4, 0x030c00ad), TOBN(0x3d2db93b, 0x9004ba0f),
+     TOBN(0xe48c8a79, 0xd85d5ddc), TOBN(0xe907caa7, 0x6bb07f34),
+     TOBN(0x58db343a, 0xa39eaed5), TOBN(0x0ea6e007, 0xadaf5724),
+     TOBN(0xe00df169, 0xd23233f3), TOBN(0x3e322796, 0x77cb637f),
+     TOBN(0x1f897c0e, 0x1da0cf6c), TOBN(0xa651f5d8, 0x31d6bbdd),
+     TOBN(0xdd61af19, 0x1a230c76), TOBN(0xbd527272, 0xcdaa5e4a),
+     TOBN(0xca753636, 0xd0abcd7e), TOBN(0x78bdd37c, 0x370bd8dc),
+     TOBN(0xc23916c2, 0x17cd93fe), TOBN(0x65b97a4d, 0xdadce6e2),
+     TOBN(0xe04ed4eb, 0x174e42f8), TOBN(0x1491ccaa, 0xbb21480a),
+     TOBN(0x145a8280, 0x23196332), TOBN(0x3c3862d7, 0x587b479a),
+     TOBN(0x9f4a88a3, 0x01dcd0ed), TOBN(0x4da2b7ef, 0x3ea12f1f),
+     TOBN(0xf8e7ae33, 0xb126e48e), TOBN(0x404a0b32, 0xf494e237),
+     TOBN(0x9beac474, 0xc55acadb), TOBN(0x4ee5cf3b, 0xcbec9fd9),
+     TOBN(0x336b33b9, 0x7df3c8c3), TOBN(0xbd905fe3, 0xb76808fd),
+     TOBN(0x8f436981, 0xaa45c16a), TOBN(0x255c5bfa, 0x3dd27b62),
+     TOBN(0x71965cbf, 0xc3dd9b4d), TOBN(0xce23edbf, 0xfc068a87),
+     TOBN(0xb78d4725, 0x745b029b), TOBN(0x74610713, 0xcefdd9bd),
+     TOBN(0x7116f75f, 0x1266bf52), TOBN(0x02046722, 0x18e49bb6),
+     TOBN(0xdf43df9f, 0x3d6f19e3), TOBN(0xef1bc7d0, 0xe685cb2f),
+     TOBN(0xcddb27c1, 0x7078c432), TOBN(0xe1961b9c, 0xb77fedb7),
+     TOBN(0x1edc2f5c, 0xc2290570), TOBN(0x2c3fefca, 0x19cbd886),
+     TOBN(0xcf880a36, 0xc2af389a), TOBN(0x96c610fd, 0xbda71cea),
+     TOBN(0xf03977a9, 0x32aa8463), TOBN(0x8eb7763f, 0x8586d90a),
+     TOBN(0x3f342454, 0x2a296e77), TOBN(0xc8718683, 0x42837a35),
+     TOBN(0x7dc71090, 0x6a09c731), TOBN(0x54778ffb, 0x51b816db),
+     TOBN(0x6b33bfec, 0xaf06defd), TOBN(0xfe3c105f, 0x8592b70b),
+     TOBN(0xf937fda4, 0x61da6114), TOBN(0x3c13e651, 0x4c266ad7),
+     TOBN(0xe363a829, 0x855938e8), TOBN(0x2eeb5d9e, 0x9de54b72),
+     TOBN(0xbeb93b0e, 0x20ccfab9), TOBN(0x3dffbb5f, 0x25e61a25),
+     TOBN(0x7f655e43, 0x1acc093d), TOBN(0x0cb6cc3d, 0x3964ce61),
+     TOBN(0x6ab283a1, 0xe5e9b460), TOBN(0x55d787c5, 0xa1c7e72d),
+     TOBN(0x4d2efd47, 0xdeadbf02), TOBN(0x11e80219, 0xac459068),
+     TOBN(0x810c7626, 0x71f311f0), TOBN(0xfa17ef8d, 0x4ab6ef53),
+     TOBN(0xaf47fd25, 0x93e43bff), TOBN(0x5cb5ff3f, 0x0be40632),
+     TOBN(0x54687106, 0x8ee61da3), TOBN(0x7764196e, 0xb08afd0f),
+     TOBN(0x831ab3ed, 0xf0290a8f), TOBN(0xcae81966, 0xcb47c387),
+     TOBN(0xaad7dece, 0x184efb4f), TOBN(0xdcfc53b3, 0x4749110e),
+     TOBN(0x6698f23c, 0x4cb632f9), TOBN(0xc42a1ad6, 0xb91f8067),
+     TOBN(0xb116a81d, 0x6284180a), TOBN(0xebedf5f8, 0xe901326f),
+     TOBN(0xf2274c9f, 0x97e3e044), TOBN(0x42018520, 0x11d09fc9),
+     TOBN(0x56a65f17, 0xd18e6e23), TOBN(0x2ea61e2a, 0x352b683c),
+     TOBN(0x27d291bc, 0x575eaa94), TOBN(0x9e7bc721, 0xb8ff522d),
+     TOBN(0x5f7268bf, 0xa7f04d6f), TOBN(0x5868c73f, 0xaba41748),
+     TOBN(0x9f85c2db, 0x7be0eead), TOBN(0x511e7842, 0xff719135),
+     TOBN(0x5a06b1e9, 0xc5ea90d7), TOBN(0x0c19e283, 0x26fab631),
+     TOBN(0x8af8f0cf, 0xe9206c55), TOBN(0x89389cb4, 0x3553c06a),
+     TOBN(0x39dbed97, 0xf65f8004), TOBN(0x0621b037, 0xc508991d),
+     TOBN(0x1c52e635, 0x96e78cc4), TOBN(0x5385c8b2, 0x0c06b4a8),
+     TOBN(0xd84ddfdb, 0xb0e87d03), TOBN(0xc49dfb66, 0x934bafad),
+     TOBN(0x7071e170, 0x59f70772), TOBN(0x3a073a84, 0x3a1db56b),
+     TOBN(0x03494903, 0x3b8af190), TOBN(0x7d882de3, 0xd32920f0),
+     TOBN(0x91633f0a, 0xb2cf8940), TOBN(0x72b0b178, 0x6f948f51),
+     TOBN(0x2d28dc30, 0x782653c8), TOBN(0x88829849, 0xdb903a05),
+     TOBN(0xb8095d0c, 0x6a19d2bb), TOBN(0x4b9e7f0c, 0x86f782cb),
+     TOBN(0x7af73988, 0x2d907064), TOBN(0xd12be0fe, 0x8b32643c),
+     TOBN(0x358ed23d, 0x0e165dc3), TOBN(0x3d47ce62, 0x4e2378ce),
+     TOBN(0x7e2bb0b9, 0xfeb8a087), TOBN(0x3246e8ae, 0xe29e10b9),
+     TOBN(0x459f4ec7, 0x03ce2b4d), TOBN(0xe9b4ca1b, 0xbbc077cf),
+     TOBN(0x2613b4f2, 0x0e9940c1), TOBN(0xfc598bb9, 0x047d1eb1),
+     TOBN(0x9744c62b, 0x45036099), TOBN(0xa9dee742, 0x167c65d8),
+     TOBN(0x0c511525, 0xdabe1943), TOBN(0xda110554, 0x93c6c624),
+     TOBN(0xae00a52c, 0x651a3be2), TOBN(0xcda5111d, 0x884449a6),
+     TOBN(0x063c06f4, 0xff33bed1), TOBN(0x73baaf9a, 0x0d3d76b4),
+     TOBN(0x52fb0c9d, 0x7fc63668), TOBN(0x6886c9dd, 0x0c039cde),
+     TOBN(0x602bd599, 0x55b22351), TOBN(0xb00cab02, 0x360c7c13),
+     TOBN(0x8cb616bc, 0x81b69442), TOBN(0x41486700, 0xb55c3cee),
+     TOBN(0x71093281, 0xf49ba278), TOBN(0xad956d9c, 0x64a50710),
+     TOBN(0x9561f28b, 0x638a7e81), TOBN(0x54155cdf, 0x5980ddc3),
+     TOBN(0xb2db4a96, 0xd26f247a), TOBN(0x9d774e4e, 0x4787d100),
+     TOBN(0x1a9e6e2e, 0x078637d2), TOBN(0x1c363e2d, 0x5e0ae06a),
+     TOBN(0x7493483e, 0xe9cfa354), TOBN(0x76843cb3, 0x7f74b98d),
+     TOBN(0xbaca6591, 0xd4b66947), TOBN(0xb452ce98, 0x04460a8c),
+     TOBN(0x6830d246, 0x43768f55), TOBN(0xf4197ed8, 0x7dff12df),
+     TOBN(0x6521b472, 0x400dd0f7), TOBN(0x59f5ca8f, 0x4b1e7093),
+     TOBN(0x6feff11b, 0x080338ae), TOBN(0x0ada31f6, 0xa29ca3c6),
+     TOBN(0x24794eb6, 0x94a2c215), TOBN(0xd83a43ab, 0x05a57ab4),
+     TOBN(0x264a543a, 0x2a6f89fe), TOBN(0x2c2a3868, 0xdd5ec7c2),
+     TOBN(0xd3373940, 0x8439d9b2), TOBN(0x715ea672, 0x0acd1f11),
+     TOBN(0x42c1d235, 0xe7e6cc19), TOBN(0x81ce6e96, 0xb990585c),
+     TOBN(0x04e5dfe0, 0xd809c7bd), TOBN(0xd7b2580c, 0x8f1050ab),
+     TOBN(0x6d91ad78, 0xd8a4176f), TOBN(0x0af556ee, 0x4e2e897c),
+     TOBN(0x162a8b73, 0x921de0ac), TOBN(0x52ac9c22, 0x7ea78400),
+     TOBN(0xee2a4eea, 0xefce2174), TOBN(0xbe61844e, 0x6d637f79),
+     TOBN(0x0491f1bc, 0x789a283b), TOBN(0x72d3ac3d, 0x880836f4),
+     TOBN(0xaa1c5ea3, 0x88e5402d), TOBN(0x1b192421, 0xd5cc473d),
+     TOBN(0x5c0b9998, 0x9dc84cac), TOBN(0xb0a8482d, 0x9c6e75b8),
+     TOBN(0x639961d0, 0x3a191ce2), TOBN(0xda3bc865, 0x6d837930),
+     TOBN(0xca990653, 0x056e6f8f), TOBN(0x84861c41, 0x64d133a7),
+     TOBN(0x8b403276, 0x746abe40), TOBN(0xb7b4d51a, 0xebf8e303),
+     TOBN(0x05b43211, 0x220a255d), TOBN(0xc997152c, 0x02419e6e),
+     TOBN(0x76ff47b6, 0x630c2fea), TOBN(0x50518677, 0x281fdade),
+     TOBN(0x3283b8ba, 0xcf902b0b), TOBN(0x8d4b4eb5, 0x37db303b),
+     TOBN(0xcc89f42d, 0x755011bc), TOBN(0xb43d74bb, 0xdd09d19b),
+     TOBN(0x65746bc9, 0x8adba350), TOBN(0x364eaf8c, 0xb51c1927),
+     TOBN(0x13c76596, 0x10ad72ec), TOBN(0x30045121, 0xf8d40c20),
+     TOBN(0x6d2d99b7, 0xea7b979b), TOBN(0xcd78cd74, 0xe6fb3bcd),
+     TOBN(0x11e45a9e, 0x86cffbfe), TOBN(0x78a61cf4, 0x637024f6),
+     TOBN(0xd06bc872, 0x3d502295), TOBN(0xf1376854, 0x458cb288),
+     TOBN(0xb9db26a1, 0x342f8586), TOBN(0xf33effcf, 0x4beee09e),
+     TOBN(0xd7e0c4cd, 0xb30cfb3a), TOBN(0x6d09b8c1, 0x6c9db4c8),
+     TOBN(0x40ba1a42, 0x07c8d9df), TOBN(0x6fd495f7, 0x1c52c66d),
+     TOBN(0xfb0e169f, 0x275264da), TOBN(0x80c2b746, 0xe57d8362),
+     TOBN(0xedd987f7, 0x49ad7222), TOBN(0xfdc229af, 0x4398ec7b),}
+    ,
+    {TOBN(0xb0d1ed84, 0x52666a58), TOBN(0x4bcb6e00, 0xe6a9c3c2),
+     TOBN(0x3c57411c, 0x26906408), TOBN(0xcfc20755, 0x13556400),
+     TOBN(0xa08b1c50, 0x5294dba3), TOBN(0xa30ba286, 0x8b7dd31e),
+     TOBN(0xd70ba90e, 0x991eca74), TOBN(0x094e142c, 0xe762c2b9),
+     TOBN(0xb81d783e, 0x979f3925), TOBN(0x1efd130a, 0xaf4c89a7),
+     TOBN(0x525c2144, 0xfd1bf7fa), TOBN(0x4b296904, 0x1b265a9e),
+     TOBN(0xed8e9634, 0xb9db65b6), TOBN(0x35c82e32, 0x03599d8a),
+     TOBN(0xdaa7a54f, 0x403563f3), TOBN(0x9df088ad, 0x022c38ab),
+     TOBN(0xe5cfb066, 0xbb3fd30a), TOBN(0x429169da, 0xeff0354e),
+     TOBN(0x809cf852, 0x3524e36c), TOBN(0x136f4fb3, 0x0155be1d),
+     TOBN(0x4826af01, 0x1fbba712), TOBN(0x6ef0f0b4, 0x506ba1a1),
+     TOBN(0xd9928b31, 0x77aea73e), TOBN(0xe2bf6af2, 0x5eaa244e),
+     TOBN(0x8d084f12, 0x4237b64b), TOBN(0x688ebe99, 0xe3ecfd07),
+     TOBN(0x57b8a70c, 0xf6845dd8), TOBN(0x808fc59c, 0x5da4a325),
+     TOBN(0xa9032b2b, 0xa3585862), TOBN(0xb66825d5, 0xedf29386),
+     TOBN(0xb5a5a8db, 0x431ec29b), TOBN(0xbb143a98, 0x3a1e8dc8),
+     TOBN(0x35ee94ce, 0x12ae381b), TOBN(0x3a7f176c, 0x86ccda90),
+     TOBN(0xc63a657e, 0x4606eaca), TOBN(0x9ae5a380, 0x43cd04df),
+     TOBN(0x9bec8d15, 0xed251b46), TOBN(0x1f5d6d30, 0xcaca5e64),
+     TOBN(0x347b3b35, 0x9ff20f07), TOBN(0x4d65f034, 0xf7e4b286),
+     TOBN(0x9e93ba24, 0xf111661e), TOBN(0xedced484, 0xb105eb04),
+     TOBN(0x96dc9ba1, 0xf424b578), TOBN(0xbf8f66b7, 0xe83e9069),
+     TOBN(0x872d4df4, 0xd7ed8216), TOBN(0xbf07f377, 0x8e2cbecf),
+     TOBN(0x4281d899, 0x98e73754), TOBN(0xfec85fbb, 0x8aab8708),
+     TOBN(0x9a3c0dee, 0xa5ba5b0b), TOBN(0xe6a116ce, 0x42d05299),
+     TOBN(0xae9775fe, 0xe9b02d42), TOBN(0x72b05200, 0xa1545cb6),
+     TOBN(0xbc506f7d, 0x31a3b4ea), TOBN(0xe5893078, 0x8bbd9b32),
+     TOBN(0xc8bc5f37, 0xe4b12a97), TOBN(0x6b000c06, 0x4a73b671),
+     TOBN(0x13b5bf22, 0x765fa7d0), TOBN(0x59805bf0, 0x1d6a5370),
+     TOBN(0x67a5e29d, 0x4280db98), TOBN(0x4f53916f, 0x776b1ce3),
+     TOBN(0x714ff61f, 0x33ddf626), TOBN(0x4206238e, 0xa085d103),
+     TOBN(0x1c50d4b7, 0xe5809ee3), TOBN(0x999f450d, 0x85f8eb1d),
+     TOBN(0x658a6051, 0xe4c79e9b), TOBN(0x1394cb73, 0xc66a9fea),
+     TOBN(0x27f31ed5, 0xc6be7b23), TOBN(0xf4c88f36, 0x5aa6f8fe),
+     TOBN(0x0fb0721f, 0x4aaa499e), TOBN(0x68b3a7d5, 0xe3fb2a6b),
+     TOBN(0xa788097d, 0x3a92851d), TOBN(0x060e7f8a, 0xe96f4913),
+     TOBN(0x82eebe73, 0x1a3a93bc), TOBN(0x42bbf465, 0xa21adc1a),
+     TOBN(0xc10b6fa4, 0xef030efd), TOBN(0x247aa4c7, 0x87b097bb),
+     TOBN(0x8b8dc632, 0xf60c77da), TOBN(0x6ffbc26a, 0xc223523e),
+     TOBN(0xa4f6ff11, 0x344579cf), TOBN(0x5825653c, 0x980250f6),
+     TOBN(0xb2dd097e, 0xbc1aa2b9), TOBN(0x07889393, 0x37a0333a),
+     TOBN(0x1cf55e71, 0x37a0db38), TOBN(0x2648487f, 0x792c1613),
+     TOBN(0xdad01336, 0x3fcef261), TOBN(0x6239c81d, 0x0eabf129),
+     TOBN(0x8ee761de, 0x9d276be2), TOBN(0x406a7a34, 0x1eda6ad3),
+     TOBN(0x4bf367ba, 0x4a493b31), TOBN(0x54f20a52, 0x9bf7f026),
+     TOBN(0xb696e062, 0x9795914b), TOBN(0xcddab96d, 0x8bf236ac),
+     TOBN(0x4ff2c70a, 0xed25ea13), TOBN(0xfa1d09eb, 0x81cbbbe7),
+     TOBN(0x88fc8c87, 0x468544c5), TOBN(0x847a670d, 0x696b3317),
+     TOBN(0xf133421e, 0x64bcb626), TOBN(0xaea638c8, 0x26dee0b5),
+     TOBN(0xd6e7680b, 0xb310346c), TOBN(0xe06f4097, 0xd5d4ced3),
+     TOBN(0x09961452, 0x7512a30b), TOBN(0xf3d867fd, 0xe589a59a),
+     TOBN(0x2e73254f, 0x52d0c180), TOBN(0x9063d8a3, 0x333c74ac),
+     TOBN(0xeda6c595, 0xd314e7bc), TOBN(0x2ee7464b, 0x467899ed),
+     TOBN(0x1cef423c, 0x0a1ed5d3), TOBN(0x217e76ea, 0x69cc7613),
+     TOBN(0x27ccce1f, 0xe7cda917), TOBN(0x12d8016b, 0x8a893f16),
+     TOBN(0xbcd6de84, 0x9fc74f6b), TOBN(0xfa5817e2, 0xf3144e61),
+     TOBN(0x1f354164, 0x0821ee4c), TOBN(0x1583eab4, 0x0bc61992),
+     TOBN(0x7490caf6, 0x1d72879f), TOBN(0x998ad9f3, 0xf76ae7b2),
+     TOBN(0x1e181950, 0xa41157f7), TOBN(0xa9d7e1e6, 0xe8da3a7e),
+     TOBN(0x963784eb, 0x8426b95f), TOBN(0x0ee4ed6e, 0x542e2a10),
+     TOBN(0xb79d4cc5, 0xac751e7b), TOBN(0x93f96472, 0xfd4211bd),
+     TOBN(0x8c72d3d2, 0xc8de4fc6), TOBN(0x7b69cbf5, 0xdf44f064),
+     TOBN(0x3da90ca2, 0xf4bf94e1), TOBN(0x1a5325f8, 0xf12894e2),
+     TOBN(0x0a437f6c, 0x7917d60b), TOBN(0x9be70486, 0x96c9cb5d),
+     TOBN(0xb4d880bf, 0xe1dc5c05), TOBN(0xd738adda, 0xeebeeb57),
+     TOBN(0x6f0119d3, 0xdf0fe6a3), TOBN(0x5c686e55, 0x66eaaf5a),
+     TOBN(0x9cb10b50, 0xdfd0b7ec), TOBN(0xbdd0264b, 0x6a497c21),
+     TOBN(0xfc093514, 0x8c546c96), TOBN(0x58a947fa, 0x79dbf42a),
+     TOBN(0xc0b48d4e, 0x49ccd6d7), TOBN(0xff8fb02c, 0x88bd5580),
+     TOBN(0xc75235e9, 0x07d473b2), TOBN(0x4fab1ac5, 0xa2188af3),
+     TOBN(0x030fa3bc, 0x97576ec0), TOBN(0xe8c946e8, 0x0b7e7d2f),
+     TOBN(0x40a5c9cc, 0x70305600), TOBN(0x6d8260a9, 0xc8b013b4),
+     TOBN(0x0368304f, 0x70bba85c), TOBN(0xad090da1, 0xa4a0d311),
+     TOBN(0x7170e870, 0x2415eec1), TOBN(0xbfba35fe, 0x8461ea47),
+     TOBN(0x6279019a, 0xc1e91938), TOBN(0xa47638f3, 0x1afc415f),
+     TOBN(0x36c65cbb, 0xbcba0e0f), TOBN(0x02160efb, 0x034e2c48),
+     TOBN(0xe6c51073, 0x615cd9e4), TOBN(0x498ec047, 0xf1243c06),
+     TOBN(0x3e5a8809, 0xb17b3d8c), TOBN(0x5cd99e61, 0x0cc565f1),
+     TOBN(0x81e312df, 0x7851dafe), TOBN(0xf156f5ba, 0xa79061e2),
+     TOBN(0x80d62b71, 0x880c590e), TOBN(0xbec9746f, 0x0a39faa1),
+     TOBN(0x1d98a9c1, 0xc8ed1f7a), TOBN(0x09e43bb5, 0xa81d5ff2),
+     TOBN(0xd5f00f68, 0x0da0794a), TOBN(0x412050d9, 0x661aa836),
+     TOBN(0xa89f7c4e, 0x90747e40), TOBN(0x6dc05ebb, 0xb62a3686),
+     TOBN(0xdf4de847, 0x308e3353), TOBN(0x53868fbb, 0x9fb53bb9),
+     TOBN(0x2b09d2c3, 0xcfdcf7dd), TOBN(0x41a9fce3, 0x723fcab4),
+     TOBN(0x73d905f7, 0x07f57ca3), TOBN(0x080f9fb1, 0xac8e1555),
+     TOBN(0x7c088e84, 0x9ba7a531), TOBN(0x07d35586, 0xed9a147f),
+     TOBN(0x602846ab, 0xaf48c336), TOBN(0x7320fd32, 0x0ccf0e79),
+     TOBN(0xaa780798, 0xb18bd1ff), TOBN(0x52c2e300, 0xafdd2905),
+     TOBN(0xf27ea3d6, 0x434267cd), TOBN(0x8b96d16d, 0x15605b5f),
+     TOBN(0x7bb31049, 0x4b45706b), TOBN(0xe7f58b8e, 0x743d25f8),
+     TOBN(0xe9b5e45b, 0x87f30076), TOBN(0xd19448d6, 0x5d053d5a),
+     TOBN(0x1ecc8cb9, 0xd3210a04), TOBN(0x6bc7d463, 0xdafb5269),
+     TOBN(0x3e59b10a, 0x67c3489f), TOBN(0x1769788c, 0x65641e1b),
+     TOBN(0x8a53b82d, 0xbd6cb838), TOBN(0x7066d6e6, 0x236d5f22),
+     TOBN(0x03aa1c61, 0x6908536e), TOBN(0xc971da0d, 0x66ae9809),
+     TOBN(0x01b3a86b, 0xc49a2fac), TOBN(0x3b8420c0, 0x3092e77a),
+     TOBN(0x02057300, 0x7d6fb556), TOBN(0x6941b2a1, 0xbff40a87),
+     TOBN(0x140b6308, 0x0658ff2a), TOBN(0x87804363, 0x3424ab36),
+     TOBN(0x0253bd51, 0x5751e299), TOBN(0xc75bcd76, 0x449c3e3a),
+     TOBN(0x92eb4090, 0x7f8f875d), TOBN(0x9c9d754e, 0x56c26bbf),
+     TOBN(0x158cea61, 0x8110bbe7), TOBN(0x62a6b802, 0x745f91ea),
+     TOBN(0xa79c41aa, 0xc6e7394b), TOBN(0x445b6a83, 0xad57ef10),
+     TOBN(0x0c5277eb, 0x6ea6f40c), TOBN(0x319fe96b, 0x88633365),
+     TOBN(0x0b0fc61f, 0x385f63cb), TOBN(0x41250c84, 0x22bdd127),
+     TOBN(0x67d153f1, 0x09e942c2), TOBN(0x60920d08, 0xc021ad5d),
+     TOBN(0x229f5746, 0x724d81a5), TOBN(0xb7ffb892, 0x5bba3299),
+     TOBN(0x518c51a1, 0xde413032), TOBN(0x2a9bfe77, 0x3c2fd94c),
+     TOBN(0xcbcde239, 0x3191f4fd), TOBN(0x43093e16, 0xd3d6ada1),
+     TOBN(0x184579f3, 0x58769606), TOBN(0x2c94a8b3, 0xd236625c),
+     TOBN(0x6922b9c0, 0x5c437d8e), TOBN(0x3d4ae423, 0xd8d9f3c8),
+     TOBN(0xf72c31c1, 0x2e7090a2), TOBN(0x4ac3f5f3, 0xd76a55bd),
+     TOBN(0x342508fc, 0x6b6af991), TOBN(0x0d527100, 0x1b5cebbd),
+     TOBN(0xb84740d0, 0xdd440dd7), TOBN(0x748ef841, 0x780162fd),
+     TOBN(0xa8dbfe0e, 0xdfc6fafb), TOBN(0xeadfdf05, 0xf7300f27),
+     TOBN(0x7d06555f, 0xfeba4ec9), TOBN(0x12c56f83, 0x9e25fa97),
+     TOBN(0x77f84203, 0xd39b8c34), TOBN(0xed8b1be6, 0x3125eddb),
+     TOBN(0x5bbf2441, 0xf6e39dc5), TOBN(0xb00f6ee6, 0x6a5d678a),
+     TOBN(0xba456ecf, 0x57d0ea99), TOBN(0xdcae0f58, 0x17e06c43),
+     TOBN(0x01643de4, 0x0f5b4baa), TOBN(0x2c324341, 0xd161b9be),
+     TOBN(0x80177f55, 0xe126d468), TOBN(0xed325f1f, 0x76748e09),
+     TOBN(0x6116004a, 0xcfa9bdc2), TOBN(0x2d8607e6, 0x3a9fb468),
+     TOBN(0x0e573e27, 0x6009d660), TOBN(0x3a525d2e, 0x8d10c5a1),
+     TOBN(0xd26cb45c, 0x3b9009a0), TOBN(0xb6b0cdc0, 0xde9d7448),
+     TOBN(0x949c9976, 0xe1337c26), TOBN(0x6faadebd, 0xd73d68e5),
+     TOBN(0x9e158614, 0xf1b768d9), TOBN(0x22dfa557, 0x9cc4f069),
+     TOBN(0xccd6da17, 0xbe93c6d6), TOBN(0x24866c61, 0xa504f5b9),
+     TOBN(0x2121353c, 0x8d694da1), TOBN(0x1c6ca580, 0x0140b8c6),
+     TOBN(0xc245ad8c, 0xe964021e), TOBN(0xb83bffba, 0x032b82b3),
+     TOBN(0xfaa220c6, 0x47ef9898), TOBN(0x7e8d3ac6, 0x982c948a),
+     TOBN(0x1faa2091, 0xbc2d124a), TOBN(0xbd54c3dd, 0x05b15ff4),
+     TOBN(0x386bf3ab, 0xc87c6fb7), TOBN(0xfb2b0563, 0xfdeb6f66),
+     TOBN(0x4e77c557, 0x5b45afb4), TOBN(0xe9ded649, 0xefb8912d),
+     TOBN(0x7ec9bbf5, 0x42f6e557), TOBN(0x2570dfff, 0x62671f00),
+     TOBN(0x2b3bfb78, 0x88e084bd), TOBN(0xa024b238, 0xf37fe5b4),
+     TOBN(0x44e7dc04, 0x95649aee), TOBN(0x498ca255, 0x5e7ec1d8),
+     TOBN(0x3bc766ea, 0xaaa07e86), TOBN(0x0db6facb, 0xf3608586),
+     TOBN(0xbadd2549, 0xbdc259c8), TOBN(0x95af3c6e, 0x041c649f),
+     TOBN(0xb36a928c, 0x02e30afb), TOBN(0x9b5356ad, 0x008a88b8),
+     TOBN(0x4b67a5f1, 0xcf1d9e9d), TOBN(0xc6542e47, 0xa5d8d8ce),
+     TOBN(0x73061fe8, 0x7adfb6cc), TOBN(0xcc826fd3, 0x98678141),
+     TOBN(0x00e758b1, 0x3c80515a), TOBN(0x6afe3247, 0x41485083),
+     TOBN(0x0fcb08b9, 0xb6ae8a75), TOBN(0xb8cf388d, 0x4acf51e1),
+     TOBN(0x344a5560, 0x6961b9d6), TOBN(0x1a6778b8, 0x6a97fd0c),
+     TOBN(0xd840fdc1, 0xecc4c7e3), TOBN(0xde9fe47d, 0x16db68cc),
+     TOBN(0xe95f89de, 0xa3e216aa), TOBN(0x84f1a6a4, 0x9594a8be),
+     TOBN(0x7ddc7d72, 0x5a7b162b), TOBN(0xc5cfda19, 0xadc817a3),
+     TOBN(0x80a5d350, 0x78b58d46), TOBN(0x93365b13, 0x82978f19),
+     TOBN(0x2e44d225, 0x26a1fc90), TOBN(0x0d6d10d2, 0x4d70705d),
+     TOBN(0xd94b6b10, 0xd70c45f4), TOBN(0x0f201022, 0xb216c079),
+     TOBN(0xcec966c5, 0x658fde41), TOBN(0xa8d2bc7d, 0x7e27601d),
+     TOBN(0xbfcce3e1, 0xff230be7), TOBN(0x3394ff6b, 0x0033ffb5),
+     TOBN(0xd890c509, 0x8132c9af), TOBN(0xaac4b0eb, 0x361e7868),
+     TOBN(0x5194ded3, 0xe82d15aa), TOBN(0x4550bd2e, 0x23ae6b7d),
+     TOBN(0x3fda318e, 0xea5399d4), TOBN(0xd989bffa, 0x91638b80),
+     TOBN(0x5ea124d0, 0xa14aa12d), TOBN(0x1fb1b899, 0x3667b944),
+     TOBN(0x95ec7969, 0x44c44d6a), TOBN(0x91df144a, 0x57e86137),
+     TOBN(0x915fd620, 0x73adac44), TOBN(0x8f01732d, 0x59a83801),
+     TOBN(0xec579d25, 0x3aa0a633), TOBN(0x06de5e7c, 0xc9d6d59c),
+     TOBN(0xc132f958, 0xb1ef8010), TOBN(0x29476f96, 0xe65c1a02),
+     TOBN(0x336a77c0, 0xd34c3565), TOBN(0xef1105b2, 0x1b9f1e9e),
+     TOBN(0x63e6d08b, 0xf9e08002), TOBN(0x9aff2f21, 0xc613809e),
+     TOBN(0xb5754f85, 0x3a80e75d), TOBN(0xde71853e, 0x6bbda681),
+     TOBN(0x86f041df, 0x8197fd7a), TOBN(0x8b332e08, 0x127817fa),
+     TOBN(0x05d99be8, 0xb9c20cda), TOBN(0x89f7aad5, 0xd5cd0c98),
+     TOBN(0x7ef936fe, 0x5bb94183), TOBN(0x92ca0753, 0xb05cd7f2),
+     TOBN(0x9d65db11, 0x74a1e035), TOBN(0x02628cc8, 0x13eaea92),
+     TOBN(0xf2d9e242, 0x49e4fbf2), TOBN(0x94fdfd9b, 0xe384f8b7),
+     TOBN(0x65f56054, 0x63428c6b), TOBN(0x2f7205b2, 0x90b409a5),
+     TOBN(0xf778bb78, 0xff45ae11), TOBN(0xa13045be, 0xc5ee53b2),
+     TOBN(0xe00a14ff, 0x03ef77fe), TOBN(0x689cd59f, 0xffef8bef),
+     TOBN(0x3578f0ed, 0x1e9ade22), TOBN(0xe99f3ec0, 0x6268b6a8),
+     TOBN(0xa2057d91, 0xea1b3c3e), TOBN(0x2d1a7053, 0xb8823a4a),
+     TOBN(0xabbb336a, 0x2cca451e), TOBN(0xcd2466e3, 0x2218bb5d),
+     TOBN(0x3ac1f42f, 0xc8cb762d), TOBN(0x7e312aae, 0x7690211f),
+     TOBN(0xebb9bd73, 0x45d07450), TOBN(0x207c4b82, 0x46c2213f),
+     TOBN(0x99d425c1, 0x375913ec), TOBN(0x94e45e96, 0x67908220),
+     TOBN(0xc08f3087, 0xcd67dbf6), TOBN(0xa5670fbe, 0xc0887056),
+     TOBN(0x6717b64a, 0x66f5b8fc), TOBN(0xd5a56aea, 0x786fec28),
+     TOBN(0xa8c3f55f, 0xc0ff4952), TOBN(0xa77fefae, 0x457ac49b),
+     TOBN(0x29882d7c, 0x98379d44), TOBN(0xd000bdfb, 0x509edc8a),
+     TOBN(0xc6f95979, 0xe66fe464), TOBN(0x504a6115, 0xfa61bde0),
+     TOBN(0x56b3b871, 0xeffea31a), TOBN(0x2d3de26d, 0xf0c21a54),
+     TOBN(0x21dbff31, 0x834753bf), TOBN(0xe67ecf49, 0x69269d86),
+     TOBN(0x7a176952, 0x151fe690), TOBN(0x03515804, 0x7f2adb5f),
+     TOBN(0xee794b15, 0xd1b62a8d), TOBN(0xf004ceec, 0xaae454e6),
+     TOBN(0x0897ea7c, 0xf0386fac), TOBN(0x3b62ff12, 0xd1fca751),
+     TOBN(0x154181df, 0x1b7a04ec), TOBN(0x2008e04a, 0xfb5847ec),
+     TOBN(0xd147148e, 0x41dbd772), TOBN(0x2b419f73, 0x22942654),
+     TOBN(0x669f30d3, 0xe9c544f7), TOBN(0x52a2c223, 0xc8540149),
+     TOBN(0x5da9ee14, 0x634dfb02), TOBN(0x5f074ff0, 0xf47869f3),
+     TOBN(0x74ee878d, 0xa3933acc), TOBN(0xe6510651, 0x4fe35ed1),
+     TOBN(0xb3eb9482, 0xf1012e7a), TOBN(0x51013cc0, 0xa8a566ae),
+     TOBN(0xdd5e9243, 0x47c00d3b), TOBN(0x7fde089d, 0x946bb0e5),
+     TOBN(0x030754fe, 0xc731b4b3), TOBN(0x12a136a4, 0x99fda062),
+     TOBN(0x7c1064b8, 0x5a1a35bc), TOBN(0xbf1f5763, 0x446c84ef),
+     TOBN(0xed29a56d, 0xa16d4b34), TOBN(0x7fba9d09, 0xdca21c4f),
+     TOBN(0x66d7ac00, 0x6d8de486), TOBN(0x60061987, 0x73a2a5e1),
+     TOBN(0x8b400f86, 0x9da28ff0), TOBN(0x3133f708, 0x43c4599c),
+     TOBN(0x9911c9b8, 0xee28cb0d), TOBN(0xcd7e2874, 0x8e0af61d),
+     TOBN(0x5a85f0f2, 0x72ed91fc), TOBN(0x85214f31, 0x9cd4a373),
+     TOBN(0x881fe5be, 0x1925253c), TOBN(0xd8dc98e0, 0x91e8bc76),
+     TOBN(0x7120affe, 0x585cc3a2), TOBN(0x724952ed, 0x735bf97a),
+     TOBN(0x5581e7dc, 0x3eb34581), TOBN(0x5cbff4f2, 0xe52ee57d),
+     TOBN(0x8d320a0e, 0x87d8cc7b), TOBN(0x9beaa7f3, 0xf1d280d0),
+     TOBN(0x7a0b9571, 0x9beec704), TOBN(0x9126332e, 0x5b7f0057),
+     TOBN(0x01fbc1b4, 0x8ed3bd6d), TOBN(0x35bb2c12, 0xd945eb24),
+     TOBN(0x6404694e, 0x9a8ae255), TOBN(0xb6092eec, 0x8d6abfb3),
+     TOBN(0x4d76143f, 0xcc058865), TOBN(0x7b0a5af2, 0x6e249922),
+     TOBN(0x8aef9440, 0x6a50d353), TOBN(0xe11e4bcc, 0x64f0e07a),
+     TOBN(0x4472993a, 0xa14a90fa), TOBN(0x7706e20c, 0xba0c51d4),
+     TOBN(0xf403292f, 0x1532672d), TOBN(0x52573bfa, 0x21829382),
+     TOBN(0x6a7bb6a9, 0x3b5bdb83), TOBN(0x08da65c0, 0xa4a72318),
+     TOBN(0xc58d22aa, 0x63eb065f), TOBN(0x1717596c, 0x1b15d685),
+     TOBN(0x112df0d0, 0xb266d88b), TOBN(0xf688ae97, 0x5941945a),
+     TOBN(0x487386e3, 0x7c292cac), TOBN(0x42f3b50d, 0x57d6985c),
+     TOBN(0x6da4f998, 0x6a90fc34), TOBN(0xc8f257d3, 0x65ca8a8d),
+     TOBN(0xc2feabca, 0x6951f762), TOBN(0xe1bc81d0, 0x74c323ac),
+     TOBN(0x1bc68f67, 0x251a2a12), TOBN(0x10d86587, 0xbe8a70dc),
+     TOBN(0xd648af7f, 0xf0f84d2e), TOBN(0xf0aa9ebc, 0x6a43ac92),
+     TOBN(0x69e3be04, 0x27596893), TOBN(0xb6bb02a6, 0x45bf452b),
+     TOBN(0x0875c11a, 0xf4c698c8), TOBN(0x6652b5c7, 0xbece3794),
+     TOBN(0x7b3755fd, 0x4f5c0499), TOBN(0x6ea16558, 0xb5532b38),
+     TOBN(0xd1c69889, 0xa2e96ef7), TOBN(0x9c773c3a, 0x61ed8f48),
+     TOBN(0x2b653a40, 0x9b323abc), TOBN(0xe26605e1, 0xf0e1d791),
+     TOBN(0x45d41064, 0x4a87157a), TOBN(0x8f9a78b7, 0xcbbce616),
+     TOBN(0xcf1e44aa, 0xc407eddd), TOBN(0x81ddd1d8, 0xa35b964f),
+     TOBN(0x473e339e, 0xfd083999), TOBN(0x6c94bdde, 0x8e796802),
+     TOBN(0x5a304ada, 0x8545d185), TOBN(0x82ae44ea, 0x738bb8cb),
+     TOBN(0x628a35e3, 0xdf87e10e), TOBN(0xd3624f3d, 0xa15b9fe3),
+     TOBN(0xcc44209b, 0x14be4254), TOBN(0x7d0efcbc, 0xbdbc2ea5),
+     TOBN(0x1f603362, 0x04c37bbe), TOBN(0x21f363f5, 0x56a5852c),
+     TOBN(0xa1503d1c, 0xa8501550), TOBN(0x2251e0e1, 0xd8ab10bb),
+     TOBN(0xde129c96, 0x6961c51c), TOBN(0x1f7246a4, 0x81910f68),
+     TOBN(0x2eb744ee, 0x5f2591f2), TOBN(0x3c47d33f, 0x5e627157),
+     TOBN(0x4d6d62c9, 0x22f3bd68), TOBN(0x6120a64b, 0xcb8df856),
+     TOBN(0x3a9ac6c0, 0x7b5d07df), TOBN(0xa92b9558, 0x7ef39783),
+     TOBN(0xe128a134, 0xab3a9b4f), TOBN(0x41c18807, 0xb1252f05),
+     TOBN(0xfc7ed089, 0x80ba9b1c), TOBN(0xac8dc6de, 0xc532a9dd),
+     TOBN(0xbf829cef, 0x55246809), TOBN(0x101b784f, 0x5b4ee80f),
+     TOBN(0xc09945bb, 0xb6f11603), TOBN(0x57b09dbe, 0x41d2801e),
+     TOBN(0xfba5202f, 0xa97534a8), TOBN(0x7fd8ae5f, 0xc17b9614),
+     TOBN(0xa50ba666, 0x78308435), TOBN(0x9572f77c, 0xd3868c4d),
+     TOBN(0x0cef7bfd, 0x2dd7aab0), TOBN(0xe7958e08, 0x2c7c79ff),
+     TOBN(0x81262e42, 0x25346689), TOBN(0x716da290, 0xb07c7004),
+     TOBN(0x35f911ea, 0xb7950ee3), TOBN(0x6fd72969, 0x261d21b5),
+     TOBN(0x52389803, 0x08b640d3), TOBN(0x5b0026ee, 0x887f12a1),
+     TOBN(0x20e21660, 0x742e9311), TOBN(0x0ef6d541, 0x5ff77ff7),
+     TOBN(0x969127f0, 0xf9c41135), TOBN(0xf21d60c9, 0x68a64993),
+     TOBN(0x656e5d0c, 0xe541875c), TOBN(0xf1e0f84e, 0xa1d3c233),
+     TOBN(0x9bcca359, 0x06002d60), TOBN(0xbe2da60c, 0x06191552),
+     TOBN(0x5da8bbae, 0x61181ec3), TOBN(0x9f04b823, 0x65806f19),
+     TOBN(0xf1604a7d, 0xd4b79bb8), TOBN(0xaee806fb, 0x52c878c8),
+     TOBN(0x34144f11, 0x8d47b8e8), TOBN(0x72edf52b, 0x949f9054),
+     TOBN(0xebfca84e, 0x2127015a), TOBN(0x9051d0c0, 0x9cb7cef3),
+     TOBN(0x86e8fe58, 0x296deec8), TOBN(0x33b28188, 0x41010d74),}
+    ,
+    {TOBN(0x01079383, 0x171b445f), TOBN(0x9bcf21e3, 0x8131ad4c),
+     TOBN(0x8cdfe205, 0xc93987e8), TOBN(0xe63f4152, 0xc92e8c8f),
+     TOBN(0x729462a9, 0x30add43d), TOBN(0x62ebb143, 0xc980f05a),
+     TOBN(0x4f3954e5, 0x3b06e968), TOBN(0xfe1d75ad, 0x242cf6b1),
+     TOBN(0x5f95c6c7, 0xaf8685c8), TOBN(0xd4c1c8ce, 0x2f8f01aa),
+     TOBN(0xc44bbe32, 0x2574692a), TOBN(0xb8003478, 0xd4a4a068),
+     TOBN(0x7c8fc6e5, 0x2eca3cdb), TOBN(0xea1db16b, 0xec04d399),
+     TOBN(0xb05bc82e, 0x8f2bc5cf), TOBN(0x763d517f, 0xf44793d2),
+     TOBN(0x4451c1b8, 0x08bd98d0), TOBN(0x644b1cd4, 0x6575f240),
+     TOBN(0x6907eb33, 0x7375d270), TOBN(0x56c8bebd, 0xfa2286bd),
+     TOBN(0xc713d2ac, 0xc4632b46), TOBN(0x17da427a, 0xafd60242),
+     TOBN(0x313065b7, 0xc95c7546), TOBN(0xf8239898, 0xbf17a3de),
+     TOBN(0xf3b7963f, 0x4c830320), TOBN(0x842c7aa0, 0x903203e3),
+     TOBN(0xaf22ca0a, 0xe7327afb), TOBN(0x38e13092, 0x967609b6),
+     TOBN(0x73b8fb62, 0x757558f1), TOBN(0x3cc3e831, 0xf7eca8c1),
+     TOBN(0xe4174474, 0xf6331627), TOBN(0xa77989ca, 0xc3c40234),
+     TOBN(0xe5fd17a1, 0x44a081e0), TOBN(0xd797fb7d, 0xb70e296a),
+     TOBN(0x2b472b30, 0x481f719c), TOBN(0x0e632a98, 0xfe6f8c52),
+     TOBN(0x89ccd116, 0xc5f0c284), TOBN(0xf51088af, 0x2d987c62),
+     TOBN(0x2a2bccda, 0x4c2de6cf), TOBN(0x810f9efe, 0xf679f0f9),
+     TOBN(0xb0f394b9, 0x7ffe4b3e), TOBN(0x0b691d21, 0xe5fa5d21),
+     TOBN(0xb0bd7747, 0x9dfbbc75), TOBN(0xd2830fda, 0xfaf78b00),
+     TOBN(0xf78c249c, 0x52434f57), TOBN(0x4b1f7545, 0x98096dab),
+     TOBN(0x73bf6f94, 0x8ff8c0b3), TOBN(0x34aef03d, 0x454e134c),
+     TOBN(0xf8d151f4, 0xb7ac7ec5), TOBN(0xd6ceb95a, 0xe50da7d5),
+     TOBN(0xa1b492b0, 0xdc3a0eb8), TOBN(0x75157b69, 0xb3dd2863),
+     TOBN(0xe2c4c74e, 0xc5413d62), TOBN(0xbe329ff7, 0xbc5fc4c7),
+     TOBN(0x835a2aea, 0x60fa9dda), TOBN(0xf117f5ad, 0x7445cb87),
+     TOBN(0xae8317f4, 0xb0166f7a), TOBN(0xfbd3e3f7, 0xceec74e6),
+     TOBN(0xfdb516ac, 0xe0874bfd), TOBN(0x3d846019, 0xc681f3a3),
+     TOBN(0x0b12ee5c, 0x7c1620b0), TOBN(0xba68b4dd, 0x2b63c501),
+     TOBN(0xac03cd32, 0x6668c51e), TOBN(0x2a6279f7, 0x4e0bcb5b),
+     TOBN(0x17bd69b0, 0x6ae85c10), TOBN(0x72946979, 0x1dfdd3a6),
+     TOBN(0xd9a03268, 0x2c078bec), TOBN(0x41c6a658, 0xbfd68a52),
+     TOBN(0xcdea1024, 0x0e023900), TOBN(0xbaeec121, 0xb10d144d),
+     TOBN(0x5a600e74, 0x058ab8dc), TOBN(0x1333af21, 0xbb89ccdd),
+     TOBN(0xdf25eae0, 0x3aaba1f1), TOBN(0x2cada16e, 0x3b7144cf),
+     TOBN(0x657ee27d, 0x71ab98bc), TOBN(0x99088b4c, 0x7a6fc96e),
+     TOBN(0x05d5c0a0, 0x3549dbd4), TOBN(0x42cbdf8f, 0xf158c3ac),
+     TOBN(0x3fb6b3b0, 0x87edd685), TOBN(0x22071cf6, 0x86f064d0),
+     TOBN(0xd2d6721f, 0xff2811e5), TOBN(0xdb81b703, 0xfe7fae8c),
+     TOBN(0x3cfb74ef, 0xd3f1f7bb), TOBN(0x0cdbcd76, 0x16cdeb5d),
+     TOBN(0x4f39642a, 0x566a808c), TOBN(0x02b74454, 0x340064d6),
+     TOBN(0xfabbadca, 0x0528fa6f), TOBN(0xe4c3074c, 0xd3fc0bb6),
+     TOBN(0xb32cb8b0, 0xb796d219), TOBN(0xc3e95f4f, 0x34741dd9),
+     TOBN(0x87212125, 0x68edf6f5), TOBN(0x7a03aee4, 0xa2b9cb8e),
+     TOBN(0x0cd3c376, 0xf53a89aa), TOBN(0x0d8af9b1, 0x948a28dc),
+     TOBN(0xcf86a3f4, 0x902ab04f), TOBN(0x8aacb62a, 0x7f42002d),
+     TOBN(0x106985eb, 0xf62ffd52), TOBN(0xe670b54e, 0x5797bf10),
+     TOBN(0x4b405209, 0xc5e30aef), TOBN(0x12c97a20, 0x4365b5e9),
+     TOBN(0x104646ce, 0x1fe32093), TOBN(0x13cb4ff6, 0x3907a8c9),
+     TOBN(0x8b9f30d1, 0xd46e726b), TOBN(0xe1985e21, 0xaba0f499),
+     TOBN(0xc573dea9, 0x10a230cd), TOBN(0x24f46a93, 0xcd30f947),
+     TOBN(0xf2623fcf, 0xabe2010a), TOBN(0x3f278cb2, 0x73f00e4f),
+     TOBN(0xed55c67d, 0x50b920eb), TOBN(0xf1cb9a2d, 0x8e760571),
+     TOBN(0x7c50d109, 0x0895b709), TOBN(0x4207cf07, 0x190d4369),
+     TOBN(0x3b027e81, 0xc4127fe1), TOBN(0xa9f8b9ad, 0x3ae9c566),
+     TOBN(0x5ab10851, 0xacbfbba5), TOBN(0xa747d648, 0x569556f5),
+     TOBN(0xcc172b5c, 0x2ba97bf7), TOBN(0x15e0f77d, 0xbcfa3324),
+     TOBN(0xa345b797, 0x7686279d), TOBN(0x5a723480, 0xe38003d3),
+     TOBN(0xfd8e139f, 0x8f5fcda8), TOBN(0xf3e558c4, 0xbdee5bfd),
+     TOBN(0xd76cbaf4, 0xe33f9f77), TOBN(0x3a4c97a4, 0x71771969),
+     TOBN(0xda27e84b, 0xf6dce6a7), TOBN(0xff373d96, 0x13e6c2d1),
+     TOBN(0xf115193c, 0xd759a6e9), TOBN(0x3f9b7025, 0x63d2262c),
+     TOBN(0xd9764a31, 0x317cd062), TOBN(0x30779d8e, 0x199f8332),
+     TOBN(0xd8074106, 0x16b11b0b), TOBN(0x7917ab9f, 0x78aeaed8),
+     TOBN(0xb67a9cbe, 0x28fb1d8e), TOBN(0x2e313563, 0x136eda33),
+     TOBN(0x010b7069, 0xa371a86c), TOBN(0x44d90fa2, 0x6744e6b7),
+     TOBN(0x68190867, 0xd6b3e243), TOBN(0x9fe6cd9d, 0x59048c48),
+     TOBN(0xb900b028, 0x95731538), TOBN(0xa012062f, 0x32cae04f),
+     TOBN(0x8107c8bc, 0x9399d082), TOBN(0x47e8c54a, 0x41df12e2),
+     TOBN(0x14ba5117, 0xb6ef3f73), TOBN(0x22260bea, 0x81362f0b),
+     TOBN(0x90ea261e, 0x1a18cc20), TOBN(0x2192999f, 0x2321d636),
+     TOBN(0xef64d314, 0xe311b6a0), TOBN(0xd7401e4c, 0x3b54a1f5),
+     TOBN(0x19019983, 0x6fbca2ba), TOBN(0x46ad3293, 0x8fbffc4b),
+     TOBN(0xa142d3f6, 0x3786bf40), TOBN(0xeb5cbc26, 0xb67039fc),
+     TOBN(0x9cb0ae6c, 0x252bd479), TOBN(0x05e0f88a, 0x12b5848f),
+     TOBN(0x78f6d2b2, 0xa5c97663), TOBN(0x6f6e149b, 0xc162225c),
+     TOBN(0xe602235c, 0xde601a89), TOBN(0xd17bbe98, 0xf373be1f),
+     TOBN(0xcaf49a5b, 0xa8471827), TOBN(0x7e1a0a85, 0x18aaa116),
+     TOBN(0x6c833196, 0x270580c3), TOBN(0x1e233839, 0xf1c98a14),
+     TOBN(0x67b2f7b4, 0xae34e0a5), TOBN(0x47ac8745, 0xd8ce7289),
+     TOBN(0x2b74779a, 0x100dd467), TOBN(0x274a4337, 0x4ee50d09),
+     TOBN(0x603dcf13, 0x83608bc9), TOBN(0xcd9da6c3, 0xc89e8388),
+     TOBN(0x2660199f, 0x355116ac), TOBN(0xcc38bb59, 0xb6d18eed),
+     TOBN(0x3075f31f, 0x2f4bc071), TOBN(0x9774457f, 0x265dc57e),
+     TOBN(0x06a6a9c8, 0xc6db88bb), TOBN(0x6429d07f, 0x4ec98e04),
+     TOBN(0x8d05e57b, 0x05ecaa8b), TOBN(0x20f140b1, 0x7872ea7b),
+     TOBN(0xdf8c0f09, 0xca494693), TOBN(0x48d3a020, 0xf252e909),
+     TOBN(0x4c5c29af, 0x57b14b12), TOBN(0x7e6fa37d, 0xbf47ad1c),
+     TOBN(0x66e7b506, 0x49a0c938), TOBN(0xb72c0d48, 0x6be5f41f),
+     TOBN(0x6a6242b8, 0xb2359412), TOBN(0xcd35c774, 0x8e859480),
+     TOBN(0x12536fea, 0x87baa627), TOBN(0x58c1fec1, 0xf72aa680),
+     TOBN(0x6c29b637, 0x601e5dc9), TOBN(0x9e3c3c1c, 0xde9e01b9),
+     TOBN(0xefc8127b, 0x2bcfe0b0), TOBN(0x35107102, 0x2a12f50d),
+     TOBN(0x6ccd6cb1, 0x4879b397), TOBN(0xf792f804, 0xf8a82f21),
+     TOBN(0x509d4804, 0xa9b46402), TOBN(0xedddf85d, 0xc10f0850),
+     TOBN(0x928410dc, 0x4b6208aa), TOBN(0xf6229c46, 0x391012dc),
+     TOBN(0xc5a7c41e, 0x7727b9b6), TOBN(0x289e4e4b, 0xaa444842),
+     TOBN(0x049ba1d9, 0xe9a947ea), TOBN(0x44f9e47f, 0x83c8debc),
+     TOBN(0xfa77a1fe, 0x611f8b8e), TOBN(0xfd2e416a, 0xf518f427),
+     TOBN(0xc5fffa70, 0x114ebac3), TOBN(0xfe57c4e9, 0x5d89697b),
+     TOBN(0xfdd053ac, 0xb1aaf613), TOBN(0x31df210f, 0xea585a45),
+     TOBN(0x318cc10e, 0x24985034), TOBN(0x1a38efd1, 0x5f1d6130),
+     TOBN(0xbf86f237, 0x0b1e9e21), TOBN(0xb258514d, 0x1dbe88aa),
+     TOBN(0x1e38a588, 0x90c1baf9), TOBN(0x2936a01e, 0xbdb9b692),
+     TOBN(0xd576de98, 0x6dd5b20c), TOBN(0xb586bf71, 0x70f98ecf),
+     TOBN(0xcccf0f12, 0xc42d2fd7), TOBN(0x8717e61c, 0xfb35bd7b),
+     TOBN(0x8b1e5722, 0x35e6fc06), TOBN(0x3477728f, 0x0b3e13d5),
+     TOBN(0x150c294d, 0xaa8a7372), TOBN(0xc0291d43, 0x3bfa528a),
+     TOBN(0xc6c8bc67, 0xcec5a196), TOBN(0xdeeb31e4, 0x5c2e8a7c),
+     TOBN(0xba93e244, 0xfb6e1c51), TOBN(0xb9f8b71b, 0x2e28e156),
+     TOBN(0xce65a287, 0x968a2ab9), TOBN(0xe3c5ce69, 0x46bbcb1f),
+     TOBN(0xf8c835b9, 0xe7ae3f30), TOBN(0x16bbee26, 0xff72b82b),
+     TOBN(0x665e2017, 0xfd42cd22), TOBN(0x1e139970, 0xf8b1d2a0),
+     TOBN(0x125cda29, 0x79204932), TOBN(0x7aee94a5, 0x49c3bee5),
+     TOBN(0x68c70160, 0x89821a66), TOBN(0xf7c37678, 0x8f981669),
+     TOBN(0xd90829fc, 0x48cc3645), TOBN(0x346af049, 0xd70addfc),
+     TOBN(0x2057b232, 0x370bf29c), TOBN(0xf90c73ce, 0x42e650ee),
+     TOBN(0xe03386ea, 0xa126ab90), TOBN(0x0e266e7e, 0x975a087b),
+     TOBN(0x80578eb9, 0x0fca65d9), TOBN(0x7e2989ea, 0x16af45b8),
+     TOBN(0x7438212d, 0xcac75a4e), TOBN(0x38c7ca39, 0x4fef36b8),
+     TOBN(0x8650c494, 0xd402676a), TOBN(0x26ab5a66, 0xf72c7c48),
+     TOBN(0x4e6cb426, 0xce3a464e), TOBN(0xf8f99896, 0x2b72f841),
+     TOBN(0x8c318491, 0x1a335cc8), TOBN(0x563459ba, 0x6a5913e4),
+     TOBN(0x1b920d61, 0xc7b32919), TOBN(0x805ab8b6, 0xa02425ad),
+     TOBN(0x2ac512da, 0x8d006086), TOBN(0x6ca4846a, 0xbcf5c0fd),
+     TOBN(0xafea51d8, 0xac2138d7), TOBN(0xcb647545, 0x344cd443),
+     TOBN(0x0429ee8f, 0xbd7d9040), TOBN(0xee66a2de, 0x819b9c96),
+     TOBN(0x54f9ec25, 0xdea7d744), TOBN(0x2ffea642, 0x671721bb),
+     TOBN(0x4f19dbd1, 0x114344ea), TOBN(0x04304536, 0xfd0dbc8b),
+     TOBN(0x014b50aa, 0x29ec7f91), TOBN(0xb5fc22fe, 0xbb06014d),
+     TOBN(0x60d963a9, 0x1ee682e0), TOBN(0xdf48abc0, 0xfe85c727),
+     TOBN(0x0cadba13, 0x2e707c2d), TOBN(0xde608d3a, 0xa645aeff),
+     TOBN(0x05f1c28b, 0xedafd883), TOBN(0x3c362ede, 0xbd94de1f),
+     TOBN(0x8dd0629d, 0x13593e41), TOBN(0x0a5e736f, 0x766d6eaf),
+     TOBN(0xbfa92311, 0xf68cf9d1), TOBN(0xa4f9ef87, 0xc1797556),
+     TOBN(0x10d75a1f, 0x5601c209), TOBN(0x651c374c, 0x09b07361),
+     TOBN(0x49950b58, 0x88b5cead), TOBN(0x0ef00058, 0x6fa9dbaa),
+     TOBN(0xf51ddc26, 0x4e15f33a), TOBN(0x1f8b5ca6, 0x2ef46140),
+     TOBN(0x343ac0a3, 0xee9523f0), TOBN(0xbb75eab2, 0x975ea978),
+     TOBN(0x1bccf332, 0x107387f4), TOBN(0x790f9259, 0x9ab0062e),
+     TOBN(0xf1a363ad, 0x1e4f6a5f), TOBN(0x06e08b84, 0x62519a50),
+     TOBN(0x60915187, 0x7265f1ee), TOBN(0x6a80ca34, 0x93ae985e),
+     TOBN(0x81b29768, 0xaaba4864), TOBN(0xb13cabf2, 0x8d52a7d6),
+     TOBN(0xb5c36348, 0x8ead03f1), TOBN(0xc932ad95, 0x81c7c1c0),
+     TOBN(0x5452708e, 0xcae1e27b), TOBN(0x9dac4269, 0x1b0df648),
+     TOBN(0x233e3f0c, 0xdfcdb8bc), TOBN(0xe6ceccdf, 0xec540174),
+     TOBN(0xbd0d845e, 0x95081181), TOBN(0xcc8a7920, 0x699355d5),
+     TOBN(0x111c0f6d, 0xc3b375a8), TOBN(0xfd95bc6b, 0xfd51e0dc),
+     TOBN(0x4a106a26, 0x6888523a), TOBN(0x4d142bd6, 0xcb01a06d),
+     TOBN(0x79bfd289, 0xadb9b397), TOBN(0x0bdbfb94, 0xe9863914),
+     TOBN(0x29d8a229, 0x1660f6a6), TOBN(0x7f6abcd6, 0x551c042d),
+     TOBN(0x13039deb, 0x0ac3ffe8), TOBN(0xa01be628, 0xec8523fb),
+     TOBN(0x6ea34103, 0x0ca1c328), TOBN(0xc74114bd, 0xb903928e),
+     TOBN(0x8aa4ff4e, 0x9e9144b0), TOBN(0x7064091f, 0x7f9a4b17),
+     TOBN(0xa3f4f521, 0xe447f2c4), TOBN(0x81b8da7a, 0x604291f0),
+     TOBN(0xd680bc46, 0x7d5926de), TOBN(0x84f21fd5, 0x34a1202f),
+     TOBN(0x1d1e3181, 0x4e9df3d8), TOBN(0x1ca4861a, 0x39ab8d34),
+     TOBN(0x809ddeec, 0x5b19aa4a), TOBN(0x59f72f7e, 0x4d329366),
+     TOBN(0xa2f93f41, 0x386d5087), TOBN(0x40bf739c, 0xdd67d64f),
+     TOBN(0xb4494205, 0x66702158), TOBN(0xc33c65be, 0x73b1e178),
+     TOBN(0xcdcd657c, 0x38ca6153), TOBN(0x97f4519a, 0xdc791976),
+     TOBN(0xcc7c7f29, 0xcd6e1f39), TOBN(0x38de9cfb, 0x7e3c3932),
+     TOBN(0xe448eba3, 0x7b793f85), TOBN(0xe9f8dbf9, 0xf067e914),
+     TOBN(0xc0390266, 0xf114ae87), TOBN(0x39ed75a7, 0xcd6a8e2a),
+     TOBN(0xadb14848, 0x7ffba390), TOBN(0x67f8cb8b, 0x6af9bc09),
+     TOBN(0x322c3848, 0x9c7476db), TOBN(0xa320fecf, 0x52a538d6),
+     TOBN(0xe0493002, 0xb2aced2b), TOBN(0xdfba1809, 0x616bd430),
+     TOBN(0x531c4644, 0xc331be70), TOBN(0xbc04d32e, 0x90d2e450),
+     TOBN(0x1805a0d1, 0x0f9f142d), TOBN(0x2c44a0c5, 0x47ee5a23),
+     TOBN(0x31875a43, 0x3989b4e3), TOBN(0x6b1949fd, 0x0c063481),
+     TOBN(0x2dfb9e08, 0xbe0f4492), TOBN(0x3ff0da03, 0xe9d5e517),
+     TOBN(0x03dbe9a1, 0xf79466a8), TOBN(0x0b87bcd0, 0x15ea9932),
+     TOBN(0xeb64fc83, 0xab1f58ab), TOBN(0x6d9598da, 0x817edc8a),
+     TOBN(0x699cff66, 0x1d3b67e5), TOBN(0x645c0f29, 0x92635853),
+     TOBN(0x253cdd82, 0xeabaf21c), TOBN(0x82b9602a, 0x2241659e),
+     TOBN(0x2cae07ec, 0x2d9f7091), TOBN(0xbe4c720c, 0x8b48cd9b),
+     TOBN(0x6ce5bc03, 0x6f08d6c9), TOBN(0x36e8a997, 0xaf10bf40),
+     TOBN(0x83422d21, 0x3e10ff12), TOBN(0x7b26d3eb, 0xbcc12494),
+     TOBN(0xb240d2d0, 0xc9469ad6), TOBN(0xc4a11b4d, 0x30afa05b),
+     TOBN(0x4b604ace, 0xdd6ba286), TOBN(0x18486600, 0x3ee2864c),
+     TOBN(0x5869d6ba, 0x8d9ce5be), TOBN(0x0d8f68c5, 0xff4bfb0d),
+     TOBN(0xb69f210b, 0x5700cf73), TOBN(0x61f6653a, 0x6d37c135),
+     TOBN(0xff3d432b, 0x5aff5a48), TOBN(0x0d81c4b9, 0x72ba3a69),
+     TOBN(0xee879ae9, 0xfa1899ef), TOBN(0xbac7e2a0, 0x2d6acafd),
+     TOBN(0xd6d93f6c, 0x1c664399), TOBN(0x4c288de1, 0x5bcb135d),
+     TOBN(0x83031dab, 0x9dab7cbf), TOBN(0xfe23feb0, 0x3abbf5f0),
+     TOBN(0x9f1b2466, 0xcdedca85), TOBN(0x140bb710, 0x1a09538c),
+     TOBN(0xac8ae851, 0x5e11115d), TOBN(0x0d63ff67, 0x6f03f59e),
+     TOBN(0x755e5551, 0x7d234afb), TOBN(0x61c2db4e, 0x7e208fc1),
+     TOBN(0xaa9859ce, 0xf28a4b5d), TOBN(0xbdd6d4fc, 0x34af030f),
+     TOBN(0xd1c4a26d, 0x3be01cb1), TOBN(0x9ba14ffc, 0x243aa07c),
+     TOBN(0xf95cd3a9, 0xb2503502), TOBN(0xe379bc06, 0x7d2a93ab),
+     TOBN(0x3efc18e9, 0xd4ca8d68), TOBN(0x083558ec, 0x80bb412a),
+     TOBN(0xd903b940, 0x9645a968), TOBN(0xa499f0b6, 0x9ba6054f),
+     TOBN(0x208b573c, 0xb8349abe), TOBN(0x3baab3e5, 0x30b4fc1c),
+     TOBN(0x87e978ba, 0xcb524990), TOBN(0x3524194e, 0xccdf0e80),
+     TOBN(0x62711725, 0x7d4bcc42), TOBN(0xe90a3d9b, 0xb90109ba),
+     TOBN(0x3b1bdd57, 0x1323e1e0), TOBN(0xb78e9bd5, 0x5eae1599),
+     TOBN(0x0794b746, 0x9e03d278), TOBN(0x80178605, 0xd70e6297),
+     TOBN(0x171792f8, 0x99c97855), TOBN(0x11b393ee, 0xf5a86b5c),
+     TOBN(0x48ef6582, 0xd8884f27), TOBN(0xbd44737a, 0xbf19ba5f),
+     TOBN(0x8698de4c, 0xa42062c6), TOBN(0x8975eb80, 0x61ce9c54),
+     TOBN(0xd50e57c7, 0xd7fe71f3), TOBN(0x15342190, 0xbc97ce38),
+     TOBN(0x51bda2de, 0x4df07b63), TOBN(0xba12aeae, 0x200eb87d),
+     TOBN(0xabe135d2, 0xa9b4f8f6), TOBN(0x04619d65, 0xfad6d99c),
+     TOBN(0x4a6683a7, 0x7994937c), TOBN(0x7a778c8b, 0x6f94f09a),
+     TOBN(0x8c508623, 0x20a71b89), TOBN(0x241a2aed, 0x1c229165),
+     TOBN(0x352be595, 0xaaf83a99), TOBN(0x9fbfee7f, 0x1562bac8),
+     TOBN(0xeaf658b9, 0x5c4017e3), TOBN(0x1dc7f9e0, 0x15120b86),
+     TOBN(0xd84f13dd, 0x4c034d6f), TOBN(0x283dd737, 0xeaea3038),
+     TOBN(0x197f2609, 0xcd85d6a2), TOBN(0x6ebbc345, 0xfae60177),
+     TOBN(0xb80f031b, 0x4e12fede), TOBN(0xde55d0c2, 0x07a2186b),
+     TOBN(0x1fb3e37f, 0x24dcdd5a), TOBN(0x8d602da5, 0x7ed191fb),
+     TOBN(0x108fb056, 0x76023e0d), TOBN(0x70178c71, 0x459c20c0),
+     TOBN(0xfad5a386, 0x3fe54cf0), TOBN(0xa4a3ec4f, 0x02bbb475),
+     TOBN(0x1aa5ec20, 0x919d94d7), TOBN(0x5d3b63b5, 0xa81e4ab3),
+     TOBN(0x7fa733d8, 0x5ad3d2af), TOBN(0xfbc586dd, 0xd1ac7a37),
+     TOBN(0x282925de, 0x40779614), TOBN(0xfe0ffffb, 0xe74a242a),
+     TOBN(0x3f39e67f, 0x906151e5), TOBN(0xcea27f5f, 0x55e10649),
+     TOBN(0xdca1d4e1, 0xc17cf7b7), TOBN(0x0c326d12, 0x2fe2362d),
+     TOBN(0x05f7ac33, 0x7dd35df3), TOBN(0x0c3b7639, 0xc396dbdf),
+     TOBN(0x0912f5ac, 0x03b7db1c), TOBN(0x9dea4b70, 0x5c9ed4a9),
+     TOBN(0x475e6e53, 0xaae3f639), TOBN(0xfaba0e7c, 0xfc278bac),
+     TOBN(0x16f9e221, 0x9490375f), TOBN(0xaebf9746, 0xa5a7ed0a),
+     TOBN(0x45f9af3f, 0xf41ad5d6), TOBN(0x03c4623c, 0xb2e99224),
+     TOBN(0x82c5bb5c, 0xb3cf56aa), TOBN(0x64311819, 0x34567ed3),
+     TOBN(0xec57f211, 0x8be489ac), TOBN(0x2821895d, 0xb9a1104b),
+     TOBN(0x610dc875, 0x6064e007), TOBN(0x8e526f3f, 0x5b20d0fe),
+     TOBN(0x6e71ca77, 0x5b645aee), TOBN(0x3d1dcb9f, 0x800e10ff),
+     TOBN(0x36b51162, 0x189cf6de), TOBN(0x2c5a3e30, 0x6bb17353),
+     TOBN(0xc186cd3e, 0x2a6c6fbf), TOBN(0xa74516fa, 0x4bf97906),
+     TOBN(0x5b4b8f4b, 0x279d6901), TOBN(0x0c4e57b4, 0x2b573743),
+     TOBN(0x75fdb229, 0xb6e386b6), TOBN(0xb46793fd, 0x99deac27),
+     TOBN(0xeeec47ea, 0xcf712629), TOBN(0xe965f3c4, 0xcbc3b2dd),
+     TOBN(0x8dd1fb83, 0x425c6559), TOBN(0x7fc00ee6, 0x0af06fda),
+     TOBN(0xe98c9225, 0x33d956df), TOBN(0x0f1ef335, 0x4fbdc8a2),
+     TOBN(0x2abb5145, 0xb79b8ea2), TOBN(0x40fd2945, 0xbdbff288),
+     TOBN(0x6a814ac4, 0xd7185db7), TOBN(0xc4329d6f, 0xc084609a),
+     TOBN(0xc9ba7b52, 0xed1be45d), TOBN(0x891dd20d, 0xe4cd2c74),
+     TOBN(0x5a4d4a7f, 0x824139b1), TOBN(0x66c17716, 0xb873c710),
+     TOBN(0x5e5bc141, 0x2843c4e0), TOBN(0xd5ac4817, 0xb97eb5bf),
+     TOBN(0xc0f8af54, 0x450c95c7), TOBN(0xc91b3fa0, 0x318406c5),
+     TOBN(0x360c340a, 0xab9d97f8), TOBN(0xfb57bd07, 0x90a2d611),
+     TOBN(0x4339ae3c, 0xa6a6f7e5), TOBN(0x9c1fcd2a, 0x2feb8a10),
+     TOBN(0x972bcca9, 0xc7ea7432), TOBN(0x1b0b924c, 0x308076f6),
+     TOBN(0x80b2814a, 0x2a5b4ca5), TOBN(0x2f78f55b, 0x61ef3b29),
+     TOBN(0xf838744a, 0xc18a414f), TOBN(0xc611eaae, 0x903d0a86),
+     TOBN(0x94dabc16, 0x2a453f55), TOBN(0xe6f2e3da, 0x14efb279),
+     TOBN(0x5b7a6017, 0x9320dc3c), TOBN(0x692e382f, 0x8df6b5a4),
+     TOBN(0x3f5e15e0, 0x2d40fa90), TOBN(0xc87883ae, 0x643dd318),
+     TOBN(0x511053e4, 0x53544774), TOBN(0x834d0ecc, 0x3adba2bc),
+     TOBN(0x4215d7f7, 0xbae371f5), TOBN(0xfcfd57bf, 0x6c8663bc),
+     TOBN(0xded2383d, 0xd6901b1d), TOBN(0x3b49fbb4, 0xb5587dc3),
+     TOBN(0xfd44a08d, 0x07625f62), TOBN(0x3ee4d65b, 0x9de9b762),}
+    ,
+    {TOBN(0x64e5137d, 0x0d63d1fa), TOBN(0x658fc052, 0x02a9d89f),
+     TOBN(0x48894874, 0x50436309), TOBN(0xe9ae30f8, 0xd598da61),
+     TOBN(0x2ed710d1, 0x818baf91), TOBN(0xe27e9e06, 0x8b6a0c20),
+     TOBN(0x1e28dcfb, 0x1c1a6b44), TOBN(0x883acb64, 0xd6ac57dc),
+     TOBN(0x8735728d, 0xc2c6ff70), TOBN(0x79d6122f, 0xc5dc2235),
+     TOBN(0x23f5d003, 0x19e277f9), TOBN(0x7ee84e25, 0xdded8cc7),
+     TOBN(0x91a8afb0, 0x63cd880a), TOBN(0x3f3ea7c6, 0x3574af60),
+     TOBN(0x0cfcdc84, 0x02de7f42), TOBN(0x62d0792f, 0xb31aa152),
+     TOBN(0x8e1b4e43, 0x8a5807ce), TOBN(0xad283893, 0xe4109a7e),
+     TOBN(0xc30cc9cb, 0xafd59dda), TOBN(0xf65f36c6, 0x3d8d8093),
+     TOBN(0xdf31469e, 0xa60d32b2), TOBN(0xee93df4b, 0x3e8191c8),
+     TOBN(0x9c1017c5, 0x355bdeb5), TOBN(0xd2623185, 0x8616aa28),
+     TOBN(0xb02c83f9, 0xdec31a21), TOBN(0x988c8b23, 0x6ad9d573),
+     TOBN(0x53e983ae, 0xa57be365), TOBN(0xe968734d, 0x646f834e),
+     TOBN(0x9137ea8f, 0x5da6309b), TOBN(0x10f3a624, 0xc1f1ce16),
+     TOBN(0x782a9ea2, 0xca440921), TOBN(0xdf94739e, 0x5b46f1b5),
+     TOBN(0x9f9be006, 0xcce85c9b), TOBN(0x360e70d6, 0xa4c7c2d3),
+     TOBN(0x2cd5beea, 0xaefa1e60), TOBN(0x64cf63c0, 0x8c3d2b6d),
+     TOBN(0xfb107fa3, 0xe1cf6f90), TOBN(0xb7e937c6, 0xd5e044e6),
+     TOBN(0x74e8ca78, 0xce34db9f), TOBN(0x4f8b36c1, 0x3e210bd0),
+     TOBN(0x1df165a4, 0x34a35ea8), TOBN(0x3418e0f7, 0x4d4412f6),
+     TOBN(0x5af1f8af, 0x518836c3), TOBN(0x42ceef4d, 0x130e1965),
+     TOBN(0x5560ca0b, 0x543a1957), TOBN(0xc33761e5, 0x886cb123),
+     TOBN(0x66624b1f, 0xfe98ed30), TOBN(0xf772f4bf, 0x1090997d),
+     TOBN(0xf4e540bb, 0x4885d410), TOBN(0x7287f810, 0x9ba5f8d7),
+     TOBN(0x22d0d865, 0xde98dfb1), TOBN(0x49ff51a1, 0xbcfbb8a3),
+     TOBN(0xb6b6fa53, 0x6bc3012e), TOBN(0x3d31fd72, 0x170d541d),
+     TOBN(0x8018724f, 0x4b0f4966), TOBN(0x79e7399f, 0x87dbde07),
+     TOBN(0x56f8410e, 0xf4f8b16a), TOBN(0x97241afe, 0xc47b266a),
+     TOBN(0x0a406b8e, 0x6d9c87c1), TOBN(0x803f3e02, 0xcd42ab1b),
+     TOBN(0x7f0309a8, 0x04dbec69), TOBN(0xa83b85f7, 0x3bbad05f),
+     TOBN(0xc6097273, 0xad8e197f), TOBN(0xc097440e, 0x5067adc1),
+     TOBN(0x730eafb6, 0x3524ff16), TOBN(0xd7f9b51e, 0x823fc6ce),
+     TOBN(0x27bd0d32, 0x443e4ac0), TOBN(0x40c59ad9, 0x4d66f217),
+     TOBN(0x6c33136f, 0x17c387a4), TOBN(0x5043b8d5, 0xeb86804d),
+     TOBN(0x74970312, 0x675a73c9), TOBN(0x838fdb31, 0xf16669b6),
+     TOBN(0xc507b6dd, 0x418e7ddd), TOBN(0x39888d93, 0x472f19d6),
+     TOBN(0x7eae26be, 0x0c27eb4d), TOBN(0x17b53ed3, 0xfbabb884),
+     TOBN(0xfc27021b, 0x2b01ae4f), TOBN(0x88462e87, 0xcf488682),
+     TOBN(0xbee096ec, 0x215e2d87), TOBN(0xeb2fea9a, 0xd242e29b),
+     TOBN(0x5d985b5f, 0xb821fc28), TOBN(0x89d2e197, 0xdc1e2ad2),
+     TOBN(0x55b566b8, 0x9030ba62), TOBN(0xe3fd41b5, 0x4f41b1c6),
+     TOBN(0xb738ac2e, 0xb9a96d61), TOBN(0x7f8567ca, 0x369443f4),
+     TOBN(0x8698622d, 0xf803a440), TOBN(0x2b586236, 0x8fe2f4dc),
+     TOBN(0xbbcc00c7, 0x56b95bce), TOBN(0x5ec03906, 0x616da680),
+     TOBN(0x79162ee6, 0x72214252), TOBN(0x43132b63, 0x86a892d2),
+     TOBN(0x4bdd3ff2, 0x2f3263bf), TOBN(0xd5b3733c, 0x9cd0a142),
+     TOBN(0x592eaa82, 0x44415ccb), TOBN(0x663e8924, 0x8d5474ea),
+     TOBN(0x8058a25e, 0x5236344e), TOBN(0x82e8df9d, 0xbda76ee6),
+     TOBN(0xdcf6efd8, 0x11cc3d22), TOBN(0x00089cda, 0x3b4ab529),
+     TOBN(0x91d3a071, 0xbd38a3db), TOBN(0x4ea97fc0, 0xef72b925),
+     TOBN(0x0c9fc15b, 0xea3edf75), TOBN(0x5a6297cd, 0xa4348ed3),
+     TOBN(0x0d38ab35, 0xce7c42d4), TOBN(0x9fd493ef, 0x82feab10),
+     TOBN(0x46056b6d, 0x82111b45), TOBN(0xda11dae1, 0x73efc5c3),
+     TOBN(0xdc740278, 0x5545a7fb), TOBN(0xbdb2601c, 0x40d507e6),
+     TOBN(0x121dfeeb, 0x7066fa58), TOBN(0x214369a8, 0x39ae8c2a),
+     TOBN(0x195709cb, 0x06e0956c), TOBN(0x4c9d254f, 0x010cd34b),
+     TOBN(0xf51e13f7, 0x0471a532), TOBN(0xe19d6791, 0x1e73054d),
+     TOBN(0xf702a628, 0xdb5c7be3), TOBN(0xc7141218, 0xb24dde05),
+     TOBN(0xdc18233c, 0xf29b2e2e), TOBN(0x3a6bd1e8, 0x85342dba),
+     TOBN(0x3f747fa0, 0xb311898c), TOBN(0xe2a272e4, 0xcd0eac65),
+     TOBN(0x4bba5851, 0xf914d0bc), TOBN(0x7a1a9660, 0xc4a43ee3),
+     TOBN(0xe5a367ce, 0xa1c8cde9), TOBN(0x9d958ba9, 0x7271abe3),
+     TOBN(0xf3ff7eb6, 0x3d1615cd), TOBN(0xa2280dce, 0xf5ae20b0),
+     TOBN(0x56dba5c1, 0xcf640147), TOBN(0xea5a2e3d, 0x5e83d118),
+     TOBN(0x04cd6b6d, 0xda24c511), TOBN(0x1c0f4671, 0xe854d214),
+     TOBN(0x91a6b7a9, 0x69565381), TOBN(0xdc966240, 0xdecf1f5b),
+     TOBN(0x1b22d21c, 0xfcf5d009), TOBN(0x2a05f641, 0x9021dbd5),
+     TOBN(0x8c0ed566, 0xd4312483), TOBN(0x5179a95d, 0x643e216f),
+     TOBN(0xcc185fec, 0x17044493), TOBN(0xb3063339, 0x54991a21),
+     TOBN(0xd801ecdb, 0x0081a726), TOBN(0x0149b0c6, 0x4fa89bbb),
+     TOBN(0xafe9065a, 0x4391b6b9), TOBN(0xedc92786, 0xd633f3a3),
+     TOBN(0xe408c24a, 0xae6a8e13), TOBN(0x85833fde, 0x9f3897ab),
+     TOBN(0x43800e7e, 0xd81a0715), TOBN(0xde08e346, 0xb44ffc5f),
+     TOBN(0x7094184c, 0xcdeff2e0), TOBN(0x49f9387b, 0x165eaed1),
+     TOBN(0x635d6129, 0x777c468a), TOBN(0x8c0dcfd1, 0x538c2dd8),
+     TOBN(0xd6d9d9e3, 0x7a6a308b), TOBN(0x62375830, 0x4c2767d3),
+     TOBN(0x874a8bc6, 0xf38cbeb6), TOBN(0xd94d3f1a, 0xccb6fd9e),
+     TOBN(0x92a9735b, 0xba21f248), TOBN(0x272ad0e5, 0x6cd1efb0),
+     TOBN(0x7437b69c, 0x05b03284), TOBN(0xe7f04702, 0x6948c225),
+     TOBN(0x8a56c04a, 0xcba2ecec), TOBN(0x0c181270, 0xe3a73e41),
+     TOBN(0x6cb34e9d, 0x03e93725), TOBN(0xf77c8713, 0x496521a9),
+     TOBN(0x94569183, 0xfa7f9f90), TOBN(0xf2e7aa4c, 0x8c9707ad),
+     TOBN(0xced2c9ba, 0x26c1c9a3), TOBN(0x9109fe96, 0x40197507),
+     TOBN(0x9ae868a9, 0xe9adfe1c), TOBN(0x3984403d, 0x314e39bb),
+     TOBN(0xb5875720, 0xf2fe378f), TOBN(0x33f901e0, 0xba44a628),
+     TOBN(0xea1125fe, 0x3652438c), TOBN(0xae9ec4e6, 0x9dd1f20b),
+     TOBN(0x1e740d9e, 0xbebf7fbd), TOBN(0x6dbd3ddc, 0x42dbe79c),
+     TOBN(0x62082aec, 0xedd36776), TOBN(0xf612c478, 0xe9859039),
+     TOBN(0xa493b201, 0x032f7065), TOBN(0xebd4d8f2, 0x4ff9b211),
+     TOBN(0x3f23a0aa, 0xaac4cb32), TOBN(0xea3aadb7, 0x15ed4005),
+     TOBN(0xacf17ea4, 0xafa27e63), TOBN(0x56125c1a, 0xc11fd66c),
+     TOBN(0x266344a4, 0x3794f8dc), TOBN(0xdcca923a, 0x483c5c36),
+     TOBN(0x2d6b6bbf, 0x3f9d10a0), TOBN(0xb320c5ca, 0x81d9bdf3),
+     TOBN(0x620e28ff, 0x47b50a95), TOBN(0x933e3b01, 0xcef03371),
+     TOBN(0xf081bf85, 0x99100153), TOBN(0x183be9a0, 0xc3a8c8d6),
+     TOBN(0x4e3ddc5a, 0xd6bbe24d), TOBN(0xc6c74630, 0x53843795),
+     TOBN(0x78193dd7, 0x65ec2d4c), TOBN(0xb8df26cc, 0xcd3c89b2),
+     TOBN(0x98dbe399, 0x5a483f8d), TOBN(0x72d8a957, 0x7dd3313a),
+     TOBN(0x65087294, 0xab0bd375), TOBN(0xfcd89248, 0x7c259d16),
+     TOBN(0x8a9443d7, 0x7613aa81), TOBN(0x80100800, 0x85fe6584),
+     TOBN(0x70fc4dbc, 0x7fb10288), TOBN(0xf58280d3, 0xe86beee8),
+     TOBN(0x14fdd82f, 0x7c978c38), TOBN(0xdf1204c1, 0x0de44d7b),
+     TOBN(0xa08a1c84, 0x4160252f), TOBN(0x591554ca, 0xc17646a5),
+     TOBN(0x214a37d6, 0xa05bd525), TOBN(0x48d5f09b, 0x07957b3c),
+     TOBN(0x0247cdcb, 0xd7109bc9), TOBN(0x40f9e4bb, 0x30599ce7),
+     TOBN(0xc325fa03, 0xf46ad2ec), TOBN(0x00f766cf, 0xc3e3f9ee),
+     TOBN(0xab556668, 0xd43a4577), TOBN(0x68d30a61, 0x3ee03b93),
+     TOBN(0x7ddc81ea, 0x77b46a08), TOBN(0xcf5a6477, 0xc7480699),
+     TOBN(0x43a8cb34, 0x6633f683), TOBN(0x1b867e6b, 0x92363c60),
+     TOBN(0x43921114, 0x1f60558e), TOBN(0xcdbcdd63, 0x2f41450e),
+     TOBN(0x7fc04601, 0xcc630e8b), TOBN(0xea7c66d5, 0x97038b43),
+     TOBN(0x7259b8a5, 0x04e99fd8), TOBN(0x98a8dd12, 0x4785549a),
+     TOBN(0x0e459a7c, 0x840552e1), TOBN(0xcdfcf4d0, 0x4bb0909e),
+     TOBN(0x34a86db2, 0x53758da7), TOBN(0xe643bb83, 0xeac997e1),
+     TOBN(0x96400bd7, 0x530c5b7e), TOBN(0x9f97af87, 0xb41c8b52),
+     TOBN(0x34fc8820, 0xfbeee3f9), TOBN(0x93e53490, 0x49091afd),
+     TOBN(0x764b9be5, 0x9a31f35c), TOBN(0x71f37864, 0x57e3d924),
+     TOBN(0x02fb34e0, 0x943aa75e), TOBN(0xa18c9c58, 0xab8ff6e4),
+     TOBN(0x080f31b1, 0x33cf0d19), TOBN(0x5c9682db, 0x083518a7),
+     TOBN(0x873d4ca6, 0xb709c3de), TOBN(0x64a84262, 0x3575b8f0),
+     TOBN(0x6275da1f, 0x020154bb), TOBN(0x97678caa, 0xd17cf1ab),
+     TOBN(0x8779795f, 0x951a95c3), TOBN(0xdd35b163, 0x50fccc08),
+     TOBN(0x32709627, 0x33d8f031), TOBN(0x3c5ab10a, 0x498dd85c),
+     TOBN(0xb6c185c3, 0x41dca566), TOBN(0x7de7feda, 0xd8622aa3),
+     TOBN(0x99e84d92, 0x901b6dfb), TOBN(0x30a02b0e, 0x7c4ad288),
+     TOBN(0xc7c81daa, 0x2fd3cf36), TOBN(0xd1319547, 0xdf89e59f),
+     TOBN(0xb2be8184, 0xcd496733), TOBN(0xd5f449eb, 0x93d3412b),
+     TOBN(0x7ea41b1b, 0x25fe531d), TOBN(0xf9797432, 0x6a1d5646),
+     TOBN(0x86067f72, 0x2bde501a), TOBN(0xf91481c0, 0x0c85e89c),
+     TOBN(0xca8ee465, 0xf8b05bc6), TOBN(0x1844e1cf, 0x02e83cda),
+     TOBN(0xca82114a, 0xb4dbe33b), TOBN(0x0f9f8769, 0x4eabfde2),
+     TOBN(0x4936b1c0, 0x38b27fe2), TOBN(0x63b6359b, 0xaba402df),
+     TOBN(0x40c0ea2f, 0x656bdbab), TOBN(0x9c992a89, 0x6580c39c),
+     TOBN(0x600e8f15, 0x2a60aed1), TOBN(0xeb089ca4, 0xe0bf49df),
+     TOBN(0x9c233d7d, 0x2d42d99a), TOBN(0x648d3f95, 0x4c6bc2fa),
+     TOBN(0xdcc383a8, 0xe1add3f3), TOBN(0xf42c0c6a, 0x4f64a348),
+     TOBN(0x2abd176f, 0x0030dbdb), TOBN(0x4de501a3, 0x7d6c215e),
+     TOBN(0x4a107c1f, 0x4b9a64bc), TOBN(0xa77f0ad3, 0x2496cd59),
+     TOBN(0xfb78ac62, 0x7688dffb), TOBN(0x7025a2ca, 0x67937d8e),
+     TOBN(0xfde8b2d1, 0xd1a8f4e7), TOBN(0xf5b3da47, 0x7354927c),
+     TOBN(0xe48606a3, 0xd9205735), TOBN(0xac477cc6, 0xe177b917),
+     TOBN(0xfb1f73d2, 0xa883239a), TOBN(0xe12572f6, 0xcc8b8357),
+     TOBN(0x9d355e9c, 0xfb1f4f86), TOBN(0x89b795f8, 0xd9f3ec6e),
+     TOBN(0x27be56f1, 0xb54398dc), TOBN(0x1890efd7, 0x3fedeed5),
+     TOBN(0x62f77f1f, 0x9c6d0140), TOBN(0x7ef0e314, 0x596f0ee4),
+     TOBN(0x50ca6631, 0xcc61dab3), TOBN(0x4a39801d, 0xf4866e4f),
+     TOBN(0x66c8d032, 0xae363b39), TOBN(0x22c591e5, 0x2ead66aa),
+     TOBN(0x954ba308, 0xde02a53e), TOBN(0x2a6c060f, 0xd389f357),
+     TOBN(0xe6cfcde8, 0xfbf40b66), TOBN(0x8e02fc56, 0xc6340ce1),
+     TOBN(0xe4957795, 0x73adb4ba), TOBN(0x7b86122c, 0xa7b03805),
+     TOBN(0x63f83512, 0x0c8e6fa6), TOBN(0x83660ea0, 0x057d7804),
+     TOBN(0xbad79105, 0x21ba473c), TOBN(0xb6c50bee, 0xded5389d),
+     TOBN(0xee2caf4d, 0xaa7c9bc0), TOBN(0xd97b8de4, 0x8c4e98a7),
+     TOBN(0xa9f63e70, 0xab3bbddb), TOBN(0x3898aabf, 0x2597815a),
+     TOBN(0x7659af89, 0xac15b3d9), TOBN(0xedf7725b, 0x703ce784),
+     TOBN(0x25470fab, 0xe085116b), TOBN(0x04a43375, 0x87285310),
+     TOBN(0x4e39187e, 0xe2bfd52f), TOBN(0x36166b44, 0x7d9ebc74),
+     TOBN(0x92ad433c, 0xfd4b322c), TOBN(0x726aa817, 0xba79ab51),
+     TOBN(0xf96eacd8, 0xc1db15eb), TOBN(0xfaf71e91, 0x0476be63),
+     TOBN(0xdd69a640, 0x641fad98), TOBN(0xb7995918, 0x29622559),
+     TOBN(0x03c6daa5, 0xde4199dc), TOBN(0x92cadc97, 0xad545eb4),
+     TOBN(0x1028238b, 0x256534e4), TOBN(0x73e80ce6, 0x8595409a),
+     TOBN(0x690d4c66, 0xd05dc59b), TOBN(0xc95f7b8f, 0x981dee80),
+     TOBN(0xf4337014, 0xd856ac25), TOBN(0x441bd9dd, 0xac524dca),
+     TOBN(0x640b3d85, 0x5f0499f5), TOBN(0x39cf84a9, 0xd5fda182),
+     TOBN(0x04e7b055, 0xb2aa95a0), TOBN(0x29e33f0a, 0x0ddf1860),
+     TOBN(0x082e74b5, 0x423f6b43), TOBN(0x217edeb9, 0x0aaa2b0f),
+     TOBN(0x58b83f35, 0x83cbea55), TOBN(0xc485ee4d, 0xbc185d70),
+     TOBN(0x833ff03b, 0x1e5f6992), TOBN(0xb5b9b9cc, 0xcf0c0dd5),
+     TOBN(0x7caaee8e, 0x4e9e8a50), TOBN(0x462e907b, 0x6269dafd),
+     TOBN(0x6ed5cee9, 0xfbe791c6), TOBN(0x68ca3259, 0xed430790),
+     TOBN(0x2b72bdf2, 0x13b5ba88), TOBN(0x60294c8a, 0x35ef0ac4),
+     TOBN(0x9c3230ed, 0x19b99b08), TOBN(0x560fff17, 0x6c2589aa),
+     TOBN(0x552b8487, 0xd6770374), TOBN(0xa373202d, 0x9a56f685),
+     TOBN(0xd3e7f907, 0x45f175d9), TOBN(0x3c2f315f, 0xd080d810),
+     TOBN(0x1130e9dd, 0x7b9520e8), TOBN(0xc078f9e2, 0x0af037b5),
+     TOBN(0x38cd2ec7, 0x1e9c104c), TOBN(0x0f684368, 0xc472fe92),
+     TOBN(0xd3f1b5ed, 0x6247e7ef), TOBN(0xb32d33a9, 0x396dfe21),
+     TOBN(0x46f59cf4, 0x4a9aa2c2), TOBN(0x69cd5168, 0xff0f7e41),
+     TOBN(0x3f59da0f, 0x4b3234da), TOBN(0xcf0b0235, 0xb4579ebe),
+     TOBN(0x6d1cbb25, 0x6d2476c7), TOBN(0x4f0837e6, 0x9dc30f08),
+     TOBN(0x9a4075bb, 0x906f6e98), TOBN(0x253bb434, 0xc761e7d1),
+     TOBN(0xde2e645f, 0x6e73af10), TOBN(0xb89a4060, 0x0c5f131c),
+     TOBN(0xd12840c5, 0xb8cc037f), TOBN(0x3d093a5b, 0x7405bb47),
+     TOBN(0x6202c253, 0x206348b8), TOBN(0xbf5d57fc, 0xc55a3ca7),
+     TOBN(0x89f6c90c, 0x8c3bef48), TOBN(0x23ac7623, 0x5a0a960a),
+     TOBN(0xdfbd3d6b, 0x552b42ab), TOBN(0x3ef22458, 0x132061f6),
+     TOBN(0xd74e9bda, 0xc97e6516), TOBN(0x88779360, 0xc230f49e),
+     TOBN(0xa6ec1de3, 0x1e74ea49), TOBN(0x581dcee5, 0x3fb645a2),
+     TOBN(0xbaef2391, 0x8f483f14), TOBN(0x6d2dddfc, 0xd137d13b),
+     TOBN(0x54cde50e, 0xd2743a42), TOBN(0x89a34fc5, 0xe4d97e67),
+     TOBN(0x13f1f5b3, 0x12e08ce5), TOBN(0xa80540b8, 0xa7f0b2ca),
+     TOBN(0x854bcf77, 0x01982805), TOBN(0xb8653ffd, 0x233bea04),
+     TOBN(0x8e7b8787, 0x02b0b4c9), TOBN(0x2675261f, 0x9acb170a),
+     TOBN(0x061a9d90, 0x930c14e5), TOBN(0xb59b30e0, 0xdef0abea),
+     TOBN(0x1dc19ea6, 0x0200ec7d), TOBN(0xb6f4a3f9, 0x0bce132b),
+     TOBN(0xb8d5de90, 0xf13e27e0), TOBN(0xbaee5ef0, 0x1fade16f),
+     TOBN(0x6f406aaa, 0xe4c6cf38), TOBN(0xab4cfe06, 0xd1369815),
+     TOBN(0x0dcffe87, 0xefd550c6), TOBN(0x9d4f59c7, 0x75ff7d39),
+     TOBN(0xb02553b1, 0x51deb6ad), TOBN(0x812399a4, 0xb1877749),
+     TOBN(0xce90f71f, 0xca6006e1), TOBN(0xc32363a6, 0xb02b6e77),
+     TOBN(0x02284fbe, 0xdc36c64d), TOBN(0x86c81e31, 0xa7e1ae61),
+     TOBN(0x2576c7e5, 0xb909d94a), TOBN(0x8b6f7d02, 0x818b2bb0),
+     TOBN(0xeca3ed07, 0x56faa38a), TOBN(0xa3790e6c, 0x9305bb54),
+     TOBN(0xd784eeda, 0x7bc73061), TOBN(0xbd56d369, 0x6dd50614),
+     TOBN(0xd6575949, 0x229a8aa9), TOBN(0xdcca8f47, 0x4595ec28),
+     TOBN(0x814305c1, 0x06ab4fe6), TOBN(0xc8c39768, 0x24f43f16),
+     TOBN(0xe2a45f36, 0x523f2b36), TOBN(0x995c6493, 0x920d93bb),
+     TOBN(0xf8afdab7, 0x90f1632b), TOBN(0x79ebbecd, 0x1c295954),
+     TOBN(0xc7bb3ddb, 0x79592f48), TOBN(0x67216a7b, 0x5f88e998),
+     TOBN(0xd91f098b, 0xbc01193e), TOBN(0xf7d928a5, 0xb1db83fc),
+     TOBN(0x55e38417, 0xe991f600), TOBN(0x2a91113e, 0x2981a934),
+     TOBN(0xcbc9d648, 0x06b13bde), TOBN(0xb011b6ac, 0x0755ff44),
+     TOBN(0x6f4cb518, 0x045ec613), TOBN(0x522d2d31, 0xc2f5930a),
+     TOBN(0x5acae1af, 0x382e65de), TOBN(0x57643067, 0x27bc966f),
+     TOBN(0x5e12705d, 0x1c7193f0), TOBN(0xf0f32f47, 0x3be8858e),
+     TOBN(0x785c3d7d, 0x96c6dfc7), TOBN(0xd75b4a20, 0xbf31795d),
+     TOBN(0x91acf17b, 0x342659d4), TOBN(0xe596ea34, 0x44f0378f),
+     TOBN(0x4515708f, 0xce52129d), TOBN(0x17387e1e, 0x79f2f585),
+     TOBN(0x72cfd2e9, 0x49dee168), TOBN(0x1ae05223, 0x3e2af239),
+     TOBN(0x009e75be, 0x1d94066a), TOBN(0x6cca31c7, 0x38abf413),
+     TOBN(0xb50bd61d, 0x9bc49908), TOBN(0x4a9b4a8c, 0xf5e2bc1e),
+     TOBN(0xeb6cc5f7, 0x946f83ac), TOBN(0x27da93fc, 0xebffab28),
+     TOBN(0xea314c96, 0x4821c8c5), TOBN(0x8de49ded, 0xa83c15f4),
+     TOBN(0x7a64cf20, 0x7af33004), TOBN(0x45f1bfeb, 0xc9627e10),
+     TOBN(0x878b0626, 0x54b9df60), TOBN(0x5e4fdc3c, 0xa95c0b33),
+     TOBN(0xe54a37ca, 0xc2035d8e), TOBN(0x9087cda9, 0x80f20b8c),
+     TOBN(0x36f61c23, 0x8319ade4), TOBN(0x766f287a, 0xde8cfdf8),
+     TOBN(0x48821948, 0x346f3705), TOBN(0x49a7b853, 0x16e4f4a2),
+     TOBN(0xb9b3f8a7, 0x5cedadfd), TOBN(0x8f562815, 0x8db2a815),
+     TOBN(0xc0b7d554, 0x01f68f95), TOBN(0x12971e27, 0x688a208e),
+     TOBN(0xc9f8b696, 0xd0ff34fc), TOBN(0x20824de2, 0x1222718c),
+     TOBN(0x7213cf9f, 0x0c95284d), TOBN(0xe2ad741b, 0xdc158240),
+     TOBN(0x0ee3a6df, 0x54043ccf), TOBN(0x16ff479b, 0xd84412b3),
+     TOBN(0xf6c74ee0, 0xdfc98af0), TOBN(0xa78a169f, 0x52fcd2fb),
+     TOBN(0xd8ae8746, 0x99c930e9), TOBN(0x1d33e858, 0x49e117a5),
+     TOBN(0x7581fcb4, 0x6624759f), TOBN(0xde50644f, 0x5bedc01d),
+     TOBN(0xbeec5d00, 0xcaf3155e), TOBN(0x672d66ac, 0xbc73e75f),
+     TOBN(0x86b9d8c6, 0x270b01db), TOBN(0xd249ef83, 0x50f55b79),
+     TOBN(0x6131d6d4, 0x73978fe3), TOBN(0xcc4e4542, 0x754b00a1),
+     TOBN(0x4e05df05, 0x57dfcfe9), TOBN(0x94b29cdd, 0x51ef6bf0),
+     TOBN(0xe4530cff, 0x9bc7edf2), TOBN(0x8ac236fd, 0xd3da65f3),
+     TOBN(0x0faf7d5f, 0xc8eb0b48), TOBN(0x4d2de14c, 0x660eb039),
+     TOBN(0xc006bba7, 0x60430e54), TOBN(0x10a2d0d6, 0xda3289ab),
+     TOBN(0x9c037a5d, 0xd7979c59), TOBN(0x04d1f3d3, 0xa116d944),
+     TOBN(0x9ff22473, 0x8a0983cd), TOBN(0x28e25b38, 0xc883cabb),
+     TOBN(0xe968dba5, 0x47a58995), TOBN(0x2c80b505, 0x774eebdf),
+     TOBN(0xee763b71, 0x4a953beb), TOBN(0x502e223f, 0x1642e7f6),
+     TOBN(0x6fe4b641, 0x61d5e722), TOBN(0x9d37c5b0, 0xdbef5316),
+     TOBN(0x0115ed70, 0xf8330bc7), TOBN(0x139850e6, 0x75a72789),
+     TOBN(0x27d7faec, 0xffceccc2), TOBN(0x3016a860, 0x4fd9f7f6),
+     TOBN(0xc492ec64, 0x4cd8f64c), TOBN(0x58a2d790, 0x279d7b51),
+     TOBN(0x0ced1fc5, 0x1fc75256), TOBN(0x3e658aed, 0x8f433017),
+     TOBN(0x0b61942e, 0x05da59eb), TOBN(0xba3d60a3, 0x0ddc3722),
+     TOBN(0x7c311cd1, 0x742e7f87), TOBN(0x6473ffee, 0xf6b01b6e),}
+    ,
+    {TOBN(0x8303604f, 0x692ac542), TOBN(0xf079ffe1, 0x227b91d3),
+     TOBN(0x19f63e63, 0x15aaf9bd), TOBN(0xf99ee565, 0xf1f344fb),
+     TOBN(0x8a1d661f, 0xd6219199), TOBN(0x8c883bc6, 0xd48ce41c),
+     TOBN(0x1065118f, 0x3c74d904), TOBN(0x713889ee, 0x0faf8b1b),
+     TOBN(0x972b3f8f, 0x81a1b3be), TOBN(0x4f3ce145, 0xce2764a0),
+     TOBN(0xe2d0f1cc, 0x28c4f5f7), TOBN(0xdeee0c0d, 0xc7f3985b),
+     TOBN(0x7df4adc0, 0xd39e25c3), TOBN(0x40619820, 0xc467a080),
+     TOBN(0x440ebc93, 0x61cf5a58), TOBN(0x527729a6, 0x422ad600),
+     TOBN(0xca6c0937, 0xb1b76ba6), TOBN(0x1a2eab85, 0x4d2026dc),
+     TOBN(0xb1715e15, 0x19d9ae0a), TOBN(0xf1ad9199, 0xbac4a026),
+     TOBN(0x35b3dfb8, 0x07ea7b0e), TOBN(0xedf5496f, 0x3ed9eb89),
+     TOBN(0x8932e5ff, 0x2d6d08ab), TOBN(0xf314874e, 0x25bd2731),
+     TOBN(0xefb26a75, 0x3f73f449), TOBN(0x1d1c94f8, 0x8d44fc79),
+     TOBN(0x49f0fbc5, 0x3bc0dc4d), TOBN(0xb747ea0b, 0x3698a0d0),
+     TOBN(0x5218c3fe, 0x228d291e), TOBN(0x35b804b5, 0x43c129d6),
+     TOBN(0xfac859b8, 0xd1acc516), TOBN(0x6c10697d, 0x95d6e668),
+     TOBN(0xc38e438f, 0x0876fd4e), TOBN(0x45f0c307, 0x83d2f383),
+     TOBN(0x203cc2ec, 0xb10934cb), TOBN(0x6a8f2439, 0x2c9d46ee),
+     TOBN(0xf16b431b, 0x65ccde7b), TOBN(0x41e2cd18, 0x27e76a6f),
+     TOBN(0xb9c8cf8f, 0x4e3484d7), TOBN(0x64426efd, 0x8315244a),
+     TOBN(0x1c0a8e44, 0xfc94dea3), TOBN(0x34c8cdbf, 0xdad6a0b0),
+     TOBN(0x919c3840, 0x04113cef), TOBN(0xfd32fba4, 0x15490ffa),
+     TOBN(0x58d190f6, 0x795dcfb7), TOBN(0xfef01b03, 0x83588baf),
+     TOBN(0x9e6d1d63, 0xca1fc1c0), TOBN(0x53173f96, 0xf0a41ac9),
+     TOBN(0x2b1d402a, 0xba16f73b), TOBN(0x2fb31014, 0x8cf9b9fc),
+     TOBN(0x2d51e60e, 0x446ef7bf), TOBN(0xc731021b, 0xb91e1745),
+     TOBN(0x9d3b4724, 0x4fee99d4), TOBN(0x4bca48b6, 0xfac5c1ea),
+     TOBN(0x70f5f514, 0xbbea9af7), TOBN(0x751f55a5, 0x974c283a),
+     TOBN(0x6e30251a, 0xcb452fdb), TOBN(0x31ee6965, 0x50f30650),
+     TOBN(0xb0b3e508, 0x933548d9), TOBN(0xb8949a4f, 0xf4b0ef5b),
+     TOBN(0x208b8326, 0x3c88f3bd), TOBN(0xab147c30, 0xdb1d9989),
+     TOBN(0xed6515fd, 0x44d4df03), TOBN(0x17a12f75, 0xe72eb0c5),
+     TOBN(0x3b59796d, 0x36cf69db), TOBN(0x1219eee9, 0x56670c18),
+     TOBN(0xfe3341f7, 0x7a070d8e), TOBN(0x9b70130b, 0xa327f90c),
+     TOBN(0x36a32462, 0x0ae18e0e), TOBN(0x2021a623, 0x46c0a638),
+     TOBN(0x251b5817, 0xc62eb0d4), TOBN(0x87bfbcdf, 0x4c762293),
+     TOBN(0xf78ab505, 0xcdd61d64), TOBN(0x8c7a53fc, 0xc8c18857),
+     TOBN(0xa653ce6f, 0x16147515), TOBN(0x9c923aa5, 0xea7d52d5),
+     TOBN(0xc24709cb, 0x5c18871f), TOBN(0x7d53bec8, 0x73b3cc74),
+     TOBN(0x59264aff, 0xfdd1d4c4), TOBN(0x5555917e, 0x240da582),
+     TOBN(0xcae8bbda, 0x548f5a0e), TOBN(0x1910eaba, 0x3bbfbbe1),
+     TOBN(0xae579685, 0x7677afc3), TOBN(0x49ea61f1, 0x73ff0b5c),
+     TOBN(0x78655478, 0x4f7c3922), TOBN(0x95d337cd, 0x20c68eef),
+     TOBN(0x68f1e1e5, 0xdf779ab9), TOBN(0x14b491b0, 0xb5cf69a8),
+     TOBN(0x7a6cbbe0, 0x28e3fe89), TOBN(0xe7e1fee4, 0xc5aac0eb),
+     TOBN(0x7f47eda5, 0x697e5140), TOBN(0x4f450137, 0xb454921f),
+     TOBN(0xdb625f84, 0x95cd8185), TOBN(0x74be0ba1, 0xcdb2e583),
+     TOBN(0xaee4fd7c, 0xdd5e6de4), TOBN(0x4251437d, 0xe8101739),
+     TOBN(0x686d72a0, 0xac620366), TOBN(0x4be3fb9c, 0xb6d59344),
+     TOBN(0x6e8b44e7, 0xa1eb75b9), TOBN(0x84e39da3, 0x91a5c10c),
+     TOBN(0x37cc1490, 0xb38f0409), TOBN(0x02951943, 0x2c2ade82),
+     TOBN(0x9b688783, 0x1190a2d8), TOBN(0x25627d14, 0x231182ba),
+     TOBN(0x6eb550aa, 0x658a6d87), TOBN(0x1405aaa7, 0xcf9c7325),
+     TOBN(0xd147142e, 0x5c8748c9), TOBN(0x7f637e4f, 0x53ede0e0),
+     TOBN(0xf8ca2776, 0x14ffad2c), TOBN(0xe58fb1bd, 0xbafb6791),
+     TOBN(0x17158c23, 0xbf8f93fc), TOBN(0x7f15b373, 0x0a4a4655),
+     TOBN(0x39d4add2, 0xd842ca72), TOBN(0xa71e4391, 0x3ed96305),
+     TOBN(0x5bb09cbe, 0x6700be14), TOBN(0x68d69d54, 0xd8befcf6),
+     TOBN(0xa45f5367, 0x37183bcf), TOBN(0x7152b7bb, 0x3370dff7),
+     TOBN(0xcf887baa, 0xbf12525b), TOBN(0xe7ac7bdd, 0xd6d1e3cd),
+     TOBN(0x25914f78, 0x81fdad90), TOBN(0xcf638f56, 0x0d2cf6ab),
+     TOBN(0xb90bc03f, 0xcc054de5), TOBN(0x932811a7, 0x18b06350),
+     TOBN(0x2f00b330, 0x9bbd11ff), TOBN(0x76108a6f, 0xb4044974),
+     TOBN(0x801bb9e0, 0xa851d266), TOBN(0x0dd099be, 0xbf8990c1),
+     TOBN(0x58c5aaaa, 0xabe32986), TOBN(0x0fe9dd2a, 0x50d59c27),
+     TOBN(0x84951ff4, 0x8d307305), TOBN(0x6c23f829, 0x86529b78),
+     TOBN(0x50bb2218, 0x0b136a79), TOBN(0x7e2174de, 0x77a20996),
+     TOBN(0x6f00a4b9, 0xc0bb4da6), TOBN(0x89a25a17, 0xefdde8da),
+     TOBN(0xf728a27e, 0xc11ee01d), TOBN(0xf900553a, 0xe5f10dfb),
+     TOBN(0x189a83c8, 0x02ec893c), TOBN(0x3ca5bdc1, 0x23f66d77),
+     TOBN(0x98781537, 0x97eada9f), TOBN(0x59c50ab3, 0x10256230),
+     TOBN(0x346042d9, 0x323c69b3), TOBN(0x1b715a6d, 0x2c460449),
+     TOBN(0xa41dd476, 0x6ae06e0b), TOBN(0xcdd7888e, 0x9d42e25f),
+     TOBN(0x0f395f74, 0x56b25a20), TOBN(0xeadfe0ae, 0x8700e27e),
+     TOBN(0xb09d52a9, 0x69950093), TOBN(0x3525d9cb, 0x327f8d40),
+     TOBN(0xb8235a94, 0x67df886a), TOBN(0x77e4b0dd, 0x035faec2),
+     TOBN(0x115eb20a, 0x517d7061), TOBN(0x77fe3433, 0x6c2df683),
+     TOBN(0x6870ddc7, 0xcdc6fc67), TOBN(0xb1610588, 0x0b87de83),
+     TOBN(0x343584ca, 0xd9c4ddbe), TOBN(0xb3164f1c, 0x3d754be2),
+     TOBN(0x0731ed3a, 0xc1e6c894), TOBN(0x26327dec, 0x4f6b904c),
+     TOBN(0x9d49c6de, 0x97b5cd32), TOBN(0x40835dae, 0xb5eceecd),
+     TOBN(0xc66350ed, 0xd9ded7fe), TOBN(0x8aeebb5c, 0x7a678804),
+     TOBN(0x51d42fb7, 0x5b8ee9ec), TOBN(0xd7a17bdd, 0x8e3ca118),
+     TOBN(0x40d7511a, 0x2ef4400e), TOBN(0xc48990ac, 0x875a66f4),
+     TOBN(0x8de07d2a, 0x2199e347), TOBN(0xbee75556, 0x2a39e051),
+     TOBN(0x56918786, 0x916e51dc), TOBN(0xeb191313, 0x4a2d89ec),
+     TOBN(0x6679610d, 0x37d341ed), TOBN(0x434fbb41, 0x56d51c2b),
+     TOBN(0xe54b7ee7, 0xd7492dba), TOBN(0xaa33a79a, 0x59021493),
+     TOBN(0x49fc5054, 0xe4bd6d3d), TOBN(0x09540f04, 0x5ab551d0),
+     TOBN(0x8acc9085, 0x4942d3a6), TOBN(0x231af02f, 0x2d28323b),
+     TOBN(0x93458cac, 0x0992c163), TOBN(0x1fef8e71, 0x888e3bb4),
+     TOBN(0x27578da5, 0xbe8c268c), TOBN(0xcc8be792, 0xe805ec00),
+     TOBN(0x29267bae, 0xc61c3855), TOBN(0xebff429d, 0x58c1fd3b),
+     TOBN(0x22d886c0, 0x8c0b93b8), TOBN(0xca5e00b2, 0x2ddb8953),
+     TOBN(0xcf330117, 0xc3fed8b7), TOBN(0xd49ac6fa, 0x819c01f6),
+     TOBN(0x6ddaa6bd, 0x3c0fbd54), TOBN(0x91743068, 0x8049a2cf),
+     TOBN(0xd67f981e, 0xaff2ef81), TOBN(0xc3654d35, 0x2818ae80),
+     TOBN(0x81d05044, 0x1b2aa892), TOBN(0x2db067bf, 0x3d099328),
+     TOBN(0xe7c79e86, 0x703dcc97), TOBN(0xe66f9b37, 0xe133e215),
+     TOBN(0xcdf119a6, 0xe39a7a5c), TOBN(0x47c60de3, 0x876f1b61),
+     TOBN(0x6e405939, 0xd860f1b2), TOBN(0x3e9a1dbc, 0xf5ed4d4a),
+     TOBN(0x3f23619e, 0xc9b6bcbd), TOBN(0x5ee790cf, 0x734e4497),
+     TOBN(0xf0a834b1, 0x5bdaf9bb), TOBN(0x02cedda7, 0x4ca295f0),
+     TOBN(0x4619aa2b, 0xcb8e378c), TOBN(0xe5613244, 0xcc987ea4),
+     TOBN(0x0bc022cc, 0x76b23a50), TOBN(0x4a2793ad, 0x0a6c21ce),
+     TOBN(0x38328780, 0x89cac3f5), TOBN(0x29176f1b, 0xcba26d56),
+     TOBN(0x06296187, 0x4f6f59eb), TOBN(0x86e9bca9, 0x8bdc658e),
+     TOBN(0x2ca9c4d3, 0x57e30402), TOBN(0x5438b216, 0x516a09bb),
+     TOBN(0x0a6a063c, 0x7672765a), TOBN(0x37a3ce64, 0x0547b9bf),
+     TOBN(0x42c099c8, 0x98b1a633), TOBN(0xb5ab800d, 0x05ee6961),
+     TOBN(0xf1963f59, 0x11a5acd6), TOBN(0xbaee6157, 0x46201063),
+     TOBN(0x36d9a649, 0xa596210a), TOBN(0xaed04363, 0x1ba7138c),
+     TOBN(0xcf817d1c, 0xa4a82b76), TOBN(0x5586960e, 0xf3806be9),
+     TOBN(0x7ab67c89, 0x09dc6bb5), TOBN(0x52ace7a0, 0x114fe7eb),
+     TOBN(0xcd987618, 0xcbbc9b70), TOBN(0x4f06fd5a, 0x604ca5e1),
+     TOBN(0x90af14ca, 0x6dbde133), TOBN(0x1afe4322, 0x948a3264),
+     TOBN(0xa70d2ca6, 0xc44b2c6c), TOBN(0xab726799, 0x0ef87dfe),
+     TOBN(0x310f64dc, 0x2e696377), TOBN(0x49b42e68, 0x4c8126a0),
+     TOBN(0x0ea444c3, 0xcea0b176), TOBN(0x53a8ddf7, 0xcb269182),
+     TOBN(0xf3e674eb, 0xbbba9dcb), TOBN(0x0d2878a8, 0xd8669d33),
+     TOBN(0x04b935d5, 0xd019b6a3), TOBN(0xbb5cf88e, 0x406f1e46),
+     TOBN(0xa1912d16, 0x5b57c111), TOBN(0x9803fc21, 0x19ebfd78),
+     TOBN(0x4f231c9e, 0xc07764a9), TOBN(0xd93286ee, 0xb75bd055),
+     TOBN(0x83a9457d, 0x8ee6c9de), TOBN(0x04695915, 0x6087ec90),
+     TOBN(0x14c6dd8a, 0x58d6cd46), TOBN(0x9cb633b5, 0x8e6634d2),
+     TOBN(0xc1305047, 0xf81bc328), TOBN(0x12ede0e2, 0x26a177e5),
+     TOBN(0x332cca62, 0x065a6f4f), TOBN(0xc3a47ecd, 0x67be487b),
+     TOBN(0x741eb187, 0x0f47ed1c), TOBN(0x99e66e58, 0xe7598b14),
+     TOBN(0x6f0544ca, 0x63d0ff12), TOBN(0xe5efc784, 0xb610a05f),
+     TOBN(0xf72917b1, 0x7cad7b47), TOBN(0x3ff6ea20, 0xf2cac0c0),
+     TOBN(0xcc23791b, 0xf21db8b7), TOBN(0x7dac70b1, 0xd7d93565),
+     TOBN(0x682cda1d, 0x694bdaad), TOBN(0xeb88bb8c, 0x1023516d),
+     TOBN(0xc4c634b4, 0xdfdbeb1b), TOBN(0x22f5ca72, 0xb4ee4dea),
+     TOBN(0x1045a368, 0xe6524821), TOBN(0xed9e8a3f, 0x052b18b2),
+     TOBN(0x9b7f2cb1, 0xb961f49a), TOBN(0x7fee2ec1, 0x7b009670),
+     TOBN(0x350d8754, 0x22507a6d), TOBN(0x561bd711, 0x4db55f1d),
+     TOBN(0x4c189ccc, 0x320bbcaf), TOBN(0x568434cf, 0xdf1de48c),
+     TOBN(0x6af1b00e, 0x0fa8f128), TOBN(0xf0ba9d02, 0x8907583c),
+     TOBN(0x735a4004, 0x32ff9f60), TOBN(0x3dd8e4b6, 0xc25dcf33),
+     TOBN(0xf2230f16, 0x42c74cef), TOBN(0xd8117623, 0x013fa8ad),
+     TOBN(0x36822876, 0xf51fe76e), TOBN(0x8a6811cc, 0x11d62589),
+     TOBN(0xc3fc7e65, 0x46225718), TOBN(0xb7df2c9f, 0xc82fdbcd),
+     TOBN(0x3b1d4e52, 0xdd7b205b), TOBN(0xb6959478, 0x47a2e414),
+     TOBN(0x05e4d793, 0xefa91148), TOBN(0xb47ed446, 0xfd2e9675),
+     TOBN(0x1a7098b9, 0x04c9d9bf), TOBN(0x661e2881, 0x1b793048),
+     TOBN(0xb1a16966, 0xb01ee461), TOBN(0xbc521308, 0x2954746f),
+     TOBN(0xc909a0fc, 0x2477de50), TOBN(0xd80bb41c, 0x7dbd51ef),
+     TOBN(0xa85be7ec, 0x53294905), TOBN(0x6d465b18, 0x83958f97),
+     TOBN(0x16f6f330, 0xfb6840fd), TOBN(0xfaaeb214, 0x3401e6c8),
+     TOBN(0xaf83d30f, 0xccb5b4f8), TOBN(0x22885739, 0x266dec4b),
+     TOBN(0x51b4367c, 0x7bc467df), TOBN(0x926562e3, 0xd842d27a),
+     TOBN(0xdfcb6614, 0x0fea14a6), TOBN(0xeb394dae, 0xf2734cd9),
+     TOBN(0x3eeae5d2, 0x11c0be98), TOBN(0xb1e6ed11, 0x814e8165),
+     TOBN(0x191086bc, 0xe52bce1c), TOBN(0x14b74cc6, 0xa75a04da),
+     TOBN(0x63cf1186, 0x8c060985), TOBN(0x071047de, 0x2dbd7f7c),
+     TOBN(0x4e433b8b, 0xce0942ca), TOBN(0xecbac447, 0xd8fec61d),
+     TOBN(0x8f0ed0e2, 0xebf3232f), TOBN(0xfff80f9e, 0xc52a2edd),
+     TOBN(0xad9ab433, 0x75b55fdb), TOBN(0x73ca7820, 0xe42e0c11),
+     TOBN(0x6dace0a0, 0xe6251b46), TOBN(0x89bc6b5c, 0x4c0d932d),
+     TOBN(0x3438cd77, 0x095da19a), TOBN(0x2f24a939, 0x8d48bdfb),
+     TOBN(0x99b47e46, 0x766561b7), TOBN(0x736600e6, 0x0ed0322a),
+     TOBN(0x06a47cb1, 0x638e1865), TOBN(0x927c1c2d, 0xcb136000),
+     TOBN(0x29542337, 0x0cc5df69), TOBN(0x99b37c02, 0x09d649a9),
+     TOBN(0xc5f0043c, 0x6aefdb27), TOBN(0x6cdd9987, 0x1be95c27),
+     TOBN(0x69850931, 0x390420d2), TOBN(0x299c40ac, 0x0983efa4),
+     TOBN(0x3a05e778, 0xaf39aead), TOBN(0x84274408, 0x43a45193),
+     TOBN(0x6bcd0fb9, 0x91a711a0), TOBN(0x461592c8, 0x9f52ab17),
+     TOBN(0xb49302b4, 0xda3c6ed6), TOBN(0xc51fddc7, 0x330d7067),
+     TOBN(0x94babeb6, 0xda50d531), TOBN(0x521b840d, 0xa6a7b9da),
+     TOBN(0x5305151e, 0x404bdc89), TOBN(0x1bcde201, 0xd0d07449),
+     TOBN(0xf427a78b, 0x3b76a59a), TOBN(0xf84841ce, 0x07791a1b),
+     TOBN(0xebd314be, 0xbf91ed1c), TOBN(0x8e61d34c, 0xbf172943),
+     TOBN(0x1d5dc451, 0x5541b892), TOBN(0xb186ee41, 0xfc9d9e54),
+     TOBN(0x9d9f345e, 0xd5bf610d), TOBN(0x3e7ba65d, 0xf6acca9f),
+     TOBN(0x9dda787a, 0xa8369486), TOBN(0x09f9dab7, 0x8eb5ba53),
+     TOBN(0x5afb2033, 0xd6481bc3), TOBN(0x76f4ce30, 0xafa62104),
+     TOBN(0xa8fa00cf, 0xf4f066b5), TOBN(0x89ab5143, 0x461dafc2),
+     TOBN(0x44339ed7, 0xa3389998), TOBN(0x2ff862f1, 0xbc214903),
+     TOBN(0x2c88f985, 0xb05556e3), TOBN(0xcd96058e, 0x3467081e),
+     TOBN(0x7d6a4176, 0xedc637ea), TOBN(0xe1743d09, 0x36a5acdc),
+     TOBN(0x66fd72e2, 0x7eb37726), TOBN(0xf7fa264e, 0x1481a037),
+     TOBN(0x9fbd3bde, 0x45f4aa79), TOBN(0xed1e0147, 0x767c3e22),
+     TOBN(0x7621f979, 0x82e7abe2), TOBN(0x19eedc72, 0x45f633f8),
+     TOBN(0xe69b155e, 0x6137bf3a), TOBN(0xa0ad13ce, 0x414ee94e),
+     TOBN(0x93e3d524, 0x1c0e651a), TOBN(0xab1a6e2a, 0x02ce227e),
+     TOBN(0xe7af1797, 0x4ab27eca), TOBN(0x245446de, 0xbd444f39),
+     TOBN(0x59e22a21, 0x56c07613), TOBN(0x43deafce, 0xf4275498),
+     TOBN(0x10834ccb, 0x67fd0946), TOBN(0xa75841e5, 0x47406edf),
+     TOBN(0xebd6a677, 0x7b0ac93d), TOBN(0xa6e37b0d, 0x78f5e0d7),
+     TOBN(0x2516c096, 0x76f5492b), TOBN(0x1e4bf888, 0x9ac05f3a),
+     TOBN(0xcdb42ce0, 0x4df0ba2b), TOBN(0x935d5cfd, 0x5062341b),
+     TOBN(0x8a303333, 0x82acac20), TOBN(0x429438c4, 0x5198b00e),
+     TOBN(0x1d083bc9, 0x049d33fa), TOBN(0x58b82dda, 0x946f67ff),
+     TOBN(0xac3e2db8, 0x67a1d6a3), TOBN(0x62e6bead, 0x1798aac8),
+     TOBN(0xfc85980f, 0xde46c58c), TOBN(0xa7f69379, 0x69c8d7be),
+     TOBN(0x23557927, 0x837b35ec), TOBN(0x06a933d8, 0xe0790c0c),
+     TOBN(0x827c0e9b, 0x077ff55d), TOBN(0x53977798, 0xbb26e680),
+     TOBN(0x59530874, 0x1d9cb54f), TOBN(0xcca3f449, 0x4aac53ef),
+     TOBN(0x11dc5c87, 0xa07eda0f), TOBN(0xc138bccf, 0xfd6400c8),
+     TOBN(0x549680d3, 0x13e5da72), TOBN(0xc93eed82, 0x4540617e),
+     TOBN(0xfd3db157, 0x4d0b75c0), TOBN(0x9716eb42, 0x6386075b),
+     TOBN(0x0639605c, 0x817b2c16), TOBN(0x09915109, 0xf1e4f201),
+     TOBN(0x35c9a928, 0x5cca6c3b), TOBN(0xb25f7d1a, 0x3505c900),
+     TOBN(0xeb9f7d20, 0x630480c4), TOBN(0xc3c7b8c6, 0x2a1a501c),
+     TOBN(0x3f99183c, 0x5a1f8e24), TOBN(0xfdb118fa, 0x9dd255f0),
+     TOBN(0xb9b18b90, 0xc27f62a6), TOBN(0xe8f732f7, 0x396ec191),
+     TOBN(0x524a2d91, 0x0be786ab), TOBN(0x5d32adef, 0x0ac5a0f5),
+     TOBN(0x9b53d4d6, 0x9725f694), TOBN(0x032a76c6, 0x0510ba89),
+     TOBN(0x840391a3, 0xebeb1544), TOBN(0x44b7b88c, 0x3ed73ac3),
+     TOBN(0xd24bae7a, 0x256cb8b3), TOBN(0x7ceb151a, 0xe394cb12),
+     TOBN(0xbd6b66d0, 0x5bc1e6a8), TOBN(0xec70cecb, 0x090f07bf),
+     TOBN(0x270644ed, 0x7d937589), TOBN(0xee9e1a3d, 0x5f1dccfe),
+     TOBN(0xb0d40a84, 0x745b98d2), TOBN(0xda429a21, 0x2556ed40),
+     TOBN(0xf676eced, 0x85148cb9), TOBN(0x5a22d40c, 0xded18936),
+     TOBN(0x3bc4b9e5, 0x70e8a4ce), TOBN(0xbfd1445b, 0x9eae0379),
+     TOBN(0xf23f2c0c, 0x1a0bd47e), TOBN(0xa9c0bb31, 0xe1845531),
+     TOBN(0x9ddc4d60, 0x0a4c3f6b), TOBN(0xbdfaad79, 0x2c15ef44),
+     TOBN(0xce55a236, 0x7f484acc), TOBN(0x08653ca7, 0x055b1f15),
+     TOBN(0x2efa8724, 0x538873a3), TOBN(0x09299e5d, 0xace1c7e7),
+     TOBN(0x07afab66, 0xade332ba), TOBN(0x9be1fdf6, 0x92dd71b7),
+     TOBN(0xa49b5d59, 0x5758b11c), TOBN(0x0b852893, 0xc8654f40),
+     TOBN(0xb63ef6f4, 0x52379447), TOBN(0xd4957d29, 0x105e690c),
+     TOBN(0x7d484363, 0x646559b0), TOBN(0xf4a8273c, 0x49788a8e),
+     TOBN(0xee406cb8, 0x34ce54a9), TOBN(0x1e1c260f, 0xf86fda9b),
+     TOBN(0xe150e228, 0xcf6a4a81), TOBN(0x1fa3b6a3, 0x1b488772),
+     TOBN(0x1e6ff110, 0xc5a9c15b), TOBN(0xc6133b91, 0x8ad6aa47),
+     TOBN(0x8ac5d55c, 0x9dffa978), TOBN(0xba1d1c1d, 0x5f3965f2),
+     TOBN(0xf969f4e0, 0x7732b52f), TOBN(0xfceecdb5, 0xa5172a07),
+     TOBN(0xb0120a5f, 0x10f2b8f5), TOBN(0xc83a6cdf, 0x5c4c2f63),
+     TOBN(0x4d47a491, 0xf8f9c213), TOBN(0xd9e1cce5, 0xd3f1bbd5),
+     TOBN(0x0d91bc7c, 0xaba7e372), TOBN(0xfcdc74c8, 0xdfd1a2db),
+     TOBN(0x05efa800, 0x374618e5), TOBN(0x11216969, 0x15a7925e),
+     TOBN(0xd4c89823, 0xf6021c5d), TOBN(0x880d5e84, 0xeff14423),
+     TOBN(0x6523bc5a, 0x6dcd1396), TOBN(0xd1acfdfc, 0x113c978b),
+     TOBN(0xb0c164e8, 0xbbb66840), TOBN(0xf7f4301e, 0x72b58459),
+     TOBN(0xc29ad4a6, 0xa638e8ec), TOBN(0xf5ab8961, 0x46b78699),
+     TOBN(0x9dbd7974, 0x0e954750), TOBN(0x0121de88, 0x64f9d2c6),
+     TOBN(0x2e597b42, 0xd985232e), TOBN(0x55b6c3c5, 0x53451777),
+     TOBN(0xbb53e547, 0x519cb9fb), TOBN(0xf134019f, 0x8428600d),
+     TOBN(0x5a473176, 0xe081791a), TOBN(0x2f3e2263, 0x35fb0c08),
+     TOBN(0xb28c3017, 0x73d273b0), TOBN(0xccd21076, 0x7721ef9a),
+     TOBN(0x054cc292, 0xb650dc39), TOBN(0x662246de, 0x6188045e),
+     TOBN(0x904b52fa, 0x6b83c0d1), TOBN(0xa72df267, 0x97e9cd46),
+     TOBN(0x886b43cd, 0x899725e4), TOBN(0x2b651688, 0xd849ff22),
+     TOBN(0x60479b79, 0x02f34533), TOBN(0x5e354c14, 0x0c77c148),
+     TOBN(0xb4bb7581, 0xa8537c78), TOBN(0x188043d7, 0xefe1495f),
+     TOBN(0x9ba12f42, 0x8c1d5026), TOBN(0x2e0c8a26, 0x93d4aaab),
+     TOBN(0xbdba7b8b, 0xaa57c450), TOBN(0x140c9ad6, 0x9bbdafef),
+     TOBN(0x2067aa42, 0x25ac0f18), TOBN(0xf7b1295b, 0x04d1fbf3),
+     TOBN(0x14829111, 0xa4b04824), TOBN(0x2ce3f192, 0x33bd5e91),
+     TOBN(0x9c7a1d55, 0x8f2e1b72), TOBN(0xfe932286, 0x302aa243),
+     TOBN(0x497ca7b4, 0xd4be9554), TOBN(0xb8e821b8, 0xe0547a6e),
+     TOBN(0xfb2838be, 0x67e573e0), TOBN(0x05891db9, 0x4084c44b),
+     TOBN(0x91311373, 0x96c1c2c5), TOBN(0x6aebfa3f, 0xd958444b),
+     TOBN(0xac9cdce9, 0xe56e55c1), TOBN(0x7148ced3, 0x2caa46d0),
+     TOBN(0x2e10c7ef, 0xb61fe8eb), TOBN(0x9fd835da, 0xff97cf4d),}
+    ,
+    {TOBN(0xa36da109, 0x081e9387), TOBN(0xfb9780d7, 0x8c935828),
+     TOBN(0xd5940332, 0xe540b015), TOBN(0xc9d7b51b, 0xe0f466fa),
+     TOBN(0xfaadcd41, 0xd6d9f671), TOBN(0xba6c1e28, 0xb1a2ac17),
+     TOBN(0x066a7833, 0xed201e5f), TOBN(0x19d99719, 0xf90f462b),
+     TOBN(0xf431f462, 0x060b5f61), TOBN(0xa56f46b4, 0x7bd057c2),
+     TOBN(0x348dca6c, 0x47e1bf65), TOBN(0x9a38783e, 0x41bcf1ff),
+     TOBN(0x7a5d33a9, 0xda710718), TOBN(0x5a779987, 0x2e0aeaf6),
+     TOBN(0xca87314d, 0x2d29d187), TOBN(0xfa0edc3e, 0xc687d733),
+     TOBN(0x9df33621, 0x6a31e09b), TOBN(0xde89e44d, 0xc1350e35),
+     TOBN(0x29214871, 0x4ca0cf52), TOBN(0xdf379672, 0x0b88a538),
+     TOBN(0xc92a510a, 0x2591d61b), TOBN(0x79aa87d7, 0x585b447b),
+     TOBN(0xf67db604, 0xe5287f77), TOBN(0x1697c8bf, 0x5efe7a80),
+     TOBN(0x1c894849, 0xcb198ac7), TOBN(0xa884a93d, 0x0f264665),
+     TOBN(0x2da964ef, 0x9b200678), TOBN(0x3c351b87, 0x009834e6),
+     TOBN(0xafb2ef9f, 0xe2c4b44b), TOBN(0x580f6c47, 0x3326790c),
+     TOBN(0xb8480521, 0x0b02264a), TOBN(0x8ba6f9e2, 0x42a194e2),
+     TOBN(0xfc87975f, 0x8fb54738), TOBN(0x35160788, 0x27c3ead3),
+     TOBN(0x834116d2, 0xb74a085a), TOBN(0x53c99a73, 0xa62fe996),
+     TOBN(0x87585be0, 0x5b81c51b), TOBN(0x925bafa8, 0xbe0852b7),
+     TOBN(0x76a4fafd, 0xa84d19a7), TOBN(0x39a45982, 0x585206d4),
+     TOBN(0x499b6ab6, 0x5eb03c0e), TOBN(0xf19b7954, 0x72bc3fde),
+     TOBN(0xa86b5b9c, 0x6e3a80d2), TOBN(0xe4377508, 0x6d42819f),
+     TOBN(0xc1663650, 0xbb3ee8a3), TOBN(0x75eb14fc, 0xb132075f),
+     TOBN(0xa8ccc906, 0x7ad834f6), TOBN(0xea6a2474, 0xe6e92ffd),
+     TOBN(0x9d72fd95, 0x0f8d6758), TOBN(0xcb84e101, 0x408c07dd),
+     TOBN(0xb9114bfd, 0xa5e23221), TOBN(0x358b5fe2, 0xe94e742c),
+     TOBN(0x1c0577ec, 0x95f40e75), TOBN(0xf0155451, 0x3d73f3d6),
+     TOBN(0x9d55cd67, 0xbd1b9b66), TOBN(0x63e86e78, 0xaf8d63c7),
+     TOBN(0x39d934ab, 0xd3c095f1), TOBN(0x04b261be, 0xe4b76d71),
+     TOBN(0x1d2e6970, 0xe73e6984), TOBN(0x879fb23b, 0x5e5fcb11),
+     TOBN(0x11506c72, 0xdfd75490), TOBN(0x3a97d085, 0x61bcf1c1),
+     TOBN(0x43201d82, 0xbf5e7007), TOBN(0x7f0ac52f, 0x798232a7),
+     TOBN(0x2715cbc4, 0x6eb564d4), TOBN(0x8d6c752c, 0x9e570e29),
+     TOBN(0xf80247c8, 0x9ef5fd5d), TOBN(0xc3c66b46, 0xd53eb514),
+     TOBN(0x9666b401, 0x0f87de56), TOBN(0xce62c06f, 0xc6c603b5),
+     TOBN(0xae7b4c60, 0x7e4fc942), TOBN(0x38ac0b77, 0x663a9c19),
+     TOBN(0xcb4d20ee, 0x4b049136), TOBN(0x8b63bf12, 0x356a4613),
+     TOBN(0x1221aef6, 0x70e08128), TOBN(0xe62d8c51, 0x4acb6b16),
+     TOBN(0x71f64a67, 0x379e7896), TOBN(0xb25237a2, 0xcafd7fa5),
+     TOBN(0xf077bd98, 0x3841ba6a), TOBN(0xc4ac0244, 0x3cd16e7e),
+     TOBN(0x548ba869, 0x21fea4ca), TOBN(0xd36d0817, 0xf3dfdac1),
+     TOBN(0x09d8d71f, 0xf4685faf), TOBN(0x8eff66be, 0xc52c459a),
+     TOBN(0x182faee7, 0x0b57235e), TOBN(0xee3c39b1, 0x0106712b),
+     TOBN(0x5107331f, 0xc0fcdcb0), TOBN(0x669fb9dc, 0xa51054ba),
+     TOBN(0xb25101fb, 0x319d7682), TOBN(0xb0293129, 0x0a982fee),
+     TOBN(0x51c1c9b9, 0x0261b344), TOBN(0x0e008c5b, 0xbfd371fa),
+     TOBN(0xd866dd1c, 0x0278ca33), TOBN(0x666f76a6, 0xe5aa53b1),
+     TOBN(0xe5cfb779, 0x6013a2cf), TOBN(0x1d3a1aad, 0xa3521836),
+     TOBN(0xcedd2531, 0x73faa485), TOBN(0xc8ee6c4f, 0xc0a76878),
+     TOBN(0xddbccfc9, 0x2a11667d), TOBN(0x1a418ea9, 0x1c2f695a),
+     TOBN(0xdb11bd92, 0x51f73971), TOBN(0x3e4b3c82, 0xda2ed89f),
+     TOBN(0x9a44f3f4, 0xe73e0319), TOBN(0xd1e3de0f, 0x303431af),
+     TOBN(0x3c5604ff, 0x50f75f9c), TOBN(0x1d8eddf3, 0x7e752b22),
+     TOBN(0x0ef074dd, 0x3c9a1118), TOBN(0xd0ffc172, 0xccb86d7b),
+     TOBN(0xabd1ece3, 0x037d90f2), TOBN(0xe3f307d6, 0x6055856c),
+     TOBN(0x422f9328, 0x7e4c6daf), TOBN(0x902aac66, 0x334879a0),
+     TOBN(0xb6a1e7bf, 0x94cdfade), TOBN(0x6c97e1ed, 0x7fc6d634),
+     TOBN(0x662ad24d, 0xa2fb63f8), TOBN(0xf81be1b9, 0xa5928405),
+     TOBN(0x86d765e4, 0xd14b4206), TOBN(0xbecc2e0e, 0x8fa0db65),
+     TOBN(0xa28838e0, 0xb17fc76c), TOBN(0xe49a602a, 0xe37cf24e),
+     TOBN(0x76b4131a, 0x567193ec), TOBN(0xaf3c305a, 0xe5f6e70b),
+     TOBN(0x9587bd39, 0x031eebdd), TOBN(0x5709def8, 0x71bbe831),
+     TOBN(0x57059983, 0x0eb2b669), TOBN(0x4d80ce1b, 0x875b7029),
+     TOBN(0x838a7da8, 0x0364ac16), TOBN(0x2f431d23, 0xbe1c83ab),
+     TOBN(0xe56812a6, 0xf9294dd3), TOBN(0xb448d01f, 0x9b4b0d77),
+     TOBN(0xf3ae6061, 0x04e8305c), TOBN(0x2bead645, 0x94d8c63e),
+     TOBN(0x0a85434d, 0x84fd8b07), TOBN(0x537b983f, 0xf7a9dee5),
+     TOBN(0xedcc5f18, 0xef55bd85), TOBN(0x2041af62, 0x21c6cf8b),
+     TOBN(0x8e52874c, 0xb940c71e), TOBN(0x211935a9, 0xdb5f4b3a),
+     TOBN(0x94350492, 0x301b1dc3), TOBN(0x33d2646d, 0x29958620),
+     TOBN(0x16b0d64b, 0xef911404), TOBN(0x9d1f25ea, 0x9a3c5ef4),
+     TOBN(0x20f200eb, 0x4a352c78), TOBN(0x43929f2c, 0x4bd0b428),
+     TOBN(0xa5656667, 0xc7196e29), TOBN(0x7992c2f0, 0x9391be48),
+     TOBN(0xaaa97cbd, 0x9ee0cd6e), TOBN(0x51b0310c, 0x3dc8c9bf),
+     TOBN(0x237f8acf, 0xdd9f22cb), TOBN(0xbb1d81a1, 0xb585d584),
+     TOBN(0x8d5d85f5, 0x8c416388), TOBN(0x0d6e5a5a, 0x42fe474f),
+     TOBN(0xe7812766, 0x38235d4e), TOBN(0x1c62bd67, 0x496e3298),
+     TOBN(0x8378660c, 0x3f175bc8), TOBN(0x4d04e189, 0x17afdd4d),
+     TOBN(0x32a81601, 0x85a8068c), TOBN(0xdb58e4e1, 0x92b29a85),
+     TOBN(0xe8a65b86, 0xc70d8a3b), TOBN(0x5f0e6f4e, 0x98a0403b),
+     TOBN(0x08129684, 0x69ed2370), TOBN(0x34dc30bd, 0x0871ee26),
+     TOBN(0x3a5ce948, 0x7c9c5b05), TOBN(0x7d487b80, 0x43a90c87),
+     TOBN(0x4089ba37, 0xdd0e7179), TOBN(0x45f80191, 0xb4041811),
+     TOBN(0x1c3e1058, 0x98747ba5), TOBN(0x98c4e13a, 0x6e1ae592),
+     TOBN(0xd44636e6, 0xe82c9f9e), TOBN(0x711db87c, 0xc33a1043),
+     TOBN(0x6f431263, 0xaa8aec05), TOBN(0x43ff120d, 0x2744a4aa),
+     TOBN(0xd3bd892f, 0xae77779b), TOBN(0xf0fe0cc9, 0x8cdc9f82),
+     TOBN(0xca5f7fe6, 0xf1c5b1bc), TOBN(0xcc63a682, 0x44929a72),
+     TOBN(0xc7eaba0c, 0x09dbe19a), TOBN(0x2f3585ad, 0x6b5c73c2),
+     TOBN(0x8ab8924b, 0x0ae50c30), TOBN(0x17fcd27a, 0x638b30ba),
+     TOBN(0xaf414d34, 0x10b3d5a5), TOBN(0x09c107d2, 0x2a9accf1),
+     TOBN(0x15dac49f, 0x946a6242), TOBN(0xaec3df2a, 0xd707d642),
+     TOBN(0x2c2492b7, 0x3f894ae0), TOBN(0xf59df3e5, 0xb75f18ce),
+     TOBN(0x7cb740d2, 0x8f53cad0), TOBN(0x3eb585fb, 0xc4f01294),
+     TOBN(0x17da0c86, 0x32c7f717), TOBN(0xeb8c795b, 0xaf943f4c),
+     TOBN(0x4ee23fb5, 0xf67c51d2), TOBN(0xef187575, 0x68889949),
+     TOBN(0xa6b4bdb2, 0x0389168b), TOBN(0xc4ecd258, 0xea577d03),
+     TOBN(0x3a63782b, 0x55743082), TOBN(0x6f678f4c, 0xc72f08cd),
+     TOBN(0x553511cf, 0x65e58dd8), TOBN(0xd53b4e3e, 0xd402c0cd),
+     TOBN(0x37de3e29, 0xa037c14c), TOBN(0x86b6c516, 0xc05712aa),
+     TOBN(0x2834da3e, 0xb38dff6f), TOBN(0xbe012c52, 0xea636be8),
+     TOBN(0x292d238c, 0x61dd37f8), TOBN(0x0e54523f, 0x8f8142db),
+     TOBN(0xe31eb436, 0x036a05d8), TOBN(0x83e3cdff, 0x1e93c0ff),
+     TOBN(0x3fd2fe0f, 0x50821ddf), TOBN(0xc8e19b0d, 0xff9eb33b),
+     TOBN(0xc8cc943f, 0xb569a5fe), TOBN(0xad0090d4, 0xd4342d75),
+     TOBN(0x82090b4b, 0xcaeca000), TOBN(0xca39687f, 0x1bd410eb),
+     TOBN(0xe7bb0df7, 0x65959d77), TOBN(0x39d78218, 0x9c964999),
+     TOBN(0xd87f62e8, 0xb2415451), TOBN(0xe5efb774, 0xbed76108),
+     TOBN(0x3ea011a4, 0xe822f0d0), TOBN(0xbc647ad1, 0x5a8704f8),
+     TOBN(0xbb315b35, 0x50c6820f), TOBN(0x863dec3d, 0xb7e76bec),
+     TOBN(0x01ff5d3a, 0xf017bfc7), TOBN(0x20054439, 0x976b8229),
+     TOBN(0x067fca37, 0x0bbd0d3b), TOBN(0xf63dde64, 0x7f5e3d0f),
+     TOBN(0x22dbefb3, 0x2a4c94e9), TOBN(0xafbff0fe, 0x96f8278a),
+     TOBN(0x80aea0b1, 0x3503793d), TOBN(0xb2238029, 0x5f06cd29),
+     TOBN(0x65703e57, 0x8ec3feca), TOBN(0x06c38314, 0x393e7053),
+     TOBN(0xa0b751eb, 0x7c6734c4), TOBN(0xd2e8a435, 0xc59f0f1e),
+     TOBN(0x147d9052, 0x5e9ca895), TOBN(0x2f4dd31e, 0x972072df),
+     TOBN(0xa16fda8e, 0xe6c6755c), TOBN(0xc66826ff, 0xcf196558),
+     TOBN(0x1f1a76a3, 0x0cf43895), TOBN(0xa9d604e0, 0x83c3097b),
+     TOBN(0xe1908309, 0x66390e0e), TOBN(0xa50bf753, 0xb3c85eff),
+     TOBN(0x0696bdde, 0xf6a70251), TOBN(0x548b801b, 0x3c6ab16a),
+     TOBN(0x37fcf704, 0xa4d08762), TOBN(0x090b3def, 0xdff76c4e),
+     TOBN(0x87e8cb89, 0x69cb9158), TOBN(0x44a90744, 0x995ece43),
+     TOBN(0xf85395f4, 0x0ad9fbf5), TOBN(0x49b0f6c5, 0x4fb0c82d),
+     TOBN(0x75d9bc15, 0xadf7cccf), TOBN(0x81a3e5d6, 0xdfa1e1b0),
+     TOBN(0x8c39e444, 0x249bc17e), TOBN(0xf37dccb2, 0x8ea7fd43),
+     TOBN(0xda654873, 0x907fba12), TOBN(0x35daa6da, 0x4a372904),
+     TOBN(0x0564cfc6, 0x6283a6c5), TOBN(0xd09fa4f6, 0x4a9395bf),
+     TOBN(0x688e9ec9, 0xaeb19a36), TOBN(0xd913f1ce, 0xc7bfbfb4),
+     TOBN(0x797b9a3c, 0x61c2faa6), TOBN(0x2f979bec, 0x6a0a9c12),
+     TOBN(0xb5969d0f, 0x359679ec), TOBN(0xebcf523d, 0x079b0460),
+     TOBN(0xfd6b0008, 0x10fab870), TOBN(0x3f2edcda, 0x9373a39c),
+     TOBN(0x0d64f9a7, 0x6f568431), TOBN(0xf848c27c, 0x02f8898c),
+     TOBN(0xf418ade1, 0x260b5bd5), TOBN(0xc1f3e323, 0x6973dee8),
+     TOBN(0x46e9319c, 0x26c185dd), TOBN(0x6d85b7d8, 0x546f0ac4),
+     TOBN(0x427965f2, 0x247f9d57), TOBN(0xb519b636, 0xb0035f48),
+     TOBN(0x6b6163a9, 0xab87d59c), TOBN(0xff9f58c3, 0x39caaa11),
+     TOBN(0x4ac39cde, 0x3177387b), TOBN(0x5f6557c2, 0x873e77f9),
+     TOBN(0x67504006, 0x36a83041), TOBN(0x9b1c96ca, 0x75ef196c),
+     TOBN(0xf34283de, 0xb08c7940), TOBN(0x7ea09644, 0x1128c316),
+     TOBN(0xb510b3b5, 0x6aa39dff), TOBN(0x59b43da2, 0x9f8e4d8c),
+     TOBN(0xa8ce31fd, 0x9e4c4b9f), TOBN(0x0e20be26, 0xc1303c01),
+     TOBN(0x18187182, 0xe8ee47c9), TOBN(0xd9687cdb, 0x7db98101),
+     TOBN(0x7a520e4d, 0xa1e14ff6), TOBN(0x429808ba, 0x8836d572),
+     TOBN(0xa37ca60d, 0x4944b663), TOBN(0xf901f7a9, 0xa3f91ae5),
+     TOBN(0xe4e3e76e, 0x9e36e3b1), TOBN(0x9aa219cf, 0x29d93250),
+     TOBN(0x347fe275, 0x056a2512), TOBN(0xa4d643d9, 0xde65d95c),
+     TOBN(0x9669d396, 0x699fc3ed), TOBN(0xb598dee2, 0xcf8c6bbe),
+     TOBN(0x682ac1e5, 0xdda9e5c6), TOBN(0x4e0d3c72, 0xcaa9fc95),
+     TOBN(0x17faaade, 0x772bea44), TOBN(0x5ef8428c, 0xab0009c8),
+     TOBN(0xcc4ce47a, 0x460ff016), TOBN(0xda6d12bf, 0x725281cb),
+     TOBN(0x44c67848, 0x0223aad2), TOBN(0x6e342afa, 0x36256e28),
+     TOBN(0x1400bb0b, 0x93a37c04), TOBN(0x62b1bc9b, 0xdd10bd96),
+     TOBN(0x7251adeb, 0x0dac46b7), TOBN(0x7d33b92e, 0x7be4ef51),
+     TOBN(0x28b2a94b, 0xe61fa29a), TOBN(0x4b2be13f, 0x06422233),
+     TOBN(0x36d6d062, 0x330d8d37), TOBN(0x5ef80e1e, 0xb28ca005),
+     TOBN(0x174d4699, 0x6d16768e), TOBN(0x9fc4ff6a, 0x628bf217),
+     TOBN(0x77705a94, 0x154e490d), TOBN(0x9d96dd28, 0x8d2d997a),
+     TOBN(0x77e2d9d8, 0xce5d72c4), TOBN(0x9d06c5a4, 0xc11c714f),
+     TOBN(0x02aa5136, 0x79e4a03e), TOBN(0x1386b3c2, 0x030ff28b),
+     TOBN(0xfe82e8a6, 0xfb283f61), TOBN(0x7df203e5, 0xf3abc3fb),
+     TOBN(0xeec7c351, 0x3a4d3622), TOBN(0xf7d17dbf, 0xdf762761),
+     TOBN(0xc3956e44, 0x522055f0), TOBN(0xde3012db, 0x8fa748db),
+     TOBN(0xca9fcb63, 0xbf1dcc14), TOBN(0xa56d9dcf, 0xbe4e2f3a),
+     TOBN(0xb86186b6, 0x8bcec9c2), TOBN(0x7cf24df9, 0x680b9f06),
+     TOBN(0xc46b45ea, 0xc0d29281), TOBN(0xfff42bc5, 0x07b10e12),
+     TOBN(0x12263c40, 0x4d289427), TOBN(0x3d5f1899, 0xb4848ec4),
+     TOBN(0x11f97010, 0xd040800c), TOBN(0xb4c5f529, 0x300feb20),
+     TOBN(0xcc543f8f, 0xde94fdcb), TOBN(0xe96af739, 0xc7c2f05e),
+     TOBN(0xaa5e0036, 0x882692e1), TOBN(0x09c75b68, 0x950d4ae9),
+     TOBN(0x62f63df2, 0xb5932a7a), TOBN(0x2658252e, 0xde0979ad),
+     TOBN(0x2a19343f, 0xb5e69631), TOBN(0x718c7501, 0x525b666b),
+     TOBN(0x26a42d69, 0xea40dc3a), TOBN(0xdc84ad22, 0xaecc018f),
+     TOBN(0x25c36c7b, 0x3270f04a), TOBN(0x46ba6d47, 0x50fa72ed),
+     TOBN(0x6c37d1c5, 0x93e58a8e), TOBN(0xa2394731, 0x120c088c),
+     TOBN(0xc3be4263, 0xcb6e86da), TOBN(0x2c417d36, 0x7126d038),
+     TOBN(0x5b70f9c5, 0x8b6f8efa), TOBN(0x671a2faa, 0x37718536),
+     TOBN(0xd3ced3c6, 0xb539c92b), TOBN(0xe56f1bd9, 0xa31203c2),
+     TOBN(0x8b096ec4, 0x9ff3c8eb), TOBN(0x2deae432, 0x43491cea),
+     TOBN(0x2465c6eb, 0x17943794), TOBN(0x5d267e66, 0x20586843),
+     TOBN(0x9d3d116d, 0xb07159d0), TOBN(0xae07a67f, 0xc1896210),
+     TOBN(0x8fc84d87, 0xbb961579), TOBN(0x30009e49, 0x1c1f8dd6),
+     TOBN(0x8a8caf22, 0xe3132819), TOBN(0xcffa197c, 0xf23ab4ff),
+     TOBN(0x58103a44, 0x205dd687), TOBN(0x57b796c3, 0x0ded67a2),
+     TOBN(0x0b9c3a6c, 0xa1779ad7), TOBN(0xa33cfe2e, 0x357c09c5),
+     TOBN(0x2ea29315, 0x3db4a57e), TOBN(0x91959695, 0x8ebeb52e),
+     TOBN(0x118db9a6, 0xe546c879), TOBN(0x8e996df4, 0x6295c8d6),
+     TOBN(0xdd990484, 0x55ec806b), TOBN(0x24f291ca, 0x165c1035),
+     TOBN(0xcca523bb, 0x440e2229), TOBN(0x324673a2, 0x73ef4d04),
+     TOBN(0xaf3adf34, 0x3e11ec39), TOBN(0x6136d7f1, 0xdc5968d3),
+     TOBN(0x7a7b2899, 0xb053a927), TOBN(0x3eaa2661, 0xae067ecd),
+     TOBN(0x8549b9c8, 0x02779cd9), TOBN(0x061d7940, 0xc53385ea),
+     TOBN(0x3e0ba883, 0xf06d18bd), TOBN(0x4ba6de53, 0xb2700843),
+     TOBN(0xb966b668, 0x591a9e4d), TOBN(0x93f67567, 0x7f4fa0ed),
+     TOBN(0x5a02711b, 0x4347237b), TOBN(0xbc041e2f, 0xe794608e),
+     TOBN(0x55af10f5, 0x70f73d8c), TOBN(0xd2d4d4f7, 0xbb7564f7),
+     TOBN(0xd7d27a89, 0xb3e93ce7), TOBN(0xf7b5a875, 0x5d3a2c1b),
+     TOBN(0xb29e68a0, 0x255b218a), TOBN(0xb533837e, 0x8af76754),
+     TOBN(0xd1b05a73, 0x579fab2e), TOBN(0xb41055a1, 0xecd74385),
+     TOBN(0xb2369274, 0x445e9115), TOBN(0x2972a7c4, 0xf520274e),
+     TOBN(0x6c08334e, 0xf678e68a), TOBN(0x4e4160f0, 0x99b057ed),
+     TOBN(0x3cfe11b8, 0x52ccb69a), TOBN(0x2fd1823a, 0x21c8f772),
+     TOBN(0xdf7f072f, 0x3298f055), TOBN(0x8c0566f9, 0xfec74a6e),
+     TOBN(0xe549e019, 0x5bb4d041), TOBN(0x7c3930ba, 0x9208d850),
+     TOBN(0xe07141fc, 0xaaa2902b), TOBN(0x539ad799, 0xe4f69ad3),
+     TOBN(0xa6453f94, 0x813f9ffd), TOBN(0xc58d3c48, 0x375bc2f7),
+     TOBN(0xb3326fad, 0x5dc64e96), TOBN(0x3aafcaa9, 0xb240e354),
+     TOBN(0x1d1b0903, 0xaca1e7a9), TOBN(0x4ceb9767, 0x1211b8a0),
+     TOBN(0xeca83e49, 0xe32a858e), TOBN(0x4c32892e, 0xae907bad),
+     TOBN(0xd5b42ab6, 0x2eb9b494), TOBN(0x7fde3ee2, 0x1eabae1b),
+     TOBN(0x13b5ab09, 0xcaf54957), TOBN(0xbfb028be, 0xe5f5d5d5),
+     TOBN(0x928a0650, 0x2003e2c0), TOBN(0x90793aac, 0x67476843),
+     TOBN(0x5e942e79, 0xc81710a0), TOBN(0x557e4a36, 0x27ccadd4),
+     TOBN(0x72a2bc56, 0x4bcf6d0c), TOBN(0x09ee5f43, 0x26d7b80c),
+     TOBN(0x6b70dbe9, 0xd4292f19), TOBN(0x56f74c26, 0x63f16b18),
+     TOBN(0xc23db0f7, 0x35fbb42a), TOBN(0xb606bdf6, 0x6ae10040),
+     TOBN(0x1eb15d4d, 0x044573ac), TOBN(0x7dc3cf86, 0x556b0ba4),
+     TOBN(0x97af9a33, 0xc60df6f7), TOBN(0x0b1ef85c, 0xa716ce8c),
+     TOBN(0x2922f884, 0xc96958be), TOBN(0x7c32fa94, 0x35690963),
+     TOBN(0x2d7f667c, 0xeaa00061), TOBN(0xeaaf7c17, 0x3547365c),
+     TOBN(0x1eb4de46, 0x87032d58), TOBN(0xc54f3d83, 0x5e2c79e0),
+     TOBN(0x07818df4, 0x5d04ef23), TOBN(0x55faa9c8, 0x673d41b4),
+     TOBN(0xced64f6f, 0x89b95355), TOBN(0x4860d2ea, 0xb7415c84),
+     TOBN(0x5fdb9bd2, 0x050ebad3), TOBN(0xdb53e0cc, 0x6685a5bf),
+     TOBN(0xb830c031, 0x9feb6593), TOBN(0xdd87f310, 0x6accff17),
+     TOBN(0x2303ebab, 0x9f555c10), TOBN(0x94603695, 0x287e7065),
+     TOBN(0xf88311c3, 0x2e83358c), TOBN(0x508dd9b4, 0xeefb0178),
+     TOBN(0x7ca23706, 0x2dba8652), TOBN(0x62aac5a3, 0x0047abe5),
+     TOBN(0x9a61d2a0, 0x8b1ea7b3), TOBN(0xd495ab63, 0xae8b1485),
+     TOBN(0x38740f84, 0x87052f99), TOBN(0x178ebe5b, 0xb2974eea),
+     TOBN(0x030bbcca, 0x5b36d17f), TOBN(0xb5e4cce3, 0xaaf86eea),
+     TOBN(0xb51a0220, 0x68f8e9e0), TOBN(0xa4348796, 0x09eb3e75),
+     TOBN(0xbe592309, 0xeef1a752), TOBN(0x5d7162d7, 0x6f2aa1ed),
+     TOBN(0xaebfb5ed, 0x0f007dd2), TOBN(0x255e14b2, 0xc89edd22),
+     TOBN(0xba85e072, 0x0303b697), TOBN(0xc5d17e25, 0xf05720ff),
+     TOBN(0x02b58d6e, 0x5128ebb6), TOBN(0x2c80242d, 0xd754e113),
+     TOBN(0x919fca5f, 0xabfae1ca), TOBN(0x937afaac, 0x1a21459b),
+     TOBN(0x9e0ca91c, 0x1f66a4d2), TOBN(0x194cc7f3, 0x23ec1331),
+     TOBN(0xad25143a, 0x8aa11690), TOBN(0xbe40ad8d, 0x09b59e08),
+     TOBN(0x37d60d9b, 0xe750860a), TOBN(0x6c53b008, 0xc6bf434c),
+     TOBN(0xb572415d, 0x1356eb80), TOBN(0xb8bf9da3, 0x9578ded8),
+     TOBN(0x22658e36, 0x5e8fb38b), TOBN(0x9b70ce22, 0x5af8cb22),
+     TOBN(0x7c00018a, 0x829a8180), TOBN(0x84329f93, 0xb81ed295),
+     TOBN(0x7c343ea2, 0x5f3cea83), TOBN(0x38f8655f, 0x67586536),
+     TOBN(0xa661a0d0, 0x1d3ec517), TOBN(0x98744652, 0x512321ae),
+     TOBN(0x084ca591, 0xeca92598), TOBN(0xa9bb9dc9, 0x1dcb3feb),
+     TOBN(0x14c54355, 0x78b4c240), TOBN(0x5ed62a3b, 0x610cafdc),
+     TOBN(0x07512f37, 0x1b38846b), TOBN(0x571bb70a, 0xb0e38161),
+     TOBN(0xb556b95b, 0x2da705d2), TOBN(0x3ef8ada6, 0xb1a08f98),
+     TOBN(0x85302ca7, 0xddecfbe5), TOBN(0x0e530573, 0x943105cd),
+     TOBN(0x60554d55, 0x21a9255d), TOBN(0x63a32fa1, 0xf2f3802a),
+     TOBN(0x35c8c5b0, 0xcd477875), TOBN(0x97f458ea, 0x6ad42da1),
+     TOBN(0x832d7080, 0xeb6b242d), TOBN(0xd30bd023, 0x3b71e246),
+     TOBN(0x7027991b, 0xbe31139d), TOBN(0x68797e91, 0x462e4e53),
+     TOBN(0x423fe20a, 0x6b4e185a), TOBN(0x82f2c67e, 0x42d9b707),
+     TOBN(0x25c81768, 0x4cf7811b), TOBN(0xbd53005e, 0x045bb95d),}
+    ,
+    {TOBN(0xe5f649be, 0x9d8e68fd), TOBN(0xdb0f0533, 0x1b044320),
+     TOBN(0xf6fde9b3, 0xe0c33398), TOBN(0x92f4209b, 0x66c8cfae),
+     TOBN(0xe9d1afcc, 0x1a739d4b), TOBN(0x09aea75f, 0xa28ab8de),
+     TOBN(0x14375fb5, 0xeac6f1d0), TOBN(0x6420b560, 0x708f7aa5),
+     TOBN(0x9eae499c, 0x6254dc41), TOBN(0x7e293924, 0x7a837e7e),
+     TOBN(0x74aec08c, 0x090524a7), TOBN(0xf82b9219, 0x8d6f55f2),
+     TOBN(0x493c962e, 0x1402cec5), TOBN(0x9f17ca17, 0xfa2f30e7),
+     TOBN(0xbcd783e8, 0xe9b879cb), TOBN(0xea3d8c14, 0x5a6f145f),
+     TOBN(0xdede15e7, 0x5e0dee6e), TOBN(0x74f24872, 0xdc628aa2),
+     TOBN(0xd3e9c4fe, 0x7861bb93), TOBN(0x56d4822a, 0x6187b2e0),
+     TOBN(0xb66417cf, 0xc59826f9), TOBN(0xca260969, 0x2408169e),
+     TOBN(0xedf69d06, 0xc79ef885), TOBN(0x00031f8a, 0xdc7d138f),
+     TOBN(0x103c46e6, 0x0ebcf726), TOBN(0x4482b831, 0x6231470e),
+     TOBN(0x6f6dfaca, 0x487c2109), TOBN(0x2e0ace97, 0x62e666ef),
+     TOBN(0x3246a9d3, 0x1f8d1f42), TOBN(0x1b1e83f1, 0x574944d2),
+     TOBN(0x13dfa63a, 0xa57f334b), TOBN(0x0cf8daed, 0x9f025d81),
+     TOBN(0x30d78ea8, 0x00ee11c1), TOBN(0xeb053cd4, 0xb5e3dd75),
+     TOBN(0x9b65b13e, 0xd58c43c5), TOBN(0xc3ad49bd, 0xbd151663),
+     TOBN(0x99fd8e41, 0xb6427990), TOBN(0x12cf15bd, 0x707eae1e),
+     TOBN(0x29ad4f1b, 0x1aabb71e), TOBN(0x5143e74d, 0x07545d0e),
+     TOBN(0x30266336, 0xc88bdee1), TOBN(0x25f29306, 0x5876767c),
+     TOBN(0x9c078571, 0xc6731996), TOBN(0xc88690b2, 0xed552951),
+     TOBN(0x274f2c2d, 0x852705b4), TOBN(0xb0bf8d44, 0x4e09552d),
+     TOBN(0x7628beeb, 0x986575d1), TOBN(0x407be238, 0x7f864651),
+     TOBN(0x0e5e3049, 0xa639fc6b), TOBN(0xe75c35d9, 0x86003625),
+     TOBN(0x0cf35bd8, 0x5dcc1646), TOBN(0x8bcaced2, 0x6c26273a),
+     TOBN(0xe22ecf1d, 0xb5536742), TOBN(0x013dd897, 0x1a9e068b),
+     TOBN(0x17f411cb, 0x8a7909c5), TOBN(0x5757ac98, 0x861dd506),
+     TOBN(0x85de1f0d, 0x1e935abb), TOBN(0xdefd10b4, 0x154de37a),
+     TOBN(0xb8d9e392, 0x369cebb5), TOBN(0x54d5ef9b, 0x761324be),
+     TOBN(0x4d6341ba, 0x74f17e26), TOBN(0xc0a0e3c8, 0x78c1dde4),
+     TOBN(0xa6d77581, 0x87d918fd), TOBN(0x66876015, 0x02ca3a13),
+     TOBN(0xc7313e9c, 0xf36658f0), TOBN(0xc433ef1c, 0x71f8057e),
+     TOBN(0x85326246, 0x1b6a835a), TOBN(0xc8f05398, 0x7c86394c),
+     TOBN(0xff398cdf, 0xe983c4a1), TOBN(0xbf5e8162, 0x03b7b931),
+     TOBN(0x93193c46, 0xb7b9045b), TOBN(0x1e4ebf5d, 0xa4a6e46b),
+     TOBN(0xf9942a60, 0x43a24fe7), TOBN(0x29c1191e, 0xffb3492b),
+     TOBN(0x9f662449, 0x902fde05), TOBN(0xc792a7ac, 0x6713c32d),
+     TOBN(0x2fd88ad8, 0xb737982c), TOBN(0x7e3a0319, 0xa21e60e3),
+     TOBN(0x09b0de44, 0x7383591a), TOBN(0x6df141ee, 0x8310a456),
+     TOBN(0xaec1a039, 0xe6d6f471), TOBN(0x14b2ba0f, 0x1198d12e),
+     TOBN(0xebc1a160, 0x3aeee5ac), TOBN(0x401f4836, 0xe0b964ce),
+     TOBN(0x2ee43796, 0x4fd03f66), TOBN(0x3fdb4e49, 0xdd8f3f12),
+     TOBN(0x6ef267f6, 0x29380f18), TOBN(0x3e8e9670, 0x8da64d16),
+     TOBN(0xbc19180c, 0x207674f1), TOBN(0x112e09a7, 0x33ae8fdb),
+     TOBN(0x99667554, 0x6aaeb71e), TOBN(0x79432af1, 0xe101b1c7),
+     TOBN(0xd5eb558f, 0xde2ddec6), TOBN(0x81392d1f, 0x5357753f),
+     TOBN(0xa7a76b97, 0x3ae1158a), TOBN(0x416fbbff, 0x4a899991),
+     TOBN(0x9e65fdfd, 0x0d4a9dcf), TOBN(0x7bc29e48, 0x944ddf12),
+     TOBN(0xbc1a92d9, 0x3c856866), TOBN(0x273c6905, 0x6e98dfe2),
+     TOBN(0x69fce418, 0xcdfaa6b8), TOBN(0x606bd823, 0x5061c69f),
+     TOBN(0x42d495a0, 0x6af75e27), TOBN(0x8ed3d505, 0x6d873a1f),
+     TOBN(0xaf552841, 0x6ab25b6a), TOBN(0xc6c0ffc7, 0x2b1a4523),
+     TOBN(0xab18827b, 0x21c99e03), TOBN(0x060e8648, 0x9034691b),
+     TOBN(0x5207f90f, 0x93c7f398), TOBN(0x9f4a96cb, 0x82f8d10b),
+     TOBN(0xdd71cd79, 0x3ad0f9e3), TOBN(0x84f435d2, 0xfc3a54f5),
+     TOBN(0x4b03c55b, 0x8e33787f), TOBN(0xef42f975, 0xa6384673),
+     TOBN(0xff7304f7, 0x5051b9f0), TOBN(0x18aca1dc, 0x741c87c2),
+     TOBN(0x56f120a7, 0x2d4bfe80), TOBN(0xfd823b3d, 0x053e732c),
+     TOBN(0x11bccfe4, 0x7537ca16), TOBN(0xdf6c9c74, 0x1b5a996b),
+     TOBN(0xee7332c7, 0x904fc3fa), TOBN(0x14a23f45, 0xc7e3636a),
+     TOBN(0xc38659c3, 0xf091d9aa), TOBN(0x4a995e5d, 0xb12d8540),
+     TOBN(0x20a53bec, 0xf3a5598a), TOBN(0x56534b17, 0xb1eaa995),
+     TOBN(0x9ed3dca4, 0xbf04e03c), TOBN(0x716c563a, 0xd8d56268),
+     TOBN(0x27ba77a4, 0x1d6178e7), TOBN(0xe4c80c40, 0x68a1ff8e),
+     TOBN(0x75011099, 0x0a13f63d), TOBN(0x7bf33521, 0xa61d46f3),
+     TOBN(0x0aff218e, 0x10b365bb), TOBN(0x81021804, 0x0fd7ea75),
+     TOBN(0x05a3fd8a, 0xa4b3a925), TOBN(0xb829e75f, 0x9b3db4e6),
+     TOBN(0x6bdc75a5, 0x4d53e5fb), TOBN(0x04a5dc02, 0xd52717e3),
+     TOBN(0x86af502f, 0xe9a42ec2), TOBN(0x8867e8fb, 0x2630e382),
+     TOBN(0xbf845c6e, 0xbec9889b), TOBN(0x54f491f2, 0xcb47c98d),
+     TOBN(0xa3091fba, 0x790c2a12), TOBN(0xd7f6fd78, 0xc20f708b),
+     TOBN(0xa569ac30, 0xacde5e17), TOBN(0xd0f996d0, 0x6852b4d7),
+     TOBN(0xe51d4bb5, 0x4609ae54), TOBN(0x3fa37d17, 0x0daed061),
+     TOBN(0x62a88684, 0x34b8fb41), TOBN(0x99a2acbd, 0x9efb64f1),
+     TOBN(0xb75c1a5e, 0x6448e1f2), TOBN(0xfa99951a, 0x42b5a069),
+     TOBN(0x6d956e89, 0x2f3b26e7), TOBN(0xf4709860, 0xda875247),
+     TOBN(0x3ad15179, 0x2482dda3), TOBN(0xd64110e3, 0x017d82f0),
+     TOBN(0x14928d2c, 0xfad414e4), TOBN(0x2b155f58, 0x2ed02b24),
+     TOBN(0x481a141b, 0xcb821bf1), TOBN(0x12e3c770, 0x4f81f5da),
+     TOBN(0xe49c5de5, 0x9fff8381), TOBN(0x11053232, 0x5bbec894),
+     TOBN(0xa0d051cc, 0x454d88c4), TOBN(0x4f6db89c, 0x1f8e531b),
+     TOBN(0x34fe3fd6, 0xca563a44), TOBN(0x7f5c2215, 0x58da8ab9),
+     TOBN(0x8445016d, 0x9474f0a1), TOBN(0x17d34d61, 0xcb7d8a0a),
+     TOBN(0x8e9d3910, 0x1c474019), TOBN(0xcaff2629, 0xd52ceefb),
+     TOBN(0xf9cf3e32, 0xc1622c2b), TOBN(0xd4b95e3c, 0xe9071a05),
+     TOBN(0xfbbca61f, 0x1594438c), TOBN(0x1eb6e6a6, 0x04aadedf),
+     TOBN(0x853027f4, 0x68e14940), TOBN(0x221d322a, 0xdfabda9c),
+     TOBN(0xed8ea9f6, 0xb7cb179a), TOBN(0xdc7b764d, 0xb7934dcc),
+     TOBN(0xfcb13940, 0x5e09180d), TOBN(0x6629a6bf, 0xb47dc2dd),
+     TOBN(0xbfc55e4e, 0x9f5a915e), TOBN(0xb1db9d37, 0x6204441e),
+     TOBN(0xf82d68cf, 0x930c5f53), TOBN(0x17d3a142, 0xcbb605b1),
+     TOBN(0xdd5944ea, 0x308780f2), TOBN(0xdc8de761, 0x3845f5e4),
+     TOBN(0x6beaba7d, 0x7624d7a3), TOBN(0x1e709afd, 0x304df11e),
+     TOBN(0x95364376, 0x02170456), TOBN(0xbf204b3a, 0xc8f94b64),
+     TOBN(0x4e53af7c, 0x5680ca68), TOBN(0x0526074a, 0xe0c67574),
+     TOBN(0x95d8cef8, 0xecd92af6), TOBN(0xe6b9fa7a, 0x6cd1745a),
+     TOBN(0x3d546d3d, 0xa325c3e4), TOBN(0x1f57691d, 0x9ae93aae),
+     TOBN(0xe891f3fe, 0x9d2e1a33), TOBN(0xd430093f, 0xac063d35),
+     TOBN(0xeda59b12, 0x5513a327), TOBN(0xdc2134f3, 0x5536f18f),
+     TOBN(0xaa51fe2c, 0x5c210286), TOBN(0x3f68aaee, 0x1cab658c),
+     TOBN(0x5a23a00b, 0xf9357292), TOBN(0x9a626f39, 0x7efdabed),
+     TOBN(0xfe2b3bf3, 0x199d78e3), TOBN(0xb7a2af77, 0x71bbc345),
+     TOBN(0x3d19827a, 0x1e59802c), TOBN(0x823bbc15, 0xb487a51c),
+     TOBN(0x856139f2, 0x99d0a422), TOBN(0x9ac3df65, 0xf456c6fb),
+     TOBN(0xaddf65c6, 0x701f8bd6), TOBN(0x149f321e, 0x3758df87),
+     TOBN(0xb1ecf714, 0x721b7eba), TOBN(0xe17df098, 0x31a3312a),
+     TOBN(0xdb2fd6ec, 0xd5c4d581), TOBN(0xfd02996f, 0x8fcea1b3),
+     TOBN(0xe29fa63e, 0x7882f14f), TOBN(0xc9f6dc35, 0x07c6cadc),
+     TOBN(0x46f22d6f, 0xb882bed0), TOBN(0x1a45755b, 0xd118e52c),
+     TOBN(0x9f2c7c27, 0x7c4608cf), TOBN(0x7ccbdf32, 0x568012c2),
+     TOBN(0xfcb0aedd, 0x61729b0e), TOBN(0x7ca2ca9e, 0xf7d75dbf),
+     TOBN(0xf58fecb1, 0x6f640f62), TOBN(0xe274b92b, 0x39f51946),
+     TOBN(0x7f4dfc04, 0x6288af44), TOBN(0x0a91f32a, 0xeac329e5),
+     TOBN(0x43ad274b, 0xd6aaba31), TOBN(0x719a1640, 0x0f6884f9),
+     TOBN(0x685d29f6, 0xdaf91e20), TOBN(0x5ec1cc33, 0x27e49d52),
+     TOBN(0x38f4de96, 0x3b54a059), TOBN(0x0e0015e5, 0xefbcfdb3),
+     TOBN(0x177d23d9, 0x4dbb8da6), TOBN(0x98724aa2, 0x97a617ad),
+     TOBN(0x30f0885b, 0xfdb6558e), TOBN(0xf9f7a28a, 0xc7899a96),
+     TOBN(0xd2ae8ac8, 0x872dc112), TOBN(0xfa0642ca, 0x73c3c459),
+     TOBN(0x15296981, 0xe7dfc8d6), TOBN(0x67cd4450, 0x1fb5b94a),
+     TOBN(0x0ec71cf1, 0x0eddfd37), TOBN(0xc7e5eeb3, 0x9a8eddc7),
+     TOBN(0x02ac8e3d, 0x81d95028), TOBN(0x0088f172, 0x70b0e35d),
+     TOBN(0xec041fab, 0xe1881fe3), TOBN(0x62cf71b8, 0xd99e7faa),
+     TOBN(0x5043dea7, 0xe0f222c2), TOBN(0x309d42ac, 0x72e65142),
+     TOBN(0x94fe9ddd, 0x9216cd30), TOBN(0xd6539c7d, 0x0f87feec),
+     TOBN(0x03c5a57c, 0x432ac7d7), TOBN(0x72692cf0, 0x327fda10),
+     TOBN(0xec28c85f, 0x280698de), TOBN(0x2331fb46, 0x7ec283b1),
+     TOBN(0xd34bfa32, 0x2867e633), TOBN(0x78709a82, 0x0a9cc815),
+     TOBN(0xb7fe6964, 0x875e2fa5), TOBN(0x25cc064f, 0x9e98bfb5),
+     TOBN(0x9eb0151c, 0x493a65c5), TOBN(0x5fb5d941, 0x53182464),
+     TOBN(0x69e6f130, 0xf04618e2), TOBN(0xa8ecec22, 0xf89c8ab6),
+     TOBN(0xcd6ac88b, 0xb96209bd), TOBN(0x65fa8cdb, 0xb3e1c9e0),
+     TOBN(0xa47d22f5, 0x4a8d8eac), TOBN(0x83895cdf, 0x8d33f963),
+     TOBN(0xa8adca59, 0xb56cd3d1), TOBN(0x10c8350b, 0xdaf38232),
+     TOBN(0x2b161fb3, 0xa5080a9f), TOBN(0xbe7f5c64, 0x3af65b3a),
+     TOBN(0x2c754039, 0x97403a11), TOBN(0x94626cf7, 0x121b96af),
+     TOBN(0x431de7c4, 0x6a983ec2), TOBN(0x3780dd3a, 0x52cc3df7),
+     TOBN(0xe28a0e46, 0x2baf8e3b), TOBN(0xabe68aad, 0x51d299ae),
+     TOBN(0x603eb8f9, 0x647a2408), TOBN(0x14c61ed6, 0x5c750981),
+     TOBN(0x88b34414, 0xc53352e7), TOBN(0x5a34889c, 0x1337d46e),
+     TOBN(0x612c1560, 0xf95f2bc8), TOBN(0x8a3f8441, 0xd4807a3a),
+     TOBN(0x680d9e97, 0x5224da68), TOBN(0x60cd6e88, 0xc3eb00e9),
+     TOBN(0x3875a98e, 0x9a6bc375), TOBN(0xdc80f924, 0x4fd554c2),
+     TOBN(0x6c4b3415, 0x6ac77407), TOBN(0xa1e5ea8f, 0x25420681),
+     TOBN(0x541bfa14, 0x4607a458), TOBN(0x5dbc7e7a, 0x96d7fbf9),
+     TOBN(0x646a851b, 0x31590a47), TOBN(0x039e85ba, 0x15ee6df8),
+     TOBN(0xd19fa231, 0xd7b43fc0), TOBN(0x84bc8be8, 0x299a0e04),
+     TOBN(0x2b9d2936, 0xf20df03a), TOBN(0x24054382, 0x8608d472),
+     TOBN(0x76b6ba04, 0x9149202a), TOBN(0xb21c3831, 0x3670e7b7),
+     TOBN(0xddd93059, 0xd6fdee10), TOBN(0x9da47ad3, 0x78488e71),
+     TOBN(0x99cc1dfd, 0xa0fcfb25), TOBN(0x42abde10, 0x64696954),
+     TOBN(0x14cc15fc, 0x17eab9fe), TOBN(0xd6e863e4, 0xd3e70972),
+     TOBN(0x29a7765c, 0x6432112c), TOBN(0x88660001, 0x5b0774d8),
+     TOBN(0x3729175a, 0x2c088eae), TOBN(0x13afbcae, 0x8230b8d4),
+     TOBN(0x44768151, 0x915f4379), TOBN(0xf086431a, 0xd8d22812),
+     TOBN(0x37461955, 0xc298b974), TOBN(0x905fb5f0, 0xf8711e04),
+     TOBN(0x787abf3a, 0xfe969d18), TOBN(0x392167c2, 0x6f6a494e),
+     TOBN(0xfc7a0d2d, 0x28c511da), TOBN(0xf127c7dc, 0xb66a262d),
+     TOBN(0xf9c4bb95, 0xfd63fdf0), TOBN(0x90016589, 0x3913ef46),
+     TOBN(0x74d2a73c, 0x11aa600d), TOBN(0x2f5379bd, 0x9fb5ab52),
+     TOBN(0xe49e53a4, 0x7fb70068), TOBN(0x68dd39e5, 0x404aa9a7),
+     TOBN(0xb9b0cf57, 0x2ecaa9c3), TOBN(0xba0e103b, 0xe824826b),
+     TOBN(0x60c2198b, 0x4631a3c4), TOBN(0xc5ff84ab, 0xfa8966a2),
+     TOBN(0x2d6ebe22, 0xac95aff8), TOBN(0x1c9bb6db, 0xb5a46d09),
+     TOBN(0x419062da, 0x53ee4f8d), TOBN(0x7b9042d0, 0xbb97efef),
+     TOBN(0x0f87f080, 0x830cf6bd), TOBN(0x4861d19a, 0x6ec8a6c6),
+     TOBN(0xd3a0daa1, 0x202f01aa), TOBN(0xb0111674, 0xf25afbd5),
+     TOBN(0x6d00d6cf, 0x1afb20d9), TOBN(0x13695000, 0x40671bc5),
+     TOBN(0x913ab0dc, 0x2485ea9b), TOBN(0x1f2bed06, 0x9eef61ac),
+     TOBN(0x850c8217, 0x6d799e20), TOBN(0x93415f37, 0x3271c2de),
+     TOBN(0x5afb06e9, 0x6c4f5910), TOBN(0x688a52df, 0xc4e9e421),
+     TOBN(0x30495ba3, 0xe2a9a6db), TOBN(0x4601303d, 0x58f9268b),
+     TOBN(0xbe3b0dad, 0x7eb0f04f), TOBN(0x4ea47250, 0x4456936d),
+     TOBN(0x8caf8798, 0xd33fd3e7), TOBN(0x1ccd8a89, 0xeb433708),
+     TOBN(0x9effe3e8, 0x87fd50ad), TOBN(0xbe240a56, 0x6b29c4df),
+     TOBN(0xec4ffd98, 0xca0e7ebd), TOBN(0xf586783a, 0xe748616e),
+     TOBN(0xa5b00d8f, 0xc77baa99), TOBN(0x0acada29, 0xb4f34c9c),
+     TOBN(0x36dad67d, 0x0fe723ac), TOBN(0x1d8e53a5, 0x39c36c1e),
+     TOBN(0xe4dd342d, 0x1f4bea41), TOBN(0x64fd5e35, 0xebc9e4e0),
+     TOBN(0x96f01f90, 0x57908805), TOBN(0xb5b9ea3d, 0x5ed480dd),
+     TOBN(0x366c5dc2, 0x3efd2dd0), TOBN(0xed2fe305, 0x6e9dfa27),
+     TOBN(0x4575e892, 0x6e9197e2), TOBN(0x11719c09, 0xab502a5d),
+     TOBN(0x264c7bec, 0xe81f213f), TOBN(0x741b9241, 0x55f5c457),
+     TOBN(0x78ac7b68, 0x49a5f4f4), TOBN(0xf91d70a2, 0x9fc45b7d),
+     TOBN(0x39b05544, 0xb0f5f355), TOBN(0x11f06bce, 0xeef930d9),
+     TOBN(0xdb84d25d, 0x038d05e1), TOBN(0x04838ee5, 0xbacc1d51),
+     TOBN(0x9da3ce86, 0x9e8ee00b), TOBN(0xc3412057, 0xc36eda1f),
+     TOBN(0xae80b913, 0x64d9c2f4), TOBN(0x7468bac3, 0xa010a8ff),
+     TOBN(0xdfd20037, 0x37359d41), TOBN(0x1a0f5ab8, 0x15efeacc),
+     TOBN(0x7c25ad2f, 0x659d0ce0), TOBN(0x4011bcbb, 0x6785cff1),
+     TOBN(0x128b9912, 0x7e2192c7), TOBN(0xa549d8e1, 0x13ccb0e8),
+     TOBN(0x805588d8, 0xc85438b1), TOBN(0x5680332d, 0xbc25cb27),
+     TOBN(0xdcd1bc96, 0x1a4bfdf4), TOBN(0x779ff428, 0x706f6566),
+     TOBN(0x8bbee998, 0xf059987a), TOBN(0xf6ce8cf2, 0xcc686de7),
+     TOBN(0xf8ad3c4a, 0x953cfdb2), TOBN(0xd1d426d9, 0x2205da36),
+     TOBN(0xb3c0f13f, 0xc781a241), TOBN(0x3e89360e, 0xd75362a8),
+     TOBN(0xccd05863, 0xc8a91184), TOBN(0x9bd0c9b7, 0xefa8a7f4),
+     TOBN(0x97ee4d53, 0x8a912a4b), TOBN(0xde5e15f8, 0xbcf518fd),
+     TOBN(0x6a055bf8, 0xc467e1e0), TOBN(0x10be4b4b, 0x1587e256),
+     TOBN(0xd90c14f2, 0x668621c9), TOBN(0xd5518f51, 0xab9c92c1),
+     TOBN(0x8e6a0100, 0xd6d47b3c), TOBN(0xcbe980dd, 0x66716175),
+     TOBN(0x500d3f10, 0xddd83683), TOBN(0x3b6cb35d, 0x99cac73c),
+     TOBN(0x53730c8b, 0x6083d550), TOBN(0xcf159767, 0xdf0a1987),
+     TOBN(0x84bfcf53, 0x43ad73b3), TOBN(0x1b528c20, 0x4f035a94),
+     TOBN(0x4294edf7, 0x33eeac69), TOBN(0xb6283e83, 0x817f3240),
+     TOBN(0xc3fdc959, 0x0a5f25b1), TOBN(0xefaf8aa5, 0x5844ee22),
+     TOBN(0xde269ba5, 0xdbdde4de), TOBN(0xe3347160, 0xc56133bf),
+     TOBN(0xc1184219, 0x8d9ea9f8), TOBN(0x090de5db, 0xf3fc1ab5),
+     TOBN(0x404c37b1, 0x0bf22cda), TOBN(0x7de20ec8, 0xf5618894),
+     TOBN(0x754c588e, 0xecdaecab), TOBN(0x6ca4b0ed, 0x88342743),
+     TOBN(0x76f08bdd, 0xf4a938ec), TOBN(0xd182de89, 0x91493ccb),
+     TOBN(0xd652c53e, 0xc8a4186a), TOBN(0xb3e878db, 0x946d8e33),
+     TOBN(0x088453c0, 0x5f37663c), TOBN(0x5cd9daaa, 0xb407748b),
+     TOBN(0xa1f5197f, 0x586d5e72), TOBN(0x47500be8, 0xc443ca59),
+     TOBN(0x78ef35b2, 0xe2652424), TOBN(0x09c5d26f, 0x6dd7767d),
+     TOBN(0x7175a79a, 0xa74d3f7b), TOBN(0x0428fd8d, 0xcf5ea459),
+     TOBN(0x511cb97c, 0xa5d1746d), TOBN(0x36363939, 0xe71d1278),
+     TOBN(0xcf2df955, 0x10350bf4), TOBN(0xb3817439, 0x60aae782),
+     TOBN(0xa748c0e4, 0x3e688809), TOBN(0x98021fbf, 0xd7a5a006),
+     TOBN(0x9076a70c, 0x0e367a98), TOBN(0xbea1bc15, 0x0f62b7c2),
+     TOBN(0x2645a68c, 0x30fe0343), TOBN(0xacaffa78, 0x699dc14f),
+     TOBN(0xf4469964, 0x457bf9c4), TOBN(0x0db6407b, 0x0d2ead83),
+     TOBN(0x68d56cad, 0xb2c6f3eb), TOBN(0x3b512e73, 0xf376356c),
+     TOBN(0xe43b0e1f, 0xfce10408), TOBN(0x89ddc003, 0x5a5e257d),
+     TOBN(0xb0ae0d12, 0x0362e5b3), TOBN(0x07f983c7, 0xb0519161),
+     TOBN(0xc2e94d15, 0x5d5231e7), TOBN(0xcff22aed, 0x0b4f9513),
+     TOBN(0xb02588dd, 0x6ad0b0b5), TOBN(0xb967d1ac, 0x11d0dcd5),
+     TOBN(0x8dac6bc6, 0xcf777b6c), TOBN(0x0062bdbd, 0x4c6d1959),
+     TOBN(0x53da71b5, 0x0ef5cc85), TOBN(0x07012c7d, 0x4006f14f),
+     TOBN(0x4617f962, 0xac47800d), TOBN(0x53365f2b, 0xc102ed75),
+     TOBN(0xb422efcb, 0x4ab8c9d3), TOBN(0x195cb26b, 0x34af31c9),
+     TOBN(0x3a926e29, 0x05f2c4ce), TOBN(0xbd2bdecb, 0x9856966c),
+     TOBN(0x5d16ab3a, 0x85527015), TOBN(0x9f81609e, 0x4486c231),
+     TOBN(0xd8b96b2c, 0xda350002), TOBN(0xbd054690, 0xfa1b7d36),
+     TOBN(0xdc90ebf5, 0xe71d79bc), TOBN(0xf241b6f9, 0x08964e4e),
+     TOBN(0x7c838643, 0x2fe3cd4c), TOBN(0xe0f33acb, 0xb4bc633c),
+     TOBN(0xb4a9ecec, 0x3d139f1f), TOBN(0x05ce69cd, 0xdc4a1f49),
+     TOBN(0xa19d1b16, 0xf5f98aaf), TOBN(0x45bb71d6, 0x6f23e0ef),
+     TOBN(0x33789fcd, 0x46cdfdd3), TOBN(0x9b8e2978, 0xcee040ca),
+     TOBN(0x9c69b246, 0xae0a6828), TOBN(0xba533d24, 0x7078d5aa),
+     TOBN(0x7a2e42c0, 0x7bb4fbdb), TOBN(0xcfb4879a, 0x7035385c),
+     TOBN(0x8c3dd30b, 0x3281705b), TOBN(0x7e361c6c, 0x404fe081),
+     TOBN(0x7b21649c, 0x3f604edf), TOBN(0x5dbf6a3f, 0xe52ffe47),
+     TOBN(0xc41b7c23, 0x4b54d9bf), TOBN(0x1374e681, 0x3511c3d9),
+     TOBN(0x1863bf16, 0xc1b2b758), TOBN(0x90e78507, 0x1e9e6a96),
+     TOBN(0xab4bf98d, 0x5d86f174), TOBN(0xd74e0bd3, 0x85e96fe4),
+     TOBN(0x8afde39f, 0xcac5d344), TOBN(0x90946dbc, 0xbd91b847),
+     TOBN(0xf5b42358, 0xfe1a838c), TOBN(0x05aae6c5, 0x620ac9d8),
+     TOBN(0x8e193bd8, 0xa1ce5a0b), TOBN(0x8f710571, 0x4dabfd72),
+     TOBN(0x8d8fdd48, 0x182caaac), TOBN(0x8c4aeefa, 0x040745cf),
+     TOBN(0x73c6c30a, 0xf3b93e6d), TOBN(0x991241f3, 0x16f42011),
+     TOBN(0xa0158eea, 0xe457a477), TOBN(0xd19857db, 0xee6ddc05),
+     TOBN(0xb3265224, 0x18c41671), TOBN(0x3ffdfc7e, 0x3c2c0d58),
+     TOBN(0x3a3a5254, 0x26ee7cda), TOBN(0x341b0869, 0xdf02c3a8),
+     TOBN(0xa023bf42, 0x723bbfc8), TOBN(0x3d15002a, 0x14452691),}
+    ,
+    {TOBN(0x5ef7324c, 0x85edfa30), TOBN(0x25976554, 0x87d4f3da),
+     TOBN(0x352f5bc0, 0xdcb50c86), TOBN(0x8f6927b0, 0x4832a96c),
+     TOBN(0xd08ee1ba, 0x55f2f94c), TOBN(0x6a996f99, 0x344b45fa),
+     TOBN(0xe133cb8d, 0xa8aa455d), TOBN(0x5d0721ec, 0x758dc1f7),
+     TOBN(0x6ba7a920, 0x79e5fb67), TOBN(0xe1331feb, 0x70aa725e),
+     TOBN(0x5080ccf5, 0x7df5d837), TOBN(0xe4cae01d, 0x7ff72e21),
+     TOBN(0xd9243ee6, 0x0412a77d), TOBN(0x06ff7cac, 0xdf449025),
+     TOBN(0xbe75f7cd, 0x23ef5a31), TOBN(0xbc957822, 0x0ddef7a8),
+     TOBN(0x8cf7230c, 0xb0ce1c55), TOBN(0x5b534d05, 0x0bbfb607),
+     TOBN(0xee1ef113, 0x0e16363b), TOBN(0x27e0aa7a, 0xb4999e82),
+     TOBN(0xce1dac2d, 0x79362c41), TOBN(0x67920c90, 0x91bb6cb0),
+     TOBN(0x1e648d63, 0x2223df24), TOBN(0x0f7d9eef, 0xe32e8f28),
+     TOBN(0x6943f39a, 0xfa833834), TOBN(0x22951722, 0xa6328562),
+     TOBN(0x81d63dd5, 0x4170fc10), TOBN(0x9f5fa58f, 0xaecc2e6d),
+     TOBN(0xb66c8725, 0xe77d9a3b), TOBN(0x11235cea, 0x6384ebe0),
+     TOBN(0x06a8c118, 0x5845e24a), TOBN(0x0137b286, 0xebd093b1),
+     TOBN(0xc589e1ce, 0x44ace150), TOBN(0xe0f8d3d9, 0x4381e97c),
+     TOBN(0x59e99b11, 0x62c5a4b8), TOBN(0x90d262f7, 0xfd0ec9f9),
+     TOBN(0xfbc854c9, 0x283e13c9), TOBN(0x2d04fde7, 0xaedc7085),
+     TOBN(0x057d7765, 0x47dcbecb), TOBN(0x8dbdf591, 0x9a76fa5f),
+     TOBN(0xd0150695, 0x0de1e578), TOBN(0x2e1463e7, 0xe9f72bc6),
+     TOBN(0xffa68441, 0x1b39eca5), TOBN(0x673c8530, 0x7c037f2f),
+     TOBN(0xd0d6a600, 0x747f91da), TOBN(0xb08d43e1, 0xc9cb78e9),
+     TOBN(0x0fc0c644, 0x27b5cef5), TOBN(0x5c1d160a, 0xa60a2fd6),
+     TOBN(0xf98cae53, 0x28c8e13b), TOBN(0x375f10c4, 0xb2eddcd1),
+     TOBN(0xd4eb8b7f, 0x5cce06ad), TOBN(0xb4669f45, 0x80a2e1ef),
+     TOBN(0xd593f9d0, 0x5bbd8699), TOBN(0x5528a4c9, 0xe7976d13),
+     TOBN(0x3923e095, 0x1c7e28d3), TOBN(0xb9293790, 0x3f6bb577),
+     TOBN(0xdb567d6a, 0xc42bd6d2), TOBN(0x6df86468, 0xbb1f96ae),
+     TOBN(0x0efe5b1a, 0x4843b28e), TOBN(0x961bbb05, 0x6379b240),
+     TOBN(0xb6caf5f0, 0x70a6a26b), TOBN(0x70686c0d, 0x328e6e39),
+     TOBN(0x80da06cf, 0x895fc8d3), TOBN(0x804d8810, 0xb363fdc9),
+     TOBN(0xbe22877b, 0x207f1670), TOBN(0x9b0dd188, 0x4e615291),
+     TOBN(0x625ae8dc, 0x97a3c2bf), TOBN(0x08584ef7, 0x439b86e8),
+     TOBN(0xde7190a5, 0xdcd898ff), TOBN(0x26286c40, 0x2058ee3d),
+     TOBN(0x3db0b217, 0x5f87b1c1), TOBN(0xcc334771, 0x102a6db5),
+     TOBN(0xd99de954, 0x2f770fb1), TOBN(0x97c1c620, 0x4cd7535e),
+     TOBN(0xd3b6c448, 0x3f09cefc), TOBN(0xd725af15, 0x5a63b4f8),
+     TOBN(0x0c95d24f, 0xc01e20ec), TOBN(0xdfd37494, 0x9ae7121f),
+     TOBN(0x7d6ddb72, 0xec77b7ec), TOBN(0xfe079d3b, 0x0353a4ae),
+     TOBN(0x3066e70a, 0x2e6ac8d2), TOBN(0x9c6b5a43, 0x106e5c05),
+     TOBN(0x52d3c6f5, 0xede59b8c), TOBN(0x30d6a5c3, 0xfccec9ae),
+     TOBN(0xedec7c22, 0x4fc0a9ef), TOBN(0x190ff083, 0x95c16ced),
+     TOBN(0xbe12ec8f, 0x94de0fde), TOBN(0x0d131ab8, 0x852d3433),
+     TOBN(0x42ace07e, 0x85701291), TOBN(0x94793ed9, 0x194061a8),
+     TOBN(0x30e83ed6, 0xd7f4a485), TOBN(0x9eec7269, 0xf9eeff4d),
+     TOBN(0x90acba59, 0x0c9d8005), TOBN(0x5feca458, 0x1e79b9d1),
+     TOBN(0x8fbe5427, 0x1d506a1e), TOBN(0xa32b2c8e, 0x2439cfa7),
+     TOBN(0x1671c173, 0x73dd0b4e), TOBN(0x37a28214, 0x44a054c6),
+     TOBN(0x81760a1b, 0x4e8b53f1), TOBN(0xa6c04224, 0xf9f93b9e),
+     TOBN(0x18784b34, 0xcf671e3c), TOBN(0x81bbecd2, 0xcda9b994),
+     TOBN(0x38831979, 0xb2ab3848), TOBN(0xef54feb7, 0xf2e03c2d),
+     TOBN(0xcf197ca7, 0xfb8088fa), TOBN(0x01427247, 0x4ddc96c5),
+     TOBN(0xa2d2550a, 0x30777176), TOBN(0x53469898, 0x4d0cf71d),
+     TOBN(0x6ce937b8, 0x3a2aaac6), TOBN(0xe9f91dc3, 0x5af38d9b),
+     TOBN(0x2598ad83, 0xc8bf2899), TOBN(0x8e706ac9, 0xb5536c16),
+     TOBN(0x40dc7495, 0xf688dc98), TOBN(0x26490cd7, 0x124c4afc),
+     TOBN(0xe651ec84, 0x1f18775c), TOBN(0x393ea6c3, 0xb4fdaf4a),
+     TOBN(0x1e1f3343, 0x7f338e0d), TOBN(0x39fb832b, 0x6053e7b5),
+     TOBN(0x46e702da, 0x619e14d5), TOBN(0x859cacd1, 0xcdeef6e0),
+     TOBN(0x63b99ce7, 0x4462007d), TOBN(0xb8ab48a5, 0x4cb5f5b7),
+     TOBN(0x9ec673d2, 0xf55edde7), TOBN(0xd1567f74, 0x8cfaefda),
+     TOBN(0x46381b6b, 0x0887bcec), TOBN(0x694497ce, 0xe178f3c2),
+     TOBN(0x5e6525e3, 0x1e6266cb), TOBN(0x5931de26, 0x697d6413),
+     TOBN(0x87f8df7c, 0x0e58d493), TOBN(0xb1ae5ed0, 0x58b73f12),
+     TOBN(0xc368f784, 0xdea0c34d), TOBN(0x9bd0a120, 0x859a91a0),
+     TOBN(0xb00d88b7, 0xcc863c68), TOBN(0x3a1cc11e, 0x3d1f4d65),
+     TOBN(0xea38e0e7, 0x0aa85593), TOBN(0x37f13e98, 0x7dc4aee8),
+     TOBN(0x10d38667, 0xbc947bad), TOBN(0x738e07ce, 0x2a36ee2e),
+     TOBN(0xc93470cd, 0xc577fcac), TOBN(0xdee1b616, 0x2782470d),
+     TOBN(0x36a25e67, 0x2e793d12), TOBN(0xd6aa6cae, 0xe0f186da),
+     TOBN(0x474d0fd9, 0x80e07af7), TOBN(0xf7cdc47d, 0xba8a5cd4),
+     TOBN(0x28af6d9d, 0xab15247f), TOBN(0x7c789c10, 0x493a537f),
+     TOBN(0x7ac9b110, 0x23a334e7), TOBN(0x0236ac09, 0x12c9c277),
+     TOBN(0xa7e5bd25, 0x1d7a5144), TOBN(0x098b9c2a, 0xf13ec4ec),
+     TOBN(0x3639daca, 0xd3f0abca), TOBN(0x642da81a, 0xa23960f9),
+     TOBN(0x7d2e5c05, 0x4f7269b1), TOBN(0xfcf30777, 0xe287c385),
+     TOBN(0x10edc84f, 0xf2a46f21), TOBN(0x35441757, 0x4f43fa36),
+     TOBN(0xf1327899, 0xfd703431), TOBN(0xa438d7a6, 0x16dd587a),
+     TOBN(0x65c34c57, 0xe9c8352d), TOBN(0xa728edab, 0x5cc5a24e),
+     TOBN(0xaed78abc, 0x42531689), TOBN(0x0a51a0e8, 0x010963ef),
+     TOBN(0x5776fa0a, 0xd717d9b3), TOBN(0xf356c239, 0x7dd3428b),
+     TOBN(0x29903fff, 0x8d3a3dac), TOBN(0x409597fa, 0x3d94491f),
+     TOBN(0x4cd7a5ff, 0xbf4a56a4), TOBN(0xe5096474, 0x8adab462),
+     TOBN(0xa97b5126, 0x5c3427b0), TOBN(0x6401405c, 0xd282c9bd),
+     TOBN(0x3629f8d7, 0x222c5c45), TOBN(0xb1c02c16, 0xe8d50aed),
+     TOBN(0xbea2ed75, 0xd9635bc9), TOBN(0x226790c7, 0x6e24552f),
+     TOBN(0x3c33f2a3, 0x65f1d066), TOBN(0x2a43463e, 0x6dfccc2e),
+     TOBN(0x8cc3453a, 0xdb483761), TOBN(0xe7cc6085, 0x65d5672b),
+     TOBN(0x277ed6cb, 0xde3efc87), TOBN(0x19f2f368, 0x69234eaf),
+     TOBN(0x9aaf4317, 0x5c0b800b), TOBN(0x1f1e7c89, 0x8b6da6e2),
+     TOBN(0x6cfb4715, 0xb94ec75e), TOBN(0xd590dd5f, 0x453118c2),
+     TOBN(0x14e49da1, 0x1f17a34c), TOBN(0x5420ab39, 0x235a1456),
+     TOBN(0xb7637241, 0x2f50363b), TOBN(0x7b15d623, 0xc3fabb6e),
+     TOBN(0xa0ef40b1, 0xe274e49c), TOBN(0x5cf50744, 0x96b1860a),
+     TOBN(0xd6583fbf, 0x66afe5a4), TOBN(0x44240510, 0xf47e3e9a),
+     TOBN(0x99254343, 0x11b2d595), TOBN(0xf1367499, 0xeec8df57),
+     TOBN(0x3cb12c61, 0x3e73dd05), TOBN(0xd248c033, 0x7dac102a),
+     TOBN(0xcf154f13, 0xa77739f5), TOBN(0xbf4288cb, 0x23d2af42),
+     TOBN(0xaa64c9b6, 0x32e4a1cf), TOBN(0xee8c07a8, 0xc8a208f3),
+     TOBN(0xe10d4999, 0x6fe8393f), TOBN(0x0f809a3f, 0xe91f3a32),
+     TOBN(0x61096d1c, 0x802f63c8), TOBN(0x289e1462, 0x57750d3d),
+     TOBN(0xed06167e, 0x9889feea), TOBN(0xd5c9c0e2, 0xe0993909),
+     TOBN(0x46fca0d8, 0x56508ac6), TOBN(0x91826047, 0x4f1b8e83),
+     TOBN(0x4f2c877a, 0x9a4a2751), TOBN(0x71bd0072, 0xcae6fead),
+     TOBN(0x38df8dcc, 0x06aa1941), TOBN(0x5a074b4c, 0x63beeaa8),
+     TOBN(0xd6d65934, 0xc1cec8ed), TOBN(0xa6ecb49e, 0xaabc03bd),
+     TOBN(0xaade91c2, 0xde8a8415), TOBN(0xcfb0efdf, 0x691136e0),
+     TOBN(0x11af45ee, 0x23ab3495), TOBN(0xa132df88, 0x0b77463d),
+     TOBN(0x8923c15c, 0x815d06f4), TOBN(0xc3ceb3f5, 0x0d61a436),
+     TOBN(0xaf52291d, 0xe88fb1da), TOBN(0xea057974, 0x1da12179),
+     TOBN(0xb0d7218c, 0xd2fef720), TOBN(0x6c0899c9, 0x8e1d8845),
+     TOBN(0x98157504, 0x752ddad7), TOBN(0xd60bd74f, 0xa1a68a97),
+     TOBN(0x7047a3a9, 0xf658fb99), TOBN(0x1f5d86d6, 0x5f8511e4),
+     TOBN(0xb8a4bc42, 0x4b5a6d88), TOBN(0x69eb2c33, 0x1abefa7d),
+     TOBN(0x95bf39e8, 0x13c9c510), TOBN(0xf571960a, 0xd48aab43),
+     TOBN(0x7e8cfbcf, 0x704e23c6), TOBN(0xc71b7d22, 0x28aaa65b),
+     TOBN(0xa041b2bd, 0x245e3c83), TOBN(0x69b98834, 0xd21854ff),
+     TOBN(0x89d227a3, 0x963bfeec), TOBN(0x99947aaa, 0xde7da7cb),
+     TOBN(0x1d9ee9db, 0xee68a9b1), TOBN(0x0a08f003, 0x698ec368),
+     TOBN(0xe9ea4094, 0x78ef2487), TOBN(0xc8d2d415, 0x02cfec26),
+     TOBN(0xc52f9a6e, 0xb7dcf328), TOBN(0x0ed489e3, 0x85b6a937),
+     TOBN(0x9b94986b, 0xbef3366e), TOBN(0x0de59c70, 0xedddddb8),
+     TOBN(0xffdb748c, 0xeadddbe2), TOBN(0x9b9784bb, 0x8266ea40),
+     TOBN(0x142b5502, 0x1a93507a), TOBN(0xb4cd1187, 0x8d3c06cf),
+     TOBN(0xdf70e76a, 0x91ec3f40), TOBN(0x484e81ad, 0x4e7553c2),
+     TOBN(0x830f87b5, 0x272e9d6e), TOBN(0xea1c93e5, 0xc6ff514a),
+     TOBN(0x67cc2adc, 0xc4192a8e), TOBN(0xc77e27e2, 0x42f4535a),
+     TOBN(0x9cdbab36, 0xd2b713c5), TOBN(0x86274ea0, 0xcf7b0cd3),
+     TOBN(0x784680f3, 0x09af826b), TOBN(0xbfcc837a, 0x0c72dea3),
+     TOBN(0xa8bdfe9d, 0xd6529b73), TOBN(0x708aa228, 0x63a88002),
+     TOBN(0x6c7a9a54, 0xc91d45b9), TOBN(0xdf1a38bb, 0xfd004f56),
+     TOBN(0x2e8c9a26, 0xb8bad853), TOBN(0x2d52cea3, 0x3723eae7),
+     TOBN(0x054d6d81, 0x56ca2830), TOBN(0xa3317d14, 0x9a8dc411),
+     TOBN(0xa08662fe, 0xfd4ddeda), TOBN(0xed2a153a, 0xb55d792b),
+     TOBN(0x7035c16a, 0xbfc6e944), TOBN(0xb6bc5834, 0x00171cf3),
+     TOBN(0xe27152b3, 0x83d102b6), TOBN(0xfe695a47, 0x0646b848),
+     TOBN(0xa5bb09d8, 0x916e6d37), TOBN(0xb4269d64, 0x0d17015e),
+     TOBN(0x8d8156a1, 0x0a1d2285), TOBN(0xfeef6c51, 0x46d26d72),
+     TOBN(0x9dac57c8, 0x4c5434a7), TOBN(0x0282e5be, 0x59d39e31),
+     TOBN(0xedfff181, 0x721c486d), TOBN(0x301baf10, 0xbc58824e),
+     TOBN(0x8136a6aa, 0x00570031), TOBN(0x55aaf78c, 0x1cddde68),
+     TOBN(0x26829371, 0x59c63952), TOBN(0x3a3bd274, 0x8bc25baf),
+     TOBN(0xecdf8657, 0xb7e52dc3), TOBN(0x2dd8c087, 0xfd78e6c8),
+     TOBN(0x20553274, 0xf5531461), TOBN(0x8b4a1281, 0x5d95499b),
+     TOBN(0xe2c8763a, 0x1a80f9d2), TOBN(0xd1dbe32b, 0x4ddec758),
+     TOBN(0xaf12210d, 0x30c34169), TOBN(0xba74a953, 0x78baa533),
+     TOBN(0x3d133c6e, 0xa438f254), TOBN(0xa431531a, 0x201bef5b),
+     TOBN(0x15295e22, 0xf669d7ec), TOBN(0xca374f64, 0x357fb515),
+     TOBN(0x8a8406ff, 0xeaa3fdb3), TOBN(0x106ae448, 0xdf3f2da8),
+     TOBN(0x8f9b0a90, 0x33c8e9a1), TOBN(0x234645e2, 0x71ad5885),
+     TOBN(0x3d083224, 0x1c0aed14), TOBN(0xf10a7d3e, 0x7a942d46),
+     TOBN(0x7c11deee, 0x40d5c9be), TOBN(0xb2bae7ff, 0xba84ed98),
+     TOBN(0x93e97139, 0xaad58ddd), TOBN(0x3d872796, 0x3f6d1fa3),
+     TOBN(0x483aca81, 0x8569ff13), TOBN(0x8b89a5fb, 0x9a600f72),
+     TOBN(0x4cbc27c3, 0xc06f2b86), TOBN(0x22130713, 0x63ad9c0b),
+     TOBN(0xb5358b1e, 0x48ac2840), TOBN(0x18311294, 0xecba9477),
+     TOBN(0xda58f990, 0xa6946b43), TOBN(0x3098baf9, 0x9ab41819),
+     TOBN(0x66c4c158, 0x4198da52), TOBN(0xab4fc17c, 0x146bfd1b),
+     TOBN(0x2f0a4c3c, 0xbf36a908), TOBN(0x2ae9e34b, 0x58cf7838),
+     TOBN(0xf411529e, 0x3fa11b1f), TOBN(0x21e43677, 0x974af2b4),
+     TOBN(0x7c20958e, 0xc230793b), TOBN(0x710ea885, 0x16e840f3),
+     TOBN(0xfc0b21fc, 0xc5dc67cf), TOBN(0x08d51647, 0x88405718),
+     TOBN(0xd955c21f, 0xcfe49eb7), TOBN(0x9722a5d5, 0x56dd4a1f),
+     TOBN(0xc9ef50e2, 0xc861baa5), TOBN(0xc0c21a5d, 0x9505ac3e),
+     TOBN(0xaf6b9a33, 0x8b7c063f), TOBN(0xc6370339, 0x2f4779c1),
+     TOBN(0x22df99c7, 0x638167c3), TOBN(0xfe6ffe76, 0x795db30c),
+     TOBN(0x2b822d33, 0xa4854989), TOBN(0xfef031dd, 0x30563aa5),
+     TOBN(0x16b09f82, 0xd57c667f), TOBN(0xc70312ce, 0xcc0b76f1),
+     TOBN(0xbf04a9e6, 0xc9118aec), TOBN(0x82fcb419, 0x3409d133),
+     TOBN(0x1a8ab385, 0xab45d44d), TOBN(0xfba07222, 0x617b83a3),
+     TOBN(0xb05f50dd, 0x58e81b52), TOBN(0x1d8db553, 0x21ce5aff),
+     TOBN(0x3097b8d4, 0xe344a873), TOBN(0x7d8d116d, 0xfe36d53e),
+     TOBN(0x6db22f58, 0x7875e750), TOBN(0x2dc5e373, 0x43e144ea),
+     TOBN(0xc05f32e6, 0xe799eb95), TOBN(0xe9e5f4df, 0x6899e6ec),
+     TOBN(0xbdc3bd68, 0x1fab23d5), TOBN(0xb72b8ab7, 0x73af60e6),
+     TOBN(0x8db27ae0, 0x2cecc84a), TOBN(0x600016d8, 0x7bdb871c),
+     TOBN(0x42a44b13, 0xd7c46f58), TOBN(0xb8919727, 0xc3a77d39),
+     TOBN(0xcfc6bbbd, 0xdafd6088), TOBN(0x1a740146, 0x6bd20d39),
+     TOBN(0x8c747abd, 0x98c41072), TOBN(0x4c91e765, 0xbdf68ea1),
+     TOBN(0x7c95e5ca, 0x08819a78), TOBN(0xcf48b729, 0xc9587921),
+     TOBN(0x091c7c5f, 0xdebbcc7d), TOBN(0x6f287404, 0xf0e05149),
+     TOBN(0xf83b5ac2, 0x26cd44ec), TOBN(0x88ae32a6, 0xcfea250e),
+     TOBN(0x6ac5047a, 0x1d06ebc5), TOBN(0xc7e550b4, 0xd434f781),
+     TOBN(0x61ab1cf2, 0x5c727bd2), TOBN(0x2e4badb1, 0x1cf915b0),
+     TOBN(0x1b4dadec, 0xf69d3920), TOBN(0xe61b1ca6, 0xf14c1dfe),
+     TOBN(0x90b479cc, 0xbd6bd51f), TOBN(0x8024e401, 0x8045ec30),
+     TOBN(0xcab29ca3, 0x25ef0e62), TOBN(0x4f2e9416, 0x49e4ebc0),
+     TOBN(0x45eb40ec, 0x0ccced58), TOBN(0x25cd4b9c, 0x0da44f98),
+     TOBN(0x43e06458, 0x871812c6), TOBN(0x99f80d55, 0x16cef651),
+     TOBN(0x571340c9, 0xce6dc153), TOBN(0x138d5117, 0xd8665521),
+     TOBN(0xacdb45bc, 0x4e07014d), TOBN(0x2f34bb38, 0x84b60b91),
+     TOBN(0xf44a4fd2, 0x2ae8921e), TOBN(0xb039288e, 0x892ba1e2),
+     TOBN(0x9da50174, 0xb1c180b2), TOBN(0x6b70ab66, 0x1693dc87),
+     TOBN(0x7e9babc9, 0xe7057481), TOBN(0x4581ddef, 0x9c80dc41),
+     TOBN(0x0c890da9, 0x51294682), TOBN(0x0b5629d3, 0x3f4736e5),
+     TOBN(0x2340c79e, 0xb06f5b41), TOBN(0xa42e84ce, 0x4e243469),
+     TOBN(0xf9a20135, 0x045a71a9), TOBN(0xefbfb415, 0xd27b6fb6),
+     TOBN(0x25ebea23, 0x9d33cd6f), TOBN(0x9caedb88, 0xaa6c0af8),
+     TOBN(0x53dc7e9a, 0xd9ce6f96), TOBN(0x3897f9fd, 0x51e0b15a),
+     TOBN(0xf51cb1f8, 0x8e5d788e), TOBN(0x1aec7ba8, 0xe1d490ee),
+     TOBN(0x265991e0, 0xcc58cb3c), TOBN(0x9f306e8c, 0x9fc3ad31),
+     TOBN(0x5fed006e, 0x5040a0ac), TOBN(0xca9d5043, 0xfb476f2e),
+     TOBN(0xa19c06e8, 0xbeea7a23), TOBN(0xd2865801, 0x0edabb63),
+     TOBN(0xdb92293f, 0x6967469a), TOBN(0x2894d839, 0x8d8a8ed8),
+     TOBN(0x87c9e406, 0xbbc77122), TOBN(0x8671c6f1, 0x2ea3a26a),
+     TOBN(0xe42df8d6, 0xd7de9853), TOBN(0x2e3ce346, 0xb1f2bcc7),
+     TOBN(0xda601dfc, 0x899d50cf), TOBN(0xbfc913de, 0xfb1b598f),
+     TOBN(0x81c4909f, 0xe61f7908), TOBN(0x192e304f, 0x9bbc7b29),
+     TOBN(0xc3ed8738, 0xc104b338), TOBN(0xedbe9e47, 0x783f5d61),
+     TOBN(0x0c06e9be, 0x2db30660), TOBN(0xda3e613f, 0xc0eb7d8e),
+     TOBN(0xd8fa3e97, 0x322e096e), TOBN(0xfebd91e8, 0xd336e247),
+     TOBN(0x8f13ccc4, 0xdf655a49), TOBN(0xa9e00dfc, 0x5eb20210),
+     TOBN(0x84631d0f, 0xc656b6ea), TOBN(0x93a058cd, 0xd8c0d947),
+     TOBN(0x6846904a, 0x67bd3448), TOBN(0x4a3d4e1a, 0xf394fd5c),
+     TOBN(0xc102c1a5, 0xdb225f52), TOBN(0xe3455bba, 0xfc4f5e9a),
+     TOBN(0x6b36985b, 0x4b9ad1ce), TOBN(0xa9818536, 0x5bb7f793),
+     TOBN(0x6c25e1d0, 0x48b1a416), TOBN(0x1381dd53, 0x3c81bee7),
+     TOBN(0xd2a30d61, 0x7a4a7620), TOBN(0xc8412926, 0x39b8944c),
+     TOBN(0x3c1c6fbe, 0x7a97c33a), TOBN(0x941e541d, 0x938664e7),
+     TOBN(0x417499e8, 0x4a34f239), TOBN(0x15fdb83c, 0xb90402d5),
+     TOBN(0xb75f46bf, 0x433aa832), TOBN(0xb61e15af, 0x63215db1),
+     TOBN(0xaabe59d4, 0xa127f89a), TOBN(0x5d541e0c, 0x07e816da),
+     TOBN(0xaaba0659, 0xa618b692), TOBN(0x55327733, 0x17266026),
+     TOBN(0xaf53a0fc, 0x95f57552), TOBN(0x32947650, 0x6cacb0c9),
+     TOBN(0x253ff58d, 0xc821be01), TOBN(0xb0309531, 0xa06f1146),
+     TOBN(0x59bbbdf5, 0x05c2e54d), TOBN(0x158f27ad, 0x26e8dd22),
+     TOBN(0xcc5b7ffb, 0x397e1e53), TOBN(0xae03f65b, 0x7fc1e50d),
+     TOBN(0xa9784ebd, 0x9c95f0f9), TOBN(0x5ed9deb2, 0x24640771),
+     TOBN(0x31244af7, 0x035561c4), TOBN(0x87332f3a, 0x7ee857de),
+     TOBN(0x09e16e9e, 0x2b9e0d88), TOBN(0x52d910f4, 0x56a06049),
+     TOBN(0x507ed477, 0xa9592f48), TOBN(0x85cb917b, 0x2365d678),
+     TOBN(0xf8511c93, 0x4c8998d1), TOBN(0x2186a3f1, 0x730ea58f),
+     TOBN(0x50189626, 0xb2029db0), TOBN(0x9137a6d9, 0x02ceb75a),
+     TOBN(0x2fe17f37, 0x748bc82c), TOBN(0x87c2e931, 0x80469f8c),
+     TOBN(0x850f71cd, 0xbf891aa2), TOBN(0x0ca1b89b, 0x75ec3d8d),
+     TOBN(0x516c43aa, 0x5e1cd3cd), TOBN(0x89397808, 0x9a887c28),
+     TOBN(0x0059c699, 0xddea1f9f), TOBN(0x7737d6fa, 0x8e6868f7),
+     TOBN(0x6d93746a, 0x60f1524b), TOBN(0x36985e55, 0xba052aa7),
+     TOBN(0x41b1d322, 0xed923ea5), TOBN(0x3429759f, 0x25852a11),
+     TOBN(0xbeca6ec3, 0x092e9f41), TOBN(0x3a238c66, 0x62256bbd),
+     TOBN(0xd82958ea, 0x70ad487d), TOBN(0x4ac8aaf9, 0x65610d93),
+     TOBN(0x3fa101b1, 0x5e4ccab0), TOBN(0x9bf430f2, 0x9de14bfb),
+     TOBN(0xa10f5cc6, 0x6531899d), TOBN(0x590005fb, 0xea8ce17d),
+     TOBN(0xc437912f, 0x24544cb6), TOBN(0x9987b71a, 0xd79ac2e3),
+     TOBN(0x13e3d9dd, 0xc058a212), TOBN(0x00075aac, 0xd2de9606),
+     TOBN(0x80ab508b, 0x6cac8369), TOBN(0x87842be7, 0xf54f6c89),
+     TOBN(0xa7ad663d, 0x6bc532a4), TOBN(0x67813de7, 0x78a91bc8),
+     TOBN(0x5dcb61ce, 0xc3427239), TOBN(0x5f3c7cf0, 0xc56934d9),
+     TOBN(0xc079e0fb, 0xe3191591), TOBN(0xe40896bd, 0xb01aada7),
+     TOBN(0x8d466791, 0x0492d25f), TOBN(0x8aeb30c9, 0xe7408276),
+     TOBN(0xe9437495, 0x9287aacc), TOBN(0x23d4708d, 0x79fe03d4),
+     TOBN(0x8cda9cf2, 0xd0c05199), TOBN(0x502fbc22, 0xfae78454),
+     TOBN(0xc0bda9df, 0xf572a182), TOBN(0x5f9b71b8, 0x6158b372),
+     TOBN(0xe0f33a59, 0x2b82dd07), TOBN(0x76302735, 0x9523032e),
+     TOBN(0x7fe1a721, 0xc4505a32), TOBN(0x7b6e3e82, 0xf796409f),}
+    ,
+    {TOBN(0xe3417bc0, 0x35d0b34a), TOBN(0x440b386b, 0x8327c0a7),
+     TOBN(0x8fb7262d, 0xac0362d1), TOBN(0x2c41114c, 0xe0cdf943),
+     TOBN(0x2ba5cef1, 0xad95a0b1), TOBN(0xc09b37a8, 0x67d54362),
+     TOBN(0x26d6cdd2, 0x01e486c9), TOBN(0x20477abf, 0x42ff9297),
+     TOBN(0xa004dcb3, 0x292a9287), TOBN(0xddc15cf6, 0x77b092c7),
+     TOBN(0x083a8464, 0x806c0605), TOBN(0x4a68df70, 0x3db997b0),
+     TOBN(0x9c134e45, 0x05bf7dd0), TOBN(0xa4e63d39, 0x8ccf7f8c),
+     TOBN(0xa6e6517f, 0x41b5f8af), TOBN(0xaa8b9342, 0xad7bc1cc),
+     TOBN(0x126f35b5, 0x1e706ad9), TOBN(0xb99cebb4, 0xc3a9ebdf),
+     TOBN(0xa75389af, 0xbf608d90), TOBN(0x76113c4f, 0xc6c89858),
+     TOBN(0x80de8eb0, 0x97e2b5aa), TOBN(0x7e1022cc, 0x63b91304),
+     TOBN(0x3bdab605, 0x6ccc066c), TOBN(0x33cbb144, 0xb2edf900),
+     TOBN(0xc4176471, 0x7af715d2), TOBN(0xe2f7f594, 0xd0134a96),
+     TOBN(0x2c1873ef, 0xa41ec956), TOBN(0xe4e7b4f6, 0x77821304),
+     TOBN(0xe5c8ff97, 0x88d5374a), TOBN(0x2b915e63, 0x80823d5b),
+     TOBN(0xea6bc755, 0xb2ee8fe2), TOBN(0x6657624c, 0xe7112651),
+     TOBN(0x157af101, 0xdace5aca), TOBN(0xc4fdbcf2, 0x11a6a267),
+     TOBN(0xdaddf340, 0xc49c8609), TOBN(0x97e49f52, 0xe9604a65),
+     TOBN(0x9be8e790, 0x937e2ad5), TOBN(0x846e2508, 0x326e17f1),
+     TOBN(0x3f38007a, 0x0bbbc0dc), TOBN(0xcf03603f, 0xb11e16d6),
+     TOBN(0xd6f800e0, 0x7442f1d5), TOBN(0x475607d1, 0x66e0e3ab),
+     TOBN(0x82807f16, 0xb7c64047), TOBN(0x8858e1e3, 0xa749883d),
+     TOBN(0x5859120b, 0x8231ee10), TOBN(0x1b80e7eb, 0x638a1ece),
+     TOBN(0xcb72525a, 0xc6aa73a4), TOBN(0xa7cdea3d, 0x844423ac),
+     TOBN(0x5ed0c007, 0xf8ae7c38), TOBN(0x6db07a5c, 0x3d740192),
+     TOBN(0xbe5e9c2a, 0x5fe36db3), TOBN(0xd5b9d57a, 0x76e95046),
+     TOBN(0x54ac32e7, 0x8eba20f2), TOBN(0xef11ca8f, 0x71b9a352),
+     TOBN(0x305e373e, 0xff98a658), TOBN(0xffe5a100, 0x823eb667),
+     TOBN(0x57477b11, 0xe51732d2), TOBN(0xdfd6eb28, 0x2538fc0e),
+     TOBN(0x5c43b0cc, 0x3b39eec5), TOBN(0x6af12778, 0xcb36cc57),
+     TOBN(0x70b0852d, 0x06c425ae), TOBN(0x6df92f8c, 0x5c221b9b),
+     TOBN(0x6c8d4f9e, 0xce826d9c), TOBN(0xf59aba7b, 0xb49359c3),
+     TOBN(0x5c8ed8d5, 0xda64309d), TOBN(0x61a6de56, 0x91b30704),
+     TOBN(0xd6b52f6a, 0x2f9b5808), TOBN(0x0eee4194, 0x98c958a7),
+     TOBN(0xcddd9aab, 0x771e4caa), TOBN(0x83965dfd, 0x78bc21be),
+     TOBN(0x02affce3, 0xb3b504f5), TOBN(0x30847a21, 0x561c8291),
+     TOBN(0xd2eb2cf1, 0x52bfda05), TOBN(0xe0e4c4e9, 0x6197b98c),
+     TOBN(0x1d35076c, 0xf8a1726f), TOBN(0x6c06085b, 0x2db11e3d),
+     TOBN(0x15c0c4d7, 0x4463ba14), TOBN(0x9d292f83, 0x0030238c),
+     TOBN(0x1311ee8b, 0x3727536d), TOBN(0xfeea86ef, 0xbeaedc1e),
+     TOBN(0xb9d18cd3, 0x66131e2e), TOBN(0xf31d974f, 0x80fe2682),
+     TOBN(0xb6e49e0f, 0xe4160289), TOBN(0x7c48ec0b, 0x08e92799),
+     TOBN(0x818111d8, 0xd1989aa7), TOBN(0xb34fa0aa, 0xebf926f9),
+     TOBN(0xdb5fe2f5, 0xa245474a), TOBN(0xf80a6ebb, 0x3c7ca756),
+     TOBN(0xa7f96054, 0xafa05dd8), TOBN(0x26dfcf21, 0xfcaf119e),
+     TOBN(0xe20ef2e3, 0x0564bb59), TOBN(0xef4dca50, 0x61cb02b8),
+     TOBN(0xcda7838a, 0x65d30672), TOBN(0x8b08d534, 0xfd657e86),
+     TOBN(0x4c5b4395, 0x46d595c8), TOBN(0x39b58725, 0x425cb836),
+     TOBN(0x8ea61059, 0x3de9abe3), TOBN(0x40434881, 0x9cdc03be),
+     TOBN(0x9b261245, 0xcfedce8c), TOBN(0x78c318b4, 0xcf5234a1),
+     TOBN(0x510bcf16, 0xfde24c99), TOBN(0x2a77cb75, 0xa2c2ff5d),
+     TOBN(0x9c895c2b, 0x27960fb4), TOBN(0xd30ce975, 0xb0eda42b),
+     TOBN(0xfda85393, 0x1a62cc26), TOBN(0x23c69b96, 0x50c0e052),
+     TOBN(0xa227df15, 0xbfc633f3), TOBN(0x2ac78848, 0x1bae7d48),
+     TOBN(0x487878f9, 0x187d073d), TOBN(0x6c2be919, 0x967f807d),
+     TOBN(0x765861d8, 0x336e6d8f), TOBN(0x88b8974c, 0xce528a43),
+     TOBN(0x09521177, 0xff57d051), TOBN(0x2ff38037, 0xfb6a1961),
+     TOBN(0xfc0aba74, 0xa3d76ad4), TOBN(0x7c764803, 0x25a7ec17),
+     TOBN(0x7532d75f, 0x48879bc8), TOBN(0xea7eacc0, 0x58ce6bc1),
+     TOBN(0xc82176b4, 0x8e896c16), TOBN(0x9a30e0b2, 0x2c750fed),
+     TOBN(0xc37e2c2e, 0x421d3aa4), TOBN(0xf926407c, 0xe84fa840),
+     TOBN(0x18abc03d, 0x1454e41c), TOBN(0x26605ecd, 0x3f7af644),
+     TOBN(0x242341a6, 0xd6a5eabf), TOBN(0x1edb84f4, 0x216b668e),
+     TOBN(0xd836edb8, 0x04010102), TOBN(0x5b337ce7, 0x945e1d8c),
+     TOBN(0xd2075c77, 0xc055dc14), TOBN(0x2a0ffa25, 0x81d89cdf),
+     TOBN(0x8ce815ea, 0x6ffdcbaf), TOBN(0xa3428878, 0xfb648867),
+     TOBN(0x277699cf, 0x884655fb), TOBN(0xfa5b5bd6, 0x364d3e41),
+     TOBN(0x01f680c6, 0x441e1cb7), TOBN(0x3fd61e66, 0xb70a7d67),
+     TOBN(0x666ba2dc, 0xcc78cf66), TOBN(0xb3018174, 0x6fdbff77),
+     TOBN(0x8d4dd0db, 0x168d4668), TOBN(0x259455d0, 0x1dab3a2a),
+     TOBN(0xf58564c5, 0xcde3acec), TOBN(0x77141925, 0x13adb276),
+     TOBN(0x527d725d, 0x8a303f65), TOBN(0x55deb6c9, 0xe6f38f7b),
+     TOBN(0xfd5bb657, 0xb1fa70fb), TOBN(0xfa07f50f, 0xd8073a00),
+     TOBN(0xf72e3aa7, 0xbca02500), TOBN(0xf68f895d, 0x9975740d),
+     TOBN(0x30112060, 0x5cae2a6a), TOBN(0x01bd7218, 0x02874842),
+     TOBN(0x3d423891, 0x7ce47bd3), TOBN(0xa66663c1, 0x789544f6),
+     TOBN(0x864d05d7, 0x3272d838), TOBN(0xe22924f9, 0xfa6295c5),
+     TOBN(0x8189593f, 0x6c2fda32), TOBN(0x330d7189, 0xb184b544),
+     TOBN(0x79efa62c, 0xbde1f714), TOBN(0x35771c94, 0xe5cb1a63),
+     TOBN(0x2f4826b8, 0x641c8332), TOBN(0x00a894fb, 0xc8cee854),
+     TOBN(0xb4b9a39b, 0x36194d40), TOBN(0xe857a7c5, 0x77612601),
+     TOBN(0xf4209dd2, 0x4ecf2f58), TOBN(0x82b9e66d, 0x5a033487),
+     TOBN(0xc1e36934, 0xe4e8b9dd), TOBN(0xd2372c9d, 0xa42377d7),
+     TOBN(0x51dc94c7, 0x0e3ae43b), TOBN(0x4c57761e, 0x04474f6f),
+     TOBN(0xdcdacd0a, 0x1058a318), TOBN(0x369cf3f5, 0x78053a9a),
+     TOBN(0xc6c3de50, 0x31c68de2), TOBN(0x4653a576, 0x3c4b6d9f),
+     TOBN(0x1688dd5a, 0xaa4e5c97), TOBN(0x5be80aa1, 0xb7ab3c74),
+     TOBN(0x70cefe7c, 0xbc65c283), TOBN(0x57f95f13, 0x06867091),
+     TOBN(0xa39114e2, 0x4415503b), TOBN(0xc08ff7c6, 0x4cbb17e9),
+     TOBN(0x1eff674d, 0xd7dec966), TOBN(0x6d4690af, 0x53376f63),
+     TOBN(0xff6fe32e, 0xea74237b), TOBN(0xc436d17e, 0xcd57508e),
+     TOBN(0x15aa28e1, 0xedcc40fe), TOBN(0x0d769c04, 0x581bbb44),
+     TOBN(0xc240b6de, 0x34eaacda), TOBN(0xd9e116e8, 0x2ba0f1de),
+     TOBN(0xcbe45ec7, 0x79438e55), TOBN(0x91787c9d, 0x96f752d7),
+     TOBN(0x897f532b, 0xf129ac2f), TOBN(0xd307b7c8, 0x5a36e22c),
+     TOBN(0x91940675, 0x749fb8f3), TOBN(0xd14f95d0, 0x157fdb28),
+     TOBN(0xfe51d029, 0x6ae55043), TOBN(0x8931e98f, 0x44a87de1),
+     TOBN(0xe57f1cc6, 0x09e4fee2), TOBN(0x0d063b67, 0x4e072d92),
+     TOBN(0x70a998b9, 0xed0e4316), TOBN(0xe74a736b, 0x306aca46),
+     TOBN(0xecf0fbf2, 0x4fda97c7), TOBN(0xa40f65cb, 0x3e178d93),
+     TOBN(0x16253604, 0x16df4285), TOBN(0xb0c9babb, 0xd0c56ae2),
+     TOBN(0x73032b19, 0xcfc5cfc3), TOBN(0xe497e5c3, 0x09752056),
+     TOBN(0x12096bb4, 0x164bda96), TOBN(0x1ee42419, 0xa0b74da1),
+     TOBN(0x8fc36243, 0x403826ba), TOBN(0x0c8f0069, 0xdc09e660),
+     TOBN(0x8667e981, 0xc27253c9), TOBN(0x05a6aefb, 0x92b36a45),
+     TOBN(0xa62c4b36, 0x9cb7bb46), TOBN(0x8394f375, 0x11f7027b),
+     TOBN(0x747bc79c, 0x5f109d0f), TOBN(0xcad88a76, 0x5b8cc60a),
+     TOBN(0x80c5a66b, 0x58f09e68), TOBN(0xe753d451, 0xf6127eac),
+     TOBN(0xc44b74a1, 0x5b0ec6f5), TOBN(0x47989fe4, 0x5289b2b8),
+     TOBN(0x745f8484, 0x58d6fc73), TOBN(0xec362a6f, 0xf61c70ab),
+     TOBN(0x070c98a7, 0xb3a8ad41), TOBN(0x73a20fc0, 0x7b63db51),
+     TOBN(0xed2c2173, 0xf44c35f4), TOBN(0x8a56149d, 0x9acc9dca),
+     TOBN(0x98f17881, 0x9ac6e0f4), TOBN(0x360fdeaf, 0xa413b5ed),
+     TOBN(0x0625b8f4, 0xa300b0fd), TOBN(0xf1f4d76a, 0x5b3222d3),
+     TOBN(0x9d6f5109, 0x587f76b8), TOBN(0x8b4ee08d, 0x2317fdb5),
+     TOBN(0x88089bb7, 0x8c68b095), TOBN(0x95570e9a, 0x5808d9b9),
+     TOBN(0xa395c36f, 0x35d33ae7), TOBN(0x200ea123, 0x50bb5a94),
+     TOBN(0x20c789bd, 0x0bafe84b), TOBN(0x243ef52d, 0x0919276a),
+     TOBN(0x3934c577, 0xe23ae233), TOBN(0xb93807af, 0xa460d1ec),
+     TOBN(0xb72a53b1, 0xf8fa76a4), TOBN(0xd8914cb0, 0xc3ca4491),
+     TOBN(0x2e128494, 0x3fb42622), TOBN(0x3b2700ac, 0x500907d5),
+     TOBN(0xf370fb09, 0x1a95ec63), TOBN(0xf8f30be2, 0x31b6dfbd),
+     TOBN(0xf2b2f8d2, 0x69e55f15), TOBN(0x1fead851, 0xcc1323e9),
+     TOBN(0xfa366010, 0xd9e5eef6), TOBN(0x64d487b0, 0xe316107e),
+     TOBN(0x4c076b86, 0xd23ddc82), TOBN(0x03fd344c, 0x7e0143f0),
+     TOBN(0xa95362ff, 0x317af2c5), TOBN(0x0add3db7, 0xe18b7a4f),
+     TOBN(0x9c673e3f, 0x8260e01b), TOBN(0xfbeb49e5, 0x54a1cc91),
+     TOBN(0x91351bf2, 0x92f2e433), TOBN(0xc755e7ec, 0x851141eb),
+     TOBN(0xc9a95139, 0x29607745), TOBN(0x0ca07420, 0xa26f2b28),
+     TOBN(0xcb2790e7, 0x4bc6f9dd), TOBN(0x345bbb58, 0xadcaffc0),
+     TOBN(0xc65ea38c, 0xbe0f27a2), TOBN(0x67c24d7c, 0x641fcb56),
+     TOBN(0x2c25f0a7, 0xa9e2c757), TOBN(0x93f5cdb0, 0x16f16c49),
+     TOBN(0x2ca5a9d7, 0xc5ee30a1), TOBN(0xd1593635, 0xb909b729),
+     TOBN(0x804ce9f3, 0xdadeff48), TOBN(0xec464751, 0xb07c30c3),
+     TOBN(0x89d65ff3, 0x9e49af6a), TOBN(0xf2d6238a, 0x6f3d01bc),
+     TOBN(0x1095561e, 0x0bced843), TOBN(0x51789e12, 0xc8a13fd8),
+     TOBN(0xd633f929, 0x763231df), TOBN(0x46df9f7d, 0xe7cbddef),
+     TOBN(0x01c889c0, 0xcb265da8), TOBN(0xfce1ad10, 0xaf4336d2),
+     TOBN(0x8d110df6, 0xfc6a0a7e), TOBN(0xdd431b98, 0x6da425dc),
+     TOBN(0xcdc4aeab, 0x1834aabe), TOBN(0x84deb124, 0x8439b7fc),
+     TOBN(0x8796f169, 0x3c2a5998), TOBN(0x9b9247b4, 0x7947190d),
+     TOBN(0x55b9d9a5, 0x11597014), TOBN(0x7e9dd70d, 0x7b1566ee),
+     TOBN(0x94ad78f7, 0xcbcd5e64), TOBN(0x0359ac17, 0x9bd4c032),
+     TOBN(0x3b11baaf, 0x7cc222ae), TOBN(0xa6a6e284, 0xba78e812),
+     TOBN(0x8392053f, 0x24cea1a0), TOBN(0xc97bce4a, 0x33621491),
+     TOBN(0x7eb1db34, 0x35399ee9), TOBN(0x473f78ef, 0xece81ad1),
+     TOBN(0x41d72fe0, 0xf63d3d0d), TOBN(0xe620b880, 0xafab62fc),
+     TOBN(0x92096bc9, 0x93158383), TOBN(0x41a21357, 0x8f896f6c),
+     TOBN(0x1b5ee2fa, 0xc7dcfcab), TOBN(0x650acfde, 0x9546e007),
+     TOBN(0xc081b749, 0xb1b02e07), TOBN(0xda9e41a0, 0xf9eca03d),
+     TOBN(0x013ba727, 0x175a54ab), TOBN(0xca0cd190, 0xea5d8d10),
+     TOBN(0x85ea52c0, 0x95fd96a9), TOBN(0x2c591b9f, 0xbc5c3940),
+     TOBN(0x6fb4d4e4, 0x2bad4d5f), TOBN(0xfa4c3590, 0xfef0059b),
+     TOBN(0x6a10218a, 0xf5122294), TOBN(0x9a78a81a, 0xa85751d1),
+     TOBN(0x04f20579, 0xa98e84e7), TOBN(0xfe1242c0, 0x4997e5b5),
+     TOBN(0xe77a273b, 0xca21e1e4), TOBN(0xfcc8b1ef, 0x9411939d),
+     TOBN(0xe20ea302, 0x92d0487a), TOBN(0x1442dbec, 0x294b91fe),
+     TOBN(0x1f7a4afe, 0xbb6b0e8f), TOBN(0x1700ef74, 0x6889c318),
+     TOBN(0xf5bbffc3, 0x70f1fc62), TOBN(0x3b31d4b6, 0x69c79cca),
+     TOBN(0xe8bc2aab, 0xa7f6340d), TOBN(0xb0b08ab4, 0xa725e10a),
+     TOBN(0x44f05701, 0xae340050), TOBN(0xba4b3016, 0x1cf0c569),
+     TOBN(0x5aa29f83, 0xfbe19a51), TOBN(0x1b9ed428, 0xb71d752e),
+     TOBN(0x1666e54e, 0xeb4819f5), TOBN(0x616cdfed, 0x9e18b75b),
+     TOBN(0x112ed5be, 0x3ee27b0b), TOBN(0xfbf28319, 0x44c7de4d),
+     TOBN(0xd685ec85, 0xe0e60d84), TOBN(0x68037e30, 0x1db7ee78),
+     TOBN(0x5b65bdcd, 0x003c4d6e), TOBN(0x33e7363a, 0x93e29a6a),
+     TOBN(0x995b3a61, 0x08d0756c), TOBN(0xd727f85c, 0x2faf134b),
+     TOBN(0xfac6edf7, 0x1d337823), TOBN(0x99b9aa50, 0x0439b8b4),
+     TOBN(0x722eb104, 0xe2b4e075), TOBN(0x49987295, 0x437c4926),
+     TOBN(0xb1e4c0e4, 0x46a9b82d), TOBN(0xd0cb3197, 0x57a006f5),
+     TOBN(0xf3de0f7d, 0xd7808c56), TOBN(0xb5c54d8f, 0x51f89772),
+     TOBN(0x500a114a, 0xadbd31aa), TOBN(0x9afaaaa6, 0x295f6cab),
+     TOBN(0x94705e21, 0x04cf667a), TOBN(0xfc2a811b, 0x9d3935d7),
+     TOBN(0x560b0280, 0x6d09267c), TOBN(0xf19ed119, 0xf780e53b),
+     TOBN(0xf0227c09, 0x067b6269), TOBN(0x967b8533, 0x5caef599),
+     TOBN(0x155b9243, 0x68efeebc), TOBN(0xcd6d34f5, 0xc497bae6),
+     TOBN(0x1dd8d5d3, 0x6cceb370), TOBN(0x2aeac579, 0xa78d7bf9),
+     TOBN(0x5d65017d, 0x70b67a62), TOBN(0x70c8e44f, 0x17c53f67),
+     TOBN(0xd1fc0950, 0x86a34d09), TOBN(0xe0fca256, 0xe7134907),
+     TOBN(0xe24fa29c, 0x80fdd315), TOBN(0x2c4acd03, 0xd87499ad),
+     TOBN(0xbaaf7517, 0x3b5a9ba6), TOBN(0xb9cbe1f6, 0x12e51a51),
+     TOBN(0xd88edae3, 0x5e154897), TOBN(0xe4309c3c, 0x77b66ca0),
+     TOBN(0xf5555805, 0xf67f3746), TOBN(0x85fc37ba, 0xa36401ff),
+     TOBN(0xdf86e2ca, 0xd9499a53), TOBN(0x6270b2a3, 0xecbc955b),
+     TOBN(0xafae64f5, 0x974ad33b), TOBN(0x04d85977, 0xfe7b2df1),
+     TOBN(0x2a3db3ff, 0x4ab03f73), TOBN(0x0b87878a, 0x8702740a),
+     TOBN(0x6d263f01, 0x5a061732), TOBN(0xc25430ce, 0xa32a1901),
+     TOBN(0xf7ebab3d, 0xdb155018), TOBN(0x3a86f693, 0x63a9b78e),
+     TOBN(0x349ae368, 0xda9f3804), TOBN(0x470f07fe, 0xa164349c),
+     TOBN(0xd52f4cc9, 0x8562baa5), TOBN(0xc74a9e86, 0x2b290df3),
+     TOBN(0xd3a1aa35, 0x43471a24), TOBN(0x239446be, 0xb8194511),
+     TOBN(0xbec2dd00, 0x81dcd44d), TOBN(0xca3d7f0f, 0xc42ac82d),
+     TOBN(0x1f3db085, 0xfdaf4520), TOBN(0xbb6d3e80, 0x4549daf2),
+     TOBN(0xf5969d8a, 0x19ad5c42), TOBN(0x7052b13d, 0xdbfd1511),
+     TOBN(0x11890d1b, 0x682b9060), TOBN(0xa71d3883, 0xac34452c),
+     TOBN(0xa438055b, 0x783805b4), TOBN(0x43241277, 0x4725b23e),
+     TOBN(0xf20cf96e, 0x4901bbed), TOBN(0x6419c710, 0xf432a2bb),
+     TOBN(0x57a0fbb9, 0xdfa9cd7d), TOBN(0x589111e4, 0x00daa249),
+     TOBN(0x19809a33, 0x7b60554e), TOBN(0xea5f8887, 0xede283a4),
+     TOBN(0x2d713802, 0x503bfd35), TOBN(0x151bb0af, 0x585d2a53),
+     TOBN(0x40b08f74, 0x43b30ca8), TOBN(0xe10b5bba, 0xd9934583),
+     TOBN(0xe8a546d6, 0xb51110ad), TOBN(0x1dd50e66, 0x28e0b6c5),
+     TOBN(0x292e9d54, 0xcff2b821), TOBN(0x3882555d, 0x47281760),
+     TOBN(0x134838f8, 0x3724d6e3), TOBN(0xf2c679e0, 0x22ddcda1),
+     TOBN(0x40ee8815, 0x6d2a5768), TOBN(0x7f227bd2, 0x1c1e7e2d),
+     TOBN(0x487ba134, 0xd04ff443), TOBN(0x76e2ff3d, 0xc614e54b),
+     TOBN(0x36b88d6f, 0xa3177ec7), TOBN(0xbf731d51, 0x2328fff5),
+     TOBN(0x758caea2, 0x49ba158e), TOBN(0x5ab8ff4c, 0x02938188),
+     TOBN(0x33e16056, 0x35edc56d), TOBN(0x5a69d349, 0x7e940d79),
+     TOBN(0x6c4fd001, 0x03866dcb), TOBN(0x20a38f57, 0x4893cdef),
+     TOBN(0xfbf3e790, 0xfac3a15b), TOBN(0x6ed7ea2e, 0x7a4f8e6b),
+     TOBN(0xa663eb4f, 0xbc3aca86), TOBN(0x22061ea5, 0x080d53f7),
+     TOBN(0x2480dfe6, 0xf546783f), TOBN(0xd38bc6da, 0x5a0a641e),
+     TOBN(0xfb093cd1, 0x2ede8965), TOBN(0x89654db4, 0xacb455cf),
+     TOBN(0x413cbf9a, 0x26e1adee), TOBN(0x291f3764, 0x373294d4),
+     TOBN(0x00797257, 0x648083fe), TOBN(0x25f504d3, 0x208cc341),
+     TOBN(0x635a8e5e, 0xc3a0ee43), TOBN(0x70aaebca, 0x679898ff),
+     TOBN(0x9ee9f547, 0x5dc63d56), TOBN(0xce987966, 0xffb34d00),
+     TOBN(0xf9f86b19, 0x5e26310a), TOBN(0x9e435484, 0x382a8ca8),
+     TOBN(0x253bcb81, 0xc2352fe4), TOBN(0xa4eac8b0, 0x4474b571),
+     TOBN(0xc1b97512, 0xc1ad8cf8), TOBN(0x193b4e9e, 0x99e0b697),
+     TOBN(0x939d2716, 0x01e85df0), TOBN(0x4fb265b3, 0xcd44eafd),
+     TOBN(0x321e7dcd, 0xe51e1ae2), TOBN(0x8e3a8ca6, 0xe3d8b096),
+     TOBN(0x8de46cb0, 0x52604998), TOBN(0x91099ad8, 0x39072aa7),
+     TOBN(0x2617f91c, 0x93aa96b8), TOBN(0x0fc8716b, 0x7fca2e13),
+     TOBN(0xa7106f5e, 0x95328723), TOBN(0xd1c9c40b, 0x262e6522),
+     TOBN(0xb9bafe86, 0x42b7c094), TOBN(0x1873439d, 0x1543c021),
+     TOBN(0xe1baa5de, 0x5cbefd5d), TOBN(0xa363fc5e, 0x521e8aff),
+     TOBN(0xefe6320d, 0xf862eaac), TOBN(0x14419c63, 0x22c647dc),
+     TOBN(0x0e06707c, 0x4e46d428), TOBN(0xcb6c834f, 0x4a178f8f),
+     TOBN(0x0f993a45, 0xd30f917c), TOBN(0xd4c4b049, 0x9879afee),
+     TOBN(0xb6142a1e, 0x70500063), TOBN(0x7c9b41c3, 0xa5d9d605),
+     TOBN(0xbc00fc2f, 0x2f8ba2c7), TOBN(0x0966eb2f, 0x7c67aa28),
+     TOBN(0x13f7b516, 0x5a786972), TOBN(0x3bfb7557, 0x8a2fbba0),
+     TOBN(0x131c4f23, 0x5a2b9620), TOBN(0xbff3ed27, 0x6faf46be),
+     TOBN(0x9b4473d1, 0x7e172323), TOBN(0x421e8878, 0x339f6246),
+     TOBN(0x0fa8587a, 0x25a41632), TOBN(0xc0814124, 0xa35b6c93),
+     TOBN(0x2b18a9f5, 0x59ebb8db), TOBN(0x264e3357, 0x76edb29c),
+     TOBN(0xaf245ccd, 0xc87c51e2), TOBN(0x16b3015b, 0x501e6214),
+     TOBN(0xbb31c560, 0x0a3882ce), TOBN(0x6961bb94, 0xfec11e04),
+     TOBN(0x3b825b8d, 0xeff7a3a0), TOBN(0xbec33738, 0xb1df7326),
+     TOBN(0x68ad747c, 0x99604a1f), TOBN(0xd154c934, 0x9a3bd499),
+     TOBN(0xac33506f, 0x1cc7a906), TOBN(0x73bb5392, 0x6c560e8f),
+     TOBN(0x6428fcbe, 0x263e3944), TOBN(0xc11828d5, 0x1c387434),
+     TOBN(0x3cd04be1, 0x3e4b12ff), TOBN(0xc3aad9f9, 0x2d88667c),
+     TOBN(0xc52ddcf8, 0x248120cf), TOBN(0x985a892e, 0x2a389532),
+     TOBN(0xfbb4b21b, 0x3bb85fa0), TOBN(0xf95375e0, 0x8dfc6269),
+     TOBN(0xfb4fb06c, 0x7ee2acea), TOBN(0x6785426e, 0x309c4d1f),
+     TOBN(0x659b17c8, 0xd8ceb147), TOBN(0x9b649eee, 0xb70a5554),
+     TOBN(0x6b7fa0b5, 0xac6bc634), TOBN(0xd99fe2c7, 0x1d6e732f),
+     TOBN(0x30e6e762, 0x8d3abba2), TOBN(0x18fee6e7, 0xa797b799),
+     TOBN(0x5c9d360d, 0xc696464d), TOBN(0xe3baeb48, 0x27bfde12),
+     TOBN(0x2bf5db47, 0xf23206d5), TOBN(0x2f6d3420, 0x1d260152),
+     TOBN(0x17b87653, 0x3f8ff89a), TOBN(0x5157c30c, 0x378fa458),
+     TOBN(0x7517c5c5, 0x2d4fb936), TOBN(0xef22f7ac, 0xe6518cdc),
+     TOBN(0xdeb483e6, 0xbf847a64), TOBN(0xf5084558, 0x92e0fa89),}
+    ,
+    {TOBN(0xab9659d8, 0xdf7304d4), TOBN(0xb71bcf1b, 0xff210e8e),
+     TOBN(0xa9a2438b, 0xd73fbd60), TOBN(0x4595cd1f, 0x5d11b4de),
+     TOBN(0x9c0d329a, 0x4835859d), TOBN(0x4a0f0d2d, 0x7dbb6e56),
+     TOBN(0xc6038e5e, 0xdf928a4e), TOBN(0xc9429621, 0x8f5ad154),
+     TOBN(0x91213462, 0xf23f2d92), TOBN(0x6cab71bd, 0x60b94078),
+     TOBN(0x6bdd0a63, 0x176cde20), TOBN(0x54c9b20c, 0xee4d54bc),
+     TOBN(0x3cd2d8aa, 0x9f2ac02f), TOBN(0x03f8e617, 0x206eedb0),
+     TOBN(0xc7f68e16, 0x93086434), TOBN(0x831469c5, 0x92dd3db9),
+     TOBN(0x8521df24, 0x8f981354), TOBN(0x587e23ec, 0x3588a259),
+     TOBN(0xcbedf281, 0xd7a0992c), TOBN(0x06930a55, 0x38961407),
+     TOBN(0x09320deb, 0xbe5bbe21), TOBN(0xa7ffa5b5, 0x2491817f),
+     TOBN(0xe6c8b4d9, 0x09065160), TOBN(0xac4f3992, 0xfff6d2a9),
+     TOBN(0x7aa7a158, 0x3ae9c1bd), TOBN(0xe0af6d98, 0xe37ce240),
+     TOBN(0xe54342d9, 0x28ab38b4), TOBN(0xe8b75007, 0x0a1c98ca),
+     TOBN(0xefce86af, 0xe02358f2), TOBN(0x31b8b856, 0xea921228),
+     TOBN(0x052a1912, 0x0a1c67fc), TOBN(0xb4069ea4, 0xe3aead59),
+     TOBN(0x3232d6e2, 0x7fa03cb3), TOBN(0xdb938e5b, 0x0fdd7d88),
+     TOBN(0x04c1d2cd, 0x2ccbfc5d), TOBN(0xd2f45c12, 0xaf3a580f),
+     TOBN(0x592620b5, 0x7883e614), TOBN(0x5fd27e68, 0xbe7c5f26),
+     TOBN(0x139e45a9, 0x1567e1e3), TOBN(0x2cc71d2d, 0x44d8aaaf),
+     TOBN(0x4a9090cd, 0xe36d0757), TOBN(0xf722d7b1, 0xd9a29382),
+     TOBN(0xfb7fb04c, 0x04b48ddf), TOBN(0x628ad2a7, 0xebe16f43),
+     TOBN(0xcd3fbfb5, 0x20226040), TOBN(0x6c34ecb1, 0x5104b6c4),
+     TOBN(0x30c0754e, 0xc903c188), TOBN(0xec336b08, 0x2d23cab0),
+     TOBN(0x473d62a2, 0x1e206ee5), TOBN(0xf1e27480, 0x8c49a633),
+     TOBN(0x87ab956c, 0xe9f6b2c3), TOBN(0x61830b48, 0x62b606ea),
+     TOBN(0x67cd6846, 0xe78e815f), TOBN(0xfe40139f, 0x4c02082a),
+     TOBN(0x52bbbfcb, 0x952ec365), TOBN(0x74c11642, 0x6b9836ab),
+     TOBN(0x9f51439e, 0x558df019), TOBN(0x230da4ba, 0xac712b27),
+     TOBN(0x518919e3, 0x55185a24), TOBN(0x4dcefcdd, 0x84b78f50),
+     TOBN(0xa7d90fb2, 0xa47d4c5a), TOBN(0x55ac9abf, 0xb30e009e),
+     TOBN(0xfd2fc359, 0x74eed273), TOBN(0xb72d824c, 0xdbea8faf),
+     TOBN(0xce721a74, 0x4513e2ca), TOBN(0x0b418612, 0x38240b2c),
+     TOBN(0x05199968, 0xd5baa450), TOBN(0xeb1757ed, 0x2b0e8c25),
+     TOBN(0x6ebc3e28, 0x3dfac6d5), TOBN(0xb2431e2e, 0x48a237f5),
+     TOBN(0x2acb5e23, 0x52f61499), TOBN(0x5558a2a7, 0xe06c936b),
+     TOBN(0xd213f923, 0xcbb13d1b), TOBN(0x98799f42, 0x5bfb9bfe),
+     TOBN(0x1ae8ddc9, 0x701144a9), TOBN(0x0b8b3bb6, 0x4c5595ee),
+     TOBN(0x0ea9ef2e, 0x3ecebb21), TOBN(0x17cb6c4b, 0x3671f9a7),
+     TOBN(0x47ef464f, 0x726f1d1f), TOBN(0x171b9484, 0x6943a276),
+     TOBN(0x51a4ae2d, 0x7ef0329c), TOBN(0x08509222, 0x91c4402a),
+     TOBN(0x64a61d35, 0xafd45bbc), TOBN(0x38f096fe, 0x3035a851),
+     TOBN(0xc7468b74, 0xa1dec027), TOBN(0xe8cf10e7, 0x4fc7dcba),
+     TOBN(0xea35ff40, 0xf4a06353), TOBN(0x0b4c0dfa, 0x8b77dd66),
+     TOBN(0x779b8552, 0xde7e5c19), TOBN(0xfab28609, 0xc1c0256c),
+     TOBN(0x64f58eee, 0xabd4743d), TOBN(0x4e8ef838, 0x7b6cc93b),
+     TOBN(0xee650d26, 0x4cb1bf3d), TOBN(0x4c1f9d09, 0x73dedf61),
+     TOBN(0xaef7c9d7, 0xbfb70ced), TOBN(0x1ec0507e, 0x1641de1e),
+     TOBN(0xcd7e5cc7, 0xcde45079), TOBN(0xde173c9a, 0x516ac9e4),
+     TOBN(0x517a8494, 0xc170315c), TOBN(0x438fd905, 0x91d8e8fb),
+     TOBN(0x5145c506, 0xc7d9630b), TOBN(0x6457a87b, 0xf47d4d75),
+     TOBN(0xd31646bf, 0x0d9a80e8), TOBN(0x453add2b, 0xcef3aabe),
+     TOBN(0xc9941109, 0xa607419d), TOBN(0xfaa71e62, 0xbb6bca80),
+     TOBN(0x34158c13, 0x07c431f3), TOBN(0x594abebc, 0x992bc47a),
+     TOBN(0x6dfea691, 0xeb78399f), TOBN(0x48aafb35, 0x3f42cba4),
+     TOBN(0xedcd65af, 0x077c04f0), TOBN(0x1a29a366, 0xe884491a),
+     TOBN(0x023a40e5, 0x1c21f2bf), TOBN(0xf99a513c, 0xa5057aee),
+     TOBN(0xa3fe7e25, 0xbcab072e), TOBN(0x8568d2e1, 0x40e32bcf),
+     TOBN(0x904594eb, 0xd3f69d9f), TOBN(0x181a9733, 0x07affab1),
+     TOBN(0xe4d68d76, 0xb6e330f4), TOBN(0x87a6dafb, 0xc75a7fc1),
+     TOBN(0x549db2b5, 0xef7d9289), TOBN(0x2480d4a8, 0x197f015a),
+     TOBN(0x61d5590b, 0xc40493b6), TOBN(0x3a55b52e, 0x6f780331),
+     TOBN(0x40eb8115, 0x309eadb0), TOBN(0xdea7de5a, 0x92e5c625),
+     TOBN(0x64d631f0, 0xcc6a3d5a), TOBN(0x9d5e9d7c, 0x93e8dd61),
+     TOBN(0xf297bef5, 0x206d3ffc), TOBN(0x23d5e033, 0x7d808bd4),
+     TOBN(0x4a4f6912, 0xd24cf5ba), TOBN(0xe4d8163b, 0x09cdaa8a),
+     TOBN(0x0e0de9ef, 0xd3082e8e), TOBN(0x4fe1246c, 0x0192f360),
+     TOBN(0x1f900150, 0x4b8eee0a), TOBN(0x5219da81, 0xf1da391b),
+     TOBN(0x7bf6a5c1, 0xf7ea25aa), TOBN(0xd165e6bf, 0xfbb07d5f),
+     TOBN(0xe3539361, 0x89e78671), TOBN(0xa3fcac89, 0x2bac4219),
+     TOBN(0xdfab6fd4, 0xf0baa8ab), TOBN(0x5a4adac1, 0xe2c1c2e5),
+     TOBN(0x6cd75e31, 0x40d85849), TOBN(0xce263fea, 0x19b39181),
+     TOBN(0xcb6803d3, 0x07032c72), TOBN(0x7f40d5ce, 0x790968c8),
+     TOBN(0xa6de86bd, 0xdce978f0), TOBN(0x25547c4f, 0x368f751c),
+     TOBN(0xb1e685fd, 0x65fb2a9e), TOBN(0xce69336f, 0x1eb9179c),
+     TOBN(0xb15d1c27, 0x12504442), TOBN(0xb7df465c, 0xb911a06b),
+     TOBN(0xb8d804a3, 0x315980cd), TOBN(0x693bc492, 0xfa3bebf7),
+     TOBN(0x3578aeee, 0x2253c504), TOBN(0x158de498, 0xcd2474a2),
+     TOBN(0x1331f5c7, 0xcfda8368), TOBN(0xd2d7bbb3, 0x78d7177e),
+     TOBN(0xdf61133a, 0xf3c1e46e), TOBN(0x5836ce7d, 0xd30e7be8),
+     TOBN(0x83084f19, 0x94f834cb), TOBN(0xd35653d4, 0x429ed782),
+     TOBN(0xa542f16f, 0x59e58243), TOBN(0xc2b52f65, 0x0470a22d),
+     TOBN(0xe3b6221b, 0x18f23d96), TOBN(0xcb05abac, 0x3f5252b4),
+     TOBN(0xca00938b, 0x87d61402), TOBN(0x2f186cdd, 0x411933e4),
+     TOBN(0xe042ece5, 0x9a29a5c5), TOBN(0xb19b3c07, 0x3b6c8402),
+     TOBN(0xc97667c7, 0x19d92684), TOBN(0xb5624622, 0xebc66372),
+     TOBN(0x0cb96e65, 0x3c04fa02), TOBN(0x83a7176c, 0x8eaa39aa),
+     TOBN(0x2033561d, 0xeaa1633f), TOBN(0x45a9d086, 0x4533df73),
+     TOBN(0xe0542c1d, 0x3dc090bc), TOBN(0x82c996ef, 0xaa59c167),
+     TOBN(0xe3f735e8, 0x0ee7fc4d), TOBN(0x7b179393, 0x7c35db79),
+     TOBN(0xb6419e25, 0xf8c5dbfd), TOBN(0x4d9d7a1e, 0x1f327b04),
+     TOBN(0x979f6f9b, 0x298dfca8), TOBN(0xc7c5dff1, 0x8de9366a),
+     TOBN(0x1b7a588d, 0x04c82bdd), TOBN(0x68005534, 0xf8319dfd),
+     TOBN(0xde8a55b5, 0xd8eb9580), TOBN(0x5ea886da, 0x8d5bca81),
+     TOBN(0xe8530a01, 0x252a0b4d), TOBN(0x1bffb4fe, 0x35eaa0a1),
+     TOBN(0x2ad828b1, 0xd8e99563), TOBN(0x7de96ef5, 0x95f9cd87),
+     TOBN(0x4abb2d0c, 0xd77d970c), TOBN(0x03cfb933, 0xd33ef9cb),
+     TOBN(0xb0547c01, 0x8b211fe9), TOBN(0x2fe64809, 0xa56ed1c6),
+     TOBN(0xcb7d5624, 0xc2ac98cc), TOBN(0x2a1372c0, 0x1a393e33),
+     TOBN(0xc8d1ec1c, 0x29660521), TOBN(0xf3d31b04, 0xb37ac3e9),
+     TOBN(0xa29ae9df, 0x5ece6e7c), TOBN(0x0603ac8f, 0x0facfb55),
+     TOBN(0xcfe85b7a, 0xdda233a5), TOBN(0xe618919f, 0xbd75f0b8),
+     TOBN(0xf555a3d2, 0x99bf1603), TOBN(0x1f43afc9, 0xf184255a),
+     TOBN(0xdcdaf341, 0x319a3e02), TOBN(0xd3b117ef, 0x03903a39),
+     TOBN(0xe095da13, 0x65d1d131), TOBN(0x86f16367, 0xc37ad03e),
+     TOBN(0x5f37389e, 0x462cd8dd), TOBN(0xc103fa04, 0xd67a60e6),
+     TOBN(0x57c34344, 0xf4b478f0), TOBN(0xce91edd8, 0xe117c98d),
+     TOBN(0x001777b0, 0x231fc12e), TOBN(0x11ae47f2, 0xb207bccb),
+     TOBN(0xd983cf8d, 0x20f8a242), TOBN(0x7aff5b1d, 0xf22e1ad8),
+     TOBN(0x68fd11d0, 0x7fc4feb3), TOBN(0x5d53ae90, 0xb0f1c3e1),
+     TOBN(0x50fb7905, 0xec041803), TOBN(0x85e3c977, 0x14404888),
+     TOBN(0x0e67faed, 0xac628d8f), TOBN(0x2e865150, 0x6668532c),
+     TOBN(0x15acaaa4, 0x6a67a6b0), TOBN(0xf4cdee25, 0xb25cec41),
+     TOBN(0x49ee565a, 0xe4c6701e), TOBN(0x2a04ca66, 0xfc7d63d8),
+     TOBN(0xeb105018, 0xef0543fb), TOBN(0xf709a4f5, 0xd1b0d81d),
+     TOBN(0x5b906ee6, 0x2915d333), TOBN(0xf4a87412, 0x96f1f0ab),
+     TOBN(0xb6b82fa7, 0x4d82f4c2), TOBN(0x90725a60, 0x6804efb3),
+     TOBN(0xbc82ec46, 0xadc3425e), TOBN(0xb7b80581, 0x2787843e),
+     TOBN(0xdf46d91c, 0xdd1fc74c), TOBN(0xdc1c62cb, 0xe783a6c4),
+     TOBN(0x59d1b9f3, 0x1a04cbba), TOBN(0xd87f6f72, 0x95e40764),
+     TOBN(0x02b4cfc1, 0x317f4a76), TOBN(0x8d2703eb, 0x91036bce),
+     TOBN(0x98206cc6, 0xa5e72a56), TOBN(0x57be9ed1, 0xcf53fb0f),
+     TOBN(0x09374571, 0xef0b17ac), TOBN(0x74b2655e, 0xd9181b38),
+     TOBN(0xc8f80ea8, 0x89935d0e), TOBN(0xc0d9e942, 0x91529936),
+     TOBN(0x19686041, 0x1e84e0e5), TOBN(0xa5db84d3, 0xaea34c93),
+     TOBN(0xf9d5bb19, 0x7073a732), TOBN(0xb8d2fe56, 0x6bcfd7c0),
+     TOBN(0x45775f36, 0xf3eb82fa), TOBN(0x8cb20ccc, 0xfdff8b58),
+     TOBN(0x1659b65f, 0x8374c110), TOBN(0xb8b4a422, 0x330c789a),
+     TOBN(0x75e3c3ea, 0x6fe8208b), TOBN(0xbd74b9e4, 0x286e78fe),
+     TOBN(0x0be2e81b, 0xd7d93a1a), TOBN(0x7ed06e27, 0xdd0a5aae),
+     TOBN(0x721f5a58, 0x6be8b800), TOBN(0x428299d1, 0xd846db28),
+     TOBN(0x95cb8e6b, 0x5be88ed3), TOBN(0xc3186b23, 0x1c034e11),
+     TOBN(0xa6312c9e, 0x8977d99b), TOBN(0xbe944331, 0x83f531e7),
+     TOBN(0x8232c0c2, 0x18d3b1d4), TOBN(0x617aae8b, 0xe1247b73),
+     TOBN(0x40153fc4, 0x282aec3b), TOBN(0xc6063d2f, 0xf7b8f823),
+     TOBN(0x68f10e58, 0x3304f94c), TOBN(0x31efae74, 0xee676346),
+     TOBN(0xbadb6c6d, 0x40a9b97c), TOBN(0x14702c63, 0x4f666256),
+     TOBN(0xdeb954f1, 0x5184b2e3), TOBN(0x5184a526, 0x94b6ca40),
+     TOBN(0xfff05337, 0x003c32ea), TOBN(0x5aa374dd, 0x205974c7),
+     TOBN(0x9a763854, 0x4b0dd71a), TOBN(0x459cd27f, 0xdeb947ec),
+     TOBN(0xa6e28161, 0x459c2b92), TOBN(0x2f020fa8, 0x75ee8ef5),
+     TOBN(0xb132ec2d, 0x30b06310), TOBN(0xc3e15899, 0xbc6a4530),
+     TOBN(0xdc5f53fe, 0xaa3f451a), TOBN(0x3a3c7f23, 0xc2d9acac),
+     TOBN(0x2ec2f892, 0x6b27e58b), TOBN(0x68466ee7, 0xd742799f),
+     TOBN(0x98324dd4, 0x1fa26613), TOBN(0xa2dc6dab, 0xbdc29d63),
+     TOBN(0xf9675faa, 0xd712d657), TOBN(0x813994be, 0x21fd8d15),
+     TOBN(0x5ccbb722, 0xfd4f7553), TOBN(0x5135ff8b, 0xf3a36b20),
+     TOBN(0x44be28af, 0x69559df5), TOBN(0x40b65bed, 0x9d41bf30),
+     TOBN(0xd98bf2a4, 0x3734e520), TOBN(0x5e3abbe3, 0x209bdcba),
+     TOBN(0x77c76553, 0xbc945b35), TOBN(0x5331c093, 0xc6ef14aa),
+     TOBN(0x518ffe29, 0x76b60c80), TOBN(0x2285593b, 0x7ace16f8),
+     TOBN(0xab1f64cc, 0xbe2b9784), TOBN(0xe8f2c0d9, 0xab2421b6),
+     TOBN(0x617d7174, 0xc1df065c), TOBN(0xafeeb5ab, 0x5f6578fa),
+     TOBN(0x16ff1329, 0x263b54a8), TOBN(0x45c55808, 0xc990dce3),
+     TOBN(0x42eab6c0, 0xecc8c177), TOBN(0x799ea9b5, 0x5982ecaa),
+     TOBN(0xf65da244, 0xb607ef8e), TOBN(0x8ab226ce, 0x32a3fc2c),
+     TOBN(0x745741e5, 0x7ea973dc), TOBN(0x5c00ca70, 0x20888f2e),
+     TOBN(0x7cdce3cf, 0x45fd9cf1), TOBN(0x8a741ef1, 0x5507f872),
+     TOBN(0x47c51c2f, 0x196b4cec), TOBN(0x70d08e43, 0xc97ea618),
+     TOBN(0x930da15c, 0x15b18a2b), TOBN(0x33b6c678, 0x2f610514),
+     TOBN(0xc662e4f8, 0x07ac9794), TOBN(0x1eccf050, 0xba06cb79),
+     TOBN(0x1ff08623, 0xe7d954e5), TOBN(0x6ef2c5fb, 0x24cf71c3),
+     TOBN(0xb2c063d2, 0x67978453), TOBN(0xa0cf3796, 0x1d654af8),
+     TOBN(0x7cb242ea, 0x7ebdaa37), TOBN(0x206e0b10, 0xb86747e0),
+     TOBN(0x481dae5f, 0xd5ecfefc), TOBN(0x07084fd8, 0xc2bff8fc),
+     TOBN(0x8040a01a, 0xea324596), TOBN(0x4c646980, 0xd4de4036),
+     TOBN(0x9eb8ab4e, 0xd65abfc3), TOBN(0xe01cb91f, 0x13541ec7),
+     TOBN(0x8f029adb, 0xfd695012), TOBN(0x9ae28483, 0x3c7569ec),
+     TOBN(0xa5614c9e, 0xa66d80a1), TOBN(0x680a3e44, 0x75f5f911),
+     TOBN(0x0c07b14d, 0xceba4fc1), TOBN(0x891c285b, 0xa13071c1),
+     TOBN(0xcac67ceb, 0x799ece3c), TOBN(0x29b910a9, 0x41e07e27),
+     TOBN(0x66bdb409, 0xf2e43123), TOBN(0x06f8b137, 0x7ac9ecbe),
+     TOBN(0x5981fafd, 0x38547090), TOBN(0x19ab8b9f, 0x85e3415d),
+     TOBN(0xfc28c194, 0xc7e31b27), TOBN(0x843be0aa, 0x6fbcbb42),
+     TOBN(0xf3b1ed43, 0xa6db836c), TOBN(0x2a1330e4, 0x01a45c05),
+     TOBN(0x4f19f3c5, 0x95c1a377), TOBN(0xa85f39d0, 0x44b5ee33),
+     TOBN(0x3da18e6d, 0x4ae52834), TOBN(0x5a403b39, 0x7423dcb0),
+     TOBN(0xbb555e0a, 0xf2374aef), TOBN(0x2ad599c4, 0x1e8ca111),
+     TOBN(0x1b3a2fb9, 0x014b3bf8), TOBN(0x73092684, 0xf66d5007),
+     TOBN(0x079f1426, 0xc4340102), TOBN(0x1827cf81, 0x8fddf4de),
+     TOBN(0xc83605f6, 0xf10ff927), TOBN(0xd3871451, 0x23739fc6),
+     TOBN(0x6d163450, 0xcac1c2cc), TOBN(0x6b521296, 0xa2ec1ac5),
+     TOBN(0x0606c4f9, 0x6e3cb4a5), TOBN(0xe47d3f41, 0x778abff7),
+     TOBN(0x425a8d5e, 0xbe8e3a45), TOBN(0x53ea9e97, 0xa6102160),
+     TOBN(0x477a106e, 0x39cbb688), TOBN(0x532401d2, 0xf3386d32),
+     TOBN(0x8e564f64, 0xb1b9b421), TOBN(0xca9b8388, 0x81dad33f),
+     TOBN(0xb1422b4e, 0x2093913e), TOBN(0x533d2f92, 0x69bc8112),
+     TOBN(0x3fa017be, 0xebe7b2c7), TOBN(0xb2767c4a, 0xcaf197c6),
+     TOBN(0xc925ff87, 0xaedbae9f), TOBN(0x7daf0eb9, 0x36880a54),
+     TOBN(0x9284ddf5, 0x9c4d0e71), TOBN(0x1581cf93, 0x316f8cf5),
+     TOBN(0x3eeca887, 0x3ac1f452), TOBN(0xb417fce9, 0xfb6aeffe),
+     TOBN(0xa5918046, 0xeefb8dc3), TOBN(0x73d318ac, 0x02209400),
+     TOBN(0xe800400f, 0x728693e5), TOBN(0xe87d814b, 0x339927ed),
+     TOBN(0x93e94d3b, 0x57ea9910), TOBN(0xff8a35b6, 0x2245fb69),
+     TOBN(0x043853d7, 0x7f200d34), TOBN(0x470f1e68, 0x0f653ce1),
+     TOBN(0x81ac05bd, 0x59a06379), TOBN(0xa14052c2, 0x03930c29),
+     TOBN(0x6b72fab5, 0x26bc2797), TOBN(0x13670d16, 0x99f16771),
+     TOBN(0x00170052, 0x1e3e48d1), TOBN(0x978fe401, 0xb7adf678),
+     TOBN(0x55ecfb92, 0xd41c5dd4), TOBN(0x5ff8e247, 0xc7b27da5),
+     TOBN(0xe7518272, 0x013fb606), TOBN(0x5768d7e5, 0x2f547a3c),
+     TOBN(0xbb24eaa3, 0x60017a5f), TOBN(0x6b18e6e4, 0x9c64ce9b),
+     TOBN(0xc225c655, 0x103dde07), TOBN(0xfc3672ae, 0x7592f7ea),
+     TOBN(0x9606ad77, 0xd06283a1), TOBN(0x542fc650, 0xe4d59d99),
+     TOBN(0xabb57c49, 0x2a40e7c2), TOBN(0xac948f13, 0xa8db9f55),
+     TOBN(0x6d4c9682, 0xb04465c3), TOBN(0xe3d062fa, 0x6468bd15),
+     TOBN(0xa51729ac, 0x5f318d7e), TOBN(0x1fc87df6, 0x9eb6fc95),
+     TOBN(0x63d146a8, 0x0591f652), TOBN(0xa861b8f7, 0x589621aa),
+     TOBN(0x59f5f15a, 0xce31348c), TOBN(0x8f663391, 0x440da6da),
+     TOBN(0xcfa778ac, 0xb591ffa3), TOBN(0x027ca9c5, 0x4cdfebce),
+     TOBN(0xbe8e05a5, 0x444ea6b3), TOBN(0x8aab4e69, 0xa78d8254),
+     TOBN(0x2437f04f, 0xb474d6b8), TOBN(0x6597ffd4, 0x045b3855),
+     TOBN(0xbb0aea4e, 0xca47ecaa), TOBN(0x568aae83, 0x85c7ebfc),
+     TOBN(0x0e966e64, 0xc73b2383), TOBN(0x49eb3447, 0xd17d8762),
+     TOBN(0xde107821, 0x8da05dab), TOBN(0x443d8baa, 0x016b7236),
+     TOBN(0x163b63a5, 0xea7610d6), TOBN(0xe47e4185, 0xce1ca979),
+     TOBN(0xae648b65, 0x80baa132), TOBN(0xebf53de2, 0x0e0d5b64),
+     TOBN(0x8d3bfcb4, 0xd3c8c1ca), TOBN(0x0d914ef3, 0x5d04b309),
+     TOBN(0x55ef6415, 0x3de7d395), TOBN(0xbde1666f, 0x26b850e8),
+     TOBN(0xdbe1ca6e, 0xd449ab19), TOBN(0x8902b322, 0xe89a2672),
+     TOBN(0xb1674b7e, 0xdacb7a53), TOBN(0x8e9faf6e, 0xf52523ff),
+     TOBN(0x6ba535da, 0x9a85788b), TOBN(0xd21f03ae, 0xbd0626d4),
+     TOBN(0x099f8c47, 0xe873dc64), TOBN(0xcda8564d, 0x018ec97e),
+     TOBN(0x3e8d7a5c, 0xde92c68c), TOBN(0x78e035a1, 0x73323cc4),
+     TOBN(0x3ef26275, 0xf880ff7c), TOBN(0xa4ee3dff, 0x273eedaa),
+     TOBN(0x58823507, 0xaf4e18f8), TOBN(0x967ec9b5, 0x0672f328),
+     TOBN(0x9ded19d9, 0x559d3186), TOBN(0x5e2ab3de, 0x6cdce39c),
+     TOBN(0xabad6e4d, 0x11c226df), TOBN(0xf9783f43, 0x87723014),
+     TOBN(0x9a49a0cf, 0x1a885719), TOBN(0xfc0c1a5a, 0x90da9dbf),
+     TOBN(0x8bbaec49, 0x571d92ac), TOBN(0x569e85fe, 0x4692517f),
+     TOBN(0x8333b014, 0xa14ea4af), TOBN(0x32f2a62f, 0x12e5c5ad),
+     TOBN(0x98c2ce3a, 0x06d89b85), TOBN(0xb90741aa, 0x2ff77a08),
+     TOBN(0x2530defc, 0x01f795a2), TOBN(0xd6e5ba0b, 0x84b3c199),
+     TOBN(0x7d8e8451, 0x12e4c936), TOBN(0xae419f7d, 0xbd0be17b),
+     TOBN(0xa583fc8c, 0x22262bc9), TOBN(0x6b842ac7, 0x91bfe2bd),
+     TOBN(0x33cef4e9, 0x440d6827), TOBN(0x5f69f4de, 0xef81fb14),
+     TOBN(0xf16cf6f6, 0x234fbb92), TOBN(0x76ae3fc3, 0xd9e7e158),
+     TOBN(0x4e89f6c2, 0xe9740b33), TOBN(0x677bc85d, 0x4962d6a1),
+     TOBN(0x6c6d8a7f, 0x68d10d15), TOBN(0x5f9a7224, 0x0257b1cd),
+     TOBN(0x7096b916, 0x4ad85961), TOBN(0x5f8c47f7, 0xe657ab4a),
+     TOBN(0xde57d7d0, 0xf7461d7e), TOBN(0x7eb6094d, 0x80ce5ee2),
+     TOBN(0x0b1e1dfd, 0x34190547), TOBN(0x8a394f43, 0xf05dd150),
+     TOBN(0x0a9eb24d, 0x97df44e6), TOBN(0x78ca06bf, 0x87675719),
+     TOBN(0x6f0b3462, 0x6ffeec22), TOBN(0x9d91bcea, 0x36cdd8fb),
+     TOBN(0xac83363c, 0xa105be47), TOBN(0x81ba76c1, 0x069710e3),
+     TOBN(0x3d1b24cb, 0x28c682c6), TOBN(0x27f25228, 0x8612575b),
+     TOBN(0xb587c779, 0xe8e66e98), TOBN(0x7b0c03e9, 0x405eb1fe),
+     TOBN(0xfdf0d030, 0x15b548e7), TOBN(0xa8be76e0, 0x38b36af7),
+     TOBN(0x4cdab04a, 0x4f310c40), TOBN(0x6287223e, 0xf47ecaec),
+     TOBN(0x678e6055, 0x8b399320), TOBN(0x61fe3fa6, 0xc01e4646),
+     TOBN(0xc482866b, 0x03261a5e), TOBN(0xdfcf45b8, 0x5c2f244a),
+     TOBN(0x8fab9a51, 0x2f684b43), TOBN(0xf796c654, 0xc7220a66),
+     TOBN(0x1d90707e, 0xf5afa58f), TOBN(0x2c421d97, 0x4fdbe0de),
+     TOBN(0xc4f4cda3, 0xaf2ebc2f), TOBN(0xa0af843d, 0xcb4efe24),
+     TOBN(0x53b857c1, 0x9ccd10b1), TOBN(0xddc9d1eb, 0x914d3e04),
+     TOBN(0x7bdec8bb, 0x62771deb), TOBN(0x829277aa, 0x91c5aa81),
+     TOBN(0x7af18dd6, 0x832391ae), TOBN(0x1740f316, 0xc71a84ca),}
+    ,
+    {TOBN(0x8928e99a, 0xeeaf8c49), TOBN(0xee7aa73d, 0x6e24d728),
+     TOBN(0x4c5007c2, 0xe72b156c), TOBN(0x5fcf57c5, 0xed408a1d),
+     TOBN(0x9f719e39, 0xb6057604), TOBN(0x7d343c01, 0xc2868bbf),
+     TOBN(0x2cca254b, 0x7e103e2d), TOBN(0xe6eb38a9, 0xf131bea2),
+     TOBN(0xb33e624f, 0x8be762b4), TOBN(0x2a9ee4d1, 0x058e3413),
+     TOBN(0x968e6369, 0x67d805fa), TOBN(0x9848949b, 0x7db8bfd7),
+     TOBN(0x5308d7e5, 0xd23a8417), TOBN(0x892f3b1d, 0xf3e29da5),
+     TOBN(0xc95c139e, 0x3dee471f), TOBN(0x8631594d, 0xd757e089),
+     TOBN(0xe0c82a3c, 0xde918dcc), TOBN(0x2e7b5994, 0x26fdcf4b),
+     TOBN(0x82c50249, 0x32cb1b2d), TOBN(0xea613a9d, 0x7657ae07),
+     TOBN(0xc2eb5f6c, 0xf1fdc9f7), TOBN(0xb6eae8b8, 0x879fe682),
+     TOBN(0x253dfee0, 0x591cbc7f), TOBN(0x000da713, 0x3e1290e6),
+     TOBN(0x1083e2ea, 0x1f095615), TOBN(0x0a28ad77, 0x14e68c33),
+     TOBN(0x6bfc0252, 0x3d8818be), TOBN(0xb585113a, 0xf35850cd),
+     TOBN(0x7d935f0b, 0x30df8aa1), TOBN(0xaddda07c, 0x4ab7e3ac),
+     TOBN(0x92c34299, 0x552f00cb), TOBN(0xc33ed1de, 0x2909df6c),
+     TOBN(0x22c2195d, 0x80e87766), TOBN(0x9e99e6d8, 0x9ddf4ac0),
+     TOBN(0x09642e4e, 0x65e74934), TOBN(0x2610ffa2, 0xff1ff241),
+     TOBN(0x4d1d47d4, 0x751c8159), TOBN(0x697b4985, 0xaf3a9363),
+     TOBN(0x0318ca46, 0x87477c33), TOBN(0xa90cb565, 0x9441eff3),
+     TOBN(0x58bb3848, 0x36f024cb), TOBN(0x85be1f77, 0x36016168),
+     TOBN(0x6c59587c, 0xdc7e07f1), TOBN(0x191be071, 0xaf1d8f02),
+     TOBN(0xbf169fa5, 0xcca5e55c), TOBN(0x3864ba3c, 0xf7d04eac),
+     TOBN(0x915e367f, 0x8d7d05db), TOBN(0xb48a876d, 0xa6549e5d),
+     TOBN(0xef89c656, 0x580e40a2), TOBN(0xf194ed8c, 0x728068bc),
+     TOBN(0x74528045, 0xa47990c9), TOBN(0xf53fc7d7, 0x5e1a4649),
+     TOBN(0xbec5ae9b, 0x78593e7d), TOBN(0x2cac4ee3, 0x41db65d7),
+     TOBN(0xa8c1eb24, 0x04a3d39b), TOBN(0x53b7d634, 0x03f8f3ef),
+     TOBN(0x2dc40d48, 0x3e07113c), TOBN(0x6e4a5d39, 0x7d8b63ae),
+     TOBN(0x5582a94b, 0x79684c2b), TOBN(0x932b33d4, 0x622da26c),
+     TOBN(0xf534f651, 0x0dbbf08d), TOBN(0x211d07c9, 0x64c23a52),
+     TOBN(0x0eeece0f, 0xee5bdc9b), TOBN(0xdf178168, 0xf7015558),
+     TOBN(0xd4294635, 0x0a712229), TOBN(0x93cbe448, 0x09273f8c),
+     TOBN(0x00b095ef, 0x8f13bc83), TOBN(0xbb741972, 0x8798978c),
+     TOBN(0x9d7309a2, 0x56dbe6e7), TOBN(0xe578ec56, 0x5a5d39ec),
+     TOBN(0x3961151b, 0x851f9a31), TOBN(0x2da7715d, 0xe5709eb4),
+     TOBN(0x867f3017, 0x53dfabf0), TOBN(0x728d2078, 0xb8e39259),
+     TOBN(0x5c75a0cd, 0x815d9958), TOBN(0xf84867a6, 0x16603be1),
+     TOBN(0xc865b13d, 0x70e35b1c), TOBN(0x02414468, 0x19b03e2c),
+     TOBN(0xe46041da, 0xac1f3121), TOBN(0x7c9017ad, 0x6f028a7c),
+     TOBN(0xabc96de9, 0x0a482873), TOBN(0x4265d6b1, 0xb77e54d4),
+     TOBN(0x68c38e79, 0xa57d88e7), TOBN(0xd461d766, 0x9ce82de3),
+     TOBN(0x817a9ec5, 0x64a7e489), TOBN(0xcc5675cd, 0xa0def5f2),
+     TOBN(0x9a00e785, 0x985d494e), TOBN(0xc626833f, 0x1b03514a),
+     TOBN(0xabe7905a, 0x83cdd60e), TOBN(0x50602fb5, 0xa1170184),
+     TOBN(0x689886cd, 0xb023642a), TOBN(0xd568d090, 0xa6e1fb00),
+     TOBN(0x5b1922c7, 0x0259217f), TOBN(0x93831cd9, 0xc43141e4),
+     TOBN(0xdfca3587, 0x0c95f86e), TOBN(0xdec2057a, 0x568ae828),
+     TOBN(0xc44ea599, 0xf98a759a), TOBN(0x55a0a7a2, 0xf7c23c1d),
+     TOBN(0xd5ffb6e6, 0x94c4f687), TOBN(0x3563cce2, 0x12848478),
+     TOBN(0x812b3517, 0xe7b1fbe1), TOBN(0x8a7dc979, 0x4f7338e0),
+     TOBN(0x211ecee9, 0x52d048db), TOBN(0x2eea4056, 0xc86ea3b8),
+     TOBN(0xd8cb68a7, 0xba772b34), TOBN(0xe16ed341, 0x5f4e2541),
+     TOBN(0x9b32f6a6, 0x0fec14db), TOBN(0xeee376f7, 0x391698be),
+     TOBN(0xe9a7aa17, 0x83674c02), TOBN(0x65832f97, 0x5843022a),
+     TOBN(0x29f3a8da, 0x5ba4990f), TOBN(0x79a59c3a, 0xfb8e3216),
+     TOBN(0x9cdc4d2e, 0xbd19bb16), TOBN(0xc6c7cfd0, 0xb3262d86),
+     TOBN(0xd4ce14d0, 0x969c0b47), TOBN(0x1fa352b7, 0x13e56128),
+     TOBN(0x383d55b8, 0x973db6d3), TOBN(0x71836850, 0xe8e5b7bf),
+     TOBN(0xc7714596, 0xe6bb571f), TOBN(0x259df31f, 0x2d5b2dd2),
+     TOBN(0x568f8925, 0x913cc16d), TOBN(0x18bc5b6d, 0xe1a26f5a),
+     TOBN(0xdfa413be, 0xf5f499ae), TOBN(0xf8835dec, 0xc3f0ae84),
+     TOBN(0xb6e60bd8, 0x65a40ab0), TOBN(0x65596439, 0x194b377e),
+     TOBN(0xbcd85625, 0x92084a69), TOBN(0x5ce433b9, 0x4f23ede0),
+     TOBN(0xe8e8f04f, 0x6ad65143), TOBN(0x11511827, 0xd6e14af6),
+     TOBN(0x3d390a10, 0x8295c0c7), TOBN(0x71e29ee4, 0x621eba16),
+     TOBN(0xa588fc09, 0x63717b46), TOBN(0x02be02fe, 0xe06ad4a2),
+     TOBN(0x931558c6, 0x04c22b22), TOBN(0xbb4d4bd6, 0x12f3c849),
+     TOBN(0x54a4f496, 0x20efd662), TOBN(0x92ba6d20, 0xc5952d14),
+     TOBN(0x2db8ea1e, 0xcc9784c2), TOBN(0x81cc10ca, 0x4b353644),
+     TOBN(0x40b570ad, 0x4b4d7f6c), TOBN(0x5c9f1d96, 0x84a1dcd2),
+     TOBN(0x01379f81, 0x3147e797), TOBN(0xe5c6097b, 0x2bd499f5),
+     TOBN(0x40dcafa6, 0x328e5e20), TOBN(0xf7b5244a, 0x54815550),
+     TOBN(0xb9a4f118, 0x47bfc978), TOBN(0x0ea0e79f, 0xd25825b1),
+     TOBN(0xa50f96eb, 0x646c7ecf), TOBN(0xeb811493, 0x446dea9d),
+     TOBN(0x2af04677, 0xdfabcf69), TOBN(0xbe3a068f, 0xc713f6e8),
+     TOBN(0x860d523d, 0x42e06189), TOBN(0xbf077941, 0x4e3aff13),
+     TOBN(0x0b616dca, 0xc1b20650), TOBN(0xe66dd6d1, 0x2131300d),
+     TOBN(0xd4a0fd67, 0xff99abde), TOBN(0xc9903550, 0xc7aac50d),
+     TOBN(0x022ecf8b, 0x7c46b2d7), TOBN(0x3333b1e8, 0x3abf92af),
+     TOBN(0x11cc113c, 0x6c491c14), TOBN(0x05976688, 0x80dd3f88),
+     TOBN(0xf5b4d9e7, 0x29d932ed), TOBN(0xe982aad8, 0xa2c38b6d),
+     TOBN(0x6f925347, 0x8be0dcf0), TOBN(0x700080ae, 0x65ca53f2),
+     TOBN(0xd8131156, 0x443ca77f), TOBN(0xe92d6942, 0xec51f984),
+     TOBN(0xd2a08af8, 0x85dfe9ae), TOBN(0xd825d9a5, 0x4d2a86ca),
+     TOBN(0x2c53988d, 0x39dff020), TOBN(0xf38b135a, 0x430cdc40),
+     TOBN(0x0c918ae0, 0x62a7150b), TOBN(0xf31fd8de, 0x0c340e9b),
+     TOBN(0xafa0e7ae, 0x4dbbf02e), TOBN(0x5847fb2a, 0x5eba6239),
+     TOBN(0x6b1647dc, 0xdccbac8b), TOBN(0xb642aa78, 0x06f485c8),
+     TOBN(0x873f3765, 0x7038ecdf), TOBN(0x2ce5e865, 0xfa49d3fe),
+     TOBN(0xea223788, 0xc98c4400), TOBN(0x8104a8cd, 0xf1fa5279),
+     TOBN(0xbcf7cc7a, 0x06becfd7), TOBN(0x49424316, 0xc8f974ae),
+     TOBN(0xc0da65e7, 0x84d6365d), TOBN(0xbcb7443f, 0x8f759fb8),
+     TOBN(0x35c712b1, 0x7ae81930), TOBN(0x80428dff, 0x4c6e08ab),
+     TOBN(0xf19dafef, 0xa4faf843), TOBN(0xced8538d, 0xffa9855f),
+     TOBN(0x20ac409c, 0xbe3ac7ce), TOBN(0x358c1fb6, 0x882da71e),
+     TOBN(0xafa9c0e5, 0xfd349961), TOBN(0x2b2cfa51, 0x8421c2fc),
+     TOBN(0x2a80db17, 0xf3a28d38), TOBN(0xa8aba539, 0x5d138e7e),
+     TOBN(0x52012d1d, 0x6e96eb8d), TOBN(0x65d8dea0, 0xcbaf9622),
+     TOBN(0x57735447, 0xb264f56c), TOBN(0xbeebef3f, 0x1b6c8da2),
+     TOBN(0xfc346d98, 0xce785254), TOBN(0xd50e8d72, 0xbb64a161),
+     TOBN(0xc03567c7, 0x49794add), TOBN(0x15a76065, 0x752c7ef6),
+     TOBN(0x59f3a222, 0x961f23d6), TOBN(0x378e4438, 0x73ecc0b0),
+     TOBN(0xc74be434, 0x5a82fde4), TOBN(0xae509af2, 0xd8b9cf34),
+     TOBN(0x4a61ee46, 0x577f44a1), TOBN(0xe09b748c, 0xb611deeb),
+     TOBN(0xc0481b2c, 0xf5f7b884), TOBN(0x35626678, 0x61acfa6b),
+     TOBN(0x37f4c518, 0xbf8d21e6), TOBN(0x22d96531, 0xb205a76d),
+     TOBN(0x37fb85e1, 0x954073c0), TOBN(0xbceafe4f, 0x65b3a567),
+     TOBN(0xefecdef7, 0xbe42a582), TOBN(0xd3fc6080, 0x65046be6),
+     TOBN(0xc9af13c8, 0x09e8dba9), TOBN(0x1e6c9847, 0x641491ff),
+     TOBN(0x3b574925, 0xd30c31f7), TOBN(0xb7eb72ba, 0xac2a2122),
+     TOBN(0x776a0dac, 0xef0859e7), TOBN(0x06fec314, 0x21900942),
+     TOBN(0x2464bc10, 0xf8c22049), TOBN(0x9bfbcce7, 0x875ebf69),
+     TOBN(0xd7a88e2a, 0x4336326b), TOBN(0xda05261c, 0x5bc2acfa),
+     TOBN(0xc29f5bdc, 0xeba7efc8), TOBN(0x471237ca, 0x25dbbf2e),
+     TOBN(0xa72773f2, 0x2975f127), TOBN(0xdc744e8e, 0x04d0b326),
+     TOBN(0x38a7ed16, 0xa56edb73), TOBN(0x64357e37, 0x2c007e70),
+     TOBN(0xa167d15b, 0x5080b400), TOBN(0x07b41164, 0x23de4be1),
+     TOBN(0xb2d91e32, 0x74c89883), TOBN(0x3c162821, 0x2882e7ed),
+     TOBN(0xad6b36ba, 0x7503e482), TOBN(0x48434e8e, 0x0ea34331),
+     TOBN(0x79f4f24f, 0x2c7ae0b9), TOBN(0xc46fbf81, 0x1939b44a),
+     TOBN(0x76fefae8, 0x56595eb1), TOBN(0x417b66ab, 0xcd5f29c7),
+     TOBN(0x5f2332b2, 0xc5ceec20), TOBN(0xd69661ff, 0xe1a1cae2),
+     TOBN(0x5ede7e52, 0x9b0286e6), TOBN(0x9d062529, 0xe276b993),
+     TOBN(0x324794b0, 0x7e50122b), TOBN(0xdd744f8b, 0x4af07ca5),
+     TOBN(0x30a12f08, 0xd63fc97b), TOBN(0x39650f1a, 0x76626d9d),
+     TOBN(0x101b47f7, 0x1fa38477), TOBN(0x3d815f19, 0xd4dc124f),
+     TOBN(0x1569ae95, 0xb26eb58a), TOBN(0xc3cde188, 0x95fb1887),
+     TOBN(0x54e9f37b, 0xf9539a48), TOBN(0xb0100e06, 0x7408c1a5),
+     TOBN(0x821d9811, 0xea580cbb), TOBN(0x8af52d35, 0x86e50c56),
+     TOBN(0xdfbd9d47, 0xdbbf698b), TOBN(0x2961a1ea, 0x03dc1c73),
+     TOBN(0x203d38f8, 0xe76a5df8), TOBN(0x08a53a68, 0x6def707a),
+     TOBN(0x26eefb48, 0x1bee45d4), TOBN(0xb3cee346, 0x3c688036),
+     TOBN(0x463c5315, 0xc42f2469), TOBN(0x19d84d2e, 0x81378162),
+     TOBN(0x22d7c3c5, 0x1c4d349f), TOBN(0x65965844, 0x163d59c5),
+     TOBN(0xcf198c56, 0xb8abceae), TOBN(0x6fb1fb1b, 0x628559d5),
+     TOBN(0x8bbffd06, 0x07bf8fe3), TOBN(0x46259c58, 0x3467734b),
+     TOBN(0xd8953cea, 0x35f7f0d3), TOBN(0x1f0bece2, 0xd65b0ff1),
+     TOBN(0xf7d5b4b3, 0xf3c72914), TOBN(0x29e8ea95, 0x3cb53389),
+     TOBN(0x4a365626, 0x836b6d46), TOBN(0xe849f910, 0xea174fde),
+     TOBN(0x7ec62fbb, 0xf4737f21), TOBN(0xd8dba5ab, 0x6209f5ac),
+     TOBN(0x24b5d7a9, 0xa5f9adbe), TOBN(0x707d28f7, 0xa61dc768),
+     TOBN(0x7711460b, 0xcaa999ea), TOBN(0xba7b174d, 0x1c92e4cc),
+     TOBN(0x3c4bab66, 0x18d4bf2d), TOBN(0xb8f0c980, 0xeb8bd279),
+     TOBN(0x024bea9a, 0x324b4737), TOBN(0xfba9e423, 0x32a83bca),
+     TOBN(0x6e635643, 0xa232dced), TOBN(0x99619367, 0x2571c8ba),
+     TOBN(0xe8c9f357, 0x54b7032b), TOBN(0xf936b3ba, 0x2442d54a),
+     TOBN(0x2263f0f0, 0x8290c65a), TOBN(0x48989780, 0xee2c7fdb),
+     TOBN(0xadc5d55a, 0x13d4f95e), TOBN(0x737cff85, 0xad9b8500),
+     TOBN(0x271c557b, 0x8a73f43d), TOBN(0xbed617a4, 0xe18bc476),
+     TOBN(0x66245401, 0x7dfd8ab2), TOBN(0xae7b89ae, 0x3a2870aa),
+     TOBN(0x1b555f53, 0x23a7e545), TOBN(0x6791e247, 0xbe057e4c),
+     TOBN(0x860136ad, 0x324fa34d), TOBN(0xea111447, 0x4cbeae28),
+     TOBN(0x023a4270, 0xbedd3299), TOBN(0x3d5c3a7f, 0xc1c35c34),
+     TOBN(0xb0f6db67, 0x8d0412d2), TOBN(0xd92625e2, 0xfcdc6b9a),
+     TOBN(0x92ae5ccc, 0x4e28a982), TOBN(0xea251c36, 0x47a3ce7e),
+     TOBN(0x9d658932, 0x790691bf), TOBN(0xed610589, 0x06b736ae),
+     TOBN(0x712c2f04, 0xc0d63b6e), TOBN(0x5cf06fd5, 0xc63d488f),
+     TOBN(0x97363fac, 0xd9588e41), TOBN(0x1f9bf762, 0x2b93257e),
+     TOBN(0xa9d1ffc4, 0x667acace), TOBN(0x1cf4a1aa, 0x0a061ecf),
+     TOBN(0x40e48a49, 0xdc1818d0), TOBN(0x0643ff39, 0xa3621ab0),
+     TOBN(0x5768640c, 0xe39ef639), TOBN(0x1fc099ea, 0x04d86854),
+     TOBN(0x9130b9c3, 0xeccd28fd), TOBN(0xd743cbd2, 0x7eec54ab),
+     TOBN(0x052b146f, 0xe5b475b6), TOBN(0x058d9a82, 0x900a7d1f),
+     TOBN(0x65e02292, 0x91262b72), TOBN(0x96f924f9, 0xbb0edf03),
+     TOBN(0x5cfa59c8, 0xfe206842), TOBN(0xf6037004, 0x5eafa720),
+     TOBN(0x5f30699e, 0x18d7dd96), TOBN(0x381e8782, 0xcbab2495),
+     TOBN(0x91669b46, 0xdd8be949), TOBN(0xb40606f5, 0x26aae8ef),
+     TOBN(0x2812b839, 0xfc6751a4), TOBN(0x16196214, 0xfba800ef),
+     TOBN(0x4398d5ca, 0x4c1a2875), TOBN(0x720c00ee, 0x653d8349),
+     TOBN(0xc2699eb0, 0xd820007c), TOBN(0x880ee660, 0xa39b5825),
+     TOBN(0x70694694, 0x471f6984), TOBN(0xf7d16ea8, 0xe3dda99a),
+     TOBN(0x28d675b2, 0xc0519a23), TOBN(0x9ebf94fe, 0x4f6952e3),
+     TOBN(0xf28bb767, 0xa2294a8a), TOBN(0x85512b4d, 0xfe0af3f5),
+     TOBN(0x18958ba8, 0x99b16a0d), TOBN(0x95c2430c, 0xba7548a7),
+     TOBN(0xb30d1b10, 0xa16be615), TOBN(0xe3ebbb97, 0x85bfb74c),
+     TOBN(0xa3273cfe, 0x18549fdb), TOBN(0xf6e200bf, 0x4fcdb792),
+     TOBN(0x54a76e18, 0x83aba56c), TOBN(0x73ec66f6, 0x89ef6aa2),
+     TOBN(0x8d17add7, 0xd1b9a305), TOBN(0xa959c5b9, 0xb7ae1b9d),
+     TOBN(0x88643522, 0x6bcc094a), TOBN(0xcc5616c4, 0xd7d429b9),
+     TOBN(0xa6dada01, 0xe6a33f7c), TOBN(0xc6217a07, 0x9d4e70ad),
+     TOBN(0xd619a818, 0x09c15b7c), TOBN(0xea06b329, 0x0e80c854),
+     TOBN(0x174811ce, 0xa5f5e7b9), TOBN(0x66dfc310, 0x787c65f4),
+     TOBN(0x4ea7bd69, 0x3316ab54), TOBN(0xc12c4acb, 0x1dcc0f70),
+     TOBN(0xe4308d1a, 0x1e407dd9), TOBN(0xe8a3587c, 0x91afa997),
+     TOBN(0xea296c12, 0xab77b7a5), TOBN(0xb5ad49e4, 0x673c0d52),
+     TOBN(0x40f9b2b2, 0x7006085a), TOBN(0xa88ff340, 0x87bf6ec2),
+     TOBN(0x978603b1, 0x4e3066a6), TOBN(0xb3f99fc2, 0xb5e486e2),
+     TOBN(0x07b53f5e, 0xb2e63645), TOBN(0xbe57e547, 0x84c84232),
+     TOBN(0xd779c216, 0x7214d5cf), TOBN(0x617969cd, 0x029a3aca),
+     TOBN(0xd17668cd, 0x8a7017a0), TOBN(0x77b4d19a, 0xbe9b7ee8),
+     TOBN(0x58fd0e93, 0x9c161776), TOBN(0xa8c4f4ef, 0xd5968a72),
+     TOBN(0x296071cc, 0x67b3de77), TOBN(0xae3c0b8e, 0x634f7905),
+     TOBN(0x67e440c2, 0x8a7100c9), TOBN(0xbb8c3c1b, 0xeb4b9b42),
+     TOBN(0x6d71e8ea, 0xc51b3583), TOBN(0x7591f5af, 0x9525e642),
+     TOBN(0xf73a2f7b, 0x13f509f3), TOBN(0x618487aa, 0x5619ac9b),
+     TOBN(0x3a72e5f7, 0x9d61718a), TOBN(0x00413bcc, 0x7592d28c),
+     TOBN(0x7d9b11d3, 0x963c35cf), TOBN(0x77623bcf, 0xb90a46ed),
+     TOBN(0xdeef273b, 0xdcdd2a50), TOBN(0x4a741f9b, 0x0601846e),
+     TOBN(0x33b89e51, 0x0ec6e929), TOBN(0xcb02319f, 0x8b7f22cd),
+     TOBN(0xbbe1500d, 0x084bae24), TOBN(0x2f0ae8d7, 0x343d2693),
+     TOBN(0xacffb5f2, 0x7cdef811), TOBN(0xaa0c030a, 0x263fb94f),
+     TOBN(0x6eef0d61, 0xa0f442de), TOBN(0xf92e1817, 0x27b139d3),
+     TOBN(0x1ae6deb7, 0x0ad8bc28), TOBN(0xa89e38dc, 0xc0514130),
+     TOBN(0x81eeb865, 0xd2fdca23), TOBN(0x5a15ee08, 0xcc8ef895),
+     TOBN(0x768fa10a, 0x01905614), TOBN(0xeff5b8ef, 0x880ee19b),
+     TOBN(0xf0c0cabb, 0xcb1c8a0e), TOBN(0x2e1ee9cd, 0xb8c838f9),
+     TOBN(0x0587d8b8, 0x8a4a14c0), TOBN(0xf6f27896, 0x2ff698e5),
+     TOBN(0xed38ef1c, 0x89ee6256), TOBN(0xf44ee1fe, 0x6b353b45),
+     TOBN(0x9115c0c7, 0x70e903b3), TOBN(0xc78ec0a1, 0x818f31df),
+     TOBN(0x6c003324, 0xb7dccbc6), TOBN(0xd96dd1f3, 0x163bbc25),
+     TOBN(0x33aa82dd, 0x5cedd805), TOBN(0x123aae4f, 0x7f7eb2f1),
+     TOBN(0x1723fcf5, 0xa26262cd), TOBN(0x1f7f4d5d, 0x0060ebd5),
+     TOBN(0xf19c5c01, 0xb2eaa3af), TOBN(0x2ccb9b14, 0x9790accf),
+     TOBN(0x1f9c1cad, 0x52324aa6), TOBN(0x63200526, 0x7247df54),
+     TOBN(0x5732fe42, 0xbac96f82), TOBN(0x52fe771f, 0x01a1c384),
+     TOBN(0x546ca13d, 0xb1001684), TOBN(0xb56b4eee, 0xa1709f75),
+     TOBN(0x266545a9, 0xd5db8672), TOBN(0xed971c90, 0x1e8f3cfb),
+     TOBN(0x4e7d8691, 0xe3a07b29), TOBN(0x7570d9ec, 0xe4b696b9),
+     TOBN(0xdc5fa067, 0x7bc7e9ae), TOBN(0x68b44caf, 0xc82c4844),
+     TOBN(0x519d34b3, 0xbf44da80), TOBN(0x283834f9, 0x5ab32e66),
+     TOBN(0x6e608797, 0x6278a000), TOBN(0x1e62960e, 0x627312f6),
+     TOBN(0x9b87b27b, 0xe6901c55), TOBN(0x80e78538, 0x24fdbc1f),
+     TOBN(0xbbbc0951, 0x2facc27d), TOBN(0x06394239, 0xac143b5a),
+     TOBN(0x35bb4a40, 0x376c1944), TOBN(0x7cb62694, 0x63da1511),
+     TOBN(0xafd29161, 0xb7148a3b), TOBN(0xa6f9d9ed, 0x4e2ea2ee),
+     TOBN(0x15dc2ca2, 0x880dd212), TOBN(0x903c3813, 0xa61139a9),
+     TOBN(0x2aa7b46d, 0x6c0f8785), TOBN(0x36ce2871, 0x901c60ff),
+     TOBN(0xc683b028, 0xe10d9c12), TOBN(0x7573baa2, 0x032f33d3),
+     TOBN(0x87a9b1f6, 0x67a31b58), TOBN(0xfd3ed11a, 0xf4ffae12),
+     TOBN(0x83dcaa9a, 0x0cb2748e), TOBN(0x8239f018, 0x5d6fdf16),
+     TOBN(0xba67b49c, 0x72753941), TOBN(0x2beec455, 0xc321cb36),
+     TOBN(0x88015606, 0x3f8b84ce), TOBN(0x76417083, 0x8d38c86f),
+     TOBN(0x054f1ca7, 0x598953dd), TOBN(0xc939e110, 0x4e8e7429),
+     TOBN(0x9b1ac2b3, 0x5a914f2f), TOBN(0x39e35ed3, 0xe74b8f9c),
+     TOBN(0xd0debdb2, 0x781b2fb0), TOBN(0x1585638f, 0x2d997ba2),
+     TOBN(0x9c4b646e, 0x9e2fce99), TOBN(0x68a21081, 0x1e80857f),
+     TOBN(0x06d54e44, 0x3643b52a), TOBN(0xde8d6d63, 0x0d8eb843),
+     TOBN(0x70321563, 0x42146a0a), TOBN(0x8ba826f2, 0x5eaa3622),
+     TOBN(0x227a58bd, 0x86138787), TOBN(0x43b6c03c, 0x10281d37),
+     TOBN(0x6326afbb, 0xb54dde39), TOBN(0x744e5e8a, 0xdb6f2d5f),
+     TOBN(0x48b2a99a, 0xcff158e1), TOBN(0xa93c8fa0, 0xef87918f),
+     TOBN(0x2182f956, 0xde058c5c), TOBN(0x216235d2, 0x936f9e7a),
+     TOBN(0xace0c0db, 0xd2e31e67), TOBN(0xc96449bf, 0xf23ac3e7),
+     TOBN(0x7e9a2874, 0x170693bd), TOBN(0xa28e14fd, 0xa45e6335),
+     TOBN(0x5757f6b3, 0x56427344), TOBN(0x822e4556, 0xacf8edf9),
+     TOBN(0x2b7a6ee2, 0xe6a285cd), TOBN(0x5866f211, 0xa9df3af0),
+     TOBN(0x40dde2dd, 0xf845b844), TOBN(0x986c3726, 0x110e5e49),
+     TOBN(0x73680c2a, 0xf7172277), TOBN(0x57b94f0f, 0x0cccb244),
+     TOBN(0xbdff7267, 0x2d438ca7), TOBN(0xbad1ce11, 0xcf4663fd),
+     TOBN(0x9813ed9d, 0xd8f71cae), TOBN(0xf43272a6, 0x961fdaa6),
+     TOBN(0xbeff0119, 0xbd6d1637), TOBN(0xfebc4f91, 0x30361978),
+     TOBN(0x02b37a95, 0x2f41deff), TOBN(0x0e44a59a, 0xe63b89b7),
+     TOBN(0x673257dc, 0x143ff951), TOBN(0x19c02205, 0xd752baf4),
+     TOBN(0x46c23069, 0xc4b7d692), TOBN(0x2e6392c3, 0xfd1502ac),
+     TOBN(0x6057b1a2, 0x1b220846), TOBN(0xe51ff946, 0x0c1b5b63),}
+    ,
+    {TOBN(0x6e85cb51, 0x566c5c43), TOBN(0xcff9c919, 0x3597f046),
+     TOBN(0x9354e90c, 0x4994d94a), TOBN(0xe0a39332, 0x2147927d),
+     TOBN(0x8427fac1, 0x0dc1eb2b), TOBN(0x88cfd8c2, 0x2ff319fa),
+     TOBN(0xe2d4e684, 0x01965274), TOBN(0xfa2e067d, 0x67aaa746),
+     TOBN(0xb6d92a7f, 0x3e5f9f11), TOBN(0x9afe153a, 0xd6cb3b8e),
+     TOBN(0x4d1a6dd7, 0xddf800bd), TOBN(0xf6c13cc0, 0xcaf17e19),
+     TOBN(0x15f6c58e, 0x325fc3ee), TOBN(0x71095400, 0xa31dc3b2),
+     TOBN(0x168e7c07, 0xafa3d3e7), TOBN(0x3f8417a1, 0x94c7ae2d),
+     TOBN(0xec234772, 0x813b230d), TOBN(0x634d0f5f, 0x17344427),
+     TOBN(0x11548ab1, 0xd77fc56a), TOBN(0x7fab1750, 0xce06af77),
+     TOBN(0xb62c10a7, 0x4f7c4f83), TOBN(0xa7d2edc4, 0x220a67d9),
+     TOBN(0x1c404170, 0x921209a0), TOBN(0x0b9815a0, 0xface59f0),
+     TOBN(0x2842589b, 0x319540c3), TOBN(0x18490f59, 0xa283d6f8),
+     TOBN(0xa2731f84, 0xdaae9fcb), TOBN(0x3db6d960, 0xc3683ba0),
+     TOBN(0xc85c63bb, 0x14611069), TOBN(0xb19436af, 0x0788bf05),
+     TOBN(0x905459df, 0x347460d2), TOBN(0x73f6e094, 0xe11a7db1),
+     TOBN(0xdc7f938e, 0xb6357f37), TOBN(0xc5d00f79, 0x2bd8aa62),
+     TOBN(0xc878dcb9, 0x2ca979fc), TOBN(0x37e83ed9, 0xeb023a99),
+     TOBN(0x6b23e273, 0x1560bf3d), TOBN(0x1086e459, 0x1d0fae61),
+     TOBN(0x78248316, 0x9a9414bd), TOBN(0x1b956bc0, 0xf0ea9ea1),
+     TOBN(0x7b85bb91, 0xc31b9c38), TOBN(0x0c5aa90b, 0x48ef57b5),
+     TOBN(0xdedeb169, 0xaf3bab6f), TOBN(0xe610ad73, 0x2d373685),
+     TOBN(0xf13870df, 0x02ba8e15), TOBN(0x0337edb6, 0x8ca7f771),
+     TOBN(0xe4acf747, 0xb62c036c), TOBN(0xd921d576, 0xb6b94e81),
+     TOBN(0xdbc86439, 0x2c422f7a), TOBN(0xfb635362, 0xed348898),
+     TOBN(0x83084668, 0xc45bfcd1), TOBN(0xc357c9e3, 0x2b315e11),
+     TOBN(0xb173b540, 0x5b2e5b8c), TOBN(0x7e946931, 0xe102b9a4),
+     TOBN(0x17c890eb, 0x7b0fb199), TOBN(0xec225a83, 0xd61b662b),
+     TOBN(0xf306a3c8, 0xee3c76cb), TOBN(0x3cf11623, 0xd32a1f6e),
+     TOBN(0xe6d5ab64, 0x6863e956), TOBN(0x3b8a4cbe, 0x5c005c26),
+     TOBN(0xdcd529a5, 0x9ce6bb27), TOBN(0xc4afaa52, 0x04d4b16f),
+     TOBN(0xb0624a26, 0x7923798d), TOBN(0x85e56df6, 0x6b307fab),
+     TOBN(0x0281893c, 0x2bf29698), TOBN(0x91fc19a4, 0xd7ce7603),
+     TOBN(0x75a5dca3, 0xad9a558f), TOBN(0x40ceb3fa, 0x4d50bf77),
+     TOBN(0x1baf6060, 0xbc9ba369), TOBN(0x927e1037, 0x597888c2),
+     TOBN(0xd936bf19, 0x86a34c07), TOBN(0xd4cf10c1, 0xc34ae980),
+     TOBN(0x3a3e5334, 0x859dd614), TOBN(0x9c475b5b, 0x18d0c8ee),
+     TOBN(0x63080d1f, 0x07cd51d5), TOBN(0xc9c0d0a6, 0xb88b4326),
+     TOBN(0x1ac98691, 0xc234296f), TOBN(0x2a0a83a4, 0x94887fb6),
+     TOBN(0x56511427, 0x0cea9cf2), TOBN(0x5230a6e8, 0xa24802f5),
+     TOBN(0xf7a2bf0f, 0x72e3d5c1), TOBN(0x37717446, 0x4f21439e),
+     TOBN(0xfedcbf25, 0x9ce30334), TOBN(0xe0030a78, 0x7ce202f9),
+     TOBN(0x6f2d9ebf, 0x1202e9ca), TOBN(0xe79dde6c, 0x75e6e591),
+     TOBN(0xf52072af, 0xf1dac4f8), TOBN(0x6c8d087e, 0xbb9b404d),
+     TOBN(0xad0fc73d, 0xbce913af), TOBN(0x909e587b, 0x458a07cb),
+     TOBN(0x1300da84, 0xd4f00c8a), TOBN(0x425cd048, 0xb54466ac),
+     TOBN(0xb59cb9be, 0x90e9d8bf), TOBN(0x991616db, 0x3e431b0e),
+     TOBN(0xd3aa117a, 0x531aecff), TOBN(0x91af92d3, 0x59f4dc3b),
+     TOBN(0x9b1ec292, 0xe93fda29), TOBN(0x76bb6c17, 0xe97d91bc),
+     TOBN(0x7509d95f, 0xaface1e6), TOBN(0x3653fe47, 0xbe855ae3),
+     TOBN(0x73180b28, 0x0f680e75), TOBN(0x75eefd1b, 0xeeb6c26c),
+     TOBN(0xa4cdf29f, 0xb66d4236), TOBN(0x2d70a997, 0x6b5821d8),
+     TOBN(0x7a3ee207, 0x20445c36), TOBN(0x71d1ac82, 0x59877174),
+     TOBN(0x0fc539f7, 0x949f73e9), TOBN(0xd05cf3d7, 0x982e3081),
+     TOBN(0x8758e20b, 0x7b1c7129), TOBN(0xffadcc20, 0x569e61f2),
+     TOBN(0xb05d3a2f, 0x59544c2d), TOBN(0xbe16f5c1, 0x9fff5e53),
+     TOBN(0x73cf65b8, 0xaad58135), TOBN(0x622c2119, 0x037aa5be),
+     TOBN(0x79373b3f, 0x646fd6a0), TOBN(0x0e029db5, 0x0d3978cf),
+     TOBN(0x8bdfc437, 0x94fba037), TOBN(0xaefbd687, 0x620797a6),
+     TOBN(0x3fa5382b, 0xbd30d38e), TOBN(0x7627cfbf, 0x585d7464),
+     TOBN(0xb2330fef, 0x4e4ca463), TOBN(0xbcef7287, 0x3566cc63),
+     TOBN(0xd161d2ca, 0xcf780900), TOBN(0x135dc539, 0x5b54827d),
+     TOBN(0x638f052e, 0x27bf1bc6), TOBN(0x10a224f0, 0x07dfa06c),
+     TOBN(0xe973586d, 0x6d3321da), TOBN(0x8b0c5738, 0x26152c8f),
+     TOBN(0x07ef4f2a, 0x34606074), TOBN(0x80fe7fe8, 0xa0f7047a),
+     TOBN(0x3d1a8152, 0xe1a0e306), TOBN(0x32cf43d8, 0x88da5222),
+     TOBN(0xbf89a95f, 0x5f02ffe6), TOBN(0x3d9eb9a4, 0x806ad3ea),
+     TOBN(0x012c17bb, 0x79c8e55e), TOBN(0xfdcd1a74, 0x99c81dac),
+     TOBN(0x7043178b, 0xb9556098), TOBN(0x4090a1df, 0x801c3886),
+     TOBN(0x759800ff, 0x9b67b912), TOBN(0x3e5c0304, 0x232620c8),
+     TOBN(0x4b9d3c4b, 0x70dceeca), TOBN(0xbb2d3c15, 0x181f648e),
+     TOBN(0xf981d837, 0x6e33345c), TOBN(0xb626289b, 0x0cf2297a),
+     TOBN(0x766ac659, 0x8baebdcf), TOBN(0x1a28ae09, 0x75df01e5),
+     TOBN(0xb71283da, 0x375876d8), TOBN(0x4865a96d, 0x607b9800),
+     TOBN(0x25dd1bcd, 0x237936b2), TOBN(0x332f4f4b, 0x60417494),
+     TOBN(0xd0923d68, 0x370a2147), TOBN(0x497f5dfb, 0xdc842203),
+     TOBN(0x9dc74cbd, 0x32be5e0f), TOBN(0x7475bcb7, 0x17a01375),
+     TOBN(0x438477c9, 0x50d872b1), TOBN(0xcec67879, 0xffe1d63d),
+     TOBN(0x9b006014, 0xd8578c70), TOBN(0xc9ad99a8, 0x78bb6b8b),
+     TOBN(0x6799008e, 0x11fb3806), TOBN(0xcfe81435, 0xcd44cab3),
+     TOBN(0xa2ee1582, 0x2f4fb344), TOBN(0xb8823450, 0x483fa6eb),
+     TOBN(0x622d323d, 0x652c7749), TOBN(0xd8474a98, 0xbeb0a15b),
+     TOBN(0xe43c154d, 0x5d1c00d0), TOBN(0x7fd581d9, 0x0e3e7aac),
+     TOBN(0x2b44c619, 0x2525ddf8), TOBN(0x67a033eb, 0xb8ae9739),
+     TOBN(0x113ffec1, 0x9ef2d2e4), TOBN(0x1bf6767e, 0xd5a0ea7f),
+     TOBN(0x57fff75e, 0x03714c0a), TOBN(0xa23c422e, 0x0a23e9ee),
+     TOBN(0xdd5f6b2d, 0x540f83af), TOBN(0xc2c2c27e, 0x55ea46a7),
+     TOBN(0xeb6b4246, 0x672a1208), TOBN(0xd13599f7, 0xae634f7a),
+     TOBN(0xcf914b5c, 0xd7b32c6e), TOBN(0x61a5a640, 0xeaf61814),
+     TOBN(0x8dc3df8b, 0x208a1bbb), TOBN(0xef627fd6, 0xb6d79aa5),
+     TOBN(0x44232ffc, 0xc4c86bc8), TOBN(0xe6f9231b, 0x061539fe),
+     TOBN(0x1d04f25a, 0x958b9533), TOBN(0x180cf934, 0x49e8c885),
+     TOBN(0x89689595, 0x9884aaf7), TOBN(0xb1959be3, 0x07b348a6),
+     TOBN(0x96250e57, 0x3c147c87), TOBN(0xae0efb3a, 0xdd0c61f8),
+     TOBN(0xed00745e, 0xca8c325e), TOBN(0x3c911696, 0xecff3f70),
+     TOBN(0x73acbc65, 0x319ad41d), TOBN(0x7b01a020, 0xf0b1c7ef),
+     TOBN(0xea32b293, 0x63a1483f), TOBN(0x89eabe71, 0x7a248f96),
+     TOBN(0x9c6231d3, 0x343157e5), TOBN(0x93a375e5, 0xdf3c546d),
+     TOBN(0xe76e9343, 0x6a2afe69), TOBN(0xc4f89100, 0xe166c88e),
+     TOBN(0x248efd0d, 0x4f872093), TOBN(0xae0eb3ea, 0x8fe0ea61),
+     TOBN(0xaf89790d, 0x9d79046e), TOBN(0x4d650f2d, 0x6cee0976),
+     TOBN(0xa3935d9a, 0x43071eca), TOBN(0x66fcd2c9, 0x283b0bfe),
+     TOBN(0x0e665eb5, 0x696605f1), TOBN(0xe77e5d07, 0xa54cd38d),
+     TOBN(0x90ee050a, 0x43d950cf), TOBN(0x86ddebda, 0xd32e69b5),
+     TOBN(0x6ad94a3d, 0xfddf7415), TOBN(0xf7fa1309, 0x3f6e8d5a),
+     TOBN(0xc4831d1d, 0xe9957f75), TOBN(0x7de28501, 0xd5817447),
+     TOBN(0x6f1d7078, 0x9e2aeb6b), TOBN(0xba2b9ff4, 0xf67a53c2),
+     TOBN(0x36963767, 0xdf9defc3), TOBN(0x479deed3, 0x0d38022c),
+     TOBN(0xd2edb89b, 0x3a8631e8), TOBN(0x8de855de, 0x7a213746),
+     TOBN(0xb2056cb7, 0xb00c5f11), TOBN(0xdeaefbd0, 0x2c9b85e4),
+     TOBN(0x03f39a8d, 0xd150892d), TOBN(0x37b84686, 0x218b7985),
+     TOBN(0x36296dd8, 0xb7375f1a), TOBN(0x472cd4b1, 0xb78e898e),
+     TOBN(0x15dff651, 0xe9f05de9), TOBN(0xd4045069, 0x2ce98ba9),
+     TOBN(0x8466a7ae, 0x9b38024c), TOBN(0xb910e700, 0xe5a6b5ef),
+     TOBN(0xae1c56ea, 0xb3aa8f0d), TOBN(0xbab2a507, 0x7eee74a6),
+     TOBN(0x0dca11e2, 0x4b4c4620), TOBN(0xfd896e2e, 0x4c47d1f4),
+     TOBN(0xeb45ae53, 0x308fbd93), TOBN(0x46cd5a2e, 0x02c36fda),
+     TOBN(0x6a3d4e90, 0xbaa48385), TOBN(0xdd55e62e, 0x9dbe9960),
+     TOBN(0xa1406aa0, 0x2a81ede7), TOBN(0x6860dd14, 0xf9274ea7),
+     TOBN(0xcfdcb0c2, 0x80414f86), TOBN(0xff410b10, 0x22f94327),
+     TOBN(0x5a33cc38, 0x49ad467b), TOBN(0xefb48b6c, 0x0a7335f1),
+     TOBN(0x14fb54a4, 0xb153a360), TOBN(0x604aa9d2, 0xb52469cc),
+     TOBN(0x5e9dc486, 0x754e48e9), TOBN(0x693cb455, 0x37471e8e),
+     TOBN(0xfb2fd7cd, 0x8d3b37b6), TOBN(0x63345e16, 0xcf09ff07),
+     TOBN(0x9910ba6b, 0x23a5d896), TOBN(0x1fe19e35, 0x7fe4364e),
+     TOBN(0x6e1da8c3, 0x9a33c677), TOBN(0x15b4488b, 0x29fd9fd0),
+     TOBN(0x1f439254, 0x1a1f22bf), TOBN(0x920a8a70, 0xab8163e8),
+     TOBN(0x3fd1b249, 0x07e5658e), TOBN(0xf2c4f79c, 0xb6ec839b),
+     TOBN(0x1abbc3d0, 0x4aa38d1b), TOBN(0x3b0db35c, 0xb5d9510e),
+     TOBN(0x1754ac78, 0x3e60dec0), TOBN(0x53272fd7, 0xea099b33),
+     TOBN(0x5fb0494f, 0x07a8e107), TOBN(0x4a89e137, 0x6a8191fa),
+     TOBN(0xa113b7f6, 0x3c4ad544), TOBN(0x88a2e909, 0x6cb9897b),
+     TOBN(0x17d55de3, 0xb44a3f84), TOBN(0xacb2f344, 0x17c6c690),
+     TOBN(0x32088168, 0x10232390), TOBN(0xf2e8a61f, 0x6c733bf7),
+     TOBN(0xa774aab6, 0x9c2d7652), TOBN(0xfb5307e3, 0xed95c5bc),
+     TOBN(0xa05c73c2, 0x4981f110), TOBN(0x1baae31c, 0xa39458c9),
+     TOBN(0x1def185b, 0xcbea62e7), TOBN(0xe8ac9eae, 0xeaf63059),
+     TOBN(0x098a8cfd, 0x9921851c), TOBN(0xd959c3f1, 0x3abe2f5b),
+     TOBN(0xa4f19525, 0x20e40ae5), TOBN(0x320789e3, 0x07a24aa1),
+     TOBN(0x259e6927, 0x7392b2bc), TOBN(0x58f6c667, 0x1918668b),
+     TOBN(0xce1db2bb, 0xc55d2d8b), TOBN(0x41d58bb7, 0xf4f6ca56),
+     TOBN(0x7650b680, 0x8f877614), TOBN(0x905e16ba, 0xf4c349ed),
+     TOBN(0xed415140, 0xf661acac), TOBN(0x3b8784f0, 0xcb2270af),
+     TOBN(0x3bc280ac, 0x8a402cba), TOBN(0xd53f7146, 0x0937921a),
+     TOBN(0xc03c8ee5, 0xe5681e83), TOBN(0x62126105, 0xf6ac9e4a),
+     TOBN(0x9503a53f, 0x936b1a38), TOBN(0x3d45e2d4, 0x782fecbd),
+     TOBN(0x69a5c439, 0x76e8ae98), TOBN(0xb53b2eeb, 0xbfb4b00e),
+     TOBN(0xf1674712, 0x72386c89), TOBN(0x30ca34a2, 0x4268bce4),
+     TOBN(0x7f1ed86c, 0x78341730), TOBN(0x8ef5beb8, 0xb525e248),
+     TOBN(0xbbc489fd, 0xb74fbf38), TOBN(0x38a92a0e, 0x91a0b382),
+     TOBN(0x7a77ba3f, 0x22433ccf), TOBN(0xde8362d6, 0xa29f05a9),
+     TOBN(0x7f6a30ea, 0x61189afc), TOBN(0x693b5505, 0x59ef114f),
+     TOBN(0x50266bc0, 0xcd1797a1), TOBN(0xea17b47e, 0xf4b7af2d),
+     TOBN(0xd6c4025c, 0x3df9483e), TOBN(0x8cbb9d9f, 0xa37b18c9),
+     TOBN(0x91cbfd9c, 0x4d8424cf), TOBN(0xdb7048f1, 0xab1c3506),
+     TOBN(0x9eaf641f, 0x028206a3), TOBN(0xf986f3f9, 0x25bdf6ce),
+     TOBN(0x262143b5, 0x224c08dc), TOBN(0x2bbb09b4, 0x81b50c91),
+     TOBN(0xc16ed709, 0xaca8c84f), TOBN(0xa6210d9d, 0xb2850ca8),
+     TOBN(0x6d8df67a, 0x09cb54d6), TOBN(0x91eef6e0, 0x500919a4),
+     TOBN(0x90f61381, 0x0f132857), TOBN(0x9acede47, 0xf8d5028b),
+     TOBN(0x844d1b71, 0x90b771c3), TOBN(0x563b71e4, 0xba6426be),
+     TOBN(0x2efa2e83, 0xbdb802ff), TOBN(0x3410cbab, 0xab5b4a41),
+     TOBN(0x555b2d26, 0x30da84dd), TOBN(0xd0711ae9, 0xee1cc29a),
+     TOBN(0xcf3e8c60, 0x2f547792), TOBN(0x03d7d5de, 0xdc678b35),
+     TOBN(0x071a2fa8, 0xced806b8), TOBN(0x222e6134, 0x697f1478),
+     TOBN(0xdc16fd5d, 0xabfcdbbf), TOBN(0x44912ebf, 0x121b53b8),
+     TOBN(0xac943674, 0x2496c27c), TOBN(0x8ea3176c, 0x1ffc26b0),
+     TOBN(0xb6e224ac, 0x13debf2c), TOBN(0x524cc235, 0xf372a832),
+     TOBN(0xd706e1d8, 0x9f6f1b18), TOBN(0x2552f005, 0x44cce35b),
+     TOBN(0x8c8326c2, 0xa88e31fc), TOBN(0xb5468b2c, 0xf9552047),
+     TOBN(0xce683e88, 0x3ff90f2b), TOBN(0x77947bdf, 0x2f0a5423),
+     TOBN(0xd0a1b28b, 0xed56e328), TOBN(0xaee35253, 0xc20134ac),
+     TOBN(0x7e98367d, 0x3567962f), TOBN(0x379ed61f, 0x8188bffb),
+     TOBN(0x73bba348, 0xfaf130a1), TOBN(0x6c1f75e1, 0x904ed734),
+     TOBN(0x18956642, 0x3b4a79fc), TOBN(0xf20bc83d, 0x54ef4493),
+     TOBN(0x836d425d, 0x9111eca1), TOBN(0xe5b5c318, 0x009a8dcf),
+     TOBN(0x3360b25d, 0x13221bc5), TOBN(0x707baad2, 0x6b3eeaf7),
+     TOBN(0xd7279ed8, 0x743a95a1), TOBN(0x7450a875, 0x969e809f),
+     TOBN(0x32b6bd53, 0xe5d0338f), TOBN(0x1e77f7af, 0x2b883bbc),
+     TOBN(0x90da12cc, 0x1063ecd0), TOBN(0xe2697b58, 0xc315be47),
+     TOBN(0x2771a5bd, 0xda85d534), TOBN(0x53e78c1f, 0xff980eea),
+     TOBN(0xadf1cf84, 0x900385e7), TOBN(0x7d3b14f6, 0xc9387b62),
+     TOBN(0x170e74b0, 0xcb8f2bd2), TOBN(0x2d50b486, 0x827fa993),
+     TOBN(0xcdbe8c9a, 0xf6f32bab), TOBN(0x55e906b0, 0xc3b93ab8),
+     TOBN(0x747f22fc, 0x8fe280d1), TOBN(0xcd8e0de5, 0xb2e114ab),
+     TOBN(0x5ab7dbeb, 0xe10b68b0), TOBN(0x9dc63a9c, 0xa480d4b2),
+     TOBN(0x78d4bc3b, 0x4be1495f), TOBN(0x25eb3db8, 0x9359122d),
+     TOBN(0x3f8ac05b, 0x0809cbdc), TOBN(0xbf4187bb, 0xd37c702f),
+     TOBN(0x84cea069, 0x1416a6a5), TOBN(0x8f860c79, 0x43ef881c),
+     TOBN(0x41311f8a, 0x38038a5d), TOBN(0xe78c2ec0, 0xfc612067),
+     TOBN(0x494d2e81, 0x5ad73581), TOBN(0xb4cc9e00, 0x59604097),
+     TOBN(0xff558aec, 0xf3612cba), TOBN(0x35beef7a, 0x9e36c39e),
+     TOBN(0x1845c7cf, 0xdbcf41b9), TOBN(0x5703662a, 0xaea997c0),
+     TOBN(0x8b925afe, 0xe402f6d8), TOBN(0xd0a1b1ae, 0x4dd72162),
+     TOBN(0x9f47b375, 0x03c41c4b), TOBN(0xa023829b, 0x0391d042),
+     TOBN(0x5f5045c3, 0x503b8b0a), TOBN(0x123c2688, 0x98c010e5),
+     TOBN(0x324ec0cc, 0x36ba06ee), TOBN(0xface3115, 0x3dd2cc0c),
+     TOBN(0xb364f3be, 0xf333e91f), TOBN(0xef8aff73, 0x28e832b0),
+     TOBN(0x1e9bad04, 0x2d05841b), TOBN(0x42f0e3df, 0x356a21e2),
+     TOBN(0xa3270bcb, 0x4add627e), TOBN(0xb09a8158, 0xd322e711),
+     TOBN(0x86e326a1, 0x0fee104a), TOBN(0xad7788f8, 0x3703f65d),
+     TOBN(0x7e765430, 0x47bc4833), TOBN(0x6cee582b, 0x2b9b893a),
+     TOBN(0x9cd2a167, 0xe8f55a7b), TOBN(0xefbee3c6, 0xd9e4190d),
+     TOBN(0x33ee7185, 0xd40c2e9d), TOBN(0x844cc9c5, 0xa380b548),
+     TOBN(0x323f8ecd, 0x66926e04), TOBN(0x0001e38f, 0x8110c1ba),
+     TOBN(0x8dbcac12, 0xfc6a7f07), TOBN(0xd65e1d58, 0x0cec0827),
+     TOBN(0xd2cd4141, 0xbe76ca2d), TOBN(0x7895cf5c, 0xe892f33a),
+     TOBN(0x956d230d, 0x367139d2), TOBN(0xa91abd3e, 0xd012c4c1),
+     TOBN(0x34fa4883, 0x87eb36bf), TOBN(0xc5f07102, 0x914b8fb4),
+     TOBN(0x90f0e579, 0xadb9c95f), TOBN(0xfe6ea8cb, 0x28888195),
+     TOBN(0x7b9b5065, 0xedfa9284), TOBN(0x6c510bd2, 0x2b8c8d65),
+     TOBN(0xd7b8ebef, 0xcbe8aafd), TOBN(0xedb3af98, 0x96b1da07),
+     TOBN(0x28ff779d, 0x6295d426), TOBN(0x0c4f6ac7, 0x3fa3ad7b),
+     TOBN(0xec44d054, 0x8b8e2604), TOBN(0x9b32a66d, 0x8b0050e1),
+     TOBN(0x1f943366, 0xf0476ce2), TOBN(0x7554d953, 0xa602c7b4),
+     TOBN(0xbe35aca6, 0x524f2809), TOBN(0xb6881229, 0xfd4edbea),
+     TOBN(0xe8cd0c8f, 0x508efb63), TOBN(0x9eb5b5c8, 0x6abcefc7),
+     TOBN(0xf5621f5f, 0xb441ab4f), TOBN(0x79e6c046, 0xb76a2b22),
+     TOBN(0x74a4792c, 0xe37a1f69), TOBN(0xcbd252cb, 0x03542b60),
+     TOBN(0x785f65d5, 0xb3c20bd3), TOBN(0x8dea6143, 0x4fabc60c),
+     TOBN(0x45e21446, 0xde673629), TOBN(0x57f7aa1e, 0x703c2d21),
+     TOBN(0xa0e99b7f, 0x98c868c7), TOBN(0x4e42f66d, 0x8b641676),
+     TOBN(0x602884dc, 0x91077896), TOBN(0xa0d690cf, 0xc2c9885b),
+     TOBN(0xfeb4da33, 0x3b9a5187), TOBN(0x5f789598, 0x153c87ee),
+     TOBN(0x2192dd47, 0x52b16dba), TOBN(0xdeefc0e6, 0x3524c1b1),
+     TOBN(0x465ea76e, 0xe4383693), TOBN(0x79401711, 0x361b8d98),
+     TOBN(0xa5f9ace9, 0xf21a15cb), TOBN(0x73d26163, 0xefee9aeb),
+     TOBN(0xcca844b3, 0xe677016c), TOBN(0x6c122b07, 0x57eaee06),
+     TOBN(0xb782dce7, 0x15f09690), TOBN(0x508b9b12, 0x2dfc0fc9),
+     TOBN(0x9015ab4b, 0x65d89fc6), TOBN(0x5e79dab7, 0xd6d5bb0f),
+     TOBN(0x64f021f0, 0x6c775aa2), TOBN(0xdf09d8cc, 0x37c7eca1),
+     TOBN(0x9a761367, 0xef2fa506), TOBN(0xed4ca476, 0x5b81eec6),
+     TOBN(0x262ede36, 0x10bbb8b5), TOBN(0x0737ce83, 0x0641ada3),
+     TOBN(0x4c94288a, 0xe9831ccc), TOBN(0x487fc1ce, 0x8065e635),
+     TOBN(0xb13d7ab3, 0xb8bb3659), TOBN(0xdea5df3e, 0x855e4120),
+     TOBN(0xb9a18573, 0x85eb0244), TOBN(0x1a1b8ea3, 0xa7cfe0a3),
+     TOBN(0x3b837119, 0x67b0867c), TOBN(0x8d5e0d08, 0x9d364520),
+     TOBN(0x52dccc1e, 0xd930f0e3), TOBN(0xefbbcec7, 0xbf20bbaf),
+     TOBN(0x99cffcab, 0x0263ad10), TOBN(0xd8199e6d, 0xfcd18f8a),
+     TOBN(0x64e2773f, 0xe9f10617), TOBN(0x0079e8e1, 0x08704848),
+     TOBN(0x1169989f, 0x8a342283), TOBN(0x8097799c, 0xa83012e6),
+     TOBN(0xece966cb, 0x8a6a9001), TOBN(0x93b3afef, 0x072ac7fc),
+     TOBN(0xe6893a2a, 0x2db3d5ba), TOBN(0x263dc462, 0x89bf4fdc),
+     TOBN(0x8852dfc9, 0xe0396673), TOBN(0x7ac70895, 0x3af362b6),
+     TOBN(0xbb9cce4d, 0x5c2f342b), TOBN(0xbf80907a, 0xb52d7aae),
+     TOBN(0x97f3d3cd, 0x2161bcd0), TOBN(0xb25b0834, 0x0962744d),
+     TOBN(0xc5b18ea5, 0x6c3a1dda), TOBN(0xfe4ec7eb, 0x06c92317),
+     TOBN(0xb787b890, 0xad1c4afe), TOBN(0xdccd9a92, 0x0ede801a),
+     TOBN(0x9ac6ddda, 0xdb58da1f), TOBN(0x22bbc12f, 0xb8cae6ee),
+     TOBN(0xc6f8bced, 0x815c4a43), TOBN(0x8105a92c, 0xf96480c7),
+     TOBN(0x0dc3dbf3, 0x7a859d51), TOBN(0xe3ec7ce6, 0x3041196b),
+     TOBN(0xd9f64b25, 0x0d1067c9), TOBN(0xf2321321, 0x3d1f8dd8),
+     TOBN(0x8b5c619c, 0x76497ee8), TOBN(0x5d2b0ac6, 0xc717370e),
+     TOBN(0x98204cb6, 0x4fcf68e1), TOBN(0x0bdec211, 0x62bc6792),
+     TOBN(0x6973ccef, 0xa63b1011), TOBN(0xf9e3fa97, 0xe0de1ac5),
+     TOBN(0x5efb693e, 0x3d0e0c8b), TOBN(0x037248e9, 0xd2d4fcb4),}
+    ,
+    {TOBN(0x80802dc9, 0x1ec34f9e), TOBN(0xd8772d35, 0x33810603),
+     TOBN(0x3f06d66c, 0x530cb4f3), TOBN(0x7be5ed0d, 0xc475c129),
+     TOBN(0xcb9e3c19, 0x31e82b10), TOBN(0xc63d2857, 0xc9ff6b4c),
+     TOBN(0xb92118c6, 0x92a1b45e), TOBN(0x0aec4414, 0x7285bbca),
+     TOBN(0xfc189ae7, 0x1e29a3ef), TOBN(0xcbe906f0, 0x4c93302e),
+     TOBN(0xd0107914, 0xceaae10e), TOBN(0xb7a23f34, 0xb68e19f8),
+     TOBN(0xe9d875c2, 0xefd2119d), TOBN(0x03198c6e, 0xfcadc9c8),
+     TOBN(0x65591bf6, 0x4da17113), TOBN(0x3cf0bbf8, 0x3d443038),
+     TOBN(0xae485bb7, 0x2b724759), TOBN(0x945353e1, 0xb2d4c63a),
+     TOBN(0x82159d07, 0xde7d6f2c), TOBN(0x389caef3, 0x4ec5b109),
+     TOBN(0x4a8ebb53, 0xdb65ef14), TOBN(0x2dc2cb7e, 0xdd99de43),
+     TOBN(0x816fa3ed, 0x83f2405f), TOBN(0x73429bb9, 0xc14208a3),
+     TOBN(0xb618d590, 0xb01e6e27), TOBN(0x047e2ccd, 0xe180b2dc),
+     TOBN(0xd1b299b5, 0x04aea4a9), TOBN(0x412c9e1e, 0x9fa403a4),
+     TOBN(0x88d28a36, 0x79407552), TOBN(0x49c50136, 0xf332b8e3),
+     TOBN(0x3a1b6fcc, 0xe668de19), TOBN(0x178851bc, 0x75122b97),
+     TOBN(0xb1e13752, 0xfb85fa4c), TOBN(0xd61257ce, 0x383c8ce9),
+     TOBN(0xd43da670, 0xd2f74dae), TOBN(0xa35aa23f, 0xbf846bbb),
+     TOBN(0x5e74235d, 0x4421fc83), TOBN(0xf6df8ee0, 0xc363473b),
+     TOBN(0x34d7f52a, 0x3c4aa158), TOBN(0x50d05aab, 0x9bc6d22e),
+     TOBN(0x8c56e735, 0xa64785f4), TOBN(0xbc56637b, 0x5f29cd07),
+     TOBN(0x53b2bb80, 0x3ee35067), TOBN(0x50235a0f, 0xdc919270),
+     TOBN(0x191ab6d8, 0xf2c4aa65), TOBN(0xc3475831, 0x8396023b),
+     TOBN(0x80400ba5, 0xf0f805ba), TOBN(0x8881065b, 0x5ec0f80f),
+     TOBN(0xc370e522, 0xcc1b5e83), TOBN(0xde2d4ad1, 0x860b8bfb),
+     TOBN(0xad364df0, 0x67b256df), TOBN(0x8f12502e, 0xe0138997),
+     TOBN(0x503fa0dc, 0x7783920a), TOBN(0xe80014ad, 0xc0bc866a),
+     TOBN(0x3f89b744, 0xd3064ba6), TOBN(0x03511dcd, 0xcba5dba5),
+     TOBN(0x197dd46d, 0x95a7b1a2), TOBN(0x9c4e7ad6, 0x3c6341fb),
+     TOBN(0x426eca29, 0x484c2ece), TOBN(0x9211e489, 0xde7f4f8a),
+     TOBN(0x14997f6e, 0xc78ef1f4), TOBN(0x2b2c0910, 0x06574586),
+     TOBN(0x17286a6e, 0x1c3eede8), TOBN(0x25f92e47, 0x0f60e018),
+     TOBN(0x805c5646, 0x31890a36), TOBN(0x703ef600, 0x57feea5b),
+     TOBN(0x389f747c, 0xaf3c3030), TOBN(0xe0e5daeb, 0x54dd3739),
+     TOBN(0xfe24a4c3, 0xc9c9f155), TOBN(0x7e4bf176, 0xb5393962),
+     TOBN(0x37183de2, 0xaf20bf29), TOBN(0x4a1bd7b5, 0xf95a8c3b),
+     TOBN(0xa83b9699, 0x46191d3d), TOBN(0x281fc8dd, 0x7b87f257),
+     TOBN(0xb18e2c13, 0x54107588), TOBN(0x6372def7, 0x9b2bafe8),
+     TOBN(0xdaf4bb48, 0x0d8972ca), TOBN(0x3f2dd4b7, 0x56167a3f),
+     TOBN(0x1eace32d, 0x84310cf4), TOBN(0xe3bcefaf, 0xe42700aa),
+     TOBN(0x5fe5691e, 0xd785e73d), TOBN(0xa5db5ab6, 0x2ea60467),
+     TOBN(0x02e23d41, 0xdfc6514a), TOBN(0x35e8048e, 0xe03c3665),
+     TOBN(0x3f8b118f, 0x1adaa0f8), TOBN(0x28ec3b45, 0x84ce1a5a),
+     TOBN(0xe8cacc6e, 0x2c6646b8), TOBN(0x1343d185, 0xdbd0e40f),
+     TOBN(0xe5d7f844, 0xcaaa358c), TOBN(0x1a1db7e4, 0x9924182a),
+     TOBN(0xd64cd42d, 0x9c875d9a), TOBN(0xb37b515f, 0x042eeec8),
+     TOBN(0x4d4dd409, 0x7b165fbe), TOBN(0xfc322ed9, 0xe206eff3),
+     TOBN(0x7dee4102, 0x59b7e17e), TOBN(0x55a481c0, 0x8236ca00),
+     TOBN(0x8c885312, 0xc23fc975), TOBN(0x15715806, 0x05d6297b),
+     TOBN(0xa078868e, 0xf78edd39), TOBN(0x956b31e0, 0x03c45e52),
+     TOBN(0x470275d5, 0xff7b33a6), TOBN(0xc8d5dc3a, 0x0c7e673f),
+     TOBN(0x419227b4, 0x7e2f2598), TOBN(0x8b37b634, 0x4c14a975),
+     TOBN(0xd0667ed6, 0x8b11888c), TOBN(0x5e0e8c3e, 0x803e25dc),
+     TOBN(0x34e5d0dc, 0xb987a24a), TOBN(0x9f40ac3b, 0xae920323),
+     TOBN(0x5463de95, 0x34e0f63a), TOBN(0xa128bf92, 0x6b6328f9),
+     TOBN(0x491ccd7c, 0xda64f1b7), TOBN(0x7ef1ec27, 0xc47bde35),
+     TOBN(0xa857240f, 0xa36a2737), TOBN(0x35dc1366, 0x63621bc1),
+     TOBN(0x7a3a6453, 0xd4fb6897), TOBN(0x80f1a439, 0xc929319d),
+     TOBN(0xfc18274b, 0xf8cb0ba0), TOBN(0xb0b53766, 0x8078c5eb),
+     TOBN(0xfb0d4924, 0x1e01d0ef), TOBN(0x50d7c67d, 0x372ab09c),
+     TOBN(0xb4e370af, 0x3aeac968), TOBN(0xe4f7fee9, 0xc4b63266),
+     TOBN(0xb4acd4c2, 0xe3ac5664), TOBN(0xf8910bd2, 0xceb38cbf),
+     TOBN(0x1c3ae50c, 0xc9c0726e), TOBN(0x15309569, 0xd97b40bf),
+     TOBN(0x70884b7f, 0xfd5a5a1b), TOBN(0x3890896a, 0xef8314cd),
+     TOBN(0x58e1515c, 0xa5618c93), TOBN(0xe665432b, 0x77d942d1),
+     TOBN(0xb32181bf, 0xb6f767a8), TOBN(0x753794e8, 0x3a604110),
+     TOBN(0x09afeb7c, 0xe8c0dbcc), TOBN(0x31e02613, 0x598673a3),
+     TOBN(0x5d98e557, 0x7d46db00), TOBN(0xfc21fb8c, 0x9d985b28),
+     TOBN(0xc9040116, 0xb0843e0b), TOBN(0x53b1b3a8, 0x69b04531),
+     TOBN(0xdd1649f0, 0x85d7d830), TOBN(0xbb3bcc87, 0xcb7427e8),
+     TOBN(0x77261100, 0xc93dce83), TOBN(0x7e79da61, 0xa1922a2a),
+     TOBN(0x587a2b02, 0xf3149ce8), TOBN(0x147e1384, 0xde92ec83),
+     TOBN(0x484c83d3, 0xaf077f30), TOBN(0xea78f844, 0x0658b53a),
+     TOBN(0x912076c2, 0x027aec53), TOBN(0xf34714e3, 0x93c8177d),
+     TOBN(0x37ef5d15, 0xc2376c84), TOBN(0x8315b659, 0x3d1aa783),
+     TOBN(0x3a75c484, 0xef852a90), TOBN(0x0ba0c58a, 0x16086bd4),
+     TOBN(0x29688d7a, 0x529a6d48), TOBN(0x9c7f250d, 0xc2f19203),
+     TOBN(0x123042fb, 0x682e2df9), TOBN(0x2b7587e7, 0xad8121bc),
+     TOBN(0x30fc0233, 0xe0182a65), TOBN(0xb82ecf87, 0xe3e1128a),
+     TOBN(0x71682861, 0x93fb098f), TOBN(0x043e21ae, 0x85e9e6a7),
+     TOBN(0xab5b49d6, 0x66c834ea), TOBN(0x3be43e18, 0x47414287),
+     TOBN(0xf40fb859, 0x219a2a47), TOBN(0x0e6559e9, 0xcc58df3c),
+     TOBN(0xfe1dfe8e, 0x0c6615b4), TOBN(0x14abc8fd, 0x56459d70),
+     TOBN(0x7be0fa8e, 0x05de0386), TOBN(0x8e63ef68, 0xe9035c7c),
+     TOBN(0x116401b4, 0x53b31e91), TOBN(0x0cba7ad4, 0x4436b4d8),
+     TOBN(0x9151f9a0, 0x107afd66), TOBN(0xafaca8d0, 0x1f0ee4c4),
+     TOBN(0x75fe5c1d, 0x9ee9761c), TOBN(0x3497a16b, 0xf0c0588f),
+     TOBN(0x3ee2bebd, 0x0304804c), TOBN(0xa8fb9a60, 0xc2c990b9),
+     TOBN(0xd14d32fe, 0x39251114), TOBN(0x36bf25bc, 0xcac73366),
+     TOBN(0xc9562c66, 0xdba7495c), TOBN(0x324d301b, 0x46ad348b),
+     TOBN(0x9f46620c, 0xd670407e), TOBN(0x0ea8d4f1, 0xe3733a01),
+     TOBN(0xd396d532, 0xb0c324e0), TOBN(0x5b211a0e, 0x03c317cd),
+     TOBN(0x090d7d20, 0x5ffe7b37), TOBN(0x3b7f3efb, 0x1747d2da),
+     TOBN(0xa2cb525f, 0xb54fc519), TOBN(0x6e220932, 0xf66a971e),
+     TOBN(0xddc160df, 0xb486d440), TOBN(0x7fcfec46, 0x3fe13465),
+     TOBN(0x83da7e4e, 0x76e4c151), TOBN(0xd6fa48a1, 0xd8d302b5),
+     TOBN(0xc6304f26, 0x5872cd88), TOBN(0x806c1d3c, 0x278b90a1),
+     TOBN(0x3553e725, 0xcaf0bc1c), TOBN(0xff59e603, 0xbb9d8d5c),
+     TOBN(0xa4550f32, 0x7a0b85dd), TOBN(0xdec5720a, 0x93ecc217),
+     TOBN(0x0b88b741, 0x69d62213), TOBN(0x7212f245, 0x5b365955),
+     TOBN(0x20764111, 0xb5cae787), TOBN(0x13cb7f58, 0x1dfd3124),
+     TOBN(0x2dca77da, 0x1175aefb), TOBN(0xeb75466b, 0xffaae775),
+     TOBN(0x74d76f3b, 0xdb6cff32), TOBN(0x7440f37a, 0x61fcda9a),
+     TOBN(0x1bb3ac92, 0xb525028b), TOBN(0x20fbf8f7, 0xa1975f29),
+     TOBN(0x982692e1, 0xdf83097f), TOBN(0x28738f6c, 0x554b0800),
+     TOBN(0xdc703717, 0xa2ce2f2f), TOBN(0x7913b93c, 0x40814194),
+     TOBN(0x04924593, 0x1fe89636), TOBN(0x7b98443f, 0xf78834a6),
+     TOBN(0x11c6ab01, 0x5114a5a1), TOBN(0x60deb383, 0xffba5f4c),
+     TOBN(0x4caa54c6, 0x01a982e6), TOBN(0x1dd35e11, 0x3491cd26),
+     TOBN(0x973c315f, 0x7cbd6b05), TOBN(0xcab00775, 0x52494724),
+     TOBN(0x04659b1f, 0x6565e15a), TOBN(0xbf30f529, 0x8c8fb026),
+     TOBN(0xfc21641b, 0xa8a0de37), TOBN(0xe9c7a366, 0xfa5e5114),
+     TOBN(0xdb849ca5, 0x52f03ad8), TOBN(0xc7e8dbe9, 0x024e35c0),
+     TOBN(0xa1a2bbac, 0xcfc3c789), TOBN(0xbf733e7d, 0x9c26f262),
+     TOBN(0x882ffbf5, 0xb8444823), TOBN(0xb7224e88, 0x6bf8483b),
+     TOBN(0x53023b8b, 0x65bef640), TOBN(0xaabfec91, 0xd4d5f8cd),
+     TOBN(0xa40e1510, 0x079ea1bd), TOBN(0x1ad9addc, 0xd05d5d26),
+     TOBN(0xdb3f2eab, 0x13e68d4f), TOBN(0x1cff1ae2, 0x640f803f),
+     TOBN(0xe0e7b749, 0xd4cee117), TOBN(0x8e9f275b, 0x4036d909),
+     TOBN(0xce34e31d, 0x8f4d4c38), TOBN(0x22b37f69, 0xd75130fc),
+     TOBN(0x83e0f1fd, 0xb4014604), TOBN(0xa8ce9919, 0x89415078),
+     TOBN(0x82375b75, 0x41792efe), TOBN(0x4f59bf5c, 0x97d4515b),
+     TOBN(0xac4f324f, 0x923a277d), TOBN(0xd9bc9b7d, 0x650f3406),
+     TOBN(0xc6fa87d1, 0x8a39bc51), TOBN(0x82588530, 0x5ccc108f),
+     TOBN(0x5ced3c9f, 0x82e4c634), TOBN(0x8efb8314, 0x3a4464f8),
+     TOBN(0xe706381b, 0x7a1dca25), TOBN(0x6cd15a3c, 0x5a2a412b),
+     TOBN(0x9347a8fd, 0xbfcd8fb5), TOBN(0x31db2eef, 0x6e54cd22),
+     TOBN(0xc4aeb11e, 0xf8d8932f), TOBN(0x11e7c1ed, 0x344411af),
+     TOBN(0x2653050c, 0xdc9a151e), TOBN(0x9edbfc08, 0x3bb0a859),
+     TOBN(0x926c81c7, 0xfd5691e7), TOBN(0x9c1b2342, 0x6f39019a),
+     TOBN(0x64a81c8b, 0x7f8474b9), TOBN(0x90657c07, 0x01761819),
+     TOBN(0x390b3331, 0x55e0375a), TOBN(0xc676c626, 0xb6ebc47d),
+     TOBN(0x51623247, 0xb7d6dee8), TOBN(0x0948d927, 0x79659313),
+     TOBN(0x99700161, 0xe9ab35ed), TOBN(0x06cc32b4, 0x8ddde408),
+     TOBN(0x6f2fd664, 0x061ef338), TOBN(0x1606fa02, 0xc202e9ed),
+     TOBN(0x55388bc1, 0x929ba99b), TOBN(0xc4428c5e, 0x1e81df69),
+     TOBN(0xce2028ae, 0xf91b0b2a), TOBN(0xce870a23, 0xf03dfd3f),
+     TOBN(0x66ec2c87, 0x0affe8ed), TOBN(0xb205fb46, 0x284d0c00),
+     TOBN(0xbf5dffe7, 0x44cefa48), TOBN(0xb6fc37a8, 0xa19876d7),
+     TOBN(0xbecfa84c, 0x08b72863), TOBN(0xd7205ff5, 0x2576374f),
+     TOBN(0x80330d32, 0x8887de41), TOBN(0x5de0df0c, 0x869ea534),
+     TOBN(0x13f42753, 0x3c56ea17), TOBN(0xeb1f6069, 0x452b1a78),
+     TOBN(0x50474396, 0xe30ea15c), TOBN(0x575816a1, 0xc1494125),
+     TOBN(0xbe1ce55b, 0xfe6bb38f), TOBN(0xb901a948, 0x96ae30f7),
+     TOBN(0xe5af0f08, 0xd8fc3548), TOBN(0x5010b5d0, 0xd73bfd08),
+     TOBN(0x993d2880, 0x53fe655a), TOBN(0x99f2630b, 0x1c1309fd),
+     TOBN(0xd8677baf, 0xb4e3b76f), TOBN(0x14e51ddc, 0xb840784b),
+     TOBN(0x326c750c, 0xbf0092ce), TOBN(0xc83d306b, 0xf528320f),
+     TOBN(0xc4456715, 0x77d4715c), TOBN(0xd30019f9, 0x6b703235),
+     TOBN(0x207ccb2e, 0xd669e986), TOBN(0x57c824af, 0xf6dbfc28),
+     TOBN(0xf0eb532f, 0xd8f92a23), TOBN(0x4a557fd4, 0x9bb98fd2),
+     TOBN(0xa57acea7, 0xc1e6199a), TOBN(0x0c663820, 0x8b94b1ed),
+     TOBN(0x9b42be8f, 0xf83a9266), TOBN(0xc7741c97, 0x0101bd45),
+     TOBN(0x95770c11, 0x07bd9ceb), TOBN(0x1f50250a, 0x8b2e0744),
+     TOBN(0xf762eec8, 0x1477b654), TOBN(0xc65b900e, 0x15efe59a),
+     TOBN(0x88c96148, 0x9546a897), TOBN(0x7e8025b3, 0xc30b4d7c),
+     TOBN(0xae4065ef, 0x12045cf9), TOBN(0x6fcb2caf, 0x9ccce8bd),
+     TOBN(0x1fa0ba4e, 0xf2cf6525), TOBN(0xf683125d, 0xcb72c312),
+     TOBN(0xa01da4ea, 0xe312410e), TOBN(0x67e28677, 0x6cd8e830),
+     TOBN(0xabd95752, 0x98fb3f07), TOBN(0x05f11e11, 0xeef649a5),
+     TOBN(0xba47faef, 0x9d3472c2), TOBN(0x3adff697, 0xc77d1345),
+     TOBN(0x4761fa04, 0xdd15afee), TOBN(0x64f1f61a, 0xb9e69462),
+     TOBN(0xfa691fab, 0x9bfb9093), TOBN(0x3df8ae8f, 0xa1133dfe),
+     TOBN(0xcd5f8967, 0x58cc710d), TOBN(0xfbb88d50, 0x16c7fe79),
+     TOBN(0x8e011b4c, 0xe88c50d1), TOBN(0x7532e807, 0xa8771c4f),
+     TOBN(0x64c78a48, 0xe2278ee4), TOBN(0x0b283e83, 0x3845072a),
+     TOBN(0x98a6f291, 0x49e69274), TOBN(0xb96e9668, 0x1868b21c),
+     TOBN(0x38f0adc2, 0xb1a8908e), TOBN(0x90afcff7, 0x1feb829d),
+     TOBN(0x9915a383, 0x210b0856), TOBN(0xa5a80602, 0xdef04889),
+     TOBN(0x800e9af9, 0x7c64d509), TOBN(0x81382d0b, 0xb8996f6f),
+     TOBN(0x490eba53, 0x81927e27), TOBN(0x46c63b32, 0x4af50182),
+     TOBN(0x784c5fd9, 0xd3ad62ce), TOBN(0xe4fa1870, 0xf8ae8736),
+     TOBN(0x4ec9d0bc, 0xd7466b25), TOBN(0x84ddbe1a, 0xdb235c65),
+     TOBN(0x5e2645ee, 0x163c1688), TOBN(0x570bd00e, 0x00eba747),
+     TOBN(0xfa51b629, 0x128bfa0f), TOBN(0x92fce1bd, 0x6c1d3b68),
+     TOBN(0x3e7361dc, 0xb66778b1), TOBN(0x9c7d249d, 0x5561d2bb),
+     TOBN(0xa40b28bf, 0x0bbc6229), TOBN(0x1c83c05e, 0xdfd91497),
+     TOBN(0x5f9f5154, 0xf083df05), TOBN(0xbac38b3c, 0xeee66c9d),
+     TOBN(0xf71db7e3, 0xec0dfcfd), TOBN(0xf2ecda8e, 0x8b0a8416),
+     TOBN(0x52fddd86, 0x7812aa66), TOBN(0x2896ef10, 0x4e6f4272),
+     TOBN(0xff27186a, 0x0fe9a745), TOBN(0x08249fcd, 0x49ca70db),
+     TOBN(0x7425a2e6, 0x441cac49), TOBN(0xf4a0885a, 0xece5ff57),
+     TOBN(0x6e2cb731, 0x7d7ead58), TOBN(0xf96cf7d6, 0x1898d104),
+     TOBN(0xafe67c9d, 0x4f2c9a89), TOBN(0x89895a50, 0x1c7bf5bc),
+     TOBN(0xdc7cb8e5, 0x573cecfa), TOBN(0x66497eae, 0xd15f03e6),
+     TOBN(0x6bc0de69, 0x3f084420), TOBN(0x323b9b36, 0xacd532b0),
+     TOBN(0xcfed390a, 0x0115a3c1), TOBN(0x9414c40b, 0x2d65ca0e),
+     TOBN(0x641406bd, 0x2f530c78), TOBN(0x29369a44, 0x833438f2),
+     TOBN(0x996884f5, 0x903fa271), TOBN(0xe6da0fd2, 0xb9da921e),
+     TOBN(0xa6f2f269, 0x5db01e54), TOBN(0x1ee3e9bd, 0x6876214e),
+     TOBN(0xa26e181c, 0xe27a9497), TOBN(0x36d254e4, 0x8e215e04),
+     TOBN(0x42f32a6c, 0x252cabca), TOBN(0x99481487, 0x80b57614),
+     TOBN(0x4c4dfe69, 0x40d9cae1), TOBN(0x05869580, 0x11a10f09),
+     TOBN(0xca287b57, 0x3491b64b), TOBN(0x77862d5d, 0x3fd4a53b),
+     TOBN(0xbf94856e, 0x50349126), TOBN(0x2be30bd1, 0x71c5268f),
+     TOBN(0x10393f19, 0xcbb650a6), TOBN(0x639531fe, 0x778cf9fd),
+     TOBN(0x02556a11, 0xb2935359), TOBN(0xda38aa96, 0xaf8c126e),
+     TOBN(0x47dbe6c2, 0x0960167f), TOBN(0x37bbabb6, 0x501901cd),
+     TOBN(0xb6e979e0, 0x2c947778), TOBN(0xd69a5175, 0x7a1a1dc6),
+     TOBN(0xc3ed5095, 0x9d9faf0c), TOBN(0x4dd9c096, 0x1d5fa5f0),
+     TOBN(0xa0c4304d, 0x64f16ea8), TOBN(0x8b1cac16, 0x7e718623),
+     TOBN(0x0b576546, 0x7c67f03e), TOBN(0x559cf5ad, 0xcbd88c01),
+     TOBN(0x074877bb, 0x0e2af19a), TOBN(0x1f717ec1, 0xa1228c92),
+     TOBN(0x70bcb800, 0x326e8920), TOBN(0xec6e2c5c, 0x4f312804),
+     TOBN(0x426aea7d, 0x3fca4752), TOBN(0xf12c0949, 0x2211f62a),
+     TOBN(0x24beecd8, 0x7be7b6b5), TOBN(0xb77eaf4c, 0x36d7a27d),
+     TOBN(0x154c2781, 0xfda78fd3), TOBN(0x848a83b0, 0x264eeabe),
+     TOBN(0x81287ef0, 0x4ffe2bc4), TOBN(0x7b6d88c6, 0xb6b6fc2a),
+     TOBN(0x805fb947, 0xce417d99), TOBN(0x4b93dcc3, 0x8b916cc4),
+     TOBN(0x72e65bb3, 0x21273323), TOBN(0xbcc1badd, 0x6ea9886e),
+     TOBN(0x0e223011, 0x4bc5ee85), TOBN(0xa561be74, 0xc18ee1e4),
+     TOBN(0x762fd2d4, 0xa6bcf1f1), TOBN(0x50e6a5a4, 0x95231489),
+     TOBN(0xca96001f, 0xa00b500b), TOBN(0x5c098cfc, 0x5d7dcdf5),
+     TOBN(0xa64e2d2e, 0x8c446a85), TOBN(0xbae9bcf1, 0x971f3c62),
+     TOBN(0x4ec22683, 0x8435a2c5), TOBN(0x8ceaed6c, 0x4bad4643),
+     TOBN(0xe9f8fb47, 0xccccf4e3), TOBN(0xbd4f3fa4, 0x1ce3b21e),
+     TOBN(0xd79fb110, 0xa3db3292), TOBN(0xe28a37da, 0xb536c66a),
+     TOBN(0x279ce87b, 0x8e49e6a9), TOBN(0x70ccfe8d, 0xfdcec8e3),
+     TOBN(0x2193e4e0, 0x3ba464b2), TOBN(0x0f39d60e, 0xaca9a398),
+     TOBN(0x7d7932af, 0xf82c12ab), TOBN(0xd8ff50ed, 0x91e7e0f7),
+     TOBN(0xea961058, 0xfa28a7e0), TOBN(0xc726cf25, 0x0bf5ec74),
+     TOBN(0xe74d55c8, 0xdb229666), TOBN(0x0bd9abbf, 0xa57f5799),
+     TOBN(0x7479ef07, 0x4dfc47b3), TOBN(0xd9c65fc3, 0x0c52f91d),
+     TOBN(0x8e0283fe, 0x36a8bde2), TOBN(0xa32a8b5e, 0x7d4b7280),
+     TOBN(0x6a677c61, 0x12e83233), TOBN(0x0fbb3512, 0xdcc9bf28),
+     TOBN(0x562e8ea5, 0x0d780f61), TOBN(0x0db8b22b, 0x1dc4e89c),
+     TOBN(0x0a6fd1fb, 0x89be0144), TOBN(0x8c77d246, 0xca57113b),
+     TOBN(0x4639075d, 0xff09c91c), TOBN(0x5b47b17f, 0x5060824c),
+     TOBN(0x58aea2b0, 0x16287b52), TOBN(0xa1343520, 0xd0cd8eb0),
+     TOBN(0x6148b4d0, 0xc5d58573), TOBN(0xdd2b6170, 0x291c68ae),
+     TOBN(0xa61b3929, 0x1da3b3b7), TOBN(0x5f946d79, 0x08c4ac10),
+     TOBN(0x4105d4a5, 0x7217d583), TOBN(0x5061da3d, 0x25e6de5e),
+     TOBN(0x3113940d, 0xec1b4991), TOBN(0xf12195e1, 0x36f485ae),
+     TOBN(0xa7507fb2, 0x731a2ee0), TOBN(0x95057a8e, 0x6e9e196e),
+     TOBN(0xa3c2c911, 0x2e130136), TOBN(0x97dfbb36, 0x33c60d15),
+     TOBN(0xcaf3c581, 0xb300ee2b), TOBN(0x77f25d90, 0xf4bac8b8),
+     TOBN(0xdb1c4f98, 0x6d840cd6), TOBN(0x471d62c0, 0xe634288c),
+     TOBN(0x8ec2f85e, 0xcec8a161), TOBN(0x41f37cbc, 0xfa6f4ae2),
+     TOBN(0x6793a20f, 0x4b709985), TOBN(0x7a7bd33b, 0xefa8985b),
+     TOBN(0x2c6a3fbd, 0x938e6446), TOBN(0x19042619, 0x2a8d47c1),
+     TOBN(0x16848667, 0xcc36975f), TOBN(0x02acf168, 0x9d5f1dfb),
+     TOBN(0x62d41ad4, 0x613baa94), TOBN(0xb56fbb92, 0x9f684670),
+     TOBN(0xce610d0d, 0xe9e40569), TOBN(0x7b99c65f, 0x35489fef),
+     TOBN(0x0c88ad1b, 0x3df18b97), TOBN(0x81b7d9be, 0x5d0e9edb),
+     TOBN(0xd85218c0, 0xc716cc0a), TOBN(0xf4b5ff90, 0x85691c49),
+     TOBN(0xa4fd666b, 0xce356ac6), TOBN(0x17c72895, 0x4b327a7a),
+     TOBN(0xf93d5085, 0xda6be7de), TOBN(0xff71530e, 0x3301d34e),
+     TOBN(0x4cd96442, 0xd8f448e8), TOBN(0x9283d331, 0x2ed18ffa),
+     TOBN(0x4d33dd99, 0x2a849870), TOBN(0xa716964b, 0x41576335),
+     TOBN(0xff5e3a9b, 0x179be0e5), TOBN(0x5b9d6b1b, 0x83b13632),
+     TOBN(0x3b8bd7d4, 0xa52f313b), TOBN(0xc9dd95a0, 0x637a4660),
+     TOBN(0x30035962, 0x0b3e218f), TOBN(0xce1481a3, 0xc7b28a3c),
+     TOBN(0xab41b43a, 0x43228d83), TOBN(0x24ae1c30, 0x4ad63f99),
+     TOBN(0x8e525f1a, 0x46a51229), TOBN(0x14af860f, 0xcd26d2b4),
+     TOBN(0xd6baef61, 0x3f714aa1), TOBN(0xf51865ad, 0xeb78795e),
+     TOBN(0xd3e21fce, 0xe6a9d694), TOBN(0x82ceb1dd, 0x8a37b527)}
+};
index f076635..1b31ba1 100644 (file)
@@ -17,9 +17,9 @@ TEST=ecdhtest.c
 APPS=
 
 LIB=$(TOP)/libcrypto.a
-LIBSRC=        ech_lib.c ech_ossl.c ech_key.c ech_err.c
+LIBSRC=        ech_lib.c ech_ossl.c ech_key.c ech_err.c ech_kdf.c
 
-LIBOBJ=        ech_lib.o ech_ossl.o ech_key.o ech_err.o
+LIBOBJ=        ech_lib.o ech_ossl.o ech_key.o ech_err.o ech_kdf.o
 
 SRC= $(LIBSRC)
 
@@ -85,6 +85,14 @@ ech_err.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
 ech_err.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
 ech_err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
 ech_err.o: ech_err.c
+ech_kdf.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
+ech_kdf.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
+ech_kdf.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
+ech_kdf.o: ../../include/openssl/evp.h ../../include/openssl/obj_mac.h
+ech_kdf.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+ech_kdf.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+ech_kdf.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
+ech_kdf.o: ../../include/openssl/symhacks.h ech_kdf.c
 ech_key.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
 ech_key.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
 ech_key.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
index a9b811a..25348b3 100644 (file)
@@ -85,6 +85,8 @@
 extern "C" {
 #endif
 
+# define EC_FLAG_COFACTOR_ECDH   0x1000
+
 const ECDH_METHOD *ECDH_OpenSSL(void);
 
 void ECDH_set_default_method(const ECDH_METHOD *);
@@ -101,6 +103,11 @@ int ECDH_get_ex_new_index(long argl, void *argp, CRYPTO_EX_new
 int ECDH_set_ex_data(EC_KEY *d, int idx, void *arg);
 void *ECDH_get_ex_data(EC_KEY *d, int idx);
 
+/*
+ * X9.62/SECG key derivation: fill out[0..outlen) with successive digests of
+ * Z, a 4-byte big-endian counter starting at 1, and sinfo, computed with md.
+ * Returns 1 on success and 0 on failure.
+ */
+int ECDH_KDF_X9_62(unsigned char *out, size_t outlen,
+                   const unsigned char *Z, size_t Zlen,
+                   const unsigned char *sinfo, size_t sinfolen,
+                   const EVP_MD *md);
+
 /* BEGIN ERROR CODES */
 /*
  * The following lines are auto generated by the script mkerr.pl. Any changes
index 996321d..2fe2c66 100644 (file)
@@ -312,6 +312,170 @@ static int test_ecdh_curve(int nid, const char *text, BN_CTX *ctx, BIO *out)
     return (ret);
 }
 
+/*
+ * Private keys (bpNNN_da, bpNNN_db) and expected shared secrets (bpNNN_Z)
+ * for the Brainpool curves, taken from RFC 7027.
+ */
+
+static const unsigned char bp256_da[] = {
+    0x81, 0xDB, 0x1E, 0xE1, 0x00, 0x15, 0x0F, 0xF2, 0xEA, 0x33, 0x8D, 0x70,
+    0x82, 0x71, 0xBE, 0x38, 0x30, 0x0C, 0xB5, 0x42, 0x41, 0xD7, 0x99, 0x50,
+    0xF7, 0x7B, 0x06, 0x30, 0x39, 0x80, 0x4F, 0x1D
+};
+
+static const unsigned char bp256_db[] = {
+    0x55, 0xE4, 0x0B, 0xC4, 0x1E, 0x37, 0xE3, 0xE2, 0xAD, 0x25, 0xC3, 0xC6,
+    0x65, 0x45, 0x11, 0xFF, 0xA8, 0x47, 0x4A, 0x91, 0xA0, 0x03, 0x20, 0x87,
+    0x59, 0x38, 0x52, 0xD3, 0xE7, 0xD7, 0x6B, 0xD3
+};
+
+static const unsigned char bp256_Z[] = {
+    0x89, 0xAF, 0xC3, 0x9D, 0x41, 0xD3, 0xB3, 0x27, 0x81, 0x4B, 0x80, 0x94,
+    0x0B, 0x04, 0x25, 0x90, 0xF9, 0x65, 0x56, 0xEC, 0x91, 0xE6, 0xAE, 0x79,
+    0x39, 0xBC, 0xE3, 0x1F, 0x3A, 0x18, 0xBF, 0x2B
+};
+
+static const unsigned char bp384_da[] = {
+    0x1E, 0x20, 0xF5, 0xE0, 0x48, 0xA5, 0x88, 0x6F, 0x1F, 0x15, 0x7C, 0x74,
+    0xE9, 0x1B, 0xDE, 0x2B, 0x98, 0xC8, 0xB5, 0x2D, 0x58, 0xE5, 0x00, 0x3D,
+    0x57, 0x05, 0x3F, 0xC4, 0xB0, 0xBD, 0x65, 0xD6, 0xF1, 0x5E, 0xB5, 0xD1,
+    0xEE, 0x16, 0x10, 0xDF, 0x87, 0x07, 0x95, 0x14, 0x36, 0x27, 0xD0, 0x42
+};
+
+static const unsigned char bp384_db[] = {
+    0x03, 0x26, 0x40, 0xBC, 0x60, 0x03, 0xC5, 0x92, 0x60, 0xF7, 0x25, 0x0C,
+    0x3D, 0xB5, 0x8C, 0xE6, 0x47, 0xF9, 0x8E, 0x12, 0x60, 0xAC, 0xCE, 0x4A,
+    0xCD, 0xA3, 0xDD, 0x86, 0x9F, 0x74, 0xE0, 0x1F, 0x8B, 0xA5, 0xE0, 0x32,
+    0x43, 0x09, 0xDB, 0x6A, 0x98, 0x31, 0x49, 0x7A, 0xBA, 0xC9, 0x66, 0x70
+};
+
+static const unsigned char bp384_Z[] = {
+    0x0B, 0xD9, 0xD3, 0xA7, 0xEA, 0x0B, 0x3D, 0x51, 0x9D, 0x09, 0xD8, 0xE4,
+    0x8D, 0x07, 0x85, 0xFB, 0x74, 0x4A, 0x6B, 0x35, 0x5E, 0x63, 0x04, 0xBC,
+    0x51, 0xC2, 0x29, 0xFB, 0xBC, 0xE2, 0x39, 0xBB, 0xAD, 0xF6, 0x40, 0x37,
+    0x15, 0xC3, 0x5D, 0x4F, 0xB2, 0xA5, 0x44, 0x4F, 0x57, 0x5D, 0x4F, 0x42
+};
+
+static const unsigned char bp512_da[] = {
+    0x16, 0x30, 0x2F, 0xF0, 0xDB, 0xBB, 0x5A, 0x8D, 0x73, 0x3D, 0xAB, 0x71,
+    0x41, 0xC1, 0xB4, 0x5A, 0xCB, 0xC8, 0x71, 0x59, 0x39, 0x67, 0x7F, 0x6A,
+    0x56, 0x85, 0x0A, 0x38, 0xBD, 0x87, 0xBD, 0x59, 0xB0, 0x9E, 0x80, 0x27,
+    0x96, 0x09, 0xFF, 0x33, 0x3E, 0xB9, 0xD4, 0xC0, 0x61, 0x23, 0x1F, 0xB2,
+    0x6F, 0x92, 0xEE, 0xB0, 0x49, 0x82, 0xA5, 0xF1, 0xD1, 0x76, 0x4C, 0xAD,
+    0x57, 0x66, 0x54, 0x22
+};
+
+static const unsigned char bp512_db[] = {
+    0x23, 0x0E, 0x18, 0xE1, 0xBC, 0xC8, 0x8A, 0x36, 0x2F, 0xA5, 0x4E, 0x4E,
+    0xA3, 0x90, 0x20, 0x09, 0x29, 0x2F, 0x7F, 0x80, 0x33, 0x62, 0x4F, 0xD4,
+    0x71, 0xB5, 0xD8, 0xAC, 0xE4, 0x9D, 0x12, 0xCF, 0xAB, 0xBC, 0x19, 0x96,
+    0x3D, 0xAB, 0x8E, 0x2F, 0x1E, 0xBA, 0x00, 0xBF, 0xFB, 0x29, 0xE4, 0xD7,
+    0x2D, 0x13, 0xF2, 0x22, 0x45, 0x62, 0xF4, 0x05, 0xCB, 0x80, 0x50, 0x36,
+    0x66, 0xB2, 0x54, 0x29
+};
+
+static const unsigned char bp512_Z[] = {
+    0xA7, 0x92, 0x70, 0x98, 0x65, 0x5F, 0x1F, 0x99, 0x76, 0xFA, 0x50, 0xA9,
+    0xD5, 0x66, 0x86, 0x5D, 0xC5, 0x30, 0x33, 0x18, 0x46, 0x38, 0x1C, 0x87,
+    0x25, 0x6B, 0xAF, 0x32, 0x26, 0x24, 0x4B, 0x76, 0xD3, 0x64, 0x03, 0xC0,
+    0x24, 0xD7, 0xBB, 0xF0, 0xAA, 0x08, 0x03, 0xEA, 0xFF, 0x40, 0x5D, 0x3D,
+    0x24, 0xF1, 0x1A, 0x9B, 0x5C, 0x0B, 0xEF, 0x67, 0x9F, 0xE1, 0x45, 0x4B,
+    0x21, 0xC4, 0xCD, 0x1F
+};
+
+/*
+ * Build an EC_KEY on curve "nid" from the private value stored in the plen
+ * bytes at p. The matching public point is derived with EC_POINT_mul() and
+ * attached to the key. Returns the new key, or NULL if any step fails.
+ */
+
+static EC_KEY *mk_eckey(int nid, const unsigned char *p, size_t plen)
+{
+    EC_KEY *key = EC_KEY_new_by_curve_name(nid);
+    BIGNUM *secret = NULL;
+    EC_POINT *point = NULL;
+    const EC_GROUP *group;
+    int built = 0;
+
+    if (key == NULL)
+        goto done;
+
+    /* Load the private scalar and install it in the key. */
+    if ((secret = BN_bin2bn(p, plen, NULL)) == NULL
+        || !EC_KEY_set_private_key(key, secret))
+        goto done;
+
+    /* Derive the public point on the key's own group and install it too. */
+    group = EC_KEY_get0_group(key);
+    if ((point = EC_POINT_new(group)) == NULL)
+        goto done;
+    if (EC_POINT_mul(group, point, secret, NULL, NULL, NULL)
+        && EC_KEY_set_public_key(key, point))
+        built = 1;
+
+ done:
+    /* The key holds its own copies, so the temporaries always go away. */
+    if (secret != NULL)
+        BN_clear_free(secret);
+    if (point != NULL)
+        EC_POINT_free(point);
+    if (!built && key != NULL) {
+        EC_KEY_free(key);
+        key = NULL;
+    }
+    return key;
+}
+
+/*
+ * Known answer test: derive the ECDH shared secret in both directions
+ * (key1 with key2's public point, then key2 with key1's) and check that each
+ * result equals the expected value Z.
+ *
+ * out     BIO that receives the progress line
+ * cname   curve description printed in that line
+ * nid     curve NID handed to mk_eckey()
+ * k1, k2  private values of the two parties, with their lengths
+ * Z, Zlen expected shared secret
+ *
+ * Returns 1 when both derivations match Z, 0 otherwise (after printing the
+ * error queue to stderr).
+ */
+
+static int ecdh_kat(BIO *out, const char *cname, int nid,
+                    const unsigned char *k1, size_t k1_len,
+                    const unsigned char *k2, size_t k2_len,
+                    const unsigned char *Z, size_t Zlen)
+{
+    int rv = 0;
+    EC_KEY *key1 = NULL, *key2 = NULL;
+    unsigned char *Ztmp = NULL;
+    size_t Ztmplen;
+    BIO_puts(out, "Testing ECDH shared secret with ");
+    BIO_puts(out, cname);
+    key1 = mk_eckey(nid, k1, k1_len);
+    key2 = mk_eckey(nid, k2, k2_len);
+    if (!key1 || !key2)
+        goto err;
+    /* The secret is one field element: degree bits rounded up to bytes. */
+    Ztmplen = (EC_GROUP_get_degree(EC_KEY_get0_group(key1)) + 7) / 8;
+    if (Ztmplen != Zlen)
+        goto err;
+    Ztmp = OPENSSL_malloc(Ztmplen);
+    /* Never hand a failed allocation to ECDH_compute_key() as its output. */
+    if (Ztmp == NULL)
+        goto err;
+    /*
+     * With no KDF the call is expected to yield the secret length, so a zero
+     * or negative result is rejected rather than only zero.
+     */
+    if (ECDH_compute_key(Ztmp, Ztmplen,
+                         EC_KEY_get0_public_key(key2), key1, 0) <= 0)
+        goto err;
+    if (memcmp(Ztmp, Z, Zlen))
+        goto err;
+    /* Wipe the buffer so the second direction cannot pass on stale data. */
+    memset(Ztmp, 0, Zlen);
+    if (ECDH_compute_key(Ztmp, Ztmplen,
+                         EC_KEY_get0_public_key(key1), key2, 0) <= 0)
+        goto err;
+    if (memcmp(Ztmp, Z, Zlen))
+        goto err;
+    rv = 1;
+ err:
+    if (key1)
+        EC_KEY_free(key1);
+    if (key2)
+        EC_KEY_free(key2);
+    if (Ztmp)
+        OPENSSL_free(Ztmp);
+    if (rv)
+        BIO_puts(out, " ok\n");
+    else {
+        fprintf(stderr, "Error in ECDH routines\n");
+        ERR_print_errors_fp(stderr);
+    }
+    return rv;
+}
+
+/*
+ * Run ecdh_kat() for brainpoolP<bits>r1. Token pasting builds the NID and
+ * the names of the bp<bits>_da, bp<bits>_db and bp<bits>_Z arrays from
+ * "bits", so the argument must be a bare literal such as 256.
+ */
+# define test_ecdh_kat(bio, curve, bits) \
+        ecdh_kat(bio, curve, NID_brainpoolP##bits##r1, \
+                bp##bits##_da, sizeof(bp##bits##_da), \
+                bp##bits##_db, sizeof(bp##bits##_db), \
+                bp##bits##_Z, sizeof(bp##bits##_Z))
+
 int main(int argc, char *argv[])
 {
     BN_CTX *ctx = NULL;
@@ -372,6 +536,12 @@ int main(int argc, char *argv[])
     if (!test_ecdh_curve(NID_sect571r1, "NIST Binary-Curve B-571", ctx, out))
         goto err;
 # endif
+    if (!test_ecdh_kat(out, "Brainpool Prime-Curve brainpoolP256r1", 256))
+        goto err;
+    if (!test_ecdh_kat(out, "Brainpool Prime-Curve brainpoolP384r1", 384))
+        goto err;
+    if (!test_ecdh_kat(out, "Brainpool Prime-Curve brainpoolP512r1", 512))
+        goto err;
 
     ret = 0;
 
diff --git a/crypto/ecdh/ech_kdf.c b/crypto/ecdh/ech_kdf.c
new file mode 100644 (file)
index 0000000..ac722ac
--- /dev/null
@@ -0,0 +1,111 @@
+/* crypto/ecdh/ech_kdf.c */
+/*
+ * Written by Stephen Henson for the OpenSSL project.
+ */
+/* ====================================================================
+ * Copyright (c) 2013 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#define OPENSSL_FIPSAPI
+
+#include <string.h>
+#include <openssl/ecdh.h>
+#include <openssl/evp.h>
+
+/* Key derivation function from X9.62/SECG */
+/* Way more than we will ever need */
+#define ECDH_KDF_MAX    (1 << 30)
+
+/*
+ * ECDH_KDF_X9_62 - derive outlen bytes of keying material into out.
+ *
+ * Output block i (i = 1, 2, ...) is md(Z || ctr || sinfo), where ctr is i
+ * encoded as four big-endian bytes; the final block is truncated to fit.
+ *
+ * Returns 1 on success. Returns 0 if any length exceeds ECDH_KDF_MAX, if
+ * the digest reports a non-positive size, or if a digest operation fails.
+ */
+int ECDH_KDF_X9_62(unsigned char *out, size_t outlen,
+                   const unsigned char *Z, size_t Zlen,
+                   const unsigned char *sinfo, size_t sinfolen,
+                   const EVP_MD *md)
+{
+    EVP_MD_CTX mctx;
+    int rv = 0;
+    int mdsize;
+    unsigned int i;
+    size_t mdlen;
+    unsigned char ctr[4];
+    if (sinfolen > ECDH_KDF_MAX || outlen > ECDH_KDF_MAX
+        || Zlen > ECDH_KDF_MAX)
+        return 0;
+    /*
+     * Keep the size signed until it is validated: a negative value stored
+     * straight into a size_t would send every request down the truncating
+     * branch and copy outlen bytes out of the much smaller mtmp buffer.
+     */
+    mdsize = EVP_MD_size(md);
+    if (mdsize <= 0)
+        return 0;
+    mdlen = (size_t)mdsize;
+    EVP_MD_CTX_init(&mctx);
+    for (i = 1;; i++) {
+        unsigned char mtmp[EVP_MAX_MD_SIZE];
+        /* A failed init must not be followed by update/final calls. */
+        if (!EVP_DigestInit_ex(&mctx, md, NULL))
+            goto err;
+        /* Counter is hashed in big-endian byte order. */
+        ctr[3] = i & 0xFF;
+        ctr[2] = (i >> 8) & 0xFF;
+        ctr[1] = (i >> 16) & 0xFF;
+        ctr[0] = (i >> 24) & 0xFF;
+        if (!EVP_DigestUpdate(&mctx, Z, Zlen))
+            goto err;
+        if (!EVP_DigestUpdate(&mctx, ctr, sizeof(ctr)))
+            goto err;
+        if (!EVP_DigestUpdate(&mctx, sinfo, sinfolen))
+            goto err;
+        if (outlen >= mdlen) {
+            /* A whole block fits: write the digest directly into out. */
+            if (!EVP_DigestFinal(&mctx, out, NULL))
+                goto err;
+            outlen -= mdlen;
+            if (outlen == 0)
+                break;
+            out += mdlen;
+        } else {
+            /* Final partial block: digest into scratch, copy the prefix. */
+            if (!EVP_DigestFinal(&mctx, mtmp, NULL))
+                goto err;
+            memcpy(out, mtmp, outlen);
+            OPENSSL_cleanse(mtmp, mdlen);
+            break;
+        }
+    }
+    rv = 1;
+ err:
+    EVP_MD_CTX_cleanup(&mctx);
+    return rv;
+}
index d448b19..df115cc 100644 (file)
@@ -138,6 +138,16 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key,
     }
 
     group = EC_KEY_get0_group(ecdh);
+
+    if (EC_KEY_get_flags(ecdh) & EC_FLAG_COFACTOR_ECDH) {
+        if (!EC_GROUP_get_cofactor(group, x, ctx) ||
+            !BN_mul(x, x, priv_key, ctx)) {
+            ECDHerr(ECDH_F_ECDH_COMPUTE_KEY, ERR_R_MALLOC_FAILURE);
+            goto err;
+        }
+        priv_key = x;
+    }
+
     if ((tmp = EC_POINT_new(group)) == NULL) {
         ECDHerr(ECDH_F_ECDH_COMPUTE_KEY, ERR_R_MALLOC_FAILURE);
         goto err;
index faf76b1..c4016ac 100644 (file)
@@ -228,6 +228,80 @@ int ECDSA_get_ex_new_index(long argl, void *argp, CRYPTO_EX_new
 int ECDSA_set_ex_data(EC_KEY *d, int idx, void *arg);
 void *ECDSA_get_ex_data(EC_KEY *d, int idx);
 
+/** Allocates and initializes an ECDSA_METHOD structure
+ *  \param ecdsa_method pointer to ECDSA_METHOD to copy.  (May be NULL)
+ *  \return pointer to a ECDSA_METHOD structure or NULL if an error occurred
+ */
+
+ECDSA_METHOD *ECDSA_METHOD_new(ECDSA_METHOD *ecdsa_method);
+
+/** frees a ECDSA_METHOD structure
+ *  \param  ecdsa_method  pointer to the ECDSA_METHOD structure
+ */
+void ECDSA_METHOD_free(ECDSA_METHOD *ecdsa_method);
+
+/**  Sets application specific data in the ECDSA_METHOD
+ *   \param  ecdsa_method pointer to existing ECDSA_METHOD
+ *   \param  app application specific data to set
+ */
+
+void ECDSA_METHOD_set_app_data(ECDSA_METHOD *ecdsa_method, void *app);
+
+/** Returns application specific data from a ECDSA_METHOD structure
+ *  \param ecdsa_method pointer to ECDSA_METHOD structure
+ *  \return pointer to application specific data.
+ */
+
+void *ECDSA_METHOD_get_app_data(ECDSA_METHOD *ecdsa_method);
+
+/**  Set the ECDSA_do_sign function in the ECDSA_METHOD
+ *   \param  ecdsa_method  pointer to existing ECDSA_METHOD
+ *   \param  ecdsa_do_sign a function of type ECDSA_do_sign
+ */
+
+void ECDSA_METHOD_set_sign(ECDSA_METHOD *ecdsa_method,
+                           ECDSA_SIG *(*ecdsa_do_sign) (const unsigned char
+                                                        *dgst, int dgst_len,
+                                                        const BIGNUM *inv,
+                                                        const BIGNUM *rp,
+                                                        EC_KEY *eckey));
+
+/**  Set the  ECDSA_sign_setup function in the ECDSA_METHOD
+ *   \param  ecdsa_method  pointer to existing ECDSA_METHOD
+ *   \param  ecdsa_sign_setup a function of type ECDSA_sign_setup
+ */
+
+void ECDSA_METHOD_set_sign_setup(ECDSA_METHOD *ecdsa_method,
+                                 int (*ecdsa_sign_setup) (EC_KEY *eckey,
+                                                          BN_CTX *ctx,
+                                                          BIGNUM **kinv,
+                                                          BIGNUM **r));
+
+/**  Set the ECDSA_do_verify function in the ECDSA_METHOD
+ *   \param  ecdsa_method  pointer to existing ECDSA_METHOD
+ *   \param  ecdsa_do_verify a function of type ECDSA_do_verify
+ */
+
+void ECDSA_METHOD_set_verify(ECDSA_METHOD *ecdsa_method,
+                             int (*ecdsa_do_verify) (const unsigned char
+                                                     *dgst, int dgst_len,
+                                                     const ECDSA_SIG *sig,
+                                                     EC_KEY *eckey));
+
+/**  Set the flags field in the ECDSA_METHOD
+ *   \param  ecdsa_method  pointer to existing ECDSA_METHOD
+ *   \param  flags flags value to set
+ */
+
+void ECDSA_METHOD_set_flags(ECDSA_METHOD *ecdsa_method, int flags);
+
+/**  Set the name field in the ECDSA_METHOD
+ *   \param  ecdsa_method  pointer to existing ECDSA_METHOD
+ *   \param  name name to set
+ */
+
+void ECDSA_METHOD_set_name(ECDSA_METHOD *ecdsa_method, char *name);
+
 /* BEGIN ERROR CODES */
 /*
  * The following lines are auto generated by the script mkerr.pl. Any changes
@@ -242,6 +316,7 @@ void ERR_load_ECDSA_strings(void);
 # define ECDSA_F_ECDSA_DATA_NEW_METHOD                    100
 # define ECDSA_F_ECDSA_DO_SIGN                            101
 # define ECDSA_F_ECDSA_DO_VERIFY                          102
+# define ECDSA_F_ECDSA_METHOD_NEW                         105
 # define ECDSA_F_ECDSA_SIGN_SETUP                         103
 
 /* Reason codes. */
index 6fc64a0..f1fa7b5 100644 (file)
@@ -74,6 +74,7 @@ static ERR_STRING_DATA ECDSA_str_functs[] = {
     {ERR_FUNC(ECDSA_F_ECDSA_DATA_NEW_METHOD), "ECDSA_DATA_NEW_METHOD"},
     {ERR_FUNC(ECDSA_F_ECDSA_DO_SIGN), "ECDSA_do_sign"},
     {ERR_FUNC(ECDSA_F_ECDSA_DO_VERIFY), "ECDSA_do_verify"},
+    {ERR_FUNC(ECDSA_F_ECDSA_METHOD_NEW), "ECDSA_METHOD_new"},
     {ERR_FUNC(ECDSA_F_ECDSA_SIGN_SETUP), "ECDSA_sign_setup"},
     {0, NULL}
 };
index 0f2d343..1c02310 100644 (file)
@@ -275,3 +275,80 @@ void *ECDSA_get_ex_data(EC_KEY *d, int idx)
         return NULL;
     return (CRYPTO_get_ex_data(&ecdsa->ex_data, idx));
 }
+
+/*
+ * Allocate an ECDSA_METHOD. When ecdsa_meth is non-NULL the new structure
+ * starts as a copy of it; otherwise the function pointers, name, flags and
+ * app_data are cleared. In both cases ECDSA_METHOD_FLAG_ALLOCATED is set on
+ * the result.
+ *
+ * Returns the new method, or NULL (with ERR_R_MALLOC_FAILURE queued) when
+ * the allocation fails.
+ */
+ECDSA_METHOD *ECDSA_METHOD_new(ECDSA_METHOD *ecdsa_meth)
+{
+    ECDSA_METHOD *ret;
+
+    ret = OPENSSL_malloc(sizeof(ECDSA_METHOD));
+    if (ret == NULL) {
+        ECDSAerr(ECDSA_F_ECDSA_METHOD_NEW, ERR_R_MALLOC_FAILURE);
+        return NULL;
+    }
+
+    if (ecdsa_meth)
+        *ret = *ecdsa_meth;
+    else {
+        ret->ecdsa_sign_setup = 0;
+        ret->ecdsa_do_sign = 0;
+        ret->ecdsa_do_verify = 0;
+        ret->name = NULL;
+        ret->flags = 0;
+        /*
+         * The block comes from OPENSSL_malloc() and is not zeroed, so
+         * app_data must be cleared too or ECDSA_METHOD_get_app_data()
+         * would return an indeterminate pointer.
+         */
+        ret->app_data = NULL;
+    }
+    ret->flags |= ECDSA_METHOD_FLAG_ALLOCATED;
+    return ret;
+}
+
+/* Install ecdsa_do_sign as the method's ecdsa_do_sign callback. */
+void ECDSA_METHOD_set_sign(ECDSA_METHOD *ecdsa_method,
+                           ECDSA_SIG *(*ecdsa_do_sign) (const unsigned char
+                                                        *dgst, int dgst_len,
+                                                        const BIGNUM *inv,
+                                                        const BIGNUM *rp,
+                                                        EC_KEY *eckey))
+{
+    ecdsa_method->ecdsa_do_sign = ecdsa_do_sign;
+}
+
+/* Install ecdsa_sign_setup as the method's ecdsa_sign_setup callback. */
+void ECDSA_METHOD_set_sign_setup(ECDSA_METHOD *ecdsa_method,
+                                 int (*ecdsa_sign_setup) (EC_KEY *eckey,
+                                                          BN_CTX *ctx,
+                                                          BIGNUM **kinv,
+                                                          BIGNUM **r))
+{
+    ecdsa_method->ecdsa_sign_setup = ecdsa_sign_setup;
+}
+
+/* Install ecdsa_do_verify as the method's ecdsa_do_verify callback. */
+void ECDSA_METHOD_set_verify(ECDSA_METHOD *ecdsa_method,
+                             int (*ecdsa_do_verify) (const unsigned char
+                                                     *dgst, int dgst_len,
+                                                     const ECDSA_SIG *sig,
+                                                     EC_KEY *eckey))
+{
+    ecdsa_method->ecdsa_do_verify = ecdsa_do_verify;
+}
+
+/*
+ * Replace the method's flags with the given value.
+ * ECDSA_METHOD_FLAG_ALLOCATED is always OR-ed in, so a caller cannot clear
+ * it through this setter.
+ * NOTE(review): this also marks a method that was not heap-allocated as
+ * allocated - presumably only methods from ECDSA_METHOD_new() are expected
+ * here; confirm against callers.
+ */
+void ECDSA_METHOD_set_flags(ECDSA_METHOD *ecdsa_method, int flags)
+{
+    ecdsa_method->flags = flags | ECDSA_METHOD_FLAG_ALLOCATED;
+}
+
+/*
+ * Set the method's name. Only the pointer is stored; the string is not
+ * copied here.
+ */
+void ECDSA_METHOD_set_name(ECDSA_METHOD *ecdsa_method, char *name)
+{
+    ecdsa_method->name = name;
+}
+
+/*
+ * Release an ECDSA_METHOD. Only structures carrying
+ * ECDSA_METHOD_FLAG_ALLOCATED are passed to OPENSSL_free(); any other
+ * method is left untouched. A NULL argument is ignored.
+ */
+void ECDSA_METHOD_free(ECDSA_METHOD *ecdsa_method)
+{
+    /* Accept NULL so cleanup paths need no guard of their own. */
+    if (ecdsa_method == NULL)
+        return;
+    if (ecdsa_method->flags & ECDSA_METHOD_FLAG_ALLOCATED)
+        OPENSSL_free(ecdsa_method);
+}
+
+/* Store the application-specific pointer app in the method. */
+void ECDSA_METHOD_set_app_data(ECDSA_METHOD *ecdsa_method, void *app)
+{
+    ecdsa_method->app_data = app;
+}
+
+/* Return the application-specific pointer held in the method's app_data. */
+void *ECDSA_METHOD_get_app_data(ECDSA_METHOD *ecdsa_method)
+{
+    return ecdsa_method->app_data;
+}
index 76b2caf..d3a5efc 100644 (file)
@@ -79,9 +79,13 @@ struct ecdsa_method {
     int (*finish) (EC_KEY *eckey);
 # endif
     int flags;
-    char *app_data;
+    void *app_data;
 };
 
+/* The ECDSA_METHOD was allocated and can be freed */
+
+# define ECDSA_METHOD_FLAG_ALLOCATED 0x2
+
 /*
  * If this flag is set the ECDSA method is FIPS compliant and can be used in
  * FIPS mode. This is set in the validated module method. If an application
index 4c5fa6b..dd76960 100644 (file)
@@ -179,10 +179,32 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp,
     while (BN_is_zero(r));
 
     /* compute the inverse of k */
-    if (!BN_mod_inverse(k, k, order, ctx)) {
-        ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB);
-        goto err;
+    if (EC_GROUP_get_mont_data(group) != NULL) {
+        /*
+         * We want inverse in constant time, therefore we utilize the fact
+         * order must be prime and use Fermats Little Theorem instead.
+         */
+        if (!BN_set_word(X, 2)) {
+            ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB);
+            goto err;
+        }
+        if (!BN_mod_sub(X, order, X, order, ctx)) {
+            ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB);
+            goto err;
+        }
+        BN_set_flags(X, BN_FLG_CONSTTIME);
+        if (!BN_mod_exp_mont_consttime
+            (k, k, X, order, ctx, EC_GROUP_get_mont_data(group))) {
+            ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB);
+            goto err;
+        }
+    } else {
+        if (!BN_mod_inverse(k, k, order, ctx)) {
+            ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB);
+            goto err;
+        }
     }
+
     /* clear old values if necessary */
     if (*rp != NULL)
         BN_clear_free(*rp);
index 8ceb747..426388e 100644 (file)
@@ -22,13 +22,13 @@ LIBSRC= eng_err.c eng_lib.c eng_list.c eng_init.c eng_ctrl.c \
        tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \
        tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \
        eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \
-       eng_rsax.c eng_rdrand.c
+       eng_rdrand.c
 LIBOBJ= eng_err.o eng_lib.o eng_list.o eng_init.o eng_ctrl.o \
        eng_table.o eng_pkey.o eng_fat.o eng_all.o \
        tb_rsa.o tb_dsa.o tb_ecdsa.o tb_dh.o tb_ecdh.o tb_rand.o tb_store.o \
        tb_cipher.o tb_digest.o tb_pkmeth.o tb_asnmth.o \
        eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o \
-       eng_rsax.o eng_rdrand.o
+       eng_rdrand.o
 
 SRC= $(LIBSRC)
 
@@ -267,20 +267,6 @@ eng_rdrand.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
 eng_rdrand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
 eng_rdrand.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
 eng_rdrand.o: eng_rdrand.c
-eng_rsax.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
-eng_rsax.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
-eng_rsax.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-eng_rsax.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
-eng_rsax.o: ../../include/openssl/ecdsa.h ../../include/openssl/engine.h
-eng_rsax.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-eng_rsax.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-eng_rsax.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-eng_rsax.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-eng_rsax.o: ../../include/openssl/pkcs7.h ../../include/openssl/rsa.h
-eng_rsax.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
-eng_rsax.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-eng_rsax.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
-eng_rsax.o: eng_rsax.c
 eng_table.o: ../../e_os.h ../../include/openssl/asn1.h
 eng_table.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
 eng_table.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
index 7edf12e..195a3a9 100644 (file)
@@ -76,9 +76,6 @@ void ENGINE_load_builtin_engines(void)
 #if !defined(OPENSSL_NO_HW) && (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV))
     ENGINE_load_cryptodev();
 #endif
-#ifndef OPENSSL_NO_RSAX
-    ENGINE_load_rsax();
-#endif
 #ifndef OPENSSL_NO_RDRAND
     ENGINE_load_rdrand();
 #endif
index bcb936d..926d95c 100644 (file)
@@ -54,10 +54,10 @@ void ENGINE_load_cryptodev(void)
 
 # include <sys/types.h>
 # include <crypto/cryptodev.h>
-# include <crypto/dh/dh.h>
-# include <crypto/dsa/dsa.h>
-# include <crypto/err/err.h>
-# include <crypto/rsa/rsa.h>
+# include <openssl/dh.h>
+# include <openssl/dsa.h>
+# include <openssl/err.h>
+# include <openssl/rsa.h>
 # include <sys/ioctl.h>
 # include <errno.h>
 # include <stdio.h>
@@ -160,6 +160,17 @@ static struct {
     {
         CRYPTO_AES_CBC, NID_aes_256_cbc, 16, 32,
     },
+# ifdef CRYPTO_AES_CTR
+    {
+        CRYPTO_AES_CTR, NID_aes_128_ctr, 14, 16,
+    },
+    {
+        CRYPTO_AES_CTR, NID_aes_192_ctr, 14, 24,
+    },
+    {
+        CRYPTO_AES_CTR, NID_aes_256_ctr, 14, 32,
+    },
+# endif
     {
         CRYPTO_BLF_CBC, NID_bf_cbc, 8, 16,
     },
@@ -630,6 +641,46 @@ const EVP_CIPHER cryptodev_aes_256_cbc = {
     NULL
 };
 
+# ifdef CRYPTO_AES_CTR
+const EVP_CIPHER cryptodev_aes_ctr = {
+    NID_aes_128_ctr,
+    16, 16, 14,
+    EVP_CIPH_CTR_MODE,
+    cryptodev_init_key,
+    cryptodev_cipher,
+    cryptodev_cleanup,
+    sizeof(struct dev_crypto_state),
+    EVP_CIPHER_set_asn1_iv,
+    EVP_CIPHER_get_asn1_iv,
+    NULL
+};
+
+const EVP_CIPHER cryptodev_aes_ctr_192 = {
+    NID_aes_192_ctr,
+    16, 24, 14,
+    EVP_CIPH_CTR_MODE,
+    cryptodev_init_key,
+    cryptodev_cipher,
+    cryptodev_cleanup,
+    sizeof(struct dev_crypto_state),
+    EVP_CIPHER_set_asn1_iv,
+    EVP_CIPHER_get_asn1_iv,
+    NULL
+};
+
+const EVP_CIPHER cryptodev_aes_ctr_256 = {
+    NID_aes_256_ctr,
+    16, 32, 14,
+    EVP_CIPH_CTR_MODE,
+    cryptodev_init_key,
+    cryptodev_cipher,
+    cryptodev_cleanup,
+    sizeof(struct dev_crypto_state),
+    EVP_CIPHER_set_asn1_iv,
+    EVP_CIPHER_get_asn1_iv,
+    NULL
+};
+# endif
 /*
  * Registered by the ENGINE when used to find out how to deal with
  * a particular NID in the ENGINE. this says what we'll do at the
@@ -667,6 +718,17 @@ cryptodev_engine_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
     case NID_aes_256_cbc:
         *cipher = &cryptodev_aes_256_cbc;
         break;
+# ifdef CRYPTO_AES_CTR
+    case NID_aes_128_ctr:
+        *cipher = &cryptodev_aes_ctr;
+        break;
+    case NID_aes_192_ctr:
+        *cipher = &cryptodev_aes_ctr_192;
+        break;
+    case NID_aes_256_ctr:
+        *cipher = &cryptodev_aes_ctr_256;
+        break;
+# endif
     default:
         *cipher = NULL;
         break;
diff --git a/crypto/engine/eng_rsax.c b/crypto/engine/eng_rsax.c
deleted file mode 100644 (file)
index 86ee9d8..0000000
+++ /dev/null
@@ -1,701 +0,0 @@
-/* crypto/engine/eng_rsax.c */
-/* Copyright (c) 2010-2010 Intel Corp.
- *   Author: Vinodh.Gopal@intel.com
- *           Jim Guilford
- *           Erdinc.Ozturk@intel.com
- *           Maxim.Perminov@intel.com
- *           Ying.Huang@intel.com
- *
- * More information about algorithm used can be found at:
- *   http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf
- */
-/* ====================================================================
- * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- *    software must display the following acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- *    endorse or promote products derived from this software without
- *    prior written permission. For written permission, please contact
- *    licensing@OpenSSL.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- *    nor may "OpenSSL" appear in their names without prior written
- *    permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- *    acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
- *
- * This product includes cryptographic software written by Eric Young
- * (eay@cryptsoft.com).  This product includes software written by Tim
- * Hudson (tjh@cryptsoft.com).
- */
-
-#include <openssl/opensslconf.h>
-
-#include <stdio.h>
-#include <string.h>
-#include <openssl/crypto.h>
-#include <openssl/buffer.h>
-#include <openssl/engine.h>
-#ifndef OPENSSL_NO_RSA
-# include <openssl/rsa.h>
-#endif
-#include <openssl/bn.h>
-#include <openssl/err.h>
-
-/* RSAX is available **ONLY* on x86_64 CPUs */
-#undef COMPILE_RSAX
-
-#if (defined(__x86_64) || defined(__x86_64__) || \
-     defined(_M_AMD64) || defined (_M_X64)) && !defined(OPENSSL_NO_ASM)
-# define COMPILE_RSAX
-static ENGINE *ENGINE_rsax(void);
-#endif
-
-void ENGINE_load_rsax(void)
-{
-/* On non-x86 CPUs it just returns. */
-#ifdef COMPILE_RSAX
-    ENGINE *toadd = ENGINE_rsax();
-    if (!toadd)
-        return;
-    ENGINE_add(toadd);
-    ENGINE_free(toadd);
-    ERR_clear_error();
-#endif
-}
-
-#ifdef COMPILE_RSAX
-# define E_RSAX_LIB_NAME "rsax engine"
-
-static int e_rsax_destroy(ENGINE *e);
-static int e_rsax_init(ENGINE *e);
-static int e_rsax_finish(ENGINE *e);
-static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f) (void));
-
-# ifndef OPENSSL_NO_RSA
-/* RSA stuff */
-static int e_rsax_rsa_mod_exp(BIGNUM *r, const BIGNUM *I, RSA *rsa,
-                              BN_CTX *ctx);
-static int e_rsax_rsa_finish(RSA *r);
-# endif
-
-static const ENGINE_CMD_DEFN e_rsax_cmd_defns[] = {
-    {0, NULL, NULL, 0}
-};
-
-# ifndef OPENSSL_NO_RSA
-/* Our internal RSA_METHOD that we provide pointers to */
-static RSA_METHOD e_rsax_rsa = {
-    "Intel RSA-X method",
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    e_rsax_rsa_mod_exp,
-    NULL,
-    NULL,
-    e_rsax_rsa_finish,
-    RSA_FLAG_CACHE_PUBLIC | RSA_FLAG_CACHE_PRIVATE,
-    NULL,
-    NULL,
-    NULL
-};
-# endif
-
-/* Constants used when creating the ENGINE */
-static const char *engine_e_rsax_id = "rsax";
-static const char *engine_e_rsax_name = "RSAX engine support";
-
-/* This internal function is used by ENGINE_rsax() */
-static int bind_helper(ENGINE *e)
-{
-# ifndef OPENSSL_NO_RSA
-    const RSA_METHOD *meth1;
-# endif
-    if (!ENGINE_set_id(e, engine_e_rsax_id) ||
-        !ENGINE_set_name(e, engine_e_rsax_name) ||
-# ifndef OPENSSL_NO_RSA
-        !ENGINE_set_RSA(e, &e_rsax_rsa) ||
-# endif
-        !ENGINE_set_destroy_function(e, e_rsax_destroy) ||
-        !ENGINE_set_init_function(e, e_rsax_init) ||
-        !ENGINE_set_finish_function(e, e_rsax_finish) ||
-        !ENGINE_set_ctrl_function(e, e_rsax_ctrl) ||
-        !ENGINE_set_cmd_defns(e, e_rsax_cmd_defns))
-        return 0;
-
-# ifndef OPENSSL_NO_RSA
-    meth1 = RSA_PKCS1_SSLeay();
-    e_rsax_rsa.rsa_pub_enc = meth1->rsa_pub_enc;
-    e_rsax_rsa.rsa_pub_dec = meth1->rsa_pub_dec;
-    e_rsax_rsa.rsa_priv_enc = meth1->rsa_priv_enc;
-    e_rsax_rsa.rsa_priv_dec = meth1->rsa_priv_dec;
-    e_rsax_rsa.bn_mod_exp = meth1->bn_mod_exp;
-# endif
-    return 1;
-}
-
-static ENGINE *ENGINE_rsax(void)
-{
-    ENGINE *ret = ENGINE_new();
-    if (!ret)
-        return NULL;
-    if (!bind_helper(ret)) {
-        ENGINE_free(ret);
-        return NULL;
-    }
-    return ret;
-}
-
-# ifndef OPENSSL_NO_RSA
-/* Used to attach our own key-data to an RSA structure */
-static int rsax_ex_data_idx = -1;
-# endif
-
-static int e_rsax_destroy(ENGINE *e)
-{
-    return 1;
-}
-
-/* (de)initialisation functions. */
-static int e_rsax_init(ENGINE *e)
-{
-# ifndef OPENSSL_NO_RSA
-    if (rsax_ex_data_idx == -1)
-        rsax_ex_data_idx = RSA_get_ex_new_index(0, NULL, NULL, NULL, NULL);
-# endif
-    if (rsax_ex_data_idx == -1)
-        return 0;
-    return 1;
-}
-
-static int e_rsax_finish(ENGINE *e)
-{
-    return 1;
-}
-
-static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f) (void))
-{
-    int to_return = 1;
-
-    switch (cmd) {
-        /* The command isn't understood by this engine */
-    default:
-        to_return = 0;
-        break;
-    }
-
-    return to_return;
-}
-
-# ifndef OPENSSL_NO_RSA
-
-#  ifdef _WIN32
-typedef unsigned __int64 UINT64;
-#  else
-typedef unsigned long long UINT64;
-#  endif
-typedef unsigned short UINT16;
-
-/*
- * Table t is interleaved in the following manner: The order in memory is
- * t[0][0], t[0][1], ..., t[0][7], t[1][0], ... A particular 512-bit value is
- * stored in t[][index] rather than the more normal t[index][]; i.e. the
- * qwords of a particular entry in t are not adjacent in memory
- */
-
-/* Init BIGNUM b from the interleaved UINT64 array */
-static int interleaved_array_to_bn_512(BIGNUM *b, UINT64 *array);
-
-/*
- * Extract array elements from BIGNUM b To set the whole array from b, call
- * with n=8
- */
-static int bn_extract_to_array_512(const BIGNUM *b, unsigned int n,
-                                   UINT64 *array);
-
-struct mod_ctx_512 {
-    UINT64 t[8][8];
-    UINT64 m[8];
-    UINT64 m1[8];               /* 2^278 % m */
-    UINT64 m2[8];               /* 2^640 % m */
-    UINT64 k1[2];               /* (- 1/m) % 2^128 */
-};
-
-static int mod_exp_pre_compute_data_512(UINT64 *m, struct mod_ctx_512 *data);
-
-void mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */
-                 UINT64 *g,     /* 512 bits, 8 qwords */
-                 UINT64 *exp,   /* 512 bits, 8 qwords */
-                 struct mod_ctx_512 *data);
-
-typedef struct st_e_rsax_mod_ctx {
-    UINT64 type;
-    union {
-        struct mod_ctx_512 b512;
-    } ctx;
-
-} E_RSAX_MOD_CTX;
-
-static E_RSAX_MOD_CTX *e_rsax_get_ctx(RSA *rsa, int idx, BIGNUM *m)
-{
-    E_RSAX_MOD_CTX *hptr;
-
-    if (idx < 0 || idx > 2)
-        return NULL;
-
-    hptr = RSA_get_ex_data(rsa, rsax_ex_data_idx);
-    if (!hptr) {
-        hptr = OPENSSL_malloc(3 * sizeof(E_RSAX_MOD_CTX));
-        if (!hptr)
-            return NULL;
-        hptr[2].type = hptr[1].type = hptr[0].type = 0;
-        RSA_set_ex_data(rsa, rsax_ex_data_idx, hptr);
-    }
-
-    if (hptr[idx].type == (UINT64)BN_num_bits(m))
-        return hptr + idx;
-
-    if (BN_num_bits(m) == 512) {
-        UINT64 _m[8];
-        bn_extract_to_array_512(m, 8, _m);
-        memset(&hptr[idx].ctx.b512, 0, sizeof(struct mod_ctx_512));
-        mod_exp_pre_compute_data_512(_m, &hptr[idx].ctx.b512);
-    }
-
-    hptr[idx].type = BN_num_bits(m);
-    return hptr + idx;
-}
-
-static int e_rsax_rsa_finish(RSA *rsa)
-{
-    E_RSAX_MOD_CTX *hptr = RSA_get_ex_data(rsa, rsax_ex_data_idx);
-    if (hptr) {
-        OPENSSL_free(hptr);
-        RSA_set_ex_data(rsa, rsax_ex_data_idx, NULL);
-    }
-    if (rsa->_method_mod_n)
-        BN_MONT_CTX_free(rsa->_method_mod_n);
-    if (rsa->_method_mod_p)
-        BN_MONT_CTX_free(rsa->_method_mod_p);
-    if (rsa->_method_mod_q)
-        BN_MONT_CTX_free(rsa->_method_mod_q);
-    return 1;
-}
-
-static int e_rsax_bn_mod_exp(BIGNUM *r, const BIGNUM *g, const BIGNUM *e,
-                             const BIGNUM *m, BN_CTX *ctx,
-                             BN_MONT_CTX *in_mont,
-                             E_RSAX_MOD_CTX *rsax_mod_ctx)
-{
-    if (rsax_mod_ctx && BN_get_flags(e, BN_FLG_CONSTTIME) != 0) {
-        if (BN_num_bits(m) == 512) {
-            UINT64 _r[8];
-            UINT64 _g[8];
-            UINT64 _e[8];
-
-            /* Init the arrays from the BIGNUMs */
-            bn_extract_to_array_512(g, 8, _g);
-            bn_extract_to_array_512(e, 8, _e);
-
-            mod_exp_512(_r, _g, _e, &rsax_mod_ctx->ctx.b512);
-            /* Return the result in the BIGNUM */
-            interleaved_array_to_bn_512(r, _r);
-            return 1;
-        }
-    }
-
-    return BN_mod_exp_mont(r, g, e, m, ctx, in_mont);
-}
-
-/*
- * Declares for the Intel CIAP 512-bit / CRT / 1024 bit RSA modular
- * exponentiation routine precalculations and a structure to hold the
- * necessary values.  These files are meant to live in crypto/rsa/ in the
- * target openssl.
- */
-
-/*
- * Local method: extracts a piece from a BIGNUM, to fit it into
- * an array. Call with n=8 to extract an entire 512-bit BIGNUM
- */
-static int bn_extract_to_array_512(const BIGNUM *b, unsigned int n,
-                                   UINT64 *array)
-{
-    int i;
-    UINT64 tmp;
-    unsigned char bn_buff[64];
-    memset(bn_buff, 0, 64);
-    if (BN_num_bytes(b) > 64) {
-        printf("Can't support this byte size\n");
-        return 0;
-    }
-    if (BN_num_bytes(b) != 0) {
-        if (!BN_bn2bin(b, bn_buff + (64 - BN_num_bytes(b)))) {
-            printf("Error's in bn2bin\n");
-            /* We have to error, here */
-            return 0;
-        }
-    }
-    while (n-- > 0) {
-        array[n] = 0;
-        for (i = 7; i >= 0; i--) {
-            tmp = bn_buff[63 - (n * 8 + i)];
-            array[n] |= tmp << (8 * i);
-        }
-    }
-    return 1;
-}
-
-/* Init a 512-bit BIGNUM from the UINT64*_ (8 * 64) interleaved array */
-static int interleaved_array_to_bn_512(BIGNUM *b, UINT64 *array)
-{
-    unsigned char tmp[64];
-    int n = 8;
-    int i;
-    while (n-- > 0) {
-        for (i = 7; i >= 0; i--) {
-            tmp[63 - (n * 8 + i)] = (unsigned char)(array[n] >> (8 * i));
-    }}
-    BN_bin2bn(tmp, 64, b);
-    return 0;
-}
-
-/* The main 512bit precompute call */
-static int mod_exp_pre_compute_data_512(UINT64 *m, struct mod_ctx_512 *data)
-{
-    BIGNUM two_768, two_640, two_128, two_512, tmp, _m, tmp2;
-
-    /* We need a BN_CTX for the modulo functions */
-    BN_CTX *ctx;
-    /* Some tmps */
-    UINT64 _t[8];
-    int i, j, ret = 0;
-
-    /* Init _m with m */
-    BN_init(&_m);
-    interleaved_array_to_bn_512(&_m, m);
-    memset(_t, 0, 64);
-
-    /* Inits */
-    BN_init(&two_768);
-    BN_init(&two_640);
-    BN_init(&two_128);
-    BN_init(&two_512);
-    BN_init(&tmp);
-    BN_init(&tmp2);
-
-    /* Create our context */
-    if ((ctx = BN_CTX_new()) == NULL) {
-        goto err;
-    }
-    BN_CTX_start(ctx);
-
-    /*
-     * For production, if you care, these only need to be set once,
-     * and may be made constants.
-     */
-    BN_lshift(&two_768, BN_value_one(), 768);
-    BN_lshift(&two_640, BN_value_one(), 640);
-    BN_lshift(&two_128, BN_value_one(), 128);
-    BN_lshift(&two_512, BN_value_one(), 512);
-
-    if (0 == (m[7] & 0x8000000000000000)) {
-        goto err;
-    }
-    if (0 == (m[0] & 0x1)) {    /* Odd modulus required for Mont */
-        goto err;
-    }
-
-    /* Precompute m1 */
-    BN_mod(&tmp, &two_768, &_m, ctx);
-    if (!bn_extract_to_array_512(&tmp, 8, &data->m1[0])) {
-        goto err;
-    }
-
-    /* Precompute m2 */
-    BN_mod(&tmp, &two_640, &_m, ctx);
-    if (!bn_extract_to_array_512(&tmp, 8, &data->m2[0])) {
-        goto err;
-    }
-
-    /*
-     * Precompute k1, a 128b number = ((-1)* m-1 ) mod 2128; k1 should
-     * be non-negative.
-     */
-    BN_mod_inverse(&tmp, &_m, &two_128, ctx);
-    if (!BN_is_zero(&tmp)) {
-        BN_sub(&tmp, &two_128, &tmp);
-    }
-    if (!bn_extract_to_array_512(&tmp, 2, &data->k1[0])) {
-        goto err;
-    }
-
-    /* Precompute t */
-    for (i = 0; i < 8; i++) {
-        BN_zero(&tmp);
-        if (i & 1) {
-            BN_add(&tmp, &two_512, &tmp);
-        }
-        if (i & 2) {
-            BN_add(&tmp, &two_512, &tmp);
-        }
-        if (i & 4) {
-            BN_add(&tmp, &two_640, &tmp);
-        }
-
-        BN_nnmod(&tmp2, &tmp, &_m, ctx);
-        if (!bn_extract_to_array_512(&tmp2, 8, _t)) {
-            goto err;
-        }
-        for (j = 0; j < 8; j++)
-            data->t[j][i] = _t[j];
-    }
-
-    /* Precompute m */
-    for (i = 0; i < 8; i++) {
-        data->m[i] = m[i];
-    }
-
-    ret = 1;
-
- err:
-    /* Cleanup */
-    if (ctx != NULL) {
-        BN_CTX_end(ctx);
-        BN_CTX_free(ctx);
-    }
-    BN_free(&two_768);
-    BN_free(&two_640);
-    BN_free(&two_128);
-    BN_free(&two_512);
-    BN_free(&tmp);
-    BN_free(&tmp2);
-    BN_free(&_m);
-
-    return ret;
-}
-
-static int e_rsax_rsa_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa,
-                              BN_CTX *ctx)
-{
-    BIGNUM *r1, *m1, *vrfy;
-    BIGNUM local_dmp1, local_dmq1, local_c, local_r1;
-    BIGNUM *dmp1, *dmq1, *c, *pr1;
-    int ret = 0;
-
-    BN_CTX_start(ctx);
-    r1 = BN_CTX_get(ctx);
-    m1 = BN_CTX_get(ctx);
-    vrfy = BN_CTX_get(ctx);
-
-    {
-        BIGNUM local_p, local_q;
-        BIGNUM *p = NULL, *q = NULL;
-        int error = 0;
-
-        /*
-         * Make sure BN_mod_inverse in Montgomery intialization uses the
-         * BN_FLG_CONSTTIME flag (unless RSA_FLAG_NO_CONSTTIME is set)
-         */
-        if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) {
-            BN_init(&local_p);
-            p = &local_p;
-            BN_with_flags(p, rsa->p, BN_FLG_CONSTTIME);
-
-            BN_init(&local_q);
-            q = &local_q;
-            BN_with_flags(q, rsa->q, BN_FLG_CONSTTIME);
-        } else {
-            p = rsa->p;
-            q = rsa->q;
-        }
-
-        if (rsa->flags & RSA_FLAG_CACHE_PRIVATE) {
-            if (!BN_MONT_CTX_set_locked
-                (&rsa->_method_mod_p, CRYPTO_LOCK_RSA, p, ctx))
-                error = 1;
-            if (!BN_MONT_CTX_set_locked
-                (&rsa->_method_mod_q, CRYPTO_LOCK_RSA, q, ctx))
-                error = 1;
-        }
-
-        /* clean up */
-        if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) {
-            BN_free(&local_p);
-            BN_free(&local_q);
-        }
-        if (error)
-            goto err;
-    }
-
-    if (rsa->flags & RSA_FLAG_CACHE_PUBLIC)
-        if (!BN_MONT_CTX_set_locked
-            (&rsa->_method_mod_n, CRYPTO_LOCK_RSA, rsa->n, ctx))
-            goto err;
-
-    /* compute I mod q */
-    if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) {
-        c = &local_c;
-        BN_with_flags(c, I, BN_FLG_CONSTTIME);
-        if (!BN_mod(r1, c, rsa->q, ctx))
-            goto err;
-    } else {
-        if (!BN_mod(r1, I, rsa->q, ctx))
-            goto err;
-    }
-
-    /* compute r1^dmq1 mod q */
-    if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) {
-        dmq1 = &local_dmq1;
-        BN_with_flags(dmq1, rsa->dmq1, BN_FLG_CONSTTIME);
-    } else
-        dmq1 = rsa->dmq1;
-
-    if (!e_rsax_bn_mod_exp(m1, r1, dmq1, rsa->q, ctx,
-                           rsa->_method_mod_q, e_rsax_get_ctx(rsa, 0,
-                                                              rsa->q)))
-        goto err;
-
-    /* compute I mod p */
-    if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) {
-        c = &local_c;
-        BN_with_flags(c, I, BN_FLG_CONSTTIME);
-        if (!BN_mod(r1, c, rsa->p, ctx))
-            goto err;
-    } else {
-        if (!BN_mod(r1, I, rsa->p, ctx))
-            goto err;
-    }
-
-    /* compute r1^dmp1 mod p */
-    if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) {
-        dmp1 = &local_dmp1;
-        BN_with_flags(dmp1, rsa->dmp1, BN_FLG_CONSTTIME);
-    } else
-        dmp1 = rsa->dmp1;
-
-    if (!e_rsax_bn_mod_exp(r0, r1, dmp1, rsa->p, ctx,
-                           rsa->_method_mod_p, e_rsax_get_ctx(rsa, 1,
-                                                              rsa->p)))
-        goto err;
-
-    if (!BN_sub(r0, r0, m1))
-        goto err;
-    /*
-     * This will help stop the size of r0 increasing, which does affect the
-     * multiply if it optimised for a power of 2 size
-     */
-    if (BN_is_negative(r0))
-        if (!BN_add(r0, r0, rsa->p))
-            goto err;
-
-    if (!BN_mul(r1, r0, rsa->iqmp, ctx))
-        goto err;
-
-    /* Turn BN_FLG_CONSTTIME flag on before division operation */
-    if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) {
-        pr1 = &local_r1;
-        BN_with_flags(pr1, r1, BN_FLG_CONSTTIME);
-    } else
-        pr1 = r1;
-    if (!BN_mod(r0, pr1, rsa->p, ctx))
-        goto err;
-
-    /*
-     * If p < q it is occasionally possible for the correction of adding 'p'
-     * if r0 is negative above to leave the result still negative. This can
-     * break the private key operations: the following second correction
-     * should *always* correct this rare occurrence. This will *never* happen
-     * with OpenSSL generated keys because they ensure p > q [steve]
-     */
-    if (BN_is_negative(r0))
-        if (!BN_add(r0, r0, rsa->p))
-            goto err;
-    if (!BN_mul(r1, r0, rsa->q, ctx))
-        goto err;
-    if (!BN_add(r0, r1, m1))
-        goto err;
-
-    if (rsa->e && rsa->n) {
-        if (!e_rsax_bn_mod_exp
-            (vrfy, r0, rsa->e, rsa->n, ctx, rsa->_method_mod_n,
-             e_rsax_get_ctx(rsa, 2, rsa->n)))
-            goto err;
-
-        /*
-         * If 'I' was greater than (or equal to) rsa->n, the operation will
-         * be equivalent to using 'I mod n'. However, the result of the
-         * verify will *always* be less than 'n' so we don't check for
-         * absolute equality, just congruency.
-         */
-        if (!BN_sub(vrfy, vrfy, I))
-            goto err;
-        if (!BN_mod(vrfy, vrfy, rsa->n, ctx))
-            goto err;
-        if (BN_is_negative(vrfy))
-            if (!BN_add(vrfy, vrfy, rsa->n))
-                goto err;
-        if (!BN_is_zero(vrfy)) {
-            /*
-             * 'I' and 'vrfy' aren't congruent mod n. Don't leak
-             * miscalculated CRT output, just do a raw (slower) mod_exp and
-             * return that instead.
-             */
-
-            BIGNUM local_d;
-            BIGNUM *d = NULL;
-
-            if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) {
-                d = &local_d;
-                BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME);
-            } else
-                d = rsa->d;
-            if (!e_rsax_bn_mod_exp(r0, I, d, rsa->n, ctx,
-                                   rsa->_method_mod_n, e_rsax_get_ctx(rsa, 2,
-                                                                      rsa->n)))
-                goto err;
-        }
-    }
-    ret = 1;
-
- err:
-    BN_CTX_end(ctx);
-
-    return ret;
-}
-# endif                         /* !OPENSSL_NO_RSA */
-#endif                          /* !COMPILE_RSAX */
index e81096a..bd7b591 100644 (file)
@@ -413,7 +413,6 @@ void ENGINE_load_gost(void);
 #  endif
 # endif
 void ENGINE_load_cryptodev(void);
-void ENGINE_load_rsax(void);
 void ENGINE_load_rdrand(void);
 void ENGINE_load_builtin_engines(void);
 
index bafbc35..139afe3 100644 (file)
@@ -41,6 +41,7 @@ L NONE                crypto/x509/x509_vfy.h          NONE
 L NONE         crypto/ec/ec_lcl.h              NONE
 L NONE         crypto/asn1/asn_lcl.h           NONE
 L NONE         crypto/cms/cms_lcl.h            NONE
+L NONE         ssl/ssl_locl.h                  NONE
 
 
 F RSAREF_F_RSA_BN2BIN
index ed1502d..aaaad98 100644 (file)
@@ -28,8 +28,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_cnf.c \
        bio_md.c bio_b64.c bio_enc.c evp_err.c e_null.c \
        c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \
        evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \
-       e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \
-       e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c
+       e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c \
+       e_aes_cbc_hmac_sha1.c e_aes_cbc_hmac_sha256.c e_rc4_hmac_md5.c
 
 LIBOBJ=        encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \
        e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\
@@ -41,8 +41,8 @@ LIBOBJ=       encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \
        bio_md.o bio_b64.o bio_enc.o evp_err.o e_null.o \
        c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \
        evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \
-       e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \
-       e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o
+       e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o \
+       e_aes_cbc_hmac_sha1.o e_aes_cbc_hmac_sha256.o e_rc4_hmac_md5.o
 
 SRC= $(LIBSRC)
 
@@ -204,16 +204,36 @@ e_aes_cbc_hmac_sha1.o: ../../include/openssl/bio.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/crypto.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/e_os2.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/evp.h
+e_aes_cbc_hmac_sha1.o: ../../include/openssl/modes.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/obj_mac.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/objects.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/opensslconf.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/opensslv.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/ossl_typ.h
+e_aes_cbc_hmac_sha1.o: ../../include/openssl/rand.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/safestack.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/sha.h
 e_aes_cbc_hmac_sha1.o: ../../include/openssl/stack.h
-e_aes_cbc_hmac_sha1.o: ../../include/openssl/symhacks.h e_aes_cbc_hmac_sha1.c
-e_aes_cbc_hmac_sha1.o: evp_locl.h
+e_aes_cbc_hmac_sha1.o: ../../include/openssl/symhacks.h ../modes/modes_lcl.h
+e_aes_cbc_hmac_sha1.o: e_aes_cbc_hmac_sha1.c
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/aes.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/asn1.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/bio.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/crypto.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/e_os2.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/evp.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/modes.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/obj_mac.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/objects.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/opensslconf.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/opensslv.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/ossl_typ.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/rand.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/safestack.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/sha.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/stack.h
+e_aes_cbc_hmac_sha256.o: ../../include/openssl/symhacks.h ../modes/modes_lcl.h
+e_aes_cbc_hmac_sha256.o: e_aes_cbc_hmac_sha256.c
 e_bf.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
 e_bf.o: ../../include/openssl/blowfish.h ../../include/openssl/buffer.h
 e_bf.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
@@ -227,11 +247,13 @@ e_camellia.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
 e_camellia.o: ../../include/openssl/camellia.h ../../include/openssl/crypto.h
 e_camellia.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
 e_camellia.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-e_camellia.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+e_camellia.o: ../../include/openssl/modes.h ../../include/openssl/obj_mac.h
+e_camellia.o: ../../include/openssl/objects.h
 e_camellia.o: ../../include/openssl/opensslconf.h
 e_camellia.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 e_camellia.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-e_camellia.o: ../../include/openssl/symhacks.h e_camellia.c evp_locl.h
+e_camellia.o: ../../include/openssl/symhacks.h ../modes/modes_lcl.h
+e_camellia.o: e_camellia.c evp_locl.h
 e_cast.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
 e_cast.o: ../../include/openssl/buffer.h ../../include/openssl/cast.h
 e_cast.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
@@ -260,9 +282,10 @@ e_des3.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
 e_des3.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
 e_des3.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
 e_des3.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h
-e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-e_des3.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h
-e_des3.o: ../../include/openssl/ui_compat.h ../cryptlib.h e_des3.c evp_locl.h
+e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+e_des3.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+e_des3.o: ../../include/openssl/ui.h ../../include/openssl/ui_compat.h
+e_des3.o: ../cryptlib.h e_des3.c evp_locl.h
 e_idea.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
 e_idea.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
 e_idea.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
@@ -394,13 +417,6 @@ evp_err.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
 evp_err.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 evp_err.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
 evp_err.o: ../../include/openssl/symhacks.h evp_err.c
-evp_fips.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
-evp_fips.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-evp_fips.o: ../../include/openssl/evp.h ../../include/openssl/obj_mac.h
-evp_fips.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-evp_fips.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-evp_fips.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-evp_fips.o: ../../include/openssl/symhacks.h evp_fips.c
 evp_key.o: ../../e_os.h ../../include/openssl/asn1.h
 evp_key.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
 evp_key.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
index 3097c21..280e584 100644 (file)
@@ -93,6 +93,7 @@ void OpenSSL_add_all_ciphers(void)
     EVP_add_cipher(EVP_des_ecb());
     EVP_add_cipher(EVP_des_ede());
     EVP_add_cipher(EVP_des_ede3());
+    EVP_add_cipher(EVP_des_ede3_wrap());
 #endif
 
 #ifndef OPENSSL_NO_RC4
@@ -172,6 +173,8 @@ void OpenSSL_add_all_ciphers(void)
     EVP_add_cipher(EVP_aes_128_ctr());
     EVP_add_cipher(EVP_aes_128_gcm());
     EVP_add_cipher(EVP_aes_128_xts());
+    EVP_add_cipher(EVP_aes_128_ccm());
+    EVP_add_cipher(EVP_aes_128_wrap());
     EVP_add_cipher_alias(SN_aes_128_cbc, "AES128");
     EVP_add_cipher_alias(SN_aes_128_cbc, "aes128");
     EVP_add_cipher(EVP_aes_192_ecb());
@@ -182,6 +185,8 @@ void OpenSSL_add_all_ciphers(void)
     EVP_add_cipher(EVP_aes_192_ofb());
     EVP_add_cipher(EVP_aes_192_ctr());
     EVP_add_cipher(EVP_aes_192_gcm());
+    EVP_add_cipher(EVP_aes_192_ccm());
+    EVP_add_cipher(EVP_aes_192_wrap());
     EVP_add_cipher_alias(SN_aes_192_cbc, "AES192");
     EVP_add_cipher_alias(SN_aes_192_cbc, "aes192");
     EVP_add_cipher(EVP_aes_256_ecb());
@@ -193,12 +198,18 @@ void OpenSSL_add_all_ciphers(void)
     EVP_add_cipher(EVP_aes_256_ctr());
     EVP_add_cipher(EVP_aes_256_gcm());
     EVP_add_cipher(EVP_aes_256_xts());
+    EVP_add_cipher(EVP_aes_256_ccm());
+    EVP_add_cipher(EVP_aes_256_wrap());
     EVP_add_cipher_alias(SN_aes_256_cbc, "AES256");
     EVP_add_cipher_alias(SN_aes_256_cbc, "aes256");
 # if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1)
     EVP_add_cipher(EVP_aes_128_cbc_hmac_sha1());
     EVP_add_cipher(EVP_aes_256_cbc_hmac_sha1());
 # endif
+# if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256)
+    EVP_add_cipher(EVP_aes_128_cbc_hmac_sha256());
+    EVP_add_cipher(EVP_aes_256_cbc_hmac_sha256());
+# endif
 #endif
 
 #ifndef OPENSSL_NO_CAMELLIA
index 2e202c8..f2643f3 100644 (file)
 
 #ifdef OPENSSL_FIPS
 # include <openssl/fips.h>
+# include "evp_locl.h"
 #endif
 
 void EVP_MD_CTX_init(EVP_MD_CTX *ctx)
@@ -145,6 +146,17 @@ int EVP_DigestInit(EVP_MD_CTX *ctx, const EVP_MD *type)
 int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type, ENGINE *impl)
 {
     EVP_MD_CTX_clear_flags(ctx, EVP_MD_CTX_FLAG_CLEANED);
+#ifdef OPENSSL_FIPS
+    /* In FIPS mode, switch to the approved implementation if possible */
+    if (FIPS_mode()) {
+        const EVP_MD *fipsmd;
+        if (type) {
+            fipsmd = evp_get_fips_md(type);
+            if (fipsmd)
+                type = fipsmd;
+        }
+    }
+#endif
 #ifndef OPENSSL_NO_ENGINE
     /*
      * Whether it's nice or not, "Inits" can be used on "Final"'d contexts so
index a4327fc..1734a82 100644 (file)
 # include <assert.h>
 # include <openssl/aes.h>
 # include "evp_locl.h"
-# ifndef OPENSSL_FIPS
-#  include "modes_lcl.h"
-#  include <openssl/rand.h>
+# include "modes_lcl.h"
+# include <openssl/rand.h>
+
+# undef EVP_CIPH_FLAG_FIPS
+# define EVP_CIPH_FLAG_FIPS 0
 
 typedef struct {
-    AES_KEY ks;
+    union {
+        double align;
+        AES_KEY ks;
+    } ks;
     block128_f block;
     union {
         cbc128_f cbc;
@@ -71,7 +76,10 @@ typedef struct {
 } EVP_AES_KEY;
 
 typedef struct {
-    AES_KEY ks;                 /* AES key schedule to use */
+    union {
+        double align;
+        AES_KEY ks;
+    } ks;                       /* AES key schedule to use */
     int key_set;                /* Set if key initialised */
     int iv_set;                 /* Set if an iv is set */
     GCM128_CONTEXT gcm;
@@ -84,7 +92,10 @@ typedef struct {
 } EVP_AES_GCM_CTX;
 
 typedef struct {
-    AES_KEY ks1, ks2;           /* AES key schedules to use */
+    union {
+        double align;
+        AES_KEY ks;
+    } ks1, ks2;                 /* AES key schedules to use */
     XTS128_CONTEXT xts;
     void (*stream) (const unsigned char *in,
                     unsigned char *out, size_t length,
@@ -93,7 +104,10 @@ typedef struct {
 } EVP_AES_XTS_CTX;
 
 typedef struct {
-    AES_KEY ks;                 /* AES key schedule to use */
+    union {
+        double align;
+        AES_KEY ks;
+    } ks;                       /* AES key schedule to use */
     int key_set;                /* Set if key initialised */
     int iv_set;                 /* Set if an iv is set */
     int tag_set;                /* Set if tag is valid */
@@ -103,9 +117,9 @@ typedef struct {
     ccm128_f str;
 } EVP_AES_CCM_CTX;
 
-#  define MAXBITCHUNK     ((size_t)1<<(sizeof(size_t)*8-4))
+# define MAXBITCHUNK     ((size_t)1<<(sizeof(size_t)*8-4))
 
-#  ifdef VPAES_ASM
+# ifdef VPAES_ASM
 int vpaes_set_encrypt_key(const unsigned char *userKey, int bits,
                           AES_KEY *key);
 int vpaes_set_decrypt_key(const unsigned char *userKey, int bits,
@@ -120,8 +134,8 @@ void vpaes_cbc_encrypt(const unsigned char *in,
                        unsigned char *out,
                        size_t length,
                        const AES_KEY *key, unsigned char *ivec, int enc);
-#  endif
-#  ifdef BSAES_ASM
+# endif
+# ifdef BSAES_ASM
 void bsaes_cbc_encrypt(const unsigned char *in, unsigned char *out,
                        size_t length, const AES_KEY *key,
                        unsigned char ivec[16], int enc);
@@ -134,40 +148,54 @@ void bsaes_xts_encrypt(const unsigned char *inp, unsigned char *out,
 void bsaes_xts_decrypt(const unsigned char *inp, unsigned char *out,
                        size_t len, const AES_KEY *key1,
                        const AES_KEY *key2, const unsigned char iv[16]);
-#  endif
-#  ifdef AES_CTR_ASM
+# endif
+# ifdef AES_CTR_ASM
 void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
                        size_t blocks, const AES_KEY *key,
                        const unsigned char ivec[AES_BLOCK_SIZE]);
-#  endif
-#  ifdef AES_XTS_ASM
+# endif
+# ifdef AES_XTS_ASM
 void AES_xts_encrypt(const char *inp, char *out, size_t len,
                      const AES_KEY *key1, const AES_KEY *key2,
                      const unsigned char iv[16]);
 void AES_xts_decrypt(const char *inp, char *out, size_t len,
                      const AES_KEY *key1, const AES_KEY *key2,
                      const unsigned char iv[16]);
+# endif
+/* PowerPC: HWAES_* names map to aes_p8_* routines, gated by OPENSSL_ppccap_P */
+# if     defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
+#  include "ppc_arch.h"
+#  ifdef VPAES_ASM
+#   define VPAES_CAPABLE (OPENSSL_ppccap_P & PPC_ALTIVEC)
 #  endif
+#  define HWAES_CAPABLE  (OPENSSL_ppccap_P & PPC_CRYPTO207)
+#  define HWAES_set_encrypt_key aes_p8_set_encrypt_key
+#  define HWAES_set_decrypt_key aes_p8_set_decrypt_key
+#  define HWAES_encrypt aes_p8_encrypt
+#  define HWAES_decrypt aes_p8_decrypt
+#  define HWAES_cbc_encrypt aes_p8_cbc_encrypt
+#  define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
+# endif
 
-#  if     defined(AES_ASM) && !defined(I386_ONLY) &&      (  \
+# if     defined(AES_ASM) && !defined(I386_ONLY) &&      (  \
         ((defined(__i386)       || defined(__i386__)    || \
           defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \
         defined(__x86_64)       || defined(__x86_64__)  || \
         defined(_M_AMD64)       || defined(_M_X64)      || \
         defined(__INTEL__)                              )
 
-extern unsigned int OPENSSL_ia32cap_P[2];
+extern unsigned int OPENSSL_ia32cap_P[];
 
-#   ifdef VPAES_ASM
-#    define VPAES_CAPABLE   (OPENSSL_ia32cap_P[1]&(1<<(41-32)))
-#   endif
-#   ifdef BSAES_ASM
-#    define BSAES_CAPABLE   (OPENSSL_ia32cap_P[1]&(1<<(41-32)))
-#   endif
+#  ifdef VPAES_ASM
+#   define VPAES_CAPABLE   (OPENSSL_ia32cap_P[1]&(1<<(41-32)))
+#  endif
+#  ifdef BSAES_ASM
+#   define BSAES_CAPABLE   (OPENSSL_ia32cap_P[1]&(1<<(41-32)))
+#  endif
 /*
  * AES-NI section
  */
-#   define AESNI_CAPABLE   (OPENSSL_ia32cap_P[1]&(1<<(57-32)))
+#  define AESNI_CAPABLE   (OPENSSL_ia32cap_P[1]&(1<<(57-32)))
 
 int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
                           AES_KEY *key);
@@ -218,6 +246,26 @@ void aesni_ccm64_decrypt_blocks(const unsigned char *in,
                                 const unsigned char ivec[16],
                                 unsigned char cmac[16]);
 
+#  if defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
+size_t aesni_gcm_encrypt(const unsigned char *in,
+                         unsigned char *out,
+                         size_t len,
+                         const void *key, unsigned char ivec[16], u64 *Xi);
+#   define AES_gcm_encrypt aesni_gcm_encrypt /* generic name -> AES-NI routine */
+size_t aesni_gcm_decrypt(const unsigned char *in,
+                         unsigned char *out,
+                         size_t len,
+                         const void *key, unsigned char ivec[16], u64 *Xi);
+#   define AES_gcm_decrypt aesni_gcm_decrypt /* generic name -> AES-NI routine */
+void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *in,
+                   size_t len);
+#   define AES_GCM_ASM(gctx)       (gctx->ctr==aesni_ctr32_encrypt_blocks && \
+                                 gctx->gcm.ghash==gcm_ghash_avx) /* AES-NI CTR and AVX GHASH both selected */
+#   define AES_GCM_ASM2(gctx)      (gctx->gcm.block==(block128_f)aesni_encrypt && \
+                                 gctx->gcm.ghash==gcm_ghash_avx)
+#   undef AES_GCM_ASM2          /* defined just above, then dropped: minor size optimization */
+#  endif                        /* x86_64 / AMD64 targets only */
+
 static int aesni_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                           const unsigned char *iv, int enc)
 {
@@ -271,23 +319,23 @@ static int aesni_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     return 1;
 }
 
-#   define aesni_ofb_cipher aes_ofb_cipher
+#  define aesni_ofb_cipher aes_ofb_cipher
 static int aesni_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                             const unsigned char *in, size_t len);
 
-#   define aesni_cfb_cipher aes_cfb_cipher
+#  define aesni_cfb_cipher aes_cfb_cipher
 static int aesni_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                             const unsigned char *in, size_t len);
 
-#   define aesni_cfb8_cipher aes_cfb8_cipher
+#  define aesni_cfb8_cipher aes_cfb8_cipher
 static int aesni_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                              const unsigned char *in, size_t len);
 
-#   define aesni_cfb1_cipher aes_cfb1_cipher
+#  define aesni_cfb1_cipher aes_cfb1_cipher
 static int aesni_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                              const unsigned char *in, size_t len);
 
-#   define aesni_ctr_cipher aes_ctr_cipher
+#  define aesni_ctr_cipher aes_ctr_cipher
 static int aesni_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                             const unsigned char *in, size_t len);
 
@@ -298,7 +346,7 @@ static int aesni_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
     if (!iv && !key)
         return 1;
     if (key) {
-        aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
+        aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
         CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) aesni_encrypt);
         gctx->ctr = (ctr128_f) aesni_ctr32_encrypt_blocks;
         /*
@@ -323,7 +371,7 @@ static int aesni_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
     return 1;
 }
 
-#   define aesni_gcm_cipher aes_gcm_cipher
+#  define aesni_gcm_cipher aes_gcm_cipher
 static int aesni_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                             const unsigned char *in, size_t len);
 
@@ -337,17 +385,17 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
     if (key) {
         /* key_len is two AES keys */
         if (enc) {
-            aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+            aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
             xctx->xts.block1 = (block128_f) aesni_encrypt;
             xctx->stream = aesni_xts_encrypt;
         } else {
-            aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+            aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
             xctx->xts.block1 = (block128_f) aesni_decrypt;
             xctx->stream = aesni_xts_decrypt;
         }
 
         aesni_set_encrypt_key(key + ctx->key_len / 2,
-                              ctx->key_len * 4, &xctx->ks2);
+                              ctx->key_len * 4, &xctx->ks2.ks);
         xctx->xts.block2 = (block128_f) aesni_encrypt;
 
         xctx->xts.key1 = &xctx->ks1;
@@ -361,7 +409,7 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
     return 1;
 }
 
-#   define aesni_xts_cipher aes_xts_cipher
+#  define aesni_xts_cipher aes_xts_cipher
 static int aesni_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                             const unsigned char *in, size_t len);
 
@@ -372,7 +420,7 @@ static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
     if (!iv && !key)
         return 1;
     if (key) {
-        aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
+        aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
         CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
                            &cctx->ks, (block128_f) aesni_encrypt);
         cctx->str = enc ? (ccm128_f) aesni_ccm64_encrypt_blocks :
@@ -386,11 +434,11 @@ static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
     return 1;
 }
 
-#   define aesni_ccm_cipher aes_ccm_cipher
+#  define aesni_ccm_cipher aes_ccm_cipher
 static int aesni_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                             const unsigned char *in, size_t len);
 
-#   define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
+#  define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
 static const EVP_CIPHER aesni_##keylen##_##mode = { \
         nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \
         flags|EVP_CIPH_##MODE##_MODE,   \
@@ -411,7 +459,7 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \
 const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
 { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; }
 
-#   define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \
+#  define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \
 static const EVP_CIPHER aesni_##keylen##_##mode = { \
         nid##_##keylen##_##mode,blocksize, \
         (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \
@@ -433,9 +481,378 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \
 const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
 { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; }
 
+# elif   defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
+
+#  include "sparc_arch.h"
+
+extern unsigned int OPENSSL_sparcv9cap_P[];
+
+#  define SPARC_AES_CAPABLE       (OPENSSL_sparcv9cap_P[1] & CFR_AES)
+
+void aes_t4_set_encrypt_key(const unsigned char *key, int bits, AES_KEY *ks);
+void aes_t4_set_decrypt_key(const unsigned char *key, int bits, AES_KEY *ks);
+void aes_t4_encrypt(const unsigned char *in, unsigned char *out,
+                    const AES_KEY *key);
+void aes_t4_decrypt(const unsigned char *in, unsigned char *out,
+                    const AES_KEY *key);
+/*
+ * Key-length specific subroutines were chosen for following reason.
+ * Each SPARC T4 core can execute up to 8 threads which share core's
+ * resources. Loading as much key material to registers allows to
+ * minimize references to shared memory interface, as well as amount
+ * of instructions in inner loops [much needed on T4]. But then having
+ * non-key-length specific routines would require conditional branches
+ * either in inner loops or on subroutines' entries. Former is hardly
+ * acceptable, while latter means code size increase to size occupied
+ * by multiple key-length specific subroutines, so why fight?
+ */
+void aes128_t4_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                           size_t len, const AES_KEY *key,
+                           unsigned char *ivec);
+void aes128_t4_cbc_decrypt(const unsigned char *in, unsigned char *out,
+                           size_t len, const AES_KEY *key,
+                           unsigned char *ivec);
+void aes192_t4_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                           size_t len, const AES_KEY *key,
+                           unsigned char *ivec);
+void aes192_t4_cbc_decrypt(const unsigned char *in, unsigned char *out,
+                           size_t len, const AES_KEY *key,
+                           unsigned char *ivec);
+void aes256_t4_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                           size_t len, const AES_KEY *key,
+                           unsigned char *ivec);
+void aes256_t4_cbc_decrypt(const unsigned char *in, unsigned char *out,
+                           size_t len, const AES_KEY *key,
+                           unsigned char *ivec);
+void aes128_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out,
+                             size_t blocks, const AES_KEY *key,
+                             unsigned char *ivec);
+void aes192_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out,
+                             size_t blocks, const AES_KEY *key,
+                             unsigned char *ivec);
+void aes256_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out,
+                             size_t blocks, const AES_KEY *key,
+                             unsigned char *ivec);
+void aes128_t4_xts_encrypt(const unsigned char *in, unsigned char *out,
+                           size_t blocks, const AES_KEY *key1,
+                           const AES_KEY *key2, const unsigned char *ivec);
+void aes128_t4_xts_decrypt(const unsigned char *in, unsigned char *out,
+                           size_t blocks, const AES_KEY *key1,
+                           const AES_KEY *key2, const unsigned char *ivec);
+void aes256_t4_xts_encrypt(const unsigned char *in, unsigned char *out,
+                           size_t blocks, const AES_KEY *key1,
+                           const AES_KEY *key2, const unsigned char *ivec);
+void aes256_t4_xts_decrypt(const unsigned char *in, unsigned char *out,
+                           size_t blocks, const AES_KEY *key1,
+                           const AES_KEY *key2, const unsigned char *ivec);
+/* SPARC T4 init_key: set key schedule, block fn and CBC/CTR stream fn; 0 on bad size */
+static int aes_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+                           const unsigned char *iv, int enc) /* iv is not used here */
+{
+    int ret, mode, bits;
+    EVP_AES_KEY *dat = (EVP_AES_KEY *) ctx->cipher_data;
+
+    mode = ctx->cipher->flags & EVP_CIPH_MODE;
+    bits = ctx->key_len * 8;    /* key_len is in bytes */
+    if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE)
+        && !enc) {              /* ECB/CBC decryption: decrypt key schedule */
+        ret = 0;
+        aes_t4_set_decrypt_key(key, bits, ctx->cipher_data); /* same object as dat */
+        dat->block = (block128_f) aes_t4_decrypt;
+        switch (bits) {         /* key-size specific CBC routine, NULL for ECB */
+        case 128:
+            dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
+                (cbc128_f) aes128_t4_cbc_decrypt : NULL;
+            break;
+        case 192:
+            dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
+                (cbc128_f) aes192_t4_cbc_decrypt : NULL;
+            break;
+        case 256:
+            dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
+                (cbc128_f) aes256_t4_cbc_decrypt : NULL;
+            break;
+        default:
+            ret = -1;           /* not 128/192/256 bits */
+        }
+    } else {                    /* every other case: encrypt key schedule */
+        ret = 0;
+        aes_t4_set_encrypt_key(key, bits, ctx->cipher_data); /* same object as dat */
+        dat->block = (block128_f) aes_t4_encrypt;
+        switch (bits) {         /* CBC or CTR stream routine, NULL for other modes */
+        case 128:
+            if (mode == EVP_CIPH_CBC_MODE)
+                dat->stream.cbc = (cbc128_f) aes128_t4_cbc_encrypt;
+            else if (mode == EVP_CIPH_CTR_MODE)
+                dat->stream.ctr = (ctr128_f) aes128_t4_ctr32_encrypt;
+            else
+                dat->stream.cbc = NULL;
+            break;
+        case 192:
+            if (mode == EVP_CIPH_CBC_MODE)
+                dat->stream.cbc = (cbc128_f) aes192_t4_cbc_encrypt;
+            else if (mode == EVP_CIPH_CTR_MODE)
+                dat->stream.ctr = (ctr128_f) aes192_t4_ctr32_encrypt;
+            else
+                dat->stream.cbc = NULL;
+            break;
+        case 256:
+            if (mode == EVP_CIPH_CBC_MODE)
+                dat->stream.cbc = (cbc128_f) aes256_t4_cbc_encrypt;
+            else if (mode == EVP_CIPH_CTR_MODE)
+                dat->stream.ctr = (ctr128_f) aes256_t4_ctr32_encrypt;
+            else
+                dat->stream.cbc = NULL;
+            break;
+        default:
+            ret = -1;           /* not 128/192/256 bits */
+        }
+    }
+
+    if (ret < 0) {              /* unsupported key size: raise error, return 0 */
+        EVPerr(EVP_F_AES_T4_INIT_KEY, EVP_R_AES_KEY_SETUP_FAILED);
+        return 0;
+    }
+
+    return 1;
+}
+
+#  define aes_t4_cbc_cipher aes_cbc_cipher
+static int aes_t4_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                             const unsigned char *in, size_t len);
+
+#  define aes_t4_ecb_cipher aes_ecb_cipher
+static int aes_t4_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                             const unsigned char *in, size_t len);
+
+#  define aes_t4_ofb_cipher aes_ofb_cipher
+static int aes_t4_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                             const unsigned char *in, size_t len);
+
+#  define aes_t4_cfb_cipher aes_cfb_cipher
+static int aes_t4_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                             const unsigned char *in, size_t len);
+
+#  define aes_t4_cfb8_cipher aes_cfb8_cipher
+static int aes_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                              const unsigned char *in, size_t len);
+
+#  define aes_t4_cfb1_cipher aes_cfb1_cipher
+static int aes_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                              const unsigned char *in, size_t len);
+
+#  define aes_t4_ctr_cipher aes_ctr_cipher
+static int aes_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                             const unsigned char *in, size_t len);
+/* SPARC T4 GCM init: key and IV may arrive together or in separate calls */
+static int aes_t4_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+                               const unsigned char *iv, int enc) /* enc is not used here */
+{
+    EVP_AES_GCM_CTX *gctx = ctx->cipher_data;
+    if (!iv && !key)            /* nothing to set up */
+        return 1;
+    if (key) {
+        int bits = ctx->key_len * 8;
+        aes_t4_set_encrypt_key(key, bits, &gctx->ks.ks);
+        CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
+                           (block128_f) aes_t4_encrypt);
+        switch (bits) {         /* key-size specific CTR routine */
+        case 128:
+            gctx->ctr = (ctr128_f) aes128_t4_ctr32_encrypt;
+            break;
+        case 192:
+            gctx->ctr = (ctr128_f) aes192_t4_ctr32_encrypt;
+            break;
+        case 256:
+            gctx->ctr = (ctr128_f) aes256_t4_ctr32_encrypt;
+            break;
+        default:
+            return 0;           /* not 128/192/256 bits */
+        }
+        /*
+         * If an IV was passed in, set it now; otherwise reuse the saved IV.
+         */
+        if (iv == NULL && gctx->iv_set)
+            iv = gctx->iv;
+        if (iv) {
+            CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen);
+            gctx->iv_set = 1;
+        }
+        gctx->key_set = 1;
+    } else {
+        /* Key already set: apply the IV now; otherwise save it for later */
+        if (gctx->key_set)
+            CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen);
+        else
+            memcpy(gctx->iv, iv, gctx->ivlen);
+        gctx->iv_set = 1;
+        gctx->iv_gen = 0;
+    }
+    return 1;
+}
+
+#  define aes_t4_gcm_cipher aes_gcm_cipher
+static int aes_t4_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                             const unsigned char *in, size_t len);
+/* SPARC T4 XTS init: first half of key -> ks1, second half -> ks2; 0 on bad size */
+static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+                               const unsigned char *iv, int enc)
+{
+    EVP_AES_XTS_CTX *xctx = ctx->cipher_data;
+    if (!iv && !key)            /* nothing to set up */
+        return 1;
+
+    if (key) {
+        int bits = ctx->key_len * 4; /* bits per AES key: key_len bytes hold two */
+        xctx->stream = NULL;
+        /* key_len is two AES keys */
+        if (enc) {
+            aes_t4_set_encrypt_key(key, bits, &xctx->ks1.ks);
+            xctx->xts.block1 = (block128_f) aes_t4_encrypt;
+            switch (bits) {
+            case 128:
+                xctx->stream = aes128_t4_xts_encrypt;
+                break;
+#  if 0                         /* not yet */
+            case 192:
+                xctx->stream = aes192_t4_xts_encrypt;
+                break;
+#  endif
+            case 256:
+                xctx->stream = aes256_t4_xts_encrypt;
+                break;
+            default:
+                return 0;       /* no T4 XTS routine for this key size */
+            }
+        } else {
+            aes_t4_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
+            xctx->xts.block1 = (block128_f) aes_t4_decrypt;
+            switch (bits) {
+            case 128:
+                xctx->stream = aes128_t4_xts_decrypt;
+                break;
+#  if 0                         /* not yet */
+            case 192:
+                xctx->stream = aes192_t4_xts_decrypt;
+                break;
+#  endif
+            case 256:
+                xctx->stream = aes256_t4_xts_decrypt;
+                break;
+            default:
+                return 0;       /* no T4 XTS routine for this key size */
+            }
+        }
+
+        aes_t4_set_encrypt_key(key + ctx->key_len / 2,
+                               ctx->key_len * 4, &xctx->ks2.ks); /* second key: always encrypt */
+        xctx->xts.block2 = (block128_f) aes_t4_encrypt;
+
+        xctx->xts.key1 = &xctx->ks1;
+    }
+
+    if (iv) {
+        xctx->xts.key2 = &xctx->ks2; /* key2 pointer is set only when an IV is supplied */
+        memcpy(ctx->iv, iv, 16);
+    }
+
+    return 1;
+}
+
+#  define aes_t4_xts_cipher aes_xts_cipher
+static int aes_t4_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                             const unsigned char *in, size_t len);
+/* SPARC T4 CCM init; no T4 ccm64 routines yet, so cctx->str is left NULL */
+static int aes_t4_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+                               const unsigned char *iv, int enc)
+{
+    EVP_AES_CCM_CTX *cctx = ctx->cipher_data;
+    if (!iv && !key)            /* nothing to set up */
+        return 1;
+    if (key) {
+        int bits = ctx->key_len * 8;
+        aes_t4_set_encrypt_key(key, bits, &cctx->ks.ks);
+        CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
+                           &cctx->ks, (block128_f) aes_t4_encrypt);
+#  if 0                         /* not yet */
+        switch (bits) {
+        case 128:
+            cctx->str = enc ? (ccm128_f) aes128_t4_ccm64_encrypt :
+                (ccm128_f) ae128_t4_ccm64_decrypt; /* NOTE(review): "ae128" looks like a typo for "aes128"; likewise ae192/ae256 */
+            break;
+        case 192:
+            cctx->str = enc ? (ccm128_f) aes192_t4_ccm64_encrypt :
+                (ccm128_f) ae192_t4_ccm64_decrypt;
+            break;
+        case 256:
+            cctx->str = enc ? (ccm128_f) aes256_t4_ccm64_encrypt :
+                (ccm128_f) ae256_t4_ccm64_decrypt;
+            break;
+        default:
+            return 0;
+        }
 #  else
+        cctx->str = NULL;       /* the only branch currently compiled */
+#  endif
+        cctx->key_set = 1;
+    }
+    if (iv) {
+        memcpy(ctx->iv, iv, 15 - cctx->L); /* copies 15 - L bytes of IV */
+        cctx->iv_set = 1;
+    }
+    return 1;
+}
 
-#   define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
+#  define aes_t4_ccm_cipher aes_ccm_cipher
+static int aes_t4_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                             const unsigned char *in, size_t len);
+/* Define T4 and generic EVP_CIPHERs; the getter picks one via SPARC_AES_CAPABLE */
+#  define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
+static const EVP_CIPHER aes_t4_##keylen##_##mode = { \
+        nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \
+        flags|EVP_CIPH_##MODE##_MODE,   \
+        aes_t4_init_key,                \
+        aes_t4_##mode##_cipher,         \
+        NULL,                           \
+        sizeof(EVP_AES_KEY),            \
+        NULL,NULL,NULL,NULL }; \
+static const EVP_CIPHER aes_##keylen##_##mode = { \
+        nid##_##keylen##_##nmode,blocksize,     \
+        keylen/8,ivlen, \
+        flags|EVP_CIPH_##MODE##_MODE,   \
+        aes_init_key,                   \
+        aes_##mode##_cipher,            \
+        NULL,                           \
+        sizeof(EVP_AES_KEY),            \
+        NULL,NULL,NULL,NULL }; \
+const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
+{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; }
+/* Like BLOCK_CIPHER_generic, for modes with own ctx/init/cleanup/ctrl; XTS key is 2x */
+#  define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \
+static const EVP_CIPHER aes_t4_##keylen##_##mode = { \
+        nid##_##keylen##_##mode,blocksize, \
+        (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \
+        flags|EVP_CIPH_##MODE##_MODE,   \
+        aes_t4_##mode##_init_key,       \
+        aes_t4_##mode##_cipher,         \
+        aes_##mode##_cleanup,           \
+        sizeof(EVP_AES_##MODE##_CTX),   \
+        NULL,NULL,aes_##mode##_ctrl,NULL }; \
+static const EVP_CIPHER aes_##keylen##_##mode = { \
+        nid##_##keylen##_##mode,blocksize, \
+        (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \
+        flags|EVP_CIPH_##MODE##_MODE,   \
+        aes_##mode##_init_key,          \
+        aes_##mode##_cipher,            \
+        aes_##mode##_cleanup,           \
+        sizeof(EVP_AES_##MODE##_CTX),   \
+        NULL,NULL,aes_##mode##_ctrl,NULL }; \
+const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
+{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; }
+
+# else
+
+#  define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
 static const EVP_CIPHER aes_##keylen##_##mode = { \
         nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \
         flags|EVP_CIPH_##MODE##_MODE,   \
@@ -447,7 +864,7 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \
 const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
 { return &aes_##keylen##_##mode; }
 
-#   define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \
+#  define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \
 static const EVP_CIPHER aes_##keylen##_##mode = { \
         nid##_##keylen##_##mode,blocksize, \
         (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \
@@ -459,9 +876,42 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \
         NULL,NULL,aes_##mode##_ctrl,NULL }; \
 const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
 { return &aes_##keylen##_##mode; }
+# endif
+/* ARM/AArch64: HWAES_* names map to aes_v8_* routines, gated by OPENSSL_armcap_P */
+# if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__))
+#  include "arm_arch.h"
+#  if __ARM_MAX_ARCH__>=7
+#   if defined(BSAES_ASM)
+#    define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
+#   endif
+#   define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES)
+#   define HWAES_set_encrypt_key aes_v8_set_encrypt_key
+#   define HWAES_set_decrypt_key aes_v8_set_decrypt_key
+#   define HWAES_encrypt aes_v8_encrypt
+#   define HWAES_decrypt aes_v8_decrypt
+#   define HWAES_cbc_encrypt aes_v8_cbc_encrypt
+#   define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks
 #  endif
+# endif
+/* Prototypes for the platform routines that the HWAES_* macros expand to */
+# if defined(HWAES_CAPABLE)
+int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits,
+                          AES_KEY *key);
+int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits,
+                          AES_KEY *key);
+void HWAES_encrypt(const unsigned char *in, unsigned char *out,
+                   const AES_KEY *key);
+void HWAES_decrypt(const unsigned char *in, unsigned char *out,
+                   const AES_KEY *key);
+void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                       size_t length, const AES_KEY *key,
+                       unsigned char *ivec, const int enc);
+void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+                                size_t len, const AES_KEY *key,
+                                const unsigned char ivec[16]);
+# endif                         /* HWAES_CAPABLE */
 
-#  define BLOCK_CIPHER_generic_pack(nid,keylen,flags)             \
+# define BLOCK_CIPHER_generic_pack(nid,keylen,flags)             \
         BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1)     \
         BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1)      \
         BLOCK_CIPHER_generic(nid,keylen,1,16,ofb128,ofb,OFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1)   \
@@ -479,51 +929,80 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
     mode = ctx->cipher->flags & EVP_CIPH_MODE;
     if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE)
         && !enc)
-#  ifdef BSAES_CAPABLE
+# ifdef HWAES_CAPABLE
+        if (HWAES_CAPABLE) {
+            ret = HWAES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
+            dat->block = (block128_f) HWAES_decrypt;
+            dat->stream.cbc = NULL;
+#  ifdef HWAES_cbc_encrypt
+            if (mode == EVP_CIPH_CBC_MODE)
+                dat->stream.cbc = (cbc128_f) HWAES_cbc_encrypt;
+#  endif
+        } else
+# endif
+# ifdef BSAES_CAPABLE
         if (BSAES_CAPABLE && mode == EVP_CIPH_CBC_MODE) {
-            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks);
+            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
             dat->block = (block128_f) AES_decrypt;
             dat->stream.cbc = (cbc128_f) bsaes_cbc_encrypt;
         } else
-#  endif
-#  ifdef VPAES_CAPABLE
+# endif
+# ifdef VPAES_CAPABLE
         if (VPAES_CAPABLE) {
-            ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks);
+            ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
             dat->block = (block128_f) vpaes_decrypt;
             dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
                 (cbc128_f) vpaes_cbc_encrypt : NULL;
         } else
-#  endif
+# endif
         {
-            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks);
+            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
             dat->block = (block128_f) AES_decrypt;
             dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
                 (cbc128_f) AES_cbc_encrypt : NULL;
     } else
-#  ifdef BSAES_CAPABLE
+# ifdef HWAES_CAPABLE
+    if (HWAES_CAPABLE) {
+        ret = HWAES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
+        dat->block = (block128_f) HWAES_encrypt;
+        dat->stream.cbc = NULL;
+#  ifdef HWAES_cbc_encrypt
+        if (mode == EVP_CIPH_CBC_MODE)
+            dat->stream.cbc = (cbc128_f) HWAES_cbc_encrypt;
+        else
+#  endif
+#  ifdef HWAES_ctr32_encrypt_blocks
+        if (mode == EVP_CIPH_CTR_MODE)
+            dat->stream.ctr = (ctr128_f) HWAES_ctr32_encrypt_blocks;
+        else
+#  endif
+            (void)0;            /* terminate potentially open 'else' */
+    } else
+# endif
+# ifdef BSAES_CAPABLE
     if (BSAES_CAPABLE && mode == EVP_CIPH_CTR_MODE) {
-        ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks);
+        ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
         dat->block = (block128_f) AES_encrypt;
         dat->stream.ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks;
     } else
-#  endif
-#  ifdef VPAES_CAPABLE
+# endif
+# ifdef VPAES_CAPABLE
     if (VPAES_CAPABLE) {
-        ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks);
+        ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
         dat->block = (block128_f) vpaes_encrypt;
         dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
             (cbc128_f) vpaes_cbc_encrypt : NULL;
     } else
-#  endif
+# endif
     {
-        ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks);
+        ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
         dat->block = (block128_f) AES_encrypt;
         dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
             (cbc128_f) AES_cbc_encrypt : NULL;
-#  ifdef AES_CTR_ASM
+# ifdef AES_CTR_ASM
         if (mode == EVP_CIPH_CTR_MODE)
             dat->stream.ctr = (ctr128_f) AES_ctr32_encrypt;
-#  endif
+# endif
     }
 
     if (ret < 0) {
@@ -544,7 +1023,7 @@ static int aes_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     else if (ctx->encrypt)
         CRYPTO_cbc128_encrypt(in, out, len, &dat->ks, ctx->iv, dat->block);
     else
-        CRYPTO_cbc128_encrypt(in, out, len, &dat->ks, ctx->iv, dat->block);
+        CRYPTO_cbc128_decrypt(in, out, len, &dat->ks, ctx->iv, dat->block);
 
     return 1;
 }
@@ -680,11 +1159,6 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr)
     case EVP_CTRL_GCM_SET_IVLEN:
         if (arg <= 0)
             return 0;
-#  ifdef OPENSSL_FIPS
-        if (FIPS_module_mode() && !(c->flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW)
-            && arg < 12)
-            return 0;
-#  endif
         /* Allocate memory for IV if needed */
         if ((arg > EVP_MAX_IV_LENGTH) && (arg > gctx->ivlen)) {
             if (gctx->iv != c->iv)
@@ -805,34 +1279,47 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
         return 1;
     if (key) {
         do {
-#  ifdef BSAES_CAPABLE
+# ifdef HWAES_CAPABLE
+            if (HWAES_CAPABLE) {
+                HWAES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
+                CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
+                                   (block128_f) HWAES_encrypt);
+#  ifdef HWAES_ctr32_encrypt_blocks
+                gctx->ctr = (ctr128_f) HWAES_ctr32_encrypt_blocks;
+#  else
+                gctx->ctr = NULL;
+#  endif
+                break;
+            } else
+# endif
+# ifdef BSAES_CAPABLE
             if (BSAES_CAPABLE) {
-                AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
+                AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
                 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                    (block128_f) AES_encrypt);
                 gctx->ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks;
                 break;
             } else
-#  endif
-#  ifdef VPAES_CAPABLE
+# endif
+# ifdef VPAES_CAPABLE
             if (VPAES_CAPABLE) {
-                vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
+                vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
                 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                    (block128_f) vpaes_encrypt);
                 gctx->ctr = NULL;
                 break;
             } else
-#  endif
+# endif
                 (void)0;        /* terminate potentially open 'else' */
 
-            AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
+            AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
             CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                (block128_f) AES_encrypt);
-#  ifdef AES_CTR_ASM
+# ifdef AES_CTR_ASM
             gctx->ctr = (ctr128_f) AES_ctr32_encrypt;
-#  else
+# else
             gctx->ctr = NULL;
-#  endif
+# endif
         } while (0);
 
         /*
@@ -891,11 +1378,38 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     if (ctx->encrypt) {
         /* Encrypt payload */
         if (gctx->ctr) {
+            size_t bulk = 0;
+# if defined(AES_GCM_ASM)
+            if (len >= 32 && AES_GCM_ASM(gctx)) {
+                if (CRYPTO_gcm128_encrypt(&gctx->gcm, NULL, NULL, 0))
+                    return -1;
+
+                bulk = AES_gcm_encrypt(in, out, len,
+                                       gctx->gcm.key,
+                                       gctx->gcm.Yi.c, gctx->gcm.Xi.u);
+                gctx->gcm.len.u[1] += bulk;
+            }
+# endif
             if (CRYPTO_gcm128_encrypt_ctr32(&gctx->gcm,
-                                            in, out, len, gctx->ctr))
+                                            in + bulk,
+                                            out + bulk,
+                                            len - bulk, gctx->ctr))
                 goto err;
         } else {
-            if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, len))
+            size_t bulk = 0;
+# if defined(AES_GCM_ASM2)
+            if (len >= 32 && AES_GCM_ASM2(gctx)) {
+                if (CRYPTO_gcm128_encrypt(&gctx->gcm, NULL, NULL, 0))
+                    return -1;
+
+                bulk = AES_gcm_encrypt(in, out, len,
+                                       gctx->gcm.key,
+                                       gctx->gcm.Yi.c, gctx->gcm.Xi.u);
+                gctx->gcm.len.u[1] += bulk;
+            }
+# endif
+            if (CRYPTO_gcm128_encrypt(&gctx->gcm,
+                                      in + bulk, out + bulk, len - bulk))
                 goto err;
         }
         out += len;
@@ -905,11 +1419,38 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     } else {
         /* Decrypt */
         if (gctx->ctr) {
+            size_t bulk = 0;
+# if defined(AES_GCM_ASM)
+            if (len >= 16 && AES_GCM_ASM(gctx)) {
+                if (CRYPTO_gcm128_decrypt(&gctx->gcm, NULL, NULL, 0))
+                    return -1;
+
+                bulk = AES_gcm_decrypt(in, out, len,
+                                       gctx->gcm.key,
+                                       gctx->gcm.Yi.c, gctx->gcm.Xi.u);
+                gctx->gcm.len.u[1] += bulk;
+            }
+# endif
             if (CRYPTO_gcm128_decrypt_ctr32(&gctx->gcm,
-                                            in, out, len, gctx->ctr))
+                                            in + bulk,
+                                            out + bulk,
+                                            len - bulk, gctx->ctr))
                 goto err;
         } else {
-            if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, len))
+            size_t bulk = 0;
+# if defined(AES_GCM_ASM2)
+            if (len >= 16 && AES_GCM_ASM2(gctx)) {
+                if (CRYPTO_gcm128_decrypt(&gctx->gcm, NULL, NULL, 0))
+                    return -1;
+
+                bulk = AES_gcm_decrypt(in, out, len,
+                                       gctx->gcm.key,
+                                       gctx->gcm.Yi.c, gctx->gcm.Xi.u);
+                gctx->gcm.len.u[1] += bulk;
+            }
+# endif
+            if (CRYPTO_gcm128_decrypt(&gctx->gcm,
+                                      in + bulk, out + bulk, len - bulk))
                 goto err;
         }
         /* Retrieve tag */
@@ -947,20 +1488,90 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                 return -1;
         } else if (ctx->encrypt) {
             if (gctx->ctr) {
+                size_t bulk = 0;
+# if defined(AES_GCM_ASM)
+                if (len >= 32 && AES_GCM_ASM(gctx)) {
+                    size_t res = (16 - gctx->gcm.mres) % 16;
+
+                    if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, res))
+                        return -1;
+
+                    bulk = AES_gcm_encrypt(in + res,
+                                           out + res, len - res,
+                                           gctx->gcm.key, gctx->gcm.Yi.c,
+                                           gctx->gcm.Xi.u);
+                    gctx->gcm.len.u[1] += bulk;
+                    bulk += res;
+                }
+# endif
                 if (CRYPTO_gcm128_encrypt_ctr32(&gctx->gcm,
-                                                in, out, len, gctx->ctr))
+                                                in + bulk,
+                                                out + bulk,
+                                                len - bulk, gctx->ctr))
                     return -1;
             } else {
-                if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, len))
+                size_t bulk = 0;
+# if defined(AES_GCM_ASM2)
+                if (len >= 32 && AES_GCM_ASM2(gctx)) {
+                    size_t res = (16 - gctx->gcm.mres) % 16;
+
+                    if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, res))
+                        return -1;
+
+                    bulk = AES_gcm_encrypt(in + res,
+                                           out + res, len - res,
+                                           gctx->gcm.key, gctx->gcm.Yi.c,
+                                           gctx->gcm.Xi.u);
+                    gctx->gcm.len.u[1] += bulk;
+                    bulk += res;
+                }
+# endif
+                if (CRYPTO_gcm128_encrypt(&gctx->gcm,
+                                          in + bulk, out + bulk, len - bulk))
                     return -1;
             }
         } else {
             if (gctx->ctr) {
+                size_t bulk = 0;
+# if defined(AES_GCM_ASM)
+                if (len >= 16 && AES_GCM_ASM(gctx)) {
+                    size_t res = (16 - gctx->gcm.mres) % 16;
+
+                    if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, res))
+                        return -1;
+
+                    bulk = AES_gcm_decrypt(in + res,
+                                           out + res, len - res,
+                                           gctx->gcm.key,
+                                           gctx->gcm.Yi.c, gctx->gcm.Xi.u);
+                    gctx->gcm.len.u[1] += bulk;
+                    bulk += res;
+                }
+# endif
                 if (CRYPTO_gcm128_decrypt_ctr32(&gctx->gcm,
-                                                in, out, len, gctx->ctr))
+                                                in + bulk,
+                                                out + bulk,
+                                                len - bulk, gctx->ctr))
                     return -1;
             } else {
-                if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, len))
+                size_t bulk = 0;
+# if defined(AES_GCM_ASM2)
+                if (len >= 16 && AES_GCM_ASM2(gctx)) {
+                    size_t res = (16 - gctx->gcm.mres) % 16;
+
+                    if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, res))
+                        return -1;
+
+                    bulk = AES_gcm_decrypt(in + res,
+                                           out + res, len - res,
+                                           gctx->gcm.key,
+                                           gctx->gcm.Yi.c, gctx->gcm.Xi.u);
+                    gctx->gcm.len.u[1] += bulk;
+                    bulk += res;
+                }
+# endif
+                if (CRYPTO_gcm128_decrypt(&gctx->gcm,
+                                          in + bulk, out + bulk, len - bulk))
                     return -1;
             }
         }
@@ -983,7 +1594,7 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 
 }
 
-#  define CUSTOM_FLAGS    (EVP_CIPH_FLAG_DEFAULT_ASN1 \
+# define CUSTOM_FLAGS    (EVP_CIPH_FLAG_DEFAULT_ASN1 \
                 | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \
                 | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT \
                 | EVP_CIPH_CUSTOM_COPY)
@@ -1032,47 +1643,69 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 
     if (key)
         do {
-#  ifdef AES_XTS_ASM
+# ifdef AES_XTS_ASM
             xctx->stream = enc ? AES_xts_encrypt : AES_xts_decrypt;
-#  else
+# else
             xctx->stream = NULL;
-#  endif
+# endif
             /* key_len is two AES keys */
-#  ifdef BSAES_CAPABLE
+# ifdef HWAES_CAPABLE
+            if (HWAES_CAPABLE) {
+                if (enc) {
+                    HWAES_set_encrypt_key(key, ctx->key_len * 4,
+                                          &xctx->ks1.ks);
+                    xctx->xts.block1 = (block128_f) HWAES_encrypt;
+                } else {
+                    HWAES_set_decrypt_key(key, ctx->key_len * 4,
+                                          &xctx->ks1.ks);
+                    xctx->xts.block1 = (block128_f) HWAES_decrypt;
+                }
+
+                HWAES_set_encrypt_key(key + ctx->key_len / 2,
+                                      ctx->key_len * 4, &xctx->ks2.ks);
+                xctx->xts.block2 = (block128_f) HWAES_encrypt;
+
+                xctx->xts.key1 = &xctx->ks1;
+                break;
+            } else
+# endif
+# ifdef BSAES_CAPABLE
             if (BSAES_CAPABLE)
                 xctx->stream = enc ? bsaes_xts_encrypt : bsaes_xts_decrypt;
             else
-#  endif
-#  ifdef VPAES_CAPABLE
+# endif
+# ifdef VPAES_CAPABLE
             if (VPAES_CAPABLE) {
                 if (enc) {
-                    vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+                    vpaes_set_encrypt_key(key, ctx->key_len * 4,
+                                          &xctx->ks1.ks);
                     xctx->xts.block1 = (block128_f) vpaes_encrypt;
                 } else {
-                    vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+                    vpaes_set_decrypt_key(key, ctx->key_len * 4,
+                                          &xctx->ks1.ks);
                     xctx->xts.block1 = (block128_f) vpaes_decrypt;
                 }
 
                 vpaes_set_encrypt_key(key + ctx->key_len / 2,
-                                      ctx->key_len * 4, &xctx->ks2);
+                                      ctx->key_len * 4, &xctx->ks2.ks);
                 xctx->xts.block2 = (block128_f) vpaes_encrypt;
 
                 xctx->xts.key1 = &xctx->ks1;
                 break;
             } else
-#  endif
+# endif
                 (void)0;        /* terminate potentially open 'else' */
 
             if (enc) {
-                AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+                AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
                 xctx->xts.block1 = (block128_f) AES_encrypt;
             } else {
-                AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
+                AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
                 xctx->xts.block1 = (block128_f) AES_decrypt;
             }
 
             AES_set_encrypt_key(key + ctx->key_len / 2,
-                                ctx->key_len * 4, &xctx->ks2);
+                                ctx->key_len * 4, &xctx->ks2.ks);
             xctx->xts.block2 = (block128_f) AES_encrypt;
 
             xctx->xts.key1 = &xctx->ks1;
@@ -1094,14 +1727,6 @@ static int aes_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
         return 0;
     if (!out || !in || len < AES_BLOCK_SIZE)
         return 0;
-#  ifdef OPENSSL_FIPS
-    /* Requirement of SP800-38E */
-    if (FIPS_module_mode() && !(ctx->flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW) &&
-        (len > (1UL << 20) * 16)) {
-        EVPerr(EVP_F_AES_XTS_CIPHER, EVP_R_TOO_LARGE);
-        return 0;
-    }
-#  endif
     if (xctx->stream)
         (*xctx->stream) (in, out, len,
                          xctx->xts.key1, xctx->xts.key2, ctx->iv);
@@ -1111,9 +1736,9 @@ static int aes_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     return 1;
 }
 
-#  define aes_xts_cleanup NULL
+# define aes_xts_cleanup NULL
 
-#  define XTS_FLAGS       (EVP_CIPH_FLAG_DEFAULT_ASN1 | EVP_CIPH_CUSTOM_IV \
+# define XTS_FLAGS       (EVP_CIPH_FLAG_DEFAULT_ASN1 | EVP_CIPH_CUSTOM_IV \
                          | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT \
                          | EVP_CIPH_CUSTOM_COPY)
 
@@ -1191,17 +1816,28 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
         return 1;
     if (key)
         do {
-#  ifdef VPAES_CAPABLE
+# ifdef HWAES_CAPABLE
+            if (HWAES_CAPABLE) {
+                HWAES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
+
+                CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
+                                   &cctx->ks, (block128_f) HWAES_encrypt);
+                cctx->str = NULL;
+                cctx->key_set = 1;
+                break;
+            } else
+# endif
+# ifdef VPAES_CAPABLE
             if (VPAES_CAPABLE) {
-                vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
+                vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
                 CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
                                    &cctx->ks, (block128_f) vpaes_encrypt);
                 cctx->str = NULL;
                 cctx->key_set = 1;
                 break;
             }
-#  endif
-            AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
+# endif
+            AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
             CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
                                &cctx->ks, (block128_f) AES_encrypt);
             cctx->str = NULL;
@@ -1274,7 +1910,7 @@ static int aes_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 
 }
 
-#  define aes_ccm_cleanup NULL
+# define aes_ccm_cleanup NULL
 
 BLOCK_CIPHER_custom(NID_aes, 128, 1, 12, ccm, CCM,
                     EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS)
@@ -1282,5 +1918,107 @@ BLOCK_CIPHER_custom(NID_aes, 128, 1, 12, ccm, CCM,
                     EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS)
     BLOCK_CIPHER_custom(NID_aes, 256, 1, 12, ccm, CCM,
                     EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS)
-# endif
 #endif
+typedef struct {
+    union {
+        double align;
+        AES_KEY ks;
+    } ks;
+    /* Indicates if IV has been set */
+    unsigned char *iv;
+} EVP_AES_WRAP_CTX;
+
+static int aes_wrap_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+                             const unsigned char *iv, int enc)
+{
+    EVP_AES_WRAP_CTX *wctx = ctx->cipher_data;
+    if (!iv && !key)
+        return 1;
+    if (key) {
+        if (ctx->encrypt)
+            AES_set_encrypt_key(key, ctx->key_len * 8, &wctx->ks.ks);
+        else
+            AES_set_decrypt_key(key, ctx->key_len * 8, &wctx->ks.ks);
+        if (!iv)
+            wctx->iv = NULL;
+    }
+    if (iv) {
+        memcpy(ctx->iv, iv, 8);
+        wctx->iv = ctx->iv;
+    }
+    return 1;
+}
+
+static int aes_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                           const unsigned char *in, size_t inlen)
+{
+    EVP_AES_WRAP_CTX *wctx = ctx->cipher_data;
+    size_t rv;
+    if (!in)
+        return 0;
+    if (inlen % 8)
+        return -1;
+    if (ctx->encrypt && inlen < 8)
+        return -1;
+    if (!ctx->encrypt && inlen < 16)
+        return -1;
+    if (!out) {
+        if (ctx->encrypt)
+            return inlen + 8;
+        else
+            return inlen - 8;
+    }
+    if (ctx->encrypt)
+        rv = CRYPTO_128_wrap(&wctx->ks.ks, wctx->iv, out, in, inlen,
+                             (block128_f) AES_encrypt);
+    else
+        rv = CRYPTO_128_unwrap(&wctx->ks.ks, wctx->iv, out, in, inlen,
+                               (block128_f) AES_decrypt);
+    return rv ? (int)rv : -1;
+}
+
+#define WRAP_FLAGS      (EVP_CIPH_WRAP_MODE \
+                | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \
+                | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_FLAG_DEFAULT_ASN1)
+
+static const EVP_CIPHER aes_128_wrap = {
+    NID_id_aes128_wrap,
+    8, 16, 8, WRAP_FLAGS,
+    aes_wrap_init_key, aes_wrap_cipher,
+    NULL,
+    sizeof(EVP_AES_WRAP_CTX),
+    NULL, NULL, NULL, NULL
+};
+
+const EVP_CIPHER *EVP_aes_128_wrap(void)
+{
+    return &aes_128_wrap;
+}
+
+static const EVP_CIPHER aes_192_wrap = {
+    NID_id_aes192_wrap,
+    8, 24, 8, WRAP_FLAGS,
+    aes_wrap_init_key, aes_wrap_cipher,
+    NULL,
+    sizeof(EVP_AES_WRAP_CTX),
+    NULL, NULL, NULL, NULL
+};
+
+const EVP_CIPHER *EVP_aes_192_wrap(void)
+{
+    return &aes_192_wrap;
+}
+
+static const EVP_CIPHER aes_256_wrap = {
+    NID_id_aes256_wrap,
+    8, 32, 8, WRAP_FLAGS,
+    aes_wrap_init_key, aes_wrap_cipher,
+    NULL,
+    sizeof(EVP_AES_WRAP_CTX),
+    NULL, NULL, NULL, NULL
+};
+
+const EVP_CIPHER *EVP_aes_256_wrap(void)
+{
+    return &aes_256_wrap;
+}
index d1f5928..8330964 100644 (file)
@@ -58,7 +58,8 @@
 # include <openssl/objects.h>
 # include <openssl/aes.h>
 # include <openssl/sha.h>
-# include "evp_locl.h"
+# include <openssl/rand.h>
+# include "modes_lcl.h"
 
 # ifndef EVP_CIPH_FLAG_AEAD_CIPHER
 #  define EVP_CIPH_FLAG_AEAD_CIPHER       0x200000
 #  define EVP_CIPH_FLAG_DEFAULT_ASN1 0
 # endif
 
+# if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)
+#  define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
+# endif
+
 # define TLS1_1_VERSION 0x0302
 
 typedef struct {
@@ -89,11 +94,7 @@ typedef struct {
         defined(_M_AMD64)       || defined(_M_X64)      || \
         defined(__INTEL__)      )
 
-#  if defined(__GNUC__) && __GNUC__>=2 && !defined(PEDANTIC)
-#   define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; })
-#  endif
-
-extern unsigned int OPENSSL_ia32cap_P[2];
+extern unsigned int OPENSSL_ia32cap_P[];
 #  define AESNI_CAPABLE   (1<<(57-32))
 
 int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
@@ -110,6 +111,10 @@ void aesni_cbc_sha1_enc(const void *inp, void *out, size_t blocks,
                         const AES_KEY *key, unsigned char iv[16],
                         SHA_CTX *ctx, const void *in0);
 
+void aesni256_cbc_sha1_dec(const void *inp, void *out, size_t blocks,
+                           const AES_KEY *key, unsigned char iv[16],
+                           SHA_CTX *ctx, const void *in0);
+
 #  define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data)
 
 static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx,
@@ -134,6 +139,7 @@ static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx,
 }
 
 #  define STITCHED_CALL
+#  undef  STITCHED_DECRYPT_CALL
 
 #  if !defined(STITCHED_CALL)
 #   define aes_off 0
@@ -177,6 +183,275 @@ static void sha1_update(SHA_CTX *c, const void *data, size_t len)
 #  endif
 #  define SHA1_Update sha1_update
 
+#  if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+
+typedef struct {
+    unsigned int A[8], B[8], C[8], D[8], E[8];
+} SHA1_MB_CTX;
+typedef struct {
+    const unsigned char *ptr;
+    int blocks;
+} HASH_DESC;
+
+void sha1_multi_block(SHA1_MB_CTX *, const HASH_DESC *, int);
+
+typedef struct {
+    const unsigned char *inp;
+    unsigned char *out;
+    int blocks;
+    u64 iv[2];
+} CIPH_DESC;
+
+void aesni_multi_cbc_encrypt(CIPH_DESC *, void *, int);
+
+static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key,
+                                         unsigned char *out,
+                                         const unsigned char *inp,
+                                         size_t inp_len, int n4x)
+{                               /* n4x is 1 or 2 */
+    HASH_DESC hash_d[8], edges[8];
+    CIPH_DESC ciph_d[8];
+    unsigned char storage[sizeof(SHA1_MB_CTX) + 32];
+    union {
+        u64 q[16];
+        u32 d[32];
+        u8 c[128];
+    } blocks[8];
+    SHA1_MB_CTX *ctx;
+    unsigned int frag, last, packlen, i, x4 = 4 * n4x, minblocks, processed =
+        0;
+    size_t ret = 0;
+    u8 *IVs;
+#   if defined(BSWAP8)
+    u64 seqnum;
+#   endif
+
+    /* ask for IVs in bulk */
+    if (RAND_bytes((IVs = blocks[0].c), 16 * x4) <= 0)
+        return 0;
+
+    ctx = (SHA1_MB_CTX *) (storage + 32 - ((size_t)storage % 32)); /* align */
+
+    frag = (unsigned int)inp_len >> (1 + n4x);
+    last = (unsigned int)inp_len + frag - (frag << (1 + n4x));
+    if (last > frag && ((last + 13 + 9) % 64) < (x4 - 1)) {
+        frag++;
+        last -= x4 - 1;
+    }
+
+    packlen = 5 + 16 + ((frag + 20 + 16) & -16);
+
+    /* populate descriptors with pointers and IVs */
+    hash_d[0].ptr = inp;
+    ciph_d[0].inp = inp;
+    /* 5+16 is place for header and explicit IV */
+    ciph_d[0].out = out + 5 + 16;
+    memcpy(ciph_d[0].out - 16, IVs, 16);
+    memcpy(ciph_d[0].iv, IVs, 16);
+    IVs += 16;
+
+    for (i = 1; i < x4; i++) {
+        ciph_d[i].inp = hash_d[i].ptr = hash_d[i - 1].ptr + frag;
+        ciph_d[i].out = ciph_d[i - 1].out + packlen;
+        memcpy(ciph_d[i].out - 16, IVs, 16);
+        memcpy(ciph_d[i].iv, IVs, 16);
+        IVs += 16;
+    }
+
+#   if defined(BSWAP8)
+    memcpy(blocks[0].c, key->md.data, 8);
+    seqnum = BSWAP8(blocks[0].q[0]);
+#   endif
+    for (i = 0; i < x4; i++) {
+        unsigned int len = (i == (x4 - 1) ? last : frag);
+#   if !defined(BSWAP8)
+        unsigned int carry, j;
+#   endif
+
+        ctx->A[i] = key->md.h0;
+        ctx->B[i] = key->md.h1;
+        ctx->C[i] = key->md.h2;
+        ctx->D[i] = key->md.h3;
+        ctx->E[i] = key->md.h4;
+
+        /* fix seqnum */
+#   if defined(BSWAP8)
+        blocks[i].q[0] = BSWAP8(seqnum + i);
+#   else
+        for (carry = i, j = 8; j--;) {
+            blocks[i].c[j] = ((u8 *)key->md.data)[j] + carry;
+            carry = (blocks[i].c[j] - carry) >> (sizeof(carry) * 8 - 1);
+        }
+#   endif
+        blocks[i].c[8] = ((u8 *)key->md.data)[8];
+        blocks[i].c[9] = ((u8 *)key->md.data)[9];
+        blocks[i].c[10] = ((u8 *)key->md.data)[10];
+        /* fix length */
+        blocks[i].c[11] = (u8)(len >> 8);
+        blocks[i].c[12] = (u8)(len);
+
+        memcpy(blocks[i].c + 13, hash_d[i].ptr, 64 - 13);
+        hash_d[i].ptr += 64 - 13;
+        hash_d[i].blocks = (len - (64 - 13)) / 64;
+
+        edges[i].ptr = blocks[i].c;
+        edges[i].blocks = 1;
+    }
+
+    /* hash 13-byte headers and first 64-13 bytes of inputs */
+    sha1_multi_block(ctx, edges, n4x);
+    /* hash bulk inputs */
+#   define MAXCHUNKSIZE    2048
+#   if     MAXCHUNKSIZE%64
+#    error  "MAXCHUNKSIZE is not divisible by 64"
+#   elif   MAXCHUNKSIZE
+    /*
+     * goal is to minimize pressure on L1 cache by moving in shorter steps,
+     * so that hashed data is still in the cache by the time we encrypt it
+     */
+    minblocks = ((frag <= last ? frag : last) - (64 - 13)) / 64;
+    if (minblocks > MAXCHUNKSIZE / 64) {
+        for (i = 0; i < x4; i++) {
+            edges[i].ptr = hash_d[i].ptr;
+            edges[i].blocks = MAXCHUNKSIZE / 64;
+            ciph_d[i].blocks = MAXCHUNKSIZE / 16;
+        }
+        do {
+            sha1_multi_block(ctx, edges, n4x);
+            aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x);
+
+            for (i = 0; i < x4; i++) {
+                edges[i].ptr = hash_d[i].ptr += MAXCHUNKSIZE;
+                hash_d[i].blocks -= MAXCHUNKSIZE / 64;
+                edges[i].blocks = MAXCHUNKSIZE / 64;
+                ciph_d[i].inp += MAXCHUNKSIZE;
+                ciph_d[i].out += MAXCHUNKSIZE;
+                ciph_d[i].blocks = MAXCHUNKSIZE / 16;
+                memcpy(ciph_d[i].iv, ciph_d[i].out - 16, 16);
+            }
+            processed += MAXCHUNKSIZE;
+            minblocks -= MAXCHUNKSIZE / 64;
+        } while (minblocks > MAXCHUNKSIZE / 64);
+    }
+#   endif
+#   undef  MAXCHUNKSIZE
+    sha1_multi_block(ctx, hash_d, n4x);
+
+    memset(blocks, 0, sizeof(blocks));
+    for (i = 0; i < x4; i++) {
+        unsigned int len = (i == (x4 - 1) ? last : frag),
+            off = hash_d[i].blocks * 64;
+        const unsigned char *ptr = hash_d[i].ptr + off;
+
+        off = (len - processed) - (64 - 13) - off; /* remainder actually */
+        memcpy(blocks[i].c, ptr, off);
+        blocks[i].c[off] = 0x80;
+        len += 64 + 13;         /* 64 is HMAC header */
+        len *= 8;               /* convert to bits */
+        if (off < (64 - 8)) {
+#   ifdef BSWAP4
+            blocks[i].d[15] = BSWAP4(len);
+#   else
+            PUTU32(blocks[i].c + 60, len);
+#   endif
+            edges[i].blocks = 1;
+        } else {
+#   ifdef BSWAP4
+            blocks[i].d[31] = BSWAP4(len);
+#   else
+            PUTU32(blocks[i].c + 124, len);
+#   endif
+            edges[i].blocks = 2;
+        }
+        edges[i].ptr = blocks[i].c;
+    }
+
+    /* hash input tails and finalize */
+    sha1_multi_block(ctx, edges, n4x);
+
+    memset(blocks, 0, sizeof(blocks));
+    for (i = 0; i < x4; i++) {
+#   ifdef BSWAP4
+        blocks[i].d[0] = BSWAP4(ctx->A[i]);
+        ctx->A[i] = key->tail.h0;
+        blocks[i].d[1] = BSWAP4(ctx->B[i]);
+        ctx->B[i] = key->tail.h1;
+        blocks[i].d[2] = BSWAP4(ctx->C[i]);
+        ctx->C[i] = key->tail.h2;
+        blocks[i].d[3] = BSWAP4(ctx->D[i]);
+        ctx->D[i] = key->tail.h3;
+        blocks[i].d[4] = BSWAP4(ctx->E[i]);
+        ctx->E[i] = key->tail.h4;
+        blocks[i].c[20] = 0x80;
+        blocks[i].d[15] = BSWAP4((64 + 20) * 8);
+#   else
+        PUTU32(blocks[i].c + 0, ctx->A[i]);
+        ctx->A[i] = key->tail.h0;
+        PUTU32(blocks[i].c + 4, ctx->B[i]);
+        ctx->B[i] = key->tail.h1;
+        PUTU32(blocks[i].c + 8, ctx->C[i]);
+        ctx->C[i] = key->tail.h2;
+        PUTU32(blocks[i].c + 12, ctx->D[i]);
+        ctx->D[i] = key->tail.h3;
+        PUTU32(blocks[i].c + 16, ctx->E[i]);
+        ctx->E[i] = key->tail.h4;
+        blocks[i].c[20] = 0x80;
+        PUTU32(blocks[i].c + 60, (64 + 20) * 8);
+#   endif
+        edges[i].ptr = blocks[i].c;
+        edges[i].blocks = 1;
+    }
+
+    /* finalize MACs */
+    sha1_multi_block(ctx, edges, n4x);
+
+    for (i = 0; i < x4; i++) {
+        unsigned int len = (i == (x4 - 1) ? last : frag), pad, j;
+        unsigned char *out0 = out;
+
+        memcpy(ciph_d[i].out, ciph_d[i].inp, len - processed);
+        ciph_d[i].inp = ciph_d[i].out;
+
+        out += 5 + 16 + len;
+
+        /* write MAC */
+        PUTU32(out + 0, ctx->A[i]);
+        PUTU32(out + 4, ctx->B[i]);
+        PUTU32(out + 8, ctx->C[i]);
+        PUTU32(out + 12, ctx->D[i]);
+        PUTU32(out + 16, ctx->E[i]);
+        out += 20;
+        len += 20;
+
+        /* pad */
+        pad = 15 - len % 16;
+        for (j = 0; j <= pad; j++)
+            *(out++) = pad;
+        len += pad + 1;
+
+        ciph_d[i].blocks = (len - processed) / 16;
+        len += 16;              /* account for explicit iv */
+
+        /* arrange header */
+        out0[0] = ((u8 *)key->md.data)[8];
+        out0[1] = ((u8 *)key->md.data)[9];
+        out0[2] = ((u8 *)key->md.data)[10];
+        out0[3] = (u8)(len >> 8);
+        out0[4] = (u8)(len);
+
+        ret += len + 5;
+        inp += frag;
+    }
+
+    aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x);
+
+    OPENSSL_cleanse(blocks, sizeof(blocks));
+    OPENSSL_cleanse(ctx, sizeof(*ctx));
+
+    return ret;
+}
+#  endif
+
 static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                                       const unsigned char *in, size_t len)
 {
@@ -257,10 +532,7 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
         /* arrange cache line alignment */
         pmac = (void *)(((size_t)mac.c + 31) & ((size_t)0 - 32));
 
-        /* decrypt HMAC|padding at once */
-        aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0);
-
-        if (plen) {             /* "TLS" mode of operation */
+        if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */
             size_t inp_len, mask, j, i;
             unsigned int res, maxpad, pad, bitlen;
             int ret = 1;
@@ -268,17 +540,37 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                 unsigned int u[SHA_LBLOCK];
                 unsigned char c[SHA_CBLOCK];
             } *data = (void *)key->md.data;
+#  if defined(STITCHED_DECRYPT_CALL)
+            unsigned char tail_iv[AES_BLOCK_SIZE];
+            int stitch = 0;
+#  endif
 
             if ((key->aux.tls_aad[plen - 4] << 8 | key->aux.tls_aad[plen - 3])
-                >= TLS1_1_VERSION)
-                iv = AES_BLOCK_SIZE;
-
-            if (len < (iv + SHA_DIGEST_LENGTH + 1))
+                >= TLS1_1_VERSION) {
+                if (len < (AES_BLOCK_SIZE + SHA_DIGEST_LENGTH + 1))
+                    return 0;
+
+                /* omit explicit iv */
+                memcpy(ctx->iv, in, AES_BLOCK_SIZE);
+                in += AES_BLOCK_SIZE;
+                out += AES_BLOCK_SIZE;
+                len -= AES_BLOCK_SIZE;
+            } else if (len < (SHA_DIGEST_LENGTH + 1))
                 return 0;
 
-            /* omit explicit iv */
-            out += iv;
-            len -= iv;
+#  if defined(STITCHED_DECRYPT_CALL)
+            if (len >= 1024 && ctx->key_len == 32) {
+                /* decrypt last block */
+                memcpy(tail_iv, in + len - 2 * AES_BLOCK_SIZE,
+                       AES_BLOCK_SIZE);
+                aesni_cbc_encrypt(in + len - AES_BLOCK_SIZE,
+                                  out + len - AES_BLOCK_SIZE, AES_BLOCK_SIZE,
+                                  &key->ks, tail_iv, 0);
+                stitch = 1;
+            } else
+#  endif
+                /* decrypt HMAC|padding at once */
+                aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0);
 
             /* figure out payload length */
             pad = out[len - 1];
@@ -298,6 +590,29 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
             key->md = key->head;
             SHA1_Update(&key->md, key->aux.tls_aad, plen);
 
+#  if defined(STITCHED_DECRYPT_CALL)
+            if (stitch) {
+                blocks = (len - (256 + 32 + SHA_CBLOCK)) / SHA_CBLOCK;
+                aes_off = len - AES_BLOCK_SIZE - blocks * SHA_CBLOCK;
+                sha_off = SHA_CBLOCK - plen;
+
+                aesni_cbc_encrypt(in, out, aes_off, &key->ks, ctx->iv, 0);
+
+                SHA1_Update(&key->md, out, sha_off);
+                aesni256_cbc_sha1_dec(in + aes_off,
+                                      out + aes_off, blocks, &key->ks,
+                                      ctx->iv, &key->md, out + sha_off);
+
+                sha_off += blocks *= SHA_CBLOCK;
+                out += sha_off;
+                len -= sha_off;
+                inp_len -= sha_off;
+
+                key->md.Nl += (blocks << 3); /* at most 18 bits */
+                memcpy(ctx->iv, tail_iv, AES_BLOCK_SIZE);
+            }
+#  endif
+
 #  if 1
             len -= SHA_DIGEST_LENGTH; /* amend mac */
             if (len >= (256 + SHA_CBLOCK)) {
@@ -311,8 +626,8 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 
             /* but pretend as if we hashed padded payload */
             bitlen = key->md.Nl + (inp_len << 3); /* at most 18 bits */
-#   ifdef BSWAP
-            bitlen = BSWAP(bitlen);
+#   ifdef BSWAP4
+            bitlen = BSWAP4(bitlen);
 #   else
             mac.c[0] = 0;
             mac.c[1] = (unsigned char)(bitlen >> 16);
@@ -376,12 +691,12 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
             pmac->u[3] |= key->md.h3 & mask;
             pmac->u[4] |= key->md.h4 & mask;
 
-#   ifdef BSWAP
-            pmac->u[0] = BSWAP(pmac->u[0]);
-            pmac->u[1] = BSWAP(pmac->u[1]);
-            pmac->u[2] = BSWAP(pmac->u[2]);
-            pmac->u[3] = BSWAP(pmac->u[3]);
-            pmac->u[4] = BSWAP(pmac->u[4]);
+#   ifdef BSWAP4
+            pmac->u[0] = BSWAP4(pmac->u[0]);
+            pmac->u[1] = BSWAP4(pmac->u[1]);
+            pmac->u[2] = BSWAP4(pmac->u[2]);
+            pmac->u[3] = BSWAP4(pmac->u[3]);
+            pmac->u[4] = BSWAP4(pmac->u[4]);
 #   else
             for (i = 0; i < 5; i++) {
                 res = pmac->u[i];
@@ -458,6 +773,33 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 #  endif
             return ret;
         } else {
+#  if defined(STITCHED_DECRYPT_CALL)
+            if (len >= 1024 && ctx->key_len == 32) {
+                if (sha_off %= SHA_CBLOCK)
+                    blocks = (len - 3 * SHA_CBLOCK) / SHA_CBLOCK;
+                else
+                    blocks = (len - 2 * SHA_CBLOCK) / SHA_CBLOCK;
+                aes_off = len - blocks * SHA_CBLOCK;
+
+                aesni_cbc_encrypt(in, out, aes_off, &key->ks, ctx->iv, 0);
+                SHA1_Update(&key->md, out, sha_off);
+                aesni256_cbc_sha1_dec(in + aes_off,
+                                      out + aes_off, blocks, &key->ks,
+                                      ctx->iv, &key->md, out + sha_off);
+
+                sha_off += blocks *= SHA_CBLOCK;
+                out += sha_off;
+                len -= sha_off;
+
+                key->md.Nh += blocks >> 29;
+                key->md.Nl += blocks <<= 3;
+                if (key->md.Nl < (unsigned int)blocks)
+                    key->md.Nh++;
+            } else
+#  endif
+                /* decrypt HMAC|padding at once */
+                aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0);
+
             SHA1_Update(&key->md, out, len);
         }
     }
@@ -531,6 +873,70 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg,
                 return SHA_DIGEST_LENGTH;
             }
         }
+#  if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
+        return (int)(5 + 16 + ((arg + 20 + 16) & -16));
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
+        {
+            EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
+                (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr;
+            unsigned int n4x = 1, x4;
+            unsigned int frag, last, packlen, inp_len;
+
+            if (arg < (int)sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM))
+                return -1;
+
+            inp_len = param->inp[11] << 8 | param->inp[12];
+
+            if (ctx->encrypt) {
+                if ((param->inp[9] << 8 | param->inp[10]) < TLS1_1_VERSION)
+                    return -1;
+
+                if (inp_len) {
+                    if (inp_len < 4096)
+                        return 0; /* too short */
+
+                    if (inp_len >= 8192 && OPENSSL_ia32cap_P[2] & (1 << 5))
+                        n4x = 2; /* AVX2 */
+                } else if ((n4x = param->interleave / 4) && n4x <= 2)
+                    inp_len = param->len;
+                else
+                    return -1;
+
+                key->md = key->head;
+                SHA1_Update(&key->md, param->inp, 13);
+
+                x4 = 4 * n4x;
+                n4x += 1;
+
+                frag = inp_len >> n4x;
+                last = inp_len + frag - (frag << n4x);
+                if (last > frag && ((last + 13 + 9) % 64 < (x4 - 1))) {
+                    frag++;
+                    last -= x4 - 1;
+                }
+
+                packlen = 5 + 16 + ((frag + 20 + 16) & -16);
+                packlen = (packlen << n4x) - packlen;
+                packlen += 5 + 16 + ((last + 20 + 16) & -16);
+
+                param->interleave = x4;
+
+                return (int)packlen;
+            } else
+                return -1;      /* not yet */
+        }
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT:
+        {
+            EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
+                (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr;
+
+            return (int)tls1_1_multi_block_encrypt(key, param->out,
+                                                   param->inp, param->len,
+                                                   param->interleave / 4);
+        }
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT:
+#  endif
     default:
         return -1;
     }
@@ -544,7 +950,7 @@ static EVP_CIPHER aesni_128_cbc_hmac_sha1_cipher = {
 #  endif
     16, 16, 16,
     EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 |
-        EVP_CIPH_FLAG_AEAD_CIPHER,
+        EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
     aesni_cbc_hmac_sha1_init_key,
     aesni_cbc_hmac_sha1_cipher,
     NULL,
@@ -563,7 +969,7 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher = {
 #  endif
     16, 32, 16,
     EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 |
-        EVP_CIPH_FLAG_AEAD_CIPHER,
+        EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
     aesni_cbc_hmac_sha1_init_key,
     aesni_cbc_hmac_sha1_cipher,
     NULL,
diff --git a/crypto/evp/e_aes_cbc_hmac_sha256.c b/crypto/evp/e_aes_cbc_hmac_sha256.c
new file mode 100644 (file)
index 0000000..b1c586e
--- /dev/null
@@ -0,0 +1,973 @@
+/* ====================================================================
+ * Copyright (c) 2011-2013 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#include <openssl/opensslconf.h>
+
+#include <stdio.h>
+#include <string.h>
+
+#if !defined(OPENSSL_NO_AES) && !defined(OPENSSL_NO_SHA256)
+
+# include <openssl/evp.h>
+# include <openssl/objects.h>
+# include <openssl/aes.h>
+# include <openssl/sha.h>
+# include <openssl/rand.h>
+# include "modes_lcl.h"
+
+# ifndef EVP_CIPH_FLAG_AEAD_CIPHER
+#  define EVP_CIPH_FLAG_AEAD_CIPHER       0x200000
+#  define EVP_CTRL_AEAD_TLS1_AAD          0x16
+#  define EVP_CTRL_AEAD_SET_MAC_KEY       0x17
+# endif
+
+# if !defined(EVP_CIPH_FLAG_DEFAULT_ASN1)
+#  define EVP_CIPH_FLAG_DEFAULT_ASN1 0
+# endif
+
+# if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)
+#  define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
+# endif
+
+# define TLS1_1_VERSION 0x0302
+
+typedef struct {                /* cipher state reached through data(ctx) */
+    AES_KEY ks;                 /* key schedule filled in by init_key */
+    SHA256_CTX head, tail, md;  /* md: working hash, reloaded from head/tail */
+    size_t payload_length;      /* AAD length in decrypt case; NO_PAYLOAD_LENGTH when unset */
+    union {
+        unsigned int tls_ver;   /* compared against TLS1_1_VERSION on encrypt */
+        unsigned char tls_aad[16]; /* 13 used */
+    } aux;
+} EVP_AES_HMAC_SHA256;
+
+# define NO_PAYLOAD_LENGTH       ((size_t)-1)
+
+# if     defined(AES_ASM) &&     ( \
+        defined(__x86_64)       || defined(__x86_64__)  || \
+        defined(_M_AMD64)       || defined(_M_X64)      || \
+        defined(__INTEL__)      )
+
+extern unsigned int OPENSSL_ia32cap_P[];
+#  define AESNI_CAPABLE   (1<<(57-32))
+
+int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
+                          AES_KEY *key);
+int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
+                          AES_KEY *key);
+
+void aesni_cbc_encrypt(const unsigned char *in,
+                       unsigned char *out,
+                       size_t length,
+                       const AES_KEY *key, unsigned char *ivec, int enc);
+
+int aesni_cbc_sha256_enc(const void *inp, void *out, size_t blocks,
+                         const AES_KEY *key, unsigned char iv[16],
+                         SHA256_CTX *ctx, const void *in0);
+
+#  define data(ctx) ((EVP_AES_HMAC_SHA256 *)(ctx)->cipher_data)
+
+static int aesni_cbc_hmac_sha256_init_key(EVP_CIPHER_CTX *ctx,
+                                          const unsigned char *inkey,
+                                          const unsigned char *iv, int enc) /* iv is not used here */
+{                               /* returns 1, or 0 if key setup gave ret < 0 */
+    EVP_AES_HMAC_SHA256 *key = data(ctx);
+    int ret;
+    /* expand inkey (ctx->key_len bytes) for the direction selected by enc */
+    if (enc)
+        memset(&key->ks, 0, sizeof(key->ks.rd_key)), /* comma: still one statement */
+            ret = aesni_set_encrypt_key(inkey, ctx->key_len * 8, &key->ks);
+    else
+        ret = aesni_set_decrypt_key(inkey, ctx->key_len * 8, &key->ks);
+    /* start all three hash states from a freshly initialised SHA-256 */
+    SHA256_Init(&key->head);    /* handy when benchmarking */
+    key->tail = key->head;
+    key->md = key->head;
+    /* no TLS payload length has been recorded yet */
+    key->payload_length = NO_PAYLOAD_LENGTH;
+
+    return ret < 0 ? 0 : 1;
+}
+
+#  define STITCHED_CALL
+
+#  if !defined(STITCHED_CALL)
+#   define aes_off 0
+#  endif
+
+void sha256_block_data_order(void *c, const void *p, size_t len);
+
+static void sha256_update(SHA256_CTX *c, const void *data, size_t len)
+{                               /* hash len bytes of data into c */
+    const unsigned char *ptr = data;
+    size_t res;
+    /* bytes already buffered in c: let SHA256_Update() top the block up */
+    if ((res = c->num)) {
+        res = SHA256_CBLOCK - res;
+        if (len < res)
+            res = len;
+        SHA256_Update(c, ptr, res); /* the macro redirect to this function is defined later */
+        ptr += res;
+        len -= res;
+    }
+    /* res = trailing partial block, len = bytes in whole blocks */
+    res = len % SHA256_CBLOCK;
+    len -= res;
+
+    if (len) {
+        sha256_block_data_order(c, ptr, len / SHA256_CBLOCK);
+
+        ptr += len;
+        c->Nh += len >> 29;     /* keep the Nh:Nl bit counter in step by hand */
+        c->Nl += len <<= 3;     /* len now holds the count in bits */
+        if (c->Nl < (unsigned int)len)
+            c->Nh++;            /* Nl wrapped: carry into Nh */
+    }
+
+    if (res)
+        SHA256_Update(c, ptr, res); /* leftover bytes go through SHA256_Update() */
+}
+
+#  ifdef SHA256_Update
+#   undef SHA256_Update
+#  endif
+#  define SHA256_Update sha256_update
+
+#  if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+
+typedef struct {                /* SHA-256 states for up to 8 parallel inputs */
+    unsigned int A[8], B[8], C[8], D[8], E[8], F[8], G[8], H[8]; /* A[i]..H[i] = h[0]..h[7] of input i */
+} SHA256_MB_CTX;
+typedef struct {                /* one input handed to sha256_multi_block() */
+    const unsigned char *ptr;   /* data to hash */
+    int blocks;                 /* amount of data, counted in 64-byte blocks */
+} HASH_DESC;
+
+void sha256_multi_block(SHA256_MB_CTX *, const HASH_DESC *, int);
+
+typedef struct {                /* one stream handed to aesni_multi_cbc_encrypt() */
+    const unsigned char *inp;   /* data to encrypt */
+    unsigned char *out;         /* where the result goes */
+    int blocks;                 /* amount of data, counted in 16-byte blocks */
+    u64 iv[2];                  /* 16-byte IV for this stream */
+} CIPH_DESC;
+
+void aesni_multi_cbc_encrypt(CIPH_DESC *, void *, int);
+
+static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
+                                         unsigned char *out, /* 4 * n4x records are written here */
+                                         const unsigned char *inp, /* inp_len bytes of payload */
+                                         size_t inp_len, int n4x)
+{                               /* n4x is 1 or 2; returns total bytes written, 0 if RAND_bytes fails */
+    HASH_DESC hash_d[8], edges[8];
+    CIPH_DESC ciph_d[8];
+    unsigned char storage[sizeof(SHA256_MB_CTX) + 32]; /* backing store for ctx, +32 for alignment */
+    union {
+        u64 q[16];
+        u32 d[32];
+        u8 c[128];
+    } blocks[8];                /* 128 bytes of scratch per fragment */
+    SHA256_MB_CTX *ctx;
+    unsigned int frag, last, packlen, i, x4 = 4 * n4x, minblocks, processed =
+        0;
+    size_t ret = 0;
+    u8 *IVs;
+#   if defined(BSWAP8)
+    u64 seqnum;
+#   endif
+
+    /* ask for IVs in bulk: 16 bytes per fragment, parked in blocks[0] */
+    if (RAND_bytes((IVs = blocks[0].c), 16 * x4) <= 0)
+        return 0;
+
+    /* align ctx to a 32-byte boundary within storage */
+    ctx = (SHA256_MB_CTX *) (storage + 32 - ((size_t)storage % 32));
+    /* split the input into x4 fragments: x4 - 1 of frag bytes, the final one of last bytes */
+    frag = (unsigned int)inp_len >> (1 + n4x);
+    last = (unsigned int)inp_len + frag - (frag << (1 + n4x));
+    if (last > frag && ((last + 13 + 9) % 64) < (x4 - 1)) {
+        frag++;                 /* one more byte in each of the first x4 - 1 fragments... */
+        last -= x4 - 1;         /* ...taken from the final one */
+    }
+
+    packlen = 5 + 16 + ((frag + 32 + 16) & -16); /* size of one frag-byte record: header, IV, padded payload + MAC */
+
+    /* populate descriptors with pointers and IVs */
+    hash_d[0].ptr = inp;
+    ciph_d[0].inp = inp;
+    /* 5+16 is place for header and explicit IV */
+    ciph_d[0].out = out + 5 + 16;
+    memcpy(ciph_d[0].out - 16, IVs, 16);
+    memcpy(ciph_d[0].iv, IVs, 16);
+    IVs += 16;
+
+    for (i = 1; i < x4; i++) {
+        ciph_d[i].inp = hash_d[i].ptr = hash_d[i - 1].ptr + frag;
+        ciph_d[i].out = ciph_d[i - 1].out + packlen;
+        memcpy(ciph_d[i].out - 16, IVs, 16);
+        memcpy(ciph_d[i].iv, IVs, 16);
+        IVs += 16;
+    }
+
+#   if defined(BSWAP8)
+    memcpy(blocks[0].c, key->md.data, 8); /* first 8 buffered bytes are read as a big-endian counter */
+    seqnum = BSWAP8(blocks[0].q[0]);
+#   endif
+    for (i = 0; i < x4; i++) {
+        unsigned int len = (i == (x4 - 1) ? last : frag);
+#   if !defined(BSWAP8)
+        unsigned int carry, j;
+#   endif
+        /* every fragment starts from the hash state currently held in key->md */
+        ctx->A[i] = key->md.h[0];
+        ctx->B[i] = key->md.h[1];
+        ctx->C[i] = key->md.h[2];
+        ctx->D[i] = key->md.h[3];
+        ctx->E[i] = key->md.h[4];
+        ctx->F[i] = key->md.h[5];
+        ctx->G[i] = key->md.h[6];
+        ctx->H[i] = key->md.h[7];
+
+        /* fix seqnum */
+#   if defined(BSWAP8)
+        blocks[i].q[0] = BSWAP8(seqnum + i); /* fragment i uses counter + i */
+#   else
+        for (carry = i, j = 8; j--;) { /* add i to the 8-byte big-endian counter, byte by byte */
+            blocks[i].c[j] = ((u8 *)key->md.data)[j] + carry;
+            carry = (blocks[i].c[j] - carry) >> (sizeof(carry) * 8 - 1);
+        }
+#   endif
+        blocks[i].c[8] = ((u8 *)key->md.data)[8];
+        blocks[i].c[9] = ((u8 *)key->md.data)[9];
+        blocks[i].c[10] = ((u8 *)key->md.data)[10];
+        /* fix length */
+        blocks[i].c[11] = (u8)(len >> 8);
+        blocks[i].c[12] = (u8)(len);
+
+        memcpy(blocks[i].c + 13, hash_d[i].ptr, 64 - 13); /* fill the rest of the first block with payload */
+        hash_d[i].ptr += 64 - 13;
+        hash_d[i].blocks = (len - (64 - 13)) / 64; /* whole blocks left in this fragment */
+
+        edges[i].ptr = blocks[i].c;
+        edges[i].blocks = 1;
+    }
+
+    /* hash 13-byte headers and first 64-13 bytes of inputs */
+    sha256_multi_block(ctx, edges, n4x);
+    /* hash bulk inputs */
+#   define MAXCHUNKSIZE    2048
+#   if     MAXCHUNKSIZE%64
+#    error  "MAXCHUNKSIZE is not divisible by 64"
+#   elif   MAXCHUNKSIZE
+    /*
+     * goal is to minimize pressure on L1 cache by moving in shorter steps,
+     * so that hashed data is still in the cache by the time we encrypt it
+     */
+    minblocks = ((frag <= last ? frag : last) - (64 - 13)) / 64;
+    if (minblocks > MAXCHUNKSIZE / 64) {
+        for (i = 0; i < x4; i++) {
+            edges[i].ptr = hash_d[i].ptr;
+            edges[i].blocks = MAXCHUNKSIZE / 64;
+            ciph_d[i].blocks = MAXCHUNKSIZE / 16;
+        }
+        do {
+            sha256_multi_block(ctx, edges, n4x);
+            aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x);
+
+            for (i = 0; i < x4; i++) {
+                edges[i].ptr = hash_d[i].ptr += MAXCHUNKSIZE;
+                hash_d[i].blocks -= MAXCHUNKSIZE / 64;
+                edges[i].blocks = MAXCHUNKSIZE / 64;
+                ciph_d[i].inp += MAXCHUNKSIZE;
+                ciph_d[i].out += MAXCHUNKSIZE;
+                ciph_d[i].blocks = MAXCHUNKSIZE / 16;
+                memcpy(ciph_d[i].iv, ciph_d[i].out - 16, 16); /* chain: last ciphertext block is the next IV */
+            }
+            processed += MAXCHUNKSIZE; /* bytes per fragment already encrypted */
+            minblocks -= MAXCHUNKSIZE / 64;
+        } while (minblocks > MAXCHUNKSIZE / 64);
+    }
+#   endif
+#   undef  MAXCHUNKSIZE
+    sha256_multi_block(ctx, hash_d, n4x);
+
+    memset(blocks, 0, sizeof(blocks)); /* zero fill supplies the hash padding bytes */
+    for (i = 0; i < x4; i++) {
+        unsigned int len = (i == (x4 - 1) ? last : frag),
+            off = hash_d[i].blocks * 64;
+        const unsigned char *ptr = hash_d[i].ptr + off;
+
+        off = (len - processed) - (64 - 13) - off; /* remainder actually */
+        memcpy(blocks[i].c, ptr, off);
+        blocks[i].c[off] = 0x80; /* end-of-message marker */
+        len += 64 + 13;         /* 64 is HMAC header */
+        len *= 8;               /* convert to bits */
+        if (off < (64 - 8)) {   /* marker and length fit in one block */
+#   ifdef BSWAP4
+            blocks[i].d[15] = BSWAP4(len);
+#   else
+            PUTU32(blocks[i].c + 60, len);
+#   endif
+            edges[i].blocks = 1;
+        } else {                /* otherwise the length goes at the end of a second block */
+#   ifdef BSWAP4
+            blocks[i].d[31] = BSWAP4(len);
+#   else
+            PUTU32(blocks[i].c + 124, len);
+#   endif
+            edges[i].blocks = 2;
+        }
+        edges[i].ptr = blocks[i].c;
+    }
+
+    /* hash input tails and finalize */
+    sha256_multi_block(ctx, edges, n4x);
+
+    memset(blocks, 0, sizeof(blocks));
+    for (i = 0; i < x4; i++) {  /* digest just computed becomes the message; state restarts from key->tail */
+#   ifdef BSWAP4
+        blocks[i].d[0] = BSWAP4(ctx->A[i]);
+        ctx->A[i] = key->tail.h[0];
+        blocks[i].d[1] = BSWAP4(ctx->B[i]);
+        ctx->B[i] = key->tail.h[1];
+        blocks[i].d[2] = BSWAP4(ctx->C[i]);
+        ctx->C[i] = key->tail.h[2];
+        blocks[i].d[3] = BSWAP4(ctx->D[i]);
+        ctx->D[i] = key->tail.h[3];
+        blocks[i].d[4] = BSWAP4(ctx->E[i]);
+        ctx->E[i] = key->tail.h[4];
+        blocks[i].d[5] = BSWAP4(ctx->F[i]);
+        ctx->F[i] = key->tail.h[5];
+        blocks[i].d[6] = BSWAP4(ctx->G[i]);
+        ctx->G[i] = key->tail.h[6];
+        blocks[i].d[7] = BSWAP4(ctx->H[i]);
+        ctx->H[i] = key->tail.h[7];
+        blocks[i].c[32] = 0x80;
+        blocks[i].d[15] = BSWAP4((64 + 32) * 8); /* bit length: 64-byte HMAC header + 32-byte digest */
+#   else
+        PUTU32(blocks[i].c + 0, ctx->A[i]);
+        ctx->A[i] = key->tail.h[0];
+        PUTU32(blocks[i].c + 4, ctx->B[i]);
+        ctx->B[i] = key->tail.h[1];
+        PUTU32(blocks[i].c + 8, ctx->C[i]);
+        ctx->C[i] = key->tail.h[2];
+        PUTU32(blocks[i].c + 12, ctx->D[i]);
+        ctx->D[i] = key->tail.h[3];
+        PUTU32(blocks[i].c + 16, ctx->E[i]);
+        ctx->E[i] = key->tail.h[4];
+        PUTU32(blocks[i].c + 20, ctx->F[i]);
+        ctx->F[i] = key->tail.h[5];
+        PUTU32(blocks[i].c + 24, ctx->G[i]);
+        ctx->G[i] = key->tail.h[6];
+        PUTU32(blocks[i].c + 28, ctx->H[i]);
+        ctx->H[i] = key->tail.h[7];
+        blocks[i].c[32] = 0x80;
+        PUTU32(blocks[i].c + 60, (64 + 32) * 8); /* bit length: 64-byte HMAC header + 32-byte digest */
+#   endif
+        edges[i].ptr = blocks[i].c;
+        edges[i].blocks = 1;
+    }
+
+    /* finalize MACs */
+    sha256_multi_block(ctx, edges, n4x);
+
+    for (i = 0; i < x4; i++) {
+        unsigned int len = (i == (x4 - 1) ? last : frag), pad, j;
+        unsigned char *out0 = out; /* start of this record's 5-byte header */
+
+        memcpy(ciph_d[i].out, ciph_d[i].inp, len - processed); /* stage the not yet encrypted payload in out */
+        ciph_d[i].inp = ciph_d[i].out; /* the final pass encrypts in place */
+
+        out += 5 + 16 + len;    /* step over header, IV and payload */
+
+        /* write MAC */
+        PUTU32(out + 0, ctx->A[i]);
+        PUTU32(out + 4, ctx->B[i]);
+        PUTU32(out + 8, ctx->C[i]);
+        PUTU32(out + 12, ctx->D[i]);
+        PUTU32(out + 16, ctx->E[i]);
+        PUTU32(out + 20, ctx->F[i]);
+        PUTU32(out + 24, ctx->G[i]);
+        PUTU32(out + 28, ctx->H[i]);
+        out += 32;
+        len += 32;
+
+        /* pad: pad + 1 bytes, each holding the value pad, up to a 16-byte boundary */
+        pad = 15 - len % 16;
+        for (j = 0; j <= pad; j++)
+            *(out++) = pad;
+        len += pad + 1;
+
+        ciph_d[i].blocks = (len - processed) / 16; /* what is left to encrypt */
+        len += 16;              /* account for explicit iv */
+
+        /* arrange header: 3 bytes copied from key->md.data[8..10], then 16-bit length */
+        out0[0] = ((u8 *)key->md.data)[8];
+        out0[1] = ((u8 *)key->md.data)[9];
+        out0[2] = ((u8 *)key->md.data)[10];
+        out0[3] = (u8)(len >> 8);
+        out0[4] = (u8)(len);
+
+        ret += len + 5;         /* record body plus its 5-byte header */
+        inp += frag;            /* inp is not read again in this function */
+    }
+
+    aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x); /* encrypt remaining payload, MAC and padding of every record */
+
+    OPENSSL_cleanse(blocks, sizeof(blocks)); /* wipe scratch blocks and hash states */
+    OPENSSL_cleanse(ctx, sizeof(*ctx));
+
+    return ret;
+}
+#  endif
+
+static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx,
+                                        unsigned char *out,
+                                        const unsigned char *in, size_t len)
+{
+    EVP_AES_HMAC_SHA256 *key = data(ctx);
+    unsigned int l;
+    size_t plen = key->payload_length, iv = 0, /* explicit IV in TLS 1.1 and
+                                                * later */
+        sha_off = 0;
+#  if defined(STITCHED_CALL)
+    size_t aes_off = 0, blocks;
+
+    sha_off = SHA256_CBLOCK - key->md.num;
+#  endif
+
+    key->payload_length = NO_PAYLOAD_LENGTH;
+
+    if (len % AES_BLOCK_SIZE)
+        return 0;
+
+    if (ctx->encrypt) {
+        if (plen == NO_PAYLOAD_LENGTH)
+            plen = len;
+        else if (len !=
+                 ((plen + SHA256_DIGEST_LENGTH +
+                   AES_BLOCK_SIZE) & -AES_BLOCK_SIZE))
+            return 0;
+        else if (key->aux.tls_ver >= TLS1_1_VERSION)
+            iv = AES_BLOCK_SIZE;
+
+#  if defined(STITCHED_CALL)
+        if (OPENSSL_ia32cap_P[1] & (1 << (60 - 32)) && /* AVX? */
+            plen > (sha_off + iv) &&
+            (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) {
+            SHA256_Update(&key->md, in + iv, sha_off);
+
+            (void)aesni_cbc_sha256_enc(in, out, blocks, &key->ks,
+                                       ctx->iv, &key->md, in + iv + sha_off);
+            blocks *= SHA256_CBLOCK;
+            aes_off += blocks;
+            sha_off += blocks;
+            key->md.Nh += blocks >> 29;
+            key->md.Nl += blocks <<= 3;
+            if (key->md.Nl < (unsigned int)blocks)
+                key->md.Nh++;
+        } else {
+            sha_off = 0;
+        }
+#  endif
+        sha_off += iv;
+        SHA256_Update(&key->md, in + sha_off, plen - sha_off);
+
+        if (plen != len) {      /* "TLS" mode of operation */
+            if (in != out)
+                memcpy(out + aes_off, in + aes_off, plen - aes_off);
+
+            /* calculate HMAC and append it to payload */
+            SHA256_Final(out + plen, &key->md);
+            key->md = key->tail;
+            SHA256_Update(&key->md, out + plen, SHA256_DIGEST_LENGTH);
+            SHA256_Final(out + plen, &key->md);
+
+            /* pad the payload|hmac */
+            plen += SHA256_DIGEST_LENGTH;
+            for (l = len - plen - 1; plen < len; plen++)
+                out[plen] = l;
+            /* encrypt HMAC|padding at once */
+            aesni_cbc_encrypt(out + aes_off, out + aes_off, len - aes_off,
+                              &key->ks, ctx->iv, 1);
+        } else {
+            aesni_cbc_encrypt(in + aes_off, out + aes_off, len - aes_off,
+                              &key->ks, ctx->iv, 1);
+        }
+    } else {
+        union {
+            unsigned int u[SHA256_DIGEST_LENGTH / sizeof(unsigned int)];
+            unsigned char c[64 + SHA256_DIGEST_LENGTH];
+        } mac, *pmac;
+
+        /* arrange cache line alignment */
+        pmac = (void *)(((size_t)mac.c + 63) & ((size_t)0 - 64));
+
+        /* decrypt HMAC|padding at once */
+        aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0);
+
+        if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */
+            size_t inp_len, mask, j, i;
+            unsigned int res, maxpad, pad, bitlen;
+            int ret = 1;
+            union {
+                unsigned int u[SHA_LBLOCK];
+                unsigned char c[SHA256_CBLOCK];
+            } *data = (void *)key->md.data;
+
+            if ((key->aux.tls_aad[plen - 4] << 8 | key->aux.tls_aad[plen - 3])
+                >= TLS1_1_VERSION)
+                iv = AES_BLOCK_SIZE;
+
+            if (len < (iv + SHA256_DIGEST_LENGTH + 1))
+                return 0;
+
+            /* omit explicit iv */
+            out += iv;
+            len -= iv;
+
+            /* figure out payload length */
+            pad = out[len - 1];
+            maxpad = len - (SHA256_DIGEST_LENGTH + 1);
+            maxpad |= (255 - maxpad) >> (sizeof(maxpad) * 8 - 8);
+            maxpad &= 255;
+
+            inp_len = len - (SHA256_DIGEST_LENGTH + pad + 1);
+            mask = (0 - ((inp_len - len) >> (sizeof(inp_len) * 8 - 1)));
+            inp_len &= mask;
+            ret &= (int)mask;
+
+            key->aux.tls_aad[plen - 2] = inp_len >> 8;
+            key->aux.tls_aad[plen - 1] = inp_len;
+
+            /* calculate HMAC */
+            key->md = key->head;
+            SHA256_Update(&key->md, key->aux.tls_aad, plen);
+
+#  if 1
+            len -= SHA256_DIGEST_LENGTH; /* amend mac */
+            if (len >= (256 + SHA256_CBLOCK)) {
+                j = (len - (256 + SHA256_CBLOCK)) & (0 - SHA256_CBLOCK);
+                j += SHA256_CBLOCK - key->md.num;
+                SHA256_Update(&key->md, out, j);
+                out += j;
+                len -= j;
+                inp_len -= j;
+            }
+
+            /* but pretend as if we hashed padded payload */
+            bitlen = key->md.Nl + (inp_len << 3); /* at most 18 bits */
+#   ifdef BSWAP4
+            bitlen = BSWAP4(bitlen);
+#   else
+            mac.c[0] = 0;
+            mac.c[1] = (unsigned char)(bitlen >> 16);
+            mac.c[2] = (unsigned char)(bitlen >> 8);
+            mac.c[3] = (unsigned char)bitlen;
+            bitlen = mac.u[0];
+#   endif
+
+            pmac->u[0] = 0;
+            pmac->u[1] = 0;
+            pmac->u[2] = 0;
+            pmac->u[3] = 0;
+            pmac->u[4] = 0;
+            pmac->u[5] = 0;
+            pmac->u[6] = 0;
+            pmac->u[7] = 0;
+
+            for (res = key->md.num, j = 0; j < len; j++) {
+                size_t c = out[j];
+                mask = (j - inp_len) >> (sizeof(j) * 8 - 8);
+                c &= mask;
+                c |= 0x80 & ~mask & ~((inp_len - j) >> (sizeof(j) * 8 - 8));
+                data->c[res++] = (unsigned char)c;
+
+                if (res != SHA256_CBLOCK)
+                    continue;
+
+                /* j is not incremented yet */
+                mask = 0 - ((inp_len + 7 - j) >> (sizeof(j) * 8 - 1));
+                data->u[SHA_LBLOCK - 1] |= bitlen & mask;
+                sha256_block_data_order(&key->md, data, 1);
+                mask &= 0 - ((j - inp_len - 72) >> (sizeof(j) * 8 - 1));
+                pmac->u[0] |= key->md.h[0] & mask;
+                pmac->u[1] |= key->md.h[1] & mask;
+                pmac->u[2] |= key->md.h[2] & mask;
+                pmac->u[3] |= key->md.h[3] & mask;
+                pmac->u[4] |= key->md.h[4] & mask;
+                pmac->u[5] |= key->md.h[5] & mask;
+                pmac->u[6] |= key->md.h[6] & mask;
+                pmac->u[7] |= key->md.h[7] & mask;
+                res = 0;
+            }
+
+            for (i = res; i < SHA256_CBLOCK; i++, j++)
+                data->c[i] = 0;
+
+            if (res > SHA256_CBLOCK - 8) {
+                mask = 0 - ((inp_len + 8 - j) >> (sizeof(j) * 8 - 1));
+                data->u[SHA_LBLOCK - 1] |= bitlen & mask;
+                sha256_block_data_order(&key->md, data, 1);
+                mask &= 0 - ((j - inp_len - 73) >> (sizeof(j) * 8 - 1));
+                pmac->u[0] |= key->md.h[0] & mask;
+                pmac->u[1] |= key->md.h[1] & mask;
+                pmac->u[2] |= key->md.h[2] & mask;
+                pmac->u[3] |= key->md.h[3] & mask;
+                pmac->u[4] |= key->md.h[4] & mask;
+                pmac->u[5] |= key->md.h[5] & mask;
+                pmac->u[6] |= key->md.h[6] & mask;
+                pmac->u[7] |= key->md.h[7] & mask;
+
+                memset(data, 0, SHA256_CBLOCK);
+                j += 64;
+            }
+            data->u[SHA_LBLOCK - 1] = bitlen;
+            sha256_block_data_order(&key->md, data, 1);
+            mask = 0 - ((j - inp_len - 73) >> (sizeof(j) * 8 - 1));
+            pmac->u[0] |= key->md.h[0] & mask;
+            pmac->u[1] |= key->md.h[1] & mask;
+            pmac->u[2] |= key->md.h[2] & mask;
+            pmac->u[3] |= key->md.h[3] & mask;
+            pmac->u[4] |= key->md.h[4] & mask;
+            pmac->u[5] |= key->md.h[5] & mask;
+            pmac->u[6] |= key->md.h[6] & mask;
+            pmac->u[7] |= key->md.h[7] & mask;
+
+#   ifdef BSWAP4
+            pmac->u[0] = BSWAP4(pmac->u[0]);
+            pmac->u[1] = BSWAP4(pmac->u[1]);
+            pmac->u[2] = BSWAP4(pmac->u[2]);
+            pmac->u[3] = BSWAP4(pmac->u[3]);
+            pmac->u[4] = BSWAP4(pmac->u[4]);
+            pmac->u[5] = BSWAP4(pmac->u[5]);
+            pmac->u[6] = BSWAP4(pmac->u[6]);
+            pmac->u[7] = BSWAP4(pmac->u[7]);
+#   else
+            for (i = 0; i < 8; i++) {
+                res = pmac->u[i];
+                pmac->c[4 * i + 0] = (unsigned char)(res >> 24);
+                pmac->c[4 * i + 1] = (unsigned char)(res >> 16);
+                pmac->c[4 * i + 2] = (unsigned char)(res >> 8);
+                pmac->c[4 * i + 3] = (unsigned char)res;
+            }
+#   endif
+            len += SHA256_DIGEST_LENGTH;
+#  else
+            SHA256_Update(&key->md, out, inp_len);
+            res = key->md.num;
+            SHA256_Final(pmac->c, &key->md);
+
+            {
+                unsigned int inp_blocks, pad_blocks;
+
+                /* but pretend as if we hashed padded payload */
+                inp_blocks =
+                    1 + ((SHA256_CBLOCK - 9 - res) >> (sizeof(res) * 8 - 1));
+                res += (unsigned int)(len - inp_len);
+                pad_blocks = res / SHA256_CBLOCK;
+                res %= SHA256_CBLOCK;
+                pad_blocks +=
+                    1 + ((SHA256_CBLOCK - 9 - res) >> (sizeof(res) * 8 - 1));
+                for (; inp_blocks < pad_blocks; inp_blocks++)
+                    sha1_block_data_order(&key->md, data, 1);
+            }
+#  endif
+            key->md = key->tail;
+            SHA256_Update(&key->md, pmac->c, SHA256_DIGEST_LENGTH);
+            SHA256_Final(pmac->c, &key->md);
+
+            /* verify HMAC */
+            out += inp_len;
+            len -= inp_len;
+#  if 1
+            {
+                unsigned char *p =
+                    out + len - 1 - maxpad - SHA256_DIGEST_LENGTH;
+                size_t off = out - p;
+                unsigned int c, cmask;
+
+                maxpad += SHA256_DIGEST_LENGTH;
+                for (res = 0, i = 0, j = 0; j < maxpad; j++) {
+                    c = p[j];
+                    cmask =
+                        ((int)(j - off - SHA256_DIGEST_LENGTH)) >>
+                        (sizeof(int) * 8 - 1);
+                    res |= (c ^ pad) & ~cmask; /* ... and padding */
+                    cmask &= ((int)(off - 1 - j)) >> (sizeof(int) * 8 - 1);
+                    res |= (c ^ pmac->c[i]) & cmask;
+                    i += 1 & cmask;
+                }
+                maxpad -= SHA256_DIGEST_LENGTH;
+
+                res = 0 - ((0 - res) >> (sizeof(res) * 8 - 1));
+                ret &= (int)~res;
+            }
+#  else
+            for (res = 0, i = 0; i < SHA256_DIGEST_LENGTH; i++)
+                res |= out[i] ^ pmac->c[i];
+            res = 0 - ((0 - res) >> (sizeof(res) * 8 - 1));
+            ret &= (int)~res;
+
+            /* verify padding */
+            pad = (pad & ~res) | (maxpad & res);
+            out = out + len - 1 - pad;
+            for (res = 0, i = 0; i < pad; i++)
+                res |= out[i] ^ pad;
+
+            res = (0 - res) >> (sizeof(res) * 8 - 1);
+            ret &= (int)~res;
+#  endif
+            return ret;
+        } else {
+            SHA256_Update(&key->md, out, len);
+        }
+    }
+
+    return 1;
+}
+
+/*
+ * Control hook for the stitched AES-CBC/HMAC-SHA256 cipher.
+ *
+ * type selects the operation; arg and ptr are its operands:
+ *
+ *   EVP_CTRL_AEAD_SET_MAC_KEY
+ *       ptr/arg hold the HMAC key.  The inner (ipad) and outer (opad)
+ *       SHA-256 states are precomputed into key->head and key->tail.
+ *       Returns 1, or -1 when arg is negative.
+ *
+ *   EVP_CTRL_AEAD_TLS1_AAD
+ *       ptr holds exactly EVP_AEAD_TLS1_AAD_LEN bytes whose last two bytes
+ *       are a big-endian length.  When encrypting, the bytes are hashed
+ *       into key->md and the number of bytes that MAC plus CBC padding add
+ *       to the payload is returned (0 if a TLS 1.1+ length is shorter than
+ *       one AES block).  When decrypting, the bytes are saved in
+ *       key->aux.tls_aad and SHA256_DIGEST_LENGTH is returned.
+ *
+ *   EVP_CTRL_TLS1_1_MULTIBLOCK_* (only when multiblock support is built)
+ *       buffer sizing, AAD setup and encryption for the multi-block path.
+ *
+ * Every other type, and every malformed operand, yields -1.
+ */
+static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg,
+                                      void *ptr)
+{
+    EVP_AES_HMAC_SHA256 *key = data(ctx);
+
+    switch (type) {
+    case EVP_CTRL_AEAD_SET_MAC_KEY:
+        {
+            unsigned int i;
+            unsigned char hmac_key[64];
+
+            /* a negative length would be converted to a huge size_t below */
+            if (arg < 0)
+                return -1;
+
+            memset(hmac_key, 0, sizeof(hmac_key));
+
+            /* keys longer than one hash block are replaced by their digest */
+            if (arg > (int)sizeof(hmac_key)) {
+                SHA256_Init(&key->head);
+                SHA256_Update(&key->head, ptr, arg);
+                SHA256_Final(hmac_key, &key->head);
+            } else {
+                memcpy(hmac_key, ptr, arg);
+            }
+
+            for (i = 0; i < sizeof(hmac_key); i++)
+                hmac_key[i] ^= 0x36; /* ipad */
+            SHA256_Init(&key->head);
+            SHA256_Update(&key->head, hmac_key, sizeof(hmac_key));
+
+            for (i = 0; i < sizeof(hmac_key); i++)
+                hmac_key[i] ^= 0x36 ^ 0x5c; /* opad */
+            SHA256_Init(&key->tail);
+            SHA256_Update(&key->tail, hmac_key, sizeof(hmac_key));
+
+            /* do not leave key material on the stack */
+            OPENSSL_cleanse(hmac_key, sizeof(hmac_key));
+
+            return 1;
+        }
+    case EVP_CTRL_AEAD_TLS1_AAD:
+        {
+            unsigned char *p = ptr;
+            unsigned int len;
+
+            /*
+             * Validate arg before it is used as an index: p[arg - 2] and
+             * p[arg - 1] are only inside the buffer once the length is
+             * known to be EVP_AEAD_TLS1_AAD_LEN.
+             */
+            if (arg != EVP_AEAD_TLS1_AAD_LEN)
+                return -1;
+
+            len = p[arg - 2] << 8 | p[arg - 1];
+
+            if (ctx->encrypt) {
+                key->payload_length = len;
+                if ((key->aux.tls_ver =
+                     p[arg - 4] << 8 | p[arg - 3]) >= TLS1_1_VERSION) {
+                    /*
+                     * One AES block (presumably the explicit IV) is not
+                     * part of the MACed length; len is unsigned, so a
+                     * shorter value would wrap around instead of failing.
+                     */
+                    if (len < AES_BLOCK_SIZE)
+                        return 0;
+                    len -= AES_BLOCK_SIZE;
+                    p[arg - 2] = len >> 8;
+                    p[arg - 1] = len;
+                }
+                key->md = key->head;
+                SHA256_Update(&key->md, p, arg);
+
+                /* bytes added by the digest plus padding to a full block */
+                return (int)(((len + SHA256_DIGEST_LENGTH +
+                               AES_BLOCK_SIZE) & -AES_BLOCK_SIZE)
+                             - len);
+            } else {
+                memcpy(key->aux.tls_aad, ptr, arg);
+                key->payload_length = arg;
+
+                return SHA256_DIGEST_LENGTH;
+            }
+        }
+#  if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
+        return (int)(5 + 16 + ((arg + 32 + 16) & -16));
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
+        {
+            EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
+                (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr;
+            unsigned int n4x = 1, x4;
+            unsigned int frag, last, packlen, inp_len;
+
+            if (arg < (int)sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM))
+                return -1;
+
+            inp_len = param->inp[11] << 8 | param->inp[12];
+
+            if (ctx->encrypt) {
+                if ((param->inp[9] << 8 | param->inp[10]) < TLS1_1_VERSION)
+                    return -1;
+
+                if (inp_len) {
+                    if (inp_len < 4096)
+                        return 0; /* too short */
+
+                    if (inp_len >= 8192 && OPENSSL_ia32cap_P[2] & (1 << 5))
+                        n4x = 2; /* AVX2 */
+                } else if ((n4x = param->interleave / 4) && n4x <= 2)
+                    inp_len = param->len;
+                else
+                    return -1;
+
+                key->md = key->head;
+                SHA256_Update(&key->md, param->inp, 13);
+
+                x4 = 4 * n4x;
+                n4x += 1;
+
+                /* split the input into x4 fragments; the last takes the rest */
+                frag = inp_len >> n4x;
+                last = inp_len + frag - (frag << n4x);
+                if (last > frag && ((last + 13 + 9) % 64 < (x4 - 1))) {
+                    frag++;
+                    last -= x4 - 1;
+                }
+
+                /* total output size: (x4 - 1) full fragments plus the last */
+                packlen = 5 + 16 + ((frag + 32 + 16) & -16);
+                packlen = (packlen << n4x) - packlen;
+                packlen += 5 + 16 + ((last + 32 + 16) & -16);
+
+                param->interleave = x4;
+
+                return (int)packlen;
+            } else
+                return -1;      /* not yet */
+        }
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT:
+        {
+            EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
+                (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr;
+
+            return (int)tls1_1_multi_block_encrypt(key, param->out,
+                                                   param->inp, param->len,
+                                                   param->interleave / 4);
+        }
+    case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT:
+#  endif
+    default:
+        return -1;
+    }
+}
+
+/*
+ * Cipher descriptor for the 128-bit-key variant of the stitched
+ * AES-CBC/HMAC-SHA256 implementation.  It is handed out by
+ * EVP_aes_128_cbc_hmac_sha256().
+ */
+static EVP_CIPHER aesni_128_cbc_hmac_sha256_cipher = {
+    /* fall back to NID_undef when the object identifier is not defined */
+#  ifdef NID_aes_128_cbc_hmac_sha256
+    NID_aes_128_cbc_hmac_sha256,
+#  else
+    NID_undef,
+#  endif
+    /* block size, key length, IV length (bytes) */
+    16, 16, 16,
+    EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 |
+        EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
+    aesni_cbc_hmac_sha256_init_key,
+    aesni_cbc_hmac_sha256_cipher,
+    NULL,
+    sizeof(EVP_AES_HMAC_SHA256),
+    /* no explicit ASN.1 IV handlers when the DEFAULT_ASN1 flag is non-zero */
+    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_set_asn1_iv,
+    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_get_asn1_iv,
+    aesni_cbc_hmac_sha256_ctrl,
+    NULL
+};
+
+/*
+ * Cipher descriptor for the 256-bit-key variant of the stitched
+ * AES-CBC/HMAC-SHA256 implementation.  It is handed out by
+ * EVP_aes_256_cbc_hmac_sha256().
+ */
+static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher = {
+    /* fall back to NID_undef when the object identifier is not defined */
+#  ifdef NID_aes_256_cbc_hmac_sha256
+    NID_aes_256_cbc_hmac_sha256,
+#  else
+    NID_undef,
+#  endif
+    /* block size, key length, IV length (bytes) */
+    16, 32, 16,
+    EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 |
+        EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
+    aesni_cbc_hmac_sha256_init_key,
+    aesni_cbc_hmac_sha256_cipher,
+    NULL,
+    sizeof(EVP_AES_HMAC_SHA256),
+    /* no explicit ASN.1 IV handlers when the DEFAULT_ASN1 flag is non-zero */
+    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_set_asn1_iv,
+    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_get_asn1_iv,
+    aesni_cbc_hmac_sha256_ctrl,
+    NULL
+};
+
+/*
+ * Returns the AES-128-CBC/HMAC-SHA256 descriptor, or NULL when the CPU does
+ * not advertise AES-NI or the all-NULL call to aesni_cbc_sha256_enc()
+ * (presumably a capability probe) reports zero.
+ */
+const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void)
+{
+    if (!(OPENSSL_ia32cap_P[1] & AESNI_CAPABLE))
+        return NULL;
+
+    if (!aesni_cbc_sha256_enc(NULL, NULL, 0, NULL, NULL, NULL, NULL))
+        return NULL;
+
+    return &aesni_128_cbc_hmac_sha256_cipher;
+}
+
+/*
+ * Returns the AES-256-CBC/HMAC-SHA256 descriptor, or NULL when the CPU does
+ * not advertise AES-NI or the all-NULL call to aesni_cbc_sha256_enc()
+ * (presumably a capability probe) reports zero.
+ */
+const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void)
+{
+    if (!(OPENSSL_ia32cap_P[1] & AESNI_CAPABLE))
+        return NULL;
+
+    if (!aesni_cbc_sha256_enc(NULL, NULL, 0, NULL, NULL, NULL, NULL))
+        return NULL;
+
+    return &aesni_256_cbc_hmac_sha256_cipher;
+}
+# else
+const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void)
+{
+    return NULL;
+}
+
+const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void)
+{
+    return NULL;
+}
+# endif
+#endif
index 27bc489..f9c8401 100644 (file)
@@ -61,6 +61,7 @@
 # include <assert.h>
 # include <openssl/camellia.h>
 # include "evp_locl.h"
+# include "modes_lcl.h"
 
 static int camellia_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                              const unsigned char *iv, int enc);
@@ -68,48 +69,322 @@ static int camellia_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 /* Camellia subkey Structure */
 typedef struct {
     CAMELLIA_KEY ks;
+    block128_f block;           /* single-block routine picked at key setup */
+    /* optional bulk routine; which member is valid depends on the mode */
+    union {
+        cbc128_f cbc;           /* set for CBC mode, NULL when unavailable */
+        ctr128_f ctr;           /* set for CTR mode by the SPARC T4 setup */
+    } stream;
 } EVP_CAMELLIA_KEY;
 
+# define MAXBITCHUNK     ((size_t)1<<(sizeof(size_t)*8-4))
+
 /* Attribute operation for Camellia */
 # define data(ctx)       EVP_C_DATA(EVP_CAMELLIA_KEY,ctx)
 
-IMPLEMENT_BLOCK_CIPHER(camellia_128, ks, Camellia, EVP_CAMELLIA_KEY,
-                       NID_camellia_128, 16, 16, 16, 128,
-                       0, camellia_init_key, NULL,
-                       EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, NULL)
-    IMPLEMENT_BLOCK_CIPHER(camellia_192, ks, Camellia, EVP_CAMELLIA_KEY,
-                       NID_camellia_192, 16, 24, 16, 128,
-                       0, camellia_init_key, NULL,
-                       EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, NULL)
-    IMPLEMENT_BLOCK_CIPHER(camellia_256, ks, Camellia, EVP_CAMELLIA_KEY,
-                       NID_camellia_256, 16, 32, 16, 128,
-                       0, camellia_init_key, NULL,
-                       EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, NULL)
-# define IMPLEMENT_CAMELLIA_CFBR(ksize,cbits)    IMPLEMENT_CFBR(camellia,Camellia,EVP_CAMELLIA_KEY,ks,ksize,cbits,16)
-    IMPLEMENT_CAMELLIA_CFBR(128, 1)
-    IMPLEMENT_CAMELLIA_CFBR(192, 1)
-    IMPLEMENT_CAMELLIA_CFBR(256, 1)
-
-    IMPLEMENT_CAMELLIA_CFBR(128, 8)
-    IMPLEMENT_CAMELLIA_CFBR(192, 8)
-    IMPLEMENT_CAMELLIA_CFBR(256, 8)
+# if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
+/* ---------^^^ this is not a typo, just a way to detect that
+ * assembler support was in general requested... */
+#  include "sparc_arch.h"
+
+extern unsigned int OPENSSL_sparcv9cap_P[];
+
+#  define SPARC_CMLL_CAPABLE      (OPENSSL_sparcv9cap_P[1] & CFR_CAMELLIA)
+
+void cmll_t4_set_key(const unsigned char *key, int bits, CAMELLIA_KEY *ks);
+void cmll_t4_encrypt(const unsigned char *in, unsigned char *out,
+                     const CAMELLIA_KEY *key);
+void cmll_t4_decrypt(const unsigned char *in, unsigned char *out,
+                     const CAMELLIA_KEY *key);
+
+void cmll128_t4_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                            size_t len, const CAMELLIA_KEY *key,
+                            unsigned char *ivec);
+void cmll128_t4_cbc_decrypt(const unsigned char *in, unsigned char *out,
+                            size_t len, const CAMELLIA_KEY *key,
+                            unsigned char *ivec);
+void cmll256_t4_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                            size_t len, const CAMELLIA_KEY *key,
+                            unsigned char *ivec);
+void cmll256_t4_cbc_decrypt(const unsigned char *in, unsigned char *out,
+                            size_t len, const CAMELLIA_KEY *key,
+                            unsigned char *ivec);
+void cmll128_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out,
+                              size_t blocks, const CAMELLIA_KEY *key,
+                              unsigned char *ivec);
+void cmll256_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out,
+                              size_t blocks, const CAMELLIA_KEY *key,
+                              unsigned char *ivec);
+
+/*
+ * Key setup for the SPARC T4 Camellia path.
+ *
+ * Expands the key with cmll_t4_set_key(), then records the single-block
+ * routine and, for CBC and CTR modes, the matching bulk routine in the
+ * cipher data.  192-bit keys share the 256-bit bulk routines.  ECB and CBC
+ * use the decrypt direction when enc is zero; all other modes always use
+ * the encrypt direction.
+ *
+ * Returns 1 on success, 0 (after raising an EVP error) for a key size
+ * other than 128, 192 or 256 bits.
+ */
+static int cmll_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+                            const unsigned char *iv, int enc)
+{
+    EVP_CAMELLIA_KEY *cam = (EVP_CAMELLIA_KEY *) ctx->cipher_data;
+    const int mode = ctx->cipher->flags & EVP_CIPH_MODE;
+    const int bits = ctx->key_len * 8;
+    const int is_cbc = (mode == EVP_CIPH_CBC_MODE);
+    const int is_ctr = (mode == EVP_CIPH_CTR_MODE);
+    const int decrypting = !enc && (is_cbc || mode == EVP_CIPH_ECB_MODE);
+    const int wide = (bits == 192 || bits == 256);
+
+    cmll_t4_set_key(key, bits, &cam->ks);
+
+    /* the block routine is recorded even when the key size is rejected */
+    if (decrypting)
+        cam->block = (block128_f) cmll_t4_decrypt;
+    else
+        cam->block = (block128_f) cmll_t4_encrypt;
+
+    if (!wide && bits != 128) {
+        EVPerr(EVP_F_CMLL_T4_INIT_KEY, EVP_R_CAMELLIA_KEY_SETUP_FAILED);
+        return 0;
+    }
+
+    if (is_cbc) {
+        if (decrypting)
+            cam->stream.cbc = wide ? (cbc128_f) cmll256_t4_cbc_decrypt
+                                   : (cbc128_f) cmll128_t4_cbc_decrypt;
+        else
+            cam->stream.cbc = wide ? (cbc128_f) cmll256_t4_cbc_encrypt
+                                   : (cbc128_f) cmll128_t4_cbc_encrypt;
+    } else if (is_ctr) {
+        cam->stream.ctr = wide ? (ctr128_f) cmll256_t4_ctr32_encrypt
+                               : (ctr128_f) cmll128_t4_ctr32_encrypt;
+    } else {
+        /* no bulk routine: callers fall back to the block routine */
+        cam->stream.cbc = NULL;
+    }
+
+    return 1;
+}
+
+/*
+ * The SPARC T4 cipher tables name their per-mode functions cmll_t4_*.
+ * Each define below maps that name onto the generic camellia_* function,
+ * so the prototype that follows it actually declares the camellia_*
+ * function and both tables end up sharing one implementation per mode.
+ */
+#  define cmll_t4_cbc_cipher camellia_cbc_cipher
+static int cmll_t4_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                              const unsigned char *in, size_t len);
+
+#  define cmll_t4_ecb_cipher camellia_ecb_cipher
+static int cmll_t4_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                              const unsigned char *in, size_t len);
 
+#  define cmll_t4_ofb_cipher camellia_ofb_cipher
+static int cmll_t4_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                              const unsigned char *in, size_t len);
+
+#  define cmll_t4_cfb_cipher camellia_cfb_cipher
+static int cmll_t4_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                              const unsigned char *in, size_t len);
+
+#  define cmll_t4_cfb8_cipher camellia_cfb8_cipher
+static int cmll_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                               const unsigned char *in, size_t len);
+
+#  define cmll_t4_cfb1_cipher camellia_cfb1_cipher
+static int cmll_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                               const unsigned char *in, size_t len);
+
+#  define cmll_t4_ctr_cipher camellia_ctr_cipher
+static int cmll_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                              const unsigned char *in, size_t len);
+
+/*
+ * SPARC variant: for one key length and mode, emits two EVP_CIPHER tables
+ * (one using cmll_t4_init_key, one using camellia_init_key) and the public
+ * EVP_camellia_<keylen>_<mode>() getter, which picks the T4 table when
+ * SPARC_CMLL_CAPABLE is non-zero and the generic table otherwise.
+ * keylen is given in bits and stored in bytes (keylen/8).
+ */
+#  define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
+static const EVP_CIPHER cmll_t4_##keylen##_##mode = { \
+        nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \
+        flags|EVP_CIPH_##MODE##_MODE,   \
+        cmll_t4_init_key,               \
+        cmll_t4_##mode##_cipher,        \
+        NULL,                           \
+        sizeof(EVP_CAMELLIA_KEY),       \
+        NULL,NULL,NULL,NULL }; \
+static const EVP_CIPHER camellia_##keylen##_##mode = { \
+        nid##_##keylen##_##nmode,blocksize,     \
+        keylen/8,ivlen, \
+        flags|EVP_CIPH_##MODE##_MODE,   \
+        camellia_init_key,              \
+        camellia_##mode##_cipher,       \
+        NULL,                           \
+        sizeof(EVP_CAMELLIA_KEY),       \
+        NULL,NULL,NULL,NULL }; \
+const EVP_CIPHER *EVP_camellia_##keylen##_##mode(void) \
+{ return SPARC_CMLL_CAPABLE?&cmll_t4_##keylen##_##mode:&camellia_##keylen##_##mode; }
+
+# else
+
+/*
+ * Generic variant: for one key length and mode, emits a single EVP_CIPHER
+ * table built on camellia_init_key and camellia_<mode>_cipher, plus the
+ * public EVP_camellia_<keylen>_<mode>() getter that returns it.
+ * keylen is given in bits and stored in bytes (keylen/8).
+ */
+#  define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
+static const EVP_CIPHER camellia_##keylen##_##mode = { \
+        nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \
+        flags|EVP_CIPH_##MODE##_MODE,   \
+        camellia_init_key,              \
+        camellia_##mode##_cipher,       \
+        NULL,                           \
+        sizeof(EVP_CAMELLIA_KEY),       \
+        NULL,NULL,NULL,NULL }; \
+const EVP_CIPHER *EVP_camellia_##keylen##_##mode(void) \
+{ return &camellia_##keylen##_##mode; }
+
+# endif
+
+/*
+ * Instantiates BLOCK_CIPHER_generic for every supported mode of one key
+ * length: CBC, ECB, OFB, CFB128, CFB1 and CFB8.  The stream-like modes are
+ * declared with a block size of 1, and ECB with an IV length of 0.
+ * EVP_CIPH_FLAG_DEFAULT_ASN1 is added for all modes except CFB1 and CFB8.
+ */
+# define BLOCK_CIPHER_generic_pack(nid,keylen,flags)             \
+        BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1)     \
+        BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1)      \
+        BLOCK_CIPHER_generic(nid,keylen,1,16,ofb128,ofb,OFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1)   \
+        BLOCK_CIPHER_generic(nid,keylen,1,16,cfb128,cfb,CFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1)   \
+        BLOCK_CIPHER_generic(nid,keylen,1,16,cfb1,cfb1,CFB,flags)       \
+        BLOCK_CIPHER_generic(nid,keylen,1,16,cfb8,cfb8,CFB,flags)
+/* CTR is compiled out: this line would belong to the pack above once a NID exists */
+# if 0                          /* not yet, missing NID */
+BLOCK_CIPHER_generic(nid, keylen, 1, 16, ctr, ctr, CTR, flags)
+# endif
 /* The subkey for Camellia is generated. */
 static int camellia_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                              const unsigned char *iv, int enc)
 {
-    int ret;
-
-    ret = Camellia_set_key(key, ctx->key_len * 8, ctx->cipher_data);
+    int ret, mode;
+    EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data;
 
+    /* expand the key into the schedule member of the cipher data */
+    ret = Camellia_set_key(key, ctx->key_len * 8, &dat->ks);
     if (ret < 0) {
         EVPerr(EVP_F_CAMELLIA_INIT_KEY, EVP_R_CAMELLIA_KEY_SETUP_FAILED);
         return 0;
     }
 
+    /*
+     * Only ECB and CBC decryption use the decrypt block routine; every
+     * other mode/direction uses the encrypt routine.  The bulk CBC routine
+     * is Camellia_cbc_encrypt in both directions: camellia_cbc_cipher()
+     * passes ctx->encrypt to it as an extra argument.
+     */
+    mode = ctx->cipher->flags & EVP_CIPH_MODE;
+    if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE)
+        && !enc) {
+        dat->block = (block128_f) Camellia_decrypt;
+        dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
+            (cbc128_f) Camellia_cbc_encrypt : NULL;
+    } else {
+        dat->block = (block128_f) Camellia_encrypt;
+        dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
+            (cbc128_f) Camellia_cbc_encrypt : NULL;
+    }
+
     return 1;
 }
 
+/*
+ * CBC mode: processes len bytes from in to out, updating ctx->iv.
+ * Uses the bulk routine stored at key setup when there is one, otherwise
+ * the generic CBC helpers driven by the single-block routine.
+ * Always returns 1.
+ */
+static int camellia_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                               const unsigned char *in, size_t len)
+{
+    EVP_CAMELLIA_KEY *cam = (EVP_CAMELLIA_KEY *) ctx->cipher_data;
+
+    if (cam->stream.cbc) {
+        (*cam->stream.cbc) (in, out, len, &cam->ks, ctx->iv, ctx->encrypt);
+        return 1;
+    }
+
+    if (ctx->encrypt) {
+        CRYPTO_cbc128_encrypt(in, out, len, &cam->ks, ctx->iv, cam->block);
+    } else {
+        CRYPTO_cbc128_decrypt(in, out, len, &cam->ks, ctx->iv, cam->block);
+    }
+
+    return 1;
+}
+
+/*
+ * ECB mode: runs the single-block routine over every complete block in
+ * the input.  A trailing partial block, or an input shorter than one
+ * block, is left untouched.  Always returns 1.
+ */
+static int camellia_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                               const unsigned char *in, size_t len)
+{
+    EVP_CAMELLIA_KEY *cam = (EVP_CAMELLIA_KEY *) ctx->cipher_data;
+    const size_t blk = ctx->cipher->block_size;
+
+    while (len >= blk) {
+        (*cam->block) (in, out, &cam->ks);
+        in += blk;
+        out += blk;
+        len -= blk;
+    }
+
+    return 1;
+}
+
+/*
+ * OFB mode: hands the whole buffer to CRYPTO_ofb128_encrypt() together
+ * with the context's IV and running position (ctx->num).
+ * Always returns 1.
+ */
+static int camellia_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                               const unsigned char *in, size_t len)
+{
+    EVP_CAMELLIA_KEY *cam = (EVP_CAMELLIA_KEY *) ctx->cipher_data;
+    block128_f one_block = cam->block;
+
+    CRYPTO_ofb128_encrypt(in, out, len, &cam->ks, ctx->iv, &ctx->num,
+                          one_block);
+
+    return 1;
+}
+
+/*
+ * CFB128 mode: hands the whole buffer to CRYPTO_cfb128_encrypt() together
+ * with the context's IV, running position (ctx->num) and direction.
+ * Always returns 1.
+ */
+static int camellia_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                               const unsigned char *in, size_t len)
+{
+    EVP_CAMELLIA_KEY *cam = (EVP_CAMELLIA_KEY *) ctx->cipher_data;
+    block128_f one_block = cam->block;
+
+    CRYPTO_cfb128_encrypt(in, out, len, &cam->ks, ctx->iv, &ctx->num,
+                          ctx->encrypt, one_block);
+
+    return 1;
+}
+
+/*
+ * CFB8 mode: hands the whole buffer to CRYPTO_cfb128_8_encrypt() together
+ * with the context's IV, running position (ctx->num) and direction.
+ * Always returns 1.
+ */
+static int camellia_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                                const unsigned char *in, size_t len)
+{
+    EVP_CAMELLIA_KEY *cam = (EVP_CAMELLIA_KEY *) ctx->cipher_data;
+    block128_f one_block = cam->block;
+
+    CRYPTO_cfb128_8_encrypt(in, out, len, &cam->ks, ctx->iv, &ctx->num,
+                            ctx->encrypt, one_block);
+
+    return 1;
+}
+
+/*
+ * CFB1 mode.
+ *
+ * CRYPTO_cfb128_1_encrypt() takes its length in bits.  When the context
+ * carries EVP_CIPH_FLAG_LENGTH_BITS, len is already a bit count and is
+ * passed through unchanged.  Otherwise len is a byte count and is
+ * converted with "* 8"; the input is then processed in pieces of at most
+ * MAXBITCHUNK bytes so that the multiplication cannot overflow size_t.
+ *
+ * Always returns 1.
+ */
+static int camellia_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                                const unsigned char *in, size_t len)
+{
+    EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data;
+
+    if (ctx->flags & EVP_CIPH_FLAG_LENGTH_BITS) {
+        CRYPTO_cfb128_1_encrypt(in, out, len, &dat->ks,
+                                ctx->iv, &ctx->num, ctx->encrypt, dat->block);
+        return 1;
+    }
+
+    while (len >= MAXBITCHUNK) {
+        CRYPTO_cfb128_1_encrypt(in, out, MAXBITCHUNK * 8, &dat->ks,
+                                ctx->iv, &ctx->num, ctx->encrypt, dat->block);
+        len -= MAXBITCHUNK;
+        /* move on to the next chunk instead of reprocessing the first */
+        in += MAXBITCHUNK;
+        out += MAXBITCHUNK;
+    }
+    if (len)
+        CRYPTO_cfb128_1_encrypt(in, out, len * 8, &dat->ks,
+                                ctx->iv, &ctx->num, ctx->encrypt, dat->block);
+
+    return 1;
+}
+
+# if 0                          /* not yet, missing NID */
+/*
+ * CTR mode (currently compiled out).  Uses the bulk 32-bit-counter routine
+ * stored at key setup when there is one, otherwise the generic CTR helper
+ * driven by the single-block routine.  The running position is kept in
+ * ctx->num across calls.  Always returns 1.
+ */
+static int camellia_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                               const unsigned char *in, size_t len)
+{
+    EVP_CAMELLIA_KEY *cam = (EVP_CAMELLIA_KEY *) ctx->cipher_data;
+    unsigned int pos = ctx->num;
+
+    if (!cam->stream.ctr) {
+        CRYPTO_ctr128_encrypt(in, out, len, &cam->ks,
+                              ctx->iv, ctx->buf, &pos, cam->block);
+    } else {
+        CRYPTO_ctr128_encrypt_ctr32(in, out, len, &cam->ks,
+                                    ctx->iv, ctx->buf, &pos, cam->stream.ctr);
+    }
+
+    ctx->num = (size_t)pos;
+    return 1;
+}
+# endif
+
+BLOCK_CIPHER_generic_pack(NID_camellia, 128, 0)
+    BLOCK_CIPHER_generic_pack(NID_camellia, 192, 0)
+    BLOCK_CIPHER_generic_pack(NID_camellia, 256, 0)
 #else
 
 # ifdef PEDANTIC
index ea1a4c4..aae13a6 100644 (file)
 # include <openssl/des.h>
 # include <openssl/rand.h>
 
+/*
+ * Per-context data for the single-DES EVP ciphers: the key schedule plus
+ * an optional bulk CBC routine.  des_cbc_cipher() calls stream.cbc when it
+ * is non-NULL and otherwise falls back to DES_ncbc_encrypt().
+ */
+typedef struct {
+    union {
+        double align;           /* presumably only there to force alignment */
+        DES_key_schedule ks;
+    } ks;
+    union {
+        /* arguments as called: in, out, length, key schedule, IV */
+        void (*cbc) (const void *, void *, size_t, const void *, void *);
+    } stream;
+} EVP_DES_KEY;
+
+# if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
+/* ---------^^^ this is not a typo, just a way to detect that
+ * assembler support was in general requested... */
+#  include "sparc_arch.h"
+
+extern unsigned int OPENSSL_sparcv9cap_P[];
+
+#  define SPARC_DES_CAPABLE       (OPENSSL_sparcv9cap_P[1] & CFR_DES)
+
+void des_t4_key_expand(const void *key, DES_key_schedule *ks);
+void des_t4_cbc_encrypt(const void *inp, void *out, size_t len,
+                        DES_key_schedule *ks, unsigned char iv[8]);
+void des_t4_cbc_decrypt(const void *inp, void *out, size_t len,
+                        DES_key_schedule *ks, unsigned char iv[8]);
+# endif
+
 static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                         const unsigned char *iv, int enc);
 static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr);
@@ -102,6 +128,12 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                           const unsigned char *in, size_t inl)
 {
+    EVP_DES_KEY *dat = (EVP_DES_KEY *) ctx->cipher_data;
+
+    if (dat->stream.cbc) {
+        (*dat->stream.cbc) (in, out, inl, &dat->ks.ks, ctx->iv);
+        return 1;
+    }
     while (inl >= EVP_MAXCHUNK) {
         DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data,
                          (DES_cblock *)ctx->iv, ctx->encrypt);
@@ -179,16 +211,15 @@ static int des_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     return 1;
 }
 
-BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64,
+BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64,
                   EVP_CIPH_RAND_KEY, des_init_key, NULL,
                   EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl)
 
+    BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 1,
+                     EVP_CIPH_RAND_KEY, des_init_key, NULL,
+                     EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl)
 
-BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 1,
-                 EVP_CIPH_RAND_KEY, des_init_key, NULL,
-                 EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl)
-
-BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 8,
+    BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 8,
                      EVP_CIPH_RAND_KEY, des_init_key, NULL,
                      EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl)
 
@@ -196,8 +227,22 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                         const unsigned char *iv, int enc)
 {
     DES_cblock *deskey = (DES_cblock *)key;
+    EVP_DES_KEY *dat = (EVP_DES_KEY *) ctx->cipher_data;
+
+    dat->stream.cbc = NULL;
+# if defined(SPARC_DES_CAPABLE)
+    if (SPARC_DES_CAPABLE) {
+        int mode = ctx->cipher->flags & EVP_CIPH_MODE;
+
+        if (mode == EVP_CIPH_CBC_MODE) {
+            des_t4_key_expand(key, &dat->ks.ks);
+            dat->stream.cbc = enc ? des_t4_cbc_encrypt : des_t4_cbc_decrypt;
+            return 1;
+        }
+    }
+# endif
 # ifdef EVP_CHECK_DES_KEY
-    if (DES_set_key_checked(deskey, ctx->cipher_data) != 0)
+    if (DES_set_key_checked(deskey, dat->ks.ks) != 0)
         return 0;
 # else
     DES_set_key_unchecked(deskey, ctx->cipher_data);
index 07a5aca..96f272e 100644 (file)
 # include <openssl/des.h>
 # include <openssl/rand.h>
 
-# ifndef OPENSSL_FIPS
+/* Block use of implementations in FIPS mode */
+# undef EVP_CIPH_FLAG_FIPS
+# define EVP_CIPH_FLAG_FIPS      0
+
+/*
+ * Per-context data for the two- and three-key triple-DES EVP ciphers: the
+ * three key schedules plus an optional bulk CBC routine.
+ * des_ede_cbc_cipher() calls stream.cbc when it is non-NULL.
+ */
+typedef struct {
+    union {
+        double align;           /* presumably only there to force alignment */
+        DES_key_schedule ks[3];
+    } ks;
+    union {
+        /* arguments as called: in, out, length, key schedules, IV */
+        void (*cbc) (const void *, void *, size_t, const void *, void *);
+    } stream;
+} DES_EDE_KEY;
+/* keep the old member names working on top of the array layout */
+# define ks1 ks.ks[0]
+# define ks2 ks.ks[1]
+# define ks3 ks.ks[2]
+
+# if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
+/* ---------^^^ this is not a typo, just a way to detect that
+ * assembler support was in general requested... */
+#  include "sparc_arch.h"
+
+extern unsigned int OPENSSL_sparcv9cap_P[];
+
+#  define SPARC_DES_CAPABLE       (OPENSSL_sparcv9cap_P[1] & CFR_DES)
+
+void des_t4_key_expand(const void *key, DES_key_schedule *ks);
+void des_t4_ede3_cbc_encrypt(const void *inp, void *out, size_t len,
+                             DES_key_schedule *ks, unsigned char iv[8]);
+void des_t4_ede3_cbc_decrypt(const void *inp, void *out, size_t len,
+                             DES_key_schedule *ks, unsigned char iv[8]);
+# endif
 
 static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                             const unsigned char *iv, int enc);
@@ -75,13 +106,7 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 
 static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr);
 
-typedef struct {
-    DES_key_schedule ks1;       /* key schedule */
-    DES_key_schedule ks2;       /* key schedule (for ede) */
-    DES_key_schedule ks3;       /* key schedule (for ede3) */
-} DES_EDE_KEY;
-
-#  define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data)
+# define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data)
 
 /*
  * Because of various casts and different args can't use
@@ -123,7 +148,9 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                               const unsigned char *in, size_t inl)
 {
-#  ifdef KSSL_DEBUG
+    DES_EDE_KEY *dat = data(ctx);
+
+# ifdef KSSL_DEBUG
     {
         int i;
         fprintf(stderr, "des_ede_cbc_cipher(ctx=%p, buflen=%d)\n", ctx,
@@ -133,21 +160,24 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
             fprintf(stderr, "%02X", ctx->iv[i]);
         fprintf(stderr, "\n");
     }
-#  endif                        /* KSSL_DEBUG */
+# endif                         /* KSSL_DEBUG */
+    if (dat->stream.cbc) {
+        (*dat->stream.cbc) (in, out, inl, &dat->ks, ctx->iv);
+        return 1;
+    }
+
     while (inl >= EVP_MAXCHUNK) {
         DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK,
-                             &data(ctx)->ks1, &data(ctx)->ks2,
-                             &data(ctx)->ks3, (DES_cblock *)ctx->iv,
-                             ctx->encrypt);
+                             &dat->ks1, &dat->ks2, &dat->ks3,
+                             (DES_cblock *)ctx->iv, ctx->encrypt);
         inl -= EVP_MAXCHUNK;
         in += EVP_MAXCHUNK;
         out += EVP_MAXCHUNK;
     }
     if (inl)
         DES_ede3_cbc_encrypt(in, out, (long)inl,
-                             &data(ctx)->ks1, &data(ctx)->ks2,
-                             &data(ctx)->ks3, (DES_cblock *)ctx->iv,
-                             ctx->encrypt);
+                             &dat->ks1, &dat->ks2, &dat->ks3,
+                             (DES_cblock *)ctx->iv, ctx->encrypt);
     return 1;
 }
 
@@ -215,39 +245,57 @@ static int des_ede3_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 }
 
 BLOCK_CIPHER_defs(des_ede, DES_EDE_KEY, NID_des_ede, 8, 16, 8, 64,
-                  EVP_CIPH_RAND_KEY, des_ede_init_key, NULL,
-                  EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl)
-#  define des_ede3_cfb64_cipher des_ede_cfb64_cipher
-#  define des_ede3_ofb_cipher des_ede_ofb_cipher
-#  define des_ede3_cbc_cipher des_ede_cbc_cipher
-#  define des_ede3_ecb_cipher des_ede_ecb_cipher
+                  EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_DEFAULT_ASN1,
+                  des_ede_init_key, NULL, NULL, NULL, des3_ctrl)
+# define des_ede3_cfb64_cipher des_ede_cfb64_cipher
+# define des_ede3_ofb_cipher des_ede_ofb_cipher
+# define des_ede3_cbc_cipher des_ede_cbc_cipher
+# define des_ede3_ecb_cipher des_ede_ecb_cipher
     BLOCK_CIPHER_defs(des_ede3, DES_EDE_KEY, NID_des_ede3, 8, 24, 8, 64,
-                  EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL,
-                  EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl)
+                  EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_FIPS |
+                  EVP_CIPH_FLAG_DEFAULT_ASN1, des_ede3_init_key, NULL, NULL, NULL,
+                  des3_ctrl)
 
     BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 1,
-                     EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL,
-                     EVP_CIPHER_set_asn1_iv,
-                     EVP_CIPHER_get_asn1_iv, des3_ctrl)
+                     EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_FIPS |
+                     EVP_CIPH_FLAG_DEFAULT_ASN1, des_ede3_init_key, NULL, NULL,
+                     NULL, des3_ctrl)
 
     BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 8,
-                     EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL,
-                     EVP_CIPHER_set_asn1_iv,
-                     EVP_CIPHER_get_asn1_iv, des3_ctrl)
+                     EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_FIPS |
+                     EVP_CIPH_FLAG_DEFAULT_ASN1, des_ede3_init_key, NULL, NULL,
+                     NULL, des3_ctrl)
 
 static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                             const unsigned char *iv, int enc)
 {
     DES_cblock *deskey = (DES_cblock *)key;
-#  ifdef EVP_CHECK_DES_KEY
-    if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1)
-        ! !DES_set_key_checked(&deskey[1], &data(ctx)->ks2))
+    DES_EDE_KEY *dat = data(ctx);
+
+    dat->stream.cbc = NULL;
+# if defined(SPARC_DES_CAPABLE)
+    if (SPARC_DES_CAPABLE) {
+        int mode = ctx->cipher->flags & EVP_CIPH_MODE;
+
+        if (mode == EVP_CIPH_CBC_MODE) {
+            des_t4_key_expand(&deskey[0], &dat->ks1);
+            des_t4_key_expand(&deskey[1], &dat->ks2);
+            memcpy(&dat->ks3, &dat->ks1, sizeof(dat->ks1));
+            dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt :
+                des_t4_ede3_cbc_decrypt;
+            return 1;
+        }
+    }
+# endif
+# ifdef EVP_CHECK_DES_KEY
+    if (DES_set_key_checked(&deskey[0], &dat->ks1)
+        ! !DES_set_key_checked(&deskey[1], &dat->ks2))
         return 0;
-#  else
-    DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1);
-    DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2);
-#  endif
-    memcpy(&data(ctx)->ks3, &data(ctx)->ks1, sizeof(data(ctx)->ks1));
+# else
+    DES_set_key_unchecked(&deskey[0], &dat->ks1);
+    DES_set_key_unchecked(&deskey[1], &dat->ks2);
+# endif
+    memcpy(&dat->ks3, &dat->ks1, sizeof(dat->ks1));
     return 1;
 }
 
@@ -255,7 +303,9 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                              const unsigned char *iv, int enc)
 {
     DES_cblock *deskey = (DES_cblock *)key;
-#  ifdef KSSL_DEBUG
+    DES_EDE_KEY *dat = data(ctx);
+
+# ifdef KSSL_DEBUG
     {
         int i;
         fprintf(stderr, "des_ede3_init_key(ctx=%p)\n", ctx);
@@ -270,18 +320,33 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
             fprintf(stderr, "\n");
         }
     }
-#  endif                        /* KSSL_DEBUG */
+# endif                         /* KSSL_DEBUG */
+
+    dat->stream.cbc = NULL;
+# if defined(SPARC_DES_CAPABLE)
+    if (SPARC_DES_CAPABLE) {
+        int mode = ctx->cipher->flags & EVP_CIPH_MODE;
 
-#  ifdef EVP_CHECK_DES_KEY
-    if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1)
-        || DES_set_key_checked(&deskey[1], &data(ctx)->ks2)
-        || DES_set_key_checked(&deskey[2], &data(ctx)->ks3))
+        if (mode == EVP_CIPH_CBC_MODE) {
+            des_t4_key_expand(&deskey[0], &dat->ks1);
+            des_t4_key_expand(&deskey[1], &dat->ks2);
+            des_t4_key_expand(&deskey[2], &dat->ks3);
+            dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt :
+                des_t4_ede3_cbc_decrypt;
+            return 1;
+        }
+    }
+# endif
+# ifdef EVP_CHECK_DES_KEY
+    if (DES_set_key_checked(&deskey[0], &dat->ks1)
+        || DES_set_key_checked(&deskey[1], &dat->ks2)
+        || DES_set_key_checked(&deskey[2], &dat->ks3))
         return 0;
-#  else
-    DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1);
-    DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2);
-    DES_set_key_unchecked(&deskey[2], &data(ctx)->ks3);
-#  endif
+# else
+    DES_set_key_unchecked(&deskey[0], &dat->ks1);
+    DES_set_key_unchecked(&deskey[1], &dat->ks2);
+    DES_set_key_unchecked(&deskey[2], &dat->ks3);
+# endif
     return 1;
 }
 
@@ -315,5 +380,115 @@ const EVP_CIPHER *EVP_des_ede3(void)
 {
     return &des_ede3_ecb;
 }
+
+# ifndef OPENSSL_NO_SHA
+
+#  include <openssl/sha.h>
+
+static const unsigned char wrap_iv[8] =
+    { 0x4a, 0xdd, 0xa2, 0x2c, 0x79, 0xe8, 0x21, 0x05 };
+
+static int des_ede3_unwrap(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                           const unsigned char *in, size_t inl)
+{
+    unsigned char icv[8], iv[8], sha1tmp[SHA_DIGEST_LENGTH];
+    int rv = -1;
+    if (inl < 24)
+        return -1;
+    if (!out)
+        return inl - 16;
+    memcpy(ctx->iv, wrap_iv, 8);
+    /* Decrypt first block which will end up as icv */
+    des_ede_cbc_cipher(ctx, icv, in, 8);
+    /* Decrypt central blocks */
+    /*
+     * If decrypting in place move whole output along a block so the next
+     * des_ede_cbc_cipher is in place.
+     */
+    if (out == in) {
+        memmove(out, out + 8, inl - 8);
+        in -= 8;
+    }
+    des_ede_cbc_cipher(ctx, out, in + 8, inl - 16);
+    /* Decrypt final block which will be IV */
+    des_ede_cbc_cipher(ctx, iv, in + inl - 8, 8);
+    /* Reverse order of everything */
+    BUF_reverse(icv, NULL, 8);
+    BUF_reverse(out, NULL, inl - 16);
+    BUF_reverse(ctx->iv, iv, 8);
+    /* Decrypt again using new IV */
+    des_ede_cbc_cipher(ctx, out, out, inl - 16);
+    des_ede_cbc_cipher(ctx, icv, icv, 8);
+    /* Work out SHA1 hash of first portion */
+    SHA1(out, inl - 16, sha1tmp);
+
+    if (!CRYPTO_memcmp(sha1tmp, icv, 8))
+        rv = inl - 16;
+    OPENSSL_cleanse(icv, 8);
+    OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH);
+    OPENSSL_cleanse(iv, 8);
+    OPENSSL_cleanse(ctx->iv, 8);
+    if (rv == -1)
+        OPENSSL_cleanse(out, inl - 16);
+
+    return rv;
+}
+
+static int des_ede3_wrap(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                         const unsigned char *in, size_t inl)
+{
+    unsigned char sha1tmp[SHA_DIGEST_LENGTH];
+    if (!out)
+        return inl + 16;
+    /* Copy input to output buffer + 8 so we have space for IV */
+    memmove(out + 8, in, inl);
+    /* Work out ICV */
+    SHA1(in, inl, sha1tmp);
+    memcpy(out + inl + 8, sha1tmp, 8);
+    OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH);
+    /* Generate random IV */
+    if (RAND_bytes(ctx->iv, 8) <= 0)
+        return -1;
+    memcpy(out, ctx->iv, 8);
+    /* Encrypt everything after IV in place */
+    des_ede_cbc_cipher(ctx, out + 8, out + 8, inl + 8);
+    BUF_reverse(out, NULL, inl + 16);
+    memcpy(ctx->iv, wrap_iv, 8);
+    des_ede_cbc_cipher(ctx, out, out, inl + 16);
+    return inl + 16;
+}
+
+static int des_ede3_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                                const unsigned char *in, size_t inl)
+{
+    /*
+     * Sanity check input length: we typically only wrap keys so EVP_MAXCHUNK
+     * is more than will ever be needed. Also input length must be a multiple
+     * of 8 bytes.
+     */
+    if (inl >= EVP_MAXCHUNK || inl % 8)
+        return -1;
+    if (ctx->encrypt)
+        return des_ede3_wrap(ctx, out, in, inl);
+    else
+        return des_ede3_unwrap(ctx, out, in, inl);
+}
+
+static const EVP_CIPHER des3_wrap = {
+    NID_id_smime_alg_CMS3DESwrap,
+    8, 24, 0,
+    EVP_CIPH_WRAP_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER
+        | EVP_CIPH_FLAG_DEFAULT_ASN1,
+    des_ede3_init_key, des_ede3_wrap_cipher,
+    NULL,
+    sizeof(DES_EDE_KEY),
+    NULL, NULL, NULL, NULL
+};
+
+const EVP_CIPHER *EVP_des_ede3_wrap(void)
+{
+    return &des3_wrap;
+}
+
 # endif
 #endif
index af90ce3..599fcb8 100644 (file)
@@ -61,8 +61,6 @@
 #include <openssl/evp.h>
 #include <openssl/objects.h>
 
-#ifndef OPENSSL_FIPS
-
 static int null_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                          const unsigned char *iv, int enc);
 static int null_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
@@ -100,4 +98,3 @@ static int null_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
         memcpy((char *)out, (const char *)in, inl);
     return 1;
 }
-#endif
index 5c5988f..c361d1f 100644 (file)
@@ -248,7 +248,7 @@ int EVP_DecodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl,
 
     /* We parse the input data */
     for (i = 0; i < inl; i++) {
-        /* If the current line is > 80 characters, scream alot */
+        /* If the current line is > 80 characters, scream a lot */
         if (ln >= 80) {
             rv = -1;
             goto end;
index 6cf98ac..39ab793 100644 (file)
 # define EVP_PKEY_DSA3   NID_dsaWithSHA1
 # define EVP_PKEY_DSA4   NID_dsaWithSHA1_2
 # define EVP_PKEY_DH     NID_dhKeyAgreement
+# define EVP_PKEY_DHX    NID_dhpublicnumber
 # define EVP_PKEY_EC     NID_X9_62_id_ecPublicKey
 # define EVP_PKEY_HMAC   NID_hmac
 # define EVP_PKEY_CMAC   NID_cmac
@@ -345,6 +346,7 @@ struct evp_cipher_st {
 # define         EVP_CIPH_GCM_MODE               0x6
 # define         EVP_CIPH_CCM_MODE               0x7
 # define         EVP_CIPH_XTS_MODE               0x10001
+# define         EVP_CIPH_WRAP_MODE              0x10002
 # define         EVP_CIPH_MODE                   0xF0007
 /* Set if variable length cipher */
 # define         EVP_CIPH_VARIABLE_LENGTH        0x8
@@ -375,6 +377,14 @@ struct evp_cipher_st {
  */
 # define         EVP_CIPH_FLAG_CUSTOM_CIPHER     0x100000
 # define         EVP_CIPH_FLAG_AEAD_CIPHER       0x200000
+# define         EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0x400000
+
+/*
+ * Cipher context flag to indicate we can handle wrap mode: if allowed in
+ * older applications it could overflow buffers.
+ */
+
+# define         EVP_CIPHER_CTX_FLAG_WRAP_ALLOW  0x1
 
 /* ctrl() values */
 
@@ -408,9 +418,21 @@ struct evp_cipher_st {
 /* Set the GCM invocation field, decrypt only */
 # define         EVP_CTRL_GCM_SET_IV_INV         0x18
 
+# define         EVP_CTRL_TLS1_1_MULTIBLOCK_AAD  0x19
+# define         EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT      0x1a
+# define         EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT      0x1b
+# define         EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE  0x1c
+
 /* RFC 5246 defines additional data to be 13 bytes in length */
 # define         EVP_AEAD_TLS1_AAD_LEN           13
 
+typedef struct {
+    unsigned char *out;
+    const unsigned char *inp;
+    size_t len;
+    unsigned int interleave;
+} EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM;
+
 /* GCM TLS constants */
 /* Length of fixed part of IV derived from PRF */
 # define EVP_GCM_TLS_FIXED_IV_LEN                        4
@@ -639,7 +661,8 @@ int EVP_DigestSignFinal(EVP_MD_CTX *ctx,
 
 int EVP_DigestVerifyInit(EVP_MD_CTX *ctx, EVP_PKEY_CTX **pctx,
                          const EVP_MD *type, ENGINE *e, EVP_PKEY *pkey);
-int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, unsigned char *sig, size_t siglen);
+int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx,
+                          const unsigned char *sig, size_t siglen);
 
 int EVP_OpenInit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *type,
                  const unsigned char *ek, int ekl, const unsigned char *iv,
@@ -744,6 +767,7 @@ const EVP_CIPHER *EVP_des_cbc(void);
 const EVP_CIPHER *EVP_des_ede_cbc(void);
 const EVP_CIPHER *EVP_des_ede3_cbc(void);
 const EVP_CIPHER *EVP_desx_cbc(void);
+const EVP_CIPHER *EVP_des_ede3_wrap(void);
 /*
  * This should now be supported through the dev_crypto ENGINE. But also, why
  * are rc4 and md5 declarations made here inside a "NO_DES" precompiler
@@ -813,6 +837,7 @@ const EVP_CIPHER *EVP_aes_128_ctr(void);
 const EVP_CIPHER *EVP_aes_128_ccm(void);
 const EVP_CIPHER *EVP_aes_128_gcm(void);
 const EVP_CIPHER *EVP_aes_128_xts(void);
+const EVP_CIPHER *EVP_aes_128_wrap(void);
 const EVP_CIPHER *EVP_aes_192_ecb(void);
 const EVP_CIPHER *EVP_aes_192_cbc(void);
 const EVP_CIPHER *EVP_aes_192_cfb1(void);
@@ -823,6 +848,7 @@ const EVP_CIPHER *EVP_aes_192_ofb(void);
 const EVP_CIPHER *EVP_aes_192_ctr(void);
 const EVP_CIPHER *EVP_aes_192_ccm(void);
 const EVP_CIPHER *EVP_aes_192_gcm(void);
+const EVP_CIPHER *EVP_aes_192_wrap(void);
 const EVP_CIPHER *EVP_aes_256_ecb(void);
 const EVP_CIPHER *EVP_aes_256_cbc(void);
 const EVP_CIPHER *EVP_aes_256_cfb1(void);
@@ -834,10 +860,15 @@ const EVP_CIPHER *EVP_aes_256_ctr(void);
 const EVP_CIPHER *EVP_aes_256_ccm(void);
 const EVP_CIPHER *EVP_aes_256_gcm(void);
 const EVP_CIPHER *EVP_aes_256_xts(void);
+const EVP_CIPHER *EVP_aes_256_wrap(void);
 #  if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1)
 const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void);
 const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void);
 #  endif
+#  ifndef OPENSSL_NO_SHA256
+const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void);
+const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void);
+#  endif
 # endif
 # ifndef OPENSSL_NO_CAMELLIA
 const EVP_CIPHER *EVP_camellia_128_ecb(void);
@@ -1028,6 +1059,7 @@ void EVP_PBE_cleanup(void);
 # define ASN1_PKEY_CTRL_DEFAULT_MD_NID   0x3
 # define ASN1_PKEY_CTRL_CMS_SIGN         0x5
 # define ASN1_PKEY_CTRL_CMS_ENVELOPE     0x7
+# define ASN1_PKEY_CTRL_CMS_RI_TYPE      0x8
 
 int EVP_PKEY_asn1_get_count(void);
 const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_get0(int idx);
@@ -1091,6 +1123,19 @@ void EVP_PKEY_asn1_set_free(EVP_PKEY_ASN1_METHOD *ameth,
 void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth,
                             int (*pkey_ctrl) (EVP_PKEY *pkey, int op,
                                               long arg1, void *arg2));
+void EVP_PKEY_asn1_set_item(EVP_PKEY_ASN1_METHOD *ameth,
+                            int (*item_verify) (EVP_MD_CTX *ctx,
+                                                const ASN1_ITEM *it,
+                                                void *asn,
+                                                X509_ALGOR *a,
+                                                ASN1_BIT_STRING *sig,
+                                                EVP_PKEY *pkey),
+                            int (*item_sign) (EVP_MD_CTX *ctx,
+                                              const ASN1_ITEM *it,
+                                              void *asn,
+                                              X509_ALGOR *alg1,
+                                              X509_ALGOR *alg2,
+                                              ASN1_BIT_STRING *sig));
 
 # define EVP_PKEY_OP_UNDEFINED           0
 # define EVP_PKEY_OP_PARAMGEN            (1<<1)
@@ -1121,6 +1166,10 @@ void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth,
                 EVP_PKEY_CTX_ctrl(ctx, -1, EVP_PKEY_OP_TYPE_SIG,  \
                                         EVP_PKEY_CTRL_MD, 0, (void *)md)
 
+# define  EVP_PKEY_CTX_get_signature_md(ctx, pmd)        \
+                EVP_PKEY_CTX_ctrl(ctx, -1, EVP_PKEY_OP_TYPE_SIG,  \
+                                        EVP_PKEY_CTRL_GET_MD, 0, (void *)pmd)
+
 # define EVP_PKEY_CTRL_MD                1
 # define EVP_PKEY_CTRL_PEER_KEY          2
 
@@ -1142,6 +1191,8 @@ void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth,
 
 # define EVP_PKEY_CTRL_CIPHER            12
 
+# define EVP_PKEY_CTRL_GET_MD            13
+
 # define EVP_PKEY_ALG_CTRL               0x1000
 
 # define EVP_PKEY_FLAG_AUTOARGLEN        2
@@ -1327,11 +1378,13 @@ void ERR_load_EVP_strings(void);
 # define EVP_F_AESNI_INIT_KEY                             165
 # define EVP_F_AESNI_XTS_CIPHER                           176
 # define EVP_F_AES_INIT_KEY                               133
+# define EVP_F_AES_T4_INIT_KEY                            178
 # define EVP_F_AES_XTS                                    172
 # define EVP_F_AES_XTS_CIPHER                             175
 # define EVP_F_ALG_MODULE_INIT                            177
 # define EVP_F_CAMELLIA_INIT_KEY                          159
 # define EVP_F_CMAC_INIT                                  173
+# define EVP_F_CMLL_T4_INIT_KEY                           179
 # define EVP_F_D2I_PKEY                                   100
 # define EVP_F_DO_SIGVER_INIT                             161
 # define EVP_F_DSAPKEY2PKCS8                              134
@@ -1471,6 +1524,7 @@ void ERR_load_EVP_strings(void);
 # define EVP_R_UNSUPPORTED_PRF                            125
 # define EVP_R_UNSUPPORTED_PRIVATE_KEY_ALGORITHM          118
 # define EVP_R_UNSUPPORTED_SALT_TYPE                      126
+# define EVP_R_WRAP_MODE_NOT_ALLOWED                      170
 # define EVP_R_WRONG_FINAL_BLOCK_LENGTH                   109
 # define EVP_R_WRONG_PUBLIC_KEY_TYPE                      110
 
index 4e983c4..65f0e02 100644 (file)
@@ -169,8 +169,14 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
 #endif
 
 #ifdef OPENSSL_FIPS
-        if (FIPS_mode())
+        if (FIPS_mode()) {
+            const EVP_CIPHER *fcipher;
+            if (cipher)
+                fcipher = evp_get_fips_cipher(cipher);
+            if (fcipher)
+                cipher = fcipher;
             return FIPS_cipherinit(ctx, cipher, key, iv, enc);
+        }
 #endif
         ctx->cipher = cipher;
         if (ctx->cipher->ctx_size) {
@@ -183,7 +189,8 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
             ctx->cipher_data = NULL;
         }
         ctx->key_len = cipher->key_len;
-        ctx->flags = 0;
+        /* Preserve wrap enable flag, zero everything else */
+        ctx->flags &= EVP_CIPHER_CTX_FLAG_WRAP_ALLOW;
         if (ctx->cipher->flags & EVP_CIPH_CTRL_INIT) {
             if (!EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_INIT, 0, NULL)) {
                 EVPerr(EVP_F_EVP_CIPHERINIT_EX, EVP_R_INITIALIZATION_ERROR);
@@ -206,6 +213,12 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
                    || ctx->cipher->block_size == 8
                    || ctx->cipher->block_size == 16);
 
+    if (!(ctx->flags & EVP_CIPHER_CTX_FLAG_WRAP_ALLOW)
+        && EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_WRAP_MODE) {
+        EVPerr(EVP_F_EVP_CIPHERINIT_EX, EVP_R_WRAP_MODE_NOT_ALLOWED);
+        return 0;
+    }
+
     if (!(EVP_CIPHER_CTX_flags(ctx) & EVP_CIPH_CUSTOM_IV)) {
         switch (EVP_CIPHER_CTX_mode(ctx)) {
 
index 686a699..15cf553 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/evp/evp_err.c */
 /* ====================================================================
- * Copyright (c) 1999-2011 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2013 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -73,11 +73,13 @@ static ERR_STRING_DATA EVP_str_functs[] = {
     {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"},
     {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"},
     {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"},
+    {ERR_FUNC(EVP_F_AES_T4_INIT_KEY), "AES_T4_INIT_KEY"},
     {ERR_FUNC(EVP_F_AES_XTS), "AES_XTS"},
     {ERR_FUNC(EVP_F_AES_XTS_CIPHER), "AES_XTS_CIPHER"},
     {ERR_FUNC(EVP_F_ALG_MODULE_INIT), "ALG_MODULE_INIT"},
     {ERR_FUNC(EVP_F_CAMELLIA_INIT_KEY), "CAMELLIA_INIT_KEY"},
     {ERR_FUNC(EVP_F_CMAC_INIT), "CMAC_INIT"},
+    {ERR_FUNC(EVP_F_CMLL_T4_INIT_KEY), "CMLL_T4_INIT_KEY"},
     {ERR_FUNC(EVP_F_D2I_PKEY), "D2I_PKEY"},
     {ERR_FUNC(EVP_F_DO_SIGVER_INIT), "DO_SIGVER_INIT"},
     {ERR_FUNC(EVP_F_DSAPKEY2PKCS8), "DSAPKEY2PKCS8"},
@@ -232,6 +234,7 @@ static ERR_STRING_DATA EVP_str_reasons[] = {
     {ERR_REASON(EVP_R_UNSUPPORTED_PRIVATE_KEY_ALGORITHM),
      "unsupported private key algorithm"},
     {ERR_REASON(EVP_R_UNSUPPORTED_SALT_TYPE), "unsupported salt type"},
+    {ERR_REASON(EVP_R_WRAP_MODE_NOT_ALLOWED), "wrap mode not allowed"},
     {ERR_REASON(EVP_R_WRONG_FINAL_BLOCK_LENGTH), "wrong final block length"},
     {ERR_REASON(EVP_R_WRONG_PUBLIC_KEY_TYPE), "wrong public key type"},
     {0, NULL}
index 21688b0..0f7b011 100644 (file)
@@ -345,7 +345,7 @@ static int test_EVP_DigestVerifyInit(void)
     if (pkey == NULL ||
         !EVP_DigestVerifyInit(&md_ctx, NULL, EVP_sha256(), NULL, pkey) ||
         !EVP_DigestVerifyUpdate(&md_ctx, kMsg, sizeof(kMsg)) ||
-        !EVP_DigestVerifyFinal(&md_ctx, (unsigned char *)kSignature, sizeof(kSignature))) {
+        !EVP_DigestVerifyFinal(&md_ctx, kSignature, sizeof(kSignature))) {
         goto out;
     }
     ret = 1;
diff --git a/crypto/evp/evp_fips.c b/crypto/evp/evp_fips.c
deleted file mode 100644 (file)
index 71a32fc..0000000
+++ /dev/null
@@ -1,310 +0,0 @@
-/* crypto/evp/evp_fips.c */
-/*
- * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
- * project.
- */
-/* ====================================================================
- * Copyright (c) 2011 The OpenSSL Project.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- *    software must display the following acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- *    endorse or promote products derived from this software without
- *    prior written permission. For written permission, please contact
- *    licensing@OpenSSL.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- *    nor may "OpenSSL" appear in their names without prior written
- *    permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- *    acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
- */
-
-#include <openssl/evp.h>
-
-#ifdef OPENSSL_FIPS
-# include <openssl/fips.h>
-
-const EVP_CIPHER *EVP_aes_128_cbc(void)
-{
-    return FIPS_evp_aes_128_cbc();
-}
-
-const EVP_CIPHER *EVP_aes_128_ccm(void)
-{
-    return FIPS_evp_aes_128_ccm();
-}
-
-const EVP_CIPHER *EVP_aes_128_cfb1(void)
-{
-    return FIPS_evp_aes_128_cfb1();
-}
-
-const EVP_CIPHER *EVP_aes_128_cfb128(void)
-{
-    return FIPS_evp_aes_128_cfb128();
-}
-
-const EVP_CIPHER *EVP_aes_128_cfb8(void)
-{
-    return FIPS_evp_aes_128_cfb8();
-}
-
-const EVP_CIPHER *EVP_aes_128_ctr(void)
-{
-    return FIPS_evp_aes_128_ctr();
-}
-
-const EVP_CIPHER *EVP_aes_128_ecb(void)
-{
-    return FIPS_evp_aes_128_ecb();
-}
-
-const EVP_CIPHER *EVP_aes_128_gcm(void)
-{
-    return FIPS_evp_aes_128_gcm();
-}
-
-const EVP_CIPHER *EVP_aes_128_ofb(void)
-{
-    return FIPS_evp_aes_128_ofb();
-}
-
-const EVP_CIPHER *EVP_aes_128_xts(void)
-{
-    return FIPS_evp_aes_128_xts();
-}
-
-const EVP_CIPHER *EVP_aes_192_cbc(void)
-{
-    return FIPS_evp_aes_192_cbc();
-}
-
-const EVP_CIPHER *EVP_aes_192_ccm(void)
-{
-    return FIPS_evp_aes_192_ccm();
-}
-
-const EVP_CIPHER *EVP_aes_192_cfb1(void)
-{
-    return FIPS_evp_aes_192_cfb1();
-}
-
-const EVP_CIPHER *EVP_aes_192_cfb128(void)
-{
-    return FIPS_evp_aes_192_cfb128();
-}
-
-const EVP_CIPHER *EVP_aes_192_cfb8(void)
-{
-    return FIPS_evp_aes_192_cfb8();
-}
-
-const EVP_CIPHER *EVP_aes_192_ctr(void)
-{
-    return FIPS_evp_aes_192_ctr();
-}
-
-const EVP_CIPHER *EVP_aes_192_ecb(void)
-{
-    return FIPS_evp_aes_192_ecb();
-}
-
-const EVP_CIPHER *EVP_aes_192_gcm(void)
-{
-    return FIPS_evp_aes_192_gcm();
-}
-
-const EVP_CIPHER *EVP_aes_192_ofb(void)
-{
-    return FIPS_evp_aes_192_ofb();
-}
-
-const EVP_CIPHER *EVP_aes_256_cbc(void)
-{
-    return FIPS_evp_aes_256_cbc();
-}
-
-const EVP_CIPHER *EVP_aes_256_ccm(void)
-{
-    return FIPS_evp_aes_256_ccm();
-}
-
-const EVP_CIPHER *EVP_aes_256_cfb1(void)
-{
-    return FIPS_evp_aes_256_cfb1();
-}
-
-const EVP_CIPHER *EVP_aes_256_cfb128(void)
-{
-    return FIPS_evp_aes_256_cfb128();
-}
-
-const EVP_CIPHER *EVP_aes_256_cfb8(void)
-{
-    return FIPS_evp_aes_256_cfb8();
-}
-
-const EVP_CIPHER *EVP_aes_256_ctr(void)
-{
-    return FIPS_evp_aes_256_ctr();
-}
-
-const EVP_CIPHER *EVP_aes_256_ecb(void)
-{
-    return FIPS_evp_aes_256_ecb();
-}
-
-const EVP_CIPHER *EVP_aes_256_gcm(void)
-{
-    return FIPS_evp_aes_256_gcm();
-}
-
-const EVP_CIPHER *EVP_aes_256_ofb(void)
-{
-    return FIPS_evp_aes_256_ofb();
-}
-
-const EVP_CIPHER *EVP_aes_256_xts(void)
-{
-    return FIPS_evp_aes_256_xts();
-}
-
-const EVP_CIPHER *EVP_des_ede(void)
-{
-    return FIPS_evp_des_ede();
-}
-
-const EVP_CIPHER *EVP_des_ede3(void)
-{
-    return FIPS_evp_des_ede3();
-}
-
-const EVP_CIPHER *EVP_des_ede3_cbc(void)
-{
-    return FIPS_evp_des_ede3_cbc();
-}
-
-const EVP_CIPHER *EVP_des_ede3_cfb1(void)
-{
-    return FIPS_evp_des_ede3_cfb1();
-}
-
-const EVP_CIPHER *EVP_des_ede3_cfb64(void)
-{
-    return FIPS_evp_des_ede3_cfb64();
-}
-
-const EVP_CIPHER *EVP_des_ede3_cfb8(void)
-{
-    return FIPS_evp_des_ede3_cfb8();
-}
-
-const EVP_CIPHER *EVP_des_ede3_ecb(void)
-{
-    return FIPS_evp_des_ede3_ecb();
-}
-
-const EVP_CIPHER *EVP_des_ede3_ofb(void)
-{
-    return FIPS_evp_des_ede3_ofb();
-}
-
-const EVP_CIPHER *EVP_des_ede_cbc(void)
-{
-    return FIPS_evp_des_ede_cbc();
-}
-
-const EVP_CIPHER *EVP_des_ede_cfb64(void)
-{
-    return FIPS_evp_des_ede_cfb64();
-}
-
-const EVP_CIPHER *EVP_des_ede_ecb(void)
-{
-    return FIPS_evp_des_ede_ecb();
-}
-
-const EVP_CIPHER *EVP_des_ede_ofb(void)
-{
-    return FIPS_evp_des_ede_ofb();
-}
-
-const EVP_CIPHER *EVP_enc_null(void)
-{
-    return FIPS_evp_enc_null();
-}
-
-const EVP_MD *EVP_sha1(void)
-{
-    return FIPS_evp_sha1();
-}
-
-const EVP_MD *EVP_sha224(void)
-{
-    return FIPS_evp_sha224();
-}
-
-const EVP_MD *EVP_sha256(void)
-{
-    return FIPS_evp_sha256();
-}
-
-const EVP_MD *EVP_sha384(void)
-{
-    return FIPS_evp_sha384();
-}
-
-const EVP_MD *EVP_sha512(void)
-{
-    return FIPS_evp_sha512();
-}
-
-const EVP_MD *EVP_dss(void)
-{
-    return FIPS_evp_dss();
-}
-
-const EVP_MD *EVP_dss1(void)
-{
-    return FIPS_evp_dss1();
-}
-
-const EVP_MD *EVP_ecdsa(void)
-{
-    return FIPS_evp_ecdsa();
-}
-
-#endif
index d4d2b4b..a53a27c 100644 (file)
 #include "cryptlib.h"
 #include <openssl/evp.h>
 #include <openssl/objects.h>
+#ifdef OPENSSL_FIPS
+# include <openssl/fips.h>
+# include "evp_locl.h"
+#endif
 
 int EVP_CIPHER_param_to_asn1(EVP_CIPHER_CTX *c, ASN1_TYPE *type)
 {
@@ -67,9 +71,13 @@ int EVP_CIPHER_param_to_asn1(EVP_CIPHER_CTX *c, ASN1_TYPE *type)
 
     if (c->cipher->set_asn1_parameters != NULL)
         ret = c->cipher->set_asn1_parameters(c, type);
-    else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1)
-        ret = EVP_CIPHER_set_asn1_iv(c, type);
-    else
+    else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) {
+        if (EVP_CIPHER_CTX_mode(c) == EVP_CIPH_WRAP_MODE) {
+            ASN1_TYPE_set(type, V_ASN1_NULL, NULL);
+            ret = 1;
+        } else
+            ret = EVP_CIPHER_set_asn1_iv(c, type);
+    } else
         ret = -1;
     return (ret);
 }
@@ -80,9 +88,11 @@ int EVP_CIPHER_asn1_to_param(EVP_CIPHER_CTX *c, ASN1_TYPE *type)
 
     if (c->cipher->get_asn1_parameters != NULL)
         ret = c->cipher->get_asn1_parameters(c, type);
-    else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1)
+    else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) {
+        if (EVP_CIPHER_CTX_mode(c) == EVP_CIPH_WRAP_MODE)
+            return 1;
         ret = EVP_CIPHER_get_asn1_iv(c, type);
-    else
+    else
         ret = -1;
     return (ret);
 }
@@ -200,12 +210,22 @@ const EVP_CIPHER *EVP_CIPHER_CTX_cipher(const EVP_CIPHER_CTX *ctx)
 
 unsigned long EVP_CIPHER_flags(const EVP_CIPHER *cipher)
 {
+#ifdef OPENSSL_FIPS
+    const EVP_CIPHER *fcipher;
+    fcipher = evp_get_fips_cipher(cipher);
+    if (fcipher && fcipher->flags & EVP_CIPH_FLAG_FIPS)
+        return cipher->flags | EVP_CIPH_FLAG_FIPS;
+#endif
     return cipher->flags;
 }
 
 unsigned long EVP_CIPHER_CTX_flags(const EVP_CIPHER_CTX *ctx)
 {
+#ifdef OPENSSL_FIPS
+    return EVP_CIPHER_flags(ctx->cipher);
+#else
     return ctx->cipher->flags;
+#endif
 }
 
 void *EVP_CIPHER_CTX_get_app_data(const EVP_CIPHER_CTX *ctx)
@@ -272,8 +292,40 @@ int EVP_MD_size(const EVP_MD *md)
     return md->md_size;
 }
 
+#ifdef OPENSSL_FIPS
+
+const EVP_MD *evp_get_fips_md(const EVP_MD *md)
+{
+    int nid = EVP_MD_type(md);
+    if (nid == NID_dsa)
+        return FIPS_evp_dss1();
+    else if (nid == NID_dsaWithSHA)
+        return FIPS_evp_dss();
+    else if (nid == NID_ecdsa_with_SHA1)
+        return FIPS_evp_ecdsa();
+    else
+        return FIPS_get_digestbynid(nid);
+}
+
+const EVP_CIPHER *evp_get_fips_cipher(const EVP_CIPHER *cipher)
+{
+    int nid = cipher->nid;
+    if (nid == NID_undef)
+        return FIPS_evp_enc_null();
+    else
+        return FIPS_get_cipherbynid(nid);
+}
+
+#endif
+
 unsigned long EVP_MD_flags(const EVP_MD *md)
 {
+#ifdef OPENSSL_FIPS
+    const EVP_MD *fmd;
+    fmd = evp_get_fips_md(md);
+    if (fmd && fmd->flags & EVP_MD_FLAG_FIPS)
+        return md->flags | EVP_MD_FLAG_FIPS;
+#endif
     return md->flags;
 }
 
index 980dada..2bb709a 100644 (file)
@@ -333,6 +333,9 @@ int PKCS5_v2_PBKDF2_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass,
                              const EVP_CIPHER *c, const EVP_MD *md,
                              int en_de);
 
+const EVP_MD *evp_get_fips_md(const EVP_MD *md);
+const EVP_CIPHER *evp_get_fips_cipher(const EVP_CIPHER *cipher);
+
 #ifdef OPENSSL_FIPS
 
 # ifdef OPENSSL_DOING_MAKEDEPEND
index d06b4ee..d7441ec 100644 (file)
@@ -132,11 +132,13 @@ static int test1_exit(int ec)
 static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn,
                   const unsigned char *iv, int in,
                   const unsigned char *plaintext, int pn,
-                  const unsigned char *ciphertext, int cn, int encdec)
+                  const unsigned char *ciphertext, int cn,
+                  const unsigned char *aad, int an,
+                  const unsigned char *tag, int tn, int encdec)
 {
     EVP_CIPHER_CTX ctx;
     unsigned char out[4096];
-    int outl, outl2;
+    int outl, outl2, mode;
 
     printf("Testing cipher %s%s\n", EVP_CIPHER_name(c),
            (encdec ==
@@ -147,15 +149,78 @@ static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn,
         hexdump(stdout, "IV", iv, in);
     hexdump(stdout, "Plaintext", plaintext, pn);
     hexdump(stdout, "Ciphertext", ciphertext, cn);
-
-    if (kn != c->key_len) {
+    if (an)
+        hexdump(stdout, "AAD", aad, an);
+    if (tn)
+        hexdump(stdout, "Tag", tag, tn);
+    mode = EVP_CIPHER_mode(c);
+    if (kn != EVP_CIPHER_key_length(c)) {
         fprintf(stderr, "Key length doesn't match, got %d expected %lu\n", kn,
-                (unsigned long)c->key_len);
+                (unsigned long)EVP_CIPHER_key_length(c));
         test1_exit(5);
     }
     EVP_CIPHER_CTX_init(&ctx);
+    EVP_CIPHER_CTX_set_flags(&ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW);
     if (encdec != 0) {
-        if (!EVP_EncryptInit_ex(&ctx, c, NULL, key, iv)) {
+        if (mode == EVP_CIPH_GCM_MODE) {
+            if (!EVP_EncryptInit_ex(&ctx, c, NULL, NULL, NULL)) {
+                fprintf(stderr, "EncryptInit failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(10);
+            }
+            if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_IVLEN, in, NULL)) {
+                fprintf(stderr, "IV length set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(11);
+            }
+            if (!EVP_EncryptInit_ex(&ctx, NULL, NULL, key, iv)) {
+                fprintf(stderr, "Key/IV set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(12);
+            }
+            if (an && !EVP_EncryptUpdate(&ctx, NULL, &outl, aad, an)) {
+                fprintf(stderr, "AAD set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(13);
+            }
+        } else if (mode == EVP_CIPH_CCM_MODE) {
+            if (!EVP_EncryptInit_ex(&ctx, c, NULL, NULL, NULL)) {
+                fprintf(stderr, "EncryptInit failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(10);
+            }
+            if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_CCM_SET_IVLEN, in, NULL)) {
+                fprintf(stderr, "IV length set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(11);
+            }
+            if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_CCM_SET_TAG, tn, NULL)) {
+                fprintf(stderr, "Tag length set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(11);
+            }
+            if (!EVP_EncryptInit_ex(&ctx, NULL, NULL, key, iv)) {
+                fprintf(stderr, "Key/IV set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(12);
+            }
+            if (!EVP_EncryptUpdate(&ctx, NULL, &outl, NULL, pn)) {
+                fprintf(stderr, "Plaintext length set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(12);
+            }
+            if (an && !EVP_EncryptUpdate(&ctx, NULL, &outl, aad, an)) {
+                fprintf(stderr, "AAD set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(13);
+            }
+        } else if (mode == EVP_CIPH_WRAP_MODE) {
+            if (!EVP_EncryptInit_ex(&ctx, c, NULL, key, in ? iv : NULL)) {
+                fprintf(stderr, "EncryptInit failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(10);
+            }
+        } else if (!EVP_EncryptInit_ex(&ctx, c, NULL, key, iv)) {
             fprintf(stderr, "EncryptInit failed\n");
             ERR_print_errors_fp(stderr);
             test1_exit(10);
@@ -185,10 +250,93 @@ static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn,
             hexdump(stderr, "Expected", ciphertext, cn);
             test1_exit(9);
         }
+        if (mode == EVP_CIPH_GCM_MODE || mode == EVP_CIPH_CCM_MODE) {
+            unsigned char rtag[16];
+            /*
+             * Note: EVP_CTRL_CCM_GET_TAG has same value as
+             * EVP_CTRL_GCM_GET_TAG
+             */
+            if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_GET_TAG, tn, rtag)) {
+                fprintf(stderr, "Get tag failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(14);
+            }
+            if (memcmp(rtag, tag, tn)) {
+                fprintf(stderr, "Tag mismatch\n");
+                hexdump(stderr, "Got", rtag, tn);
+                hexdump(stderr, "Expected", tag, tn);
+                test1_exit(9);
+            }
+        }
     }
 
     if (encdec <= 0) {
-        if (!EVP_DecryptInit_ex(&ctx, c, NULL, key, iv)) {
+        if (mode == EVP_CIPH_GCM_MODE) {
+            if (!EVP_DecryptInit_ex(&ctx, c, NULL, NULL, NULL)) {
+                fprintf(stderr, "EncryptInit failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(10);
+            }
+            if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_IVLEN, in, NULL)) {
+                fprintf(stderr, "IV length set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(11);
+            }
+            if (!EVP_DecryptInit_ex(&ctx, NULL, NULL, key, iv)) {
+                fprintf(stderr, "Key/IV set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(12);
+            }
+            if (!EVP_CIPHER_CTX_ctrl
+                (&ctx, EVP_CTRL_GCM_SET_TAG, tn, (void *)tag)) {
+                fprintf(stderr, "Set tag failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(14);
+            }
+            if (an && !EVP_DecryptUpdate(&ctx, NULL, &outl, aad, an)) {
+                fprintf(stderr, "AAD set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(13);
+            }
+        } else if (mode == EVP_CIPH_CCM_MODE) {
+            if (!EVP_DecryptInit_ex(&ctx, c, NULL, NULL, NULL)) {
+                fprintf(stderr, "DecryptInit failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(10);
+            }
+            if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_CCM_SET_IVLEN, in, NULL)) {
+                fprintf(stderr, "IV length set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(11);
+            }
+            if (!EVP_CIPHER_CTX_ctrl
+                (&ctx, EVP_CTRL_CCM_SET_TAG, tn, (void *)tag)) {
+                fprintf(stderr, "Tag length set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(11);
+            }
+            if (!EVP_DecryptInit_ex(&ctx, NULL, NULL, key, iv)) {
+                fprintf(stderr, "Key/Nonce set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(12);
+            }
+            if (!EVP_DecryptUpdate(&ctx, NULL, &outl, NULL, pn)) {
+                fprintf(stderr, "Plaintext length set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(12);
+            }
+            if (an && !EVP_EncryptUpdate(&ctx, NULL, &outl, aad, an)) {
+                fprintf(stderr, "AAD set failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(13);
+            }
+        } else if (mode == EVP_CIPH_WRAP_MODE) {
+            if (!EVP_DecryptInit_ex(&ctx, c, NULL, key, in ? iv : NULL)) {
+                fprintf(stderr, "EncryptInit failed\n");
+                ERR_print_errors_fp(stderr);
+                test1_exit(10);
+            }
+        } else if (!EVP_DecryptInit_ex(&ctx, c, NULL, key, iv)) {
             fprintf(stderr, "DecryptInit failed\n");
             ERR_print_errors_fp(stderr);
             test1_exit(11);
@@ -200,7 +348,8 @@ static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn,
             ERR_print_errors_fp(stderr);
             test1_exit(6);
         }
-        if (!EVP_DecryptFinal_ex(&ctx, out + outl, &outl2)) {
+        if (mode != EVP_CIPH_CCM_MODE
+            && !EVP_DecryptFinal_ex(&ctx, out + outl, &outl2)) {
             fprintf(stderr, "DecryptFinal failed\n");
             ERR_print_errors_fp(stderr);
             test1_exit(7);
@@ -228,7 +377,9 @@ static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn,
 static int test_cipher(const char *cipher, const unsigned char *key, int kn,
                        const unsigned char *iv, int in,
                        const unsigned char *plaintext, int pn,
-                       const unsigned char *ciphertext, int cn, int encdec)
+                       const unsigned char *ciphertext, int cn,
+                       const unsigned char *aad, int an,
+                       const unsigned char *tag, int tn, int encdec)
 {
     const EVP_CIPHER *c;
 
@@ -236,7 +387,8 @@ static int test_cipher(const char *cipher, const unsigned char *key, int kn,
     if (!c)
         return 0;
 
-    test1(c, key, kn, iv, in, plaintext, pn, ciphertext, cn, encdec);
+    test1(c, key, kn, iv, in, plaintext, pn, ciphertext, cn, aad, an, tag, tn,
+          encdec);
 
     return 1;
 }
@@ -316,7 +468,7 @@ int main(int argc, char **argv)
         perror(szTestFile);
         EXIT(2);
     }
-
+    ERR_load_crypto_strings();
     /* Load up the software EVP_CIPHER and EVP_MD definitions */
     OpenSSL_add_all_ciphers();
     OpenSSL_add_all_digests();
@@ -346,9 +498,11 @@ int main(int argc, char **argv)
         char line[4096];
         char *p;
         char *cipher;
-        unsigned char *iv, *key, *plaintext, *ciphertext;
+        unsigned char *iv, *key, *plaintext, *ciphertext, *aad, *tag;
         int encdec;
         int kn, in, pn, cn;
+        int an = 0;
+        int tn = 0;
 
         if (!fgets((char *)line, sizeof line, f))
             break;
@@ -361,19 +515,37 @@ int main(int argc, char **argv)
         plaintext = ustrsep(&p, ":");
         ciphertext = ustrsep(&p, ":");
         if (p[-1] == '\n') {
-            p[-1] = '\0';
             encdec = -1;
+            p[-1] = '\0';
+            tag = aad = NULL;
+            an = tn = 0;
         } else {
-            encdec = atoi(sstrsep(&p, "\n"));
+            aad = ustrsep(&p, ":");
+            tag = ustrsep(&p, ":");
+            if (tag == NULL) {
+                p = (char *)aad;
+                tag = aad = NULL;
+                an = tn = 0;
+            }
+            if (p[-1] == '\n') {
+                encdec = -1;
+                p[-1] = '\0';
+            } else
+                encdec = atoi(sstrsep(&p, "\n"));
         }
 
         kn = convert(key);
         in = convert(iv);
         pn = convert(plaintext);
         cn = convert(ciphertext);
+        if (aad) {
+            an = convert(aad);
+            tn = convert(tag);
+        }
 
         if (!test_cipher
-            (cipher, key, kn, iv, in, plaintext, pn, ciphertext, cn, encdec)
+            (cipher, key, kn, iv, in, plaintext, pn, ciphertext, cn, aad, an,
+             tag, tn, encdec)
             && !test_digest(cipher, plaintext, pn, ciphertext, cn)) {
 #ifdef OPENSSL_NO_AES
             if (strstr(cipher, "AES") == cipher) {
index c273707..4e9958b 100644 (file)
@@ -1,4 +1,5 @@
 #cipher:key:iv:plaintext:ciphertext:0/1(decrypt/encrypt)
+#aadcipher:key:iv:plaintext:ciphertext:aad:tag:0/1(decrypt/encrypt)
 #digest:::input:output
 
 # SHA(1) tests (from shatest.c)
@@ -332,3 +333,69 @@ SEED-ECB:00000000000000000000000000000000::000102030405060708090A0B0C0D0E0F:5EBA
 SEED-ECB:000102030405060708090A0B0C0D0E0F::00000000000000000000000000000000:C11F22F20140505084483597E4370F43:1
 SEED-ECB:4706480851E61BE85D74BFB3FD956185::83A2F8A288641FB9A4E9A5CC2F131C7D:EE54D13EBCAE706D226BC3142CD40D4A:1
 SEED-ECB:28DBC3BC49FFD87DCFA509B11D422BE7::B41E6BE2EBA84A148E2EED84593C5EC7:9B9B7BFCD1813CB95D0B3618F40F5122:1
+
+# AES CCM 256 bit key
+aes-256-ccm:1bde3251d41a8b5ea013c195ae128b218b3e0306376357077ef1c1c78548b92e:5b8e40746f6b98e00f1d13ff41:53bd72a97089e312422bf72e242377b3c6ee3e2075389b999c4ef7f28bd2b80a:9a5fcccdb4cf04e7293d2775cc76a488f042382d949b43b7d6bb2b9864786726:c17a32514eb6103f3249e076d4c871dc97e04b286699e54491dc18f6d734d4c0:2024931d73bca480c24a24ece6b6c2bf
+
+# AES GCM test vectors from http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-spec.pdf
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000::::58e2fccefa7e3061367f1d57a4e7455a
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78::ab6e47d42cec13bdf53a67b21257bddf
+aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091473f5985::4d5c2af327cd64a62cf35abd2ba6fab4
+aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091:feedfacedeadbeeffeedfacedeadbeefabaddad2:5bc94fbc3221a5db94fae95ae7121a47
+aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:61353b4c2806934a777ff51fa22a4755699b2a714fcdc6f83766e5f97b6c742373806900e49f24b22b097544d4896b424989b5e1ebac0f07c23f4598:feedfacedeadbeeffeedfacedeadbeefabaddad2:3612d2e79e3b0785561be14aaca2fccb
+aes-128-gcm:feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:8ce24998625615b603a033aca13fb894be9112a5c3a211a8ba262a3cca7e2ca701e4a9a4fba43c90ccdcb281d48c7c6fd62875d2aca417034c34aee5:feedfacedeadbeeffeedfacedeadbeefabaddad2:619cc5aefffe0bfa462af43c1699d050
+aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000::::cd33b28ac773f74ba00ed1f312572435
+aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:98e7247c07f0fe411c267e4384b0f600::2ff58d80033927ab8ef4d4587514f0fb
+aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710acade256::9924a7c8587336bfb118024db8674a14
+aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710:feedfacedeadbeeffeedfacedeadbeefabaddad2:2519498e80f1478f37ba55bd6d27618c
+aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:0f10f599ae14a154ed24b36e25324db8c566632ef2bbb34f8347280fc4507057fddc29df9a471f75c66541d4d4dad1c9e93a19a58e8b473fa0f062f7:feedfacedeadbeeffeedfacedeadbeefabaddad2:65dcc57fcf623a24094fcca40d3533f8
+aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:d27e88681ce3243c4830165a8fdcf9ff1de9a1d8e6b447ef6ef7b79828666e4581e79012af34ddd9e2f037589b292db3e67c036745fa22e7e9b7373b:feedfacedeadbeeffeedfacedeadbeefabaddad2:dcf566ff291c25bbb8568fc3d376a6d9
+aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000::::530f8afbc74536b9a963b4f1c4cb738b
+aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:cea7403d4d606b6e074ec5d3baf39d18::d0d1c8a799996bf0265b98b5d48ab919
+aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad::b094dac5d93471bdec1a502270e3cc6c
+aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662:feedfacedeadbeeffeedfacedeadbeefabaddad2:76fc6ece0f4e1768cddf8853bb2d551b
+aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:c3762df1ca787d32ae47c13bf19844cbaf1ae14d0b976afac52ff7d79bba9de0feb582d33934a4f0954cc2363bc73f7862ac430e64abe499f47c9b1f:feedfacedeadbeeffeedfacedeadbeefabaddad2:3a337dbf46a792c45e454913fe2ea8f2
+aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:5a8def2f0c9e53f1f75d7853659e2a20eeb2b22aafde6419a058ab4f6f746bf40fc0c3b780f244452da3ebf1c5d82cdea2418997200ef82e44ae7e3f:feedfacedeadbeeffeedfacedeadbeefabaddad2:a44a8266ee1c8eb0c8b5d4cf5ae9f19a
+# local add-ons, primarily streaming ghash tests
+# 128 bytes aad
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:::d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad:5fea793a2d6f974d37e68e0cb8ff9492
+# 48 bytes plaintext
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0::9dd0a376b08e40eb00c35f29f9ea61a4
+# 80 bytes plaintext
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d5270291::98885a3a22bd4742fe7b72172193b163
+# 128 bytes plaintext
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d527029195b84d1b96c690ff2f2de30bf2ec89e00253786e126504f0dab90c48a30321de3345e6b0461e7c9e6c6b7afedde83f40::cac45f60e31efd3b5a43b98a22ce1aa1
+# 192 bytes plaintext, iv is chosen so that initial counter LSB is 0xFF
+aes-128-gcm:00000000000000000000000000000000:ffffffff000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:56b3373ca9ef6e4a2b64fe1e9a17b61425f10d47a75a5fce13efc6bc784af24f4141bdd48cf7c770887afd573cca5418a9aeffcd7c5ceddfc6a78397b9a85b499da558257267caab2ad0b23ca476a53cb17fb41c4b8b475cb4f3f7165094c229c9e8c4dc0a2a5ff1903e501511221376a1cdb8364c5061a20cae74bc4acd76ceb0abc9fd3217ef9f8c90be402ddf6d8697f4f880dff15bfb7a6b28241ec8fe183c2d59e3f9dfff653c7126f0acb9e64211f42bae12af462b1070bef1ab5e3606::566f8ef683078bfdeeffa869d751a017
+# 80 bytes plaintext, submitted by Intel
+aes-128-gcm:843ffcf5d2b72694d19ed01d01249412:dbcca32ebf9b804617c3aa9e:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f:6268c6fa2a80b2d137467f092f657ac04d89be2beaa623d61b5a868c8f03ff95d3dcee23ad2f1ab3a6c80eaf4b140eb05de3457f0fbc111a6b43d0763aa422a3013cf1dc37fe417d1fbfc449b75d4cc5:00000000000000000000000000000000101112131415161718191a1b1c1d1e1f:3b629ccfbc1119b7319e1dce2cd6fd6d
+
+# AES XTS test vectors from IEEE Std 1619-2007
+aes-128-xts:0000000000000000000000000000000000000000000000000000000000000000:00000000000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000:917cf69ebd68b2ec9b9fe9a3eadda692cd43d2f59598ed858c02c2652fbf922e
+aes-128-xts:1111111111111111111111111111111122222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:c454185e6a16936e39334038acef838bfb186fff7480adc4289382ecd6d394f0
+aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f022222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:af85336b597afc1a900b2eb21ec949d292df4c047e0b21532186a5971a227a89
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:00000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38
fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:01000000000000000000000000000000:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c95
64880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:02000000000000000000000000000000:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c9564880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd:fa762a3680b76007928ed4a4f49a9456031b704782e65e16cecb54ed7d017b5e18abd67b338e81078f21edb7868d901ebe9c731a7c18b5e6dec1d6a72e078ac9a4262f860beefa14f4e821018272e411a951502b6e79066e84252c3346f3aa62344351a291d4bedc7a07618bdea2af63145cc7a4b8d4070691ae890cd65733e7946e9021a1dffc4c59f159425ee6d50ca9b135fa6162cea18a939838dc000fb386fad086acce5ac07cb2ece7fd580b00cfa5e98589631dc25e8e2a3daf2ffdec26531659912c9d8f7a15e5865ea8fb5816d6207052bd7128cd743c12c8118791a4736811935eb982a532349e31dd401e0b660a568cb1a4711f552f55ded59f1f15bf7196b3ca12a91e488ef59d64f3a02bf45239499ac6176ae321c4a211ec545365971c5d3f4f09d4eb139bfdf2073d33180b21002b65cc9865e76cb24cd92c874c24c18350399a936ab3637079295d76c417776b94efce3a0ef7206b15110519655c956cbd8b2489405ee2b09a6b6eebe0c53790a12a8998378b33a5b71159625f4ba49d2a2fdba59fbf0897bc7aabd8d707dc140a80f0f309f835d3da54ab584e501dfa0ee977fec543f74186a802
b9a37adb3e8291eca04d66520d229e60401e7282bef486ae059aa70696e0e305d777140a7a883ecdcb69b9ff938e8a4231864c69ca2c2043bed007ff3e605e014bcf518138dc3a25c5e236171a2d01d6
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fd000000000000000000000000000000:8e41b78c390b5af9d758bb214a67e9f6bf7727b09ac6124084c37611398fa45daad94868600ed391fb1acd4857a95b466e62ef9f4b377244d1c152e7b30d731aad30c716d214b707aed99eb5b5e580b3e887cf7497465651d4b60e6042051da3693c3b78c14489543be8b6ad0ba629565bba202313ba7b0d0c94a3252b676f46cc02ce0f8a7d34c0ed229129673c1f61aed579d08a9203a25aac3a77e9db60267996db38df637356d9dcd1632e369939f2a29d89345c66e05066f1a3677aef18dea4113faeb629e46721a66d0a7e785d3e29af2594eb67dfa982affe0aac058f6e15864269b135418261fc3afb089472cf68c45dd7f231c6249ba0255e1e033833fc4d00a3fe02132d7bc3873614b8aee34273581ea0325c81f0270affa13641d052d36f0757d484014354d02d6883ca15c24d8c3956b1bd027bcf41f151fd8023c5340e5606f37e90fdb87c86fb4fa634b3718a30bace06a66eaf8f63c4aa3b637826a87fe8cfa44282e92cb1615af3a28e53bc74c7cba1a0977be9065d0c1a5dec6c54ae38d37f37aa35283e048e5530a85c4e7a29d7b92ec0c3169cdf2a805c7604bce60049b9fb7b8eaac10f51ae23794ceba68bb58112e293b9b692ca721b37c662f8574ed4dba6f88e170881c82cddc1034a0ca7e284bf0962b6b26292d836fa9f73c1ac770eef0f2d3a1eaf61d3e03555fd424eedd67e18a18094f888:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf1
19b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fe000000000000000000000000000000:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf119b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa06
19ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:ff000000000000000000000000000000:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa0619ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a:3260ae8dad1f4a32c5cafe3ab0eb95549d461a67ceb9e5aa2d3afb62dece0553193ba50c75be251e08d1d08f1088576c7efdfaaf3f459559571e12511753b07af073f35da06af0ce0bbf6b8f5ccc5cea500ec1b211bd51f63b606bf6528796ca12173ba39b8935ee44ccce646f90a45bf9ccc567f0ace13dc2d53ebeedc81f58b2e41179dddf0d5a5c42f5d8506c1a5d2f8f59f3ea873cbcd0eec19acbf325423bd3dcb8c2b1bf1d1eaed0eba7f0698e4314fbeb2f1566d1b9253008cbccf45a2b0d9c5c9c21474f4076e02be26050b99dee4fd68a4cf890e496e4fcae7b70f94ea5a9062da0daeba1993d2ccd1dd3c244b8428801495a58b216547e7e847c46d1d756377b6242d2e5fb83bf752b54e0df71e889f3a2bb0f4c10805bf3c590376e3c24e22ff57f7fa965577375325cea5d920db94b9c336b455f6e894c01866fe9fbb8c8d3f70a2957285f6dfb5dcd8cbf54782f8fe7766d4723819913ac773421e3a31095866bad22c86a6036b2518b2059b4229d18c8c2ccbdf906c6cc6e82464ee57bddb0bebcb1dc645325bfb3e665ef7251082c88ebb1cf203bd779fdd38675713c8daadd17e1cabee432b09787
b6ddf3304e38b731b45df5df51b78fcfb3d32466028d0ba36555e7e11ab0ee0666061d1645d962444bc47a38188930a84b4d561395c73c087021927ca638b7afc8a8679ccb84c26555440ec7f10445cd
+
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ff000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:1c3b3a102f770386e4836c99e370cf9bea00803f5e482357a4ae12d414a3e63b5d31e276f8fe4a8d66b317f9ac683f44680a86ac35adfc3345befecb4bb188fd5776926c49a3095eb108fd1098baec70aaa66999a72a82f27d848b21d4a741b0c5cd4d5fff9dac89aeba122961d03a757123e9870f8acf1000020887891429ca2a3e7a7d7df7b10355165c8b9a6d0a7de8b062c4500dc4cd120c0f7418dae3d0b5781c34803fa75421c790dfe1de1834f280d7667b327f6c8cd7557e12ac3a0f93ec05c52e0493ef31a12d3d9260f79a289d6a379bc70c50841473d1a8cc81ec583e9645e07b8d9670655ba5bbcfecc6dc3966380ad8fecb17b6ba02469a020a84e18e8f84252070c13e9f1f289be54fbc481457778f616015e1327a02b140f1505eb309326d68378f8374595c849d84f4c333ec4423885143cb47bd71c5edae9be69a2ffeceb1bec9de244fbe15992b11b77c040f12bd8f6a975a44a0f90c29a9abc3d4d893927284c58754cce294529f8614dcd2aba991925fedc4ae74ffac6e333b93eb4aff0479da9a410e4450e0
dd7ae4c6e2910900575da401fc07059f645e8b7e9bfdef33943054ff84011493c27b3429eaedb4ed5376441a77ed43851ad77f16f541dfd269d50d6a5f14fb0aab1cbb4c1550be97f7ab4066193c4caa773dad38014bd2092fa755c824bb5e54c4f36ffda9fcea70b9c6e693e148c151
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffff0000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:77a31251618a15e6b92d1d66dffe7b50b50bad552305ba0217a610688eff7e11e1d0225438e093242d6db274fde801d4cae06f2092c728b2478559df58e837c2469ee4a4fa794e4bbc7f39bc026e3cb72c33b0888f25b4acf56a2a9804f1ce6d3d6e1dc6ca181d4b546179d55544aa7760c40d06741539c7e3cd9d2f6650b2013fd0eeb8c2b8e3d8d240ccae2d4c98320a7442e1c8d75a42d6e6cfa4c2eca1798d158c7aecdf82490f24bb9b38e108bcda12c3faf9a21141c3613b58367f922aaa26cd22f23d708dae699ad7cb40a8ad0b6e2784973dcb605684c08b8d6998c69aac049921871ebb65301a4619ca80ecb485a31d744223ce8ddc2394828d6a80470c092f5ba413c3378fa6054255c6f9df4495862bbb3287681f931b687c888abf844dfc8fc28331e579928cd12bd2390ae123cf03818d14dedde5c0c24c8ab018bfca75ca096f2d531f3d1619e785f1ada437cab92e980558b3dce1474afb75bfedbf8ff54cb2618e0244c9ac0d3c66fb51598cd2db11f9be39791abe447c63094f7c453b7ff87cb5bb36b7c79efb08
72d17058b83b15ab0866ad8a58656c5a7e20dbdf308b2461d97c0ec0024a2715055249cf3b478ddd4740de654f75ca686e0d7345c69ed50cdc2a8b332b1f8824108ac937eb050585608ee734097fc09054fbff89eeaeea791f4a7ab1f9868294a4f9e27b42af8100cb9d59cef9645803
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffff00000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:e387aaa58ba483afa7e8eb469778317ecf4cf573aa9d4eac23f2cdf914e4e200a8b490e42ee646802dc6ee2b471b278195d60918ececb44bf79966f83faba0499298ebc699c0c8634715a320bb4f075d622e74c8c932004f25b41e361025b5a87815391f6108fc4afa6a05d9303c6ba68a128a55705d415985832fdeaae6c8e19110e84d1b1f199a2692119edc96132658f09da7c623efcec712537a3d94c0bf5d7e352ec94ae5797fdb377dc1551150721adf15bd26a8efc2fcaad56881fa9e62462c28f30ae1ceaca93c345cf243b73f542e2074a705bd2643bb9f7cc79bb6e7091ea6e232df0f9ad0d6cf502327876d82207abf2115cdacf6d5a48f6c1879a65b115f0f8b3cb3c59d15dd8c769bc014795a1837f3901b5845eb491adfefe097b1fa30a12fc1f65ba22905031539971a10f2f36c321bb51331cdefb39e3964c7ef079994f5b69b2edd83a71ef549971ee93f44eac3938fcdd61d01fa71799da3a8091c4c48aa9ed263ff0749df95d44fef6a0bb578ec69456aa5408ae32c7af08ad7ba8921287e3bbee31b767be06a
0e705c864a769137df28292283ea81a2480241b44d9921cdbec1bc28dc1fda114bd8e5217ac9d8ebafa720e9da4f9ace231cc949e5b96fe76ffc21063fddc83a6b8679c00d35e09576a875305bed5f36ed242c8900dd1fa965bc950dfce09b132263a1eef52dd6888c309f5a7d712826
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffff000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:bf53d2dade78e822a4d949a9bc6766b01b06a8ef70d26748c6a7fc36d80ae4c5520f7c4ab0ac8544424fa405162fef5a6b7f229498063618d39f0003cb5fb8d1c86b643497da1ff945c8d3bedeca4f479702a7a735f043ddb1d6aaade3c4a0ac7ca7f3fa5279bef56f82cd7a2f38672e824814e10700300a055e1630b8f1cb0e919f5e942010a416e2bf48cb46993d3cb6a51c19bacf864785a00bc2ecff15d350875b246ed53e68be6f55bd7e05cfc2b2ed6432198a6444b6d8c247fab941f569768b5c429366f1d3f00f0345b96123d56204c01c63b22ce78baf116e525ed90fdea39fa469494d3866c31e05f295ff21fea8d4e6e13d67e47ce722e9698a1c1048d68ebcde76b86fcf976eab8aa9790268b7068e017a8b9b749409514f1053027fd16c3786ea1bac5f15cb79711ee2abe82f5cf8b13ae73030ef5b9e4457e75d1304f988d62dd6fc4b94ed38ba831da4b7634971b6cd8ec325d9c61c00f1df73627ed3745a5e8489f3a95c69639c32cd6e1d537a85f75cc844726e8a72fc0077ad22000f1d5078f6b866318c668f1a
d03d5a5fced5219f2eabbd0aa5c0f460d183f04404a0d6f469558e81fab24a167905ab4c7878502ad3e38fdbe62a41556cec37325759533ce8f25f367c87bb5578d667ae93f9e2fd99bcbc5f2fbba88cf6516139420fcff3b7361d86322c4bd84c82f335abb152c4a93411373aaa8220
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffffff0000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:64497e5a831e4a932c09be3e5393376daa599548b816031d224bbf50a818ed2350eae7e96087c8a0db51ad290bd00c1ac1620857635bf246c176ab463be30b808da548081ac847b158e1264be25bb0910bbc92647108089415d45fab1b3d2604e8a8eff1ae4020cfa39936b66827b23f371b92200be90251e6d73c5f86de5fd4a950781933d79a28272b782a2ec313efdfcc0628f43d744c2dc2ff3dcb66999b50c7ca895b0c64791eeaa5f29499fb1c026f84ce5b5c72ba1083cddb5ce45434631665c333b60b11593fb253c5179a2c8db813782a004856a1653011e93fb6d876c18366dd8683f53412c0c180f9c848592d593f8609ca736317d356e13e2bff3a9f59cd9aeb19cd482593d8c46128bb32423b37a9adfb482b99453fbe25a41bf6feb4aa0bef5ed24bf73c762978025482c13115e4015aac992e5613a3b5c2f685b84795cb6e9b2656d8c88157e52c42f978d8634c43d06fea928f2822e465aa6576e9bf419384506cc3ce3c54ac1a6f67dc66f3b30191e698380bc999b05abce19dc0c6dcc2dd001ec535ba18deb2df
1a101023108318c75dc98611a09dc48a0acdec676fabdf222f07e026f059b672b56e5cbc8e1d21bbd867dd927212054681d70ea737134cdfce93b6f82ae22423274e58a0821cc5502e2d0ab4585e94de6975be5e0b4efce51cd3e70c25a1fbbbd609d273ad5b0d59631c531f6a0a57b9
+
+aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10:6c1625db4671522d3d7599601de7ca09ed
+aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f1011:d069444b7a7e0cab09e24447d24deb1fedbf
+aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f101112:e5df1351c0544ba1350b3363cd8ef4beedbf9d
+aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10111213:9d84c813f719aa2c7be3f66171c7c5c2edbf9dac
+aes-128-xts:e0e1e2e3e4e5e6e7e8e9eaebecedeeefc0c1c2c3c4c5c6c7c8c9cacbcccdcecf:21436587a90000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:38b45812ef43a05bd957e545907e223b954ab4aaf088303ad910eadf14b42be68b2461149d8c8ba85f992be970bc621f1b06573f63e867bf5875acafa04e42ccbd7bd3c2a0fb1fff791ec5ec36c66ae4ac1e806d81fbf709dbe29e471fad38549c8e66f5345d7c1eb94f405d1ec785cc6f6a68f6254dd8339f9d84057e01a17741990482999516b5611a38f41bb6478e6f173f320805dd71b1932fc333cb9ee39936beea9ad96fa10fb4112b901734ddad40bc1878995f8e11aee7d141a2f5d48b7a4e1e7f0b2c04830e69a4fd1378411c2f287edf48c6c4e5c247a19680f7fe41cefbd49b582106e3616cbbe4dfb2344b2ae9519391f3e0fb4922254b1d6d2d19c6d4d537b3a26f3bcc51588b32f3eca0829b6a5ac72578fb814fb43cf80d64a233e3f997a3f02683342f2b33d25b492536b93becb2f5e1a8b82f5b883342729e8ae09d16938841a21a97fb543eea3bbff59f13c1a18449e398701c1ad51648346cbc04c27bb2da3b93a1372ccae548fb53bee476f9e9c91773b1bb19828394d55d3e1a20ed69113a860b6829ffa847224604435070221b257e8dff783615d2cae4803a93aa4334ab482a0afac9c0ae
da70b45a481df5dec5df8cc0f423c77a5fd46cd312021d4b438862419a791be03bb4d97c0e59578542531ba466a83baf92cefc151b5cc1611a167893819b63fb8a6b18e86de60290fa72b797b0ce59f3
+# AES wrap tests from RFC3394
+id-aes128-wrap:000102030405060708090A0B0C0D0E0F::00112233445566778899AABBCCDDEEFF:1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5
+id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF:96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D
+id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF:64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7
+id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF0001020304050607:031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2
+id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF0001020304050607:A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1
+id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21
index f22ba52..1478448 100644 (file)
@@ -66,7 +66,6 @@
 #endif
 
 #ifndef OPENSSL_NO_SHA
-# ifndef OPENSSL_FIPS
 
 static int init(EVP_MD_CTX *ctx)
 {
@@ -102,5 +101,4 @@ const EVP_MD *EVP_dss(void)
 {
     return (&dsa_md);
 }
-# endif
 #endif
index 976148e..e36fabf 100644 (file)
@@ -68,8 +68,6 @@
 #  include <openssl/dsa.h>
 # endif
 
-# ifndef OPENSSL_FIPS
-
 static int init(EVP_MD_CTX *ctx)
 {
     return SHA1_Init(ctx->md_data);
@@ -104,5 +102,4 @@ const EVP_MD *EVP_dss1(void)
 {
     return (&dss1_md);
 }
-# endif
 #endif
index d11a13a..803d314 100644 (file)
 #include <openssl/x509.h>
 
 #ifndef OPENSSL_NO_SHA
-# ifndef OPENSSL_FIPS
 
 static int init(EVP_MD_CTX *ctx)
 {
@@ -152,5 +151,4 @@ const EVP_MD *EVP_ecdsa(void)
 {
     return (&ecdsa_md);
 }
-# endif
 #endif
index 0cc6355..a74e6b7 100644 (file)
 #include <stdio.h>
 #include "cryptlib.h"
 
-#ifndef OPENSSL_FIPS
+#ifndef OPENSSL_NO_SHA
 
-# ifndef OPENSSL_NO_SHA
-
-#  include <openssl/evp.h>
-#  include <openssl/objects.h>
-#  include <openssl/sha.h>
-#  ifndef OPENSSL_NO_RSA
-#   include <openssl/rsa.h>
-#  endif
+# include <openssl/evp.h>
+# include <openssl/objects.h>
+# include <openssl/sha.h>
+# ifndef OPENSSL_NO_RSA
+#  include <openssl/rsa.h>
+# endif
 
 static int init(EVP_MD_CTX *ctx)
 {
@@ -104,9 +102,9 @@ const EVP_MD *EVP_sha1(void)
 {
     return (&sha1_md);
 }
-# endif
+#endif
 
-# ifndef OPENSSL_NO_SHA256
+#ifndef OPENSSL_NO_SHA256
 static int init224(EVP_MD_CTX *ctx)
 {
     return SHA224_Init(ctx->md_data);
@@ -171,9 +169,9 @@ const EVP_MD *EVP_sha256(void)
 {
     return (&sha256_md);
 }
-# endif                         /* ifndef OPENSSL_NO_SHA256 */
+#endif                          /* ifndef OPENSSL_NO_SHA256 */
 
-# ifndef OPENSSL_NO_SHA512
+#ifndef OPENSSL_NO_SHA512
 static int init384(EVP_MD_CTX *ctx)
 {
     return SHA384_Init(ctx->md_data);
@@ -234,6 +232,4 @@ const EVP_MD *EVP_sha512(void)
 {
     return (&sha512_md);
 }
-# endif                         /* ifndef OPENSSL_NO_SHA512 */
-
-#endif
+#endif                          /* ifndef OPENSSL_NO_SHA512 */
index e153a18..4492d20 100644 (file)
@@ -73,15 +73,18 @@ static int do_sigver_init(EVP_MD_CTX *ctx, EVP_PKEY_CTX **pctx,
     if (ctx->pctx == NULL)
         return 0;
 
-    if (type == NULL) {
-        int def_nid;
-        if (EVP_PKEY_get_default_digest_nid(pkey, &def_nid) > 0)
-            type = EVP_get_digestbynid(def_nid);
-    }
+    if (!(ctx->pctx->pmeth->flags & EVP_PKEY_FLAG_SIGCTX_CUSTOM)) {
 
-    if (type == NULL) {
-        EVPerr(EVP_F_DO_SIGVER_INIT, EVP_R_NO_DEFAULT_DIGEST);
-        return 0;
+        if (type == NULL) {
+            int def_nid;
+            if (EVP_PKEY_get_default_digest_nid(pkey, &def_nid) > 0)
+                type = EVP_get_digestbynid(def_nid);
+        }
+
+        if (type == NULL) {
+            EVPerr(EVP_F_DO_SIGVER_INIT, EVP_R_NO_DEFAULT_DIGEST);
+            return 0;
+        }
     }
 
     if (ver) {
@@ -103,6 +106,8 @@ static int do_sigver_init(EVP_MD_CTX *ctx, EVP_PKEY_CTX **pctx,
         return 0;
     if (pctx)
         *pctx = ctx->pctx;
+    if (ctx->pctx->pmeth->flags & EVP_PKEY_FLAG_SIGCTX_CUSTOM)
+        return 1;
     if (!EVP_DigestInit_ex(ctx, type, e))
         return 0;
     return 1;
@@ -124,7 +129,19 @@ int EVP_DigestSignFinal(EVP_MD_CTX *ctx, unsigned char *sigret,
                         size_t *siglen)
 {
     int sctx, r = 0;
-    if (ctx->pctx->pmeth->signctx)
+    EVP_PKEY_CTX *pctx = ctx->pctx;
+    if (pctx->pmeth->flags & EVP_PKEY_FLAG_SIGCTX_CUSTOM) {
+        EVP_PKEY_CTX *dctx;
+        if (!sigret)
+            return pctx->pmeth->signctx(pctx, sigret, siglen, ctx);
+        dctx = EVP_PKEY_CTX_dup(ctx->pctx);
+        if (!dctx)
+            return 0;
+        r = dctx->pmeth->signctx(dctx, sigret, siglen, ctx);
+        EVP_PKEY_CTX_free(dctx);
+        return r;
+    }
+    if (pctx->pmeth->signctx)
         sctx = 1;
     else
         sctx = 0;
@@ -147,20 +164,19 @@ int EVP_DigestSignFinal(EVP_MD_CTX *ctx, unsigned char *sigret,
             return 0;
     } else {
         if (sctx) {
-            if (ctx->pctx->pmeth->signctx(ctx->pctx, sigret, siglen, ctx) <=
-                0)
+            if (pctx->pmeth->signctx(pctx, sigret, siglen, ctx) <= 0)
                 return 0;
         } else {
             int s = EVP_MD_size(ctx->digest);
-            if (s < 0
-                || EVP_PKEY_sign(ctx->pctx, sigret, siglen, NULL, s) <= 0)
+            if (s < 0 || EVP_PKEY_sign(pctx, sigret, siglen, NULL, s) <= 0)
                 return 0;
         }
     }
     return 1;
 }
 
-int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, unsigned char *sig, size_t siglen)
+int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, const unsigned char *sig,
+                          size_t siglen)
 {
     EVP_MD_CTX tmp_ctx;
     unsigned char md[EVP_MAX_MD_SIZE];
index 2b84dc7..1171d30 100644 (file)
@@ -337,7 +337,7 @@ int EVP_PKEY_set1_DH(EVP_PKEY *pkey, DH *key)
 
 DH *EVP_PKEY_get1_DH(EVP_PKEY *pkey)
 {
-    if (pkey->type != EVP_PKEY_DH) {
+    if (pkey->type != EVP_PKEY_DH && pkey->type != EVP_PKEY_DHX) {
         EVPerr(EVP_F_EVP_PKEY_GET1_DH, EVP_R_EXPECTING_A_DH_KEY);
         return NULL;
     }
index ae8bccb..9f81d10 100644 (file)
@@ -75,6 +75,7 @@ STACK_OF(EVP_PKEY_METHOD) *app_pkey_methods = NULL;
 
 extern const EVP_PKEY_METHOD rsa_pkey_meth, dh_pkey_meth, dsa_pkey_meth;
 extern const EVP_PKEY_METHOD ec_pkey_meth, hmac_pkey_meth, cmac_pkey_meth;
+extern const EVP_PKEY_METHOD dhx_pkey_meth;
 
 static const EVP_PKEY_METHOD *standard_methods[] = {
 #ifndef OPENSSL_NO_RSA
@@ -90,7 +91,10 @@ static const EVP_PKEY_METHOD *standard_methods[] = {
     &ec_pkey_meth,
 #endif
     &hmac_pkey_meth,
-    &cmac_pkey_meth
+    &cmac_pkey_meth,
+#ifndef OPENSSL_NO_DH
+    &dhx_pkey_meth
+#endif
 };
 
 DECLARE_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_METHOD *, const EVP_PKEY_METHOD *,
index 641c797..29b2b5d 100644 (file)
@@ -87,7 +87,7 @@ static int hmac_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
 {
     switch (op) {
     case ASN1_PKEY_CTRL_DEFAULT_MD_NID:
-        *(int *)arg2 = NID_sha1;
+        *(int *)arg2 = NID_sha256;
         return 1;
 
     default:
index 33d88be..51a0a3e 100644 (file)
@@ -72,6 +72,16 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int len,
     unsigned char pad[HMAC_MAX_MD_CBLOCK];
 
 #ifdef OPENSSL_FIPS
+    /* If FIPS mode switch to approved implementation if possible */
+    if (FIPS_mode()) {
+        const EVP_MD *fipsmd;
+        if (md) {
+            fipsmd = FIPS_get_digestbynid(EVP_MD_type(md));
+            if (fipsmd)
+                md = fipsmd;
+        }
+    }
+
     if (FIPS_mode()) {
         /* If we have an ENGINE need to allow non FIPS */
         if ((impl || ctx->i_ctx.engine)
index 271d0eb..5a57395 100644 (file)
@@ -195,6 +195,7 @@ int main(int argc, char *argv[])
     }
     printf("test 4 ok\n");
 test5:
+    HMAC_CTX_cleanup(&ctx);
     HMAC_CTX_init(&ctx);
     if (HMAC_Init_ex(&ctx, test[4].key, test[4].key_len, NULL, NULL)) {
         printf("Should fail to initialise HMAC with empty MD (test 5)\n");
@@ -284,6 +285,7 @@ test5:
         printf("test 5 ok\n");
     }
 test6:
+    HMAC_CTX_cleanup(&ctx);
     HMAC_CTX_init(&ctx);
     if (!HMAC_Init_ex(&ctx, test[7].key, test[7].key_len, EVP_sha1(), NULL)) {
         printf("Failed to initialise HMAC (test 6)\n");
@@ -314,6 +316,7 @@ test6:
         printf("test 6 ok\n");
     }
 end:
+    HMAC_CTX_cleanup(&ctx);
     EXIT(err);
     return (0);
 }
index d19081d..af1d75b 100755 (executable)
@@ -81,7 +81,7 @@ $ sdirs := , -
    buffer, bio, stack, lhash, rand, err, -
    evp, asn1, pem, x509, x509v3, conf, txt_db, pkcs7, pkcs12, comp, ocsp, -
    ui, krb5, -
-   cms, pqueue, ts, jpake, srp, store, cmac
+   store, cms, pqueue, ts, jpake
 $!
 $ exheader_ := crypto.h, opensslv.h, ebcdic.h, symhacks.h, ossl_typ.h
 $ exheader_'archd' := opensslconf.h
@@ -139,9 +139,6 @@ $ exheader_cms := cms.h
 $ exheader_pqueue := pqueue.h
 $ exheader_ts := ts.h
 $ exheader_jpake := jpake.h
-$ exheader_srp := srp.h
-$ exheader_store := store.h
-$ exheader_cmac := cmac.h
 $ libs := ssl_libcrypto
 $!
 $ exe_dir := [-.'archd'.exe.crypto]
index ed2e888..8c38727 100644 (file)
@@ -4,6 +4,7 @@
 #include <openssl/sha.h>
 #include <openssl/err.h>
 #include <memory.h>
+#include <string.h>
 
 /*
  * In the definition, (xa, xb, xc, xd) are Alice's (x1, x2, x3, x4) or
index 1823833..96828d2 100644 (file)
                                    asm ("bswapl %0":"=r"(r):"0"(r));    \
                                    *((unsigned int *)(c))=r; (c)+=4; r; })
 #    endif
+#   elif defined(__aarch64__)
+#    if defined(__BYTE_ORDER__)
+#     if defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
+#      define HOST_c2l(c,l)      ({ unsigned int r;              \
+                                   asm ("rev    %w0,%w1"        \
+                                        :"=r"(r)                \
+                                        :"r"(*((const unsigned int *)(c))));\
+                                   (c)+=4; (l)=r;               })
+#      define HOST_l2c(l,c)      ({ unsigned int r;              \
+                                   asm ("rev    %w0,%w1"        \
+                                        :"=r"(r)                \
+                                        :"r"((unsigned int)(l)));\
+                                   *((unsigned int *)(c))=r; (c)+=4; r; })
+#     elif defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
+#      define HOST_c2l(c,l)      ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
+#      define HOST_l2c(l,c)      (*((unsigned int *)(c))=(l), (c)+=4, (l))
+#     endif
+#    endif
 #   endif
 #  endif
-# endif
-# if defined(__s390__) || defined(__s390x__)
-#  define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
-#  define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
+#  if defined(__s390__) || defined(__s390x__)
+#   define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
+#   define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
+#  endif
 # endif
 
 # ifndef HOST_c2l
                                    (c)+=4; (l);                         })
 #   endif
 #  endif
-# endif
-# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
-#  ifndef B_ENDIAN
-   /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
-#   define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, l)
-#   define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, l)
+#  if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
+#   ifndef B_ENDIAN
+    /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
+#    define HOST_c2l(c,l)        ((l)=*((const unsigned int *)(c)), (c)+=4, l)
+#    define HOST_l2c(l,c)        (*((unsigned int *)(c))=(l), (c)+=4, l)
+#   endif
 #  endif
 # endif
 
index 0f87dbd..f5240da 100644 (file)
@@ -52,6 +52,9 @@ md5-ia64.s: asm/md5-ia64.S
        $(CC) $(CFLAGS) -E asm/md5-ia64.S | \
        $(PERL) -ne 's/;\s+/;\n/g; print;' > $@
 
+md5-sparcv9.S: asm/md5-sparcv9.pl
+       $(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS)
+
 files:
        $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
 
diff --git a/crypto/md5/asm/md5-sparcv9.pl b/crypto/md5/asm/md5-sparcv9.pl
new file mode 100644 (file)
index 0000000..407da3c
--- /dev/null
@@ -0,0 +1,430 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Hardware SPARC T4 support by David S. Miller <davem@davemloft.net>.
+# ====================================================================
+
+# MD5 for SPARCv9, 6.9 cycles per byte on UltraSPARC, >40% faster than
+# code generated by Sun C 5.2.
+
+# SPARC T4 MD5 hardware achieves 3.20 cycles per byte, which is 2.1x
+# faster than software. Multi-process benchmark saturates at 12x
+# single-process result on 8-core processor, or ~11GBps per 2.85GHz
+# socket.
+
+$output=shift;         # first command-line argument: output file name
+open STDOUT,">$output";        # everything printed below goes to that file
+
+use integer;           # integer arithmetic from here on, so ($i+1)/2 and $j/2 truncate
+
+($ctx,$inp,$len)=("%i0","%i1","%i2");  # input arguments
+
+# 64-bit values
+@X=("%o0","%o1","%o2","%o3","%o4","%o5","%o7","%g1","%g2");    # input block; 9th entry is loaded only for unaligned input
+$tx="%g3";             # scratch for shifted or extracted input words
+($AB,$CD)=("%g4","%g5");       # A,B and C,D packed pairwise at loop entry
+
+# 32-bit values
+@V=($A,$B,$C,$D)=map("%l$_",(0..3));   # state words; @V is rotated after every round
+($t1,$t2,$t3,$saved_asi)=map("%l$_",(4..7));   # round temporaries and the caller's %asi
+($shr,$shl1,$shl2)=("%i3","%i4","%i5");        # shift counts for unaligned input, shr+shl1+shl2==64
+
+my @K=(        0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,    # per-round additive constants
+       0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
+       0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,
+       0x6b901122,0xfd987193,0xa679438e,0x49b40821,
+
+       0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,
+       0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
+       0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,
+       0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,
+
+       0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,
+       0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
+       0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,
+       0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,
+
+       0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,
+       0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
+       0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,
+       0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391, 0  );     # trailing 0 keeps @K[$i+1] defined for $i==63
+
+sub R0 {               # appends round $i (called for 0..14) to $code and prepares X+K for round $i+1
+  my ($i,$a,$b,$c,$d) = @_;    # round index, then the state registers in their current rotation
+  my $rot = (7,12,17,22)[$i%4];        # shift count used by the sll/srl pair below
+  my $j   = ($i+1)/2;          # @X element that holds 32-bit word $i+1
+
+  if ($i&1) {          # odd $i: word $i+1 starts a new @X[$j], which has to be aligned first
+    $code.=<<___;
+        srlx   @X[$j],$shr,@X[$j]      ! align X[`$i+1`]
+       and     $b,$t1,$t1              ! round $i
+        sllx   @X[$j+1],$shl1,$tx
+       add     $t2,$a,$a
+        sllx   $tx,$shl2,$tx
+       xor     $d,$t1,$t1
+        or     $tx,@X[$j],@X[$j]
+        sethi  %hi(@K[$i+1]),$t2
+       add     $t1,$a,$a
+        or     $t2,%lo(@K[$i+1]),$t2
+       sll     $a,$rot,$t3
+        add    @X[$j],$t2,$t2          ! X[`$i+1`]+K[`$i+1`]
+       srl     $a,32-$rot,$a
+       add     $b,$t3,$t3
+        xor     $b,$c,$t1
+       add     $t3,$a,$a
+___
+  } else {             # even $i: word $i+1 is the upper half of the already aligned @X[$j]
+    $code.=<<___;
+        srlx   @X[$j],32,$tx           ! extract X[`2*$j+1`]
+       and     $b,$t1,$t1              ! round $i
+       add     $t2,$a,$a
+       xor     $d,$t1,$t1
+        sethi  %hi(@K[$i+1]),$t2
+       add     $t1,$a,$a
+        or     $t2,%lo(@K[$i+1]),$t2
+       sll     $a,$rot,$t3
+        add    $tx,$t2,$t2             ! X[`2*$j+1`]+K[`$i+1`]
+       srl     $a,32-$rot,$a
+       add     $b,$t3,$t3
+        xor     $b,$c,$t1
+       add     $t3,$a,$a
+___
+  }
+}
+
+sub R0_1 {             # appends round 15 to $code: same shape as even-$i R0, but hands over to R1
+  my ($i,$a,$b,$c,$d) = @_;    # round index, then the state registers in their current rotation
+  my $rot = (7,12,17,22)[$i%4];        # shift count used by the sll/srl pair below
+
+$code.=<<___;          # differs from R0 in fetching X[1] and in ending with andn instead of xor
+        srlx   @X[0],32,$tx            ! extract X[1]
+       and     $b,$t1,$t1              ! round $i
+       add     $t2,$a,$a
+       xor     $d,$t1,$t1
+        sethi  %hi(@K[$i+1]),$t2
+       add     $t1,$a,$a
+        or     $t2,%lo(@K[$i+1]),$t2
+       sll     $a,$rot,$t3
+        add    $tx,$t2,$t2             ! X[1]+K[`$i+1`]
+       srl     $a,32-$rot,$a
+       add     $b,$t3,$t3
+        andn    $b,$c,$t1
+       add     $t3,$a,$a
+___
+}
+
+sub R1 {               # appends round $i (called for 16..31) to $code and prepares X+K for round $i+1
+  my ($i,$a,$b,$c,$d) = @_;    # round index, then the state registers in their current rotation
+  my $rot = (5,9,14,20)[$i%4]; # shift count used by the sll/srl pair below
+  my $j   = $i<31 ? (1+5*($i+1))%16 : (5+3*($i+1))%16; # input word for round $i+1; $i==31 uses the R2 formula
+  my $xi  = @X[$j/2];          # register holding that word in its low half, if $j is even
+
+$code.=<<___ if ($j&1 && ($xi=$tx));   # odd $j: word is the upper half, extract it into $tx
+        srlx   @X[$j/2],32,$xi         ! extract X[$j]
+___
+$code.=<<___;
+       and     $b,$d,$t3               ! round $i
+       add     $t2,$a,$a
+       or      $t3,$t1,$t1
+        sethi  %hi(@K[$i+1]),$t2
+       add     $t1,$a,$a
+        or     $t2,%lo(@K[$i+1]),$t2
+       sll     $a,$rot,$t3
+        add    $xi,$t2,$t2             ! X[$j]+K[`$i+1`]
+       srl     $a,32-$rot,$a
+       add     $b,$t3,$t3
+        `$i<31?"andn":"xor"`    $b,$c,$t1
+       add     $t3,$a,$a
+___
+}
+
+sub R2 {               # appends round $i (called for 32..47) to $code and prepares X+K for round $i+1
+  my ($i,$a,$b,$c,$d) = @_;    # round index, then the state registers in their current rotation
+  my $rot = (4,11,16,23)[$i%4];        # shift count used by the sll/srl pair below
+  my $j   = $i<47 ? (5+3*($i+1))%16 : (0+7*($i+1))%16; # input word for round $i+1; $i==47 uses the R3 formula
+  my $xi  = @X[$j/2];          # register holding that word in its low half, if $j is even
+
+$code.=<<___ if ($j&1 && ($xi=$tx));   # odd $j: word is the upper half, extract it into $tx
+        srlx   @X[$j/2],32,$xi         ! extract X[$j]
+___
+$code.=<<___;
+       add     $t2,$a,$a               ! round $i
+       xor     $b,$t1,$t1
+        sethi  %hi(@K[$i+1]),$t2
+       add     $t1,$a,$a
+        or     $t2,%lo(@K[$i+1]),$t2
+       sll     $a,$rot,$t3
+        add    $xi,$t2,$t2             ! X[$j]+K[`$i+1`]
+       srl     $a,32-$rot,$a
+       add     $b,$t3,$t3
+        xor     $b,$c,$t1
+       add     $t3,$a,$a
+___
+}
+
+sub R3 {               # appends round $i (called for 48..63) to $code and prepares X+K for round $i+1
+  my ($i,$a,$b,$c,$d) = @_;    # round index, then the state registers in their current rotation
+  my $rot = (6,10,15,21)[$i%4];        # shift count used by the sll/srl pair below
+  my $j   = (0+7*($i+1))%16;   # input word for round $i+1; for $i==63 this is word 0 paired with @K[64]
+  my $xi  = @X[$j/2];          # register holding that word in its low half, if $j is even
+
+$code.=<<___;          # consume the X+K sum prepared by the previous round
+       add     $t2,$a,$a               ! round $i
+___
+$code.=<<___ if ($j&1 && ($xi=$tx));   # odd $j: word is the upper half, extract it into $tx
+        srlx   @X[$j/2],32,$xi         ! extract X[$j]
+___
+$code.=<<___;
+       orn     $b,$d,$t1
+        sethi  %hi(@K[$i+1]),$t2
+       xor     $c,$t1,$t1
+        or     $t2,%lo(@K[$i+1]),$t2
+       add     $t1,$a,$a
+       sll     $a,$rot,$t3
+        add    $xi,$t2,$t2             ! X[$j]+K[`$i+1`]
+       srl     $a,32-$rot,$a
+       add     $b,$t3,$t3
+       add     $t3,$a,$a
+___
+}
+
+$code.=<<___;
+#include "sparc_arch.h"
+
+#ifdef __arch64__
+.register      %g2,#scratch
+.register      %g3,#scratch
+#endif
+
+.section       ".text",#alloc,#execinstr
+
+#ifdef __PIC__
+SPARC_PIC_THUNK(%g1)
+#endif
+
+.globl md5_block_asm_data_order
+.align 32
+md5_block_asm_data_order:
+       SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
+       ld      [%g1+4],%g1             ! OPENSSL_sparcv9cap_P[1]
+
+       andcc   %g1, CFR_MD5, %g0
+       be      .Lsoftware
+       nop
+
+       mov     4, %g1
+       andcc   %o1, 0x7, %g0
+       lda     [%o0 + %g0]0x88, %f0            ! load context
+       lda     [%o0 + %g1]0x88, %f1
+       add     %o0, 8, %o0
+       lda     [%o0 + %g0]0x88, %f2
+       lda     [%o0 + %g1]0x88, %f3
+       bne,pn  %icc, .Lhwunaligned
+       sub     %o0, 8, %o0
+
+.Lhw_loop:
+       ldd     [%o1 + 0x00], %f8
+       ldd     [%o1 + 0x08], %f10
+       ldd     [%o1 + 0x10], %f12
+       ldd     [%o1 + 0x18], %f14
+       ldd     [%o1 + 0x20], %f16
+       ldd     [%o1 + 0x28], %f18
+       ldd     [%o1 + 0x30], %f20
+       subcc   %o2, 1, %o2             ! done yet? 
+       ldd     [%o1 + 0x38], %f22
+       add     %o1, 0x40, %o1
+       prefetch [%o1 + 63], 20
+
+       .word   0x81b02800              ! MD5
+
+       bne,pt  SIZE_T_CC, .Lhw_loop
+       nop
+
+.Lhwfinish:
+       sta     %f0, [%o0 + %g0]0x88    ! store context
+       sta     %f1, [%o0 + %g1]0x88
+       add     %o0, 8, %o0
+       sta     %f2, [%o0 + %g0]0x88
+       sta     %f3, [%o0 + %g1]0x88
+       retl
+       nop
+
+.align 8
+.Lhwunaligned:
+       alignaddr %o1, %g0, %o1
+
+       ldd     [%o1 + 0x00], %f10
+.Lhwunaligned_loop:
+       ldd     [%o1 + 0x08], %f12
+       ldd     [%o1 + 0x10], %f14
+       ldd     [%o1 + 0x18], %f16
+       ldd     [%o1 + 0x20], %f18
+       ldd     [%o1 + 0x28], %f20
+       ldd     [%o1 + 0x30], %f22
+       ldd     [%o1 + 0x38], %f24
+       subcc   %o2, 1, %o2             ! done yet?
+       ldd     [%o1 + 0x40], %f26
+       add     %o1, 0x40, %o1
+       prefetch [%o1 + 63], 20
+
+       faligndata %f10, %f12, %f8
+       faligndata %f12, %f14, %f10
+       faligndata %f14, %f16, %f12
+       faligndata %f16, %f18, %f14
+       faligndata %f18, %f20, %f16
+       faligndata %f20, %f22, %f18
+       faligndata %f22, %f24, %f20
+       faligndata %f24, %f26, %f22
+
+       .word   0x81b02800              ! MD5
+
+       bne,pt  SIZE_T_CC, .Lhwunaligned_loop
+       for     %f26, %f26, %f10        ! %f10=%f26
+
+       ba      .Lhwfinish
+       nop
+
+.align 16
+.Lsoftware:
+       save    %sp,-STACK_FRAME,%sp
+
+       rd      %asi,$saved_asi
+       wr      %g0,0x88,%asi           ! ASI_PRIMARY_LITTLE
+       and     $inp,7,$shr
+       andn    $inp,7,$inp
+
+       sll     $shr,3,$shr             ! *=8
+       mov     56,$shl2
+       ld      [$ctx+0],$A
+       sub     $shl2,$shr,$shl2
+       ld      [$ctx+4],$B
+       and     $shl2,32,$shl1
+       add     $shl2,8,$shl2
+       ld      [$ctx+8],$C
+       sub     $shl2,$shl1,$shl2       ! shr+shl1+shl2==64
+       ld      [$ctx+12],$D
+       nop
+
+.Loop:
+        cmp    $shr,0                  ! was inp aligned?
+       ldxa    [$inp+0]%asi,@X[0]      ! load little-endian input
+       ldxa    [$inp+8]%asi,@X[1]
+       ldxa    [$inp+16]%asi,@X[2]
+       ldxa    [$inp+24]%asi,@X[3]
+       ldxa    [$inp+32]%asi,@X[4]
+        sllx   $A,32,$AB               ! pack A,B
+       ldxa    [$inp+40]%asi,@X[5]
+        sllx   $C,32,$CD               ! pack C,D
+       ldxa    [$inp+48]%asi,@X[6]
+        or     $B,$AB,$AB
+       ldxa    [$inp+56]%asi,@X[7]
+        or     $D,$CD,$CD
+       bnz,a,pn        %icc,.+8
+       ldxa    [$inp+64]%asi,@X[8]
+
+       srlx    @X[0],$shr,@X[0]        ! align X[0]
+       sllx    @X[1],$shl1,$tx
+        sethi  %hi(@K[0]),$t2
+       sllx    $tx,$shl2,$tx
+        or     $t2,%lo(@K[0]),$t2
+       or      $tx,@X[0],@X[0]
+        xor    $C,$D,$t1
+        add    @X[0],$t2,$t2           ! X[0]+K[0]
+___
+       for ($i=0;$i<15;$i++)   { &R0($i,@V);   unshift(@V,pop(@V)); }
+       for (;$i<16;$i++)       { &R0_1($i,@V); unshift(@V,pop(@V)); }
+       for (;$i<32;$i++)       { &R1($i,@V);   unshift(@V,pop(@V)); }
+       for (;$i<48;$i++)       { &R2($i,@V);   unshift(@V,pop(@V)); }
+       for (;$i<64;$i++)       { &R3($i,@V);   unshift(@V,pop(@V)); }
+$code.=<<___;
+       srlx    $AB,32,$t1              ! unpack A,B,C,D and accumulate
+       add     $inp,64,$inp            ! advance inp
+       srlx    $CD,32,$t2
+       add     $t1,$A,$A
+       subcc   $len,1,$len             ! done yet?
+       add     $AB,$B,$B
+       add     $t2,$C,$C
+       add     $CD,$D,$D
+       srl     $B,0,$B                 ! clruw $B
+       bne     SIZE_T_CC,.Loop
+       srl     $D,0,$D                 ! clruw $D
+
+       st      $A,[$ctx+0]             ! write out ctx
+       st      $B,[$ctx+4]
+       st      $C,[$ctx+8]
+       st      $D,[$ctx+12]
+
+       wr      %g0,$saved_asi,%asi
+       ret
+       restore
+.type  md5_block_asm_data_order,#function
+.size  md5_block_asm_data_order,(.-md5_block_asm_data_order)
+
+.asciz "MD5 block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
+.align 4
+___
+
+# Purpose of these subroutines is to explicitly encode VIS instructions,
+# so that one can compile the module without having to specify VIS
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# Idea is to reserve for option to produce "universal" binary and let
+# programmer detect if current CPU is VIS capable at run-time.
+sub unvis {            # returns a raw .word for a known VIS mnemonic, else the instruction text unchanged
+my ($mnemonic,$rs1,$rs2,$rd)=@_;       # mnemonic plus three register operands as written in the template
+my ($ref,$opf);                        # parenthesized so that $opf is lexical as well, not a package global
+my %visopf = ( "faligndata"    => 0x048,
+               "for"           => 0x07c        );
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rd"; # original spelling, used as fallback and as trailing comment
+
+    if ($opf=$visopf{$mnemonic}) {
+       foreach ($rs1,$rs2,$rd) {       # $_ aliases each operand, so assignments rewrite it in place
+           return $ref if (!/%f([0-9]{1,2})/); # not an %fN register: leave the line alone
+           $_=$1;
+           if ($1>=32) {
+               return $ref if ($1&1);  # odd register numbers above 31 are not encoded here
+               # re-encode for upper double register addressing
+               $_=($1|$1>>5)&31;
+           }
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+sub unalignaddr {      # returns a raw .word for alignaddr, else the instruction text unchanged
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my $text="$mnemonic\t$rs1,$rs2,$rd";   # original spelling, used as fallback and as trailing comment
+my %base = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );       # first register number of each group
+
+    for my $reg ($rs1,$rs2,$rd) {      # $reg aliases the operand, so it is rewritten in place
+       return $text unless ($reg =~ /%([goli])([0-7])/);
+       $reg = $base{$1}+$2;            # register name to register number
+    }
+    my $insn = 0x81b00300|$rd<<25|$rs1<<14|$rs2;
+
+    return sprintf ".word\t0x%08x !%s", $insn, $text;
+}
+
+foreach (split("\n",$code)) {  # post-process the accumulated template one line at a time
+       s/\`([^\`]*)\`/eval $1/ge;      # evaluate back-quoted fragments as Perl expressions
+
+       s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
+               &unvis($1,$2,$3,$4)
+        /ge;
+       s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
+               &unalignaddr($1,$2,$3,$4)
+        /ge;
+
+       print $_,"\n";
+}
+
+close STDOUT or die "error closing STDOUT: $!";        # a failed flush must fail the build, not leave a truncated file
index 5f6f2fd..82e6921 100644 (file)
@@ -71,6 +71,8 @@
 #  define md5_block_data_order md5_block_asm_data_order
 # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
 #  define md5_block_data_order md5_block_asm_data_order
+# elif defined(__sparc) || defined(__sparc__)
+#  define md5_block_data_order md5_block_asm_data_order
 # endif
 #endif
 
index e278fa6..a7863d9 100644 (file)
@@ -22,9 +22,9 @@ APPS=
 
 LIB=$(TOP)/libcrypto.a
 LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
-       ccm128.c xts128.c
+       ccm128.c xts128.c wrap128.c
 LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o gcm128.o \
-       ccm128.o xts128.o $(MODES_ASM_OBJ)
+       ccm128.o xts128.o wrap128.o $(MODES_ASM_OBJ)
 
 SRC= $(LIBSRC)
 
@@ -50,20 +50,26 @@ ghash-x86.s:        asm/ghash-x86.pl
        $(PERL) asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
 ghash-x86_64.s:        asm/ghash-x86_64.pl
        $(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@
+aesni-gcm-x86_64.s:    asm/aesni-gcm-x86_64.pl
+       $(PERL) asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME) > $@
 ghash-sparcv9.s:       asm/ghash-sparcv9.pl
        $(PERL) asm/ghash-sparcv9.pl $@ $(CFLAGS)
 ghash-alpha.s: asm/ghash-alpha.pl
-       (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \
+       (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \
        $(PERL) asm/ghash-alpha.pl > $$preproc && \
-       $(CC) -E $$preproc > $@ && rm $$preproc)
-
+       $(CC) -E -P $$preproc > $@ && rm $$preproc)
 ghash-parisc.s:        asm/ghash-parisc.pl
        $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
+ghashv8-armx.S:        asm/ghashv8-armx.pl
+       $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@
+ghashp8-ppc.s: asm/ghashp8-ppc.pl
+       $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@
 
 # GNU make "catch all"
 ghash-%.S:     asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
 
 ghash-armv4.o: ghash-armv4.S
+ghashv8-armx.o:        ghashv8-armx.S
 
 files:
        $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
@@ -139,6 +145,14 @@ ofb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
 ofb128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 ofb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
 ofb128.o: ../../include/openssl/symhacks.h modes_lcl.h ofb128.c
+wrap128.o: ../../e_os.h ../../include/openssl/bio.h
+wrap128.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
+wrap128.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
+wrap128.o: ../../include/openssl/lhash.h ../../include/openssl/modes.h
+wrap128.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+wrap128.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
+wrap128.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+wrap128.o: ../cryptlib.h wrap128.c
 xts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
 xts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h
 xts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/modes/asm/aesni-gcm-x86_64.pl
new file mode 100644 (file)
index 0000000..7e4e04e
--- /dev/null
@@ -0,0 +1,1057 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+#
+# AES-NI-CTR+GHASH stitch.
+#
+# February 2013
+#
+# OpenSSL GCM implementation is organized in such way that its
+# performance is rather close to the sum of its streamed components,
+# in the context parallelized AES-NI CTR and modulo-scheduled
+# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation
+# was observed to perform significantly better than the sum of the
+# components on contemporary CPUs, the effort was deemed impossible to
+# justify. This module is based on combination of Intel submissions,
+# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
+# Locktyukhin of Intel Corp. who verified that it reduces shuffles
+# pressure with notable relative improvement, achieving 1.0 cycle per
+# byte processed with 128-bit key on Haswell processor, and 0.74 -
+# on Broadwell. [Mentioned results are raw profiled measurements for
+# favourable packet size, one divisible by 96. Applications using the
+# EVP interface will observe a few percent worse performance.]
+#
+# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
+# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf
+
+$flavour = shift;      # perlasm flavour, handed on to x86_64-xlate.pl
+$output  = shift;      # output file name, handed on likewise
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }  # a lone argument containing a dot is the output file
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's own path
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.19) + ($1>=2.22); # 0, 1 or 2 depending on the assembler version
+}
+
+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+           `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.09) + ($1>=2.10); # same scale, probed from nasm
+}
+
+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+           `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=11);     # same scale, probed from ml64
+}
+
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+       $avx = ($2>=3.0) + ($2>3.0);    # same scale, probed from clang
+}
+
+open OUT,"| \"$^X\" $xlate $flavour $output" or die "can't call $xlate: $!";   # fail loudly if the translator cannot be started
+*STDOUT=*OUT;          # everything printed from here on is piped through the translator
+
+if ($avx>1) {{{
+
+($inp,$out,$len,$key,$ivp,$Xip)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9");    # the six arguments, in prototype order
+
+($Ii,$T1,$T2,$Hkey,
+ $Z0,$Z1,$Z2,$Z3,$Xi) = map("%xmm$_",(0..8));  # %xmm0 through %xmm8
+
+($inout0,$inout1,$inout2,$inout3,$inout4,$inout5,$rndkey) = map("%xmm$_",(9..15));    # six blocks in flight plus the current round key
+
+($counter,$rounds,$ret,$const,$in0,$end0)=("%ebx","%ebp","%r10","%r11","%r14","%r15"); # scalar helpers; $ret is zeroed on entry and grows by 0x60 per six-block pass
+
+$code=<<___;
+.text
+
+.type  _aesni_ctr32_ghash_6x,\@abi-omnipotent
+.align 32
+_aesni_ctr32_ghash_6x:
+       vmovdqu         0x20($const),$T2        # borrow $T2, .Lone_msb
+       sub             \$6,$len
+       vpxor           $Z0,$Z0,$Z0             # $Z0   = 0
+       vmovdqu         0x00-0x80($key),$rndkey
+       vpaddb          $T2,$T1,$inout1
+       vpaddb          $T2,$inout1,$inout2
+       vpaddb          $T2,$inout2,$inout3
+       vpaddb          $T2,$inout3,$inout4
+       vpaddb          $T2,$inout4,$inout5
+       vpxor           $rndkey,$T1,$inout0
+       vmovdqu         $Z0,16+8(%rsp)          # "$Z3" = 0
+       jmp             .Loop6x
+
+.align 32
+.Loop6x:
+       add             \$`6<<24`,$counter
+       jc              .Lhandle_ctr32          # discard $inout[1-5]?
+       vmovdqu         0x00-0x20($Xip),$Hkey   # $Hkey^1
+         vpaddb        $T2,$inout5,$T1         # next counter value
+         vpxor         $rndkey,$inout1,$inout1
+         vpxor         $rndkey,$inout2,$inout2
+
+.Lresume_ctr32:
+       vmovdqu         $T1,($ivp)              # save next counter value
+       vpclmulqdq      \$0x10,$Hkey,$Z3,$Z1
+         vpxor         $rndkey,$inout3,$inout3
+         vmovups       0x10-0x80($key),$T2     # borrow $T2 for $rndkey
+       vpclmulqdq      \$0x01,$Hkey,$Z3,$Z2
+       xor             %r12,%r12
+       cmp             $in0,$end0
+
+         vaesenc       $T2,$inout0,$inout0
+       vmovdqu         0x30+8(%rsp),$Ii        # I[4]
+         vpxor         $rndkey,$inout4,$inout4
+       vpclmulqdq      \$0x00,$Hkey,$Z3,$T1
+         vaesenc       $T2,$inout1,$inout1
+         vpxor         $rndkey,$inout5,$inout5
+       setnc           %r12b
+       vpclmulqdq      \$0x11,$Hkey,$Z3,$Z3
+         vaesenc       $T2,$inout2,$inout2
+       vmovdqu         0x10-0x20($Xip),$Hkey   # $Hkey^2
+       neg             %r12
+         vaesenc       $T2,$inout3,$inout3
+        vpxor          $Z1,$Z2,$Z2
+       vpclmulqdq      \$0x00,$Hkey,$Ii,$Z1
+        vpxor          $Z0,$Xi,$Xi             # modulo-scheduled
+         vaesenc       $T2,$inout4,$inout4
+        vpxor          $Z1,$T1,$Z0
+       and             \$0x60,%r12
+         vmovups       0x20-0x80($key),$rndkey
+       vpclmulqdq      \$0x10,$Hkey,$Ii,$T1
+         vaesenc       $T2,$inout5,$inout5
+
+       vpclmulqdq      \$0x01,$Hkey,$Ii,$T2
+       lea             ($in0,%r12),$in0
+         vaesenc       $rndkey,$inout0,$inout0
+        vpxor          16+8(%rsp),$Xi,$Xi      # modulo-scheduled [vpxor $Z3,$Xi,$Xi]
+       vpclmulqdq      \$0x11,$Hkey,$Ii,$Hkey
+        vmovdqu        0x40+8(%rsp),$Ii        # I[3]
+         vaesenc       $rndkey,$inout1,$inout1
+       movbe           0x58($in0),%r13
+         vaesenc       $rndkey,$inout2,$inout2
+       movbe           0x50($in0),%r12
+         vaesenc       $rndkey,$inout3,$inout3
+       mov             %r13,0x20+8(%rsp)
+         vaesenc       $rndkey,$inout4,$inout4
+       mov             %r12,0x28+8(%rsp)
+       vmovdqu         0x30-0x20($Xip),$Z1     # borrow $Z1 for $Hkey^3
+         vaesenc       $rndkey,$inout5,$inout5
+
+         vmovups       0x30-0x80($key),$rndkey
+        vpxor          $T1,$Z2,$Z2
+       vpclmulqdq      \$0x00,$Z1,$Ii,$T1
+         vaesenc       $rndkey,$inout0,$inout0
+        vpxor          $T2,$Z2,$Z2
+       vpclmulqdq      \$0x10,$Z1,$Ii,$T2
+         vaesenc       $rndkey,$inout1,$inout1
+        vpxor          $Hkey,$Z3,$Z3
+       vpclmulqdq      \$0x01,$Z1,$Ii,$Hkey
+         vaesenc       $rndkey,$inout2,$inout2
+       vpclmulqdq      \$0x11,$Z1,$Ii,$Z1
+        vmovdqu        0x50+8(%rsp),$Ii        # I[2]
+         vaesenc       $rndkey,$inout3,$inout3
+         vaesenc       $rndkey,$inout4,$inout4
+        vpxor          $T1,$Z0,$Z0
+       vmovdqu         0x40-0x20($Xip),$T1     # borrow $T1 for $Hkey^4
+         vaesenc       $rndkey,$inout5,$inout5
+
+         vmovups       0x40-0x80($key),$rndkey
+        vpxor          $T2,$Z2,$Z2
+       vpclmulqdq      \$0x00,$T1,$Ii,$T2
+         vaesenc       $rndkey,$inout0,$inout0
+        vpxor          $Hkey,$Z2,$Z2
+       vpclmulqdq      \$0x10,$T1,$Ii,$Hkey
+         vaesenc       $rndkey,$inout1,$inout1
+       movbe           0x48($in0),%r13
+        vpxor          $Z1,$Z3,$Z3
+       vpclmulqdq      \$0x01,$T1,$Ii,$Z1
+         vaesenc       $rndkey,$inout2,$inout2
+       movbe           0x40($in0),%r12
+       vpclmulqdq      \$0x11,$T1,$Ii,$T1
+        vmovdqu        0x60+8(%rsp),$Ii        # I[1]
+         vaesenc       $rndkey,$inout3,$inout3
+       mov             %r13,0x30+8(%rsp)
+         vaesenc       $rndkey,$inout4,$inout4
+       mov             %r12,0x38+8(%rsp)
+        vpxor          $T2,$Z0,$Z0
+       vmovdqu         0x60-0x20($Xip),$T2     # borrow $T2 for $Hkey^5
+         vaesenc       $rndkey,$inout5,$inout5
+
+         vmovups       0x50-0x80($key),$rndkey
+        vpxor          $Hkey,$Z2,$Z2
+       vpclmulqdq      \$0x00,$T2,$Ii,$Hkey
+         vaesenc       $rndkey,$inout0,$inout0
+        vpxor          $Z1,$Z2,$Z2
+       vpclmulqdq      \$0x10,$T2,$Ii,$Z1
+         vaesenc       $rndkey,$inout1,$inout1
+       movbe           0x38($in0),%r13
+        vpxor          $T1,$Z3,$Z3
+       vpclmulqdq      \$0x01,$T2,$Ii,$T1
+        vpxor          0x70+8(%rsp),$Xi,$Xi    # accumulate I[0]
+         vaesenc       $rndkey,$inout2,$inout2
+       movbe           0x30($in0),%r12
+       vpclmulqdq      \$0x11,$T2,$Ii,$T2
+         vaesenc       $rndkey,$inout3,$inout3
+       mov             %r13,0x40+8(%rsp)
+         vaesenc       $rndkey,$inout4,$inout4
+       mov             %r12,0x48+8(%rsp)
+        vpxor          $Hkey,$Z0,$Z0
+        vmovdqu        0x70-0x20($Xip),$Hkey   # $Hkey^6
+         vaesenc       $rndkey,$inout5,$inout5
+
+         vmovups       0x60-0x80($key),$rndkey
+        vpxor          $Z1,$Z2,$Z2
+       vpclmulqdq      \$0x10,$Hkey,$Xi,$Z1
+         vaesenc       $rndkey,$inout0,$inout0
+        vpxor          $T1,$Z2,$Z2
+       vpclmulqdq      \$0x01,$Hkey,$Xi,$T1
+         vaesenc       $rndkey,$inout1,$inout1
+       movbe           0x28($in0),%r13
+        vpxor          $T2,$Z3,$Z3
+       vpclmulqdq      \$0x00,$Hkey,$Xi,$T2
+         vaesenc       $rndkey,$inout2,$inout2
+       movbe           0x20($in0),%r12
+       vpclmulqdq      \$0x11,$Hkey,$Xi,$Xi
+         vaesenc       $rndkey,$inout3,$inout3
+       mov             %r13,0x50+8(%rsp)
+         vaesenc       $rndkey,$inout4,$inout4
+       mov             %r12,0x58+8(%rsp)
+       vpxor           $Z1,$Z2,$Z2
+         vaesenc       $rndkey,$inout5,$inout5
+       vpxor           $T1,$Z2,$Z2
+
+         vmovups       0x70-0x80($key),$rndkey
+       vpslldq         \$8,$Z2,$Z1
+       vpxor           $T2,$Z0,$Z0
+       vmovdqu         0x10($const),$Hkey      # .Lpoly
+
+         vaesenc       $rndkey,$inout0,$inout0
+       vpxor           $Xi,$Z3,$Z3
+         vaesenc       $rndkey,$inout1,$inout1
+       vpxor           $Z1,$Z0,$Z0
+       movbe           0x18($in0),%r13
+         vaesenc       $rndkey,$inout2,$inout2
+       movbe           0x10($in0),%r12
+       vpalignr        \$8,$Z0,$Z0,$Ii         # 1st phase
+       vpclmulqdq      \$0x10,$Hkey,$Z0,$Z0
+       mov             %r13,0x60+8(%rsp)
+         vaesenc       $rndkey,$inout3,$inout3
+       mov             %r12,0x68+8(%rsp)
+         vaesenc       $rndkey,$inout4,$inout4
+         vmovups       0x80-0x80($key),$T1     # borrow $T1 for $rndkey
+         vaesenc       $rndkey,$inout5,$inout5
+
+         vaesenc       $T1,$inout0,$inout0
+         vmovups       0x90-0x80($key),$rndkey
+         vaesenc       $T1,$inout1,$inout1
+       vpsrldq         \$8,$Z2,$Z2
+         vaesenc       $T1,$inout2,$inout2
+       vpxor           $Z2,$Z3,$Z3
+         vaesenc       $T1,$inout3,$inout3
+       vpxor           $Ii,$Z0,$Z0
+       movbe           0x08($in0),%r13
+         vaesenc       $T1,$inout4,$inout4
+       movbe           0x00($in0),%r12
+         vaesenc       $T1,$inout5,$inout5
+         vmovups       0xa0-0x80($key),$T1
+         cmp           \$11,$rounds
+         jb            .Lenc_tail              # 128-bit key
+
+         vaesenc       $rndkey,$inout0,$inout0
+         vaesenc       $rndkey,$inout1,$inout1
+         vaesenc       $rndkey,$inout2,$inout2
+         vaesenc       $rndkey,$inout3,$inout3
+         vaesenc       $rndkey,$inout4,$inout4
+         vaesenc       $rndkey,$inout5,$inout5
+
+         vaesenc       $T1,$inout0,$inout0
+         vaesenc       $T1,$inout1,$inout1
+         vaesenc       $T1,$inout2,$inout2
+         vaesenc       $T1,$inout3,$inout3
+         vaesenc       $T1,$inout4,$inout4
+         vmovups       0xb0-0x80($key),$rndkey
+         vaesenc       $T1,$inout5,$inout5
+         vmovups       0xc0-0x80($key),$T1
+         je            .Lenc_tail              # 192-bit key
+
+         vaesenc       $rndkey,$inout0,$inout0
+         vaesenc       $rndkey,$inout1,$inout1
+         vaesenc       $rndkey,$inout2,$inout2
+         vaesenc       $rndkey,$inout3,$inout3
+         vaesenc       $rndkey,$inout4,$inout4
+         vaesenc       $rndkey,$inout5,$inout5
+
+         vaesenc       $T1,$inout0,$inout0
+         vaesenc       $T1,$inout1,$inout1
+         vaesenc       $T1,$inout2,$inout2
+         vaesenc       $T1,$inout3,$inout3
+         vaesenc       $T1,$inout4,$inout4
+         vmovups       0xd0-0x80($key),$rndkey
+         vaesenc       $T1,$inout5,$inout5
+         vmovups       0xe0-0x80($key),$T1
+         jmp           .Lenc_tail              # 256-bit key
+
+.align 32
+.Lhandle_ctr32:
+       vmovdqu         ($const),$Ii            # borrow $Ii for .Lbswap_mask
+         vpshufb       $Ii,$T1,$Z2             # byte-swap counter
+         vmovdqu       0x30($const),$Z1        # borrow $Z1, .Ltwo_lsb
+         vpaddd        0x40($const),$Z2,$inout1        # .Lone_lsb
+         vpaddd        $Z1,$Z2,$inout2
+       vmovdqu         0x00-0x20($Xip),$Hkey   # $Hkey^1
+         vpaddd        $Z1,$inout1,$inout3
+         vpshufb       $Ii,$inout1,$inout1
+         vpaddd        $Z1,$inout2,$inout4
+         vpshufb       $Ii,$inout2,$inout2
+         vpxor         $rndkey,$inout1,$inout1
+         vpaddd        $Z1,$inout3,$inout5
+         vpshufb       $Ii,$inout3,$inout3
+         vpxor         $rndkey,$inout2,$inout2
+         vpaddd        $Z1,$inout4,$T1         # byte-swapped next counter value
+         vpshufb       $Ii,$inout4,$inout4
+         vpshufb       $Ii,$inout5,$inout5
+         vpshufb       $Ii,$T1,$T1             # next counter value
+       jmp             .Lresume_ctr32
+
+.align 32
+.Lenc_tail:
+         vaesenc       $rndkey,$inout0,$inout0
+       vmovdqu         $Z3,16+8(%rsp)          # postpone vpxor $Z3,$Xi,$Xi
+       vpalignr        \$8,$Z0,$Z0,$Xi         # 2nd phase
+         vaesenc       $rndkey,$inout1,$inout1
+       vpclmulqdq      \$0x10,$Hkey,$Z0,$Z0
+         vpxor         0x00($inp),$T1,$T2
+         vaesenc       $rndkey,$inout2,$inout2
+         vpxor         0x10($inp),$T1,$Ii
+         vaesenc       $rndkey,$inout3,$inout3
+         vpxor         0x20($inp),$T1,$Z1
+         vaesenc       $rndkey,$inout4,$inout4
+         vpxor         0x30($inp),$T1,$Z2
+         vaesenc       $rndkey,$inout5,$inout5
+         vpxor         0x40($inp),$T1,$Z3
+         vpxor         0x50($inp),$T1,$Hkey
+         vmovdqu       ($ivp),$T1              # load next counter value
+
+         vaesenclast   $T2,$inout0,$inout0
+         vmovdqu       0x20($const),$T2        # borrow $T2, .Lone_msb
+         vaesenclast   $Ii,$inout1,$inout1
+        vpaddb         $T2,$T1,$Ii
+       mov             %r13,0x70+8(%rsp)
+       lea             0x60($inp),$inp
+         vaesenclast   $Z1,$inout2,$inout2
+        vpaddb         $T2,$Ii,$Z1
+       mov             %r12,0x78+8(%rsp)
+       lea             0x60($out),$out
+         vmovdqu       0x00-0x80($key),$rndkey
+         vaesenclast   $Z2,$inout3,$inout3
+        vpaddb         $T2,$Z1,$Z2
+         vaesenclast   $Z3, $inout4,$inout4
+        vpaddb         $T2,$Z2,$Z3
+         vaesenclast   $Hkey,$inout5,$inout5
+        vpaddb         $T2,$Z3,$Hkey
+
+       add             \$0x60,$ret
+       sub             \$0x6,$len
+       jc              .L6x_done
+
+         vmovups       $inout0,-0x60($out)     # save output
+        vpxor          $rndkey,$T1,$inout0
+         vmovups       $inout1,-0x50($out)
+        vmovdqa        $Ii,$inout1             # 0 latency
+         vmovups       $inout2,-0x40($out)
+        vmovdqa        $Z1,$inout2             # 0 latency
+         vmovups       $inout3,-0x30($out)
+        vmovdqa        $Z2,$inout3             # 0 latency
+         vmovups       $inout4,-0x20($out)
+        vmovdqa        $Z3,$inout4             # 0 latency
+         vmovups       $inout5,-0x10($out)
+        vmovdqa        $Hkey,$inout5           # 0 latency
+       vmovdqu         0x20+8(%rsp),$Z3        # I[5]
+       jmp             .Loop6x
+
+.L6x_done:
+       vpxor           16+8(%rsp),$Xi,$Xi      # modulo-scheduled
+       vpxor           $Z0,$Xi,$Xi             # modulo-scheduled
+
+       ret
+.size  _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
+___
+######################################################################
+#
+# size_t aesni_gcm_[en|de]crypt(const void *inp, void *out, size_t len,
+#              const AES_KEY *key, unsigned char iv[16],
+#              struct { u128 Xi,H,Htbl[9]; } *Xip);
+$code.=<<___;
+.globl aesni_gcm_decrypt
+.type  aesni_gcm_decrypt,\@function,6
+.align 32
+aesni_gcm_decrypt:
+       xor     $ret,$ret
+       cmp     \$0x60,$len                     # minimal accepted length
+       jb      .Lgcm_dec_abort
+
+       lea     (%rsp),%rax                     # save stack pointer
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___ if ($win64);
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,-0xd8(%rax)
+       movaps  %xmm7,-0xc8(%rax)
+       movaps  %xmm8,-0xb8(%rax)
+       movaps  %xmm9,-0xa8(%rax)
+       movaps  %xmm10,-0x98(%rax)
+       movaps  %xmm11,-0x88(%rax)
+       movaps  %xmm12,-0x78(%rax)
+       movaps  %xmm13,-0x68(%rax)
+       movaps  %xmm14,-0x58(%rax)
+       movaps  %xmm15,-0x48(%rax)
+.Lgcm_dec_body:
+___
+$code.=<<___;
+       vzeroupper
+
+       vmovdqu         ($ivp),$T1              # input counter value
+       add             \$-128,%rsp
+       mov             12($ivp),$counter
+       lea             .Lbswap_mask(%rip),$const
+       lea             -0x80($key),$in0        # borrow $in0
+       mov             \$0xf80,$end0           # borrow $end0
+       vmovdqu         ($Xip),$Xi              # load Xi
+       and             \$-128,%rsp             # ensure stack alignment
+       vmovdqu         ($const),$Ii            # borrow $Ii for .Lbswap_mask
+       lea             0x80($key),$key         # size optimization
+       lea             0x20+0x20($Xip),$Xip    # size optimization
+       mov             0xf0-0x80($key),$rounds
+       vpshufb         $Ii,$Xi,$Xi
+
+       and             $end0,$in0
+       and             %rsp,$end0
+       sub             $in0,$end0
+       jc              .Ldec_no_key_aliasing
+       cmp             \$768,$end0
+       jnc             .Ldec_no_key_aliasing
+       sub             $end0,%rsp              # avoid aliasing with key
+.Ldec_no_key_aliasing:
+
+       vmovdqu         0x50($inp),$Z3          # I[5]
+       lea             ($inp),$in0
+       vmovdqu         0x40($inp),$Z0
+       lea             -0xc0($inp,$len),$end0
+       vmovdqu         0x30($inp),$Z1
+       shr             \$4,$len
+       xor             $ret,$ret
+       vmovdqu         0x20($inp),$Z2
+        vpshufb        $Ii,$Z3,$Z3             # passed to _aesni_ctr32_ghash_6x
+       vmovdqu         0x10($inp),$T2
+        vpshufb        $Ii,$Z0,$Z0
+       vmovdqu         ($inp),$Hkey
+        vpshufb        $Ii,$Z1,$Z1
+       vmovdqu         $Z0,0x30(%rsp)
+        vpshufb        $Ii,$Z2,$Z2
+       vmovdqu         $Z1,0x40(%rsp)
+        vpshufb        $Ii,$T2,$T2
+       vmovdqu         $Z2,0x50(%rsp)
+        vpshufb        $Ii,$Hkey,$Hkey
+       vmovdqu         $T2,0x60(%rsp)
+       vmovdqu         $Hkey,0x70(%rsp)
+
+       call            _aesni_ctr32_ghash_6x
+
+       vmovups         $inout0,-0x60($out)     # save output
+       vmovups         $inout1,-0x50($out)
+       vmovups         $inout2,-0x40($out)
+       vmovups         $inout3,-0x30($out)
+       vmovups         $inout4,-0x20($out)
+       vmovups         $inout5,-0x10($out)
+
+       vpshufb         ($const),$Xi,$Xi        # .Lbswap_mask
+       vmovdqu         $Xi,-0x40($Xip)         # output Xi
+
+       vzeroupper
+___
+# Win64 only: reload the callee-saved %xmm6-%xmm15 from the 16-byte slots
+# the prologue filled. Offsets are relative to the entry %rsp kept in %rax
+# and must mirror the save sequence one-for-one.
+$code.=<<___ if ($win64);
+       movaps  -0xd8(%rax),%xmm6
+       movaps  -0xc8(%rax),%xmm7       # same slot the prologue saved it to
+       movaps  -0xb8(%rax),%xmm8
+       movaps  -0xa8(%rax),%xmm9
+       movaps  -0x98(%rax),%xmm10
+       movaps  -0x88(%rax),%xmm11
+       movaps  -0x78(%rax),%xmm12
+       movaps  -0x68(%rax),%xmm13
+       movaps  -0x58(%rax),%xmm14
+       movaps  -0x48(%rax),%xmm15
+___
+$code.=<<___;
+       mov     -48(%rax),%r15
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp             # restore %rsp
+.Lgcm_dec_abort:
+       mov     $ret,%rax               # return value
+       ret
+.size  aesni_gcm_decrypt,.-aesni_gcm_decrypt
+___
+
+# _aesni_ctr32_6x: CTR-encrypt six 16-byte blocks without hashing them.
+# Derives six counter blocks from $T1 (leaving the following counter value
+# in $T1), runs them through the AES rounds, XORs the result with 0x60
+# bytes at $inp, stores 0x60 bytes at $out and advances both pointers.
+# The six output blocks also remain in $inout0-$inout5 for the caller.
+# Clobbers %r12, %r13, $Z0-$Z2, $T2, $Xi, $Hkey and $rndkey.
+$code.=<<___;
+.type  _aesni_ctr32_6x,\@abi-omnipotent
+.align 32
+_aesni_ctr32_6x:
+       vmovdqu         0x00-0x80($key),$Z0     # borrow $Z0 for $rndkey
+       vmovdqu         0x20($const),$T2        # borrow $T2, .Lone_msb
+       lea             -1($rounds),%r13        # iteration count for .Loop_ctr32
+       vmovups         0x10-0x80($key),$rndkey
+       lea             0x20-0x80($key),%r12    # walks the remaining round keys
+       vpxor           $Z0,$T1,$inout0
+       add             \$`6<<24`,$counter
+       jc              .Lhandle_ctr32_2        # byte-wise vpaddb below would wrap; use 32-bit adds
+       vpaddb          $T2,$T1,$inout1
+       vpaddb          $T2,$inout1,$inout2
+       vpxor           $Z0,$inout1,$inout1
+       vpaddb          $T2,$inout2,$inout3
+       vpxor           $Z0,$inout2,$inout2
+       vpaddb          $T2,$inout3,$inout4
+       vpxor           $Z0,$inout3,$inout3
+       vpaddb          $T2,$inout4,$inout5
+       vpxor           $Z0,$inout4,$inout4
+       vpaddb          $T2,$inout5,$T1
+       vpxor           $Z0,$inout5,$inout5
+       jmp             .Loop_ctr32
+
+.align 16
+.Loop_ctr32:
+       vaesenc         $rndkey,$inout0,$inout0
+       vaesenc         $rndkey,$inout1,$inout1
+       vaesenc         $rndkey,$inout2,$inout2
+       vaesenc         $rndkey,$inout3,$inout3
+       vaesenc         $rndkey,$inout4,$inout4
+       vaesenc         $rndkey,$inout5,$inout5
+       vmovups         (%r12),$rndkey
+       lea             0x10(%r12),%r12
+       dec             %r13d
+       jnz             .Loop_ctr32
+
+       vmovdqu         (%r12),$Hkey            # last round key
+       vaesenc         $rndkey,$inout0,$inout0
+       vpxor           0x00($inp),$Hkey,$Z0
+       vaesenc         $rndkey,$inout1,$inout1
+       vpxor           0x10($inp),$Hkey,$Z1
+       vaesenc         $rndkey,$inout2,$inout2
+       vpxor           0x20($inp),$Hkey,$Z2
+       vaesenc         $rndkey,$inout3,$inout3
+       vpxor           0x30($inp),$Hkey,$Xi
+       vaesenc         $rndkey,$inout4,$inout4
+       vpxor           0x40($inp),$Hkey,$T2
+       vaesenc         $rndkey,$inout5,$inout5
+       vpxor           0x50($inp),$Hkey,$Hkey
+       lea             0x60($inp),$inp         # advance input by six blocks
+
+       vaesenclast     $Z0,$inout0,$inout0
+       vaesenclast     $Z1,$inout1,$inout1
+       vaesenclast     $Z2,$inout2,$inout2
+       vaesenclast     $Xi,$inout3,$inout3
+       vaesenclast     $T2,$inout4,$inout4
+       vaesenclast     $Hkey,$inout5,$inout5
+       vmovups         $inout0,0x00($out)
+       vmovups         $inout1,0x10($out)
+       vmovups         $inout2,0x20($out)
+       vmovups         $inout3,0x30($out)
+       vmovups         $inout4,0x40($out)
+       vmovups         $inout5,0x50($out)
+       lea             0x60($out),$out         # advance output by six blocks
+
+       ret
+.align 32
+.Lhandle_ctr32_2:
+       vpshufb         $Ii,$T1,$Z2             # byte-swap counter
+       vmovdqu         0x30($const),$Z1        # borrow $Z1, .Ltwo_lsb
+       vpaddd          0x40($const),$Z2,$inout1        # .Lone_lsb
+       vpaddd          $Z1,$Z2,$inout2
+       vpaddd          $Z1,$inout1,$inout3
+       vpshufb         $Ii,$inout1,$inout1
+       vpaddd          $Z1,$inout2,$inout4
+       vpshufb         $Ii,$inout2,$inout2
+       vpxor           $Z0,$inout1,$inout1
+       vpaddd          $Z1,$inout3,$inout5
+       vpshufb         $Ii,$inout3,$inout3
+       vpxor           $Z0,$inout2,$inout2
+       vpaddd          $Z1,$inout4,$T1         # byte-swapped next counter value
+       vpshufb         $Ii,$inout4,$inout4
+       vpxor           $Z0,$inout3,$inout3
+       vpshufb         $Ii,$inout5,$inout5
+       vpxor           $Z0,$inout4,$inout4
+       vpshufb         $Ii,$T1,$T1             # next counter value
+       vpxor           $Z0,$inout5,$inout5
+       jmp     .Loop_ctr32
+.size  _aesni_ctr32_6x,.-_aesni_ctr32_6x
+
+.globl aesni_gcm_encrypt
+.type  aesni_gcm_encrypt,\@function,6
+.align 32
+aesni_gcm_encrypt:
+       xor     $ret,$ret
+       cmp     \$0x60*3,$len                   # minimal accepted length
+       jb      .Lgcm_enc_abort
+
+       lea     (%rsp),%rax                     # save stack pointer
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___ if ($win64);
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,-0xd8(%rax)
+       movaps  %xmm7,-0xc8(%rax)
+       movaps  %xmm8,-0xb8(%rax)
+       movaps  %xmm9,-0xa8(%rax)
+       movaps  %xmm10,-0x98(%rax)
+       movaps  %xmm11,-0x88(%rax)
+       movaps  %xmm12,-0x78(%rax)
+       movaps  %xmm13,-0x68(%rax)
+       movaps  %xmm14,-0x58(%rax)
+       movaps  %xmm15,-0x48(%rax)
+.Lgcm_enc_body:
+___
+$code.=<<___;
+       vzeroupper
+
+       vmovdqu         ($ivp),$T1              # input counter value
+       add             \$-128,%rsp
+       mov             12($ivp),$counter
+       lea             .Lbswap_mask(%rip),$const
+       lea             -0x80($key),$in0        # borrow $in0
+       mov             \$0xf80,$end0           # borrow $end0
+       lea             0x80($key),$key         # size optimization
+       vmovdqu         ($const),$Ii            # borrow $Ii for .Lbswap_mask
+       and             \$-128,%rsp             # ensure stack alignment
+       mov             0xf0-0x80($key),$rounds
+
+       and             $end0,$in0
+       and             %rsp,$end0
+       sub             $in0,$end0
+       jc              .Lenc_no_key_aliasing
+       cmp             \$768,$end0
+       jnc             .Lenc_no_key_aliasing
+       sub             $end0,%rsp              # avoid aliasing with key
+.Lenc_no_key_aliasing:
+
+       lea             ($out),$in0
+       lea             -0xc0($out,$len),$end0
+       shr             \$4,$len
+
+       call            _aesni_ctr32_6x
+       vpshufb         $Ii,$inout0,$Xi         # save bswapped output on stack
+       vpshufb         $Ii,$inout1,$T2
+       vmovdqu         $Xi,0x70(%rsp)
+       vpshufb         $Ii,$inout2,$Z0
+       vmovdqu         $T2,0x60(%rsp)
+       vpshufb         $Ii,$inout3,$Z1
+       vmovdqu         $Z0,0x50(%rsp)
+       vpshufb         $Ii,$inout4,$Z2
+       vmovdqu         $Z1,0x40(%rsp)
+       vpshufb         $Ii,$inout5,$Z3         # passed to _aesni_ctr32_ghash_6x
+       vmovdqu         $Z2,0x30(%rsp)
+
+       call            _aesni_ctr32_6x
+
+       vmovdqu         ($Xip),$Xi              # load Xi
+       lea             0x20+0x20($Xip),$Xip    # size optimization
+       sub             \$12,$len
+       mov             \$0x60*2,$ret
+       vpshufb         $Ii,$Xi,$Xi
+
+       call            _aesni_ctr32_ghash_6x
+       vmovdqu         0x20(%rsp),$Z3          # I[5]
+        vmovdqu        ($const),$Ii            # borrow $Ii for .Lbswap_mask
+       vmovdqu         0x00-0x20($Xip),$Hkey   # $Hkey^1
+       vpunpckhqdq     $Z3,$Z3,$T1
+       vmovdqu         0x20-0x20($Xip),$rndkey # borrow $rndkey for $HK
+        vmovups        $inout0,-0x60($out)     # save output
+        vpshufb        $Ii,$inout0,$inout0     # but keep bswapped copy
+       vpxor           $Z3,$T1,$T1
+        vmovups        $inout1,-0x50($out)
+        vpshufb        $Ii,$inout1,$inout1
+        vmovups        $inout2,-0x40($out)
+        vpshufb        $Ii,$inout2,$inout2
+        vmovups        $inout3,-0x30($out)
+        vpshufb        $Ii,$inout3,$inout3
+        vmovups        $inout4,-0x20($out)
+        vpshufb        $Ii,$inout4,$inout4
+        vmovups        $inout5,-0x10($out)
+        vpshufb        $Ii,$inout5,$inout5
+        vmovdqu        $inout0,0x10(%rsp)      # free $inout0
+___
+{ my ($HK,$T3)=($rndkey,$inout0);
+
+$code.=<<___;
+        vmovdqu        0x30(%rsp),$Z2          # I[4]
+        vmovdqu        0x10-0x20($Xip),$Ii     # borrow $Ii for $Hkey^2
+        vpunpckhqdq    $Z2,$Z2,$T2
+       vpclmulqdq      \$0x00,$Hkey,$Z3,$Z1
+        vpxor          $Z2,$T2,$T2
+       vpclmulqdq      \$0x11,$Hkey,$Z3,$Z3
+       vpclmulqdq      \$0x00,$HK,$T1,$T1
+
+        vmovdqu        0x40(%rsp),$T3          # I[3]
+       vpclmulqdq      \$0x00,$Ii,$Z2,$Z0
+        vmovdqu        0x30-0x20($Xip),$Hkey   # $Hkey^3
+       vpxor           $Z1,$Z0,$Z0
+        vpunpckhqdq    $T3,$T3,$Z1
+       vpclmulqdq      \$0x11,$Ii,$Z2,$Z2
+        vpxor          $T3,$Z1,$Z1
+       vpxor           $Z3,$Z2,$Z2
+       vpclmulqdq      \$0x10,$HK,$T2,$T2
+        vmovdqu        0x50-0x20($Xip),$HK
+       vpxor           $T1,$T2,$T2
+
+        vmovdqu        0x50(%rsp),$T1          # I[2]
+       vpclmulqdq      \$0x00,$Hkey,$T3,$Z3
+        vmovdqu        0x40-0x20($Xip),$Ii     # borrow $Ii for $Hkey^4
+       vpxor           $Z0,$Z3,$Z3
+        vpunpckhqdq    $T1,$T1,$Z0
+       vpclmulqdq      \$0x11,$Hkey,$T3,$T3
+        vpxor          $T1,$Z0,$Z0
+       vpxor           $Z2,$T3,$T3
+       vpclmulqdq      \$0x00,$HK,$Z1,$Z1
+       vpxor           $T2,$Z1,$Z1
+
+        vmovdqu        0x60(%rsp),$T2          # I[1]
+       vpclmulqdq      \$0x00,$Ii,$T1,$Z2
+        vmovdqu        0x60-0x20($Xip),$Hkey   # $Hkey^5
+       vpxor           $Z3,$Z2,$Z2
+        vpunpckhqdq    $T2,$T2,$Z3
+       vpclmulqdq      \$0x11,$Ii,$T1,$T1
+        vpxor          $T2,$Z3,$Z3
+       vpxor           $T3,$T1,$T1
+       vpclmulqdq      \$0x10,$HK,$Z0,$Z0
+        vmovdqu        0x80-0x20($Xip),$HK
+       vpxor           $Z1,$Z0,$Z0
+
+        vpxor          0x70(%rsp),$Xi,$Xi      # accumulate I[0]
+       vpclmulqdq      \$0x00,$Hkey,$T2,$Z1
+        vmovdqu        0x70-0x20($Xip),$Ii     # borrow $Ii for $Hkey^6
+        vpunpckhqdq    $Xi,$Xi,$T3
+       vpxor           $Z2,$Z1,$Z1
+       vpclmulqdq      \$0x11,$Hkey,$T2,$T2
+        vpxor          $Xi,$T3,$T3
+       vpxor           $T1,$T2,$T2
+       vpclmulqdq      \$0x00,$HK,$Z3,$Z3
+       vpxor           $Z0,$Z3,$Z0
+
+       vpclmulqdq      \$0x00,$Ii,$Xi,$Z2
+        vmovdqu        0x00-0x20($Xip),$Hkey   # $Hkey^1
+        vpunpckhqdq    $inout5,$inout5,$T1
+       vpclmulqdq      \$0x11,$Ii,$Xi,$Xi
+        vpxor          $inout5,$T1,$T1
+       vpxor           $Z1,$Z2,$Z1
+       vpclmulqdq      \$0x10,$HK,$T3,$T3
+        vmovdqu        0x20-0x20($Xip),$HK
+       vpxor           $T2,$Xi,$Z3
+       vpxor           $Z0,$T3,$Z2
+
+        vmovdqu        0x10-0x20($Xip),$Ii     # borrow $Ii for $Hkey^2
+         vpxor         $Z1,$Z3,$T3             # aggregated Karatsuba post-processing
+       vpclmulqdq      \$0x00,$Hkey,$inout5,$Z0
+         vpxor         $T3,$Z2,$Z2
+        vpunpckhqdq    $inout4,$inout4,$T2
+       vpclmulqdq      \$0x11,$Hkey,$inout5,$inout5
+        vpxor          $inout4,$T2,$T2
+         vpslldq       \$8,$Z2,$T3
+       vpclmulqdq      \$0x00,$HK,$T1,$T1
+         vpxor         $T3,$Z1,$Xi
+         vpsrldq       \$8,$Z2,$Z2
+         vpxor         $Z2,$Z3,$Z3
+
+       vpclmulqdq      \$0x00,$Ii,$inout4,$Z1
+        vmovdqu        0x30-0x20($Xip),$Hkey   # $Hkey^3
+       vpxor           $Z0,$Z1,$Z1
+        vpunpckhqdq    $inout3,$inout3,$T3
+       vpclmulqdq      \$0x11,$Ii,$inout4,$inout4
+        vpxor          $inout3,$T3,$T3
+       vpxor           $inout5,$inout4,$inout4
+         vpalignr      \$8,$Xi,$Xi,$inout5     # 1st phase
+       vpclmulqdq      \$0x10,$HK,$T2,$T2
+        vmovdqu        0x50-0x20($Xip),$HK
+       vpxor           $T1,$T2,$T2
+
+       vpclmulqdq      \$0x00,$Hkey,$inout3,$Z0
+        vmovdqu        0x40-0x20($Xip),$Ii     # borrow $Ii for $Hkey^4
+       vpxor           $Z1,$Z0,$Z0
+        vpunpckhqdq    $inout2,$inout2,$T1
+       vpclmulqdq      \$0x11,$Hkey,$inout3,$inout3
+        vpxor          $inout2,$T1,$T1
+       vpxor           $inout4,$inout3,$inout3
+         vxorps        0x10(%rsp),$Z3,$Z3      # accumulate $inout0
+       vpclmulqdq      \$0x00,$HK,$T3,$T3
+       vpxor           $T2,$T3,$T3
+
+         vpclmulqdq    \$0x10,0x10($const),$Xi,$Xi
+         vxorps        $inout5,$Xi,$Xi
+
+       vpclmulqdq      \$0x00,$Ii,$inout2,$Z1
+        vmovdqu        0x60-0x20($Xip),$Hkey   # $Hkey^5
+       vpxor           $Z0,$Z1,$Z1
+        vpunpckhqdq    $inout1,$inout1,$T2
+       vpclmulqdq      \$0x11,$Ii,$inout2,$inout2
+        vpxor          $inout1,$T2,$T2
+         vpalignr      \$8,$Xi,$Xi,$inout5     # 2nd phase
+       vpxor           $inout3,$inout2,$inout2
+       vpclmulqdq      \$0x10,$HK,$T1,$T1
+        vmovdqu        0x80-0x20($Xip),$HK
+       vpxor           $T3,$T1,$T1
+
+         vxorps        $Z3,$inout5,$inout5
+         vpclmulqdq    \$0x10,0x10($const),$Xi,$Xi
+         vxorps        $inout5,$Xi,$Xi
+
+       vpclmulqdq      \$0x00,$Hkey,$inout1,$Z0
+        vmovdqu        0x70-0x20($Xip),$Ii     # borrow $Ii for $Hkey^6
+       vpxor           $Z1,$Z0,$Z0
+        vpunpckhqdq    $Xi,$Xi,$T3
+       vpclmulqdq      \$0x11,$Hkey,$inout1,$inout1
+        vpxor          $Xi,$T3,$T3
+       vpxor           $inout2,$inout1,$inout1
+       vpclmulqdq      \$0x00,$HK,$T2,$T2
+       vpxor           $T1,$T2,$T2
+
+       vpclmulqdq      \$0x00,$Ii,$Xi,$Z1
+       vpclmulqdq      \$0x11,$Ii,$Xi,$Z3
+       vpxor           $Z0,$Z1,$Z1
+       vpclmulqdq      \$0x10,$HK,$T3,$Z2
+       vpxor           $inout1,$Z3,$Z3
+       vpxor           $T2,$Z2,$Z2
+
+       vpxor           $Z1,$Z3,$Z0             # aggregated Karatsuba post-processing
+       vpxor           $Z0,$Z2,$Z2
+       vpslldq         \$8,$Z2,$T1
+       vmovdqu         0x10($const),$Hkey      # .Lpoly
+       vpsrldq         \$8,$Z2,$Z2
+       vpxor           $T1,$Z1,$Xi
+       vpxor           $Z2,$Z3,$Z3
+
+       vpalignr        \$8,$Xi,$Xi,$T2         # 1st phase
+       vpclmulqdq      \$0x10,$Hkey,$Xi,$Xi
+       vpxor           $T2,$Xi,$Xi
+
+       vpalignr        \$8,$Xi,$Xi,$T2         # 2nd phase
+       vpclmulqdq      \$0x10,$Hkey,$Xi,$Xi
+       vpxor           $Z3,$T2,$T2
+       vpxor           $T2,$Xi,$Xi
+___
+}
+$code.=<<___;
+       vpshufb         ($const),$Xi,$Xi        # .Lbswap_mask
+       vmovdqu         $Xi,-0x40($Xip)         # output Xi
+
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       movaps  -0xd8(%rax),%xmm6
+       movaps  -0xc8(%rax),%xmm7
+       movaps  -0xb8(%rax),%xmm8
+       movaps  -0xa8(%rax),%xmm9
+       movaps  -0x98(%rax),%xmm10
+       movaps  -0x88(%rax),%xmm11
+       movaps  -0x78(%rax),%xmm12
+       movaps  -0x68(%rax),%xmm13
+       movaps  -0x58(%rax),%xmm14
+       movaps  -0x48(%rax),%xmm15
+___
+$code.=<<___;
+       mov     -48(%rax),%r15
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp             # restore %rsp
+.Lgcm_enc_abort:
+       mov     $ret,%rax               # return value
+       ret
+.size  aesni_gcm_encrypt,.-aesni_gcm_encrypt
+___
+
+# Constant pool. The routines above load the address of .Lbswap_mask into
+# $const and reach the other entries by fixed displacement, so the order
+# and the 16-byte size of each entry must not change:
+#   0x00($const) .Lbswap_mask    0x10($const) .Lpoly
+#   0x20($const) .Lone_msb       0x30($const) .Ltwo_lsb
+#   0x40($const) .Lone_lsb
+$code.=<<___;
+.align 64
+.Lbswap_mask:
+       .byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lpoly:
+       .byte   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+.Lone_msb:
+       .byte   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+.Ltwo_lsb:
+       .byte   2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+.Lone_lsb:
+       .byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+.asciz "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.align 64
+___
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___
+.extern        __imp_RtlVirtualUnwind
+.type  gcm_se_handler,\@abi-omnipotent
+.align 16
+gcm_se_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # prologue label
+       cmp     %r10,%rbx               # context->Rip<prologue label
+       jb      .Lcommon_seh_tail
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=epilogue label
+       jae     .Lcommon_seh_tail
+
+       mov     120($context),%rax      # pull context->Rax
+
+       mov     -48(%rax),%r15
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       mov     %r15,240($context)
+       mov     %r14,232($context)
+       mov     %r13,224($context)
+       mov     %r12,216($context)
+       mov     %rbp,160($context)
+       mov     %rbx,144($context)
+
+       lea     -0xd8(%rax),%rsi        # %xmm save area
+       lea     512($context),%rdi      # & context.Xmm6
+       mov     \$20,%ecx               # 10*sizeof(%xmm0)/sizeof(%rax)
+       .long   0xa548f3fc              # cld; rep movsq
+
+.Lcommon_seh_tail:
+       mov     8(%rax),%rdi
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT)
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
+       mov     %r12,48(%rsp)           # arg7
+       mov     %rcx,56(%rsp)           # arg8, (NULL)
+       call    *__imp_RtlVirtualUnwind(%rip)
+
+       mov     \$1,%eax                # ExceptionContinueSearch
+       add     \$64,%rsp
+       popfq
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+       ret
+.size  gcm_se_handler,.-gcm_se_handler
+
+.section       .pdata
+.align 4
+       .rva    .LSEH_begin_aesni_gcm_decrypt
+       .rva    .LSEH_end_aesni_gcm_decrypt
+       .rva    .LSEH_gcm_dec_info
+
+       .rva    .LSEH_begin_aesni_gcm_encrypt
+       .rva    .LSEH_end_aesni_gcm_encrypt
+       .rva    .LSEH_gcm_enc_info
+.section       .xdata
+.align 8
+.LSEH_gcm_dec_info:
+       .byte   9,0,0,0
+       .rva    gcm_se_handler
+       .rva    .Lgcm_dec_body,.Lgcm_dec_abort
+.LSEH_gcm_enc_info:
+       .byte   9,0,0,0
+       .rva    gcm_se_handler
+       .rva    .Lgcm_enc_body,.Lgcm_enc_abort
+___
+}
+}}} else {{{
+# Fallback output: replaces (note "=" rather than ".=") the generated code
+# with stub entry points that do no work and return 0 in %eax.
+# NOTE(review): presumably callers read a 0 return as "no bytes processed"
+# and fall back to another code path - confirm against the C caller.
+$code=<<___;   # assembler is too old
+.text
+
+.globl aesni_gcm_encrypt
+.type  aesni_gcm_encrypt,\@abi-omnipotent
+aesni_gcm_encrypt:
+       xor     %eax,%eax
+       ret
+.size  aesni_gcm_encrypt,.-aesni_gcm_encrypt
+
+.globl aesni_gcm_decrypt
+.type  aesni_gcm_decrypt,\@abi-omnipotent
+aesni_gcm_decrypt:
+       xor     %eax,%eax
+       ret
+.size  aesni_gcm_decrypt,.-aesni_gcm_decrypt
+___
+}}}
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+
+print $code;
+
+close STDOUT;
index d91586e..77fbf34 100644 (file)
 # Add NEON implementation featuring polynomial multiplication, i.e. no
 # lookup tables involved. On Cortex A8 it was measured to process one
 # byte in 15 cycles or 55% faster than integer-only code.
+#
+# April 2014
+#
+# Switch to multiplication algorithm suggested in paper referred
+# below and combine it with reduction algorithm from x86 module.
+# Performance improvement over previous version varies from 65% on
+# Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8
+# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 -
+# in 9.33.
+#
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Polynomial Multiplication on ARM Processors using the NEON Engine.
+# 
+# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
 
 # ====================================================================
 # Note about "528B" variant. In ARM case it makes lesser sense to
@@ -303,117 +317,161 @@ $code.=<<___;
 .size  gcm_gmult_4bit,.-gcm_gmult_4bit
 ___
 {
-my $cnt=$Htbl; # $Htbl is used once in the very beginning
-
-my ($Hhi, $Hlo, $Zo, $T, $xi, $mod) = map("d$_",(0..7));
-my ($Qhi, $Qlo, $Z,  $R, $zero, $Qpost, $IN) = map("q$_",(8..15));
-
-# Z:Zo keeps 128-bit result shifted by 1 to the right, with bottom bit
-# in Zo. Or should I say "top bit", because GHASH is specified in
-# reverse bit order? Otherwise straightforward 128-bt H by one input
-# byte multiplication and modulo-reduction, times 16.
+my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3));
+my ($t0,$t1,$t2,$t3)=map("q$_",(8..12));
+my ($Hlo,$Hhi,$Hhl,$k48,$k32,$k16)=map("d$_",(26..31));
 
-sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
-sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }
-sub Q()     { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; }
+# clmul64x64($r,$a,$b): append to $code a NEON sequence that forms the
+# polynomial (carry-less) product of the two d registers $a and $b out of
+# 8-bit vmull.p8 partial products and leaves the result in q register $r.
+# Clobbers $t0-$t3 and expects $k48/$k32/$k16 to already hold the masks
+# the callers set up with vmov.i64. The "#lo"/"#hi" suffixes select the
+# halves of a q register and are rewritten to d-register names when the
+# code is printed.
+sub clmul64x64 {
+my ($r,$a,$b)=@_;
+$code.=<<___;
+       vext.8          $t0#lo, $a, $a, #1      @ A1
+       vmull.p8        $t0, $t0#lo, $b         @ F = A1*B
+       vext.8          $r#lo, $b, $b, #1       @ B1
+       vmull.p8        $r, $a, $r#lo           @ E = A*B1
+       vext.8          $t1#lo, $a, $a, #2      @ A2
+       vmull.p8        $t1, $t1#lo, $b         @ H = A2*B
+       vext.8          $t3#lo, $b, $b, #2      @ B2
+       vmull.p8        $t3, $a, $t3#lo         @ G = A*B2
+       vext.8          $t2#lo, $a, $a, #3      @ A3
+       veor            $t0, $t0, $r            @ L = E + F
+       vmull.p8        $t2, $t2#lo, $b         @ J = A3*B
+       vext.8          $r#lo, $b, $b, #3       @ B3
+       veor            $t1, $t1, $t3           @ M = G + H
+       vmull.p8        $r, $a, $r#lo           @ I = A*B3
+       veor            $t0#lo, $t0#lo, $t0#hi  @ t0 = (L) (P0 + P1) << 8
+       vand            $t0#hi, $t0#hi, $k48
+       vext.8          $t3#lo, $b, $b, #4      @ B4
+       veor            $t1#lo, $t1#lo, $t1#hi  @ t1 = (M) (P2 + P3) << 16
+       vand            $t1#hi, $t1#hi, $k32
+       vmull.p8        $t3, $a, $t3#lo         @ K = A*B4
+       veor            $t2, $t2, $r            @ N = I + J
+       veor            $t0#lo, $t0#lo, $t0#hi
+       veor            $t1#lo, $t1#lo, $t1#hi
+       veor            $t2#lo, $t2#lo, $t2#hi  @ t2 = (N) (P4 + P5) << 24
+       vand            $t2#hi, $t2#hi, $k16
+       vext.8          $t0, $t0, $t0, #15
+       veor            $t3#lo, $t3#lo, $t3#hi  @ t3 = (K) (P6 + P7) << 32
+       vmov.i64        $t3#hi, #0
+       vext.8          $t1, $t1, $t1, #14
+       veor            $t2#lo, $t2#lo, $t2#hi
+       vmull.p8        $r, $a, $b              @ D = A*B
+       vext.8          $t3, $t3, $t3, #12
+       vext.8          $t2, $t2, $t2, #13
+       veor            $t0, $t0, $t1
+       veor            $t2, $t2, $t3
+       veor            $r, $r, $t0
+       veor            $r, $r, $t2
+___
+}
 
 $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
 .fpu   neon
 
+.global        gcm_init_neon
+.type  gcm_init_neon,%function
+.align 4
+gcm_init_neon:
+       vld1.64         $IN#hi,[r1,:64]!        @ load H
+       vmov.i8         $t0,#0xe1
+       vld1.64         $IN#lo,[r1,:64]
+       vshl.i64        $t0#hi,#57
+       vshr.u64        $t0#lo,#63              @ t0=0xc2....01
+       vdup.8          $t1,$IN#hi[7]
+       vshr.u64        $Hlo,$IN#lo,#63
+       vshr.s8         $t1,#7                  @ broadcast carry bit
+       vshl.i64        $IN,$IN,#1
+       vand            $t0,$t0,$t1
+       vorr            $IN#hi,$Hlo             @ H<<<=1
+       veor            $IN,$IN,$t0             @ twisted H
+       vstmia          r0,{$IN}
+
+       ret                                     @ bx lr
+.size  gcm_init_neon,.-gcm_init_neon
+
 .global        gcm_gmult_neon
 .type  gcm_gmult_neon,%function
 .align 4
 gcm_gmult_neon:
-       sub             $Htbl,#16               @ point at H in GCM128_CTX
-       vld1.64         `&Dhi("$IN")`,[$Xi,:64]!@ load Xi
-       vmov.i32        $mod,#0xe1              @ our irreducible polynomial
-       vld1.64         `&Dlo("$IN")`,[$Xi,:64]!
-       vshr.u64        $mod,#32
-       vldmia          $Htbl,{$Hhi-$Hlo}       @ load H
-       veor            $zero,$zero
+       vld1.64         $IN#hi,[$Xi,:64]!       @ load Xi
+       vld1.64         $IN#lo,[$Xi,:64]!
+       vmov.i64        $k48,#0x0000ffffffffffff
+       vldmia          $Htbl,{$Hlo-$Hhi}       @ load twisted H
+       vmov.i64        $k32,#0x00000000ffffffff
 #ifdef __ARMEL__
        vrev64.8        $IN,$IN
 #endif
-       veor            $Qpost,$Qpost
-       veor            $R,$R
-       mov             $cnt,#16
-       veor            $Z,$Z
+       vmov.i64        $k16,#0x000000000000ffff
+       veor            $Hhl,$Hlo,$Hhi          @ Karatsuba pre-processing
        mov             $len,#16
-       veor            $Zo,$Zo
-       vdup.8          $xi,`&Dlo("$IN")`[0]    @ broadcast lowest byte
-       b               .Linner_neon
+       b               .Lgmult_neon
 .size  gcm_gmult_neon,.-gcm_gmult_neon
 
 .global        gcm_ghash_neon
 .type  gcm_ghash_neon,%function
 .align 4
 gcm_ghash_neon:
-       vld1.64         `&Dhi("$Z")`,[$Xi,:64]! @ load Xi
-       vmov.i32        $mod,#0xe1              @ our irreducible polynomial
-       vld1.64         `&Dlo("$Z")`,[$Xi,:64]!
-       vshr.u64        $mod,#32
-       vldmia          $Xi,{$Hhi-$Hlo}         @ load H
-       veor            $zero,$zero
-       nop
+       vld1.64         $Xl#hi,[$Xi,:64]!       @ load Xi
+       vld1.64         $Xl#lo,[$Xi,:64]!
+       vmov.i64        $k48,#0x0000ffffffffffff
+       vldmia          $Htbl,{$Hlo-$Hhi}       @ load twisted H
+       vmov.i64        $k32,#0x00000000ffffffff
 #ifdef __ARMEL__
-       vrev64.8        $Z,$Z
+       vrev64.8        $Xl,$Xl
 #endif
-.Louter_neon:
-       vld1.64         `&Dhi($IN)`,[$inp]!     @ load inp
-       veor            $Qpost,$Qpost
-       vld1.64         `&Dlo($IN)`,[$inp]!
-       veor            $R,$R
-       mov             $cnt,#16
+       vmov.i64        $k16,#0x000000000000ffff
+       veor            $Hhl,$Hlo,$Hhi          @ Karatsuba pre-processing
+
+.Loop_neon:
+       vld1.64         $IN#hi,[$inp]!          @ load inp
+       vld1.64         $IN#lo,[$inp]!
 #ifdef __ARMEL__
        vrev64.8        $IN,$IN
 #endif
-       veor            $Zo,$Zo
-       veor            $IN,$Z                  @ inp^=Xi
-       veor            $Z,$Z
-       vdup.8          $xi,`&Dlo("$IN")`[0]    @ broadcast lowest byte
-.Linner_neon:
-       subs            $cnt,$cnt,#1
-       vmull.p8        $Qlo,$Hlo,$xi           @ H.lo·Xi[i]
-       vmull.p8        $Qhi,$Hhi,$xi           @ H.hi·Xi[i]
-       vext.8          $IN,$zero,#1            @ IN>>=8
-
-       veor            $Z,$Qpost               @ modulo-scheduled part
-       vshl.i64        `&Dlo("$R")`,#48
-       vdup.8          $xi,`&Dlo("$IN")`[0]    @ broadcast lowest byte
-       veor            $T,`&Dlo("$Qlo")`,`&Dlo("$Z")`
-
-       veor            `&Dhi("$Z")`,`&Dlo("$R")`
-       vuzp.8          $Qlo,$Qhi
-       vsli.8          $Zo,$T,#1               @ compose the "carry" byte
-       vext.8          $Z,$zero,#1             @ Z>>=8
-
-       vmull.p8        $R,$Zo,$mod             @ "carry"·0xe1
-       vshr.u8         $Zo,$T,#7               @ save Z's bottom bit
-       vext.8          $Qpost,$Qlo,$zero,#1    @ Qlo>>=8
-       veor            $Z,$Qhi
-       bne             .Linner_neon
-
-       veor            $Z,$Qpost               @ modulo-scheduled artefact
-       vshl.i64        `&Dlo("$R")`,#48
-       veor            `&Dhi("$Z")`,`&Dlo("$R")`
-
-       @ finalization, normalize Z:Zo
-       vand            $Zo,$mod                @ suffices to mask the bit
-       vshr.u64        `&Dhi(&Q("$Zo"))`,`&Dlo("$Z")`,#63
-       vshl.i64        $Z,#1
+       veor            $IN,$Xl                 @ inp^=Xi
+.Lgmult_neon:
+___
+       &clmul64x64     ($Xl,$Hlo,"$IN#lo");    # H.lo·Xi.lo
+$code.=<<___;
+       veor            $IN#lo,$IN#lo,$IN#hi    @ Karatsuba pre-processing
+___
+       &clmul64x64     ($Xm,$Hhl,"$IN#lo");    # (H.lo+H.hi)·(Xi.lo+Xi.hi)
+       &clmul64x64     ($Xh,$Hhi,"$IN#hi");    # H.hi·Xi.hi
+$code.=<<___;
+       veor            $Xm,$Xm,$Xl             @ Karatsuba post-processing
+       veor            $Xm,$Xm,$Xh
+       veor            $Xl#hi,$Xl#hi,$Xm#lo
+       veor            $Xh#lo,$Xh#lo,$Xm#hi    @ Xh|Xl - 256-bit result
+
+       @ equivalent of reduction_avx from ghash-x86_64.pl
+       vshl.i64        $t1,$Xl,#57             @ 1st phase
+       vshl.i64        $t2,$Xl,#62
+       veor            $t2,$t2,$t1             @
+       vshl.i64        $t1,$Xl,#63
+       veor            $t2, $t2, $t1           @
+       veor            $Xl#hi,$Xl#hi,$t2#lo    @
+       veor            $Xh#lo,$Xh#lo,$t2#hi
+
+       vshr.u64        $t2,$Xl,#1              @ 2nd phase
+       veor            $Xh,$Xh,$Xl
+       veor            $Xl,$Xl,$t2             @
+       vshr.u64        $t2,$t2,#6
+       vshr.u64        $Xl,$Xl,#1              @
+       veor            $Xl,$Xl,$Xh             @
+       veor            $Xl,$Xl,$t2             @
+
        subs            $len,#16
-       vorr            $Z,`&Q("$Zo")`          @ Z=Z:Zo<<1
-       bne             .Louter_neon
+       bne             .Loop_neon
 
 #ifdef __ARMEL__
-       vrev64.8        $Z,$Z
+       vrev64.8        $Xl,$Xl
 #endif
        sub             $Xi,#16 
-       vst1.64         `&Dhi("$Z")`,[$Xi,:64]! @ write out Xi
-       vst1.64         `&Dlo("$Z")`,[$Xi,:64]
+       vst1.64         $Xl#hi,[$Xi,:64]!       @ write out Xi
+       vst1.64         $Xl#lo,[$Xi,:64]
 
-       bx      lr
+       ret                                     @ bx lr
 .size  gcm_ghash_neon,.-gcm_ghash_neon
 #endif
 ___
@@ -423,7 +481,13 @@ $code.=<<___;
 .align  2
 ___
 
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;   # make it possible to compile with -march=armv4
-print $code;
+foreach (split("\n",$code)) {
+       s/\`([^\`]*)\`/eval $1/geo;
+
+       s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo       or
+       s/\bret\b/bx    lr/go           or
+       s/\bbx\s+lr\b/.word\t0xe12fff1e/go;    # make it possible to compile with -march=armv4
+
+       print $_,"\n";
+}
 close STDOUT; # enforce flush
index 6a40d5d..39096b4 100644 (file)
@@ -186,13 +186,13 @@ $code.=<<___;
        sllg    $rem1,$Zlo,3
        xgr     $Zlo,$tmp
        ngr     $rem1,$x78
+       sllg    $tmp,$Zhi,60
        j       .Lghash_inner
 .align 16
 .Lghash_inner:
        srlg    $Zlo,$Zlo,4
-       sllg    $tmp,$Zhi,60
-       xg      $Zlo,8($nlo,$Htbl)
        srlg    $Zhi,$Zhi,4
+       xg      $Zlo,8($nlo,$Htbl)
        llgc    $xi,0($cnt,$Xi)
        xg      $Zhi,0($nlo,$Htbl)
        sllg    $nlo,$xi,4
@@ -213,9 +213,9 @@ $code.=<<___;
        sllg    $rem1,$Zlo,3
        xgr     $Zlo,$tmp
        ngr     $rem1,$x78
+       sllg    $tmp,$Zhi,60
        brct    $cnt,.Lghash_inner
 
-       sllg    $tmp,$Zhi,60
        srlg    $Zlo,$Zlo,4
        srlg    $Zhi,$Zhi,4
        xg      $Zlo,8($nlo,$Htbl)
index 70e7b04..0365e0f 100644 (file)
 # references to input data and Z.hi updates to achieve 12 cycles
 # timing. To anchor to something else, sha1-sparcv9.pl spends 11.6
 # cycles to process one byte on UltraSPARC pre-Tx CPU and ~24 on T1.
+#
+# October 2012
+#
+# Add VIS3 lookup-table-free implementation using polynomial
+# multiplication xmulx[hi] and extended addition addxc[cc]
+# instructions. 4.52/7.63x improvement on T3/T4 or in absolute
+# terms 7.90/2.14 cycles per byte. On T4 multi-process benchmark
+# saturates at ~15.5x single-process result on 8-core processor,
+# or ~20.5GBps per 2.85GHz socket.
 
 $bits=32;
 for (@ARGV)     { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
@@ -66,6 +75,10 @@ $Htbl="%i1";
 $inp="%i2";
 $len="%i3";
 
+$code.=<<___ if ($bits==64);
+.register      %g2,#scratch
+.register      %g3,#scratch
+___
 $code.=<<___;
 .section       ".text",#alloc,#execinstr
 
@@ -321,10 +334,238 @@ gcm_gmult_4bit:
        restore
 .type  gcm_gmult_4bit,#function
 .size  gcm_gmult_4bit,(.-gcm_gmult_4bit)
-.asciz "GHASH for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
+___
+\f
+{{{
+# Straightforward 128x128-bit multiplication using Karatsuba algorithm
+# followed by pair of 64-bit reductions [with a shortcut in first one,
+# which allowed to break dependency between reductions and remove one
+# multiplication from critical path]. While it might be suboptimal
+# with regard to sheer number of multiplications, other methods [such
+# as aggregate reduction] would require more 64-bit registers, which
+# we don't have in 32-bit application context.
+
+($Xip,$Htable,$inp,$len)=map("%i$_",(0..3));
+
+($Hhl,$Hlo,$Hhi,$Xlo,$Xhi,$xE1,$sqr, $C0,$C1,$C2,$C3,$V)=
+       (map("%o$_",(0..5,7)),map("%g$_",(1..5)));
+
+($shl,$shr)=map("%l$_",(0..7));
+
+# For details regarding "twisted H" see ghash-x86.pl.
+$code.=<<___;
+.globl gcm_init_vis3
+.align 32
+gcm_init_vis3:
+       save    %sp,-$frame,%sp
+
+       ldx     [%i1+0],$Hhi
+       ldx     [%i1+8],$Hlo
+       mov     0xE1,$Xhi
+       mov     1,$Xlo
+       sllx    $Xhi,57,$Xhi
+       srax    $Hhi,63,$C0             ! broadcast carry
+       addcc   $Hlo,$Hlo,$Hlo          ! H<<=1
+       addxc   $Hhi,$Hhi,$Hhi
+       and     $C0,$Xlo,$Xlo
+       and     $C0,$Xhi,$Xhi
+       xor     $Xlo,$Hlo,$Hlo
+       xor     $Xhi,$Hhi,$Hhi
+       stx     $Hlo,[%i0+8]            ! save twisted H
+       stx     $Hhi,[%i0+0]
+
+       sethi   %hi(0xA0406080),$V
+       sethi   %hi(0x20C0E000),%l0
+       or      $V,%lo(0xA0406080),$V
+       or      %l0,%lo(0x20C0E000),%l0
+       sllx    $V,32,$V
+       or      %l0,$V,$V               ! (0xE0·i)&0xff=0xA040608020C0E000
+       stx     $V,[%i0+16]
+
+       ret
+       restore
+.type  gcm_init_vis3,#function
+.size  gcm_init_vis3,.-gcm_init_vis3
+
+.globl gcm_gmult_vis3
+.align 32
+gcm_gmult_vis3:
+       save    %sp,-$frame,%sp
+
+       ldx     [$Xip+8],$Xlo           ! load Xi
+       ldx     [$Xip+0],$Xhi
+       ldx     [$Htable+8],$Hlo        ! load twisted H
+       ldx     [$Htable+0],$Hhi
+
+       mov     0xE1,%l7
+       sllx    %l7,57,$xE1             ! 57 is not a typo
+       ldx     [$Htable+16],$V         ! (0xE0·i)&0xff=0xA040608020C0E000
+
+       xor     $Hhi,$Hlo,$Hhl          ! Karatsuba pre-processing
+       xmulx   $Xlo,$Hlo,$C0
+       xor     $Xlo,$Xhi,$C2           ! Karatsuba pre-processing
+       xmulx   $C2,$Hhl,$C1
+       xmulxhi $Xlo,$Hlo,$Xlo
+       xmulxhi $C2,$Hhl,$C2
+       xmulxhi $Xhi,$Hhi,$C3
+       xmulx   $Xhi,$Hhi,$Xhi
+
+       sll     $C0,3,$sqr
+       srlx    $V,$sqr,$sqr            ! ·0xE0 [implicit &(7<<3)]
+       xor     $C0,$sqr,$sqr
+       sllx    $sqr,57,$sqr            ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
+
+       xor     $C0,$C1,$C1             ! Karatsuba post-processing
+       xor     $Xlo,$C2,$C2
+        xor    $sqr,$Xlo,$Xlo          ! real destination is $C1
+       xor     $C3,$C2,$C2
+       xor     $Xlo,$C1,$C1
+       xor     $Xhi,$C2,$C2
+       xor     $Xhi,$C1,$C1
+
+       xmulxhi $C0,$xE1,$Xlo           ! ·0xE1<<1<<56
+        xor    $C0,$C2,$C2
+       xmulx   $C1,$xE1,$C0
+        xor    $C1,$C3,$C3
+       xmulxhi $C1,$xE1,$C1
+
+       xor     $Xlo,$C2,$C2
+       xor     $C0,$C2,$C2
+       xor     $C1,$C3,$C3
+
+       stx     $C2,[$Xip+8]            ! save Xi
+       stx     $C3,[$Xip+0]
+
+       ret
+       restore
+.type  gcm_gmult_vis3,#function
+.size  gcm_gmult_vis3,.-gcm_gmult_vis3
+
+.globl gcm_ghash_vis3
+.align 32
+gcm_ghash_vis3:
+       save    %sp,-$frame,%sp
+
+       ldx     [$Xip+8],$C2            ! load Xi
+       ldx     [$Xip+0],$C3
+       ldx     [$Htable+8],$Hlo        ! load twisted H
+       ldx     [$Htable+0],$Hhi
+
+       mov     0xE1,%l7
+       sllx    %l7,57,$xE1             ! 57 is not a typo
+       ldx     [$Htable+16],$V         ! (0xE0·i)&0xff=0xA040608020C0E000
+
+       and     $inp,7,$shl
+       andn    $inp,7,$inp
+       sll     $shl,3,$shl
+       prefetch [$inp+63], 20
+       sub     %g0,$shl,$shr
+
+       xor     $Hhi,$Hlo,$Hhl          ! Karatsuba pre-processing
+.Loop:
+       ldx     [$inp+8],$Xlo
+       brz,pt  $shl,1f
+       ldx     [$inp+0],$Xhi
+
+       ldx     [$inp+16],$C1           ! align data
+       srlx    $Xlo,$shr,$C0
+       sllx    $Xlo,$shl,$Xlo
+       sllx    $Xhi,$shl,$Xhi
+       srlx    $C1,$shr,$C1
+       or      $C0,$Xhi,$Xhi
+       or      $C1,$Xlo,$Xlo
+1:
+       add     $inp,16,$inp
+       sub     $len,16,$len
+       xor     $C2,$Xlo,$Xlo
+       xor     $C3,$Xhi,$Xhi
+       prefetch [$inp+63], 20
+
+       xmulx   $Xlo,$Hlo,$C0
+       xor     $Xlo,$Xhi,$C2           ! Karatsuba pre-processing
+       xmulx   $C2,$Hhl,$C1
+       xmulxhi $Xlo,$Hlo,$Xlo
+       xmulxhi $C2,$Hhl,$C2
+       xmulxhi $Xhi,$Hhi,$C3
+       xmulx   $Xhi,$Hhi,$Xhi
+
+       sll     $C0,3,$sqr
+       srlx    $V,$sqr,$sqr            ! ·0xE0 [implicit &(7<<3)]
+       xor     $C0,$sqr,$sqr
+       sllx    $sqr,57,$sqr            ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
+
+       xor     $C0,$C1,$C1             ! Karatsuba post-processing
+       xor     $Xlo,$C2,$C2
+        xor    $sqr,$Xlo,$Xlo          ! real destination is $C1
+       xor     $C3,$C2,$C2
+       xor     $Xlo,$C1,$C1
+       xor     $Xhi,$C2,$C2
+       xor     $Xhi,$C1,$C1
+
+       xmulxhi $C0,$xE1,$Xlo           ! ·0xE1<<1<<56
+        xor    $C0,$C2,$C2
+       xmulx   $C1,$xE1,$C0
+        xor    $C1,$C3,$C3
+       xmulxhi $C1,$xE1,$C1
+
+       xor     $Xlo,$C2,$C2
+       xor     $C0,$C2,$C2
+       brnz,pt $len,.Loop
+       xor     $C1,$C3,$C3
+
+       stx     $C2,[$Xip+8]            ! save Xi
+       stx     $C3,[$Xip+0]
+
+       ret
+       restore
+.type  gcm_ghash_vis3,#function
+.size  gcm_ghash_vis3,.-gcm_ghash_vis3
+___
+}}}
+$code.=<<___;
+.asciz "GHASH for SPARCv9/VIS3, CRYPTOGAMS by <appro\@openssl.org>"
 .align 4
 ___
 
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
+\f
+# Purpose of these subroutines is to explicitly encode VIS instructions,
+# so that one can compile the module without having to specify VIS
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# Idea is to reserve for option to produce "universal" binary and let
+# programmer detect if current CPU is VIS capable at run-time.
+sub unvis3 {
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
+my ($ref,$opf);
+my %visopf = ( "addxc"         => 0x011,
+               "addxccc"       => 0x013,
+               "xmulx"         => 0x115,
+               "xmulxhi"       => 0x116        );
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rd";
+
+    if ($opf=$visopf{$mnemonic}) {
+       foreach ($rs1,$rs2,$rd) {
+           return $ref if (!/%([goli])([0-9])/);
+           $_=$bias{$1}+$2;
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+
+foreach (split("\n",$code)) {
+       s/\`([^\`]*)\`/eval $1/ge;
+
+       s/\b(xmulx[hi]*|addxc[c]{0,2})\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
+               &unvis3($1,$2,$3,$4)
+        /ge;
+
+       print $_,"\n";
+}
+
 close STDOUT;
index 83c727e..23a5527 100644 (file)
 # The module implements "4-bit" GCM GHASH function and underlying
 # single multiplication operation in GF(2^128). "4-bit" means that it
 # uses 256 bytes per-key table [+64/128 bytes fixed table]. It has two
-# code paths: vanilla x86 and vanilla MMX. Former will be executed on
-# 486 and Pentium, latter on all others. MMX GHASH features so called
+# code paths: vanilla x86 and vanilla SSE. Former will be executed on
+# 486 and Pentium, latter on all others. SSE GHASH features so called
 # "528B" variant of "4-bit" method utilizing additional 256+16 bytes
 # of per-key storage [+512 bytes shared table]. Performance results
 # are for streamed GHASH subroutine and are expressed in cycles per
 # processed byte, less is better:
 #
-#              gcc 2.95.3(*)   MMX assembler   x86 assembler
+#              gcc 2.95.3(*)   SSE assembler   x86 assembler
 #
 # Pentium      105/111(**)     -               50
 # PIII         68 /75          12.2            24
 # P4           125/125         17.8            84(***)
 # Opteron      66 /70          10.1            30
 # Core2                54 /67          8.4             18
+# Atom         105/105         16.8            53
+# VIA Nano     69 /71          13.0            27
 #
 # (*)  gcc 3.4.x was observed to generate few percent slower code,
 #      which is one of reasons why 2.95.3 results were chosen,
 #      another reason is lack of 3.4.x results for older CPUs;
-#      comparison with MMX results is not completely fair, because C
+#      comparison with SSE results is not completely fair, because C
 #      results are for vanilla "256B" implementation, while
 #      assembler results are for "528B";-)
 # (**) second number is result for code compiled with -fPIC flag,
@@ -40,8 +42,8 @@
 #
 # To summarize, it's >2-5 times faster than gcc-generated code. To
 # anchor it to something else SHA1 assembler processes one byte in
-# 11-13 cycles on contemporary x86 cores. As for choice of MMX in
-# particular, see comment at the end of the file...
+# ~7 cycles on contemporary x86 cores. As for choice of MMX/SSE
+# in particular, see comment at the end of the file...
 
 # May 2010
 #
 # similar manner resulted in almost 20% degradation on Sandy Bridge,
 # where original 64-bit code processes one byte in 1.95 cycles.
 
+#####################################################################
+# For reference, AMD Bulldozer processes one byte in 1.98 cycles in
+# 32-bit mode and 1.89 in 64-bit.
+
+# February 2013
+#
+# Overhaul: aggregate Karatsuba post-processing, improve ILP in
+# reduction_alg9. Resulting performance is 1.96 cycles per byte on
+# Westmere, 1.95 - on Sandy/Ivy Bridge, 1.76 - on Bulldozer.
+
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";
@@ -822,17 +834,18 @@ $len="ebx";
 &static_label("bswap");
 
 sub clmul64x64_T2 {    # minimal "register" pressure
-my ($Xhi,$Xi,$Hkey)=@_;
+my ($Xhi,$Xi,$Hkey,$HK)=@_;
 
        &movdqa         ($Xhi,$Xi);             #
        &pshufd         ($T1,$Xi,0b01001110);
-       &pshufd         ($T2,$Hkey,0b01001110);
+       &pshufd         ($T2,$Hkey,0b01001110)  if (!defined($HK));
        &pxor           ($T1,$Xi);              #
-       &pxor           ($T2,$Hkey);
+       &pxor           ($T2,$Hkey)             if (!defined($HK));
+                       $HK=$T2                 if (!defined($HK));
 
        &pclmulqdq      ($Xi,$Hkey,0x00);       #######
        &pclmulqdq      ($Xhi,$Hkey,0x11);      #######
-       &pclmulqdq      ($T1,$T2,0x00);         #######
+       &pclmulqdq      ($T1,$HK,0x00);         #######
        &xorps          ($T1,$Xi);              #
        &xorps          ($T1,$Xhi);             #
 
@@ -879,31 +892,32 @@ if (1) {          # Algorithm 9 with <<1 twist.
                        # below. Algorithm 9 was therefore chosen for
                        # further optimization...
 
-sub reduction_alg9 {   # 17/13 times faster than Intel version
+sub reduction_alg9 {   # 17/11 times faster than Intel version
 my ($Xhi,$Xi) = @_;
 
        # 1st phase
-       &movdqa         ($T1,$Xi);              #
+       &movdqa         ($T2,$Xi);              #
+       &movdqa         ($T1,$Xi);
+       &psllq          ($Xi,5);
+       &pxor           ($T1,$Xi);              #
        &psllq          ($Xi,1);
        &pxor           ($Xi,$T1);              #
-       &psllq          ($Xi,5);                #
-       &pxor           ($Xi,$T1);              #
        &psllq          ($Xi,57);               #
-       &movdqa         ($T2,$Xi);              #
+       &movdqa         ($T1,$Xi);              #
        &pslldq         ($Xi,8);
-       &psrldq         ($T2,8);                #
-       &pxor           ($Xi,$T1);
-       &pxor           ($Xhi,$T2);             #
+       &psrldq         ($T1,8);                #       
+       &pxor           ($Xi,$T2);
+       &pxor           ($Xhi,$T1);             #
 
        # 2nd phase
        &movdqa         ($T2,$Xi);
+       &psrlq          ($Xi,1);
+       &pxor           ($Xhi,$T2);             #
+       &pxor           ($T2,$Xi);
        &psrlq          ($Xi,5);
        &pxor           ($Xi,$T2);              #
        &psrlq          ($Xi,1);                #
-       &pxor           ($Xi,$T2);              #
-       &pxor           ($T2,$Xhi);
-       &psrlq          ($Xi,1);                #
-       &pxor           ($Xi,$T2);              #
+       &pxor           ($Xi,$Xhi)              #
 }
 
 &function_begin_B("gcm_init_clmul");
@@ -937,8 +951,14 @@ my ($Xhi,$Xi) = @_;
        &clmul64x64_T2  ($Xhi,$Xi,$Hkey);
        &reduction_alg9 ($Xhi,$Xi);
 
+       &pshufd         ($T1,$Hkey,0b01001110);
+       &pshufd         ($T2,$Xi,0b01001110);
+       &pxor           ($T1,$Hkey);            # Karatsuba pre-processing
        &movdqu         (&QWP(0,$Htbl),$Hkey);  # save H
+       &pxor           ($T2,$Xi);              # Karatsuba pre-processing
        &movdqu         (&QWP(16,$Htbl),$Xi);   # save H^2
+       &palignr        ($T2,$T1,8);            # low part is H.lo^H.hi
+       &movdqu         (&QWP(32,$Htbl),$T2);   # save Karatsuba "salt"
 
        &ret            ();
 &function_end_B("gcm_init_clmul");
@@ -956,8 +976,9 @@ my ($Xhi,$Xi) = @_;
        &movdqa         ($T3,&QWP(0,$const));
        &movups         ($Hkey,&QWP(0,$Htbl));
        &pshufb         ($Xi,$T3);
+       &movups         ($T2,&QWP(32,$Htbl));
 
-       &clmul64x64_T2  ($Xhi,$Xi,$Hkey);
+       &clmul64x64_T2  ($Xhi,$Xi,$Hkey,$T2);
        &reduction_alg9 ($Xhi,$Xi);
 
        &pshufb         ($Xi,$T3);
@@ -994,79 +1015,109 @@ my ($Xhi,$Xi) = @_;
        &movdqu         ($Xn,&QWP(16,$inp));    # Ii+1
        &pshufb         ($T1,$T3);
        &pshufb         ($Xn,$T3);
+       &movdqu         ($T3,&QWP(32,$Htbl));
        &pxor           ($Xi,$T1);              # Ii+Xi
 
-       &clmul64x64_T2  ($Xhn,$Xn,$Hkey);       # H*Ii+1
+       &pshufd         ($T1,$Xn,0b01001110);   # H*Ii+1
+       &movdqa         ($Xhn,$Xn);
+       &pxor           ($T1,$Xn);              #
+       &lea            ($inp,&DWP(32,$inp));   # i+=2
+
+       &pclmulqdq      ($Xn,$Hkey,0x00);       #######
+       &pclmulqdq      ($Xhn,$Hkey,0x11);      #######
+       &pclmulqdq      ($T1,$T3,0x00);         #######
        &movups         ($Hkey,&QWP(16,$Htbl)); # load H^2
+       &nop            ();
 
-       &lea            ($inp,&DWP(32,$inp));   # i+=2
        &sub            ($len,0x20);
        &jbe            (&label("even_tail"));
+       &jmp            (&label("mod_loop"));
 
-&set_label("mod_loop");
-       &clmul64x64_T2  ($Xhi,$Xi,$Hkey);       # H^2*(Ii+Xi)
-       &movdqu         ($T1,&QWP(0,$inp));     # Ii
-       &movups         ($Hkey,&QWP(0,$Htbl));  # load H
+&set_label("mod_loop",32);
+       &pshufd         ($T2,$Xi,0b01001110);   # H^2*(Ii+Xi)
+       &movdqa         ($Xhi,$Xi);
+       &pxor           ($T2,$Xi);              #
+       &nop            ();
 
-       &pxor           ($Xi,$Xn);              # (H*Ii+1) + H^2*(Ii+Xi)
-       &pxor           ($Xhi,$Xhn);
+       &pclmulqdq      ($Xi,$Hkey,0x00);       #######
+       &pclmulqdq      ($Xhi,$Hkey,0x11);      #######
+       &pclmulqdq      ($T2,$T3,0x10);         #######
+       &movups         ($Hkey,&QWP(0,$Htbl));  # load H
 
-       &movdqu         ($Xn,&QWP(16,$inp));    # Ii+1
-       &pshufb         ($T1,$T3);
-       &pshufb         ($Xn,$T3);
+       &xorps          ($Xi,$Xn);              # (H*Ii+1) + H^2*(Ii+Xi)
+       &movdqa         ($T3,&QWP(0,$const));
+       &xorps          ($Xhi,$Xhn);
+        &movdqu        ($Xhn,&QWP(0,$inp));    # Ii
+       &pxor           ($T1,$Xi);              # aggregated Karatsuba post-processing
+        &movdqu        ($Xn,&QWP(16,$inp));    # Ii+1
+       &pxor           ($T1,$Xhi);             #
 
-       &movdqa         ($T3,$Xn);              #&clmul64x64_TX ($Xhn,$Xn,$Hkey); H*Ii+1
-       &movdqa         ($Xhn,$Xn);
-        &pxor          ($Xhi,$T1);             # "Ii+Xi", consume early
+        &pshufb        ($Xhn,$T3);
+       &pxor           ($T2,$T1);              #
 
-         &movdqa       ($T1,$Xi);              #&reduction_alg9($Xhi,$Xi); 1st phase
+       &movdqa         ($T1,$T2);              #
+       &psrldq         ($T2,8);
+       &pslldq         ($T1,8);                #
+       &pxor           ($Xhi,$T2);
+       &pxor           ($Xi,$T1);              #
+        &pshufb        ($Xn,$T3);
+        &pxor          ($Xhi,$Xhn);            # "Ii+Xi", consume early
+
+       &movdqa         ($Xhn,$Xn);             #&clmul64x64_TX ($Xhn,$Xn,$Hkey); H*Ii+1
+         &movdqa       ($T2,$Xi);              #&reduction_alg9($Xhi,$Xi); 1st phase
+         &movdqa       ($T1,$Xi);
+         &psllq        ($Xi,5);
+         &pxor         ($T1,$Xi);              #
          &psllq        ($Xi,1);
          &pxor         ($Xi,$T1);              #
-         &psllq        ($Xi,5);                #
-         &pxor         ($Xi,$T1);              #
        &pclmulqdq      ($Xn,$Hkey,0x00);       #######
+       &movups         ($T3,&QWP(32,$Htbl));
          &psllq        ($Xi,57);               #
-         &movdqa       ($T2,$Xi);              #
+         &movdqa       ($T1,$Xi);              #
          &pslldq       ($Xi,8);
-         &psrldq       ($T2,8);                #       
-         &pxor         ($Xi,$T1);
-       &pshufd         ($T1,$T3,0b01001110);
+         &psrldq       ($T1,8);                #       
+         &pxor         ($Xi,$T2);
+         &pxor         ($Xhi,$T1);             #
+       &pshufd         ($T1,$Xhn,0b01001110);
+         &movdqa       ($T2,$Xi);              # 2nd phase
+         &psrlq        ($Xi,1);
+       &pxor           ($T1,$Xhn);
          &pxor         ($Xhi,$T2);             #
-       &pxor           ($T1,$T3);
-       &pshufd         ($T3,$Hkey,0b01001110);
-       &pxor           ($T3,$Hkey);            #
-
        &pclmulqdq      ($Xhn,$Hkey,0x11);      #######
-         &movdqa       ($T2,$Xi);              # 2nd phase
+       &movups         ($Hkey,&QWP(16,$Htbl)); # load H^2
+         &pxor         ($T2,$Xi);
          &psrlq        ($Xi,5);
          &pxor         ($Xi,$T2);              #
          &psrlq        ($Xi,1);                #
-         &pxor         ($Xi,$T2);              #
-         &pxor         ($T2,$Xhi);
-         &psrlq        ($Xi,1);                #
-         &pxor         ($Xi,$T2);              #
-
+         &pxor         ($Xi,$Xhi)              #
        &pclmulqdq      ($T1,$T3,0x00);         #######
-       &movups         ($Hkey,&QWP(16,$Htbl)); # load H^2
-       &xorps          ($T1,$Xn);              #
-       &xorps          ($T1,$Xhn);             #
-
-       &movdqa         ($T3,$T1);              #
-       &psrldq         ($T1,8);
-       &pslldq         ($T3,8);                #
-       &pxor           ($Xhn,$T1);
-       &pxor           ($Xn,$T3);              #
-       &movdqa         ($T3,&QWP(0,$const));
 
        &lea            ($inp,&DWP(32,$inp));
        &sub            ($len,0x20);
        &ja             (&label("mod_loop"));
 
 &set_label("even_tail");
-       &clmul64x64_T2  ($Xhi,$Xi,$Hkey);       # H^2*(Ii+Xi)
+       &pshufd         ($T2,$Xi,0b01001110);   # H^2*(Ii+Xi)
+       &movdqa         ($Xhi,$Xi);
+       &pxor           ($T2,$Xi);              #
 
-       &pxor           ($Xi,$Xn);              # (H*Ii+1) + H^2*(Ii+Xi)
-       &pxor           ($Xhi,$Xhn);
+       &pclmulqdq      ($Xi,$Hkey,0x00);       #######
+       &pclmulqdq      ($Xhi,$Hkey,0x11);      #######
+       &pclmulqdq      ($T2,$T3,0x10);         #######
+       &movdqa         ($T3,&QWP(0,$const));
+
+       &xorps          ($Xi,$Xn);              # (H*Ii+1) + H^2*(Ii+Xi)
+       &xorps          ($Xhi,$Xhn);
+       &pxor           ($T1,$Xi);              # aggregated Karatsuba post-processing
+       &pxor           ($T1,$Xhi);             #
+
+       &pxor           ($T2,$T1);              #
+
+       &movdqa         ($T1,$T2);              #
+       &psrldq         ($T2,8);
+       &pslldq         ($T1,8);                #
+       &pxor           ($Xhi,$T2);
+       &pxor           ($Xi,$T1);              #
 
        &reduction_alg9 ($Xhi,$Xi);
 
@@ -1273,13 +1324,6 @@ my ($Xhi,$Xi)=@_;
 &set_label("bswap",64);
        &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
        &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial
-}}     # $sse2
-
-&set_label("rem_4bit",64);
-       &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S);
-       &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S);
-       &data_word(0,0xE100<<$S,0,0xFD20<<$S,0,0xD940<<$S,0,0xC560<<$S);
-       &data_word(0,0x9180<<$S,0,0x8DA0<<$S,0,0xA9C0<<$S,0,0xB5E0<<$S);
 &set_label("rem_8bit",64);
        &data_short(0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E);
        &data_short(0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E);
@@ -1313,6 +1357,13 @@ my ($Xhi,$Xi)=@_;
        &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E);
        &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE);
        &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE);
+}}     # $sse2
+
+&set_label("rem_4bit",64);
+       &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S);
+       &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S);
+       &data_word(0,0xE100<<$S,0,0xFD20<<$S,0,0xD940<<$S,0,0xC560<<$S);
+       &data_word(0,0x9180<<$S,0,0x8DA0<<$S,0,0xA9C0<<$S,0,0xB5E0<<$S);
 }}}    # !$x86only
 
 &asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>");
index 38d779e..6e656ca 100644 (file)
@@ -22,6 +22,8 @@
 # P4           28.6            14.0            +100%
 # Opteron      19.3            7.7             +150%
 # Core2                17.8            8.1(**)         +120%
+# Atom         31.6            16.8            +88%
+# VIA Nano     21.8            10.1            +115%
 #
 # (*)  comparison is not completely fair, because C results are
 #      for vanilla "256B" implementation, while assembler results
 # providing access to a Westmere-based system on behalf of Intel
 # Open Source Technology Centre.
 
+# December 2012
+#
+# Overhaul: aggregate Karatsuba post-processing, improve ILP in
+# reduction_alg9, increase reduction aggregate factor to 4x. As for
+# the latter, ghash-x86.pl discusses that it makes lesser sense to
+# increase aggregate factor. Then why increase here? Critical path
+# consists of 3 independent pclmulqdq instructions, Karatsuba post-
+# processing and reduction. "On top" of this we lay down aggregated
+# multiplication operations, triplets of independent pclmulqdq's. As
+# issue rate for pclmulqdq is limited, it makes lesser sense to
+# aggregate more multiplications than it takes to perform remaining
+# non-multiplication operations. 2x is near-optimal coefficient for
+# contemporary Intel CPUs (therefore modest improvement coefficient),
+# but not for Bulldozer. Latter is because logical SIMD operations
+# are twice as slow in comparison to Intel, so that critical path is
+# longer. A CPU with higher pclmulqdq issue rate would also benefit
+# from higher aggregate factor...
+#
+# Westmere     1.78(+13%)
+# Sandy Bridge 1.80(+8%)
+# Ivy Bridge   1.80(+7%)
+# Haswell      0.55(+93%) (if system doesn't support AVX)
+# Broadwell    0.45(+110%)(if system doesn't support AVX)
+# Bulldozer    1.49(+27%)
+# Silvermont   2.88(+13%)
+
+# March 2013
+#
+# ... 8x aggregate factor AVX code path is using reduction algorithm
+# suggested by Shay Gueron[1]. Even though contemporary AVX-capable
+# CPUs such as Sandy and Ivy Bridge can execute it, the code performs
+# sub-optimally in comparison to above mentioned version. But thanks
+# to Ilya Albrekht and Max Locktyukhin of Intel Corp. we knew that
+# it performs in 0.41 cycles per byte on Haswell processor, and in
+# 0.29 on Broadwell.
+#
+# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
+
 $flavour = shift;
 $output  = shift;
 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
@@ -50,9 +90,30 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
 
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.19) + ($1>=2.22);
+}
+
+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+           `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.09) + ($1>=2.10);
+}
+
+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+           `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=11);
+}
+
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+       $avx = ($2>=3.0) + ($2>3.0);
+}
+
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
 
+$do4xaggr=1;
+
 # common register layout
 $nlo="%rax";
 $nhi="%rbx";
@@ -160,6 +221,7 @@ ___
 
 $code=<<___;
 .text
+.extern        OPENSSL_ia32cap_P
 
 .globl gcm_gmult_4bit
 .type  gcm_gmult_4bit,\@function,2
@@ -352,19 +414,27 @@ ___
 ($T1,$T2,$T3)=("%xmm3","%xmm4","%xmm5");
 
 sub clmul64x64_T2 {    # minimal register pressure
-my ($Xhi,$Xi,$Hkey,$modulo)=@_;
+my ($Xhi,$Xi,$Hkey,$HK)=@_;
 
-$code.=<<___ if (!defined($modulo));
+if (!defined($HK)) {   $HK = $T2;
+$code.=<<___;
        movdqa          $Xi,$Xhi                #
        pshufd          \$0b01001110,$Xi,$T1
        pshufd          \$0b01001110,$Hkey,$T2
        pxor            $Xi,$T1                 #
        pxor            $Hkey,$T2
 ___
+} else {
+$code.=<<___;
+       movdqa          $Xi,$Xhi                #
+       pshufd          \$0b01001110,$Xi,$T1
+       pxor            $Xi,$T1                 #
+___
+}
 $code.=<<___;
        pclmulqdq       \$0x00,$Hkey,$Xi        #######
        pclmulqdq       \$0x11,$Hkey,$Xhi       #######
-       pclmulqdq       \$0x00,$T2,$T1          #######
+       pclmulqdq       \$0x00,$HK,$T1          #######
        pxor            $Xi,$T1                 #
        pxor            $Xhi,$T1                #
 
@@ -376,42 +446,53 @@ $code.=<<___;
 ___
 }
 
-sub reduction_alg9 {   # 17/13 times faster than Intel version
+sub reduction_alg9 {   # 17/11 times faster than Intel version
 my ($Xhi,$Xi) = @_;
 
 $code.=<<___;
        # 1st phase
-       movdqa          $Xi,$T1                 #
+       movdqa          $Xi,$T2                 #
+       movdqa          $Xi,$T1
+       psllq           \$5,$Xi
+       pxor            $Xi,$T1                 #
        psllq           \$1,$Xi
        pxor            $T1,$Xi                 #
-       psllq           \$5,$Xi                 #
-       pxor            $T1,$Xi                 #
        psllq           \$57,$Xi                #
-       movdqa          $Xi,$T2                 #
+       movdqa          $Xi,$T1                 #
        pslldq          \$8,$Xi
-       psrldq          \$8,$T2                 #       
-       pxor            $T1,$Xi
-       pxor            $T2,$Xhi                #
+       psrldq          \$8,$T1                 #       
+       pxor            $T2,$Xi
+       pxor            $T1,$Xhi                #
 
        # 2nd phase
        movdqa          $Xi,$T2
+       psrlq           \$1,$Xi
+       pxor            $T2,$Xhi                #
+       pxor            $Xi,$T2
        psrlq           \$5,$Xi
        pxor            $T2,$Xi                 #
        psrlq           \$1,$Xi                 #
-       pxor            $T2,$Xi                 #
-       pxor            $Xhi,$T2
-       psrlq           \$1,$Xi                 #
-       pxor            $T2,$Xi                 #
+       pxor            $Xhi,$Xi                #
 ___
 }
 \f
 { my ($Htbl,$Xip)=@_4args;
+  my $HK="%xmm6";
 
 $code.=<<___;
 .globl gcm_init_clmul
 .type  gcm_init_clmul,\@abi-omnipotent
 .align 16
 gcm_init_clmul:
+.L_init_clmul:
+___
+$code.=<<___ if ($win64);
+.LSEH_begin_gcm_init_clmul:
+       # I can't trust assembler to use specific encoding:-(
+       .byte   0x48,0x83,0xec,0x18             #sub    $0x18,%rsp
+       .byte   0x0f,0x29,0x34,0x24             #movaps %xmm6,(%rsp)
+___
+$code.=<<___;
        movdqu          ($Xip),$Hkey
        pshufd          \$0b01001110,$Hkey,$Hkey        # dword swap
 
@@ -430,13 +511,47 @@ gcm_init_clmul:
        pxor            $T3,$Hkey               # if(carry) H^=0x1c2_polynomial
 
        # calculate H^2
+       pshufd          \$0b01001110,$Hkey,$HK
        movdqa          $Hkey,$Xi
+       pxor            $Hkey,$HK
 ___
-       &clmul64x64_T2  ($Xhi,$Xi,$Hkey);
+       &clmul64x64_T2  ($Xhi,$Xi,$Hkey,$HK);
        &reduction_alg9 ($Xhi,$Xi);
 $code.=<<___;
-       movdqu          $Hkey,($Htbl)           # save H
-       movdqu          $Xi,16($Htbl)           # save H^2
+       pshufd          \$0b01001110,$Hkey,$T1
+       pshufd          \$0b01001110,$Xi,$T2
+       pxor            $Hkey,$T1               # Karatsuba pre-processing
+       movdqu          $Hkey,0x00($Htbl)       # save H
+       pxor            $Xi,$T2                 # Karatsuba pre-processing
+       movdqu          $Xi,0x10($Htbl)         # save H^2
+       palignr         \$8,$T1,$T2             # low part is H.lo^H.hi...
+       movdqu          $T2,0x20($Htbl)         # save Karatsuba "salt"
+___
+if ($do4xaggr) {
+       &clmul64x64_T2  ($Xhi,$Xi,$Hkey,$HK);   # H^3
+       &reduction_alg9 ($Xhi,$Xi);
+$code.=<<___;
+       movdqa          $Xi,$T3
+___
+       &clmul64x64_T2  ($Xhi,$Xi,$Hkey,$HK);   # H^4
+       &reduction_alg9 ($Xhi,$Xi);
+$code.=<<___;
+       pshufd          \$0b01001110,$T3,$T1
+       pshufd          \$0b01001110,$Xi,$T2
+       pxor            $T3,$T1                 # Karatsuba pre-processing
+       movdqu          $T3,0x30($Htbl)         # save H^3
+       pxor            $Xi,$T2                 # Karatsuba pre-processing
+       movdqu          $Xi,0x40($Htbl)         # save H^4
+       palignr         \$8,$T1,$T2             # low part is H^3.lo^H^3.hi...
+       movdqu          $T2,0x50($Htbl)         # save Karatsuba "salt"
+___
+}
+$code.=<<___ if ($win64);
+       movaps  (%rsp),%xmm6
+       lea     0x18(%rsp),%rsp
+.LSEH_end_gcm_init_clmul:
+___
+$code.=<<___;
        ret
 .size  gcm_init_clmul,.-gcm_init_clmul
 ___
@@ -449,13 +564,38 @@ $code.=<<___;
 .type  gcm_gmult_clmul,\@abi-omnipotent
 .align 16
 gcm_gmult_clmul:
+.L_gmult_clmul:
        movdqu          ($Xip),$Xi
        movdqa          .Lbswap_mask(%rip),$T3
        movdqu          ($Htbl),$Hkey
+       movdqu          0x20($Htbl),$T2
        pshufb          $T3,$Xi
 ___
-       &clmul64x64_T2  ($Xhi,$Xi,$Hkey);
-       &reduction_alg9 ($Xhi,$Xi);
+       &clmul64x64_T2  ($Xhi,$Xi,$Hkey,$T2);
+$code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0));
+       # experimental alternative. special thing about is that there
+       # no dependency between the two multiplications... 
+       mov             \$`0xE1<<1`,%eax
+       mov             \$0xA040608020C0E000,%r10       # ((7..0)·0xE0)&0xff
+       mov             \$0x07,%r11d
+       movq            %rax,$T1
+       movq            %r10,$T2
+       movq            %r11,$T3                # borrow $T3
+       pand            $Xi,$T3
+       pshufb          $T3,$T2                 # ($Xi&7)·0xE0
+       movq            %rax,$T3
+       pclmulqdq       \$0x00,$Xi,$T1          # ·(0xE1<<1)
+       pxor            $Xi,$T2
+       pslldq          \$15,$T2
+       paddd           $T2,$T2                 # <<(64+56+1)
+       pxor            $T2,$Xi
+       pclmulqdq       \$0x01,$T3,$Xi
+       movdqa          .Lbswap_mask(%rip),$T3  # reload $T3
+       psrldq          \$1,$T1
+       pxor            $T1,$Xhi
+       pslldq          \$7,$Xi
+       pxor            $Xhi,$Xi
+___
 $code.=<<___;
        pshufb          $T3,$Xi
        movdqu          $Xi,($Xip)
@@ -465,129 +605,327 @@ ___
 }
 \f
 { my ($Xip,$Htbl,$inp,$len)=@_4args;
-  my $Xn="%xmm6";
-  my $Xhn="%xmm7";
-  my $Hkey2="%xmm8";
-  my $T1n="%xmm9";
-  my $T2n="%xmm10";
+  my ($Xln,$Xmn,$Xhn,$Hkey2,$HK) = map("%xmm$_",(3..7));
+  my ($T1,$T2,$T3)=map("%xmm$_",(8..10));
 
 $code.=<<___;
 .globl gcm_ghash_clmul
 .type  gcm_ghash_clmul,\@abi-omnipotent
-.align 16
+.align 32
 gcm_ghash_clmul:
+.L_ghash_clmul:
 ___
 $code.=<<___ if ($win64);
+       lea     -0x88(%rsp),%rax
 .LSEH_begin_gcm_ghash_clmul:
        # I can't trust assembler to use specific encoding:-(
-       .byte   0x48,0x83,0xec,0x58             #sub    \$0x58,%rsp
-       .byte   0x0f,0x29,0x34,0x24             #movaps %xmm6,(%rsp)
-       .byte   0x0f,0x29,0x7c,0x24,0x10        #movdqa %xmm7,0x10(%rsp)
-       .byte   0x44,0x0f,0x29,0x44,0x24,0x20   #movaps %xmm8,0x20(%rsp)
-       .byte   0x44,0x0f,0x29,0x4c,0x24,0x30   #movaps %xmm9,0x30(%rsp)
-       .byte   0x44,0x0f,0x29,0x54,0x24,0x40   #movaps %xmm10,0x40(%rsp)
+       .byte   0x48,0x8d,0x60,0xe0             #lea    -0x20(%rax),%rsp
+       .byte   0x0f,0x29,0x70,0xe0             #movaps %xmm6,-0x20(%rax)
+       .byte   0x0f,0x29,0x78,0xf0             #movaps %xmm7,-0x10(%rax)
+       .byte   0x44,0x0f,0x29,0x00             #movaps %xmm8,0(%rax)
+       .byte   0x44,0x0f,0x29,0x48,0x10        #movaps %xmm9,0x10(%rax)
+       .byte   0x44,0x0f,0x29,0x50,0x20        #movaps %xmm10,0x20(%rax)
+       .byte   0x44,0x0f,0x29,0x58,0x30        #movaps %xmm11,0x30(%rax)
+       .byte   0x44,0x0f,0x29,0x60,0x40        #movaps %xmm12,0x40(%rax)
+       .byte   0x44,0x0f,0x29,0x68,0x50        #movaps %xmm13,0x50(%rax)
+       .byte   0x44,0x0f,0x29,0x70,0x60        #movaps %xmm14,0x60(%rax)
+       .byte   0x44,0x0f,0x29,0x78,0x70        #movaps %xmm15,0x70(%rax)
 ___
 $code.=<<___;
        movdqa          .Lbswap_mask(%rip),$T3
 
        movdqu          ($Xip),$Xi
        movdqu          ($Htbl),$Hkey
+       movdqu          0x20($Htbl),$HK
        pshufb          $T3,$Xi
 
        sub             \$0x10,$len
        jz              .Lodd_tail
 
-       movdqu          16($Htbl),$Hkey2
+       movdqu          0x10($Htbl),$Hkey2
+___
+if ($do4xaggr) {
+my ($Xl,$Xm,$Xh,$Hkey3,$Hkey4)=map("%xmm$_",(11..15));
+
+$code.=<<___;
+       mov             OPENSSL_ia32cap_P+4(%rip),%eax
+       cmp             \$0x30,$len
+       jb              .Lskip4x
+
+       and             \$`1<<26|1<<22`,%eax    # isolate MOVBE+XSAVE
+       cmp             \$`1<<22`,%eax          # check for MOVBE without XSAVE
+       je              .Lskip4x
+
+       sub             \$0x30,$len
+       mov             \$0xA040608020C0E000,%rax       # ((7..0)·0xE0)&0xff
+       movdqu          0x30($Htbl),$Hkey3
+       movdqu          0x40($Htbl),$Hkey4
+
+       #######
+       # Xi+4 =[(H*Ii+3) + (H^2*Ii+2) + (H^3*Ii+1) + H^4*(Ii+Xi)] mod P
+       #
+       movdqu          0x30($inp),$Xln
+        movdqu         0x20($inp),$Xl
+       pshufb          $T3,$Xln
+        pshufb         $T3,$Xl
+       movdqa          $Xln,$Xhn
+       pshufd          \$0b01001110,$Xln,$Xmn
+       pxor            $Xln,$Xmn
+       pclmulqdq       \$0x00,$Hkey,$Xln
+       pclmulqdq       \$0x11,$Hkey,$Xhn
+       pclmulqdq       \$0x00,$HK,$Xmn
+
+       movdqa          $Xl,$Xh
+       pshufd          \$0b01001110,$Xl,$Xm
+       pxor            $Xl,$Xm
+       pclmulqdq       \$0x00,$Hkey2,$Xl
+       pclmulqdq       \$0x11,$Hkey2,$Xh
+       pclmulqdq       \$0x10,$HK,$Xm
+       xorps           $Xl,$Xln
+       xorps           $Xh,$Xhn
+       movups          0x50($Htbl),$HK
+       xorps           $Xm,$Xmn
+
+       movdqu          0x10($inp),$Xl
+        movdqu         0($inp),$T1
+       pshufb          $T3,$Xl
+        pshufb         $T3,$T1
+       movdqa          $Xl,$Xh
+       pshufd          \$0b01001110,$Xl,$Xm
+        pxor           $T1,$Xi
+       pxor            $Xl,$Xm
+       pclmulqdq       \$0x00,$Hkey3,$Xl
+        movdqa         $Xi,$Xhi
+        pshufd         \$0b01001110,$Xi,$T1
+        pxor           $Xi,$T1
+       pclmulqdq       \$0x11,$Hkey3,$Xh
+       pclmulqdq       \$0x00,$HK,$Xm
+       xorps           $Xl,$Xln
+       xorps           $Xh,$Xhn
+
+       lea     0x40($inp),$inp
+       sub     \$0x40,$len
+       jc      .Ltail4x
+
+       jmp     .Lmod4_loop
+.align 32
+.Lmod4_loop:
+       pclmulqdq       \$0x00,$Hkey4,$Xi
+       xorps           $Xm,$Xmn
+        movdqu         0x30($inp),$Xl
+        pshufb         $T3,$Xl
+       pclmulqdq       \$0x11,$Hkey4,$Xhi
+       xorps           $Xln,$Xi
+        movdqu         0x20($inp),$Xln
+        movdqa         $Xl,$Xh
+       pclmulqdq       \$0x10,$HK,$T1
+        pshufd         \$0b01001110,$Xl,$Xm
+       xorps           $Xhn,$Xhi
+        pxor           $Xl,$Xm
+        pshufb         $T3,$Xln
+       movups          0x20($Htbl),$HK
+       xorps           $Xmn,$T1
+        pclmulqdq      \$0x00,$Hkey,$Xl
+        pshufd         \$0b01001110,$Xln,$Xmn
+
+       pxor            $Xi,$T1                 # aggregated Karatsuba post-processing
+        movdqa         $Xln,$Xhn
+       pxor            $Xhi,$T1                #
+        pxor           $Xln,$Xmn
+       movdqa          $T1,$T2                 #
+        pclmulqdq      \$0x11,$Hkey,$Xh
+       pslldq          \$8,$T1
+       psrldq          \$8,$T2                 #
+       pxor            $T1,$Xi
+       movdqa          .L7_mask(%rip),$T1
+       pxor            $T2,$Xhi                #
+       movq            %rax,$T2
+
+       pand            $Xi,$T1                 # 1st phase
+       pshufb          $T1,$T2                 #
+       pxor            $Xi,$T2                 #
+        pclmulqdq      \$0x00,$HK,$Xm
+       psllq           \$57,$T2                #
+       movdqa          $T2,$T1                 #
+       pslldq          \$8,$T2
+        pclmulqdq      \$0x00,$Hkey2,$Xln
+       psrldq          \$8,$T1                 #       
+       pxor            $T2,$Xi
+       pxor            $T1,$Xhi                #
+       movdqu          0($inp),$T1
+
+       movdqa          $Xi,$T2                 # 2nd phase
+       psrlq           \$1,$Xi
+        pclmulqdq      \$0x11,$Hkey2,$Xhn
+        xorps          $Xl,$Xln
+        movdqu         0x10($inp),$Xl
+        pshufb         $T3,$Xl
+        pclmulqdq      \$0x10,$HK,$Xmn
+        xorps          $Xh,$Xhn
+        movups         0x50($Htbl),$HK
+       pshufb          $T3,$T1
+       pxor            $T2,$Xhi                #
+       pxor            $Xi,$T2
+       psrlq           \$5,$Xi
+
+        movdqa         $Xl,$Xh
+        pxor           $Xm,$Xmn
+        pshufd         \$0b01001110,$Xl,$Xm
+       pxor            $T2,$Xi                 #
+       pxor            $T1,$Xhi
+        pxor           $Xl,$Xm
+        pclmulqdq      \$0x00,$Hkey3,$Xl
+       psrlq           \$1,$Xi                 #
+       pxor            $Xhi,$Xi                #
+       movdqa          $Xi,$Xhi
+        pclmulqdq      \$0x11,$Hkey3,$Xh
+        xorps          $Xl,$Xln
+       pshufd          \$0b01001110,$Xi,$T1
+       pxor            $Xi,$T1
+
+        pclmulqdq      \$0x00,$HK,$Xm
+        xorps          $Xh,$Xhn
+
+       lea     0x40($inp),$inp
+       sub     \$0x40,$len
+       jnc     .Lmod4_loop
+
+.Ltail4x:
+       pclmulqdq       \$0x00,$Hkey4,$Xi
+       pclmulqdq       \$0x11,$Hkey4,$Xhi
+       pclmulqdq       \$0x10,$HK,$T1
+       xorps           $Xm,$Xmn
+       xorps           $Xln,$Xi
+       xorps           $Xhn,$Xhi
+       pxor            $Xi,$Xhi                # aggregated Karatsuba post-processing
+       pxor            $Xmn,$T1
+
+       pxor            $Xhi,$T1                #
+       pxor            $Xi,$Xhi
+
+       movdqa          $T1,$T2                 #
+       psrldq          \$8,$T1
+       pslldq          \$8,$T2                 #
+       pxor            $T1,$Xhi
+       pxor            $T2,$Xi                 #
+___
+       &reduction_alg9($Xhi,$Xi);
+$code.=<<___;
+       add     \$0x40,$len
+       jz      .Ldone
+       movdqu  0x20($Htbl),$HK
+       sub     \$0x10,$len
+       jz      .Lodd_tail
+.Lskip4x:
+___
+}
+$code.=<<___;
        #######
        # Xi+2 =[H*(Ii+1 + Xi+1)] mod P =
        #       [(H*Ii+1) + (H*Xi+1)] mod P =
        #       [(H*Ii+1) + H^2*(Ii+Xi)] mod P
        #
        movdqu          ($inp),$T1              # Ii
-       movdqu          16($inp),$Xn            # Ii+1
+       movdqu          16($inp),$Xln           # Ii+1
        pshufb          $T3,$T1
-       pshufb          $T3,$Xn
+       pshufb          $T3,$Xln
        pxor            $T1,$Xi                 # Ii+Xi
-___
-       &clmul64x64_T2  ($Xhn,$Xn,$Hkey);       # H*Ii+1
-$code.=<<___;
-       movdqa          $Xi,$Xhi                #
-       pshufd          \$0b01001110,$Xi,$T1
-       pshufd          \$0b01001110,$Hkey2,$T2
-       pxor            $Xi,$T1                 #
-       pxor            $Hkey2,$T2
+
+       movdqa          $Xln,$Xhn
+       pshufd          \$0b01001110,$Xln,$Xmn
+       pxor            $Xln,$Xmn
+       pclmulqdq       \$0x00,$Hkey,$Xln
+       pclmulqdq       \$0x11,$Hkey,$Xhn
+       pclmulqdq       \$0x00,$HK,$Xmn
 
        lea             32($inp),$inp           # i+=2
+       nop
        sub             \$0x20,$len
        jbe             .Leven_tail
+       nop
+       jmp             .Lmod_loop
 
+.align 32
 .Lmod_loop:
-___
-       &clmul64x64_T2  ($Xhi,$Xi,$Hkey2,1);    # H^2*(Ii+Xi)
-$code.=<<___;
-       movdqu          ($inp),$T1              # Ii
-       pxor            $Xn,$Xi                 # (H*Ii+1) + H^2*(Ii+Xi)
-       pxor            $Xhn,$Xhi
+       movdqa          $Xi,$Xhi
+       movdqa          $Xmn,$T1
+       pshufd          \$0b01001110,$Xi,$Xmn   #
+       pxor            $Xi,$Xmn                #
 
-       movdqu          16($inp),$Xn            # Ii+1
-       pshufb          $T3,$T1
-       pshufb          $T3,$Xn
+       pclmulqdq       \$0x00,$Hkey2,$Xi
+       pclmulqdq       \$0x11,$Hkey2,$Xhi
+       pclmulqdq       \$0x10,$HK,$Xmn
 
-       movdqa          $Xn,$Xhn                #
-       pshufd          \$0b01001110,$Xn,$T1n
-       pshufd          \$0b01001110,$Hkey,$T2n
-       pxor            $Xn,$T1n                #
-       pxor            $Hkey,$T2n
-        pxor           $T1,$Xhi                # "Ii+Xi", consume early
+       pxor            $Xln,$Xi                # (H*Ii+1) + H^2*(Ii+Xi)
+       pxor            $Xhn,$Xhi
+         movdqu        ($inp),$T2              # Ii
+       pxor            $Xi,$T1                 # aggregated Karatsuba post-processing
+         pshufb        $T3,$T2
+         movdqu        16($inp),$Xln           # Ii+1
+
+       pxor            $Xhi,$T1
+         pxor          $T2,$Xhi                # "Ii+Xi", consume early
+       pxor            $T1,$Xmn
+        pshufb         $T3,$Xln
+       movdqa          $Xmn,$T1                #
+       psrldq          \$8,$T1
+       pslldq          \$8,$Xmn                #
+       pxor            $T1,$Xhi
+       pxor            $Xmn,$Xi                #
+
+       movdqa          $Xln,$Xhn               #
 
-         movdqa        $Xi,$T1                 # 1st phase
+         movdqa        $Xi,$T2                 # 1st phase
+         movdqa        $Xi,$T1
+         psllq         \$5,$Xi
+         pxor          $Xi,$T1                 #
+       pclmulqdq       \$0x00,$Hkey,$Xln       #######
          psllq         \$1,$Xi
          pxor          $T1,$Xi                 #
-         psllq         \$5,$Xi                 #
-         pxor          $T1,$Xi                 #
-       pclmulqdq       \$0x00,$Hkey,$Xn        #######
          psllq         \$57,$Xi                #
-         movdqa        $Xi,$T2                 #
+         movdqa        $Xi,$T1                 #
          pslldq        \$8,$Xi
-         psrldq        \$8,$T2                 #       
-         pxor          $T1,$Xi
-         pxor          $T2,$Xhi                #
+         psrldq        \$8,$T1                 #       
+         pxor          $T2,$Xi
+       pshufd          \$0b01001110,$Xhn,$Xmn
+         pxor          $T1,$Xhi                #
+       pxor            $Xhn,$Xmn               #
 
-       pclmulqdq       \$0x11,$Hkey,$Xhn       #######
          movdqa        $Xi,$T2                 # 2nd phase
+         psrlq         \$1,$Xi
+       pclmulqdq       \$0x11,$Hkey,$Xhn       #######
+         pxor          $T2,$Xhi                #
+         pxor          $Xi,$T2
          psrlq         \$5,$Xi
          pxor          $T2,$Xi                 #
+       lea             32($inp),$inp
          psrlq         \$1,$Xi                 #
-         pxor          $T2,$Xi                 #
-         pxor          $Xhi,$T2
-         psrlq         \$1,$Xi                 #
-         pxor          $T2,$Xi                 #
+       pclmulqdq       \$0x00,$HK,$Xmn         #######
+         pxor          $Xhi,$Xi                #
 
-       pclmulqdq       \$0x00,$T2n,$T1n        #######
-        movdqa         $Xi,$Xhi                #
-        pshufd         \$0b01001110,$Xi,$T1
-        pshufd         \$0b01001110,$Hkey2,$T2
-        pxor           $Xi,$T1                 #
-        pxor           $Hkey2,$T2
-
-       pxor            $Xn,$T1n                #
-       pxor            $Xhn,$T1n               #
-       movdqa          $T1n,$T2n               #
-       psrldq          \$8,$T1n
-       pslldq          \$8,$T2n                #
-       pxor            $T1n,$Xhn
-       pxor            $T2n,$Xn                #
-
-       lea             32($inp),$inp
        sub             \$0x20,$len
        ja              .Lmod_loop
 
 .Leven_tail:
-___
-       &clmul64x64_T2  ($Xhi,$Xi,$Hkey2,1);    # H^2*(Ii+Xi)
-$code.=<<___;
-       pxor            $Xn,$Xi                 # (H*Ii+1) + H^2*(Ii+Xi)
+        movdqa         $Xi,$Xhi
+        movdqa         $Xmn,$T1
+        pshufd         \$0b01001110,$Xi,$Xmn   #
+        pxor           $Xi,$Xmn                #
+
+       pclmulqdq       \$0x00,$Hkey2,$Xi
+       pclmulqdq       \$0x11,$Hkey2,$Xhi
+       pclmulqdq       \$0x10,$HK,$Xmn
+
+       pxor            $Xln,$Xi                # (H*Ii+1) + H^2*(Ii+Xi)
        pxor            $Xhn,$Xhi
+       pxor            $Xi,$T1
+       pxor            $Xhi,$T1
+       pxor            $T1,$Xmn
+       movdqa          $Xmn,$T1                #
+       psrldq          \$8,$T1
+       pslldq          \$8,$Xmn                #
+       pxor            $T1,$Xhi
+       pxor            $Xmn,$Xi                #
 ___
        &reduction_alg9 ($Xhi,$Xi);
 $code.=<<___;
@@ -599,7 +937,7 @@ $code.=<<___;
        pshufb          $T3,$T1
        pxor            $T1,$Xi                 # Ii+Xi
 ___
-       &clmul64x64_T2  ($Xhi,$Xi,$Hkey);       # H*(Ii+Xi)
+       &clmul64x64_T2  ($Xhi,$Xi,$Hkey,$HK);   # H*(Ii+Xi)
        &reduction_alg9 ($Xhi,$Xi);
 $code.=<<___;
 .Ldone:
@@ -612,21 +950,607 @@ $code.=<<___ if ($win64);
        movaps  0x20(%rsp),%xmm8
        movaps  0x30(%rsp),%xmm9
        movaps  0x40(%rsp),%xmm10
-       add     \$0x58,%rsp
+       movaps  0x50(%rsp),%xmm11
+       movaps  0x60(%rsp),%xmm12
+       movaps  0x70(%rsp),%xmm13
+       movaps  0x80(%rsp),%xmm14
+       movaps  0x90(%rsp),%xmm15
+       lea     0xa8(%rsp),%rsp
+.LSEH_end_gcm_ghash_clmul:
 ___
 $code.=<<___;
        ret
-.LSEH_end_gcm_ghash_clmul:
 .size  gcm_ghash_clmul,.-gcm_ghash_clmul
 ___
 }
+\f
+$code.=<<___;
+.globl gcm_init_avx
+.type  gcm_init_avx,\@abi-omnipotent
+.align 32
+gcm_init_avx:
+___
+if ($avx) {
+my ($Htbl,$Xip)=@_4args;
+my $HK="%xmm6";
+
+$code.=<<___ if ($win64);
+.LSEH_begin_gcm_init_avx:
+       # I can't trust assembler to use specific encoding:-(
+       .byte   0x48,0x83,0xec,0x18             #sub    $0x18,%rsp
+       .byte   0x0f,0x29,0x34,0x24             #movaps %xmm6,(%rsp)
+___
+$code.=<<___;
+       vzeroupper
+
+       vmovdqu         ($Xip),$Hkey
+       vpshufd         \$0b01001110,$Hkey,$Hkey        # dword swap
+
+       # <<1 twist
+       vpshufd         \$0b11111111,$Hkey,$T2  # broadcast uppermost dword
+       vpsrlq          \$63,$Hkey,$T1
+       vpsllq          \$1,$Hkey,$Hkey
+       vpxor           $T3,$T3,$T3             #
+       vpcmpgtd        $T2,$T3,$T3             # broadcast carry bit
+       vpslldq         \$8,$T1,$T1
+       vpor            $T1,$Hkey,$Hkey         # H<<=1
+
+       # magic reduction
+       vpand           .L0x1c2_polynomial(%rip),$T3,$T3
+       vpxor           $T3,$Hkey,$Hkey         # if(carry) H^=0x1c2_polynomial
+
+       vpunpckhqdq     $Hkey,$Hkey,$HK
+       vmovdqa         $Hkey,$Xi
+       vpxor           $Hkey,$HK,$HK
+       mov             \$4,%r10                # up to H^8
+       jmp             .Linit_start_avx
+___
+
+sub clmul64x64_avx {
+my ($Xhi,$Xi,$Hkey,$HK)=@_;
+
+if (!defined($HK)) {   $HK = $T2;
+$code.=<<___;
+       vpunpckhqdq     $Xi,$Xi,$T1
+       vpunpckhqdq     $Hkey,$Hkey,$T2
+       vpxor           $Xi,$T1,$T1             #
+       vpxor           $Hkey,$T2,$T2
+___
+} else {
+$code.=<<___;
+       vpunpckhqdq     $Xi,$Xi,$T1
+       vpxor           $Xi,$T1,$T1             #
+___
+}
+$code.=<<___;
+       vpclmulqdq      \$0x11,$Hkey,$Xi,$Xhi   #######
+       vpclmulqdq      \$0x00,$Hkey,$Xi,$Xi    #######
+       vpclmulqdq      \$0x00,$HK,$T1,$T1      #######
+       vpxor           $Xi,$Xhi,$T2            #
+       vpxor           $T2,$T1,$T1             #
+
+       vpslldq         \$8,$T1,$T2             #
+       vpsrldq         \$8,$T1,$T1
+       vpxor           $T2,$Xi,$Xi             #
+       vpxor           $T1,$Xhi,$Xhi
+___
+}
+
+sub reduction_avx {
+my ($Xhi,$Xi) = @_;
+
+$code.=<<___;
+       vpsllq          \$57,$Xi,$T1            # 1st phase
+       vpsllq          \$62,$Xi,$T2
+       vpxor           $T1,$T2,$T2             #
+       vpsllq          \$63,$Xi,$T1
+       vpxor           $T1,$T2,$T2             #
+       vpslldq         \$8,$T2,$T1             #
+       vpsrldq         \$8,$T2,$T2
+       vpxor           $T1,$Xi,$Xi             #
+       vpxor           $T2,$Xhi,$Xhi
+
+       vpsrlq          \$1,$Xi,$T2             # 2nd phase
+       vpxor           $Xi,$Xhi,$Xhi
+       vpxor           $T2,$Xi,$Xi             #
+       vpsrlq          \$5,$T2,$T2
+       vpxor           $T2,$Xi,$Xi             #
+       vpsrlq          \$1,$Xi,$Xi             #
+       vpxor           $Xhi,$Xi,$Xi            #
+___
+}
 
 $code.=<<___;
+.align 32
+.Linit_loop_avx:
+       vpalignr        \$8,$T1,$T2,$T3         # low part is H.lo^H.hi...
+       vmovdqu         $T3,-0x10($Htbl)        # save Karatsuba "salt"
+___
+       &clmul64x64_avx ($Xhi,$Xi,$Hkey,$HK);   # calculate H^3,5,7
+       &reduction_avx  ($Xhi,$Xi);
+$code.=<<___;
+.Linit_start_avx:
+       vmovdqa         $Xi,$T3
+___
+       &clmul64x64_avx ($Xhi,$Xi,$Hkey,$HK);   # calculate H^2,4,6,8
+       &reduction_avx  ($Xhi,$Xi);
+$code.=<<___;
+       vpshufd         \$0b01001110,$T3,$T1
+       vpshufd         \$0b01001110,$Xi,$T2
+       vpxor           $T3,$T1,$T1             # Karatsuba pre-processing
+       vmovdqu         $T3,0x00($Htbl)         # save H^1,3,5,7
+       vpxor           $Xi,$T2,$T2             # Karatsuba pre-processing
+       vmovdqu         $Xi,0x10($Htbl)         # save H^2,4,6,8
+       lea             0x30($Htbl),$Htbl
+       sub             \$1,%r10
+       jnz             .Linit_loop_avx
+
+       vpalignr        \$8,$T2,$T1,$T3         # last "salt" is flipped
+       vmovdqu         $T3,-0x10($Htbl)
+
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       movaps  (%rsp),%xmm6
+       lea     0x18(%rsp),%rsp
+.LSEH_end_gcm_init_avx:
+___
+$code.=<<___;
+       ret
+.size  gcm_init_avx,.-gcm_init_avx
+___
+} else {
+$code.=<<___;
+       jmp     .L_init_clmul
+.size  gcm_init_avx,.-gcm_init_avx
+___
+}
+
+$code.=<<___;
+.globl gcm_gmult_avx
+.type  gcm_gmult_avx,\@abi-omnipotent
+.align 32
+gcm_gmult_avx:
+       jmp     .L_gmult_clmul
+.size  gcm_gmult_avx,.-gcm_gmult_avx
+___
+\f
+$code.=<<___;
+.globl gcm_ghash_avx
+.type  gcm_ghash_avx,\@abi-omnipotent
+.align 32
+gcm_ghash_avx:
+___
+if ($avx) {
+my ($Xip,$Htbl,$inp,$len)=@_4args;
+my ($Xlo,$Xhi,$Xmi,
+    $Zlo,$Zhi,$Zmi,
+    $Hkey,$HK,$T1,$T2,
+    $Xi,$Xo,$Tred,$bswap,$Ii,$Ij) = map("%xmm$_",(0..15));
+
+$code.=<<___ if ($win64);
+       lea     -0x88(%rsp),%rax
+.LSEH_begin_gcm_ghash_avx:
+       # I can't trust assembler to use specific encoding:-(
+       .byte   0x48,0x8d,0x60,0xe0             #lea    -0x20(%rax),%rsp
+       .byte   0x0f,0x29,0x70,0xe0             #movaps %xmm6,-0x20(%rax)
+       .byte   0x0f,0x29,0x78,0xf0             #movaps %xmm7,-0x10(%rax)
+       .byte   0x44,0x0f,0x29,0x00             #movaps %xmm8,0(%rax)
+       .byte   0x44,0x0f,0x29,0x48,0x10        #movaps %xmm9,0x10(%rax)
+       .byte   0x44,0x0f,0x29,0x50,0x20        #movaps %xmm10,0x20(%rax)
+       .byte   0x44,0x0f,0x29,0x58,0x30        #movaps %xmm11,0x30(%rax)
+       .byte   0x44,0x0f,0x29,0x60,0x40        #movaps %xmm12,0x40(%rax)
+       .byte   0x44,0x0f,0x29,0x68,0x50        #movaps %xmm13,0x50(%rax)
+       .byte   0x44,0x0f,0x29,0x70,0x60        #movaps %xmm14,0x60(%rax)
+       .byte   0x44,0x0f,0x29,0x78,0x70        #movaps %xmm15,0x70(%rax)
+___
+$code.=<<___;
+       vzeroupper
+
+       vmovdqu         ($Xip),$Xi              # load $Xi
+       lea             .L0x1c2_polynomial(%rip),%r10
+       lea             0x40($Htbl),$Htbl       # size optimization
+       vmovdqu         .Lbswap_mask(%rip),$bswap
+       vpshufb         $bswap,$Xi,$Xi
+       cmp             \$0x80,$len
+       jb              .Lshort_avx
+       sub             \$0x80,$len
+
+       vmovdqu         0x70($inp),$Ii          # I[7]
+       vmovdqu         0x00-0x40($Htbl),$Hkey  # $Hkey^1
+       vpshufb         $bswap,$Ii,$Ii
+       vmovdqu         0x20-0x40($Htbl),$HK
+
+       vpunpckhqdq     $Ii,$Ii,$T2
+        vmovdqu        0x60($inp),$Ij          # I[6]
+       vpclmulqdq      \$0x00,$Hkey,$Ii,$Xlo
+       vpxor           $Ii,$T2,$T2
+        vpshufb        $bswap,$Ij,$Ij
+       vpclmulqdq      \$0x11,$Hkey,$Ii,$Xhi
+        vmovdqu        0x10-0x40($Htbl),$Hkey  # $Hkey^2
+        vpunpckhqdq    $Ij,$Ij,$T1
+        vmovdqu        0x50($inp),$Ii          # I[5]
+       vpclmulqdq      \$0x00,$HK,$T2,$Xmi
+        vpxor          $Ij,$T1,$T1
+
+        vpshufb        $bswap,$Ii,$Ii
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Zlo
+        vpunpckhqdq    $Ii,$Ii,$T2
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Zhi
+        vmovdqu        0x30-0x40($Htbl),$Hkey  # $Hkey^3
+        vpxor          $Ii,$T2,$T2
+        vmovdqu        0x40($inp),$Ij          # I[4]
+       vpclmulqdq      \$0x10,$HK,$T1,$Zmi
+        vmovdqu        0x50-0x40($Htbl),$HK
+
+        vpshufb        $bswap,$Ij,$Ij
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpclmulqdq      \$0x00,$Hkey,$Ii,$Xlo
+       vpxor           $Xhi,$Zhi,$Zhi
+        vpunpckhqdq    $Ij,$Ij,$T1
+       vpclmulqdq      \$0x11,$Hkey,$Ii,$Xhi
+        vmovdqu        0x40-0x40($Htbl),$Hkey  # $Hkey^4
+       vpxor           $Xmi,$Zmi,$Zmi
+       vpclmulqdq      \$0x00,$HK,$T2,$Xmi
+        vpxor          $Ij,$T1,$T1
+
+        vmovdqu        0x30($inp),$Ii          # I[3]
+       vpxor           $Zlo,$Xlo,$Xlo
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Zlo
+       vpxor           $Zhi,$Xhi,$Xhi
+        vpshufb        $bswap,$Ii,$Ii
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Zhi
+        vmovdqu        0x60-0x40($Htbl),$Hkey  # $Hkey^5
+       vpxor           $Zmi,$Xmi,$Xmi
+        vpunpckhqdq    $Ii,$Ii,$T2
+       vpclmulqdq      \$0x10,$HK,$T1,$Zmi
+        vmovdqu        0x80-0x40($Htbl),$HK
+        vpxor          $Ii,$T2,$T2
+
+        vmovdqu        0x20($inp),$Ij          # I[2]
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpclmulqdq      \$0x00,$Hkey,$Ii,$Xlo
+       vpxor           $Xhi,$Zhi,$Zhi
+        vpshufb        $bswap,$Ij,$Ij
+       vpclmulqdq      \$0x11,$Hkey,$Ii,$Xhi
+        vmovdqu        0x70-0x40($Htbl),$Hkey  # $Hkey^6
+       vpxor           $Xmi,$Zmi,$Zmi
+        vpunpckhqdq    $Ij,$Ij,$T1
+       vpclmulqdq      \$0x00,$HK,$T2,$Xmi
+        vpxor          $Ij,$T1,$T1
+
+        vmovdqu        0x10($inp),$Ii          # I[1]
+       vpxor           $Zlo,$Xlo,$Xlo
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Zlo
+       vpxor           $Zhi,$Xhi,$Xhi
+        vpshufb        $bswap,$Ii,$Ii
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Zhi
+        vmovdqu        0x90-0x40($Htbl),$Hkey  # $Hkey^7
+       vpxor           $Zmi,$Xmi,$Xmi
+        vpunpckhqdq    $Ii,$Ii,$T2
+       vpclmulqdq      \$0x10,$HK,$T1,$Zmi
+        vmovdqu        0xb0-0x40($Htbl),$HK
+        vpxor          $Ii,$T2,$T2
+
+        vmovdqu        ($inp),$Ij              # I[0]
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpclmulqdq      \$0x00,$Hkey,$Ii,$Xlo
+       vpxor           $Xhi,$Zhi,$Zhi
+        vpshufb        $bswap,$Ij,$Ij
+       vpclmulqdq      \$0x11,$Hkey,$Ii,$Xhi
+        vmovdqu        0xa0-0x40($Htbl),$Hkey  # $Hkey^8
+       vpxor           $Xmi,$Zmi,$Zmi
+       vpclmulqdq      \$0x10,$HK,$T2,$Xmi
+
+       lea             0x80($inp),$inp
+       cmp             \$0x80,$len
+       jb              .Ltail_avx
+
+       vpxor           $Xi,$Ij,$Ij             # accumulate $Xi
+       sub             \$0x80,$len
+       jmp             .Loop8x_avx
+
+.align 32
+.Loop8x_avx:
+       vpunpckhqdq     $Ij,$Ij,$T1
+        vmovdqu        0x70($inp),$Ii          # I[7]
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpxor           $Ij,$T1,$T1
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Xi
+        vpshufb        $bswap,$Ii,$Ii
+       vpxor           $Xhi,$Zhi,$Zhi
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Xo
+        vmovdqu        0x00-0x40($Htbl),$Hkey  # $Hkey^1
+        vpunpckhqdq    $Ii,$Ii,$T2
+       vpxor           $Xmi,$Zmi,$Zmi
+       vpclmulqdq      \$0x00,$HK,$T1,$Tred
+        vmovdqu        0x20-0x40($Htbl),$HK
+        vpxor          $Ii,$T2,$T2
+
+         vmovdqu       0x60($inp),$Ij          # I[6]
+        vpclmulqdq     \$0x00,$Hkey,$Ii,$Xlo
+       vpxor           $Zlo,$Xi,$Xi            # collect result
+         vpshufb       $bswap,$Ij,$Ij
+        vpclmulqdq     \$0x11,$Hkey,$Ii,$Xhi
+       vxorps          $Zhi,$Xo,$Xo
+         vmovdqu       0x10-0x40($Htbl),$Hkey  # $Hkey^2
+        vpunpckhqdq    $Ij,$Ij,$T1
+        vpclmulqdq     \$0x00,$HK,  $T2,$Xmi
+       vpxor           $Zmi,$Tred,$Tred
+        vxorps         $Ij,$T1,$T1
+
+         vmovdqu       0x50($inp),$Ii          # I[5]
+       vpxor           $Xi,$Tred,$Tred         # aggregated Karatsuba post-processing
+        vpclmulqdq     \$0x00,$Hkey,$Ij,$Zlo
+       vpxor           $Xo,$Tred,$Tred
+       vpslldq         \$8,$Tred,$T2
+        vpxor          $Xlo,$Zlo,$Zlo
+        vpclmulqdq     \$0x11,$Hkey,$Ij,$Zhi
+       vpsrldq         \$8,$Tred,$Tred
+       vpxor           $T2, $Xi, $Xi
+         vmovdqu       0x30-0x40($Htbl),$Hkey  # $Hkey^3
+         vpshufb       $bswap,$Ii,$Ii
+       vxorps          $Tred,$Xo, $Xo
+        vpxor          $Xhi,$Zhi,$Zhi
+        vpunpckhqdq    $Ii,$Ii,$T2
+        vpclmulqdq     \$0x10,$HK,  $T1,$Zmi
+         vmovdqu       0x50-0x40($Htbl),$HK
+        vpxor          $Ii,$T2,$T2
+        vpxor          $Xmi,$Zmi,$Zmi
+
+         vmovdqu       0x40($inp),$Ij          # I[4]
+       vpalignr        \$8,$Xi,$Xi,$Tred       # 1st phase
+        vpclmulqdq     \$0x00,$Hkey,$Ii,$Xlo
+         vpshufb       $bswap,$Ij,$Ij
+        vpxor          $Zlo,$Xlo,$Xlo
+        vpclmulqdq     \$0x11,$Hkey,$Ii,$Xhi
+         vmovdqu       0x40-0x40($Htbl),$Hkey  # $Hkey^4
+        vpunpckhqdq    $Ij,$Ij,$T1
+        vpxor          $Zhi,$Xhi,$Xhi
+        vpclmulqdq     \$0x00,$HK,  $T2,$Xmi
+        vxorps         $Ij,$T1,$T1
+        vpxor          $Zmi,$Xmi,$Xmi
+
+         vmovdqu       0x30($inp),$Ii          # I[3]
+       vpclmulqdq      \$0x10,(%r10),$Xi,$Xi
+        vpclmulqdq     \$0x00,$Hkey,$Ij,$Zlo
+         vpshufb       $bswap,$Ii,$Ii
+        vpxor          $Xlo,$Zlo,$Zlo
+        vpclmulqdq     \$0x11,$Hkey,$Ij,$Zhi
+         vmovdqu       0x60-0x40($Htbl),$Hkey  # $Hkey^5
+        vpunpckhqdq    $Ii,$Ii,$T2
+        vpxor          $Xhi,$Zhi,$Zhi
+        vpclmulqdq     \$0x10,$HK,  $T1,$Zmi
+         vmovdqu       0x80-0x40($Htbl),$HK
+        vpxor          $Ii,$T2,$T2
+        vpxor          $Xmi,$Zmi,$Zmi
+
+         vmovdqu       0x20($inp),$Ij          # I[2]
+        vpclmulqdq     \$0x00,$Hkey,$Ii,$Xlo
+         vpshufb       $bswap,$Ij,$Ij
+        vpxor          $Zlo,$Xlo,$Xlo
+        vpclmulqdq     \$0x11,$Hkey,$Ii,$Xhi
+         vmovdqu       0x70-0x40($Htbl),$Hkey  # $Hkey^6
+        vpunpckhqdq    $Ij,$Ij,$T1
+        vpxor          $Zhi,$Xhi,$Xhi
+        vpclmulqdq     \$0x00,$HK,  $T2,$Xmi
+        vpxor          $Ij,$T1,$T1
+        vpxor          $Zmi,$Xmi,$Xmi
+       vxorps          $Tred,$Xi,$Xi
+
+         vmovdqu       0x10($inp),$Ii          # I[1]
+       vpalignr        \$8,$Xi,$Xi,$Tred       # 2nd phase
+        vpclmulqdq     \$0x00,$Hkey,$Ij,$Zlo
+         vpshufb       $bswap,$Ii,$Ii
+        vpxor          $Xlo,$Zlo,$Zlo
+        vpclmulqdq     \$0x11,$Hkey,$Ij,$Zhi
+         vmovdqu       0x90-0x40($Htbl),$Hkey  # $Hkey^7
+       vpclmulqdq      \$0x10,(%r10),$Xi,$Xi
+       vxorps          $Xo,$Tred,$Tred
+        vpunpckhqdq    $Ii,$Ii,$T2
+        vpxor          $Xhi,$Zhi,$Zhi
+        vpclmulqdq     \$0x10,$HK,  $T1,$Zmi
+         vmovdqu       0xb0-0x40($Htbl),$HK
+        vpxor          $Ii,$T2,$T2
+        vpxor          $Xmi,$Zmi,$Zmi
+
+         vmovdqu       ($inp),$Ij              # I[0]
+        vpclmulqdq     \$0x00,$Hkey,$Ii,$Xlo
+         vpshufb       $bswap,$Ij,$Ij
+        vpclmulqdq     \$0x11,$Hkey,$Ii,$Xhi
+         vmovdqu       0xa0-0x40($Htbl),$Hkey  # $Hkey^8
+       vpxor           $Tred,$Ij,$Ij
+        vpclmulqdq     \$0x10,$HK,  $T2,$Xmi
+       vpxor           $Xi,$Ij,$Ij             # accumulate $Xi
+
+       lea             0x80($inp),$inp
+       sub             \$0x80,$len
+       jnc             .Loop8x_avx
+
+       add             \$0x80,$len
+       jmp             .Ltail_no_xor_avx
+
+.align 32
+.Lshort_avx:
+       vmovdqu         -0x10($inp,$len),$Ii    # very last word
+       lea             ($inp,$len),$inp
+       vmovdqu         0x00-0x40($Htbl),$Hkey  # $Hkey^1
+       vmovdqu         0x20-0x40($Htbl),$HK
+       vpshufb         $bswap,$Ii,$Ij
+
+       vmovdqa         $Xlo,$Zlo               # subtle way to zero $Zlo,
+       vmovdqa         $Xhi,$Zhi               # $Zhi and
+       vmovdqa         $Xmi,$Zmi               # $Zmi
+       sub             \$0x10,$len
+       jz              .Ltail_avx
+
+       vpunpckhqdq     $Ij,$Ij,$T1
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Xlo
+       vpxor           $Ij,$T1,$T1
+        vmovdqu        -0x20($inp),$Ii
+       vpxor           $Xhi,$Zhi,$Zhi
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Xhi
+       vmovdqu         0x10-0x40($Htbl),$Hkey  # $Hkey^2
+        vpshufb        $bswap,$Ii,$Ij
+       vpxor           $Xmi,$Zmi,$Zmi
+       vpclmulqdq      \$0x00,$HK,$T1,$Xmi
+       vpsrldq         \$8,$HK,$HK
+       sub             \$0x10,$len
+       jz              .Ltail_avx
+
+       vpunpckhqdq     $Ij,$Ij,$T1
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Xlo
+       vpxor           $Ij,$T1,$T1
+        vmovdqu        -0x30($inp),$Ii
+       vpxor           $Xhi,$Zhi,$Zhi
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Xhi
+       vmovdqu         0x30-0x40($Htbl),$Hkey  # $Hkey^3
+        vpshufb        $bswap,$Ii,$Ij
+       vpxor           $Xmi,$Zmi,$Zmi
+       vpclmulqdq      \$0x00,$HK,$T1,$Xmi
+       vmovdqu         0x50-0x40($Htbl),$HK
+       sub             \$0x10,$len
+       jz              .Ltail_avx
+
+       vpunpckhqdq     $Ij,$Ij,$T1
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Xlo
+       vpxor           $Ij,$T1,$T1
+        vmovdqu        -0x40($inp),$Ii
+       vpxor           $Xhi,$Zhi,$Zhi
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Xhi
+       vmovdqu         0x40-0x40($Htbl),$Hkey  # $Hkey^4
+        vpshufb        $bswap,$Ii,$Ij
+       vpxor           $Xmi,$Zmi,$Zmi
+       vpclmulqdq      \$0x00,$HK,$T1,$Xmi
+       vpsrldq         \$8,$HK,$HK
+       sub             \$0x10,$len
+       jz              .Ltail_avx
+
+       vpunpckhqdq     $Ij,$Ij,$T1
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Xlo
+       vpxor           $Ij,$T1,$T1
+        vmovdqu        -0x50($inp),$Ii
+       vpxor           $Xhi,$Zhi,$Zhi
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Xhi
+       vmovdqu         0x60-0x40($Htbl),$Hkey  # $Hkey^5
+        vpshufb        $bswap,$Ii,$Ij
+       vpxor           $Xmi,$Zmi,$Zmi
+       vpclmulqdq      \$0x00,$HK,$T1,$Xmi
+       vmovdqu         0x80-0x40($Htbl),$HK
+       sub             \$0x10,$len
+       jz              .Ltail_avx
+
+       vpunpckhqdq     $Ij,$Ij,$T1
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Xlo
+       vpxor           $Ij,$T1,$T1
+        vmovdqu        -0x60($inp),$Ii
+       vpxor           $Xhi,$Zhi,$Zhi
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Xhi
+       vmovdqu         0x70-0x40($Htbl),$Hkey  # $Hkey^6
+        vpshufb        $bswap,$Ii,$Ij
+       vpxor           $Xmi,$Zmi,$Zmi
+       vpclmulqdq      \$0x00,$HK,$T1,$Xmi
+       vpsrldq         \$8,$HK,$HK
+       sub             \$0x10,$len
+       jz              .Ltail_avx
+
+       vpunpckhqdq     $Ij,$Ij,$T1
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Xlo
+       vpxor           $Ij,$T1,$T1
+        vmovdqu        -0x70($inp),$Ii
+       vpxor           $Xhi,$Zhi,$Zhi
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Xhi
+       vmovdqu         0x90-0x40($Htbl),$Hkey  # $Hkey^7
+        vpshufb        $bswap,$Ii,$Ij
+       vpxor           $Xmi,$Zmi,$Zmi
+       vpclmulqdq      \$0x00,$HK,$T1,$Xmi
+       vmovq           0xb8-0x40($Htbl),$HK
+       sub             \$0x10,$len
+       jmp             .Ltail_avx
+
+.align 32
+.Ltail_avx:
+       vpxor           $Xi,$Ij,$Ij             # accumulate $Xi
+.Ltail_no_xor_avx:
+       vpunpckhqdq     $Ij,$Ij,$T1
+       vpxor           $Xlo,$Zlo,$Zlo
+       vpclmulqdq      \$0x00,$Hkey,$Ij,$Xlo
+       vpxor           $Ij,$T1,$T1
+       vpxor           $Xhi,$Zhi,$Zhi
+       vpclmulqdq      \$0x11,$Hkey,$Ij,$Xhi
+       vpxor           $Xmi,$Zmi,$Zmi
+       vpclmulqdq      \$0x00,$HK,$T1,$Xmi
+
+       vmovdqu         (%r10),$Tred
+
+       vpxor           $Xlo,$Zlo,$Xi
+       vpxor           $Xhi,$Zhi,$Xo
+       vpxor           $Xmi,$Zmi,$Zmi
+
+       vpxor           $Xi, $Zmi,$Zmi          # aggregated Karatsuba post-processing
+       vpxor           $Xo, $Zmi,$Zmi
+       vpslldq         \$8, $Zmi,$T2
+       vpsrldq         \$8, $Zmi,$Zmi
+       vpxor           $T2, $Xi, $Xi
+       vpxor           $Zmi,$Xo, $Xo
+
+       vpclmulqdq      \$0x10,$Tred,$Xi,$T2    # 1st phase
+       vpalignr        \$8,$Xi,$Xi,$Xi
+       vpxor           $T2,$Xi,$Xi
+
+       vpclmulqdq      \$0x10,$Tred,$Xi,$T2    # 2nd phase
+       vpalignr        \$8,$Xi,$Xi,$Xi
+       vpxor           $Xo,$Xi,$Xi
+       vpxor           $T2,$Xi,$Xi
+
+       cmp             \$0,$len
+       jne             .Lshort_avx
+
+       vpshufb         $bswap,$Xi,$Xi
+       vmovdqu         $Xi,($Xip)
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       movaps  (%rsp),%xmm6
+       movaps  0x10(%rsp),%xmm7
+       movaps  0x20(%rsp),%xmm8
+       movaps  0x30(%rsp),%xmm9
+       movaps  0x40(%rsp),%xmm10
+       movaps  0x50(%rsp),%xmm11
+       movaps  0x60(%rsp),%xmm12
+       movaps  0x70(%rsp),%xmm13
+       movaps  0x80(%rsp),%xmm14
+       movaps  0x90(%rsp),%xmm15
+       lea     0xa8(%rsp),%rsp
+.LSEH_end_gcm_ghash_avx:
+___
+$code.=<<___;
+       ret
+.size  gcm_ghash_avx,.-gcm_ghash_avx
+___
+} else {
+$code.=<<___;
+       jmp     .L_ghash_clmul
+.size  gcm_ghash_avx,.-gcm_ghash_avx
+___
+}
+\f
+$code.=<<___;
 .align 64
 .Lbswap_mask:
        .byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
 .L0x1c2_polynomial:
        .byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+.L7_mask:
+       .long   7,0,7,0
+.L7_mask_poly:
+       .long   7,0,`0xE1<<1`,0
 .align 64
 .type  .Lrem_4bit,\@object
 .Lrem_4bit:
@@ -774,10 +1698,24 @@ se_handler:
        .rva    .LSEH_end_gcm_ghash_4bit
        .rva    .LSEH_info_gcm_ghash_4bit
 
+       .rva    .LSEH_begin_gcm_init_clmul
+       .rva    .LSEH_end_gcm_init_clmul
+       .rva    .LSEH_info_gcm_init_clmul
+
        .rva    .LSEH_begin_gcm_ghash_clmul
        .rva    .LSEH_end_gcm_ghash_clmul
        .rva    .LSEH_info_gcm_ghash_clmul
+___
+$code.=<<___   if ($avx);
+       .rva    .LSEH_begin_gcm_init_avx
+       .rva    .LSEH_end_gcm_init_avx
+       .rva    .LSEH_info_gcm_init_clmul
 
+       .rva    .LSEH_begin_gcm_ghash_avx
+       .rva    .LSEH_end_gcm_ghash_avx
+       .rva    .LSEH_info_gcm_ghash_clmul
+___
+$code.=<<___;
 .section       .xdata
 .align 8
 .LSEH_info_gcm_gmult_4bit:
@@ -788,14 +1726,23 @@ se_handler:
        .byte   9,0,0,0
        .rva    se_handler
        .rva    .Lghash_prologue,.Lghash_epilogue       # HandlerData
+.LSEH_info_gcm_init_clmul:
+       .byte   0x01,0x08,0x03,0x00
+       .byte   0x08,0x68,0x00,0x00     #movaps 0x00(rsp),xmm6
+       .byte   0x04,0x22,0x00,0x00     #sub    rsp,0x18
 .LSEH_info_gcm_ghash_clmul:
-       .byte   0x01,0x1f,0x0b,0x00
-       .byte   0x1f,0xa8,0x04,0x00     #movaps 0x40(rsp),xmm10
-       .byte   0x19,0x98,0x03,0x00     #movaps 0x30(rsp),xmm9
-       .byte   0x13,0x88,0x02,0x00     #movaps 0x20(rsp),xmm8
-       .byte   0x0d,0x78,0x01,0x00     #movaps 0x10(rsp),xmm7
-       .byte   0x08,0x68,0x00,0x00     #movaps (rsp),xmm6
-       .byte   0x04,0xa2,0x00,0x00     #sub    rsp,0x58
+       .byte   0x01,0x33,0x16,0x00
+       .byte   0x33,0xf8,0x09,0x00     #movaps 0x90(rsp),xmm15
+       .byte   0x2e,0xe8,0x08,0x00     #movaps 0x80(rsp),xmm14
+       .byte   0x29,0xd8,0x07,0x00     #movaps 0x70(rsp),xmm13
+       .byte   0x24,0xc8,0x06,0x00     #movaps 0x60(rsp),xmm12
+       .byte   0x1f,0xb8,0x05,0x00     #movaps 0x50(rsp),xmm11
+       .byte   0x1a,0xa8,0x04,0x00     #movaps 0x40(rsp),xmm10
+       .byte   0x15,0x98,0x03,0x00     #movaps 0x30(rsp),xmm9
+       .byte   0x10,0x88,0x02,0x00     #movaps 0x20(rsp),xmm8
+       .byte   0x0c,0x78,0x01,0x00     #movaps 0x10(rsp),xmm7
+       .byte   0x08,0x68,0x00,0x00     #movaps 0x00(rsp),xmm6
+       .byte   0x04,0x01,0x15,0x00     #sub    rsp,0xa8
 ___
 }
 \f
diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl
new file mode 100755 (executable)
index 0000000..e76a58c
--- /dev/null
@@ -0,0 +1,234 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for PowerISA v2.07.
+#
+# July 2014
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This initial
+# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
+# faster than "4-bit" integer-only compiler-generated 64-bit code.
+# "Initial version" means that there is room for further improvement.
+
+$flavour=shift;
+$output =shift;
+
+if ($flavour =~ /64/) {
+       $SIZE_T=8;
+       $LRSAVE=2*$SIZE_T;
+       $STU="stdu";
+       $POP="ld";
+       $PUSH="std";
+} elsif ($flavour =~ /32/) {
+       $SIZE_T=4;
+       $LRSAVE=$SIZE_T;
+       $STU="stwu";
+       $POP="lwz";
+       $PUSH="stw";
+} else { die "nonsense $flavour"; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";	# "or", not "||": "||" would bind to the (always true) command string, so a failed open could never reach die
+
+my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));   # argument block
+
+my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
+my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my $vrsave="r12";
+
+$code=<<___;
+.machine       "any"
+
+.text
+
+.globl .gcm_init_p8
+.align 5
+.gcm_init_p8:
+       lis             r0,0xfff0
+       li              r8,0x10
+       mfspr           $vrsave,256
+       li              r9,0x20
+       mtspr           256,r0
+       li              r10,0x30
+       lvx_u           $H,0,r4                 # load H
+
+       vspltisb        $xC2,-16                # 0xf0
+       vspltisb        $t0,1                   # one
+       vaddubm         $xC2,$xC2,$xC2          # 0xe0
+       vxor            $zero,$zero,$zero
+       vor             $xC2,$xC2,$t0           # 0xe1
+       vsldoi          $xC2,$xC2,$zero,15      # 0xe1...
+       vsldoi          $t1,$zero,$t0,1         # ...1
+       vaddubm         $xC2,$xC2,$xC2          # 0xc2...
+       vspltisb        $t2,7
+       vor             $xC2,$xC2,$t1           # 0xc2....01
+       vspltb          $t1,$H,0                # most significant byte
+       vsl             $H,$H,$t0               # H<<=1
+       vsrab           $t1,$t1,$t2             # broadcast carry bit
+       vand            $t1,$t1,$xC2
+       vxor            $H,$H,$t1               # twisted H
+
+       vsldoi          $H,$H,$H,8              # twist even more ...
+       vsldoi          $xC2,$zero,$xC2,8       # 0xc2.0
+       vsldoi          $Hl,$zero,$H,8          # ... and split
+       vsldoi          $Hh,$H,$zero,8
+
+       stvx_u          $xC2,0,r3               # save pre-computed table
+       stvx_u          $Hl,r8,r3
+       stvx_u          $H, r9,r3
+       stvx_u          $Hh,r10,r3
+
+       mtspr           256,$vrsave
+       blr
+       .long           0
+       .byte           0,12,0x14,0,0,0,2,0
+       .long           0
+.size  .gcm_init_p8,.-.gcm_init_p8
+
+.globl .gcm_gmult_p8
+.align 5
+.gcm_gmult_p8:
+       lis             r0,0xfff8
+       li              r8,0x10
+       mfspr           $vrsave,256
+       li              r9,0x20
+       mtspr           256,r0
+       li              r10,0x30
+       lvx_u           $IN,0,$Xip              # load Xi
+
+       lvx_u           $Hl,r8,$Htbl            # load pre-computed table
+        le?lvsl        $lemask,r0,r0
+       lvx_u           $H, r9,$Htbl
+        le?vspltisb    $t0,0x07
+       lvx_u           $Hh,r10,$Htbl
+        le?vxor        $lemask,$lemask,$t0
+       lvx_u           $xC2,0,$Htbl
+        le?vperm       $IN,$IN,$IN,$lemask
+       vxor            $zero,$zero,$zero
+
+       vpmsumd         $Xl,$IN,$Hl             # H.lo·Xi.lo
+       vpmsumd         $Xm,$IN,$H              # H.hi·Xi.lo+H.lo·Xi.hi
+       vpmsumd         $Xh,$IN,$Hh             # H.hi·Xi.hi
+
+       vpmsumd         $t2,$Xl,$xC2            # 1st phase
+
+       vsldoi          $t0,$Xm,$zero,8
+       vsldoi          $t1,$zero,$Xm,8
+       vxor            $Xl,$Xl,$t0
+       vxor            $Xh,$Xh,$t1
+
+       vsldoi          $Xl,$Xl,$Xl,8
+       vxor            $Xl,$Xl,$t2
+
+       vsldoi          $t1,$Xl,$Xl,8           # 2nd phase
+       vpmsumd         $Xl,$Xl,$xC2
+       vxor            $t1,$t1,$Xh
+       vxor            $Xl,$Xl,$t1
+
+       le?vperm        $Xl,$Xl,$Xl,$lemask
+       stvx_u          $Xl,0,$Xip              # write out Xi
+
+       mtspr           256,$vrsave
+       blr
+       .long           0
+       .byte           0,12,0x14,0,0,0,2,0
+       .long           0
+.size  .gcm_gmult_p8,.-.gcm_gmult_p8
+
+.globl .gcm_ghash_p8
+.align 5
+.gcm_ghash_p8:
+       lis             r0,0xfff8
+       li              r8,0x10
+       mfspr           $vrsave,256
+       li              r9,0x20
+       mtspr           256,r0
+       li              r10,0x30
+       lvx_u           $Xl,0,$Xip              # load Xi
+
+       lvx_u           $Hl,r8,$Htbl            # load pre-computed table
+        le?lvsl        $lemask,r0,r0
+       lvx_u           $H, r9,$Htbl
+        le?vspltisb    $t0,0x07
+       lvx_u           $Hh,r10,$Htbl
+        le?vxor        $lemask,$lemask,$t0
+       lvx_u           $xC2,0,$Htbl
+        le?vperm       $Xl,$Xl,$Xl,$lemask
+       vxor            $zero,$zero,$zero
+
+       lvx_u           $IN,0,$inp
+       addi            $inp,$inp,16
+       subi            $len,$len,16
+        le?vperm       $IN,$IN,$IN,$lemask
+       vxor            $IN,$IN,$Xl
+       b               Loop
+
+.align 5
+Loop:
+        subic          $len,$len,16
+       vpmsumd         $Xl,$IN,$Hl             # H.lo·Xi.lo
+        subfe.         r0,r0,r0                # borrow?-1:0
+       vpmsumd         $Xm,$IN,$H              # H.hi·Xi.lo+H.lo·Xi.hi
+        and            r0,r0,$len
+       vpmsumd         $Xh,$IN,$Hh             # H.hi·Xi.hi
+        add            $inp,$inp,r0
+
+       vpmsumd         $t2,$Xl,$xC2            # 1st phase
+
+       vsldoi          $t0,$Xm,$zero,8
+       vsldoi          $t1,$zero,$Xm,8
+       vxor            $Xl,$Xl,$t0
+       vxor            $Xh,$Xh,$t1
+
+       vsldoi          $Xl,$Xl,$Xl,8
+       vxor            $Xl,$Xl,$t2
+        lvx_u          $IN,0,$inp
+        addi           $inp,$inp,16
+
+       vsldoi          $t1,$Xl,$Xl,8           # 2nd phase
+       vpmsumd         $Xl,$Xl,$xC2
+        le?vperm       $IN,$IN,$IN,$lemask
+       vxor            $t1,$t1,$Xh
+       vxor            $IN,$IN,$t1
+       vxor            $IN,$IN,$Xl
+       beq             Loop                    # did $len-=16 borrow?
+
+       vxor            $Xl,$Xl,$t1
+       le?vperm        $Xl,$Xl,$Xl,$lemask
+       stvx_u          $Xl,0,$Xip              # write out Xi
+
+       mtspr           256,$vrsave
+       blr
+       .long           0
+       .byte           0,12,0x14,0,0,0,4,0
+       .long           0
+.size  .gcm_ghash_p8,.-.gcm_ghash_p8
+
+.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align  2
+___
+
+foreach (split("\n",$code)) {	# emit the template line by line, resolving the le?/be? endian-specific prefixes
+       if ($flavour =~ /le$/o) {       # little-endian
+           s/le\?//o           or	# drop the first "le?" marker so the instruction is kept as-is
+           s/be\?/#be#/o;	# otherwise rewrite a "be?" marker to "#be#"
+       } else {	# any flavour not ending in "le"
+           s/le\?/#le#/o       or	# mirror image: rewrite "le?" to "#le#" ...
+           s/be\?//o;	# ... or drop the "be?" marker
+       }
+       print $_,"\n";
+}
+
+close STDOUT; # enforce flush
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
new file mode 100644 (file)
index 0000000..0b9cd73
--- /dev/null
@@ -0,0 +1,409 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication.
+#
+# June 2014
+#
+# Initial version was developed in tight cooperation with Ard
+# Biesheuvel <ard.biesheuvel@linaro.org> from bits-n-pieces from
+# other assembly modules. Just like aesv8-armx.pl this module
+# supports both AArch32 and AArch64 execution modes.
+#
+# July 2014
+#
+# Implement 2x aggregated reduction [see ghash-x86.pl for background
+# information].
+#
+# Current performance in cycles per processed byte:
+#
+#              PMULL[2]        32-bit NEON(*)
+# Apple A7     0.92            5.62
+# Cortex-A53   1.01            8.39
+# Cortex-A57   1.17            7.61
+#
+# (*)  presented for reference/comparison purposes;
+
+$flavour = shift;
+open STDOUT,">".shift;
+
+$Xi="x0";      # argument block
+$Htbl="x1";
+$inp="x2";
+$len="x3";
+
+$inc="x12";
+
+{
+my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3));
+my ($t0,$t1,$t2,$xC2,$H,$Hhl,$H2)=map("q$_",(8..14));
+
+$code=<<___;
+#include "arm_arch.h"
+
+.text
+___
+$code.=".arch  armv8-a+crypto\n"       if ($flavour =~ /64/);
+$code.=".fpu   neon\n.code     32\n"   if ($flavour !~ /64/);
+
+################################################################################
+# void gcm_init_v8(u128 Htable[16],const u64 H[2]);
+#
+# input:       128-bit H - secret parameter E(K,0^128)
+# output:      precomputed table filled with degrees of twisted H;
+#              H is twisted to handle reverse bitness of GHASH;
+#              only few of 16 slots of Htable[16] are used;
+#              data is opaque to outside world (which allows to
+#              optimize the code independently);
+#
+$code.=<<___;
+.global        gcm_init_v8
+.type  gcm_init_v8,%function
+.align 4
+gcm_init_v8:
+       vld1.64         {$t1},[x1]              @ load input H
+       vmov.i8         $xC2,#0xe1
+       vshl.i64        $xC2,$xC2,#57           @ 0xc2.0
+       vext.8          $IN,$t1,$t1,#8
+       vshr.u64        $t2,$xC2,#63
+       vdup.32         $t1,${t1}[1]
+       vext.8          $t0,$t2,$xC2,#8         @ t0=0xc2....01
+       vshr.u64        $t2,$IN,#63
+       vshr.s32        $t1,$t1,#31             @ broadcast carry bit
+       vand            $t2,$t2,$t0
+       vshl.i64        $IN,$IN,#1
+       vext.8          $t2,$t2,$t2,#8
+       vand            $t0,$t0,$t1
+       vorr            $IN,$IN,$t2             @ H<<<=1
+       veor            $H,$IN,$t0              @ twisted H
+       vst1.64         {$H},[x0],#16           @ store Htable[0]
+
+       @ calculate H^2
+       vext.8          $t0,$H,$H,#8            @ Karatsuba pre-processing
+       vpmull.p64      $Xl,$H,$H
+       veor            $t0,$t0,$H
+       vpmull2.p64     $Xh,$H,$H
+       vpmull.p64      $Xm,$t0,$t0
+
+       vext.8          $t1,$Xl,$Xh,#8          @ Karatsuba post-processing
+       veor            $t2,$Xl,$Xh
+       veor            $Xm,$Xm,$t1
+       veor            $Xm,$Xm,$t2
+       vpmull.p64      $t2,$Xl,$xC2            @ 1st phase
+
+       vmov            $Xh#lo,$Xm#hi           @ Xh|Xm - 256-bit result
+       vmov            $Xm#hi,$Xl#lo           @ Xm is rotated Xl
+       veor            $Xl,$Xm,$t2
+
+       vext.8          $t2,$Xl,$Xl,#8          @ 2nd phase
+       vpmull.p64      $Xl,$Xl,$xC2
+       veor            $t2,$t2,$Xh
+       veor            $H2,$Xl,$t2
+
+       vext.8          $t1,$H2,$H2,#8          @ Karatsuba pre-processing
+       veor            $t1,$t1,$H2
+       vext.8          $Hhl,$t0,$t1,#8         @ pack Karatsuba pre-processed
+       vst1.64         {$Hhl-$H2},[x0]         @ store Htable[1..2]
+
+       ret
+.size  gcm_init_v8,.-gcm_init_v8
+___
+################################################################################
+# void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
+#
+# input:       Xi - current hash value;
+#              Htable - table precomputed in gcm_init_v8;
+# output:      Xi - next hash value Xi;
+#
+$code.=<<___;
+.global        gcm_gmult_v8
+.type  gcm_gmult_v8,%function
+.align 4
+gcm_gmult_v8:
+       vld1.64         {$t1},[$Xi]             @ load Xi
+       vmov.i8         $xC2,#0xe1
+       vld1.64         {$H-$Hhl},[$Htbl]       @ load twisted H, ...
+       vshl.u64        $xC2,$xC2,#57
+#ifndef __ARMEB__
+       vrev64.8        $t1,$t1
+#endif
+       vext.8          $IN,$t1,$t1,#8
+
+       vpmull.p64      $Xl,$H,$IN              @ H.lo·Xi.lo
+       veor            $t1,$t1,$IN             @ Karatsuba pre-processing
+       vpmull2.p64     $Xh,$H,$IN              @ H.hi·Xi.hi
+       vpmull.p64      $Xm,$Hhl,$t1            @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+
+       vext.8          $t1,$Xl,$Xh,#8          @ Karatsuba post-processing
+       veor            $t2,$Xl,$Xh
+       veor            $Xm,$Xm,$t1
+       veor            $Xm,$Xm,$t2
+       vpmull.p64      $t2,$Xl,$xC2            @ 1st phase of reduction
+
+       vmov            $Xh#lo,$Xm#hi           @ Xh|Xm - 256-bit result
+       vmov            $Xm#hi,$Xl#lo           @ Xm is rotated Xl
+       veor            $Xl,$Xm,$t2
+
+       vext.8          $t2,$Xl,$Xl,#8          @ 2nd phase of reduction
+       vpmull.p64      $Xl,$Xl,$xC2
+       veor            $t2,$t2,$Xh
+       veor            $Xl,$Xl,$t2
+
+#ifndef __ARMEB__
+       vrev64.8        $Xl,$Xl
+#endif
+       vext.8          $Xl,$Xl,$Xl,#8
+       vst1.64         {$Xl},[$Xi]             @ write out Xi
+
+       ret
+.size  gcm_gmult_v8,.-gcm_gmult_v8
+___
+################################################################################
+# void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+#
+# input:       table precomputed in gcm_init_v8;
+#              current hash value Xi;
+#              pointer to input data;
+#              length of input data in bytes, but divisible by block size;
+# output:      next hash value Xi;
+#
+$code.=<<___;
+.global        gcm_ghash_v8
+.type  gcm_ghash_v8,%function
+.align 4
+gcm_ghash_v8:
+___
+$code.=<<___           if ($flavour !~ /64/);
+       vstmdb          sp!,{d8-d15}            @ 32-bit ABI says so
+___
+$code.=<<___;
+       vld1.64         {$Xl},[$Xi]             @ load [rotated] Xi
+                                               @ "[rotated]" means that
+                                               @ loaded value would have
+                                               @ to be rotated in order to
+                                               @ make it appear as in
+                                               @ algorithm specification
+       subs            $len,$len,#32           @ see if $len is 32 or larger
+       mov             $inc,#16                @ $inc is used as post-
+                                               @ increment for input pointer;
+                                               @ as loop is modulo-scheduled
+                                               @ $inc is zeroed just in time
+                                               @ to preclude overstepping
+                                               @ inp[len], which means that
+                                               @ last block[s] are actually
+                                               @ loaded twice, but last
+                                               @ copy is not processed
+       vld1.64         {$H-$Hhl},[$Htbl],#32   @ load twisted H, ..., H^2
+       vmov.i8         $xC2,#0xe1
+       vld1.64         {$H2},[$Htbl]
+       cclr            $inc,eq                 @ is it time to zero $inc?
+       vext.8          $Xl,$Xl,$Xl,#8          @ rotate Xi
+       vld1.64         {$t0},[$inp],#16        @ load [rotated] I[0]
+       vshl.u64        $xC2,$xC2,#57           @ compose 0xc2.0 constant
+#ifndef __ARMEB__
+       vrev64.8        $t0,$t0
+       vrev64.8        $Xl,$Xl
+#endif
+       vext.8          $IN,$t0,$t0,#8          @ rotate I[0]
+       b.lo            .Lodd_tail_v8           @ $len was less than 32
+___
+{ my ($Xln,$Xmn,$Xhn,$In) = map("q$_",(4..7));
+       #######
+       # Xi+2 =[H*(Ii+1 + Xi+1)] mod P =
+       #       [(H*Ii+1) + (H*Xi+1)] mod P =
+       #       [(H*Ii+1) + H^2*(Ii+Xi)] mod P
+       #
+$code.=<<___;
+       vld1.64         {$t1},[$inp],$inc       @ load [rotated] I[1]
+#ifndef __ARMEB__
+       vrev64.8        $t1,$t1
+#endif
+       vext.8          $In,$t1,$t1,#8
+       veor            $IN,$IN,$Xl             @ I[i]^=Xi
+       vpmull.p64      $Xln,$H,$In             @ H·Ii+1
+       veor            $t1,$t1,$In             @ Karatsuba pre-processing
+       vpmull2.p64     $Xhn,$H,$In
+       b               .Loop_mod2x_v8
+
+.align 4
+.Loop_mod2x_v8:
+       vext.8          $t2,$IN,$IN,#8
+       subs            $len,$len,#32           @ is there more data?
+       vpmull.p64      $Xl,$H2,$IN             @ H^2.lo·Xi.lo
+       cclr            $inc,lo                 @ is it time to zero $inc?
+
+        vpmull.p64     $Xmn,$Hhl,$t1
+       veor            $t2,$t2,$IN             @ Karatsuba pre-processing
+       vpmull2.p64     $Xh,$H2,$IN             @ H^2.hi·Xi.hi
+       veor            $Xl,$Xl,$Xln            @ accumulate
+       vpmull2.p64     $Xm,$Hhl,$t2            @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
+        vld1.64        {$t0},[$inp],$inc       @ load [rotated] I[i+2]
+
+       veor            $Xh,$Xh,$Xhn
+        cclr           $inc,eq                 @ is it time to zero $inc?
+       veor            $Xm,$Xm,$Xmn
+
+       vext.8          $t1,$Xl,$Xh,#8          @ Karatsuba post-processing
+       veor            $t2,$Xl,$Xh
+       veor            $Xm,$Xm,$t1
+        vld1.64        {$t1},[$inp],$inc       @ load [rotated] I[i+3]
+#ifndef __ARMEB__
+        vrev64.8       $t0,$t0
+#endif
+       veor            $Xm,$Xm,$t2
+       vpmull.p64      $t2,$Xl,$xC2            @ 1st phase of reduction
+
+#ifndef __ARMEB__
+        vrev64.8       $t1,$t1
+#endif
+       vmov            $Xh#lo,$Xm#hi           @ Xh|Xm - 256-bit result
+       vmov            $Xm#hi,$Xl#lo           @ Xm is rotated Xl
+        vext.8         $In,$t1,$t1,#8
+        vext.8         $IN,$t0,$t0,#8
+       veor            $Xl,$Xm,$t2
+        vpmull.p64     $Xln,$H,$In             @ H·Ii+1
+       veor            $IN,$IN,$Xh             @ accumulate $IN early
+
+       vext.8          $t2,$Xl,$Xl,#8          @ 2nd phase of reduction
+       vpmull.p64      $Xl,$Xl,$xC2
+       veor            $IN,$IN,$t2
+        veor           $t1,$t1,$In             @ Karatsuba pre-processing
+       veor            $IN,$IN,$Xl
+        vpmull2.p64    $Xhn,$H,$In
+       b.hs            .Loop_mod2x_v8          @ there was at least 32 more bytes
+
+       veor            $Xh,$Xh,$t2
+       vext.8          $IN,$t0,$t0,#8          @ re-construct $IN
+       adds            $len,$len,#32           @ re-construct $len
+       veor            $Xl,$Xl,$Xh             @ re-construct $Xl
+       b.eq            .Ldone_v8               @ is $len zero?
+___
+}
+$code.=<<___;
+.Lodd_tail_v8:
+       vext.8          $t2,$Xl,$Xl,#8
+       veor            $IN,$IN,$Xl             @ inp^=Xi
+       veor            $t1,$t0,$t2             @ $t1 is rotated inp^Xi
+
+       vpmull.p64      $Xl,$H,$IN              @ H.lo·Xi.lo
+       veor            $t1,$t1,$IN             @ Karatsuba pre-processing
+       vpmull2.p64     $Xh,$H,$IN              @ H.hi·Xi.hi
+       vpmull.p64      $Xm,$Hhl,$t1            @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+
+       vext.8          $t1,$Xl,$Xh,#8          @ Karatsuba post-processing
+       veor            $t2,$Xl,$Xh
+       veor            $Xm,$Xm,$t1
+       veor            $Xm,$Xm,$t2
+       vpmull.p64      $t2,$Xl,$xC2            @ 1st phase of reduction
+
+       vmov            $Xh#lo,$Xm#hi           @ Xh|Xm - 256-bit result
+       vmov            $Xm#hi,$Xl#lo           @ Xm is rotated Xl
+       veor            $Xl,$Xm,$t2
+
+       vext.8          $t2,$Xl,$Xl,#8          @ 2nd phase of reduction
+       vpmull.p64      $Xl,$Xl,$xC2
+       veor            $t2,$t2,$Xh
+       veor            $Xl,$Xl,$t2
+
+.Ldone_v8:
+#ifndef __ARMEB__
+       vrev64.8        $Xl,$Xl
+#endif
+       vext.8          $Xl,$Xl,$Xl,#8
+       vst1.64         {$Xl},[$Xi]             @ write out Xi
+
+___
+$code.=<<___           if ($flavour !~ /64/);
+       vldmia          sp!,{d8-d15}            @ 32-bit ABI says so
+___
+$code.=<<___;
+       ret
+.size  gcm_ghash_v8,.-gcm_ghash_v8
+___
+}
+$code.=<<___;
+.asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align  2
+___
+
+if ($flavour =~ /64/) {                        ######## 64-bit code
+    sub unvmov {	# turn the operand text of a legacy "vmov qN#lo|hi,qM#lo|hi" into an "ins vN.d[i],vM.d[j]" instruction string
+       my $arg=shift;
+
+       $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&	# on no match the false match result is what the sub returns
+       sprintf "ins    v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1;	# "lo" maps to element 0, "hi" to element 1
+    }
+    foreach(split("\n",$code)) {
+       s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o     or
+       s/vmov\.i8/movi/o               or      # fix up legacy mnemonics
+       s/vmov\s+(.*)/unvmov($1)/geo    or
+       s/vext\.8/ext/o                 or
+       s/vshr\.s/sshr\.s/o             or
+       s/vshr/ushr/o                   or
+       s/^(\s+)v/$1/o                  or      # strip off v prefix
+       s/\bbx\s+lr\b/ret/o;
+
+       s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;  # old->new registers
+       s/@\s/\/\//o;                           # old->new style commentary
+
+       # fix up remaining legacy suffixes
+       s/\.[ui]?8(\s)/$1/o;
+       s/\.[uis]?32//o and s/\.16b/\.4s/go;
+       m/\.p64/o and s/\.16b/\.1q/o;           # 1st pmull argument
+       m/l\.p64/o and s/\.16b/\.1d/go;         # 2nd and 3rd pmull arguments
+       s/\.[uisp]?64//o and s/\.16b/\.2d/go;
+       s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
+
+       print $_,"\n";
+    }
+} else {                               ######## 32-bit code
+    sub unvdup32 {	# turn the operand text "qN,qM[k]" of a vdup.32 into a "vdup.32 qN,dX[j]" instruction string
+       my $arg=shift;
+
+       $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&	# on no match the false match result is what the sub returns
+       sprintf "vdup.32        q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;	# X = 2*M + (k>>1), j = k&1
+    }
+    sub unvpmullp64 {	# hand-encode "pmull/pmull2 qD,qN,qM" as a raw .byte sequence; args: mnemonic, operand text
+       my ($mnemonic,$arg)=@_;
+
+       if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) {	# only three-q-register operand lists are handled
+           my $word = 0xf2a00e00|(($1&7)<<13)|(($1&8)<<19)	# base opcode word with the three register numbers scattered into it
+                                |(($2&7)<<17)|(($2&8)<<4)
+                                |(($3&7)<<1) |(($3&8)<<2);
+           $word |= 0x00010001  if ($mnemonic =~ "2");	# mnemonic containing "2" (pmull2): additionally set bits 0 and 16
+           # since ARMv7 instructions are always encoded little-endian.
+           # correct solution is to use .inst directive, but older
+           # assemblers don't implement it:-(
+           sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",	# bytes listed least-significant first; original mnemonic and operands kept as a trailing comment
+                       $word&0xff,($word>>8)&0xff,
+                       ($word>>16)&0xff,($word>>24)&0xff,
+                       $mnemonic,$arg;
+       }
+    }
+
+    foreach(split("\n",$code)) {
+       s/\b[wx]([0-9]+)\b/r$1/go;              # new->old registers
+       s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;   # new->old registers
+       s/\/\/\s?/@ /o;                         # new->old style commentary
+
+       # fix up remaining new-style suffixes
+       s/\],#[0-9]+/]!/o;
+
+       s/cclr\s+([^,]+),\s*([a-z]+)/mov$2      $1,#0/o                 or
+       s/vdup\.32\s+(.*)/unvdup32($1)/geo                              or
+       s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo                or
+       s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo       or
+       s/^(\s+)b\./$1b/o                                               or
+       s/^(\s+)ret/$1bx\tlr/o;
+
+       print $_,"\n";
+    }
+}
+
+close STDOUT; # enforce flush
index 1ed7967..c13caea 100644 (file)
@@ -59,7 +59,7 @@
 #endif
 #include <assert.h>
 
-#ifndef STRICT_ALIGNMENT
+#if !defined(STRICT_ALIGNMENT) && !defined(PEDANTIC)
 # define STRICT_ALIGNMENT 0
 #endif
 
index 0ee569f..e299131 100644 (file)
@@ -687,20 +687,31 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
 
 #endif
 
-#if     TABLE_BITS==4 && defined(GHASH_ASM)
+#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
 # if    !defined(I386_ONLY) && \
         (defined(__i386)        || defined(__i386__)    || \
          defined(__x86_64)      || defined(__x86_64__)  || \
          defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
 #  define GHASH_ASM_X86_OR_64
 #  define GCM_FUNCREF_4BIT
-extern unsigned int OPENSSL_ia32cap_P[2];
+extern unsigned int OPENSSL_ia32cap_P[];
 
 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                      size_t len);
 
+#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
+#   define gcm_init_avx   gcm_init_clmul
+#   define gcm_gmult_avx  gcm_gmult_clmul
+#   define gcm_ghash_avx  gcm_ghash_clmul
+#  else
+void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
+void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
+void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
+                   size_t len);
+#  endif
+
 #  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
 #   define GHASH_ASM_X86
 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
@@ -711,15 +722,41 @@ void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                         size_t len);
 #  endif
-# elif defined(__arm__) || defined(__arm)
+# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
 #  include "arm_arch.h"
-#  if __ARM_ARCH__>=7
+#  if __ARM_MAX_ARCH__>=7
 #   define GHASH_ASM_ARM
 #   define GCM_FUNCREF_4BIT
+#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
+#   if defined(__arm__) || defined(__arm)
+#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
+#   endif
+void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);
+void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
+void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
+void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
+                  size_t len);
 #  endif
+# elif defined(__sparc__) || defined(__sparc)
+#  include "sparc_arch.h"
+#  define GHASH_ASM_SPARC
+#  define GCM_FUNCREF_4BIT
+extern unsigned int OPENSSL_sparcv9cap_P[];
+void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
+void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
+void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
+                    size_t len);
+# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
+#  include "ppc_arch.h"
+#  define GHASH_ASM_PPC
+#  define GCM_FUNCREF_4BIT
+void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
+void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
+void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
+                  size_t len);
 # endif
 #endif
 
@@ -768,9 +805,15 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
 #  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
     if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
         OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
-        gcm_init_clmul(ctx->Htable, ctx->H.u);
-        ctx->gmult = gcm_gmult_clmul;
-        ctx->ghash = gcm_ghash_clmul;
+        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
+            gcm_init_avx(ctx->Htable, ctx->H.u);
+            ctx->gmult = gcm_gmult_avx;
+            ctx->ghash = gcm_ghash_avx;
+        } else {
+            gcm_init_clmul(ctx->Htable, ctx->H.u);
+            ctx->gmult = gcm_gmult_clmul;
+            ctx->ghash = gcm_ghash_clmul;
+        }
         return;
     }
 #  endif
@@ -792,13 +835,52 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
     ctx->ghash = gcm_ghash_4bit;
 #  endif
 # elif  defined(GHASH_ASM_ARM)
-    if (OPENSSL_armcap_P & ARMV7_NEON) {
+#  ifdef PMULL_CAPABLE
+    if (PMULL_CAPABLE) {
+        gcm_init_v8(ctx->Htable, ctx->H.u);
+        ctx->gmult = gcm_gmult_v8;
+        ctx->ghash = gcm_ghash_v8;
+    } else
+#  endif
+#  ifdef NEON_CAPABLE
+    if (NEON_CAPABLE) {
+        gcm_init_neon(ctx->Htable, ctx->H.u);
         ctx->gmult = gcm_gmult_neon;
         ctx->ghash = gcm_ghash_neon;
+    } else
+#  endif
+    {
+        gcm_init_4bit(ctx->Htable, ctx->H.u);
+        ctx->gmult = gcm_gmult_4bit;
+#  if defined(GHASH)
+        ctx->ghash = gcm_ghash_4bit;
+#  else
+        ctx->ghash = NULL;
+#  endif
+    }
+# elif  defined(GHASH_ASM_SPARC)
+    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
+        gcm_init_vis3(ctx->Htable, ctx->H.u);
+        ctx->gmult = gcm_gmult_vis3;
+        ctx->ghash = gcm_ghash_vis3;
+    } else {
+        gcm_init_4bit(ctx->Htable, ctx->H.u);
+        ctx->gmult = gcm_gmult_4bit;
+        ctx->ghash = gcm_ghash_4bit;
+    }
+# elif  defined(GHASH_ASM_PPC)
+    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
+        gcm_init_p8(ctx->Htable, ctx->H.u);
+        ctx->gmult = gcm_gmult_p8;
+        ctx->ghash = gcm_ghash_p8;
     } else {
         gcm_init_4bit(ctx->Htable, ctx->H.u);
         ctx->gmult = gcm_gmult_4bit;
+#  if defined(GHASH)
         ctx->ghash = gcm_ghash_4bit;
+#  else
+        ctx->ghash = NULL;
+#  endif
     }
 # else
     gcm_init_4bit(ctx->Htable, ctx->H.u);
index 880f020..fd48849 100644 (file)
@@ -148,6 +148,16 @@ int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx,
                           const unsigned char *inp, unsigned char *out,
                           size_t len, int enc);
 
+size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
+                       unsigned char *out,
+                       const unsigned char *in, size_t inlen,
+                       block128_f block);
+
+size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
+                         unsigned char *out,
+                         const unsigned char *in, size_t inlen,
+                         block128_f block);
+
 #ifdef  __cplusplus
 }
 #endif
index 296849b..fe14ec7 100644 (file)
@@ -25,39 +25,49 @@ typedef unsigned int u32;
 typedef unsigned char u8;
 
 #define STRICT_ALIGNMENT 1
-#if defined(__i386)     || defined(__i386__)    || \
-    defined(__x86_64)   || defined(__x86_64__)  || \
-    defined(_M_IX86)    || defined(_M_AMD64)    || defined(_M_X64) || \
-    defined(__s390__)   || defined(__s390x__)
-# undef STRICT_ALIGNMENT
+#ifndef PEDANTIC
+# if defined(__i386)    || defined(__i386__)    || \
+     defined(__x86_64)  || defined(__x86_64__)  || \
+     defined(_M_IX86)   || defined(_M_AMD64)    || defined(_M_X64) || \
+     defined(__aarch64__)                       || \
+     defined(__s390__)  || defined(__s390x__)
+#  undef STRICT_ALIGNMENT
+# endif
 #endif
 
 #if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
 # if defined(__GNUC__) && __GNUC__>=2
 #  if defined(__x86_64) || defined(__x86_64__)
-#   define BSWAP8(x) ({ u64 ret=(x);                    \
+#   define BSWAP8(x) ({ u64 ret_=(x);                   \
                         asm ("bswapq %0"                \
-                        : "+r"(ret));   ret;            })
-#   define BSWAP4(x) ({ u32 ret=(x);                    \
+                        : "+r"(ret_));   ret_;          })
+#   define BSWAP4(x) ({ u32 ret_=(x);                   \
                         asm ("bswapl %0"                \
-                        : "+r"(ret));   ret;            })
+                        : "+r"(ret_));   ret_;          })
 #  elif (defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)
-#   define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x);     \
+#   define BSWAP8(x) ({ u32 lo_=(u64)(x)>>32,hi_=(x);   \
                         asm ("bswapl %0; bswapl %1"     \
-                        : "+r"(hi),"+r"(lo));           \
-                        (u64)hi<<32|lo;                 })
-#   define BSWAP4(x) ({ u32 ret=(x);                    \
+                        : "+r"(hi_),"+r"(lo_));         \
+                        (u64)hi_<<32|lo_;               })
+#   define BSWAP4(x) ({ u32 ret_=(x);                   \
                         asm ("bswapl %0"                \
-                        : "+r"(ret));   ret;            })
+                        : "+r"(ret_));   ret_;          })
+#  elif defined(__aarch64__)
+#   define BSWAP8(x) ({ u64 ret_;                       \
+                        asm ("rev %0,%1"                \
+                        : "=r"(ret_) : "r"(x)); ret_;   })
+#   define BSWAP4(x) ({ u32 ret_;                       \
+                        asm ("rev %w0,%w1"              \
+                        : "=r"(ret_) : "r"(x)); ret_;   })
 #  elif (defined(__arm__) || defined(__arm)) && !defined(STRICT_ALIGNMENT)
-#   define BSWAP8(x) ({  u32 lo=(u64)(x)>>32,hi=(x);     \
+#   define BSWAP8(x) ({ u32 lo_=(u64)(x)>>32,hi_=(x);   \
                         asm ("rev %0,%0; rev %1,%1"     \
-                        : "+r"(hi),"+r"(lo));           \
-                        (u64)hi<<32|lo;                 })
-#   define BSWAP4(x) ({ u32 ret                       \
+                        : "+r"(hi_),"+r"(lo_));         \
+                        (u64)hi_<<32|lo_;               })
+#   define BSWAP4(x) ({ u32 ret_;                       \
                         asm ("rev %0,%1"                \
-                        : "=r"(ret) : "r"((u32)(x)));   \
-                        ret                           })
+                        : "=r"(ret_) : "r"((u32)(x)));  \
+                        ret_;                           })
 #  endif
 # elif defined(_MSC_VER)
 #  if _MSC_VER>=1300
diff --git a/crypto/modes/wrap128.c b/crypto/modes/wrap128.c
new file mode 100644 (file)
index 0000000..4dcaf03
--- /dev/null
@@ -0,0 +1,138 @@
+/* crypto/modes/wrap128.c */
+/*
+ * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project.
+ */
+/* ====================================================================
+ * Copyright (c) 2013 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+#include "cryptlib.h"
+#include <openssl/modes.h>
+
+static const unsigned char default_iv[] = {
+    0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
+};
+
+/*
+ * Input size limit: lower than maximum of standards but far larger than
+ * anything that will be used in practice.
+ */
+#define CRYPTO128_WRAP_MAX (1UL << 31)
+
+/*
+ * Key wrap (RFC 3394 scheme): wraps in[0..inlen) into out[0..inlen + 8)
+ * and returns inlen + 8.  Returns 0 unless inlen is a multiple of 8 within
+ * [16, CRYPTO128_WRAP_MAX]; CRYPTO_128_unwrap rejects anything shorter.
+ */
+size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
+                       unsigned char *out, const unsigned char *in,
+                       size_t inlen, block128_f block)
+{
+    unsigned char B[16], *A = B, *R; /* A is the first 8 bytes of B */
+    size_t i, j, t = 1;
+    if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX))
+        return 0;
+    memmove(out + 8, in, inlen);     /* in and out may overlap */
+    if (!iv)                         /* NULL selects default_iv */
+        iv = default_iv;
+    memcpy(A, iv, 8);
+    for (j = 0; j < 6; j++) {
+        R = out + 8;
+        for (i = 0; i < inlen; i += 8, t++, R += 8) {
+            memcpy(B + 8, R, 8);
+            block(B, B, key);
+            A[7] ^= (unsigned char)(t & 0xff);
+            if (t > 0xff) {
+                A[6] ^= (unsigned char)((t >> 8) & 0xff);
+                A[5] ^= (unsigned char)((t >> 16) & 0xff);
+                A[4] ^= (unsigned char)((t >> 24) & 0xff);
+            }
+            memcpy(R, B + 8, 8);
+        }
+    }
+    memcpy(out, A, 8);
+    return inlen + 8;
+}
+
+/*
+ * Inverse of CRYPTO_128_wrap: unwraps in[8..inlen) into out and returns the
+ * payload length (inlen - 8), or 0 on a bad length or integrity mismatch.
+ */
+size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
+                         unsigned char *out, const unsigned char *in,
+                         size_t inlen, block128_f block)
+{
+    unsigned char B[16], *A = B, *R; /* A is the first 8 bytes of B */
+    size_t i, j, t;
+    inlen -= 8;    /* payload length; inlen < 8 wraps and fails the max check */
+    if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX))
+        return 0;
+    t = 6 * (inlen >> 3);
+    memcpy(A, in, 8);
+    memmove(out, in + 8, inlen);     /* in and out may overlap */
+    for (j = 0; j < 6; j++) {
+        R = out + inlen - 8;
+        for (i = 0; i < inlen; i += 8, t--, R -= 8) {
+            A[7] ^= (unsigned char)(t & 0xff);
+            if (t > 0xff) {
+                A[6] ^= (unsigned char)((t >> 8) & 0xff);
+                A[5] ^= (unsigned char)((t >> 16) & 0xff);
+                A[4] ^= (unsigned char)((t >> 24) & 0xff);
+            }
+            memcpy(B + 8, R, 8);
+            block(B, B, key);
+            memcpy(R, B + 8, 8);
+        }
+    }
+    if (memcmp(A, iv ? iv : default_iv, 8)) { /* NULL iv selects default_iv */
+        OPENSSL_cleanse(out, inlen); /* do not leak unverified plaintext */
+        return 0;
+    }
+    return inlen;
+}
index b23ef32..4e2d096 100644 (file)
@@ -62,7 +62,7 @@
 #include "o_str.h"
 
 #if !defined(OPENSSL_IMPLEMENTS_strncasecmp) && \
-    !defined(OPENSSL_SYSNAME_WIN32) && \
+    !defined(OPENSSL_SYSNAME_WIN32) && !defined(OPENSSL_SYSNAME_WINCE) && \
     !defined(NETWARE_CLIB)
 # include <strings.h>
 #endif
index e18b71d..58413fe 100644 (file)
@@ -246,9 +246,73 @@ struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result)
 
 static long date_to_julian(int y, int m, int d);
 static void julian_to_date(long jd, int *y, int *m, int *d);
+static int julian_adj(const struct tm *tm, int off_day, long offset_sec,
+                      long *pday, int *psec);
 
+/* Adjust *tm by off_day days and offset_sec seconds; 1 on success, else 0. */
 int OPENSSL_gmtime_adj(struct tm *tm, int off_day, long offset_sec)
 {
+    int time_sec, time_year, time_month, time_day;
+    long time_jd;
+
+    /* Convert time and offset into julian day and seconds */
+    if (!julian_adj(tm, off_day, offset_sec, &time_jd, &time_sec))
+        return 0;
+
+    /* Convert Julian day back to date */
+
+    julian_to_date(time_jd, &time_year, &time_month, &time_day);
+
+    if (time_year < 1900 || time_year > 9999)   /* tm is left untouched */
+        return 0;
+
+    /* Update tm structure */
+
+    tm->tm_year = time_year - 1900;
+    tm->tm_mon = time_month - 1;
+    tm->tm_mday = time_day;
+
+    tm->tm_hour = time_sec / 3600;
+    tm->tm_min = (time_sec / 60) % 60;
+    tm->tm_sec = time_sec % 60;
+
+    return 1;
+}
+
+/* Store to - from as *pday days plus *psec seconds; returns 0 on failure. */
+int OPENSSL_gmtime_diff(int *pday, int *psec,
+                        const struct tm *from, const struct tm *to)
+{
+    int from_sec, to_sec, diff_sec;
+    long from_jd, to_jd, diff_day;
+    if (!julian_adj(from, 0, 0, &from_jd, &from_sec))
+        return 0;
+    if (!julian_adj(to, 0, 0, &to_jd, &to_sec))
+        return 0;
+    diff_day = to_jd - from_jd;
+    diff_sec = to_sec - from_sec;
+    /* Adjust differences so both positive or both negative */
+    if (diff_day > 0 && diff_sec < 0) {
+        diff_day--;
+        diff_sec += SECS_PER_DAY;
+    }
+    if (diff_day < 0 && diff_sec > 0) {
+        diff_day++;
+        diff_sec -= SECS_PER_DAY;
+    }
+    /* Both outputs are optional: a NULL pointer is simply skipped */
+    if (pday)
+        *pday = (int)diff_day;
+    if (psec)
+        *psec = diff_sec;
+
+    return 1;
+}
+
+/* Convert tm structure and offset into julian day and seconds */
+static int julian_adj(const struct tm *tm, int off_day, long offset_sec,
+                      long *pday, int *psec)
+{
     int offset_hms, offset_day;
     long time_jd;
     int time_year, time_month, time_day;
@@ -284,25 +348,9 @@ int OPENSSL_gmtime_adj(struct tm *tm, int off_day, long offset_sec)
     if (time_jd < 0)
         return 0;
 
-    /* Convert Julian day back to date */
-
-    julian_to_date(time_jd, &time_year, &time_month, &time_day);
-
-    if (time_year < 1900 || time_year > 9999)
-        return 0;
-
-    /* Update tm structure */
-
-    tm->tm_year = time_year - 1900;
-    tm->tm_mon = time_month - 1;
-    tm->tm_mday = time_day;
-
-    tm->tm_hour = offset_hms / 3600;
-    tm->tm_min = (offset_hms / 60) % 60;
-    tm->tm_sec = offset_hms % 60;
-
+    *pday = time_jd;
+    *psec = offset_hms;
     return 1;
-
 }
 
 /*
@@ -354,27 +402,39 @@ int main(int argc, char **argv)
 
 int check_time(long offset)
 {
-    struct tm tm1, tm2;
+    struct tm tm1, tm2, o1;
+    int off_day, off_sec;
+    long toffset;
     time_t t1, t2;
     time(&t1);
     t2 = t1 + offset;
     OPENSSL_gmtime(&t2, &tm2);
     OPENSSL_gmtime(&t1, &tm1);
+    o1 = tm1;
     OPENSSL_gmtime_adj(&tm1, 0, offset);
-    if ((tm1.tm_year == tm2.tm_year) &&
-        (tm1.tm_mon == tm2.tm_mon) &&
-        (tm1.tm_mday == tm2.tm_mday) &&
-        (tm1.tm_hour == tm2.tm_hour) &&
-        (tm1.tm_min == tm2.tm_min) && (tm1.tm_sec == tm2.tm_sec))
-        return 1;
-    fprintf(stderr, "TIME ERROR!!\n");
-    fprintf(stderr, "Time1: %d/%d/%d, %d:%02d:%02d\n",
-            tm2.tm_mday, tm2.tm_mon + 1, tm2.tm_year + 1900,
-            tm2.tm_hour, tm2.tm_min, tm2.tm_sec);
-    fprintf(stderr, "Time2: %d/%d/%d, %d:%02d:%02d\n",
-            tm1.tm_mday, tm1.tm_mon + 1, tm1.tm_year + 1900,
-            tm1.tm_hour, tm1.tm_min, tm1.tm_sec);
-    return 0;
+    if ((tm1.tm_year != tm2.tm_year) ||
+        (tm1.tm_mon != tm2.tm_mon) ||
+        (tm1.tm_mday != tm2.tm_mday) ||
+        (tm1.tm_hour != tm2.tm_hour) ||
+        (tm1.tm_min != tm2.tm_min) || (tm1.tm_sec != tm2.tm_sec)) {
+        fprintf(stderr, "TIME ERROR!!\n");
+        fprintf(stderr, "Time1: %d/%d/%d, %d:%02d:%02d\n",
+                tm2.tm_mday, tm2.tm_mon + 1, tm2.tm_year + 1900,
+                tm2.tm_hour, tm2.tm_min, tm2.tm_sec);
+        fprintf(stderr, "Time2: %d/%d/%d, %d:%02d:%02d\n",
+                tm1.tm_mday, tm1.tm_mon + 1, tm1.tm_year + 1900,
+                tm1.tm_hour, tm1.tm_min, tm1.tm_sec);
+        return 0;
+    }
+    OPENSSL_gmtime_diff(&o1, &tm1, &off_day, &off_sec);
+    toffset = (long)off_day *SECS_PER_DAY + off_sec;
+    if (offset != toffset) {
+        fprintf(stderr, "TIME OFFSET ERROR!!\n");
+        fprintf(stderr, "Expected %ld, Got %ld (%d:%d)\n",
+                offset, toffset, off_day, off_sec);
+        return 0;
+    }
+    return 1;
 }
 
 #endif
index 901b200..a83a3d2 100644 (file)
@@ -64,5 +64,7 @@
 
 struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result);
 int OPENSSL_gmtime_adj(struct tm *tm, int offset_day, long offset_sec);
+int OPENSSL_gmtime_diff(int *pday, int *psec,
+                        const struct tm *from, const struct tm *to);
 
 #endif
index bc69665..b7e3cf2 100644 (file)
  * [including the GNU Public Licence.]
  */
 
-#define NUM_NID 920
-#define NUM_SN 913
-#define NUM_LN 913
-#define NUM_OBJ 857
+#define NUM_NID 958
+#define NUM_SN 951
+#define NUM_LN 951
+#define NUM_OBJ 890
 
-static const unsigned char lvalues[5974]={
+static const unsigned char lvalues[6255]={
 0x2A,0x86,0x48,0x86,0xF7,0x0D,               /* [  0] OBJ_rsadsi */
 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,          /* [  6] OBJ_pkcs */
 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x02,0x02,     /* [ 13] OBJ_md2 */
@@ -919,6 +919,39 @@ static const unsigned char lvalues[5974]={
 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x08,/* [5946] OBJ_mgf1 */
 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x0A,/* [5955] OBJ_rsassaPss */
 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x07,/* [5964] OBJ_rsaesOaep */
+0x2A,0x86,0x48,0xCE,0x3E,0x02,0x01,          /* [5973] OBJ_dhpublicnumber */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x01,/* [5980] OBJ_brainpoolP160r1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x02,/* [5989] OBJ_brainpoolP160t1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x03,/* [5998] OBJ_brainpoolP192r1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x04,/* [6007] OBJ_brainpoolP192t1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x05,/* [6016] OBJ_brainpoolP224r1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x06,/* [6025] OBJ_brainpoolP224t1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x07,/* [6034] OBJ_brainpoolP256r1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x08,/* [6043] OBJ_brainpoolP256t1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x09,/* [6052] OBJ_brainpoolP320r1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0A,/* [6061] OBJ_brainpoolP320t1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0B,/* [6070] OBJ_brainpoolP384r1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0C,/* [6079] OBJ_brainpoolP384t1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0D,/* [6088] OBJ_brainpoolP512r1 */
+0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0E,/* [6097] OBJ_brainpoolP512t1 */
+0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x09,/* [6106] OBJ_pSpecified */
+0x2B,0x81,0x05,0x10,0x86,0x48,0x3F,0x00,0x02,/* [6115] OBJ_dhSinglePass_stdDH_sha1kdf_scheme */
+0x2B,0x81,0x04,0x01,0x0B,0x00,               /* [6124] OBJ_dhSinglePass_stdDH_sha224kdf_scheme */
+0x2B,0x81,0x04,0x01,0x0B,0x01,               /* [6130] OBJ_dhSinglePass_stdDH_sha256kdf_scheme */
+0x2B,0x81,0x04,0x01,0x0B,0x02,               /* [6136] OBJ_dhSinglePass_stdDH_sha384kdf_scheme */
+0x2B,0x81,0x04,0x01,0x0B,0x03,               /* [6142] OBJ_dhSinglePass_stdDH_sha512kdf_scheme */
+0x2B,0x81,0x05,0x10,0x86,0x48,0x3F,0x00,0x03,/* [6148] OBJ_dhSinglePass_cofactorDH_sha1kdf_scheme */
+0x2B,0x81,0x04,0x01,0x0E,0x00,               /* [6157] OBJ_dhSinglePass_cofactorDH_sha224kdf_scheme */
+0x2B,0x81,0x04,0x01,0x0E,0x01,               /* [6163] OBJ_dhSinglePass_cofactorDH_sha256kdf_scheme */
+0x2B,0x81,0x04,0x01,0x0E,0x02,               /* [6169] OBJ_dhSinglePass_cofactorDH_sha384kdf_scheme */
+0x2B,0x81,0x04,0x01,0x0E,0x03,               /* [6175] OBJ_dhSinglePass_cofactorDH_sha512kdf_scheme */
+0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x02,/* [6181] OBJ_ct_precert_scts */
+0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x03,/* [6191] OBJ_ct_precert_poison */
+0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x04,/* [6201] OBJ_ct_precert_signer */
+0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x05,/* [6211] OBJ_ct_cert_scts */
+0x2B,0x06,0x01,0x04,0x01,0x82,0x37,0x3C,0x02,0x01,0x01,/* [6221] OBJ_jurisdictionLocalityName */
+0x2B,0x06,0x01,0x04,0x01,0x82,0x37,0x3C,0x02,0x01,0x02,/* [6232] OBJ_jurisdictionStateOrProvinceName */
+0x2B,0x06,0x01,0x04,0x01,0x82,0x37,0x3C,0x02,0x01,0x03,/* [6243] OBJ_jurisdictionCountryName */
 };
 
 static const ASN1_OBJECT nid_objs[NUM_NID]={
@@ -2399,12 +2432,95 @@ static const ASN1_OBJECT nid_objs[NUM_NID]={
 {"AES-256-CBC-HMAC-SHA1","aes-256-cbc-hmac-sha1",
        NID_aes_256_cbc_hmac_sha1,0,NULL,0},
 {"RSAES-OAEP","rsaesOaep",NID_rsaesOaep,9,&(lvalues[5964]),0},
+{"dhpublicnumber","X9.42 DH",NID_dhpublicnumber,7,&(lvalues[5973]),0},
+{"brainpoolP160r1","brainpoolP160r1",NID_brainpoolP160r1,9,
+       &(lvalues[5980]),0},
+{"brainpoolP160t1","brainpoolP160t1",NID_brainpoolP160t1,9,
+       &(lvalues[5989]),0},
+{"brainpoolP192r1","brainpoolP192r1",NID_brainpoolP192r1,9,
+       &(lvalues[5998]),0},
+{"brainpoolP192t1","brainpoolP192t1",NID_brainpoolP192t1,9,
+       &(lvalues[6007]),0},
+{"brainpoolP224r1","brainpoolP224r1",NID_brainpoolP224r1,9,
+       &(lvalues[6016]),0},
+{"brainpoolP224t1","brainpoolP224t1",NID_brainpoolP224t1,9,
+       &(lvalues[6025]),0},
+{"brainpoolP256r1","brainpoolP256r1",NID_brainpoolP256r1,9,
+       &(lvalues[6034]),0},
+{"brainpoolP256t1","brainpoolP256t1",NID_brainpoolP256t1,9,
+       &(lvalues[6043]),0},
+{"brainpoolP320r1","brainpoolP320r1",NID_brainpoolP320r1,9,
+       &(lvalues[6052]),0},
+{"brainpoolP320t1","brainpoolP320t1",NID_brainpoolP320t1,9,
+       &(lvalues[6061]),0},
+{"brainpoolP384r1","brainpoolP384r1",NID_brainpoolP384r1,9,
+       &(lvalues[6070]),0},
+{"brainpoolP384t1","brainpoolP384t1",NID_brainpoolP384t1,9,
+       &(lvalues[6079]),0},
+{"brainpoolP512r1","brainpoolP512r1",NID_brainpoolP512r1,9,
+       &(lvalues[6088]),0},
+{"brainpoolP512t1","brainpoolP512t1",NID_brainpoolP512t1,9,
+       &(lvalues[6097]),0},
+{"PSPECIFIED","pSpecified",NID_pSpecified,9,&(lvalues[6106]),0},
+{"dhSinglePass-stdDH-sha1kdf-scheme",
+       "dhSinglePass-stdDH-sha1kdf-scheme",
+       NID_dhSinglePass_stdDH_sha1kdf_scheme,9,&(lvalues[6115]),0},
+{"dhSinglePass-stdDH-sha224kdf-scheme",
+       "dhSinglePass-stdDH-sha224kdf-scheme",
+       NID_dhSinglePass_stdDH_sha224kdf_scheme,6,&(lvalues[6124]),0},
+{"dhSinglePass-stdDH-sha256kdf-scheme",
+       "dhSinglePass-stdDH-sha256kdf-scheme",
+       NID_dhSinglePass_stdDH_sha256kdf_scheme,6,&(lvalues[6130]),0},
+{"dhSinglePass-stdDH-sha384kdf-scheme",
+       "dhSinglePass-stdDH-sha384kdf-scheme",
+       NID_dhSinglePass_stdDH_sha384kdf_scheme,6,&(lvalues[6136]),0},
+{"dhSinglePass-stdDH-sha512kdf-scheme",
+       "dhSinglePass-stdDH-sha512kdf-scheme",
+       NID_dhSinglePass_stdDH_sha512kdf_scheme,6,&(lvalues[6142]),0},
+{"dhSinglePass-cofactorDH-sha1kdf-scheme",
+       "dhSinglePass-cofactorDH-sha1kdf-scheme",
+       NID_dhSinglePass_cofactorDH_sha1kdf_scheme,9,&(lvalues[6148]),0},
+{"dhSinglePass-cofactorDH-sha224kdf-scheme",
+       "dhSinglePass-cofactorDH-sha224kdf-scheme",
+       NID_dhSinglePass_cofactorDH_sha224kdf_scheme,6,&(lvalues[6157]),0},
+{"dhSinglePass-cofactorDH-sha256kdf-scheme",
+       "dhSinglePass-cofactorDH-sha256kdf-scheme",
+       NID_dhSinglePass_cofactorDH_sha256kdf_scheme,6,&(lvalues[6163]),0},
+{"dhSinglePass-cofactorDH-sha384kdf-scheme",
+       "dhSinglePass-cofactorDH-sha384kdf-scheme",
+       NID_dhSinglePass_cofactorDH_sha384kdf_scheme,6,&(lvalues[6169]),0},
+{"dhSinglePass-cofactorDH-sha512kdf-scheme",
+       "dhSinglePass-cofactorDH-sha512kdf-scheme",
+       NID_dhSinglePass_cofactorDH_sha512kdf_scheme,6,&(lvalues[6175]),0},
+{"dh-std-kdf","dh-std-kdf",NID_dh_std_kdf,0,NULL,0},
+{"dh-cofactor-kdf","dh-cofactor-kdf",NID_dh_cofactor_kdf,0,NULL,0},
+{"AES-128-CBC-HMAC-SHA256","aes-128-cbc-hmac-sha256",
+       NID_aes_128_cbc_hmac_sha256,0,NULL,0},
+{"AES-192-CBC-HMAC-SHA256","aes-192-cbc-hmac-sha256",
+       NID_aes_192_cbc_hmac_sha256,0,NULL,0},
+{"AES-256-CBC-HMAC-SHA256","aes-256-cbc-hmac-sha256",
+       NID_aes_256_cbc_hmac_sha256,0,NULL,0},
+{"ct_precert_scts","CT Precertificate SCTs",NID_ct_precert_scts,10,
+       &(lvalues[6181]),0},
+{"ct_precert_poison","CT Precertificate Poison",NID_ct_precert_poison,
+       10,&(lvalues[6191]),0},
+{"ct_precert_signer","CT Precertificate Signer",NID_ct_precert_signer,
+       10,&(lvalues[6201]),0},
+{"ct_cert_scts","CT Certificate SCTs",NID_ct_cert_scts,10,
+       &(lvalues[6211]),0},
+{"jurisdictionL","jurisdictionLocalityName",
+       NID_jurisdictionLocalityName,11,&(lvalues[6221]),0},
+{"jurisdictionST","jurisdictionStateOrProvinceName",
+       NID_jurisdictionStateOrProvinceName,11,&(lvalues[6232]),0},
+{"jurisdictionC","jurisdictionCountryName",
+       NID_jurisdictionCountryName,11,&(lvalues[6243]),0},
 };
 
 static const unsigned int sn_objs[NUM_SN]={
 364,   /* "AD_DVCS" */
 419,   /* "AES-128-CBC" */
 916,   /* "AES-128-CBC-HMAC-SHA1" */
+948,   /* "AES-128-CBC-HMAC-SHA256" */
 421,   /* "AES-128-CFB" */
 650,   /* "AES-128-CFB1" */
 653,   /* "AES-128-CFB8" */
@@ -2414,6 +2530,7 @@ static const unsigned int sn_objs[NUM_SN]={
 913,   /* "AES-128-XTS" */
 423,   /* "AES-192-CBC" */
 917,   /* "AES-192-CBC-HMAC-SHA1" */
+949,   /* "AES-192-CBC-HMAC-SHA256" */
 425,   /* "AES-192-CFB" */
 651,   /* "AES-192-CFB1" */
 654,   /* "AES-192-CFB8" */
@@ -2422,6 +2539,7 @@ static const unsigned int sn_objs[NUM_SN]={
 424,   /* "AES-192-OFB" */
 427,   /* "AES-256-CBC" */
 918,   /* "AES-256-CBC-HMAC-SHA1" */
+950,   /* "AES-256-CBC-HMAC-SHA256" */
 429,   /* "AES-256-CFB" */
 652,   /* "AES-256-CFB1" */
 655,   /* "AES-256-CFB8" */
@@ -2537,6 +2655,7 @@ static const unsigned int sn_objs[NUM_SN]={
 69,    /* "PBKDF2" */
 162,   /* "PBMAC1" */
 127,   /* "PKIX" */
+935,   /* "PSPECIFIED" */
 98,    /* "RC2-40-CBC" */
 166,   /* "RC2-64-CBC" */
 37,    /* "RC2-CBC" */
@@ -2613,6 +2732,20 @@ static const unsigned int sn_objs[NUM_SN]={
 87,    /* "basicConstraints" */
 365,   /* "basicOCSPResponse" */
 285,   /* "biometricInfo" */
+921,   /* "brainpoolP160r1" */
+922,   /* "brainpoolP160t1" */
+923,   /* "brainpoolP192r1" */
+924,   /* "brainpoolP192t1" */
+925,   /* "brainpoolP224r1" */
+926,   /* "brainpoolP224t1" */
+927,   /* "brainpoolP256r1" */
+928,   /* "brainpoolP256t1" */
+929,   /* "brainpoolP320r1" */
+930,   /* "brainpoolP320t1" */
+931,   /* "brainpoolP384r1" */
+932,   /* "brainpoolP384t1" */
+933,   /* "brainpoolP512r1" */
+934,   /* "brainpoolP512t1" */
 494,   /* "buildingName" */
 860,   /* "businessCategory" */
 691,   /* "c2onb191v4" */
@@ -2658,6 +2791,10 @@ static const unsigned int sn_objs[NUM_SN]={
 884,   /* "crossCertificatePair" */
 806,   /* "cryptocom" */
 805,   /* "cryptopro" */
+954,   /* "ct_cert_scts" */
+952,   /* "ct_precert_poison" */
+951,   /* "ct_precert_scts" */
+953,   /* "ct_precert_signer" */
 500,   /* "dITRedirect" */
 451,   /* "dNSDomain" */
 495,   /* "dSAQuality" */
@@ -2667,7 +2804,20 @@ static const unsigned int sn_objs[NUM_SN]={
 891,   /* "deltaRevocationList" */
 107,   /* "description" */
 871,   /* "destinationIndicator" */
+947,   /* "dh-cofactor-kdf" */
+946,   /* "dh-std-kdf" */
 28,    /* "dhKeyAgreement" */
+941,   /* "dhSinglePass-cofactorDH-sha1kdf-scheme" */
+942,   /* "dhSinglePass-cofactorDH-sha224kdf-scheme" */
+943,   /* "dhSinglePass-cofactorDH-sha256kdf-scheme" */
+944,   /* "dhSinglePass-cofactorDH-sha384kdf-scheme" */
+945,   /* "dhSinglePass-cofactorDH-sha512kdf-scheme" */
+936,   /* "dhSinglePass-stdDH-sha1kdf-scheme" */
+937,   /* "dhSinglePass-stdDH-sha224kdf-scheme" */
+938,   /* "dhSinglePass-stdDH-sha256kdf-scheme" */
+939,   /* "dhSinglePass-stdDH-sha384kdf-scheme" */
+940,   /* "dhSinglePass-stdDH-sha512kdf-scheme" */
+920,   /* "dhpublicnumber" */
 382,   /* "directory" */
 887,   /* "distinguishedName" */
 892,   /* "dmdName" */
@@ -2978,6 +3128,9 @@ static const unsigned int sn_objs[NUM_SN]={
 86,    /* "issuerAltName" */
 770,   /* "issuingDistributionPoint" */
 492,   /* "janetMailbox" */
+957,   /* "jurisdictionC" */
+955,   /* "jurisdictionL" */
+956,   /* "jurisdictionST" */
 150,   /* "keyBag" */
 83,    /* "keyUsage" */
 477,   /* "lastModifiedBy" */
@@ -3328,6 +3481,10 @@ static const unsigned int ln_objs[NUM_LN]={
 285,   /* "Biometric Info" */
 179,   /* "CA Issuers" */
 785,   /* "CA Repository" */
+954,   /* "CT Certificate SCTs" */
+952,   /* "CT Precertificate Poison" */
+951,   /* "CT Precertificate SCTs" */
+953,   /* "CT Precertificate Signer" */
 131,   /* "Code Signing" */
 783,   /* "Diffie-Hellman based MAC" */
 382,   /* "Directory" */
@@ -3451,6 +3608,7 @@ static const unsigned int ln_objs[NUM_LN]={
 85,    /* "X509v3 Subject Alternative Name" */
 769,   /* "X509v3 Subject Directory Attributes" */
 82,    /* "X509v3 Subject Key Identifier" */
+920,   /* "X9.42 DH" */
 184,   /* "X9.57" */
 185,   /* "X9.57 CM ?" */
 478,   /* "aRecord" */
@@ -3463,6 +3621,7 @@ static const unsigned int ln_objs[NUM_LN]={
 606,   /* "additional verification" */
 419,   /* "aes-128-cbc" */
 916,   /* "aes-128-cbc-hmac-sha1" */
+948,   /* "aes-128-cbc-hmac-sha256" */
 896,   /* "aes-128-ccm" */
 421,   /* "aes-128-cfb" */
 650,   /* "aes-128-cfb1" */
@@ -3474,6 +3633,7 @@ static const unsigned int ln_objs[NUM_LN]={
 913,   /* "aes-128-xts" */
 423,   /* "aes-192-cbc" */
 917,   /* "aes-192-cbc-hmac-sha1" */
+949,   /* "aes-192-cbc-hmac-sha256" */
 899,   /* "aes-192-ccm" */
 425,   /* "aes-192-cfb" */
 651,   /* "aes-192-cfb1" */
@@ -3484,6 +3644,7 @@ static const unsigned int ln_objs[NUM_LN]={
 424,   /* "aes-192-ofb" */
 427,   /* "aes-256-cbc" */
 918,   /* "aes-256-cbc-hmac-sha1" */
+950,   /* "aes-256-cbc-hmac-sha256" */
 902,   /* "aes-256-ccm" */
 429,   /* "aes-256-cfb" */
 652,   /* "aes-256-cfb1" */
@@ -3502,6 +3663,20 @@ static const unsigned int ln_objs[NUM_LN]={
 93,    /* "bf-cfb" */
 92,    /* "bf-ecb" */
 94,    /* "bf-ofb" */
+921,   /* "brainpoolP160r1" */
+922,   /* "brainpoolP160t1" */
+923,   /* "brainpoolP192r1" */
+924,   /* "brainpoolP192t1" */
+925,   /* "brainpoolP224r1" */
+926,   /* "brainpoolP224t1" */
+927,   /* "brainpoolP256r1" */
+928,   /* "brainpoolP256t1" */
+929,   /* "brainpoolP320r1" */
+930,   /* "brainpoolP320t1" */
+931,   /* "brainpoolP384r1" */
+932,   /* "brainpoolP384t1" */
+933,   /* "brainpoolP512r1" */
+934,   /* "brainpoolP512t1" */
 494,   /* "buildingName" */
 860,   /* "businessCategory" */
 691,   /* "c2onb191v4" */
@@ -3593,7 +3768,19 @@ static const unsigned int ln_objs[NUM_LN]={
 107,   /* "description" */
 871,   /* "destinationIndicator" */
 80,    /* "desx-cbc" */
+947,   /* "dh-cofactor-kdf" */
+946,   /* "dh-std-kdf" */
 28,    /* "dhKeyAgreement" */
+941,   /* "dhSinglePass-cofactorDH-sha1kdf-scheme" */
+942,   /* "dhSinglePass-cofactorDH-sha224kdf-scheme" */
+943,   /* "dhSinglePass-cofactorDH-sha256kdf-scheme" */
+944,   /* "dhSinglePass-cofactorDH-sha384kdf-scheme" */
+945,   /* "dhSinglePass-cofactorDH-sha512kdf-scheme" */
+936,   /* "dhSinglePass-stdDH-sha1kdf-scheme" */
+937,   /* "dhSinglePass-stdDH-sha224kdf-scheme" */
+938,   /* "dhSinglePass-stdDH-sha256kdf-scheme" */
+939,   /* "dhSinglePass-stdDH-sha384kdf-scheme" */
+940,   /* "dhSinglePass-stdDH-sha512kdf-scheme" */
 11,    /* "directory services (X.500)" */
 378,   /* "directory services - algorithms" */
 887,   /* "distinguishedName" */
@@ -3881,6 +4068,9 @@ static const unsigned int ln_objs[NUM_LN]={
 645,   /* "itu-t" */
 492,   /* "janetMailbox" */
 646,   /* "joint-iso-itu-t" */
+957,   /* "jurisdictionCountryName" */
+955,   /* "jurisdictionLocalityName" */
+956,   /* "jurisdictionStateOrProvinceName" */
 150,   /* "keyBag" */
 773,   /* "kisa" */
 477,   /* "lastModifiedBy" */
@@ -3917,6 +4107,7 @@ static const unsigned int ln_objs[NUM_LN]={
 18,    /* "organizationalUnitName" */
 475,   /* "otherMailbox" */
 876,   /* "owner" */
+935,   /* "pSpecified" */
 489,   /* "pagerTelephoneNumber" */
 782,   /* "password based MAC" */
 374,   /* "path" */
@@ -4560,6 +4751,14 @@ static const unsigned int obj_objs[NUM_OBJ]={
 505,   /* OBJ_mime_mhs_headings            1 3 6 1 7 1 1 */
 506,   /* OBJ_mime_mhs_bodies              1 3 6 1 7 1 2 */
 119,   /* OBJ_ripemd160WithRSA             1 3 36 3 3 1 2 */
+937,   /* OBJ_dhSinglePass_stdDH_sha224kdf_scheme 1 3 132 1 11 0 */
+938,   /* OBJ_dhSinglePass_stdDH_sha256kdf_scheme 1 3 132 1 11 1 */
+939,   /* OBJ_dhSinglePass_stdDH_sha384kdf_scheme 1 3 132 1 11 2 */
+940,   /* OBJ_dhSinglePass_stdDH_sha512kdf_scheme 1 3 132 1 11 3 */
+942,   /* OBJ_dhSinglePass_cofactorDH_sha224kdf_scheme 1 3 132 1 14 0 */
+943,   /* OBJ_dhSinglePass_cofactorDH_sha256kdf_scheme 1 3 132 1 14 1 */
+944,   /* OBJ_dhSinglePass_cofactorDH_sha384kdf_scheme 1 3 132 1 14 2 */
+945,   /* OBJ_dhSinglePass_cofactorDH_sha512kdf_scheme 1 3 132 1 14 3 */
 631,   /* OBJ_setAttr_GenCryptgrm          2 23 42 3 3 3 1 */
 632,   /* OBJ_setAttr_T2Enc                2 23 42 3 3 4 1 */
 633,   /* OBJ_setAttr_T2cleartxt           2 23 42 3 3 4 2 */
@@ -4608,6 +4807,7 @@ static const unsigned int obj_objs[NUM_OBJ]={
 416,   /* OBJ_ecdsa_with_SHA1              1 2 840 10045 4 1 */
 791,   /* OBJ_ecdsa_with_Recommended       1 2 840 10045 4 2 */
 792,   /* OBJ_ecdsa_with_Specified         1 2 840 10045 4 3 */
+920,   /* OBJ_dhpublicnumber               1 2 840 10046 2 1 */
 258,   /* OBJ_id_pkix_mod                  1 3 6 1 5 5 7 0 */
 175,   /* OBJ_id_pe                        1 3 6 1 5 5 7 1 */
 259,   /* OBJ_id_qt                        1 3 6 1 5 5 7 2 */
@@ -4825,6 +5025,7 @@ static const unsigned int obj_objs[NUM_OBJ]={
 644,   /* OBJ_rsaOAEPEncryptionSET         1 2 840 113549 1 1 6 */
 919,   /* OBJ_rsaesOaep                    1 2 840 113549 1 1 7 */
 911,   /* OBJ_mgf1                         1 2 840 113549 1 1 8 */
+935,   /* OBJ_pSpecified                   1 2 840 113549 1 1 9 */
 912,   /* OBJ_rsassaPss                    1 2 840 113549 1 1 10 */
 668,   /* OBJ_sha256WithRSAEncryption      1 2 840 113549 1 1 11 */
 669,   /* OBJ_sha384WithRSAEncryption      1 2 840 113549 1 1 12 */
@@ -4886,6 +5087,22 @@ static const unsigned int obj_objs[NUM_OBJ]={
 373,   /* OBJ_id_pkix_OCSP_valid           1 3 6 1 5 5 7 48 1 9 */
 374,   /* OBJ_id_pkix_OCSP_path            1 3 6 1 5 5 7 48 1 10 */
 375,   /* OBJ_id_pkix_OCSP_trustRoot       1 3 6 1 5 5 7 48 1 11 */
+921,   /* OBJ_brainpoolP160r1              1 3 36 3 3 2 8 1 1 1 */
+922,   /* OBJ_brainpoolP160t1              1 3 36 3 3 2 8 1 1 2 */
+923,   /* OBJ_brainpoolP192r1              1 3 36 3 3 2 8 1 1 3 */
+924,   /* OBJ_brainpoolP192t1              1 3 36 3 3 2 8 1 1 4 */
+925,   /* OBJ_brainpoolP224r1              1 3 36 3 3 2 8 1 1 5 */
+926,   /* OBJ_brainpoolP224t1              1 3 36 3 3 2 8 1 1 6 */
+927,   /* OBJ_brainpoolP256r1              1 3 36 3 3 2 8 1 1 7 */
+928,   /* OBJ_brainpoolP256t1              1 3 36 3 3 2 8 1 1 8 */
+929,   /* OBJ_brainpoolP320r1              1 3 36 3 3 2 8 1 1 9 */
+930,   /* OBJ_brainpoolP320t1              1 3 36 3 3 2 8 1 1 10 */
+931,   /* OBJ_brainpoolP384r1              1 3 36 3 3 2 8 1 1 11 */
+932,   /* OBJ_brainpoolP384t1              1 3 36 3 3 2 8 1 1 12 */
+933,   /* OBJ_brainpoolP512r1              1 3 36 3 3 2 8 1 1 13 */
+934,   /* OBJ_brainpoolP512t1              1 3 36 3 3 2 8 1 1 14 */
+936,   /* OBJ_dhSinglePass_stdDH_sha1kdf_scheme 1 3 133 16 840 63 0 2 */
+941,   /* OBJ_dhSinglePass_cofactorDH_sha1kdf_scheme 1 3 133 16 840 63 0 3 */
 418,   /* OBJ_aes_128_ecb                  2 16 840 1 101 3 4 1 1 */
 419,   /* OBJ_aes_128_cbc                  2 16 840 1 101 3 4 1 2 */
 420,   /* OBJ_aes_128_ofb128               2 16 840 1 101 3 4 1 3 */
@@ -5013,6 +5230,10 @@ static const unsigned int obj_objs[NUM_OBJ]={
 138,   /* OBJ_ms_efs                       1 3 6 1 4 1 311 10 3 4 */
 648,   /* OBJ_ms_smartcard_login           1 3 6 1 4 1 311 20 2 2 */
 649,   /* OBJ_ms_upn                       1 3 6 1 4 1 311 20 2 3 */
+951,   /* OBJ_ct_precert_scts              1 3 6 1 4 1 11129 2 4 2 */
+952,   /* OBJ_ct_precert_poison            1 3 6 1 4 1 11129 2 4 3 */
+953,   /* OBJ_ct_precert_signer            1 3 6 1 4 1 11129 2 4 4 */
+954,   /* OBJ_ct_cert_scts                 1 3 6 1 4 1 11129 2 4 5 */
 751,   /* OBJ_camellia_128_cbc             1 2 392 200011 61 1 1 1 2 */
 752,   /* OBJ_camellia_192_cbc             1 2 392 200011 61 1 1 1 3 */
 753,   /* OBJ_camellia_256_cbc             1 2 392 200011 61 1 1 1 4 */
@@ -5091,5 +5312,8 @@ static const unsigned int obj_objs[NUM_OBJ]={
 154,   /* OBJ_secretBag                    1 2 840 113549 1 12 10 1 5 */
 155,   /* OBJ_safeContentsBag              1 2 840 113549 1 12 10 1 6 */
 34,    /* OBJ_idea_cbc                     1 3 6 1 4 1 188 7 1 1 2 */
+955,   /* OBJ_jurisdictionLocalityName     1 3 6 1 4 1 311 60 2 1 1 */
+956,   /* OBJ_jurisdictionStateOrProvinceName 1 3 6 1 4 1 311 60 2 1 2 */
+957,   /* OBJ_jurisdictionCountryName      1 3 6 1 4 1 311 60 2 1 3 */
 };
 
index f752aef..779c309 100644 (file)
 #define NID_mgf1                911
 #define OBJ_mgf1                OBJ_pkcs1,8L
 
+#define SN_pSpecified           "PSPECIFIED"
+#define LN_pSpecified           "pSpecified"
+#define NID_pSpecified          935
+#define OBJ_pSpecified          OBJ_pkcs1,9L
+
 #define SN_rsassaPss            "RSASSA-PSS"
 #define LN_rsassaPss            "rsassaPss"
 #define NID_rsassaPss           912
 #define SN_aes_256_cbc_hmac_sha1                "AES-256-CBC-HMAC-SHA1"
 #define LN_aes_256_cbc_hmac_sha1                "aes-256-cbc-hmac-sha1"
 #define NID_aes_256_cbc_hmac_sha1               918
+
+#define SN_aes_128_cbc_hmac_sha256              "AES-128-CBC-HMAC-SHA256"
+#define LN_aes_128_cbc_hmac_sha256              "aes-128-cbc-hmac-sha256"
+#define NID_aes_128_cbc_hmac_sha256             948
+
+#define SN_aes_192_cbc_hmac_sha256              "AES-192-CBC-HMAC-SHA256"
+#define LN_aes_192_cbc_hmac_sha256              "aes-192-cbc-hmac-sha256"
+#define NID_aes_192_cbc_hmac_sha256             949
+
+#define SN_aes_256_cbc_hmac_sha256              "AES-256-CBC-HMAC-SHA256"
+#define LN_aes_256_cbc_hmac_sha256              "aes-256-cbc-hmac-sha256"
+#define NID_aes_256_cbc_hmac_sha256             950
+
+#define SN_dhpublicnumber               "dhpublicnumber"
+#define LN_dhpublicnumber               "X9.42 DH"
+#define NID_dhpublicnumber              920
+#define OBJ_dhpublicnumber              OBJ_ISO_US,10046L,2L,1L
+
+#define SN_brainpoolP160r1              "brainpoolP160r1"
+#define NID_brainpoolP160r1             921
+#define OBJ_brainpoolP160r1             1L,3L,36L,3L,3L,2L,8L,1L,1L,1L
+
+#define SN_brainpoolP160t1              "brainpoolP160t1"
+#define NID_brainpoolP160t1             922
+#define OBJ_brainpoolP160t1             1L,3L,36L,3L,3L,2L,8L,1L,1L,2L
+
+#define SN_brainpoolP192r1              "brainpoolP192r1"
+#define NID_brainpoolP192r1             923
+#define OBJ_brainpoolP192r1             1L,3L,36L,3L,3L,2L,8L,1L,1L,3L
+
+#define SN_brainpoolP192t1              "brainpoolP192t1"
+#define NID_brainpoolP192t1             924
+#define OBJ_brainpoolP192t1             1L,3L,36L,3L,3L,2L,8L,1L,1L,4L
+
+#define SN_brainpoolP224r1              "brainpoolP224r1"
+#define NID_brainpoolP224r1             925
+#define OBJ_brainpoolP224r1             1L,3L,36L,3L,3L,2L,8L,1L,1L,5L
+
+#define SN_brainpoolP224t1              "brainpoolP224t1"
+#define NID_brainpoolP224t1             926
+#define OBJ_brainpoolP224t1             1L,3L,36L,3L,3L,2L,8L,1L,1L,6L
+
+#define SN_brainpoolP256r1              "brainpoolP256r1"
+#define NID_brainpoolP256r1             927
+#define OBJ_brainpoolP256r1             1L,3L,36L,3L,3L,2L,8L,1L,1L,7L
+
+#define SN_brainpoolP256t1              "brainpoolP256t1"
+#define NID_brainpoolP256t1             928
+#define OBJ_brainpoolP256t1             1L,3L,36L,3L,3L,2L,8L,1L,1L,8L
+
+#define SN_brainpoolP320r1              "brainpoolP320r1"
+#define NID_brainpoolP320r1             929
+#define OBJ_brainpoolP320r1             1L,3L,36L,3L,3L,2L,8L,1L,1L,9L
+
+#define SN_brainpoolP320t1              "brainpoolP320t1"
+#define NID_brainpoolP320t1             930
+#define OBJ_brainpoolP320t1             1L,3L,36L,3L,3L,2L,8L,1L,1L,10L
+
+#define SN_brainpoolP384r1              "brainpoolP384r1"
+#define NID_brainpoolP384r1             931
+#define OBJ_brainpoolP384r1             1L,3L,36L,3L,3L,2L,8L,1L,1L,11L
+
+#define SN_brainpoolP384t1              "brainpoolP384t1"
+#define NID_brainpoolP384t1             932
+#define OBJ_brainpoolP384t1             1L,3L,36L,3L,3L,2L,8L,1L,1L,12L
+
+#define SN_brainpoolP512r1              "brainpoolP512r1"
+#define NID_brainpoolP512r1             933
+#define OBJ_brainpoolP512r1             1L,3L,36L,3L,3L,2L,8L,1L,1L,13L
+
+#define SN_brainpoolP512t1              "brainpoolP512t1"
+#define NID_brainpoolP512t1             934
+#define OBJ_brainpoolP512t1             1L,3L,36L,3L,3L,2L,8L,1L,1L,14L
+
+#define OBJ_x9_63_scheme                1L,3L,133L,16L,840L,63L,0L
+
+#define OBJ_secg_scheme         OBJ_certicom_arc,1L
+
+#define SN_dhSinglePass_stdDH_sha1kdf_scheme            "dhSinglePass-stdDH-sha1kdf-scheme"
+#define NID_dhSinglePass_stdDH_sha1kdf_scheme           936
+#define OBJ_dhSinglePass_stdDH_sha1kdf_scheme           OBJ_x9_63_scheme,2L
+
+#define SN_dhSinglePass_stdDH_sha224kdf_scheme          "dhSinglePass-stdDH-sha224kdf-scheme"
+#define NID_dhSinglePass_stdDH_sha224kdf_scheme         937
+#define OBJ_dhSinglePass_stdDH_sha224kdf_scheme         OBJ_secg_scheme,11L,0L
+
+#define SN_dhSinglePass_stdDH_sha256kdf_scheme          "dhSinglePass-stdDH-sha256kdf-scheme"
+#define NID_dhSinglePass_stdDH_sha256kdf_scheme         938
+#define OBJ_dhSinglePass_stdDH_sha256kdf_scheme         OBJ_secg_scheme,11L,1L
+
+#define SN_dhSinglePass_stdDH_sha384kdf_scheme          "dhSinglePass-stdDH-sha384kdf-scheme"
+#define NID_dhSinglePass_stdDH_sha384kdf_scheme         939
+#define OBJ_dhSinglePass_stdDH_sha384kdf_scheme         OBJ_secg_scheme,11L,2L
+
+#define SN_dhSinglePass_stdDH_sha512kdf_scheme          "dhSinglePass-stdDH-sha512kdf-scheme"
+#define NID_dhSinglePass_stdDH_sha512kdf_scheme         940
+#define OBJ_dhSinglePass_stdDH_sha512kdf_scheme         OBJ_secg_scheme,11L,3L
+
+#define SN_dhSinglePass_cofactorDH_sha1kdf_scheme               "dhSinglePass-cofactorDH-sha1kdf-scheme"
+#define NID_dhSinglePass_cofactorDH_sha1kdf_scheme              941
+#define OBJ_dhSinglePass_cofactorDH_sha1kdf_scheme              OBJ_x9_63_scheme,3L
+
+#define SN_dhSinglePass_cofactorDH_sha224kdf_scheme             "dhSinglePass-cofactorDH-sha224kdf-scheme"
+#define NID_dhSinglePass_cofactorDH_sha224kdf_scheme            942
+#define OBJ_dhSinglePass_cofactorDH_sha224kdf_scheme            OBJ_secg_scheme,14L,0L
+
+#define SN_dhSinglePass_cofactorDH_sha256kdf_scheme             "dhSinglePass-cofactorDH-sha256kdf-scheme"
+#define NID_dhSinglePass_cofactorDH_sha256kdf_scheme            943
+#define OBJ_dhSinglePass_cofactorDH_sha256kdf_scheme            OBJ_secg_scheme,14L,1L
+
+#define SN_dhSinglePass_cofactorDH_sha384kdf_scheme             "dhSinglePass-cofactorDH-sha384kdf-scheme"
+#define NID_dhSinglePass_cofactorDH_sha384kdf_scheme            944
+#define OBJ_dhSinglePass_cofactorDH_sha384kdf_scheme            OBJ_secg_scheme,14L,2L
+
+#define SN_dhSinglePass_cofactorDH_sha512kdf_scheme             "dhSinglePass-cofactorDH-sha512kdf-scheme"
+#define NID_dhSinglePass_cofactorDH_sha512kdf_scheme            945
+#define OBJ_dhSinglePass_cofactorDH_sha512kdf_scheme            OBJ_secg_scheme,14L,3L
+
+#define SN_dh_std_kdf           "dh-std-kdf"
+#define NID_dh_std_kdf          946
+
+#define SN_dh_cofactor_kdf              "dh-cofactor-kdf"
+#define NID_dh_cofactor_kdf             947
+
+#define SN_ct_precert_scts              "ct_precert_scts"
+#define LN_ct_precert_scts              "CT Precertificate SCTs"
+#define NID_ct_precert_scts             951
+#define OBJ_ct_precert_scts             1L,3L,6L,1L,4L,1L,11129L,2L,4L,2L
+
+#define SN_ct_precert_poison            "ct_precert_poison"
+#define LN_ct_precert_poison            "CT Precertificate Poison"
+#define NID_ct_precert_poison           952
+#define OBJ_ct_precert_poison           1L,3L,6L,1L,4L,1L,11129L,2L,4L,3L
+
+#define SN_ct_precert_signer            "ct_precert_signer"
+#define LN_ct_precert_signer            "CT Precertificate Signer"
+#define NID_ct_precert_signer           953
+#define OBJ_ct_precert_signer           1L,3L,6L,1L,4L,1L,11129L,2L,4L,4L
+
+#define SN_ct_cert_scts         "ct_cert_scts"
+#define LN_ct_cert_scts         "CT Certificate SCTs"
+#define NID_ct_cert_scts                954
+#define OBJ_ct_cert_scts                1L,3L,6L,1L,4L,1L,11129L,2L,4L,5L
+
+#define SN_jurisdictionLocalityName             "jurisdictionL"
+#define LN_jurisdictionLocalityName             "jurisdictionLocalityName"
+#define NID_jurisdictionLocalityName            955
+#define OBJ_jurisdictionLocalityName            1L,3L,6L,1L,4L,1L,311L,60L,2L,1L,1L
+
+#define SN_jurisdictionStateOrProvinceName              "jurisdictionST"
+#define LN_jurisdictionStateOrProvinceName              "jurisdictionStateOrProvinceName"
+#define NID_jurisdictionStateOrProvinceName             956
+#define OBJ_jurisdictionStateOrProvinceName             1L,3L,6L,1L,4L,1L,311L,60L,2L,1L,2L
+
+#define SN_jurisdictionCountryName              "jurisdictionC"
+#define LN_jurisdictionCountryName              "jurisdictionCountryName"
+#define NID_jurisdictionCountryName             957
+#define OBJ_jurisdictionCountryName             1L,3L,6L,1L,4L,1L,311L,60L,2L,1L,3L
index 1d0a7c8..8e5ea83 100644 (file)
@@ -917,3 +917,41 @@ aes_128_cbc_hmac_sha1              916
 aes_192_cbc_hmac_sha1          917
 aes_256_cbc_hmac_sha1          918
 rsaesOaep              919
+dhpublicnumber         920
+brainpoolP160r1                921
+brainpoolP160t1                922
+brainpoolP192r1                923
+brainpoolP192t1                924
+brainpoolP224r1                925
+brainpoolP224t1                926
+brainpoolP256r1                927
+brainpoolP256t1                928
+brainpoolP320r1                929
+brainpoolP320t1                930
+brainpoolP384r1                931
+brainpoolP384t1                932
+brainpoolP512r1                933
+brainpoolP512t1                934
+pSpecified             935
+dhSinglePass_stdDH_sha1kdf_scheme              936
+dhSinglePass_stdDH_sha224kdf_scheme            937
+dhSinglePass_stdDH_sha256kdf_scheme            938
+dhSinglePass_stdDH_sha384kdf_scheme            939
+dhSinglePass_stdDH_sha512kdf_scheme            940
+dhSinglePass_cofactorDH_sha1kdf_scheme         941
+dhSinglePass_cofactorDH_sha224kdf_scheme               942
+dhSinglePass_cofactorDH_sha256kdf_scheme               943
+dhSinglePass_cofactorDH_sha384kdf_scheme               944
+dhSinglePass_cofactorDH_sha512kdf_scheme               945
+dh_std_kdf             946
+dh_cofactor_kdf                947
+aes_128_cbc_hmac_sha256                948
+aes_192_cbc_hmac_sha256                949
+aes_256_cbc_hmac_sha256                950
+ct_precert_scts                951
+ct_precert_poison              952
+ct_precert_signer              953
+ct_cert_scts           954
+jurisdictionLocalityName               955
+jurisdictionStateOrProvinceName                956
+jurisdictionCountryName                957
index b8f7d34..e453e99 100644 (file)
@@ -41,6 +41,21 @@ static const nid_triple sigoid_srt[] = {
     {NID_id_GostR3411_94_with_GostR3410_2001_cc, NID_id_GostR3411_94,
      NID_id_GostR3410_2001_cc},
     {NID_rsassaPss, NID_undef, NID_rsaEncryption},
+    {NID_dhSinglePass_stdDH_sha1kdf_scheme, NID_sha1, NID_dh_std_kdf},
+    {NID_dhSinglePass_stdDH_sha224kdf_scheme, NID_sha224, NID_dh_std_kdf},
+    {NID_dhSinglePass_stdDH_sha256kdf_scheme, NID_sha256, NID_dh_std_kdf},
+    {NID_dhSinglePass_stdDH_sha384kdf_scheme, NID_sha384, NID_dh_std_kdf},
+    {NID_dhSinglePass_stdDH_sha512kdf_scheme, NID_sha512, NID_dh_std_kdf},
+    {NID_dhSinglePass_cofactorDH_sha1kdf_scheme, NID_sha1,
+     NID_dh_cofactor_kdf},
+    {NID_dhSinglePass_cofactorDH_sha224kdf_scheme, NID_sha224,
+     NID_dh_cofactor_kdf},
+    {NID_dhSinglePass_cofactorDH_sha256kdf_scheme, NID_sha256,
+     NID_dh_cofactor_kdf},
+    {NID_dhSinglePass_cofactorDH_sha384kdf_scheme, NID_sha384,
+     NID_dh_cofactor_kdf},
+    {NID_dhSinglePass_cofactorDH_sha512kdf_scheme, NID_sha512,
+     NID_dh_cofactor_kdf},
 };
 
 static const nid_triple *const sigoid_srt_xref[] = {
@@ -54,19 +69,29 @@ static const nid_triple *const sigoid_srt_xref[] = {
     &sigoid_srt[5],
     &sigoid_srt[8],
     &sigoid_srt[12],
+    &sigoid_srt[30],
+    &sigoid_srt[35],
     &sigoid_srt[6],
     &sigoid_srt[10],
     &sigoid_srt[11],
     &sigoid_srt[13],
     &sigoid_srt[24],
     &sigoid_srt[20],
+    &sigoid_srt[32],
+    &sigoid_srt[37],
     &sigoid_srt[14],
     &sigoid_srt[21],
+    &sigoid_srt[33],
+    &sigoid_srt[38],
     &sigoid_srt[15],
     &sigoid_srt[22],
+    &sigoid_srt[34],
+    &sigoid_srt[39],
     &sigoid_srt[16],
     &sigoid_srt[23],
     &sigoid_srt[19],
+    &sigoid_srt[31],
+    &sigoid_srt[36],
     &sigoid_srt[25],
     &sigoid_srt[26],
     &sigoid_srt[27],
index cb91718..19c9422 100644 (file)
@@ -44,3 +44,15 @@ id_GostR3411_94_with_GostR3410_2001  id_GostR3411_94 id_GostR3410_2001
 id_GostR3411_94_with_GostR3410_94      id_GostR3411_94 id_GostR3410_94
 id_GostR3411_94_with_GostR3410_94_cc   id_GostR3411_94 id_GostR3410_94_cc
 id_GostR3411_94_with_GostR3410_2001_cc id_GostR3411_94 id_GostR3410_2001_cc
+# ECDH KDFs and their corresponding message digests and schemes
+dhSinglePass_stdDH_sha1kdf_scheme              sha1    dh_std_kdf
+dhSinglePass_stdDH_sha224kdf_scheme            sha224  dh_std_kdf
+dhSinglePass_stdDH_sha256kdf_scheme            sha256  dh_std_kdf
+dhSinglePass_stdDH_sha384kdf_scheme            sha384  dh_std_kdf
+dhSinglePass_stdDH_sha512kdf_scheme            sha512  dh_std_kdf
+
+dhSinglePass_cofactorDH_sha1kdf_scheme         sha1    dh_cofactor_kdf
+dhSinglePass_cofactorDH_sha224kdf_scheme       sha224  dh_cofactor_kdf
+dhSinglePass_cofactorDH_sha256kdf_scheme       sha256  dh_cofactor_kdf
+dhSinglePass_cofactorDH_sha384kdf_scheme       sha384  dh_cofactor_kdf
+dhSinglePass_cofactorDH_sha512kdf_scheme       sha512  dh_cofactor_kdf
index d3bfad7..b57aabb 100644 (file)
@@ -168,6 +168,7 @@ pkcs1 5                     : RSA-SHA1              : sha1WithRSAEncryption
 # According to PKCS #1 version 2.1
 pkcs1 7                        : RSAES-OAEP            : rsaesOaep
 pkcs1 8                        : MGF1                  : mgf1
+pkcs1 9                        : PSPECIFIED            : pSpecified
 pkcs1 10               : RSASSA-PSS            : rsassaPss
 
 pkcs1 11               : RSA-SHA256            : sha256WithRSAEncryption
@@ -1290,3 +1291,60 @@ kisa 1 6                : SEED-OFB      : seed-ofb
                        : AES-128-CBC-HMAC-SHA1         : aes-128-cbc-hmac-sha1
                        : AES-192-CBC-HMAC-SHA1         : aes-192-cbc-hmac-sha1
                        : AES-256-CBC-HMAC-SHA1         : aes-256-cbc-hmac-sha1
+                       : AES-128-CBC-HMAC-SHA256       : aes-128-cbc-hmac-sha256
+                       : AES-192-CBC-HMAC-SHA256       : aes-192-cbc-hmac-sha256
+                       : AES-256-CBC-HMAC-SHA256       : aes-256-cbc-hmac-sha256
+
+ISO-US 10046 2 1       : dhpublicnumber                : X9.42 DH
+
+# RFC 5639 curve OIDs (see http://www.ietf.org/rfc/rfc5639.txt)
+# versionOne OBJECT IDENTIFIER ::= {
+# iso(1) identified-organization(3) teletrust(36) algorithm(3)
+# signature-algorithm(3) ecSign(2) ecStdCurvesAndGeneration(8)
+# ellipticCurve(1) 1 }
+1 3 36 3 3 2 8 1 1 1 : brainpoolP160r1
+1 3 36 3 3 2 8 1 1 2 : brainpoolP160t1
+1 3 36 3 3 2 8 1 1 3 : brainpoolP192r1
+1 3 36 3 3 2 8 1 1 4 : brainpoolP192t1
+1 3 36 3 3 2 8 1 1 5 : brainpoolP224r1
+1 3 36 3 3 2 8 1 1 6 : brainpoolP224t1
+1 3 36 3 3 2 8 1 1 7 : brainpoolP256r1
+1 3 36 3 3 2 8 1 1 8 : brainpoolP256t1
+1 3 36 3 3 2 8 1 1 9 : brainpoolP320r1
+1 3 36 3 3 2 8 1 1 10 : brainpoolP320t1
+1 3 36 3 3 2 8 1 1 11 : brainpoolP384r1
+1 3 36 3 3 2 8 1 1 12 : brainpoolP384t1
+1 3 36 3 3 2 8 1 1 13 : brainpoolP512r1
+1 3 36 3 3 2 8 1 1 14 : brainpoolP512t1            
+
+# ECDH schemes from RFC5753
+!Alias x9-63-scheme 1 3 133 16 840 63 0
+!Alias secg-scheme certicom-arc 1
+
+x9-63-scheme 2   : dhSinglePass-stdDH-sha1kdf-scheme
+secg-scheme 11 0 : dhSinglePass-stdDH-sha224kdf-scheme
+secg-scheme 11 1 : dhSinglePass-stdDH-sha256kdf-scheme
+secg-scheme 11 2 : dhSinglePass-stdDH-sha384kdf-scheme
+secg-scheme 11 3 : dhSinglePass-stdDH-sha512kdf-scheme
+
+x9-63-scheme 3   : dhSinglePass-cofactorDH-sha1kdf-scheme
+secg-scheme 14 0 : dhSinglePass-cofactorDH-sha224kdf-scheme
+secg-scheme 14 1 : dhSinglePass-cofactorDH-sha256kdf-scheme
+secg-scheme 14 2 : dhSinglePass-cofactorDH-sha384kdf-scheme
+secg-scheme 14 3 : dhSinglePass-cofactorDH-sha512kdf-scheme
+# NIDs for use with lookup tables.
+                 : dh-std-kdf
+                 : dh-cofactor-kdf
+
+# RFC 6962 Extension OIDs (see http://www.ietf.org/rfc/rfc6962.txt)
+1 3 6 1 4 1 11129 2 4 2        : ct_precert_scts               : CT Precertificate SCTs
+1 3 6 1 4 1 11129 2 4 3        : ct_precert_poison             : CT Precertificate Poison
+1 3 6 1 4 1 11129 2 4 4        : ct_precert_signer             : CT Precertificate Signer
+1 3 6 1 4 1 11129 2 4 5        : ct_cert_scts                  : CT Certificate SCTs
+
+# CABForum EV SSL Certificate Guidelines
+# (see https://cabforum.org/extended-validation/)
+# OIDs for Subject Jurisdiction of Incorporation or Registration
+1 3 6 1 4 1 311 60 2 1 1       : jurisdictionL         : jurisdictionLocalityName
+1 3 6 1 4 1 311 60 2 1 2       : jurisdictionST        : jurisdictionStateOrProvinceName
+1 3 6 1 4 1 311 60 2 1 3       : jurisdictionC         : jurisdictionCountryName
index 35c0651..1913b9d 100644 (file)
@@ -39,7 +39,8 @@ my @xrkeys = keys %xref_tbl;
 
 my @srt1 = sort { $oid_tbl{$a} <=> $oid_tbl{$b}} @xrkeys;
 
-for(my $i = 0; $i <= $#srt1; $i++)
+my $i;
+for($i = 0; $i <= $#srt1; $i++)
        {
        $xref_tbl{$srt1[$i]}[2] = $i;
        }
index 25ef019..ca2ee76 100644 (file)
@@ -394,11 +394,22 @@ typedef struct ocsp_service_locator_st {
 
 OCSP_CERTID *OCSP_CERTID_dup(OCSP_CERTID *id);
 
-OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, char *path, OCSP_REQUEST *req);
-OCSP_REQ_CTX *OCSP_sendreq_new(BIO *io, char *path, OCSP_REQUEST *req,
+OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, const char *path, OCSP_REQUEST *req);
+OCSP_REQ_CTX *OCSP_sendreq_new(BIO *io, const char *path, OCSP_REQUEST *req,
                                int maxline);
+int OCSP_REQ_CTX_nbio(OCSP_REQ_CTX *rctx);
 int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx);
+OCSP_REQ_CTX *OCSP_REQ_CTX_new(BIO *io, int maxline);
 void OCSP_REQ_CTX_free(OCSP_REQ_CTX *rctx);
+void OCSP_set_max_response_length(OCSP_REQ_CTX *rctx, unsigned long len);
+int OCSP_REQ_CTX_i2d(OCSP_REQ_CTX *rctx, const ASN1_ITEM *it,
+                     ASN1_VALUE *val);
+int OCSP_REQ_CTX_nbio_d2i(OCSP_REQ_CTX *rctx, ASN1_VALUE **pval,
+                          const ASN1_ITEM *it);
+BIO *OCSP_REQ_CTX_get0_mem_bio(OCSP_REQ_CTX *rctx);
+int OCSP_REQ_CTX_i2d(OCSP_REQ_CTX *rctx, const ASN1_ITEM *it,
+                     ASN1_VALUE *val);
+int OCSP_REQ_CTX_http(OCSP_REQ_CTX *rctx, const char *op, const char *path);
 int OCSP_REQ_CTX_set1_req(OCSP_REQ_CTX *rctx, OCSP_REQUEST *req);
 int OCSP_REQ_CTX_add1_header(OCSP_REQ_CTX *rctx,
                              const char *name, const char *value);
@@ -447,7 +458,7 @@ int OCSP_check_validity(ASN1_GENERALIZEDTIME *thisupd,
 int OCSP_request_verify(OCSP_REQUEST *req, STACK_OF(X509) *certs,
                         X509_STORE *store, unsigned long flags);
 
-int OCSP_parse_url(char *url, char **phost, char **pport, char **ppath,
+int OCSP_parse_url(const char *url, char **phost, char **pport, char **ppath,
                    int *pssl);
 
 int OCSP_id_issuer_cmp(OCSP_CERTID *a, OCSP_CERTID *b);
index 970fea4..88b26b3 100644 (file)
@@ -81,9 +81,10 @@ struct ocsp_req_ctx_st {
     BIO *io;                    /* BIO to perform I/O with */
     BIO *mem;                   /* Memory BIO response is built into */
     unsigned long asn1_len;     /* ASN1 length of response */
+    unsigned long max_resp_len; /* Maximum length of response */
 };
 
-#define OCSP_MAX_REQUEST_LENGTH (100 * 1024)
+#define OCSP_MAX_RESP_LENGTH    (100 * 1024)
 #define OCSP_MAX_LINE_LEN       4096;
 
 /* OCSP states */
@@ -100,15 +101,42 @@ struct ocsp_req_ctx_st {
 #define OHS_ASN1_HEADER         3
 /* OCSP content octets being read */
 #define OHS_ASN1_CONTENT        4
+/* First call: ready to start I/O */
+#define OHS_ASN1_WRITE_INIT     (5 | OHS_NOREAD)
 /* Request being sent */
 #define OHS_ASN1_WRITE          (6 | OHS_NOREAD)
 /* Request being flushed */
 #define OHS_ASN1_FLUSH          (7 | OHS_NOREAD)
 /* Completed */
 #define OHS_DONE                (8 | OHS_NOREAD)
+/* Headers set, no final \r\n included */
+#define OHS_HTTP_HEADER         (9 | OHS_NOREAD)
 
 static int parse_http_line1(char *line);
 
+OCSP_REQ_CTX *OCSP_REQ_CTX_new(BIO *io, int maxline)
+{
+    OCSP_REQ_CTX *rctx;
+    rctx = OPENSSL_malloc(sizeof(OCSP_REQ_CTX));
+    if (!rctx)
+        return NULL;
+    rctx->state = OHS_ERROR;
+    rctx->max_resp_len = OCSP_MAX_RESP_LENGTH;
+    rctx->mem = BIO_new(BIO_s_mem());
+    rctx->io = io;
+    rctx->asn1_len = 0;
+    if (maxline > 0)
+        rctx->iobuflen = maxline;
+    else
+        rctx->iobuflen = OCSP_MAX_LINE_LEN;
+    rctx->iobuf = OPENSSL_malloc(rctx->iobuflen);
+    if (!rctx->iobuf || !rctx->mem) {
+        OCSP_REQ_CTX_free(rctx);
+        return NULL;
+    }
+    return rctx;
+}
+
 void OCSP_REQ_CTX_free(OCSP_REQ_CTX *rctx)
 {
     if (rctx->mem)
@@ -118,20 +146,71 @@ void OCSP_REQ_CTX_free(OCSP_REQ_CTX *rctx)
     OPENSSL_free(rctx);
 }
 
-int OCSP_REQ_CTX_set1_req(OCSP_REQ_CTX *rctx, OCSP_REQUEST *req)
+BIO *OCSP_REQ_CTX_get0_mem_bio(OCSP_REQ_CTX *rctx)
+{
+    return rctx->mem;
+}
+
+void OCSP_set_max_response_length(OCSP_REQ_CTX *rctx, unsigned long len)
+{
+    if (len == 0)
+        rctx->max_resp_len = OCSP_MAX_RESP_LENGTH;
+    else
+        rctx->max_resp_len = len;
+}
+
+int OCSP_REQ_CTX_i2d(OCSP_REQ_CTX *rctx, const ASN1_ITEM *it, ASN1_VALUE *val)
 {
     static const char req_hdr[] =
         "Content-Type: application/ocsp-request\r\n"
         "Content-Length: %d\r\n\r\n";
-    if (BIO_printf(rctx->mem, req_hdr, i2d_OCSP_REQUEST(req, NULL)) <= 0)
+    int reqlen = ASN1_item_i2d(val, NULL, it);
+    if (BIO_printf(rctx->mem, req_hdr, reqlen) <= 0)
+        return 0;
+    if (ASN1_item_i2d_bio(it, rctx->mem, val) <= 0)
+        return 0;
+    rctx->state = OHS_ASN1_WRITE_INIT;
+    return 1;
+}
+
+int OCSP_REQ_CTX_nbio_d2i(OCSP_REQ_CTX *rctx,
+                          ASN1_VALUE **pval, const ASN1_ITEM *it)
+{
+    int rv, len;
+    const unsigned char *p;
+
+    rv = OCSP_REQ_CTX_nbio(rctx);
+    if (rv != 1)
+        return rv;
+
+    len = BIO_get_mem_data(rctx->mem, &p);
+    *pval = ASN1_item_d2i(NULL, &p, len, it);
+    if (*pval == NULL) {
+        rctx->state = OHS_ERROR;
         return 0;
-    if (i2d_OCSP_REQUEST_bio(rctx->mem, req) <= 0)
+    }
+    return 1;
+}
+
+int OCSP_REQ_CTX_http(OCSP_REQ_CTX *rctx, const char *op, const char *path)
+{
+    static const char http_hdr[] = "%s %s HTTP/1.0\r\n";
+
+    if (!path)
+        path = "/";
+
+    if (BIO_printf(rctx->mem, http_hdr, op, path) <= 0)
         return 0;
-    rctx->state = OHS_ASN1_WRITE;
-    rctx->asn1_len = BIO_get_mem_data(rctx->mem, NULL);
+    rctx->state = OHS_HTTP_HEADER;
     return 1;
 }
 
+int OCSP_REQ_CTX_set1_req(OCSP_REQ_CTX *rctx, OCSP_REQUEST *req)
+{
+    return OCSP_REQ_CTX_i2d(rctx, ASN1_ITEM_rptr(OCSP_REQUEST),
+                            (ASN1_VALUE *)req);
+}
+
 int OCSP_REQ_CTX_add1_header(OCSP_REQ_CTX *rctx,
                              const char *name, const char *value)
 {
@@ -147,39 +226,27 @@ int OCSP_REQ_CTX_add1_header(OCSP_REQ_CTX *rctx,
     }
     if (BIO_write(rctx->mem, "\r\n", 2) != 2)
         return 0;
+    rctx->state = OHS_HTTP_HEADER;
     return 1;
 }
 
-OCSP_REQ_CTX *OCSP_sendreq_new(BIO *io, char *path, OCSP_REQUEST *req,
+OCSP_REQ_CTX *OCSP_sendreq_new(BIO *io, const char *path, OCSP_REQUEST *req,
                                int maxline)
 {
-    static const char post_hdr[] = "POST %s HTTP/1.0\r\n";
 
-    OCSP_REQ_CTX *rctx;
-    rctx = OPENSSL_malloc(sizeof(OCSP_REQ_CTX));
+    OCSP_REQ_CTX *rctx = NULL;
+    rctx = OCSP_REQ_CTX_new(io, maxline);
     if (!rctx)
         return NULL;
-    rctx->state = OHS_ERROR;
-    rctx->mem = BIO_new(BIO_s_mem());
-    rctx->io = io;
-    rctx->asn1_len = 0;
-    if (maxline > 0)
-        rctx->iobuflen = maxline;
-    else
-        rctx->iobuflen = OCSP_MAX_LINE_LEN;
-    rctx->iobuf = OPENSSL_malloc(rctx->iobuflen);
-    if (!rctx->mem || !rctx->iobuf)
-        goto err;
-    if (!path)
-        path = "/";
 
-    if (BIO_printf(rctx->mem, post_hdr, path) <= 0)
+    if (!OCSP_REQ_CTX_http(rctx, "POST", path))
         goto err;
 
     if (req && !OCSP_REQ_CTX_set1_req(rctx, req))
         goto err;
 
     return rctx;
+
  err:
     OCSP_REQ_CTX_free(rctx);
     return NULL;
@@ -256,7 +323,7 @@ static int parse_http_line1(char *line)
 
 }
 
-int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx)
+int OCSP_REQ_CTX_nbio(OCSP_REQ_CTX *rctx)
 {
     int i, n;
     const unsigned char *p;
@@ -277,6 +344,17 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx)
     }
 
     switch (rctx->state) {
+    case OHS_HTTP_HEADER:
+        /* Last operation was adding headers: need a final \r\n */
+        if (BIO_write(rctx->mem, "\r\n", 2) != 2) {
+            rctx->state = OHS_ERROR;
+            return 0;
+        }
+        rctx->state = OHS_ASN1_WRITE_INIT;
+
+    case OHS_ASN1_WRITE_INIT:
+        rctx->asn1_len = BIO_get_mem_data(rctx->mem, NULL);
+        rctx->state = OHS_ASN1_WRITE;
 
     case OHS_ASN1_WRITE:
         n = BIO_get_mem_data(rctx->mem, &p);
@@ -412,7 +490,7 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx)
                 rctx->asn1_len |= *p++;
             }
 
-            if (rctx->asn1_len > OCSP_MAX_REQUEST_LENGTH) {
+            if (rctx->asn1_len > rctx->max_resp_len) {
                 rctx->state = OHS_ERROR;
                 return 0;
             }
@@ -426,18 +504,12 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx)
         /* Fall thru */
 
     case OHS_ASN1_CONTENT:
-        n = BIO_get_mem_data(rctx->mem, &p);
+        n = BIO_get_mem_data(rctx->mem, NULL);
         if (n < (int)rctx->asn1_len)
             goto next_io;
 
-        *presp = d2i_OCSP_RESPONSE(NULL, &p, rctx->asn1_len);
-        if (*presp) {
-            rctx->state = OHS_DONE;
-            return 1;
-        }
-
-        rctx->state = OHS_ERROR;
-        return 0;
+        rctx->state = OHS_DONE;
+        return 1;
 
         break;
 
@@ -450,9 +522,16 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx)
 
 }
 
+int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx)
+{   /* Wrapper around OCSP_REQ_CTX_nbio_d2i() with the OCSP_RESPONSE item; its return value is passed through unchanged. */
+    return OCSP_REQ_CTX_nbio_d2i(rctx,
+                                 (ASN1_VALUE **)presp,
+                                 ASN1_ITEM_rptr(OCSP_RESPONSE));
+}
+
 /* Blocking OCSP request handler: now a special case of non-blocking I/O */
 
-OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, char *path, OCSP_REQUEST *req)
+OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, const char *path, OCSP_REQUEST *req)
 {
     OCSP_RESPONSE *resp = NULL;
     OCSP_REQ_CTX *ctx;
index d28d6b5..442a5b6 100644 (file)
@@ -175,7 +175,7 @@ int OCSP_id_cmp(OCSP_CERTID *a, OCSP_CERTID *b)
  * whether it is SSL.
  */
 
-int OCSP_parse_url(char *url, char **phost, char **pport, char **ppath,
+int OCSP_parse_url(const char *url, char **phost, char **pport, char **ppath,
                    int *pssl)
 {
     char *p, *buf;
index 4a6e276..15487c9 100644 (file)
@@ -20,6 +20,9 @@ extern "C" {
 #ifndef OPENSSL_NO_KRB5
 # define OPENSSL_NO_KRB5
 #endif
+#ifndef OPENSSL_NO_LIBUNBOUND
+# define OPENSSL_NO_LIBUNBOUND
+#endif
 #ifndef OPENSSL_NO_MD2
 # define OPENSSL_NO_MD2
 #endif
@@ -32,6 +35,9 @@ extern "C" {
 #ifndef OPENSSL_NO_SCTP
 # define OPENSSL_NO_SCTP
 #endif
+#ifndef OPENSSL_NO_SSL_TRACE
+# define OPENSSL_NO_SSL_TRACE
+#endif
 #ifndef OPENSSL_NO_STORE
 # define OPENSSL_NO_STORE
 #endif
@@ -62,6 +68,9 @@ extern "C" {
 # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5)
 #  define NO_KRB5
 # endif
+# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND)
+#  define NO_LIBUNBOUND
+# endif
 # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2)
 #  define NO_MD2
 # endif
@@ -74,6 +83,9 @@ extern "C" {
 # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP)
 #  define NO_SCTP
 # endif
+# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE)
+#  define NO_SSL_TRACE
+# endif
 # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE)
 #  define NO_STORE
 # endif
index bd66999..c06b13a 100644 (file)
@@ -30,11 +30,11 @@ extern "C" {
  * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for
  *  major minor fix final patch/beta)
  */
-# define OPENSSL_VERSION_NUMBER  0x1000110fL
+# define OPENSSL_VERSION_NUMBER  0x1000204fL
 # ifdef OPENSSL_FIPS
-#  define OPENSSL_VERSION_TEXT    "OpenSSL 1.0.1p-fips 9 Jul 2015"
+#  define OPENSSL_VERSION_TEXT    "OpenSSL 1.0.2d-fips 9 Jul 2015"
 # else
-#  define OPENSSL_VERSION_TEXT    "OpenSSL 1.0.1p 9 Jul 2015"
+#  define OPENSSL_VERSION_TEXT    "OpenSSL 1.0.2d 9 Jul 2015"
 # endif
 # define OPENSSL_VERSION_PTEXT   " part of " OPENSSL_VERSION_TEXT
 
index 0fcb0ce..9144ea2 100644 (file)
@@ -100,6 +100,8 @@ typedef int ASN1_BOOLEAN;
 typedef int ASN1_NULL;
 # endif
 
+typedef struct asn1_object_st ASN1_OBJECT;
+
 typedef struct ASN1_ITEM_st ASN1_ITEM;
 typedef struct asn1_pctx_st ASN1_PCTX;
 
index 7a930ee..65de60e 100644 (file)
@@ -171,12 +171,13 @@ pem_pk8.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
 pem_pk8.o: ../cryptlib.h pem_pk8.c
 pem_pkey.o: ../../e_os.h ../../include/openssl/asn1.h
 pem_pkey.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
-pem_pkey.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
-pem_pkey.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
-pem_pkey.o: ../../include/openssl/ecdsa.h ../../include/openssl/engine.h
-pem_pkey.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-pem_pkey.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-pem_pkey.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+pem_pkey.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h
+pem_pkey.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
+pem_pkey.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
+pem_pkey.o: ../../include/openssl/engine.h ../../include/openssl/err.h
+pem_pkey.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
+pem_pkey.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+pem_pkey.o: ../../include/openssl/opensslconf.h
 pem_pkey.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 pem_pkey.o: ../../include/openssl/pem.h ../../include/openssl/pem2.h
 pem_pkey.o: ../../include/openssl/pkcs12.h ../../include/openssl/pkcs7.h
index 2cdad8a..d3b23fc 100644 (file)
@@ -129,6 +129,7 @@ extern "C" {
 # define PEM_STRING_PKCS8        "ENCRYPTED PRIVATE KEY"
 # define PEM_STRING_PKCS8INF     "PRIVATE KEY"
 # define PEM_STRING_DHPARAMS     "DH PARAMETERS"
+# define PEM_STRING_DHXPARAMS    "X9.42 DH PARAMETERS"
 # define PEM_STRING_SSL_SESSION  "SSL SESSION PARAMETERS"
 # define PEM_STRING_DSAPARAMS    "DSA PARAMETERS"
 # define PEM_STRING_ECDSA_PUBLIC "ECDSA PUBLIC KEY"
@@ -181,7 +182,6 @@ typedef struct pem_ctx_st {
 
     int num_recipient;
     PEM_USER **recipient;
-
 /*-
     XXX(ben): don#t think this is used!
         STACK *x509_chain;      / * certificate chain */
@@ -399,8 +399,8 @@ int PEM_do_header(EVP_CIPHER_INFO *cipher, unsigned char *data, long *len,
 # ifndef OPENSSL_NO_BIO
 int PEM_read_bio(BIO *bp, char **name, char **header,
                  unsigned char **data, long *len);
-int PEM_write_bio(BIO *bp, const char *name, char *hdr, unsigned char *data,
-                  long len);
+int PEM_write_bio(BIO *bp, const char *name, const char *hdr,
+                  const unsigned char *data, long len);
 int PEM_bytes_read_bio(unsigned char **pdata, long *plen, char **pnm,
                        const char *name, BIO *bp, pem_password_cb *cb,
                        void *u);
@@ -419,7 +419,8 @@ int PEM_X509_INFO_write_bio(BIO *bp, X509_INFO *xi, EVP_CIPHER *enc,
 
 int PEM_read(FILE *fp, char **name, char **header,
              unsigned char **data, long *len);
-int PEM_write(FILE *fp, char *name, char *hdr, unsigned char *data, long len);
+int PEM_write(FILE *fp, const char *name, const char *hdr,
+              const unsigned char *data, long len);
 void *PEM_ASN1_read(d2i_of_void *d2i, const char *name, FILE *fp, void **x,
                     pem_password_cb *cb, void *u);
 int PEM_ASN1_write(i2d_of_void *i2d, const char *name, FILE *fp,
@@ -474,6 +475,7 @@ DECLARE_PEM_rw(EC_PUBKEY, EC_KEY)
 # endif
 # ifndef OPENSSL_NO_DH
 DECLARE_PEM_rw_const(DHparams, DH)
+DECLARE_PEM_write_const(DHxparams, DH)
 # endif
 DECLARE_PEM_rw_cb(PrivateKey, EVP_PKEY)
 DECLARE_PEM_rw(PUBKEY, EVP_PKEY)
@@ -562,8 +564,10 @@ void ERR_load_PEM_strings(void);
 # define PEM_F_PEM_PK8PKEY                                119
 # define PEM_F_PEM_READ                                   108
 # define PEM_F_PEM_READ_BIO                               109
+# define PEM_F_PEM_READ_BIO_DHPARAMS                      141
 # define PEM_F_PEM_READ_BIO_PARAMETERS                    140
 # define PEM_F_PEM_READ_BIO_PRIVATEKEY                    123
+# define PEM_F_PEM_READ_DHPARAMS                          142
 # define PEM_F_PEM_READ_PRIVATEKEY                        124
 # define PEM_F_PEM_SEALFINAL                              110
 # define PEM_F_PEM_SEALINIT                               111
index 64b8ba7..0e5be63 100644 (file)
@@ -421,6 +421,7 @@ EC_KEY *PEM_read_ECPrivateKey(FILE *fp, EC_KEY **eckey, pem_password_cb *cb,
 
 #ifndef OPENSSL_NO_DH
 
-IMPLEMENT_PEM_rw_const(DHparams, DH, PEM_STRING_DHPARAMS, DHparams)
+IMPLEMENT_PEM_write_const(DHparams, DH, PEM_STRING_DHPARAMS, DHparams)
+    IMPLEMENT_PEM_write_const(DHxparams, DH, PEM_STRING_DHXPARAMS, DHxparams)
 #endif
-    IMPLEMENT_PEM_rw(PUBKEY, EVP_PKEY, PEM_STRING_PUBLIC, PUBKEY)
+IMPLEMENT_PEM_rw(PUBKEY, EVP_PKEY, PEM_STRING_PUBLIC, PUBKEY)
index 702c5ad..e1f4fdb 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/pem/pem_err.c */
 /* ====================================================================
- * Copyright (c) 1999-2007 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2011 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -99,8 +99,10 @@ static ERR_STRING_DATA PEM_str_functs[] = {
     {ERR_FUNC(PEM_F_PEM_PK8PKEY), "PEM_PK8PKEY"},
     {ERR_FUNC(PEM_F_PEM_READ), "PEM_read"},
     {ERR_FUNC(PEM_F_PEM_READ_BIO), "PEM_read_bio"},
+    {ERR_FUNC(PEM_F_PEM_READ_BIO_DHPARAMS), "PEM_READ_BIO_DHPARAMS"},
     {ERR_FUNC(PEM_F_PEM_READ_BIO_PARAMETERS), "PEM_read_bio_Parameters"},
     {ERR_FUNC(PEM_F_PEM_READ_BIO_PRIVATEKEY), "PEM_READ_BIO_PRIVATEKEY"},
+    {ERR_FUNC(PEM_F_PEM_READ_DHPARAMS), "PEM_READ_DHPARAMS"},
     {ERR_FUNC(PEM_F_PEM_READ_PRIVATEKEY), "PEM_READ_PRIVATEKEY"},
     {ERR_FUNC(PEM_F_PEM_SEALFINAL), "PEM_SealFinal"},
     {ERR_FUNC(PEM_F_PEM_SEALINIT), "PEM_SealInit"},
index 5507161..a29821a 100644 (file)
@@ -229,6 +229,10 @@ static int check_pem(const char *nm, const char *name)
         }
         return 0;
     }
+    /* If reading DH parameters handle X9.42 DH format too */
+    if (!strcmp(nm, PEM_STRING_DHXPARAMS) &&
+        !strcmp(name, PEM_STRING_DHPARAMS))
+        return 1;
 
     /* Permit older strings */
 
@@ -472,8 +476,9 @@ int PEM_do_header(EVP_CIPHER_INFO *cipher, unsigned char *data, long *plen,
     EVP_CIPHER_CTX_cleanup(&ctx);
     OPENSSL_cleanse((char *)buf, sizeof(buf));
     OPENSSL_cleanse((char *)key, sizeof(key));
-    j += i;
-    if (!o) {
+    if (o)
+        j += i;
+    else {
         PEMerr(PEM_F_PEM_DO_HEADER, PEM_R_BAD_DECRYPT);
         return (0);
     }
@@ -574,8 +579,8 @@ static int load_iv(char **fromp, unsigned char *to, int num)
 }
 
 #ifndef OPENSSL_NO_FP_API
-int PEM_write(FILE *fp, char *name, char *header, unsigned char *data,
-              long len)
+int PEM_write(FILE *fp, const char *name, const char *header,
+              const unsigned char *data, long len)
 {
     BIO *b;
     int ret;
@@ -591,8 +596,8 @@ int PEM_write(FILE *fp, char *name, char *header, unsigned char *data,
 }
 #endif
 
-int PEM_write_bio(BIO *bp, const char *name, char *header,
-                  unsigned char *data, long len)
+int PEM_write_bio(BIO *bp, const char *name, const char *header,
+                  const unsigned char *data, long len)
 {
     int nlen, n, i, j, outl;
     unsigned char *buf = NULL;
index 0b05e63..04d6319 100644 (file)
@@ -68,6 +68,9 @@
 #ifndef OPENSSL_NO_ENGINE
 # include <openssl/engine.h>
 #endif
+#ifndef OPENSSL_NO_DH
+# include <openssl/dh.h>
+#endif
 #include "asn1_locl.h"
 
 int pem_check_suffix(const char *pem_str, const char *suffix);
@@ -241,3 +244,50 @@ int PEM_write_PrivateKey(FILE *fp, EVP_PKEY *x, const EVP_CIPHER *enc,
 }
 
 #endif
+
+#ifndef OPENSSL_NO_DH
+
+/* Transparently read in PKCS#3 or X9.42 DH parameters */
+
+DH *PEM_read_bio_DHparams(BIO *bp, DH **x, pem_password_cb *cb, void *u)
+{   /* Returns the decoded DH, or NULL when the PEM read or the decode fails. */
+    char *nm = NULL;
+    const unsigned char *p = NULL;
+    unsigned char *data = NULL;
+    long len;
+    DH *ret = NULL;
+
+    if (!PEM_bytes_read_bio(&data, &len, &nm, PEM_STRING_DHPARAMS, bp, cb, u))
+        return NULL;
+    p = data; /* decode through a separate cursor so data itself is what gets freed below */
+
+    if (!strcmp(nm, PEM_STRING_DHXPARAMS)) /* nm is the PEM name reported by the read; the X9.42 name gets its own decoder */
+        ret = d2i_DHxparams(x, &p, len);
+    else
+        ret = d2i_DHparams(x, &p, len);
+
+    if (ret == NULL)
+        PEMerr(PEM_F_PEM_READ_BIO_DHPARAMS, ERR_R_ASN1_LIB);
+    OPENSSL_free(nm); /* nm and data are released whether or not the decode succeeded */
+    OPENSSL_free(data);
+    return ret;
+}
+
+# ifndef OPENSSL_NO_FP_API
+DH *PEM_read_DHparams(FILE *fp, DH **x, pem_password_cb *cb, void *u)
+{   /* FILE* front end: attaches fp to a file BIO and defers to PEM_read_bio_DHparams(). */
+    BIO *b;
+    DH *ret;
+
+    if ((b = BIO_new(BIO_s_file())) == NULL) {
+        PEMerr(PEM_F_PEM_READ_DHPARAMS, ERR_R_BUF_LIB);
+        return (0);
+    }
+    BIO_set_fp(b, fp, BIO_NOCLOSE); /* BIO_NOCLOSE: presumably leaves fp open when b is freed -- confirm */
+    ret = PEM_read_bio_DHparams(b, x, cb, u);
+    BIO_free(b);
+    return (ret);
+}
+# endif
+
+#endif
index a3edd98..f89e814 100755 (executable)
@@ -27,7 +27,8 @@ my $globl = sub {
        /osx/           && do { $name = "_$name";
                                last;
                              };
-       /linux.*32/     && do { $ret .= ".globl $name\n";
+       /linux.*(32|64le)/
+                       && do { $ret .= ".globl $name\n";
                                $ret .= ".type  $name,\@function";
                                last;
                              };
@@ -37,7 +38,6 @@ my $globl = sub {
                                $ret .= ".align 3\n";
                                $ret .= "$name:\n";
                                $ret .= ".quad  .$name,.TOC.\@tocbase,0\n";
-                               $ret .= ".size  $name,24\n";
                                $ret .= ".previous\n";
 
                                $name = ".$name";
@@ -50,7 +50,9 @@ my $globl = sub {
     $ret;
 };
 my $text = sub {
-    ($flavour =~ /aix/) ? ".csect" : ".text";
+    my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";   # aix flavours get a .csect line, everything else plain .text
+    $ret = ".abiversion        2\n".$ret       if ($flavour =~ /linux.*64le/);   # linux 64le flavours: put an .abiversion line in front
+    $ret;
 };
 my $machine = sub {
     my $junk = shift;
@@ -62,9 +64,12 @@ my $machine = sub {
     ".machine  $arch";
 };
 my $size = sub {
-    if ($flavour =~ /linux.*32/)
+    if ($flavour =~ /linux/)
     {  shift;
-       ".size  " . join(",",@_);
+       my $name = shift; $name =~ s|^[\.\_]||;
+       my $ret  = ".size       $name,.-".($flavour=~/64$/?".":"").$name;
+       $ret .= "\n.size        .$name,.-.$name" if ($flavour=~/64$/);
+       $ret;
     }
     else
     {  "";     }
@@ -77,6 +82,25 @@ my $asciz = sub {
     else
     {  "";     }
 };
+my $quad = sub {   # turn each argument into one 64-bit data directive
+    shift;   # the first argument is not used here
+    my @ret;
+    my ($hi,$lo);
+    for (@_) {
+       if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)   # hex literal: the trailing 1-8 digits form the low word
+       {  $hi=$1?"0x$1":"0"; $lo="0x$2";  }
+       elsif (/^([0-9]+)$/o)   # decimal literal: split arithmetically
+       {  $hi=$1>>32; $lo=$1&0xffffffff;  } # error-prone with 32-bit perl
+       else   # anything else is passed through untouched
+       {  $hi=undef; $lo=$_; }
+
+       if (defined($hi))   # numeric: two .long words, low word first when the flavour ends in "le"
+       {  push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo");  }
+       else
+       {  push(@ret,".quad     $lo");  }
+    }
+    join("\n",@ret);   # one directive per argument, newline separated
+};
 
 ################################################################
 # simplified mnemonics not handled by at least one assembler
@@ -122,6 +146,46 @@ my $extrdi = sub {
     $b = ($b+$n)&63; $n = 64-$n;
     "  rldicl  $ra,$rs,$b,$n";
 };
+my $vmr = sub {   # translate "vmr vx,vy"
+    my ($f,$vx,$vy) = @_;
+    "  vor     $vx,$vy,$vy";   # emitted as vor with vy as both source operands
+};
+
+# PowerISA 2.06 stuff
+sub vsxmem_op {   # hand-encode one instruction word and emit it as a .long directive
+    my ($f, $vrt, $ra, $rb, $op) = @_;
+    "  .long   ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);   # 31 at bit 26, operands at bits 21/16/11, op doubled with the low bit set (presumably the VSX TX bit -- confirm against the ISA)
+}
+# made-up unaligned memory reference AltiVec/VMX instructions
+my $lvx_u      = sub { vsxmem_op(@_, 844); };  # lxvd2x
+my $stvx_u     = sub { vsxmem_op(@_, 972); };  # stxvd2x
+my $lvdx_u     = sub { vsxmem_op(@_, 588); };  # lxsdx
+my $stvdx_u    = sub { vsxmem_op(@_, 716); };  # stxsdx
+my $lvx_4w     = sub { vsxmem_op(@_, 780); };  # lxvw4x
+my $stvx_4w    = sub { vsxmem_op(@_, 908); };  # stxvw4x
+
+# PowerISA 2.07 stuff
+sub vcrypto_op {   # hand-encode one instruction word and emit it as a .long directive
+    my ($f, $vrt, $vra, $vrb, $op) = @_;
+    "  .long   ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;   # 4 at bit 26, operands at bits 21/16/11, op in the low bits
+}
+my $vcipher    = sub { vcrypto_op(@_, 1288); };
+my $vcipherlast        = sub { vcrypto_op(@_, 1289); };
+my $vncipher   = sub { vcrypto_op(@_, 1352); };
+my $vncipherlast= sub { vcrypto_op(@_, 1353); };
+my $vsbox      = sub { vcrypto_op(@_, 0, 1480); };   # two-operand form: the vrb slot is filled with 0
+my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };   # the last two arguments are packed as st<<4|six into the vrb slot
+my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };   # same packing as vshasigmad, different op value
+my $vpmsumb    = sub { vcrypto_op(@_, 1032); };
+my $vpmsumd    = sub { vcrypto_op(@_, 1224); };
+my $vpmsubh    = sub { vcrypto_op(@_, 1096); };   # NOTE(review): siblings are vpmsumb/vpmsumd/vpmsumw, so "vpmsubh" looks like a typo for vpmsumh -- confirm against the ISA opcode table
+my $vpmsumw    = sub { vcrypto_op(@_, 1160); };
+my $vaddudm    = sub { vcrypto_op(@_, 192);  };
+
+my $mtsle      = sub {   # hand-encoded and emitted as a .long directive
+    my ($f, $arg) = @_;
+    "  .long   ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);   # 31 at bit 26, the single argument at bit 21, 147*2 in the low bits
+};
 
 while($line=<>) {
 
@@ -138,7 +202,10 @@ while($line=<>) {
     {
        $line =~ s|(^[\.\w]+)\:\s*||;
        my $label = $1;
-       printf "%s:",($GLOBALS{$label} or $label) if ($label);
+       if ($label) {
+           printf "%s:",($GLOBALS{$label} or $label);
+           printf "\n.localentry\t$GLOBALS{$label},0"  if ($GLOBALS{$label} && $flavour =~ /linux.*64le/);
+       }
     }
 
     {
@@ -147,7 +214,7 @@ while($line=<>) {
        my $mnemonic = $2;
        my $f = $3;
        my $opcode = eval("\$$mnemonic");
-       $line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/);
+       $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
        if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
        elsif ($mnemonic)           { $line = $c.$mnemonic.$f."\t".$line; }
     }
diff --git a/crypto/perlasm/sparcv9_modes.pl b/crypto/perlasm/sparcv9_modes.pl
new file mode 100644 (file)
index 0000000..eb267a5
--- /dev/null
@@ -0,0 +1,1687 @@
+#!/usr/bin/env perl
+
+# Specific modes implementations for SPARC Architecture 2011. There
+# is T4 dependency though, an ASI value that is not specified in the
+# Architecture Manual. But as SPARC universe is rather monocultural,
+# we assume that a processor capable of executing crypto instructions
+# can handle the ASI in question as well. This means that we ought to
+# keep our eyes open when new processors emerge...
+#
+# As for the above-mentioned ASI: it is the so-called "block initializing
+# store", which cancels the "read" in "read-update-write" on cache lines.
+# This is a "cooperative" optimization, as it reduces overall pressure
+# on memory interface. Benefits can't be observed/quantified with
+# usual benchmarks, on the contrary you can notice that single-thread
+# performance for parallelizable modes is ~1.5% worse for largest
+# block sizes [though few percent better for not so long ones]. All
+# this based on suggestions from David Miller.
+
+sub asm_init {         # to be called with @ARGV as argument
+    for (@_)           { $::abibits=64 if (/\-m64/ || /\-xarch\=v9/); }   # any -m64 or -xarch=v9 argument selects 64; no other value is assigned here
+    if ($::abibits==64)        { $::bias=2047; $::frame=192; $::size_t_cc="%xcc"; }   # 64-bit: bias 2047, frame 192, size_t conditions use %xcc
+    else               { $::bias=0;    $::frame=112; $::size_t_cc="%icc"; }   # otherwise: bias 0, frame 112, %icc
+}
+
+# unified interface
+my ($inp,$out,$len,$key,$ivec)=map("%i$_",(0..5));   # bound to %i0..%i4; the map yields six names and the surplus %i5 is dropped
+# local variables
+my ($ileft,$iright,$ooff,$omask,$ivoff,$blk_init)=map("%l$_",(0..7));   # bound to %l0..%l5; the surplus %l6 and %l7 are dropped
+
+sub alg_cbc_encrypt_implement {   # append the assembly text of <alg><bits>_t4_cbc_encrypt to $::code
+my ($alg,$bits) = @_;   # both are spliced into the symbol and label names below
+
+$::code.=<<___;   # prologue: open a frame and branch to the abort label when len is zero
+.globl ${alg}${bits}_t4_cbc_encrypt
+.align 32
+${alg}${bits}_t4_cbc_encrypt:
+       save            %sp, -$::frame, %sp
+       cmp             $len, 0
+       be,pn           $::size_t_cc, .L${bits}_cbc_enc_abort
+       sub             $inp, $out, $blk_init   ! $inp!=$out
+___
+$::code.=<<___ if (!$::evp);   # without $::evp: ivec may be unaligned, so it is loaded with alignaddr/faligndata
+       andcc           $ivec, 7, $ivoff
+       alignaddr       $ivec, %g0, $ivec
+
+       ldd             [$ivec + 0], %f0        ! load ivec
+       bz,pt           %icc, 1f
+       ldd             [$ivec + 8], %f2
+       ldd             [$ivec + 16], %f4
+       faligndata      %f0, %f2, %f0
+       faligndata      %f2, %f4, %f2
+1:
+___
+$::code.=<<___ if ($::evp);   # with $::evp: ivec is loaded as four single words into %f0..%f3
+       ld              [$ivec + 0], %f0
+       ld              [$ivec + 4], %f1
+       ld              [$ivec + 8], %f2
+       ld              [$ivec + 12], %f3
+___
+$::code.=<<___;   # key load, choice between the plain loop and the block-store path (taken only when $blk_init stays non-zero), then the plain loop
+       prefetch        [$inp], 20
+       prefetch        [$inp + 63], 20
+       call            _${alg}${bits}_load_enckey
+       and             $inp, 7, $ileft
+       andn            $inp, 7, $inp
+       sll             $ileft, 3, $ileft
+       mov             64, $iright
+       mov             0xff, $omask
+       sub             $iright, $ileft, $iright
+       and             $out, 7, $ooff
+       cmp             $len, 127
+       movrnz          $ooff, 0, $blk_init             ! if (  $out&7 ||
+       movleu          $::size_t_cc, 0, $blk_init      !       $len<128 ||
+       brnz,pn         $blk_init, .L${bits}cbc_enc_blk !       $inp==$out)
+       srl             $omask, $ooff, $omask
+
+       alignaddrl      $out, %g0, $out
+       srlx            $len, 4, $len
+       prefetch        [$out], 22
+
+.L${bits}_cbc_enc_loop:
+       ldx             [$inp + 0], %o0
+       brz,pt          $ileft, 4f
+       ldx             [$inp + 8], %o1
+
+       ldx             [$inp + 16], %o2
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       sllx            %o1, $ileft, %o1
+       or              %g1, %o0, %o0
+       srlx            %o2, $iright, %o2
+       or              %o2, %o1, %o1
+4:
+       xor             %g4, %o0, %o0           ! ^= rk[0]
+       xor             %g5, %o1, %o1
+       movxtod         %o0, %f12
+       movxtod         %o1, %f14
+
+       fxor            %f12, %f0, %f0          ! ^= ivec
+       fxor            %f14, %f2, %f2
+       prefetch        [$out + 63], 22
+       prefetch        [$inp + 16+63], 20
+       call            _${alg}${bits}_encrypt_1x
+       add             $inp, 16, $inp
+
+       brnz,pn         $ooff, 2f
+       sub             $len, 1, $len
+               
+       std             %f0, [$out + 0]
+       std             %f2, [$out + 8]
+       brnz,pt         $len, .L${bits}_cbc_enc_loop
+       add             $out, 16, $out
+___
+$::code.=<<___ if ($::evp);   # with $::evp: write ivec back as four single words
+       st              %f0, [$ivec + 0]
+       st              %f1, [$ivec + 4]
+       st              %f2, [$ivec + 8]
+       st              %f3, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);   # without $::evp: write ivec back, via label 3 when $ivoff is non-zero
+       brnz,pn         $ivoff, 3f
+       nop
+
+       std             %f0, [$ivec + 0]        ! write out ivec
+       std             %f2, [$ivec + 8]
+___
+$::code.=<<___;   # abort label and return, followed by the "2:" path that stores to unaligned output
+.L${bits}_cbc_enc_abort:
+       ret
+       restore
+
+.align 16
+2:     ldxa            [$inp]0x82, %o0         ! avoid read-after-write hazard
+                                               ! and ~3x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f4           ! handle unaligned output
+       faligndata      %f0, %f2, %f6
+       faligndata      %f2, %f2, %f8
+
+       stda            %f4, [$out + $omask]0xc0        ! partial store
+       std             %f6, [$out + 8]
+       add             $out, 16, $out
+       orn             %g0, $omask, $omask
+       stda            %f8, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .L${bits}_cbc_enc_loop+4
+       orn             %g0, $omask, $omask
+___
+$::code.=<<___ if ($::evp);   # with $::evp: ivec write-back for the unaligned-output exit
+       st              %f0, [$ivec + 0]
+       st              %f1, [$ivec + 4]
+       st              %f2, [$ivec + 8]
+       st              %f3, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);   # without $::evp: ivec write-back plus the "3:" routine that stores an unaligned ivec
+       brnz,pn         $ivoff, 3f
+       nop
+
+       std             %f0, [$ivec + 0]        ! write out ivec
+       std             %f2, [$ivec + 8]
+       ret
+       restore
+
+.align 16
+3:     alignaddrl      $ivec, $ivoff, %g0      ! handle unaligned ivec
+       mov             0xff, $omask
+       srl             $omask, $ivoff, $omask
+       faligndata      %f0, %f0, %f4
+       faligndata      %f0, %f2, %f6
+       faligndata      %f2, %f2, %f8
+       stda            %f4, [$ivec + $omask]0xc0
+       std             %f6, [$ivec + 8]
+       add             $ivec, 16, $ivec
+       orn             %g0, $omask, $omask
+       stda            %f8, [$ivec + $omask]0xc0
+___
+$::code.=<<___;   # return, then the block-store path: bulk blocks via ASI 0xe2 stores, tail handed back to the plain loop
+       ret
+       restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}cbc_enc_blk:
+       add     $out, $len, $blk_init
+       and     $blk_init, 63, $blk_init        ! tail
+       sub     $len, $blk_init, $len
+       add     $blk_init, 15, $blk_init        ! round up to 16n
+       srlx    $len, 4, $len
+       srl     $blk_init, 4, $blk_init
+
+.L${bits}_cbc_enc_blk_loop:
+       ldx             [$inp + 0], %o0
+       brz,pt          $ileft, 5f
+       ldx             [$inp + 8], %o1
+
+       ldx             [$inp + 16], %o2
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       sllx            %o1, $ileft, %o1
+       or              %g1, %o0, %o0
+       srlx            %o2, $iright, %o2
+       or              %o2, %o1, %o1
+5:
+       xor             %g4, %o0, %o0           ! ^= rk[0]
+       xor             %g5, %o1, %o1
+       movxtod         %o0, %f12
+       movxtod         %o1, %f14
+
+       fxor            %f12, %f0, %f0          ! ^= ivec
+       fxor            %f14, %f2, %f2
+       prefetch        [$inp + 16+63], 20
+       call            _${alg}${bits}_encrypt_1x
+       add             $inp, 16, $inp
+       sub             $len, 1, $len
+               
+       stda            %f0, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       add             $out, 8, $out
+       stda            %f2, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       brnz,pt         $len, .L${bits}_cbc_enc_blk_loop
+       add             $out, 8, $out
+
+       membar          #StoreLoad|#StoreStore
+       brnz,pt         $blk_init, .L${bits}_cbc_enc_loop
+       mov             $blk_init, $len
+___
+$::code.=<<___ if ($::evp);   # with $::evp: ivec write-back for the block-store exit
+       st              %f0, [$ivec + 0]
+       st              %f1, [$ivec + 4]
+       st              %f2, [$ivec + 8]
+       st              %f3, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);   # without $::evp: ivec write-back, reusing the earlier "3:" routine when $ivoff is non-zero
+       brnz,pn         $ivoff, 3b
+       nop
+
+       std             %f0, [$ivec + 0]        ! write out ivec
+       std             %f2, [$ivec + 8]
+___
+$::code.=<<___;   # final return plus the .type/.size bookkeeping for the symbol
+       ret
+       restore
+.type  ${alg}${bits}_t4_cbc_encrypt,#function
+.size  ${alg}${bits}_t4_cbc_encrypt,.-${alg}${bits}_t4_cbc_encrypt
+___
+}
+
+sub alg_cbc_decrypt_implement {
+my ($alg,$bits) = @_;
+
+$::code.=<<___;
+.globl ${alg}${bits}_t4_cbc_decrypt
+.align 32
+${alg}${bits}_t4_cbc_decrypt:
+       save            %sp, -$::frame, %sp
+       cmp             $len, 0
+       be,pn           $::size_t_cc, .L${bits}_cbc_dec_abort
+       sub             $inp, $out, $blk_init   ! $inp!=$out
+___
+$::code.=<<___ if (!$::evp);
+       andcc           $ivec, 7, $ivoff
+       alignaddr       $ivec, %g0, $ivec
+
+       ldd             [$ivec + 0], %f12       ! load ivec
+       bz,pt           %icc, 1f
+       ldd             [$ivec + 8], %f14
+       ldd             [$ivec + 16], %f0
+       faligndata      %f12, %f14, %f12
+       faligndata      %f14, %f0, %f14
+1:
+___
+$::code.=<<___ if ($::evp);
+       ld              [$ivec + 0], %f12       ! load ivec
+       ld              [$ivec + 4], %f13
+       ld              [$ivec + 8], %f14
+       ld              [$ivec + 12], %f15
+___
+$::code.=<<___;
+       prefetch        [$inp], 20
+       prefetch        [$inp + 63], 20
+       call            _${alg}${bits}_load_deckey
+       and             $inp, 7, $ileft
+       andn            $inp, 7, $inp
+       sll             $ileft, 3, $ileft
+       mov             64, $iright
+       mov             0xff, $omask
+       sub             $iright, $ileft, $iright
+       and             $out, 7, $ooff
+       cmp             $len, 255
+       movrnz          $ooff, 0, $blk_init             ! if (  $out&7 ||
+       movleu          $::size_t_cc, 0, $blk_init      !       $len<256 ||
+       brnz,pn         $blk_init, .L${bits}cbc_dec_blk !       $inp==$out)
+       srl             $omask, $ooff, $omask
+
+       andcc           $len, 16, %g0           ! is number of blocks even?
+       srlx            $len, 4, $len
+       alignaddrl      $out, %g0, $out
+       bz              %icc, .L${bits}_cbc_dec_loop2x
+       prefetch        [$out], 22
+.L${bits}_cbc_dec_loop:
+       ldx             [$inp + 0], %o0
+       brz,pt          $ileft, 4f
+       ldx             [$inp + 8], %o1
+
+       ldx             [$inp + 16], %o2
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       sllx            %o1, $ileft, %o1
+       or              %g1, %o0, %o0
+       srlx            %o2, $iright, %o2
+       or              %o2, %o1, %o1
+4:
+       xor             %g4, %o0, %o2           ! ^= rk[0]
+       xor             %g5, %o1, %o3
+       movxtod         %o2, %f0
+       movxtod         %o3, %f2
+
+       prefetch        [$out + 63], 22
+       prefetch        [$inp + 16+63], 20
+       call            _${alg}${bits}_decrypt_1x
+       add             $inp, 16, $inp
+
+       fxor            %f12, %f0, %f0          ! ^= ivec
+       fxor            %f14, %f2, %f2
+       movxtod         %o0, %f12
+       movxtod         %o1, %f14
+
+       brnz,pn         $ooff, 2f
+       sub             $len, 1, $len
+               
+       std             %f0, [$out + 0]
+       std             %f2, [$out + 8]
+       brnz,pt         $len, .L${bits}_cbc_dec_loop2x
+       add             $out, 16, $out
+___
+$::code.=<<___ if ($::evp);
+       st              %f12, [$ivec + 0]
+       st              %f13, [$ivec + 4]
+       st              %f14, [$ivec + 8]
+       st              %f15, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+       brnz,pn         $ivoff, .L${bits}_cbc_dec_unaligned_ivec
+       nop
+
+       std             %f12, [$ivec + 0]       ! write out ivec
+       std             %f14, [$ivec + 8]
+___
+$::code.=<<___;
+.L${bits}_cbc_dec_abort:
+       ret
+       restore
+
+.align 16
+2:     ldxa            [$inp]0x82, %o0         ! avoid read-after-write hazard
+                                               ! and ~3x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f4           ! handle unaligned output
+       faligndata      %f0, %f2, %f6
+       faligndata      %f2, %f2, %f8
+
+       stda            %f4, [$out + $omask]0xc0        ! partial store
+       std             %f6, [$out + 8]
+       add             $out, 16, $out
+       orn             %g0, $omask, $omask
+       stda            %f8, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .L${bits}_cbc_dec_loop2x+4
+       orn             %g0, $omask, $omask
+___
+$::code.=<<___ if ($::evp);
+       st              %f12, [$ivec + 0]
+       st              %f13, [$ivec + 4]
+       st              %f14, [$ivec + 8]
+       st              %f15, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+       brnz,pn         $ivoff, .L${bits}_cbc_dec_unaligned_ivec
+       nop
+
+       std             %f12, [$ivec + 0]       ! write out ivec
+       std             %f14, [$ivec + 8]
+___
+$::code.=<<___;
+       ret
+       restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}_cbc_dec_loop2x:
+       ldx             [$inp + 0], %o0
+       ldx             [$inp + 8], %o1
+       ldx             [$inp + 16], %o2
+       brz,pt          $ileft, 4f
+       ldx             [$inp + 24], %o3
+
+       ldx             [$inp + 32], %o4
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       or              %g1, %o0, %o0
+       sllx            %o1, $ileft, %o1
+       srlx            %o2, $iright, %g1
+       or              %g1, %o1, %o1
+       sllx            %o2, $ileft, %o2
+       srlx            %o3, $iright, %g1
+       or              %g1, %o2, %o2
+       sllx            %o3, $ileft, %o3
+       srlx            %o4, $iright, %o4
+       or              %o4, %o3, %o3
+4:
+       xor             %g4, %o0, %o4           ! ^= rk[0]
+       xor             %g5, %o1, %o5
+       movxtod         %o4, %f0
+       movxtod         %o5, %f2
+       xor             %g4, %o2, %o4
+       xor             %g5, %o3, %o5
+       movxtod         %o4, %f4
+       movxtod         %o5, %f6
+
+       prefetch        [$out + 63], 22
+       prefetch        [$inp + 32+63], 20
+       call            _${alg}${bits}_decrypt_2x
+       add             $inp, 32, $inp
+
+       movxtod         %o0, %f8
+       movxtod         %o1, %f10
+       fxor            %f12, %f0, %f0          ! ^= ivec
+       fxor            %f14, %f2, %f2
+       movxtod         %o2, %f12
+       movxtod         %o3, %f14
+       fxor            %f8, %f4, %f4
+       fxor            %f10, %f6, %f6
+
+       brnz,pn         $ooff, 2f
+       sub             $len, 2, $len
+               
+       std             %f0, [$out + 0]
+       std             %f2, [$out + 8]
+       std             %f4, [$out + 16]
+       std             %f6, [$out + 24]
+       brnz,pt         $len, .L${bits}_cbc_dec_loop2x
+       add             $out, 32, $out
+___
+$::code.=<<___ if ($::evp);
+       st              %f12, [$ivec + 0]
+       st              %f13, [$ivec + 4]
+       st              %f14, [$ivec + 8]
+       st              %f15, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+       brnz,pn         $ivoff, .L${bits}_cbc_dec_unaligned_ivec
+       nop
+
+       std             %f12, [$ivec + 0]       ! write out ivec
+       std             %f14, [$ivec + 8]
+___
+$::code.=<<___;
+       ret
+       restore
+
+.align 16
+2:     ldxa            [$inp]0x82, %o0         ! avoid read-after-write hazard
+                                               ! and ~3x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f8           ! handle unaligned output
+       faligndata      %f0, %f2, %f0
+       faligndata      %f2, %f4, %f2
+       faligndata      %f4, %f6, %f4
+       faligndata      %f6, %f6, %f6
+       stda            %f8, [$out + $omask]0xc0        ! partial store
+       std             %f0, [$out + 8]
+       std             %f2, [$out + 16]
+       std             %f4, [$out + 24]
+       add             $out, 32, $out
+       orn             %g0, $omask, $omask
+       stda            %f6, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .L${bits}_cbc_dec_loop2x+4
+       orn             %g0, $omask, $omask
+___
+$::code.=<<___ if ($::evp);
+       st              %f12, [$ivec + 0]
+       st              %f13, [$ivec + 4]
+       st              %f14, [$ivec + 8]
+       st              %f15, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+       brnz,pn         $ivoff, .L${bits}_cbc_dec_unaligned_ivec
+       nop
+
+       std             %f12, [$ivec + 0]       ! write out ivec
+       std             %f14, [$ivec + 8]
+       ret
+       restore
+
+.align 16
+.L${bits}_cbc_dec_unaligned_ivec:
+       alignaddrl      $ivec, $ivoff, %g0      ! handle unaligned ivec
+       mov             0xff, $omask
+       srl             $omask, $ivoff, $omask
+       faligndata      %f12, %f12, %f0
+       faligndata      %f12, %f14, %f2
+       faligndata      %f14, %f14, %f4
+       stda            %f0, [$ivec + $omask]0xc0
+       std             %f2, [$ivec + 8]
+       add             $ivec, 16, $ivec
+       orn             %g0, $omask, $omask
+       stda            %f4, [$ivec + $omask]0xc0
+___
+$::code.=<<___;
+       ret
+       restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}cbc_dec_blk:
+       add     $out, $len, $blk_init
+       and     $blk_init, 63, $blk_init        ! tail
+       sub     $len, $blk_init, $len
+       add     $blk_init, 15, $blk_init        ! round up to 16n
+       srlx    $len, 4, $len
+       srl     $blk_init, 4, $blk_init
+       sub     $len, 1, $len
+       add     $blk_init, 1, $blk_init
+
+.L${bits}_cbc_dec_blk_loop2x:
+       ldx             [$inp + 0], %o0
+       ldx             [$inp + 8], %o1
+       ldx             [$inp + 16], %o2
+       brz,pt          $ileft, 5f
+       ldx             [$inp + 24], %o3
+
+       ldx             [$inp + 32], %o4
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       or              %g1, %o0, %o0
+       sllx            %o1, $ileft, %o1
+       srlx            %o2, $iright, %g1
+       or              %g1, %o1, %o1
+       sllx            %o2, $ileft, %o2
+       srlx            %o3, $iright, %g1
+       or              %g1, %o2, %o2
+       sllx            %o3, $ileft, %o3
+       srlx            %o4, $iright, %o4
+       or              %o4, %o3, %o3
+5:
+       xor             %g4, %o0, %o4           ! ^= rk[0]
+       xor             %g5, %o1, %o5
+       movxtod         %o4, %f0
+       movxtod         %o5, %f2
+       xor             %g4, %o2, %o4
+       xor             %g5, %o3, %o5
+       movxtod         %o4, %f4
+       movxtod         %o5, %f6
+
+       prefetch        [$inp + 32+63], 20
+       call            _${alg}${bits}_decrypt_2x
+       add             $inp, 32, $inp
+       subcc           $len, 2, $len
+
+       movxtod         %o0, %f8
+       movxtod         %o1, %f10
+       fxor            %f12, %f0, %f0          ! ^= ivec
+       fxor            %f14, %f2, %f2
+       movxtod         %o2, %f12
+       movxtod         %o3, %f14
+       fxor            %f8, %f4, %f4
+       fxor            %f10, %f6, %f6
+
+       stda            %f0, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       add             $out, 8, $out
+       stda            %f2, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       add             $out, 8, $out
+       stda            %f4, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       add             $out, 8, $out
+       stda            %f6, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       bgu,pt          $::size_t_cc, .L${bits}_cbc_dec_blk_loop2x
+       add             $out, 8, $out
+
+       add             $blk_init, $len, $len
+       andcc           $len, 1, %g0            ! is number of blocks even?
+       membar          #StoreLoad|#StoreStore
+       bnz,pt          %icc, .L${bits}_cbc_dec_loop
+       srl             $len, 0, $len
+       brnz,pn         $len, .L${bits}_cbc_dec_loop2x
+       nop
+___
+$::code.=<<___ if ($::evp);
+       st              %f12, [$ivec + 0]       ! write out ivec
+       st              %f13, [$ivec + 4]
+       st              %f14, [$ivec + 8]
+       st              %f15, [$ivec + 12]
+___
+$::code.=<<___ if (!$::evp);
+       brnz,pn         $ivoff, 3b
+       nop
+
+       std             %f12, [$ivec + 0]       ! write out ivec
+       std             %f14, [$ivec + 8]
+___
+$::code.=<<___;
+       ret
+       restore
+.type  ${alg}${bits}_t4_cbc_decrypt,#function
+.size  ${alg}${bits}_t4_cbc_decrypt,.-${alg}${bits}_t4_cbc_decrypt
+___
+}
+
+sub alg_ctr32_implement {      # Appends the assembly text of the <alg><bits>_t4_ctr32_encrypt routine to $::code.
+my ($alg,$bits) = @_;          # $alg: "aes" or "cmll" (picks the cipher opcodes below); $bits: spliced into symbol/label names, presumably the key size -- confirm with callers. NOTE(review): $inp, $out, $len, $ivec, $blk_init, $ileft, $iright, $ooff, $omask are not declared in this sub.
+
+$::code.=<<___;        # Prologue: load key schedule and the four 32-bit counter words, derive input/output misalignment, branch to the _ctr32_blk path when $blk_init ends up non-zero, then the head of the one-block loop.
+.globl ${alg}${bits}_t4_ctr32_encrypt
+.align 32
+${alg}${bits}_t4_ctr32_encrypt:
+       save            %sp, -$::frame, %sp
+
+       prefetch        [$inp], 20
+       prefetch        [$inp + 63], 20
+       call            _${alg}${bits}_load_enckey
+       sllx            $len, 4, $len
+
+       ld              [$ivec + 0], %l4        ! counter
+       ld              [$ivec + 4], %l5
+       ld              [$ivec + 8], %l6
+       ld              [$ivec + 12], %l7
+
+       sllx            %l4, 32, %o5
+       or              %l5, %o5, %o5
+       sllx            %l6, 32, %g1
+       xor             %o5, %g4, %g4           ! ^= rk[0]
+       xor             %g1, %g5, %g5
+       movxtod         %g4, %f14               ! most significant 64 bits
+
+       sub             $inp, $out, $blk_init   ! $inp!=$out
+       and             $inp, 7, $ileft
+       andn            $inp, 7, $inp
+       sll             $ileft, 3, $ileft
+       mov             64, $iright
+       mov             0xff, $omask
+       sub             $iright, $ileft, $iright
+       and             $out, 7, $ooff
+       cmp             $len, 255
+       movrnz          $ooff, 0, $blk_init             ! if (  $out&7 ||
+       movleu          $::size_t_cc, 0, $blk_init      !       $len<256 ||
+       brnz,pn         $blk_init, .L${bits}_ctr32_blk  !       $inp==$out)
+       srl             $omask, $ooff, $omask
+
+       andcc           $len, 16, %g0           ! is number of blocks even?
+       alignaddrl      $out, %g0, $out
+       bz              %icc, .L${bits}_ctr32_loop2x
+       srlx            $len, 4, $len
+.L${bits}_ctr32_loop:
+       ldx             [$inp + 0], %o0
+       brz,pt          $ileft, 4f
+       ldx             [$inp + 8], %o1
+
+       ldx             [$inp + 16], %o2
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       sllx            %o1, $ileft, %o1
+       or              %g1, %o0, %o0
+       srlx            %o2, $iright, %o2
+       or              %o2, %o1, %o1
+4:
+       xor             %g5, %l7, %g1           ! ^= rk[0]
+       add             %l7, 1, %l7
+       movxtod         %g1, %f2
+       srl             %l7, 0, %l7             ! clruw
+       prefetch        [$out + 63], 22
+       prefetch        [$inp + 16+63], 20
+___
+$::code.=<<___ if ($alg eq "aes");      # AES only: two opening round instructions issued inline; presumably why the helper below is entered at +8 -- confirm against the 1x encrypt helper.
+       aes_eround01    %f16, %f14, %f2, %f4
+       aes_eround23    %f18, %f14, %f2, %f2
+___
+$::code.=<<___ if ($alg eq "cmll");     # Camellia only: counterpart of the two opening instructions above.
+       camellia_f      %f16, %f2, %f14, %f2
+       camellia_f      %f18, %f14, %f2, %f0
+___
+$::code.=<<___;        # Finish the single block via the 1x helper, XOR with the input words, store (label 2: handles $ooff != 0 with partial stores); then the head of the two-block loop, which steps the 32-bit counter in %l7 twice.
+       call            _${alg}${bits}_encrypt_1x+8
+       add             $inp, 16, $inp
+
+       movxtod         %o0, %f10
+       movxtod         %o1, %f12
+       fxor            %f10, %f0, %f0          ! ^= inp
+       fxor            %f12, %f2, %f2
+
+       brnz,pn         $ooff, 2f
+       sub             $len, 1, $len
+               
+       std             %f0, [$out + 0]
+       std             %f2, [$out + 8]
+       brnz,pt         $len, .L${bits}_ctr32_loop2x
+       add             $out, 16, $out
+
+       ret
+       restore
+
+.align 16
+2:     ldxa            [$inp]0x82, %o0         ! avoid read-after-write hazard
+                                               ! and ~3x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f4           ! handle unaligned output
+       faligndata      %f0, %f2, %f6
+       faligndata      %f2, %f2, %f8
+       stda            %f4, [$out + $omask]0xc0        ! partial store
+       std             %f6, [$out + 8]
+       add             $out, 16, $out
+       orn             %g0, $omask, $omask
+       stda            %f8, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .L${bits}_ctr32_loop2x+4
+       orn             %g0, $omask, $omask
+
+       ret
+       restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}_ctr32_loop2x:
+       ldx             [$inp + 0], %o0
+       ldx             [$inp + 8], %o1
+       ldx             [$inp + 16], %o2
+       brz,pt          $ileft, 4f
+       ldx             [$inp + 24], %o3
+
+       ldx             [$inp + 32], %o4
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       or              %g1, %o0, %o0
+       sllx            %o1, $ileft, %o1
+       srlx            %o2, $iright, %g1
+       or              %g1, %o1, %o1
+       sllx            %o2, $ileft, %o2
+       srlx            %o3, $iright, %g1
+       or              %g1, %o2, %o2
+       sllx            %o3, $ileft, %o3
+       srlx            %o4, $iright, %o4
+       or              %o4, %o3, %o3
+4:
+       xor             %g5, %l7, %g1           ! ^= rk[0]
+       add             %l7, 1, %l7
+       movxtod         %g1, %f2
+       srl             %l7, 0, %l7             ! clruw
+       xor             %g5, %l7, %g1
+       add             %l7, 1, %l7
+       movxtod         %g1, %f6
+       srl             %l7, 0, %l7             ! clruw
+       prefetch        [$out + 63], 22
+       prefetch        [$inp + 32+63], 20
+___
+$::code.=<<___ if ($alg eq "aes");      # AES only: opening round instructions for both blocks (four instructions; the 2x helper below is entered at +16).
+       aes_eround01    %f16, %f14, %f2, %f8
+       aes_eround23    %f18, %f14, %f2, %f2
+       aes_eround01    %f16, %f14, %f6, %f10
+       aes_eround23    %f18, %f14, %f6, %f6
+___
+$::code.=<<___ if ($alg eq "cmll");     # Camellia only: counterpart opening instructions for both blocks.
+       camellia_f      %f16, %f2, %f14, %f2
+       camellia_f      %f16, %f6, %f14, %f6
+       camellia_f      %f18, %f14, %f2, %f0
+       camellia_f      %f18, %f14, %f6, %f4
+___
+$::code.=<<___;        # Finish two blocks via the 2x helper, XOR with input, store (label 2: again covers $ooff != 0); then the _ctr32_blk path: split off the tail count into $blk_init and start its two-block loop.
+       call            _${alg}${bits}_encrypt_2x+16
+       add             $inp, 32, $inp
+
+       movxtod         %o0, %f8
+       movxtod         %o1, %f10
+       movxtod         %o2, %f12
+       fxor            %f8, %f0, %f0           ! ^= inp
+       movxtod         %o3, %f8
+       fxor            %f10, %f2, %f2
+       fxor            %f12, %f4, %f4
+       fxor            %f8, %f6, %f6
+
+       brnz,pn         $ooff, 2f
+       sub             $len, 2, $len
+               
+       std             %f0, [$out + 0]
+       std             %f2, [$out + 8]
+       std             %f4, [$out + 16]
+       std             %f6, [$out + 24]
+       brnz,pt         $len, .L${bits}_ctr32_loop2x
+       add             $out, 32, $out
+
+       ret
+       restore
+
+.align 16
+2:     ldxa            [$inp]0x82, %o0         ! avoid read-after-write hazard
+                                               ! and ~3x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f8           ! handle unaligned output
+       faligndata      %f0, %f2, %f0
+       faligndata      %f2, %f4, %f2
+       faligndata      %f4, %f6, %f4
+       faligndata      %f6, %f6, %f6
+
+       stda            %f8, [$out + $omask]0xc0        ! partial store
+       std             %f0, [$out + 8]
+       std             %f2, [$out + 16]
+       std             %f4, [$out + 24]
+       add             $out, 32, $out
+       orn             %g0, $omask, $omask
+       stda            %f6, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .L${bits}_ctr32_loop2x+4
+       orn             %g0, $omask, $omask
+
+       ret
+       restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}_ctr32_blk:
+       add     $out, $len, $blk_init
+       and     $blk_init, 63, $blk_init        ! tail
+       sub     $len, $blk_init, $len
+       add     $blk_init, 15, $blk_init        ! round up to 16n
+       srlx    $len, 4, $len
+       srl     $blk_init, 4, $blk_init
+       sub     $len, 1, $len
+       add     $blk_init, 1, $blk_init
+
+.L${bits}_ctr32_blk_loop2x:
+       ldx             [$inp + 0], %o0
+       ldx             [$inp + 8], %o1
+       ldx             [$inp + 16], %o2
+       brz,pt          $ileft, 5f
+       ldx             [$inp + 24], %o3
+
+       ldx             [$inp + 32], %o4
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       or              %g1, %o0, %o0
+       sllx            %o1, $ileft, %o1
+       srlx            %o2, $iright, %g1
+       or              %g1, %o1, %o1
+       sllx            %o2, $ileft, %o2
+       srlx            %o3, $iright, %g1
+       or              %g1, %o2, %o2
+       sllx            %o3, $ileft, %o3
+       srlx            %o4, $iright, %o4
+       or              %o4, %o3, %o3
+5:
+       xor             %g5, %l7, %g1           ! ^= rk[0]
+       add             %l7, 1, %l7
+       movxtod         %g1, %f2
+       srl             %l7, 0, %l7             ! clruw
+       xor             %g5, %l7, %g1
+       add             %l7, 1, %l7
+       movxtod         %g1, %f6
+       srl             %l7, 0, %l7             ! clruw
+       prefetch        [$inp + 32+63], 20
+___
+$::code.=<<___ if ($alg eq "aes");      # AES only: same four opening round instructions as in the regular two-block loop.
+       aes_eround01    %f16, %f14, %f2, %f8
+       aes_eround23    %f18, %f14, %f2, %f2
+       aes_eround01    %f16, %f14, %f6, %f10
+       aes_eround23    %f18, %f14, %f6, %f6
+___
+$::code.=<<___ if ($alg eq "cmll");     # Camellia only: same four opening instructions as in the regular two-block loop.
+       camellia_f      %f16, %f2, %f14, %f2
+       camellia_f      %f16, %f6, %f14, %f6
+       camellia_f      %f18, %f14, %f2, %f0
+       camellia_f      %f18, %f14, %f6, %f4
+___
+$::code.=<<___;        # Finish two blocks, XOR with input, write 8 bytes at a time with stda ASI 0xe2; after the loop, add the tail count back into $len, issue a membar, and hand remaining blocks to the regular one-/two-block loops before returning.
+       call            _${alg}${bits}_encrypt_2x+16
+       add             $inp, 32, $inp
+       subcc           $len, 2, $len
+
+       movxtod         %o0, %f8
+       movxtod         %o1, %f10
+       movxtod         %o2, %f12
+       fxor            %f8, %f0, %f0           ! ^= inp
+       movxtod         %o3, %f8
+       fxor            %f10, %f2, %f2
+       fxor            %f12, %f4, %f4
+       fxor            %f8, %f6, %f6
+
+       stda            %f0, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       add             $out, 8, $out
+       stda            %f2, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       add             $out, 8, $out
+       stda            %f4, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       add             $out, 8, $out
+       stda            %f6, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       bgu,pt          $::size_t_cc, .L${bits}_ctr32_blk_loop2x
+       add             $out, 8, $out
+
+       add             $blk_init, $len, $len
+       andcc           $len, 1, %g0            ! is number of blocks even?
+       membar          #StoreLoad|#StoreStore
+       bnz,pt          %icc, .L${bits}_ctr32_loop
+       srl             $len, 0, $len
+       brnz,pn         $len, .L${bits}_ctr32_loop2x
+       nop
+
+       ret
+       restore
+.type  ${alg}${bits}_t4_ctr32_encrypt,#function
+.size  ${alg}${bits}_t4_ctr32_encrypt,.-${alg}${bits}_t4_ctr32_encrypt
+___
+}
+
+sub alg_xts_implement {
+my ($alg,$bits,$dir) = @_;
+my ($inp,$out,$len,$key1,$key2,$ivec)=map("%i$_",(0..5));
+my $rem=$ivec;
+
+$::code.=<<___;
+.globl ${alg}${bits}_t4_xts_${dir}crypt
+.align 32
+${alg}${bits}_t4_xts_${dir}crypt:
+       save            %sp, -$::frame-16, %sp
+
+       mov             $ivec, %o0
+       add             %fp, $::bias-16, %o1
+       call            ${alg}_t4_encrypt
+       mov             $key2, %o2
+
+       add             %fp, $::bias-16, %l7
+       ldxa            [%l7]0x88, %g2
+       add             %fp, $::bias-8, %l7
+       ldxa            [%l7]0x88, %g3          ! %g3:%g2 is tweak
+
+       sethi           %hi(0x76543210), %l7
+       or              %l7, %lo(0x76543210), %l7
+       bmask           %l7, %g0, %g0           ! byte swap mask
+
+       prefetch        [$inp], 20
+       prefetch        [$inp + 63], 20
+       call            _${alg}${bits}_load_${dir}ckey
+       and             $len, 15,  $rem
+       and             $len, -16, $len
+___
+$code.=<<___ if ($dir eq "de");
+       mov             0, %l7
+       movrnz          $rem, 16,  %l7
+       sub             $len, %l7, $len
+___
+$code.=<<___;
+
+       sub             $inp, $out, $blk_init   ! $inp!=$out
+       and             $inp, 7, $ileft
+       andn            $inp, 7, $inp
+       sll             $ileft, 3, $ileft
+       mov             64, $iright
+       mov             0xff, $omask
+       sub             $iright, $ileft, $iright
+       and             $out, 7, $ooff
+       cmp             $len, 255
+       movrnz          $ooff, 0, $blk_init             ! if (  $out&7 ||
+       movleu          $::size_t_cc, 0, $blk_init      !       $len<256 ||
+       brnz,pn         $blk_init, .L${bits}_xts_${dir}blk !    $inp==$out)
+       srl             $omask, $ooff, $omask
+
+       andcc           $len, 16, %g0           ! is number of blocks even?
+___
+$code.=<<___ if ($dir eq "de");
+       brz,pn          $len, .L${bits}_xts_${dir}steal
+___
+$code.=<<___;
+       alignaddrl      $out, %g0, $out
+       bz              %icc, .L${bits}_xts_${dir}loop2x
+       srlx            $len, 4, $len
+.L${bits}_xts_${dir}loop:
+       ldx             [$inp + 0], %o0
+       brz,pt          $ileft, 4f
+       ldx             [$inp + 8], %o1
+
+       ldx             [$inp + 16], %o2
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       sllx            %o1, $ileft, %o1
+       or              %g1, %o0, %o0
+       srlx            %o2, $iright, %o2
+       or              %o2, %o1, %o1
+4:
+       movxtod         %g2, %f12
+       movxtod         %g3, %f14
+       bshuffle        %f12, %f12, %f12
+       bshuffle        %f14, %f14, %f14
+
+       xor             %g4, %o0, %o0           ! ^= rk[0]
+       xor             %g5, %o1, %o1
+       movxtod         %o0, %f0
+       movxtod         %o1, %f2
+
+       fxor            %f12, %f0, %f0          ! ^= tweak[0]
+       fxor            %f14, %f2, %f2
+
+       prefetch        [$out + 63], 22
+       prefetch        [$inp + 16+63], 20
+       call            _${alg}${bits}_${dir}crypt_1x
+       add             $inp, 16, $inp
+
+       fxor            %f12, %f0, %f0          ! ^= tweak[0]
+       fxor            %f14, %f2, %f2
+
+       srax            %g3, 63, %l7            ! next tweak value
+       addcc           %g2, %g2, %g2
+       and             %l7, 0x87, %l7
+       addxc           %g3, %g3, %g3
+       xor             %l7, %g2, %g2
+
+       brnz,pn         $ooff, 2f
+       sub             $len, 1, $len
+               
+       std             %f0, [$out + 0]
+       std             %f2, [$out + 8]
+       brnz,pt         $len, .L${bits}_xts_${dir}loop2x
+       add             $out, 16, $out
+
+       brnz,pn         $rem, .L${bits}_xts_${dir}steal
+       nop
+
+       ret
+       restore
+
+.align 16
+2:     ldxa            [$inp]0x82, %o0         ! avoid read-after-write hazard
+                                               ! and ~3x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f4           ! handle unaligned output
+       faligndata      %f0, %f2, %f6
+       faligndata      %f2, %f2, %f8
+       stda            %f4, [$out + $omask]0xc0        ! partial store
+       std             %f6, [$out + 8]
+       add             $out, 16, $out
+       orn             %g0, $omask, $omask
+       stda            %f8, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .L${bits}_xts_${dir}loop2x+4
+       orn             %g0, $omask, $omask
+
+       brnz,pn         $rem, .L${bits}_xts_${dir}steal
+       nop
+
+       ret
+       restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}_xts_${dir}loop2x:
+       ldx             [$inp + 0], %o0
+       ldx             [$inp + 8], %o1
+       ldx             [$inp + 16], %o2
+       brz,pt          $ileft, 4f
+       ldx             [$inp + 24], %o3
+
+       ldx             [$inp + 32], %o4
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       or              %g1, %o0, %o0
+       sllx            %o1, $ileft, %o1
+       srlx            %o2, $iright, %g1
+       or              %g1, %o1, %o1
+       sllx            %o2, $ileft, %o2
+       srlx            %o3, $iright, %g1
+       or              %g1, %o2, %o2
+       sllx            %o3, $ileft, %o3
+       srlx            %o4, $iright, %o4
+       or              %o4, %o3, %o3
+4:
+       movxtod         %g2, %f12
+       movxtod         %g3, %f14
+       bshuffle        %f12, %f12, %f12
+       bshuffle        %f14, %f14, %f14
+
+       srax            %g3, 63, %l7            ! next tweak value
+       addcc           %g2, %g2, %g2
+       and             %l7, 0x87, %l7
+       addxc           %g3, %g3, %g3
+       xor             %l7, %g2, %g2
+
+       movxtod         %g2, %f8
+       movxtod         %g3, %f10
+       bshuffle        %f8,  %f8,  %f8
+       bshuffle        %f10, %f10, %f10
+
+       xor             %g4, %o0, %o0           ! ^= rk[0]
+       xor             %g5, %o1, %o1
+       xor             %g4, %o2, %o2           ! ^= rk[0]
+       xor             %g5, %o3, %o3
+       movxtod         %o0, %f0
+       movxtod         %o1, %f2
+       movxtod         %o2, %f4
+       movxtod         %o3, %f6
+
+       fxor            %f12, %f0, %f0          ! ^= tweak[0]
+       fxor            %f14, %f2, %f2
+       fxor            %f8,  %f4, %f4          ! ^= tweak[0]
+       fxor            %f10, %f6, %f6
+
+       prefetch        [$out + 63], 22
+       prefetch        [$inp + 32+63], 20
+       call            _${alg}${bits}_${dir}crypt_2x
+       add             $inp, 32, $inp
+
+       movxtod         %g2, %f8
+       movxtod         %g3, %f10
+
+       srax            %g3, 63, %l7            ! next tweak value
+       addcc           %g2, %g2, %g2
+       and             %l7, 0x87, %l7
+       addxc           %g3, %g3, %g3
+       xor             %l7, %g2, %g2
+
+       bshuffle        %f8,  %f8,  %f8
+       bshuffle        %f10, %f10, %f10
+
+       fxor            %f12, %f0, %f0          ! ^= tweak[0]
+       fxor            %f14, %f2, %f2
+       fxor            %f8,  %f4, %f4
+       fxor            %f10, %f6, %f6
+
+       brnz,pn         $ooff, 2f
+       sub             $len, 2, $len
+               
+       std             %f0, [$out + 0]
+       std             %f2, [$out + 8]
+       std             %f4, [$out + 16]
+       std             %f6, [$out + 24]
+       brnz,pt         $len, .L${bits}_xts_${dir}loop2x
+       add             $out, 32, $out
+
+       fsrc2           %f4, %f0
+       fsrc2           %f6, %f2
+       brnz,pn         $rem, .L${bits}_xts_${dir}steal
+       nop
+
+       ret
+       restore
+
+.align 16
+2:     ldxa            [$inp]0x82, %o0         ! avoid read-after-write hazard
+                                               ! and ~3x deterioration
+                                               ! in inp==out case
+       faligndata      %f0, %f0, %f8           ! handle unaligned output
+       faligndata      %f0, %f2, %f10
+       faligndata      %f2, %f4, %f12
+       faligndata      %f4, %f6, %f14
+       faligndata      %f6, %f6, %f0
+
+       stda            %f8, [$out + $omask]0xc0        ! partial store
+       std             %f10, [$out + 8]
+       std             %f12, [$out + 16]
+       std             %f14, [$out + 24]
+       add             $out, 32, $out
+       orn             %g0, $omask, $omask
+       stda            %f0, [$out + $omask]0xc0        ! partial store
+
+       brnz,pt         $len, .L${bits}_xts_${dir}loop2x+4
+       orn             %g0, $omask, $omask
+
+       fsrc2           %f4, %f0
+       fsrc2           %f6, %f2
+       brnz,pn         $rem, .L${bits}_xts_${dir}steal
+       nop
+
+       ret
+       restore
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+.align 32
+.L${bits}_xts_${dir}blk:
+       add     $out, $len, $blk_init
+       and     $blk_init, 63, $blk_init        ! tail
+       sub     $len, $blk_init, $len
+       add     $blk_init, 15, $blk_init        ! round up to 16n
+       srlx    $len, 4, $len
+       srl     $blk_init, 4, $blk_init
+       sub     $len, 1, $len
+       add     $blk_init, 1, $blk_init
+
+.L${bits}_xts_${dir}blk2x:
+       ldx             [$inp + 0], %o0
+       ldx             [$inp + 8], %o1
+       ldx             [$inp + 16], %o2
+       brz,pt          $ileft, 5f
+       ldx             [$inp + 24], %o3
+
+       ldx             [$inp + 32], %o4
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       or              %g1, %o0, %o0
+       sllx            %o1, $ileft, %o1
+       srlx            %o2, $iright, %g1
+       or              %g1, %o1, %o1
+       sllx            %o2, $ileft, %o2
+       srlx            %o3, $iright, %g1
+       or              %g1, %o2, %o2
+       sllx            %o3, $ileft, %o3
+       srlx            %o4, $iright, %o4
+       or              %o4, %o3, %o3
+5:
+       movxtod         %g2, %f12
+       movxtod         %g3, %f14
+       bshuffle        %f12, %f12, %f12
+       bshuffle        %f14, %f14, %f14
+
+       srax            %g3, 63, %l7            ! next tweak value
+       addcc           %g2, %g2, %g2
+       and             %l7, 0x87, %l7
+       addxc           %g3, %g3, %g3
+       xor             %l7, %g2, %g2
+
+       movxtod         %g2, %f8
+       movxtod         %g3, %f10
+       bshuffle        %f8,  %f8,  %f8
+       bshuffle        %f10, %f10, %f10
+
+       xor             %g4, %o0, %o0           ! ^= rk[0]
+       xor             %g5, %o1, %o1
+       xor             %g4, %o2, %o2           ! ^= rk[0]
+       xor             %g5, %o3, %o3
+       movxtod         %o0, %f0
+       movxtod         %o1, %f2
+       movxtod         %o2, %f4
+       movxtod         %o3, %f6
+
+       fxor            %f12, %f0, %f0          ! ^= tweak[0]
+       fxor            %f14, %f2, %f2
+       fxor            %f8,  %f4, %f4          ! ^= tweak[0]
+       fxor            %f10, %f6, %f6
+
+       prefetch        [$inp + 32+63], 20
+       call            _${alg}${bits}_${dir}crypt_2x
+       add             $inp, 32, $inp
+
+       movxtod         %g2, %f8
+       movxtod         %g3, %f10
+
+       srax            %g3, 63, %l7            ! next tweak value
+       addcc           %g2, %g2, %g2
+       and             %l7, 0x87, %l7
+       addxc           %g3, %g3, %g3
+       xor             %l7, %g2, %g2
+
+       bshuffle        %f8,  %f8,  %f8
+       bshuffle        %f10, %f10, %f10
+
+       fxor            %f12, %f0, %f0          ! ^= tweak[0]
+       fxor            %f14, %f2, %f2
+       fxor            %f8,  %f4, %f4
+       fxor            %f10, %f6, %f6
+
+       subcc           $len, 2, $len
+       stda            %f0, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       add             $out, 8, $out
+       stda            %f2, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       add             $out, 8, $out
+       stda            %f4, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       add             $out, 8, $out
+       stda            %f6, [$out]0xe2         ! ASI_BLK_INIT, T4-specific
+       bgu,pt          $::size_t_cc, .L${bits}_xts_${dir}blk2x
+       add             $out, 8, $out
+
+       add             $blk_init, $len, $len
+       andcc           $len, 1, %g0            ! is number of blocks even?
+       membar          #StoreLoad|#StoreStore
+       bnz,pt          %icc, .L${bits}_xts_${dir}loop
+       srl             $len, 0, $len
+       brnz,pn         $len, .L${bits}_xts_${dir}loop2x
+       nop
+
+       fsrc2           %f4, %f0
+       fsrc2           %f6, %f2
+       brnz,pn         $rem, .L${bits}_xts_${dir}steal
+       nop
+
+       ret
+       restore
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+___
+$code.=<<___ if ($dir eq "en");
+.align 32
+.L${bits}_xts_${dir}steal:
+       std             %f0, [%fp + $::bias-16] ! copy of output
+       std             %f2, [%fp + $::bias-8]
+
+       srl             $ileft, 3, $ileft
+       add             %fp, $::bias-16, %l7
+       add             $inp, $ileft, $inp      ! original $inp+$len&-15
+       add             $out, $ooff, $out       ! original $out+$len&-15
+       mov             0, $ileft
+       nop                                     ! align
+
+.L${bits}_xts_${dir}stealing:
+       ldub            [$inp + $ileft], %o0
+       ldub            [%l7  + $ileft], %o1
+       dec             $rem
+       stb             %o0, [%l7  + $ileft]
+       stb             %o1, [$out + $ileft]
+       brnz            $rem, .L${bits}_xts_${dir}stealing
+       inc             $ileft
+
+       mov             %l7, $inp
+       sub             $out, 16, $out
+       mov             0, $ileft
+       sub             $out, $ooff, $out
+       ba              .L${bits}_xts_${dir}loop        ! one more time
+       mov             1, $len                         ! $rem is 0
+___
+$code.=<<___ if ($dir eq "de");
+.align 32
+.L${bits}_xts_${dir}steal:
+       ldx             [$inp + 0], %o0
+       brz,pt          $ileft, 8f
+       ldx             [$inp + 8], %o1
+
+       ldx             [$inp + 16], %o2
+       sllx            %o0, $ileft, %o0
+       srlx            %o1, $iright, %g1
+       sllx            %o1, $ileft, %o1
+       or              %g1, %o0, %o0
+       srlx            %o2, $iright, %o2
+       or              %o2, %o1, %o1
+8:
+       srax            %g3, 63, %l7            ! next tweak value
+       addcc           %g2, %g2, %o2
+       and             %l7, 0x87, %l7
+       addxc           %g3, %g3, %o3
+       xor             %l7, %o2, %o2
+
+       movxtod         %o2, %f12
+       movxtod         %o3, %f14
+       bshuffle        %f12, %f12, %f12
+       bshuffle        %f14, %f14, %f14
+
+       xor             %g4, %o0, %o0           ! ^= rk[0]
+       xor             %g5, %o1, %o1
+       movxtod         %o0, %f0
+       movxtod         %o1, %f2
+
+       fxor            %f12, %f0, %f0          ! ^= tweak[0]
+       fxor            %f14, %f2, %f2
+
+       call            _${alg}${bits}_${dir}crypt_1x
+       add             $inp, 16, $inp
+
+       fxor            %f12, %f0, %f0          ! ^= tweak[0]
+       fxor            %f14, %f2, %f2
+
+       std             %f0, [%fp + $::bias-16]
+       std             %f2, [%fp + $::bias-8]
+
+       srl             $ileft, 3, $ileft
+       add             %fp, $::bias-16, %l7
+       add             $inp, $ileft, $inp      ! original $inp+$len&-15
+       add             $out, $ooff, $out       ! original $out+$len&-15
+       mov             0, $ileft
+       add             $out, 16, $out
+       nop                                     ! align
+
+.L${bits}_xts_${dir}stealing:
+       ldub            [$inp + $ileft], %o0
+       ldub            [%l7  + $ileft], %o1
+       dec             $rem
+       stb             %o0, [%l7  + $ileft]
+       stb             %o1, [$out + $ileft]
+       brnz            $rem, .L${bits}_xts_${dir}stealing
+       inc             $ileft
+
+       mov             %l7, $inp
+       sub             $out, 16, $out
+       mov             0, $ileft
+       sub             $out, $ooff, $out
+       ba              .L${bits}_xts_${dir}loop        ! one more time
+       mov             1, $len                         ! $rem is 0
+___
+$code.=<<___;
+       ret
+       restore
+.type  ${alg}${bits}_t4_xts_${dir}crypt,#function
+.size  ${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt
+___
+}
+
+# Purpose of these subroutines is to explicitly encode VIS instructions,
+# so that one can compile the module without having to specify VIS
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# Idea is to reserve for option to produce "universal" binary and let
+# programmer detect if current CPU is VIS capable at run-time.
+sub unvis {    # encode a 3-operand VIS instruction on %f registers as a raw .word; returns the plain text form if it cannot be encoded
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my ($ref,$opf);
+my %visopf = ( "faligndata"    => 0x048,       # mnemonic => value placed at bit 5 of the emitted word
+               "bshuffle"      => 0x04c,
+               "fnot2"         => 0x066,
+               "fxor"          => 0x06c,
+               "fsrc2"         => 0x078        );
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rd"; # textual form: fallback return value and the "!" comment after the .word
+
+    if ($opf=$visopf{$mnemonic}) {     # only mnemonics listed in %visopf are encoded
+       foreach ($rs1,$rs2,$rd) {       # $_ aliases each operand, so the assignments below rewrite it in place
+           return $ref if (!/%f([0-9]{1,2})/); # operand is not %fN: leave the instruction as text
+           $_=$1;                      # keep only the register number
+           if ($1>=32) {
+               return $ref if ($1&1);  # odd register number >=32: leave the instruction as text
+               # re-encode for upper double register addressing
+               $_=($1|$1>>5)&31;
+           }
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+
+sub unvis3 {   # encode a 3-operand instruction on integer registers (%g/%o/%l/%i) as a raw .word; returns the plain text form otherwise
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );       # register-class letter => first register number of that class
+my ($ref,$opf);
+my %visopf = ( "addxc"         => 0x011,       # mnemonic => value placed at bit 5 of the emitted word
+               "addxccc"       => 0x013,
+               "umulxhi"       => 0x016,
+               "alignaddr"     => 0x018,
+               "bmask"         => 0x019,
+               "alignaddrl"    => 0x01a        );
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rd"; # textual form: fallback return value and the "!" comment after the .word
+
+    if ($opf=$visopf{$mnemonic}) {     # only mnemonics listed in %visopf are encoded
+       foreach ($rs1,$rs2,$rd) {       # $_ aliases each operand, so the assignment below rewrites it in place
+           return $ref if (!/%([goli])([0-9])/);       # not an integer register: leave the instruction as text
+           $_=$bias{$1}+$2;            # class base + index within the class
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+
+sub unaes_round {      # 4-argument instructions: encode an AES round/kexpand1 instruction as a raw .word, or return its text form
+my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_;
+my ($ref,$opf);
+my %aesopf = ( "aes_eround01"  => 0,           # mnemonic => value placed at bit 5 of the emitted word
+               "aes_eround23"  => 1,
+               "aes_dround01"  => 2,
+               "aes_dround23"  => 3,
+               "aes_eround01_l"=> 4,
+               "aes_eround23_l"=> 5,
+               "aes_dround01_l"=> 6,
+               "aes_dround23_l"=> 7,
+               "aes_kexpand1"  => 8    );
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd";    # textual form: fallback return value and the "!" comment after the .word
+
+    if (defined($opf=$aesopf{$mnemonic})) {    # defined(), not truthiness: opf value 0 is valid here
+       $rs3 = ($rs3 =~ /%f([0-6]*[02468])/) ? (($1|$1>>5)&31) : $rs3;  # even %f register is re-encoded; anything else is used as given (presumably a numeric operand - confirm)
+       foreach ($rs1,$rs2,$rd) {       # $_ aliases each operand, so the assignments below rewrite it in place
+           return $ref if (!/%f([0-9]{1,2})/); # operand is not %fN: leave the instruction as text
+           $_=$1;                      # keep only the register number
+           if ($1>=32) {
+               return $ref if ($1&1);  # odd register number >=32: leave the instruction as text
+               # re-encode for upper double register addressing
+               $_=($1|$1>>5)&31;
+           }
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|$opf<<5|$rs2,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+
+sub unaes_kexpand {    # 3-argument instructions: encode aes_kexpand0/aes_kexpand2 as a raw .word, or return the text form
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my ($ref,$opf);
+my %aesopf = ( "aes_kexpand0"  => 0x130,       # mnemonic => value placed at bit 5 of the emitted word
+               "aes_kexpand2"  => 0x131        );
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rd"; # textual form: fallback return value and the "!" comment after the .word
+
+    if (defined($opf=$aesopf{$mnemonic})) {
+       foreach ($rs1,$rs2,$rd) {       # $_ aliases each operand, so the assignments below rewrite it in place
+           return $ref if (!/%f([0-9]{1,2})/); # operand is not %fN: leave the instruction as text
+           $_=$1;                      # keep only the register number
+           if ($1>=32) {
+               return $ref if ($1&1);  # odd register number >=32: leave the instruction as text
+               # re-encode for upper double register addressing
+               $_=($1|$1>>5)&31;
+           }
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+
+sub uncamellia_f {     # 4-argument instructions: encode camellia_f as a raw .word, or return the text form
+my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_;
+my ($ref,$opf);        # NOTE(review): $opf is never assigned or read in this sub
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd";    # textual form: fallback return value and the "!" comment after the .word
+
+    if (1) {   # always taken; no mnemonic lookup here, the opf field is the constant 0xc below
+       $rs3 = ($rs3 =~ /%f([0-6]*[02468])/) ? (($1|$1>>5)&31) : $rs3;  # even %f register is re-encoded; anything else is used as given
+       foreach ($rs1,$rs2,$rd) {       # $_ aliases each operand, so the assignments below rewrite it in place
+           return $ref if (!/%f([0-9]{1,2})/); # operand is not %fN: leave the instruction as text
+           $_=$1;                      # keep only the register number
+           if ($1>=32) {
+               return $ref if ($1&1);  # odd register number >=32: leave the instruction as text
+               # re-encode for upper double register addressing
+               $_=($1|$1>>5)&31;
+           }
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|0xc<<5|$rs2,
+                       $ref;
+    } else {   # unreachable because of the constant condition above
+       return $ref;
+    }
+}
+
+sub uncamellia3 {      # 3-argument instructions: encode camellia_fl/camellia_fli as a raw .word, or return the text form
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my ($ref,$opf);
+my %cmllopf = (        "camellia_fl"   => 0x13c,       # mnemonic => value placed at bit 5 of the emitted word
+               "camellia_fli"  => 0x13d        );
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rd"; # textual form: fallback return value and the "!" comment after the .word
+
+    if (defined($opf=$cmllopf{$mnemonic})) {
+       foreach ($rs1,$rs2,$rd) {       # $_ aliases each operand, so the assignments below rewrite it in place
+           return $ref if (!/%f([0-9]{1,2})/); # operand is not %fN: leave the instruction as text
+           $_=$1;                      # keep only the register number
+           if ($1>=32) {
+               return $ref if ($1&1);  # odd register number >=32: leave the instruction as text
+               # re-encode for upper double register addressing
+               $_=($1|$1>>5)&31;
+           }
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+
+sub unmovxtox {                # 2-argument instructions: encode moves between integer and %f registers as a raw .word, or return the text form
+my ($mnemonic,$rs,$rd)=@_;
+my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24, "f" => 0 );     # register-class letter => first register number of that class
+my ($ref,$opf);
+my %movxopf = (        "movdtox"       => 0x110,       # mnemonic => value placed at bit 5 of the emitted word
+               "movstouw"      => 0x111,
+               "movstosw"      => 0x113,
+               "movxtod"       => 0x118,
+               "movwtos"       => 0x119        );
+
+    $ref = "$mnemonic\t$rs,$rd";       # textual form: fallback return value and the "!" comment after the .word
+
+    if (defined($opf=$movxopf{$mnemonic})) {
+       foreach ($rs,$rd) {             # $_ aliases each operand, so the assignments below rewrite it in place
+           return $ref if (!/%([fgoli])([0-9]{1,2})/); # not a recognised register: leave the instruction as text
+           $_=$bias{$1}+$2;            # class base + index within the class
+           if ($2>=32) {
+               return $ref if ($2&1);  # odd register number >=32: leave the instruction as text
+               # re-encode for upper double register addressing
+               $_=($2|$2>>5)&31;
+           }
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       2<<30|$rd<<25|0x36<<19|$opf<<5|$rs,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+
+sub undes {    # encode des_round (4 operands), des_kexpand (3) or des_ip/des_iip (2) as a raw .word; returns the plain text form if it cannot be encoded
+my ($mnemonic)=shift;
+my @args=@_;   # private copy: the loops below overwrite its elements with register numbers
+my ($ref,$opf);
+my %desopf = ( "des_round"     => 0b1001,      # mnemonic => value placed at bit 5 of the emitted word
+               "des_ip"        => 0b100110100,
+               "des_iip"       => 0b100110101,
+               "des_kexpand"   => 0b100110110  );
+
+    $ref = "$mnemonic\t".join(",",@_); # textual form: fallback return value and the "!" comment after the .word
+
+    if (defined($opf=$desopf{$mnemonic})) {    # 4-arg
+       if ($mnemonic eq "des_round") {
+           foreach (@args[0..3]) {     # $_ aliases each element of @args
+               return $ref if (!/%f([0-9]{1,2})/);
+               $_=$1;
+               if ($1>=32) {
+                   return $ref if ($1&1);
+                   # re-encode for upper double register addressing
+                   $_=($1|$1>>5)&31;
+               }
+           }
+           return  sprintf ".word\t0x%08x !%s",
+                           2<<30|0b011001<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<9|$args[3]<<25,
+                           $ref;
+       } elsif ($mnemonic eq "des_kexpand") {  # 3-arg
+           foreach (@args[0..2]) {
+               return $ref if (!/(%f)?([0-9]{1,2})/);  # the %f prefix is optional here, so a bare number is accepted too
+               $_=$2;
+               if ($2>=32) {
+                   return $ref if ($2&1);
+                   # re-encode for upper double register addressing
+                   $_=($2|$2>>5)&31;
+               }
+           }
+           return  sprintf ".word\t0x%08x !%s",
+                           2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<25,
+                           $ref;
+       } else {                                # 2-arg
+           foreach (@args[0..1]) {
+               return $ref if (!/%f([0-9]{1,2})/);
+               $_=$1;
+               if ($1>=32) {
+                   # The pattern above has a single capture group, so the
+                   # register number is in $1 ($2 is undefined here); odd
+                   # registers >=32 must be left as text like in the
+                   # other branches.
+                   return $ref if ($1&1);
+                   # re-encode for upper double register addressing
+                   $_=($1|$1>>5)&31;
+               }
+           }
+           return  sprintf ".word\t0x%08x !%s",
+                           2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]<<25,
+                           $ref;
+       }
+    } else {
+       return $ref;
+    }
+}
+
+sub emit_assembler {   # post-process $::code line by line, replacing recognised instructions with raw .word encodings, and print the result
+    foreach (split("\n",$::code)) {    # each line is processed in $_
+       s/\`([^\`]*)\`/eval $1/ge;      # expand `...` by evaluating the enclosed text as Perl
+
+       s/\b(f[a-z]+2[sd]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})\s*$/$1\t%f0,$2,$3/go;    # 2-operand f...2 form: insert %f0 as first operand so the 3-operand pattern below can match
+
+       s/\b(aes_[edk][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
+               &unaes_round($1,$2,$3,$4,$5)
+        /geo or
+       s/\b(aes_kexpand[02])\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
+               &unaes_kexpand($1,$2,$3,$4)
+        /geo or
+       s/\b(camellia_f)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
+               &uncamellia_f($1,$2,$3,$4,$5)
+        /geo or
+       s/\b(camellia_[^s]+)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
+               &uncamellia3($1,$2,$3,$4)
+        /geo or
+       s/\b(des_\w+)\s+(%f[0-9]{1,2}),\s*([%fx0-9]+)(?:,\s*(%f[0-9]{1,2})(?:,\s*(%f[0-9]{1,2}))?)?/
+               &undes($1,$2,$3,$4,$5)
+        /geo or
+       s/\b(mov[ds]to\w+)\s+(%f[0-9]{1,2}),\s*(%[goli][0-7])/
+               &unmovxtox($1,$2,$3)
+        /geo or
+       s/\b(mov[xw]to[ds])\s+(%[goli][0-7]),\s*(%f[0-9]{1,2})/
+               &unmovxtox($1,$2,$3)
+        /geo or
+       s/\b([fb][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
+               &unvis($1,$2,$3,$4)
+        /geo or
+       s/\b(umulxhi|bmask|addxc[c]{0,2}|alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
+               &unvis3($1,$2,$3,$4)
+        /geo;
+
+       print $_,"\n";  # the substitutions above are chained with "or", so only the first one that matches is applied to a line
+    }
+}
+
+1;
index 56d9b64..9c70b8c 100755 (executable)
@@ -121,7 +121,7 @@ my %globals;
                $self->{sz} = "";
            } elsif ($self->{op} =~ /^v/) { # VEX
                $self->{sz} = "";
-           } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) {
+           } elsif ($self->{op} =~ /mov[dq]/ && $line =~ /%xmm/) {
                $self->{sz} = "";
            } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) {
                $self->{op} = $1;
@@ -250,8 +250,13 @@ my %globals;
        # in $self->{label}, new gas requires sign extension...
        use integer;
        $self->{label} =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi;
-       $self->{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg;
-       $self->{label} =~ s/([0-9]+)/$1<<32>>32/eg;
+       $self->{label} =~ s/\b([0-9]+\s*[\*\/\%]\s*[0-9]+)\b/eval($1)/eg;
+       $self->{label} =~ s/\b([0-9]+)\b/$1<<32>>32/eg;
+
+       if (!$self->{label} && $self->{index} && $self->{scale}==1 &&
+           $self->{base} =~ /(rbp|r13)/) {
+               $self->{base} = $self->{index}; $self->{index} = $1;
+       }
 
        if ($gas) {
            $self->{label} =~ s/^___imp_/__imp__/   if ($flavour eq "mingw64");
@@ -265,14 +270,20 @@ my %globals;
                sprintf "%s%s(%%%s)",   $self->{asterisk},$self->{label},$self->{base};
            }
        } else {
-           %szmap = (  b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR",
-                       q=>"QWORD$PTR",o=>"OWORD$PTR",x=>"XMMWORD$PTR" );
+           %szmap = (  b=>"BYTE$PTR",  w=>"WORD$PTR",
+                       l=>"DWORD$PTR", d=>"DWORD$PTR",
+                       q=>"QWORD$PTR", o=>"OWORD$PTR",
+                       x=>"XMMWORD$PTR", y=>"YMMWORD$PTR", z=>"ZMMWORD$PTR" );
 
            $self->{label} =~ s/\./\$/g;
            $self->{label} =~ s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/ig;
            $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/);
-           $sz="q" if ($self->{asterisk} || opcode->mnemonic() eq "movq");
-           $sz="l" if (opcode->mnemonic() eq "movd");
+
+           ($self->{asterisk})                                 && ($sz="q") ||
+           (opcode->mnemonic() =~ /^v?mov([qd])$/)             && ($sz=$1)  ||
+           (opcode->mnemonic() =~ /^v?pinsr([qdwb])$/)         && ($sz=$1)  ||
+           (opcode->mnemonic() =~ /^vpbroadcast([qdwb])$/)     && ($sz=$1)  ||
+           (opcode->mnemonic() =~ /^vinsert[fi]128$/)          && ($sz="x");
 
            if (defined($self->{index})) {
                sprintf "%s[%s%s*%d%s]",$szmap{$sz},
@@ -412,7 +423,7 @@ my %globals;
     }
     sub out {
        my $self = shift;
-       if ($nasm && opcode->mnemonic()=~m/^j/) {
+       if ($nasm && opcode->mnemonic()=~m/^j(?![re]cxz)/) {
            "NEAR ".$self->{value};
        } else {
            $self->{value};
@@ -530,7 +541,7 @@ my %globals;
                                        $v="$current_segment\tENDS\n" if ($current_segment);
                                        $current_segment = ".text\$";
                                        $v.="$current_segment\tSEGMENT ";
-                                       $v.=$masm>=$masmref ? "ALIGN(64)" : "PAGE";
+                                       $v.=$masm>=$masmref ? "ALIGN(256)" : "PAGE";
                                        $v.=" 'CODE'";
                                    }
                                    $self->{value} = $v;
@@ -772,10 +783,64 @@ my $rdrand = sub {
     }
 };
 
+# Hand-encode "rdseed %reg" as raw opcode bytes (0x0f,0xc7 followed by a
+# ModR/M byte of 0xf8|reg, preceded by whatever prefix rex() emits).
+# Returns the byte list, or an empty list when the operand is not a
+# %e../%r.. register.
+my $rdseed = sub {
+    if (shift =~ /%[er](\w+)/) {
+      my @opcode=();
+      my $dst=$1;
+       # Legacy register names are mapped to their number through %regrm;
+       # names that already contain digits (r8..r15) are used as they are.
+       if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; }
+       # Pass the resolved register number, not $1: when the digit test
+       # above matches (r8..r15) it resets $1 to undef, so rex() would
+       # never see that the register number is >=8.
+       rex(\@opcode,0,$dst,8);
+       push @opcode,0x0f,0xc7,0xf8|($dst&7);
+       @opcode;
+    } else {
+       ();
+    }
+};
+
+sub rxb {      # append one prefix byte to the opcode array passed by reference as the first argument
+ local *opcode=shift;  # alias @opcode to the caller's array
+ my ($dst,$src1,$src2,$rxb)=@_;        # three register numbers plus the initial low bits of the byte
+
+   $rxb|=0x7<<5;                       # start with the top three bits set...
+   $rxb&=~(0x04<<5) if($dst>=8);       # ...and clear one of them for every register number >=8
+   $rxb&=~(0x01<<5) if($src1>=8);      # (presumably the inverted R/X/B extension bits of the prefix - confirm)
+   $rxb&=~(0x02<<5) if($src2>=8);
+   push @opcode,$rxb;
+}
+
+my $vprotd = sub {     # hand-encode "vprotd $imm,%xmmN,%xmmM" as raw bytes; returns an empty list for any other operand form
+    if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {   # $1=immediate, $2=first register, $3=second register
+      my @opcode=(0x8f);               # first byte of the encoding
+       rxb(\@opcode,$3,$2,-1,0x08);    # -1 is never >=8, so rxb() leaves the bit tied to its third register argument set
+       push @opcode,0x78,0xc2;
+       push @opcode,0xc0|($2&7)|(($3&7)<<3);           # ModR/M
+       my $c=$1;
+       push @opcode,$c=~/^0/?oct($c):$c;       # a leading 0 means hex/octal notation, converted with oct()
+       @opcode;
+    } else {
+       ();
+    }
+};
+
+my $vprotq = sub {     # hand-encode "vprotq $imm,%xmmN,%xmmM" as raw bytes; returns an empty list for any other operand form
+    if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {   # $1=immediate, $2=first register, $3=second register
+      my @opcode=(0x8f);               # first byte of the encoding
+       rxb(\@opcode,$3,$2,-1,0x08);    # -1 is never >=8, so rxb() leaves the bit tied to its third register argument set
+       push @opcode,0x78,0xc3;         # differs from the vprotd encoder only in this byte (0xc3 instead of 0xc2)
+       push @opcode,0xc0|($2&7)|(($3&7)<<3);           # ModR/M
+       my $c=$1;
+       push @opcode,$c=~/^0/?oct($c):$c;       # a leading 0 means hex/octal notation, converted with oct()
+       @opcode;
+    } else {
+       ();
+    }
+};
+
 if ($nasm) {
     print <<___;
 default        rel
 %define XMMWORD
+%define YMMWORD
+%define ZMMWORD
 ___
 } elsif ($masm) {
     print <<___;
@@ -789,6 +854,7 @@ while($line=<>) {
     $line =~ s|[#!].*$||;      # get rid of asm-style comments...
     $line =~ s|/\*.*\*/||;     # ... and C-style comments...
     $line =~ s|^\s+||;         # ... and skip white spaces in beginning
+    $line =~ s|\s+$||;         # ... and at the end
 
     undef $label;
     undef $opcode;
@@ -837,6 +903,8 @@ while($line=<>) {
                    my $arg = $_->out();
                    # $insn.=$sz compensates for movq, pinsrw, ...
                    if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; }
+                   if ($arg =~ /^ymm[0-9]+$/) { $insn.=$sz; $sz="y" if(!$sz); last; }
+                   if ($arg =~ /^zmm[0-9]+$/) { $insn.=$sz; $sz="z" if(!$sz); last; }
                    if ($arg =~ /^mm[0-9]+$/)  { $insn.=$sz; $sz="q" if(!$sz); last; }
                }
                @args = reverse(@args);
index eb543db..cae156a 100644 (file)
@@ -131,6 +131,40 @@ sub ::rdrand
     {  &::generic("rdrand",@_);        }
 }
 
+# Emit "rdseed <dst>". For the 32-bit registers matched below the
+# instruction is written as raw bytes (0x0f,0xc7,0xf8|register number);
+# any other operand is passed to the generic emitter under the rdseed
+# mnemonic.
+sub ::rdseed
+{ my ($dst)=@_;
+    if ($dst =~ /(e[a-dsd][ixp])/)
+    {  &::data_byte(0x0f,0xc7,0xf8|$regrm{$dst});      }
+    else
+    {  &::generic("rdseed",@_);        }       # was "rdrand": the fallback must emit the instruction that was requested
+}
+
+sub rxb {      # append one prefix byte to the opcode array passed by reference as the first argument
+ local *opcode=shift;  # alias @opcode to the caller's array
+ my ($dst,$src1,$src2,$rxb)=@_;        # three register numbers plus the initial low bits of the byte
+
+   $rxb|=0x7<<5;                       # start with the top three bits set...
+   $rxb&=~(0x04<<5) if($dst>=8);       # ...and clear one of them for every register number >=8
+   $rxb&=~(0x01<<5) if($src1>=8);      # (presumably the inverted R/X/B extension bits of the prefix - confirm)
+   $rxb&=~(0x02<<5) if($src2>=8);
+   push @opcode,$rxb;
+}
+
+sub ::vprotd   # emit vprotd as raw bytes for the xmmN,xmmM,imm form; anything else goes to the generic emitter
+{ my $args=join(',',@_);
+    if ($args =~ /xmm([0-7]),xmm([0-7]),([x0-9a-f]+)/) # $1=first register, $2=second register, $3=immediate
+    { my @opcode=(0x8f);               # first byte of the encoding
+       rxb(\@opcode,$1,$2,-1,0x08);    # registers are limited to 0-7 by the pattern, so rxb() clears none of the top bits
+       push @opcode,0x78,0xc2;
+       push @opcode,0xc0|($2&7)|(($1&7)<<3);           # ModR/M
+       my $c=$3;
+       push @opcode,$c=~/^0/?oct($c):$c;       # a leading 0 means hex/octal notation, converted with oct()
+       &::data_byte(@opcode);
+    }
+    else
+    {  &::generic("vprotd",@_);        }
+}
+
 # label management
 $lbdecor="L";          # local label decoration, set by package
 $label="000";
@@ -221,6 +255,8 @@ sub ::asm_init
     $elf=$cpp=$coff=$aout=$macosx=$win32=$netware=$mwerks=$android=0;
     if    (($type eq "elf"))
     {  $elf=1;                 require "x86gas.pl";    }
+    elsif (($type eq "elf-1"))
+    {  $elf=-1;                require "x86gas.pl";    }
     elsif (($type eq "a\.out"))
     {  $aout=1;                require "x86gas.pl";    }
     elsif (($type eq "coff" or $type eq "gaswin"))
@@ -257,4 +293,6 @@ EOF
     &file($filename);
 }
 
+sub ::hidden {}
+
 1;
index 682a3a3..63b2301 100644 (file)
@@ -70,6 +70,8 @@ sub ::DWP
 { my($addr,$reg1,$reg2,$idx)=@_;
   my $ret="";
 
+    if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
+
     $addr =~ s/^\s+//;
     # prepend global references with optional underscore
     $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige;
@@ -157,7 +159,7 @@ sub ::file_end
        }
     }
     if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
-       my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8";
+       my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
        if ($::macosx)  { push (@out,"$tmp,2\n"); }
        elsif ($::elf)  { push (@out,"$tmp,4\n"); }
        else            { push (@out,"$tmp\n"); }
@@ -170,10 +172,9 @@ sub ::data_short{   push(@out,".value\t".join(',',@_)."\n");  }
 sub ::data_word {   push(@out,".long\t".join(',',@_)."\n");   }
 
 sub ::align
-{ my $val=$_[0],$p2,$i;
+{ my $val=$_[0];
     if ($::aout)
-    {  for ($p2=0;$val!=0;$val>>=1) { $p2++; }
-       $val=$p2-1;
+    {  $val=int(log($val)/log(2));
        $val.=",0x90";
     }
     push(@out,".align\t$val\n");
@@ -195,6 +196,8 @@ sub ::picmeup
            &::mov($dst,&::DWP("$indirect-$reflabel",$base));
            $non_lazy_ptr{"$nmdecor$sym"}=$indirect;
        }
+       elsif ($sym eq "OPENSSL_ia32cap_P" && $::elf>0)
+       {   &::lea($dst,&::DWP("$sym-$reflabel",$base));   }
        else
        {   &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]",
                            $base));
@@ -250,4 +253,6 @@ ___
 sub ::dataseg
 {   push(@out,".data\n");   }
 
+*::hidden = sub { push(@out,".hidden\t$nmdecor$_[0]\n"); } if ($::elf);
+
 1;
index f937d07..1741342 100644 (file)
@@ -39,6 +39,8 @@ sub get_mem
 { my($size,$addr,$reg1,$reg2,$idx)=@_;
   my($post,$ret);
 
+    if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
+
     $ret .= "$size PTR " if ($size ne "");
 
     $addr =~ s/^\s+//;
@@ -133,7 +135,7 @@ ___
     if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
     {  my $comm=<<___;
 .bss   SEGMENT 'BSS'
-COMM   ${nmdecor}OPENSSL_ia32cap_P:QWORD
+COMM   ${nmdecor}OPENSSL_ia32cap_P:DWORD:4
 .bss   ENDS
 ___
        # comment out OPENSSL_ia32cap_P declarations
index ca2511c..5d92f60 100644 (file)
@@ -36,6 +36,8 @@ sub get_mem
 { my($size,$addr,$reg1,$reg2,$idx)=@_;
   my($post,$ret);
 
+    if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; }
+
     if ($size ne "")
     {  $ret .= "$size";
        $ret .= " PTR" if ($::mwerks);
@@ -117,7 +119,7 @@ sub ::file_end
 {   if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
     {  my $comm=<<___;
 ${drdecor}segment      .bss
-${drdecor}common       ${nmdecor}OPENSSL_ia32cap_P 8
+${drdecor}common       ${nmdecor}OPENSSL_ia32cap_P 16
 ___
        # comment out OPENSSL_ia32cap_P declarations
        grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
index d46eae3..b40ea10 100644 (file)
@@ -171,28 +171,32 @@ ASN1_OCTET_STRING *PKCS12_item_i2d_encrypt(X509_ALGOR *algor,
                                            const char *pass, int passlen,
                                            void *obj, int zbuf)
 {
-    ASN1_OCTET_STRING *oct;
+    ASN1_OCTET_STRING *oct = NULL;
     unsigned char *in = NULL;
     int inlen;
     if (!(oct = M_ASN1_OCTET_STRING_new())) {
         PKCS12err(PKCS12_F_PKCS12_ITEM_I2D_ENCRYPT, ERR_R_MALLOC_FAILURE);
-        return NULL;
+        goto err;
     }
     inlen = ASN1_item_i2d(obj, &in, it);
     if (!in) {
         PKCS12err(PKCS12_F_PKCS12_ITEM_I2D_ENCRYPT, PKCS12_R_ENCODE_ERROR);
-        return NULL;
+        goto err;
     }
     if (!PKCS12_pbe_crypt(algor, pass, passlen, in, inlen, &oct->data,
                           &oct->length, 1)) {
         PKCS12err(PKCS12_F_PKCS12_ITEM_I2D_ENCRYPT, PKCS12_R_ENCRYPT_ERROR);
         OPENSSL_free(in);
-        return NULL;
+        goto err;
     }
     if (zbuf)
         OPENSSL_cleanse(in, inlen);
     OPENSSL_free(in);
     return oct;
+ err:
+    if (oct)
+        ASN1_OCTET_STRING_free(oct);
+    return NULL;
 }
 
 IMPLEMENT_PKCS12_STACK_OF(PKCS7)
index d970f05..861a087 100644 (file)
@@ -76,8 +76,12 @@ X509_SIG *PKCS8_encrypt(int pbe_nid, const EVP_CIPHER *cipher,
 
     if (pbe_nid == -1)
         pbe = PKCS5_pbe2_set(cipher, iter, salt, saltlen);
-    else
+    else if (EVP_PBE_find(EVP_PBE_TYPE_PRF, pbe_nid, NULL, NULL, 0))
+        pbe = PKCS5_pbe2_set_iv(cipher, iter, salt, saltlen, NULL, pbe_nid);
+    else {
+        ERR_clear_error();
         pbe = PKCS5_pbe_set(pbe_nid, iter, salt, saltlen);
+    }
     if (!pbe) {
         PKCS12err(PKCS12_F_PKCS8_ENCRYPT, ERR_R_ASN1_LIB);
         goto err;
diff --git a/crypto/ppc_arch.h b/crypto/ppc_arch.h
new file mode 100644 (file)
index 0000000..b50ec99
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef __PPC_ARCH_H__
+# define __PPC_ARCH_H__
+
+extern unsigned int OPENSSL_ppccap_P;  /* bit mask built from the PPC_* flags below */
+
+# define PPC_FPU64       (1<<0)        /* set once OPENSSL_ppc64_probe() runs without SIGILL */
+# define PPC_ALTIVEC     (1<<1)        /* set once OPENSSL_altivec_probe() runs without SIGILL */
+# define PPC_CRYPTO207   (1<<2)        /* set once OPENSSL_crypto207_probe() runs without SIGILL */
+
+#endif                                 /* __PPC_ARCH_H__ */
index 5242294..2b7f704 100644 (file)
@@ -4,13 +4,15 @@
 #include <setjmp.h>
 #include <signal.h>
 #include <unistd.h>
+#if defined(__linux) || defined(_AIX)
+# include <sys/utsname.h>
+#endif
 #include <crypto.h>
 #include <openssl/bn.h>
 
-#define PPC_FPU64       (1<<0)
-#define PPC_ALTIVEC     (1<<1)
+#include "ppc_arch.h"
 
-static int OPENSSL_ppccap_P = 0;
+unsigned int OPENSSL_ppccap_P = 0;
 
 static sigset_t all_masked;
 
@@ -25,7 +27,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                         const BN_ULONG *np, const BN_ULONG *n0, int num);
 
     if (sizeof(size_t) == 4) {
-# if (defined(__APPLE__) && defined(__MACH__))
+# if 1 || (defined(__APPLE__) && defined(__MACH__))
         if (num >= 8 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64))
             return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num);
 # else
@@ -55,6 +57,22 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
 }
 #endif
 
+/* The two SHA-256 block implementations selected between below. */
+void sha256_block_p8(void *ctx, const void *inp, size_t len);
+void sha256_block_ppc(void *ctx, const void *inp, size_t len);
+
+/*
+ * Run-time dispatcher: forwards the arguments unchanged to
+ * sha256_block_p8() when the PPC_CRYPTO207 bit is set in
+ * OPENSSL_ppccap_P, and to sha256_block_ppc() otherwise.
+ */
+void sha256_block_data_order(void *ctx, const void *inp, size_t len)
+{
+    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
+        sha256_block_p8(ctx, inp, len);
+        return;
+    }
+
+    sha256_block_ppc(ctx, inp, len);
+}
+
+/* The two SHA-512 block implementations selected between below. */
+void sha512_block_p8(void *ctx, const void *inp, size_t len);
+void sha512_block_ppc(void *ctx, const void *inp, size_t len);
+
+/*
+ * Run-time dispatcher: forwards the arguments unchanged to
+ * sha512_block_p8() when the PPC_CRYPTO207 bit is set in
+ * OPENSSL_ppccap_P, and to sha512_block_ppc() otherwise.
+ */
+void sha512_block_data_order(void *ctx, const void *inp, size_t len)
+{
+    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
+        sha512_block_p8(ctx, inp, len);
+        return;
+    }
+
+    sha512_block_ppc(ctx, inp, len);
+}
+
 static sigjmp_buf ill_jmp;
 static void ill_handler(int sig)
 {
@@ -63,6 +81,7 @@ static void ill_handler(int sig)
 
 void OPENSSL_ppc64_probe(void);
 void OPENSSL_altivec_probe(void);
+void OPENSSL_crypto207_probe(void);
 
 void OPENSSL_cpuid_setup(void)
 {
@@ -93,12 +112,15 @@ void OPENSSL_cpuid_setup(void)
     OPENSSL_ppccap_P = 0;
 
 #if defined(_AIX)
-    if (sizeof(size_t) == 4
+    if (sizeof(size_t) == 4) {
+        struct utsname uts;
 # if defined(_SC_AIX_KERNEL_BITMODE)
-        && sysconf(_SC_AIX_KERNEL_BITMODE) != 64
+        if (sysconf(_SC_AIX_KERNEL_BITMODE) != 64)
+            return;
 # endif
-        )
-        return;
+        if (uname(&uts) != 0 || atoi(uts.version) < 6)
+            return;
+    }
 #endif
 
     memset(&ill_act, 0, sizeof(ill_act));
@@ -109,10 +131,14 @@ void OPENSSL_cpuid_setup(void)
     sigaction(SIGILL, &ill_act, &ill_oact);
 
     if (sizeof(size_t) == 4) {
-        if (sigsetjmp(ill_jmp, 1) == 0) {
-            OPENSSL_ppc64_probe();
-            OPENSSL_ppccap_P |= PPC_FPU64;
-        }
+#ifdef __linux
+        struct utsname uts;
+        if (uname(&uts) == 0 && strcmp(uts.machine, "ppc64") == 0)
+#endif
+            if (sigsetjmp(ill_jmp, 1) == 0) {
+                OPENSSL_ppc64_probe();
+                OPENSSL_ppccap_P |= PPC_FPU64;
+            }
     } else {
         /*
          * Wanted code detecting POWER6 CPU and setting PPC_FPU64
@@ -122,6 +148,10 @@ void OPENSSL_cpuid_setup(void)
     if (sigsetjmp(ill_jmp, 1) == 0) {
         OPENSSL_altivec_probe();
         OPENSSL_ppccap_P |= PPC_ALTIVEC;
+        if (sigsetjmp(ill_jmp, 1) == 0) {
+            OPENSSL_crypto207_probe();
+            OPENSSL_ppccap_P |= PPC_CRYPTO207;
+        }
     }
 
     sigaction(SIGILL, &ill_oact, NULL);
index 4ba736a..8d800fe 100755 (executable)
@@ -31,6 +31,7 @@ $code=<<___;
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
+.size  .OPENSSL_ppc64_probe,.-.OPENSSL_ppc64_probe
 
 .globl .OPENSSL_altivec_probe
 .align 4
@@ -39,6 +40,17 @@ $code=<<___;
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
+.size  .OPENSSL_altivec_probe,.-.OPENSSL_altivec_probe
+
+.globl .OPENSSL_crypto207_probe
+.align 4
+.OPENSSL_crypto207_probe:
+       lvx_u   v0,0,r1
+       vcipher v0,v0,v0
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+.size  .OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe
 
 .globl .OPENSSL_wipe_cpu
 .align 4
@@ -71,6 +83,7 @@ $code=<<___;
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
+.size  .OPENSSL_wipe_cpu,.-.OPENSSL_wipe_cpu
 
 .globl .OPENSSL_atomic_add
 .align 4
@@ -84,6 +97,7 @@ Ladd: lwarx   r5,0,r3
        .long   0
        .byte   0,12,0x14,0,0,0,2,0
        .long   0
+.size  .OPENSSL_atomic_add,.-.OPENSSL_atomic_add
 
 .globl .OPENSSL_rdtsc
 .align 4
@@ -93,6 +107,7 @@ Ladd:        lwarx   r5,0,r3
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
+.size  .OPENSSL_rdtsc,.-.OPENSSL_rdtsc
 
 .globl .OPENSSL_cleanse
 .align 4
@@ -125,6 +140,7 @@ Laligned:
        .long   0
        .byte   0,12,0x14,0,0,0,2,0
        .long   0
+.size  .OPENSSL_cleanse,.-.OPENSSL_cleanse
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
index 0c616c4..06670ae 100644 (file)
@@ -684,9 +684,7 @@ static void readscreen(void)
 {
 # if !defined(OPENSSL_SYS_WINCE) && !defined(OPENSSL_SYS_WIN32_CYGWIN)
     HDC hScrDC;                 /* screen DC */
-    HDC hMemDC;                 /* memory DC */
     HBITMAP hBitmap;            /* handle for our bitmap */
-    HBITMAP hOldBitmap;         /* handle for previous bitmap */
     BITMAP bm;                  /* bitmap properties */
     unsigned int size;          /* size of bitmap */
     char *bmbits;               /* contents of bitmap */
@@ -694,13 +692,13 @@ static void readscreen(void)
     int h;                      /* screen height */
     int y;                      /* y-coordinate of screen lines to grab */
     int n = 16;                 /* number of screen lines to grab at a time */
+    BITMAPINFOHEADER bi;        /* info about the bitmap */
 
     if (check_winnt() && OPENSSL_isservice() > 0)
         return;
 
-    /* Create a screen DC and a memory DC compatible to screen DC */
-    hScrDC = CreateDC(TEXT("DISPLAY"), NULL, NULL, NULL);
-    hMemDC = CreateCompatibleDC(hScrDC);
+    /* Get a reference to the screen DC */
+    hScrDC = GetDC(NULL);
 
     /* Get screen resolution */
     w = GetDeviceCaps(hScrDC, HORZRES);
@@ -709,24 +707,31 @@ static void readscreen(void)
     /* Create a bitmap compatible with the screen DC */
     hBitmap = CreateCompatibleBitmap(hScrDC, w, n);
 
-    /* Select new bitmap into memory DC */
-    hOldBitmap = SelectObject(hMemDC, hBitmap);
-
     /* Get bitmap properties */
     GetObject(hBitmap, sizeof(BITMAP), (LPSTR) & bm);
     size = (unsigned int)bm.bmWidthBytes * bm.bmHeight * bm.bmPlanes;
 
+    bi.biSize = sizeof(BITMAPINFOHEADER);
+    bi.biWidth = bm.bmWidth;
+    bi.biHeight = bm.bmHeight;
+    bi.biPlanes = bm.bmPlanes;
+    bi.biBitCount = bm.bmBitsPixel;
+    bi.biCompression = BI_RGB;
+    bi.biSizeImage = 0;
+    bi.biXPelsPerMeter = 0;
+    bi.biYPelsPerMeter = 0;
+    bi.biClrUsed = 0;
+    bi.biClrImportant = 0;
+
     bmbits = OPENSSL_malloc(size);
     if (bmbits) {
         /* Now go through the whole screen, repeatedly grabbing n lines */
         for (y = 0; y < h - n; y += n) {
             unsigned char md[MD_DIGEST_LENGTH];
 
-            /* Bitblt screen DC to memory DC */
-            BitBlt(hMemDC, 0, 0, w, n, hScrDC, 0, y, SRCCOPY);
-
-            /* Copy bitmap bits from memory DC to bmbits */
-            GetBitmapBits(hBitmap, size, bmbits);
+            /* Copy the bits of the current line range into the buffer */
+            GetDIBits(hScrDC, hBitmap, y, n,
+                      bmbits, (BITMAPINFO *) & bi, DIB_RGB_COLORS);
 
             /* Get the hash of the bitmap */
             MD(bmbits, size, md);
@@ -738,13 +743,9 @@ static void readscreen(void)
         OPENSSL_free(bmbits);
     }
 
-    /* Select old bitmap back into memory DC */
-    hBitmap = SelectObject(hMemDC, hOldBitmap);
-
     /* Clean up */
     DeleteObject(hBitmap);
-    DeleteDC(hMemDC);
-    DeleteDC(hScrDC);
+    ReleaseDC(NULL, hScrDC);
 # endif                         /* !OPENSSL_SYS_WINCE */
 }
 
index f3d6e0d..7434ff7 100644 (file)
@@ -42,7 +42,7 @@ lib:  $(LIBOBJ)
        @touch lib
 
 rc4-586.s:     asm/rc4-586.pl ../perlasm/x86asm.pl
-       $(PERL) asm/rc4-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@
+       $(PERL) asm/rc4-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
 
 rc4-x86_64.s: asm/rc4-x86_64.pl
        $(PERL) asm/rc4-x86_64.pl $(PERLASM_SCHEME) > $@
@@ -66,7 +66,7 @@ rc4-ia64.s: rc4-ia64.S
 rc4-%.s:       asm/rc4-%.pl;   $(PERL) $< $(PERLASM_SCHEME) $@
 
 files:
-       $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
+       $(PERL) $(TOP)/util/files.pl "RC4_ENC=$(RC4_ENC)" Makefile >> $(TOP)/MINFO
 
 links:
        @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
index 5c9ac6a..1d55d55 100644 (file)
@@ -60,7 +60,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";
 
-&asm_init($ARGV[0],"rc4-586.pl");
+&asm_init($ARGV[0],"rc4-586.pl",$x86only = $ARGV[$#ARGV] eq "386");
 
 $xx="eax";
 $yy="ebx";
@@ -184,8 +184,11 @@ if ($alt=0) {
        &and    ($ty,-4);               # how many 4-byte chunks?
        &jz     (&label("loop1"));
 
-       &test   ($ty,-8);
        &mov    (&wparam(3),$out);      # $out as accumulator in these loops
+                                       if ($x86only) {
+       &jmp    (&label("go4loop4"));
+                                       } else {
+       &test   ($ty,-8);
        &jz     (&label("go4loop4"));
 
        &picmeup($out,"OPENSSL_ia32cap_P");
@@ -228,6 +231,7 @@ if ($alt=0) {
        &cmp    ($inp,&wparam(1));      # compare to input+len
        &je     (&label("done"));
        &jmp    (&label("loop1"));
+                                       }
 
 &set_label("go4loop4",16);
        &lea    ($ty,&DWP(-4,$inp,$ty));
index 6ebd54d..0f0a248 100644 (file)
@@ -79,7 +79,7 @@ void RC4(RC4_KEY *key, size_t len, const unsigned char *indata,
     y = key->y;
     d = key->data;
 
-#if defined(RC4_CHUNK)
+#if defined(RC4_CHUNK) && !defined(PEDANTIC)
     /*-
      * The original reason for implementing this(*) was the fact that
      * pre-21164a Alpha CPUs don't have byte load/store instructions
index 1e83f19..ee757e6 100644 (file)
                          *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \
                          *((c)++)=(unsigned char)(((l)     )&0xff))
 
-#if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)) || defined(__ICC)
+#if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER))
 # define ROTATE_l32(a,n)     _lrotl(a,n)
 # define ROTATE_r32(a,n)     _lrotr(a,n)
+#elif defined(__ICC)
+# define ROTATE_l32(a,n)     _rotl(a,n)
+# define ROTATE_r32(a,n)     _rotr(a,n)
 #elif defined(__GNUC__) && __GNUC__>=2 && !defined(__STRICT_ANSI__) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
 # if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
 #  define ROTATE_l32(a,n)       ({ register unsigned int ret;   \
index 79c7c42..e292e84 100644 (file)
@@ -228,19 +228,20 @@ rsa_pk1.o: ../cryptlib.h rsa_pk1.c
 rsa_pmeth.o: ../../e_os.h ../../include/openssl/asn1.h
 rsa_pmeth.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
 rsa_pmeth.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
-rsa_pmeth.o: ../../include/openssl/cms.h ../../include/openssl/crypto.h
-rsa_pmeth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
-rsa_pmeth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
-rsa_pmeth.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-rsa_pmeth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-rsa_pmeth.o: ../../include/openssl/objects.h
+rsa_pmeth.o: ../../include/openssl/cms.h ../../include/openssl/conf.h
+rsa_pmeth.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
+rsa_pmeth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
+rsa_pmeth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
+rsa_pmeth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
+rsa_pmeth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
 rsa_pmeth.o: ../../include/openssl/opensslconf.h
 rsa_pmeth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
 rsa_pmeth.o: ../../include/openssl/pkcs7.h ../../include/openssl/rsa.h
 rsa_pmeth.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
 rsa_pmeth.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
 rsa_pmeth.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
-rsa_pmeth.o: ../cryptlib.h ../evp/evp_locl.h rsa_locl.h rsa_pmeth.c
+rsa_pmeth.o: ../../include/openssl/x509v3.h ../cryptlib.h ../evp/evp_locl.h
+rsa_pmeth.o: rsa_locl.h rsa_pmeth.c
 rsa_prn.o: ../../e_os.h ../../include/openssl/asn1.h
 rsa_prn.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
 rsa_prn.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
index a8b59a9..d2ee374 100644 (file)
@@ -262,13 +262,31 @@ struct rsa_st {
                                 EVP_PKEY_CTRL_RSA_KEYGEN_PUBEXP, 0, pubexp)
 
 # define  EVP_PKEY_CTX_set_rsa_mgf1_md(ctx, md)  \
-                EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_SIG,  \
+                EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, \
+                        EVP_PKEY_OP_TYPE_SIG | EVP_PKEY_OP_TYPE_CRYPT, \
                                 EVP_PKEY_CTRL_RSA_MGF1_MD, 0, (void *)md)
 
+# define  EVP_PKEY_CTX_set_rsa_oaep_md(ctx, md)  \
+                EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_CRYPT,  \
+                                EVP_PKEY_CTRL_RSA_OAEP_MD, 0, (void *)md)
+
 # define  EVP_PKEY_CTX_get_rsa_mgf1_md(ctx, pmd) \
-                EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_SIG,  \
+                EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, \
+                        EVP_PKEY_OP_TYPE_SIG | EVP_PKEY_OP_TYPE_CRYPT, \
                                 EVP_PKEY_CTRL_GET_RSA_MGF1_MD, 0, (void *)pmd)
 
+# define  EVP_PKEY_CTX_get_rsa_oaep_md(ctx, pmd) \
+                EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_CRYPT,  \
+                                EVP_PKEY_CTRL_GET_RSA_OAEP_MD, 0, (void *)pmd)
+
+# define  EVP_PKEY_CTX_set0_rsa_oaep_label(ctx, l, llen) \
+                EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_CRYPT,  \
+                                EVP_PKEY_CTRL_RSA_OAEP_LABEL, llen, (void *)l)
+
+# define  EVP_PKEY_CTX_get0_rsa_oaep_label(ctx, l)       \
+                EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_CRYPT,  \
+                                EVP_PKEY_CTRL_GET_RSA_OAEP_LABEL, 0, (void *)l)
+
 # define EVP_PKEY_CTRL_RSA_PADDING       (EVP_PKEY_ALG_CTRL + 1)
 # define EVP_PKEY_CTRL_RSA_PSS_SALTLEN   (EVP_PKEY_ALG_CTRL + 2)
 
@@ -280,6 +298,12 @@ struct rsa_st {
 # define EVP_PKEY_CTRL_GET_RSA_PSS_SALTLEN       (EVP_PKEY_ALG_CTRL + 7)
 # define EVP_PKEY_CTRL_GET_RSA_MGF1_MD           (EVP_PKEY_ALG_CTRL + 8)
 
+# define EVP_PKEY_CTRL_RSA_OAEP_MD       (EVP_PKEY_ALG_CTRL + 9)
+# define EVP_PKEY_CTRL_RSA_OAEP_LABEL    (EVP_PKEY_ALG_CTRL + 10)
+
+# define EVP_PKEY_CTRL_GET_RSA_OAEP_MD   (EVP_PKEY_ALG_CTRL + 11)
+# define EVP_PKEY_CTRL_GET_RSA_OAEP_LABEL (EVP_PKEY_ALG_CTRL + 12)
+
 # define RSA_PKCS1_PADDING       1
 # define RSA_SSLV23_PADDING      2
 # define RSA_NO_PADDING          3
@@ -347,6 +371,14 @@ typedef struct rsa_pss_params_st {
 
 DECLARE_ASN1_FUNCTIONS(RSA_PSS_PARAMS)
 
+typedef struct rsa_oaep_params_st {
+    X509_ALGOR *hashFunc;
+    X509_ALGOR *maskGenFunc;
+    X509_ALGOR *pSourceFunc;
+} RSA_OAEP_PARAMS;
+
+DECLARE_ASN1_FUNCTIONS(RSA_OAEP_PARAMS)
+
 # ifndef OPENSSL_NO_FP_API
 int RSA_print_fp(FILE *fp, const RSA *r, int offset);
 # endif
@@ -414,6 +446,15 @@ int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen,
 int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen,
                                  const unsigned char *f, int fl, int rsa_len,
                                  const unsigned char *p, int pl);
+int RSA_padding_add_PKCS1_OAEP_mgf1(unsigned char *to, int tlen,
+                                    const unsigned char *from, int flen,
+                                    const unsigned char *param, int plen,
+                                    const EVP_MD *md, const EVP_MD *mgf1md);
+int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen,
+                                      const unsigned char *from, int flen,
+                                      int num, const unsigned char *param,
+                                      int plen, const EVP_MD *md,
+                                      const EVP_MD *mgf1md);
 int RSA_padding_add_SSLv23(unsigned char *to, int tlen,
                            const unsigned char *f, int fl);
 int RSA_padding_check_SSLv23(unsigned char *to, int tlen,
@@ -494,8 +535,10 @@ void ERR_load_RSA_strings(void);
 # define RSA_F_PKEY_RSA_SIGN                              142
 # define RSA_F_PKEY_RSA_VERIFY                            154
 # define RSA_F_PKEY_RSA_VERIFYRECOVER                     141
+# define RSA_F_RSA_ALGOR_TO_MD                            157
 # define RSA_F_RSA_BUILTIN_KEYGEN                         129
 # define RSA_F_RSA_CHECK_KEY                              123
+# define RSA_F_RSA_CMS_DECRYPT                            158
 # define RSA_F_RSA_EAY_PRIVATE_DECRYPT                    101
 # define RSA_F_RSA_EAY_PRIVATE_ENCRYPT                    102
 # define RSA_F_RSA_EAY_PUBLIC_DECRYPT                     103
@@ -504,6 +547,7 @@ void ERR_load_RSA_strings(void);
 # define RSA_F_RSA_GENERATE_KEY_EX                        155
 # define RSA_F_RSA_ITEM_VERIFY                            156
 # define RSA_F_RSA_MEMORY_LOCK                            130
+# define RSA_F_RSA_MGF1_TO_MD                             159
 # define RSA_F_RSA_NEW_METHOD                             106
 # define RSA_F_RSA_NULL                                   124
 # define RSA_F_RSA_NULL_MOD_EXP                           131
@@ -513,6 +557,7 @@ void ERR_load_RSA_strings(void);
 # define RSA_F_RSA_NULL_PUBLIC_ENCRYPT                    135
 # define RSA_F_RSA_PADDING_ADD_NONE                       107
 # define RSA_F_RSA_PADDING_ADD_PKCS1_OAEP                 121
+# define RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1            160
 # define RSA_F_RSA_PADDING_ADD_PKCS1_PSS                  125
 # define RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1             148
 # define RSA_F_RSA_PADDING_ADD_PKCS1_TYPE_1               108
@@ -521,6 +566,7 @@ void ERR_load_RSA_strings(void);
 # define RSA_F_RSA_PADDING_ADD_X931                       127
 # define RSA_F_RSA_PADDING_CHECK_NONE                     111
 # define RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP               122
+# define RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1          161
 # define RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_1             112
 # define RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2             113
 # define RSA_F_RSA_PADDING_CHECK_SSLV23                   114
@@ -531,6 +577,7 @@ void ERR_load_RSA_strings(void);
 # define RSA_F_RSA_PRIVATE_ENCRYPT                        151
 # define RSA_F_RSA_PRIV_DECODE                            137
 # define RSA_F_RSA_PRIV_ENCODE                            138
+# define RSA_F_RSA_PSS_TO_CTX                             162
 # define RSA_F_RSA_PUBLIC_DECRYPT                         152
 # define RSA_F_RSA_PUBLIC_ENCRYPT                         153
 # define RSA_F_RSA_PUB_DECODE                             139
@@ -556,17 +603,21 @@ void ERR_load_RSA_strings(void);
 # define RSA_R_DATA_TOO_LARGE_FOR_MODULUS                 132
 # define RSA_R_DATA_TOO_SMALL                             111
 # define RSA_R_DATA_TOO_SMALL_FOR_KEY_SIZE                122
+# define RSA_R_DIGEST_DOES_NOT_MATCH                      166
 # define RSA_R_DIGEST_TOO_BIG_FOR_RSA_KEY                 112
 # define RSA_R_DMP1_NOT_CONGRUENT_TO_D                    124
 # define RSA_R_DMQ1_NOT_CONGRUENT_TO_D                    125
 # define RSA_R_D_E_NOT_CONGRUENT_TO_1                     123
 # define RSA_R_FIRST_OCTET_INVALID                        133
 # define RSA_R_ILLEGAL_OR_UNSUPPORTED_PADDING_MODE        144
+# define RSA_R_INVALID_DIGEST                             160
 # define RSA_R_INVALID_DIGEST_LENGTH                      143
 # define RSA_R_INVALID_HEADER                             137
 # define RSA_R_INVALID_KEYBITS                            145
+# define RSA_R_INVALID_LABEL                              161
 # define RSA_R_INVALID_MESSAGE_LENGTH                     131
 # define RSA_R_INVALID_MGF1_MD                            156
+# define RSA_R_INVALID_OAEP_PARAMETERS                    162
 # define RSA_R_INVALID_PADDING                            138
 # define RSA_R_INVALID_PADDING_MODE                       141
 # define RSA_R_INVALID_PSS_PARAMETERS                     149
@@ -595,9 +646,12 @@ void ERR_load_RSA_strings(void);
 # define RSA_R_SSLV3_ROLLBACK_ATTACK                      115
 # define RSA_R_THE_ASN1_OBJECT_IDENTIFIER_IS_NOT_KNOWN_FOR_THIS_MD 116
 # define RSA_R_UNKNOWN_ALGORITHM_TYPE                     117
+# define RSA_R_UNKNOWN_DIGEST                             163
 # define RSA_R_UNKNOWN_MASK_DIGEST                        151
 # define RSA_R_UNKNOWN_PADDING_TYPE                       118
 # define RSA_R_UNKNOWN_PSS_DIGEST                         152
+# define RSA_R_UNSUPPORTED_ENCRYPTION_TYPE                164
+# define RSA_R_UNSUPPORTED_LABEL_SOURCE                   165
 # define RSA_R_UNSUPPORTED_MASK_ALGORITHM                 153
 # define RSA_R_UNSUPPORTED_MASK_PARAMETER                 154
 # define RSA_R_UNSUPPORTED_SIGNATURE_TYPE                 155
index 93e071d..ca3922e 100644 (file)
 #endif
 #include "asn1_locl.h"
 
+static int rsa_cms_sign(CMS_SignerInfo *si);
+static int rsa_cms_verify(CMS_SignerInfo *si);
+static int rsa_cms_decrypt(CMS_RecipientInfo *ri);
+static int rsa_cms_encrypt(CMS_RecipientInfo *ri);
+
 static int rsa_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey)
 {
     unsigned char *penc = NULL;
@@ -258,6 +263,23 @@ static int rsa_priv_print(BIO *bp, const EVP_PKEY *pkey, int indent,
     return do_rsa_print(bp, pkey->pkey.rsa, indent, 1);
 }
 
+/* Decode the digest AlgorithmIdentifier embedded in an MGF1 AlgorithmIdentifier; returns NULL if alg is NULL, is not MGF1, or its parameter is missing or not a SEQUENCE */
+static X509_ALGOR *rsa_mgf1_decode(X509_ALGOR *alg)
+{
+    const unsigned char *p;
+    int plen;
+    if (alg == NULL || alg->parameter == NULL) /* parameter is dereferenced below, so reject a missing one */
+        return NULL;
+    if (OBJ_obj2nid(alg->algorithm) != NID_mgf1)
+        return NULL;
+    if (alg->parameter->type != V_ASN1_SEQUENCE)
+        return NULL;
+    /* The SEQUENCE payload is parsed as the inner AlgorithmIdentifier */
+    p = alg->parameter->value.sequence->data;
+    plen = alg->parameter->value.sequence->length;
+    return d2i_X509_ALGOR(NULL, &p, plen);
+}
+
 static RSA_PSS_PARAMS *rsa_pss_decode(const X509_ALGOR *alg,
                                       X509_ALGOR **pmaskHash)
 {
@@ -276,15 +298,7 @@ static RSA_PSS_PARAMS *rsa_pss_decode(const X509_ALGOR *alg,
     if (!pss)
         return NULL;
 
-    if (pss->maskGenAlgorithm) {
-        ASN1_TYPE *param = pss->maskGenAlgorithm->parameter;
-        if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) == NID_mgf1
-            && param->type == V_ASN1_SEQUENCE) {
-            p = param->value.sequence->data;
-            plen = param->value.sequence->length;
-            *pmaskHash = d2i_X509_ALGOR(NULL, &p, plen);
-        }
-    }
+    *pmaskHash = rsa_mgf1_decode(pss->maskGenAlgorithm);
 
     return pss;
 }
@@ -401,17 +415,25 @@ static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
 #ifndef OPENSSL_NO_CMS
     case ASN1_PKEY_CTRL_CMS_SIGN:
         if (arg1 == 0)
-            CMS_SignerInfo_get0_algs(arg2, NULL, NULL, NULL, &alg);
+            return rsa_cms_sign(arg2);
+        else if (arg1 == 1)
+            return rsa_cms_verify(arg2);
         break;
 
     case ASN1_PKEY_CTRL_CMS_ENVELOPE:
         if (arg1 == 0)
-            CMS_RecipientInfo_ktri_get0_algs(arg2, NULL, NULL, &alg);
+            return rsa_cms_encrypt(arg2);
+        else if (arg1 == 1)
+            return rsa_cms_decrypt(arg2);
         break;
+
+    case ASN1_PKEY_CTRL_CMS_RI_TYPE:
+        *(int *)arg2 = CMS_RECIPINFO_TRANS;
+        return 1;
 #endif
 
     case ASN1_PKEY_CTRL_DEFAULT_MD_NID:
-        *(int *)arg2 = NID_sha1;
+        *(int *)arg2 = NID_sha256;
         return 1;
 
     default:
@@ -426,59 +448,166 @@ static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2)
 
 }
 
+/* Set *palg to a newly allocated AlgorithmIdentifier for md. For SHA1 (treated as the default) *palg is left untouched. Returns 1 on success, 0 if allocation fails. */
+static int rsa_md_to_algor(X509_ALGOR **palg, const EVP_MD *md)
+{
+    if (EVP_MD_type(md) == NID_sha1)
+        return 1;
+    *palg = X509_ALGOR_new();
+    if (!*palg)
+        return 0;
+    X509_ALGOR_set_md(*palg, md);
+    return 1;
+}
+
+/* Set *palg to a newly allocated MGF1 AlgorithmIdentifier wrapping mgf1md. For SHA1, *palg is set to NULL and 1 is returned. Returns 1 on success, 0 on failure. */
+static int rsa_md_to_mgf1(X509_ALGOR **palg, const EVP_MD *mgf1md)
+{
+    X509_ALGOR *algtmp = NULL;
+    ASN1_STRING *stmp = NULL;
+    *palg = NULL;
+    if (EVP_MD_type(mgf1md) == NID_sha1)
+        return 1;
+    /* need to embed algorithm ID inside another */
+    if (!rsa_md_to_algor(&algtmp, mgf1md))
+        goto err;
+    if (!ASN1_item_pack(algtmp, ASN1_ITEM_rptr(X509_ALGOR), &stmp))
+         goto err;
+    *palg = X509_ALGOR_new();
+    if (!*palg)
+        goto err;
+    X509_ALGOR_set0(*palg, OBJ_nid2obj(NID_mgf1), V_ASN1_SEQUENCE, stmp);
+    stmp = NULL;                /* handed to *palg above; must not be freed below */
+ err:                           /* also reached on success */
+    if (stmp)
+        ASN1_STRING_free(stmp);
+    if (algtmp)
+        X509_ALGOR_free(algtmp);
+    if (*palg)                  /* non-NULL only when every step succeeded */
+        return 1;
+    return 0;
+}
+
+/* Look up the EVP_MD for an AlgorithmIdentifier. A NULL alg maps to SHA1; an unrecognised digest raises RSA_R_UNKNOWN_DIGEST and returns NULL. */
+static const EVP_MD *rsa_algor_to_md(X509_ALGOR *alg)
+{
+    const EVP_MD *md;
+    if (!alg)
+        return EVP_sha1();
+    md = EVP_get_digestbyobj(alg->algorithm);
+    if (md == NULL)
+        RSAerr(RSA_F_RSA_ALGOR_TO_MD, RSA_R_UNKNOWN_DIGEST);
+    return md;
+}
+
+/* Look up the MGF1 digest. A NULL alg maps to SHA1; otherwise alg must be MGF1 and maskHash must be non-NULL and name a known digest, else an error is raised and NULL is returned. */
+static const EVP_MD *rsa_mgf1_to_md(X509_ALGOR *alg, X509_ALGOR *maskHash)
+{
+    const EVP_MD *md;
+    if (!alg)
+        return EVP_sha1();
+    /* Check mask and lookup mask hash algorithm */
+    if (OBJ_obj2nid(alg->algorithm) != NID_mgf1) {
+        RSAerr(RSA_F_RSA_MGF1_TO_MD, RSA_R_UNSUPPORTED_MASK_ALGORITHM);
+        return NULL;
+    }
+    if (!maskHash) {
+        RSAerr(RSA_F_RSA_MGF1_TO_MD, RSA_R_UNSUPPORTED_MASK_PARAMETER);
+        return NULL;
+    }
+    md = EVP_get_digestbyobj(maskHash->algorithm);
+    if (md == NULL) {
+        RSAerr(RSA_F_RSA_MGF1_TO_MD, RSA_R_UNKNOWN_MASK_DIGEST);
+        return NULL;
+    }
+    return md;
+}
+
 /*
- * Customised RSA item verification routine. This is called when a signature
- * is encountered requiring special handling. We currently only handle PSS.
+ * Convert an EVP_PKEY_CTX in PSS mode into the corresponding algorithm parameter,
+ * suitable for setting an AlgorithmIdentifier.
  */
 
-static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
-                           X509_ALGOR *sigalg, ASN1_BIT_STRING *sig,
-                           EVP_PKEY *pkey)
+static ASN1_STRING *rsa_ctx_to_pss(EVP_PKEY_CTX *pkctx)
+{
+    const EVP_MD *sigmd, *mgf1md;
+    RSA_PSS_PARAMS *pss = NULL;
+    ASN1_STRING *os = NULL;     /* encoded RSA_PSS_PARAMS; returned on success, NULL on any failure */
+    EVP_PKEY *pk = EVP_PKEY_CTX_get0_pkey(pkctx);
+    int saltlen, rv = 0;
+    if (EVP_PKEY_CTX_get_signature_md(pkctx, &sigmd) <= 0)
+        goto err;
+    if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0)
+        goto err;
+    if (EVP_PKEY_CTX_get_rsa_pss_saltlen(pkctx, &saltlen) <= 0) /* negative returns are failures too; saltlen would be unset */
+        goto err;
+    if (saltlen == -1)          /* -1: use the signature digest length */
+        saltlen = EVP_MD_size(sigmd);
+    else if (saltlen == -2) {   /* -2: derive the length from the key size */
+        saltlen = EVP_PKEY_size(pk) - EVP_MD_size(sigmd) - 2;
+        if (((EVP_PKEY_bits(pk) - 1) & 0x7) == 0)
+            saltlen--;
+    }
+    pss = RSA_PSS_PARAMS_new();
+    if (!pss)
+        goto err;
+    if (saltlen != 20) {        /* 20 is left unencoded; presumably the ASN.1 DEFAULT - confirm against RFC 3447 */
+        pss->saltLength = ASN1_INTEGER_new();
+        if (!pss->saltLength)
+            goto err;
+        if (!ASN1_INTEGER_set(pss->saltLength, saltlen))
+            goto err;
+    }
+    if (!rsa_md_to_algor(&pss->hashAlgorithm, sigmd))
+        goto err;
+    if (!rsa_md_to_mgf1(&pss->maskGenAlgorithm, mgf1md))
+        goto err;
+    /* Finally create string with pss parameter encoding. */
+    if (!ASN1_item_pack(pss, ASN1_ITEM_rptr(RSA_PSS_PARAMS), &os))
+         goto err;
+    rv = 1;
+ err:                           /* shared cleanup, also reached on success */
+    if (pss)
+        RSA_PSS_PARAMS_free(pss);
+    if (rv)
+        return os;
+    if (os)
+        ASN1_STRING_free(os);
+    return NULL;
+}
+
+/*
+ * From PSS AlgorithmIdentifier set public key parameters. If pkey isn't NULL
+ * then the EVP_MD_CTX is set up and initialised. If it is NULL, parameters are
+ * passed to pkctx instead.
+ */
+
+static int rsa_pss_to_ctx(EVP_MD_CTX *ctx, EVP_PKEY_CTX *pkctx,
+                          X509_ALGOR *sigalg, EVP_PKEY *pkey)
 {
     int rv = -1;
     int saltlen;
     const EVP_MD *mgf1md = NULL, *md = NULL;
     RSA_PSS_PARAMS *pss;
     X509_ALGOR *maskHash;
-    EVP_PKEY_CTX *pkctx;
     /* Sanity check: make sure it is PSS */
     if (OBJ_obj2nid(sigalg->algorithm) != NID_rsassaPss) {
-        RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_SIGNATURE_TYPE);
+        RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_UNSUPPORTED_SIGNATURE_TYPE);
         return -1;
     }
     /* Decode PSS parameters */
     pss = rsa_pss_decode(sigalg, &maskHash);
 
     if (pss == NULL) {
-        RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_PSS_PARAMETERS);
+        RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_INVALID_PSS_PARAMETERS);
         goto err;
     }
-    /* Check mask and lookup mask hash algorithm */
-    if (pss->maskGenAlgorithm) {
-        if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) != NID_mgf1) {
-            RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_MASK_ALGORITHM);
-            goto err;
-        }
-        if (!maskHash) {
-            RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_MASK_PARAMETER);
-            goto err;
-        }
-        mgf1md = EVP_get_digestbyobj(maskHash->algorithm);
-        if (mgf1md == NULL) {
-            RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNKNOWN_MASK_DIGEST);
-            goto err;
-        }
-    } else
-        mgf1md = EVP_sha1();
-
-    if (pss->hashAlgorithm) {
-        md = EVP_get_digestbyobj(pss->hashAlgorithm->algorithm);
-        if (md == NULL) {
-            RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNKNOWN_PSS_DIGEST);
-            goto err;
-        }
-    } else
-        md = EVP_sha1();
+    mgf1md = rsa_mgf1_to_md(pss->maskGenAlgorithm, maskHash);
+    if (!mgf1md)
+        goto err;
+    md = rsa_algor_to_md(pss->hashAlgorithm);
+    if (!md)
+        goto err;
 
     if (pss->saltLength) {
         saltlen = ASN1_INTEGER_get(pss->saltLength);
@@ -488,7 +617,7 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
          * routines will trap other invalid values anyway.
          */
         if (saltlen < 0) {
-            RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_SALT_LENGTH);
+            RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_INVALID_SALT_LENGTH);
             goto err;
         }
     } else
@@ -499,14 +628,24 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
      * PKCS#1 says we should reject any other value anyway.
      */
     if (pss->trailerField && ASN1_INTEGER_get(pss->trailerField) != 1) {
-        RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_TRAILER);
+        RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_INVALID_TRAILER);
         goto err;
     }
 
     /* We have all parameters now set up context */
 
-    if (!EVP_DigestVerifyInit(ctx, &pkctx, md, NULL, pkey))
-        goto err;
+    if (pkey) {
+        if (!EVP_DigestVerifyInit(ctx, &pkctx, md, NULL, pkey))
+            goto err;
+    } else {
+        const EVP_MD *checkmd;
+        if (EVP_PKEY_CTX_get_signature_md(pkctx, &checkmd) <= 0)
+            goto err;
+        if (EVP_MD_type(md) != EVP_MD_type(checkmd)) {
+            RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_DIGEST_DOES_NOT_MATCH);
+            goto err;
+        }
+    }
 
     if (EVP_PKEY_CTX_set_rsa_padding(pkctx, RSA_PKCS1_PSS_PADDING) <= 0)
         goto err;
@@ -517,7 +656,7 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
     if (EVP_PKEY_CTX_set_rsa_mgf1_md(pkctx, mgf1md) <= 0)
         goto err;
     /* Carry on */
-    rv = 2;
+    rv = 1;
 
  err:
     RSA_PSS_PARAMS_free(pss);
@@ -526,6 +665,71 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
     return rv;
 }
 
+static int rsa_cms_verify(CMS_SignerInfo *si)
+{
+    int nid, nid2;
+    X509_ALGOR *alg;
+    EVP_PKEY_CTX *pkctx = CMS_SignerInfo_get0_pkey_ctx(si);
+    CMS_SignerInfo_get0_algs(si, NULL, NULL, NULL, &alg);
+    nid = OBJ_obj2nid(alg->algorithm);
+    if (nid == NID_rsaEncryption)
+        return 1;
+    if (nid == NID_rsassaPss)
+        return rsa_pss_to_ctx(NULL, pkctx, alg, NULL);
+    /* Workaround for some implementations that use a signature OID */
+    if (OBJ_find_sigid_algs(nid, NULL, &nid2)) {
+        if (nid2 == NID_rsaEncryption)
+            return 1;
+    }
+    return 0;
+}
+
+/*
+ * Customised RSA item verification routine. This is called when a signature
+ * is encountered requiring special handling. We currently only handle PSS.
+ */
+
+static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
+                           X509_ALGOR *sigalg, ASN1_BIT_STRING *sig,
+                           EVP_PKEY *pkey)
+{
+    /* Sanity check: make sure it is PSS */
+    if (OBJ_obj2nid(sigalg->algorithm) != NID_rsassaPss) {
+        RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_SIGNATURE_TYPE);
+        return -1;
+    }
+    if (rsa_pss_to_ctx(ctx, NULL, sigalg, pkey) > 0) {
+        /* Carry on */
+        return 2;
+    }
+    return -1;
+}
+
+static int rsa_cms_sign(CMS_SignerInfo *si)
+{
+    int pad_mode = RSA_PKCS1_PADDING;
+    X509_ALGOR *alg;
+    EVP_PKEY_CTX *pkctx = CMS_SignerInfo_get0_pkey_ctx(si);
+    ASN1_STRING *os = NULL;
+    CMS_SignerInfo_get0_algs(si, NULL, NULL, NULL, &alg);
+    if (pkctx) {
+        if (EVP_PKEY_CTX_get_rsa_padding(pkctx, &pad_mode) <= 0)
+            return 0;
+    }
+    if (pad_mode == RSA_PKCS1_PADDING) {
+        X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsaEncryption), V_ASN1_NULL, 0);
+        return 1;
+    }
+    /* We don't support it */
+    if (pad_mode != RSA_PKCS1_PSS_PADDING)
+        return 0;
+    os = rsa_ctx_to_pss(pkctx);
+    if (!os)
+        return 0;
+    X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsassaPss), V_ASN1_SEQUENCE, os);
+    return 1;
+}
+
 static int rsa_item_sign(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
                          X509_ALGOR *alg1, X509_ALGOR *alg2,
                          ASN1_BIT_STRING *sig)
@@ -537,78 +741,184 @@ static int rsa_item_sign(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn,
     if (pad_mode == RSA_PKCS1_PADDING)
         return 2;
     if (pad_mode == RSA_PKCS1_PSS_PADDING) {
-        const EVP_MD *sigmd, *mgf1md;
-        RSA_PSS_PARAMS *pss = NULL;
-        X509_ALGOR *mgf1alg = NULL;
-        ASN1_STRING *os1 = NULL, *os2 = NULL;
-        EVP_PKEY *pk = EVP_PKEY_CTX_get0_pkey(pkctx);
-        int saltlen, rv = 0;
-        sigmd = EVP_MD_CTX_md(ctx);
-        if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0)
-            goto err;
-        if (!EVP_PKEY_CTX_get_rsa_pss_saltlen(pkctx, &saltlen))
-            goto err;
-        if (saltlen == -1)
-            saltlen = EVP_MD_size(sigmd);
-        else if (saltlen == -2) {
-            saltlen = EVP_PKEY_size(pk) - EVP_MD_size(sigmd) - 2;
-            if (((EVP_PKEY_bits(pk) - 1) & 0x7) == 0)
-                saltlen--;
-        }
-        pss = RSA_PSS_PARAMS_new();
-        if (!pss)
-            goto err;
-        if (saltlen != 20) {
-            pss->saltLength = ASN1_INTEGER_new();
-            if (!pss->saltLength)
-                goto err;
-            if (!ASN1_INTEGER_set(pss->saltLength, saltlen))
-                goto err;
-        }
-        if (EVP_MD_type(sigmd) != NID_sha1) {
-            pss->hashAlgorithm = X509_ALGOR_new();
-            if (!pss->hashAlgorithm)
-                goto err;
-            X509_ALGOR_set_md(pss->hashAlgorithm, sigmd);
-        }
-        if (EVP_MD_type(mgf1md) != NID_sha1) {
-            ASN1_STRING *stmp = NULL;
-            /* need to embed algorithm ID inside another */
-            mgf1alg = X509_ALGOR_new();
-            X509_ALGOR_set_md(mgf1alg, mgf1md);
-            if (!ASN1_item_pack(mgf1alg, ASN1_ITEM_rptr(X509_ALGOR), &stmp))
-                 goto err;
-            pss->maskGenAlgorithm = X509_ALGOR_new();
-            if (!pss->maskGenAlgorithm)
-                goto err;
-            X509_ALGOR_set0(pss->maskGenAlgorithm,
-                            OBJ_nid2obj(NID_mgf1), V_ASN1_SEQUENCE, stmp);
-        }
-        /* Finally create string with pss parameter encoding. */
-        if (!ASN1_item_pack(pss, ASN1_ITEM_rptr(RSA_PSS_PARAMS), &os1))
-             goto err;
+        ASN1_STRING *os1 = NULL;
+        os1 = rsa_ctx_to_pss(pkctx);
+        if (!os1)
+            return 0;
+        /* Duplicate parameters if we have to */
         if (alg2) {
-            os2 = ASN1_STRING_dup(os1);
-            if (!os2)
-                goto err;
+            ASN1_STRING *os2 = ASN1_STRING_dup(os1);
+            if (!os2) {
+                ASN1_STRING_free(os1);
+                return 0;
+            }
             X509_ALGOR_set0(alg2, OBJ_nid2obj(NID_rsassaPss),
                             V_ASN1_SEQUENCE, os2);
         }
         X509_ALGOR_set0(alg1, OBJ_nid2obj(NID_rsassaPss),
                         V_ASN1_SEQUENCE, os1);
-        os1 = os2 = NULL;
-        rv = 3;
+        return 3;
+    }
+    return 2;
+}
+
+static RSA_OAEP_PARAMS *rsa_oaep_decode(const X509_ALGOR *alg,
+                                        X509_ALGOR **pmaskHash)
+{
+    const unsigned char *p;
+    int plen;
+    RSA_OAEP_PARAMS *pss;
+
+    *pmaskHash = NULL;
+
+    if (!alg->parameter || alg->parameter->type != V_ASN1_SEQUENCE)
+        return NULL;
+    p = alg->parameter->value.sequence->data;
+    plen = alg->parameter->value.sequence->length;
+    pss = d2i_RSA_OAEP_PARAMS(NULL, &p, plen);
+
+    if (!pss)
+        return NULL;
+
+    *pmaskHash = rsa_mgf1_decode(pss->maskGenFunc);
+
+    return pss;
+}
+
+static int rsa_cms_decrypt(CMS_RecipientInfo *ri)
+{
+    EVP_PKEY_CTX *pkctx;
+    X509_ALGOR *cmsalg;
+    int nid;
+    int rv = -1;
+    unsigned char *label = NULL;
+    int labellen = 0;
+    const EVP_MD *mgf1md = NULL, *md = NULL;
+    RSA_OAEP_PARAMS *oaep;
+    X509_ALGOR *maskHash;
+    pkctx = CMS_RecipientInfo_get0_pkey_ctx(ri);
+    if (!pkctx)
+        return 0;
+    if (!CMS_RecipientInfo_ktri_get0_algs(ri, NULL, NULL, &cmsalg))
+        return -1;
+    nid = OBJ_obj2nid(cmsalg->algorithm);
+    if (nid == NID_rsaEncryption)
+        return 1;
+    if (nid != NID_rsaesOaep) {
+        RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_UNSUPPORTED_ENCRYPTION_TYPE);
+        return -1;
+    }
+    /* Decode OAEP parameters */
+    oaep = rsa_oaep_decode(cmsalg, &maskHash);
+
+    if (oaep == NULL) {
+        RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_INVALID_OAEP_PARAMETERS);
+        goto err;
+    }
+
+    mgf1md = rsa_mgf1_to_md(oaep->maskGenFunc, maskHash);
+    if (!mgf1md)
+        goto err;
+    md = rsa_algor_to_md(oaep->hashFunc);
+    if (!md)
+        goto err;
+
+    if (oaep->pSourceFunc) {
+        X509_ALGOR *plab = oaep->pSourceFunc;
+        if (OBJ_obj2nid(plab->algorithm) != NID_pSpecified) {
+            RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_UNSUPPORTED_LABEL_SOURCE);
+            goto err;
+        }
+        if (plab->parameter->type != V_ASN1_OCTET_STRING) {
+            RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_INVALID_LABEL);
+            goto err;
+        }
+
+        label = plab->parameter->value.octet_string->data;
+        /* Stop label being freed when OAEP parameters are freed */
+        plab->parameter->value.octet_string->data = NULL;
+        labellen = plab->parameter->value.octet_string->length;
+    }
+
+    if (EVP_PKEY_CTX_set_rsa_padding(pkctx, RSA_PKCS1_OAEP_PADDING) <= 0)
+        goto err;
+    if (EVP_PKEY_CTX_set_rsa_oaep_md(pkctx, md) <= 0)
+        goto err;
+    if (EVP_PKEY_CTX_set_rsa_mgf1_md(pkctx, mgf1md) <= 0)
+        goto err;
+    if (EVP_PKEY_CTX_set0_rsa_oaep_label(pkctx, label, labellen) <= 0)
+        goto err;
+    /* Carry on */
+    rv = 1;
+
  err:
-        if (mgf1alg)
-            X509_ALGOR_free(mgf1alg);
-        if (pss)
-            RSA_PSS_PARAMS_free(pss);
-        if (os1)
-            ASN1_STRING_free(os1);
-        return rv;
+    RSA_OAEP_PARAMS_free(oaep);
+    if (maskHash)
+        X509_ALGOR_free(maskHash);
+    return rv;
+}
 
+static int rsa_cms_encrypt(CMS_RecipientInfo *ri)
+{
+    const EVP_MD *md, *mgf1md;
+    RSA_OAEP_PARAMS *oaep = NULL;
+    ASN1_STRING *os = NULL;
+    X509_ALGOR *alg;
+    EVP_PKEY_CTX *pkctx = CMS_RecipientInfo_get0_pkey_ctx(ri);
+    int pad_mode = RSA_PKCS1_PADDING, rv = 0, labellen;
+    unsigned char *label;
+    CMS_RecipientInfo_ktri_get0_algs(ri, NULL, NULL, &alg);
+    if (pkctx) {
+        if (EVP_PKEY_CTX_get_rsa_padding(pkctx, &pad_mode) <= 0)
+            return 0;
     }
-    return 2;
+    if (pad_mode == RSA_PKCS1_PADDING) {
+        X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsaEncryption), V_ASN1_NULL, 0);
+        return 1;
+    }
+    /* Not supported */
+    if (pad_mode != RSA_PKCS1_OAEP_PADDING)
+        return 0;
+    if (EVP_PKEY_CTX_get_rsa_oaep_md(pkctx, &md) <= 0)
+        goto err;
+    if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0)
+        goto err;
+    labellen = EVP_PKEY_CTX_get0_rsa_oaep_label(pkctx, &label);
+    if (labellen < 0)
+        goto err;
+    oaep = RSA_OAEP_PARAMS_new();
+    if (!oaep)
+        goto err;
+    if (!rsa_md_to_algor(&oaep->hashFunc, md))
+        goto err;
+    if (!rsa_md_to_mgf1(&oaep->maskGenFunc, mgf1md))
+        goto err;
+    if (labellen > 0) {
+        ASN1_OCTET_STRING *los = ASN1_OCTET_STRING_new();
+        oaep->pSourceFunc = X509_ALGOR_new();
+        if (!oaep->pSourceFunc)
+            goto err;
+        if (!los)
+            goto err;
+        if (!ASN1_OCTET_STRING_set(los, label, labellen)) {
+            ASN1_OCTET_STRING_free(los);
+            goto err;
+        }
+        X509_ALGOR_set0(oaep->pSourceFunc, OBJ_nid2obj(NID_pSpecified),
+                        V_ASN1_OCTET_STRING, los);
+    }
+    /* create string with OAEP parameter encoding. */
+    if (!ASN1_item_pack(oaep, ASN1_ITEM_rptr(RSA_OAEP_PARAMS), &os))
+         goto err;
+    X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsaesOaep), V_ASN1_SEQUENCE, os);
+    os = NULL;
+    rv = 1;
+ err:
+    if (oaep)
+        RSA_OAEP_PARAMS_free(oaep);
+    if (os)
+        ASN1_STRING_free(os);
+    return rv;
 }
 
 const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[] = {
index 3d82c1d..aff8b58 100644 (file)
@@ -108,6 +108,14 @@ ASN1_SEQUENCE(RSA_PSS_PARAMS) = {
 
 IMPLEMENT_ASN1_FUNCTIONS(RSA_PSS_PARAMS)
 
+ASN1_SEQUENCE(RSA_OAEP_PARAMS) = {
+        ASN1_EXP_OPT(RSA_OAEP_PARAMS, hashFunc, X509_ALGOR, 0),
+        ASN1_EXP_OPT(RSA_OAEP_PARAMS, maskGenFunc, X509_ALGOR, 1),
+        ASN1_EXP_OPT(RSA_OAEP_PARAMS, pSourceFunc, X509_ALGOR, 2),
+} ASN1_SEQUENCE_END(RSA_OAEP_PARAMS)
+
+IMPLEMENT_ASN1_FUNCTIONS(RSA_OAEP_PARAMS)
+
 IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(RSA, RSAPrivateKey, RSAPrivateKey)
 
 IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(RSA, RSAPublicKey, RSAPublicKey)
index 25b3fa7..0bab05e 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/rsa/rsa_err.c */
 /* ====================================================================
- * Copyright (c) 1999-2011 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2014 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -80,8 +80,10 @@ static ERR_STRING_DATA RSA_str_functs[] = {
     {ERR_FUNC(RSA_F_PKEY_RSA_SIGN), "PKEY_RSA_SIGN"},
     {ERR_FUNC(RSA_F_PKEY_RSA_VERIFY), "PKEY_RSA_VERIFY"},
     {ERR_FUNC(RSA_F_PKEY_RSA_VERIFYRECOVER), "PKEY_RSA_VERIFYRECOVER"},
+    {ERR_FUNC(RSA_F_RSA_ALGOR_TO_MD), "RSA_ALGOR_TO_MD"},
     {ERR_FUNC(RSA_F_RSA_BUILTIN_KEYGEN), "RSA_BUILTIN_KEYGEN"},
     {ERR_FUNC(RSA_F_RSA_CHECK_KEY), "RSA_check_key"},
+    {ERR_FUNC(RSA_F_RSA_CMS_DECRYPT), "RSA_CMS_DECRYPT"},
     {ERR_FUNC(RSA_F_RSA_EAY_PRIVATE_DECRYPT), "RSA_EAY_PRIVATE_DECRYPT"},
     {ERR_FUNC(RSA_F_RSA_EAY_PRIVATE_ENCRYPT), "RSA_EAY_PRIVATE_ENCRYPT"},
     {ERR_FUNC(RSA_F_RSA_EAY_PUBLIC_DECRYPT), "RSA_EAY_PUBLIC_DECRYPT"},
@@ -90,6 +92,7 @@ static ERR_STRING_DATA RSA_str_functs[] = {
     {ERR_FUNC(RSA_F_RSA_GENERATE_KEY_EX), "RSA_generate_key_ex"},
     {ERR_FUNC(RSA_F_RSA_ITEM_VERIFY), "RSA_ITEM_VERIFY"},
     {ERR_FUNC(RSA_F_RSA_MEMORY_LOCK), "RSA_memory_lock"},
+    {ERR_FUNC(RSA_F_RSA_MGF1_TO_MD), "RSA_MGF1_TO_MD"},
     {ERR_FUNC(RSA_F_RSA_NEW_METHOD), "RSA_new_method"},
     {ERR_FUNC(RSA_F_RSA_NULL), "RSA_NULL"},
     {ERR_FUNC(RSA_F_RSA_NULL_MOD_EXP), "RSA_NULL_MOD_EXP"},
@@ -100,6 +103,8 @@ static ERR_STRING_DATA RSA_str_functs[] = {
     {ERR_FUNC(RSA_F_RSA_PADDING_ADD_NONE), "RSA_padding_add_none"},
     {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP),
      "RSA_padding_add_PKCS1_OAEP"},
+    {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1),
+     "RSA_padding_add_PKCS1_OAEP_mgf1"},
     {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_PSS), "RSA_padding_add_PKCS1_PSS"},
     {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1),
      "RSA_padding_add_PKCS1_PSS_mgf1"},
@@ -112,6 +117,8 @@ static ERR_STRING_DATA RSA_str_functs[] = {
     {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_NONE), "RSA_padding_check_none"},
     {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP),
      "RSA_padding_check_PKCS1_OAEP"},
+    {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1),
+     "RSA_padding_check_PKCS1_OAEP_mgf1"},
     {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_1),
      "RSA_padding_check_PKCS1_type_1"},
     {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2),
@@ -124,6 +131,7 @@ static ERR_STRING_DATA RSA_str_functs[] = {
     {ERR_FUNC(RSA_F_RSA_PRIVATE_ENCRYPT), "RSA_private_encrypt"},
     {ERR_FUNC(RSA_F_RSA_PRIV_DECODE), "RSA_PRIV_DECODE"},
     {ERR_FUNC(RSA_F_RSA_PRIV_ENCODE), "RSA_PRIV_ENCODE"},
+    {ERR_FUNC(RSA_F_RSA_PSS_TO_CTX), "RSA_PSS_TO_CTX"},
     {ERR_FUNC(RSA_F_RSA_PUBLIC_DECRYPT), "RSA_public_decrypt"},
     {ERR_FUNC(RSA_F_RSA_PUBLIC_ENCRYPT), "RSA_public_encrypt"},
     {ERR_FUNC(RSA_F_RSA_PUB_DECODE), "RSA_PUB_DECODE"},
@@ -157,6 +165,7 @@ static ERR_STRING_DATA RSA_str_reasons[] = {
     {ERR_REASON(RSA_R_DATA_TOO_SMALL), "data too small"},
     {ERR_REASON(RSA_R_DATA_TOO_SMALL_FOR_KEY_SIZE),
      "data too small for key size"},
+    {ERR_REASON(RSA_R_DIGEST_DOES_NOT_MATCH), "digest does not match"},
     {ERR_REASON(RSA_R_DIGEST_TOO_BIG_FOR_RSA_KEY),
      "digest too big for rsa key"},
     {ERR_REASON(RSA_R_DMP1_NOT_CONGRUENT_TO_D), "dmp1 not congruent to d"},
@@ -165,11 +174,14 @@ static ERR_STRING_DATA RSA_str_reasons[] = {
     {ERR_REASON(RSA_R_FIRST_OCTET_INVALID), "first octet invalid"},
     {ERR_REASON(RSA_R_ILLEGAL_OR_UNSUPPORTED_PADDING_MODE),
      "illegal or unsupported padding mode"},
+    {ERR_REASON(RSA_R_INVALID_DIGEST), "invalid digest"},
     {ERR_REASON(RSA_R_INVALID_DIGEST_LENGTH), "invalid digest length"},
     {ERR_REASON(RSA_R_INVALID_HEADER), "invalid header"},
     {ERR_REASON(RSA_R_INVALID_KEYBITS), "invalid keybits"},
+    {ERR_REASON(RSA_R_INVALID_LABEL), "invalid label"},
     {ERR_REASON(RSA_R_INVALID_MESSAGE_LENGTH), "invalid message length"},
     {ERR_REASON(RSA_R_INVALID_MGF1_MD), "invalid mgf1 md"},
+    {ERR_REASON(RSA_R_INVALID_OAEP_PARAMETERS), "invalid oaep parameters"},
     {ERR_REASON(RSA_R_INVALID_PADDING), "invalid padding"},
     {ERR_REASON(RSA_R_INVALID_PADDING_MODE), "invalid padding mode"},
     {ERR_REASON(RSA_R_INVALID_PSS_PARAMETERS), "invalid pss parameters"},
@@ -203,9 +215,13 @@ static ERR_STRING_DATA RSA_str_reasons[] = {
     {ERR_REASON(RSA_R_THE_ASN1_OBJECT_IDENTIFIER_IS_NOT_KNOWN_FOR_THIS_MD),
      "the asn1 object identifier is not known for this md"},
     {ERR_REASON(RSA_R_UNKNOWN_ALGORITHM_TYPE), "unknown algorithm type"},
+    {ERR_REASON(RSA_R_UNKNOWN_DIGEST), "unknown digest"},
     {ERR_REASON(RSA_R_UNKNOWN_MASK_DIGEST), "unknown mask digest"},
     {ERR_REASON(RSA_R_UNKNOWN_PADDING_TYPE), "unknown padding type"},
     {ERR_REASON(RSA_R_UNKNOWN_PSS_DIGEST), "unknown pss digest"},
+    {ERR_REASON(RSA_R_UNSUPPORTED_ENCRYPTION_TYPE),
+     "unsupported encryption type"},
+    {ERR_REASON(RSA_R_UNSUPPORTED_LABEL_SOURCE), "unsupported label source"},
     {ERR_REASON(RSA_R_UNSUPPORTED_MASK_ALGORITHM),
      "unsupported mask algorithm"},
     {ERR_REASON(RSA_R_UNSUPPORTED_MASK_PARAMETER),
index 499835f..9c2a943 100644 (file)
 # include <openssl/rand.h>
 # include <openssl/sha.h>
 
-static int MGF1(unsigned char *mask, long len,
-                const unsigned char *seed, long seedlen);
-
 int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen,
                                const unsigned char *from, int flen,
                                const unsigned char *param, int plen)
 {
+    return RSA_padding_add_PKCS1_OAEP_mgf1(to, tlen, from, flen,
+                                           param, plen, NULL, NULL);
+}
+
+int RSA_padding_add_PKCS1_OAEP_mgf1(unsigned char *to, int tlen,
+                                    const unsigned char *from, int flen,
+                                    const unsigned char *param, int plen,
+                                    const EVP_MD *md, const EVP_MD *mgf1md)
+{
     int i, emlen = tlen - 1;
     unsigned char *db, *seed;
-    unsigned char *dbmask, seedmask[SHA_DIGEST_LENGTH];
+    unsigned char *dbmask, seedmask[EVP_MAX_MD_SIZE];
+    int mdlen;
+
+    if (md == NULL)
+        md = EVP_sha1();
+    if (mgf1md == NULL)
+        mgf1md = md;
 
-    if (flen > emlen - 2 * SHA_DIGEST_LENGTH - 1) {
-        RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP,
+    mdlen = EVP_MD_size(md);
+
+    if (flen > emlen - 2 * mdlen - 1) {
+        RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1,
                RSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE);
         return 0;
     }
 
-    if (emlen < 2 * SHA_DIGEST_LENGTH + 1) {
-        RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP, RSA_R_KEY_SIZE_TOO_SMALL);
+    if (emlen < 2 * mdlen + 1) {
+        RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1,
+               RSA_R_KEY_SIZE_TOO_SMALL);
         return 0;
     }
 
     to[0] = 0;
     seed = to + 1;
-    db = to + SHA_DIGEST_LENGTH + 1;
+    db = to + mdlen + 1;
 
-    if (!EVP_Digest((void *)param, plen, db, NULL, EVP_sha1(), NULL))
+    if (!EVP_Digest((void *)param, plen, db, NULL, md, NULL))
         return 0;
-    memset(db + SHA_DIGEST_LENGTH, 0,
-           emlen - flen - 2 * SHA_DIGEST_LENGTH - 1);
-    db[emlen - flen - SHA_DIGEST_LENGTH - 1] = 0x01;
-    memcpy(db + emlen - flen - SHA_DIGEST_LENGTH, from, (unsigned int)flen);
-    if (RAND_bytes(seed, SHA_DIGEST_LENGTH) <= 0)
+    memset(db + mdlen, 0, emlen - flen - 2 * mdlen - 1);
+    db[emlen - flen - mdlen - 1] = 0x01;
+    memcpy(db + emlen - flen - mdlen, from, (unsigned int)flen);
+    if (RAND_bytes(seed, mdlen) <= 0)
         return 0;
 # ifdef PKCS_TESTVECT
     memcpy(seed,
@@ -68,20 +82,20 @@ int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen,
            20);
 # endif
 
-    dbmask = OPENSSL_malloc(emlen - SHA_DIGEST_LENGTH);
+    dbmask = OPENSSL_malloc(emlen - mdlen);
     if (dbmask == NULL) {
-        RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP, ERR_R_MALLOC_FAILURE);
+        RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1, ERR_R_MALLOC_FAILURE);
         return 0;
     }
 
-    if (MGF1(dbmask, emlen - SHA_DIGEST_LENGTH, seed, SHA_DIGEST_LENGTH) < 0)
+    if (PKCS1_MGF1(dbmask, emlen - mdlen, seed, mdlen, mgf1md) < 0)
         return 0;
-    for (i = 0; i < emlen - SHA_DIGEST_LENGTH; i++)
+    for (i = 0; i < emlen - mdlen; i++)
         db[i] ^= dbmask[i];
 
-    if (MGF1(seedmask, SHA_DIGEST_LENGTH, db, emlen - SHA_DIGEST_LENGTH) < 0)
+    if (PKCS1_MGF1(seedmask, mdlen, db, emlen - mdlen, mgf1md) < 0)
         return 0;
-    for (i = 0; i < SHA_DIGEST_LENGTH; i++)
+    for (i = 0; i < mdlen; i++)
         seed[i] ^= seedmask[i];
 
     OPENSSL_free(dbmask);
@@ -92,6 +106,16 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen,
                                  const unsigned char *from, int flen, int num,
                                  const unsigned char *param, int plen)
 {
+    return RSA_padding_check_PKCS1_OAEP_mgf1(to, tlen, from, flen, num,
+                                             param, plen, NULL, NULL);
+}
+
+int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen,
+                                      const unsigned char *from, int flen,
+                                      int num, const unsigned char *param,
+                                      int plen, const EVP_MD *md,
+                                      const EVP_MD *mgf1md)
+{
     int i, dblen, mlen = -1, one_index = 0, msg_index;
     unsigned int good, found_one_byte;
     const unsigned char *maskedseed, *maskeddb;
@@ -101,26 +125,33 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen,
      */
     unsigned char *db = NULL, *em = NULL, seed[EVP_MAX_MD_SIZE],
         phash[EVP_MAX_MD_SIZE];
+    int mdlen;
+
+    if (md == NULL)
+        md = EVP_sha1();
+    if (mgf1md == NULL)
+        mgf1md = md;
+
+    mdlen = EVP_MD_size(md);
 
     if (tlen <= 0 || flen <= 0)
         return -1;
-
     /*
      * |num| is the length of the modulus; |flen| is the length of the
      * encoded message. Therefore, for any |from| that was obtained by
      * decrypting a ciphertext, we must have |flen| <= |num|. Similarly,
-     * num < 2 * SHA_DIGEST_LENGTH + 2 must hold for the modulus
-     * irrespective of the ciphertext, see PKCS #1 v2.2, section 7.1.2.
+     * num >= 2 * mdlen + 2 must hold for the modulus irrespective of
+     * the ciphertext, see PKCS #1 v2.2, section 7.1.2.
      * This does not leak any side-channel information.
      */
-    if (num < flen || num < 2 * SHA_DIGEST_LENGTH + 2)
+    if (num < flen || num < 2 * mdlen + 2)
         goto decoding_err;
 
-    dblen = num - SHA_DIGEST_LENGTH - 1;
+    dblen = num - mdlen - 1;
     db = OPENSSL_malloc(dblen);
     em = OPENSSL_malloc(num);
     if (db == NULL || em == NULL) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, ERR_R_MALLOC_FAILURE);
+        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, ERR_R_MALLOC_FAILURE);
         goto cleanup;
     }
 
@@ -143,26 +174,25 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen,
     good = constant_time_is_zero(em[0]);
 
     maskedseed = em + 1;
-    maskeddb = em + 1 + SHA_DIGEST_LENGTH;
+    maskeddb = em + 1 + mdlen;
 
-    if (MGF1(seed, SHA_DIGEST_LENGTH, maskeddb, dblen))
+    if (PKCS1_MGF1(seed, mdlen, maskeddb, dblen, mgf1md))
         goto cleanup;
-    for (i = 0; i < SHA_DIGEST_LENGTH; i++)
+    for (i = 0; i < mdlen; i++)
         seed[i] ^= maskedseed[i];
 
-    if (MGF1(db, dblen, seed, SHA_DIGEST_LENGTH))
+    if (PKCS1_MGF1(db, dblen, seed, mdlen, mgf1md))
         goto cleanup;
     for (i = 0; i < dblen; i++)
         db[i] ^= maskeddb[i];
 
-    if (!EVP_Digest((void *)param, plen, phash, NULL, EVP_sha1(), NULL))
+    if (!EVP_Digest((void *)param, plen, phash, NULL, md, NULL))
         goto cleanup;
 
-    good &=
-        constant_time_is_zero(CRYPTO_memcmp(db, phash, SHA_DIGEST_LENGTH));
+    good &= constant_time_is_zero(CRYPTO_memcmp(db, phash, mdlen));
 
     found_one_byte = 0;
-    for (i = SHA_DIGEST_LENGTH; i < dblen; i++) {
+    for (i = mdlen; i < dblen; i++) {
         /*
          * Padding consists of a number of 0-bytes, followed by a 1.
          */
@@ -188,7 +218,7 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen,
     mlen = dblen - msg_index;
 
     if (tlen < mlen) {
-        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, RSA_R_DATA_TOO_LARGE);
+        RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, RSA_R_DATA_TOO_LARGE);
         mlen = -1;
     } else {
         memcpy(to, db + msg_index, mlen);
@@ -200,7 +230,8 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen,
      * To avoid chosen ciphertext attacks, the error message should not
      * reveal which kind of decoding error happened.
      */
-    RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, RSA_R_OAEP_DECODING_ERROR);
+    RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1,
+           RSA_R_OAEP_DECODING_ERROR);
  cleanup:
     if (db != NULL)
         OPENSSL_free(db);
@@ -249,9 +280,4 @@ int PKCS1_MGF1(unsigned char *mask, long len,
     return rv;
 }
 
-static int MGF1(unsigned char *mask, long len, const unsigned char *seed,
-                long seedlen)
-{
-    return PKCS1_MGF1(mask, len, seed, seedlen, EVP_sha1());
-}
 #endif
index 6a7c67c..2036355 100644 (file)
@@ -64,6 +64,7 @@
 #include <openssl/rsa.h>
 #include <openssl/bn.h>
 #include <openssl/evp.h>
+#include <openssl/x509v3.h>
 #ifndef OPENSSL_NO_CMS
 # include <openssl/cms.h>
 #endif
@@ -87,10 +88,13 @@ typedef struct {
     const EVP_MD *md;
     /* message digest for MGF1 */
     const EVP_MD *mgf1md;
-    /* PSS/OAEP salt length */
+    /* PSS salt length */
     int saltlen;
     /* Temp buffer */
     unsigned char *tbuf;
+    /* OAEP label */
+    unsigned char *oaep_label;
+    size_t oaep_labellen;
 } RSA_PKEY_CTX;
 
 static int pkey_rsa_init(EVP_PKEY_CTX *ctx)
@@ -108,6 +112,9 @@ static int pkey_rsa_init(EVP_PKEY_CTX *ctx)
 
     rctx->saltlen = -2;
 
+    rctx->oaep_label = NULL;
+    rctx->oaep_labellen = 0;
+
     ctx->data = rctx;
     ctx->keygen_info = rctx->gentmp;
     ctx->keygen_info_count = 2;
@@ -130,6 +137,15 @@ static int pkey_rsa_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src)
     }
     dctx->pad_mode = sctx->pad_mode;
     dctx->md = sctx->md;
+    dctx->mgf1md = sctx->mgf1md;
+    if (sctx->oaep_label) {
+        if (dctx->oaep_label)
+            OPENSSL_free(dctx->oaep_label);
+        dctx->oaep_label = BUF_memdup(sctx->oaep_label, sctx->oaep_labellen);
+        if (!dctx->oaep_label)
+            return 0;
+        dctx->oaep_labellen = sctx->oaep_labellen;
+    }
     return 1;
 }
 
@@ -151,6 +167,8 @@ static void pkey_rsa_cleanup(EVP_PKEY_CTX *ctx)
             BN_free(rctx->pub_exp);
         if (rctx->tbuf)
             OPENSSL_free(rctx->tbuf);
+        if (rctx->oaep_label)
+            OPENSSL_free(rctx->oaep_label);
         OPENSSL_free(rctx);
     }
 }
@@ -173,10 +191,18 @@ static int pkey_fips_check_ctx(EVP_PKEY_CTX *ctx)
         rv = 0;
     if (!(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) && rv)
         return -1;
-    if (rctx->md && !(rctx->md->flags & EVP_MD_FLAG_FIPS))
-        return rv;
-    if (rctx->mgf1md && !(rctx->mgf1md->flags & EVP_MD_FLAG_FIPS))
-        return rv;
+    if (rctx->md) {
+        const EVP_MD *fmd;
+        fmd = FIPS_get_digestbynid(EVP_MD_type(rctx->md));
+        if (!fmd || !(fmd->flags & EVP_MD_FLAG_FIPS))
+            return rv;
+    }
+    if (rctx->mgf1md && !(rctx->mgf1md->flags & EVP_MD_FLAG_FIPS)) {
+        const EVP_MD *fmd;
+        fmd = FIPS_get_digestbynid(EVP_MD_type(rctx->mgf1md));
+        if (!fmd || !(fmd->flags & EVP_MD_FLAG_FIPS))
+            return rv;
+    }
     return 1;
 }
 #endif
@@ -388,8 +414,21 @@ static int pkey_rsa_encrypt(EVP_PKEY_CTX *ctx,
 {
     int ret;
     RSA_PKEY_CTX *rctx = ctx->data;
-    ret = RSA_public_encrypt(inlen, in, out, ctx->pkey->pkey.rsa,
-                             rctx->pad_mode);
+    if (rctx->pad_mode == RSA_PKCS1_OAEP_PADDING) {
+        int klen = RSA_size(ctx->pkey->pkey.rsa);
+        if (!setup_tbuf(rctx, ctx))
+            return -1;
+        if (!RSA_padding_add_PKCS1_OAEP_mgf1(rctx->tbuf, klen,
+                                             in, inlen,
+                                             rctx->oaep_label,
+                                             rctx->oaep_labellen,
+                                             rctx->md, rctx->mgf1md))
+            return -1;
+        ret = RSA_public_encrypt(klen, rctx->tbuf, out,
+                                 ctx->pkey->pkey.rsa, RSA_NO_PADDING);
+    } else
+        ret = RSA_public_encrypt(inlen, in, out, ctx->pkey->pkey.rsa,
+                                 rctx->pad_mode);
     if (ret < 0)
         return ret;
     *outlen = ret;
@@ -402,8 +441,26 @@ static int pkey_rsa_decrypt(EVP_PKEY_CTX *ctx,
 {
     int ret;
     RSA_PKEY_CTX *rctx = ctx->data;
-    ret = RSA_private_decrypt(inlen, in, out, ctx->pkey->pkey.rsa,
-                              rctx->pad_mode);
+    if (rctx->pad_mode == RSA_PKCS1_OAEP_PADDING) {
+        int i;
+        if (!setup_tbuf(rctx, ctx))
+            return -1;
+        ret = RSA_private_decrypt(inlen, in, rctx->tbuf,
+                                  ctx->pkey->pkey.rsa, RSA_NO_PADDING);
+        if (ret <= 0)
+            return ret;
+        for (i = 0; i < ret; i++) {
+            if (rctx->tbuf[i])
+                break;
+        }
+        ret = RSA_padding_check_PKCS1_OAEP_mgf1(out, ret, rctx->tbuf + i,
+                                                ret - i, ret,
+                                                rctx->oaep_label,
+                                                rctx->oaep_labellen,
+                                                rctx->md, rctx->mgf1md);
+    } else
+        ret = RSA_private_decrypt(inlen, in, out, ctx->pkey->pkey.rsa,
+                                  rctx->pad_mode);
     if (ret < 0)
         return ret;
     *outlen = ret;
@@ -490,18 +547,36 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
     case EVP_PKEY_CTRL_RSA_KEYGEN_PUBEXP:
         if (!p2)
             return -2;
+        BN_free(rctx->pub_exp);
         rctx->pub_exp = p2;
         return 1;
 
+    case EVP_PKEY_CTRL_RSA_OAEP_MD:
+    case EVP_PKEY_CTRL_GET_RSA_OAEP_MD:
+        if (rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) {
+            RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PADDING_MODE);
+            return -2;
+        }
+        if (type == EVP_PKEY_CTRL_GET_RSA_OAEP_MD)
+            *(const EVP_MD **)p2 = rctx->md;
+        else
+            rctx->md = p2;
+        return 1;
+
     case EVP_PKEY_CTRL_MD:
         if (!check_padding_md(p2, rctx->pad_mode))
             return 0;
         rctx->md = p2;
         return 1;
 
+    case EVP_PKEY_CTRL_GET_MD:
+        *(const EVP_MD **)p2 = rctx->md;
+        return 1;
+
     case EVP_PKEY_CTRL_RSA_MGF1_MD:
     case EVP_PKEY_CTRL_GET_RSA_MGF1_MD:
-        if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING) {
+        if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING
+            && rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) {
             RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_MGF1_MD);
             return -2;
         }
@@ -514,6 +589,30 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
             rctx->mgf1md = p2;
         return 1;
 
+    case EVP_PKEY_CTRL_RSA_OAEP_LABEL:
+        if (rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) {
+            RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PADDING_MODE);
+            return -2;
+        }
+        if (rctx->oaep_label)
+            OPENSSL_free(rctx->oaep_label);
+        if (p2 && p1 > 0) {
+            rctx->oaep_label = p2;
+            rctx->oaep_labellen = p1;
+        } else {
+            rctx->oaep_label = NULL;
+            rctx->oaep_labellen = 0;
+        }
+        return 1;
+
+    case EVP_PKEY_CTRL_GET_RSA_OAEP_LABEL:
+        if (rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) {
+            RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PADDING_MODE);
+            return -2;
+        }
+        *(unsigned char **)p2 = rctx->oaep_label;
+        return rctx->oaep_labellen;
+
     case EVP_PKEY_CTRL_DIGESTINIT:
     case EVP_PKEY_CTRL_PKCS7_ENCRYPT:
     case EVP_PKEY_CTRL_PKCS7_DECRYPT:
@@ -521,16 +620,6 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
         return 1;
 #ifndef OPENSSL_NO_CMS
     case EVP_PKEY_CTRL_CMS_DECRYPT:
-        {
-            X509_ALGOR *alg = NULL;
-            ASN1_OBJECT *encalg = NULL;
-            if (p2)
-                CMS_RecipientInfo_ktri_get0_algs(p2, NULL, NULL, &alg);
-            if (alg)
-                X509_ALGOR_get0(&encalg, NULL, NULL, alg);
-            if (encalg && OBJ_obj2nid(encalg) == NID_rsaesOaep)
-                rctx->pad_mode = RSA_PKCS1_OAEP_PADDING;
-        }
     case EVP_PKEY_CTRL_CMS_ENCRYPT:
     case EVP_PKEY_CTRL_CMS_SIGN:
         return 1;
@@ -599,6 +688,36 @@ static int pkey_rsa_ctrl_str(EVP_PKEY_CTX *ctx,
         return ret;
     }
 
+    if (!strcmp(type, "rsa_mgf1_md")) {
+        const EVP_MD *md;
+        if (!(md = EVP_get_digestbyname(value))) {
+            RSAerr(RSA_F_PKEY_RSA_CTRL_STR, RSA_R_INVALID_DIGEST);
+            return 0;
+        }
+        return EVP_PKEY_CTX_set_rsa_mgf1_md(ctx, md);
+    }
+
+    if (!strcmp(type, "rsa_oaep_md")) {
+        const EVP_MD *md;
+        if (!(md = EVP_get_digestbyname(value))) {
+            RSAerr(RSA_F_PKEY_RSA_CTRL_STR, RSA_R_INVALID_DIGEST);
+            return 0;
+        }
+        return EVP_PKEY_CTX_set_rsa_oaep_md(ctx, md);
+    }
+    if (!strcmp(type, "rsa_oaep_label")) {
+        unsigned char *lab;
+        long lablen;
+        int ret;
+        lab = string_to_hex(value, &lablen);
+        if (!lab)
+            return 0;
+        ret = EVP_PKEY_CTX_set0_rsa_oaep_label(ctx, lab, lablen);
+        if (ret <= 0)
+            OPENSSL_free(lab);
+        return ret;
+    }
+
     return -2;
 }
 
index bc91da2..19461c6 100644 (file)
@@ -261,19 +261,8 @@ int int_rsa_verify(int dtype, const unsigned char *m,
                 OBJ_nid2ln(dtype));
 #endif
         if (sigtype != dtype) {
-            if (((dtype == NID_md5) &&
-                 (sigtype == NID_md5WithRSAEncryption)) ||
-                ((dtype == NID_md2) &&
-                 (sigtype == NID_md2WithRSAEncryption))) {
-                /* ok, we will let it through */
-#if !defined(OPENSSL_NO_STDIO) && !defined(OPENSSL_SYS_WIN16)
-                fprintf(stderr,
-                        "signature has problems, re-make with post SSLeay045\n");
-#endif
-            } else {
-                RSAerr(RSA_F_INT_RSA_VERIFY, RSA_R_ALGORITHM_MISMATCH);
-                goto err;
-            }
+            RSAerr(RSA_F_INT_RSA_VERIFY, RSA_R_ALGORITHM_MISMATCH);
+            goto err;
         }
         if (rm) {
             const EVP_MD *md;
index ceb8094..de6cdde 100644 (file)
@@ -60,21 +60,25 @@ sha256-armv4.S: asm/sha256-armv4.pl
        $(PERL) $< $(PERLASM_SCHEME) $@
 
 sha1-alpha.s:  asm/sha1-alpha.pl
-       (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \
+       (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \
        $(PERL) asm/sha1-alpha.pl > $$preproc && \
-       $(CC) -E $$preproc > $@ && rm $$preproc)
+       $(CC) -E -P $$preproc > $@ && rm $$preproc)
 
 # Solaris make has to be explicitly told
 sha1-x86_64.s: asm/sha1-x86_64.pl;     $(PERL) asm/sha1-x86_64.pl $(PERLASM_SCHEME) > $@
+sha1-mb-x86_64.s:      asm/sha1-mb-x86_64.pl;  $(PERL) asm/sha1-mb-x86_64.pl $(PERLASM_SCHEME) > $@
 sha256-x86_64.s:asm/sha512-x86_64.pl;  $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
+sha256-mb-x86_64.s:    asm/sha256-mb-x86_64.pl;        $(PERL) asm/sha256-mb-x86_64.pl $(PERLASM_SCHEME) > $@
 sha512-x86_64.s:asm/sha512-x86_64.pl;  $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
-sha1-sparcv9.s:        asm/sha1-sparcv9.pl;    $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS)
-sha256-sparcv9.s:asm/sha512-sparcv9.pl;        $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
-sha512-sparcv9.s:asm/sha512-sparcv9.pl;        $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
+sha1-sparcv9.S:        asm/sha1-sparcv9.pl;    $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS)
+sha256-sparcv9.S:asm/sha512-sparcv9.pl;        $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
+sha512-sparcv9.S:asm/sha512-sparcv9.pl;        $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
 
 sha1-ppc.s:    asm/sha1-ppc.pl;        $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@
 sha256-ppc.s:  asm/sha512-ppc.pl;      $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
 sha512-ppc.s:  asm/sha512-ppc.pl;      $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
+sha256p8-ppc.s:        asm/sha512p8-ppc.pl;    $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@
+sha512p8-ppc.s:        asm/sha512p8-ppc.pl;    $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@
 
 sha1-parisc.s: asm/sha1-parisc.pl;     $(PERL) asm/sha1-parisc.pl $(PERLASM_SCHEME) $@
 sha256-parisc.s:asm/sha512-parisc.pl;  $(PERL) asm/sha512-parisc.pl $(PERLASM_SCHEME) $@
@@ -92,6 +96,9 @@ sha512-%.S:   asm/sha512-%.pl;        $(PERL) $< $(PERLASM_SCHEME) $@
 sha1-armv4-large.o:    sha1-armv4-large.S
 sha256-armv4.o:                sha256-armv4.S
 sha512-armv4.o:                sha512-armv4.S
+sha1-armv8.o:          sha1-armv8.S
+sha256-armv8.o:                sha256-armv8.S
+sha512-armv8.o:                sha512-armv8.S
 
 files:
        $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
index 1084d22..4895eb3 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 
 # ====================================================================
-# [Re]written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# [Re]written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 # strongly, it's probably more appropriate to discuss possibility of
 # using vector rotate XOP on AMD...
 
+# March 2014.
+#
+# Add support for Intel SHA Extensions.
+
 ######################################################################
 # Current performance is summarized in following table. Numbers are
 # CPU clock cycles spent to process single byte (less is better).
 # PIII         11.5            -
 # P4           10.6            -
 # AMD K8       7.1             -
-# Core2                7.3             6.1/+20%        -
-# Atom         12.5            9.5(*)/+32%     -
-# Westmere     7.3             5.6/+30%        -
-# Sandy Bridge 8.8             6.2/+40%        5.1(**)/+70%
+# Core2                7.3             6.0/+22%        -
+# Westmere     7.3             5.5/+33%        -
+# Sandy Bridge 8.8             6.2/+40%        5.1(**)/+73%
+# Ivy Bridge   7.2             4.8/+51%        4.7(**)/+53%
+# Haswell      6.5             4.3/+51%        4.1(**)/+58%
+# Bulldozer    11.6            6.0/+92%
+# VIA Nano     10.6            7.5/+41%
+# Atom         12.5            9.3(*)/+35%
+# Silvermont   14.5            9.9(*)/+46%
 #
 # (*)  Loop is 1056 instructions long and expected result is ~8.25.
-#      It remains mystery [to me] why ILP is limited to 1.7.
+#      The discrepancy is because of front-end limitations, so
+#      called MS-ROM penalties, and on Silvermont even rotate's
+#      limited parallelism.
 #
 # (**) As per above comment, the result is for AVX *plus* sh[rl]d.
 
@@ -116,6 +127,15 @@ $ymm=1 if ($xmm && !$ymm && $ARGV[0] eq "win32n" &&
                `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ &&
                $1>=2.03);      # first version supporting AVX
 
+$ymm=1 if ($xmm && !$ymm && $ARGV[0] eq "win32" &&
+               `ml 2>&1` =~ /Version ([0-9]+)\./ &&
+               $1>=10);        # first version supporting AVX
+
+$ymm=1 if ($xmm && !$ymm && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([0-9]+\.[0-9]+)/ &&
+               $2>=3.0);       # 3.0 is the first version supporting AVX; multi-digit majors (10.0 and up) must match too
+
+$shaext=$xmm;  ### set to zero if compiling for 1.0.1
+
 &external_label("OPENSSL_ia32cap_P") if ($xmm);
 
 
@@ -295,6 +315,7 @@ if ($alt) {
 
 &function_begin("sha1_block_data_order");
 if ($xmm) {
+  &static_label("shaext_shortcut")     if ($shaext);
   &static_label("ssse3_shortcut");
   &static_label("avx_shortcut")                if ($ymm);
   &static_label("K_XX_XX");
@@ -309,8 +330,13 @@ if ($xmm) {
        &mov    ($D,&DWP(4,$T));
        &test   ($D,1<<9);              # check SSSE3 bit
        &jz     (&label("x86"));
+       &mov    ($C,&DWP(8,$T));
        &test   ($A,1<<24);             # check FXSR bit
        &jz     (&label("x86"));
+       if ($shaext) {
+               &test   ($C,1<<29);             # check SHA bit
+               &jnz    (&label("shaext_shortcut"));
+       }
        if ($ymm) {
                &and    ($D,1<<28);             # mask AVX bit
                &and    ($A,1<<30);             # mask "Intel CPU" bit
@@ -389,6 +415,117 @@ if ($xmm) {
 &function_end("sha1_block_data_order");
 
 if ($xmm) {
+if ($shaext) {
+######################################################################
+# Intel SHA Extensions implementation of SHA1 update function.
+#
+my ($ctx,$inp,$num)=("edi","esi","ecx");
+my ($ABCD,$E,$E_,$BSWAP)=map("xmm$_",(0..3));
+my @MSG=map("xmm$_",(4..7));
+
+sub sha1rnds4 {        # emit "sha1rnds4 dst,src,imm" as raw opcode bytes
+ my ($dst,$src,$imm)=@_;       # two register names plus the immediate operand
+    if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)        # only xmm0-xmm7 pairs are encoded; other operands emit nothing
+    {  &data_byte(0x0f,0x3a,0xcc,0xc0|($1<<3)|$2,$imm);        }       # 0F 3A CC, ModR/M (mod=11, reg=dst, rm=src), then the immediate
+}
+sub sha1op38 {         # shared encoder for the two-operand 0F 38 xx forms below
+ my ($opcodelet,$dst,$src)=@_; # $opcodelet is the third opcode byte
+    if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)        # only xmm0-xmm7 pairs are encoded; other operands emit nothing
+    {  &data_byte(0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2);       }       # 0F 38 <op>, ModR/M (mod=11, reg=dst, rm=src)
+}
+sub sha1nexte  { sha1op38(0xc8,@_); }  # 0F 38 C8
+sub sha1msg1   { sha1op38(0xc9,@_); }  # 0F 38 C9
+sub sha1msg2   { sha1op38(0xca,@_); }  # 0F 38 CA
+
+&function_begin("_sha1_block_data_order_shaext");
+       &call   (&label("pic_point"));  # make it PIC!
+       &set_label("pic_point");
+       &blindpop($tmp1);
+       &lea    ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+&set_label("shaext_shortcut");
+       &mov    ($ctx,&wparam(0));
+       &mov    ("ebx","esp");
+       &mov    ($inp,&wparam(1));
+       &mov    ($num,&wparam(2));
+       &sub    ("esp",32);
+
+       &movdqu ($ABCD,&QWP(0,$ctx));
+       &movd   ($E,&DWP(16,$ctx));
+       &and    ("esp",-32);
+       &movdqa ($BSWAP,&QWP(0x50,$tmp1));      # byte-n-word swap
+
+       &movdqu (@MSG[0],&QWP(0,$inp));
+       &pshufd ($ABCD,$ABCD,0b00011011);       # flip word order
+       &movdqu (@MSG[1],&QWP(0x10,$inp));
+       &pshufd ($E,$E,0b00011011);             # flip word order
+       &movdqu (@MSG[2],&QWP(0x20,$inp));
+       &pshufb (@MSG[0],$BSWAP);
+       &movdqu (@MSG[3],&QWP(0x30,$inp));
+       &pshufb (@MSG[1],$BSWAP);
+       &pshufb (@MSG[2],$BSWAP);
+       &pshufb (@MSG[3],$BSWAP);
+       &jmp    (&label("loop_shaext"));
+
+&set_label("loop_shaext",16);
+       &dec            ($num);
+       &lea            ("eax",&DWP(0x40,$inp));
+       &movdqa         (&QWP(0,"esp"),$E);     # offload $E
+       &paddd          ($E,@MSG[0]);
+       &cmovne         ($inp,"eax");
+       &movdqa         (&QWP(16,"esp"),$ABCD); # offload $ABCD
+
+for($i=0;$i<20-4;$i+=2) {
+       &sha1msg1       (@MSG[0],@MSG[1]);
+       &movdqa         ($E_,$ABCD);
+       &sha1rnds4      ($ABCD,$E,int($i/5));   # 0-3...
+       &sha1nexte      ($E_,@MSG[1]);
+       &pxor           (@MSG[0],@MSG[2]);
+       &sha1msg1       (@MSG[1],@MSG[2]);
+       &sha1msg2       (@MSG[0],@MSG[3]);
+
+       &movdqa         ($E,$ABCD);
+       &sha1rnds4      ($ABCD,$E_,int(($i+1)/5));
+       &sha1nexte      ($E,@MSG[2]);
+       &pxor           (@MSG[1],@MSG[3]);
+       &sha1msg2       (@MSG[1],@MSG[0]);
+
+       push(@MSG,shift(@MSG)); push(@MSG,shift(@MSG));
+}
+       &movdqu         (@MSG[0],&QWP(0,$inp));
+       &movdqa         ($E_,$ABCD);
+       &sha1rnds4      ($ABCD,$E,3);           # 64-67
+       &sha1nexte      ($E_,@MSG[1]);
+       &movdqu         (@MSG[1],&QWP(0x10,$inp));
+       &pshufb         (@MSG[0],$BSWAP);
+
+       &movdqa         ($E,$ABCD);
+       &sha1rnds4      ($ABCD,$E_,3);          # 68-71
+       &sha1nexte      ($E,@MSG[2]);
+       &movdqu         (@MSG[2],&QWP(0x20,$inp));
+       &pshufb         (@MSG[1],$BSWAP);
+
+       &movdqa         ($E_,$ABCD);
+       &sha1rnds4      ($ABCD,$E,3);           # 72-75
+       &sha1nexte      ($E_,@MSG[3]);
+       &movdqu         (@MSG[3],&QWP(0x30,$inp));
+       &pshufb         (@MSG[2],$BSWAP);
+
+       &movdqa         ($E,$ABCD);
+       &sha1rnds4      ($ABCD,$E_,3);          # 76-79
+       &movdqa         ($E_,&QWP(0,"esp"));
+       &pshufb         (@MSG[3],$BSWAP);
+       &sha1nexte      ($E,$E_);
+       &paddd          ($ABCD,&QWP(16,"esp"));
+
+       &jnz            (&label("loop_shaext"));
+
+       &pshufd ($ABCD,$ABCD,0b00011011);
+       &pshufd ($E,$E,0b00011011);
+       &movdqu (&QWP(0,$ctx),$ABCD);   # write $ABCD back to offset 0 of $ctx
+       &movd   (&DWP(16,$ctx),$E);     # ... and the low dword of $E to offset 16
+       &mov    ("esp","ebx");
+&function_end("_sha1_block_data_order_shaext");
+}
 ######################################################################
 # The SSSE3 implementation.
 #
@@ -416,6 +553,7 @@ my $Xi=4;                   # 4xSIMD Xupdate round, start pre-seeded
 my @X=map("xmm$_",(4..7,0..3));        # pre-seeded for $Xi=4
 my @V=($A,$B,$C,$D,$E);
 my $j=0;                       # hash round
+my $rx=0;
 my @T=($T,$tmp1);
 my $inp;
 
@@ -501,8 +639,11 @@ my $_ror=sub { &ror(@_) };
        &movdqa (&QWP(0+16,"esp"),@X[-3&7]);
        &psubd  (@X[-3&7],@X[3]);
        &movdqa (&QWP(0+32,"esp"),@X[-2&7]);
+       &mov    (@T[1],$C);
        &psubd  (@X[-2&7],@X[3]);
-       &movdqa (@X[0],@X[-3&7]);
+       &xor    (@T[1],$D);
+       &pshufd (@X[0],@X[-4&7],0xee);          # was &movdqa   (@X[0],@X[-3&7]);
+       &and    (@T[0],@T[1]);
        &jmp    (&label("loop"));
 
 ######################################################################
@@ -528,76 +669,77 @@ sub Xupdate_ssse3_16_31()         # recall that $Xi starts wtih 4
   my @insns = (&$body,&$body,&$body,&$body);   # 40 instructions
   my ($a,$b,$c,$d,$e);
 
+        eval(shift(@insns));           # ror
         eval(shift(@insns));
         eval(shift(@insns));
-       &palignr(@X[0],@X[-4&7],8);     # compose "X[-14]" in "X[0]"
+       &punpcklqdq(@X[0],@X[-3&7]);    # compose "X[-14]" in "X[0]", was &palignr(@X[0],@X[-4&7],8);
        &movdqa (@X[2],@X[-1&7]);
         eval(shift(@insns));
         eval(shift(@insns));
 
          &paddd        (@X[3],@X[-1&7]);
          &movdqa       (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]);# save X[] to backtrace buffer
-        eval(shift(@insns));
+        eval(shift(@insns));           # rol
         eval(shift(@insns));
        &psrldq (@X[2],4);              # "X[-3]", 3 dwords
         eval(shift(@insns));
         eval(shift(@insns));
        &pxor   (@X[0],@X[-4&7]);       # "X[0]"^="X[-16]"
         eval(shift(@insns));
-        eval(shift(@insns));
+        eval(shift(@insns));           # ror
 
        &pxor   (@X[2],@X[-2&7]);       # "X[-3]"^"X[-8]"
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
-        eval(shift(@insns));
 
        &pxor   (@X[0],@X[2]);          # "X[0]"^="X[-3]"^"X[-8]"
         eval(shift(@insns));
-        eval(shift(@insns));
+        eval(shift(@insns));           # rol
          &movdqa       (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]);   # X[]+K xfer to IALU
         eval(shift(@insns));
         eval(shift(@insns));
 
        &movdqa (@X[4],@X[0]);
-       &movdqa (@X[2],@X[0]);
-        eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));           # ror
+       &movdqa (@X[2],@X[0]);
         eval(shift(@insns));
 
        &pslldq (@X[4],12);             # "X[0]"<<96, extract one dword
        &paddd  (@X[0],@X[0]);
         eval(shift(@insns));
         eval(shift(@insns));
-        eval(shift(@insns));
-        eval(shift(@insns));
 
        &psrld  (@X[2],31);
         eval(shift(@insns));
-        eval(shift(@insns));
+        eval(shift(@insns));           # rol
        &movdqa (@X[3],@X[4]);
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));
 
        &psrld  (@X[4],30);
-       &por    (@X[0],@X[2]);          # "X[0]"<<<=1
         eval(shift(@insns));
+        eval(shift(@insns));           # ror
+       &por    (@X[0],@X[2]);          # "X[0]"<<<=1
         eval(shift(@insns));
          &movdqa       (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if ($Xi>5);       # restore X[] from backtrace buffer
         eval(shift(@insns));
         eval(shift(@insns));
 
        &pslld  (@X[3],2);
-       &pxor   (@X[0],@X[4]);
-        eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));           # rol
+       &pxor   (@X[0],@X[4]);
          &movdqa       (@X[4],&QWP(112-16+16*(($Xi)/5),"esp"));        # K_XX_XX
         eval(shift(@insns));
         eval(shift(@insns));
 
        &pxor   (@X[0],@X[3]);          # "X[0]"^=("X[0]"<<96)<<<2
-         &movdqa       (@X[1],@X[-2&7])        if ($Xi<7);
+         &pshufd       (@X[1],@X[-3&7],0xee)   if ($Xi<7);     # was &movdqa   (@X[1],@X[-2&7])
+         &pshufd       (@X[3],@X[-1&7],0xee)   if ($Xi==7);
         eval(shift(@insns));
         eval(shift(@insns));
 
@@ -609,13 +751,12 @@ sub Xupdate_ssse3_16_31()         # recall that $Xi starts wtih 4
 sub Xupdate_ssse3_32_79()
 { use integer;
   my $body = shift;
-  my @insns = (&$body,&$body,&$body,&$body);   # 32 to 48 instructions
+  my @insns = (&$body,&$body,&$body,&$body);   # 32 to 44 instructions
   my ($a,$b,$c,$d,$e);
 
-       &movdqa (@X[2],@X[-1&7])        if ($Xi==8);
         eval(shift(@insns));           # body_20_39
        &pxor   (@X[0],@X[-4&7]);       # "X[0]"="X[-32]"^"X[-16]"
-       &palignr(@X[2],@X[-2&7],8);     # compose "X[-6]"
+       &punpcklqdq(@X[2],@X[-1&7]);    # compose "X[-6]", was &palignr(@X[2],@X[-2&7],8)
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));           # rol
@@ -624,13 +765,14 @@ sub Xupdate_ssse3_32_79()
          &movdqa       (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]);       # save X[] to backtrace buffer
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns))            if (@insns[0] =~ /_rol/);
         if ($Xi%5) {
          &movdqa       (@X[4],@X[3]);  # "perpetuate" K_XX_XX...
         } else {                       # ... or load next one
          &movdqa       (@X[4],&QWP(112-16+16*($Xi/5),"esp"));
         }
-         &paddd        (@X[3],@X[-1&7]);
         eval(shift(@insns));           # ror
+         &paddd        (@X[3],@X[-1&7]);
         eval(shift(@insns));
 
        &pxor   (@X[0],@X[2]);          # "X[0]"^="X[-6]"
@@ -645,6 +787,7 @@ sub Xupdate_ssse3_32_79()
         eval(shift(@insns));
         eval(shift(@insns));           # ror
         eval(shift(@insns));
+        eval(shift(@insns))            if (@insns[0] =~ /_rol/);
 
        &pslld  (@X[0],2);
         eval(shift(@insns));           # body_20_39
@@ -656,6 +799,8 @@ sub Xupdate_ssse3_32_79()
         eval(shift(@insns));
         eval(shift(@insns));           # ror
         eval(shift(@insns));
+        eval(shift(@insns))            if (@insns[1] =~ /_rol/);
+        eval(shift(@insns))            if (@insns[0] =~ /_rol/);
 
        &por    (@X[0],@X[2]);          # "X[0]"<<<=2
         eval(shift(@insns));           # body_20_39
@@ -666,7 +811,7 @@ sub Xupdate_ssse3_32_79()
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));           # ror
-         &movdqa       (@X[3],@X[0])   if ($Xi<19);
+         &pshufd       (@X[3],@X[-1],0xee)     if ($Xi<19);    # was &movdqa   (@X[3],@X[0])
         eval(shift(@insns));
 
         foreach (@insns) { eval; }     # remaining instructions
@@ -681,6 +826,12 @@ sub Xuplast_ssse3_80()
   my ($a,$b,$c,$d,$e);
 
         eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
          &paddd        (@X[3],@X[-1&7]);
         eval(shift(@insns));
         eval(shift(@insns));
@@ -717,9 +868,16 @@ sub Xloop_ssse3()
 
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
        &pshufb (@X[($Xi-3)&7],@X[2]);
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
        &paddd  (@X[($Xi-4)&7],@X[3]);
         eval(shift(@insns));
         eval(shift(@insns));
@@ -728,6 +886,8 @@ sub Xloop_ssse3()
        &movdqa (&QWP(0+16*$Xi,"esp"),@X[($Xi-4)&7]);   # X[]+K xfer to IALU
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
        &psubd  (@X[($Xi-4)&7],@X[3]);
 
        foreach (@insns) { eval; }
@@ -743,51 +903,124 @@ sub Xtail_ssse3()
        foreach (@insns) { eval; }
 }
 
-sub body_00_19 () {
+sub body_00_19 () {    # ((c^d)&b)^d
+       # on start @T[0]=(c^d)&b
+       return &body_20_39()    if ($rx==19);   $rx++;
        (
        '($a,$b,$c,$d,$e)=@V;'.
-       '&add   ($e,&DWP(4*($j&15),"esp"));',   # X[]+K xfer
-       '&xor   ($c,$d);',
+       '&$_ror ($b,$j?7:2);',  # $b>>>2
+       '&xor   (@T[0],$d);',
        '&mov   (@T[1],$a);',   # $b in next round
+
+       '&add   ($e,&DWP(4*($j&15),"esp"));',   # X[]+K xfer
+       '&xor   ($b,$c);',      # $c^$d for next round
+
        '&$_rol ($a,5);',
-       '&and   (@T[0],$c);',   # ($b&($c^$d))
-       '&xor   ($c,$d);',      # restore $c
-       '&xor   (@T[0],$d);',
-       '&add   ($e,$a);',
-       '&$_ror ($b,$j?7:2);',  # $b>>>2
-       '&add   ($e,@T[0]);'    .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       '&add   ($e,@T[0]);',
+       '&and   (@T[1],$b);',   # ($b&($c^$d)) for next round
+
+       '&xor   ($b,$c);',      # restore $b
+       '&add   ($e,$a);'       .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
        );
 }
 
-sub body_20_39 () {
+sub body_20_39 () {    # b^d^c
+       # on entry @T[0]=b^d
+       return &body_40_59()    if ($rx==39);   $rx++;
        (
        '($a,$b,$c,$d,$e)=@V;'.
-       '&add   ($e,&DWP(4*($j++&15),"esp"));', # X[]+K xfer
-       '&xor   (@T[0],$d);',   # ($b^$d)
+       '&add   ($e,&DWP(4*($j&15),"esp"));',   # X[]+K xfer
+       '&xor   (@T[0],$d)      if($j==19);'.
+       '&xor   (@T[0],$c)      if($j> 19);',   # ($b^$d^$c)
        '&mov   (@T[1],$a);',   # $b in next round
+
        '&$_rol ($a,5);',
-       '&xor   (@T[0],$c);',   # ($b^$d^$c)
-       '&add   ($e,$a);',
+       '&add   ($e,@T[0]);',
+       '&xor   (@T[1],$c)      if ($j< 79);',  # $b^$d for next round
+
        '&$_ror ($b,7);',       # $b>>>2
-       '&add   ($e,@T[0]);'    .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       '&add   ($e,$a);'       .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
        );
 }
 
-sub body_40_59 () {
+sub body_40_59 () {    # ((b^c)&(c^d))^c
+       # on entry @T[0]=(b^c), (c^=d)
+       $rx++;
        (
        '($a,$b,$c,$d,$e)=@V;'.
-       '&mov   (@T[1],$c);',
-       '&xor   ($c,$d);',
-       '&add   ($e,&DWP(4*($j++&15),"esp"));', # X[]+K xfer
-       '&and   (@T[1],$d);',
-       '&and   (@T[0],$c);',   # ($b&($c^$d))
+       '&add   ($e,&DWP(4*($j&15),"esp"));',   # X[]+K xfer
+       '&and   (@T[0],$c)      if ($j>=40);',  # (b^c)&(c^d)
+       '&xor   ($c,$d)         if ($j>=40);',  # restore $c
+
        '&$_ror ($b,7);',       # $b>>>2
-       '&add   ($e,@T[1]);',
-       '&mov   (@T[1],$a);',   # $b in next round
+       '&mov   (@T[1],$a);',   # $b for next round
+       '&xor   (@T[0],$c);',
+
        '&$_rol ($a,5);',
        '&add   ($e,@T[0]);',
-       '&xor   ($c,$d);',      # restore $c
-       '&add   ($e,$a);'       .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       '&xor   (@T[1],$c)      if ($j==59);'.
+       '&xor   (@T[1],$b)      if ($j< 59);',  # b^c for next round
+
+       '&xor   ($b,$c)         if ($j< 59);',  # c^d for next round
+       '&add   ($e,$a);'       .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       );
+}
+######
+sub bodyx_00_19 () {   # ((c^d)&b)^d
+       # on start @T[0]=(b&c)^(~b&d), $e+=X[]+K
+       return &bodyx_20_39()   if ($rx==19);   $rx++;
+       (
+       '($a,$b,$c,$d,$e)=@V;'.
+
+       '&rorx  ($b,$b,2)                       if ($j==0);'.   # $b>>>2
+       '&rorx  ($b,@T[1],7)                    if ($j!=0);',   # $b>>>2
+       '&lea   ($e,&DWP(0,$e,@T[0]));',
+       '&rorx  (@T[0],$a,5);',
+
+       '&andn  (@T[1],$a,$c);',
+       '&and   ($a,$b)',
+       '&add   ($d,&DWP(4*(($j+1)&15),"esp"));',       # X[]+K xfer
+
+       '&xor   (@T[1],$a)',
+       '&add   ($e,@T[0]);'    .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       );
+}
+
+sub bodyx_20_39 () {   # b^d^c
+       # on start $b=b^c^d
+       return &bodyx_40_59()   if ($rx==39);   $rx++;
+       (
+       '($a,$b,$c,$d,$e)=@V;'.
+
+       '&add   ($e,($j==19?@T[0]:$b))',
+       '&rorx  ($b,@T[1],7);', # $b>>>2
+       '&rorx  (@T[0],$a,5);',
+
+       '&xor   ($a,$b)                         if ($j<79);',
+       '&add   ($d,&DWP(4*(($j+1)&15),"esp"))  if ($j<79);',   # X[]+K xfer
+       '&xor   ($a,$c)                         if ($j<79);',
+       '&add   ($e,@T[0]);'    .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       );
+}
+
+sub bodyx_40_59 () {   # ((b^c)&(c^d))^c
+       # on start $b=((b^c)&(c^d))^c
+       return &bodyx_20_39()   if ($rx==59);   $rx++;
+       (
+       '($a,$b,$c,$d,$e)=@V;'.
+
+       '&rorx  (@T[0],$a,5)',
+       '&lea   ($e,&DWP(0,$e,$b))',
+       '&rorx  ($b,@T[1],7)',  # $b>>>2
+       '&add   ($d,&DWP(4*(($j+1)&15),"esp"))',        # X[]+K xfer
+
+       '&mov   (@T[1],$c)',
+       '&xor   ($a,$b)',       # b^c for next round
+       '&xor   (@T[1],$b)',    # c^d for next round
+
+       '&and   ($a,@T[1])',
+       '&add   ($e,@T[0])',
+       '&xor   ($a,$b)'        .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
        );
 }
 
@@ -825,10 +1058,14 @@ sub body_40_59 () {
        &mov    (&DWP(4,@T[1]),@T[0]);
        &add    ($E,&DWP(16,@T[1]));
        &mov    (&DWP(8,@T[1]),$C);
-       &mov    ($B,@T[0]);
+       &mov    ($B,$C);
        &mov    (&DWP(12,@T[1]),$D);
+       &xor    ($B,$D);
        &mov    (&DWP(16,@T[1]),$E);
-       &movdqa (@X[0],@X[-3&7]);
+       &mov    (@T[1],@T[0]);
+       &pshufd (@X[0],@X[-4&7],0xee);          # was &movdqa   (@X[0],@X[-3&7]);
+       &and    (@T[0],$B);
+       &mov    ($B,$T[1]);
 
        &jmp    (&label("loop"));
 
@@ -853,6 +1090,8 @@ sub body_40_59 () {
 
 &function_end("_sha1_block_data_order_ssse3");
 
+$rx=0; # reset
+
 if ($ymm) {
 my $Xi=4;                      # 4xSIMD Xupdate round, start pre-seeded
 my @X=map("xmm$_",(4..7,0..3));        # pre-seeded for $Xi=4
@@ -940,8 +1179,11 @@ my $_ror=sub { &shrd(@_[0],@_) };
        &vpaddd (@X[1],@X[-3&7],@X[3]);
        &vpaddd (@X[2],@X[-2&7],@X[3]);
        &vmovdqa(&QWP(0,"esp"),@X[0]);          # X[]+K xfer to IALU
+       &mov    (@T[1],$C);
        &vmovdqa(&QWP(0+16,"esp"),@X[1]);
+       &xor    (@T[1],$D);
        &vmovdqa(&QWP(0+32,"esp"),@X[2]);
+       &and    (@T[0],@T[1]);
        &jmp    (&label("loop"));
 
 sub Xupdate_avx_16_31()                # recall that $Xi starts wtih 4
@@ -1025,7 +1267,7 @@ sub Xupdate_avx_16_31()           # recall that $Xi starts wtih 4
 sub Xupdate_avx_32_79()
 { use integer;
   my $body = shift;
-  my @insns = (&$body,&$body,&$body,&$body);   # 32 to 48 instructions
+  my @insns = (&$body,&$body,&$body,&$body);   # 32 to 44 instructions
   my ($a,$b,$c,$d,$e);
 
        &vpalignr(@X[2],@X[-1&7],@X[-2&7],8);   # compose "X[-6]"
@@ -1188,10 +1430,14 @@ sub Xtail_avx()
        &add    ($D,&DWP(12,@T[1]));
        &mov    (&DWP(4,@T[1]),@T[0]);
        &add    ($E,&DWP(16,@T[1]));
+       &mov    ($B,$C);
        &mov    (&DWP(8,@T[1]),$C);
-       &mov    ($B,@T[0]);
+       &xor    ($B,$D);
        &mov    (&DWP(12,@T[1]),$D);
        &mov    (&DWP(16,@T[1]),$E);
+       &mov    (@T[1],@T[0]);
+       &and    (@T[0],$B);
+       &mov    ($B,@T[1]);
 
        &jmp    (&label("loop"));
 
@@ -1223,6 +1469,7 @@ sub Xtail_avx()
 &data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc);       # K_40_59
 &data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6);       # K_60_79
 &data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f);       # pbswap mask
+&data_byte(0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0);
 }
 &asciz("SHA1 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
 
index 33da3e0..b2c3032 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 # Profiler-assisted and platform-specific optimization resulted in 10%
 # improvement on Cortex A8 core and 12.2 cycles per byte.
 
+# September 2013.
+#
+# Add NEON implementation (see sha1-586.pl for background info). On
+# Cortex A8 it was measured to process one byte in 6.7 cycles or >80%
+# faster than integer-only code. Because [fully unrolled] NEON code
+# is ~2.5x larger and there are some redundant instructions executed
+# when processing last block, improvement is not as big for smallest
+# blocks, only ~30%. Snapdragon S4 is a tad faster, 6.4 cycles per
+# byte, which is also >80% faster than integer-only code.
+
+# May 2014.
+#
+# Add ARMv8 code path performing at 2.35 cpb on Apple A7.
+
 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
@@ -153,12 +167,22 @@ $code=<<___;
 #include "arm_arch.h"
 
 .text
+.code  32
 
 .global        sha1_block_data_order
 .type  sha1_block_data_order,%function
 
-.align 2
+.align 5
 sha1_block_data_order:
+#if __ARM_MAX_ARCH__>=7
+       sub     r3,pc,#8                @ sha1_block_data_order
+       ldr     r12,.LOPENSSL_armcap
+       ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
+       tst     r12,#ARMV8_SHA1
+       bne     .LARMv8
+       tst     r12,#ARMV7_NEON
+       bne     .LNEON
+#endif
        stmdb   sp!,{r4-r12,lr}
        add     $len,$inp,$len,lsl#6    @ $len to point at the end of $inp
        ldmia   $ctx,{$a,$b,$c,$d,$e}
@@ -233,16 +257,427 @@ $code.=<<___;
        moveq   pc,lr                   @ be binary compatible with V4, yet
        bx      lr                      @ interoperable with Thumb ISA:-)
 #endif
-.align 2
+.size  sha1_block_data_order,.-sha1_block_data_order
+
+.align 5
 .LK_00_19:     .word   0x5a827999
 .LK_20_39:     .word   0x6ed9eba1
 .LK_40_59:     .word   0x8f1bbcdc
 .LK_60_79:     .word   0xca62c1d6
-.size  sha1_block_data_order,.-sha1_block_data_order
-.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
+#if __ARM_MAX_ARCH__>=7
+.LOPENSSL_armcap:
+.word  OPENSSL_armcap_P-sha1_block_data_order
+#endif
+.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 5
+___
+#####################################################################
+# NEON stuff
+#
+{{{
+my @V=($a,$b,$c,$d,$e);
+my ($K_XX_XX,$Ki,$t0,$t1,$Xfer,$saved_sp)=map("r$_",(8..12,14));
+my $Xi=4;
+my @X=map("q$_",(8..11,0..3));
+my @Tx=("q12","q13");
+my ($K,$zero)=("q14","q15");
+my $j=0;
+
+sub AUTOLOAD()          # thunk [simplified] x86-style perlasm: an undefined &mnemonic(...) call lands here and is appended to $code as one instruction line
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;     # drop the package qualifier; the first "_" becomes ".", so &vext_8 emits "vext.8"
+  my $arg = pop;       # the last operand is inspected separately
+    $arg = "#$arg" if ($arg*1 eq $arg);        # a last operand that survives a numeric round-trip is emitted as an immediate
+    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";     # remaining operands are joined with commas in call order
+}
+
+sub body_00_19 () {    # returns the scalar round for 0..19 as a list of Perl snippets (strings); the Xupdate/Xloop subs eval them one at a time
+       (
+       '($a,$b,$c,$d,$e)=@V;'.         # '$code.="@ $j\n";'.
+       '&bic   ($t0,$d,$b)',           # t0 = d & ~b
+       '&add   ($e,$e,$Ki)',           # e+=X[i]+K
+       '&and   ($t1,$c,$b)',           # t1 = c & b
+       '&ldr   ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))',        # look-ahead load for the next round from the 16-word buffer at sp
+       '&add   ($e,$e,$a,"ror#27")',   # e+=ROR(A,27)
+       '&eor   ($t1,$t1,$t0)',         # F_00_19
+       '&mov   ($b,$b,"ror#2")',       # b=ROR(b,2)
+       '&add   ($e,$e,$t1);'.          # e+=F_00_19
+       '$j++;  unshift(@V,pop(@V));'   # advance the round counter and rotate the working variables
+       )
+}
+sub body_20_39 () {    # returns the xor-based round as a list of Perl snippets; the call sequence also reuses it after the body_40_59 rounds
+       (
+       '($a,$b,$c,$d,$e)=@V;'.         # '$code.="@ $j\n";'.
+       '&eor   ($t0,$b,$d)',           # t0 = b ^ d
+       '&add   ($e,$e,$Ki)',           # e+=X[i]+K
+       '&ldr   ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15)) if ($j<79)',     # look-ahead load, skipped once $j reaches 79
+       '&eor   ($t1,$t0,$c)',          # F_20_39
+       '&add   ($e,$e,$a,"ror#27")',   # e+=ROR(A,27)
+       '&mov   ($b,$b,"ror#2")',       # b=ROR(b,2)
+       '&add   ($e,$e,$t1);'.          # e+=F_20_39
+       '$j++;  unshift(@V,pop(@V));'   # advance the round counter and rotate the working variables
+       )
+}
+sub body_40_59 () {    # returns the round for 40..59 as a list of Perl snippets; F is accumulated into e in two parts
+       (
+       '($a,$b,$c,$d,$e)=@V;'.         # '$code.="@ $j\n";'.
+       '&add   ($e,$e,$Ki)',           # e+=X[i]+K
+       '&and   ($t0,$c,$d)',           # t0 = c & d
+       '&ldr   ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))',        # look-ahead load for the next round from the 16-word buffer at sp
+       '&add   ($e,$e,$a,"ror#27")',   # e+=ROR(A,27)
+       '&eor   ($t1,$c,$d)',           # t1 = c ^ d
+       '&add   ($e,$e,$t0)',           # e += c & d (first part of F_40_59)
+       '&and   ($t1,$t1,$b)',          # t1 = (c ^ d) & b
+       '&mov   ($b,$b,"ror#2")',       # b=ROR(b,2)
+       '&add   ($e,$e,$t1);'.          # e+=F_40_59
+       '$j++;  unshift(@V,pop(@V));'   # advance the round counter and rotate the working variables
+       )
+}
+
+sub Xupdate_16_31 ()   # emits one NEON message-schedule update interleaved with the snippets of four scalar rounds
+{ use integer;
+  my $body = shift;    # code ref returning the round snippets (body_00_19 etc.)
+  my @insns = (&$body,&$body,&$body,&$body);   # snippets for four rounds, consumed in order by the evals below
+  my ($a,$b,$c,$d,$e); # assigned by the eval'ed snippets from @V
+
+       &vext_8         (@X[0],@X[-4&7],@X[-3&7],8);    # compose "X[-14]" in "X[0]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vadd_i32     (@Tx[1],@X[-1&7],$K);           # newest vector plus K, stored through $Xfer below
+        eval(shift(@insns));
+         &vld1_32      ("{$K\[]}","[$K_XX_XX,:32]!")   if ($Xi%5==0);  # fetch the next constant when $Xi is a multiple of 5
+        eval(shift(@insns));
+       &vext_8         (@Tx[0],@X[-1&7],$zero,4);      # "X[-3]", 3 words
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &veor           (@X[0],@X[0],@X[-4&7]);         # "X[0]"^="X[-16]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &veor           (@Tx[0],@Tx[0],@X[-2&7]);       # "X[-3]"^"X[-8]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &veor           (@Tx[0],@Tx[0],@X[0]);          # "X[0]"^="X[-3]"^"X[-8]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vst1_32      ("{@Tx[1]}","[$Xfer,:128]!");   # X[]+K xfer
+         &sub          ($Xfer,$Xfer,64)                if ($Xi%4==0);  # step $Xfer back by 64 bytes when $Xi is a multiple of 4
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vext_8         (@Tx[1],$zero,@Tx[0],4);        # "X[0]"<<96, extract one dword
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vadd_i32       (@X[0],@Tx[0],@Tx[0]);          # doubling, i.e. shift left by one; vsri below supplies the wrapped bit
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vsri_32        (@X[0],@Tx[0],31);              # "X[0]"<<<=1
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vshr_u32       (@Tx[0],@Tx[1],30);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vshl_u32       (@Tx[1],@Tx[1],2);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &veor           (@X[0],@X[0],@Tx[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &veor           (@X[0],@X[0],@Tx[1]);           # "X[0]"^=("X[0]">>96)<<<2
+
+       foreach (@insns) { eval; }      # remaining instructions [if any]
+
+  $Xi++;       push(@X,shift(@X));     # "rotate" X[]
+}
+
+sub Xupdate_32_79 ()   # emits the shorter NEON message-schedule update (rotate by 2) interleaved with the snippets of four scalar rounds
+{ use integer;
+  my $body = shift;    # code ref returning the round snippets (body_00_19 etc.)
+  my @insns = (&$body,&$body,&$body,&$body);   # snippets for four rounds, consumed in order by the evals below
+  my ($a,$b,$c,$d,$e); # assigned by the eval'ed snippets from @V
+
+       &vext_8         (@Tx[0],@X[-2&7],@X[-1&7],8);   # compose "X[-6]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &veor           (@X[0],@X[0],@X[-4&7]);         # "X[0]"="X[-32]"^"X[-16]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &veor           (@X[0],@X[0],@X[-7&7]);         # "X[0]"^="X[-28]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vadd_i32     (@Tx[1],@X[-1&7],$K);           # newest vector plus K, stored through $Xfer below
+        eval(shift(@insns));
+         &vld1_32      ("{$K\[]}","[$K_XX_XX,:32]!")   if ($Xi%5==0);  # fetch the next constant when $Xi is a multiple of 5
+        eval(shift(@insns));
+       &veor           (@Tx[0],@Tx[0],@X[0]);          # "X[-6]"^="X[0]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vshr_u32       (@X[0],@Tx[0],30);              # high two bits moved down; vsli below inserts the rest shifted left by 2
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vst1_32      ("{@Tx[1]}","[$Xfer,:128]!");   # X[]+K xfer
+         &sub          ($Xfer,$Xfer,64)                if ($Xi%4==0);  # step $Xfer back by 64 bytes when $Xi is a multiple of 4
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vsli_32        (@X[0],@Tx[0],2);               # "X[0]"="X[-6]"<<<2
+
+       foreach (@insns) { eval; }      # remaining instructions [if any]
+
+  $Xi++;       push(@X,shift(@X));     # "rotate" X[]
+}
+
+sub Xuplast_80 ()      # emits the last X[]+K transfer of a block and starts loading the next 64 input bytes, interleaved with four scalar rounds
+{ use integer;
+  my $body = shift;    # code ref returning the round snippets
+  my @insns = (&$body,&$body,&$body,&$body);   # snippets for four rounds
+  my ($a,$b,$c,$d,$e); # assigned by the eval'ed snippets from @V
+
+       &vadd_i32       (@Tx[1],@X[-1&7],$K);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vst1_32        ("{@Tx[1]}","[$Xfer,:128]!");
+       &sub            ($Xfer,$Xfer,64);       # unconditional here, unlike the $Xi%4 test in the Xupdate subs
+
+       &teq            ($inp,$len);            # sets the flags used by subeq below
+       &sub            ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX
+       &subeq          ($inp,$inp,64);         # reload last block to avoid SEGV
+       &vld1_8         ("{@X[-4&7]-@X[-3&7]}","[$inp]!");
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vld1_8         ("{@X[-2&7]-@X[-1&7]}","[$inp]!");
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vld1_32        ("{$K\[]}","[$K_XX_XX,:32]!");  # load K_00_19
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vrev32_8       (@X[-4&7],@X[-4&7]);    # byte-swap the first loaded vector; Xloop handles the other three
+
+       foreach (@insns) { eval; }              # remaining instructions
+
+   $Xi=0;      # restart the index that Xloop uses to pick registers
+}
+
+sub Xloop()            # emits byte-swap, +K and store for one freshly loaded input vector, interleaved with four scalar rounds
+{ use integer;
+  my $body = shift;    # code ref returning the round snippets
+  my @insns = (&$body,&$body,&$body,&$body);   # snippets for four rounds
+  my ($a,$b,$c,$d,$e); # assigned by the eval'ed snippets from @V
+
+       &vrev32_8       (@X[($Xi-3)&7],@X[($Xi-3)&7]);  # byte-swap the vector that the next call will add K to
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vadd_i32       (@X[$Xi&7],@X[($Xi-4)&7],$K);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vst1_32        ("{@X[$Xi&7]}","[$Xfer,:128]!");# X[]+K xfer to IALU
+
+       foreach (@insns) { eval; }      # flush whatever snippets are left
+
+  $Xi++;       # @X is not rotated here, only the index advances
+}
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
+.type  sha1_block_data_order_neon,%function
+.align 4
+sha1_block_data_order_neon:
+.LNEON:
+       stmdb   sp!,{r4-r12,lr}
+       add     $len,$inp,$len,lsl#6    @ $len to point at the end of $inp
+       @ dmb                           @ errata #451034 on early Cortex A8
+       @ vstmdb        sp!,{d8-d15}    @ ABI specification says so
+       mov     $saved_sp,sp
+       sub     sp,sp,#64               @ alloca
+       adr     $K_XX_XX,.LK_00_19
+       bic     sp,sp,#15               @ align for 128-bit stores
+
+       ldmia   $ctx,{$a,$b,$c,$d,$e}   @ load context
+       mov     $Xfer,sp
+
+       vld1.8          {@X[-4&7]-@X[-3&7]},[$inp]!     @ handles unaligned
+       veor            $zero,$zero,$zero
+       vld1.8          {@X[-2&7]-@X[-1&7]},[$inp]!
+       vld1.32         {${K}\[]},[$K_XX_XX,:32]!       @ load K_00_19
+       vrev32.8        @X[-4&7],@X[-4&7]               @ yes, even on
+       vrev32.8        @X[-3&7],@X[-3&7]               @ big-endian...
+       vrev32.8        @X[-2&7],@X[-2&7]
+       vadd.i32        @X[0],@X[-4&7],$K
+       vrev32.8        @X[-1&7],@X[-1&7]
+       vadd.i32        @X[1],@X[-3&7],$K
+       vst1.32         {@X[0]},[$Xfer,:128]!
+       vadd.i32        @X[2],@X[-2&7],$K
+       vst1.32         {@X[1]},[$Xfer,:128]!
+       vst1.32         {@X[2]},[$Xfer,:128]!
+       ldr             $Ki,[sp]                        @ big RAW stall
+
+.Loop_neon:
+___
+       &Xupdate_16_31(\&body_00_19);
+       &Xupdate_16_31(\&body_00_19);
+       &Xupdate_16_31(\&body_00_19);
+       &Xupdate_16_31(\&body_00_19);
+       &Xupdate_32_79(\&body_00_19);
+       &Xupdate_32_79(\&body_20_39);
+       &Xupdate_32_79(\&body_20_39);
+       &Xupdate_32_79(\&body_20_39);
+       &Xupdate_32_79(\&body_20_39);
+       &Xupdate_32_79(\&body_20_39);
+       &Xupdate_32_79(\&body_40_59);
+       &Xupdate_32_79(\&body_40_59);
+       &Xupdate_32_79(\&body_40_59);
+       &Xupdate_32_79(\&body_40_59);
+       &Xupdate_32_79(\&body_40_59);
+       &Xupdate_32_79(\&body_20_39);
+       &Xuplast_80(\&body_20_39);
+       &Xloop(\&body_20_39);
+       &Xloop(\&body_20_39);
+       &Xloop(\&body_20_39);
+$code.=<<___;
+       ldmia   $ctx,{$Ki,$t0,$t1,$Xfer}        @ accumulate context
+       add     $a,$a,$Ki
+       ldr     $Ki,[$ctx,#16]
+       add     $b,$b,$t0
+       add     $c,$c,$t1
+       add     $d,$d,$Xfer
+       moveq   sp,$saved_sp
+       add     $e,$e,$Ki
+       ldrne   $Ki,[sp]
+       stmia   $ctx,{$a,$b,$c,$d,$e}
+       addne   $Xfer,sp,#3*16
+       bne     .Loop_neon
+
+       @ vldmia        sp!,{d8-d15}
+       ldmia   sp!,{r4-r12,pc}
+.size  sha1_block_data_order_neon,.-sha1_block_data_order_neon
+#endif
+___
+}}}
+#####################################################################
+# ARMv8 stuff
+#
+{{{
+my ($ABCD,$E,$E0,$E1)=map("q$_",(0..3));
+my @MSG=map("q$_",(4..7));
+my @Kxx=map("q$_",(8..11));
+my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.type  sha1_block_data_order_armv8,%function
+.align 5
+sha1_block_data_order_armv8:
+.LARMv8:
+       vstmdb  sp!,{d8-d15}            @ ABI specification says so
+
+       veor    $E,$E,$E
+       adr     r3,.LK_00_19
+       vld1.32 {$ABCD},[$ctx]!
+       vld1.32 {$E\[0]},[$ctx]
+       sub     $ctx,$ctx,#16
+       vld1.32 {@Kxx[0]\[]},[r3,:32]!
+       vld1.32 {@Kxx[1]\[]},[r3,:32]!
+       vld1.32 {@Kxx[2]\[]},[r3,:32]!
+       vld1.32 {@Kxx[3]\[]},[r3,:32]
+
+.Loop_v8:
+       vld1.8          {@MSG[0]-@MSG[1]},[$inp]!
+       vld1.8          {@MSG[2]-@MSG[3]},[$inp]!
+       vrev32.8        @MSG[0],@MSG[0]
+       vrev32.8        @MSG[1],@MSG[1]
+
+       vadd.i32        $W0,@Kxx[0],@MSG[0]
+       vrev32.8        @MSG[2],@MSG[2]
+       vmov            $ABCD_SAVE,$ABCD        @ offload
+       subs            $len,$len,#1
+
+       vadd.i32        $W1,@Kxx[0],@MSG[1]
+       vrev32.8        @MSG[3],@MSG[3]
+       sha1h           $E1,$ABCD               @ 0
+       sha1c           $ABCD,$E,$W0
+       vadd.i32        $W0,@Kxx[$j],@MSG[2]
+       sha1su0         @MSG[0],@MSG[1],@MSG[2]
+___
+for ($j=0,$i=1;$i<20-3;$i++) {
+my $f=("c","p","m","p")[$i/5];
+$code.=<<___;
+       sha1h           $E0,$ABCD               @ $i
+       sha1$f          $ABCD,$E1,$W1
+       vadd.i32        $W1,@Kxx[$j],@MSG[3]
+       sha1su1         @MSG[0],@MSG[3]
+___
+$code.=<<___ if ($i<20-4);
+       sha1su0         @MSG[1],@MSG[2],@MSG[3]
 ___
+       ($E0,$E1)=($E1,$E0);    ($W0,$W1)=($W1,$W0);
+       push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0);
+}
+$code.=<<___;
+       sha1h           $E0,$ABCD               @ $i
+       sha1p           $ABCD,$E1,$W1
+       vadd.i32        $W1,@Kxx[$j],@MSG[3]
+
+       sha1h           $E1,$ABCD               @ 18
+       sha1p           $ABCD,$E0,$W0
+
+       sha1h           $E0,$ABCD               @ 19
+       sha1p           $ABCD,$E1,$W1
+
+       vadd.i32        $E,$E,$E0
+       vadd.i32        $ABCD,$ABCD,$ABCD_SAVE
+       bne             .Loop_v8
+
+       vst1.32         {$ABCD},[$ctx]!
+       vst1.32         {$E\[0]},[$ctx]
+
+       vldmia  sp!,{d8-d15}
+       ret                                     @ bx lr
+.size  sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
+#endif
+___
+}}}
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.comm  OPENSSL_armcap_P,4,4
+#endif
+___
+
+{   my  %opcode = (    # base instruction words; unsha1 ORs the register fields into them
+       "sha1c"         => 0xf2000c40,  "sha1p"         => 0xf2100c40,
+       "sha1m"         => 0xf2200c40,  "sha1su0"       => 0xf2300c40,
+       "sha1h"         => 0xf3b902c0,  "sha1su1"       => 0xf3ba0380   );
+
+    sub unsha1 {       # ($mnemonic,$arg) -> ".byte" directive that encodes the instruction, with the source text kept as a trailing @ comment
+       my ($mnemonic,$arg)=@_;
+
+       if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {     # $1 = first q register, $2 = optional middle one, $3 = last
+           my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)     # each register number is split: low three bits in one field, bit 3 in another
+                                        |(($2&7)<<17)|(($2&8)<<4)
+                                        |(($3&7)<<1) |(($3&8)<<2);
+           # since ARMv7 instructions are always encoded little-endian.
+           # correct solution is to use .inst directive, but older
+           # assemblers don't implement it:-(
+           sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",      # least-significant byte first
+                       $word&0xff,($word>>8)&0xff,
+                       ($word>>16)&0xff,($word>>24)&0xff,
+                       $mnemonic,$arg;
+       }       # NOTE(review): no directive is built when the operands do not match the pattern -- confirm that cannot happen for the emitted mnemonics
+    }
+}
+
+foreach (split($/,$code)) {    # post-process the generated text one line at a time and print it
+       s/{q([0-9]+)\[\]}/sprintf "{d%d[],d%d[]}",2*$1,2*$1+1/eo        or      # {qN[]} is respelled as {d(2N)[],d(2N+1)[]}
+       s/{q([0-9]+)\[0\]}/sprintf "{d%d[0]}",2*$1/eo;                          # otherwise {qN[0]} becomes {d(2N)[0]}
+
+       s/\b(sha1\w+)\s+(q.*)/unsha1($1,$2)/geo;        # sha1* mnemonics are replaced by what unsha1 returns
+
+       s/\bret\b/bx    lr/o            or      # "ret" becomes "bx lr"; only if no "ret" was found is the next rule tried
+       s/\bbx\s+lr\b/.word\t0xe12fff1e/o;      # make it possible to compile with -march=armv4
+
+       print $_,$/;
+}
 
-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;   # make it possible to compile with -march=armv4
-print $code;
 close STDOUT; # enforce flush
diff --git a/crypto/sha/asm/sha1-armv8.pl b/crypto/sha/asm/sha1-armv8.pl
new file mode 100644 (file)
index 0000000..c04432a
--- /dev/null
@@ -0,0 +1,338 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# SHA1 for ARMv8.
+#
+# Performance in cycles per processed byte and improvement coefficient
+# over code generated with "default" compiler:
+#
+#              hardware-assisted       software(*)
+# Apple A7     2.31                    4.13 (+14%)
+# Cortex-A53   2.24                    8.03 (+97%)
+# Cortex-A57   2.35                    7.88 (+74%)
+# Denver       2.13                    3.97 (+0%)(**)
+# X-Gene                               8.80 (+200%)
+#
+# (*)  Software results are presented mostly for reference purposes.
+# (**) Keep in mind that Denver relies on binary translation, which
+#      optimizes compiler output at run-time.
+
+$flavour = shift;
+open STDOUT,">".shift;
+
+($ctx,$inp,$num)=("x0","x1","x2");
+@Xw=map("w$_",(3..17,19));
+@Xx=map("x$_",(3..17,19));
+@V=($A,$B,$C,$D,$E)=map("w$_",(20..24));
+($t0,$t1,$t2,$K)=map("w$_",(25..28));
+
+
+sub BODY_00_19 {       # appends round $i (0..19) to $code; ($a..$e) are the register names holding the working variables for this round
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=($i+2)&15;       # slot in @Xw that the schedule update below rewrites
+
+$code.=<<___ if ($i<15 && !($i&1));    # even rounds: the upper half of the 64-bit register goes to the next slot
+       lsr     @Xx[$i+1],@Xx[$i],#32
+___
+$code.=<<___ if ($i<14 && !($i&1));    # even rounds: load the next 8 input bytes (offset is relative to the already advanced $inp)
+       ldr     @Xx[$i+2],[$inp,#`($i+2)*4-64`]
+___
+$code.=<<___ if ($i<14 && ($i&1));     # odd rounds: fix up the pair loaded in the previous round
+#ifdef __ARMEB__
+       ror     @Xx[$i+1],@Xx[$i+1],#32
+#else
+       rev32   @Xx[$i+1],@Xx[$i+1]
+#endif
+___
+$code.=<<___ if ($i<14);               # plain round, no schedule update
+       bic     $t0,$d,$b
+       and     $t1,$c,$b
+       ror     $t2,$a,#27
+       add     $d,$d,$K                // future e+=K
+       orr     $t0,$t0,$t1
+       add     $e,$e,$t2               // e+=rot(a,5)
+       ror     $b,$b,#2
+       add     $d,$d,@Xw[($i+1)&15]    // future e+=X[i]
+       add     $e,$e,$t0               // e+=F(b,c,d)
+___
+$code.=<<___ if ($i==19);              # switch $K to 0x6ed9eba1 before the "future e+=K" add of this round
+       movz    $K,#0xeba1
+       movk    $K,#0x6ed9,lsl#16
+___
+$code.=<<___ if ($i>=14);              # round interleaved with the update of @Xw[$j] (three eors and a rotate)
+        eor    @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
+       bic     $t0,$d,$b
+       and     $t1,$c,$b
+       ror     $t2,$a,#27
+        eor    @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
+       add     $d,$d,$K                // future e+=K
+       orr     $t0,$t0,$t1
+       add     $e,$e,$t2               // e+=rot(a,5)
+        eor    @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
+       ror     $b,$b,#2
+       add     $d,$d,@Xw[($i+1)&15]    // future e+=X[i]
+       add     $e,$e,$t0               // e+=F(b,c,d)
+        ror    @Xw[$j],@Xw[$j],#31
+___
+}
+
+sub BODY_40_59 {       # appends round $i (40..59) to $code; ($a..$e) are the register names holding the working variables for this round
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=($i+2)&15;       # slot in @Xw that the schedule update below rewrites
+
+$code.=<<___ if ($i==59);              # switch $K to 0xca62c1d6 before the "future e+=K" add of this round
+       movz    $K,#0xc1d6
+       movk    $K,#0xca62,lsl#16
+___
+$code.=<<___;                          # round interleaved with the update of @Xw[$j]
+       orr     $t0,$b,$c
+       and     $t1,$b,$c
+        eor    @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
+       ror     $t2,$a,#27
+       and     $t0,$t0,$d
+       add     $d,$d,$K                // future e+=K
+        eor    @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
+       add     $e,$e,$t2               // e+=rot(a,5)
+       orr     $t0,$t0,$t1
+       ror     $b,$b,#2
+        eor    @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
+       add     $d,$d,@Xw[($i+1)&15]    // future e+=X[i]
+       add     $e,$e,$t0               // e+=F(b,c,d)
+        ror    @Xw[$j],@Xw[$j],#31
+___
+}
+
+sub BODY_20_39 {       # appends an xor-function round to $code; the driver loops call it for $i in 20..39 and again for 60..79
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=($i+2)&15;       # slot in @Xw that the schedule update below rewrites
+
+$code.=<<___ if ($i==39);              # switch $K to 0x8f1bbcdc before the "future e+=K" add of this round
+       movz    $K,#0xbcdc
+       movk    $K,#0x8f1b,lsl#16
+___
+$code.=<<___ if ($i<78);               # round interleaved with the update of @Xw[$j]
+        eor    @Xw[$j],@Xw[$j],@Xw[($j+2)&15]
+       eor     $t0,$d,$b
+       ror     $t2,$a,#27
+       add     $d,$d,$K                // future e+=K
+        eor    @Xw[$j],@Xw[$j],@Xw[($j+8)&15]
+       eor     $t0,$t0,$c
+       add     $e,$e,$t2               // e+=rot(a,5)
+       ror     $b,$b,#2
+        eor    @Xw[$j],@Xw[$j],@Xw[($j+13)&15]
+       add     $d,$d,@Xw[($i+1)&15]    // future e+=X[i]
+       add     $e,$e,$t0               // e+=F(b,c,d)
+        ror    @Xw[$j],@Xw[$j],#31
+___
+$code.=<<___ if ($i==78);              # no schedule update; the first two context words are loaded into @Xw[1],@Xw[2]
+       ldp     @Xw[1],@Xw[2],[$ctx]
+       eor     $t0,$d,$b
+       ror     $t2,$a,#27
+       add     $d,$d,$K                // future e+=K
+       eor     $t0,$t0,$c
+       add     $e,$e,$t2               // e+=rot(a,5)
+       ror     $b,$b,#2
+       add     $d,$d,@Xw[($i+1)&15]    // future e+=X[i]
+       add     $e,$e,$t0               // e+=F(b,c,d)
+___
+$code.=<<___ if ($i==79);              # final round: no "future" adds; the remaining three context words are loaded
+       ldp     @Xw[3],@Xw[4],[$ctx,#8]
+       eor     $t0,$d,$b
+       ror     $t2,$a,#27
+       eor     $t0,$t0,$c
+       add     $e,$e,$t2               // e+=rot(a,5)
+       ror     $b,$b,#2
+       ldr     @Xw[5],[$ctx,#16]
+       add     $e,$e,$t0               // e+=F(b,c,d)
+___
+}
+
+$code.=<<___;
+#include "arm_arch.h"
+
+.text
+
+.globl sha1_block_data_order
+.type  sha1_block_data_order,%function
+.align 6
+sha1_block_data_order:
+       ldr     x16,.LOPENSSL_armcap_P
+       adr     x17,.LOPENSSL_armcap_P
+       add     x16,x16,x17
+       ldr     w16,[x16]
+       tst     w16,#ARMV8_SHA1
+       b.ne    .Lv8_entry
+
+       stp     x29,x30,[sp,#-96]!
+       add     x29,sp,#0
+       stp     x19,x20,[sp,#16]
+       stp     x21,x22,[sp,#32]
+       stp     x23,x24,[sp,#48]
+       stp     x25,x26,[sp,#64]
+       stp     x27,x28,[sp,#80]
+
+       ldp     $A,$B,[$ctx]
+       ldp     $C,$D,[$ctx,#8]
+       ldr     $E,[$ctx,#16]
+
+.Loop:
+       ldr     @Xx[0],[$inp],#64
+       movz    $K,#0x7999
+       sub     $num,$num,#1
+       movk    $K,#0x5a82,lsl#16
+#ifdef __ARMEB__
+       ror     $Xx[0],@Xx[0],#32
+#else
+       rev32   @Xx[0],@Xx[0]
+#endif
+       add     $E,$E,$K                // warm it up
+       add     $E,$E,@Xw[0]
+___
+for($i=0;$i<20;$i++)   { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
+for(;$i<40;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+for(;$i<60;$i++)       { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
+for(;$i<80;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+       add     $B,$B,@Xw[2]
+       add     $C,$C,@Xw[3]
+       add     $A,$A,@Xw[1]
+       add     $D,$D,@Xw[4]
+       add     $E,$E,@Xw[5]
+       stp     $A,$B,[$ctx]
+       stp     $C,$D,[$ctx,#8]
+       str     $E,[$ctx,#16]
+       cbnz    $num,.Loop
+
+       ldp     x19,x20,[sp,#16]
+       ldp     x21,x22,[sp,#32]
+       ldp     x23,x24,[sp,#48]
+       ldp     x25,x26,[sp,#64]
+       ldp     x27,x28,[sp,#80]
+       ldr     x29,[sp],#96
+       ret
+.size  sha1_block_data_order,.-sha1_block_data_order
+___
+{{{
+my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3));
+my @MSG=map("v$_.16b",(4..7));
+my @Kxx=map("v$_.4s",(16..19));
+my ($W0,$W1)=("v20.4s","v21.4s");
+my $ABCD_SAVE="v22.16b";
+
+$code.=<<___;
+.type  sha1_block_armv8,%function
+.align 6
+sha1_block_armv8:
+.Lv8_entry:
+       stp     x29,x30,[sp,#-16]!
+       add     x29,sp,#0
+
+       adr     x4,.Lconst
+       eor     $E,$E,$E
+       ld1.32  {$ABCD},[$ctx],#16
+       ld1.32  {$E}[0],[$ctx]
+       sub     $ctx,$ctx,#16
+       ld1.32  {@Kxx[0]-@Kxx[3]},[x4]
+
+.Loop_hw:
+       ld1     {@MSG[0]-@MSG[3]},[$inp],#64
+       sub     $num,$num,#1
+       rev32   @MSG[0],@MSG[0]
+       rev32   @MSG[1],@MSG[1]
+
+       add.i32 $W0,@Kxx[0],@MSG[0]
+       rev32   @MSG[2],@MSG[2]
+       orr     $ABCD_SAVE,$ABCD,$ABCD  // offload
+
+       add.i32 $W1,@Kxx[0],@MSG[1]
+       rev32   @MSG[3],@MSG[3]
+       sha1h   $E1,$ABCD
+       sha1c   $ABCD,$E,$W0            // 0
+       add.i32 $W0,@Kxx[$j],@MSG[2]
+       sha1su0 @MSG[0],@MSG[1],@MSG[2]
+___
+for ($j=0,$i=1;$i<20-3;$i++) {
+my $f=("c","p","m","p")[$i/5];
+$code.=<<___;
+       sha1h   $E0,$ABCD               // $i
+       sha1$f  $ABCD,$E1,$W1
+       add.i32 $W1,@Kxx[$j],@MSG[3]
+       sha1su1 @MSG[0],@MSG[3]
+___
+$code.=<<___ if ($i<20-4);
+       sha1su0 @MSG[1],@MSG[2],@MSG[3]
+___
+       ($E0,$E1)=($E1,$E0);            ($W0,$W1)=($W1,$W0);
+       push(@MSG,shift(@MSG));         $j++ if ((($i+3)%5)==0);
+}
+$code.=<<___;
+       sha1h   $E0,$ABCD               // $i
+       sha1p   $ABCD,$E1,$W1
+       add.i32 $W1,@Kxx[$j],@MSG[3]
+
+       sha1h   $E1,$ABCD               // 18
+       sha1p   $ABCD,$E0,$W0
+
+       sha1h   $E0,$ABCD               // 19
+       sha1p   $ABCD,$E1,$W1
+
+       add.i32 $E,$E,$E0
+       add.i32 $ABCD,$ABCD,$ABCD_SAVE
+
+       cbnz    $num,.Loop_hw
+
+       st1.32  {$ABCD},[$ctx],#16
+       st1.32  {$E}[0],[$ctx]
+
+       ldr     x29,[sp],#16
+       ret
+.size  sha1_block_armv8,.-sha1_block_armv8
+.align 6
+.Lconst:
+.long  0x5a827999,0x5a827999,0x5a827999,0x5a827999     //K_00_19
+.long  0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1     //K_20_39
+.long  0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc     //K_40_59
+.long  0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6     //K_60_79
+.LOPENSSL_armcap_P:
+.quad  OPENSSL_armcap_P-.
+.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+.comm  OPENSSL_armcap_P,4,4
+___
+}}}
+
+{   my %opcode = (     # base instruction words; unsha1 ORs the register numbers into them
+       "sha1c"         => 0x5e000000,  "sha1p"         => 0x5e001000,
+       "sha1m"         => 0x5e002000,  "sha1su0"       => 0x5e003000,
+       "sha1h"         => 0x5e280800,  "sha1su1"       => 0x5e281800   );
+
+    sub unsha1 {       # ($mnemonic,$arg) -> ".inst" directive that encodes the instruction, with the source text kept as a trailing // comment
+       my ($mnemonic,$arg)=@_;
+
+       $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o # $1,$2 = first two registers, $3 = optional third
+       &&
+       sprintf ".inst\t0x%08x\t//%s %s",
+                       $opcode{$mnemonic}|$1|($2<<5)|($3<<16),         # register numbers land at bit 0, bit 5 and bit 16
+                       $mnemonic,$arg;
+    }
+}
+
+foreach(split("\n",$code)) {   # post-process the generated text one line at a time and print it
+
+       s/\`([^\`]*)\`/eval($1)/geo;    # text between backticks is evaluated as a Perl expression
+
+       s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/geo;     # sha1* mnemonics are replaced by what unsha1 returns
+
+       s/\.\w?32\b//o          and s/\.16b/\.4s/go;    # strip the first ".32"/".i32"-style suffix; on such lines .16b is respelled .4s
+       m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go;      # ld1/st1 lines with a [0] lane use .s instead of .4s
+
+       print $_,"\n";
+}
+
+close STDOUT;
diff --git a/crypto/sha/asm/sha1-mb-x86_64.pl b/crypto/sha/asm/sha1-mb-x86_64.pl
new file mode 100644 (file)
index 0000000..a8ee075
--- /dev/null
@@ -0,0 +1,1574 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# Multi-buffer SHA1 procedure processes n buffers in parallel by
+# placing buffer data to designated lane of SIMD register. n is
+# naturally limited to 4 on pre-AVX2 processors and to 8 on
+# AVX2-capable processors such as Haswell.
+#
+#              this    +aesni(i)       sha1    aesni-sha1      gain(iv)
+# -------------------------------------------------------------------
+# Westmere(ii) 10.7/n  +1.28=3.96(n=4) 5.30    6.66            +68%
+# Atom(ii)     18.1/n  +3.93=8.46(n=4) 9.37    12.8            +51%
+# Sandy Bridge (8.16   +5.15=13.3)/n   4.99    5.98            +80%
+# Ivy Bridge   (8.08   +5.14=13.2)/n   4.60    5.54            +68%
+# Haswell(iii) (8.96   +5.00=14.0)/n   3.57    4.55            +160%
+# Bulldozer    (9.76   +5.76=15.5)/n   5.95    6.37            +64%
+#
+# (i)  multi-block CBC encrypt with 128-bit key;
+# (ii) (HASH+AES)/n does not apply to Westmere for n>3 and Atom,
+#      because of lower AES-NI instruction throughput;
+# (iii)        "this" is for n=8, when we gather twice as much data, result
+#      for n=4 is 8.00+4.44=12.4;
+# (iv) presented improvement coefficients are asymptotic limits and
+#      in real-life application are somewhat lower, e.g. for 2KB
+#      fragments they range from 30% to 100% (on Haswell);
+
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+$avx=0;        # ends up 0, 1 or 2: each probe below adds two boolean version tests (presumably 1 = AVX, 2 = AVX2 -- confirm where $avx is consumed)
+
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {   # version banner of the assembler run through $ENV{CC}
+       $avx = ($1>=2.19) + ($1>=2.22); # NOTE(review): $1 is compared as a decimal number, not as major.minor -- confirm that is intended
+}
+
+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&  # tried only when nothing was detected above and the target is win64
+          `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.09) + ($1>=2.10);
+}
+
+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&  # same fallback for ml64; only the major version is examined
+          `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=11);
+}
+
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([0-9]+\.[0-9]+)/) {       # fallback probe for clang; multi-digit majors such as "clang version 10.0" must match too
+       $avx = ($2>=3.0) + ($2>3.0);    # 3.0 gives 1, anything newer gives 2, anything older leaves 0
+}
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
+
+# void sha1_multi_block (
+#     struct { unsigned int A[8];
+#              unsigned int B[8];
+#              unsigned int C[8];
+#              unsigned int D[8];
+#              unsigned int E[8];      } *ctx,
+#     struct { void *ptr; int blocks;  } inp[8],
+#     int num);                /* 1 or 2 */
+#
+$ctx="%rdi";   # 1st arg
+$inp="%rsi";   # 2nd arg
+$num="%edx";
+@ptr=map("%r$_",(8..11));
+$Tbl="%rbp";
+
+@V=($A,$B,$C,$D,$E)=map("%xmm$_",(0..4));
+($t0,$t1,$t2,$t3,$tx)=map("%xmm$_",(5..9));
+@Xi=map("%xmm$_",(10..14));
+$K="%xmm15";
+
+if (1) {
+    # Atom-specific optimization aiming to eliminate pshufb with high
+    # registers [and thus get rid of 48 cycles accumulated penalty] 
+    @Xi=map("%xmm$_",(0..4));
+    ($tx,$t0,$t1,$t2,$t3)=map("%xmm$_",(5..9));
+    @V=($A,$B,$C,$D,$E)=map("%xmm$_",(10..14));
+}
+
+$REG_SZ=16;
+
+sub Xi_off {   # returns the operand string addressing slot ($off mod 16) of the X[] save area
+my $off = shift;
+
+    $off %= 16; $off *= $REG_SZ;       # slot index wraps at 16; each slot is $REG_SZ bytes
+    $off<256 ? "$off-128(%rax)" : "$off-256-128(%rbx)";        # first 256 bytes are addressed off %rax, the rest off %rbx, both biased by -128
+}
+
+sub BODY_00_19 {
+my ($i,$a,$b,$c,$d,$e)=@_;
+my $j=$i+1;
+my $k=$i+2;
+
+# Loads are performed 2+3/4 iterations in advance. 3/4 means that out
+# of 4 words you would expect to be loaded per given iteration one is
+# spilled to next iteration. In other words indices in four input
+# streams are distributed as follows:
+#
+# $i==0:       0,0,0,0,1,1,1,1,2,2,2,
+# $i==1:       2,3,3,3,
+# $i==2:       3,4,4,4,
+# ...
+# $i==13:      14,15,15,15,
+# $i==14:      15
+# 
+# Then at $i==15 Xupdate is applied one iteration in advance...
+$code.=<<___ if ($i==0);
+       movd            (@ptr[0]),@Xi[0]
+        lea            `16*4`(@ptr[0]),@ptr[0]
+       movd            (@ptr[1]),@Xi[2]        # borrow @Xi[2]
+        lea            `16*4`(@ptr[1]),@ptr[1]
+       movd            (@ptr[2]),@Xi[3]        # borrow @Xi[3]
+        lea            `16*4`(@ptr[2]),@ptr[2]
+       movd            (@ptr[3]),@Xi[4]        # borrow @Xi[4]
+        lea            `16*4`(@ptr[3]),@ptr[3]
+       punpckldq       @Xi[3],@Xi[0]
+        movd           `4*$j-16*4`(@ptr[0]),@Xi[1]
+       punpckldq       @Xi[4],@Xi[2]
+        movd           `4*$j-16*4`(@ptr[1]),$t3
+       punpckldq       @Xi[2],@Xi[0]
+        movd           `4*$j-16*4`(@ptr[2]),$t2
+       pshufb          $tx,@Xi[0]
+___
+$code.=<<___ if ($i<14);                       # just load input
+        movd           `4*$j-16*4`(@ptr[3]),$t1
+        punpckldq      $t2,@Xi[1]
+       movdqa  $a,$t2
+       paddd   $K,$e                           # e+=K_00_19
+        punpckldq      $t1,$t3
+       movdqa  $b,$t1
+       movdqa  $b,$t0
+       pslld   \$5,$t2
+       pandn   $d,$t1
+       pand    $c,$t0
+        punpckldq      $t3,@Xi[1]
+       movdqa  $a,$t3
+
+       movdqa  @Xi[0],`&Xi_off($i)`
+       paddd   @Xi[0],$e                       # e+=X[i]
+        movd           `4*$k-16*4`(@ptr[0]),@Xi[2]
+       psrld   \$27,$t3
+       pxor    $t1,$t0                         # Ch(b,c,d)
+       movdqa  $b,$t1
+
+       por     $t3,$t2                         # rol(a,5)
+        movd           `4*$k-16*4`(@ptr[1]),$t3
+       pslld   \$30,$t1
+       paddd   $t0,$e                          # e+=Ch(b,c,d)
+
+       psrld   \$2,$b
+       paddd   $t2,$e                          # e+=rol(a,5)
+        pshufb $tx,@Xi[1]
+        movd           `4*$k-16*4`(@ptr[2]),$t2
+       por     $t1,$b                          # b=rol(b,30)
+___
+$code.=<<___ if ($i==14);                      # just load input
+        movd           `4*$j-16*4`(@ptr[3]),$t1
+        punpckldq      $t2,@Xi[1]
+       movdqa  $a,$t2
+       paddd   $K,$e                           # e+=K_00_19
+        punpckldq      $t1,$t3
+       movdqa  $b,$t1
+       movdqa  $b,$t0
+       pslld   \$5,$t2
+        prefetcht0     63(@ptr[0])
+       pandn   $d,$t1
+       pand    $c,$t0
+        punpckldq      $t3,@Xi[1]
+       movdqa  $a,$t3
+
+       movdqa  @Xi[0],`&Xi_off($i)`
+       paddd   @Xi[0],$e                       # e+=X[i]
+       psrld   \$27,$t3
+       pxor    $t1,$t0                         # Ch(b,c,d)
+       movdqa  $b,$t1
+        prefetcht0     63(@ptr[1])
+
+       por     $t3,$t2                         # rol(a,5)
+       pslld   \$30,$t1
+       paddd   $t0,$e                          # e+=Ch(b,c,d)
+        prefetcht0     63(@ptr[2])
+
+       psrld   \$2,$b
+       paddd   $t2,$e                          # e+=rol(a,5)
+        pshufb $tx,@Xi[1]
+        prefetcht0     63(@ptr[3])
+       por     $t1,$b                          # b=rol(b,30)
+___
+$code.=<<___ if ($i>=13 && $i<15);
+       movdqa  `&Xi_off($j+2)`,@Xi[3]          # preload "X[2]"
+___
+$code.=<<___ if ($i>=15);                      # apply Xupdate
+       pxor    @Xi[-2],@Xi[1]                  # "X[13]"
+       movdqa  `&Xi_off($j+2)`,@Xi[3]          # "X[2]"
+
+       movdqa  $a,$t2
+        pxor   `&Xi_off($j+8)`,@Xi[1]
+       paddd   $K,$e                           # e+=K_00_19
+       movdqa  $b,$t1
+       pslld   \$5,$t2
+        pxor   @Xi[3],@Xi[1]
+       movdqa  $b,$t0
+       pandn   $d,$t1
+        movdqa @Xi[1],$tx
+       pand    $c,$t0
+       movdqa  $a,$t3
+        psrld  \$31,$tx
+        paddd  @Xi[1],@Xi[1]
+
+       movdqa  @Xi[0],`&Xi_off($i)`
+       paddd   @Xi[0],$e                       # e+=X[i]
+       psrld   \$27,$t3
+       pxor    $t1,$t0                         # Ch(b,c,d)
+
+       movdqa  $b,$t1
+       por     $t3,$t2                         # rol(a,5)
+       pslld   \$30,$t1
+       paddd   $t0,$e                          # e+=Ch(b,c,d)
+
+       psrld   \$2,$b
+       paddd   $t2,$e                          # e+=rol(a,5)
+        por    $tx,@Xi[1]                      # rol   \$1,@Xi[1]
+       por     $t1,$b                          # b=rol(b,30)
+___
+push(@Xi,shift(@Xi));
+}
+
+sub BODY_20_39 {                               # emits one round built on Parity(b,c,d); the driver loops call it for rounds 20..39 and 60..79
+my ($i,$a,$b,$c,$d,$e)=@_;                     # round number, then the registers currently playing a,b,c,d,e
+my $j=$i+1;                                    # index of the next message word, whose Xupdate is interleaved with this round
+
+$code.=<<___ if ($i<79);                       # every round but the last: begin Xupdate of the next word and start the round
+       pxor    @Xi[-2],@Xi[1]                  # "X[13]"
+       movdqa  `&Xi_off($j+2)`,@Xi[3]          # "X[2]"
+
+       movdqa  $a,$t2
+       movdqa  $d,$t0
+        pxor   `&Xi_off($j+8)`,@Xi[1]
+       paddd   $K,$e                           # e+=K_20_39
+       pslld   \$5,$t2
+       pxor    $b,$t0
+
+       movdqa  $a,$t3
+___
+$code.=<<___ if ($i<72);                       # store X[i] at Xi_off(i); NOTE(review): rounds 72+ skip the store, presumably because no later Xupdate reloads those slots - confirm
+       movdqa  @Xi[0],`&Xi_off($i)`
+___
+$code.=<<___ if ($i<79);                       # every round but the last: finish the round and rotate the new word left by one
+       paddd   @Xi[0],$e                       # e+=X[i]
+        pxor   @Xi[3],@Xi[1]
+       psrld   \$27,$t3
+       pxor    $c,$t0                          # Parity(b,c,d)
+       movdqa  $b,$t1
+
+       pslld   \$30,$t1
+        movdqa @Xi[1],$tx
+       por     $t3,$t2                         # rol(a,5)
+        psrld  \$31,$tx
+       paddd   $t0,$e                          # e+=Parity(b,c,d)
+        paddd  @Xi[1],@Xi[1]
+
+       psrld   \$2,$b
+       paddd   $t2,$e                          # e+=rol(a,5)
+        por    $tx,@Xi[1]                      # rol(@Xi[1],1)
+       por     $t1,$b                          # b=rol(b,30)
+___
+$code.=<<___ if ($i==79);                      # last round: round arithmetic only, no message word left to prepare
+       movdqa  $a,$t2
+       paddd   $K,$e                           # e+=K_20_39
+       movdqa  $d,$t0
+       pslld   \$5,$t2
+       pxor    $b,$t0
+
+       movdqa  $a,$t3
+       paddd   @Xi[0],$e                       # e+=X[i]
+       psrld   \$27,$t3
+       movdqa  $b,$t1
+       pxor    $c,$t0                          # Parity(b,c,d)
+
+       pslld   \$30,$t1
+       por     $t3,$t2                         # rol(a,5)
+       paddd   $t0,$e                          # e+=Parity(b,c,d)
+
+       psrld   \$2,$b
+       paddd   $t2,$e                          # e+=rol(a,5)
+       por     $t1,$b                          # b=rol(b,30)
+___
+push(@Xi,shift(@Xi));                          # rotate the register names so that @Xi[0] is the word consumed by the next round
+}
+
+sub BODY_40_59 {                               # emits one round built on Maj(b,c,d); the driver loop calls it for rounds 40..59
+my ($i,$a,$b,$c,$d,$e)=@_;                     # round number, then the registers currently playing a,b,c,d,e
+my $j=$i+1;                                    # index of the next message word, whose Xupdate is interleaved with this round
+
+$code.=<<___;                                  # round arithmetic interleaved with the Xupdate of the next message word
+       pxor    @Xi[-2],@Xi[1]                  # "X[13]"
+       movdqa  `&Xi_off($j+2)`,@Xi[3]          # "X[2]"
+
+       movdqa  $a,$t2
+       movdqa  $d,$t1
+        pxor   `&Xi_off($j+8)`,@Xi[1]
+       pxor    @Xi[3],@Xi[1]
+       paddd   $K,$e                           # e+=K_40_59
+       pslld   \$5,$t2
+       movdqa  $a,$t3
+       pand    $c,$t1                          # c AND d, first Maj term
+
+       movdqa  $d,$t0
+        movdqa @Xi[1],$tx
+       psrld   \$27,$t3
+       paddd   $t1,$e                          # e+=(c AND d)
+       pxor    $c,$t0                          # c XOR d
+
+       movdqa  @Xi[0],`&Xi_off($i)`
+       paddd   @Xi[0],$e                       # e+=X[i]
+       por     $t3,$t2                         # rol(a,5)
+        psrld  \$31,$tx
+       pand    $b,$t0                          # (c XOR d) AND b, second Maj term; it shares no set bit with the first, so adding both equals OR
+       movdqa  $b,$t1
+
+       pslld   \$30,$t1
+        paddd  @Xi[1],@Xi[1]
+       paddd   $t0,$e                          # e+=Maj(b,d,c)
+
+       psrld   \$2,$b
+       paddd   $t2,$e                          # e+=rol(a,5)
+        por    $tx,@Xi[1]                      # rol(@Xi[1],1)
+       por     $t1,$b                          # b=rol(b,30)
+___
+push(@Xi,shift(@Xi));                          # rotate the register names so that @Xi[0] is the word consumed by the next round
+}
+
+$code.=<<___;                                  # public entry point; first branch away to the SHA-extension code when the CPU reports it
+.text
+
+.extern        OPENSSL_ia32cap_P
+
+.globl sha1_multi_block
+.type  sha1_multi_block,\@function,3
+.align 32
+sha1_multi_block:
+       mov     OPENSSL_ia32cap_P+4(%rip),%rcx  # 64 bits of capability flags, starting at the second dword
+       bt      \$61,%rcx                       # check SHA bit
+       jc      _shaext_shortcut
+___
+$code.=<<___ if ($avx);                        # AVX dispatch, emitted only when $avx is set (set outside this part of the script)
+       test    \$`1<<28`,%ecx
+       jnz     _avx_shortcut
+___
+$code.=<<___;                                  # SSE path prologue: remember the caller's %rsp in %rax and save %rbx/%rbp
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+___
+$code.=<<___ if ($win64);                      # Win64 only: after the two pushes the new %rsp equals %rax-0xb8; %xmm6-%xmm15 are saved from there upwards
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,-0x78(%rax)
+       movaps  %xmm11,-0x68(%rax)
+       movaps  %xmm12,-0x58(%rax)
+       movaps  %xmm13,-0x48(%rax)
+       movaps  %xmm14,-0x38(%rax)
+       movaps  %xmm15,-0x28(%rax)
+___
+$code.=<<___;                                  # reserve REG_SZ*18 bytes of scratch, aligned down to 256 bytes
+       sub     \$`$REG_SZ*18`,%rsp
+       and     \$-256,%rsp
+       mov     %rax,`$REG_SZ*17`(%rsp)         # original %rsp
+.Lbody:
+       lea     K_XX_XX(%rip),$Tbl
+       lea     `$REG_SZ*16`(%rsp),%rbx
+
+.Loop_grande:
+       mov     $num,`$REG_SZ*17+8`(%rsp)       # original $num
+       xor     $num,$num
+___
+for($i=0;$i<4;$i++) {                          # per lane: read the 16-byte descriptor (pointer at +0, 32-bit block count at +8)
+    $code.=<<___;
+       mov     `16*$i+0`($inp),@ptr[$i]        # input pointer
+       mov     `16*$i+8`($inp),%ecx            # number of blocks
+       cmp     $num,%ecx
+       cmovg   %ecx,$num                       # find maximum
+       test    %ecx,%ecx
+       mov     %ecx,`4*$i`(%rbx)               # initialize counters
+       cmovle  $Tbl,@ptr[$i]                   # cancel input
+___
+}
+$code.=<<___;                                  # leave if every lane is empty, otherwise load the five state vectors and the first constants
+       test    $num,$num
+       jz      .Ldone
+
+       movdqu  0x00($ctx),$A                   # load context
+        lea    128(%rsp),%rax                  # NOTE(review): presumably the base register behind the Xi_off operands - confirm
+       movdqu  0x20($ctx),$B
+       movdqu  0x40($ctx),$C
+       movdqu  0x60($ctx),$D
+       movdqu  0x80($ctx),$E
+       movdqa  0x60($Tbl),$tx                  # pbswap_mask
+       movdqa  -0x20($Tbl),$K                  # K_00_19
+       jmp     .Loop
+
+.align 32
+.Loop:
+___
+for($i=0;$i<20;$i++)   { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }    # after each round rotate @V: the register that accumulated e becomes the next round's a
+$code.="       movdqa  0x00($Tbl),$K\n";       # K_20_39
+for(;$i<40;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }    # rounds 20..39
+$code.="       movdqa  0x20($Tbl),$K\n";       # K_40_59
+for(;$i<60;$i++)       { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }    # rounds 40..59
+$code.="       movdqa  0x40($Tbl),$K\n";       # K_60_79
+for(;$i<80;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }    # rounds 60..79 reuse the Parity round with the K_60_79 constant
+$code.=<<___;                                  # end of one block: update counters, fold the result into the context, loop
+       movdqa  (%rbx),@Xi[0]                   # pull counters
+       mov     \$1,%ecx                        # a lane whose counter is not above 1 has no further block to read
+       cmp     4*0(%rbx),%ecx                  # examine counters
+       pxor    $t2,$t2                         # zero, used as the comparand for the mask
+       cmovge  $Tbl,@ptr[0]                    # cancel input
+       cmp     4*1(%rbx),%ecx
+       movdqa  @Xi[0],@Xi[1]
+       cmovge  $Tbl,@ptr[1]
+       cmp     4*2(%rbx),%ecx
+       pcmpgtd $t2,@Xi[1]                      # mask value
+       cmovge  $Tbl,@ptr[2]
+       cmp     4*3(%rbx),%ecx
+       paddd   @Xi[1],@Xi[0]                   # counters--
+       cmovge  $Tbl,@ptr[3]
+
+       movdqu  0x00($ctx),$t0
+       pand    @Xi[1],$A                       # zero the result in lanes whose counter was not positive, so their context stays as it was
+       movdqu  0x20($ctx),$t1
+       pand    @Xi[1],$B
+       paddd   $t0,$A
+       movdqu  0x40($ctx),$t2
+       pand    @Xi[1],$C
+       paddd   $t1,$B
+       movdqu  0x60($ctx),$t3
+       pand    @Xi[1],$D
+       paddd   $t2,$C
+       movdqu  0x80($ctx),$tx
+       pand    @Xi[1],$E
+       movdqu  $A,0x00($ctx)
+       paddd   $t3,$D
+       movdqu  $B,0x20($ctx)
+       paddd   $tx,$E
+       movdqu  $C,0x40($ctx)
+       movdqu  $D,0x60($ctx)
+       movdqu  $E,0x80($ctx)
+
+       movdqa  @Xi[0],(%rbx)                   # save counters
+       movdqa  0x60($Tbl),$tx                  # pbswap_mask
+       movdqa  -0x20($Tbl),$K                  # K_00_19
+       dec     $num
+       jnz     .Loop
+
+       mov     `$REG_SZ*17+8`(%rsp),$num
+       lea     $REG_SZ($ctx),$ctx
+       lea     `16*$REG_SZ/4`($inp),$inp
+       dec     $num
+       jnz     .Loop_grande
+
+.Ldone:
+       mov     `$REG_SZ*17`(%rsp),%rax         # original %rsp
+___
+$code.=<<___ if ($win64);                      # Win64 only: reload %xmm6-%xmm15 from the offsets used by the prologue
+       movaps  -0xb8(%rax),%xmm6
+       movaps  -0xa8(%rax),%xmm7
+       movaps  -0x98(%rax),%xmm8
+       movaps  -0x88(%rax),%xmm9
+       movaps  -0x78(%rax),%xmm10
+       movaps  -0x68(%rax),%xmm11
+       movaps  -0x58(%rax),%xmm12
+       movaps  -0x48(%rax),%xmm13
+       movaps  -0x38(%rax),%xmm14
+       movaps  -0x28(%rax),%xmm15
+___
+$code.=<<___;                                  # restore the pushed registers relative to the original %rsp and return
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp
+.Lepilogue:
+       ret
+.size  sha1_multi_block,.-sha1_multi_block
+___
+                                               {{{
+my ($ABCD0,$E0,$E0_,$BSWAP,$ABCD1,$E1,$E1_)=map("%xmm$_",(0..3,8..10));       # state, e/alternate-e and byte-swap mask registers for the two interleaved streams
+my @MSG0=map("%xmm$_",(4..7));                 # message schedule registers of stream 0
+my @MSG1=map("%xmm$_",(11..14));               # message schedule registers of stream 1
+
+$code.=<<___;                                  # SHA-extension variant, reached from sha1_multi_block through _shaext_shortcut
+.type  sha1_multi_block_shaext,\@function,3
+.align 32
+sha1_multi_block_shaext:
+_shaext_shortcut:
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+___
+$code.=<<___ if ($win64);                      # Win64 only: after the two pushes the new %rsp equals %rax-0xb8; %xmm6-%xmm15 are saved from there upwards
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,-0x78(%rax)
+       movaps  %xmm11,-0x68(%rax)
+       movaps  %xmm12,-0x58(%rax)
+       movaps  %xmm13,-0x48(%rax)
+       movaps  %xmm14,-0x38(%rax)
+       movaps  %xmm15,-0x28(%rax)
+___
+$code.=<<___;                                  # reserve aligned scratch; the context pointer is biased by 0x40 and later accesses subtract it again
+       sub     \$`$REG_SZ*18`,%rsp
+       shl     \$1,$num                        # we process pair at a time
+       and     \$-256,%rsp
+       lea     0x40($ctx),$ctx                 # size optimization
+       mov     %rax,`$REG_SZ*17`(%rsp)         # original %rsp
+.Lbody_shaext:
+       lea     `$REG_SZ*16`(%rsp),%rbx
+       movdqa  K_XX_XX+0x80(%rip),$BSWAP       # byte-n-word swap
+
+.Loop_grande_shaext:
+       mov     $num,`$REG_SZ*17+8`(%rsp)       # original $num
+       xor     $num,$num
+___
+for($i=0;$i<2;$i++) {                          # per stream: read the 16-byte descriptor; an empty stream is pointed at %rsp instead
+    $code.=<<___;
+       mov     `16*$i+0`($inp),@ptr[$i]        # input pointer
+       mov     `16*$i+8`($inp),%ecx            # number of blocks
+       cmp     $num,%ecx
+       cmovg   %ecx,$num                       # find maximum
+       test    %ecx,%ecx
+       mov     %ecx,`4*$i`(%rbx)               # initialize counters
+       cmovle  %rsp,@ptr[$i]                   # cancel input
+___
+}
+$code.=<<___;                                  # load and transpose the two-lane context, then emit rounds 0..7 of both streams
+       test    $num,$num
+       jz      .Ldone_shaext
+
+       movq            0x00-0x40($ctx),$ABCD0  # a1.a0
+       movq            0x20-0x40($ctx),@MSG0[0]# b1.b0
+       movq            0x40-0x40($ctx),@MSG0[1]# c1.c0
+       movq            0x60-0x40($ctx),@MSG0[2]# d1.d0
+       movq            0x80-0x40($ctx),@MSG0[3]# e1.e0
+
+       punpckldq       @MSG0[0],$ABCD0         # b1.a1.b0.a0
+       punpckldq       @MSG0[2],@MSG0[1]       # d1.c1.d0.c0
+
+       movdqa          $ABCD0,$ABCD1
+       punpcklqdq      @MSG0[1],$ABCD0         # d0.c0.b0.a0
+       punpckhqdq      @MSG0[1],$ABCD1         # d1.c1.b1.a1
+
+       pshufd          \$0b00111111,@MSG0[3],$E0
+       pshufd          \$0b01111111,@MSG0[3],$E1
+       pshufd          \$0b00011011,$ABCD0,$ABCD0
+       pshufd          \$0b00011011,$ABCD1,$ABCD1
+       jmp             .Loop_shaext
+
+.align 32
+.Loop_shaext:
+       movdqu          0x00(@ptr[0]),@MSG0[0]
+        movdqu         0x00(@ptr[1]),@MSG1[0]
+       movdqu          0x10(@ptr[0]),@MSG0[1]
+        movdqu         0x10(@ptr[1]),@MSG1[1]
+       movdqu          0x20(@ptr[0]),@MSG0[2]
+       pshufb          $BSWAP,@MSG0[0]
+        movdqu         0x20(@ptr[1]),@MSG1[2]
+        pshufb         $BSWAP,@MSG1[0]
+       movdqu          0x30(@ptr[0]),@MSG0[3]
+       lea             0x40(@ptr[0]),@ptr[0]
+       pshufb          $BSWAP,@MSG0[1]
+        movdqu         0x30(@ptr[1]),@MSG1[3]
+        lea            0x40(@ptr[1]),@ptr[1]
+        pshufb         $BSWAP,@MSG1[1]
+
+       movdqa          $E0,0x50(%rsp)          # offload
+       paddd           @MSG0[0],$E0
+        movdqa         $E1,0x70(%rsp)
+        paddd          @MSG1[0],$E1
+       movdqa          $ABCD0,0x40(%rsp)       # offload
+       movdqa          $ABCD0,$E0_
+        movdqa         $ABCD1,0x60(%rsp)
+        movdqa         $ABCD1,$E1_
+       sha1rnds4       \$0,$E0,$ABCD0          # 0-3
+       sha1nexte       @MSG0[1],$E0_
+        sha1rnds4      \$0,$E1,$ABCD1          # 0-3
+        sha1nexte      @MSG1[1],$E1_
+       pshufb          $BSWAP,@MSG0[2]
+       prefetcht0      127(@ptr[0])
+       sha1msg1        @MSG0[1],@MSG0[0]
+        pshufb         $BSWAP,@MSG1[2]
+        prefetcht0     127(@ptr[1])
+        sha1msg1       @MSG1[1],@MSG1[0]
+
+       pshufb          $BSWAP,@MSG0[3]
+       movdqa          $ABCD0,$E0
+        pshufb         $BSWAP,@MSG1[3]
+        movdqa         $ABCD1,$E1
+       sha1rnds4       \$0,$E0_,$ABCD0         # 4-7
+       sha1nexte       @MSG0[2],$E0
+        sha1rnds4      \$0,$E1_,$ABCD1         # 4-7
+        sha1nexte      @MSG1[2],$E1
+       pxor            @MSG0[2],@MSG0[0]
+       sha1msg1        @MSG0[2],@MSG0[1]
+        pxor           @MSG1[2],@MSG1[0]
+        sha1msg1       @MSG1[2],@MSG1[1]
+___
+for($i=2;$i<20-4;$i++) {                       # four-round groups 2..15 (rounds 8..63); int($i/5) is the sha1rnds4 selector, and the "8-11" tag in the emitted text is the same for every iteration
+$code.=<<___;
+       movdqa          $ABCD0,$E0_
+        movdqa         $ABCD1,$E1_
+       sha1rnds4       \$`int($i/5)`,$E0,$ABCD0        # 8-11
+       sha1nexte       @MSG0[3],$E0_
+        sha1rnds4      \$`int($i/5)`,$E1,$ABCD1        # 8-11
+        sha1nexte      @MSG1[3],$E1_
+       sha1msg2        @MSG0[3],@MSG0[0]
+        sha1msg2       @MSG1[3],@MSG1[0]
+       pxor            @MSG0[3],@MSG0[1]
+       sha1msg1        @MSG0[3],@MSG0[2]
+        pxor           @MSG1[3],@MSG1[1]
+        sha1msg1       @MSG1[3],@MSG1[2]
+___
+       ($E0,$E0_)=($E0_,$E0);          ($E1,$E1_)=($E1_,$E1);         # swap the roles of the two e registers for the next group
+       push(@MSG0,shift(@MSG0));       push(@MSG1,shift(@MSG1));      # rotate the message register names by one
+}
+$code.=<<___;                                  # rounds 64..79 interleaved with counter handling, then add the saved state and store the context
+       movdqa          $ABCD0,$E0_
+        movdqa         $ABCD1,$E1_
+       sha1rnds4       \$3,$E0,$ABCD0          # 64-67
+       sha1nexte       @MSG0[3],$E0_
+        sha1rnds4      \$3,$E1,$ABCD1          # 64-67
+        sha1nexte      @MSG1[3],$E1_
+       sha1msg2        @MSG0[3],@MSG0[0]
+        sha1msg2       @MSG1[3],@MSG1[0]
+       pxor            @MSG0[3],@MSG0[1]
+        pxor           @MSG1[3],@MSG1[1]
+
+       mov             \$1,%ecx                # a stream whose counter is not above 1 has no further block to read
+       pxor            @MSG0[2],@MSG0[2]       # zero
+       cmp             4*0(%rbx),%ecx          # examine counters
+       cmovge          %rsp,@ptr[0]            # cancel input
+
+       movdqa          $ABCD0,$E0
+        movdqa         $ABCD1,$E1
+       sha1rnds4       \$3,$E0_,$ABCD0         # 68-71
+       sha1nexte       @MSG0[0],$E0
+        sha1rnds4      \$3,$E1_,$ABCD1         # 68-71
+        sha1nexte      @MSG1[0],$E1
+       sha1msg2        @MSG0[0],@MSG0[1]
+        sha1msg2       @MSG1[0],@MSG1[1]
+
+       cmp             4*1(%rbx),%ecx
+       cmovge          %rsp,@ptr[1]
+       movq            (%rbx),@MSG0[0]         # pull counters
+
+       movdqa          $ABCD0,$E0_
+        movdqa         $ABCD1,$E1_
+       sha1rnds4       \$3,$E0,$ABCD0          # 72-75
+       sha1nexte       @MSG0[1],$E0_
+        sha1rnds4      \$3,$E1,$ABCD1          # 72-75
+        sha1nexte      @MSG1[1],$E1_
+
+       pshufd          \$0x00,@MSG0[0],@MSG1[2]        # broadcast the counter of stream 0
+       pshufd          \$0x55,@MSG0[0],@MSG1[3]        # broadcast the counter of stream 1
+       movdqa          @MSG0[0],@MSG0[1]
+       pcmpgtd         @MSG0[2],@MSG1[2]       # all-ones when the stream 0 counter is positive
+       pcmpgtd         @MSG0[2],@MSG1[3]       # all-ones when the stream 1 counter is positive
+
+       movdqa          $ABCD0,$E0
+        movdqa         $ABCD1,$E1
+       sha1rnds4       \$3,$E0_,$ABCD0         # 76-79
+       sha1nexte       $MSG0[2],$E0
+        sha1rnds4      \$3,$E1_,$ABCD1         # 76-79
+        sha1nexte      $MSG0[2],$E1
+
+       pcmpgtd         @MSG0[2],@MSG0[1]       # counter mask
+       pand            @MSG1[2],$ABCD0         # zero the result of a stream that had no work
+       pand            @MSG1[2],$E0
+        pand           @MSG1[3],$ABCD1
+        pand           @MSG1[3],$E1
+       paddd           @MSG0[1],@MSG0[0]       # counters--
+
+       paddd           0x40(%rsp),$ABCD0       # add the state offloaded at the top of the loop
+       paddd           0x50(%rsp),$E0
+        paddd          0x60(%rsp),$ABCD1
+        paddd          0x70(%rsp),$E1
+
+       movq            @MSG0[0],(%rbx)         # save counters
+       dec             $num
+       jnz             .Loop_shaext
+
+       mov             `$REG_SZ*17+8`(%rsp),$num
+
+       pshufd          \$0b00011011,$ABCD0,$ABCD0
+       pshufd          \$0b00011011,$ABCD1,$ABCD1
+
+       movdqa          $ABCD0,@MSG0[0]
+       punpckldq       $ABCD1,$ABCD0           # b1.b0.a1.a0
+       punpckhdq       $ABCD1,@MSG0[0]         # d1.d0.c1.c0
+       punpckhdq       $E1,$E0                 # e1.e0.xx.xx
+       movq            $ABCD0,0x00-0x40($ctx)  # a1.a0
+       psrldq          \$8,$ABCD0
+       movq            @MSG0[0],0x40-0x40($ctx)# c1.c0
+       psrldq          \$8,@MSG0[0]
+       movq            $ABCD0,0x20-0x40($ctx)  # b1.b0
+       psrldq          \$8,$E0
+       movq            @MSG0[0],0x60-0x40($ctx)# d1.d0
+       movq            $E0,0x80-0x40($ctx)     # e1.e0
+
+       lea     `$REG_SZ/2`($ctx),$ctx
+       lea     `16*2`($inp),$inp
+       dec     $num
+       jnz     .Loop_grande_shaext
+
+.Ldone_shaext:
+       #mov    `$REG_SZ*17`(%rsp),%rax         # original %rsp
+___
+$code.=<<___ if ($win64);                      # Win64 only: reload %xmm6-%xmm15; NOTE(review): %rax is not reloaded above, presumably because nothing in this path overwrites it - confirm
+       movaps  -0xb8(%rax),%xmm6
+       movaps  -0xa8(%rax),%xmm7
+       movaps  -0x98(%rax),%xmm8
+       movaps  -0x88(%rax),%xmm9
+       movaps  -0x78(%rax),%xmm10
+       movaps  -0x68(%rax),%xmm11
+       movaps  -0x58(%rax),%xmm12
+       movaps  -0x48(%rax),%xmm13
+       movaps  -0x38(%rax),%xmm14
+       movaps  -0x28(%rax),%xmm15
+___
+$code.=<<___;                                  # restore the pushed registers relative to the original %rsp and return
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp
+.Lepilogue_shaext:
+       ret
+.size  sha1_multi_block_shaext,.-sha1_multi_block_shaext
+___
+                                               }}}
+
+                                               if ($avx) {{{
+sub BODY_00_19_avx {                           # emits one Ch round in AVX form; rounds 0..14 also gather input, rounds 15..19 run Xupdate
+my ($i,$a,$b,$c,$d,$e)=@_;                     # round number, then the registers currently playing a,b,c,d,e
+my $j=$i+1;                                    # index of the message word completed during this round
+my $k=$i+2;                                    # index of the message word whose gather is started during this round
+my $vpack = $REG_SZ==16 ? "vpunpckldq" : "vinserti128";        # instruction merging the two half-gathered registers; NOTE(review): vinserti128 is written without an immediate, presumably fixed up by a later pass - confirm
+my $ptr_n = $REG_SZ==16 ? @ptr[1] : @ptr[4];   # pointer whose word seeds the second gather register
+
+$code.=<<___ if ($i==0 && $REG_SZ==16);        # 4 lanes: gather word 0, step every pointer past the 64-byte block, start gathering word 1
+       vmovd           (@ptr[0]),@Xi[0]
+        lea            `16*4`(@ptr[0]),@ptr[0]
+       vmovd           (@ptr[1]),@Xi[2]        # borrow Xi[2]
+        lea            `16*4`(@ptr[1]),@ptr[1]
+       vpinsrd         \$1,(@ptr[2]),@Xi[0],@Xi[0]
+        lea            `16*4`(@ptr[2]),@ptr[2]
+       vpinsrd         \$1,(@ptr[3]),@Xi[2],@Xi[2]
+        lea            `16*4`(@ptr[3]),@ptr[3]
+        vmovd          `4*$j-16*4`(@ptr[0]),@Xi[1]
+       vpunpckldq      @Xi[2],@Xi[0],@Xi[0]
+        vmovd          `4*$j-16*4`($ptr_n),$t3
+       vpshufb         $tx,@Xi[0],@Xi[0]
+___
+$code.=<<___ if ($i<15 && $REG_SZ==16);                # just load input
+        vpinsrd        \$1,`4*$j-16*4`(@ptr[2]),@Xi[1],@Xi[1]
+        vpinsrd        \$1,`4*$j-16*4`(@ptr[3]),$t3,$t3
+___
+$code.=<<___ if ($i==0 && $REG_SZ==32);        # 8 lanes: same priming, lanes 0..3 and 4..7 are gathered separately and then merged
+       vmovd           (@ptr[0]),@Xi[0]
+        lea            `16*4`(@ptr[0]),@ptr[0]
+       vmovd           (@ptr[4]),@Xi[2]        # borrow Xi[2]
+        lea            `16*4`(@ptr[4]),@ptr[4]
+       vmovd           (@ptr[1]),$t2
+        lea            `16*4`(@ptr[1]),@ptr[1]
+       vmovd           (@ptr[5]),$t1
+        lea            `16*4`(@ptr[5]),@ptr[5]
+       vpinsrd         \$1,(@ptr[2]),@Xi[0],@Xi[0]
+        lea            `16*4`(@ptr[2]),@ptr[2]
+       vpinsrd         \$1,(@ptr[6]),@Xi[2],@Xi[2]
+        lea            `16*4`(@ptr[6]),@ptr[6]
+       vpinsrd         \$1,(@ptr[3]),$t2,$t2
+        lea            `16*4`(@ptr[3]),@ptr[3]
+       vpunpckldq      $t2,@Xi[0],@Xi[0]
+       vpinsrd         \$1,(@ptr[7]),$t1,$t1
+        lea            `16*4`(@ptr[7]),@ptr[7]
+       vpunpckldq      $t1,@Xi[2],@Xi[2]
+        vmovd          `4*$j-16*4`(@ptr[0]),@Xi[1]
+       vinserti128     @Xi[2],@Xi[0],@Xi[0]
+        vmovd          `4*$j-16*4`($ptr_n),$t3
+       vpshufb         $tx,@Xi[0],@Xi[0]
+___
+$code.=<<___ if ($i<15 && $REG_SZ==32);                # just load input
+        vmovd          `4*$j-16*4`(@ptr[1]),$t2
+        vmovd          `4*$j-16*4`(@ptr[5]),$t1
+        vpinsrd        \$1,`4*$j-16*4`(@ptr[2]),@Xi[1],@Xi[1]
+        vpinsrd        \$1,`4*$j-16*4`(@ptr[6]),$t3,$t3
+        vpinsrd        \$1,`4*$j-16*4`(@ptr[3]),$t2,$t2
+        vpunpckldq     $t2,@Xi[1],@Xi[1]
+        vpinsrd        \$1,`4*$j-16*4`(@ptr[7]),$t1,$t1
+        vpunpckldq     $t1,$t3,$t3
+___
+$code.=<<___ if ($i<14);                       # rounds 0..13: round arithmetic, finish gathering word i+1 and start gathering word i+2
+       vpaddd  $K,$e,$e                        # e+=K_00_19
+       vpslld  \$5,$a,$t2
+       vpandn  $d,$b,$t1
+       vpand   $c,$b,$t0
+
+       vmovdqa @Xi[0],`&Xi_off($i)`
+       vpaddd  @Xi[0],$e,$e                    # e+=X[i]
+        $vpack         $t3,@Xi[1],@Xi[1]
+       vpsrld  \$27,$a,$t3
+       vpxor   $t1,$t0,$t0                     # Ch(b,c,d)
+        vmovd          `4*$k-16*4`(@ptr[0]),@Xi[2]
+
+       vpslld  \$30,$b,$t1
+       vpor    $t3,$t2,$t2                     # rol(a,5)
+        vmovd          `4*$k-16*4`($ptr_n),$t3
+       vpaddd  $t0,$e,$e                       # e+=Ch(b,c,d)
+
+       vpsrld  \$2,$b,$b
+       vpaddd  $t2,$e,$e                       # e+=rol(a,5)
+        vpshufb        $tx,@Xi[1],@Xi[1]
+       vpor    $t1,$b,$b                       # b=rol(b,30)
+___
+$code.=<<___ if ($i==14);                      # round 14: nothing further to gather, so the load slots carry prefetches instead
+       vpaddd  $K,$e,$e                        # e+=K_00_19
+        prefetcht0     63(@ptr[0])
+       vpslld  \$5,$a,$t2
+       vpandn  $d,$b,$t1
+       vpand   $c,$b,$t0
+
+       vmovdqa @Xi[0],`&Xi_off($i)`
+       vpaddd  @Xi[0],$e,$e                    # e+=X[i]
+        $vpack         $t3,@Xi[1],@Xi[1]
+       vpsrld  \$27,$a,$t3
+        prefetcht0     63(@ptr[1])
+       vpxor   $t1,$t0,$t0                     # Ch(b,c,d)
+
+       vpslld  \$30,$b,$t1
+       vpor    $t3,$t2,$t2                     # rol(a,5)
+        prefetcht0     63(@ptr[2])
+       vpaddd  $t0,$e,$e                       # e+=Ch(b,c,d)
+
+       vpsrld  \$2,$b,$b
+       vpaddd  $t2,$e,$e                       # e+=rol(a,5)
+        prefetcht0     63(@ptr[3])
+        vpshufb        $tx,@Xi[1],@Xi[1]
+       vpor    $t1,$b,$b                       # b=rol(b,30)
+___
+$code.=<<___ if ($i>=13 && $i<15);             # rounds 13 and 14: fetch the stored word that the coming Xupdate needs
+       vmovdqa `&Xi_off($j+2)`,@Xi[3]          # preload "X[2]"
+___
+$code.=<<___ if ($i>=15);                      # apply Xupdate
+       vpxor   @Xi[-2],@Xi[1],@Xi[1]           # "X[13]"
+       vmovdqa `&Xi_off($j+2)`,@Xi[3]          # "X[2]"
+
+       vpaddd  $K,$e,$e                        # e+=K_00_19
+       vpslld  \$5,$a,$t2
+       vpandn  $d,$b,$t1
+        `"prefetcht0   63(@ptr[4])"            if ($i==15 && $REG_SZ==32)`
+       vpand   $c,$b,$t0
+
+       vmovdqa @Xi[0],`&Xi_off($i)`
+       vpaddd  @Xi[0],$e,$e                    # e+=X[i]
+        vpxor  `&Xi_off($j+8)`,@Xi[1],@Xi[1]
+       vpsrld  \$27,$a,$t3
+       vpxor   $t1,$t0,$t0                     # Ch(b,c,d)
+        vpxor  @Xi[3],@Xi[1],@Xi[1]
+        `"prefetcht0   63(@ptr[5])"            if ($i==15 && $REG_SZ==32)`
+
+       vpslld  \$30,$b,$t1
+       vpor    $t3,$t2,$t2                     # rol(a,5)
+       vpaddd  $t0,$e,$e                       # e+=Ch(b,c,d)
+        `"prefetcht0   63(@ptr[6])"            if ($i==15 && $REG_SZ==32)`
+        vpsrld \$31,@Xi[1],$tx
+        vpaddd @Xi[1],@Xi[1],@Xi[1]
+
+       vpsrld  \$2,$b,$b
+        `"prefetcht0   63(@ptr[7])"            if ($i==15 && $REG_SZ==32)`
+       vpaddd  $t2,$e,$e                       # e+=rol(a,5)
+        vpor   $tx,@Xi[1],@Xi[1]               # rol   \$1,@Xi[1]
+       vpor    $t1,$b,$b                       # b=rol(b,30)
+___
+push(@Xi,shift(@Xi));                          # rotate the register names so that @Xi[0] is the word consumed by the next round
+}
+
+sub BODY_20_39_avx {                           # AVX form of the Parity round; the driver loops call it for rounds 20..39 and 60..79
+my ($i,$a,$b,$c,$d,$e)=@_;                     # round number, then the registers currently playing a,b,c,d,e
+my $j=$i+1;                                    # index of the next message word, whose Xupdate is interleaved with this round
+
+$code.=<<___ if ($i<79);                       # every round but the last: begin Xupdate of the next word and start the round
+       vpxor   @Xi[-2],@Xi[1],@Xi[1]           # "X[13]"
+       vmovdqa `&Xi_off($j+2)`,@Xi[3]          # "X[2]"
+
+       vpslld  \$5,$a,$t2
+       vpaddd  $K,$e,$e                        # e+=K_20_39
+       vpxor   $b,$d,$t0
+___
+$code.=<<___ if ($i<72);                       # store X[i] at Xi_off(i); NOTE(review): rounds 72+ skip the store, presumably because no later Xupdate reloads those slots - confirm
+       vmovdqa @Xi[0],`&Xi_off($i)`
+___
+$code.=<<___ if ($i<79);                       # every round but the last: finish the round and rotate the new word left by one
+       vpaddd  @Xi[0],$e,$e                    # e+=X[i]
+        vpxor  `&Xi_off($j+8)`,@Xi[1],@Xi[1]
+       vpsrld  \$27,$a,$t3
+       vpxor   $c,$t0,$t0                      # Parity(b,c,d)
+        vpxor  @Xi[3],@Xi[1],@Xi[1]
+
+       vpslld  \$30,$b,$t1
+       vpor    $t3,$t2,$t2                     # rol(a,5)
+       vpaddd  $t0,$e,$e                       # e+=Parity(b,c,d)
+        vpsrld \$31,@Xi[1],$tx
+        vpaddd @Xi[1],@Xi[1],@Xi[1]
+
+       vpsrld  \$2,$b,$b
+       vpaddd  $t2,$e,$e                       # e+=rol(a,5)
+        vpor   $tx,@Xi[1],@Xi[1]               # rol(@Xi[1],1)
+       vpor    $t1,$b,$b                       # b=rol(b,30)
+___
+$code.=<<___ if ($i==79);                      # last round: round arithmetic only, no message word left to prepare
+       vpslld  \$5,$a,$t2
+       vpaddd  $K,$e,$e                        # e+=K_20_39
+       vpxor   $b,$d,$t0
+
+       vpsrld  \$27,$a,$t3
+       vpaddd  @Xi[0],$e,$e                    # e+=X[i]
+       vpxor   $c,$t0,$t0                      # Parity(b,c,d)
+
+       vpslld  \$30,$b,$t1
+       vpor    $t3,$t2,$t2                     # rol(a,5)
+       vpaddd  $t0,$e,$e                       # e+=Parity(b,c,d)
+
+       vpsrld  \$2,$b,$b
+       vpaddd  $t2,$e,$e                       # e+=rol(a,5)
+       vpor    $t1,$b,$b                       # b=rol(b,30)
+___
+push(@Xi,shift(@Xi));                          # rotate the register names so that @Xi[0] is the word consumed by the next round
+}
+
+sub BODY_40_59_avx {                           # AVX form of the Maj round; the driver loop calls it for rounds 40..59
+my ($i,$a,$b,$c,$d,$e)=@_;                     # round number, then the registers currently playing a,b,c,d,e
+my $j=$i+1;                                    # index of the next message word, whose Xupdate is interleaved with this round
+
+$code.=<<___;                                  # round arithmetic interleaved with the Xupdate of the next message word
+       vpxor   @Xi[-2],@Xi[1],@Xi[1]           # "X[13]"
+       vmovdqa `&Xi_off($j+2)`,@Xi[3]          # "X[2]"
+
+       vpaddd  $K,$e,$e                        # e+=K_40_59
+       vpslld  \$5,$a,$t2
+       vpand   $c,$d,$t1                       # c AND d, first Maj term
+        vpxor  `&Xi_off($j+8)`,@Xi[1],@Xi[1]
+
+       vpaddd  $t1,$e,$e                       # e+=(c AND d)
+       vpsrld  \$27,$a,$t3
+       vpxor   $c,$d,$t0                       # c XOR d
+        vpxor  @Xi[3],@Xi[1],@Xi[1]
+
+       vmovdqu @Xi[0],`&Xi_off($i)`
+       vpaddd  @Xi[0],$e,$e                    # e+=X[i]
+       vpor    $t3,$t2,$t2                     # rol(a,5)
+        vpsrld \$31,@Xi[1],$tx
+       vpand   $b,$t0,$t0                      # (c XOR d) AND b, second Maj term; it shares no set bit with the first, so adding both equals OR
+        vpaddd @Xi[1],@Xi[1],@Xi[1]
+
+       vpslld  \$30,$b,$t1
+       vpaddd  $t0,$e,$e                       # e+=Maj(b,d,c)
+
+       vpsrld  \$2,$b,$b
+       vpaddd  $t2,$e,$e                       # e+=rol(a,5)
+        vpor   $tx,@Xi[1],@Xi[1]               # rol(@Xi[1],1)
+       vpor    $t1,$b,$b                       # b=rol(b,30)
+___
+push(@Xi,shift(@Xi));                          # rotate the register names so that @Xi[0] is the word consumed by the next round
+}
+
+$code.=<<___;                                  # AVX variant, reached from sha1_multi_block through _avx_shortcut
+.type  sha1_multi_block_avx,\@function,3
+.align 32
+sha1_multi_block_avx:
+_avx_shortcut:
+___
+$code.=<<___ if ($avx>1);                      # emitted only when $avx>1: go to the AVX2 code when num is at least 2 and bit 5 of the upper half of %rcx is set
+       shr     \$32,%rcx
+       cmp     \$2,$num
+       jb      .Lavx
+       test    \$`1<<5`,%ecx
+       jnz     _avx2_shortcut
+       jmp     .Lavx
+.align 32
+.Lavx:
+___
+$code.=<<___;                                  # prologue: remember the caller's %rsp in %rax and save %rbx/%rbp
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+___
+$code.=<<___ if ($win64);                      # Win64 only: after the two pushes the new %rsp equals %rax-0xb8; %xmm6-%xmm15 are saved from there upwards
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,-0x78(%rax)
+       movaps  %xmm11,-0x68(%rax)
+       movaps  %xmm12,-0x58(%rax)
+       movaps  %xmm13,-0x48(%rax)
+       movaps  %xmm14,-0x38(%rax)
+       movaps  %xmm15,-0x28(%rax)
+___
+$code.=<<___;                                  # reserve REG_SZ*18 bytes of scratch, aligned down to 256 bytes
+       sub     \$`$REG_SZ*18`, %rsp
+       and     \$-256,%rsp
+       mov     %rax,`$REG_SZ*17`(%rsp)         # original %rsp
+.Lbody_avx:
+       lea     K_XX_XX(%rip),$Tbl
+       lea     `$REG_SZ*16`(%rsp),%rbx
+
+       vzeroupper
+.Loop_grande_avx:
+       mov     $num,`$REG_SZ*17+8`(%rsp)       # original $num
+       xor     $num,$num
+___
+for($i=0;$i<4;$i++) {                          # per lane: read the 16-byte descriptor (pointer at +0, 32-bit block count at +8)
+    $code.=<<___;
+       mov     `16*$i+0`($inp),@ptr[$i]        # input pointer
+       mov     `16*$i+8`($inp),%ecx            # number of blocks
+       cmp     $num,%ecx
+       cmovg   %ecx,$num                       # find maximum
+       test    %ecx,%ecx
+       mov     %ecx,`4*$i`(%rbx)               # initialize counters
+       cmovle  $Tbl,@ptr[$i]                   # cancel input
+___
+}
+$code.=<<___;                                  # leave if every lane is empty, otherwise load the five state vectors and the byte-swap mask
+       test    $num,$num
+       jz      .Ldone_avx
+
+       vmovdqu 0x00($ctx),$A                   # load context
+        lea    128(%rsp),%rax                  # NOTE(review): presumably the base register behind the Xi_off operands - confirm
+       vmovdqu 0x20($ctx),$B
+       vmovdqu 0x40($ctx),$C
+       vmovdqu 0x60($ctx),$D
+       vmovdqu 0x80($ctx),$E
+       vmovdqu 0x60($Tbl),$tx                  # pbswap_mask
+       jmp     .Loop_avx
+
+.align 32
+.Loop_avx:
+___
+$code.="       vmovdqa -0x20($Tbl),$K\n";      # K_00_19
+for($i=0;$i<20;$i++)   { &BODY_00_19_avx($i,@V); unshift(@V,pop(@V)); }        # after each round rotate @V: the register that accumulated e becomes the next round's a
+$code.="       vmovdqa 0x00($Tbl),$K\n";       # K_20_39
+for(;$i<40;$i++)       { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); }        # rounds 20..39
+$code.="       vmovdqa 0x20($Tbl),$K\n";       # K_40_59
+for(;$i<60;$i++)       { &BODY_40_59_avx($i,@V); unshift(@V,pop(@V)); }        # rounds 40..59
+$code.="       vmovdqa 0x40($Tbl),$K\n";       # K_60_79
+for(;$i<80;$i++)       { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); }        # rounds 60..79 reuse the Parity round with the K_60_79 constant
+$code.=<<___;                                  # end of one block: %ecx=1 is compared against every lane counter below
+       mov     \$1,%ecx
+___
+for($i=0;$i<4;$i++) {                          # a lane whose counter is not above 1 has no further block, so its pointer is redirected to the table
+    $code.=<<___;
+       cmp     `4*$i`(%rbx),%ecx               # examine counters
+       cmovge  $Tbl,@ptr[$i]                   # cancel input
+___
+}
+$code.=<<___;                                  # decrement active counters, fold the result into the context, loop
+       vmovdqu (%rbx),$t0                      # pull counters
+       vpxor   $t2,$t2,$t2
+       vmovdqa $t0,$t1
+       vpcmpgtd $t2,$t1,$t1                    # mask value
+       vpaddd  $t1,$t0,$t0                     # counters--
+
+       vpand   $t1,$A,$A                       # zero the result in lanes whose counter was not positive, so their context stays as it was
+       vpand   $t1,$B,$B
+       vpaddd  0x00($ctx),$A,$A
+       vpand   $t1,$C,$C
+       vpaddd  0x20($ctx),$B,$B
+       vpand   $t1,$D,$D
+       vpaddd  0x40($ctx),$C,$C
+       vpand   $t1,$E,$E
+       vpaddd  0x60($ctx),$D,$D
+       vpaddd  0x80($ctx),$E,$E
+       vmovdqu $A,0x00($ctx)
+       vmovdqu $B,0x20($ctx)
+       vmovdqu $C,0x40($ctx)
+       vmovdqu $D,0x60($ctx)
+       vmovdqu $E,0x80($ctx)
+
+       vmovdqu $t0,(%rbx)                      # save counters
+       vmovdqu 0x60($Tbl),$tx                  # pbswap_mask
+       dec     $num
+       jnz     .Loop_avx
+
+       mov     `$REG_SZ*17+8`(%rsp),$num
+       lea     $REG_SZ($ctx),$ctx
+       lea     `16*$REG_SZ/4`($inp),$inp
+       dec     $num
+       jnz     .Loop_grande_avx
+
+.Ldone_avx:
+       mov     `$REG_SZ*17`(%rsp),%rax         # original %rsp
+       vzeroupper
+___
+$code.=<<___ if ($win64);                      # Win64 only: reload %xmm6-%xmm15 from the offsets used by the prologue
+       movaps  -0xb8(%rax),%xmm6
+       movaps  -0xa8(%rax),%xmm7
+       movaps  -0x98(%rax),%xmm8
+       movaps  -0x88(%rax),%xmm9
+       movaps  -0x78(%rax),%xmm10
+       movaps  -0x68(%rax),%xmm11
+       movaps  -0x58(%rax),%xmm12
+       movaps  -0x48(%rax),%xmm13
+       movaps  -0x38(%rax),%xmm14
+       movaps  -0x28(%rax),%xmm15
+___
+$code.=<<___;                                  # restore the pushed registers relative to the original %rsp and return
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp
+.Lepilogue_avx:
+       ret
+.size  sha1_multi_block_avx,.-sha1_multi_block_avx
+___
+
+                                               if ($avx>1) {
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+
+$REG_SZ=32;
+
+@ptr=map("%r$_",(12..15,8..11));
+
+@V=($A,$B,$C,$D,$E)=map("%ymm$_",(0..4));
+($t0,$t1,$t2,$t3,$tx)=map("%ymm$_",(5..9));
+@Xi=map("%ymm$_",(10..14));
+$K="%ymm15";
+
+$code.=<<___;
+.type  sha1_multi_block_avx2,\@function,3
+.align 32
+sha1_multi_block_avx2:
+_avx2_shortcut:
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+$code.=<<___ if ($win64);
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,0x40(%rsp)
+       movaps  %xmm11,0x50(%rsp)
+       movaps  %xmm12,-0x78(%rax)
+       movaps  %xmm13,-0x68(%rax)
+       movaps  %xmm14,-0x58(%rax)
+       movaps  %xmm15,-0x48(%rax)
+___
+$code.=<<___;
+       sub     \$`$REG_SZ*18`, %rsp
+       and     \$-256,%rsp
+       mov     %rax,`$REG_SZ*17`(%rsp)         # original %rsp
+.Lbody_avx2:
+       lea     K_XX_XX(%rip),$Tbl
+       shr     \$1,$num
+
+       vzeroupper
+.Loop_grande_avx2:
+       mov     $num,`$REG_SZ*17+8`(%rsp)       # original $num
+       xor     $num,$num
+       lea     `$REG_SZ*16`(%rsp),%rbx
+___
+for($i=0;$i<8;$i++) {
+    $code.=<<___;
+       mov     `16*$i+0`($inp),@ptr[$i]        # input pointer
+       mov     `16*$i+8`($inp),%ecx            # number of blocks
+       cmp     $num,%ecx
+       cmovg   %ecx,$num                       # find maximum
+       test    %ecx,%ecx
+       mov     %ecx,`4*$i`(%rbx)               # initialize counters
+       cmovle  $Tbl,@ptr[$i]                   # cancel input
+___
+}
+$code.=<<___;
+       vmovdqu 0x00($ctx),$A                   # load context
+        lea    128(%rsp),%rax
+       vmovdqu 0x20($ctx),$B
+        lea    256+128(%rsp),%rbx
+       vmovdqu 0x40($ctx),$C
+       vmovdqu 0x60($ctx),$D
+       vmovdqu 0x80($ctx),$E
+       vmovdqu 0x60($Tbl),$tx                  # pbswap_mask
+       jmp     .Loop_avx2
+
+.align 32
+.Loop_avx2:
+___
+$code.="       vmovdqa -0x20($Tbl),$K\n";      # K_00_19
+for($i=0;$i<20;$i++)   { &BODY_00_19_avx($i,@V); unshift(@V,pop(@V)); }
+$code.="       vmovdqa 0x00($Tbl),$K\n";       # K_20_39
+for(;$i<40;$i++)       { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); }
+$code.="       vmovdqa 0x20($Tbl),$K\n";       # K_40_59
+for(;$i<60;$i++)       { &BODY_40_59_avx($i,@V); unshift(@V,pop(@V)); }
+$code.="       vmovdqa 0x40($Tbl),$K\n";       # K_60_79
+for(;$i<80;$i++)       { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+       mov     \$1,%ecx
+       lea     `$REG_SZ*16`(%rsp),%rbx
+___
+for($i=0;$i<8;$i++) {
+    $code.=<<___;
+       cmp     `4*$i`(%rbx),%ecx               # examine counters
+       cmovge  $Tbl,@ptr[$i]                   # cancel input
+___
+}
+$code.=<<___;
+       vmovdqu (%rbx),$t0              # pull counters
+       vpxor   $t2,$t2,$t2
+       vmovdqa $t0,$t1
+       vpcmpgtd $t2,$t1,$t1                    # mask value
+       vpaddd  $t1,$t0,$t0                     # counters--
+
+       vpand   $t1,$A,$A
+       vpand   $t1,$B,$B
+       vpaddd  0x00($ctx),$A,$A
+       vpand   $t1,$C,$C
+       vpaddd  0x20($ctx),$B,$B
+       vpand   $t1,$D,$D
+       vpaddd  0x40($ctx),$C,$C
+       vpand   $t1,$E,$E
+       vpaddd  0x60($ctx),$D,$D
+       vpaddd  0x80($ctx),$E,$E
+       vmovdqu $A,0x00($ctx)
+       vmovdqu $B,0x20($ctx)
+       vmovdqu $C,0x40($ctx)
+       vmovdqu $D,0x60($ctx)
+       vmovdqu $E,0x80($ctx)
+
+       vmovdqu $t0,(%rbx)                      # save counters
+       lea     256+128(%rsp),%rbx
+       vmovdqu 0x60($Tbl),$tx                  # pbswap_mask
+       dec     $num
+       jnz     .Loop_avx2
+
+       #mov    `$REG_SZ*17+8`(%rsp),$num
+       #lea    $REG_SZ($ctx),$ctx
+       #lea    `16*$REG_SZ/4`($inp),$inp
+       #dec    $num
+       #jnz    .Loop_grande_avx2
+
+.Ldone_avx2:
+       mov     `$REG_SZ*17`(%rsp),%rax         # orignal %rsp
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       movaps  -0xd8(%rax),%xmm6
+       movaps  -0xc8(%rax),%xmm7
+       movaps  -0xb8(%rax),%xmm8
+       movaps  -0xa8(%rax),%xmm9
+       movaps  -0x98(%rax),%xmm10
+       movaps  -0x88(%rax),%xmm11
+       movaps  -0x78(%rax),%xmm12
+       movaps  -0x68(%rax),%xmm13
+       movaps  -0x58(%rax),%xmm14
+       movaps  -0x48(%rax),%xmm15
+___
+$code.=<<___;
+       mov     -48(%rax),%r15
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp
+.Lepilogue_avx2:
+       ret
+.size  sha1_multi_block_avx2,.-sha1_multi_block_avx2
+___
+                                               }       }}}
+# Constant pool used by the code paths above. The K_XX_XX label sits after
+# the first 32 bytes, so relative to it K_00_19 is at -0x20, K_20_39 at 0x00,
+# K_40_59 at 0x20, K_60_79 at 0x40 and the byte-swap mask at 0x60; these are
+# the displacements the round code uses off $Tbl. Each round constant is
+# replicated to fill 32 bytes.
+$code.=<<___;
+
+.align 256
+       .long   0x5a827999,0x5a827999,0x5a827999,0x5a827999     # K_00_19
+       .long   0x5a827999,0x5a827999,0x5a827999,0x5a827999     # K_00_19
+K_XX_XX:
+       .long   0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1     # K_20_39
+       .long   0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1     # K_20_39
+       .long   0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc     # K_40_59
+       .long   0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc     # K_40_59
+       .long   0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6     # K_60_79
+       .long   0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6     # K_60_79
+       .long   0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f     # pbswap
+       .long   0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f     # pbswap
+       .byte   0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
+       .asciz  "SHA1 multi-block transform for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+if ($win64) {
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern        __imp_RtlVirtualUnwind
+.type  se_handler,\@abi-omnipotent
+.align 16
+se_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # end of prologue label
+       cmp     %r10,%rbx               # context->Rip<.Lbody
+       jb      .Lin_prologue
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=.Lepilogue
+       jae     .Lin_prologue
+
+       mov     `16*17`(%rax),%rax      # pull saved stack pointer
+
+       mov     -8(%rax),%rbx
+       mov     -16(%rax),%rbp
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+
+       lea     -24-10*16(%rax),%rsi
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$20,%ecx
+       .long   0xa548f3fc              # cld; rep movsq
+
+.Lin_prologue:
+       mov     8(%rax),%rdi
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT)
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
+       mov     %r12,48(%rsp)           # arg7
+       mov     %rcx,56(%rsp)           # arg8, (NULL)
+       call    *__imp_RtlVirtualUnwind(%rip)
+
+       mov     \$1,%eax                # ExceptionContinueSearch
+       add     \$64,%rsp
+       popfq
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+       ret
+.size  se_handler,.-se_handler
+___
+# Win64 structured-exception handler for sha1_multi_block_avx2 (emitted only
+# when AVX2 code is generated). HandlerData[0]/[1] are the body and epilogue
+# labels of the protected function:
+#   - Rip before the body label: nothing has been moved yet, so the shared
+#     .Lin_prologue tail in se_handler is entered with context->Rax.
+#   - Rip at or past the epilogue label: the tail is entered with
+#     context->Rsp.
+#   - otherwise: the function's original %rsp is read back from the frame
+#     slot at 32*17 bytes above the current (aligned) stack pointer, the
+#     callee-saved %rbx, %rbp and %r12-%r15 are recovered from below it, and
+#     the ten saved XMM registers (20 quadwords) are copied into the context
+#     record starting at Xmm6.
+# The saved stack pointer lives in the function's frame, so it must be
+# addressed through %rax (which holds context->Rsp at that point), not
+# through the CONTEXT pointer.
+$code.=<<___ if ($avx>1);
+.type  avx2_handler,\@abi-omnipotent
+.align 16
+avx2_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # end of prologue label
+       cmp     %r10,%rbx               # context->Rip<body label
+       jb      .Lin_prologue
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=epilogue label
+       jae     .Lin_prologue
+
+       mov     `32*17`(%rax),%rax      # pull saved stack pointer
+
+       mov     -8(%rax),%rbx
+       mov     -16(%rax),%rbp
+       mov     -24(%rax),%r12
+       mov     -32(%rax),%r13
+       mov     -40(%rax),%r14
+       mov     -48(%rax),%r15
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+       mov     %r12,216($context)      # restore cotnext->R12
+       mov     %r13,224($context)      # restore cotnext->R13
+       mov     %r14,232($context)      # restore cotnext->R14
+       mov     %r15,240($context)      # restore cotnext->R15
+
+       lea     -56-10*16(%rax),%rsi
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$20,%ecx
+       .long   0xa548f3fc              # cld; rep movsq
+
+       jmp     .Lin_prologue
+.size  avx2_handler,.-avx2_handler
+___
+$code.=<<___;
+.section       .pdata
+.align 4
+       .rva    .LSEH_begin_sha1_multi_block
+       .rva    .LSEH_end_sha1_multi_block
+       .rva    .LSEH_info_sha1_multi_block
+       .rva    .LSEH_begin_sha1_multi_block_shaext
+       .rva    .LSEH_end_sha1_multi_block_shaext
+       .rva    .LSEH_info_sha1_multi_block_shaext
+___
+$code.=<<___ if ($avx);
+       .rva    .LSEH_begin_sha1_multi_block_avx
+       .rva    .LSEH_end_sha1_multi_block_avx
+       .rva    .LSEH_info_sha1_multi_block_avx
+___
+$code.=<<___ if ($avx>1);
+       .rva    .LSEH_begin_sha1_multi_block_avx2
+       .rva    .LSEH_end_sha1_multi_block_avx2
+       .rva    .LSEH_info_sha1_multi_block_avx2
+___
+$code.=<<___;
+.section       .xdata
+.align 8
+.LSEH_info_sha1_multi_block:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lbody,.Lepilogue                       # HandlerData[]
+.LSEH_info_sha1_multi_block_shaext:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lbody_shaext,.Lepilogue_shaext # HandlerData[]
+___
+$code.=<<___ if ($avx);
+.LSEH_info_sha1_multi_block_avx:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lbody_avx,.Lepilogue_avx               # HandlerData[]
+___
+$code.=<<___ if ($avx>1);
+.LSEH_info_sha1_multi_block_avx2:
+       .byte   9,0,0,0
+       .rva    avx2_handler
+       .rva    .Lbody_avx2,.Lepilogue_avx2             # HandlerData[]
+___
+}
+####################################################################
+
+# Prepend a REX prefix byte to an opcode array when an extended register
+# (number 8 or above) is involved.
+#   arg 0   reference to the opcode byte array; modified in place
+#   $dst    register number placed in the ModR/M reg field (sets 0x04, REX.R)
+#   $src    register number placed in the ModR/M r/m field (sets 0x01, REX.B)
+# Nothing is added when both register numbers are below 8.
+sub rex {
+  local *opcode=shift;         # alias @opcode to the caller's array
+  my ($dst,$src)=@_;
+  my $rex=0;
+
+    $rex|=0x04                 if ($dst>=8);
+    $rex|=0x01                 if ($src>=8);
+    unshift @opcode,$rex|0x40  if ($rex);
+}
+
+# Hand-assemble "sha1rnds4 $imm,%xmmSRC,%xmmDST" into a .byte sequence so
+# that assemblers without SHA extension support can still build the module.
+# Operand text that does not have that shape is returned as a plain
+# "sha1rnds4" instruction, untouched.
+sub sha1rnds4 {
+    my $operands = $_[0];
+
+    return "sha1rnds4\t".$operands
+       unless ($operands =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/);
+
+    my ($imm,$src,$dst) = ($1,$2,$3);
+    my @opcode=(0x0f,0x3a,0xcc);
+
+    rex(\@opcode,$dst,$src);
+    push @opcode,0xc0|($src&7)|(($dst&7)<<3);          # ModR/M
+    # immediates written with a leading 0 (octal/hex) are converted by oct()
+    push @opcode,($imm=~/^0/ ? oct($imm) : $imm);
+
+    return ".byte\t".join(',',@opcode);
+}
+
+# Hand-assemble the two-operand SHA1 instructions that live in the 0F 38
+# opcode map (sha1nexte, sha1msg1, sha1msg2) into a .byte sequence.
+#   $instr      mnemonic
+#   $operands   operand text, expected as "%xmmSRC,%xmmDST"
+# An unknown mnemonic or differently shaped operand text is returned as the
+# original "mnemonic<TAB>operands" string.
+sub sha1op38 {
+    my ($instr,$operands) = @_;
+    my %opcodelet = (
+               "sha1nexte" => 0xc8,
+               "sha1msg1"  => 0xc9,
+               "sha1msg2"  => 0xca     );
+
+    return $instr."\t".$operands
+       unless (defined($opcodelet{$instr}) &&
+               $operands =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/);
+
+    my ($src,$dst) = ($1,$2);
+    my @opcode=(0x0f,0x38);
+
+    rex(\@opcode,$dst,$src);
+    push @opcode,$opcodelet{$instr};
+    push @opcode,0xc0|($src&7)|(($dst&7)<<3);          # ModR/M
+
+    return ".byte\t".join(',',@opcode);
+}
+
+# Post-process the generated text one line at a time and print it.
+foreach (split("\n",$code)) {
+       # evaluate arithmetic enclosed in backticks
+       s/\`([^\`]*)\`/eval($1)/ge;
+
+       # The substitutions below are chained with "or": the first one that
+       # matches is applied and the rest are skipped for this line.
+       # SHA instructions are replaced by the output of the encoders above.
+       s/\b(sha1rnds4)\s+(.*)/sha1rnds4($2)/geo                or
+       s/\b(sha1[^\s]*)\s+(.*)/sha1op38($1,$2)/geo             or
+
+       # For the listed AVX mnemonics a %ymm operand is rewritten to the
+       # %xmm register of the same number; vinserti128 additionally gets a
+       # literal "\$1," immediate inserted before its first operand.
+       s/\b(vmov[dq])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go          or
+       s/\b(vmovdqu)\b(.+)%x%ymm([0-9]+)/$1$2%xmm$3/go         or
+       s/\b(vpinsr[qd])\b(.+)%ymm([0-9]+),%ymm([0-9]+)/$1$2%xmm$3,%xmm$4/go    or
+       s/\b(vpextr[qd])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go        or
+       s/\b(vinserti128)\b(\s+)%ymm/$1$2\$1,%xmm/go            or
+       s/\b(vpbroadcast[qd]\s+)%ymm([0-9]+)/$1%xmm$2/go;
+
+       print $_,"\n";
+}
+
+close STDOUT;
index 197bc6b..3408493 100644 (file)
 # compatible subroutine. There is room for minor optimization on
 # little-endian platforms...
 
+# September 2012.
+#
+# Add MIPS32r2 code (>25% less instructions).
+
 ######################################################################
 # There is a number of MIPS ABI in use, O32 and N32/64 are most
 # widely used. Then there is a new contender: NUBI. It appears that if
@@ -42,7 +46,7 @@
 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
 #
-$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
+$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
 
 if ($flavour =~ /64|n32/i) {
        $PTR_ADD="dadd";        # incidentally works even on n32
@@ -95,6 +99,10 @@ sub BODY_00_14 {
 my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___   if (!$big_endian);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       wsbh    @X[$i],@X[$i]   # byte swap($i)
+       rotr    @X[$i],@X[$i],16
+#else
        srl     $t0,@X[$i],24   # byte swap($i)
        srl     $t1,@X[$i],8
        andi    $t2,@X[$i],0xFF00
@@ -104,8 +112,22 @@ $code.=<<___       if (!$big_endian);
        or      @X[$i],$t0
        or      $t1,$t2
        or      @X[$i],$t1
+#endif
 ___
 $code.=<<___;
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       addu    $e,$K           # $i
+       xor     $t0,$c,$d
+       rotr    $t1,$a,27
+        lwl    @X[$j],$j*4+$MSB($inp)
+       and     $t0,$b
+       addu    $e,$t1
+        lwr    @X[$j],$j*4+$LSB($inp)
+       xor     $t0,$d
+       addu    $e,@X[$i]
+       rotr    $b,$b,2
+       addu    $e,$t0
+#else
         lwl    @X[$j],$j*4+$MSB($inp)
        sll     $t0,$a,5        # $i
        addu    $e,$K
@@ -121,6 +143,7 @@ $code.=<<___;
        addu    $e,@X[$i]
        or      $b,$t2
        addu    $e,$t0
+#endif
 ___
 }
 
@@ -129,6 +152,10 @@ my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 
 $code.=<<___   if (!$big_endian && $i==15);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       wsbh    @X[$i],@X[$i]   # byte swap($i)
+       rotr    @X[$i],@X[$i],16
+#else
        srl     $t0,@X[$i],24   # byte swap($i)
        srl     $t1,@X[$i],8
        andi    $t2,@X[$i],0xFF00
@@ -138,8 +165,24 @@ $code.=<<___       if (!$big_endian && $i==15);
        or      @X[$i],$t0
        or      @X[$i],$t1
        or      @X[$i],$t2
+#endif
 ___
 $code.=<<___;
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       addu    $e,$K           # $i
+        xor    @X[$j%16],@X[($j+2)%16]
+       xor     $t0,$c,$d
+       rotr    $t1,$a,27
+        xor    @X[$j%16],@X[($j+8)%16]
+       and     $t0,$b
+       addu    $e,$t1
+        xor    @X[$j%16],@X[($j+13)%16]
+       xor     $t0,$d
+       addu    $e,@X[$i%16]
+        rotr   @X[$j%16],@X[$j%16],31
+       rotr    $b,$b,2
+       addu    $e,$t0
+#else
         xor    @X[$j%16],@X[($j+2)%16]
        sll     $t0,$a,5        # $i
        addu    $e,$K
@@ -159,6 +202,7 @@ $code.=<<___;
        addu    $e,@X[$i%16]
        or      $b,$t2
        addu    $e,$t0
+#endif
 ___
 }
 
@@ -166,6 +210,20 @@ sub BODY_20_39 {
 my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___ if ($i<79);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+        xor    @X[$j%16],@X[($j+2)%16]
+       addu    $e,$K           # $i
+       rotr    $t1,$a,27
+        xor    @X[$j%16],@X[($j+8)%16]
+       xor     $t0,$c,$d
+       addu    $e,$t1
+        xor    @X[$j%16],@X[($j+13)%16]
+       xor     $t0,$b
+       addu    $e,@X[$i%16]
+        rotr   @X[$j%16],@X[$j%16],31
+       rotr    $b,$b,2
+       addu    $e,$t0
+#else
         xor    @X[$j%16],@X[($j+2)%16]
        sll     $t0,$a,5        # $i
        addu    $e,$K
@@ -184,8 +242,24 @@ $code.=<<___ if ($i<79);
         or     @X[$j%16],$t1
        or      $b,$t2
        addu    $e,$t0
+#endif
 ___
 $code.=<<___ if ($i==79);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+        lw     @X[0],0($ctx)
+       addu    $e,$K           # $i
+        lw     @X[1],4($ctx)
+       rotr    $t1,$a,27
+        lw     @X[2],8($ctx)
+       xor     $t0,$c,$d
+       addu    $e,$t1
+        lw     @X[3],12($ctx)
+       xor     $t0,$b
+       addu    $e,@X[$i%16]
+        lw     @X[4],16($ctx)
+       rotr    $b,$b,2
+       addu    $e,$t0
+#else
         lw     @X[0],0($ctx)
        sll     $t0,$a,5        # $i
        addu    $e,$K
@@ -203,6 +277,7 @@ $code.=<<___ if ($i==79);
        addu    $e,@X[$i%16]
        or      $b,$t2
        addu    $e,$t0
+#endif
 ___
 }
 
@@ -210,6 +285,22 @@ sub BODY_40_59 {
 my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___ if ($i<79);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       addu    $e,$K           # $i
+       and     $t0,$c,$d
+        xor    @X[$j%16],@X[($j+2)%16]
+       rotr    $t1,$a,27
+       addu    $e,$t0
+        xor    @X[$j%16],@X[($j+8)%16]
+       xor     $t0,$c,$d
+       addu    $e,$t1
+        xor    @X[$j%16],@X[($j+13)%16]
+       and     $t0,$b
+       addu    $e,@X[$i%16]
+        rotr   @X[$j%16],@X[$j%16],31
+       rotr    $b,$b,2
+       addu    $e,$t0
+#else
         xor    @X[$j%16],@X[($j+2)%16]
        sll     $t0,$a,5        # $i
        addu    $e,$K
@@ -230,6 +321,7 @@ $code.=<<___ if ($i<79);
        addu    $e,@X[$i%16]
        or      $b,$t2
        addu    $e,$t0
+#endif
 ___
 }
 
@@ -241,6 +333,10 @@ $code=<<___;
 # include <openssl/fipssyms.h>
 #endif
 
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
 .text
 
 .set   noat
index 2140dd2..df59896 100755 (executable)
@@ -9,8 +9,7 @@
 
 # I let hardware handle unaligned input(*), except on page boundaries
 # (see below for details). Otherwise straightforward implementation
-# with X vector in register bank. The module is big-endian [which is
-# not big deal as there're no little-endian targets left around].
+# with X vector in register bank.
 #
 # (*) this means that this module is inappropriate for PPC403? Does
 #     anybody know if pre-POWER3 can sustain unaligned load?
@@ -38,6 +37,10 @@ if ($flavour =~ /64/) {
        $PUSH   ="stw";
 } else { die "nonsense $flavour"; }
 
+# Define endianness based on flavour
+# i.e.: linux64le
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
+
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
@@ -68,14 +71,28 @@ $T  ="r12";
 @X=("r16","r17","r18","r19","r20","r21","r22","r23",
     "r24","r25","r26","r27","r28","r29","r30","r31");
 
+# Append code to $code that loads the 32-bit word at $src into $dst as a
+# big-endian value. On big-endian builds this is a single lwz; when
+# $LITTLE_ENDIAN is set the word is loaded into $temp_reg first and then
+# byte-swapped into $dst with one rotate and two rotate-and-insert steps.
+sub loadbe {
+my ($dst, $src, $temp_reg) = @_;
+
+if ($LITTLE_ENDIAN) {
+$code.=<<___;
+       lwz     $temp_reg,$src
+       rotlwi  $dst,$temp_reg,8
+       rlwimi  $dst,$temp_reg,24,0,7
+       rlwimi  $dst,$temp_reg,24,16,23
+___
+} else {
+$code.=<<___;
+       lwz     $dst,$src
+___
+}
+}
+
 sub BODY_00_19 {
 my ($i,$a,$b,$c,$d,$e,$f)=@_;
 my $j=$i+1;
-$code.=<<___ if ($i==0);
-       lwz     @X[$i],`$i*4`($inp)
-___
+
+       # Since the last value of $f is discarded, we can use
+       # it as a temp reg to swap byte-order when needed.
+       loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
+       loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
 $code.=<<___ if ($i<15);
-       lwz     @X[$j],`$j*4`($inp)
        add     $f,$K,$e
        rotlwi  $e,$a,5
        add     $f,$f,@X[$i]
@@ -108,31 +125,31 @@ my ($i,$a,$b,$c,$d,$e,$f)=@_;
 my $j=$i+1;
 $code.=<<___ if ($i<79);
        add     $f,$K,$e
+       xor     $t0,$b,$d
        rotlwi  $e,$a,5
        xor     @X[$j%16],@X[$j%16],@X[($j+2)%16]
        add     $f,$f,@X[$i%16]
-       xor     $t0,$b,$c
+       xor     $t0,$t0,$c
        xor     @X[$j%16],@X[$j%16],@X[($j+8)%16]
-       add     $f,$f,$e
+       add     $f,$f,$t0
        rotlwi  $b,$b,30
-       xor     $t0,$t0,$d
        xor     @X[$j%16],@X[$j%16],@X[($j+13)%16]
-       add     $f,$f,$t0
+       add     $f,$f,$e
        rotlwi  @X[$j%16],@X[$j%16],1
 ___
 $code.=<<___ if ($i==79);
        add     $f,$K,$e
+       xor     $t0,$b,$d
        rotlwi  $e,$a,5
        lwz     r16,0($ctx)
        add     $f,$f,@X[$i%16]
-       xor     $t0,$b,$c
+       xor     $t0,$t0,$c
        lwz     r17,4($ctx)
-       add     $f,$f,$e
+       add     $f,$f,$t0
        rotlwi  $b,$b,30
        lwz     r18,8($ctx)
-       xor     $t0,$t0,$d
        lwz     r19,12($ctx)
-       add     $f,$f,$t0
+       add     $f,$f,$e
        lwz     r20,16($ctx)
 ___
 }
@@ -316,6 +333,7 @@ $code.=<<___;
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
+.size  .sha1_block_data_order,.-.sha1_block_data_order
 ___
 $code.=<<___;
 .asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
index 5c161ce..b5efcde 100644 (file)
@@ -5,6 +5,8 @@
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Hardware SPARC T4 support by David S. Miller <davem@davemloft.net>.
 # ====================================================================
 
 # Performance improvement is not really impressive on pre-T1 CPU: +8%
 # ensure scalability on UltraSPARC T1, or rather to avoid decay when
 # amount of active threads exceeds the number of physical cores.
 
-$bits=32;
-for (@ARGV)    { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else           { $bias=0;    $frame=112; }
+# SPARC T4 SHA1 hardware achieves 3.72 cycles per byte, which is 3.1x
+# faster than software. Multi-process benchmark saturates at 11x
+# single-process result on 8-core processor, or ~9GBps per 2.85GHz
+# socket.
 
 $output=shift;
 open STDOUT,">$output";
@@ -178,17 +180,102 @@ $code.=<<___;
 ___
 }
 
-$code.=<<___ if ($bits==64);
+$code.=<<___;
+#include "sparc_arch.h"
+
+#ifdef __arch64__
 .register      %g2,#scratch
 .register      %g3,#scratch
-___
-$code.=<<___;
+#endif
+
 .section       ".text",#alloc,#execinstr
 
+#ifdef __PIC__
+SPARC_PIC_THUNK(%g1)
+#endif
+
 .align 32
 .globl sha1_block_data_order
 sha1_block_data_order:
-       save    %sp,-$frame,%sp
+       SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
+       ld      [%g1+4],%g1             ! OPENSSL_sparcv9cap_P[1]
+
+       andcc   %g1, CFR_SHA1, %g0
+       be      .Lsoftware
+       nop
+
+       ld      [%o0 + 0x00], %f0       ! load context
+       ld      [%o0 + 0x04], %f1
+       ld      [%o0 + 0x08], %f2
+       andcc   %o1, 0x7, %g0
+       ld      [%o0 + 0x0c], %f3
+       bne,pn  %icc, .Lhwunaligned
+        ld     [%o0 + 0x10], %f4
+
+.Lhw_loop:
+       ldd     [%o1 + 0x00], %f8
+       ldd     [%o1 + 0x08], %f10
+       ldd     [%o1 + 0x10], %f12
+       ldd     [%o1 + 0x18], %f14
+       ldd     [%o1 + 0x20], %f16
+       ldd     [%o1 + 0x28], %f18
+       ldd     [%o1 + 0x30], %f20
+       subcc   %o2, 1, %o2             ! done yet? 
+       ldd     [%o1 + 0x38], %f22
+       add     %o1, 0x40, %o1
+       prefetch [%o1 + 63], 20
+
+       .word   0x81b02820              ! SHA1
+
+       bne,pt  SIZE_T_CC, .Lhw_loop
+       nop
+
+.Lhwfinish:
+       st      %f0, [%o0 + 0x00]       ! store context
+       st      %f1, [%o0 + 0x04]
+       st      %f2, [%o0 + 0x08]
+       st      %f3, [%o0 + 0x0c]
+       retl
+       st      %f4, [%o0 + 0x10]
+
+.align 8
+.Lhwunaligned:
+       alignaddr %o1, %g0, %o1
+
+       ldd     [%o1 + 0x00], %f10
+.Lhwunaligned_loop:
+       ldd     [%o1 + 0x08], %f12
+       ldd     [%o1 + 0x10], %f14
+       ldd     [%o1 + 0x18], %f16
+       ldd     [%o1 + 0x20], %f18
+       ldd     [%o1 + 0x28], %f20
+       ldd     [%o1 + 0x30], %f22
+       ldd     [%o1 + 0x38], %f24
+       subcc   %o2, 1, %o2             ! done yet?
+       ldd     [%o1 + 0x40], %f26
+       add     %o1, 0x40, %o1
+       prefetch [%o1 + 63], 20
+
+       faligndata %f10, %f12, %f8
+       faligndata %f12, %f14, %f10
+       faligndata %f14, %f16, %f12
+       faligndata %f16, %f18, %f14
+       faligndata %f18, %f20, %f16
+       faligndata %f20, %f22, %f18
+       faligndata %f22, %f24, %f20
+       faligndata %f24, %f26, %f22
+
+       .word   0x81b02820              ! SHA1
+
+       bne,pt  SIZE_T_CC, .Lhwunaligned_loop
+       for     %f26, %f26, %f10        ! %f10=%f26
+
+       ba      .Lhwfinish
+       nop
+
+.align 16
+.Lsoftware:
+       save    %sp,-STACK_FRAME,%sp
        sllx    $len,6,$len
        add     $inp,$len,$len
 
@@ -268,7 +355,7 @@ $code.=<<___;
        add     $E,@X[4],$E
        st      $E,[$ctx+16]
 
-       bne     `$bits==64?"%xcc":"%icc"`,.Lloop
+       bne     SIZE_T_CC,.Lloop
        andn    $inp,7,$tmp0
 
        ret
@@ -279,6 +366,62 @@ $code.=<<___;
 .align 4
 ___
 
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
+# The purpose of these subroutines is to explicitly encode VIS instructions,
+# so that one can compile the module without having to specify VIS
+# extensions on the compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# The idea is to keep open the option of producing a "universal" binary and
+# to let the programmer detect at run-time whether the current CPU is VIS capable.
+# Encode a three-operand VIS instruction as a raw .word, so the assembler
+# does not need VIS enabled.
+#   $mnemonic        instruction name; only "faligndata" and "for" are encoded
+#   $rs1,$rs2,$rd    operands, expected in the form %fN
+# Returns ".word 0x........ !<original text>" on success. An unknown
+# mnemonic, an operand that is not %fN, or an odd register number >= 32
+# returns the original instruction text unchanged.
+sub unvis {
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my ($ref,$opf);        # parenthesised: "my \$ref,\$opf" would scope only \$ref
+my %visopf = ( "faligndata"    => 0x048,
+               "for"           => 0x07c        );
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rd";
+
+    if ($opf=$visopf{$mnemonic}) {
+       # $_ aliases each operand in turn, so assigning to it replaces the
+       # register name with its numeric encoding.
+       foreach ($rs1,$rs2,$rd) {
+           return $ref if (!/%f([0-9]{1,2})/);
+           $_=$1;
+           if ($1>=32) {
+               return $ref if ($1&1);
+               # re-encode for upper double register addressing
+               $_=($1|$1>>5)&31;
+           }
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+# Encode "alignaddr %rs1,%rs2,%rd" as a raw .word. Each operand must be an
+# integer register written as %g/%o/%l/%i followed by 0-7; it is converted
+# to its register number (g=0-7, o=8-15, l=16-23, i=24-31). If any operand
+# has another form the original instruction text is returned unchanged.
+sub unalignaddr {
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my $ref=join("\t",$mnemonic,join(",",$rs1,$rs2,$rd));
+my %bank_base = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
+my @regno;
+
+    for my $reg ($rs1,$rs2,$rd) {
+       return $ref unless ($reg =~ /%([goli])([0-7])/);
+       push @regno,$bank_base{$1}+$2;
+    }
+
+    my ($n1,$n2,$nd)=@regno;
+    return sprintf(".word\t0x%08x !%s",
+                  0x81b00300|($nd<<25)|($n1<<14)|$n2,
+                  $ref);
+}
+
+# Post-process the generated text one line at a time and print it.
+foreach (split("\n",$code)) {
+       # evaluate expressions enclosed in backticks
+       s/\`([^\`]*)\`/eval $1/ge;
+
+       # instructions whose mnemonic starts with "f" and that take three %f
+       # operands are passed to unvis(), which may replace them with a .word
+       s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
+               &unvis($1,$2,$3,$4)
+        /ge;
+       # alignaddr with three integer-register operands goes to unalignaddr()
+       s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
+               &unalignaddr($1,$2,$3,$4)
+        /ge;
+
+       print $_,"\n";
+}
+
 close STDOUT;
index f15c7ec..9bb6b49 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 #
 # Add AVX code path. See sha1-586.pl for further information.
 
+# May 2013.
+#
+# Add AVX2+BMI code path. Initial attempt (utilizing BMI instructions
+# and loading pair of consecutive blocks to 256-bit %ymm registers)
+# did not provide impressive performance improvement till a crucial
+# hint regarding the number of Xupdate iterations to pre-compute in
+# advance was provided by Ilya Albrekht of Intel Corp.
+
+# March 2014.
+#
+# Add support for Intel SHA Extensions.
+
 ######################################################################
 # Current performance is summarized in following table. Numbers are
 # CPU clock cycles spent to process single byte (less is better).
 #
-#              x86_64          SSSE3           AVX
-# P4           9.8             -
-# Opteron      6.6             -
-# Core2                6.7             6.1/+10%        -
-# Atom         11.0            9.7/+13%        -
-# Westmere     7.1             5.6/+27%        -
-# Sandy Bridge 7.9             6.3/+25%        5.2/+51%
+#              x86_64          SSSE3           AVX[2]
+# P4           9.05            -
+# Opteron      6.26            -
+# Core2                6.55            6.05/+8%        -
+# Westmere     6.73            5.30/+27%       -
+# Sandy Bridge 7.70            6.10/+26%       4.99/+54%
+# Ivy Bridge   6.06            4.67/+30%       4.60/+32%
+# Haswell      5.45            4.15/+31%       3.57/+53%
+# Bulldozer    9.11            5.95/+53%
+# VIA Nano     9.32            7.15/+30%
+# Atom         10.3            9.17/+12%
+# Silvermont   13.1(*)         9.37/+40%
+#
+# (*)  obviously suboptimal result, nothing was done about it,
+#      because SSSE3 code is compiled unconditionally;
 
 $flavour = shift;
 $output  = shift;
@@ -72,15 +92,27 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
 
-$avx=1 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
-               =~ /GNU assembler version ([2-9]\.[0-9]+)/ &&
-          $1>=2.19);
-$avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
-          `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ &&
-          $1>=2.09);
-$avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
-          `ml64 2>&1` =~ /Version ([0-9]+)\./ &&
-          $1>=10);
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.19) + ($1>=2.22);
+}
+
+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+          `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.09) + ($1>=2.10);
+}
+
+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+          `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=11);
+}
+
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([0-9]+\.[0-9]+)/) {     # accept multi-digit majors (clang 10+)
+       $avx = ($2>=3.0) + ($2>3.0);    # 1: AVX code path only (3.0), 2: AVX2 path as well (newer than 3.0)
+}
+
+$shaext=1;     ### set to zero if compiling for 1.0.1
+$avx=1         if (!$shaext && $avx);
 
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
@@ -97,7 +129,7 @@ $num="%r10";
 $t0="%eax";
 $t1="%ebx";
 $t2="%ecx";
-@xi=("%edx","%ebp");
+@xi=("%edx","%ebp","%r14d");
 $A="%esi";
 $B="%edi";
 $C="%r11d";
@@ -112,42 +144,40 @@ my $j=$i+1;
 $code.=<<___ if ($i==0);
        mov     `4*$i`($inp),$xi[0]
        bswap   $xi[0]
-       mov     $xi[0],`4*$i`(%rsp)
 ___
 $code.=<<___ if ($i<15);
-       mov     $c,$t0
        mov     `4*$j`($inp),$xi[1]
+       mov     $d,$t0
+       mov     $xi[0],`4*$i`(%rsp)
        mov     $a,$t2
-       xor     $d,$t0
        bswap   $xi[1]
+       xor     $c,$t0
        rol     \$5,$t2
-       lea     0x5a827999($xi[0],$e),$e
        and     $b,$t0
-       mov     $xi[1],`4*$j`(%rsp)
+       lea     0x5a827999($xi[0],$e),$e
        add     $t2,$e
        xor     $d,$t0
        rol     \$30,$b
        add     $t0,$e
 ___
 $code.=<<___ if ($i>=15);
-       mov     `4*($j%16)`(%rsp),$xi[1]
-       mov     $c,$t0
+       xor     `4*($j%16)`(%rsp),$xi[1]
+       mov     $d,$t0
+       mov     $xi[0],`4*($i%16)`(%rsp)
        mov     $a,$t2
        xor     `4*(($j+2)%16)`(%rsp),$xi[1]
-       xor     $d,$t0
+       xor     $c,$t0
        rol     \$5,$t2
        xor     `4*(($j+8)%16)`(%rsp),$xi[1]
        and     $b,$t0
        lea     0x5a827999($xi[0],$e),$e
-       xor     `4*(($j+13)%16)`(%rsp),$xi[1]
+       rol     \$30,$b
        xor     $d,$t0
-       rol     \$1,$xi[1]
        add     $t2,$e
-       rol     \$30,$b
-       mov     $xi[1],`4*($j%16)`(%rsp)
+       rol     \$1,$xi[1]
        add     $t0,$e
 ___
-unshift(@xi,pop(@xi));
+push(@xi,shift(@xi));
 }
 
 sub BODY_20_39 {
@@ -155,62 +185,58 @@ my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 my $K=($i<40)?0x6ed9eba1:0xca62c1d6;
 $code.=<<___ if ($i<79);
-       mov     `4*($j%16)`(%rsp),$xi[1]
-       mov     $c,$t0
+       xor     `4*($j%16)`(%rsp),$xi[1]
+       mov     $b,$t0
+       `"mov   $xi[0],".4*($i%16)."(%rsp)"     if ($i<72)`
        mov     $a,$t2
        xor     `4*(($j+2)%16)`(%rsp),$xi[1]
-       xor     $b,$t0
+       xor     $d,$t0
        rol     \$5,$t2
-       lea     $K($xi[0],$e),$e
        xor     `4*(($j+8)%16)`(%rsp),$xi[1]
-       xor     $d,$t0
+       lea     $K($xi[0],$e),$e
+       xor     $c,$t0
        add     $t2,$e
-       xor     `4*(($j+13)%16)`(%rsp),$xi[1]
        rol     \$30,$b
        add     $t0,$e
        rol     \$1,$xi[1]
 ___
-$code.=<<___ if ($i<76);
-       mov     $xi[1],`4*($j%16)`(%rsp)
-___
 $code.=<<___ if ($i==79);
-       mov     $c,$t0
+       mov     $b,$t0
        mov     $a,$t2
-       xor     $b,$t0
+       xor     $d,$t0
        lea     $K($xi[0],$e),$e
        rol     \$5,$t2
-       xor     $d,$t0
+       xor     $c,$t0
        add     $t2,$e
        rol     \$30,$b
        add     $t0,$e
 ___
-unshift(@xi,pop(@xi));
+push(@xi,shift(@xi));
 }
 
 sub BODY_40_59 {
 my ($i,$a,$b,$c,$d,$e)=@_;
 my $j=$i+1;
 $code.=<<___;
-       mov     `4*($j%16)`(%rsp),$xi[1]
-       mov     $c,$t0
-       mov     $c,$t1
+       xor     `4*($j%16)`(%rsp),$xi[1]
+       mov     $d,$t0
+       mov     $xi[0],`4*($i%16)`(%rsp)
+       mov     $d,$t1
        xor     `4*(($j+2)%16)`(%rsp),$xi[1]
-       and     $d,$t0
+       and     $c,$t0
        mov     $a,$t2
        xor     `4*(($j+8)%16)`(%rsp),$xi[1]
-       xor     $d,$t1
        lea     0x8f1bbcdc($xi[0],$e),$e
+       xor     $c,$t1
        rol     \$5,$t2
-       xor     `4*(($j+13)%16)`(%rsp),$xi[1]
        add     $t0,$e
-       and     $b,$t1
        rol     \$1,$xi[1]
-       add     $t1,$e
-       rol     \$30,$b
-       mov     $xi[1],`4*($j%16)`(%rsp)
+       and     $b,$t1
        add     $t2,$e
+       rol     \$30,$b
+       add     $t1,$e
 ___
-unshift(@xi,pop(@xi));
+push(@xi,shift(@xi));
 }
 
 $code.=<<___;
@@ -223,9 +249,19 @@ $code.=<<___;
 sha1_block_data_order:
        mov     OPENSSL_ia32cap_P+0(%rip),%r9d
        mov     OPENSSL_ia32cap_P+4(%rip),%r8d
+       mov     OPENSSL_ia32cap_P+8(%rip),%r10d
        test    \$`1<<9`,%r8d           # check SSSE3 bit
        jz      .Lialu
 ___
+$code.=<<___ if ($shaext);
+       test    \$`1<<29`,%r10d         # check SHA bit 
+       jnz     _shaext_shortcut
+___
+$code.=<<___ if ($avx>1);
+       and     \$`1<<3|1<<5|1<<8`,%r10d        # check AVX2+BMI1+BMI2
+       cmp     \$`1<<3|1<<5|1<<8`,%r10d
+       je      _avx2_shortcut
+___
 $code.=<<___ if ($avx);
        and     \$`1<<28`,%r8d          # mask AVX bit
        and     \$`1<<30`,%r9d          # mask "Intel CPU" bit
@@ -238,17 +274,18 @@ $code.=<<___;
 
 .align 16
 .Lialu:
+       mov     %rsp,%rax
        push    %rbx
        push    %rbp
        push    %r12
        push    %r13
-       mov     %rsp,%r11
+       push    %r14
        mov     %rdi,$ctx       # reassigned argument
        sub     \$`8+16*4`,%rsp
        mov     %rsi,$inp       # reassigned argument
        and     \$-64,%rsp
        mov     %rdx,$num       # reassigned argument
-       mov     %r11,`16*4`(%rsp)
+       mov     %rax,`16*4`(%rsp)
 .Lprologue:
 
        mov     0($ctx),$A
@@ -282,53 +319,187 @@ $code.=<<___;
        jnz     .Lloop
 
        mov     `16*4`(%rsp),%rsi
-       mov     (%rsi),%r13
-       mov     8(%rsi),%r12
-       mov     16(%rsi),%rbp
-       mov     24(%rsi),%rbx
-       lea     32(%rsi),%rsp
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp
 .Lepilogue:
        ret
 .size  sha1_block_data_order,.-sha1_block_data_order
 ___
+if ($shaext) {{{
+######################################################################
+# Intel SHA Extensions implementation of SHA1 update function.
+#
+my ($ctx,$inp,$num)=("%rdi","%rsi","%rdx");
+my ($ABCD,$E,$E_,$BSWAP,$ABCD_SAVE,$E_SAVE)=map("%xmm$_",(0..3,8,9));
+my @MSG=map("%xmm$_",(4..7));
+
+$code.=<<___;
+.type  sha1_block_data_order_shaext,\@function,3
+.align 32
+sha1_block_data_order_shaext:
+_shaext_shortcut:
+___
+$code.=<<___ if ($win64);
+       lea     `-8-4*16`(%rsp),%rsp
+       movaps  %xmm6,-8-4*16(%rax)
+       movaps  %xmm7,-8-3*16(%rax)
+       movaps  %xmm8,-8-2*16(%rax)
+       movaps  %xmm9,-8-1*16(%rax)
+.Lprologue_shaext:
+___
+$code.=<<___;  # load state, load and byte-swap the first block, then emit the loop head
+       movdqu  ($ctx),$ABCD
+       movd    16($ctx),$E
+       movdqa  K_XX_XX+0xa0(%rip),$BSWAP       # byte-n-word swap
+
+       movdqu  ($inp),@MSG[0]
+       pshufd  \$0b00011011,$ABCD,$ABCD        # flip word order
+       movdqu  0x10($inp),@MSG[1]
+       pshufd  \$0b00011011,$E,$E              # flip word order
+       movdqu  0x20($inp),@MSG[2]
+       pshufb  $BSWAP,@MSG[0]
+       movdqu  0x30($inp),@MSG[3]
+       pshufb  $BSWAP,@MSG[1]
+       pshufb  $BSWAP,@MSG[2]
+       movdqa  $E,$E_SAVE                      # offload $E
+       pshufb  $BSWAP,@MSG[3]
+       jmp     .Loop_shaext
+
+.align 16
+.Loop_shaext:
+       dec             $num
+       lea             0x40($inp),%r8          # next input block; %r8 is scratch, %rax must survive for the Win64 epilogue
+       paddd           @MSG[0],$E
+       cmovne          %r8,$inp                # advance input only if more blocks remain
+       movdqa          $ABCD,$ABCD_SAVE        # offload $ABCD
+___
+for($i=0;$i<20-4;$i+=2) {
+$code.=<<___;
+       sha1msg1        @MSG[1],@MSG[0]
+       movdqa          $ABCD,$E_
+       sha1rnds4       \$`int($i/5)`,$E,$ABCD  # 0-3...
+       sha1nexte       @MSG[1],$E_
+       pxor            @MSG[2],@MSG[0]
+       sha1msg1        @MSG[2],@MSG[1]
+       sha1msg2        @MSG[3],@MSG[0]
+
+       movdqa          $ABCD,$E
+       sha1rnds4       \$`int(($i+1)/5)`,$E_,$ABCD
+       sha1nexte       @MSG[2],$E
+       pxor            @MSG[3],@MSG[1]
+       sha1msg2        @MSG[0],@MSG[1]
+___
+       push(@MSG,shift(@MSG)); push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+       movdqu          ($inp),@MSG[0]
+       movdqa          $ABCD,$E_
+       sha1rnds4       \$3,$E,$ABCD            # 64-67
+       sha1nexte       @MSG[1],$E_
+       movdqu          0x10($inp),@MSG[1]
+       pshufb          $BSWAP,@MSG[0]
+
+       movdqa          $ABCD,$E
+       sha1rnds4       \$3,$E_,$ABCD           # 68-71
+       sha1nexte       @MSG[2],$E
+       movdqu          0x20($inp),@MSG[2]
+       pshufb          $BSWAP,@MSG[1]
+
+       movdqa          $ABCD,$E_
+       sha1rnds4       \$3,$E,$ABCD            # 72-75
+       sha1nexte       @MSG[3],$E_
+       movdqu          0x30($inp),@MSG[3]
+       pshufb          $BSWAP,@MSG[2]
+
+       movdqa          $ABCD,$E
+       sha1rnds4       \$3,$E_,$ABCD           # 76-79
+       sha1nexte       $E_SAVE,$E
+       pshufb          $BSWAP,@MSG[3]
+
+       paddd           $ABCD_SAVE,$ABCD
+       movdqa          $E,$E_SAVE              # offload $E
+
+       jnz             .Loop_shaext
+
+       pshufd  \$0b00011011,$ABCD,$ABCD
+       pshufd  \$0b00011011,$E,$E
+       movdqu  $ABCD,($ctx)
+       movd    $E,16($ctx)
+___
+$code.=<<___ if ($win64);
+       movaps  -8-4*16(%rax),%xmm6
+       movaps  -8-3*16(%rax),%xmm7
+       movaps  -8-2*16(%rax),%xmm8
+       movaps  -8-1*16(%rax),%xmm9
+       mov     %rax,%rsp
+.Lepilogue_shaext:
+___
+$code.=<<___;
+       ret
+.size  sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
+___
+}}}
 {{{
 my $Xi=4;
 my @X=map("%xmm$_",(4..7,0..3));
 my @Tx=map("%xmm$_",(8..10));
+my $Kx="%xmm11";
 my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp");   # size optimization
 my @T=("%esi","%edi");
 my $j=0;
+my $rx=0;
 my $K_XX_XX="%r11";
 
 my $_rol=sub { &rol(@_) };
 my $_ror=sub { &ror(@_) };
 
+{ my $sn;      # call counter private to align32(); gives every emitted label a distinct suffix
+sub align32() {        # appends to $code a jump to a fresh label placed on a 32-byte boundary
+  ++$sn;       # new suffix for this call's label
+$code.=<<___;
+       jmp     .Lalign32_$sn   # see "Decoded ICache" in manual
+.align 32
+.Lalign32_$sn:
+___
+}
+}
+
 $code.=<<___;
 .type  sha1_block_data_order_ssse3,\@function,3
 .align 16
 sha1_block_data_order_ssse3:
 _ssse3_shortcut:
+       mov     %rsp,%rax
        push    %rbx
        push    %rbp
        push    %r12
-       lea     `-64-($win64?5*16:0)`(%rsp),%rsp
+       push    %r13            # redundant, done to share Win64 SE handler
+       push    %r14
+       lea     `-64-($win64?6*16:0)`(%rsp),%rsp
 ___
 $code.=<<___ if ($win64);
-       movaps  %xmm6,64+0(%rsp)
-       movaps  %xmm7,64+16(%rsp)
-       movaps  %xmm8,64+32(%rsp)
-       movaps  %xmm9,64+48(%rsp)
-       movaps  %xmm10,64+64(%rsp)
+       movaps  %xmm6,-40-6*16(%rax)
+       movaps  %xmm7,-40-5*16(%rax)
+       movaps  %xmm8,-40-4*16(%rax)
+       movaps  %xmm9,-40-3*16(%rax)
+       movaps  %xmm10,-40-2*16(%rax)
+       movaps  %xmm11,-40-1*16(%rax)
 .Lprologue_ssse3:
 ___
 $code.=<<___;
+       mov     %rax,%r14       # original %rsp
+       and     \$-64,%rsp
        mov     %rdi,$ctx       # reassigned argument
        mov     %rsi,$inp       # reassigned argument
        mov     %rdx,$num       # reassigned argument
 
        shl     \$6,$num
        add     $inp,$num
-       lea     K_XX_XX(%rip),$K_XX_XX
+       lea     K_XX_XX+64(%rip),$K_XX_XX
 
        mov     0($ctx),$A              # load context
        mov     4($ctx),$B
@@ -336,19 +507,22 @@ $code.=<<___;
        mov     12($ctx),$D
        mov     $B,@T[0]                # magic seed
        mov     16($ctx),$E
+       mov     $C,@T[1]
+       xor     $D,@T[1]
+       and     @T[1],@T[0]
 
        movdqa  64($K_XX_XX),@X[2]      # pbswap mask
-       movdqa  0($K_XX_XX),@Tx[1]      # K_00_19
+       movdqa  -64($K_XX_XX),@Tx[1]    # K_00_19
        movdqu  0($inp),@X[-4&7]        # load input to %xmm[0-3]
        movdqu  16($inp),@X[-3&7]
        movdqu  32($inp),@X[-2&7]
        movdqu  48($inp),@X[-1&7]
        pshufb  @X[2],@X[-4&7]          # byte swap
-       add     \$64,$inp
        pshufb  @X[2],@X[-3&7]
        pshufb  @X[2],@X[-2&7]
-       pshufb  @X[2],@X[-1&7]
+       add     \$64,$inp
        paddd   @Tx[1],@X[-4&7]         # add K_00_19
+       pshufb  @X[2],@X[-1&7]
        paddd   @Tx[1],@X[-3&7]
        paddd   @Tx[1],@X[-2&7]
        movdqa  @X[-4&7],0(%rsp)        # X[]+K xfer to IALU
@@ -373,61 +547,61 @@ sub Xupdate_ssse3_16_31()         # recall that $Xi starts wtih 4
   my @insns = (&$body,&$body,&$body,&$body);   # 40 instructions
   my ($a,$b,$c,$d,$e);
 
-       &movdqa (@X[0],@X[-3&7]);
-        eval(shift(@insns));
+        eval(shift(@insns));           # ror
+       &pshufd (@X[0],@X[-4&7],0xee);  # was &movdqa   (@X[0],@X[-3&7]);
         eval(shift(@insns));
        &movdqa (@Tx[0],@X[-1&7]);
-       &palignr(@X[0],@X[-4&7],8);     # compose "X[-14]" in "X[0]"
+         &paddd        (@Tx[1],@X[-1&7]);
         eval(shift(@insns));
         eval(shift(@insns));
 
-         &paddd        (@Tx[1],@X[-1&7]);
+       &punpcklqdq(@X[0],@X[-3&7]);    # compose "X[-14]" in "X[0]", was &palignr(@X[0],@X[-4&7],8);
         eval(shift(@insns));
+        eval(shift(@insns));           # rol
         eval(shift(@insns));
        &psrldq (@Tx[0],4);             # "X[-3]", 3 dwords
         eval(shift(@insns));
         eval(shift(@insns));
+
        &pxor   (@X[0],@X[-4&7]);       # "X[0]"^="X[-16]"
         eval(shift(@insns));
-        eval(shift(@insns));
-
+        eval(shift(@insns));           # ror
        &pxor   (@Tx[0],@X[-2&7]);      # "X[-3]"^"X[-8]"
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
-        eval(shift(@insns));
 
        &pxor   (@X[0],@Tx[0]);         # "X[0]"^="X[-3]"^"X[-8]"
         eval(shift(@insns));
-        eval(shift(@insns));
+        eval(shift(@insns));           # rol
          &movdqa       (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
         eval(shift(@insns));
         eval(shift(@insns));
 
        &movdqa (@Tx[2],@X[0]);
-       &movdqa (@Tx[0],@X[0]);
-        eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));           # ror
+       &movdqa (@Tx[0],@X[0]);
         eval(shift(@insns));
 
        &pslldq (@Tx[2],12);            # "X[0]"<<96, extract one dword
        &paddd  (@X[0],@X[0]);
         eval(shift(@insns));
         eval(shift(@insns));
-        eval(shift(@insns));
-        eval(shift(@insns));
 
        &psrld  (@Tx[0],31);
         eval(shift(@insns));
+        eval(shift(@insns));           # rol
         eval(shift(@insns));
        &movdqa (@Tx[1],@Tx[2]);
         eval(shift(@insns));
         eval(shift(@insns));
 
        &psrld  (@Tx[2],30);
-       &por    (@X[0],@Tx[0]);         # "X[0]"<<<=1
         eval(shift(@insns));
+        eval(shift(@insns));           # ror
+       &por    (@X[0],@Tx[0]);         # "X[0]"<<<=1
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
@@ -435,12 +609,13 @@ sub Xupdate_ssse3_16_31()         # recall that $Xi starts wtih 4
        &pslld  (@Tx[1],2);
        &pxor   (@X[0],@Tx[2]);
         eval(shift(@insns));
-        eval(shift(@insns));
-         &movdqa       (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)");       # K_XX_XX
+         &movdqa       (@Tx[2],eval(2*16*(($Xi)/5)-64)."($K_XX_XX)");  # K_XX_XX
+        eval(shift(@insns));           # rol
         eval(shift(@insns));
         eval(shift(@insns));
 
        &pxor   (@X[0],@Tx[1]);         # "X[0]"^=("X[0]">>96)<<<2
+       &pshufd (@Tx[1],@X[-1&7],0xee)  if ($Xi==7);    # was &movdqa   (@Tx[0],@X[-1&7]) in Xupdate_ssse3_32_79
 
         foreach (@insns) { eval; }     # remaining instructions [if any]
 
@@ -451,27 +626,30 @@ sub Xupdate_ssse3_16_31()         # recall that $Xi starts wtih 4
 sub Xupdate_ssse3_32_79()
 { use integer;
   my $body = shift;
-  my @insns = (&$body,&$body,&$body,&$body);   # 32 to 48 instructions
+  my @insns = (&$body,&$body,&$body,&$body);   # 32 to 44 instructions
   my ($a,$b,$c,$d,$e);
 
-       &movdqa (@Tx[0],@X[-1&7])       if ($Xi==8);
-        eval(shift(@insns));           # body_20_39
+        eval(shift(@insns))            if ($Xi==8);
        &pxor   (@X[0],@X[-4&7]);       # "X[0]"="X[-32]"^"X[-16]"
-       &palignr(@Tx[0],@X[-2&7],8);    # compose "X[-6]"
+        eval(shift(@insns))            if ($Xi==8);
+        eval(shift(@insns));           # body_20_39
         eval(shift(@insns));
+        eval(shift(@insns))            if (@insns[1] =~ /_ror/);
+        eval(shift(@insns))            if (@insns[0] =~ /_ror/);
+       &punpcklqdq(@Tx[0],@X[-1&7]);   # compose "X[-6]", was &palignr(@Tx[0],@X[-2&7],8);
         eval(shift(@insns));
         eval(shift(@insns));           # rol
 
        &pxor   (@X[0],@X[-7&7]);       # "X[0]"^="X[-28]"
         eval(shift(@insns));
-        eval(shift(@insns))    if (@insns[0] !~ /&ro[rl]/);
+        eval(shift(@insns));
        if ($Xi%5) {
          &movdqa       (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX...
        } else {                        # ... or load next one
-         &movdqa       (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)");
+         &movdqa       (@Tx[2],eval(2*16*($Xi/5)-64)."($K_XX_XX)");
        }
-         &paddd        (@Tx[1],@X[-1&7]);
         eval(shift(@insns));           # ror
+         &paddd        (@Tx[1],@X[-1&7]);
         eval(shift(@insns));
 
        &pxor   (@X[0],@Tx[0]);         # "X[0]"^="X[-6]"
@@ -479,29 +657,31 @@ sub Xupdate_ssse3_32_79()
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));           # rol
+        eval(shift(@insns))            if (@insns[0] =~ /_ror/);
 
        &movdqa (@Tx[0],@X[0]);
-         &movdqa       (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
         eval(shift(@insns));
         eval(shift(@insns));
+         &movdqa       (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU
         eval(shift(@insns));           # ror
         eval(shift(@insns));
+        eval(shift(@insns));           # body_20_39
 
        &pslld  (@X[0],2);
-        eval(shift(@insns));           # body_20_39
         eval(shift(@insns));
-       &psrld  (@Tx[0],30);
         eval(shift(@insns));
-        eval(shift(@insns));           # rol
+       &psrld  (@Tx[0],30);
+        eval(shift(@insns))            if (@insns[0] =~ /_rol/);# rol
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));           # ror
-        eval(shift(@insns));
 
        &por    (@X[0],@Tx[0]);         # "X[0]"<<<=2
-        eval(shift(@insns));           # body_20_39
         eval(shift(@insns));
-         &movdqa       (@Tx[1],@X[0])  if ($Xi<19);
+        eval(shift(@insns));           # body_20_39
+        eval(shift(@insns))            if (@insns[1] =~ /_rol/);
+        eval(shift(@insns))            if (@insns[0] =~ /_rol/);
+         &pshufd(@Tx[1],@X[-1&7],0xee) if ($Xi<19);    # was &movdqa   (@Tx[1],@X[0])
         eval(shift(@insns));
         eval(shift(@insns));           # rol
         eval(shift(@insns));
@@ -522,10 +702,11 @@ sub Xuplast_ssse3_80()
   my ($a,$b,$c,$d,$e);
 
         eval(shift(@insns));
-         &paddd        (@Tx[1],@X[-1&7]);
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
+         &paddd        (@Tx[1],@X[-1&7]);
+        eval(shift(@insns));
         eval(shift(@insns));
 
          &movdqa       (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
@@ -538,7 +719,7 @@ sub Xuplast_ssse3_80()
        unshift(@Tx,pop(@Tx));
 
        &movdqa (@X[2],"64($K_XX_XX)");         # pbswap mask
-       &movdqa (@Tx[1],"0($K_XX_XX)");         # K_00_19
+       &movdqa (@Tx[1],"-64($K_XX_XX)");       # K_00_19
        &movdqu (@X[-4&7],"0($inp)");           # load input
        &movdqu (@X[-3&7],"16($inp)");
        &movdqu (@X[-2&7],"32($inp)");
@@ -557,9 +738,12 @@ sub Xloop_ssse3()
 
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));
        &pshufb (@X[($Xi-3)&7],@X[2]);
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
        &paddd  (@X[($Xi-4)&7],@Tx[1]);
         eval(shift(@insns));
         eval(shift(@insns));
@@ -568,6 +752,8 @@ sub Xloop_ssse3()
        &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]);  # X[]+K xfer to IALU
         eval(shift(@insns));
         eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
        &psubd  (@X[($Xi-4)&7],@Tx[1]);
 
        foreach (@insns) { eval; }
@@ -583,51 +769,66 @@ sub Xtail_ssse3()
        foreach (@insns) { eval; }
 }
 
-sub body_00_19 () {
+sub body_00_19 () {    # ((c^d)&b)^d
+       # on start @T[0]=(c^d)&b
+       return &body_20_39() if ($rx==19); $rx++;
        (
        '($a,$b,$c,$d,$e)=@V;'.
-       '&add   ($e,eval(4*($j&15))."(%rsp)");',        # X[]+K xfer
-       '&xor   ($c,$d);',
-       '&mov   (@T[1],$a);',   # $b in next round
-       '&$_rol ($a,5);',
-       '&and   (@T[0],$c);',   # ($b&($c^$d))
-       '&xor   ($c,$d);',      # restore $c
-       '&xor   (@T[0],$d);',
-       '&add   ($e,$a);',
-       '&$_ror ($b,$j?7:2);',  # $b>>>2
-       '&add   ($e,@T[0]);'    .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       '&$_ror ($b,$j?7:2)',   # $b>>>2
+       '&xor   (@T[0],$d)',
+       '&mov   (@T[1],$a)',    # $b for next round
+
+       '&add   ($e,eval(4*($j&15))."(%rsp)")', # X[]+K xfer
+       '&xor   ($b,$c)',       # $c^$d for next round
+
+       '&$_rol ($a,5)',
+       '&add   ($e,@T[0])',
+       '&and   (@T[1],$b)',    # ($b&($c^$d)) for next round
+
+       '&xor   ($b,$c)',       # restore $b
+       '&add   ($e,$a);'       .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
        );
 }
 
-sub body_20_39 () {
+sub body_20_39 () {    # b^d^c
+       # on entry @T[0]=b^d
+       return &body_40_59() if ($rx==39); $rx++;
        (
        '($a,$b,$c,$d,$e)=@V;'.
-       '&add   ($e,eval(4*($j++&15))."(%rsp)");',      # X[]+K xfer
-       '&xor   (@T[0],$d);',   # ($b^$d)
-       '&mov   (@T[1],$a);',   # $b in next round
-       '&$_rol ($a,5);',
-       '&xor   (@T[0],$c);',   # ($b^$d^$c)
-       '&add   ($e,$a);',
-       '&$_ror ($b,7);',       # $b>>>2
-       '&add   ($e,@T[0]);'    .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       '&add   ($e,eval(4*($j&15))."(%rsp)")', # X[]+K xfer
+       '&xor   (@T[0],$d)      if($j==19);'.
+       '&xor   (@T[0],$c)      if($j> 19)',    # ($b^$d^$c)
+       '&mov   (@T[1],$a)',    # $b for next round
+
+       '&$_rol ($a,5)',
+       '&add   ($e,@T[0])',
+       '&xor   (@T[1],$c)      if ($j< 79)',   # $b^$d for next round
+
+       '&$_ror ($b,7)',        # $b>>>2
+       '&add   ($e,$a);'       .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
        );
 }
 
-sub body_40_59 () {
+sub body_40_59 () {    # ((b^c)&(c^d))^c
+       # on entry @T[0]=(b^c), (c^=d)
+       $rx++;
        (
        '($a,$b,$c,$d,$e)=@V;'.
-       '&mov   (@T[1],$c);',
-       '&xor   ($c,$d);',
-       '&add   ($e,eval(4*($j++&15))."(%rsp)");',      # X[]+K xfer
-       '&and   (@T[1],$d);',
-       '&and   (@T[0],$c);',   # ($b&($c^$d))
-       '&$_ror ($b,7);',       # $b>>>2
-       '&add   ($e,@T[1]);',
-       '&mov   (@T[1],$a);',   # $b in next round
-       '&$_rol ($a,5);',
-       '&add   ($e,@T[0]);',
-       '&xor   ($c,$d);',      # restore $c
-       '&add   ($e,$a);'       .'unshift(@V,pop(@V)); unshift(@T,pop(@T));'
+       '&add   ($e,eval(4*($j&15))."(%rsp)")', # X[]+K xfer
+       '&and   (@T[0],$c)      if ($j>=40)',   # (b^c)&(c^d)
+       '&xor   ($c,$d)         if ($j>=40)',   # restore $c
+
+       '&$_ror ($b,7)',        # $b>>>2
+       '&mov   (@T[1],$a)',    # $b for next round
+       '&xor   (@T[0],$c)',
+
+       '&$_rol ($a,5)',
+       '&add   ($e,@T[0])',
+       '&xor   (@T[1],$c)      if ($j==59);'.
+       '&xor   (@T[1],$b)      if ($j< 59)',   # b^c for next round
+
+       '&xor   ($b,$c)         if ($j< 59)',   # c^d for next round
+       '&add   ($e,$a);'       .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));'
        );
 }
 $code.=<<___;
@@ -668,8 +869,11 @@ $code.=<<___;
        mov     @T[0],4($ctx)
        mov     @T[0],$B                        # magic seed
        mov     $C,8($ctx)
+       mov     $C,@T[1]
        mov     $D,12($ctx)
+       xor     $D,@T[1]
        mov     $E,16($ctx)
+       and     @T[1],@T[0]
        jmp     .Loop_ssse3
 
 .align 16
@@ -694,31 +898,34 @@ $code.=<<___;
        mov     $E,16($ctx)
 ___
 $code.=<<___ if ($win64);
-       movaps  64+0(%rsp),%xmm6
-       movaps  64+16(%rsp),%xmm7
-       movaps  64+32(%rsp),%xmm8
-       movaps  64+48(%rsp),%xmm9
-       movaps  64+64(%rsp),%xmm10
+       movaps  -40-6*16(%r14),%xmm6
+       movaps  -40-5*16(%r14),%xmm7
+       movaps  -40-4*16(%r14),%xmm8
+       movaps  -40-3*16(%r14),%xmm9
+       movaps  -40-2*16(%r14),%xmm10
+       movaps  -40-1*16(%r14),%xmm11
 ___
 $code.=<<___;
-       lea     `64+($win64?5*16:0)`(%rsp),%rsi
-       mov     0(%rsi),%r12
-       mov     8(%rsi),%rbp
-       mov     16(%rsi),%rbx
-       lea     24(%rsi),%rsp
+       lea     (%r14),%rsi
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp
 .Lepilogue_ssse3:
        ret
 .size  sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
 ___
 
 if ($avx) {
-my $Xi=4;
-my @X=map("%xmm$_",(4..7,0..3));
-my @Tx=map("%xmm$_",(8..10));
-my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp");   # size optimization
-my @T=("%esi","%edi");
-my $j=0;
-my $K_XX_XX="%r11";
+$Xi=4;                         # reset variables
+@X=map("%xmm$_",(4..7,0..3));
+@Tx=map("%xmm$_",(8..10));
+$j=0;
+$rx=0;
+
+my $done_avx_label=".Ldone_avx";
 
 my $_rol=sub { &shld(@_[0],@_) };
 my $_ror=sub { &shrd(@_[0],@_) };
@@ -728,28 +935,34 @@ $code.=<<___;
 .align 16
 sha1_block_data_order_avx:
 _avx_shortcut:
+       mov     %rsp,%rax
        push    %rbx
        push    %rbp
        push    %r12
-       lea     `-64-($win64?5*16:0)`(%rsp),%rsp
+       push    %r13            # redundant, done to share Win64 SE handler
+       push    %r14
+       lea     `-64-($win64?6*16:0)`(%rsp),%rsp
+       vzeroupper
 ___
 $code.=<<___ if ($win64);
-       movaps  %xmm6,64+0(%rsp)
-       movaps  %xmm7,64+16(%rsp)
-       movaps  %xmm8,64+32(%rsp)
-       movaps  %xmm9,64+48(%rsp)
-       movaps  %xmm10,64+64(%rsp)
+       vmovaps %xmm6,-40-6*16(%rax)
+       vmovaps %xmm7,-40-5*16(%rax)
+       vmovaps %xmm8,-40-4*16(%rax)
+       vmovaps %xmm9,-40-3*16(%rax)
+       vmovaps %xmm10,-40-2*16(%rax)
+       vmovaps %xmm11,-40-1*16(%rax)
 .Lprologue_avx:
 ___
 $code.=<<___;
+       mov     %rax,%r14       # original %rsp
+       and     \$-64,%rsp
        mov     %rdi,$ctx       # reassigned argument
        mov     %rsi,$inp       # reassigned argument
        mov     %rdx,$num       # reassigned argument
-       vzeroupper
 
        shl     \$6,$num
        add     $inp,$num
-       lea     K_XX_XX(%rip),$K_XX_XX
+       lea     K_XX_XX+64(%rip),$K_XX_XX
 
        mov     0($ctx),$A              # load context
        mov     4($ctx),$B
@@ -757,9 +970,12 @@ $code.=<<___;
        mov     12($ctx),$D
        mov     $B,@T[0]                # magic seed
        mov     16($ctx),$E
+       mov     $C,@T[1]
+       xor     $D,@T[1]
+       and     @T[1],@T[0]
 
        vmovdqa 64($K_XX_XX),@X[2]      # pbswap mask
-       vmovdqa 0($K_XX_XX),@Tx[1]      # K_00_19
+       vmovdqa -64($K_XX_XX),$Kx       # K_00_19
        vmovdqu 0($inp),@X[-4&7]        # load input to %xmm[0-3]
        vmovdqu 16($inp),@X[-3&7]
        vmovdqu 32($inp),@X[-2&7]
@@ -769,9 +985,9 @@ $code.=<<___;
        vpshufb @X[2],@X[-3&7],@X[-3&7]
        vpshufb @X[2],@X[-2&7],@X[-2&7]
        vpshufb @X[2],@X[-1&7],@X[-1&7]
-       vpaddd  @Tx[1],@X[-4&7],@X[0]   # add K_00_19
-       vpaddd  @Tx[1],@X[-3&7],@X[1]
-       vpaddd  @Tx[1],@X[-2&7],@X[2]
+       vpaddd  $Kx,@X[-4&7],@X[0]      # add K_00_19
+       vpaddd  $Kx,@X[-3&7],@X[1]
+       vpaddd  $Kx,@X[-2&7],@X[2]
        vmovdqa @X[0],0(%rsp)           # X[]+K xfer to IALU
        vmovdqa @X[1],16(%rsp)
        vmovdqa @X[2],32(%rsp)
@@ -790,10 +1006,10 @@ sub Xupdate_avx_16_31()          # recall that $Xi starts wtih 4
         eval(shift(@insns));
         eval(shift(@insns));
 
-         &vpaddd       (@Tx[1],@Tx[1],@X[-1&7]);
+         &vpaddd       (@Tx[1],$Kx,@X[-1&7]);
         eval(shift(@insns));
         eval(shift(@insns));
-       &vpsrldq(@Tx[0],@X[-1&7],4);    # "X[-3]", 3 dwords
+       &vpsrldq(@Tx[0],@X[-1&7],4);            # "X[-3]", 3 dwords
         eval(shift(@insns));
         eval(shift(@insns));
        &vpxor  (@X[0],@X[0],@X[-4&7]);         # "X[0]"^="X[-16]"
@@ -843,7 +1059,7 @@ sub Xupdate_avx_16_31()            # recall that $Xi starts wtih 4
        &vpxor  (@X[0],@X[0],@Tx[2]);           # "X[0]"^=("X[0]">>96)<<<2
         eval(shift(@insns));
         eval(shift(@insns));
-         &vmovdqa      (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)");       # K_XX_XX
+         &vmovdqa      ($Kx,eval(2*16*(($Xi)/5)-64)."($K_XX_XX)")      if ($Xi%5==0);  # K_XX_XX
         eval(shift(@insns));
         eval(shift(@insns));
 
@@ -851,13 +1067,12 @@ sub Xupdate_avx_16_31()          # recall that $Xi starts wtih 4
         foreach (@insns) { eval; }     # remaining instructions [if any]
 
   $Xi++;       push(@X,shift(@X));     # "rotate" X[]
-               push(@Tx,shift(@Tx));
 }
 
 sub Xupdate_avx_32_79()
 { use integer;
   my $body = shift;
-  my @insns = (&$body,&$body,&$body,&$body);   # 32 to 48 instructions
+  my @insns = (&$body,&$body,&$body,&$body);   # 32 to 44 instructions
   my ($a,$b,$c,$d,$e);
 
        &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8);  # compose "X[-6]"
@@ -870,12 +1085,8 @@ sub Xupdate_avx_32_79()
        &vpxor  (@X[0],@X[0],@X[-7&7]);         # "X[0]"^="X[-28]"
         eval(shift(@insns));
         eval(shift(@insns))    if (@insns[0] !~ /&ro[rl]/);
-       if ($Xi%5) {
-         &vmovdqa      (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX...
-       } else {                        # ... or load next one
-         &vmovdqa      (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)");
-       }
-         &vpaddd       (@Tx[1],@Tx[1],@X[-1&7]);
+         &vpaddd       (@Tx[1],$Kx,@X[-1&7]);
+         &vmovdqa      ($Kx,eval(2*16*($Xi/5)-64)."($K_XX_XX)")        if ($Xi%5==0);
         eval(shift(@insns));           # ror
         eval(shift(@insns));
 
@@ -905,7 +1116,6 @@ sub Xupdate_avx_32_79()
        &vpor   (@X[0],@X[0],@Tx[0]);           # "X[0]"<<<=2
         eval(shift(@insns));           # body_20_39
         eval(shift(@insns));
-         &vmovdqa      (@Tx[1],@X[0])  if ($Xi<19);
         eval(shift(@insns));
         eval(shift(@insns));           # rol
         eval(shift(@insns));
@@ -916,7 +1126,6 @@ sub Xupdate_avx_32_79()
         foreach (@insns) { eval; }     # remaining instructions
 
   $Xi++;       push(@X,shift(@X));     # "rotate" X[]
-               push(@Tx,shift(@Tx));
 }
 
 sub Xuplast_avx_80()
@@ -926,23 +1135,21 @@ sub Xuplast_avx_80()
   my ($a,$b,$c,$d,$e);
 
         eval(shift(@insns));
-         &vpaddd       (@Tx[1],@Tx[1],@X[-1&7]);
+         &vpaddd       (@Tx[1],$Kx,@X[-1&7]);
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
 
-         &movdqa       (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
+         &vmovdqa      (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU
 
         foreach (@insns) { eval; }             # remaining instructions
 
        &cmp    ($inp,$num);
-       &je     (".Ldone_avx");
-
-       unshift(@Tx,pop(@Tx));
+       &je     ($done_avx_label);
 
        &vmovdqa(@X[2],"64($K_XX_XX)");         # pbswap mask
-       &vmovdqa(@Tx[1],"0($K_XX_XX)");         # K_00_19
+       &vmovdqa($Kx,"-64($K_XX_XX)");          # K_00_19
        &vmovdqu(@X[-4&7],"0($inp)");           # load input
        &vmovdqu(@X[-3&7],"16($inp)");
        &vmovdqu(@X[-2&7],"32($inp)");
@@ -964,7 +1171,7 @@ sub Xloop_avx()
        &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]);
         eval(shift(@insns));
         eval(shift(@insns));
-       &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]);
+       &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],$Kx);
         eval(shift(@insns));
         eval(shift(@insns));
         eval(shift(@insns));
@@ -1024,12 +1231,15 @@ $code.=<<___;
        mov     @T[0],4($ctx)
        mov     @T[0],$B                        # magic seed
        mov     $C,8($ctx)
+       mov     $C,@T[1]
        mov     $D,12($ctx)
+       xor     $D,@T[1]
        mov     $E,16($ctx)
+       and     @T[1],@T[0]
        jmp     .Loop_avx
 
 .align 16
-.Ldone_avx:
+$done_avx_label:
 ___
                                $j=$saved_j; @V=@saved_V;
 
@@ -1052,31 +1262,520 @@ $code.=<<___;
        mov     $E,16($ctx)
 ___
 $code.=<<___ if ($win64);
-       movaps  64+0(%rsp),%xmm6
-       movaps  64+16(%rsp),%xmm7
-       movaps  64+32(%rsp),%xmm8
-       movaps  64+48(%rsp),%xmm9
-       movaps  64+64(%rsp),%xmm10
+       movaps  -40-6*16(%r14),%xmm6
+       movaps  -40-5*16(%r14),%xmm7
+       movaps  -40-4*16(%r14),%xmm8
+       movaps  -40-3*16(%r14),%xmm9
+       movaps  -40-2*16(%r14),%xmm10
+       movaps  -40-1*16(%r14),%xmm11
 ___
 $code.=<<___;
-       lea     `64+($win64?5*16:0)`(%rsp),%rsi
-       mov     0(%rsi),%r12
-       mov     8(%rsi),%rbp
-       mov     16(%rsi),%rbx
-       lea     24(%rsi),%rsp
+       lea     (%r14),%rsi
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp
 .Lepilogue_avx:
        ret
 .size  sha1_block_data_order_avx,.-sha1_block_data_order_avx
 ___
+
+if ($avx>1) {
+use integer;
+$Xi=4;                                 # reset variables
+@X=map("%ymm$_",(4..7,0..3));
+@Tx=map("%ymm$_",(8..10));
+$Kx="%ymm11";
+$j=0;
+
+my @ROTX=("%eax","%ebp","%ebx","%ecx","%edx","%esi");
+my ($a5,$t0)=("%r12d","%edi");
+
+my ($A,$F,$B,$C,$D,$E)=@ROTX;
+my $rx=0;
+my $frame="%r13";
+
+$code.=<<___;
+.type  sha1_block_data_order_avx2,\@function,3
+.align 16
+sha1_block_data_order_avx2:
+_avx2_shortcut:
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       lea     -6*16(%rsp),%rsp
+       vmovaps %xmm6,-40-6*16(%rax)
+       vmovaps %xmm7,-40-5*16(%rax)
+       vmovaps %xmm8,-40-4*16(%rax)
+       vmovaps %xmm9,-40-3*16(%rax)
+       vmovaps %xmm10,-40-2*16(%rax)
+       vmovaps %xmm11,-40-1*16(%rax)
+.Lprologue_avx2:
+___
+$code.=<<___;
+       mov     %rax,%r14               # original %rsp
+       mov     %rdi,$ctx               # reassigned argument
+       mov     %rsi,$inp               # reassigned argument
+       mov     %rdx,$num               # reassigned argument
+
+       lea     -640(%rsp),%rsp
+       shl     \$6,$num
+        lea    64($inp),$frame
+       and     \$-128,%rsp
+       add     $inp,$num
+       lea     K_XX_XX+64(%rip),$K_XX_XX
+
+       mov     0($ctx),$A              # load context
+        cmp    $num,$frame
+        cmovae $inp,$frame             # next or same block
+       mov     4($ctx),$F
+       mov     8($ctx),$C
+       mov     12($ctx),$D
+       mov     16($ctx),$E
+       vmovdqu 64($K_XX_XX),@X[2]      # pbswap mask
+
+       vmovdqu         ($inp),%xmm0
+       vmovdqu         16($inp),%xmm1
+       vmovdqu         32($inp),%xmm2
+       vmovdqu         48($inp),%xmm3
+       lea             64($inp),$inp
+       vinserti128     \$1,($frame),@X[-4&7],@X[-4&7]
+       vinserti128     \$1,16($frame),@X[-3&7],@X[-3&7]
+       vpshufb         @X[2],@X[-4&7],@X[-4&7]
+       vinserti128     \$1,32($frame),@X[-2&7],@X[-2&7]
+       vpshufb         @X[2],@X[-3&7],@X[-3&7]
+       vinserti128     \$1,48($frame),@X[-1&7],@X[-1&7]
+       vpshufb         @X[2],@X[-2&7],@X[-2&7]
+       vmovdqu         -64($K_XX_XX),$Kx       # K_00_19
+       vpshufb         @X[2],@X[-1&7],@X[-1&7]
+
+       vpaddd  $Kx,@X[-4&7],@X[0]      # add K_00_19
+       vpaddd  $Kx,@X[-3&7],@X[1]
+       vmovdqu @X[0],0(%rsp)           # X[]+K xfer to IALU
+       vpaddd  $Kx,@X[-2&7],@X[2]
+       vmovdqu @X[1],32(%rsp)
+       vpaddd  $Kx,@X[-1&7],@X[3]
+       vmovdqu @X[2],64(%rsp)
+       vmovdqu @X[3],96(%rsp)
+___
+for (;$Xi<8;$Xi++) {   # Xupdate_avx2_16_31
+    use integer;
+
+       &vpalignr(@X[0],@X[-3&7],@X[-4&7],8);   # compose "X[-14]" in "X[0]"
+       &vpsrldq(@Tx[0],@X[-1&7],4);            # "X[-3]", 3 dwords
+       &vpxor  (@X[0],@X[0],@X[-4&7]);         # "X[0]"^="X[-16]"
+       &vpxor  (@Tx[0],@Tx[0],@X[-2&7]);       # "X[-3]"^"X[-8]"
+       &vpxor  (@X[0],@X[0],@Tx[0]);           # "X[0]"^="X[-3]"^"X[-8]"
+       &vpsrld (@Tx[0],@X[0],31);
+       &vmovdqu($Kx,eval(2*16*(($Xi)/5)-64)."($K_XX_XX)")      if ($Xi%5==0);  # K_XX_XX
+       &vpslldq(@Tx[2],@X[0],12);              # "X[0]"<<96, extract one dword
+       &vpaddd (@X[0],@X[0],@X[0]);
+       &vpsrld (@Tx[1],@Tx[2],30);
+       &vpor   (@X[0],@X[0],@Tx[0]);           # "X[0]"<<<=1
+       &vpslld (@Tx[2],@Tx[2],2);
+       &vpxor  (@X[0],@X[0],@Tx[1]);
+       &vpxor  (@X[0],@X[0],@Tx[2]);           # "X[0]"^=("X[0]">>96)<<<2
+       &vpaddd (@Tx[1],@X[0],$Kx);
+       &vmovdqu("32*$Xi(%rsp)",@Tx[1]);        # X[]+K xfer to IALU
+
+       push(@X,shift(@X));     # "rotate" X[]
+}
+$code.=<<___;
+       lea     128(%rsp),$frame
+       jmp     .Loop_avx2
+.align 32
+.Loop_avx2:
+       rorx    \$2,$F,$B
+       andn    $D,$F,$t0
+       and     $C,$F
+       xor     $t0,$F
+___
+# Round-body generator for the first group of SHA-1 rounds in the AVX2 path.
+# Returns a list of Perl source strings; callers eval() them one at a time so
+# that the scalar round instructions can be interleaved with vector code.
+# Each returned list covers one round: it adds the $f value prepared by the
+# previous round and prepares $f for the following round.  Because of that
+# one-round look-ahead, the call made when $rx==19 is forwarded to
+# bodyx_20_39() so the next round finds $f in the b^c^d form.
+# Globals used: $rx (calls counted so far), @ROTX (rotating register
+# assignment), $j (round counter, advanced inside the emitted snippet),
+# $frame, $t0, $a5.
+sub bodyx_00_19 () {   # 8 instructions, 3 cycles critical path
+       # at start $f=(b&c)^(~b&d), $b>>>=2
+       return &bodyx_20_39() if ($rx==19); $rx++;
+       (
+       '($a,$f,$b,$c,$d,$e)=@ROTX;'.
+
+       # offset walks 4 consecutive dwords, then skips to the next 32-byte
+       # slot; it is biased by -128 and wraps every 256 bytes, at which point
+       # $frame itself is advanced by 256 (when $j%32==31)
+       '&add   ($e,((32*($j/4)+4*($j%4))%256-128)."($frame)");'.       # e+=X[i]+K
+        '&lea  ($frame,"256($frame)")  if ($j%32==31);',
+       '&andn  ($t0,$a,$c)',                   # ~b&d for next round
+
+       '&add   ($e,$f)',                       # e+=(b&c)^(~b&d)
+       '&rorx  ($a5,$a,27)',                   # a<<<5
+       '&rorx  ($f,$a,2)',                     # b>>>2 for next round
+       '&and   ($a,$b)',                       # b&c for next round
+
+       '&add   ($e,$a5)',                      # e+=a<<<5
+       '&xor   ($a,$t0);'.                     # f=(b&c)^(~b&d) for next round
+
+       # rotate register roles for the next round and count this one
+       'unshift(@ROTX,pop(@ROTX)); $j++;'
+       )
+}
+
+# Round-body generator for rounds whose $f is b^c^d.  The driver code also
+# passes it for the final group of rounds, which is why the look-ahead
+# snippets are guarded by ($j<79): the very last round has no successor to
+# prepare for.
+# Returns a list of Perl source strings to be eval()ed by the caller, one
+# round per call.  The call made when $rx==39 is forwarded to bodyx_40_59()
+# so the following round finds $f in the form that body expects.
+# Globals used: $rx, @ROTX, $j, $frame, $a5.
+sub bodyx_20_39 () {   # 7 instructions, 2 cycles critical path
+       # on entry $f=b^c^d, $b>>>=2
+       return &bodyx_40_59() if ($rx==39); $rx++;
+       (
+       '($a,$f,$b,$c,$d,$e)=@ROTX;'.
+
+       # same X[i]+K addressing scheme as in bodyx_00_19
+       '&add   ($e,((32*($j/4)+4*($j%4))%256-128)."($frame)");'.       # e+=X[i]+K
+        '&lea  ($frame,"256($frame)")  if ($j%32==31);',
+
+       '&lea   ($e,"($e,$f)")',                # e+=b^c^d
+       '&rorx  ($a5,$a,27)',                   # a<<<5
+       '&rorx  ($f,$a,2)       if ($j<79)',    # b>>>2 in next round
+       '&xor   ($a,$b)         if ($j<79)',    # b^c for next round
+
+       '&add   ($e,$a5)',                      # e+=a<<<5
+       '&xor   ($a,$c)         if ($j<79);'.   # f=b^c^d for next round
+
+       # rotate register roles for the next round and count this one
+       'unshift(@ROTX,pop(@ROTX)); $j++;'
+       )
+}
+
+# Round-body generator for rounds whose $f is built from (b^c)&(c^d).
+# Returns a list of Perl source strings to be eval()ed by the caller, one
+# round per call.  Unlike the two generators above it never forwards to
+# another body; the transitions at both ends are handled by the $j guards:
+#   - ($j>39): the first round emitted from this body (j==39, reached via the
+#     forward in bodyx_20_39) still holds b^c^d in $f, so the final "^c" is
+#     skipped for it;
+#   - ($j<59)/($j==59): the last round of the group prepares b^c^d for the
+#     next round instead of (b^c)&(c^d).
+# Globals used: $rx, @ROTX, $j, $frame, $t0, $a5.
+sub bodyx_40_59 () {   # 10 instructions, 3 cycles critical path
+       # on entry $f=((b^c)&(c^d)), $b>>>=2
+       $rx++;
+       (
+       '($a,$f,$b,$c,$d,$e)=@ROTX;'.
+
+       # same X[i]+K addressing scheme as in bodyx_00_19
+       '&add   ($e,((32*($j/4)+4*($j%4))%256-128)."($frame)");'.       # e+=X[i]+K
+        '&lea  ($frame,"256($frame)")  if ($j%32==31);',
+       '&xor   ($f,$c)         if ($j>39)',    # (b^c)&(c^d)^c
+       '&mov   ($t0,$b)        if ($j<59)',    # count on zero latency
+       '&xor   ($t0,$c)        if ($j<59)',    # c^d for next round
+
+       '&lea   ($e,"($e,$f)")',                # e+=(b^c)&(c^d)^c
+       '&rorx  ($a5,$a,27)',                   # a<<<5
+       '&rorx  ($f,$a,2)',                     # b>>>2 in next round
+       '&xor   ($a,$b)',                       # b^c for next round
+
+       '&add   ($e,$a5)',                      # e+=a<<<5
+       '&and   ($a,$t0)        if ($j< 59);'.  # f=(b^c)&(c^d) for next round
+       '&xor   ($a,$c)         if ($j==59);'.  # f=b^c^d for next round
+
+       # rotate register roles for the next round and count this one
+       'unshift(@ROTX,pop(@ROTX)); $j++;'
+       )
+}
+
+# Emit one message-schedule step of the "16..31" form (the variant with the
+# 96-bit shift fix-up) interleaved with five scalar rounds.
+# Argument: a reference to a round-body generator (bodyx_*); it is invoked
+# five times and the resulting snippet strings are eval()ed in small groups
+# between the vector instructions.
+# Side effects: stores X[]+K at 32*$Xi(%rsp), increments $Xi and rotates @X;
+# the evaluated snippets additionally advance $j and rotate @ROTX.
+sub Xupdate_avx2_16_31()               # recall that $Xi starts with 4
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body,&$body);    # 35 instructions
+  my ($a,$b,$c,$d,$e);                 # assigned by the eval()ed snippets
+
+       &vpalignr(@X[0],@X[-3&7],@X[-4&7],8);   # compose "X[-14]" in "X[0]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpsrldq(@Tx[0],@X[-1&7],4);            # "X[-3]", 3 dwords
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpxor  (@X[0],@X[0],@X[-4&7]);         # "X[0]"^="X[-16]"
+       &vpxor  (@Tx[0],@Tx[0],@X[-2&7]);       # "X[-3]"^"X[-8]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpxor  (@X[0],@X[0],@Tx[0]);           # "X[0]"^="X[-3]"^"X[-8]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpsrld (@Tx[0],@X[0],31);
+       # every fifth step switch $Kx to the next 32-byte entry of K_XX_XX
+       &vmovdqu($Kx,eval(2*16*(($Xi)/5)-64)."($K_XX_XX)")      if ($Xi%5==0);  # K_XX_XX
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpslldq(@Tx[2],@X[0],12);              # "X[0]"<<96, extract one dword
+       &vpaddd (@X[0],@X[0],@X[0]);            # "X[0]"<<=1 by self-addition
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpsrld (@Tx[1],@Tx[2],30);
+       &vpor   (@X[0],@X[0],@Tx[0]);           # "X[0]"<<<=1
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpslld (@Tx[2],@Tx[2],2);
+       &vpxor  (@X[0],@X[0],@Tx[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpxor  (@X[0],@X[0],@Tx[2]);           # "X[0]"^=("X[0]">>96)<<<2
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpaddd (@Tx[1],@X[0],$Kx);             # X[]+K
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vmovdqu(eval(32*($Xi))."(%rsp)",@Tx[1]);       # X[]+K xfer to IALU
+
+        foreach (@insns) { eval; }     # remaining instructions [if any]
+
+       $Xi++;
+       push(@X,shift(@X));     # "rotate" X[]
+}
+
+# Emit one message-schedule step of the "32..79" form (xor of four earlier
+# vectors followed by a rotate-left by 2) interleaved with five scalar rounds.
+# Argument: a reference to a round-body generator (bodyx_*); it is invoked
+# five times and the resulting snippet strings are eval()ed in small groups
+# between the vector instructions.
+# Side effects: stores X[]+K at 32*$Xi(%rsp), increments $Xi and rotates @X;
+# the evaluated snippets additionally advance $j and rotate @ROTX.
+sub Xupdate_avx2_32_79()
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body,&$body);    # 35 to 50 instructions
+  my ($a,$b,$c,$d,$e);                 # assigned by the eval()ed snippets
+
+       &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8);  # compose "X[-6]"
+       &vpxor  (@X[0],@X[0],@X[-4&7]);         # "X[0]"="X[-32]"^"X[-16]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpxor  (@X[0],@X[0],@X[-7&7]);         # "X[0]"^="X[-28]"
+       # every fifth step switch $Kx to the next 32-byte entry of K_XX_XX
+       &vmovdqu($Kx,eval(2*16*($Xi/5)-64)."($K_XX_XX)")        if ($Xi%5==0);
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpxor  (@X[0],@X[0],@Tx[0]);           # "X[0]"^="X[-6]"
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpsrld (@Tx[0],@X[0],30);              # bits that wrap around in <<<2
+       &vpslld (@X[0],@X[0],2);
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       #&vpslld        (@X[0],@X[0],2);
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpor   (@X[0],@X[0],@Tx[0]);           # "X[0]"<<<=2
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vpaddd (@Tx[1],@X[0],$Kx);             # X[]+K
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       &vmovdqu("32*$Xi(%rsp)",@Tx[1]);        # X[]+K xfer to IALU
+
+        foreach (@insns) { eval; }     # remaining instructions
+
+       $Xi++;
+       push(@X,shift(@X));     # "rotate" X[]
+}
+
+# Emit five scalar rounds with no vector work of its own.
+# Argument: a reference to a round-body generator (bodyx_*); it is invoked
+# five times and every returned snippet string is eval()ed in order.  Callers
+# that need vector instructions between round groups place them between
+# consecutive Xloop_avx2 calls.
+sub Xloop_avx2()
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body,&$body);    # 32 instructions
+  my ($a,$b,$c,$d,$e);                 # assigned by the eval()ed snippets
+
+        foreach (@insns) { eval; }
+}
+
+       # Body of .Loop_avx2: 16 calls, each emitting five scalar rounds
+       # (80 rounds in total).  The first twelve calls also emit one
+       # "32..79" message-schedule step each; the last four emit rounds only.
+       &align32();
+       &Xupdate_avx2_32_79(\&bodyx_00_19);
+       &Xupdate_avx2_32_79(\&bodyx_00_19);
+       &Xupdate_avx2_32_79(\&bodyx_00_19);
+       &Xupdate_avx2_32_79(\&bodyx_00_19);
+
+       &Xupdate_avx2_32_79(\&bodyx_20_39);
+       &Xupdate_avx2_32_79(\&bodyx_20_39);
+       &Xupdate_avx2_32_79(\&bodyx_20_39);
+       &Xupdate_avx2_32_79(\&bodyx_20_39);
+
+       &align32();
+       &Xupdate_avx2_32_79(\&bodyx_40_59);
+       &Xupdate_avx2_32_79(\&bodyx_40_59);
+       &Xupdate_avx2_32_79(\&bodyx_40_59);
+       &Xupdate_avx2_32_79(\&bodyx_40_59);
+
+       # final 20 rounds reuse the b^c^d body; its ($j<79) guards drop the
+       # look-ahead work in the very last round
+       &Xloop_avx2(\&bodyx_20_39);
+       &Xloop_avx2(\&bodyx_20_39);
+       &Xloop_avx2(\&bodyx_20_39);
+       &Xloop_avx2(\&bodyx_20_39);
+
+$code.=<<___;
+       lea     128($inp),$frame
+       lea     128($inp),%rdi                  # borrow $t0
+       cmp     $num,$frame
+       cmovae  $inp,$frame                     # next or previous block
+
+       # output is d-e-[a]-f-b-c => A=d,F=e,C=f,D=b,E=c
+       add     0($ctx),@ROTX[0]                # update context
+       add     4($ctx),@ROTX[1]
+       add     8($ctx),@ROTX[3]
+       mov     @ROTX[0],0($ctx)
+       add     12($ctx),@ROTX[4]
+       mov     @ROTX[1],4($ctx)
+        mov    @ROTX[0],$A                     # A=d
+       add     16($ctx),@ROTX[5]
+        mov    @ROTX[3],$a5
+       mov     @ROTX[3],8($ctx)
+        mov    @ROTX[4],$D                     # D=b
+        #xchg  @ROTX[5],$F                     # F=c, C=f
+       mov     @ROTX[4],12($ctx)
+        mov    @ROTX[1],$F                     # F=e
+       mov     @ROTX[5],16($ctx)
+       #mov    $F,16($ctx)
+        mov    @ROTX[5],$E                     # E=c
+        mov    $a5,$C                          # C=f
+        #xchg  $F,$E                           # E=c, F=e
+
+       cmp     $num,$inp
+       je      .Ldone_avx2
+___
+
+$Xi=4;                         # reset variables
+@X=map("%ymm$_",(4..7,0..3));
+
+$code.=<<___;
+       vmovdqu 64($K_XX_XX),@X[2]              # pbswap mask
+       cmp     $num,%rdi                       # borrowed $t0
+       ja      .Last_avx2
+
+       vmovdqu         -64(%rdi),%xmm0         # low part of @X[-4&7]
+       vmovdqu         -48(%rdi),%xmm1
+       vmovdqu         -32(%rdi),%xmm2
+       vmovdqu         -16(%rdi),%xmm3
+       vinserti128     \$1,0($frame),@X[-4&7],@X[-4&7]
+       vinserti128     \$1,16($frame),@X[-3&7],@X[-3&7]
+       vinserti128     \$1,32($frame),@X[-2&7],@X[-2&7]
+       vinserti128     \$1,48($frame),@X[-1&7],@X[-1&7]
+       jmp     .Last_avx2
+
+.align 32
+.Last_avx2:
+       lea     128+16(%rsp),$frame
+       rorx    \$2,$F,$B
+       andn    $D,$F,$t0
+       and     $C,$F
+       xor     $t0,$F
+       sub     \$-128,$inp
+___
+       $rx=$j=0;       @ROTX=($A,$F,$B,$C,$D,$E);
+
+       &Xloop_avx2     (\&bodyx_00_19);
+       &Xloop_avx2     (\&bodyx_00_19);
+       &Xloop_avx2     (\&bodyx_00_19);
+       &Xloop_avx2     (\&bodyx_00_19);
+
+       &Xloop_avx2     (\&bodyx_20_39);
+         &vmovdqu      ($Kx,"-64($K_XX_XX)");          # K_00_19
+         &vpshufb      (@X[-4&7],@X[-4&7],@X[2]);      # byte swap
+       &Xloop_avx2     (\&bodyx_20_39);
+         &vpshufb      (@X[-3&7],@X[-3&7],@X[2]);
+         &vpaddd       (@Tx[0],@X[-4&7],$Kx);          # add K_00_19
+       &Xloop_avx2     (\&bodyx_20_39);
+         &vmovdqu      ("0(%rsp)",@Tx[0]);
+         &vpshufb      (@X[-2&7],@X[-2&7],@X[2]);
+         &vpaddd       (@Tx[1],@X[-3&7],$Kx);
+       &Xloop_avx2     (\&bodyx_20_39);
+         &vmovdqu      ("32(%rsp)",@Tx[1]);
+         &vpshufb      (@X[-1&7],@X[-1&7],@X[2]);
+         &vpaddd       (@X[2],@X[-2&7],$Kx);
+
+       &Xloop_avx2     (\&bodyx_40_59);
+       &align32        ();
+         &vmovdqu      ("64(%rsp)",@X[2]);
+         &vpaddd       (@X[3],@X[-1&7],$Kx);
+       &Xloop_avx2     (\&bodyx_40_59);
+         &vmovdqu      ("96(%rsp)",@X[3]);
+       &Xloop_avx2     (\&bodyx_40_59);
+       &Xupdate_avx2_16_31(\&bodyx_40_59);
+
+       &Xupdate_avx2_16_31(\&bodyx_20_39);
+       &Xupdate_avx2_16_31(\&bodyx_20_39);
+       &Xupdate_avx2_16_31(\&bodyx_20_39);
+       &Xloop_avx2     (\&bodyx_20_39);
+
+$code.=<<___;
+       lea     128(%rsp),$frame
+
+       # output is d-e-[a]-f-b-c => A=d,F=e,C=f,D=b,E=c
+       add     0($ctx),@ROTX[0]                # update context
+       add     4($ctx),@ROTX[1]
+       add     8($ctx),@ROTX[3]
+       mov     @ROTX[0],0($ctx)
+       add     12($ctx),@ROTX[4]
+       mov     @ROTX[1],4($ctx)
+        mov    @ROTX[0],$A                     # A=d
+       add     16($ctx),@ROTX[5]
+        mov    @ROTX[3],$a5
+       mov     @ROTX[3],8($ctx)
+        mov    @ROTX[4],$D                     # D=b
+        #xchg  @ROTX[5],$F                     # F=c, C=f
+       mov     @ROTX[4],12($ctx)
+        mov    @ROTX[1],$F                     # F=e
+       mov     @ROTX[5],16($ctx)
+       #mov    $F,16($ctx)
+        mov    @ROTX[5],$E                     # E=c
+        mov    $a5,$C                          # C=f
+        #xchg  $F,$E                           # E=c, F=e
+
+       cmp     $num,$inp
+       jbe     .Loop_avx2
+
+.Ldone_avx2:
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       movaps  -40-6*16(%r14),%xmm6
+       movaps  -40-5*16(%r14),%xmm7
+       movaps  -40-4*16(%r14),%xmm8
+       movaps  -40-3*16(%r14),%xmm9
+       movaps  -40-2*16(%r14),%xmm10
+       movaps  -40-1*16(%r14),%xmm11
+___
+$code.=<<___;
+       lea     (%r14),%rsi
+       mov     -40(%rsi),%r14
+       mov     -32(%rsi),%r13
+       mov     -24(%rsi),%r12
+       mov     -16(%rsi),%rbp
+       mov     -8(%rsi),%rbx
+       lea     (%rsi),%rsp
+.Lepilogue_avx2:
+       ret
+.size  sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
+___
+}
 }
 $code.=<<___;
 .align 64
 K_XX_XX:
 .long  0x5a827999,0x5a827999,0x5a827999,0x5a827999     # K_00_19
+.long  0x5a827999,0x5a827999,0x5a827999,0x5a827999     # K_00_19
+.long  0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1     # K_20_39
 .long  0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1     # K_20_39
 .long  0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc     # K_40_59
+.long  0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc     # K_40_59
+.long  0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6     # K_60_79
 .long  0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6     # K_60_79
 .long  0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f     # pbswap mask
+.long  0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f     # pbswap mask
+.byte  0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
 ___
 }}}
 $code.=<<___;
@@ -1122,20 +1821,58 @@ se_handler:
        jae     .Lcommon_seh_tail
 
        mov     `16*4`(%rax),%rax       # pull saved stack pointer
-       lea     32(%rax),%rax
 
        mov     -8(%rax),%rbx
        mov     -16(%rax),%rbp
        mov     -24(%rax),%r12
        mov     -32(%rax),%r13
+       mov     -40(%rax),%r14
        mov     %rbx,144($context)      # restore context->Rbx
        mov     %rbp,160($context)      # restore context->Rbp
        mov     %r12,216($context)      # restore context->R12
        mov     %r13,224($context)      # restore context->R13
+       mov     %r14,232($context)      # restore context->R14
 
        jmp     .Lcommon_seh_tail
 .size  se_handler,.-se_handler
+___
+
+$code.=<<___ if ($shaext);
+.type  shaext_handler,\@abi-omnipotent
+.align 16
+shaext_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       lea     .Lprologue_shaext(%rip),%r10
+       cmp     %r10,%rbx               # context->Rip<.Lprologue
+       jb      .Lcommon_seh_tail
+
+       lea     .Lepilogue_shaext(%rip),%r10
+       cmp     %r10,%rbx               # context->Rip>=.Lepilogue
+       jae     .Lcommon_seh_tail
+
+       lea     -8-4*16(%rax),%rsi
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$8,%ecx
+       .long   0xa548f3fc              # cld; rep movsq
 
+       jmp     .Lcommon_seh_tail
+.size  shaext_handler,.-shaext_handler
+___
+
+$code.=<<___;
 .type  ssse3_handler,\@abi-omnipotent
 .align 16
 ssse3_handler:
@@ -1168,18 +1905,23 @@ ssse3_handler:
        cmp     %r10,%rbx               # context->Rip>=epilogue label
        jae     .Lcommon_seh_tail
 
-       lea     64(%rax),%rsi
+       mov     232($context),%rax      # pull context->R14
+
+       lea     -40-6*16(%rax),%rsi
        lea     512($context),%rdi      # &context.Xmm6
-       mov     \$10,%ecx
+       mov     \$12,%ecx
        .long   0xa548f3fc              # cld; rep movsq
-       lea     `24+64+5*16`(%rax),%rax # adjust stack pointer
 
        mov     -8(%rax),%rbx
        mov     -16(%rax),%rbp
        mov     -24(%rax),%r12
+       mov     -32(%rax),%r13
+       mov     -40(%rax),%r14
        mov     %rbx,144($context)      # restore context->Rbx
        mov     %rbp,160($context)      # restore context->Rbp
        mov     %r12,216($context)      # restore cotnext->R12
+       mov     %r13,224($context)      # restore cotnext->R13
+       mov     %r14,232($context)      # restore cotnext->R14
 
 .Lcommon_seh_tail:
        mov     8(%rax),%rdi
@@ -1226,6 +1968,13 @@ ssse3_handler:
        .rva    .LSEH_begin_sha1_block_data_order
        .rva    .LSEH_end_sha1_block_data_order
        .rva    .LSEH_info_sha1_block_data_order
+___
+$code.=<<___ if ($shaext);
+       .rva    .LSEH_begin_sha1_block_data_order_shaext
+       .rva    .LSEH_end_sha1_block_data_order_shaext
+       .rva    .LSEH_info_sha1_block_data_order_shaext
+___
+$code.=<<___;
        .rva    .LSEH_begin_sha1_block_data_order_ssse3
        .rva    .LSEH_end_sha1_block_data_order_ssse3
        .rva    .LSEH_info_sha1_block_data_order_ssse3
@@ -1235,12 +1984,24 @@ $code.=<<___ if ($avx);
        .rva    .LSEH_end_sha1_block_data_order_avx
        .rva    .LSEH_info_sha1_block_data_order_avx
 ___
+$code.=<<___ if ($avx>1);
+       .rva    .LSEH_begin_sha1_block_data_order_avx2
+       .rva    .LSEH_end_sha1_block_data_order_avx2
+       .rva    .LSEH_info_sha1_block_data_order_avx2
+___
 $code.=<<___;
 .section       .xdata
 .align 8
 .LSEH_info_sha1_block_data_order:
        .byte   9,0,0,0
        .rva    se_handler
+___
+$code.=<<___ if ($shaext);
+.LSEH_info_sha1_block_data_order_shaext:
+       .byte   9,0,0,0
+       .rva    shaext_handler
+___
+$code.=<<___;
 .LSEH_info_sha1_block_data_order_ssse3:
        .byte   9,0,0,0
        .rva    ssse3_handler
@@ -1252,10 +2013,55 @@ $code.=<<___ if ($avx);
        .rva    ssse3_handler
        .rva    .Lprologue_avx,.Lepilogue_avx           # HandlerData[]
 ___
+$code.=<<___ if ($avx>1);
+.LSEH_info_sha1_block_data_order_avx2:
+       .byte   9,0,0,0
+       .rva    ssse3_handler
+       .rva    .Lprologue_avx2,.Lepilogue_avx2         # HandlerData[]
+___
 }
 
 ####################################################################
 
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
+# Translate the operand string of a "sha1rnds4 $imm,%xmmS,%xmmD" instruction
+# into a raw ".byte" directive.
+# Argument: the operand text following the mnemonic.
+# Returns: ".byte\t0x0f,0x3a,0xcc,<ModR/M>,<imm>" (values joined as decimal
+# numbers) when both registers are %xmm0..%xmm7; otherwise the instruction
+# is returned textually as "sha1rnds4\t<operands>".
+sub sha1rnds4 {
+    if (@_[0] =~ /\$([x0-9a-f]+),\s*%xmm([0-7]),\s*%xmm([0-7])/) {
+      my @opcode=(0x0f,0x3a,0xcc);
+       # register-direct form: first register goes to r/m, second to reg
+       push @opcode,0xc0|($2&7)|(($3&7)<<3);           # ModR/M
+       my $c=$1;
+       # immediates with a leading 0 (e.g. 0x.. or octal) go through oct()
+       push @opcode,$c=~/^0/?oct($c):$c;
+       return ".byte\t".join(',',@opcode);
+    } else {
+       return "sha1rnds4\t".@_[0];
+    }
+}
+
+# Translate a two-register sha1nexte / sha1msg1 / sha1msg2 instruction into a
+# raw ".byte" directive.
+# Arguments: the mnemonic, then the operand text "%xmmS,%xmmD".
+# Returns: ".byte\t[<REX>,]0x0f,0x38,<opcode>,<ModR/M>" (values joined as
+# decimal numbers) when the mnemonic is one of the three listed below and
+# both operands are %xmm registers; otherwise the instruction is returned
+# textually as "<mnemonic>\t<operands>".
+sub sha1op38 {
+    my $instr = shift;
+    my %opcodelet = (
+               "sha1nexte" => 0xc8,
+               "sha1msg1"  => 0xc9,
+               "sha1msg2"  => 0xca     );
+
+    if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) {
+      my @opcode=(0x0f,0x38);
+      my $rex=0;
+       # registers 8 and above need a REX prefix: bit 0x04 extends the
+       # reg field (second operand), bit 0x01 the r/m field (first operand)
+       $rex|=0x04                      if ($2>=8);
+       $rex|=0x01                      if ($1>=8);
+       unshift @opcode,0x40|$rex       if ($rex);
+       push @opcode,$opcodelet{$instr};
+       push @opcode,0xc0|($1&7)|(($2&7)<<3);           # ModR/M
+       return ".byte\t".join(',',@opcode);
+    } else {
+       return $instr."\t".@_[0];
+    }
+}
+
+# Post-process the accumulated $code line by line and print it.
+foreach (split("\n",$code)) {
+       # replace each `...` span with the result of evaluating it as Perl
+       s/\`([^\`]*)\`/eval $1/geo;
+
+       # hand-encode SHA instructions: sha1rnds4 has its own encoder, any
+       # other mnemonic starting with "sha1" is passed to sha1op38()
+       s/\b(sha1rnds4)\s+(.*)/sha1rnds4($2)/geo        or
+       s/\b(sha1[^\s]*)\s+(.*)/sha1op38($1,$2)/geo;
+
+       print $_,"\n";
+}
 close STDOUT;
index 928ec53..6462e45 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -9,20 +9,55 @@
 #
 # SHA256 block transform for x86. September 2007.
 #
-# Performance in clock cycles per processed byte (less is better):
+# Performance improvement over compiler generated code varies from
+# 10% to 40% [see below]. Not very impressive on some µ-archs, but
+# it's 5 times smaller and optimizes amount of writes.
 #
-#              Pentium PIII    P4      AMD K8  Core2
-# gcc          46      36      41      27      26
-# icc          57      33      38      25      23      
-# x86 asm      40      30      33      20      18
-# x86_64 asm(*)        -       -       21      16      16
+# May 2012.
 #
-# (*) x86_64 assembler performance is presented for reference
-#     purposes.
+# Optimization including two of Pavel Semjanov's ideas, alternative
+# Maj and full unroll, resulted in ~20-25% improvement on most CPUs,
+# ~7% on Pentium, ~40% on Atom. As fully unrolled loop body is almost
+# 15x larger, 8KB vs. 560B, it's fired only for longer inputs. But not
+# on P4, where it kills performance, nor Sandy Bridge, where folded
+# loop is approximately as fast...
 #
-# Performance improvement over compiler generated code varies from
-# 10% to 40% [see above]. Not very impressive on some µ-archs, but
-# it's 5 times smaller and optimizies amount of writes.
+# June 2012.
+#
+# Add AMD XOP-specific code path, >30% improvement on Bulldozer over
+# May version, >60% over original. Add AVX+shrd code path, >25%
+# improvement on Sandy Bridge over May version, 60% over original.
+#
+# May 2013.
+#
+# Replace AMD XOP code path with SSSE3 to cover more processors.
+# (Biggest improvement coefficient is on upcoming Atom Silvermont,
+# not shown.) Add AVX+BMI code path.
+#
+# March 2014.
+#
+# Add support for Intel SHA Extensions.
+#
+# Performance in clock cycles per processed byte (less is better):
+#
+#              gcc     icc     x86 asm(*)      SIMD    x86_64 asm(**)  
+# Pentium      46      57      40/38           -       -
+# PIII         36      33      27/24           -       -
+# P4           41      38      28              -       17.3
+# AMD K8       27      25      19/15.5         -       14.9
+# Core2                26      23      18/15.6         14.3    13.8
+# Westmere     27      -       19/15.7         13.4    12.3
+# Sandy Bridge 25      -       15.9            12.4    11.6
+# Ivy Bridge   24      -       15.0            11.4    10.3
+# Haswell      22      -       13.9            9.46    7.80
+# Bulldozer    36      -       27/22           17.0    13.6
+# VIA Nano     36      -       25/22           16.8    16.5
+# Atom         50      -       30/25           21.9    18.9
+# Silvermont   40      -       34/31           22.9    20.6
+#
+# (*)  numbers after slash are for unrolled loop, where applicable;
+# (**) x86_64 assembly performance is presented for reference
+#      purposes, results are best-available;
 
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 push(@INC,"${dir}","${dir}../../perlasm");
@@ -30,72 +65,122 @@ require "x86asm.pl";
 
 &asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386");
 
+$xmm=$avx=0;   # $xmm: SIMD code paths requested; $avx: 0..2, graded by the assembler probes below
+for (@ARGV) { $xmm=1 if (/-DOPENSSL_IA32_SSE2/); }     # enabled by -DOPENSSL_IA32_SSE2 on the command line
+
+if ($xmm &&    `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+                       =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.19) + ($1>=2.22); # one point per threshold met; NOTE(review): compared as decimal numbers, so e.g. "2.9" would rank above 2.22
+}
+
+if ($xmm && !$avx && $ARGV[0] eq "win32n" &&
+               `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.03) + ($1>=2.10); # same grading for NASM, tried only when the GNU as probe gave 0
+}
+
+if ($xmm && !$avx && $ARGV[0] eq "win32" &&
+               `ml 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=11);     # same grading for ml, keyed on the major version only
+}
+
+if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+       $avx = ($2>=3.0) + ($2>3.0);    # 3.0 exactly scores 1, anything newer scores 2
+}
+
+$shaext=$xmm;  ### set to zero if compiling for 1.0.1
+
+$unroll_after = 64*4;  # If pre-evicted from L1P cache first spin of
+                       # fully unrolled loop was measured to run about
+                       # 3-4x slower. If slowdown coefficient is N and
+                       # unrolled loop is m times faster, then you break
+                       # even at (N-1)/(m-1) blocks. Then it needs to be
+                       # adjusted for probability of code being evicted,
+                       # code size/cache size=1/4. Typical m is 1.15...
+
 $A="eax";
 $E="edx";
 $T="ebx";
-$Aoff=&DWP(0,"esp");
-$Boff=&DWP(4,"esp");
-$Coff=&DWP(8,"esp");
-$Doff=&DWP(12,"esp");
-$Eoff=&DWP(16,"esp");
-$Foff=&DWP(20,"esp");
-$Goff=&DWP(24,"esp");
-$Hoff=&DWP(28,"esp");
-$Xoff=&DWP(32,"esp");
+$Aoff=&DWP(4,"esp");
+$Boff=&DWP(8,"esp");
+$Coff=&DWP(12,"esp");
+$Doff=&DWP(16,"esp");
+$Eoff=&DWP(20,"esp");
+$Foff=&DWP(24,"esp");
+$Goff=&DWP(28,"esp");
+$Hoff=&DWP(32,"esp");
+$Xoff=&DWP(36,"esp");
 $K256="ebp";
 
+sub BODY_16_63() {     # rounds 16..63: build T = sigma0(X[-15])+X[-16]+X[-7], leave the two sigma1(X[-2]) halves in edi/esi, then let BODY_00_15(1) finish the schedule and run the round
+       &mov    ($T,"ecx");                     # "ecx" is preloaded
+        &mov   ("esi",&DWP(4*(9+15+16-14),"esp"));
+       &ror    ("ecx",18-7);
+        &mov   ("edi","esi");
+       &ror    ("esi",19-17);
+        &xor   ("ecx",$T);
+        &shr   ($T,3);
+       &ror    ("ecx",7);
+        &xor   ("esi","edi");
+        &xor   ($T,"ecx");                     # T = sigma0(X[-15])
+       &ror    ("esi",17);
+        &add   ($T,&DWP(4*(9+15+16),"esp"));   # T += X[-16]
+       &shr    ("edi",10);
+        &add   ($T,&DWP(4*(9+15+16-9),"esp")); # T += X[-7]
+       #&xor   ("edi","esi")                   # sigma1(X[-2])
+       # &add  ($T,"edi");                     # T += sigma1(X[-2])
+       # &mov  (&DWP(4*(9+15),"esp"),$T);      # save X[0]
+
+       &BODY_00_15(1);
+}
 sub BODY_00_15() {
     my $in_16_63=shift;
 
        &mov    ("ecx",$E);
-        &add   ($T,"edi")                      if ($in_16_63); # T += sigma1(X[-2])
-       &ror    ("ecx",25-11);
+        &xor   ("edi","esi")                   if ($in_16_63); # sigma1(X[-2])
         &mov   ("esi",$Foff);
+       &ror    ("ecx",25-11);
+        &add   ($T,"edi")                      if ($in_16_63); # T += sigma1(X[-2])
+        &mov   ("edi",$Goff);
        &xor    ("ecx",$E);
+        &xor   ("esi","edi");
+        &mov   ($T,&DWP(4*(9+15),"esp"))       if (!$in_16_63);
+        &mov   (&DWP(4*(9+15),"esp"),$T)       if ($in_16_63); # save X[0]
        &ror    ("ecx",11-6);
-        &mov   (&DWP(4*(8+15),"esp"),$T)       if ($in_16_63); # save X[0]
-       &xor    ("ecx",$E);
-       &ror    ("ecx",6);      # Sigma1(e)
-        &mov   ("edi",$Goff);
-       &add    ($T,"ecx");     # T += Sigma1(e)
-
-       &xor    ("esi","edi");
-        &mov   ($Eoff,$E);     # modulo-scheduled
+        &and   ("esi",$E);
+        &mov   ($Eoff,$E);             # modulo-scheduled
+       &xor    ($E,"ecx");
+        &add   ($T,$Hoff);             # T += h
+        &xor   ("esi","edi");          # Ch(e,f,g)
+       &ror    ($E,6);                 # Sigma1(e)
         &mov   ("ecx",$A);
-       &and    ("esi",$E);
-        &mov   ($E,$Doff);     # e becomes d, which is e in next iteration
-       &xor    ("esi","edi");  # Ch(e,f,g)
-        &mov   ("edi",$A);
-       &add    ($T,"esi");     # T += Ch(e,f,g)
+        &add   ($T,"esi");             # T += Ch(e,f,g)
 
        &ror    ("ecx",22-13);
-        &add   ($T,$Hoff);     # T += h
+        &add   ($T,$E);                # T += Sigma1(e)
+        &mov   ("edi",$Boff);
        &xor    ("ecx",$A);
+        &mov   ($Aoff,$A);             # modulo-scheduled
+        &lea   ("esp",&DWP(-4,"esp"));
        &ror    ("ecx",13-2);
-        &mov   ("esi",$Boff);
-       &xor    ("ecx",$A);
-       &ror    ("ecx",2);      # Sigma0(a)
-        &add   ($E,$T);        # d += T
-        &mov   ("edi",$Coff);
-
-       &add    ($T,"ecx");     # T += Sigma0(a)
-        &mov   ($Aoff,$A);     # modulo-scheduled
-
-       &mov    ("ecx",$A);
-        &sub   ("esp",4);
-       &or     ($A,"esi");     # a becomes h, which is a in next iteration
-       &and    ("ecx","esi");
-       &and    ($A,"edi");
         &mov   ("esi",&DWP(0,$K256));
-       &or     ($A,"ecx");     # h=Maj(a,b,c)
+       &xor    ("ecx",$A);
+        &mov   ($E,$Eoff);             # e in next iteration, d in this one
+        &xor   ($A,"edi");             # a ^= b
+       &ror    ("ecx",2);              # Sigma0(a)
 
+        &add   ($T,"esi");             # T+= K[i]
+        &mov   (&DWP(0,"esp"),$A);     # (b^c) in next round
+       &add    ($E,$T);                # d += T
+        &and   ($A,&DWP(4,"esp"));     # a &= (b^c)
+       &add    ($T,"ecx");             # T += Sigma0(a)
+        &xor   ($A,"edi");             # h = Maj(a,b,c) = Ch(a^b,c,b)
+        &mov   ("ecx",&DWP(4*(9+15+16-1),"esp"))       if ($in_16_63); # preload T
        &add    ($K256,4);
-       &add    ($A,$T);        # h += T
-        &mov   ($T,&DWP(4*(8+15+16-1),"esp"))  if ($in_16_63); # preload T
-       &add    ($E,"esi");     # d += K256[i]
-       &add    ($A,"esi");     # h += K256[i]
+        &add   ($A,$T);                # h += T
 }
 
+&external_label("OPENSSL_ia32cap_P")           if (!$i386);
+
 &function_begin("sha256_block_data_order");
        &mov    ("esi",wparam(0));      # ctx
        &mov    ("edi",wparam(1));      # inp
@@ -116,26 +201,59 @@ sub BODY_00_15() {
        &mov    (&DWP(4,"esp"),"edi");  # inp
        &mov    (&DWP(8,"esp"),"eax");  # end of input; the loops step inp by 64 per block, so presumably inp+num*64 rather than *128 -- TODO confirm
        &mov    (&DWP(12,"esp"),"ebx"); # saved sp
+                                               if (!$i386 && $xmm) {
+       &picmeup("edx","OPENSSL_ia32cap_P",$K256,&label("K256"));
+       &mov    ("ecx",&DWP(0,"edx"));
+       &mov    ("ebx",&DWP(4,"edx"));
+       &test   ("ecx",1<<20);          # check for P4
+       &jnz    (&label("loop"));
+       &mov    ("edx",&DWP(8,"edx"))   if ($xmm);
+       &test   ("ecx",1<<24);          # check for FXSR
+       &jz     ($unroll_after?&label("no_xmm"):&label("loop"));
+       &and    ("ecx",1<<30);          # mask "Intel CPU" bit
+       &and    ("ebx",1<<28|1<<9);     # mask AVX and SSSE3 bits
+       &test   ("edx",1<<29)           if ($shaext);   # check for SHA
+       &jnz    (&label("shaext"))      if ($shaext);
+       &or     ("ecx","ebx");
+       &and    ("ecx",1<<28|1<<30);
+       &cmp    ("ecx",1<<28|1<<30);
+                                       if ($xmm) {
+       &je     (&label("AVX"))         if ($avx);
+       &test   ("ebx",1<<9);           # check for SSSE3
+       &jnz    (&label("SSSE3"));
+                                       } else {
+       &je     (&label("loop_shrd"));
+                                       }
+                                               if ($unroll_after) {
+&set_label("no_xmm");
+       &sub    ("eax","edi");
+       &cmp    ("eax",$unroll_after);
+       &jae    (&label("unrolled"));
+                                               } }
+       &jmp    (&label("loop"));
+
+sub COMPACT_LOOP() {
+my $suffix=shift;
 
-&set_label("loop",16);
+&set_label("loop$suffix",$suffix?32:16);
     # copy input block to stack reversing byte and dword order
     for($i=0;$i<4;$i++) {
        &mov    ("eax",&DWP($i*16+0,"edi"));
        &mov    ("ebx",&DWP($i*16+4,"edi"));
        &mov    ("ecx",&DWP($i*16+8,"edi"));
-       &mov    ("edx",&DWP($i*16+12,"edi"));
        &bswap  ("eax");
+       &mov    ("edx",&DWP($i*16+12,"edi"));
        &bswap  ("ebx");
-       &bswap  ("ecx");
-       &bswap  ("edx");
        &push   ("eax");
+       &bswap  ("ecx");
        &push   ("ebx");
+       &bswap  ("edx");
        &push   ("ecx");
        &push   ("edx");
     }
        &add    ("edi",64);
-       &sub    ("esp",4*8);            # place for A,B,C,D,E,F,G,H
-       &mov    (&DWP(4*(8+16)+4,"esp"),"edi");
+       &lea    ("esp",&DWP(-4*9,"esp"));# place for A,B,C,D,E,F,G,H plus one slot at &DWP(0,"esp") for the cached b^c
+       &mov    (&DWP(4*(9+16)+4,"esp"),"edi"); # write the advanced inp back to its frame slot
 
        # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
        &mov    ($A,&DWP(0,"esi"));
@@ -144,8 +262,10 @@ sub BODY_00_15() {
        &mov    ("edi",&DWP(12,"esi"));
        # &mov  ($Aoff,$A);
        &mov    ($Boff,"ebx");
+       &xor    ("ebx","ecx");
        &mov    ($Coff,"ecx");
        &mov    ($Doff,"edi");
+       &mov    (&DWP(0,"esp"),"ebx");  # magic
        &mov    ($E,&DWP(16,"esi"));    
        &mov    ("ebx",&DWP(20,"esi"));
        &mov    ("ecx",&DWP(24,"esi"));
@@ -155,59 +275,41 @@ sub BODY_00_15() {
        &mov    ($Goff,"ecx");
        &mov    ($Hoff,"edi");
 
-&set_label("00_15",16);
-       &mov    ($T,&DWP(4*(8+15),"esp"));
+&set_label("00_15$suffix",16);
 
        &BODY_00_15();
 
        &cmp    ("esi",0xc19bf174);
-       &jne    (&label("00_15"));
-
-       &mov    ($T,&DWP(4*(8+15+16-1),"esp")); # preloaded in BODY_00_15(1)
-&set_label("16_63",16);
-       &mov    ("esi",$T);
-        &mov   ("ecx",&DWP(4*(8+15+16-14),"esp"));
-       &ror    ("esi",18-7);
-        &mov   ("edi","ecx");
-       &xor    ("esi",$T);
-       &ror    ("esi",7);
-       &shr    ($T,3);
-
-       &ror    ("edi",19-17);
-        &xor   ($T,"esi");                     # T = sigma0(X[-15])
-       &xor    ("edi","ecx");
-       &ror    ("edi",17);
-       &shr    ("ecx",10);
-        &add   ($T,&DWP(4*(8+15+16),"esp"));   # T += X[-16]
-       &xor    ("edi","ecx");                  # sigma1(X[-2])
-
-        &add   ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7]
-       # &add  ($T,"edi");                     # T += sigma1(X[-2])
-       # &mov  (&DWP(4*(8+15),"esp"),$T);      # save X[0]
+       &jne    (&label("00_15$suffix"));
 
-       &BODY_00_15(1);
+       &mov    ("ecx",&DWP(4*(9+15+16-1),"esp"));      # preloaded in BODY_00_15(1)
+       &jmp    (&label("16_63$suffix"));
+
+&set_label("16_63$suffix",16);
+
+       &BODY_16_63();
 
        &cmp    ("esi",0xc67178f2);
-       &jne    (&label("16_63"));
+       &jne    (&label("16_63$suffix"));
 
-       &mov    ("esi",&DWP(4*(8+16+64)+0,"esp"));#ctx
+       &mov    ("esi",&DWP(4*(9+16+64)+0,"esp"));#ctx
        # &mov  ($A,$Aoff);
        &mov    ("ebx",$Boff);
-       &mov    ("ecx",$Coff);
-       &mov    ("edi",$Doff);
+       # &mov  ("edi",$Coff);
+       &mov    ("ecx",$Doff);
        &add    ($A,&DWP(0,"esi"));
        &add    ("ebx",&DWP(4,"esi"));
-       &add    ("ecx",&DWP(8,"esi"));
-       &add    ("edi",&DWP(12,"esi"));
+       &add    ("edi",&DWP(8,"esi"));
+       &add    ("ecx",&DWP(12,"esi"));
        &mov    (&DWP(0,"esi"),$A);
        &mov    (&DWP(4,"esi"),"ebx");
-       &mov    (&DWP(8,"esi"),"ecx");
-       &mov    (&DWP(12,"esi"),"edi");
+       &mov    (&DWP(8,"esi"),"edi");
+       &mov    (&DWP(12,"esi"),"ecx");
        # &mov  ($E,$Eoff);
        &mov    ("eax",$Foff);
        &mov    ("ebx",$Goff);
        &mov    ("ecx",$Hoff);
-       &mov    ("edi",&DWP(4*(8+16+64)+4,"esp"));#inp
+       &mov    ("edi",&DWP(4*(9+16+64)+4,"esp"));#inp
        &add    ($E,&DWP(16,"esi"));
        &add    ("eax",&DWP(20,"esi"));
        &add    ("ebx",&DWP(24,"esi"));
@@ -217,33 +319,963 @@ sub BODY_00_15() {
        &mov    (&DWP(24,"esi"),"ebx");
        &mov    (&DWP(28,"esi"),"ecx");
 
-       &add    ("esp",4*(8+16+64));            # destroy frame
+       &lea    ("esp",&DWP(4*(9+16+64),"esp"));# destroy frame
        &sub    ($K256,4*64);                   # rewind K
 
        &cmp    ("edi",&DWP(8,"esp"));          # are we done yet?
-       &jb     (&label("loop"));
-
+       &jb     (&label("loop$suffix"));
+}
+       &COMPACT_LOOP();
+       &mov    ("esp",&DWP(12,"esp"));         # restore sp
+&function_end_A();
+                                               if (!$i386 && !$xmm) {
+       # ~20% improvement on Sandy Bridge
+       local *ror = sub { &shrd(@_[0],@_) };
+       &COMPACT_LOOP("_shrd");
        &mov    ("esp",&DWP(12,"esp"));         # restore sp
 &function_end_A();
+                                               }
 
 &set_label("K256",64); # Yes! I keep it in the code segment!
-       &data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5);
-       &data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5);
-       &data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3);
-       &data_word(0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174);
-       &data_word(0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc);
-       &data_word(0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da);
-       &data_word(0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7);
-       &data_word(0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967);
-       &data_word(0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13);
-       &data_word(0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85);
-       &data_word(0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3);
-       &data_word(0xd192e819,0xd6990624,0xf40e3585,0x106aa070);
-       &data_word(0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5);
-       &data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3);
-       &data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208);
-       &data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2);
-&function_end_B("sha256_block_data_order");
+@K256=(        0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,
+       0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
+       0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,
+       0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
+       0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,
+       0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
+       0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,
+       0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
+       0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,
+       0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
+       0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,
+       0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
+       0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,
+       0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
+       0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,
+       0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2     );
+&data_word(@K256);
+&data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f);       # byte swap mask
 &asciz("SHA256 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
 
+($a,$b,$c,$d,$e,$f,$g,$h)=(0..7);      # slot indices of the working variables a..h
+sub off { &DWP(4*(((shift)-$i)&7),"esp"); }    # stack operand for the given slot index in round $i (global): slots rotate by one dword per round, modulo 8
+
+if (!$i386 && $unroll_after) {
+my @AH=($A,$K256);
+
+&set_label("unrolled",16);
+       &lea    ("esp",&DWP(-96,"esp"));
+       # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
+       &mov    ($AH[0],&DWP(0,"esi"));
+       &mov    ($AH[1],&DWP(4,"esi"));
+       &mov    ("ecx",&DWP(8,"esi"));
+       &mov    ("ebx",&DWP(12,"esi"));
+       #&mov   (&DWP(0,"esp"),$AH[0]);
+       &mov    (&DWP(4,"esp"),$AH[1]);
+       &xor    ($AH[1],"ecx");         # magic
+       &mov    (&DWP(8,"esp"),"ecx");
+       &mov    (&DWP(12,"esp"),"ebx");
+       &mov    ($E,&DWP(16,"esi"));    
+       &mov    ("ebx",&DWP(20,"esi"));
+       &mov    ("ecx",&DWP(24,"esi"));
+       &mov    ("esi",&DWP(28,"esi"));
+       #&mov   (&DWP(16,"esp"),$E);
+       &mov    (&DWP(20,"esp"),"ebx");
+       &mov    (&DWP(24,"esp"),"ecx");
+       &mov    (&DWP(28,"esp"),"esi");
+       &jmp    (&label("grand_loop"));
+
+&set_label("grand_loop",16);
+    # copy input block to stack reversing byte order
+    for($i=0;$i<5;$i++) {
+       &mov    ("ebx",&DWP(12*$i+0,"edi"));
+       &mov    ("ecx",&DWP(12*$i+4,"edi"));
+       &bswap  ("ebx");
+       &mov    ("esi",&DWP(12*$i+8,"edi"));
+       &bswap  ("ecx");
+       &mov    (&DWP(32+12*$i+0,"esp"),"ebx");
+       &bswap  ("esi");
+       &mov    (&DWP(32+12*$i+4,"esp"),"ecx");
+       &mov    (&DWP(32+12*$i+8,"esp"),"esi");
+    }
+       &mov    ("ebx",&DWP($i*12,"edi"));
+       &add    ("edi",64);
+       &bswap  ("ebx");
+       &mov    (&DWP(96+4,"esp"),"edi");
+       &mov    (&DWP(32+12*$i,"esp"),"ebx");
+
+    my ($t1,$t2) = ("ecx","esi");
+
+    for ($i=0;$i<64;$i++) {
+
+      if ($i>=16) {
+       &mov    ($T,$t1);                       # $t1 is preloaded
+       # &mov  ($t2,&DWP(32+4*(($i+14)&15),"esp"));
+       &ror    ($t1,18-7);
+        &mov   ("edi",$t2);
+       &ror    ($t2,19-17);
+        &xor   ($t1,$T);
+        &shr   ($T,3);
+       &ror    ($t1,7);
+        &xor   ($t2,"edi");
+        &xor   ($T,$t1);                       # T = sigma0(X[-15])
+       &ror    ($t2,17);
+        &add   ($T,&DWP(32+4*($i&15),"esp"));  # T += X[-16]
+       &shr    ("edi",10);
+        &add   ($T,&DWP(32+4*(($i+9)&15),"esp"));      # T += X[-7]
+       #&xor   ("edi",$t2)                     # sigma1(X[-2])
+       # &add  ($T,"edi");                     # T += sigma1(X[-2])
+       # &mov  (&DWP(4*(9+15),"esp"),$T);      # save X[0]
+      }
+       &mov    ($t1,$E);
+        &xor   ("edi",$t2)                     if ($i>=16);    # sigma1(X[-2])
+        &mov   ($t2,&off($f));
+       &ror    ($E,25-11);
+        &add   ($T,"edi")                      if ($i>=16);    # T += sigma1(X[-2])
+        &mov   ("edi",&off($g));
+       &xor    ($E,$t1);
+        &mov   ($T,&DWP(32+4*($i&15),"esp"))   if ($i<16);     # X[i]
+        &mov   (&DWP(32+4*($i&15),"esp"),$T)   if ($i>=16 && $i<62);   # save X[0]
+        &xor   ($t2,"edi");
+       &ror    ($E,11-6);
+        &and   ($t2,$t1);
+        &mov   (&off($e),$t1);         # save $E, modulo-scheduled
+       &xor    ($E,$t1);
+        &add   ($T,&off($h));          # T += h
+        &xor   ("edi",$t2);            # Ch(e,f,g)
+       &ror    ($E,6);                 # Sigma1(e)
+        &mov   ($t1,$AH[0]);
+        &add   ($T,"edi");             # T += Ch(e,f,g)
+
+       &ror    ($t1,22-13);
+        &mov   ($t2,$AH[0]);
+        &mov   ("edi",&off($b));
+       &xor    ($t1,$AH[0]);
+        &mov   (&off($a),$AH[0]);      # save $A, modulo-scheduled
+        &xor   ($AH[0],"edi");         # a ^= b, (b^c) in next round
+       &ror    ($t1,13-2);
+        &and   ($AH[1],$AH[0]);        # (b^c) &= (a^b)
+        &lea   ($E,&DWP(@K256[$i],$T,$E));     # T += Sigma1(e)+K[i], sum lands in $E
+       &xor    ($t1,$t2);
+        &xor   ($AH[1],"edi");         # h = Maj(a,b,c) = Ch(a^b,c,b)
+        &mov   ($t2,&DWP(32+4*(($i+2)&15),"esp"))      if ($i>=15 && $i<63);
+       &ror    ($t1,2);                # Sigma0(a)
+
+        &add   ($AH[1],$E);            # h += T
+        &add   ($E,&off($d));          # d += T
+       &add    ($AH[1],$t1);           # h += Sigma0(a)
+        &mov   ($t1,&DWP(32+4*(($i+15)&15),"esp"))     if ($i>=15 && $i<63);
+
+       @AH = reverse(@AH);             # rotate(a,h)
+       ($t1,$t2) = ($t2,$t1);          # rotate(t1,t2)
+    }
+       &mov    ("esi",&DWP(96,"esp")); #ctx
+                                       #&mov   ($AH[0],&DWP(0,"esp"));
+       &xor    ($AH[1],"edi");         #&mov   ($AH[1],&DWP(4,"esp"));
+                                       #&mov   ("edi", &DWP(8,"esp"));
+       &mov    ("ecx",&DWP(12,"esp"));
+       &add    ($AH[0],&DWP(0,"esi"));
+       &add    ($AH[1],&DWP(4,"esi"));
+       &add    ("edi",&DWP(8,"esi"));
+       &add    ("ecx",&DWP(12,"esi"));
+       &mov    (&DWP(0,"esi"),$AH[0]);
+       &mov    (&DWP(4,"esi"),$AH[1]);
+       &mov    (&DWP(8,"esi"),"edi");
+       &mov    (&DWP(12,"esi"),"ecx");
+        #&mov  (&DWP(0,"esp"),$AH[0]);
+        &mov   (&DWP(4,"esp"),$AH[1]);
+        &xor   ($AH[1],"edi");         # magic
+        &mov   (&DWP(8,"esp"),"edi");
+        &mov   (&DWP(12,"esp"),"ecx");
+       #&mov   ($E,&DWP(16,"esp"));
+       &mov    ("edi",&DWP(20,"esp"));
+       &mov    ("ebx",&DWP(24,"esp"));
+       &mov    ("ecx",&DWP(28,"esp"));
+       &add    ($E,&DWP(16,"esi"));
+       &add    ("edi",&DWP(20,"esi"));
+       &add    ("ebx",&DWP(24,"esi"));
+       &add    ("ecx",&DWP(28,"esi"));
+       &mov    (&DWP(16,"esi"),$E);
+       &mov    (&DWP(20,"esi"),"edi");
+       &mov    (&DWP(24,"esi"),"ebx");
+       &mov    (&DWP(28,"esi"),"ecx");
+        #&mov  (&DWP(16,"esp"),$E);
+        &mov   (&DWP(20,"esp"),"edi");
+       &mov    ("edi",&DWP(96+4,"esp"));       # inp
+        &mov   (&DWP(24,"esp"),"ebx");
+        &mov   (&DWP(28,"esp"),"ecx");
+
+       &cmp    ("edi",&DWP(96+8,"esp"));       # are we done yet?
+       &jb     (&label("grand_loop"));
+
+       &mov    ("esp",&DWP(96+12,"esp"));      # restore sp
+&function_end_A();
+}
+                                               if (!$i386 && $xmm) {{{
+if ($shaext) {
+######################################################################
+# Intel SHA Extensions implementation of SHA256 update function.
+#
+my ($ctx,$inp,$end)=("esi","edi","eax");
+my ($Wi,$ABEF,$CDGH,$TMP)=map("xmm$_",(0..2,7));
+my @MSG=map("xmm$_",(3..6));
+
+sub sha256op38 {       # hand-encode a 0F 38 <opcodelet> instruction with two xmm register operands; emits nothing unless both are xmm0-7
+ my ($opcodelet,$dst,$src)=@_;
+    if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
+    {  &data_byte(0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2);       }       # last byte: 0xc0 | dst<<3 | src (register-to-register form)
+}
+sub sha256rnds2        { sha256op38(0xcb,@_); }        # opcodelet 0xcb
+sub sha256msg1 { sha256op38(0xcc,@_); }        # opcodelet 0xcc
+sub sha256msg2 { sha256op38(0xcd,@_); }        # opcodelet 0xcd
+
+&set_label("shaext",32);
+       &sub            ("esp",32);
+
+       &movdqu         ($ABEF,&QWP(0,$ctx));           # DCBA
+       &lea            ($K256,&DWP(0x80,$K256));
+       &movdqu         ($CDGH,&QWP(16,$ctx));          # HGFE
+       &movdqa         ($TMP,&QWP(0x100-0x80,$K256));  # byte swap mask
+
+       &pshufd         ($Wi,$ABEF,0x1b);               # ABCD
+       &pshufd         ($ABEF,$ABEF,0xb1);             # CDAB
+       &pshufd         ($CDGH,$CDGH,0x1b);             # EFGH
+       &palignr        ($ABEF,$CDGH,8);                # ABEF
+       &punpcklqdq     ($CDGH,$Wi);                    # CDGH
+       &jmp            (&label("loop_shaext"));
+
+&set_label("loop_shaext",16);
+       &movdqu         (@MSG[0],&QWP(0,$inp));
+       &movdqu         (@MSG[1],&QWP(0x10,$inp));
+       &movdqu         (@MSG[2],&QWP(0x20,$inp));
+       &pshufb         (@MSG[0],$TMP);
+       &movdqu         (@MSG[3],&QWP(0x30,$inp));
+       &movdqa         (&QWP(16,"esp"),$CDGH);         # offload
+
+       &movdqa         ($Wi,&QWP(0*16-0x80,$K256));
+       &paddd          ($Wi,@MSG[0]);
+       &pshufb         (@MSG[1],$TMP);
+       &sha256rnds2    ($CDGH,$ABEF);                  # 0-3
+       &pshufd         ($Wi,$Wi,0x0e);
+       &nop            ();
+       &movdqa         (&QWP(0,"esp"),$ABEF);          # offload
+       &sha256rnds2    ($ABEF,$CDGH);
+
+       &movdqa         ($Wi,&QWP(1*16-0x80,$K256));
+       &paddd          ($Wi,@MSG[1]);
+       &pshufb         (@MSG[2],$TMP);
+       &sha256rnds2    ($CDGH,$ABEF);                  # 4-7
+       &pshufd         ($Wi,$Wi,0x0e);
+       &lea            ($inp,&DWP(0x40,$inp));
+       &sha256msg1     (@MSG[0],@MSG[1]);
+       &sha256rnds2    ($ABEF,$CDGH);
+
+       &movdqa         ($Wi,&QWP(2*16-0x80,$K256));
+       &paddd          ($Wi,@MSG[2]);
+       &pshufb         (@MSG[3],$TMP);
+       &sha256rnds2    ($CDGH,$ABEF);                  # 8-11
+       &pshufd         ($Wi,$Wi,0x0e);
+       &movdqa         ($TMP,@MSG[3]);
+       &palignr        ($TMP,@MSG[2],4);
+       &nop            ();
+       &paddd          (@MSG[0],$TMP);
+       &sha256msg1     (@MSG[1],@MSG[2]);
+       &sha256rnds2    ($ABEF,$CDGH);
+
+       &movdqa         ($Wi,&QWP(3*16-0x80,$K256));
+       &paddd          ($Wi,@MSG[3]);
+       &sha256msg2     (@MSG[0],@MSG[3]);
+       &sha256rnds2    ($CDGH,$ABEF);                  # 12-15
+       &pshufd         ($Wi,$Wi,0x0e);
+       &movdqa         ($TMP,@MSG[0]);
+       &palignr        ($TMP,@MSG[3],4);
+       &nop            ();
+       &paddd          (@MSG[1],$TMP);
+       &sha256msg1     (@MSG[2],@MSG[3]);
+       &sha256rnds2    ($ABEF,$CDGH);
+
+for($i=4;$i<16-3;$i++) {
+       &movdqa         ($Wi,&QWP($i*16-0x80,$K256));
+       &paddd          ($Wi,@MSG[0]);
+       &sha256msg2     (@MSG[1],@MSG[0]);
+       &sha256rnds2    ($CDGH,$ABEF);                  # 16-19...
+       &pshufd         ($Wi,$Wi,0x0e);
+       &movdqa         ($TMP,@MSG[1]);
+       &palignr        ($TMP,@MSG[0],4);
+       &nop            ();
+       &paddd          (@MSG[2],$TMP);
+       &sha256msg1     (@MSG[3],@MSG[0]);
+       &sha256rnds2    ($ABEF,$CDGH);
+
+       push(@MSG,shift(@MSG));
+}
+       &movdqa         ($Wi,&QWP(13*16-0x80,$K256));   # K256[52..55]; $K256 carries a 0x80 bias
+       &paddd          ($Wi,@MSG[0]);
+       &sha256msg2     (@MSG[1],@MSG[0]);
+       &sha256rnds2    ($CDGH,$ABEF);                  # 52-55
+       &pshufd         ($Wi,$Wi,0x0e);
+       &movdqa         ($TMP,@MSG[1]);                 # ';' keeps this a statement of its own instead of the left operand of a bitwise '&' with the palignr call
+       &palignr        ($TMP,@MSG[0],4);
+       &sha256rnds2    ($ABEF,$CDGH);
+       &paddd          (@MSG[2],$TMP);
+
+       &movdqa         ($Wi,&QWP(14*16-0x80,$K256));
+       &paddd          ($Wi,@MSG[1]);
+       &sha256rnds2    ($CDGH,$ABEF);                  # 56-59
+       &pshufd         ($Wi,$Wi,0x0e);
+       &sha256msg2     (@MSG[2],@MSG[1]);
+       &movdqa         ($TMP,&QWP(0x100-0x80,$K256));  # byte swap mask
+       &sha256rnds2    ($ABEF,$CDGH);
+
+       &movdqa         ($Wi,&QWP(15*16-0x80,$K256));
+       &paddd          ($Wi,@MSG[2]);
+       &nop            ();
+       &sha256rnds2    ($CDGH,$ABEF);                  # 60-63
+       &pshufd         ($Wi,$Wi,0x0e);
+       &cmp            ($end,$inp);
+       &nop            ();
+       &sha256rnds2    ($ABEF,$CDGH);
+
+       &paddd          ($CDGH,&QWP(16,"esp"));
+       &paddd          ($ABEF,&QWP(0,"esp"));
+       &jnz            (&label("loop_shaext"));
+
+       &pshufd         ($CDGH,$CDGH,0xb1);             # DCHG
+       &pshufd         ($TMP,$ABEF,0x1b);              # FEBA
+       &pshufd         ($ABEF,$ABEF,0xb1);             # BAFE
+       &punpckhqdq     ($ABEF,$CDGH);                  # DCBA
+       &palignr        ($CDGH,$TMP,8);                 # HGFE
+
+       &mov            ("esp",&DWP(32+12,"esp"));
+       &movdqu         (&QWP(0,$ctx),$ABEF);
+       &movdqu         (&QWP(16,$ctx),$CDGH);
+&function_end_A();
+}
+
+my @X = map("xmm$_",(0..3));
+my ($t0,$t1,$t2,$t3) = map("xmm$_",(4..7));
+my @AH = ($A,$T);
+
+&set_label("SSSE3",32);
+       &lea    ("esp",&DWP(-96,"esp"));
+       # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
+       &mov    ($AH[0],&DWP(0,"esi"));
+       &mov    ($AH[1],&DWP(4,"esi"));
+       &mov    ("ecx",&DWP(8,"esi"));
+       &mov    ("edi",&DWP(12,"esi"));
+       #&mov   (&DWP(0,"esp"),$AH[0]);
+       &mov    (&DWP(4,"esp"),$AH[1]);
+       &xor    ($AH[1],"ecx");                 # magic
+       &mov    (&DWP(8,"esp"),"ecx");
+       &mov    (&DWP(12,"esp"),"edi");
+       &mov    ($E,&DWP(16,"esi"));
+       &mov    ("edi",&DWP(20,"esi"));
+       &mov    ("ecx",&DWP(24,"esi"));
+       &mov    ("esi",&DWP(28,"esi"));
+       #&mov   (&DWP(16,"esp"),$E);
+       &mov    (&DWP(20,"esp"),"edi");
+       &mov    ("edi",&DWP(96+4,"esp"));       # inp
+       &mov    (&DWP(24,"esp"),"ecx");
+       &mov    (&DWP(28,"esp"),"esi");
+       &movdqa ($t3,&QWP(256,$K256));
+       &jmp    (&label("grand_ssse3"));
+
+&set_label("grand_ssse3",16);
+       # load input, reverse byte order, add K256[0..15], save to stack
+       &movdqu (@X[0],&QWP(0,"edi"));
+       &movdqu (@X[1],&QWP(16,"edi"));
+       &movdqu (@X[2],&QWP(32,"edi"));
+       &movdqu (@X[3],&QWP(48,"edi"));
+       &add    ("edi",64);
+       &pshufb (@X[0],$t3);
+       &mov    (&DWP(96+4,"esp"),"edi");
+       &pshufb (@X[1],$t3);
+       &movdqa ($t0,&QWP(0,$K256));
+       &pshufb (@X[2],$t3);
+       &movdqa ($t1,&QWP(16,$K256));
+       &paddd  ($t0,@X[0]);
+       &pshufb (@X[3],$t3);
+       &movdqa ($t2,&QWP(32,$K256));
+       &paddd  ($t1,@X[1]);
+       &movdqa ($t3,&QWP(48,$K256));
+       &movdqa (&QWP(32+0,"esp"),$t0);
+       &paddd  ($t2,@X[2]);
+       &movdqa (&QWP(32+16,"esp"),$t1);
+       &paddd  ($t3,@X[3]);
+       &movdqa (&QWP(32+32,"esp"),$t2);
+       &movdqa (&QWP(32+48,"esp"),$t3);
+       &jmp    (&label("ssse3_00_47"));
+
+&set_label("ssse3_00_47",16);
+       &add            ($K256,64);
+
+sub SSSE3_00_47 () {
+my $j = shift;
+my $body = shift;
+my @X = @_;
+my @insns = (&$body,&$body,&$body,&$body);     # 120 instructions
+
+         eval(shift(@insns));
+       &movdqa         ($t0,@X[1]);
+         eval(shift(@insns));                  # @
+         eval(shift(@insns));
+       &movdqa         ($t3,@X[3]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &palignr        ($t0,@X[0],4);          # X[1..4]
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+         eval(shift(@insns));
+        &palignr       ($t3,@X[2],4);          # X[9..12]
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &movdqa         ($t1,$t0);
+         eval(shift(@insns));                  # @
+         eval(shift(@insns));
+       &movdqa         ($t2,$t0);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &psrld          ($t0,3);
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+        &paddd         (@X[0],$t3);            # X[0..3] += X[9..12]
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &psrld          ($t2,7);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+         eval(shift(@insns));
+        &pshufd        ($t3,@X[3],0b11111010); # X[14..15]
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &pslld          ($t1,32-18);
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+       &pxor           ($t0,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &psrld          ($t2,18-7);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+       &pxor           ($t0,$t1);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &pslld          ($t1,18-7);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+       &pxor           ($t0,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &movdqa        ($t2,$t3);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+       &pxor           ($t0,$t1);              # sigma0(X[1..4])
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &psrld         ($t3,10);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+       &paddd          (@X[0],$t0);            # X[0..3] += sigma0(X[1..4])
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &psrlq         ($t2,17);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+        &pxor          ($t3,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &psrlq         ($t2,19-17);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+        &pxor          ($t3,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &pshufd        ($t3,$t3,0b10000000);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+         eval(shift(@insns));
+        &psrldq        ($t3,8);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &paddd          (@X[0],$t3);            # X[0..1] += sigma1(X[14..15])
+         eval(shift(@insns));                  # @
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+         eval(shift(@insns));
+        &pshufd        ($t3,@X[0],0b01010000); # X[16..17]
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &movdqa        ($t2,$t3);
+         eval(shift(@insns));                  # @
+        &psrld         ($t3,10);
+         eval(shift(@insns));
+        &psrlq         ($t2,17);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+        &pxor          ($t3,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &psrlq         ($t2,19-17);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+        &pxor          ($t3,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &pshufd        ($t3,$t3,0b00001000);
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+       &movdqa         ($t2,&QWP(16*$j,$K256));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &pslldq        ($t3,8);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));                  # @
+       &paddd          (@X[0],$t3);            # X[2..3] += sigma1(X[16..17])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &paddd          ($t2,@X[0]);
+         eval(shift(@insns));                  # @
+
+       foreach (@insns) { eval; }              # remaining instructions
+
+       &movdqa         (&QWP(32+16*$j,"esp"),$t2);
+}
+
+sub body_00_15 () {                    # one SHA-256 round as a list of 30 snippet strings; callers eval them one at a time
+       (
+       '&mov   ("ecx",$E);',           # ecx keeps the unrotated e
+       '&ror   ($E,25-11);',           # Sigma1 is built incrementally: ror 14, 5, 6 with e xored in between
+        '&mov  ("esi",&off($f));',
+       '&xor   ($E,"ecx");',
+        '&mov  ("edi",&off($g));',
+        '&xor  ("esi","edi");',        # f^g
+       '&ror   ($E,11-6);',
+        '&and  ("esi","ecx");',        # (f^g)&e
+        '&mov  (&off($e),"ecx");',     # save $E, modulo-scheduled
+       '&xor   ($E,"ecx");',
+        '&xor  ("edi","esi");',        # Ch(e,f,g)
+       '&ror   ($E,6);',               # T = Sigma1(e)
+        '&mov  ("ecx",$AH[0]);',
+        '&add  ($E,"edi");',           # T += Ch(e,f,g)
+        '&mov  ("edi",&off($b));',
+       '&mov   ("esi",$AH[0]);',       # esi keeps the unrotated a
+
+       '&ror   ("ecx",22-13);',        # Sigma0 is built the same way: ror 9, 11, 2
+        '&mov  (&off($a),$AH[0]);',    # save $A, modulo-scheduled
+       '&xor   ("ecx",$AH[0]);',
+        '&xor  ($AH[0],"edi");',       # a ^= b, (b^c) in next round
+        '&add  ($E,&off($h));',        # T += h
+       '&ror   ("ecx",13-2);',
+        '&and  ($AH[1],$AH[0]);',      # (b^c) &= (a^b)
+       '&xor   ("ecx","esi");',
+        '&add  ($E,&DWP(32+4*($i&15),"esp"));',        # T += K[i]+X[i]
+        '&xor  ($AH[1],"edi");',       # h = Maj(a,b,c) = Ch(a^b,c,b)
+       '&ror   ("ecx",2);',            # Sigma0(a)
+
+        '&add  ($AH[1],$E);',          # h += T
+        '&add  ($E,&off($d));',        # d += T
+       '&add   ($AH[1],"ecx");'.       # h += Sigma0(a); the "." glues the bookkeeping below onto this entry
+
+       '@AH = reverse(@AH); $i++;'     # rotate(a,h) and advance the round counter
+       );
+}
+
+    for ($i=0,$j=0; $j<4; $j++) {      # each call stages one 16-byte X+K group at 32+16*$j on the stack
+       &SSSE3_00_47($j,\&body_00_15,@X);
+       push(@X,shift(@X));             # rotate(@X)
+    }
+       &cmp    (&DWP(16*$j,$K256),0x00010203); # $j is 4 here: probe the dword 64 bytes ahead for the end marker (presumably the byte-swap mask; TODO confirm)
+       &jne    (&label("ssse3_00_47"));
+
+    for ($i=0; $i<16; ) {              # no increment clause: $i++ is part of the last snippet of every round
+       foreach(body_00_15()) { eval; } # 16 more rounds without a schedule update
+    }
+
+       &mov    ("esi",&DWP(96,"esp")); #ctx
+                                       #&mov   ($AH[0],&DWP(0,"esp"));
+       &xor    ($AH[1],"edi");         #&mov   ($AH[1],&DWP(4,"esp"));
+                                       #&mov   ("edi", &DWP(8,"esp"));
+       &mov    ("ecx",&DWP(12,"esp"));
+       &add    ($AH[0],&DWP(0,"esi")); # add the values stored at ctx+0..12 to the first four state words ...
+       &add    ($AH[1],&DWP(4,"esi"));
+       &add    ("edi",&DWP(8,"esi"));
+       &add    ("ecx",&DWP(12,"esi"));
+       &mov    (&DWP(0,"esi"),$AH[0]); # ... and write the sums back to ctx
+       &mov    (&DWP(4,"esi"),$AH[1]);
+       &mov    (&DWP(8,"esi"),"edi");
+       &mov    (&DWP(12,"esi"),"ecx");
+        #&mov  (&DWP(0,"esp"),$AH[0]);
+        &mov   (&DWP(4,"esp"),$AH[1]); # the sums are also stored back into the stack copy
+        &xor   ($AH[1],"edi");                 # magic
+        &mov   (&DWP(8,"esp"),"edi");
+        &mov   (&DWP(12,"esp"),"ecx");
+       #&mov   ($E,&DWP(16,"esp"));
+       &mov    ("edi",&DWP(20,"esp"));
+       &mov    ("ecx",&DWP(24,"esp"));
+       &add    ($E,&DWP(16,"esi"));    # same treatment for the words at ctx+16..28
+       &add    ("edi",&DWP(20,"esi"));
+       &add    ("ecx",&DWP(24,"esi"));
+       &mov    (&DWP(16,"esi"),$E);
+       &mov    (&DWP(20,"esi"),"edi");
+        &mov   (&DWP(20,"esp"),"edi");
+       &mov    ("edi",&DWP(28,"esp"));
+       &mov    (&DWP(24,"esi"),"ecx");
+        #&mov  (&DWP(16,"esp"),$E);
+       &add    ("edi",&DWP(28,"esi"));
+        &mov   (&DWP(24,"esp"),"ecx");
+       &mov    (&DWP(28,"esi"),"edi");
+        &mov   (&DWP(28,"esp"),"edi");
+       &mov    ("edi",&DWP(96+4,"esp"));       # inp
+
+       &movdqa ($t3,&QWP(64,$K256));   # reload $t3 from K256+64 -- presumably the byte-swap mask for the next block; TODO confirm
+       &sub    ($K256,3*64);                   # rewind K
+       &cmp    ("edi",&DWP(96+8,"esp"));       # are we done yet?
+       &jb     (&label("grand_ssse3"));
+
+       &mov    ("esp",&DWP(96+12,"esp"));      # restore sp
+&function_end_A();
+                                               if ($avx) {
+&set_label("AVX",32);
+                                               if ($avx>1) {
+       &and    ("edx",1<<8|1<<3);              # check for BMI2+BMI1
+       &cmp    ("edx",1<<8|1<<3);              # both bits must be set to take the BMI path
+       &je     (&label("AVX_BMI"));
+                                               }
+       &lea    ("esp",&DWP(-96,"esp"));        # esp -= 96: state copy at 0..31, X+K staging at 32..95
+       &vzeroall       ();
+       # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
+       &mov    ($AH[0],&DWP(0,"esi"));
+       &mov    ($AH[1],&DWP(4,"esi"));
+       &mov    ("ecx",&DWP(8,"esi"));
+       &mov    ("edi",&DWP(12,"esi"));
+       #&mov   (&DWP(0,"esp"),$AH[0]);
+       &mov    (&DWP(4,"esp"),$AH[1]);
+       &xor    ($AH[1],"ecx");                 # magic
+       &mov    (&DWP(8,"esp"),"ecx");
+       &mov    (&DWP(12,"esp"),"edi");
+       &mov    ($E,&DWP(16,"esi"));
+       &mov    ("edi",&DWP(20,"esi"));
+       &mov    ("ecx",&DWP(24,"esi"));
+       &mov    ("esi",&DWP(28,"esi"));         # last use of the ctx pointer in esi; it is re-read from the stack after the rounds
+       #&mov   (&DWP(16,"esp"),$E);
+       &mov    (&DWP(20,"esp"),"edi");
+       &mov    ("edi",&DWP(96+4,"esp"));       # inp
+       &mov    (&DWP(24,"esp"),"ecx");
+       &mov    (&DWP(28,"esp"),"esi");
+       &vmovdqa        ($t3,&QWP(256,$K256));  # $t3 = vpshufb mask for the load stage, read from K256+256
+       &jmp    (&label("grand_avx"));
+
+&set_label("grand_avx",32);
+       # load input, reverse byte order, add K256[0..15], save to stack
+       &vmovdqu        (@X[0],&QWP(0,"edi"));
+       &vmovdqu        (@X[1],&QWP(16,"edi"));
+       &vmovdqu        (@X[2],&QWP(32,"edi"));
+       &vmovdqu        (@X[3],&QWP(48,"edi"));
+       &add            ("edi",64);             # step inp past this 64-byte block
+       &vpshufb        (@X[0],@X[0],$t3);
+       &mov            (&DWP(96+4,"esp"),"edi");       # save the advanced inp
+       &vpshufb        (@X[1],@X[1],$t3);
+       &vpshufb        (@X[2],@X[2],$t3);
+       &vpaddd         ($t0,@X[0],&QWP(0,$K256));
+       &vpshufb        (@X[3],@X[3],$t3);
+       &vpaddd         ($t1,@X[1],&QWP(16,$K256));
+       &vpaddd         ($t2,@X[2],&QWP(32,$K256));
+       &vpaddd         ($t3,@X[3],&QWP(48,$K256));     # overwrites the mask in $t3
+       &vmovdqa        (&QWP(32+0,"esp"),$t0);
+       &vmovdqa        (&QWP(32+16,"esp"),$t1);
+       &vmovdqa        (&QWP(32+32,"esp"),$t2);
+       &vmovdqa        (&QWP(32+48,"esp"),$t3);
+       &jmp            (&label("avx_00_47"));
+
+&set_label("avx_00_47",16);
+       &add            ($K256,64);             # advance to the next 16 round constants
+
+sub Xupdate_AVX () {                   # message-schedule update for four words, as a list of 31 snippet strings
+       (
+       '&vpalignr      ($t0,@X[1],@X[0],4);',  # X[1..4]
+        '&vpalignr     ($t3,@X[3],@X[2],4);',  # X[9..12]
+       '&vpsrld        ($t2,$t0,7);',          # sigma0 = ror7 ^ ror18 ^ shr3, rotates assembled from shift pairs
+        '&vpaddd       (@X[0],@X[0],$t3);',    # X[0..3] += X[9..12]
+       '&vpsrld        ($t3,$t0,3);',
+       '&vpslld        ($t1,$t0,14);',
+       '&vpxor         ($t0,$t3,$t2);',
+        '&vpshufd      ($t3,@X[3],0b11111010)',# X[14..15], each dword doubled so vpsrlq yields a rotate in the low half
+       '&vpsrld        ($t2,$t2,18-7);',
+       '&vpxor         ($t0,$t0,$t1);',
+       '&vpslld        ($t1,$t1,25-14);',
+       '&vpxor         ($t0,$t0,$t2);',
+        '&vpsrld       ($t2,$t3,10);',
+       '&vpxor         ($t0,$t0,$t1);',        # sigma0(X[1..4])
+        '&vpsrlq       ($t1,$t3,17);',
+       '&vpaddd        (@X[0],@X[0],$t0);',    # X[0..3] += sigma0(X[1..4])
+        '&vpxor        ($t2,$t2,$t1);',
+        '&vpsrlq       ($t3,$t3,19);',
+        '&vpxor        ($t2,$t2,$t3);',        # sigma1(X[14..15])
+        '&vpshufd      ($t3,$t2,0b10000100);',
+       '&vpsrldq       ($t3,$t3,8);',
+       '&vpaddd        (@X[0],@X[0],$t3);',    # X[0..1] += sigma1(X[14..15])
+        '&vpshufd      ($t3,@X[0],0b01010000)',# X[16..17]
+        '&vpsrld       ($t2,$t3,10);',
+        '&vpsrlq       ($t1,$t3,17);',
+        '&vpxor        ($t2,$t2,$t1);',
+        '&vpsrlq       ($t3,$t3,19);',
+        '&vpxor        ($t2,$t2,$t3);',        # sigma1(X[16..17])
+        '&vpshufd      ($t3,$t2,0b11101000);',
+       '&vpslldq       ($t3,$t3,8);',
+       '&vpaddd        (@X[0],@X[0],$t3);'     # X[2..3] += sigma1(X[16..17])
+       );
+}
+
+local *ror = sub { &shrd(@_[0],@_) };  # while this local is in effect, &ror(r,n) emits shrd r,r,n (same rotation, different instruction)
+sub AVX_00_47 () {                     # args: $j (16-byte group index), round-body generator, @X
+my $j = shift;
+my $body = shift;
+my @X = @_;
+my @insns = (&$body,&$body,&$body,&$body);     # 4 rounds: 120 instructions with body_00_15, 100 with bodyx_00_15
+my $insn;
+
+       foreach (Xupdate_AVX()) {               # 31 instructions
+           eval;                               # one schedule instruction ...
+           eval(shift(@insns));                # ... followed by three round instructions
+           eval(shift(@insns));
+           eval($insn = shift(@insns));
+           eval(shift(@insns)) if ($insn =~ /rorx/ && @insns[0] =~ /rorx/);   # a fourth one so that back-to-back rorx stay together
+       }
+       &vpaddd         ($t2,@X[0],&QWP(16*$j,$K256));  # updated X words plus the round constants at 16*$j from $K256
+       foreach (@insns) { eval; }              # remaining instructions
+       &vmovdqa        (&QWP(32+16*$j,"esp"),$t2);     # stage the sums where the round bodies read K[i]+X[i]
+}
+
+    for ($i=0,$j=0; $j<4; $j++) {      # 4 calls x 4 rounds: 16 rounds interleaved with the schedule update
+       &AVX_00_47($j,\&body_00_15,@X);
+       push(@X,shift(@X));             # rotate(@X)
+    }
+       &cmp    (&DWP(16*$j,$K256),0x00010203); # $j is 4 here: probe the dword 64 bytes ahead for the end marker (presumably the byte-swap mask; TODO confirm)
+       &jne    (&label("avx_00_47"));
+
+    for ($i=0; $i<16; ) {              # no increment clause: $i++ is part of the last snippet of every round
+       foreach(body_00_15()) { eval; } # 16 more rounds without a schedule update
+    }
+
+       &mov    ("esi",&DWP(96,"esp")); #ctx
+                                       #&mov   ($AH[0],&DWP(0,"esp"));
+       &xor    ($AH[1],"edi");         #&mov   ($AH[1],&DWP(4,"esp"));
+                                       #&mov   ("edi", &DWP(8,"esp"));
+       &mov    ("ecx",&DWP(12,"esp"));
+       &add    ($AH[0],&DWP(0,"esi")); # add the previous ctx->h[0..3] to the new state ...
+       &add    ($AH[1],&DWP(4,"esi"));
+       &add    ("edi",&DWP(8,"esi"));
+       &add    ("ecx",&DWP(12,"esi"));
+       &mov    (&DWP(0,"esi"),$AH[0]); # ... and write the sums back to ctx
+       &mov    (&DWP(4,"esi"),$AH[1]);
+       &mov    (&DWP(8,"esi"),"edi");
+       &mov    (&DWP(12,"esi"),"ecx");
+        #&mov  (&DWP(0,"esp"),$AH[0]);
+        &mov   (&DWP(4,"esp"),$AH[1]); # the sums are also stored back into the stack copy
+        &xor   ($AH[1],"edi");                 # magic
+        &mov   (&DWP(8,"esp"),"edi");
+        &mov   (&DWP(12,"esp"),"ecx");
+       #&mov   ($E,&DWP(16,"esp"));
+       &mov    ("edi",&DWP(20,"esp"));
+       &mov    ("ecx",&DWP(24,"esp"));
+       &add    ($E,&DWP(16,"esi"));    # same treatment for ctx->h[4..7]
+       &add    ("edi",&DWP(20,"esi"));
+       &add    ("ecx",&DWP(24,"esi"));
+       &mov    (&DWP(16,"esi"),$E);
+       &mov    (&DWP(20,"esi"),"edi");
+        &mov   (&DWP(20,"esp"),"edi");
+       &mov    ("edi",&DWP(28,"esp"));
+       &mov    (&DWP(24,"esi"),"ecx");
+        #&mov  (&DWP(16,"esp"),$E);
+       &add    ("edi",&DWP(28,"esi"));
+        &mov   (&DWP(24,"esp"),"ecx");
+       &mov    (&DWP(28,"esi"),"edi");
+        &mov   (&DWP(28,"esp"),"edi");
+       &mov    ("edi",&DWP(96+4,"esp"));       # inp
+
+       &vmovdqa        ($t3,&QWP(64,$K256));   # reload the vpshufb mask that the load stage overwrote in $t3
+       &sub    ($K256,3*64);                   # rewind K
+       &cmp    ("edi",&DWP(96+8,"esp"));       # are we done yet?
+       &jb     (&label("grand_avx"));
+
+       &mov    ("esp",&DWP(96+12,"esp"));      # restore sp
+       &vzeroall       ();                     # zero the vector registers before leaving
+&function_end_A();
+                                               if ($avx>1) {
+sub bodyx_00_15 () {                   # +10%; rorx/andn variant of body_00_15, 25 snippet strings per round
+       (
+       '&rorx  ("ecx",$E,6)',
+       '&rorx  ("esi",$E,11)',
+        '&mov  (&off($e),$E)',         # save $E, modulo-scheduled
+       '&rorx  ("edi",$E,25)',
+       '&xor   ("ecx","esi")',
+        '&andn ("esi",$E,&off($g))',   # ~e & g
+       '&xor   ("ecx","edi")',         # Sigma1(e)
+        '&and  ($E,&off($f))',         # e & f
+        '&mov  (&off($a),$AH[0]);',    # save $A, modulo-scheduled
+        '&or   ($E,"esi")',            # T = Ch(e,f,g)
+
+       '&rorx  ("edi",$AH[0],2)',
+       '&rorx  ("esi",$AH[0],13)',
+        '&lea  ($E,&DWP(0,$E,"ecx"))', # T += Sigma1(e)
+       '&rorx  ("ecx",$AH[0],22)',
+       '&xor   ("esi","edi")',
+        '&mov  ("edi",&off($b))',
+       '&xor   ("ecx","esi")',         # Sigma0(a)
+
+        '&xor  ($AH[0],"edi")',        # a ^= b, (b^c) in next round
+        '&add  ($E,&off($h))',         # T += h
+        '&and  ($AH[1],$AH[0])',       # (b^c) &= (a^b)
+        '&add  ($E,&DWP(32+4*($i&15),"esp"))', # T += K[i]+X[i]
+        '&xor  ($AH[1],"edi")',        # h = Maj(a,b,c) = Ch(a^b,c,b)
+
+        '&add  ("ecx",$E)',            # Sigma0(a) += T (folded into h below)
+        '&add  ($E,&off($d))',         # d += T
+       '&lea   ($AH[1],&DWP(0,$AH[1],"ecx"));'.        # h += Sigma0(a)+T; the "." glues the bookkeeping below onto this entry
+
+       '@AH = reverse(@AH); $i++;'     # rotate(a,h) and advance the round counter
+       );
+}
+
+&set_label("AVX_BMI",32);
+       &lea    ("esp",&DWP(-96,"esp"));        # esp -= 96: state copy at 0..31, X+K staging at 32..95
+       &vzeroall       ();
+       # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
+       &mov    ($AH[0],&DWP(0,"esi"));
+       &mov    ($AH[1],&DWP(4,"esi"));
+       &mov    ("ecx",&DWP(8,"esi"));
+       &mov    ("edi",&DWP(12,"esi"));
+       #&mov   (&DWP(0,"esp"),$AH[0]);
+       &mov    (&DWP(4,"esp"),$AH[1]);
+       &xor    ($AH[1],"ecx");                 # magic
+       &mov    (&DWP(8,"esp"),"ecx");
+       &mov    (&DWP(12,"esp"),"edi");
+       &mov    ($E,&DWP(16,"esi"));
+       &mov    ("edi",&DWP(20,"esi"));
+       &mov    ("ecx",&DWP(24,"esi"));
+       &mov    ("esi",&DWP(28,"esi"));         # last use of the ctx pointer in esi; it is re-read from the stack after the rounds
+       #&mov   (&DWP(16,"esp"),$E);
+       &mov    (&DWP(20,"esp"),"edi");
+       &mov    ("edi",&DWP(96+4,"esp"));       # inp
+       &mov    (&DWP(24,"esp"),"ecx");
+       &mov    (&DWP(28,"esp"),"esi");
+       &vmovdqa        ($t3,&QWP(256,$K256));  # $t3 = vpshufb mask for the load stage, read from K256+256
+       &jmp    (&label("grand_avx_bmi"));
+
+&set_label("grand_avx_bmi",32);
+       # load input, reverse byte order, add K256[0..15], save to stack
+       &vmovdqu        (@X[0],&QWP(0,"edi"));
+       &vmovdqu        (@X[1],&QWP(16,"edi"));
+       &vmovdqu        (@X[2],&QWP(32,"edi"));
+       &vmovdqu        (@X[3],&QWP(48,"edi"));
+       &add            ("edi",64);             # step inp past this 64-byte block
+       &vpshufb        (@X[0],@X[0],$t3);
+       &mov            (&DWP(96+4,"esp"),"edi");       # save the advanced inp
+       &vpshufb        (@X[1],@X[1],$t3);
+       &vpshufb        (@X[2],@X[2],$t3);
+       &vpaddd         ($t0,@X[0],&QWP(0,$K256));
+       &vpshufb        (@X[3],@X[3],$t3);
+       &vpaddd         ($t1,@X[1],&QWP(16,$K256));
+       &vpaddd         ($t2,@X[2],&QWP(32,$K256));
+       &vpaddd         ($t3,@X[3],&QWP(48,$K256));     # overwrites the mask in $t3
+       &vmovdqa        (&QWP(32+0,"esp"),$t0);
+       &vmovdqa        (&QWP(32+16,"esp"),$t1);
+       &vmovdqa        (&QWP(32+32,"esp"),$t2);
+       &vmovdqa        (&QWP(32+48,"esp"),$t3);
+       &jmp            (&label("avx_bmi_00_47"));
+
+&set_label("avx_bmi_00_47",16);
+       &add            ($K256,64);             # advance to the next 16 round constants
+
+    for ($i=0,$j=0; $j<4; $j++) {      # 4 calls x 4 rounds: 16 rounds interleaved with the schedule update
+       &AVX_00_47($j,\&bodyx_00_15,@X);
+       push(@X,shift(@X));             # rotate(@X)
+    }
+       &cmp    (&DWP(16*$j,$K256),0x00010203); # $j is 4 here: probe the dword 64 bytes ahead for the end marker (presumably the byte-swap mask; TODO confirm)
+       &jne    (&label("avx_bmi_00_47"));
+
+    for ($i=0; $i<16; ) {              # no increment clause: $i++ is part of the last snippet of every round
+       foreach(bodyx_00_15()) { eval; }        # 16 more rounds without a schedule update
+    }
+
+       &mov    ("esi",&DWP(96,"esp")); #ctx
+                                       #&mov   ($AH[0],&DWP(0,"esp"));
+       &xor    ($AH[1],"edi");         #&mov   ($AH[1],&DWP(4,"esp"));
+                                       #&mov   ("edi", &DWP(8,"esp"));
+       &mov    ("ecx",&DWP(12,"esp"));
+       &add    ($AH[0],&DWP(0,"esi")); # add the previous ctx->h[0..3] to the new state ...
+       &add    ($AH[1],&DWP(4,"esi"));
+       &add    ("edi",&DWP(8,"esi"));
+       &add    ("ecx",&DWP(12,"esi"));
+       &mov    (&DWP(0,"esi"),$AH[0]); # ... and write the sums back to ctx
+       &mov    (&DWP(4,"esi"),$AH[1]);
+       &mov    (&DWP(8,"esi"),"edi");
+       &mov    (&DWP(12,"esi"),"ecx");
+        #&mov  (&DWP(0,"esp"),$AH[0]);
+        &mov   (&DWP(4,"esp"),$AH[1]); # the sums are also stored back into the stack copy
+        &xor   ($AH[1],"edi");                 # magic
+        &mov   (&DWP(8,"esp"),"edi");
+        &mov   (&DWP(12,"esp"),"ecx");
+       #&mov   ($E,&DWP(16,"esp"));
+       &mov    ("edi",&DWP(20,"esp"));
+       &mov    ("ecx",&DWP(24,"esp"));
+       &add    ($E,&DWP(16,"esi"));    # same treatment for ctx->h[4..7]
+       &add    ("edi",&DWP(20,"esi"));
+       &add    ("ecx",&DWP(24,"esi"));
+       &mov    (&DWP(16,"esi"),$E);
+       &mov    (&DWP(20,"esi"),"edi");
+        &mov   (&DWP(20,"esp"),"edi");
+       &mov    ("edi",&DWP(28,"esp"));
+       &mov    (&DWP(24,"esi"),"ecx");
+        #&mov  (&DWP(16,"esp"),$E);
+       &add    ("edi",&DWP(28,"esi"));
+        &mov   (&DWP(24,"esp"),"ecx");
+       &mov    (&DWP(28,"esi"),"edi");
+        &mov   (&DWP(28,"esp"),"edi");
+       &mov    ("edi",&DWP(96+4,"esp"));       # inp
+
+       &vmovdqa        ($t3,&QWP(64,$K256));   # reload the vpshufb mask that the load stage overwrote in $t3
+       &sub    ($K256,3*64);                   # rewind K
+       &cmp    ("edi",&DWP(96+8,"esp"));       # are we done yet?
+       &jb     (&label("grand_avx_bmi"));
+
+       &mov    ("esp",&DWP(96+12,"esp"));      # restore sp
+       &vzeroall       ();                     # zero the vector registers before leaving
+&function_end_A();
+                                               }
+                                               }
+                                               }}}
+&function_end_B("sha256_block_data_order");
+
 &asm_finish();
index 9c84e8d..4fee74d 100644 (file)
@@ -1,10 +1,12 @@
 #!/usr/bin/env perl
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
 # ====================================================================
 
 # SHA256 block procedure for ARMv4. May 2007.
 # February 2011.
 #
 # Profiler-assisted and platform-specific optimization resulted in 16%
-# improvement on Cortex A8 core and ~17 cycles per processed byte.
+# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+# September 2013.
+#
+# Add NEON implementation. On Cortex A8 it was measured to process one
+# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+# code (meaning that the latter performs sub-optimally; nothing was done
+# about it).
+
+# May 2014.
+#
+# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
 
 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
 $ctx="r0";     $t0="r0";
-$inp="r1";     $t3="r1";
+$inp="r1";     $t4="r1";
 $len="r2";     $t1="r2";
-$T1="r3";
+$T1="r3";      $t3="r3";
 $A="r4";
 $B="r5";
 $C="r6";
@@ -52,80 +66,111 @@ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
 
 $code.=<<___ if ($i<16);
 #if __ARM_ARCH__>=7
-       ldr     $T1,[$inp],#4
+       @ ldr   $t1,[$inp],#4                   @ $i
+# if $i==15
+       str     $inp,[sp,#17*4]                 @ make room for $t4
+# endif
+       eor     $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+       add     $a,$a,$t2                       @ h+=Maj(a,b,c) from the past
+       eor     $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`  @ Sigma1(e)
+       rev     $t1,$t1
 #else
-       ldrb    $T1,[$inp,#3]                   @ $i
+       @ ldrb  $t1,[$inp,#3]                   @ $i
+       add     $a,$a,$t2                       @ h+=Maj(a,b,c) from the past
        ldrb    $t2,[$inp,#2]
-       ldrb    $t1,[$inp,#1]
-       ldrb    $t0,[$inp],#4
-       orr     $T1,$T1,$t2,lsl#8
-       orr     $T1,$T1,$t1,lsl#16
-       orr     $T1,$T1,$t0,lsl#24
+       ldrb    $t0,[$inp,#1]
+       orr     $t1,$t1,$t2,lsl#8
+       ldrb    $t2,[$inp],#4
+       orr     $t1,$t1,$t0,lsl#16
+# if $i==15
+       str     $inp,[sp,#17*4]                 @ make room for $t4
+# endif
+       eor     $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+       orr     $t1,$t1,$t2,lsl#24
+       eor     $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`  @ Sigma1(e)
 #endif
 ___
 $code.=<<___;
-       mov     $t0,$e,ror#$Sigma1[0]
        ldr     $t2,[$Ktbl],#4                  @ *K256++
-       eor     $t0,$t0,$e,ror#$Sigma1[1]
+       add     $h,$h,$t1                       @ h+=X[i]
+       str     $t1,[sp,#`$i%16`*4]
        eor     $t1,$f,$g
-#if $i>=16
-       add     $T1,$T1,$t3                     @ from BODY_16_xx
-#elif __ARM_ARCH__>=7 && defined(__ARMEL__)
-       rev     $T1,$T1
-#endif
-#if $i==15
-       str     $inp,[sp,#17*4]                 @ leave room for $t3
-#endif
-       eor     $t0,$t0,$e,ror#$Sigma1[2]       @ Sigma1(e)
+       add     $h,$h,$t0,ror#$Sigma1[0]        @ h+=Sigma1(e)
        and     $t1,$t1,$e
-       str     $T1,[sp,#`$i%16`*4]
-       add     $T1,$T1,$t0
+       add     $h,$h,$t2                       @ h+=K256[i]
        eor     $t1,$t1,$g                      @ Ch(e,f,g)
-       add     $T1,$T1,$h
-       mov     $h,$a,ror#$Sigma0[0]
-       add     $T1,$T1,$t1
-       eor     $h,$h,$a,ror#$Sigma0[1]
-       add     $T1,$T1,$t2
-       eor     $h,$h,$a,ror#$Sigma0[2]         @ Sigma0(a)
-#if $i>=15
-       ldr     $t3,[sp,#`($i+2)%16`*4]         @ from BODY_16_xx
+       eor     $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
+       add     $h,$h,$t1                       @ h+=Ch(e,f,g)
+#if $i==31
+       and     $t2,$t2,#0xff
+       cmp     $t2,#0xf2                       @ done?
+#endif
+#if $i<15
+# if __ARM_ARCH__>=7
+       ldr     $t1,[$inp],#4                   @ prefetch
+# else
+       ldrb    $t1,[$inp,#3]
+# endif
+       eor     $t2,$a,$b                       @ a^b, b^c in next round
+#else
+       ldr     $t1,[sp,#`($i+2)%16`*4]         @ from future BODY_16_xx
+       eor     $t2,$a,$b                       @ a^b, b^c in next round
+       ldr     $t4,[sp,#`($i+15)%16`*4]        @ from future BODY_16_xx
 #endif
-       orr     $t0,$a,$b
-       and     $t1,$a,$b
-       and     $t0,$t0,$c
-       add     $h,$h,$T1
-       orr     $t0,$t0,$t1                     @ Maj(a,b,c)
-       add     $d,$d,$T1
-       add     $h,$h,$t0
+       eor     $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]`  @ Sigma0(a)
+       and     $t3,$t3,$t2                     @ (b^c)&=(a^b)
+       add     $d,$d,$h                        @ d+=h
+       eor     $t3,$t3,$b                      @ Maj(a,b,c)
+       add     $h,$h,$t0,ror#$Sigma0[0]        @ h+=Sigma0(a)
+       @ add   $h,$h,$t3                       @ h+=Maj(a,b,c)
 ___
+       ($t2,$t3)=($t3,$t2);
 }
 
 sub BODY_16_XX {
 my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
 
 $code.=<<___;
-       @ ldr   $t3,[sp,#`($i+1)%16`*4]         @ $i
-       ldr     $t2,[sp,#`($i+14)%16`*4]
-       mov     $t0,$t3,ror#$sigma0[0]
-       ldr     $T1,[sp,#`($i+0)%16`*4]
-       eor     $t0,$t0,$t3,ror#$sigma0[1]
-       ldr     $t1,[sp,#`($i+9)%16`*4]
-       eor     $t0,$t0,$t3,lsr#$sigma0[2]      @ sigma0(X[i+1])
-       mov     $t3,$t2,ror#$sigma1[0]
-       add     $T1,$T1,$t0
-       eor     $t3,$t3,$t2,ror#$sigma1[1]
-       add     $T1,$T1,$t1
-       eor     $t3,$t3,$t2,lsr#$sigma1[2]      @ sigma1(X[i+14])
-       @ add   $T1,$T1,$t3
+       @ ldr   $t1,[sp,#`($i+1)%16`*4]         @ $i
+       @ ldr   $t4,[sp,#`($i+14)%16`*4]
+       mov     $t0,$t1,ror#$sigma0[0]
+       add     $a,$a,$t2                       @ h+=Maj(a,b,c) from the past
+       mov     $t2,$t4,ror#$sigma1[0]
+       eor     $t0,$t0,$t1,ror#$sigma0[1]
+       eor     $t2,$t2,$t4,ror#$sigma1[1]
+       eor     $t0,$t0,$t1,lsr#$sigma0[2]      @ sigma0(X[i+1])
+       ldr     $t1,[sp,#`($i+0)%16`*4]
+       eor     $t2,$t2,$t4,lsr#$sigma1[2]      @ sigma1(X[i+14])
+       ldr     $t4,[sp,#`($i+9)%16`*4]
+
+       add     $t2,$t2,$t0
+       eor     $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`   @ from BODY_00_15
+       add     $t1,$t1,$t2
+       eor     $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`  @ Sigma1(e)
+       add     $t1,$t1,$t4                     @ X[i]
 ___
        &BODY_00_15(@_);
 }
 
 $code=<<___;
-#include "arm_arch.h"
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+#endif
 
 .text
+#if __ARM_ARCH__<7
 .code  32
+#else
+.syntax unified
+# ifdef __thumb2__
+.thumb
+# else
+.code   32
+# endif
+#endif
 
 .type  K256,%object
 .align 5
@@ -147,46 +192,73 @@ K256:
 .word  0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
 .word  0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
 .size  K256,.-K256
+.word  0                               @ terminator
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word  OPENSSL_armcap_P-sha256_block_data_order
+#endif
+.align 5
 
 .global        sha256_block_data_order
 .type  sha256_block_data_order,%function
 sha256_block_data_order:
+#if __ARM_ARCH__<7
        sub     r3,pc,#8                @ sha256_block_data_order
+#else
+       adr     r3,sha256_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+       ldr     r12,.LOPENSSL_armcap
+       ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
+       tst     r12,#ARMV8_SHA256
+       bne     .LARMv8
+       tst     r12,#ARMV7_NEON
+       bne     .LNEON
+#endif
        add     $len,$inp,$len,lsl#6    @ len to point at the end of inp
        stmdb   sp!,{$ctx,$inp,$len,r4-r11,lr}
        ldmia   $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
-       sub     $Ktbl,r3,#256           @ K256
+       sub     $Ktbl,r3,#256+32        @ K256
        sub     sp,sp,#16*4             @ alloca(X[16])
 .Loop:
+# if __ARM_ARCH__>=7
+       ldr     $t1,[$inp],#4
+# else
+       ldrb    $t1,[$inp,#3]
+# endif
+       eor     $t3,$B,$C               @ magic
+       eor     $t2,$t2,$t2
 ___
 for($i=0;$i<16;$i++)   { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
 $code.=".Lrounds_16_xx:\n";
 for (;$i<32;$i++)      { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
 $code.=<<___;
-       and     $t2,$t2,#0xff
-       cmp     $t2,#0xf2
+#if __ARM_ARCH__>=7
+       ite     eq                      @ Thumb2 thing, sanity check in ARM
+#endif
+       ldreq   $t3,[sp,#16*4]          @ pull ctx
        bne     .Lrounds_16_xx
 
-       ldr     $T1,[sp,#16*4]          @ pull ctx
-       ldr     $t0,[$T1,#0]
-       ldr     $t1,[$T1,#4]
-       ldr     $t2,[$T1,#8]
+       add     $A,$A,$t2               @ h+=Maj(a,b,c) from the past
+       ldr     $t0,[$t3,#0]
+       ldr     $t1,[$t3,#4]
+       ldr     $t2,[$t3,#8]
        add     $A,$A,$t0
-       ldr     $t0,[$T1,#12]
+       ldr     $t0,[$t3,#12]
        add     $B,$B,$t1
-       ldr     $t1,[$T1,#16]
+       ldr     $t1,[$t3,#16]
        add     $C,$C,$t2
-       ldr     $t2,[$T1,#20]
+       ldr     $t2,[$t3,#20]
        add     $D,$D,$t0
-       ldr     $t0,[$T1,#24]
+       ldr     $t0,[$t3,#24]
        add     $E,$E,$t1
-       ldr     $t1,[$T1,#28]
+       ldr     $t1,[$t3,#28]
        add     $F,$F,$t2
        ldr     $inp,[sp,#17*4]         @ pull inp
        ldr     $t2,[sp,#18*4]          @ pull inp+len
        add     $G,$G,$t0
        add     $H,$H,$t1
-       stmia   $T1,{$A,$B,$C,$D,$E,$F,$G,$H}
+       stmia   $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
        cmp     $inp,$t2
        sub     $Ktbl,$Ktbl,#256        @ rewind Ktbl
        bne     .Loop
@@ -200,12 +272,442 @@ $code.=<<___;
        moveq   pc,lr                   @ be binary compatible with V4, yet
        bx      lr                      @ interoperable with Thumb ISA:-)
 #endif
-.size   sha256_block_data_order,.-sha256_block_data_order
-.asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+.size  sha256_block_data_order,.-sha256_block_data_order
+___
+######################################################################
+# NEON stuff
+#
+{{{
+my @X=map("q$_",(0..3));
+my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
+my $Xfer=$t4;
+my $j=0;
+
+sub Dlo()   { shift=~m|q([1]?[0-9])|?"d".($1*2):"";     }
+sub Dhi()   { shift=~m|q([1]?[0-9])|?"d".($1*2+1):"";   }
+
+sub AUTOLOAD()          # thunk [simplified] x86-style perlasm
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
+  my $arg = pop;
+    $arg = "#$arg" if ($arg*1 eq $arg);
+    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
+}
+
+sub Xupdate()
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body);
+  my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+       &vext_8         ($T0,@X[0],@X[1],4);    # X[1..4]
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vext_8         ($T1,@X[2],@X[3],4);    # X[9..12]
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vshr_u32       ($T2,$T0,$sigma0[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vadd_i32       (@X[0],@X[0],$T1);      # X[0..3] += X[9..12]
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vshr_u32       ($T1,$T0,$sigma0[2]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vsli_32        ($T2,$T0,32-$sigma0[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vshr_u32       ($T3,$T0,$sigma0[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &veor           ($T1,$T1,$T2);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vsli_32        ($T3,$T0,32-$sigma0[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vshr_u32     ($T4,&Dhi(@X[3]),$sigma1[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &veor           ($T1,$T1,$T3);          # sigma0(X[1..4])
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vsli_32      ($T4,&Dhi(@X[3]),32-$sigma1[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vshr_u32     ($T5,&Dhi(@X[3]),$sigma1[2]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vadd_i32       (@X[0],@X[0],$T1);      # X[0..3] += sigma0(X[1..4])
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &veor         ($T5,$T5,$T4);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vshr_u32     ($T4,&Dhi(@X[3]),$sigma1[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vsli_32      ($T4,&Dhi(@X[3]),32-$sigma1[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &veor         ($T5,$T5,$T4);          # sigma1(X[14..15])
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vadd_i32       (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vshr_u32     ($T4,&Dlo(@X[0]),$sigma1[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vsli_32      ($T4,&Dlo(@X[0]),32-$sigma1[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vshr_u32     ($T5,&Dlo(@X[0]),$sigma1[2]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &veor         ($T5,$T5,$T4);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vshr_u32     ($T4,&Dlo(@X[0]),$sigma1[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vld1_32        ("{$T0}","[$Ktbl,:128]!");
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &vsli_32      ($T4,&Dlo(@X[0]),32-$sigma1[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &veor         ($T5,$T5,$T4);          # sigma1(X[16..17])
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vadd_i32       (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vadd_i32       ($T0,$T0,@X[0]);
+        while($#insns>=2) { eval(shift(@insns)); }
+       &vst1_32        ("{$T0}","[$Xfer,:128]!");
+        eval(shift(@insns));
+        eval(shift(@insns));
+
+       push(@X,shift(@X));             # "rotate" X[]
+}
+
+sub Xpreload()
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body);
+  my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vld1_32        ("{$T0}","[$Ktbl,:128]!");
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vrev32_8       (@X[0],@X[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &vadd_i32       ($T0,$T0,@X[0]);
+        foreach (@insns) { eval; }     # remaining instructions
+       &vst1_32        ("{$T0}","[$Xfer,:128]!");
+
+       push(@X,shift(@X));             # "rotate" X[]
+}
+
+sub body_00_15 () {
+       (
+       '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
+       '&add   ($h,$h,$t1)',                   # h+=X[i]+K[i]
+       '&eor   ($t1,$f,$g)',
+       '&eor   ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
+       '&add   ($a,$a,$t2)',                   # h+=Maj(a,b,c) from the past
+       '&and   ($t1,$t1,$e)',
+       '&eor   ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',   # Sigma1(e)
+       '&eor   ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
+       '&eor   ($t1,$t1,$g)',                  # Ch(e,f,g)
+       '&add   ($h,$h,$t2,"ror#$Sigma1[0]")',  # h+=Sigma1(e)
+       '&eor   ($t2,$a,$b)',                   # a^b, b^c in next round
+       '&eor   ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',   # Sigma0(a)
+       '&add   ($h,$h,$t1)',                   # h+=Ch(e,f,g)
+       '&ldr   ($t1,sprintf "[sp,#%d]",4*(($j+1)&15))  if (($j&15)!=15);'.
+       '&ldr   ($t1,"[$Ktbl]")                         if ($j==15);'.
+       '&ldr   ($t1,"[sp,#64]")                        if ($j==31)',
+       '&and   ($t3,$t3,$t2)',                 # (b^c)&=(a^b)
+       '&add   ($d,$d,$h)',                    # d+=h
+       '&add   ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
+       '&eor   ($t3,$t3,$b)',                  # Maj(a,b,c)
+       '$j++;  unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
+       )
+}
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
+.global        sha256_block_data_order_neon
+.type  sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+       stmdb   sp!,{r4-r12,lr}
+
+       sub     $H,sp,#16*4+16
+       adr     $Ktbl,K256
+       bic     $H,$H,#15               @ align for 128-bit stores
+       mov     $t2,sp
+       mov     sp,$H                   @ alloca
+       add     $len,$inp,$len,lsl#6    @ len to point at the end of inp
+
+       vld1.8          {@X[0]},[$inp]!
+       vld1.8          {@X[1]},[$inp]!
+       vld1.8          {@X[2]},[$inp]!
+       vld1.8          {@X[3]},[$inp]!
+       vld1.32         {$T0},[$Ktbl,:128]!
+       vld1.32         {$T1},[$Ktbl,:128]!
+       vld1.32         {$T2},[$Ktbl,:128]!
+       vld1.32         {$T3},[$Ktbl,:128]!
+       vrev32.8        @X[0],@X[0]             @ yes, even on
+       str             $ctx,[sp,#64]
+       vrev32.8        @X[1],@X[1]             @ big-endian
+       str             $inp,[sp,#68]
+       mov             $Xfer,sp
+       vrev32.8        @X[2],@X[2]
+       str             $len,[sp,#72]
+       vrev32.8        @X[3],@X[3]
+       str             $t2,[sp,#76]            @ save original sp
+       vadd.i32        $T0,$T0,@X[0]
+       vadd.i32        $T1,$T1,@X[1]
+       vst1.32         {$T0},[$Xfer,:128]!
+       vadd.i32        $T2,$T2,@X[2]
+       vst1.32         {$T1},[$Xfer,:128]!
+       vadd.i32        $T3,$T3,@X[3]
+       vst1.32         {$T2},[$Xfer,:128]!
+       vst1.32         {$T3},[$Xfer,:128]!
+
+       ldmia           $ctx,{$A-$H}
+       sub             $Xfer,$Xfer,#64
+       ldr             $t1,[sp,#0]
+       eor             $t2,$t2,$t2
+       eor             $t3,$B,$C
+       b               .L_00_48
+
+.align 4
+.L_00_48:
+___
+       &Xupdate(\&body_00_15);
+       &Xupdate(\&body_00_15);
+       &Xupdate(\&body_00_15);
+       &Xupdate(\&body_00_15);
+$code.=<<___;
+       teq     $t1,#0                          @ check for K256 terminator
+       ldr     $t1,[sp,#0]
+       sub     $Xfer,$Xfer,#64
+       bne     .L_00_48
+
+       ldr             $inp,[sp,#68]
+       ldr             $t0,[sp,#72]
+       sub             $Ktbl,$Ktbl,#256        @ rewind $Ktbl
+       teq             $inp,$t0
+       it              eq
+       subeq           $inp,$inp,#64           @ avoid SEGV
+       vld1.8          {@X[0]},[$inp]!         @ load next input block
+       vld1.8          {@X[1]},[$inp]!
+       vld1.8          {@X[2]},[$inp]!
+       vld1.8          {@X[3]},[$inp]!
+       it              ne
+       strne           $inp,[sp,#68]
+       mov             $Xfer,sp
+___
+       &Xpreload(\&body_00_15);
+       &Xpreload(\&body_00_15);
+       &Xpreload(\&body_00_15);
+       &Xpreload(\&body_00_15);
+$code.=<<___;
+       ldr     $t0,[$t1,#0]
+       add     $A,$A,$t2                       @ h+=Maj(a,b,c) from the past
+       ldr     $t2,[$t1,#4]
+       ldr     $t3,[$t1,#8]
+       ldr     $t4,[$t1,#12]
+       add     $A,$A,$t0                       @ accumulate
+       ldr     $t0,[$t1,#16]
+       add     $B,$B,$t2
+       ldr     $t2,[$t1,#20]
+       add     $C,$C,$t3
+       ldr     $t3,[$t1,#24]
+       add     $D,$D,$t4
+       ldr     $t4,[$t1,#28]
+       add     $E,$E,$t0
+       str     $A,[$t1],#4
+       add     $F,$F,$t2
+       str     $B,[$t1],#4
+       add     $G,$G,$t3
+       str     $C,[$t1],#4
+       add     $H,$H,$t4
+       str     $D,[$t1],#4
+       stmia   $t1,{$E-$H}
+
+       ittte   ne
+       movne   $Xfer,sp
+       ldrne   $t1,[sp,#0]
+       eorne   $t2,$t2,$t2
+       ldreq   sp,[sp,#76]                     @ restore original sp
+       itt     ne
+       eorne   $t3,$B,$C
+       bne     .L_00_48
+
+       ldmia   sp!,{r4-r12,pc}
+.size  sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+___
+}}}
+######################################################################
+# ARMv8 stuff
+#
+{{{
+my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
+my @MSG=map("q$_",(8..11));
+my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
+my $Ktbl="r3";
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+
+# ifdef __thumb2__
+#  define INST(a,b,c,d)        .byte   c,d|0xc,a,b
+# else
+#  define INST(a,b,c,d)        .byte   a,b,c,d
+# endif
+
+.type  sha256_block_data_order_armv8,%function
+.align 5
+sha256_block_data_order_armv8:
+.LARMv8:
+       vld1.32 {$ABCD,$EFGH},[$ctx]
+# ifdef __thumb2__
+       adr     $Ktbl,.LARMv8
+       sub     $Ktbl,$Ktbl,#.LARMv8-K256
+# else
+       adrl    $Ktbl,K256
+# endif
+       add     $len,$inp,$len,lsl#6    @ len to point at the end of inp
+
+.Loop_v8:
+       vld1.8          {@MSG[0]-@MSG[1]},[$inp]!
+       vld1.8          {@MSG[2]-@MSG[3]},[$inp]!
+       vld1.32         {$W0},[$Ktbl]!
+       vrev32.8        @MSG[0],@MSG[0]
+       vrev32.8        @MSG[1],@MSG[1]
+       vrev32.8        @MSG[2],@MSG[2]
+       vrev32.8        @MSG[3],@MSG[3]
+       vmov            $ABCD_SAVE,$ABCD        @ offload
+       vmov            $EFGH_SAVE,$EFGH
+       teq             $inp,$len
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+       vld1.32         {$W1},[$Ktbl]!
+       vadd.i32        $W0,$W0,@MSG[0]
+       sha256su0       @MSG[0],@MSG[1]
+       vmov            $abcd,$ABCD
+       sha256h         $ABCD,$EFGH,$W0
+       sha256h2        $EFGH,$abcd,$W0
+       sha256su1       @MSG[0],@MSG[2],@MSG[3]
+___
+       ($W0,$W1)=($W1,$W0);    push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+       vld1.32         {$W1},[$Ktbl]!
+       vadd.i32        $W0,$W0,@MSG[0]
+       vmov            $abcd,$ABCD
+       sha256h         $ABCD,$EFGH,$W0
+       sha256h2        $EFGH,$abcd,$W0
+
+       vld1.32         {$W0},[$Ktbl]!
+       vadd.i32        $W1,$W1,@MSG[1]
+       vmov            $abcd,$ABCD
+       sha256h         $ABCD,$EFGH,$W1
+       sha256h2        $EFGH,$abcd,$W1
+
+       vld1.32         {$W1},[$Ktbl]
+       vadd.i32        $W0,$W0,@MSG[2]
+       sub             $Ktbl,$Ktbl,#256-16     @ rewind
+       vmov            $abcd,$ABCD
+       sha256h         $ABCD,$EFGH,$W0
+       sha256h2        $EFGH,$abcd,$W0
+
+       vadd.i32        $W1,$W1,@MSG[3]
+       vmov            $abcd,$ABCD
+       sha256h         $ABCD,$EFGH,$W1
+       sha256h2        $EFGH,$abcd,$W1
+
+       vadd.i32        $ABCD,$ABCD,$ABCD_SAVE
+       vadd.i32        $EFGH,$EFGH,$EFGH_SAVE
+       it              ne
+       bne             .Loop_v8
+
+       vst1.32         {$ABCD,$EFGH},[$ctx]
+
+       ret             @ bx lr
+.size  sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+___
+}}}
+$code.=<<___;
+.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm   OPENSSL_armcap_P,4,4
+#endif
 ___
 
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;   # make it possible to compile with -march=armv4
-print $code;
+open SELF,$0;
+while(<SELF>) {
+       next if (/^#!/);
+       last if (!s/^#/@/ and !/^$/);
+       print;
+}
+close SELF;
+
+{   my  %opcode = (
+       "sha256h"       => 0xf3000c40,  "sha256h2"      => 0xf3100c40,
+       "sha256su0"     => 0xf3ba03c0,  "sha256su1"     => 0xf3200c40   );
+
+    sub unsha256 {
+       my ($mnemonic,$arg)=@_;
+
+       if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
+           my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+                                        |(($2&7)<<17)|(($2&8)<<4)
+                                        |(($3&7)<<1) |(($3&8)<<2);
+           # since ARMv7 instructions are always encoded little-endian.
+           # correct solution is to use .inst directive, but older
+           # assemblers don't implement it:-(
+           sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
+                       $word&0xff,($word>>8)&0xff,
+                       ($word>>16)&0xff,($word>>24)&0xff,
+                       $mnemonic,$arg;
+       }
+    }
+}
+
+foreach (split($/,$code)) {
+
+       s/\`([^\`]*)\`/eval $1/geo;
+
+       s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
+
+       s/\bret\b/bx    lr/go           or
+       s/\bbx\s+lr\b/.word\t0xe12fff1e/go;     # make it possible to compile with -march=armv4
+
+       print $_,"\n";
+}
+
 close STDOUT; # enforce flush
diff --git a/crypto/sha/asm/sha256-mb-x86_64.pl b/crypto/sha/asm/sha256-mb-x86_64.pl
new file mode 100644 (file)
index 0000000..adf2ddc
--- /dev/null
@@ -0,0 +1,1560 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# Multi-buffer SHA256 procedure processes n buffers in parallel by
+# placing buffer data to designated lane of SIMD register. n is
+# naturally limited to 4 on pre-AVX2 processors and to 8 on
+# AVX2-capable processors such as Haswell.
+#
+#              this    +aesni(i)       sha256  aesni-sha256    gain(iv)
+# -------------------------------------------------------------------
+# Westmere(ii) 23.3/n  +1.28=7.11(n=4) 12.3    +3.75=16.1      +126%
+# Atom(ii)     38.7/n  +3.93=13.6(n=4) 20.8    +5.69=26.5      +95%
+# Sandy Bridge (20.5   +5.15=25.7)/n   11.6    13.0            +103%
+# Ivy Bridge   (20.4   +5.14=25.5)/n   10.3    11.6            +82%
+# Haswell(iii) (21.0   +5.00=26.0)/n   7.80    8.79            +170%
+# Bulldozer    (21.6   +5.76=27.4)/n   13.6    13.7            +100%
+#
+# (i)  multi-block CBC encrypt with 128-bit key;
+# (ii) (HASH+AES)/n does not apply to Westmere for n>3 and Atom,
+#      because of lower AES-NI instruction throughput, nor is there
+#      AES-NI-SHA256 stitch for these processors;
+# (iii)        "this" is for n=8, when we gather twice as much data, result
+#      for n=4 is 20.3+4.44=24.7;
+# (iv) presented improvement coefficients are asymptotic limits and
+#      in real-life application are somewhat lower, e.g. for 2KB 
+#      fragments they range from 75% to 130% (on Haswell);
+
+$flavour = shift;
+$output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+$avx=0;
+
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.19) + ($1>=2.22);
+}
+
+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+          `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
+       $avx = ($1>=2.09) + ($1>=2.10);
+}
+
+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+          `ml64 2>&1` =~ /Version ([0-9]+)\./) {
+       $avx = ($1>=10) + ($1>=11);
+}
+
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+       $avx = ($2>=3.0) + ($2>3.0);
+}
+
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
+
+# void sha256_multi_block (
+#     struct { unsigned int A[8];
+#              unsigned int B[8];
+#              unsigned int C[8];
+#              unsigned int D[8];
+#              unsigned int E[8];
+#              unsigned int F[8];
+#              unsigned int G[8];
+#              unsigned int H[8];      } *ctx,
+#     struct { void *ptr; int blocks;  } inp[8],
+#     int num);                /* 1 or 2 */
+#
+$ctx="%rdi";   # 1st arg
+$inp="%rsi";   # 2nd arg
+$num="%edx";   # 3rd arg
+@ptr=map("%r$_",(8..11));
+$Tbl="%rbp";
+
+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("%xmm$_",(8..15));
+($t1,$t2,$t3,$axb,$bxc,$Xi,$Xn,$sigma)=map("%xmm$_",(0..7));
+
+$REG_SZ=16;
+
+sub Xi_off {
+my $off = shift;
+
+    $off %= 16; $off *= $REG_SZ;
+    $off<256 ? "$off-128(%rax)" : "$off-256-128(%rbx)";
+}
+
+sub ROUND_00_15 {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+
+$code.=<<___ if ($i<15);
+       movd            `4*$i`(@ptr[0]),$Xi
+       movd            `4*$i`(@ptr[1]),$t1
+       movd            `4*$i`(@ptr[2]),$t2
+       movd            `4*$i`(@ptr[3]),$t3
+       punpckldq       $t2,$Xi
+       punpckldq       $t3,$t1
+       punpckldq       $t1,$Xi
+___
+$code.=<<___ if ($i==15);
+       movd            `4*$i`(@ptr[0]),$Xi
+        lea            `16*4`(@ptr[0]),@ptr[0]
+       movd            `4*$i`(@ptr[1]),$t1
+        lea            `16*4`(@ptr[1]),@ptr[1]
+       movd            `4*$i`(@ptr[2]),$t2
+        lea            `16*4`(@ptr[2]),@ptr[2]
+       movd            `4*$i`(@ptr[3]),$t3
+        lea            `16*4`(@ptr[3]),@ptr[3]
+       punpckldq       $t2,$Xi
+       punpckldq       $t3,$t1
+       punpckldq       $t1,$Xi
+___
+$code.=<<___;
+       movdqa  $e,$sigma
+       `"pshufb        $Xn,$Xi"                if ($i<=15 && ($i&1)==0)`
+       movdqa  $e,$t3
+       `"pshufb        $Xn,$Xi"                if ($i<=15 && ($i&1)==1)`
+       psrld   \$6,$sigma
+       movdqa  $e,$t2
+       pslld   \$7,$t3
+       movdqa  $Xi,`&Xi_off($i)`
+        paddd  $h,$Xi                          # Xi+=h
+
+       psrld   \$11,$t2
+       pxor    $t3,$sigma
+       pslld   \$21-7,$t3
+        paddd  `32*($i%8)-128`($Tbl),$Xi       # Xi+=K[round]
+       pxor    $t2,$sigma
+
+       psrld   \$25-11,$t2
+        movdqa $e,$t1
+        `"prefetcht0   63(@ptr[0])"            if ($i==15)`
+       pxor    $t3,$sigma
+        movdqa $e,$axb                         # borrow $axb
+       pslld   \$26-21,$t3
+        pandn  $g,$t1
+        pand   $f,$axb
+       pxor    $t2,$sigma
+
+        `"prefetcht0   63(@ptr[1])"            if ($i==15)`
+       movdqa  $a,$t2
+       pxor    $t3,$sigma                      # Sigma1(e)
+       movdqa  $a,$t3
+       psrld   \$2,$t2
+       paddd   $sigma,$Xi                      # Xi+=Sigma1(e)
+        pxor   $axb,$t1                        # Ch(e,f,g)
+        movdqa $b,$axb
+       movdqa  $a,$sigma
+       pslld   \$10,$t3
+        pxor   $a,$axb                         # a^b, b^c in next round
+
+        `"prefetcht0   63(@ptr[2])"            if ($i==15)`
+       psrld   \$13,$sigma
+       pxor    $t3,$t2
+        paddd  $t1,$Xi                         # Xi+=Ch(e,f,g)
+       pslld   \$19-10,$t3
+        pand   $axb,$bxc
+       pxor    $sigma,$t2
+
+        `"prefetcht0   63(@ptr[3])"            if ($i==15)`
+       psrld   \$22-13,$sigma
+       pxor    $t3,$t2
+        movdqa $b,$h
+       pslld   \$30-19,$t3
+       pxor    $t2,$sigma
+        pxor   $bxc,$h                         # h=Maj(a,b,c)=Ch(a^b,c,b)
+        paddd  $Xi,$d                          # d+=Xi
+       pxor    $t3,$sigma                      # Sigma0(a)
+
+       paddd   $Xi,$h                          # h+=Xi
+       paddd   $sigma,$h                       # h+=Sigma0(a)
+___
+$code.=<<___ if (($i%8)==7);
+       lea     `32*8`($Tbl),$Tbl
+___
+       ($axb,$bxc)=($bxc,$axb);
+}
+
+sub ROUND_16_XX {
+my $i=shift;
+
+$code.=<<___;
+       movdqa  `&Xi_off($i+1)`,$Xn
+       paddd   `&Xi_off($i+9)`,$Xi             # Xi+=X[i+9]
+
+       movdqa  $Xn,$sigma
+       movdqa  $Xn,$t2
+       psrld   \$3,$sigma
+       movdqa  $Xn,$t3
+
+       psrld   \$7,$t2
+       movdqa  `&Xi_off($i+14)`,$t1
+       pslld   \$14,$t3
+       pxor    $t2,$sigma
+       psrld   \$18-7,$t2
+       movdqa  $t1,$axb                        # borrow $axb
+       pxor    $t3,$sigma
+       pslld   \$25-14,$t3
+       pxor    $t2,$sigma
+       psrld   \$10,$t1
+       movdqa  $axb,$t2
+
+       psrld   \$17,$axb
+       pxor    $t3,$sigma                      # sigma0(X[i+1])
+       pslld   \$13,$t2
+        paddd  $sigma,$Xi                      # Xi+=sigma0(X[i+1])
+       pxor    $axb,$t1
+       psrld   \$19-17,$axb
+       pxor    $t2,$t1
+       pslld   \$15-13,$t2
+       pxor    $axb,$t1
+       pxor    $t2,$t1                         # sigma1(X[i+14])
+       paddd   $t1,$Xi                         # Xi+=sigma1(X[i+14])
+___
+       &ROUND_00_15($i,@_);
+       ($Xi,$Xn)=($Xn,$Xi);
+}
+
+$code.=<<___;
+.text
+
+.extern        OPENSSL_ia32cap_P
+
+.globl sha256_multi_block
+.type  sha256_multi_block,\@function,3
+.align 32
+sha256_multi_block:
+       mov     OPENSSL_ia32cap_P+4(%rip),%rcx
+       bt      \$61,%rcx                       # check SHA bit
+       jc      _shaext_shortcut
+___
+$code.=<<___ if ($avx);
+       test    \$`1<<28`,%ecx
+       jnz     _avx_shortcut
+___
+$code.=<<___;
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+___
+$code.=<<___ if ($win64);
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,-0x78(%rax)
+       movaps  %xmm11,-0x68(%rax)
+       movaps  %xmm12,-0x58(%rax)
+       movaps  %xmm13,-0x48(%rax)
+       movaps  %xmm14,-0x38(%rax)
+       movaps  %xmm15,-0x28(%rax)
+___
+$code.=<<___;
+       sub     \$`$REG_SZ*18`, %rsp
+       and     \$-256,%rsp
+       mov     %rax,`$REG_SZ*17`(%rsp)         # original %rsp
+.Lbody:
+       lea     K256+128(%rip),$Tbl
+       lea     `$REG_SZ*16`(%rsp),%rbx
+       lea     0x80($ctx),$ctx                 # size optimization
+
+.Loop_grande:
+       mov     $num,`$REG_SZ*17+8`(%rsp)       # original $num
+       xor     $num,$num
+___
+for($i=0;$i<4;$i++) {
+    $code.=<<___;
+       mov     `16*$i+0`($inp),@ptr[$i]        # input pointer
+       mov     `16*$i+8`($inp),%ecx            # number of blocks
+       cmp     $num,%ecx
+       cmovg   %ecx,$num                       # find maximum
+       test    %ecx,%ecx
+       mov     %ecx,`4*$i`(%rbx)               # initialize counters
+       cmovle  $Tbl,@ptr[$i]                   # cancel input
+___
+}
+$code.=<<___;
+       test    $num,$num
+       jz      .Ldone
+
+       movdqu  0x00-0x80($ctx),$A              # load context
+        lea    128(%rsp),%rax
+       movdqu  0x20-0x80($ctx),$B
+       movdqu  0x40-0x80($ctx),$C
+       movdqu  0x60-0x80($ctx),$D
+       movdqu  0x80-0x80($ctx),$E
+       movdqu  0xa0-0x80($ctx),$F
+       movdqu  0xc0-0x80($ctx),$G
+       movdqu  0xe0-0x80($ctx),$H
+       movdqu  .Lpbswap(%rip),$Xn
+       jmp     .Loop
+
+.align 32
+.Loop:
+       movdqa  $C,$bxc
+       pxor    $B,$bxc                         # magic seed
+___
+for($i=0;$i<16;$i++)   { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+       movdqu  `&Xi_off($i)`,$Xi
+       mov     \$3,%ecx
+       jmp     .Loop_16_xx
+.align 32
+.Loop_16_xx:
+___
+for(;$i<32;$i++)       { &ROUND_16_XX($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+       dec     %ecx
+       jnz     .Loop_16_xx
+
+       mov     \$1,%ecx
+       lea     K256+128(%rip),$Tbl
+
+       movdqa  (%rbx),$sigma                   # pull counters
+       cmp     4*0(%rbx),%ecx                  # examine counters
+       pxor    $t1,$t1
+       cmovge  $Tbl,@ptr[0]                    # cancel input
+       cmp     4*1(%rbx),%ecx
+       movdqa  $sigma,$Xn
+       cmovge  $Tbl,@ptr[1]
+       cmp     4*2(%rbx),%ecx
+       pcmpgtd $t1,$Xn                         # mask value
+       cmovge  $Tbl,@ptr[2]
+       cmp     4*3(%rbx),%ecx
+       paddd   $Xn,$sigma                      # counters--
+       cmovge  $Tbl,@ptr[3]
+
+       movdqu  0x00-0x80($ctx),$t1
+       pand    $Xn,$A
+       movdqu  0x20-0x80($ctx),$t2
+       pand    $Xn,$B
+       movdqu  0x40-0x80($ctx),$t3
+       pand    $Xn,$C
+       movdqu  0x60-0x80($ctx),$Xi
+       pand    $Xn,$D
+       paddd   $t1,$A
+       movdqu  0x80-0x80($ctx),$t1
+       pand    $Xn,$E
+       paddd   $t2,$B
+       movdqu  0xa0-0x80($ctx),$t2
+       pand    $Xn,$F
+       paddd   $t3,$C
+       movdqu  0xc0-0x80($ctx),$t3
+       pand    $Xn,$G
+       paddd   $Xi,$D
+       movdqu  0xe0-0x80($ctx),$Xi
+       pand    $Xn,$H
+       paddd   $t1,$E
+       paddd   $t2,$F
+       movdqu  $A,0x00-0x80($ctx)
+       paddd   $t3,$G
+       movdqu  $B,0x20-0x80($ctx)
+       paddd   $Xi,$H
+       movdqu  $C,0x40-0x80($ctx)
+       movdqu  $D,0x60-0x80($ctx)
+       movdqu  $E,0x80-0x80($ctx)
+       movdqu  $F,0xa0-0x80($ctx)
+       movdqu  $G,0xc0-0x80($ctx)
+       movdqu  $H,0xe0-0x80($ctx)
+
+       movdqa  $sigma,(%rbx)                   # save counters
+       movdqa  .Lpbswap(%rip),$Xn
+       dec     $num
+       jnz     .Loop
+
+       mov     `$REG_SZ*17+8`(%rsp),$num
+       lea     $REG_SZ($ctx),$ctx
+       lea     `16*$REG_SZ/4`($inp),$inp
+       dec     $num
+       jnz     .Loop_grande
+
+.Ldone:
+       mov     `$REG_SZ*17`(%rsp),%rax         # original %rsp
+___
+$code.=<<___ if ($win64);
+       movaps  -0xb8(%rax),%xmm6
+       movaps  -0xa8(%rax),%xmm7
+       movaps  -0x98(%rax),%xmm8
+       movaps  -0x88(%rax),%xmm9
+       movaps  -0x78(%rax),%xmm10
+       movaps  -0x68(%rax),%xmm11
+       movaps  -0x58(%rax),%xmm12
+       movaps  -0x48(%rax),%xmm13
+       movaps  -0x38(%rax),%xmm14
+       movaps  -0x28(%rax),%xmm15
+___
+$code.=<<___;
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp
+.Lepilogue:
+       ret
+.size  sha256_multi_block,.-sha256_multi_block
+___
+                                               {{{
+my ($Wi,$TMP0,$TMP1,$TMPx,$ABEF0,$CDGH0,$ABEF1,$CDGH1)=map("%xmm$_",(0..3,12..15));
+my @MSG0=map("%xmm$_",(4..7));
+my @MSG1=map("%xmm$_",(8..11));
+
+$code.=<<___;
+.type  sha256_multi_block_shaext,\@function,3
+.align 32
+sha256_multi_block_shaext:
+_shaext_shortcut:
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+___
+$code.=<<___ if ($win64);
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,-0x78(%rax)
+       movaps  %xmm11,-0x68(%rax)
+       movaps  %xmm12,-0x58(%rax)
+       movaps  %xmm13,-0x48(%rax)
+       movaps  %xmm14,-0x38(%rax)
+       movaps  %xmm15,-0x28(%rax)
+___
+$code.=<<___;
+       sub     \$`$REG_SZ*18`,%rsp
+       shl     \$1,$num                        # we process pair at a time
+       and     \$-256,%rsp
+       lea     0x80($ctx),$ctx                 # size optimization
+       mov     %rax,`$REG_SZ*17`(%rsp)         # original %rsp
+.Lbody_shaext:
+       lea     `$REG_SZ*16`(%rsp),%rbx
+       lea     K256_shaext+0x80(%rip),$Tbl
+
+.Loop_grande_shaext:
+       mov     $num,`$REG_SZ*17+8`(%rsp)       # original $num
+       xor     $num,$num
+___
+for($i=0;$i<2;$i++) {
+    $code.=<<___;
+       mov     `16*$i+0`($inp),@ptr[$i]        # input pointer
+       mov     `16*$i+8`($inp),%ecx            # number of blocks
+       cmp     $num,%ecx
+       cmovg   %ecx,$num                       # find maximum
+       test    %ecx,%ecx
+       mov     %ecx,`4*$i`(%rbx)               # initialize counters
+       cmovle  %rsp,@ptr[$i]                   # cancel input
+___
+}
+$code.=<<___;
+       test    $num,$num
+       jz      .Ldone_shaext
+
+       movq            0x00-0x80($ctx),$ABEF0          # A1.A0
+       movq            0x20-0x80($ctx),@MSG0[0]        # B1.B0
+       movq            0x40-0x80($ctx),$CDGH0          # C1.C0
+       movq            0x60-0x80($ctx),@MSG0[1]        # D1.D0
+       movq            0x80-0x80($ctx),@MSG1[0]        # E1.E0
+       movq            0xa0-0x80($ctx),@MSG1[1]        # F1.F0
+       movq            0xc0-0x80($ctx),@MSG1[2]        # G1.G0
+       movq            0xe0-0x80($ctx),@MSG1[3]        # H1.H0
+
+       punpckldq       @MSG0[0],$ABEF0                 # B1.A1.B0.A0
+       punpckldq       @MSG0[1],$CDGH0                 # D1.C1.D0.C0
+       punpckldq       @MSG1[1],@MSG1[0]               # F1.E1.F0.E0
+       punpckldq       @MSG1[3],@MSG1[2]               # H1.G1.H0.G0
+       movdqa          K256_shaext-0x10(%rip),$TMPx    # byte swap
+
+       movdqa          $ABEF0,$ABEF1
+       movdqa          $CDGH0,$CDGH1
+       punpcklqdq      @MSG1[0],$ABEF0                 # F0.E0.B0.A0
+       punpcklqdq      @MSG1[2],$CDGH0                 # H0.G0.D0.C0
+       punpckhqdq      @MSG1[0],$ABEF1                 # F1.E1.B1.A1
+       punpckhqdq      @MSG1[2],$CDGH1                 # H1.G1.D1.C1
+
+       pshufd          \$0b00011011,$ABEF0,$ABEF0
+       pshufd          \$0b00011011,$CDGH0,$CDGH0
+       pshufd          \$0b00011011,$ABEF1,$ABEF1
+       pshufd          \$0b00011011,$CDGH1,$CDGH1
+       jmp             .Loop_shaext
+
+.align 32
+.Loop_shaext:
+       movdqu          0x00(@ptr[0]),@MSG0[0]
+        movdqu         0x00(@ptr[1]),@MSG1[0]
+       movdqu          0x10(@ptr[0]),@MSG0[1]
+        movdqu         0x10(@ptr[1]),@MSG1[1]
+       movdqu          0x20(@ptr[0]),@MSG0[2]
+       pshufb          $TMPx,@MSG0[0]
+        movdqu         0x20(@ptr[1]),@MSG1[2]
+        pshufb         $TMPx,@MSG1[0]
+       movdqu          0x30(@ptr[0]),@MSG0[3]
+       lea             0x40(@ptr[0]),@ptr[0]
+        movdqu         0x30(@ptr[1]),@MSG1[3]
+        lea            0x40(@ptr[1]),@ptr[1]
+
+       movdqa          0*16-0x80($Tbl),$Wi
+       pshufb          $TMPx,@MSG0[1]
+       paddd           @MSG0[0],$Wi
+       pxor            $ABEF0,@MSG0[0]         # black magic
+       movdqa          $Wi,$TMP0
+        movdqa         0*16-0x80($Tbl),$TMP1
+        pshufb         $TMPx,@MSG1[1]
+        paddd          @MSG1[0],$TMP1
+       movdqa          $CDGH0,0x50(%rsp)       # offload
+       sha256rnds2     $ABEF0,$CDGH0           # 0-3
+        pxor           $ABEF1,@MSG1[0]         # black magic
+        movdqa         $TMP1,$Wi
+        movdqa         $CDGH1,0x70(%rsp)
+        sha256rnds2    $ABEF1,$CDGH1           # 0-3
+       pshufd          \$0x0e,$TMP0,$Wi
+       pxor            $ABEF0,@MSG0[0]         # black magic
+       movdqa          $ABEF0,0x40(%rsp)       # offload
+       sha256rnds2     $CDGH0,$ABEF0
+        pshufd         \$0x0e,$TMP1,$Wi
+        pxor           $ABEF1,@MSG1[0]         # black magic
+        movdqa         $ABEF1,0x60(%rsp)
+       movdqa          1*16-0x80($Tbl),$TMP0
+       paddd           @MSG0[1],$TMP0
+       pshufb          $TMPx,@MSG0[2]
+        sha256rnds2    $CDGH1,$ABEF1
+
+       movdqa          $TMP0,$Wi
+        movdqa         1*16-0x80($Tbl),$TMP1
+        paddd          @MSG1[1],$TMP1
+       sha256rnds2     $ABEF0,$CDGH0           # 4-7
+        movdqa         $TMP1,$Wi
+       prefetcht0      127(@ptr[0])
+       pshufb          $TMPx,@MSG0[3]
+        pshufb         $TMPx,@MSG1[2]
+        prefetcht0     127(@ptr[1])
+        sha256rnds2    $ABEF1,$CDGH1           # 4-7
+       pshufd          \$0x0e,$TMP0,$Wi
+        pshufb         $TMPx,@MSG1[3]
+       sha256msg1      @MSG0[1],@MSG0[0]
+       sha256rnds2     $CDGH0,$ABEF0
+        pshufd         \$0x0e,$TMP1,$Wi
+       movdqa          2*16-0x80($Tbl),$TMP0
+       paddd           @MSG0[2],$TMP0
+        sha256rnds2    $CDGH1,$ABEF1
+
+       movdqa          $TMP0,$Wi
+        movdqa         2*16-0x80($Tbl),$TMP1
+        paddd          @MSG1[2],$TMP1
+       sha256rnds2     $ABEF0,$CDGH0           # 8-11
+        sha256msg1     @MSG1[1],@MSG1[0]
+        movdqa         $TMP1,$Wi
+       movdqa          @MSG0[3],$TMPx
+        sha256rnds2    $ABEF1,$CDGH1           # 8-11
+       pshufd          \$0x0e,$TMP0,$Wi
+       palignr         \$4,@MSG0[2],$TMPx
+       paddd           $TMPx,@MSG0[0]
+        movdqa         @MSG1[3],$TMPx
+        palignr        \$4,@MSG1[2],$TMPx
+       sha256msg1      @MSG0[2],@MSG0[1]
+       sha256rnds2     $CDGH0,$ABEF0
+        pshufd         \$0x0e,$TMP1,$Wi
+       movdqa          3*16-0x80($Tbl),$TMP0
+       paddd           @MSG0[3],$TMP0
+        sha256rnds2    $CDGH1,$ABEF1
+        sha256msg1     @MSG1[2],@MSG1[1]
+
+       movdqa          $TMP0,$Wi
+        movdqa         3*16-0x80($Tbl),$TMP1
+        paddd          $TMPx,@MSG1[0]
+        paddd          @MSG1[3],$TMP1
+       sha256msg2      @MSG0[3],@MSG0[0]
+       sha256rnds2     $ABEF0,$CDGH0           # 12-15
+        movdqa         $TMP1,$Wi
+       movdqa          @MSG0[0],$TMPx
+       palignr         \$4,@MSG0[3],$TMPx
+        sha256rnds2    $ABEF1,$CDGH1           # 12-15
+        sha256msg2     @MSG1[3],@MSG1[0]
+       pshufd          \$0x0e,$TMP0,$Wi
+       paddd           $TMPx,@MSG0[1]
+        movdqa         @MSG1[0],$TMPx
+        palignr        \$4,@MSG1[3],$TMPx
+       sha256msg1      @MSG0[3],@MSG0[2]
+       sha256rnds2     $CDGH0,$ABEF0
+        pshufd         \$0x0e,$TMP1,$Wi
+       movdqa          4*16-0x80($Tbl),$TMP0
+       paddd           @MSG0[0],$TMP0
+        sha256rnds2    $CDGH1,$ABEF1
+        sha256msg1     @MSG1[3],@MSG1[2]
+___
+for($i=4;$i<16-3;$i++) {
+$code.=<<___;
+       movdqa          $TMP0,$Wi
+        movdqa         $i*16-0x80($Tbl),$TMP1
+        paddd          $TMPx,@MSG1[1]
+        paddd          @MSG1[0],$TMP1
+       sha256msg2      @MSG0[0],@MSG0[1]
+       sha256rnds2     $ABEF0,$CDGH0           # 16-19...
+        movdqa         $TMP1,$Wi
+       movdqa          @MSG0[1],$TMPx
+       palignr         \$4,@MSG0[0],$TMPx
+        sha256rnds2    $ABEF1,$CDGH1           # 16-19...
+        sha256msg2     @MSG1[0],@MSG1[1]
+       pshufd          \$0x0e,$TMP0,$Wi
+       paddd           $TMPx,@MSG0[2]
+        movdqa         @MSG1[1],$TMPx
+        palignr        \$4,@MSG1[0],$TMPx
+       sha256msg1      @MSG0[0],@MSG0[3]
+       sha256rnds2     $CDGH0,$ABEF0
+        pshufd         \$0x0e,$TMP1,$Wi
+       movdqa          `($i+1)*16`-0x80($Tbl),$TMP0
+       paddd           @MSG0[1],$TMP0
+        sha256rnds2    $CDGH1,$ABEF1
+        sha256msg1     @MSG1[0],@MSG1[3]
+___
+       push(@MSG0,shift(@MSG0));       push(@MSG1,shift(@MSG1));
+}
+$code.=<<___;
+       movdqa          $TMP0,$Wi
+        movdqa         13*16-0x80($Tbl),$TMP1
+        paddd          $TMPx,@MSG1[1]
+        paddd          @MSG1[0],$TMP1
+       sha256msg2      @MSG0[0],@MSG0[1]
+       sha256rnds2     $ABEF0,$CDGH0           # 52-55
+        movdqa         $TMP1,$Wi
+       movdqa          @MSG0[1],$TMPx
+       palignr         \$4,@MSG0[0],$TMPx
+        sha256rnds2    $ABEF1,$CDGH1           # 52-55
+        sha256msg2     @MSG1[0],@MSG1[1]
+       pshufd          \$0x0e,$TMP0,$Wi
+       paddd           $TMPx,@MSG0[2]
+        movdqa         @MSG1[1],$TMPx
+        palignr        \$4,@MSG1[0],$TMPx
+       nop
+       sha256rnds2     $CDGH0,$ABEF0
+        pshufd         \$0x0e,$TMP1,$Wi
+       movdqa          14*16-0x80($Tbl),$TMP0
+       paddd           @MSG0[1],$TMP0
+        sha256rnds2    $CDGH1,$ABEF1
+
+       movdqa          $TMP0,$Wi
+        movdqa         14*16-0x80($Tbl),$TMP1
+        paddd          $TMPx,@MSG1[2]
+        paddd          @MSG1[1],$TMP1
+       sha256msg2      @MSG0[1],@MSG0[2]
+       nop
+       sha256rnds2     $ABEF0,$CDGH0           # 56-59
+        movdqa         $TMP1,$Wi
+         mov           \$1,%ecx
+         pxor          @MSG0[1],@MSG0[1]       # zero
+        sha256rnds2    $ABEF1,$CDGH1           # 56-59
+        sha256msg2     @MSG1[1],@MSG1[2]
+       pshufd          \$0x0e,$TMP0,$Wi
+       movdqa          15*16-0x80($Tbl),$TMP0
+       paddd           @MSG0[2],$TMP0
+         movq          (%rbx),@MSG0[2]         # pull counters
+         nop
+       sha256rnds2     $CDGH0,$ABEF0
+        pshufd         \$0x0e,$TMP1,$Wi
+        movdqa         15*16-0x80($Tbl),$TMP1
+        paddd          @MSG1[2],$TMP1
+        sha256rnds2    $CDGH1,$ABEF1
+
+       movdqa          $TMP0,$Wi
+         cmp           4*0(%rbx),%ecx          # examine counters
+         cmovge        %rsp,@ptr[0]            # cancel input
+         cmp           4*1(%rbx),%ecx
+         cmovge        %rsp,@ptr[1]
+         pshufd        \$0x00,@MSG0[2],@MSG1[0]
+       sha256rnds2     $ABEF0,$CDGH0           # 60-63
+        movdqa         $TMP1,$Wi
+         pshufd        \$0x55,@MSG0[2],@MSG1[1]
+         movdqa        @MSG0[2],@MSG1[2]
+        sha256rnds2    $ABEF1,$CDGH1           # 60-63
+       pshufd          \$0x0e,$TMP0,$Wi
+         pcmpgtd       @MSG0[1],@MSG1[0]
+         pcmpgtd       @MSG0[1],@MSG1[1]
+       sha256rnds2     $CDGH0,$ABEF0
+        pshufd         \$0x0e,$TMP1,$Wi
+         pcmpgtd       @MSG0[1],@MSG1[2]       # counter mask
+         movdqa        K256_shaext-0x10(%rip),$TMPx
+        sha256rnds2    $CDGH1,$ABEF1
+
+       pand            @MSG1[0],$CDGH0
+        pand           @MSG1[1],$CDGH1
+       pand            @MSG1[0],$ABEF0
+        pand           @MSG1[1],$ABEF1
+       paddd           @MSG0[2],@MSG1[2]       # counters--
+
+       paddd           0x50(%rsp),$CDGH0
+        paddd          0x70(%rsp),$CDGH1
+       paddd           0x40(%rsp),$ABEF0
+        paddd          0x60(%rsp),$ABEF1
+
+       movq            @MSG1[2],(%rbx)         # save counters
+       dec             $num
+       jnz             .Loop_shaext
+
+       mov             `$REG_SZ*17+8`(%rsp),$num
+
+       pshufd          \$0b00011011,$ABEF0,$ABEF0
+       pshufd          \$0b00011011,$CDGH0,$CDGH0
+       pshufd          \$0b00011011,$ABEF1,$ABEF1
+       pshufd          \$0b00011011,$CDGH1,$CDGH1
+
+       movdqa          $ABEF0,@MSG0[0]
+       movdqa          $CDGH0,@MSG0[1]
+       punpckldq       $ABEF1,$ABEF0                   # B1.B0.A1.A0
+       punpckhdq       $ABEF1,@MSG0[0]                 # F1.F0.E1.E0
+       punpckldq       $CDGH1,$CDGH0                   # D1.D0.C1.C0
+       punpckhdq       $CDGH1,@MSG0[1]                 # H1.H0.G1.G0
+
+       movq            $ABEF0,0x00-0x80($ctx)          # A1.A0
+       psrldq          \$8,$ABEF0
+       movq            @MSG0[0],0x80-0x80($ctx)        # E1.E0
+       psrldq          \$8,@MSG0[0]
+       movq            $ABEF0,0x20-0x80($ctx)          # B1.B0
+       movq            @MSG0[0],0xa0-0x80($ctx)        # F1.F0
+
+       movq            $CDGH0,0x40-0x80($ctx)          # C1.C0
+       psrldq          \$8,$CDGH0
+       movq            @MSG0[1],0xc0-0x80($ctx)        # G1.G0
+       psrldq          \$8,@MSG0[1]
+       movq            $CDGH0,0x60-0x80($ctx)          # D1.D0
+       movq            @MSG0[1],0xe0-0x80($ctx)        # H1.H0
+
+       lea     `$REG_SZ/2`($ctx),$ctx
+       lea     `16*2`($inp),$inp
+       dec     $num
+       jnz     .Loop_grande_shaext
+
+.Ldone_shaext:
+       #mov    `$REG_SZ*17`(%rsp),%rax         # original %rsp
+___
+$code.=<<___ if ($win64);
+       movaps  -0xb8(%rax),%xmm6
+       movaps  -0xa8(%rax),%xmm7
+       movaps  -0x98(%rax),%xmm8
+       movaps  -0x88(%rax),%xmm9
+       movaps  -0x78(%rax),%xmm10
+       movaps  -0x68(%rax),%xmm11
+       movaps  -0x58(%rax),%xmm12
+       movaps  -0x48(%rax),%xmm13
+       movaps  -0x38(%rax),%xmm14
+       movaps  -0x28(%rax),%xmm15
+___
+$code.=<<___;
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp
+.Lepilogue_shaext:
+       ret
+.size  sha256_multi_block_shaext,.-sha256_multi_block_shaext
+___
+                                               }}}
+                                               if ($avx) {{{
+# ROUND_00_15_avx($i,$a,$b,$c,$d,$e,$f,$g,$h)
+#
+# Appends the AVX code for SHA-256 round $i to $code. $a..$h name the
+# vector registers currently playing the roles of the eight working
+# variables; each 32-bit lane belongs to a separate input stream (four
+# pointers are read when $REG_SZ==16, eight when $REG_SZ==32).
+#
+# For rounds 0..15 the message word is gathered straight from the input
+# pointers in @ptr. ROUND_16_XX_avx calls this sub with $i>=16; then none
+# of the four gather variants below fires and $Xi already carries the
+# scheduled word.
+sub ROUND_00_15_avx {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+
+# 128-bit flavour, rounds 0..14: fetch dword $i from each of the four
+# streams, interleave them into $Xi and shuffle bytes with the mask held
+# in $Xn (the callers load .Lpbswap into $Xn before round 0).
+$code.=<<___ if ($i<15 && $REG_SZ==16);
+       vmovd           `4*$i`(@ptr[0]),$Xi
+       vmovd           `4*$i`(@ptr[1]),$t1
+       vpinsrd         \$1,`4*$i`(@ptr[2]),$Xi,$Xi
+       vpinsrd         \$1,`4*$i`(@ptr[3]),$t1,$t1
+       vpunpckldq      $t1,$Xi,$Xi
+       vpshufb         $Xn,$Xi,$Xi
+___
+# 128-bit flavour, round 15: same gather, and every input pointer is
+# advanced by 16*4 bytes (one 64-byte block) once its last word is read.
+$code.=<<___ if ($i==15 && $REG_SZ==16);
+       vmovd           `4*$i`(@ptr[0]),$Xi
+        lea            `16*4`(@ptr[0]),@ptr[0]
+       vmovd           `4*$i`(@ptr[1]),$t1
+        lea            `16*4`(@ptr[1]),@ptr[1]
+       vpinsrd         \$1,`4*$i`(@ptr[2]),$Xi,$Xi
+        lea            `16*4`(@ptr[2]),@ptr[2]
+       vpinsrd         \$1,`4*$i`(@ptr[3]),$t1,$t1
+        lea            `16*4`(@ptr[3]),@ptr[3]
+       vpunpckldq      $t1,$Xi,$Xi
+       vpshufb         $Xn,$Xi,$Xi
+___
+# 256-bit flavour, rounds 0..14: streams 0..3 are gathered into $Xi and
+# streams 4..7 into $t1, then the two halves are combined.
+$code.=<<___ if ($i<15 && $REG_SZ==32);
+       vmovd           `4*$i`(@ptr[0]),$Xi
+       vmovd           `4*$i`(@ptr[4]),$t1
+       vmovd           `4*$i`(@ptr[1]),$t2
+       vmovd           `4*$i`(@ptr[5]),$t3
+       vpinsrd         \$1,`4*$i`(@ptr[2]),$Xi,$Xi
+       vpinsrd         \$1,`4*$i`(@ptr[6]),$t1,$t1
+       vpinsrd         \$1,`4*$i`(@ptr[3]),$t2,$t2
+       vpunpckldq      $t2,$Xi,$Xi
+       vpinsrd         \$1,`4*$i`(@ptr[7]),$t3,$t3
+       vpunpckldq      $t3,$t1,$t1
+       vinserti128     $t1,$Xi,$Xi
+       vpshufb         $Xn,$Xi,$Xi
+___
+# 256-bit flavour, round 15: as above, plus the 64-byte advance of all
+# eight input pointers.
+$code.=<<___ if ($i==15 && $REG_SZ==32);
+       vmovd           `4*$i`(@ptr[0]),$Xi
+        lea            `16*4`(@ptr[0]),@ptr[0]
+       vmovd           `4*$i`(@ptr[4]),$t1
+        lea            `16*4`(@ptr[4]),@ptr[4]
+       vmovd           `4*$i`(@ptr[1]),$t2
+        lea            `16*4`(@ptr[1]),@ptr[1]
+       vmovd           `4*$i`(@ptr[5]),$t3
+        lea            `16*4`(@ptr[5]),@ptr[5]
+       vpinsrd         \$1,`4*$i`(@ptr[2]),$Xi,$Xi
+        lea            `16*4`(@ptr[2]),@ptr[2]
+       vpinsrd         \$1,`4*$i`(@ptr[6]),$t1,$t1
+        lea            `16*4`(@ptr[6]),@ptr[6]
+       vpinsrd         \$1,`4*$i`(@ptr[3]),$t2,$t2
+        lea            `16*4`(@ptr[3]),@ptr[3]
+       vpunpckldq      $t2,$Xi,$Xi
+       vpinsrd         \$1,`4*$i`(@ptr[7]),$t3,$t3
+        lea            `16*4`(@ptr[7]),@ptr[7]
+       vpunpckldq      $t3,$t1,$t1
+       vinserti128     $t1,$Xi,$Xi
+       vpshufb         $Xn,$Xi,$Xi
+___
+# The round proper. $Xi is first spilled to its slot (Xi_off is defined
+# outside this section), then accumulates h + K[round] + Sigma1(e) +
+# Ch(e,f,g); that sum is added to $d, and the register named $h is
+# rebuilt as Maj(a,b,c) + sum + Sigma0(a). Rotations are composed from
+# shift-right/shift-left pairs XORed together. Round 15 also issues a
+# prefetch for every input pointer.
+$code.=<<___;
+       vpsrld  \$6,$e,$sigma
+       vpslld  \$26,$e,$t3
+       vmovdqu $Xi,`&Xi_off($i)`
+        vpaddd $h,$Xi,$Xi                      # Xi+=h
+
+       vpsrld  \$11,$e,$t2
+       vpxor   $t3,$sigma,$sigma
+       vpslld  \$21,$e,$t3
+        vpaddd `32*($i%8)-128`($Tbl),$Xi,$Xi   # Xi+=K[round]
+       vpxor   $t2,$sigma,$sigma
+
+       vpsrld  \$25,$e,$t2
+       vpxor   $t3,$sigma,$sigma
+        `"prefetcht0   63(@ptr[0])"            if ($i==15)`
+       vpslld  \$7,$e,$t3
+        vpandn $g,$e,$t1
+        vpand  $f,$e,$axb                      # borrow $axb
+        `"prefetcht0   63(@ptr[1])"            if ($i==15)`
+       vpxor   $t2,$sigma,$sigma
+
+       vpsrld  \$2,$a,$h                       # borrow $h
+       vpxor   $t3,$sigma,$sigma               # Sigma1(e)
+        `"prefetcht0   63(@ptr[2])"            if ($i==15)`
+       vpslld  \$30,$a,$t2
+        vpxor  $axb,$t1,$t1                    # Ch(e,f,g)
+        vpxor  $a,$b,$axb                      # a^b, b^c in next round
+        `"prefetcht0   63(@ptr[3])"            if ($i==15)`
+       vpxor   $t2,$h,$h
+       vpaddd  $sigma,$Xi,$Xi                  # Xi+=Sigma1(e)
+
+       vpsrld  \$13,$a,$t2
+        `"prefetcht0   63(@ptr[4])"            if ($i==15 && $REG_SZ==32)`
+       vpslld  \$19,$a,$t3
+        vpaddd $t1,$Xi,$Xi                     # Xi+=Ch(e,f,g)
+        vpand  $axb,$bxc,$bxc
+        `"prefetcht0   63(@ptr[5])"            if ($i==15 && $REG_SZ==32)`
+       vpxor   $t2,$h,$sigma
+
+       vpsrld  \$22,$a,$t2
+       vpxor   $t3,$sigma,$sigma
+        `"prefetcht0   63(@ptr[6])"            if ($i==15 && $REG_SZ==32)`
+       vpslld  \$10,$a,$t3
+        vpxor  $bxc,$b,$h                      # h=Maj(a,b,c)=Ch(a^b,c,b)
+        vpaddd $Xi,$d,$d                       # d+=Xi
+        `"prefetcht0   63(@ptr[7])"            if ($i==15 && $REG_SZ==32)`
+       vpxor   $t2,$sigma,$sigma
+       vpxor   $t3,$sigma,$sigma               # Sigma0(a)
+
+       vpaddd  $Xi,$h,$h                       # h+=Xi
+       vpaddd  $sigma,$h,$h                    # h+=Sigma0(a)
+___
+# K[round] is addressed as 32*($i%8)-128($Tbl), so after the last round
+# of every group of eight the table pointer moves on by 8*32 bytes.
+$code.=<<___ if (($i%8)==7);
+       add     \$`32*8`,$Tbl
+___
+       # $axb now holds this round's a^b, which is the next round's b^c:
+       # swap the two register names instead of emitting a move.
+       ($axb,$bxc)=($bxc,$axb);
+}
+
+# ROUND_16_XX_avx($i,$a,$b,$c,$d,$e,$f,$g,$h)
+#
+# Appends the code for a round that needs a freshly scheduled message
+# word. $Xi is extended in place:
+#   Xi += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])
+# with the X[] words fetched through Xi_off (defined outside this
+# section), after which the common round body is produced by
+# ROUND_00_15_avx with the same arguments.
+#
+# Note on the inline annotations inside the emitted text: the shift
+# amounts applied to X[i+14] (10, 17/15, 19/13) are the sigma1 ones, so
+# the "# sigma0(X[i+14])" remark refers to sigma1, and the
+# "# Xi+=sigma0(e)" remark refers to sigma0(X[i+1]).
+sub ROUND_16_XX_avx {
+my $i=shift;
+
+$code.=<<___;
+       vmovdqu `&Xi_off($i+1)`,$Xn
+       vpaddd  `&Xi_off($i+9)`,$Xi,$Xi         # Xi+=X[i+9]
+
+       vpsrld  \$3,$Xn,$sigma
+       vpsrld  \$7,$Xn,$t2
+       vpslld  \$25,$Xn,$t3
+       vpxor   $t2,$sigma,$sigma
+       vpsrld  \$18,$Xn,$t2
+       vpxor   $t3,$sigma,$sigma
+       vpslld  \$14,$Xn,$t3
+       vmovdqu `&Xi_off($i+14)`,$t1
+       vpsrld  \$10,$t1,$axb                   # borrow $axb
+
+       vpxor   $t2,$sigma,$sigma
+       vpsrld  \$17,$t1,$t2
+       vpxor   $t3,$sigma,$sigma               # sigma0(X[i+1])
+       vpslld  \$15,$t1,$t3
+        vpaddd $sigma,$Xi,$Xi                  # Xi+=sigma0(e)
+       vpxor   $t2,$axb,$sigma
+       vpsrld  \$19,$t1,$t2
+       vpxor   $t3,$sigma,$sigma
+       vpslld  \$13,$t1,$t3
+       vpxor   $t2,$sigma,$sigma
+       vpxor   $t3,$sigma,$sigma               # sigma0(X[i+14])
+       vpaddd  $sigma,$Xi,$Xi                  # Xi+=sigma1(X[i+14])
+___
+       # Remaining @_ is ($a..$h); emit the shared round body.
+       &ROUND_00_15_avx($i,@_);
+       # $Xn was loaded with X[i+1] above, which is the word the next
+       # round extends: exchange the two names rather than copy.
+       ($Xi,$Xn)=($Xn,$Xi);
+}
+
+# Entry point of the AVX code path. The _avx_shortcut label is emitted
+# at the very same address as the function symbol.
+$code.=<<___;
+.type  sha256_multi_block_avx,\@function,3
+.align 32
+sha256_multi_block_avx:
+_avx_shortcut:
+___
+# Only when AVX2 code is generated as well: with $num >= 2 and bit 5 of
+# (%rcx >> 32) set, continue at _avx2_shortcut instead.
+# NOTE(review): %rcx is presumably a capability word loaded by the code
+# that jumps to _avx_shortcut (outside this section) -- confirm.
+$code.=<<___ if ($avx>1);
+       shr     \$32,%rcx
+       cmp     \$2,$num
+       jb      .Lavx
+       test    \$`1<<5`,%ecx
+       jnz     _avx2_shortcut
+       jmp     .Lavx
+.align 32
+.Lavx:
+___
+# Keep the incoming %rsp in %rax and save the two registers this path
+# uses as scratch (%rbx, %rbp).
+$code.=<<___;
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+___
+# Win64 only: spill %xmm6-%xmm15 right below the two pushed registers.
+# The first four are addressed off the new %rsp, the rest off %rax; the
+# epilogue reloads all of them at -0xb8..-0x28 relative to the original
+# %rsp.
+$code.=<<___ if ($win64);
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,-0x78(%rax)
+       movaps  %xmm11,-0x68(%rax)
+       movaps  %xmm12,-0x58(%rax)
+       movaps  %xmm13,-0x48(%rax)
+       movaps  %xmm14,-0x38(%rax)
+       movaps  %xmm15,-0x28(%rax)
+___
+# Carve out a 256-byte aligned frame of $REG_SZ*18 bytes. Slot
+# $REG_SZ*17 keeps the original %rsp, $REG_SZ*17+8 the outer iteration
+# count, and %rbx points at $REG_SZ*16, where the per-stream block
+# counters live. $Tbl and $ctx are biased by 128 so later accesses can
+# use short displacements.
+$code.=<<___;
+       sub     \$`$REG_SZ*18`, %rsp
+       and     \$-256,%rsp
+       mov     %rax,`$REG_SZ*17`(%rsp)         # original %rsp
+.Lbody_avx:
+       lea     K256+128(%rip),$Tbl
+       lea     `$REG_SZ*16`(%rsp),%rbx
+       lea     0x80($ctx),$ctx                 # size optimization
+
+.Loop_grande_avx:
+       mov     $num,`$REG_SZ*17+8`(%rsp)       # original $num
+       xor     $num,$num
+___
+# For each of the four input descriptors (16 bytes apart: pointer at +0,
+# block count at +8) load the pointer, record the count in the counter
+# array, keep the maximum count in $num, and redirect the pointer to $Tbl
+# when the count is <= 0 so that the stream can still be read harmlessly.
+for($i=0;$i<4;$i++) {
+    $code.=<<___;
+       mov     `16*$i+0`($inp),@ptr[$i]        # input pointer
+       mov     `16*$i+8`($inp),%ecx            # number of blocks
+       cmp     $num,%ecx
+       cmovg   %ecx,$num                       # find maximum
+       test    %ecx,%ecx
+       mov     %ecx,`4*$i`(%rbx)               # initialize counters
+       cmovle  $Tbl,@ptr[$i]                   # cancel input
+___
+}
+# Nothing to do when the largest block count is zero. Otherwise load the
+# eight state vectors A..H from the (biased) context and the byte-swap
+# mask into $Xn. %rax is repointed to 128(%rsp) here.
+# NOTE(review): %rax is presumably the base register Xi_off addresses
+# the message schedule through (Xi_off is defined outside this section).
+$code.=<<___;
+       test    $num,$num
+       jz      .Ldone_avx
+
+       vmovdqu 0x00-0x80($ctx),$A              # load context
+        lea    128(%rsp),%rax
+       vmovdqu 0x20-0x80($ctx),$B
+       vmovdqu 0x40-0x80($ctx),$C
+       vmovdqu 0x60-0x80($ctx),$D
+       vmovdqu 0x80-0x80($ctx),$E
+       vmovdqu 0xa0-0x80($ctx),$F
+       vmovdqu 0xc0-0x80($ctx),$G
+       vmovdqu 0xe0-0x80($ctx),$H
+       vmovdqu .Lpbswap(%rip),$Xn
+       jmp     .Loop_avx
+
+.align 32
+.Loop_avx:
+       vpxor   $B,$C,$bxc                      # magic seed
+___
+# Rounds 0..15, fully unrolled. Rotating @V after each round renames the
+# working variables instead of moving them between registers.
+for($i=0;$i<16;$i++)   { &ROUND_00_15_avx($i,@V); unshift(@V,pop(@V)); }
+# $i is 16 here. Rounds 16..63 are emitted as one 16-round body that the
+# generated code executes three times (%ecx counts the passes).
+$code.=<<___;
+       vmovdqu `&Xi_off($i)`,$Xi
+       mov     \$3,%ecx
+       jmp     .Loop_16_xx_avx
+.align 32
+.Loop_16_xx_avx:
+___
+for(;$i<32;$i++)       { &ROUND_16_XX_avx($i,@V); unshift(@V,pop(@V)); }
+# After the last pass: rewind $Tbl to K256+128 and set %ecx to 1 for the
+# counter comparison below.
+$code.=<<___;
+       dec     %ecx
+       jnz     .Loop_16_xx_avx
+
+       mov     \$1,%ecx
+       lea     K256+128(%rip),$Tbl
+___
+# A stream whose remaining block count is <= 1 has just consumed its
+# last block (or was already idle): point it at $Tbl from now on.
+for($i=0;$i<4;$i++) {
+    $code.=<<___;
+       cmp     `4*$i`(%rbx),%ecx               # examine counters
+       cmovge  $Tbl,@ptr[$i]                   # cancel input
+___
+}
+# Per-block wrap-up. A lane mask is derived from the counters (all-ones
+# where the counter is > 0); adding the mask decrements exactly those
+# counters. The freshly computed state is ANDed with the mask before the
+# previous context value is added, so lanes whose counter had already
+# reached zero keep their stored state unchanged. The result is written
+# back to the context, the counters are saved, the byte-swap mask is
+# reloaded into $Xn and the block loop repeats $num times.
+# Afterwards the outer loop steps $ctx by $REG_SZ bytes and $inp by
+# 16*$REG_SZ/4 bytes and repeats according to the count saved at
+# $REG_SZ*17+8(%rsp).
+$code.=<<___;
+       vmovdqa (%rbx),$sigma                   # pull counters
+       vpxor   $t1,$t1,$t1
+       vmovdqa $sigma,$Xn
+       vpcmpgtd $t1,$Xn,$Xn                    # mask value
+       vpaddd  $Xn,$sigma,$sigma               # counters--
+
+       vmovdqu 0x00-0x80($ctx),$t1
+       vpand   $Xn,$A,$A
+       vmovdqu 0x20-0x80($ctx),$t2
+       vpand   $Xn,$B,$B
+       vmovdqu 0x40-0x80($ctx),$t3
+       vpand   $Xn,$C,$C
+       vmovdqu 0x60-0x80($ctx),$Xi
+       vpand   $Xn,$D,$D
+       vpaddd  $t1,$A,$A
+       vmovdqu 0x80-0x80($ctx),$t1
+       vpand   $Xn,$E,$E
+       vpaddd  $t2,$B,$B
+       vmovdqu 0xa0-0x80($ctx),$t2
+       vpand   $Xn,$F,$F
+       vpaddd  $t3,$C,$C
+       vmovdqu 0xc0-0x80($ctx),$t3
+       vpand   $Xn,$G,$G
+       vpaddd  $Xi,$D,$D
+       vmovdqu 0xe0-0x80($ctx),$Xi
+       vpand   $Xn,$H,$H
+       vpaddd  $t1,$E,$E
+       vpaddd  $t2,$F,$F
+       vmovdqu $A,0x00-0x80($ctx)
+       vpaddd  $t3,$G,$G
+       vmovdqu $B,0x20-0x80($ctx)
+       vpaddd  $Xi,$H,$H
+       vmovdqu $C,0x40-0x80($ctx)
+       vmovdqu $D,0x60-0x80($ctx)
+       vmovdqu $E,0x80-0x80($ctx)
+       vmovdqu $F,0xa0-0x80($ctx)
+       vmovdqu $G,0xc0-0x80($ctx)
+       vmovdqu $H,0xe0-0x80($ctx)
+
+       vmovdqu $sigma,(%rbx)                   # save counters
+       vmovdqu .Lpbswap(%rip),$Xn
+       dec     $num
+       jnz     .Loop_avx
+
+       mov     `$REG_SZ*17+8`(%rsp),$num
+       lea     $REG_SZ($ctx),$ctx
+       lea     `16*$REG_SZ/4`($inp),$inp
+       dec     $num
+       jnz     .Loop_grande_avx
+
+.Ldone_avx:
+       mov     `$REG_SZ*17`(%rsp),%rax         # orignal %rsp
+       vzeroupper
+___
+# Win64 only: reload %xmm6-%xmm15 from where the prologue spilled them,
+# addressed relative to the original %rsp now back in %rax.
+$code.=<<___ if ($win64);
+       movaps  -0xb8(%rax),%xmm6
+       movaps  -0xa8(%rax),%xmm7
+       movaps  -0x98(%rax),%xmm8
+       movaps  -0x88(%rax),%xmm9
+       movaps  -0x78(%rax),%xmm10
+       movaps  -0x68(%rax),%xmm11
+       movaps  -0x58(%rax),%xmm12
+       movaps  -0x48(%rax),%xmm13
+       movaps  -0x38(%rax),%xmm14
+       movaps  -0x28(%rax),%xmm15
+___
+# Restore %rbp/%rbx from just below the original %rsp, then %rsp itself.
+$code.=<<___;
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp
+.Lepilogue_avx:
+       ret
+.size  sha256_multi_block_avx,.-sha256_multi_block_avx
+___
+                                               if ($avx>1) {
+# Expand every backtick expression accumulated in $code so far. This has
+# to happen before $REG_SZ, @ptr and the register names are re-assigned
+# below, because the pending expressions refer to those variables.
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+
+# Re-parameterise the generator for 256-bit registers: eight input
+# pointers (%r12-%r15, %r8-%r11), state in %ymm8-%ymm15, temporaries in
+# %ymm0-%ymm7.
+$REG_SZ=32;
+@ptr=map("%r$_",(12..15,8..11));
+
+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("%ymm$_",(8..15));
+($t1,$t2,$t3,$axb,$bxc,$Xi,$Xn,$sigma)=map("%ymm$_",(0..7));
+
+# Entry point of the AVX2 path. Besides %rbx/%rbp it also pushes
+# %r12-%r15, which serve as input pointers here.
+$code.=<<___;
+.type  sha256_multi_block_avx2,\@function,3
+.align 32
+sha256_multi_block_avx2:
+_avx2_shortcut:
+       mov     %rsp,%rax
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+___
+# Win64 only: spill %xmm6-%xmm15 below the six pushed registers; the
+# epilogue reloads them at -0xd8..-0x48 relative to the original %rsp.
+$code.=<<___ if ($win64);
+       lea     -0xa8(%rsp),%rsp
+       movaps  %xmm6,(%rsp)
+       movaps  %xmm7,0x10(%rsp)
+       movaps  %xmm8,0x20(%rsp)
+       movaps  %xmm9,0x30(%rsp)
+       movaps  %xmm10,0x40(%rsp)
+       movaps  %xmm11,0x50(%rsp)
+       movaps  %xmm12,-0x78(%rax)
+       movaps  %xmm13,-0x68(%rax)
+       movaps  %xmm14,-0x58(%rax)
+       movaps  %xmm15,-0x48(%rax)
+___
+# Frame layout mirrors the AVX path with $REG_SZ==32: original %rsp at
+# $REG_SZ*17, saved $num at $REG_SZ*17+8, counters at $REG_SZ*16. %rbx
+# is set up inside the loop label because it is repointed while the
+# rounds run.
+$code.=<<___;
+       sub     \$`$REG_SZ*18`, %rsp
+       and     \$-256,%rsp
+       mov     %rax,`$REG_SZ*17`(%rsp)         # original %rsp
+.Lbody_avx2:
+       lea     K256+128(%rip),$Tbl
+       lea     0x80($ctx),$ctx                 # size optimization
+
+.Loop_grande_avx2:
+       mov     $num,`$REG_SZ*17+8`(%rsp)       # original $num
+       xor     $num,$num
+       lea     `$REG_SZ*16`(%rsp),%rbx
+___
+# Load the eight input descriptors: pointer, block count into the
+# counter array, running maximum in $num, and pointer redirected to $Tbl
+# for streams with a count <= 0.
+for($i=0;$i<8;$i++) {
+    $code.=<<___;
+       mov     `16*$i+0`($inp),@ptr[$i]        # input pointer
+       mov     `16*$i+8`($inp),%ecx            # number of blocks
+       cmp     $num,%ecx
+       cmovg   %ecx,$num                       # find maximum
+       test    %ecx,%ecx
+       mov     %ecx,`4*$i`(%rbx)               # initialize counters
+       cmovle  $Tbl,@ptr[$i]                   # cancel input
+___
+}
+# Load the eight state vectors and the byte-swap mask, and repoint %rax
+# and %rbx into the frame (128(%rsp) and 256+128(%rsp)) for the duration
+# of the rounds.
+# NOTE(review): unlike the AVX path there is no "test $num,$num / jz"
+# in front of this loop, so a maximum block count of zero is not
+# short-circuited here; presumably callers never pass all-empty inputs
+# -- confirm.
+$code.=<<___;
+       vmovdqu 0x00-0x80($ctx),$A              # load context
+        lea    128(%rsp),%rax
+       vmovdqu 0x20-0x80($ctx),$B
+        lea    256+128(%rsp),%rbx
+       vmovdqu 0x40-0x80($ctx),$C
+       vmovdqu 0x60-0x80($ctx),$D
+       vmovdqu 0x80-0x80($ctx),$E
+       vmovdqu 0xa0-0x80($ctx),$F
+       vmovdqu 0xc0-0x80($ctx),$G
+       vmovdqu 0xe0-0x80($ctx),$H
+       vmovdqu .Lpbswap(%rip),$Xn
+       jmp     .Loop_avx2
+
+.align 32
+.Loop_avx2:
+       vpxor   $B,$C,$bxc                      # magic seed
+___
+# Rounds 0..15, unrolled; @V is rotated to rename the working variables.
+for($i=0;$i<16;$i++)   { &ROUND_00_15_avx($i,@V); unshift(@V,pop(@V)); }
+# Rounds 16..63: a 16-round body executed three times by the generated
+# code (%ecx counts the passes).
+$code.=<<___;
+       vmovdqu `&Xi_off($i)`,$Xi
+       mov     \$3,%ecx
+       jmp     .Loop_16_xx_avx2
+.align 32
+.Loop_16_xx_avx2:
+___
+for(;$i<32;$i++)       { &ROUND_16_XX_avx($i,@V); unshift(@V,pop(@V)); }
+# After the last pass: point %rbx back at the counters, rewind $Tbl and
+# set %ecx to 1 for the counter comparison below.
+$code.=<<___;
+       dec     %ecx
+       jnz     .Loop_16_xx_avx2
+
+       mov     \$1,%ecx
+       lea     `$REG_SZ*16`(%rsp),%rbx
+       lea     K256+128(%rip),$Tbl
+___
+# Streams with a remaining block count <= 1 are redirected to $Tbl.
+for($i=0;$i<8;$i++) {
+    $code.=<<___;
+       cmp     `4*$i`(%rbx),%ecx               # examine counters
+       cmovge  $Tbl,@ptr[$i]                   # cancel input
+___
+}
+# Per-block wrap-up, same scheme as the AVX path: build a lane mask from
+# the counters, decrement the live counters by adding the mask, AND the
+# new state with the mask, add the stored context, write it back and
+# save the counters. %rbx is then repointed to 256+128(%rsp) for the
+# next pass over the rounds. The outer "grande" loop is present only as
+# commented-out text in the emitted code, so this path runs the block
+# loop once per call.
+$code.=<<___;
+       vmovdqa (%rbx),$sigma                   # pull counters
+       vpxor   $t1,$t1,$t1
+       vmovdqa $sigma,$Xn
+       vpcmpgtd $t1,$Xn,$Xn                    # mask value
+       vpaddd  $Xn,$sigma,$sigma               # counters--
+
+       vmovdqu 0x00-0x80($ctx),$t1
+       vpand   $Xn,$A,$A
+       vmovdqu 0x20-0x80($ctx),$t2
+       vpand   $Xn,$B,$B
+       vmovdqu 0x40-0x80($ctx),$t3
+       vpand   $Xn,$C,$C
+       vmovdqu 0x60-0x80($ctx),$Xi
+       vpand   $Xn,$D,$D
+       vpaddd  $t1,$A,$A
+       vmovdqu 0x80-0x80($ctx),$t1
+       vpand   $Xn,$E,$E
+       vpaddd  $t2,$B,$B
+       vmovdqu 0xa0-0x80($ctx),$t2
+       vpand   $Xn,$F,$F
+       vpaddd  $t3,$C,$C
+       vmovdqu 0xc0-0x80($ctx),$t3
+       vpand   $Xn,$G,$G
+       vpaddd  $Xi,$D,$D
+       vmovdqu 0xe0-0x80($ctx),$Xi
+       vpand   $Xn,$H,$H
+       vpaddd  $t1,$E,$E
+       vpaddd  $t2,$F,$F
+       vmovdqu $A,0x00-0x80($ctx)
+       vpaddd  $t3,$G,$G
+       vmovdqu $B,0x20-0x80($ctx)
+       vpaddd  $Xi,$H,$H
+       vmovdqu $C,0x40-0x80($ctx)
+       vmovdqu $D,0x60-0x80($ctx)
+       vmovdqu $E,0x80-0x80($ctx)
+       vmovdqu $F,0xa0-0x80($ctx)
+       vmovdqu $G,0xc0-0x80($ctx)
+       vmovdqu $H,0xe0-0x80($ctx)
+
+       vmovdqu $sigma,(%rbx)                   # save counters
+       lea     256+128(%rsp),%rbx
+       vmovdqu .Lpbswap(%rip),$Xn
+       dec     $num
+       jnz     .Loop_avx2
+
+       #mov    `$REG_SZ*17+8`(%rsp),$num
+       #lea    $REG_SZ($ctx),$ctx
+       #lea    `16*$REG_SZ/4`($inp),$inp
+       #dec    $num
+       #jnz    .Loop_grande_avx2
+
+.Ldone_avx2:
+       mov     `$REG_SZ*17`(%rsp),%rax         # orignal %rsp
+       vzeroupper
+___
+# Win64 only: reload %xmm6-%xmm15 from the prologue's spill area,
+# addressed relative to the original %rsp now back in %rax.
+$code.=<<___ if ($win64);
+       movaps  -0xd8(%rax),%xmm6
+       movaps  -0xc8(%rax),%xmm7
+       movaps  -0xb8(%rax),%xmm8
+       movaps  -0xa8(%rax),%xmm9
+       movaps  -0x98(%rax),%xmm10
+       movaps  -0x88(%rax),%xmm11
+       movaps  -0x78(%rax),%xmm12
+       movaps  -0x68(%rax),%xmm13
+       movaps  -0x58(%rax),%xmm14
+       movaps  -0x48(%rax),%xmm15
+___
+# Restore the six pushed registers from below the original %rsp, then
+# %rsp itself.
+$code.=<<___;
+       mov     -48(%rax),%r15
+       mov     -40(%rax),%r14
+       mov     -32(%rax),%r13
+       mov     -24(%rax),%r12
+       mov     -16(%rax),%rbp
+       mov     -8(%rax),%rbx
+       lea     (%rax),%rsp
+.Lepilogue_avx2:
+       ret
+.size  sha256_multi_block_avx2,.-sha256_multi_block_avx2
+___
+                                       }       }}}
+$code.=<<___;
+.align 256
+K256:
+___
+# TABLE(@constants)
+#
+# Appends every constant to $code replicated eight times (two .long
+# directives carrying four copies each), i.e. 32 bytes per constant --
+# the stride ROUND_00_15_avx relies on when it addresses the table as
+# 32*($i%8)-128($Tbl).
+sub TABLE {
+    for my $k (@_) {
+       my $quad = join(",", ($k) x 4);         # "k,k,k,k"
+       $code.=<<___;
+       .long   $quad
+       .long   $quad
+___
+    }
+}
+# The 64 SHA-256 round constants, expanded by TABLE into the replicated
+# layout that follows the K256 label.
+&TABLE(        0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,
+       0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
+       0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,
+       0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
+       0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,
+       0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
+       0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,
+       0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
+       0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,
+       0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
+       0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,
+       0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
+       0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,
+       0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
+       0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,
+       0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 );
+# .Lpbswap is the byte-shuffle mask loaded into $Xn by the AVX/AVX2
+# loops; it is stored twice (2 x 16 bytes). K256_shaext is the same set
+# of round constants without replication, used by the SHA-extension
+# path, which also reads the 16 bytes immediately preceding it
+# (K256_shaext-0x10, i.e. the second mask row) as its byte-swap mask --
+# so the mask rows and K256_shaext must stay adjacent.
+$code.=<<___;
+.Lpbswap:
+       .long   0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f     # pbswap
+       .long   0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f     # pbswap
+K256_shaext:
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+       .asciz  "SHA256 multi-block transform for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+if ($win64) {
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+# se_handler: Win64 exception handler for code that uses the two-push
+# frame (saved %rsp kept at 16*17(%rsp), %rbx/%rbp pushed, ten XMM
+# registers spilled below them).
+#
+# HandlerData[0] and HandlerData[1] are image-relative offsets of the
+# end-of-prologue and epilogue labels. If context->Rip lies before the
+# first or at/after the second, the frame is treated as not set up and
+# only the shared tail at .Lin_prologue runs, with %rax holding
+# context->Rax or context->Rsp respectively. Otherwise the original
+# stack pointer is fetched from the frame, %rbx/%rbp are recovered from
+# below it and 20 quadwords (%xmm6-%xmm15) are copied into the context
+# starting at offset 512.
+#
+# The tail stores %rax as context->Rsp, fills context->Rsi/Rdi from
+# 16(%rax)/8(%rax), copies the context (154 quadwords) to
+# disp->ContextRecord, calls RtlVirtualUnwind and returns 1
+# (ExceptionContinueSearch). avx2_handler jumps into the same tail.
+$code.=<<___;
+.extern        __imp_RtlVirtualUnwind
+.type  se_handler,\@abi-omnipotent
+.align 16
+se_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # end of prologue label
+       cmp     %r10,%rbx               # context->Rip<.Lbody
+       jb      .Lin_prologue
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=.Lepilogue
+       jae     .Lin_prologue
+
+       mov     `16*17`(%rax),%rax      # pull saved stack pointer
+
+       mov     -8(%rax),%rbx
+       mov     -16(%rax),%rbp
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+
+       lea     -24-10*16(%rax),%rsi
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$20,%ecx
+       .long   0xa548f3fc              # cld; rep movsq
+
+.Lin_prologue:
+       mov     8(%rax),%rdi
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT)
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
+       mov     %r12,48(%rsp)           # arg7
+       mov     %rcx,56(%rsp)           # arg8, (NULL)
+       call    *__imp_RtlVirtualUnwind(%rip)
+
+       mov     \$1,%eax                # ExceptionContinueSearch
+       add     \$64,%rsp
+       popfq
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       pop     %rdi
+       pop     %rsi
+       ret
+.size  se_handler,.-se_handler
+___
+$code.=<<___ if ($avx>1);
+.type  avx2_handler,\@abi-omnipotent
+.align 16
+avx2_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # end of prologue label
+       cmp     %r10,%rbx               # context->Rip<body label
+       jb      .Lin_prologue
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=epilogue label
+       jae     .Lin_prologue
+
+       mov     `32*17`($context),%rax  # pull saved stack pointer
+
+       mov     -8(%rax),%rbx
+       mov     -16(%rax),%rbp
+       mov     -24(%rax),%r12
+       mov     -32(%rax),%r13
+       mov     -40(%rax),%r14
+       mov     -48(%rax),%r15
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+       mov     %r12,216($context)      # restore context->R12
+       mov     %r13,224($context)      # restore context->R13
+       mov     %r14,232($context)      # restore context->R14
+       mov     %r15,240($context)      # restore context->R15
+
+       lea     -56-10*16(%rax),%rsi
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$20,%ecx
+       .long   0xa548f3fc              # cld; rep movsq
+
+       jmp     .Lin_prologue
+.size  avx2_handler,.-avx2_handler
+___
+$code.=<<___;
+.section       .pdata
+.align 4
+       .rva    .LSEH_begin_sha256_multi_block
+       .rva    .LSEH_end_sha256_multi_block
+       .rva    .LSEH_info_sha256_multi_block
+       .rva    .LSEH_begin_sha256_multi_block_shaext
+       .rva    .LSEH_end_sha256_multi_block_shaext
+       .rva    .LSEH_info_sha256_multi_block_shaext
+___
+$code.=<<___ if ($avx);
+       .rva    .LSEH_begin_sha256_multi_block_avx
+       .rva    .LSEH_end_sha256_multi_block_avx
+       .rva    .LSEH_info_sha256_multi_block_avx
+___
+$code.=<<___ if ($avx>1);
+       .rva    .LSEH_begin_sha256_multi_block_avx2
+       .rva    .LSEH_end_sha256_multi_block_avx2
+       .rva    .LSEH_info_sha256_multi_block_avx2
+___
+$code.=<<___;
+.section       .xdata
+.align 8
+.LSEH_info_sha256_multi_block:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lbody,.Lepilogue                       # HandlerData[]
+.LSEH_info_sha256_multi_block_shaext:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lbody_shaext,.Lepilogue_shaext         # HandlerData[]
+___
+$code.=<<___ if ($avx);
+.LSEH_info_sha256_multi_block_avx:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lbody_avx,.Lepilogue_avx               # HandlerData[]
+___
+$code.=<<___ if ($avx>1);
+.LSEH_info_sha256_multi_block_avx2:
+       .byte   9,0,0,0
+       .rva    avx2_handler
+       .rva    .Lbody_avx2,.Lepilogue_avx2             # HandlerData[]
+___
+}
+####################################################################
+
+sub rex {                      # prepend a REX prefix byte to the opcode array when a register number is >=8
+  local *opcode=shift;         # alias @opcode to the array whose reference the caller passed first
+  my ($dst,$src)=@_;           # register numbers; the caller passes them as captured from %xmmN operands
+  my $rex=0;
+
+    $rex|=0x04                 if ($dst>=8);    # REX.R bit
+    $rex|=0x01                 if ($src>=8);    # REX.B bit
+    unshift @opcode,$rex|0x40  if ($rex);       # 0x40 is the fixed high nibble of a REX prefix; nothing is added when no bit is set
+}
+
+sub sha256op38 {               # turn a sha256* instruction into an explicit .byte sequence; $instr is the mnemonic, $_[0] the operand string
+    my $instr = shift;
+    my %opcodelet = (          # opcode byte that follows the 0x0f,0x38 escape for each supported mnemonic
+               "sha256rnds2" => 0xcb,
+               "sha256msg1"  => 0xcc,
+               "sha256msg2"  => 0xcd   );
+
+    if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) {     # only the xmm,xmm form is encoded by hand
+      my @opcode=(0x0f,0x38);
+       rex(\@opcode,$2,$1);    # may prepend a REX prefix; $2 lands in ModR/M.reg, $1 in ModR/M.rm
+       push @opcode,$opcodelet{$instr};
+       push @opcode,0xc0|($1&7)|(($2&7)<<3);           # ModR/M
+       return ".byte\t".join(',',@opcode);
+    } else {
+       return $instr."\t".@_[0];       # unknown mnemonic or other operand form: return the instruction text unchanged
+    }
+}
+
+foreach (split("\n",$code)) {          # post-process the accumulated $code one line at a time
+       s/\`([^\`]*)\`/eval($1)/ge;     # replace every `...` span with the result of evaluating it as Perl
+
+       s/\b(sha256[^\s]*)\s+(.*)/sha256op38($1,$2)/geo         or      # the first substitution that succeeds short-circuits the rest of this chain
+
+       s/\b(vmov[dq])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go          or      # the remaining rules rewrite %ymmN operands as %xmmN for the listed mnemonics
+       s/\b(vmovdqu)\b(.+)%x%ymm([0-9]+)/$1$2%xmm$3/go         or      # NOTE(review): "%x%ymm" presumably results from a "%x$reg" spelling earlier in the file - confirm
+       s/\b(vpinsr[qd])\b(.+)%ymm([0-9]+),%ymm([0-9]+)/$1$2%xmm$3,%xmm$4/go    or
+       s/\b(vpextr[qd])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go        or
+       s/\b(vinserti128)\b(\s+)%ymm/$1$2\$1,%xmm/go            or      # also inserts a literal "$1," immediate operand before the register
+       s/\b(vpbroadcast[qd]\s+)%ymm([0-9]+)/$1%xmm$2/go;
+
+       print $_,"\n";                  # emit the processed line on STDOUT
+}
+
+close STDOUT;
index 7eab6a5..e96ec00 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -9,17 +9,31 @@
 #
 # SHA512 block transform for x86. September 2007.
 #
+# May 2013.
+#
+# Add SSSE3 code path, 20-25% improvement [over original SSE2 code].
+#
 # Performance in clock cycles per processed byte (less is better):
 #
-#              Pentium PIII    P4      AMD K8  Core2
-# gcc          100     75      116     54      66
-# icc          97      77      95      55      57
-# x86 asm      61      56      82      36      40
-# SSE2 asm     -       -       38      24      20
-# x86_64 asm(*)        -       -       30      10.0    10.5
+#              gcc     icc     x86 asm SIMD(*) x86_64(**)
+# Pentium      100     97      61      -       -
+# PIII         75      77      56      -       -
+# P4           116     95      82      34.6    30.8
+# AMD K8       54      55      36      20.7    9.57
+# Core2                66      57      40      15.9    9.97
+# Westmere     70      -       38      12.2    9.58
+# Sandy Bridge 58      -       35      11.9    11.2
+# Ivy Bridge   50      -       33      11.5    8.17
+# Haswell      46      -       29      11.3    7.66
+# Bulldozer    121     -       50      14.0    13.5
+# VIA Nano     91      -       52      33      14.7
+# Atom         126     -       68      48(***) 14.7
+# Silvermont   97      -       58      42(***) 17.5
 #
-# (*) x86_64 assembler performance is presented for reference
-#     purposes.
+# (*)  whichever best applicable.
+# (**) x86_64 assembler performance is presented for reference
+#      purposes, the results are for integer-only code.
+# (***)        paddq is incredibly slow on Atom.
 #
 # IALU code-path is optimized for elder Pentiums. On vanilla Pentium
 # performance improvement over compiler generated code reaches ~60%,
@@ -66,72 +80,77 @@ $Hsse2=&QWP(56,"esp");
 $A="mm0";      # B-D and
 $E="mm4";      # F-H are commonly loaded to respectively mm1-mm3 and
                # mm5-mm7, but it's done on an on-demand basis...
+$BxC="mm2";    # ... except for B^C
 
 sub BODY_00_15_sse2 {
-    my $prefetch=shift;
+    my $phase=shift;
 
-       &movq   ("mm5",$Fsse2);                 # load f
-       &movq   ("mm6",$Gsse2);                 # load g
-       &movq   ("mm7",$Hsse2);                 # load h
+       #&movq  ("mm5",$Fsse2);                 # load f
+       #&movq  ("mm6",$Gsse2);                 # load g
 
        &movq   ("mm1",$E);                     # %mm1 is sliding right
-       &movq   ("mm2",$E);                     # %mm2 is sliding left
+        &pxor  ("mm5","mm6");                  # f^=g
        &psrlq  ("mm1",14);
-       &movq   ($Esse2,$E);                    # modulo-scheduled save e
-       &psllq  ("mm2",23);
+        &movq  ($Esse2,$E);                    # modulo-scheduled save e
+        &pand  ("mm5",$E);                     # f&=e
+       &psllq  ($E,23);                        # $E is sliding left
+        &movq  ($A,"mm3")                      if ($phase<2);
+        &movq  (&QWP(8*9,"esp"),"mm7");        # save X[i]
        &movq   ("mm3","mm1");                  # %mm3 is T1
-       &psrlq  ("mm1",4);
-       &pxor   ("mm3","mm2");
-       &psllq  ("mm2",23);
+        &psrlq ("mm1",4);
+        &pxor  ("mm5","mm6");                  # Ch(e,f,g)
+       &pxor   ("mm3",$E);
+        &psllq ($E,23);
        &pxor   ("mm3","mm1");
-       &psrlq  ("mm1",23);
-       &pxor   ("mm3","mm2");
-       &psllq  ("mm2",4);
+        &movq  ($Asse2,$A);                    # modulo-scheduled save a
+        &paddq ("mm7","mm5");                  # X[i]+=Ch(e,f,g)
+       &pxor   ("mm3",$E);
+        &psrlq ("mm1",23);
+        &paddq ("mm7",$Hsse2);                 # X[i]+=h
        &pxor   ("mm3","mm1");
-       &paddq  ("mm7",QWP(0,$K512));           # h+=K512[i]
-       &pxor   ("mm3","mm2");                  # T1=Sigma1_512(e)
-
-       &pxor   ("mm5","mm6");                  # f^=g
+        &psllq ($E,4);
+        &paddq ("mm7",QWP(0,$K512));           # X[i]+=K512[i]
+       &pxor   ("mm3",$E);                     # T1=Sigma1_512(e)
+
+        &movq  ($E,$Dsse2);                    # e = load d, e in next round
+       &paddq  ("mm3","mm7");                  # T1+=X[i]
+        &movq  ("mm5",$A);                     # %mm5 is sliding right
+        &psrlq ("mm5",28);
+       &paddq  ($E,"mm3");                     # d += T1
+        &movq  ("mm6",$A);                     # %mm6 is sliding left
+        &movq  ("mm7","mm5");
+        &psllq ("mm6",25);
        &movq   ("mm1",$Bsse2);                 # load b
-       &pand   ("mm5",$E);                     # f&=e
-       &movq   ("mm2",$Csse2);                 # load c
-       &pxor   ("mm5","mm6");                  # f^=g
-       &movq   ($E,$Dsse2);                    # e = load d
-       &paddq  ("mm3","mm5");                  # T1+=Ch(e,f,g)
-       &movq   (&QWP(0,"esp"),$A);             # modulo-scheduled save a
-       &paddq  ("mm3","mm7");                  # T1+=h
-
-       &movq   ("mm5",$A);                     # %mm5 is sliding right
-       &movq   ("mm6",$A);                     # %mm6 is sliding left
-       &paddq  ("mm3",&QWP(8*9,"esp"));        # T1+=X[0]
-       &psrlq  ("mm5",28);
-       &paddq  ($E,"mm3");                     # e += T1
-       &psllq  ("mm6",25);
-       &movq   ("mm7","mm5");                  # %mm7 is T2
-       &psrlq  ("mm5",6);
-       &pxor   ("mm7","mm6");
-       &psllq  ("mm6",5);
-       &pxor   ("mm7","mm5");
-       &psrlq  ("mm5",5);
-       &pxor   ("mm7","mm6");
-       &psllq  ("mm6",6);
-       &pxor   ("mm7","mm5");
+        &psrlq ("mm5",6);
+        &pxor  ("mm7","mm6");
        &sub    ("esp",8);
-       &pxor   ("mm7","mm6");                  # T2=Sigma0_512(a)
-
-       &movq   ("mm5",$A);                     # %mm5=a
-       &por    ($A,"mm2");                     # a=a|c
-       &movq   ("mm6",&QWP(8*(9+16-14),"esp")) if ($prefetch);
-       &pand   ("mm5","mm2");                  # %mm5=a&c
-       &pand   ($A,"mm1");                     # a=(a|c)&b
-       &movq   ("mm2",&QWP(8*(9+16-1),"esp"))  if ($prefetch);
-       &por    ("mm5",$A);                     # %mm5=(a&c)|((a|c)&b)
-       &paddq  ("mm7","mm5");                  # T2+=Maj(a,b,c)
-       &movq   ($A,"mm3");                     # a=T1
-
-       &mov    (&LB("edx"),&BP(0,$K512));
-       &paddq  ($A,"mm7");                     # a+=T2
-       &add    ($K512,8);
+        &psllq ("mm6",5);
+        &pxor  ("mm7","mm5");
+       &pxor   ($A,"mm1");                     # a^b, b^c in next round
+        &psrlq ("mm5",5);
+        &pxor  ("mm7","mm6");
+       &pand   ($BxC,$A);                      # (b^c)&(a^b)
+        &psllq ("mm6",6);
+        &pxor  ("mm7","mm5");
+       &pxor   ($BxC,"mm1");                   # [h=]Maj(a,b,c)
+        &pxor  ("mm6","mm7");                  # Sigma0_512(a)
+        &movq  ("mm7",&QWP(8*(9+16-1),"esp"))  if ($phase!=0); # pre-fetch
+        &movq  ("mm5",$Fsse2)                  if ($phase==0); # load f
+
+    if ($phase>1) {
+       &paddq  ($BxC,"mm6");                   # h+=Sigma0(a)
+        &add   ($K512,8);
+       #&paddq ($BxC,"mm3");                   # h+=T1
+
+       ($A,$BxC) = ($BxC,$A);                  # rotate registers
+    } else {
+       &paddq  ("mm3",$BxC);                   # T1+=Maj(a,b,c)
+        &movq  ($BxC,$A);
+        &add   ($K512,8);
+       &paddq  ("mm3","mm6");                  # T1+=Sigma0(a)
+        &movq  ("mm6",$Gsse2)                  if ($phase==0); # load g
+       #&movq  ($A,"mm3");                     # h=T1
+    }
 }
 
 sub BODY_00_15_x86 {
@@ -284,110 +303,357 @@ sub BODY_00_15_x86 {
 
 if ($sse2) {
        &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
-       &bt     (&DWP(0,"edx"),26);
-       &jnc    (&label("loop_x86"));
+       &mov    ("ecx",&DWP(0,"edx"));
+       &test   ("ecx",1<<26);
+       &jz     (&label("loop_x86"));
+
+       &mov    ("edx",&DWP(4,"edx"));
 
        # load ctx->h[0-7]
        &movq   ($A,&QWP(0,"esi"));
+        &and   ("ecx",1<<24);          # XMM registers availability
        &movq   ("mm1",&QWP(8,"esi"));
-       &movq   ("mm2",&QWP(16,"esi"));
+        &and   ("edx",1<<9);           # SSSE3 bit
+       &movq   ($BxC,&QWP(16,"esi"));
+        &or    ("ecx","edx");
        &movq   ("mm3",&QWP(24,"esi"));
        &movq   ($E,&QWP(32,"esi"));
        &movq   ("mm5",&QWP(40,"esi"));
        &movq   ("mm6",&QWP(48,"esi"));
        &movq   ("mm7",&QWP(56,"esi"));
+       &cmp    ("ecx",1<<24|1<<9);
+       &je     (&label("SSSE3"));
        &sub    ("esp",8*10);
+       &jmp    (&label("loop_sse2"));
 
 &set_label("loop_sse2",16);
-       # &movq ($Asse2,$A);
+       #&movq  ($Asse2,$A);
        &movq   ($Bsse2,"mm1");
-       &movq   ($Csse2,"mm2");
+       &movq   ($Csse2,$BxC);
        &movq   ($Dsse2,"mm3");
-       # &movq ($Esse2,$E);
+       #&movq  ($Esse2,$E);
        &movq   ($Fsse2,"mm5");
        &movq   ($Gsse2,"mm6");
+       &pxor   ($BxC,"mm1");                   # magic
        &movq   ($Hsse2,"mm7");
+       &movq   ("mm3",$A);                     # magic
 
-       &mov    ("ecx",&DWP(0,"edi"));
-       &mov    ("edx",&DWP(4,"edi"));
+       &mov    ("eax",&DWP(0,"edi"));
+       &mov    ("ebx",&DWP(4,"edi"));
        &add    ("edi",8);
-       &bswap  ("ecx");
-       &bswap  ("edx");
-       &mov    (&DWP(8*9+4,"esp"),"ecx");
-       &mov    (&DWP(8*9+0,"esp"),"edx");
+       &mov    ("edx",15);                     # counter
+       &bswap  ("eax");
+       &bswap  ("ebx");
+       &jmp    (&label("00_14_sse2"));
 
 &set_label("00_14_sse2",16);
+       &movd   ("mm1","eax");
        &mov    ("eax",&DWP(0,"edi"));
+       &movd   ("mm7","ebx");
        &mov    ("ebx",&DWP(4,"edi"));
        &add    ("edi",8);
        &bswap  ("eax");
        &bswap  ("ebx");
-       &mov    (&DWP(8*8+4,"esp"),"eax");
-       &mov    (&DWP(8*8+0,"esp"),"ebx");
+       &punpckldq("mm7","mm1");
 
        &BODY_00_15_sse2();
 
-       &cmp    (&LB("edx"),0x35);
-       &jne    (&label("00_14_sse2"));
+       &dec    ("edx");
+       &jnz    (&label("00_14_sse2"));
+
+       &movd   ("mm1","eax");
+       &movd   ("mm7","ebx");
+       &punpckldq("mm7","mm1");
 
        &BODY_00_15_sse2(1);
 
+       &pxor   ($A,$A);                        # A is in %mm3
+       &mov    ("edx",32);                     # counter
+       &jmp    (&label("16_79_sse2"));
+
 &set_label("16_79_sse2",16);
-       #&movq  ("mm2",&QWP(8*(9+16-1),"esp")); #prefetched in BODY_00_15 
-       #&movq  ("mm6",&QWP(8*(9+16-14),"esp"));
-       &movq   ("mm1","mm2");
+    for ($j=0;$j<2;$j++) {                     # 2x unroll
+       #&movq  ("mm7",&QWP(8*(9+16-1),"esp")); # prefetched in BODY_00_15 
+       &movq   ("mm5",&QWP(8*(9+16-14),"esp"));
+       &movq   ("mm1","mm7");
+       &psrlq  ("mm7",1);
+        &movq  ("mm6","mm5");
+        &psrlq ("mm5",6);
+       &psllq  ("mm1",56);
+        &paddq ($A,"mm3");                     # from BODY_00_15
+        &movq  ("mm3","mm7");
+       &psrlq  ("mm7",7-1);
+        &pxor  ("mm3","mm1");
+        &psllq ("mm1",63-56);
+       &pxor   ("mm3","mm7");
+        &psrlq ("mm7",8-7);
+       &pxor   ("mm3","mm1");
+        &movq  ("mm1","mm5");
+        &psrlq ("mm5",19-6);
+       &pxor   ("mm7","mm3");                  # sigma0
+
+        &psllq ("mm6",3);
+        &pxor  ("mm1","mm5");
+       &paddq  ("mm7",&QWP(8*(9+16),"esp"));
+        &pxor  ("mm1","mm6");
+        &psrlq ("mm5",61-19);
+       &paddq  ("mm7",&QWP(8*(9+16-9),"esp"));
+        &pxor  ("mm1","mm5");
+        &psllq ("mm6",45-3);
+       &movq   ("mm5",$Fsse2);                 # load f
+        &pxor  ("mm1","mm6");                  # sigma1
+       &movq   ("mm6",$Gsse2);                 # load g
 
-       &psrlq  ("mm2",1);
-       &movq   ("mm7","mm6");
-       &psrlq  ("mm6",6);
-       &movq   ("mm3","mm2");
+       &paddq  ("mm7","mm1");                  # X[i]
+       #&movq  (&QWP(8*9,"esp"),"mm7");        # moved to BODY_00_15
 
-       &psrlq  ("mm2",7-1);
-       &movq   ("mm5","mm6");
-       &psrlq  ("mm6",19-6);
-       &pxor   ("mm3","mm2");
+       &BODY_00_15_sse2(2);
+    }
+       &dec    ("edx");
+       &jnz    (&label("16_79_sse2"));
 
-       &psrlq  ("mm2",8-7);
-       &pxor   ("mm5","mm6");
-       &psrlq  ("mm6",61-19);
-       &pxor   ("mm3","mm2");
+       #&movq  ($A,$Asse2);
+       &paddq  ($A,"mm3");                     # from BODY_00_15
+       &movq   ("mm1",$Bsse2);
+       #&movq  ($BxC,$Csse2);
+       &movq   ("mm3",$Dsse2);
+       #&movq  ($E,$Esse2);
+       &movq   ("mm5",$Fsse2);
+       &movq   ("mm6",$Gsse2);
+       &movq   ("mm7",$Hsse2);
 
-       &movq   ("mm2",&QWP(8*(9+16),"esp"));
+       &pxor   ($BxC,"mm1");                   # de-magic
+       &paddq  ($A,&QWP(0,"esi"));
+       &paddq  ("mm1",&QWP(8,"esi"));
+       &paddq  ($BxC,&QWP(16,"esi"));
+       &paddq  ("mm3",&QWP(24,"esi"));
+       &paddq  ($E,&QWP(32,"esi"));
+       &paddq  ("mm5",&QWP(40,"esi"));
+       &paddq  ("mm6",&QWP(48,"esi"));
+       &paddq  ("mm7",&QWP(56,"esi"));
 
-       &psllq  ("mm1",56);
-       &pxor   ("mm5","mm6");
-       &psllq  ("mm7",3);
-       &pxor   ("mm3","mm1");
+       &mov    ("eax",8*80);
+       &movq   (&QWP(0,"esi"),$A);
+       &movq   (&QWP(8,"esi"),"mm1");
+       &movq   (&QWP(16,"esi"),$BxC);
+       &movq   (&QWP(24,"esi"),"mm3");
+       &movq   (&QWP(32,"esi"),$E);
+       &movq   (&QWP(40,"esi"),"mm5");
+       &movq   (&QWP(48,"esi"),"mm6");
+       &movq   (&QWP(56,"esi"),"mm7");
 
-       &paddq  ("mm2",&QWP(8*(9+16-9),"esp"));
+       &lea    ("esp",&DWP(0,"esp","eax"));    # destroy frame
+       &sub    ($K512,"eax");                  # rewind K
 
-       &psllq  ("mm1",63-56);
-       &pxor   ("mm5","mm7");
-       &psllq  ("mm7",45-3);
-       &pxor   ("mm3","mm1");
-       &pxor   ("mm5","mm7");
+       &cmp    ("edi",&DWP(8*10+8,"esp"));     # are we done yet?
+       &jb     (&label("loop_sse2"));
 
-       &paddq  ("mm3","mm5");
-       &paddq  ("mm3","mm2");
-       &movq   (&QWP(8*9,"esp"),"mm3");
+       &mov    ("esp",&DWP(8*10+12,"esp"));    # restore sp
+       &emms   ();
+&function_end_A();
 
-       &BODY_00_15_sse2(1);
+&set_label("SSSE3",32);
+{ my ($cnt,$frame)=("ecx","edx");
+  my @X=map("xmm$_",(0..7));
+  my $j;
+  my $i=0;
+
+       &lea    ($frame,&DWP(-64,"esp"));
+       &sub    ("esp",256);
+
+       # fixed stack frame layout
+       #
+       # +0    A B C D E F G H         # backing store
+       # +64   X[0]+K[i] .. X[15]+K[i] # XMM->MM xfer area
+       # +192                          # XMM off-load ring buffer
+       # +256                          # saved parameters
+
+       &movdqa         (@X[1],&QWP(80*8,$K512));               # byte swap mask
+       &movdqu         (@X[0],&QWP(0,"edi"));
+       &pshufb         (@X[0],@X[1]);
+    for ($j=0;$j<8;$j++) {
+       &movdqa         (&QWP(16*(($j-1)%4),$frame),@X[3])      if ($j>4); # off-load
+       &movdqa         (@X[3],&QWP(16*($j%8),$K512));
+       &movdqa         (@X[2],@X[1])                           if ($j<7); # perpetuate byte swap mask
+       &movdqu         (@X[1],&QWP(16*($j+1),"edi"))           if ($j<7); # next input
+       &movdqa         (@X[1],&QWP(16*(($j+1)%4),$frame))      if ($j==7);# restore @X[0]
+       &paddq          (@X[3],@X[0]);
+       &pshufb         (@X[1],@X[2])                           if ($j<7);
+       &movdqa         (&QWP(16*($j%8)-128,$frame),@X[3]);     # xfer X[i]+K[i]
+
+       push(@X,shift(@X));                                     # rotate(@X)
+    }
+       #&jmp           (&label("loop_ssse3"));
+       &nop            ();
 
-       &cmp    (&LB("edx"),0x17);
-       &jne    (&label("16_79_sse2"));
+&set_label("loop_ssse3",32);
+       &movdqa         (@X[2],&QWP(16*(($j+1)%4),$frame));     # pre-restore @X[1]
+       &movdqa         (&QWP(16*(($j-1)%4),$frame),@X[3]);     # off-load @X[3]
+       &lea            ($K512,&DWP(16*8,$K512));
+
+       #&movq  ($Asse2,$A);                    # off-load A-H
+       &movq   ($Bsse2,"mm1");
+        &mov   ("ebx","edi");
+       &movq   ($Csse2,$BxC);
+        &lea   ("edi",&DWP(128,"edi"));        # advance input
+       &movq   ($Dsse2,"mm3");
+        &cmp   ("edi","eax");
+       #&movq  ($Esse2,$E);
+       &movq   ($Fsse2,"mm5");
+        &cmovb ("ebx","edi");
+       &movq   ($Gsse2,"mm6");
+        &mov   ("ecx",4);                      # loop counter
+       &pxor   ($BxC,"mm1");                   # magic
+       &movq   ($Hsse2,"mm7");
+       &pxor   ("mm3","mm3");                  # magic
+
+       &jmp            (&label("00_47_ssse3"));
+
+sub BODY_00_15_ssse3 {         # "phase-less" copy of BODY_00_15_sse2
+       (                       # returns a list of instruction strings; the callers eval() them one at a time
+       '&movq  ("mm1",$E)',                            # %mm1 is sliding right
+       '&movq  ("mm7",&QWP(((-8*$i)%128)-128,$frame))',# X[i]+K[i]
+        '&pxor ("mm5","mm6")',                         # f^=g
+       '&psrlq ("mm1",14)',
+        '&movq (&QWP(8*($i+4)%64,"esp"),$E)',          # modulo-scheduled save e
+        '&pand ("mm5",$E)',                            # f&=e
+       '&psllq ($E,23)',                               # $E is sliding left
+       '&paddq ($A,"mm3")',                            # [h+=Maj(a,b,c)]
+       '&movq  ("mm3","mm1")',                         # %mm3 is T1
+        '&psrlq("mm1",4)',
+        '&pxor ("mm5","mm6")',                         # Ch(e,f,g)
+       '&pxor  ("mm3",$E)',
+        '&psllq($E,23)',
+       '&pxor  ("mm3","mm1")',
+        '&movq (&QWP(8*$i%64,"esp"),$A)',              # modulo-scheduled save a
+        '&paddq("mm7","mm5")',                         # X[i]+=Ch(e,f,g)
+       '&pxor  ("mm3",$E)',
+        '&psrlq("mm1",23)',
+        '&paddq("mm7",&QWP(8*($i+7)%64,"esp"))',       # X[i]+=h
+       '&pxor  ("mm3","mm1")',
+        '&psllq($E,4)',
+       '&pxor  ("mm3",$E)',                            # T1=Sigma1_512(e)
+
+        '&movq ($E,&QWP(8*($i+3)%64,"esp"))',          # e = load d, e in next round
+       '&paddq ("mm3","mm7")',                         # T1+=X[i]
+        '&movq ("mm5",$A)',                            # %mm5 is sliding right
+        '&psrlq("mm5",28)',
+       '&paddq ($E,"mm3")',                            # d += T1
+        '&movq ("mm6",$A)',                            # %mm6 is sliding left
+        '&movq ("mm7","mm5")',
+        '&psllq("mm6",25)',
+       '&movq  ("mm1",&QWP(8*($i+1)%64,"esp"))',       # load b
+        '&psrlq("mm5",6)',
+        '&pxor ("mm7","mm6")',
+        '&psllq("mm6",5)',
+        '&pxor ("mm7","mm5")',
+       '&pxor  ($A,"mm1")',                            # a^b, b^c in next round
+        '&psrlq("mm5",5)',
+        '&pxor ("mm7","mm6")',
+       '&pand  ($BxC,$A)',                             # (b^c)&(a^b)
+        '&psllq("mm6",6)',
+        '&pxor ("mm7","mm5")',
+       '&pxor  ($BxC,"mm1")',                          # [h=]Maj(a,b,c)
+        '&pxor ("mm6","mm7")',                         # Sigma0_512(a)
+        '&movq ("mm5",&QWP(8*($i+5-1)%64,"esp"))',     # pre-load f
+       '&paddq ($BxC,"mm6")',                          # h+=Sigma0(a)
+        '&movq ("mm6",&QWP(8*($i+6-1)%64,"esp"))',     # pre-load g
+
+       '($A,$BxC) = ($BxC,$A); $i--;'                  # swap the register names and decrement $i, which shifts every stack slot index used in the next round
+       );
+}
 
-       # &movq ($A,$Asse2);
+&set_label("00_47_ssse3",32);
+
+    for(;$j<16;$j++) {
+       my ($t0,$t2,$t1)=@X[2..4];
+       my @insns = (&BODY_00_15_ssse3(),&BODY_00_15_ssse3());
+
+       &movdqa         ($t2,@X[5]);
+       &movdqa         (@X[1],$t0);                    # restore @X[1]
+       &palignr        ($t0,@X[0],8);                  # X[1..2]
+       &movdqa         (&QWP(16*($j%4),$frame),@X[4]); # off-load @X[4]
+        &palignr       ($t2,@X[4],8);                  # X[9..10]
+
+       &movdqa         ($t1,$t0);
+       &psrlq          ($t0,7);
+        &paddq         (@X[0],$t2);                    # X[0..1] += X[9..10]
+       &movdqa         ($t2,$t1);
+       &psrlq          ($t1,1);
+       &psllq          ($t2,64-8);
+       &pxor           ($t0,$t1);
+       &psrlq          ($t1,8-1);
+       &pxor           ($t0,$t2);
+       &psllq          ($t2,8-1);
+       &pxor           ($t0,$t1);
+        &movdqa        ($t1,@X[7]);
+       &pxor           ($t0,$t2);                      # sigma0(X[1..2])
+        &movdqa        ($t2,@X[7]);
+        &psrlq         ($t1,6);
+       &paddq          (@X[0],$t0);                    # X[0..1] += sigma0(X[1..2])
+
+       &movdqa         ($t0,@X[7]);
+       &psrlq          ($t2,19);
+       &psllq          ($t0,64-61);
+       &pxor           ($t1,$t2);
+       &psrlq          ($t2,61-19);
+       &pxor           ($t1,$t0);
+       &psllq          ($t0,61-19);
+       &pxor           ($t1,$t2);
+       &movdqa         ($t2,&QWP(16*(($j+2)%4),$frame));# pre-restore @X[1]
+       &pxor           ($t1,$t0);                      # sigma1(X[14..15])
+       &movdqa         ($t0,&QWP(16*($j%8),$K512));
+        eval(shift(@insns));
+       &paddq          (@X[0],$t1);                    # X[0..1] += sigma1(X[14..15])
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &paddq          ($t0,@X[0]);
+        foreach(@insns) { eval; }
+       &movdqa         (&QWP(16*($j%8)-128,$frame),$t0);# xfer X[i]+K[i]
+
+       push(@X,shift(@X));                             # rotate(@X)
+    }
+       &lea            ($K512,&DWP(16*8,$K512));
+       &dec            ("ecx");
+       &jnz            (&label("00_47_ssse3"));
+
+       &movdqa         (@X[1],&QWP(0,$K512));          # byte swap mask
+       &lea            ($K512,&DWP(-80*8,$K512));      # rewind
+       &movdqu         (@X[0],&QWP(0,"ebx"));
+       &pshufb         (@X[0],@X[1]);
+
+    for ($j=0;$j<8;$j++) {     # load next or same block
+       my @insns = (&BODY_00_15_ssse3(),&BODY_00_15_ssse3());
+
+       &movdqa         (&QWP(16*(($j-1)%4),$frame),@X[3])      if ($j>4); # off-load
+       &movdqa         (@X[3],&QWP(16*($j%8),$K512));
+       &movdqa         (@X[2],@X[1])                           if ($j<7); # perpetuate byte swap mask
+       &movdqu         (@X[1],&QWP(16*($j+1),"ebx"))           if ($j<7); # next input
+       &movdqa         (@X[1],&QWP(16*(($j+1)%4),$frame))      if ($j==7);# restore @X[0]
+       &paddq          (@X[3],@X[0]);
+       &pshufb         (@X[1],@X[2])                           if ($j<7);
+        foreach(@insns) { eval; }
+       &movdqa         (&QWP(16*($j%8)-128,$frame),@X[3]);# xfer X[i]+K[i]
+
+       push(@X,shift(@X));                             # rotate(@X)
+    }
+
+       #&movq  ($A,$Asse2);                    # load A-H
        &movq   ("mm1",$Bsse2);
-       &movq   ("mm2",$Csse2);
+       &paddq  ($A,"mm3");                     # from BODY_00_15
+       #&movq  ($BxC,$Csse2);
        &movq   ("mm3",$Dsse2);
-       # &movq ($E,$Esse2);
-       &movq   ("mm5",$Fsse2);
-       &movq   ("mm6",$Gsse2);
+       #&movq  ($E,$Esse2);
+       #&movq  ("mm5",$Fsse2);
+       #&movq  ("mm6",$Gsse2);
        &movq   ("mm7",$Hsse2);
 
+       &pxor   ($BxC,"mm1");                   # de-magic
        &paddq  ($A,&QWP(0,"esi"));
        &paddq  ("mm1",&QWP(8,"esi"));
-       &paddq  ("mm2",&QWP(16,"esi"));
+       &paddq  ($BxC,&QWP(16,"esi"));
        &paddq  ("mm3",&QWP(24,"esi"));
        &paddq  ($E,&QWP(32,"esi"));
        &paddq  ("mm5",&QWP(40,"esi"));
@@ -396,21 +662,19 @@ if ($sse2) {
 
        &movq   (&QWP(0,"esi"),$A);
        &movq   (&QWP(8,"esi"),"mm1");
-       &movq   (&QWP(16,"esi"),"mm2");
+       &movq   (&QWP(16,"esi"),$BxC);
        &movq   (&QWP(24,"esi"),"mm3");
        &movq   (&QWP(32,"esi"),$E);
        &movq   (&QWP(40,"esi"),"mm5");
        &movq   (&QWP(48,"esi"),"mm6");
        &movq   (&QWP(56,"esi"),"mm7");
 
-       &add    ("esp",8*80);                   # destroy frame
-       &sub    ($K512,8*80);                   # rewind K
-
-       &cmp    ("edi",&DWP(8*10+8,"esp"));     # are we done yet?
-       &jb     (&label("loop_sse2"));
+       &cmp    ("edi","eax");                  # are we done yet?
+       &jb     (&label("loop_ssse3"));
 
+       &mov    ("esp",&DWP(64+12,$frame));     # restore sp
        &emms   ();
-       &mov    ("esp",&DWP(8*10+12,"esp"));    # restore sp
+}
 &function_end_A();
 }
 &set_label("loop_x86",16);
@@ -638,6 +902,9 @@ if ($sse2) {
        &data_word(0xfc657e2a,0x597f299c);      # u64
        &data_word(0x3ad6faec,0x5fcb6fab);      # u64
        &data_word(0x4a475817,0x6c44198c);      # u64
+
+       &data_word(0x04050607,0x00010203);      # byte swap
+       &data_word(0x0c0d0e0f,0x08090a0b);      # mask
 &function_end_B("sha512_block_data_order");
 &asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
 
index 7faf37b..fb7dc50 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 # March 2011.
 #
 # Add NEON implementation. On Cortex A8 it was measured to process
-# one byte in 25.5 cycles or 47% faster than integer-only code.
+# one byte in 23.3 cycles or ~60% faster than integer-only code.
+
+# August 2012.
+#
+# Improve NEON performance by 12% on Snapdragon S4. In absolute
+# terms it's 22.6 cycles per byte, which is a disappointing result.
+# Technical writers asserted that 3-way S4 pipeline can sustain
+# multiple NEON instructions per cycle, but dual NEON issue could
+# not be observed, and for NEON-only sequences IPC(*) was found to
+# be limited by 1:-( 0.33 and 0.66 were measured for sequences with
+# ILPs(*) of 1 and 2 respectively. This in turn means that you can
+# even find yourself striving, as I did here, for achieving IPC
+# adequate to one delivered by Cortex A8 [for reference, it's
+# 0.5 for ILP of 1, and 1 for higher ILPs].
+#
+# (*) ILP, instruction-level parallelism, how many instructions
+#     *can* execute at the same time. IPC, instructions per cycle,
+#     indicates how many instructions actually execute.
 
 # Byte order [in]dependence. =========================================
 #
@@ -220,16 +237,20 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
 WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
 WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
 .size  K512,.-K512
+#if __ARM_MAX_ARCH__>=7
 .LOPENSSL_armcap:
 .word  OPENSSL_armcap_P-sha512_block_data_order
 .skip  32-4
+#else
+.skip  32
+#endif
 
 .global        sha512_block_data_order
 .type  sha512_block_data_order,%function
 sha512_block_data_order:
        sub     r3,pc,#8                @ sha512_block_data_order
        add     $len,$inp,$len,lsl#7    @ len to point at the end of inp
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
        ldr     r12,.LOPENSSL_armcap
        ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
        tst     r12,#1
@@ -457,40 +478,40 @@ $code.=<<___ if ($i<16 || $i&1);
        vld1.64         {@X[$i%16]},[$inp]!     @ handles unaligned
 #endif
        vshr.u64        $t1,$e,#@Sigma1[1]
+#if $i>0
+        vadd.i64       $a,$Maj                 @ h+=Maj from the past
+#endif
        vshr.u64        $t2,$e,#@Sigma1[2]
 ___
 $code.=<<___;
        vld1.64         {$K},[$Ktbl,:64]!       @ K[i++]
        vsli.64         $t0,$e,#`64-@Sigma1[0]`
        vsli.64         $t1,$e,#`64-@Sigma1[1]`
+       vmov            $Ch,$e
        vsli.64         $t2,$e,#`64-@Sigma1[2]`
 #if $i<16 && defined(__ARMEL__)
        vrev64.8        @X[$i],@X[$i]
 #endif
-       vadd.i64        $T1,$K,$h
-       veor            $Ch,$f,$g
-       veor            $t0,$t1
-       vand            $Ch,$e
-       veor            $t0,$t2                 @ Sigma1(e)
-       veor            $Ch,$g                  @ Ch(e,f,g)
-       vadd.i64        $T1,$t0
+       veor            $t1,$t0
+       vbsl            $Ch,$f,$g               @ Ch(e,f,g)
        vshr.u64        $t0,$a,#@Sigma0[0]
-       vadd.i64        $T1,$Ch
+       veor            $t2,$t1                 @ Sigma1(e)
+       vadd.i64        $T1,$Ch,$h
        vshr.u64        $t1,$a,#@Sigma0[1]
-       vshr.u64        $t2,$a,#@Sigma0[2]
        vsli.64         $t0,$a,#`64-@Sigma0[0]`
+       vadd.i64        $T1,$t2
+       vshr.u64        $t2,$a,#@Sigma0[2]
+       vadd.i64        $K,@X[$i%16]
        vsli.64         $t1,$a,#`64-@Sigma0[1]`
+       veor            $Maj,$a,$b
        vsli.64         $t2,$a,#`64-@Sigma0[2]`
-       vadd.i64        $T1,@X[$i%16]
-       vorr            $Maj,$a,$c
-       vand            $Ch,$a,$c
        veor            $h,$t0,$t1
-       vand            $Maj,$b
+       vadd.i64        $T1,$K
+       vbsl            $Maj,$c,$b              @ Maj(a,b,c)
        veor            $h,$t2                  @ Sigma0(a)
-       vorr            $Maj,$Ch                @ Maj(a,b,c)
-       vadd.i64        $h,$T1
        vadd.i64        $d,$T1
-       vadd.i64        $h,$Maj
+       vadd.i64        $Maj,$T1
+       @ vadd.i64      $h,$Maj
 ___
 }
 
@@ -508,6 +529,7 @@ $i /= 2;
 $code.=<<___;
        vshr.u64        $t0,@X[($i+7)%8],#@sigma1[0]
        vshr.u64        $t1,@X[($i+7)%8],#@sigma1[1]
+        vadd.i64       @_[0],d30                       @ h+=Maj from the past
        vshr.u64        $s1,@X[($i+7)%8],#@sigma1[2]
        vsli.64         $t0,@X[($i+7)%8],#`64-@sigma1[0]`
        vext.8          $s0,@X[$i%8],@X[($i+1)%8],#8    @ X[i+1]
@@ -533,7 +555,8 @@ ___
 }
 
 $code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
 .fpu   neon
 
 .align 4
@@ -554,6 +577,7 @@ for(;$i<32;$i++)    { &NEON_16_79($i,@V); unshift(@V,pop(@V)); }
 $code.=<<___;
        bne             .L16_79_neon
 
+        vadd.i64       $A,d30          @ h+=Maj from the past
        vldmia          $ctx,{d24-d31}  @ load context to temp
        vadd.i64        q8,q12          @ vectorized accumulate
        vadd.i64        q9,q13
@@ -565,7 +589,7 @@ $code.=<<___;
        bne             .Loop_neon
 
        vldmia  sp!,{d8-d15}            @ epilogue
-       bx      lr
+       ret                             @ bx lr
 #endif
 ___
 }
@@ -573,10 +597,13 @@ $code.=<<___;
 .size  sha512_block_data_order,.-sha512_block_data_order
 .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
 .align 2
+#if __ARM_MAX_ARCH__>=7
 .comm  OPENSSL_armcap_P,4,4
+#endif
 ___
 
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;   # make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx  lr/gm;
 print $code;
 close STDOUT; # enforce flush
diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl
new file mode 100644 (file)
index 0000000..f7b36b9
--- /dev/null
@@ -0,0 +1,422 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# SHA256/512 for ARMv8.
+#
+# Performance in cycles per processed byte and improvement coefficient
+# over code generated with "default" compiler:
+#
+#              SHA256-hw       SHA256(*)       SHA512
+# Apple A7     1.97            10.5 (+33%)     6.73 (-1%(**))
+# Cortex-A53   2.38            15.5 (+115%)    10.0 (+150%(***))
+# Cortex-A57   2.31            11.6 (+86%)     7.51 (+260%(***))
+# Denver       2.01            10.5 (+26%)     6.70 (+8%)
+# X-Gene                       20.0 (+100%)    12.8 (+300%(***))
+# 
+# (*)  Software SHA256 results are of lesser relevance, presented
+#      mostly for informational purposes.
+# (**) The result is a trade-off: it's possible to improve it by
+#      10% (or by 1 cycle per round), but at the cost of 20% loss
+#      on Cortex-A53 (or by 4 cycles per round).
+# (***)        Super-impressive coefficients over gcc-generated code are
+#      an indication of some compiler "pathology", most notably code
+#      generated with -mgeneral-regs-only is significantly faster
+#      and the gap is only 40-90%.
+
+$flavour=shift;        # first argument; not referenced anywhere else in this script
+$output=shift;         # output file name; "512" in it selects SHA-512, anything else SHA-256
+open STDOUT,">$output" or die "can't open $output: $!";        # a failed open would otherwise go unnoticed
+
+if ($output =~ /512/) {
+       $BITS=512;
+       $SZ=8;
+       @Sigma0=(28,34,39);
+       @Sigma1=(14,18,41);
+       @sigma0=(1,  8, 7);
+       @sigma1=(19,61, 6);
+       $rounds=80;
+       $reg_t="x";
+} else {
+       $BITS=256;
+       $SZ=4;
+       @Sigma0=( 2,13,22);
+       @Sigma1=( 6,11,25);
+       @sigma0=( 7,18, 3);
+       @sigma1=(17,19,10);
+       $rounds=64;
+       $reg_t="w";
+}
+
+$func="sha${BITS}_block_data_order";
+
+($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30));
+
+@X=map("$reg_t$_",(3..15,0..2));
+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27));
+($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28));
+
+sub BODY_00_xx {       # append the assembler text for one round $i to $code
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;    # round index, then the eight working registers in their current rotation
+my $j=($i+1)&15;       # @X slot that the $i>=15 template updates with the next schedule word
+my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]); # @X slots reused as scratch registers
+   $T0=@X[$i+3] if ($i<11);    # NOTE(review): presumably a slot not yet loaded from input in early rounds - confirm
+
+$code.=<<___   if ($i<16);
+#ifndef        __ARMEB__
+       rev     @X[$i],@X[$i]                   // $i
+#endif
+___
+$code.=<<___   if ($i<13 && ($i&1));
+       ldp     @X[$i+1],@X[$i+2],[$inp],#2*$SZ
+___
+$code.=<<___   if ($i==13);
+       ldp     @X[14],@X[15],[$inp]
+___
+$code.=<<___   if ($i>=14);
+       ldr     @X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`]
+___
+$code.=<<___   if ($i>0 && $i<16);     # Sigma0 add deferred by the previous round (commented out at the end of the $i<15 template)
+       add     $a,$a,$t1                       // h+=Sigma0(a)
+___
+$code.=<<___   if ($i>=11);    # spill the slot that serves as $T0 from round 11 on
+       str     @X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`]
+___
+# While ARMv8 specifies merged rotate-n-logical operation such as
+# 'eor x,y,z,ror#n', it was found to negatively affect performance
+# on Apple A7. The reason seems to be that it requires even 'y' to
+# be available earlier. This means that such merged instruction is
+# not necessarily the best choice on critical path... On the other hand
+# Cortex-A5x handles merged instructions much better than disjoint
+# rotate and logical... See (**) footnote above.
+$code.=<<___   if ($i<15);
+       ror     $t0,$e,#$Sigma1[0]
+       add     $h,$h,$t2                       // h+=K[i]
+       eor     $T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]`
+       and     $t1,$f,$e
+       bic     $t2,$g,$e
+       add     $h,$h,@X[$i&15]                 // h+=X[i]
+       orr     $t1,$t1,$t2                     // Ch(e,f,g)
+       eor     $t2,$a,$b                       // a^b, b^c in next round
+       eor     $t0,$t0,$T0,ror#$Sigma1[1]      // Sigma1(e)
+       ror     $T0,$a,#$Sigma0[0]
+       add     $h,$h,$t1                       // h+=Ch(e,f,g)
+       eor     $t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]`
+       add     $h,$h,$t0                       // h+=Sigma1(e)
+       and     $t3,$t3,$t2                     // (b^c)&=(a^b)
+       add     $d,$d,$h                        // d+=h
+       eor     $t3,$t3,$b                      // Maj(a,b,c)
+       eor     $t1,$T0,$t1,ror#$Sigma0[1]      // Sigma0(a)
+       add     $h,$h,$t3                       // h+=Maj(a,b,c)
+       ldr     $t3,[$Ktbl],#$SZ                // *K++, $t2 in next round
+       //add   $h,$h,$t1                       // h+=Sigma0(a)
+___
+$code.=<<___   if ($i>=15);    # same round plus the update of schedule word @X[$j]
+       ror     $t0,$e,#$Sigma1[0]
+       add     $h,$h,$t2                       // h+=K[i]
+       ror     $T1,@X[($j+1)&15],#$sigma0[0]
+       and     $t1,$f,$e
+       ror     $T2,@X[($j+14)&15],#$sigma1[0]
+       bic     $t2,$g,$e
+       ror     $T0,$a,#$Sigma0[0]
+       add     $h,$h,@X[$i&15]                 // h+=X[i]
+       eor     $t0,$t0,$e,ror#$Sigma1[1]
+       eor     $T1,$T1,@X[($j+1)&15],ror#$sigma0[1]
+       orr     $t1,$t1,$t2                     // Ch(e,f,g)
+       eor     $t2,$a,$b                       // a^b, b^c in next round
+       eor     $t0,$t0,$e,ror#$Sigma1[2]       // Sigma1(e)
+       eor     $T0,$T0,$a,ror#$Sigma0[1]
+       add     $h,$h,$t1                       // h+=Ch(e,f,g)
+       and     $t3,$t3,$t2                     // (b^c)&=(a^b)
+       eor     $T2,$T2,@X[($j+14)&15],ror#$sigma1[1]
+       eor     $T1,$T1,@X[($j+1)&15],lsr#$sigma0[2]    // sigma0(X[i+1])
+       add     $h,$h,$t0                       // h+=Sigma1(e)
+       eor     $t3,$t3,$b                      // Maj(a,b,c)
+       eor     $t1,$T0,$a,ror#$Sigma0[2]       // Sigma0(a)
+       eor     $T2,$T2,@X[($j+14)&15],lsr#$sigma1[2]   // sigma1(X[i+14])
+       add     @X[$j],@X[$j],@X[($j+9)&15]
+       add     $d,$d,$h                        // d+=h
+       add     $h,$h,$t3                       // h+=Maj(a,b,c)
+       ldr     $t3,[$Ktbl],#$SZ                // *K++, $t2 in next round
+       add     @X[$j],@X[$j],$T1
+       add     $h,$h,$t1                       // h+=Sigma0(a)
+       add     @X[$j],@X[$j],$T2
+___
+       ($t2,$t3)=($t3,$t2);    # swap the two temporaries' roles for the next round (see "in next round" notes in the templates)
+}
+
+$code.=<<___;
+#include "arm_arch.h"
+
+.text
+
+.globl $func
+.type  $func,%function
+.align 6
+$func:
+___
+$code.=<<___   if ($SZ==4);
+       ldr     x16,.LOPENSSL_armcap_P
+       adr     x17,.LOPENSSL_armcap_P
+       add     x16,x16,x17
+       ldr     w16,[x16]
+       tst     w16,#ARMV8_SHA256
+       b.ne    .Lv8_entry
+___
+$code.=<<___;
+       stp     x29,x30,[sp,#-128]!
+       add     x29,sp,#0
+
+       stp     x19,x20,[sp,#16]
+       stp     x21,x22,[sp,#32]
+       stp     x23,x24,[sp,#48]
+       stp     x25,x26,[sp,#64]
+       stp     x27,x28,[sp,#80]
+       sub     sp,sp,#4*$SZ
+
+       ldp     $A,$B,[$ctx]                            // load context
+       ldp     $C,$D,[$ctx,#2*$SZ]
+       ldp     $E,$F,[$ctx,#4*$SZ]
+       add     $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input
+       ldp     $G,$H,[$ctx,#6*$SZ]
+       adr     $Ktbl,K$BITS
+       stp     $ctx,$num,[x29,#96]
+
+.Loop:
+       ldp     @X[0],@X[1],[$inp],#2*$SZ
+       ldr     $t2,[$Ktbl],#$SZ                        // *K++
+       eor     $t3,$B,$C                               // magic seed
+       str     $inp,[x29,#112]
+___
+for ($i=0;$i<16;$i++)  { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
+$code.=".Loop_16_xx:\n";
+for (;$i<32;$i++)      { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+       cbnz    $t2,.Loop_16_xx
+
+       ldp     $ctx,$num,[x29,#96]
+       ldr     $inp,[x29,#112]
+       sub     $Ktbl,$Ktbl,#`$SZ*($rounds+1)`          // rewind
+
+       ldp     @X[0],@X[1],[$ctx]
+       ldp     @X[2],@X[3],[$ctx,#2*$SZ]
+       add     $inp,$inp,#14*$SZ                       // advance input pointer
+       ldp     @X[4],@X[5],[$ctx,#4*$SZ]
+       add     $A,$A,@X[0]
+       ldp     @X[6],@X[7],[$ctx,#6*$SZ]
+       add     $B,$B,@X[1]
+       add     $C,$C,@X[2]
+       add     $D,$D,@X[3]
+       stp     $A,$B,[$ctx]
+       add     $E,$E,@X[4]
+       add     $F,$F,@X[5]
+       stp     $C,$D,[$ctx,#2*$SZ]
+       add     $G,$G,@X[6]
+       add     $H,$H,@X[7]
+       cmp     $inp,$num
+       stp     $E,$F,[$ctx,#4*$SZ]
+       stp     $G,$H,[$ctx,#6*$SZ]
+       b.ne    .Loop
+
+       ldp     x19,x20,[x29,#16]
+       add     sp,sp,#4*$SZ
+       ldp     x21,x22,[x29,#32]
+       ldp     x23,x24,[x29,#48]
+       ldp     x25,x26,[x29,#64]
+       ldp     x27,x28,[x29,#80]
+       ldp     x29,x30,[sp],#128
+       ret
+.size  $func,.-$func
+
+.align 6
+.type  K$BITS,%object
+K$BITS:
+___
+$code.=<<___ if ($SZ==8);
+       .quad   0x428a2f98d728ae22,0x7137449123ef65cd
+       .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+       .quad   0x3956c25bf348b538,0x59f111f1b605d019
+       .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
+       .quad   0xd807aa98a3030242,0x12835b0145706fbe
+       .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+       .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
+       .quad   0x9bdc06a725c71235,0xc19bf174cf692694
+       .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
+       .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+       .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
+       .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+       .quad   0x983e5152ee66dfab,0xa831c66d2db43210
+       .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
+       .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
+       .quad   0x06ca6351e003826f,0x142929670a0e6e70
+       .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
+       .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+       .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
+       .quad   0x81c2c92e47edaee6,0x92722c851482353b
+       .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
+       .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
+       .quad   0xd192e819d6ef5218,0xd69906245565a910
+       .quad   0xf40e35855771202a,0x106aa07032bbd1b8
+       .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
+       .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+       .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+       .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+       .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
+       .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
+       .quad   0x90befffa23631e28,0xa4506cebde82bde9
+       .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
+       .quad   0xca273eceea26619c,0xd186b8c721c0c207
+       .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+       .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
+       .quad   0x113f9804bef90dae,0x1b710b35131c471b
+       .quad   0x28db77f523047d84,0x32caab7b40c72493
+       .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+       .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+       .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
+       .quad   0       // terminator
+___
+$code.=<<___ if ($SZ==4);
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+       .long   0       //terminator
+___
+$code.=<<___;
+.size  K$BITS,.-K$BITS
+.align 3
+.LOPENSSL_armcap_P:
+       .quad   OPENSSL_armcap_P-.
+.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+if ($SZ==4) {
+my $Ktbl="x3";
+
+my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2));
+my @MSG=map("v$_.16b",(4..7));
+my ($W0,$W1)=("v16.4s","v17.4s");
+my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b");
+
+$code.=<<___;
+.type  sha256_block_armv8,%function
+.align 6
+sha256_block_armv8:
+.Lv8_entry:
+       stp             x29,x30,[sp,#-16]!
+       add             x29,sp,#0
+
+       ld1.32          {$ABCD,$EFGH},[$ctx]
+       adr             $Ktbl,K256
+
+.Loop_hw:
+       ld1             {@MSG[0]-@MSG[3]},[$inp],#64
+       sub             $num,$num,#1
+       ld1.32          {$W0},[$Ktbl],#16
+       rev32           @MSG[0],@MSG[0]
+       rev32           @MSG[1],@MSG[1]
+       rev32           @MSG[2],@MSG[2]
+       rev32           @MSG[3],@MSG[3]
+       orr             $ABCD_SAVE,$ABCD,$ABCD          // offload
+       orr             $EFGH_SAVE,$EFGH,$EFGH
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+       ld1.32          {$W1},[$Ktbl],#16
+       add.i32         $W0,$W0,@MSG[0]
+       sha256su0       @MSG[0],@MSG[1]
+       orr             $abcd,$ABCD,$ABCD
+       sha256h         $ABCD,$EFGH,$W0
+       sha256h2        $EFGH,$abcd,$W0
+       sha256su1       @MSG[0],@MSG[2],@MSG[3]
+___
+       ($W0,$W1)=($W1,$W0);    push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+       ld1.32          {$W1},[$Ktbl],#16
+       add.i32         $W0,$W0,@MSG[0]
+       orr             $abcd,$ABCD,$ABCD
+       sha256h         $ABCD,$EFGH,$W0
+       sha256h2        $EFGH,$abcd,$W0
+
+       ld1.32          {$W0},[$Ktbl],#16
+       add.i32         $W1,$W1,@MSG[1]
+       orr             $abcd,$ABCD,$ABCD
+       sha256h         $ABCD,$EFGH,$W1
+       sha256h2        $EFGH,$abcd,$W1
+
+       ld1.32          {$W1},[$Ktbl]
+       add.i32         $W0,$W0,@MSG[2]
+       sub             $Ktbl,$Ktbl,#$rounds*$SZ-16     // rewind
+       orr             $abcd,$ABCD,$ABCD
+       sha256h         $ABCD,$EFGH,$W0
+       sha256h2        $EFGH,$abcd,$W0
+
+       add.i32         $W1,$W1,@MSG[3]
+       orr             $abcd,$ABCD,$ABCD
+       sha256h         $ABCD,$EFGH,$W1
+       sha256h2        $EFGH,$abcd,$W1
+
+       add.i32         $ABCD,$ABCD,$ABCD_SAVE
+       add.i32         $EFGH,$EFGH,$EFGH_SAVE
+
+       cbnz            $num,.Loop_hw
+
+       st1.32          {$ABCD,$EFGH},[$ctx]
+
+       ldr             x29,[sp],#16
+       ret
+.size  sha256_block_armv8,.-sha256_block_armv8
+___
+}
+
+$code.=<<___;
+.comm  OPENSSL_armcap_P,4,4
+___
+
+{   my  %opcode = (    # base instruction words; unsha256 ORs the register numbers into them
+       "sha256h"       => 0x5e004000,  "sha256h2"      => 0x5e005000,
+       "sha256su0"     => 0x5e282800,  "sha256su1"     => 0x5e006000   );
+
+    sub unsha256 {     # turn a sha256* mnemonic plus operand string into a raw ".inst" line
+       my ($mnemonic,$arg)=@_;
+
+       $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o # capture 2 or 3 q/v register numbers
+       &&
+       sprintf ".inst\t0x%08x\t//%s %s",
+                       $opcode{$mnemonic}|$1|($2<<5)|($3<<16), # registers go to bits 0, 5 and 16; a missing $3 adds 0
+                       $mnemonic,$arg;
+    }
+}
+
+foreach(split("\n",$code)) {   # post-process the generated text one line at a time
+
+       s/\`([^\`]*)\`/eval($1)/geo;    # replace each back-quoted expression with its evaluated value
+
+       s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo; # sha256* instructions become raw .inst words
+
+       s/\.\w?32\b//o          and s/\.16b/\.4s/go;    # strip a .32-style suffix (e.g. .32, .i32); on such lines .16b becomes .4s
+       m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go;      # ld1/st1 followed by a [0] index: .4s becomes .s
+
+       print $_,"\n";
+}
+
+close STDOUT or die "error closing $output: $!";       # report a failed flush instead of leaving a truncated file
index 1c6ce56..59f889a 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 #     because on Itanium 1 stall on MM result is accompanied by
 #     pipeline flush, which takes 6 cycles:-(
 #
-# Resulting performance numbers for 900MHz Itanium 2 system:
+# June 2012
 #
-# The 'numbers' are in 1000s of bytes per second processed.
-# type     16 bytes    64 bytes   256 bytes  1024 bytes  8192 bytes
-# sha1(*)   6210.14k   20376.30k   52447.83k   85870.05k  105478.12k
-# sha256    7476.45k   20572.05k   41538.34k   56062.29k   62093.18k
-# sha512    4996.56k   20026.28k   47597.20k   85278.79k  111501.31k
+# Improve performance by 15-20%. Note about "rules of engagement"
+# above. Contemporary cores are equipped with additional shifter,
+# so that they should perform even better than below, presumably
+# by ~10%.
 #
-# (*) SHA1 numbers are for HP-UX compiler and are presented purely
-#     for reference purposes. I bet it can improved too...
+######################################################################
+# Current performance in cycles per processed byte for Itanium 2
+# pre-9000 series [little-endian] system:
+#
+# SHA1(*)      5.7
+# SHA256       12.6
+# SHA512       6.7
+#
+# (*) SHA1 result is presented purely for reference purposes.
 #
 # To generate code, pass the file name with either 256 or 512 in its
 # name and compiler flags.
@@ -106,8 +112,8 @@ if (!defined($big_endian))
              { $big_endian=(unpack('L',pack('N',1))==1);  }
 
 $code=<<___;
-.ident  \"$output, version 1.1\"
-.ident  \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
+.ident  \"$output, version 2.0\"
+.ident  \"IA-64 ISA artwork by Andy Polyakov <appro\@openssl.org>\"
 .explicit
 .text
 
@@ -115,26 +121,25 @@ pfssave=r2;
 lcsave=r3;
 prsave=r14;
 K=r15;
-A=r16; B=r17;  C=r18;  D=r19;
-E=r20; F=r21;  G=r22;  H=r23;
+A_=r16; B_=r17; C_=r18; D_=r19;
+E_=r20; F_=r21; G_=r22; H_=r23;
 T1=r24;        T2=r25;
 s0=r26;        s1=r27; t0=r28; t1=r29;
 Ktbl=r30;
 ctx=r31;       // 1st arg
-input=r48;     // 2nd arg
-num=r49;       // 3rd arg
-sgm0=r50;      sgm1=r51;       // small constants
-A_=r54;        B_=r55; C_=r56; D_=r57;
-E_=r58;        F_=r59; G_=r60; H_=r61;
+input=r56;     // 2nd arg
+num=r57;       // 3rd arg
+sgm0=r58;      sgm1=r59;       // small constants
 
 // void $func (SHA_CTX *ctx, const void *in,size_t num[,int host])
 .global        $func#
 .proc  $func#
 .align 32
+.skip  16
 $func:
        .prologue
        .save   ar.pfs,pfssave
-{ .mmi;        alloc   pfssave=ar.pfs,3,27,0,16
+{ .mmi;        alloc   pfssave=ar.pfs,3,25,0,24
        $ADDP   ctx=0,r32               // 1st arg
        .save   ar.lc,lcsave
        mov     lcsave=ar.lc    }
@@ -145,11 +150,9 @@ $func:
 
        .body
 { .mib;        add     r8=0*$SZ,ctx
-       add     r9=1*$SZ,ctx
-       brp.loop.imp    .L_first16,.L_first16_end-16    }
+       add     r9=1*$SZ,ctx    }
 { .mib;        add     r10=2*$SZ,ctx
-       add     r11=3*$SZ,ctx
-       brp.loop.imp    .L_rest,.L_rest_end-16          };;
+       add     r11=3*$SZ,ctx   };;
 
 // load A-H
 .Lpic_point:
@@ -164,7 +167,7 @@ $func:
        add     Ktbl=($TABLE#-.Lpic_point),Ktbl         }
 { .mmi;        $LDW    G_=[r10]
        $LDW    H_=[r11]
-       cmp.ne  p0,p16=0,r0     };;     // used in sha256_block
+       cmp.ne  p0,p16=0,r0     };;
 ___
 $code.=<<___ if ($BITS==64);
 { .mii;        and     r8=7,input
@@ -179,50 +182,26 @@ $code.=<<___ if ($BITS==64);
 ___
 $code.=<<___;
 .L_outer:
-.rotr  X[16]
-{ .mmi;        mov     A=A_
-       mov     B=B_
+.rotr  R[8],X[16]
+A=R[0]; B=R[1]; C=R[2]; D=R[3]; E=R[4]; F=R[5]; G=R[6]; H=R[7]
+{ .mmi;        ld1     X[15]=[input],$SZ               // eliminated in sha512
+       mov     A=A_
        mov     ar.lc=14        }
-{ .mmi;        mov     C=C_
-       mov     D=D_
-       mov     E=E_            }
-{ .mmi;        mov     F=F_
-       mov     G=G_
-       mov     ar.ec=2         }
-{ .mmi;        ld1     X[15]=[input],$SZ               // eliminated in 64-bit
+{ .mmi;        mov     B=B_
+       mov     C=C_
+       mov     D=D_            }
+{ .mmi;        mov     E=E_
+       mov     F=F_
+       mov     ar.ec=2         };;
+{ .mmi;        mov     G=G_
        mov     H=H_
-       mov     sgm1=$sigma1[2] };;
-
-___
-$t0="t0", $t1="t1", $code.=<<___ if ($BITS==32);
-.align 32
-.L_first16:
-{ .mmi;                add     r9=1-$SZ,input
-               add     r10=2-$SZ,input
-               add     r11=3-$SZ,input };;
-{ .mmi;                ld1     r9=[r9]
-               ld1     r10=[r10]
-               dep.z   $t1=E,32,32     }
-{ .mmi;                $LDW    K=[Ktbl],$SZ
-               ld1     r11=[r11]
-               zxt4    E=E             };;
-{ .mii;                or      $t1=$t1,E
-               dep     X[15]=X[15],r9,8,8
-               dep     r11=r10,r11,8,8 };;
-{ .mmi;                and     T1=F,E
-               and     T2=A,B
-               dep     X[15]=X[15],r11,16,16   }
-{ .mmi;                andcm   r8=G,E
-               and     r9=A,C
-               mux2    $t0=A,0x44      };;     // copy lower half to upper
-{ .mmi;        (p16)   ld1     X[15-1]=[input],$SZ     // prefetch
-               xor     T1=T1,r8                // T1=((e & f) ^ (~e & g))
-               _rotr   r11=$t1,$Sigma1[0] }    // ROTR(e,14)
-{ .mib;                and     r10=B,C
-               xor     T2=T2,r9        };;
+       mov     sgm1=$sigma1[2] }
+{ .mib;        mov     r8=0
+       add     r9=1-$SZ,input
+       brp.loop.imp    .L_first16,.L_first16_end-16    };;
 ___
 $t0="A", $t1="E", $code.=<<___ if ($BITS==64);
-// in 64-bit mode I load whole X[16] at once and take care of alignment...
+// in sha512 case I load whole X[16] at once and take care of alignment...
 { .mmi;        add     r8=1*$SZ,input
        add     r9=2*$SZ,input
        add     r10=3*$SZ,input         };;
@@ -248,7 +227,9 @@ $t0="A", $t1="E", $code.=<<___ if ($BITS==64);
        $LDW    X[ 2]=[r8],4*$SZ
 (p15)  br.cond.dpnt.many       .L7byte };;
 { .mmb;        $LDW    X[ 1]=[r9],4*$SZ
-       $LDW    X[ 0]=[r10],4*$SZ
+       $LDW    X[ 0]=[r10],4*$SZ       }
+{ .mib;        mov     r8=0
+       mux1    X[15]=X[15],\@rev               // eliminated on big-endian
        br.many .L_first16              };;
 .L1byte:
 { .mmi;        $LDW    X[13]=[r9],4*$SZ
@@ -281,7 +262,9 @@ $t0="A", $t1="E", $code.=<<___ if ($BITS==64);
        shrp    X[ 3]=X[ 3],X[ 2],56    }
 { .mii;        shrp    X[ 2]=X[ 2],X[ 1],56
        shrp    X[ 1]=X[ 1],X[ 0],56    }
-{ .mib;        shrp    X[ 0]=X[ 0],T1,56
+{ .mib;        shrp    X[ 0]=X[ 0],T1,56       }
+{ .mib;        mov     r8=0
+       mux1    X[15]=X[15],\@rev               // eliminated on big-endian
        br.many .L_first16              };;
 .L2byte:
 { .mmi;        $LDW    X[11]=[input],4*$SZ
@@ -313,7 +296,9 @@ $t0="A", $t1="E", $code.=<<___ if ($BITS==64);
        shrp    X[ 2]=X[ 2],X[ 1],48    }
 { .mii;        shrp    X[ 1]=X[ 1],X[ 0],48
        shrp    X[ 0]=X[ 0],T1,48       }
-{ .mfb;        br.many .L_first16              };;
+{ .mib;        mov     r8=0
+       mux1    X[15]=X[15],\@rev               // eliminated on big-endian
+       br.many .L_first16              };;
 .L3byte:
 { .mmi;        $LDW    X[ 9]=[r9],4*$SZ
        $LDW    X[ 8]=[r10],4*$SZ
@@ -341,7 +326,9 @@ $t0="A", $t1="E", $code.=<<___ if ($BITS==64);
        shrp    X[ 3]=X[ 3],X[ 2],40    }
 { .mii;        shrp    X[ 2]=X[ 2],X[ 1],40
        shrp    X[ 1]=X[ 1],X[ 0],40    }
-{ .mib;        shrp    X[ 0]=X[ 0],T1,40
+{ .mib;        shrp    X[ 0]=X[ 0],T1,40       }
+{ .mib;        mov     r8=0
+       mux1    X[15]=X[15],\@rev               // eliminated on big-endian
        br.many .L_first16              };;
 .L4byte:
 { .mmi;        $LDW    X[ 7]=[input],4*$SZ
@@ -369,7 +356,9 @@ $t0="A", $t1="E", $code.=<<___ if ($BITS==64);
        shrp    X[ 2]=X[ 2],X[ 1],32    }
 { .mii;        shrp    X[ 1]=X[ 1],X[ 0],32
        shrp    X[ 0]=X[ 0],T1,32       }
-{ .mfb;        br.many .L_first16              };;
+{ .mib;        mov     r8=0
+       mux1    X[15]=X[15],\@rev               // eliminated on big-endian
+       br.many .L_first16              };;
 .L5byte:
 { .mmi;        $LDW    X[ 5]=[r9],4*$SZ
        $LDW    X[ 4]=[r10],4*$SZ
@@ -393,7 +382,9 @@ $t0="A", $t1="E", $code.=<<___ if ($BITS==64);
        shrp    X[ 3]=X[ 3],X[ 2],24    }
 { .mii;        shrp    X[ 2]=X[ 2],X[ 1],24
        shrp    X[ 1]=X[ 1],X[ 0],24    }
-{ .mib;        shrp    X[ 0]=X[ 0],T1,24
+{ .mib;        shrp    X[ 0]=X[ 0],T1,24       }
+{ .mib;        mov     r8=0
+       mux1    X[15]=X[15],\@rev               // eliminated on big-endian
        br.many .L_first16              };;
 .L6byte:
 { .mmi;        $LDW    X[ 3]=[input],4*$SZ
@@ -417,7 +408,9 @@ $t0="A", $t1="E", $code.=<<___ if ($BITS==64);
        shrp    X[ 2]=X[ 2],X[ 1],16    }
 { .mii;        shrp    X[ 1]=X[ 1],X[ 0],16
        shrp    X[ 0]=X[ 0],T1,16       }
-{ .mfb;        br.many .L_first16              };;
+{ .mib;        mov     r8=0
+       mux1    X[15]=X[15],\@rev               // eliminated on big-endian
+       br.many .L_first16              };;
 .L7byte:
 { .mmi;        $LDW    X[ 1]=[r9],4*$SZ
        $LDW    X[ 0]=[r10],4*$SZ
@@ -437,128 +430,146 @@ $t0="A", $t1="E", $code.=<<___ if ($BITS==64);
        shrp    X[ 3]=X[ 3],X[ 2],8     }
 { .mii;        shrp    X[ 2]=X[ 2],X[ 1],8
        shrp    X[ 1]=X[ 1],X[ 0],8     }
-{ .mib;        shrp    X[ 0]=X[ 0],T1,8
-       br.many .L_first16              };;
+{ .mib;        shrp    X[ 0]=X[ 0],T1,8        }
+{ .mib;        mov     r8=0
+       mux1    X[15]=X[15],\@rev       };;     // eliminated on big-endian
 
 .align 32
 .L_first16:
 { .mmi;                $LDW    K=[Ktbl],$SZ
-               and     T1=F,E
-               and     T2=A,B          }
-{ .mmi;                //$LDW  X[15]=[input],$SZ       // X[i]=*input++
+               add     A=A,r8                  // H+=Sigma(0) from the past
+               _rotr   r10=$t1,$Sigma1[0]  }   // ROTR(e,14)
+{ .mmi;                and     T1=F,E
                andcm   r8=G,E
-               and     r9=A,C          };;
-{ .mmi;                xor     T1=T1,r8                //T1=((e & f) ^ (~e & g))
-               and     r10=B,C
-               _rotr   r11=$t1,$Sigma1[0] }    // ROTR(e,14)
-{ .mmi;                xor     T2=T2,r9
-               mux1    X[15]=X[15],\@rev };;   // eliminated in big-endian
+       (p16)   mux1    X[14]=X[14],\@rev   };; // eliminated on big-endian
+{ .mmi;                and     T2=A,B
+               and     r9=A,C
+               _rotr   r11=$t1,$Sigma1[1]  }   // ROTR(e,18)
+{ .mmi;                xor     T1=T1,r8                // T1=((e & f) ^ (~e & g))
+               and     r8=B,C              };;
+___
+$t0="t0", $t1="t1", $code.=<<___ if ($BITS==32);
+.align 32
+.L_first16:
+{ .mmi;                add     A=A,r8                  // H+=Sigma(0) from the past
+               add     r10=2-$SZ,input
+               add     r11=3-$SZ,input };;
+{ .mmi;                ld1     r9=[r9]
+               ld1     r10=[r10]
+               dep.z   $t1=E,32,32     }
+{ .mmi;                ld1     r11=[r11]
+               $LDW    K=[Ktbl],$SZ
+               zxt4    E=E             };;
+{ .mii;                or      $t1=$t1,E
+               dep     X[15]=X[15],r9,8,8
+               mux2    $t0=A,0x44      };;     // copy lower half to upper
+{ .mmi;                and     T1=F,E
+               andcm   r8=G,E
+               dep     r11=r10,r11,8,8 };;
+{ .mmi;                and     T2=A,B
+               and     r9=A,C
+               dep     X[15]=X[15],r11,16,16   };;
+{ .mmi;        (p16)   ld1     X[15-1]=[input],$SZ     // prefetch
+               xor     T1=T1,r8                // T1=((e & f) ^ (~e & g))
+               _rotr   r10=$t1,$Sigma1[0] }    // ROTR(e,14)
+{ .mmi;                and     r8=B,C
+               _rotr   r11=$t1,$Sigma1[1] };;  // ROTR(e,18)
 ___
 $code.=<<___;
-{ .mib;                add     T1=T1,H                 // T1=Ch(e,f,g)+h
-               _rotr   r8=$t1,$Sigma1[1] }     // ROTR(e,18)
-{ .mib;                xor     T2=T2,r10               // T2=((a & b) ^ (a & c) ^ (b & c))
-               mov     H=G             };;
-{ .mib;                xor     r11=r8,r11
-               _rotr   r9=$t1,$Sigma1[2] }     // ROTR(e,41)
-{ .mib;                mov     G=F
-               mov     F=E             };;
-{ .mib;                xor     r9=r9,r11               // r9=Sigma1(e)
-               _rotr   r10=$t0,$Sigma0[0] }    // ROTR(a,28)
-{ .mib;                add     T1=T1,K                 // T1=Ch(e,f,g)+h+K512[i]
-               mov     E=D             };;
-{ .mib;                add     T1=T1,r9                // T1+=Sigma1(e)
-               _rotr   r11=$t0,$Sigma0[1] }    // ROTR(a,34)
-{ .mib;                mov     D=C
-               mov     C=B             };;
-{ .mib;                add     T1=T1,X[15]             // T1+=X[i]
-               _rotr   r8=$t0,$Sigma0[2] }     // ROTR(a,39)
-{ .mib;                xor     r10=r10,r11
-               mux2    X[15]=X[15],0x44 };;    // eliminated in 64-bit
-{ .mmi;                xor     r10=r8,r10              // r10=Sigma0(a)
-               mov     B=A
-               add     A=T1,T2         };;
-{ .mib;                add     E=E,T1
-               add     A=A,r10                 // T2=Maj(a,b,c)+Sigma0(a)
-       br.ctop.sptk    .L_first16      };;
+{ .mmi;                add     T1=T1,H                 // T1=Ch(e,f,g)+h
+               xor     r10=r10,r11
+               _rotr   r11=$t1,$Sigma1[2]  }   // ROTR(e,41)
+{ .mmi;                xor     T2=T2,r9
+               add     K=K,X[15]           };;
+{ .mmi;                add     T1=T1,K                 // T1+=K[i]+X[i]
+               xor     T2=T2,r8                // T2=((a & b) ^ (a & c) ^ (b & c))
+               _rotr   r8=$t0,$Sigma0[0]   }   // ROTR(a,28)
+{ .mmi;                xor     r11=r11,r10             // Sigma1(e)
+               _rotr   r9=$t0,$Sigma0[1]   };; // ROTR(a,34)
+{ .mmi;                add     T1=T1,r11               // T1+=Sigma1(e)
+               xor     r8=r8,r9
+               _rotr   r9=$t0,$Sigma0[2]   };; // ROTR(a,39)
+{ .mmi;                xor     r8=r8,r9                // Sigma0(a)
+               add     D=D,T1
+               mux2    H=X[15],0x44        }   // mov H=X[15] in sha512
+{ .mib;        (p16)   add     r9=1-$SZ,input          // not used in sha512
+               add     X[15]=T1,T2             // H=T1+Maj(a,b,c)
+       br.ctop.sptk    .L_first16          };;
 .L_first16_end:
 
-{ .mii;        mov     ar.lc=$rounds-17
-       mov     ar.ec=1                 };;
+{ .mib;        mov     ar.lc=$rounds-17
+       brp.loop.imp    .L_rest,.L_rest_end-16          }
+{ .mib;        mov     ar.ec=1
+       br.many .L_rest                 };;
 
 .align 32
 .L_rest:
-.rotr  X[16]
-{ .mib;                $LDW    K=[Ktbl],$SZ
+{ .mmi;                $LDW    K=[Ktbl],$SZ
+               add     A=A,r8                  // H+=Sigma0(a) from the past
                _rotr   r8=X[15-1],$sigma0[0] } // ROTR(s0,1)
-{ .mib;        $ADD    X[15]=X[15],X[15-9]     // X[i&0xF]+=X[(i+9)&0xF]
-               $SHRU   s0=X[15-1],sgm0 };;     // s0=X[(i+1)&0xF]>>7
+{ .mmi;        add     X[15]=X[15],X[15-9]     // X[i&0xF]+=X[(i+9)&0xF]
+               $SHRU   s0=X[15-1],sgm0     };; // s0=X[(i+1)&0xF]>>7
 { .mib;                and     T1=F,E
                _rotr   r9=X[15-1],$sigma0[1] } // ROTR(s0,8)
 { .mib;                andcm   r10=G,E
-               $SHRU   s1=X[15-14],sgm1 };;    // s1=X[(i+14)&0xF]>>6
+               $SHRU   s1=X[15-14],sgm1    };; // s1=X[(i+14)&0xF]>>6
+// Pair of mmi; splits on Itanium 1 and prevents pipeline flush
+// upon $SHRU output usage
 { .mmi;                xor     T1=T1,r10               // T1=((e & f) ^ (~e & g))
                xor     r9=r8,r9
-               _rotr   r10=X[15-14],$sigma1[0] };;// ROTR(s1,19)
-{ .mib;                and     T2=A,B          
-               _rotr   r11=X[15-14],$sigma1[1] }// ROTR(s1,61)
-{ .mib;                and     r8=A,C          };;
+               _rotr   r10=X[15-14],$sigma1[0] }// ROTR(s1,19)
+{ .mmi;                and     T2=A,B
+               and     r8=A,C
+               _rotr   r11=X[15-14],$sigma1[1] };;// ROTR(s1,61)
 ___
 $t0="t0", $t1="t1", $code.=<<___ if ($BITS==32);
-// I adhere to mmi; in order to hold Itanium 1 back and avoid 6 cycle
-// pipeline flush in last bundle. Note that even on Itanium2 the
-// latter stalls for one clock cycle...
-{ .mmi;                xor     s0=s0,r9                // s0=sigma0(X[(i+1)&0xF])
-               dep.z   $t1=E,32,32     }
-{ .mmi;                xor     r10=r11,r10
-               zxt4    E=E             };;
-{ .mmi;                or      $t1=$t1,E
-               xor     s1=s1,r10               // s1=sigma1(X[(i+14)&0xF])
-               mux2    $t0=A,0x44      };;     // copy lower half to upper
+{ .mib;                xor     s0=s0,r9                // s0=sigma0(X[(i+1)&0xF])
+               dep.z   $t1=E,32,32         }
+{ .mib;                xor     r10=r11,r10
+               zxt4    E=E                 };;
+{ .mii;                xor     s1=s1,r10               // s1=sigma1(X[(i+14)&0xF])
+               shrp    r9=E,$t1,32+$Sigma1[0]  // ROTR(e,14)
+               mux2    $t0=A,0x44          };; // copy lower half to upper
+// Pair of mmi; splits on Itanium 1 and prevents pipeline flush
+// upon mux2 output usage
 { .mmi;                xor     T2=T2,r8
-               _rotr   r9=$t1,$Sigma1[0] }     // ROTR(e,14)
+               shrp    r8=E,$t1,32+$Sigma1[1]} // ROTR(e,18)
 { .mmi;                and     r10=B,C
                add     T1=T1,H                 // T1=Ch(e,f,g)+h
-               $ADD    X[15]=X[15],s0  };;     // X[i&0xF]+=sigma0(X[(i+1)&0xF])
+               or      $t1=$t1,E           };;
 ___
 $t0="A", $t1="E", $code.=<<___ if ($BITS==64);
 { .mib;                xor     s0=s0,r9                // s0=sigma0(X[(i+1)&0xF])
-               _rotr   r9=$t1,$Sigma1[0] }     // ROTR(e,14)
+               _rotr   r9=$t1,$Sigma1[0]   }   // ROTR(e,14)
 { .mib;                xor     r10=r11,r10
-               xor     T2=T2,r8        };;
+               xor     T2=T2,r8            };;
 { .mib;                xor     s1=s1,r10               // s1=sigma1(X[(i+14)&0xF])
-               add     T1=T1,H         }
+               _rotr   r8=$t1,$Sigma1[1]   }   // ROTR(e,18)
 { .mib;                and     r10=B,C
-               $ADD    X[15]=X[15],s0  };;     // X[i&0xF]+=sigma0(X[(i+1)&0xF])
+               add     T1=T1,H             };; // T1+=H
 ___
 $code.=<<___;
-{ .mmi;                xor     T2=T2,r10               // T2=((a & b) ^ (a & c) ^ (b & c))
-               mov     H=G
-               _rotr   r8=$t1,$Sigma1[1] };;   // ROTR(e,18)
-{ .mmi;                xor     r11=r8,r9
-               $ADD    X[15]=X[15],s1          // X[i&0xF]+=sigma1(X[(i+14)&0xF])
-               _rotr   r9=$t1,$Sigma1[2] }     // ROTR(e,41)
-{ .mmi;                mov     G=F
-               mov     F=E             };;
-{ .mib;                xor     r9=r9,r11               // r9=Sigma1(e)
-               _rotr   r10=$t0,$Sigma0[0] }    // ROTR(a,28)
-{ .mib;                add     T1=T1,K                 // T1=Ch(e,f,g)+h+K512[i]
-               mov     E=D             };;
-{ .mib;                add     T1=T1,r9                // T1+=Sigma1(e)
-               _rotr   r11=$t0,$Sigma0[1] }    // ROTR(a,34)
-{ .mib;                mov     D=C
-               mov     C=B             };;
-{ .mmi;                add     T1=T1,X[15]             // T1+=X[i]
-               xor     r10=r10,r11
-               _rotr   r8=$t0,$Sigma0[2] };;   // ROTR(a,39)
-{ .mmi;                xor     r10=r8,r10              // r10=Sigma0(a)
-               mov     B=A
-               add     A=T1,T2         };;
-{ .mib;                add     E=E,T1
-               add     A=A,r10                 // T2=Maj(a,b,c)+Sigma0(a)
-       br.ctop.sptk    .L_rest };;
+{ .mib;                xor     r9=r9,r8
+               _rotr   r8=$t1,$Sigma1[2]   }   // ROTR(e,41)
+{ .mib;                xor     T2=T2,r10               // T2=((a & b) ^ (a & c) ^ (b & c))
+               add     X[15]=X[15],s0      };; // X[i]+=sigma0(X[i+1])
+{ .mmi;                xor     r9=r9,r8                // Sigma1(e)
+               add     X[15]=X[15],s1          // X[i]+=sigma1(X[i+14])
+               _rotr   r8=$t0,$Sigma0[0]   };; // ROTR(a,28)
+{ .mmi;                add     K=K,X[15]
+               add     T1=T1,r9                // T1+=Sigma1(e)
+               _rotr   r9=$t0,$Sigma0[1]   };; // ROTR(a,34)
+{ .mmi;                add     T1=T1,K                 // T1+=K[i]+X[i]
+               xor     r8=r8,r9
+               _rotr   r9=$t0,$Sigma0[2]   };; // ROTR(a,39)
+{ .mib;                add     D=D,T1
+               mux2    H=X[15],0x44        }   // mov H=X[15] in sha512
+{ .mib;                xor     r8=r8,r9                // Sigma0(a)
+               add     X[15]=T1,T2             // H=T1+Maj(a,b,c)
+       br.ctop.sptk    .L_rest             };;
 .L_rest_end:
 
+{ .mmi;        add     A=A,r8                  };;     // H+=Sigma0(a) from the past
 { .mmi;        add     A_=A_,A
        add     B_=B_,B
        add     C_=C_,C                 }
@@ -590,17 +601,19 @@ $code.=<<___;
 .endp  $func#
 ___
 
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/_rotr(\s+)([^=]+)=([^,]+),([0-9]+)/shrp$1$2=$3,$3,$4/gm;
-if ($BITS==64) {
-    $code =~ s/mux2(\s+)\S+/nop.i$1 0x0/gm;
-    $code =~ s/mux1(\s+)\S+/nop.i$1 0x0/gm     if ($big_endian);
-    $code =~ s/(shrp\s+X\[[^=]+)=([^,]+),([^,]+),([1-9]+)/$1=$3,$2,64-$4/gm
+foreach(split($/,$code)) {
+    s/\`([^\`]*)\`/eval $1/gem;
+    s/_rotr(\s+)([^=]+)=([^,]+),([0-9]+)/shrp$1$2=$3,$3,$4/gm;
+    if ($BITS==64) {
+       s/mux2(\s+)([^=]+)=([^,]+),\S+/mov$1 $2=$3/gm;
+       s/mux1(\s+)\S+/nop.i$1 0x0/gm   if ($big_endian);
+       s/(shrp\s+X\[[^=]+)=([^,]+),([^,]+),([1-9]+)/$1=$3,$2,64-$4/gm
                                                if (!$big_endian);
-    $code =~ s/ld1(\s+)X\[\S+/nop.m$1 0x0/gm;
-}
+       s/ld1(\s+)X\[\S+/nop.m$1 0x0/gm;
+    }
 
-print $code;
+    print $_,"\n";
+}
 
 print<<___ if ($BITS==32);
 .align 64
index 6807a2c..b468cfb 100644 (file)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 # ~17%, but it comes for free, because it's same instruction sequence.
 # Improvement coefficients are for aligned input.
 
+# September 2012.
+#
+# Add MIPS[32|64]R2 code (>25% fewer instructions).
+
 ######################################################################
 # There is a number of MIPS ABI in use, O32 and N32/64 are most
 # widely used. Then there is a new contender: NUBI. It appears that if
@@ -45,7 +49,7 @@
 # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
 # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
 #
-$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64
+$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
 
 if ($flavour =~ /64|n32/i) {
        $PTR_ADD="dadd";        # incidentally works even on n32
@@ -83,6 +87,7 @@ if ($output =~ /512/) {
        $SLL="dsll";            # shift left logical
        $SRL="dsrl";            # shift right logical
        $ADDU="daddu";
+       $ROTR="drotr";
        @Sigma0=(28,34,39);
        @Sigma1=(14,18,41);
        @sigma0=( 7, 1, 8);     # right shift first
@@ -97,6 +102,7 @@ if ($output =~ /512/) {
        $SLL="sll";             # shift left logical
        $SRL="srl";             # shift right logical
        $ADDU="addu";
+       $ROTR="rotr";
        @Sigma0=( 2,13,22);
        @Sigma1=( 6,11,25);
        @sigma0=( 3, 7,18);     # right shift first
@@ -124,6 +130,10 @@ $code.=<<___ if ($i<15);
        ${LD}r  @X[1],`($i+1)*$SZ+$LSB`($inp)
 ___
 $code.=<<___   if (!$big_endian && $i<16 && $SZ==4);
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       wsbh    @X[0],@X[0]             # byte swap($i)
+       rotr    @X[0],@X[0],16
+#else
        srl     $tmp0,@X[0],24          # byte swap($i)
        srl     $tmp1,@X[0],8
        andi    $tmp2,@X[0],0xFF00
@@ -133,8 +143,13 @@ $code.=<<___       if (!$big_endian && $i<16 && $SZ==4);
        or      @X[0],$tmp0
        or      $tmp1,$tmp2
        or      @X[0],$tmp1
+#endif
 ___
 $code.=<<___   if (!$big_endian && $i<16 && $SZ==8);
+#if defined(_MIPS_ARCH_MIPS64R2)
+       dsbh    @X[0],@X[0]             # byte swap($i)
+       dshd    @X[0],@X[0]
+#else
        ori     $tmp0,$zero,0xFF
        dsll    $tmp2,$tmp0,32
        or      $tmp0,$tmp2             # 0x000000FF000000FF
@@ -153,8 +168,31 @@ $code.=<<___       if (!$big_endian && $i<16 && $SZ==8);
        dsrl    $tmp1,@X[0],32
        dsll    @X[0],32
        or      @X[0],$tmp1
+#endif
 ___
 $code.=<<___;
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       xor     $tmp2,$f,$g                     # $i
+       $ROTR   $tmp0,$e,@Sigma1[0]
+       $ADDU   $T1,$X[0],$h
+       $ROTR   $tmp1,$e,@Sigma1[1]
+       and     $tmp2,$e
+       $ROTR   $h,$e,@Sigma1[2]
+       xor     $tmp0,$tmp1
+       $ROTR   $tmp1,$a,@Sigma0[0]
+       xor     $tmp2,$g                        # Ch(e,f,g)
+       xor     $tmp0,$h                        # Sigma1(e)
+
+       $ROTR   $h,$a,@Sigma0[1]
+       $ADDU   $T1,$tmp2
+       $LD     $tmp2,`$i*$SZ`($Ktbl)           # K[$i]
+       xor     $h,$tmp1
+       $ROTR   $tmp1,$a,@Sigma0[2]
+       $ADDU   $T1,$tmp0
+       and     $tmp0,$b,$c
+       xor     $h,$tmp1                        # Sigma0(a)
+       xor     $tmp1,$b,$c
+#else
        $ADDU   $T1,$X[0],$h                    # $i
        $SRL    $h,$e,@Sigma1[0]
        xor     $tmp2,$f,$g
@@ -184,16 +222,15 @@ $code.=<<___;
        xor     $h,$tmp1
        $SLL    $tmp1,$a,`$SZ*8-@Sigma0[0]`
        xor     $h,$tmp0
-       $ST     @X[0],`($i%16)*$SZ`($sp)        # offload to ring buffer
+       and     $tmp0,$b,$c
        xor     $h,$tmp1                        # Sigma0(a)
-
-       or      $tmp0,$a,$b
-       and     $tmp1,$a,$b
-       and     $tmp0,$c
-       or      $tmp1,$tmp0                     # Maj(a,b,c)
+       xor     $tmp1,$b,$c
+#endif
+       $ST     @X[0],`($i%16)*$SZ`($sp)        # offload to ring buffer
+       $ADDU   $h,$tmp0
+       and     $tmp1,$a
        $ADDU   $T1,$tmp2                       # +=K[$i]
-       $ADDU   $h,$tmp1
-
+       $ADDU   $h,$tmp1                        # +=Maj(a,b,c)
        $ADDU   $d,$T1
        $ADDU   $h,$T1
 ___
@@ -207,6 +244,20 @@ my $i=@_[0];
 my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]);
 
 $code.=<<___;
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       $SRL    $tmp2,@X[1],@sigma0[0]          # Xupdate($i)
+       $ROTR   $tmp0,@X[1],@sigma0[1]
+       $ADDU   @X[0],@X[9]                     # +=X[i+9]
+       xor     $tmp2,$tmp0
+       $ROTR   $tmp0,@X[1],@sigma0[2]
+
+       $SRL    $tmp3,@X[14],@sigma1[0]
+       $ROTR   $tmp1,@X[14],@sigma1[1]
+       xor     $tmp2,$tmp0                     # sigma0(X[i+1])
+       $ROTR   $tmp0,@X[14],@sigma1[2]
+       xor     $tmp3,$tmp1
+       $ADDU   @X[0],$tmp2
+#else
        $SRL    $tmp2,@X[1],@sigma0[0]          # Xupdate($i)
        $ADDU   @X[0],@X[9]                     # +=X[i+9]
        $SLL    $tmp1,@X[1],`$SZ*8-@sigma0[2]`
@@ -227,7 +278,7 @@ $code.=<<___;
        xor     $tmp3,$tmp0
        $SRL    $tmp0,@X[14],@sigma1[2]
        xor     $tmp3,$tmp1
-
+#endif
        xor     $tmp3,$tmp0                     # sigma1(X[i+14])
        $ADDU   @X[0],$tmp3
 ___
@@ -242,9 +293,13 @@ $code.=<<___;
 # include <openssl/fipssyms.h>
 #endif
 
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
 .text
 .set   noat
-#if !defined(__vxworks) || defined(__pic__)
+#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
 .option        pic2
 #endif
 
index 6b44a68..734f3c1 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
@@ -9,8 +9,7 @@
 
 # I let hardware handle unaligned input, except on page boundaries
 # (see below for details). Otherwise straightforward implementation
-# with X vector in register bank. The module is big-endian [which is
-# not big deal as there're no little-endian targets left around].
+# with X vector in register bank.
 
 #                      sha256          |       sha512
 #                      -m64    -m32    |       -m64    -m32
@@ -56,6 +55,8 @@ if ($flavour =~ /64/) {
        $PUSH="stw";
 } else { die "nonsense $flavour"; }
 
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
+
 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
@@ -64,7 +65,7 @@ die "can't locate ppc-xlate.pl";
 open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
 
 if ($output =~ /512/) {
-       $func="sha512_block_data_order";
+       $func="sha512_block_ppc";
        $SZ=8;
        @Sigma0=(28,34,39);
        @Sigma1=(14,18,41);
@@ -76,7 +77,7 @@ if ($output =~ /512/) {
        $ROR="rotrdi";
        $SHR="srdi";
 } else {
-       $func="sha256_block_data_order";
+       $func="sha256_block_ppc";
        $SZ=4;
        @Sigma0=( 2,13,22);
        @Sigma1=( 6,11,25);
@@ -110,7 +111,7 @@ $B  ="r9";
 $C  ="r10";
 $D  ="r11";
 $E  ="r12";
-$F  ="r13";    $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer
+$F  =$t1;      $t1 = "r0";     # stay away from "r13";
 $G  ="r14";
 $H  ="r15";
 
@@ -118,24 +119,23 @@ $H  ="r15";
 @X=("r16","r17","r18","r19","r20","r21","r22","r23",
     "r24","r25","r26","r27","r28","r29","r30","r31");
 
-$inp="r31"   # reassigned $inp! aliases with @X[15]
+$inp="r31" if($SZ==4 || $SIZE_T==8);   # reassigned $inp! aliases with @X[15]
 
 sub ROUND_00_15 {
 my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
 $code.=<<___;
-       $LD     $T,`$i*$SZ`($Tbl)
        $ROR    $a0,$e,$Sigma1[0]
        $ROR    $a1,$e,$Sigma1[1]
        and     $t0,$f,$e
-       andc    $t1,$g,$e
-       add     $T,$T,$h
        xor     $a0,$a0,$a1
+       add     $h,$h,$t1
+       andc    $t1,$g,$e
        $ROR    $a1,$a1,`$Sigma1[2]-$Sigma1[1]`
        or      $t0,$t0,$t1             ; Ch(e,f,g)
-       add     $T,$T,@X[$i]
+       add     $h,$h,@X[$i%16]
        xor     $a0,$a0,$a1             ; Sigma1(e)
-       add     $T,$T,$t0
-       add     $T,$T,$a0
+       add     $h,$h,$t0
+       add     $h,$h,$a0
 
        $ROR    $a0,$a,$Sigma0[0]
        $ROR    $a1,$a,$Sigma0[1]
@@ -146,9 +146,14 @@ $code.=<<___;
        xor     $t0,$t0,$t1
        and     $t1,$b,$c
        xor     $a0,$a0,$a1             ; Sigma0(a)
-       add     $d,$d,$T
+       add     $d,$d,$h
        xor     $t0,$t0,$t1             ; Maj(a,b,c)
-       add     $h,$T,$a0
+___
+$code.=<<___ if ($i<15);
+       $LD     $t1,`($i+1)*$SZ`($Tbl)
+___
+$code.=<<___;
+       add     $h,$h,$a0
        add     $h,$h,$t0
 
 ___
@@ -169,10 +174,11 @@ $code.=<<___;
        add     @X[$i],@X[$i],@X[($i+9)%16]
        xor     $a0,$a0,$a1             ; sigma0(X[(i+1)&0x0f])
        xor     $t0,$t0,$t1             ; sigma1(X[(i+14)&0x0f])
+       $LD     $t1,`$i*$SZ`($Tbl)
        add     @X[$i],@X[$i],$a0
        add     @X[$i],@X[$i],$t0
 ___
-&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h);
+&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
 }
 
 $code=<<___;
@@ -188,8 +194,6 @@ $func:
 
        $PUSH   $ctx,`$FRAME-$SIZE_T*22`($sp)
 
-       $PUSH   $toc,`$FRAME-$SIZE_T*20`($sp)
-       $PUSH   r13,`$FRAME-$SIZE_T*19`($sp)
        $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
        $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
        $PUSH   r16,`$FRAME-$SIZE_T*16`($sp)
@@ -209,7 +213,10 @@ $func:
        $PUSH   r30,`$FRAME-$SIZE_T*2`($sp)
        $PUSH   r31,`$FRAME-$SIZE_T*1`($sp)
        $PUSH   r0,`$FRAME+$LRSAVE`($sp)
+___
 
+if ($SZ==4 || $SIZE_T==8) {
+$code.=<<___;
        $LD     $A,`0*$SZ`($ctx)
        mr      $inp,r4                         ; incarnate $inp
        $LD     $B,`1*$SZ`($ctx)
@@ -219,7 +226,16 @@ $func:
        $LD     $F,`5*$SZ`($ctx)
        $LD     $G,`6*$SZ`($ctx)
        $LD     $H,`7*$SZ`($ctx)
+___
+} else {
+  for ($i=16;$i<32;$i++) {
+    $code.=<<___;
+       lwz     r$i,`$LITTLE_ENDIAN^(4*($i-16))`($ctx)
+___
+  }
+}
 
+$code.=<<___;
        bl      LPICmeup
 LPICedup:
        andi.   r0,$inp,3
@@ -255,6 +271,9 @@ Lunaligned:
 Lcross_page:
        li      $t1,`16*$SZ/4`
        mtctr   $t1
+___
+if ($SZ==4 || $SIZE_T==8) {
+$code.=<<___;
        addi    r20,$sp,$LOCALS                 ; aligned spot below the frame
 Lmemcpy:
        lbz     r16,0($inp)
@@ -268,7 +287,26 @@ Lmemcpy:
        stb     r19,3(r20)
        addi    r20,r20,4
        bdnz    Lmemcpy
+___
+} else {
+$code.=<<___;
+       addi    r12,$sp,$LOCALS                 ; aligned spot below the frame
+Lmemcpy:
+       lbz     r8,0($inp)
+       lbz     r9,1($inp)
+       lbz     r10,2($inp)
+       lbz     r11,3($inp)
+       addi    $inp,$inp,4
+       stb     r8,0(r12)
+       stb     r9,1(r12)
+       stb     r10,2(r12)
+       stb     r11,3(r12)
+       addi    r12,r12,4
+       bdnz    Lmemcpy
+___
+}
 
+$code.=<<___;
        $PUSH   $inp,`$FRAME-$SIZE_T*26`($sp)   ; save real inp
        addi    $t1,$sp,`$LOCALS+16*$SZ`        ; fictitious end pointer
        addi    $inp,$sp,$LOCALS                ; fictitious inp pointer
@@ -283,8 +321,6 @@ Lmemcpy:
 
 Ldone:
        $POP    r0,`$FRAME+$LRSAVE`($sp)
-       $POP    $toc,`$FRAME-$SIZE_T*20`($sp)
-       $POP    r13,`$FRAME-$SIZE_T*19`($sp)
        $POP    r14,`$FRAME-$SIZE_T*18`($sp)
        $POP    r15,`$FRAME-$SIZE_T*17`($sp)
        $POP    r16,`$FRAME-$SIZE_T*16`($sp)
@@ -309,27 +345,48 @@ Ldone:
        .long   0
        .byte   0,12,4,1,0x80,18,3,0
        .long   0
+___
 
+if ($SZ==4 || $SIZE_T==8) {
+$code.=<<___;
 .align 4
 Lsha2_block_private:
+       $LD     $t1,0($Tbl)
 ___
 for($i=0;$i<16;$i++) {
-$code.=<<___ if ($SZ==4);
+$code.=<<___ if ($SZ==4 && !$LITTLE_ENDIAN);
        lwz     @X[$i],`$i*$SZ`($inp)
 ___
+$code.=<<___ if ($SZ==4 && $LITTLE_ENDIAN);
+       lwz     $a0,`$i*$SZ`($inp)
+       rotlwi  @X[$i],$a0,8
+       rlwimi  @X[$i],$a0,24,0,7
+       rlwimi  @X[$i],$a0,24,16,23
+___
 # 64-bit loads are split to 2x32-bit ones, as CPU can't handle
 # unaligned 64-bit loads, only 32-bit ones...
-$code.=<<___ if ($SZ==8);
+$code.=<<___ if ($SZ==8 && !$LITTLE_ENDIAN);
        lwz     $t0,`$i*$SZ`($inp)
        lwz     @X[$i],`$i*$SZ+4`($inp)
        insrdi  @X[$i],$t0,32,0
 ___
+$code.=<<___ if ($SZ==8 && $LITTLE_ENDIAN);
+       lwz     $a0,`$i*$SZ`($inp)
+        lwz    $a1,`$i*$SZ+4`($inp)
+       rotlwi  $t0,$a0,8
+        rotlwi @X[$i],$a1,8
+       rlwimi  $t0,$a0,24,0,7
+        rlwimi @X[$i],$a1,24,0,7
+       rlwimi  $t0,$a0,24,16,23
+        rlwimi @X[$i],$a1,24,16,23
+       insrdi  @X[$i],$t0,32,0
+___
        &ROUND_00_15($i,@V);
        unshift(@V,pop(@V));
 }
 $code.=<<___;
-       li      $T,`$rounds/16-1`
-       mtctr   $T
+       li      $t0,`$rounds/16-1`
+       mtctr   $t0
 .align 4
 Lrounds:
        addi    $Tbl,$Tbl,`16*$SZ`
@@ -377,7 +434,282 @@ $code.=<<___;
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
+.size  $func,.-$func
+___
+} else {
+########################################################################
+# SHA512 for PPC32, X vector is off-loaded to stack...
+#
+#                      |       sha512
+#                      |       -m32
+# ----------------------+-----------------------
+# PPC74x0,gcc-4.0.1    |       +48%
+# POWER6,gcc-4.4.6     |       +124%(*)
+# POWER7,gcc-4.4.6     |       +79%(*)
+# e300,gcc-4.1.0       |       +167%
+#
+# (*)  ~1/3 of -m64 result [and ~20% better than -m32 code generated
+#      by xlc-12.1]
+
+my $XOFF=$LOCALS;
+
+my @V=map("r$_",(16..31));     # A..H
+
+my ($s0,$s1,$t0,$t1,$t2,$t3,$a0,$a1,$a2,$a3)=map("r$_",(0,5,6,8..12,14,15));
+my ($x0,$x1)=("r3","r4");      # zaps $ctx and $inp
+
+sub ROUND_00_15_ppc32 {
+my ($i,        $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
+       $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_;
+
+$code.=<<___;
+       lwz     $t2,`$SZ*($i%16)+($LITTLE_ENDIAN^4)`($Tbl)
+        xor    $a0,$flo,$glo
+       lwz     $t3,`$SZ*($i%16)+($LITTLE_ENDIAN^0)`($Tbl)
+        xor    $a1,$fhi,$ghi
+       addc    $hlo,$hlo,$t0                   ; h+=x[i]
+       stw     $t0,`$XOFF+0+$SZ*($i%16)`($sp)  ; save x[i]
+
+       srwi    $s0,$elo,$Sigma1[0]
+       srwi    $s1,$ehi,$Sigma1[0]
+        and    $a0,$a0,$elo
+       adde    $hhi,$hhi,$t1
+        and    $a1,$a1,$ehi
+       stw     $t1,`$XOFF+4+$SZ*($i%16)`($sp)
+       srwi    $t0,$elo,$Sigma1[1]
+       srwi    $t1,$ehi,$Sigma1[1]
+        addc   $hlo,$hlo,$t2                   ; h+=K512[i]
+       insrwi  $s0,$ehi,$Sigma1[0],0
+       insrwi  $s1,$elo,$Sigma1[0],0
+        xor    $a0,$a0,$glo                    ; Ch(e,f,g)
+        adde   $hhi,$hhi,$t3
+        xor    $a1,$a1,$ghi
+       insrwi  $t0,$ehi,$Sigma1[1],0
+       insrwi  $t1,$elo,$Sigma1[1],0
+        addc   $hlo,$hlo,$a0                   ; h+=Ch(e,f,g)
+       srwi    $t2,$ehi,$Sigma1[2]-32
+       srwi    $t3,$elo,$Sigma1[2]-32
+       xor     $s0,$s0,$t0
+       xor     $s1,$s1,$t1
+       insrwi  $t2,$elo,$Sigma1[2]-32,0
+       insrwi  $t3,$ehi,$Sigma1[2]-32,0
+        xor    $a0,$alo,$blo                   ; a^b, b^c in next round
+        adde   $hhi,$hhi,$a1
+        xor    $a1,$ahi,$bhi
+       xor     $s0,$s0,$t2                     ; Sigma1(e)
+       xor     $s1,$s1,$t3
+
+       srwi    $t0,$alo,$Sigma0[0]
+        and    $a2,$a2,$a0
+        addc   $hlo,$hlo,$s0                   ; h+=Sigma1(e)
+        and    $a3,$a3,$a1
+       srwi    $t1,$ahi,$Sigma0[0]
+       srwi    $s0,$ahi,$Sigma0[1]-32
+        adde   $hhi,$hhi,$s1
+       srwi    $s1,$alo,$Sigma0[1]-32
+       insrwi  $t0,$ahi,$Sigma0[0],0
+       insrwi  $t1,$alo,$Sigma0[0],0
+        xor    $a2,$a2,$blo                    ; Maj(a,b,c)
+        addc   $dlo,$dlo,$hlo                  ; d+=h
+        xor    $a3,$a3,$bhi
+       insrwi  $s0,$alo,$Sigma0[1]-32,0
+       insrwi  $s1,$ahi,$Sigma0[1]-32,0
+        adde   $dhi,$dhi,$hhi
+       srwi    $t2,$ahi,$Sigma0[2]-32
+       srwi    $t3,$alo,$Sigma0[2]-32
+       xor     $s0,$s0,$t0
+        addc   $hlo,$hlo,$a2                   ; h+=Maj(a,b,c)
+       xor     $s1,$s1,$t1
+       insrwi  $t2,$alo,$Sigma0[2]-32,0
+       insrwi  $t3,$ahi,$Sigma0[2]-32,0
+        adde   $hhi,$hhi,$a3
+___
+$code.=<<___ if ($i>=15);
+       lwz     $t0,`$XOFF+0+$SZ*(($i+2)%16)`($sp)
+       lwz     $t1,`$XOFF+4+$SZ*(($i+2)%16)`($sp)
+___
+$code.=<<___ if ($i<15 && !$LITTLE_ENDIAN);
+       lwz     $t1,`$SZ*($i+1)+0`($inp)
+       lwz     $t0,`$SZ*($i+1)+4`($inp)
 ___
+$code.=<<___ if ($i<15 && $LITTLE_ENDIAN);
+       lwz     $a2,`$SZ*($i+1)+0`($inp)
+        lwz    $a3,`$SZ*($i+1)+4`($inp)
+       rotlwi  $t1,$a2,8
+        rotlwi $t0,$a3,8
+       rlwimi  $t1,$a2,24,0,7
+        rlwimi $t0,$a3,24,0,7
+       rlwimi  $t1,$a2,24,16,23
+        rlwimi $t0,$a3,24,16,23
+___
+$code.=<<___;
+       xor     $s0,$s0,$t2                     ; Sigma0(a)
+       xor     $s1,$s1,$t3
+       addc    $hlo,$hlo,$s0                   ; h+=Sigma0(a)
+       adde    $hhi,$hhi,$s1
+___
+$code.=<<___ if ($i==15);
+       lwz     $x0,`$XOFF+0+$SZ*(($i+1)%16)`($sp)
+       lwz     $x1,`$XOFF+4+$SZ*(($i+1)%16)`($sp)
+___
+}
+sub ROUND_16_xx_ppc32 {        # emit one round: update the schedule word x[i] (see heredoc comments), then the common round body
+my ($i,        $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,        # $i = round index; the rest are hi/lo name pairs for a..h
+       $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_;
+
+$code.=<<___;  # appended verbatim to $code; `...` spans are evaluated later
+       srwi    $s0,$t0,$sigma0[0]
+       srwi    $s1,$t1,$sigma0[0]
+       srwi    $t2,$t0,$sigma0[1]
+       srwi    $t3,$t1,$sigma0[1]
+       insrwi  $s0,$t1,$sigma0[0],0
+       insrwi  $s1,$t0,$sigma0[0],0
+       srwi    $a0,$t0,$sigma0[2]
+       insrwi  $t2,$t1,$sigma0[1],0
+       insrwi  $t3,$t0,$sigma0[1],0
+       insrwi  $a0,$t1,$sigma0[2],0
+       xor     $s0,$s0,$t2
+        lwz    $t2,`$XOFF+0+$SZ*(($i+14)%16)`($sp)
+       srwi    $a1,$t1,$sigma0[2]
+       xor     $s1,$s1,$t3
+        lwz    $t3,`$XOFF+4+$SZ*(($i+14)%16)`($sp)
+       xor     $a0,$a0,$s0
+        srwi   $s0,$t2,$sigma1[0]
+       xor     $a1,$a1,$s1
+        srwi   $s1,$t3,$sigma1[0]
+       addc    $x0,$x0,$a0                     ; x[i]+=sigma0(x[i+1])
+        srwi   $a0,$t3,$sigma1[1]-32
+       insrwi  $s0,$t3,$sigma1[0],0
+       insrwi  $s1,$t2,$sigma1[0],0
+       adde    $x1,$x1,$a1
+        srwi   $a1,$t2,$sigma1[1]-32
+
+       insrwi  $a0,$t2,$sigma1[1]-32,0
+       srwi    $t2,$t2,$sigma1[2]
+       insrwi  $a1,$t3,$sigma1[1]-32,0
+       insrwi  $t2,$t3,$sigma1[2],0
+       xor     $s0,$s0,$a0
+        lwz    $a0,`$XOFF+0+$SZ*(($i+9)%16)`($sp)
+       srwi    $t3,$t3,$sigma1[2]
+       xor     $s1,$s1,$a1
+        lwz    $a1,`$XOFF+4+$SZ*(($i+9)%16)`($sp)
+       xor     $s0,$s0,$t2
+        addc   $x0,$x0,$a0                     ; x[i]+=x[i+9]
+       xor     $s1,$s1,$t3
+        adde   $x1,$x1,$a1
+       addc    $x0,$x0,$s0                     ; x[i]+=sigma1(x[i+14])
+       adde    $x1,$x1,$s1
+___
+       ($t0,$t1,$x0,$x1) = ($x0,$x1,$t0,$t1);  # swap the names held in ($t0,$t1) and ($x0,$x1) for all code emitted after this point
+       &ROUND_00_15_ppc32(@_); # append the common round body, called with the same argument list
+}
+
+$code.=<<___;
+.align 4
+Lsha2_block_private:
+___
+$code.=<<___ if (!$LITTLE_ENDIAN);
+       lwz     $t1,0($inp)
+       xor     $a2,@V[3],@V[5]         ; B^C, magic seed
+       lwz     $t0,4($inp)
+       xor     $a3,@V[2],@V[4]
+___
+$code.=<<___ if ($LITTLE_ENDIAN);
+       lwz     $a1,0($inp)
+       xor     $a2,@V[3],@V[5]         ; B^C, magic seed
+       lwz     $a0,4($inp)
+       xor     $a3,@V[2],@V[4]
+       rotlwi  $t1,$a1,8
+        rotlwi $t0,$a0,8
+       rlwimi  $t1,$a1,24,0,7
+        rlwimi $t0,$a0,24,0,7
+       rlwimi  $t1,$a1,24,16,23
+        rlwimi $t0,$a0,24,16,23
+___
+for($i=0;$i<16;$i++) { # emit rounds 0..15; $i is left at 16 for the loop that follows
+       &ROUND_00_15_ppc32($i,@V);
+       unshift(@V,pop(@V));    unshift(@V,pop(@V));    # rotate @V right by two entries (one hi/lo pair) for the next round
+       ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1);  # exchange the ($a0,$a1) and ($a2,$a3) names between rounds
+}
+$code.=<<___;
+       li      $a0,`$rounds/16-1`
+       mtctr   $a0
+.align 4
+Lrounds:
+       addi    $Tbl,$Tbl,`16*$SZ`
+___
+for(;$i<32;$i++) {     # continues from the $i left by the previous loop; emits 16 rounds that form the body after Lrounds
+       &ROUND_16_xx_ppc32($i,@V);
+       unshift(@V,pop(@V));    unshift(@V,pop(@V));    # rotate @V right by two entries (one hi/lo pair) for the next round
+       ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1);  # exchange the ($a0,$a1) and ($a2,$a3) names between rounds
+}
+$code.=<<___;
+       bdnz-   Lrounds
+
+       $POP    $ctx,`$FRAME-$SIZE_T*22`($sp)
+       $POP    $inp,`$FRAME-$SIZE_T*23`($sp)   ; inp pointer
+       $POP    $num,`$FRAME-$SIZE_T*24`($sp)   ; end pointer
+       subi    $Tbl,$Tbl,`($rounds-16)*$SZ`    ; rewind Tbl
+
+       lwz     $t0,`$LITTLE_ENDIAN^0`($ctx)
+       lwz     $t1,`$LITTLE_ENDIAN^4`($ctx)
+       lwz     $t2,`$LITTLE_ENDIAN^8`($ctx)
+       lwz     $t3,`$LITTLE_ENDIAN^12`($ctx)
+       lwz     $a0,`$LITTLE_ENDIAN^16`($ctx)
+       lwz     $a1,`$LITTLE_ENDIAN^20`($ctx)
+       lwz     $a2,`$LITTLE_ENDIAN^24`($ctx)
+       addc    @V[1],@V[1],$t1
+       lwz     $a3,`$LITTLE_ENDIAN^28`($ctx)
+       adde    @V[0],@V[0],$t0
+       lwz     $t0,`$LITTLE_ENDIAN^32`($ctx)
+       addc    @V[3],@V[3],$t3
+       lwz     $t1,`$LITTLE_ENDIAN^36`($ctx)
+       adde    @V[2],@V[2],$t2
+       lwz     $t2,`$LITTLE_ENDIAN^40`($ctx)
+       addc    @V[5],@V[5],$a1
+       lwz     $t3,`$LITTLE_ENDIAN^44`($ctx)
+       adde    @V[4],@V[4],$a0
+       lwz     $a0,`$LITTLE_ENDIAN^48`($ctx)
+       addc    @V[7],@V[7],$a3
+       lwz     $a1,`$LITTLE_ENDIAN^52`($ctx)
+       adde    @V[6],@V[6],$a2
+       lwz     $a2,`$LITTLE_ENDIAN^56`($ctx)
+       addc    @V[9],@V[9],$t1
+       lwz     $a3,`$LITTLE_ENDIAN^60`($ctx)
+       adde    @V[8],@V[8],$t0
+       stw     @V[0],`$LITTLE_ENDIAN^0`($ctx)
+       stw     @V[1],`$LITTLE_ENDIAN^4`($ctx)
+       addc    @V[11],@V[11],$t3
+       stw     @V[2],`$LITTLE_ENDIAN^8`($ctx)
+       stw     @V[3],`$LITTLE_ENDIAN^12`($ctx)
+       adde    @V[10],@V[10],$t2
+       stw     @V[4],`$LITTLE_ENDIAN^16`($ctx)
+       stw     @V[5],`$LITTLE_ENDIAN^20`($ctx)
+       addc    @V[13],@V[13],$a1
+       stw     @V[6],`$LITTLE_ENDIAN^24`($ctx)
+       stw     @V[7],`$LITTLE_ENDIAN^28`($ctx)
+       adde    @V[12],@V[12],$a0
+       stw     @V[8],`$LITTLE_ENDIAN^32`($ctx)
+       stw     @V[9],`$LITTLE_ENDIAN^36`($ctx)
+       addc    @V[15],@V[15],$a3
+       stw     @V[10],`$LITTLE_ENDIAN^40`($ctx)
+       stw     @V[11],`$LITTLE_ENDIAN^44`($ctx)
+       adde    @V[14],@V[14],$a2
+       stw     @V[12],`$LITTLE_ENDIAN^48`($ctx)
+       stw     @V[13],`$LITTLE_ENDIAN^52`($ctx)
+       stw     @V[14],`$LITTLE_ENDIAN^56`($ctx)
+       stw     @V[15],`$LITTLE_ENDIAN^60`($ctx)
+
+       addi    $inp,$inp,`16*$SZ`              ; advance inp
+       $PUSH   $inp,`$FRAME-$SIZE_T*23`($sp)
+       $UCMP   $inp,$num
+       bne     Lsha2_block_private
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+.size  $func,.-$func
+___
+}
 
 # Ugly hack here, because PPC assembler syntax seem to vary too
 # much from platforms to platform...
@@ -395,46 +727,46 @@ LPICmeup:
        .space  `64-9*4`
 ___
 $code.=<<___ if ($SZ==8);
-       .long   0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
-       .long   0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
-       .long   0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
-       .long   0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
-       .long   0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
-       .long   0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
-       .long   0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
-       .long   0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
-       .long   0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
-       .long   0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
-       .long   0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
-       .long   0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
-       .long   0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
-       .long   0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
-       .long   0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
-       .long   0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
-       .long   0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
-       .long   0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
-       .long   0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
-       .long   0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
-       .long   0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
-       .long   0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
-       .long   0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
-       .long   0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
-       .long   0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
-       .long   0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
-       .long   0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
-       .long   0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
-       .long   0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
-       .long   0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
-       .long   0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
-       .long   0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
-       .long   0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
-       .long   0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
-       .long   0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
-       .long   0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
-       .long   0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
-       .long   0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
-       .long   0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
-       .long   0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
+       .quad   0x428a2f98d728ae22,0x7137449123ef65cd
+       .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+       .quad   0x3956c25bf348b538,0x59f111f1b605d019
+       .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
+       .quad   0xd807aa98a3030242,0x12835b0145706fbe
+       .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+       .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
+       .quad   0x9bdc06a725c71235,0xc19bf174cf692694
+       .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
+       .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+       .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
+       .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+       .quad   0x983e5152ee66dfab,0xa831c66d2db43210
+       .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
+       .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
+       .quad   0x06ca6351e003826f,0x142929670a0e6e70
+       .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
+       .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+       .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
+       .quad   0x81c2c92e47edaee6,0x92722c851482353b
+       .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
+       .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
+       .quad   0xd192e819d6ef5218,0xd69906245565a910
+       .quad   0xf40e35855771202a,0x106aa07032bbd1b8
+       .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
+       .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+       .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+       .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+       .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
+       .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
+       .quad   0x90befffa23631e28,0xa4506cebde82bde9
+       .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
+       .quad   0xca273eceea26619c,0xd186b8c721c0c207
+       .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+       .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
+       .quad   0x113f9804bef90dae,0x1b710b35131c471b
+       .quad   0x28db77f523047d84,0x32caab7b40c72493
+       .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+       .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+       .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
 ___
 $code.=<<___ if ($SZ==4);
        .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
index 5857407..5a9c15d 100644 (file)
@@ -5,6 +5,8 @@
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Hardware SPARC T4 support by David S. Miller <davem@davemloft.net>.
 # ====================================================================
 
 # SHA256 performance improvement over compiler generated code varies
 #      loads are always slower than one 64-bit load. Once again this
 #      is unlike pre-T1 UltraSPARC, where, if scheduled appropriately,
 #      2x32-bit loads can be as fast as 1x64-bit ones.
-
-$bits=32;
-for (@ARGV)    { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64) { $bias=2047; $frame=192; }
-else           { $bias=0;    $frame=112; }
+#
+# SPARC T4 SHA256/512 hardware achieves 3.17/2.01 cycles per byte,
+# which is 9.3x/11.1x faster than software. Multi-process benchmark
+# saturates at 11.5x single-process result on 8-core processor, or
+# ~11/16GBps per 2.85GHz socket.
 
 $output=shift;
 open STDOUT,">$output";
@@ -170,6 +172,7 @@ $code.=<<___ if ($i==0);
        ld      [$inp+16],%l4
        ld      [$inp+20],%l5
        ld      [$inp+24],%l6
+       cmp     $tmp31,0
        ld      [$inp+28],%l7
 ___
 $code.=<<___ if ($i<15);
@@ -182,29 +185,29 @@ $code.=<<___ if ($i<15);
        or      @pair[1],$tmp2,$tmp2
        `"ld    [$inp+".eval(32+4+$i*8)."],@pair[1]"    if ($i<12)`
        add     $h,$tmp2,$T1
-       $ST     $tmp2,[%sp+`$bias+$frame+$i*$SZ`]
+       $ST     $tmp2,[%sp+STACK_BIAS+STACK_FRAME+`$i*$SZ`]
 ___
 $code.=<<___ if ($i==12);
-       brnz,a  $tmp31,.+8
+       bnz,a,pn        %icc,.+8
        ld      [$inp+128],%l0
 ___
 $code.=<<___ if ($i==15);
-       ld      [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+0`],%l2
        sllx    @pair[1],$tmp31,$tmp2   ! Xload($i)
        add     $tmp31,32,$tmp0
-       ld      [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+4`],%l3
        sllx    @pair[0],$tmp0,$tmp1
-       ld      [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+0`],%l4
        srlx    @pair[2],$tmp32,@pair[1]
        or      $tmp1,$tmp2,$tmp2
-       ld      [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+4`],%l5
        or      @pair[1],$tmp2,$tmp2
-       ld      [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+0`],%l6
        add     $h,$tmp2,$T1
-       $ST     $tmp2,[%sp+`$bias+$frame+$i*$SZ`]
-       ld      [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
-       ld      [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
-       ld      [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
+       $ST     $tmp2,[%sp+STACK_BIAS+STACK_FRAME+`$i*$SZ`]
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+4`],%l7
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+0`],%l0
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+4`],%l1
 ___
 } if ($SZ==8);
 
@@ -340,9 +343,9 @@ $code.=<<___;
        or      %l3,$tmp0,$tmp0
 
        srlx    $tmp0,@sigma0[0],$T1
-       ld      [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+0`],%l2
        sllx    $tmp0,`64-@sigma0[2]`,$tmp1
-       ld      [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+4`],%l3
        srlx    $tmp0,@sigma0[1],$tmp0
        xor     $tmp1,$T1,$T1
        sllx    $tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1
@@ -354,9 +357,9 @@ $code.=<<___;
        or      %l7,$tmp2,$tmp2
 
        srlx    $tmp2,@sigma1[0],$tmp1
-       ld      [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+0`],%l6
        sllx    $tmp2,`64-@sigma1[2]`,$tmp0
-       ld      [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+4`],%l7
        srlx    $tmp2,@sigma1[1],$tmp2
        xor     $tmp0,$tmp1,$tmp1
        sllx    $tmp0,`@sigma1[2]-@sigma1[1]`,$tmp0
@@ -365,27 +368,30 @@ $code.=<<___;
        xor     $tmp0,$tmp1,$tmp1
        sllx    %l4,32,$tmp0
        xor     $tmp2,$tmp1,$tmp1       ! sigma1(X[$i+14])
-       ld      [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+0`],%l4
        or      %l5,$tmp0,$tmp0
-       ld      [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+4`],%l5
 
        sllx    %l0,32,$tmp2
        add     $tmp1,$T1,$T1
-       ld      [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+0`],%l0
        or      %l1,$tmp2,$tmp2
        add     $tmp0,$T1,$T1           ! +=X[$i+9]
-       ld      [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
+       ld      [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+4`],%l1
        add     $tmp2,$T1,$T1           ! +=X[$i]
-       $ST     $T1,[%sp+`$bias+$frame+($i%16)*$SZ`]
+       $ST     $T1,[%sp+STACK_BIAS+STACK_FRAME+`($i%16)*$SZ`]
 ___
     &BODY_00_15(@_);
 } if ($SZ==8);
 
-$code.=<<___ if ($bits==64);
+$code.=<<___;
+#include "sparc_arch.h"
+
+#ifdef __arch64__
 .register      %g2,#scratch
 .register      %g3,#scratch
-___
-$code.=<<___;
+#endif
+
 .section       ".text",#alloc,#execinstr
 
 .align 64
@@ -457,9 +463,203 @@ ___
 }
 $code.=<<___;
 .size  K${label},.-K${label}
+
+#ifdef __PIC__
+SPARC_PIC_THUNK(%g1)
+#endif
+
 .globl sha${label}_block_data_order
+.align 32
 sha${label}_block_data_order:
-       save    %sp,`-$frame-$locals`,%sp
+       SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
+       ld      [%g1+4],%g1             ! OPENSSL_sparcv9cap_P[1]
+
+       andcc   %g1, CFR_SHA${label}, %g0
+       be      .Lsoftware
+       nop
+___
+$code.=<<___ if ($SZ==8);              # SHA512
+       ldd     [%o0 + 0x00], %f0       ! load context
+       ldd     [%o0 + 0x08], %f2
+       ldd     [%o0 + 0x10], %f4
+       ldd     [%o0 + 0x18], %f6
+       ldd     [%o0 + 0x20], %f8
+       ldd     [%o0 + 0x28], %f10
+       andcc   %o1, 0x7, %g0
+       ldd     [%o0 + 0x30], %f12
+       bne,pn  %icc, .Lhwunaligned
+        ldd    [%o0 + 0x38], %f14
+
+.Lhwaligned_loop:
+       ldd     [%o1 + 0x00], %f16
+       ldd     [%o1 + 0x08], %f18
+       ldd     [%o1 + 0x10], %f20
+       ldd     [%o1 + 0x18], %f22
+       ldd     [%o1 + 0x20], %f24
+       ldd     [%o1 + 0x28], %f26
+       ldd     [%o1 + 0x30], %f28
+       ldd     [%o1 + 0x38], %f30
+       ldd     [%o1 + 0x40], %f32
+       ldd     [%o1 + 0x48], %f34
+       ldd     [%o1 + 0x50], %f36
+       ldd     [%o1 + 0x58], %f38
+       ldd     [%o1 + 0x60], %f40
+       ldd     [%o1 + 0x68], %f42
+       ldd     [%o1 + 0x70], %f44
+       subcc   %o2, 1, %o2             ! done yet?
+       ldd     [%o1 + 0x78], %f46
+       add     %o1, 0x80, %o1
+       prefetch [%o1 + 63], 20
+       prefetch [%o1 + 64+63], 20
+
+       .word   0x81b02860              ! SHA512
+
+       bne,pt  SIZE_T_CC, .Lhwaligned_loop
+       nop
+
+.Lhwfinish:
+       std     %f0, [%o0 + 0x00]       ! store context
+       std     %f2, [%o0 + 0x08]
+       std     %f4, [%o0 + 0x10]
+       std     %f6, [%o0 + 0x18]
+       std     %f8, [%o0 + 0x20]
+       std     %f10, [%o0 + 0x28]
+       std     %f12, [%o0 + 0x30]
+       retl
+        std    %f14, [%o0 + 0x38]
+
+.align 16
+.Lhwunaligned:
+       alignaddr %o1, %g0, %o1
+
+       ldd     [%o1 + 0x00], %f18
+.Lhwunaligned_loop:
+       ldd     [%o1 + 0x08], %f20
+       ldd     [%o1 + 0x10], %f22
+       ldd     [%o1 + 0x18], %f24
+       ldd     [%o1 + 0x20], %f26
+       ldd     [%o1 + 0x28], %f28
+       ldd     [%o1 + 0x30], %f30
+       ldd     [%o1 + 0x38], %f32
+       ldd     [%o1 + 0x40], %f34
+       ldd     [%o1 + 0x48], %f36
+       ldd     [%o1 + 0x50], %f38
+       ldd     [%o1 + 0x58], %f40
+       ldd     [%o1 + 0x60], %f42
+       ldd     [%o1 + 0x68], %f44
+       ldd     [%o1 + 0x70], %f46
+       ldd     [%o1 + 0x78], %f48
+       subcc   %o2, 1, %o2             ! done yet?
+       ldd     [%o1 + 0x80], %f50
+       add     %o1, 0x80, %o1
+       prefetch [%o1 + 63], 20
+       prefetch [%o1 + 64+63], 20
+
+       faligndata %f18, %f20, %f16
+       faligndata %f20, %f22, %f18
+       faligndata %f22, %f24, %f20
+       faligndata %f24, %f26, %f22
+       faligndata %f26, %f28, %f24
+       faligndata %f28, %f30, %f26
+       faligndata %f30, %f32, %f28
+       faligndata %f32, %f34, %f30
+       faligndata %f34, %f36, %f32
+       faligndata %f36, %f38, %f34
+       faligndata %f38, %f40, %f36
+       faligndata %f40, %f42, %f38
+       faligndata %f42, %f44, %f40
+       faligndata %f44, %f46, %f42
+       faligndata %f46, %f48, %f44
+       faligndata %f48, %f50, %f46
+
+       .word   0x81b02860              ! SHA512
+
+       bne,pt  SIZE_T_CC, .Lhwunaligned_loop
+       for     %f50, %f50, %f18        ! %f18=%f50
+
+       ba      .Lhwfinish
+       nop
+___
+$code.=<<___ if ($SZ==4);              # SHA256
+       ld      [%o0 + 0x00], %f0
+       ld      [%o0 + 0x04], %f1
+       ld      [%o0 + 0x08], %f2
+       ld      [%o0 + 0x0c], %f3
+       ld      [%o0 + 0x10], %f4
+       ld      [%o0 + 0x14], %f5
+       andcc   %o1, 0x7, %g0
+       ld      [%o0 + 0x18], %f6
+       bne,pn  %icc, .Lhwunaligned
+        ld     [%o0 + 0x1c], %f7
+
+.Lhwloop:
+       ldd     [%o1 + 0x00], %f8
+       ldd     [%o1 + 0x08], %f10
+       ldd     [%o1 + 0x10], %f12
+       ldd     [%o1 + 0x18], %f14
+       ldd     [%o1 + 0x20], %f16
+       ldd     [%o1 + 0x28], %f18
+       ldd     [%o1 + 0x30], %f20
+       subcc   %o2, 1, %o2             ! done yet?
+       ldd     [%o1 + 0x38], %f22
+       add     %o1, 0x40, %o1
+       prefetch [%o1 + 63], 20
+
+       .word   0x81b02840              ! SHA256
+
+       bne,pt  SIZE_T_CC, .Lhwloop
+       nop
+
+.Lhwfinish:
+       st      %f0, [%o0 + 0x00]       ! store context
+       st      %f1, [%o0 + 0x04]
+       st      %f2, [%o0 + 0x08]
+       st      %f3, [%o0 + 0x0c]
+       st      %f4, [%o0 + 0x10]
+       st      %f5, [%o0 + 0x14]
+       st      %f6, [%o0 + 0x18]
+       retl
+        st     %f7, [%o0 + 0x1c]
+
+.align 8
+.Lhwunaligned:
+       alignaddr %o1, %g0, %o1
+
+       ldd     [%o1 + 0x00], %f10
+.Lhwunaligned_loop:
+       ldd     [%o1 + 0x08], %f12
+       ldd     [%o1 + 0x10], %f14
+       ldd     [%o1 + 0x18], %f16
+       ldd     [%o1 + 0x20], %f18
+       ldd     [%o1 + 0x28], %f20
+       ldd     [%o1 + 0x30], %f22
+       ldd     [%o1 + 0x38], %f24
+       subcc   %o2, 1, %o2             ! done yet?
+       ldd     [%o1 + 0x40], %f26
+       add     %o1, 0x40, %o1
+       prefetch [%o1 + 63], 20
+
+       faligndata %f10, %f12, %f8
+       faligndata %f12, %f14, %f10
+       faligndata %f14, %f16, %f12
+       faligndata %f16, %f18, %f14
+       faligndata %f18, %f20, %f16
+       faligndata %f20, %f22, %f18
+       faligndata %f22, %f24, %f20
+       faligndata %f24, %f26, %f22
+
+       .word   0x81b02840              ! SHA256
+
+       bne,pt  SIZE_T_CC, .Lhwunaligned_loop
+       for     %f26, %f26, %f10        ! %f10=%f26
+
+       ba      .Lhwfinish
+       nop
+___
+$code.=<<___;
+.align 16
+.Lsoftware:
+       save    %sp,-STACK_FRAME-$locals,%sp
        and     $inp,`$align-1`,$tmp31
        sllx    $len,`log(16*$SZ)/log(2)`,$len
        andn    $inp,`$align-1`,$inp
@@ -578,7 +778,7 @@ ___
 $code.=<<___;
        add     $inp,`16*$SZ`,$inp              ! advance inp
        cmp     $inp,$len
-       bne     `$bits==64?"%xcc":"%icc"`,.Lloop
+       bne     SIZE_T_CC,.Lloop
        sub     $Ktbl,`($rounds-16)*$SZ`,$Ktbl  ! rewind Ktbl
 
        ret
@@ -589,6 +789,62 @@ $code.=<<___;
 .align 4
 ___
 
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
+# Purpose of these subroutines is to explicitly encode VIS instructions,
+# so that one can compile the module without having to specify VIS
+# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# Idea is to reserve for option to produce "universal" binary and let
+# programmer detect if current CPU is VIS capable at run-time.
+sub unvis {    # ($mnemonic,$rs1,$rs2,$rd) -> ".word 0x... !text" when encodable, else the plain instruction text
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my ($ref,$opf);        # parenthesized so both are lexical; "my $ref,$opf;" declared only $ref and left $opf a package global
+my %visopf = ( "faligndata"    => 0x048,       # opf values for the only mnemonics that get encoded
+               "for"           => 0x07c        );
+
+    $ref = "$mnemonic\t$rs1,$rs2,$rd"; # textual form: the fallback return value and the trailing "!" comment
+
+    if ($opf=$visopf{$mnemonic}) {     # mnemonics missing from %visopf fall through to the else branch
+       foreach ($rs1,$rs2,$rd) {       # $_ aliases each operand, so the assignments below rewrite it in place
+           return $ref if (!/%f([0-9]{1,2})/); # operand is not of the form %fN: keep the text
+           $_=$1;      # operand becomes its register number
+           if ($1>=32) {
+               return $ref if ($1&1);  # odd register number at or above 32: keep the text
+               # re-encode for upper double register addressing
+               $_=($1|$1>>5)&31;
+           }
+       }
+
+       return  sprintf ".word\t0x%08x !%s",
+                       0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+                       $ref;
+    } else {
+       return $ref;
+    }
+}
+sub unalignaddr {      # ($mnemonic,$rs1,$rs2,$rd) -> ".word 0x... !text" when all operands match, else the plain instruction text
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );       # base number added for each register letter (g/o/l/i)
+my $ref="$mnemonic\t$rs1,$rs2,$rd";    # textual form: the fallback return value and the trailing "!" comment
+
+    foreach ($rs1,$rs2,$rd) {  # $_ aliases each operand, so the assignment below rewrites it in place
+       if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; }    # operand becomes bias for its letter plus its digit
+       else                    { return $ref; }        # anything else: keep the text
+    }
+    return  sprintf ".word\t0x%08x !%s",
+                   0x81b00300|$rd<<25|$rs1<<14|$rs2,
+                   $ref;
+}
+
+foreach (split("\n",$code)) {  # post-process the accumulated $code one line at a time and print it
+       s/\`([^\`]*)\`/eval $1/ge;      # replace every backtick-quoted expression with the result of eval'ing it
+
+       s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
+               &unvis($1,$2,$3,$4)
+        /ge;   # any f* mnemonic with three %fN operands is passed through unvis()
+       s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
+               &unalignaddr($1,$2,$3,$4)
+        /ge;   # alignaddr with three integer-register operands is passed through unalignaddr()
+
+       print $_,"\n";
+}
+
 close STDOUT;
index 8d51678..b7b44b4 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 #
 # ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. Rights for redistribution and usage in source and binary
 # forms are granted according to the OpenSSL license.
 # ====================================================================
 # contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit
 # sha256_block:-( This is presumably because 64-bit shifts/rotates
 # apparently are not atomic instructions, but implemented in microcode.
+#
+# May 2012.
+#
+# Optimization including one of Pavel Semjanov's ideas, alternative
+# Maj, resulted in >=5% improvement on most CPUs, +20% SHA256 and
+# unfortunately -2% SHA512 on P4 [which nobody should care about
+# that much].
+#
+# June 2012.
+#
+# Add SIMD code paths, see below for improvement coefficients. SSSE3
+# code path was not attempted for SHA512, because improvement is not
+# estimated to be high enough, noticeably less than 9%, to justify
+# the effort, not on pre-AVX processors. [Obviously with exclusion
+# for VIA Nano, but it has SHA512 instruction that is faster and
+# should be used instead.] For reference, corresponding estimated
+# upper limit for improvement for SSSE3 SHA256 is 28%. The fact that
+# higher coefficients are observed on VIA Nano and Bulldozer has more
+# to do with specifics of their architecture [which is topic for
+# separate discussion].
+#
+# November 2012.
+#
+# Add AVX2 code path. Two consecutive input blocks are loaded to
+# 256-bit %ymm registers, with data from first block to least
+# significant 128-bit halves and data from second to most significant.
+# The data is then processed with same SIMD instruction sequence as
+# for AVX, but with %ymm as operands. Side effect is increased stack
+# frame, 448 additional bytes in SHA256 and 1152 in SHA512, and 1.2KB
+# code size increase.
+#
+# March 2014.
+#
+# Add support for Intel SHA Extensions.
+
+######################################################################
+# Current performance in cycles per processed byte (less is better):
+#
+#              SHA256  SSSE3       AVX/XOP(*)      SHA512  AVX/XOP(*)
+#
+# AMD K8       14.9    -           -               9.57    -
+# P4           17.3    -           -               30.8    -
+# Core 2       15.6    13.8(+13%)  -               9.97    -
+# Westmere     14.8    12.3(+19%)  -               9.58    -
+# Sandy Bridge 17.4    14.2(+23%)  11.6(+50%(**))  11.2    8.10(+38%(**))
+# Ivy Bridge   12.6    10.5(+20%)  10.3(+22%)      8.17    7.22(+13%)
+# Haswell      12.2    9.28(+31%)  7.80(+56%)      7.66    5.40(+42%)
+# Bulldozer    21.1    13.6(+54%)  13.6(+54%(***)) 13.5    8.58(+57%)
+# VIA Nano     23.0    16.5(+39%)  -               14.7    -
+# Atom         23.0    18.9(+22%)  -               14.7    -
+# Silvermont   27.4    20.6(+33%)  -               17.5    -
+#
+# (*)  whichever best applicable;
+# (**) switch from ror to shrd stands for fair share of improvement;
+# (***)        execution time is fully determined by remaining integer-only
+#      part, body_00_15; reducing the amount of SIMD instructions
+#      below certain limit makes no difference/sense; to conserve
+#      space SHA256 XOP code path is therefore omitted;
 
 $flavour = shift;
 $output  = shift;
@@ -51,6 +109,28 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";
 
+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
+               =~ /GNU assembler version ([2-9]\.[0-9]+)/) {   # probe: run $CC on an empty input and parse the reported GNU assembler version
+       $avx = ($1>=2.19) + ($1>=2.22); # $avx becomes 0, 1 or 2; NOTE(review): compared as decimal numbers, so "2.9" ranks above "2.19" - confirm intended
+}
+
+if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
+          `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {  # tried only while $avx is still unset, with $win64 set and a nasm flavour/ASM
+       $avx = ($1>=2.09) + ($1>=2.10);
+}
+
+if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
+          `ml64 2>&1` =~ /Version ([0-9]+)\./) {       # same, for ml64; only the major version number is captured
+       $avx = ($1>=10) + ($1>=11);
+}
+
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { # last probe: clang/LLVM version from "$CC -v"
+       $avx = ($2>=3.0) + ($2>3.0);    # 3.0 exactly gives 1, anything above 3.0 gives 2
+}
+
+$shaext=1;     ### set to zero if compiling for 1.0.1
+$avx=1         if (!$shaext && $avx);  # with $shaext off, any non-zero $avx is forced to 1 (the $avx>1 code is then not emitted)
+
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
 
@@ -60,7 +140,7 @@ if ($output =~ /512/) {
        $SZ=8;
        @ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%rax","%rbx","%rcx","%rdx",
                                        "%r8", "%r9", "%r10","%r11");
-       ($T1,$a0,$a1,$a2)=("%r12","%r13","%r14","%r15");
+       ($T1,$a0,$a1,$a2,$a3)=("%r12","%r13","%r14","%r15","%rdi");
        @Sigma0=(28,34,39);
        @Sigma1=(14,18,41);
        @sigma0=(1,  8, 7);
@@ -72,7 +152,7 @@ if ($output =~ /512/) {
        $SZ=4;
        @ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%eax","%ebx","%ecx","%edx",
                                        "%r8d","%r9d","%r10d","%r11d");
-       ($T1,$a0,$a1,$a2)=("%r12d","%r13d","%r14d","%r15d");
+       ($T1,$a0,$a1,$a2,$a3)=("%r12d","%r13d","%r14d","%r15d","%edi");
        @Sigma0=( 2,13,22);
        @Sigma1=( 6,11,25);
        @sigma0=( 7,18, 3);
@@ -80,8 +160,7 @@ if ($output =~ /512/) {
        $rounds=64;
 }
 
-$ctx="%rdi";   # 1st arg
-$round="%rdi"; # zaps $ctx
+$ctx="%rdi";   # 1st arg, zapped by $a3
 $inp="%rsi";   # 2nd arg
 $Tbl="%rbp";
 
@@ -94,47 +173,51 @@ $framesz="16*$SZ+4*8";
 
 sub ROUND_00_15()
 { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+  my $STRIDE=$SZ;
+     $STRIDE += 16 if ($i%(16/$SZ)==(16/$SZ-1));
 
 $code.=<<___;
        ror     \$`$Sigma1[2]-$Sigma1[1]`,$a0
        mov     $f,$a2
-       mov     $T1,`$SZ*($i&0xf)`(%rsp)
 
-       ror     \$`$Sigma0[2]-$Sigma0[1]`,$a1
        xor     $e,$a0
+       ror     \$`$Sigma0[2]-$Sigma0[1]`,$a1
        xor     $g,$a2                  # f^g
 
-       ror     \$`$Sigma1[1]-$Sigma1[0]`,$a0
-       add     $h,$T1                  # T1+=h
+       mov     $T1,`$SZ*($i&0xf)`(%rsp)
        xor     $a,$a1
-
-       add     ($Tbl,$round,$SZ),$T1   # T1+=K[round]
        and     $e,$a2                  # (f^g)&e
-       mov     $b,$h
+
+       ror     \$`$Sigma1[1]-$Sigma1[0]`,$a0
+       add     $h,$T1                  # T1+=h
+       xor     $g,$a2                  # Ch(e,f,g)=((f^g)&e)^g
 
        ror     \$`$Sigma0[1]-$Sigma0[0]`,$a1
        xor     $e,$a0
-       xor     $g,$a2                  # Ch(e,f,g)=((f^g)&e)^g
+       add     $a2,$T1                 # T1+=Ch(e,f,g)
 
-       xor     $c,$h                   # b^c
+       mov     $a,$a2
+       add     ($Tbl),$T1              # T1+=K[round]
        xor     $a,$a1
-       add     $a2,$T1                 # T1+=Ch(e,f,g)
-       mov     $b,$a2
 
+       xor     $b,$a2                  # a^b, b^c in next round
        ror     \$$Sigma1[0],$a0        # Sigma1(e)
-       and     $a,$h                   # h=(b^c)&a
-       and     $c,$a2                  # b&c
+       mov     $b,$h
 
+       and     $a2,$a3
        ror     \$$Sigma0[0],$a1        # Sigma0(a)
        add     $a0,$T1                 # T1+=Sigma1(e)
-       add     $a2,$h                  # h+=b&c (completes +=Maj(a,b,c)
 
+       xor     $a3,$h                  # h=Maj(a,b,c)=Ch(a^b,c,b)
        add     $T1,$d                  # d+=T1
        add     $T1,$h                  # h+=T1
-       lea     1($round),$round        # round++
-       add     $a1,$h                  # h+=Sigma0(a)
 
+       lea     $STRIDE($Tbl),$Tbl      # round++
+___
+$code.=<<___ if ($i<15);
+       add     $a1,$h                  # h+=Sigma0(a)
 ___
+       ($a2,$a3) = ($a3,$a2);
 }
 
 sub ROUND_16_XX()
@@ -142,29 +225,28 @@ sub ROUND_16_XX()
 
 $code.=<<___;
        mov     `$SZ*(($i+1)&0xf)`(%rsp),$a0
-       mov     `$SZ*(($i+14)&0xf)`(%rsp),$a1
-       mov     $a0,$T1
-       mov     $a1,$a2
-
-       ror     \$`$sigma0[1]-$sigma0[0]`,$T1
-       xor     $a0,$T1
-       shr     \$$sigma0[2],$a0
-
-       ror     \$$sigma0[0],$T1
-       xor     $T1,$a0                 # sigma0(X[(i+1)&0xf])
-       mov     `$SZ*(($i+9)&0xf)`(%rsp),$T1
+       mov     `$SZ*(($i+14)&0xf)`(%rsp),$a2
 
+       mov     $a0,$T1
+       ror     \$`$sigma0[1]-$sigma0[0]`,$a0
+       add     $a1,$a                  # modulo-scheduled h+=Sigma0(a)
+       mov     $a2,$a1
        ror     \$`$sigma1[1]-$sigma1[0]`,$a2
+
+       xor     $T1,$a0
+       shr     \$$sigma0[2],$T1
+       ror     \$$sigma0[0],$a0
        xor     $a1,$a2
        shr     \$$sigma1[2],$a1
 
        ror     \$$sigma1[0],$a2
-       add     $a0,$T1
-       xor     $a2,$a1                 # sigma1(X[(i+14)&0xf])
+       xor     $a0,$T1                 # sigma0(X[(i+1)&0xf])
+       xor     $a1,$a2                 # sigma1(X[(i+14)&0xf])
+       add     `$SZ*(($i+9)&0xf)`(%rsp),$T1
 
        add     `$SZ*($i&0xf)`(%rsp),$T1
        mov     $e,$a0
-       add     $a1,$T1
+       add     $a2,$T1
        mov     $a,$a1
 ___
        &ROUND_00_15(@_);
@@ -173,10 +255,43 @@ ___
 $code=<<___;
 .text
 
+.extern        OPENSSL_ia32cap_P
 .globl $func
-.type  $func,\@function,4
+.type  $func,\@function,3
 .align 16
 $func:
+___
+$code.=<<___ if ($SZ==4 || $avx);
+       lea     OPENSSL_ia32cap_P(%rip),%r11
+       mov     0(%r11),%r9d
+       mov     4(%r11),%r10d
+       mov     8(%r11),%r11d
+___
+$code.=<<___ if ($SZ==4 && $shaext);
+       test    \$`1<<29`,%r11d         # check for SHA
+       jnz     _shaext_shortcut
+___
+$code.=<<___ if ($avx && $SZ==8);
+       test    \$`1<<11`,%r10d         # check for XOP
+       jnz     .Lxop_shortcut
+___
+$code.=<<___ if ($avx>1);
+       and     \$`1<<8|1<<5|1<<3`,%r11d        # check for BMI2+AVX2+BMI1
+       cmp     \$`1<<8|1<<5|1<<3`,%r11d
+       je      .Lavx2_shortcut
+___
+$code.=<<___ if ($avx);
+       and     \$`1<<30`,%r9d          # mask "Intel CPU" bit
+       and     \$`1<<28|1<<9`,%r10d    # mask AVX and SSSE3 bits
+       or      %r9d,%r10d
+       cmp     \$`1<<28|1<<9|1<<30`,%r10d
+       je      .Lavx_shortcut
+___
+$code.=<<___ if ($SZ==4);
+       test    \$`1<<9`,%r10d
+       jnz     .Lssse3_shortcut
+___
+$code.=<<___;
        push    %rbx
        push    %rbp
        push    %r12
@@ -194,8 +309,6 @@ $func:
        mov     %r11,$_rsp              # save copy of %rsp
 .Lprologue:
 
-       lea     $TABLE(%rip),$Tbl
-
        mov     $SZ*0($ctx),$A
        mov     $SZ*1($ctx),$B
        mov     $SZ*2($ctx),$C
@@ -208,7 +321,9 @@ $func:
 
 .align 16
 .Lloop:
-       xor     $round,$round
+       mov     $B,$a3
+       lea     $TABLE(%rip),$Tbl
+       xor     $C,$a3                  # magic
 ___
        for($i=0;$i<16;$i++) {
                $code.="        mov     $SZ*$i($inp),$T1\n";
@@ -229,10 +344,11 @@ ___
        }
 
 $code.=<<___;
-       cmp     \$$rounds,$round
-       jb      .Lrounds_16_xx
+       cmpb    \$0,`$SZ-1`($Tbl)
+       jnz     .Lrounds_16_xx
 
        mov     $_ctx,$ctx
+       add     $a1,$A                  # modulo-scheduled h+=Sigma0(a)
        lea     16*$SZ($inp),$inp
 
        add     $SZ*0($ctx),$A
@@ -275,21 +391,45 @@ $code.=<<___;
 .type  $TABLE,\@object
 $TABLE:
        .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
        .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
        .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
        .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
        .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
        .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
        .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
        .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
        .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
        .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
        .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
        .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
        .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
        .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
        .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
        .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+       .long   0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+       .long   0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+       .long   0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+       .long   0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+       .long   0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+       .long   0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+       .asciz  "SHA256 block transform for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
 ___
 } else {
 $code.=<<___;
@@ -297,123 +437,1815 @@ $code.=<<___;
 .type  $TABLE,\@object
 $TABLE:
        .quad   0x428a2f98d728ae22,0x7137449123ef65cd
+       .quad   0x428a2f98d728ae22,0x7137449123ef65cd
        .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+       .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+       .quad   0x3956c25bf348b538,0x59f111f1b605d019
        .quad   0x3956c25bf348b538,0x59f111f1b605d019
        .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
+       .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
+       .quad   0xd807aa98a3030242,0x12835b0145706fbe
        .quad   0xd807aa98a3030242,0x12835b0145706fbe
        .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+       .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+       .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
        .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
        .quad   0x9bdc06a725c71235,0xc19bf174cf692694
+       .quad   0x9bdc06a725c71235,0xc19bf174cf692694
+       .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
        .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
        .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+       .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+       .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
        .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
        .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+       .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+       .quad   0x983e5152ee66dfab,0xa831c66d2db43210
        .quad   0x983e5152ee66dfab,0xa831c66d2db43210
        .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
+       .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
+       .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
        .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
        .quad   0x06ca6351e003826f,0x142929670a0e6e70
+       .quad   0x06ca6351e003826f,0x142929670a0e6e70
+       .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
        .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
        .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+       .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+       .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
        .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
        .quad   0x81c2c92e47edaee6,0x92722c851482353b
+       .quad   0x81c2c92e47edaee6,0x92722c851482353b
+       .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
        .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
        .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
+       .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
+       .quad   0xd192e819d6ef5218,0xd69906245565a910
        .quad   0xd192e819d6ef5218,0xd69906245565a910
        .quad   0xf40e35855771202a,0x106aa07032bbd1b8
+       .quad   0xf40e35855771202a,0x106aa07032bbd1b8
+       .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
        .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
        .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+       .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+       .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
        .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
        .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+       .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+       .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
        .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
        .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
+       .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
+       .quad   0x90befffa23631e28,0xa4506cebde82bde9
        .quad   0x90befffa23631e28,0xa4506cebde82bde9
        .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
+       .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
+       .quad   0xca273eceea26619c,0xd186b8c721c0c207
        .quad   0xca273eceea26619c,0xd186b8c721c0c207
        .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+       .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+       .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
        .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
        .quad   0x113f9804bef90dae,0x1b710b35131c471b
+       .quad   0x113f9804bef90dae,0x1b710b35131c471b
+       .quad   0x28db77f523047d84,0x32caab7b40c72493
        .quad   0x28db77f523047d84,0x32caab7b40c72493
        .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+       .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+       .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
        .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
        .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
+       .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
+
+       .quad   0x0001020304050607,0x08090a0b0c0d0e0f
+       .quad   0x0001020304050607,0x08090a0b0c0d0e0f
+       .asciz  "SHA512 block transform for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
 ___
 }
 
-# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
-#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
-if ($win64) {
-$rec="%rcx";
-$frame="%rdx";
-$context="%r8";
-$disp="%r9";
+######################################################################
+# SIMD code paths
+#
+if ($SZ==4 && $shaext) {{{
+######################################################################
+# Intel SHA Extensions implementation of SHA256 update function.
+#
+my ($ctx,$inp,$num,$Tbl)=("%rdi","%rsi","%rdx","%rcx");
+
+my ($Wi,$ABEF,$CDGH,$TMP,$BSWAP,$ABEF_SAVE,$CDGH_SAVE)=map("%xmm$_",(0..2,7..10));
+my @MSG=map("%xmm$_",(3..6));
 
 $code.=<<___;
-.extern        __imp_RtlVirtualUnwind
-.type  se_handler,\@abi-omnipotent
+.type  sha256_block_data_order_shaext,\@function,3
+.align 64
+sha256_block_data_order_shaext:
+_shaext_shortcut:
+___
+$code.=<<___ if ($win64);
+       lea     `-8-5*16`(%rsp),%rsp
+       movaps  %xmm6,-8-5*16(%rax)
+       movaps  %xmm7,-8-4*16(%rax)
+       movaps  %xmm8,-8-3*16(%rax)
+       movaps  %xmm9,-8-2*16(%rax)
+       movaps  %xmm10,-8-1*16(%rax)
+.Lprologue_shaext:
+___
+$code.=<<___;
+       lea             K256+0x80(%rip),$Tbl
+       movdqu          ($ctx),$ABEF            # DCBA
+       movdqu          16($ctx),$CDGH          # HGFE
+       movdqa          0x200-0x80($Tbl),$TMP   # byte swap mask
+
+       pshufd          \$0x1b,$ABEF,$Wi        # ABCD
+       pshufd          \$0xb1,$ABEF,$ABEF      # CDAB
+       pshufd          \$0x1b,$CDGH,$CDGH      # EFGH
+       movdqa          $TMP,$BSWAP             # offload
+       palignr         \$8,$CDGH,$ABEF         # ABEF
+       punpcklqdq      $Wi,$CDGH               # CDGH
+       jmp             .Loop_shaext
+
 .align 16
-se_handler:
-       push    %rsi
-       push    %rdi
+.Loop_shaext:
+       movdqu          ($inp),@MSG[0]
+       movdqu          0x10($inp),@MSG[1]
+       movdqu          0x20($inp),@MSG[2]
+       pshufb          $TMP,@MSG[0]
+       movdqu          0x30($inp),@MSG[3]
+
+       movdqa          0*32-0x80($Tbl),$Wi
+       paddd           @MSG[0],$Wi
+       pshufb          $TMP,@MSG[1]
+       movdqa          $CDGH,$CDGH_SAVE        # offload
+       sha256rnds2     $ABEF,$CDGH             # 0-3
+       pshufd          \$0x0e,$Wi,$Wi
+       nop
+       movdqa          $ABEF,$ABEF_SAVE        # offload
+       sha256rnds2     $CDGH,$ABEF
+
+       movdqa          1*32-0x80($Tbl),$Wi
+       paddd           @MSG[1],$Wi
+       pshufb          $TMP,@MSG[2]
+       sha256rnds2     $ABEF,$CDGH             # 4-7
+       pshufd          \$0x0e,$Wi,$Wi
+       lea             0x40($inp),$inp
+       sha256msg1      @MSG[1],@MSG[0]
+       sha256rnds2     $CDGH,$ABEF
+
+       movdqa          2*32-0x80($Tbl),$Wi
+       paddd           @MSG[2],$Wi
+       pshufb          $TMP,@MSG[3]
+       sha256rnds2     $ABEF,$CDGH             # 8-11
+       pshufd          \$0x0e,$Wi,$Wi
+       movdqa          @MSG[3],$TMP
+       palignr         \$4,@MSG[2],$TMP
+       nop
+       paddd           $TMP,@MSG[0]
+       sha256msg1      @MSG[2],@MSG[1]
+       sha256rnds2     $CDGH,$ABEF
+
+       movdqa          3*32-0x80($Tbl),$Wi
+       paddd           @MSG[3],$Wi
+       sha256msg2      @MSG[3],@MSG[0]
+       sha256rnds2     $ABEF,$CDGH             # 12-15
+       pshufd          \$0x0e,$Wi,$Wi
+       movdqa          @MSG[0],$TMP
+       palignr         \$4,@MSG[3],$TMP
+       nop
+       paddd           $TMP,@MSG[1]
+       sha256msg1      @MSG[3],@MSG[2]
+       sha256rnds2     $CDGH,$ABEF
+___
+for($i=4;$i<16-3;$i++) {
+$code.=<<___;
+       movdqa          $i*32-0x80($Tbl),$Wi
+       paddd           @MSG[0],$Wi
+       sha256msg2      @MSG[0],@MSG[1]
+       sha256rnds2     $ABEF,$CDGH             # 16-19...
+       pshufd          \$0x0e,$Wi,$Wi
+       movdqa          @MSG[1],$TMP
+       palignr         \$4,@MSG[0],$TMP
+       nop
+       paddd           $TMP,@MSG[2]
+       sha256msg1      @MSG[0],@MSG[3]
+       sha256rnds2     $CDGH,$ABEF
+___
+       push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+       movdqa          13*32-0x80($Tbl),$Wi
+       paddd           @MSG[0],$Wi
+       sha256msg2      @MSG[0],@MSG[1]
+       sha256rnds2     $ABEF,$CDGH             # 52-55
+       pshufd          \$0x0e,$Wi,$Wi
+       movdqa          @MSG[1],$TMP
+       palignr         \$4,@MSG[0],$TMP
+       sha256rnds2     $CDGH,$ABEF
+       paddd           $TMP,@MSG[2]
+
+       movdqa          14*32-0x80($Tbl),$Wi
+       paddd           @MSG[1],$Wi
+       sha256rnds2     $ABEF,$CDGH             # 56-59
+       pshufd          \$0x0e,$Wi,$Wi
+       sha256msg2      @MSG[1],@MSG[2]
+       movdqa          $BSWAP,$TMP
+       sha256rnds2     $CDGH,$ABEF
+
+       movdqa          15*32-0x80($Tbl),$Wi
+       paddd           @MSG[2],$Wi
+       nop
+       sha256rnds2     $ABEF,$CDGH             # 60-63
+       pshufd          \$0x0e,$Wi,$Wi
+       dec             $num
+       nop
+       sha256rnds2     $CDGH,$ABEF
+
+       paddd           $CDGH_SAVE,$CDGH
+       paddd           $ABEF_SAVE,$ABEF
+       jnz             .Loop_shaext
+
+       pshufd          \$0xb1,$CDGH,$CDGH      # DCHG
+       pshufd          \$0x1b,$ABEF,$TMP       # FEBA
+       pshufd          \$0xb1,$ABEF,$ABEF      # BAFE
+       punpckhqdq      $CDGH,$ABEF             # DCBA
+       palignr         \$8,$TMP,$CDGH          # HGFE
+
+       movdqu  $ABEF,($ctx)
+       movdqu  $CDGH,16($ctx)
+___
+$code.=<<___ if ($win64);
+       movaps  -8-5*16(%rax),%xmm6
+       movaps  -8-4*16(%rax),%xmm7
+       movaps  -8-3*16(%rax),%xmm8
+       movaps  -8-2*16(%rax),%xmm9
+       movaps  -8-1*16(%rax),%xmm10
+       mov     %rax,%rsp
+.Lepilogue_shaext:
+___
+$code.=<<___;
+       ret
+.size  sha256_block_data_order_shaext,.-sha256_block_data_order_shaext
+___
+}}}
+{{{
+
+my $a4=$T1;
+my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+sub AUTOLOAD()         # thunk [simplified] 32-bit style perlasm: a call to an undefined sub &op(...) lands here and appends one instruction line to $code
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;        # drop the package qualifier, what remains is used as the mnemonic
+  my $arg = pop;                                       # the last call argument is written first on the emitted line
+    $arg = "\$$arg" if ($arg*1 eq $arg);               # a purely numeric argument gets a '$' prefix
+    $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n";     # remaining arguments follow in reversed order, so &op(dst,src) emits "op src,dst"
+}
+
+sub body_00_15 () {    # returns one scalar round as a list of perlasm snippet strings; callers eval them one at a time so they can be interleaved with SIMD code
+       (
+       '($a,$b,$c,$d,$e,$f,$g,$h)=@ROT;'.      # bind the round-local names to the current rotation of @ROT
+
+       '&ror   ($a0,$Sigma1[2]-$Sigma1[1])',
+       '&mov   ($a,$a1)',                      # a = a1, the sum left in a1 by the final &add of this list
+       '&mov   ($a4,$f)',
+
+       '&ror   ($a1,$Sigma0[2]-$Sigma0[1])',
+       '&xor   ($a0,$e)',
+       '&xor   ($a4,$g)',                      # f^g
+
+       '&ror   ($a0,$Sigma1[1]-$Sigma1[0])',
+       '&xor   ($a1,$a)',
+       '&and   ($a4,$e)',                      # (f^g)&e
+
+       '&xor   ($a0,$e)',
+       '&add   ($h,$SZ*($i&15)."(%rsp)")',     # h+=X[i]+K[i]
+       '&mov   ($a2,$a)',
+
+       '&xor   ($a4,$g)',                      # Ch(e,f,g)=((f^g)&e)^g
+       '&ror   ($a1,$Sigma0[1]-$Sigma0[0])',
+       '&xor   ($a2,$b)',                      # a^b, b^c in next round
+
+       '&add   ($h,$a4)',                      # h+=Ch(e,f,g)
+       '&ror   ($a0,$Sigma1[0])',              # Sigma1(e)
+       '&and   ($a3,$a2)',                     # (b^c)&(a^b)
+
+       '&xor   ($a1,$a)',
+       '&add   ($h,$a0)',                      # h+=Sigma1(e)
+       '&xor   ($a3,$b)',                      # Maj(a,b,c)=Ch(a^b,c,b)
+
+       '&ror   ($a1,$Sigma0[0])',              # Sigma0(a)
+       '&add   ($d,$h)',                       # d+=h
+       '&add   ($h,$a3)',                      # h+=Maj(a,b,c)
+
+       '&mov   ($a0,$d)',
+       '&add   ($a1,$h);'.                     # h+=Sigma0(a), with the sum landing in a1 rather than h
+       '($a2,$a3) = ($a3,$a2); unshift(@ROT,pop(@ROT)); $i++;' # swap a2/a3, rotate @ROT by one position, advance the round counter
+       );
+}
+
+######################################################################
+# SSSE3 code path
+#
+if ($SZ==4) {  # SHA256 only
+my @X = map("%xmm$_",(0..3));
+my ($t0,$t1,$t2,$t3, $t4,$t5) = map("%xmm$_",(4..9));
+
+$code.=<<___;
+.type  ${func}_ssse3,\@function,3
+.align 64
+${func}_ssse3:
+.Lssse3_shortcut:
        push    %rbx
        push    %rbp
        push    %r12
        push    %r13
        push    %r14
        push    %r15
-       pushfq
-       sub     \$64,%rsp
+       mov     %rsp,%r11               # copy %rsp
+       shl     \$4,%rdx                # num*16
+       sub     \$`$framesz+$win64*16*4`,%rsp
+       lea     ($inp,%rdx,$SZ),%rdx    # inp+num*16*$SZ
+       and     \$-64,%rsp              # align stack frame
+       mov     $ctx,$_ctx              # save ctx, 1st arg
+       mov     $inp,$_inp              # save inp, 2nd arg
+       mov     %rdx,$_end              # save end pointer, "3rd" arg
+       mov     %r11,$_rsp              # save copy of %rsp
+___
+$code.=<<___ if ($win64);
+       movaps  %xmm6,16*$SZ+32(%rsp)
+       movaps  %xmm7,16*$SZ+48(%rsp)
+       movaps  %xmm8,16*$SZ+64(%rsp)
+       movaps  %xmm9,16*$SZ+80(%rsp)
+___
+$code.=<<___;
+.Lprologue_ssse3:
 
-       mov     120($context),%rax      # pull context->Rax
-       mov     248($context),%rbx      # pull context->Rip
+       mov     $SZ*0($ctx),$A
+       mov     $SZ*1($ctx),$B
+       mov     $SZ*2($ctx),$C
+       mov     $SZ*3($ctx),$D
+       mov     $SZ*4($ctx),$E
+       mov     $SZ*5($ctx),$F
+       mov     $SZ*6($ctx),$G
+       mov     $SZ*7($ctx),$H
+___
 
-       lea     .Lprologue(%rip),%r10
-       cmp     %r10,%rbx               # context->Rip<.Lprologue
-       jb      .Lin_prologue
+$code.=<<___;
+       #movdqa $TABLE+`$SZ*2*$rounds`+32(%rip),$t4
+       #movdqa $TABLE+`$SZ*2*$rounds`+64(%rip),$t5
+       jmp     .Lloop_ssse3
+.align 16
+.Lloop_ssse3:
+       movdqa  $TABLE+`$SZ*2*$rounds`(%rip),$t3
+       movdqu  0x00($inp),@X[0]
+       movdqu  0x10($inp),@X[1]
+       movdqu  0x20($inp),@X[2]
+       pshufb  $t3,@X[0]
+       movdqu  0x30($inp),@X[3]
+       lea     $TABLE(%rip),$Tbl
+       pshufb  $t3,@X[1]
+       movdqa  0x00($Tbl),$t0
+       movdqa  0x20($Tbl),$t1
+       pshufb  $t3,@X[2]
+       paddd   @X[0],$t0
+       movdqa  0x40($Tbl),$t2
+       pshufb  $t3,@X[3]
+       movdqa  0x60($Tbl),$t3
+       paddd   @X[1],$t1
+       paddd   @X[2],$t2
+       paddd   @X[3],$t3
+       movdqa  $t0,0x00(%rsp)
+       mov     $A,$a1
+       movdqa  $t1,0x10(%rsp)
+       mov     $B,$a3
+       movdqa  $t2,0x20(%rsp)
+       xor     $C,$a3                  # magic
+       movdqa  $t3,0x30(%rsp)
+       mov     $E,$a0
+       jmp     .Lssse3_00_47
 
-       mov     152($context),%rax      # pull context->Rsp
+.align 16
+.Lssse3_00_47:
+       sub     \$`-16*2*$SZ`,$Tbl      # size optimization
+___
+sub Xupdate_256_SSSE3 () {     # message-schedule update for @X[0] as a list of perlasm snippet strings; in the code visible here only the disabled "if (0)" branch of SSSE3_256_00_47 iterates over it
+       (
+       '&movdqa        ($t0,@X[1]);',
+       '&movdqa        ($t3,@X[3])',
+       '&palignr       ($t0,@X[0],$SZ)',       # X[1..4]
+        '&palignr      ($t3,@X[2],$SZ);',      # X[9..12]
+       '&movdqa        ($t1,$t0)',
+       '&movdqa        ($t2,$t0);',
+       '&psrld         ($t0,$sigma0[2])',
+        '&paddd        (@X[0],$t3);',          # X[0..3] += X[9..12]
+       '&psrld         ($t2,$sigma0[0])',
+        '&pshufd       ($t3,@X[3],0b11111010)',# X[14..15]
+       '&pslld         ($t1,8*$SZ-$sigma0[1]);'.
+       '&pxor          ($t0,$t2)',
+       '&psrld         ($t2,$sigma0[1]-$sigma0[0]);'.
+       '&pxor          ($t0,$t1)',
+       '&pslld         ($t1,$sigma0[1]-$sigma0[0]);'.
+       '&pxor          ($t0,$t2);',
+        '&movdqa       ($t2,$t3)',
+       '&pxor          ($t0,$t1);',            # sigma0(X[1..4])
+        '&psrld        ($t3,$sigma1[2])',
+       '&paddd         (@X[0],$t0);',          # X[0..3] += sigma0(X[1..4])
+        '&psrlq        ($t2,$sigma1[0])',
+        '&pxor         ($t3,$t2);',
+        '&psrlq        ($t2,$sigma1[1]-$sigma1[0])',
+        '&pxor         ($t3,$t2)',
+        '&pshufb       ($t3,$t4)',             # sigma1(X[14..15])
+       '&paddd         (@X[0],$t3)',           # X[0..1] += sigma1(X[14..15])
+        '&pshufd       ($t3,@X[0],0b01010000)',# X[16..17]
+        '&movdqa       ($t2,$t3);',
+        '&psrld        ($t3,$sigma1[2])',
+        '&psrlq        ($t2,$sigma1[0])',
+        '&pxor         ($t3,$t2);',
+        '&psrlq        ($t2,$sigma1[1]-$sigma1[0])',
+        '&pxor         ($t3,$t2);',
+       '&movdqa        ($t2,16*2*$j."($Tbl)")',        # fetch the table row for group $j
+        '&pshufb       ($t3,$t5)',             # NOTE(review): the loads of the $t4/$t5 masks are commented out before .Lloop_ssse3
+       '&paddd         (@X[0],$t3)'            # X[2..3] += sigma1(X[16..17])
+       );
+}
 
-       lea     .Lepilogue(%rip),%r10
-       cmp     %r10,%rbx               # context->Rip>=.Lepilogue
-       jae     .Lin_prologue
+sub SSSE3_256_00_47 () {
+my $j = shift;
+my $body = shift;
+my @X = @_;
+my @insns = (&$body,&$body,&$body,&$body);     # 104 instructions
 
-       mov     16*$SZ+3*8(%rax),%rax   # pull $_rsp
-       lea     48(%rax),%rax
+    if (0) {
+       foreach (Xupdate_256_SSSE3()) {         # 36 instructions
+           eval;
+           eval(shift(@insns));
+           eval(shift(@insns));
+           eval(shift(@insns));
+       }
+    } else {                   # squeeze extra 4% on Westmere and 19% on Atom
+         eval(shift(@insns));  #@
+       &movdqa         ($t0,@X[1]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &movdqa         ($t3,@X[3]);
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+       &palignr        ($t0,@X[0],$SZ);        # X[1..4]
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &palignr       ($t3,@X[2],$SZ);        # X[9..12]
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+       &movdqa         ($t1,$t0);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &movdqa         ($t2,$t0);
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+       &psrld          ($t0,$sigma0[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &paddd         (@X[0],$t3);            # X[0..3] += X[9..12]
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+       &psrld          ($t2,$sigma0[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &pshufd        ($t3,@X[3],0b11111010); # X[14..15]
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+       &pslld          ($t1,8*$SZ-$sigma0[1]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &pxor           ($t0,$t2);
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+       &psrld          ($t2,$sigma0[1]-$sigma0[0]);
+         eval(shift(@insns));
+       &pxor           ($t0,$t1);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &pslld          ($t1,$sigma0[1]-$sigma0[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &pxor           ($t0,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+        &movdqa        ($t2,$t3);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &pxor           ($t0,$t1);              # sigma0(X[1..4])
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &psrld         ($t3,$sigma1[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &paddd          (@X[0],$t0);            # X[0..3] += sigma0(X[1..4])
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+        &psrlq         ($t2,$sigma1[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &pxor          ($t3,$t2);
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+        &psrlq         ($t2,$sigma1[1]-$sigma1[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &pxor          ($t3,$t2);
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+         eval(shift(@insns));
+        #&pshufb       ($t3,$t4);              # sigma1(X[14..15])
+        &pshufd        ($t3,$t3,0b10000000);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &psrldq        ($t3,8);
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+       &paddd          (@X[0],$t3);            # X[0..1] += sigma1(X[14..15])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &pshufd        ($t3,@X[0],0b01010000); # X[16..17]
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+        &movdqa        ($t2,$t3);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &psrld         ($t3,$sigma1[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+        &psrlq         ($t2,$sigma1[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &pxor          ($t3,$t2);
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+        &psrlq         ($t2,$sigma1[1]-$sigma1[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &pxor          ($t3,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));  #@
+        #&pshufb       ($t3,$t5);
+        &pshufd        ($t3,$t3,0b00001000);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &movdqa         ($t2,16*2*$j."($Tbl)");
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+        &pslldq        ($t3,8);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &paddd          (@X[0],$t3);            # X[2..3] += sigma1(X[16..17])
+         eval(shift(@insns));  #@
+         eval(shift(@insns));
+         eval(shift(@insns));
+    }
+       &paddd          ($t2,@X[0]);
+         foreach (@insns) { eval; }            # remaining instructions
+       &movdqa         (16*$j."(%rsp)",$t2);
+}
 
-       mov     -8(%rax),%rbx
-       mov     -16(%rax),%rbp
-       mov     -24(%rax),%r12
-       mov     -32(%rax),%r13
-       mov     -40(%rax),%r14
-       mov     -48(%rax),%r15
-       mov     %rbx,144($context)      # restore context->Rbx
-       mov     %rbp,160($context)      # restore context->Rbp
-       mov     %r12,216($context)      # restore context->R12
-       mov     %r13,224($context)      # restore context->R13
-       mov     %r14,232($context)      # restore context->R14
-       mov     %r15,240($context)      # restore context->R15
+    for ($i=0,$j=0; $j<4; $j++) {
+       &SSSE3_256_00_47($j,\&body_00_15,@X);
+       push(@X,shift(@X));                     # rotate(@X)
+    }
+       &cmpb   ($SZ-1+16*2*$SZ."($Tbl)",0);
+       &jne    (".Lssse3_00_47");
 
-.Lin_prologue:
-       mov     8(%rax),%rdi
-       mov     16(%rax),%rsi
-       mov     %rax,152($context)      # restore context->Rsp
-       mov     %rsi,168($context)      # restore context->Rsi
-       mov     %rdi,176($context)      # restore context->Rdi
+    for ($i=0; $i<16; ) {
+       foreach(body_00_15()) { eval; }
+    }
+$code.=<<___;
+       mov     $_ctx,$ctx
+       mov     $a1,$A
 
-       mov     40($disp),%rdi          # disp->ContextRecord
-       mov     $context,%rsi           # context
-       mov     \$154,%ecx              # sizeof(CONTEXT)
-       .long   0xa548f3fc              # cld; rep movsq
+       add     $SZ*0($ctx),$A
+       lea     16*$SZ($inp),$inp
+       add     $SZ*1($ctx),$B
+       add     $SZ*2($ctx),$C
+       add     $SZ*3($ctx),$D
+       add     $SZ*4($ctx),$E
+       add     $SZ*5($ctx),$F
+       add     $SZ*6($ctx),$G
+       add     $SZ*7($ctx),$H
 
-       mov     $disp,%rsi
-       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
-       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
-       mov     0(%rsi),%r8             # arg3, disp->ControlPc
-       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
-       mov     40(%rsi),%r10           # disp->ContextRecord
-       lea     56(%rsi),%r11           # &disp->HandlerData
-       lea     24(%rsi),%r12           # &disp->EstablisherFrame
-       mov     %r10,32(%rsp)           # arg5
-       mov     %r11,40(%rsp)           # arg6
+       cmp     $_end,$inp
+
+       mov     $A,$SZ*0($ctx)
+       mov     $B,$SZ*1($ctx)
+       mov     $C,$SZ*2($ctx)
+       mov     $D,$SZ*3($ctx)
+       mov     $E,$SZ*4($ctx)
+       mov     $F,$SZ*5($ctx)
+       mov     $G,$SZ*6($ctx)
+       mov     $H,$SZ*7($ctx)
+       jb      .Lloop_ssse3
+
+       mov     $_rsp,%rsi
+___
+$code.=<<___ if ($win64);
+       movaps  16*$SZ+32(%rsp),%xmm6
+       movaps  16*$SZ+48(%rsp),%xmm7
+       movaps  16*$SZ+64(%rsp),%xmm8
+       movaps  16*$SZ+80(%rsp),%xmm9
+___
+$code.=<<___;
+       mov     (%rsi),%r15
+       mov     8(%rsi),%r14
+       mov     16(%rsi),%r13
+       mov     24(%rsi),%r12
+       mov     32(%rsi),%rbp
+       mov     40(%rsi),%rbx
+       lea     48(%rsi),%rsp
+.Lepilogue_ssse3:
+       ret
+.size  ${func}_ssse3,.-${func}_ssse3
+___
+}
+
+if ($avx) {{
+######################################################################
+# XOP code path
+#
+if ($SZ==8) {  # SHA512 only
+$code.=<<___;
+.type  ${func}_xop,\@function,3
+.align 64
+${func}_xop:
+.Lxop_shortcut:
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       mov     %rsp,%r11               # copy %rsp
+       shl     \$4,%rdx                # num*16
+       sub     \$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp
+       lea     ($inp,%rdx,$SZ),%rdx    # inp+num*16*$SZ
+       and     \$-64,%rsp              # align stack frame
+       mov     $ctx,$_ctx              # save ctx, 1st arg
+       mov     $inp,$_inp              # save inp, 2nd arg
+       mov     %rdx,$_end              # save end pointer, "3rd" arg
+       mov     %r11,$_rsp              # save copy of %rsp
+___
+$code.=<<___ if ($win64);
+       movaps  %xmm6,16*$SZ+32(%rsp)
+       movaps  %xmm7,16*$SZ+48(%rsp)
+       movaps  %xmm8,16*$SZ+64(%rsp)
+       movaps  %xmm9,16*$SZ+80(%rsp)
+___
+$code.=<<___ if ($win64 && $SZ>4);
+       movaps  %xmm10,16*$SZ+96(%rsp)
+       movaps  %xmm11,16*$SZ+112(%rsp)
+___
+$code.=<<___;
+.Lprologue_xop:
+
+       vzeroupper
+       mov     $SZ*0($ctx),$A
+       mov     $SZ*1($ctx),$B
+       mov     $SZ*2($ctx),$C
+       mov     $SZ*3($ctx),$D
+       mov     $SZ*4($ctx),$E
+       mov     $SZ*5($ctx),$F
+       mov     $SZ*6($ctx),$G
+       mov     $SZ*7($ctx),$H
+       jmp     .Lloop_xop
+___
+                                       if ($SZ==4) {   # SHA256
+    my @X = map("%xmm$_",(0..3));
+    my ($t0,$t1,$t2,$t3) = map("%xmm$_",(4..7));
+
+$code.=<<___;
+.align 16
+.Lloop_xop:
+       vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3
+       vmovdqu 0x00($inp),@X[0]
+       vmovdqu 0x10($inp),@X[1]
+       vmovdqu 0x20($inp),@X[2]
+       vmovdqu 0x30($inp),@X[3]
+       vpshufb $t3,@X[0],@X[0]
+       lea     $TABLE(%rip),$Tbl
+       vpshufb $t3,@X[1],@X[1]
+       vpshufb $t3,@X[2],@X[2]
+       vpaddd  0x00($Tbl),@X[0],$t0
+       vpshufb $t3,@X[3],@X[3]
+       vpaddd  0x20($Tbl),@X[1],$t1
+       vpaddd  0x40($Tbl),@X[2],$t2
+       vpaddd  0x60($Tbl),@X[3],$t3
+       vmovdqa $t0,0x00(%rsp)
+       mov     $A,$a1
+       vmovdqa $t1,0x10(%rsp)
+       mov     $B,$a3
+       vmovdqa $t2,0x20(%rsp)
+       xor     $C,$a3                  # magic
+       vmovdqa $t3,0x30(%rsp)
+       mov     $E,$a0
+       jmp     .Lxop_00_47
+
+.align 16
+.Lxop_00_47:
+       sub     \$`-16*2*$SZ`,$Tbl      # size optimization
+___
+sub XOP_256_00_47 () {                         # emit one step of the SHA256 XOP schedule loop, vector ops interleaved with scalar round strings
+my $j = shift;                                 # step index; selects the table offset and stack slot used at the end
+my $body = shift;                              # code ref returning a list of instruction strings to be eval'ed
+my @X = @_;                                    # vector register names in their current rotation
+my @insns = (&$body,&$body,&$body,&$body);     # four calls to $body; 104 instructions
+
+       &vpalignr       ($t0,@X[1],@X[0],$SZ);  # X[1..4]
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpalignr      ($t3,@X[3],@X[2],$SZ);  # X[9..12]
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vprotd         ($t1,$t0,8*$SZ-$sigma0[1]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpsrld         ($t0,$t0,$sigma0[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpaddd        (@X[0],@X[0],$t3);      # X[0..3] += X[9..12]
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vprotd         ($t2,$t1,$sigma0[1]-$sigma0[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpxor          ($t0,$t0,$t1);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vprotd        ($t3,@X[3],8*$SZ-$sigma1[1]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpxor          ($t0,$t0,$t2);          # sigma0(X[1..4])
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpsrld        ($t2,@X[3],$sigma1[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddd         (@X[0],@X[0],$t0);      # X[0..3] += sigma0(X[1..4])
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vprotd        ($t1,$t3,$sigma1[1]-$sigma1[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpxor         ($t3,$t3,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpxor         ($t3,$t3,$t1);          # sigma1(X[14..15])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpsrldq        ($t3,$t3,8);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddd         (@X[0],@X[0],$t3);      # X[0..1] += sigma1(X[14..15])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vprotd        ($t3,@X[0],8*$SZ-$sigma1[1]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpsrld        ($t2,@X[0],$sigma1[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vprotd        ($t1,$t3,$sigma1[1]-$sigma1[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpxor         ($t3,$t3,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpxor         ($t3,$t3,$t1);          # sigma1(X[16..17])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpslldq        ($t3,$t3,8);            # 22 instructions
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddd         (@X[0],@X[0],$t3);      # X[2..3] += sigma1(X[16..17])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddd         ($t2,@X[0],16*2*$j."($Tbl)");   # $t2 = updated X[0] + table entry at 16*2*$j($Tbl)
+         foreach (@insns) { eval; }            # remaining instructions
+       &vmovdqa        (16*$j."(%rsp)",$t2);   # store the sum in stack slot 16*$j(%rsp)
+}
+
+    for ($i=0,$j=0; $j<4; $j++) {              # four steps, one per register name in @X
+       &XOP_256_00_47($j,\&body_00_15,@X);
+       push(@X,shift(@X));                     # rotate(@X)
+    }
+       &cmpb   ($SZ-1+16*2*$SZ."($Tbl)",0);    # test one table byte against zero; presumably an end-of-constants check (table layout is outside this chunk)
+       &jne    (".Lxop_00_47");                # non-zero: run the schedule loop again
+
+    for ($i=0; $i<16; ) {                      # no increment here; presumably the body_00_15 strings advance $i (defined outside this chunk)
+       foreach(body_00_15()) { eval; }
+    }
+
+                                       } else {        # SHA512
+    my @X = map("%xmm$_",(0..7));
+    my ($t0,$t1,$t2,$t3) = map("%xmm$_",(8..11));
+
+$code.=<<___;
+.align 16
+.Lloop_xop:
+       vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3
+       vmovdqu 0x00($inp),@X[0]
+       lea     $TABLE+0x80(%rip),$Tbl  # size optimization
+       vmovdqu 0x10($inp),@X[1]
+       vmovdqu 0x20($inp),@X[2]
+       vpshufb $t3,@X[0],@X[0]
+       vmovdqu 0x30($inp),@X[3]
+       vpshufb $t3,@X[1],@X[1]
+       vmovdqu 0x40($inp),@X[4]
+       vpshufb $t3,@X[2],@X[2]
+       vmovdqu 0x50($inp),@X[5]
+       vpshufb $t3,@X[3],@X[3]
+       vmovdqu 0x60($inp),@X[6]
+       vpshufb $t3,@X[4],@X[4]
+       vmovdqu 0x70($inp),@X[7]
+       vpshufb $t3,@X[5],@X[5]
+       vpaddq  -0x80($Tbl),@X[0],$t0
+       vpshufb $t3,@X[6],@X[6]
+       vpaddq  -0x60($Tbl),@X[1],$t1
+       vpshufb $t3,@X[7],@X[7]
+       vpaddq  -0x40($Tbl),@X[2],$t2
+       vpaddq  -0x20($Tbl),@X[3],$t3
+       vmovdqa $t0,0x00(%rsp)
+       vpaddq  0x00($Tbl),@X[4],$t0
+       vmovdqa $t1,0x10(%rsp)
+       vpaddq  0x20($Tbl),@X[5],$t1
+       vmovdqa $t2,0x20(%rsp)
+       vpaddq  0x40($Tbl),@X[6],$t2
+       vmovdqa $t3,0x30(%rsp)
+       vpaddq  0x60($Tbl),@X[7],$t3
+       vmovdqa $t0,0x40(%rsp)
+       mov     $A,$a1
+       vmovdqa $t1,0x50(%rsp)
+       mov     $B,$a3
+       vmovdqa $t2,0x60(%rsp)
+       xor     $C,$a3                  # magic
+       vmovdqa $t3,0x70(%rsp)
+       mov     $E,$a0
+       jmp     .Lxop_00_47
+
+.align 16
+.Lxop_00_47:
+       add     \$`16*2*$SZ`,$Tbl
+___
+sub XOP_512_00_47 () {                         # emit one step of the SHA512 XOP schedule loop, vector ops interleaved with scalar round strings
+my $j = shift;                                 # step index; selects the table offset and stack slot used at the end
+my $body = shift;                              # code ref returning a list of instruction strings to be eval'ed
+my @X = @_;                                    # vector register names in their current rotation
+my @insns = (&$body,&$body);                   # two calls to $body; 52 instructions
+
+       &vpalignr       ($t0,@X[1],@X[0],$SZ);  # X[1..2]
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpalignr      ($t3,@X[5],@X[4],$SZ);  # X[9..10]
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vprotq         ($t1,$t0,8*$SZ-$sigma0[1]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpsrlq         ($t0,$t0,$sigma0[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpaddq        (@X[0],@X[0],$t3);      # X[0..1] += X[9..10]
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vprotq         ($t2,$t1,$sigma0[1]-$sigma0[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpxor          ($t0,$t0,$t1);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vprotq        ($t3,@X[7],8*$SZ-$sigma1[1]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpxor          ($t0,$t0,$t2);          # sigma0(X[1..2])
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpsrlq        ($t2,@X[7],$sigma1[2]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddq         (@X[0],@X[0],$t0);      # X[0..1] += sigma0(X[1..2])
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vprotq        ($t1,$t3,$sigma1[1]-$sigma1[0]);
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpxor         ($t3,$t3,$t2);
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+        &vpxor         ($t3,$t3,$t1);          # sigma1(X[14..15])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddq         (@X[0],@X[0],$t3);      # X[0..1] += sigma1(X[14..15])
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+         eval(shift(@insns));
+       &vpaddq         ($t2,@X[0],16*2*$j-0x80."($Tbl)");      # $t2 = updated X[0] + table entry; -0x80 matches the +0x80 bias applied when $Tbl was loaded
+         foreach (@insns) { eval; }            # remaining instructions
+       &vmovdqa        (16*$j."(%rsp)",$t2);   # store the sum in stack slot 16*$j(%rsp)
+}
+
+    for ($i=0,$j=0; $j<8; $j++) {              # eight steps, one per register name in @X
+       &XOP_512_00_47($j,\&body_00_15,@X);
+       push(@X,shift(@X));                     # rotate(@X)
+    }
+       &cmpb   ($SZ-1+16*2*$SZ-0x80."($Tbl)",0);       # test one table byte against zero; presumably an end-of-constants check (table layout is outside this chunk)
+       &jne    (".Lxop_00_47");                # non-zero: run the schedule loop again
+
+    for ($i=0; $i<16; ) {                      # no increment here; presumably the body_00_15 strings advance $i (defined outside this chunk)
+       foreach(body_00_15()) { eval; }
+    }
+}
+$code.=<<___;
+       mov     $_ctx,$ctx
+       mov     $a1,$A
+
+       add     $SZ*0($ctx),$A
+       lea     16*$SZ($inp),$inp
+       add     $SZ*1($ctx),$B
+       add     $SZ*2($ctx),$C
+       add     $SZ*3($ctx),$D
+       add     $SZ*4($ctx),$E
+       add     $SZ*5($ctx),$F
+       add     $SZ*6($ctx),$G
+       add     $SZ*7($ctx),$H
+
+       cmp     $_end,$inp
+
+       mov     $A,$SZ*0($ctx)
+       mov     $B,$SZ*1($ctx)
+       mov     $C,$SZ*2($ctx)
+       mov     $D,$SZ*3($ctx)
+       mov     $E,$SZ*4($ctx)
+       mov     $F,$SZ*5($ctx)
+       mov     $G,$SZ*6($ctx)
+       mov     $H,$SZ*7($ctx)
+       jb      .Lloop_xop
+
+       mov     $_rsp,%rsi
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       movaps  16*$SZ+32(%rsp),%xmm6
+       movaps  16*$SZ+48(%rsp),%xmm7
+       movaps  16*$SZ+64(%rsp),%xmm8
+       movaps  16*$SZ+80(%rsp),%xmm9
+___
+$code.=<<___ if ($win64 && $SZ>4);
+       movaps  16*$SZ+96(%rsp),%xmm10
+       movaps  16*$SZ+112(%rsp),%xmm11
+___
+$code.=<<___;
+       mov     (%rsi),%r15
+       mov     8(%rsi),%r14
+       mov     16(%rsi),%r13
+       mov     24(%rsi),%r12
+       mov     32(%rsi),%rbp
+       mov     40(%rsi),%rbx
+       lea     48(%rsi),%rsp
+.Lepilogue_xop:
+       ret
+.size  ${func}_xop,.-${func}_xop
+___
+}
+######################################################################
+# AVX+shrd code path
+#
+local *ror = sub { &shrd(@_[0],@_) };
+
+$code.=<<___;
+.type  ${func}_avx,\@function,3
+.align 64
+${func}_avx:
+.Lavx_shortcut:
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       mov     %rsp,%r11               # copy %rsp
+       shl     \$4,%rdx                # num*16
+       sub     \$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp
+       lea     ($inp,%rdx,$SZ),%rdx    # inp+num*16*$SZ
+       and     \$-64,%rsp              # align stack frame
+       mov     $ctx,$_ctx              # save ctx, 1st arg
+       mov     $inp,$_inp              # save inp, 2nd arh
+       mov     %rdx,$_end              # save end pointer, "3rd" arg
+       mov     %r11,$_rsp              # save copy of %rsp
+___
+$code.=<<___ if ($win64);
+       movaps  %xmm6,16*$SZ+32(%rsp)
+       movaps  %xmm7,16*$SZ+48(%rsp)
+       movaps  %xmm8,16*$SZ+64(%rsp)
+       movaps  %xmm9,16*$SZ+80(%rsp)
+___
+$code.=<<___ if ($win64 && $SZ>4);
+       movaps  %xmm10,16*$SZ+96(%rsp)
+       movaps  %xmm11,16*$SZ+112(%rsp)
+___
+$code.=<<___;
+.Lprologue_avx:
+
+       vzeroupper
+       mov     $SZ*0($ctx),$A
+       mov     $SZ*1($ctx),$B
+       mov     $SZ*2($ctx),$C
+       mov     $SZ*3($ctx),$D
+       mov     $SZ*4($ctx),$E
+       mov     $SZ*5($ctx),$F
+       mov     $SZ*6($ctx),$G
+       mov     $SZ*7($ctx),$H
+___
+                                       if ($SZ==4) {   # SHA256
+    my @X = map("%xmm$_",(0..3));
+    my ($t0,$t1,$t2,$t3, $t4,$t5) = map("%xmm$_",(4..9));
+
+$code.=<<___;
+       vmovdqa $TABLE+`$SZ*2*$rounds`+32(%rip),$t4
+       vmovdqa $TABLE+`$SZ*2*$rounds`+64(%rip),$t5
+       jmp     .Lloop_avx
+.align 16
+.Lloop_avx:
+       vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3
+       vmovdqu 0x00($inp),@X[0]
+       vmovdqu 0x10($inp),@X[1]
+       vmovdqu 0x20($inp),@X[2]
+       vmovdqu 0x30($inp),@X[3]
+       vpshufb $t3,@X[0],@X[0]
+       lea     $TABLE(%rip),$Tbl
+       vpshufb $t3,@X[1],@X[1]
+       vpshufb $t3,@X[2],@X[2]
+       vpaddd  0x00($Tbl),@X[0],$t0
+       vpshufb $t3,@X[3],@X[3]
+       vpaddd  0x20($Tbl),@X[1],$t1
+       vpaddd  0x40($Tbl),@X[2],$t2
+       vpaddd  0x60($Tbl),@X[3],$t3
+       vmovdqa $t0,0x00(%rsp)
+       mov     $A,$a1
+       vmovdqa $t1,0x10(%rsp)
+       mov     $B,$a3
+       vmovdqa $t2,0x20(%rsp)
+       xor     $C,$a3                  # magic
+       vmovdqa $t3,0x30(%rsp)
+       mov     $E,$a0
+       jmp     .Lavx_00_47
+
+.align 16
+.Lavx_00_47:
+       sub     \$`-16*2*$SZ`,$Tbl      # size optimization
+___
+sub Xupdate_256_AVX () {                       # return the SHA256 message-schedule update as a list of perlasm call strings
+       (                                       # nothing is emitted here: callers eval each string, so @X and $t0..$t5 resolve in the caller
+       '&vpalignr      ($t0,@X[1],@X[0],$SZ)', # X[1..4]
+        '&vpalignr     ($t3,@X[3],@X[2],$SZ)', # X[9..12]
+       '&vpsrld        ($t2,$t0,$sigma0[0]);',
+        '&vpaddd       (@X[0],@X[0],$t3)',     # X[0..3] += X[9..12]
+       '&vpsrld        ($t3,$t0,$sigma0[2])',
+       '&vpslld        ($t1,$t0,8*$SZ-$sigma0[1]);',
+       '&vpxor         ($t0,$t3,$t2)',
+        '&vpshufd      ($t3,@X[3],0b11111010)',# X[14..15]
+       '&vpsrld        ($t2,$t2,$sigma0[1]-$sigma0[0]);',
+       '&vpxor         ($t0,$t0,$t1)',
+       '&vpslld        ($t1,$t1,$sigma0[1]-$sigma0[0]);',
+       '&vpxor         ($t0,$t0,$t2)',
+        '&vpsrld       ($t2,$t3,$sigma1[2]);',
+       '&vpxor         ($t0,$t0,$t1)',         # sigma0(X[1..4])
+        '&vpsrlq       ($t3,$t3,$sigma1[0]);',
+       '&vpaddd        (@X[0],@X[0],$t0)',     # X[0..3] += sigma0(X[1..4])
+        '&vpxor        ($t2,$t2,$t3);',
+        '&vpsrlq       ($t3,$t3,$sigma1[1]-$sigma1[0])',
+        '&vpxor        ($t2,$t2,$t3)',
+        '&vpshufb      ($t2,$t2,$t4)',         # sigma1(X[14..15])
+       '&vpaddd        (@X[0],@X[0],$t2)',     # X[0..1] += sigma1(X[14..15])
+        '&vpshufd      ($t3,@X[0],0b01010000)',# X[16..17]
+        '&vpsrld       ($t2,$t3,$sigma1[2])',
+        '&vpsrlq       ($t3,$t3,$sigma1[0])',
+        '&vpxor        ($t2,$t2,$t3);',
+        '&vpsrlq       ($t3,$t3,$sigma1[1]-$sigma1[0])',
+        '&vpxor        ($t2,$t2,$t3)',
+        '&vpshufb      ($t2,$t2,$t5)',
+       '&vpaddd        (@X[0],@X[0],$t2)'      # X[2..3] += sigma1(X[16..17])
+       );
+}
+
+sub AVX_256_00_47 () {                         # emit one step of the SHA256 AVX schedule loop, vector ops interleaved with scalar round strings
+my $j = shift;                                 # step index; selects the table offset and stack slot used at the end
+my $body = shift;                              # code ref returning a list of instruction strings to be eval'ed
+my @X = @_;                                    # vector register names; the Xupdate strings read this lexical when eval'ed below
+my @insns = (&$body,&$body,&$body,&$body);     # four calls to $body; 104 instructions
+
+       foreach (Xupdate_256_AVX()) {           # 29 instructions
+           eval;                               # one vector schedule instruction
+           eval(shift(@insns));                # followed by three scalar instruction strings
+           eval(shift(@insns));
+           eval(shift(@insns));
+       }
+       &vpaddd         ($t2,@X[0],16*2*$j."($Tbl)");   # $t2 = updated X[0] + table entry at 16*2*$j($Tbl)
+         foreach (@insns) { eval; }            # remaining instructions
+       &vmovdqa        (16*$j."(%rsp)",$t2);   # store the sum in stack slot 16*$j(%rsp)
+}
+
+    for ($i=0,$j=0; $j<4; $j++) {              # four steps, one per register name in @X
+       &AVX_256_00_47($j,\&body_00_15,@X);
+       push(@X,shift(@X));                     # rotate(@X)
+    }
+       &cmpb   ($SZ-1+16*2*$SZ."($Tbl)",0);    # test one table byte against zero; presumably an end-of-constants check (table layout is outside this chunk)
+       &jne    (".Lavx_00_47");                # non-zero: run the schedule loop again
+
+    for ($i=0; $i<16; ) {                      # no increment here; presumably the body_00_15 strings advance $i (defined outside this chunk)
+       foreach(body_00_15()) { eval; }
+    }
+
+                                       } else {        # SHA512
+    my @X = map("%xmm$_",(0..7));
+    my ($t0,$t1,$t2,$t3) = map("%xmm$_",(8..11));
+
+$code.=<<___;
+       jmp     .Lloop_avx
+.align 16
+.Lloop_avx:
+       vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3
+       vmovdqu 0x00($inp),@X[0]
+       lea     $TABLE+0x80(%rip),$Tbl  # size optimization
+       vmovdqu 0x10($inp),@X[1]
+       vmovdqu 0x20($inp),@X[2]
+       vpshufb $t3,@X[0],@X[0]
+       vmovdqu 0x30($inp),@X[3]
+       vpshufb $t3,@X[1],@X[1]
+       vmovdqu 0x40($inp),@X[4]
+       vpshufb $t3,@X[2],@X[2]
+       vmovdqu 0x50($inp),@X[5]
+       vpshufb $t3,@X[3],@X[3]
+       vmovdqu 0x60($inp),@X[6]
+       vpshufb $t3,@X[4],@X[4]
+       vmovdqu 0x70($inp),@X[7]
+       vpshufb $t3,@X[5],@X[5]
+       vpaddq  -0x80($Tbl),@X[0],$t0
+       vpshufb $t3,@X[6],@X[6]
+       vpaddq  -0x60($Tbl),@X[1],$t1
+       vpshufb $t3,@X[7],@X[7]
+       vpaddq  -0x40($Tbl),@X[2],$t2
+       vpaddq  -0x20($Tbl),@X[3],$t3
+       vmovdqa $t0,0x00(%rsp)
+       vpaddq  0x00($Tbl),@X[4],$t0
+       vmovdqa $t1,0x10(%rsp)
+       vpaddq  0x20($Tbl),@X[5],$t1
+       vmovdqa $t2,0x20(%rsp)
+       vpaddq  0x40($Tbl),@X[6],$t2
+       vmovdqa $t3,0x30(%rsp)
+       vpaddq  0x60($Tbl),@X[7],$t3
+       vmovdqa $t0,0x40(%rsp)
+       mov     $A,$a1
+       vmovdqa $t1,0x50(%rsp)
+       mov     $B,$a3
+       vmovdqa $t2,0x60(%rsp)
+       xor     $C,$a3                  # magic
+       vmovdqa $t3,0x70(%rsp)
+       mov     $E,$a0
+       jmp     .Lavx_00_47
+
+.align 16
+.Lavx_00_47:
+       add     \$`16*2*$SZ`,$Tbl
+___
+sub Xupdate_512_AVX () {                       # return the SHA512 message-schedule update as a list of perlasm call strings
+       (                                       # callers eval each string; AVX2_512_00_47 also tests for a trailing ";" and adds no scalar instructions after such entries
+       '&vpalignr      ($t0,@X[1],@X[0],$SZ)', # X[1..2]
+        '&vpalignr     ($t3,@X[5],@X[4],$SZ)', # X[9..10]
+       '&vpsrlq        ($t2,$t0,$sigma0[0])',
+        '&vpaddq       (@X[0],@X[0],$t3);',    # X[0..1] += X[9..10]
+       '&vpsrlq        ($t3,$t0,$sigma0[2])',
+       '&vpsllq        ($t1,$t0,8*$SZ-$sigma0[1]);',
+        '&vpxor        ($t0,$t3,$t2)',
+       '&vpsrlq        ($t2,$t2,$sigma0[1]-$sigma0[0]);',
+        '&vpxor        ($t0,$t0,$t1)',
+       '&vpsllq        ($t1,$t1,$sigma0[1]-$sigma0[0]);',
+        '&vpxor        ($t0,$t0,$t2)',
+        '&vpsrlq       ($t3,@X[7],$sigma1[2]);',
+       '&vpxor         ($t0,$t0,$t1)',         # sigma0(X[1..2])
+        '&vpsllq       ($t2,@X[7],8*$SZ-$sigma1[1]);',
+       '&vpaddq        (@X[0],@X[0],$t0)',     # X[0..1] += sigma0(X[1..2])
+        '&vpsrlq       ($t1,@X[7],$sigma1[0]);',
+        '&vpxor        ($t3,$t3,$t2)',
+        '&vpsllq       ($t2,$t2,$sigma1[1]-$sigma1[0]);',
+        '&vpxor        ($t3,$t3,$t1)',
+        '&vpsrlq       ($t1,$t1,$sigma1[1]-$sigma1[0]);',
+        '&vpxor        ($t3,$t3,$t2)',
+        '&vpxor        ($t3,$t3,$t1)',         # sigma1(X[14..15])
+       '&vpaddq        (@X[0],@X[0],$t3)',     # X[0..1] += sigma1(X[14..15])
+       );
+}
+
+sub AVX_512_00_47 () {                         # emit one step of the SHA512 AVX schedule loop, vector ops interleaved with scalar round strings
+my $j = shift;                                 # step index; selects the table offset and stack slot used at the end
+my $body = shift;                              # code ref returning a list of instruction strings to be eval'ed
+my @X = @_;                                    # vector register names; the Xupdate strings read this lexical when eval'ed below
+my @insns = (&$body,&$body);                   # two calls to $body; 52 instructions
+
+       foreach (Xupdate_512_AVX()) {           # 23 instructions
+           eval;                               # one vector schedule instruction
+           eval(shift(@insns));                # followed by two scalar instruction strings
+           eval(shift(@insns));
+       }
+       &vpaddq         ($t2,@X[0],16*2*$j-0x80."($Tbl)");      # $t2 = updated X[0] + table entry; -0x80 matches the +0x80 bias applied when $Tbl was loaded
+         foreach (@insns) { eval; }            # remaining instructions
+       &vmovdqa        (16*$j."(%rsp)",$t2);   # store the sum in stack slot 16*$j(%rsp)
+}
+
+    for ($i=0,$j=0; $j<8; $j++) {              # eight steps, one per register name in @X
+       &AVX_512_00_47($j,\&body_00_15,@X);
+       push(@X,shift(@X));                     # rotate(@X)
+    }
+       &cmpb   ($SZ-1+16*2*$SZ-0x80."($Tbl)",0);       # test one table byte against zero; presumably an end-of-constants check (table layout is outside this chunk)
+       &jne    (".Lavx_00_47");                # non-zero: run the schedule loop again
+
+    for ($i=0; $i<16; ) {                      # no increment here; presumably the body_00_15 strings advance $i (defined outside this chunk)
+       foreach(body_00_15()) { eval; }
+    }
+}
+$code.=<<___;
+       mov     $_ctx,$ctx
+       mov     $a1,$A
+
+       add     $SZ*0($ctx),$A
+       lea     16*$SZ($inp),$inp
+       add     $SZ*1($ctx),$B
+       add     $SZ*2($ctx),$C
+       add     $SZ*3($ctx),$D
+       add     $SZ*4($ctx),$E
+       add     $SZ*5($ctx),$F
+       add     $SZ*6($ctx),$G
+       add     $SZ*7($ctx),$H
+
+       cmp     $_end,$inp
+
+       mov     $A,$SZ*0($ctx)
+       mov     $B,$SZ*1($ctx)
+       mov     $C,$SZ*2($ctx)
+       mov     $D,$SZ*3($ctx)
+       mov     $E,$SZ*4($ctx)
+       mov     $F,$SZ*5($ctx)
+       mov     $G,$SZ*6($ctx)
+       mov     $H,$SZ*7($ctx)
+       jb      .Lloop_avx
+
+       mov     $_rsp,%rsi
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       movaps  16*$SZ+32(%rsp),%xmm6
+       movaps  16*$SZ+48(%rsp),%xmm7
+       movaps  16*$SZ+64(%rsp),%xmm8
+       movaps  16*$SZ+80(%rsp),%xmm9
+___
+$code.=<<___ if ($win64 && $SZ>4);
+       movaps  16*$SZ+96(%rsp),%xmm10
+       movaps  16*$SZ+112(%rsp),%xmm11
+___
+$code.=<<___;
+       mov     (%rsi),%r15
+       mov     8(%rsi),%r14
+       mov     16(%rsi),%r13
+       mov     24(%rsi),%r12
+       mov     32(%rsi),%rbp
+       mov     40(%rsi),%rbx
+       lea     48(%rsi),%rsp
+.Lepilogue_avx:
+       ret
+.size  ${func}_avx,.-${func}_avx
+___
+
+if ($avx>1) {{
+######################################################################
+# AVX2+BMI code path
+#
+my $a5=$SZ==4?"%esi":"%rsi";   # zap $inp 
+my $PUSH8=8*2*$SZ;
+use integer;
+
+sub bodyx_00_15 () {                           # return one round as a list of perlasm call strings built on rorx/andn/lea; callers eval them
+       # at start $a1 should be zero, $a3 - $b^$c and $a4 copy of $f
+       (
+       '($a,$b,$c,$d,$e,$f,$g,$h)=@ROT;'.      # joined by "." with the next string, so both run in a single eval
+
+       '&add   ($h,(32*($i/(16/$SZ))+$SZ*($i%(16/$SZ)))%$PUSH8.$base)',    # h+=X[i]+K[i]; $base is supplied by the scope doing the eval
+       '&and   ($a4,$e)',              # f&e
+       '&rorx  ($a0,$e,$Sigma1[2])',
+       '&rorx  ($a2,$e,$Sigma1[1])',
+
+       '&lea   ($a,"($a,$a1)")',       # h+=Sigma0(a) from the past
+       '&lea   ($h,"($h,$a4)")',
+       '&andn  ($a4,$e,$g)',           # ~e&g
+       '&xor   ($a0,$a2)',
+
+       '&rorx  ($a1,$e,$Sigma1[0])',
+       '&lea   ($h,"($h,$a4)")',       # h+=Ch(e,f,g)=(e&f)+(~e&g)
+       '&xor   ($a0,$a1)',             # Sigma1(e)
+       '&mov   ($a2,$a)',
+
+       '&rorx  ($a4,$a,$Sigma0[2])',
+       '&lea   ($h,"($h,$a0)")',       # h+=Sigma1(e)
+       '&xor   ($a2,$b)',              # a^b, b^c in next round
+       '&rorx  ($a1,$a,$Sigma0[1])',
+
+       '&rorx  ($a0,$a,$Sigma0[0])',
+       '&lea   ($d,"($d,$h)")',        # d+=h
+       '&and   ($a3,$a2)',             # (b^c)&(a^b)
+       '&xor   ($a1,$a4)',
+
+       '&xor   ($a3,$b)',              # Maj(a,b,c)=Ch(a^b,c,b)
+       '&xor   ($a1,$a0)',             # Sigma0(a)
+       '&lea   ($h,"($h,$a3)");'.      # h+=Maj(a,b,c)
+       '&mov   ($a4,$e)',              # copy of f in future
+
+       '($a2,$a3) = ($a3,$a2); unshift(@ROT,pop(@ROT)); $i++;' # bookkeeping only: swap $a2/$a3, rotate @ROT, advance the round counter $i
+       );
+       # and at the finish one has to $a+=$a1
+}
+
+$code.=<<___;
+.type  ${func}_avx2,\@function,3
+.align 64
+${func}_avx2:
+.Lavx2_shortcut:
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       mov     %rsp,%r11               # copy %rsp
+       sub     \$`2*$SZ*$rounds+4*8+$win64*16*($SZ==4?4:6)`,%rsp
+       shl     \$4,%rdx                # num*16
+       and     \$-256*$SZ,%rsp         # align stack frame
+       lea     ($inp,%rdx,$SZ),%rdx    # inp+num*16*$SZ
+       add     \$`2*$SZ*($rounds-8)`,%rsp
+       mov     $ctx,$_ctx              # save ctx, 1st arg
+       mov     $inp,$_inp              # save inp, 2nd arh
+       mov     %rdx,$_end              # save end pointer, "3rd" arg
+       mov     %r11,$_rsp              # save copy of %rsp
+___
+$code.=<<___ if ($win64);
+       movaps  %xmm6,16*$SZ+32(%rsp)
+       movaps  %xmm7,16*$SZ+48(%rsp)
+       movaps  %xmm8,16*$SZ+64(%rsp)
+       movaps  %xmm9,16*$SZ+80(%rsp)
+___
+$code.=<<___ if ($win64 && $SZ>4);
+       movaps  %xmm10,16*$SZ+96(%rsp)
+       movaps  %xmm11,16*$SZ+112(%rsp)
+___
+$code.=<<___;
+.Lprologue_avx2:
+
+       vzeroupper
+       sub     \$-16*$SZ,$inp          # inp++, size optimization
+       mov     $SZ*0($ctx),$A
+       mov     $inp,%r12               # borrow $T1
+       mov     $SZ*1($ctx),$B
+       cmp     %rdx,$inp               # $_end
+       mov     $SZ*2($ctx),$C
+       cmove   %rsp,%r12               # next block or random data
+       mov     $SZ*3($ctx),$D
+       mov     $SZ*4($ctx),$E
+       mov     $SZ*5($ctx),$F
+       mov     $SZ*6($ctx),$G
+       mov     $SZ*7($ctx),$H
+___
+                                       if ($SZ==4) {   # SHA256
+    my @X = map("%ymm$_",(0..3));
+    my ($t0,$t1,$t2,$t3, $t4,$t5) = map("%ymm$_",(4..9));
+
+$code.=<<___;
+       vmovdqa $TABLE+`$SZ*2*$rounds`+32(%rip),$t4
+       vmovdqa $TABLE+`$SZ*2*$rounds`+64(%rip),$t5
+       jmp     .Loop_avx2
+.align 16
+.Loop_avx2:
+       vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3
+       vmovdqu -16*$SZ+0($inp),%xmm0
+       vmovdqu -16*$SZ+16($inp),%xmm1
+       vmovdqu -16*$SZ+32($inp),%xmm2
+       vmovdqu -16*$SZ+48($inp),%xmm3
+       #mov            $inp,$_inp      # offload $inp
+       vinserti128     \$1,(%r12),@X[0],@X[0]
+       vinserti128     \$1,16(%r12),@X[1],@X[1]
+       vpshufb         $t3,@X[0],@X[0]
+       vinserti128     \$1,32(%r12),@X[2],@X[2]
+       vpshufb         $t3,@X[1],@X[1]
+       vinserti128     \$1,48(%r12),@X[3],@X[3]
+
+       lea     $TABLE(%rip),$Tbl
+       vpshufb $t3,@X[2],@X[2]
+       vpaddd  0x00($Tbl),@X[0],$t0
+       vpshufb $t3,@X[3],@X[3]
+       vpaddd  0x20($Tbl),@X[1],$t1
+       vpaddd  0x40($Tbl),@X[2],$t2
+       vpaddd  0x60($Tbl),@X[3],$t3
+       vmovdqa $t0,0x00(%rsp)
+       xor     $a1,$a1
+       vmovdqa $t1,0x20(%rsp)
+       lea     -$PUSH8(%rsp),%rsp
+       mov     $B,$a3
+       vmovdqa $t2,0x00(%rsp)
+       xor     $C,$a3                  # magic
+       vmovdqa $t3,0x20(%rsp)
+       mov     $F,$a4
+       sub     \$-16*2*$SZ,$Tbl        # size optimization
+       jmp     .Lavx2_00_47
+
+.align 16
+.Lavx2_00_47:
+___
+
+sub AVX2_256_00_47 () {                        # emit one step of the SHA256 AVX2 schedule loop, vector ops interleaved with scalar round strings
+my $j = shift;                                 # step index; selects the table offset and stack slot used at the end
+my $body = shift;                              # code ref returning a list of instruction strings to be eval'ed
+my @X = @_;                                    # vector register names; the Xupdate strings read this lexical when eval'ed below
+my @insns = (&$body,&$body,&$body,&$body);     # four calls to $body; 96 instructions
+my $base = "+2*$PUSH8(%rsp)";                  # address suffix read by the bodyx_00_15 strings when eval'ed in this sub
+
+       &lea    ("%rsp","-$PUSH8(%rsp)")        if (($j%2)==0); # lower %rsp by $PUSH8 on even steps
+       foreach (Xupdate_256_AVX()) {           # 29 instructions
+           eval;                               # one vector schedule instruction
+           eval(shift(@insns));                # followed by three scalar instruction strings
+           eval(shift(@insns));
+           eval(shift(@insns));
+       }
+       &vpaddd         ($t2,@X[0],16*2*$j."($Tbl)");   # $t2 = updated X[0] + table entry at 16*2*$j($Tbl)
+         foreach (@insns) { eval; }            # remaining instructions
+       &vmovdqa        ((32*$j)%$PUSH8."(%rsp)",$t2);  # store the sum at offset (32*$j)%$PUSH8 from the current %rsp
+}
+
+    for ($i=0,$j=0; $j<4; $j++) {              # four steps, one per register name in @X
+       &AVX2_256_00_47($j,\&bodyx_00_15,@X);
+       push(@X,shift(@X));                     # rotate(@X)
+    }
+       &lea    ($Tbl,16*2*$SZ."($Tbl)");       # advance the table pointer by 16*2*$SZ bytes
+       &cmpb   (($SZ-1)."($Tbl)",0);           # test one table byte against zero; presumably an end-of-constants check (table layout is outside this chunk)
+       &jne    (".Lavx2_00_47");               # non-zero: run the schedule loop again
+
+    for ($i=0; $i<16; ) {                      # $i is advanced by the last string returned by bodyx_00_15
+       my $base=$i<8?"+$PUSH8(%rsp)":"(%rsp)"; # first eight rounds read $PUSH8 bytes above %rsp, the rest read at %rsp
+       foreach(bodyx_00_15()) { eval; }
+    }
+                                       } else {        # SHA512
+    my @X = map("%ymm$_",(0..7));
+    my ($t0,$t1,$t2,$t3) = map("%ymm$_",(8..11));
+
+$code.=<<___;
+       jmp     .Loop_avx2
+.align 16
+.Loop_avx2:
+       vmovdqu -16*$SZ($inp),%xmm0
+       vmovdqu -16*$SZ+16($inp),%xmm1
+       vmovdqu -16*$SZ+32($inp),%xmm2
+       lea     $TABLE+0x80(%rip),$Tbl  # size optimization
+       vmovdqu -16*$SZ+48($inp),%xmm3
+       vmovdqu -16*$SZ+64($inp),%xmm4
+       vmovdqu -16*$SZ+80($inp),%xmm5
+       vmovdqu -16*$SZ+96($inp),%xmm6
+       vmovdqu -16*$SZ+112($inp),%xmm7
+       #mov    $inp,$_inp      # offload $inp
+       vmovdqa `$SZ*2*$rounds-0x80`($Tbl),$t2
+       vinserti128     \$1,(%r12),@X[0],@X[0]
+       vinserti128     \$1,16(%r12),@X[1],@X[1]
+        vpshufb        $t2,@X[0],@X[0]
+       vinserti128     \$1,32(%r12),@X[2],@X[2]
+        vpshufb        $t2,@X[1],@X[1]
+       vinserti128     \$1,48(%r12),@X[3],@X[3]
+        vpshufb        $t2,@X[2],@X[2]
+       vinserti128     \$1,64(%r12),@X[4],@X[4]
+        vpshufb        $t2,@X[3],@X[3]
+       vinserti128     \$1,80(%r12),@X[5],@X[5]
+        vpshufb        $t2,@X[4],@X[4]
+       vinserti128     \$1,96(%r12),@X[6],@X[6]
+        vpshufb        $t2,@X[5],@X[5]
+       vinserti128     \$1,112(%r12),@X[7],@X[7]
+
+       vpaddq  -0x80($Tbl),@X[0],$t0
+       vpshufb $t2,@X[6],@X[6]
+       vpaddq  -0x60($Tbl),@X[1],$t1
+       vpshufb $t2,@X[7],@X[7]
+       vpaddq  -0x40($Tbl),@X[2],$t2
+       vpaddq  -0x20($Tbl),@X[3],$t3
+       vmovdqa $t0,0x00(%rsp)
+       vpaddq  0x00($Tbl),@X[4],$t0
+       vmovdqa $t1,0x20(%rsp)
+       vpaddq  0x20($Tbl),@X[5],$t1
+       vmovdqa $t2,0x40(%rsp)
+       vpaddq  0x40($Tbl),@X[6],$t2
+       vmovdqa $t3,0x60(%rsp)
+       lea     -$PUSH8(%rsp),%rsp
+       vpaddq  0x60($Tbl),@X[7],$t3
+       vmovdqa $t0,0x00(%rsp)
+       xor     $a1,$a1
+       vmovdqa $t1,0x20(%rsp)
+       mov     $B,$a3
+       vmovdqa $t2,0x40(%rsp)
+       xor     $C,$a3                  # magic
+       vmovdqa $t3,0x60(%rsp)
+       mov     $F,$a4
+       add     \$16*2*$SZ,$Tbl
+       jmp     .Lavx2_00_47
+
+.align 16
+.Lavx2_00_47:
+___
+
+sub AVX2_512_00_47 () {                        # emit one step of the SHA512 AVX2 schedule loop, vector ops interleaved with scalar round strings
+my $j = shift;                                 # step index; selects the table offset and stack slot used at the end
+my $body = shift;                              # code ref returning a list of instruction strings to be eval'ed
+my @X = @_;                                    # vector register names; the Xupdate strings read this lexical when eval'ed below
+my @insns = (&$body,&$body);                   # two calls to $body; 48 instructions
+my $base = "+2*$PUSH8(%rsp)";                  # address suffix read by the bodyx_00_15 strings when eval'ed in this sub
+
+       &lea    ("%rsp","-$PUSH8(%rsp)")        if (($j%4)==0); # lower %rsp by $PUSH8 on every fourth step
+       foreach (Xupdate_512_AVX()) {           # 23 instructions
+           eval;                               # one vector schedule instruction
+           if ($_ !~ /\;$/) {                  # entries ending in ";" get no scalar instructions after them
+               eval(shift(@insns));
+               eval(shift(@insns));
+               eval(shift(@insns));
+           }
+       }
+       &vpaddq         ($t2,@X[0],16*2*$j-0x80."($Tbl)");      # $t2 = updated X[0] + table entry; -0x80 matches the +0x80 bias applied when $Tbl was loaded
+         foreach (@insns) { eval; }            # remaining instructions
+       &vmovdqa        ((32*$j)%$PUSH8."(%rsp)",$t2);  # store the sum at offset (32*$j)%$PUSH8 from the current %rsp
+}
+
+    for ($i=0,$j=0; $j<8; $j++) {              # eight steps, one per register name in @X
+       &AVX2_512_00_47($j,\&bodyx_00_15,@X);
+       push(@X,shift(@X));                     # rotate(@X)
+    }
+       &lea    ($Tbl,16*2*$SZ."($Tbl)");       # advance the table pointer by 16*2*$SZ bytes
+       &cmpb   (($SZ-1-0x80)."($Tbl)",0);      # test one table byte against zero; presumably an end-of-constants check (table layout is outside this chunk)
+       &jne    (".Lavx2_00_47");               # non-zero: run the schedule loop again
+
+    for ($i=0; $i<16; ) {                      # $i is advanced by the last string returned by bodyx_00_15
+       my $base=$i<8?"+$PUSH8(%rsp)":"(%rsp)"; # first eight rounds read $PUSH8 bytes above %rsp, the rest read at %rsp
+       foreach(bodyx_00_15()) { eval; }
+    }
+}
+$code.=<<___;
+       mov     `2*$SZ*$rounds`(%rsp),$ctx      # $_ctx
+       add     $a1,$A
+       #mov    `2*$SZ*$rounds+8`(%rsp),$inp    # $_inp
+       lea     `2*$SZ*($rounds-8)`(%rsp),$Tbl
+
+       add     $SZ*0($ctx),$A
+       add     $SZ*1($ctx),$B
+       add     $SZ*2($ctx),$C
+       add     $SZ*3($ctx),$D
+       add     $SZ*4($ctx),$E
+       add     $SZ*5($ctx),$F
+       add     $SZ*6($ctx),$G
+       add     $SZ*7($ctx),$H
+
+       mov     $A,$SZ*0($ctx)
+       mov     $B,$SZ*1($ctx)
+       mov     $C,$SZ*2($ctx)
+       mov     $D,$SZ*3($ctx)
+       mov     $E,$SZ*4($ctx)
+       mov     $F,$SZ*5($ctx)
+       mov     $G,$SZ*6($ctx)
+       mov     $H,$SZ*7($ctx)
+
+       cmp     `$PUSH8+2*8`($Tbl),$inp # $_end
+       je      .Ldone_avx2
+
+       xor     $a1,$a1
+       mov     $B,$a3
+       xor     $C,$a3                  # magic
+       mov     $F,$a4
+       jmp     .Lower_avx2
+.align 16
+.Lower_avx2:
+___
+    for ($i=0; $i<8; ) {                       # emit eight rounds after the .Lower_avx2 label; $i is advanced by the last bodyx_00_15 string
+       my $base="+16($Tbl)";                   # these rounds address their X[i]+K[i] operand relative to $Tbl, 16 bytes in
+       foreach(bodyx_00_15()) { eval; }
+    }
+$code.=<<___;
+       lea     -$PUSH8($Tbl),$Tbl
+       cmp     %rsp,$Tbl
+       jae     .Lower_avx2
+
+       mov     `2*$SZ*$rounds`(%rsp),$ctx      # $_ctx
+       add     $a1,$A
+       #mov    `2*$SZ*$rounds+8`(%rsp),$inp    # $_inp
+       lea     `2*$SZ*($rounds-8)`(%rsp),%rsp
+
+       add     $SZ*0($ctx),$A
+       add     $SZ*1($ctx),$B
+       add     $SZ*2($ctx),$C
+       add     $SZ*3($ctx),$D
+       add     $SZ*4($ctx),$E
+       add     $SZ*5($ctx),$F
+       lea     `2*16*$SZ`($inp),$inp   # inp+=2
+       add     $SZ*6($ctx),$G
+       mov     $inp,%r12
+       add     $SZ*7($ctx),$H
+       cmp     $_end,$inp
+
+       mov     $A,$SZ*0($ctx)
+       cmove   %rsp,%r12               # next block or stale data
+       mov     $B,$SZ*1($ctx)
+       mov     $C,$SZ*2($ctx)
+       mov     $D,$SZ*3($ctx)
+       mov     $E,$SZ*4($ctx)
+       mov     $F,$SZ*5($ctx)
+       mov     $G,$SZ*6($ctx)
+       mov     $H,$SZ*7($ctx)
+
+       jbe     .Loop_avx2
+       lea     (%rsp),$Tbl
+
+.Ldone_avx2:
+       lea     ($Tbl),%rsp
+       mov     $_rsp,%rsi
+       vzeroupper
+___
+$code.=<<___ if ($win64);
+       movaps  16*$SZ+32(%rsp),%xmm6
+       movaps  16*$SZ+48(%rsp),%xmm7
+       movaps  16*$SZ+64(%rsp),%xmm8
+       movaps  16*$SZ+80(%rsp),%xmm9
+___
+$code.=<<___ if ($win64 && $SZ>4);
+       movaps  16*$SZ+96(%rsp),%xmm10
+       movaps  16*$SZ+112(%rsp),%xmm11
+___
+$code.=<<___;
+       mov     (%rsi),%r15
+       mov     8(%rsi),%r14
+       mov     16(%rsi),%r13
+       mov     24(%rsi),%r12
+       mov     32(%rsi),%rbp
+       mov     40(%rsi),%rbx
+       lea     48(%rsi),%rsp
+.Lepilogue_avx2:
+       ret
+.size  ${func}_avx2,.-${func}_avx2
+___
+}}
+}}}}}
+
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+#              CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern        __imp_RtlVirtualUnwind
+.type  se_handler,\@abi-omnipotent
+.align 16
+se_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       mov     8($disp),%rsi           # disp->ImageBase
+       mov     56($disp),%r11          # disp->HandlerData
+
+       mov     0(%r11),%r10d           # HandlerData[0]
+       lea     (%rsi,%r10),%r10        # prologue label
+       cmp     %r10,%rbx               # context->Rip<prologue label
+       jb      .Lin_prologue
+
+       mov     152($context),%rax      # pull context->Rsp
+
+       mov     4(%r11),%r10d           # HandlerData[1]
+       lea     (%rsi,%r10),%r10        # epilogue label
+       cmp     %r10,%rbx               # context->Rip>=epilogue label
+       jae     .Lin_prologue
+___
+$code.=<<___ if ($avx>1);
+       lea     .Lavx2_shortcut(%rip),%r10
+       cmp     %r10,%rbx               # context->Rip<avx2_shortcut
+       jb      .Lnot_in_avx2
+
+       and     \$-256*$SZ,%rax
+       add     \$`2*$SZ*($rounds-8)`,%rax
+.Lnot_in_avx2:
+___
+$code.=<<___;
+       mov     %rax,%rsi               # put aside Rsp
+       mov     16*$SZ+3*8(%rax),%rax   # pull $_rsp
+       lea     48(%rax),%rax
+
+       mov     -8(%rax),%rbx
+       mov     -16(%rax),%rbp
+       mov     -24(%rax),%r12
+       mov     -32(%rax),%r13
+       mov     -40(%rax),%r14
+       mov     -48(%rax),%r15
+       mov     %rbx,144($context)      # restore context->Rbx
+       mov     %rbp,160($context)      # restore context->Rbp
+       mov     %r12,216($context)      # restore context->R12
+       mov     %r13,224($context)      # restore context->R13
+       mov     %r14,232($context)      # restore context->R14
+       mov     %r15,240($context)      # restore context->R15
+
+       lea     .Lepilogue(%rip),%r10
+       cmp     %r10,%rbx
+       jb      .Lin_prologue           # non-AVX code
+
+       lea     16*$SZ+4*8(%rsi),%rsi   # Xmm6- save area
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$`$SZ==4?8:12`,%ecx
+       .long   0xa548f3fc              # cld; rep movsq
+
+.Lin_prologue:
+       mov     8(%rax),%rdi
+       mov     16(%rax),%rsi
+       mov     %rax,152($context)      # restore context->Rsp
+       mov     %rsi,168($context)      # restore context->Rsi
+       mov     %rdi,176($context)      # restore context->Rdi
+
+       mov     40($disp),%rdi          # disp->ContextRecord
+       mov     $context,%rsi           # context
+       mov     \$154,%ecx              # sizeof(CONTEXT)
+       .long   0xa548f3fc              # cld; rep movsq
+
+       mov     $disp,%rsi
+       xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
+       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
+       mov     0(%rsi),%r8             # arg3, disp->ControlPc
+       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
+       mov     40(%rsi),%r10           # disp->ContextRecord
+       lea     56(%rsi),%r11           # &disp->HandlerData
+       lea     24(%rsi),%r12           # &disp->EstablisherFrame
+       mov     %r10,32(%rsp)           # arg5
+       mov     %r11,40(%rsp)           # arg6
        mov     %r12,48(%rsp)           # arg7
        mov     %rcx,56(%rsp)           # arg8, (NULL)
        call    *__imp_RtlVirtualUnwind(%rip)
@@ -431,21 +2263,136 @@ se_handler:
        pop     %rsi
        ret
 .size  se_handler,.-se_handler
+___
+
+# Win64 SEH handler registered for ${func}_shaext; emitted only when
+# $SZ==4 and $shaext is set.  It saves the non-volatile registers and
+# reserves 64 bytes of stack, then checks context->Rip: if it lies in
+# [.Lprologue_shaext,.Lepilogue_shaext) it copies 10 quadwords from
+# -8-5*16(context->Rax) to context+512 (annotated as &context.Xmm6).
+# In every case it finishes by jumping to the .Lin_prologue tail that
+# lives in se_handler.
+$code.=<<___ if ($SZ==4 && $shaext);
+.type  shaext_handler,\@abi-omnipotent
+.align 16
+shaext_handler:
+       push    %rsi
+       push    %rdi
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       pushfq
+       sub     \$64,%rsp
+
+       mov     120($context),%rax      # pull context->Rax
+       mov     248($context),%rbx      # pull context->Rip
+
+       lea     .Lprologue_shaext(%rip),%r10
+       cmp     %r10,%rbx               # context->Rip<.Lprologue
+       jb      .Lin_prologue
+
+       lea     .Lepilogue_shaext(%rip),%r10
+       cmp     %r10,%rbx               # context->Rip>=.Lepilogue
+       jae     .Lin_prologue
+
+       lea     -8-5*16(%rax),%rsi
+       lea     512($context),%rdi      # &context.Xmm6
+       mov     \$10,%ecx
+       .long   0xa548f3fc              # cld; rep movsq
+
+       jmp     .Lin_prologue
+.size  shaext_handler,.-shaext_handler
+___
 
+$code.=<<___;
 .section       .pdata
 .align 4
        .rva    .LSEH_begin_$func
        .rva    .LSEH_end_$func
        .rva    .LSEH_info_$func
-
+___
+$code.=<<___ if ($SZ==4 && $shaext);
+       .rva    .LSEH_begin_${func}_shaext
+       .rva    .LSEH_end_${func}_shaext
+       .rva    .LSEH_info_${func}_shaext
+___
+$code.=<<___ if ($SZ==4);
+       .rva    .LSEH_begin_${func}_ssse3
+       .rva    .LSEH_end_${func}_ssse3
+       .rva    .LSEH_info_${func}_ssse3
+___
+$code.=<<___ if ($avx && $SZ==8);
+       .rva    .LSEH_begin_${func}_xop
+       .rva    .LSEH_end_${func}_xop
+       .rva    .LSEH_info_${func}_xop
+___
+$code.=<<___ if ($avx);
+       .rva    .LSEH_begin_${func}_avx
+       .rva    .LSEH_end_${func}_avx
+       .rva    .LSEH_info_${func}_avx
+___
+$code.=<<___ if ($avx>1);
+       .rva    .LSEH_begin_${func}_avx2
+       .rva    .LSEH_end_${func}_avx2
+       .rva    .LSEH_info_${func}_avx2
+___
+$code.=<<___;
 .section       .xdata
 .align 8
 .LSEH_info_$func:
        .byte   9,0,0,0
        .rva    se_handler
+       .rva    .Lprologue,.Lepilogue                   # HandlerData[]
+___
+$code.=<<___ if ($SZ==4 && $shaext);
+.LSEH_info_${func}_shaext:
+       .byte   9,0,0,0
+       .rva    shaext_handler
+___
+$code.=<<___ if ($SZ==4);
+.LSEH_info_${func}_ssse3:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lprologue_ssse3,.Lepilogue_ssse3       # HandlerData[]
 ___
+$code.=<<___ if ($avx && $SZ==8);
+.LSEH_info_${func}_xop:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lprologue_xop,.Lepilogue_xop           # HandlerData[]
+___
+$code.=<<___ if ($avx);
+.LSEH_info_${func}_avx:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lprologue_avx,.Lepilogue_avx           # HandlerData[]
+___
+$code.=<<___ if ($avx>1);
+.LSEH_info_${func}_avx2:
+       .byte   9,0,0,0
+       .rva    se_handler
+       .rva    .Lprologue_avx2,.Lepilogue_avx2         # HandlerData[]
+___
+}
+
+# Hand-encode a SHA-256 extension instruction as raw bytes.
+#
+#   $instr    - mnemonic (sha256rnds2, sha256msg1 or sha256msg2)
+#   $operands - operand string as written in the source line
+#
+# When the mnemonic is known and both operands are %xmm0-%xmm7 registers,
+# returns a ".byte" directive holding 0x0f,0x38,<opcode>,<ModR/M>.
+# Anything else is returned unchanged as "<mnemonic>\t<operands>".
+sub sha256op38 {
+    # Copy both arguments up front: the caller passes $1/$2, and the
+    # match below sets fresh capture variables.
+    my ($instr, $operands) = @_;
+    my %opcodelet = (
+               "sha256rnds2" => 0xcb,
+               "sha256msg1"  => 0xcc,
+               "sha256msg2"  => 0xcd   );
+
+    if (defined($opcodelet{$instr}) && $operands =~ /%xmm([0-7]),\s*%xmm([0-7])/) {
+       my @opcode=(0x0f,0x38);
+       push @opcode,$opcodelet{$instr};
+       # register-direct ModR/M: first operand in r/m, second in reg
+       push @opcode,0xc0|($1&7)|(($2&7)<<3);           # ModR/M
+       return ".byte\t".join(',',@opcode);
+    } else {
+       return $instr."\t".$operands;
+    }
 }
 
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-print $code;
+foreach (split("\n",$code)) {
+       s/\`([^\`]*)\`/eval $1/geo;
+
+       s/\b(sha256[^\s]*)\s+(.*)/sha256op38($1,$2)/geo;
+
+       print $_,"\n";
+}
 close STDOUT;
diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl
new file mode 100755 (executable)
index 0000000..4718950
--- /dev/null
@@ -0,0 +1,424 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# SHA256/512 for PowerISA v2.07.
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This module is
+# ~60% faster than integer-only sha512-ppc.pl. To anchor to something
+# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than
+# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than
+# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
+# result is degree of computational resources' utilization. POWER8 is
+# "massively multi-threaded chip" and difference between single- and
+# maximum multi-process benchmark results tells that utilization is
+# whopping 94%. For sha512-ppc.pl we get [not unimpressive] 84% and
+# for sha1-ppc.pl - 73%. 100% means that multi-process result equals
+# to single-process one, given that all threads end up on the same
+# physical core.
+
+$flavour=shift;
+$output =shift;
+
+if ($flavour =~ /64/) {
+       $SIZE_T=8;
+       $LRSAVE=2*$SIZE_T;
+       $STU="stdu";
+       $POP="ld";
+       $PUSH="std";
+} elsif ($flavour =~ /32/) {
+       $SIZE_T=4;
+       $LRSAVE=$SIZE_T;
+       $STU="stwu";
+       $POP="lwz";
+       $PUSH="stw";
+} else { die "nonsense $flavour"; }
+
+$LENDIAN=($flavour=~/le/);
+
+# Locate ppc-xlate.pl, first next to this script and then under
+# ../../perlasm/ relative to it.
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+# From here on everything printed to STDOUT is piped through the
+# translator.  Low-precedence "or" is required: with "||" the die would
+# bind to the (always true) command string and a failed open would go
+# unreported.
+open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
+
+if ($output =~ /512/) {
+       $bits=512;
+       $SZ=8;
+       $sz="d";
+       $rounds=80;
+} else {
+       $bits=256;
+       $SZ=4;
+       $sz="w";
+       $rounds=64;
+}
+
+$func="sha${bits}_block_p8";
+$FRAME=8*$SIZE_T;
+
+$sp ="r1";
+$toc="r2";
+$ctx="r3";
+$inp="r4";
+$num="r5";
+$Tbl="r6";
+$idx="r7";
+$lrsave="r8";
+$offload="r11";
+$vrsave="r12";
+($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31));
+ $x00=0 if ($flavour =~ /osx/);
+
+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7));
+@X=map("v$_",(8..23));
+($Ki,$Func,$S0,$S1,$s0,$s1,$lemask)=map("v$_",(24..31));
+
+# Append the code for round $i to $code.
+#
+#   $i      - round number
+#   $a..$h  - names of the eight working-variable registers in their
+#             current rotation (the caller rotates @V after each call)
+#
+# $j=($i+1)%16 is the @X slot whose message-schedule update is emitted
+# in rounds $i>=15.
+sub ROUND {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+my $j=($i+1)%16;
+
+# On the last $SZ-sized lane of a 16-byte vector (first 15 rounds only):
+# fetch the next 16 input bytes into X[i+1] and advance $inp.
+$code.=<<___           if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1));
+       lvx_u           @X[$i+1],0,$inp         ; load X[i] in advance
+       addi            $inp,$inp,16
+___
+# Lanes other than the first of a loaded vector: derive X[i] from X[i-1]
+# with vsldoi by $SZ bytes.
+$code.=<<___           if ($i<16 && ($i%(16/$SZ)));
+       vsldoi          @X[$i],@X[$i-1],@X[$i-1],$SZ
+___
+# Little-endian flavours: permute the first lane's vector through $lemask.
+$code.=<<___           if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0);
+       vperm           @X[$i],@X[$i],@X[$i],$lemask
+___
+# Round body proper.  The back-quoted lines are evaluated at output time
+# and emit the X[j] schedule update only when $i>=15.
+$code.=<<___;
+       `"vshasigma${sz}        $s0,@X[($j+1)%16],0,0"          if ($i>=15)`
+       vsel            $Func,$g,$f,$e          ; Ch(e,f,g)
+       vshasigma${sz}  $S1,$e,1,15             ; Sigma1(e)
+       vaddu${sz}m     $h,$h,@X[$i%16]         ; h+=X[i]
+       vshasigma${sz}  $S0,$a,1,0              ; Sigma0(a)
+       `"vshasigma${sz}        $s1,@X[($j+14)%16],0,15"        if ($i>=15)`
+       vaddu${sz}m     $h,$h,$Func             ; h+=Ch(e,f,g)
+       vxor            $Func,$a,$b
+       `"vaddu${sz}m           @X[$j],@X[$j],@X[($j+9)%16]"    if ($i>=15)`
+       vaddu${sz}m     $h,$h,$S1               ; h+=Sigma1(e)
+       vsel            $Func,$b,$c,$Func       ; Maj(a,b,c)
+       vaddu${sz}m     $g,$g,$Ki               ; future h+=K[i]
+       vaddu${sz}m     $d,$d,$h                ; d+=h
+       vaddu${sz}m     $S0,$S0,$Func           ; Sigma0(a)+Maj(a,b,c)
+       `"vaddu${sz}m           @X[$j],@X[$j],$s0"              if ($i>=15)`
+       lvx             $Ki,$idx,$Tbl           ; load next K[i]
+       addi            $idx,$idx,16
+       vaddu${sz}m     $h,$h,$S0               ; h+=Sigma0(a)+Maj(a,b,c)
+       `"vaddu${sz}m           @X[$j],@X[$j],$s1"              if ($i>=15)`
+___
+}
+
+$code=<<___;
+.machine       "any"
+.text
+
+.globl $func
+.align 6
+$func:
+       $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+       mflr            $lrsave
+       li              r10,`$FRAME+8*16+15`
+       li              r11,`$FRAME+8*16+31`
+       stvx            v20,r10,$sp             # ABI says so
+       addi            r10,r10,32
+       mfspr           $vrsave,256
+       stvx            v21,r11,$sp
+       addi            r11,r11,32
+       stvx            v22,r10,$sp
+       addi            r10,r10,32
+       stvx            v23,r11,$sp
+       addi            r11,r11,32
+       stvx            v24,r10,$sp
+       addi            r10,r10,32
+       stvx            v25,r11,$sp
+       addi            r11,r11,32
+       stvx            v26,r10,$sp
+       addi            r10,r10,32
+       stvx            v27,r11,$sp
+       addi            r11,r11,32
+       stvx            v28,r10,$sp
+       addi            r10,r10,32
+       stvx            v29,r11,$sp
+       addi            r11,r11,32
+       stvx            v30,r10,$sp
+       stvx            v31,r11,$sp
+       li              r11,-1
+       stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
+       li              $x10,0x10
+       $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+       li              $x20,0x20
+       $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+       li              $x30,0x30
+       $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+       li              $x40,0x40
+       $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+       li              $x50,0x50
+       $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+       li              $x60,0x60
+       $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+       li              $x70,0x70
+       $PUSH           $lrsave,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+       mtspr           256,r11
+
+       bl              LPICmeup
+       addi            $offload,$sp,$FRAME+15
+___
+$code.=<<___           if ($LENDIAN);
+       li              $idx,8
+       lvsl            $lemask,0,$idx
+       vspltisb        $Ki,0x0f
+       vxor            $lemask,$lemask,$Ki
+___
+$code.=<<___           if ($SZ==4);
+       lvx_4w          $A,$x00,$ctx
+       lvx_4w          $E,$x10,$ctx
+       vsldoi          $B,$A,$A,4              # unpack
+       vsldoi          $C,$A,$A,8
+       vsldoi          $D,$A,$A,12
+       vsldoi          $F,$E,$E,4
+       vsldoi          $G,$E,$E,8
+       vsldoi          $H,$E,$E,12
+___
+$code.=<<___           if ($SZ==8);
+       lvx_u           $A,$x00,$ctx
+       lvx_u           $C,$x10,$ctx
+       lvx_u           $E,$x20,$ctx
+       vsldoi          $B,$A,$A,8              # unpack
+       lvx_u           $G,$x30,$ctx
+       vsldoi          $D,$C,$C,8
+       vsldoi          $F,$E,$E,8
+       vsldoi          $H,$G,$G,8
+___
+$code.=<<___;
+       li              r0,`($rounds-16)/16`    # inner loop counter
+       b               Loop
+.align 5
+Loop:
+       lvx             $Ki,$x00,$Tbl
+       li              $idx,16
+       lvx_u           @X[0],0,$inp
+       addi            $inp,$inp,16
+       stvx            $A,$x00,$offload        # offload $A-$H
+       stvx            $B,$x10,$offload
+       stvx            $C,$x20,$offload
+       stvx            $D,$x30,$offload
+       stvx            $E,$x40,$offload
+       stvx            $F,$x50,$offload
+       stvx            $G,$x60,$offload
+       stvx            $H,$x70,$offload
+       vaddu${sz}m     $H,$H,$Ki               # h+K[i]
+       lvx             $Ki,$idx,$Tbl
+       addi            $idx,$idx,16
+___
+for ($i=0;$i<16;$i++)  { &ROUND($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+       mtctr           r0
+       b               L16_xx
+.align 5
+L16_xx:
+___
+for (;$i<32;$i++)      { &ROUND($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+       bdnz            L16_xx
+
+       lvx             @X[2],$x00,$offload
+       subic.          $num,$num,1
+       lvx             @X[3],$x10,$offload
+       vaddu${sz}m     $A,$A,@X[2]
+       lvx             @X[4],$x20,$offload
+       vaddu${sz}m     $B,$B,@X[3]
+       lvx             @X[5],$x30,$offload
+       vaddu${sz}m     $C,$C,@X[4]
+       lvx             @X[6],$x40,$offload
+       vaddu${sz}m     $D,$D,@X[5]
+       lvx             @X[7],$x50,$offload
+       vaddu${sz}m     $E,$E,@X[6]
+       lvx             @X[8],$x60,$offload
+       vaddu${sz}m     $F,$F,@X[7]
+       lvx             @X[9],$x70,$offload
+       vaddu${sz}m     $G,$G,@X[8]
+       vaddu${sz}m     $H,$H,@X[9]
+       bne             Loop
+___
+$code.=<<___           if ($SZ==4);
+       lvx             @X[0],$idx,$Tbl
+       addi            $idx,$idx,16
+       vperm           $A,$A,$B,$Ki            # pack the answer
+       lvx             @X[1],$idx,$Tbl
+       vperm           $E,$E,$F,$Ki
+       vperm           $A,$A,$C,@X[0]
+       vperm           $E,$E,$G,@X[0]
+       vperm           $A,$A,$D,@X[1]
+       vperm           $E,$E,$H,@X[1]
+       stvx_4w         $A,$x00,$ctx
+       stvx_4w         $E,$x10,$ctx
+___
+$code.=<<___           if ($SZ==8);
+       vperm           $A,$A,$B,$Ki            # pack the answer
+       vperm           $C,$C,$D,$Ki
+       vperm           $E,$E,$F,$Ki
+       vperm           $G,$G,$H,$Ki
+       stvx_u          $A,$x00,$ctx
+       stvx_u          $C,$x10,$ctx
+       stvx_u          $E,$x20,$ctx
+       stvx_u          $G,$x30,$ctx
+___
+$code.=<<___;
+       li              r10,`$FRAME+8*16+15`
+       mtlr            $lrsave
+       li              r11,`$FRAME+8*16+31`
+       mtspr           256,$vrsave
+       lvx             v20,r10,$sp             # ABI says so
+       addi            r10,r10,32
+       lvx             v21,r11,$sp
+       addi            r11,r11,32
+       lvx             v22,r10,$sp
+       addi            r10,r10,32
+       lvx             v23,r11,$sp
+       addi            r11,r11,32
+       lvx             v24,r10,$sp
+       addi            r10,r10,32
+       lvx             v25,r11,$sp
+       addi            r11,r11,32
+       lvx             v26,r10,$sp
+       addi            r10,r10,32
+       lvx             v27,r11,$sp
+       addi            r11,r11,32
+       lvx             v28,r10,$sp
+       addi            r10,r10,32
+       lvx             v29,r11,$sp
+       addi            r11,r11,32
+       lvx             v30,r10,$sp
+       lvx             v31,r11,$sp
+       $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+       $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+       $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+       $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+       $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+       $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+       addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+       blr
+       .long           0
+       .byte           0,12,4,1,0x80,6,3,0
+       .long           0
+.size  $func,.-$func
+___
+
+# Ugly hack here, because PPC assembler syntax seem to vary too
+# much from platforms to platform...
+$code.=<<___;
+.align 6
+LPICmeup:
+       mflr    r0
+       bcl     20,31,\$+4
+       mflr    $Tbl    ; vvvvvv "distance" between . and 1st data entry
+       addi    $Tbl,$Tbl,`64-8`
+       mtlr    r0
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+       .space  `64-9*4`
+___
+
+if ($SZ==8) {
+    local *table = sub {
+       foreach(@_) { $code.=".quad     $_,$_\n"; }
+    };
+    table(
+       "0x428a2f98d728ae22","0x7137449123ef65cd",
+       "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc",
+       "0x3956c25bf348b538","0x59f111f1b605d019",
+       "0x923f82a4af194f9b","0xab1c5ed5da6d8118",
+       "0xd807aa98a3030242","0x12835b0145706fbe",
+       "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2",
+       "0x72be5d74f27b896f","0x80deb1fe3b1696b1",
+       "0x9bdc06a725c71235","0xc19bf174cf692694",
+       "0xe49b69c19ef14ad2","0xefbe4786384f25e3",
+       "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65",
+       "0x2de92c6f592b0275","0x4a7484aa6ea6e483",
+       "0x5cb0a9dcbd41fbd4","0x76f988da831153b5",
+       "0x983e5152ee66dfab","0xa831c66d2db43210",
+       "0xb00327c898fb213f","0xbf597fc7beef0ee4",
+       "0xc6e00bf33da88fc2","0xd5a79147930aa725",
+       "0x06ca6351e003826f","0x142929670a0e6e70",
+       "0x27b70a8546d22ffc","0x2e1b21385c26c926",
+       "0x4d2c6dfc5ac42aed","0x53380d139d95b3df",
+       "0x650a73548baf63de","0x766a0abb3c77b2a8",
+       "0x81c2c92e47edaee6","0x92722c851482353b",
+       "0xa2bfe8a14cf10364","0xa81a664bbc423001",
+       "0xc24b8b70d0f89791","0xc76c51a30654be30",
+       "0xd192e819d6ef5218","0xd69906245565a910",
+       "0xf40e35855771202a","0x106aa07032bbd1b8",
+       "0x19a4c116b8d2d0c8","0x1e376c085141ab53",
+       "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8",
+       "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb",
+       "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3",
+       "0x748f82ee5defb2fc","0x78a5636f43172f60",
+       "0x84c87814a1f0ab72","0x8cc702081a6439ec",
+       "0x90befffa23631e28","0xa4506cebde82bde9",
+       "0xbef9a3f7b2c67915","0xc67178f2e372532b",
+       "0xca273eceea26619c","0xd186b8c721c0c207",
+       "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178",
+       "0x06f067aa72176fba","0x0a637dc5a2c898a6",
+       "0x113f9804bef90dae","0x1b710b35131c471b",
+       "0x28db77f523047d84","0x32caab7b40c72493",
+       "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c",
+       "0x4cc5d4becb3e42b6","0x597f299cfc657e2a",
+       "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0");
+$code.=<<___   if (!$LENDIAN);
+.quad  0x0001020304050607,0x1011121314151617
+___
+$code.=<<___   if ($LENDIAN);  # quad-swapped
+.quad  0x1011121314151617,0x0001020304050607
+___
+} else {
+    local *table = sub {
+       foreach(@_) { $code.=".long     $_,$_,$_,$_\n"; }
+    };
+    table(
+       "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5",
+       "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5",
+       "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3",
+       "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174",
+       "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc",
+       "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da",
+       "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7",
+       "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967",
+       "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13",
+       "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85",
+       "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3",
+       "0xd192e819","0xd6990624","0xf40e3585","0x106aa070",
+       "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5",
+       "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3",
+       "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208",
+       "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0");
+$code.=<<___   if (!$LENDIAN);
+.long  0x00010203,0x10111213,0x10111213,0x10111213
+.long  0x00010203,0x04050607,0x10111213,0x10111213
+.long  0x00010203,0x04050607,0x08090a0b,0x10111213
+___
+$code.=<<___   if ($LENDIAN);  # word-swapped
+.long  0x10111213,0x10111213,0x10111213,0x00010203
+.long  0x10111213,0x10111213,0x04050607,0x00010203
+.long  0x10111213,0x08090a0b,0x04050607,0x00010203
+___
+}
+$code.=<<___;
+.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+print $code;
+close STDOUT;
index de0aad8..3bf66ae 100644 (file)
@@ -55,6 +55,7 @@ const char SHA512_version[] = "SHA-512" OPENSSL_VERSION_PTEXT;
 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
     defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
     defined(__s390__) || defined(__s390x__) || \
+    defined(__aarch64__) || \
     defined(SHA512_ASM)
 #  define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
 # endif
@@ -353,6 +354,18 @@ static const SHA_LONG64 K512[80] = {
                                 asm ("rotrdi %0,%1,%2"  \
                                 : "=r"(ret)             \
                                 : "r"(a),"K"(n)); ret;  })
+#    elif defined(__aarch64__)
+#     define ROTR(a,n)    ({ SHA_LONG64 ret;              \
+                                asm ("ror %0,%1,%2"     \
+                                : "=r"(ret)             \
+                                : "r"(a),"I"(n)); ret;  })
+#     if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
+#      define PULL64(x)   ({ SHA_LONG64 ret;                      \
+                                asm ("rev       %0,%1"          \
+                                : "=r"(ret)                     \
+                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret;             })
+#     endif
 #    endif
 #   elif defined(_MSC_VER)
 #    if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h
new file mode 100644 (file)
index 0000000..e30d322
--- /dev/null
@@ -0,0 +1,101 @@
+#ifndef __SPARC_ARCH_H__
+# define __SPARC_ARCH_H__
+
+# define SPARCV9_TICK_PRIVILEGED (1<<0)
+# define SPARCV9_PREFER_FPU      (1<<1)
+# define SPARCV9_VIS1            (1<<2)
+# define SPARCV9_VIS2            (1<<3)/* reserved */
+# define SPARCV9_FMADD           (1<<4)/* reserved for SPARC64 V */
+# define SPARCV9_BLK             (1<<5)/* VIS1 block copy */
+# define SPARCV9_VIS3            (1<<6)
+# define SPARCV9_RANDOM          (1<<7)
+# define SPARCV9_64BIT_STACK     (1<<8)
+
+/*
+ * OPENSSL_sparcv9cap_P[1] is copy of Compatibility Feature Register,
+ * %asr26, SPARC-T4 and later. There is no SPARCV9_CFR bit in
+ * OPENSSL_sparcv9cap_P[0], as %cfr copy is sufficient...
+ */
+# define CFR_AES         0x00000001/* Supports AES opcodes */
+# define CFR_DES         0x00000002/* Supports DES opcodes */
+# define CFR_KASUMI      0x00000004/* Supports KASUMI opcodes */
+# define CFR_CAMELLIA    0x00000008/* Supports CAMELLIA opcodes */
+# define CFR_MD5         0x00000010/* Supports MD5 opcodes */
+# define CFR_SHA1        0x00000020/* Supports SHA1 opcodes */
+# define CFR_SHA256      0x00000040/* Supports SHA256 opcodes */
+# define CFR_SHA512      0x00000080/* Supports SHA512 opcodes */
+# define CFR_MPMUL       0x00000100/* Supports MPMUL opcodes */
+# define CFR_MONTMUL     0x00000200/* Supports MONTMUL opcodes */
+# define CFR_MONTSQR     0x00000400/* Supports MONTSQR opcodes */
+# define CFR_CRC32C      0x00000800/* Supports CRC32C opcodes */
+
+# if defined(OPENSSL_PIC) && !defined(__PIC__)
+#  define __PIC__
+# endif
+
+# if defined(__SUNPRO_C) && defined(__sparcv9) && !defined(__arch64__)
+#  define __arch64__
+# endif
+
+# define SPARC_PIC_THUNK(reg)    \
+        .align  32;             \
+.Lpic_thunk:                    \
+        jmp     %o7 + 8;        \
+         add    %o7, reg, reg;
+
+# define SPARC_PIC_THUNK_CALL(reg)                       \
+        sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), reg;      \
+        call    .Lpic_thunk;                            \
+         or     reg, %lo(_GLOBAL_OFFSET_TABLE_+4), reg;
+
+# if 1
+#  define SPARC_SETUP_GOT_REG(reg)       SPARC_PIC_THUNK_CALL(reg)
+# else
+#  define SPARC_SETUP_GOT_REG(reg)       \
+        sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), reg;      \
+        call    .+8;                                    \
+        or      reg,%lo(_GLOBAL_OFFSET_TABLE_+4), reg;  \
+        add     %o7, reg, reg
+# endif
+
+# if defined(__arch64__)
+
+#  define SPARC_LOAD_ADDRESS(SYM, reg)   \
+        setx    SYM, %o7, reg;
+#  define LDPTR          ldx
+#  define SIZE_T_CC      %xcc
+#  define STACK_FRAME    192
+#  define STACK_BIAS     2047
+#  define STACK_7thARG   (STACK_BIAS+176)
+
+# else
+
+#  define SPARC_LOAD_ADDRESS(SYM, reg)   \
+        set     SYM, reg;
+#  define LDPTR          ld
+#  define SIZE_T_CC      %icc
+#  define STACK_FRAME    112
+#  define STACK_BIAS     0
+#  define STACK_7thARG   92
+#  define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg)
+
+# endif
+
+# ifdef __PIC__
+#  undef SPARC_LOAD_ADDRESS
+#  undef SPARC_LOAD_ADDRESS_LEAF
+#  define SPARC_LOAD_ADDRESS(SYM, reg)   \
+        SPARC_SETUP_GOT_REG(reg);       \
+        sethi   %hi(SYM), %o7;          \
+        or      %o7, %lo(SYM), %o7;     \
+        LDPTR   [reg + %o7], reg;
+# endif
+
+/*
+ * Fallback SPARC_LOAD_ADDRESS_LEAF, used when no variant is defined at
+ * this point: saves %o7 in tmp, expands SPARC_LOAD_ADDRESS, then restores
+ * %o7.  Both the setx form and the PIC form of SPARC_LOAD_ADDRESS in this
+ * header write to %o7.
+ */
+# ifndef SPARC_LOAD_ADDRESS_LEAF
+#  define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) \
+        mov     %o7, tmp;                       \
+        SPARC_LOAD_ADDRESS(SYM, reg)            \
+        mov     tmp, %o7;
+# endif
+
+#endif                          /* __SPARC_ARCH_H__ */
index 0cc247e..eea2006 100644 (file)
@@ -251,6 +251,11 @@ _sparcv9_vis1_probe:
 !      UltraSPARC IIe          7
 !      UltraSPARC III          7
 !      UltraSPARC T1           24
+!      SPARC T4                65(*)
+!
+! (*)  result has less to do with VIS instruction latencies; rdtick
+!      appears to be that slow, but it does the trick in the sense that
+!      FP and VIS code paths are still slower than integer-only ones.
 !
 ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
 !
@@ -260,6 +265,8 @@ _sparcv9_vis1_probe:
 .global        _sparcv9_vis1_instrument
 .align 8
 _sparcv9_vis1_instrument:
+       .word   0x81b00d80      !fxor   %f0,%f0,%f0
+       .word   0x85b08d82      !fxor   %f2,%f2,%f2
        .word   0x91410000      !rd     %tick,%o0
        .word   0x81b00d80      !fxor   %f0,%f0,%f0
        .word   0x85b08d82      !fxor   %f2,%f2,%f2
@@ -314,6 +321,30 @@ _sparcv9_fmadd_probe:
 .type  _sparcv9_fmadd_probe,#function
 .size  _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
 
+.global        _sparcv9_rdcfr
+.align 8
+_sparcv9_rdcfr:
+       retl
+       .word   0x91468000      !rd     %asr26,%o0
+.type  _sparcv9_rdcfr,#function
+.size  _sparcv9_rdcfr,.-_sparcv9_rdcfr
+
+.global        _sparcv9_vis3_probe
+.align 8
+_sparcv9_vis3_probe:
+       retl
+       .word   0x81b022a0      !xmulx  %g0,%g0,%g0
+.type  _sparcv9_vis3_probe,#function
+.size  _sparcv9_vis3_probe,.-_sparcv9_vis3_probe
+
+! _sparcv9_random: issues the hand-encoded "random %o0" instruction
+! right after retl; the result is left in %o0 for the caller.
+.global        _sparcv9_random
+.align 8
+_sparcv9_random:
+       retl
+       .word   0x91b002a0      !random %o0
+.type  _sparcv9_random,#function
+.size  _sparcv9_random,.-_sparcv9_random
+
 .global        OPENSSL_cleanse
 .align 32
 OPENSSL_cleanse:
@@ -397,6 +428,102 @@ OPENSSL_cleanse:
 .type  OPENSSL_cleanse,#function
 .size  OPENSSL_cleanse,.-OPENSSL_cleanse
 
+.global        _sparcv9_vis1_instrument_bus    ! (%o0 = out, %o1 = cnt): adds %tick deltas into out[], returns cnt
+.align 8
+_sparcv9_vis1_instrument_bus:
+       mov     %o1,%o3                                 ! save cnt
+       .word   0x99410000      !rd     %tick,%o4       ! tick
+       mov     %o4,%o5                                 ! lasttick = tick
+       set     0,%g4                                   ! diff
+
+       andn    %o0,63,%g1                              ! %g1 = out with the low 6 bits cleared (64-byte aligned)
+       .word   0xc1985e00      !ldda   [%g1]0xf0,%f0   ! block load
+       .word   0x8143e040      !membar #Sync
+       .word   0xc1b85c00      !stda   %f0,[%g1]0xe0   ! block store and commit
+       .word   0x8143e040      !membar #Sync
+       ld      [%o0],%o4                               ! %o4 = current *out
+       add     %o4,%g4,%g4                             ! %g4 = *out + diff
+       .word   0xc9e2100c      !cas    [%o0],%o4,%g4   ! write %g4 to *out if *out still equals %o4
+
+.Loop: .word   0x99410000      !rd     %tick,%o4
+       sub     %o4,%o5,%g4                             ! diff=tick-lasttick
+       mov     %o4,%o5                                 ! lasttick=tick
+
+       andn    %o0,63,%g1                              ! %g1 = out with the low 6 bits cleared (64-byte aligned)
+       .word   0xc1985e00      !ldda   [%g1]0xf0,%f0   ! block load
+       .word   0x8143e040      !membar #Sync
+       .word   0xc1b85c00      !stda   %f0,[%g1]0xe0   ! block store and commit
+       .word   0x8143e040      !membar #Sync
+       ld      [%o0],%o4                               ! %o4 = current *out
+       add     %o4,%g4,%g4                             ! %g4 = *out + diff
+       .word   0xc9e2100c      !cas    [%o0],%o4,%g4   ! write %g4 to *out if *out still equals %o4
+       subcc   %o1,1,%o1                               ! --$cnt (NOTE(review): cnt == 0 on entry is not special-cased)
+       bnz     .Loop                                   ! repeat until cnt reaches zero
+       add     %o0,4,%o0                               ! ++$out
+
+       retl
+       mov     %o3,%o0                                 ! return the saved cnt (delay slot)
+.type  _sparcv9_vis1_instrument_bus,#function
+.size  _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
+
+.global        _sparcv9_vis1_instrument_bus2   ! (%o0 = out, %o1 = cnt, %o2 = max): returns cnt minus the slots left unused
+.align 8
+_sparcv9_vis1_instrument_bus2:
+       mov     %o1,%o3                                 ! save cnt
+       sll     %o1,2,%o1                               ! cnt*=4
+
+       .word   0x99410000      !rd     %tick,%o4       ! tick
+       mov     %o4,%o5                                 ! lasttick = tick
+       set     0,%g4                                   ! diff
+
+       andn    %o0,63,%g1                              ! %g1 = out with the low 6 bits cleared (64-byte aligned)
+       .word   0xc1985e00      !ldda   [%g1]0xf0,%f0   ! block load
+       .word   0x8143e040      !membar #Sync
+       .word   0xc1b85c00      !stda   %f0,[%g1]0xe0   ! block store and commit
+       .word   0x8143e040      !membar #Sync
+       ld      [%o0],%o4                               ! %o4 = current *out
+       add     %o4,%g4,%g4                             ! %g4 = *out + diff
+       .word   0xc9e2100c      !cas    [%o0],%o4,%g4   ! write %g4 to *out if *out still equals %o4
+
+       .word   0x99410000      !rd     %tick,%o4       ! tick
+       sub     %o4,%o5,%g4                             ! diff=tick-lasttick
+       mov     %o4,%o5                                 ! lasttick=tick
+       mov     %g4,%g5                                 ! lastdiff=diff
+.Loop2:
+       andn    %o0,63,%g1                              ! %g1 = out with the low 6 bits cleared (64-byte aligned)
+       .word   0xc1985e00      !ldda   [%g1]0xf0,%f0   ! block load
+       .word   0x8143e040      !membar #Sync
+       .word   0xc1b85c00      !stda   %f0,[%g1]0xe0   ! block store and commit
+       .word   0x8143e040      !membar #Sync
+       ld      [%o0],%o4                               ! %o4 = current *out
+       add     %o4,%g4,%g4                             ! %g4 = *out + diff
+       .word   0xc9e2100c      !cas    [%o0],%o4,%g4   ! write %g4 to *out if *out still equals %o4
+
+       subcc   %o2,1,%o2                               ! --max
+       bz      .Ldone2                                 ! stop once max iterations have run
+       nop
+
+       .word   0x99410000      !rd     %tick,%o4       ! tick
+       sub     %o4,%o5,%g4                             ! diff=tick-lasttick
+       mov     %o4,%o5                                 ! lasttick=tick
+       cmp     %g4,%g5                                 ! compare diff with lastdiff
+       mov     %g4,%g5                                 ! lastdiff=diff
+
+       .word   0x83408000      !rd     %ccr,%g1
+       and     %g1,4,%g1                               ! isolate zero flag
+       xor     %g1,4,%g1                               ! flip zero flag
+
+       subcc   %o1,%g1,%o1                             ! conditional --$cnt
+       bnz     .Loop2                                  ! repeat while scaled cnt is non-zero
+       add     %o0,%g1,%o0                             ! conditional ++$out
+
+.Ldone2:
+       srl     %o1,2,%o1                               ! undo the *4 scaling: remaining cnt
+       retl
+       sub     %o3,%o1,%o0                             ! return saved cnt - remaining cnt (delay slot)
+.type  _sparcv9_vis1_instrument_bus2,#function
+.size  _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
+
 .section       ".init",#alloc,#execinstr
        call    OPENSSL_cpuid_setup
        nop
index d9f986f..8bf2846 100644 (file)
@@ -4,30 +4,68 @@
 #include <setjmp.h>
 #include <signal.h>
 #include <sys/time.h>
+#include <unistd.h>
 #include <openssl/bn.h>
 
-#define SPARCV9_TICK_PRIVILEGED (1<<0)
-#define SPARCV9_PREFER_FPU      (1<<1)
-#define SPARCV9_VIS1            (1<<2)
-#define SPARCV9_VIS2            (1<<3) /* reserved */
-#define SPARCV9_FMADD           (1<<4) /* reserved for SPARC64 V */
+#include "sparc_arch.h"
 
-static int OPENSSL_sparcv9cap_P = SPARCV9_TICK_PRIVILEGED;
+#if defined(__GNUC__) && defined(__linux)
+__attribute__ ((visibility("hidden")))
+#endif
+unsigned int OPENSSL_sparcv9cap_P[2] = { SPARCV9_TICK_PRIVILEGED, 0 };
 
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                 const BN_ULONG *np, const BN_ULONG *n0, int num)
 {
+    int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
+                         const BN_ULONG *np, const BN_ULONG *n0, int num);
     int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                         const BN_ULONG *np, const BN_ULONG *n0, int num);
     int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                         const BN_ULONG *np, const BN_ULONG *n0, int num);
 
-    if (num >= 8 && !(num & 1) &&
-        (OPENSSL_sparcv9cap_P & (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) ==
-        (SPARCV9_PREFER_FPU | SPARCV9_VIS1))
-        return bn_mul_mont_fpu(rp, ap, bp, np, n0, num);
-    else
-        return bn_mul_mont_int(rp, ap, bp, np, n0, num);
+    if (!(num & 1) && num >= 6) {   /* accelerated paths need an even num of at least 6 */
+        if ((num & 15) == 0 && num <= 64 &&     /* i.e. num is 16, 32, 48 or 64 */
+            (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) ==
+            (CFR_MONTMUL | CFR_MONTSQR)) {
+            typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap,
+                                          const BN_ULONG *bp,
+                                          const BN_ULONG *np,
+                                          const BN_ULONG *n0);
+            int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap,
+                                 const BN_ULONG *bp, const BN_ULONG *np,
+                                 const BN_ULONG *n0);
+            int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap,
+                                  const BN_ULONG *bp, const BN_ULONG *np,
+                                  const BN_ULONG *n0);
+            int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap,
+                                  const BN_ULONG *bp, const BN_ULONG *np,
+                                  const BN_ULONG *n0);
+            int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap,
+                                  const BN_ULONG *bp, const BN_ULONG *np,
+                                  const BN_ULONG *n0);
+            static const bn_mul_mont_f funcs[4] = {
+                bn_mul_mont_t4_8, bn_mul_mont_t4_16,
+                bn_mul_mont_t4_24, bn_mul_mont_t4_32
+            };
+            bn_mul_mont_f worker = funcs[num / 16 - 1];     /* index 0..3 for num 16..64 */
+
+            if ((*worker) (rp, ap, bp, np, n0))
+                return 1;
+            /* worker returned 0: retry once, then fall back to the VIS3 routine */
+            if ((*worker) (rp, ap, bp, np, n0))
+                return 1;
+            return bn_mul_mont_vis3(rp, ap, bp, np, n0, num);
+        }
+        if ((OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3))
+            return bn_mul_mont_vis3(rp, ap, bp, np, n0, num);
+        else if (num >= 8 &&
+                 (OPENSSL_sparcv9cap_P[0] &
+                  (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) ==
+                 (SPARCV9_PREFER_FPU | SPARCV9_VIS1))
+            return bn_mul_mont_fpu(rp, ap, bp, np, n0, num);
+    }
+    return bn_mul_mont_int(rp, ap, bp, np, n0, num);    /* no accelerated path selected */
 
 unsigned long _sparcv9_rdtick(void);
@@ -35,10 +73,15 @@ void _sparcv9_vis1_probe(void);
 unsigned long _sparcv9_vis1_instrument(void);
 void _sparcv9_vis2_probe(void);
 void _sparcv9_fmadd_probe(void);
+unsigned long _sparcv9_rdcfr(void);
+void _sparcv9_vis3_probe(void);
+unsigned long _sparcv9_random(void);
+size_t _sparcv9_vis1_instrument_bus(unsigned int *, size_t);
+size_t _sparcv9_vis1_instrument_bus2(unsigned int *, size_t, size_t);
 
 unsigned long OPENSSL_rdtsc(void)
 {
-    if (OPENSSL_sparcv9cap_P & SPARCV9_TICK_PRIVILEGED)
+    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_TICK_PRIVILEGED)
 #if defined(__sun) && defined(__SVR4)
         return gethrtime();
 #else
@@ -48,6 +91,24 @@ unsigned long OPENSSL_rdtsc(void)
         return _sparcv9_rdtick();
 }
 
+size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt)
+{
+    const unsigned int need = SPARCV9_TICK_PRIVILEGED | SPARCV9_BLK;
+
+    if ((OPENSSL_sparcv9cap_P[0] & need) != SPARCV9_BLK)
+        return 0;   /* requires SPARCV9_BLK set and SPARCV9_TICK_PRIVILEGED clear */
+    return _sparcv9_vis1_instrument_bus(out, cnt);
+}
+
+size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max)
+{
+    const unsigned int need = SPARCV9_TICK_PRIVILEGED | SPARCV9_BLK;
+
+    if ((OPENSSL_sparcv9cap_P[0] & need) != SPARCV9_BLK)
+        return 0;   /* requires SPARCV9_BLK set and SPARCV9_TICK_PRIVILEGED clear */
+    return _sparcv9_vis1_instrument_bus2(out, cnt, max);
+}
+
 #if 0 && defined(__sun) && defined(__SVR4)
 /*
  * This code path is disabled, because of incompatibility of libdevinfo.so.1
@@ -74,17 +135,17 @@ static int walk_nodename(di_node_t node, di_node_name_t di_node_name)
     if (!strcmp(name, "SUNW,UltraSPARC") ||
         /* covers II,III,IV */
         !strncmp(name, "SUNW,UltraSPARC-I", 17)) {
-        OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU | SPARCV9_VIS1;
+        OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU | SPARCV9_VIS1;
 
         /* %tick is privileged only on UltraSPARC-I/II, but not IIe */
         if (name[14] != '\0' && name[17] != '\0' && name[18] != '\0')
-            OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
+            OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
 
         return DI_WALK_TERMINATE;
     }
     /* This is expected to catch remaining UltraSPARCs, such as T1 */
     else if (!strncmp(name, "SUNW,UltraSPARC", 15)) {
-        OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
+        OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
 
         return DI_WALK_TERMINATE;
     }
@@ -103,22 +164,22 @@ void OPENSSL_cpuid_setup(void)
     trigger = 1;
 
     if ((e = getenv("OPENSSL_sparcv9cap"))) {
-        OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0);
+        OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0);
         return;
     }
 
     if (sysinfo(SI_MACHINE, si, sizeof(si)) > 0) {
         if (strcmp(si, "sun4v"))
             /* FPU is preferred for all CPUs, but US-T1/2 */
-            OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU;
+            OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU;
     }
 
     if (sysinfo(SI_ISALIST, si, sizeof(si)) > 0) {
         if (strstr(si, "+vis"))
-            OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
+            OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK;
         if (strstr(si, "+vis2")) {
-            OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
-            OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
+            OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2;
+            OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
             return;
         }
     }
@@ -188,12 +249,14 @@ void OPENSSL_cpuid_setup(void)
     trigger = 1;
 
     if ((e = getenv("OPENSSL_sparcv9cap"))) {
-        OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0);
+        OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0);
+        if ((e = strchr(e, ':')))
+            OPENSSL_sparcv9cap_P[1] = strtoul(e + 1, NULL, 0);
         return;
     }
 
     /* Initial value, fits UltraSPARC-I&II... */
-    OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU | SPARCV9_TICK_PRIVILEGED;
+    OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU | SPARCV9_TICK_PRIVILEGED;
 
     sigfillset(&all_masked);
     sigdelset(&all_masked, SIGILL);
@@ -216,30 +279,68 @@ void OPENSSL_cpuid_setup(void)
 
     if (sigsetjmp(common_jmp, 1) == 0) {
         _sparcv9_rdtick();
-        OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
+        OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
     }
 
     if (sigsetjmp(common_jmp, 1) == 0) {
         _sparcv9_vis1_probe();
-        OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
+        OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK;
         /* detect UltraSPARC-Tx, see sparccpud.S for details... */
         if (_sparcv9_vis1_instrument() >= 12)
-            OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU);
+            OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU);
         else {
             _sparcv9_vis2_probe();
-            OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
+            OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2;
         }
     }
 
     if (sigsetjmp(common_jmp, 1) == 0) {
         _sparcv9_fmadd_probe();
-        OPENSSL_sparcv9cap_P |= SPARCV9_FMADD;
+        OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD;
+    }
+
+    /*
+     * Unlike VIS2, the VIS3 flag is tested independently of VIS1,
+     * because VIS3 also defines integer instructions.
+     */
+    if (sigsetjmp(common_jmp, 1) == 0) {
+        _sparcv9_vis3_probe();
+        OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3;
+    }
+# if 0                          /* was planned at some point but never
+                                 * implemented in hardware */
+    if (sigsetjmp(common_jmp, 1) == 0) {
+        (void)_sparcv9_random();
+        OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM;
+    }
+# endif
+
+    /*
+     * Pending a better solution, _sparcv9_rdcfr is gated on the
+     * VIS3 flag, because it enters an uninterruptible endless
+     * loop on UltraSPARC II running Solaris. Things might be
+     * different on Linux...
+     */
+    if ((OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) &&
+        sigsetjmp(common_jmp, 1) == 0) {
+        OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr();
     }
 
     sigaction(SIGBUS, &bus_oact, NULL);
     sigaction(SIGILL, &ill_oact, NULL);
 
     sigprocmask(SIG_SETMASK, &oset, NULL);
+
+    if (sizeof(size_t) == 8)
+        OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK;
+# ifdef __linux
+    else {
+        int ret = syscall(340);
+
+        if (ret >= 0 && ret & 1)
+            OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK;
+    }
+# endif
 }
 
 #endif
index 7639533..414af7b 100644 (file)
@@ -37,6 +37,9 @@ lib:  $(LIBOBJ)
        $(RANLIB) $(LIB) || echo Never mind.
        @touch lib
 
+files:
+       $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
+
 links:
        @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
        @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
index 451c70e..00e001b 100644 (file)
@@ -148,6 +148,7 @@ int main(int argc, char **argv)
     ERR_remove_thread_state(NULL);
     ERR_free_strings();
     CRYPTO_mem_leaks(bio_err);
+    BIO_free(bio_err);
 
     return 0;
 }
index 519649b..1d4f87e 100644 (file)
@@ -75,12 +75,12 @@ extern "C" {
 # define CHECKED_STACK_OF(type, p) \
     ((_STACK*) (1 ? p : (STACK_OF(type)*)0))
 
+# define CHECKED_SK_COPY_FUNC(type, p) \
+    ((void *(*)(void *)) ((1 ? p : (type *(*)(const type *))0)))
+
 # define CHECKED_SK_FREE_FUNC(type, p) \
     ((void (*)(void *)) ((1 ? p : (void (*)(type *))0)))
 
-# define CHECKED_SK_FREE_FUNC2(type, p) \
-    ((void (*)(void *)) ((1 ? p : (void (*)(type))0)))
-
 # define CHECKED_SK_CMP_FUNC(type, p) \
     ((int (*)(const void *, const void *)) \
         ((1 ? p : (int (*)(const type * const *, const type * const *))0)))
@@ -177,6 +177,8 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
         (STACK_OF(type) *)sk_dup(CHECKED_STACK_OF(type, st))
 # define SKM_sk_pop_free(type, st, free_func) \
         sk_pop_free(CHECKED_STACK_OF(type, st), CHECKED_SK_FREE_FUNC(type, free_func))
+# define SKM_sk_deep_copy(type, st, copy_func, free_func) \
+        (STACK_OF(type) *)sk_deep_copy(CHECKED_STACK_OF(type, st), CHECKED_SK_COPY_FUNC(type, copy_func), CHECKED_SK_FREE_FUNC(type, free_func))
 # define SKM_sk_shift(type, st) \
         (type *)sk_shift(CHECKED_STACK_OF(type, st))
 # define SKM_sk_pop(type, st) \
@@ -226,6 +228,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ACCESS_DESCRIPTION_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ACCESS_DESCRIPTION, (st), (cmp))
 # define sk_ACCESS_DESCRIPTION_dup(st) SKM_sk_dup(ACCESS_DESCRIPTION, st)
 # define sk_ACCESS_DESCRIPTION_pop_free(st, free_func) SKM_sk_pop_free(ACCESS_DESCRIPTION, (st), (free_func))
+# define sk_ACCESS_DESCRIPTION_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ACCESS_DESCRIPTION, (st), (copy_func), (free_func))
 # define sk_ACCESS_DESCRIPTION_shift(st) SKM_sk_shift(ACCESS_DESCRIPTION, (st))
 # define sk_ACCESS_DESCRIPTION_pop(st) SKM_sk_pop(ACCESS_DESCRIPTION, (st))
 # define sk_ACCESS_DESCRIPTION_sort(st) SKM_sk_sort(ACCESS_DESCRIPTION, (st))
@@ -247,6 +250,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ASIdOrRange_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASIdOrRange, (st), (cmp))
 # define sk_ASIdOrRange_dup(st) SKM_sk_dup(ASIdOrRange, st)
 # define sk_ASIdOrRange_pop_free(st, free_func) SKM_sk_pop_free(ASIdOrRange, (st), (free_func))
+# define sk_ASIdOrRange_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASIdOrRange, (st), (copy_func), (free_func))
 # define sk_ASIdOrRange_shift(st) SKM_sk_shift(ASIdOrRange, (st))
 # define sk_ASIdOrRange_pop(st) SKM_sk_pop(ASIdOrRange, (st))
 # define sk_ASIdOrRange_sort(st) SKM_sk_sort(ASIdOrRange, (st))
@@ -268,6 +272,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ASN1_GENERALSTRING_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_GENERALSTRING, (st), (cmp))
 # define sk_ASN1_GENERALSTRING_dup(st) SKM_sk_dup(ASN1_GENERALSTRING, st)
 # define sk_ASN1_GENERALSTRING_pop_free(st, free_func) SKM_sk_pop_free(ASN1_GENERALSTRING, (st), (free_func))
+# define sk_ASN1_GENERALSTRING_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_GENERALSTRING, (st), (copy_func), (free_func))
 # define sk_ASN1_GENERALSTRING_shift(st) SKM_sk_shift(ASN1_GENERALSTRING, (st))
 # define sk_ASN1_GENERALSTRING_pop(st) SKM_sk_pop(ASN1_GENERALSTRING, (st))
 # define sk_ASN1_GENERALSTRING_sort(st) SKM_sk_sort(ASN1_GENERALSTRING, (st))
@@ -289,6 +294,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ASN1_INTEGER_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_INTEGER, (st), (cmp))
 # define sk_ASN1_INTEGER_dup(st) SKM_sk_dup(ASN1_INTEGER, st)
 # define sk_ASN1_INTEGER_pop_free(st, free_func) SKM_sk_pop_free(ASN1_INTEGER, (st), (free_func))
+# define sk_ASN1_INTEGER_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_INTEGER, (st), (copy_func), (free_func))
 # define sk_ASN1_INTEGER_shift(st) SKM_sk_shift(ASN1_INTEGER, (st))
 # define sk_ASN1_INTEGER_pop(st) SKM_sk_pop(ASN1_INTEGER, (st))
 # define sk_ASN1_INTEGER_sort(st) SKM_sk_sort(ASN1_INTEGER, (st))
@@ -310,6 +316,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ASN1_OBJECT_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_OBJECT, (st), (cmp))
 # define sk_ASN1_OBJECT_dup(st) SKM_sk_dup(ASN1_OBJECT, st)
 # define sk_ASN1_OBJECT_pop_free(st, free_func) SKM_sk_pop_free(ASN1_OBJECT, (st), (free_func))
+# define sk_ASN1_OBJECT_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_OBJECT, (st), (copy_func), (free_func))
 # define sk_ASN1_OBJECT_shift(st) SKM_sk_shift(ASN1_OBJECT, (st))
 # define sk_ASN1_OBJECT_pop(st) SKM_sk_pop(ASN1_OBJECT, (st))
 # define sk_ASN1_OBJECT_sort(st) SKM_sk_sort(ASN1_OBJECT, (st))
@@ -331,6 +338,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ASN1_STRING_TABLE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_STRING_TABLE, (st), (cmp))
 # define sk_ASN1_STRING_TABLE_dup(st) SKM_sk_dup(ASN1_STRING_TABLE, st)
 # define sk_ASN1_STRING_TABLE_pop_free(st, free_func) SKM_sk_pop_free(ASN1_STRING_TABLE, (st), (free_func))
+# define sk_ASN1_STRING_TABLE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_STRING_TABLE, (st), (copy_func), (free_func))
 # define sk_ASN1_STRING_TABLE_shift(st) SKM_sk_shift(ASN1_STRING_TABLE, (st))
 # define sk_ASN1_STRING_TABLE_pop(st) SKM_sk_pop(ASN1_STRING_TABLE, (st))
 # define sk_ASN1_STRING_TABLE_sort(st) SKM_sk_sort(ASN1_STRING_TABLE, (st))
@@ -352,6 +360,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ASN1_TYPE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_TYPE, (st), (cmp))
 # define sk_ASN1_TYPE_dup(st) SKM_sk_dup(ASN1_TYPE, st)
 # define sk_ASN1_TYPE_pop_free(st, free_func) SKM_sk_pop_free(ASN1_TYPE, (st), (free_func))
+# define sk_ASN1_TYPE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_TYPE, (st), (copy_func), (free_func))
 # define sk_ASN1_TYPE_shift(st) SKM_sk_shift(ASN1_TYPE, (st))
 # define sk_ASN1_TYPE_pop(st) SKM_sk_pop(ASN1_TYPE, (st))
 # define sk_ASN1_TYPE_sort(st) SKM_sk_sort(ASN1_TYPE, (st))
@@ -373,6 +382,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ASN1_UTF8STRING_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_UTF8STRING, (st), (cmp))
 # define sk_ASN1_UTF8STRING_dup(st) SKM_sk_dup(ASN1_UTF8STRING, st)
 # define sk_ASN1_UTF8STRING_pop_free(st, free_func) SKM_sk_pop_free(ASN1_UTF8STRING, (st), (free_func))
+# define sk_ASN1_UTF8STRING_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_UTF8STRING, (st), (copy_func), (free_func))
 # define sk_ASN1_UTF8STRING_shift(st) SKM_sk_shift(ASN1_UTF8STRING, (st))
 # define sk_ASN1_UTF8STRING_pop(st) SKM_sk_pop(ASN1_UTF8STRING, (st))
 # define sk_ASN1_UTF8STRING_sort(st) SKM_sk_sort(ASN1_UTF8STRING, (st))
@@ -394,6 +404,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ASN1_VALUE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_VALUE, (st), (cmp))
 # define sk_ASN1_VALUE_dup(st) SKM_sk_dup(ASN1_VALUE, st)
 # define sk_ASN1_VALUE_pop_free(st, free_func) SKM_sk_pop_free(ASN1_VALUE, (st), (free_func))
+# define sk_ASN1_VALUE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_VALUE, (st), (copy_func), (free_func))
 # define sk_ASN1_VALUE_shift(st) SKM_sk_shift(ASN1_VALUE, (st))
 # define sk_ASN1_VALUE_pop(st) SKM_sk_pop(ASN1_VALUE, (st))
 # define sk_ASN1_VALUE_sort(st) SKM_sk_sort(ASN1_VALUE, (st))
@@ -415,6 +426,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_BIO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(BIO, (st), (cmp))
 # define sk_BIO_dup(st) SKM_sk_dup(BIO, st)
 # define sk_BIO_pop_free(st, free_func) SKM_sk_pop_free(BIO, (st), (free_func))
+# define sk_BIO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(BIO, (st), (copy_func), (free_func))
 # define sk_BIO_shift(st) SKM_sk_shift(BIO, (st))
 # define sk_BIO_pop(st) SKM_sk_pop(BIO, (st))
 # define sk_BIO_sort(st) SKM_sk_sort(BIO, (st))
@@ -436,6 +448,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_BY_DIR_ENTRY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(BY_DIR_ENTRY, (st), (cmp))
 # define sk_BY_DIR_ENTRY_dup(st) SKM_sk_dup(BY_DIR_ENTRY, st)
 # define sk_BY_DIR_ENTRY_pop_free(st, free_func) SKM_sk_pop_free(BY_DIR_ENTRY, (st), (free_func))
+# define sk_BY_DIR_ENTRY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(BY_DIR_ENTRY, (st), (copy_func), (free_func))
 # define sk_BY_DIR_ENTRY_shift(st) SKM_sk_shift(BY_DIR_ENTRY, (st))
 # define sk_BY_DIR_ENTRY_pop(st) SKM_sk_pop(BY_DIR_ENTRY, (st))
 # define sk_BY_DIR_ENTRY_sort(st) SKM_sk_sort(BY_DIR_ENTRY, (st))
@@ -457,6 +470,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_BY_DIR_HASH_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(BY_DIR_HASH, (st), (cmp))
 # define sk_BY_DIR_HASH_dup(st) SKM_sk_dup(BY_DIR_HASH, st)
 # define sk_BY_DIR_HASH_pop_free(st, free_func) SKM_sk_pop_free(BY_DIR_HASH, (st), (free_func))
+# define sk_BY_DIR_HASH_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(BY_DIR_HASH, (st), (copy_func), (free_func))
 # define sk_BY_DIR_HASH_shift(st) SKM_sk_shift(BY_DIR_HASH, (st))
 # define sk_BY_DIR_HASH_pop(st) SKM_sk_pop(BY_DIR_HASH, (st))
 # define sk_BY_DIR_HASH_sort(st) SKM_sk_sort(BY_DIR_HASH, (st))
@@ -478,10 +492,33 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_CMS_CertificateChoices_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CMS_CertificateChoices, (st), (cmp))
 # define sk_CMS_CertificateChoices_dup(st) SKM_sk_dup(CMS_CertificateChoices, st)
 # define sk_CMS_CertificateChoices_pop_free(st, free_func) SKM_sk_pop_free(CMS_CertificateChoices, (st), (free_func))
+# define sk_CMS_CertificateChoices_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CMS_CertificateChoices, (st), (copy_func), (free_func))
 # define sk_CMS_CertificateChoices_shift(st) SKM_sk_shift(CMS_CertificateChoices, (st))
 # define sk_CMS_CertificateChoices_pop(st) SKM_sk_pop(CMS_CertificateChoices, (st))
 # define sk_CMS_CertificateChoices_sort(st) SKM_sk_sort(CMS_CertificateChoices, (st))
 # define sk_CMS_CertificateChoices_is_sorted(st) SKM_sk_is_sorted(CMS_CertificateChoices, (st))
+# define sk_CMS_RecipientEncryptedKey_new(cmp) SKM_sk_new(CMS_RecipientEncryptedKey, (cmp))
+# define sk_CMS_RecipientEncryptedKey_new_null() SKM_sk_new_null(CMS_RecipientEncryptedKey)
+# define sk_CMS_RecipientEncryptedKey_free(st) SKM_sk_free(CMS_RecipientEncryptedKey, (st))
+# define sk_CMS_RecipientEncryptedKey_num(st) SKM_sk_num(CMS_RecipientEncryptedKey, (st))
+# define sk_CMS_RecipientEncryptedKey_value(st, i) SKM_sk_value(CMS_RecipientEncryptedKey, (st), (i))
+# define sk_CMS_RecipientEncryptedKey_set(st, i, val) SKM_sk_set(CMS_RecipientEncryptedKey, (st), (i), (val))
+# define sk_CMS_RecipientEncryptedKey_zero(st) SKM_sk_zero(CMS_RecipientEncryptedKey, (st))
+# define sk_CMS_RecipientEncryptedKey_push(st, val) SKM_sk_push(CMS_RecipientEncryptedKey, (st), (val))
+# define sk_CMS_RecipientEncryptedKey_unshift(st, val) SKM_sk_unshift(CMS_RecipientEncryptedKey, (st), (val))
+# define sk_CMS_RecipientEncryptedKey_find(st, val) SKM_sk_find(CMS_RecipientEncryptedKey, (st), (val))
+# define sk_CMS_RecipientEncryptedKey_find_ex(st, val) SKM_sk_find_ex(CMS_RecipientEncryptedKey, (st), (val))
+# define sk_CMS_RecipientEncryptedKey_delete(st, i) SKM_sk_delete(CMS_RecipientEncryptedKey, (st), (i))
+# define sk_CMS_RecipientEncryptedKey_delete_ptr(st, ptr) SKM_sk_delete_ptr(CMS_RecipientEncryptedKey, (st), (ptr))
+# define sk_CMS_RecipientEncryptedKey_insert(st, val, i) SKM_sk_insert(CMS_RecipientEncryptedKey, (st), (val), (i))
+# define sk_CMS_RecipientEncryptedKey_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CMS_RecipientEncryptedKey, (st), (cmp))
+# define sk_CMS_RecipientEncryptedKey_dup(st) SKM_sk_dup(CMS_RecipientEncryptedKey, st)
+# define sk_CMS_RecipientEncryptedKey_pop_free(st, free_func) SKM_sk_pop_free(CMS_RecipientEncryptedKey, (st), (free_func))
+# define sk_CMS_RecipientEncryptedKey_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CMS_RecipientEncryptedKey, (st), (copy_func), (free_func))
+# define sk_CMS_RecipientEncryptedKey_shift(st) SKM_sk_shift(CMS_RecipientEncryptedKey, (st))
+# define sk_CMS_RecipientEncryptedKey_pop(st) SKM_sk_pop(CMS_RecipientEncryptedKey, (st))
+# define sk_CMS_RecipientEncryptedKey_sort(st) SKM_sk_sort(CMS_RecipientEncryptedKey, (st))
+# define sk_CMS_RecipientEncryptedKey_is_sorted(st) SKM_sk_is_sorted(CMS_RecipientEncryptedKey, (st))
 # define sk_CMS_RecipientInfo_new(cmp) SKM_sk_new(CMS_RecipientInfo, (cmp))
 # define sk_CMS_RecipientInfo_new_null() SKM_sk_new_null(CMS_RecipientInfo)
 # define sk_CMS_RecipientInfo_free(st) SKM_sk_free(CMS_RecipientInfo, (st))
@@ -499,6 +536,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_CMS_RecipientInfo_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CMS_RecipientInfo, (st), (cmp))
 # define sk_CMS_RecipientInfo_dup(st) SKM_sk_dup(CMS_RecipientInfo, st)
 # define sk_CMS_RecipientInfo_pop_free(st, free_func) SKM_sk_pop_free(CMS_RecipientInfo, (st), (free_func))
+# define sk_CMS_RecipientInfo_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CMS_RecipientInfo, (st), (copy_func), (free_func))
 # define sk_CMS_RecipientInfo_shift(st) SKM_sk_shift(CMS_RecipientInfo, (st))
 # define sk_CMS_RecipientInfo_pop(st) SKM_sk_pop(CMS_RecipientInfo, (st))
 # define sk_CMS_RecipientInfo_sort(st) SKM_sk_sort(CMS_RecipientInfo, (st))
@@ -520,6 +558,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_CMS_RevocationInfoChoice_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CMS_RevocationInfoChoice, (st), (cmp))
 # define sk_CMS_RevocationInfoChoice_dup(st) SKM_sk_dup(CMS_RevocationInfoChoice, st)
 # define sk_CMS_RevocationInfoChoice_pop_free(st, free_func) SKM_sk_pop_free(CMS_RevocationInfoChoice, (st), (free_func))
+# define sk_CMS_RevocationInfoChoice_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CMS_RevocationInfoChoice, (st), (copy_func), (free_func))
 # define sk_CMS_RevocationInfoChoice_shift(st) SKM_sk_shift(CMS_RevocationInfoChoice, (st))
 # define sk_CMS_RevocationInfoChoice_pop(st) SKM_sk_pop(CMS_RevocationInfoChoice, (st))
 # define sk_CMS_RevocationInfoChoice_sort(st) SKM_sk_sort(CMS_RevocationInfoChoice, (st))
@@ -541,6 +580,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_CMS_SignerInfo_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CMS_SignerInfo, (st), (cmp))
 # define sk_CMS_SignerInfo_dup(st) SKM_sk_dup(CMS_SignerInfo, st)
 # define sk_CMS_SignerInfo_pop_free(st, free_func) SKM_sk_pop_free(CMS_SignerInfo, (st), (free_func))
+# define sk_CMS_SignerInfo_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CMS_SignerInfo, (st), (copy_func), (free_func))
 # define sk_CMS_SignerInfo_shift(st) SKM_sk_shift(CMS_SignerInfo, (st))
 # define sk_CMS_SignerInfo_pop(st) SKM_sk_pop(CMS_SignerInfo, (st))
 # define sk_CMS_SignerInfo_sort(st) SKM_sk_sort(CMS_SignerInfo, (st))
@@ -562,6 +602,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_CONF_IMODULE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CONF_IMODULE, (st), (cmp))
 # define sk_CONF_IMODULE_dup(st) SKM_sk_dup(CONF_IMODULE, st)
 # define sk_CONF_IMODULE_pop_free(st, free_func) SKM_sk_pop_free(CONF_IMODULE, (st), (free_func))
+# define sk_CONF_IMODULE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CONF_IMODULE, (st), (copy_func), (free_func))
 # define sk_CONF_IMODULE_shift(st) SKM_sk_shift(CONF_IMODULE, (st))
 # define sk_CONF_IMODULE_pop(st) SKM_sk_pop(CONF_IMODULE, (st))
 # define sk_CONF_IMODULE_sort(st) SKM_sk_sort(CONF_IMODULE, (st))
@@ -583,6 +624,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_CONF_MODULE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CONF_MODULE, (st), (cmp))
 # define sk_CONF_MODULE_dup(st) SKM_sk_dup(CONF_MODULE, st)
 # define sk_CONF_MODULE_pop_free(st, free_func) SKM_sk_pop_free(CONF_MODULE, (st), (free_func))
+# define sk_CONF_MODULE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CONF_MODULE, (st), (copy_func), (free_func))
 # define sk_CONF_MODULE_shift(st) SKM_sk_shift(CONF_MODULE, (st))
 # define sk_CONF_MODULE_pop(st) SKM_sk_pop(CONF_MODULE, (st))
 # define sk_CONF_MODULE_sort(st) SKM_sk_sort(CONF_MODULE, (st))
@@ -604,6 +646,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_CONF_VALUE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CONF_VALUE, (st), (cmp))
 # define sk_CONF_VALUE_dup(st) SKM_sk_dup(CONF_VALUE, st)
 # define sk_CONF_VALUE_pop_free(st, free_func) SKM_sk_pop_free(CONF_VALUE, (st), (free_func))
+# define sk_CONF_VALUE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CONF_VALUE, (st), (copy_func), (free_func))
 # define sk_CONF_VALUE_shift(st) SKM_sk_shift(CONF_VALUE, (st))
 # define sk_CONF_VALUE_pop(st) SKM_sk_pop(CONF_VALUE, (st))
 # define sk_CONF_VALUE_sort(st) SKM_sk_sort(CONF_VALUE, (st))
@@ -625,6 +668,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_CRYPTO_EX_DATA_FUNCS_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CRYPTO_EX_DATA_FUNCS, (st), (cmp))
 # define sk_CRYPTO_EX_DATA_FUNCS_dup(st) SKM_sk_dup(CRYPTO_EX_DATA_FUNCS, st)
 # define sk_CRYPTO_EX_DATA_FUNCS_pop_free(st, free_func) SKM_sk_pop_free(CRYPTO_EX_DATA_FUNCS, (st), (free_func))
+# define sk_CRYPTO_EX_DATA_FUNCS_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CRYPTO_EX_DATA_FUNCS, (st), (copy_func), (free_func))
 # define sk_CRYPTO_EX_DATA_FUNCS_shift(st) SKM_sk_shift(CRYPTO_EX_DATA_FUNCS, (st))
 # define sk_CRYPTO_EX_DATA_FUNCS_pop(st) SKM_sk_pop(CRYPTO_EX_DATA_FUNCS, (st))
 # define sk_CRYPTO_EX_DATA_FUNCS_sort(st) SKM_sk_sort(CRYPTO_EX_DATA_FUNCS, (st))
@@ -646,6 +690,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_CRYPTO_dynlock_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CRYPTO_dynlock, (st), (cmp))
 # define sk_CRYPTO_dynlock_dup(st) SKM_sk_dup(CRYPTO_dynlock, st)
 # define sk_CRYPTO_dynlock_pop_free(st, free_func) SKM_sk_pop_free(CRYPTO_dynlock, (st), (free_func))
+# define sk_CRYPTO_dynlock_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CRYPTO_dynlock, (st), (copy_func), (free_func))
 # define sk_CRYPTO_dynlock_shift(st) SKM_sk_shift(CRYPTO_dynlock, (st))
 # define sk_CRYPTO_dynlock_pop(st) SKM_sk_pop(CRYPTO_dynlock, (st))
 # define sk_CRYPTO_dynlock_sort(st) SKM_sk_sort(CRYPTO_dynlock, (st))
@@ -667,6 +712,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_DIST_POINT_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(DIST_POINT, (st), (cmp))
 # define sk_DIST_POINT_dup(st) SKM_sk_dup(DIST_POINT, st)
 # define sk_DIST_POINT_pop_free(st, free_func) SKM_sk_pop_free(DIST_POINT, (st), (free_func))
+# define sk_DIST_POINT_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(DIST_POINT, (st), (copy_func), (free_func))
 # define sk_DIST_POINT_shift(st) SKM_sk_shift(DIST_POINT, (st))
 # define sk_DIST_POINT_pop(st) SKM_sk_pop(DIST_POINT, (st))
 # define sk_DIST_POINT_sort(st) SKM_sk_sort(DIST_POINT, (st))
@@ -688,6 +734,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ENGINE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ENGINE, (st), (cmp))
 # define sk_ENGINE_dup(st) SKM_sk_dup(ENGINE, st)
 # define sk_ENGINE_pop_free(st, free_func) SKM_sk_pop_free(ENGINE, (st), (free_func))
+# define sk_ENGINE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ENGINE, (st), (copy_func), (free_func))
 # define sk_ENGINE_shift(st) SKM_sk_shift(ENGINE, (st))
 # define sk_ENGINE_pop(st) SKM_sk_pop(ENGINE, (st))
 # define sk_ENGINE_sort(st) SKM_sk_sort(ENGINE, (st))
@@ -709,6 +756,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ENGINE_CLEANUP_ITEM_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ENGINE_CLEANUP_ITEM, (st), (cmp))
 # define sk_ENGINE_CLEANUP_ITEM_dup(st) SKM_sk_dup(ENGINE_CLEANUP_ITEM, st)
 # define sk_ENGINE_CLEANUP_ITEM_pop_free(st, free_func) SKM_sk_pop_free(ENGINE_CLEANUP_ITEM, (st), (free_func))
+# define sk_ENGINE_CLEANUP_ITEM_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ENGINE_CLEANUP_ITEM, (st), (copy_func), (free_func))
 # define sk_ENGINE_CLEANUP_ITEM_shift(st) SKM_sk_shift(ENGINE_CLEANUP_ITEM, (st))
 # define sk_ENGINE_CLEANUP_ITEM_pop(st) SKM_sk_pop(ENGINE_CLEANUP_ITEM, (st))
 # define sk_ENGINE_CLEANUP_ITEM_sort(st) SKM_sk_sort(ENGINE_CLEANUP_ITEM, (st))
@@ -730,6 +778,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_ESS_CERT_ID_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ESS_CERT_ID, (st), (cmp))
 # define sk_ESS_CERT_ID_dup(st) SKM_sk_dup(ESS_CERT_ID, st)
 # define sk_ESS_CERT_ID_pop_free(st, free_func) SKM_sk_pop_free(ESS_CERT_ID, (st), (free_func))
+# define sk_ESS_CERT_ID_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ESS_CERT_ID, (st), (copy_func), (free_func))
 # define sk_ESS_CERT_ID_shift(st) SKM_sk_shift(ESS_CERT_ID, (st))
 # define sk_ESS_CERT_ID_pop(st) SKM_sk_pop(ESS_CERT_ID, (st))
 # define sk_ESS_CERT_ID_sort(st) SKM_sk_sort(ESS_CERT_ID, (st))
@@ -751,6 +800,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_EVP_MD_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(EVP_MD, (st), (cmp))
 # define sk_EVP_MD_dup(st) SKM_sk_dup(EVP_MD, st)
 # define sk_EVP_MD_pop_free(st, free_func) SKM_sk_pop_free(EVP_MD, (st), (free_func))
+# define sk_EVP_MD_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(EVP_MD, (st), (copy_func), (free_func))
 # define sk_EVP_MD_shift(st) SKM_sk_shift(EVP_MD, (st))
 # define sk_EVP_MD_pop(st) SKM_sk_pop(EVP_MD, (st))
 # define sk_EVP_MD_sort(st) SKM_sk_sort(EVP_MD, (st))
@@ -772,6 +822,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_EVP_PBE_CTL_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(EVP_PBE_CTL, (st), (cmp))
 # define sk_EVP_PBE_CTL_dup(st) SKM_sk_dup(EVP_PBE_CTL, st)
 # define sk_EVP_PBE_CTL_pop_free(st, free_func) SKM_sk_pop_free(EVP_PBE_CTL, (st), (free_func))
+# define sk_EVP_PBE_CTL_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(EVP_PBE_CTL, (st), (copy_func), (free_func))
 # define sk_EVP_PBE_CTL_shift(st) SKM_sk_shift(EVP_PBE_CTL, (st))
 # define sk_EVP_PBE_CTL_pop(st) SKM_sk_pop(EVP_PBE_CTL, (st))
 # define sk_EVP_PBE_CTL_sort(st) SKM_sk_sort(EVP_PBE_CTL, (st))
@@ -793,6 +844,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_EVP_PKEY_ASN1_METHOD_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(EVP_PKEY_ASN1_METHOD, (st), (cmp))
 # define sk_EVP_PKEY_ASN1_METHOD_dup(st) SKM_sk_dup(EVP_PKEY_ASN1_METHOD, st)
 # define sk_EVP_PKEY_ASN1_METHOD_pop_free(st, free_func) SKM_sk_pop_free(EVP_PKEY_ASN1_METHOD, (st), (free_func))
+# define sk_EVP_PKEY_ASN1_METHOD_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(EVP_PKEY_ASN1_METHOD, (st), (copy_func), (free_func))
 # define sk_EVP_PKEY_ASN1_METHOD_shift(st) SKM_sk_shift(EVP_PKEY_ASN1_METHOD, (st))
 # define sk_EVP_PKEY_ASN1_METHOD_pop(st) SKM_sk_pop(EVP_PKEY_ASN1_METHOD, (st))
 # define sk_EVP_PKEY_ASN1_METHOD_sort(st) SKM_sk_sort(EVP_PKEY_ASN1_METHOD, (st))
@@ -814,6 +866,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_EVP_PKEY_METHOD_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(EVP_PKEY_METHOD, (st), (cmp))
 # define sk_EVP_PKEY_METHOD_dup(st) SKM_sk_dup(EVP_PKEY_METHOD, st)
 # define sk_EVP_PKEY_METHOD_pop_free(st, free_func) SKM_sk_pop_free(EVP_PKEY_METHOD, (st), (free_func))
+# define sk_EVP_PKEY_METHOD_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(EVP_PKEY_METHOD, (st), (copy_func), (free_func))
 # define sk_EVP_PKEY_METHOD_shift(st) SKM_sk_shift(EVP_PKEY_METHOD, (st))
 # define sk_EVP_PKEY_METHOD_pop(st) SKM_sk_pop(EVP_PKEY_METHOD, (st))
 # define sk_EVP_PKEY_METHOD_sort(st) SKM_sk_sort(EVP_PKEY_METHOD, (st))
@@ -835,6 +888,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_GENERAL_NAME_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(GENERAL_NAME, (st), (cmp))
 # define sk_GENERAL_NAME_dup(st) SKM_sk_dup(GENERAL_NAME, st)
 # define sk_GENERAL_NAME_pop_free(st, free_func) SKM_sk_pop_free(GENERAL_NAME, (st), (free_func))
+# define sk_GENERAL_NAME_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(GENERAL_NAME, (st), (copy_func), (free_func))
 # define sk_GENERAL_NAME_shift(st) SKM_sk_shift(GENERAL_NAME, (st))
 # define sk_GENERAL_NAME_pop(st) SKM_sk_pop(GENERAL_NAME, (st))
 # define sk_GENERAL_NAME_sort(st) SKM_sk_sort(GENERAL_NAME, (st))
@@ -856,6 +910,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_GENERAL_NAMES_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(GENERAL_NAMES, (st), (cmp))
 # define sk_GENERAL_NAMES_dup(st) SKM_sk_dup(GENERAL_NAMES, st)
 # define sk_GENERAL_NAMES_pop_free(st, free_func) SKM_sk_pop_free(GENERAL_NAMES, (st), (free_func))
+# define sk_GENERAL_NAMES_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(GENERAL_NAMES, (st), (copy_func), (free_func))
 # define sk_GENERAL_NAMES_shift(st) SKM_sk_shift(GENERAL_NAMES, (st))
 # define sk_GENERAL_NAMES_pop(st) SKM_sk_pop(GENERAL_NAMES, (st))
 # define sk_GENERAL_NAMES_sort(st) SKM_sk_sort(GENERAL_NAMES, (st))
@@ -877,6 +932,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_GENERAL_SUBTREE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(GENERAL_SUBTREE, (st), (cmp))
 # define sk_GENERAL_SUBTREE_dup(st) SKM_sk_dup(GENERAL_SUBTREE, st)
 # define sk_GENERAL_SUBTREE_pop_free(st, free_func) SKM_sk_pop_free(GENERAL_SUBTREE, (st), (free_func))
+# define sk_GENERAL_SUBTREE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(GENERAL_SUBTREE, (st), (copy_func), (free_func))
 # define sk_GENERAL_SUBTREE_shift(st) SKM_sk_shift(GENERAL_SUBTREE, (st))
 # define sk_GENERAL_SUBTREE_pop(st) SKM_sk_pop(GENERAL_SUBTREE, (st))
 # define sk_GENERAL_SUBTREE_sort(st) SKM_sk_sort(GENERAL_SUBTREE, (st))
@@ -898,6 +954,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_IPAddressFamily_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(IPAddressFamily, (st), (cmp))
 # define sk_IPAddressFamily_dup(st) SKM_sk_dup(IPAddressFamily, st)
 # define sk_IPAddressFamily_pop_free(st, free_func) SKM_sk_pop_free(IPAddressFamily, (st), (free_func))
+# define sk_IPAddressFamily_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(IPAddressFamily, (st), (copy_func), (free_func))
 # define sk_IPAddressFamily_shift(st) SKM_sk_shift(IPAddressFamily, (st))
 # define sk_IPAddressFamily_pop(st) SKM_sk_pop(IPAddressFamily, (st))
 # define sk_IPAddressFamily_sort(st) SKM_sk_sort(IPAddressFamily, (st))
@@ -919,6 +976,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_IPAddressOrRange_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(IPAddressOrRange, (st), (cmp))
 # define sk_IPAddressOrRange_dup(st) SKM_sk_dup(IPAddressOrRange, st)
 # define sk_IPAddressOrRange_pop_free(st, free_func) SKM_sk_pop_free(IPAddressOrRange, (st), (free_func))
+# define sk_IPAddressOrRange_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(IPAddressOrRange, (st), (copy_func), (free_func))
 # define sk_IPAddressOrRange_shift(st) SKM_sk_shift(IPAddressOrRange, (st))
 # define sk_IPAddressOrRange_pop(st) SKM_sk_pop(IPAddressOrRange, (st))
 # define sk_IPAddressOrRange_sort(st) SKM_sk_sort(IPAddressOrRange, (st))
@@ -940,6 +998,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_KRB5_APREQBODY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_APREQBODY, (st), (cmp))
 # define sk_KRB5_APREQBODY_dup(st) SKM_sk_dup(KRB5_APREQBODY, st)
 # define sk_KRB5_APREQBODY_pop_free(st, free_func) SKM_sk_pop_free(KRB5_APREQBODY, (st), (free_func))
+# define sk_KRB5_APREQBODY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_APREQBODY, (st), (copy_func), (free_func))
 # define sk_KRB5_APREQBODY_shift(st) SKM_sk_shift(KRB5_APREQBODY, (st))
 # define sk_KRB5_APREQBODY_pop(st) SKM_sk_pop(KRB5_APREQBODY, (st))
 # define sk_KRB5_APREQBODY_sort(st) SKM_sk_sort(KRB5_APREQBODY, (st))
@@ -961,6 +1020,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_KRB5_AUTHDATA_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_AUTHDATA, (st), (cmp))
 # define sk_KRB5_AUTHDATA_dup(st) SKM_sk_dup(KRB5_AUTHDATA, st)
 # define sk_KRB5_AUTHDATA_pop_free(st, free_func) SKM_sk_pop_free(KRB5_AUTHDATA, (st), (free_func))
+# define sk_KRB5_AUTHDATA_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_AUTHDATA, (st), (copy_func), (free_func))
 # define sk_KRB5_AUTHDATA_shift(st) SKM_sk_shift(KRB5_AUTHDATA, (st))
 # define sk_KRB5_AUTHDATA_pop(st) SKM_sk_pop(KRB5_AUTHDATA, (st))
 # define sk_KRB5_AUTHDATA_sort(st) SKM_sk_sort(KRB5_AUTHDATA, (st))
@@ -982,6 +1042,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_KRB5_AUTHENTBODY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_AUTHENTBODY, (st), (cmp))
 # define sk_KRB5_AUTHENTBODY_dup(st) SKM_sk_dup(KRB5_AUTHENTBODY, st)
 # define sk_KRB5_AUTHENTBODY_pop_free(st, free_func) SKM_sk_pop_free(KRB5_AUTHENTBODY, (st), (free_func))
+# define sk_KRB5_AUTHENTBODY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_AUTHENTBODY, (st), (copy_func), (free_func))
 # define sk_KRB5_AUTHENTBODY_shift(st) SKM_sk_shift(KRB5_AUTHENTBODY, (st))
 # define sk_KRB5_AUTHENTBODY_pop(st) SKM_sk_pop(KRB5_AUTHENTBODY, (st))
 # define sk_KRB5_AUTHENTBODY_sort(st) SKM_sk_sort(KRB5_AUTHENTBODY, (st))
@@ -1003,6 +1064,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_KRB5_CHECKSUM_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_CHECKSUM, (st), (cmp))
 # define sk_KRB5_CHECKSUM_dup(st) SKM_sk_dup(KRB5_CHECKSUM, st)
 # define sk_KRB5_CHECKSUM_pop_free(st, free_func) SKM_sk_pop_free(KRB5_CHECKSUM, (st), (free_func))
+# define sk_KRB5_CHECKSUM_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_CHECKSUM, (st), (copy_func), (free_func))
 # define sk_KRB5_CHECKSUM_shift(st) SKM_sk_shift(KRB5_CHECKSUM, (st))
 # define sk_KRB5_CHECKSUM_pop(st) SKM_sk_pop(KRB5_CHECKSUM, (st))
 # define sk_KRB5_CHECKSUM_sort(st) SKM_sk_sort(KRB5_CHECKSUM, (st))
@@ -1024,6 +1086,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_KRB5_ENCDATA_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_ENCDATA, (st), (cmp))
 # define sk_KRB5_ENCDATA_dup(st) SKM_sk_dup(KRB5_ENCDATA, st)
 # define sk_KRB5_ENCDATA_pop_free(st, free_func) SKM_sk_pop_free(KRB5_ENCDATA, (st), (free_func))
+# define sk_KRB5_ENCDATA_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_ENCDATA, (st), (copy_func), (free_func))
 # define sk_KRB5_ENCDATA_shift(st) SKM_sk_shift(KRB5_ENCDATA, (st))
 # define sk_KRB5_ENCDATA_pop(st) SKM_sk_pop(KRB5_ENCDATA, (st))
 # define sk_KRB5_ENCDATA_sort(st) SKM_sk_sort(KRB5_ENCDATA, (st))
@@ -1045,6 +1108,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_KRB5_ENCKEY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_ENCKEY, (st), (cmp))
 # define sk_KRB5_ENCKEY_dup(st) SKM_sk_dup(KRB5_ENCKEY, st)
 # define sk_KRB5_ENCKEY_pop_free(st, free_func) SKM_sk_pop_free(KRB5_ENCKEY, (st), (free_func))
+# define sk_KRB5_ENCKEY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_ENCKEY, (st), (copy_func), (free_func))
 # define sk_KRB5_ENCKEY_shift(st) SKM_sk_shift(KRB5_ENCKEY, (st))
 # define sk_KRB5_ENCKEY_pop(st) SKM_sk_pop(KRB5_ENCKEY, (st))
 # define sk_KRB5_ENCKEY_sort(st) SKM_sk_sort(KRB5_ENCKEY, (st))
@@ -1066,6 +1130,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_KRB5_PRINCNAME_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_PRINCNAME, (st), (cmp))
 # define sk_KRB5_PRINCNAME_dup(st) SKM_sk_dup(KRB5_PRINCNAME, st)
 # define sk_KRB5_PRINCNAME_pop_free(st, free_func) SKM_sk_pop_free(KRB5_PRINCNAME, (st), (free_func))
+# define sk_KRB5_PRINCNAME_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_PRINCNAME, (st), (copy_func), (free_func))
 # define sk_KRB5_PRINCNAME_shift(st) SKM_sk_shift(KRB5_PRINCNAME, (st))
 # define sk_KRB5_PRINCNAME_pop(st) SKM_sk_pop(KRB5_PRINCNAME, (st))
 # define sk_KRB5_PRINCNAME_sort(st) SKM_sk_sort(KRB5_PRINCNAME, (st))
@@ -1087,6 +1152,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_KRB5_TKTBODY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_TKTBODY, (st), (cmp))
 # define sk_KRB5_TKTBODY_dup(st) SKM_sk_dup(KRB5_TKTBODY, st)
 # define sk_KRB5_TKTBODY_pop_free(st, free_func) SKM_sk_pop_free(KRB5_TKTBODY, (st), (free_func))
+# define sk_KRB5_TKTBODY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_TKTBODY, (st), (copy_func), (free_func))
 # define sk_KRB5_TKTBODY_shift(st) SKM_sk_shift(KRB5_TKTBODY, (st))
 # define sk_KRB5_TKTBODY_pop(st) SKM_sk_pop(KRB5_TKTBODY, (st))
 # define sk_KRB5_TKTBODY_sort(st) SKM_sk_sort(KRB5_TKTBODY, (st))
@@ -1108,6 +1174,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_MEM_OBJECT_DATA_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(MEM_OBJECT_DATA, (st), (cmp))
 # define sk_MEM_OBJECT_DATA_dup(st) SKM_sk_dup(MEM_OBJECT_DATA, st)
 # define sk_MEM_OBJECT_DATA_pop_free(st, free_func) SKM_sk_pop_free(MEM_OBJECT_DATA, (st), (free_func))
+# define sk_MEM_OBJECT_DATA_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(MEM_OBJECT_DATA, (st), (copy_func), (free_func))
 # define sk_MEM_OBJECT_DATA_shift(st) SKM_sk_shift(MEM_OBJECT_DATA, (st))
 # define sk_MEM_OBJECT_DATA_pop(st) SKM_sk_pop(MEM_OBJECT_DATA, (st))
 # define sk_MEM_OBJECT_DATA_sort(st) SKM_sk_sort(MEM_OBJECT_DATA, (st))
@@ -1129,6 +1196,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_MIME_HEADER_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(MIME_HEADER, (st), (cmp))
 # define sk_MIME_HEADER_dup(st) SKM_sk_dup(MIME_HEADER, st)
 # define sk_MIME_HEADER_pop_free(st, free_func) SKM_sk_pop_free(MIME_HEADER, (st), (free_func))
+# define sk_MIME_HEADER_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(MIME_HEADER, (st), (copy_func), (free_func))
 # define sk_MIME_HEADER_shift(st) SKM_sk_shift(MIME_HEADER, (st))
 # define sk_MIME_HEADER_pop(st) SKM_sk_pop(MIME_HEADER, (st))
 # define sk_MIME_HEADER_sort(st) SKM_sk_sort(MIME_HEADER, (st))
@@ -1150,6 +1218,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_MIME_PARAM_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(MIME_PARAM, (st), (cmp))
 # define sk_MIME_PARAM_dup(st) SKM_sk_dup(MIME_PARAM, st)
 # define sk_MIME_PARAM_pop_free(st, free_func) SKM_sk_pop_free(MIME_PARAM, (st), (free_func))
+# define sk_MIME_PARAM_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(MIME_PARAM, (st), (copy_func), (free_func))
 # define sk_MIME_PARAM_shift(st) SKM_sk_shift(MIME_PARAM, (st))
 # define sk_MIME_PARAM_pop(st) SKM_sk_pop(MIME_PARAM, (st))
 # define sk_MIME_PARAM_sort(st) SKM_sk_sort(MIME_PARAM, (st))
@@ -1171,6 +1240,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_NAME_FUNCS_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(NAME_FUNCS, (st), (cmp))
 # define sk_NAME_FUNCS_dup(st) SKM_sk_dup(NAME_FUNCS, st)
 # define sk_NAME_FUNCS_pop_free(st, free_func) SKM_sk_pop_free(NAME_FUNCS, (st), (free_func))
+# define sk_NAME_FUNCS_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(NAME_FUNCS, (st), (copy_func), (free_func))
 # define sk_NAME_FUNCS_shift(st) SKM_sk_shift(NAME_FUNCS, (st))
 # define sk_NAME_FUNCS_pop(st) SKM_sk_pop(NAME_FUNCS, (st))
 # define sk_NAME_FUNCS_sort(st) SKM_sk_sort(NAME_FUNCS, (st))
@@ -1192,6 +1262,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_OCSP_CERTID_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(OCSP_CERTID, (st), (cmp))
 # define sk_OCSP_CERTID_dup(st) SKM_sk_dup(OCSP_CERTID, st)
 # define sk_OCSP_CERTID_pop_free(st, free_func) SKM_sk_pop_free(OCSP_CERTID, (st), (free_func))
+# define sk_OCSP_CERTID_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(OCSP_CERTID, (st), (copy_func), (free_func))
 # define sk_OCSP_CERTID_shift(st) SKM_sk_shift(OCSP_CERTID, (st))
 # define sk_OCSP_CERTID_pop(st) SKM_sk_pop(OCSP_CERTID, (st))
 # define sk_OCSP_CERTID_sort(st) SKM_sk_sort(OCSP_CERTID, (st))
@@ -1213,6 +1284,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_OCSP_ONEREQ_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(OCSP_ONEREQ, (st), (cmp))
 # define sk_OCSP_ONEREQ_dup(st) SKM_sk_dup(OCSP_ONEREQ, st)
 # define sk_OCSP_ONEREQ_pop_free(st, free_func) SKM_sk_pop_free(OCSP_ONEREQ, (st), (free_func))
+# define sk_OCSP_ONEREQ_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(OCSP_ONEREQ, (st), (copy_func), (free_func))
 # define sk_OCSP_ONEREQ_shift(st) SKM_sk_shift(OCSP_ONEREQ, (st))
 # define sk_OCSP_ONEREQ_pop(st) SKM_sk_pop(OCSP_ONEREQ, (st))
 # define sk_OCSP_ONEREQ_sort(st) SKM_sk_sort(OCSP_ONEREQ, (st))
@@ -1234,6 +1306,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_OCSP_RESPID_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(OCSP_RESPID, (st), (cmp))
 # define sk_OCSP_RESPID_dup(st) SKM_sk_dup(OCSP_RESPID, st)
 # define sk_OCSP_RESPID_pop_free(st, free_func) SKM_sk_pop_free(OCSP_RESPID, (st), (free_func))
+# define sk_OCSP_RESPID_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(OCSP_RESPID, (st), (copy_func), (free_func))
 # define sk_OCSP_RESPID_shift(st) SKM_sk_shift(OCSP_RESPID, (st))
 # define sk_OCSP_RESPID_pop(st) SKM_sk_pop(OCSP_RESPID, (st))
 # define sk_OCSP_RESPID_sort(st) SKM_sk_sort(OCSP_RESPID, (st))
@@ -1255,6 +1328,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_OCSP_SINGLERESP_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(OCSP_SINGLERESP, (st), (cmp))
 # define sk_OCSP_SINGLERESP_dup(st) SKM_sk_dup(OCSP_SINGLERESP, st)
 # define sk_OCSP_SINGLERESP_pop_free(st, free_func) SKM_sk_pop_free(OCSP_SINGLERESP, (st), (free_func))
+# define sk_OCSP_SINGLERESP_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(OCSP_SINGLERESP, (st), (copy_func), (free_func))
 # define sk_OCSP_SINGLERESP_shift(st) SKM_sk_shift(OCSP_SINGLERESP, (st))
 # define sk_OCSP_SINGLERESP_pop(st) SKM_sk_pop(OCSP_SINGLERESP, (st))
 # define sk_OCSP_SINGLERESP_sort(st) SKM_sk_sort(OCSP_SINGLERESP, (st))
@@ -1276,6 +1350,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_PKCS12_SAFEBAG_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(PKCS12_SAFEBAG, (st), (cmp))
 # define sk_PKCS12_SAFEBAG_dup(st) SKM_sk_dup(PKCS12_SAFEBAG, st)
 # define sk_PKCS12_SAFEBAG_pop_free(st, free_func) SKM_sk_pop_free(PKCS12_SAFEBAG, (st), (free_func))
+# define sk_PKCS12_SAFEBAG_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(PKCS12_SAFEBAG, (st), (copy_func), (free_func))
 # define sk_PKCS12_SAFEBAG_shift(st) SKM_sk_shift(PKCS12_SAFEBAG, (st))
 # define sk_PKCS12_SAFEBAG_pop(st) SKM_sk_pop(PKCS12_SAFEBAG, (st))
 # define sk_PKCS12_SAFEBAG_sort(st) SKM_sk_sort(PKCS12_SAFEBAG, (st))
@@ -1297,6 +1372,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_PKCS7_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(PKCS7, (st), (cmp))
 # define sk_PKCS7_dup(st) SKM_sk_dup(PKCS7, st)
 # define sk_PKCS7_pop_free(st, free_func) SKM_sk_pop_free(PKCS7, (st), (free_func))
+# define sk_PKCS7_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(PKCS7, (st), (copy_func), (free_func))
 # define sk_PKCS7_shift(st) SKM_sk_shift(PKCS7, (st))
 # define sk_PKCS7_pop(st) SKM_sk_pop(PKCS7, (st))
 # define sk_PKCS7_sort(st) SKM_sk_sort(PKCS7, (st))
@@ -1318,6 +1394,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_PKCS7_RECIP_INFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(PKCS7_RECIP_INFO, (st), (cmp))
 # define sk_PKCS7_RECIP_INFO_dup(st) SKM_sk_dup(PKCS7_RECIP_INFO, st)
 # define sk_PKCS7_RECIP_INFO_pop_free(st, free_func) SKM_sk_pop_free(PKCS7_RECIP_INFO, (st), (free_func))
+# define sk_PKCS7_RECIP_INFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(PKCS7_RECIP_INFO, (st), (copy_func), (free_func))
 # define sk_PKCS7_RECIP_INFO_shift(st) SKM_sk_shift(PKCS7_RECIP_INFO, (st))
 # define sk_PKCS7_RECIP_INFO_pop(st) SKM_sk_pop(PKCS7_RECIP_INFO, (st))
 # define sk_PKCS7_RECIP_INFO_sort(st) SKM_sk_sort(PKCS7_RECIP_INFO, (st))
@@ -1339,6 +1416,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_PKCS7_SIGNER_INFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(PKCS7_SIGNER_INFO, (st), (cmp))
 # define sk_PKCS7_SIGNER_INFO_dup(st) SKM_sk_dup(PKCS7_SIGNER_INFO, st)
 # define sk_PKCS7_SIGNER_INFO_pop_free(st, free_func) SKM_sk_pop_free(PKCS7_SIGNER_INFO, (st), (free_func))
+# define sk_PKCS7_SIGNER_INFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(PKCS7_SIGNER_INFO, (st), (copy_func), (free_func))
 # define sk_PKCS7_SIGNER_INFO_shift(st) SKM_sk_shift(PKCS7_SIGNER_INFO, (st))
 # define sk_PKCS7_SIGNER_INFO_pop(st) SKM_sk_pop(PKCS7_SIGNER_INFO, (st))
 # define sk_PKCS7_SIGNER_INFO_sort(st) SKM_sk_sort(PKCS7_SIGNER_INFO, (st))
@@ -1360,6 +1438,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_POLICYINFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(POLICYINFO, (st), (cmp))
 # define sk_POLICYINFO_dup(st) SKM_sk_dup(POLICYINFO, st)
 # define sk_POLICYINFO_pop_free(st, free_func) SKM_sk_pop_free(POLICYINFO, (st), (free_func))
+# define sk_POLICYINFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(POLICYINFO, (st), (copy_func), (free_func))
 # define sk_POLICYINFO_shift(st) SKM_sk_shift(POLICYINFO, (st))
 # define sk_POLICYINFO_pop(st) SKM_sk_pop(POLICYINFO, (st))
 # define sk_POLICYINFO_sort(st) SKM_sk_sort(POLICYINFO, (st))
@@ -1381,6 +1460,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_POLICYQUALINFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(POLICYQUALINFO, (st), (cmp))
 # define sk_POLICYQUALINFO_dup(st) SKM_sk_dup(POLICYQUALINFO, st)
 # define sk_POLICYQUALINFO_pop_free(st, free_func) SKM_sk_pop_free(POLICYQUALINFO, (st), (free_func))
+# define sk_POLICYQUALINFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(POLICYQUALINFO, (st), (copy_func), (free_func))
 # define sk_POLICYQUALINFO_shift(st) SKM_sk_shift(POLICYQUALINFO, (st))
 # define sk_POLICYQUALINFO_pop(st) SKM_sk_pop(POLICYQUALINFO, (st))
 # define sk_POLICYQUALINFO_sort(st) SKM_sk_sort(POLICYQUALINFO, (st))
@@ -1402,10 +1482,33 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_POLICY_MAPPING_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(POLICY_MAPPING, (st), (cmp))
 # define sk_POLICY_MAPPING_dup(st) SKM_sk_dup(POLICY_MAPPING, st)
 # define sk_POLICY_MAPPING_pop_free(st, free_func) SKM_sk_pop_free(POLICY_MAPPING, (st), (free_func))
+# define sk_POLICY_MAPPING_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(POLICY_MAPPING, (st), (copy_func), (free_func))
 # define sk_POLICY_MAPPING_shift(st) SKM_sk_shift(POLICY_MAPPING, (st))
 # define sk_POLICY_MAPPING_pop(st) SKM_sk_pop(POLICY_MAPPING, (st))
 # define sk_POLICY_MAPPING_sort(st) SKM_sk_sort(POLICY_MAPPING, (st))
 # define sk_POLICY_MAPPING_is_sorted(st) SKM_sk_is_sorted(POLICY_MAPPING, (st))
+# define sk_SCT_new(cmp) SKM_sk_new(SCT, (cmp))
+# define sk_SCT_new_null() SKM_sk_new_null(SCT)
+# define sk_SCT_free(st) SKM_sk_free(SCT, (st))
+# define sk_SCT_num(st) SKM_sk_num(SCT, (st))
+# define sk_SCT_value(st, i) SKM_sk_value(SCT, (st), (i))
+# define sk_SCT_set(st, i, val) SKM_sk_set(SCT, (st), (i), (val))
+# define sk_SCT_zero(st) SKM_sk_zero(SCT, (st))
+# define sk_SCT_push(st, val) SKM_sk_push(SCT, (st), (val))
+# define sk_SCT_unshift(st, val) SKM_sk_unshift(SCT, (st), (val))
+# define sk_SCT_find(st, val) SKM_sk_find(SCT, (st), (val))
+# define sk_SCT_find_ex(st, val) SKM_sk_find_ex(SCT, (st), (val))
+# define sk_SCT_delete(st, i) SKM_sk_delete(SCT, (st), (i))
+# define sk_SCT_delete_ptr(st, ptr) SKM_sk_delete_ptr(SCT, (st), (ptr))
+# define sk_SCT_insert(st, val, i) SKM_sk_insert(SCT, (st), (val), (i))
+# define sk_SCT_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SCT, (st), (cmp))
+# define sk_SCT_dup(st) SKM_sk_dup(SCT, st)
+# define sk_SCT_pop_free(st, free_func) SKM_sk_pop_free(SCT, (st), (free_func))
+# define sk_SCT_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SCT, (st), (copy_func), (free_func))
+# define sk_SCT_shift(st) SKM_sk_shift(SCT, (st))
+# define sk_SCT_pop(st) SKM_sk_pop(SCT, (st))
+# define sk_SCT_sort(st) SKM_sk_sort(SCT, (st))
+# define sk_SCT_is_sorted(st) SKM_sk_is_sorted(SCT, (st))
 # define sk_SRP_gN_new(cmp) SKM_sk_new(SRP_gN, (cmp))
 # define sk_SRP_gN_new_null() SKM_sk_new_null(SRP_gN)
 # define sk_SRP_gN_free(st) SKM_sk_free(SRP_gN, (st))
@@ -1423,6 +1526,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_SRP_gN_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_gN, (st), (cmp))
 # define sk_SRP_gN_dup(st) SKM_sk_dup(SRP_gN, st)
 # define sk_SRP_gN_pop_free(st, free_func) SKM_sk_pop_free(SRP_gN, (st), (free_func))
+# define sk_SRP_gN_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SRP_gN, (st), (copy_func), (free_func))
 # define sk_SRP_gN_shift(st) SKM_sk_shift(SRP_gN, (st))
 # define sk_SRP_gN_pop(st) SKM_sk_pop(SRP_gN, (st))
 # define sk_SRP_gN_sort(st) SKM_sk_sort(SRP_gN, (st))
@@ -1444,6 +1548,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_SRP_gN_cache_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_gN_cache, (st), (cmp))
 # define sk_SRP_gN_cache_dup(st) SKM_sk_dup(SRP_gN_cache, st)
 # define sk_SRP_gN_cache_pop_free(st, free_func) SKM_sk_pop_free(SRP_gN_cache, (st), (free_func))
+# define sk_SRP_gN_cache_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SRP_gN_cache, (st), (copy_func), (free_func))
 # define sk_SRP_gN_cache_shift(st) SKM_sk_shift(SRP_gN_cache, (st))
 # define sk_SRP_gN_cache_pop(st) SKM_sk_pop(SRP_gN_cache, (st))
 # define sk_SRP_gN_cache_sort(st) SKM_sk_sort(SRP_gN_cache, (st))
@@ -1465,6 +1570,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_SRP_user_pwd_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_user_pwd, (st), (cmp))
 # define sk_SRP_user_pwd_dup(st) SKM_sk_dup(SRP_user_pwd, st)
 # define sk_SRP_user_pwd_pop_free(st, free_func) SKM_sk_pop_free(SRP_user_pwd, (st), (free_func))
+# define sk_SRP_user_pwd_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SRP_user_pwd, (st), (copy_func), (free_func))
 # define sk_SRP_user_pwd_shift(st) SKM_sk_shift(SRP_user_pwd, (st))
 # define sk_SRP_user_pwd_pop(st) SKM_sk_pop(SRP_user_pwd, (st))
 # define sk_SRP_user_pwd_sort(st) SKM_sk_sort(SRP_user_pwd, (st))
@@ -1486,6 +1592,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_SRTP_PROTECTION_PROFILE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRTP_PROTECTION_PROFILE, (st), (cmp))
 # define sk_SRTP_PROTECTION_PROFILE_dup(st) SKM_sk_dup(SRTP_PROTECTION_PROFILE, st)
 # define sk_SRTP_PROTECTION_PROFILE_pop_free(st, free_func) SKM_sk_pop_free(SRTP_PROTECTION_PROFILE, (st), (free_func))
+# define sk_SRTP_PROTECTION_PROFILE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SRTP_PROTECTION_PROFILE, (st), (copy_func), (free_func))
 # define sk_SRTP_PROTECTION_PROFILE_shift(st) SKM_sk_shift(SRTP_PROTECTION_PROFILE, (st))
 # define sk_SRTP_PROTECTION_PROFILE_pop(st) SKM_sk_pop(SRTP_PROTECTION_PROFILE, (st))
 # define sk_SRTP_PROTECTION_PROFILE_sort(st) SKM_sk_sort(SRTP_PROTECTION_PROFILE, (st))
@@ -1507,6 +1614,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_SSL_CIPHER_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SSL_CIPHER, (st), (cmp))
 # define sk_SSL_CIPHER_dup(st) SKM_sk_dup(SSL_CIPHER, st)
 # define sk_SSL_CIPHER_pop_free(st, free_func) SKM_sk_pop_free(SSL_CIPHER, (st), (free_func))
+# define sk_SSL_CIPHER_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SSL_CIPHER, (st), (copy_func), (free_func))
 # define sk_SSL_CIPHER_shift(st) SKM_sk_shift(SSL_CIPHER, (st))
 # define sk_SSL_CIPHER_pop(st) SKM_sk_pop(SSL_CIPHER, (st))
 # define sk_SSL_CIPHER_sort(st) SKM_sk_sort(SSL_CIPHER, (st))
@@ -1528,6 +1636,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_SSL_COMP_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SSL_COMP, (st), (cmp))
 # define sk_SSL_COMP_dup(st) SKM_sk_dup(SSL_COMP, st)
 # define sk_SSL_COMP_pop_free(st, free_func) SKM_sk_pop_free(SSL_COMP, (st), (free_func))
+# define sk_SSL_COMP_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SSL_COMP, (st), (copy_func), (free_func))
 # define sk_SSL_COMP_shift(st) SKM_sk_shift(SSL_COMP, (st))
 # define sk_SSL_COMP_pop(st) SKM_sk_pop(SSL_COMP, (st))
 # define sk_SSL_COMP_sort(st) SKM_sk_sort(SSL_COMP, (st))
@@ -1549,6 +1658,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_STACK_OF_X509_NAME_ENTRY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(STACK_OF_X509_NAME_ENTRY, (st), (cmp))
 # define sk_STACK_OF_X509_NAME_ENTRY_dup(st) SKM_sk_dup(STACK_OF_X509_NAME_ENTRY, st)
 # define sk_STACK_OF_X509_NAME_ENTRY_pop_free(st, free_func) SKM_sk_pop_free(STACK_OF_X509_NAME_ENTRY, (st), (free_func))
+# define sk_STACK_OF_X509_NAME_ENTRY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(STACK_OF_X509_NAME_ENTRY, (st), (copy_func), (free_func))
 # define sk_STACK_OF_X509_NAME_ENTRY_shift(st) SKM_sk_shift(STACK_OF_X509_NAME_ENTRY, (st))
 # define sk_STACK_OF_X509_NAME_ENTRY_pop(st) SKM_sk_pop(STACK_OF_X509_NAME_ENTRY, (st))
 # define sk_STACK_OF_X509_NAME_ENTRY_sort(st) SKM_sk_sort(STACK_OF_X509_NAME_ENTRY, (st))
@@ -1570,6 +1680,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_STORE_ATTR_INFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(STORE_ATTR_INFO, (st), (cmp))
 # define sk_STORE_ATTR_INFO_dup(st) SKM_sk_dup(STORE_ATTR_INFO, st)
 # define sk_STORE_ATTR_INFO_pop_free(st, free_func) SKM_sk_pop_free(STORE_ATTR_INFO, (st), (free_func))
+# define sk_STORE_ATTR_INFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(STORE_ATTR_INFO, (st), (copy_func), (free_func))
 # define sk_STORE_ATTR_INFO_shift(st) SKM_sk_shift(STORE_ATTR_INFO, (st))
 # define sk_STORE_ATTR_INFO_pop(st) SKM_sk_pop(STORE_ATTR_INFO, (st))
 # define sk_STORE_ATTR_INFO_sort(st) SKM_sk_sort(STORE_ATTR_INFO, (st))
@@ -1591,6 +1702,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_STORE_OBJECT_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(STORE_OBJECT, (st), (cmp))
 # define sk_STORE_OBJECT_dup(st) SKM_sk_dup(STORE_OBJECT, st)
 # define sk_STORE_OBJECT_pop_free(st, free_func) SKM_sk_pop_free(STORE_OBJECT, (st), (free_func))
+# define sk_STORE_OBJECT_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(STORE_OBJECT, (st), (copy_func), (free_func))
 # define sk_STORE_OBJECT_shift(st) SKM_sk_shift(STORE_OBJECT, (st))
 # define sk_STORE_OBJECT_pop(st) SKM_sk_pop(STORE_OBJECT, (st))
 # define sk_STORE_OBJECT_sort(st) SKM_sk_sort(STORE_OBJECT, (st))
@@ -1612,6 +1724,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_SXNETID_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SXNETID, (st), (cmp))
 # define sk_SXNETID_dup(st) SKM_sk_dup(SXNETID, st)
 # define sk_SXNETID_pop_free(st, free_func) SKM_sk_pop_free(SXNETID, (st), (free_func))
+# define sk_SXNETID_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SXNETID, (st), (copy_func), (free_func))
 # define sk_SXNETID_shift(st) SKM_sk_shift(SXNETID, (st))
 # define sk_SXNETID_pop(st) SKM_sk_pop(SXNETID, (st))
 # define sk_SXNETID_sort(st) SKM_sk_sort(SXNETID, (st))
@@ -1633,6 +1746,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_UI_STRING_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(UI_STRING, (st), (cmp))
 # define sk_UI_STRING_dup(st) SKM_sk_dup(UI_STRING, st)
 # define sk_UI_STRING_pop_free(st, free_func) SKM_sk_pop_free(UI_STRING, (st), (free_func))
+# define sk_UI_STRING_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(UI_STRING, (st), (copy_func), (free_func))
 # define sk_UI_STRING_shift(st) SKM_sk_shift(UI_STRING, (st))
 # define sk_UI_STRING_pop(st) SKM_sk_pop(UI_STRING, (st))
 # define sk_UI_STRING_sort(st) SKM_sk_sort(UI_STRING, (st))
@@ -1654,6 +1768,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509, (st), (cmp))
 # define sk_X509_dup(st) SKM_sk_dup(X509, st)
 # define sk_X509_pop_free(st, free_func) SKM_sk_pop_free(X509, (st), (free_func))
+# define sk_X509_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509, (st), (copy_func), (free_func))
 # define sk_X509_shift(st) SKM_sk_shift(X509, (st))
 # define sk_X509_pop(st) SKM_sk_pop(X509, (st))
 # define sk_X509_sort(st) SKM_sk_sort(X509, (st))
@@ -1675,6 +1790,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509V3_EXT_METHOD_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509V3_EXT_METHOD, (st), (cmp))
 # define sk_X509V3_EXT_METHOD_dup(st) SKM_sk_dup(X509V3_EXT_METHOD, st)
 # define sk_X509V3_EXT_METHOD_pop_free(st, free_func) SKM_sk_pop_free(X509V3_EXT_METHOD, (st), (free_func))
+# define sk_X509V3_EXT_METHOD_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509V3_EXT_METHOD, (st), (copy_func), (free_func))
 # define sk_X509V3_EXT_METHOD_shift(st) SKM_sk_shift(X509V3_EXT_METHOD, (st))
 # define sk_X509V3_EXT_METHOD_pop(st) SKM_sk_pop(X509V3_EXT_METHOD, (st))
 # define sk_X509V3_EXT_METHOD_sort(st) SKM_sk_sort(X509V3_EXT_METHOD, (st))
@@ -1696,6 +1812,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_ALGOR_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_ALGOR, (st), (cmp))
 # define sk_X509_ALGOR_dup(st) SKM_sk_dup(X509_ALGOR, st)
 # define sk_X509_ALGOR_pop_free(st, free_func) SKM_sk_pop_free(X509_ALGOR, (st), (free_func))
+# define sk_X509_ALGOR_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_ALGOR, (st), (copy_func), (free_func))
 # define sk_X509_ALGOR_shift(st) SKM_sk_shift(X509_ALGOR, (st))
 # define sk_X509_ALGOR_pop(st) SKM_sk_pop(X509_ALGOR, (st))
 # define sk_X509_ALGOR_sort(st) SKM_sk_sort(X509_ALGOR, (st))
@@ -1717,6 +1834,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_ATTRIBUTE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_ATTRIBUTE, (st), (cmp))
 # define sk_X509_ATTRIBUTE_dup(st) SKM_sk_dup(X509_ATTRIBUTE, st)
 # define sk_X509_ATTRIBUTE_pop_free(st, free_func) SKM_sk_pop_free(X509_ATTRIBUTE, (st), (free_func))
+# define sk_X509_ATTRIBUTE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_ATTRIBUTE, (st), (copy_func), (free_func))
 # define sk_X509_ATTRIBUTE_shift(st) SKM_sk_shift(X509_ATTRIBUTE, (st))
 # define sk_X509_ATTRIBUTE_pop(st) SKM_sk_pop(X509_ATTRIBUTE, (st))
 # define sk_X509_ATTRIBUTE_sort(st) SKM_sk_sort(X509_ATTRIBUTE, (st))
@@ -1738,6 +1856,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_CRL_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_CRL, (st), (cmp))
 # define sk_X509_CRL_dup(st) SKM_sk_dup(X509_CRL, st)
 # define sk_X509_CRL_pop_free(st, free_func) SKM_sk_pop_free(X509_CRL, (st), (free_func))
+# define sk_X509_CRL_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_CRL, (st), (copy_func), (free_func))
 # define sk_X509_CRL_shift(st) SKM_sk_shift(X509_CRL, (st))
 # define sk_X509_CRL_pop(st) SKM_sk_pop(X509_CRL, (st))
 # define sk_X509_CRL_sort(st) SKM_sk_sort(X509_CRL, (st))
@@ -1759,6 +1878,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_EXTENSION_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_EXTENSION, (st), (cmp))
 # define sk_X509_EXTENSION_dup(st) SKM_sk_dup(X509_EXTENSION, st)
 # define sk_X509_EXTENSION_pop_free(st, free_func) SKM_sk_pop_free(X509_EXTENSION, (st), (free_func))
+# define sk_X509_EXTENSION_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_EXTENSION, (st), (copy_func), (free_func))
 # define sk_X509_EXTENSION_shift(st) SKM_sk_shift(X509_EXTENSION, (st))
 # define sk_X509_EXTENSION_pop(st) SKM_sk_pop(X509_EXTENSION, (st))
 # define sk_X509_EXTENSION_sort(st) SKM_sk_sort(X509_EXTENSION, (st))
@@ -1780,6 +1900,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_INFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_INFO, (st), (cmp))
 # define sk_X509_INFO_dup(st) SKM_sk_dup(X509_INFO, st)
 # define sk_X509_INFO_pop_free(st, free_func) SKM_sk_pop_free(X509_INFO, (st), (free_func))
+# define sk_X509_INFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_INFO, (st), (copy_func), (free_func))
 # define sk_X509_INFO_shift(st) SKM_sk_shift(X509_INFO, (st))
 # define sk_X509_INFO_pop(st) SKM_sk_pop(X509_INFO, (st))
 # define sk_X509_INFO_sort(st) SKM_sk_sort(X509_INFO, (st))
@@ -1801,6 +1922,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_LOOKUP_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_LOOKUP, (st), (cmp))
 # define sk_X509_LOOKUP_dup(st) SKM_sk_dup(X509_LOOKUP, st)
 # define sk_X509_LOOKUP_pop_free(st, free_func) SKM_sk_pop_free(X509_LOOKUP, (st), (free_func))
+# define sk_X509_LOOKUP_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_LOOKUP, (st), (copy_func), (free_func))
 # define sk_X509_LOOKUP_shift(st) SKM_sk_shift(X509_LOOKUP, (st))
 # define sk_X509_LOOKUP_pop(st) SKM_sk_pop(X509_LOOKUP, (st))
 # define sk_X509_LOOKUP_sort(st) SKM_sk_sort(X509_LOOKUP, (st))
@@ -1822,6 +1944,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_NAME_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_NAME, (st), (cmp))
 # define sk_X509_NAME_dup(st) SKM_sk_dup(X509_NAME, st)
 # define sk_X509_NAME_pop_free(st, free_func) SKM_sk_pop_free(X509_NAME, (st), (free_func))
+# define sk_X509_NAME_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_NAME, (st), (copy_func), (free_func))
 # define sk_X509_NAME_shift(st) SKM_sk_shift(X509_NAME, (st))
 # define sk_X509_NAME_pop(st) SKM_sk_pop(X509_NAME, (st))
 # define sk_X509_NAME_sort(st) SKM_sk_sort(X509_NAME, (st))
@@ -1843,6 +1966,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_NAME_ENTRY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_NAME_ENTRY, (st), (cmp))
 # define sk_X509_NAME_ENTRY_dup(st) SKM_sk_dup(X509_NAME_ENTRY, st)
 # define sk_X509_NAME_ENTRY_pop_free(st, free_func) SKM_sk_pop_free(X509_NAME_ENTRY, (st), (free_func))
+# define sk_X509_NAME_ENTRY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_NAME_ENTRY, (st), (copy_func), (free_func))
 # define sk_X509_NAME_ENTRY_shift(st) SKM_sk_shift(X509_NAME_ENTRY, (st))
 # define sk_X509_NAME_ENTRY_pop(st) SKM_sk_pop(X509_NAME_ENTRY, (st))
 # define sk_X509_NAME_ENTRY_sort(st) SKM_sk_sort(X509_NAME_ENTRY, (st))
@@ -1864,6 +1988,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_OBJECT_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_OBJECT, (st), (cmp))
 # define sk_X509_OBJECT_dup(st) SKM_sk_dup(X509_OBJECT, st)
 # define sk_X509_OBJECT_pop_free(st, free_func) SKM_sk_pop_free(X509_OBJECT, (st), (free_func))
+# define sk_X509_OBJECT_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_OBJECT, (st), (copy_func), (free_func))
 # define sk_X509_OBJECT_shift(st) SKM_sk_shift(X509_OBJECT, (st))
 # define sk_X509_OBJECT_pop(st) SKM_sk_pop(X509_OBJECT, (st))
 # define sk_X509_OBJECT_sort(st) SKM_sk_sort(X509_OBJECT, (st))
@@ -1885,6 +2010,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_POLICY_DATA_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_POLICY_DATA, (st), (cmp))
 # define sk_X509_POLICY_DATA_dup(st) SKM_sk_dup(X509_POLICY_DATA, st)
 # define sk_X509_POLICY_DATA_pop_free(st, free_func) SKM_sk_pop_free(X509_POLICY_DATA, (st), (free_func))
+# define sk_X509_POLICY_DATA_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_POLICY_DATA, (st), (copy_func), (free_func))
 # define sk_X509_POLICY_DATA_shift(st) SKM_sk_shift(X509_POLICY_DATA, (st))
 # define sk_X509_POLICY_DATA_pop(st) SKM_sk_pop(X509_POLICY_DATA, (st))
 # define sk_X509_POLICY_DATA_sort(st) SKM_sk_sort(X509_POLICY_DATA, (st))
@@ -1906,6 +2032,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_POLICY_NODE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_POLICY_NODE, (st), (cmp))
 # define sk_X509_POLICY_NODE_dup(st) SKM_sk_dup(X509_POLICY_NODE, st)
 # define sk_X509_POLICY_NODE_pop_free(st, free_func) SKM_sk_pop_free(X509_POLICY_NODE, (st), (free_func))
+# define sk_X509_POLICY_NODE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_POLICY_NODE, (st), (copy_func), (free_func))
 # define sk_X509_POLICY_NODE_shift(st) SKM_sk_shift(X509_POLICY_NODE, (st))
 # define sk_X509_POLICY_NODE_pop(st) SKM_sk_pop(X509_POLICY_NODE, (st))
 # define sk_X509_POLICY_NODE_sort(st) SKM_sk_sort(X509_POLICY_NODE, (st))
@@ -1927,6 +2054,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_PURPOSE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_PURPOSE, (st), (cmp))
 # define sk_X509_PURPOSE_dup(st) SKM_sk_dup(X509_PURPOSE, st)
 # define sk_X509_PURPOSE_pop_free(st, free_func) SKM_sk_pop_free(X509_PURPOSE, (st), (free_func))
+# define sk_X509_PURPOSE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_PURPOSE, (st), (copy_func), (free_func))
 # define sk_X509_PURPOSE_shift(st) SKM_sk_shift(X509_PURPOSE, (st))
 # define sk_X509_PURPOSE_pop(st) SKM_sk_pop(X509_PURPOSE, (st))
 # define sk_X509_PURPOSE_sort(st) SKM_sk_sort(X509_PURPOSE, (st))
@@ -1948,6 +2076,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_REVOKED_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_REVOKED, (st), (cmp))
 # define sk_X509_REVOKED_dup(st) SKM_sk_dup(X509_REVOKED, st)
 # define sk_X509_REVOKED_pop_free(st, free_func) SKM_sk_pop_free(X509_REVOKED, (st), (free_func))
+# define sk_X509_REVOKED_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_REVOKED, (st), (copy_func), (free_func))
 # define sk_X509_REVOKED_shift(st) SKM_sk_shift(X509_REVOKED, (st))
 # define sk_X509_REVOKED_pop(st) SKM_sk_pop(X509_REVOKED, (st))
 # define sk_X509_REVOKED_sort(st) SKM_sk_sort(X509_REVOKED, (st))
@@ -1969,6 +2098,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_TRUST_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_TRUST, (st), (cmp))
 # define sk_X509_TRUST_dup(st) SKM_sk_dup(X509_TRUST, st)
 # define sk_X509_TRUST_pop_free(st, free_func) SKM_sk_pop_free(X509_TRUST, (st), (free_func))
+# define sk_X509_TRUST_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_TRUST, (st), (copy_func), (free_func))
 # define sk_X509_TRUST_shift(st) SKM_sk_shift(X509_TRUST, (st))
 # define sk_X509_TRUST_pop(st) SKM_sk_pop(X509_TRUST, (st))
 # define sk_X509_TRUST_sort(st) SKM_sk_sort(X509_TRUST, (st))
@@ -1990,6 +2120,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_X509_VERIFY_PARAM_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_VERIFY_PARAM, (st), (cmp))
 # define sk_X509_VERIFY_PARAM_dup(st) SKM_sk_dup(X509_VERIFY_PARAM, st)
 # define sk_X509_VERIFY_PARAM_pop_free(st, free_func) SKM_sk_pop_free(X509_VERIFY_PARAM, (st), (free_func))
+# define sk_X509_VERIFY_PARAM_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_VERIFY_PARAM, (st), (copy_func), (free_func))
 # define sk_X509_VERIFY_PARAM_shift(st) SKM_sk_shift(X509_VERIFY_PARAM, (st))
 # define sk_X509_VERIFY_PARAM_pop(st) SKM_sk_pop(X509_VERIFY_PARAM, (st))
 # define sk_X509_VERIFY_PARAM_sort(st) SKM_sk_sort(X509_VERIFY_PARAM, (st))
@@ -2011,6 +2142,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_nid_triple_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(nid_triple, (st), (cmp))
 # define sk_nid_triple_dup(st) SKM_sk_dup(nid_triple, st)
 # define sk_nid_triple_pop_free(st, free_func) SKM_sk_pop_free(nid_triple, (st), (free_func))
+# define sk_nid_triple_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(nid_triple, (st), (copy_func), (free_func))
 # define sk_nid_triple_shift(st) SKM_sk_shift(nid_triple, (st))
 # define sk_nid_triple_pop(st) SKM_sk_pop(nid_triple, (st))
 # define sk_nid_triple_sort(st) SKM_sk_sort(nid_triple, (st))
@@ -2032,6 +2164,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_void_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(void, (st), (cmp))
 # define sk_void_dup(st) SKM_sk_dup(void, st)
 # define sk_void_pop_free(st, free_func) SKM_sk_pop_free(void, (st), (free_func))
+# define sk_void_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(void, (st), (copy_func), (free_func))
 # define sk_void_shift(st) SKM_sk_shift(void, (st))
 # define sk_void_pop(st) SKM_sk_pop(void, (st))
 # define sk_void_sort(st) SKM_sk_sort(void, (st))
@@ -2042,7 +2175,8 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_OPENSSL_STRING_find(st, val) sk_find(CHECKED_STACK_OF(OPENSSL_STRING, st), CHECKED_PTR_OF(char, val))
 # define sk_OPENSSL_STRING_value(st, i) ((OPENSSL_STRING)sk_value(CHECKED_STACK_OF(OPENSSL_STRING, st), i))
 # define sk_OPENSSL_STRING_num(st) SKM_sk_num(OPENSSL_STRING, st)
-# define sk_OPENSSL_STRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_STRING, st), CHECKED_SK_FREE_FUNC2(OPENSSL_STRING, free_func))
+# define sk_OPENSSL_STRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_STRING, st), CHECKED_SK_FREE_FUNC(char, free_func))
+# define sk_OPENSSL_STRING_deep_copy(st, copy_func, free_func) ((STACK_OF(OPENSSL_STRING) *)sk_deep_copy(CHECKED_STACK_OF(OPENSSL_STRING, st), CHECKED_SK_COPY_FUNC(char, copy_func), CHECKED_SK_FREE_FUNC(char, free_func)))
 # define sk_OPENSSL_STRING_insert(st, val, i) sk_insert(CHECKED_STACK_OF(OPENSSL_STRING, st), CHECKED_PTR_OF(char, val), i)
 # define sk_OPENSSL_STRING_free(st) SKM_sk_free(OPENSSL_STRING, st)
 # define sk_OPENSSL_STRING_set(st, i, val) sk_set(CHECKED_STACK_OF(OPENSSL_STRING, st), i, CHECKED_PTR_OF(char, val))
@@ -2065,7 +2199,8 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_OPENSSL_BLOCK_find(st, val) sk_find(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_PTR_OF(void, val))
 # define sk_OPENSSL_BLOCK_value(st, i) ((OPENSSL_BLOCK)sk_value(CHECKED_STACK_OF(OPENSSL_BLOCK, st), i))
 # define sk_OPENSSL_BLOCK_num(st) SKM_sk_num(OPENSSL_BLOCK, st)
-# define sk_OPENSSL_BLOCK_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_SK_FREE_FUNC2(OPENSSL_BLOCK, free_func))
+# define sk_OPENSSL_BLOCK_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_SK_FREE_FUNC(void, free_func))
+# define sk_OPENSSL_BLOCK_deep_copy(st, copy_func, free_func) ((STACK_OF(OPENSSL_BLOCK) *)sk_deep_copy(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_SK_COPY_FUNC(void, copy_func), CHECKED_SK_FREE_FUNC(void, free_func)))
 # define sk_OPENSSL_BLOCK_insert(st, val, i) sk_insert(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_PTR_OF(void, val), i)
 # define sk_OPENSSL_BLOCK_free(st) SKM_sk_free(OPENSSL_BLOCK, st)
 # define sk_OPENSSL_BLOCK_set(st, i, val) sk_set(CHECKED_STACK_OF(OPENSSL_BLOCK, st), i, CHECKED_PTR_OF(void, val))
@@ -2088,7 +2223,8 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void)
 # define sk_OPENSSL_PSTRING_find(st, val) sk_find(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val))
 # define sk_OPENSSL_PSTRING_value(st, i) ((OPENSSL_PSTRING)sk_value(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i))
 # define sk_OPENSSL_PSTRING_num(st) SKM_sk_num(OPENSSL_PSTRING, st)
-# define sk_OPENSSL_PSTRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_FREE_FUNC2(OPENSSL_PSTRING, free_func))
+# define sk_OPENSSL_PSTRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_FREE_FUNC(OPENSSL_STRING, free_func))
+# define sk_OPENSSL_PSTRING_deep_copy(st, copy_func, free_func) ((STACK_OF(OPENSSL_PSTRING) *)sk_deep_copy(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_COPY_FUNC(OPENSSL_STRING, copy_func), CHECKED_SK_FREE_FUNC(OPENSSL_STRING, free_func)))
 # define sk_OPENSSL_PSTRING_insert(st, val, i) sk_insert(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val), i)
 # define sk_OPENSSL_PSTRING_free(st) SKM_sk_free(OPENSSL_PSTRING, st)
 # define sk_OPENSSL_PSTRING_set(st, i, val) sk_set(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i, CHECKED_PTR_OF(OPENSSL_STRING, val))
index 331f907..de437ac 100644 (file)
@@ -115,6 +115,40 @@ _STACK *sk_dup(_STACK *sk)
     return (NULL);
 }
 
+_STACK *sk_deep_copy(_STACK *sk, void *(*copy_func) (void *),
+                     void (*free_func) (void *))
+{
+    _STACK *ret;
+    int i;
+    /* Return a new stack of copy_func() copies of sk's elements, or NULL. */
+    if ((ret = OPENSSL_malloc(sizeof(_STACK))) == NULL)
+        return ret;             /* ret is NULL here: allocation failed */
+    ret->comp = sk->comp;       /* sk must be non-NULL: it is not checked */
+    ret->sorted = sk->sorted;
+    ret->num = sk->num;
+    ret->num_alloc = sk->num > MIN_NODES ? sk->num : MIN_NODES;
+    ret->data = OPENSSL_malloc(sizeof(char *) * ret->num_alloc);
+    if (ret->data == NULL) {
+        OPENSSL_free(ret);
+        return NULL;
+    }
+    for (i = 0; i < ret->num_alloc; i++)
+        ret->data[i] = NULL;    /* clear all slots so cleanup can test them */
+    /* Copy element by element; NULL entries in sk stay NULL in the copy. */
+    for (i = 0; i < ret->num; ++i) {
+        if (sk->data[i] == NULL)
+            continue;
+        if ((ret->data[i] = copy_func(sk->data[i])) == NULL) {
+            while (--i >= 0)    /* copy failed: undo the copies made so far */
+                if (ret->data[i] != NULL)
+                    free_func(ret->data[i]);
+            sk_free(ret);       /* NOTE(review): expected to release ret->data as well -- confirm */
+            return NULL;
+        }
+    }
+    return ret;
+}
+
 _STACK *sk_new_null(void)
 {
     return sk_new((int (*)(const void *, const void *))0);
index 8d6e939..eb07216 100644 (file)
@@ -83,6 +83,7 @@ _STACK *sk_new(int (*cmp) (const void *, const void *));
 _STACK *sk_new_null(void);
 void sk_free(_STACK *);
 void sk_pop_free(_STACK *st, void (*func) (void *));
+_STACK *sk_deep_copy(_STACK *, void *(*)(void *), void (*)(void *));
 int sk_insert(_STACK *sk, void *data, int where);
 void *sk_delete(_STACK *st, int loc);
 void *sk_delete_ptr(_STACK *st, void *p);
index 2eadf7f..239fa4f 100644 (file)
 #  undef CRYPTO_get_locked_mem_ex_functions
 #  define CRYPTO_get_locked_mem_ex_functions      CRYPTO_get_locked_mem_ex_funcs
 
-/* Hack some long SSL names */
+/* Hack some long SSL/TLS names */
 #  undef SSL_CTX_set_default_verify_paths
 #  define SSL_CTX_set_default_verify_paths        SSL_CTX_set_def_verify_paths
 #  undef SSL_get_ex_data_X509_STORE_CTX_idx
 #  define SSL_CTX_set_default_passwd_cb_userdata  SSL_CTX_set_def_passwd_cb_ud
 #  undef SSL_COMP_get_compression_methods
 #  define SSL_COMP_get_compression_methods        SSL_COMP_get_compress_methods
+#  undef SSL_COMP_set0_compression_methods
+#  define SSL_COMP_set0_compression_methods       SSL_COMP_set0_compress_methods
+#  undef SSL_COMP_free_compression_methods
+#  define SSL_COMP_free_compression_methods       SSL_COMP_free_compress_methods
 #  undef ssl_add_clienthello_renegotiate_ext
 #  define ssl_add_clienthello_renegotiate_ext     ssl_add_clienthello_reneg_ext
 #  undef ssl_add_serverhello_renegotiate_ext
 #  define SSL_CTX_set_next_protos_advertised_cb   SSL_CTX_set_next_protos_adv_cb
 #  undef SSL_CTX_set_next_proto_select_cb
 #  define SSL_CTX_set_next_proto_select_cb        SSL_CTX_set_next_proto_sel_cb
+
+#  undef tls1_send_server_supplemental_data
+#  define tls1_send_server_supplemental_data      tls1_send_server_suppl_data
+#  undef tls1_send_client_supplemental_data
+#  define tls1_send_client_supplemental_data      tls1_send_client_suppl_data
+#  undef tls1_get_server_supplemental_data
+#  define tls1_get_server_supplemental_data       tls1_get_server_suppl_data
+#  undef tls1_get_client_supplemental_data
+#  define tls1_get_client_supplemental_data       tls1_get_client_suppl_data
+
 #  undef ssl3_cbc_record_digest_supported
 #  define ssl3_cbc_record_digest_supported        ssl3_cbc_record_digest_support
 #  undef ssl_check_clienthello_tlsext_late
 #  undef ssl_check_clienthello_tlsext_early
 #  define ssl_check_clienthello_tlsext_early      ssl_check_clihello_tlsext_early
 
-/* Hack some long ENGINE names */
+/* Hack some long RSA names */
+#  undef RSA_padding_check_PKCS1_OAEP_mgf1
+#  define RSA_padding_check_PKCS1_OAEP_mgf1       RSA_pad_check_PKCS1_OAEP_mgf1
+
+/* Hack some long ENGINE names */
 #  undef ENGINE_get_default_BN_mod_exp_crt
 #  define ENGINE_get_default_BN_mod_exp_crt       ENGINE_get_def_BN_mod_exp_crt
 #  undef ENGINE_set_default_BN_mod_exp_crt
 #  define CMS_OriginatorIdentifierOrKey_it        CMS_OriginatorIdOrKey_it
 #  undef cms_SignerIdentifier_get0_signer_id
 #  define cms_SignerIdentifier_get0_signer_id     cms_SignerId_get0_signer_id
+#  undef CMS_RecipientInfo_kari_get0_orig_id
+#  define CMS_RecipientInfo_kari_get0_orig_id     CMS_RecipInfo_kari_get0_orig_id
+#  undef CMS_RecipientInfo_kari_get0_reks
+#  define CMS_RecipientInfo_kari_get0_reks        CMS_RecipInfo_kari_get0_reks
+#  undef CMS_RecipientEncryptedKey_cert_cmp
+#  define CMS_RecipientEncryptedKey_cert_cmp      CMS_RecipEncryptedKey_cert_cmp
+#  undef CMS_RecipientInfo_kari_set0_pkey
+#  define CMS_RecipientInfo_kari_set0_pkey        CMS_RecipInfo_kari_set0_pkey
+#  undef CMS_RecipientEncryptedKey_get0_id
+#  define CMS_RecipientEncryptedKey_get0_id       CMS_RecipEncryptedKey_get0_id
+#  undef CMS_RecipientInfo_kari_orig_id_cmp
+#  define CMS_RecipientInfo_kari_orig_id_cmp      CMS_RecipInfo_kari_orig_id_cmp
 
 /* Hack some long DTLS1 names */
 #  undef dtls1_retransmit_buffered_messages
index 031d872..db6ce32 100644 (file)
@@ -238,7 +238,6 @@ int TS_RESP_CTX_set_def_policy(TS_RESP_CTX *ctx, ASN1_OBJECT *def_policy)
 
 int TS_RESP_CTX_set_certs(TS_RESP_CTX *ctx, STACK_OF(X509) *certs)
 {
-    int i;
 
     if (ctx->certs) {
         sk_X509_pop_free(ctx->certs, X509_free);
@@ -246,14 +245,10 @@ int TS_RESP_CTX_set_certs(TS_RESP_CTX *ctx, STACK_OF(X509) *certs)
     }
     if (!certs)
         return 1;
-    if (!(ctx->certs = sk_X509_dup(certs))) {
+    if (!(ctx->certs = X509_chain_up_ref(certs))) {
         TSerr(TS_F_TS_RESP_CTX_SET_CERTS, ERR_R_MALLOC_FAILURE);
         return 0;
     }
-    for (i = 0; i < sk_X509_num(ctx->certs); ++i) {
-        X509 *cert = sk_X509_value(ctx->certs, i);
-        CRYPTO_add(&cert->references, +1, CRYPTO_LOCK_X509);
-    }
 
     return 1;
 }
index 32b4d99..3ce765d 100644 (file)
@@ -637,7 +637,7 @@ static int TS_compute_imprint(BIO *data, TS_TST_INFO *tst_info,
     X509_ALGOR_free(*md_alg);
     OPENSSL_free(*imprint);
     *imprint_len = 0;
-    *imprint = NULL;
+    *imprint = 0;
     return 0;
 }
 
index 829ea86..5d66276 100644 (file)
 # elif !defined(OPENSSL_SYS_VMS) \
        && !defined(OPENSSL_SYS_MSDOS) \
        && !defined(OPENSSL_SYS_MACINTOSH_CLASSIC) \
-       && !defined(MAC_OS_GUSI_SOURCE) \
+       && !defined(MAC_OS_GUSI_SOURCE) \
        && !defined(OPENSSL_SYS_VXWORKS) \
        && !defined(OPENSSL_SYS_NETWARE)
 #  define TERMIOS
index cb2381c..c584e5b 100644 (file)
@@ -118,34 +118,36 @@ $tbl="ebp";
        &movq   (@mm[0],&QWP(2048*$SCALE,$tbl,"esi",8));        # rc[r]
        &mov    ("eax",&DWP(0,"esp"));
        &mov    ("ebx",&DWP(4,"esp"));
+       &movz   ("ecx",&LB("eax"));
+       &movz   ("edx",&HB("eax"));
 for($i=0;$i<8;$i++) {
     my $func = ($i==0)? \&movq : \&pxor;
-       &movb   (&LB("ecx"),&LB("eax"));
-       &movb   (&LB("edx"),&HB("eax"));
+       &shr    ("eax",16);
        &scale  ("esi","ecx");
+       &movz   ("ecx",&LB("eax"));
        &scale  ("edi","edx");
-       &shr    ("eax",16);
+       &movz   ("edx",&HB("eax"));
        &pxor   (@mm[0],&QWP(&row(0),$tbl,"esi",8));
        &$func  (@mm[1],&QWP(&row(1),$tbl,"edi",8));
-       &movb   (&LB("ecx"),&LB("eax"));
-       &movb   (&LB("edx"),&HB("eax"));
        &mov    ("eax",&DWP(($i+1)*8,"esp"));
        &scale  ("esi","ecx");
+       &movz   ("ecx",&LB("ebx"));
        &scale  ("edi","edx");
+       &movz   ("edx",&HB("ebx"));
        &$func  (@mm[2],&QWP(&row(2),$tbl,"esi",8));
        &$func  (@mm[3],&QWP(&row(3),$tbl,"edi",8));
-       &movb   (&LB("ecx"),&LB("ebx"));
-       &movb   (&LB("edx"),&HB("ebx"));
+       &shr    ("ebx",16);
        &scale  ("esi","ecx");
+       &movz   ("ecx",&LB("ebx"));
        &scale  ("edi","edx");
-       &shr    ("ebx",16);
+       &movz   ("edx",&HB("ebx"));
        &$func  (@mm[4],&QWP(&row(4),$tbl,"esi",8));
        &$func  (@mm[5],&QWP(&row(5),$tbl,"edi",8));
-       &movb   (&LB("ecx"),&LB("ebx"));
-       &movb   (&LB("edx"),&HB("ebx"));
        &mov    ("ebx",&DWP(($i+1)*8+4,"esp"));
        &scale  ("esi","ecx");
+       &movz   ("ecx",&LB("eax"));
        &scale  ("edi","edx");
+       &movz   ("edx",&HB("eax"));
        &$func  (@mm[6],&QWP(&row(6),$tbl,"esi",8));
        &$func  (@mm[7],&QWP(&row(7),$tbl,"edi",8));
     push(@mm,shift(@mm));
@@ -154,32 +156,32 @@ for($i=0;$i<8;$i++) {
        for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esp"),@mm[$i]); }    # K=L
 
 for($i=0;$i<8;$i++) {
-       &movb   (&LB("ecx"),&LB("eax"));
-       &movb   (&LB("edx"),&HB("eax"));
+       &shr    ("eax",16);
        &scale  ("esi","ecx");
+       &movz   ("ecx",&LB("eax"));
        &scale  ("edi","edx");
-       &shr    ("eax",16);
+       &movz   ("edx",&HB("eax"));
        &pxor   (@mm[0],&QWP(&row(0),$tbl,"esi",8));
        &pxor   (@mm[1],&QWP(&row(1),$tbl,"edi",8));
-       &movb   (&LB("ecx"),&LB("eax"));
-       &movb   (&LB("edx"),&HB("eax"));
        &mov    ("eax",&DWP(64+($i+1)*8,"esp"))         if ($i<7);
        &scale  ("esi","ecx");
+       &movz   ("ecx",&LB("ebx"));
        &scale  ("edi","edx");
+       &movz   ("edx",&HB("ebx"));
        &pxor   (@mm[2],&QWP(&row(2),$tbl,"esi",8));
        &pxor   (@mm[3],&QWP(&row(3),$tbl,"edi",8));
-       &movb   (&LB("ecx"),&LB("ebx"));
-       &movb   (&LB("edx"),&HB("ebx"));
+       &shr    ("ebx",16);
        &scale  ("esi","ecx");
+       &movz   ("ecx",&LB("ebx"));
        &scale  ("edi","edx");
-       &shr    ("ebx",16);
+       &movz   ("edx",&HB("ebx"));
        &pxor   (@mm[4],&QWP(&row(4),$tbl,"esi",8));
        &pxor   (@mm[5],&QWP(&row(5),$tbl,"edi",8));
-       &movb   (&LB("ecx"),&LB("ebx"));
-       &movb   (&LB("edx"),&HB("ebx"));
        &mov    ("ebx",&DWP(64+($i+1)*8+4,"esp"))       if ($i<7);
        &scale  ("esi","ecx");
+       &movz   ("ecx",&LB("eax"));
        &scale  ("edi","edx");
+       &movz   ("edx",&HB("eax"));
        &pxor   (@mm[6],&QWP(&row(6),$tbl,"esi",8));
        &pxor   (@mm[7],&QWP(&row(7),$tbl,"edi",8));
     push(@mm,shift(@mm));
index 24b2ff6..5a3bdbc 100644 (file)
@@ -91,41 +91,44 @@ for($i=0;$i<8;$i++) { $code.="mov @mm[$i],64+$i*8(%rsp)\n"; }       # S=L
 $code.=<<___;
        xor     %rsi,%rsi
        mov     %rsi,24(%rbx)           # zero round counter
+       jmp     .Lround
 .align 16
 .Lround:
        mov     4096(%rbp,%rsi,8),@mm[0]        # rc[r]
        mov     0(%rsp),%eax
        mov     4(%rsp),%ebx
+       movz    %al,%ecx
+       movz    %ah,%edx
 ___
 for($i=0;$i<8;$i++) {
     my $func = ($i==0)? "mov" : "xor";
     $code.=<<___;
-       mov     %al,%cl
-       mov     %ah,%dl
+       shr     \$16,%eax
        lea     (%rcx,%rcx),%rsi
+       movz    %al,%ecx
        lea     (%rdx,%rdx),%rdi
-       shr     \$16,%eax
+       movz    %ah,%edx
        xor     0(%rbp,%rsi,8),@mm[0]
        $func   7(%rbp,%rdi,8),@mm[1]
-       mov     %al,%cl
-       mov     %ah,%dl
        mov     $i*8+8(%rsp),%eax               # ($i+1)*8
        lea     (%rcx,%rcx),%rsi
+       movz    %bl,%ecx
        lea     (%rdx,%rdx),%rdi
+       movz    %bh,%edx
        $func   6(%rbp,%rsi,8),@mm[2]
        $func   5(%rbp,%rdi,8),@mm[3]
-       mov     %bl,%cl
-       mov     %bh,%dl
+       shr     \$16,%ebx
        lea     (%rcx,%rcx),%rsi
+       movz    %bl,%ecx
        lea     (%rdx,%rdx),%rdi
-       shr     \$16,%ebx
+       movz    %bh,%edx
        $func   4(%rbp,%rsi,8),@mm[4]
        $func   3(%rbp,%rdi,8),@mm[5]
-       mov     %bl,%cl
-       mov     %bh,%dl
        mov     $i*8+8+4(%rsp),%ebx             # ($i+1)*8+4
        lea     (%rcx,%rcx),%rsi
+       movz    %al,%ecx
        lea     (%rdx,%rdx),%rdi
+       movz    %ah,%edx
        $func   2(%rbp,%rsi,8),@mm[6]
        $func   1(%rbp,%rdi,8),@mm[7]
 ___
@@ -134,32 +137,32 @@ ___
 for($i=0;$i<8;$i++) { $code.="mov @mm[$i],$i*8(%rsp)\n"; }     # K=L
 for($i=0;$i<8;$i++) {
     $code.=<<___;
-       mov     %al,%cl
-       mov     %ah,%dl
+       shr     \$16,%eax
        lea     (%rcx,%rcx),%rsi
+       movz    %al,%ecx
        lea     (%rdx,%rdx),%rdi
-       shr     \$16,%eax
+       movz    %ah,%edx
        xor     0(%rbp,%rsi,8),@mm[0]
        xor     7(%rbp,%rdi,8),@mm[1]
-       mov     %al,%cl
-       mov     %ah,%dl
        `"mov   64+$i*8+8(%rsp),%eax"   if($i<7);`      # 64+($i+1)*8
        lea     (%rcx,%rcx),%rsi
+       movz    %bl,%ecx
        lea     (%rdx,%rdx),%rdi
+       movz    %bh,%edx
        xor     6(%rbp,%rsi,8),@mm[2]
        xor     5(%rbp,%rdi,8),@mm[3]
-       mov     %bl,%cl
-       mov     %bh,%dl
+       shr     \$16,%ebx
        lea     (%rcx,%rcx),%rsi
+       movz    %bl,%ecx
        lea     (%rdx,%rdx),%rdi
-       shr     \$16,%ebx
+       movz    %bh,%edx
        xor     4(%rbp,%rsi,8),@mm[4]
        xor     3(%rbp,%rdi,8),@mm[5]
-       mov     %bl,%cl
-       mov     %bh,%dl
        `"mov   64+$i*8+8+4(%rsp),%ebx" if($i<7);`      # 64+($i+1)*8+4
        lea     (%rcx,%rcx),%rsi
+       movz    %al,%ecx
        lea     (%rdx,%rdx),%rdi
+       movz    %ah,%edx
        xor     2(%rbp,%rsi,8),@mm[6]
        xor     1(%rbp,%rdi,8),@mm[7]
 ___
index aac3ece..bf197a1 100644 (file)
@@ -33,7 +33,7 @@ LIBOBJ= x509_def.o x509_d2.o x509_r2x.o x509_cmp.o \
 SRC= $(LIBSRC)
 
 EXHEADER= x509.h x509_vfy.h
-HEADER=        $(EXHEADER)
+HEADER=        $(EXHEADER) vpm_int.h
 
 ALL=    $(GENERAL) $(SRC) $(HEADER)
 
@@ -314,7 +314,7 @@ x509_vfy.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
 x509_vfy.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
 x509_vfy.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
 x509_vfy.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
-x509_vfy.o: ../cryptlib.h x509_vfy.c
+x509_vfy.o: ../cryptlib.h vpm_int.h x509_vfy.c
 x509_vpm.o: ../../e_os.h ../../include/openssl/asn1.h
 x509_vpm.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
 x509_vpm.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
@@ -328,7 +328,7 @@ x509_vpm.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
 x509_vpm.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
 x509_vpm.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
 x509_vpm.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
-x509_vpm.o: ../cryptlib.h x509_vpm.c
+x509_vpm.o: ../cryptlib.h vpm_int.h x509_vpm.c
 x509cset.o: ../../e_os.h ../../include/openssl/asn1.h
 x509cset.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
 x509cset.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
@@ -395,15 +395,17 @@ x509type.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
 x509type.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
 x509type.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x509type.c
 x_all.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
-x_all.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
-x_all.o: ../../include/openssl/dsa.h ../../include/openssl/e_os2.h
-x_all.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
-x_all.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-x_all.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-x_all.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_all.o: ../../include/openssl/buffer.h ../../include/openssl/conf.h
+x_all.o: ../../include/openssl/crypto.h ../../include/openssl/dsa.h
+x_all.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
+x_all.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
+x_all.o: ../../include/openssl/err.h ../../include/openssl/evp.h
+x_all.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+x_all.o: ../../include/openssl/objects.h ../../include/openssl/ocsp.h
 x_all.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
 x_all.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
 x_all.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h
 x_all.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
 x_all.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_all.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_all.c
+x_all.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
+x_all.o: ../cryptlib.h x_all.c
index a1e41f2..08509f0 100644 (file)
@@ -168,8 +168,7 @@ static int test_alt_chains_cert_forgery(void)
 
     i = X509_verify_cert(sctx);
 
-    if(i == 0 && X509_STORE_CTX_get_error(sctx)
-                 == X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT) {
+    if(i == 0 && X509_STORE_CTX_get_error(sctx) == X509_V_ERR_INVALID_CA) {
         /* This is the result we were expecting: Test passed */
         ret = 1;
     }
diff --git a/crypto/x509/vpm_int.h b/crypto/x509/vpm_int.h
new file mode 100644 (file)
index 0000000..9c55def
--- /dev/null
@@ -0,0 +1,70 @@
+/* vpm_int.h */
+/*
+ * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project
+ * 2013.
+ */
+/* ====================================================================
+ * Copyright (c) 2013 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com).  This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+/* internal only structure to hold additional X509_VERIFY_PARAM data */
+
+struct X509_VERIFY_PARAM_ID_st {
+    STACK_OF(OPENSSL_STRING) *hosts; /* Set of acceptable names */
+    unsigned int hostflags;     /* Flags to control matching features */
+    char *peername;             /* Matching hostname in peer certificate */
+    char *email;                /* If not NULL email address to match */
+    size_t emaillen;
+    unsigned char *ip;          /* If not NULL IP address to match */
+    size_t iplen;               /* Length of IP address */
+};
index a491174..99337b8 100644 (file)
@@ -361,6 +361,7 @@ typedef struct x509_cert_pair_st {
 # define X509_FLAG_NO_SIGDUMP            (1L << 9)
 # define X509_FLAG_NO_AUX                (1L << 10)
 # define X509_FLAG_NO_ATTRIBUTES         (1L << 11)
+# define X509_FLAG_NO_IDS                (1L << 12)
 
 /* Flags specific to X509_NAME_print_ex() */
 
@@ -645,10 +646,12 @@ int X509_signature_print(BIO *bp, X509_ALGOR *alg, ASN1_STRING *sig);
 
 int X509_sign(X509 *x, EVP_PKEY *pkey, const EVP_MD *md);
 int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx);
+int X509_http_nbio(OCSP_REQ_CTX *rctx, X509 **pcert);
 int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md);
 int X509_REQ_sign_ctx(X509_REQ *x, EVP_MD_CTX *ctx);
 int X509_CRL_sign(X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md);
 int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx);
+int X509_CRL_http_nbio(OCSP_REQ_CTX *rctx, X509_CRL **pcrl);
 int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md);
 
 int X509_pubkey_digest(const X509 *data, const EVP_MD *type,
@@ -745,6 +748,7 @@ X509 *X509_dup(X509 *x509);
 X509_ATTRIBUTE *X509_ATTRIBUTE_dup(X509_ATTRIBUTE *xa);
 X509_EXTENSION *X509_EXTENSION_dup(X509_EXTENSION *ex);
 X509_CRL *X509_CRL_dup(X509_CRL *crl);
+X509_REVOKED *X509_REVOKED_dup(X509_REVOKED *rev);
 X509_REQ *X509_REQ_dup(X509_REQ *req);
 X509_ALGOR *X509_ALGOR_dup(X509_ALGOR *xn);
 int X509_ALGOR_set0(X509_ALGOR *alg, ASN1_OBJECT *aobj, int ptype,
@@ -828,6 +832,12 @@ void *X509_get_ex_data(X509 *r, int idx);
 int i2d_X509_AUX(X509 *a, unsigned char **pp);
 X509 *d2i_X509_AUX(X509 **a, const unsigned char **pp, long length);
 
+int i2d_re_X509_tbs(X509 *x, unsigned char **pp);
+
+void X509_get0_signature(ASN1_BIT_STRING **psig, X509_ALGOR **palg,
+                         const X509 *x);
+int X509_get_signature_nid(const X509 *x);
+
 int X509_alias_set1(X509 *x, unsigned char *name, int len);
 int X509_keyid_set1(X509 *x, unsigned char *id, int len);
 unsigned char *X509_alias_get0(X509 *x, int *len);
@@ -939,9 +949,17 @@ int X509_CRL_sort(X509_CRL *crl);
 int X509_REVOKED_set_serialNumber(X509_REVOKED *x, ASN1_INTEGER *serial);
 int X509_REVOKED_set_revocationDate(X509_REVOKED *r, ASN1_TIME *tm);
 
+X509_CRL *X509_CRL_diff(X509_CRL *base, X509_CRL *newer,
+                        EVP_PKEY *skey, const EVP_MD *md, unsigned int flags);
+
 int X509_REQ_check_private_key(X509_REQ *x509, EVP_PKEY *pkey);
 
 int X509_check_private_key(X509 *x509, EVP_PKEY *pkey);
+int X509_chain_check_suiteb(int *perror_depth,
+                            X509 *x, STACK_OF(X509) *chain,
+                            unsigned long flags);
+int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags);
+STACK_OF(X509) *X509_chain_up_ref(STACK_OF(X509) *chain);
 
 int X509_issuer_and_serial_cmp(const X509 *a, const X509 *b);
 unsigned long X509_issuer_and_serial_hash(X509 *a);
@@ -1236,6 +1254,7 @@ void ERR_load_X509_strings(void);
 # define X509_F_X509_ATTRIBUTE_GET0_DATA                  139
 # define X509_F_X509_ATTRIBUTE_SET1_DATA                  138
 # define X509_F_X509_CHECK_PRIVATE_KEY                    128
+# define X509_F_X509_CRL_DIFF                             105
 # define X509_F_X509_CRL_PRINT_FP                         147
 # define X509_F_X509_EXTENSION_CREATE_BY_NID              108
 # define X509_F_X509_EXTENSION_CREATE_BY_OBJ              109
@@ -1268,20 +1287,27 @@ void ERR_load_X509_strings(void);
 # define X509_F_X509_VERIFY_CERT                          127
 
 /* Reason codes. */
+# define X509_R_AKID_MISMATCH                             110
 # define X509_R_BAD_X509_FILETYPE                         100
 # define X509_R_BASE64_DECODE_ERROR                       118
 # define X509_R_CANT_CHECK_DH_KEY                         114
 # define X509_R_CERT_ALREADY_IN_HASH_TABLE                101
+# define X509_R_CRL_ALREADY_DELTA                         127
+# define X509_R_CRL_VERIFY_FAILURE                        131
 # define X509_R_ERR_ASN1_LIB                              102
+# define X509_R_IDP_MISMATCH                              128
 # define X509_R_INVALID_DIRECTORY                         113
 # define X509_R_INVALID_FIELD_NAME                        119
 # define X509_R_INVALID_TRUST                             123
+# define X509_R_ISSUER_MISMATCH                           129
 # define X509_R_KEY_TYPE_MISMATCH                         115
 # define X509_R_KEY_VALUES_MISMATCH                       116
 # define X509_R_LOADING_CERT_DIR                          103
 # define X509_R_LOADING_DEFAULTS                          104
 # define X509_R_METHOD_NOT_SUPPORTED                      124
+# define X509_R_NEWER_CRL_NOT_NEWER                       132
 # define X509_R_NO_CERT_SET_FOR_US_TO_VERIFY              105
+# define X509_R_NO_CRL_NUMBER                             130
 # define X509_R_PUBLIC_KEY_DECODE_ERROR                   125
 # define X509_R_PUBLIC_KEY_ENCODE_ERROR                   126
 # define X509_R_SHOULD_RETRY                              106
index 3c5b717..49c71b9 100644 (file)
@@ -179,11 +179,23 @@ unsigned long X509_subject_name_hash_old(X509 *x)
  */
 int X509_cmp(const X509 *a, const X509 *b)
 {
+    int rv;
     /* ensure hash is valid */
     X509_check_purpose((X509 *)a, -1, 0);
     X509_check_purpose((X509 *)b, -1, 0);
 
-    return memcmp(a->sha1_hash, b->sha1_hash, SHA_DIGEST_LENGTH);
+    rv = memcmp(a->sha1_hash, b->sha1_hash, SHA_DIGEST_LENGTH);
+    if (rv)
+        return rv;
+    /* Check for match against stored encoding too */
+    if (!a->cert_info->enc.modified && !b->cert_info->enc.modified) {
+        rv = (int)(a->cert_info->enc.len - b->cert_info->enc.len);
+        if (rv)
+            return rv;
+        return memcmp(a->cert_info->enc.enc, b->cert_info->enc.enc,
+                      a->cert_info->enc.len);
+    }
+    return rv;
 }
 #endif
 
@@ -339,3 +351,148 @@ int X509_check_private_key(X509 *x, EVP_PKEY *k)
         return 1;
     return 0;
 }
+
+/*
+ * Check a suite B algorithm is permitted: pass in a public key and the NID
+ * of its signature (or 0 if no signature). The pflags is a pointer to a
+ * flags field which must contain the suite B verification flags.
+ */
+
+#ifndef OPENSSL_NO_EC
+
+static int check_suite_b(EVP_PKEY *pkey, int sign_nid, unsigned long *pflags)
+{
+    const EC_GROUP *grp = NULL;
+    int curve_nid;
+    if (pkey && pkey->type == EVP_PKEY_EC)
+        grp = EC_KEY_get0_group(pkey->pkey.ec);
+    if (!grp)
+        return X509_V_ERR_SUITE_B_INVALID_ALGORITHM;
+    curve_nid = EC_GROUP_get_curve_name(grp);
+    /* Check curve is consistent with LOS */
+    if (curve_nid == NID_secp384r1) { /* P-384 */
+        /*
+         * Check signature algorithm is consistent with curve.
+         */
+        if (sign_nid != -1 && sign_nid != NID_ecdsa_with_SHA384)
+            return X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM;
+        if (!(*pflags & X509_V_FLAG_SUITEB_192_LOS))
+            return X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED;
+        /* If we encounter P-384 we cannot use P-256 later */
+        *pflags &= ~X509_V_FLAG_SUITEB_128_LOS_ONLY;
+    } else if (curve_nid == NID_X9_62_prime256v1) { /* P-256 */
+        if (sign_nid != -1 && sign_nid != NID_ecdsa_with_SHA256)
+            return X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM;
+        if (!(*pflags & X509_V_FLAG_SUITEB_128_LOS_ONLY))
+            return X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED;
+    } else
+        return X509_V_ERR_SUITE_B_INVALID_CURVE;
+
+    return X509_V_OK;
+}
+
+int X509_chain_check_suiteb(int *perror_depth, X509 *x, STACK_OF(X509) *chain,
+                            unsigned long flags)
+{
+    int rv, i, sign_nid;
+    EVP_PKEY *pk = NULL;
+    unsigned long tflags;
+    if (!(flags & X509_V_FLAG_SUITEB_128_LOS))
+        return X509_V_OK;
+    tflags = flags;
+    /* If no EE certificate is passed in, it must be first in the chain */
+    if (x == NULL) {
+        x = sk_X509_value(chain, 0);
+        i = 1;
+    } else
+        i = 0;
+
+    if (X509_get_version(x) != 2) {
+        rv = X509_V_ERR_SUITE_B_INVALID_VERSION;
+        /* Correct error depth */
+        i = 0;
+        goto end;
+    }
+
+    pk = X509_get_pubkey(x);
+    /* Check EE key only */
+    rv = check_suite_b(pk, -1, &tflags);
+    if (rv != X509_V_OK) {
+        /* Correct error depth */
+        i = 0;
+        goto end;
+    }
+    for (; i < sk_X509_num(chain); i++) {
+        sign_nid = X509_get_signature_nid(x);
+        x = sk_X509_value(chain, i);
+        if (X509_get_version(x) != 2) {
+            rv = X509_V_ERR_SUITE_B_INVALID_VERSION;
+            goto end;
+        }
+        EVP_PKEY_free(pk);
+        pk = X509_get_pubkey(x);
+        rv = check_suite_b(pk, sign_nid, &tflags);
+        if (rv != X509_V_OK)
+            goto end;
+    }
+
+    /* Final check: root CA signature */
+    rv = check_suite_b(pk, X509_get_signature_nid(x), &tflags);
+ end:
+    if (pk)
+        EVP_PKEY_free(pk);
+    if (rv != X509_V_OK) {
+        /* Invalid signature or LOS errors are for previous cert */
+        if ((rv == X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM
+             || rv == X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED) && i)
+            i--;
+        /*
+         * If we have LOS error and flags changed then we are signing P-384
+         * with P-256. Use a more meaningful error.
+         */
+        if (rv == X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED && flags != tflags)
+            rv = X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256;
+        if (perror_depth)
+            *perror_depth = i;
+    }
+    return rv;
+}
+
+int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags)
+{
+    int sign_nid;
+    if (!(flags & X509_V_FLAG_SUITEB_128_LOS))
+        return X509_V_OK;
+    sign_nid = OBJ_obj2nid(crl->crl->sig_alg->algorithm);
+    return check_suite_b(pk, sign_nid, &flags);
+}
+
+#else
+int X509_chain_check_suiteb(int *perror_depth, X509 *x, STACK_OF(X509) *chain,
+                            unsigned long flags)
+{
+    return 0;
+}
+
+int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags)
+{
+    return 0;
+}
+
+#endif
+/*
+ * Not strictly speaking an "up_ref" as a STACK doesn't have a reference
+ * count but it has the same effect by duping the STACK and upping the ref of
+ * each X509 structure.
+ */
+STACK_OF(X509) *X509_chain_up_ref(STACK_OF(X509) *chain)
+{
+    STACK_OF(X509) *ret;
+    int i;
+    ret = sk_X509_dup(chain);
+    for (i = 0; i < sk_X509_num(ret); i++) {
+        X509 *x = sk_X509_value(ret, i);
+        CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509);
+    }
+    return ret;
+}
index 61a19f7..43cde18 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/x509/x509_err.c */
 /* ====================================================================
- * Copyright (c) 1999-2006 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2012 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -88,6 +88,7 @@ static ERR_STRING_DATA X509_str_functs[] = {
     {ERR_FUNC(X509_F_X509_ATTRIBUTE_GET0_DATA), "X509_ATTRIBUTE_get0_data"},
     {ERR_FUNC(X509_F_X509_ATTRIBUTE_SET1_DATA), "X509_ATTRIBUTE_set1_data"},
     {ERR_FUNC(X509_F_X509_CHECK_PRIVATE_KEY), "X509_check_private_key"},
+    {ERR_FUNC(X509_F_X509_CRL_DIFF), "X509_CRL_diff"},
     {ERR_FUNC(X509_F_X509_CRL_PRINT_FP), "X509_CRL_print_fp"},
     {ERR_FUNC(X509_F_X509_EXTENSION_CREATE_BY_NID),
      "X509_EXTENSION_create_by_NID"},
@@ -131,22 +132,29 @@ static ERR_STRING_DATA X509_str_functs[] = {
 };
 
 static ERR_STRING_DATA X509_str_reasons[] = {
+    {ERR_REASON(X509_R_AKID_MISMATCH), "akid mismatch"},
     {ERR_REASON(X509_R_BAD_X509_FILETYPE), "bad x509 filetype"},
     {ERR_REASON(X509_R_BASE64_DECODE_ERROR), "base64 decode error"},
     {ERR_REASON(X509_R_CANT_CHECK_DH_KEY), "cant check dh key"},
     {ERR_REASON(X509_R_CERT_ALREADY_IN_HASH_TABLE),
      "cert already in hash table"},
+    {ERR_REASON(X509_R_CRL_ALREADY_DELTA), "crl already delta"},
+    {ERR_REASON(X509_R_CRL_VERIFY_FAILURE), "crl verify failure"},
     {ERR_REASON(X509_R_ERR_ASN1_LIB), "err asn1 lib"},
+    {ERR_REASON(X509_R_IDP_MISMATCH), "idp mismatch"},
     {ERR_REASON(X509_R_INVALID_DIRECTORY), "invalid directory"},
     {ERR_REASON(X509_R_INVALID_FIELD_NAME), "invalid field name"},
     {ERR_REASON(X509_R_INVALID_TRUST), "invalid trust"},
+    {ERR_REASON(X509_R_ISSUER_MISMATCH), "issuer mismatch"},
     {ERR_REASON(X509_R_KEY_TYPE_MISMATCH), "key type mismatch"},
     {ERR_REASON(X509_R_KEY_VALUES_MISMATCH), "key values mismatch"},
     {ERR_REASON(X509_R_LOADING_CERT_DIR), "loading cert dir"},
     {ERR_REASON(X509_R_LOADING_DEFAULTS), "loading defaults"},
     {ERR_REASON(X509_R_METHOD_NOT_SUPPORTED), "method not supported"},
+    {ERR_REASON(X509_R_NEWER_CRL_NOT_NEWER), "newer crl not newer"},
     {ERR_REASON(X509_R_NO_CERT_SET_FOR_US_TO_VERIFY),
      "no cert set for us to verify"},
+    {ERR_REASON(X509_R_NO_CRL_NUMBER), "no crl number"},
     {ERR_REASON(X509_R_PUBLIC_KEY_DECODE_ERROR), "public key decode error"},
     {ERR_REASON(X509_R_PUBLIC_KEY_ENCODE_ERROR), "public key encode error"},
     {ERR_REASON(X509_R_SHOULD_RETRY), "should retry"},
index 8415d1d..b0d6539 100644 (file)
@@ -238,6 +238,19 @@ void X509_STORE_free(X509_STORE *vfy)
     if (vfy == NULL)
         return;
 
+    i = CRYPTO_add(&vfy->references, -1, CRYPTO_LOCK_X509_STORE);
+#ifdef REF_PRINT
+    REF_PRINT("X509_STORE", vfy);
+#endif
+    if (i > 0)
+        return;
+#ifdef REF_CHECK
+    if (i < 0) {
+        fprintf(stderr, "X509_STORE_free, bad reference count\n");
+        abort();                /* ok */
+    }
+#endif
+
     sk = vfy->get_cert_methods;
     for (i = 0; i < sk_X509_LOOKUP_num(sk); i++) {
         lu = sk_X509_LOOKUP_value(sk, i);
@@ -681,6 +694,19 @@ void X509_STORE_set_verify_cb(X509_STORE *ctx,
     ctx->verify_cb = verify_cb;
 }
 
+void X509_STORE_set_lookup_crls_cb(X509_STORE *ctx,
+                                   STACK_OF(X509_CRL) *(*cb) (X509_STORE_CTX
+                                                              *ctx,
+                                                              X509_NAME *nm))
+{
+    ctx->lookup_crls = cb;
+}
+
+X509_STORE *X509_STORE_CTX_get0_store(X509_STORE_CTX *ctx)
+{
+    return ctx->ctx;
+}
+
 IMPLEMENT_STACK_OF(X509_LOOKUP)
 
 IMPLEMENT_STACK_OF(X509_OBJECT)
index 4645777..5b802bd 100644 (file)
@@ -67,6 +67,11 @@ int X509_set_version(X509 *x, long version)
 {
     if (x == NULL)
         return (0);
+    if (version == 0) {
+        M_ASN1_INTEGER_free(x->cert_info->version);
+        x->cert_info->version = NULL;
+        return (1);
+    }
     if (x->cert_info->version == NULL) {
         if ((x->cert_info->version = M_ASN1_INTEGER_new()) == NULL)
             return (0);
index 7e44479..11e0763 100644 (file)
@@ -119,6 +119,14 @@ int X509_check_trust(X509 *x, int id, int flags)
     int idx;
     if (id == -1)
         return 1;
+    /* We get this as a default value */
+    if (id == 0) {
+        int rv;
+        rv = obj_trust(NID_anyExtendedKeyUsage, x, 0);
+        if (rv != X509_TRUST_UNTRUSTED)
+            return rv;
+        return trust_compat(NULL, x, 0);
+    }
     idx = X509_TRUST_get_by_id(id);
     if (idx == -1)
         return default_trust(id, x, flags);
index d834180..3d46d3f 100644 (file)
@@ -184,6 +184,26 @@ const char *X509_verify_cert_error_string(long n)
     case X509_V_ERR_CRL_PATH_VALIDATION_ERROR:
         return ("CRL path validation error");
 
+    case X509_V_ERR_SUITE_B_INVALID_VERSION:
+        return ("Suite B: certificate version invalid");
+    case X509_V_ERR_SUITE_B_INVALID_ALGORITHM:
+        return ("Suite B: invalid public key algorithm");
+    case X509_V_ERR_SUITE_B_INVALID_CURVE:
+        return ("Suite B: invalid ECC curve");
+    case X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM:
+        return ("Suite B: invalid signature algorithm");
+    case X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED:
+        return ("Suite B: curve not allowed for this LOS");
+    case X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256:
+        return ("Suite B: cannot sign P-384 with P-256");
+
+    case X509_V_ERR_HOSTNAME_MISMATCH:
+        return ("Hostname mismatch");
+    case X509_V_ERR_EMAIL_MISMATCH:
+        return ("Email address mismatch");
+    case X509_V_ERR_IP_ADDRESS_MISMATCH:
+        return ("IP address mismatch");
+
     default:
         BIO_snprintf(buf, sizeof buf, "error number %ld", n);
         return (buf);
index 7009ae6..a2f1dbe 100644 (file)
@@ -69,6 +69,7 @@
 #include <openssl/x509.h>
 #include <openssl/x509v3.h>
 #include <openssl/objects.h>
+#include "vpm_int.h"
 
 /* CRL score values */
 
@@ -113,6 +114,7 @@ static int check_issued(X509_STORE_CTX *ctx, X509 *x, X509 *issuer);
 static X509 *find_issuer(X509_STORE_CTX *ctx, STACK_OF(X509) *sk, X509 *x);
 static int check_chain_extensions(X509_STORE_CTX *ctx);
 static int check_name_constraints(X509_STORE_CTX *ctx);
+static int check_id(X509_STORE_CTX *ctx);
 static int check_trust(X509_STORE_CTX *ctx);
 static int check_revocation(X509_STORE_CTX *ctx);
 static int check_cert(X509_STORE_CTX *ctx);
@@ -148,6 +150,40 @@ static int x509_subject_cmp(X509 **a, X509 **b)
     return X509_subject_name_cmp(*a, *b);
 }
 #endif
+/* Return 1 if a certificate is self signed */
+static int cert_self_signed(X509 *x)
+{
+    X509_check_purpose(x, -1, 0);
+    if (x->ex_flags & EXFLAG_SS)
+        return 1;
+    else
+        return 0;
+}
+
+/* Given a certificate try and find an exact match in the store */
+
+static X509 *lookup_cert_match(X509_STORE_CTX *ctx, X509 *x)
+{
+    STACK_OF(X509) *certs;
+    X509 *xtmp = NULL;
+    int i;
+    /* Lookup all certs with matching subject name */
+    certs = ctx->lookup_certs(ctx, X509_get_subject_name(x));
+    if (certs == NULL)
+        return NULL;
+    /* Look for exact match */
+    for (i = 0; i < sk_X509_num(certs); i++) {
+        xtmp = sk_X509_value(certs, i);
+        if (!X509_cmp(xtmp, x))
+            break;
+    }
+    if (i < sk_X509_num(certs))
+        CRYPTO_add(&xtmp->references, 1, CRYPTO_LOCK_X509);
+    else
+        xtmp = NULL;
+    sk_X509_pop_free(certs, X509_free);
+    return xtmp;
+}
 
 int X509_verify_cert(X509_STORE_CTX *ctx)
 {
@@ -205,8 +241,24 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
                                  * later. */
 
         /* If we are self signed, we break */
-        if (ctx->check_issued(ctx, x, x))
+        if (cert_self_signed(x))
             break;
+        /*
+         * If asked see if we can find issuer in trusted store first
+         */
+        if (ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST) {
+            ok = ctx->get_issuer(&xtmp, ctx, x);
+            if (ok < 0)
+                return ok;
+            /*
+             * If successful for now free up cert so it will be picked up
+             * again later.
+             */
+            if (ok > 0) {
+                X509_free(xtmp);
+                break;
+            }
+        }
 
         /* If we were passed a cert chain, use it first */
         if (ctx->untrusted != NULL) {
@@ -244,7 +296,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
          */
         i = sk_X509_num(ctx->chain);
         x = sk_X509_value(ctx->chain, i - 1);
-        if (ctx->check_issued(ctx, x, x)) {
+        if (cert_self_signed(x)) {
             /* we have a self signed certificate */
             if (sk_X509_num(ctx->chain) == 1) {
                 /*
@@ -290,9 +342,10 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
             if (depth < num)
                 break;
             /* If we are self signed, we break */
-            if (ctx->check_issued(ctx, x, x))
+            if (cert_self_signed(x))
                 break;
             ok = ctx->get_issuer(&xtmp, ctx, x);
+
             if (ok < 0)
                 return ok;
             if (ok == 0)
@@ -306,14 +359,22 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
             num++;
         }
 
+        /* we now have our chain, lets check it... */
+        i = check_trust(ctx);
+
+        /* If explicitly rejected, this is an error */
+        if (i == X509_TRUST_REJECTED)
+            goto end;
         /*
-         * If we haven't got a least one certificate from our store then check
-         * if there is an alternative chain that could be used.  We only do this
-         * if the user hasn't switched off alternate chain checking
+         * If it's not explicitly trusted then check if there is an alternative
+         * chain that could be used. We only do this if we haven't already
+         * checked via TRUSTED_FIRST and the user hasn't switched off alternate
+         * chain checking
          */
         retry = 0;
-        if (num == ctx->last_untrusted &&
-            !(ctx->param->flags & X509_V_FLAG_NO_ALT_CHAINS)) {
+        if (i != X509_TRUST_TRUSTED
+            && !(ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST)
+            && !(ctx->param->flags & X509_V_FLAG_NO_ALT_CHAINS)) {
             while (j-- > 1) {
                 xtmp2 = sk_X509_value(ctx->chain, j - 1);
                 ok = ctx->get_issuer(&xtmp, ctx, xtmp2);
@@ -343,8 +404,12 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
         }
     } while (retry);
 
-    /* Is last certificate looked up self signed? */
-    if (!ctx->check_issued(ctx, x, x)) {
+    /*
+     * If not explicitly trusted then indicate error unless it's a single
+     * self signed certificate in which case we've indicated an error already
+     * and set bad_chain == 1
+     */
+    if (i != X509_TRUST_TRUSTED && !bad_chain) {
         if ((chain_ss == NULL) || !ctx->check_issued(ctx, x, chain_ss)) {
             if (ctx->last_untrusted >= num)
                 ctx->error = X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY;
@@ -381,10 +446,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
     if (!ok)
         goto end;
 
-    /* The chain extensions are OK: check trust */
-
-    if (param->trust > 0)
-        ok = check_trust(ctx);
+    ok = check_id(ctx);
 
     if (!ok)
         goto end;
@@ -401,6 +463,16 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
     if (!ok)
         goto end;
 
+    i = X509_chain_check_suiteb(&ctx->error_depth, NULL, ctx->chain,
+                                ctx->param->flags);
+    if (i != X509_V_OK) {
+        ctx->error = i;
+        ctx->current_cert = sk_X509_value(ctx->chain, ctx->error_depth);
+        ok = cb(0, ctx);
+        if (!ok)
+            goto end;
+    }
+
     /* At this point, we have a chain and need to verify it */
     if (ctx->verify != NULL)
         ok = ctx->verify(ctx);
@@ -467,7 +539,6 @@ static int check_issued(X509_STORE_CTX *ctx, X509 *x, X509 *issuer)
     ctx->current_cert = x;
     ctx->current_issuer = issuer;
     return ctx->verify_cb(0, ctx);
-    return 0;
 }
 
 /* Alternative lookup method: look from a STACK stored in other_ctx */
@@ -667,30 +738,97 @@ static int check_name_constraints(X509_STORE_CTX *ctx)
     return 1;
 }
 
-static int check_trust(X509_STORE_CTX *ctx)
+static int check_id_error(X509_STORE_CTX *ctx, int errcode)
 {
-#ifdef OPENSSL_NO_CHAIN_VERIFY
+    ctx->error = errcode;
+    ctx->current_cert = ctx->cert;
+    ctx->error_depth = 0;
+    return ctx->verify_cb(0, ctx);
+}
+
+static int check_hosts(X509 *x, X509_VERIFY_PARAM_ID *id)
+{
+    int i;
+    int n = sk_OPENSSL_STRING_num(id->hosts);
+    char *name;
+
+    for (i = 0; i < n; ++i) {
+        name = sk_OPENSSL_STRING_value(id->hosts, i);
+        if (X509_check_host(x, name, 0, id->hostflags, &id->peername) > 0)
+            return 1;
+    }
+    return n == 0;
+}
+
+static int check_id(X509_STORE_CTX *ctx)
+{
+    X509_VERIFY_PARAM *vpm = ctx->param;
+    X509_VERIFY_PARAM_ID *id = vpm->id;
+    X509 *x = ctx->cert;
+    if (id->hosts && check_hosts(x, id) <= 0) {
+        if (!check_id_error(ctx, X509_V_ERR_HOSTNAME_MISMATCH))
+            return 0;
+    }
+    if (id->email && X509_check_email(x, id->email, id->emaillen, 0) <= 0) {
+        if (!check_id_error(ctx, X509_V_ERR_EMAIL_MISMATCH))
+            return 0;
+    }
+    if (id->ip && X509_check_ip(x, id->ip, id->iplen, 0) <= 0) {
+        if (!check_id_error(ctx, X509_V_ERR_IP_ADDRESS_MISMATCH))
+            return 0;
+    }
     return 1;
-#else
+}
+
+static int check_trust(X509_STORE_CTX *ctx)
+{
     int i, ok;
-    X509 *x;
+    X509 *x = NULL;
     int (*cb) (int xok, X509_STORE_CTX *xctx);
     cb = ctx->verify_cb;
-/* For now just check the last certificate in the chain */
-    i = sk_X509_num(ctx->chain) - 1;
-    x = sk_X509_value(ctx->chain, i);
-    ok = X509_check_trust(x, ctx->param->trust, 0);
-    if (ok == X509_TRUST_TRUSTED)
-        return 1;
-    ctx->error_depth = i;
-    ctx->current_cert = x;
-    if (ok == X509_TRUST_REJECTED)
-        ctx->error = X509_V_ERR_CERT_REJECTED;
-    else
-        ctx->error = X509_V_ERR_CERT_UNTRUSTED;
-    ok = cb(0, ctx);
-    return ok;
-#endif
+    /* Check all trusted certificates in chain */
+    for (i = ctx->last_untrusted; i < sk_X509_num(ctx->chain); i++) {
+        x = sk_X509_value(ctx->chain, i);
+        ok = X509_check_trust(x, ctx->param->trust, 0);
+        /* If explicitly trusted return trusted */
+        if (ok == X509_TRUST_TRUSTED)
+            return X509_TRUST_TRUSTED;
+        /*
+         * If explicitly rejected notify callback and reject if not
+         * overridden.
+         */
+        if (ok == X509_TRUST_REJECTED) {
+            ctx->error_depth = i;
+            ctx->current_cert = x;
+            ctx->error = X509_V_ERR_CERT_REJECTED;
+            ok = cb(0, ctx);
+            if (!ok)
+                return X509_TRUST_REJECTED;
+        }
+    }
+    /*
+     * If we accept partial chains and have at least one trusted certificate
+     * return success.
+     */
+    if (ctx->param->flags & X509_V_FLAG_PARTIAL_CHAIN) {
+        X509 *mx;
+        if (ctx->last_untrusted < sk_X509_num(ctx->chain))
+            return X509_TRUST_TRUSTED;
+        x = sk_X509_value(ctx->chain, 0);
+        mx = lookup_cert_match(ctx, x);
+        if (mx) {
+            (void)sk_X509_set(ctx->chain, 0, mx);
+            X509_free(x);
+            ctx->last_untrusted = 0;
+            return X509_TRUST_TRUSTED;
+        }
+    }
+
+    /*
+     * If no trusted certs in chain at all return untrusted and allow
+     * standard (no issuer cert) etc errors to be indicated.
+     */
+    return X509_TRUST_UNTRUSTED;
 }
 
 static int check_revocation(X509_STORE_CTX *ctx)
@@ -1409,6 +1547,14 @@ static int check_crl(X509_STORE_CTX *ctx, X509_CRL *crl)
             if (!ok)
                 goto err;
         } else {
+            int rv;
+            rv = X509_CRL_check_suiteb(crl, ikey, ctx->param->flags);
+            if (rv != X509_V_OK) {
+                ctx->error = rv;
+                ok = ctx->verify_cb(0, ctx);
+                if (!ok)
+                    goto err;
+            }
             /* Verify CRL signature */
             if (X509_CRL_verify(crl, ikey) <= 0) {
                 ctx->error = X509_V_ERR_CRL_SIGNATURE_FAILURE;
@@ -1565,6 +1711,10 @@ static int internal_verify(X509_STORE_CTX *ctx)
     if (ctx->check_issued(ctx, xi, xi))
         xs = xi;
     else {
+        if (ctx->param->flags & X509_V_FLAG_PARTIAL_CHAIN) {
+            xs = xi;
+            goto check_cert;
+        }
         if (n <= 0) {
             ctx->error = X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE;
             ctx->current_cert = xi;
@@ -1610,6 +1760,7 @@ static int internal_verify(X509_STORE_CTX *ctx)
 
         xs->valid = 1;
 
+ check_cert:
         ok = check_cert_time(ctx, xs);
         if (!ok)
             goto end;
@@ -1824,6 +1975,114 @@ int X509_get_pubkey_parameters(EVP_PKEY *pkey, STACK_OF(X509) *chain)
     return 1;
 }
 
+/*
+ * Make a delta CRL as the diff between two full CRLs.
+ *
+ * base and newer must both be full (non-delta) CRLs that carry a CRL
+ * number, have the same issuer name and matching AKID and IDP extensions,
+ * and newer's CRL number must be strictly greater than base's. If skey is
+ * non-NULL both inputs must verify against it; if md is non-NULL as well the
+ * resulting delta CRL is signed with skey/md, otherwise it is returned
+ * unsigned. The flags argument is not referenced by this function.
+ *
+ * Returns the CRL created with X509_CRL_new(), or NULL after pushing an
+ * X509_F_X509_CRL_DIFF error.
+ */
+
+X509_CRL *X509_CRL_diff(X509_CRL *base, X509_CRL *newer,
+                        EVP_PKEY *skey, const EVP_MD *md, unsigned int flags)
+{
+    X509_CRL *crl = NULL;
+    int i;
+    STACK_OF(X509_REVOKED) *revs = NULL;
+    /* CRLs can't be delta already */
+    if (base->base_crl_number || newer->base_crl_number) {
+        X509err(X509_F_X509_CRL_DIFF, X509_R_CRL_ALREADY_DELTA);
+        return NULL;
+    }
+    /* Base and new CRL must have a CRL number */
+    if (!base->crl_number || !newer->crl_number) {
+        X509err(X509_F_X509_CRL_DIFF, X509_R_NO_CRL_NUMBER);
+        return NULL;
+    }
+    /* Issuer names must match */
+    if (X509_NAME_cmp(X509_CRL_get_issuer(base), X509_CRL_get_issuer(newer))) {
+        X509err(X509_F_X509_CRL_DIFF, X509_R_ISSUER_MISMATCH);
+        return NULL;
+    }
+    /* AKID and IDP must match */
+    if (!crl_extension_match(base, newer, NID_authority_key_identifier)) {
+        X509err(X509_F_X509_CRL_DIFF, X509_R_AKID_MISMATCH);
+        return NULL;
+    }
+    if (!crl_extension_match(base, newer, NID_issuing_distribution_point)) {
+        X509err(X509_F_X509_CRL_DIFF, X509_R_IDP_MISMATCH);
+        return NULL;
+    }
+    /* Newer CRL number must exceed full CRL number */
+    if (ASN1_INTEGER_cmp(newer->crl_number, base->crl_number) <= 0) {
+        X509err(X509_F_X509_CRL_DIFF, X509_R_NEWER_CRL_NOT_NEWER);
+        return NULL;
+    }
+    /* CRLs must verify (only checked when a key is supplied) */
+    if (skey && (X509_CRL_verify(base, skey) <= 0 ||
+                 X509_CRL_verify(newer, skey) <= 0)) {
+        X509err(X509_F_X509_CRL_DIFF, X509_R_CRL_VERIFY_FAILURE);
+        return NULL;
+    }
+    /* Create new CRL */
+    crl = X509_CRL_new();
+    if (!crl || !X509_CRL_set_version(crl, 1))
+        goto memerr;
+    /* Set issuer name */
+    if (!X509_CRL_set_issuer_name(crl, X509_CRL_get_issuer(newer)))
+        goto memerr;
+
+    /* The delta takes its validity window from the newer CRL */
+    if (!X509_CRL_set_lastUpdate(crl, X509_CRL_get_lastUpdate(newer)))
+        goto memerr;
+    if (!X509_CRL_set_nextUpdate(crl, X509_CRL_get_nextUpdate(newer)))
+        goto memerr;
+
+    /* Set base CRL number: must be critical */
+
+    if (!X509_CRL_add1_ext_i2d(crl, NID_delta_crl, base->crl_number, 1, 0))
+        goto memerr;
+
+    /*
+     * Copy extensions across from newest CRL to delta: this will set CRL
+     * number to correct value too.
+     */
+
+    for (i = 0; i < X509_CRL_get_ext_count(newer); i++) {
+        X509_EXTENSION *ext;
+        ext = X509_CRL_get_ext(newer, i);
+        if (!X509_CRL_add_ext(crl, ext, -1))
+            goto memerr;
+    }
+
+    /* Go through revoked entries, copying as needed */
+
+    revs = X509_CRL_get_REVOKED(newer);
+
+    for (i = 0; i < sk_X509_REVOKED_num(revs); i++) {
+        X509_REVOKED *rvn, *rvtmp;
+        rvn = sk_X509_REVOKED_value(revs, i);
+        /*
+         * Add only if not also in base. TODO: need something cleverer here
+         * for some more complex CRLs covering multiple CAs.
+         */
+        if (!X509_CRL_get0_by_serial(base, &rvtmp, rvn->serialNumber)) {
+            /* rvtmp is reused here to hold the duplicate that gets added */
+            rvtmp = X509_REVOKED_dup(rvn);
+            if (!rvtmp)
+                goto memerr;
+            if (!X509_CRL_add0_revoked(crl, rvtmp)) {
+                /* add0 did not take the entry, so free it here */
+                X509_REVOKED_free(rvtmp);
+                goto memerr;
+            }
+        }
+    }
+    /* TODO: optionally prune deleted entries */
+
+    /* Signing is optional: it needs both a key and a digest */
+    if (skey && md && !X509_CRL_sign(crl, skey, md))
+        goto memerr;
+
+    return crl;
+
+    /*
+     * Every failure after the precondition checks lands here, so a signing
+     * failure is also reported as ERR_R_MALLOC_FAILURE.
+     */
+ memerr:
+    X509err(X509_F_X509_CRL_DIFF, ERR_R_MALLOC_FAILURE);
+    if (crl)
+        X509_CRL_free(crl);
+    return NULL;
+}
+
 int X509_STORE_CTX_get_ex_new_index(long argl, void *argp,
                                     CRYPTO_EX_new *new_func,
                                     CRYPTO_EX_dup *dup_func,
@@ -1874,16 +2133,9 @@ STACK_OF(X509) *X509_STORE_CTX_get_chain(X509_STORE_CTX *ctx)
 
 STACK_OF(X509) *X509_STORE_CTX_get1_chain(X509_STORE_CTX *ctx)
 {
-    int i;
-    X509 *x;
-    STACK_OF(X509) *chain;
-    if (!ctx->chain || !(chain = sk_X509_dup(ctx->chain)))
+    if (!ctx->chain)
         return NULL;
-    for (i = 0; i < sk_X509_num(chain); i++) {
-        x = sk_X509_value(chain, i);
-        CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509);
-    }
-    return chain;
+    return X509_chain_up_ref(ctx->chain);
 }
 
 X509 *X509_STORE_CTX_get0_current_issuer(X509_STORE_CTX *ctx)
index aacdf55..bd8613c 100644 (file)
@@ -156,6 +156,8 @@ typedef struct x509_lookup_method_st {
                          X509_OBJECT *ret);
 } X509_LOOKUP_METHOD;
 
+typedef struct X509_VERIFY_PARAM_ID_st X509_VERIFY_PARAM_ID;
+
 /*
  * This structure hold all parameters associated with a verify operation by
  * including an X509_VERIFY_PARAM structure in related structures the
@@ -171,6 +173,7 @@ typedef struct X509_VERIFY_PARAM_st {
     int trust;                  /* trust setting to check */
     int depth;                  /* Verify depth */
     STACK_OF(ASN1_OBJECT) *policies; /* Permissible policies */
+    X509_VERIFY_PARAM_ID *id;   /* opaque ID data */
 } X509_VERIFY_PARAM;
 
 DECLARE_STACK_OF(X509_VERIFY_PARAM)
@@ -370,6 +373,19 @@ void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth);
 # define         X509_V_ERR_UNSUPPORTED_NAME_SYNTAX              53
 # define         X509_V_ERR_CRL_PATH_VALIDATION_ERROR            54
 
+/* Suite B mode algorithm violation */
+# define         X509_V_ERR_SUITE_B_INVALID_VERSION              56
+# define         X509_V_ERR_SUITE_B_INVALID_ALGORITHM            57
+# define         X509_V_ERR_SUITE_B_INVALID_CURVE                58
+# define         X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM  59
+# define         X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED              60
+# define         X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256 61
+
+/* Host, email and IP check errors */
+# define         X509_V_ERR_HOSTNAME_MISMATCH                    62
+# define         X509_V_ERR_EMAIL_MISMATCH                       63
+# define         X509_V_ERR_IP_ADDRESS_MISMATCH                  64
+
 /* The application is not happy */
 # define         X509_V_ERR_APPLICATION_VERIFICATION             50
 
@@ -405,10 +421,21 @@ void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth);
 # define X509_V_FLAG_USE_DELTAS                  0x2000
 /* Check selfsigned CA signature */
 # define X509_V_FLAG_CHECK_SS_SIGNATURE          0x4000
+/* Use trusted store first */
+# define X509_V_FLAG_TRUSTED_FIRST               0x8000
+/* Suite B 128 bit only mode: not normally used */
+# define X509_V_FLAG_SUITEB_128_LOS_ONLY         0x10000
+/* Suite B 192 bit only mode */
+# define X509_V_FLAG_SUITEB_192_LOS              0x20000
+/* Suite B 128 bit mode allowing 192 bit algorithms */
+# define X509_V_FLAG_SUITEB_128_LOS              0x30000
+
+/* Allow partial chains if at least one certificate is in trusted store */
+# define X509_V_FLAG_PARTIAL_CHAIN               0x80000
 /*
  * If the initial chain is not trusted, do not attempt to build an alternative
- * chain. Alternate chain checking was introduced in 1.0.1n/1.0.2b. Setting
- * this flag will force the behaviour to match that of previous versions.
+ * chain. Alternate chain checking was introduced in 1.0.2b. Setting this flag
+ * will force the behaviour to match that of previous versions.
  */
 # define X509_V_FLAG_NO_ALT_CHAINS               0x100000
 
@@ -445,6 +472,11 @@ int X509_STORE_set1_param(X509_STORE *ctx, X509_VERIFY_PARAM *pm);
 void X509_STORE_set_verify_cb(X509_STORE *ctx,
                               int (*verify_cb) (int, X509_STORE_CTX *));
 
+void X509_STORE_set_lookup_crls_cb(X509_STORE *ctx,
+                                   STACK_OF(X509_CRL) *(*cb) (X509_STORE_CTX
+                                                              *ctx,
+                                                              X509_NAME *nm));
+
 X509_STORE_CTX *X509_STORE_CTX_new(void);
 
 int X509_STORE_CTX_get1_issuer(X509 **issuer, X509_STORE_CTX *ctx, X509 *x);
@@ -455,6 +487,8 @@ int X509_STORE_CTX_init(X509_STORE_CTX *ctx, X509_STORE *store,
 void X509_STORE_CTX_trusted_stack(X509_STORE_CTX *ctx, STACK_OF(X509) *sk);
 void X509_STORE_CTX_cleanup(X509_STORE_CTX *ctx);
 
+X509_STORE *X509_STORE_CTX_get0_store(X509_STORE_CTX *ctx);
+
 X509_LOOKUP *X509_STORE_add_lookup(X509_STORE *v, X509_LOOKUP_METHOD *m);
 
 X509_LOOKUP_METHOD *X509_LOOKUP_hash_dir(void);
@@ -552,9 +586,27 @@ int X509_VERIFY_PARAM_add0_policy(X509_VERIFY_PARAM *param,
                                   ASN1_OBJECT *policy);
 int X509_VERIFY_PARAM_set1_policies(X509_VERIFY_PARAM *param,
                                     STACK_OF(ASN1_OBJECT) *policies);
+
+int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *param,
+                                const char *name, size_t namelen);
+int X509_VERIFY_PARAM_add1_host(X509_VERIFY_PARAM *param,
+                                const char *name, size_t namelen);
+void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *param,
+                                     unsigned int flags);
+char *X509_VERIFY_PARAM_get0_peername(X509_VERIFY_PARAM *);
+int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *param,
+                                 const char *email, size_t emaillen);
+int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *param,
+                              const unsigned char *ip, size_t iplen);
+int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *param,
+                                  const char *ipasc);
+
 int X509_VERIFY_PARAM_get_depth(const X509_VERIFY_PARAM *param);
+const char *X509_VERIFY_PARAM_get0_name(const X509_VERIFY_PARAM *param);
 
 int X509_VERIFY_PARAM_add0_table(X509_VERIFY_PARAM *param);
+int X509_VERIFY_PARAM_get_count(void);
+const X509_VERIFY_PARAM *X509_VERIFY_PARAM_get0(int id);
 const X509_VERIFY_PARAM *X509_VERIFY_PARAM_lookup(const char *name);
 void X509_VERIFY_PARAM_table_cleanup(void);
 
index 6b0bf8a..1ea0c69 100644 (file)
 #include <openssl/x509.h>
 #include <openssl/x509v3.h>
 
+#include "vpm_int.h"
+
 /* X509_VERIFY_PARAM functions */
 
+#define SET_HOST 0
+#define ADD_HOST 1
+
+/*
+ * Duplicate a string with OPENSSL_strdup(); passed as the element copy
+ * callback to sk_OPENSSL_STRING_deep_copy() when host lists are inherited.
+ */
+static char *str_copy(const char *s)
+{
+    return OPENSSL_strdup(s);
+}
+
+/*
+ * Release a string with OPENSSL_free(); used as the element free callback
+ * by the string_stack_free() macro and by the host list deep copy.
+ */
+static void str_free(char *s)
+{
+    OPENSSL_free(s);
+}
+
+#define string_stack_free(sk) sk_OPENSSL_STRING_pop_free(sk, str_free)
+
+/*
+ * Set (SET_HOST) or append to (ADD_HOST) the list of host names held in
+ * id->hosts.
+ *
+ * name/namelen describe the host name; namelen == 0 means name is
+ * NUL-terminated and its length is taken with strlen(). Names containing an
+ * embedded NUL are refused; a final NUL byte is tolerated and stripped.
+ * In SET_HOST mode any existing list is freed before the new name is added,
+ * so a NULL or empty name just clears the list.
+ *
+ * Returns 1 on success, 0 on an embedded NUL or allocation failure.
+ */
+static int int_x509_param_set_hosts(X509_VERIFY_PARAM_ID *id, int mode,
+                                    const char *name, size_t namelen)
+{
+    char *copy;
+
+    /*
+     * Refuse names with embedded NUL bytes, except perhaps as final byte.
+     * XXX: Do we need to push an error onto the error stack?
+     */
+    if (namelen == 0)
+        namelen = name ? strlen(name) : 0;
+    else if (name && memchr(name, '\0', namelen > 1 ? namelen - 1 : namelen))
+        return 0;
+    /*
+     * Only look at the last byte when there is one: for an empty name
+     * namelen is still 0 here and name[namelen - 1] would read before the
+     * start of the buffer.
+     */
+    if (name && namelen > 0 && name[namelen - 1] == '\0')
+        --namelen;
+
+    if (mode == SET_HOST && id->hosts) {
+        string_stack_free(id->hosts);
+        id->hosts = NULL;
+    }
+    if (name == NULL || namelen == 0)
+        return 1;
+
+    copy = BUF_strndup(name, namelen);
+    if (copy == NULL)
+        return 0;
+
+    if (id->hosts == NULL &&
+        (id->hosts = sk_OPENSSL_STRING_new_null()) == NULL) {
+        OPENSSL_free(copy);
+        return 0;
+    }
+
+    if (!sk_OPENSSL_STRING_push(id->hosts, copy)) {
+        OPENSSL_free(copy);
+        /* Do not leave behind an empty stack created just for this call */
+        if (sk_OPENSSL_STRING_num(id->hosts) == 0) {
+            sk_OPENSSL_STRING_free(id->hosts);
+            id->hosts = NULL;
+        }
+        return 0;
+    }
+
+    return 1;
+}
+
 static void x509_verify_param_zero(X509_VERIFY_PARAM *param)
 {
+    X509_VERIFY_PARAM_ID *paramid;
     if (!param)
         return;
     param->name = NULL;
@@ -85,15 +148,42 @@ static void x509_verify_param_zero(X509_VERIFY_PARAM *param)
         sk_ASN1_OBJECT_pop_free(param->policies, ASN1_OBJECT_free);
         param->policies = NULL;
     }
+    paramid = param->id;
+    if (paramid->hosts) {
+        string_stack_free(paramid->hosts);
+        paramid->hosts = NULL;
+    }
+    if (paramid->peername)
+        OPENSSL_free(paramid->peername);
+    if (paramid->email) {
+        OPENSSL_free(paramid->email);
+        paramid->email = NULL;
+        paramid->emaillen = 0;
+    }
+    if (paramid->ip) {
+        OPENSSL_free(paramid->ip);
+        paramid->ip = NULL;
+        paramid->iplen = 0;
+    }
+
 }
 
 X509_VERIFY_PARAM *X509_VERIFY_PARAM_new(void)
 {
     X509_VERIFY_PARAM *param;
-    param = OPENSSL_malloc(sizeof(X509_VERIFY_PARAM));
+    X509_VERIFY_PARAM_ID *paramid;
+
+    param = OPENSSL_malloc(sizeof *param);
     if (!param)
         return NULL;
-    memset(param, 0, sizeof(X509_VERIFY_PARAM));
+    paramid = OPENSSL_malloc(sizeof *paramid);
+    if (!paramid) {
+        OPENSSL_free(param);
+        return NULL;
+    }
+    memset(param, 0, sizeof *param);
+    memset(paramid, 0, sizeof *paramid);
+    param->id = paramid;
     x509_verify_param_zero(param);
     return param;
 }
@@ -103,6 +193,7 @@ void X509_VERIFY_PARAM_free(X509_VERIFY_PARAM *param)
     if (param == NULL)
         return;
     x509_verify_param_zero(param);
+    OPENSSL_free(param->id);
     OPENSSL_free(param);
 }
 
@@ -144,6 +235,11 @@ void X509_VERIFY_PARAM_free(X509_VERIFY_PARAM *param)
         (to_overwrite || \
                 ((src->field != def) && (to_default || (dest->field == def))))
 
+/* As above but for ID fields */
+
+#define test_x509_verify_param_copy_id(idf, def) \
+        test_x509_verify_param_copy(id->idf, def)
+
 /* Macro to test and copy a field if necessary */
 
 #define x509_verify_param_copy(field, def) \
@@ -155,8 +251,10 @@ int X509_VERIFY_PARAM_inherit(X509_VERIFY_PARAM *dest,
 {
     unsigned long inh_flags;
     int to_default, to_overwrite;
+    X509_VERIFY_PARAM_ID *id;
     if (!src)
         return 1;
+    id = src->id;
     inh_flags = dest->inh_flags | src->inh_flags;
 
     if (inh_flags & X509_VP_FLAG_ONCE)
@@ -197,6 +295,31 @@ int X509_VERIFY_PARAM_inherit(X509_VERIFY_PARAM *dest,
             return 0;
     }
 
+    /* Copy the host flags if and only if we're copying the host list */
+    if (test_x509_verify_param_copy_id(hosts, NULL)) {
+        if (dest->id->hosts) {
+            string_stack_free(dest->id->hosts);
+            dest->id->hosts = NULL;
+        }
+        if (id->hosts) {
+            dest->id->hosts =
+                sk_OPENSSL_STRING_deep_copy(id->hosts, str_copy, str_free);
+            if (dest->id->hosts == NULL)
+                return 0;
+            dest->id->hostflags = id->hostflags;
+        }
+    }
+
+    if (test_x509_verify_param_copy_id(email, NULL)) {
+        if (!X509_VERIFY_PARAM_set1_email(dest, id->email, id->emaillen))
+            return 0;
+    }
+
+    if (test_x509_verify_param_copy_id(ip, NULL)) {
+        if (!X509_VERIFY_PARAM_set1_ip(dest, id->ip, id->iplen))
+            return 0;
+    }
+
     return 1;
 }
 
@@ -211,6 +334,30 @@ int X509_VERIFY_PARAM_set1(X509_VERIFY_PARAM *to,
     return ret;
 }
 
+/*
+ * Replace the buffer at *pdest with a private copy of src.
+ *
+ * A non-NULL src with srclen == 0 is treated as a NUL-terminated string: it
+ * is duplicated with BUF_strdup() and its strlen() is recorded. Otherwise
+ * srclen bytes are duplicated with BUF_memdup(). A NULL src clears the
+ * destination (NULL pointer, length 0). pdestlen may be NULL if the caller
+ * does not track a length.
+ *
+ * Returns 1 on success and 0 if the copy could not be allocated; on failure
+ * *pdest and *pdestlen are left untouched.
+ */
+static int int_x509_param_set1(char **pdest, size_t *pdestlen,
+                               const char *src, size_t srclen)
+{
+    void *tmp;
+    if (src) {
+        if (srclen == 0) {
+            tmp = BUF_strdup(src);
+            srclen = strlen(src);
+        } else
+            tmp = BUF_memdup(src, srclen);
+        if (!tmp)
+            return 0;
+    } else {
+        tmp = NULL;
+        srclen = 0;
+    }
+    /* The old value is only released once the new copy exists */
+    if (*pdest)
+        OPENSSL_free(*pdest);
+    *pdest = tmp;
+    if (pdestlen)
+        *pdestlen = srclen;
+    return 1;
+}
+
 int X509_VERIFY_PARAM_set1_name(X509_VERIFY_PARAM *param, const char *name)
 {
     if (param->name)
@@ -308,11 +455,70 @@ int X509_VERIFY_PARAM_set1_policies(X509_VERIFY_PARAM *param,
     return 1;
 }
 
+/*
+ * Replace the host name list of param with the single name given (SET_HOST
+ * mode of int_x509_param_set_hosts()). Returns 1 on success, 0 on failure.
+ */
+int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *param,
+                                const char *name, size_t namelen)
+{
+    return int_x509_param_set_hosts(param->id, SET_HOST, name, namelen);
+}
+
+/*
+ * Append a host name to the list of param without clearing existing entries
+ * (ADD_HOST mode of int_x509_param_set_hosts()). Returns 1 on success, 0 on
+ * failure.
+ */
+int X509_VERIFY_PARAM_add1_host(X509_VERIFY_PARAM *param,
+                                const char *name, size_t namelen)
+{
+    return int_x509_param_set_hosts(param->id, ADD_HOST, name, namelen);
+}
+
+/* Store flags as the host checking flags of param, replacing any old value */
+void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *param,
+                                     unsigned int flags)
+{
+    param->id->hostflags = flags;
+}
+
+/*
+ * Return the peername pointer stored in param's ID data. The pointer is
+ * returned as-is (no copy is made); it is released with OPENSSL_free() when
+ * the parameters are zeroed, so the caller must not free it.
+ */
+char *X509_VERIFY_PARAM_get0_peername(X509_VERIFY_PARAM *param)
+{
+    return param->id->peername;
+}
+
+/*
+ * Set the email address of param to a copy of email. emaillen == 0 means
+ * email is NUL-terminated; a NULL email clears the stored value. Returns 1
+ * on success, 0 on allocation failure.
+ */
+int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *param,
+                                 const char *email, size_t emaillen)
+{
+    return int_x509_param_set1(&param->id->email, &param->id->emaillen,
+                               email, emaillen);
+}
+
+/*
+ * Set the IP address of param to a copy of the iplen bytes at ip. Only
+ * lengths of 0, 4 and 16 are accepted; anything else returns 0. Returns 1
+ * on success, 0 on a bad length or allocation failure.
+ *
+ * NOTE(review): a non-NULL ip with iplen == 0 is handed to
+ * int_x509_param_set1(), which treats a zero length as "NUL-terminated
+ * string" - presumably callers only pass iplen == 0 together with a NULL
+ * ip to clear the value; confirm.
+ */
+int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *param,
+                              const unsigned char *ip, size_t iplen)
+{
+    if (iplen != 0 && iplen != 4 && iplen != 16)
+        return 0;
+    return int_x509_param_set1((char **)&param->id->ip, &param->id->iplen,
+                               (char *)ip, iplen);
+}
+
+/*
+ * Set the IP address of param from its textual form. ipasc is converted
+ * with a2i_ipadd() into a buffer of at most 16 bytes and the result passed
+ * to X509_VERIFY_PARAM_set1_ip(). Returns 0 if the conversion yields a
+ * length of 0, otherwise the result of the set1_ip call.
+ */
+int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *param, const char *ipasc)
+{
+    unsigned char ipout[16];
+    size_t iplen;
+
+    iplen = (size_t)a2i_ipadd(ipout, ipasc);
+    if (iplen == 0)
+        return 0;
+    return X509_VERIFY_PARAM_set1_ip(param, ipout, iplen);
+}
+
 int X509_VERIFY_PARAM_get_depth(const X509_VERIFY_PARAM *param)
 {
     return param->depth;
 }
 
+/* Return the name field of param; the pointer is returned without copying */
+const char *X509_VERIFY_PARAM_get0_name(const X509_VERIFY_PARAM *param)
+{
+    return param->name;
+}
+
+/*
+ * All-zero/NULL ID data shared by every entry of default_table through the
+ * vpm_empty_id macro, so the built-in parameter sets have a non-NULL id.
+ */
+static X509_VERIFY_PARAM_ID _empty_id = { NULL, 0U, NULL, NULL, 0, NULL, 0 };
+
+#define vpm_empty_id (X509_VERIFY_PARAM_ID *)&_empty_id
+
 /*
  * Default verify parameters: these are used for various applications and can
  * be overridden by the user specified table. NB: the 'name' field *must* be
@@ -328,8 +534,8 @@ static const X509_VERIFY_PARAM default_table[] = {
      0,                         /* purpose */
      0,                         /* trust */
      100,                       /* depth */
-     NULL                       /* policies */
-     },
+     NULL,                      /* policies */
+     vpm_empty_id},
     {
      "pkcs7",                   /* S/MIME sign parameters */
      0,                         /* Check time */
@@ -338,8 +544,8 @@ static const X509_VERIFY_PARAM default_table[] = {
      X509_PURPOSE_SMIME_SIGN,   /* purpose */
      X509_TRUST_EMAIL,          /* trust */
      -1,                        /* depth */
-     NULL                       /* policies */
-     },
+     NULL,                      /* policies */
+     vpm_empty_id},
     {
      "smime_sign",              /* S/MIME sign parameters */
      0,                         /* Check time */
@@ -348,8 +554,8 @@ static const X509_VERIFY_PARAM default_table[] = {
      X509_PURPOSE_SMIME_SIGN,   /* purpose */
      X509_TRUST_EMAIL,          /* trust */
      -1,                        /* depth */
-     NULL                       /* policies */
-     },
+     NULL,                      /* policies */
+     vpm_empty_id},
     {
      "ssl_client",              /* SSL/TLS client parameters */
      0,                         /* Check time */
@@ -358,8 +564,8 @@ static const X509_VERIFY_PARAM default_table[] = {
      X509_PURPOSE_SSL_CLIENT,   /* purpose */
      X509_TRUST_SSL_CLIENT,     /* trust */
      -1,                        /* depth */
-     NULL                       /* policies */
-     },
+     NULL,                      /* policies */
+     vpm_empty_id},
     {
      "ssl_server",              /* SSL/TLS server parameters */
      0,                         /* Check time */
@@ -368,8 +574,8 @@ static const X509_VERIFY_PARAM default_table[] = {
      X509_PURPOSE_SSL_SERVER,   /* purpose */
      X509_TRUST_SSL_SERVER,     /* trust */
      -1,                        /* depth */
-     NULL                       /* policies */
-     }
+     NULL,                      /* policies */
+     vpm_empty_id}
 };
 
 static STACK_OF(X509_VERIFY_PARAM) *param_table = NULL;
@@ -409,6 +615,22 @@ int X509_VERIFY_PARAM_add0_table(X509_VERIFY_PARAM *param)
     return 1;
 }
 
+/*
+ * Return the total number of verify parameter sets: the entries of the
+ * built-in default_table plus, if a table has been created, the entries of
+ * param_table.
+ */
+int X509_VERIFY_PARAM_get_count(void)
+{
+    int num = sizeof(default_table) / sizeof(X509_VERIFY_PARAM);
+    if (param_table)
+        num += sk_X509_VERIFY_PARAM_num(param_table);
+    return num;
+}
+
+/*
+ * Return the verify parameter set with index id. Indices below the size of
+ * default_table select a built-in entry; larger indices are looked up in
+ * param_table after subtracting the built-in count. A negative id returns
+ * NULL instead of indexing before the start of default_table.
+ * NOTE(review): an id past the end relies on sk_X509_VERIFY_PARAM_value()
+ * returning NULL for a NULL stack or out-of-range index - confirm.
+ */
+const X509_VERIFY_PARAM *X509_VERIFY_PARAM_get0(int id)
+{
+    int num = sizeof(default_table) / sizeof(default_table[0]);
+    if (id < 0)
+        return NULL;
+    if (id < num)
+        return default_table + id;
+    return sk_X509_VERIFY_PARAM_value(param_table, id - num);
+}
+
 const X509_VERIFY_PARAM *X509_VERIFY_PARAM_lookup(const char *name)
 {
     int idx;
index 43152e9..0f26c54 100644 (file)
@@ -63,6 +63,7 @@
 #include <openssl/asn1.h>
 #include <openssl/evp.h>
 #include <openssl/x509.h>
+#include <openssl/ocsp.h>
 #ifndef OPENSSL_NO_RSA
 # include <openssl/rsa.h>
 #endif
@@ -105,6 +106,12 @@ int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx)
                               x->sig_alg, x->signature, x->cert_info, ctx);
 }
 
+/*
+ * Thin wrapper that calls OCSP_REQ_CTX_nbio_d2i() on rctx with the X509
+ * ASN.1 item, passing pcert as the output location. Returns whatever
+ * OCSP_REQ_CTX_nbio_d2i() returns.
+ */
+int X509_http_nbio(OCSP_REQ_CTX *rctx, X509 **pcert)
+{
+    return OCSP_REQ_CTX_nbio_d2i(rctx,
+                                 (ASN1_VALUE **)pcert, ASN1_ITEM_rptr(X509));
+}
+
 int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md)
 {
     return (ASN1_item_sign(ASN1_ITEM_rptr(X509_REQ_INFO), x->sig_alg, NULL,
@@ -133,6 +140,13 @@ int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx)
                               x->crl, ctx);
 }
 
+/*
+ * Thin wrapper that calls OCSP_REQ_CTX_nbio_d2i() on rctx with the X509_CRL
+ * ASN.1 item, passing pcrl as the output location. Returns whatever
+ * OCSP_REQ_CTX_nbio_d2i() returns.
+ */
+int X509_CRL_http_nbio(OCSP_REQ_CTX *rctx, X509_CRL **pcrl)
+{
+    return OCSP_REQ_CTX_nbio_d2i(rctx,
+                                 (ASN1_VALUE **)pcrl,
+                                 ASN1_ITEM_rptr(X509_CRL));
+}
+
 int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md)
 {
     return (ASN1_item_sign(ASN1_ITEM_rptr(NETSCAPE_SPKAC), x->sig_algor, NULL,
index 05125ab..9791b77 100644 (file)
@@ -13,7 +13,7 @@ AR=           ar r
 CFLAGS= $(INCLUDES) $(CFLAG)
 
 GENERAL=Makefile README
-TEST=
+TEST=v3nametest.c
 APPS=
 
 LIB=$(TOP)/libcrypto.a
@@ -22,13 +22,13 @@ v3_prn.c v3_utl.c v3err.c v3_genn.c v3_alt.c v3_skey.c v3_akey.c v3_pku.c \
 v3_int.c v3_enum.c v3_sxnet.c v3_cpols.c v3_crld.c v3_purp.c v3_info.c \
 v3_ocsp.c v3_akeya.c v3_pmaps.c v3_pcons.c v3_ncons.c v3_pcia.c v3_pci.c \
 pcy_cache.c pcy_node.c pcy_data.c pcy_map.c pcy_tree.c pcy_lib.c \
-v3_asid.c v3_addr.c
+v3_asid.c v3_addr.c v3_scts.c
 LIBOBJ= v3_bcons.o v3_bitst.o v3_conf.o v3_extku.o v3_ia5.o v3_lib.o \
 v3_prn.o v3_utl.o v3err.o v3_genn.o v3_alt.o v3_skey.o v3_akey.o v3_pku.o \
 v3_int.o v3_enum.o v3_sxnet.o v3_cpols.o v3_crld.o v3_purp.o v3_info.o \
 v3_ocsp.o v3_akeya.o v3_pmaps.o v3_pcons.o v3_ncons.o v3_pcia.o v3_pci.o \
 pcy_cache.o pcy_node.o pcy_data.o pcy_map.o pcy_tree.o pcy_lib.o \
-v3_asid.o v3_addr.o
+v3_asid.o v3_addr.o v3_scts.o
 
 SRC= $(LIBSRC)
 
@@ -535,6 +535,20 @@ v3_purp.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
 v3_purp.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
 v3_purp.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
 v3_purp.o: ../cryptlib.h v3_purp.c
+v3_scts.o: ../../e_os.h ../../include/openssl/asn1.h
+v3_scts.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
+v3_scts.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
+v3_scts.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
+v3_scts.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
+v3_scts.o: ../../include/openssl/err.h ../../include/openssl/evp.h
+v3_scts.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+v3_scts.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+v3_scts.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+v3_scts.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
+v3_scts.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
+v3_scts.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
+v3_scts.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
+v3_scts.o: ../cryptlib.h v3_scts.c
 v3_skey.o: ../../e_os.h ../../include/openssl/asn1.h
 v3_skey.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
 v3_skey.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
index 136b1f8..c3a6fce 100644 (file)
@@ -69,6 +69,7 @@ extern X509V3_EXT_METHOD v3_crl_hold, v3_pci;
 extern X509V3_EXT_METHOD v3_policy_mappings, v3_policy_constraints;
 extern X509V3_EXT_METHOD v3_name_constraints, v3_inhibit_anyp, v3_idp;
 extern X509V3_EXT_METHOD v3_addr, v3_asid;
+extern X509V3_EXT_METHOD v3_ct_scts[];
 
 /*
  * This table will be searched using OBJ_bsearch so it *must* kept in order
@@ -126,6 +127,8 @@ static const X509V3_EXT_METHOD *standard_exts[] = {
     &v3_idp,
     &v3_alt[2],
     &v3_freshest_crl,
+    &v3_ct_scts[0],
+    &v3_ct_scts[1],
 };
 
 /* Number of standard extensions */
index b5598c9..8350429 100644 (file)
@@ -122,6 +122,28 @@ const X509V3_EXT_METHOD *X509V3_EXT_get(X509_EXTENSION *ext)
     return X509V3_EXT_get_nid(nid);
 }
 
+/*
+ * Free decoded extension data ext_data using the extension method
+ * registered for nid. The method's ASN.1 item is preferred when present,
+ * otherwise its ext_free callback is used.
+ *
+ * Returns 1 on success. Returns 0 and raises
+ * X509V3_R_CANNOT_FIND_FREE_FUNCTION if no method is registered for nid or
+ * the method offers neither way of freeing; ext_data is not freed then.
+ */
+int X509V3_EXT_free(int nid, void *ext_data)
+{
+    const X509V3_EXT_METHOD *ext_method = X509V3_EXT_get_nid(nid);
+    if (ext_method == NULL) {
+        X509V3err(X509V3_F_X509V3_EXT_FREE,
+                  X509V3_R_CANNOT_FIND_FREE_FUNCTION);
+        return 0;
+    }
+
+    if (ext_method->it != NULL)
+        ASN1_item_free(ext_data, ASN1_ITEM_ptr(ext_method->it));
+    else if (ext_method->ext_free != NULL)
+        ext_method->ext_free(ext_data);
+    else {
+        X509V3err(X509V3_F_X509V3_EXT_FREE,
+                  X509V3_R_CANNOT_FIND_FREE_FUNCTION);
+        return 0;
+    }
+
+    return 1;
+}
+
 int X509V3_EXT_add_list(X509V3_EXT_METHOD *extlist)
 {
     for (; extlist->ext_nid != -1; extlist++)
index 0d9bc58..36b0d87 100644 (file)
@@ -395,9 +395,6 @@ static void x509v3_cache_extensions(X509 *x)
 #ifndef OPENSSL_NO_SHA
     X509_digest(x, EVP_sha1(), x->sha1_hash, NULL);
 #endif
-    /* Does subject name match issuer ? */
-    if (!X509_NAME_cmp(X509_get_subject_name(x), X509_get_issuer_name(x)))
-        x->ex_flags |= EXFLAG_SI;
     /* V1 should mean no extensions ... */
     if (!X509_get_version(x))
         x->ex_flags |= EXFLAG_V1;
@@ -479,6 +476,10 @@ static void x509v3_cache_extensions(X509 *x)
             case NID_dvcs:
                 x->ex_xkusage |= XKU_DVCS;
                 break;
+
+            case NID_anyExtendedKeyUsage:
+                x->ex_xkusage |= XKU_ANYEKU;
+                break;
             }
         }
         sk_ASN1_OBJECT_pop_free(extusage, ASN1_OBJECT_free);
@@ -494,6 +495,13 @@ static void x509v3_cache_extensions(X509 *x)
     }
     x->skid = X509_get_ext_d2i(x, NID_subject_key_identifier, NULL, NULL);
     x->akid = X509_get_ext_d2i(x, NID_authority_key_identifier, NULL, NULL);
+    /* Does subject name match issuer ? */
+    if (!X509_NAME_cmp(X509_get_subject_name(x), X509_get_issuer_name(x))) {
+        x->ex_flags |= EXFLAG_SI;
+        /* If SKID matches AKID also indicate self signed */
+        if (X509_check_akid(x, x->akid) == X509_V_OK)
+            x->ex_flags |= EXFLAG_SS;
+    }
     x->altname = X509_get_ext_d2i(x, NID_subject_alt_name, NULL, NULL);
     x->nc = X509_get_ext_d2i(x, NID_name_constraints, &i, NULL);
     if (!x->nc && (i != -1))
@@ -598,8 +606,8 @@ static int check_purpose_ssl_client(const X509_PURPOSE *xp, const X509 *x,
         return 0;
     if (ca)
         return check_ssl_ca(x);
-    /* We need to do digital signatures with it */
-    if (ku_reject(x, KU_DIGITAL_SIGNATURE))
+    /* We need to do digital signatures or key agreement */
+    if (ku_reject(x, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT))
         return 0;
     /* nsCertType if present should allow SSL client use */
     if (ns_reject(x, NS_SSL_CLIENT))
@@ -607,6 +615,14 @@ static int check_purpose_ssl_client(const X509_PURPOSE *xp, const X509 *x,
     return 1;
 }
 
+/*
+ * Key usage needed for TLS/SSL server: digital signature, encipherment or
+ * key agreement. The ssl code can check this more thoroughly for individual
+ * key types. The expansion is parenthesized so the mask stays a single
+ * operand wherever the macro is used.
+ */
+#define KU_TLS \
+        (KU_DIGITAL_SIGNATURE|KU_KEY_ENCIPHERMENT|KU_KEY_AGREEMENT)
+
 static int check_purpose_ssl_server(const X509_PURPOSE *xp, const X509 *x,
                                     int ca)
 {
@@ -617,8 +633,7 @@ static int check_purpose_ssl_server(const X509_PURPOSE *xp, const X509 *x,
 
     if (ns_reject(x, NS_SSL_SERVER))
         return 0;
-    /* Now as for keyUsage: we'll at least need to sign OR encipher */
-    if (ku_reject(x, KU_DIGITAL_SIGNATURE | KU_KEY_ENCIPHERMENT))
+    if (ku_reject(x, KU_TLS))
         return 0;
 
     return 1;
diff --git a/crypto/x509v3/v3_scts.c b/crypto/x509v3/v3_scts.c
new file mode 100644 (file)
index 0000000..6e0b8d6
--- /dev/null
@@ -0,0 +1,332 @@
+/* v3_scts.c */
+/*
+ * Written by Rob Stradling (rob@comodo.com) for the OpenSSL project 2014.
+ */
+/* ====================================================================
+ * Copyright (c) 2014 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com).  This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include <openssl/asn1.h>
+#include <openssl/x509v3.h>
+
+/*
+ * Signature and hash algorithm identifiers from RFC 5246. Only the values
+ * that tls12_signature_print() recognises are defined here.
+ */
+#define TLSEXT_hash_sha256                              4
+
+#define TLSEXT_signature_rsa                            1
+#define TLSEXT_signature_ecdsa                          3
+
+
+#define n2s(c,s)        ((s=(((unsigned int)(c[0]))<< 8)| \
+                            (((unsigned int)(c[1]))    )),c+=2) /* s = big-endian 16-bit value at c; c then advances by 2 */
+
+#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
+# define SCT_TIMESTAMP unsigned __int64
+#elif defined(__arch64__)
+# define SCT_TIMESTAMP unsigned long
+#else
+# define SCT_TIMESTAMP unsigned long long
+#endif /* SCT_TIMESTAMP needs at least 64 bits: n2l8() shifts it by up to 56 */
+
+#define n2l8(c,l)       (l =((SCT_TIMESTAMP)(*((c)++)))<<56, \
+                         l|=((SCT_TIMESTAMP)(*((c)++)))<<48, \
+                         l|=((SCT_TIMESTAMP)(*((c)++)))<<40, \
+                         l|=((SCT_TIMESTAMP)(*((c)++)))<<32, \
+                         l|=((SCT_TIMESTAMP)(*((c)++)))<<24, \
+                         l|=((SCT_TIMESTAMP)(*((c)++)))<<16, \
+                         l|=((SCT_TIMESTAMP)(*((c)++)))<< 8, \
+                         l|=((SCT_TIMESTAMP)(*((c)++)))) /* l = big-endian 64-bit value at c; c advances by 8 */
+
+typedef struct SCT_st {
+    /*
+     * The encoded SCT: a heap copy made by d2i_SCT_LIST() and released by
+     * SCT_free().
+     */
+    unsigned char *sct;
+    unsigned short sctlen;
+    /*
+     * Components of the SCT.  "logid", "ext" and "sig" point to addresses
+     * inside "sct". d2i_SCT_LIST() fills in the fields after "version" only
+     * when version is 0 (SCT v1).
+     */
+    unsigned char version;      /* first byte of the encoding */
+    unsigned char *logid;
+    unsigned short logidlen;    /* set to 32 for v1 */
+    SCT_TIMESTAMP timestamp;    /* 8-byte big-endian field; printed as ms */
+    unsigned char *ext;
+    unsigned short extlen;
+    unsigned char hash_alg;     /* compared against TLSEXT_hash_* */
+    unsigned char sig_alg;      /* compared against TLSEXT_signature_* */
+    unsigned char *sig;
+    unsigned short siglen;
+} SCT;
+
+DECLARE_STACK_OF(SCT)   /* presumably what makes STACK_OF(SCT) usable below */
+
+static void SCT_LIST_free(STACK_OF(SCT) *a);
+static STACK_OF(SCT) *d2i_SCT_LIST(STACK_OF(SCT) **a,
+                                   const unsigned char **pp, long length);
+static int i2r_SCT_LIST(X509V3_EXT_METHOD *method, STACK_OF(SCT) *sct_list,
+                        BIO *out, int indent); /* forward declarations: the v3_ct_scts[] table refers to these three */
+
+const X509V3_EXT_METHOD v3_ct_scts[] = { /* two entries that differ only in NID */
+    {NID_ct_precert_scts, 0, NULL,
+     0, (X509V3_EXT_FREE)SCT_LIST_free,
+     (X509V3_EXT_D2I)d2i_SCT_LIST, 0,
+     0, 0, 0, 0,
+     (X509V3_EXT_I2R)i2r_SCT_LIST, 0,
+     NULL},
+
+    {NID_ct_cert_scts, 0, NULL,
+     0, (X509V3_EXT_FREE)SCT_LIST_free,
+     (X509V3_EXT_D2I)d2i_SCT_LIST, 0,
+     0, 0, 0, 0,
+     (X509V3_EXT_I2R)i2r_SCT_LIST, 0,
+     NULL},
+};
+
+static void tls12_signature_print(BIO *out, const unsigned char hash_alg,
+                                  const unsigned char sig_alg)
+{   /* Write a description of the (hash_alg, sig_alg) pair to "out". */
+    int nid = NID_undef;
+    /* RFC6962 only permits two signature algorithms */
+    if (hash_alg == TLSEXT_hash_sha256) {
+        if (sig_alg == TLSEXT_signature_rsa)
+            nid = NID_sha256WithRSAEncryption;
+        else if (sig_alg == TLSEXT_signature_ecdsa)
+            nid = NID_ecdsa_with_SHA256;
+    }
+    if (nid == NID_undef)       /* unrecognised pair: dump both bytes as hex */
+        BIO_printf(out, "%02X%02X", hash_alg, sig_alg);
+    else                        /* recognised pair: print the name for the NID */
+        BIO_printf(out, "%s", OBJ_nid2ln(nid));
+}
+
+static void timestamp_print(BIO *out, SCT_TIMESTAMP timestamp)
+{   /*
+     * Print "timestamp" to "out" as a GeneralizedTime carrying three
+     * fractional-second digits. The arithmetic below treats the value as a
+     * count of milliseconds, offset from time_t 0.
+     *
+     * NOTE(review): "gen" is used without checking the result of
+     * ASN1_GENERALIZEDTIME_new(), and the results of the _adj and
+     * _set_string calls are ignored - confirm these cannot fail here.
+     */
+    ASN1_GENERALIZEDTIME *gen;
+    char genstr[20];            /* 14 digits + '.' + 3 digits + 'Z' + NUL */
+    gen = ASN1_GENERALIZEDTIME_new();
+    ASN1_GENERALIZEDTIME_adj(gen, (time_t)0,
+                             (int)(timestamp / 86400000), /* whole days */
+                             (timestamp % 86400000) / 1000); /* whole seconds left in the day */
+    /*
+     * Note GeneralizedTime from ASN1_GENERALIZEDTIME_adj is always 15
+     * characters long with a final Z. Update it with fractional seconds:
+     * keep the first 14 characters, then append ".mmmZ".
+     *
+     * NOTE(review): "%03d" is paired with an unsigned int argument; the
+     * value is always below 1000, but "%03u" would match the type.
+     */
+    BIO_snprintf(genstr, sizeof(genstr), "%.14s.%03dZ",
+                 ASN1_STRING_data(gen), (unsigned int)(timestamp % 1000));
+    ASN1_GENERALIZEDTIME_set_string(gen, genstr);
+    ASN1_GENERALIZEDTIME_print(out, gen);
+    ASN1_GENERALIZEDTIME_free(gen);
+}
+
+static void SCT_free(SCT *sct)
+{   /* Release an SCT and the encoded buffer it owns; NULL is accepted. */
+    if (sct) {
+        if (sct->sct)
+            OPENSSL_free(sct->sct); /* logid/ext/sig point into this buffer */
+        OPENSSL_free(sct);
+    }
+}
+
+static void SCT_LIST_free(STACK_OF(SCT) *a)
+{   /* Pop-free the stack, releasing each element through SCT_free(). */
+    sk_SCT_pop_free(a, SCT_free);
+}
+
+static STACK_OF(SCT) *d2i_SCT_LIST(STACK_OF(SCT) **a,
+                                   const unsigned char **pp, long length)
+{   /*
+     * Decode an OCTET STRING that wraps a length-prefixed list of SCTs into
+     * a newly allocated STACK_OF(SCT).
+     *
+     * Returns NULL when the OCTET STRING cannot be decoded, when the outer
+     * list length does not match the string, when any entry is malformed,
+     * or when an allocation fails. An empty list yields an empty stack.
+     *
+     * NOTE(review): "a" is never referenced, so the usual d2i "reuse *a"
+     * convention is not implemented here.
+     */
+    ASN1_OCTET_STRING *oct = NULL;
+    STACK_OF(SCT) *sk = NULL;
+    SCT *sct;
+    unsigned char *p, *p2;
+    unsigned short listlen, sctlen = 0, fieldlen;
+
+    if (d2i_ASN1_OCTET_STRING(&oct, pp, length) == NULL)
+        return NULL;
+    if (oct->length < 2)        /* no room for the 2-byte list length */
+        goto done;              /* sk is still NULL, so NULL is returned */
+    p = oct->data;
+    n2s(p, listlen);
+    if (listlen != oct->length - 2) /* list must fill the rest exactly */
+        goto done;
+
+    if ((sk = sk_SCT_new_null()) == NULL)
+        goto done;
+
+    while (listlen > 0) {
+        if (listlen < 2)        /* no room for the 2-byte SCT length */
+            goto err;
+        n2s(p, sctlen);
+        listlen -= 2;
+
+        if ((sctlen < 1) || (sctlen > listlen))
+            goto err;
+        listlen -= sctlen;
+
+        sct = OPENSSL_malloc(sizeof(SCT)); /* not zeroed */
+        if (!sct)
+            goto err;
+        if (!sk_SCT_push(sk, sct)) {
+            OPENSSL_free(sct);
+            goto err;
+        }
+        /* From here on "sct" is owned by "sk" and freed via the err path */
+
+        sct->sct = OPENSSL_malloc(sctlen);
+        if (!sct->sct)
+            goto err;
+        memcpy(sct->sct, p, sctlen);
+        sct->sctlen = sctlen;
+        p += sctlen;
+        p2 = sct->sct;          /* parse from the private copy */
+
+        /*
+         * For any version other than 0 only sct, sctlen and version are
+         * set; the remaining fields stay uninitialised.
+         */
+        sct->version = *p2++;
+        if (sct->version == 0) { /* SCT v1 */
+            /*-
+             * Fixed-length header:
+             *              struct {
+             * (1 byte)       Version sct_version;
+             * (32 bytes)     LogID id;
+             * (8 bytes)      uint64 timestamp;
+             * (2 bytes + ?)  CtExtensions extensions;
+             */
+            if (sctlen < 43)    /* 1 + 32 + 8 + 2 */
+                goto err;
+            sctlen -= 43;       /* sctlen now counts the bytes still unparsed */
+
+            sct->logid = p2;
+            sct->logidlen = 32;
+            p2 += 32;
+
+            n2l8(p2, sct->timestamp);
+
+            n2s(p2, fieldlen);
+            if (sctlen < fieldlen)
+                goto err;
+            sct->ext = p2;
+            sct->extlen = fieldlen;
+            p2 += fieldlen;
+            sctlen -= fieldlen;
+
+            /*-
+             * digitally-signed struct header:
+             * (1 byte)       Hash algorithm
+             * (1 byte)       Signature algorithm
+             * (2 bytes + ?)  Signature
+             */
+            if (sctlen < 4)
+                goto err;
+            sctlen -= 4;
+
+            sct->hash_alg = *p2++;
+            sct->sig_alg = *p2++;
+            n2s(p2, fieldlen);
+            if (sctlen != fieldlen) /* signature must end the SCT exactly */
+                goto err;
+            sct->sig = p2;
+            sct->siglen = fieldlen;
+        }
+    }
+
+ done:
+    ASN1_OCTET_STRING_free(oct);
+    return sk;
+
+ err:
+    SCT_LIST_free(sk);          /* also frees every SCT pushed so far */
+    sk = NULL;
+    goto done;
+}
+
+static int i2r_SCT_LIST(X509V3_EXT_METHOD *method, STACK_OF(SCT) *sct_list,
+                        BIO *out, int indent)
+{   /*
+     * Print every SCT in sct_list to "out", indented by "indent" columns.
+     * Always returns 1; "method" is not referenced.
+     */
+    SCT *sct;
+    int i;
+
+    for (i = 0; i < sk_SCT_num(sct_list);) { /* i is advanced at the bottom */
+        sct = sk_SCT_value(sct_list, i);
+
+        BIO_printf(out, "%*sSigned Certificate Timestamp:", indent, "");
+        BIO_printf(out, "\n%*sVersion   : ", indent + 4, "");
+
+        if (sct->version == 0) { /* SCT v1 */
+            BIO_printf(out, "v1(0)");
+
+            BIO_printf(out, "\n%*sLog ID    : ", indent + 4, "");
+            BIO_hex_string(out, indent + 16, 16, sct->logid, sct->logidlen);
+
+            BIO_printf(out, "\n%*sTimestamp : ", indent + 4, "");
+            timestamp_print(out, sct->timestamp);
+
+            BIO_printf(out, "\n%*sExtensions: ", indent + 4, "");
+            if (sct->extlen == 0)
+                BIO_printf(out, "none");
+            else
+                BIO_hex_string(out, indent + 16, 16, sct->ext, sct->extlen);
+
+            BIO_printf(out, "\n%*sSignature : ", indent + 4, "");
+            tls12_signature_print(out, sct->hash_alg, sct->sig_alg);
+            BIO_printf(out, "\n%*s            ", indent + 4, "");
+            BIO_hex_string(out, indent + 16, 16, sct->sig, sct->siglen);
+        } else {                /* Unknown version: dump the raw encoding */
+
+            BIO_printf(out, "unknown\n%*s", indent + 16, "");
+            BIO_hex_string(out, indent + 16, 16, sct->sct, sct->sctlen);
+        }
+
+        if (++i < sk_SCT_num(sct_list)) /* newline between entries, none after the last */
+            BIO_printf(out, "\n");
+    }
+
+    return 1;
+}
index 94aaebb..bdd7b95 100644 (file)
@@ -632,6 +632,433 @@ void X509_email_free(STACK_OF(OPENSSL_STRING) *sk)
     sk_OPENSSL_STRING_pop_free(sk, str_free);
 }
 
+typedef int (*equal_fn) (const unsigned char *pattern, size_t pattern_len,
+                         const unsigned char *subject, size_t subject_len,
+                         unsigned int flags); /* comparison callback; the equal_* functions return 1 on match, 0 otherwise */
+
+/*
+ * Skip pattern prefix to match "wildcard" subject.
+ *
+ * When _X509_CHECK_FLAG_DOT_SUBDOMAINS is set and the pattern is longer than
+ * the subject, advance *p and shrink *plen so that only the trailing
+ * subject_len octets of the pattern remain. *p and *plen are left as they
+ * were if the flag is clear, or if the prefix to be skipped contains a NUL
+ * or (with X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS) a '.'. Only subject_len
+ * is consulted; "subject" itself is not read here.
+ */
+static void skip_prefix(const unsigned char **p, size_t *plen,
+                        const unsigned char *subject, size_t subject_len,
+                        unsigned int flags)
+{
+    const unsigned char *pattern = *p;
+    size_t pattern_len = *plen;
+
+    /*
+     * If subject starts with a leading '.' followed by more octets, and
+     * pattern is longer, compare just an equal-length suffix with the
+     * full subject (starting at the '.'), provided the prefix contains
+     * no NULs.
+     */
+    if ((flags & _X509_CHECK_FLAG_DOT_SUBDOMAINS) == 0)
+        return;
+
+    while (pattern_len > subject_len && *pattern) {
+        if ((flags & X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS) &&
+            *pattern == '.')    /* prefix may not span a label boundary */
+            break;
+        ++pattern;
+        --pattern_len;
+    }
+
+    /*
+     * Skip if entire prefix acceptable; an early stop above leaves
+     * pattern_len > subject_len and nothing is written back.
+     */
+    if (pattern_len == subject_len) {
+        *p = pattern;
+        *plen = pattern_len;
+    }
+}
+
+/*
+ * Compare pattern and subject, ignoring the case of ASCII letters only.
+ * skip_prefix() is applied to the pattern first. Returns 1 on a match and 0
+ * otherwise; a pattern containing a NUL octet never matches.
+ */
+static int equal_nocase(const unsigned char *pattern, size_t pattern_len,
+                        const unsigned char *subject, size_t subject_len,
+                        unsigned int flags)
+{
+    skip_prefix(&pattern, &pattern_len, subject, subject_len, flags);
+    if (pattern_len != subject_len)
+        return 0;
+    while (pattern_len) {
+        unsigned char l = *pattern;
+        unsigned char r = *subject;
+        /* The pattern must not contain NUL characters. */
+        if (l == 0)
+            return 0;
+        if (l != r) {           /* fold 'A'-'Z' to lower case and retry */
+            if ('A' <= l && l <= 'Z')
+                l = (l - 'A') + 'a';
+            if ('A' <= r && r <= 'Z')
+                r = (r - 'A') + 'a';
+            if (l != r)
+                return 0;
+        }
+        ++pattern;
+        ++subject;
+        --pattern_len;
+    }
+    return 1;
+}
+
+/*
+ * Compare using memcmp, i.e. octet-for-octet and case-sensitively, after
+ * skip_prefix() has been applied to the pattern. Returns 1 on a match and 0
+ * otherwise.
+ */
+static int equal_case(const unsigned char *pattern, size_t pattern_len,
+                      const unsigned char *subject, size_t subject_len,
+                      unsigned int flags)
+{
+    skip_prefix(&pattern, &pattern_len, subject, subject_len, flags);
+    if (pattern_len != subject_len)
+        return 0;
+    return !memcmp(pattern, subject, pattern_len);
+}
+
+/*
+ * RFC 5280, section 7.5, requires that only the domain is compared in a
+ * case-insensitive manner.
+ *
+ * Returns 1 if the two addresses match and 0 otherwise. The part from the
+ * last '@' onwards is compared ignoring ASCII case; everything before it is
+ * compared exactly. The flags argument is ignored.
+ */
+static int equal_email(const unsigned char *a, size_t a_len,
+                       const unsigned char *b, size_t b_len,
+                       unsigned int unused_flags)
+{
+    size_t i = a_len;
+    if (a_len != b_len)
+        return 0;
+    /*
+     * We search backwards for the '@' character, so that we do not have to
+     * deal with quoted local-parts.  The domain part is compared in a
+     * case-insensitive manner.
+     */
+    while (i > 0) {
+        --i;
+        if (a[i] == '@' || b[i] == '@') { /* an '@' in either string ends the search */
+            if (!equal_nocase(a + i, a_len - i, b + i, a_len - i, 0))
+                return 0;
+            break;
+        }
+    }
+    if (i == 0)                 /* no '@', or '@' is the first octet: */
+        i = a_len;              /* compare the whole string exactly */
+    return equal_case(a, i, b, i, 0);
+}
+
+/*
+ * Compare the prefix and suffix with the subject, and check that the
+ * characters in-between are valid.
+ *
+ * "prefix" and "suffix" are the pattern text before and after the '*'.
+ * Returns 1 if the subject matches and 0 otherwise.
+ */
+static int wildcard_match(const unsigned char *prefix, size_t prefix_len,
+                          const unsigned char *suffix, size_t suffix_len,
+                          const unsigned char *subject, size_t subject_len,
+                          unsigned int flags)
+{
+    const unsigned char *wildcard_start;
+    const unsigned char *wildcard_end;
+    const unsigned char *p;
+    int allow_multi = 0;
+    int allow_idna = 0;
+
+    if (subject_len < prefix_len + suffix_len)
+        return 0;
+    if (!equal_nocase(prefix, prefix_len, subject, prefix_len, flags))
+        return 0;
+    wildcard_start = subject + prefix_len;
+    wildcard_end = subject + (subject_len - suffix_len);
+    if (!equal_nocase(wildcard_end, suffix_len, suffix, suffix_len, flags))
+        return 0;
+    /*
+     * If the wildcard makes up the entire first label, it must match at
+     * least one character.
+     */
+    if (prefix_len == 0 && *suffix == '.') {
+        if (wildcard_start == wildcard_end)
+            return 0;
+        allow_idna = 1;         /* a full-label '*' may stand for an IDNA label */
+        if (flags & X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS)
+            allow_multi = 1;
+    }
+    /* IDNA labels cannot match partial wildcards */
+    if (!allow_idna &&
+        subject_len >= 4 && strncasecmp((char *)subject, "xn--", 4) == 0)
+        return 0;
+    /* The wildcard may match a literal '*' */
+    if (wildcard_end == wildcard_start + 1 && *wildcard_start == '*')
+        return 1;
+    /*
+     * Check that the part matched by the wildcard contains only
+     * permitted characters and only matches a single label unless
+     * allow_multi is set.
+     */
+    for (p = wildcard_start; p != wildcard_end; ++p)
+        if (!(('0' <= *p && *p <= '9') ||
+              ('A' <= *p && *p <= 'Z') ||
+              ('a' <= *p && *p <= 'z') ||
+              *p == '-' || (allow_multi && *p == '.')))
+            return 0;
+    return 1;
+}
+
+#define LABEL_START     (1 << 0) /* valid_star(): at the first octet of a label */
+#define LABEL_END       (1 << 1) /* NOTE(review): appears unused in the visible code */
+#define LABEL_HYPHEN    (1 << 2) /* valid_star(): a '-' was seen and no letter/digit since */
+#define LABEL_IDNA      (1 << 3) /* valid_star(): current label begins with "xn--" */
+
+static const unsigned char *valid_star(const unsigned char *p, size_t len,
+                                       unsigned int flags)
+{   /*
+     * Scan the pattern p[0..len) and return a pointer to its single usable
+     * '*', or NULL if the pattern has no '*' or is not an acceptable
+     * wildcard pattern. A NULL result makes equal_wildcard() fall back to a
+     * plain case-insensitive comparison.
+     *
+     * NOTE(review): as written, the '-' branch returns NULL whenever a '-'
+     * directly follows another '-', which includes every "xn--" label, so
+     * a pattern containing an IDNA label is never treated as a wildcard. A
+     * '-' at the very start of a label, by contrast, is not rejected there.
+     * Confirm this is intended.
+     */
+    const unsigned char *star = 0;
+    size_t i;
+    int state = LABEL_START;
+    int dots = 0;
+    for (i = 0; i < len; ++i) {
+        /*
+         * Locate first and only legal wildcard, either at the start
+         * or end of a non-IDNA first and not final label.
+         */
+        if (p[i] == '*') {
+            int atstart = (state & LABEL_START);
+            int atend = (i == len - 1 || p[i + 1] == '.');
+            /*-
+             * At most one wildcard per pattern.
+             * No wildcards in IDNA labels.
+             * No wildcards after the first label.
+             */
+            if (star != NULL || (state & LABEL_IDNA) != 0 || dots)
+                return NULL;
+            /* Only full-label '*.example.com' wildcards? */
+            if ((flags & X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS)
+                && (!atstart || !atend))
+                return NULL;
+            /* No 'foo*bar' wildcards */
+            if (!atstart && !atend)
+                return NULL;
+            star = &p[i];
+            state &= ~LABEL_START;
+        } else if (('a' <= p[i] && p[i] <= 'z')
+                   || ('A' <= p[i] && p[i] <= 'Z')
+                   || ('0' <= p[i] && p[i] <= '9')) {
+            if ((state & LABEL_START) != 0
+                && len - i >= 4 && strncasecmp((char *)&p[i], "xn--", 4) == 0)
+                state |= LABEL_IDNA;
+            state &= ~(LABEL_HYPHEN | LABEL_START);
+        } else if (p[i] == '.') {
+            if ((state & (LABEL_HYPHEN | LABEL_START)) != 0) /* empty label, or label ending in '-' */
+                return NULL;
+            state = LABEL_START;
+            ++dots;
+        } else if (p[i] == '-') {
+            if ((state & LABEL_HYPHEN) != 0) /* rejects "--"; see NOTE(review) above */
+                return NULL;
+            state |= LABEL_HYPHEN;
+        } else                  /* any other octet is not allowed in a pattern */
+            return NULL;
+    }
+
+    /*
+     * The final label must not end in a hyphen or ".", and
+     * there must be at least two dots after the star.
+     */
+    if ((state & (LABEL_START | LABEL_HYPHEN)) != 0 || dots < 2)
+        return NULL;
+    return star;
+}
+
+/*
+ * Compare using wildcards. If the pattern holds a '*' accepted by
+ * valid_star(), match through wildcard_match(); otherwise fall back to
+ * equal_nocase(). Returns 1 on a match and 0 otherwise.
+ */
+static int equal_wildcard(const unsigned char *pattern, size_t pattern_len,
+                          const unsigned char *subject, size_t subject_len,
+                          unsigned int flags)
+{
+    const unsigned char *star = NULL;
+
+    /*
+     * Subject names starting with '.' can only match a wildcard pattern
+     * via a subject sub-domain pattern suffix match.
+     */
+    if (!(subject_len > 1 && subject[0] == '.'))
+        star = valid_star(pattern, pattern_len, flags);
+    if (star == NULL)
+        return equal_nocase(pattern, pattern_len,
+                            subject, subject_len, flags);
+    return wildcard_match(pattern, star - pattern, /* text before the '*' */
+                          star + 1, (pattern + pattern_len) - star - 1, /* text after it */
+                          subject, subject_len, flags);
+}
+
+/*
+ * Compare an ASN1_STRING to a supplied string. If they match return 1. If
+ * cmp_type > 0 only compare if string matches the type, otherwise convert it
+ * to UTF8.
+ *
+ * Returns 0 on no match (including an empty or NULL-data string) and -1 if
+ * the UTF8 conversion fails. On a match, and if peername is non-NULL,
+ * *peername receives a BUF_strndup() copy of the certificate string.
+ *
+ * NOTE(review): the result of BUF_strndup() is not checked, so *peername
+ * may presumably be NULL after a reported match - confirm callers cope.
+ */
+
+static int do_check_string(ASN1_STRING *a, int cmp_type, equal_fn equal,
+                           unsigned int flags, const char *b, size_t blen,
+                           char **peername)
+{
+    int rv = 0;
+
+    if (!a->data || !a->length)
+        return 0;
+    if (cmp_type > 0) {
+        if (cmp_type != a->type)
+            return 0;
+        if (cmp_type == V_ASN1_IA5STRING) /* text: use the supplied comparator */
+            rv = equal(a->data, a->length, (unsigned char *)b, blen, flags);
+        else if (a->length == (int)blen && !memcmp(a->data, b, blen)) /* other types: exact octet match */
+            rv = 1;
+        if (rv > 0 && peername)
+            *peername = BUF_strndup((char *)a->data, a->length);
+    } else {
+        int astrlen;
+        unsigned char *astr;
+        astrlen = ASN1_STRING_to_UTF8(&astr, a);
+        if (astrlen < 0) {
+            /*
+             * -1 could be an internal malloc failure or a decoding error from
+             * malformed input; we can't distinguish.
+             */
+            return -1;
+        }
+        rv = equal(astr, astrlen, (unsigned char *)b, blen, flags);
+        if (rv > 0 && peername)
+            *peername = BUF_strndup((char *)astr, astrlen);
+        OPENSSL_free(astr);
+    }
+    return rv;
+}
+
+static int do_x509_check(X509 *x, const char *chk, size_t chklen,
+                         unsigned int flags, int check_type, char **peername)
+{   /*
+     * Shared worker for X509_check_host(), X509_check_email(),
+     * X509_check_ip() and X509_check_ip_asc(). Compares "chk" with the
+     * subjectAltName entries of type check_type and, where applicable, with
+     * subject-name entries. Returns the first non-zero do_check_string()
+     * result (positive = match, negative = error), or 0 if nothing matched.
+     */
+    GENERAL_NAMES *gens = NULL;
+    X509_NAME *name = NULL;
+    int i;
+    int cnid;
+    int alt_type;
+    int san_present = 0;
+    int rv = 0;
+    equal_fn equal;
+
+    /* See below, this flag is internal-only */
+    flags &= ~_X509_CHECK_FLAG_DOT_SUBDOMAINS;
+    if (check_type == GEN_EMAIL) {
+        cnid = NID_pkcs9_emailAddress;
+        alt_type = V_ASN1_IA5STRING;
+        equal = equal_email;
+    } else if (check_type == GEN_DNS) {
+        cnid = NID_commonName;
+        /* Implicit client-side DNS sub-domain pattern */
+        if (chklen > 1 && chk[0] == '.')
+            flags |= _X509_CHECK_FLAG_DOT_SUBDOMAINS;
+        alt_type = V_ASN1_IA5STRING;
+        if (flags & X509_CHECK_FLAG_NO_WILDCARDS)
+            equal = equal_nocase;
+        else
+            equal = equal_wildcard;
+    } else {                    /* any other type is handled as an IP address */
+        cnid = 0;
+        alt_type = V_ASN1_OCTET_STRING;
+        equal = equal_case;
+    }
+
+    if (chklen == 0)            /* applies to every check_type, IP included */
+        chklen = strlen(chk);
+
+    gens = X509_get_ext_d2i(x, NID_subject_alt_name, NULL, NULL);
+    if (gens) {
+        for (i = 0; i < sk_GENERAL_NAME_num(gens); i++) {
+            GENERAL_NAME *gen;
+            ASN1_STRING *cstr;
+            gen = sk_GENERAL_NAME_value(gens, i);
+            if (gen->type != check_type)
+                continue;
+            san_present = 1;
+            if (check_type == GEN_EMAIL)
+                cstr = gen->d.rfc822Name;
+            else if (check_type == GEN_DNS)
+                cstr = gen->d.dNSName;
+            else
+                cstr = gen->d.iPAddress;
+            /* Positive on success, negative on error! */
+            if ((rv = do_check_string(cstr, alt_type, equal, flags,
+                                      chk, chklen, peername)) != 0)
+                break;
+        }
+        GENERAL_NAMES_free(gens);
+        if (rv != 0)
+            return rv;
+        /*
+         * Stop here for IP checks (no subject NID), and whenever a SAN of
+         * the wanted type exists unless the caller forces a subject check.
+         */
+        if (!cnid
+            || (san_present
+                && !(flags & X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT)))
+            return 0;
+    }
+    /*
+     * NOTE(review): when the certificate has no subjectAltName extension at
+     * all, an IP check (cnid == 0) still reaches this loop with NID 0 -
+     * confirm X509_NAME_get_index_by_NID() finds nothing in that case.
+     */
+    i = -1;
+    name = X509_get_subject_name(x);
+    while ((i = X509_NAME_get_index_by_NID(name, cnid, i)) >= 0) {
+        X509_NAME_ENTRY *ne;
+        ASN1_STRING *str;
+        ne = X509_NAME_get_entry(name, i);
+        str = X509_NAME_ENTRY_get_data(ne);
+        /* Positive on success, negative on error! */
+        if ((rv = do_check_string(str, -1, equal, flags,
+                                  chk, chklen, peername)) != 0)
+            return rv;
+    }
+    return 0;
+}
+
+int X509_check_host(X509 *x, const char *chk, size_t chklen,
+                    unsigned int flags, char **peername)
+{   /*
+     * Check certificate "x" against host name "chk" (GEN_DNS). A chklen of
+     * 0 means "use strlen(chk)". Returns -2 for a NULL or NUL-containing
+     * name, otherwise the result of do_x509_check().
+     */
+    if (chk == NULL)
+        return -2;
+    /*
+     * Embedded NULs are disallowed, except as the last character of a
+     * string of length 2 or more (tolerate caller including terminating
+     * NUL in string length).
+     */
+    if (chklen == 0)
+        chklen = strlen(chk);
+    else if (memchr(chk, '\0', chklen > 1 ? chklen - 1 : chklen))
+        return -2;
+    if (chklen > 1 && chk[chklen - 1] == '\0') /* drop the tolerated trailing NUL */
+        --chklen;
+    return do_x509_check(x, chk, chklen, flags, GEN_DNS, peername);
+}
+
+int X509_check_email(X509 *x, const char *chk, size_t chklen,
+                     unsigned int flags)
+{   /*
+     * Check certificate "x" against email address "chk" (GEN_EMAIL). A
+     * chklen of 0 means "use strlen(chk)". Returns -2 for a NULL or
+     * NUL-containing address, otherwise the result of do_x509_check().
+     */
+    if (chk == NULL)
+        return -2;
+    /*
+     * Embedded NULs are disallowed, except as the last character of a
+     * string of length 2 or more (tolerate caller including terminating
+     * NUL in string length).
+     */
+    if (chklen == 0)
+        chklen = strlen((char *)chk);
+    else if (memchr(chk, '\0', chklen > 1 ? chklen - 1 : chklen))
+        return -2;
+    if (chklen > 1 && chk[chklen - 1] == '\0') /* drop the tolerated trailing NUL */
+        --chklen;
+    return do_x509_check(x, chk, chklen, flags, GEN_EMAIL, NULL);
+}
+
+int X509_check_ip(X509 *x, const unsigned char *chk, size_t chklen,
+                  unsigned int flags)
+{   /*
+     * Check certificate "x" against the binary IP address chk[0..chklen).
+     * Returns -2 if chk is NULL, otherwise the result of do_x509_check().
+     *
+     * NOTE(review): chklen is passed through unchanged, and do_x509_check()
+     * replaces a chklen of 0 with strlen(chk) - on binary address octets
+     * that looks unintended; confirm callers never pass 0.
+     */
+    if (chk == NULL)
+        return -2;
+    return do_x509_check(x, (char *)chk, chklen, flags, GEN_IPADD, NULL);
+}
+
+int X509_check_ip_asc(X509 *x, const char *ipasc, unsigned int flags)
+{   /*
+     * Check certificate "x" against the textual IP address "ipasc", which
+     * is first converted with a2i_ipadd(). Returns -2 if ipasc is NULL or
+     * the conversion yields length 0, otherwise the do_x509_check() result.
+     */
+    unsigned char ipout[16];    /* presumably sized for an IPv6 address */
+    size_t iplen;
+
+    if (ipasc == NULL)
+        return -2;
+    iplen = (size_t)a2i_ipadd(ipout, ipasc);
+    if (iplen == 0)
+        return -2;
+    return do_x509_check(x, (char *)ipout, iplen, flags, GEN_IPADD, NULL);
+}
+
 /*
  * Convert IP addresses both IPv4 and IPv6 into an OCTET STRING compatible
  * with RFC3280.
index 0138f7a..bcc1be7 100644 (file)
@@ -1,6 +1,6 @@
 /* crypto/x509v3/v3err.c */
 /* ====================================================================
- * Copyright (c) 1999-2007 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2014 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -70,7 +70,7 @@
 # define ERR_REASON(reason) ERR_PACK(ERR_LIB_X509V3,0,reason)
 
 static ERR_STRING_DATA X509V3_str_functs[] = {
-    {ERR_FUNC(X509V3_F_A2I_GENERAL_NAME), "A2I_GENERAL_NAME"},
+    {ERR_FUNC(X509V3_F_A2I_GENERAL_NAME), "a2i_GENERAL_NAME"},
     {ERR_FUNC(X509V3_F_ASIDENTIFIERCHOICE_CANONIZE),
      "ASIDENTIFIERCHOICE_CANONIZE"},
     {ERR_FUNC(X509V3_F_ASIDENTIFIERCHOICE_IS_CANONICAL),
@@ -132,6 +132,7 @@ static ERR_STRING_DATA X509V3_str_functs[] = {
     {ERR_FUNC(X509V3_F_X509V3_EXT_ADD), "X509V3_EXT_add"},
     {ERR_FUNC(X509V3_F_X509V3_EXT_ADD_ALIAS), "X509V3_EXT_add_alias"},
     {ERR_FUNC(X509V3_F_X509V3_EXT_CONF), "X509V3_EXT_conf"},
+    {ERR_FUNC(X509V3_F_X509V3_EXT_FREE), "X509V3_EXT_free"},
     {ERR_FUNC(X509V3_F_X509V3_EXT_I2D), "X509V3_EXT_i2d"},
     {ERR_FUNC(X509V3_F_X509V3_EXT_NCONF), "X509V3_EXT_nconf"},
     {ERR_FUNC(X509V3_F_X509V3_GET_SECTION), "X509V3_get_section"},
@@ -149,6 +150,8 @@ static ERR_STRING_DATA X509V3_str_reasons[] = {
     {ERR_REASON(X509V3_R_BN_DEC2BN_ERROR), "bn dec2bn error"},
     {ERR_REASON(X509V3_R_BN_TO_ASN1_INTEGER_ERROR),
      "bn to asn1 integer error"},
+    {ERR_REASON(X509V3_R_CANNOT_FIND_FREE_FUNCTION),
+     "cannot find free function"},
     {ERR_REASON(X509V3_R_DIRNAME_ERROR), "dirname error"},
     {ERR_REASON(X509V3_R_DISTPOINT_ALREADY_SET), "distpoint already set"},
     {ERR_REASON(X509V3_R_DUPLICATE_ZONE_ID), "duplicate zone id"},
@@ -167,7 +170,6 @@ static ERR_STRING_DATA X509V3_str_reasons[] = {
     {ERR_REASON(X509V3_R_ILLEGAL_HEX_DIGIT), "illegal hex digit"},
     {ERR_REASON(X509V3_R_INCORRECT_POLICY_SYNTAX_TAG),
      "incorrect policy syntax tag"},
-    {ERR_REASON(X509V3_R_INVALID_MULTIPLE_RDNS), "invalid multiple rdns"},
     {ERR_REASON(X509V3_R_INVALID_ASNUMBER), "invalid asnumber"},
     {ERR_REASON(X509V3_R_INVALID_ASRANGE), "invalid asrange"},
     {ERR_REASON(X509V3_R_INVALID_BOOLEAN_STRING), "invalid boolean string"},
@@ -175,6 +177,7 @@ static ERR_STRING_DATA X509V3_str_reasons[] = {
      "invalid extension string"},
     {ERR_REASON(X509V3_R_INVALID_INHERITANCE), "invalid inheritance"},
     {ERR_REASON(X509V3_R_INVALID_IPADDRESS), "invalid ipaddress"},
+    {ERR_REASON(X509V3_R_INVALID_MULTIPLE_RDNS), "invalid multiple rdns"},
     {ERR_REASON(X509V3_R_INVALID_NAME), "invalid name"},
     {ERR_REASON(X509V3_R_INVALID_NULL_ARGUMENT), "invalid null argument"},
     {ERR_REASON(X509V3_R_INVALID_NULL_NAME), "invalid null name"},
diff --git a/crypto/x509v3/v3nametest.c b/crypto/x509v3/v3nametest.c
new file mode 100644 (file)
index 0000000..7b5c1c8
--- /dev/null
@@ -0,0 +1,336 @@
+#include <openssl/x509.h>
+#include <openssl/x509v3.h>
+#include "../e_os.h"
+#include <string.h>
+
+/*
+ * Candidate names for the test.  main() embeds each entry in a certificate
+ * and run_cert() then passes every entry to the X509_check_* functions as
+ * the reference identity.  The list is NULL-terminated.
+ */
+static const char *const names[] = {
+    "a", "b", ".", "*", "@",
+    ".a", "a.", ".b", "b.", ".*", "*.", "*@", "@*", "a@", "@a", "b@", "..",
+    "@@", "**", "*.com", "*com", "*.*.com", "*com", "com*", "*example.com",
+    "*@example.com", "test@*.example.com", "example.com", "www.example.com",
+    "test.www.example.com", "*.example.com", "*.www.example.com",
+    "test.*.example.com", "www.*.com",
+    ".www.example.com", "*www.example.com",
+    "example.net", "xn--rger-koa.example.com",
+    "a.example.com", "b.example.com",
+    "postmaster@example.com", "Postmaster@example.com",
+    "postmaster@EXAMPLE.COM",
+    NULL
+};
+
+/*
+ * Report lines that are expected and therefore not counted as errors.
+ * is_exception() compares a generated message against these strings
+ * verbatim, so each entry must match the check_message() format exactly.
+ * The list is NULL-terminated.
+ */
+static const char *const exceptions[] = {
+    "set CN: host: [*.example.com] matches [a.example.com]",
+    "set CN: host: [*.example.com] matches [b.example.com]",
+    "set CN: host: [*.example.com] matches [www.example.com]",
+    "set CN: host: [*.example.com] matches [xn--rger-koa.example.com]",
+    "set CN: host: [*.www.example.com] matches [test.www.example.com]",
+    "set CN: host: [*.www.example.com] matches [.www.example.com]",
+    "set CN: host: [*www.example.com] matches [www.example.com]",
+    "set CN: host: [test.www.example.com] matches [.www.example.com]",
+    "set CN: host-no-wildcards: [*.www.example.com] matches [.www.example.com]",
+    "set CN: host-no-wildcards: [test.www.example.com] matches [.www.example.com]",
+    "set emailAddress: email: [postmaster@example.com] does not match [Postmaster@example.com]",
+    "set emailAddress: email: [postmaster@EXAMPLE.COM] does not match [Postmaster@example.com]",
+    "set emailAddress: email: [Postmaster@example.com] does not match [postmaster@example.com]",
+    "set emailAddress: email: [Postmaster@example.com] does not match [postmaster@EXAMPLE.COM]",
+    "set dnsName: host: [*.example.com] matches [www.example.com]",
+    "set dnsName: host: [*.example.com] matches [a.example.com]",
+    "set dnsName: host: [*.example.com] matches [b.example.com]",
+    "set dnsName: host: [*.example.com] matches [xn--rger-koa.example.com]",
+    "set dnsName: host: [*.www.example.com] matches [test.www.example.com]",
+    "set dnsName: host-no-wildcards: [*.www.example.com] matches [.www.example.com]",
+    "set dnsName: host-no-wildcards: [test.www.example.com] matches [.www.example.com]",
+    "set dnsName: host: [*.www.example.com] matches [.www.example.com]",
+    "set dnsName: host: [*www.example.com] matches [www.example.com]",
+    "set dnsName: host: [test.www.example.com] matches [.www.example.com]",
+    "set rfc822Name: email: [postmaster@example.com] does not match [Postmaster@example.com]",
+    "set rfc822Name: email: [Postmaster@example.com] does not match [postmaster@example.com]",
+    "set rfc822Name: email: [Postmaster@example.com] does not match [postmaster@EXAMPLE.COM]",
+    "set rfc822Name: email: [postmaster@EXAMPLE.COM] does not match [Postmaster@example.com]",
+    NULL
+};
+
+/*
+ * Return 1 if msg is identical to one of the entries in exceptions[],
+ * 0 otherwise.
+ */
+static int is_exception(const char *msg)
+{
+    size_t i;
+
+    /* Linear scan; the table ends with a NULL entry. */
+    for (i = 0; exceptions[i] != NULL; i++) {
+        if (strcmp(msg, exceptions[i]) == 0)
+            return 1;
+    }
+    return 0;
+}
+
+/*
+ * Build a name from the variadic (int nid, const char *value) pairs that
+ * follow crt and set it as the subject name of crt.  The argument list
+ * ends with a nid of 0.  Returns 1 on success, 0 on failure.
+ */
+static int set_cn(X509 *crt, ...)
+{
+    X509_NAME *subject;
+    va_list args;
+    int ok = 0;
+
+    va_start(args, crt);
+    subject = X509_NAME_new();
+    if (subject == NULL)
+        goto done;
+    for (;;) {
+        const char *value;
+        int nid = va_arg(args, int);
+
+        /* A zero nid is the end-of-list sentinel. */
+        if (nid == 0)
+            break;
+        value = va_arg(args, const char *);
+        if (!X509_NAME_add_entry_by_NID(subject, nid, MBSTRING_ASC,
+                                        (unsigned char *)value, -1, -1, 1))
+            goto done;
+    }
+    if (X509_set_subject_name(crt, subject))
+        ok = 1;
+ done:
+    /* The local name is released on every path, success included. */
+    X509_NAME_free(subject);
+    va_end(args);
+    return ok;
+}
+
+/*-
+int             X509_add_ext(X509 *x, X509_EXTENSION *ex, int loc);
+X509_EXTENSION *X509_EXTENSION_create_by_NID(X509_EXTENSION **ex,
+                        int nid, int crit, ASN1_OCTET_STRING *data);
+int             X509_add_ext(X509 *x, X509_EXTENSION *ex, int loc);
+*/
+
+/*
+ * Add a subjectAltName extension to crt built from the variadic
+ * (int type, const char *value) pairs that follow crt.  The argument list
+ * ends with a type of 0.  Only GEN_EMAIL and GEN_DNS are handled; any other
+ * type aborts the program.  Returns 1 on success, 0 on failure.
+ */
+static int set_altname(X509 *crt, ...)
+{
+    int ret = 0;
+    GENERAL_NAMES *gens = NULL;
+    GENERAL_NAME *gen = NULL;
+    ASN1_IA5STRING *ia5 = NULL;
+    va_list ap;
+    va_start(ap, crt);
+    gens = sk_GENERAL_NAME_new_null();
+    if (gens == NULL)
+        goto out;
+    while (1) {
+        int type;
+        const char *name;
+        type = va_arg(ap, int);
+        if (type == 0)
+            break;
+        name = va_arg(ap, const char *);
+
+        gen = GENERAL_NAME_new();
+        if (gen == NULL)
+            goto out;
+        ia5 = ASN1_IA5STRING_new();
+        if (ia5 == NULL)
+            goto out;
+        if (!ASN1_STRING_set(ia5, name, -1))
+            goto out;
+        switch (type) {
+        case GEN_EMAIL:
+        case GEN_DNS:
+            GENERAL_NAME_set0_value(gen, type, ia5);
+            /* ia5 is now reachable through gen; don't free it separately. */
+            ia5 = NULL;
+            break;
+        default:
+            abort();
+        }
+        /*
+         * If the push fails gen is not on the stack, so leave the pointer
+         * set and let the cleanup code below free it.
+         */
+        if (!sk_GENERAL_NAME_push(gens, gen))
+            goto out;
+        gen = NULL;
+    }
+    if (!X509_add1_ext_i2d(crt, NID_subject_alt_name, gens, 0, 0))
+        goto out;
+    ret = 1;
+ out:
+    ASN1_IA5STRING_free(ia5);
+    GENERAL_NAME_free(gen);
+    GENERAL_NAMES_free(gens);
+    va_end(ap);
+    return ret;
+}
+
+/* Subject with a single commonName entry holding name. */
+static int set_cn1(X509 *crt, const char *name)
+{
+    return set_cn(crt, NID_commonName, name, 0);
+}
+
+/* Subject with commonName = name plus a fixed dummy emailAddress entry. */
+static int set_cn_and_email(X509 *crt, const char *name)
+{
+    return set_cn(crt, NID_commonName, name,
+                  NID_pkcs9_emailAddress, "dummy@example.com", 0);
+}
+
+/* Subject with two commonName entries: a dummy value, then name. */
+static int set_cn2(X509 *crt, const char *name)
+{
+    return set_cn(crt, NID_commonName, "dummy value",
+                  NID_commonName, name, 0);
+}
+
+/* Subject with two commonName entries: name, then a dummy value. */
+static int set_cn3(X509 *crt, const char *name)
+{
+    return set_cn(crt, NID_commonName, name,
+                  NID_commonName, "dummy value", 0);
+}
+
+/* Subject with a single emailAddress entry holding name. */
+static int set_email1(X509 *crt, const char *name)
+{
+    return set_cn(crt, NID_pkcs9_emailAddress, name, 0);
+}
+
+/* Subject with two emailAddress entries: a dummy address, then name. */
+static int set_email2(X509 *crt, const char *name)
+{
+    return set_cn(crt, NID_pkcs9_emailAddress, "dummy@example.com",
+                  NID_pkcs9_emailAddress, name, 0);
+}
+
+/* Subject with two emailAddress entries: name, then a dummy address. */
+static int set_email3(X509 *crt, const char *name)
+{
+    return set_cn(crt, NID_pkcs9_emailAddress, name,
+                  NID_pkcs9_emailAddress, "dummy@example.com", 0);
+}
+
+/* Subject with emailAddress = name plus a fixed commonName entry. */
+static int set_email_and_cn(X509 *crt, const char *name)
+{
+    return set_cn(crt, NID_pkcs9_emailAddress, name,
+                  NID_commonName, "www.example.org", 0);
+}
+
+/* subjectAltName extension with a single dNSName entry holding name. */
+static int set_altname_dns(X509 *crt, const char *name)
+{
+    return set_altname(crt, GEN_DNS, name, 0);
+}
+
+/* subjectAltName extension with a single rfc822Name entry holding name. */
+static int set_altname_email(X509 *crt, const char *name)
+{
+    return set_altname(crt, GEN_EMAIL, name, 0);
+}
+
+/* One way of placing a test name into a certificate. */
+struct set_name_fn {
+    /* Stores the given name in the certificate; returns 0 on failure. */
+    int (*fn) (X509 *, const char *);
+    /* Label used as the prefix of the messages built by check_message(). */
+    const char *name;
+    /* Non-zero if run_cert() should expect X509_check_host() to match. */
+    int host;
+    /* Non-zero if run_cert() should expect X509_check_email() to match. */
+    int email;
+};
+
+/*
+ * All name-setting variants exercised by main().  The table ends with an
+ * entry whose name is NULL.
+ */
+static const struct set_name_fn name_fns[] = {
+    {set_cn1, "set CN", 1, 0},
+    {set_cn2, "set CN", 1, 0},
+    {set_cn3, "set CN", 1, 0},
+    {set_cn_and_email, "set CN", 1, 0},
+    {set_email1, "set emailAddress", 0, 1},
+    {set_email2, "set emailAddress", 0, 1},
+    {set_email3, "set emailAddress", 0, 1},
+    {set_email_and_cn, "set emailAddress", 0, 1},
+    {set_altname_dns, "set dnsName", 1, 0},
+    {set_altname_email, "set rfc822Name", 0, 1},
+    {NULL, NULL, 0}
+};
+
+/*
+ * Allocate an otherwise empty certificate with its version set to X.509 v3.
+ * Returns the new certificate, which the caller releases with X509_free(),
+ * or NULL on failure.
+ */
+static X509 *make_cert(void)
+{
+    X509 *crt = X509_new();
+
+    if (crt == NULL)
+        return NULL;
+    /* The version field is zero-based: the value 2 denotes v3. */
+    if (!X509_set_version(crt, 2)) {
+        /* Don't leak the half-initialised certificate. */
+        X509_free(crt);
+        return NULL;
+    }
+    return crt;
+}
+
+static int errors;
+
+/*
+ * Report an unexpected check result.  match > 0 is reported as "matches",
+ * match == 0 as "does not match"; a negative match means there is nothing
+ * to report.  Messages listed in exceptions[] are suppressed; every other
+ * message is printed to stdout and counted in errors.
+ */
+static void check_message(const struct set_name_fn *fn, const char *op,
+                          const char *nameincert, int match, const char *name)
+{
+    char report[1024];
+    const char *verdict;
+
+    if (match < 0)
+        return;
+    verdict = match ? "matches" : "does not match";
+    BIO_snprintf(report, sizeof(report), "%s: %s: [%s] %s [%s]",
+                 fn->name, op, nameincert, verdict, name);
+    if (!is_exception(report)) {
+        puts(report);
+        ++errors;
+    }
+}
+
+/*
+ * Classify the return value of an X509_check_host() call.  Returns 1 for an
+ * unexpected match, 0 for an unexpected mismatch and -1 when there is
+ * nothing to report.  A negative ret is logged and counted as an error.
+ */
+static int classify_host_result(const struct set_name_fn *fn, int ret,
+                                int samename)
+{
+    if (ret < 0) {
+        fprintf(stderr, "internal error in X509_check_host");
+        ++errors;
+        return -1;
+    }
+    if (fn->host) {
+        if (ret == 1 && !samename)
+            return 1;
+        if (ret == 0 && samename)
+            return 0;
+        return -1;
+    }
+    /* The certificate holds no host name, so any match is unexpected. */
+    return ret == 1 ? 1 : -1;
+}
+
+/*
+ * Check crt, which was populated with nameincert by fn, against every entry
+ * of names[] using X509_check_host() (with and without wildcards) and
+ * X509_check_email().  Unexpected results go through check_message().
+ */
+static void run_cert(X509 *crt, const char *nameincert,
+                     const struct set_name_fn *fn)
+{
+    const char *const *pname = names;
+    while (*pname) {
+        int samename = strcasecmp(nameincert, *pname) == 0;
+        size_t namelen = strlen(*pname);
+        char *name = malloc(namelen);
+        int match, ret;
+
+        if (name == NULL) {
+            fprintf(stderr, "out of memory\n");
+            ++errors;
+            return;
+        }
+        /*
+         * The copy carries no terminating NUL; the check functions are
+         * given the explicit length instead.
+         */
+        memcpy(name, *pname, namelen);
+
+        ret = X509_check_host(crt, name, namelen, 0, NULL);
+        match = classify_host_result(fn, ret, samename);
+        check_message(fn, "host", nameincert, match, *pname);
+
+        ret = X509_check_host(crt, name, namelen,
+                              X509_CHECK_FLAG_NO_WILDCARDS, NULL);
+        match = classify_host_result(fn, ret, samename);
+        check_message(fn, "host-no-wildcards", nameincert, match, *pname);
+
+        ret = X509_check_email(crt, name, namelen, 0);
+        match = -1;
+        if (fn->email) {
+            if (ret && !samename)
+                match = 1;
+            /* Only names containing '@' are expected to match as email. */
+            if (!ret && samename && strchr(nameincert, '@') != NULL)
+                match = 0;
+        } else if (ret)
+            match = 1;
+        check_message(fn, "email", nameincert, match, *pname);
+        ++pname;
+        free(name);
+    }
+}
+
+/*
+ * For every name-setting variant and every test name, build a certificate
+ * and run all checks against it.  Exits with 1 if a certificate could not
+ * be built or if any unexpected result was recorded, 0 otherwise.
+ */
+int main(void)
+{
+    const struct set_name_fn *pfn;
+
+    for (pfn = name_fns; pfn->name != NULL; ++pfn) {
+        const char *const *pname;
+
+        for (pname = names; *pname != NULL; ++pname) {
+            X509 *crt = make_cert();
+
+            if (crt == NULL) {
+                fprintf(stderr, "make_cert failed\n");
+                return 1;
+            }
+            if (!pfn->fn(crt, *pname)) {
+                fprintf(stderr, "X509 name setting failed\n");
+                return 1;
+            }
+            run_cert(crt, *pname, pfn);
+            X509_free(crt);
+        }
+    }
+    return errors > 0;
+}
index db9c3e8..f5c6156 100644 (file)
 extern "C" {
 #endif
 
+# ifdef OPENSSL_SYS_WIN32
+/* Under Win32 these are defined in wincrypt.h */
+#  undef X509_NAME
+#  undef X509_CERT_PAIR
+#  undef X509_EXTENSIONS
+# endif
+
 /* Forward reference */
 struct v3_ext_method;
 struct v3_ext_ctx;
@@ -405,7 +412,6 @@ struct ISSUING_DIST_POINT_st {
 # define EXFLAG_CA               0x10
 /* Really self issued not necessarily self signed */
 # define EXFLAG_SI               0x20
-# define EXFLAG_SS               0x20
 # define EXFLAG_V1               0x40
 # define EXFLAG_INVALID          0x80
 # define EXFLAG_SET              0x100
@@ -414,6 +420,8 @@ struct ISSUING_DIST_POINT_st {
 
 # define EXFLAG_INVALID_POLICY   0x800
 # define EXFLAG_FRESHEST         0x1000
+/* Self signed */
+# define EXFLAG_SS               0x2000
 
 # define KU_DIGITAL_SIGNATURE    0x0080
 # define KU_NON_REPUDIATION      0x0040
@@ -442,6 +450,7 @@ struct ISSUING_DIST_POINT_st {
 # define XKU_OCSP_SIGN           0x20
 # define XKU_TIMESTAMP           0x40
 # define XKU_DVCS                0x80
+# define XKU_ANYEKU              0x100
 
 # define X509_PURPOSE_DYNAMIC    0x1
 # define X509_PURPOSE_DYNAMIC_NAME       0x2
@@ -665,6 +674,7 @@ STACK_OF(CONF_VALUE) *X509V3_parse_list(const char *line);
 void *X509V3_EXT_d2i(X509_EXTENSION *ext);
 void *X509V3_get_d2i(STACK_OF(X509_EXTENSION) *x, int nid, int *crit,
                      int *idx);
+int X509V3_EXT_free(int nid, void *ext_data);
 
 X509_EXTENSION *X509V3_EXT_i2d(int ext_nid, int crit, void *ext_struc);
 int X509V3_add1_i2d(STACK_OF(X509_EXTENSION) **x, int nid, void *value,
@@ -707,6 +717,34 @@ STACK_OF(OPENSSL_STRING) *X509_get1_email(X509 *x);
 STACK_OF(OPENSSL_STRING) *X509_REQ_get1_email(X509_REQ *x);
 void X509_email_free(STACK_OF(OPENSSL_STRING) *sk);
 STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x);
+/* Flags for X509_check_* functions */
+
+/*
+ * Always check subject name for host match even if subject alt names present
+ */
+# define X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT    0x1
+/* Disable wildcard matching for dnsName fields and common name. */
+# define X509_CHECK_FLAG_NO_WILDCARDS    0x2
+/* Wildcards must not match a partial label. */
+# define X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS 0x4
+/* Allow (non-partial) wildcards to match multiple labels. */
+# define X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS 0x8
+/* Constrain verifier subdomain patterns to match a single label. */
+# define X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS 0x10
+/*
+ * Match reference identifiers starting with "." to any sub-domain.
+ * This is a non-public flag, turned on implicitly when the subject
+ * reference identity is a DNS name.
+ */
+# define _X509_CHECK_FLAG_DOT_SUBDOMAINS 0x8000
+
+int X509_check_host(X509 *x, const char *chk, size_t chklen,
+                    unsigned int flags, char **peername);
+int X509_check_email(X509 *x, const char *chk, size_t chklen,
+                     unsigned int flags);
+int X509_check_ip(X509 *x, const unsigned char *chk, size_t chklen,
+                  unsigned int flags);
+int X509_check_ip_asc(X509 *x, const char *ipasc, unsigned int flags);
 
 ASN1_OCTET_STRING *a2i_IPADDRESS(const char *ipasc);
 ASN1_OCTET_STRING *a2i_IPADDRESS_NC(const char *ipasc);
@@ -930,6 +968,7 @@ void ERR_load_X509V3_strings(void);
 # define X509V3_F_X509V3_EXT_ADD                          104
 # define X509V3_F_X509V3_EXT_ADD_ALIAS                    106
 # define X509V3_F_X509V3_EXT_CONF                         107
+# define X509V3_F_X509V3_EXT_FREE                         165
 # define X509V3_F_X509V3_EXT_I2D                          136
 # define X509V3_F_X509V3_EXT_NCONF                        152
 # define X509V3_F_X509V3_GET_SECTION                      142
@@ -944,6 +983,7 @@ void ERR_load_X509V3_strings(void);
 # define X509V3_R_BAD_OBJECT                              119
 # define X509V3_R_BN_DEC2BN_ERROR                         100
 # define X509V3_R_BN_TO_ASN1_INTEGER_ERROR                101
+# define X509V3_R_CANNOT_FIND_FREE_FUNCTION               168
 # define X509V3_R_DIRNAME_ERROR                           149
 # define X509V3_R_DISTPOINT_ALREADY_SET                   160
 # define X509V3_R_DUPLICATE_ZONE_ID                       133
@@ -959,13 +999,13 @@ void ERR_load_X509V3_strings(void);
 # define X509V3_R_ILLEGAL_EMPTY_EXTENSION                 151
 # define X509V3_R_ILLEGAL_HEX_DIGIT                       113
 # define X509V3_R_INCORRECT_POLICY_SYNTAX_TAG             152
-# define X509V3_R_INVALID_MULTIPLE_RDNS                   161
 # define X509V3_R_INVALID_ASNUMBER                        162
 # define X509V3_R_INVALID_ASRANGE                         163
 # define X509V3_R_INVALID_BOOLEAN_STRING                  104
 # define X509V3_R_INVALID_EXTENSION_STRING                105
 # define X509V3_R_INVALID_INHERITANCE                     165
 # define X509V3_R_INVALID_IPADDRESS                       166
+# define X509V3_R_INVALID_MULTIPLE_RDNS                   161
 # define X509V3_R_INVALID_NAME                            106
 # define X509V3_R_INVALID_NULL_ARGUMENT                   107
 # define X509V3_R_INVALID_NULL_NAME                       108
index 6ebfd01..d208d02 100644 (file)
@@ -24,7 +24,7 @@ print<<___;
        call    OPENSSL_cpuid_setup
 
 .hidden        OPENSSL_ia32cap_P
-.comm  OPENSSL_ia32cap_P,8,4
+.comm  OPENSSL_ia32cap_P,16,4
 
 .text
 
@@ -53,12 +53,13 @@ OPENSSL_rdtsc:
 .size  OPENSSL_rdtsc,.-OPENSSL_rdtsc
 
 .globl OPENSSL_ia32_cpuid
-.type  OPENSSL_ia32_cpuid,\@abi-omnipotent
+.type  OPENSSL_ia32_cpuid,\@function,1
 .align 16
 OPENSSL_ia32_cpuid:
        mov     %rbx,%r8                # save %rbx
 
        xor     %eax,%eax
+       mov     %eax,8(%rdi)            # clear 3rd word
        cpuid
        mov     %eax,%r11d              # max value for standard query level
 
@@ -126,6 +127,14 @@ OPENSSL_ia32_cpuid:
        shr     \$14,%r10d
        and     \$0xfff,%r10d           # number of cores -1 per L1D
 
+       cmp     \$7,%r11d
+       jb      .Lnocacheinfo
+
+       mov     \$7,%eax
+       xor     %ecx,%ecx
+       cpuid
+       mov     %ebx,8(%rdi)
+
 .Lnocacheinfo:
        mov     \$1,%eax
        cpuid
@@ -165,6 +174,7 @@ OPENSSL_ia32_cpuid:
 .Lclear_avx:
        mov     \$0xefffe7ff,%eax       # ~(1<<28|1<<12|1<<11)
        and     %eax,%r9d               # clear AVX, FMA and AMD XOP bits
+       andl    \$0xffffffdf,8(%rdi)    # clear AVX2, ~(1<<5)
 .Ldone:
        shl     \$32,%r9
        mov     %r10d,%eax
@@ -279,6 +289,21 @@ OPENSSL_ia32_rdrand:
        cmove   %rcx,%rax
        ret
 .size  OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
+
+.globl OPENSSL_ia32_rdseed
+.type  OPENSSL_ia32_rdseed,\@abi-omnipotent
+.align 16
+OPENSSL_ia32_rdseed:
+       mov     \$8,%ecx
+.Loop_rdseed:
+       rdseed  %rax
+       jc      .Lbreak_rdseed
+       loop    .Loop_rdseed
+.Lbreak_rdseed:
+       cmp     \$0,%rax
+       cmove   %rcx,%rax
+       ret
+.size  OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
 ___
 
 close STDOUT;  # flush
index b270b44..e95f627 100644 (file)
@@ -22,6 +22,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
        &xor    ("eax","eax");
        &bt     ("ecx",21);
        &jnc    (&label("nocpuid"));
+       &mov    ("esi",&wparam(0));
+       &mov    (&DWP(8,"esi"),"eax");  # clear 3rd word
        &cpuid  ();
        &mov    ("edi","eax");          # max value for standard query level
 
@@ -79,6 +81,16 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
        &jmp    (&label("generic"));
        
 &set_label("intel");
+       &cmp    ("edi",7);
+       &jb     (&label("cacheinfo"));
+
+       &mov    ("esi",&wparam(0));
+       &mov    ("eax",7);
+       &xor    ("ecx","ecx");
+       &cpuid  ();
+       &mov    (&DWP(8,"esi"),"ebx");
+
+&set_label("cacheinfo");
        &cmp    ("edi",4);
        &mov    ("edi",-1);
        &jb     (&label("nocacheinfo"));
@@ -135,6 +147,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
        &and    ("esi",0xfeffffff);     # clear FXSR
 &set_label("clear_avx");
        &and    ("ebp",0xefffe7ff);     # clear AVX, FMA and AMD XOP bits
+       &mov    ("edi",&wparam(0));
+       &and    (&DWP(8,"edi"),0xffffffdf);     # clear AVX2
 &set_label("done");
        &mov    ("eax","esi");
        &mov    ("edx","ebp");
@@ -198,7 +212,7 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
 
 &function_begin_B("OPENSSL_far_spin");
        &pushf  ();
-       &pop    ("eax")
+       &pop    ("eax");
        &bt     ("eax",9);
        &jnc    (&label("nospin"));     # interrupts are disabled
 
@@ -353,6 +367,21 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
        &ret    ();
 &function_end_B("OPENSSL_ia32_rdrand");
 
+&function_begin_B("OPENSSL_ia32_rdseed");
+       &mov    ("ecx",8);
+&set_label("loop");
+       &rdseed ("eax");
+       &jc     (&label("break"));
+       &loop   (&label("loop"));
+&set_label("break");
+       &cmp    ("eax",0);
+       &cmove  ("eax","ecx");
+       &ret    ();
+&function_end_B("OPENSSL_ia32_rdseed");
+
 &initseg("OPENSSL_cpuid_setup");
 
+&hidden("OPENSSL_cpuid_setup");
+&hidden("OPENSSL_ia32cap_P");
+
 &asm_finish();
index 4351540..f8c8f03 100644 (file)
@@ -1,7 +1,7 @@
 CC=cc
 CFLAGS= -g -I../../include
-LIBS= -L../.. ../../libssl.a ../../libcrypto.a
-EXAMPLES=saccept sconnect
+LIBS= -L../.. ../../libssl.a ../../libcrypto.a -ldl
+EXAMPLES=saccept sconnect client-arg client-conf
 
 all: $(EXAMPLES) 
 
@@ -11,6 +11,12 @@ saccept: saccept.o
 sconnect: sconnect.o
        $(CC) -o sconnect sconnect.o $(LIBS)
 
+client-arg: client-arg.o
+       $(CC) -o client-arg client-arg.o $(LIBS)
+
+client-conf: client-conf.o
+       $(CC) -o client-conf client-conf.o $(LIBS)
+
 clean: 
        rm -f $(EXAMPLES) *.o
 
index 0b24e5b..a36bb48 100644 (file)
@@ -1,3 +1,7 @@
 This directory contains some simple examples of the use of BIO's
 to simplify socket programming.
 
+The client-conf, server-conf, client-arg and server-arg demos include
+examples of how to use the SSL_CONF API for configuration file or command
+line processing.
+
diff --git a/demos/bio/accept.cnf b/demos/bio/accept.cnf
new file mode 100644 (file)
index 0000000..e4acea7
--- /dev/null
@@ -0,0 +1,13 @@
+# Example configuration file
+# Port to listen on
+Port = 4433
+# Disable TLS v1.2 for test.
+# Protocol = ALL, -TLSv1.2
+# Only support 3 curves
+Curves = P-521:P-384:P-256
+# Automatic curve selection
+ECDHParameters = Automatic
+# Restricted signature algorithms
+SignatureAlgorithms = RSA+SHA512:ECDSA+SHA512 
+Certificate=server.pem
+PrivateKey=server.pem
diff --git a/demos/bio/client-arg.c b/demos/bio/client-arg.c
new file mode 100644 (file)
index 0000000..dc354ca
--- /dev/null
@@ -0,0 +1,111 @@
+#include <stdio.h>
+#include <string.h>
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+
+/*
+ * Minimal TLS client that configures its SSL_CTX from the command line via
+ * the SSL_CONF API.  Arguments not recognised by SSL_CONF are handled here;
+ * the only one supported is "-connect host:port" (default localhost:4433).
+ * After the handshake it sends a simple HTTP request and copies the reply
+ * to stdout.  Returns 0 on success and 1 on any error.
+ */
+int main(int argc, char **argv)
+{
+    BIO *sbio = NULL, *out = NULL;
+    int len;
+    int ret = 1;
+    char tmpbuf[1024];
+    SSL_CTX *ctx = NULL;
+    SSL_CONF_CTX *cctx = NULL;
+    SSL *ssl = NULL;
+    char **args = argv + 1;
+    const char *connect_str = "localhost:4433";
+    int nargs = argc - 1;
+
+    ERR_load_crypto_strings();
+    ERR_load_SSL_strings();
+    SSL_library_init();
+
+    ctx = SSL_CTX_new(SSLv23_client_method());
+    cctx = SSL_CONF_CTX_new();
+    if (ctx == NULL || cctx == NULL) {
+        fprintf(stderr, "Error creating SSL context\n");
+        ERR_print_errors_fp(stderr);
+        goto end;
+    }
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CLIENT);
+    SSL_CONF_CTX_set_ssl_ctx(cctx, ctx);
+    while (*args && **args == '-') {
+        int rv;
+        /* Parse standard arguments */
+        rv = SSL_CONF_cmd_argv(cctx, &nargs, &args);
+        if (rv == -3) {
+            fprintf(stderr, "Missing argument for %s\n", *args);
+            goto end;
+        }
+        if (rv < 0) {
+            fprintf(stderr, "Error in command %s\n", *args);
+            ERR_print_errors_fp(stderr);
+            goto end;
+        }
+        /* If rv > 0 we processed something so proceed to next arg */
+        if (rv > 0)
+            continue;
+        /* Otherwise application specific argument processing */
+        if (!strcmp(*args, "-connect")) {
+            connect_str = args[1];
+            if (connect_str == NULL) {
+                fprintf(stderr, "Missing -connect argument\n");
+                goto end;
+            }
+            args += 2;
+            nargs -= 2;
+            continue;
+        } else {
+            fprintf(stderr, "Unknown argument %s\n", *args);
+            goto end;
+        }
+    }
+
+    if (!SSL_CONF_CTX_finish(cctx)) {
+        fprintf(stderr, "Finish error\n");
+        ERR_print_errors_fp(stderr);
+        goto end;
+    }
+
+    /*
+     * We'd normally set some stuff like the verify paths and mode here
+     * because as things stand this will connect to any server whose
+     * certificate is signed by any CA.
+     */
+
+    sbio = BIO_new_ssl_connect(ctx);
+    if (sbio == NULL) {
+        fprintf(stderr, "Error creating connect BIO\n");
+        ERR_print_errors_fp(stderr);
+        goto end;
+    }
+
+    BIO_get_ssl(sbio, &ssl);
+
+    if (!ssl) {
+        fprintf(stderr, "Can't locate SSL pointer\n");
+        goto end;
+    }
+
+    /* Don't want any retries */
+    SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
+
+    /* We might want to do other things with ssl here */
+
+    BIO_set_conn_hostname(sbio, connect_str);
+
+    out = BIO_new_fp(stdout, BIO_NOCLOSE);
+    if (BIO_do_connect(sbio) <= 0) {
+        fprintf(stderr, "Error connecting to server\n");
+        ERR_print_errors_fp(stderr);
+        goto end;
+    }
+
+    if (BIO_do_handshake(sbio) <= 0) {
+        fprintf(stderr, "Error establishing SSL connection\n");
+        ERR_print_errors_fp(stderr);
+        goto end;
+    }
+
+    /* Could examine ssl here to get connection info */
+
+    BIO_puts(sbio, "GET / HTTP/1.0\n\n");
+    for (;;) {
+        len = BIO_read(sbio, tmpbuf, sizeof(tmpbuf));
+        if (len <= 0)
+            break;
+        BIO_write(out, tmpbuf, len);
+    }
+    ret = 0;
+ end:
+    /* Single cleanup path; every free below is reached with NULL-safe args. */
+    SSL_CONF_CTX_free(cctx);
+    BIO_free_all(sbio);
+    BIO_free(out);
+    SSL_CTX_free(ctx);
+    return ret;
+}
diff --git a/demos/bio/client-conf.c b/demos/bio/client-conf.c
new file mode 100644 (file)
index 0000000..150e7fc
--- /dev/null
@@ -0,0 +1,120 @@
+#include <stdio.h>
+#include <string.h>
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+#include <openssl/conf.h>
+
+/*
+ * Minimal TLS client that configures its SSL_CTX from the "default" section
+ * of connect.cnf via the SSL_CONF API.  Options not recognised by SSL_CONF
+ * are handled here; the only one supported is "Connect" (default
+ * localhost:4433).  After the handshake it sends a simple HTTP request and
+ * copies the reply to stdout.  Returns 0 on success and 1 on any error.
+ */
+int main(int argc, char **argv)
+{
+    BIO *sbio = NULL, *out = NULL;
+    int i, len, rv;
+    int ret = 1;
+    char tmpbuf[1024];
+    SSL_CTX *ctx = NULL;
+    SSL_CONF_CTX *cctx = NULL;
+    SSL *ssl = NULL;
+    CONF *conf = NULL;
+    STACK_OF(CONF_VALUE) *sect = NULL;
+    CONF_VALUE *cnf;
+    const char *connect_str = "localhost:4433";
+    long errline = -1;
+
+    ERR_load_crypto_strings();
+    ERR_load_SSL_strings();
+    SSL_library_init();
+
+    conf = NCONF_new(NULL);
+
+    if (NCONF_load(conf, "connect.cnf", &errline) <= 0) {
+        if (errline <= 0)
+            fprintf(stderr, "Error processing config file\n");
+        else
+            fprintf(stderr, "Error on line %ld\n", errline);
+        goto end;
+    }
+
+    sect = NCONF_get_section(conf, "default");
+
+    if (sect == NULL) {
+        fprintf(stderr, "Error retrieving default section\n");
+        goto end;
+    }
+
+    ctx = SSL_CTX_new(SSLv23_client_method());
+    cctx = SSL_CONF_CTX_new();
+    if (ctx == NULL || cctx == NULL) {
+        fprintf(stderr, "Error creating SSL context\n");
+        ERR_print_errors_fp(stderr);
+        goto end;
+    }
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CLIENT);
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_FILE);
+    SSL_CONF_CTX_set_ssl_ctx(cctx, ctx);
+    for (i = 0; i < sk_CONF_VALUE_num(sect); i++) {
+        cnf = sk_CONF_VALUE_value(sect, i);
+        rv = SSL_CONF_cmd(cctx, cnf->name, cnf->value);
+        if (rv > 0)
+            continue;
+        /* Anything other than -2 is treated as a processing error. */
+        if (rv != -2) {
+            fprintf(stderr, "Error processing %s = %s\n",
+                    cnf->name, cnf->value);
+            ERR_print_errors_fp(stderr);
+            goto end;
+        }
+        if (!strcmp(cnf->name, "Connect")) {
+            /* Points into conf, which stays alive until cleanup below. */
+            connect_str = cnf->value;
+        } else {
+            fprintf(stderr, "Unknown configuration option %s\n", cnf->name);
+            goto end;
+        }
+    }
+
+    if (!SSL_CONF_CTX_finish(cctx)) {
+        fprintf(stderr, "Finish error\n");
+        ERR_print_errors_fp(stderr);
+        goto end;
+    }
+
+    /*
+     * We'd normally set some stuff like the verify paths and mode here
+     * because as things stand this will connect to any server whose
+     * certificate is signed by any CA.
+     */
+
+    sbio = BIO_new_ssl_connect(ctx);
+    if (sbio == NULL) {
+        fprintf(stderr, "Error creating connect BIO\n");
+        ERR_print_errors_fp(stderr);
+        goto end;
+    }
+
+    BIO_get_ssl(sbio, &ssl);
+
+    if (!ssl) {
+        fprintf(stderr, "Can't locate SSL pointer\n");
+        goto end;
+    }
+
+    /* Don't want any retries */
+    SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
+
+    /* We might want to do other things with ssl here */
+
+    BIO_set_conn_hostname(sbio, connect_str);
+
+    out = BIO_new_fp(stdout, BIO_NOCLOSE);
+    if (BIO_do_connect(sbio) <= 0) {
+        fprintf(stderr, "Error connecting to server\n");
+        ERR_print_errors_fp(stderr);
+        goto end;
+    }
+
+    if (BIO_do_handshake(sbio) <= 0) {
+        fprintf(stderr, "Error establishing SSL connection\n");
+        ERR_print_errors_fp(stderr);
+        goto end;
+    }
+
+    /* Could examine ssl here to get connection info */
+
+    BIO_puts(sbio, "GET / HTTP/1.0\n\n");
+    for (;;) {
+        len = BIO_read(sbio, tmpbuf, sizeof(tmpbuf));
+        if (len <= 0)
+            break;
+        BIO_write(out, tmpbuf, len);
+    }
+    ret = 0;
+ end:
+    /* Single cleanup path; every free below is reached with NULL-safe args. */
+    SSL_CONF_CTX_free(cctx);
+    BIO_free_all(sbio);
+    BIO_free(out);
+    SSL_CTX_free(ctx);
+    NCONF_free(conf);
+    return ret;
+}
diff --git a/demos/bio/connect.cnf b/demos/bio/connect.cnf
new file mode 100644 (file)
index 0000000..4dee03c
--- /dev/null
@@ -0,0 +1,9 @@
+# Example configuration file
+# Connects to the default port of s_server
+Connect = localhost:4433
+# Disable TLS v1.2 for test.
+# Protocol = ALL, -TLSv1.2
+# Only support 3 curves
+Curves = P-521:P-384:P-256
+# Restricted signature algorithms
+SignatureAlgorithms = RSA+SHA512:ECDSA+SHA512 
index 8d02610..e79c872 100644 (file)
@@ -2,13 +2,13 @@
 /* demos/bio/saccept.c */
 
 /*-
- * A minimal program to server an SSL connection.
+ * A minimal program to serve an SSL connection.
  * It uses blocking.
  * saccept host:port
  * host is the interface IP to use.  If any interface, use *:port
  * The default it *:4433
  *
- * cc -I../../include saccept.c -L../.. -lssl -lcrypto
+ * cc -I../../include saccept.c -L../.. -lssl -lcrypto -ldl
  */
 
 #include <stdio.h>
@@ -70,8 +70,8 @@ char *argv[];
         goto err;
 
     /*
-     * This means that when a new connection is acceptede on 'in', The
-     * ssl_bio will be 'dupilcated' and have the new socket BIO push into it.
+     * This means that when a new connection is accepted on 'in', The ssl_bio
+     * will be 'duplicated' and have the new socket BIO push into it.
      * Basically it means the SSL BIO will be automatically setup
      */
     BIO_set_accept_bios(in, ssl_bio);
diff --git a/demos/bio/server-arg.c b/demos/bio/server-arg.c
new file mode 100644 (file)
index 0000000..1d0e1db
--- /dev/null
@@ -0,0 +1,144 @@
+/* NOCW */
+/* demos/bio/server-arg.c */
+
+/*
+ * A minimal program to serve an SSL connection. It uses blocking. It uses
+ * the SSL_CONF API with the command line.
+ * cc -I../../include server-arg.c -L../.. -lssl -lcrypto -ldl
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+
+/*
+ * Configure an SSL_CTX from the command line via the SSL_CONF API, then
+ * accept connections in a loop and copy whatever each peer sends to stdout.
+ * Arguments not recognised by SSL_CONF are handled here; the only one
+ * supported is "-port host:port" (default *:4433).  The accept loop only
+ * ends on an error, so the program always exits with status 1.
+ */
+int main(int argc, char *argv[])
+{
+    char *port = "*:4433";
+    BIO *ssl_bio = NULL, *tmp;
+    SSL_CTX *ctx = NULL;
+    SSL_CONF_CTX *cctx = NULL;
+    char buf[512];
+    BIO *in = NULL;
+    int i;
+    char **args = argv + 1;
+    int nargs = argc - 1;
+
+    SSL_load_error_strings();
+
+    /* Add ciphers and message digests */
+    OpenSSL_add_ssl_algorithms();
+
+    ctx = SSL_CTX_new(SSLv23_server_method());
+    cctx = SSL_CONF_CTX_new();
+    if (ctx == NULL || cctx == NULL) {
+        fprintf(stderr, "Error creating SSL context\n");
+        goto err;
+    }
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_SERVER);
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CERTIFICATE);
+    SSL_CONF_CTX_set_ssl_ctx(cctx, ctx);
+    while (*args && **args == '-') {
+        int rv;
+        /* Parse standard arguments */
+        rv = SSL_CONF_cmd_argv(cctx, &nargs, &args);
+        if (rv == -3) {
+            fprintf(stderr, "Missing argument for %s\n", *args);
+            goto err;
+        }
+        if (rv < 0) {
+            fprintf(stderr, "Error in command %s\n", *args);
+            ERR_print_errors_fp(stderr);
+            goto err;
+        }
+        /* If rv > 0 we processed something so proceed to next arg */
+        if (rv > 0)
+            continue;
+        /* Otherwise application specific argument processing */
+        if (!strcmp(*args, "-port")) {
+            port = args[1];
+            if (port == NULL) {
+                fprintf(stderr, "Missing -port argument\n");
+                goto err;
+            }
+            args += 2;
+            nargs -= 2;
+            continue;
+        } else {
+            fprintf(stderr, "Unknown argument %s\n", *args);
+            goto err;
+        }
+    }
+
+    if (!SSL_CONF_CTX_finish(cctx)) {
+        fprintf(stderr, "Finish error\n");
+        ERR_print_errors_fp(stderr);
+        goto err;
+    }
+#if 0
+    /*
+     * Demo of how to iterate over all certificates in an SSL_CTX structure.
+     */
+    {
+        int rv;
+        rv = SSL_CTX_set_current_cert(ctx, SSL_CERT_SET_FIRST);
+        while (rv) {
+            X509 *x = SSL_CTX_get0_certificate(ctx);
+            X509_NAME_print_ex_fp(stdout, X509_get_subject_name(x), 0,
+                                  XN_FLAG_ONELINE);
+            printf("\n");
+            rv = SSL_CTX_set_current_cert(ctx, SSL_CERT_SET_NEXT);
+        }
+        fflush(stdout);
+    }
+#endif
+    /* Setup server side SSL bio */
+    ssl_bio = BIO_new_ssl(ctx, 0);
+    if (ssl_bio == NULL)
+        goto err;
+
+    if ((in = BIO_new_accept(port)) == NULL)
+        goto err;
+
+    /*
+     * This means that when a new connection is accepted on 'in', The ssl_bio
+     * will be 'duplicated' and have the new socket BIO push into it.
+     * Basically it means the SSL BIO will be automatically setup
+     */
+    BIO_set_accept_bios(in, ssl_bio);
+    /* 'in' now holds ssl_bio; it must not be freed separately below. */
+    ssl_bio = NULL;
+
+ again:
+    /*
+     * The first call will setup the accept socket, and the second will get a
+     * socket.  In this loop, the first actual accept will occur in the
+     * BIO_read() function.
+     */
+
+    if (BIO_do_accept(in) <= 0)
+        goto err;
+
+    for (;;) {
+        i = BIO_read(in, buf, sizeof(buf));
+        if (i == 0) {
+            /*
+             * If we have finished, remove the underlying BIO stack so the
+             * next time we call any function for this BIO, it will attempt
+             * to do an accept
+             */
+            printf("Done\n");
+            tmp = BIO_pop(in);
+            BIO_free_all(tmp);
+            goto again;
+        }
+        if (i < 0)
+            goto err;
+        fwrite(buf, 1, i, stdout);
+        fflush(stdout);
+    }
+
+ err:
+    /* Reached only on failure: the read loop above never falls through. */
+    ERR_print_errors_fp(stderr);
+    BIO_free_all(in);
+    BIO_free_all(ssl_bio);
+    SSL_CONF_CTX_free(cctx);
+    SSL_CTX_free(ctx);
+    return 1;
+}
diff --git a/demos/bio/server-conf.c b/demos/bio/server-conf.c
new file mode 100644 (file)
index 0000000..a09bc93
--- /dev/null
@@ -0,0 +1,138 @@
+/* NOCW */
+/* demos/bio/server-conf.c */
+
+/*
+ * A minimal program to serve an SSL connection. It uses blocking I/O and
+ * the SSL_CONF API with a configuration file. cc -I../../include server-conf.c
+ * -L../.. -lssl -lcrypto -ldl
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+#include <openssl/conf.h>
+
+int main(int argc, char *argv[])
+{
+    char *port = "*:4433";      /* default; a "Port" config entry overrides it */
+    BIO *in = NULL;
+    BIO *ssl_bio, *tmp;
+    SSL_CTX *ctx;
+    SSL_CONF_CTX *cctx = NULL;
+    CONF *conf = NULL;
+    STACK_OF(CONF_VALUE) *sect = NULL;
+    CONF_VALUE *cnf;
+    long errline = -1;
+    char buf[512];
+    int ret = 1, i;             /* ret is passed to exit(); it starts as failure */
+
+    SSL_load_error_strings();
+
+    /* Add ciphers and message digests */
+    OpenSSL_add_ssl_algorithms();
+
+    conf = NCONF_new(NULL);
+
+    if (NCONF_load(conf, "accept.cnf", &errline) <= 0) {
+        if (errline <= 0)
+            fprintf(stderr, "Error processing config file\n");
+        else
+            fprintf(stderr, "Error on line %ld\n", errline);
+        goto err;
+    }
+
+    sect = NCONF_get_section(conf, "default");
+
+    if (sect == NULL) {
+        fprintf(stderr, "Error retrieving default section\n");
+        goto err;
+    }
+
+    ctx = SSL_CTX_new(SSLv23_server_method());
+    cctx = SSL_CONF_CTX_new();
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_SERVER);
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CERTIFICATE);
+    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_FILE);
+    SSL_CONF_CTX_set_ssl_ctx(cctx, ctx);
+    for (i = 0; i < sk_CONF_VALUE_num(sect); i++) {
+        int rv;
+        cnf = sk_CONF_VALUE_value(sect, i);
+        rv = SSL_CONF_cmd(cctx, cnf->name, cnf->value);
+        if (rv > 0)
+            continue;
+        if (rv != -2) {         /* only rv == -2 falls through to our own options */
+            fprintf(stderr, "Error processing %s = %s\n",
+                    cnf->name, cnf->value);
+            ERR_print_errors_fp(stderr);
+            goto err;
+        }
+        if (!strcmp(cnf->name, "Port")) {
+            port = cnf->value;
+        } else {
+            fprintf(stderr, "Unknown configuration option %s\n", cnf->name);
+            goto err;
+        }
+    }
+
+    if (!SSL_CONF_CTX_finish(cctx)) {
+        fprintf(stderr, "Finish error\n");
+        ERR_print_errors_fp(stderr);
+        goto err;
+    }
+
+    /* Set up the server side SSL bio */
+    ssl_bio = BIO_new_ssl(ctx, 0);
+
+    if ((in = BIO_new_accept(port)) == NULL)
+        goto err;
+
+    /*
+     * This means that when a new connection is accepted on 'in', the ssl_bio
+     * will be 'duplicated' and have the new socket BIO pushed into it.
+     * Basically it means the SSL BIO will be set up automatically.
+     */
+    BIO_set_accept_bios(in, ssl_bio);
+
+ again:
+    /*
+     * The first call will set up the accept socket, and the second will get a
+     * socket.  In this loop, the first actual accept will occur in the
+     * BIO_read() function.
+     */
+
+    if (BIO_do_accept(in) <= 0)
+        goto err;
+
+    for (;;) {
+        i = BIO_read(in, buf, 512);
+        if (i == 0) {
+            /*
+             * If we have finished, remove the underlying BIO stack so the
+             * next time we call any function for this BIO, it will attempt
+             * to do an accept
+             */
+            printf("Done\n");
+            tmp = BIO_pop(in);
+            BIO_free_all(tmp);
+            goto again;
+        }
+        if (i < 0) {
+            if (BIO_should_retry(in))
+                continue;
+            goto err;
+        }
+        fwrite(buf, 1, i, stdout);
+        fflush(stdout);
+    }
+
+    ret = 0;                    /* NOTE(review): not reached, the loop has no break */
+ err:
+    if (ret) {
+        ERR_print_errors_fp(stderr);
+    }
+    if (in != NULL)
+        BIO_free(in);
+    exit(ret);
+    return (!ret);              /* not reached: exit() above does not return */
+}
index 5cf1387..d0fc265 100644 (file)
@@ -1,30 +1,52 @@
-subject=/C=AU/SP=QLD/O=Mincom Pty. Ltd./OU=CS/CN=SSLeay demo server
-issuer= /C=AU/SP=QLD/O=Mincom Pty. Ltd./OU=CS/CN=CA
------BEGIN X509 CERTIFICATE-----
-
-MIIBgjCCASwCAQQwDQYJKoZIhvcNAQEEBQAwODELMAkGA1UEBhMCQVUxDDAKBgNV
-BAgTA1FMRDEbMBkGA1UEAxMSU1NMZWF5L3JzYSB0ZXN0IENBMB4XDTk1MTAwOTIz
-MzIwNVoXDTk4MDcwNTIzMzIwNVowYDELMAkGA1UEBhMCQVUxDDAKBgNVBAgTA1FM
-RDEZMBcGA1UEChMQTWluY29tIFB0eS4gTHRkLjELMAkGA1UECxMCQ1MxGzAZBgNV
-BAMTElNTTGVheSBkZW1vIHNlcnZlcjBcMA0GCSqGSIb3DQEBAQUAA0sAMEgCQQC3
-LCXcScWua0PFLkHBLm2VejqpA1F4RQ8q0VjRiPafjx/Z/aWH3ipdMVvuJGa/wFXb
-/nDFLDlfWp+oCPwhBtVPAgMBAAEwDQYJKoZIhvcNAQEEBQADQQArNFsihWIjBzb0
-DCsU0BvL2bvSwJrPEqFlkDq3F4M6EGutL9axEcANWgbbEdAvNJD1dmEmoWny27Pn
-IMs6ZOZB
------END X509 CERTIFICATE-----
+subject= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = Test Server Cert
+issuer= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = OpenSSL Test Intermediate CA
+-----BEGIN CERTIFICATE-----
+MIID5zCCAs+gAwIBAgIJALnu1NlVpZ6zMA0GCSqGSIb3DQEBBQUAMHAxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMSIwIAYDVQQLDBlGT1IgVEVT
+VElORyBQVVJQT1NFUyBPTkxZMSUwIwYDVQQDDBxPcGVuU1NMIFRlc3QgSW50ZXJt
+ZWRpYXRlIENBMB4XDTExMTIwODE0MDE0OFoXDTIxMTAxNjE0MDE0OFowZDELMAkG
+A1UEBhMCVUsxFjAUBgNVBAoMDU9wZW5TU0wgR3JvdXAxIjAgBgNVBAsMGUZPUiBU
+RVNUSU5HIFBVUlBPU0VTIE9OTFkxGTAXBgNVBAMMEFRlc3QgU2VydmVyIENlcnQw
+ggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDzhPOSNtyyRspmeuUpxfNJ
+KCLTuf7g3uQ4zu4iHOmRO5TQci+HhVlLZrHF9XqFXcIP0y4pWDbMSGuiorUmzmfi
+R7bfSdI/+qIQt8KXRH6HNG1t8ou0VSvWId5TS5Dq/er5ODUr9OaaDva7EquHIcMv
+vPQGuI+OEAcnleVCy9HVEIySrO4P3CNIicnGkwwiAud05yUAq/gPXBC1hTtmlPD7
+TVcGVSEiJdvzqqlgv02qedGrkki6GY4S7GjZxrrf7Foc2EP+51LJzwLQx3/JfrCU
+41NEWAsu/Sl0tQabXESN+zJ1pDqoZ3uHMgpQjeGiE0olr+YcsSW/tJmiU9OiAr8R
+AgMBAAGjgY8wgYwwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwLAYJYIZI
+AYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQW
+BBSCvM8AABPR9zklmifnr9LvIBturDAfBgNVHSMEGDAWgBQ2w2yI55X+sL3szj49
+hqshgYfa2jANBgkqhkiG9w0BAQUFAAOCAQEAqb1NV0B0/pbpK9Z4/bNjzPQLTRLK
+WnSNm/Jh5v0GEUOE/Beg7GNjNrmeNmqxAlpqWz9qoeoFZax+QBpIZYjROU3TS3fp
+yLsrnlr0CDQ5R7kCCDGa8dkXxemmpZZLbUCpW2Uoy8sAA4JjN9OtsZY7dvUXFgJ7
+vVNTRnI01ghknbtD+2SxSQd3CWF6QhcRMAzZJ1z1cbbwGDDzfvGFPzJ+Sq+zEPds
+xoVLLSetCiBc+40ZcDS5dV98h9XD7JMTQfxzA7mNGv73JoZJA6nFgj+ADSlJsY/t
+JBv+z1iQRueoh9Qeee+ZbRifPouCB8FDx+AltvHTANdAq0t/K3o+pplMVA==
+-----END CERTIFICATE-----
 -----BEGIN RSA PRIVATE KEY-----
-
-MIIBPAIBAAJBALcsJdxJxa5rQ8UuQcEubZV6OqkDUXhFDyrRWNGI9p+PH9n9pYfe
-Kl0xW+4kZr/AVdv+cMUsOV9an6gI/CEG1U8CAwEAAQJAXJMBZ34ZXHd1vtgL/3hZ
-hexKbVTx/djZO4imXO/dxPGRzG2ylYZpHmG32/T1kaHpZlCHoEPgHoSzmxYXfxjG
-sQIhAPmZ/bQOjmRUHM/VM2X5zrjjM6z18R1P6l3ObFwt9FGdAiEAu943Yh9SqMRw
-tL0xHGxKmM/YJueUw1gB6sLkETN71NsCIQCeT3RhoqXfrpXDoEcEU+gwzjI1bpxq
-agiNTOLfqGoA5QIhAIQFYjgzONxex7FLrsKBm16N2SFl5pXsN9SpRqqL2n63AiEA
-g9VNIQ3xwpw7og3IbONifeku+J9qGMGQJMKwSTwrFtI=
+MIIEpAIBAAKCAQEA84TzkjbcskbKZnrlKcXzSSgi07n+4N7kOM7uIhzpkTuU0HIv
+h4VZS2axxfV6hV3CD9MuKVg2zEhroqK1Js5n4ke230nSP/qiELfCl0R+hzRtbfKL
+tFUr1iHeU0uQ6v3q+Tg1K/Tmmg72uxKrhyHDL7z0BriPjhAHJ5XlQsvR1RCMkqzu
+D9wjSInJxpMMIgLndOclAKv4D1wQtYU7ZpTw+01XBlUhIiXb86qpYL9NqnnRq5JI
+uhmOEuxo2ca63+xaHNhD/udSyc8C0Md/yX6wlONTRFgLLv0pdLUGm1xEjfsydaQ6
+qGd7hzIKUI3hohNKJa/mHLElv7SZolPTogK/EQIDAQABAoIBAADq9FwNtuE5IRQn
+zGtO4q7Y5uCzZ8GDNYr9RKp+P2cbuWDbvVAecYq2NV9QoIiWJOAYZKklOvekIju3
+r0UZLA0PRiIrTg6NrESx3JrjWDK8QNlUO7CPTZ39/K+FrmMkV9lem9yxjJjyC34D
+AQB+YRTx+l14HppjdxNwHjAVQpIx/uO2F5xAMuk32+3K+pq9CZUtrofe1q4Agj9R
+5s8mSy9pbRo9kW9wl5xdEotz1LivFOEiqPUJTUq5J5PeMKao3vdK726XI4Z455Nm
+W2/MA0YV0ug2FYinHcZdvKM6dimH8GLfa3X8xKRfzjGjTiMSwsdjgMa4awY3tEHH
+674jhAECgYEA/zqMrc0zsbNk83sjgaYIug5kzEpN4ic020rSZsmQxSCerJTgNhmg
+utKSCt0Re09Jt3LqG48msahX8ycqDsHNvlEGPQSbMu9IYeO3Wr3fAm75GEtFWePY
+BhM73I7gkRt4s8bUiUepMG/wY45c5tRF23xi8foReHFFe9MDzh8fJFECgYEA9EFX
+4qAik1pOJGNei9BMwmx0I0gfVEIgu0tzeVqT45vcxbxr7RkTEaDoAG6PlbWP6D9a
+WQNLp4gsgRM90ZXOJ4up5DsAWDluvaF4/omabMA+MJJ5kGZ0gCj5rbZbKqUws7x8
+bp+6iBfUPJUbcqNqFmi/08Yt7vrDnMnyMw2A/sECgYEAiiuRMxnuzVm34hQcsbhH
+6ymVqf7j0PW2qK0F4H1ocT9qhzWFd+RB3kHWrCjnqODQoI6GbGr/4JepHUpre1ex
+4UEN5oSS3G0ru0rC3U4C59dZ5KwDHFm7ffZ1pr52ljfQDUsrjjIMRtuiwNK2OoRa
+WSsqiaL+SDzSB+nBmpnAizECgYBdt/y6rerWUx4MhDwwtTnel7JwHyo2MDFS6/5g
+n8qC2Lj6/fMDRE22w+CA2esp7EJNQJGv+b27iFpbJEDh+/Lf5YzIT4MwVskQ5bYB
+JFcmRxUVmf4e09D7o705U/DjCgMH09iCsbLmqQ38ONIRSHZaJtMDtNTHD1yi+jF+
+OT43gQKBgQC/2OHZoko6iRlNOAQ/tMVFNq7fL81GivoQ9F1U0Qr+DH3ZfaH8eIkX
+xT0ToMPJUzWAn8pZv0snA0um6SIgvkCuxO84OkANCVbttzXImIsL7pFzfcwV/ERK
+UM6j0ZuSMFOCr/lGPAoOQU0fskidGEHi1/kW+suSr28TqsyYZpwBDQ==
 -----END RSA PRIVATE KEY-----
-
------BEGIN DH PARAMETERS-----
-MEYCQQDaWDwW2YUiidDkr3VvTMqS3UvlM7gE+w/tlO+cikQD7VdGUNNpmdsp13Yn
-a6LT1BLiGPTdHghM9tgAPnxHdOgzAgEC
------END DH PARAMETERS-----
-
index c564e86..ccce29e 100644 (file)
@@ -10,13 +10,19 @@ c_rehash - Create symbolic links to files named by the hash values
 =head1 SYNOPSIS
 
 B<c_rehash>
+B<[-old]>
+B<[-h]>
+B<[-n]>
+B<[-v]>
 [ I<directory>...]
 
 =head1 DESCRIPTION
 
-B<c_rehash> scans directories and calculates a hash value of each C<.pem>
+B<c_rehash> scans directories and calculates a hash value of each
+C<.pem>, C<.crt>, C<.cer>, or C<.crl>
 file in the specified directory list and creates symbolic links
 for each file, where the name of the link is the hash value.
+(If the platform does not support symbolic links, a copy is made.)
 This utility is useful as many programs that use OpenSSL require
 directories to be set up like this in order to find certificates.
 
@@ -34,6 +40,7 @@ is a hexadecimal character and B<D> is a single decimal digit.
 When processing a directory, B<c_rehash> will first remove all links
 that have a name in that syntax. If you have links in that format
 used for other purposes, they will be removed.
+To skip the removal step, use the B<-n> flag.
 Hashes for CRL's look similar except the letter B<r> appears after
 the period, like this: C<HHHHHHHH.rD>.
 
@@ -42,7 +49,7 @@ incrementing the B<D> value. Duplicates are found by comparing the
 full SHA-1 fingerprint. A warning will be displayed if a duplicate
 is found.
 
-A warning will also be displayed if there are B<.pem> files that
+A warning will also be displayed if there are files that
 cannot be parsed as either a certificate or a CRL.
 
 The program uses the B<openssl> program to compute the hashes and
@@ -51,13 +58,39 @@ B<OPENSSL> environment variable to the full pathname.
 Any program can be used, it will be invoked as follows for either
 a certificate or CRL:
 
-  $OPENSSL x509 -hash -fingerprint -noout -in FFFFFF
-  $OPENSSL crl -hash -fingerprint -noout -in FFFFFF
+  $OPENSSL x509 -hash -fingerprint -noout -in FILENAME
+  $OPENSSL crl -hash -fingerprint -noout -in FILENAME
 
-where B<FFFFFF> is the filename. It must output the hash of the
+where B<FILENAME> is the filename. It must output the hash of the
 file on the first line, and the fingerprint on the second,
 optionally prefixed with some text and an equals sign.
 
+=head1 OPTIONS
+
+=over 4
+
+=item B<-old>
+
+Use old-style hashing (MD5, as opposed to SHA-1) for generating
+links for releases before 1.0.0.  Note that current versions will
+not use the old style.
+
+=item B<-h>
+
+Display a brief usage message.
+
+=item B<-n>
+
+Do not remove existing links.
+This is needed when keeping new and old-style links in the same directory.
+
+=item B<-v>
+
+Print messages about old links removed and new links created.
+By default, B<c_rehash> only lists each directory as it is processed.
+
+=back
+
 =head1 ENVIRONMENT
 
 =over
index 0aa1bad..1c26e3b 100644 (file)
@@ -175,14 +175,14 @@ cipher suites using RSA key exchange.
 =item B<kDHr>, B<kDHd>, B<kDH>
 
 cipher suites using DH key agreement and DH certificates signed by CAs with RSA
-and DSS keys or either respectively. Not implemented.
+and DSS keys or either respectively.
 
-=item B<kEDH>
+=item B<kDHE>, B<kEDH>
 
 cipher suites using ephemeral DH key agreement, including anonymous cipher
 suites.
 
-=item B<EDH>
+=item B<DHE>, B<EDH>
 
 cipher suites using authenticated ephemeral DH key agreement.
 
@@ -200,12 +200,12 @@ cipher suites using DH, including anonymous DH, ephemeral DH and fixed DH.
 cipher suites using fixed ECDH key agreement signed by CAs with RSA and ECDSA
 keys or either respectively.
 
-=item B<kEECDH>
+=item B<kECDHE>, B<kEECDH>
 
 cipher suites using ephemeral ECDH key agreement, including anonymous
 cipher suites.
 
-=item B<EECDHE>
+=item B<ECDHE>, B<EECDH>
 
 cipher suites using authenticated ephemeral ECDH key agreement.
 
@@ -229,7 +229,7 @@ cipher suites using DSS authentication, i.e. the certificates carry DSS keys.
 =item B<aDH>
 
 cipher suites effectively using DH authentication, i.e. the certificates carry
-DH keys.  Not implemented.
+DH keys.
 
 =item B<aECDH>
 
@@ -331,6 +331,18 @@ cipher suites using GOST 28147-89 MAC B<instead of> HMAC.
 
 cipher suites using pre-shared keys (PSK).
 
+=item B<SUITEB128>, B<SUITEB128ONLY>, B<SUITEB192>
+
+enables suite B mode operation using 128 bit (permitting 192 bit mode by peer),
+128 bit (not permitting 192 bit by peer) or 192 bit level of security
+respectively. If used these cipherstrings should appear first in the cipher
+list and anything after them is ignored. Setting Suite B mode has additional
+consequences required to comply with RFC6460. In particular the supported
+signature algorithms are reduced to support only ECDSA and SHA256 or SHA384,
+only the elliptic curves P-256 and P-384 can be used and only the two suite B
+compliant ciphersuites (ECDHE-ECDSA-AES128-GCM-SHA256 and
+ECDHE-ECDSA-AES256-GCM-SHA384) are permissible.
+
 =back
 
 =head1 CIPHER SUITE NAMES
@@ -353,12 +365,10 @@ e.g. DES-CBC3-SHA. In these cases, RSA authentication is used.
  SSL_RSA_WITH_DES_CBC_SHA                DES-CBC-SHA
  SSL_RSA_WITH_3DES_EDE_CBC_SHA           DES-CBC3-SHA
 
- SSL_DH_DSS_EXPORT_WITH_DES40_CBC_SHA    Not implemented.
- SSL_DH_DSS_WITH_DES_CBC_SHA             Not implemented.
- SSL_DH_DSS_WITH_3DES_EDE_CBC_SHA        Not implemented.
- SSL_DH_RSA_EXPORT_WITH_DES40_CBC_SHA    Not implemented.
- SSL_DH_RSA_WITH_DES_CBC_SHA             Not implemented.
- SSL_DH_RSA_WITH_3DES_EDE_CBC_SHA        Not implemented.
+ SSL_DH_DSS_WITH_DES_CBC_SHA             DH-DSS-DES-CBC-SHA
+ SSL_DH_DSS_WITH_3DES_EDE_CBC_SHA        DH-DSS-DES-CBC3-SHA
+ SSL_DH_RSA_WITH_DES_CBC_SHA             DH-RSA-DES-CBC-SHA
+ SSL_DH_RSA_WITH_3DES_EDE_CBC_SHA        DH-RSA-DES-CBC3-SHA
  SSL_DHE_DSS_EXPORT_WITH_DES40_CBC_SHA   EXP-EDH-DSS-DES-CBC-SHA
  SSL_DHE_DSS_WITH_DES_CBC_SHA            EDH-DSS-CBC-SHA
  SSL_DHE_DSS_WITH_3DES_EDE_CBC_SHA       EDH-DSS-DES-CBC3-SHA
@@ -413,10 +423,10 @@ e.g. DES-CBC3-SHA. In these cases, RSA authentication is used.
  TLS_RSA_WITH_AES_128_CBC_SHA            AES128-SHA
  TLS_RSA_WITH_AES_256_CBC_SHA            AES256-SHA
 
- TLS_DH_DSS_WITH_AES_128_CBC_SHA         Not implemented.
- TLS_DH_DSS_WITH_AES_256_CBC_SHA         Not implemented.
- TLS_DH_RSA_WITH_AES_128_CBC_SHA         Not implemented.
- TLS_DH_RSA_WITH_AES_256_CBC_SHA         Not implemented.
+ TLS_DH_DSS_WITH_AES_128_CBC_SHA         DH-DSS-AES128-SHA
+ TLS_DH_DSS_WITH_AES_256_CBC_SHA         DH-DSS-AES256-SHA
+ TLS_DH_RSA_WITH_AES_128_CBC_SHA         DH-RSA-AES128-SHA
+ TLS_DH_RSA_WITH_AES_256_CBC_SHA         DH-RSA-AES256-SHA
 
  TLS_DHE_DSS_WITH_AES_128_CBC_SHA        DHE-DSS-AES128-SHA
  TLS_DHE_DSS_WITH_AES_256_CBC_SHA        DHE-DSS-AES256-SHA
@@ -431,10 +441,10 @@ e.g. DES-CBC3-SHA. In these cases, RSA authentication is used.
  TLS_RSA_WITH_CAMELLIA_128_CBC_SHA      CAMELLIA128-SHA
  TLS_RSA_WITH_CAMELLIA_256_CBC_SHA      CAMELLIA256-SHA
 
- TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA   Not implemented.
- TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA   Not implemented.
- TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA   Not implemented.
- TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA   Not implemented.
+ TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA   DH-DSS-CAMELLIA128-SHA
+ TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA   DH-DSS-CAMELLIA256-SHA
+ TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA   DH-RSA-CAMELLIA128-SHA
+ TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA   DH-RSA-CAMELLIA256-SHA
 
  TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA  DHE-DSS-CAMELLIA128-SHA
  TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA  DHE-DSS-CAMELLIA256-SHA
@@ -448,8 +458,8 @@ e.g. DES-CBC3-SHA. In these cases, RSA authentication is used.
 
  TLS_RSA_WITH_SEED_CBC_SHA              SEED-SHA
 
- TLS_DH_DSS_WITH_SEED_CBC_SHA           Not implemented.
- TLS_DH_RSA_WITH_SEED_CBC_SHA           Not implemented.
+ TLS_DH_DSS_WITH_SEED_CBC_SHA           DH-DSS-SEED-SHA
+ TLS_DH_RSA_WITH_SEED_CBC_SHA           DH-RSA-SEED-SHA
 
  TLS_DHE_DSS_WITH_SEED_CBC_SHA          DHE-DSS-SEED-SHA
  TLS_DHE_RSA_WITH_SEED_CBC_SHA          DHE-RSA-SEED-SHA
@@ -517,15 +527,15 @@ Note: these ciphers can also be used in SSL v3.
  TLS_RSA_WITH_AES_128_GCM_SHA256           AES128-GCM-SHA256
  TLS_RSA_WITH_AES_256_GCM_SHA384           AES256-GCM-SHA384
 
- TLS_DH_RSA_WITH_AES_128_CBC_SHA256        Not implemented.
- TLS_DH_RSA_WITH_AES_256_CBC_SHA256        Not implemented.
- TLS_DH_RSA_WITH_AES_128_GCM_SHA256        Not implemented.
- TLS_DH_RSA_WITH_AES_256_GCM_SHA384        Not implemented.
+ TLS_DH_RSA_WITH_AES_128_CBC_SHA256        DH-RSA-AES128-SHA256
+ TLS_DH_RSA_WITH_AES_256_CBC_SHA256        DH-RSA-AES256-SHA256
+ TLS_DH_RSA_WITH_AES_128_GCM_SHA256        DH-RSA-AES128-GCM-SHA256
+ TLS_DH_RSA_WITH_AES_256_GCM_SHA384        DH-RSA-AES256-GCM-SHA384
 
- TLS_DH_DSS_WITH_AES_128_CBC_SHA256        Not implemented.
- TLS_DH_DSS_WITH_AES_256_CBC_SHA256        Not implemented.
- TLS_DH_DSS_WITH_AES_128_GCM_SHA256        Not implemented.
- TLS_DH_DSS_WITH_AES_256_GCM_SHA384        Not implemented.
+ TLS_DH_DSS_WITH_AES_128_CBC_SHA256        DH-DSS-AES128-SHA256
+ TLS_DH_DSS_WITH_AES_256_CBC_SHA256        DH-DSS-AES256-SHA256
+ TLS_DH_DSS_WITH_AES_128_GCM_SHA256        DH-DSS-AES128-GCM-SHA256
+ TLS_DH_DSS_WITH_AES_256_GCM_SHA384        DH-DSS-AES256-GCM-SHA384
 
  TLS_DHE_RSA_WITH_AES_128_CBC_SHA256       DHE-RSA-AES128-SHA256
  TLS_DHE_RSA_WITH_AES_256_CBC_SHA256       DHE-RSA-AES256-SHA256
@@ -581,9 +591,6 @@ Note: these ciphers can also be used in SSL v3.
 
 =head1 NOTES
 
-The non-ephemeral DH modes are currently unimplemented in OpenSSL
-because there is no support for DH certificates.
-
 Some compiled versions of OpenSSL may not include all the ciphers
 listed here because some ciphers were excluded at compile time.
 
index 9a24082..4eaedbc 100644 (file)
@@ -58,6 +58,7 @@ B<openssl> B<cms>
 [B<-secretkeyid id>]
 [B<-econtent_type type>]
 [B<-inkey file>]
+[B<-keyopt name:parameter>]
 [B<-passin arg>]
 [B<-rand file(s)>]
 [B<cert.pem...>]
@@ -322,8 +323,13 @@ verification was successful.
 
 =item B<-recip file>
 
-the recipients certificate when decrypting a message. This certificate
-must match one of the recipients of the message or an error occurs.
+when decrypting a message this specifies the recipients certificate. The
+certificate must match one of the recipients of the message or an error
+occurs.
+
+When encrypting a message this option may be used multiple times to specify
+each recipient. This form B<must> be used if customised parameters are
+required (for example to specify RSA-OAEP).
 
 =item B<-keyid>
 
@@ -382,6 +388,13 @@ private key must be included in the certificate file specified with
 the B<-recip> or B<-signer> file. When signing this option can be used
 multiple times to specify successive keys.
 
+=item B<-keyopt name:opt>
+
+for signing and encryption this option can be used multiple times to
+set customised parameters for the preceding key or certificate. It can
+currently be used to set RSA-PSS for signing, RSA-OAEP for encryption
+or to modify default parameters for ECDH.
+
 =item B<-passin arg>
 
 the private key password source. For more information about the format of B<arg>
@@ -509,6 +522,10 @@ The B<-compress> option.
 
 The B<-secretkey> option when used with B<-encrypt>.
 
+The use of PSS with B<-sign>.
+
+The use of OAEP or non-RSA keys with B<-encrypt>.
+
 Additionally the B<-EncryptedData_create> and B<-data_create> type cannot
 be processed by the older B<smime> command.
 
@@ -589,6 +606,21 @@ Add a signer to an existing message:
 
  openssl cms -resign -in mail.msg -signer newsign.pem -out mail2.msg
 
+Sign mail using RSA-PSS:
+
+ openssl cms -sign -in message.txt -text -out mail.msg \
+       -signer mycert.pem -keyopt rsa_padding_mode:pss
+
+Create encrypted mail using RSA-OAEP:
+
+ openssl cms -encrypt -in plain.txt -out mail.msg \
+       -recip cert.pem -keyopt rsa_padding_mode:oaep
+
+Use SHA256 KDF with an ECDH certificate:
+
+ openssl cms -encrypt -in plain.txt -out mail.msg \
+       -recip ecdhcert.pem -keyopt ecdh_kdf_md:sha256
+
 =head1 BUGS
 
 The MIME parser isn't very clever: it seems to handle most messages that I've
@@ -614,7 +646,16 @@ No revocation checking is done on the signer's certificate.
 The use of multiple B<-signer> options and the B<-resign> command were first
 added in OpenSSL 1.0.0
 
+The B<-keyopt> option was first added in OpenSSL 1.0.2.
+
+The use of B<-recip> to specify the recipient when encrypting mail was first
+added to OpenSSL 1.0.2.
+
+Support for RSA-OAEP and RSA-PSS was first added to OpenSSL 1.0.2.
+
+The use of non-RSA keys with B<-encrypt> and B<-decrypt> was first added
+to OpenSSL 1.0.2.
 
-The -no_alt_chains options was first added to OpenSSL 1.0.1n and 1.0.2b.
+The -no_alt_chains option was first added to OpenSSL 1.0.2b.
 
 =cut
index c74d097..929edcd 100644 (file)
@@ -128,6 +128,15 @@ The number of bits in the prime parameter B<p>.
 
 The value to use for the generator B<g>.
 
+=item B<dh_rfc5114:num>
+
+If this option is set then the appropriate RFC5114 parameters are used
+instead of generating new parameters. The value B<num> can take the
+values 1, 2 or 3 corresponding to RFC5114 DH parameters consisting of
+1024 bit group with 160 bit subgroup, 2048 bit group with 224 bit subgroup
+and 2048 bit group with 256 bit subgroup as mentioned in RFC5114 sections
+2.1, 2.2 and 2.3 respectively.
+
 =back
 
 =head1 EC PARAMETER GENERATION OPTIONS
@@ -206,6 +215,10 @@ Generate 1024 bit DH parameters:
  openssl genpkey -genparam -algorithm DH -out dhp.pem \
                                        -pkeyopt dh_paramgen_prime_len:1024
 
+Output RFC5114 2048 bit DH parameters with 224 bit subgroup:
+
+ openssl genpkey -genparam -algorithm DH -out dhp.pem -pkeyopt dh_rfc5114:2
+
 Generate DH key from parameters:
 
  openssl genpkey -paramfile dhp.pem -out dhkey.pem 
index fdb900c..4639502 100644 (file)
@@ -387,6 +387,6 @@ second file.
 
 =head1 HISTORY
 
-The -no_alt_chains options was first added to OpenSSL 1.0.1n and 1.0.2b.
+The -no_alt_chains option was first added to OpenSSL 1.0.2b.
 
 =cut
index 84abee7..6901f1f 100644 (file)
@@ -20,6 +20,7 @@ B<openssl> B<pkcs8>
 [B<-embed>]
 [B<-nsdb>]
 [B<-v2 alg>]
+[B<-v2prf alg>]
 [B<-v1 alg>]
 [B<-engine id>]
 
@@ -118,6 +119,12 @@ private keys with OpenSSL then this doesn't matter.
 The B<alg> argument is the encryption algorithm to use, valid values include
 B<des>, B<des3> and B<rc2>. It is recommended that B<des3> is used.
 
+=item B<-v2prf alg>
+
+This option sets the PRF algorithm to use with PKCS#5 v2.0. A typical value
+values would be B<hmacWithSHA256>. If this option isn't set then the default
+for the cipher is used or B<hmacWithSHA1> if there is no default.
+
 =item B<-v1 alg>
 
 This option specifies a PKCS#5 v1.5 or PKCS#12 algorithm to use. A complete
@@ -195,6 +202,11 @@ DES:
 
  openssl pkcs8 -in key.pem -topk8 -v2 des3 -out enckey.pem
 
+Convert a private key from traditional to PKCS#5 v2.0 format using AES with
+256 bits in CBC mode and B<hmacWithSHA256> PRF:
+
+ openssl pkcs8 -in key.pem -topk8 -v2 aes-256-cbc -v2prf hmacWithSHA256 -out enckey.pem
+
 Convert a private key to PKCS#8 using a PKCS#5 1.5 compatible algorithm
 (DES):
 
index 0730d11..df68cb0 100644 (file)
@@ -235,8 +235,8 @@ this option outputs a self signed certificate instead of a certificate
 request. This is typically used to generate a test certificate or
 a self signed root CA. The extensions added to the certificate
 (if any) are specified in the configuration file. Unless specified
-using the B<set_serial> option B<0> will be used for the serial
-number.
+using the B<set_serial> option, a large random number will be used for
+the serial number.
 
 =item B<-days n>
 
index d92ec93..84d0527 100644 (file)
@@ -38,6 +38,9 @@ B<openssl> B<s_client>
 [B<-no_ssl2>]
 [B<-no_ssl3>]
 [B<-no_tls1>]
+[B<-no_tls1_1>]
+[B<-no_tls1_2>]
+[B<-fallback_scsv>]
 [B<-bugs>]
 [B<-cipher cipherlist>]
 [B<-serverpref>]
@@ -48,6 +51,7 @@ B<openssl> B<s_client>
 [B<-sess_out filename>]
 [B<-sess_in filename>]
 [B<-rand file(s)>]
+[B<-serverinfo types>]
 [B<-status>]
 [B<-nextprotoneg protocols>]
 
@@ -197,16 +201,19 @@ Use the PSK key B<key> when using a PSK cipher suite. The key is
 given as a hexadecimal number without leading 0x, for example -psk
 1a2b3c4d.
 
-=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>
+=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2>
 
 these options disable the use of certain SSL or TLS protocols. By default
 the initial handshake uses a method which should be compatible with all
 servers and permit them to use SSL v3, SSL v2 or TLS as appropriate.
 
-Unfortunately there are a lot of ancient and broken servers in use which
+Unfortunately there are still ancient and broken servers in use which
 cannot handle this technique and will fail to connect. Some servers only
-work if TLS is turned off with the B<-no_tls> option others will only
-support SSL v2 and may need the B<-ssl2> option.
+work if TLS is turned off.
+
+=item B<-fallback_scsv>
+
+Send TLS_FALLBACK_SCSV in the ClientHello.
 
 =item B<-bugs>
 
@@ -262,6 +269,13 @@ Multiple files can be specified separated by a OS-dependent character.
 The separator is B<;> for MS-Windows, B<,> for OpenVMS, and B<:> for
 all others.
 
+=item B<-serverinfo types>
+
+a list of comma-separated TLS Extension Types (numbers between 0 and 
+65535).  Each type will be sent as an empty ClientHello TLS Extension.
+The server's response (if any) will be encoded and displayed as a PEM
+file.
+
 =item B<-status>
 
 sends a certificate status request to the server (OCSP stapling). The server
@@ -350,6 +364,6 @@ L<sess_id(1)|sess_id(1)>, L<s_server(1)|s_server(1)>, L<ciphers(1)|ciphers(1)>
 
 =head1 HISTORY
 
-The -no_alt_chains options was first added to OpenSSL 1.0.1n and 1.0.2b.
+The -no_alt_chains option was first added to OpenSSL 1.0.2b.
 
 =cut
index 491038e..baca779 100644 (file)
@@ -46,7 +46,6 @@ B<openssl> B<s_server>
 [B<-no_ssl3>]
 [B<-no_tls1>]
 [B<-no_dhe>]
-[B<-no_ecdhe>]
 [B<-bugs>]
 [B<-hack>]
 [B<-www>]
@@ -57,6 +56,8 @@ B<openssl> B<s_server>
 [B<-no_ticket>]
 [B<-id_prefix arg>]
 [B<-rand file(s)>]
+[B<-serverinfo file>]
+[B<-no_resumption_on_reneg>]
 [B<-status>]
 [B<-status_verbose>]
 [B<-status_timeout nsec>]
@@ -139,11 +140,6 @@ a static set of parameters hard coded into the s_server program will be used.
 if this option is set then no DH parameters will be loaded effectively
 disabling the ephemeral DH cipher suites.
 
-=item B<-no_ecdhe>
-
-if this option is set then no ECDH parameters will be loaded effectively
-disabling the ephemeral ECDH cipher suites.
-
 =item B<-no_tmp_rsa>
 
 certain export cipher suites sometimes use a temporary RSA key, this option
@@ -300,6 +296,18 @@ Multiple files can be specified separated by a OS-dependent character.
 The separator is B<;> for MS-Windows, B<,> for OpenVMS, and B<:> for
 all others.
 
+=item B<-serverinfo file>
+
+a file containing one or more blocks of PEM data.  Each PEM block
+must encode a TLS ServerHello extension (2 bytes type, 2 bytes length,
+followed by "length" bytes of extension data).  If the client sends
+an empty TLS ClientHello extension matching the type, the corresponding
+ServerHello extension will be returned.
+
+=item B<-no_resumption_on_reneg>
+
+set SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION flag.
+
 =item B<-status>
 
 enables certificate status request support (aka OCSP stapling).
@@ -405,6 +413,6 @@ L<sess_id(1)|sess_id(1)>, L<s_client(1)|s_client(1)>, L<ciphers(1)|ciphers(1)>
 
 =head1 HISTORY
 
-The -no_alt_chains options was first added to OpenSSL 1.0.1n and 1.0.2b.
+The -no_alt_chains option was first added to OpenSSL 1.0.2b.
 
 =cut
index 94a8823..d5618c8 100644 (file)
@@ -442,6 +442,6 @@ structures may cause parsing errors.
 The use of multiple B<-signer> options and the B<-resign> command were first
 added in OpenSSL 1.0.0
 
-The -no_alt_chains options was first added to OpenSSL 1.0.1n and 1.0.2b.
+The -no_alt_chains option was first added to OpenSSL 1.0.2b.
 
 =cut
index 18eeee0..bffa6c0 100644 (file)
@@ -12,6 +12,10 @@ B<openssl> B<verify>
 [B<-purpose purpose>]
 [B<-policy arg>]
 [B<-ignore_critical>]
+[B<-attime timestamp>]
+[B<-check_ss_sig>]
+[B<-crlfile file>]
+[B<-crl_download>]
 [B<-crl_check>]
 [B<-crl_check_all>]
 [B<-policy_check>]
@@ -26,7 +30,7 @@ B<openssl> B<verify>
 [B<-untrusted file>]
 [B<-help>]
 [B<-issuer_checks>]
-[B<-attime timestamp>]
+[B<-trusted file>]
 [B<-verbose>]
 [B<->]
 [certificates]
@@ -52,6 +56,30 @@ create symbolic links to a directory of certificates.
 A file of trusted certificates. The file should contain multiple certificates
 in PEM format concatenated together.
 
+=item B<-attime timestamp>
+
+Perform validation checks using time specified by B<timestamp> and not
+current system time. B<timestamp> is the number of seconds since
+01.01.1970 (UNIX time).
+
+=item B<-check_ss_sig>
+
+Verify the signature on the self-signed root CA. This is disabled by default
+because it doesn't add any security.
+
+=item B<-crlfile file>
+
+File containing one or more CRL's (in PEM format) to load.
+
+=item B<-crl_download>
+
+Attempt to download CRL information for this certificate.
+
+=item B<-crl_check>
+
+Checks end entity certificate validity by attempting to look up a valid CRL.
+If a valid CRL cannot be found an error occurs.
+
 =item B<-untrusted file>
 
 A file of untrusted certificates. The file should contain multiple certificates
@@ -81,12 +109,6 @@ rejected. The presence of rejection messages does not itself imply that
 anything is wrong; during the normal verification process, several
 rejections may take place.
 
-=item B<-attime timestamp>
-
-Perform validation checks using time specified by B<timestamp> and not
-current system time. B<timestamp> is the number of seconds since
-01.01.1970 (UNIX time).
-
 =item B<-policy arg>
 
 Enable policy processing and add B<arg> to the user-initial-policy-set (see
@@ -117,6 +139,11 @@ be found that is trusted. With this option that behaviour is suppressed so that
 only the first chain found is ever used. Using this option will force the
 behaviour to match that of previous OpenSSL versions.
 
+=item B<-trusted file>
+
+A file of additional trusted certificates. The file should contain multiple
+certificates in PEM format concatenated together.
+
 =item B<-policy_print>
 
 Print out diagnostics related to policy processing.
@@ -420,6 +447,6 @@ L<x509(1)|x509(1)>
 
 =head1 HISTORY
 
-The -no_alt_chains options was first added to OpenSSL 1.0.1n and 1.0.2b.
+The -no_alt_chains option was first added to OpenSSL 1.0.2b.
 
 =cut
index 6109389..a1326ed 100644 (file)
@@ -51,6 +51,7 @@ B<openssl> B<x509>
 [B<-CAkey filename>]
 [B<-CAcreateserial>]
 [B<-CAserial filename>]
+[B<-force_pubkey key>]
 [B<-text>]
 [B<-certopt option>]
 [B<-C>]
@@ -418,6 +419,15 @@ specified then the extensions should either be contained in the unnamed
 L<x509v3_config(5)|x509v3_config(5)> manual page for details of the
 extension section format.
 
+=item B<-force_pubkey key>
+
+when a certificate is created set its public key to B<key> instead of the
+key in the certificate or certificate request. This option is useful for
+creating certificates where the algorithm can't normally sign requests, for
+example DH.
+
+The format of B<key> can be specified using the B<-keyform> option.
+
 =back
 
 =head2 NAME OPTIONS
index a08e9a0..f651e4f 100644 (file)
@@ -3,7 +3,7 @@
 =head1 NAME
 
 ASN1_STRING_dup, ASN1_STRING_cmp, ASN1_STRING_set, ASN1_STRING_length,
-ASN1_STRING_length_set, ASN1_STRING_type, ASN1_STRING_data -
+ASN1_STRING_length_set, ASN1_STRING_type, ASN1_STRING_data, ASN1_STRING_to_UTF8 -
 ASN1_STRING utility functions
 
 =head1 SYNOPSIS
index 3891b88..19c82ff 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-ASN1_STRING_print_ex, ASN1_STRING_print_ex_fp - ASN1_STRING output routines.
+ASN1_STRING_print_ex, ASN1_STRING_print_ex_fp, ASN1_STRING_print - ASN1_STRING output routines.
 
 =head1 SYNOPSIS
 
diff --git a/doc/crypto/ASN1_TIME_set.pod b/doc/crypto/ASN1_TIME_set.pod
new file mode 100644 (file)
index 0000000..ae2b53d
--- /dev/null
@@ -0,0 +1,129 @@
+=pod
+
+=head1 NAME
+
+ASN1_TIME_set, ASN1_TIME_adj, ASN1_TIME_check, ASN1_TIME_set_string,
+ASN1_TIME_print, ASN1_TIME_diff - ASN.1 Time functions.
+
+=head1 SYNOPSIS
+
+ ASN1_TIME *ASN1_TIME_set(ASN1_TIME *s, time_t t);
+ ASN1_TIME *ASN1_TIME_adj(ASN1_TIME *s, time_t t,
+                          int offset_day, long offset_sec);
+ int ASN1_TIME_set_string(ASN1_TIME *s, const char *str);
+ int ASN1_TIME_check(const ASN1_TIME *t);
+ int ASN1_TIME_print(BIO *b, const ASN1_TIME *s);
+
+ int ASN1_TIME_diff(int *pday, int *psec,
+                    const ASN1_TIME *from, const ASN1_TIME *to);
+
+=head1 DESCRIPTION
+
+The function ASN1_TIME_set() sets the ASN1_TIME structure B<s> to the
+time represented by the time_t value B<t>. If B<s> is NULL a new ASN1_TIME
+structure is allocated and returned.
+
+ASN1_TIME_adj() sets the ASN1_TIME structure B<s> to the time represented
+by the time B<offset_day> and B<offset_sec> after the time_t value B<t>.
+The values of B<offset_day> or B<offset_sec> can be negative to set a
+time before B<t>. The B<offset_sec> value can also exceed the number of
+seconds in a day. If B<s> is NULL a new ASN1_TIME structure is allocated
+and returned.
+
+ASN1_TIME_set_string() sets ASN1_TIME structure B<s> to the time
+represented by string B<str> which must be in appropriate ASN.1 time
+format (for example YYMMDDHHMMSSZ or YYYYMMDDHHMMSSZ).
+
+ASN1_TIME_check() checks the syntax of ASN1_TIME structure B<t>.
+
+ASN1_TIME_print() prints out the time B<s> to BIO B<b> in human readable
+format. It will be of the format MMM DD HH:MM:SS YYYY [GMT], for example
+"Feb  3 00:55:52 2015 GMT"; it does not include a newline. If the time
+structure has invalid format it prints out "Bad time value" and returns
+an error.
+
+ASN1_TIME_diff() sets B<*pday> and B<*psec> to the time difference between
+B<from> and B<to>. If B<to> represents a time later than B<from> then
+one or both (depending on the time difference) of B<*pday> and B<*psec>
+will be positive. If B<to> represents a time earlier than B<from> then
+one or both of B<*pday> and B<*psec> will be negative. If B<to> and B<from>
+represent the same time then B<*pday> and B<*psec> will both be zero.
+If both B<*pday> and B<*psec> are non-zero they will always have the same
+sign. The value of B<*psec> will always be less than the number of seconds
+in a day. If B<from> or B<to> is NULL the current time is used.
+
+=head1 NOTES
+
+The ASN1_TIME structure corresponds to the ASN.1 structure B<Time>
+defined in RFC5280 et al. The time setting functions obey the rules outlined
+in RFC5280: if the date can be represented by UTCTime it is used, else
+GeneralizedTime is used.
+
+The ASN1_TIME structure is represented as an ASN1_STRING internally and can
+be freed up using ASN1_STRING_free().
+
+The ASN1_TIME structure can represent years from 0000 to 9999 but no attempt
+is made to correct ancient calendar changes (for example from Julian to
+Gregorian calendars).
+
+Some applications add offset times directly to a time_t value and pass the
+results to ASN1_TIME_set() (or equivalent). This can cause problems as the
+time_t value can overflow on some systems resulting in unexpected results.
+New applications should use ASN1_TIME_adj() instead and pass the offset value
+in the B<offset_sec> and B<offset_day> parameters instead of directly
+manipulating a time_t value.
+
+=head1 BUGS
+
+ASN1_TIME_print() currently does not print out the time zone: it either prints
+out "GMT" or nothing. But all certificates complying with RFC5280 et al use GMT
+anyway.
+
+=head1 EXAMPLES
+
+Set a time structure to one hour after the current time and print it out:
+
+ #include <time.h>
+ #include <openssl/asn1.h>
+ ASN1_TIME *tm;
+ time_t t;
+ BIO *b;
+ t = time(NULL);
+ tm = ASN1_TIME_adj(NULL, t, 0, 60 * 60);
+ b = BIO_new_fp(stdout, BIO_NOCLOSE);
+ ASN1_TIME_print(b, tm);
+ ASN1_STRING_free(tm);
+ BIO_free(b);
+
+Determine if one time is later or sooner than the current time:
+
+ int day, sec;
+
+ if (!ASN1_TIME_diff(&day, &sec, NULL, to))
+       /* Invalid time format */
+
+ if (day > 0 || sec > 0)
+   printf("Later\n");
+ else if (day < 0 || sec < 0)
+   printf("Sooner\n");
+ else
+   printf("Same\n");
+
+=head1 RETURN VALUES
+
+ASN1_TIME_set() and ASN1_TIME_adj() return a pointer to an ASN1_TIME structure
+or NULL if an error occurred.
+
+ASN1_TIME_set_string() returns 1 if the time value is successfully set and
+0 otherwise.
+
+ASN1_TIME_check() returns 1 if the structure is syntactically correct and 0
+otherwise.
+
+ASN1_TIME_print() returns 1 if the time is successfully printed out and 0 if
+an error occurred (I/O error or invalid time format).
+
+ASN1_TIME_diff() returns 1 for success and 0 for failure. It can fail if the
+passed ASN1_TIME structure has invalid syntax, for example.
+
+=cut
index bc5861a..a9f23f1 100644 (file)
@@ -108,7 +108,7 @@ SSL BIOs are exceptional in that if the underlying transport
 is non blocking they can still request a retry in exceptional
 circumstances. Specifically this will happen if a session
 renegotiation takes place during a BIO_read() operation, one
-case where this happens is when SGC or step up occurs.
+case where this happens is when step up occurs.
 
 In OpenSSL 0.9.6 and later the SSL flag SSL_AUTO_RETRY can be
 set to disable this behaviour. That is when this flag is set
index bd3b256..2595200 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-BIO_find_type, BIO_next - BIO chain traversal
+BIO_find_type, BIO_next, BIO_method_type - BIO chain traversal
 
 =head1 SYNOPSIS
 
index b80b6ae..560c112 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-BIO_s_accept, BIO_set_accept_port, BIO_get_accept_port,
+BIO_s_accept, BIO_set_accept_port, BIO_get_accept_port, BIO_new_accept,
 BIO_set_nbio_accept, BIO_set_accept_bios, BIO_set_bind_mode,
 BIO_get_bind_mode, BIO_do_accept - accept BIO
 
index bcf7d8d..18ece4c 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-BIO_s_connect, BIO_set_conn_hostname, BIO_set_conn_port,
+BIO_s_connect, BIO_new_connect, BIO_set_conn_hostname, BIO_set_conn_port,
 BIO_set_conn_ip, BIO_set_conn_int_port, BIO_get_conn_hostname,
 BIO_get_conn_port, BIO_get_conn_ip, BIO_get_conn_int_port,
 BIO_set_nbio, BIO_do_connect - connect BIO
index da06e44..06d7ea2 100644 (file)
@@ -4,7 +4,7 @@
 
 BN_BLINDING_new, BN_BLINDING_free, BN_BLINDING_update, BN_BLINDING_convert, 
 BN_BLINDING_invert, BN_BLINDING_convert_ex, BN_BLINDING_invert_ex, 
-BN_BLINDING_get_thread_id, BN_BLINDING_set_thread_id, BN_BLINDING_get_flags,
+BN_BLINDING_get_thread_id, BN_BLINDING_set_thread_id, BN_BLINDING_thread_id, BN_BLINDING_get_flags,
 BN_BLINDING_set_flags, BN_BLINDING_create_param - blinding related BIGNUM
 functions.
 
@@ -84,7 +84,7 @@ or NULL in case of an error.
 
 BN_BLINDING_update(), BN_BLINDING_convert(), BN_BLINDING_invert(),
 BN_BLINDING_convert_ex() and BN_BLINDING_invert_ex() return 1 on
-success and 0 if an error occured.
+success and 0 if an error occurred.
 
 BN_BLINDING_thread_id() returns a pointer to the thread id object
 within a B<BN_BLINDING> object.
index ad8d07d..bbedbb1 100644 (file)
@@ -10,9 +10,12 @@ BN_CTX_new, BN_CTX_init, BN_CTX_free - allocate and free BN_CTX structures
 
  BN_CTX *BN_CTX_new(void);
 
+ void BN_CTX_free(BN_CTX *c);
+
+Deprecated:
+
  void BN_CTX_init(BN_CTX *c);
 
- void BN_CTX_free(BN_CTX *c);
 
 =head1 DESCRIPTION
 
@@ -22,8 +25,7 @@ is rather expensive when used in conjunction with repeated subroutine
 calls, the B<BN_CTX> structure is used.
 
 BN_CTX_new() allocates and initializes a B<BN_CTX>
-structure. BN_CTX_init() initializes an existing uninitialized
-B<BN_CTX>.
+structure. 
 
 BN_CTX_free() frees the components of the B<BN_CTX>, and if it was
 created by BN_CTX_new(), also the structure itself.
@@ -31,6 +33,8 @@ If L<BN_CTX_start(3)|BN_CTX_start(3)> has been used on the B<BN_CTX>,
 L<BN_CTX_end(3)|BN_CTX_end(3)> must be called before the B<BN_CTX>
 may be freed by BN_CTX_free().
 
+BN_CTX_init() (deprecated) initializes an existing uninitialized B<BN_CTX>.
+This should not be used for new programs. Use BN_CTX_new() instead.
 
 =head1 RETURN VALUES
 
index 7dccacb..bf1b530 100644 (file)
@@ -2,12 +2,31 @@
 
 =head1 NAME
 
-BN_generate_prime, BN_is_prime, BN_is_prime_fasttest - generate primes and test for primality
+BN_generate_prime_ex, BN_is_prime_ex, BN_is_prime_fasttest_ex, BN_GENCB_call,
+BN_GENCB_set_old, BN_GENCB_set, BN_generate_prime, BN_is_prime,
+BN_is_prime_fasttest - generate primes and test for primality
 
 =head1 SYNOPSIS
 
  #include <openssl/bn.h>
 
+ int BN_generate_prime_ex(BIGNUM *ret,int bits,int safe, const BIGNUM *add,
+     const BIGNUM *rem, BN_GENCB *cb);
+
+ int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb);
+
+ int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx,
+     int do_trial_division, BN_GENCB *cb);
+
+ int BN_GENCB_call(BN_GENCB *cb, int a, int b);
+
+ #define BN_GENCB_set_old(gencb, callback, cb_arg) ...
+
+ #define BN_GENCB_set(gencb, callback, cb_arg) ...
+
+
+Deprecated:
+
  BIGNUM *BN_generate_prime(BIGNUM *ret, int num, int safe, BIGNUM *add,
      BIGNUM *rem, void (*callback)(int, int, void *), void *cb_arg);
 
@@ -20,27 +39,27 @@ BN_generate_prime, BN_is_prime, BN_is_prime_fasttest - generate primes and test
 
 =head1 DESCRIPTION
 
-BN_generate_prime() generates a pseudo-random prime number of B<num>
-bits.
+BN_generate_prime_ex() generates a pseudo-random prime number of
+bit length B<bits>.
 If B<ret> is not B<NULL>, it will be used to store the number.
 
-If B<callback> is not B<NULL>, it is called as follows:
+If B<cb> is not B<NULL>, it is used as follows:
 
 =over 4
 
 =item *
 
-B<callback(0, i, cb_arg)> is called after generating the i-th
+B<BN_GENCB_call(cb, 0, i)> is called after generating the i-th
 potential prime number.
 
 =item *
 
-While the number is being tested for primality, B<callback(1, j,
-cb_arg)> is called as described below.
+While the number is being tested for primality,
+B<BN_GENCB_call(cb, 1, j)> is called as described below.
 
 =item *
 
-When a prime has been found, B<callback(2, i, cb_arg)> is called.
+When a prime has been found, B<BN_GENCB_call(cb, 2, i)> is called.
 
 =back
 
@@ -54,38 +73,67 @@ generator.
 If B<safe> is true, it will be a safe prime (i.e. a prime p so
 that (p-1)/2 is also prime).
 
-The PRNG must be seeded prior to calling BN_generate_prime().
+The PRNG must be seeded prior to calling BN_generate_prime_ex().
 The prime number generation has a negligible error probability.
 
-BN_is_prime() and BN_is_prime_fasttest() test if the number B<a> is
+BN_is_prime_ex() and BN_is_prime_fasttest_ex() test if the number B<p> is
 prime.  The following tests are performed until one of them shows that
-B<a> is composite; if B<a> passes all these tests, it is considered
+B<p> is composite; if B<p> passes all these tests, it is considered
 prime.
 
-BN_is_prime_fasttest(), when called with B<do_trial_division == 1>,
+BN_is_prime_fasttest_ex(), when called with B<do_trial_division == 1>,
 first attempts trial division by a number of small primes;
-if no divisors are found by this test and B<callback> is not B<NULL>,
-B<callback(1, -1, cb_arg)> is called.
+if no divisors are found by this test and B<cb> is not B<NULL>,
+B<BN_GENCB_call(cb, 1, -1)> is called.
 If B<do_trial_division == 0>, this test is skipped.
 
-Both BN_is_prime() and BN_is_prime_fasttest() perform a Miller-Rabin
-probabilistic primality test with B<checks> iterations. If
-B<checks == BN_prime_checks>, a number of iterations is used that
+Both BN_is_prime_ex() and BN_is_prime_fasttest_ex() perform a Miller-Rabin
+probabilistic primality test with B<nchecks> iterations. If
+B<nchecks == BN_prime_checks>, a number of iterations is used that
 yields a false positive rate of at most 2^-80 for random input.
 
-If B<callback> is not B<NULL>, B<callback(1, j, cb_arg)> is called
+If B<cb> is not B<NULL>, B<BN_GENCB_call(cb, 1, j)> is called
 after the j-th iteration (j = 0, 1, ...). B<ctx> is a
 pre-allocated B<BN_CTX> (to save the overhead of allocating and
 freeing the structure in a loop), or B<NULL>.
 
+BN_GENCB_call calls the callback function held in the B<BN_GENCB> structure
+and passes the ints B<a> and B<b> as arguments. There are two types of
+B<BN_GENCB> structure that are supported: "new" style and "old" style. New
+programs should prefer the "new" style, whilst the "old" style is provided
+for backwards compatibility purposes.
+
+For "new" style callbacks a BN_GENCB structure should be initialised with a
+call to BN_GENCB_set, where B<gencb> is a B<BN_GENCB *>, B<callback> is of
+type B<int (*callback)(int, int, BN_GENCB *)> and B<cb_arg> is a B<void *>.
+"Old" style callbacks are the same except they are initialised with a call
+to BN_GENCB_set_old and B<callback> is of type
+B<void (*callback)(int, int, void *)>.
+
+A callback is invoked through a call to B<BN_GENCB_call>. This will check
+the type of the callback and will invoke B<callback(a, b, gencb)> for new
+style callbacks or B<callback(a, b, cb_arg)> for old style.
+
+BN_generate_prime (deprecated) works in the same way as
+BN_generate_prime_ex but expects an old style callback function
+directly in the B<callback> parameter, and an argument to pass to it in
+the B<cb_arg>. Similarly BN_is_prime and BN_is_prime_fasttest are
+deprecated and can be compared to BN_is_prime_ex and
+BN_is_prime_fasttest_ex respectively.
+
 =head1 RETURN VALUES
 
-BN_generate_prime() returns the prime number on success, B<NULL> otherwise.
+BN_generate_prime_ex() returns 1 on success or 0 on error.
 
-BN_is_prime() returns 0 if the number is composite, 1 if it is
-prime with an error probability of less than 0.25^B<checks>, and
+BN_is_prime_ex(), BN_is_prime_fasttest_ex(), BN_is_prime() and
+BN_is_prime_fasttest() return 0 if the number is composite, 1 if it is
+prime with an error probability of less than 0.25^B<nchecks>, and
 -1 on error.
 
+BN_generate_prime() returns the prime number on success, B<NULL> otherwise.
+
+Callback functions should return 1 on success or 0 on error.
+
 The error codes can be obtained by L<ERR_get_error(3)|ERR_get_error(3)>.
 
 =head1 SEE ALSO
index 3b2796c..bd6bc86 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-BN_rand, BN_pseudo_rand - generate pseudo-random number
+BN_rand, BN_pseudo_rand, BN_rand_range, BN_pseudo_rand_range - generate pseudo-random number
 
 =head1 SYNOPSIS
 
index 9c13f48..8678ca1 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
- CMS_add0_cert, CMS_add1_cert, CMS_get1_certs, CMS_add0_crl, CMS_get1_crls, - CMS certificate and CRL utility functions
+CMS_add0_cert, CMS_add1_cert, CMS_get1_certs, CMS_add0_crl, CMS_add1_crl, CMS_get1_crls - CMS certificate and CRL utility functions
 
 =head1 SYNOPSIS
 
index e035542..fe49772 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
- CMS_get0_RecipientInfos, CMS_RecipientInfo_type, CMS_RecipientInfo_ktri_get0_signer_id,CMS_RecipientInfo_ktri_cert_cmp, CMS_RecipientInfo_set0_pkey, CMS_RecipientInfo_kekri_get0_id, CMS_RecipientInfo_kekri_id_cmp, CMS_RecipientInfo_set0_key, CMS_RecipientInfo_decrypt - CMS envelopedData RecipientInfo routines
+CMS_get0_RecipientInfos, CMS_RecipientInfo_type, CMS_RecipientInfo_ktri_get0_signer_id, CMS_RecipientInfo_ktri_cert_cmp, CMS_RecipientInfo_set0_pkey, CMS_RecipientInfo_kekri_get0_id, CMS_RecipientInfo_kekri_id_cmp, CMS_RecipientInfo_set0_key, CMS_RecipientInfo_decrypt, CMS_RecipientInfo_encrypt - CMS envelopedData RecipientInfo routines
 
 =head1 SYNOPSIS
 
@@ -20,6 +20,7 @@
  int CMS_RecipientInfo_set0_key(CMS_RecipientInfo *ri, unsigned char *key, size_t keylen);
 
  int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri);
+ int CMS_RecipientInfo_encrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri);
 
 =head1 DESCRIPTION
 
@@ -66,6 +67,11 @@ CMS_RecipientInfo_decrypt() attempts to decrypt CMS_RecipientInfo structure
 B<ri> in structure B<cms>. A key must have been associated with the structure
 first.
 
+CMS_RecipientInfo_encrypt() attempts to encrypt CMS_RecipientInfo structure
+B<ri> in structure B<cms>. A key must have been associated with the structure
+first and the content encryption key must be available: for example by a
+previous call to CMS_RecipientInfo_decrypt().
+
 =head1 NOTES
 
 The main purpose of these functions is to enable an application to lookup
@@ -81,6 +87,13 @@ any appropriate means it can then associated with the structure and
 CMS_RecpientInfo_decrypt() called. If successful CMS_decrypt() can be called
 with a NULL key to decrypt the enveloped content.
 
+CMS_RecipientInfo_encrypt() can be used to add a new recipient to an
+existing enveloped data structure. Typically an application will first decrypt
+an appropriate CMS_RecipientInfo structure to make the content encryption key
+available, it will then add a new recipient using a function such as
+CMS_add1_recipient_cert() and finally encrypt the content encryption key
+using CMS_RecipientInfo_encrypt().
+
 =head1 RETURN VALUES
 
 CMS_get0_RecipientInfos() returns all CMS_RecipientInfo structures, or NULL if
@@ -89,6 +102,7 @@ an error occurs.
 CMS_RecipientInfo_ktri_get0_signer_id(), CMS_RecipientInfo_set0_pkey(),
 CMS_RecipientInfo_kekri_get0_id(), CMS_RecipientInfo_set0_key() and
 CMS_RecipientInfo_decrypt() return 1 for success or 0 if an error occurs.
+CMS_RecipientInfo_encrypt() returns 1 for success or 0 if an error occurs.
 
 CMS_RecipientInfo_ktri_cert_cmp() and CMS_RecipientInfo_kekri_cmp() return 0
 for a successful comparison and non zero otherwise.
index 47f6d2a..b46c0e0 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
- CMS_get0_SignerInfos, CMS_SignerInfo_get0_signer_id, CMS_SignerInfo_cert_cmp, CMS_set1_signer_certs - CMS signedData signer functions.
+CMS_get0_SignerInfos, CMS_SignerInfo_get0_signer_id, CMS_SignerInfo_get0_signature, CMS_SignerInfo_cert_cmp, CMS_SignerInfo_set1_signer_cert - CMS signedData signer functions.
 
 =head1 SYNOPSIS
 
@@ -11,6 +11,7 @@
  STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms);
 
  int CMS_SignerInfo_get0_signer_id(CMS_SignerInfo *si, ASN1_OCTET_STRING **keyid, X509_NAME **issuer, ASN1_INTEGER **sno);
+ ASN1_OCTET_STRING *CMS_SignerInfo_get0_signature(CMS_SignerInfo *si);
  int CMS_SignerInfo_cert_cmp(CMS_SignerInfo *si, X509 *cert);
  void CMS_SignerInfo_set1_signer_cert(CMS_SignerInfo *si, X509 *signer);
 
@@ -24,6 +25,11 @@ associated with a specific CMS_SignerInfo structure B<si>. Either the
 keyidentifier will be set in B<keyid> or B<both> issuer name and serial number
 in B<issuer> and B<sno>.
 
+CMS_SignerInfo_get0_signature() retrieves the signature associated with
+B<si> in a pointer to an ASN1_OCTET_STRING structure. The pointer returned
+corresponds to the internal signature value of B<si> so it may be read or
+modified.
+
 CMS_SignerInfo_cert_cmp() compares the certificate B<cert> against the signer
 identifier B<si>. It returns zero if the comparison is successful and non zero
 if not.
index 8f26fda..7a2c1ee 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
- CMS_verify - verify a CMS SignedData structure
+CMS_verify, CMS_get0_signers - verify a CMS SignedData structure
 
 =head1 SYNOPSIS
 
index 9081e9e..7f81a04 100644 (file)
@@ -2,32 +2,39 @@
 
 =head1 NAME
 
-DH_generate_parameters, DH_check - generate and check Diffie-Hellman parameters
+
+DH_generate_parameters_ex, DH_generate_parameters,
+DH_check - generate and check Diffie-Hellman parameters
 
 =head1 SYNOPSIS
 
  #include <openssl/dh.h>
 
- DH *DH_generate_parameters(int prime_len, int generator,
-     void (*callback)(int, int, void *), void *cb_arg);
+ int DH_generate_parameters_ex(DH *dh, int prime_len,int generator, BN_GENCB *cb);
 
  int DH_check(DH *dh, int *codes);
 
+Deprecated:
+
+ DH *DH_generate_parameters(int prime_len, int generator,
+     void (*callback)(int, int, void *), void *cb_arg);
+
 =head1 DESCRIPTION
 
-DH_generate_parameters() generates Diffie-Hellman parameters that can
-be shared among a group of users, and returns them in a newly
-allocated B<DH> structure. The pseudo-random number generator must be
+DH_generate_parameters_ex() generates Diffie-Hellman parameters that can
+be shared among a group of users, and stores them in the provided B<DH>
+structure. The pseudo-random number generator must be
 seeded prior to calling DH_generate_parameters().
 
 B<prime_len> is the length in bits of the safe prime to be generated.
 B<generator> is a small number E<gt> 1, typically 2 or 5. 
 
 A callback function may be used to provide feedback about the progress
-of the key generation. If B<callback> is not B<NULL>, it will be
+of the key generation. If B<cb> is not B<NULL>, it will be
 called as described in L<BN_generate_prime(3)|BN_generate_prime(3)> while a random prime
-number is generated, and when a prime has been found, B<callback(3,
-0, cb_arg)> is called.
+number is generated, and when a prime has been found, B<BN_GENCB_call(cb, 3, 0)>
+is called. See L<BN_generate_prime(3)|BN_generate_prime(3)> for information on
+the BN_GENCB_call() function.
 
 DH_check() validates Diffie-Hellman parameters. It checks that B<p> is
 a safe prime, and that B<g> is a suitable generator. In the case of an
@@ -38,19 +45,21 @@ checked, i.e. it does not equal 2 or 5.
 
 =head1 RETURN VALUES
 
-DH_generate_parameters() returns a pointer to the DH structure, or
-NULL if the parameter generation fails. The error codes can be
-obtained by L<ERR_get_error(3)|ERR_get_error(3)>.
+DH_generate_parameters_ex() returns 1 on success and 0 on failure. DH_check()
+returns 1 if the check could be performed, 0 otherwise.
+
+DH_generate_parameters() (deprecated) returns a pointer to the DH structure, or
+NULL if the parameter generation fails.
 
-DH_check() returns 1 if the check could be performed, 0 otherwise.
+The error codes can be obtained by L<ERR_get_error(3)|ERR_get_error(3)>.
 
 =head1 NOTES
 
-DH_generate_parameters() may run for several hours before finding a
-suitable prime.
+DH_generate_parameters_ex() and DH_generate_parameters() may run for several
+hours before finding a suitable prime.
 
-The parameters generated by DH_generate_parameters() are not to be
-used in signature schemes.
+The parameters generated by DH_generate_parameters_ex() and DH_generate_parameters()
+are not to be used in signature schemes.
 
 =head1 BUGS
 
index be7c924..16a67f2 100644 (file)
@@ -2,20 +2,26 @@
 
 =head1 NAME
 
-DSA_generate_parameters - generate DSA parameters
+DSA_generate_parameters_ex, DSA_generate_parameters - generate DSA parameters
 
 =head1 SYNOPSIS
 
  #include <openssl/dsa.h>
 
+ int DSA_generate_parameters_ex(DSA *dsa, int bits,
+               const unsigned char *seed,int seed_len,
+               int *counter_ret, unsigned long *h_ret, BN_GENCB *cb);
+
+Deprecated:
+
  DSA *DSA_generate_parameters(int bits, unsigned char *seed,
                 int seed_len, int *counter_ret, unsigned long *h_ret,
                void (*callback)(int, int, void *), void *cb_arg);
 
 =head1 DESCRIPTION
 
-DSA_generate_parameters() generates primes p and q and a generator g
-for use in the DSA.
+DSA_generate_parameters_ex() generates primes p and q and a generator g
+for use in the DSA and stores the result in B<dsa>.
 
 B<bits> is the length of the prime to be generated; the DSS allows a
 maximum of 1024 bits.
@@ -25,64 +31,74 @@ generated at random. Otherwise, the seed is used to generate
 them. If the given seed does not yield a prime q, a new random
 seed is chosen and placed at B<seed>.
 
-DSA_generate_parameters() places the iteration count in
+DSA_generate_parameters_ex() places the iteration count in
 *B<counter_ret> and a counter used for finding a generator in
 *B<h_ret>, unless these are B<NULL>.
 
 A callback function may be used to provide feedback about the progress
-of the key generation. If B<callback> is not B<NULL>, it will be
-called as follows:
+of the key generation. If B<cb> is not B<NULL>, it will be
+called as shown below. For information on the BN_GENCB structure and the
+BN_GENCB_call function discussed below, refer to
+L<BN_generate_prime(3)|BN_generate_prime(3)>.
 
 =over 4
 
 =item *
 
-When a candidate for q is generated, B<callback(0, m++, cb_arg)> is called
+When a candidate for q is generated, B<BN_GENCB_call(cb, 0, m++)> is called
 (m is 0 for the first candidate).
 
 =item *
 
 When a candidate for q has passed a test by trial division,
-B<callback(1, -1, cb_arg)> is called.
+B<BN_GENCB_call(cb, 1, -1)> is called.
 While a candidate for q is tested by Miller-Rabin primality tests,
-B<callback(1, i, cb_arg)> is called in the outer loop
+B<BN_GENCB_call(cb, 1, i)> is called in the outer loop
 (once for each witness that confirms that the candidate may be prime);
 i is the loop counter (starting at 0).
 
 =item *
 
-When a prime q has been found, B<callback(2, 0, cb_arg)> and
-B<callback(3, 0, cb_arg)> are called.
+When a prime q has been found, B<BN_GENCB_call(cb, 2, 0)> and
+B<BN_GENCB_call(cb, 3, 0)> are called.
 
 =item *
 
 Before a candidate for p (other than the first) is generated and tested,
-B<callback(0, counter, cb_arg)> is called.
+B<BN_GENCB_call(cb, 0, counter)> is called.
 
 =item *
 
 When a candidate for p has passed the test by trial division,
-B<callback(1, -1, cb_arg)> is called.
+B<BN_GENCB_call(cb, 1, -1)> is called.
 While it is tested by the Miller-Rabin primality test,
-B<callback(1, i, cb_arg)> is called in the outer loop
+B<BN_GENCB_call(cb, 1, i)> is called in the outer loop
 (once for each witness that confirms that the candidate may be prime).
 i is the loop counter (starting at 0).
 
 =item *
 
-When p has been found, B<callback(2, 1, cb_arg)> is called.
+When p has been found, B<BN_GENCB_call(cb, 2, 1)> is called.
 
 =item *
 
-When the generator has been found, B<callback(3, 1, cb_arg)> is called.
+When the generator has been found, B<BN_GENCB_call(cb, 3, 1)> is called.
 
 =back
 
+DSA_generate_parameters() (deprecated) works in much the same way as DSA_generate_parameters_ex(), except that no B<dsa> parameter is passed and
+instead a newly allocated B<DSA> structure is returned. Additionally "old
+style" callbacks are used instead of the newer BN_GENCB based approach.
+Refer to L<BN_generate_prime(3)|BN_generate_prime(3)> for further information.
+
 =head1 RETURN VALUE
 
+DSA_generate_parameters_ex() returns a 1 on success, or 0 otherwise.
+
 DSA_generate_parameters() returns a pointer to the DSA structure, or
-B<NULL> if the parameter generation fails. The error codes can be
-obtained by L<ERR_get_error(3)|ERR_get_error(3)>.
+B<NULL> if the parameter generation fails.
+
+The error codes can be obtained by L<ERR_get_error(3)|ERR_get_error(3)>.
 
 =head1 BUGS
 
@@ -91,7 +107,7 @@ Seed lengths E<gt> 20 are not supported.
 =head1 SEE ALSO
 
 L<dsa(3)|dsa(3)>, L<ERR_get_error(3)|ERR_get_error(3)>, L<rand(3)|rand(3)>,
-L<DSA_free(3)|DSA_free(3)>
+L<DSA_free(3)|DSA_free(3)>, L<BN_generate_prime(3)|BN_generate_prime(3)>
 
 =head1 HISTORY
 
diff --git a/doc/crypto/EC_GFp_simple_method.pod b/doc/crypto/EC_GFp_simple_method.pod
new file mode 100644 (file)
index 0000000..aff20ac
--- /dev/null
@@ -0,0 +1,60 @@
+=pod
+
+=head1 NAME
+
+EC_GFp_simple_method, EC_GFp_mont_method, EC_GFp_nist_method, EC_GFp_nistp224_method, EC_GFp_nistp256_method, EC_GFp_nistp521_method, EC_GF2m_simple_method, EC_METHOD_get_field_type - Functions for obtaining B<EC_METHOD> objects.
+
+=head1 SYNOPSIS
+
+ #include <openssl/ec.h>
+
+ const EC_METHOD *EC_GFp_simple_method(void);
+ const EC_METHOD *EC_GFp_mont_method(void);
+ const EC_METHOD *EC_GFp_nist_method(void);
+ const EC_METHOD *EC_GFp_nistp224_method(void);
+ const EC_METHOD *EC_GFp_nistp256_method(void);
+ const EC_METHOD *EC_GFp_nistp521_method(void);
+
+ const EC_METHOD *EC_GF2m_simple_method(void);
+
+ int EC_METHOD_get_field_type(const EC_METHOD *meth);
+
+=head1 DESCRIPTION
+
+The Elliptic Curve library provides a number of different implementations through a single common interface.
+When constructing a curve using EC_GROUP_new (see L<EC_GROUP_new(3)|EC_GROUP_new(3)>) an
+implementation method must be provided. The functions described here all return a const pointer to an
+B<EC_METHOD> structure that can be passed to EC_GROUP_new. It is important that the correct implementation
+type for the form of curve selected is used.
+
+For F2^m curves there is only one implementation choice, i.e. EC_GF2m_simple_method.
+
+For Fp curves the lowest common denominator implementation is the EC_GFp_simple_method implementation. All
+other implementations are based on this one. EC_GFp_mont_method builds on EC_GFp_simple_method but adds the
+use of Montgomery multiplication (see L<BN_mod_mul_montgomery(3)|BN_mod_mul_montgomery(3)>). EC_GFp_nist_method
+offers an implementation optimised for use with NIST recommended curves (NIST curves are available through
+EC_GROUP_new_by_curve_name as described in L<EC_GROUP_new(3)|EC_GROUP_new(3)>).
+
+The functions EC_GFp_nistp224_method, EC_GFp_nistp256_method and EC_GFp_nistp521_method offer 64 bit
+optimised implementations for the NIST P224, P256 and P521 curves respectively. Note, however, that these
+implementations are not available on all platforms.
+
+EC_METHOD_get_field_type identifies what type of field the EC_METHOD structure supports, which will be either
+F2^m or Fp. If the field type is Fp then the value B<NID_X9_62_prime_field> is returned. If the field type is
+F2^m then the value B<NID_X9_62_characteristic_two_field> is returned. These values are defined in the
+obj_mac.h header file.
+
+=head1 RETURN VALUES
+
+All EC_GFp* functions and EC_GF2m_simple_method always return a const pointer to an EC_METHOD structure.
+
+EC_METHOD_get_field_type returns an integer that identifies the type of field the EC_METHOD structure supports.
+
+=head1 SEE ALSO
+
+L<crypto(3)|crypto(3)>, L<ec(3)|ec(3)>, L<EC_GROUP_new(3)|EC_GROUP_new(3)>, L<EC_GROUP_copy(3)|EC_GROUP_copy(3)>,
+L<EC_POINT_new(3)|EC_POINT_new(3)>, L<EC_POINT_add(3)|EC_POINT_add(3)>, L<EC_KEY_new(3)|EC_KEY_new(3)>,
+L<d2i_ECPKParameters(3)|d2i_ECPKParameters(3)>,
+L<BN_mod_mul_montgomery(3)|BN_mod_mul_montgomery(3)>
+
+=cut
diff --git a/doc/crypto/EC_GROUP_copy.pod b/doc/crypto/EC_GROUP_copy.pod
new file mode 100644 (file)
index 0000000..954af46
--- /dev/null
@@ -0,0 +1,174 @@
+=pod
+
+=head1 NAME
+
+EC_GROUP_copy, EC_GROUP_dup, EC_GROUP_method_of, EC_GROUP_set_generator, EC_GROUP_get0_generator, EC_GROUP_get_order, EC_GROUP_get_cofactor, EC_GROUP_set_curve_name, EC_GROUP_get_curve_name, EC_GROUP_set_asn1_flag, EC_GROUP_get_asn1_flag, EC_GROUP_set_point_conversion_form, EC_GROUP_get_point_conversion_form, EC_GROUP_get0_seed, EC_GROUP_get_seed_len, EC_GROUP_set_seed, EC_GROUP_get_degree, EC_GROUP_check, EC_GROUP_check_discriminant, EC_GROUP_cmp, EC_GROUP_get_basis_type, EC_GROUP_get_trinomial_basis, EC_GROUP_get_pentanomial_basis - Functions for manipulating B<EC_GROUP> objects.
+
+=head1 SYNOPSIS
+
+ #include <openssl/ec.h>
+ #include <openssl/bn.h>
+
+ int EC_GROUP_copy(EC_GROUP *dst, const EC_GROUP *src);
+ EC_GROUP *EC_GROUP_dup(const EC_GROUP *src);
+
+ const EC_METHOD *EC_GROUP_method_of(const EC_GROUP *group);
+
+ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, const BIGNUM *order, const BIGNUM *cofactor);
+ const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group);
+
+ int EC_GROUP_get_order(const EC_GROUP *group, BIGNUM *order, BN_CTX *ctx);
+ int EC_GROUP_get_cofactor(const EC_GROUP *group, BIGNUM *cofactor, BN_CTX *ctx);
+
+ void EC_GROUP_set_curve_name(EC_GROUP *group, int nid);
+ int EC_GROUP_get_curve_name(const EC_GROUP *group);
+
+ void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag);
+ int EC_GROUP_get_asn1_flag(const EC_GROUP *group);
+
+ void EC_GROUP_set_point_conversion_form(EC_GROUP *group, point_conversion_form_t form);
+ point_conversion_form_t EC_GROUP_get_point_conversion_form(const EC_GROUP *);
+
+ unsigned char *EC_GROUP_get0_seed(const EC_GROUP *x);
+ size_t EC_GROUP_get_seed_len(const EC_GROUP *);
+ size_t EC_GROUP_set_seed(EC_GROUP *, const unsigned char *, size_t len);
+
+ int EC_GROUP_get_degree(const EC_GROUP *group);
+
+ int EC_GROUP_check(const EC_GROUP *group, BN_CTX *ctx);
+
+ int EC_GROUP_check_discriminant(const EC_GROUP *group, BN_CTX *ctx);
+
+ int EC_GROUP_cmp(const EC_GROUP *a, const EC_GROUP *b, BN_CTX *ctx);
+
+ int EC_GROUP_get_basis_type(const EC_GROUP *);
+ int EC_GROUP_get_trinomial_basis(const EC_GROUP *, unsigned int *k);
+ int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1, 
+       unsigned int *k2, unsigned int *k3);
+
+=head1 DESCRIPTION
+
+EC_GROUP_copy copies the curve B<src> into B<dst>. Both B<src> and B<dst> must use the same EC_METHOD.
+
+EC_GROUP_dup creates a new EC_GROUP object and copies the content from B<src> to the newly created
+EC_GROUP object.
+
+EC_GROUP_method_of obtains the EC_METHOD of B<group>.
+
+EC_GROUP_set_generator sets curve parameters that must be agreed by all participants using the curve. These
+parameters include the B<generator>, the B<order> and the B<cofactor>. The B<generator> is a well defined point on the
+curve chosen for cryptographic operations. Integers used for point multiplications will be between 0 and
+n-1 where n is the B<order>. The B<order> multiplied by the B<cofactor> gives the number of points on the curve.
+
+EC_GROUP_get0_generator returns the generator for the identified B<group>.
+
+The functions EC_GROUP_get_order and EC_GROUP_get_cofactor populate the provided B<order> and B<cofactor> parameters
+with the respective order and cofactors for the B<group>.
+
+The functions EC_GROUP_set_curve_name and EC_GROUP_get_curve_name, set and get the NID for the curve respectively
+(see L<EC_GROUP_new(3)|EC_GROUP_new(3)>). If a curve does not have a NID associated with it, then EC_GROUP_get_curve_name
+will return 0.
+
+The asn1_flag value on a curve is used to determine whether there is a specific ASN1 OID to describe the curve or not.
+If the asn1_flag is 1 then this is a named curve with an associated ASN1 OID. If not then asn1_flag is 0. The functions
+EC_GROUP_get_asn1_flag and EC_GROUP_set_asn1_flag get and set the status of the asn1_flag for the curve. If set then
+the curve_name must also be set.
+
+The point_conversion_form for a curve controls how EC_POINT data is encoded as ASN1 as defined in X9.62 (ECDSA).
+point_conversion_form_t is an enum defined as follows: 
+
+ typedef enum {
+       /** the point is encoded as z||x, where the octet z specifies 
+        *   which solution of the quadratic equation y is  */
+       POINT_CONVERSION_COMPRESSED = 2,
+       /** the point is encoded as z||x||y, where z is the octet 0x04  */
+       POINT_CONVERSION_UNCOMPRESSED = 4,
+       /** the point is encoded as z||x||y, where the octet z specifies
+         *  which solution of the quadratic equation y is  */
+       POINT_CONVERSION_HYBRID = 6
+ } point_conversion_form_t;
+
+For POINT_CONVERSION_UNCOMPRESSED the point is encoded as an octet signifying the UNCOMPRESSED form has been used followed by
+the octets for x, followed by the octets for y.
+
+For any given x co-ordinate for a point on a curve it is possible to derive two possible y values. For
+POINT_CONVERSION_COMPRESSED the point is encoded as an octet signifying that the COMPRESSED form has been used AND which of
+the two possible solutions for y has been used, followed by the octets for x. 
+
+For POINT_CONVERSION_HYBRID the point is encoded as an octet signifying the HYBRID form has been used AND which of the two
+possible solutions for y has been used, followed by the octets for x, followed by the octets for y.
+
+The functions EC_GROUP_set_point_conversion_form and EC_GROUP_get_point_conversion_form set and get the point_conversion_form
+for the curve respectively.
+
+ANSI X9.62 (ECDSA standard) defines a method of generating the curve parameter b from a random number. This provides advantages
+in that a parameter obtained in this way is highly unlikely to be susceptible to special purpose attacks, or have any trapdoors in it.
+If the seed is present for a curve then the b parameter was generated in a verifiable fashion using that seed. The OpenSSL EC library
+does not use this seed value but does enable you to inspect it using EC_GROUP_get0_seed. This returns a pointer to a memory block
+containing the seed that was used. The length of the memory block can be obtained using EC_GROUP_get_seed_len. A number of the
+builtin curves within the library provide seed values that can be obtained. It is also possible to set a custom seed using
+EC_GROUP_set_seed and passing a pointer to a memory block, along with the length of the seed. Again, the EC library will not use
+this seed value, although it will be preserved in any ASN1 based communications.
+
+EC_GROUP_get_degree gets the degree of the field. For Fp fields this will be the number of bits in p.  For F2^m fields this will be
+the value m.
+
+The function EC_GROUP_check_discriminant calculates the discriminant for the curve and verifies that it is valid.
+For a curve defined over Fp the discriminant is given by the formula 4*a^3 + 27*b^2 whilst for F2^m curves the discriminant is
+simply b. In either case for the curve to be valid the discriminant must be non zero.
+
+The function EC_GROUP_check performs a number of checks on a curve to verify that it is valid. Checks performed include
+verifying that the discriminant is non zero; that a generator has been defined; that the generator is on the curve and has
+the correct order.
+
+EC_GROUP_cmp compares B<a> and B<b> to determine whether they represent the same curve or not.
+
+The functions EC_GROUP_get_basis_type, EC_GROUP_get_trinomial_basis and EC_GROUP_get_pentanomial_basis should only be called for curves
+defined over an F2^m field. Addition and multiplication operations within an F2^m field are performed using an irreducible polynomial
+function f(x). This function is either a trinomial of the form:
+
+f(x) = x^m + x^k + 1 with m > k >= 1
+
+or a pentanomial of the form:
+
+f(x) = x^m + x^k3 + x^k2 + x^k1 + 1 with m > k3 > k2 > k1 >= 1
+
+The function EC_GROUP_get_basis_type returns a NID identifying whether a trinomial or pentanomial is in use for the field. The
+function EC_GROUP_get_trinomial_basis must only be called where f(x) is of the trinomial form, and returns the value of B<k>. Similarly
+the function EC_GROUP_get_pentanomial_basis must only be called where f(x) is of the pentanomial form, and returns the values of B<k1>,
+B<k2> and B<k3> respectively.
+
+=head1 RETURN VALUES
+
+The following functions return 1 on success or 0 on error: EC_GROUP_copy, EC_GROUP_set_generator, EC_GROUP_check,
+EC_GROUP_check_discriminant, EC_GROUP_get_trinomial_basis and EC_GROUP_get_pentanomial_basis.
+
+EC_GROUP_dup returns a pointer to the duplicated curve, or NULL on error.
+
+EC_GROUP_method_of returns the EC_METHOD implementation in use for the given curve or NULL on error.
+
+EC_GROUP_get0_generator returns the generator for the given curve or NULL on error.
+
+EC_GROUP_get_order, EC_GROUP_get_cofactor, EC_GROUP_get_curve_name, EC_GROUP_get_asn1_flag, EC_GROUP_get_point_conversion_form
+and EC_GROUP_get_degree return the order, cofactor, curve name (NID), ASN1 flag, point_conversion_form and degree for the
+specified curve respectively. If there is no curve name associated with a curve then EC_GROUP_get_curve_name will return 0.
+
+EC_GROUP_get0_seed returns a pointer to the seed that was used to generate the parameter b, or NULL if the seed is not
+specified. EC_GROUP_get_seed_len returns the length of the seed or 0 if the seed is not specified.
+
+EC_GROUP_set_seed returns the length of the seed that has been set. If the supplied seed is NULL, or the supplied seed length is
+0, then the return value will be 1. On error 0 is returned.
+
+EC_GROUP_cmp returns 0 if the curves are equal, 1 if they are not equal, or -1 on error.
+
+EC_GROUP_get_basis_type returns the values NID_X9_62_tpBasis or NID_X9_62_ppBasis (as defined in <openssl/obj_mac.h>) for a
+trinomial or pentanomial respectively. Alternatively in the event of an error a 0 is returned.
+
+=head1 SEE ALSO
+
+L<crypto(3)|crypto(3)>, L<ec(3)|ec(3)>, L<EC_GROUP_new(3)|EC_GROUP_new(3)>,
+L<EC_POINT_new(3)|EC_POINT_new(3)>, L<EC_POINT_add(3)|EC_POINT_add(3)>, L<EC_KEY_new(3)|EC_KEY_new(3)>,
+L<EC_GFp_simple_method(3)|EC_GFp_simple_method(3)>, L<d2i_ECPKParameters(3)|d2i_ECPKParameters(3)>
+
+=cut
diff --git a/doc/crypto/EC_GROUP_new.pod b/doc/crypto/EC_GROUP_new.pod
new file mode 100644 (file)
index 0000000..ff55bf3
--- /dev/null
@@ -0,0 +1,95 @@
+=pod
+
+=head1 NAME
+
+EC_GROUP_new, EC_GROUP_free, EC_GROUP_clear_free, EC_GROUP_new_curve_GFp, EC_GROUP_new_curve_GF2m, EC_GROUP_new_by_curve_name, EC_GROUP_set_curve_GFp, EC_GROUP_get_curve_GFp, EC_GROUP_set_curve_GF2m, EC_GROUP_get_curve_GF2m, EC_get_builtin_curves - Functions for creating and destroying B<EC_GROUP> objects.
+
+=head1 SYNOPSIS
+
+ #include <openssl/ec.h>
+ #include <openssl/bn.h>
+
+ EC_GROUP *EC_GROUP_new(const EC_METHOD *meth);
+ void EC_GROUP_free(EC_GROUP *group);
+ void EC_GROUP_clear_free(EC_GROUP *group);
+
+ EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+ EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+ EC_GROUP *EC_GROUP_new_by_curve_name(int nid);
+
+ int EC_GROUP_set_curve_GFp(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+ int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx);
+ int EC_GROUP_set_curve_GF2m(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+ int EC_GROUP_get_curve_GF2m(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx);
+
+ size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems);
+
+=head1 DESCRIPTION
+
+Within the library there are two forms of elliptic curve that are of interest. The first form is those defined over the
+prime field Fp. The elements of Fp are the integers 0 to p-1, where p is a prime number. This gives us a revised
+elliptic curve equation as follows:
+
+y^2 mod p = x^3 +ax + b mod p
+
+The second form is those defined over a binary field F2^m where the elements of the field are integers of length at
+most m bits. For this form the elliptic curve equation is modified to:
+
+y^2 + xy = x^3 + ax^2 + b (where b != 0)
+
+Operations in a binary field are performed relative to an B<irreducible polynomial>. All such curves with OpenSSL
+use a trinomial or a pentanomial for this parameter.
+
+A new curve can be constructed by calling EC_GROUP_new, using the implementation provided by B<meth> (see
+L<EC_GFp_simple_method(3)|EC_GFp_simple_method(3)>). It is then necessary to call either EC_GROUP_set_curve_GFp or
+EC_GROUP_set_curve_GF2m as appropriate to create a curve defined over Fp or over F2^m respectively. 
+
+EC_GROUP_set_curve_GFp sets the curve parameters B<p>, B<a> and B<b> for a curve over Fp stored in B<group>.
+EC_GROUP_get_curve_GFp obtains the previously set curve parameters.
+
+EC_GROUP_set_curve_GF2m sets the equivalent curve parameters for a curve over F2^m. In this case B<p> represents
+the irreducible polynomial - each bit represents a term in the polynomial. Therefore there will either be three
+or five bits set dependent on whether the polynomial is a trinomial or a pentanomial.
+EC_GROUP_get_curve_GF2m obtains the previously set curve parameters.
+
+The functions EC_GROUP_new_curve_GFp and EC_GROUP_new_curve_GF2m are shortcuts for calling EC_GROUP_new and the
+appropriate EC_GROUP_set_curve_GFp or EC_GROUP_set_curve_GF2m function. An appropriate default implementation method will be used.
+
+Whilst the library can be used to create any curve using the functions described above, there are also a number of
+predefined curves that are available. In order to obtain a list of all of the predefined curves, call the function
+EC_get_builtin_curves. The parameter B<r> should be an array of EC_builtin_curve structures of size B<nitems>. The function
+will populate the B<r> array with information about the builtin curves. If B<nitems> is less than the total number of
+curves available, then the first B<nitems> curves will be returned. Otherwise the total number of curves will be
+provided. The return value is the total number of curves available (whether that number has been populated in B<r> or
+not). Passing a NULL B<r>, or setting B<nitems> to 0 will do nothing other than return the total number of curves available.
+The EC_builtin_curve structure is defined as follows:
+
+ typedef struct { 
+       int nid;
+       const char *comment;
+       } EC_builtin_curve;
+
+Each EC_builtin_curve item has a unique integer id (B<nid>), and a human readable comment string describing the curve.
+
+In order to construct a builtin curve use the function EC_GROUP_new_by_curve_name and provide the B<nid> of the curve to
+be constructed.
+
+EC_GROUP_free frees the memory associated with the EC_GROUP.
+
+EC_GROUP_clear_free destroys any sensitive data held within the EC_GROUP and then frees its memory.
+
+=head1 RETURN VALUES
+
+All EC_GROUP_new* functions return a pointer to the newly constructed group, or NULL on error.
+
+EC_get_builtin_curves returns the number of builtin curves that are available.
+
+EC_GROUP_set_curve_GFp, EC_GROUP_get_curve_GFp, EC_GROUP_set_curve_GF2m, EC_GROUP_get_curve_GF2m return 1 on success or 0 on error.
+
+=head1 SEE ALSO
+
+L<crypto(3)|crypto(3)>, L<ec(3)|ec(3)>, L<EC_GROUP_copy(3)|EC_GROUP_copy(3)>,
+L<EC_POINT_new(3)|EC_POINT_new(3)>, L<EC_POINT_add(3)|EC_POINT_add(3)>, L<EC_KEY_new(3)|EC_KEY_new(3)>,
+L<EC_GFp_simple_method(3)|EC_GFp_simple_method(3)>, L<d2i_ECPKParameters(3)|d2i_ECPKParameters(3)>
+
+=cut
diff --git a/doc/crypto/EC_KEY_new.pod b/doc/crypto/EC_KEY_new.pod
new file mode 100644 (file)
index 0000000..e859689
--- /dev/null
@@ -0,0 +1,108 @@
+=pod
+
+=head1 NAME
+
+EC_KEY_new, EC_KEY_get_flags, EC_KEY_set_flags, EC_KEY_clear_flags, EC_KEY_new_by_curve_name, EC_KEY_free, EC_KEY_copy, EC_KEY_dup, EC_KEY_up_ref, EC_KEY_get0_group, EC_KEY_set_group, EC_KEY_get0_private_key, EC_KEY_set_private_key, EC_KEY_get0_public_key, EC_KEY_set_public_key, EC_KEY_get_enc_flags, EC_KEY_set_enc_flags, EC_KEY_get_conv_form, EC_KEY_set_conv_form, EC_KEY_get_key_method_data, EC_KEY_insert_key_method_data, EC_KEY_set_asn1_flag, EC_KEY_precompute_mult, EC_KEY_generate_key, EC_KEY_check_key, EC_KEY_set_public_key_affine_coordinates - Functions for creating, destroying and manipulating B<EC_KEY> objects.
+
+=head1 SYNOPSIS
+
+ #include <openssl/ec.h>
+ #include <openssl/bn.h>
+
+ EC_KEY *EC_KEY_new(void);
+ int EC_KEY_get_flags(const EC_KEY *key);
+ void EC_KEY_set_flags(EC_KEY *key, int flags);
+ void EC_KEY_clear_flags(EC_KEY *key, int flags);
+ EC_KEY *EC_KEY_new_by_curve_name(int nid);
+ void EC_KEY_free(EC_KEY *key);
+ EC_KEY *EC_KEY_copy(EC_KEY *dst, const EC_KEY *src);
+ EC_KEY *EC_KEY_dup(const EC_KEY *src);
+ int EC_KEY_up_ref(EC_KEY *key);
+ const EC_GROUP *EC_KEY_get0_group(const EC_KEY *key);
+ int EC_KEY_set_group(EC_KEY *key, const EC_GROUP *group);
+ const BIGNUM *EC_KEY_get0_private_key(const EC_KEY *key);
+ int EC_KEY_set_private_key(EC_KEY *key, const BIGNUM *prv);
+ const EC_POINT *EC_KEY_get0_public_key(const EC_KEY *key);
+ int EC_KEY_set_public_key(EC_KEY *key, const EC_POINT *pub);
+ point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key);
+ void EC_KEY_set_conv_form(EC_KEY *eckey, point_conversion_form_t cform);
+ void *EC_KEY_get_key_method_data(EC_KEY *key, 
+       void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *));
+ void EC_KEY_insert_key_method_data(EC_KEY *key, void *data,
+       void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *));
+ void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag);
+ int EC_KEY_precompute_mult(EC_KEY *key, BN_CTX *ctx);
+ int EC_KEY_generate_key(EC_KEY *key);
+ int EC_KEY_check_key(const EC_KEY *key);
+ int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y);
+
+=head1 DESCRIPTION
+
+An EC_KEY represents a public key and (optionally) an associated private key. A new EC_KEY (with no associated curve) can be constructed by calling EC_KEY_new.
+The reference count for the newly created EC_KEY is initially set to 1. A curve can be associated with the EC_KEY by calling
+EC_KEY_set_group.
+
+Alternatively a new EC_KEY can be constructed by calling EC_KEY_new_by_curve_name and supplying the nid of the associated curve. Refer to L<EC_GROUP_new(3)|EC_GROUP_new(3)> for a description of curve names. This function simply wraps calls to EC_KEY_new and 
+EC_GROUP_new_by_curve_name.
+
+Calling EC_KEY_free decrements the reference count for the EC_KEY object, and if it has dropped to zero then frees the memory associated
+with it.
+
+EC_KEY_copy copies the contents of the EC_KEY in B<src> into B<dst>.
+
+EC_KEY_dup creates a new EC_KEY object and copies B<src> into it.
+
+EC_KEY_up_ref increments the reference count associated with the EC_KEY object.
+
+EC_KEY_generate_key generates a new public and private key for the supplied B<key> object. B<key> must have an EC_GROUP object
+associated with it before calling this function. The private key is a random integer (0 < priv_key < order, where order is the order
+of the EC_GROUP object). The public key is an EC_POINT on the curve calculated by multiplying the generator for the curve by the
+private key.
+
+EC_KEY_check_key performs various sanity checks on the EC_KEY object to confirm that it is valid.
+
+EC_KEY_set_public_key_affine_coordinates sets the public key for B<key> based on its affine co-ordinates, i.e. it constructs an EC_POINT
+object based on the supplied B<x> and B<y> values and sets the public key to be this EC_POINT. It also performs certain sanity checks
+on the key to confirm that it is valid.
+
+The functions EC_KEY_get0_group, EC_KEY_set_group, EC_KEY_get0_private_key, EC_KEY_set_private_key, EC_KEY_get0_public_key, and EC_KEY_set_public_key get and set the EC_GROUP object, the private key and the EC_POINT public key for the B<key> respectively.
+
+The functions EC_KEY_get_conv_form and EC_KEY_set_conv_form get and set the point_conversion_form for the B<key>. For a description
+of point_conversion_forms please refer to L<EC_GROUP_copy(3)|EC_GROUP_copy(3)>.
+
+EC_KEY_insert_key_method_data and EC_KEY_get_key_method_data enable the caller to associate arbitrary additional data specific to the
+elliptic curve scheme being used with the EC_KEY object. This data is treated as a "black box" by the ec library. The data to be stored by EC_KEY_insert_key_method_data is provided in the B<data> parameter, which must have associated functions for duplicating, freeing and "clear_freeing" the data item. If a subsequent EC_KEY_get_key_method_data call is issued, the functions for duplicating, freeing and "clear_freeing" the data item must be provided again, and they must be the same as they were when the data item was inserted.
+
+EC_KEY_set_flags sets the flags in the B<flags> parameter on the EC_KEY object. Any flags that are already set are left set. The currently defined standard flags are EC_FLAG_NON_FIPS_ALLOW and EC_FLAG_FIPS_CHECKED. In addition there is the flag EC_FLAG_COFACTOR_ECDH which is specific to ECDH and is defined in ecdh.h. EC_KEY_get_flags returns the current flags that are set for this EC_KEY. EC_KEY_clear_flags clears the flags indicated by the B<flags> parameter. All other flags are left in their existing state.
+
+EC_KEY_set_asn1_flag sets the asn1_flag on the underlying EC_GROUP object (if set). Refer to L<EC_GROUP_copy(3)|EC_GROUP_copy(3)> for further information on the asn1_flag.
+
+EC_KEY_precompute_mult stores multiples of the underlying EC_GROUP generator for faster point multiplication. See also L<EC_POINT_add(3)|EC_POINT_add(3)>.
+
+
+=head1 RETURN VALUES
+
+EC_KEY_new, EC_KEY_new_by_curve_name and EC_KEY_dup return a pointer to the newly created EC_KEY object, or NULL on error.
+
+EC_KEY_get_flags returns the flags associated with the EC_KEY object as an integer.
+
+EC_KEY_copy returns a pointer to the destination key, or NULL on error.
+
+EC_KEY_up_ref, EC_KEY_set_group, EC_KEY_set_private_key, EC_KEY_set_public_key, EC_KEY_precompute_mult, EC_KEY_generate_key, EC_KEY_check_key and EC_KEY_set_public_key_affine_coordinates return 1 on success or 0 on error.
+
+EC_KEY_get0_group returns the EC_GROUP associated with the EC_KEY.
+
+EC_KEY_get0_private_key returns the private key associated with the EC_KEY.
+
+EC_KEY_get_conv_form returns the point_conversion_form for the EC_KEY.
+
+
+=head1 SEE ALSO
+
+L<crypto(3)|crypto(3)>, L<ec(3)|ec(3)>, L<EC_GROUP_new(3)|EC_GROUP_new(3)>,
+L<EC_GROUP_copy(3)|EC_GROUP_copy(3)>, L<EC_POINT_new(3)|EC_POINT_new(3)>,
+L<EC_POINT_add(3)|EC_POINT_add(3)>,
+L<EC_GFp_simple_method(3)|EC_GFp_simple_method(3)>,
+L<d2i_ECPKParameters(3)|d2i_ECPKParameters(3)>
+
+=cut
diff --git a/doc/crypto/EC_POINT_add.pod b/doc/crypto/EC_POINT_add.pod
new file mode 100644 (file)
index 0000000..ae92640
--- /dev/null
@@ -0,0 +1,72 @@
+=pod
+
+=head1 NAME
+
+EC_POINT_add, EC_POINT_dbl, EC_POINT_invert, EC_POINT_is_at_infinity, EC_POINT_is_on_curve, EC_POINT_cmp, EC_POINT_make_affine, EC_POINTs_make_affine, EC_POINTs_mul, EC_POINT_mul, EC_GROUP_precompute_mult, EC_GROUP_have_precompute_mult - Functions for performing mathematical operations and tests on B<EC_POINT> objects.
+
+=head1 SYNOPSIS
+
+ #include <openssl/ec.h>
+ #include <openssl/bn.h>
+
+ int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx);
+ int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, BN_CTX *ctx);
+ int EC_POINT_invert(const EC_GROUP *group, EC_POINT *a, BN_CTX *ctx);
+ int EC_POINT_is_at_infinity(const EC_GROUP *group, const EC_POINT *p);
+ int EC_POINT_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_CTX *ctx);
+ int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx);
+ int EC_POINT_make_affine(const EC_GROUP *group, EC_POINT *point, BN_CTX *ctx);
+ int EC_POINTs_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx);
+ int EC_POINTs_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, size_t num, const EC_POINT *p[], const BIGNUM *m[], BN_CTX *ctx);
+ int EC_POINT_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, const EC_POINT *q, const BIGNUM *m, BN_CTX *ctx);
+ int EC_GROUP_precompute_mult(EC_GROUP *group, BN_CTX *ctx);
+ int EC_GROUP_have_precompute_mult(const EC_GROUP *group);
+
+
+=head1 DESCRIPTION
+
+EC_POINT_add adds the two points B<a> and B<b> and places the result in B<r>. Similarly EC_POINT_dbl doubles the point B<a> and places the
+result in B<r>. In both cases it is valid for B<r> to be one of B<a> or B<b>.
+
+EC_POINT_invert calculates the inverse of the supplied point B<a>. The result is placed back in B<a>.
+
+The function EC_POINT_is_at_infinity tests whether the supplied point is at infinity or not.
+
+EC_POINT_is_on_curve tests whether the supplied point is on the curve or not.
+
+EC_POINT_cmp compares the two supplied points and tests whether or not they are equal.
+
+The functions EC_POINT_make_affine and EC_POINTs_make_affine force the internal representation of the EC_POINT(s) into the affine
+co-ordinate system. In the case of EC_POINTs_make_affine the value B<num> provides the number of points in the array B<points> to be
+forced.
+
+EC_POINT_mul calculates the value generator * B<n> + B<q> * B<m> and stores the result in B<r>. The value B<n> may be NULL in which case the result is just B<q> * B<m>.
+
+EC_POINTs_mul calculates the value generator * B<n> + B<p[0]> * B<m[0]> + ... + B<p[num-1]> * B<m[num-1]>. As for EC_POINT_mul the value
+B<n> may be NULL.
+
+The function EC_GROUP_precompute_mult stores multiples of the generator for faster point multiplication, whilst
+EC_GROUP_have_precompute_mult tests whether precomputation has already been done. See L<EC_GROUP_copy(3)|EC_GROUP_copy(3)> for information
+about the generator.
+
+
+=head1 RETURN VALUES
+
+The following functions return 1 on success or 0 on error: EC_POINT_add, EC_POINT_dbl, EC_POINT_invert, EC_POINT_make_affine,
+EC_POINTs_make_affine, EC_POINT_mul, EC_POINTs_mul and EC_GROUP_precompute_mult.
+
+EC_POINT_is_at_infinity returns 1 if the point is at infinity, or 0 otherwise.
+
+EC_POINT_is_on_curve returns 1 if the point is on the curve, 0 if not, or -1 on error.
+
+EC_POINT_cmp returns 1 if the points are not equal, 0 if they are, or -1 on error.
+
+EC_GROUP_have_precompute_mult returns 1 if a precomputation has been done, or 0 if not.
+
+=head1 SEE ALSO
+
+L<crypto(3)|crypto(3)>, L<ec(3)|ec(3)>, L<EC_GROUP_new(3)|EC_GROUP_new(3)>, L<EC_GROUP_copy(3)|EC_GROUP_copy(3)>,
+L<EC_POINT_new(3)|EC_POINT_new(3)>, L<EC_KEY_new(3)|EC_KEY_new(3)>,
+L<EC_GFp_simple_method(3)|EC_GFp_simple_method(3)>, L<d2i_ECPKParameters(3)|d2i_ECPKParameters(3)>
+
+=cut
diff --git a/doc/crypto/EC_POINT_new.pod b/doc/crypto/EC_POINT_new.pod
new file mode 100644 (file)
index 0000000..858baf4
--- /dev/null
@@ -0,0 +1,128 @@
+=pod
+
+=head1 NAME
+
+EC_POINT_new, EC_POINT_free, EC_POINT_clear_free, EC_POINT_copy, EC_POINT_dup, EC_POINT_method_of, EC_POINT_set_to_infinity, EC_POINT_set_Jprojective_coordinates_GFp, EC_POINT_get_Jprojective_coordinates_GFp, EC_POINT_set_affine_coordinates_GFp, EC_POINT_get_affine_coordinates_GFp, EC_POINT_set_compressed_coordinates_GFp, EC_POINT_set_affine_coordinates_GF2m, EC_POINT_get_affine_coordinates_GF2m, EC_POINT_set_compressed_coordinates_GF2m, EC_POINT_point2oct, EC_POINT_oct2point, EC_POINT_point2bn, EC_POINT_bn2point, EC_POINT_point2hex, EC_POINT_hex2point - Functions for creating, destroying and manipulating B<EC_POINT> objects.
+
+=head1 SYNOPSIS
+
+ #include <openssl/ec.h>
+ #include <openssl/bn.h>
+
+ EC_POINT *EC_POINT_new(const EC_GROUP *group);
+ void EC_POINT_free(EC_POINT *point);
+ void EC_POINT_clear_free(EC_POINT *point);
+ int EC_POINT_copy(EC_POINT *dst, const EC_POINT *src);
+ EC_POINT *EC_POINT_dup(const EC_POINT *src, const EC_GROUP *group);
+ const EC_METHOD *EC_POINT_method_of(const EC_POINT *point);
+ int EC_POINT_set_to_infinity(const EC_GROUP *group, EC_POINT *point);
+ int EC_POINT_set_Jprojective_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+       const BIGNUM *x, const BIGNUM *y, const BIGNUM *z, BN_CTX *ctx);
+ int EC_POINT_get_Jprojective_coordinates_GFp(const EC_GROUP *group,
+       const EC_POINT *p, BIGNUM *x, BIGNUM *y, BIGNUM *z, BN_CTX *ctx);
+ int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+       const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx);
+ int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group,
+       const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx);
+ int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+       const BIGNUM *x, int y_bit, BN_CTX *ctx);
+ int EC_POINT_set_affine_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p,
+       const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx);
+ int EC_POINT_get_affine_coordinates_GF2m(const EC_GROUP *group,
+       const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx);
+ int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p,
+       const BIGNUM *x, int y_bit, BN_CTX *ctx);
+ size_t EC_POINT_point2oct(const EC_GROUP *group, const EC_POINT *p,
+       point_conversion_form_t form,
+        unsigned char *buf, size_t len, BN_CTX *ctx);
+ int EC_POINT_oct2point(const EC_GROUP *group, EC_POINT *p,
+        const unsigned char *buf, size_t len, BN_CTX *ctx);
+ BIGNUM *EC_POINT_point2bn(const EC_GROUP *, const EC_POINT *,
+       point_conversion_form_t form, BIGNUM *, BN_CTX *);
+ EC_POINT *EC_POINT_bn2point(const EC_GROUP *, const BIGNUM *,
+       EC_POINT *, BN_CTX *);
+ char *EC_POINT_point2hex(const EC_GROUP *, const EC_POINT *,
+       point_conversion_form_t form, BN_CTX *);
+ EC_POINT *EC_POINT_hex2point(const EC_GROUP *, const char *,
+       EC_POINT *, BN_CTX *);
+
+
+=head1 DESCRIPTION
+
+An EC_POINT represents a point on a curve. A new point is constructed by calling the function EC_POINT_new and providing the B<group>
+object that the point relates to.
+
+EC_POINT_free frees the memory associated with the EC_POINT.
+
+EC_POINT_clear_free destroys any sensitive data held within the EC_POINT and then frees its memory.
+
+EC_POINT_copy copies the point B<src> into B<dst>. Both B<src> and B<dst> must use the same EC_METHOD.
+
+EC_POINT_dup creates a new EC_POINT object and copies the content from B<src> to the newly created
+EC_POINT object.
+
+EC_POINT_method_of obtains the EC_METHOD associated with B<point>.
+
+A valid point on a curve is the special point at  infinity. A point is set to be at infinity by calling EC_POINT_set_to_infinity.
+
+The affine co-ordinates for a point describe a point in terms of its x and y position. The functions
+EC_POINT_set_affine_coordinates_GFp and EC_POINT_set_affine_coordinates_GF2m set the B<x> and B<y> co-ordinates for the point
+B<p> defined over the curve given in B<group>.
+
+As well as the affine co-ordinates, a point can alternatively be described in terms of its Jacobian
+projective co-ordinates (for Fp curves only). Jacobian projective co-ordinates are expressed as three values x, y and z. Working in
+this co-ordinate system provides more efficient point multiplication operations.
+A mapping exists between Jacobian projective co-ordinates and affine co-ordinates. A Jacobian projective co-ordinate (x, y, z) can be written as an affine co-ordinate as (x/(z^2), y/(z^3)). Conversion from affine to Jacobian projective co-ordinates is simple. The co-ordinate (x, y) is
+mapped to (x, y, 1). To set or get the projective co-ordinates use EC_POINT_set_Jprojective_coordinates_GFp and
+EC_POINT_get_Jprojective_coordinates_GFp respectively.
+
+Points can also be described in terms of their compressed co-ordinates. For a point (x, y), for any given value for x such that the point is
+on the curve there will only ever be two possible values for y. Therefore a point can be set using the EC_POINT_set_compressed_coordinates_GFp
+and EC_POINT_set_compressed_coordinates_GF2m functions where B<x> is the x co-ordinate and B<y_bit> is a value 0 or 1 to identify which of
+the two possible values for y should be used.
+
+In addition EC_POINTs can be converted to and from various external
+representations. Supported representations are octet strings, BIGNUMs and
+hexadecimal. Octet strings are stored in a buffer along with an associated
+buffer length. A point held in a BIGNUM is calculated by converting the point to
+an octet string and then converting that octet string into a BIGNUM integer.
+Points in hexadecimal format are stored in a NULL terminated character string
+where each character is one of the printable values 0-9 or A-F (or a-f).
+
+The functions EC_POINT_point2oct, EC_POINT_oct2point, EC_POINT_point2bn, EC_POINT_bn2point, EC_POINT_point2hex and EC_POINT_hex2point convert
+from and to EC_POINTs for the formats: octet string, BIGNUM and hexadecimal respectively.
+
+The function EC_POINT_point2oct must be supplied with a buffer long enough to store the octet string. The return value provides the number of
+octets stored. Calling the function with a NULL buffer will not perform the conversion but will still return the required buffer length.
+
+The function EC_POINT_point2hex will allocate sufficient memory to store the hexadecimal string. It is the caller's responsibility to free
+this memory with a subsequent call to OPENSSL_free().
+
+=head1 RETURN VALUES
+
+EC_POINT_new and EC_POINT_dup return the newly allocated EC_POINT or NULL on error.
+
+The following functions return 1 on success or 0 on error: EC_POINT_copy, EC_POINT_set_to_infinity, EC_POINT_set_Jprojective_coordinates_GFp,
+EC_POINT_get_Jprojective_coordinates_GFp, EC_POINT_set_affine_coordinates_GFp, EC_POINT_get_affine_coordinates_GFp,
+EC_POINT_set_compressed_coordinates_GFp, EC_POINT_set_affine_coordinates_GF2m, EC_POINT_get_affine_coordinates_GF2m,
+EC_POINT_set_compressed_coordinates_GF2m and EC_POINT_oct2point.
+
+EC_POINT_method_of returns the EC_METHOD associated with the supplied EC_POINT.
+
+EC_POINT_point2oct returns the length of the required buffer, or 0 on error.
+
+EC_POINT_point2bn returns the pointer to the BIGNUM supplied, or NULL on error.
+
+EC_POINT_bn2point returns the pointer to the EC_POINT supplied, or NULL on error.
+
+EC_POINT_point2hex returns a pointer to the hex string, or NULL on error.
+
+EC_POINT_hex2point returns the pointer to the EC_POINT supplied, or NULL on error.
+
+=head1 SEE ALSO
+
+L<crypto(3)|crypto(3)>, L<ec(3)|ec(3)>, L<EC_GROUP_new(3)|EC_GROUP_new(3)>, L<EC_GROUP_copy(3)|EC_GROUP_copy(3)>,
+L<EC_POINT_add(3)|EC_POINT_add(3)>, L<EC_KEY_new(3)|EC_KEY_new(3)>,
+L<EC_GFp_simple_method(3)|EC_GFp_simple_method(3)>, L<d2i_ECPKParameters(3)|d2i_ECPKParameters(3)>
+
+=cut
index 72925fb..a4d38c1 100644 (file)
@@ -2,26 +2,35 @@
 
 =head1 NAME
 
-ERR_remove_state - free a thread's error queue
+ERR_remove_thread_state, ERR_remove_state - free a thread's error queue
 
 =head1 SYNOPSIS
 
  #include <openssl/err.h>
 
+ void ERR_remove_thread_state(const CRYPTO_THREADID *tid);
+
+Deprecated:
+
  void ERR_remove_state(unsigned long pid);
 
 =head1 DESCRIPTION
 
-ERR_remove_state() frees the error queue associated with thread B<pid>.
-If B<pid> == 0, the current thread will have its error queue removed.
+ERR_remove_thread_state() frees the error queue associated with thread B<tid>.
+If B<tid> == B<NULL>, the current thread will have its error queue removed.
 
 Since error queue data structures are allocated automatically for new
 threads, they must be freed when threads are terminated in order to
 avoid memory leaks.
 
+ERR_remove_state is deprecated and has been replaced by
+ERR_remove_thread_state. Since threads in OpenSSL are no longer identified
+by unsigned long values any argument to this function is ignored. Calling
+ERR_remove_state is equivalent to B<ERR_remove_thread_state(NULL)>.
+
 =head1 RETURN VALUE
 
-ERR_remove_state() returns no value.
+ERR_remove_thread_state() and ERR_remove_state() return no value.
 
 =head1 SEE ALSO
 
@@ -29,6 +38,8 @@ L<err(3)|err(3)>
 
 =head1 HISTORY
 
-ERR_remove_state() is available in all versions of SSLeay and OpenSSL.
+ERR_remove_state() is available in all versions of SSLeay and OpenSSL. It
+was deprecated in OpenSSL 1.0.0 when ERR_remove_thread_state was introduced
+and thread IDs were introduced to identify threads instead of 'unsigned long'. 
 
 =cut
index 0ea7d55..a9b6bb0 100644 (file)
@@ -36,8 +36,8 @@ If the total key and IV length is less than the digest length and
 B<MD5> is used then the derivation algorithm is compatible with PKCS#5 v1.5
 otherwise a non standard extension is used to derive the extra data.
 
-Newer applications should use more standard algorithms such as PKCS#5
-v2.0 for key derivation.
+Newer applications should use a more modern algorithm such as PBKDF2 as
+defined in PKCS#5v2.1 and provided by PKCS5_PBKDF2_HMAC.
 
 =head1 KEY DERIVATION ALGORITHM
 
@@ -55,7 +55,10 @@ the IV.
 
 =head1 RETURN VALUES
 
-EVP_BytesToKey() returns the size of the derived key in bytes.
+If B<data> is NULL, then EVP_BytesToKey() returns the number of bytes
+needed to store the derived key.
+Otherwise, EVP_BytesToKey() returns the size of the derived key in bytes,
+or 0 on error.
 
 =head1 SEE ALSO
 
index ac526bb..0895e8c 100644 (file)
@@ -4,10 +4,10 @@
 
 EVP_MD_CTX_init, EVP_MD_CTX_create, EVP_DigestInit_ex, EVP_DigestUpdate,
 EVP_DigestFinal_ex, EVP_MD_CTX_cleanup, EVP_MD_CTX_destroy, EVP_MAX_MD_SIZE,
-EVP_MD_CTX_copy_ex, EVP_MD_CTX_copy, EVP_MD_type, EVP_MD_pkey_type, EVP_MD_size,
-EVP_MD_block_size, EVP_MD_CTX_md, EVP_MD_CTX_size, EVP_MD_CTX_block_size, EVP_MD_CTX_type,
-EVP_md_null, EVP_md2, EVP_md5, EVP_sha, EVP_sha1, EVP_sha224, EVP_sha256,
-EVP_sha384, EVP_sha512, EVP_dss, EVP_dss1, EVP_mdc2,
+EVP_MD_CTX_copy_ex, EVP_DigestInit, EVP_DigestFinal, EVP_MD_CTX_copy, EVP_MD_type,
+EVP_MD_pkey_type, EVP_MD_size, EVP_MD_block_size, EVP_MD_CTX_md, EVP_MD_CTX_size,
+EVP_MD_CTX_block_size, EVP_MD_CTX_type, EVP_md_null, EVP_md2, EVP_md5, EVP_sha, EVP_sha1,
+EVP_sha224, EVP_sha256, EVP_sha384, EVP_sha512, EVP_dss, EVP_dss1, EVP_mdc2,
 EVP_ripemd160, EVP_get_digestbyname, EVP_get_digestbynid, EVP_get_digestbyobj -
 EVP digest routines
 
@@ -270,7 +270,7 @@ and EVP_DigestFinal_ex() were added in OpenSSL 0.9.7.
 
 EVP_md_null(), EVP_md2(), EVP_md5(), EVP_sha(), EVP_sha1(),
 EVP_dss(), EVP_dss1(), EVP_mdc2() and EVP_ripemd160() were
-changed to return truely const EVP_MD * in OpenSSL 0.9.7.
+changed to return truly const EVP_MD * in OpenSSL 0.9.7.
 
 The link between digests and signing algorithms was fixed in OpenSSL 1.0 and
 later, so now EVP_sha1() can be used with RSA and DSA; there is no need to
index cfeccd9..e0217e4 100644 (file)
@@ -11,7 +11,7 @@ EVP_DigestVerifyInit, EVP_DigestVerifyUpdate, EVP_DigestVerifyFinal - EVP signat
  int EVP_DigestVerifyInit(EVP_MD_CTX *ctx, EVP_PKEY_CTX **pctx,
                        const EVP_MD *type, ENGINE *e, EVP_PKEY *pkey);
  int EVP_DigestVerifyUpdate(EVP_MD_CTX *ctx, const void *d, unsigned int cnt);
- int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, unsigned char *sig, size_t siglen);
+ int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, const unsigned char *sig, size_t siglen);
 
 =head1 DESCRIPTION
 
index ed027b3..fb6036f 100644 (file)
@@ -16,7 +16,17 @@ EVP_CIPHER_CTX_nid, EVP_CIPHER_CTX_block_size, EVP_CIPHER_CTX_key_length,
 EVP_CIPHER_CTX_iv_length, EVP_CIPHER_CTX_get_app_data,
 EVP_CIPHER_CTX_set_app_data, EVP_CIPHER_CTX_type, EVP_CIPHER_CTX_flags,
 EVP_CIPHER_CTX_mode, EVP_CIPHER_param_to_asn1, EVP_CIPHER_asn1_to_param,
-EVP_CIPHER_CTX_set_padding - EVP cipher routines
+EVP_CIPHER_CTX_set_padding,  EVP_enc_null, EVP_des_cbc, EVP_des_ecb,
+EVP_des_cfb, EVP_des_ofb, EVP_des_ede_cbc, EVP_des_ede, EVP_des_ede_ofb,
+EVP_des_ede_cfb, EVP_des_ede3_cbc, EVP_des_ede3, EVP_des_ede3_ofb,
+EVP_des_ede3_cfb, EVP_desx_cbc, EVP_rc4, EVP_rc4_40, EVP_idea_cbc,
+EVP_idea_ecb, EVP_idea_cfb, EVP_idea_ofb, EVP_idea_cbc, EVP_rc2_cbc,
+EVP_rc2_ecb, EVP_rc2_cfb, EVP_rc2_ofb, EVP_rc2_40_cbc, EVP_rc2_64_cbc,
+EVP_bf_cbc, EVP_bf_ecb, EVP_bf_cfb, EVP_bf_ofb, EVP_cast5_cbc,
+EVP_cast5_ecb, EVP_cast5_cfb, EVP_cast5_ofb, EVP_rc5_32_12_16_cbc,
+EVP_rc5_32_12_16_ecb, EVP_rc5_32_12_16_cfb, EVP_rc5_32_12_16_ofb, 
+EVP_aes_128_gcm, EVP_aes_192_gcm, EVP_aes_256_gcm, EVP_aes_128_ccm,
+EVP_aes_192_ccm, EVP_aes_256_ccm - EVP cipher routines
 
 =head1 SYNOPSIS
 
@@ -231,8 +241,7 @@ or the parameters cannot be set (for example the RC2 effective key length
 is not supported.
 
 EVP_CIPHER_CTX_ctrl() allows various cipher specific parameters to be determined
-and set. Currently only the RC2 effective key length and the number of rounds of
-RC5 can be set.
+and set.
 
 =head1 RETURN VALUES
 
@@ -338,8 +347,88 @@ RC5 encryption algorithm in CBC, ECB, CFB and OFB modes respectively. This is a
 cipher with an additional "number of rounds" parameter. By default the key length is set to 128
 bits and 12 rounds.
 
+=item EVP_aes_128_gcm(void), EVP_aes_192_gcm(void), EVP_aes_256_gcm(void)
+
+AES Galois Counter Mode (GCM) for 128, 192 and 256 bit keys respectively.
+These ciphers require additional control operations to function correctly: see
+L<GCM Mode> section below for details.
+
+=item EVP_aes_128_ccm(void), EVP_aes_192_ccm(void), EVP_aes_256_ccm(void)
+
+AES Counter with CBC-MAC Mode (CCM) for 128, 192 and 256 bit keys respectively.
+These ciphers require additional control operations to function correctly: see
+CCM mode section below for details.
+
 =back
 
+=head1 GCM Mode
+
+For GCM mode ciphers the behaviour of the EVP interface is subtly altered and
+several GCM specific ctrl operations are supported.
+
+To specify any additional authenticated data (AAD) a call to EVP_CipherUpdate(),
+EVP_EncryptUpdate() or EVP_DecryptUpdate() should be made with the output 
+parameter B<out> set to B<NULL>.
+
+When decrypting the return value of EVP_DecryptFinal() or EVP_CipherFinal()
+indicates if the operation was successful. If it does not indicate success
+the authentication operation has failed and any output data B<MUST NOT>
+be used as it is corrupted.
+
+The following ctrls are supported in GCM mode:
+
+ EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_IVLEN, ivlen, NULL);
+
+Sets the GCM IV length: this call can only be made before specifying an IV. If
+not called a default IV length is used (96 bits for AES).
+ EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_GET_TAG, taglen, tag);
+
+Writes B<taglen> bytes of the tag value to the buffer indicated by B<tag>.
+This call can only be made when encrypting data and B<after> all data has been
+processed (e.g. after an EVP_EncryptFinal() call).
+
+ EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_TAG, taglen, tag);
+
+Sets the expected tag to B<taglen> bytes from B<tag>. This call is only legal
+when decrypting data and must be made B<before> any data is processed (e.g.
+before any EVP_DecryptUpdate() call). 
+
+See L<EXAMPLES> below for an example of the use of GCM mode.
+
+=head1 CCM Mode
+
+The behaviour of CCM mode ciphers is similar to GCM mode but with a few
+additional requirements and different ctrl values.
+
+Like GCM mode any additional authenticated data (AAD) is passed by calling
+EVP_CipherUpdate(), EVP_EncryptUpdate() or EVP_DecryptUpdate() with the output 
+parameter B<out> set to B<NULL>. Additionally the total plaintext or ciphertext
+length B<MUST> be passed to EVP_CipherUpdate(), EVP_EncryptUpdate() or
+EVP_DecryptUpdate() with the output and input parameters (B<in> and B<out>) 
+set to B<NULL> and the length passed in the B<inl> parameter.
+
+The following ctrls are supported in CCM mode:
+ EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_SET_TAG, taglen, tag);
+
+This call is made to set the expected B<CCM> tag value when decrypting or
+the length of the tag (with the B<tag> parameter set to NULL) when encrypting.
+The tag length is often referred to as B<M>. If not set a default value is
+used (12 for AES).
+
+ EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_SET_L, ivlen, NULL);
+
+Sets the CCM B<L> value. If not set a default is used (8 for AES).
+
+ EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_SET_IVLEN, ivlen, NULL);
+
+Sets the CCM nonce (IV) length: this call can only be made before specifying
+a nonce value. The nonce length is given by B<15 - L> so it is 7 by default
+for AES.
+
+
+
 =head1 NOTES
 
 Where possible the B<EVP> interface to symmetric ciphers should be used in
index 13b91f1..44b5fdb 100644 (file)
@@ -2,7 +2,13 @@
 
 =head1 NAME
 
-EVP_PKEY_ctrl, EVP_PKEY_ctrl_str - algorithm specific control operations
+EVP_PKEY_CTX_ctrl, EVP_PKEY_CTX_ctrl_str, EVP_PKEY_get_default_digest_nid,
+EVP_PKEY_CTX_set_signature_md, EVP_PKEY_CTX_set_rsa_padding,
+EVP_PKEY_CTX_set_rsa_pss_saltlen, EVP_PKEY_CTX_set_rsa_keygen_bits,
+EVP_PKEY_CTX_set_rsa_keygen_pubexp, EVP_PKEY_CTX_set_dsa_paramgen_bits,
+EVP_PKEY_CTX_set_dh_paramgen_prime_len,
+EVP_PKEY_CTX_set_dh_paramgen_generator,
+EVP_PKEY_CTX_set_ec_paramgen_curve_nid - algorithm specific control operations
 
 =head1 SYNOPSIS
 
@@ -45,7 +51,7 @@ B<p1> and B<p2>.
 Applications will not normally call EVP_PKEY_CTX_ctrl() directly but will
 instead call one of the algorithm specific macros below.
 
-The function EVP_PKEY_ctrl_str() allows an application to send an algorithm
+The function EVP_PKEY_CTX_ctrl_str() allows an application to send an algorithm
 specific control operation to a context B<ctx> in string form. This is
 intended to be used for options specified on the command line or in text
 files. The commands supported are documented in the openssl utility
index 4f8185e..0ff027c 100644 (file)
@@ -23,10 +23,10 @@ doesn't use parameters.
 The function EVP_PKEY_copy_parameters() copies the parameters from key
 B<from> to key B<to>.
 
-The funcion EVP_PKEY_cmp_parameters() compares the parameters of keys
+The function EVP_PKEY_cmp_parameters() compares the parameters of keys
 B<a> and B<b>.
 
-The funcion EVP_PKEY_cmp() compares the public key components and paramters
+The function EVP_PKEY_cmp() compares the public key components and parameters
 (if present) of keys B<a> and B<b>.
 
 =head1 NOTES
index c39ac35..f7ca7cb 100644 (file)
@@ -17,7 +17,7 @@ OPENSSL_VERSION_NUMBER, SSLeay, SSLeay_version - get OpenSSL version number
 
 OPENSSL_VERSION_NUMBER is a numeric release version identifier:
 
- MMNNFFPPS: major minor fix patch status
+ MNNFFPPS: major minor fix patch status
 
 The status nibble has one of the values 0 for development, 1 to e for betas
 1 to 14, and f for release.
index 5096fac..2d25b26 100644 (file)
@@ -48,17 +48,6 @@ configuration file.
 Applications should free up configuration at application closedown by calling
 CONF_modules_free().
 
-=head1 RESTRICTIONS
-
-The OPENSSL_config() function is designed to be a very simple "call it and
-forget it" function. As a result its behaviour is somewhat limited. It ignores
-all errors silently and it can only load from the standard configuration file
-location for example.
-
-It is however B<much> better than nothing. Applications which need finer
-control over their configuration functionality should use the configuration
-functions such as CONF_load_modules() directly.
-
 =head1 RETURN VALUES
 
 Neither OPENSSL_config() nor OPENSSL_no_config() return a value.
index 2e659d3..90156d2 100644 (file)
@@ -2,42 +2,95 @@
 
 =head1 NAME
 
-OPENSSL_ia32cap - finding the IA-32 processor capabilities
+OPENSSL_ia32cap, OPENSSL_ia32cap_loc - the IA-32 processor capabilities vector
 
 =head1 SYNOPSIS
 
- unsigned long *OPENSSL_ia32cap_loc(void);
- #define OPENSSL_ia32cap (*(OPENSSL_ia32cap_loc()))
+ unsigned int *OPENSSL_ia32cap_loc(void);
+ #define OPENSSL_ia32cap ((OPENSSL_ia32cap_loc())[0])
 
 =head1 DESCRIPTION
 
 Value returned by OPENSSL_ia32cap_loc() is address of a variable
-containing IA-32 processor capabilities bit vector as it appears in EDX
-register after executing CPUID instruction with EAX=1 input value (see
-Intel Application Note #241618). Naturally it's meaningful on IA-32[E]
-platforms only. The variable is normally set up automatically upon
-toolkit initialization, but can be manipulated afterwards to modify
-crypto library behaviour. For the moment of this writing six bits are
-significant, namely:
-
-1. bit #28 denoting Hyperthreading, which is used to distiguish
-   cores with shared cache;
-2. bit #26 denoting SSE2 support;
-3. bit #25 denoting SSE support;
-4. bit #23 denoting MMX support;
-5. bit #20, reserved by Intel, is used to choose between RC4 code
-   pathes;
-6. bit #4 denoting presence of Time-Stamp Counter.
+containing IA-32 processor capabilities bit vector as it appears in
+EDX:ECX register pair after executing CPUID instruction with EAX=1
+input value (see Intel Application Note #241618). Naturally it's
+meaningful on x86 and x86_64 platforms only. The variable is normally
+set up automatically upon toolkit initialization, but can be
+manipulated afterwards to modify crypto library behaviour. At the
+time of this writing the following bits are significant:
+
+=over
+
+=item bit #4 denoting presence of Time-Stamp Counter.
+
+=item bit #19 denoting availability of CLFLUSH instruction;
+
+=item bit #20, reserved by Intel, is used to choose among RC4 code paths;
+
+=item bit #23 denoting MMX support;
+
+=item bit #24, FXSR bit, denoting availability of XMM registers;
+
+=item bit #25 denoting SSE support;
+
+=item bit #26 denoting SSE2 support;
+
+=item bit #28 denoting Hyperthreading, which is used to distinguish
+cores with shared cache;
+
+=item bit #30, reserved by Intel, denotes specifically Intel CPUs;
+
+=item bit #33 denoting availability of PCLMULQDQ instruction;
+
+=item bit #41 denoting SSSE3, Supplemental SSE3, support;
+
+=item bit #43 denoting AMD XOP support (forced to zero on non-AMD CPUs);
+
+=item bit #57 denoting AES-NI instruction set extension;
+
+=item bit #59, OSXSAVE bit, denoting availability of YMM registers;
+
+=item bit #60 denoting AVX extension;
+
+=item bit #62 denoting availability of RDRAND instruction;
+
+=back
 
 For example, clearing bit #26 at run-time disables high-performance
-SSE2 code present in the crypto library. You might have to do this if
-target OpenSSL application is executed on SSE2 capable CPU, but under
-control of OS which does not support SSE2 extentions. Even though you
-can manipulate the value programmatically, you most likely will find it
-more appropriate to set up an environment variable with the same name
-prior starting target application, e.g. on Intel P4 processor 'env
-OPENSSL_ia32cap=0x12900010 apps/openssl', to achieve same effect
-without modifying the application source code. Alternatively you can
-reconfigure the toolkit with no-sse2 option and recompile.
-
-=cut
+SSE2 code present in the crypto library, while clearing bit #24
+disables SSE2 code operating on 128-bit XMM register bank. You might
+have to do the latter if target OpenSSL application is executed on SSE2
+capable CPU, but under control of OS that does not enable XMM
+registers. Even though you can manipulate the value programmatically,
+you most likely will find it more appropriate to set up an environment
+variable with the same name prior to starting the target application, e.g. on
+Intel P4 processor 'env OPENSSL_ia32cap=0x16980010 apps/openssl', or
+better yet 'env OPENSSL_ia32cap=~0x1000000 apps/openssl' to achieve same
+effect without modifying the application source code. Alternatively you
+can reconfigure the toolkit with no-sse2 option and recompile.
+
+Less intuitive is clearing bit #28. The truth is that it's not copied
+from CPUID output verbatim, but is adjusted to reflect whether or not
+the data cache is actually shared between logical cores. This in turn
+affects the decision on whether or not expensive countermeasures
+against cache-timing attacks are applied, most notably in AES assembler
+module.
+
+The vector is further extended with EBX value returned by CPUID with
+EAX=7 and ECX=0 as input. The following bits are significant:
+
+=over
+
+=item bit #64+3 denoting availability of BMI1 instructions, e.g. ANDN;
+
+=item bit #64+5 denoting availability of AVX2 instructions;
+
+=item bit #64+8 denoting availability of BMI2 instructions, e.g. MULX
+and RORX;
+
+=item bit #64+18 denoting availability of RDSEED instruction;
+
+=item bit #64+19 denoting availability of ADCX and ADOX instructions;
+
+=back
diff --git a/doc/crypto/OPENSSL_instrument_bus.pod b/doc/crypto/OPENSSL_instrument_bus.pod
new file mode 100644 (file)
index 0000000..4ed83e4
--- /dev/null
@@ -0,0 +1,42 @@
+=pod
+
+=head1 NAME
+
+OPENSSL_instrument_bus, OPENSSL_instrument_bus2 - instrument references to memory bus
+
+=head1 SYNOPSIS
+
+ #ifdef OPENSSL_CPUID_OBJ
+ size_t OPENSSL_instrument_bus (int *vector,size_t num);
+ size_t OPENSSL_instrument_bus2(int *vector,size_t num,size_t max);
+ #endif
+
+=head1 DESCRIPTION
+
+It was empirically found that timings of references to primary memory
+are subject to irregular, apparently non-deterministic variations. The
+subroutines in question instrument these references for purposes of
+gathering entropy for random number generator. In order to make it
+bus-bound a 'flush cache line' instruction is used between probes. In
+addition probes are added to B<vector> elements in atomic or
+interlocked manner, which should contribute additional noise on
+multi-processor systems. This also means that B<vector[num]> should be
+zeroed upon invocation (if you want to retrieve actual probe values).
+
+OPENSSL_instrument_bus performs B<num> probes and records the number of
+oscillator cycles every probe took.
+
+OPENSSL_instrument_bus2 on the other hand B<accumulates> consecutive
+probes with the same value, i.e. in a way it records duration of
+periods when probe values appeared deterministic. The subroutine
+performs at most B<max> probes in an attempt to fill the B<vector[num]>,
+with B<max> value of 0 meaning "as many as it takes."
+
+=head1 RETURN VALUE
+
+Return value of 0 indicates that CPU is not capable of performing the
+benchmark, either because oscillator counter or 'flush cache line' is
+not available on current platform. For reference, on x86 'flush cache
+line' was introduced with the SSE2 extensions.
+
+Otherwise number of recorded values is returned.
index f14dfaf..de62912 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-OPENSSL_load_builtin_modules - add standard configuration modules
+OPENSSL_load_builtin_modules, ASN1_add_oid_module, ENGINE_add_conf_module - add standard configuration modules
 
 =head1 SYNOPSIS
 
index e63411b..bcb79e5 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-OpenSSL_add_all_algorithms, OpenSSL_add_all_ciphers, OpenSSL_add_all_digests -
+OpenSSL_add_all_algorithms, OpenSSL_add_all_ciphers, OpenSSL_add_all_digests, EVP_cleanup -
 add algorithms to internal table
 
 =head1 SYNOPSIS
index 7c10a4c..f083306 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-PKCS7_verify - verify a PKCS#7 signedData structure
+PKCS7_verify, PKCS7_get0_signers - verify a PKCS#7 signedData structure
 
 =head1 SYNOPSIS
 
@@ -91,8 +91,8 @@ timestamp).
 
 =head1 RETURN VALUES
 
-PKCS7_verify() returns 1 for a successful verification and zero or a negative
-value if an error occurs.
+PKCS7_verify() returns one for a successful verification and zero
+if an error occurs.
 
 PKCS7_get0_signers() returns all signers or B<NULL> if an error occurred.
 
index 8b8c61d..80fa734 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-RAND_egd - query entropy gathering daemon
+RAND_egd, RAND_egd_bytes, RAND_query_egd_bytes - query entropy gathering daemon
 
 =head1 SYNOPSIS
 
index 52dbb14..881391a 100644 (file)
@@ -2,28 +2,33 @@
 
 =head1 NAME
 
-RSA_generate_key - generate RSA key pair
+RSA_generate_key_ex, RSA_generate_key - generate RSA key pair
 
 =head1 SYNOPSIS
 
  #include <openssl/rsa.h>
 
+ int RSA_generate_key_ex(RSA *rsa, int bits, BIGNUM *e, BN_GENCB *cb);
+
+Deprecated:
+
  RSA *RSA_generate_key(int num, unsigned long e,
     void (*callback)(int,int,void *), void *cb_arg);
 
 =head1 DESCRIPTION
 
-RSA_generate_key() generates a key pair and returns it in a newly
-allocated B<RSA> structure. The pseudo-random number generator must
-be seeded prior to calling RSA_generate_key().
+RSA_generate_key_ex() generates a key pair and stores it in the B<RSA>
+structure provided in B<rsa>. The pseudo-random number generator must
+be seeded prior to calling RSA_generate_key_ex().
 
-The modulus size will be B<num> bits, and the public exponent will be
+The modulus size will be of length B<bits>, and the public exponent will be
 B<e>. Key sizes with B<num> E<lt> 1024 should be considered insecure.
 The exponent is an odd number, typically 3, 17 or 65537.
 
 A callback function may be used to provide feedback about the
-progress of the key generation. If B<callback> is not B<NULL>, it
-will be called as follows:
+progress of the key generation. If B<cb> is not B<NULL>, it
+will be called as follows using the BN_GENCB_call() function
+described on the L<BN_generate_prime(3)|BN_generate_prime(3)> page.
 
 =over 4
 
@@ -35,32 +40,38 @@ described in L<BN_generate_prime(3)|BN_generate_prime(3)>.
 =item *
 
 When the n-th randomly generated prime is rejected as not
-suitable for the key, B<callback(2, n, cb_arg)> is called.
+suitable for the key, B<BN_GENCB_call(cb, 2, n)> is called.
 
 =item *
 
 When a random p has been found with p-1 relatively prime to B<e>,
-it is called as B<callback(3, 0, cb_arg)>.
+it is called as B<BN_GENCB_call(cb, 3, 0)>.
 
 =back
 
-The process is then repeated for prime q with B<callback(3, 1, cb_arg)>.
+The process is then repeated for prime q with B<BN_GENCB_call(cb, 3, 1)>.
+
+RSA_generate_key is deprecated (new applications should use
+RSA_generate_key_ex instead). RSA_generate_key works in the same way as
+RSA_generate_key_ex except it uses "old style" call backs. See
+L<BN_generate_prime(3)|BN_generate_prime(3)> for further details.
 
 =head1 RETURN VALUE
 
-If key generation fails, RSA_generate_key() returns B<NULL>; the
-error codes can be obtained by L<ERR_get_error(3)|ERR_get_error(3)>.
+If key generation fails, RSA_generate_key() returns B<NULL>.
+
+The error codes can be obtained by L<ERR_get_error(3)|ERR_get_error(3)>.
 
 =head1 BUGS
 
-B<callback(2, x, cb_arg)> is used with two different meanings.
+B<BN_GENCB_call(cb, 2, x)> is used with two different meanings.
 
 RSA_generate_key() goes into an infinite loop for illegal input values.
 
 =head1 SEE ALSO
 
 L<ERR_get_error(3)|ERR_get_error(3)>, L<rand(3)|rand(3)>, L<rsa(3)|rsa(3)>,
-L<RSA_free(3)|RSA_free(3)>
+L<RSA_free(3)|RSA_free(3)>, L<BN_generate_prime(3)|BN_generate_prime(3)>
 
 =head1 HISTORY
 
diff --git a/doc/crypto/SSLeay_version.pod b/doc/crypto/SSLeay_version.pod
new file mode 100644 (file)
index 0000000..1500c2a
--- /dev/null
@@ -0,0 +1,74 @@
+=pod
+
+=head1 NAME
+
+SSLeay_version - retrieve version/build information about OpenSSL library
+
+=head1 SYNOPSIS
+
+ #include <openssl/crypto.h>
+
+ const char *SSLeay_version(int type);
+
+=head1 DESCRIPTION
+
+SSLeay_version() returns a pointer to a constant string describing the
+version of the OpenSSL library or giving information about the library
+build.
+
+The following B<type> values are supported:
+
+=over 4
+
+=item SSLEAY_VERSION
+
+The version of the OpenSSL library including the release date.
+
+=item SSLEAY_CFLAGS
+
+The compiler flags set for the compilation process in the form
+"compiler: ..."  if available or "compiler: information not available"
+otherwise.
+
+=item SSLEAY_BUILT_ON
+
+The date of the build process in the form "built on: ..." if available
+or "built on: date not available" otherwise.
+
+=item SSLEAY_PLATFORM
+
+The "Configure" target of the library build in the form "platform: ..."
+if available or "platform: information not available" otherwise.
+
+=item SSLEAY_DIR
+
+The "OPENSSLDIR" setting of the library build in the form "OPENSSLDIR: "...""
+if available or "OPENSSLDIR: N/A" otherwise.
+
+=back
+
+=head1 RETURN VALUES
+
+The following return values can occur:
+
+=over 4
+
+=item "not available"
+
+An invalid value for B<type> was given.
+
+=item Pointer to constant string
+
+Textual description.
+
+=back
+
+=head1 SEE ALSO
+
+L<crypto(3)|crypto(3)>
+
+=head1 HISTORY
+
+B<SSLEAY_DIR> was added in OpenSSL 0.9.7.
+
+=cut
index 043766c..3bdc07f 100644 (file)
@@ -44,7 +44,7 @@ B<loc>. The deleted entry is returned and must be freed up.
 =head1 NOTES
 
 The use of string types such as B<MBSTRING_ASC> or B<MBSTRING_UTF8>
-is strongly recommened for the B<type> parameter. This allows the
+is strongly recommended for the B<type> parameter. This allows the
 internal code to correctly determine the type of the field and to
 apply length checks according to the relevant standards. This is
 done using ASN1_STRING_set_by_NID().
index 60e8332..be00ff1 100644 (file)
@@ -32,7 +32,7 @@ checks.
 
 X509_STORE_CTX_get_error_depth() returns the B<depth> of the error. This is a
 non-negative integer representing where in the certificate chain the error
-occurred. If it is zero it occured in the end entity certificate, one if
+occurred. If it is zero it occurred in the end entity certificate, one if
 it is the certificate which signed the end entity certificate and so on.
 
 X509_STORE_CTX_get_current_cert() returns the certificate in B<ctx> which
@@ -246,11 +246,11 @@ Some feature of a certificate extension is not supported. Unused.
 
 =item B<X509_V_ERR_PERMITTED_VIOLATION: permitted subtree violation>
 
-A name constraint violation occured in the permitted subtrees.
+A name constraint violation occurred in the permitted subtrees.
 
 =item B<X509_V_ERR_EXCLUDED_VIOLATION: excluded subtree violation>
 
-A name constraint violation occured in the excluded subtrees.
+A name constraint violation occurred in the excluded subtrees.
 
 =item B<X509_V_ERR_SUBTREE_MINMAX: name constraints minimum and maximum not supported>
 
@@ -270,7 +270,7 @@ a garbage extension or some new feature not currently supported.
 
 =item B<X509_V_ERR_CRL_PATH_VALIDATION_ERROR: CRL path validation error>
 
-An error occured when attempting to verify the CRL path. This error can only
+An error occurred when attempting to verify the CRL path. This error can only
 happen if extended CRL checking is enabled.
 
 =item B<X509_V_ERR_APPLICATION_VERIFICATION: application verification failure>
index 9d86d46..44792f9 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-X509_VERIFY_PARAM_set_flags, X509_VERIFY_PARAM_clear_flags, X509_VERIFY_PARAM_get_flags, X509_VERIFY_PARAM_set_purpose, X509_VERIFY_PARAM_set_trust, X509_VERIFY_PARAM_set_depth, X509_VERIFY_PARAM_get_depth, X509_VERIFY_PARAM_set_time, X509_VERIFY_PARAM_add0_policy, X509_VERIFY_PARAM_set1_policies - X509 verification parameters 
+X509_VERIFY_PARAM_set_flags, X509_VERIFY_PARAM_clear_flags, X509_VERIFY_PARAM_get_flags, X509_VERIFY_PARAM_set_purpose, X509_VERIFY_PARAM_set_trust, X509_VERIFY_PARAM_set_depth, X509_VERIFY_PARAM_get_depth, X509_VERIFY_PARAM_set_time, X509_VERIFY_PARAM_add0_policy, X509_VERIFY_PARAM_set1_policies, X509_VERIFY_PARAM_set1_host, X509_VERIFY_PARAM_add1_host, X509_VERIFY_PARAM_set_hostflags, X509_VERIFY_PARAM_get0_peername, X509_VERIFY_PARAM_set1_email, X509_VERIFY_PARAM_set1_ip, X509_VERIFY_PARAM_set1_ip_asc - X509 verification parameters
 
 =head1 SYNOPSIS
 
@@ -26,6 +26,19 @@ X509_VERIFY_PARAM_set_flags, X509_VERIFY_PARAM_clear_flags, X509_VERIFY_PARAM_ge
  void X509_VERIFY_PARAM_set_depth(X509_VERIFY_PARAM *param, int depth);
  int X509_VERIFY_PARAM_get_depth(const X509_VERIFY_PARAM *param);
 
+ int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *param,
+                                const char *name, size_t namelen);
+ int X509_VERIFY_PARAM_add1_host(X509_VERIFY_PARAM *param,
+                                 const char *name, size_t namelen);
+ void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *param,
+                                     unsigned int flags);
+ char *X509_VERIFY_PARAM_get0_peername(X509_VERIFY_PARAM *param);
+ int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *param,
+                                const char *email, size_t emaillen);
+ int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *param,
+                              const unsigned char *ip, size_t iplen);
+ int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *param, const char *ipasc);
+
 =head1 DESCRIPTION
 
 These functions manipulate the B<X509_VERIFY_PARAM> structure associated with
@@ -61,12 +74,63 @@ X509_VERIFY_PARAM_set_depth() sets the maximum verification depth to B<depth>.
 That is the maximum number of untrusted CA certificates that can appear in a
 chain.
 
+X509_VERIFY_PARAM_set1_host() sets the expected DNS hostname to
+B<name> clearing any previously specified host name or names.  If
+B<name> is NULL, or empty the list of hostnames is cleared, and
+name checks are not performed on the peer certificate.  If B<name>
+is NUL-terminated, B<namelen> may be zero, otherwise B<namelen>
+must be set to the length of B<name>.  When a hostname is specified,
+certificate verification automatically invokes L<X509_check_host(3)>
+with flags equal to the B<flags> argument given to
+B<X509_VERIFY_PARAM_set_hostflags()> (default zero).  Applications
+are strongly advised to use this interface in preference to explicitly
+calling L<X509_check_host(3)>: hostname checks are out of scope
+with the DANE-EE(3) certificate usage, and the internal check will
+be suppressed as appropriate when DANE support is added to OpenSSL.
+
+X509_VERIFY_PARAM_add1_host() adds B<name> as an additional reference
+identifier that can match the peer's certificate.  Any previous names
+set via X509_VERIFY_PARAM_set1_host() or X509_VERIFY_PARAM_add1_host()
+are retained, no change is made if B<name> is NULL or empty.  When
+multiple names are configured, the peer is considered verified when
+any name matches.
+
+X509_VERIFY_PARAM_get0_peername() returns the DNS hostname or subject
+CommonName from the peer certificate that matched one of the reference
+identifiers.  When wildcard matching is not disabled, or when a
+reference identifier specifies a parent domain (starts with ".")
+rather than a hostname, the peer name may be a wildcard name or a
+sub-domain of the reference identifier respectively.  The return
+string is allocated by the library and is no longer valid once the
+associated B<param> argument is freed.  Applications must not free
+the return value.
+
+X509_VERIFY_PARAM_set1_email() sets the expected RFC822 email address to
+B<email>.  If B<email> is NUL-terminated, B<emaillen> may be zero, otherwise
+B<emaillen> must be set to the length of B<email>.  When an email address
+is specified, certificate verification automatically invokes
+L<X509_check_email(3)>.
+
+X509_VERIFY_PARAM_set1_ip() sets the expected IP address to B<ip>.
+The B<ip> argument is in binary format, in network byte-order and
+B<iplen> must be set to 4 for IPv4 and 16 for IPv6.  When an IP
+address is specified, certificate verification automatically invokes
+L<X509_check_ip(3)>.
+
+X509_VERIFY_PARAM_set1_ip_asc() sets the expected IP address to
+B<ipasc>.  The B<ipasc> argument is a NUL-terminated ASCII string:
+dotted decimal quad for IPv4 and colon-separated hexadecimal for
+IPv6.  The condensed "::" notation is supported for IPv6 addresses.
+
 =head1 RETURN VALUES
 
-X509_VERIFY_PARAM_set_flags(), X509_VERIFY_PARAM_clear_flags(), 
+X509_VERIFY_PARAM_set_flags(), X509_VERIFY_PARAM_clear_flags(),
 X509_VERIFY_PARAM_set_purpose(), X509_VERIFY_PARAM_set_trust(),
-X509_VERIFY_PARAM_add0_policy() and X509_VERIFY_PARAM_set1_policies() return 1
-for success and 0 for failure. 
+X509_VERIFY_PARAM_add0_policy(), X509_VERIFY_PARAM_set1_policies(),
+X509_VERIFY_PARAM_set1_host(), X509_VERIFY_PARAM_set_hostflags(),
+X509_VERIFY_PARAM_set1_email(), X509_VERIFY_PARAM_set1_ip() and
+X509_VERIFY_PARAM_set1_ip_asc() return 1 for success and 0 for
+failure.
 
 X509_VERIFY_PARAM_get_flags() returns the current verification flags.
 
@@ -137,7 +201,7 @@ The B<X509_V_FLAG_NO_ALT_CHAINS> flag suppresses checking for alternative
 chains. By default, when building a certificate chain, if the first certificate
 chain found is not trusted, then OpenSSL will continue to check to see if an
 alternative chain can be found that is trusted. With this flag set the behaviour
-will match that of OpenSSL versions prior to 1.0.1n and 1.0.2b.
+will match that of OpenSSL versions prior to 1.0.2b.
 
 =head1 NOTES
 
@@ -168,10 +232,13 @@ connections associated with an B<SSL_CTX> structure B<ctx>:
 
 =head1 SEE ALSO
 
-L<X509_verify_cert(3)|X509_verify_cert(3)>
+L<X509_verify_cert(3)|X509_verify_cert(3)>,
+L<X509_check_host(3)|X509_check_host(3)>,
+L<X509_check_email(3)|X509_check_email(3)>,
+L<X509_check_ip(3)|X509_check_ip(3)>
 
 =head1 HISTORY
 
-The B<X509_V_FLAG_NO_ALT_CHAINS> flag was added in OpenSSL 1.0.1n and 1.0.2b
+The B<X509_V_FLAG_NO_ALT_CHAINS> flag was added in OpenSSL 1.0.2b
 
 =cut
diff --git a/doc/crypto/X509_check_host.pod b/doc/crypto/X509_check_host.pod
new file mode 100644 (file)
index 0000000..0def17a
--- /dev/null
@@ -0,0 +1,140 @@
+=pod
+
+=head1 NAME
+
+X509_check_host, X509_check_email, X509_check_ip, X509_check_ip_asc - X.509 certificate matching
+
+=head1 SYNOPSIS
+
+ #include <openssl/x509.h>
+
+ int X509_check_host(X509 *, const char *name, size_t namelen,
+                    unsigned int flags, char **peername);
+ int X509_check_email(X509 *, const char *address, size_t addresslen,
+                     unsigned int flags);
+ int X509_check_ip(X509 *, const unsigned char *address, size_t addresslen,
+                  unsigned int flags);
+ int X509_check_ip_asc(X509 *, const char *address, unsigned int flags);
+
+=head1 DESCRIPTION
+
+The certificate matching functions are used to check whether a
+certificate matches a given host name, email address, or IP address.
+The validity of the certificate and its trust level has to be checked by
+other means.
+
+X509_check_host() checks if the certificate Subject Alternative
+Name (SAN) or Subject CommonName (CN) matches the specified host
+name, which must be encoded in the preferred name syntax described
+in section 3.5 of RFC 1034.  By default, wildcards are supported
+and they match only in the left-most label; but they may match
+part of that label with an explicit prefix or suffix.  For example,
+by default, the host B<name> "www.example.com" would match a
+certificate with a SAN or CN value of "*.example.com", "w*.example.com"
+or "*w.example.com".
+
+Per section 6.4.2 of RFC 6125, B<name> values representing international
+domain names must be given in A-label form.  The B<namelen> argument
+must be the number of characters in the name string or zero in which
+case the length is calculated with strlen(B<name>).  When B<name> starts
+with a dot (e.g. ".example.com"), it will be matched by a certificate
+valid for any sub-domain of B<name>, (see also
+B<X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS> below).
+
+When the certificate is matched, and B<peername> is not NULL, a
+pointer to a copy of the matching SAN or CN from the peer certificate
+is stored at the address passed in B<peername>.  The application
+is responsible for freeing the peername via OPENSSL_free() when it
+is no longer needed.
+
+X509_check_email() checks if the certificate matches the specified
+email B<address>.  Only the mailbox syntax of RFC 822 is supported,
+comments are not allowed, and no attempt is made to normalize quoted
+characters.  The B<addresslen> argument must be the number of
+characters in the address string or zero in which case the length
+is calculated with strlen(B<address>).
+
+X509_check_ip() checks if the certificate matches a specified IPv4 or
+IPv6 address.  The B<address> array is in binary format, in network
+byte order.  The length is either 4 (IPv4) or 16 (IPv6).  Only
+explicitly marked addresses in the certificates are considered; IP
+addresses stored in DNS names and Common Names are ignored.
+
+X509_check_ip_asc() is similar, except that the NUL-terminated
+string B<address> is first converted to the internal representation.
+
+The B<flags> argument is usually 0.  It can be the bitwise OR of the
+flags:
+
+=over 4
+
+=item B<X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT>,
+
+=item B<X509_CHECK_FLAG_NO_WILDCARDS>,
+
+=item B<X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS>,
+
+=item B<X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS>,
+
+=item B<X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS>.
+
+=back
+
+The B<X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT> flag causes the function
+to consider the subject DN even if the certificate contains at least
+one subject alternative name of the right type (DNS name or email
+address as appropriate); the default is to ignore the subject DN
+when at least one corresponding subject alternative name is present.
+
+If set, B<X509_CHECK_FLAG_NO_WILDCARDS> disables wildcard
+expansion; this only applies to B<X509_check_host>.
+
+If set, B<X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS> suppresses support
+for "*" as wildcard pattern in labels that have a prefix or suffix,
+such as: "www*" or "*www"; this only applies to B<X509_check_host>.
+
+If set, B<X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS> allows a "*" that
+constitutes the complete label of a DNS name (e.g. "*.example.com")
+to match more than one label in B<name>; this flag only applies
+to B<X509_check_host>.
+
+If set, B<X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS> restricts B<name>
+values which start with ".", that would otherwise match any sub-domain
+in the peer certificate, to only match direct child sub-domains.
+Thus, for instance, with this flag set a B<name> of ".example.com"
+would match a peer certificate with a DNS name of "www.example.com",
+but would not match a peer certificate with a DNS name of
+"www.sub.example.com"; this flag only applies to B<X509_check_host>.
+
+=head1 RETURN VALUES
+
+The functions return 1 for a successful match, 0 for a failed match
+and -1 for an internal error: typically a memory allocation failure
+or an ASN.1 decoding error.
+
+All functions can also return -2 if the input is malformed. For example,
+X509_check_host() returns -2 if the provided B<name> contains embedded
+NULs.
+
+=head1 NOTES
+
+Applications are encouraged to use X509_VERIFY_PARAM_set1_host()
+rather than explicitly calling L<X509_check_host(3)>. Host name
+checks are out of scope with the DANE-EE(3) certificate usage,
+and the internal checks will be suppressed as appropriate when
+DANE support is added to OpenSSL.
+
+=head1 SEE ALSO
+
+L<SSL_get_verify_result(3)|SSL_get_verify_result(3)>,
+L<X509_VERIFY_PARAM_set1_host(3)|X509_VERIFY_PARAM_set1_host(3)>,
+L<X509_VERIFY_PARAM_add1_host(3)|X509_VERIFY_PARAM_add1_host(3)>,
+L<X509_VERIFY_PARAM_set1_email(3)|X509_VERIFY_PARAM_set1_email(3)>,
+L<X509_VERIFY_PARAM_set1_ip(3)|X509_VERIFY_PARAM_set1_ip(3)>,
+L<X509_VERIFY_PARAM_set1_ip_asc(3)|X509_VERIFY_PARAM_set1_ip_asc(3)>
+
+=head1 HISTORY
+
+These functions were added in OpenSSL 1.1.0.
+
+=cut
index 7a52799..f18edfe 100644 (file)
@@ -56,7 +56,7 @@ L<pkcs7(3)|pkcs7(3)>, L<pkcs12(3)|pkcs12(3)>
 
 =item INTERNAL FUNCTIONS
 
-L<bn(3)|bn(3)>, L<buffer(3)|buffer(3)>, L<lhash(3)|lhash(3)>,
+L<bn(3)|bn(3)>, L<buffer(3)|buffer(3)>, L<ec(3)|ec(3)>, L<lhash(3)|lhash(3)>,
 L<objects(3)|objects(3)>, L<stack(3)|stack(3)>,
 L<txt_db(3)|txt_db(3)> 
 
index 22c1b50..e999376 100644 (file)
@@ -3,7 +3,7 @@
 =head1 NAME
 
 d2i_DSAPublicKey, i2d_DSAPublicKey, d2i_DSAPrivateKey, i2d_DSAPrivateKey,
-d2i_DSA_PUBKEY, i2d_DSA_PUBKEY, d2i_DSA_SIG, i2d_DSA_SIG - DSA key encoding
+d2i_DSA_PUBKEY, i2d_DSA_PUBKEY, d2i_DSAparams, i2d_DSAparams, d2i_DSA_SIG, i2d_DSA_SIG - DSA key encoding
 and parsing functions.
 
 =head1 SYNOPSIS
diff --git a/doc/crypto/d2i_ECPKParameters.pod b/doc/crypto/d2i_ECPKParameters.pod
new file mode 100644 (file)
index 0000000..704b4ab
--- /dev/null
@@ -0,0 +1,84 @@
+=pod
+
+=head1 NAME
+
+d2i_ECPKParameters, i2d_ECPKParameters, d2i_ECPKParameters_bio, i2d_ECPKParameters_bio, d2i_ECPKParameters_fp, i2d_ECPKParameters_fp, ECPKParameters_print, ECPKParameters_print_fp - Functions for decoding and encoding ASN1 representations of elliptic curve entities
+
+=head1 SYNOPSIS
+
+ #include <openssl/ec.h>
+
+ EC_GROUP *d2i_ECPKParameters(EC_GROUP **px, const unsigned char **in, long len);
+ int i2d_ECPKParameters(const EC_GROUP *x, unsigned char **out);
+ #define d2i_ECPKParameters_bio(bp,x) ASN1_d2i_bio_of(EC_GROUP,NULL,d2i_ECPKParameters,bp,x)
+ #define i2d_ECPKParameters_bio(bp,x) ASN1_i2d_bio_of_const(EC_GROUP,i2d_ECPKParameters,bp,x)
+ #define d2i_ECPKParameters_fp(fp,x) (EC_GROUP *)ASN1_d2i_fp(NULL, \
+                (char *(*)())d2i_ECPKParameters,(fp),(unsigned char **)(x))
+ #define i2d_ECPKParameters_fp(fp,x) ASN1_i2d_fp(i2d_ECPKParameters,(fp), \
+               (unsigned char *)(x))
+ int     ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off);
+ int     ECPKParameters_print_fp(FILE *fp, const EC_GROUP *x, int off);
+
+
+=head1 DESCRIPTION
+
+The ECPKParameters encode and decode routines encode and parse the public parameters for an
+B<EC_GROUP> structure, which represents a curve.
+
+d2i_ECPKParameters() attempts to decode B<len> bytes at B<*in>. If 
+successful a pointer to the B<EC_GROUP> structure is returned. If an error
+occurred then B<NULL> is returned. If B<px> is not B<NULL> then the
+returned structure is written to B<*px>. If B<*px> is not B<NULL>
+then it is assumed that B<*px> contains a valid B<EC_GROUP>
+structure and an attempt is made to reuse it. If the call is
+successful B<*in> is incremented to the byte following the
+parsed data.
+
+i2d_ECPKParameters() encodes the structure pointed to by B<x> into DER format.
+If B<out> is not B<NULL> it writes the DER encoded data to the buffer
+at B<*out>, and increments it to point after the data just written.
+If the return value is negative an error occurred, otherwise it
+returns the length of the encoded data. 
+
+If B<*out> is B<NULL> memory will be allocated for a buffer and the encoded
+data written to it. In this case B<*out> is not incremented and it points to
+the start of the data just written.
+
+d2i_ECPKParameters_bio() is similar to d2i_ECPKParameters() except it attempts
+to parse data from BIO B<bp>.
+
+d2i_ECPKParameters_fp() is similar to d2i_ECPKParameters() except it attempts
+to parse data from FILE pointer B<fp>.
+
+i2d_ECPKParameters_bio() is similar to i2d_ECPKParameters() except it writes
+the encoding of the structure B<x> to BIO B<bp> and it
+returns 1 for success and 0 for failure.
+
+i2d_ECPKParameters_fp() is similar to i2d_ECPKParameters() except it writes
+the encoding of the structure B<x> to FILE pointer B<fp> and it
+returns 1 for success and 0 for failure.
+
+These functions are very similar to the X509 functions described in L<d2i_X509(3)|d2i_X509(3)>,
+where further notes and examples are available.
+
+The ECPKParameters_print and ECPKParameters_print_fp functions print a human-readable output
+of the public parameters of the EC_GROUP to B<bp> or B<fp>. The output lines are indented by B<off> spaces.
+
+=head1 RETURN VALUES
+
+d2i_ECPKParameters(), d2i_ECPKParameters_bio() and d2i_ECPKParameters_fp() return a valid B<EC_GROUP> structure
+or B<NULL> if an error occurs.
+
+i2d_ECPKParameters() returns the number of bytes successfully encoded or a negative
+value if an error occurs.
+
+i2d_ECPKParameters_bio(), i2d_ECPKParameters_fp(), ECPKParameters_print and ECPKParameters_print_fp
+return 1 for success and 0 if an error occurs. 
+
+=head1 SEE ALSO
+
+L<crypto(3)|crypto(3)>, L<ec(3)|ec(3)>, L<EC_GROUP_new(3)|EC_GROUP_new(3)>, L<EC_GROUP_copy(3)|EC_GROUP_copy(3)>,
+L<EC_POINT_new(3)|EC_POINT_new(3)>, L<EC_POINT_add(3)|EC_POINT_add(3)>, L<EC_KEY_new(3)|EC_KEY_new(3)>,
+L<EC_GFp_simple_method(3)|EC_GFp_simple_method(3)>, L<d2i_X509(3)|d2i_X509(3)>
+
+=cut
index e3dc238..5b7c16f 100644 (file)
@@ -18,6 +18,8 @@ i2d_X509_fp - X509 encode and decode functions
  int i2d_X509_bio(BIO *bp, X509 *x);
  int i2d_X509_fp(FILE *fp, X509 *x);
 
+ int i2d_re_X509_tbs(X509 *x, unsigned char **out);
+
 =head1 DESCRIPTION
 
 The X509 encode and decode routines encode and parse an
@@ -60,11 +62,17 @@ i2d_X509_fp() is similar to i2d_X509() except it writes
 the encoding of the structure B<x> to BIO B<bp> and it
 returns 1 for success and 0 for failure.
 
+i2d_re_X509_tbs() is similar to i2d_X509() except it encodes
+only the TBSCertificate portion of the certificate.
+
 =head1 NOTES
 
 The letters B<i> and B<d> in for example B<i2d_X509> stand for
-"internal" (that is an internal C structure) and "DER". So that
-B<i2d_X509> converts from internal to DER.
+"internal" (that is an internal C structure) and "DER". So
+B<i2d_X509> converts from internal to DER. The "re" in
+B<i2d_re_X509_tbs> stands for "re-encode", and ensures that a fresh
+encoding is generated in case the object has been modified after
+creation (see the BUGS section).
 
 The functions can also understand B<BER> forms.
 
@@ -209,6 +217,21 @@ fields entirely and will not be parsed by d2i_X509(). This may be
 fixed in future so code should not assume that i2d_X509() will
 always succeed.
 
+The encoding of the TBSCertificate portion of a certificate is cached
+in the B<X509> structure internally to improve encoding performance
+and to ensure certificate signatures are verified correctly in some
+certificates with broken (non-DER) encodings.
+
+Any function which encodes an X509 structure such as i2d_X509(),
+i2d_X509_fp() or i2d_X509_bio() may return a stale encoding if the
+B<X509> structure has been modified after deserialization or previous
+serialization.
+
+If, after modification, the B<X509> object is re-signed with X509_sign(),
+the encoding is automatically renewed. Otherwise, the encoding of the
+TBSCertificate portion of the B<X509> can be manually renewed by calling
+i2d_re_X509_tbs().
+
 =head1 RETURN VALUES
 
 d2i_X509(), d2i_X509_bio() and d2i_X509_fp() return a valid B<X509> structure
index 224f9e0..675d38b 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-d2i_X509_CRL, i2d_X509_CRL, d2i_X509_CRL_bio, d2i_509_CRL_fp,
+d2i_X509_CRL, i2d_X509_CRL, d2i_X509_CRL_bio, d2i_X509_CRL_fp,
 i2d_X509_CRL_bio, i2d_X509_CRL_fp - PKCS#10 certificate request functions.
 
 =head1 SYNOPSIS
diff --git a/doc/crypto/ec.pod b/doc/crypto/ec.pod
new file mode 100644 (file)
index 0000000..7d57ba8
--- /dev/null
@@ -0,0 +1,201 @@
+=pod
+
+=head1 NAME
+
+ec - Elliptic Curve functions
+
+=head1 SYNOPSIS
+
+ #include <openssl/ec.h>
+ #include <openssl/bn.h>
+
+ const EC_METHOD *EC_GFp_simple_method(void);
+ const EC_METHOD *EC_GFp_mont_method(void);
+ const EC_METHOD *EC_GFp_nist_method(void);
+ const EC_METHOD *EC_GFp_nistp224_method(void);
+ const EC_METHOD *EC_GFp_nistp256_method(void);
+ const EC_METHOD *EC_GFp_nistp521_method(void);
+
+ const EC_METHOD *EC_GF2m_simple_method(void);
+
+ EC_GROUP *EC_GROUP_new(const EC_METHOD *meth);
+ void EC_GROUP_free(EC_GROUP *group);
+ void EC_GROUP_clear_free(EC_GROUP *group);
+ int EC_GROUP_copy(EC_GROUP *dst, const EC_GROUP *src);
+ EC_GROUP *EC_GROUP_dup(const EC_GROUP *src);
+ const EC_METHOD *EC_GROUP_method_of(const EC_GROUP *group);
+ int EC_METHOD_get_field_type(const EC_METHOD *meth);
+ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, const BIGNUM *order, const BIGNUM *cofactor);
+ const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group);
+ int EC_GROUP_get_order(const EC_GROUP *group, BIGNUM *order, BN_CTX *ctx);
+ int EC_GROUP_get_cofactor(const EC_GROUP *group, BIGNUM *cofactor, BN_CTX *ctx);
+ void EC_GROUP_set_curve_name(EC_GROUP *group, int nid);
+ int EC_GROUP_get_curve_name(const EC_GROUP *group);
+ void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag);
+ int EC_GROUP_get_asn1_flag(const EC_GROUP *group);
+ void EC_GROUP_set_point_conversion_form(EC_GROUP *group, point_conversion_form_t form);
+ point_conversion_form_t EC_GROUP_get_point_conversion_form(const EC_GROUP *);
+ unsigned char *EC_GROUP_get0_seed(const EC_GROUP *x);
+ size_t EC_GROUP_get_seed_len(const EC_GROUP *);
+ size_t EC_GROUP_set_seed(EC_GROUP *, const unsigned char *, size_t len);
+ int EC_GROUP_set_curve_GFp(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+ int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx);
+ int EC_GROUP_set_curve_GF2m(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+ int EC_GROUP_get_curve_GF2m(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx);
+ int EC_GROUP_get_degree(const EC_GROUP *group);
+ int EC_GROUP_check(const EC_GROUP *group, BN_CTX *ctx);
+ int EC_GROUP_check_discriminant(const EC_GROUP *group, BN_CTX *ctx);
+ int EC_GROUP_cmp(const EC_GROUP *a, const EC_GROUP *b, BN_CTX *ctx);
+ EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+ EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
+ EC_GROUP *EC_GROUP_new_by_curve_name(int nid);
+
+ size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems);
+
+ EC_POINT *EC_POINT_new(const EC_GROUP *group);
+ void EC_POINT_free(EC_POINT *point);
+ void EC_POINT_clear_free(EC_POINT *point);
+ int EC_POINT_copy(EC_POINT *dst, const EC_POINT *src);
+ EC_POINT *EC_POINT_dup(const EC_POINT *src, const EC_GROUP *group);
+ const EC_METHOD *EC_POINT_method_of(const EC_POINT *point);
+ int EC_POINT_set_to_infinity(const EC_GROUP *group, EC_POINT *point);
+ int EC_POINT_set_Jprojective_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+       const BIGNUM *x, const BIGNUM *y, const BIGNUM *z, BN_CTX *ctx);
+ int EC_POINT_get_Jprojective_coordinates_GFp(const EC_GROUP *group,
+       const EC_POINT *p, BIGNUM *x, BIGNUM *y, BIGNUM *z, BN_CTX *ctx);
+ int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+       const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx);
+ int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group,
+       const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx);
+ int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *p,
+       const BIGNUM *x, int y_bit, BN_CTX *ctx);
+ int EC_POINT_set_affine_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p,
+       const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx);
+ int EC_POINT_get_affine_coordinates_GF2m(const EC_GROUP *group,
+       const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx);
+ int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p,
+       const BIGNUM *x, int y_bit, BN_CTX *ctx);
+ size_t EC_POINT_point2oct(const EC_GROUP *group, const EC_POINT *p,
+       point_conversion_form_t form,
+        unsigned char *buf, size_t len, BN_CTX *ctx);
+ int EC_POINT_oct2point(const EC_GROUP *group, EC_POINT *p,
+        const unsigned char *buf, size_t len, BN_CTX *ctx);
+ BIGNUM *EC_POINT_point2bn(const EC_GROUP *, const EC_POINT *,
+       point_conversion_form_t form, BIGNUM *, BN_CTX *);
+ EC_POINT *EC_POINT_bn2point(const EC_GROUP *, const BIGNUM *,
+       EC_POINT *, BN_CTX *);
+ char *EC_POINT_point2hex(const EC_GROUP *, const EC_POINT *,
+       point_conversion_form_t form, BN_CTX *);
+ EC_POINT *EC_POINT_hex2point(const EC_GROUP *, const char *,
+       EC_POINT *, BN_CTX *);
+
+ int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx);
+ int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, BN_CTX *ctx);
+ int EC_POINT_invert(const EC_GROUP *group, EC_POINT *a, BN_CTX *ctx);
+ int EC_POINT_is_at_infinity(const EC_GROUP *group, const EC_POINT *p);
+ int EC_POINT_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_CTX *ctx);
+ int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx);
+ int EC_POINT_make_affine(const EC_GROUP *group, EC_POINT *point, BN_CTX *ctx);
+ int EC_POINTs_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx);
+ int EC_POINTs_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, size_t num, const EC_POINT *p[], const BIGNUM *m[], BN_CTX *ctx);
+ int EC_POINT_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, const EC_POINT *q, const BIGNUM *m, BN_CTX *ctx);
+ int EC_GROUP_precompute_mult(EC_GROUP *group, BN_CTX *ctx);
+ int EC_GROUP_have_precompute_mult(const EC_GROUP *group);
+
+ int EC_GROUP_get_basis_type(const EC_GROUP *);
+ int EC_GROUP_get_trinomial_basis(const EC_GROUP *, unsigned int *k);
+ int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1, 
+       unsigned int *k2, unsigned int *k3);
+ EC_GROUP *d2i_ECPKParameters(EC_GROUP **, const unsigned char **in, long len);
+ int i2d_ECPKParameters(const EC_GROUP *, unsigned char **out);
+ #define d2i_ECPKParameters_bio(bp,x) ASN1_d2i_bio_of(EC_GROUP,NULL,d2i_ECPKParameters,bp,x)
+ #define i2d_ECPKParameters_bio(bp,x) ASN1_i2d_bio_of_const(EC_GROUP,i2d_ECPKParameters,bp,x)
+ #define d2i_ECPKParameters_fp(fp,x) (EC_GROUP *)ASN1_d2i_fp(NULL, \
+                (char *(*)())d2i_ECPKParameters,(fp),(unsigned char **)(x))
+ #define i2d_ECPKParameters_fp(fp,x) ASN1_i2d_fp(i2d_ECPKParameters,(fp), \
+               (unsigned char *)(x))
+ int     ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off);
+ int     ECPKParameters_print_fp(FILE *fp, const EC_GROUP *x, int off);
+
+ EC_KEY *EC_KEY_new(void);
+ int EC_KEY_get_flags(const EC_KEY *key);
+ void EC_KEY_set_flags(EC_KEY *key, int flags);
+ void EC_KEY_clear_flags(EC_KEY *key, int flags);
+ EC_KEY *EC_KEY_new_by_curve_name(int nid);
+ void EC_KEY_free(EC_KEY *key);
+ EC_KEY *EC_KEY_copy(EC_KEY *dst, const EC_KEY *src);
+ EC_KEY *EC_KEY_dup(const EC_KEY *src);
+ int EC_KEY_up_ref(EC_KEY *key);
+ const EC_GROUP *EC_KEY_get0_group(const EC_KEY *key);
+ int EC_KEY_set_group(EC_KEY *key, const EC_GROUP *group);
+ const BIGNUM *EC_KEY_get0_private_key(const EC_KEY *key);
+ int EC_KEY_set_private_key(EC_KEY *key, const BIGNUM *prv);
+ const EC_POINT *EC_KEY_get0_public_key(const EC_KEY *key);
+ int EC_KEY_set_public_key(EC_KEY *key, const EC_POINT *pub);
+ unsigned EC_KEY_get_enc_flags(const EC_KEY *key);
+ void EC_KEY_set_enc_flags(EC_KEY *eckey, unsigned int flags);
+ point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key);
+ void EC_KEY_set_conv_form(EC_KEY *eckey, point_conversion_form_t cform);
+ void *EC_KEY_get_key_method_data(EC_KEY *key, 
+       void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *));
+ void EC_KEY_insert_key_method_data(EC_KEY *key, void *data,
+       void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *));
+ void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag);
+ int EC_KEY_precompute_mult(EC_KEY *key, BN_CTX *ctx);
+ int EC_KEY_generate_key(EC_KEY *key);
+ int EC_KEY_check_key(const EC_KEY *key);
+ int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y);
+
+ EC_KEY *d2i_ECPrivateKey(EC_KEY **key, const unsigned char **in, long len);
+ int i2d_ECPrivateKey(EC_KEY *key, unsigned char **out);
+
+ EC_KEY *d2i_ECParameters(EC_KEY **key, const unsigned char **in, long len);
+ int i2d_ECParameters(EC_KEY *key, unsigned char **out);
+
+ EC_KEY *o2i_ECPublicKey(EC_KEY **key, const unsigned char **in, long len);
+ int i2o_ECPublicKey(EC_KEY *key, unsigned char **out);
+ int   ECParameters_print(BIO *bp, const EC_KEY *key);
+ int   EC_KEY_print(BIO *bp, const EC_KEY *key, int off);
+ int   ECParameters_print_fp(FILE *fp, const EC_KEY *key);
+ int   EC_KEY_print_fp(FILE *fp, const EC_KEY *key, int off);
+ #define ECParameters_dup(x) ASN1_dup_of(EC_KEY,i2d_ECParameters,d2i_ECParameters,x)
+ #define EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx, nid) \
+       EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, EVP_PKEY_OP_PARAMGEN, \
+                               EVP_PKEY_CTRL_EC_PARAMGEN_CURVE_NID, nid, NULL)
+
+
+=head1 DESCRIPTION
+
+This library provides an extensive set of functions for performing operations on elliptic curves over finite fields.
+In general an elliptic curve is one with an equation of the form:
+
+y^2 = x^3 + ax + b
+
+An B<EC_GROUP> structure is used to represent the definition of an elliptic curve. Points on a curve are stored using an
+B<EC_POINT> structure. An B<EC_KEY> is used to hold a private/public key pair, where a private key is simply a BIGNUM and a
+public key is a point on a curve (represented by an B<EC_POINT>).
+
+The library contains a number of alternative implementations of the different functions. Each implementation is optimised
+for different scenarios. No matter which implementation is being used, the interface remains the same. The library
+handles calling the correct implementation when an interface function is invoked. An implementation is represented by
+an B<EC_METHOD> structure.
+
+The creation and destruction of B<EC_GROUP> objects is described in L<EC_GROUP_new(3)|EC_GROUP_new(3)>. Functions for
+manipulating B<EC_GROUP> objects are described in L<EC_GROUP_copy(3)|EC_GROUP_copy(3)>.
+
+Functions for creating, destroying and manipulating B<EC_POINT> objects are explained in L<EC_POINT_new(3)|EC_POINT_new(3)>,
+whilst functions for performing mathematical operations and tests on B<EC_POINTs> are covered in L<EC_POINT_add(3)|EC_POINT_add(3)>.
+
+For working with private and public keys refer to L<EC_KEY_new(3)|EC_KEY_new(3)>. Implementations are covered in
+L<EC_GFp_simple_method(3)|EC_GFp_simple_method(3)>.
+
+For information on encoding and decoding curve parameters to and from ASN1 see L<d2i_ECPKParameters(3)|d2i_ECPKParameters(3)>.
+
+=head1 SEE ALSO
+
+L<crypto(3)|crypto(3)>, L<EC_GROUP_new(3)|EC_GROUP_new(3)>, L<EC_GROUP_copy(3)|EC_GROUP_copy(3)>,
+L<EC_POINT_new(3)|EC_POINT_new(3)>, L<EC_POINT_add(3)|EC_POINT_add(3)>, L<EC_KEY_new(3)|EC_KEY_new(3)>,
+L<EC_GFp_simple_method(3)|EC_GFp_simple_method(3)>, L<d2i_ECPKParameters(3)|d2i_ECPKParameters(3)>
+
+
+=cut
index 59a5916..46c071b 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-ecdsa - Elliptic Curve Digital Signature Algorithm
+ECDSA_SIG_new, ECDSA_SIG_free, i2d_ECDSA_SIG, d2i_ECDSA_SIG, ECDSA_size, ECDSA_sign_setup, ECDSA_sign, ECDSA_sign_ex, ECDSA_verify, ECDSA_do_sign, ECDSA_do_sign_ex, ECDSA_do_verify - Elliptic Curve Digital Signature Algorithm
 
 =head1 SYNOPSIS
 
index 9faa349..29fab9f 100644 (file)
@@ -13,22 +13,58 @@ evp - high-level cryptographic functions
 The EVP library provides a high-level interface to cryptographic
 functions.
 
-B<EVP_Seal>I<...> and B<EVP_Open>I<...> provide public key encryption
-and decryption to implement digital "envelopes".
+L<B<EVP_Seal>I<...>|EVP_SealInit(3)> and L<B<EVP_Open>I<...>|EVP_OpenInit(3)>
+provide public key encryption and decryption to implement digital "envelopes".
 
-The B<EVP_Sign>I<...> and B<EVP_Verify>I<...> functions implement
-digital signatures.
+The L<B<EVP_DigestSign>I<...>|EVP_DigestSignInit(3)> and
+L<B<EVP_DigestVerify>I<...>|EVP_DigestVerifyInit(3)> functions implement
+digital signatures and Message Authentication Codes (MACs). Also see the older
+L<B<EVP_Sign>I<...>|EVP_SignInit(3)> and L<B<EVP_Verify>I<...>|EVP_VerifyInit(3)>
+functions.
 
-Symmetric encryption is available with the B<EVP_Encrypt>I<...>
-functions.  The B<EVP_Digest>I<...> functions provide message digests.
+Symmetric encryption is available with the L<B<EVP_Encrypt>I<...>|EVP_EncryptInit(3)>
+functions.  The L<B<EVP_Digest>I<...>|EVP_DigestInit(3)> functions provide message digests.
 
 The B<EVP_PKEY>I<...> functions provide a high level interface to
-asymmetric algorithms.
+asymmetric algorithms. To create a new EVP_PKEY see
+L<EVP_PKEY_new(3)|EVP_PKEY_new(3)>. EVP_PKEYs can be associated
+with a private key of a particular algorithm by using the functions
+described on the L<EVP_PKEY_set1_RSA(3)|EVP_PKEY_set1_RSA(3)> page, or
+new keys can be generated using L<EVP_PKEY_keygen(3)|EVP_PKEY_keygen(3)>.
+EVP_PKEYs can be compared using L<EVP_PKEY_cmp(3)|EVP_PKEY_cmp(3)>, or printed using
+L<EVP_PKEY_print_private(3)|EVP_PKEY_print_private(3)>.
+
+The EVP_PKEY functions support the full range of asymmetric algorithm operations:
+
+=over
+
+=item For key agreement see L<EVP_PKEY_derive(3)|EVP_PKEY_derive(3)>
+
+=item For signing and verifying see L<EVP_PKEY_sign(3)|EVP_PKEY_sign(3)>,
+L<EVP_PKEY_verify(3)|EVP_PKEY_verify(3)> and L<EVP_PKEY_verify_recover(3)|EVP_PKEY_verify_recover(3)>.
+However, note that
+these functions do not perform a digest of the data to be signed. Therefore
+normally you would use the L<B<EVP_DigestSign>I<...>|EVP_DigestSignInit(3)>
+functions for this purpose.
+
+=item For encryption and decryption see L<EVP_PKEY_encrypt(3)|EVP_PKEY_encrypt(3)>
+and L<EVP_PKEY_decrypt(3)|EVP_PKEY_decrypt(3)> respectively. However, note that
+these functions perform encryption and decryption only. As public key
+encryption is an expensive operation, normally you would wrap
+an encrypted message in a "digital envelope" using the L<B<EVP_Seal>I<...>|EVP_SealInit(3)> and
+L<B<EVP_Open>I<...>|EVP_OpenInit(3)> functions.
+
+=back
+
+The L<EVP_BytesToKey(3)|EVP_BytesToKey(3)> function provides some limited support for password
+based encryption. Careful selection of the parameters will provide a PKCS#5 PBKDF1 compatible
+implementation. However, new applications should not typically use this (preferring, for example,
+PBKDF2 from PKCS#5).
 
-Algorithms are loaded with OpenSSL_add_all_algorithms(3).
+Algorithms are loaded with L<OpenSSL_add_all_algorithms(3)|OpenSSL_add_all_algorithms(3)>.
 
 All the symmetric algorithms (ciphers), digests and asymmetric algorithms
-(public key algorithms) can be replaced by ENGINE modules providing alternative
+(public key algorithms) can be replaced by L<ENGINE|engine(3)> modules providing alternative
 implementations. If ENGINE implementations of ciphers or digests are registered
 as defaults, then the various EVP functions will automatically use those
 implementations automatically in preference to built in software
@@ -47,8 +83,20 @@ L<EVP_DigestInit(3)|EVP_DigestInit(3)>,
 L<EVP_EncryptInit(3)|EVP_EncryptInit(3)>,
 L<EVP_OpenInit(3)|EVP_OpenInit(3)>,
 L<EVP_SealInit(3)|EVP_SealInit(3)>,
+L<EVP_DigestSignInit(3)|EVP_DigestSignInit(3)>,
 L<EVP_SignInit(3)|EVP_SignInit(3)>,
 L<EVP_VerifyInit(3)|EVP_VerifyInit(3)>,
+L<EVP_PKEY_new(3)|EVP_PKEY_new(3)>,
+L<EVP_PKEY_set1_RSA(3)|EVP_PKEY_set1_RSA(3)>,
+L<EVP_PKEY_keygen(3)|EVP_PKEY_keygen(3)>,
+L<EVP_PKEY_print_private(3)|EVP_PKEY_print_private(3)>,
+L<EVP_PKEY_decrypt(3)|EVP_PKEY_decrypt(3)>,
+L<EVP_PKEY_encrypt(3)|EVP_PKEY_encrypt(3)>,
+L<EVP_PKEY_sign(3)|EVP_PKEY_sign(3)>,
+L<EVP_PKEY_verify(3)|EVP_PKEY_verify(3)>,
+L<EVP_PKEY_verify_recover(3)|EVP_PKEY_verify_recover(3)>,
+L<EVP_PKEY_derive(3)|EVP_PKEY_derive(3)>,
+L<EVP_BytesToKey(3)|EVP_BytesToKey(3)>,
 L<OpenSSL_add_all_algorithms(3)|OpenSSL_add_all_algorithms(3)>,
 L<engine(3)|engine(3)>
 
index d92138d..58a57f4 100644 (file)
@@ -2,8 +2,8 @@
 
 =head1 NAME
 
-HMAC, HMAC_Init, HMAC_Update, HMAC_Final, HMAC_cleanup - HMAC message
-authentication code
+HMAC, HMAC_CTX_init, HMAC_Init, HMAC_Init_ex, HMAC_Update, HMAC_Final, HMAC_CTX_cleanup,
+HMAC_cleanup - HMAC message authentication code
 
 =head1 SYNOPSIS
 
index dc4d884..a37231e 100644 (file)
@@ -23,7 +23,7 @@ streaming.
 
 =head1 BUGS
 
-The prefix "d2i" is arguably wrong because the function outputs BER format.
+The prefix "i2d" is arguably wrong because the function outputs BER format.
 
 =head1 RETURN VALUES
 
index 1c068c8..d102df2 100644 (file)
@@ -39,7 +39,7 @@ Since the introduction of the ENGINE API, the recommended way of controlling
 default implementations is by using the ENGINE API functions. The default
 B<RAND_METHOD>, as set by RAND_set_rand_method() and returned by
 RAND_get_rand_method(), is only used if no ENGINE has been set as the default
-"rand" implementation. Hence, these two functions are no longer the recommened
+"rand" implementation. Hence, these two functions are no longer the recommended
 way to control defaults.
 
 If an alternative B<RAND_METHOD> implementation is being used (either set
index 94ab7bc..0c9dbf2 100644 (file)
@@ -2,29 +2,58 @@
 
 =head1 NAME
 
-SHA1, SHA1_Init, SHA1_Update, SHA1_Final - Secure Hash Algorithm
+SHA1, SHA1_Init, SHA1_Update, SHA1_Final, SHA224, SHA224_Init, SHA224_Update,
+SHA224_Final, SHA256, SHA256_Init, SHA256_Update, SHA256_Final, SHA384,
+SHA384_Init, SHA384_Update, SHA384_Final, SHA512, SHA512_Init, SHA512_Update,
+SHA512_Final - Secure Hash Algorithm
 
 =head1 SYNOPSIS
 
  #include <openssl/sha.h>
 
- unsigned char *SHA1(const unsigned char *d, unsigned long n,
-                  unsigned char *md);
-
  int SHA1_Init(SHA_CTX *c);
- int SHA1_Update(SHA_CTX *c, const void *data,
-                  unsigned long len);
+ int SHA1_Update(SHA_CTX *c, const void *data, size_t len);
  int SHA1_Final(unsigned char *md, SHA_CTX *c);
+ unsigned char *SHA1(const unsigned char *d, size_t n,
+      unsigned char *md);
+
+ int SHA224_Init(SHA256_CTX *c);
+ int SHA224_Update(SHA256_CTX *c, const void *data, size_t len);
+ int SHA224_Final(unsigned char *md, SHA256_CTX *c);
+ unsigned char *SHA224(const unsigned char *d, size_t n,
+      unsigned char *md);
+
+ int SHA256_Init(SHA256_CTX *c);
+ int SHA256_Update(SHA256_CTX *c, const void *data, size_t len);
+ int SHA256_Final(unsigned char *md, SHA256_CTX *c);
+ unsigned char *SHA256(const unsigned char *d, size_t n,
+      unsigned char *md);
+
+ int SHA384_Init(SHA512_CTX *c);
+ int SHA384_Update(SHA512_CTX *c, const void *data, size_t len);
+ int SHA384_Final(unsigned char *md, SHA512_CTX *c);
+ unsigned char *SHA384(const unsigned char *d, size_t n,
+      unsigned char *md);
+
+ int SHA512_Init(SHA512_CTX *c);
+ int SHA512_Update(SHA512_CTX *c, const void *data, size_t len);
+ int SHA512_Final(unsigned char *md, SHA512_CTX *c);
+ unsigned char *SHA512(const unsigned char *d, size_t n,
+      unsigned char *md);
 
 =head1 DESCRIPTION
 
+Applications should use the higher level functions
+L<EVP_DigestInit(3)|EVP_DigestInit(3)> etc. instead of calling the hash
+functions directly.
+
 SHA-1 (Secure Hash Algorithm) is a cryptographic hash function with a
 160 bit output.
 
 SHA1() computes the SHA-1 message digest of the B<n>
 bytes at B<d> and places it in B<md> (which must have space for
 SHA_DIGEST_LENGTH == 20 bytes of output). If B<md> is NULL, the digest
-is placed in a static array.
+is placed in a static array. Note: setting B<md> to NULL is B<not thread safe>.
 
 The following functions may be used if the message is not completely
 stored in memory:
@@ -37,24 +66,29 @@ be hashed (B<len> bytes at B<data>).
 SHA1_Final() places the message digest in B<md>, which must have space
 for SHA_DIGEST_LENGTH == 20 bytes of output, and erases the B<SHA_CTX>.
 
-Applications should use the higher level functions
-L<EVP_DigestInit(3)|EVP_DigestInit(3)>
-etc. instead of calling the hash functions directly.
+The SHA224, SHA256, SHA384 and SHA512 families of functions operate in the
+same way as the SHA1 functions. Note that SHA224 and SHA256 use a
+B<SHA256_CTX> object instead of B<SHA_CTX>. SHA384 and SHA512 use B<SHA512_CTX>.
+The buffer B<md> must have space for the output from the SHA variant being used
+(defined by SHA224_DIGEST_LENGTH, SHA256_DIGEST_LENGTH, SHA384_DIGEST_LENGTH and
+SHA512_DIGEST_LENGTH). Also note that, as for the SHA1() function above, the
+SHA224(), SHA256(), SHA384() and SHA512() functions are not thread safe if
+B<md> is NULL.
 
 The predecessor of SHA-1, SHA, is also implemented, but it should be
 used only when backward compatibility is required.
 
 =head1 RETURN VALUES
 
-SHA1() returns a pointer to the hash value. 
+SHA1(), SHA224(), SHA256(), SHA384() and SHA512() return a pointer to the hash
+value. 
 
-SHA1_Init(), SHA1_Update() and SHA1_Final() return 1 for success, 0 otherwise.
+SHA1_Init(), SHA1_Update(), SHA1_Final() and the equivalent SHA224, SHA256,
+SHA384 and SHA512 functions return 1 for success, 0 otherwise.
 
 =head1 CONFORMING TO
 
-SHA: US Federal Information Processing Standard FIPS PUB 180 (Secure Hash
-Standard),
-SHA-1: US Federal Information Processing Standard FIPS PUB 180-1 (Secure Hash
+US Federal Information Processing Standard FIPS PUB 180-4 (Secure Hash
 Standard),
 ANSI X9.30
 
index 2e113be..c598f4d 100644 (file)
@@ -109,6 +109,16 @@ If SSL_CIPHER_description() cannot handle a built-in cipher, the according
 description of the cipher property is B<unknown>. This case should not
 occur.
 
+The standard terminology for ephemeral Diffie-Hellman schemes is DHE
+(finite field) or ECDHE (elliptic curve).  This version of OpenSSL
+idiosyncratically reports these schemes as EDH and EECDH, even though
+it also accepts the standard terminology.
+
+It is recommended to use the standard terminology (DHE and ECDHE)
+during configuration (e.g. via SSL_CTX_set_cipher_list) for clarity of
+configuration.  OpenSSL versions after 1.0.2 will report the standard
+terms via SSL_CIPHER_get_name and SSL_CIPHER_description.
+
 =head1 RETURN VALUES
 
 See DESCRIPTION
@@ -116,6 +126,7 @@ See DESCRIPTION
 =head1 SEE ALSO
 
 L<ssl(3)|ssl(3)>, L<SSL_get_current_cipher(3)|SSL_get_current_cipher(3)>,
-L<SSL_get_ciphers(3)|SSL_get_ciphers(3)>, L<ciphers(1)|ciphers(1)>
+L<SSL_get_ciphers(3)|SSL_get_ciphers(3)>, L<ciphers(1)|ciphers(1)>,
+L<SSL_CTX_set_cipher_list(3)|SSL_CTX_set_cipher_list(3)>
 
 =cut
index f4d191c..2bb4403 100644 (file)
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-SSL_COMP_add_compression_method - handle SSL/TLS integrated compression methods
+SSL_COMP_add_compression_method, SSL_COMP_free_compression_methods - handle SSL/TLS integrated compression methods
 
 =head1 SYNOPSIS
 
@@ -10,6 +10,8 @@ SSL_COMP_add_compression_method - handle SSL/TLS integrated compression methods
 
  int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm);
 
+ void SSL_COMP_free_compression_methods(void);
+
 =head1 DESCRIPTION
 
 SSL_COMP_add_compression_method() adds the compression method B<cm> with
@@ -17,6 +19,10 @@ the identifier B<id> to the list of available compression methods. This
 list is globally maintained for all SSL operations within this application.
 It cannot be set for specific SSL_CTX or SSL objects.
 
+SSL_COMP_free_compression_methods() frees the internal table of
+compression methods that were built internally, and possibly
+augmented by calling SSL_COMP_add_compression_method().
+
 =head1 NOTES
 
 The TLS standard (or SSLv3) allows the integration of compression methods
@@ -38,8 +44,8 @@ its own compression methods and will unconditionally activate compression
 when a matching identifier is found. There is no way to restrict the list
 of compression methods supported on a per connection basis.
 
-The OpenSSL library has the compression methods B<COMP_rle()> and (when
-especially enabled during compilation) B<COMP_zlib()> available.
+If enabled during compilation, the OpenSSL library will have the
+COMP_zlib() compression method available.
 
 =head1 WARNINGS
 
diff --git a/doc/ssl/SSL_CONF_CTX_new.pod b/doc/ssl/SSL_CONF_CTX_new.pod
new file mode 100644 (file)
index 0000000..a9ccb04
--- /dev/null
@@ -0,0 +1,40 @@
+=pod
+
+=head1 NAME
+
+SSL_CONF_CTX_new, SSL_CONF_CTX_free - SSL configuration allocation functions
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ SSL_CONF_CTX *SSL_CONF_CTX_new(void);
+ void SSL_CONF_CTX_free(SSL_CONF_CTX *cctx);
+
+=head1 DESCRIPTION
+
+The function SSL_CONF_CTX_new() allocates and initialises an B<SSL_CONF_CTX>
+structure for use with the SSL_CONF functions.
+
+The function SSL_CONF_CTX_free() frees up the context B<cctx>.
+
+=head1 RETURN VALUES
+
+SSL_CONF_CTX_new() returns either the newly allocated B<SSL_CONF_CTX> structure
+or B<NULL> if an error occurs.
+
+SSL_CONF_CTX_free() does not return a value.
+
+=head1 SEE ALSO
+
+L<SSL_CONF_CTX_set_flags(3)|SSL_CONF_CTX_set_flags(3)>,
+L<SSL_CONF_CTX_set_ssl_ctx(3)|SSL_CONF_CTX_set_ssl_ctx(3)>,
+L<SSL_CONF_CTX_set1_prefix(3)|SSL_CONF_CTX_set1_prefix(3)>,
+L<SSL_CONF_cmd(3)|SSL_CONF_cmd(3)>,
+L<SSL_CONF_cmd_argv(3)|SSL_CONF_cmd_argv(3)>
+
+=head1 HISTORY
+
+These functions were first added to OpenSSL 1.0.2
+
+=cut
diff --git a/doc/ssl/SSL_CONF_CTX_set1_prefix.pod b/doc/ssl/SSL_CONF_CTX_set1_prefix.pod
new file mode 100644 (file)
index 0000000..7699018
--- /dev/null
@@ -0,0 +1,49 @@
+=pod
+
+=head1 NAME
+
+SSL_CONF_CTX_set1_prefix - Set configuration context command prefix
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ unsigned int SSL_CONF_CTX_set1_prefix(SSL_CONF_CTX *cctx, const char *prefix);
+
+=head1 DESCRIPTION
+
+The function SSL_CONF_CTX_set1_prefix() sets the command prefix of B<cctx>
+to B<prefix>. If B<prefix> is B<NULL> it is restored to the default value.
+
+=head1 NOTES
+
+Command prefixes alter the commands recognised by subsequent SSL_CONF_cmd()
+calls. For example for files, if the prefix "SSL" is set then command names
+such as "SSLProtocol", "SSLOptions" etc. are recognised instead of "Protocol"
+and "Options". Similarly for command lines if the prefix is "--ssl-" then 
+"--ssl-no_tls1_2" is recognised instead of "-no_tls1_2".
+
+If the B<SSL_CONF_FLAG_CMDLINE> flag is set then prefix checks are case
+sensitive and "-" is the default. In the unlikely event an application
+explicitly wants to set no prefix it must be explicitly set to "".
+
+If the B<SSL_CONF_FLAG_FILE> flag is set then prefix checks are case
+insensitive and no prefix is the default.
+
+=head1 RETURN VALUES
+
+SSL_CONF_CTX_set1_prefix() returns 1 for success and 0 for failure.
+
+=head1 SEE ALSO
+
+L<SSL_CONF_CTX_new(3)|SSL_CONF_CTX_new(3)>,
+L<SSL_CONF_CTX_set_flags(3)|SSL_CONF_CTX_set_flags(3)>,
+L<SSL_CONF_CTX_set_ssl_ctx(3)|SSL_CONF_CTX_set_ssl_ctx(3)>,
+L<SSL_CONF_cmd(3)|SSL_CONF_cmd(3)>,
+L<SSL_CONF_cmd_argv(3)|SSL_CONF_cmd_argv(3)>
+
+=head1 HISTORY
+
+These functions were first added to OpenSSL 1.0.2
+
+=cut
diff --git a/doc/ssl/SSL_CONF_CTX_set_flags.pod b/doc/ssl/SSL_CONF_CTX_set_flags.pod
new file mode 100644 (file)
index 0000000..4e34280
--- /dev/null
@@ -0,0 +1,68 @@
+=pod
+
+=head1 NAME
+
+SSL_CONF_CTX_set_flags, SSL_CONF_CTX_clear_flags - Set or clear SSL configuration context flags
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ unsigned int SSL_CONF_CTX_set_flags(SSL_CONF_CTX *cctx, unsigned int flags);
+ unsigned int SSL_CONF_CTX_clear_flags(SSL_CONF_CTX *cctx, unsigned int flags);
+
+=head1 DESCRIPTION
+
+The function SSL_CONF_CTX_set_flags() sets B<flags> in the context B<cctx>.
+
+The function SSL_CONF_CTX_clear_flags() clears B<flags> in the context B<cctx>.
+
+=head1 NOTES
+
+The flags set affect how subsequent calls to SSL_CONF_cmd() or
+SSL_CONF_cmd_argv() behave.
+
+Currently the following B<flags> values are recognised:
+
+=over 4
+
+=item SSL_CONF_FLAG_CMDLINE, SSL_CONF_FLAG_FILE
+
+recognise options intended for command line or configuration file use. At
+least one of these flags must be set.
+
+=item SSL_CONF_FLAG_CLIENT, SSL_CONF_FLAG_SERVER
+
+recognise options intended for use in SSL/TLS clients or servers. One or
+both of these flags must be set.
+
+=item SSL_CONF_FLAG_CERTIFICATE
+
+recognise certificate and private key options.
+
+=item SSL_CONF_FLAG_SHOW_ERRORS
+
+indicate errors relating to unrecognised options or missing arguments in
+the error queue. If this option isn't set such errors are only reflected
+in the return values of SSL_CONF_cmd() or SSL_CONF_cmd_argv().
+
+=back
+
+=head1 RETURN VALUES
+
+SSL_CONF_CTX_set_flags() and SSL_CONF_CTX_clear_flags() return the new flags
+value after setting or clearing flags.
+
+=head1 SEE ALSO
+
+L<SSL_CONF_CTX_new(3)|SSL_CONF_CTX_new(3)>,
+L<SSL_CONF_CTX_set_ssl_ctx(3)|SSL_CONF_CTX_set_ssl_ctx(3)>,
+L<SSL_CONF_CTX_set1_prefix(3)|SSL_CONF_CTX_set1_prefix(3)>,
+L<SSL_CONF_cmd(3)|SSL_CONF_cmd(3)>,
+L<SSL_CONF_cmd_argv(3)|SSL_CONF_cmd_argv(3)>
+
+=head1 HISTORY
+
+These functions were first added to OpenSSL 1.0.2
+
+=cut
diff --git a/doc/ssl/SSL_CONF_CTX_set_ssl_ctx.pod b/doc/ssl/SSL_CONF_CTX_set_ssl_ctx.pod
new file mode 100644 (file)
index 0000000..2049a53
--- /dev/null
@@ -0,0 +1,47 @@
+=pod
+
+=head1 NAME
+
+SSL_CONF_CTX_set_ssl_ctx, SSL_CONF_CTX_set_ssl - set context to configure
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ void SSL_CONF_CTX_set_ssl_ctx(SSL_CONF_CTX *cctx, SSL_CTX *ctx);
+ void SSL_CONF_CTX_set_ssl(SSL_CONF_CTX *cctx, SSL *ssl);
+
+=head1 DESCRIPTION
+
+SSL_CONF_CTX_set_ssl_ctx() sets the context associated with B<cctx> to the
+B<SSL_CTX> structure B<ctx>. Any previous B<SSL> or B<SSL_CTX> associated with
+B<cctx> is cleared. Subsequent calls to SSL_CONF_cmd() will be sent to
+B<ctx>.
+
+SSL_CONF_CTX_set_ssl() sets the context associated with B<cctx> to the
+B<SSL> structure B<ssl>. Any previous B<SSL> or B<SSL_CTX> associated with
+B<cctx> is cleared. Subsequent calls to SSL_CONF_cmd() will be sent to
+B<ssl>.
+
+=head1 NOTES
+
+The context need not be set or it can be set to B<NULL> in which case only
+syntax checking of commands is performed, where possible.
+
+=head1 RETURN VALUES
+
+SSL_CONF_CTX_set_ssl_ctx() and SSL_CONF_CTX_set_ssl() do not return a value.
+
+=head1 SEE ALSO
+
+L<SSL_CONF_CTX_new(3)|SSL_CONF_CTX_new(3)>,
+L<SSL_CONF_CTX_set_flags(3)|SSL_CONF_CTX_set_flags(3)>,
+L<SSL_CONF_CTX_set1_prefix(3)|SSL_CONF_CTX_set1_prefix(3)>,
+L<SSL_CONF_cmd(3)|SSL_CONF_cmd(3)>,
+L<SSL_CONF_cmd_argv(3)|SSL_CONF_cmd_argv(3)>
+
+=head1 HISTORY
+
+These functions were first added to OpenSSL 1.0.2
+
+=cut
diff --git a/doc/ssl/SSL_CONF_cmd.pod b/doc/ssl/SSL_CONF_cmd.pod
new file mode 100644 (file)
index 0000000..2bf1a60
--- /dev/null
@@ -0,0 +1,438 @@
+=pod
+
+=head1 NAME
+
+SSL_CONF_cmd - send configuration command
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ int SSL_CONF_cmd(SSL_CONF_CTX *cctx, const char *cmd, const char *value);
+ int SSL_CONF_cmd_value_type(SSL_CONF_CTX *cctx, const char *cmd);
+ int SSL_CONF_finish(SSL_CONF_CTX *cctx);
+
+=head1 DESCRIPTION
+
+The function SSL_CONF_cmd() performs configuration operation B<cmd> with
+optional parameter B<value> on B<cctx>. Its purpose is to simplify application
+configuration of B<SSL_CTX> or B<SSL> structures by providing a common
+framework for command line options or configuration files.
+
+SSL_CONF_cmd_value_type() returns the type of value that B<cmd> refers to.
+
+The function SSL_CONF_finish() must be called after all configuration
+operations have been completed. It is used to finalise any operations
+or to process defaults.
+
+=head1 SUPPORTED COMMAND LINE COMMANDS
+
+Currently supported B<cmd> names for command lines (i.e. when the
+flag B<SSL_CONF_FLAG_CMDLINE> is set) are listed below. Note: all B<cmd> names
+are case sensitive. Unless otherwise stated commands can be used by
+both clients and servers and the B<value> parameter is not used. The default
+prefix for command line commands is B<-> and that is reflected below.
+
+=over 4
+
+=item B<-sigalgs>
+
+This sets the supported signature algorithms for TLS v1.2. For clients this
+value is used directly for the supported signature algorithms extension. For
+servers it is used to determine which signature algorithms to support.
+
+The B<value> argument should be a colon separated list of signature algorithms
+in order of decreasing preference of the form B<algorithm+hash>. B<algorithm>
+is one of B<RSA>, B<DSA> or B<ECDSA> and B<hash> is a supported algorithm
+OID short name such as B<SHA1>, B<SHA224>, B<SHA256>, B<SHA384> or B<SHA512>.
+Note: algorithm and hash names are case sensitive.
+
+If this option is not set then all signature algorithms supported by the
+OpenSSL library are permissible.
+
+=item B<-client_sigalgs>
+
+This sets the supported signature algorithms associated with client
+authentication for TLS v1.2. For servers the value is used in the supported
+signature algorithms field of a certificate request. For clients it is
+used to determine which signature algorithm to use with the client certificate.
+If a server does not request a certificate this option has no effect.
+
+The syntax of B<value> is identical to B<-sigalgs>. If not set then
+the value set for B<-sigalgs> will be used instead.
+
+=item B<-curves>
+
+This sets the supported elliptic curves. For clients the curves are
+sent using the supported curves extension. For servers it is used
+to determine which curve to use. This setting affects curves used for both
+signatures and key exchange, if applicable.
+
+The B<value> argument is a colon separated list of curves. The curve can be
+either the B<NIST> name (e.g. B<P-256>) or an OpenSSL OID name (e.g
+B<prime256v1>). Curve names are case sensitive.
+
+=item B<-named_curve>
+
+This sets the temporary curve used for ephemeral ECDH modes. Only used by 
+servers.
+
+The B<value> argument is a curve name or the special value B<auto> which
+picks an appropriate curve based on client and server preferences. The curve
+can be either the B<NIST> name (e.g. B<P-256>) or an OpenSSL OID name
+(e.g B<prime256v1>). Curve names are case sensitive.
+
+=item B<-cipher>
+
+Sets the cipher suite list to B<value>. Note: syntax checking of B<value> is
+currently not performed unless a B<SSL> or B<SSL_CTX> structure is 
+associated with B<cctx>.
+
+=item B<-cert>
+
+Attempts to use the file B<value> as the certificate for the appropriate
+context. It currently uses SSL_CTX_use_certificate_chain_file() if an B<SSL_CTX>
+structure is set or SSL_use_certificate_file() with filetype PEM if an B<SSL>
+structure is set. This option is only supported if certificate operations
+are permitted.
+
+=item B<-key>
+
+Attempts to use the file B<value> as the private key for the appropriate
+context. This option is only supported if certificate operations
+are permitted. Note: if no B<-key> option is set then a private key is
+not loaded: it does not currently use the B<-cert> file.
+
+=item B<-dhparam>
+
+Attempts to use the file B<value> as the set of temporary DH parameters for
+the appropriate context. This option is only supported if certificate
+operations are permitted.
+
+=item B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2>
+
+Disables protocol support for SSLv2, SSLv3, TLS 1.0, TLS 1.1 or TLS 1.2 
+by setting the corresponding options B<SSL_OP_NO_SSLv2>, B<SSL_OP_NO_SSLv3>,
+B<SSL_OP_NO_TLSv1>, B<SSL_OP_NO_TLSv1_1> and B<SSL_OP_NO_TLSv1_2> respectively.
+
+=item B<-bugs>
+
+Various bug workarounds are set, same as setting B<SSL_OP_ALL>.
+
+=item B<-no_comp>
+
+Disables support for SSL/TLS compression, same as setting B<SSL_OP_NO_COMPRESSION>.
+
+=item B<-no_ticket>
+
+Disables support for session tickets, same as setting B<SSL_OP_NO_TICKET>.
+
+=item B<-serverpref>
+
+Use server and not client preference order when determining which cipher suite,
+signature algorithm or elliptic curve to use for an incoming connection.
+Equivalent to B<SSL_OP_CIPHER_SERVER_PREFERENCE>. Only used by servers.
+
+=item B<-no_resumption_on_reneg>
+
+set SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION flag. Only used by servers.
+
+=item B<-legacyrenegotiation>
+
+permits the use of unsafe legacy renegotiation. Equivalent to setting
+B<SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION>.
+
+=item B<-legacy_server_connect>, B<-no_legacy_server_connect>
+
+permits or prohibits the use of unsafe legacy renegotiation for OpenSSL
+clients only. Equivalent to setting or clearing B<SSL_OP_LEGACY_SERVER_CONNECT>.
+Set by default.
+
+=item B<-strict>
+
+enables strict mode protocol handling. Equivalent to setting
+B<SSL_CERT_FLAG_TLS_STRICT>.
+
+=item B<-debug_broken_protocol>
+
+disables various checks and permits several kinds of broken protocol behaviour
+for testing purposes: it should B<NEVER> be used in anything other than a test
+environment. Only supported if OpenSSL is configured with
+B<-DOPENSSL_SSL_DEBUG_BROKEN_PROTOCOL>.
+
+=back
+
+=head1 SUPPORTED CONFIGURATION FILE COMMANDS
+
+Currently supported B<cmd> names for configuration files (i.e. when the
+flag B<SSL_CONF_FLAG_FILE> is set) are listed below. All configuration file
+B<cmd> names are case insensitive so B<signaturealgorithms> is recognised
+as well as B<SignatureAlgorithms>. Unless otherwise stated the B<value> names
+are also case insensitive.
+
+Note: the command prefix (if set) alters the recognised B<cmd> values.
+
+=over 4
+
+=item B<CipherString>
+
+Sets the cipher suite list to B<value>. Note: syntax checking of B<value> is
+currently not performed unless an B<SSL> or B<SSL_CTX> structure is 
+associated with B<cctx>.
+
+=item B<Certificate>
+
+Attempts to use the file B<value> as the certificate for the appropriate
+context. It currently uses SSL_CTX_use_certificate_chain_file() if an B<SSL_CTX>
+structure is set or SSL_use_certificate_file() with filetype PEM if an B<SSL>
+structure is set. This option is only supported if certificate operations
+are permitted.
+
+=item B<PrivateKey>
+
+Attempts to use the file B<value> as the private key for the appropriate
+context. This option is only supported if certificate operations
+are permitted. Note: if no B<PrivateKey> option is set then a private key is
+not loaded: it does not currently use the B<Certificate> file.
+
+=item B<ServerInfoFile>
+
+Attempts to use the file B<value> in the "serverinfo" extension using the
+function SSL_CTX_use_serverinfo_file.
+
+=item B<DHParameters>
+
+Attempts to use the file B<value> as the set of temporary DH parameters for
+the appropriate context. This option is only supported if certificate
+operations are permitted.
+
+=item B<SignatureAlgorithms>
+
+This sets the supported signature algorithms for TLS v1.2. For clients this
+value is used directly for the supported signature algorithms extension. For
+servers it is used to determine which signature algorithms to support.
+
+The B<value> argument should be a colon separated list of signature algorithms
+in order of decreasing preference of the form B<algorithm+hash>. B<algorithm>
+is one of B<RSA>, B<DSA> or B<ECDSA> and B<hash> is a supported algorithm
+OID short name such as B<SHA1>, B<SHA224>, B<SHA256>, B<SHA384> or B<SHA512>.
+Note: algorithm and hash names are case sensitive.
+
+If this option is not set then all signature algorithms supported by the
+OpenSSL library are permissible.
+
+=item B<ClientSignatureAlgorithms>
+
+This sets the supported signature algorithms associated with client
+authentication for TLS v1.2. For servers the value is used in the supported
+signature algorithms field of a certificate request. For clients it is
+used to determine which signature algorithm to use with the client certificate.
+
+The syntax of B<value> is identical to B<SignatureAlgorithms>. If not set then
+the value set for B<SignatureAlgorithms> will be used instead.
+
+=item B<Curves>
+
+This sets the supported elliptic curves. For clients the curves are
+sent using the supported curves extension. For servers it is used
+to determine which curve to use. This setting affects curves used for both
+signatures and key exchange, if applicable.
+
+The B<value> argument is a colon separated list of curves. The curve can be
+either the B<NIST> name (e.g. B<P-256>) or an OpenSSL OID name (e.g
+B<prime256v1>). Curve names are case sensitive.
+
+=item B<ECDHParameters>
+
+This sets the temporary curve used for ephemeral ECDH modes. Only used by 
+servers.
+
+The B<value> argument is a curve name or the special value B<Automatic> which
+picks an appropriate curve based on client and server preferences. The curve
+can be either the B<NIST> name (e.g. B<P-256>) or an OpenSSL OID name
+(e.g B<prime256v1>). Curve names are case sensitive.
+
+=item B<Protocol>
+
+The supported versions of the SSL or TLS protocol.
+
+The B<value> argument is a comma separated list of supported protocols to
+enable or disable. If a protocol is preceded by B<-> that version is disabled.
+All versions are enabled by default, though applications may choose to
+explicitly disable some. Currently supported protocol values are B<SSLv2>,
+B<SSLv3>, B<TLSv1>, B<TLSv1.1> and B<TLSv1.2>. The special value B<ALL> refers
+to all supported versions.
+
+=item B<Options>
+
+The B<value> argument is a comma separated list of various flags to set.
+If a flag string is preceded by B<-> it is disabled. See the
+B<SSL_CTX_set_options> function for more details of individual options.
+
+Each option is listed below. Where an operation is enabled by default
+the B<-flag> syntax is needed to disable it.
+
+B<SessionTicket>: session ticket support, enabled by default. Inverse of
+B<SSL_OP_NO_TICKET>: that is B<-SessionTicket> is the same as setting
+B<SSL_OP_NO_TICKET>.
+
+B<Compression>: SSL/TLS compression support, enabled by default. Inverse
+of B<SSL_OP_NO_COMPRESSION>.
+
+B<EmptyFragments>: use empty fragments as a countermeasure against a
+SSL 3.0/TLS 1.0 protocol vulnerability affecting CBC ciphers. It
+is set by default. Inverse of B<SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS>.
+
+B<Bugs>: enable various bug workarounds. Same as B<SSL_OP_ALL>.
+
+B<DHSingle>: enable single use DH keys, set by default. Inverse of
+B<SSL_OP_DH_SINGLE>. Only used by servers.
+
+B<ECDHSingle> enable single use ECDH keys, set by default. Inverse of
+B<SSL_OP_ECDH_SINGLE>. Only used by servers.
+
+B<ServerPreference> use server and not client preference order when
+determining which cipher suite, signature algorithm or elliptic curve
+to use for an incoming connection.  Equivalent to
+B<SSL_OP_CIPHER_SERVER_PREFERENCE>. Only used by servers.
+
+B<NoResumptionOnRenegotiation> set
+B<SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION> flag. Only used by servers.
+
+B<UnsafeLegacyRenegotiation> permits the use of unsafe legacy renegotiation.
+Equivalent to B<SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION>.
+
+B<UnsafeLegacyServerConnect> permits the use of unsafe legacy renegotiation
+for OpenSSL clients only. Equivalent to B<SSL_OP_LEGACY_SERVER_CONNECT>.
+Set by default.
+
+=back
+
+=head1 SUPPORTED COMMAND TYPES
+
+The function SSL_CONF_cmd_value_type() currently returns one of the following
+types:
+
+=over 4
+
+=item B<SSL_CONF_TYPE_UNKNOWN>
+
+The B<cmd> string is unrecognised, this return value can be used to flag
+syntax errors.
+
+=item B<SSL_CONF_TYPE_STRING>
+
+The value is a string without any specific structure.
+
+=item B<SSL_CONF_TYPE_FILE>
+
+The value is a file name.
+
+=item B<SSL_CONF_TYPE_DIR>
+
+The value is a directory name.
+
+=back
+
+=head1 NOTES
+
+The order of operations is significant. This can be used to set either defaults
+or values which cannot be overridden. For example if an application calls:
+
+ SSL_CONF_cmd(ctx, "Protocol", "-SSLv2");
+ SSL_CONF_cmd(ctx, userparam, uservalue);
+
+it will disable SSLv2 support by default but the user can override it. If 
+however the call sequence is:
+
+ SSL_CONF_cmd(ctx, userparam, uservalue);
+ SSL_CONF_cmd(ctx, "Protocol", "-SSLv2");
+
+SSLv2 is B<always> disabled and attempts to override this by the user are
+ignored.
+
+By checking the return code of SSL_CONF_cmd() it is possible to query if a
+given B<cmd> is recognised, this is useful if SSL_CONF_cmd() values are
+mixed with additional application specific operations.
+
+For example an application might call SSL_CONF_cmd() and if it returns
+-2 (unrecognised command) continue with processing of application specific
+commands.
+
+Applications can also use SSL_CONF_cmd() to process command lines though the
+utility function SSL_CONF_cmd_argv() is normally used instead. One way
+to do this is to set the prefix to an appropriate value using
+SSL_CONF_CTX_set1_prefix(), pass the current argument to B<cmd> and the
+following argument to B<value> (which may be NULL).
+
+In this case if the return value is positive then it is used to skip that
+number of arguments as they have been processed by SSL_CONF_cmd(). If -2 is
+returned then B<cmd> is not recognised and application specific arguments
+can be checked instead. If -3 is returned a required argument is missing
+and an error is indicated. If 0 is returned some other error occurred and
+this can be reported back to the user.
+
+The function SSL_CONF_cmd_value_type() can be used by applications to 
+check for the existence of a command or to perform additional syntax
+checking or translation of the command value. For example if the return
+value is B<SSL_CONF_TYPE_FILE> an application could translate a relative
+pathname to an absolute pathname.
+
+=head1 EXAMPLES
+
+Set supported signature algorithms:
+
+ SSL_CONF_cmd(ctx, "SignatureAlgorithms", "ECDSA+SHA256:RSA+SHA256:DSA+SHA256");
+
+Enable all protocols except SSLv3 and SSLv2:
+
+ SSL_CONF_cmd(ctx, "Protocol", "ALL,-SSLv3,-SSLv2");
+
+Only enable TLSv1.2:
+
+ SSL_CONF_cmd(ctx, "Protocol", "-ALL,TLSv1.2");
+
+Disable TLS session tickets:
+
+ SSL_CONF_cmd(ctx, "Options", "-SessionTicket");
+
+Set supported curves to P-256, P-384:
+
+ SSL_CONF_cmd(ctx, "Curves", "P-256:P-384");
+
+Set automatic support for any elliptic curve for key exchange:
+
+ SSL_CONF_cmd(ctx, "ECDHParameters", "Automatic");
+
+=head1 RETURN VALUES
+
+SSL_CONF_cmd() returns 1 if the value of B<cmd> is recognised and B<value> is
+B<NOT> used and 2 if both B<cmd> and B<value> are used. In other words it
+returns the number of arguments processed. This is useful when processing
+command lines.
+
+A return value of -2 means B<cmd> is not recognised.
+
+A return value of -3 means B<cmd> is recognised and the command requires a
+value but B<value> is NULL.
+
+A return code of 0 indicates that both B<cmd> and B<value> are valid but an
+error occurred attempting to perform the operation: for example due to an
+error in the syntax of B<value>; in this case the error queue may provide
+additional information.
+
+SSL_CONF_finish() returns 1 for success and 0 for failure.
+
+=head1 SEE ALSO
+
+L<SSL_CONF_CTX_new(3)|SSL_CONF_CTX_new(3)>,
+L<SSL_CONF_CTX_set_flags(3)|SSL_CONF_CTX_set_flags(3)>,
+L<SSL_CONF_CTX_set1_prefix(3)|SSL_CONF_CTX_set1_prefix(3)>,
+L<SSL_CONF_CTX_set_ssl_ctx(3)|SSL_CONF_CTX_set_ssl_ctx(3)>,
+L<SSL_CONF_cmd_argv(3)|SSL_CONF_cmd_argv(3)>
+
+=head1 HISTORY
+
+SSL_CONF_cmd() was first added to OpenSSL 1.0.2
+
+=cut
diff --git a/doc/ssl/SSL_CONF_cmd_argv.pod b/doc/ssl/SSL_CONF_cmd_argv.pod
new file mode 100644 (file)
index 0000000..6e66441
--- /dev/null
@@ -0,0 +1,42 @@
+=pod
+
+=head1 NAME
+
+SSL_CONF_cmd_argv - SSL configuration command line processing.
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ int SSL_CONF_cmd_argv(SSL_CONF_CTX *cctx, int *pargc, char ***pargv);
+
+=head1 DESCRIPTION
+
+The function SSL_CONF_cmd_argv() processes at most two command line
+arguments from B<pargv> and B<pargc>. The values of B<pargv> and B<pargc>
+are updated to reflect the number of command options processed. The B<pargc>
+argument can be set to B<NULL> if it is not used.
+
+=head1 RETURN VALUES
+
+SSL_CONF_cmd_argv() returns the number of command arguments processed: 0, 1, 2
+or a negative error code.
+
+If -2 is returned then an argument for a command is missing.
+
+If -1 is returned the command is recognised but couldn't be processed due
+to an error: for example a syntax error in the argument.
+
+=head1 SEE ALSO
+
+L<SSL_CONF_CTX_new(3)|SSL_CONF_CTX_new(3)>,
+L<SSL_CONF_CTX_set_flags(3)|SSL_CONF_CTX_set_flags(3)>,
+L<SSL_CONF_CTX_set1_prefix(3)|SSL_CONF_CTX_set1_prefix(3)>,
+L<SSL_CONF_CTX_set_ssl_ctx(3)|SSL_CONF_CTX_set_ssl_ctx(3)>,
+L<SSL_CONF_cmd(3)|SSL_CONF_cmd(3)>
+
+=head1 HISTORY
+
+These functions were first added to OpenSSL 1.0.2
+
+=cut
diff --git a/doc/ssl/SSL_CTX_add1_chain_cert.pod b/doc/ssl/SSL_CTX_add1_chain_cert.pod
new file mode 100644 (file)
index 0000000..b999f09
--- /dev/null
@@ -0,0 +1,150 @@
+=pod
+
+=head1 NAME
+
+SSL_CTX_set0_chain, SSL_CTX_set1_chain, SSL_CTX_add0_chain_cert,
+SSL_CTX_add1_chain_cert, SSL_CTX_get0_chain_certs, SSL_CTX_clear_chain_certs,
+SSL_set0_chain, SSL_set1_chain, SSL_add0_chain_cert, SSL_add1_chain_cert,
+SSL_get0_chain_certs, SSL_clear_chain_certs, SSL_CTX_build_cert_chain,
+SSL_build_cert_chain, SSL_CTX_select_current_cert,
+SSL_select_current_cert, SSL_CTX_set_current_cert, SSL_set_current_cert - extra
+chain certificate processing
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ int SSL_CTX_set0_chain(SSL_CTX *ctx, STACK_OF(X509) *sk);
+ int SSL_CTX_set1_chain(SSL_CTX *ctx, STACK_OF(X509) *sk);
+ int SSL_CTX_add0_chain_cert(SSL_CTX *ctx, X509 *x509);
+ int SSL_CTX_add1_chain_cert(SSL_CTX *ctx, X509 *x509);
+ int SSL_CTX_get0_chain_certs(SSL_CTX *ctx, STACK_OF(X509) **sk);
+ int SSL_CTX_clear_chain_certs(SSL_CTX *ctx);
+
+ int SSL_set0_chain(SSL *ssl, STACK_OF(X509) *sk);
+ int SSL_set1_chain(SSL *ssl, STACK_OF(X509) *sk);
+ int SSL_add0_chain_cert(SSL *ssl, X509 *x509);
+ int SSL_add1_chain_cert(SSL *ssl, X509 *x509);
+ int SSL_get0_chain_certs(SSL *ssl, STACK_OF(X509) **sk);
+ int SSL_clear_chain_certs(SSL *ssl);
+
+ int SSL_CTX_build_cert_chain(SSL_CTX *ctx, flags);
+ int SSL_build_cert_chain(SSL *ssl, flags);
+
+ int SSL_CTX_select_current_cert(SSL_CTX *ctx, X509 *x509);
+ int SSL_select_current_cert(SSL *ssl, X509 *x509);
+ int SSL_CTX_set_current_cert(SSL_CTX *ctx, long op);
+ int SSL_set_current_cert(SSL *ssl, long op);
+
+=head1 DESCRIPTION
+
+SSL_CTX_set0_chain() and SSL_CTX_set1_chain() set the certificate chain
+associated with the current certificate of B<ctx> to B<sk>.
+
+SSL_CTX_add0_chain_cert() and SSL_CTX_add1_chain_cert() append the single
+certificate B<x509> to the chain associated with the current certificate of
+B<ctx>.
+
+SSL_CTX_get0_chain_certs() retrieves the chain associated with the current
+certificate of B<ctx>.
+
+SSL_CTX_clear_chain_certs() clears any existing chain associated with the
+current certificate of B<ctx>.  (This is implemented by calling
+SSL_CTX_set0_chain() with B<sk> set to B<NULL>).
+
+SSL_CTX_build_cert_chain() builds the certificate chain for B<ctx>. Normally
+this uses the chain store or the verify store if the chain store is not set.
+If the function is successful the built chain will replace any existing chain.
+The B<flags> parameter can be set to B<SSL_BUILD_CHAIN_FLAG_UNTRUSTED> to use
+existing chain certificates as untrusted CAs, B<SSL_BUILD_CHAIN_FLAG_NO_ROOT>
+to omit the root CA from the built chain, B<SSL_BUILD_CHAIN_FLAG_CHECK> to
+use all existing chain certificates only to build the chain (effectively
+sanity checking and rearranging them if necessary), the flag
+B<SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR> ignores any errors during verification:
+if flag B<SSL_BUILD_CHAIN_FLAG_CLEAR_ERROR> is also set verification errors
+are cleared from the error queue.
+
+Each of these functions operates on the I<current> end entity
+(i.e. server or client) certificate. This is the last certificate loaded or
+selected on the corresponding B<ctx> structure.
+
+SSL_CTX_select_current_cert() selects B<x509> as the current end entity
+certificate, but only if B<x509> has already been loaded into B<ctx> using a
+function such as SSL_CTX_use_certificate().
+
+SSL_set0_chain(), SSL_set1_chain(), SSL_add0_chain_cert(),
+SSL_add1_chain_cert(), SSL_get0_chain_certs(), SSL_clear_chain_certs(),
+SSL_build_cert_chain(), SSL_select_current_cert() and SSL_set_current_cert()
+are similar except they apply to SSL structure B<ssl>.
+
+SSL_CTX_set_current_cert() changes the current certificate to a value based
+on the B<op> argument. Currently B<op> can be B<SSL_CERT_SET_FIRST> to use
+the first valid certificate or B<SSL_CERT_SET_NEXT> to set the next valid
+certificate after the current certificate. These two operations can be
+used to iterate over all certificates in an B<SSL_CTX> structure.
+
+SSL_set_current_cert() also supports the option B<SSL_CERT_SET_SERVER>.
+If B<ssl> is a server and has sent a certificate to a connected client
+this option sets that certificate to the current certificate and returns 1.
+If the negotiated ciphersuite is anonymous (and thus no certificate will
+be sent) 2 is returned and the current certificate is unchanged. If B<ssl>
+is not a server or a certificate has not been sent 0 is returned and
+the current certificate is unchanged.
+
+All these functions are implemented as macros. Those containing a B<1>
+increment the reference count of the supplied certificate or chain so it must
+be freed at some point after the operation. Those containing a B<0> do
+not increment reference counts and the supplied certificate or chain
+B<MUST NOT> be freed after the operation.
+
+=head1 NOTES
+
+The chains associated with an SSL_CTX structure are copied to any SSL
+structures when SSL_new() is called. SSL structures will not be affected
+by any chains subsequently changed in the parent SSL_CTX.
+
+One chain can be set for each key type supported by a server. So, for example,
+an RSA and a DSA certificate can (and often will) have different chains.
+
+The functions SSL_CTX_build_cert_chain() and SSL_build_cert_chain() can
+be used to check application configuration and to ensure any necessary
+subordinate CAs are sent in the correct order. Misconfigured applications
+sending incorrect certificate chains often cause problems with peers.
+
+For example an application can add any set of certificates using
+SSL_CTX_use_certificate_chain_file() then call SSL_CTX_build_cert_chain()
+with the option B<SSL_BUILD_CHAIN_FLAG_CHECK> to check and reorder them.
+
+Applications can issue non fatal warnings when checking chains by setting
+the flag B<SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR> and checking the return
+value.
+
+Calling SSL_CTX_build_cert_chain() or SSL_build_cert_chain() is more
+efficient than the automatic chain building as it is only performed once.
+Automatic chain building is performed on each new session.
+
+If any certificates are added using these functions no certificates added
+using SSL_CTX_add_extra_chain_cert() will be used.
+
+=head1 RETURN VALUES
+
+SSL_set_current_cert() with B<SSL_CERT_SET_SERVER> returns 1 for success, 2 if
+no server certificate is used because the ciphersuite is anonymous and 0
+for failure.
+
+SSL_CTX_build_cert_chain() and SSL_build_cert_chain() return 1 for success
+and 0 for failure. If the flag B<SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR> is set and
+a verification error occurs then 2 is returned.
+
+All other functions return 1 for success and 0 for failure.
+
+
+=head1 SEE ALSO
+
+L<SSL_CTX_add_extra_chain_cert(3)|SSL_CTX_add_extra_chain_cert(3)>
+
+=head1 HISTORY
+
+These functions were first added to OpenSSL 1.0.2.
+
+=cut
index 5955ee1..8e832a5 100644 (file)
@@ -32,7 +32,8 @@ Only one set of extra chain certificates can be specified per SSL_CTX
 structure. Different chains for different certificates (for example if both
 RSA and DSA certificates are specified by the same server) or different SSL
 structures with the same parent SSL_CTX cannot be specified using this
-function.
+function. For more flexibility functions such as SSL_add1_chain_cert() should
+be used instead.
 
 =head1 RETURN VALUES
 
@@ -45,5 +46,15 @@ L<ssl(3)|ssl(3)>,
 L<SSL_CTX_use_certificate(3)|SSL_CTX_use_certificate(3)>,
 L<SSL_CTX_set_client_cert_cb(3)|SSL_CTX_set_client_cert_cb(3)>,
 L<SSL_CTX_load_verify_locations(3)|SSL_CTX_load_verify_locations(3)>
+L<SSL_CTX_set0_chain(3)|SSL_CTX_set0_chain(3)>
+L<SSL_CTX_set1_chain(3)|SSL_CTX_set1_chain(3)>
+L<SSL_CTX_add0_chain_cert(3)|SSL_CTX_add0_chain_cert(3)>
+L<SSL_CTX_add1_chain_cert(3)|SSL_CTX_add1_chain_cert(3)>
+L<SSL_set0_chain(3)|SSL_set0_chain(3)>
+L<SSL_set1_chain(3)|SSL_set1_chain(3)>
+L<SSL_add0_chain_cert(3)|SSL_add0_chain_cert(3)>
+L<SSL_add1_chain_cert(3)|SSL_add1_chain_cert(3)>
+L<SSL_CTX_build_cert_chain(3)|SSL_CTX_build_cert_chain(3)>
+L<SSL_build_cert_chain(3)|SSL_build_cert_chain(3)>
 
 =cut
diff --git a/doc/ssl/SSL_CTX_get0_param.pod b/doc/ssl/SSL_CTX_get0_param.pod
new file mode 100644 (file)
index 0000000..332f181
--- /dev/null
@@ -0,0 +1,55 @@
+=pod
+
+=head1 NAME
+
+SSL_CTX_get0_param, SSL_get0_param, SSL_CTX_set1_param, SSL_set1_param -
+get and set verification parameters
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ X509_VERIFY_PARAM *SSL_CTX_get0_param(SSL_CTX *ctx);
+ X509_VERIFY_PARAM *SSL_get0_param(SSL *ssl);
+ int SSL_CTX_set1_param(SSL_CTX *ctx, X509_VERIFY_PARAM *vpm);
+ int SSL_set1_param(SSL *ssl, X509_VERIFY_PARAM *vpm);
+
+=head1 DESCRIPTION
+
+SSL_CTX_get0_param() and SSL_get0_param() retrieve an internal pointer to
+the verification parameters for B<ctx> or B<ssl> respectively. The returned
+pointer must not be freed by the calling application.
+
+SSL_CTX_set1_param() and SSL_set1_param() set the verification parameters
+to B<vpm> for B<ctx> or B<ssl>.
+
+=head1 NOTES
+
+Typically parameters are retrieved from an B<SSL_CTX> or B<SSL> structure
+using SSL_CTX_get0_param() or SSL_get0_param() and an application modifies
+them to suit its needs: for example to add a hostname check.
+
+=head1 EXAMPLE
+
+Check hostname matches "www.foo.com" in peer certificate:
+
+ X509_VERIFY_PARAM *vpm = SSL_get0_param(ssl);
+ X509_VERIFY_PARAM_set1_host(vpm, "www.foo.com");
+
+=head1 RETURN VALUES
+
+SSL_CTX_get0_param() and SSL_get0_param() return a pointer to an
+B<X509_VERIFY_PARAM> structure.
+
+SSL_CTX_set1_param() and SSL_set1_param() return 1 for success and 0
+for failure.
+
+=head1 SEE ALSO
+
+L<X509_VERIFY_PARAM_set_flags(3)|X509_VERIFY_PARAM_set_flags(3)>
+
+=head1 HISTORY
+
+These functions were first added to OpenSSL 1.0.2.
+
+=cut
index c8b99f4..4aeda09 100644 (file)
@@ -15,6 +15,7 @@ SSL_CTX_sess_set_cache_size, SSL_CTX_sess_get_cache_size - manipulate session ca
 
 SSL_CTX_sess_set_cache_size() sets the size of the internal session cache
 of context B<ctx> to B<t>.
+This value is a hint and not an absolute; see the notes below.
 
 SSL_CTX_sess_get_cache_size() returns the currently valid session cache size.
 
@@ -25,8 +26,9 @@ currently 1024*20, so that up to 20000 sessions can be held. This size
 can be modified using the SSL_CTX_sess_set_cache_size() call. A special
 case is the size 0, which is used for unlimited size.
 
-When the maximum number of sessions is reached, no more new sessions are
-added to the cache. New space may be added by calling
+If adding the session makes the cache exceed its size, then unused
+sessions are dropped from the end of the cache.
+Cache space may also be reclaimed by calling
 L<SSL_CTX_flush_sessions(3)|SSL_CTX_flush_sessions(3)> to remove
 expired sessions.
 
diff --git a/doc/ssl/SSL_CTX_set1_curves.pod b/doc/ssl/SSL_CTX_set1_curves.pod
new file mode 100644 (file)
index 0000000..18d0c9a
--- /dev/null
@@ -0,0 +1,103 @@
+=pod
+
+=head1 NAME
+
+SSL_CTX_set1_curves, SSL_CTX_set1_curves_list, SSL_set1_curves,
+SSL_set1_curves_list, SSL_get1_curves, SSL_get_shared_curve,
+SSL_CTX_set_ecdh_auto, SSL_set_ecdh_auto - EC supported curve functions
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ int SSL_CTX_set1_curves(SSL_CTX *ctx, int *clist, int clistlen);
+ int SSL_CTX_set1_curves_list(SSL_CTX *ctx, char *list);
+
+ int SSL_set1_curves(SSL *ssl, int *clist, int clistlen);
+ int SSL_set1_curves_list(SSL *ssl, char *list);
+
+ int SSL_get1_curves(SSL *ssl, int *curves);
+ int SSL_get_shared_curve(SSL *s, int n);
+
+ int SSL_CTX_set_ecdh_auto(SSL_CTX *ctx, int onoff);
+ int SSL_set_ecdh_auto(SSL *s, int onoff);
+
+=head1 DESCRIPTION
+
+SSL_CTX_set1_curves() sets the supported curves for B<ctx> to B<clistlen>
+curves in the array B<clist>. The array consists of all NIDs of curves in
+preference order. For a TLS client the curves are used directly in the
+supported curves extension. For a TLS server the curves are used to 
+determine the set of shared curves.
+
+SSL_CTX_set1_curves_list() sets the supported curves for B<ctx> to
+string B<list>. The string is a colon separated list of curve NIDs or
+names, for example "P-521:P-384:P-256".
+
+SSL_set1_curves() and SSL_set1_curves_list() are similar except they set
+supported curves for the SSL structure B<ssl>.
+
+SSL_get1_curves() returns the set of supported curves sent by a client
+in the supported curves extension. It returns the total number of 
+supported curves. The B<curves> parameter can be B<NULL> to simply
+return the number of curves for memory allocation purposes. The
+B<curves> array is in the form of a set of curve NIDs in preference
+order. It can return zero if the client did not send a supported curves
+extension.
+
+SSL_get_shared_curve() returns shared curve B<n> for a server-side
+SSL B<ssl>. If B<n> is -1 then the total number of shared curves is
+returned, which may be zero. Other than for diagnostic purposes,
+most applications will only be interested in the first shared curve
+so B<n> is normally set to zero. If the value B<n> is out of range,
+NID_undef is returned.
+
+SSL_CTX_set_ecdh_auto() and SSL_set_ecdh_auto() set automatic curve
+selection for server B<ctx> or B<ssl> to B<onoff>. If B<onoff> is 1 then 
+the highest preference curve is automatically used for ECDH temporary
+keys used during key exchange.
+
+All these functions are implemented as macros.
+
+=head1 NOTES
+
+If an application wishes to make use of several of these functions for
+configuration purposes either on a command line or in a file it should
+consider using the SSL_CONF interface instead of manually parsing options.
+
+The functions SSL_CTX_set_ecdh_auto() and SSL_set_ecdh_auto() can be used to
+make a server always choose the most appropriate curve for a client. If set
+it will override any temporary ECDH parameters set by a server. Previous
+versions of OpenSSL could effectively only use a single ECDH curve set
+using a function such as SSL_CTX_set_ecdh_tmp(). Newer applications should
+just call:
+
+ SSL_CTX_set_ecdh_auto(ctx, 1);
+
+and they will automatically support ECDH using the most appropriate shared
+curve.
+
+=head1 RETURN VALUES
+
+SSL_CTX_set1_curves(), SSL_CTX_set1_curves_list(), SSL_set1_curves(),
+SSL_set1_curves_list(), SSL_CTX_set_ecdh_auto() and SSL_set_ecdh_auto()
+return 1 for success and 0 for failure.
+
+SSL_get1_curves() returns the number of curves, which may be zero.
+
+SSL_get_shared_curve() returns the NID of shared curve B<n> or NID_undef if there
+is no shared curve B<n>; or the total number of shared curves if B<n>
+is -1.
+
+When called on a client B<ssl>, SSL_get_shared_curve() has no meaning and
+returns -1.
+
+=head1 SEE ALSO
+
+L<SSL_CTX_add_extra_chain_cert(3)|SSL_CTX_add_extra_chain_cert(3)>
+
+=head1 HISTORY
+
+These functions were first added to OpenSSL 1.0.2.
+
+=cut
diff --git a/doc/ssl/SSL_CTX_set1_verify_cert_store.pod b/doc/ssl/SSL_CTX_set1_verify_cert_store.pod
new file mode 100644 (file)
index 0000000..493cca4
--- /dev/null
@@ -0,0 +1,91 @@
+=pod
+
+=head1 NAME
+
+SSL_CTX_set0_verify_cert_store, SSL_CTX_set1_verify_cert_store,
+SSL_CTX_set0_chain_cert_store, SSL_CTX_set1_chain_cert_store,
+SSL_set0_verify_cert_store, SSL_set1_verify_cert_store,
+SSL_set0_chain_cert_store, SSL_set1_chain_cert_store - set certificate
+verification or chain store
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ int SSL_CTX_set0_verify_cert_store(SSL_CTX *ctx, X509_STORE *st);
+ int SSL_CTX_set1_verify_cert_store(SSL_CTX *ctx, X509_STORE *st);
+ int SSL_CTX_set0_chain_cert_store(SSL_CTX *ctx, X509_STORE *st);
+ int SSL_CTX_set1_chain_cert_store(SSL_CTX *ctx, X509_STORE *st);
+
+ int SSL_set0_verify_cert_store(SSL *ssl, X509_STORE *st);
+ int SSL_set1_verify_cert_store(SSL *ssl, X509_STORE *st);
+ int SSL_set0_chain_cert_store(SSL *ssl, X509_STORE *st);
+ int SSL_set1_chain_cert_store(SSL *ssl, X509_STORE *st);
+
+=head1 DESCRIPTION
+
+SSL_CTX_set0_verify_cert_store() and SSL_CTX_set1_verify_cert_store()
+set the certificate store used for certificate verification to B<st>.
+
+SSL_CTX_set0_chain_cert_store() and SSL_CTX_set1_chain_cert_store()
+set the certificate store used for certificate chain building to B<st>.
+
+SSL_set0_verify_cert_store(), SSL_set1_verify_cert_store(),
+SSL_set0_chain_cert_store() and SSL_set1_chain_cert_store() are similar
+except they apply to SSL structure B<ssl>.
+
+All these functions are implemented as macros. Those containing a B<1>
+increment the reference count of the supplied store so it must
+be freed at some point after the operation. Those containing a B<0> do
+not increment reference counts and the supplied store B<MUST NOT> be freed
+after the operation.
+
+=head1 NOTES
+
+The store pointers associated with an SSL_CTX structure are copied to any SSL
+structures when SSL_new() is called. As a result SSL structures will not be
+affected if the parent SSL_CTX store pointer is set to a new value.
+
+The verification store is used to verify the certificate chain sent by the
+peer: that is an SSL/TLS client will use the verification store to verify
+the server's certificate chain and an SSL/TLS server will use it to verify
+any client certificate chain.
+
+The chain store is used to build the certificate chain.
+
+If the mode B<SSL_MODE_NO_AUTO_CHAIN> is set or a certificate chain is
+configured already (for example using functions such as
+L<SSL_CTX_add1_chain_cert(3)|SSL_CTX_add1_chain_cert(3)> or
+L<SSL_CTX_add_extra_chain_cert(3)|SSL_CTX_add_extra_chain_cert(3)>) then
+automatic chain building is disabled.
+
+If the mode B<SSL_MODE_NO_AUTO_CHAIN> is set then automatic chain building
+is disabled.
+
+If the chain or the verification store is not set then the store associated
+with the parent SSL_CTX is used instead to retain compatibility with previous
+versions of OpenSSL.
+
+=head1 RETURN VALUES
+
+All these functions return 1 for success and 0 for failure.
+
+=head1 SEE ALSO
+
+L<SSL_CTX_add_extra_chain_cert(3)|SSL_CTX_add_extra_chain_cert(3)>,
+L<SSL_CTX_set0_chain(3)|SSL_CTX_set0_chain(3)>,
+L<SSL_CTX_set1_chain(3)|SSL_CTX_set1_chain(3)>,
+L<SSL_CTX_add0_chain_cert(3)|SSL_CTX_add0_chain_cert(3)>,
+L<SSL_CTX_add1_chain_cert(3)|SSL_CTX_add1_chain_cert(3)>,
+L<SSL_set0_chain(3)|SSL_set0_chain(3)>,
+L<SSL_set1_chain(3)|SSL_set1_chain(3)>,
+L<SSL_add0_chain_cert(3)|SSL_add0_chain_cert(3)>,
+L<SSL_add1_chain_cert(3)|SSL_add1_chain_cert(3)>,
+L<SSL_CTX_build_cert_chain(3)|SSL_CTX_build_cert_chain(3)>,
+L<SSL_build_cert_chain(3)|SSL_build_cert_chain(3)>
+
+=head1 HISTORY
+
+These functions were first added to OpenSSL 1.0.2.
+
+=cut
diff --git a/doc/ssl/SSL_CTX_set_cert_cb.pod b/doc/ssl/SSL_CTX_set_cert_cb.pod
new file mode 100644 (file)
index 0000000..141d828
--- /dev/null
@@ -0,0 +1,68 @@
+=pod
+
+=head1 NAME
+
+SSL_CTX_set_cert_cb, SSL_set_cert_cb - handle certificate callback function
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ void SSL_CTX_set_cert_cb(SSL_CTX *c, int (*cert_cb)(SSL *ssl, void *arg), void *arg);
+ void SSL_set_cert_cb(SSL *s, int (*cert_cb)(SSL *ssl, void *arg), void *arg);
+
+ int (*cert_cb)(SSL *ssl, void *arg);
+
+=head1 DESCRIPTION
+
+SSL_CTX_set_cert_cb() and SSL_set_cert_cb() set the B<cert_cb()> callback.
+The B<arg> value is a pointer which is passed to the application callback.
+
+When B<cert_cb()> is NULL, no callback function is used.
+
+cert_cb() is the application defined callback. It is called before a
+certificate will be used by a client or server. The callback can then inspect
+the passed B<ssl> structure and set or clear any appropriate certificates. If
+the callback is successful it B<MUST> return 1 even if no certificates have
+been set. A zero is returned on error which will abort the handshake with a
+fatal internal error alert. A negative return value will suspend the handshake
+and the handshake function will return immediately.
+L<SSL_get_error(3)|SSL_get_error(3)> will return SSL_ERROR_WANT_X509_LOOKUP to
+indicate that the handshake was suspended. The next call to the handshake
+function will again lead to the call of cert_cb(). It is the job of the
+cert_cb() to store information about the state of the last call,
+if required to continue.
+
+=head1 NOTES
+
+An application will typically call SSL_use_certificate() and
+SSL_use_PrivateKey() to set the end entity certificate and private key.
+It can add intermediate and optionally the root CA certificates using
+SSL_add1_chain_cert().
+
+It might also call SSL_certs_clear() to delete any certificates associated
+with the B<SSL> object.
+
+The certificate callback functionality supersedes the (largely broken)
+functionality provided by the old client certificate callback interface.
+It is B<always> called even if a certificate is already set so the callback
+can modify or delete the existing certificate.
+
+A more advanced callback might examine the handshake parameters and set
+whatever chain is appropriate. For example a legacy client supporting only
+TLS v1.0 might receive a certificate chain signed using SHA1 whereas a
+TLS v1.2 client which advertises support for SHA256 could receive a chain
+using SHA256.
+
+Normal server sanity checks are performed on any certificates set
+by the callback. So if an EC chain is set for a curve the client does not
+support it will B<not> be used.
+
+=head1 SEE ALSO
+
+L<ssl(3)|ssl(3)>, L<SSL_use_certificate(3)|SSL_use_certificate(3)>,
+L<SSL_add1_chain_cert(3)|SSL_add1_chain_cert(3)>,
+L<SSL_get_client_CA_list(3)|SSL_get_client_CA_list(3)>,
+L<SSL_clear(3)|SSL_clear(3)>, L<SSL_free(3)|SSL_free(3)>
+
+=cut
index 6acf0d9..846416e 100644 (file)
@@ -42,6 +42,13 @@ L<SSL_CTX_set_verify(3)|SSL_CTX_set_verify(3)> family of functions.
 This document must therefore be updated when documentation about the
 X509_STORE object and its handling becomes available.
 
+=head1 RESTRICTIONS
+
+The X509_STORE structure used by an SSL_CTX is used for verifying peer
+certificates and building certificate chains, it is also shared by
+every child SSL structure. Applications wanting finer control can use 
+functions such as SSL_CTX_set1_verify_cert_store() instead.
+
 =head1 RETURN VALUES
 
 SSL_CTX_set_cert_store() does not return diagnostic output.
index bd4df4a..c84a831 100644 (file)
@@ -41,7 +41,7 @@ RSA export ciphers with a keylength of 512 bits for the RSA key require
 a temporary 512 bit RSA key, as typically the supplied key has a length
 of 1024 bit (see
 L<SSL_CTX_set_tmp_rsa_callback(3)|SSL_CTX_set_tmp_rsa_callback(3)>).
-RSA ciphers using EDH need a certificate and key and additional DH-parameters
+RSA ciphers using DHE need a certificate and key and additional DH-parameters
 (see L<SSL_CTX_set_tmp_dh_callback(3)|SSL_CTX_set_tmp_dh_callback(3)>).
 
 A DSA cipher can only be chosen, when a DSA certificate is available.
diff --git a/doc/ssl/SSL_CTX_set_custom_cli_ext.pod b/doc/ssl/SSL_CTX_set_custom_cli_ext.pod
new file mode 100644 (file)
index 0000000..3fceef9
--- /dev/null
@@ -0,0 +1,133 @@
+=pod
+
+=head1 NAME
+
+SSL_CTX_add_client_custom_ext, SSL_CTX_add_server_custom_ext - custom TLS extension handling
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ int SSL_CTX_add_client_custom_ext(SSL_CTX *ctx, unsigned int ext_type,
+                                  custom_ext_add_cb add_cb,
+                                  custom_ext_free_cb free_cb, void *add_arg,
+                                  custom_ext_parse_cb parse_cb,
+                                  void *parse_arg);
+
+ int SSL_CTX_add_server_custom_ext(SSL_CTX *ctx, unsigned int ext_type,
+                                  custom_ext_add_cb add_cb,
+                                  custom_ext_free_cb free_cb, void *add_arg,
+                                  custom_ext_parse_cb parse_cb,
+                                  void *parse_arg);
+
+ int SSL_extension_supported(unsigned int ext_type);
+
+ typedef int (*custom_ext_add_cb)(SSL *s, unsigned int ext_type,
+                                 const unsigned char **out,
+                                 size_t *outlen, int *al,
+                                 void *add_arg);
+
+ typedef void (*custom_ext_free_cb)(SSL *s, unsigned int ext_type,
+                                   const unsigned char *out,
+                                   void *add_arg);
+
+ typedef int (*custom_ext_parse_cb)(SSL *s, unsigned int ext_type,
+                                   const unsigned char *in,
+                                   size_t inlen, int *al,
+                                   void *parse_arg);
+
+
+=head1 DESCRIPTION
+
+SSL_CTX_add_client_custom_ext() adds a custom extension for a TLS client 
+with extension type B<ext_type> and callbacks B<add_cb>, B<free_cb> and
+B<parse_cb>.
+
+SSL_CTX_add_server_custom_ext() adds a custom extension for a TLS server 
+with extension type B<ext_type> and callbacks B<add_cb>, B<free_cb> and
+B<parse_cb>.
+
+In both cases the extension type must not be handled by OpenSSL internally
+or an error occurs.
+
+SSL_extension_supported() returns 1 if the extension B<ext_type> is handled
+internally by OpenSSL and 0 otherwise.
+
+=head1 EXTENSION CALLBACKS
+
+The callback B<add_cb> is called to send custom extension data to be 
+included in ClientHello for TLS clients or ServerHello for servers. The
+B<ext_type> parameter is set to the extension type which will be added and
+B<add_arg> to the value set when the extension handler was added.
+
+If the application wishes to include the extension B<ext_type> it should
+set B<*out> to the extension data, set B<*outlen> to the length of the
+extension data and return 1.
+
+If the B<add_cb> does not wish to include the extension it must return 0.
+
+If B<add_cb> returns -1 a fatal handshake error occurs using the TLS
+alert value specified in B<*al>.
+
+For clients (but not servers) if B<add_cb> is set to NULL a zero length
+extension is added for B<ext_type>.
+
+For clients every registered B<add_cb> is always called to see if the
+application wishes to add an extension to ClientHello.
+
+For servers every registered B<add_cb> is called once if and only if the
+corresponding extension was received in ClientHello to see if the application
+wishes to add the extension to ServerHello. That is, if no corresponding extension
+was received in ClientHello then B<add_cb> will not be called.
+
+If an extension is added (that is B<add_cb> returns 1) B<free_cb> is called
+(if it is set) with the value of B<out> set by the add callback. It can be
+used to free up any dynamic extension data set by B<add_cb>. Since B<out> is
+constant (to permit use of constant data in B<add_cb>) applications may need to
+cast away const to free the data.
+
+The callback B<parse_cb> receives data for TLS extensions. For TLS clients
+the extension data will come from ServerHello and for TLS servers it will
+come from ClientHello.
+
+The extension data consists of B<inlen> bytes in the buffer B<in> for the
+extension B<ext_type>.
+
+If the B<parse_cb> considers the extension data acceptable it must return
+1. If it returns 0 or a negative value a fatal handshake error occurs
+using the TLS alert value specified in B<*al>.
+
+The buffer B<in> is a temporary internal buffer which will not be valid after
+the callback returns.
+
+=head1 NOTES
+
+The B<add_arg> and B<parse_arg> parameters can be set to arbitrary values
+which will be passed to the corresponding callbacks. They can, for example,
+be used to store the extension data received in a convenient structure or
+pass the extension data to be added or freed when adding extensions.
+
+The B<ext_type> parameter corresponds to the B<extension_type> field of
+RFC5246 et al. It is B<not> a NID.
+
+If the same custom extension type is received multiple times a fatal
+B<decode_error> alert is sent and the handshake aborts. If a custom extension
+is received in ServerHello which was not sent in ClientHello a fatal
+B<unsupported_extension> alert is sent and the handshake is aborted. The
+ServerHello B<add_cb> callback is only called if the corresponding extension
+was received in ClientHello. This is compliant with the TLS specifications.
+This behaviour ensures that each callback is called at most once and that
+an application can never send unsolicited extensions.
+
+=head1 RETURN VALUES
+
+SSL_CTX_add_client_custom_ext() and SSL_CTX_add_server_custom_ext() return 1 for
+success and 0 for failure. A failure can occur if an attempt is made to
+add the same B<ext_type> more than once, if an attempt is made to use an
+extension type handled internally by OpenSSL or if an internal error occurs
+(for example a memory allocation failure).
+
+SSL_extension_supported() returns 1 if the extension B<ext_type> is handled
+internally by OpenSSL and 0 otherwise.
+
+=cut
index 8794eb7..94c55b8 100644 (file)
@@ -70,7 +70,7 @@ the TLS standard, when the RSA key can be used for signing only, that is
 for export ciphers. Using ephemeral RSA key exchange for other purposes
 violates the standard and can break interoperability with clients.
 It is therefore strongly recommended to not use ephemeral RSA key
-exchange and use EDH (Ephemeral Diffie-Hellman) key exchange instead
+exchange and use DHE (Ephemeral Diffie-Hellman) key exchange instead
 in order to achieve forward secrecy (see
 L<SSL_CTX_set_tmp_dh_callback(3)|SSL_CTX_set_tmp_dh_callback(3)>).
 
index 10be95f..80321b8 100644 (file)
@@ -109,10 +109,9 @@ this B<ssl>, the last item added into B<ctx> will be checked.
 
 =head1 NOTES
   
-The internal certificate store of OpenSSL can hold two private key/certificate
-pairs at a time: one key/certificate of type RSA and one key/certificate
-of type DSA. The certificate used depends on the cipher select, see
-also L<SSL_CTX_set_cipher_list(3)|SSL_CTX_set_cipher_list(3)>.
+The internal certificate store of OpenSSL can hold several private
+key/certificate pairs at a time. The certificate used depends on the
+cipher selected, see also L<SSL_CTX_set_cipher_list(3)|SSL_CTX_set_cipher_list(3)>.
 
 When reading certificates and private keys from file, files of type
 SSL_FILETYPE_ASN1 (also known as B<DER>, binary encoding) can only contain
@@ -122,16 +121,13 @@ Files of type SSL_FILETYPE_PEM can contain more than one item.
 
 SSL_CTX_use_certificate_chain_file() adds the first certificate found
 in the file to the certificate store. The other certificates are added
-to the store of chain certificates using
-L<SSL_CTX_add_extra_chain_cert(3)|SSL_CTX_add_extra_chain_cert(3)>.
-There exists only one extra chain store, so that the same chain is appended
-to both types of certificates, RSA and DSA! If it is not intended to use
-both type of certificate at the same time, it is recommended to use the
-SSL_CTX_use_certificate_chain_file() instead of the
-SSL_CTX_use_certificate_file() function in order to allow the use of
-complete certificate chains even when no trusted CA storage is used or
-when the CA issuing the certificate shall not be added to the trusted
-CA storage.
+to the store of chain certificates using L<SSL_CTX_add1_chain_cert(3)|SSL_CTX_add1_chain_cert(3)>. Note: versions of OpenSSL before 1.0.2 only had a single
+certificate chain store for all certificate types, OpenSSL 1.0.2 and later
+have a separate chain store for each type. SSL_CTX_use_certificate_chain_file() 
+should be used instead of the SSL_CTX_use_certificate_file() function in order
+to allow the use of complete certificate chains even when no trusted CA
+storage is used or when the CA issuing the certificate shall not be added to
+the trusted CA storage.
 
 If additional certificates are needed to complete the chain during the
 TLS negotiation, CA certificates are additionally looked up in the
index 9da7201..12db0da 100644 (file)
@@ -83,7 +83,12 @@ Return values from the server callback are interpreted as follows:
 
 =over 4
 
-=item > 0
+=item Z<>0
+
+PSK identity was not found. An "unknown_psk_identity" alert message
+will be sent and the connection setup fails.
+
+=item E<gt>0
 
 PSK identity was found and the server callback has provided the PSK
 successfully in parameter B<psk>. Return value is the length of
@@ -96,11 +101,6 @@ data to B<psk> and return the length of the random data, so the
 connection will fail with decryption_error before it will be finished
 completely.
 
-=item Z<>0
-
-PSK identity was not found. An "unknown_psk_identity" alert message
-will be sent and the connection setup fails.
-
 =back
 
 =cut
diff --git a/doc/ssl/SSL_CTX_use_serverinfo.pod b/doc/ssl/SSL_CTX_use_serverinfo.pod
new file mode 100644 (file)
index 0000000..318e052
--- /dev/null
@@ -0,0 +1,46 @@
+=pod
+
+=head1 NAME
+
+SSL_CTX_use_serverinfo, SSL_CTX_use_serverinfo_file - use serverinfo extension
+
+=head1 SYNOPSIS
+
+ #include <openssl/ssl.h>
+
+ int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo,
+                            size_t serverinfo_length);
+
+ int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file);
+
+=head1 DESCRIPTION
+
+These functions load "serverinfo" TLS ServerHello Extensions into the SSL_CTX.
+A "serverinfo" extension is returned in response to an empty ClientHello
+Extension.
+
+SSL_CTX_use_serverinfo() loads one or more serverinfo extensions from
+a byte array into B<ctx>.  The extensions must be concatenated into a 
+sequence of bytes.  Each extension must consist of a 2-byte Extension Type, 
+a 2-byte length, and then length bytes of extension_data.
+
+SSL_CTX_use_serverinfo_file() loads one or more serverinfo extensions from
+B<file> into B<ctx>.  The extensions must be in PEM format.  Each extension
+must consist of a 2-byte Extension Type, a 2-byte length, and then length
+bytes of extension_data.  Each PEM extension name must begin with the phrase
+"BEGIN SERVERINFO FOR ".
+
+=head1 NOTES
+
+=head1 RETURN VALUES
+
+On success, the functions return 1.
+On failure, the functions return 0.  Check out the error stack to find out
+the reason.
+
+=head1 SEE ALSO
+
+=head1 HISTORY
+
+
+=cut
index 2239444..89ad6bd 100644 (file)
@@ -21,10 +21,7 @@ B<ssl> by setting an underlying B<BIO>.
 The behaviour of SSL_accept() depends on the underlying BIO. 
 
 If the underlying BIO is B<blocking>, SSL_accept() will only return once the
-handshake has been finished or an error occurred, except for SGC (Server
-Gated Cryptography). For SGC, SSL_accept() may return with -1, but
-SSL_get_error() will yield B<SSL_ERROR_WANT_READ/WRITE> and SSL_accept()
-should be called again.
+handshake has been finished or an error occurred.
 
 If the underlying BIO is B<non-blocking>, SSL_accept() will also return
 when the underlying BIO could not satisfy the needs of SSL_accept()
index b35ddf5..8b590c9 100644 (file)
@@ -23,10 +23,7 @@ L<SSL_set_accept_state(3)|SSL_set_accept_state(3)>.
 The behaviour of SSL_do_handshake() depends on the underlying BIO.
 
 If the underlying BIO is B<blocking>, SSL_do_handshake() will only return
-once the handshake has been finished or an error occurred, except for SGC
-(Server Gated Cryptography). For SGC, SSL_do_handshake() may return with -1,
-but SSL_get_error() will yield B<SSL_ERROR_WANT_READ/WRITE> and
-SSL_do_handshake() should be called again.
+once the handshake has been finished or an error occurred.
 
 If the underlying BIO is B<non-blocking>, SSL_do_handshake() will also return
 when the underlying BIO could not satisfy the needs of SSL_do_handshake()
index 85d4a64..efbff5a 100644 (file)
@@ -104,7 +104,7 @@ erroneous SSL_ERROR_SYSCALL may be flagged even though no error occurred.
 The shutdown was successfully completed. The "close notify" alert was sent
 and the peer's "close notify" alert was received.
 
-=item -1
+=item E<lt>0
 
 The shutdown was not successful because a fatal error occurred either
 at the protocol level or a connection failure occurred. It can also occur if
index 660489a..242087e 100644 (file)
@@ -380,6 +380,10 @@ session instead of a context.
 
 =item int B<SSL_CTX_use_certificate_file>(SSL_CTX *ctx, char *file, int type);
 
+=item X509 *B<SSL_CTX_get0_certificate>(const SSL_CTX *ctx);
+
+=item EVP_PKEY *B<SSL_CTX_get0_privatekey>(const SSL_CTX *ctx);
+
 =item void B<SSL_CTX_set_psk_client_callback>(SSL_CTX *ctx, unsigned int (*callback)(SSL *ssl, const char *hint, char *identity, unsigned int max_identity_len, unsigned char *psk, unsigned int max_psk_len));
 
 =item int B<SSL_CTX_use_psk_identity_hint>(SSL_CTX *ctx, const char *hint);
@@ -513,7 +517,7 @@ connection defined in the B<SSL> structure.
 
 =item X509 *B<SSL_get_peer_certificate>(const SSL *ssl);
 
-=item EVP_PKEY *B<SSL_get_privatekey>(SSL *ssl);
+=item EVP_PKEY *B<SSL_get_privatekey>(const SSL *ssl);
 
 =item int B<SSL_get_quiet_shutdown>(const SSL *ssl);
 
index 4d2e714..c9b29bd 100644 (file)
@@ -6026,7 +6026,7 @@ one at a time, or use 'aliases' to specify the preference and order for
 the ciphers.
 
 There are a large number of aliases, but the most important are
-kRSA, kDHr, kDHd and kEDH for key exchange types.
+kRSA, kDHr, kDHd and kDHE for key exchange types.
 
 aRSA, aDSS, aNULL and aDH for authentication
 DES, 3DES, RC4, RC2, IDEA and eNULL for ciphers
diff --git a/e_os.h b/e_os.h
index 7584485..45fef69 100644 (file)
--- a/e_os.h
+++ b/e_os.h
@@ -277,7 +277,7 @@ extern "C" {
         */
 #    define _WIN32_WINNT 0x0400
 #   endif
-#   if !defined(OPENSSL_NO_SOCK) && defined(_WIN32_WINNT)
+#   if !defined(OPENSSL_NO_SOCK) && (defined(_WIN32_WINNT) || defined(_WIN32_WCE))
        /*
         * Just like defining _WIN32_WINNT including winsock2.h implies
         * certain "discipline" for maintaining [broad] binary compatibility.
@@ -293,6 +293,9 @@ extern "C" {
 #   include <stdio.h>
 #   include <stddef.h>
 #   include <errno.h>
+#   if defined(_WIN32_WCE) && !defined(EACCES)
+#    define EACCES   13
+#   endif
 #   include <string.h>
 #   ifdef _WIN64
 #    define strlen(s) _strlen31(s)
@@ -314,8 +317,8 @@ static __inline unsigned int _strlen31(const char *str)
 #    undef isupper
 #    undef isxdigit
 #   endif
-#   if defined(_MSC_VER) && !defined(_DLL) && defined(stdin)
-#    if _MSC_VER>=1300
+#   if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(_DLL) && defined(stdin)
+#    if _MSC_VER>=1300 && _MSC_VER<1600
 #     undef stdin
 #     undef stdout
 #     undef stderr
@@ -323,7 +326,7 @@ FILE *__iob_func();
 #     define stdin  (&__iob_func()[0])
 #     define stdout (&__iob_func()[1])
 #     define stderr (&__iob_func()[2])
-#    elif defined(I_CAN_LIVE_WITH_LNK4049)
+#    elif _MSC_VER<1300 && defined(I_CAN_LIVE_WITH_LNK4049)
 #     undef stdin
 #     undef stdout
 #     undef stderr
@@ -498,6 +501,10 @@ typedef unsigned long clock_t;
 
 /*************/
 
+# if defined(OPENSSL_NO_SOCK) && !defined(OPENSSL_NO_DGRAM)
+#  define OPENSSL_NO_DGRAM
+# endif
+
 # ifdef USE_SOCKETS
 #  if defined(WINDOWS) || defined(MSDOS)
       /* windows world */
diff --git a/e_os2.h b/e_os2.h
index 2b1b78f..7be9989 100644 (file)
--- a/e_os2.h
+++ b/e_os2.h
@@ -101,7 +101,7 @@ extern "C" {
 #  undef OPENSSL_SYS_UNIX
 #  define OPENSSL_SYS_WIN32_UWIN
 # else
-#  if defined(__CYGWIN32__) || defined(OPENSSL_SYSNAME_CYGWIN32)
+#  if defined(__CYGWIN__) || defined(OPENSSL_SYSNAME_CYGWIN)
 #   undef OPENSSL_SYS_UNIX
 #   define OPENSSL_SYS_WIN32_CYGWIN
 #  else
index 23bd2f3..2058ff4 100644 (file)
@@ -111,7 +111,10 @@ install:
                for l in $(LIBNAMES); do \
                        ( echo installing $$l; \
                          pfx=lib; \
-                         if [ "$(PLATFORM)" != "Cygwin" ]; then \
+                         if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \
+                               sfx=".so"; \
+                               cp cyg$$l.dll $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx.new; \
+                         else \
                                case "$(CFLAGS)" in \
                                *DSO_BEOS*)     sfx=".so";;     \
                                *DSO_DLFCN*)    sfx=`expr "$(SHLIB_EXT)" : '.*\(\.[a-z][a-z]*\)' \| ".so"`;;    \
@@ -120,9 +123,6 @@ install:
                                *)              sfx=".bad";;    \
                                esac; \
                                cp $$pfx$$l$$sfx $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx.new; \
-                         else \
-                               sfx=".so"; \
-                               cp cyg$$l.dll $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx.new; \
                          fi; \
                          chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx.new; \
                          mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx ); \
@@ -216,17 +216,17 @@ e_atalla.o: ../include/openssl/symhacks.h ../include/openssl/x509.h
 e_atalla.o: ../include/openssl/x509_vfy.h e_atalla.c e_atalla_err.c
 e_atalla.o: e_atalla_err.h vendor_defns/atalla.h
 e_capi.o: ../include/openssl/asn1.h ../include/openssl/bio.h
-e_capi.o: ../include/openssl/bn.h ../include/openssl/buffer.h
-e_capi.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h
-e_capi.o: ../include/openssl/ec.h ../include/openssl/ecdh.h
-e_capi.o: ../include/openssl/ecdsa.h ../include/openssl/engine.h
-e_capi.o: ../include/openssl/evp.h ../include/openssl/lhash.h
-e_capi.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h
-e_capi.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
-e_capi.o: ../include/openssl/ossl_typ.h ../include/openssl/pkcs7.h
-e_capi.o: ../include/openssl/safestack.h ../include/openssl/sha.h
-e_capi.o: ../include/openssl/stack.h ../include/openssl/symhacks.h
-e_capi.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h e_capi.c
+e_capi.o: ../include/openssl/buffer.h ../include/openssl/crypto.h
+e_capi.o: ../include/openssl/e_os2.h ../include/openssl/ec.h
+e_capi.o: ../include/openssl/ecdh.h ../include/openssl/ecdsa.h
+e_capi.o: ../include/openssl/engine.h ../include/openssl/evp.h
+e_capi.o: ../include/openssl/lhash.h ../include/openssl/obj_mac.h
+e_capi.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h
+e_capi.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h
+e_capi.o: ../include/openssl/pkcs7.h ../include/openssl/safestack.h
+e_capi.o: ../include/openssl/sha.h ../include/openssl/stack.h
+e_capi.o: ../include/openssl/symhacks.h ../include/openssl/x509.h
+e_capi.o: ../include/openssl/x509_vfy.h e_capi.c
 e_chil.o: ../include/openssl/asn1.h ../include/openssl/bio.h
 e_chil.o: ../include/openssl/bn.h ../include/openssl/buffer.h
 e_chil.o: ../include/openssl/crypto.h ../include/openssl/dh.h
index 4e183a4..17e1efb 100644 (file)
@@ -45,7 +45,10 @@ install:
                set -e; \
                echo installing $(LIBNAME); \
                pfx=lib; \
-               if [ "$(PLATFORM)" != "Cygwin" ]; then \
+               if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \
+                       sfx=".so"; \
+                       cp cyg$(LIBNAME).dll $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx.new; \
+               else \
                        case "$(CFLAGS)" in \
                        *DSO_BEOS*) sfx=".so";; \
                        *DSO_DLFCN*) sfx=`expr "$(SHLIB_EXT)" : '.*\(\.[a-z][a-z]*\)' \| ".so"`;; \
@@ -54,9 +57,6 @@ install:
                        *) sfx=".bad";; \
                        esac; \
                        cp $${pfx}$(LIBNAME)$$sfx $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx.new; \
-               else \
-                       sfx=".so"; \
-                       cp cyg$(LIBNAME).dll $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx.new; \
                fi; \
                chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx.new; \
                mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx; \
@@ -75,8 +75,7 @@ local_depend:
        @[ -z "$(THIS)" ] || $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
 
 files:
-
-
+       $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
 
 lint:
        lint -DLINT $(INCLUDES) $(SRC)>fluff
index 945509d..4ff4ddd 100644 (file)
@@ -221,7 +221,7 @@ static void kboxinit(gost_ctx * c, const gost_subst_block * b)
     int i;
 
     for (i = 0; i < 256; i++) {
-        c->k87[i] = (b->k8[i >> 4] << 4 | b->k7[i & 15]) << 24;
+        c->k87[i] = (word32) (b->k8[i >> 4] << 4 | b->k7[i & 15]) << 24;
         c->k65[i] = (b->k6[i >> 4] << 4 | b->k5[i & 15]) << 16;
         c->k43[i] = (b->k4[i >> 4] << 4 | b->k3[i & 15]) << 8;
         c->k21[i] = b->k2[i >> 4] << 4 | b->k1[i & 15];
@@ -242,8 +242,8 @@ static word32 f(gost_ctx * c, word32 x)
 void gostcrypt(gost_ctx * c, const byte * in, byte * out)
 {
     register word32 n1, n2;     /* As named in the GOST */
-    n1 = in[0] | (in[1] << 8) | (in[2] << 16) | (in[3] << 24);
-    n2 = in[4] | (in[5] << 8) | (in[6] << 16) | (in[7] << 24);
+    n1 = in[0] | (in[1] << 8) | (in[2] << 16) | ((word32) in[3] << 24);
+    n2 = in[4] | (in[5] << 8) | (in[6] << 16) | ((word32) in[7] << 24);
     /* Instead of swapping halves, swap names each round */
 
     n2 ^= f(c, n1 + c->k[0]);
@@ -296,8 +296,8 @@ void gostcrypt(gost_ctx * c, const byte * in, byte * out)
 void gostdecrypt(gost_ctx * c, const byte * in, byte * out)
 {
     register word32 n1, n2;     /* As named in the GOST */
-    n1 = in[0] | (in[1] << 8) | (in[2] << 16) | (in[3] << 24);
-    n2 = in[4] | (in[5] << 8) | (in[6] << 16) | (in[7] << 24);
+    n1 = in[0] | (in[1] << 8) | (in[2] << 16) | ((word32) in[3] << 24);
+    n2 = in[4] | (in[5] << 8) | (in[6] << 16) | ((word32) in[7] << 24);
 
     n2 ^= f(c, n1 + c->k[0]);
     n1 ^= f(c, n2 + c->k[1]);
@@ -417,7 +417,8 @@ void gost_key(gost_ctx * c, const byte * k)
     int i, j;
     for (i = 0, j = 0; i < 8; i++, j += 4) {
         c->k[i] =
-            k[j] | (k[j + 1] << 8) | (k[j + 2] << 16) | (k[j + 3] << 24);
+            k[j] | (k[j + 1] << 8) | (k[j + 2] << 16) | ((word32) k[j + 3] <<
+                                                         24);
     }
 }
 
@@ -462,8 +463,10 @@ void mac_block(gost_ctx * c, byte * buffer, const byte * block)
     for (i = 0; i < 8; i++) {
         buffer[i] ^= block[i];
     }
-    n1 = buffer[0] | (buffer[1] << 8) | (buffer[2] << 16) | (buffer[3] << 24);
-    n2 = buffer[4] | (buffer[5] << 8) | (buffer[6] << 16) | (buffer[7] << 24);
+    n1 = buffer[0] | (buffer[1] << 8) | (buffer[2] << 16) | ((word32)
+                                                             buffer[3] << 24);
+    n2 = buffer[4] | (buffer[5] << 8) | (buffer[6] << 16) | ((word32)
+                                                             buffer[7] << 24);
     /* Instead of swapping halves, swap names each round */
 
     n2 ^= f(c, n1 + c->k[0]);
index aae941e..2bbdc6c 100644 (file)
@@ -251,13 +251,13 @@ static void gost_cnt_next(void *ctx, unsigned char *iv, unsigned char *buf)
     } else {
         memcpy(buf1, iv, 8);
     }
-    g = buf1[0] | (buf1[1] << 8) | (buf1[2] << 16) | (buf1[3] << 24);
+    g = buf1[0] | (buf1[1] << 8) | (buf1[2] << 16) | ((word32) buf1[3] << 24);
     g += 0x01010101;
     buf1[0] = (unsigned char)(g & 0xff);
     buf1[1] = (unsigned char)((g >> 8) & 0xff);
     buf1[2] = (unsigned char)((g >> 16) & 0xff);
     buf1[3] = (unsigned char)((g >> 24) & 0xff);
-    g = buf1[4] | (buf1[5] << 8) | (buf1[6] << 16) | (buf1[7] << 24);
+    g = buf1[4] | (buf1[5] << 8) | (buf1[6] << 16) | ((word32) buf1[7] << 24);
     go = g;
     g += 0x01010104;
     if (go > g)                 /* overflow */
index b6275b2..4a79a85 100644 (file)
@@ -87,6 +87,10 @@ static int pkey_gost_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
         }
         break;
 
+    case EVP_PKEY_CTRL_GET_MD:
+        *(const EVP_MD **)p2 = pctx->md;
+        return 1;
+
     case EVP_PKEY_CTRL_PKCS7_ENCRYPT:
     case EVP_PKEY_CTRL_PKCS7_DECRYPT:
     case EVP_PKEY_CTRL_PKCS7_SIGN:
@@ -447,6 +451,10 @@ static int pkey_gost_mac_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2)
         }
         break;
 
+    case EVP_PKEY_CTRL_GET_MD:
+        *(const EVP_MD **)p2 = data->md;
+        return 1;
+
     case EVP_PKEY_CTRL_PKCS7_ENCRYPT:
     case EVP_PKEY_CTRL_PKCS7_DECRYPT:
     case EVP_PKEY_CTRL_PKCS7_SIGN:
index 43dd310..f4cd2ff 100644 (file)
 
 #include <stdio.h>
 #include <string.h>
+#include <stdlib.h>
+
 #include <openssl/crypto.h>
-#include <openssl/buffer.h>
-#include <openssl/bn.h>
 
 #ifdef OPENSSL_SYS_WIN32
 # ifndef OPENSSL_NO_CAPIENG
 
+#  include <openssl/buffer.h>
+#  include <openssl/bn.h>
 #  include <openssl/rsa.h>
 
-#  include <windows.h>
-
 #  ifndef _WIN32_WINNT
 #   define _WIN32_WINNT 0x0400
 #  endif
 
+#  include <windows.h>
 #  include <wincrypt.h>
+#  include <malloc.h>
+#  ifndef alloca
+#   define alloca _alloca
+#  endif
 
 /*
  * This module uses several "new" interfaces, among which is
@@ -832,7 +837,7 @@ int capi_rsa_sign(int dtype, const unsigned char *m, unsigned int m_len,
 
 /* Finally sign it */
     slen = RSA_size(rsa);
-    if (!CryptSignHashA(hash, capi_key->keyspec, NULL, 0, sigret, &slen)) {
+    if (!CryptSignHash(hash, capi_key->keyspec, NULL, 0, sigret, &slen)) {
         CAPIerr(CAPI_F_CAPI_RSA_SIGN, CAPI_R_ERROR_SIGNING_HASH);
         capi_addlasterror();
         goto err;
@@ -956,7 +961,7 @@ static DSA_SIG *capi_dsa_do_sign(const unsigned char *digest, int dlen,
 
     /* Finally sign it */
     slen = sizeof(csigbuf);
-    if (!CryptSignHashA(hash, capi_key->keyspec, NULL, 0, csigbuf, &slen)) {
+    if (!CryptSignHash(hash, capi_key->keyspec, NULL, 0, csigbuf, &slen)) {
         CAPIerr(CAPI_F_CAPI_DSA_DO_SIGN, CAPI_R_ERROR_SIGNING_HASH);
         capi_addlasterror();
         goto err;
@@ -1025,7 +1030,7 @@ static void capi_adderror(DWORD err)
     ERR_add_error_data(2, "Error code= 0x", errstr);
 }
 
-static char *wide_to_asc(LPWSTR wstr)
+static char *wide_to_asc(LPCWSTR wstr)
 {
     char *str;
     int len_0, sz;
@@ -1054,10 +1059,10 @@ static char *wide_to_asc(LPWSTR wstr)
 static int capi_get_provname(CAPI_CTX * ctx, LPSTR * pname, DWORD * ptype,
                              DWORD idx)
 {
-    LPSTR name;
     DWORD len, err;
+    LPTSTR name;
     CAPI_trace(ctx, "capi_get_provname, index=%d\n", idx);
-    if (!CryptEnumProvidersA(idx, NULL, 0, ptype, NULL, &len)) {
+    if (!CryptEnumProviders(idx, NULL, 0, ptype, NULL, &len)) {
         err = GetLastError();
         if (err == ERROR_NO_MORE_ITEMS)
             return 2;
@@ -1065,8 +1070,11 @@ static int capi_get_provname(CAPI_CTX * ctx, LPSTR * pname, DWORD * ptype,
         capi_adderror(err);
         return 0;
     }
-    name = OPENSSL_malloc(len);
-    if (!CryptEnumProvidersA(idx, NULL, 0, ptype, name, &len)) {
+    if (sizeof(TCHAR) != sizeof(char))
+        name = alloca(len);
+    else
+        name = OPENSSL_malloc(len);
+    if (!CryptEnumProviders(idx, NULL, 0, ptype, name, &len)) {
         err = GetLastError();
         if (err == ERROR_NO_MORE_ITEMS)
             return 2;
@@ -1074,8 +1082,11 @@ static int capi_get_provname(CAPI_CTX * ctx, LPSTR * pname, DWORD * ptype,
         capi_adderror(err);
         return 0;
     }
-    *pname = name;
-    CAPI_trace(ctx, "capi_get_provname, returned name=%s, type=%d\n", name,
+    if (sizeof(TCHAR) != sizeof(char))
+        *pname = wide_to_asc((WCHAR *)name);
+    else
+        *pname = (char *)name;
+    CAPI_trace(ctx, "capi_get_provname, returned name=%s, type=%d\n", *pname,
                *ptype);
 
     return 1;
@@ -1106,10 +1117,26 @@ static int capi_list_containers(CAPI_CTX * ctx, BIO *out)
     HCRYPTPROV hprov;
     DWORD err, idx, flags, buflen = 0, clen;
     LPSTR cname;
+    LPTSTR cspname = NULL;
+
     CAPI_trace(ctx, "Listing containers CSP=%s, type = %d\n", ctx->cspname,
                ctx->csptype);
-    if (!CryptAcquireContextA
-        (&hprov, NULL, ctx->cspname, ctx->csptype, CRYPT_VERIFYCONTEXT)) {
+    if (ctx->cspname && sizeof(TCHAR) != sizeof(char)) {
+        if ((clen =
+             MultiByteToWideChar(CP_ACP, 0, ctx->cspname, -1, NULL, 0))) {
+            cspname = alloca(clen * sizeof(WCHAR));
+            MultiByteToWideChar(CP_ACP, 0, ctx->cspname, -1, (WCHAR *)cspname,
+                                clen);
+        }
+        if (!cspname) {
+            CAPIerr(CAPI_F_CAPI_LIST_CONTAINERS, ERR_R_MALLOC_FAILURE);
+            capi_addlasterror();
+            return 0;
+        }
+    } else
+        cspname = (TCHAR *)ctx->cspname;
+    if (!CryptAcquireContext
+        (&hprov, NULL, cspname, ctx->csptype, CRYPT_VERIFYCONTEXT)) {
         CAPIerr(CAPI_F_CAPI_LIST_CONTAINERS,
                 CAPI_R_CRYPTACQUIRECONTEXT_ERROR);
         capi_addlasterror();
@@ -1139,7 +1166,8 @@ static int capi_list_containers(CAPI_CTX * ctx, BIO *out)
             flags = CRYPT_FIRST;
         else
             flags = 0;
-        if (!CryptGetProvParam(hprov, PP_ENUMCONTAINERS, cname, &clen, flags)) {
+        if (!CryptGetProvParam
+            (hprov, PP_ENUMCONTAINERS, (BYTE *) cname, &clen, flags)) {
             err = GetLastError();
             if (err == ERROR_NO_MORE_ITEMS)
                 goto done;
@@ -1326,7 +1354,6 @@ int capi_list_certs(CAPI_CTX * ctx, BIO *out, char *id)
         CertFreeCertificateContext(cert);
     } else {
         for (idx = 0;; idx++) {
-            LPWSTR fname = NULL;
             cert = CertEnumCertificatesInStore(hstore, cert);
             if (!cert)
                 break;
@@ -1371,18 +1398,30 @@ static PCCERT_CONTEXT capi_find_cert(CAPI_CTX * ctx, const char *id,
     }
 }
 
-static CAPI_KEY *capi_get_key(CAPI_CTX * ctx, const char *contname,
-                              char *provname, DWORD ptype, DWORD keyspec)
+static CAPI_KEY *capi_get_key(CAPI_CTX * ctx, const TCHAR *contname,
+                              TCHAR *provname, DWORD ptype, DWORD keyspec)
 {
     CAPI_KEY *key;
     DWORD dwFlags = 0;
     key = OPENSSL_malloc(sizeof(CAPI_KEY));
-    CAPI_trace(ctx, "capi_get_key, contname=%s, provname=%s, type=%d\n",
-               contname, provname, ptype);
+    if (sizeof(TCHAR) == sizeof(char))
+        CAPI_trace(ctx, "capi_get_key, contname=%s, provname=%s, type=%d\n",
+                   contname, provname, ptype);
+    else if (ctx && ctx->debug_level >= CAPI_DBG_TRACE && ctx->debug_file) {
+        /* above 'if' is optimization to minimize malloc-ations */
+        char *_contname = wide_to_asc((WCHAR *)contname);
+        char *_provname = wide_to_asc((WCHAR *)provname);
+
+        CAPI_trace(ctx, "capi_get_key, contname=%s, provname=%s, type=%d\n",
+                   _contname, _provname, ptype);
+        if (_provname)
+            OPENSSL_free(_provname);
+        if (_contname)
+            OPENSSL_free(_contname);
+    }
     if (ctx->store_flags & CERT_SYSTEM_STORE_LOCAL_MACHINE)
         dwFlags = CRYPT_MACHINE_KEYSET;
-    if (!CryptAcquireContextA
-        (&key->hprov, contname, provname, ptype, dwFlags)) {
+    if (!CryptAcquireContext(&key->hprov, contname, provname, ptype, dwFlags)) {
         CAPIerr(CAPI_F_CAPI_GET_KEY, CAPI_R_CRYPTACQUIRECONTEXT_ERROR);
         capi_addlasterror();
         goto err;
@@ -1410,12 +1449,18 @@ static CAPI_KEY *capi_get_cert_key(CAPI_CTX * ctx, PCCERT_CONTEXT cert)
     pinfo = capi_get_prov_info(ctx, cert);
     if (!pinfo)
         goto err;
-    provname = wide_to_asc(pinfo->pwszProvName);
-    contname = wide_to_asc(pinfo->pwszContainerName);
-    if (!provname || !contname)
-        goto err;
-    key = capi_get_key(ctx, contname, provname,
-                       pinfo->dwProvType, pinfo->dwKeySpec);
+    if (sizeof(TCHAR) != sizeof(char))
+        key = capi_get_key(ctx, (TCHAR *)pinfo->pwszContainerName,
+                           (TCHAR *)pinfo->pwszProvName,
+                           pinfo->dwProvType, pinfo->dwKeySpec);
+    else {
+        provname = wide_to_asc(pinfo->pwszProvName);
+        contname = wide_to_asc(pinfo->pwszContainerName);
+        if (!provname || !contname)
+            goto err;
+        key = capi_get_key(ctx, (TCHAR *)contname, (TCHAR *)provname,
+                           pinfo->dwProvType, pinfo->dwKeySpec);
+    }
 
  err:
     if (pinfo)
@@ -1447,7 +1492,29 @@ CAPI_KEY *capi_find_key(CAPI_CTX * ctx, const char *id)
         break;
 
     case CAPI_LU_CONTNAME:
-        key = capi_get_key(ctx, id, ctx->cspname, ctx->csptype, ctx->keytype);
+        if (sizeof(TCHAR) != sizeof(char)) {
+            WCHAR *contname, *provname;
+            DWORD len;
+
+            if ((len = MultiByteToWideChar(CP_ACP, 0, id, -1, NULL, 0)) &&
+                (contname = alloca(len * sizeof(WCHAR)),
+                 MultiByteToWideChar(CP_ACP, 0, id, -1, contname, len)) &&
+                (len =
+                 MultiByteToWideChar(CP_ACP, 0, ctx->cspname, -1, NULL, 0))
+                && (provname =
+                    alloca(len * sizeof(WCHAR)), MultiByteToWideChar(CP_ACP,
+                                                                     0,
+                                                                     ctx->cspname,
+                                                                     -1,
+                                                                     provname,
+                                                                     len)))
+                key =
+                    capi_get_key(ctx, (TCHAR *)contname, (TCHAR *)provname,
+                                 ctx->csptype, ctx->keytype);
+        } else
+            key = capi_get_key(ctx, (TCHAR *)id,
+                               (TCHAR *)ctx->cspname,
+                               ctx->csptype, ctx->keytype);
         break;
     }
 
@@ -1512,8 +1579,19 @@ static int capi_ctx_set_provname(CAPI_CTX * ctx, LPSTR pname, DWORD type,
     CAPI_trace(ctx, "capi_ctx_set_provname, name=%s, type=%d\n", pname, type);
     if (check) {
         HCRYPTPROV hprov;
-        if (!CryptAcquireContextA(&hprov, NULL, pname, type,
-                                  CRYPT_VERIFYCONTEXT)) {
+        LPTSTR name = NULL;
+
+        if (sizeof(TCHAR) != sizeof(char)) {
+            DWORD len;
+            if ((len = MultiByteToWideChar(CP_ACP, 0, pname, -1, NULL, 0))) {
+                name = alloca(len * sizeof(WCHAR));
+                MultiByteToWideChar(CP_ACP, 0, pname, -1, (WCHAR *)name, len);
+            }
+        } else
+            name = (TCHAR *)pname;
+
+        if (!name || !CryptAcquireContext(&hprov, NULL, name, type,
+                                          CRYPT_VERIFYCONTEXT)) {
             CAPIerr(CAPI_F_CAPI_CTX_SET_PROVNAME,
                     CAPI_R_CRYPTACQUIRECONTEXT_ERROR);
             capi_addlasterror();
index 4838e38..fd8bca9 100644 (file)
@@ -759,7 +759,7 @@ $ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS
 $ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX"
 $ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. ""
 $ THEN
-$     IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + ","
+$     IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + ","
 $     CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS
 $ ENDIF
 $!
index 9fc7b89..fa06396 100644 (file)
@@ -214,12 +214,12 @@ typedef struct {
     int (*mutex_acquire) (HWCryptoHook_Mutex *);
     void (*mutex_release) (HWCryptoHook_Mutex *);
     void (*mutex_destroy) (HWCryptoHook_Mutex *);
-   /*-
-    * For greater efficiency, can use condition vars internally for
-    * synchronisation.  In this case maxsimultaneous is ignored, but
-    * the other mutex stuff must be available.  In singlethreaded
-    * programs, set everything to 0.
-    */
+    /*-
+     * For greater efficiency, can use condition vars internally for
+     * synchronisation.  In this case maxsimultaneous is ignored, but
+     * the other mutex stuff must be available.  In singlethreaded
+     * programs, set everything to 0.
+     */
     size_t condvarsize;
     int (*condvar_init) (HWCryptoHook_CondVar *,
                          HWCryptoHook_CallerContext * cactx);
@@ -227,103 +227,103 @@ typedef struct {
     void (*condvar_signal) (HWCryptoHook_CondVar *);
     void (*condvar_broadcast) (HWCryptoHook_CondVar *);
     void (*condvar_destroy) (HWCryptoHook_CondVar *);
-   /*-
-    * The semantics of acquiring and releasing mutexes and broadcasting
-    * and waiting on condition variables are expected to be those from
-    * POSIX threads (pthreads).  The mutexes may be (in pthread-speak)
-    * fast mutexes, recursive mutexes, or nonrecursive ones.
-    *
-    * The _release/_signal/_broadcast and _destroy functions must
-    * always succeed when given a valid argument; if they are given an
-    * invalid argument then the program (crypto plugin + application)
-    * has an internal error, and they should abort the program.
-    */
+    /*-
+     * The semantics of acquiring and releasing mutexes and broadcasting
+     * and waiting on condition variables are expected to be those from
+     * POSIX threads (pthreads).  The mutexes may be (in pthread-speak)
+     * fast mutexes, recursive mutexes, or nonrecursive ones.
+     *
+     * The _release/_signal/_broadcast and _destroy functions must
+     * always succeed when given a valid argument; if they are given an
+     * invalid argument then the program (crypto plugin + application)
+     * has an internal error, and they should abort the program.
+     */
     int (*getpassphrase) (const char *prompt_info,
                           int *len_io, char *buf,
                           HWCryptoHook_PassphraseContext * ppctx,
                           HWCryptoHook_CallerContext * cactx);
-   /*-
-    * Passphrases and the prompt_info, if they contain high-bit-set
-    * characters, are UTF-8.  The prompt_info may be a null pointer if
-    * no prompt information is available (it should not be an empty
-    * string).  It will not contain text like `enter passphrase';
-    * instead it might say something like `Operator Card for John
-    * Smith' or `SmartCard in nFast Module #1, Slot #1'.
-    *
-    * buf points to a buffer in which to return the passphrase; on
-    * entry *len_io is the length of the buffer.  It should be updated
-    * by the callback.  The returned passphrase should not be
-    * null-terminated by the callback.
-    */
+    /*-
+     * Passphrases and the prompt_info, if they contain high-bit-set
+     * characters, are UTF-8.  The prompt_info may be a null pointer if
+     * no prompt information is available (it should not be an empty
+     * string).  It will not contain text like `enter passphrase';
+     * instead it might say something like `Operator Card for John
+     * Smith' or `SmartCard in nFast Module #1, Slot #1'.
+     *
+     * buf points to a buffer in which to return the passphrase; on
+     * entry *len_io is the length of the buffer.  It should be updated
+     * by the callback.  The returned passphrase should not be
+     * null-terminated by the callback.
+     */
     int (*getphystoken) (const char *prompt_info,
                          const char *wrong_info,
                          HWCryptoHook_PassphraseContext * ppctx,
                          HWCryptoHook_CallerContext * cactx);
-   /*-
-    * Requests that the human user physically insert a different
-    * smartcard, DataKey, etc.  The plugin should check whether the
-    * currently inserted token(s) are appropriate, and if they are it
-    * should not make this call.
-    *
-    * prompt_info is as before.  wrong_info is a description of the
-    * currently inserted token(s) so that the user is told what
-    * something is.  wrong_info, like prompt_info, may be null, but
-    * should not be an empty string.  Its contents should be
-    * syntactically similar to that of prompt_info.
-    */
-   /*-
-    * Note that a single LoadKey operation might cause several calls to
-    * getpassphrase and/or requestphystoken.  If requestphystoken is
-    * not provided (ie, a null pointer is passed) then the plugin may
-    * not support loading keys for which authorisation by several cards
-    * is required.  If getpassphrase is not provided then cards with
-    * passphrases may not be supported.
-    *
-    * getpassphrase and getphystoken do not need to check that the
-    * passphrase has been entered correctly or the correct token
-    * inserted; the crypto plugin will do that.  If this is not the
-    * case then the crypto plugin is responsible for calling these
-    * routines again as appropriate until the correct token(s) and
-    * passphrase(s) are supplied as required, or until any retry limits
-    * implemented by the crypto plugin are reached.
-    *
-    * In either case, the application must allow the user to say `no'
-    * or `cancel' to indicate that they do not know the passphrase or
-    * have the appropriate token; this should cause the callback to
-    * return nonzero indicating error.
-    */
+    /*-
+     * Requests that the human user physically insert a different
+     * smartcard, DataKey, etc.  The plugin should check whether the
+     * currently inserted token(s) are appropriate, and if they are it
+     * should not make this call.
+     *
+     * prompt_info is as before.  wrong_info is a description of the
+     * currently inserted token(s) so that the user is told what
+     * something is.  wrong_info, like prompt_info, may be null, but
+     * should not be an empty string.  Its contents should be
+     * syntactically similar to that of prompt_info.
+     */
+    /*-
+     * Note that a single LoadKey operation might cause several calls to
+     * getpassphrase and/or getphystoken.  If getphystoken is
+     * not provided (ie, a null pointer is passed) then the plugin may
+     * not support loading keys for which authorisation by several cards
+     * is required.  If getpassphrase is not provided then cards with
+     * passphrases may not be supported.
+     *
+     * getpassphrase and getphystoken do not need to check that the
+     * passphrase has been entered correctly or the correct token
+     * inserted; the crypto plugin will do that.  If this is not the
+     * case then the crypto plugin is responsible for calling these
+     * routines again as appropriate until the correct token(s) and
+     * passphrase(s) are supplied as required, or until any retry limits
+     * implemented by the crypto plugin are reached.
+     *
+     * In either case, the application must allow the user to say `no'
+     * or `cancel' to indicate that they do not know the passphrase or
+     * have the appropriate token; this should cause the callback to
+     * return nonzero indicating error.
+     */
     void (*logmessage) (void *logstream, const char *message);
-   /*-
-    * A log message will be generated at least every time something goes
-    * wrong and an ErrMsgBuf is filled in (or would be if one was
-    * provided).  Other diagnostic information may be written there too,
-    * including more detailed reasons for errors which are reported in an
-    * ErrMsgBuf.
-    *
-    * When a log message is generated, this callback is called.  It
-    * should write a message to the relevant logging arrangements.
-    *
-    * The message string passed will be null-terminated and may be of arbitrary
-    * length.  It will not be prefixed by the time and date, nor by the
-    * name of the library that is generating it - if this is required,
-    * the logmessage callback must do it.  The message will not have a
-    * trailing newline (though it may contain internal newlines).
-    *
-    * If a null pointer is passed for logmessage a default function is
-    * used.  The default function treats logstream as a FILE* which has
-    * been converted to a void*.  If logstream is 0 it does nothing.
-    * Otherwise it prepends the date and time and library name and
-    * writes the message to logstream.  Each line will be prefixed by a
-    * descriptive string containing the date, time and identity of the
-    * crypto plugin.  Errors on the logstream are not reported
-    * anywhere, and the default function doesn't flush the stream, so
-    * the application must set the buffering how it wants it.
-    *
-    * The crypto plugin may also provide a facility to have copies of
-    * log messages sent elsewhere, and or for adjusting the verbosity
-    * of the log messages; any such facilities will be configured by
-    * external means.
-    */
+    /*-
+     * A log message will be generated at least every time something goes
+     * wrong and an ErrMsgBuf is filled in (or would be if one was
+     * provided).  Other diagnostic information may be written there too,
+     * including more detailed reasons for errors which are reported in an
+     * ErrMsgBuf.
+     *
+     * When a log message is generated, this callback is called.  It
+     * should write a message to the relevant logging arrangements.
+     *
+     * The message string passed will be null-terminated and may be of arbitrary
+     * length.  It will not be prefixed by the time and date, nor by the
+     * name of the library that is generating it - if this is required,
+     * the logmessage callback must do it.  The message will not have a
+     * trailing newline (though it may contain internal newlines).
+     *
+     * If a null pointer is passed for logmessage a default function is
+     * used.  The default function treats logstream as a FILE* which has
+     * been converted to a void*.  If logstream is 0 it does nothing.
+     * Otherwise it prepends the date and time and library name and
+     * writes the message to logstream.  Each line will be prefixed by a
+     * descriptive string containing the date, time and identity of the
+     * crypto plugin.  Errors on the logstream are not reported
+     * anywhere, and the default function doesn't flush the stream, so
+     * the application must set the buffering how it wants it.
+     *
+     * The crypto plugin may also provide a facility to have copies of
+     * log messages sent elsewhere, and/or for adjusting the verbosity
+     * of the log messages; any such facilities will be configured by
+     * external means.
+     */
 } HWCryptoHook_InitInfo;
 
 typedef
index 01ada3a..f6b3ff2 100755 (executable)
@@ -242,7 +242,7 @@ $ WRITE H_FILE ""
 $ WRITE H_FILE "#ifndef OPENSSL_SYS_VMS"
 $ WRITE H_FILE "# define OPENSSL_SYS_VMS"
 $ WRITE H_FILE "#endif"
-$
+$!
 $! One of the best way to figure out what the list should be is to do
 $! the following on a Unix system:
 $!   grep OPENSSL_NO_ crypto/*/*.h ssl/*.h engines/*.h engines/*/*.h|grep ':# *if'|sed -e 's/^.*def //'|sort|uniq
@@ -274,6 +274,7 @@ $ CONFIG_LOGICALS := AES,-
                     GMP,-
                     GOST,-
                     HASH_COMP,-
+                    HEARTBEATS,-
                     HMAC,-
                     IDEA,-
                     JPAKE,-
@@ -292,6 +293,7 @@ $ CONFIG_LOGICALS := AES,-
                     RFC3779,-
                     RIPEMD,-
                     RSA,-
+                    SCTP,-
                     SEED,-
                     SHA,-
                     SHA0,-
@@ -302,6 +304,7 @@ $ CONFIG_LOGICALS := AES,-
                     SRP,-
                     SSL2,-
                     SSL_INTERN,-
+                    SSL_TRACE,-
                     STACK,-
                     STATIC_ENGINE,-
                     STDIO,-
@@ -346,7 +349,8 @@ $ CONFIG_DISABLE_RULES := RIJNDAEL/AES;-
                          /MD2;-
                          /RC5;-
                          /RFC3779;-
-                         /SCTP
+                         /SCTP;-
+                         /SSL_TRACE
 $ CONFIG_ENABLE_RULES := ZLIB_DYNAMIC/ZLIB;-
                         /THREADS
 $
@@ -512,6 +516,7 @@ $ WRITE H_FILE "#define OPENSSL_NO_SETVBUF_IONBF"
 $ WRITE H_FILE "/* STCP support comes with TCPIP 5.7 ECO 2 "
 $ WRITE H_FILE " * enable on newer systems / 2012-02-24 arpadffy */"
 $ WRITE H_FILE "#define OPENSSL_NO_SCTP"
+$ WRITE H_FILE "#define OPENSSL_NO_LIBUNBOUND"
 $ WRITE H_FILE ""
 $!
 $! Add in the common "crypto/opensslconf.h.in".
@@ -825,7 +830,7 @@ $ @CRYPTO-LIB LIBRARY 'DEBUGGER' "''COMPILER'" "''TCPIP_TYPE'" -
    "''ISSEVEN'" "''BUILDPART'" "''POINTER_SIZE'" "''ZLIB'"
 $!
 $! Build The [.xxx.EXE.CRYPTO]*.EXE Test Applications.
-$!  
+$!
 $!!! DISABLED, as these test programs lack any support
 $!!!$ @CRYPTO-LIB APPS 'DEBUGGER' "''COMPILER'" "''TCPIP_TYPE'" -
 $!!!   "''ISSEVEN'" "''BUILDPART'" "''POINTER_SIZE'" "''ZLIB'"
@@ -1017,9 +1022,9 @@ $!
 $!    Tell The User We Don't Know What They Want.
 $!
 $     WRITE SYS$OUTPUT ""
-$     WRITE SYS$OUTPUT "USAGE:   @MAKEVMS.COM [Target] [Pointer size] [Debug option] <Compiler>"
+$     WRITE SYS$OUTPUT "USAGE:   @MAKEVMS.COM [Target] [Pointer size] [Debug option] <Compiler> <TCP/IP library>"
 $     WRITE SYS$OUTPUT ""
-$     WRITE SYS$OUTPUT "Example: @MAKEVMS.COM ALL """" NODEBUG "
+$     WRITE SYS$OUTPUT "Example: @MAKEVMS.COM ALL """" NODEBUG DECC TCPIP"
 $     WRITE SYS$OUTPUT ""
 $     WRITE SYS$OUTPUT "The Target ",P1," Is Invalid.  The Valid Target Options Are:"
 $     WRITE SYS$OUTPUT ""
index 67a6074..b721d65 100644 (file)
@@ -6,8 +6,7 @@ Release: 1
 
 Summary: Secure Sockets Layer and cryptography libraries and tools
 Name: openssl
-#Version: %{libmaj}.%{libmin}.%{librel}
-Version: 1.0.1p
+Version: 1.0.2d
 Source0: ftp://ftp.openssl.org/source/%{name}-%{version}.tar.gz
 License: OpenSSL
 Group: System Environment/Libraries
index 29d9e45..42f1af5 100644 (file)
@@ -24,24 +24,24 @@ LIBSRC=     \
        s2_meth.c   s2_srvr.c s2_clnt.c  s2_lib.c  s2_enc.c s2_pkt.c \
        s3_meth.c   s3_srvr.c s3_clnt.c  s3_lib.c  s3_enc.c s3_pkt.c s3_both.c s3_cbc.c \
        s23_meth.c s23_srvr.c s23_clnt.c s23_lib.c          s23_pkt.c \
-       t1_meth.c   t1_srvr.c t1_clnt.c  t1_lib.c  t1_enc.c \
+       t1_meth.c   t1_srvr.c t1_clnt.c  t1_lib.c  t1_enc.c t1_ext.c \
        d1_meth.c   d1_srvr.c d1_clnt.c  d1_lib.c  d1_pkt.c \
-       d1_both.c d1_enc.c d1_srtp.c \
+       d1_both.c d1_srtp.c \
        ssl_lib.c ssl_err2.c ssl_cert.c ssl_sess.c \
        ssl_ciph.c ssl_stat.c ssl_rsa.c \
-       ssl_asn1.c ssl_txt.c ssl_algs.c \
-       bio_ssl.c ssl_err.c kssl.c tls_srp.c t1_reneg.c ssl_utst.c
+       ssl_asn1.c ssl_txt.c ssl_algs.c ssl_conf.c \
+       bio_ssl.c ssl_err.c kssl.c t1_reneg.c tls_srp.c t1_trce.c ssl_utst.c
 LIBOBJ= \
        s2_meth.o  s2_srvr.o  s2_clnt.o  s2_lib.o  s2_enc.o s2_pkt.o \
        s3_meth.o  s3_srvr.o  s3_clnt.o  s3_lib.o  s3_enc.o s3_pkt.o s3_both.o s3_cbc.o \
        s23_meth.o s23_srvr.o s23_clnt.o s23_lib.o          s23_pkt.o \
-       t1_meth.o   t1_srvr.o t1_clnt.o  t1_lib.o  t1_enc.o \
+       t1_meth.o   t1_srvr.o t1_clnt.o  t1_lib.o  t1_enc.o t1_ext.o \
        d1_meth.o   d1_srvr.o d1_clnt.o  d1_lib.o  d1_pkt.o \
-       d1_both.o d1_enc.o d1_srtp.o\
+       d1_both.o d1_srtp.o\
        ssl_lib.o ssl_err2.o ssl_cert.o ssl_sess.o \
        ssl_ciph.o ssl_stat.o ssl_rsa.o \
-       ssl_asn1.o ssl_txt.o ssl_algs.o \
-       bio_ssl.o ssl_err.o kssl.o tls_srp.o t1_reneg.o ssl_utst.o
+       ssl_asn1.o ssl_txt.o ssl_algs.o ssl_conf.o \
+       bio_ssl.o ssl_err.o kssl.o t1_reneg.o tls_srp.o t1_trce.o ssl_utst.o
 
 SRC= $(LIBSRC)
 
@@ -166,27 +166,6 @@ d1_clnt.o: ../include/openssl/ssl3.h ../include/openssl/stack.h
 d1_clnt.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h
 d1_clnt.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h d1_clnt.c
 d1_clnt.o: kssl_lcl.h ssl_locl.h
-d1_enc.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
-d1_enc.o: ../include/openssl/buffer.h ../include/openssl/comp.h
-d1_enc.o: ../include/openssl/crypto.h ../include/openssl/dsa.h
-d1_enc.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h
-d1_enc.o: ../include/openssl/ec.h ../include/openssl/ecdh.h
-d1_enc.o: ../include/openssl/ecdsa.h ../include/openssl/err.h
-d1_enc.o: ../include/openssl/evp.h ../include/openssl/hmac.h
-d1_enc.o: ../include/openssl/kssl.h ../include/openssl/lhash.h
-d1_enc.o: ../include/openssl/md5.h ../include/openssl/obj_mac.h
-d1_enc.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h
-d1_enc.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h
-d1_enc.o: ../include/openssl/pem.h ../include/openssl/pem2.h
-d1_enc.o: ../include/openssl/pkcs7.h ../include/openssl/pqueue.h
-d1_enc.o: ../include/openssl/rand.h ../include/openssl/rsa.h
-d1_enc.o: ../include/openssl/safestack.h ../include/openssl/sha.h
-d1_enc.o: ../include/openssl/srtp.h ../include/openssl/ssl.h
-d1_enc.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h
-d1_enc.o: ../include/openssl/ssl3.h ../include/openssl/stack.h
-d1_enc.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h
-d1_enc.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h d1_enc.c
-d1_enc.o: ssl_locl.h
 d1_lib.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
 d1_lib.o: ../include/openssl/buffer.h ../include/openssl/comp.h
 d1_lib.o: ../include/openssl/crypto.h ../include/openssl/dsa.h
@@ -612,8 +591,7 @@ s3_enc.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h
 s3_enc.o: ../include/openssl/stack.h ../include/openssl/symhacks.h
 s3_enc.o: ../include/openssl/tls1.h ../include/openssl/x509.h
 s3_enc.o: ../include/openssl/x509_vfy.h s3_enc.c ssl_locl.h
-s3_lib.o: ../crypto/ec/ec_lcl.h ../e_os.h ../include/openssl/asn1.h
-s3_lib.o: ../include/openssl/bio.h ../include/openssl/bn.h
+s3_lib.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
 s3_lib.o: ../include/openssl/buffer.h ../include/openssl/comp.h
 s3_lib.o: ../include/openssl/crypto.h ../include/openssl/dh.h
 s3_lib.o: ../include/openssl/dsa.h ../include/openssl/dtls1.h
@@ -779,6 +757,27 @@ ssl_ciph.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h
 ssl_ciph.o: ../include/openssl/stack.h ../include/openssl/symhacks.h
 ssl_ciph.o: ../include/openssl/tls1.h ../include/openssl/x509.h
 ssl_ciph.o: ../include/openssl/x509_vfy.h ssl_ciph.c ssl_locl.h
+ssl_conf.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
+ssl_conf.o: ../include/openssl/buffer.h ../include/openssl/comp.h
+ssl_conf.o: ../include/openssl/conf.h ../include/openssl/crypto.h
+ssl_conf.o: ../include/openssl/dh.h ../include/openssl/dsa.h
+ssl_conf.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h
+ssl_conf.o: ../include/openssl/ec.h ../include/openssl/ecdh.h
+ssl_conf.o: ../include/openssl/ecdsa.h ../include/openssl/err.h
+ssl_conf.o: ../include/openssl/evp.h ../include/openssl/hmac.h
+ssl_conf.o: ../include/openssl/kssl.h ../include/openssl/lhash.h
+ssl_conf.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h
+ssl_conf.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
+ssl_conf.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h
+ssl_conf.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h
+ssl_conf.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h
+ssl_conf.o: ../include/openssl/safestack.h ../include/openssl/sha.h
+ssl_conf.o: ../include/openssl/srtp.h ../include/openssl/ssl.h
+ssl_conf.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h
+ssl_conf.o: ../include/openssl/ssl3.h ../include/openssl/stack.h
+ssl_conf.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h
+ssl_conf.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h ssl_conf.c
+ssl_conf.o: ssl_locl.h
 ssl_err.o: ../include/openssl/asn1.h ../include/openssl/bio.h
 ssl_err.o: ../include/openssl/buffer.h ../include/openssl/comp.h
 ssl_err.o: ../include/openssl/crypto.h ../include/openssl/dtls1.h
@@ -980,6 +979,26 @@ t1_enc.o: ../include/openssl/ssl3.h ../include/openssl/stack.h
 t1_enc.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h
 t1_enc.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h ssl_locl.h
 t1_enc.o: t1_enc.c
+t1_ext.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
+t1_ext.o: ../include/openssl/buffer.h ../include/openssl/comp.h
+t1_ext.o: ../include/openssl/crypto.h ../include/openssl/dsa.h
+t1_ext.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h
+t1_ext.o: ../include/openssl/ec.h ../include/openssl/ecdh.h
+t1_ext.o: ../include/openssl/ecdsa.h ../include/openssl/err.h
+t1_ext.o: ../include/openssl/evp.h ../include/openssl/hmac.h
+t1_ext.o: ../include/openssl/kssl.h ../include/openssl/lhash.h
+t1_ext.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h
+t1_ext.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
+t1_ext.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h
+t1_ext.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h
+t1_ext.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h
+t1_ext.o: ../include/openssl/safestack.h ../include/openssl/sha.h
+t1_ext.o: ../include/openssl/srtp.h ../include/openssl/ssl.h
+t1_ext.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h
+t1_ext.o: ../include/openssl/ssl3.h ../include/openssl/stack.h
+t1_ext.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h
+t1_ext.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h ssl_locl.h
+t1_ext.o: t1_ext.c
 t1_lib.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
 t1_lib.o: ../include/openssl/buffer.h ../include/openssl/comp.h
 t1_lib.o: ../include/openssl/conf.h ../include/openssl/crypto.h
@@ -1062,6 +1081,26 @@ t1_srvr.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h
 t1_srvr.o: ../include/openssl/stack.h ../include/openssl/symhacks.h
 t1_srvr.o: ../include/openssl/tls1.h ../include/openssl/x509.h
 t1_srvr.o: ../include/openssl/x509_vfy.h ssl_locl.h t1_srvr.c
+t1_trce.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
+t1_trce.o: ../include/openssl/buffer.h ../include/openssl/comp.h
+t1_trce.o: ../include/openssl/crypto.h ../include/openssl/dsa.h
+t1_trce.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h
+t1_trce.o: ../include/openssl/ec.h ../include/openssl/ecdh.h
+t1_trce.o: ../include/openssl/ecdsa.h ../include/openssl/err.h
+t1_trce.o: ../include/openssl/evp.h ../include/openssl/hmac.h
+t1_trce.o: ../include/openssl/kssl.h ../include/openssl/lhash.h
+t1_trce.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h
+t1_trce.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
+t1_trce.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h
+t1_trce.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h
+t1_trce.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h
+t1_trce.o: ../include/openssl/safestack.h ../include/openssl/sha.h
+t1_trce.o: ../include/openssl/srtp.h ../include/openssl/ssl.h
+t1_trce.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h
+t1_trce.o: ../include/openssl/ssl3.h ../include/openssl/stack.h
+t1_trce.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h
+t1_trce.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h ssl_locl.h
+t1_trce.o: t1_trce.c
 tls_srp.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
 tls_srp.o: ../include/openssl/bn.h ../include/openssl/buffer.h
 tls_srp.o: ../include/openssl/comp.h ../include/openssl/crypto.h
index 8dd8ea3..b4ee7ab 100644 (file)
@@ -279,13 +279,17 @@ int dtls1_do_write(SSL *s, int type)
                        (int)s->d1->w_msg_hdr.msg_len +
                        DTLS1_HM_HEADER_LENGTH);
 
-    if (s->write_hash)
-        mac_size = EVP_MD_CTX_size(s->write_hash);
-    else
+    if (s->write_hash) {
+        if (s->enc_write_ctx
+            && EVP_CIPHER_CTX_mode(s->enc_write_ctx) == EVP_CIPH_GCM_MODE)
+            mac_size = 0;
+        else
+            mac_size = EVP_MD_CTX_size(s->write_hash);
+    } else
         mac_size = 0;
 
     if (s->enc_write_ctx &&
-        (EVP_CIPHER_mode(s->enc_write_ctx->cipher) & EVP_CIPH_CBC_MODE))
+        (EVP_CIPHER_CTX_mode(s->enc_write_ctx) == EVP_CIPH_CBC_MODE))
         blocksize = 2 * EVP_CIPHER_block_size(s->enc_write_ctx->cipher);
     else
         blocksize = 0;
@@ -978,59 +982,6 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok)
     return (-1);
 }
 
-int dtls1_send_finished(SSL *s, int a, int b, const char *sender, int slen)
-{
-    unsigned char *p, *d;
-    int i;
-    unsigned long l;
-
-    if (s->state == a) {
-        d = (unsigned char *)s->init_buf->data;
-        p = &(d[DTLS1_HM_HEADER_LENGTH]);
-
-        i = s->method->ssl3_enc->final_finish_mac(s,
-                                                  sender, slen,
-                                                  s->s3->tmp.finish_md);
-        s->s3->tmp.finish_md_len = i;
-        memcpy(p, s->s3->tmp.finish_md, i);
-        p += i;
-        l = i;
-
-        /*
-         * Copy the finished so we can use it for renegotiation checks
-         */
-        if (s->type == SSL_ST_CONNECT) {
-            OPENSSL_assert(i <= EVP_MAX_MD_SIZE);
-            memcpy(s->s3->previous_client_finished, s->s3->tmp.finish_md, i);
-            s->s3->previous_client_finished_len = i;
-        } else {
-            OPENSSL_assert(i <= EVP_MAX_MD_SIZE);
-            memcpy(s->s3->previous_server_finished, s->s3->tmp.finish_md, i);
-            s->s3->previous_server_finished_len = i;
-        }
-
-#ifdef OPENSSL_SYS_WIN16
-        /*
-         * MSVC 1.5 does not clear the top bytes of the word unless I do
-         * this.
-         */
-        l &= 0xffff;
-#endif
-
-        d = dtls1_set_message_header(s, d, SSL3_MT_FINISHED, l, 0, l);
-        s->init_num = (int)l + DTLS1_HM_HEADER_LENGTH;
-        s->init_off = 0;
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-
-        s->state = b;
-    }
-
-    /* SSL3_ST_SEND_xxxxxx_HELLO_B */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
-}
-
 /*-
  * for these 2 messages, we need to
  * ssl->enc_read_ctx                    re-init
@@ -1071,77 +1022,6 @@ int dtls1_send_change_cipher_spec(SSL *s, int a, int b)
     return (dtls1_do_write(s, SSL3_RT_CHANGE_CIPHER_SPEC));
 }
 
-static int dtls1_add_cert_to_buf(BUF_MEM *buf, unsigned long *l, X509 *x)
-{
-    int n;
-    unsigned char *p;
-
-    n = i2d_X509(x, NULL);
-    if (!BUF_MEM_grow_clean(buf, (int)(n + (*l) + 3))) {
-        SSLerr(SSL_F_DTLS1_ADD_CERT_TO_BUF, ERR_R_BUF_LIB);
-        return 0;
-    }
-    p = (unsigned char *)&(buf->data[*l]);
-    l2n3(n, p);
-    i2d_X509(x, &p);
-    *l += n + 3;
-
-    return 1;
-}
-
-unsigned long dtls1_output_cert_chain(SSL *s, X509 *x)
-{
-    unsigned char *p;
-    int i;
-    unsigned long l = 3 + DTLS1_HM_HEADER_LENGTH;
-    BUF_MEM *buf;
-
-    /* TLSv1 sends a chain with nothing in it, instead of an alert */
-    buf = s->init_buf;
-    if (!BUF_MEM_grow_clean(buf, 10)) {
-        SSLerr(SSL_F_DTLS1_OUTPUT_CERT_CHAIN, ERR_R_BUF_LIB);
-        return (0);
-    }
-    if (x != NULL) {
-        X509_STORE_CTX xs_ctx;
-
-        if (!X509_STORE_CTX_init(&xs_ctx, s->ctx->cert_store, x, NULL)) {
-            SSLerr(SSL_F_DTLS1_OUTPUT_CERT_CHAIN, ERR_R_X509_LIB);
-            return (0);
-        }
-
-        X509_verify_cert(&xs_ctx);
-        /* Don't leave errors in the queue */
-        ERR_clear_error();
-        for (i = 0; i < sk_X509_num(xs_ctx.chain); i++) {
-            x = sk_X509_value(xs_ctx.chain, i);
-
-            if (!dtls1_add_cert_to_buf(buf, &l, x)) {
-                X509_STORE_CTX_cleanup(&xs_ctx);
-                return 0;
-            }
-        }
-        X509_STORE_CTX_cleanup(&xs_ctx);
-    }
-    /* Thawte special :-) */
-    for (i = 0; i < sk_X509_num(s->ctx->extra_certs); i++) {
-        x = sk_X509_value(s->ctx->extra_certs, i);
-        if (!dtls1_add_cert_to_buf(buf, &l, x))
-            return 0;
-    }
-
-    l -= (3 + DTLS1_HM_HEADER_LENGTH);
-
-    p = (unsigned char *)&(buf->data[DTLS1_HM_HEADER_LENGTH]);
-    l2n3(l, p);
-    l += 3;
-    p = (unsigned char *)&(buf->data[0]);
-    p = dtls1_set_message_header(s, p, SSL3_MT_CERTIFICATE, l, 0, l);
-
-    l += DTLS1_HM_HEADER_LENGTH;
-    return (l);
-}
-
 int dtls1_read_failed(SSL *s, int code)
 {
     if (code > 0) {
@@ -1244,10 +1124,10 @@ int dtls1_buffer_message(SSL *s, int is_ccs)
     memcpy(frag->fragment, s->init_buf->data, s->init_num);
 
     if (is_ccs) {
+        /* For DTLS1_BAD_VER the header length is non-standard */
         OPENSSL_assert(s->d1->w_msg_hdr.msg_len +
-                       ((s->version ==
-                         DTLS1_VERSION) ? DTLS1_CCS_HEADER_LENGTH : 3) ==
-                       (unsigned int)s->init_num);
+                       ((s->version==DTLS1_BAD_VER)?3:DTLS1_CCS_HEADER_LENGTH)
+                       == (unsigned int)s->init_num);
     } else {
         OPENSSL_assert(s->d1->w_msg_hdr.msg_len +
                        DTLS1_HM_HEADER_LENGTH == (unsigned int)s->init_num);
index 377c1e6..4c2ccbf 100644 (file)
@@ -135,13 +135,29 @@ static const SSL_METHOD *dtls1_get_client_method(int ver)
 {
     if (ver == DTLS1_VERSION || ver == DTLS1_BAD_VER)
         return (DTLSv1_client_method());
+    else if (ver == DTLS1_2_VERSION)
+        return (DTLSv1_2_client_method());
     else
         return (NULL);
 }
 
-IMPLEMENT_dtls1_meth_func(DTLSv1_client_method,
+IMPLEMENT_dtls1_meth_func(DTLS1_VERSION,
+                          DTLSv1_client_method,
                           ssl_undefined_function,
-                          dtls1_connect, dtls1_get_client_method)
+                          dtls1_connect,
+                          dtls1_get_client_method, DTLSv1_enc_data)
+
+    IMPLEMENT_dtls1_meth_func(DTLS1_2_VERSION,
+                          DTLSv1_2_client_method,
+                          ssl_undefined_function,
+                          dtls1_connect,
+                          dtls1_get_client_method, DTLSv1_2_enc_data)
+
+    IMPLEMENT_dtls1_meth_func(DTLS_ANY_VERSION,
+                          DTLS_client_method,
+                          ssl_undefined_function,
+                          dtls1_connect,
+                          dtls1_get_client_method, DTLSv1_2_enc_data)
 
 int dtls1_connect(SSL *s)
 {
@@ -307,7 +323,7 @@ int dtls1_connect(SSL *s)
             ssl3_init_finished_mac(s);
 
             dtls1_start_timer(s);
-            ret = dtls1_client_hello(s);
+            ret = ssl3_client_hello(s);
             if (ret <= 0)
                 goto end;
 
@@ -462,7 +478,7 @@ int dtls1_connect(SSL *s)
         case SSL3_ST_CW_CERT_C:
         case SSL3_ST_CW_CERT_D:
             dtls1_start_timer(s);
-            ret = dtls1_send_client_certificate(s);
+            ret = ssl3_send_client_certificate(s);
             if (ret <= 0)
                 goto end;
             s->state = SSL3_ST_CW_KEY_EXCH_A;
@@ -472,7 +488,7 @@ int dtls1_connect(SSL *s)
         case SSL3_ST_CW_KEY_EXCH_A:
         case SSL3_ST_CW_KEY_EXCH_B:
             dtls1_start_timer(s);
-            ret = dtls1_send_client_key_exchange(s);
+            ret = ssl3_send_client_key_exchange(s);
             if (ret <= 0)
                 goto end;
 
@@ -517,7 +533,7 @@ int dtls1_connect(SSL *s)
         case SSL3_ST_CW_CERT_VRFY_A:
         case SSL3_ST_CW_CERT_VRFY_B:
             dtls1_start_timer(s);
-            ret = dtls1_send_client_verify(s);
+            ret = ssl3_send_client_verify(s);
             if (ret <= 0)
                 goto end;
 #ifndef OPENSSL_NO_SCTP
@@ -583,13 +599,13 @@ int dtls1_connect(SSL *s)
         case SSL3_ST_CW_FINISHED_B:
             if (!s->hit)
                 dtls1_start_timer(s);
-            ret = dtls1_send_finished(s,
-                                      SSL3_ST_CW_FINISHED_A,
-                                      SSL3_ST_CW_FINISHED_B,
-                                      s->method->
-                                      ssl3_enc->client_finished_label,
-                                      s->method->
-                                      ssl3_enc->client_finished_label_len);
+            ret = ssl3_send_finished(s,
+                                     SSL3_ST_CW_FINISHED_A,
+                                     SSL3_ST_CW_FINISHED_B,
+                                     s->method->
+                                     ssl3_enc->client_finished_label,
+                                     s->method->
+                                     ssl3_enc->client_finished_label_len);
             if (ret <= 0)
                 goto end;
             s->state = SSL3_ST_CW_FLUSH;
@@ -786,140 +802,18 @@ int dtls1_connect(SSL *s)
     return (ret);
 }
 
-int dtls1_client_hello(SSL *s)
-{
-    unsigned char *buf;
-    unsigned char *p, *d;
-    unsigned int i, j;
-    unsigned long l;
-    SSL_COMP *comp;
-
-    buf = (unsigned char *)s->init_buf->data;
-    if (s->state == SSL3_ST_CW_CLNT_HELLO_A) {
-        SSL_SESSION *sess = s->session;
-        if ((s->session == NULL) || (s->session->ssl_version != s->version) ||
-#ifdef OPENSSL_NO_TLSEXT
-            !sess->session_id_length ||
-#else
-            (!sess->session_id_length && !sess->tlsext_tick) ||
-#endif
-            (s->session->not_resumable)) {
-            if (!ssl_get_new_session(s, 0))
-                goto err;
-        }
-        /* else use the pre-loaded session */
-
-        p = s->s3->client_random;
-
-        /*
-         * if client_random is initialized, reuse it, we are required to use
-         * same upon reply to HelloVerify
-         */
-        for (i = 0; p[i] == '\0' && i < sizeof(s->s3->client_random); i++) ;
-        if (i == sizeof(s->s3->client_random))
-            ssl_fill_hello_random(s, 0, p, sizeof(s->s3->client_random));
-
-        /* Do the message type and length last */
-        d = p = &(buf[DTLS1_HM_HEADER_LENGTH]);
-
-        *(p++) = s->version >> 8;
-        *(p++) = s->version & 0xff;
-        s->client_version = s->version;
-
-        /* Random stuff */
-        memcpy(p, s->s3->client_random, SSL3_RANDOM_SIZE);
-        p += SSL3_RANDOM_SIZE;
-
-        /* Session ID */
-        if (s->new_session)
-            i = 0;
-        else
-            i = s->session->session_id_length;
-        *(p++) = i;
-        if (i != 0) {
-            if (i > sizeof s->session->session_id) {
-                SSLerr(SSL_F_DTLS1_CLIENT_HELLO, ERR_R_INTERNAL_ERROR);
-                goto err;
-            }
-            memcpy(p, s->session->session_id, i);
-            p += i;
-        }
-
-        /* cookie stuff */
-        if (s->d1->cookie_len > sizeof(s->d1->cookie)) {
-            SSLerr(SSL_F_DTLS1_CLIENT_HELLO, ERR_R_INTERNAL_ERROR);
-            goto err;
-        }
-        *(p++) = s->d1->cookie_len;
-        memcpy(p, s->d1->cookie, s->d1->cookie_len);
-        p += s->d1->cookie_len;
-
-        /* Ciphers supported */
-        i = ssl_cipher_list_to_bytes(s, SSL_get_ciphers(s), &(p[2]), 0);
-        if (i == 0) {
-            SSLerr(SSL_F_DTLS1_CLIENT_HELLO, SSL_R_NO_CIPHERS_AVAILABLE);
-            goto err;
-        }
-        s2n(i, p);
-        p += i;
-
-        /* COMPRESSION */
-        if (s->ctx->comp_methods == NULL)
-            j = 0;
-        else
-            j = sk_SSL_COMP_num(s->ctx->comp_methods);
-        *(p++) = 1 + j;
-        for (i = 0; i < j; i++) {
-            comp = sk_SSL_COMP_value(s->ctx->comp_methods, i);
-            *(p++) = comp->id;
-        }
-        *(p++) = 0;             /* Add the NULL method */
-
-#ifndef OPENSSL_NO_TLSEXT
-        /* TLS extensions */
-        if (ssl_prepare_clienthello_tlsext(s) <= 0) {
-            SSLerr(SSL_F_DTLS1_CLIENT_HELLO, SSL_R_CLIENTHELLO_TLSEXT);
-            goto err;
-        }
-        if ((p =
-             ssl_add_clienthello_tlsext(s, p,
-                                        buf + SSL3_RT_MAX_PLAIN_LENGTH)) ==
-            NULL) {
-            SSLerr(SSL_F_DTLS1_CLIENT_HELLO, ERR_R_INTERNAL_ERROR);
-            goto err;
-        }
-#endif
-
-        l = (p - d);
-        d = buf;
-
-        d = dtls1_set_message_header(s, d, SSL3_MT_CLIENT_HELLO, l, 0, l);
-
-        s->state = SSL3_ST_CW_CLNT_HELLO_B;
-        /* number of bytes to write */
-        s->init_num = p - buf;
-        s->init_off = 0;
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-    }
-
-    /* SSL3_ST_CW_CLNT_HELLO_B */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
- err:
-    return (-1);
-}
-
 static int dtls1_get_hello_verify(SSL *s)
 {
     int n, al, ok = 0;
     unsigned char *data;
     unsigned int cookie_len;
 
+    s->first_packet = 1;
     n = s->method->ssl_get_message(s,
                                    DTLS1_ST_CR_HELLO_VERIFY_REQUEST_A,
                                    DTLS1_ST_CR_HELLO_VERIFY_REQUEST_B,
                                    -1, s->max_cert_list, &ok);
+    s->first_packet = 0;
 
     if (!ok)
         return ((int)n);
@@ -931,13 +825,16 @@ static int dtls1_get_hello_verify(SSL *s)
     }
 
     data = (unsigned char *)s->init_msg;
-
-    if ((data[0] != (s->version >> 8)) || (data[1] != (s->version & 0xff))) {
+#if 0
+    if (s->method->version != DTLS_ANY_VERSION &&
+        ((data[0] != (s->version >> 8)) || (data[1] != (s->version & 0xff))))
+    {
         SSLerr(SSL_F_DTLS1_GET_HELLO_VERIFY, SSL_R_WRONG_SSL_VERSION);
         s->version = (s->version & 0xff00) | data[1];
         al = SSL_AD_PROTOCOL_VERSION;
         goto f_err;
     }
+#endif
     data += 2;
 
     cookie_len = *(data++);
@@ -957,746 +854,3 @@ static int dtls1_get_hello_verify(SSL *s)
     s->state = SSL_ST_ERR;
     return -1;
 }
-
-int dtls1_send_client_key_exchange(SSL *s)
-{
-    unsigned char *p, *d;
-    int n;
-    unsigned long alg_k;
-#ifndef OPENSSL_NO_RSA
-    unsigned char *q;
-    EVP_PKEY *pkey = NULL;
-#endif
-#ifndef OPENSSL_NO_KRB5
-    KSSL_ERR kssl_err;
-#endif                          /* OPENSSL_NO_KRB5 */
-#ifndef OPENSSL_NO_ECDH
-    EC_KEY *clnt_ecdh = NULL;
-    const EC_POINT *srvr_ecpoint = NULL;
-    EVP_PKEY *srvr_pub_pkey = NULL;
-    unsigned char *encodedPoint = NULL;
-    int encoded_pt_len = 0;
-    BN_CTX *bn_ctx = NULL;
-#endif
-
-    if (s->state == SSL3_ST_CW_KEY_EXCH_A) {
-        d = (unsigned char *)s->init_buf->data;
-        p = &(d[DTLS1_HM_HEADER_LENGTH]);
-
-        alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
-
-        /* Fool emacs indentation */
-        if (0) {
-        }
-#ifndef OPENSSL_NO_RSA
-        else if (alg_k & SSL_kRSA) {
-            RSA *rsa;
-            unsigned char tmp_buf[SSL_MAX_MASTER_KEY_LENGTH];
-
-            if (s->session->sess_cert == NULL) {
-                /*
-                 * We should always have a server certificate with SSL_kRSA.
-                 */
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       ERR_R_INTERNAL_ERROR);
-                goto err;
-            }
-
-            if (s->session->sess_cert->peer_rsa_tmp != NULL)
-                rsa = s->session->sess_cert->peer_rsa_tmp;
-            else {
-                pkey =
-                    X509_get_pubkey(s->session->
-                                    sess_cert->peer_pkeys[SSL_PKEY_RSA_ENC].
-                                    x509);
-                if ((pkey == NULL) || (pkey->type != EVP_PKEY_RSA)
-                    || (pkey->pkey.rsa == NULL)) {
-                    SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                           ERR_R_INTERNAL_ERROR);
-                    goto err;
-                }
-                rsa = pkey->pkey.rsa;
-                EVP_PKEY_free(pkey);
-            }
-
-            tmp_buf[0] = s->client_version >> 8;
-            tmp_buf[1] = s->client_version & 0xff;
-            if (RAND_bytes(&(tmp_buf[2]), sizeof tmp_buf - 2) <= 0)
-                goto err;
-
-            s->session->master_key_length = sizeof tmp_buf;
-
-            q = p;
-            /* Fix buf for TLS and [incidentally] DTLS */
-            if (s->version > SSL3_VERSION)
-                p += 2;
-            n = RSA_public_encrypt(sizeof tmp_buf,
-                                   tmp_buf, p, rsa, RSA_PKCS1_PADDING);
-# ifdef PKCS1_CHECK
-            if (s->options & SSL_OP_PKCS1_CHECK_1)
-                p[1]++;
-            if (s->options & SSL_OP_PKCS1_CHECK_2)
-                tmp_buf[0] = 0x70;
-# endif
-            if (n <= 0) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       SSL_R_BAD_RSA_ENCRYPT);
-                goto err;
-            }
-
-            /* Fix buf for TLS and [incidentally] DTLS */
-            if (s->version > SSL3_VERSION) {
-                s2n(n, q);
-                n += 2;
-            }
-
-            s->session->master_key_length =
-                s->method->ssl3_enc->generate_master_secret(s,
-                                                            s->
-                                                            session->master_key,
-                                                            tmp_buf,
-                                                            sizeof tmp_buf);
-            OPENSSL_cleanse(tmp_buf, sizeof tmp_buf);
-        }
-#endif
-#ifndef OPENSSL_NO_KRB5
-        else if (alg_k & SSL_kKRB5) {
-            krb5_error_code krb5rc;
-            KSSL_CTX *kssl_ctx = s->kssl_ctx;
-            /*  krb5_data   krb5_ap_req;  */
-            krb5_data *enc_ticket;
-            krb5_data authenticator, *authp = NULL;
-            EVP_CIPHER_CTX ciph_ctx;
-            const EVP_CIPHER *enc = NULL;
-            unsigned char iv[EVP_MAX_IV_LENGTH];
-            unsigned char tmp_buf[SSL_MAX_MASTER_KEY_LENGTH];
-            unsigned char epms[SSL_MAX_MASTER_KEY_LENGTH + EVP_MAX_IV_LENGTH];
-            int padl, outl = sizeof(epms);
-
-            EVP_CIPHER_CTX_init(&ciph_ctx);
-
-# ifdef KSSL_DEBUG
-            printf("ssl3_send_client_key_exchange(%lx & %lx)\n",
-                   alg_k, SSL_kKRB5);
-# endif                         /* KSSL_DEBUG */
-
-            authp = NULL;
-# ifdef KRB5SENDAUTH
-            if (KRB5SENDAUTH)
-                authp = &authenticator;
-# endif                         /* KRB5SENDAUTH */
-
-            krb5rc = kssl_cget_tkt(kssl_ctx, &enc_ticket, authp, &kssl_err);
-            enc = kssl_map_enc(kssl_ctx->enctype);
-            if (enc == NULL)
-                goto err;
-# ifdef KSSL_DEBUG
-            {
-                printf("kssl_cget_tkt rtn %d\n", krb5rc);
-                if (krb5rc && kssl_err.text)
-                    printf("kssl_cget_tkt kssl_err=%s\n", kssl_err.text);
-            }
-# endif                         /* KSSL_DEBUG */
-
-            if (krb5rc) {
-                ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE);
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, kssl_err.reason);
-                goto err;
-            }
-
-            /*-
-             *   20010406 VRS - Earlier versions used KRB5 AP_REQ
-            **  in place of RFC 2712 KerberosWrapper, as in:
-            **
-            **  Send ticket (copy to *p, set n = length)
-            **  n = krb5_ap_req.length;
-            **  memcpy(p, krb5_ap_req.data, krb5_ap_req.length);
-            **  if (krb5_ap_req.data)
-            **    kssl_krb5_free_data_contents(NULL,&krb5_ap_req);
-            **
-            **  Now using real RFC 2712 KerberosWrapper
-            **  (Thanks to Simon Wilkinson <sxw@sxw.org.uk>)
-            **  Note: 2712 "opaque" types are here replaced
-            **  with a 2-byte length followed by the value.
-            **  Example:
-            **  KerberosWrapper= xx xx asn1ticket 0 0 xx xx encpms
-            **  Where "xx xx" = length bytes.  Shown here with
-            **  optional authenticator omitted.
-            */
-
-            /*  KerberosWrapper.Ticket              */
-            s2n(enc_ticket->length, p);
-            memcpy(p, enc_ticket->data, enc_ticket->length);
-            p += enc_ticket->length;
-            n = enc_ticket->length + 2;
-
-            /*  KerberosWrapper.Authenticator       */
-            if (authp && authp->length) {
-                s2n(authp->length, p);
-                memcpy(p, authp->data, authp->length);
-                p += authp->length;
-                n += authp->length + 2;
-
-                free(authp->data);
-                authp->data = NULL;
-                authp->length = 0;
-            } else {
-                s2n(0, p);      /* null authenticator length */
-                n += 2;
-            }
-
-            if (RAND_bytes(tmp_buf, sizeof tmp_buf) <= 0)
-                goto err;
-
-            /*-
-             *  20010420 VRS.  Tried it this way; failed.
-             *      EVP_EncryptInit_ex(&ciph_ctx,enc, NULL,NULL);
-             *      EVP_CIPHER_CTX_set_key_length(&ciph_ctx,
-             *                              kssl_ctx->length);
-             *      EVP_EncryptInit_ex(&ciph_ctx,NULL, key,iv);
-             */
-
-            memset(iv, 0, sizeof iv); /* per RFC 1510 */
-            EVP_EncryptInit_ex(&ciph_ctx, enc, NULL, kssl_ctx->key, iv);
-            EVP_EncryptUpdate(&ciph_ctx, epms, &outl, tmp_buf,
-                              sizeof tmp_buf);
-            EVP_EncryptFinal_ex(&ciph_ctx, &(epms[outl]), &padl);
-            outl += padl;
-            if (outl > (int)sizeof epms) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       ERR_R_INTERNAL_ERROR);
-                goto err;
-            }
-            EVP_CIPHER_CTX_cleanup(&ciph_ctx);
-
-            /*  KerberosWrapper.EncryptedPreMasterSecret    */
-            s2n(outl, p);
-            memcpy(p, epms, outl);
-            p += outl;
-            n += outl + 2;
-
-            s->session->master_key_length =
-                s->method->ssl3_enc->generate_master_secret(s,
-                                                            s->
-                                                            session->master_key,
-                                                            tmp_buf,
-                                                            sizeof tmp_buf);
-
-            OPENSSL_cleanse(tmp_buf, sizeof tmp_buf);
-            OPENSSL_cleanse(epms, outl);
-        }
-#endif
-#ifndef OPENSSL_NO_DH
-        else if (alg_k & (SSL_kEDH | SSL_kDHr | SSL_kDHd)) {
-            DH *dh_srvr, *dh_clnt;
-
-            if (s->session->sess_cert == NULL) {
-                ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_UNEXPECTED_MESSAGE);
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       SSL_R_UNEXPECTED_MESSAGE);
-                goto err;
-            }
-
-            if (s->session->sess_cert->peer_dh_tmp != NULL)
-                dh_srvr = s->session->sess_cert->peer_dh_tmp;
-            else {
-                /* we get them from the cert */
-                ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE);
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       SSL_R_UNABLE_TO_FIND_DH_PARAMETERS);
-                goto err;
-            }
-
-            /* generate a new random key */
-            if ((dh_clnt = DHparams_dup(dh_srvr)) == NULL) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB);
-                goto err;
-            }
-            if (!DH_generate_key(dh_clnt)) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB);
-                goto err;
-            }
-
-            /*
-             * use the 'p' output buffer for the DH key, but make sure to
-             * clear it out afterwards
-             */
-
-            n = DH_compute_key(p, dh_srvr->pub_key, dh_clnt);
-
-            if (n <= 0) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB);
-                goto err;
-            }
-
-            /* generate master key from the result */
-            s->session->master_key_length =
-                s->method->ssl3_enc->generate_master_secret(s,
-                                                            s->
-                                                            session->master_key,
-                                                            p, n);
-            /* clean up */
-            memset(p, 0, n);
-
-            /* send off the data */
-            n = BN_num_bytes(dh_clnt->pub_key);
-            s2n(n, p);
-            BN_bn2bin(dh_clnt->pub_key, p);
-            n += 2;
-
-            DH_free(dh_clnt);
-
-            /* perhaps clean things up a bit EAY EAY EAY EAY */
-        }
-#endif
-#ifndef OPENSSL_NO_ECDH
-        else if (alg_k & (SSL_kEECDH | SSL_kECDHr | SSL_kECDHe)) {
-            const EC_GROUP *srvr_group = NULL;
-            EC_KEY *tkey;
-            int ecdh_clnt_cert = 0;
-            int field_size = 0;
-
-            if (s->session->sess_cert == NULL) {
-                ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_UNEXPECTED_MESSAGE);
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       SSL_R_UNEXPECTED_MESSAGE);
-                goto err;
-            }
-
-            /*
-             * Did we send out the client's ECDH share for use in premaster
-             * computation as part of client certificate? If so, set
-             * ecdh_clnt_cert to 1.
-             */
-            if ((alg_k & (SSL_kECDHr | SSL_kECDHe)) && (s->cert != NULL)) {
-                /*
-                 * XXX: For now, we do not support client authentication
-                 * using ECDH certificates. To add such support, one needs to
-                 * add code that checks for appropriate conditions and sets
-                 * ecdh_clnt_cert to 1. For example, the cert have an ECC key
-                 * on the same curve as the server's and the key should be
-                 * authorized for key agreement. One also needs to add code
-                 * in ssl3_connect to skip sending the certificate verify
-                 * message. if ((s->cert->key->privatekey != NULL) &&
-                 * (s->cert->key->privatekey->type == EVP_PKEY_EC) && ...)
-                 * ecdh_clnt_cert = 1;
-                 */
-            }
-
-            if (s->session->sess_cert->peer_ecdh_tmp != NULL) {
-                tkey = s->session->sess_cert->peer_ecdh_tmp;
-            } else {
-                /* Get the Server Public Key from Cert */
-                srvr_pub_pkey =
-                    X509_get_pubkey(s->session->
-                                    sess_cert->peer_pkeys[SSL_PKEY_ECC].x509);
-                if ((srvr_pub_pkey == NULL)
-                    || (srvr_pub_pkey->type != EVP_PKEY_EC)
-                    || (srvr_pub_pkey->pkey.ec == NULL)) {
-                    SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                           ERR_R_INTERNAL_ERROR);
-                    goto err;
-                }
-
-                tkey = srvr_pub_pkey->pkey.ec;
-            }
-
-            srvr_group = EC_KEY_get0_group(tkey);
-            srvr_ecpoint = EC_KEY_get0_public_key(tkey);
-
-            if ((srvr_group == NULL) || (srvr_ecpoint == NULL)) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       ERR_R_INTERNAL_ERROR);
-                goto err;
-            }
-
-            if ((clnt_ecdh = EC_KEY_new()) == NULL) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       ERR_R_MALLOC_FAILURE);
-                goto err;
-            }
-
-            if (!EC_KEY_set_group(clnt_ecdh, srvr_group)) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_EC_LIB);
-                goto err;
-            }
-            if (ecdh_clnt_cert) {
-                /*
-                 * Reuse key info from our certificate We only need our
-                 * private key to perform the ECDH computation.
-                 */
-                const BIGNUM *priv_key;
-                tkey = s->cert->key->privatekey->pkey.ec;
-                priv_key = EC_KEY_get0_private_key(tkey);
-                if (priv_key == NULL) {
-                    SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                           ERR_R_MALLOC_FAILURE);
-                    goto err;
-                }
-                if (!EC_KEY_set_private_key(clnt_ecdh, priv_key)) {
-                    SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                           ERR_R_EC_LIB);
-                    goto err;
-                }
-            } else {
-                /* Generate a new ECDH key pair */
-                if (!(EC_KEY_generate_key(clnt_ecdh))) {
-                    SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                           ERR_R_ECDH_LIB);
-                    goto err;
-                }
-            }
-
-            /*
-             * use the 'p' output buffer for the ECDH key, but make sure to
-             * clear it out afterwards
-             */
-
-            field_size = EC_GROUP_get_degree(srvr_group);
-            if (field_size <= 0) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_ECDH_LIB);
-                goto err;
-            }
-            n = ECDH_compute_key(p, (field_size + 7) / 8, srvr_ecpoint,
-                                 clnt_ecdh, NULL);
-            if (n <= 0) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_ECDH_LIB);
-                goto err;
-            }
-
-            /* generate master key from the result */
-            s->session->master_key_length =
-                s->method->ssl3_enc->generate_master_secret(s,
-                                                            s->
-                                                            session->master_key,
-                                                            p, n);
-
-            memset(p, 0, n);    /* clean up */
-
-            if (ecdh_clnt_cert) {
-                /* Send empty client key exch message */
-                n = 0;
-            } else {
-                /*
-                 * First check the size of encoding and allocate memory
-                 * accordingly.
-                 */
-                encoded_pt_len =
-                    EC_POINT_point2oct(srvr_group,
-                                       EC_KEY_get0_public_key(clnt_ecdh),
-                                       POINT_CONVERSION_UNCOMPRESSED,
-                                       NULL, 0, NULL);
-
-                encodedPoint = (unsigned char *)
-                    OPENSSL_malloc(encoded_pt_len * sizeof(unsigned char));
-                bn_ctx = BN_CTX_new();
-                if ((encodedPoint == NULL) || (bn_ctx == NULL)) {
-                    SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                           ERR_R_MALLOC_FAILURE);
-                    goto err;
-                }
-
-                /* Encode the public key */
-                n = EC_POINT_point2oct(srvr_group,
-                                       EC_KEY_get0_public_key(clnt_ecdh),
-                                       POINT_CONVERSION_UNCOMPRESSED,
-                                       encodedPoint, encoded_pt_len, bn_ctx);
-
-                *p = n;         /* length of encoded point */
-                /* Encoded point will be copied here */
-                p += 1;
-                /* copy the point */
-                memcpy((unsigned char *)p, encodedPoint, n);
-                /* increment n to account for length field */
-                n += 1;
-            }
-
-            /* Free allocated memory */
-            BN_CTX_free(bn_ctx);
-            if (encodedPoint != NULL)
-                OPENSSL_free(encodedPoint);
-            if (clnt_ecdh != NULL)
-                EC_KEY_free(clnt_ecdh);
-            EVP_PKEY_free(srvr_pub_pkey);
-        }
-#endif                          /* !OPENSSL_NO_ECDH */
-
-#ifndef OPENSSL_NO_PSK
-        else if (alg_k & SSL_kPSK) {
-            char identity[PSK_MAX_IDENTITY_LEN];
-            unsigned char *t = NULL;
-            unsigned char psk_or_pre_ms[PSK_MAX_PSK_LEN * 2 + 4];
-            unsigned int pre_ms_len = 0, psk_len = 0;
-            int psk_err = 1;
-
-            n = 0;
-            if (s->psk_client_callback == NULL) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       SSL_R_PSK_NO_CLIENT_CB);
-                goto err;
-            }
-
-            psk_len = s->psk_client_callback(s, s->ctx->psk_identity_hint,
-                                             identity, PSK_MAX_IDENTITY_LEN,
-                                             psk_or_pre_ms,
-                                             sizeof(psk_or_pre_ms));
-            if (psk_len > PSK_MAX_PSK_LEN) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       ERR_R_INTERNAL_ERROR);
-                goto psk_err;
-            } else if (psk_len == 0) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       SSL_R_PSK_IDENTITY_NOT_FOUND);
-                goto psk_err;
-            }
-
-            /* create PSK pre_master_secret */
-            pre_ms_len = 2 + psk_len + 2 + psk_len;
-            t = psk_or_pre_ms;
-            memmove(psk_or_pre_ms + psk_len + 4, psk_or_pre_ms, psk_len);
-            s2n(psk_len, t);
-            memset(t, 0, psk_len);
-            t += psk_len;
-            s2n(psk_len, t);
-
-            if (s->session->psk_identity_hint != NULL)
-                OPENSSL_free(s->session->psk_identity_hint);
-            s->session->psk_identity_hint =
-                BUF_strdup(s->ctx->psk_identity_hint);
-            if (s->ctx->psk_identity_hint != NULL
-                && s->session->psk_identity_hint == NULL) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       ERR_R_MALLOC_FAILURE);
-                goto psk_err;
-            }
-
-            if (s->session->psk_identity != NULL)
-                OPENSSL_free(s->session->psk_identity);
-            s->session->psk_identity = BUF_strdup(identity);
-            if (s->session->psk_identity == NULL) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                       ERR_R_MALLOC_FAILURE);
-                goto psk_err;
-            }
-
-            s->session->master_key_length =
-                s->method->ssl3_enc->generate_master_secret(s,
-                                                            s->
-                                                            session->master_key,
-                                                            psk_or_pre_ms,
-                                                            pre_ms_len);
-            n = strlen(identity);
-            s2n(n, p);
-            memcpy(p, identity, n);
-            n += 2;
-            psk_err = 0;
- psk_err:
-            OPENSSL_cleanse(identity, PSK_MAX_IDENTITY_LEN);
-            OPENSSL_cleanse(psk_or_pre_ms, sizeof(psk_or_pre_ms));
-            if (psk_err != 0) {
-                ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE);
-                goto err;
-            }
-        }
-#endif
-        else {
-            ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE);
-            SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE,
-                   ERR_R_INTERNAL_ERROR);
-            goto err;
-        }
-
-        d = dtls1_set_message_header(s, d,
-                                     SSL3_MT_CLIENT_KEY_EXCHANGE, n, 0, n);
-        /*-
-         *(d++)=SSL3_MT_CLIENT_KEY_EXCHANGE;
-         l2n3(n,d);
-         l2n(s->d1->handshake_write_seq,d);
-         s->d1->handshake_write_seq++;
-        */
-
-        s->state = SSL3_ST_CW_KEY_EXCH_B;
-        /* number of bytes to write */
-        s->init_num = n + DTLS1_HM_HEADER_LENGTH;
-        s->init_off = 0;
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-    }
-
-    /* SSL3_ST_CW_KEY_EXCH_B */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
- err:
-#ifndef OPENSSL_NO_ECDH
-    BN_CTX_free(bn_ctx);
-    if (encodedPoint != NULL)
-        OPENSSL_free(encodedPoint);
-    if (clnt_ecdh != NULL)
-        EC_KEY_free(clnt_ecdh);
-    EVP_PKEY_free(srvr_pub_pkey);
-#endif
-    return (-1);
-}
-
-int dtls1_send_client_verify(SSL *s)
-{
-    unsigned char *p, *d;
-    unsigned char data[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
-    EVP_PKEY *pkey;
-#ifndef OPENSSL_NO_RSA
-    unsigned u = 0;
-#endif
-    unsigned long n;
-#if !defined(OPENSSL_NO_DSA) || !defined(OPENSSL_NO_ECDSA)
-    int j;
-#endif
-
-    if (s->state == SSL3_ST_CW_CERT_VRFY_A) {
-        d = (unsigned char *)s->init_buf->data;
-        p = &(d[DTLS1_HM_HEADER_LENGTH]);
-        pkey = s->cert->key->privatekey;
-
-        s->method->ssl3_enc->cert_verify_mac(s,
-                                             NID_sha1,
-                                             &(data[MD5_DIGEST_LENGTH]));
-
-#ifndef OPENSSL_NO_RSA
-        if (pkey->type == EVP_PKEY_RSA) {
-            s->method->ssl3_enc->cert_verify_mac(s, NID_md5, &(data[0]));
-            if (RSA_sign(NID_md5_sha1, data,
-                         MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH,
-                         &(p[2]), &u, pkey->pkey.rsa) <= 0) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_VERIFY, ERR_R_RSA_LIB);
-                goto err;
-            }
-            s2n(u, p);
-            n = u + 2;
-        } else
-#endif
-#ifndef OPENSSL_NO_DSA
-        if (pkey->type == EVP_PKEY_DSA) {
-            if (!DSA_sign(pkey->save_type,
-                          &(data[MD5_DIGEST_LENGTH]),
-                          SHA_DIGEST_LENGTH, &(p[2]),
-                          (unsigned int *)&j, pkey->pkey.dsa)) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_VERIFY, ERR_R_DSA_LIB);
-                goto err;
-            }
-            s2n(j, p);
-            n = j + 2;
-        } else
-#endif
-#ifndef OPENSSL_NO_ECDSA
-        if (pkey->type == EVP_PKEY_EC) {
-            if (!ECDSA_sign(pkey->save_type,
-                            &(data[MD5_DIGEST_LENGTH]),
-                            SHA_DIGEST_LENGTH, &(p[2]),
-                            (unsigned int *)&j, pkey->pkey.ec)) {
-                SSLerr(SSL_F_DTLS1_SEND_CLIENT_VERIFY, ERR_R_ECDSA_LIB);
-                goto err;
-            }
-            s2n(j, p);
-            n = j + 2;
-        } else
-#endif
-        {
-            SSLerr(SSL_F_DTLS1_SEND_CLIENT_VERIFY, ERR_R_INTERNAL_ERROR);
-            goto err;
-        }
-
-        d = dtls1_set_message_header(s, d,
-                                     SSL3_MT_CERTIFICATE_VERIFY, n, 0, n);
-
-        s->init_num = (int)n + DTLS1_HM_HEADER_LENGTH;
-        s->init_off = 0;
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-
-        s->state = SSL3_ST_CW_CERT_VRFY_B;
-    }
-
-    /* s->state = SSL3_ST_CW_CERT_VRFY_B */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
- err:
-    return (-1);
-}
-
-int dtls1_send_client_certificate(SSL *s)
-{
-    X509 *x509 = NULL;
-    EVP_PKEY *pkey = NULL;
-    int i;
-    unsigned long l;
-
-    if (s->state == SSL3_ST_CW_CERT_A) {
-        if ((s->cert == NULL) ||
-            (s->cert->key->x509 == NULL) ||
-            (s->cert->key->privatekey == NULL))
-            s->state = SSL3_ST_CW_CERT_B;
-        else
-            s->state = SSL3_ST_CW_CERT_C;
-    }
-
-    /* We need to get a client cert */
-    if (s->state == SSL3_ST_CW_CERT_B) {
-        /*
-         * If we get an error, we need to ssl->rwstate=SSL_X509_LOOKUP;
-         * return(-1); We then get retied later
-         */
-        i = 0;
-        i = ssl_do_client_cert_cb(s, &x509, &pkey);
-        if (i < 0) {
-            s->rwstate = SSL_X509_LOOKUP;
-            return (-1);
-        }
-        s->rwstate = SSL_NOTHING;
-        if ((i == 1) && (pkey != NULL) && (x509 != NULL)) {
-            s->state = SSL3_ST_CW_CERT_B;
-            if (!SSL_use_certificate(s, x509) || !SSL_use_PrivateKey(s, pkey))
-                i = 0;
-        } else if (i == 1) {
-            i = 0;
-            SSLerr(SSL_F_DTLS1_SEND_CLIENT_CERTIFICATE,
-                   SSL_R_BAD_DATA_RETURNED_BY_CALLBACK);
-        }
-
-        if (x509 != NULL)
-            X509_free(x509);
-        if (pkey != NULL)
-            EVP_PKEY_free(pkey);
-        if (i == 0) {
-            if (s->version == SSL3_VERSION) {
-                s->s3->tmp.cert_req = 0;
-                ssl3_send_alert(s, SSL3_AL_WARNING, SSL_AD_NO_CERTIFICATE);
-                return (1);
-            } else {
-                s->s3->tmp.cert_req = 2;
-            }
-        }
-
-        /* Ok, we have a cert */
-        s->state = SSL3_ST_CW_CERT_C;
-    }
-
-    if (s->state == SSL3_ST_CW_CERT_C) {
-        s->state = SSL3_ST_CW_CERT_D;
-        l = dtls1_output_cert_chain(s,
-                                    (s->s3->tmp.cert_req ==
-                                     2) ? NULL : s->cert->key->x509);
-        if (!l) {
-            SSLerr(SSL_F_DTLS1_SEND_CLIENT_CERTIFICATE, ERR_R_INTERNAL_ERROR);
-            ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
-            return 0;
-        }
-        s->init_num = (int)l;
-        s->init_off = 0;
-
-        /* set header called by dtls1_output_cert_chain() */
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-    }
-    /* SSL3_ST_CW_CERT_D */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
-}
diff --git a/ssl/d1_enc.c b/ssl/d1_enc.c
deleted file mode 100644 (file)
index e876364..0000000
+++ /dev/null
@@ -1,251 +0,0 @@
-/* ssl/d1_enc.c */
-/*
- * DTLS implementation written by Nagendra Modadugu
- * (nagendra@cs.stanford.edu) for the OpenSSL project 2005.
- */
-/* ====================================================================
- * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- *    software must display the following acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- *    endorse or promote products derived from this software without
- *    prior written permission. For written permission, please contact
- *    openssl-core@openssl.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- *    nor may "OpenSSL" appear in their names without prior written
- *    permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- *    acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
- *
- * This product includes cryptographic software written by Eric Young
- * (eay@cryptsoft.com).  This product includes software written by Tim
- * Hudson (tjh@cryptsoft.com).
- *
- */
-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young (eay@cryptsoft.com).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to.  The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *    "This product includes cryptographic software written by
- *     Eric Young (eay@cryptsoft.com)"
- *    The word 'cryptographic' can be left out if the rouines from the library
- *    being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- *    the apps directory (application code) you must include an acknowledgement:
- *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed.  i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#include <stdio.h>
-#include "ssl_locl.h"
-#ifndef OPENSSL_NO_COMP
-# include <openssl/comp.h>
-#endif
-#include <openssl/evp.h>
-#include <openssl/hmac.h>
-#include <openssl/md5.h>
-#include <openssl/rand.h>
-#ifdef KSSL_DEBUG
-# include <openssl/des.h>
-#endif
-
-/*-
- * dtls1_enc encrypts/decrypts the record in |s->wrec| / |s->rrec|, respectively.
- *
- * Returns:
- *   0: (in non-constant time) if the record is publically invalid (i.e. too
- *       short etc).
- *   1: if the record's padding is valid / the encryption was successful.
- *   -1: if the record's padding/AEAD-authenticator is invalid or, if sending,
- *       an internal error occured.
- */
-int dtls1_enc(SSL *s, int send)
-{
-    SSL3_RECORD *rec;
-    EVP_CIPHER_CTX *ds;
-    unsigned long l;
-    int bs, i, j, k, mac_size = 0;
-    const EVP_CIPHER *enc;
-
-    if (send) {
-        if (EVP_MD_CTX_md(s->write_hash)) {
-            mac_size = EVP_MD_CTX_size(s->write_hash);
-            if (mac_size < 0)
-                return -1;
-        }
-        ds = s->enc_write_ctx;
-        rec = &(s->s3->wrec);
-        if (s->enc_write_ctx == NULL)
-            enc = NULL;
-        else {
-            enc = EVP_CIPHER_CTX_cipher(s->enc_write_ctx);
-            if (rec->data != rec->input)
-                /* we can't write into the input stream */
-                fprintf(stderr, "%s:%d: rec->data != rec->input\n",
-                        __FILE__, __LINE__);
-            else if (EVP_CIPHER_block_size(ds->cipher) > 1) {
-                if (RAND_bytes(rec->input, EVP_CIPHER_block_size(ds->cipher))
-                    <= 0)
-                    return -1;
-            }
-        }
-    } else {
-        if (EVP_MD_CTX_md(s->read_hash)) {
-            mac_size = EVP_MD_CTX_size(s->read_hash);
-            OPENSSL_assert(mac_size >= 0);
-        }
-        ds = s->enc_read_ctx;
-        rec = &(s->s3->rrec);
-        if (s->enc_read_ctx == NULL)
-            enc = NULL;
-        else
-            enc = EVP_CIPHER_CTX_cipher(s->enc_read_ctx);
-    }
-
-#ifdef KSSL_DEBUG
-    printf("dtls1_enc(%d)\n", send);
-#endif                          /* KSSL_DEBUG */
-
-    if ((s->session == NULL) || (ds == NULL) || (enc == NULL)) {
-        memmove(rec->data, rec->input, rec->length);
-        rec->input = rec->data;
-    } else {
-        l = rec->length;
-        bs = EVP_CIPHER_block_size(ds->cipher);
-
-        if ((bs != 1) && send) {
-            i = bs - ((int)l % bs);
-
-            /* Add weird padding of upto 256 bytes */
-
-            /* we need to add 'i' padding bytes of value j */
-            j = i - 1;
-            if (s->options & SSL_OP_TLS_BLOCK_PADDING_BUG) {
-                if (s->s3->flags & TLS1_FLAGS_TLS_PADDING_BUG)
-                    j++;
-            }
-            for (k = (int)l; k < (int)(l + i); k++)
-                rec->input[k] = j;
-            l += i;
-            rec->length += i;
-        }
-#ifdef KSSL_DEBUG
-        {
-            unsigned long ui;
-            printf("EVP_Cipher(ds=%p,rec->data=%p,rec->input=%p,l=%ld) ==>\n",
-                   ds, rec->data, rec->input, l);
-            printf
-                ("\tEVP_CIPHER_CTX: %d buf_len, %d key_len [%d %d], %d iv_len\n",
-                 ds->buf_len, ds->cipher->key_len, DES_KEY_SZ,
-                 DES_SCHEDULE_SZ, ds->cipher->iv_len);
-            printf("\t\tIV: ");
-            for (i = 0; i < ds->cipher->iv_len; i++)
-                printf("%02X", ds->iv[i]);
-            printf("\n");
-            printf("\trec->input=");
-            for (ui = 0; ui < l; ui++)
-                printf(" %02x", rec->input[ui]);
-            printf("\n");
-        }
-#endif                          /* KSSL_DEBUG */
-
-        if (!send) {
-            if (l == 0 || l % bs != 0)
-                return 0;
-        }
-
-        if (EVP_Cipher(ds, rec->data, rec->input, l) < 1)
-            return -1;
-
-#ifdef KSSL_DEBUG
-        {
-            unsigned long i;
-            printf("\trec->data=");
-            for (i = 0; i < l; i++)
-                printf(" %02x", rec->data[i]);
-            printf("\n");
-        }
-#endif                          /* KSSL_DEBUG */
-
-        if ((bs != 1) && !send)
-            return tls1_cbc_remove_padding(s, rec, bs, mac_size);
-    }
-    return (1);
-}
index 011d7b7..ee78921 100644 (file)
 #include <openssl/objects.h>
 #include "ssl_locl.h"
 
-#if defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VMS)
+#if defined(OPENSSL_SYS_VMS)
 # include <sys/timeb.h>
 #endif
 
 static void get_current_time(struct timeval *t);
+static void dtls1_set_handshake_header(SSL *s, int type, unsigned long len);
+static int dtls1_handshake_write(SSL *s);
 const char dtls1_version_str[] = "DTLSv1" OPENSSL_VERSION_PTEXT;
 int dtls1_listen(SSL *s, struct sockaddr *client);
 
 SSL3_ENC_METHOD DTLSv1_enc_data = {
-    dtls1_enc,
+    tls1_enc,
     tls1_mac,
     tls1_setup_key_block,
     tls1_generate_master_secret,
@@ -83,6 +85,30 @@ SSL3_ENC_METHOD DTLSv1_enc_data = {
     TLS_MD_SERVER_FINISH_CONST, TLS_MD_SERVER_FINISH_CONST_SIZE,
     tls1_alert_code,
     tls1_export_keying_material,
+    SSL_ENC_FLAG_DTLS | SSL_ENC_FLAG_EXPLICIT_IV,
+    DTLS1_HM_HEADER_LENGTH,
+    dtls1_set_handshake_header,
+    dtls1_handshake_write
+};
+
+SSL3_ENC_METHOD DTLSv1_2_enc_data = {
+    tls1_enc,
+    tls1_mac,
+    tls1_setup_key_block,
+    tls1_generate_master_secret,
+    tls1_change_cipher_state,
+    tls1_final_finish_mac,
+    TLS1_FINISH_MAC_LENGTH,
+    tls1_cert_verify_mac,
+    TLS_MD_CLIENT_FINISH_CONST, TLS_MD_CLIENT_FINISH_CONST_SIZE,
+    TLS_MD_SERVER_FINISH_CONST, TLS_MD_SERVER_FINISH_CONST_SIZE,
+    tls1_alert_code,
+    tls1_export_keying_material,
+    SSL_ENC_FLAG_DTLS | SSL_ENC_FLAG_EXPLICIT_IV | SSL_ENC_FLAG_SIGALGS
+        | SSL_ENC_FLAG_SHA256_PRF | SSL_ENC_FLAG_TLS1_2_CIPHERS,
+    DTLS1_HM_HEADER_LENGTH,
+    dtls1_set_handshake_header,
+    dtls1_handshake_write
 };
 
 long dtls1_default_timeout(void)
@@ -244,9 +270,11 @@ void dtls1_clear(SSL *s)
 
     ssl3_clear(s);
     if (s->options & SSL_OP_CISCO_ANYCONNECT)
-        s->version = DTLS1_BAD_VER;
+        s->client_version = s->version = DTLS1_BAD_VER;
+    else if (s->method->version == DTLS_ANY_VERSION)
+        s->version = DTLS1_2_VERSION;
     else
-        s->version = DTLS1_VERSION;
+        s->version = s->method->version;
 }
 
 long dtls1_ctrl(SSL *s, int cmd, long larg, void *parg)
@@ -271,14 +299,22 @@ long dtls1_ctrl(SSL *s, int cmd, long larg, void *parg)
          * highest enabled version (according to s->ctx->method, as version
          * negotiation may have changed s->method).
          */
-#if DTLS_MAX_VERSION != DTLS1_VERSION
-# error Code needs update for DTLS_method() support beyond DTLS1_VERSION.
-#endif
+        if (s->version == s->ctx->method->version)
+            return 1;
         /*
-         * Just one protocol version is supported so far; fail closed if the
-         * version is not as expected.
+         * Apparently we're using a version-flexible SSL_METHOD (not at its
+         * highest protocol version).
          */
-        return s->version == DTLS_MAX_VERSION;
+        if (s->ctx->method->version == DTLS_method()->version) {
+#if DTLS_MAX_VERSION != DTLS1_2_VERSION
+# error Code needs update for DTLS_method() support beyond DTLS1_2_VERSION.
+#endif
+            if (!(s->options & SSL_OP_NO_DTLSv1_2))
+                return s->version == DTLS1_2_VERSION;
+            if (!(s->options & SSL_OP_NO_DTLSv1))
+                return s->version == DTLS1_VERSION;
+        }
+        return 0;               /* Unexpected state; fail closed. */
     case DTLS_CTRL_SET_LINK_MTU:
         if (larg < (long)dtls1_link_min_mtu())
             return 0;
@@ -477,11 +513,22 @@ int dtls1_handle_timeout(SSL *s)
 
 static void get_current_time(struct timeval *t)
 {
-#ifdef OPENSSL_SYS_WIN32
-    struct _timeb tb;
-    _ftime(&tb);
-    t->tv_sec = (long)tb.time;
-    t->tv_usec = (long)tb.millitm * 1000;
+#if defined(_WIN32)
+    SYSTEMTIME st;
+    union {
+        unsigned __int64 ul;
+        FILETIME ft;
+    } now;
+
+    GetSystemTime(&st);
+    SystemTimeToFileTime(&st, &now.ft);
+# ifdef  __MINGW32__
+    now.ul -= 116444736000000000ULL;
+# else
+    now.ul -= 116444736000000000UI64; /* re-bias to 1/1/1970 */
+# endif
+    t->tv_sec = (long)(now.ul / 10000000);
+    t->tv_usec = ((int)(now.ul % 10000000)) / 10;
 #elif defined(OPENSSL_SYS_VMS)
     struct timeb tb;
     ftime(&tb);
@@ -509,3 +556,18 @@ int dtls1_listen(SSL *s, struct sockaddr *client)
     (void)BIO_dgram_get_peer(SSL_get_rbio(s), client);
     return 1;
 }
+
+static void dtls1_set_handshake_header(SSL *s, int htype, unsigned long len)
+{
+    unsigned char *p = (unsigned char *)s->init_buf->data;
+    dtls1_set_message_header(s, p, htype, len, 0, len);
+    s->init_num = (int)len + DTLS1_HM_HEADER_LENGTH;
+    s->init_off = 0;
+    /* Buffer the message to handle re-xmits */
+    dtls1_buffer_message(s, 0);
+}
+
+static int dtls1_handshake_write(SSL *s)
+{
+    return dtls1_do_write(s, SSL3_RT_HANDSHAKE);
+}
index aaa718c..7340774 100644 (file)
@@ -66,9 +66,23 @@ static const SSL_METHOD *dtls1_get_method(int ver)
 {
     if (ver == DTLS1_VERSION)
         return (DTLSv1_method());
+    else if (ver == DTLS1_2_VERSION)
+        return (DTLSv1_2_method());
     else
         return (NULL);
 }
 
-IMPLEMENT_dtls1_meth_func(DTLSv1_method,
-                          dtls1_accept, dtls1_connect, dtls1_get_method)
+IMPLEMENT_dtls1_meth_func(DTLS1_VERSION,
+                          DTLSv1_method,
+                          dtls1_accept,
+                          dtls1_connect, dtls1_get_method, DTLSv1_enc_data)
+
+    IMPLEMENT_dtls1_meth_func(DTLS1_2_VERSION,
+                          DTLSv1_2_method,
+                          dtls1_accept,
+                          dtls1_connect, dtls1_get_method, DTLSv1_2_enc_data)
+
+    IMPLEMENT_dtls1_meth_func(DTLS_ANY_VERSION,
+                          DTLS_method,
+                          dtls1_accept,
+                          dtls1_connect, dtls1_get_method, DTLSv1_2_enc_data)
index d659ed4..fe30ec7 100644 (file)
@@ -612,6 +612,10 @@ int dtls1_get_record(SSL *s)
 
         p = s->packet;
 
+        if (s->msg_callback)
+            s->msg_callback(0, 0, SSL3_RT_HEADER, p, DTLS1_RT_HEADER_LENGTH,
+                            s, s->msg_callback_arg);
+
         /* Pull apart the header into the DTLS1_RECORD */
         rr->type = *(p++);
         ssl_major = *(p++);
@@ -1488,10 +1492,10 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf,
     unsigned char *p, *pseq;
     int i, mac_size, clear = 0;
     int prefix_len = 0;
+    int eivlen;
     SSL3_RECORD *wr;
     SSL3_BUFFER *wb;
     SSL_SESSION *sess;
-    int bs;
 
     /*
      * first check if there is a SSL3_BUFFER still being written out.  This
@@ -1570,27 +1574,41 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf,
 
     *(p++) = type & 0xff;
     wr->type = type;
-
-    *(p++) = (s->version >> 8);
-    *(p++) = s->version & 0xff;
+    /*
+     * Special case: for a hello verify request, where the client version is
+     * 1.0 and we haven't decided which version to use yet, send back using a
+     * version 1.0 header: otherwise some clients will ignore it.
+     */
+    if (s->method->version == DTLS_ANY_VERSION) {
+        *(p++) = DTLS1_VERSION >> 8;
+        *(p++) = DTLS1_VERSION & 0xff;
+    } else {
+        *(p++) = s->version >> 8;
+        *(p++) = s->version & 0xff;
+    }
 
     /* field where we are to write out packet epoch, seq num and len */
     pseq = p;
     p += 10;
 
-    /* lets setup the record stuff. */
-
-    /*
-     * Make space for the explicit IV in case of CBC. (this is a bit of a
-     * boundary violation, but what the heck).
-     */
-    if (s->enc_write_ctx &&
-        (EVP_CIPHER_mode(s->enc_write_ctx->cipher) & EVP_CIPH_CBC_MODE))
-        bs = EVP_CIPHER_block_size(s->enc_write_ctx->cipher);
-    else
-        bs = 0;
+    /* Explicit IV length: the cipher IV length for CBC, a constant for GCM */
+    if (s->enc_write_ctx) {
+        int mode = EVP_CIPHER_CTX_mode(s->enc_write_ctx);
+        if (mode == EVP_CIPH_CBC_MODE) {
+            eivlen = EVP_CIPHER_CTX_iv_length(s->enc_write_ctx);
+            if (eivlen <= 1)
+                eivlen = 0;
+        }
+        /* Need explicit part of IV for GCM mode */
+        else if (mode == EVP_CIPH_GCM_MODE)
+            eivlen = EVP_GCM_TLS_EXPLICIT_IV_LEN;
+        else
+            eivlen = 0;
+    } else
+        eivlen = 0;
 
-    wr->data = p + bs;          /* make room for IV in case of CBC */
+    /* lets setup the record stuff. */
+    wr->data = p + eivlen;      /* make room for the explicit IV, if any */
     wr->length = (int)len;
     wr->input = (unsigned char *)buf;
 
@@ -1616,7 +1634,7 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf,
      */
 
     if (mac_size != 0) {
-        if (s->method->ssl3_enc->mac(s, &(p[wr->length + bs]), 1) < 0)
+        if (s->method->ssl3_enc->mac(s, &(p[wr->length + eivlen]), 1) < 0)
             goto err;
         wr->length += mac_size;
     }
@@ -1625,14 +1643,8 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf,
     wr->input = p;
     wr->data = p;
 
-    /* ssl3_enc can only have an error on read */
-    if (bs) {                   /* bs != 0 in case of CBC */
-        RAND_pseudo_bytes(p, bs);
-        /*
-         * master IV and last CBC residue stand for the rest of randomness
-         */
-        wr->length += bs;
-    }
+    if (eivlen)
+        wr->length += eivlen;
 
     if (s->method->ssl3_enc->enc(s, 1) < 1)
         goto err;
@@ -1656,6 +1668,10 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf,
     pseq += 6;
     s2n(wr->length, pseq);
 
+    if (s->msg_callback)
+        s->msg_callback(1, 0, SSL3_RT_HEADER, pseq - DTLS1_RT_HEADER_LENGTH,
+                        DTLS1_RT_HEADER_LENGTH, s, s->msg_callback_arg);
+
     /*
      * we should now have wr->data pointing to the encrypted data, which is
      * wr->length long
index 6c6e07c..64d0634 100644 (file)
 #include <stdio.h>
 #include <openssl/objects.h>
 #include "ssl_locl.h"
+#include "srtp.h"
 
 #ifndef OPENSSL_NO_SRTP
 
-# include "srtp.h"
-
 static SRTP_PROTECTION_PROFILE srtp_known_profiles[] = {
     {
      "SRTP_AES128_CM_SHA1_80",
index 41c7dc5..655333a 100644 (file)
@@ -133,13 +133,29 @@ static const SSL_METHOD *dtls1_get_server_method(int ver)
 {
     if (ver == DTLS1_VERSION)
         return (DTLSv1_server_method());
+    else if (ver == DTLS1_2_VERSION)
+        return (DTLSv1_2_server_method());
     else
         return (NULL);
 }
 
-IMPLEMENT_dtls1_meth_func(DTLSv1_server_method,
+IMPLEMENT_dtls1_meth_func(DTLS1_VERSION,
+                          DTLSv1_server_method,
                           dtls1_accept,
-                          ssl_undefined_function, dtls1_get_server_method)
+                          ssl_undefined_function,
+                          dtls1_get_server_method, DTLSv1_enc_data)
+
+    IMPLEMENT_dtls1_meth_func(DTLS1_2_VERSION,
+                          DTLSv1_2_server_method,
+                          dtls1_accept,
+                          ssl_undefined_function,
+                          dtls1_get_server_method, DTLSv1_2_enc_data)
+
+    IMPLEMENT_dtls1_meth_func(DTLS_ANY_VERSION,
+                          DTLS_server_method,
+                          dtls1_accept,
+                          ssl_undefined_function,
+                          dtls1_get_server_method, DTLSv1_2_enc_data)
 
 int dtls1_accept(SSL *s)
 {
@@ -284,7 +300,7 @@ int dtls1_accept(SSL *s)
             s->shutdown = 0;
             dtls1_clear_record_buffer(s);
             dtls1_start_timer(s);
-            ret = dtls1_send_hello_request(s);
+            ret = ssl3_send_hello_request(s);
             if (ret <= 0)
                 goto end;
             s->s3->tmp.next_state = SSL3_ST_SR_CLNT_HELLO_A;
@@ -392,7 +408,7 @@ int dtls1_accept(SSL *s)
         case SSL3_ST_SW_SRVR_HELLO_B:
             s->renegotiate = 2;
             dtls1_start_timer(s);
-            ret = dtls1_send_server_hello(s);
+            ret = ssl3_send_server_hello(s);
             if (ret <= 0)
                 goto end;
 
@@ -431,7 +447,7 @@ int dtls1_accept(SSL *s)
             if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL)
                 && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)) {
                 dtls1_start_timer(s);
-                ret = dtls1_send_server_certificate(s);
+                ret = ssl3_send_server_certificate(s);
                 if (ret <= 0)
                     goto end;
 #ifndef OPENSSL_NO_TLSEXT
@@ -474,7 +490,7 @@ int dtls1_accept(SSL *s)
 #ifndef OPENSSL_NO_PSK
                 || ((alg_k & SSL_kPSK) && s->ctx->psk_identity_hint)
 #endif
-                || (alg_k & SSL_kEDH)
+                || (alg_k & SSL_kDHE)
                 || (alg_k & SSL_kEECDH)
                 || ((alg_k & SSL_kRSA)
                     && (s->cert->pkeys[SSL_PKEY_RSA_ENC].privatekey == NULL
@@ -487,7 +503,7 @@ int dtls1_accept(SSL *s)
                 )
                 ) {
                 dtls1_start_timer(s);
-                ret = dtls1_send_server_key_exchange(s);
+                ret = ssl3_send_server_key_exchange(s);
                 if (ret <= 0)
                     goto end;
             } else
@@ -541,7 +557,7 @@ int dtls1_accept(SSL *s)
             } else {
                 s->s3->tmp.cert_request = 1;
                 dtls1_start_timer(s);
-                ret = dtls1_send_certificate_request(s);
+                ret = ssl3_send_certificate_request(s);
                 if (ret <= 0)
                     goto end;
 #ifndef NETSCAPE_HANG_BUG
@@ -569,7 +585,7 @@ int dtls1_accept(SSL *s)
         case SSL3_ST_SW_SRVR_DONE_A:
         case SSL3_ST_SW_SRVR_DONE_B:
             dtls1_start_timer(s);
-            ret = dtls1_send_server_done(s);
+            ret = ssl3_send_server_done(s);
             if (ret <= 0)
                 goto end;
             s->s3->tmp.next_state = SSL3_ST_SR_CERT_A;
@@ -597,22 +613,13 @@ int dtls1_accept(SSL *s)
 
         case SSL3_ST_SR_CERT_A:
         case SSL3_ST_SR_CERT_B:
-            /* Check for second client hello (MS SGC) */
-            ret = ssl3_check_client_hello(s);
-            if (ret <= 0)
-                goto end;
-            if (ret == 2) {
-                dtls1_stop_timer(s);
-                s->state = SSL3_ST_SR_CLNT_HELLO_C;
-            } else {
-                if (s->s3->tmp.cert_request) {
-                    ret = ssl3_get_client_certificate(s);
-                    if (ret <= 0)
-                        goto end;
-                }
-                s->init_num = 0;
-                s->state = SSL3_ST_SR_KEY_EXCH_A;
+            if (s->s3->tmp.cert_request) {
+                ret = ssl3_get_client_certificate(s);
+                if (ret <= 0)
+                    goto end;
             }
+            s->init_num = 0;
+            s->state = SSL3_ST_SR_KEY_EXCH_A;
             break;
 
         case SSL3_ST_SR_KEY_EXCH_A:
@@ -647,6 +654,25 @@ int dtls1_accept(SSL *s)
                  */
                 s->state = SSL3_ST_SR_FINISHED_A;
                 s->init_num = 0;
+            } else if (SSL_USE_SIGALGS(s)) {
+                s->state = SSL3_ST_SR_CERT_VRFY_A;
+                s->init_num = 0;
+                if (!s->session->peer)
+                    break;
+                /*
+                 * For sigalgs freeze the handshake buffer at this point and
+                 * digest cached records.
+                 */
+                if (!s->s3->handshake_buffer) {
+                    SSLerr(SSL_F_DTLS1_ACCEPT, ERR_R_INTERNAL_ERROR);
+                    s->state = SSL_ST_ERR;
+                    return -1;
+                }
+                s->s3->flags |= TLS1_FLAGS_KEEP_HANDSHAKE;
+                if (!ssl3_digest_cached_records(s)) {
+                    s->state = SSL_ST_ERR;
+                    return -1;
+                }
             } else {
                 s->state = SSL3_ST_SR_CERT_VRFY_A;
                 s->init_num = 0;
@@ -714,7 +740,7 @@ int dtls1_accept(SSL *s)
 #ifndef OPENSSL_NO_TLSEXT
         case SSL3_ST_SW_SESSION_TICKET_A:
         case SSL3_ST_SW_SESSION_TICKET_B:
-            ret = dtls1_send_newsession_ticket(s);
+            ret = ssl3_send_newsession_ticket(s);
             if (ret <= 0)
                 goto end;
             s->state = SSL3_ST_SW_CHANGE_A;
@@ -776,13 +802,13 @@ int dtls1_accept(SSL *s)
 
         case SSL3_ST_SW_FINISHED_A:
         case SSL3_ST_SW_FINISHED_B:
-            ret = dtls1_send_finished(s,
-                                      SSL3_ST_SW_FINISHED_A,
-                                      SSL3_ST_SW_FINISHED_B,
-                                      s->method->
-                                      ssl3_enc->server_finished_label,
-                                      s->method->
-                                      ssl3_enc->server_finished_label_len);
+            ret = ssl3_send_finished(s,
+                                     SSL3_ST_SW_FINISHED_A,
+                                     SSL3_ST_SW_FINISHED_B,
+                                     s->method->
+                                     ssl3_enc->server_finished_label,
+                                     s->method->
+                                     ssl3_enc->server_finished_label_len);
             if (ret <= 0)
                 goto end;
             s->state = SSL3_ST_SW_FLUSH;
@@ -889,29 +915,6 @@ int dtls1_accept(SSL *s)
     return (ret);
 }
 
-int dtls1_send_hello_request(SSL *s)
-{
-    unsigned char *p;
-
-    if (s->state == SSL3_ST_SW_HELLO_REQ_A) {
-        p = (unsigned char *)s->init_buf->data;
-        p = dtls1_set_message_header(s, p, SSL3_MT_HELLO_REQUEST, 0, 0, 0);
-
-        s->state = SSL3_ST_SW_HELLO_REQ_B;
-        /* number of bytes to write */
-        s->init_num = DTLS1_HM_HEADER_LENGTH;
-        s->init_off = 0;
-
-        /*
-         * no need to buffer this message, since there are no retransmit
-         * requests for it
-         */
-    }
-
-    /* SSL3_ST_SW_HELLO_REQ_B */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
-}
-
 int dtls1_send_hello_verify_request(SSL *s)
 {
     unsigned int msg_len;
@@ -921,8 +924,9 @@ int dtls1_send_hello_verify_request(SSL *s)
         buf = (unsigned char *)s->init_buf->data;
 
         msg = p = &(buf[DTLS1_HM_HEADER_LENGTH]);
-        *(p++) = s->version >> 8;
-        *(p++) = s->version & 0xFF;
+        /* Always use DTLS 1.0 version: see RFC 6347 */
+        *(p++) = DTLS1_VERSION >> 8;
+        *(p++) = DTLS1_VERSION & 0xFF;
 
         if (s->ctx->app_gen_cookie_cb == NULL ||
             s->ctx->app_gen_cookie_cb(s, s->d1->cookie,
@@ -951,788 +955,3 @@ int dtls1_send_hello_verify_request(SSL *s)
     /* s->state = DTLS1_ST_SW_HELLO_VERIFY_REQUEST_B */
     return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
 }
-
-int dtls1_send_server_hello(SSL *s)
-{
-    unsigned char *buf;
-    unsigned char *p, *d;
-    int i;
-    unsigned int sl;
-    unsigned long l;
-
-    if (s->state == SSL3_ST_SW_SRVR_HELLO_A) {
-        buf = (unsigned char *)s->init_buf->data;
-        p = s->s3->server_random;
-        ssl_fill_hello_random(s, 1, p, SSL3_RANDOM_SIZE);
-        /* Do the message type and length last */
-        d = p = &(buf[DTLS1_HM_HEADER_LENGTH]);
-
-        *(p++) = s->version >> 8;
-        *(p++) = s->version & 0xff;
-
-        /* Random stuff */
-        memcpy(p, s->s3->server_random, SSL3_RANDOM_SIZE);
-        p += SSL3_RANDOM_SIZE;
-
-        /*
-         * now in theory we have 3 options to sending back the session id.
-         * If it is a re-use, we send back the old session-id, if it is a new
-         * session, we send back the new session-id or we send back a 0
-         * length session-id if we want it to be single use. Currently I will
-         * not implement the '0' length session-id 12-Jan-98 - I'll now
-         * support the '0' length stuff.
-         */
-        if (!(s->ctx->session_cache_mode & SSL_SESS_CACHE_SERVER))
-            s->session->session_id_length = 0;
-
-        sl = s->session->session_id_length;
-        if (sl > sizeof s->session->session_id) {
-            SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO, ERR_R_INTERNAL_ERROR);
-            return -1;
-        }
-        *(p++) = sl;
-        memcpy(p, s->session->session_id, sl);
-        p += sl;
-
-        /* put the cipher */
-        if (s->s3->tmp.new_cipher == NULL)
-            return -1;
-        i = ssl3_put_cipher_by_char(s->s3->tmp.new_cipher, p);
-        p += i;
-
-        /* put the compression method */
-#ifdef OPENSSL_NO_COMP
-        *(p++) = 0;
-#else
-        if (s->s3->tmp.new_compression == NULL)
-            *(p++) = 0;
-        else
-            *(p++) = s->s3->tmp.new_compression->id;
-#endif
-
-#ifndef OPENSSL_NO_TLSEXT
-        if (ssl_prepare_serverhello_tlsext(s) <= 0) {
-            SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO, SSL_R_SERVERHELLO_TLSEXT);
-            return -1;
-        }
-        if ((p =
-             ssl_add_serverhello_tlsext(s, p,
-                                        buf + SSL3_RT_MAX_PLAIN_LENGTH)) ==
-            NULL) {
-            SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO, ERR_R_INTERNAL_ERROR);
-            return -1;
-        }
-#endif
-
-        /* do the header */
-        l = (p - d);
-        d = buf;
-
-        d = dtls1_set_message_header(s, d, SSL3_MT_SERVER_HELLO, l, 0, l);
-
-        s->state = SSL3_ST_SW_SRVR_HELLO_B;
-        /* number of bytes to write */
-        s->init_num = p - buf;
-        s->init_off = 0;
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-    }
-
-    /* SSL3_ST_SW_SRVR_HELLO_B */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
-}
-
-int dtls1_send_server_done(SSL *s)
-{
-    unsigned char *p;
-
-    if (s->state == SSL3_ST_SW_SRVR_DONE_A) {
-        p = (unsigned char *)s->init_buf->data;
-
-        /* do the header */
-        p = dtls1_set_message_header(s, p, SSL3_MT_SERVER_DONE, 0, 0, 0);
-
-        s->state = SSL3_ST_SW_SRVR_DONE_B;
-        /* number of bytes to write */
-        s->init_num = DTLS1_HM_HEADER_LENGTH;
-        s->init_off = 0;
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-    }
-
-    /* SSL3_ST_SW_SRVR_DONE_B */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
-}
-
-int dtls1_send_server_key_exchange(SSL *s)
-{
-#ifndef OPENSSL_NO_RSA
-    unsigned char *q;
-    int j, num;
-    RSA *rsa;
-    unsigned char md_buf[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
-    unsigned int u;
-#endif
-#ifndef OPENSSL_NO_DH
-    DH *dh = NULL, *dhp;
-#endif
-#ifndef OPENSSL_NO_ECDH
-    EC_KEY *ecdh = NULL, *ecdhp;
-    unsigned char *encodedPoint = NULL;
-    int encodedlen = 0;
-    int curve_id = 0;
-    BN_CTX *bn_ctx = NULL;
-#endif
-    EVP_PKEY *pkey;
-    unsigned char *p, *d;
-    int al, i;
-    unsigned long type;
-    int n;
-    CERT *cert;
-    BIGNUM *r[4];
-    int nr[4], kn;
-    BUF_MEM *buf;
-    EVP_MD_CTX md_ctx;
-
-    EVP_MD_CTX_init(&md_ctx);
-    if (s->state == SSL3_ST_SW_KEY_EXCH_A) {
-        type = s->s3->tmp.new_cipher->algorithm_mkey;
-        cert = s->cert;
-
-        buf = s->init_buf;
-
-        r[0] = r[1] = r[2] = r[3] = NULL;
-        n = 0;
-#ifndef OPENSSL_NO_RSA
-        if (type & SSL_kRSA) {
-            rsa = cert->rsa_tmp;
-            if ((rsa == NULL) && (s->cert->rsa_tmp_cb != NULL)) {
-                rsa = s->cert->rsa_tmp_cb(s,
-                                          SSL_C_IS_EXPORT(s->s3->
-                                                          tmp.new_cipher),
-                                          SSL_C_EXPORT_PKEYLENGTH(s->s3->
-                                                                  tmp.new_cipher));
-                if (rsa == NULL) {
-                    al = SSL_AD_HANDSHAKE_FAILURE;
-                    SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                           SSL_R_ERROR_GENERATING_TMP_RSA_KEY);
-                    goto f_err;
-                }
-                RSA_up_ref(rsa);
-                cert->rsa_tmp = rsa;
-            }
-            if (rsa == NULL) {
-                al = SSL_AD_HANDSHAKE_FAILURE;
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                       SSL_R_MISSING_TMP_RSA_KEY);
-                goto f_err;
-            }
-            r[0] = rsa->n;
-            r[1] = rsa->e;
-            s->s3->tmp.use_rsa_tmp = 1;
-        } else
-#endif
-#ifndef OPENSSL_NO_DH
-        if (type & SSL_kEDH) {
-            dhp = cert->dh_tmp;
-            if ((dhp == NULL) && (s->cert->dh_tmp_cb != NULL))
-                dhp = s->cert->dh_tmp_cb(s,
-                                         SSL_C_IS_EXPORT(s->s3->
-                                                         tmp.new_cipher),
-                                         SSL_C_EXPORT_PKEYLENGTH(s->s3->
-                                                                 tmp.new_cipher));
-            if (dhp == NULL) {
-                al = SSL_AD_HANDSHAKE_FAILURE;
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                       SSL_R_MISSING_TMP_DH_KEY);
-                goto f_err;
-            }
-
-            if (s->s3->tmp.dh != NULL) {
-                DH_free(dh);
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                       ERR_R_INTERNAL_ERROR);
-                goto err;
-            }
-
-            if ((dh = DHparams_dup(dhp)) == NULL) {
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_R_DH_LIB);
-                goto err;
-            }
-
-            s->s3->tmp.dh = dh;
-            if ((dhp->pub_key == NULL ||
-                 dhp->priv_key == NULL ||
-                 (s->options & SSL_OP_SINGLE_DH_USE))) {
-                if (!DH_generate_key(dh)) {
-                    SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                           ERR_R_DH_LIB);
-                    goto err;
-                }
-            } else {
-                dh->pub_key = BN_dup(dhp->pub_key);
-                dh->priv_key = BN_dup(dhp->priv_key);
-                if ((dh->pub_key == NULL) || (dh->priv_key == NULL)) {
-                    SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                           ERR_R_DH_LIB);
-                    goto err;
-                }
-            }
-            r[0] = dh->p;
-            r[1] = dh->g;
-            r[2] = dh->pub_key;
-        } else
-#endif
-#ifndef OPENSSL_NO_ECDH
-        if (type & SSL_kEECDH) {
-            const EC_GROUP *group;
-
-            ecdhp = cert->ecdh_tmp;
-            if ((ecdhp == NULL) && (s->cert->ecdh_tmp_cb != NULL)) {
-                ecdhp = s->cert->ecdh_tmp_cb(s,
-                                             SSL_C_IS_EXPORT(s->s3->
-                                                             tmp.new_cipher),
-                                             SSL_C_EXPORT_PKEYLENGTH(s->
-                                                                     s3->tmp.new_cipher));
-            }
-            if (ecdhp == NULL) {
-                al = SSL_AD_HANDSHAKE_FAILURE;
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                       SSL_R_MISSING_TMP_ECDH_KEY);
-                goto f_err;
-            }
-
-            if (s->s3->tmp.ecdh != NULL) {
-                EC_KEY_free(s->s3->tmp.ecdh);
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                       ERR_R_INTERNAL_ERROR);
-                goto err;
-            }
-
-            /* Duplicate the ECDH structure. */
-            if (ecdhp == NULL) {
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB);
-                goto err;
-            }
-            if ((ecdh = EC_KEY_dup(ecdhp)) == NULL) {
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB);
-                goto err;
-            }
-
-            s->s3->tmp.ecdh = ecdh;
-            if ((EC_KEY_get0_public_key(ecdh) == NULL) ||
-                (EC_KEY_get0_private_key(ecdh) == NULL) ||
-                (s->options & SSL_OP_SINGLE_ECDH_USE)) {
-                if (!EC_KEY_generate_key(ecdh)) {
-                    SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                           ERR_R_ECDH_LIB);
-                    goto err;
-                }
-            }
-
-            if (((group = EC_KEY_get0_group(ecdh)) == NULL) ||
-                (EC_KEY_get0_public_key(ecdh) == NULL) ||
-                (EC_KEY_get0_private_key(ecdh) == NULL)) {
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB);
-                goto err;
-            }
-
-            if (SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) &&
-                (EC_GROUP_get_degree(group) > 163)) {
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                       SSL_R_ECGROUP_TOO_LARGE_FOR_CIPHER);
-                goto err;
-            }
-
-            /*
-             * XXX: For now, we only support ephemeral ECDH keys over named
-             * (not generic) curves. For supported named curves, curve_id is
-             * non-zero.
-             */
-            if ((curve_id =
-                 tls1_ec_nid2curve_id(EC_GROUP_get_curve_name(group)))
-                == 0) {
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                       SSL_R_UNSUPPORTED_ELLIPTIC_CURVE);
-                goto err;
-            }
-
-            /*
-             * Encode the public key. First check the size of encoding and
-             * allocate memory accordingly.
-             */
-            encodedlen = EC_POINT_point2oct(group,
-                                            EC_KEY_get0_public_key(ecdh),
-                                            POINT_CONVERSION_UNCOMPRESSED,
-                                            NULL, 0, NULL);
-
-            encodedPoint = (unsigned char *)
-                OPENSSL_malloc(encodedlen * sizeof(unsigned char));
-            bn_ctx = BN_CTX_new();
-            if ((encodedPoint == NULL) || (bn_ctx == NULL)) {
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                       ERR_R_MALLOC_FAILURE);
-                goto err;
-            }
-
-            encodedlen = EC_POINT_point2oct(group,
-                                            EC_KEY_get0_public_key(ecdh),
-                                            POINT_CONVERSION_UNCOMPRESSED,
-                                            encodedPoint, encodedlen, bn_ctx);
-
-            if (encodedlen == 0) {
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB);
-                goto err;
-            }
-
-            BN_CTX_free(bn_ctx);
-            bn_ctx = NULL;
-
-            /*
-             * XXX: For now, we only support named (not generic) curves in
-             * ECDH ephemeral key exchanges. In this situation, we need four
-             * additional bytes to encode the entire ServerECDHParams
-             * structure.
-             */
-            n = 4 + encodedlen;
-
-            /*
-             * We'll generate the serverKeyExchange message explicitly so we
-             * can set these to NULLs
-             */
-            r[0] = NULL;
-            r[1] = NULL;
-            r[2] = NULL;
-            r[3] = NULL;
-        } else
-#endif                          /* !OPENSSL_NO_ECDH */
-#ifndef OPENSSL_NO_PSK
-        if (type & SSL_kPSK) {
-            /*
-             * reserve size for record length and PSK identity hint
-             */
-            n += 2 + strlen(s->ctx->psk_identity_hint);
-        } else
-#endif                          /* !OPENSSL_NO_PSK */
-        {
-            al = SSL_AD_HANDSHAKE_FAILURE;
-            SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                   SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE);
-            goto f_err;
-        }
-        for (i = 0; r[i] != NULL; i++) {
-            nr[i] = BN_num_bytes(r[i]);
-            n += 2 + nr[i];
-        }
-
-        if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL)
-            && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)) {
-            if ((pkey = ssl_get_sign_pkey(s, s->s3->tmp.new_cipher, NULL))
-                == NULL) {
-                al = SSL_AD_DECODE_ERROR;
-                goto f_err;
-            }
-            kn = EVP_PKEY_size(pkey);
-        } else {
-            pkey = NULL;
-            kn = 0;
-        }
-
-        if (!BUF_MEM_grow_clean(buf, n + DTLS1_HM_HEADER_LENGTH + kn)) {
-            SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_LIB_BUF);
-            goto err;
-        }
-        d = (unsigned char *)s->init_buf->data;
-        p = &(d[DTLS1_HM_HEADER_LENGTH]);
-
-        for (i = 0; r[i] != NULL; i++) {
-            s2n(nr[i], p);
-            BN_bn2bin(r[i], p);
-            p += nr[i];
-        }
-
-#ifndef OPENSSL_NO_ECDH
-        if (type & SSL_kEECDH) {
-            /*
-             * XXX: For now, we only support named (not generic) curves. In
-             * this situation, the serverKeyExchange message has: [1 byte
-             * CurveType], [2 byte CurveName] [1 byte length of encoded
-             * point], followed by the actual encoded point itself
-             */
-            *p = NAMED_CURVE_TYPE;
-            p += 1;
-            *p = 0;
-            p += 1;
-            *p = curve_id;
-            p += 1;
-            *p = encodedlen;
-            p += 1;
-            memcpy((unsigned char *)p,
-                   (unsigned char *)encodedPoint, encodedlen);
-            OPENSSL_free(encodedPoint);
-            encodedPoint = NULL;
-            p += encodedlen;
-        }
-#endif
-
-#ifndef OPENSSL_NO_PSK
-        if (type & SSL_kPSK) {
-            /* copy PSK identity hint */
-            s2n(strlen(s->ctx->psk_identity_hint), p);
-            strncpy((char *)p, s->ctx->psk_identity_hint,
-                    strlen(s->ctx->psk_identity_hint));
-            p += strlen(s->ctx->psk_identity_hint);
-        }
-#endif
-
-        /* not anonymous */
-        if (pkey != NULL) {
-            /*
-             * n is the length of the params, they start at
-             * &(d[DTLS1_HM_HEADER_LENGTH]) and p points to the space at the
-             * end.
-             */
-#ifndef OPENSSL_NO_RSA
-            if (pkey->type == EVP_PKEY_RSA) {
-                q = md_buf;
-                j = 0;
-                for (num = 2; num > 0; num--) {
-                    EVP_DigestInit_ex(&md_ctx, (num == 2)
-                                      ? s->ctx->md5 : s->ctx->sha1, NULL);
-                    EVP_DigestUpdate(&md_ctx, &(s->s3->client_random[0]),
-                                     SSL3_RANDOM_SIZE);
-                    EVP_DigestUpdate(&md_ctx, &(s->s3->server_random[0]),
-                                     SSL3_RANDOM_SIZE);
-                    EVP_DigestUpdate(&md_ctx, &(d[DTLS1_HM_HEADER_LENGTH]),
-                                     n);
-                    EVP_DigestFinal_ex(&md_ctx, q, (unsigned int *)&i);
-                    q += i;
-                    j += i;
-                }
-                if (RSA_sign(NID_md5_sha1, md_buf, j,
-                             &(p[2]), &u, pkey->pkey.rsa) <= 0) {
-                    SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_LIB_RSA);
-                    goto err;
-                }
-                s2n(u, p);
-                n += u + 2;
-            } else
-#endif
-#if !defined(OPENSSL_NO_DSA)
-            if (pkey->type == EVP_PKEY_DSA) {
-                /* lets do DSS */
-                EVP_SignInit_ex(&md_ctx, EVP_dss1(), NULL);
-                EVP_SignUpdate(&md_ctx, &(s->s3->client_random[0]),
-                               SSL3_RANDOM_SIZE);
-                EVP_SignUpdate(&md_ctx, &(s->s3->server_random[0]),
-                               SSL3_RANDOM_SIZE);
-                EVP_SignUpdate(&md_ctx, &(d[DTLS1_HM_HEADER_LENGTH]), n);
-                if (!EVP_SignFinal(&md_ctx, &(p[2]),
-                                   (unsigned int *)&i, pkey)) {
-                    SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_LIB_DSA);
-                    goto err;
-                }
-                s2n(i, p);
-                n += i + 2;
-            } else
-#endif
-#if !defined(OPENSSL_NO_ECDSA)
-            if (pkey->type == EVP_PKEY_EC) {
-                /* let's do ECDSA */
-                EVP_SignInit_ex(&md_ctx, EVP_ecdsa(), NULL);
-                EVP_SignUpdate(&md_ctx, &(s->s3->client_random[0]),
-                               SSL3_RANDOM_SIZE);
-                EVP_SignUpdate(&md_ctx, &(s->s3->server_random[0]),
-                               SSL3_RANDOM_SIZE);
-                EVP_SignUpdate(&md_ctx, &(d[DTLS1_HM_HEADER_LENGTH]), n);
-                if (!EVP_SignFinal(&md_ctx, &(p[2]),
-                                   (unsigned int *)&i, pkey)) {
-                    SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                           ERR_LIB_ECDSA);
-                    goto err;
-                }
-                s2n(i, p);
-                n += i + 2;
-            } else
-#endif
-            {
-                /* Is this error check actually needed? */
-                al = SSL_AD_HANDSHAKE_FAILURE;
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE,
-                       SSL_R_UNKNOWN_PKEY_TYPE);
-                goto f_err;
-            }
-        }
-
-        d = dtls1_set_message_header(s, d,
-                                     SSL3_MT_SERVER_KEY_EXCHANGE, n, 0, n);
-
-        /*
-         * we should now have things packed up, so lets send it off
-         */
-        s->init_num = n + DTLS1_HM_HEADER_LENGTH;
-        s->init_off = 0;
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-    }
-
-    s->state = SSL3_ST_SW_KEY_EXCH_B;
-    EVP_MD_CTX_cleanup(&md_ctx);
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
- f_err:
-    ssl3_send_alert(s, SSL3_AL_FATAL, al);
- err:
-#ifndef OPENSSL_NO_ECDH
-    if (encodedPoint != NULL)
-        OPENSSL_free(encodedPoint);
-    BN_CTX_free(bn_ctx);
-#endif
-    EVP_MD_CTX_cleanup(&md_ctx);
-    return (-1);
-}
-
-int dtls1_send_certificate_request(SSL *s)
-{
-    unsigned char *p, *d;
-    int i, j, nl, off, n;
-    STACK_OF(X509_NAME) *sk = NULL;
-    X509_NAME *name;
-    BUF_MEM *buf;
-    unsigned int msg_len;
-
-    if (s->state == SSL3_ST_SW_CERT_REQ_A) {
-        buf = s->init_buf;
-
-        d = p = (unsigned char *)&(buf->data[DTLS1_HM_HEADER_LENGTH]);
-
-        /* get the list of acceptable cert types */
-        p++;
-        n = ssl3_get_req_cert_type(s, p);
-        d[0] = n;
-        p += n;
-        n++;
-
-        off = n;
-        p += 2;
-        n += 2;
-
-        sk = SSL_get_client_CA_list(s);
-        nl = 0;
-        if (sk != NULL) {
-            for (i = 0; i < sk_X509_NAME_num(sk); i++) {
-                name = sk_X509_NAME_value(sk, i);
-                j = i2d_X509_NAME(name, NULL);
-                if (!BUF_MEM_grow_clean
-                    (buf, DTLS1_HM_HEADER_LENGTH + n + j + 2)) {
-                    SSLerr(SSL_F_DTLS1_SEND_CERTIFICATE_REQUEST,
-                           ERR_R_BUF_LIB);
-                    goto err;
-                }
-                p = (unsigned char *)&(buf->data[DTLS1_HM_HEADER_LENGTH + n]);
-                if (!(s->options & SSL_OP_NETSCAPE_CA_DN_BUG)) {
-                    s2n(j, p);
-                    i2d_X509_NAME(name, &p);
-                    n += 2 + j;
-                    nl += 2 + j;
-                } else {
-                    d = p;
-                    i2d_X509_NAME(name, &p);
-                    j -= 2;
-                    s2n(j, d);
-                    j += 2;
-                    n += j;
-                    nl += j;
-                }
-            }
-        }
-        /* else no CA names */
-        p = (unsigned char *)&(buf->data[DTLS1_HM_HEADER_LENGTH + off]);
-        s2n(nl, p);
-
-        d = (unsigned char *)buf->data;
-        *(d++) = SSL3_MT_CERTIFICATE_REQUEST;
-        l2n3(n, d);
-        s2n(s->d1->handshake_write_seq, d);
-        s->d1->handshake_write_seq++;
-
-        /*
-         * we should now have things packed up, so lets send it off
-         */
-
-        s->init_num = n + DTLS1_HM_HEADER_LENGTH;
-        s->init_off = 0;
-#ifdef NETSCAPE_HANG_BUG
-/* XXX: what to do about this? */
-        p = (unsigned char *)s->init_buf->data + s->init_num;
-
-        /* do the header */
-        *(p++) = SSL3_MT_SERVER_DONE;
-        *(p++) = 0;
-        *(p++) = 0;
-        *(p++) = 0;
-        s->init_num += 4;
-#endif
-
-        /* XDTLS:  set message header ? */
-        msg_len = s->init_num - DTLS1_HM_HEADER_LENGTH;
-        dtls1_set_message_header(s, (void *)s->init_buf->data,
-                                 SSL3_MT_CERTIFICATE_REQUEST, msg_len, 0,
-                                 msg_len);
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-
-        s->state = SSL3_ST_SW_CERT_REQ_B;
-    }
-
-    /* SSL3_ST_SW_CERT_REQ_B */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
- err:
-    return (-1);
-}
-
-int dtls1_send_server_certificate(SSL *s)
-{
-    unsigned long l;
-    X509 *x;
-
-    if (s->state == SSL3_ST_SW_CERT_A) {
-        x = ssl_get_server_send_cert(s);
-        if (x == NULL) {
-            /* VRS: allow null cert if auth == KRB5 */
-            if ((s->s3->tmp.new_cipher->algorithm_mkey != SSL_kKRB5) ||
-                (s->s3->tmp.new_cipher->algorithm_auth != SSL_aKRB5)) {
-                SSLerr(SSL_F_DTLS1_SEND_SERVER_CERTIFICATE,
-                       ERR_R_INTERNAL_ERROR);
-                return (0);
-            }
-        }
-
-        l = dtls1_output_cert_chain(s, x);
-        if (!l) {
-            SSLerr(SSL_F_DTLS1_SEND_SERVER_CERTIFICATE, ERR_R_INTERNAL_ERROR);
-            return (0);
-        }
-        s->state = SSL3_ST_SW_CERT_B;
-        s->init_num = (int)l;
-        s->init_off = 0;
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-    }
-
-    /* SSL3_ST_SW_CERT_B */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
-}
-
-#ifndef OPENSSL_NO_TLSEXT
-int dtls1_send_newsession_ticket(SSL *s)
-{
-    if (s->state == SSL3_ST_SW_SESSION_TICKET_A) {
-        unsigned char *p, *senc, *macstart;
-        int len, slen;
-        unsigned int hlen, msg_len;
-        EVP_CIPHER_CTX ctx;
-        HMAC_CTX hctx;
-        SSL_CTX *tctx = s->initial_ctx;
-        unsigned char iv[EVP_MAX_IV_LENGTH];
-        unsigned char key_name[16];
-
-        /* get session encoding length */
-        slen = i2d_SSL_SESSION(s->session, NULL);
-        /*
-         * Some length values are 16 bits, so forget it if session is too
-         * long
-         */
-        if (slen > 0xFF00)
-            return -1;
-        /*
-         * Grow buffer if need be: the length calculation is as follows 12
-         * (DTLS handshake message header) + 4 (ticket lifetime hint) + 2
-         * (ticket length) + 16 (key name) + max_iv_len (iv length) +
-         * session_length + max_enc_block_size (max encrypted session length)
-         * + max_md_size (HMAC).
-         */
-        if (!BUF_MEM_grow(s->init_buf,
-                          DTLS1_HM_HEADER_LENGTH + 22 + EVP_MAX_IV_LENGTH +
-                          EVP_MAX_BLOCK_LENGTH + EVP_MAX_MD_SIZE + slen))
-            return -1;
-        senc = OPENSSL_malloc(slen);
-        if (!senc)
-            return -1;
-        p = senc;
-        i2d_SSL_SESSION(s->session, &p);
-
-        p = (unsigned char *)&(s->init_buf->data[DTLS1_HM_HEADER_LENGTH]);
-        EVP_CIPHER_CTX_init(&ctx);
-        HMAC_CTX_init(&hctx);
-        /*
-         * Initialize HMAC and cipher contexts. If callback present it does
-         * all the work otherwise use generated values from parent ctx.
-         */
-        if (tctx->tlsext_ticket_key_cb) {
-            if (tctx->tlsext_ticket_key_cb(s, key_name, iv, &ctx,
-                                           &hctx, 1) < 0) {
-                OPENSSL_free(senc);
-                return -1;
-            }
-        } else {
-            RAND_pseudo_bytes(iv, 16);
-            EVP_EncryptInit_ex(&ctx, EVP_aes_128_cbc(), NULL,
-                               tctx->tlsext_tick_aes_key, iv);
-            HMAC_Init_ex(&hctx, tctx->tlsext_tick_hmac_key, 16,
-                         tlsext_tick_md(), NULL);
-            memcpy(key_name, tctx->tlsext_tick_key_name, 16);
-        }
-        l2n(s->session->tlsext_tick_lifetime_hint, p);
-        /* Skip ticket length for now */
-        p += 2;
-        /* Output key name */
-        macstart = p;
-        memcpy(p, key_name, 16);
-        p += 16;
-        /* output IV */
-        memcpy(p, iv, EVP_CIPHER_CTX_iv_length(&ctx));
-        p += EVP_CIPHER_CTX_iv_length(&ctx);
-        /* Encrypt session data */
-        EVP_EncryptUpdate(&ctx, p, &len, senc, slen);
-        p += len;
-        EVP_EncryptFinal(&ctx, p, &len);
-        p += len;
-        EVP_CIPHER_CTX_cleanup(&ctx);
-
-        HMAC_Update(&hctx, macstart, p - macstart);
-        HMAC_Final(&hctx, p, &hlen);
-        HMAC_CTX_cleanup(&hctx);
-
-        p += hlen;
-        /* Now write out lengths: p points to end of data written */
-        /* Total length */
-        len = p - (unsigned char *)(s->init_buf->data);
-        /* Ticket length */
-        p = (unsigned char *)&(s->init_buf->data[DTLS1_HM_HEADER_LENGTH]) + 4;
-        s2n(len - DTLS1_HM_HEADER_LENGTH - 6, p);
-
-        /* number of bytes to write */
-        s->init_num = len;
-        s->state = SSL3_ST_SW_SESSION_TICKET_B;
-        s->init_off = 0;
-        OPENSSL_free(senc);
-
-        /* XDTLS:  set message header ? */
-        msg_len = s->init_num - DTLS1_HM_HEADER_LENGTH;
-        dtls1_set_message_header(s, (void *)s->init_buf->data,
-                                 SSL3_MT_NEWSESSION_TICKET, msg_len, 0,
-                                 msg_len);
-
-        /* buffer the message to handle re-xmits */
-        dtls1_buffer_message(s, 0);
-    }
-
-    /* SSL3_ST_SW_SESSION_TICKET_B */
-    return (dtls1_do_write(s, SSL3_RT_HANDSHAKE));
-}
-#endif
index 8deb299..30bbcf2 100644 (file)
@@ -84,11 +84,15 @@ extern "C" {
 #endif
 
 # define DTLS1_VERSION                   0xFEFF
-# define DTLS_MAX_VERSION                DTLS1_VERSION
+# define DTLS1_2_VERSION                 0xFEFD
+# define DTLS_MAX_VERSION                DTLS1_2_VERSION
 # define DTLS1_VERSION_MAJOR             0xFE
 
 # define DTLS1_BAD_VER                   0x0100
 
+/* Special value for method supporting multiple versions */
+# define DTLS_ANY_VERSION                0x1FFFF
+
 # if 0
 /* this alert description is not specified anywhere... */
 #  define DTLS1_AD_MISSING_HANDSHAKE_MESSAGE    110
index 3cec8b1..7623c36 100644 (file)
@@ -272,8 +272,8 @@ static int test_dtls1_not_bleeding()
 {
     SETUP_HEARTBEAT_TEST_FIXTURE(dtls);
     /* Three-byte pad at the beginning for type and payload length */
-    unsigned char payload_buf[] = "   Not bleeding, sixteen spaces of padding"
-        "                ";
+    unsigned char payload_buf[MAX_PRINTABLE_CHARACTERS + 4] =
+        "   Not bleeding, sixteen spaces of padding" "                ";
     const int payload_buf_len = honest_payload_size(payload_buf);
 
     fixture.payload = &payload_buf[0];
@@ -294,9 +294,9 @@ static int test_dtls1_not_bleeding_empty_payload()
      * Three-byte pad at the beginning for type and payload length, plus a
      * NUL at the end
      */
-    unsigned char payload_buf[4 + MIN_PADDING_SIZE];
-    memset(payload_buf, ' ', sizeof(payload_buf));
-    payload_buf[sizeof(payload_buf) - 1] = '\0';
+    unsigned char payload_buf[4 + MAX_PRINTABLE_CHARACTERS];
+    memset(payload_buf, ' ', MIN_PADDING_SIZE + 3);
+    payload_buf[MIN_PADDING_SIZE + 3] = '\0';
     payload_buf_len = honest_payload_size(payload_buf);
 
     fixture.payload = &payload_buf[0];
@@ -311,7 +311,8 @@ static int test_dtls1_heartbleed()
 {
     SETUP_HEARTBEAT_TEST_FIXTURE(dtls);
     /* Three-byte pad at the beginning for type and payload length */
-    unsigned char payload_buf[] = "   HEARTBLEED                ";
+    unsigned char payload_buf[4 + MAX_PRINTABLE_CHARACTERS] =
+        "   HEARTBLEED                ";
 
     fixture.payload = &payload_buf[0];
     fixture.sent_payload_len = MAX_PRINTABLE_CHARACTERS;
@@ -328,9 +329,9 @@ static int test_dtls1_heartbleed_empty_payload()
      * Excluding the NUL at the end, one byte short of type + payload length
      * + minimum padding
      */
-    unsigned char payload_buf[MIN_PADDING_SIZE + 3];
-    memset(payload_buf, ' ', sizeof(payload_buf));
-    payload_buf[sizeof(payload_buf) - 1] = '\0';
+    unsigned char payload_buf[MAX_PRINTABLE_CHARACTERS + 4];
+    memset(payload_buf, ' ', MIN_PADDING_SIZE + 2);
+    payload_buf[MIN_PADDING_SIZE + 2] = '\0';
 
     fixture.payload = &payload_buf[0];
     fixture.sent_payload_len = MAX_PRINTABLE_CHARACTERS;
@@ -363,8 +364,8 @@ static int test_tls1_not_bleeding()
 {
     SETUP_HEARTBEAT_TEST_FIXTURE(tls);
     /* Three-byte pad at the beginning for type and payload length */
-    unsigned char payload_buf[] = "   Not bleeding, sixteen spaces of padding"
-        "                ";
+    unsigned char payload_buf[MAX_PRINTABLE_CHARACTERS + 4] =
+        "   Not bleeding, sixteen spaces of padding" "                ";
     const int payload_buf_len = honest_payload_size(payload_buf);
 
     fixture.payload = &payload_buf[0];
@@ -385,9 +386,9 @@ static int test_tls1_not_bleeding_empty_payload()
      * Three-byte pad at the beginning for type and payload length, plus a
      * NUL at the end
      */
-    unsigned char payload_buf[4 + MIN_PADDING_SIZE];
-    memset(payload_buf, ' ', sizeof(payload_buf));
-    payload_buf[sizeof(payload_buf) - 1] = '\0';
+    unsigned char payload_buf[4 + MAX_PRINTABLE_CHARACTERS];
+    memset(payload_buf, ' ', MIN_PADDING_SIZE + 3);
+    payload_buf[MIN_PADDING_SIZE + 3] = '\0';
     payload_buf_len = honest_payload_size(payload_buf);
 
     fixture.payload = &payload_buf[0];
@@ -402,7 +403,8 @@ static int test_tls1_heartbleed()
 {
     SETUP_HEARTBEAT_TEST_FIXTURE(tls);
     /* Three-byte pad at the beginning for type and payload length */
-    unsigned char payload_buf[] = "   HEARTBLEED                ";
+    unsigned char payload_buf[MAX_PRINTABLE_CHARACTERS + 4] =
+        "   HEARTBLEED                ";
 
     fixture.payload = &payload_buf[0];
     fixture.sent_payload_len = MAX_PRINTABLE_CHARACTERS;
@@ -419,9 +421,9 @@ static int test_tls1_heartbleed_empty_payload()
      * Excluding the NUL at the end, one byte short of type + payload length
      * + minimum padding
      */
-    unsigned char payload_buf[MIN_PADDING_SIZE + 3];
-    memset(payload_buf, ' ', sizeof(payload_buf));
-    payload_buf[sizeof(payload_buf) - 1] = '\0';
+    unsigned char payload_buf[MAX_PRINTABLE_CHARACTERS + 4];
+    memset(payload_buf, ' ', MIN_PADDING_SIZE + 2);
+    payload_buf[MIN_PADDING_SIZE + 2] = '\0';
 
     fixture.payload = &payload_buf[0];
     fixture.sent_payload_len = MAX_PRINTABLE_CHARACTERS;
index 3766567..e4e707c 100644 (file)
@@ -279,7 +279,6 @@ static int ssl23_no_ssl2_ciphers(SSL *s)
 int ssl_fill_hello_random(SSL *s, int server, unsigned char *result, int len)
 {
     int send_time = 0;
-
     if (len < 4)
         return 0;
     if (server)
@@ -303,6 +302,7 @@ static int ssl23_client_hello(SSL *s)
     unsigned long l;
     int ssl2_compat;
     int version = 0, version_major, version_minor;
+    int al = 0;
 #ifndef OPENSSL_NO_COMP
     int j;
     SSL_COMP *comp;
@@ -368,6 +368,8 @@ static int ssl23_client_hello(SSL *s)
             || s->tlsext_opaque_prf_input != NULL)
             ssl2_compat = 0;
 # endif
+        if (s->cert->cli_ext.meths_count != 0)
+            ssl2_compat = 0;
     }
 #endif
 
@@ -387,6 +389,10 @@ static int ssl23_client_hello(SSL *s)
         if (version == TLS1_2_VERSION) {
             version_major = TLS1_2_VERSION_MAJOR;
             version_minor = TLS1_2_VERSION_MINOR;
+        } else if (tls1_suiteb(s)) {
+            SSLerr(SSL_F_SSL23_CLIENT_HELLO,
+                   SSL_R_ONLY_TLS_1_2_ALLOWED_IN_SUITEB_MODE);
+            return -1;
         } else if (version == TLS1_1_VERSION) {
             version_major = TLS1_1_VERSION_MAJOR;
             version_minor = TLS1_1_VERSION_MINOR;
@@ -542,9 +548,9 @@ static int ssl23_client_hello(SSL *s)
             }
             if ((p =
                  ssl_add_clienthello_tlsext(s, p,
-                                            buf +
-                                            SSL3_RT_MAX_PLAIN_LENGTH)) ==
-                NULL) {
+                                            buf + SSL3_RT_MAX_PLAIN_LENGTH,
+                                            &al)) == NULL) {
+                ssl3_send_alert(s, SSL3_AL_FATAL, al);
                 SSLerr(SSL_F_SSL23_CLIENT_HELLO, ERR_R_INTERNAL_ERROR);
                 return -1;
             }
@@ -598,10 +604,13 @@ static int ssl23_client_hello(SSL *s)
         if (ssl2_compat)
             s->msg_callback(1, SSL2_VERSION, 0, s->init_buf->data + 2,
                             ret - 2, s, s->msg_callback_arg);
-        else
+        else {
+            s->msg_callback(1, version, SSL3_RT_HEADER, s->init_buf->data, 5,
+                            s, s->msg_callback_arg);
             s->msg_callback(1, version, SSL3_RT_HANDSHAKE,
                             s->init_buf->data + 5, ret - 5, s,
                             s->msg_callback_arg);
+        }
     }
 
     return ret;
@@ -749,9 +758,12 @@ static int ssl23_get_server_hello(SSL *s)
                 cb(s, SSL_CB_READ_ALERT, j);
             }
 
-            if (s->msg_callback)
+            if (s->msg_callback) {
+                s->msg_callback(0, s->version, SSL3_RT_HEADER, p, 5, s,
+                                s->msg_callback_arg);
                 s->msg_callback(0, s->version, SSL3_RT_ALERT, p + 5, 2, s,
                                 s->msg_callback_arg);
+            }
 
             s->rwstate = SSL_NOTHING;
             SSLerr(SSL_F_SSL23_GET_SERVER_HELLO, SSL_AD_REASON_OFFSET + p[6]);
index 50f98dc..470bd3d 100644 (file)
@@ -402,6 +402,11 @@ int ssl23_get_client_hello(SSL *s)
     /* ensure that TLS_MAX_VERSION is up-to-date */
     OPENSSL_assert(s->version <= TLS_MAX_VERSION);
 
+    if (s->version < TLS1_2_VERSION && tls1_suiteb(s)) {
+        SSLerr(SSL_F_SSL23_GET_CLIENT_HELLO,
+               SSL_R_ONLY_TLS_1_2_ALLOWED_IN_SUITEB_MODE);
+        goto err;
+    }
 #ifdef OPENSSL_FIPS
     if (FIPS_mode() && (s->version < TLS1_VERSION)) {
         SSLerr(SSL_F_SSL23_GET_CLIENT_HELLO,
index b23b083..69da6b1 100644 (file)
@@ -418,19 +418,19 @@ static int get_server_hello(SSL *s)
             return (-1);
         }
     } else {
-# ifdef undef
+# if 0
         /* very bad */
         memset(s->session->session_id, 0,
                SSL_MAX_SSL_SESSION_ID_LENGTH_IN_BYTES);
         s->session->session_id_length = 0;
-        */
 # endif
-            /*
-             * we need to do this in case we were trying to reuse a client
-             * session but others are already reusing it. If this was a new
-             * 'blank' session ID, the session-id length will still be 0
-             */
-            if (s->session->session_id_length > 0) {
+
+        /*
+         * we need to do this in case we were trying to reuse a client
+         * session but others are already reusing it. If this was a new
+         * 'blank' session ID, the session-id length will still be 0
+         */
+        if (s->session->session_id_length > 0) {
             if (!ssl_get_new_session(s, 0)) {
                 ssl2_return_error(s, SSL2_PE_UNDEFINED_ERROR);
                 return (-1);
index 7e3674a..d55b93f 100644 (file)
@@ -435,10 +435,7 @@ const SSL_CIPHER *ssl2_get_cipher_by_char(const unsigned char *p)
         ((unsigned long)p[1] << 8L) | (unsigned long)p[2];
     c.id = id;
     cp = OBJ_bsearch_ssl_cipher_id(&c, ssl2_ciphers, SSL2_NUM_CIPHERS);
-    if ((cp == NULL) || (cp->valid == 0))
-        return NULL;
-    else
-        return cp;
+    return cp;
 }
 
 int ssl2_put_cipher_by_char(const SSL_CIPHER *c, unsigned char *p)
index 107b460..019e21c 100644 (file)
@@ -158,13 +158,12 @@ int ssl3_do_write(SSL *s, int type)
 
 int ssl3_send_finished(SSL *s, int a, int b, const char *sender, int slen)
 {
-    unsigned char *p, *d;
+    unsigned char *p;
     int i;
     unsigned long l;
 
     if (s->state == a) {
-        d = (unsigned char *)s->init_buf->data;
-        p = &(d[4]);
+        p = ssl_handshake_start(s);
 
         i = s->method->ssl3_enc->final_finish_mac(s,
                                                   sender, slen,
@@ -173,7 +172,6 @@ int ssl3_send_finished(SSL *s, int a, int b, const char *sender, int slen)
             return 0;
         s->s3->tmp.finish_md_len = i;
         memcpy(p, s->s3->tmp.finish_md, i);
-        p += i;
         l = i;
 
         /*
@@ -196,17 +194,12 @@ int ssl3_send_finished(SSL *s, int a, int b, const char *sender, int slen)
          */
         l &= 0xffff;
 #endif
-
-        *(d++) = SSL3_MT_FINISHED;
-        l2n3(l, d);
-        s->init_num = (int)l + 4;
-        s->init_off = 0;
-
+        ssl_set_handshake_header(s, SSL3_MT_FINISHED, l);
         s->state = b;
     }
 
     /* SSL3_ST_SEND_xxxxxx_HELLO_B */
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
 }
 
 #ifndef OPENSSL_NO_NEXTPROTONEG
@@ -248,7 +241,7 @@ int ssl3_get_finished(SSL *s, int a, int b)
 #ifdef OPENSSL_NO_NEXTPROTONEG
     /*
      * the mac has already been generated when we received the change cipher
-     * spec message and is in s->s3->tmp.peer_finish_md.
+     * spec message and is in s->s3->tmp.peer_finish_md
      */
 #endif
 
@@ -326,84 +319,20 @@ int ssl3_send_change_cipher_spec(SSL *s, int a, int b)
     return (ssl3_do_write(s, SSL3_RT_CHANGE_CIPHER_SPEC));
 }
 
-static int ssl3_add_cert_to_buf(BUF_MEM *buf, unsigned long *l, X509 *x)
-{
-    int n;
-    unsigned char *p;
-
-    n = i2d_X509(x, NULL);
-    if (!BUF_MEM_grow_clean(buf, (int)(n + (*l) + 3))) {
-        SSLerr(SSL_F_SSL3_ADD_CERT_TO_BUF, ERR_R_BUF_LIB);
-        return (-1);
-    }
-    p = (unsigned char *)&(buf->data[*l]);
-    l2n3(n, p);
-    i2d_X509(x, &p);
-    *l += n + 3;
-
-    return (0);
-}
-
-unsigned long ssl3_output_cert_chain(SSL *s, X509 *x)
+unsigned long ssl3_output_cert_chain(SSL *s, CERT_PKEY *cpk)
 {
     unsigned char *p;
-    int i;
-    unsigned long l = 7;
-    BUF_MEM *buf;
-    int no_chain;
-
-    if ((s->mode & SSL_MODE_NO_AUTO_CHAIN) || s->ctx->extra_certs)
-        no_chain = 1;
-    else
-        no_chain = 0;
+    unsigned long l = 3 + SSL_HM_HEADER_LENGTH(s);
 
-    /* TLSv1 sends a chain with nothing in it, instead of an alert */
-    buf = s->init_buf;
-    if (!BUF_MEM_grow_clean(buf, 10)) {
-        SSLerr(SSL_F_SSL3_OUTPUT_CERT_CHAIN, ERR_R_BUF_LIB);
-        return (0);
-    }
-    if (x != NULL) {
-        if (no_chain) {
-            if (ssl3_add_cert_to_buf(buf, &l, x))
-                return (0);
-        } else {
-            X509_STORE_CTX xs_ctx;
-
-            if (!X509_STORE_CTX_init(&xs_ctx, s->ctx->cert_store, x, NULL)) {
-                SSLerr(SSL_F_SSL3_OUTPUT_CERT_CHAIN, ERR_R_X509_LIB);
-                return (0);
-            }
-            X509_verify_cert(&xs_ctx);
-            /* Don't leave errors in the queue */
-            ERR_clear_error();
-            for (i = 0; i < sk_X509_num(xs_ctx.chain); i++) {
-                x = sk_X509_value(xs_ctx.chain, i);
-
-                if (ssl3_add_cert_to_buf(buf, &l, x)) {
-                    X509_STORE_CTX_cleanup(&xs_ctx);
-                    return 0;
-                }
-            }
-            X509_STORE_CTX_cleanup(&xs_ctx);
-        }
-    }
-    /* Thawte special :-) */
-    for (i = 0; i < sk_X509_num(s->ctx->extra_certs); i++) {
-        x = sk_X509_value(s->ctx->extra_certs, i);
-        if (ssl3_add_cert_to_buf(buf, &l, x))
-            return (0);
-    }
+    if (!ssl_add_cert_chain(s, cpk, &l))
+        return 0;
 
-    l -= 7;
-    p = (unsigned char *)&(buf->data[4]);
+    l -= 3 + SSL_HM_HEADER_LENGTH(s);
+    p = ssl_handshake_start(s);
     l2n3(l, p);
     l += 3;
-    p = (unsigned char *)&(buf->data[0]);
-    *(p++) = SSL3_MT_CERTIFICATE;
-    l2n3(l, p);
-    l += 4;
-    return (l);
+    ssl_set_handshake_header(s, SSL3_MT_CERTIFICATE, l);
+    return l + SSL_HM_HEADER_LENGTH(s);
 }
 
 /*
@@ -477,17 +406,6 @@ long ssl3_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok)
             SSLerr(SSL_F_SSL3_GET_MESSAGE, SSL_R_UNEXPECTED_MESSAGE);
             goto f_err;
         }
-        if ((mt < 0) && (*p == SSL3_MT_CLIENT_HELLO) &&
-            (st1 == SSL3_ST_SR_CERT_A) && (stn == SSL3_ST_SR_CERT_B)) {
-            /*
-             * At this point we have got an MS SGC second client hello (maybe
-             * we should always allow the client to start a new handshake?).
-             * We need to restart the mac. Don't increment
-             * {num,total}_renegotiations because we have not completed the
-             * handshake.
-             */
-            ssl3_init_finished_mac(s);
-        }
 
         s->s3->tmp.message_type = *(p++);
 
@@ -578,7 +496,18 @@ int ssl_cert_type(X509 *x, EVP_PKEY *pkey)
         ret = SSL_PKEY_GOST94;
     } else if (i == NID_id_GostR3410_2001 || i == NID_id_GostR3410_2001_cc) {
         ret = SSL_PKEY_GOST01;
+    } else if (x && (i == EVP_PKEY_DH || i == EVP_PKEY_DHX)) {
+        /*
+         * For DH two cases: DH certificate signed with RSA and DH
+         * certificate signed with DSA.
+         */
+        i = X509_certificate_type(x, pk);
+        if (i & EVP_PKS_RSA)
+            ret = SSL_PKEY_DH_RSA;
+        else if (i & EVP_PKS_DSA)
+            ret = SSL_PKEY_DH_DSA;
     }
+
  err:
     if (!pkey)
         EVP_PKEY_free(pk);
index 2fb71f2..a0edcef 100644 (file)
@@ -125,7 +125,7 @@ int tls1_cbc_remove_padding(const SSL *s,
     unsigned padding_length, good, to_check, i;
     const unsigned overhead = 1 /* padding length byte */  + mac_size;
     /* Check if version requires explicit IV */
-    if (s->version >= TLS1_1_VERSION || s->version == DTLS1_BAD_VER) {
+    if (SSL_USE_EXPLICIT_IV(s)) {
         /*
          * These lengths are all public so we can test them in non-constant
          * time.
index 35ad121..6af145a 100644 (file)
@@ -317,11 +317,11 @@ int ssl3_connect(SSL *s)
                     s->state = SSL3_ST_CR_SESSION_TICKET_A;
                 }
 #endif
-            } else
+            } else {
                 s->state = SSL3_ST_CR_CERT_A;
+            }
             s->init_num = 0;
             break;
-
         case SSL3_ST_CR_CERT_A:
         case SSL3_ST_CR_CERT_B:
 #ifndef OPENSSL_NO_TLSEXT
@@ -675,6 +675,7 @@ int ssl3_client_hello(SSL *s)
     unsigned char *p, *d;
     int i;
     unsigned long l;
+    int al = 0;
 #ifndef OPENSSL_NO_COMP
     int j;
     SSL_COMP *comp;
@@ -697,15 +698,64 @@ int ssl3_client_hello(SSL *s)
             if (!ssl_get_new_session(s, 0))
                 goto err;
         }
+        if (s->method->version == DTLS_ANY_VERSION) {
+            /* Determine which DTLS version to use */
+            int options = s->options;
+            /* If DTLS 1.2 disabled correct the version number */
+            if (options & SSL_OP_NO_DTLSv1_2) {
+                if (tls1_suiteb(s)) {
+                    SSLerr(SSL_F_SSL3_CLIENT_HELLO,
+                           SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE);
+                    goto err;
+                }
+                /*
+                 * Disabling all versions is silly: return an error.
+                 */
+                if (options & SSL_OP_NO_DTLSv1) {
+                    SSLerr(SSL_F_SSL3_CLIENT_HELLO, SSL_R_WRONG_SSL_VERSION);
+                    goto err;
+                }
+                /*
+                 * Update method so we don't use any DTLS 1.2 features.
+                 */
+                s->method = DTLSv1_client_method();
+                s->version = DTLS1_VERSION;
+            } else {
+                /*
+                 * We only support one version: update method
+                 */
+                if (options & SSL_OP_NO_DTLSv1)
+                    s->method = DTLSv1_2_client_method();
+                s->version = DTLS1_2_VERSION;
+            }
+            s->client_version = s->version;
+        }
         /* else use the pre-loaded session */
 
         p = s->s3->client_random;
 
-        if (ssl_fill_hello_random(s, 0, p, SSL3_RANDOM_SIZE) <= 0)
+        /*
+         * for DTLS if client_random is initialized, reuse it, we are
+         * required to use same upon reply to HelloVerify
+         */
+        if (SSL_IS_DTLS(s)) {
+            size_t idx;
+            i = 1;
+            for (idx = 0; idx < sizeof(s->s3->client_random); idx++) {
+                if (p[idx]) {
+                    i = 0;
+                    break;
+                }
+            }
+        } else
+            i = 1;
+
+        if (i && ssl_fill_hello_random(s, 0, p,
+                                       sizeof(s->s3->client_random)) <= 0)
             goto err;
 
         /* Do the message type and length last */
-        d = p = &(buf[4]);
+        d = p = ssl_handshake_start(s);
 
         /*-
          * version indicates the negotiated version: for example from
@@ -765,6 +815,17 @@ int ssl3_client_hello(SSL *s)
             p += i;
         }
 
+        /* cookie stuff for DTLS */
+        if (SSL_IS_DTLS(s)) {
+            if (s->d1->cookie_len > sizeof(s->d1->cookie)) {
+                SSLerr(SSL_F_SSL3_CLIENT_HELLO, ERR_R_INTERNAL_ERROR);
+                goto err;
+            }
+            *(p++) = s->d1->cookie_len;
+            memcpy(p, s->d1->cookie, s->d1->cookie_len);
+            p += s->d1->cookie_len;
+        }
+
         /* Ciphers supported */
         i = ssl_cipher_list_to_bytes(s, SSL_get_ciphers(s), &(p[2]), 0);
         if (i == 0) {
@@ -809,27 +870,21 @@ int ssl3_client_hello(SSL *s)
             goto err;
         }
         if ((p =
-             ssl_add_clienthello_tlsext(s, p,
-                                        buf + SSL3_RT_MAX_PLAIN_LENGTH)) ==
-            NULL) {
+             ssl_add_clienthello_tlsext(s, p, buf + SSL3_RT_MAX_PLAIN_LENGTH,
+                                        &al)) == NULL) {
+            ssl3_send_alert(s, SSL3_AL_FATAL, al);
             SSLerr(SSL_F_SSL3_CLIENT_HELLO, ERR_R_INTERNAL_ERROR);
             goto err;
         }
 #endif
 
-        l = (p - d);
-        d = buf;
-        *(d++) = SSL3_MT_CLIENT_HELLO;
-        l2n3(l, d);
-
+        l = p - d;
+        ssl_set_handshake_header(s, SSL3_MT_CLIENT_HELLO, l);
         s->state = SSL3_ST_CW_CLNT_HELLO_B;
-        /* number of bytes to write */
-        s->init_num = p - buf;
-        s->init_off = 0;
     }
 
     /* SSL3_ST_CW_CLNT_HELLO_B */
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
  err:
     s->state = SSL_ST_ERR;
     return (-1);
@@ -839,13 +894,20 @@ int ssl3_get_server_hello(SSL *s)
 {
     STACK_OF(SSL_CIPHER) *sk;
     const SSL_CIPHER *c;
+    CERT *ct = s->cert;
     unsigned char *p, *d;
-    int i, al, ok;
+    int i, al = SSL_AD_INTERNAL_ERROR, ok;
     unsigned int j;
     long n;
 #ifndef OPENSSL_NO_COMP
     SSL_COMP *comp;
 #endif
+    /*
+     * Hello verify request and/or server hello version may not match so set
+     * first packet if we're negotiating version.
+     */
+    if (SSL_IS_DTLS(s))
+        s->first_packet = 1;
 
     n = s->method->ssl_get_message(s,
                                    SSL3_ST_CR_SRVR_HELLO_A,
@@ -854,7 +916,8 @@ int ssl3_get_server_hello(SSL *s)
     if (!ok)
         return ((int)n);
 
-    if (SSL_version(s) == DTLS1_VERSION || SSL_version(s) == DTLS1_BAD_VER) {
+    if (SSL_IS_DTLS(s)) {
+        s->first_packet = 0;
         if (s->s3->tmp.message_type == DTLS1_MT_HELLO_VERIFY_REQUEST) {
             if (s->d1->send_cookie == 0) {
                 s->s3->tmp.reuse_message = 1;
@@ -875,6 +938,28 @@ int ssl3_get_server_hello(SSL *s)
     }
 
     d = p = (unsigned char *)s->init_msg;
+    if (s->method->version == DTLS_ANY_VERSION) {
+        /* Work out correct protocol version to use */
+        int hversion = (p[0] << 8) | p[1];
+        int options = s->options;
+        if (hversion == DTLS1_2_VERSION && !(options & SSL_OP_NO_DTLSv1_2))
+            s->method = DTLSv1_2_client_method();
+        else if (tls1_suiteb(s)) {
+            SSLerr(SSL_F_SSL3_GET_SERVER_HELLO,
+                   SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE);
+            s->version = hversion;
+            al = SSL_AD_PROTOCOL_VERSION;
+            goto f_err;
+        } else if (hversion == DTLS1_VERSION && !(options & SSL_OP_NO_DTLSv1))
+            s->method = DTLSv1_client_method();
+        else {
+            SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_WRONG_SSL_VERSION);
+            s->version = hversion;
+            al = SSL_AD_PROTOCOL_VERSION;
+            goto f_err;
+        }
+        s->session->ssl_version = s->version = s->method->version;
+    }
 
     if ((p[0] != (s->version >> 8)) || (p[1] != (s->version & 0xff))) {
         SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_WRONG_SSL_VERSION);
@@ -951,7 +1036,6 @@ int ssl3_get_server_hello(SSL *s)
          */
         if (s->session->session_id_length > 0) {
             if (!ssl_get_new_session(s, 0)) {
-                al = SSL_AD_INTERNAL_ERROR;
                 goto f_err;
             }
         }
@@ -966,21 +1050,16 @@ int ssl3_get_server_hello(SSL *s)
         SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_UNKNOWN_CIPHER_RETURNED);
         goto f_err;
     }
-    /* TLS v1.2 only ciphersuites require v1.2 or later */
-    if ((c->algorithm_ssl & SSL_TLSV1_2) &&
-        (TLS1_get_version(s) < TLS1_2_VERSION)) {
-        al = SSL_AD_ILLEGAL_PARAMETER;
-        SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_WRONG_CIPHER_RETURNED);
-        goto f_err;
-    }
-#ifndef OPENSSL_NO_SRP
-    if (((c->algorithm_mkey & SSL_kSRP) || (c->algorithm_auth & SSL_aSRP)) &&
-        !(s->srp_ctx.srp_Mask & SSL_kSRP)) {
+    /*
+     * If it is a disabled cipher we didn't send it in client hello, so
+     * return an error.
+     */
+    if (c->algorithm_ssl & ct->mask_ssl ||
+        c->algorithm_mkey & ct->mask_k || c->algorithm_auth & ct->mask_a) {
         al = SSL_AD_ILLEGAL_PARAMETER;
         SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_WRONG_CIPHER_RETURNED);
         goto f_err;
     }
-#endif                          /* OPENSSL_NO_SRP */
     p += ssl_put_cipher_by_char(s, NULL, NULL);
 
     sk = ssl_get_ciphers_by_id(s);
@@ -1013,14 +1092,11 @@ int ssl3_get_server_hello(SSL *s)
     }
     s->s3->tmp.new_cipher = c;
     /*
-     * Don't digest cached records if TLS v1.2: we may need them for client
+     * Don't digest cached records if no sigalgs: we may need them for client
      * authentication.
      */
-    if (TLS1_get_version(s) < TLS1_2_VERSION
-        && !ssl3_digest_cached_records(s)) {
-        al = SSL_AD_INTERNAL_ERROR;
+    if (!SSL_USE_SIGALGS(s) && !ssl3_digest_cached_records(s))
         goto f_err;
-    }
     /* lets get the compression algorithm */
     /* COMPRESSION */
 #ifdef OPENSSL_NO_COMP
@@ -1035,7 +1111,6 @@ int ssl3_get_server_hello(SSL *s)
      * using compression.
      */
     if (s->session->compress_meth != 0) {
-        al = SSL_AD_INTERNAL_ERROR;
         SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_INCONSISTENT_COMPRESSION);
         goto f_err;
     }
@@ -1068,16 +1143,9 @@ int ssl3_get_server_hello(SSL *s)
 
 #ifndef OPENSSL_NO_TLSEXT
     /* TLS extensions */
-    if (s->version >= SSL3_VERSION) {
-        if (!ssl_parse_serverhello_tlsext(s, &p, d, n, &al)) {
-            /* 'al' set by ssl_parse_serverhello_tlsext */
-            SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_PARSE_TLSEXT);
-            goto f_err;
-        }
-        if (ssl_check_serverhello_tlsext(s) <= 0) {
-            SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_SERVERHELLO_TLSEXT);
-            goto err;
-        }
+    if (!ssl_parse_serverhello_tlsext(s, &p, d, n)) {
+        SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_PARSE_TLSEXT);
+        goto err;
     }
 #endif
 
@@ -1240,6 +1308,14 @@ int ssl3_get_server_certificate(SSL *s)
     }
 
     if (need_cert) {
+        int exp_idx = ssl_cipher_get_cert_index(s->s3->tmp.new_cipher);
+        if (exp_idx >= 0 && i != exp_idx) {
+            x = NULL;
+            al = SSL_AD_ILLEGAL_PARAMETER;
+            SSLerr(SSL_F_SSL3_GET_SERVER_CERTIFICATE,
+                   SSL_R_WRONG_CERTIFICATE_TYPE);
+            goto f_err;
+        }
         sc->peer_cert_type = i;
         CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509);
         /*
@@ -1267,7 +1343,6 @@ int ssl3_get_server_certificate(SSL *s)
 
     x = NULL;
     ret = 1;
-
     if (0) {
  f_err:
         ssl3_send_alert(s, SSL3_AL_FATAL, al);
@@ -1326,7 +1401,7 @@ int ssl3_get_key_exchange(SSL *s)
          * Can't skip server key exchange if this is an ephemeral
          * ciphersuite.
          */
-        if (alg_k & (SSL_kEDH | SSL_kEECDH)) {
+        if (alg_k & (SSL_kDHE | SSL_kECDHE)) {
             SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_UNEXPECTED_MESSAGE);
             al = SSL_AD_UNEXPECTED_MESSAGE;
             goto f_err;
@@ -1718,9 +1793,16 @@ int ssl3_get_key_exchange(SSL *s)
             SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_LENGTH_TOO_SHORT);
             goto f_err;
         }
+        /*
+         * Check curve is one of our preferences, if not server has sent an
+         * invalid curve. ECParameters is 3 bytes.
+         */
+        if (!tls1_check_curve(s, p, 3)) {
+            SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_WRONG_CURVE);
+            goto f_err;
+        }
 
-        if ((*p != NAMED_CURVE_TYPE) ||
-            ((curve_nid = tls1_ec_curve_id2nid(*(p + 2))) == 0)) {
+        if ((curve_nid = tls1_ec_curve_id2nid(*(p + 2))) == 0) {
             al = SSL_AD_INTERNAL_ERROR;
             SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,
                    SSL_R_UNABLE_TO_FIND_ECDH_PARAMETERS);
@@ -1808,29 +1890,16 @@ int ssl3_get_key_exchange(SSL *s)
 
     /* if it was signed, check the signature */
     if (pkey != NULL) {
-        if (TLS1_get_version(s) >= TLS1_2_VERSION) {
-            int sigalg;
+        if (SSL_USE_SIGALGS(s)) {
+            int rv;
             if (2 > n) {
                 SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_LENGTH_TOO_SHORT);
                 goto f_err;
             }
-
-            sigalg = tls12_get_sigid(pkey);
-            /* Should never happen */
-            if (sigalg == -1) {
-                SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_INTERNAL_ERROR);
+            rv = tls12_check_peer_sigalg(&md, s, p, pkey);
+            if (rv == -1)
                 goto err;
-            }
-            /* Check key type is consistent with signature */
-            if (sigalg != (int)p[1]) {
-                SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE,
-                       SSL_R_WRONG_SIGNATURE_TYPE);
-                al = SSL_AD_DECODE_ERROR;
-                goto f_err;
-            }
-            md = tls12_get_hash(p[0]);
-            if (md == NULL) {
-                SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_UNKNOWN_DIGEST);
+            else if (rv == 0) {
                 goto f_err;
             }
 #ifdef SSL_DEBUG
@@ -1858,8 +1927,7 @@ int ssl3_get_key_exchange(SSL *s)
             goto f_err;
         }
 #ifndef OPENSSL_NO_RSA
-        if (pkey->type == EVP_PKEY_RSA
-            && TLS1_get_version(s) < TLS1_2_VERSION) {
+        if (pkey->type == EVP_PKEY_RSA && !SSL_USE_SIGALGS(s)) {
             int num;
             unsigned int size;
 
@@ -1909,7 +1977,10 @@ int ssl3_get_key_exchange(SSL *s)
     } else {
         /* aNULL, aSRP or kPSK do not need public keys */
         if (!(alg_a & (SSL_aNULL | SSL_aSRP)) && !(alg_k & SSL_kPSK)) {
-            SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_INTERNAL_ERROR);
+            /* Might be wrong key type, check it */
+            if (ssl3_check_cert_and_algorithm(s))
+                /* Otherwise this shouldn't happen */
+                SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_INTERNAL_ERROR);
             goto err;
         }
         /* still data left over */
@@ -2002,12 +2073,21 @@ int ssl3_get_certificate_request(SSL *s)
 
     /* get the certificate types */
     ctype_num = *(p++);
-    if (ctype_num > SSL3_CT_NUMBER)
+    if (s->cert->ctypes) {
+        OPENSSL_free(s->cert->ctypes);
+        s->cert->ctypes = NULL;
+    }
+    if (ctype_num > SSL3_CT_NUMBER) {
+        /* If we exceed static buffer copy all to cert structure */
+        s->cert->ctypes = OPENSSL_malloc(ctype_num);
+        memcpy(s->cert->ctypes, p, ctype_num);
+        s->cert->ctype_num = (size_t)ctype_num;
         ctype_num = SSL3_CT_NUMBER;
+    }
     for (i = 0; i < ctype_num; i++)
         s->s3->tmp.ctype[i] = p[i];
-    p += ctype_num;
-    if (TLS1_get_version(s) >= TLS1_2_VERSION) {
+    p += p[-1];
+    if (SSL_USE_SIGALGS(s)) {
         n2s(p, llen);
         /*
          * Check we have enough room for signature algorithms and following
@@ -2019,12 +2099,22 @@ int ssl3_get_certificate_request(SSL *s)
                    SSL_R_DATA_LENGTH_TOO_LONG);
             goto err;
         }
-        if ((llen & 1) || !tls1_process_sigalgs(s, p, llen)) {
+        /* Clear certificate digests and validity flags */
+        for (i = 0; i < SSL_PKEY_NUM; i++) {
+            s->cert->pkeys[i].digest = NULL;
+            s->cert->pkeys[i].valid_flags = 0;
+        }
+        if ((llen & 1) || !tls1_save_sigalgs(s, p, llen)) {
             ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_DECODE_ERROR);
             SSLerr(SSL_F_SSL3_GET_CERTIFICATE_REQUEST,
                    SSL_R_SIGNATURE_ALGORITHMS_ERROR);
             goto err;
         }
+        if (!tls1_process_sigalgs(s)) {
+            ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
+            SSLerr(SSL_F_SSL3_GET_CERTIFICATE_REQUEST, ERR_R_MALLOC_FAILURE);
+            goto err;
+        }
         p += llen;
     }
 
@@ -2299,9 +2389,28 @@ int ssl3_get_server_done(SSL *s)
     return (ret);
 }
 
+#ifndef OPENSSL_NO_DH
+static DH *get_server_static_dh_key(SESS_CERT *scert)
+{
+    DH *dh_srvr = NULL;
+    EVP_PKEY *spkey = NULL;
+    int idx = scert->peer_cert_type;
+
+    if (idx >= 0)
+        spkey = X509_get_pubkey(scert->peer_pkeys[idx].x509);
+    if (spkey) {
+        dh_srvr = EVP_PKEY_get1_DH(spkey);
+        EVP_PKEY_free(spkey);
+    }
+    if (dh_srvr == NULL)
+        SSLerr(SSL_F_GET_SERVER_STATIC_DH_KEY, ERR_R_INTERNAL_ERROR);
+    return dh_srvr;
+}
+#endif
+
 int ssl3_send_client_key_exchange(SSL *s)
 {
-    unsigned char *p, *d;
+    unsigned char *p;
     int n;
     unsigned long alg_k;
 #ifndef OPENSSL_NO_RSA
@@ -2321,8 +2430,7 @@ int ssl3_send_client_key_exchange(SSL *s)
 #endif
 
     if (s->state == SSL3_ST_CW_KEY_EXCH_A) {
-        d = (unsigned char *)s->init_buf->data;
-        p = &(d[4]);
+        p = ssl_handshake_start(s);
 
         alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
 
@@ -2533,33 +2641,45 @@ int ssl3_send_client_key_exchange(SSL *s)
 #ifndef OPENSSL_NO_DH
         else if (alg_k & (SSL_kEDH | SSL_kDHr | SSL_kDHd)) {
             DH *dh_srvr, *dh_clnt;
+            SESS_CERT *scert = s->session->sess_cert;
 
-            if (s->session->sess_cert == NULL) {
+            if (scert == NULL) {
                 ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_UNEXPECTED_MESSAGE);
                 SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
                        SSL_R_UNEXPECTED_MESSAGE);
                 goto err;
             }
 
-            if (s->session->sess_cert->peer_dh_tmp != NULL)
-                dh_srvr = s->session->sess_cert->peer_dh_tmp;
-            else {
-                /* we get them from the cert */
-                ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE);
-                SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
-                       SSL_R_UNABLE_TO_FIND_DH_PARAMETERS);
-                goto err;
+            if (scert->peer_dh_tmp != NULL) {
+                dh_srvr = scert->peer_dh_tmp;
+            } else {
+                dh_srvr = get_server_static_dh_key(scert);
+                if (dh_srvr == NULL)
+                    goto err;
             }
 
-            /* generate a new random key */
-            if ((dh_clnt = DHparams_dup(dh_srvr)) == NULL) {
-                SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB);
-                goto err;
-            }
-            if (!DH_generate_key(dh_clnt)) {
-                SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB);
-                DH_free(dh_clnt);
-                goto err;
+            if (s->s3->flags & TLS1_FLAGS_SKIP_CERT_VERIFY) {
+                /* Use client certificate key */
+                EVP_PKEY *clkey = s->cert->key->privatekey;
+                dh_clnt = NULL;
+                if (clkey)
+                    dh_clnt = EVP_PKEY_get1_DH(clkey);
+                if (dh_clnt == NULL) {
+                    SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE,
+                           ERR_R_INTERNAL_ERROR);
+                    goto err;
+                }
+            } else {
+                /* generate a new random key */
+                if ((dh_clnt = DHparams_dup(dh_srvr)) == NULL) {
+                    SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB);
+                    goto err;
+                }
+                if (!DH_generate_key(dh_clnt)) {
+                    SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB);
+                    DH_free(dh_clnt);
+                    goto err;
+                }
             }
 
             /*
@@ -2568,6 +2688,8 @@ int ssl3_send_client_key_exchange(SSL *s)
              */
 
             n = DH_compute_key(p, dh_srvr->pub_key, dh_clnt);
+            if (scert->peer_dh_tmp == NULL)
+                DH_free(dh_srvr);
 
             if (n <= 0) {
                 SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB);
@@ -2584,11 +2706,15 @@ int ssl3_send_client_key_exchange(SSL *s)
             /* clean up */
             memset(p, 0, n);
 
-            /* send off the data */
-            n = BN_num_bytes(dh_clnt->pub_key);
-            s2n(n, p);
-            BN_bn2bin(dh_clnt->pub_key, p);
-            n += 2;
+            if (s->s3->flags & TLS1_FLAGS_SKIP_CERT_VERIFY)
+                n = 0;
+            else {
+                /* send off the data */
+                n = BN_num_bytes(dh_clnt->pub_key);
+                s2n(n, p);
+                BN_bn2bin(dh_clnt->pub_key, p);
+                n += 2;
+            }
 
             DH_free(dh_clnt);
         }
@@ -3014,17 +3140,12 @@ int ssl3_send_client_key_exchange(SSL *s)
             goto err;
         }
 
-        *(d++) = SSL3_MT_CLIENT_KEY_EXCHANGE;
-        l2n3(n, d);
-
+        ssl_set_handshake_header(s, SSL3_MT_CLIENT_KEY_EXCHANGE, n);
         s->state = SSL3_ST_CW_KEY_EXCH_B;
-        /* number of bytes to write */
-        s->init_num = n + 4;
-        s->init_off = 0;
     }
 
     /* SSL3_ST_CW_KEY_EXCH_B */
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
  err:
 #ifndef OPENSSL_NO_ECDH
     BN_CTX_free(bn_ctx);
@@ -3040,7 +3161,7 @@ int ssl3_send_client_key_exchange(SSL *s)
 
 int ssl3_send_client_verify(SSL *s)
 {
-    unsigned char *p, *d;
+    unsigned char *p;
     unsigned char data[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
     EVP_PKEY *pkey;
     EVP_PKEY_CTX *pctx = NULL;
@@ -3052,14 +3173,13 @@ int ssl3_send_client_verify(SSL *s)
     EVP_MD_CTX_init(&mctx);
 
     if (s->state == SSL3_ST_CW_CERT_VRFY_A) {
-        d = (unsigned char *)s->init_buf->data;
-        p = &(d[4]);
+        p = ssl_handshake_start(s);
         pkey = s->cert->key->privatekey;
 /* Create context from key and test if sha1 is allowed as digest */
         pctx = EVP_PKEY_CTX_new(pkey, NULL);
         EVP_PKEY_sign_init(pctx);
         if (EVP_PKEY_CTX_set_signature_md(pctx, EVP_sha1()) > 0) {
-            if (TLS1_get_version(s) < TLS1_2_VERSION)
+            if (!SSL_USE_SIGALGS(s))
                 s->method->ssl3_enc->cert_verify_mac(s,
                                                      NID_sha1,
                                                      &(data
@@ -3071,7 +3191,7 @@ int ssl3_send_client_verify(SSL *s)
          * For TLS v1.2 send signature algorithm and signature using agreed
          * digest and cached handshake records.
          */
-        if (TLS1_get_version(s) >= TLS1_2_VERSION) {
+        if (SSL_USE_SIGALGS(s)) {
             long hdatalen = 0;
             void *hdata;
             const EVP_MD *md = s->cert->key->digest;
@@ -3155,16 +3275,12 @@ int ssl3_send_client_verify(SSL *s)
             SSLerr(SSL_F_SSL3_SEND_CLIENT_VERIFY, ERR_R_INTERNAL_ERROR);
             goto err;
         }
-        *(d++) = SSL3_MT_CERTIFICATE_VERIFY;
-        l2n3(n, d);
-
+        ssl_set_handshake_header(s, SSL3_MT_CERTIFICATE_VERIFY, n);
         s->state = SSL3_ST_CW_CERT_VRFY_B;
-        s->init_num = (int)n + 4;
-        s->init_off = 0;
     }
     EVP_MD_CTX_cleanup(&mctx);
     EVP_PKEY_CTX_free(pctx);
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
  err:
     EVP_MD_CTX_cleanup(&mctx);
     EVP_PKEY_CTX_free(pctx);
@@ -3172,20 +3288,75 @@ int ssl3_send_client_verify(SSL *s)
     return (-1);
 }
 
+/*
+ * Return 1 if the current certificate can be used for client authentication,
+ * else 0: needs cert + key, a digest when sigalgs are in use, a chain passing
+ * the strict-mode check, and DH params matching the server's for fixed DH.
+ */
+static int ssl3_check_client_certificate(SSL *s)
+{
+    unsigned long alg_k;
+    if (!s->cert || !s->cert->key->x509 || !s->cert->key->privatekey)
+        return 0;
+    /* No digest selected for this key: the certificate can't be used */
+    if (SSL_USE_SIGALGS(s) && !s->cert->key->digest)
+        return 0;
+    /*
+     * In strict mode, check the chain is suitable before using it. This also
+     * adjusts suite B digest if necessary.
+     */
+    if (s->cert->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT &&
+        !tls1_check_chain(s, NULL, NULL, NULL, -2))
+        return 0;
+    alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
+    /* See if we can use the client certificate for fixed DH */
+    if (alg_k & (SSL_kDHr | SSL_kDHd)) {
+        SESS_CERT *scert = s->session->sess_cert;
+        int i = scert->peer_cert_type; /* peer_pkeys[] index; < 0 is skipped */
+        EVP_PKEY *clkey = NULL, *spkey = NULL;
+        clkey = s->cert->key->privatekey;
+        /* Client key is not DH: accept it, but leave the flag below unset */
+        if (EVP_PKEY_id(clkey) != EVP_PKEY_DH)
+            return 1;
+        if (i >= 0)
+            spkey = X509_get_pubkey(scert->peer_pkeys[i].x509);
+        if (spkey) {
+            /* Compare server and client parameters (skipped if no spkey) */
+            i = EVP_PKEY_cmp_parameters(clkey, spkey);
+            EVP_PKEY_free(spkey);
+            if (i != 1)
+                return 0;
+        }
+        s->s3->flags |= TLS1_FLAGS_SKIP_CERT_VERIFY; /* use cert's DH key */
+    }
+    return 1;
+}
+
 int ssl3_send_client_certificate(SSL *s)
 {
     X509 *x509 = NULL;
     EVP_PKEY *pkey = NULL;
     int i;
-    unsigned long l;
 
     if (s->state == SSL3_ST_CW_CERT_A) {
-        if ((s->cert == NULL) ||
-            (s->cert->key->x509 == NULL) ||
-            (s->cert->key->privatekey == NULL))
-            s->state = SSL3_ST_CW_CERT_B;
-        else
+        /* Let cert callback update client certificates if required */
+        if (s->cert->cert_cb) {
+            i = s->cert->cert_cb(s, s->cert->cert_cb_arg);
+            if (i < 0) {
+                s->rwstate = SSL_X509_LOOKUP;
+                return -1;
+            }
+            if (i == 0) {
+                ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
+                s->state = SSL_ST_ERR;
+                return 0;
+            }
+            s->rwstate = SSL_NOTHING;
+        }
+        if (ssl3_check_client_certificate(s))
             s->state = SSL3_ST_CW_CERT_C;
+        else
+            s->state = SSL3_ST_CW_CERT_B;
     }
 
     /* We need to get a client cert */
@@ -3215,6 +3386,8 @@ int ssl3_send_client_certificate(SSL *s)
             X509_free(x509);
         if (pkey != NULL)
             EVP_PKEY_free(pkey);
+        if (i && !ssl3_check_client_certificate(s))
+            i = 0;
         if (i == 0) {
             if (s->version == SSL3_VERSION) {
                 s->s3->tmp.cert_req = 0;
@@ -3231,20 +3404,17 @@ int ssl3_send_client_certificate(SSL *s)
 
     if (s->state == SSL3_ST_CW_CERT_C) {
         s->state = SSL3_ST_CW_CERT_D;
-        l = ssl3_output_cert_chain(s,
-                                   (s->s3->tmp.cert_req ==
-                                    2) ? NULL : s->cert->key->x509);
-        if (!l) {
+        if (!ssl3_output_cert_chain(s,
+                                    (s->s3->tmp.cert_req ==
+                                     2) ? NULL : s->cert->key)) {
             SSLerr(SSL_F_SSL3_SEND_CLIENT_CERTIFICATE, ERR_R_INTERNAL_ERROR);
             ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
             s->state = SSL_ST_ERR;
             return 0;
         }
-        s->init_num = (int)l;
-        s->init_off = 0;
     }
     /* SSL3_ST_CW_CERT_D */
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
 }
 
 #define has_bits(i,m)   (((i)&(m)) == (m))
@@ -3268,7 +3438,7 @@ int ssl3_check_cert_and_algorithm(SSL *s)
     alg_a = s->s3->tmp.new_cipher->algorithm_auth;
 
     /* we don't have a certificate */
-    if ((alg_a & (SSL_aDH | SSL_aNULL | SSL_aKRB5)) || (alg_k & SSL_kPSK))
+    if ((alg_a & (SSL_aNULL | SSL_aKRB5)) || (alg_k & SSL_kPSK))
         return (1);
 
     sc = s->session->sess_cert;
@@ -3295,6 +3465,13 @@ int ssl3_check_cert_and_algorithm(SSL *s)
         } else {
             return 1;
         }
+    } else if (alg_a & SSL_aECDSA) {
+        SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM,
+               SSL_R_MISSING_ECDSA_SIGNING_CERT);
+        goto f_err;
+    } else if (alg_k & (SSL_kECDHr | SSL_kECDHe)) {
+        SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_MISSING_ECDH_CERT);
+        goto f_err;
     }
 #endif
     pkey = X509_get_pubkey(sc->peer_pkeys[idx].x509);
@@ -3345,22 +3522,33 @@ int ssl3_check_cert_and_algorithm(SSL *s)
         SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, ERR_R_INTERNAL_ERROR);
         goto f_err;
     }
-    if ((alg_k & SSL_kDHr) && !has_bits(i, EVP_PK_DH | EVP_PKS_RSA)) {
+    if ((alg_k & SSL_kDHr) && !SSL_USE_SIGALGS(s) &&
+               !has_bits(i, EVP_PK_DH | EVP_PKS_RSA)) {
         SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM,
                SSL_R_MISSING_DH_RSA_CERT);
         goto f_err;
     }
 # ifndef OPENSSL_NO_DSA
-    if ((alg_k & SSL_kDHd) && !has_bits(i, EVP_PK_DH | EVP_PKS_DSA)) {
+    if ((alg_k & SSL_kDHd) && !SSL_USE_SIGALGS(s) &&
+        !has_bits(i, EVP_PK_DH | EVP_PKS_DSA)) {
         SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM,
                SSL_R_MISSING_DH_DSA_CERT);
         goto f_err;
     }
 # endif
 
-    /* Check DHE only: static DH not implemented. */
-    if (alg_k & SSL_kEDH) {
-        int dh_size = BN_num_bits(dh->p);
+    if (alg_k & (SSL_kDHE | SSL_kDHr | SSL_kDHd)) {
+        int dh_size;
+        if (alg_k & SSL_kDHE) {
+            dh_size = BN_num_bits(dh->p);
+        } else {
+            DH *dh_srvr = get_server_static_dh_key(sc);
+            if (dh_srvr == NULL)
+                goto f_err;
+            dh_size = BN_num_bits(dh_srvr->p);
+            DH_free(dh_srvr);
+        }
+
         if ((!SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) && dh_size < 768)
             || (SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) && dh_size < 512)) {
             SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_DH_KEY_TOO_SMALL);
@@ -3388,7 +3576,7 @@ int ssl3_check_cert_and_algorithm(SSL *s)
         } else
 #endif
 #ifndef OPENSSL_NO_DH
-        if (alg_k & SSL_kEDH) {
+        if (alg_k & SSL_kDHE) {
             if (BN_num_bits(dh->p) >
                 SSL_C_EXPORT_PKEYLENGTH(s->s3->tmp.new_cipher)) {
                 /* We have a temporary DH key but it's too large. */
index 152f40d..cda2d8c 100644 (file)
@@ -374,6 +374,23 @@ int ssl3_change_cipher_state(SSL *s, int which)
 
     EVP_CipherInit_ex(dd, c, NULL, key, iv, (which & SSL3_CC_WRITE));
 
+#ifdef OPENSSL_SSL_TRACE_CRYPTO
+    if (s->msg_callback) {
+
+        int wh = which & SSL3_CC_WRITE ?
+            TLS1_RT_CRYPTO_WRITE : TLS1_RT_CRYPTO_READ;
+        s->msg_callback(2, s->version, wh | TLS1_RT_CRYPTO_MAC,
+                        mac_secret, EVP_MD_size(m), s, s->msg_callback_arg);
+        if (c->key_len)
+            s->msg_callback(2, s->version, wh | TLS1_RT_CRYPTO_KEY,
+                            key, c->key_len, s, s->msg_callback_arg);
+        if (k) {
+            s->msg_callback(2, s->version, wh | TLS1_RT_CRYPTO_IV,
+                            iv, k, s, s->msg_callback_arg);
+        }
+    }
+#endif
+
     OPENSSL_cleanse(&(exp_key[0]), sizeof(exp_key));
     OPENSSL_cleanse(&(exp_iv[0]), sizeof(exp_iv));
     EVP_MD_CTX_cleanup(&md);
@@ -823,6 +840,9 @@ int ssl3_generate_master_secret(SSL *s, unsigned char *out, unsigned char *p,
     EVP_MD_CTX ctx;
     int i, ret = 0;
     unsigned int n;
+#ifdef OPENSSL_SSL_TRACE_CRYPTO
+    unsigned char *tmpout = out;
+#endif
 
     EVP_MD_CTX_init(&ctx);
     for (i = 0; i < 3; i++) {
@@ -841,6 +861,22 @@ int ssl3_generate_master_secret(SSL *s, unsigned char *out, unsigned char *p,
         ret += n;
     }
     EVP_MD_CTX_cleanup(&ctx);
+
+#ifdef OPENSSL_SSL_TRACE_CRYPTO
+    if (s->msg_callback) {
+        s->msg_callback(2, s->version, TLS1_RT_CRYPTO_PREMASTER,
+                        p, len, s, s->msg_callback_arg);
+        s->msg_callback(2, s->version, TLS1_RT_CRYPTO_CLIENT_RANDOM,
+                        s->s3->client_random, SSL3_RANDOM_SIZE,
+                        s, s->msg_callback_arg);
+        s->msg_callback(2, s->version, TLS1_RT_CRYPTO_SERVER_RANDOM,
+                        s->s3->server_random, SSL3_RANDOM_SIZE,
+                        s, s->msg_callback_arg);
+        s->msg_callback(2, s->version, TLS1_RT_CRYPTO_MASTER,
+                        tmpout, SSL3_MASTER_SECRET_SIZE,
+                        s, s->msg_callback_arg);
+    }
+#endif
     OPENSSL_cleanse(buf, sizeof buf);
     return (ret);
 }
index de917d3..ad9eeb6 100644 (file)
 #include <openssl/objects.h>
 #include "ssl_locl.h"
 #include "kssl_lcl.h"
-#ifndef OPENSSL_NO_TLSEXT
-# ifndef OPENSSL_NO_EC
-#  include "../crypto/ec/ec_lcl.h"
-# endif                         /* OPENSSL_NO_EC */
-#endif                          /* OPENSSL_NO_TLSEXT */
 #include <openssl/md5.h>
 #ifndef OPENSSL_NO_DH
 # include <openssl/dh.h>
@@ -351,7 +346,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
 /* Cipher 0C */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      SSL3_TXT_DH_DSS_DES_64_CBC_SHA,
      SSL3_CK_DH_DSS_DES_64_CBC_SHA,
      SSL_kDHd,
@@ -367,7 +362,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
 /* Cipher 0D */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      SSL3_TXT_DH_DSS_DES_192_CBC3_SHA,
      SSL3_CK_DH_DSS_DES_192_CBC3_SHA,
      SSL_kDHd,
@@ -383,7 +378,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
 /* Cipher 0E */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     0,
      SSL3_TXT_DH_RSA_DES_40_CBC_SHA,
      SSL3_CK_DH_RSA_DES_40_CBC_SHA,
      SSL_kDHr,
@@ -399,7 +394,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
 /* Cipher 0F */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      SSL3_TXT_DH_RSA_DES_64_CBC_SHA,
      SSL3_CK_DH_RSA_DES_64_CBC_SHA,
      SSL_kDHr,
@@ -415,7 +410,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
 /* Cipher 10 */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      SSL3_TXT_DH_RSA_DES_192_CBC3_SHA,
      SSL3_CK_DH_RSA_DES_192_CBC3_SHA,
      SSL_kDHr,
@@ -902,7 +897,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
      },
 /* Cipher 30 */
     {
-     0,
+     1,
      TLS1_TXT_DH_DSS_WITH_AES_128_SHA,
      TLS1_CK_DH_DSS_WITH_AES_128_SHA,
      SSL_kDHd,
@@ -917,7 +912,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
      },
 /* Cipher 31 */
     {
-     0,
+     1,
      TLS1_TXT_DH_RSA_WITH_AES_128_SHA,
      TLS1_CK_DH_RSA_WITH_AES_128_SHA,
      SSL_kDHr,
@@ -993,7 +988,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
      },
 /* Cipher 36 */
     {
-     0,
+     1,
      TLS1_TXT_DH_DSS_WITH_AES_256_SHA,
      TLS1_CK_DH_DSS_WITH_AES_256_SHA,
      SSL_kDHd,
@@ -1009,7 +1004,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
 /* Cipher 37 */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_RSA_WITH_AES_256_SHA,
      TLS1_CK_DH_RSA_WITH_AES_256_SHA,
      SSL_kDHr,
@@ -1122,7 +1117,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher 3E */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_DSS_WITH_AES_128_SHA256,
      TLS1_CK_DH_DSS_WITH_AES_128_SHA256,
      SSL_kDHd,
@@ -1138,7 +1133,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher 3F */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_RSA_WITH_AES_128_SHA256,
      TLS1_CK_DH_RSA_WITH_AES_128_SHA256,
      SSL_kDHr,
@@ -1189,7 +1184,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher 42 */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_DSS_WITH_CAMELLIA_128_CBC_SHA,
      TLS1_CK_DH_DSS_WITH_CAMELLIA_128_CBC_SHA,
      SSL_kDHd,
@@ -1205,7 +1200,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher 43 */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_RSA_WITH_CAMELLIA_128_CBC_SHA,
      TLS1_CK_DH_RSA_WITH_CAMELLIA_128_CBC_SHA,
      SSL_kDHr,
@@ -1404,7 +1399,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher 68 */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_DSS_WITH_AES_256_SHA256,
      TLS1_CK_DH_DSS_WITH_AES_256_SHA256,
      SSL_kDHd,
@@ -1420,7 +1415,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher 69 */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_RSA_WITH_AES_256_SHA256,
      TLS1_CK_DH_RSA_WITH_AES_256_SHA256,
      SSL_kDHr,
@@ -1573,7 +1568,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
      },
     /* Cipher 85 */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_DSS_WITH_CAMELLIA_256_CBC_SHA,
      TLS1_CK_DH_DSS_WITH_CAMELLIA_256_CBC_SHA,
      SSL_kDHd,
@@ -1589,7 +1584,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher 86 */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_RSA_WITH_CAMELLIA_256_CBC_SHA,
      TLS1_CK_DH_RSA_WITH_CAMELLIA_256_CBC_SHA,
      SSL_kDHr,
@@ -1739,7 +1734,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher 97 */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_DSS_WITH_SEED_SHA,
      TLS1_CK_DH_DSS_WITH_SEED_SHA,
      SSL_kDHd,
@@ -1755,7 +1750,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher 98 */
     {
-     0,                         /* not implemented (non-ephemeral DH) */
+     1,
      TLS1_TXT_DH_RSA_WITH_SEED_SHA,
      TLS1_CK_DH_RSA_WITH_SEED_SHA,
      SSL_kDHr,
@@ -1887,7 +1882,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher A0 */
     {
-     0,
+     1,
      TLS1_TXT_DH_RSA_WITH_AES_128_GCM_SHA256,
      TLS1_CK_DH_RSA_WITH_AES_128_GCM_SHA256,
      SSL_kDHr,
@@ -1903,7 +1898,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher A1 */
     {
-     0,
+     1,
      TLS1_TXT_DH_RSA_WITH_AES_256_GCM_SHA384,
      TLS1_CK_DH_RSA_WITH_AES_256_GCM_SHA384,
      SSL_kDHr,
@@ -1951,7 +1946,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher A4 */
     {
-     0,
+     1,
      TLS1_TXT_DH_DSS_WITH_AES_128_GCM_SHA256,
      TLS1_CK_DH_DSS_WITH_AES_128_GCM_SHA256,
      SSL_kDHd,
@@ -1967,7 +1962,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
 
     /* Cipher A5 */
     {
-     0,
+     1,
      TLS1_TXT_DH_DSS_WITH_AES_256_GCM_SHA384,
      TLS1_CK_DH_DSS_WITH_AES_256_GCM_SHA384,
      SSL_kDHd,
@@ -2012,6 +2007,21 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
      256,
      256,
      },
+#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+    {
+     1,
+     "SCSV",
+     SSL3_CK_SCSV,
+     0,
+     0,
+     0,
+     0,
+     0,
+     0,
+     0,
+     0,
+     0},
+#endif
 
 #ifndef OPENSSL_NO_ECDH
     /* Cipher C001 */
@@ -2899,6 +2909,10 @@ SSL3_ENC_METHOD SSLv3_enc_data = {
     (int (*)(SSL *, unsigned char *, size_t, const char *,
              size_t, const unsigned char *, size_t,
              int use_context))ssl_undefined_function,
+    0,
+    SSL3_HM_HEADER_LENGTH,
+    ssl3_set_handshake_header,
+    ssl3_handshake_write
 };
 
 long ssl3_default_timeout(void)
@@ -2932,6 +2946,20 @@ int ssl3_pending(const SSL *s)
             SSL3_RT_APPLICATION_DATA) ? s->s3->rrec.length : 0;
 }
 
+void ssl3_set_handshake_header(SSL *s, int htype, unsigned long len)
+{                               /* write the header for a len-byte body */
+    unsigned char *p = (unsigned char *)s->init_buf->data;
+    *(p++) = htype;             /* message type goes in the first byte */
+    l2n3(len, p);               /* body length; presumably 3 bytes - confirm */
+    s->init_num = (int)len + SSL3_HM_HEADER_LENGTH; /* header + body bytes */
+    s->init_off = 0;            /* presumably the write offset - confirm */
+}
+
+int ssl3_handshake_write(SSL *s)
+{                               /* forwards to ssl3_do_write() as handshake */
+    return ssl3_do_write(s, SSL3_RT_HANDSHAKE);
+}
+
 int ssl3_new(SSL *s)
 {
     SSL3_STATE *s3;
@@ -2988,6 +3016,11 @@ void ssl3_free(SSL *s)
     }
     if (s->s3->handshake_dgst)
         ssl3_free_digest_list(s);
+#ifndef OPENSSL_NO_TLSEXT
+    if (s->s3->alpn_selected)
+        OPENSSL_free(s->s3->alpn_selected);
+#endif
+
 #ifndef OPENSSL_NO_SRP
     SSL_SRP_CTX_free(s);
 #endif
@@ -3049,6 +3082,12 @@ void ssl3_clear(SSL *s)
     if (s->s3->handshake_dgst) {
         ssl3_free_digest_list(s);
     }
+#if !defined(OPENSSL_NO_TLSEXT)
+    if (s->s3->alpn_selected) {
+        OPENSSL_free(s->s3->alpn_selected);
+        s->s3->alpn_selected = NULL;
+    }
+#endif
     memset(s->s3, 0, sizeof *s->s3);
     s->s3->rbuf.buf = rp;
     s->s3->wbuf.buf = wp;
@@ -3081,6 +3120,9 @@ static char *MS_CALLBACK srp_password_from_info_cb(SSL *s, void *arg)
 }
 #endif
 
+static int ssl3_set_req_cert_type(CERT *c, const unsigned char *p,
+                                  size_t len);
+
 long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg)
 {
     int ret = 0;
@@ -3309,8 +3351,7 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg)
 
 # ifndef OPENSSL_NO_HEARTBEATS
     case SSL_CTRL_TLS_EXT_SEND_HEARTBEAT:
-        if (SSL_version(s) == DTLS1_VERSION
-            || SSL_version(s) == DTLS1_BAD_VER)
+        if (SSL_IS_DTLS(s))
             ret = dtls1_heartbeat(s);
         else
             ret = tls1_heartbeat(s);
@@ -3331,6 +3372,196 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg)
 
 #endif                          /* !OPENSSL_NO_TLSEXT */
 
+    case SSL_CTRL_CHAIN:
+        if (larg)
+            return ssl_cert_set1_chain(s->cert, (STACK_OF(X509) *)parg);
+        else
+            return ssl_cert_set0_chain(s->cert, (STACK_OF(X509) *)parg);
+
+    case SSL_CTRL_CHAIN_CERT:
+        if (larg)
+            return ssl_cert_add1_chain_cert(s->cert, (X509 *)parg);
+        else
+            return ssl_cert_add0_chain_cert(s->cert, (X509 *)parg);
+
+    case SSL_CTRL_GET_CHAIN_CERTS:
+        *(STACK_OF(X509) **)parg = s->cert->key->chain;
+        break;
+
+    case SSL_CTRL_SELECT_CURRENT_CERT:
+        return ssl_cert_select_current(s->cert, (X509 *)parg);
+
+    case SSL_CTRL_SET_CURRENT_CERT:
+        if (larg == SSL_CERT_SET_SERVER) {
+            CERT_PKEY *cpk;
+            const SSL_CIPHER *cipher;
+            if (!s->server)
+                return 0;
+            cipher = s->s3->tmp.new_cipher;
+            if (!cipher)
+                return 0;
+            /*
+             * No certificate for unauthenticated ciphersuites or using SRP
+             * authentication
+             */
+            if (cipher->algorithm_auth & (SSL_aNULL | SSL_aSRP))
+                return 2;
+            cpk = ssl_get_server_send_pkey(s);
+            if (!cpk)
+                return 0;
+            s->cert->key = cpk;
+            return 1;
+        }
+        return ssl_cert_set_current(s->cert, larg);
+
+#ifndef OPENSSL_NO_EC
+    case SSL_CTRL_GET_CURVES:
+        {
+            unsigned char *clist;
+            size_t clistlen;
+            if (!s->session)
+                return 0;
+            clist = s->session->tlsext_ellipticcurvelist;
+            clistlen = s->session->tlsext_ellipticcurvelist_length / 2;
+            if (parg) {
+                size_t i;
+                int *cptr = parg;
+                unsigned int cid, nid;
+                for (i = 0; i < clistlen; i++) {
+                    n2s(clist, cid);
+                    nid = tls1_ec_curve_id2nid(cid);
+                    if (nid != 0)
+                        cptr[i] = nid;
+                    else
+                        cptr[i] = TLSEXT_nid_unknown | cid;
+                }
+            }
+            return (int)clistlen;
+        }
+
+    case SSL_CTRL_SET_CURVES:
+        return tls1_set_curves(&s->tlsext_ellipticcurvelist,
+                               &s->tlsext_ellipticcurvelist_length,
+                               parg, larg);
+
+    case SSL_CTRL_SET_CURVES_LIST:
+        return tls1_set_curves_list(&s->tlsext_ellipticcurvelist,
+                                    &s->tlsext_ellipticcurvelist_length,
+                                    parg);
+
+    case SSL_CTRL_GET_SHARED_CURVE:
+        return tls1_shared_curve(s, larg);
+
+# ifndef OPENSSL_NO_ECDH
+    case SSL_CTRL_SET_ECDH_AUTO:
+        s->cert->ecdh_tmp_auto = larg;
+        return 1;
+# endif
+#endif
+    case SSL_CTRL_SET_SIGALGS:
+        return tls1_set_sigalgs(s->cert, parg, larg, 0);
+
+    case SSL_CTRL_SET_SIGALGS_LIST:
+        return tls1_set_sigalgs_list(s->cert, parg, 0);
+
+    case SSL_CTRL_SET_CLIENT_SIGALGS:
+        return tls1_set_sigalgs(s->cert, parg, larg, 1);
+
+    case SSL_CTRL_SET_CLIENT_SIGALGS_LIST:
+        return tls1_set_sigalgs_list(s->cert, parg, 1);
+
+    case SSL_CTRL_GET_CLIENT_CERT_TYPES:
+        {
+            const unsigned char **pctype = parg;
+            if (s->server || !s->s3->tmp.cert_req)
+                return 0;
+            if (s->cert->ctypes) {
+                if (pctype)
+                    *pctype = s->cert->ctypes;
+                return (int)s->cert->ctype_num;
+            }
+            if (pctype)
+                *pctype = (unsigned char *)s->s3->tmp.ctype;
+            return s->s3->tmp.ctype_num;
+        }
+
+    case SSL_CTRL_SET_CLIENT_CERT_TYPES:
+        if (!s->server)
+            return 0;
+        return ssl3_set_req_cert_type(s->cert, parg, larg);
+
+    case SSL_CTRL_BUILD_CERT_CHAIN:
+        return ssl_build_cert_chain(s->cert, s->ctx->cert_store, larg);
+
+    case SSL_CTRL_SET_VERIFY_CERT_STORE:
+        return ssl_cert_set_cert_store(s->cert, parg, 0, larg);
+
+    case SSL_CTRL_SET_CHAIN_CERT_STORE:
+        return ssl_cert_set_cert_store(s->cert, parg, 1, larg);
+
+    case SSL_CTRL_GET_PEER_SIGNATURE_NID:
+        if (SSL_USE_SIGALGS(s)) {
+            if (s->session && s->session->sess_cert) {
+                const EVP_MD *sig;
+                sig = s->session->sess_cert->peer_key->digest;
+                if (sig) {
+                    *(int *)parg = EVP_MD_type(sig);
+                    return 1;
+                }
+            }
+            return 0;
+        }
+        /* Might want to do something here for other versions */
+        else
+            return 0;
+
+    case SSL_CTRL_GET_SERVER_TMP_KEY:
+        if (s->server || !s->session || !s->session->sess_cert)
+            return 0;
+        else {
+            SESS_CERT *sc;
+            EVP_PKEY *ptmp;
+            int rv = 0;
+            sc = s->session->sess_cert;
+#if !defined(OPENSSL_NO_RSA) && !defined(OPENSSL_NO_DH) && !defined(OPENSSL_NO_EC) && !defined(OPENSSL_NO_ECDH)
+            if (!sc->peer_rsa_tmp && !sc->peer_dh_tmp && !sc->peer_ecdh_tmp)
+                return 0;
+#endif
+            ptmp = EVP_PKEY_new();
+            if (!ptmp)
+                return 0;
+            if (0) ;
+#ifndef OPENSSL_NO_RSA
+            else if (sc->peer_rsa_tmp)
+                rv = EVP_PKEY_set1_RSA(ptmp, sc->peer_rsa_tmp);
+#endif
+#ifndef OPENSSL_NO_DH
+            else if (sc->peer_dh_tmp)
+                rv = EVP_PKEY_set1_DH(ptmp, sc->peer_dh_tmp);
+#endif
+#ifndef OPENSSL_NO_ECDH
+            else if (sc->peer_ecdh_tmp)
+                rv = EVP_PKEY_set1_EC_KEY(ptmp, sc->peer_ecdh_tmp);
+#endif
+            if (rv) {
+                *(EVP_PKEY **)parg = ptmp;
+                return 1;
+            }
+            EVP_PKEY_free(ptmp);
+            return 0;
+        }
+#ifndef OPENSSL_NO_EC
+    case SSL_CTRL_GET_EC_POINT_FORMATS:
+        {
+            SSL_SESSION *sess = s->session;
+            const unsigned char **pformat = parg;
+            if (!sess || !sess->tlsext_ecpointformatlist)
+                return 0;
+            *pformat = sess->tlsext_ecpointformatlist;
+            return (int)sess->tlsext_ecpointformatlist_length;
+        }
+#endif
+
     case SSL_CTRL_CHECK_PROTO_VERSION:
         /*
          * For library-internal use; checks that the current protocol is the
@@ -3606,6 +3837,47 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg)
         ctx->srp_ctx.strength = larg;
         break;
 # endif
+
+# ifndef OPENSSL_NO_EC
+    case SSL_CTRL_SET_CURVES:
+        return tls1_set_curves(&ctx->tlsext_ellipticcurvelist,
+                               &ctx->tlsext_ellipticcurvelist_length,
+                               parg, larg);
+
+    case SSL_CTRL_SET_CURVES_LIST:
+        return tls1_set_curves_list(&ctx->tlsext_ellipticcurvelist,
+                                    &ctx->tlsext_ellipticcurvelist_length,
+                                    parg);
+#  ifndef OPENSSL_NO_ECDH
+    case SSL_CTRL_SET_ECDH_AUTO:
+        ctx->cert->ecdh_tmp_auto = larg;
+        return 1;
+#  endif
+# endif
+    case SSL_CTRL_SET_SIGALGS:
+        return tls1_set_sigalgs(ctx->cert, parg, larg, 0);
+
+    case SSL_CTRL_SET_SIGALGS_LIST:
+        return tls1_set_sigalgs_list(ctx->cert, parg, 0);
+
+    case SSL_CTRL_SET_CLIENT_SIGALGS:
+        return tls1_set_sigalgs(ctx->cert, parg, larg, 1);
+
+    case SSL_CTRL_SET_CLIENT_SIGALGS_LIST:
+        return tls1_set_sigalgs_list(ctx->cert, parg, 1);
+
+    case SSL_CTRL_SET_CLIENT_CERT_TYPES:
+        return ssl3_set_req_cert_type(ctx->cert, parg, larg);
+
+    case SSL_CTRL_BUILD_CERT_CHAIN:
+        return ssl_build_cert_chain(ctx->cert, ctx->cert_store, larg);
+
+    case SSL_CTRL_SET_VERIFY_CERT_STORE:
+        return ssl_cert_set_cert_store(ctx->cert, parg, 0, larg);
+
+    case SSL_CTRL_SET_CHAIN_CERT_STORE:
+        return ssl_cert_set_cert_store(ctx->cert, parg, 1, larg);
+
 #endif                          /* !OPENSSL_NO_TLSEXT */
 
         /* A Thawte special :-) */
@@ -3618,7 +3890,10 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg)
         break;
 
     case SSL_CTRL_GET_EXTRA_CHAIN_CERTS:
-        *(STACK_OF(X509) **)parg = ctx->extra_certs;
+        if (ctx->extra_certs == NULL && larg == 0)
+            *(STACK_OF(X509) **)parg = ctx->cert->key->chain;
+        else
+            *(STACK_OF(X509) **)parg = ctx->extra_certs;
         break;
 
     case SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS:
@@ -3628,6 +3903,28 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg)
         }
         break;
 
+    case SSL_CTRL_CHAIN:
+        if (larg)
+            return ssl_cert_set1_chain(ctx->cert, (STACK_OF(X509) *)parg);
+        else
+            return ssl_cert_set0_chain(ctx->cert, (STACK_OF(X509) *)parg);
+
+    case SSL_CTRL_CHAIN_CERT:
+        if (larg)
+            return ssl_cert_add1_chain_cert(ctx->cert, (X509 *)parg);
+        else
+            return ssl_cert_add0_chain_cert(ctx->cert, (X509 *)parg);
+
+    case SSL_CTRL_GET_CHAIN_CERTS:
+        *(STACK_OF(X509) **)parg = ctx->cert->key->chain;
+        break;
+
+    case SSL_CTRL_SELECT_CURRENT_CERT:
+        return ssl_cert_select_current(ctx->cert, (X509 *)parg);
+
+    case SSL_CTRL_SET_CURRENT_CERT:
+        return ssl_cert_set_current(ctx->cert, larg);
+
     default:
         return (0);
     }
@@ -3702,7 +3999,6 @@ long ssl3_ctx_callback_ctrl(SSL_CTX *ctx, int cmd, void (*fp) (void))
         break;
 # endif
 #endif
-
     default:
         return (0);
     }
@@ -3726,10 +4022,7 @@ const SSL_CIPHER *ssl3_get_cipher_by_char(const unsigned char *p)
     if (cp == NULL)
         fprintf(stderr, "Unknown cipher ID %x\n", (p[0] << 8) | p[1]);
 #endif
-    if (cp == NULL || cp->valid == 0)
-        return NULL;
-    else
-        return cp;
+    return cp;
 }
 
 int ssl3_put_cipher_by_char(const SSL_CIPHER *c, unsigned char *p)
@@ -3752,11 +4045,6 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt,
     SSL_CIPHER *c, *ret = NULL;
     STACK_OF(SSL_CIPHER) *prio, *allow;
     int i, ii, ok;
-#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_EC)
-    unsigned int j;
-    int ec_ok, ec_nid;
-    unsigned char ec_search1 = 0, ec_search2 = 0;
-#endif
     CERT *cert;
     unsigned long alg_k, alg_a, mask_k, mask_a, emask_k, emask_a;
 
@@ -3789,7 +4077,7 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt,
     }
 #endif
 
-    if (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE) {
+    if (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE || tls1_suiteb(s)) {
         prio = srvr;
         allow = clnt;
     } else {
@@ -3797,12 +4085,13 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt,
         allow = srvr;
     }
 
+    tls1_set_cert_validity(s);
+
     for (i = 0; i < sk_SSL_CIPHER_num(prio); i++) {
         c = sk_SSL_CIPHER_value(prio, i);
 
-        /* Skip TLS v1.2 only ciphersuites if lower than v1.2 */
-        if ((c->algorithm_ssl & SSL_TLSV1_2) &&
-            (TLS1_get_version(s) < TLS1_2_VERSION))
+        /* Skip TLS v1.2 only ciphersuites if not supported */
+        if ((c->algorithm_ssl & SSL_TLSV1_2) && !SSL_USE_TLS1_2_CIPHERS(s))
             continue;
 
         ssl_set_cert_masks(cert, c);
@@ -3857,194 +4146,13 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt,
 
 #ifndef OPENSSL_NO_TLSEXT
 # ifndef OPENSSL_NO_EC
-        if (
-               /*
-                * if we are considering an ECC cipher suite that uses our
-                * certificate
-                */
-               (alg_a & SSL_aECDSA || alg_a & SSL_aECDH)
-               /* and we have an ECC certificate */
-               && (s->cert->pkeys[SSL_PKEY_ECC].x509 != NULL)
-               /*
-                * and the client specified a Supported Point Formats
-                * extension
-                */
-               && ((s->session->tlsext_ecpointformatlist_length > 0)
-                   && (s->session->tlsext_ecpointformatlist != NULL))
-               /* and our certificate's point is compressed */
-               && ((s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info != NULL)
-                   && (s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info->key !=
-                       NULL)
-                   && (s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info->
-                       key->public_key != NULL)
-                   && (s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info->
-                       key->public_key->data != NULL)
-                   &&
-                   ((*
-                     (s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info->
-                      key->public_key->data) == POINT_CONVERSION_COMPRESSED)
-                    ||
-                    (*
-                     (s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info->
-                      key->public_key->data) ==
-                     POINT_CONVERSION_COMPRESSED + 1)
-                   )
-               )
-            ) {
-            ec_ok = 0;
-            /*
-             * if our certificate's curve is over a field type that the
-             * client does not support then do not allow this cipher suite to
-             * be negotiated
-             */
-            if ((s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec != NULL)
-                && (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec->group !=
-                    NULL)
-                && (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec->
-                    group->meth != NULL)
-                &&
-                (EC_METHOD_get_field_type
-                 (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec->
-                  group->meth) == NID_X9_62_prime_field)
-                ) {
-                for (j = 0; j < s->session->tlsext_ecpointformatlist_length;
-                     j++) {
-                    if (s->session->tlsext_ecpointformatlist[j] ==
-                        TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime) {
-                        ec_ok = 1;
-                        break;
-                    }
-                }
-            } else
-                if (EC_METHOD_get_field_type
-                    (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec->
-                     group->meth) == NID_X9_62_characteristic_two_field) {
-                for (j = 0; j < s->session->tlsext_ecpointformatlist_length;
-                     j++) {
-                    if (s->session->tlsext_ecpointformatlist[j] ==
-                        TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2) {
-                        ec_ok = 1;
-                        break;
-                    }
-                }
-            }
-            ok = ok && ec_ok;
-        }
-        if (
-               /*
-                * if we are considering an ECC cipher suite that uses our
-                * certificate
-                */
-               (alg_a & SSL_aECDSA || alg_a & SSL_aECDH)
-               /* and we have an ECC certificate */
-               && (s->cert->pkeys[SSL_PKEY_ECC].x509 != NULL)
-               /*
-                * and the client specified an EllipticCurves extension
-                */
-               && ((s->session->tlsext_ellipticcurvelist_length > 0)
-                   && (s->session->tlsext_ellipticcurvelist != NULL))
-            ) {
-            ec_ok = 0;
-            if ((s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec != NULL)
-                && (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec->group !=
-                    NULL)
-                ) {
-                ec_nid =
-                    EC_GROUP_get_curve_name(s->cert->
-                                            pkeys[SSL_PKEY_ECC].privatekey->
-                                            pkey.ec->group);
-                if ((ec_nid == 0)
-                    && (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.
-                        ec->group->meth != NULL)
-                    ) {
-                    if (EC_METHOD_get_field_type
-                        (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.
-                         ec->group->meth) == NID_X9_62_prime_field) {
-                        ec_search1 = 0xFF;
-                        ec_search2 = 0x01;
-                    } else
-                        if (EC_METHOD_get_field_type
-                            (s->cert->pkeys[SSL_PKEY_ECC].privatekey->
-                             pkey.ec->group->meth) ==
-                            NID_X9_62_characteristic_two_field) {
-                        ec_search1 = 0xFF;
-                        ec_search2 = 0x02;
-                    }
-                } else {
-                    ec_search1 = 0x00;
-                    ec_search2 = tls1_ec_nid2curve_id(ec_nid);
-                }
-                if ((ec_search1 != 0) || (ec_search2 != 0)) {
-                    for (j = 0;
-                         j < s->session->tlsext_ellipticcurvelist_length / 2;
-                         j++) {
-                        if ((s->session->tlsext_ellipticcurvelist[2 * j] ==
-                             ec_search1)
-                            && (s->session->tlsext_ellipticcurvelist[2 * j +
-                                                                     1] ==
-                                ec_search2)) {
-                            ec_ok = 1;
-                            break;
-                        }
-                    }
-                }
-            }
-            ok = ok && ec_ok;
-        }
 #  ifndef OPENSSL_NO_ECDH
-        if (
-               /*
-                * if we are considering an ECC cipher suite that uses an
-                * ephemeral EC key
-                */
-               (alg_k & SSL_kEECDH)
-               /* and we have an ephemeral EC key */
-               && (s->cert->ecdh_tmp != NULL)
-               /*
-                * and the client specified an EllipticCurves extension
-                */
-               && ((s->session->tlsext_ellipticcurvelist_length > 0)
-                   && (s->session->tlsext_ellipticcurvelist != NULL))
-            ) {
-            ec_ok = 0;
-            if (s->cert->ecdh_tmp->group != NULL) {
-                ec_nid = EC_GROUP_get_curve_name(s->cert->ecdh_tmp->group);
-                if ((ec_nid == 0)
-                    && (s->cert->ecdh_tmp->group->meth != NULL)
-                    ) {
-                    if (EC_METHOD_get_field_type
-                        (s->cert->ecdh_tmp->group->meth) ==
-                        NID_X9_62_prime_field) {
-                        ec_search1 = 0xFF;
-                        ec_search2 = 0x01;
-                    } else
-                        if (EC_METHOD_get_field_type
-                            (s->cert->ecdh_tmp->group->meth) ==
-                            NID_X9_62_characteristic_two_field) {
-                        ec_search1 = 0xFF;
-                        ec_search2 = 0x02;
-                    }
-                } else {
-                    ec_search1 = 0x00;
-                    ec_search2 = tls1_ec_nid2curve_id(ec_nid);
-                }
-                if ((ec_search1 != 0) || (ec_search2 != 0)) {
-                    for (j = 0;
-                         j < s->session->tlsext_ellipticcurvelist_length / 2;
-                         j++) {
-                        if ((s->session->tlsext_ellipticcurvelist[2 * j] ==
-                             ec_search1)
-                            && (s->session->tlsext_ellipticcurvelist[2 * j +
-                                                                     1] ==
-                                ec_search2)) {
-                            ec_ok = 1;
-                            break;
-                        }
-                    }
-                }
-            }
-            ok = ok && ec_ok;
-        }
+        /*
+         * if we are considering an ECC cipher suite that uses an ephemeral
+         * EC key check it
+         */
+        if (alg_k & SSL_kEECDH)
+            ok = ok && tls1_check_ec_tmp_key(s, c->id);
 #  endif                        /* OPENSSL_NO_ECDH */
 # endif                         /* OPENSSL_NO_EC */
 #endif                          /* OPENSSL_NO_TLSEXT */
@@ -4071,8 +4179,41 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt,
 int ssl3_get_req_cert_type(SSL *s, unsigned char *p)
 {
     int ret = 0;
+    const unsigned char *sig;
+    size_t i, siglen;
+    int have_rsa_sign = 0, have_dsa_sign = 0;
+#ifndef OPENSSL_NO_ECDSA
+    int have_ecdsa_sign = 0;
+#endif
+    int nostrict = 1;
     unsigned long alg_k;
 
+    /* If we have custom certificate types set, use them */
+    if (s->cert->ctypes) {
+        memcpy(p, s->cert->ctypes, s->cert->ctype_num);
+        return (int)s->cert->ctype_num;
+    }
+    /* get configured sigalgs */
+    siglen = tls12_get_psigalgs(s, &sig);
+    if (s->cert->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT)
+        nostrict = 0;
+    for (i = 0; i < siglen; i += 2, sig += 2) {
+        switch (sig[1]) {
+        case TLSEXT_signature_rsa:
+            have_rsa_sign = 1;
+            break;
+
+        case TLSEXT_signature_dsa:
+            have_dsa_sign = 1;
+            break;
+#ifndef OPENSSL_NO_ECDSA
+        case TLSEXT_signature_ecdsa:
+            have_ecdsa_sign = 1;
+            break;
+#endif
+        }
+    }
+
     alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
 
 #ifndef OPENSSL_NO_GOST
@@ -4088,10 +4229,16 @@ int ssl3_get_req_cert_type(SSL *s, unsigned char *p)
 #ifndef OPENSSL_NO_DH
     if (alg_k & (SSL_kDHr | SSL_kEDH)) {
 # ifndef OPENSSL_NO_RSA
-        p[ret++] = SSL3_CT_RSA_FIXED_DH;
+        /*
+         * Since this refers to a certificate signed with an RSA algorithm,
+         * only check for rsa signing in strict mode.
+         */
+        if (nostrict || have_rsa_sign)
+            p[ret++] = SSL3_CT_RSA_FIXED_DH;
 # endif
 # ifndef OPENSSL_NO_DSA
-        p[ret++] = SSL3_CT_DSS_FIXED_DH;
+        if (nostrict || have_dsa_sign)
+            p[ret++] = SSL3_CT_DSS_FIXED_DH;
 # endif
     }
     if ((s->version == SSL3_VERSION) &&
@@ -4105,15 +4252,19 @@ int ssl3_get_req_cert_type(SSL *s, unsigned char *p)
     }
 #endif                          /* !OPENSSL_NO_DH */
 #ifndef OPENSSL_NO_RSA
-    p[ret++] = SSL3_CT_RSA_SIGN;
+    if (have_rsa_sign)
+        p[ret++] = SSL3_CT_RSA_SIGN;
 #endif
 #ifndef OPENSSL_NO_DSA
-    p[ret++] = SSL3_CT_DSS_SIGN;
+    if (have_dsa_sign)
+        p[ret++] = SSL3_CT_DSS_SIGN;
 #endif
 #ifndef OPENSSL_NO_ECDH
     if ((alg_k & (SSL_kECDHr | SSL_kECDHe)) && (s->version >= TLS1_VERSION)) {
-        p[ret++] = TLS_CT_RSA_FIXED_ECDH;
-        p[ret++] = TLS_CT_ECDSA_FIXED_ECDH;
+        if (nostrict || have_rsa_sign)
+            p[ret++] = TLS_CT_RSA_FIXED_ECDH;
+        if (nostrict || have_ecdsa_sign)
+            p[ret++] = TLS_CT_ECDSA_FIXED_ECDH;
     }
 #endif
 
@@ -4123,12 +4274,31 @@ int ssl3_get_req_cert_type(SSL *s, unsigned char *p)
      * need to check for SSL_kECDH or SSL_kEECDH
      */
     if (s->version >= TLS1_VERSION) {
-        p[ret++] = TLS_CT_ECDSA_SIGN;
+        if (have_ecdsa_sign)
+            p[ret++] = TLS_CT_ECDSA_SIGN;
     }
 #endif
     return (ret);
 }
 
+static int ssl3_set_req_cert_type(CERT *c, const unsigned char *p, size_t len)
+{
+    if (c->ctypes) {
+        OPENSSL_free(c->ctypes);
+        c->ctypes = NULL;
+    }
+    if (!p || !len)
+        return 1;
+    if (len > 0xff)
+        return 0;
+    c->ctypes = OPENSSL_malloc(len);
+    if (!c->ctypes)
+        return 0;
+    memcpy(c->ctypes, p, len);
+    c->ctype_num = len;
+    return 1;
+}
+
 int ssl3_shutdown(SSL *s)
 {
     int ret;
@@ -4310,14 +4480,14 @@ int ssl3_renegotiate_check(SSL *s)
 }
 
 /*
- * If we are using TLS v1.2 or later and default SHA1+MD5 algorithms switch
- * to new SHA256 PRF and handshake macs
+ * If we are using default SHA1+MD5 algorithms switch to new SHA256 PRF and
+ * handshake macs if required.
  */
 long ssl_get_algorithm2(SSL *s)
 {
     long alg2 = s->s3->tmp.new_cipher->algorithm2;
-    if (s->method->version == TLS1_2_VERSION &&
-        alg2 == (SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF))
+    if (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_SHA256_PRF
+        && alg2 == (SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF))
         return SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256;
     return alg2;
 }
index 25cf929..603c285 100644 (file)
 #include <openssl/buffer.h>
 #include <openssl/rand.h>
 
+#ifndef  EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+# define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
+#endif
+
+#if     defined(OPENSSL_SMALL_FOOTPRINT) || \
+        !(      defined(AES_ASM) &&     ( \
+                defined(__x86_64)       || defined(__x86_64__)  || \
+                defined(_M_AMD64)       || defined(_M_X64)      || \
+                defined(__INTEL__)      ) \
+        )
+# undef EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+# define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0
+#endif
+
 static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
                          unsigned int len, int create_empty_fragment);
 static int ssl3_get_record(SSL *s);
@@ -183,7 +197,7 @@ int ssl3_read_n(SSL *s, int n, int max, int extend)
      * operation returns the whole packet at once (as long as it fits into
      * the buffer).
      */
-    if (SSL_version(s) == DTLS1_VERSION || SSL_version(s) == DTLS1_BAD_VER) {
+    if (SSL_IS_DTLS(s)) {
         if (left == 0 && extend)
             return 0;
         if (left > 0 && n > left)
@@ -246,9 +260,7 @@ int ssl3_read_n(SSL *s, int n, int max, int extend)
 
         if (i <= 0) {
             rb->left = left;
-            if (s->mode & SSL_MODE_RELEASE_BUFFERS &&
-                SSL_version(s) != DTLS1_VERSION
-                && SSL_version(s) != DTLS1_BAD_VER)
+            if (s->mode & SSL_MODE_RELEASE_BUFFERS && !SSL_IS_DTLS(s))
                 if (len + left == 0)
                     ssl3_release_read_buffer(s);
             return (i);
@@ -259,8 +271,7 @@ int ssl3_read_n(SSL *s, int n, int max, int extend)
          * underlying transport protocol is message oriented as opposed to
          * byte oriented as in the TLS case.
          */
-        if (SSL_version(s) == DTLS1_VERSION
-            || SSL_version(s) == DTLS1_BAD_VER) {
+        if (SSL_IS_DTLS(s)) {
             if (n > left)
                 n = left;       /* makes the while condition false */
         }
@@ -331,6 +342,9 @@ static int ssl3_get_record(SSL *s)
         s->rstate = SSL_ST_READ_BODY;
 
         p = s->packet;
+        if (s->msg_callback)
+            s->msg_callback(0, 0, SSL3_RT_HEADER, p, 5, s,
+                            s->msg_callback_arg);
 
         /* Pull apart the header into the SSL3_RECORD */
         rr->type = *(p++);
@@ -621,8 +635,13 @@ int ssl3_do_compress(SSL *ssl)
 int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
 {
     const unsigned char *buf = buf_;
+    int tot;
     unsigned int n, nw;
-    int i, tot;
+#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+    unsigned int max_send_fragment;
+#endif
+    SSL3_BUFFER *wb = &(s->s3->wbuf);
+    int i;
 
     s->rwstate = SSL_NOTHING;
     OPENSSL_assert(s->s3->wnum <= INT_MAX);
@@ -653,6 +672,154 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
         return (-1);
     }
 
+    /*
+     * first check if there is a SSL3_BUFFER still being written out.  This
+     * will happen with non blocking IO
+     */
+    if (wb->left != 0) {
+        i = ssl3_write_pending(s, type, &buf[tot], s->s3->wpend_tot);
+        if (i <= 0) {
+            /* XXX should we ssl3_release_write_buffer if i<0? */
+            s->s3->wnum = tot;
+            return i;
+        }
+        tot += i;               /* this might be last fragment */
+    }
+#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK
+    /*
+     * Depending on platform multi-block can deliver several *times*
+     * better performance. Downside is that it has to allocate
+     * jumbo buffer to accommodate up to 8 records, but the
+     * compromise is considered worthy.
+     */
+    if (type == SSL3_RT_APPLICATION_DATA &&
+        len >= 4 * (int)(max_send_fragment = s->max_send_fragment) &&
+        s->compress == NULL && s->msg_callback == NULL &&
+        SSL_USE_EXPLICIT_IV(s) &&
+        EVP_CIPHER_flags(s->enc_write_ctx->cipher) &
+        EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK) {
+        unsigned char aad[13];
+        EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM mb_param;
+        int packlen;
+
+        /* minimize address aliasing conflicts */
+        if ((max_send_fragment & 0xfff) == 0)
+            max_send_fragment -= 512;
+
+        if (tot == 0 || wb->buf == NULL) { /* allocate jumbo buffer */
+            ssl3_release_write_buffer(s);
+
+            packlen = EVP_CIPHER_CTX_ctrl(s->enc_write_ctx,
+                                          EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE,
+                                          max_send_fragment, NULL);
+
+            if (len >= 8 * (int)max_send_fragment)
+                packlen *= 8;
+            else
+                packlen *= 4;
+
+            wb->buf = OPENSSL_malloc(packlen);
+            if (!wb->buf) {
+                SSLerr(SSL_F_SSL3_WRITE_BYTES, ERR_R_MALLOC_FAILURE);
+                return -1;
+            }
+            wb->len = packlen;
+        } else if (tot == len) { /* done? */
+            OPENSSL_free(wb->buf); /* free jumbo buffer */
+            wb->buf = NULL;
+            return tot;
+        }
+
+        n = (len - tot);
+        for (;;) {
+            if (n < 4 * max_send_fragment) {
+                OPENSSL_free(wb->buf); /* free jumbo buffer */
+                wb->buf = NULL;
+                break;
+            }
+
+            if (s->s3->alert_dispatch) {
+                i = s->method->ssl_dispatch_alert(s);
+                if (i <= 0) {
+                    s->s3->wnum = tot;
+                    return i;
+                }
+            }
+
+            if (n >= 8 * max_send_fragment)
+                nw = max_send_fragment * (mb_param.interleave = 8);
+            else
+                nw = max_send_fragment * (mb_param.interleave = 4);
+
+            memcpy(aad, s->s3->write_sequence, 8);
+            aad[8] = type;
+            aad[9] = (unsigned char)(s->version >> 8);
+            aad[10] = (unsigned char)(s->version);
+            aad[11] = 0;
+            aad[12] = 0;
+            mb_param.out = NULL;
+            mb_param.inp = aad;
+            mb_param.len = nw;
+
+            packlen = EVP_CIPHER_CTX_ctrl(s->enc_write_ctx,
+                                          EVP_CTRL_TLS1_1_MULTIBLOCK_AAD,
+                                          sizeof(mb_param), &mb_param);
+
+            if (packlen <= 0 || packlen > (int)wb->len) { /* never happens */
+                OPENSSL_free(wb->buf); /* free jumbo buffer */
+                wb->buf = NULL;
+                break;
+            }
+
+            mb_param.out = wb->buf;
+            mb_param.inp = &buf[tot];
+            mb_param.len = nw;
+
+            if (EVP_CIPHER_CTX_ctrl(s->enc_write_ctx,
+                                    EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT,
+                                    sizeof(mb_param), &mb_param) <= 0)
+                return -1;
+
+            s->s3->write_sequence[7] += mb_param.interleave;
+            if (s->s3->write_sequence[7] < mb_param.interleave) {
+                int j = 6;
+                while (j >= 0 && (++s->s3->write_sequence[j--]) == 0) ;
+            }
+
+            wb->offset = 0;
+            wb->left = packlen;
+
+            s->s3->wpend_tot = nw;
+            s->s3->wpend_buf = &buf[tot];
+            s->s3->wpend_type = type;
+            s->s3->wpend_ret = nw;
+
+            i = ssl3_write_pending(s, type, &buf[tot], nw);
+            if (i <= 0) {
+                if (i < 0 && (!s->wbio || !BIO_should_retry(s->wbio))) {
+                    OPENSSL_free(wb->buf);
+                    wb->buf = NULL;
+                }
+                s->s3->wnum = tot;
+                return i;
+            }
+            if (i == (int)n) {
+                OPENSSL_free(wb->buf); /* free jumbo buffer */
+                wb->buf = NULL;
+                return tot + i;
+            }
+            n -= i;
+            tot += i;
+        }
+    } else
+#endif
+    if (tot == len) {           /* done? */
+        if (s->mode & SSL_MODE_RELEASE_BUFFERS && !SSL_IS_DTLS(s))
+            ssl3_release_write_buffer(s);
+
+        return tot;
+    }
+
     n = (len - tot);
     for (;;) {
         if (n > s->max_send_fragment)
@@ -662,6 +829,7 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
 
         i = do_ssl3_write(s, type, &(buf[tot]), nw, 0);
         if (i <= 0) {
+            /* XXX should we ssl3_release_write_buffer if i<0? */
             s->s3->wnum = tot;
             return i;
         }
@@ -675,6 +843,10 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len)
              */
             s->s3->empty_fragment_done = 0;
 
+            if ((i == (int)n) && s->mode & SSL_MODE_RELEASE_BUFFERS &&
+                !SSL_IS_DTLS(s))
+                ssl3_release_write_buffer(s);
+
             return tot + i;
         }
 
@@ -809,8 +981,8 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
     /* field where we are to write out packet length */
     plen = p;
     p += 2;
-    /* Explicit IV length, block ciphers and TLS version 1.1 or later */
-    if (s->enc_write_ctx && s->version >= TLS1_1_VERSION) {
+    /* Explicit IV length, block ciphers and appropriate version flag */
+    if (s->enc_write_ctx && SSL_USE_EXPLICIT_IV(s)) {
         int mode = EVP_CIPHER_CTX_mode(s->enc_write_ctx);
         if (mode == EVP_CIPH_CBC_MODE) {
             eivlen = EVP_CIPHER_CTX_iv_length(s->enc_write_ctx);
@@ -873,6 +1045,10 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf,
     /* record length after mac and block padding */
     s2n(wr->length, plen);
 
+    if (s->msg_callback)
+        s->msg_callback(1, 0, SSL3_RT_HEADER, plen - 5, 5, s,
+                        s->msg_callback_arg);
+
     /*
      * we should now have wr->data pointing to the encrypted data, which is
      * wr->length long
@@ -936,10 +1112,6 @@ int ssl3_write_pending(SSL *s, int type, const unsigned char *buf,
         if (i == wb->left) {
             wb->left = 0;
             wb->offset += i;
-            if (s->mode & SSL_MODE_RELEASE_BUFFERS &&
-                SSL_version(s) != DTLS1_VERSION
-                && SSL_version(s) != DTLS1_BAD_VER)
-                ssl3_release_write_buffer(s);
             s->rwstate = SSL_NOTHING;
             return (s->s3->wpend_ret);
         } else if (i <= 0) {
index 3a5f71d..acd3b9e 100644 (file)
@@ -293,7 +293,7 @@ int ssl3_accept(SSL *s)
             }
 
             s->init_num = 0;
-            s->s3->flags &= ~SSL3_FLAGS_SGC_RESTART_DONE;
+            s->s3->flags &= ~TLS1_FLAGS_SKIP_CERT_VERIFY;
             s->s3->flags &= ~SSL3_FLAGS_CCS_OK;
             /*
              * Should have been reset by ssl3_get_finished, too.
@@ -360,12 +360,12 @@ int ssl3_accept(SSL *s)
         case SSL3_ST_SR_CLNT_HELLO_C:
 
             s->shutdown = 0;
-            if (s->rwstate != SSL_X509_LOOKUP) {
-                ret = ssl3_get_client_hello(s);
-                if (ret <= 0)
-                    goto end;
-            }
+            ret = ssl3_get_client_hello(s);
+            if (ret <= 0)
+                goto end;
 #ifndef OPENSSL_NO_SRP
+            s->state = SSL3_ST_SR_CLNT_HELLO_D;
+        case SSL3_ST_SR_CLNT_HELLO_D:
             {
                 int al;
                 if ((ret = ssl_check_srp_ext_ClientHello(s, &al)) < 0) {
@@ -477,7 +477,7 @@ int ssl3_accept(SSL *s)
                 /* SRP: send ServerKeyExchange */
                 || (alg_k & SSL_kSRP)
 #endif
-                || (alg_k & (SSL_kDHr | SSL_kDHd | SSL_kEDH))
+                || (alg_k & SSL_kEDH)
                 || (alg_k & SSL_kEECDH)
                 || ((alg_k & SSL_kRSA)
                     && (s->cert->pkeys[SSL_PKEY_RSA_ENC].privatekey == NULL
@@ -590,21 +590,13 @@ int ssl3_accept(SSL *s)
 
         case SSL3_ST_SR_CERT_A:
         case SSL3_ST_SR_CERT_B:
-            /* Check for second client hello (MS SGC) */
-            ret = ssl3_check_client_hello(s);
-            if (ret <= 0)
-                goto end;
-            if (ret == 2)
-                s->state = SSL3_ST_SR_CLNT_HELLO_C;
-            else {
-                if (s->s3->tmp.cert_request) {
-                    ret = ssl3_get_client_certificate(s);
-                    if (ret <= 0)
-                        goto end;
-                }
-                s->init_num = 0;
-                s->state = SSL3_ST_SR_KEY_EXCH_A;
+            if (s->s3->tmp.cert_request) {
+                ret = ssl3_get_client_certificate(s);
+                if (ret <= 0)
+                    goto end;
             }
+            s->init_num = 0;
+            s->state = SSL3_ST_SR_KEY_EXCH_A;
             break;
 
         case SSL3_ST_SR_KEY_EXCH_A:
@@ -628,13 +620,13 @@ int ssl3_accept(SSL *s)
                     s->state = SSL3_ST_SR_FINISHED_A;
 #endif
                 s->init_num = 0;
-            } else if (TLS1_get_version(s) >= TLS1_2_VERSION) {
+            } else if (SSL_USE_SIGALGS(s)) {
                 s->state = SSL3_ST_SR_CERT_VRFY_A;
                 s->init_num = 0;
                 if (!s->session->peer)
                     break;
                 /*
-                 * For TLS v1.2 freeze the handshake buffer at this point and
+                 * For sigalgs freeze the handshake buffer at this point and
                  * digest cached records.
                  */
                 if (!s->s3->handshake_buffer) {
@@ -898,86 +890,33 @@ int ssl3_accept(SSL *s)
 
 int ssl3_send_hello_request(SSL *s)
 {
-    unsigned char *p;
 
     if (s->state == SSL3_ST_SW_HELLO_REQ_A) {
-        p = (unsigned char *)s->init_buf->data;
-        *(p++) = SSL3_MT_HELLO_REQUEST;
-        *(p++) = 0;
-        *(p++) = 0;
-        *(p++) = 0;
-
+        ssl_set_handshake_header(s, SSL3_MT_HELLO_REQUEST, 0);
         s->state = SSL3_ST_SW_HELLO_REQ_B;
-        /* number of bytes to write */
-        s->init_num = 4;
-        s->init_off = 0;
     }
 
     /* SSL3_ST_SW_HELLO_REQ_B */
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
-}
-
-int ssl3_check_client_hello(SSL *s)
-{
-    int ok;
-    long n;
-
-    /*
-     * this function is called when we really expect a Certificate message,
-     * so permit appropriate message length
-     */
-    n = s->method->ssl_get_message(s,
-                                   SSL3_ST_SR_CERT_A,
-                                   SSL3_ST_SR_CERT_B,
-                                   -1, s->max_cert_list, &ok);
-    if (!ok)
-        return ((int)n);
-    s->s3->tmp.reuse_message = 1;
-    if (s->s3->tmp.message_type == SSL3_MT_CLIENT_HELLO) {
-        /*
-         * We only allow the client to restart the handshake once per
-         * negotiation.
-         */
-        if (s->s3->flags & SSL3_FLAGS_SGC_RESTART_DONE) {
-            SSLerr(SSL_F_SSL3_CHECK_CLIENT_HELLO,
-                   SSL_R_MULTIPLE_SGC_RESTARTS);
-            return -1;
-        }
-        /*
-         * Throw away what we have done so far in the current handshake,
-         * which will now be aborted. (A full SSL_clear would be too much.)
-         */
-#ifndef OPENSSL_NO_DH
-        if (s->s3->tmp.dh != NULL) {
-            DH_free(s->s3->tmp.dh);
-            s->s3->tmp.dh = NULL;
-        }
-#endif
-#ifndef OPENSSL_NO_ECDH
-        if (s->s3->tmp.ecdh != NULL) {
-            EC_KEY_free(s->s3->tmp.ecdh);
-            s->s3->tmp.ecdh = NULL;
-        }
-#endif
-        s->s3->flags |= SSL3_FLAGS_SGC_RESTART_DONE;
-        return 2;
-    }
-    return 1;
+    return ssl_do_write(s);
 }
 
 int ssl3_get_client_hello(SSL *s)
 {
-    int i, j, ok, al, ret = -1;
+    int i, j, ok, al = SSL_AD_INTERNAL_ERROR, ret = -1;
     unsigned int cookie_len;
     long n;
     unsigned long id;
-    unsigned char *p, *d, *q;
+    unsigned char *p, *d;
     SSL_CIPHER *c;
 #ifndef OPENSSL_NO_COMP
+    unsigned char *q;
     SSL_COMP *comp = NULL;
 #endif
     STACK_OF(SSL_CIPHER) *ciphers = NULL;
 
+    if (s->state == SSL3_ST_SR_CLNT_HELLO_C && !s->first_packet)
+        goto retry_cert;
+
     /*
      * We do this so that we will respond with our native type. If we are
      * TLSv1 and we get SSLv3, we will respond with TLSv1, This down
@@ -1016,8 +955,9 @@ int ssl3_get_client_hello(SSL *s)
     s->client_version = (((int)p[0]) << 8) | (int)p[1];
     p += 2;
 
-    if ((s->version == DTLS1_VERSION && s->client_version > s->version) ||
-        (s->version != DTLS1_VERSION && s->client_version < s->version)) {
+    if (SSL_IS_DTLS(s) ? (s->client_version > s->version &&
+                          s->method->version != DTLS_ANY_VERSION)
+        : (s->client_version < s->version)) {
         SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_WRONG_VERSION_NUMBER);
         if ((s->client_version >> 8) == SSL3_VERSION_MAJOR &&
             !s->enc_write_ctx && !s->write_hash) {
@@ -1107,7 +1047,7 @@ int ssl3_get_client_hello(SSL *s)
 
     p += j;
 
-    if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER) {
+    if (SSL_IS_DTLS(s)) {
         /* cookie stuff */
         if (p + 1 > d + n) {
             al = SSL_AD_DECODE_ERROR;
@@ -1155,11 +1095,36 @@ int ssl3_get_client_hello(SSL *s)
                 SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_COOKIE_MISMATCH);
                 goto f_err;
             }
-
-            ret = 2;
+            /* Set to -2 so if successful we return 2 */
+            ret = -2;
         }
 
         p += cookie_len;
+        if (s->method->version == DTLS_ANY_VERSION) {
+            /* Select version to use */
+            if (s->client_version <= DTLS1_2_VERSION &&
+                !(s->options & SSL_OP_NO_DTLSv1_2)) {
+                s->version = DTLS1_2_VERSION;
+                s->method = DTLSv1_2_server_method();
+            } else if (tls1_suiteb(s)) {
+                SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO,
+                       SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE);
+                s->version = s->client_version;
+                al = SSL_AD_PROTOCOL_VERSION;
+                goto f_err;
+            } else if (s->client_version <= DTLS1_VERSION &&
+                       !(s->options & SSL_OP_NO_DTLSv1)) {
+                s->version = DTLS1_VERSION;
+                s->method = DTLSv1_server_method();
+            } else {
+                SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO,
+                       SSL_R_WRONG_VERSION_NUMBER);
+                s->version = s->client_version;
+                al = SSL_AD_PROTOCOL_VERSION;
+                goto f_err;
+            }
+            s->session->ssl_version = s->version;
+        }
     }
 
     if (p + 2 > d + n) {
@@ -1248,7 +1213,9 @@ int ssl3_get_client_hello(SSL *s)
         SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_MISMATCH);
         goto f_err;
     }
+#ifndef OPENSSL_NO_COMP
     q = p;
+#endif
     for (j = 0; j < i; j++) {
         if (p[j] == 0)
             break;
@@ -1264,16 +1231,11 @@ int ssl3_get_client_hello(SSL *s)
 #ifndef OPENSSL_NO_TLSEXT
     /* TLS extensions */
     if (s->version >= SSL3_VERSION) {
-        if (!ssl_parse_clienthello_tlsext(s, &p, d, n, &al)) {
-            /* 'al' set by ssl_parse_clienthello_tlsext */
+        if (!ssl_parse_clienthello_tlsext(s, &p, d, n)) {
             SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_PARSE_TLSEXT);
-            goto f_err;
+            goto err;
         }
     }
-    if (ssl_check_clienthello_tlsext_early(s) <= 0) {
-        SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_CLIENTHELLO_TLSEXT);
-        goto err;
-    }
 
     /*
      * Check if we want to use external pre-shared secret for this handshake
@@ -1285,7 +1247,6 @@ int ssl3_get_client_hello(SSL *s)
         unsigned char *pos;
         pos = s->s3->server_random;
         if (ssl_fill_hello_random(s, 1, pos, SSL3_RANDOM_SIZE) <= 0) {
-            al = SSL_AD_INTERNAL_ERROR;
             goto f_err;
         }
     }
@@ -1344,7 +1305,6 @@ int ssl3_get_client_hello(SSL *s)
         /* Perform sanity checks on resumed compression algorithm */
         /* Can't disable compression */
         if (s->options & SSL_OP_NO_COMPRESSION) {
-            al = SSL_AD_INTERNAL_ERROR;
             SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO,
                    SSL_R_INCONSISTENT_COMPRESSION);
             goto f_err;
@@ -1358,7 +1318,6 @@ int ssl3_get_client_hello(SSL *s)
             }
         }
         if (s->s3->tmp.new_compression == NULL) {
-            al = SSL_AD_INTERNAL_ERROR;
             SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO,
                    SSL_R_INVALID_COMPRESSION_ALGORITHM);
             goto f_err;
@@ -1404,7 +1363,6 @@ int ssl3_get_client_hello(SSL *s)
      * using compression.
      */
     if (s->session->compress_meth != 0) {
-        al = SSL_AD_INTERNAL_ERROR;
         SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_INCONSISTENT_COMPRESSION);
         goto f_err;
     }
@@ -1429,6 +1387,25 @@ int ssl3_get_client_hello(SSL *s)
             goto f_err;
         }
         ciphers = NULL;
+        if (!tls1_set_server_sigalgs(s)) {
+            SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_CLIENTHELLO_TLSEXT);
+            goto err;
+        }
+        /* Let cert callback update server certificates if required */
+ retry_cert:
+        if (s->cert->cert_cb) {
+            int rv = s->cert->cert_cb(s, s->cert->cert_cb_arg);
+            if (rv == 0) {
+                al = SSL_AD_INTERNAL_ERROR;
+                SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_CERT_CB_ERROR);
+                goto f_err;
+            }
+            if (rv < 0) {
+                s->rwstate = SSL_X509_LOOKUP;
+                return -1;
+            }
+            s->rwstate = SSL_NOTHING;
+        }
         c = ssl3_choose_cipher(s, s->session->ciphers, SSL_get_ciphers(s));
 
         if (c == NULL) {
@@ -1464,16 +1441,13 @@ int ssl3_get_client_hello(SSL *s)
             s->s3->tmp.new_cipher = s->session->cipher;
     }
 
-    if (TLS1_get_version(s) < TLS1_2_VERSION
-        || !(s->verify_mode & SSL_VERIFY_PEER)) {
-        if (!ssl3_digest_cached_records(s)) {
-            al = SSL_AD_INTERNAL_ERROR;
+    if (!SSL_USE_SIGALGS(s) || !(s->verify_mode & SSL_VERIFY_PEER)) {
+        if (!ssl3_digest_cached_records(s))
             goto f_err;
-        }
     }
 
     /*-
-     * we now have the following setup.
+    * we now have the following setup.
      * client_random
      * cipher_list          - our prefered list of ciphers
      * ciphers              - the clients prefered list of ciphers
@@ -1493,7 +1467,7 @@ int ssl3_get_client_hello(SSL *s)
     }
 
     if (ret < 0)
-        ret = 1;
+        ret = -ret;
     if (0) {
  f_err:
         ssl3_send_alert(s, SSL3_AL_FATAL, al);
@@ -1503,7 +1477,7 @@ int ssl3_get_client_hello(SSL *s)
 
     if (ciphers != NULL)
         sk_SSL_CIPHER_free(ciphers);
-    return (ret);
+    return ret < 0 ? -1 : ret;
 }
 
 int ssl3_send_server_hello(SSL *s)
@@ -1511,6 +1485,7 @@ int ssl3_send_server_hello(SSL *s)
     unsigned char *buf;
     unsigned char *p, *d;
     int i, sl;
+    int al = 0;
     unsigned long l;
 
     if (s->state == SSL3_ST_SW_SRVR_HELLO_A) {
@@ -1523,7 +1498,7 @@ int ssl3_send_server_hello(SSL *s)
         }
 #endif
         /* Do the message type and length last */
-        d = p = &(buf[4]);
+        d = p = ssl_handshake_start(s);
 
         *(p++) = s->version >> 8;
         *(p++) = s->version & 0xff;
@@ -1582,9 +1557,9 @@ int ssl3_send_server_hello(SSL *s)
             return -1;
         }
         if ((p =
-             ssl_add_serverhello_tlsext(s, p,
-                                        buf + SSL3_RT_MAX_PLAIN_LENGTH)) ==
-            NULL) {
+             ssl_add_serverhello_tlsext(s, p, buf + SSL3_RT_MAX_PLAIN_LENGTH,
+                                        &al)) == NULL) {
+            ssl3_send_alert(s, SSL3_AL_FATAL, al);
             SSLerr(SSL_F_SSL3_SEND_SERVER_HELLO, ERR_R_INTERNAL_ERROR);
             s->state = SSL_ST_ERR;
             return -1;
@@ -1592,41 +1567,24 @@ int ssl3_send_server_hello(SSL *s)
 #endif
         /* do the header */
         l = (p - d);
-        d = buf;
-        *(d++) = SSL3_MT_SERVER_HELLO;
-        l2n3(l, d);
-
+        ssl_set_handshake_header(s, SSL3_MT_SERVER_HELLO, l);
         s->state = SSL3_ST_SW_SRVR_HELLO_B;
-        /* number of bytes to write */
-        s->init_num = p - buf;
-        s->init_off = 0;
     }
 
     /* SSL3_ST_SW_SRVR_HELLO_B */
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
 }
 
 int ssl3_send_server_done(SSL *s)
 {
-    unsigned char *p;
 
     if (s->state == SSL3_ST_SW_SRVR_DONE_A) {
-        p = (unsigned char *)s->init_buf->data;
-
-        /* do the header */
-        *(p++) = SSL3_MT_SERVER_DONE;
-        *(p++) = 0;
-        *(p++) = 0;
-        *(p++) = 0;
-
+        ssl_set_handshake_header(s, SSL3_MT_SERVER_DONE, 0);
         s->state = SSL3_ST_SW_SRVR_DONE_B;
-        /* number of bytes to write */
-        s->init_num = 4;
-        s->init_off = 0;
     }
 
     /* SSL3_ST_SW_SRVR_DONE_B */
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
 }
 
 int ssl3_send_server_key_exchange(SSL *s)
@@ -1751,7 +1709,12 @@ int ssl3_send_server_key_exchange(SSL *s)
             const EC_GROUP *group;
 
             ecdhp = cert->ecdh_tmp;
-            if ((ecdhp == NULL) && (s->cert->ecdh_tmp_cb != NULL)) {
+            if (s->cert->ecdh_tmp_auto) {
+                /* Get NID of appropriate shared curve */
+                int nid = tls1_shared_curve(s, -2);
+                if (nid != NID_undef)
+                    ecdhp = EC_KEY_new_by_curve_name(nid);
+            } else if ((ecdhp == NULL) && s->cert->ecdh_tmp_cb) {
                 ecdhp = s->cert->ecdh_tmp_cb(s,
                                              SSL_C_IS_EXPORT(s->s3->
                                                              tmp.new_cipher),
@@ -1776,7 +1739,9 @@ int ssl3_send_server_key_exchange(SSL *s)
                 SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB);
                 goto err;
             }
-            if ((ecdh = EC_KEY_dup(ecdhp)) == NULL) {
+            if (s->cert->ecdh_tmp_auto)
+                ecdh = ecdhp;
+            else if ((ecdh = EC_KEY_dup(ecdhp)) == NULL) {
                 SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB);
                 goto err;
             }
@@ -1920,12 +1885,11 @@ int ssl3_send_server_key_exchange(SSL *s)
             kn = 0;
         }
 
-        if (!BUF_MEM_grow_clean(buf, n + 4 + kn)) {
+        if (!BUF_MEM_grow_clean(buf, n + SSL_HM_HEADER_LENGTH(s) + kn)) {
             SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, ERR_LIB_BUF);
             goto err;
         }
-        d = (unsigned char *)s->init_buf->data;
-        p = &(d[4]);
+        d = p = ssl_handshake_start(s);
 
         for (i = 0; i < 4 && r[i] != NULL; i++) {
 #ifndef OPENSSL_NO_SRP
@@ -1980,8 +1944,7 @@ int ssl3_send_server_key_exchange(SSL *s)
              * points to the space at the end.
              */
 #ifndef OPENSSL_NO_RSA
-            if (pkey->type == EVP_PKEY_RSA
-                && TLS1_get_version(s) < TLS1_2_VERSION) {
+            if (pkey->type == EVP_PKEY_RSA && !SSL_USE_SIGALGS(s)) {
                 q = md_buf;
                 j = 0;
                 for (num = 2; num > 0; num--) {
@@ -1993,7 +1956,7 @@ int ssl3_send_server_key_exchange(SSL *s)
                                      SSL3_RANDOM_SIZE);
                     EVP_DigestUpdate(&md_ctx, &(s->s3->server_random[0]),
                                      SSL3_RANDOM_SIZE);
-                    EVP_DigestUpdate(&md_ctx, &(d[4]), n);
+                    EVP_DigestUpdate(&md_ctx, d, n);
                     EVP_DigestFinal_ex(&md_ctx, q, (unsigned int *)&i);
                     q += i;
                     j += i;
@@ -2008,10 +1971,8 @@ int ssl3_send_server_key_exchange(SSL *s)
             } else
 #endif
             if (md) {
-                /*
-                 * For TLS1.2 and later send signature algorithm
-                 */
-                if (TLS1_get_version(s) >= TLS1_2_VERSION) {
+                /* send signature algorithm */
+                if (SSL_USE_SIGALGS(s)) {
                     if (!tls12_get_sigandhash(p, pkey, md)) {
                         /* Should never happen */
                         al = SSL_AD_INTERNAL_ERROR;
@@ -2029,7 +1990,7 @@ int ssl3_send_server_key_exchange(SSL *s)
                                SSL3_RANDOM_SIZE);
                 EVP_SignUpdate(&md_ctx, &(s->s3->server_random[0]),
                                SSL3_RANDOM_SIZE);
-                EVP_SignUpdate(&md_ctx, &(d[4]), n);
+                EVP_SignUpdate(&md_ctx, d, n);
                 if (!EVP_SignFinal(&md_ctx, &(p[2]),
                                    (unsigned int *)&i, pkey)) {
                     SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, ERR_LIB_EVP);
@@ -2037,7 +1998,7 @@ int ssl3_send_server_key_exchange(SSL *s)
                 }
                 s2n(i, p);
                 n += i + 2;
-                if (TLS1_get_version(s) >= TLS1_2_VERSION)
+                if (SSL_USE_SIGALGS(s))
                     n += 2;
             } else {
                 /* Is this error check actually needed? */
@@ -2048,19 +2009,12 @@ int ssl3_send_server_key_exchange(SSL *s)
             }
         }
 
-        *(d++) = SSL3_MT_SERVER_KEY_EXCHANGE;
-        l2n3(n, d);
-
-        /*
-         * we should now have things packed up, so lets send it off
-         */
-        s->init_num = n + 4;
-        s->init_off = 0;
+        ssl_set_handshake_header(s, SSL3_MT_SERVER_KEY_EXCHANGE, n);
     }
 
     s->state = SSL3_ST_SW_KEY_EXCH_B;
     EVP_MD_CTX_cleanup(&md_ctx);
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
  f_err:
     ssl3_send_alert(s, SSL3_AL_FATAL, al);
  err:
@@ -2085,7 +2039,7 @@ int ssl3_send_certificate_request(SSL *s)
     if (s->state == SSL3_ST_SW_CERT_REQ_A) {
         buf = s->init_buf;
 
-        d = p = (unsigned char *)&(buf->data[4]);
+        d = p = ssl_handshake_start(s);
 
         /* get the list of acceptable cert types */
         p++;
@@ -2094,10 +2048,12 @@ int ssl3_send_certificate_request(SSL *s)
         p += n;
         n++;
 
-        if (TLS1_get_version(s) >= TLS1_2_VERSION) {
-            nl = tls12_get_req_sig_algs(s, p + 2);
+        if (SSL_USE_SIGALGS(s)) {
+            const unsigned char *psigs;
+            nl = tls12_get_psigalgs(s, &psigs);
             s2n(nl, p);
-            p += nl + 2;
+            memcpy(p, psigs, nl);
+            p += nl;
             n += nl + 2;
         }
 
@@ -2111,12 +2067,13 @@ int ssl3_send_certificate_request(SSL *s)
             for (i = 0; i < sk_X509_NAME_num(sk); i++) {
                 name = sk_X509_NAME_value(sk, i);
                 j = i2d_X509_NAME(name, NULL);
-                if (!BUF_MEM_grow_clean(buf, 4 + n + j + 2)) {
+                if (!BUF_MEM_grow_clean
+                    (buf, SSL_HM_HEADER_LENGTH(s) + n + j + 2)) {
                     SSLerr(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST,
                            ERR_R_BUF_LIB);
                     goto err;
                 }
-                p = (unsigned char *)&(buf->data[4 + n]);
+                p = ssl_handshake_start(s) + n;
                 if (!(s->options & SSL_OP_NETSCAPE_CA_DN_BUG)) {
                     s2n(j, p);
                     i2d_X509_NAME(name, &p);
@@ -2134,39 +2091,32 @@ int ssl3_send_certificate_request(SSL *s)
             }
         }
         /* else no CA names */
-        p = (unsigned char *)&(buf->data[4 + off]);
+        p = ssl_handshake_start(s) + off;
         s2n(nl, p);
 
-        d = (unsigned char *)buf->data;
-        *(d++) = SSL3_MT_CERTIFICATE_REQUEST;
-        l2n3(n, d);
+        ssl_set_handshake_header(s, SSL3_MT_CERTIFICATE_REQUEST, n);
 
-        /*
-         * we should now have things packed up, so lets send it off
-         */
-
-        s->init_num = n + 4;
-        s->init_off = 0;
 #ifdef NETSCAPE_HANG_BUG
-        if (!BUF_MEM_grow_clean(buf, s->init_num + 4)) {
-            SSLerr(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST, ERR_R_BUF_LIB);
-            goto err;
+        if (!SSL_IS_DTLS(s)) {
+            if (!BUF_MEM_grow_clean(buf, s->init_num + 4)) {
+                SSLerr(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST, ERR_R_BUF_LIB);
+                goto err;
+            }
+            p = (unsigned char *)s->init_buf->data + s->init_num;
+            /* do the header */
+            *(p++) = SSL3_MT_SERVER_DONE;
+            *(p++) = 0;
+            *(p++) = 0;
+            *(p++) = 0;
+            s->init_num += 4;
         }
-        p = (unsigned char *)s->init_buf->data + s->init_num;
-
-        /* do the header */
-        *(p++) = SSL3_MT_SERVER_DONE;
-        *(p++) = 0;
-        *(p++) = 0;
-        *(p++) = 0;
-        s->init_num += 4;
 #endif
 
         s->state = SSL3_ST_SW_CERT_REQ_B;
     }
 
     /* SSL3_ST_SW_CERT_REQ_B */
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
  err:
     s->state = SSL_ST_ERR;
     return (-1);
@@ -2184,7 +2134,7 @@ int ssl3_get_client_key_exchange(SSL *s)
 #endif
 #ifndef OPENSSL_NO_DH
     BIGNUM *pub = NULL;
-    DH *dh_srvr;
+    DH *dh_srvr, *dh_clnt = NULL;
 #endif
 #ifndef OPENSSL_NO_KRB5
     KSSL_ERR kssl_err;
@@ -2355,8 +2305,20 @@ int ssl3_get_client_key_exchange(SSL *s)
 #endif
 #ifndef OPENSSL_NO_DH
     if (alg_k & (SSL_kEDH | SSL_kDHr | SSL_kDHd)) {
-        n2s(p, i);
-        if (n != i + 2) {
+        int idx = -1;
+        EVP_PKEY *skey = NULL;
+        if (n > 1) {
+            n2s(p, i);
+        } else {
+            if (alg_k & SSL_kDHE) {
+                al = SSL_AD_HANDSHAKE_FAILURE;
+                SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+                       SSL_R_DH_PUBLIC_VALUE_LENGTH_IS_WRONG);
+                goto f_err;
+            }
+            i = 0;
+        }
+        if (n && n != i + 2) {
             if (!(s->options & SSL_OP_SSLEAY_080_CLIENT_DH_BUG)) {
                 SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
                        SSL_R_DH_PUBLIC_VALUE_LENGTH_IS_WRONG);
@@ -2366,23 +2328,45 @@ int ssl3_get_client_key_exchange(SSL *s)
                 i = (int)n;
             }
         }
-
-        if (n == 0L) {          /* the parameters are in the cert */
+        if (alg_k & SSL_kDHr)
+            idx = SSL_PKEY_DH_RSA;
+        else if (alg_k & SSL_kDHd)
+            idx = SSL_PKEY_DH_DSA;
+        if (idx >= 0) {
+            skey = s->cert->pkeys[idx].privatekey;
+            if ((skey == NULL) ||
+                (skey->type != EVP_PKEY_DH) || (skey->pkey.dh == NULL)) {
+                al = SSL_AD_HANDSHAKE_FAILURE;
+                SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
+                       SSL_R_MISSING_RSA_CERTIFICATE);
+                goto f_err;
+            }
+            dh_srvr = skey->pkey.dh;
+        } else if (s->s3->tmp.dh == NULL) {
             al = SSL_AD_HANDSHAKE_FAILURE;
             SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
-                   SSL_R_UNABLE_TO_DECODE_DH_CERTS);
+                   SSL_R_MISSING_TMP_DH_KEY);
             goto f_err;
-        } else {
-            if (s->s3->tmp.dh == NULL) {
+        } else
+            dh_srvr = s->s3->tmp.dh;
+
+        if (n == 0L) {
+            /* Get pubkey from cert */
+            EVP_PKEY *clkey = X509_get_pubkey(s->session->peer);
+            if (clkey) {
+                if (EVP_PKEY_cmp_parameters(clkey, skey) == 1)
+                    dh_clnt = EVP_PKEY_get1_DH(clkey);
+            }
+            if (dh_clnt == NULL) {
                 al = SSL_AD_HANDSHAKE_FAILURE;
                 SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE,
                        SSL_R_MISSING_TMP_DH_KEY);
                 goto f_err;
-            } else
-                dh_srvr = s->s3->tmp.dh;
-        }
-
-        pub = BN_bin2bn(p, i, NULL);
+            }
+            EVP_PKEY_free(clkey);
+            pub = dh_clnt->pub_key;
+        } else
+            pub = BN_bin2bn(p, i, NULL);
         if (pub == NULL) {
             SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_R_BN_LIB);
             goto err;
@@ -2398,8 +2382,10 @@ int ssl3_get_client_key_exchange(SSL *s)
 
         DH_free(s->s3->tmp.dh);
         s->s3->tmp.dh = NULL;
-
-        BN_clear_free(pub);
+        if (dh_clnt)
+            DH_free(dh_clnt);
+        else
+            BN_clear_free(pub);
         pub = NULL;
         s->session->master_key_length =
             s->method->ssl3_enc->generate_master_secret(s,
@@ -2407,6 +2393,8 @@ int ssl3_get_client_key_exchange(SSL *s)
                                                         session->master_key,
                                                         p, i);
         OPENSSL_cleanse(p, i);
+        if (dh_clnt)
+            return 2;
     } else
 #endif
 #ifndef OPENSSL_NO_KRB5
@@ -3007,24 +2995,12 @@ int ssl3_get_cert_verify(SSL *s)
                     pkey->type == NID_id_GostR3410_2001)) {
         i = 64;
     } else {
-        if (TLS1_get_version(s) >= TLS1_2_VERSION) {
-            int sigalg = tls12_get_sigid(pkey);
-            /* Should never happen */
-            if (sigalg == -1) {
-                SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, ERR_R_INTERNAL_ERROR);
+        if (SSL_USE_SIGALGS(s)) {
+            int rv = tls12_check_peer_sigalg(&md, s, p, pkey);
+            if (rv == -1) {
                 al = SSL_AD_INTERNAL_ERROR;
                 goto f_err;
-            }
-            /* Check key type is consistent with signature */
-            if (sigalg != (int)p[1]) {
-                SSLerr(SSL_F_SSL3_GET_CERT_VERIFY,
-                       SSL_R_WRONG_SIGNATURE_TYPE);
-                al = SSL_AD_DECODE_ERROR;
-                goto f_err;
-            }
-            md = tls12_get_hash(p[0]);
-            if (md == NULL) {
-                SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, SSL_R_UNKNOWN_DIGEST);
+            } else if (rv == 0) {
                 al = SSL_AD_DECODE_ERROR;
                 goto f_err;
             }
@@ -3049,7 +3025,7 @@ int ssl3_get_cert_verify(SSL *s)
         goto f_err;
     }
 
-    if (TLS1_get_version(s) >= TLS1_2_VERSION) {
+    if (SSL_USE_SIGALGS(s)) {
         long hdatalen = 0;
         void *hdata;
         hdatalen = BIO_get_mem_data(s->s3->handshake_buffer, &hdata);
@@ -3272,7 +3248,7 @@ int ssl3_get_client_certificate(SSL *s)
         if (i <= 0) {
             al = ssl_verify_alarm_type(s->verify_result);
             SSLerr(SSL_F_SSL3_GET_CLIENT_CERTIFICATE,
-                   SSL_R_NO_CERTIFICATE_RETURNED);
+                   SSL_R_CERTIFICATE_VERIFY_FAILED);
             goto f_err;
         }
     }
@@ -3320,12 +3296,11 @@ int ssl3_get_client_certificate(SSL *s)
 
 int ssl3_send_server_certificate(SSL *s)
 {
-    unsigned long l;
-    X509 *x;
+    CERT_PKEY *cpk;
 
     if (s->state == SSL3_ST_SW_CERT_A) {
-        x = ssl_get_server_send_cert(s);
-        if (x == NULL) {
+        cpk = ssl_get_server_send_pkey(s);
+        if (cpk == NULL) {
             /* VRS: allow null cert if auth == KRB5 */
             if ((s->s3->tmp.new_cipher->algorithm_auth != SSL_aKRB5) ||
                 (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kKRB5)) {
@@ -3336,19 +3311,16 @@ int ssl3_send_server_certificate(SSL *s)
             }
         }
 
-        l = ssl3_output_cert_chain(s, x);
-        if (!l) {
+        if (!ssl3_output_cert_chain(s, cpk)) {
             SSLerr(SSL_F_SSL3_SEND_SERVER_CERTIFICATE, ERR_R_INTERNAL_ERROR);
             s->state = SSL_ST_ERR;
             return (0);
         }
         s->state = SSL3_ST_SW_CERT_B;
-        s->init_num = (int)l;
-        s->init_off = 0;
     }
 
     /* SSL3_ST_SW_CERT_B */
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
 }
 
 #ifndef OPENSSL_NO_TLSEXT
@@ -3415,22 +3387,18 @@ int ssl3_send_newsession_ticket(SSL *s)
 
         /*-
          * Grow buffer if need be: the length calculation is as
-         * follows 1 (size of message name) + 3 (message length
-         * bytes) + 4 (ticket lifetime hint) + 2 (ticket length) +
+         * follows handshake_header_length +
+         * 4 (ticket lifetime hint) + 2 (ticket length) +
          * 16 (key name) + max_iv_len (iv length) +
          * session_length + max_enc_block_size (max encrypted session
          * length) + max_md_size (HMAC).
          */
         if (!BUF_MEM_grow(s->init_buf,
-                          26 + EVP_MAX_IV_LENGTH + EVP_MAX_BLOCK_LENGTH +
-                          EVP_MAX_MD_SIZE + slen))
+                          SSL_HM_HEADER_LENGTH(s) + 22 + EVP_MAX_IV_LENGTH +
+                          EVP_MAX_BLOCK_LENGTH + EVP_MAX_MD_SIZE + slen))
             goto err;
 
-        p = (unsigned char *)s->init_buf->data;
-        /* do the header */
-        *(p++) = SSL3_MT_NEWSESSION_TICKET;
-        /* Skip message length for now */
-        p += 3;
+        p = ssl_handshake_start(s);
         /*
          * Initialize HMAC and cipher contexts. If callback present it does
          * all the work otherwise use generated values from parent ctx.
@@ -3486,21 +3454,17 @@ int ssl3_send_newsession_ticket(SSL *s)
         p += hlen;
         /* Now write out lengths: p points to end of data written */
         /* Total length */
-        len = p - (unsigned char *)s->init_buf->data;
-        p = (unsigned char *)s->init_buf->data + 1;
-        l2n3(len - 4, p);       /* Message length */
-        p += 4;
-        s2n(len - 10, p);       /* Ticket length */
-
-        /* number of bytes to write */
-        s->init_num = len;
+        len = p - ssl_handshake_start(s);
+        /* Skip ticket lifetime hint */
+        p = ssl_handshake_start(s) + 4;
+        s2n(len - 6, p);
+        ssl_set_handshake_header(s, SSL3_MT_NEWSESSION_TICKET, len);
         s->state = SSL3_ST_SW_SESSION_TICKET_B;
-        s->init_off = 0;
         OPENSSL_free(senc);
     }
 
     /* SSL3_ST_SW_SESSION_TICKET_B */
-    return (ssl3_do_write(s, SSL3_RT_HANDSHAKE));
+    return ssl_do_write(s);
  err:
     if (senc)
         OPENSSL_free(senc);
@@ -3627,4 +3591,5 @@ int ssl3_get_next_proto(SSL *s)
     return 1;
 }
 # endif
+
 #endif
index 10082c9..2279c32 100644 (file)
@@ -134,7 +134,6 @@ extern "C" {
 
 int SSL_CTX_set_tlsext_use_srtp(SSL_CTX *ctx, const char *profiles);
 int SSL_set_tlsext_use_srtp(SSL *ctx, const char *profiles);
-SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s);
 
 STACK_OF(SRTP_PROTECTION_PROFILE) *SSL_get_srtp_profiles(SSL *ssl);
 SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s);
index 7303bc4..43fea17 100644 (file)
@@ -216,13 +216,13 @@ $!
 $ LIB_SSL = "s2_meth, s2_srvr, s2_clnt, s2_lib, s2_enc, s2_pkt,"+ -
            "s3_meth, s3_srvr, s3_clnt, s3_lib, s3_enc, s3_pkt, s3_both, s3_cbc,"+ -
            "s23_meth,s23_srvr,s23_clnt,s23_lib,        s23_pkt,"+ -
-           "t1_meth, t1_srvr, t1_clnt, t1_lib, t1_enc,"+ -
+           "t1_meth, t1_srvr, t1_clnt, t1_lib, t1_enc, t1_ext,"+ -
            "d1_meth, d1_srvr, d1_clnt, d1_lib, d1_pkt,"+ -
-           "d1_both,d1_enc,d1_srtp,"+ -
+           "d1_both,d1_srtp,"+ -
            "ssl_lib,ssl_err2,ssl_cert,ssl_sess,"+ -
            "ssl_ciph,ssl_stat,ssl_rsa,"+ -
-           "ssl_asn1,ssl_txt,ssl_algs,"+ -
-           "bio_ssl,ssl_err,kssl,tls_srp,t1_reneg,ssl_utst"
+           "ssl_asn1,ssl_txt,ssl_algs,ssl_conf,"+ -
+           "bio_ssl,ssl_err,kssl,t1_reneg,tls_srp,t1_trce,ssl_utst"
 $!
 $ COMPILEWITH_CC5 = ""
 $!
@@ -860,7 +860,7 @@ $ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS
 $ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX,FOUNDCR"
 $ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. ""
 $ THEN
-$     IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + ","
+$     IF CCDISABLEWARNINGS .NES. THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + ","
 $     CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS
 $ ENDIF
 $!
index d2ab0c0..6fe1a24 100644 (file)
--- a/ssl/ssl.h
+++ b/ssl/ssl.h
@@ -242,22 +242,24 @@ extern "C" {
 # define SSL_TXT_NULL            "NULL"
 
 # define SSL_TXT_kRSA            "kRSA"
-# define SSL_TXT_kDHr            "kDHr"/* no such ciphersuites supported! */
-# define SSL_TXT_kDHd            "kDHd"/* no such ciphersuites supported! */
-# define SSL_TXT_kDH             "kDH"/* no such ciphersuites supported! */
+# define SSL_TXT_kDHr            "kDHr"
+# define SSL_TXT_kDHd            "kDHd"
+# define SSL_TXT_kDH             "kDH"
 # define SSL_TXT_kEDH            "kEDH"
+# define SSL_TXT_kDHE            "kDHE"/* alias for kEDH */
 # define SSL_TXT_kKRB5           "kKRB5"
 # define SSL_TXT_kECDHr          "kECDHr"
 # define SSL_TXT_kECDHe          "kECDHe"
 # define SSL_TXT_kECDH           "kECDH"
 # define SSL_TXT_kEECDH          "kEECDH"
+# define SSL_TXT_kECDHE          "kECDHE"/* alias for kEECDH */
 # define SSL_TXT_kPSK            "kPSK"
 # define SSL_TXT_kGOST           "kGOST"
 # define SSL_TXT_kSRP            "kSRP"
 
 # define SSL_TXT_aRSA            "aRSA"
 # define SSL_TXT_aDSS            "aDSS"
-# define SSL_TXT_aDH             "aDH"/* no such ciphersuites supported! */
+# define SSL_TXT_aDH             "aDH"
 # define SSL_TXT_aECDH           "aECDH"
 # define SSL_TXT_aKRB5           "aKRB5"
 # define SSL_TXT_aECDSA          "aECDSA"
@@ -270,10 +272,12 @@ extern "C" {
 # define SSL_TXT_DSS             "DSS"
 # define SSL_TXT_DH              "DH"
 # define SSL_TXT_EDH             "EDH"/* same as "kEDH:-ADH" */
+# define SSL_TXT_DHE             "DHE"/* alias for EDH */
 # define SSL_TXT_ADH             "ADH"
 # define SSL_TXT_RSA             "RSA"
 # define SSL_TXT_ECDH            "ECDH"
 # define SSL_TXT_EECDH           "EECDH"/* same as "kEECDH:-AECDH" */
+# define SSL_TXT_ECDHE           "ECDHE"/* alias for EECDH */
 # define SSL_TXT_AECDH           "AECDH"
 # define SSL_TXT_ECDSA           "ECDSA"
 # define SSL_TXT_KRB5            "KRB5"
@@ -370,6 +374,8 @@ typedef struct tls_session_ticket_ext_st TLS_SESSION_TICKET_EXT;
 typedef struct ssl_method_st SSL_METHOD;
 typedef struct ssl_cipher_st SSL_CIPHER;
 typedef struct ssl_session_st SSL_SESSION;
+typedef struct tls_sigalgs_st TLS_SIGALGS;
+typedef struct ssl_conf_ctx_st SSL_CONF_CTX;
 
 DECLARE_STACK_OF(SSL_CIPHER)
 
@@ -389,6 +395,23 @@ typedef int (*tls_session_secret_cb_fn) (SSL *s, void *secret,
                                          STACK_OF(SSL_CIPHER) *peer_ciphers,
                                          SSL_CIPHER **cipher, void *arg);
 
+# ifndef OPENSSL_NO_TLSEXT
+
+/* Typedefs for handling custom extensions */
+
+typedef int (*custom_ext_add_cb) (SSL *s, unsigned int ext_type,
+                                  const unsigned char **out,
+                                  size_t *outlen, int *al, void *add_arg);
+
+typedef void (*custom_ext_free_cb) (SSL *s, unsigned int ext_type,
+                                    const unsigned char *out, void *add_arg);
+
+typedef int (*custom_ext_parse_cb) (SSL *s, unsigned int ext_type,
+                                    const unsigned char *in,
+                                    size_t inlen, int *al, void *parse_arg);
+
+# endif
+
 # ifndef OPENSSL_NO_SSL_INTERN
 
 /* used to hold info on the particular ciphers used */
@@ -624,6 +647,12 @@ struct ssl_session_st {
 # define SSL_OP_NO_TLSv1_2                               0x08000000L
 # define SSL_OP_NO_TLSv1_1                               0x10000000L
 
+# define SSL_OP_NO_DTLSv1                                0x04000000L
+# define SSL_OP_NO_DTLSv1_2                              0x08000000L
+
+# define SSL_OP_NO_SSL_MASK (SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3|\
+        SSL_OP_NO_TLSv1|SSL_OP_NO_TLSv1_1|SSL_OP_NO_TLSv1_2)
+
 /*
  * These next two were never actually used for anything since SSLeay zap so
  * we have some more flags.
@@ -685,6 +714,69 @@ struct ssl_session_st {
  */
 # define SSL_MODE_SEND_FALLBACK_SCSV 0x00000080L
 
+/* Cert related flags */
+/*
+ * Many implementations ignore some aspects of the TLS standards such as
+ * enforcing certificate chain algorithms. When this is set we enforce them.
+ */
+# define SSL_CERT_FLAG_TLS_STRICT                0x00000001L
+
+/* Suite B modes, takes same values as certificate verify flags */
+# define SSL_CERT_FLAG_SUITEB_128_LOS_ONLY       0x10000
+/* Suite B 192 bit only mode */
+# define SSL_CERT_FLAG_SUITEB_192_LOS            0x20000
+/* Suite B 128 bit mode allowing 192 bit algorithms */
+# define SSL_CERT_FLAG_SUITEB_128_LOS            0x30000
+
+/* Perform all sorts of protocol violations for testing purposes */
+# define SSL_CERT_FLAG_BROKEN_PROTOCOL           0x10000000
+
+/* Flags for building certificate chains */
+/* Treat any existing certificates as untrusted CAs */
+# define SSL_BUILD_CHAIN_FLAG_UNTRUSTED          0x1
+/* Don't include root CA in chain */
+# define SSL_BUILD_CHAIN_FLAG_NO_ROOT            0x2
+/* Just check certificates already there */
+# define SSL_BUILD_CHAIN_FLAG_CHECK              0x4
+/* Ignore verification errors */
+# define SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR       0x8
+/* Clear verification errors from queue */
+# define SSL_BUILD_CHAIN_FLAG_CLEAR_ERROR        0x10
+
+/* Flags returned by SSL_check_chain */
+/* Certificate can be used with this session */
+# define CERT_PKEY_VALID         0x1
+/* Certificate can also be used for signing */
+# define CERT_PKEY_SIGN          0x2
+/* EE certificate signing algorithm OK */
+# define CERT_PKEY_EE_SIGNATURE  0x10
+/* CA signature algorithms OK */
+# define CERT_PKEY_CA_SIGNATURE  0x20
+/* EE certificate parameters OK */
+# define CERT_PKEY_EE_PARAM      0x40
+/* CA certificate parameters OK */
+# define CERT_PKEY_CA_PARAM      0x80
+/* Signing explicitly allowed as opposed to SHA1 fallback */
+# define CERT_PKEY_EXPLICIT_SIGN 0x100
+/* Client CA issuer names match (always set for server cert) */
+# define CERT_PKEY_ISSUER_NAME   0x200
+/* Cert type matches client types (always set for server cert) */
+# define CERT_PKEY_CERT_TYPE     0x400
+/* Cert chain suitable to Suite B */
+# define CERT_PKEY_SUITEB        0x800
+
+# define SSL_CONF_FLAG_CMDLINE           0x1
+# define SSL_CONF_FLAG_FILE              0x2
+# define SSL_CONF_FLAG_CLIENT            0x4
+# define SSL_CONF_FLAG_SERVER            0x8
+# define SSL_CONF_FLAG_SHOW_ERRORS       0x10
+# define SSL_CONF_FLAG_CERTIFICATE       0x20
+/* Configuration value types */
+# define SSL_CONF_TYPE_UNKNOWN           0x0
+# define SSL_CONF_TYPE_STRING            0x1
+# define SSL_CONF_TYPE_FILE              0x2
+# define SSL_CONF_TYPE_DIR               0x3
+
 /*
  * Note: SSL[_CTX]_set_{options,mode} use |= op on the previous value, they
  * cannot be used to clear bits.
@@ -730,6 +822,15 @@ struct ssl_session_st {
         SSL_ctrl((ssl),SSL_CTRL_TLS_EXT_SEND_HEARTBEAT,0,NULL)
 # endif
 
+# define SSL_CTX_set_cert_flags(ctx,op) \
+        SSL_CTX_ctrl((ctx),SSL_CTRL_CERT_FLAGS,(op),NULL)
+# define SSL_set_cert_flags(s,op) \
+        SSL_ctrl((s),SSL_CTRL_CERT_FLAGS,(op),NULL)
+# define SSL_CTX_clear_cert_flags(ctx,op) \
+        SSL_CTX_ctrl((ctx),SSL_CTRL_CLEAR_CERT_FLAGS,(op),NULL)
+# define SSL_clear_cert_flags(s,op) \
+        SSL_ctrl((s),SSL_CTRL_CLEAR_CERT_FLAGS,(op),NULL)
+
 void SSL_CTX_set_msg_callback(SSL_CTX *ctx,
                               void (*cb) (int write_p, int version,
                                           int content_type, const void *buf,
@@ -1042,6 +1143,43 @@ struct ssl_ctx_st {
 #   endif
     /* SRTP profiles we are willing to do from RFC 5764 */
     STACK_OF(SRTP_PROTECTION_PROFILE) *srtp_profiles;
+
+    /*
+     * ALPN information (we are in the process of transitioning from NPN to
+     * ALPN.)
+     */
+
+    /*-
+     * For a server, this contains a callback function that allows the
+     * server to select the protocol for the connection.
+     *   out: on successful return, this must point to the raw protocol
+     *        name (without the length prefix).
+     *   outlen: on successful return, this contains the length of |*out|.
+     *   in: points to the client's list of supported protocols in
+     *       wire-format.
+     *   inlen: the length of |in|.
+     */
+    int (*alpn_select_cb) (SSL *s,
+                           const unsigned char **out,
+                           unsigned char *outlen,
+                           const unsigned char *in,
+                           unsigned int inlen, void *arg);
+    void *alpn_select_cb_arg;
+
+    /*
+     * For a client, this contains the list of supported protocols in wire
+     * format.
+     */
+    unsigned char *alpn_client_proto_list;
+    unsigned alpn_client_proto_list_len;
+
+#   ifndef OPENSSL_NO_EC
+    /* EC extension values inherited by SSL structure */
+    size_t tlsext_ecpointformatlist_length;
+    unsigned char *tlsext_ecpointformatlist;
+    size_t tlsext_ellipticcurvelist_length;
+    unsigned char *tlsext_ellipticcurvelist;
+#   endif                       /* OPENSSL_NO_EC */
 #  endif
 };
 
@@ -1144,19 +1282,35 @@ void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s,
                                                  const unsigned char *in,
                                                  unsigned int inlen,
                                                  void *arg), void *arg);
+void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data,
+                                    unsigned *len);
+# endif
 
+# ifndef OPENSSL_NO_TLSEXT
 int SSL_select_next_proto(unsigned char **out, unsigned char *outlen,
                           const unsigned char *in, unsigned int inlen,
                           const unsigned char *client,
                           unsigned int client_len);
-void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data,
-                                    unsigned *len);
-
-#  define OPENSSL_NPN_UNSUPPORTED 0
-#  define OPENSSL_NPN_NEGOTIATED  1
-#  define OPENSSL_NPN_NO_OVERLAP  2
 # endif
 
+# define OPENSSL_NPN_UNSUPPORTED 0
+# define OPENSSL_NPN_NEGOTIATED  1
+# define OPENSSL_NPN_NO_OVERLAP  2
+
+int SSL_CTX_set_alpn_protos(SSL_CTX *ctx, const unsigned char *protos,
+                            unsigned protos_len);
+int SSL_set_alpn_protos(SSL *ssl, const unsigned char *protos,
+                        unsigned protos_len);
+void SSL_CTX_set_alpn_select_cb(SSL_CTX *ctx,
+                                int (*cb) (SSL *ssl,
+                                           const unsigned char **out,
+                                           unsigned char *outlen,
+                                           const unsigned char *in,
+                                           unsigned int inlen,
+                                           void *arg), void *arg);
+void SSL_get0_alpn_selected(const SSL *ssl, const unsigned char **data,
+                            unsigned *len);
+
 # ifndef OPENSSL_NO_PSK
 /*
  * the maximum length of the buffer given to callbacks containing the
@@ -1228,6 +1382,27 @@ const char *SSL_get_psk_identity_hint(const SSL *s);
 const char *SSL_get_psk_identity(const SSL *s);
 # endif
 
+# ifndef OPENSSL_NO_TLSEXT
+/* Register callbacks to handle custom TLS Extensions for client or server. */
+
+int SSL_CTX_add_client_custom_ext(SSL_CTX *ctx, unsigned int ext_type,
+                                  custom_ext_add_cb add_cb,
+                                  custom_ext_free_cb free_cb,
+                                  void *add_arg,
+                                  custom_ext_parse_cb parse_cb,
+                                  void *parse_arg);
+
+int SSL_CTX_add_server_custom_ext(SSL_CTX *ctx, unsigned int ext_type,
+                                  custom_ext_add_cb add_cb,
+                                  custom_ext_free_cb free_cb,
+                                  void *add_arg,
+                                  custom_ext_parse_cb parse_cb,
+                                  void *parse_arg);
+
+int SSL_extension_supported(unsigned int ext_type);
+
+# endif
+
 # define SSL_NOTHING     1
 # define SSL_WRITING     2
 # define SSL_READING     3
@@ -1502,6 +1677,14 @@ struct ssl_st {
     /* ctx for SRP authentication */
     SRP_CTX srp_ctx;
 #  endif
+#  ifndef OPENSSL_NO_TLSEXT
+    /*
+     * For a client, this contains the list of supported protocols in wire
+     * format.
+     */
+    unsigned char *alpn_client_proto_list;
+    unsigned alpn_client_proto_list_len;
+#  endif                        /* OPENSSL_NO_TLSEXT */
 };
 
 # endif
@@ -1758,7 +1941,7 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
 #   define SSL_CTRL_GET_TLS_EXT_HEARTBEAT_PENDING          86
 #   define SSL_CTRL_SET_TLS_EXT_HEARTBEAT_NO_REQUESTS      87
 #  endif
-# endif
+# endif                         /* OPENSSL_NO_TLSEXT */
 # define DTLS_CTRL_GET_TIMEOUT           73
 # define DTLS_CTRL_HANDLE_TIMEOUT        74
 # define DTLS_CTRL_LISTEN                        75
@@ -1767,9 +1950,37 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
 # define SSL_CTRL_CLEAR_MODE                     78
 # define SSL_CTRL_GET_EXTRA_CHAIN_CERTS          82
 # define SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS        83
+# define SSL_CTRL_CHAIN                          88
+# define SSL_CTRL_CHAIN_CERT                     89
+# define SSL_CTRL_GET_CURVES                     90
+# define SSL_CTRL_SET_CURVES                     91
+# define SSL_CTRL_SET_CURVES_LIST                92
+# define SSL_CTRL_GET_SHARED_CURVE               93
+# define SSL_CTRL_SET_ECDH_AUTO                  94
+# define SSL_CTRL_SET_SIGALGS                    97
+# define SSL_CTRL_SET_SIGALGS_LIST               98
+# define SSL_CTRL_CERT_FLAGS                     99
+# define SSL_CTRL_CLEAR_CERT_FLAGS               100
+# define SSL_CTRL_SET_CLIENT_SIGALGS             101
+# define SSL_CTRL_SET_CLIENT_SIGALGS_LIST        102
+# define SSL_CTRL_GET_CLIENT_CERT_TYPES          103
+# define SSL_CTRL_SET_CLIENT_CERT_TYPES          104
+# define SSL_CTRL_BUILD_CERT_CHAIN               105
+# define SSL_CTRL_SET_VERIFY_CERT_STORE          106
+# define SSL_CTRL_SET_CHAIN_CERT_STORE           107
+# define SSL_CTRL_GET_PEER_SIGNATURE_NID         108
+# define SSL_CTRL_GET_SERVER_TMP_KEY             109
+# define SSL_CTRL_GET_RAW_CIPHERLIST             110
+# define SSL_CTRL_GET_EC_POINT_FORMATS           111
+# define SSL_CTRL_GET_CHAIN_CERTS                115
+# define SSL_CTRL_SELECT_CURRENT_CERT            116
+# define SSL_CTRL_SET_CURRENT_CERT               117
 # define SSL_CTRL_CHECK_PROTO_VERSION            119
 # define DTLS_CTRL_SET_LINK_MTU                  120
 # define DTLS_CTRL_GET_LINK_MIN_MTU              121
+# define SSL_CERT_SET_FIRST                      1
+# define SSL_CERT_SET_NEXT                       2
+# define SSL_CERT_SET_SERVER                     3
 # define DTLSv1_get_timeout(ssl, arg) \
         SSL_ctrl(ssl,DTLS_CTRL_GET_TIMEOUT,0, (void *)arg)
 # define DTLSv1_handle_timeout(ssl) \
@@ -1804,8 +2015,108 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
         SSL_CTX_ctrl(ctx,SSL_CTRL_EXTRA_CHAIN_CERT,0,(char *)x509)
 # define SSL_CTX_get_extra_chain_certs(ctx,px509) \
         SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,0,px509)
+# define SSL_CTX_get_extra_chain_certs_only(ctx,px509) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,1,px509)
 # define SSL_CTX_clear_extra_chain_certs(ctx) \
         SSL_CTX_ctrl(ctx,SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS,0,NULL)
+# define SSL_CTX_set0_chain(ctx,sk) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,0,(char *)sk)
+# define SSL_CTX_set1_chain(ctx,sk) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,1,(char *)sk)
+# define SSL_CTX_add0_chain_cert(ctx,x509) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN_CERT,0,(char *)x509)
+# define SSL_CTX_add1_chain_cert(ctx,x509) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN_CERT,1,(char *)x509)
+# define SSL_CTX_get0_chain_certs(ctx,px509) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_GET_CHAIN_CERTS,0,px509)
+# define SSL_CTX_clear_chain_certs(ctx) \
+        SSL_CTX_set0_chain(ctx,NULL)
+# define SSL_CTX_build_cert_chain(ctx, flags) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_BUILD_CERT_CHAIN, flags, NULL)
+# define SSL_CTX_select_current_cert(ctx,x509) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SELECT_CURRENT_CERT,0,(char *)x509)
+# define SSL_CTX_set_current_cert(ctx, op) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CURRENT_CERT, op, NULL)
+# define SSL_CTX_set0_verify_cert_store(ctx,st) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_VERIFY_CERT_STORE,0,(char *)st)
+# define SSL_CTX_set1_verify_cert_store(ctx,st) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_VERIFY_CERT_STORE,1,(char *)st)
+# define SSL_CTX_set0_chain_cert_store(ctx,st) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHAIN_CERT_STORE,0,(char *)st)
+# define SSL_CTX_set1_chain_cert_store(ctx,st) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHAIN_CERT_STORE,1,(char *)st)
+# define SSL_set0_chain(ctx,sk) \
+        SSL_ctrl(ctx,SSL_CTRL_CHAIN,0,(char *)sk)
+# define SSL_set1_chain(ctx,sk) \
+        SSL_ctrl(ctx,SSL_CTRL_CHAIN,1,(char *)sk)
+# define SSL_add0_chain_cert(ctx,x509) \
+        SSL_ctrl(ctx,SSL_CTRL_CHAIN_CERT,0,(char *)x509)
+# define SSL_add1_chain_cert(ctx,x509) \
+        SSL_ctrl(ctx,SSL_CTRL_CHAIN_CERT,1,(char *)x509)
+# define SSL_get0_chain_certs(ctx,px509) \
+        SSL_ctrl(ctx,SSL_CTRL_GET_CHAIN_CERTS,0,px509)
+# define SSL_clear_chain_certs(ctx) \
+        SSL_set0_chain(ctx,NULL)
+# define SSL_build_cert_chain(s, flags) \
+        SSL_ctrl(s,SSL_CTRL_BUILD_CERT_CHAIN, flags, NULL)
+# define SSL_select_current_cert(ctx,x509) \
+        SSL_ctrl(ctx,SSL_CTRL_SELECT_CURRENT_CERT,0,(char *)x509)
+# define SSL_set_current_cert(ctx,op) \
+        SSL_ctrl(ctx,SSL_CTRL_SET_CURRENT_CERT, op, NULL)
+# define SSL_set0_verify_cert_store(s,st) \
+        SSL_ctrl(s,SSL_CTRL_SET_VERIFY_CERT_STORE,0,(char *)st)
+# define SSL_set1_verify_cert_store(s,st) \
+        SSL_ctrl(s,SSL_CTRL_SET_VERIFY_CERT_STORE,1,(char *)st)
+# define SSL_set0_chain_cert_store(s,st) \
+        SSL_ctrl(s,SSL_CTRL_SET_CHAIN_CERT_STORE,0,(char *)st)
+# define SSL_set1_chain_cert_store(s,st) \
+        SSL_ctrl(s,SSL_CTRL_SET_CHAIN_CERT_STORE,1,(char *)st)
+# define SSL_get1_curves(ctx, s) \
+        SSL_ctrl(ctx,SSL_CTRL_GET_CURVES,0,(char *)s)
+# define SSL_CTX_set1_curves(ctx, clist, clistlen) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CURVES,clistlen,(char *)clist)
+# define SSL_CTX_set1_curves_list(ctx, s) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CURVES_LIST,0,(char *)s)
+# define SSL_set1_curves(ctx, clist, clistlen) \
+        SSL_ctrl(ctx,SSL_CTRL_SET_CURVES,clistlen,(char *)clist)
+# define SSL_set1_curves_list(ctx, s) \
+        SSL_ctrl(ctx,SSL_CTRL_SET_CURVES_LIST,0,(char *)s)
+# define SSL_get_shared_curve(s, n) \
+        SSL_ctrl(s,SSL_CTRL_GET_SHARED_CURVE,n,NULL)
+# define SSL_CTX_set_ecdh_auto(ctx, onoff) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_ECDH_AUTO,onoff,NULL)
+# define SSL_set_ecdh_auto(s, onoff) \
+        SSL_ctrl(s,SSL_CTRL_SET_ECDH_AUTO,onoff,NULL)
+# define SSL_CTX_set1_sigalgs(ctx, slist, slistlen) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SIGALGS,slistlen,(int *)slist)
+# define SSL_CTX_set1_sigalgs_list(ctx, s) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SIGALGS_LIST,0,(char *)s)
+# define SSL_set1_sigalgs(ctx, slist, slistlen) \
+        SSL_ctrl(ctx,SSL_CTRL_SET_SIGALGS,slistlen,(int *)slist)
+# define SSL_set1_sigalgs_list(ctx, s) \
+        SSL_ctrl(ctx,SSL_CTRL_SET_SIGALGS_LIST,0,(char *)s)
+# define SSL_CTX_set1_client_sigalgs(ctx, slist, slistlen) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS,slistlen,(int *)slist)
+# define SSL_CTX_set1_client_sigalgs_list(ctx, s) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS_LIST,0,(char *)s)
+# define SSL_set1_client_sigalgs(ctx, slist, slistlen) \
+        SSL_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS,slistlen,(int *)slist)
+# define SSL_set1_client_sigalgs_list(ctx, s) \
+        SSL_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS_LIST,0,(char *)s)
+# define SSL_get0_certificate_types(s, clist) \
+        SSL_ctrl(s, SSL_CTRL_GET_CLIENT_CERT_TYPES, 0, (char *)clist)
+# define SSL_CTX_set1_client_certificate_types(ctx, clist, clistlen) \
+        SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen,(char *)clist)
+# define SSL_set1_client_certificate_types(s, clist, clistlen) \
+        SSL_ctrl(s,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen,(char *)clist)
+# define SSL_get_peer_signature_nid(s, pn) \
+        SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NID,0,pn)
+# define SSL_get_server_tmp_key(s, pk) \
+        SSL_ctrl(s,SSL_CTRL_GET_SERVER_TMP_KEY,0,pk)
+# define SSL_get0_raw_cipherlist(s, plst) \
+        SSL_ctrl(s,SSL_CTRL_GET_RAW_CIPHERLIST,0,(char *)plst)
+# define SSL_get0_ec_point_formats(s, plst) \
+        SSL_ctrl(s,SSL_CTRL_GET_EC_POINT_FORMATS,0,(char *)plst)
 # ifndef OPENSSL_NO_BIO
 BIO_METHOD *BIO_f_ssl(void);
 BIO *BIO_new_ssl(SSL_CTX *ctx, int client);
@@ -1859,6 +2170,7 @@ int (*SSL_get_verify_callback(const SSL *s)) (int, X509_STORE_CTX *);
 void SSL_set_verify(SSL *s, int mode,
                     int (*callback) (int ok, X509_STORE_CTX *ctx));
 void SSL_set_verify_depth(SSL *s, int depth);
+void SSL_set_cert_cb(SSL *s, int (*cb) (SSL *ssl, void *arg), void *arg);
 # ifndef OPENSSL_NO_RSA
 int SSL_use_RSAPrivateKey(SSL *ssl, RSA *rsa);
 # endif
@@ -1869,6 +2181,16 @@ int SSL_use_PrivateKey_ASN1(int pk, SSL *ssl, const unsigned char *d,
 int SSL_use_certificate(SSL *ssl, X509 *x);
 int SSL_use_certificate_ASN1(SSL *ssl, const unsigned char *d, int len);
 
+# ifndef OPENSSL_NO_TLSEXT
+/* Set serverinfo data for the current active cert. */
+int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo,
+                           size_t serverinfo_length);
+#  ifndef OPENSSL_NO_STDIO
+int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file);
+#  endif                        /* OPENSSL_NO_STDIO */
+
+# endif
+
 # ifndef OPENSSL_NO_STDIO
 int SSL_use_RSAPrivateKey_file(SSL *ssl, const char *file, int type);
 int SSL_use_PrivateKey_file(SSL *ssl, const char *file, int type);
@@ -1943,6 +2265,8 @@ void SSL_CTX_set_verify_depth(SSL_CTX *ctx, int depth);
 void SSL_CTX_set_cert_verify_callback(SSL_CTX *ctx,
                                       int (*cb) (X509_STORE_CTX *, void *),
                                       void *arg);
+void SSL_CTX_set_cert_cb(SSL_CTX *c, int (*cb) (SSL *ssl, void *arg),
+                         void *arg);
 # ifndef OPENSSL_NO_RSA
 int SSL_CTX_use_RSAPrivateKey(SSL_CTX *ctx, RSA *rsa);
 # endif
@@ -1976,6 +2300,9 @@ int SSL_set_trust(SSL *s, int trust);
 int SSL_CTX_set1_param(SSL_CTX *ctx, X509_VERIFY_PARAM *vpm);
 int SSL_set1_param(SSL *ssl, X509_VERIFY_PARAM *vpm);
 
+X509_VERIFY_PARAM *SSL_CTX_get0_param(SSL_CTX *ctx);
+X509_VERIFY_PARAM *SSL_get0_param(SSL *ssl);
+
 # ifndef OPENSSL_NO_SRP
 int SSL_CTX_set_srp_username(SSL_CTX *ctx, char *name);
 int SSL_CTX_set_srp_password(SSL_CTX *ctx, char *password);
@@ -2000,6 +2327,7 @@ char *SSL_get_srp_username(SSL *s);
 char *SSL_get_srp_userinfo(SSL *s);
 # endif
 
+void SSL_certs_clear(SSL *s);
 void SSL_free(SSL *ssl);
 int SSL_accept(SSL *ssl);
 int SSL_connect(SSL *ssl);
@@ -2052,6 +2380,14 @@ const SSL_METHOD *DTLSv1_method(void); /* DTLSv1.0 */
 const SSL_METHOD *DTLSv1_server_method(void); /* DTLSv1.0 */
 const SSL_METHOD *DTLSv1_client_method(void); /* DTLSv1.0 */
 
+const SSL_METHOD *DTLSv1_2_method(void); /* DTLSv1.2 */
+const SSL_METHOD *DTLSv1_2_server_method(void); /* DTLSv1.2 */
+const SSL_METHOD *DTLSv1_2_client_method(void); /* DTLSv1.2 */
+
+const SSL_METHOD *DTLS_method(void); /* DTLS 1.0 and 1.2 */
+const SSL_METHOD *DTLS_server_method(void); /* DTLS 1.0 and 1.2 */
+const SSL_METHOD *DTLS_client_method(void); /* DTLS 1.0 and 1.2 */
+
 STACK_OF(SSL_CIPHER) *SSL_get_ciphers(const SSL *s);
 
 int SSL_do_handshake(SSL *s);
@@ -2060,6 +2396,7 @@ int SSL_renegotiate_abbreviated(SSL *s);
 int SSL_renegotiate_pending(SSL *s);
 int SSL_shutdown(SSL *s);
 
+const SSL_METHOD *SSL_CTX_get_ssl_method(SSL_CTX *ctx);
 const SSL_METHOD *SSL_get_ssl_method(SSL *s);
 int SSL_set_ssl_method(SSL *s, const SSL_METHOD *method);
 const char *SSL_alert_type_string_long(int value);
@@ -2089,7 +2426,10 @@ SSL *SSL_dup(SSL *ssl);
 X509 *SSL_get_certificate(const SSL *ssl);
 /*
  * EVP_PKEY
- */ struct evp_pkey_st *SSL_get_privatekey(SSL *ssl);
+ */ struct evp_pkey_st *SSL_get_privatekey(const SSL *ssl);
+
+X509 *SSL_CTX_get0_certificate(const SSL_CTX *ctx);
+EVP_PKEY *SSL_CTX_get0_privatekey(const SSL_CTX *ctx);
 
 void SSL_CTX_set_quiet_shutdown(SSL_CTX *ctx, int mode);
 int SSL_CTX_get_quiet_shutdown(const SSL_CTX *ctx);
@@ -2197,6 +2537,9 @@ const COMP_METHOD *SSL_get_current_compression(SSL *s);
 const COMP_METHOD *SSL_get_current_expansion(SSL *s);
 const char *SSL_COMP_get_name(const COMP_METHOD *comp);
 STACK_OF(SSL_COMP) *SSL_COMP_get_compression_methods(void);
+STACK_OF(SSL_COMP) *SSL_COMP_set0_compression_methods(STACK_OF(SSL_COMP)
+                                                      *meths);
+void SSL_COMP_free_compression_methods(void);
 int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm);
 # else
 const void *SSL_get_current_compression(SSL *s);
@@ -2206,6 +2549,8 @@ void *SSL_COMP_get_compression_methods(void);
 int SSL_COMP_add_compression_method(int id, void *cm);
 # endif
 
+const SSL_CIPHER *SSL_CIPHER_find(SSL *ssl, const unsigned char *ptr);
+
 /* TLS extensions functions */
 int SSL_set_session_ticket_ext(SSL *s, void *ext_data, int ext_len);
 
@@ -2219,6 +2564,27 @@ int SSL_set_session_secret_cb(SSL *s,
 
 void SSL_set_debug(SSL *s, int debug);
 int SSL_cache_hit(SSL *s);
+int SSL_is_server(SSL *s);
+
+SSL_CONF_CTX *SSL_CONF_CTX_new(void);
+int SSL_CONF_CTX_finish(SSL_CONF_CTX *cctx);
+void SSL_CONF_CTX_free(SSL_CONF_CTX *cctx);
+unsigned int SSL_CONF_CTX_set_flags(SSL_CONF_CTX *cctx, unsigned int flags);
+unsigned int SSL_CONF_CTX_clear_flags(SSL_CONF_CTX *cctx, unsigned int flags);
+int SSL_CONF_CTX_set1_prefix(SSL_CONF_CTX *cctx, const char *pre);
+
+void SSL_CONF_CTX_set_ssl(SSL_CONF_CTX *cctx, SSL *ssl);
+void SSL_CONF_CTX_set_ssl_ctx(SSL_CONF_CTX *cctx, SSL_CTX *ctx);
+
+int SSL_CONF_cmd(SSL_CONF_CTX *cctx, const char *cmd, const char *value);
+int SSL_CONF_cmd_argv(SSL_CONF_CTX *cctx, int *pargc, char ***pargv);
+int SSL_CONF_cmd_value_type(SSL_CONF_CTX *cctx, const char *cmd);
+
+# ifndef OPENSSL_NO_SSL_TRACE
+void SSL_trace(int write_p, int version, int content_type,
+               const void *buf, size_t len, SSL *ssl, void *arg);
+const char *SSL_CIPHER_standard_name(const SSL_CIPHER *c);
+# endif
 
 # ifndef OPENSSL_NO_UNIT_TEST
 const struct openssl_ssl_test_functions *SSL_test_functions(void);
@@ -2234,6 +2600,7 @@ void ERR_load_SSL_strings(void);
 /* Error codes for the SSL functions. */
 
 /* Function codes. */
+# define SSL_F_CHECK_SUITEB_CIPHER_LIST                   331
 # define SSL_F_CLIENT_CERTIFICATE                         100
 # define SSL_F_CLIENT_FINISHED                            167
 # define SSL_F_CLIENT_HELLO                               101
@@ -2274,6 +2641,7 @@ void ERR_load_SSL_strings(void);
 # define SSL_F_GET_CLIENT_MASTER_KEY                      107
 # define SSL_F_GET_SERVER_FINISHED                        108
 # define SSL_F_GET_SERVER_HELLO                           109
+# define SSL_F_GET_SERVER_STATIC_DH_KEY                   340
 # define SSL_F_GET_SERVER_VERIFY                          110
 # define SSL_F_I2D_SSL_SESSION                            111
 # define SSL_F_READ_N                                     112
@@ -2346,6 +2714,8 @@ void ERR_load_SSL_strings(void);
 # define SSL_F_SSL3_SETUP_WRITE_BUFFER                    291
 # define SSL_F_SSL3_WRITE_BYTES                           158
 # define SSL_F_SSL3_WRITE_PENDING                         159
+# define SSL_F_SSL_ADD_CERT_CHAIN                         318
+# define SSL_F_SSL_ADD_CERT_TO_BUF                        319
 # define SSL_F_SSL_ADD_CLIENTHELLO_RENEGOTIATE_EXT        298
 # define SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT                 277
 # define SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT           307
@@ -2355,6 +2725,7 @@ void ERR_load_SSL_strings(void);
 # define SSL_F_SSL_ADD_SERVERHELLO_TLSEXT                 278
 # define SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT           308
 # define SSL_F_SSL_BAD_METHOD                             160
+# define SSL_F_SSL_BUILD_CERT_CHAIN                       332
 # define SSL_F_SSL_BYTES_TO_CIPHER_LIST                   161
 # define SSL_F_SSL_CERT_DUP                               221
 # define SSL_F_SSL_CERT_INST                              222
@@ -2367,6 +2738,7 @@ void ERR_load_SSL_strings(void);
 # define SSL_F_SSL_CIPHER_STRENGTH_SORT                   231
 # define SSL_F_SSL_CLEAR                                  164
 # define SSL_F_SSL_COMP_ADD_COMPRESSION_METHOD            165
+# define SSL_F_SSL_CONF_CMD                               334
 # define SSL_F_SSL_CREATE_CIPHER_LIST                     166
 # define SSL_F_SSL_CTRL                                   232
 # define SSL_F_SSL_CTX_CHECK_PRIVATE_KEY                  168
@@ -2389,9 +2761,12 @@ void ERR_load_SSL_strings(void);
 # define SSL_F_SSL_CTX_USE_RSAPRIVATEKEY                  177
 # define SSL_F_SSL_CTX_USE_RSAPRIVATEKEY_ASN1             178
 # define SSL_F_SSL_CTX_USE_RSAPRIVATEKEY_FILE             179
+# define SSL_F_SSL_CTX_USE_SERVERINFO                     336
+# define SSL_F_SSL_CTX_USE_SERVERINFO_FILE                337
 # define SSL_F_SSL_DO_HANDSHAKE                           180
 # define SSL_F_SSL_GET_NEW_SESSION                        181
 # define SSL_F_SSL_GET_PREV_SESSION                       217
+# define SSL_F_SSL_GET_SERVER_CERT_INDEX                  322
 # define SSL_F_SSL_GET_SERVER_SEND_CERT                   182
 # define SSL_F_SSL_GET_SERVER_SEND_PKEY                   317
 # define SSL_F_SSL_GET_SIGN_PKEY                          183
@@ -2410,6 +2785,8 @@ void ERR_load_SSL_strings(void);
 # define SSL_F_SSL_READ                                   223
 # define SSL_F_SSL_RSA_PRIVATE_DECRYPT                    187
 # define SSL_F_SSL_RSA_PUBLIC_ENCRYPT                     188
+# define SSL_F_SSL_SCAN_CLIENTHELLO_TLSEXT                320
+# define SSL_F_SSL_SCAN_SERVERHELLO_TLSEXT                321
 # define SSL_F_SSL_SESSION_DUP                            348
 # define SSL_F_SSL_SESSION_NEW                            189
 # define SSL_F_SSL_SESSION_PRINT_FP                       190
@@ -2443,16 +2820,19 @@ void ERR_load_SSL_strings(void);
 # define SSL_F_SSL_USE_RSAPRIVATEKEY_FILE                 206
 # define SSL_F_SSL_VERIFY_CERT_CHAIN                      207
 # define SSL_F_SSL_WRITE                                  208
+# define SSL_F_TLS12_CHECK_PEER_SIGALG                    333
 # define SSL_F_TLS1_CERT_VERIFY_MAC                       286
 # define SSL_F_TLS1_CHANGE_CIPHER_STATE                   209
 # define SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT              274
 # define SSL_F_TLS1_ENC                                   210
 # define SSL_F_TLS1_EXPORT_KEYING_MATERIAL                314
+# define SSL_F_TLS1_GET_CURVELIST                         338
 # define SSL_F_TLS1_HEARTBEAT                             315
 # define SSL_F_TLS1_PREPARE_CLIENTHELLO_TLSEXT            275
 # define SSL_F_TLS1_PREPARE_SERVERHELLO_TLSEXT            276
 # define SSL_F_TLS1_PRF                                   284
 # define SSL_F_TLS1_SETUP_KEY_BLOCK                       211
+# define SSL_F_TLS1_SET_SERVER_SIGALGS                    335
 # define SSL_F_WRITE_PENDING                              212
 
 /* Reason codes. */
@@ -2462,6 +2842,7 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_BAD_AUTHENTICATION_TYPE                    102
 # define SSL_R_BAD_CHANGE_CIPHER_SPEC                     103
 # define SSL_R_BAD_CHECKSUM                               104
+# define SSL_R_BAD_DATA                                   390
 # define SSL_R_BAD_DATA_RETURNED_BY_CALLBACK              106
 # define SSL_R_BAD_DECOMPRESSION                          107
 # define SSL_R_BAD_DH_G_LENGTH                            108
@@ -2499,6 +2880,7 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_BAD_SSL_FILETYPE                           124
 # define SSL_R_BAD_SSL_SESSION_ID_LENGTH                  125
 # define SSL_R_BAD_STATE                                  126
+# define SSL_R_BAD_VALUE                                  384
 # define SSL_R_BAD_WRITE_RETRY                            127
 # define SSL_R_BIO_NOT_SET                                128
 # define SSL_R_BLOCK_CIPHER_PAD_IS_WRONG                  129
@@ -2507,6 +2889,7 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_CA_DN_TOO_LONG                             132
 # define SSL_R_CCS_RECEIVED_EARLY                         133
 # define SSL_R_CERTIFICATE_VERIFY_FAILED                  134
+# define SSL_R_CERT_CB_ERROR                              377
 # define SSL_R_CERT_LENGTH_MISMATCH                       135
 # define SSL_R_CHALLENGE_IS_DIFFERENT                     136
 # define SSL_R_CIPHER_CODE_WRONG_LENGTH                   137
@@ -2534,6 +2917,7 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_ECC_CERT_NOT_FOR_SIGNING                   318
 # define SSL_R_ECC_CERT_SHOULD_HAVE_RSA_SIGNATURE         322
 # define SSL_R_ECC_CERT_SHOULD_HAVE_SHA1_SIGNATURE        323
+# define SSL_R_ECDH_REQUIRED_FOR_SUITEB_MODE              374
 # define SSL_R_ECGROUP_TOO_LARGE_FOR_CIPHER               310
 # define SSL_R_EMPTY_SRTP_PROTECTION_PROFILE_LIST         354
 # define SSL_R_ENCRYPTED_LENGTH_TOO_LONG                  150
@@ -2547,12 +2931,15 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_HTTPS_PROXY_REQUEST                        155
 # define SSL_R_HTTP_REQUEST                               156
 # define SSL_R_ILLEGAL_PADDING                            283
+# define SSL_R_ILLEGAL_SUITEB_DIGEST                      380
 # define SSL_R_INAPPROPRIATE_FALLBACK                     373
 # define SSL_R_INCONSISTENT_COMPRESSION                   340
 # define SSL_R_INVALID_CHALLENGE_LENGTH                   158
 # define SSL_R_INVALID_COMMAND                            280
 # define SSL_R_INVALID_COMPRESSION_ALGORITHM              341
+# define SSL_R_INVALID_NULL_CMD_NAME                      385
 # define SSL_R_INVALID_PURPOSE                            278
+# define SSL_R_INVALID_SERVERINFO_DATA                    388
 # define SSL_R_INVALID_SRP_USERNAME                       357
 # define SSL_R_INVALID_STATUS_RESPONSE                    328
 # define SSL_R_INVALID_TICKET_KEYS_LENGTH                 325
@@ -2578,6 +2965,8 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_MISSING_DH_KEY                             163
 # define SSL_R_MISSING_DH_RSA_CERT                        164
 # define SSL_R_MISSING_DSA_SIGNING_CERT                   165
+# define SSL_R_MISSING_ECDH_CERT                          382
+# define SSL_R_MISSING_ECDSA_SIGNING_CERT                 381
 # define SSL_R_MISSING_EXPORT_TMP_DH_KEY                  166
 # define SSL_R_MISSING_EXPORT_TMP_RSA_KEY                 167
 # define SSL_R_MISSING_RSA_CERTIFICATE                    168
@@ -2606,6 +2995,7 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_NO_COMPRESSION_SPECIFIED                   187
 # define SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER           330
 # define SSL_R_NO_METHOD_SPECIFIED                        188
+# define SSL_R_NO_PEM_EXTENSIONS                          389
 # define SSL_R_NO_PRIVATEKEY                              189
 # define SSL_R_NO_PRIVATE_KEY_ASSIGNED                    190
 # define SSL_R_NO_PROTOCOLS_AVAILABLE                     191
@@ -2613,12 +3003,15 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_NO_RENEGOTIATION                           339
 # define SSL_R_NO_REQUIRED_DIGEST                         324
 # define SSL_R_NO_SHARED_CIPHER                           193
+# define SSL_R_NO_SHARED_SIGATURE_ALGORITHMS              376
 # define SSL_R_NO_SRTP_PROFILES                           359
 # define SSL_R_NO_VERIFY_CALLBACK                         194
 # define SSL_R_NULL_SSL_CTX                               195
 # define SSL_R_NULL_SSL_METHOD_PASSED                     196
 # define SSL_R_OLD_SESSION_CIPHER_NOT_RETURNED            197
 # define SSL_R_OLD_SESSION_COMPRESSION_ALGORITHM_NOT_RETURNED 344
+# define SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE       387
+# define SSL_R_ONLY_TLS_1_2_ALLOWED_IN_SUITEB_MODE        379
 # define SSL_R_ONLY_TLS_ALLOWED_IN_FIPS_MODE              297
 # define SSL_R_OPAQUE_PRF_INPUT_TOO_LONG                  327
 # define SSL_R_PACKET_LENGTH_TOO_LONG                     198
@@ -2630,6 +3023,8 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_PEER_ERROR_NO_CERTIFICATE                  202
 # define SSL_R_PEER_ERROR_NO_CIPHER                       203
 # define SSL_R_PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE    204
+# define SSL_R_PEM_NAME_BAD_PREFIX                        391
+# define SSL_R_PEM_NAME_TOO_SHORT                         392
 # define SSL_R_PRE_MAC_LENGTH_TOO_LONG                    205
 # define SSL_R_PROBLEMS_MAPPING_CIPHER_FUNCTIONS          206
 # define SSL_R_PROTOCOL_IS_SHUTDOWN                       207
@@ -2732,6 +3127,7 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_UNKNOWN_CERTIFICATE_TYPE                   247
 # define SSL_R_UNKNOWN_CIPHER_RETURNED                    248
 # define SSL_R_UNKNOWN_CIPHER_TYPE                        249
+# define SSL_R_UNKNOWN_CMD_NAME                           386
 # define SSL_R_UNKNOWN_DIGEST                             368
 # define SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE                  250
 # define SSL_R_UNKNOWN_PKEY_TYPE                          251
@@ -2749,7 +3145,9 @@ void ERR_load_SSL_strings(void);
 # define SSL_R_UNSUPPORTED_STATUS_TYPE                    329
 # define SSL_R_USE_SRTP_NOT_NEGOTIATED                    369
 # define SSL_R_WRITE_BIO_NOT_SET                          260
+# define SSL_R_WRONG_CERTIFICATE_TYPE                     383
 # define SSL_R_WRONG_CIPHER_RETURNED                      261
+# define SSL_R_WRONG_CURVE                                378
 # define SSL_R_WRONG_MESSAGE_TYPE                         262
 # define SSL_R_WRONG_NUMBER_OF_KEY_BITS                   263
 # define SSL_R_WRONG_SIGNATURE_LENGTH                     264
index 2dd5462..e681d50 100644 (file)
@@ -159,11 +159,17 @@ extern "C" {
 # define SSL3_CK_DH_RSA_DES_192_CBC3_SHA         0x03000010
 
 # define SSL3_CK_EDH_DSS_DES_40_CBC_SHA          0x03000011
+# define SSL3_CK_DHE_DSS_DES_40_CBC_SHA          SSL3_CK_EDH_DSS_DES_40_CBC_SHA
 # define SSL3_CK_EDH_DSS_DES_64_CBC_SHA          0x03000012
+# define SSL3_CK_DHE_DSS_DES_64_CBC_SHA          SSL3_CK_EDH_DSS_DES_64_CBC_SHA
 # define SSL3_CK_EDH_DSS_DES_192_CBC3_SHA        0x03000013
+# define SSL3_CK_DHE_DSS_DES_192_CBC3_SHA        SSL3_CK_EDH_DSS_DES_192_CBC3_SHA
 # define SSL3_CK_EDH_RSA_DES_40_CBC_SHA          0x03000014
+# define SSL3_CK_DHE_RSA_DES_40_CBC_SHA          SSL3_CK_EDH_RSA_DES_40_CBC_SHA
 # define SSL3_CK_EDH_RSA_DES_64_CBC_SHA          0x03000015
+# define SSL3_CK_DHE_RSA_DES_64_CBC_SHA          SSL3_CK_EDH_RSA_DES_64_CBC_SHA
 # define SSL3_CK_EDH_RSA_DES_192_CBC3_SHA        0x03000016
+# define SSL3_CK_DHE_RSA_DES_192_CBC3_SHA        SSL3_CK_EDH_RSA_DES_192_CBC3_SHA
 
 # define SSL3_CK_ADH_RC4_40_MD5                  0x03000017
 # define SSL3_CK_ADH_RC4_128_MD5                 0x03000018
@@ -220,6 +226,18 @@ extern "C" {
 # define SSL3_TXT_DH_RSA_DES_64_CBC_SHA          "DH-RSA-DES-CBC-SHA"
 # define SSL3_TXT_DH_RSA_DES_192_CBC3_SHA        "DH-RSA-DES-CBC3-SHA"
 
+# define SSL3_TXT_DHE_DSS_DES_40_CBC_SHA         "EXP-DHE-DSS-DES-CBC-SHA"
+# define SSL3_TXT_DHE_DSS_DES_64_CBC_SHA         "DHE-DSS-DES-CBC-SHA"
+# define SSL3_TXT_DHE_DSS_DES_192_CBC3_SHA       "DHE-DSS-DES-CBC3-SHA"
+# define SSL3_TXT_DHE_RSA_DES_40_CBC_SHA         "EXP-DHE-RSA-DES-CBC-SHA"
+# define SSL3_TXT_DHE_RSA_DES_64_CBC_SHA         "DHE-RSA-DES-CBC-SHA"
+# define SSL3_TXT_DHE_RSA_DES_192_CBC3_SHA       "DHE-RSA-DES-CBC3-SHA"
+
+/*
+ * This next block of six "EDH" labels is for backward compatibility with
+ * older versions of OpenSSL.  New code should use the six "DHE" labels above
+ * instead:
+ */
 # define SSL3_TXT_EDH_DSS_DES_40_CBC_SHA         "EXP-EDH-DSS-DES-CBC-SHA"
 # define SSL3_TXT_EDH_DSS_DES_64_CBC_SHA         "EDH-DSS-DES-CBC-SHA"
 # define SSL3_TXT_EDH_DSS_DES_192_CBC3_SHA       "EDH-DSS-DES-CBC3-SHA"
@@ -263,6 +281,8 @@ extern "C" {
 # define SSL3_SESSION_ID_SIZE                    32
 # define SSL3_RT_HEADER_LENGTH                   5
 
+# define SSL3_HM_HEADER_LENGTH                  4
+
 # ifndef SSL3_ALIGN_PAYLOAD
  /*
   * Some will argue that this increases memory footprint, but it's not
@@ -342,6 +362,23 @@ extern "C" {
 # define SSL3_RT_APPLICATION_DATA        23
 # define TLS1_RT_HEARTBEAT               24
 
+/* Pseudo content types to indicate additional parameters */
+# define TLS1_RT_CRYPTO                  0x1000
+# define TLS1_RT_CRYPTO_PREMASTER        (TLS1_RT_CRYPTO | 0x1)
+# define TLS1_RT_CRYPTO_CLIENT_RANDOM    (TLS1_RT_CRYPTO | 0x2)
+# define TLS1_RT_CRYPTO_SERVER_RANDOM    (TLS1_RT_CRYPTO | 0x3)
+# define TLS1_RT_CRYPTO_MASTER           (TLS1_RT_CRYPTO | 0x4)
+
+# define TLS1_RT_CRYPTO_READ             0x0000
+# define TLS1_RT_CRYPTO_WRITE            0x0100
+# define TLS1_RT_CRYPTO_MAC              (TLS1_RT_CRYPTO | 0x5)
+# define TLS1_RT_CRYPTO_KEY              (TLS1_RT_CRYPTO | 0x6)
+# define TLS1_RT_CRYPTO_IV               (TLS1_RT_CRYPTO | 0x7)
+# define TLS1_RT_CRYPTO_FIXED_IV         (TLS1_RT_CRYPTO | 0x8)
+
+/* Pseudo content type for SSL/TLS header info */
+# define SSL3_RT_HEADER                  0x100
+
 # define SSL3_AL_WARNING                 1
 # define SSL3_AL_FATAL                   2
 
@@ -436,14 +473,7 @@ typedef struct ssl3_buffer_st {
  */
 # define SSL3_FLAGS_CCS_OK                       0x0080
 
-/*
- * SSL3_FLAGS_SGC_RESTART_DONE is set when we restart a handshake because of
- * MS SGC and so prevents us from restarting the handshake in a loop. It's
- * reset on a renegotiation, so effectively limits the client to one restart
- * per negotiation. This limits the possibility of a DDoS attack where the
- * client handshakes in a loop using SGC to restart. Servers which permit
- * renegotiation can still be effected, but we can't prevent that.
- */
+/* SSL3_FLAGS_SGC_RESTART_DONE is no longer used */
 # define SSL3_FLAGS_SGC_RESTART_DONE             0x0040
 
 # ifndef OPENSSL_NO_SSL_INTERN
@@ -584,7 +614,20 @@ typedef struct ssl3_state_st {
      */
     char is_probably_safari;
 #   endif                       /* !OPENSSL_NO_EC */
-#  endif                        /* !OPENSSL_NO_TLSEXT */
+
+    /*
+     * ALPN information (we are in the process of transitioning from NPN to
+     * ALPN.)
+     */
+
+    /*
+     * In a server these point to the selected ALPN protocol after the
+     * ClientHello has been processed. In a client these contain the protocol
+     * that the server selected once the ServerHello has been processed.
+     */
+    unsigned char *alpn_selected;
+    unsigned alpn_selected_len;
+#  endif                        /* OPENSSL_NO_TLSEXT */
 } SSL3_STATE;
 
 # endif
@@ -654,6 +697,7 @@ typedef struct ssl3_state_st {
 # define SSL3_ST_SR_CLNT_HELLO_A         (0x110|SSL_ST_ACCEPT)
 # define SSL3_ST_SR_CLNT_HELLO_B         (0x111|SSL_ST_ACCEPT)
 # define SSL3_ST_SR_CLNT_HELLO_C         (0x112|SSL_ST_ACCEPT)
+# define SSL3_ST_SR_CLNT_HELLO_D         (0x115|SSL_ST_ACCEPT)
 /* write to client */
 # define DTLS1_ST_SW_HELLO_VERIFY_REQUEST_A (0x113|SSL_ST_ACCEPT)
 # define DTLS1_ST_SW_HELLO_VERIFY_REQUEST_B (0x114|SSL_ST_ACCEPT)
index 151422a..e6f515f 100644 (file)
@@ -95,6 +95,10 @@ int SSL_library_init(void)
     EVP_add_cipher(EVP_aes_128_cbc_hmac_sha1());
     EVP_add_cipher(EVP_aes_256_cbc_hmac_sha1());
 # endif
+# if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256)
+    EVP_add_cipher(EVP_aes_128_cbc_hmac_sha256());
+    EVP_add_cipher(EVP_aes_256_cbc_hmac_sha256());
+# endif
 
 #endif
 #ifndef OPENSSL_NO_CAMELLIA
index 5df2413..93a1eb9 100644 (file)
@@ -139,29 +139,50 @@ int SSL_get_ex_data_X509_STORE_CTX_idx(void)
     static volatile int ssl_x509_store_ctx_idx = -1;
     int got_write_lock = 0;
 
-    CRYPTO_r_lock(CRYPTO_LOCK_SSL_CTX);
+    if (((size_t)&ssl_x509_store_ctx_idx &
+         (sizeof(ssl_x509_store_ctx_idx) - 1))
+        == 0) {                 /* check alignment, practically always true */
+        int ret;
+
+        if ((ret = ssl_x509_store_ctx_idx) < 0) {
+            CRYPTO_w_lock(CRYPTO_LOCK_SSL_CTX);
+            if ((ret = ssl_x509_store_ctx_idx) < 0) {
+                ret = ssl_x509_store_ctx_idx =
+                    X509_STORE_CTX_get_ex_new_index(0,
+                                                    "SSL for verify callback",
+                                                    NULL, NULL, NULL);
+            }
+            CRYPTO_w_unlock(CRYPTO_LOCK_SSL_CTX);
+        }
+
+        return ret;
+    } else {                    /* commonly eliminated */
 
-    if (ssl_x509_store_ctx_idx < 0) {
-        CRYPTO_r_unlock(CRYPTO_LOCK_SSL_CTX);
-        CRYPTO_w_lock(CRYPTO_LOCK_SSL_CTX);
-        got_write_lock = 1;
+        CRYPTO_r_lock(CRYPTO_LOCK_SSL_CTX);
 
         if (ssl_x509_store_ctx_idx < 0) {
-            ssl_x509_store_ctx_idx =
-                X509_STORE_CTX_get_ex_new_index(0, "SSL for verify callback",
-                                                NULL, NULL, NULL);
+            CRYPTO_r_unlock(CRYPTO_LOCK_SSL_CTX);
+            CRYPTO_w_lock(CRYPTO_LOCK_SSL_CTX);
+            got_write_lock = 1;
+
+            if (ssl_x509_store_ctx_idx < 0) {
+                ssl_x509_store_ctx_idx =
+                    X509_STORE_CTX_get_ex_new_index(0,
+                                                    "SSL for verify callback",
+                                                    NULL, NULL, NULL);
+            }
         }
-    }
 
-    if (got_write_lock)
-        CRYPTO_w_unlock(CRYPTO_LOCK_SSL_CTX);
-    else
-        CRYPTO_r_unlock(CRYPTO_LOCK_SSL_CTX);
+        if (got_write_lock)
+            CRYPTO_w_unlock(CRYPTO_LOCK_SSL_CTX);
+        else
+            CRYPTO_r_unlock(CRYPTO_LOCK_SSL_CTX);
 
-    return ssl_x509_store_ctx_idx;
+        return ssl_x509_store_ctx_idx;
+    }
 }
 
-static void ssl_cert_set_default_md(CERT *cert)
+void ssl_cert_set_default_md(CERT *cert)
 {
     /* Set digest values to defaults */
 #ifndef OPENSSL_NO_DSA
@@ -262,25 +283,47 @@ CERT *ssl_cert_dup(CERT *cert)
         }
     }
     ret->ecdh_tmp_cb = cert->ecdh_tmp_cb;
+    ret->ecdh_tmp_auto = cert->ecdh_tmp_auto;
 #endif
 
     for (i = 0; i < SSL_PKEY_NUM; i++) {
-        if (cert->pkeys[i].x509 != NULL) {
-            ret->pkeys[i].x509 = cert->pkeys[i].x509;
-            CRYPTO_add(&ret->pkeys[i].x509->references, 1, CRYPTO_LOCK_X509);
+        CERT_PKEY *cpk = cert->pkeys + i;
+        CERT_PKEY *rpk = ret->pkeys + i;
+        if (cpk->x509 != NULL) {
+            rpk->x509 = cpk->x509;
+            CRYPTO_add(&rpk->x509->references, 1, CRYPTO_LOCK_X509);
         }
 
-        if (cert->pkeys[i].privatekey != NULL) {
-            ret->pkeys[i].privatekey = cert->pkeys[i].privatekey;
-            CRYPTO_add(&ret->pkeys[i].privatekey->references, 1,
-                       CRYPTO_LOCK_EVP_PKEY);
+        if (cpk->privatekey != NULL) {
+            rpk->privatekey = cpk->privatekey;
+            CRYPTO_add(&cpk->privatekey->references, 1, CRYPTO_LOCK_EVP_PKEY);
         }
-    }
 
-    /*
-     * ret->extra_certs *should* exist, but currently the own certificate
-     * chain is held inside SSL_CTX
-     */
+        if (cpk->chain) {
+            rpk->chain = X509_chain_up_ref(cpk->chain);
+            if (!rpk->chain) {
+                SSLerr(SSL_F_SSL_CERT_DUP, ERR_R_MALLOC_FAILURE);
+                goto err;
+            }
+        }
+        rpk->valid_flags = 0;
+#ifndef OPENSSL_NO_TLSEXT
+        if (cert->pkeys[i].serverinfo != NULL) {
+            /* Deep-copy serverinfo; on failure unwind via err, not return. */
+            ret->pkeys[i].serverinfo =
+                OPENSSL_malloc(cert->pkeys[i].serverinfo_length);
+            if (ret->pkeys[i].serverinfo == NULL) {
+                SSLerr(SSL_F_SSL_CERT_DUP, ERR_R_MALLOC_FAILURE);
+                goto err;
+            }
+            ret->pkeys[i].serverinfo_length =
+                cert->pkeys[i].serverinfo_length;
+            memcpy(ret->pkeys[i].serverinfo,
+                   cert->pkeys[i].serverinfo,
+                   cert->pkeys[i].serverinfo_length);
+        }
+#endif
+    }
 
     ret->references = 1;
     /*
@@ -288,6 +331,64 @@ CERT *ssl_cert_dup(CERT *cert)
      * will be set during handshake.
      */
     ssl_cert_set_default_md(ret);
+    /* Peer sigalgs set to NULL as we get these from handshake too */
+    ret->peer_sigalgs = NULL;
+    ret->peer_sigalgslen = 0;
+    /* Configured sigalgs, however, we copy across */
+
+    if (cert->conf_sigalgs) {
+        ret->conf_sigalgs = OPENSSL_malloc(cert->conf_sigalgslen);
+        if (!ret->conf_sigalgs)
+            goto err;
+        memcpy(ret->conf_sigalgs, cert->conf_sigalgs, cert->conf_sigalgslen);
+        ret->conf_sigalgslen = cert->conf_sigalgslen;
+    } else
+        ret->conf_sigalgs = NULL;
+
+    if (cert->client_sigalgs) {
+        ret->client_sigalgs = OPENSSL_malloc(cert->client_sigalgslen);
+        if (!ret->client_sigalgs)
+            goto err;
+        memcpy(ret->client_sigalgs, cert->client_sigalgs,
+               cert->client_sigalgslen);
+        ret->client_sigalgslen = cert->client_sigalgslen;
+    } else
+        ret->client_sigalgs = NULL;
+    /* Shared sigalgs also NULL */
+    ret->shared_sigalgs = NULL;
+    /* Copy any custom client certificate types */
+    if (cert->ctypes) {
+        ret->ctypes = OPENSSL_malloc(cert->ctype_num);
+        if (!ret->ctypes)
+            goto err;
+        memcpy(ret->ctypes, cert->ctypes, cert->ctype_num);
+        ret->ctype_num = cert->ctype_num;
+    }
+
+    ret->cert_flags = cert->cert_flags;
+
+    ret->cert_cb = cert->cert_cb;
+    ret->cert_cb_arg = cert->cert_cb_arg;
+
+    if (cert->verify_store) {
+        CRYPTO_add(&cert->verify_store->references, 1,
+                   CRYPTO_LOCK_X509_STORE);
+        ret->verify_store = cert->verify_store;
+    }
+
+    if (cert->chain_store) {
+        CRYPTO_add(&cert->chain_store->references, 1, CRYPTO_LOCK_X509_STORE);
+        ret->chain_store = cert->chain_store;
+    }
+
+    ret->ciphers_raw = NULL;
+
+#ifndef OPENSSL_NO_TLSEXT
+    if (!custom_exts_copy(&ret->cli_ext, &cert->cli_ext))
+        goto err;
+    if (!custom_exts_copy(&ret->srv_ext, &cert->srv_ext))
+        goto err;
+#endif
 
     return (ret);
 
@@ -307,16 +408,49 @@ CERT *ssl_cert_dup(CERT *cert)
         EC_KEY_free(ret->ecdh_tmp);
 #endif
 
-    for (i = 0; i < SSL_PKEY_NUM; i++) {
-        if (ret->pkeys[i].x509 != NULL)
-            X509_free(ret->pkeys[i].x509);
-        if (ret->pkeys[i].privatekey != NULL)
-            EVP_PKEY_free(ret->pkeys[i].privatekey);
-    }
+#ifndef OPENSSL_NO_TLSEXT
+    custom_exts_free(&ret->cli_ext);
+    custom_exts_free(&ret->srv_ext);
+#endif
+
+    ssl_cert_clear_certs(ret);
 
     return NULL;
 }
 
+/* Free up and clear all certificates and chains */
+
+void ssl_cert_clear_certs(CERT *c)
+{
+    int i;
+    if (c == NULL)
+        return;
+    for (i = 0; i < SSL_PKEY_NUM; i++) {
+        CERT_PKEY *cpk = c->pkeys + i;
+        if (cpk->x509) {
+            X509_free(cpk->x509);
+            cpk->x509 = NULL;
+        }
+        if (cpk->privatekey) {
+            EVP_PKEY_free(cpk->privatekey);
+            cpk->privatekey = NULL;
+        }
+        if (cpk->chain) {
+            sk_X509_pop_free(cpk->chain, X509_free);
+            cpk->chain = NULL;
+        }
+#ifndef OPENSSL_NO_TLSEXT
+        if (cpk->serverinfo) {
+            OPENSSL_free(cpk->serverinfo);
+            cpk->serverinfo = NULL;
+            cpk->serverinfo_length = 0;
+        }
+#endif
+        /* Clear all flags apart from explicit sign */
+        cpk->valid_flags &= CERT_PKEY_EXPLICIT_SIGN;
+    }
+}
+
 void ssl_cert_free(CERT *c)
 {
     int i;
@@ -350,16 +484,27 @@ void ssl_cert_free(CERT *c)
         EC_KEY_free(c->ecdh_tmp);
 #endif
 
-    for (i = 0; i < SSL_PKEY_NUM; i++) {
-        if (c->pkeys[i].x509 != NULL)
-            X509_free(c->pkeys[i].x509);
-        if (c->pkeys[i].privatekey != NULL)
-            EVP_PKEY_free(c->pkeys[i].privatekey);
-#if 0
-        if (c->pkeys[i].publickey != NULL)
-            EVP_PKEY_free(c->pkeys[i].publickey);
+    ssl_cert_clear_certs(c);
+    if (c->peer_sigalgs)
+        OPENSSL_free(c->peer_sigalgs);
+    if (c->conf_sigalgs)
+        OPENSSL_free(c->conf_sigalgs);
+    if (c->client_sigalgs)
+        OPENSSL_free(c->client_sigalgs);
+    if (c->shared_sigalgs)
+        OPENSSL_free(c->shared_sigalgs);
+    if (c->ctypes)
+        OPENSSL_free(c->ctypes);
+    if (c->verify_store)
+        X509_STORE_free(c->verify_store);
+    if (c->chain_store)
+        X509_STORE_free(c->chain_store);
+    if (c->ciphers_raw)
+        OPENSSL_free(c->ciphers_raw);
+#ifndef OPENSSL_NO_TLSEXT
+    custom_exts_free(&c->cli_ext);
+    custom_exts_free(&c->srv_ext);
 #endif
-    }
     OPENSSL_free(c);
 }
 
@@ -388,6 +533,104 @@ int ssl_cert_inst(CERT **o)
     return (1);
 }
 
+int ssl_cert_set0_chain(CERT *c, STACK_OF(X509) *chain)
+{
+    CERT_PKEY *cpk = c->key;
+    if (!cpk)
+        return 0;
+    if (cpk->chain)
+        sk_X509_pop_free(cpk->chain, X509_free);
+    cpk->chain = chain;
+    return 1;
+}
+
+int ssl_cert_set1_chain(CERT *c, STACK_OF(X509) *chain)
+{
+    STACK_OF(X509) *dchain;
+    if (!chain)
+        return ssl_cert_set0_chain(c, NULL);
+    dchain = X509_chain_up_ref(chain);
+    if (!dchain)
+        return 0;
+    if (!ssl_cert_set0_chain(c, dchain)) {
+        sk_X509_pop_free(dchain, X509_free);
+        return 0;
+    }
+    return 1;
+}
+
+int ssl_cert_add0_chain_cert(CERT *c, X509 *x)
+{
+    CERT_PKEY *cpk = c->key;
+    if (!cpk)
+        return 0;
+    if (!cpk->chain)
+        cpk->chain = sk_X509_new_null();
+    if (!cpk->chain || !sk_X509_push(cpk->chain, x))
+        return 0;
+    return 1;
+}
+
+int ssl_cert_add1_chain_cert(CERT *c, X509 *x)
+{
+    if (!ssl_cert_add0_chain_cert(c, x))
+        return 0;
+    CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509);
+    return 1;
+}
+
+int ssl_cert_select_current(CERT *c, X509 *x)
+{
+    int i;
+    if (x == NULL)
+        return 0;
+    for (i = 0; i < SSL_PKEY_NUM; i++) {
+        CERT_PKEY *cpk = c->pkeys + i;
+        if (cpk->x509 == x && cpk->privatekey) {
+            c->key = cpk;
+            return 1;
+        }
+    }
+
+    for (i = 0; i < SSL_PKEY_NUM; i++) {
+        CERT_PKEY *cpk = c->pkeys + i;
+        if (cpk->privatekey && cpk->x509 && !X509_cmp(cpk->x509, x)) {
+            c->key = cpk;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+int ssl_cert_set_current(CERT *c, long op)
+{
+    int i, idx;
+    if (!c)
+        return 0;
+    if (op == SSL_CERT_SET_FIRST)
+        idx = 0;
+    else if (op == SSL_CERT_SET_NEXT) {
+        idx = (int)(c->key - c->pkeys + 1);
+        if (idx >= SSL_PKEY_NUM)
+            return 0;
+    } else
+        return 0;
+    for (i = idx; i < SSL_PKEY_NUM; i++) {
+        CERT_PKEY *cpk = c->pkeys + i;
+        if (cpk->x509 && cpk->privatekey) {
+            c->key = cpk;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+void ssl_cert_set_cert_cb(CERT *c, int (*cb) (SSL *ssl, void *arg), void *arg)
+{
+    c->cert_cb = cb;
+    c->cert_cb_arg = arg;
+}
+
 SESS_CERT *ssl_sess_cert_new(void)
 {
     SESS_CERT *ret;
@@ -466,16 +709,24 @@ int ssl_verify_cert_chain(SSL *s, STACK_OF(X509) *sk)
 {
     X509 *x;
     int i;
+    X509_STORE *verify_store;
     X509_STORE_CTX ctx;
 
+    if (s->cert->verify_store)
+        verify_store = s->cert->verify_store;
+    else
+        verify_store = s->ctx->cert_store;
+
     if ((sk == NULL) || (sk_X509_num(sk) == 0))
         return (0);
 
     x = sk_X509_value(sk, 0);
-    if (!X509_STORE_CTX_init(&ctx, s->ctx->cert_store, x, sk)) {
+    if (!X509_STORE_CTX_init(&ctx, verify_store, x, sk)) {
         SSLerr(SSL_F_SSL_VERIFY_CERT_CHAIN, ERR_R_X509_LIB);
         return (0);
     }
+    /* Set suite B flags if needed */
+    X509_STORE_CTX_set_flags(&ctx, tls1_suiteb(s));
 #if 0
     if (SSL_get_verify_depth(s) >= 0)
         X509_STORE_CTX_set_depth(&ctx, SSL_get_verify_depth(s));
@@ -797,3 +1048,210 @@ int SSL_add_dir_cert_subjects_to_stack(STACK_OF(X509_NAME) *stack,
     CRYPTO_w_unlock(CRYPTO_LOCK_READDIR);
     return ret;
 }
+
+/* Add a certificate to a BUF_MEM structure */
+
+static int ssl_add_cert_to_buf(BUF_MEM *buf, unsigned long *l, X509 *x)
+{
+    int n;
+    unsigned char *p;
+
+    n = i2d_X509(x, NULL);
+    if (!BUF_MEM_grow_clean(buf, (int)(n + (*l) + 3))) {
+        SSLerr(SSL_F_SSL_ADD_CERT_TO_BUF, ERR_R_BUF_LIB);
+        return 0;
+    }
+    p = (unsigned char *)&(buf->data[*l]);
+    l2n3(n, p);
+    i2d_X509(x, &p);
+    *l += n + 3;
+
+    return 1;
+}
+
+/* Add certificate chain to internal SSL BUF_MEM structure */
+int ssl_add_cert_chain(SSL *s, CERT_PKEY *cpk, unsigned long *l)
+{
+    BUF_MEM *buf = s->init_buf;
+    int no_chain;
+    int i;
+
+    X509 *x;
+    STACK_OF(X509) *extra_certs;
+    X509_STORE *chain_store;
+
+    if (cpk)
+        x = cpk->x509;
+    else
+        x = NULL;
+
+    if (s->cert->chain_store)
+        chain_store = s->cert->chain_store;
+    else
+        chain_store = s->ctx->cert_store;
+
+    /*
+     * If we have a certificate specific chain use it, else use parent ctx.
+     */
+    if (cpk && cpk->chain)
+        extra_certs = cpk->chain;
+    else
+        extra_certs = s->ctx->extra_certs;
+
+    if ((s->mode & SSL_MODE_NO_AUTO_CHAIN) || extra_certs)
+        no_chain = 1;
+    else
+        no_chain = 0;
+
+    /* TLSv1 sends a chain with nothing in it, instead of an alert */
+    if (!BUF_MEM_grow_clean(buf, 10)) {
+        SSLerr(SSL_F_SSL_ADD_CERT_CHAIN, ERR_R_BUF_LIB);
+        return 0;
+    }
+    if (x != NULL) {
+        if (no_chain) {
+            if (!ssl_add_cert_to_buf(buf, l, x))
+                return 0;
+        } else {
+            X509_STORE_CTX xs_ctx;
+
+            if (!X509_STORE_CTX_init(&xs_ctx, chain_store, x, NULL)) {
+                SSLerr(SSL_F_SSL_ADD_CERT_CHAIN, ERR_R_X509_LIB);
+                return (0);
+            }
+            X509_verify_cert(&xs_ctx);
+            /* Don't leave errors in the queue */
+            ERR_clear_error();
+            for (i = 0; i < sk_X509_num(xs_ctx.chain); i++) {
+                x = sk_X509_value(xs_ctx.chain, i);
+
+                if (!ssl_add_cert_to_buf(buf, l, x)) {
+                    X509_STORE_CTX_cleanup(&xs_ctx);
+                    return 0;
+                }
+            }
+            X509_STORE_CTX_cleanup(&xs_ctx);
+        }
+    }
+    for (i = 0; i < sk_X509_num(extra_certs); i++) {
+        x = sk_X509_value(extra_certs, i);
+        if (!ssl_add_cert_to_buf(buf, l, x))
+            return 0;
+    }
+
+    return 1;
+}
+
+/* Build a certificate chain for current certificate */
+int ssl_build_cert_chain(CERT *c, X509_STORE *chain_store, int flags)
+{
+    CERT_PKEY *cpk = c->key;
+    X509_STORE_CTX xs_ctx;
+    STACK_OF(X509) *chain = NULL, *untrusted = NULL;
+    X509 *x;
+    int i, rv = 0;
+    unsigned long error;
+
+    if (!cpk->x509) {
+        SSLerr(SSL_F_SSL_BUILD_CERT_CHAIN, SSL_R_NO_CERTIFICATE_SET);
+        goto err;
+    }
+    /* Rearrange and check the chain: add everything to a store */
+    if (flags & SSL_BUILD_CHAIN_FLAG_CHECK) {
+        chain_store = X509_STORE_new();
+        if (!chain_store)
+            goto err;
+        for (i = 0; i < sk_X509_num(cpk->chain); i++) {
+            x = sk_X509_value(cpk->chain, i);
+            if (!X509_STORE_add_cert(chain_store, x)) {
+                error = ERR_peek_last_error();
+                if (ERR_GET_LIB(error) != ERR_LIB_X509 ||
+                    ERR_GET_REASON(error) !=
+                    X509_R_CERT_ALREADY_IN_HASH_TABLE)
+                    goto err;
+                ERR_clear_error();
+            }
+        }
+        /* Add EE cert too: it might be self signed */
+        if (!X509_STORE_add_cert(chain_store, cpk->x509)) {
+            error = ERR_peek_last_error();
+            if (ERR_GET_LIB(error) != ERR_LIB_X509 ||
+                ERR_GET_REASON(error) != X509_R_CERT_ALREADY_IN_HASH_TABLE)
+                goto err;
+            ERR_clear_error();
+        }
+    } else {
+        if (c->chain_store)
+            chain_store = c->chain_store;
+
+        if (flags & SSL_BUILD_CHAIN_FLAG_UNTRUSTED)
+            untrusted = cpk->chain;
+    }
+
+    if (!X509_STORE_CTX_init(&xs_ctx, chain_store, cpk->x509, untrusted)) {
+        SSLerr(SSL_F_SSL_BUILD_CERT_CHAIN, ERR_R_X509_LIB);
+        goto err;
+    }
+    /* Set suite B flags if needed */
+    X509_STORE_CTX_set_flags(&xs_ctx,
+                             c->cert_flags & SSL_CERT_FLAG_SUITEB_128_LOS);
+
+    i = X509_verify_cert(&xs_ctx);
+    if (i <= 0 && flags & SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR) {
+        if (flags & SSL_BUILD_CHAIN_FLAG_CLEAR_ERROR)
+            ERR_clear_error();
+        i = 1;
+        rv = 2;
+    }
+    if (i > 0)
+        chain = X509_STORE_CTX_get1_chain(&xs_ctx);
+    if (i <= 0) {
+        SSLerr(SSL_F_SSL_BUILD_CERT_CHAIN, SSL_R_CERTIFICATE_VERIFY_FAILED);
+        i = X509_STORE_CTX_get_error(&xs_ctx);
+        ERR_add_error_data(2, "Verify error:",
+                           X509_verify_cert_error_string(i));
+
+        X509_STORE_CTX_cleanup(&xs_ctx);
+        goto err;
+    }
+    X509_STORE_CTX_cleanup(&xs_ctx);
+    if (cpk->chain)
+        sk_X509_pop_free(cpk->chain, X509_free);
+    /* Remove EE certificate from chain */
+    x = sk_X509_shift(chain);
+    X509_free(x);
+    if (flags & SSL_BUILD_CHAIN_FLAG_NO_ROOT) {
+        if (sk_X509_num(chain) > 0) {
+            /* See if last cert is self signed */
+            x = sk_X509_value(chain, sk_X509_num(chain) - 1);
+            X509_check_purpose(x, -1, 0);
+            if (x->ex_flags & EXFLAG_SS) {
+                x = sk_X509_pop(chain);
+                X509_free(x);
+            }
+        }
+    }
+    cpk->chain = chain;
+    if (rv == 0)
+        rv = 1;
+ err:
+    if (flags & SSL_BUILD_CHAIN_FLAG_CHECK)
+        X509_STORE_free(chain_store);
+
+    return rv;
+}
+
+int ssl_cert_set_cert_store(CERT *c, X509_STORE *store, int chain, int ref)
+{
+    X509_STORE **pstore;
+    if (chain)
+        pstore = &c->chain_store;
+    else
+        pstore = &c->verify_store;
+    if (*pstore)
+        X509_STORE_free(*pstore);
+    *pstore = store;
+    if (ref && store)
+        CRYPTO_add(&store->references, 1, CRYPTO_LOCK_X509_STORE);
+    return 1;
+}
index cac525e..2cc9a4a 100644 (file)
@@ -245,13 +245,11 @@ static const SSL_CIPHER cipher_aliases[] = {
      */
     {0, SSL_TXT_kRSA, 0, SSL_kRSA, 0, 0, 0, 0, 0, 0, 0, 0},
 
-    /* no such ciphersuites supported! */
     {0, SSL_TXT_kDHr, 0, SSL_kDHr, 0, 0, 0, 0, 0, 0, 0, 0},
-    /* no such ciphersuites supported! */
     {0, SSL_TXT_kDHd, 0, SSL_kDHd, 0, 0, 0, 0, 0, 0, 0, 0},
-    /* no such ciphersuites supported! */
     {0, SSL_TXT_kDH, 0, SSL_kDHr | SSL_kDHd, 0, 0, 0, 0, 0, 0, 0, 0},
-        {0, SSL_TXT_kEDH, 0, SSL_kEDH, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, SSL_TXT_kEDH, 0, SSL_kEDH, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, SSL_TXT_kDHE, 0, SSL_kEDH, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, SSL_TXT_DH, 0, SSL_kDHr | SSL_kDHd | SSL_kEDH, 0, 0, 0, 0, 0, 0, 0,
      0},
 
@@ -261,6 +259,7 @@ static const SSL_CIPHER cipher_aliases[] = {
     {0, SSL_TXT_kECDHe, 0, SSL_kECDHe, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, SSL_TXT_kECDH, 0, SSL_kECDHr | SSL_kECDHe, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, SSL_TXT_kEECDH, 0, SSL_kEECDH, 0, 0, 0, 0, 0, 0, 0, 0},
+    {0, SSL_TXT_kECDHE, 0, SSL_kEECDH, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, SSL_TXT_ECDH, 0, SSL_kECDHr | SSL_kECDHe | SSL_kEECDH, 0, 0, 0, 0, 0,
      0, 0, 0},
 
@@ -287,7 +286,9 @@ static const SSL_CIPHER cipher_aliases[] = {
 
     /* aliases combining key exchange and server authentication */
     {0, SSL_TXT_EDH, 0, SSL_kEDH, ~SSL_aNULL, 0, 0, 0, 0, 0, 0, 0},
+    {0, SSL_TXT_DHE, 0, SSL_kEDH, ~SSL_aNULL, 0, 0, 0, 0, 0, 0, 0},
     {0, SSL_TXT_EECDH, 0, SSL_kEECDH, ~SSL_aNULL, 0, 0, 0, 0, 0, 0, 0},
+    {0, SSL_TXT_ECDHE, 0, SSL_kEECDH, ~SSL_aNULL, 0, 0, 0, 0, 0, 0, 0},
     {0, SSL_TXT_NULL, 0, 0, 0, SSL_eNULL, 0, 0, 0, 0, 0, 0},
     {0, SSL_TXT_KRB5, 0, SSL_kKRB5, SSL_aKRB5, 0, 0, 0, 0, 0, 0, 0},
     {0, SSL_TXT_RSA, 0, SSL_kRSA, SSL_aRSA, 0, 0, 0, 0, 0, 0, 0},
@@ -343,6 +344,25 @@ static const SSL_CIPHER cipher_aliases[] = {
     {0, SSL_TXT_HIGH, 0, 0, 0, 0, 0, 0, SSL_HIGH, 0, 0, 0},
     /* FIPS 140-2 approved ciphersuite */
     {0, SSL_TXT_FIPS, 0, 0, 0, ~SSL_eNULL, 0, 0, SSL_FIPS, 0, 0, 0},
+    /* "DHE-" aliases to "EDH-" labels (for forward compatibility) */
+    {0, SSL3_TXT_DHE_DSS_DES_40_CBC_SHA, 0,
+     SSL_kDHE, SSL_aDSS, SSL_DES, SSL_SHA1, SSL_SSLV3, SSL_EXPORT | SSL_EXP40,
+     0, 0, 0,},
+    {0, SSL3_TXT_DHE_DSS_DES_64_CBC_SHA, 0,
+     SSL_kDHE, SSL_aDSS, SSL_DES, SSL_SHA1, SSL_SSLV3, SSL_NOT_EXP | SSL_LOW,
+     0, 0, 0,},
+    {0, SSL3_TXT_DHE_DSS_DES_192_CBC3_SHA, 0,
+     SSL_kDHE, SSL_aDSS, SSL_3DES, SSL_SHA1, SSL_SSLV3,
+     SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, 0, 0, 0,},
+    {0, SSL3_TXT_DHE_RSA_DES_40_CBC_SHA, 0,
+     SSL_kDHE, SSL_aRSA, SSL_DES, SSL_SHA1, SSL_SSLV3, SSL_EXPORT | SSL_EXP40,
+     0, 0, 0,},
+    {0, SSL3_TXT_DHE_RSA_DES_64_CBC_SHA, 0,
+     SSL_kDHE, SSL_aRSA, SSL_DES, SSL_SHA1, SSL_SSLV3, SSL_NOT_EXP | SSL_LOW,
+     0, 0, 0,},
+    {0, SSL3_TXT_DHE_RSA_DES_192_CBC3_SHA, 0,
+     SSL_kDHE, SSL_aRSA, SSL_3DES, SSL_SHA1, SSL_SSLV3,
+     SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, 0, 0, 0,},
 };
 
 /*
@@ -638,6 +658,14 @@ int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc,
                  c->algorithm_mac == SSL_SHA1 &&
                  (evp = EVP_get_cipherbyname("AES-256-CBC-HMAC-SHA1")))
             *enc = evp, *md = NULL;
+        else if (c->algorithm_enc == SSL_AES128 &&
+                 c->algorithm_mac == SSL_SHA256 &&
+                 (evp = EVP_get_cipherbyname("AES-128-CBC-HMAC-SHA256")))
+            *enc = evp, *md = NULL;
+        else if (c->algorithm_enc == SSL_AES256 &&
+                 c->algorithm_mac == SSL_SHA256 &&
+                 (evp = EVP_get_cipherbyname("AES-256-CBC-HMAC-SHA256")))
+            *enc = evp, *md = NULL;
         return (1);
     } else
         return (0);
@@ -710,8 +738,6 @@ static void ssl_cipher_get_disabled(unsigned long *mkey, unsigned long *auth,
 #ifdef OPENSSL_NO_DSA
     *auth |= SSL_aDSS;
 #endif
-    *mkey |= SSL_kDHr | SSL_kDHd; /* no such ciphersuites supported! */
-    *auth |= SSL_aDH;
 #ifdef OPENSSL_NO_DH
     *mkey |= SSL_kDHr | SSL_kDHd | SSL_kEDH;
     *auth |= SSL_aDH;
@@ -997,6 +1023,10 @@ static void ssl_cipher_apply_rule(unsigned long cipher_id,
                     cp->algorithm_enc, cp->algorithm_mac, cp->algorithm_ssl,
                     cp->algo_strength);
 #endif
+#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+            if (cipher_id && cipher_id != cp->id)
+                continue;
+#endif
             if (algo_strength == SSL_EXP_MASK && SSL_C_IS_EXPORT(cp))
                 goto ok;
             if (alg_ssl == ~SSL_SSLV2 && cp->algorithm_ssl == SSL_SSLV2)
@@ -1369,10 +1399,71 @@ static int ssl_cipher_process_rulestr(const char *rule_str,
     return (retval);
 }
 
+#ifndef OPENSSL_NO_EC
+static int check_suiteb_cipher_list(const SSL_METHOD *meth, CERT *c,
+                                    const char **prule_str)
+{
+    unsigned int suiteb_flags = 0, suiteb_comb2 = 0;
+    if (!strcmp(*prule_str, "SUITEB128"))
+        suiteb_flags = SSL_CERT_FLAG_SUITEB_128_LOS;
+    else if (!strcmp(*prule_str, "SUITEB128ONLY"))
+        suiteb_flags = SSL_CERT_FLAG_SUITEB_128_LOS_ONLY;
+    else if (!strcmp(*prule_str, "SUITEB128C2")) {
+        suiteb_comb2 = 1;
+        suiteb_flags = SSL_CERT_FLAG_SUITEB_128_LOS;
+    } else if (!strcmp(*prule_str, "SUITEB192"))
+        suiteb_flags = SSL_CERT_FLAG_SUITEB_192_LOS;
+
+    if (suiteb_flags) {
+        c->cert_flags &= ~SSL_CERT_FLAG_SUITEB_128_LOS;
+        c->cert_flags |= suiteb_flags;
+    } else
+        suiteb_flags = c->cert_flags & SSL_CERT_FLAG_SUITEB_128_LOS;
+
+    if (!suiteb_flags)
+        return 1;
+    /* Check version: if TLS 1.2 ciphers allowed we can use Suite B */
+
+    if (!(meth->ssl3_enc->enc_flags & SSL_ENC_FLAG_TLS1_2_CIPHERS)) {
+        if (meth->ssl3_enc->enc_flags & SSL_ENC_FLAG_DTLS)
+            SSLerr(SSL_F_CHECK_SUITEB_CIPHER_LIST,
+                   SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE);
+        else
+            SSLerr(SSL_F_CHECK_SUITEB_CIPHER_LIST,
+                   SSL_R_ONLY_TLS_1_2_ALLOWED_IN_SUITEB_MODE);
+        return 0;
+    }
+# ifndef OPENSSL_NO_ECDH
+    switch (suiteb_flags) {
+    case SSL_CERT_FLAG_SUITEB_128_LOS:
+        if (suiteb_comb2)
+            *prule_str = "ECDHE-ECDSA-AES256-GCM-SHA384";
+        else
+            *prule_str =
+                "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384";
+        break;
+    case SSL_CERT_FLAG_SUITEB_128_LOS_ONLY:
+        *prule_str = "ECDHE-ECDSA-AES128-GCM-SHA256";
+        break;
+    case SSL_CERT_FLAG_SUITEB_192_LOS:
+        *prule_str = "ECDHE-ECDSA-AES256-GCM-SHA384";
+        break;
+    }
+    /* Set auto ECDH parameter determination */
+    c->ecdh_tmp_auto = 1;
+    return 1;
+# else
+    SSLerr(SSL_F_CHECK_SUITEB_CIPHER_LIST,
+           SSL_R_ECDH_REQUIRED_FOR_SUITEB_MODE);
+    return 0;
+# endif
+}
+#endif
+
 STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *ssl_method, STACK_OF(SSL_CIPHER)
                                              **cipher_list, STACK_OF(SSL_CIPHER)
                                              **cipher_list_by_id,
-                                             const char *rule_str)
+                                             const char *rule_str, CERT *c)
 {
     int ok, num_of_ciphers, num_of_alias_max, num_of_group_aliases;
     unsigned long disabled_mkey, disabled_auth, disabled_enc, disabled_mac,
@@ -1387,6 +1478,10 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *ssl_method, STACK
      */
     if (rule_str == NULL || cipher_list == NULL || cipher_list_by_id == NULL)
         return NULL;
+#ifndef OPENSSL_NO_EC
+    if (!check_suiteb_cipher_list(ssl_method, c, &rule_str))
+        return NULL;
+#endif
 
     /*
      * To reduce the work to do we only want to process the compiled
@@ -1854,6 +1949,26 @@ STACK_OF(SSL_COMP) *SSL_COMP_get_compression_methods(void)
     return (ssl_comp_methods);
 }
 
+/*
+ * Store |meths| in ssl_comp_methods and hand the stack that was stored
+ * there before back to the caller. Nothing is freed by this function.
+ */
+STACK_OF(SSL_COMP) *SSL_COMP_set0_compression_methods(STACK_OF(SSL_COMP)
+                                                      *meths)
+{
+    STACK_OF(SSL_COMP) *previous;
+
+    previous = ssl_comp_methods;
+    ssl_comp_methods = meths;
+    return previous;
+}
+
+/*
+ * Release one SSL_COMP entry. Passed to sk_SSL_COMP_pop_free() by
+ * SSL_COMP_free_compression_methods() as the per-element free callback.
+ */
+static void cmeth_free(SSL_COMP *cm)
+{
+    OPENSSL_free(cm);
+}
+
+/*
+ * Detach the stack held in ssl_comp_methods and release it, freeing every
+ * entry with cmeth_free().
+ */
+void SSL_COMP_free_compression_methods(void)
+{
+    STACK_OF(SSL_COMP) *doomed;
+
+    doomed = ssl_comp_methods;
+    /* Clear the stored pointer before the stack it referred to is freed */
+    ssl_comp_methods = NULL;
+    sk_SSL_COMP_pop_free(doomed, cmeth_free);
+}
+
 int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm)
 {
     SSL_COMP *comp;
@@ -1904,5 +2019,55 @@ const char *SSL_COMP_get_name(const COMP_METHOD *comp)
         return comp->name;
     return NULL;
 }
-
 #endif
+/*
+ * For a cipher return the index corresponding to the certificate type, or
+ * -1 when no certificate slot matches.
+ */
+int ssl_cipher_get_cert_index(const SSL_CIPHER *c)
+{
+    const unsigned long kx = c->algorithm_mkey;
+    const unsigned long au = c->algorithm_auth;
+
+    /*
+     * we don't need to look at SSL_kEECDH since no certificate is needed
+     * for anon ECDH and for authenticated EECDH, the check for the auth
+     * algorithm will set i correctly NOTE: For ECDH-RSA, we need an ECC
+     * not an RSA cert but for EECDH-RSA we need an RSA cert. Placing the
+     * checks for SSL_kECDH before RSA checks ensures the correct cert is
+     * chosen.
+     */
+    if (kx & (SSL_kECDHr | SSL_kECDHe))
+        return SSL_PKEY_ECC;
+    if (au & SSL_aECDSA)
+        return SSL_PKEY_ECC;
+    if (kx & SSL_kDHr)
+        return SSL_PKEY_DH_RSA;
+    if (kx & SSL_kDHd)
+        return SSL_PKEY_DH_DSA;
+    if (au & SSL_aDSS)
+        return SSL_PKEY_DSA_SIGN;
+    if (au & SSL_aRSA)
+        return SSL_PKEY_RSA_ENC;
+    if (au & SSL_aKRB5) {
+        /* VRS something else here? */
+        return -1;
+    }
+    if (au & SSL_aGOST94)
+        return SSL_PKEY_GOST94;
+    if (au & SSL_aGOST01)
+        return SSL_PKEY_GOST01;
+
+    return -1;
+}
+
+/*
+ * Look up the cipher encoded at |ptr| through the method's
+ * get_cipher_by_char hook. A cipher whose valid field is zero is reported
+ * as not found (NULL).
+ */
+const SSL_CIPHER *ssl_get_cipher_by_char(SSL *ssl, const unsigned char *ptr)
+{
+    const SSL_CIPHER *found = ssl->method->get_cipher_by_char(ptr);
+
+    return (found != NULL && found->valid != 0) ? found : NULL;
+}
+
+/*
+ * Return whatever the method's get_cipher_by_char hook yields for |ptr|.
+ * Unlike ssl_get_cipher_by_char() the result is not filtered on its valid
+ * field.
+ */
+const SSL_CIPHER *SSL_CIPHER_find(SSL *ssl, const unsigned char *ptr)
+{
+    return ssl->method->get_cipher_by_char(ptr);
+}
diff --git a/ssl/ssl_conf.c b/ssl/ssl_conf.c
new file mode 100644 (file)
index 0000000..5478840
--- /dev/null
@@ -0,0 +1,683 @@
+/*
+ * ! \file ssl/ssl_conf.c \brief SSL configuration functions
+ */
+/* ====================================================================
+ * Copyright (c) 2012 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com).  This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+#ifdef REF_CHECK
+# include <assert.h>
+#endif
+#include <stdio.h>
+#include "ssl_locl.h"
+#include <openssl/conf.h>
+#include <openssl/objects.h>
+#ifndef OPENSSL_NO_DH
+# include <openssl/dh.h>
+#endif
+
+/*
+ * structure holding name tables. This is used for permitted elements in lists
+ * such as TLSv1 and single command line switches such as no_tls1
+ */
+
+typedef struct {
+    /* Option name as matched against list elements or switches */
+    const char *name;
+    /* Length of name, not counting the terminating NUL */
+    int namelen;
+    /* SSL_TFLAG_* bits describing how and in which contexts name applies */
+    unsigned int name_flags;
+    /* Bit(s) set or cleared in the options or cert_flags field on a match */
+    unsigned long option_value;
+} ssl_flag_tbl;
+
+/* Sense of name is inverted e.g. "TLSv1" will clear SSL_OP_NO_TLSv1 */
+#define SSL_TFLAG_INV   0x1
+/* Flags refers to cert_flags not options */
+#define SSL_TFLAG_CERT  0x2
+/* Option can only be used for clients */
+#define SSL_TFLAG_CLIENT SSL_CONF_FLAG_CLIENT
+/* Option can only be used for servers */
+#define SSL_TFLAG_SERVER SSL_CONF_FLAG_SERVER
+/* Option can be used for clients and servers */
+#define SSL_TFLAG_BOTH (SSL_TFLAG_CLIENT|SSL_TFLAG_SERVER)
+
+/*
+ * Initialisers for ssl_flag_tbl entries. The name length is computed from
+ * the string literal with sizeof; the variants differ only in the
+ * name_flags value they store (_SRV/_CLI restrict the context, _INV inverts
+ * the sense, _CERT targets cert_flags instead of options).
+ */
+#define SSL_FLAG_TBL(str, flag) \
+        {str, (int)(sizeof(str) - 1), SSL_TFLAG_BOTH, flag}
+#define SSL_FLAG_TBL_SRV(str, flag) \
+        {str, (int)(sizeof(str) - 1), SSL_TFLAG_SERVER, flag}
+#define SSL_FLAG_TBL_CLI(str, flag) \
+        {str, (int)(sizeof(str) - 1), SSL_TFLAG_CLIENT, flag}
+#define SSL_FLAG_TBL_INV(str, flag) \
+        {str, (int)(sizeof(str) - 1), SSL_TFLAG_INV|SSL_TFLAG_BOTH, flag}
+#define SSL_FLAG_TBL_SRV_INV(str, flag) \
+        {str, (int)(sizeof(str) - 1), SSL_TFLAG_INV|SSL_TFLAG_SERVER, flag}
+#define SSL_FLAG_TBL_CERT(str, flag) \
+        {str, (int)(sizeof(str) - 1), SSL_TFLAG_CERT|SSL_TFLAG_BOTH, flag}
+
+/*
+ * Opaque structure containing SSL configuration context.
+ */
+
+struct ssl_conf_ctx_st {
+    /*
+     * Various flags indicating (among other things) which options we will
+     * recognise.
+     */
+    unsigned int flags;
+    /*
+     * Prefix and length of commands. The prefix string is a private copy
+     * made by SSL_CONF_CTX_set1_prefix() and released by SSL_CONF_CTX_free().
+     */
+    char *prefix;
+    size_t prefixlen;
+    /*
+     * SSL_CTX or SSL structure to perform operations on. The setters
+     * SSL_CONF_CTX_set_ssl() and SSL_CONF_CTX_set_ssl_ctx() each clear the
+     * other pointer.
+     */
+    SSL_CTX *ctx;
+    SSL *ssl;
+    /* Pointer to SSL or SSL_CTX options field or NULL if none */
+    unsigned long *poptions;
+    /* Pointer to SSL or SSL_CTX cert_flags or NULL if none */
+    unsigned int *pcert_flags;
+    /* Current flag table being worked on */
+    const ssl_flag_tbl *tbl;
+    /* Size of table */
+    size_t ntbl;
+};
+
+/*
+ * Try to match |name| against a single table entry and, on a match, apply
+ * it. A |namelen| of -1 requests an exact, case sensitive comparison of the
+ * whole string; otherwise the first |namelen| characters are compared
+ * ignoring case and the lengths must agree. When no options field is
+ * attached the entry is only matched, not applied.
+ *
+ * Returns 1 if the entry matched, 0 otherwise.
+ */
+static int ssl_match_option(SSL_CONF_CTX *cctx, const ssl_flag_tbl *tbl,
+                            const char *name, int namelen, int onoff)
+{
+    int mismatch;
+
+    /* If name not relevant for context skip */
+    if ((cctx->flags & tbl->name_flags & SSL_TFLAG_BOTH) == 0)
+        return 0;
+
+    if (namelen == -1)
+        mismatch = strcmp(tbl->name, name) != 0;
+    else
+        mismatch = tbl->namelen != namelen
+            || strncasecmp(tbl->name, name, namelen) != 0;
+    if (mismatch)
+        return 0;
+
+    /* Nothing to modify: report the match only */
+    if (cctx->poptions == NULL)
+        return 1;
+
+    if (tbl->name_flags & SSL_TFLAG_INV)
+        onoff = onoff ^ 1;
+
+    if (tbl->name_flags & SSL_TFLAG_CERT) {
+        if (onoff)
+            *cctx->pcert_flags |= tbl->option_value;
+        else
+            *cctx->pcert_flags &= ~tbl->option_value;
+        return 1;
+    }
+
+    if (onoff)
+        *cctx->poptions |= tbl->option_value;
+    else
+        *cctx->poptions &= ~tbl->option_value;
+    return 1;
+}
+
+/*
+ * Apply one element to the flag table currently selected in the context.
+ * |usr| is the SSL_CONF_CTX. Returns 1 if some table entry matched and 0
+ * otherwise (including a NULL |elem|).
+ */
+static int ssl_set_option_list(const char *elem, int len, void *usr)
+{
+    SSL_CONF_CTX *cctx = usr;
+    size_t idx;
+    int enable = 1;
+
+    if (elem == NULL)
+        return 0;
+
+    /*
+     * len == -1 indicates not being called in list context, just for single
+     * command line switches, so don't allow +, -.
+     */
+    if (len != -1 && (*elem == '+' || *elem == '-')) {
+        enable = (*elem == '+');
+        elem++;
+        len--;
+    }
+
+    for (idx = 0; idx < cctx->ntbl; idx++) {
+        if (ssl_match_option(cctx, &cctx->tbl[idx], elem, len, enable))
+            return 1;
+    }
+    return 0;
+}
+
+/*
+ * Single command line switches with no argument e.g. -no_ssl3.
+ *
+ * Selects the switch table in the context and matches |cmd| against it
+ * exactly (list syntax with +/- is not accepted here). Returns 1 if |cmd|
+ * names a switch applicable to this context and 0 otherwise.
+ *
+ * NOTE(review): both legacy_server_connect entries are limited to server
+ * contexts; the option name suggests it concerns connecting to a legacy
+ * server - confirm the context restriction is intended.
+ */
+static int ctrl_str_option(SSL_CONF_CTX *cctx, const char *cmd)
+{
+    static const ssl_flag_tbl ssl_option_single[] = {
+        SSL_FLAG_TBL("no_ssl2", SSL_OP_NO_SSLv2),
+        SSL_FLAG_TBL("no_ssl3", SSL_OP_NO_SSLv3),
+        SSL_FLAG_TBL("no_tls1", SSL_OP_NO_TLSv1),
+        SSL_FLAG_TBL("no_tls1_1", SSL_OP_NO_TLSv1_1),
+        SSL_FLAG_TBL("no_tls1_2", SSL_OP_NO_TLSv1_2),
+        SSL_FLAG_TBL("bugs", SSL_OP_ALL),
+        SSL_FLAG_TBL("no_comp", SSL_OP_NO_COMPRESSION),
+        SSL_FLAG_TBL_SRV("ecdh_single", SSL_OP_SINGLE_ECDH_USE),
+#ifndef OPENSSL_NO_TLSEXT
+        SSL_FLAG_TBL("no_ticket", SSL_OP_NO_TICKET),
+#endif
+        SSL_FLAG_TBL_SRV("serverpref", SSL_OP_CIPHER_SERVER_PREFERENCE),
+        SSL_FLAG_TBL("legacy_renegotiation",
+                     SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION),
+        SSL_FLAG_TBL_SRV("legacy_server_connect",
+                         SSL_OP_LEGACY_SERVER_CONNECT),
+        SSL_FLAG_TBL_SRV("no_resumption_on_reneg",
+                         SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION),
+        SSL_FLAG_TBL_SRV_INV("no_legacy_server_connect",
+                             SSL_OP_LEGACY_SERVER_CONNECT),
+        SSL_FLAG_TBL_CERT("strict", SSL_CERT_FLAG_TLS_STRICT),
+#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+        SSL_FLAG_TBL_CERT("debug_broken_protocol",
+                          SSL_CERT_FLAG_BROKEN_PROTOCOL),
+#endif
+    };
+    cctx->tbl = ssl_option_single;
+    cctx->ntbl = sizeof(ssl_option_single) / sizeof(ssl_flag_tbl);
+    return ssl_set_option_list(cmd, -1, cctx);
+}
+
+/*
+ * Set supported signature algorithms from the list in |value|, on the SSL
+ * if one is attached and on the SSL_CTX otherwise. Returns 1 on success
+ * and 0 on failure.
+ */
+static int cmd_SignatureAlgorithms(SSL_CONF_CTX *cctx, const char *value)
+{
+    int rv;
+
+    if (cctx->ssl == NULL) {
+        /* NB: ctx == NULL performs syntax checking only */
+        rv = SSL_CTX_set1_sigalgs_list(cctx->ctx, value);
+    } else {
+        rv = SSL_set1_sigalgs_list(cctx->ssl, value);
+    }
+    return rv > 0;
+}
+
+/*
+ * Set supported client signature algorithms from the list in |value|, on
+ * the SSL if one is attached and on the SSL_CTX otherwise. Returns 1 on
+ * success and 0 on failure.
+ */
+static int cmd_ClientSignatureAlgorithms(SSL_CONF_CTX *cctx,
+                                         const char *value)
+{
+    int rv;
+
+    if (cctx->ssl == NULL) {
+        /* NB: ctx == NULL performs syntax checking only */
+        rv = SSL_CTX_set1_client_sigalgs_list(cctx->ctx, value);
+    } else {
+        rv = SSL_set1_client_sigalgs_list(cctx->ssl, value);
+    }
+    return rv > 0;
+}
+
+/*
+ * Set the supported curves from the list in |value|, on the SSL if one is
+ * attached and on the SSL_CTX otherwise. Returns 1 on success and 0 on
+ * failure.
+ */
+static int cmd_Curves(SSL_CONF_CTX *cctx, const char *value)
+{
+    int rv;
+
+    if (cctx->ssl == NULL) {
+        /* NB: ctx == NULL performs syntax checking only */
+        rv = SSL_CTX_set1_curves_list(cctx->ctx, value);
+    } else {
+        rv = SSL_set1_curves_list(cctx->ssl, value);
+    }
+    return rv > 0;
+}
+
+#ifndef OPENSSL_NO_ECDH
+/*
+ * ECDH temporary parameters (server contexts only).
+ *
+ * In configuration file mode |value| may be "automatic" (any case),
+ * optionally preceded by a single '+' (enable) or '-' (disable). On the
+ * command line "auto" enables automatic selection. Any other value is
+ * looked up as a curve name, first as a NIST name and then as an object
+ * short name, and a key on that curve is installed as the temporary ECDH
+ * key.
+ *
+ * Returns 1 on success, 0 on a bad value or failure, and -2 if the command
+ * does not apply to this context.
+ */
+static int cmd_ECDHParameters(SSL_CONF_CTX *cctx, const char *value)
+{
+    int onoff = -1, rv = 1;
+    if (!(cctx->flags & SSL_CONF_FLAG_SERVER))
+        return -2;
+    if (cctx->flags & SSL_CONF_FLAG_FILE) {
+        /*
+         * Accept at most one sign. Testing '-' independently of '+' would
+         * accept "+-automatic" (as off) while rejecting "-+automatic".
+         */
+        if (*value == '+') {
+            onoff = 1;
+            value++;
+        } else if (*value == '-') {
+            onoff = 0;
+            value++;
+        }
+        if (!strcasecmp(value, "automatic")) {
+            if (onoff == -1)
+                onoff = 1;
+        } else if (onoff != -1)
+            return 0;           /* a sign is only valid with "automatic" */
+    } else if (cctx->flags & SSL_CONF_FLAG_CMDLINE) {
+        if (!strcmp(value, "auto"))
+            onoff = 1;
+    }
+
+    if (onoff != -1) {
+        if (cctx->ctx)
+            rv = SSL_CTX_set_ecdh_auto(cctx->ctx, onoff);
+        else if (cctx->ssl)
+            rv = SSL_set_ecdh_auto(cctx->ssl, onoff);
+    } else {
+        EC_KEY *ecdh;
+        int nid;
+        nid = EC_curve_nist2nid(value);
+        if (nid == NID_undef)
+            nid = OBJ_sn2nid(value);
+        if (nid == NID_undef)
+            return 0;
+        ecdh = EC_KEY_new_by_curve_name(nid);
+        if (!ecdh)
+            return 0;
+        if (cctx->ctx)
+            rv = SSL_CTX_set_tmp_ecdh(cctx->ctx, ecdh);
+        else if (cctx->ssl)
+            rv = SSL_set_tmp_ecdh(cctx->ssl, ecdh);
+        /* The local reference is always dropped here */
+        EC_KEY_free(ecdh);
+    }
+
+    return rv > 0;
+}
+#endif
+/*
+ * Set the cipher list to |value| on whichever of SSL_CTX and SSL is
+ * attached. With neither attached nothing is done and 1 is returned.
+ * Returns 1 on success and 0 on failure.
+ */
+static int cmd_CipherString(SSL_CONF_CTX *cctx, const char *value)
+{
+    int ok = 1;
+
+    if (cctx->ctx != NULL)
+        ok = SSL_CTX_set_cipher_list(cctx->ctx, value);
+    if (cctx->ssl != NULL)
+        ok = SSL_set_cipher_list(cctx->ssl, value);
+    return ok > 0;
+}
+
+/*
+ * Handle the Protocol command: |value| is a comma separated list of
+ * protocol names, each optionally prefixed with + or -. Only recognised in
+ * configuration file mode; returns -2 otherwise. The result of
+ * CONF_parse_list() is returned unchanged.
+ */
+static int cmd_Protocol(SSL_CONF_CTX *cctx, const char *value)
+{
+    static const ssl_flag_tbl protocols[] = {
+        SSL_FLAG_TBL_INV("ALL", SSL_OP_NO_SSL_MASK),
+        SSL_FLAG_TBL_INV("SSLv2", SSL_OP_NO_SSLv2),
+        SSL_FLAG_TBL_INV("SSLv3", SSL_OP_NO_SSLv3),
+        SSL_FLAG_TBL_INV("TLSv1", SSL_OP_NO_TLSv1),
+        SSL_FLAG_TBL_INV("TLSv1.1", SSL_OP_NO_TLSv1_1),
+        SSL_FLAG_TBL_INV("TLSv1.2", SSL_OP_NO_TLSv1_2)
+    };
+
+    if ((cctx->flags & SSL_CONF_FLAG_FILE) == 0)
+        return -2;
+
+    cctx->ntbl = sizeof(protocols) / sizeof(protocols[0]);
+    cctx->tbl = protocols;
+    return CONF_parse_list(value, ',', 1, ssl_set_option_list, cctx);
+}
+
+/*
+ * Handle the Options command: |value| is a comma separated list of option
+ * names, each optionally prefixed with + or -. Only recognised in
+ * configuration file mode (returns -2 otherwise); a NULL |value| yields -3.
+ * The result of CONF_parse_list() is returned unchanged.
+ */
+static int cmd_Options(SSL_CONF_CTX *cctx, const char *value)
+{
+    static const ssl_flag_tbl options[] = {
+        SSL_FLAG_TBL_INV("SessionTicket", SSL_OP_NO_TICKET),
+        SSL_FLAG_TBL_INV("EmptyFragments",
+                         SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS),
+        SSL_FLAG_TBL("Bugs", SSL_OP_ALL),
+        SSL_FLAG_TBL_INV("Compression", SSL_OP_NO_COMPRESSION),
+        SSL_FLAG_TBL_SRV("ServerPreference", SSL_OP_CIPHER_SERVER_PREFERENCE),
+        SSL_FLAG_TBL_SRV("NoResumptionOnRenegotiation",
+                         SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION),
+        SSL_FLAG_TBL_SRV("DHSingle", SSL_OP_SINGLE_DH_USE),
+        SSL_FLAG_TBL_SRV("ECDHSingle", SSL_OP_SINGLE_ECDH_USE),
+        SSL_FLAG_TBL("UnsafeLegacyRenegotiation",
+                     SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION),
+    };
+
+    if ((cctx->flags & SSL_CONF_FLAG_FILE) == 0)
+        return -2;
+    if (value == NULL)
+        return -3;
+
+    cctx->ntbl = sizeof(options) / sizeof(options[0]);
+    cctx->tbl = options;
+    return CONF_parse_list(value, ',', 1, ssl_set_option_list, cctx);
+}
+
+/*
+ * Load the certificate file named by |value|: as a chain file for an
+ * SSL_CTX, as a single PEM certificate for an SSL. Requires
+ * SSL_CONF_FLAG_CERTIFICATE (returns -2 otherwise). Returns 1 on success
+ * or when nothing is attached, 0 on failure.
+ */
+static int cmd_Certificate(SSL_CONF_CTX *cctx, const char *value)
+{
+    int ok = 1;
+
+    if ((cctx->flags & SSL_CONF_FLAG_CERTIFICATE) == 0)
+        return -2;
+
+    if (cctx->ctx != NULL)
+        ok = SSL_CTX_use_certificate_chain_file(cctx->ctx, value);
+    if (cctx->ssl != NULL)
+        ok = SSL_use_certificate_file(cctx->ssl, value, SSL_FILETYPE_PEM);
+    return ok > 0;
+}
+
+/*
+ * Load the PEM private key file named by |value| into the attached SSL_CTX
+ * or SSL. Requires SSL_CONF_FLAG_CERTIFICATE (returns -2 otherwise).
+ * Returns 1 on success or when nothing is attached, 0 on failure.
+ */
+static int cmd_PrivateKey(SSL_CONF_CTX *cctx, const char *value)
+{
+    int ok = 1;
+
+    if ((cctx->flags & SSL_CONF_FLAG_CERTIFICATE) == 0)
+        return -2;
+
+    if (cctx->ctx != NULL)
+        ok = SSL_CTX_use_PrivateKey_file(cctx->ctx, value, SSL_FILETYPE_PEM);
+    if (cctx->ssl != NULL)
+        ok = SSL_use_PrivateKey_file(cctx->ssl, value, SSL_FILETYPE_PEM);
+    return ok > 0;
+}
+
+/*
+ * Load the serverinfo file named by |value| into the attached SSL_CTX.
+ * Requires both SSL_CONF_FLAG_CERTIFICATE and SSL_CONF_FLAG_SERVER
+ * (returns -2 otherwise). Without an SSL_CTX nothing is done and 1 is
+ * returned. Returns 1 on success and 0 on failure.
+ */
+static int cmd_ServerInfoFile(SSL_CONF_CTX *cctx, const char *value)
+{
+    int ok = 1;
+
+    if ((cctx->flags & SSL_CONF_FLAG_CERTIFICATE) == 0
+        || (cctx->flags & SSL_CONF_FLAG_SERVER) == 0)
+        return -2;
+
+    if (cctx->ctx != NULL)
+        ok = SSL_CTX_use_serverinfo_file(cctx->ctx, value);
+    return ok > 0;
+}
+
+#ifndef OPENSSL_NO_DH
+/*
+ * Read PEM encoded DH parameters from the file named by |value| and set
+ * them as the temporary DH parameters of the attached SSL_CTX or SSL.
+ * Requires SSL_CONF_FLAG_CERTIFICATE (returns -2 otherwise). With nothing
+ * attached the file is not opened and 1 is returned. Returns 1 on success
+ * and 0 on any failure.
+ */
+static int cmd_DHParameters(SSL_CONF_CTX *cctx, const char *value)
+{
+    int rv = 0;
+    DH *params = NULL;
+    BIO *bio = NULL;
+
+    if ((cctx->flags & SSL_CONF_FLAG_CERTIFICATE) == 0)
+        return -2;
+    if (cctx->ctx == NULL && cctx->ssl == NULL)
+        return 1;
+
+    bio = BIO_new(BIO_s_file_internal());
+    if (bio == NULL)
+        goto done;
+    if (BIO_read_filename(bio, value) <= 0)
+        goto done;
+    params = PEM_read_bio_DHparams(bio, NULL, NULL, NULL);
+    if (params == NULL)
+        goto done;
+
+    if (cctx->ctx != NULL)
+        rv = SSL_CTX_set_tmp_dh(cctx->ctx, params);
+    if (cctx->ssl != NULL)
+        rv = SSL_set_tmp_dh(cctx->ssl, params);
+
+ done:
+    /* Local references are released on every path */
+    if (params != NULL)
+        DH_free(params);
+    if (bio != NULL)
+        BIO_free(bio);
+    return rv > 0;
+}
+#endif
+/* One entry of the command table: a handler plus the names that select it */
+typedef struct {
+    /* Handler invoked with the context and the command's value */
+    int (*cmd) (SSL_CONF_CTX *cctx, const char *value);
+    /* Name matched, ignoring case, in configuration file mode */
+    const char *str_file;
+    /* Name matched exactly in command line mode, or NULL if there is none */
+    const char *str_cmdline;
+    /* SSL_CONF_TYPE_* value reported by SSL_CONF_cmd_value_type() */
+    unsigned int value_type;
+} ssl_conf_cmd_tbl;
+
+/* Table of supported parameters */
+
+/*
+ * SSL_CONF_CMD(name, cmdopt, type) builds one table entry: the handler is
+ * the function cmd_<name>, the configuration file name is <name> turned
+ * into a string, cmdopt is the command line name (NULL when there is none)
+ * and type is the value type.
+ */
+#define SSL_CONF_CMD(name, cmdopt, type) \
+        {cmd_##name, #name, cmdopt, type}
+
+/* Shorthand for entries whose value type is SSL_CONF_TYPE_STRING */
+#define SSL_CONF_CMD_STRING(name, cmdopt) \
+        SSL_CONF_CMD(name, cmdopt, SSL_CONF_TYPE_STRING)
+
+static const ssl_conf_cmd_tbl ssl_conf_cmds[] = {
+    SSL_CONF_CMD_STRING(SignatureAlgorithms, "sigalgs"),
+    SSL_CONF_CMD_STRING(ClientSignatureAlgorithms, "client_sigalgs"),
+    SSL_CONF_CMD_STRING(Curves, "curves"),
+#ifndef OPENSSL_NO_ECDH
+    SSL_CONF_CMD_STRING(ECDHParameters, "named_curve"),
+#endif
+    SSL_CONF_CMD_STRING(CipherString, "cipher"),
+    SSL_CONF_CMD_STRING(Protocol, NULL),
+    SSL_CONF_CMD_STRING(Options, NULL),
+    SSL_CONF_CMD(Certificate, "cert", SSL_CONF_TYPE_FILE),
+    SSL_CONF_CMD(PrivateKey, "key", SSL_CONF_TYPE_FILE),
+    SSL_CONF_CMD(ServerInfoFile, NULL, SSL_CONF_TYPE_FILE),
+#ifndef OPENSSL_NO_DH
+    SSL_CONF_CMD(DHParameters, "dhparam", SSL_CONF_TYPE_FILE)
+#endif
+};
+
+/*
+ * Validate and strip the leading part of a command name, advancing *pcmd
+ * past it. With a prefix configured the command must be longer than the
+ * prefix and start with it (case sensitive in command line mode, case
+ * insensitive in file mode). Without a prefix, command line mode requires
+ * a leading '-' followed by at least one character.
+ *
+ * Returns 1 if the name is acceptable and 0 otherwise.
+ */
+static int ssl_conf_cmd_skip_prefix(SSL_CONF_CTX *cctx, const char **pcmd)
+{
+    const char *cmd;
+
+    if (pcmd == NULL || *pcmd == NULL)
+        return 0;
+    cmd = *pcmd;
+
+    /* If a prefix is set, check and skip */
+    if (cctx->prefix != NULL) {
+        if (strlen(cmd) <= cctx->prefixlen)
+            return 0;
+        if ((cctx->flags & SSL_CONF_FLAG_CMDLINE)
+            && strncmp(cmd, cctx->prefix, cctx->prefixlen) != 0)
+            return 0;
+        if ((cctx->flags & SSL_CONF_FLAG_FILE)
+            && strncasecmp(cmd, cctx->prefix, cctx->prefixlen) != 0)
+            return 0;
+        *pcmd = cmd + cctx->prefixlen;
+        return 1;
+    }
+
+    if (cctx->flags & SSL_CONF_FLAG_CMDLINE) {
+        if (cmd[0] != '-' || cmd[1] == '\0')
+            return 0;
+        *pcmd = cmd + 1;
+    }
+    return 1;
+}
+
+/*
+ * Find the command table entry for |cmd|. In command line mode the command
+ * line name is compared exactly; in file mode the file name is compared
+ * ignoring case. Returns the entry, or NULL if |cmd| is NULL or unknown.
+ */
+static const ssl_conf_cmd_tbl *ssl_conf_cmd_lookup(SSL_CONF_CTX *cctx,
+                                                   const char *cmd)
+{
+    const size_t ncmds = sizeof(ssl_conf_cmds) / sizeof(ssl_conf_cmds[0]);
+    size_t idx;
+
+    if (cmd == NULL)
+        return NULL;
+
+    /* Look for matching parameter name in table */
+    for (idx = 0; idx < ncmds; idx++) {
+        const ssl_conf_cmd_tbl *entry = &ssl_conf_cmds[idx];
+
+        if ((cctx->flags & SSL_CONF_FLAG_CMDLINE)
+            && entry->str_cmdline != NULL
+            && strcmp(entry->str_cmdline, cmd) == 0)
+            return entry;
+        if ((cctx->flags & SSL_CONF_FLAG_FILE)
+            && entry->str_file != NULL
+            && strcasecmp(entry->str_file, cmd) == 0)
+            return entry;
+    }
+    return NULL;
+}
+
+/*
+ * Execute configuration command |cmd| with argument |value|.
+ *
+ * Returns 2 if the command and its value were consumed, 1 if |cmd| was a
+ * single command line switch taking no value, 0 on error (NULL |cmd| or a
+ * value the handler rejected), -2 if the command is unknown or not
+ * applicable, and -3 if a recognised command was given no value.
+ */
+int SSL_CONF_cmd(SSL_CONF_CTX *cctx, const char *cmd, const char *value)
+{
+    const ssl_conf_cmd_tbl *entry;
+    int rv;
+
+    if (cmd == NULL) {
+        SSLerr(SSL_F_SSL_CONF_CMD, SSL_R_INVALID_NULL_CMD_NAME);
+        return 0;
+    }
+
+    if (!ssl_conf_cmd_skip_prefix(cctx, &cmd))
+        return -2;
+
+    entry = ssl_conf_cmd_lookup(cctx, cmd);
+
+    if (entry == NULL) {
+        /* Not in the command table: it may still be a plain switch */
+        if ((cctx->flags & SSL_CONF_FLAG_CMDLINE)
+            && ctrl_str_option(cctx, cmd))
+            return 1;
+
+        if (cctx->flags & SSL_CONF_FLAG_SHOW_ERRORS) {
+            SSLerr(SSL_F_SSL_CONF_CMD, SSL_R_UNKNOWN_CMD_NAME);
+            ERR_add_error_data(2, "cmd=", cmd);
+        }
+        return -2;
+    }
+
+    if (value == NULL)
+        return -3;
+
+    rv = entry->cmd(cctx, value);
+    if (rv > 0)
+        return 2;
+    if (rv == -2)
+        return -2;
+
+    if (cctx->flags & SSL_CONF_FLAG_SHOW_ERRORS) {
+        SSLerr(SSL_F_SSL_CONF_CMD, SSL_R_BAD_VALUE);
+        ERR_add_error_data(4, "cmd=", cmd, ", value=", value);
+    }
+    return 0;
+}
+
+/*
+ * Process the next command line argument(s) at *pargv. |pargc| may be NULL,
+ * in which case the argument vector is taken to be NULL terminated. The
+ * context is switched to command line mode.
+ *
+ * On success the number of arguments consumed (1 or 2) is returned and
+ * *pargv (and *pargc, if given) are advanced accordingly. Returns 0 if no
+ * argument was processed (none left or unknown switch), -1 if the command
+ * failed, and any other negative value from SSL_CONF_cmd() unchanged.
+ */
+int SSL_CONF_cmd_argv(SSL_CONF_CTX *cctx, int *pargc, char ***pargv)
+{
+    const int counted = (pargc != NULL);
+    const char *arg, *argn;
+    int rv;
+
+    /* Nothing left according to the caller's count */
+    if (counted && *pargc <= 0)
+        return 0;
+
+    arg = **pargv;
+    if (arg == NULL)
+        return 0;
+
+    argn = (!counted || *pargc > 1) ? (*pargv)[1] : NULL;
+
+    cctx->flags &= ~SSL_CONF_FLAG_FILE;
+    cctx->flags |= SSL_CONF_FLAG_CMDLINE;
+
+    rv = SSL_CONF_cmd(cctx, arg, argn);
+    if (rv > 0) {
+        /* Success: update pargc, pargv */
+        (*pargv) += rv;
+        if (counted)
+            (*pargc) -= rv;
+        return rv;
+    }
+
+    switch (rv) {
+    case -2:
+        /* Unknown switch: indicate no arguments processed */
+        return 0;
+    case 0:
+        /* Some error occurred processing command, return fatal error */
+        return -1;
+    default:
+        return rv;
+    }
+}
+
+/*
+ * Report the value type of command |cmd|, or SSL_CONF_TYPE_UNKNOWN if the
+ * name is not acceptable or not found in the command table.
+ */
+int SSL_CONF_cmd_value_type(SSL_CONF_CTX *cctx, const char *cmd)
+{
+    const ssl_conf_cmd_tbl *entry;
+
+    if (!ssl_conf_cmd_skip_prefix(cctx, &cmd))
+        return SSL_CONF_TYPE_UNKNOWN;
+
+    entry = ssl_conf_cmd_lookup(cctx, cmd);
+    if (entry == NULL)
+        return SSL_CONF_TYPE_UNKNOWN;
+
+    return entry->value_type;
+}
+
+/*
+ * Allocate a configuration context with no flags, no prefix and nothing
+ * attached. Returns NULL if the allocation fails.
+ */
+SSL_CONF_CTX *SSL_CONF_CTX_new(void)
+{
+    SSL_CONF_CTX *cctx = OPENSSL_malloc(sizeof(SSL_CONF_CTX));
+
+    if (cctx == NULL)
+        return NULL;
+
+    cctx->flags = 0;
+    cctx->prefix = NULL;
+    cctx->prefixlen = 0;
+    cctx->ssl = NULL;
+    cctx->ctx = NULL;
+    cctx->poptions = NULL;
+    cctx->pcert_flags = NULL;
+    cctx->tbl = NULL;
+    cctx->ntbl = 0;
+    return cctx;
+}
+
+/* Currently performs no work and always returns 1; |cctx| is not used. */
+int SSL_CONF_CTX_finish(SSL_CONF_CTX *cctx)
+{
+    return 1;
+}
+
+/*
+ * Release a configuration context together with its prefix string. A NULL
+ * |cctx| is ignored. The attached SSL or SSL_CTX is not touched.
+ */
+void SSL_CONF_CTX_free(SSL_CONF_CTX *cctx)
+{
+    if (cctx == NULL)
+        return;
+
+    if (cctx->prefix != NULL)
+        OPENSSL_free(cctx->prefix);
+    OPENSSL_free(cctx);
+}
+
+/* Turn on the bits in |flags| and return the resulting flag word. */
+unsigned int SSL_CONF_CTX_set_flags(SSL_CONF_CTX *cctx, unsigned int flags)
+{
+    return cctx->flags |= flags;
+}
+
+/* Turn off the bits in |flags| and return the resulting flag word. */
+unsigned int SSL_CONF_CTX_clear_flags(SSL_CONF_CTX *cctx, unsigned int flags)
+{
+    return cctx->flags &= ~flags;
+}
+
+/*
+ * Replace the command prefix with a private copy of |pre|; a NULL |pre|
+ * removes the prefix. Returns 1 on success and 0 if the copy could not be
+ * allocated, in which case the existing prefix is left unchanged.
+ */
+int SSL_CONF_CTX_set1_prefix(SSL_CONF_CTX *cctx, const char *pre)
+{
+    char *copy = NULL;
+    size_t copylen = 0;
+
+    if (pre != NULL) {
+        copy = BUF_strdup(pre);
+        if (copy == NULL)
+            return 0;
+        copylen = strlen(copy);
+    }
+
+    if (cctx->prefix != NULL)
+        OPENSSL_free(cctx->prefix);
+    cctx->prefix = copy;
+    cctx->prefixlen = copylen;
+    return 1;
+}
+
+/*
+ * Attach |ssl| to the context and detach any SSL_CTX. The option and
+ * cert_flags pointers are aimed at the fields of |ssl|, or cleared when
+ * |ssl| is NULL.
+ */
+void SSL_CONF_CTX_set_ssl(SSL_CONF_CTX *cctx, SSL *ssl)
+{
+    cctx->ctx = NULL;
+    cctx->ssl = ssl;
+
+    if (ssl == NULL) {
+        cctx->poptions = NULL;
+        cctx->pcert_flags = NULL;
+        return;
+    }
+
+    cctx->poptions = &ssl->options;
+    cctx->pcert_flags = &ssl->cert->cert_flags;
+}
+
+/*
+ * Attach |ctx| to the context and detach any SSL. The option and
+ * cert_flags pointers are aimed at the fields of |ctx|, or cleared when
+ * |ctx| is NULL.
+ */
+void SSL_CONF_CTX_set_ssl_ctx(SSL_CONF_CTX *cctx, SSL_CTX *ctx)
+{
+    cctx->ssl = NULL;
+    cctx->ctx = ctx;
+
+    if (ctx == NULL) {
+        cctx->poptions = NULL;
+        cctx->pcert_flags = NULL;
+        return;
+    }
+
+    cctx->poptions = &ctx->options;
+    cctx->pcert_flags = &ctx->cert->cert_flags;
+}
index 88621b7..1a6030e 100644 (file)
@@ -1,6 +1,6 @@
 /* ssl/ssl_err.c */
 /* ====================================================================
- * Copyright (c) 1999-2011 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2015 The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
 # define ERR_REASON(reason) ERR_PACK(ERR_LIB_SSL,0,reason)
 
 static ERR_STRING_DATA SSL_str_functs[] = {
+    {ERR_FUNC(SSL_F_CHECK_SUITEB_CIPHER_LIST), "CHECK_SUITEB_CIPHER_LIST"},
     {ERR_FUNC(SSL_F_CLIENT_CERTIFICATE), "CLIENT_CERTIFICATE"},
     {ERR_FUNC(SSL_F_CLIENT_FINISHED), "CLIENT_FINISHED"},
     {ERR_FUNC(SSL_F_CLIENT_HELLO), "CLIENT_HELLO"},
     {ERR_FUNC(SSL_F_CLIENT_MASTER_KEY), "CLIENT_MASTER_KEY"},
     {ERR_FUNC(SSL_F_D2I_SSL_SESSION), "d2i_SSL_SESSION"},
-    {ERR_FUNC(SSL_F_DO_DTLS1_WRITE), "DO_DTLS1_WRITE"},
+    {ERR_FUNC(SSL_F_DO_DTLS1_WRITE), "do_dtls1_write"},
     {ERR_FUNC(SSL_F_DO_SSL3_WRITE), "DO_SSL3_WRITE"},
-    {ERR_FUNC(SSL_F_DTLS1_ACCEPT), "DTLS1_ACCEPT"},
+    {ERR_FUNC(SSL_F_DTLS1_ACCEPT), "dtls1_accept"},
     {ERR_FUNC(SSL_F_DTLS1_ADD_CERT_TO_BUF), "DTLS1_ADD_CERT_TO_BUF"},
     {ERR_FUNC(SSL_F_DTLS1_BUFFER_RECORD), "DTLS1_BUFFER_RECORD"},
-    {ERR_FUNC(SSL_F_DTLS1_CHECK_TIMEOUT_NUM), "DTLS1_CHECK_TIMEOUT_NUM"},
-    {ERR_FUNC(SSL_F_DTLS1_CLIENT_HELLO), "DTLS1_CLIENT_HELLO"},
-    {ERR_FUNC(SSL_F_DTLS1_CONNECT), "DTLS1_CONNECT"},
-    {ERR_FUNC(SSL_F_DTLS1_ENC), "DTLS1_ENC"},
+    {ERR_FUNC(SSL_F_DTLS1_CHECK_TIMEOUT_NUM), "dtls1_check_timeout_num"},
+    {ERR_FUNC(SSL_F_DTLS1_CLIENT_HELLO), "dtls1_client_hello"},
+    {ERR_FUNC(SSL_F_DTLS1_CONNECT), "dtls1_connect"},
     {ERR_FUNC(SSL_F_DTLS1_GET_HELLO_VERIFY), "DTLS1_GET_HELLO_VERIFY"},
-    {ERR_FUNC(SSL_F_DTLS1_GET_MESSAGE), "DTLS1_GET_MESSAGE"},
+    {ERR_FUNC(SSL_F_DTLS1_GET_MESSAGE), "dtls1_get_message"},
     {ERR_FUNC(SSL_F_DTLS1_GET_MESSAGE_FRAGMENT),
      "DTLS1_GET_MESSAGE_FRAGMENT"},
-    {ERR_FUNC(SSL_F_DTLS1_GET_RECORD), "DTLS1_GET_RECORD"},
-    {ERR_FUNC(SSL_F_DTLS1_HANDLE_TIMEOUT), "DTLS1_HANDLE_TIMEOUT"},
-    {ERR_FUNC(SSL_F_DTLS1_HEARTBEAT), "DTLS1_HEARTBEAT"},
-    {ERR_FUNC(SSL_F_DTLS1_OUTPUT_CERT_CHAIN), "DTLS1_OUTPUT_CERT_CHAIN"},
+    {ERR_FUNC(SSL_F_DTLS1_GET_RECORD), "dtls1_get_record"},
+    {ERR_FUNC(SSL_F_DTLS1_HANDLE_TIMEOUT), "dtls1_handle_timeout"},
+    {ERR_FUNC(SSL_F_DTLS1_HEARTBEAT), "dtls1_heartbeat"},
+    {ERR_FUNC(SSL_F_DTLS1_OUTPUT_CERT_CHAIN), "dtls1_output_cert_chain"},
     {ERR_FUNC(SSL_F_DTLS1_PREPROCESS_FRAGMENT), "DTLS1_PREPROCESS_FRAGMENT"},
     {ERR_FUNC(SSL_F_DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE),
      "DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE"},
     {ERR_FUNC(SSL_F_DTLS1_PROCESS_RECORD), "DTLS1_PROCESS_RECORD"},
-    {ERR_FUNC(SSL_F_DTLS1_READ_BYTES), "DTLS1_READ_BYTES"},
-    {ERR_FUNC(SSL_F_DTLS1_READ_FAILED), "DTLS1_READ_FAILED"},
+    {ERR_FUNC(SSL_F_DTLS1_READ_BYTES), "dtls1_read_bytes"},
+    {ERR_FUNC(SSL_F_DTLS1_READ_FAILED), "dtls1_read_failed"},
     {ERR_FUNC(SSL_F_DTLS1_SEND_CERTIFICATE_REQUEST),
-     "DTLS1_SEND_CERTIFICATE_REQUEST"},
+     "dtls1_send_certificate_request"},
     {ERR_FUNC(SSL_F_DTLS1_SEND_CLIENT_CERTIFICATE),
-     "DTLS1_SEND_CLIENT_CERTIFICATE"},
+     "dtls1_send_client_certificate"},
     {ERR_FUNC(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE),
-     "DTLS1_SEND_CLIENT_KEY_EXCHANGE"},
-    {ERR_FUNC(SSL_F_DTLS1_SEND_CLIENT_VERIFY), "DTLS1_SEND_CLIENT_VERIFY"},
+     "dtls1_send_client_key_exchange"},
+    {ERR_FUNC(SSL_F_DTLS1_SEND_CLIENT_VERIFY), "dtls1_send_client_verify"},
     {ERR_FUNC(SSL_F_DTLS1_SEND_HELLO_VERIFY_REQUEST),
      "DTLS1_SEND_HELLO_VERIFY_REQUEST"},
     {ERR_FUNC(SSL_F_DTLS1_SEND_SERVER_CERTIFICATE),
-     "DTLS1_SEND_SERVER_CERTIFICATE"},
-    {ERR_FUNC(SSL_F_DTLS1_SEND_SERVER_HELLO), "DTLS1_SEND_SERVER_HELLO"},
+     "dtls1_send_server_certificate"},
+    {ERR_FUNC(SSL_F_DTLS1_SEND_SERVER_HELLO), "dtls1_send_server_hello"},
     {ERR_FUNC(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE),
-     "DTLS1_SEND_SERVER_KEY_EXCHANGE"},
+     "dtls1_send_server_key_exchange"},
     {ERR_FUNC(SSL_F_DTLS1_WRITE_APP_DATA_BYTES),
-     "DTLS1_WRITE_APP_DATA_BYTES"},
+     "dtls1_write_app_data_bytes"},
     {ERR_FUNC(SSL_F_GET_CLIENT_FINISHED), "GET_CLIENT_FINISHED"},
     {ERR_FUNC(SSL_F_GET_CLIENT_HELLO), "GET_CLIENT_HELLO"},
     {ERR_FUNC(SSL_F_GET_CLIENT_MASTER_KEY), "GET_CLIENT_MASTER_KEY"},
     {ERR_FUNC(SSL_F_GET_SERVER_FINISHED), "GET_SERVER_FINISHED"},
     {ERR_FUNC(SSL_F_GET_SERVER_HELLO), "GET_SERVER_HELLO"},
+    {ERR_FUNC(SSL_F_GET_SERVER_STATIC_DH_KEY), "GET_SERVER_STATIC_DH_KEY"},
     {ERR_FUNC(SSL_F_GET_SERVER_VERIFY), "GET_SERVER_VERIFY"},
     {ERR_FUNC(SSL_F_I2D_SSL_SESSION), "i2d_SSL_SESSION"},
     {ERR_FUNC(SSL_F_READ_N), "READ_N"},
@@ -126,119 +127,123 @@ static ERR_STRING_DATA SSL_str_functs[] = {
     {ERR_FUNC(SSL_F_SERVER_FINISH), "SERVER_FINISH"},
     {ERR_FUNC(SSL_F_SERVER_HELLO), "SERVER_HELLO"},
     {ERR_FUNC(SSL_F_SERVER_VERIFY), "SERVER_VERIFY"},
-    {ERR_FUNC(SSL_F_SSL23_ACCEPT), "SSL23_ACCEPT"},
+    {ERR_FUNC(SSL_F_SSL23_ACCEPT), "ssl23_accept"},
     {ERR_FUNC(SSL_F_SSL23_CLIENT_HELLO), "SSL23_CLIENT_HELLO"},
-    {ERR_FUNC(SSL_F_SSL23_CONNECT), "SSL23_CONNECT"},
+    {ERR_FUNC(SSL_F_SSL23_CONNECT), "ssl23_connect"},
     {ERR_FUNC(SSL_F_SSL23_GET_CLIENT_HELLO), "SSL23_GET_CLIENT_HELLO"},
     {ERR_FUNC(SSL_F_SSL23_GET_SERVER_HELLO), "SSL23_GET_SERVER_HELLO"},
-    {ERR_FUNC(SSL_F_SSL23_PEEK), "SSL23_PEEK"},
-    {ERR_FUNC(SSL_F_SSL23_READ), "SSL23_READ"},
-    {ERR_FUNC(SSL_F_SSL23_WRITE), "SSL23_WRITE"},
-    {ERR_FUNC(SSL_F_SSL2_ACCEPT), "SSL2_ACCEPT"},
-    {ERR_FUNC(SSL_F_SSL2_CONNECT), "SSL2_CONNECT"},
-    {ERR_FUNC(SSL_F_SSL2_ENC_INIT), "SSL2_ENC_INIT"},
+    {ERR_FUNC(SSL_F_SSL23_PEEK), "ssl23_peek"},
+    {ERR_FUNC(SSL_F_SSL23_READ), "ssl23_read"},
+    {ERR_FUNC(SSL_F_SSL23_WRITE), "ssl23_write"},
+    {ERR_FUNC(SSL_F_SSL2_ACCEPT), "ssl2_accept"},
+    {ERR_FUNC(SSL_F_SSL2_CONNECT), "ssl2_connect"},
+    {ERR_FUNC(SSL_F_SSL2_ENC_INIT), "ssl2_enc_init"},
     {ERR_FUNC(SSL_F_SSL2_GENERATE_KEY_MATERIAL),
-     "SSL2_GENERATE_KEY_MATERIAL"},
-    {ERR_FUNC(SSL_F_SSL2_PEEK), "SSL2_PEEK"},
-    {ERR_FUNC(SSL_F_SSL2_READ), "SSL2_READ"},
+     "ssl2_generate_key_material"},
+    {ERR_FUNC(SSL_F_SSL2_PEEK), "ssl2_peek"},
+    {ERR_FUNC(SSL_F_SSL2_READ), "ssl2_read"},
     {ERR_FUNC(SSL_F_SSL2_READ_INTERNAL), "SSL2_READ_INTERNAL"},
-    {ERR_FUNC(SSL_F_SSL2_SET_CERTIFICATE), "SSL2_SET_CERTIFICATE"},
-    {ERR_FUNC(SSL_F_SSL2_WRITE), "SSL2_WRITE"},
-    {ERR_FUNC(SSL_F_SSL3_ACCEPT), "SSL3_ACCEPT"},
+    {ERR_FUNC(SSL_F_SSL2_SET_CERTIFICATE), "ssl2_set_certificate"},
+    {ERR_FUNC(SSL_F_SSL2_WRITE), "ssl2_write"},
+    {ERR_FUNC(SSL_F_SSL3_ACCEPT), "ssl3_accept"},
     {ERR_FUNC(SSL_F_SSL3_ADD_CERT_TO_BUF), "SSL3_ADD_CERT_TO_BUF"},
-    {ERR_FUNC(SSL_F_SSL3_CALLBACK_CTRL), "SSL3_CALLBACK_CTRL"},
-    {ERR_FUNC(SSL_F_SSL3_CHANGE_CIPHER_STATE), "SSL3_CHANGE_CIPHER_STATE"},
+    {ERR_FUNC(SSL_F_SSL3_CALLBACK_CTRL), "ssl3_callback_ctrl"},
+    {ERR_FUNC(SSL_F_SSL3_CHANGE_CIPHER_STATE), "ssl3_change_cipher_state"},
     {ERR_FUNC(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM),
-     "SSL3_CHECK_CERT_AND_ALGORITHM"},
-    {ERR_FUNC(SSL_F_SSL3_CHECK_CLIENT_HELLO), "SSL3_CHECK_CLIENT_HELLO"},
-    {ERR_FUNC(SSL_F_SSL3_CLIENT_HELLO), "SSL3_CLIENT_HELLO"},
-    {ERR_FUNC(SSL_F_SSL3_CONNECT), "SSL3_CONNECT"},
-    {ERR_FUNC(SSL_F_SSL3_CTRL), "SSL3_CTRL"},
-    {ERR_FUNC(SSL_F_SSL3_CTX_CTRL), "SSL3_CTX_CTRL"},
+     "ssl3_check_cert_and_algorithm"},
+    {ERR_FUNC(SSL_F_SSL3_CHECK_CLIENT_HELLO), "ssl3_check_client_hello"},
+    {ERR_FUNC(SSL_F_SSL3_CHECK_FINISHED), "SSL3_CHECK_FINISHED"},
+    {ERR_FUNC(SSL_F_SSL3_CLIENT_HELLO), "ssl3_client_hello"},
+    {ERR_FUNC(SSL_F_SSL3_CONNECT), "ssl3_connect"},
+    {ERR_FUNC(SSL_F_SSL3_CTRL), "ssl3_ctrl"},
+    {ERR_FUNC(SSL_F_SSL3_CTX_CTRL), "ssl3_ctx_ctrl"},
     {ERR_FUNC(SSL_F_SSL3_DIGEST_CACHED_RECORDS),
-     "SSL3_DIGEST_CACHED_RECORDS"},
+     "ssl3_digest_cached_records"},
     {ERR_FUNC(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC),
-     "SSL3_DO_CHANGE_CIPHER_SPEC"},
-    {ERR_FUNC(SSL_F_SSL3_ENC), "SSL3_ENC"},
-    {ERR_FUNC(SSL_F_SSL3_CHECK_FINISHED), "SSL3_CHECK_FINISHED"},
+     "ssl3_do_change_cipher_spec"},
+    {ERR_FUNC(SSL_F_SSL3_ENC), "ssl3_enc"},
     {ERR_FUNC(SSL_F_SSL3_GENERATE_KEY_BLOCK), "SSL3_GENERATE_KEY_BLOCK"},
     {ERR_FUNC(SSL_F_SSL3_GET_CERTIFICATE_REQUEST),
-     "SSL3_GET_CERTIFICATE_REQUEST"},
-    {ERR_FUNC(SSL_F_SSL3_GET_CERT_STATUS), "SSL3_GET_CERT_STATUS"},
-    {ERR_FUNC(SSL_F_SSL3_GET_CERT_VERIFY), "SSL3_GET_CERT_VERIFY"},
+     "ssl3_get_certificate_request"},
+    {ERR_FUNC(SSL_F_SSL3_GET_CERT_STATUS), "ssl3_get_cert_status"},
+    {ERR_FUNC(SSL_F_SSL3_GET_CERT_VERIFY), "ssl3_get_cert_verify"},
     {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_CERTIFICATE),
-     "SSL3_GET_CLIENT_CERTIFICATE"},
-    {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_HELLO), "SSL3_GET_CLIENT_HELLO"},
+     "ssl3_get_client_certificate"},
+    {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_HELLO), "ssl3_get_client_hello"},
     {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE),
-     "SSL3_GET_CLIENT_KEY_EXCHANGE"},
-    {ERR_FUNC(SSL_F_SSL3_GET_FINISHED), "SSL3_GET_FINISHED"},
-    {ERR_FUNC(SSL_F_SSL3_GET_KEY_EXCHANGE), "SSL3_GET_KEY_EXCHANGE"},
-    {ERR_FUNC(SSL_F_SSL3_GET_MESSAGE), "SSL3_GET_MESSAGE"},
+     "ssl3_get_client_key_exchange"},
+    {ERR_FUNC(SSL_F_SSL3_GET_FINISHED), "ssl3_get_finished"},
+    {ERR_FUNC(SSL_F_SSL3_GET_KEY_EXCHANGE), "ssl3_get_key_exchange"},
+    {ERR_FUNC(SSL_F_SSL3_GET_MESSAGE), "ssl3_get_message"},
     {ERR_FUNC(SSL_F_SSL3_GET_NEW_SESSION_TICKET),
-     "SSL3_GET_NEW_SESSION_TICKET"},
-    {ERR_FUNC(SSL_F_SSL3_GET_NEXT_PROTO), "SSL3_GET_NEXT_PROTO"},
+     "ssl3_get_new_session_ticket"},
+    {ERR_FUNC(SSL_F_SSL3_GET_NEXT_PROTO), "ssl3_get_next_proto"},
     {ERR_FUNC(SSL_F_SSL3_GET_RECORD), "SSL3_GET_RECORD"},
     {ERR_FUNC(SSL_F_SSL3_GET_SERVER_CERTIFICATE),
-     "SSL3_GET_SERVER_CERTIFICATE"},
-    {ERR_FUNC(SSL_F_SSL3_GET_SERVER_DONE), "SSL3_GET_SERVER_DONE"},
-    {ERR_FUNC(SSL_F_SSL3_GET_SERVER_HELLO), "SSL3_GET_SERVER_HELLO"},
+     "ssl3_get_server_certificate"},
+    {ERR_FUNC(SSL_F_SSL3_GET_SERVER_DONE), "ssl3_get_server_done"},
+    {ERR_FUNC(SSL_F_SSL3_GET_SERVER_HELLO), "ssl3_get_server_hello"},
     {ERR_FUNC(SSL_F_SSL3_HANDSHAKE_MAC), "ssl3_handshake_mac"},
     {ERR_FUNC(SSL_F_SSL3_NEW_SESSION_TICKET), "SSL3_NEW_SESSION_TICKET"},
-    {ERR_FUNC(SSL_F_SSL3_OUTPUT_CERT_CHAIN), "SSL3_OUTPUT_CERT_CHAIN"},
-    {ERR_FUNC(SSL_F_SSL3_PEEK), "SSL3_PEEK"},
-    {ERR_FUNC(SSL_F_SSL3_READ_BYTES), "SSL3_READ_BYTES"},
-    {ERR_FUNC(SSL_F_SSL3_READ_N), "SSL3_READ_N"},
+    {ERR_FUNC(SSL_F_SSL3_OUTPUT_CERT_CHAIN), "ssl3_output_cert_chain"},
+    {ERR_FUNC(SSL_F_SSL3_PEEK), "ssl3_peek"},
+    {ERR_FUNC(SSL_F_SSL3_READ_BYTES), "ssl3_read_bytes"},
+    {ERR_FUNC(SSL_F_SSL3_READ_N), "ssl3_read_n"},
     {ERR_FUNC(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST),
-     "SSL3_SEND_CERTIFICATE_REQUEST"},
+     "ssl3_send_certificate_request"},
     {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_CERTIFICATE),
-     "SSL3_SEND_CLIENT_CERTIFICATE"},
+     "ssl3_send_client_certificate"},
     {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE),
-     "SSL3_SEND_CLIENT_KEY_EXCHANGE"},
-    {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_VERIFY), "SSL3_SEND_CLIENT_VERIFY"},
+     "ssl3_send_client_key_exchange"},
+    {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_VERIFY), "ssl3_send_client_verify"},
     {ERR_FUNC(SSL_F_SSL3_SEND_SERVER_CERTIFICATE),
-     "SSL3_SEND_SERVER_CERTIFICATE"},
-    {ERR_FUNC(SSL_F_SSL3_SEND_SERVER_HELLO), "SSL3_SEND_SERVER_HELLO"},
+     "ssl3_send_server_certificate"},
+    {ERR_FUNC(SSL_F_SSL3_SEND_SERVER_HELLO), "ssl3_send_server_hello"},
     {ERR_FUNC(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE),
-     "SSL3_SEND_SERVER_KEY_EXCHANGE"},
-    {ERR_FUNC(SSL_F_SSL3_SETUP_KEY_BLOCK), "SSL3_SETUP_KEY_BLOCK"},
-    {ERR_FUNC(SSL_F_SSL3_SETUP_READ_BUFFER), "SSL3_SETUP_READ_BUFFER"},
-    {ERR_FUNC(SSL_F_SSL3_SETUP_WRITE_BUFFER), "SSL3_SETUP_WRITE_BUFFER"},
-    {ERR_FUNC(SSL_F_SSL3_WRITE_BYTES), "SSL3_WRITE_BYTES"},
-    {ERR_FUNC(SSL_F_SSL3_WRITE_PENDING), "SSL3_WRITE_PENDING"},
+     "ssl3_send_server_key_exchange"},
+    {ERR_FUNC(SSL_F_SSL3_SETUP_KEY_BLOCK), "ssl3_setup_key_block"},
+    {ERR_FUNC(SSL_F_SSL3_SETUP_READ_BUFFER), "ssl3_setup_read_buffer"},
+    {ERR_FUNC(SSL_F_SSL3_SETUP_WRITE_BUFFER), "ssl3_setup_write_buffer"},
+    {ERR_FUNC(SSL_F_SSL3_WRITE_BYTES), "ssl3_write_bytes"},
+    {ERR_FUNC(SSL_F_SSL3_WRITE_PENDING), "ssl3_write_pending"},
+    {ERR_FUNC(SSL_F_SSL_ADD_CERT_CHAIN), "ssl_add_cert_chain"},
+    {ERR_FUNC(SSL_F_SSL_ADD_CERT_TO_BUF), "SSL_ADD_CERT_TO_BUF"},
     {ERR_FUNC(SSL_F_SSL_ADD_CLIENTHELLO_RENEGOTIATE_EXT),
-     "SSL_ADD_CLIENTHELLO_RENEGOTIATE_EXT"},
+     "ssl_add_clienthello_renegotiate_ext"},
     {ERR_FUNC(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT),
-     "SSL_ADD_CLIENTHELLO_TLSEXT"},
+     "ssl_add_clienthello_tlsext"},
     {ERR_FUNC(SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT),
-     "SSL_ADD_CLIENTHELLO_USE_SRTP_EXT"},
+     "ssl_add_clienthello_use_srtp_ext"},
     {ERR_FUNC(SSL_F_SSL_ADD_DIR_CERT_SUBJECTS_TO_STACK),
      "SSL_add_dir_cert_subjects_to_stack"},
     {ERR_FUNC(SSL_F_SSL_ADD_FILE_CERT_SUBJECTS_TO_STACK),
      "SSL_add_file_cert_subjects_to_stack"},
     {ERR_FUNC(SSL_F_SSL_ADD_SERVERHELLO_RENEGOTIATE_EXT),
-     "SSL_ADD_SERVERHELLO_RENEGOTIATE_EXT"},
+     "ssl_add_serverhello_renegotiate_ext"},
     {ERR_FUNC(SSL_F_SSL_ADD_SERVERHELLO_TLSEXT),
-     "SSL_ADD_SERVERHELLO_TLSEXT"},
+     "ssl_add_serverhello_tlsext"},
     {ERR_FUNC(SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT),
-     "SSL_ADD_SERVERHELLO_USE_SRTP_EXT"},
-    {ERR_FUNC(SSL_F_SSL_BAD_METHOD), "SSL_BAD_METHOD"},
-    {ERR_FUNC(SSL_F_SSL_BYTES_TO_CIPHER_LIST), "SSL_BYTES_TO_CIPHER_LIST"},
-    {ERR_FUNC(SSL_F_SSL_CERT_DUP), "SSL_CERT_DUP"},
-    {ERR_FUNC(SSL_F_SSL_CERT_INST), "SSL_CERT_INST"},
+     "ssl_add_serverhello_use_srtp_ext"},
+    {ERR_FUNC(SSL_F_SSL_BAD_METHOD), "ssl_bad_method"},
+    {ERR_FUNC(SSL_F_SSL_BUILD_CERT_CHAIN), "ssl_build_cert_chain"},
+    {ERR_FUNC(SSL_F_SSL_BYTES_TO_CIPHER_LIST), "ssl_bytes_to_cipher_list"},
+    {ERR_FUNC(SSL_F_SSL_CERT_DUP), "ssl_cert_dup"},
+    {ERR_FUNC(SSL_F_SSL_CERT_INST), "ssl_cert_inst"},
     {ERR_FUNC(SSL_F_SSL_CERT_INSTANTIATE), "SSL_CERT_INSTANTIATE"},
-    {ERR_FUNC(SSL_F_SSL_CERT_NEW), "SSL_CERT_NEW"},
+    {ERR_FUNC(SSL_F_SSL_CERT_NEW), "ssl_cert_new"},
     {ERR_FUNC(SSL_F_SSL_CHECK_PRIVATE_KEY), "SSL_check_private_key"},
     {ERR_FUNC(SSL_F_SSL_CHECK_SERVERHELLO_TLSEXT),
      "SSL_CHECK_SERVERHELLO_TLSEXT"},
     {ERR_FUNC(SSL_F_SSL_CHECK_SRVR_ECC_CERT_AND_ALG),
-     "SSL_CHECK_SRVR_ECC_CERT_AND_ALG"},
+     "ssl_check_srvr_ecc_cert_and_alg"},
     {ERR_FUNC(SSL_F_SSL_CIPHER_PROCESS_RULESTR),
      "SSL_CIPHER_PROCESS_RULESTR"},
     {ERR_FUNC(SSL_F_SSL_CIPHER_STRENGTH_SORT), "SSL_CIPHER_STRENGTH_SORT"},
     {ERR_FUNC(SSL_F_SSL_CLEAR), "SSL_clear"},
     {ERR_FUNC(SSL_F_SSL_COMP_ADD_COMPRESSION_METHOD),
      "SSL_COMP_add_compression_method"},
-    {ERR_FUNC(SSL_F_SSL_CREATE_CIPHER_LIST), "SSL_CREATE_CIPHER_LIST"},
+    {ERR_FUNC(SSL_F_SSL_CONF_CMD), "SSL_CONF_cmd"},
+    {ERR_FUNC(SSL_F_SSL_CREATE_CIPHER_LIST), "ssl_create_cipher_list"},
     {ERR_FUNC(SSL_F_SSL_CTRL), "SSL_ctrl"},
     {ERR_FUNC(SSL_F_SSL_CTX_CHECK_PRIVATE_KEY), "SSL_CTX_check_private_key"},
     {ERR_FUNC(SSL_F_SSL_CTX_MAKE_PROFILES), "SSL_CTX_MAKE_PROFILES"},
@@ -270,41 +275,49 @@ static ERR_STRING_DATA SSL_str_functs[] = {
      "SSL_CTX_use_RSAPrivateKey_ASN1"},
     {ERR_FUNC(SSL_F_SSL_CTX_USE_RSAPRIVATEKEY_FILE),
      "SSL_CTX_use_RSAPrivateKey_file"},
+    {ERR_FUNC(SSL_F_SSL_CTX_USE_SERVERINFO), "SSL_CTX_use_serverinfo"},
+    {ERR_FUNC(SSL_F_SSL_CTX_USE_SERVERINFO_FILE),
+     "SSL_CTX_use_serverinfo_file"},
     {ERR_FUNC(SSL_F_SSL_DO_HANDSHAKE), "SSL_do_handshake"},
-    {ERR_FUNC(SSL_F_SSL_GET_NEW_SESSION), "SSL_GET_NEW_SESSION"},
-    {ERR_FUNC(SSL_F_SSL_GET_PREV_SESSION), "SSL_GET_PREV_SESSION"},
+    {ERR_FUNC(SSL_F_SSL_GET_NEW_SESSION), "ssl_get_new_session"},
+    {ERR_FUNC(SSL_F_SSL_GET_PREV_SESSION), "ssl_get_prev_session"},
+    {ERR_FUNC(SSL_F_SSL_GET_SERVER_CERT_INDEX), "SSL_GET_SERVER_CERT_INDEX"},
     {ERR_FUNC(SSL_F_SSL_GET_SERVER_SEND_CERT), "SSL_GET_SERVER_SEND_CERT"},
-    {ERR_FUNC(SSL_F_SSL_GET_SERVER_SEND_PKEY), "SSL_GET_SERVER_SEND_PKEY"},
-    {ERR_FUNC(SSL_F_SSL_GET_SIGN_PKEY), "SSL_GET_SIGN_PKEY"},
-    {ERR_FUNC(SSL_F_SSL_INIT_WBIO_BUFFER), "SSL_INIT_WBIO_BUFFER"},
+    {ERR_FUNC(SSL_F_SSL_GET_SERVER_SEND_PKEY), "ssl_get_server_send_pkey"},
+    {ERR_FUNC(SSL_F_SSL_GET_SIGN_PKEY), "ssl_get_sign_pkey"},
+    {ERR_FUNC(SSL_F_SSL_INIT_WBIO_BUFFER), "ssl_init_wbio_buffer"},
     {ERR_FUNC(SSL_F_SSL_LOAD_CLIENT_CA_FILE), "SSL_load_client_CA_file"},
     {ERR_FUNC(SSL_F_SSL_NEW), "SSL_new"},
     {ERR_FUNC(SSL_F_SSL_PARSE_CLIENTHELLO_RENEGOTIATE_EXT),
-     "SSL_PARSE_CLIENTHELLO_RENEGOTIATE_EXT"},
+     "ssl_parse_clienthello_renegotiate_ext"},
     {ERR_FUNC(SSL_F_SSL_PARSE_CLIENTHELLO_TLSEXT),
-     "SSL_PARSE_CLIENTHELLO_TLSEXT"},
+     "ssl_parse_clienthello_tlsext"},
     {ERR_FUNC(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT),
-     "SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT"},
+     "ssl_parse_clienthello_use_srtp_ext"},
     {ERR_FUNC(SSL_F_SSL_PARSE_SERVERHELLO_RENEGOTIATE_EXT),
-     "SSL_PARSE_SERVERHELLO_RENEGOTIATE_EXT"},
+     "ssl_parse_serverhello_renegotiate_ext"},
     {ERR_FUNC(SSL_F_SSL_PARSE_SERVERHELLO_TLSEXT),
-     "SSL_PARSE_SERVERHELLO_TLSEXT"},
+     "ssl_parse_serverhello_tlsext"},
     {ERR_FUNC(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT),
-     "SSL_PARSE_SERVERHELLO_USE_SRTP_EXT"},
+     "ssl_parse_serverhello_use_srtp_ext"},
     {ERR_FUNC(SSL_F_SSL_PEEK), "SSL_peek"},
     {ERR_FUNC(SSL_F_SSL_PREPARE_CLIENTHELLO_TLSEXT),
-     "SSL_PREPARE_CLIENTHELLO_TLSEXT"},
+     "ssl_prepare_clienthello_tlsext"},
     {ERR_FUNC(SSL_F_SSL_PREPARE_SERVERHELLO_TLSEXT),
-     "SSL_PREPARE_SERVERHELLO_TLSEXT"},
+     "ssl_prepare_serverhello_tlsext"},
     {ERR_FUNC(SSL_F_SSL_READ), "SSL_read"},
     {ERR_FUNC(SSL_F_SSL_RSA_PRIVATE_DECRYPT), "SSL_RSA_PRIVATE_DECRYPT"},
     {ERR_FUNC(SSL_F_SSL_RSA_PUBLIC_ENCRYPT), "SSL_RSA_PUBLIC_ENCRYPT"},
+    {ERR_FUNC(SSL_F_SSL_SCAN_CLIENTHELLO_TLSEXT),
+     "SSL_SCAN_CLIENTHELLO_TLSEXT"},
+    {ERR_FUNC(SSL_F_SSL_SCAN_SERVERHELLO_TLSEXT),
+     "SSL_SCAN_SERVERHELLO_TLSEXT"},
     {ERR_FUNC(SSL_F_SSL_SESSION_DUP), "ssl_session_dup"},
     {ERR_FUNC(SSL_F_SSL_SESSION_NEW), "SSL_SESSION_new"},
     {ERR_FUNC(SSL_F_SSL_SESSION_PRINT_FP), "SSL_SESSION_print_fp"},
     {ERR_FUNC(SSL_F_SSL_SESSION_SET1_ID_CONTEXT),
      "SSL_SESSION_set1_id_context"},
-    {ERR_FUNC(SSL_F_SSL_SESS_CERT_NEW), "SSL_SESS_CERT_NEW"},
+    {ERR_FUNC(SSL_F_SSL_SESS_CERT_NEW), "ssl_sess_cert_new"},
     {ERR_FUNC(SSL_F_SSL_SET_CERT), "SSL_SET_CERT"},
     {ERR_FUNC(SSL_F_SSL_SET_CIPHER_LIST), "SSL_set_cipher_list"},
     {ERR_FUNC(SSL_F_SSL_SET_FD), "SSL_set_fd"},
@@ -321,10 +334,10 @@ static ERR_STRING_DATA SSL_str_functs[] = {
     {ERR_FUNC(SSL_F_SSL_SHUTDOWN), "SSL_shutdown"},
     {ERR_FUNC(SSL_F_SSL_SRP_CTX_INIT), "SSL_SRP_CTX_init"},
     {ERR_FUNC(SSL_F_SSL_UNDEFINED_CONST_FUNCTION),
-     "SSL_UNDEFINED_CONST_FUNCTION"},
-    {ERR_FUNC(SSL_F_SSL_UNDEFINED_FUNCTION), "SSL_UNDEFINED_FUNCTION"},
+     "ssl_undefined_const_function"},
+    {ERR_FUNC(SSL_F_SSL_UNDEFINED_FUNCTION), "ssl_undefined_function"},
     {ERR_FUNC(SSL_F_SSL_UNDEFINED_VOID_FUNCTION),
-     "SSL_UNDEFINED_VOID_FUNCTION"},
+     "ssl_undefined_void_function"},
     {ERR_FUNC(SSL_F_SSL_USE_CERTIFICATE), "SSL_use_certificate"},
     {ERR_FUNC(SSL_F_SSL_USE_CERTIFICATE_ASN1), "SSL_use_certificate_ASN1"},
     {ERR_FUNC(SSL_F_SSL_USE_CERTIFICATE_FILE), "SSL_use_certificate_file"},
@@ -337,22 +350,25 @@ static ERR_STRING_DATA SSL_str_functs[] = {
      "SSL_use_RSAPrivateKey_ASN1"},
     {ERR_FUNC(SSL_F_SSL_USE_RSAPRIVATEKEY_FILE),
      "SSL_use_RSAPrivateKey_file"},
-    {ERR_FUNC(SSL_F_SSL_VERIFY_CERT_CHAIN), "SSL_VERIFY_CERT_CHAIN"},
+    {ERR_FUNC(SSL_F_SSL_VERIFY_CERT_CHAIN), "ssl_verify_cert_chain"},
     {ERR_FUNC(SSL_F_SSL_WRITE), "SSL_write"},
+    {ERR_FUNC(SSL_F_TLS12_CHECK_PEER_SIGALG), "tls12_check_peer_sigalg"},
     {ERR_FUNC(SSL_F_TLS1_CERT_VERIFY_MAC), "tls1_cert_verify_mac"},
-    {ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "TLS1_CHANGE_CIPHER_STATE"},
+    {ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "tls1_change_cipher_state"},
     {ERR_FUNC(SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT),
      "TLS1_CHECK_SERVERHELLO_TLSEXT"},
-    {ERR_FUNC(SSL_F_TLS1_ENC), "TLS1_ENC"},
+    {ERR_FUNC(SSL_F_TLS1_ENC), "tls1_enc"},
     {ERR_FUNC(SSL_F_TLS1_EXPORT_KEYING_MATERIAL),
-     "TLS1_EXPORT_KEYING_MATERIAL"},
-    {ERR_FUNC(SSL_F_TLS1_HEARTBEAT), "SSL_F_TLS1_HEARTBEAT"},
+     "tls1_export_keying_material"},
+    {ERR_FUNC(SSL_F_TLS1_GET_CURVELIST), "TLS1_GET_CURVELIST"},
+    {ERR_FUNC(SSL_F_TLS1_HEARTBEAT), "tls1_heartbeat"},
     {ERR_FUNC(SSL_F_TLS1_PREPARE_CLIENTHELLO_TLSEXT),
      "TLS1_PREPARE_CLIENTHELLO_TLSEXT"},
     {ERR_FUNC(SSL_F_TLS1_PREPARE_SERVERHELLO_TLSEXT),
      "TLS1_PREPARE_SERVERHELLO_TLSEXT"},
     {ERR_FUNC(SSL_F_TLS1_PRF), "tls1_prf"},
-    {ERR_FUNC(SSL_F_TLS1_SETUP_KEY_BLOCK), "TLS1_SETUP_KEY_BLOCK"},
+    {ERR_FUNC(SSL_F_TLS1_SETUP_KEY_BLOCK), "tls1_setup_key_block"},
+    {ERR_FUNC(SSL_F_TLS1_SET_SERVER_SIGALGS), "tls1_set_server_sigalgs"},
     {ERR_FUNC(SSL_F_WRITE_PENDING), "WRITE_PENDING"},
     {0, NULL}
 };
@@ -365,6 +381,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
     {ERR_REASON(SSL_R_BAD_AUTHENTICATION_TYPE), "bad authentication type"},
     {ERR_REASON(SSL_R_BAD_CHANGE_CIPHER_SPEC), "bad change cipher spec"},
     {ERR_REASON(SSL_R_BAD_CHECKSUM), "bad checksum"},
+    {ERR_REASON(SSL_R_BAD_DATA), "bad data"},
     {ERR_REASON(SSL_R_BAD_DATA_RETURNED_BY_CALLBACK),
      "bad data returned by callback"},
     {ERR_REASON(SSL_R_BAD_DECOMPRESSION), "bad decompression"},
@@ -407,6 +424,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
     {ERR_REASON(SSL_R_BAD_SSL_SESSION_ID_LENGTH),
      "bad ssl session id length"},
     {ERR_REASON(SSL_R_BAD_STATE), "bad state"},
+    {ERR_REASON(SSL_R_BAD_VALUE), "bad value"},
     {ERR_REASON(SSL_R_BAD_WRITE_RETRY), "bad write retry"},
     {ERR_REASON(SSL_R_BIO_NOT_SET), "bio not set"},
     {ERR_REASON(SSL_R_BLOCK_CIPHER_PAD_IS_WRONG),
@@ -417,6 +435,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
     {ERR_REASON(SSL_R_CCS_RECEIVED_EARLY), "ccs received early"},
     {ERR_REASON(SSL_R_CERTIFICATE_VERIFY_FAILED),
      "certificate verify failed"},
+    {ERR_REASON(SSL_R_CERT_CB_ERROR), "cert cb error"},
     {ERR_REASON(SSL_R_CERT_LENGTH_MISMATCH), "cert length mismatch"},
     {ERR_REASON(SSL_R_CHALLENGE_IS_DIFFERENT), "challenge is different"},
     {ERR_REASON(SSL_R_CIPHER_CODE_WRONG_LENGTH), "cipher code wrong length"},
@@ -455,6 +474,8 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
      "ecc cert should have rsa signature"},
     {ERR_REASON(SSL_R_ECC_CERT_SHOULD_HAVE_SHA1_SIGNATURE),
      "ecc cert should have sha1 signature"},
+    {ERR_REASON(SSL_R_ECDH_REQUIRED_FOR_SUITEB_MODE),
+     "ecdh required for suiteb mode"},
     {ERR_REASON(SSL_R_ECGROUP_TOO_LARGE_FOR_CIPHER),
      "ecgroup too large for cipher"},
     {ERR_REASON(SSL_R_EMPTY_SRTP_PROTECTION_PROFILE_LIST),
@@ -475,13 +496,16 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
     {ERR_REASON(SSL_R_HTTPS_PROXY_REQUEST), "https proxy request"},
     {ERR_REASON(SSL_R_HTTP_REQUEST), "http request"},
     {ERR_REASON(SSL_R_ILLEGAL_PADDING), "illegal padding"},
+    {ERR_REASON(SSL_R_ILLEGAL_SUITEB_DIGEST), "illegal Suite B digest"},
     {ERR_REASON(SSL_R_INAPPROPRIATE_FALLBACK), "inappropriate fallback"},
     {ERR_REASON(SSL_R_INCONSISTENT_COMPRESSION), "inconsistent compression"},
     {ERR_REASON(SSL_R_INVALID_CHALLENGE_LENGTH), "invalid challenge length"},
     {ERR_REASON(SSL_R_INVALID_COMMAND), "invalid command"},
     {ERR_REASON(SSL_R_INVALID_COMPRESSION_ALGORITHM),
      "invalid compression algorithm"},
+    {ERR_REASON(SSL_R_INVALID_NULL_CMD_NAME), "invalid null cmd name"},
     {ERR_REASON(SSL_R_INVALID_PURPOSE), "invalid purpose"},
+    {ERR_REASON(SSL_R_INVALID_SERVERINFO_DATA), "invalid serverinfo data"},
     {ERR_REASON(SSL_R_INVALID_SRP_USERNAME), "invalid srp username"},
     {ERR_REASON(SSL_R_INVALID_STATUS_RESPONSE), "invalid status response"},
     {ERR_REASON(SSL_R_INVALID_TICKET_KEYS_LENGTH),
@@ -508,6 +532,9 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
     {ERR_REASON(SSL_R_MISSING_DH_KEY), "missing dh key"},
     {ERR_REASON(SSL_R_MISSING_DH_RSA_CERT), "missing dh rsa cert"},
     {ERR_REASON(SSL_R_MISSING_DSA_SIGNING_CERT), "missing dsa signing cert"},
+    {ERR_REASON(SSL_R_MISSING_ECDH_CERT), "missing ecdh cert"},
+    {ERR_REASON(SSL_R_MISSING_ECDSA_SIGNING_CERT),
+     "missing ecdsa signing cert"},
     {ERR_REASON(SSL_R_MISSING_EXPORT_TMP_DH_KEY),
      "missing export tmp dh key"},
     {ERR_REASON(SSL_R_MISSING_EXPORT_TMP_RSA_KEY),
@@ -540,6 +567,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
     {ERR_REASON(SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER),
      "Peer haven't sent GOST certificate, required for selected ciphersuite"},
     {ERR_REASON(SSL_R_NO_METHOD_SPECIFIED), "no method specified"},
+    {ERR_REASON(SSL_R_NO_PEM_EXTENSIONS), "no pem extensions"},
     {ERR_REASON(SSL_R_NO_PRIVATEKEY), "no privatekey"},
     {ERR_REASON(SSL_R_NO_PRIVATE_KEY_ASSIGNED), "no private key assigned"},
     {ERR_REASON(SSL_R_NO_PROTOCOLS_AVAILABLE), "no protocols available"},
@@ -548,6 +576,8 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
     {ERR_REASON(SSL_R_NO_REQUIRED_DIGEST),
      "digest requred for handshake isn't computed"},
     {ERR_REASON(SSL_R_NO_SHARED_CIPHER), "no shared cipher"},
+    {ERR_REASON(SSL_R_NO_SHARED_SIGATURE_ALGORITHMS),
+     "no shared sigature algorithms"},
     {ERR_REASON(SSL_R_NO_SRTP_PROFILES), "no srtp profiles"},
     {ERR_REASON(SSL_R_NO_VERIFY_CALLBACK), "no verify callback"},
     {ERR_REASON(SSL_R_NULL_SSL_CTX), "null ssl ctx"},
@@ -556,6 +586,10 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
      "old session cipher not returned"},
     {ERR_REASON(SSL_R_OLD_SESSION_COMPRESSION_ALGORITHM_NOT_RETURNED),
      "old session compression algorithm not returned"},
+    {ERR_REASON(SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE),
+     "only DTLS 1.2 allowed in Suite B mode"},
+    {ERR_REASON(SSL_R_ONLY_TLS_1_2_ALLOWED_IN_SUITEB_MODE),
+     "only TLS 1.2 allowed in Suite B mode"},
     {ERR_REASON(SSL_R_ONLY_TLS_ALLOWED_IN_FIPS_MODE),
      "only tls allowed in fips mode"},
     {ERR_REASON(SSL_R_OPAQUE_PRF_INPUT_TOO_LONG),
@@ -572,6 +606,8 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
     {ERR_REASON(SSL_R_PEER_ERROR_NO_CIPHER), "peer error no cipher"},
     {ERR_REASON(SSL_R_PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE),
      "peer error unsupported certificate type"},
+    {ERR_REASON(SSL_R_PEM_NAME_BAD_PREFIX), "pem name bad prefix"},
+    {ERR_REASON(SSL_R_PEM_NAME_TOO_SHORT), "pem name too short"},
     {ERR_REASON(SSL_R_PRE_MAC_LENGTH_TOO_LONG), "pre mac length too long"},
     {ERR_REASON(SSL_R_PROBLEMS_MAPPING_CIPHER_FUNCTIONS),
      "problems mapping cipher functions"},
@@ -742,6 +778,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
     {ERR_REASON(SSL_R_UNKNOWN_CERTIFICATE_TYPE), "unknown certificate type"},
     {ERR_REASON(SSL_R_UNKNOWN_CIPHER_RETURNED), "unknown cipher returned"},
     {ERR_REASON(SSL_R_UNKNOWN_CIPHER_TYPE), "unknown cipher type"},
+    {ERR_REASON(SSL_R_UNKNOWN_CMD_NAME), "unknown cmd name"},
     {ERR_REASON(SSL_R_UNKNOWN_DIGEST), "unknown digest"},
     {ERR_REASON(SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE),
      "unknown key exchange type"},
@@ -764,7 +801,9 @@ static ERR_STRING_DATA SSL_str_reasons[] = {
     {ERR_REASON(SSL_R_UNSUPPORTED_STATUS_TYPE), "unsupported status type"},
     {ERR_REASON(SSL_R_USE_SRTP_NOT_NEGOTIATED), "use srtp not negotiated"},
     {ERR_REASON(SSL_R_WRITE_BIO_NOT_SET), "write bio not set"},
+    {ERR_REASON(SSL_R_WRONG_CERTIFICATE_TYPE), "wrong certificate type"},
     {ERR_REASON(SSL_R_WRONG_CIPHER_RETURNED), "wrong cipher returned"},
+    {ERR_REASON(SSL_R_WRONG_CURVE), "wrong curve"},
     {ERR_REASON(SSL_R_WRONG_MESSAGE_TYPE), "wrong message type"},
     {ERR_REASON(SSL_R_WRONG_NUMBER_OF_KEY_BITS), "wrong number of key bits"},
     {ERR_REASON(SSL_R_WRONG_SIGNATURE_LENGTH), "wrong signature length"},
index e11746a..c0931e7 100644 (file)
@@ -273,7 +273,7 @@ int SSL_CTX_set_ssl_version(SSL_CTX *ctx, const SSL_METHOD *meth)
                                 &(ctx->cipher_list_by_id),
                                 meth->version ==
                                 SSL2_VERSION ? "SSLv2" :
-                                SSL_DEFAULT_CIPHER_LIST);
+                                SSL_DEFAULT_CIPHER_LIST, ctx->cert);
     if ((sk == NULL) || (sk_SSL_CIPHER_num(sk) <= 0)) {
         SSLerr(SSL_F_SSL_CTX_SET_SSL_VERSION,
                SSL_R_SSL_LIBRARY_HAS_NO_CIPHERS);
@@ -363,9 +363,39 @@ SSL *SSL_new(SSL_CTX *ctx)
     s->tlsext_ocsp_resplen = -1;
     CRYPTO_add(&ctx->references, 1, CRYPTO_LOCK_SSL_CTX);
     s->initial_ctx = ctx;
+# ifndef OPENSSL_NO_EC
+    if (ctx->tlsext_ecpointformatlist) {
+        s->tlsext_ecpointformatlist =
+            BUF_memdup(ctx->tlsext_ecpointformatlist,
+                       ctx->tlsext_ecpointformatlist_length);
+        if (!s->tlsext_ecpointformatlist)
+            goto err;
+        s->tlsext_ecpointformatlist_length =
+            ctx->tlsext_ecpointformatlist_length;
+    }
+    if (ctx->tlsext_ellipticcurvelist) {
+        s->tlsext_ellipticcurvelist =
+            BUF_memdup(ctx->tlsext_ellipticcurvelist,
+                       ctx->tlsext_ellipticcurvelist_length);
+        if (!s->tlsext_ellipticcurvelist)
+            goto err;
+        s->tlsext_ellipticcurvelist_length =
+            ctx->tlsext_ellipticcurvelist_length;
+    }
+# endif
 # ifndef OPENSSL_NO_NEXTPROTONEG
     s->next_proto_negotiated = NULL;
 # endif
+
+    if (s->ctx->alpn_client_proto_list) {
+        s->alpn_client_proto_list =
+            OPENSSL_malloc(s->ctx->alpn_client_proto_list_len);
+        if (s->alpn_client_proto_list == NULL)
+            goto err;
+        memcpy(s->alpn_client_proto_list, s->ctx->alpn_client_proto_list,
+               s->ctx->alpn_client_proto_list_len);
+        s->alpn_client_proto_list_len = s->ctx->alpn_client_proto_list_len;
+    }
 #endif
 
     s->verify_result = X509_V_OK;
@@ -505,6 +535,21 @@ int SSL_set1_param(SSL *ssl, X509_VERIFY_PARAM *vpm)
     return X509_VERIFY_PARAM_set1(ssl->param, vpm);
 }
 
+X509_VERIFY_PARAM *SSL_CTX_get0_param(SSL_CTX *ctx)
+{
+    return ctx->param;          /* ctx's own object is returned, not a copy */
+}
+
+X509_VERIFY_PARAM *SSL_get0_param(SSL *ssl)
+{
+    return ssl->param;          /* ssl's own object is returned, not a copy */
+}
+
+void SSL_certs_clear(SSL *s)
+{
+    ssl_cert_clear_certs(s->cert); /* hands this SSL's CERT to the helper */
+}
+
 void SSL_free(SSL *s)
 {
     int i;
@@ -585,6 +630,8 @@ void SSL_free(SSL *s)
         sk_OCSP_RESPID_pop_free(s->tlsext_ocsp_ids, OCSP_RESPID_free);
     if (s->tlsext_ocsp_resp)
         OPENSSL_free(s->tlsext_ocsp_resp);
+    if (s->alpn_client_proto_list)
+        OPENSSL_free(s->alpn_client_proto_list);
 #endif
 
     if (s->client_CA != NULL)
@@ -1088,6 +1135,19 @@ long SSL_ctrl(SSL *s, int cmd, long larg, void *parg)
             return s->s3->send_connection_binding;
         else
             return 0;
+    case SSL_CTRL_CERT_FLAGS:
+        return (s->cert->cert_flags |= larg);
+    case SSL_CTRL_CLEAR_CERT_FLAGS:
+        return (s->cert->cert_flags &= ~larg);
+
+    case SSL_CTRL_GET_RAW_CIPHERLIST:
+        if (parg) {
+            if (s->cert->ciphers_raw == NULL)
+                return 0;
+            *(unsigned char **)parg = s->cert->ciphers_raw;
+            return (int)s->cert->ciphers_rawlen;
+        } else
+            return ssl_put_cipher_by_char(s, NULL, NULL);
     default:
         return (s->method->ssl_ctrl(s, cmd, larg, parg));
     }
@@ -1116,6 +1176,20 @@ LHASH_OF(SSL_SESSION) *SSL_CTX_sessions(SSL_CTX *ctx)
 long SSL_CTX_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg)
 {
     long l;
+    /* For some cases with ctx == NULL perform syntax checks */
+    if (ctx == NULL) {
+        switch (cmd) {
+#ifndef OPENSSL_NO_EC
+        case SSL_CTRL_SET_CURVES_LIST:
+            return tls1_set_curves_list(NULL, NULL, parg);
+#endif
+        case SSL_CTRL_SET_SIGALGS_LIST:
+        case SSL_CTRL_SET_CLIENT_SIGALGS_LIST:
+            return tls1_set_sigalgs_list(NULL, parg, 0);
+        default:
+            return 0;
+        }
+    }
 
     switch (cmd) {
     case SSL_CTRL_GET_READ_AHEAD:
@@ -1186,6 +1260,10 @@ long SSL_CTX_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg)
             return 0;
         ctx->max_send_fragment = larg;
         return 1;
+    case SSL_CTRL_CERT_FLAGS:
+        return (ctx->cert->cert_flags |= larg);
+    case SSL_CTRL_CLEAR_CERT_FLAGS:
+        return (ctx->cert->cert_flags &= ~larg);
     default:
         return (ctx->method->ssl_ctx_ctrl(ctx, cmd, larg, parg));
     }
@@ -1280,7 +1358,7 @@ int SSL_CTX_set_cipher_list(SSL_CTX *ctx, const char *str)
     STACK_OF(SSL_CIPHER) *sk;
 
     sk = ssl_create_cipher_list(ctx->method, &ctx->cipher_list,
-                                &ctx->cipher_list_by_id, str);
+                                &ctx->cipher_list_by_id, str, ctx->cert);
     /*
      * ssl_create_cipher_list may return an empty stack if it was unable to
      * find a cipher matching the given rule string (for example if the rule
@@ -1303,7 +1381,7 @@ int SSL_set_cipher_list(SSL *s, const char *str)
     STACK_OF(SSL_CIPHER) *sk;
 
     sk = ssl_create_cipher_list(s->ctx->method, &s->cipher_list,
-                                &s->cipher_list_by_id, str);
+                                &s->cipher_list_by_id, str, s->cert);
     /* see comment in SSL_CTX_set_cipher_list */
     if (sk == NULL)
         return 0;
@@ -1358,10 +1436,11 @@ int ssl_cipher_list_to_bytes(SSL *s, STACK_OF(SSL_CIPHER) *sk,
 {
     int i, j = 0;
     SSL_CIPHER *c;
+    CERT *ct = s->cert;
     unsigned char *q;
-#ifndef OPENSSL_NO_KRB5
-    int nokrb5 = !kssl_tgt_is_available(s->kssl_ctx);
-#endif                          /* OPENSSL_NO_KRB5 */
+    int empty_reneg_info_scsv = !s->renegotiate;
+    /* Set disabled masks for this session */
+    ssl_set_client_disabled(s);
 
     if (sk == NULL)
         return (0);
@@ -1371,26 +1450,18 @@ int ssl_cipher_list_to_bytes(SSL *s, STACK_OF(SSL_CIPHER) *sk,
 
     for (i = 0; i < sk_SSL_CIPHER_num(sk); i++) {
         c = sk_SSL_CIPHER_value(sk, i);
-        /* Skip TLS v1.2 only ciphersuites if lower than v1.2 */
-        if ((c->algorithm_ssl & SSL_TLSV1_2) &&
-            (TLS1_get_client_version(s) < TLS1_2_VERSION))
-            continue;
-#ifndef OPENSSL_NO_KRB5
-        if (((c->algorithm_mkey & SSL_kKRB5)
-             || (c->algorithm_auth & SSL_aKRB5)) && nokrb5)
+        /* Skip disabled ciphers */
+        if (c->algorithm_ssl & ct->mask_ssl ||
+            c->algorithm_mkey & ct->mask_k || c->algorithm_auth & ct->mask_a)
             continue;
-#endif                          /* OPENSSL_NO_KRB5 */
-#ifndef OPENSSL_NO_PSK
-        /* with PSK there must be client callback set */
-        if (((c->algorithm_mkey & SSL_kPSK) || (c->algorithm_auth & SSL_aPSK))
-            && s->psk_client_callback == NULL)
-            continue;
-#endif                          /* OPENSSL_NO_PSK */
-#ifndef OPENSSL_NO_SRP
-        if (((c->algorithm_mkey & SSL_kSRP) || (c->algorithm_auth & SSL_aSRP))
-            && !(s->srp_ctx.srp_Mask & SSL_kSRP))
-            continue;
-#endif                          /* OPENSSL_NO_SRP */
+#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+        if (c->id == SSL3_CK_SCSV) {
+            if (!empty_reneg_info_scsv)
+                continue;
+            else
+                empty_reneg_info_scsv = 0;
+        }
+#endif
         j = put_cb(c, p);
         p += j;
     }
@@ -1399,7 +1470,7 @@ int ssl_cipher_list_to_bytes(SSL *s, STACK_OF(SSL_CIPHER) *sk,
      * applicable SCSVs.
      */
     if (p != q) {
-        if (!s->renegotiate) {
+        if (empty_reneg_info_scsv) {
             static SSL_CIPHER scsv = {
                 0, NULL, SSL3_CK_SCSV, 0, 0, 0, 0, 0, 0, 0, 0, 0
             };
@@ -1410,7 +1481,6 @@ int ssl_cipher_list_to_bytes(SSL *s, STACK_OF(SSL_CIPHER) *sk,
                     "TLS_EMPTY_RENEGOTIATION_INFO_SCSV sent by client\n");
 #endif
         }
-
         if (s->mode & SSL_MODE_SEND_FALLBACK_SCSV) {
             static SSL_CIPHER scsv = {
                 0, NULL, SSL3_CK_FALLBACK_SCSV, 0, 0, 0, 0, 0, 0, 0, 0, 0
@@ -1451,6 +1521,15 @@ STACK_OF(SSL_CIPHER) *ssl_bytes_to_cipher_list(SSL *s, unsigned char *p,
         sk_SSL_CIPHER_zero(sk);
     }
 
+    if (s->cert->ciphers_raw)
+        OPENSSL_free(s->cert->ciphers_raw);
+    s->cert->ciphers_raw = BUF_memdup(p, num);
+    if (s->cert->ciphers_raw == NULL) {
+        SSLerr(SSL_F_SSL_BYTES_TO_CIPHER_LIST, ERR_R_MALLOC_FAILURE);
+        goto err;
+    }
+    s->cert->ciphers_rawlen = (size_t)num;
+
     for (i = 0; i < num; i += n) {
         /* Check for TLS_EMPTY_RENEGOTIATION_INFO_SCSV */
         if (s->s3 && (n != 3 || !p[0]) &&
@@ -1534,7 +1613,6 @@ int SSL_get_servername_type(const SSL *s)
     return -1;
 }
 
-# ifndef OPENSSL_NO_NEXTPROTONEG
 /*
  * SSL_select_next_proto implements the standard protocol selection. It is
  * expected that this function is called from the callback set by
@@ -1594,6 +1672,7 @@ int SSL_select_next_proto(unsigned char **out, unsigned char *outlen,
     return status;
 }
 
+# ifndef OPENSSL_NO_NEXTPROTONEG
 /*
  * SSL_get0_next_proto_negotiated sets *data and *len to point to the
  * client's requested protocol for this connection and returns 0. If the
@@ -1655,7 +1734,83 @@ void SSL_CTX_set_next_proto_select_cb(SSL_CTX *ctx,
     ctx->next_proto_select_cb_arg = arg;
 }
 # endif
-#endif
+
+/*
+ * SSL_CTX_set_alpn_protos sets the ALPN protocol list on |ctx| to |protos|.
+ * |protos| must be in wire-format (i.e. a series of non-empty, 8-bit
+ * length-prefixed strings). Returns 0 on success, 1 on allocation failure.
+ */
+int SSL_CTX_set_alpn_protos(SSL_CTX *ctx, const unsigned char *protos,
+                            unsigned protos_len)
+{
+    if (ctx->alpn_client_proto_list)
+        OPENSSL_free(ctx->alpn_client_proto_list);
+
+    ctx->alpn_client_proto_list = OPENSSL_malloc(protos_len);
+    if (!ctx->alpn_client_proto_list)
+        return 1;
+    memcpy(ctx->alpn_client_proto_list, protos, protos_len);
+    ctx->alpn_client_proto_list_len = protos_len;
+
+    return 0;
+}
+
+/*
+ * SSL_set_alpn_protos sets the ALPN protocol list on |ssl| to |protos|.
+ * |protos| must be in wire-format (i.e. a series of non-empty, 8-bit
+ * length-prefixed strings). Returns 0 on success, 1 on allocation failure.
+ */
+int SSL_set_alpn_protos(SSL *ssl, const unsigned char *protos,
+                        unsigned protos_len)
+{
+    if (ssl->alpn_client_proto_list)
+        OPENSSL_free(ssl->alpn_client_proto_list);
+
+    ssl->alpn_client_proto_list = OPENSSL_malloc(protos_len);
+    if (!ssl->alpn_client_proto_list)
+        return 1;
+    memcpy(ssl->alpn_client_proto_list, protos, protos_len);
+    ssl->alpn_client_proto_list_len = protos_len;
+
+    return 0;
+}
+
+/*
+ * SSL_CTX_set_alpn_select_cb sets a callback function on |ctx| that is
+ * called during ClientHello processing in order to select an ALPN protocol
+ * from the client's list of offered protocols.
+ */
+void SSL_CTX_set_alpn_select_cb(SSL_CTX *ctx,
+                                int (*cb) (SSL *ssl,
+                                           const unsigned char **out,
+                                           unsigned char *outlen,
+                                           const unsigned char *in,
+                                           unsigned int inlen,
+                                           void *arg), void *arg)
+{
+    ctx->alpn_select_cb = cb;
+    ctx->alpn_select_cb_arg = arg;
+}
+
+/*
+ * SSL_get0_alpn_selected gets the selected ALPN protocol (if any) from
+ * |ssl|. On return it sets |*data| to point to |*len| bytes of protocol name
+ * (not including the leading length-prefix byte). If the server didn't
+ * respond with a negotiated protocol then |*len| will be zero.
+ */
+void SSL_get0_alpn_selected(const SSL *ssl, const unsigned char **data,
+                            unsigned *len)
+{
+    *data = NULL;
+    if (ssl->s3)
+        *data = ssl->s3->alpn_selected;
+    if (*data == NULL)
+        *len = 0;
+    else
+        *len = ssl->s3->alpn_selected_len;
+}
+
+#endif                          /* !OPENSSL_NO_TLSEXT */
 
 int SSL_export_keying_material(SSL *s, unsigned char *out, size_t olen,
                                const char *label, size_t llen,
@@ -1794,7 +1949,8 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth)
     ssl_create_cipher_list(ret->method,
                            &ret->cipher_list, &ret->cipher_list_by_id,
                            meth->version ==
-                           SSL2_VERSION ? "SSLv2" : SSL_DEFAULT_CIPHER_LIST);
+                           SSL2_VERSION ? "SSLv2" : SSL_DEFAULT_CIPHER_LIST,
+                           ret->cert);
     if (ret->cipher_list == NULL || sk_SSL_CIPHER_num(ret->cipher_list) <= 0) {
         SSLerr(SSL_F_SSL_CTX_NEW, SSL_R_LIBRARY_HAS_NO_CIPHERS);
         goto err2;
@@ -2007,6 +2163,16 @@ void SSL_CTX_free(SSL_CTX *a)
     if (a->rbuf_freelist)
         ssl_buf_freelist_free(a->rbuf_freelist);
 #endif
+#ifndef OPENSSL_NO_TLSEXT
+# ifndef OPENSSL_NO_EC
+    if (a->tlsext_ecpointformatlist)
+        OPENSSL_free(a->tlsext_ecpointformatlist);
+    if (a->tlsext_ellipticcurvelist)
+        OPENSSL_free(a->tlsext_ellipticcurvelist);
+# endif                         /* OPENSSL_NO_EC */
+    if (a->alpn_client_proto_list != NULL)
+        OPENSSL_free(a->alpn_client_proto_list);
+#endif
 
     OPENSSL_free(a);
 }
@@ -2041,6 +2207,17 @@ void SSL_CTX_set_verify_depth(SSL_CTX *ctx, int depth)
     X509_VERIFY_PARAM_set_depth(ctx->param, depth);
 }
 
+void SSL_CTX_set_cert_cb(SSL_CTX *c, int (*cb) (SSL *ssl, void *arg),
+                         void *arg)
+{
+    ssl_cert_set_cert_cb(c->cert, cb, arg);
+}
+
+void SSL_set_cert_cb(SSL *s, int (*cb) (SSL *ssl, void *arg), void *arg)
+{
+    ssl_cert_set_cert_cb(s->cert, cb, arg);
+}
+
 void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher)
 {
     CERT_PKEY *cpk;
@@ -2080,25 +2257,25 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher)
 #endif
 
 #ifndef OPENSSL_NO_ECDH
-    have_ecdh_tmp = (c->ecdh_tmp != NULL || c->ecdh_tmp_cb != NULL);
+    have_ecdh_tmp = (c->ecdh_tmp || c->ecdh_tmp_cb || c->ecdh_tmp_auto);
 #endif
     cpk = &(c->pkeys[SSL_PKEY_RSA_ENC]);
-    rsa_enc = (cpk->x509 != NULL && cpk->privatekey != NULL);
+    rsa_enc = cpk->valid_flags & CERT_PKEY_VALID;
     rsa_enc_export = (rsa_enc && EVP_PKEY_size(cpk->privatekey) * 8 <= kl);
     cpk = &(c->pkeys[SSL_PKEY_RSA_SIGN]);
-    rsa_sign = (cpk->x509 != NULL && cpk->privatekey != NULL);
+    rsa_sign = cpk->valid_flags & CERT_PKEY_SIGN;
     cpk = &(c->pkeys[SSL_PKEY_DSA_SIGN]);
-    dsa_sign = (cpk->x509 != NULL && cpk->privatekey != NULL);
+    dsa_sign = cpk->valid_flags & CERT_PKEY_SIGN;
     cpk = &(c->pkeys[SSL_PKEY_DH_RSA]);
-    dh_rsa = (cpk->x509 != NULL && cpk->privatekey != NULL);
+    dh_rsa = cpk->valid_flags & CERT_PKEY_VALID;
     dh_rsa_export = (dh_rsa && EVP_PKEY_size(cpk->privatekey) * 8 <= kl);
     cpk = &(c->pkeys[SSL_PKEY_DH_DSA]);
 /* FIX THIS EAY EAY EAY */
-    dh_dsa = (cpk->x509 != NULL && cpk->privatekey != NULL);
+    dh_dsa = cpk->valid_flags & CERT_PKEY_VALID;
     dh_dsa_export = (dh_dsa && EVP_PKEY_size(cpk->privatekey) * 8 <= kl);
     cpk = &(c->pkeys[SSL_PKEY_ECC]);
 #ifndef OPENSSL_NO_EC
-    have_ecc_cert = (cpk->x509 != NULL && cpk->privatekey != NULL);
+    have_ecc_cert = cpk->valid_flags & CERT_PKEY_VALID;
 #endif
     mask_k = 0;
     mask_a = 0;
@@ -2153,6 +2330,9 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher)
     if (dh_dsa_export)
         emask_k |= SSL_kDHd;
 
+    if (mask_k & (SSL_kDHr | SSL_kDHd))
+        mask_a |= SSL_aDH;
+
     if (rsa_enc || rsa_sign) {
         mask_a |= SSL_aRSA;
         emask_a |= SSL_aRSA;
@@ -2179,13 +2359,18 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher)
      */
 #ifndef OPENSSL_NO_EC
     if (have_ecc_cert) {
+        cpk = &c->pkeys[SSL_PKEY_ECC];
+        x = cpk->x509;
         /* This call populates extension flags (ex_flags) */
-        x = (c->pkeys[SSL_PKEY_ECC]).x509;
         X509_check_purpose(x, -1, 0);
+# ifndef OPENSSL_NO_ECDH
         ecdh_ok = (x->ex_flags & EXFLAG_KUSAGE) ?
             (x->ex_kusage & X509v3_KU_KEY_AGREEMENT) : 1;
+# endif
         ecdsa_ok = (x->ex_flags & EXFLAG_KUSAGE) ?
             (x->ex_kusage & X509v3_KU_DIGITAL_SIGNATURE) : 1;
+        if (!(cpk->valid_flags & CERT_PKEY_SIGN))
+            ecdsa_ok = 0;
         ecc_pkey = X509_get_pubkey(x);
         ecc_pkey_size = (ecc_pkey != NULL) ? EVP_PKEY_bits(ecc_pkey) : 0;
         EVP_PKEY_free(ecc_pkey);
@@ -2193,7 +2378,7 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher)
             signature_nid = OBJ_obj2nid(x->sig_alg->algorithm);
             OBJ_find_sigid_algs(signature_nid, &md_nid, &pk_nid);
         }
-#ifndef OPENSSL_NO_ECDH
+# ifndef OPENSSL_NO_ECDH
         if (ecdh_ok) {
 
             if (pk_nid == NID_rsaEncryption || pk_nid == NID_rsa) {
@@ -2214,15 +2399,16 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher)
                 }
             }
         }
-#endif
-#ifndef OPENSSL_NO_ECDSA
+# endif
+# ifndef OPENSSL_NO_ECDSA
         if (ecdsa_ok) {
             mask_a |= SSL_aECDSA;
             emask_a |= SSL_aECDSA;
         }
-#endif
+# endif
     }
 #endif
+
 #ifndef OPENSSL_NO_ECDH
     if (have_ecdh_tmp) {
         mask_k |= SSL_kEECDH;
@@ -2317,65 +2503,44 @@ int ssl_check_srvr_ecc_cert_and_alg(X509 *x, SSL *s)
 
 #endif
 
-/* THIS NEEDS CLEANING UP */
+static int ssl_get_server_cert_index(const SSL *s)
+{
+    int idx;
+    idx = ssl_cipher_get_cert_index(s->s3->tmp.new_cipher);
+    if (idx == SSL_PKEY_RSA_ENC && !s->cert->pkeys[SSL_PKEY_RSA_ENC].x509)
+        idx = SSL_PKEY_RSA_SIGN;
+    if (idx == -1)
+        SSLerr(SSL_F_SSL_GET_SERVER_CERT_INDEX, ERR_R_INTERNAL_ERROR);
+    return idx;
+}
+
 CERT_PKEY *ssl_get_server_send_pkey(const SSL *s)
 {
-    unsigned long alg_k, alg_a;
     CERT *c;
     int i;
 
     c = s->cert;
+    if (!s->s3 || !s->s3->tmp.new_cipher)
+        return NULL;
     ssl_set_cert_masks(c, s->s3->tmp.new_cipher);
 
-    alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
-    alg_a = s->s3->tmp.new_cipher->algorithm_auth;
-
-    if (alg_k & (SSL_kECDHr | SSL_kECDHe)) {
-        /*
-         * we don't need to look at SSL_kEECDH since no certificate is needed
-         * for anon ECDH and for authenticated EECDH, the check for the auth
-         * algorithm will set i correctly NOTE: For ECDH-RSA, we need an ECC
-         * not an RSA cert but for EECDH-RSA we need an RSA cert. Placing the
-         * checks for SSL_kECDH before RSA checks ensures the correct cert is
-         * chosen.
-         */
-        i = SSL_PKEY_ECC;
-    } else if (alg_a & SSL_aECDSA) {
-        i = SSL_PKEY_ECC;
-    } else if (alg_k & SSL_kDHr)
-        i = SSL_PKEY_DH_RSA;
-    else if (alg_k & SSL_kDHd)
-        i = SSL_PKEY_DH_DSA;
-    else if (alg_a & SSL_aDSS)
-        i = SSL_PKEY_DSA_SIGN;
-    else if (alg_a & SSL_aRSA) {
-        if (c->pkeys[SSL_PKEY_RSA_ENC].x509 == NULL)
-            i = SSL_PKEY_RSA_SIGN;
-        else
-            i = SSL_PKEY_RSA_ENC;
-    } else if (alg_a & SSL_aKRB5) {
-        /* VRS something else here? */
-        return (NULL);
-    } else if (alg_a & SSL_aGOST94)
-        i = SSL_PKEY_GOST94;
-    else if (alg_a & SSL_aGOST01)
-        i = SSL_PKEY_GOST01;
-    else {                      /* if (alg_a & SSL_aNULL) */
-
-        SSLerr(SSL_F_SSL_GET_SERVER_SEND_PKEY, ERR_R_INTERNAL_ERROR);
-        return (NULL);
-    }
+#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+    /*
+     * Broken protocol test: return last used certificate: which may mismatch
+     * the one expected.
+     */
+    if (c->cert_flags & SSL_CERT_FLAG_BROKEN_PROTOCOL)
+        return c->key;
+#endif
 
-    return c->pkeys + i;
-}
+    i = ssl_get_server_cert_index(s);
 
-X509 *ssl_get_server_send_cert(const SSL *s)
-{
-    CERT_PKEY *cpk;
-    cpk = ssl_get_server_send_pkey(s);
-    if (!cpk)
+    /* This may or may not be an error. */
+    if (i < 0)
         return NULL;
-    return cpk->x509;
+
+    /* May be NULL. */
+    return &c->pkeys[i];
 }
 
 EVP_PKEY *ssl_get_sign_pkey(SSL *s, const SSL_CIPHER *cipher,
@@ -2388,8 +2553,18 @@ EVP_PKEY *ssl_get_sign_pkey(SSL *s, const SSL_CIPHER *cipher,
     alg_a = cipher->algorithm_auth;
     c = s->cert;
 
+#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+    /*
+     * Broken protocol test: use last key: which may mismatch the one
+     * expected.
+     */
+    if (c->cert_flags & SSL_CERT_FLAG_BROKEN_PROTOCOL)
+        idx = c->key - c->pkeys;
+    else
+#endif
+
     if ((alg_a & SSL_aDSS) &&
-        (c->pkeys[SSL_PKEY_DSA_SIGN].privatekey != NULL))
+            (c->pkeys[SSL_PKEY_DSA_SIGN].privatekey != NULL))
         idx = SSL_PKEY_DSA_SIGN;
     else if (alg_a & SSL_aRSA) {
         if (c->pkeys[SSL_PKEY_RSA_SIGN].privatekey != NULL)
@@ -2408,6 +2583,28 @@ EVP_PKEY *ssl_get_sign_pkey(SSL *s, const SSL_CIPHER *cipher,
     return c->pkeys[idx].privatekey;
 }
 
+#ifndef OPENSSL_NO_TLSEXT
+int ssl_get_server_cert_serverinfo(SSL *s, const unsigned char **serverinfo,
+                                   size_t *serverinfo_length)
+{
+    CERT *c = NULL;
+    int i = 0;
+    *serverinfo_length = 0;
+
+    c = s->cert;
+    i = ssl_get_server_cert_index(s);
+
+    if (i == -1)
+        return 0;
+    if (c->pkeys[i].serverinfo == NULL)
+        return 0;
+
+    *serverinfo = c->pkeys[i].serverinfo;
+    *serverinfo_length = c->pkeys[i].serverinfo_length;
+    return 1;
+}
+#endif
+
 void ssl_update_cache(SSL *s, int mode)
 {
     int i;
@@ -2439,6 +2636,11 @@ void ssl_update_cache(SSL *s, int mode)
     }
 }
 
+const SSL_METHOD *SSL_CTX_get_ssl_method(SSL_CTX *ctx)
+{
+    return ctx->method;
+}
+
 const SSL_METHOD *SSL_get_ssl_method(SSL *s)
 {
     return (s->method);
@@ -2634,6 +2836,12 @@ const char *SSL_get_version(const SSL *s)
         return ("SSLv3");
     else if (s->version == SSL2_VERSION)
         return ("SSLv2");
+    else if (s->version == DTLS1_BAD_VER)
+        return ("DTLSv0.9");
+    else if (s->version == DTLS1_VERSION)
+        return ("DTLSv1");
+    else if (s->version == DTLS1_2_VERSION)
+        return ("DTLSv1.2");
     else
         return ("unknown");
 }
@@ -2784,7 +2992,6 @@ void ssl_clear_cipher_ctx(SSL *s)
 #endif
 }
 
-/* Fix this function so that it takes an optional type parameter */
 X509 *SSL_get_certificate(const SSL *s)
 {
     if (s->cert != NULL)
@@ -2793,8 +3000,7 @@ X509 *SSL_get_certificate(const SSL *s)
         return (NULL);
 }
 
-/* Fix this function so that it takes an optional type parameter */
-EVP_PKEY *SSL_get_privatekey(SSL *s)
+EVP_PKEY *SSL_get_privatekey(const SSL *s)
 {
     if (s->cert != NULL)
         return (s->cert->key->privatekey);
@@ -2802,6 +3008,22 @@ EVP_PKEY *SSL_get_privatekey(SSL *s)
         return (NULL);
 }
 
+X509 *SSL_CTX_get0_certificate(const SSL_CTX *ctx)
+{
+    if (ctx->cert != NULL)
+        return ctx->cert->key->x509;
+    else
+        return NULL;
+}
+
+EVP_PKEY *SSL_CTX_get0_privatekey(const SSL_CTX *ctx)
+{
+    if (ctx->cert != NULL)
+        return ctx->cert->key->privatekey;
+    else
+        return NULL;
+}
+
 const SSL_CIPHER *SSL_get_current_cipher(const SSL *s)
 {
     if ((s->session != NULL) && (s->session->cipher != NULL))
@@ -2933,13 +3155,15 @@ SSL_CTX *SSL_set_SSL_CTX(SSL *ssl, SSL_CTX *ctx)
         ctx = ssl->initial_ctx;
 #endif
     ssl->cert = ssl_cert_dup(ctx->cert);
-    if (ocert != NULL) {
-        int i;
-        /* Copy negotiated digests from original */
-        for (i = 0; i < SSL_PKEY_NUM; i++) {
-            CERT_PKEY *cpk = ocert->pkeys + i;
-            CERT_PKEY *rpk = ssl->cert->pkeys + i;
-            rpk->digest = cpk->digest;
+    if (ocert) {
+        /* Preserve any already negotiated parameters */
+        if (ssl->server) {
+            ssl->cert->peer_sigalgs = ocert->peer_sigalgs;
+            ssl->cert->peer_sigalgslen = ocert->peer_sigalgslen;
+            ocert->peer_sigalgs = NULL;
+            ssl->cert->ciphers_raw = ocert->ciphers_raw;
+            ssl->cert->ciphers_rawlen = ocert->ciphers_rawlen;
+            ocert->ciphers_raw = NULL;
         }
         ssl_cert_free(ocert);
     }
@@ -3306,6 +3530,11 @@ int SSL_cache_hit(SSL *s)
     return s->hit;
 }
 
+int SSL_is_server(SSL *s)
+{
+    return s->server;
+}
+
 #if defined(_WINDLL) && defined(OPENSSL_SYS_WIN16)
 # include "../crypto/bio/bss_file.c"
 #endif
index a7f3f8d..6c2c551 100644 (file)
 /* RSA key exchange */
 # define SSL_kRSA                0x00000001L
 /* DH cert, RSA CA cert */
-/* no such ciphersuites supported! */
 # define SSL_kDHr                0x00000002L
 /* DH cert, DSA CA cert */
-/* no such ciphersuite supported! */
 # define SSL_kDHd                0x00000004L
 /* tmp DH key no DH cert */
 # define SSL_kEDH                0x00000008L
+/* forward-compatible synonym */
+# define SSL_kDHE                SSL_kEDH
 /* Kerberos5 key exchange */
 # define SSL_kKRB5               0x00000010L
 /* ECDH cert, RSA CA cert */
 # define SSL_kECDHe              0x00000040L
 /* ephemeral ECDH */
 # define SSL_kEECDH              0x00000080L
+/* forward-compatible synonym */
+# define SSL_kECDHE              SSL_kEECDH
 /* PSK */
 # define SSL_kPSK                0x00000100L
 /* GOST key exchange */
 /* no auth (i.e. use ADH or AECDH) */
 # define SSL_aNULL               0x00000004L
 /* Fixed DH auth (kDHd or kDHr) */
-/* no such ciphersuites supported! */
 # define SSL_aDH                 0x00000008L
 /* Fixed ECDH auth (kECDHe or kECDHr) */
 # define SSL_aECDH               0x00000010L
                                 (c)->algo_strength)
 # define SSL_C_EXPORT_PKEYLENGTH(c)      SSL_EXPORT_PKEYLENGTH((c)->algo_strength)
 
+/* Check if an SSL structure is using DTLS */
+# define SSL_IS_DTLS(s)  (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_DTLS)
+/* See if we need explicit IV */
+# define SSL_USE_EXPLICIT_IV(s)  \
+                (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_EXPLICIT_IV)
+/*
+ * See if we use signature algorithms extension and signature algorithm
+ * before signatures.
+ */
+# define SSL_USE_SIGALGS(s)      \
+                        (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_SIGALGS)
+/*
+ * Allow TLS 1.2 ciphersuites: applies to DTLS 1.2 as well as TLS 1.2: may
+ * apply to others in future.
+ */
+# define SSL_USE_TLS1_2_CIPHERS(s)       \
+                (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_TLS1_2_CIPHERS)
+/*
+ * Determine if a client can use TLS 1.2 ciphersuites: can't rely on method
+ * flags because it may not be set to correct version yet.
+ */
+# define SSL_CLIENT_USE_TLS1_2_CIPHERS(s)        \
+                ((SSL_IS_DTLS(s) && s->client_version <= DTLS1_2_VERSION) || \
+                (!SSL_IS_DTLS(s) && s->client_version >= TLS1_2_VERSION))
+
 /* Mostly for SSLv3 */
 # define SSL_PKEY_RSA_ENC        0
 # define SSL_PKEY_RSA_SIGN       1
@@ -505,7 +531,63 @@ typedef struct cert_pkey_st {
     EVP_PKEY *privatekey;
     /* Digest to use when signing */
     const EVP_MD *digest;
+    /* Chain for this certificate */
+    STACK_OF(X509) *chain;
+# ifndef OPENSSL_NO_TLSEXT
+    /*-
+     * serverinfo data for this certificate.  The data is in TLS Extension
+     * wire format, specifically it's a series of records like:
+     *   uint16_t extension_type; // (RFC 5246, 7.4.1.4, Extension)
+     *   uint16_t length;
+     *   uint8_t data[length];
+     */
+    unsigned char *serverinfo;
+    size_t serverinfo_length;
+# endif
+    /*
+     * Set if CERT_PKEY can be used with current SSL session: e.g.
+     * appropriate curve, signature algorithms etc. If zero it can't be used
+     * at all.
+     */
+    int valid_flags;
 } CERT_PKEY;
+/* Retrieve Suite B flags */
+# define tls1_suiteb(s)  (s->cert->cert_flags & SSL_CERT_FLAG_SUITEB_128_LOS)
+/* Used to check strict mode: suite B modes are always strict */
+# define SSL_CERT_FLAGS_CHECK_TLS_STRICT \
+        (SSL_CERT_FLAG_SUITEB_128_LOS|SSL_CERT_FLAG_TLS_STRICT)
+
+typedef struct {
+    unsigned short ext_type;
+    /*
+     * Per-connection flags relating to this extension type: not used if
+     * part of an SSL_CTX structure.
+     */
+    unsigned short ext_flags;
+    custom_ext_add_cb add_cb;
+    custom_ext_free_cb free_cb;
+    void *add_arg;
+    custom_ext_parse_cb parse_cb;
+    void *parse_arg;
+} custom_ext_method;
+
+/* ext_flags values */
+
+/*
+ * Indicates an extension has been received. Used to check for unsolicited or
+ * duplicate extensions.
+ */
+# define SSL_EXT_FLAG_RECEIVED   0x1
+/*
+ * Indicates an extension has been sent: used to enable sending of
+ * corresponding ServerHello extension.
+ */
+# define SSL_EXT_FLAG_SENT       0x2
+
+typedef struct {
+    custom_ext_method *meths;
+    size_t meths_count;
+} custom_ext_methods;
 
 typedef struct cert_st {
     /* Current active set */
@@ -516,14 +598,17 @@ typedef struct cert_st {
      */
     CERT_PKEY *key;
     /*
-     * The following masks are for the key and auth algorithms that are
-     * supported by the certs below
+     * For servers the following masks are for the key and auth algorithms
+     * that are supported by the certs below. For clients they are masks of
+     * *disabled* algorithms based on the current session.
      */
     int valid;
     unsigned long mask_k;
     unsigned long mask_a;
     unsigned long export_mask_k;
     unsigned long export_mask_a;
+    /* Client only */
+    unsigned long mask_ssl;
 # ifndef OPENSSL_NO_RSA
     RSA *rsa_tmp;
     RSA *(*rsa_tmp_cb) (SSL *ssl, int is_export, int keysize);
@@ -536,8 +621,71 @@ typedef struct cert_st {
     EC_KEY *ecdh_tmp;
     /* Callback for generating ephemeral ECDH keys */
     EC_KEY *(*ecdh_tmp_cb) (SSL *ssl, int is_export, int keysize);
+    /* Select ECDH parameters automatically */
+    int ecdh_tmp_auto;
 # endif
+    /* Flags related to certificates */
+    unsigned int cert_flags;
     CERT_PKEY pkeys[SSL_PKEY_NUM];
+    /*
+     * Certificate types (received or sent) in certificate request message.
+     * On receive this is only set if number of certificate types exceeds
+     * SSL3_CT_NUMBER.
+     */
+    unsigned char *ctypes;
+    size_t ctype_num;
+    /*
+     * signature algorithms peer reports: e.g. supported signature algorithms
+     * extension for server or as part of a certificate request for client.
+     */
+    unsigned char *peer_sigalgs;
+    /* Size of above array */
+    size_t peer_sigalgslen;
+    /*
+     * supported signature algorithms. When set on a client this is sent in
+     * the client hello as the supported signature algorithms extension. For
+     * servers it represents the signature algorithms we are willing to use.
+     */
+    unsigned char *conf_sigalgs;
+    /* Size of above array */
+    size_t conf_sigalgslen;
+    /*
+     * Client authentication signature algorithms, if not set then uses
+     * conf_sigalgs. On servers these will be the signature algorithms sent
+     * to the client in a certificate request for TLS 1.2. On a client this
+     * represents the signature algorithms we are willing to use for client
+     * authentication.
+     */
+    unsigned char *client_sigalgs;
+    /* Size of above array */
+    size_t client_sigalgslen;
+    /*
+     * Signature algorithms shared by client and server: cached because these
+     * are used most often.
+     */
+    TLS_SIGALGS *shared_sigalgs;
+    size_t shared_sigalgslen;
+    /*
+     * Certificate setup callback: if set is called whenever a certificate
+     * may be required (client or server). the callback can then examine any
+     * appropriate parameters and setup any certificates required. This
+     * allows advanced applications to select certificates on the fly: for
+     * example based on supported signature algorithms or curves.
+     */
+    int (*cert_cb) (SSL *ssl, void *arg);
+    void *cert_cb_arg;
+    /*
+     * Optional X509_STORE for chain building or certificate validation. If
+     * NULL the parent SSL_CTX store is used instead.
+     */
+    X509_STORE *chain_store;
+    X509_STORE *verify_store;
+    /* Raw values of the cipher list from a client */
+    unsigned char *ciphers_raw;
+    size_t ciphers_rawlen;
+    /* Custom extension methods for server and client */
+    custom_ext_methods cli_ext;
+    custom_ext_methods srv_ext;
     int references;             /* >1 only if SSL_copy_session_id is used */
 } CERT;
 
@@ -563,6 +711,18 @@ typedef struct sess_cert_st {
 # endif
     int references;             /* actually always 1 at the moment */
 } SESS_CERT;
+/* Structure containing decoded values of signature algorithms extension */
+struct tls_sigalgs_st {
+    /* NID of hash algorithm */
+    int hash_nid;
+    /* NID of signature algorithm */
+    int sign_nid;
+    /* Combined hash and signature NID */
+    int signandhash_nid;
+    /* Raw values used in extension */
+    unsigned char rsign;
+    unsigned char rhash;
+};
 
 /*
  * #define MAC_DEBUG
@@ -596,8 +756,6 @@ typedef struct sess_cert_st {
 # define FP_ICC  (int (*)(const void *,const void *))
 # define ssl_put_cipher_by_char(ssl,ciph,ptr) \
                 ((ssl)->method->put_cipher_by_char((ciph),(ptr)))
-# define ssl_get_cipher_by_char(ssl,ptr) \
-                ((ssl)->method->get_cipher_by_char(ptr))
 
 /*
  * This is for the SSLv3/TLSv1.0 differences in crypto/hash stuff It is a bit
@@ -622,8 +780,39 @@ typedef struct ssl3_enc_method {
                                    const char *, size_t,
                                    const unsigned char *, size_t,
                                    int use_context);
+    /* Various flags indicating protocol version requirements */
+    unsigned int enc_flags;
+    /* Handshake header length */
+    unsigned int hhlen;
+    /* Set the handshake header */
+    void (*set_handshake_header) (SSL *s, int type, unsigned long len);
+    /* Write out handshake message */
+    int (*do_write) (SSL *s);
 } SSL3_ENC_METHOD;
 
+# define SSL_HM_HEADER_LENGTH(s) s->method->ssl3_enc->hhlen
+# define ssl_handshake_start(s) \
+        (((unsigned char *)s->init_buf->data) + s->method->ssl3_enc->hhlen)
+# define ssl_set_handshake_header(s, htype, len) \
+        s->method->ssl3_enc->set_handshake_header(s, htype, len)
+# define ssl_do_write(s)  s->method->ssl3_enc->do_write(s)
+
+/* Values for enc_flags */
+
+/* Uses explicit IV for CBC mode */
+# define SSL_ENC_FLAG_EXPLICIT_IV        0x1
+/* Uses signature algorithms extension */
+# define SSL_ENC_FLAG_SIGALGS            0x2
+/* Uses SHA256 default PRF */
+# define SSL_ENC_FLAG_SHA256_PRF         0x4
+/* Is DTLS */
+# define SSL_ENC_FLAG_DTLS               0x8
+/*
+ * Allow TLS 1.2 ciphersuites: applies to DTLS 1.2 as well as TLS 1.2: may
+ * apply to others in future.
+ */
+# define SSL_ENC_FLAG_TLS1_2_CIPHERS     0x10
+
 # ifndef OPENSSL_NO_COMP
 /* Used for holding the relevant compression methods loaded into SSL_CTX */
 typedef struct ssl3_comp_st {
@@ -653,13 +842,14 @@ OPENSSL_EXTERN SSL_CIPHER ssl3_ciphers[];
 SSL_METHOD *ssl_bad_method(int ver);
 
 extern SSL3_ENC_METHOD TLSv1_enc_data;
+extern SSL3_ENC_METHOD TLSv1_1_enc_data;
+extern SSL3_ENC_METHOD TLSv1_2_enc_data;
 extern SSL3_ENC_METHOD SSLv3_enc_data;
 extern SSL3_ENC_METHOD DTLSv1_enc_data;
-
-# define SSL_IS_DTLS(s) (s->method->version == DTLS1_VERSION)
+extern SSL3_ENC_METHOD DTLSv1_2_enc_data;
 
 # define IMPLEMENT_tls_meth_func(version, func_name, s_accept, s_connect, \
-                                s_get_meth) \
+                                s_get_meth, enc_data) \
 const SSL_METHOD *func_name(void)  \
         { \
         static const SSL_METHOD func_name##_data= { \
@@ -688,7 +878,7 @@ const SSL_METHOD *func_name(void)  \
                 ssl3_get_cipher, \
                 s_get_meth, \
                 tls1_default_timeout, \
-                &TLSv1_enc_data, \
+                &enc_data, \
                 ssl_undefined_void_function, \
                 ssl3_callback_ctrl, \
                 ssl3_ctx_callback_ctrl, \
@@ -762,7 +952,7 @@ const SSL_METHOD *func_name(void)  \
         ssl23_get_cipher, \
         s_get_meth, \
         ssl23_default_timeout, \
-        &ssl3_undef_enc_method, \
+        &TLSv1_2_enc_data, \
         ssl_undefined_void_function, \
         ssl3_callback_ctrl, \
         ssl3_ctx_callback_ctrl, \
@@ -807,11 +997,12 @@ const SSL_METHOD *func_name(void)  \
         return &func_name##_data; \
         }
 
-# define IMPLEMENT_dtls1_meth_func(func_name, s_accept, s_connect, s_get_meth) \
+# define IMPLEMENT_dtls1_meth_func(version, func_name, s_accept, s_connect, \
+                                        s_get_meth, enc_data) \
 const SSL_METHOD *func_name(void)  \
         { \
         static const SSL_METHOD func_name##_data= { \
-                DTLS1_VERSION, \
+                version, \
                 dtls1_new, \
                 dtls1_clear, \
                 dtls1_free, \
@@ -836,7 +1027,7 @@ const SSL_METHOD *func_name(void)  \
                 dtls1_get_cipher, \
                 s_get_meth, \
                 dtls1_default_timeout, \
-                &DTLSv1_enc_data, \
+                &enc_data, \
                 ssl_undefined_void_function, \
                 ssl3_callback_ctrl, \
                 ssl3_ctx_callback_ctrl, \
@@ -857,7 +1048,9 @@ void ssl_clear_cipher_ctx(SSL *s);
 int ssl_clear_bad_session(SSL *s);
 CERT *ssl_cert_new(void);
 CERT *ssl_cert_dup(CERT *cert);
+void ssl_cert_set_default_md(CERT *cert);
 int ssl_cert_inst(CERT **o);
+void ssl_cert_clear_certs(CERT *c);
 void ssl_cert_free(CERT *c);
 SESS_CERT *ssl_sess_cert_new(void);
 void ssl_sess_cert_free(SESS_CERT *sc);
@@ -880,18 +1073,36 @@ int ssl_cipher_list_to_bytes(SSL *s, STACK_OF(SSL_CIPHER) *sk,
 STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *meth,
                                              STACK_OF(SSL_CIPHER) **pref,
                                              STACK_OF(SSL_CIPHER) **sorted,
-                                             const char *rule_str);
+                                             const char *rule_str, CERT *c);
 void ssl_update_cache(SSL *s, int mode);
 int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc,
                        const EVP_MD **md, int *mac_pkey_type,
                        int *mac_secret_size, SSL_COMP **comp);
 int ssl_get_handshake_digest(int i, long *mask, const EVP_MD **md);
+int ssl_cipher_get_cert_index(const SSL_CIPHER *c);
+const SSL_CIPHER *ssl_get_cipher_by_char(SSL *ssl, const unsigned char *ptr);
+int ssl_cert_set0_chain(CERT *c, STACK_OF(X509) *chain);
+int ssl_cert_set1_chain(CERT *c, STACK_OF(X509) *chain);
+int ssl_cert_add0_chain_cert(CERT *c, X509 *x);
+int ssl_cert_add1_chain_cert(CERT *c, X509 *x);
+int ssl_cert_select_current(CERT *c, X509 *x);
+int ssl_cert_set_current(CERT *c, long arg);
+X509 *ssl_cert_get0_next_certificate(CERT *c, int first);
+void ssl_cert_set_cert_cb(CERT *c, int (*cb) (SSL *ssl, void *arg),
+                          void *arg);
+
 int ssl_verify_cert_chain(SSL *s, STACK_OF(X509) *sk);
+int ssl_add_cert_chain(SSL *s, CERT_PKEY *cpk, unsigned long *l);
+int ssl_build_cert_chain(CERT *c, X509_STORE *chain_store, int flags);
+int ssl_cert_set_cert_store(CERT *c, X509_STORE *store, int chain, int ref);
 int ssl_undefined_function(SSL *s);
 int ssl_undefined_void_function(void);
 int ssl_undefined_const_function(const SSL *s);
 CERT_PKEY *ssl_get_server_send_pkey(const SSL *s);
-X509 *ssl_get_server_send_cert(const SSL *);
+#  ifndef OPENSSL_NO_TLSEXT
+int ssl_get_server_cert_serverinfo(SSL *s, const unsigned char **serverinfo,
+                                   size_t *serverinfo_length);
+#  endif
 EVP_PKEY *ssl_get_sign_pkey(SSL *s, const SSL_CIPHER *c, const EVP_MD **pmd);
 int ssl_cert_type(X509 *x, EVP_PKEY *pkey);
 void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher);
@@ -962,7 +1173,7 @@ void ssl3_finish_mac(SSL *s, const unsigned char *buf, int len);
 int ssl3_enc(SSL *s, int send_data);
 int n_ssl3_mac(SSL *ssl, unsigned char *md, int send_data);
 void ssl3_free_digest_list(SSL *s);
-unsigned long ssl3_output_cert_chain(SSL *s, X509 *x);
+unsigned long ssl3_output_cert_chain(SSL *s, CERT_PKEY *cpk);
 SSL_CIPHER *ssl3_choose_cipher(SSL *ssl, STACK_OF(SSL_CIPHER) *clnt,
                                STACK_OF(SSL_CIPHER) *srvr);
 int ssl3_setup_buffers(SSL *s);
@@ -990,6 +1201,9 @@ void ssl3_record_sequence_update(unsigned char *seq);
 int ssl3_do_change_cipher_spec(SSL *ssl);
 long ssl3_default_timeout(void);
 
+void ssl3_set_handshake_header(SSL *s, int htype, unsigned long len);
+int ssl3_handshake_write(SSL *s);
+
 int ssl23_num_ciphers(void);
 const SSL_CIPHER *ssl23_get_cipher(unsigned int u);
 int ssl23_read(SSL *s, void *buf, int len);
@@ -1017,8 +1231,6 @@ int dtls1_write_app_data_bytes(SSL *s, int type, const void *buf, int len);
 int dtls1_write_bytes(SSL *s, int type, const void *buf, int len);
 
 int dtls1_send_change_cipher_spec(SSL *s, int a, int b);
-int dtls1_send_finished(SSL *s, int a, int b, const char *sender, int slen);
-unsigned long dtls1_output_cert_chain(SSL *s, X509 *x);
 int dtls1_read_failed(SSL *s, int code);
 int dtls1_buffer_message(SSL *s, int ccs);
 int dtls1_retransmit_message(SSL *s, unsigned short seq,
@@ -1065,9 +1277,6 @@ int ssl3_send_next_proto(SSL *s);
 #  endif
 
 int dtls1_client_hello(SSL *s);
-int dtls1_send_client_certificate(SSL *s);
-int dtls1_send_client_key_exchange(SSL *s);
-int dtls1_send_client_verify(SSL *s);
 
 /* some server-only functions */
 int ssl3_get_client_hello(SSL *s);
@@ -1076,7 +1285,6 @@ int ssl3_send_hello_request(SSL *s);
 int ssl3_send_server_key_exchange(SSL *s);
 int ssl3_send_certificate_request(SSL *s);
 int ssl3_send_server_done(SSL *s);
-int ssl3_check_client_hello(SSL *s);
 int ssl3_get_client_certificate(SSL *s);
 int ssl3_get_client_key_exchange(SSL *s);
 int ssl3_get_cert_verify(SSL *s);
@@ -1084,13 +1292,6 @@ int ssl3_get_cert_verify(SSL *s);
 int ssl3_get_next_proto(SSL *s);
 #  endif
 
-int dtls1_send_hello_request(SSL *s);
-int dtls1_send_server_hello(SSL *s);
-int dtls1_send_server_certificate(SSL *s);
-int dtls1_send_server_key_exchange(SSL *s);
-int dtls1_send_certificate_request(SSL *s);
-int dtls1_send_server_done(SSL *s);
-
 int ssl23_accept(SSL *s);
 int ssl23_connect(SSL *s);
 int ssl23_read_bytes(SSL *s, int n);
@@ -1115,7 +1316,6 @@ int dtls1_get_record(SSL *s);
 int do_dtls1_write(SSL *s, int type, const unsigned char *buf,
                    unsigned int len, int create_empty_fragement);
 int dtls1_dispatch_alert(SSL *s);
-int dtls1_enc(SSL *s, int snd);
 
 int ssl_init_wbio_buffer(SSL *s, int push);
 void ssl_free_wbio_buffer(SSL *s);
@@ -1146,22 +1346,33 @@ SSL_COMP *ssl3_comp_find(STACK_OF(SSL_COMP) *sk, int n);
 #  ifndef OPENSSL_NO_EC
 int tls1_ec_curve_id2nid(int curve_id);
 int tls1_ec_nid2curve_id(int nid);
+int tls1_check_curve(SSL *s, const unsigned char *p, size_t len);
+int tls1_shared_curve(SSL *s, int nmatch);
+int tls1_set_curves(unsigned char **pext, size_t *pextlen,
+                    int *curves, size_t ncurves);
+int tls1_set_curves_list(unsigned char **pext, size_t *pextlen,
+                         const char *str);
+#   ifndef OPENSSL_NO_ECDH
+int tls1_check_ec_tmp_key(SSL *s, unsigned long id);
+#   endif                       /* OPENSSL_NO_ECDH */
 #  endif                        /* OPENSSL_NO_EC */
 
 #  ifndef OPENSSL_NO_TLSEXT
+int tls1_shared_list(SSL *s,
+                     const unsigned char *l1, size_t l1len,
+                     const unsigned char *l2, size_t l2len, int nmatch);
 unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf,
-                                          unsigned char *limit);
+                                          unsigned char *limit, int *al);
 unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf,
-                                          unsigned char *limit);
+                                          unsigned char *limit, int *al);
 int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **data,
-                                 unsigned char *d, int n, int *al);
+                                 unsigned char *d, int n);
+int tls1_set_server_sigalgs(SSL *s);
+int ssl_check_clienthello_tlsext_late(SSL *s);
 int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **data,
-                                 unsigned char *d, int n, int *al);
+                                 unsigned char *d, int n);
 int ssl_prepare_clienthello_tlsext(SSL *s);
 int ssl_prepare_serverhello_tlsext(SSL *s);
-int ssl_check_clienthello_tlsext_early(SSL *s);
-int ssl_check_clienthello_tlsext_late(SSL *s);
-int ssl_check_serverhello_tlsext(SSL *s);
 
 #   ifndef OPENSSL_NO_HEARTBEATS
 int tls1_heartbeat(SSL *s);
@@ -1183,6 +1394,12 @@ int tls12_get_sigandhash(unsigned char *p, const EVP_PKEY *pk,
 int tls12_get_sigid(const EVP_PKEY *pk);
 const EVP_MD *tls12_get_hash(unsigned char hash_alg);
 
+int tls1_set_sigalgs_list(CERT *c, const char *str, int client);
+int tls1_set_sigalgs(CERT *c, const int *salg, size_t salglen, int client);
+int tls1_check_chain(SSL *s, X509 *x, EVP_PKEY *pk, STACK_OF(X509) *chain,
+                     int idx);
+void tls1_set_cert_validity(SSL *s);
+
 #  endif
 EVP_MD_CTX *ssl_replace_hash(EVP_MD_CTX **hash, const EVP_MD *md);
 void ssl_clear_hash_ctx(EVP_MD_CTX **hash);
@@ -1195,8 +1412,12 @@ int ssl_add_clienthello_renegotiate_ext(SSL *s, unsigned char *p, int *len,
 int ssl_parse_clienthello_renegotiate_ext(SSL *s, unsigned char *d, int len,
                                           int *al);
 long ssl_get_algorithm2(SSL *s);
-int tls1_process_sigalgs(SSL *s, const unsigned char *data, int dsize);
-int tls12_get_req_sig_algs(SSL *s, unsigned char *p);
+int tls1_save_sigalgs(SSL *s, const unsigned char *data, int dsize);
+int tls1_process_sigalgs(SSL *s);
+size_t tls12_get_psigalgs(SSL *s, const unsigned char **psigs);
+int tls12_check_peer_sigalg(const EVP_MD **pmd, SSL *s,
+                            const unsigned char *sig, EVP_PKEY *pkey);
+void ssl_set_client_disabled(SSL *s);
 
 int ssl_add_clienthello_use_srtp_ext(SSL *s, unsigned char *p, int *len,
                                      int maxlen);
@@ -1234,6 +1455,19 @@ void tls_fips_digest_extra(const EVP_CIPHER_CTX *cipher_ctx,
 
 int srp_verify_server_param(SSL *s, int *al);
 
+/* t1_ext.c */
+
+void custom_ext_init(custom_ext_methods *meths);
+
+int custom_ext_parse(SSL *s, int server,
+                     unsigned int ext_type,
+                     const unsigned char *ext_data, size_t ext_size, int *al);
+int custom_ext_add(SSL *s, int server,
+                   unsigned char **pret, unsigned char *limit, int *al);
+
+int custom_exts_copy(custom_ext_methods *dst, const custom_ext_methods *src);
+void custom_exts_free(custom_ext_methods *exts);
+
 # else
 
 #  define ssl_init_wbio_buffer SSL_test_functions()->p_ssl_init_wbio_buffer
index daf15dd..b1b2318 100644 (file)
@@ -171,8 +171,22 @@ int SSL_use_RSAPrivateKey(SSL *ssl, RSA *rsa)
 static int ssl_set_pkey(CERT *c, EVP_PKEY *pkey)
 {
     int i;
-
-    i = ssl_cert_type(NULL, pkey);
+    /*
+     * Special case for DH: check two DH certificate types for a match. This
+     * means for DH certificates we must set the certificate first.
+     */
+    if (pkey->type == EVP_PKEY_DH) {
+        X509 *x;
+        i = -1;
+        x = c->pkeys[SSL_PKEY_DH_RSA].x509;
+        if (x && X509_check_private_key(x, pkey))
+            i = SSL_PKEY_DH_RSA;
+        x = c->pkeys[SSL_PKEY_DH_DSA].x509;
+        if (i == -1 && x && X509_check_private_key(x, pkey))
+            i = SSL_PKEY_DH_DSA;
+        ERR_clear_error();
+    } else
+        i = ssl_cert_type(NULL, pkey);
     if (i < 0) {
         SSLerr(SSL_F_SSL_SET_PKEY, SSL_R_UNKNOWN_CERTIFICATE_TYPE);
         return (0);
@@ -690,16 +704,13 @@ int SSL_CTX_use_certificate_chain_file(SSL_CTX *ctx, const char *file)
         int r;
         unsigned long err;
 
-        if (ctx->extra_certs != NULL) {
-            sk_X509_pop_free(ctx->extra_certs, X509_free);
-            ctx->extra_certs = NULL;
-        }
+        SSL_CTX_clear_chain_certs(ctx);
 
         while ((ca = PEM_read_bio_X509(in, NULL,
                                        ctx->default_passwd_callback,
                                        ctx->default_passwd_callback_userdata))
                != NULL) {
-            r = SSL_CTX_add_extra_chain_cert(ctx, ca);
+            r = SSL_CTX_add0_chain_cert(ctx, ca);
             if (!r) {
                 X509_free(ca);
                 ret = 0;
@@ -728,3 +739,270 @@ int SSL_CTX_use_certificate_chain_file(SSL_CTX *ctx, const char *file)
     return (ret);
 }
 #endif
+
+#ifndef OPENSSL_NO_TLSEXT
+/*
+ * Scan a serverinfo blob for the first record of a given extension type.
+ *
+ * The blob is walked as a sequence of records, each made of a 2-byte
+ * big-endian type, a 2-byte big-endian length and that many payload bytes.
+ *
+ * Returns:
+ *    1  a record of |extension_type| was found; *extension_data points at
+ *       its payload inside |serverinfo| (no copy is made) and
+ *       *extension_length holds the payload size.
+ *   -1  the blob was walked to its end without finding the type.
+ *    0  the blob is NULL, empty or truncated.
+ * Unless 1 is returned the two outputs are left as NULL and 0.
+ */
+static int serverinfo_find_extension(const unsigned char *serverinfo,
+                                     size_t serverinfo_length,
+                                     unsigned int extension_type,
+                                     const unsigned char **extension_data,
+                                     size_t *extension_length)
+{
+    const unsigned char *cur = serverinfo;
+    size_t left = serverinfo_length;
+
+    *extension_data = NULL;
+    *extension_length = 0;
+    if (cur == NULL || left == 0)
+        return 0;
+
+    while (left != 0) {
+        unsigned int rec_type;
+        size_t rec_len;
+
+        /* A record header is four bytes: 2-byte type, then 2-byte length */
+        if (left < 4)
+            return 0;           /* Error */
+        rec_type = (cur[0] << 8) + cur[1];
+        rec_len = (cur[2] << 8) + cur[3];
+        cur += 4;
+        left -= 4;
+
+        /* The announced payload must fit in what remains of the blob */
+        if (rec_len > left)
+            return 0;           /* Error */
+
+        if (rec_type == extension_type) {
+            *extension_data = cur;
+            *extension_length = rec_len;
+            return 1;           /* Success */
+        }
+
+        cur += rec_len;
+        left -= rec_len;
+    }
+    return -1;                  /* Extension not found */
+}
+
+/*
+ * Server-side parse callback registered for every serverinfo extension
+ * type.  The extension received from the client is only accepted when its
+ * payload is empty; otherwise *al is set to SSL_AD_DECODE_ERROR and 0 is
+ * returned.  Returns 1 on acceptance.
+ */
+static int serverinfo_srv_parse_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char *in,
+                                   size_t inlen, int *al, void *arg)
+{
+    if (inlen == 0)
+        return 1;
+
+    *al = SSL_AD_DECODE_ERROR;
+    return 0;
+}
+
+/*
+ * Server-side add callback registered for every serverinfo extension type.
+ * Looks up |ext_type| in the serverinfo attached to the chosen server
+ * certificate and, if present, hands its payload back through *out/*outlen.
+ *
+ * Return values follow the custom extension add-callback convention:
+ *    1  send the extension with the payload in *out / *outlen
+ *    0  do not send this extension (no serverinfo for the certificate, or
+ *       the type is not present in it)
+ *   -1  fatal error, *al holds the alert to send (stored serverinfo could
+ *       not be parsed)
+ */
+static int serverinfo_srv_add_cb(SSL *s, unsigned int ext_type,
+                                 const unsigned char **out, size_t *outlen,
+                                 int *al, void *arg)
+{
+    const unsigned char *serverinfo = NULL;
+    size_t serverinfo_length = 0;
+
+    /* Is there serverinfo data for the chosen server cert? */
+    if ((ssl_get_server_cert_serverinfo(s, &serverinfo,
+                                        &serverinfo_length)) != 0) {
+        /* Find the relevant extension from the serverinfo */
+        int retval = serverinfo_find_extension(serverinfo, serverinfo_length,
+                                               ext_type, out, outlen);
+        /*
+         * serverinfo_find_extension uses 0 for "malformed" and -1 for "not
+         * found"; translate those into the add-callback convention rather
+         * than passing them through unchanged.
+         */
+        if (retval == 0) {
+            *al = SSL_AD_DECODE_ERROR;
+            return -1;          /* Error */
+        }
+        if (retval == -1)
+            return 0;           /* No extension found, don't send extension */
+        return 1;               /* Send extension */
+    }
+    return 0;                   /* No serverinfo data found, don't send
+                                 * extension */
+}
+
+/*
+ * Walk a serverinfo blob record by record (2-byte type, 2-byte length,
+ * payload).
+ *
+ * When |ctx| is NULL the blob is only checked for well-formedness.  When
+ * |ctx| is non-NULL, serverinfo_srv_add_cb / serverinfo_srv_parse_cb are
+ * additionally registered on it for each record's extension type as the
+ * record is reached, i.e. before that record's length field is checked.
+ *
+ * Returns 1 if the whole blob was consumed, 0 if it is NULL, empty or
+ * truncated, or if registering a callback failed.
+ */
+static int serverinfo_process_buffer(const unsigned char *serverinfo,
+                                     size_t serverinfo_length, SSL_CTX *ctx)
+{
+    if (serverinfo == NULL || serverinfo_length == 0)
+        return 0;
+
+    while (serverinfo_length != 0) {
+        unsigned int ext_type;
+        size_t payload_len;
+
+        /* The 2-byte type field must be present */
+        if (serverinfo_length < 2)
+            return 0;
+        /* FIXME: check for types we understand explicitly? */
+
+        ext_type = (serverinfo[0] << 8) + serverinfo[1];
+
+        /* Register callbacks for this extension type, if asked to */
+        if (ctx != NULL
+            && !SSL_CTX_add_server_custom_ext(ctx, ext_type,
+                                              serverinfo_srv_add_cb,
+                                              NULL, NULL,
+                                              serverinfo_srv_parse_cb,
+                                              NULL))
+            return 0;
+
+        serverinfo += 2;
+        serverinfo_length -= 2;
+
+        /* The 2-byte length field must be present */
+        if (serverinfo_length < 2)
+            return 0;
+        payload_len = (serverinfo[0] << 8) + serverinfo[1];
+        serverinfo += 2;
+        serverinfo_length -= 2;
+
+        /* Skip the payload, which must fit in the remaining data */
+        if (payload_len > serverinfo_length)
+            return 0;
+        serverinfo += payload_len;
+        serverinfo_length -= payload_len;
+    }
+
+    /* end of serverinfo */
+    return 1;
+}
+
+/*
+ * Attach a serverinfo blob to the context's current certificate slot
+ * (ctx->cert->key) and register custom-extension callbacks for every
+ * extension type it contains.
+ *
+ * The blob is validated first, then copied; the caller keeps ownership of
+ * |serverinfo|.  Returns 1 on success and 0 on failure, with an error
+ * pushed on the error queue.
+ */
+int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo,
+                           size_t serverinfo_length)
+{
+    unsigned char *new_serverinfo;
+
+    if (ctx == NULL || serverinfo == NULL || serverinfo_length == 0) {
+        SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_PASSED_NULL_PARAMETER);
+        return 0;
+    }
+    /* Validation-only pass: no callbacks are registered with a NULL ctx */
+    if (!serverinfo_process_buffer(serverinfo, serverinfo_length, NULL)) {
+        SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, SSL_R_INVALID_SERVERINFO_DATA);
+        return 0;
+    }
+    if (!ssl_cert_inst(&ctx->cert)) {
+        SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_MALLOC_FAILURE);
+        return 0;
+    }
+    if (ctx->cert->key == NULL) {
+        SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_INTERNAL_ERROR);
+        return 0;
+    }
+    /*
+     * Resize through a temporary so that a failed realloc does not lose
+     * (and leak) the serverinfo that is already installed.
+     */
+    new_serverinfo = OPENSSL_realloc(ctx->cert->key->serverinfo,
+                                     serverinfo_length);
+    if (new_serverinfo == NULL) {
+        SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_MALLOC_FAILURE);
+        return 0;
+    }
+    ctx->cert->key->serverinfo = new_serverinfo;
+    memcpy(ctx->cert->key->serverinfo, serverinfo, serverinfo_length);
+    ctx->cert->key->serverinfo_length = serverinfo_length;
+
+    /*
+     * Now that the serverinfo is validated and stored, go ahead and
+     * register callbacks.
+     */
+    if (!serverinfo_process_buffer(serverinfo, serverinfo_length, ctx)) {
+        SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, SSL_R_INVALID_SERVERINFO_DATA);
+        return 0;
+    }
+    return 1;
+}
+
+# ifndef OPENSSL_NO_STDIO
+/*
+ * Load serverinfo from a PEM file and install it with
+ * SSL_CTX_use_serverinfo.
+ *
+ * The file must contain at least one PEM block whose name starts with
+ * "SERVERINFO FOR ".  Each block's decoded data must be at least 4 bytes
+ * long, with bytes 2..3 holding a big-endian length equal to the number of
+ * bytes that follow.  The decoded blocks are concatenated in file order.
+ *
+ * Returns the result of SSL_CTX_use_serverinfo on the concatenated data,
+ * or 0 if the file could not be read or failed one of the checks above.
+ */
+int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file)
+{
+    unsigned char *serverinfo = NULL;
+    unsigned char *grown = NULL;
+    size_t serverinfo_length = 0;
+    unsigned char *extension = NULL;
+    long extension_length = 0;
+    char *name = NULL;
+    char *header = NULL;
+    char namePrefix[] = "SERVERINFO FOR ";
+    int ret = 0;
+    BIO *bin = NULL;
+    size_t num_extensions = 0;
+
+    if (ctx == NULL || file == NULL) {
+        SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE,
+               ERR_R_PASSED_NULL_PARAMETER);
+        goto end;
+    }
+
+    bin = BIO_new(BIO_s_file_internal());
+    if (bin == NULL) {
+        SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, ERR_R_BUF_LIB);
+        goto end;
+    }
+    if (BIO_read_filename(bin, file) <= 0) {
+        SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, ERR_R_SYS_LIB);
+        goto end;
+    }
+
+    for (num_extensions = 0;; num_extensions++) {
+        if (PEM_read_bio(bin, &name, &header, &extension, &extension_length)
+            == 0) {
+            /*
+             * There must be at least one extension in this file
+             */
+            if (num_extensions == 0) {
+                SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE,
+                       SSL_R_NO_PEM_EXTENSIONS);
+                goto end;
+            } else              /* End of file, we're done */
+                break;
+        }
+        /* Check that PEM name starts with "BEGIN SERVERINFO FOR " */
+        if (strlen(name) < strlen(namePrefix)) {
+            SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE,
+                   SSL_R_PEM_NAME_TOO_SHORT);
+            goto end;
+        }
+        if (strncmp(name, namePrefix, strlen(namePrefix)) != 0) {
+            SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE,
+                   SSL_R_PEM_NAME_BAD_PREFIX);
+            goto end;
+        }
+        /*
+         * Check that the decoded PEM data is plausible (valid length field)
+         */
+        if (extension_length < 4
+            || (extension[2] << 8) + extension[3] != extension_length - 4) {
+            SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, SSL_R_BAD_DATA);
+            goto end;
+        }
+        /*
+         * Append the decoded extension to the serverinfo buffer.  Grow via
+         * a temporary: on failure |serverinfo| still points at the data
+         * gathered so far and is released at the end label instead of
+         * being leaked.
+         */
+        grown =
+            OPENSSL_realloc(serverinfo, serverinfo_length + extension_length);
+        if (grown == NULL) {
+            SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, ERR_R_MALLOC_FAILURE);
+            goto end;
+        }
+        serverinfo = grown;
+        memcpy(serverinfo + serverinfo_length, extension, extension_length);
+        serverinfo_length += extension_length;
+
+        /* Release this block's buffers before reading the next one */
+        OPENSSL_free(name);
+        name = NULL;
+        OPENSSL_free(header);
+        header = NULL;
+        OPENSSL_free(extension);
+        extension = NULL;
+    }
+
+    ret = SSL_CTX_use_serverinfo(ctx, serverinfo, serverinfo_length);
+ end:
+    /* SSL_CTX_use_serverinfo makes a local copy of the serverinfo. */
+    OPENSSL_free(name);
+    OPENSSL_free(header);
+    OPENSSL_free(extension);
+    OPENSSL_free(serverinfo);
+    if (bin != NULL)
+        BIO_free(bin);
+    return ret;
+}
+# endif                         /* OPENSSL_NO_STDIO */
+#endif                          /* OPENSSL_NO_TLSEXT */
index 1ad9dc7..07e7379 100644 (file)
@@ -444,6 +444,9 @@ int ssl_get_new_session(SSL *s, int session)
         } else if (s->version == DTLS1_VERSION) {
             ss->ssl_version = DTLS1_VERSION;
             ss->session_id_length = SSL3_SSL_SESSION_ID_LENGTH;
+        } else if (s->version == DTLS1_2_VERSION) {
+            ss->ssl_version = DTLS1_2_VERSION;
+            ss->session_id_length = SSL3_SSL_SESSION_ID_LENGTH;
         } else {
             SSLerr(SSL_F_SSL_GET_NEW_SESSION, SSL_R_UNSUPPORTED_SSL_VERSION);
             SSL_SESSION_free(ss);
@@ -519,38 +522,6 @@ int ssl_get_new_session(SSL *s, int session)
                 return 0;
             }
         }
-# ifndef OPENSSL_NO_EC
-        if (s->tlsext_ecpointformatlist) {
-            if (ss->tlsext_ecpointformatlist != NULL)
-                OPENSSL_free(ss->tlsext_ecpointformatlist);
-            if ((ss->tlsext_ecpointformatlist =
-                 OPENSSL_malloc(s->tlsext_ecpointformatlist_length)) ==
-                NULL) {
-                SSLerr(SSL_F_SSL_GET_NEW_SESSION, ERR_R_MALLOC_FAILURE);
-                SSL_SESSION_free(ss);
-                return 0;
-            }
-            ss->tlsext_ecpointformatlist_length =
-                s->tlsext_ecpointformatlist_length;
-            memcpy(ss->tlsext_ecpointformatlist, s->tlsext_ecpointformatlist,
-                   s->tlsext_ecpointformatlist_length);
-        }
-        if (s->tlsext_ellipticcurvelist) {
-            if (ss->tlsext_ellipticcurvelist != NULL)
-                OPENSSL_free(ss->tlsext_ellipticcurvelist);
-            if ((ss->tlsext_ellipticcurvelist =
-                 OPENSSL_malloc(s->tlsext_ellipticcurvelist_length)) ==
-                NULL) {
-                SSLerr(SSL_F_SSL_GET_NEW_SESSION, ERR_R_MALLOC_FAILURE);
-                SSL_SESSION_free(ss);
-                return 0;
-            }
-            ss->tlsext_ellipticcurvelist_length =
-                s->tlsext_ellipticcurvelist_length;
-            memcpy(ss->tlsext_ellipticcurvelist, s->tlsext_ellipticcurvelist,
-                   s->tlsext_ellipticcurvelist_length);
-        }
-# endif
 #endif
     } else {
         ss->session_id_length = 0;
index bd67dc7..45308d8 100644 (file)
@@ -124,6 +124,8 @@ int SSL_SESSION_print(BIO *bp, const SSL_SESSION *x)
         s = "TLSv1";
     else if (x->ssl_version == DTLS1_VERSION)
         s = "DTLSv1";
+    else if (x->ssl_version == DTLS1_2_VERSION)
+        s = "DTLSv1.2";
     else if (x->ssl_version == DTLS1_BAD_VER)
         s = "DTLSv1-bad";
     else
index 6a0c293..6737adf 100644 (file)
@@ -298,6 +298,362 @@ static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg)
 static BIO *bio_err = NULL;
 static BIO *bio_stdout = NULL;
 
+static const char *alpn_client;
+static const char *alpn_server;
+static const char *alpn_expected;
+static unsigned char *alpn_selected;
+
+/*-
+ * next_protos_parse parses a comma separated list of strings into a string
+ * in a format suitable for passing to SSL_CTX_set_next_protos_advertised:
+ * each element is emitted as one length byte followed by its characters.
+ *   outlen: (output) set to the length of the resulting buffer on success.
+ *   in: a NUL terminated string like "abc,def,ghi"
+ *
+ *   returns: a malloced buffer or NULL on failure (input of 65535 bytes or
+ *   more, an element longer than 255 bytes, or allocation failure).  The
+ *   caller releases the buffer with OPENSSL_free.
+ */
+static unsigned char *next_protos_parse(unsigned short *outlen,
+                                        const char *in)
+{
+    size_t len;
+    unsigned char *out;
+    size_t i, start = 0;
+
+    len = strlen(in);
+    if (len >= 65535)
+        return NULL;
+
+    out = OPENSSL_malloc(len + 1);
+    if (!out)
+        return NULL;
+
+    /*
+     * Characters are copied one slot to the right of their input position;
+     * the slot freed up at the start of each element (index 0, or where the
+     * preceding comma would have landed) receives that element's length.
+     */
+    for (i = 0; i <= len; ++i) {
+        if (i == len || in[i] == ',') {
+            if (i - start > 255) {
+                OPENSSL_free(out);
+                return NULL;
+            }
+            out[start] = i - start;
+            start = i + 1;
+        } else
+            out[i + 1] = in[i];
+    }
+
+    *outlen = len + 1;
+    return out;
+}
+
+/*
+ * Server ALPN selection callback.  Parses the configured alpn_server list
+ * and picks a protocol against the client's offer (in/inlen) with
+ * SSL_select_next_proto.
+ *
+ * Returns SSL_TLSEXT_ERR_OK with *out / *outlen describing the selection,
+ * or SSL_TLSEXT_ERR_NOACK when SSL_select_next_proto does not report
+ * OPENSSL_NPN_NEGOTIATED.  Aborts the test program if alpn_server cannot
+ * be parsed or memory cannot be allocated.
+ */
+static int cb_server_alpn(SSL *s, const unsigned char **out,
+                          unsigned char *outlen, const unsigned char *in,
+                          unsigned int inlen, void *arg)
+{
+    unsigned char *protos;
+    unsigned short protos_len;
+
+    protos = next_protos_parse(&protos_len, alpn_server);
+    if (protos == NULL) {
+        fprintf(stderr, "failed to parser ALPN server protocol string: %s\n",
+                alpn_server);
+        abort();
+    }
+
+    if (SSL_select_next_proto
+        ((unsigned char **)out, outlen, protos, protos_len, in,
+         inlen) != OPENSSL_NPN_NEGOTIATED) {
+        OPENSSL_free(protos);
+        return SSL_TLSEXT_ERR_NOACK;
+    }
+
+    /*
+     * Make a copy of the selected protocol which will be freed in
+     * verify_alpn.
+     */
+    alpn_selected = OPENSSL_malloc(*outlen);
+    if (alpn_selected == NULL) {
+        /* Never hand a NULL destination to memcpy */
+        fprintf(stderr, "failed to allocate memory for ALPN selection\n");
+        OPENSSL_free(protos);
+        abort();
+    }
+    memcpy(alpn_selected, *out, *outlen);
+    *out = alpn_selected;
+
+    OPENSSL_free(protos);
+    return SSL_TLSEXT_ERR_OK;
+}
+
+/*
+ * Check the ALPN outcome of a finished handshake.
+ *
+ * Passes when client and server report the same selected protocol and that
+ * protocol matches alpn_expected (no protocol at all when alpn_expected is
+ * NULL).  Also releases the alpn_selected copy made by cb_server_alpn.
+ *
+ * Returns 0 on success, -1 on mismatch after printing diagnostics to
+ * bio_stdout.
+ */
+static int verify_alpn(SSL *client, SSL *server)
+{
+    const unsigned char *client_proto = NULL, *server_proto = NULL;
+    unsigned int client_proto_len = 0, server_proto_len = 0;
+    SSL_get0_alpn_selected(client, &client_proto, &client_proto_len);
+    SSL_get0_alpn_selected(server, &server_proto, &server_proto_len);
+
+    if (alpn_selected != NULL) {
+        OPENSSL_free(alpn_selected);
+        alpn_selected = NULL;
+    }
+
+    /*
+     * When no protocol was selected the pointers are NULL with length 0;
+     * memcmp must not be given NULL even for a zero length, so only compare
+     * bytes when there are some.
+     */
+    if (client_proto_len != server_proto_len ||
+        (client_proto_len != 0 &&
+         memcmp(client_proto, server_proto, client_proto_len) != 0)) {
+        BIO_printf(bio_stdout, "ALPN selected protocols differ!\n");
+        goto err;
+    }
+
+    if (client_proto_len > 0 && alpn_expected == NULL) {
+        BIO_printf(bio_stdout, "ALPN unexpectedly negotiated\n");
+        goto err;
+    }
+
+    if (alpn_expected != NULL &&
+        (client_proto_len != strlen(alpn_expected) ||
+         (client_proto_len != 0 &&
+          memcmp(client_proto, alpn_expected, client_proto_len) != 0))) {
+        BIO_printf(bio_stdout,
+                   "ALPN selected protocols not equal to expected protocol: %s\n",
+                   alpn_expected);
+        goto err;
+    }
+
+    return 0;
+
+ err:
+    BIO_printf(bio_stdout, "ALPN results: client: '");
+    BIO_write(bio_stdout, client_proto, client_proto_len);
+    BIO_printf(bio_stdout, "', server: '");
+    BIO_write(bio_stdout, server_proto, server_proto_len);
+    BIO_printf(bio_stdout, "'\n");
+    BIO_printf(bio_stdout, "ALPN configured: client: '%s', server: '%s'\n",
+               alpn_client, alpn_server);
+    return -1;
+}
+
+#define SCT_EXT_TYPE 18
+
+/*
+ * WARNING : below extension types are *NOT* IETF assigned, and could
+ * conflict if these types are reassigned and handled specially by OpenSSL
+ * in the future
+ */
+#define TACK_EXT_TYPE 62208
+#define CUSTOM_EXT_TYPE_0 1000
+#define CUSTOM_EXT_TYPE_1 1001
+#define CUSTOM_EXT_TYPE_2 1002
+#define CUSTOM_EXT_TYPE_3 1003
+
+const char custom_ext_cli_string[] = "abc";
+const char custom_ext_srv_string[] = "defg";
+
+/* These set from cmdline */
+char *serverinfo_file = NULL;
+int serverinfo_sct = 0;
+int serverinfo_tack = 0;
+
+/* These set based on extension callbacks */
+int serverinfo_sct_seen = 0;
+int serverinfo_tack_seen = 0;
+int serverinfo_other_seen = 0;
+
+/* This set from cmdline */
+int custom_ext = 0;
+
+/* This set based on extension callbacks */
+int custom_ext_error = 0;
+
+/*
+ * Client-side parse callback for the serverinfo tests: counts each received
+ * extension as SCT, TACK or "other" so verify_serverinfo can compare the
+ * tallies with what the command line asked for.  The payload is not
+ * examined.  Always returns 1.
+ */
+static int serverinfo_cli_parse_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char *in, size_t inlen,
+                                   int *al, void *arg)
+{
+    switch (ext_type) {
+    case SCT_EXT_TYPE:
+        serverinfo_sct_seen++;
+        break;
+    case TACK_EXT_TYPE:
+        serverinfo_tack_seen++;
+        break;
+    default:
+        serverinfo_other_seen++;
+        break;
+    }
+    return 1;
+}
+
+/*
+ * Compare the serverinfo extensions seen by the client with the ones
+ * requested on the command line.  Returns 0 when the SCT and TACK counts
+ * match the requested values and no other extension type was seen,
+ * -1 otherwise.
+ */
+static int verify_serverinfo(void)
+{
+    if (serverinfo_sct != serverinfo_sct_seen)
+        return -1;
+    if (serverinfo_tack != serverinfo_tack_seen)
+        return -1;
+    if (serverinfo_other_seen)
+        return -1;
+    return 0;
+}
+
+/*-
+ * Four test cases for custom extensions:
+ * 0 - no ClientHello extension or ServerHello response
+ * 1 - ClientHello with "abc", no response
+ * 2 - ClientHello with "abc", empty response
+ * 3 - ClientHello with "abc", "defg" response
+ */
+
+/*
+ * Client 'add' callback for test case 0.  Flags an error if invoked for a
+ * type other than CUSTOM_EXT_TYPE_0 and never supplies a payload.
+ */
+static int custom_ext_0_cli_add_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char **out,
+                                   size_t *outlen, int *al, void *arg)
+{
+    if (ext_type != CUSTOM_EXT_TYPE_0)
+        custom_ext_error = 1;
+    return 0;                   /* Don't send an extension */
+}
+
+/*
+ * Client 'parse' callback for test case 0.  Performs no checks on the
+ * received extension and always returns 1.
+ */
+static int custom_ext_0_cli_parse_cb(SSL *s, unsigned int ext_type,
+                                     const unsigned char *in,
+                                     size_t inlen, int *al, void *arg)
+{
+    return 1;
+}
+
+/*
+ * Client 'add' callback for test case 1: offers custom_ext_cli_string as
+ * the payload and flags an error if invoked for a type other than
+ * CUSTOM_EXT_TYPE_1.
+ */
+static int custom_ext_1_cli_add_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char **out,
+                                   size_t *outlen, int *al, void *arg)
+{
+    *outlen = strlen(custom_ext_cli_string);
+    *out = (const unsigned char *)custom_ext_cli_string;
+    if (ext_type != CUSTOM_EXT_TYPE_1)
+        custom_ext_error = 1;
+    return 1;                   /* Send "abc" */
+}
+
+/*
+ * Client 'parse' callback for test case 1.  Performs no checks on the
+ * received extension and always returns 1.
+ */
+static int custom_ext_1_cli_parse_cb(SSL *s, unsigned int ext_type,
+                                     const unsigned char *in,
+                                     size_t inlen, int *al, void *arg)
+{
+    return 1;
+}
+
+/*
+ * Client 'add' callback for test case 2: offers custom_ext_cli_string as
+ * the payload and flags an error if invoked for a type other than
+ * CUSTOM_EXT_TYPE_2.
+ */
+static int custom_ext_2_cli_add_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char **out,
+                                   size_t *outlen, int *al, void *arg)
+{
+    *outlen = strlen(custom_ext_cli_string);
+    *out = (const unsigned char *)custom_ext_cli_string;
+    if (ext_type != CUSTOM_EXT_TYPE_2)
+        custom_ext_error = 1;
+    return 1;                   /* Send "abc" */
+}
+
+/*
+ * Client 'parse' callback for test case 2: the server's response must be
+ * of type CUSTOM_EXT_TYPE_2 and carry an empty payload.  Any deviation is
+ * recorded in custom_ext_error; the callback itself always returns 1.
+ */
+static int custom_ext_2_cli_parse_cb(SSL *s, unsigned int ext_type,
+                                     const unsigned char *in,
+                                     size_t inlen, int *al, void *arg)
+{
+    /* Wrong type, or a response that is not empty */
+    if (ext_type != CUSTOM_EXT_TYPE_2 || inlen != 0)
+        custom_ext_error = 1;
+    return 1;
+}
+
+/*
+ * Client 'add' callback for test case 3: offers custom_ext_cli_string as
+ * the payload and flags an error if invoked for a type other than
+ * CUSTOM_EXT_TYPE_3.
+ */
+static int custom_ext_3_cli_add_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char **out,
+                                   size_t *outlen, int *al, void *arg)
+{
+    *outlen = strlen(custom_ext_cli_string);
+    *out = (const unsigned char *)custom_ext_cli_string;
+    if (ext_type != CUSTOM_EXT_TYPE_3)
+        custom_ext_error = 1;
+    return 1;                   /* Send "abc" */
+}
+
+/*
+ * Client 'parse' callback for test case 3: the server's response must be
+ * of type CUSTOM_EXT_TYPE_3 and carry exactly custom_ext_srv_string.  Any
+ * deviation is recorded in custom_ext_error; the callback itself always
+ * returns 1.
+ */
+static int custom_ext_3_cli_parse_cb(SSL *s, unsigned int ext_type,
+                                     const unsigned char *in,
+                                     size_t inlen, int *al, void *arg)
+{
+    if (ext_type != CUSTOM_EXT_TYPE_3)
+        custom_ext_error = 1;
+    /*
+     * Check for "defg".  The length is tested first so that memcmp is never
+     * asked to read more bytes than the reference string holds.
+     */
+    if (inlen != strlen(custom_ext_srv_string)
+        || memcmp(custom_ext_srv_string, in, inlen) != 0)
+        custom_ext_error = 1;
+    return 1;
+}
+
+/*
+ * custom_ext_0_cli_add_cb returns 0 - the server won't receive a callback
+ * for this extension
+ *
+ * Being called at all is therefore recorded as a failure in
+ * custom_ext_error; the callback still returns 1.
+ */
+static int custom_ext_0_srv_parse_cb(SSL *s, unsigned int ext_type,
+                                     const unsigned char *in,
+                                     size_t inlen, int *al, void *arg)
+{
+    custom_ext_error = 1;
+    return 1;
+}
+
+/*
+ * 'add' callbacks are only called if the 'parse' callback is called
+ *
+ * Server 'add' callback for test case 0: any invocation is recorded in
+ * custom_ext_error and no payload is supplied.
+ */
+static int custom_ext_0_srv_add_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char **out,
+                                   size_t *outlen, int *al, void *arg)
+{
+    /* Error: should not have been called */
+    custom_ext_error = 1;
+    return 0;                   /* Don't send an extension */
+}
+
+/*
+ * Server 'parse' callback for test case 1: the client's extension must be
+ * of type CUSTOM_EXT_TYPE_1 and carry exactly custom_ext_cli_string.  Any
+ * deviation is recorded in custom_ext_error; the callback itself always
+ * returns 1.
+ */
+static int custom_ext_1_srv_parse_cb(SSL *s, unsigned int ext_type,
+                                     const unsigned char *in,
+                                     size_t inlen, int *al, void *arg)
+{
+    if (ext_type != CUSTOM_EXT_TYPE_1)
+        custom_ext_error = 1;
+    /*
+     * Check for "abc".  The length is tested first so that memcmp is never
+     * asked to read more bytes than the reference string holds.
+     */
+    if (inlen != strlen(custom_ext_cli_string)
+        || memcmp(in, custom_ext_cli_string, inlen) != 0)
+        custom_ext_error = 1;
+    return 1;
+}
+
+/*
+ * Server 'add' callback for test case 1: supplies no payload, so the
+ * client's extension gets no response.
+ */
+static int custom_ext_1_srv_add_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char **out,
+                                   size_t *outlen, int *al, void *arg)
+{
+    return 0;                   /* Don't send an extension */
+}
+
+/*
+ * Server 'parse' callback for test case 2: the client's extension must be
+ * of type CUSTOM_EXT_TYPE_2 and carry exactly custom_ext_cli_string.  Any
+ * deviation is recorded in custom_ext_error; the callback itself always
+ * returns 1.
+ */
+static int custom_ext_2_srv_parse_cb(SSL *s, unsigned int ext_type,
+                                     const unsigned char *in,
+                                     size_t inlen, int *al, void *arg)
+{
+    if (ext_type != CUSTOM_EXT_TYPE_2)
+        custom_ext_error = 1;
+    /*
+     * Check for "abc".  The length is tested first so that memcmp is never
+     * asked to read more bytes than the reference string holds.
+     */
+    if (inlen != strlen(custom_ext_cli_string)
+        || memcmp(in, custom_ext_cli_string, inlen) != 0)
+        custom_ext_error = 1;
+    return 1;
+}
+
+/*
+ * Server 'add' callback for test case 2: responds with an extension that
+ * has no payload (NULL data, zero length).
+ */
+static int custom_ext_2_srv_add_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char **out,
+                                   size_t *outlen, int *al, void *arg)
+{
+    *outlen = 0;
+    *out = NULL;
+    return 1;                   /* Send empty extension */
+}
+
+/*
+ * Server 'parse' callback for test case 3: the client's extension must be
+ * of type CUSTOM_EXT_TYPE_3 and carry exactly custom_ext_cli_string.  Any
+ * deviation is recorded in custom_ext_error; the callback itself always
+ * returns 1.
+ */
+static int custom_ext_3_srv_parse_cb(SSL *s, unsigned int ext_type,
+                                     const unsigned char *in,
+                                     size_t inlen, int *al, void *arg)
+{
+    if (ext_type != CUSTOM_EXT_TYPE_3)
+        custom_ext_error = 1;
+    /*
+     * Check for "abc".  The length is tested first so that memcmp is never
+     * asked to read more bytes than the reference string holds.
+     */
+    if (inlen != strlen(custom_ext_cli_string)
+        || memcmp(in, custom_ext_cli_string, inlen) != 0)
+        custom_ext_error = 1;
+    return 1;
+}
+
+/*
+ * Server 'add' callback for test case 3: responds with
+ * custom_ext_srv_string as the payload.
+ */
+static int custom_ext_3_srv_add_cb(SSL *s, unsigned int ext_type,
+                                   const unsigned char **out,
+                                   size_t *outlen, int *al, void *arg)
+{
+    *outlen = strlen(custom_ext_srv_string);
+    *out = (const unsigned char *)custom_ext_srv_string;
+    return 1;                   /* Send "defg" */
+}
+
 static char *cipher = NULL;
 static int verbose = 0;
 static int debug = 0;
@@ -327,7 +683,7 @@ static void sv_usage(void)
     fprintf(stderr, " -proxy        - allow proxy certificates\n");
     fprintf(stderr, " -proxy_auth <val> - set proxy policy rights\n");
     fprintf(stderr,
-            " -proxy_cond <val> - experssion to test proxy policy rights\n");
+            " -proxy_cond <val> - expression to test proxy policy rights\n");
     fprintf(stderr, " -v            - more output\n");
     fprintf(stderr, " -d            - debug output\n");
     fprintf(stderr, " -reuse        - use session-id reuse\n");
@@ -362,6 +718,10 @@ static void sv_usage(void)
 #ifndef OPENSSL_NO_TLS1
     fprintf(stderr, " -tls1         - use TLSv1\n");
 #endif
+#ifndef OPENSSL_NO_DTLS
+    fprintf(stderr, " -dtls1        - use DTLSv1\n");
+    fprintf(stderr, " -dtls12       - use DTLSv1.2\n");
+#endif
     fprintf(stderr, " -CApath arg   - PEM format directory of CA's\n");
     fprintf(stderr, " -CAfile arg   - PEM format file of CA's\n");
     fprintf(stderr, " -cert arg     - Server certificate file\n");
@@ -387,6 +747,16 @@ static void sv_usage(void)
             " -test_cipherlist - Verifies the order of the ssl cipher lists.\n"
             "                    When this option is requested, the cipherlist\n"
             "                    tests are run instead of handshake tests.\n");
+    fprintf(stderr, " -serverinfo_file file - have server use this file\n");
+    fprintf(stderr, " -serverinfo_sct  - have client offer and expect SCT\n");
+    fprintf(stderr,
+            " -serverinfo_tack - have client offer and expect TACK\n");
+    fprintf(stderr,
+            " -custom_ext - try various custom extension callbacks\n");
+    fprintf(stderr, " -alpn_client <string> - have client side offer ALPN\n");
+    fprintf(stderr, " -alpn_server <string> - have server side offer ALPN\n");
+    fprintf(stderr,
+            " -alpn_expected <string> - the ALPN protocol that should be negotiated\n");
 }
 
 static void print_details(SSL *c_ssl, const char *prefix)
@@ -513,7 +883,7 @@ int main(int argc, char *argv[])
     int badop = 0;
     int bio_pair = 0;
     int force = 0;
-    int tls1 = 0, ssl2 = 0, ssl3 = 0, ret = 1;
+    int dtls1 = 0, dtls12 = 0, tls1 = 0, ssl2 = 0, ssl3 = 0, ret = 1;
     int client_auth = 0;
     int server_auth = 0, i;
     struct app_verify_arg app_verify_arg =
@@ -549,8 +919,8 @@ int main(int argc, char *argv[])
     int no_psk = 0;
     int print_time = 0;
     clock_t s_time = 0, c_time = 0;
-    int comp = 0;
 #ifndef OPENSSL_NO_COMP
+    int comp = 0;
     COMP_METHOD *cm = NULL;
     STACK_OF(SSL_COMP) *ssl_comp_methods = NULL;
 #endif
@@ -592,7 +962,7 @@ int main(int argc, char *argv[])
             fips_mode = 1;
 #else
             fprintf(stderr,
-                    "not compiled with FIPS support, so exitting without running.\n");
+                    "not compiled with FIPS support, so exiting without running.\n");
             EXIT(0);
 #endif
         } else if (strcmp(*argv, "-server_auth") == 0)
@@ -673,6 +1043,16 @@ int main(int argc, char *argv[])
             no_protocol = 1;
 #endif
             ssl3 = 1;
+        } else if (strcmp(*argv, "-dtls1") == 0) {
+#ifdef OPENSSL_NO_DTLS
+            no_protocol = 1;
+#endif
+            dtls1 = 1;
+        } else if (strcmp(*argv, "-dtls12") == 0) {
+#ifdef OPENSSL_NO_DTLS
+            no_protocol = 1;
+#endif
+            dtls12 = 1;
         } else if (strncmp(*argv, "-num", 4) == 0) {
             if (--argc < 1)
                 goto bad;
@@ -732,11 +1112,15 @@ int main(int argc, char *argv[])
             force = 1;
         } else if (strcmp(*argv, "-time") == 0) {
             print_time = 1;
-        } else if (strcmp(*argv, "-zlib") == 0) {
+        }
+#ifndef OPENSSL_NO_COMP
+        else if (strcmp(*argv, "-zlib") == 0) {
             comp = COMP_ZLIB;
         } else if (strcmp(*argv, "-rle") == 0) {
             comp = COMP_RLE;
-        } else if (strcmp(*argv, "-named_curve") == 0) {
+        }
+#endif
+        else if (strcmp(*argv, "-named_curve") == 0) {
             if (--argc < 1)
                 goto bad;
 #ifndef OPENSSL_NO_ECDH
@@ -752,6 +1136,28 @@ int main(int argc, char *argv[])
             app_verify_arg.allow_proxy_certs = 1;
         } else if (strcmp(*argv, "-test_cipherlist") == 0) {
             test_cipherlist = 1;
+        } else if (strcmp(*argv, "-serverinfo_sct") == 0) {
+            serverinfo_sct = 1;
+        } else if (strcmp(*argv, "-serverinfo_tack") == 0) {
+            serverinfo_tack = 1;
+        } else if (strcmp(*argv, "-serverinfo_file") == 0) {
+            if (--argc < 1)
+                goto bad;
+            serverinfo_file = *(++argv);
+        } else if (strcmp(*argv, "-custom_ext") == 0) {
+            custom_ext = 1;
+        } else if (strcmp(*argv, "-alpn_client") == 0) {
+            if (--argc < 1)
+                goto bad;
+            alpn_client = *(++argv);
+        } else if (strcmp(*argv, "-alpn_server") == 0) {
+            if (--argc < 1)
+                goto bad;
+            alpn_server = *(++argv);
+        } else if (strcmp(*argv, "-alpn_expected") == 0) {
+            if (--argc < 1)
+                goto bad;
+            alpn_expected = *(++argv);
         } else {
             fprintf(stderr, "unknown option %s\n", *argv);
             badop = 1;
@@ -782,8 +1188,8 @@ int main(int argc, char *argv[])
         goto end;
     }
 
-    if (ssl2 + ssl3 + tls1 > 1) {
-        fprintf(stderr, "At most one of -ssl2, -ssl3, or -tls1 should "
+    if (ssl2 + ssl3 + tls1 + dtls1 + dtls12 > 1) {
+        fprintf(stderr, "At most one of -ssl2, -ssl3, -tls1, -dtls1 or -dtls12 should "
                 "be requested.\n");
         EXIT(1);
     }
@@ -800,10 +1206,10 @@ int main(int argc, char *argv[])
         goto end;
     }
 
-    if (!ssl2 && !ssl3 && !tls1 && number > 1 && !reuse && !force) {
+    if (!ssl2 && !ssl3 && !tls1 && !dtls1 && !dtls12 && number > 1 && !reuse && !force) {
         fprintf(stderr, "This case cannot work.  Use -f to perform "
                 "the test anyway (and\n-d to see what happens), "
-                "or add one of -ssl2, -ssl3, -tls1, -reuse\n"
+                "or add one of ssl2, -ssl3, -tls1, -dtls1, -dtls12, -reuse\n"
                 "to avoid protocol mismatch.\n");
         EXIT(1);
     }
@@ -881,6 +1287,13 @@ int main(int argc, char *argv[])
         meth = SSLv3_method();
     else
 #endif
+#ifndef OPENSSL_NO_DTLS
+    if (dtls1)
+        meth = DTLSv1_method();
+    else if (dtls12)
+        meth = DTLSv1_2_method();
+    else
+#endif
 #ifndef OPENSSL_NO_TLS1
     if (tls1)
         meth = TLSv1_method();
@@ -928,12 +1341,9 @@ int main(int argc, char *argv[])
                 BIO_printf(bio_err, "unknown curve name (%s)\n", named_curve);
                 goto end;
             }
-        } else
-# ifdef OPENSSL_NO_EC2M
+        } else {
             nid = NID_X9_62_prime256v1;
-# else
-            nid = NID_sect163r2;
-# endif
+        }
 
         ecdh = EC_KEY_new_by_curve_name(nid);
         if (ecdh == NULL) {
@@ -1055,6 +1465,72 @@ int main(int argc, char *argv[])
     }
 #endif
 
+    if (serverinfo_sct)
+        SSL_CTX_add_client_custom_ext(c_ctx, SCT_EXT_TYPE,
+                                      NULL, NULL, NULL,
+                                      serverinfo_cli_parse_cb, NULL);
+    if (serverinfo_tack)
+        SSL_CTX_add_client_custom_ext(c_ctx, TACK_EXT_TYPE,
+                                      NULL, NULL, NULL,
+                                      serverinfo_cli_parse_cb, NULL);
+
+    if (serverinfo_file)
+        if (!SSL_CTX_use_serverinfo_file(s_ctx, serverinfo_file)) {
+            BIO_printf(bio_err, "missing serverinfo file\n");
+            goto end;
+        }
+
+    if (custom_ext) {
+        SSL_CTX_add_client_custom_ext(c_ctx, CUSTOM_EXT_TYPE_0,
+                                      custom_ext_0_cli_add_cb,
+                                      NULL, NULL,
+                                      custom_ext_0_cli_parse_cb, NULL);
+        SSL_CTX_add_client_custom_ext(c_ctx, CUSTOM_EXT_TYPE_1,
+                                      custom_ext_1_cli_add_cb,
+                                      NULL, NULL,
+                                      custom_ext_1_cli_parse_cb, NULL);
+        SSL_CTX_add_client_custom_ext(c_ctx, CUSTOM_EXT_TYPE_2,
+                                      custom_ext_2_cli_add_cb,
+                                      NULL, NULL,
+                                      custom_ext_2_cli_parse_cb, NULL);
+        SSL_CTX_add_client_custom_ext(c_ctx, CUSTOM_EXT_TYPE_3,
+                                      custom_ext_3_cli_add_cb,
+                                      NULL, NULL,
+                                      custom_ext_3_cli_parse_cb, NULL);
+
+        SSL_CTX_add_server_custom_ext(s_ctx, CUSTOM_EXT_TYPE_0,
+                                      custom_ext_0_srv_add_cb,
+                                      NULL, NULL,
+                                      custom_ext_0_srv_parse_cb, NULL);
+        SSL_CTX_add_server_custom_ext(s_ctx, CUSTOM_EXT_TYPE_1,
+                                      custom_ext_1_srv_add_cb,
+                                      NULL, NULL,
+                                      custom_ext_1_srv_parse_cb, NULL);
+        SSL_CTX_add_server_custom_ext(s_ctx, CUSTOM_EXT_TYPE_2,
+                                      custom_ext_2_srv_add_cb,
+                                      NULL, NULL,
+                                      custom_ext_2_srv_parse_cb, NULL);
+        SSL_CTX_add_server_custom_ext(s_ctx, CUSTOM_EXT_TYPE_3,
+                                      custom_ext_3_srv_add_cb,
+                                      NULL, NULL,
+                                      custom_ext_3_srv_parse_cb, NULL);
+    }
+
+    if (alpn_server)
+        SSL_CTX_set_alpn_select_cb(s_ctx, cb_server_alpn, NULL);
+
+    if (alpn_client) {
+        unsigned short alpn_len;
+        unsigned char *alpn = next_protos_parse(&alpn_len, alpn_client);
+
+        if (alpn == NULL) {
+            BIO_printf(bio_err, "Error parsing -alpn_client argument\n");
+            goto end;
+        }
+        SSL_CTX_set_alpn_protos(c_ctx, alpn, alpn_len);
+        OPENSSL_free(alpn);
+    }
+
     c_ssl = SSL_new(c_ctx);
     s_ssl = SSL_new(s_ctx);
 
@@ -1479,6 +1955,21 @@ int doit_biopair(SSL *s_ssl, SSL *c_ssl, long count,
 
     if (verbose)
         print_details(c_ssl, "DONE via BIO pair: ");
+
+    if (verify_serverinfo() < 0) {
+        ret = 1;
+        goto err;
+    }
+    if (verify_alpn(c_ssl, s_ssl) < 0) {
+        ret = 1;
+        goto err;
+    }
+
+    if (custom_ext_error) {
+        ret = 1;
+        goto err;
+    }
+
  end:
     ret = 0;
 
@@ -1508,7 +1999,8 @@ int doit_biopair(SSL *s_ssl, SSL *c_ssl, long count,
 
 int doit(SSL *s_ssl, SSL *c_ssl, long count)
 {
-    MS_STATIC char cbuf[1024 * 8], sbuf[1024 * 8];
+    char *cbuf = NULL, *sbuf = NULL;
+    long bufsiz;
     long cw_num = count, cr_num = count;
     long sw_num = count, sr_num = count;
     int ret = 1;
@@ -1521,9 +2013,17 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count)
     int done = 0;
     int c_write, s_write;
     int do_server = 0, do_client = 0;
+    int max_frag = 5 * 1024;
 
-    memset(cbuf, 0, sizeof(cbuf));
-    memset(sbuf, 0, sizeof(sbuf));
+    bufsiz = count > 40 * 1024 ? 40 * 1024 : count;
+
+    if ((cbuf = OPENSSL_malloc(bufsiz)) == NULL)
+        goto err;
+    if ((sbuf = OPENSSL_malloc(bufsiz)) == NULL)
+        goto err;
+
+    memset(cbuf, 0, bufsiz);
+    memset(sbuf, 0, bufsiz);
 
     c_to_s = BIO_new(BIO_s_mem());
     s_to_c = BIO_new(BIO_s_mem());
@@ -1541,10 +2041,12 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count)
 
     SSL_set_connect_state(c_ssl);
     SSL_set_bio(c_ssl, s_to_c, c_to_s);
+    SSL_set_max_send_fragment(c_ssl, max_frag);
     BIO_set_ssl(c_bio, c_ssl, BIO_NOCLOSE);
 
     SSL_set_accept_state(s_ssl);
     SSL_set_bio(s_ssl, c_to_s, s_to_c);
+    SSL_set_max_send_fragment(s_ssl, max_frag);
     BIO_set_ssl(s_bio, s_ssl, BIO_NOCLOSE);
 
     c_r = 0;
@@ -1595,8 +2097,7 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count)
         }
         if (do_client && !(done & C_DONE)) {
             if (c_write) {
-                j = (cw_num > (long)sizeof(cbuf)) ?
-                    (int)sizeof(cbuf) : (int)cw_num;
+                j = (cw_num > bufsiz) ? (int)bufsiz : (int)cw_num;
                 i = BIO_write(c_bio, cbuf, j);
                 if (i < 0) {
                     c_r = 0;
@@ -1621,9 +2122,11 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count)
                     s_r = 1;
                     c_write = 0;
                     cw_num -= i;
+                    if (max_frag > 1029)
+                        SSL_set_max_send_fragment(c_ssl, max_frag -= 5);
                 }
             } else {
-                i = BIO_read(c_bio, cbuf, sizeof(cbuf));
+                i = BIO_read(c_bio, cbuf, bufsiz);
                 if (i < 0) {
                     c_r = 0;
                     c_w = 0;
@@ -1659,7 +2162,7 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count)
 
         if (do_server && !(done & S_DONE)) {
             if (!s_write) {
-                i = BIO_read(s_bio, sbuf, sizeof(cbuf));
+                i = BIO_read(s_bio, sbuf, bufsiz);
                 if (i < 0) {
                     s_r = 0;
                     s_w = 0;
@@ -1693,8 +2196,7 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count)
                     }
                 }
             } else {
-                j = (sw_num > (long)sizeof(sbuf)) ?
-                    (int)sizeof(sbuf) : (int)sw_num;
+                j = (sw_num > bufsiz) ? (int)bufsiz : (int)sw_num;
                 i = BIO_write(s_bio, sbuf, j);
                 if (i < 0) {
                     s_r = 0;
@@ -1722,6 +2224,8 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count)
                     c_r = 1;
                     if (sw_num <= 0)
                         done |= S_DONE;
+                    if (max_frag > 1029)
+                        SSL_set_max_send_fragment(s_ssl, max_frag -= 5);
                 }
             }
         }
@@ -1732,6 +2236,14 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count)
 
     if (verbose)
         print_details(c_ssl, "DONE: ");
+    if (verify_serverinfo() < 0) {
+        ret = 1;
+        goto err;
+    }
+    if (custom_ext_error) {
+        ret = 1;
+        goto err;
+    }
     ret = 0;
  err:
     /*
@@ -1759,6 +2271,12 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count)
         BIO_free_all(c_bio);
     if (s_bio != NULL)
         BIO_free_all(s_bio);
+
+    if (cbuf)
+        OPENSSL_free(cbuf);
+    if (sbuf)
+        OPENSSL_free(sbuf);
+
     return (ret);
 }
 
index 05c7f20..746b4e6 100644 (file)
@@ -77,12 +77,14 @@ static const SSL_METHOD *tls1_get_client_method(int ver)
 
 IMPLEMENT_tls_meth_func(TLS1_2_VERSION, TLSv1_2_client_method,
                         ssl_undefined_function,
-                        ssl3_connect, tls1_get_client_method)
+                        ssl3_connect,
+                        tls1_get_client_method, TLSv1_2_enc_data)
 
     IMPLEMENT_tls_meth_func(TLS1_1_VERSION, TLSv1_1_client_method,
                         ssl_undefined_function,
-                        ssl3_connect, tls1_get_client_method)
+                        ssl3_connect,
+                        tls1_get_client_method, TLSv1_1_enc_data)
 
     IMPLEMENT_tls_meth_func(TLS1_VERSION, TLSv1_client_method,
                         ssl_undefined_function,
-                        ssl3_connect, tls1_get_client_method)
+                        ssl3_connect, tls1_get_client_method, TLSv1_enc_data)
index 8f45294..e2a8f86 100644 (file)
@@ -160,7 +160,7 @@ static int tls1_P_hash(const EVP_MD *md, const unsigned char *sec,
 {
     int chunk;
     size_t j;
-    EVP_MD_CTX ctx, ctx_tmp;
+    EVP_MD_CTX ctx, ctx_tmp, ctx_init;
     EVP_PKEY *mac_key;
     unsigned char A1[EVP_MAX_MD_SIZE];
     size_t A1_len;
@@ -171,14 +171,14 @@ static int tls1_P_hash(const EVP_MD *md, const unsigned char *sec,
 
     EVP_MD_CTX_init(&ctx);
     EVP_MD_CTX_init(&ctx_tmp);
-    EVP_MD_CTX_set_flags(&ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
-    EVP_MD_CTX_set_flags(&ctx_tmp, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
+    EVP_MD_CTX_init(&ctx_init);
+    EVP_MD_CTX_set_flags(&ctx_init, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);
     mac_key = EVP_PKEY_new_mac_key(EVP_PKEY_HMAC, NULL, sec, sec_len);
     if (!mac_key)
         goto err;
-    if (!EVP_DigestSignInit(&ctx, NULL, md, NULL, mac_key))
+    if (!EVP_DigestSignInit(&ctx_init, NULL, md, NULL, mac_key))
         goto err;
-    if (!EVP_DigestSignInit(&ctx_tmp, NULL, md, NULL, mac_key))
+    if (!EVP_MD_CTX_copy_ex(&ctx, &ctx_init))
         goto err;
     if (seed1 && !EVP_DigestSignUpdate(&ctx, seed1, seed1_len))
         goto err;
@@ -195,13 +195,11 @@ static int tls1_P_hash(const EVP_MD *md, const unsigned char *sec,
 
     for (;;) {
         /* Reinit mac contexts */
-        if (!EVP_DigestSignInit(&ctx, NULL, md, NULL, mac_key))
-            goto err;
-        if (!EVP_DigestSignInit(&ctx_tmp, NULL, md, NULL, mac_key))
+        if (!EVP_MD_CTX_copy_ex(&ctx, &ctx_init))
             goto err;
         if (!EVP_DigestSignUpdate(&ctx, A1, A1_len))
             goto err;
-        if (!EVP_DigestSignUpdate(&ctx_tmp, A1, A1_len))
+        if (olen > chunk && !EVP_MD_CTX_copy_ex(&ctx_tmp, &ctx))
             goto err;
         if (seed1 && !EVP_DigestSignUpdate(&ctx, seed1, seed1_len))
             goto err;
@@ -235,6 +233,7 @@ static int tls1_P_hash(const EVP_MD *md, const unsigned char *sec,
     EVP_PKEY_free(mac_key);
     EVP_MD_CTX_cleanup(&ctx);
     EVP_MD_CTX_cleanup(&ctx_tmp);
+    EVP_MD_CTX_cleanup(&ctx_init);
     OPENSSL_cleanse(A1, sizeof(A1));
     return ret;
 }
@@ -405,9 +404,9 @@ int tls1_change_cipher_state(SSL *s, int which)
         }
 #endif
         /*
-         * this is done by dtls1_reset_seq_numbers for DTLS1_VERSION
+         * this is done by dtls1_reset_seq_numbers for DTLS
          */
-        if (s->version != DTLS1_VERSION)
+        if (!SSL_IS_DTLS(s))
             memset(&(s->s3->read_sequence[0]), 0, 8);
         mac_secret = &(s->s3->read_mac_secret[0]);
         mac_secret_size = &(s->s3->read_mac_secret_size);
@@ -443,9 +442,9 @@ int tls1_change_cipher_state(SSL *s, int which)
         }
 #endif
         /*
-         * this is done by dtls1_reset_seq_numbers for DTLS1_VERSION
+         * this is done by dtls1_reset_seq_numbers for DTLS
          */
-        if (s->version != DTLS1_VERSION)
+        if (!SSL_IS_DTLS(s))
             memset(&(s->s3->write_sequence[0]), 0, 8);
         mac_secret = &(s->s3->write_mac_secret[0]);
         mac_secret_size = &(s->s3->write_mac_secret_size);
@@ -574,6 +573,25 @@ int tls1_change_cipher_state(SSL *s, int which)
         SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE, ERR_R_INTERNAL_ERROR);
         goto err2;
     }
+#ifdef OPENSSL_SSL_TRACE_CRYPTO
+    if (s->msg_callback) {
+        int wh = which & SSL3_CC_WRITE ? TLS1_RT_CRYPTO_WRITE : 0;
+        if (*mac_secret_size)
+            s->msg_callback(2, s->version, wh | TLS1_RT_CRYPTO_MAC,
+                            mac_secret, *mac_secret_size,
+                            s, s->msg_callback_arg);
+        if (c->key_len)
+            s->msg_callback(2, s->version, wh | TLS1_RT_CRYPTO_KEY,
+                            key, c->key_len, s, s->msg_callback_arg);
+        if (k) {
+            if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE)
+                wh |= TLS1_RT_CRYPTO_FIXED_IV;
+            else
+                wh |= TLS1_RT_CRYPTO_IV;
+            s->msg_callback(2, s->version, wh, iv, k, s, s->msg_callback_arg);
+        }
+    }
+#endif
 
 #ifdef TLS_DEBUG
     printf("which = %04X\nkey=", which);
@@ -741,7 +759,7 @@ int tls1_enc(SSL *s, int send)
             int ivlen;
             enc = EVP_CIPHER_CTX_cipher(s->enc_write_ctx);
             /* For TLSv1.1 and later explicit IV */
-            if (s->version >= TLS1_1_VERSION
+            if (SSL_USE_EXPLICIT_IV(s)
                 && EVP_CIPHER_mode(enc) == EVP_CIPH_CBC_MODE)
                 ivlen = EVP_CIPHER_iv_length(enc);
             else
@@ -789,7 +807,7 @@ int tls1_enc(SSL *s, int send)
 
             seq = send ? s->s3->write_sequence : s->s3->read_sequence;
 
-            if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER) {
+            if (SSL_IS_DTLS(s)) {
                 unsigned char dtlsseq[9], *p = dtlsseq;
 
                 s2n(send ? s->d1->w_epoch : s->d1->r_epoch, p);
@@ -1010,7 +1028,7 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send)
         mac_ctx = &hmac;
     }
 
-    if (ssl->version == DTLS1_VERSION || ssl->version == DTLS1_BAD_VER) {
+    if (SSL_IS_DTLS(ssl)) {
         unsigned char dtlsseq[8], *p = dtlsseq;
 
         s2n(send ? ssl->d1->w_epoch : ssl->d1->r_epoch, p);
@@ -1078,7 +1096,7 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send)
     }
 #endif
 
-    if (ssl->version != DTLS1_VERSION && ssl->version != DTLS1_BAD_VER) {
+    if (!SSL_IS_DTLS(ssl)) {
         for (i = 7; i >= 0; i--) {
             ++seq[i];
             if (seq[i] != 0)
@@ -1144,6 +1162,22 @@ int tls1_generate_master_secret(SSL *s, unsigned char *out, unsigned char *p,
                 SSL3_MASTER_SECRET_SIZE);
 #endif
 
+#ifdef OPENSSL_SSL_TRACE_CRYPTO
+    if (s->msg_callback) {
+        s->msg_callback(2, s->version, TLS1_RT_CRYPTO_PREMASTER,
+                        p, len, s, s->msg_callback_arg);
+        s->msg_callback(2, s->version, TLS1_RT_CRYPTO_CLIENT_RANDOM,
+                        s->s3->client_random, SSL3_RANDOM_SIZE,
+                        s, s->msg_callback_arg);
+        s->msg_callback(2, s->version, TLS1_RT_CRYPTO_SERVER_RANDOM,
+                        s->s3->server_random, SSL3_RANDOM_SIZE,
+                        s, s->msg_callback_arg);
+        s->msg_callback(2, s->version, TLS1_RT_CRYPTO_MASTER,
+                        s->session->master_key,
+                        SSL3_MASTER_SECRET_SIZE, s, s->msg_callback_arg);
+    }
+#endif
+
 #ifdef KSSL_DEBUG
     fprintf(stderr, "tls1_generate_master_secret() complete\n");
 #endif                          /* KSSL_DEBUG */
diff --git a/ssl/t1_ext.c b/ssl/t1_ext.c
new file mode 100644 (file)
index 0000000..724ddf7
--- /dev/null
@@ -0,0 +1,298 @@
+/* ssl/t1_ext.c */
+/* ====================================================================
+ * Copyright (c) 2014 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com).  This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+/* Custom extension utility functions */
+
+#include "ssl_locl.h"
+
+#ifndef OPENSSL_NO_TLSEXT
+
+/*
+ * Look up the entry registered for ext_type in exts.  Returns a pointer to
+ * the first matching entry, or NULL when the table holds no such type.
+ */
+static custom_ext_method *custom_ext_find(custom_ext_methods *exts,
+                                          unsigned int ext_type)
+{
+    size_t idx = 0;
+
+    while (idx < exts->meths_count) {
+        custom_ext_method *entry = &exts->meths[idx];
+
+        if (entry->ext_type == ext_type)
+            return entry;
+        idx++;
+    }
+    return NULL;
+}
+
+/*
+ * Clear the per-entry flags of every custom extension in exts, so that each
+ * one is marked as neither sent nor received.
+ */
+void custom_ext_init(custom_ext_methods *exts)
+{
+    size_t idx;
+
+    for (idx = 0; idx < exts->meths_count; idx++) {
+        custom_ext_method *entry = &exts->meths[idx];
+
+        entry->ext_flags = 0;
+    }
+}
+
+/*
+ * Hand a received custom extension to the application's parse callback.
+ *
+ * The server table is consulted when 'server' is non-zero, the client table
+ * otherwise.  Returns 1 when the extension type is not registered or has no
+ * parse callback; returns 0 with *al set when the extension is rejected
+ * here; otherwise returns whatever the parse callback returns.
+ */
+int custom_ext_parse(SSL *s, int server,
+                     unsigned int ext_type,
+                     const unsigned char *ext_data, size_t ext_size, int *al)
+{
+    custom_ext_methods *table;
+    custom_ext_method *entry;
+
+    if (server)
+        table = &s->cert->srv_ext;
+    else
+        table = &s->cert->cli_ext;
+
+    entry = custom_ext_find(table, ext_type);
+    /* Unregistered types are not an error. */
+    if (entry == NULL)
+        return 1;
+
+    /*
+     * On the client side (ServerHello) an extension is only acceptable if
+     * it was previously sent in the ClientHello.
+     */
+    if (!server && (entry->ext_flags & SSL_EXT_FLAG_SENT) == 0) {
+        *al = TLS1_AD_UNSUPPORTED_EXTENSION;
+        return 0;
+    }
+
+    /* Seeing the same extension a second time is a duplicate. */
+    if ((entry->ext_flags & SSL_EXT_FLAG_RECEIVED) != 0) {
+        *al = TLS1_AD_DECODE_ERROR;
+        return 0;
+    }
+    entry->ext_flags |= SSL_EXT_FLAG_RECEIVED;
+
+    /* Nothing to call: treat as success. */
+    if (entry->parse_cb == NULL)
+        return 1;
+
+    return entry->parse_cb(s, ext_type, ext_data, ext_size, al,
+                           entry->parse_arg);
+}
+
+/*
+ * Request custom extension data from the application and append it to the
+ * output buffer.
+ *
+ * s      - connection whose certificate structure holds the extension tables
+ * server - non-zero to use the server table, zero for the client table
+ * pret   - in: current write position; out: position after the last
+ *          extension written (only updated on success)
+ * limit  - end of the output buffer
+ * al     - passed through to the add callback
+ *
+ * Returns 1 on success.  Returns 0 if an add callback returns a negative
+ * value or if an extension does not fit before 'limit'.
+ */
+int custom_ext_add(SSL *s, int server,
+                   unsigned char **pret, unsigned char *limit, int *al)
+{
+    custom_ext_methods *exts = server ? &s->cert->srv_ext : &s->cert->cli_ext;
+    custom_ext_method *meth;
+    unsigned char *ret = *pret;
+    size_t i;
+
+    for (i = 0; i < exts->meths_count; i++) {
+        const unsigned char *out = NULL;
+        size_t outlen = 0;
+        meth = exts->meths + i;
+
+        if (server) {
+            /*
+             * For ServerHello only send extensions present in ClientHello.
+             */
+            if (!(meth->ext_flags & SSL_EXT_FLAG_RECEIVED))
+                continue;
+            /* If callback absent for server skip it */
+            if (!meth->add_cb)
+                continue;
+        }
+        if (meth->add_cb) {
+            int cb_retval = 0;
+            cb_retval = meth->add_cb(s, meth->ext_type,
+                                     &out, &outlen, al, meth->add_arg);
+            if (cb_retval < 0)
+                return 0;       /* error */
+            if (cb_retval == 0)
+                continue;       /* skip this extension */
+        }
+        /* Need 4 bytes of header (type + length) plus the payload. */
+        if (4 > limit - ret || outlen > (size_t)(limit - ret - 4)) {
+            /*
+             * The add callback has already produced its data, so give the
+             * application the chance to release it before failing;
+             * otherwise anything it allocated would be leaked.
+             */
+            if (meth->free_cb)
+                meth->free_cb(s, meth->ext_type, out, meth->add_arg);
+            return 0;
+        }
+        s2n(meth->ext_type, ret);
+        s2n(outlen, ret);
+        if (outlen) {
+            memcpy(ret, out, outlen);
+            ret += outlen;
+        }
+        /*
+         * We can't send duplicates: code logic should prevent this.
+         */
+        OPENSSL_assert(!(meth->ext_flags & SSL_EXT_FLAG_SENT));
+        /*
+         * Indicate extension has been sent: this is both a sanity check to
+         * ensure we don't send duplicate extensions and indicates that it is
+         * not an error if the extension is present in ServerHello.
+         */
+        meth->ext_flags |= SSL_EXT_FLAG_SENT;
+        if (meth->free_cb)
+            meth->free_cb(s, meth->ext_type, out, meth->add_arg);
+    }
+    *pret = ret;
+    return 1;
+}
+
+/*
+ * Duplicate the custom extension table of src into dst.
+ *
+ * An empty source table leaves dst untouched and counts as success.
+ * Returns 1 on success, 0 if the duplicate could not be allocated.
+ */
+int custom_exts_copy(custom_ext_methods *dst, const custom_ext_methods *src)
+{
+    size_t nbytes;
+
+    if (src->meths_count == 0)
+        return 1;
+
+    nbytes = sizeof(custom_ext_method) * src->meths_count;
+    dst->meths = BUF_memdup(src->meths, nbytes);
+    if (dst->meths == NULL)
+        return 0;
+
+    dst->meths_count = src->meths_count;
+    return 1;
+}
+
+/*
+ * Release the method array owned by exts, if there is one.  Neither the
+ * pointer nor meths_count is reset here.
+ */
+void custom_exts_free(custom_ext_methods *exts)
+{
+    if (exts->meths)
+        OPENSSL_free(exts->meths);
+}
+
+/*
+ * Set callbacks for a custom extension by appending a new entry to exts.
+ *
+ * Returns 1 on success.  Returns 0, leaving the table unchanged, when:
+ *   - free_cb is supplied without add_cb,
+ *   - SSL_extension_supported() reports ext_type as handled internally,
+ *   - ext_type does not fit in 16 bits,
+ *   - an entry for ext_type is already registered, or
+ *   - the table cannot be grown.
+ */
+static int custom_ext_meth_add(custom_ext_methods *exts,
+                               unsigned int ext_type,
+                               custom_ext_add_cb add_cb,
+                               custom_ext_free_cb free_cb,
+                               void *add_arg,
+                               custom_ext_parse_cb parse_cb, void *parse_arg)
+{
+    custom_ext_method *meth, *tmp;
+    /*
+     * Check application error: if add_cb is not set free_cb will never be
+     * called.
+     */
+    if (!add_cb && free_cb)
+        return 0;
+    /* Don't add if extension supported internally. */
+    if (SSL_extension_supported(ext_type))
+        return 0;
+    /* Extension type must fit in 16 bits */
+    if (ext_type > 0xffff)
+        return 0;
+    /* Search for duplicate */
+    if (custom_ext_find(exts, ext_type))
+        return 0;
+    /*
+     * Grow the table through a temporary pointer.  Assigning the result
+     * straight to exts->meths would, on allocation failure, overwrite the
+     * only reference to the existing table: the old block would be leaked
+     * and every previously registered extension silently dropped.
+     */
+    tmp = OPENSSL_realloc(exts->meths,
+                          (exts->meths_count +
+                           1) * sizeof(custom_ext_method));
+
+    if (!tmp)
+        return 0;
+    exts->meths = tmp;
+
+    meth = exts->meths + exts->meths_count;
+    memset(meth, 0, sizeof(custom_ext_method));
+    meth->parse_cb = parse_cb;
+    meth->add_cb = add_cb;
+    meth->free_cb = free_cb;
+    meth->ext_type = ext_type;
+    meth->add_arg = add_arg;
+    meth->parse_arg = parse_arg;
+    exts->meths_count++;
+    return 1;
+}
+
+/* Application level functions to add custom extension callbacks */
+
+/*
+ * Register callbacks for a custom extension in the client table of ctx
+ * (ctx->cert->cli_ext).  All arguments are forwarded unchanged to
+ * custom_ext_meth_add(), whose result (1 on success, 0 on failure) is
+ * returned.
+ */
+int SSL_CTX_add_client_custom_ext(SSL_CTX *ctx, unsigned int ext_type,
+                                  custom_ext_add_cb add_cb,
+                                  custom_ext_free_cb free_cb,
+                                  void *add_arg,
+                                  custom_ext_parse_cb parse_cb,
+                                  void *parse_arg)
+{
+    return custom_ext_meth_add(&ctx->cert->cli_ext, ext_type,
+                               add_cb, free_cb, add_arg, parse_cb, parse_arg);
+}
+
+/*
+ * Register callbacks for a custom extension in the server table of ctx
+ * (ctx->cert->srv_ext).  All arguments are forwarded unchanged to
+ * custom_ext_meth_add(), whose result (1 on success, 0 on failure) is
+ * returned.
+ */
+int SSL_CTX_add_server_custom_ext(SSL_CTX *ctx, unsigned int ext_type,
+                                  custom_ext_add_cb add_cb,
+                                  custom_ext_free_cb free_cb,
+                                  void *add_arg,
+                                  custom_ext_parse_cb parse_cb,
+                                  void *parse_arg)
+{
+    return custom_ext_meth_add(&ctx->cert->srv_ext, ext_type,
+                               add_cb, free_cb, add_arg, parse_cb, parse_arg);
+}
+
+/*
+ * Report whether ext_type is one of the extension types listed below as
+ * internally supported.  Returns 1 for a listed type and 0 for anything
+ * else.  custom_ext_meth_add() uses this to refuse custom callbacks for
+ * listed types.
+ */
+int SSL_extension_supported(unsigned int ext_type)
+{
+    switch (ext_type) {
+        /* Internally supported extensions. */
+    case TLSEXT_TYPE_application_layer_protocol_negotiation:
+    case TLSEXT_TYPE_ec_point_formats:
+    case TLSEXT_TYPE_elliptic_curves:
+    case TLSEXT_TYPE_heartbeat:
+    case TLSEXT_TYPE_next_proto_neg:
+    case TLSEXT_TYPE_padding:
+    case TLSEXT_TYPE_renegotiate:
+    case TLSEXT_TYPE_server_name:
+    case TLSEXT_TYPE_session_ticket:
+    case TLSEXT_TYPE_signature_algorithms:
+    case TLSEXT_TYPE_srp:
+    case TLSEXT_TYPE_status_request:
+    case TLSEXT_TYPE_use_srtp:
+        /* The next two are only listed when their macro is defined. */
+# ifdef TLSEXT_TYPE_opaque_prf_input
+    case TLSEXT_TYPE_opaque_prf_input:
+# endif
+# ifdef TLSEXT_TYPE_encrypt_then_mac
+    case TLSEXT_TYPE_encrypt_then_mac:
+# endif
+        return 1;
+    default:
+        return 0;
+    }
+}
+#endif
index d70b93f..210a5e8 100644 (file)
 #include <openssl/objects.h>
 #include <openssl/evp.h>
 #include <openssl/hmac.h>
+#ifndef OPENSSL_NO_EC
+#ifdef OPENSSL_NO_EC2M
+# include <openssl/ec.h>
+#endif
+#endif
 #include <openssl/ocsp.h>
 #include <openssl/rand.h>
 #include "ssl_locl.h"
@@ -123,6 +128,8 @@ const char tls1_version_str[] = "TLSv1" OPENSSL_VERSION_PTEXT;
 static int tls_decrypt_ticket(SSL *s, const unsigned char *tick, int ticklen,
                               const unsigned char *sess_id, int sesslen,
                               SSL_SESSION **psess);
+static int ssl_check_clienthello_tlsext_early(SSL *s);
+int ssl_check_serverhello_tlsext(SSL *s);
 #endif
 
 SSL3_ENC_METHOD TLSv1_enc_data = {
@@ -138,6 +145,49 @@ SSL3_ENC_METHOD TLSv1_enc_data = {
     TLS_MD_SERVER_FINISH_CONST, TLS_MD_SERVER_FINISH_CONST_SIZE,
     tls1_alert_code,
     tls1_export_keying_material,
+    0,
+    SSL3_HM_HEADER_LENGTH,
+    ssl3_set_handshake_header,
+    ssl3_handshake_write
+};
+
+SSL3_ENC_METHOD TLSv1_1_enc_data = {
+    tls1_enc,
+    tls1_mac,
+    tls1_setup_key_block,
+    tls1_generate_master_secret,
+    tls1_change_cipher_state,
+    tls1_final_finish_mac,
+    TLS1_FINISH_MAC_LENGTH,
+    tls1_cert_verify_mac,
+    TLS_MD_CLIENT_FINISH_CONST, TLS_MD_CLIENT_FINISH_CONST_SIZE,
+    TLS_MD_SERVER_FINISH_CONST, TLS_MD_SERVER_FINISH_CONST_SIZE,
+    tls1_alert_code,
+    tls1_export_keying_material,
+    SSL_ENC_FLAG_EXPLICIT_IV,
+    SSL3_HM_HEADER_LENGTH,
+    ssl3_set_handshake_header,
+    ssl3_handshake_write
+};
+
+SSL3_ENC_METHOD TLSv1_2_enc_data = {
+    tls1_enc,
+    tls1_mac,
+    tls1_setup_key_block,
+    tls1_generate_master_secret,
+    tls1_change_cipher_state,
+    tls1_final_finish_mac,
+    TLS1_FINISH_MAC_LENGTH,
+    tls1_cert_verify_mac,
+    TLS_MD_CLIENT_FINISH_CONST, TLS_MD_CLIENT_FINISH_CONST_SIZE,
+    TLS_MD_SERVER_FINISH_CONST, TLS_MD_SERVER_FINISH_CONST_SIZE,
+    tls1_alert_code,
+    tls1_export_keying_material,
+    SSL_ENC_FLAG_EXPLICIT_IV | SSL_ENC_FLAG_SIGALGS | SSL_ENC_FLAG_SHA256_PRF
+        | SSL_ENC_FLAG_TLS1_2_CIPHERS,
+    SSL3_HM_HEADER_LENGTH,
+    ssl3_set_handshake_header,
+    ssl3_handshake_write
 };
 
 long tls1_default_timeout(void)
@@ -200,52 +250,133 @@ static int nid_list[] = {
     NID_secp256k1,              /* secp256k1 (22) */
     NID_X9_62_prime256v1,       /* secp256r1 (23) */
     NID_secp384r1,              /* secp384r1 (24) */
-    NID_secp521r1               /* secp521r1 (25) */
+    NID_secp521r1,              /* secp521r1 (25) */
+    NID_brainpoolP256r1,        /* brainpoolP256r1 (26) */
+    NID_brainpoolP384r1,        /* brainpoolP384r1 (27) */
+    NID_brainpoolP512r1         /* brainpoolP512r1 (28) */
 };
 
-static int pref_list[] = {
-# ifndef OPENSSL_NO_EC2M
-    NID_sect571r1,              /* sect571r1 (14) */
-    NID_sect571k1,              /* sect571k1 (13) */
-# endif
-    NID_secp521r1,              /* secp521r1 (25) */
-# ifndef OPENSSL_NO_EC2M
-    NID_sect409k1,              /* sect409k1 (11) */
-    NID_sect409r1,              /* sect409r1 (12) */
-# endif
-    NID_secp384r1,              /* secp384r1 (24) */
-# ifndef OPENSSL_NO_EC2M
-    NID_sect283k1,              /* sect283k1 (9) */
-    NID_sect283r1,              /* sect283r1 (10) */
-# endif
-    NID_secp256k1,              /* secp256k1 (22) */
-    NID_X9_62_prime256v1,       /* secp256r1 (23) */
+static const unsigned char ecformats_default[] = {
+    TLSEXT_ECPOINTFORMAT_uncompressed,
+    TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime,
+    TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2
+};
+
+/* The client's default curves / the server's 'auto' curves. */
+static const unsigned char eccurves_auto[] = {
+    /* Prefer P-256 which has the fastest and most secure implementations. */
+    0, 23,                      /* secp256r1 (23) */
+    /* Other >= 256-bit prime curves. */
+    0, 25,                      /* secp521r1 (25) */
+    0, 28,                      /* brainpoolP512r1 (28) */
+    0, 27,                      /* brainpoolP384r1 (27) */
+    0, 24,                      /* secp384r1 (24) */
+    0, 26,                      /* brainpoolP256r1 (26) */
+    0, 22,                      /* secp256k1 (22) */
 # ifndef OPENSSL_NO_EC2M
-    NID_sect239k1,              /* sect239k1 (8) */
-    NID_sect233k1,              /* sect233k1 (6) */
-    NID_sect233r1,              /* sect233r1 (7) */
+    /* >= 256-bit binary curves. */
+    0, 14,                      /* sect571r1 (14) */
+    0, 13,                      /* sect571k1 (13) */
+    0, 11,                      /* sect409k1 (11) */
+    0, 12,                      /* sect409r1 (12) */
+    0, 9,                       /* sect283k1 (9) */
+    0, 10,                      /* sect283r1 (10) */
 # endif
-    NID_secp224k1,              /* secp224k1 (20) */
-    NID_secp224r1,              /* secp224r1 (21) */
+};
+
+static const unsigned char eccurves_all[] = {
+    /* Prefer P-256 which has the fastest and most secure implementations. */
+    0, 23,                      /* secp256r1 (23) */
+    /* Other >= 256-bit prime curves. */
+    0, 25,                      /* secp521r1 (25) */
+    0, 28,                      /* brainpoolP512r1 (28) */
+    0, 27,                      /* brainpoolP384r1 (27) */
+    0, 24,                      /* secp384r1 (24) */
+    0, 26,                      /* brainpoolP256r1 (26) */
+    0, 22,                      /* secp256k1 (22) */
 # ifndef OPENSSL_NO_EC2M
-    NID_sect193r1,              /* sect193r1 (4) */
-    NID_sect193r2,              /* sect193r2 (5) */
+    /* >= 256-bit binary curves. */
+    0, 14,                      /* sect571r1 (14) */
+    0, 13,                      /* sect571k1 (13) */
+    0, 11,                      /* sect409k1 (11) */
+    0, 12,                      /* sect409r1 (12) */
+    0, 9,                       /* sect283k1 (9) */
+    0, 10,                      /* sect283r1 (10) */
 # endif
-    NID_secp192k1,              /* secp192k1 (18) */
-    NID_X9_62_prime192v1,       /* secp192r1 (19) */
+    /*
+     * Remaining curves disabled by default but still permitted if set
+     * via an explicit callback or parameters.
+     */
+    0, 20,                      /* secp224k1 (20) */
+    0, 21,                      /* secp224r1 (21) */
+    0, 18,                      /* secp192k1 (18) */
+    0, 19,                      /* secp192r1 (19) */
+    0, 15,                      /* secp160k1 (15) */
+    0, 16,                      /* secp160r1 (16) */
+    0, 17,                      /* secp160r2 (17) */
 # ifndef OPENSSL_NO_EC2M
-    NID_sect163k1,              /* sect163k1 (1) */
-    NID_sect163r1,              /* sect163r1 (2) */
-    NID_sect163r2,              /* sect163r2 (3) */
+    0, 8,                       /* sect239k1 (8) */
+    0, 6,                       /* sect233k1 (6) */
+    0, 7,                       /* sect233r1 (7) */
+    0, 4,                       /* sect193r1 (4) */
+    0, 5,                       /* sect193r2 (5) */
+    0, 1,                       /* sect163k1 (1) */
+    0, 2,                       /* sect163r1 (2) */
+    0, 3,                       /* sect163r2 (3) */
 # endif
-    NID_secp160k1,              /* secp160k1 (15) */
-    NID_secp160r1,              /* secp160r1 (16) */
-    NID_secp160r2,              /* secp160r2 (17) */
 };
 
+static const unsigned char suiteb_curves[] = {
+    0, TLSEXT_curve_P_256,
+    0, TLSEXT_curve_P_384
+};
+
+# ifdef OPENSSL_FIPS
+/* Brainpool not allowed in FIPS mode */
+static const unsigned char fips_curves_default[] = {
+#  ifndef OPENSSL_NO_EC2M
+    0, 14,                      /* sect571r1 (14) */
+    0, 13,                      /* sect571k1 (13) */
+#  endif
+    0, 25,                      /* secp521r1 (25) */
+#  ifndef OPENSSL_NO_EC2M
+    0, 11,                      /* sect409k1 (11) */
+    0, 12,                      /* sect409r1 (12) */
+#  endif
+    0, 24,                      /* secp384r1 (24) */
+#  ifndef OPENSSL_NO_EC2M
+    0, 9,                       /* sect283k1 (9) */
+    0, 10,                      /* sect283r1 (10) */
+#  endif
+    0, 22,                      /* secp256k1 (22) */
+    0, 23,                      /* secp256r1 (23) */
+#  ifndef OPENSSL_NO_EC2M
+    0, 8,                       /* sect239k1 (8) */
+    0, 6,                       /* sect233k1 (6) */
+    0, 7,                       /* sect233r1 (7) */
+#  endif
+    0, 20,                      /* secp224k1 (20) */
+    0, 21,                      /* secp224r1 (21) */
+#  ifndef OPENSSL_NO_EC2M
+    0, 4,                       /* sect193r1 (4) */
+    0, 5,                       /* sect193r2 (5) */
+#  endif
+    0, 18,                      /* secp192k1 (18) */
+    0, 19,                      /* secp192r1 (19) */
+#  ifndef OPENSSL_NO_EC2M
+    0, 1,                       /* sect163k1 (1) */
+    0, 2,                       /* sect163r1 (2) */
+    0, 3,                       /* sect163r2 (3) */
+#  endif
+    0, 15,                      /* secp160k1 (15) */
+    0, 16,                      /* secp160r1 (16) */
+    0, 17,                      /* secp160r2 (17) */
+};
+# endif
+
 int tls1_ec_curve_id2nid(int curve_id)
 {
-    /* ECC curves from RFC 4492 */
+    /* ECC curves from RFC 4492 and RFC 7027 */
     if ((curve_id < 1) || ((unsigned int)curve_id >
                            sizeof(nid_list) / sizeof(nid_list[0])))
         return 0;
@@ -254,7 +385,7 @@ int tls1_ec_curve_id2nid(int curve_id)
 
 int tls1_ec_nid2curve_id(int nid)
 {
-    /* ECC curves from RFC 4492 */
+    /* ECC curves from RFC 4492 and RFC 7027 */
     switch (nid) {
     case NID_sect163k1:        /* sect163k1 (1) */
         return 1;
@@ -306,10 +437,548 @@ int tls1_ec_nid2curve_id(int nid)
         return 24;
     case NID_secp521r1:        /* secp521r1 (25) */
         return 25;
+    case NID_brainpoolP256r1:  /* brainpoolP256r1 (26) */
+        return 26;
+    case NID_brainpoolP384r1:  /* brainpoolP384r1 (27) */
+        return 27;
+    case NID_brainpoolP512r1:  /* brainpoolP512r1 (28) */
+        return 28;
     default:
         return 0;
     }
 }
+
+/*
+ * Get curves list, if "sess" is set return client curves otherwise
+ * preferred list.
+ * Sets |num_curves| to the number of curves in the list, i.e.,
+ * the length of |pcurves| is 2 * num_curves.
+ * Returns 1 on success and 0 if the client curves list has invalid format.
+ * The latter indicates an internal error: we should not be accepting such
+ * lists in the first place.
+ * TODO(emilia): we should really be storing the curves list in explicitly
+ * parsed form instead. (However, this would affect binary compatibility
+ * so cannot happen in the 1.0.x series.)
+ */
+static int tls1_get_curvelist(SSL *s, int sess,
+                              const unsigned char **pcurves,
+                              size_t *num_curves)
+{
+    size_t pcurveslen = 0;
+    if (sess) {
+        *pcurves = s->session->tlsext_ellipticcurvelist;
+        pcurveslen = s->session->tlsext_ellipticcurvelist_length;
+    } else {
+        /* For Suite B mode only include P-256, P-384 */
+        switch (tls1_suiteb(s)) {
+        case SSL_CERT_FLAG_SUITEB_128_LOS:
+            *pcurves = suiteb_curves;
+            pcurveslen = sizeof(suiteb_curves);
+            break;
+
+        case SSL_CERT_FLAG_SUITEB_128_LOS_ONLY:
+            *pcurves = suiteb_curves;
+            pcurveslen = 2;
+            break;
+
+        case SSL_CERT_FLAG_SUITEB_192_LOS:
+            *pcurves = suiteb_curves + 2;
+            pcurveslen = 2;
+            break;
+        default:
+            *pcurves = s->tlsext_ellipticcurvelist;
+            pcurveslen = s->tlsext_ellipticcurvelist_length;
+        }
+        if (!*pcurves) {
+# ifdef OPENSSL_FIPS
+            if (FIPS_mode()) {
+                *pcurves = fips_curves_default;
+                pcurveslen = sizeof(fips_curves_default);
+            } else
+# endif
+            {
+                if (!s->server || (s->cert && s->cert->ecdh_tmp_auto)) {
+                    *pcurves = eccurves_auto;
+                    pcurveslen = sizeof(eccurves_auto);
+                } else {
+                    *pcurves = eccurves_all;
+                    pcurveslen = sizeof(eccurves_all);
+                }
+            }
+        }
+    }
+    /* We do not allow odd length arrays to enter the system. */
+    if (pcurveslen & 1) {
+        SSLerr(SSL_F_TLS1_GET_CURVELIST, ERR_R_INTERNAL_ERROR);
+        *num_curves = 0;
+        return 0;
+    } else {
+        *num_curves = pcurveslen / 2;
+        return 1;
+    }
+}
+
+/* Check a curve is one of our preferences */
+int tls1_check_curve(SSL *s, const unsigned char *p, size_t len)
+{
+    const unsigned char *curves;
+    size_t num_curves, i;
+    unsigned int suiteb_flags = tls1_suiteb(s);
+    if (len != 3 || p[0] != NAMED_CURVE_TYPE)
+        return 0;
+    /* Check curve matches Suite B preferences */
+    if (suiteb_flags) {
+        unsigned long cid = s->s3->tmp.new_cipher->id;
+        if (p[1])
+            return 0;
+        if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256) {
+            if (p[2] != TLSEXT_curve_P_256)
+                return 0;
+        } else if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384) {
+            if (p[2] != TLSEXT_curve_P_384)
+                return 0;
+        } else                  /* Should never happen */
+            return 0;
+    }
+    if (!tls1_get_curvelist(s, 0, &curves, &num_curves))
+        return 0;
+    for (i = 0; i < num_curves; i++, curves += 2) {
+        if (p[1] == curves[0] && p[2] == curves[1])
+            return 1;
+    }
+    return 0;
+}
+
+/*-
+ * Return |nmatch|th shared curve or NID_undef if there is no match.
+ * For nmatch == -1, return the number of matches
+ * For nmatch == -2, return the NID of the curve to use for
+ * an EC tmp key, or NID_undef if there is no match.
+ */
+int tls1_shared_curve(SSL *s, int nmatch)
+{
+    const unsigned char *pref, *supp;
+    size_t num_pref, num_supp, i, j;
+    int k;
+    /* Can't do anything on client side */
+    if (s->server == 0)
+        return -1;
+    if (nmatch == -2) {
+        if (tls1_suiteb(s)) {
+            /*
+             * For Suite B ciphersuite determines curve: we already know
+             * these are acceptable due to previous checks.
+             */
+            unsigned long cid = s->s3->tmp.new_cipher->id;
+            if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256)
+                return NID_X9_62_prime256v1; /* P-256 */
+            if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384)
+                return NID_secp384r1; /* P-384 */
+            /* Should never happen */
+            return NID_undef;
+        }
+        /* If not Suite B just return first preference shared curve */
+        nmatch = 0;
+    }
+    /*
+     * Avoid truncation. tls1_get_curvelist takes an int
+     * but s->options is a long...
+     */
+    if (!tls1_get_curvelist
+        (s, (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE) != 0, &supp,
+         &num_supp))
+        /* In practice, NID_undef == 0 but let's be precise. */
+        return nmatch == -1 ? 0 : NID_undef;
+    if (!tls1_get_curvelist
+        (s, !(s->options & SSL_OP_CIPHER_SERVER_PREFERENCE), &pref,
+         &num_pref))
+        return nmatch == -1 ? 0 : NID_undef;
+
+    /*
+     * If the client didn't send the elliptic_curves extension all of them
+     * are allowed.
+     */
+    if (num_supp == 0 && (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE) != 0) {
+        supp = eccurves_all;
+        num_supp = sizeof(eccurves_all) / 2;
+    } else if (num_pref == 0 &&
+        (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE) == 0) {
+        pref = eccurves_all;
+        num_pref = sizeof(eccurves_all) / 2;
+    }
+
+    k = 0;
+    for (i = 0; i < num_pref; i++, pref += 2) {
+        const unsigned char *tsupp = supp;
+        for (j = 0; j < num_supp; j++, tsupp += 2) {
+            if (pref[0] == tsupp[0] && pref[1] == tsupp[1]) {
+                if (nmatch == k) {
+                    int id = (pref[0] << 8) | pref[1];
+                    return tls1_ec_curve_id2nid(id);
+                }
+                k++;
+            }
+        }
+    }
+    if (nmatch == -1)
+        return k;
+    /* Out of range (nmatch >= k). */
+    return NID_undef;
+}
+
+int tls1_set_curves(unsigned char **pext, size_t *pextlen,
+                    int *curves, size_t ncurves)
+{
+    unsigned char *clist, *p;
+    size_t i;
+    /*
+     * Bitmap of curves included to detect duplicates: only works while curve
+     * ids < 32
+     */
+    unsigned long dup_list = 0;
+# ifdef OPENSSL_NO_EC2M
+    EC_GROUP *curve;
+# endif
+
+    clist = OPENSSL_malloc(ncurves * 2);
+    if (!clist)
+        return 0;
+    for (i = 0, p = clist; i < ncurves; i++) {
+        unsigned long idmask;
+        int id;
+        id = tls1_ec_nid2curve_id(curves[i]);
+# ifdef OPENSSL_FIPS
+        /* NB: 25 is last curve ID supported by FIPS module */
+        if (FIPS_mode() && id > 25) {
+            OPENSSL_free(clist);
+            return 0;
+        }
+# endif
+# ifdef OPENSSL_NO_EC2M
+        curve = EC_GROUP_new_by_curve_name(curves[i]);
+        if (!curve || EC_METHOD_get_field_type(EC_GROUP_method_of(curve))
+            == NID_X9_62_characteristic_two_field) {
+            if (curve)
+                EC_GROUP_free(curve);
+            OPENSSL_free(clist);
+            return 0;
+        } else
+            EC_GROUP_free(curve);
+# endif
+        idmask = 1L << id;
+        if (!id || (dup_list & idmask)) {
+            OPENSSL_free(clist);
+            return 0;
+        }
+        dup_list |= idmask;
+        s2n(id, p);
+    }
+    if (*pext)
+        OPENSSL_free(*pext);
+    *pext = clist;
+    *pextlen = ncurves * 2;
+    return 1;
+}
+
+# define MAX_CURVELIST   28
+
+typedef struct {
+    size_t nidcnt;
+    int nid_arr[MAX_CURVELIST];
+} nid_cb_st;
+
+static int nid_cb(const char *elem, int len, void *arg)
+{
+    nid_cb_st *narg = arg;
+    size_t i;
+    int nid;
+    char etmp[20];
+    if (elem == NULL)
+        return 0;
+    if (narg->nidcnt == MAX_CURVELIST)
+        return 0;
+    if (len > (int)(sizeof(etmp) - 1))
+        return 0;
+    memcpy(etmp, elem, len);
+    etmp[len] = 0;
+    nid = EC_curve_nist2nid(etmp);
+    if (nid == NID_undef)
+        nid = OBJ_sn2nid(etmp);
+    if (nid == NID_undef)
+        nid = OBJ_ln2nid(etmp);
+    if (nid == NID_undef)
+        return 0;
+    for (i = 0; i < narg->nidcnt; i++)
+        if (narg->nid_arr[i] == nid)
+            return 0;
+    narg->nid_arr[narg->nidcnt++] = nid;
+    return 1;
+}
+
+/* Set curves based on a colon-separated list */
+int tls1_set_curves_list(unsigned char **pext, size_t *pextlen,
+                         const char *str)
+{
+    nid_cb_st ncb;
+    ncb.nidcnt = 0;
+    if (!CONF_parse_list(str, ':', 1, nid_cb, &ncb))
+        return 0;
+    if (pext == NULL)
+        return 1;
+    return tls1_set_curves(pext, pextlen, ncb.nid_arr, ncb.nidcnt);
+}
+
+/* For an EC key set TLS id and required compression based on parameters */
+static int tls1_set_ec_id(unsigned char *curve_id, unsigned char *comp_id,
+                          EC_KEY *ec)
+{
+    int is_prime, id;
+    const EC_GROUP *grp;
+    const EC_METHOD *meth;
+    if (!ec)
+        return 0;
+    /* Determine if it is a prime field */
+    grp = EC_KEY_get0_group(ec);
+    if (!grp)
+        return 0;
+    meth = EC_GROUP_method_of(grp);
+    if (!meth)
+        return 0;
+    if (EC_METHOD_get_field_type(meth) == NID_X9_62_prime_field)
+        is_prime = 1;
+    else
+        is_prime = 0;
+    /* Determine curve ID */
+    id = EC_GROUP_get_curve_name(grp);
+    id = tls1_ec_nid2curve_id(id);
+    /* If we have an ID set it, otherwise set arbitrary explicit curve */
+    if (id) {
+        curve_id[0] = 0;
+        curve_id[1] = (unsigned char)id;
+    } else {
+        curve_id[0] = 0xff;
+        if (is_prime)
+            curve_id[1] = 0x01;
+        else
+            curve_id[1] = 0x02;
+    }
+    if (comp_id) {
+        if (EC_KEY_get0_public_key(ec) == NULL)
+            return 0;
+        if (EC_KEY_get_conv_form(ec) == POINT_CONVERSION_COMPRESSED) {
+            if (is_prime)
+                *comp_id = TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime;
+            else
+                *comp_id = TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2;
+        } else
+            *comp_id = TLSEXT_ECPOINTFORMAT_uncompressed;
+    }
+    return 1;
+}
+
+/* Check an EC key is compatible with extensions */
+static int tls1_check_ec_key(SSL *s,
+                             unsigned char *curve_id, unsigned char *comp_id)
+{
+    const unsigned char *pformats, *pcurves;
+    size_t num_formats, num_curves, i;
+    int j;
+    /*
+     * If point formats extension present check it, otherwise everything is
+     * supported (see RFC4492).
+     */
+    if (comp_id && s->session->tlsext_ecpointformatlist) {
+        pformats = s->session->tlsext_ecpointformatlist;
+        num_formats = s->session->tlsext_ecpointformatlist_length;
+        for (i = 0; i < num_formats; i++, pformats++) {
+            if (*comp_id == *pformats)
+                break;
+        }
+        if (i == num_formats)
+            return 0;
+    }
+    if (!curve_id)
+        return 1;
+    /* Check curve is consistent with client and server preferences */
+    for (j = 0; j <= 1; j++) {
+        if (!tls1_get_curvelist(s, j, &pcurves, &num_curves))
+            return 0;
+        if (j == 1 && num_curves == 0) {
+            /*
+             * If we've not received any curves then skip this check.
+             * RFC 4492 does not require the supported elliptic curves extension
+             * so if it is not sent we can just choose any curve.
+             * It is invalid to send an empty list in the elliptic curves
+             * extension, so num_curves == 0 always means no extension.
+             */
+            break;
+        }
+        for (i = 0; i < num_curves; i++, pcurves += 2) {
+            if (pcurves[0] == curve_id[0] && pcurves[1] == curve_id[1])
+                break;
+        }
+        if (i == num_curves)
+            return 0;
+        /* For clients can only check sent curve list */
+        if (!s->server)
+            return 1;
+    }
+    return 1;
+}
+
+static void tls1_get_formatlist(SSL *s, const unsigned char **pformats,
+                                size_t *num_formats)
+{
+    /*
+     * If we have a custom point format list use it otherwise use default
+     */
+    if (s->tlsext_ecpointformatlist) {
+        *pformats = s->tlsext_ecpointformatlist;
+        *num_formats = s->tlsext_ecpointformatlist_length;
+    } else {
+        *pformats = ecformats_default;
+        /* For Suite B we don't support char2 fields */
+        if (tls1_suiteb(s))
+            *num_formats = sizeof(ecformats_default) - 1;
+        else
+            *num_formats = sizeof(ecformats_default);
+    }
+}
+
+/*
+ * Check cert parameters compatible with extensions: currently just checks EC
+ * certificates have compatible curves and compression.
+ */
+static int tls1_check_cert_param(SSL *s, X509 *x, int set_ee_md)
+{
+    unsigned char comp_id, curve_id[2];
+    EVP_PKEY *pkey;
+    int rv;
+    pkey = X509_get_pubkey(x);
+    if (!pkey)
+        return 0;
+    /* If not EC nothing to do */
+    if (pkey->type != EVP_PKEY_EC) {
+        EVP_PKEY_free(pkey);
+        return 1;
+    }
+    rv = tls1_set_ec_id(curve_id, &comp_id, pkey->pkey.ec);
+    EVP_PKEY_free(pkey);
+    if (!rv)
+        return 0;
+    /*
+     * Can't check curve_id for client certs as we don't have a supported
+     * curves extension.
+     */
+    rv = tls1_check_ec_key(s, s->server ? curve_id : NULL, &comp_id);
+    if (!rv)
+        return 0;
+    /*
+     * Special case for suite B. We *MUST* sign using SHA256+P-256 or
+     * SHA384+P-384, adjust digest if necessary.
+     */
+    if (set_ee_md && tls1_suiteb(s)) {
+        int check_md;
+        size_t i;
+        CERT *c = s->cert;
+        if (curve_id[0])
+            return 0;
+        /* Check to see we have necessary signing algorithm */
+        if (curve_id[1] == TLSEXT_curve_P_256)
+            check_md = NID_ecdsa_with_SHA256;
+        else if (curve_id[1] == TLSEXT_curve_P_384)
+            check_md = NID_ecdsa_with_SHA384;
+        else
+            return 0;           /* Should never happen */
+        for (i = 0; i < c->shared_sigalgslen; i++)
+            if (check_md == c->shared_sigalgs[i].signandhash_nid)
+                break;
+        if (i == c->shared_sigalgslen)
+            return 0;
+        if (set_ee_md == 2) {
+            if (check_md == NID_ecdsa_with_SHA256)
+                c->pkeys[SSL_PKEY_ECC].digest = EVP_sha256();
+            else
+                c->pkeys[SSL_PKEY_ECC].digest = EVP_sha384();
+        }
+    }
+    return rv;
+}
+
+# ifndef OPENSSL_NO_ECDH
+/* Check EC temporary key is compatible with client extensions */
+int tls1_check_ec_tmp_key(SSL *s, unsigned long cid)
+{
+    unsigned char curve_id[2];
+    EC_KEY *ec = s->cert->ecdh_tmp;
+#  ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+    /* Allow any curve: not just those peer supports */
+    if (s->cert->cert_flags & SSL_CERT_FLAG_BROKEN_PROTOCOL)
+        return 1;
+#  endif
+    /*
+     * If Suite B, AES128 MUST use P-256 and AES256 MUST use P-384, no other
+     * curves permitted.
+     */
+    if (tls1_suiteb(s)) {
+        /* Curve to check determined by ciphersuite */
+        if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256)
+            curve_id[1] = TLSEXT_curve_P_256;
+        else if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384)
+            curve_id[1] = TLSEXT_curve_P_384;
+        else
+            return 0;
+        curve_id[0] = 0;
+        /* Check this curve is acceptable */
+        if (!tls1_check_ec_key(s, curve_id, NULL))
+            return 0;
+        /* If auto or setting curve from callback assume OK */
+        if (s->cert->ecdh_tmp_auto || s->cert->ecdh_tmp_cb)
+            return 1;
+        /* Otherwise check curve is acceptable */
+        else {
+            unsigned char curve_tmp[2];
+            if (!ec)
+                return 0;
+            if (!tls1_set_ec_id(curve_tmp, NULL, ec))
+                return 0;
+            if (!curve_tmp[0] || curve_tmp[1] == curve_id[1])
+                return 1;
+            return 0;
+        }
+
+    }
+    if (s->cert->ecdh_tmp_auto) {
+        /* Need a shared curve */
+        if (tls1_shared_curve(s, 0))
+            return 1;
+        else
+            return 0;
+    }
+    if (!ec) {
+        if (s->cert->ecdh_tmp_cb)
+            return 1;
+        else
+            return 0;
+    }
+    if (!tls1_set_ec_id(curve_id, NULL, ec))
+        return 0;
+/* Set this to allow use of invalid curves for testing */
+#  if 0
+    return 1;
+#  else
+    return tls1_check_ec_key(s, curve_id, NULL);
+#  endif
+}
+# endif                         /* OPENSSL_NO_ECDH */
+
+#else
+
+static int tls1_check_cert_param(SSL *s, X509 *x, int set_ee_md)
+{
+    return 1;
+}
+
 #endif                          /* OPENSSL_NO_EC */
 
 #ifndef OPENSSL_NO_TLSEXT
@@ -338,39 +1007,250 @@ int tls1_ec_nid2curve_id(int nid)
 #  define tlsext_sigalg_ecdsa(md) md, TLSEXT_signature_ecdsa,
 # endif
 
-# define tlsext_sigalg(md) \
-                tlsext_sigalg_rsa(md) \
-                tlsext_sigalg_dsa(md) \
-                tlsext_sigalg_ecdsa(md)
+# define tlsext_sigalg(md) \
+                tlsext_sigalg_rsa(md) \
+                tlsext_sigalg_dsa(md) \
+                tlsext_sigalg_ecdsa(md)
+
+static unsigned char tls12_sigalgs[] = {
+# ifndef OPENSSL_NO_SHA512
+    tlsext_sigalg(TLSEXT_hash_sha512)
+        tlsext_sigalg(TLSEXT_hash_sha384)
+# endif
+# ifndef OPENSSL_NO_SHA256
+        tlsext_sigalg(TLSEXT_hash_sha256)
+        tlsext_sigalg(TLSEXT_hash_sha224)
+# endif
+# ifndef OPENSSL_NO_SHA
+        tlsext_sigalg(TLSEXT_hash_sha1)
+# endif
+};
+
+# ifndef OPENSSL_NO_ECDSA
+static unsigned char suiteb_sigalgs[] = {
+    tlsext_sigalg_ecdsa(TLSEXT_hash_sha256)
+        tlsext_sigalg_ecdsa(TLSEXT_hash_sha384)
+};
+# endif
+size_t tls12_get_psigalgs(SSL *s, const unsigned char **psigs)
+{
+    /*
+     * If Suite B mode use Suite B sigalgs only, ignore any other
+     * preferences.
+     */
+# ifndef OPENSSL_NO_EC
+    switch (tls1_suiteb(s)) {
+    case SSL_CERT_FLAG_SUITEB_128_LOS:
+        *psigs = suiteb_sigalgs;
+        return sizeof(suiteb_sigalgs);
+
+    case SSL_CERT_FLAG_SUITEB_128_LOS_ONLY:
+        *psigs = suiteb_sigalgs;
+        return 2;
+
+    case SSL_CERT_FLAG_SUITEB_192_LOS:
+        *psigs = suiteb_sigalgs + 2;
+        return 2;
+    }
+# endif
+    /* On the server, use the client authentication sigalgs if set */
+    if (s->server && s->cert->client_sigalgs) {
+        *psigs = s->cert->client_sigalgs;
+        return s->cert->client_sigalgslen;
+    } else if (s->cert->conf_sigalgs) {
+        *psigs = s->cert->conf_sigalgs;
+        return s->cert->conf_sigalgslen;
+    } else {
+        *psigs = tls12_sigalgs;
+        return sizeof(tls12_sigalgs);
+    }
+}
+
+/*
+ * Check signature algorithm is consistent with sent supported signature
+ * algorithms and if so return relevant digest.
+ */
+int tls12_check_peer_sigalg(const EVP_MD **pmd, SSL *s,
+                            const unsigned char *sig, EVP_PKEY *pkey)
+{
+    const unsigned char *sent_sigs;
+    size_t sent_sigslen, i;
+    int sigalg = tls12_get_sigid(pkey);
+    /* Should never happen */
+    if (sigalg == -1)
+        return -1;
+    /* Check key type is consistent with signature */
+    if (sigalg != (int)sig[1]) {
+        SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG, SSL_R_WRONG_SIGNATURE_TYPE);
+        return 0;
+    }
+# ifndef OPENSSL_NO_EC
+    if (pkey->type == EVP_PKEY_EC) {
+        unsigned char curve_id[2], comp_id;
+        /* Check compression and curve matches extensions */
+        if (!tls1_set_ec_id(curve_id, &comp_id, pkey->pkey.ec))
+            return 0;
+        if (!s->server && !tls1_check_ec_key(s, curve_id, &comp_id)) {
+            SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG, SSL_R_WRONG_CURVE);
+            return 0;
+        }
+        /* If Suite B only P-384+SHA384 or P-256+SHA-256 allowed */
+        if (tls1_suiteb(s)) {
+            if (curve_id[0])
+                return 0;
+            if (curve_id[1] == TLSEXT_curve_P_256) {
+                if (sig[0] != TLSEXT_hash_sha256) {
+                    SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG,
+                           SSL_R_ILLEGAL_SUITEB_DIGEST);
+                    return 0;
+                }
+            } else if (curve_id[1] == TLSEXT_curve_P_384) {
+                if (sig[0] != TLSEXT_hash_sha384) {
+                    SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG,
+                           SSL_R_ILLEGAL_SUITEB_DIGEST);
+                    return 0;
+                }
+            } else
+                return 0;
+        }
+    } else if (tls1_suiteb(s))
+        return 0;
+# endif
+
+    /* Check signature matches a type we sent */
+    sent_sigslen = tls12_get_psigalgs(s, &sent_sigs);
+    for (i = 0; i < sent_sigslen; i += 2, sent_sigs += 2) {
+        if (sig[0] == sent_sigs[0] && sig[1] == sent_sigs[1])
+            break;
+    }
+    /* Allow fallback to SHA1 if not strict mode */
+    if (i == sent_sigslen
+        && (sig[0] != TLSEXT_hash_sha1
+            || s->cert->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT)) {
+        SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG, SSL_R_WRONG_SIGNATURE_TYPE);
+        return 0;
+    }
+    *pmd = tls12_get_hash(sig[0]);
+    if (*pmd == NULL) {
+        SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG, SSL_R_UNKNOWN_DIGEST);
+        return 0;
+    }
+    /*
+     * Store the digest used so applications can retrieve it if they wish.
+     */
+    if (s->session && s->session->sess_cert)
+        s->session->sess_cert->peer_key->digest = *pmd;
+    return 1;
+}
 
-static unsigned char tls12_sigalgs[] = {
-# ifndef OPENSSL_NO_SHA512
-    tlsext_sigalg(TLSEXT_hash_sha512)
-        tlsext_sigalg(TLSEXT_hash_sha384)
+/*
+ * Get a mask of disabled algorithms: an algorithm is disabled if it isn't
+ * supported or doesn't appear in supported signature algorithms. Unlike
+ * ssl_cipher_get_disabled this applies to a specific session and not global
+ * settings.
+ */
+void ssl_set_client_disabled(SSL *s)
+{
+    CERT *c = s->cert;
+    const unsigned char *sigalgs;
+    size_t i, sigalgslen;
+    int have_rsa = 0, have_dsa = 0, have_ecdsa = 0;
+    c->mask_a = 0;
+    c->mask_k = 0;
+    /* Don't allow TLS 1.2 only ciphers if we don't support them */
+    if (!SSL_CLIENT_USE_TLS1_2_CIPHERS(s))
+        c->mask_ssl = SSL_TLSV1_2;
+    else
+        c->mask_ssl = 0;
+    /*
+     * Now go through all signature algorithms seeing if we support any for
+     * RSA, DSA, ECDSA. Do this for all versions not just TLS 1.2.
+     */
+    sigalgslen = tls12_get_psigalgs(s, &sigalgs);
+    for (i = 0; i < sigalgslen; i += 2, sigalgs += 2) {
+        switch (sigalgs[1]) {
+# ifndef OPENSSL_NO_RSA
+        case TLSEXT_signature_rsa:
+            have_rsa = 1;
+            break;
 # endif
-# ifndef OPENSSL_NO_SHA256
-        tlsext_sigalg(TLSEXT_hash_sha256)
-        tlsext_sigalg(TLSEXT_hash_sha224)
+# ifndef OPENSSL_NO_DSA
+        case TLSEXT_signature_dsa:
+            have_dsa = 1;
+            break;
 # endif
-# ifndef OPENSSL_NO_SHA
-        tlsext_sigalg(TLSEXT_hash_sha1)
+# ifndef OPENSSL_NO_ECDSA
+        case TLSEXT_signature_ecdsa:
+            have_ecdsa = 1;
+            break;
 # endif
-};
-
-int tls12_get_req_sig_algs(SSL *s, unsigned char *p)
-{
-    size_t slen = sizeof(tls12_sigalgs);
-    if (p)
-        memcpy(p, tls12_sigalgs, slen);
-    return (int)slen;
+        }
+    }
+    /*
+     * Disable auth and static DH if we don't include any appropriate
+     * signature algorithms.
+     */
+    if (!have_rsa) {
+        c->mask_a |= SSL_aRSA;
+        c->mask_k |= SSL_kDHr | SSL_kECDHr;
+    }
+    if (!have_dsa) {
+        c->mask_a |= SSL_aDSS;
+        c->mask_k |= SSL_kDHd;
+    }
+    if (!have_ecdsa) {
+        c->mask_a |= SSL_aECDSA;
+        c->mask_k |= SSL_kECDHe;
+    }
+# ifndef OPENSSL_NO_KRB5
+    if (!kssl_tgt_is_available(s->kssl_ctx)) {
+        c->mask_a |= SSL_aKRB5;
+        c->mask_k |= SSL_kKRB5;
+    }
+# endif
+# ifndef OPENSSL_NO_PSK
+    /* with PSK there must be client callback set */
+    if (!s->psk_client_callback) {
+        c->mask_a |= SSL_aPSK;
+        c->mask_k |= SSL_kPSK;
+    }
+# endif                         /* OPENSSL_NO_PSK */
+# ifndef OPENSSL_NO_SRP
+    if (!(s->srp_ctx.srp_Mask & SSL_kSRP)) {
+        c->mask_a |= SSL_aSRP;
+        c->mask_k |= SSL_kSRP;
+    }
+# endif
+    c->valid = 1;
 }
 
 unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf,
-                                          unsigned char *limit)
+                                          unsigned char *limit, int *al)
 {
     int extdatalen = 0;
     unsigned char *orig = buf;
     unsigned char *ret = buf;
+# ifndef OPENSSL_NO_EC
+    /* See if we support any ECC ciphersuites */
+    int using_ecc = 0;
+    if (s->version >= TLS1_VERSION || SSL_IS_DTLS(s)) {
+        int i;
+        unsigned long alg_k, alg_a;
+        STACK_OF(SSL_CIPHER) *cipher_stack = SSL_get_ciphers(s);
+
+        for (i = 0; i < sk_SSL_CIPHER_num(cipher_stack); i++) {
+            SSL_CIPHER *c = sk_SSL_CIPHER_value(cipher_stack, i);
+
+            alg_k = c->algorithm_mkey;
+            alg_a = c->algorithm_auth;
+            if ((alg_k & (SSL_kEECDH | SSL_kECDHr | SSL_kECDHe)
+                 || (alg_a & SSL_aECDSA))) {
+                using_ecc = 1;
+                break;
+            }
+        }
+    }
+# endif
 
     /* don't add extensions for SSLv3 unless doing secure renegotiation */
     if (s->client_version == SSL3_VERSION && !s->s3->send_connection_binding)
@@ -466,50 +1346,53 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf,
 # endif
 
 # ifndef OPENSSL_NO_EC
-    if (s->tlsext_ecpointformatlist != NULL) {
+    if (using_ecc) {
         /*
          * Add TLS extension ECPointFormats to the ClientHello message
          */
         long lenmax;
+        const unsigned char *pcurves, *pformats;
+        size_t num_curves, num_formats, curves_list_len;
+
+        tls1_get_formatlist(s, &pformats, &num_formats);
 
         if ((lenmax = limit - ret - 5) < 0)
             return NULL;
-        if (s->tlsext_ecpointformatlist_length > (unsigned long)lenmax)
+        if (num_formats > (size_t)lenmax)
             return NULL;
-        if (s->tlsext_ecpointformatlist_length > 255) {
+        if (num_formats > 255) {
             SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT, ERR_R_INTERNAL_ERROR);
             return NULL;
         }
 
         s2n(TLSEXT_TYPE_ec_point_formats, ret);
-        s2n(s->tlsext_ecpointformatlist_length + 1, ret);
-        *(ret++) = (unsigned char)s->tlsext_ecpointformatlist_length;
-        memcpy(ret, s->tlsext_ecpointformatlist,
-               s->tlsext_ecpointformatlist_length);
-        ret += s->tlsext_ecpointformatlist_length;
-    }
-    if (s->tlsext_ellipticcurvelist != NULL) {
+        /* The point format list has 1-byte length. */
+        s2n(num_formats + 1, ret);
+        *(ret++) = (unsigned char)num_formats;
+        memcpy(ret, pformats, num_formats);
+        ret += num_formats;
+
         /*
          * Add TLS extension EllipticCurves to the ClientHello message
          */
-        long lenmax;
+        pcurves = s->tlsext_ellipticcurvelist;
+        if (!tls1_get_curvelist(s, 0, &pcurves, &num_curves))
+            return NULL;
 
         if ((lenmax = limit - ret - 6) < 0)
             return NULL;
-        if (s->tlsext_ellipticcurvelist_length > (unsigned long)lenmax)
+        if (num_curves > (size_t)lenmax / 2)
             return NULL;
-        if (s->tlsext_ellipticcurvelist_length > 65532) {
+        if (num_curves > 65532 / 2) {
             SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT, ERR_R_INTERNAL_ERROR);
             return NULL;
         }
-
+        curves_list_len = 2 * num_curves;
         s2n(TLSEXT_TYPE_elliptic_curves, ret);
-        s2n(s->tlsext_ellipticcurvelist_length + 2, ret);
-
-        s2n(s->tlsext_ellipticcurvelist_length, ret);
-        memcpy(ret, s->tlsext_ellipticcurvelist,
-               s->tlsext_ellipticcurvelist_length);
-        ret += s->tlsext_ellipticcurvelist_length;
+        s2n(curves_list_len + 2, ret);
+        s2n(curves_list_len, ret);
+        memcpy(ret, pcurves, curves_list_len);
+        ret += curves_list_len;
     }
 # endif                         /* OPENSSL_NO_EC */
 
@@ -546,17 +1429,20 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf,
     }
  skip_ext:
 
-    if (TLS1_get_client_version(s) >= TLS1_2_VERSION) {
-        if ((size_t)(limit - ret) < sizeof(tls12_sigalgs) + 6)
+    if (SSL_USE_SIGALGS(s)) {
+        size_t salglen;
+        const unsigned char *salg;
+        salglen = tls12_get_psigalgs(s, &salg);
+        if ((size_t)(limit - ret) < salglen + 6)
             return NULL;
         s2n(TLSEXT_TYPE_signature_algorithms, ret);
-        s2n(sizeof(tls12_sigalgs) + 2, ret);
-        s2n(sizeof(tls12_sigalgs), ret);
-        memcpy(ret, tls12_sigalgs, sizeof(tls12_sigalgs));
-        ret += sizeof(tls12_sigalgs);
+        s2n(salglen + 2, ret);
+        s2n(salglen, ret);
+        memcpy(ret, salg, salglen);
+        ret += salglen;
     }
 # ifdef TLSEXT_TYPE_opaque_prf_input
-    if (s->s3->client_opaque_prf_input != NULL && s->version != DTLS1_VERSION) {
+    if (s->s3->client_opaque_prf_input != NULL) {
         size_t col = s->s3->client_opaque_prf_input_len;
 
         if ((long)(limit - ret - 6 - col < 0))
@@ -572,8 +1458,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf,
     }
 # endif
 
-    if (s->tlsext_status_type == TLSEXT_STATUSTYPE_ocsp &&
-        s->version != DTLS1_VERSION) {
+    if (s->tlsext_status_type == TLSEXT_STATUSTYPE_ocsp) {
         int i;
         long extlen, idlen, itmp;
         OCSP_RESPID *id;
@@ -646,6 +1531,15 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf,
     }
 # endif
 
+    if (s->alpn_client_proto_list && !s->s3->tmp.finish_md_len) {
+        if ((size_t)(limit - ret) < 6 + s->alpn_client_proto_list_len)
+            return NULL;
+        s2n(TLSEXT_TYPE_application_layer_protocol_negotiation, ret);
+        s2n(2 + s->alpn_client_proto_list_len, ret);
+        s2n(s->alpn_client_proto_list_len, ret);
+        memcpy(ret, s->alpn_client_proto_list, s->alpn_client_proto_list_len);
+        ret += s->alpn_client_proto_list_len;
+    }
 # ifndef OPENSSL_NO_SRTP
     if (SSL_IS_DTLS(s) && SSL_get_srtp_profiles(s)) {
         int el;
@@ -665,6 +1559,11 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf,
         ret += el;
     }
 # endif
+    custom_ext_init(&s->cert->cli_ext);
+    /* Add custom TLS Extensions to ClientHello */
+    if (!custom_ext_add(s, 0, &ret, limit, al))
+        return NULL;
+
     /*
      * Add padding to workaround bugs in F5 terminators. See
      * https://tools.ietf.org/html/draft-agl-tls-padding-03 NB: because this
@@ -702,7 +1601,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf,
 }
 
 unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf,
-                                          unsigned char *limit)
+                                          unsigned char *limit, int *al)
 {
     int extdatalen = 0;
     unsigned char *orig = buf;
@@ -710,7 +1609,13 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf,
 # ifndef OPENSSL_NO_NEXTPROTONEG
     int next_proto_neg_seen;
 # endif
-
+# ifndef OPENSSL_NO_EC
+    unsigned long alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
+    unsigned long alg_a = s->s3->tmp.new_cipher->algorithm_auth;
+    int using_ecc = (alg_k & (SSL_kEECDH | SSL_kECDHr | SSL_kECDHe))
+        || (alg_a & SSL_aECDSA);
+    using_ecc = using_ecc && (s->session->tlsext_ecpointformatlist != NULL);
+# endif
     /*
      * don't add extensions for SSLv3, unless doing secure renegotiation
      */
@@ -752,27 +1657,30 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf,
         ret += el;
     }
 # ifndef OPENSSL_NO_EC
-    if (s->tlsext_ecpointformatlist != NULL) {
+    if (using_ecc) {
+        const unsigned char *plist;
+        size_t plistlen;
         /*
          * Add TLS extension ECPointFormats to the ServerHello message
          */
         long lenmax;
 
+        tls1_get_formatlist(s, &plist, &plistlen);
+
         if ((lenmax = limit - ret - 5) < 0)
             return NULL;
-        if (s->tlsext_ecpointformatlist_length > (unsigned long)lenmax)
+        if (plistlen > (size_t)lenmax)
             return NULL;
-        if (s->tlsext_ecpointformatlist_length > 255) {
+        if (plistlen > 255) {
             SSLerr(SSL_F_SSL_ADD_SERVERHELLO_TLSEXT, ERR_R_INTERNAL_ERROR);
             return NULL;
         }
 
         s2n(TLSEXT_TYPE_ec_point_formats, ret);
-        s2n(s->tlsext_ecpointformatlist_length + 1, ret);
-        *(ret++) = (unsigned char)s->tlsext_ecpointformatlist_length;
-        memcpy(ret, s->tlsext_ecpointformatlist,
-               s->tlsext_ecpointformatlist_length);
-        ret += s->tlsext_ecpointformatlist_length;
+        s2n(plistlen + 1, ret);
+        *(ret++) = (unsigned char)plistlen;
+        memcpy(ret, plist, plistlen);
+        ret += plistlen;
 
     }
     /*
@@ -795,7 +1703,7 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf,
         s2n(0, ret);
     }
 # ifdef TLSEXT_TYPE_opaque_prf_input
-    if (s->s3->server_opaque_prf_input != NULL && s->version != DTLS1_VERSION) {
+    if (s->s3->server_opaque_prf_input != NULL) {
         size_t sol = s->s3->server_opaque_prf_input_len;
 
         if ((long)(limit - ret - 6 - sol) < 0)
@@ -890,6 +1798,22 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf,
         }
     }
 # endif
+    if (!custom_ext_add(s, 1, &ret, limit, al))
+        return NULL;
+
+    if (s->s3->alpn_selected) {
+        const unsigned char *selected = s->s3->alpn_selected;
+        unsigned len = s->s3->alpn_selected_len;
+
+        if ((long)(limit - ret - 4 - 2 - 1 - len) < 0)
+            return NULL;
+        s2n(TLSEXT_TYPE_application_layer_protocol_negotiation, ret);
+        s2n(3 + len, ret);
+        s2n(1 + len, ret);
+        *ret++ = len;
+        memcpy(ret, selected, len);
+        ret += len;
+    }
 
     if ((extdatalen = ret - orig - 2) == 0)
         return orig;
@@ -981,15 +1905,82 @@ static void ssl_check_for_safari(SSL *s, const unsigned char *data,
 }
 # endif                         /* !OPENSSL_NO_EC */
 
-int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
-                                 int n, int *al)
+/*
+ * tls1_alpn_handle_client_hello processes the ALPN extension of a ClientHello.
+ *   data:     the extension contents, not including the type and length.
+ *   data_len: the number of bytes in |data|.
+ *   al:       set to the alert to send when the return value is non-zero.
+ * Returns 0 on success (or when no select callback is set), -1 on error.
+ */
+static int tls1_alpn_handle_client_hello(SSL *s, const unsigned char *data,
+                                         unsigned data_len, int *al)
+{
+    unsigned i;
+    unsigned proto_len;
+    const unsigned char *selected;
+    unsigned char selected_len;
+    int r;
+
+    if (s->ctx->alpn_select_cb == NULL)
+        return 0;
+
+    if (data_len < 2)
+        goto parse_error;
+
+    /*
+     * data should contain a uint16 length followed by a series of 8-bit,
+     * length-prefixed strings.
+     */
+    i = ((unsigned)data[0]) << 8 | ((unsigned)data[1]);
+    data_len -= 2;
+    data += 2;
+    if (data_len != i)
+        goto parse_error;
+    /* The list must hold at least one length byte plus one name byte. */
+    if (data_len < 2)
+        goto parse_error;
+    /* Walk the list once to check that every entry is well-formed. */
+    for (i = 0; i < data_len;) {
+        proto_len = data[i];
+        i++;
+
+        if (proto_len == 0)
+            goto parse_error;
+        /* Reject unsigned wrap-around and names that overrun the list. */
+        if (i + proto_len < i || i + proto_len > data_len)
+            goto parse_error;
+
+        i += proto_len;
+    }
+    /* Hand the validated list to the select callback. */
+    r = s->ctx->alpn_select_cb(s, &selected, &selected_len, data, data_len,
+                               s->ctx->alpn_select_cb_arg);
+    if (r == SSL_TLSEXT_ERR_OK) {
+        if (s->s3->alpn_selected)
+            OPENSSL_free(s->s3->alpn_selected);
+        s->s3->alpn_selected = OPENSSL_malloc(selected_len);
+        if (!s->s3->alpn_selected) {
+            *al = SSL_AD_INTERNAL_ERROR;
+            return -1;
+        }
+        memcpy(s->s3->alpn_selected, selected, selected_len);
+        s->s3->alpn_selected_len = selected_len;
+    }
+    return 0;
+
+ parse_error:
+    *al = SSL_AD_DECODE_ERROR;
+    return -1;
+}
+
+static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p,
+                                       unsigned char *d, int n, int *al)
 {
     unsigned short type;
     unsigned short size;
     unsigned short len;
     unsigned char *data = *p;
     int renegotiate_seen = 0;
-    int sigalg_seen = 0;
 
     s->servername_done = 0;
     s->tlsext_status_type = -1;
@@ -997,6 +1988,10 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
     s->s3->next_proto_neg_seen = 0;
 # endif
 
+    if (s->s3->alpn_selected) {
+        OPENSSL_free(s->s3->alpn_selected);
+        s->s3->alpn_selected = NULL;
+    }
 # ifndef OPENSSL_NO_HEARTBEATS
     s->tlsext_heartbeat &= ~(SSL_TLSEXT_HB_ENABLED |
                              SSL_TLSEXT_HB_DONT_SEND_REQUESTS);
@@ -1007,6 +2002,11 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
         ssl_check_for_safari(s, data, d, n);
 # endif                         /* !OPENSSL_NO_EC */
 
+    /* Clear any signature algorithms extension received */
+    if (s->cert->peer_sigalgs) {
+        OPENSSL_free(s->cert->peer_sigalgs);
+        s->cert->peer_sigalgs = NULL;
+    }
 # ifndef OPENSSL_NO_SRP
     if (s->srp_ctx.login != NULL) {
         OPENSSL_free(s->srp_ctx.login);
@@ -1149,7 +2149,8 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
             unsigned char *sdata = data;
             int ecpointformatlist_length = *(sdata++);
 
-            if (ecpointformatlist_length != size - 1)
+            if (ecpointformatlist_length != size - 1 ||
+                ecpointformatlist_length < 1)
                 goto err;
             if (!s->hit) {
                 if (s->session->tlsext_ecpointformatlist) {
@@ -1214,8 +2215,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
         }
 # endif                         /* OPENSSL_NO_EC */
 # ifdef TLSEXT_TYPE_opaque_prf_input
-        else if (type == TLSEXT_TYPE_opaque_prf_input &&
-                 s->version != DTLS1_VERSION) {
+        else if (type == TLSEXT_TYPE_opaque_prf_input) {
             unsigned char *sdata = data;
 
             if (size < 2) {
@@ -1259,17 +2259,15 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
             renegotiate_seen = 1;
         } else if (type == TLSEXT_TYPE_signature_algorithms) {
             int dsize;
-            if (sigalg_seen || size < 2)
+            if (s->cert->peer_sigalgs || size < 2)
                 goto err;
-            sigalg_seen = 1;
             n2s(data, dsize);
             size -= 2;
-            if (dsize != size || dsize & 1)
+            if (dsize != size || dsize & 1 || !dsize)
                 goto err;
-            if (!tls1_process_sigalgs(s, data, dsize))
+            if (!tls1_save_sigalgs(s, data, dsize))
                 goto err;
-        } else if (type == TLSEXT_TYPE_status_request &&
-                   s->version != DTLS1_VERSION) {
+        } else if (type == TLSEXT_TYPE_status_request) {
 
             if (size < 5)
                 goto err;
@@ -1361,7 +2359,8 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
 # endif
 # ifndef OPENSSL_NO_NEXTPROTONEG
         else if (type == TLSEXT_TYPE_next_proto_neg &&
-                 s->s3->tmp.finish_md_len == 0) {
+                 s->s3->tmp.finish_md_len == 0 &&
+                 s->s3->alpn_selected == NULL) {
             /*-
              * We shouldn't accept this extension on a
              * renegotiation.
@@ -1383,6 +2382,16 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
         }
 # endif
 
+        else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation &&
+                 s->ctx->alpn_select_cb && s->s3->tmp.finish_md_len == 0) {
+            if (tls1_alpn_handle_client_hello(s, data, size, al) != 0)
+                return 0;
+# ifndef OPENSSL_NO_NEXTPROTONEG
+            /* ALPN takes precedence over NPN. */
+            s->s3->next_proto_neg_seen = 0;
+# endif
+        }
+
         /* session ticket processed earlier */
 # ifndef OPENSSL_NO_SRTP
         else if (SSL_IS_DTLS(s) && SSL_get_srtp_profiles(s)
@@ -1408,7 +2417,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
     if (!renegotiate_seen && s->renegotiate &&
         !(s->options & SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION)) {
         *al = SSL_AD_HANDSHAKE_FAILURE;
-        SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_TLSEXT,
+        SSLerr(SSL_F_SSL_SCAN_CLIENTHELLO_TLSEXT,
                SSL_R_UNSAFE_LEGACY_RENEGOTIATION_DISABLED);
         return 0;
     }
@@ -1419,6 +2428,73 @@ err:
     return 0;
 }
 
+/*
+ * Feed every ClientHello extension to custom_ext_parse().  "data" is the start
+ * of the extension data and "limit" is the end of the record.  Returns 0 when
+ * a custom parser fails, otherwise 1.  TODO: add strict syntax checking.
+ */
+static int ssl_scan_clienthello_custom_tlsext(SSL *s,
+                                              const unsigned char *data,
+                                              const unsigned char *limit,
+                                              int *al)
+{
+    unsigned short type, size, len;
+    /* If resumed session or no custom extensions nothing to do */
+    if (s->hit || s->cert->srv_ext.meths_count == 0)
+        return 1;
+
+    /*
+     * Compare remaining lengths ("limit - data") rather than pointers such as
+     * "limit - 2" or "data + size", which may be formed outside the record.
+     */
+    if (limit - data <= 2)
+        return 1;
+    n2s(data, len);
+    if (limit - data < len)
+        return 1;
+
+    while (limit - data >= 4) {
+        n2s(data, type);
+        n2s(data, size);
+        if (limit - data < size)
+            return 1;
+        if (custom_ext_parse(s, 1 /* server */ , type, data, size, al) <= 0)
+            return 0;
+        data += size;
+    }
+    return 1;
+}
+
+int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
+                                 int n)
+{
+    int al = -1;                /* alert to send; the scans receive &al */
+    unsigned char *ptmp = *p;   /* saved *p for the custom-extension pass */
+    /*
+     * Internally supported extensions are parsed first so SNI can be handled
+     * before custom extensions. An application processing SNI will typically
+     * switch the parent context using SSL_set_SSL_CTX and custom extensions
+     * need to be handled by the new SSL_CTX structure.
+     */
+    if (ssl_scan_clienthello_tlsext(s, p, d, n, &al) <= 0) {
+        ssl3_send_alert(s, SSL3_AL_FATAL, al);
+        return 0;
+    }
+    /* A failed early check only records an error here; no alert is sent. */
+    if (ssl_check_clienthello_tlsext_early(s) <= 0) {
+        SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_TLSEXT, SSL_R_CLIENTHELLO_TLSEXT);
+        return 0;
+    }
+    /* Second pass: custom extensions, from the saved start up to d + n. */
+    custom_ext_init(&s->cert->srv_ext);
+    if (ssl_scan_clienthello_custom_tlsext(s, ptmp, d + n, &al) <= 0) {
+        ssl3_send_alert(s, SSL3_AL_FATAL, al);
+        return 0;
+    }
+
+    return 1;
+}
+
 # ifndef OPENSSL_NO_NEXTPROTONEG
 /*
  * ssl_next_proto_validate validates a Next Protocol Negotiation block. No
@@ -1440,8 +2516,8 @@ static char ssl_next_proto_validate(unsigned char *d, unsigned len)
 }
 # endif
 
-int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
-                                 int n, int *al)
+static int ssl_scan_serverhello_tlsext(SSL *s, unsigned char **p,
+                                       unsigned char *d, int n, int *al)
 {
     unsigned short length;
     unsigned short type;
@@ -1455,6 +2531,10 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
 # endif
     s->tlsext_ticket_expected = 0;
 
+    if (s->s3->alpn_selected) {
+        OPENSSL_free(s->s3->alpn_selected);
+        s->s3->alpn_selected = NULL;
+    }
 # ifndef OPENSSL_NO_HEARTBEATS
     s->tlsext_heartbeat &= ~(SSL_TLSEXT_HB_ENABLED |
                              SSL_TLSEXT_HB_DONT_SEND_REQUESTS);
@@ -1491,8 +2571,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
             unsigned char *sdata = data;
             int ecpointformatlist_length = *(sdata++);
 
-            if (ecpointformatlist_length != size - 1 ||
-                ecpointformatlist_length < 1) {
+            if (ecpointformatlist_length != size - 1) {
                 *al = TLS1_AD_DECODE_ERROR;
                 return 0;
             }
@@ -1537,8 +2616,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
             s->tlsext_ticket_expected = 1;
         }
 # ifdef TLSEXT_TYPE_opaque_prf_input
-        else if (type == TLSEXT_TYPE_opaque_prf_input &&
-                 s->version != DTLS1_VERSION) {
+        else if (type == TLSEXT_TYPE_opaque_prf_input) {
             unsigned char *sdata = data;
 
             if (size < 2) {
@@ -1569,8 +2647,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
             }
         }
 # endif
-        else if (type == TLSEXT_TYPE_status_request &&
-                 s->version != DTLS1_VERSION) {
+        else if (type == TLSEXT_TYPE_status_request) {
             /*
              * MUST be empty and only sent if we've requested a status
              * request message.
@@ -1616,6 +2693,48 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
             s->s3->next_proto_neg_seen = 1;
         }
 # endif
+
+        else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation) {
+            unsigned len;
+
+            /* We must have requested it. */
+            if (s->alpn_client_proto_list == NULL) {
+                *al = TLS1_AD_UNSUPPORTED_EXTENSION;
+                return 0;
+            }
+            if (size < 4) {
+                *al = TLS1_AD_DECODE_ERROR;
+                return 0;
+            }
+            /*-
+             * The extension data consists of:
+             *   uint16 list_length
+             *   uint8 proto_length;
+             *   uint8 proto[proto_length];
+             */
+            len = data[0];
+            len <<= 8;
+            len |= data[1];
+            if (len != (unsigned)size - 2) {
+                *al = TLS1_AD_DECODE_ERROR;
+                return 0;
+            }
+            len = data[2];
+            if (len != (unsigned)size - 3) {
+                *al = TLS1_AD_DECODE_ERROR;
+                return 0;
+            }
+            if (s->s3->alpn_selected)
+                OPENSSL_free(s->s3->alpn_selected);
+            s->s3->alpn_selected = OPENSSL_malloc(len);
+            if (!s->s3->alpn_selected) {
+                *al = TLS1_AD_INTERNAL_ERROR;
+                return 0;
+            }
+            memcpy(s->s3->alpn_selected, data + 3, len);
+            s->s3->alpn_selected_len = len;
+        }
+
         else if (type == TLSEXT_TYPE_renegotiate) {
             if (!ssl_parse_serverhello_renegotiate_ext(s, data, size, al))
                 return 0;
@@ -1643,6 +2762,12 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
                 return 0;
         }
 # endif
+        /*
+         * If this extension type was not otherwise handled, but matches a
+         * custom_cli_ext_record, then send it to the c callback
+         */
+        else if (custom_ext_parse(s, 0, type, data, size, al) <= 0)
+            return 0;
 
         data += size;
     }
@@ -1682,7 +2807,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
     if (!renegotiate_seen && !(s->options & SSL_OP_LEGACY_SERVER_CONNECT)
         && !(s->options & SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION)) {
         *al = SSL_AD_HANDSHAKE_FAILURE;
-        SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_TLSEXT,
+        SSLerr(SSL_F_SSL_SCAN_SERVERHELLO_TLSEXT,
                SSL_R_UNSAFE_LEGACY_RENEGOTIATION_DISABLED);
         return 0;
     }
@@ -1692,63 +2817,6 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
 
 int ssl_prepare_clienthello_tlsext(SSL *s)
 {
-# ifndef OPENSSL_NO_EC
-    /*
-     * If we are client and using an elliptic curve cryptography cipher
-     * suite, send the point formats and elliptic curves we support.
-     */
-    int using_ecc = 0;
-    int i;
-    unsigned char *j;
-    unsigned long alg_k, alg_a;
-    STACK_OF(SSL_CIPHER) *cipher_stack = SSL_get_ciphers(s);
-
-    for (i = 0; i < sk_SSL_CIPHER_num(cipher_stack); i++) {
-        SSL_CIPHER *c = sk_SSL_CIPHER_value(cipher_stack, i);
-
-        alg_k = c->algorithm_mkey;
-        alg_a = c->algorithm_auth;
-        if ((alg_k & (SSL_kEECDH | SSL_kECDHr | SSL_kECDHe)
-             || (alg_a & SSL_aECDSA))) {
-            using_ecc = 1;
-            break;
-        }
-    }
-    using_ecc = using_ecc && (s->version >= TLS1_VERSION);
-    if (using_ecc) {
-        if (s->tlsext_ecpointformatlist != NULL)
-            OPENSSL_free(s->tlsext_ecpointformatlist);
-        if ((s->tlsext_ecpointformatlist = OPENSSL_malloc(3)) == NULL) {
-            SSLerr(SSL_F_SSL_PREPARE_CLIENTHELLO_TLSEXT,
-                   ERR_R_MALLOC_FAILURE);
-            return -1;
-        }
-        s->tlsext_ecpointformatlist_length = 3;
-        s->tlsext_ecpointformatlist[0] = TLSEXT_ECPOINTFORMAT_uncompressed;
-        s->tlsext_ecpointformatlist[1] =
-            TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime;
-        s->tlsext_ecpointformatlist[2] =
-            TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2;
-
-        /* we support all named elliptic curves in RFC 4492 */
-        if (s->tlsext_ellipticcurvelist != NULL)
-            OPENSSL_free(s->tlsext_ellipticcurvelist);
-        s->tlsext_ellipticcurvelist_length =
-            sizeof(pref_list) / sizeof(pref_list[0]) * 2;
-        if ((s->tlsext_ellipticcurvelist =
-             OPENSSL_malloc(s->tlsext_ellipticcurvelist_length)) == NULL) {
-            s->tlsext_ellipticcurvelist_length = 0;
-            SSLerr(SSL_F_SSL_PREPARE_CLIENTHELLO_TLSEXT,
-                   ERR_R_MALLOC_FAILURE);
-            return -1;
-        }
-        for (i = 0, j = s->tlsext_ellipticcurvelist; (unsigned int)i <
-             sizeof(pref_list) / sizeof(pref_list[0]); i++) {
-            int id = tls1_ec_nid2curve_id(pref_list[i]);
-            s2n(id, j);
-        }
-    }
-# endif                         /* OPENSSL_NO_EC */
 
 # ifdef TLSEXT_TYPE_opaque_prf_input
     {
@@ -1800,40 +2868,10 @@ int ssl_prepare_clienthello_tlsext(SSL *s)
 
 int ssl_prepare_serverhello_tlsext(SSL *s)
 {
-# ifndef OPENSSL_NO_EC
-    /*
-     * If we are server and using an ECC cipher suite, send the point formats
-     * we support if the client sent us an ECPointsFormat extension.  Note
-     * that the server is not supposed to send an EllipticCurves extension.
-     */
-
-    unsigned long alg_k = s->s3->tmp.new_cipher->algorithm_mkey;
-    unsigned long alg_a = s->s3->tmp.new_cipher->algorithm_auth;
-    int using_ecc = (alg_k & (SSL_kEECDH | SSL_kECDHr | SSL_kECDHe))
-        || (alg_a & SSL_aECDSA);
-    using_ecc = using_ecc && (s->session->tlsext_ecpointformatlist != NULL);
-
-    if (using_ecc) {
-        if (s->tlsext_ecpointformatlist != NULL)
-            OPENSSL_free(s->tlsext_ecpointformatlist);
-        if ((s->tlsext_ecpointformatlist = OPENSSL_malloc(3)) == NULL) {
-            SSLerr(SSL_F_SSL_PREPARE_SERVERHELLO_TLSEXT,
-                   ERR_R_MALLOC_FAILURE);
-            return -1;
-        }
-        s->tlsext_ecpointformatlist_length = 3;
-        s->tlsext_ecpointformatlist[0] = TLSEXT_ECPOINTFORMAT_uncompressed;
-        s->tlsext_ecpointformatlist[1] =
-            TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime;
-        s->tlsext_ecpointformatlist[2] =
-            TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2;
-    }
-# endif                         /* OPENSSL_NO_EC */
-
     return 1;
 }
 
-int ssl_check_clienthello_tlsext_early(SSL *s)
+static int ssl_check_clienthello_tlsext_early(SSL *s)
 {
     int ret = SSL_TLSEXT_ERR_NOACK;
     int al = SSL_AD_UNRECOGNIZED_NAME;
@@ -1943,6 +2981,44 @@ int ssl_check_clienthello_tlsext_early(SSL *s)
     }
 }
 
+int tls1_set_server_sigalgs(SSL *s)
+{
+    int al;                     /* alert sent from the err path */
+    size_t i;
+    /* Clear any shared signature algorithms */
+    if (s->cert->shared_sigalgs) {
+        OPENSSL_free(s->cert->shared_sigalgs);
+        s->cert->shared_sigalgs = NULL;
+        s->cert->shared_sigalgslen = 0;
+    }
+    /* Clear certificate digests and validity flags */
+    for (i = 0; i < SSL_PKEY_NUM; i++) {
+        s->cert->pkeys[i].digest = NULL;
+        s->cert->pkeys[i].valid_flags = 0;
+    }
+
+    /* If the peer sent signature algorithms, process them. */
+    if (s->cert->peer_sigalgs) {
+        if (!tls1_process_sigalgs(s)) {
+            SSLerr(SSL_F_TLS1_SET_SERVER_SIGALGS, ERR_R_MALLOC_FAILURE);
+            al = SSL_AD_INTERNAL_ERROR;
+            goto err;
+        }
+        /* Fatal error if there are no shared signature algorithms */
+        if (!s->cert->shared_sigalgs) {
+            SSLerr(SSL_F_TLS1_SET_SERVER_SIGALGS,
+                   SSL_R_NO_SHARED_SIGATURE_ALGORITHMS);
+            al = SSL_AD_ILLEGAL_PARAMETER;
+            goto err;
+        }
+    } else
+        ssl_cert_set_default_md(s->cert);
+    return 1;
+ err:
+    ssl3_send_alert(s, SSL3_AL_FATAL, al);
+    return 0;
+}
+
 int ssl_check_clienthello_tlsext_late(SSL *s)
 {
     int ret = SSL_TLSEXT_ERR_OK;
@@ -1950,9 +3026,9 @@ int ssl_check_clienthello_tlsext_late(SSL *s)
 
     /*
      * If status request then ask callback what to do. Note: this must be
-     * called after servername callbacks in case the certificate has
-     * changed, and must be called after the cipher has been chosen because
-     * this may influence which certificate is sent
+     * called after servername callbacks in case the certificate has changed,
+     * and must be called after the cipher has been chosen because this may
+     * influence which certificate is sent
      */
     if ((s->tlsext_status_type != -1) && s->ctx && s->ctx->tlsext_status_cb) {
         int r;
@@ -2124,6 +3200,24 @@ int ssl_check_serverhello_tlsext(SSL *s)
     }
 }
 
+int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d,
+                                 int n)
+{
+    int al = -1;                /* alert to send; the scan receives &al */
+    if (s->version < SSL3_VERSION)
+        return 1;               /* nothing is parsed below SSL3_VERSION */
+    if (ssl_scan_serverhello_tlsext(s, p, d, n, &al) <= 0) {
+        ssl3_send_alert(s, SSL3_AL_FATAL, al);
+        return 0;
+    }
+    /* A failed check only records an error here; no alert is sent. */
+    if (ssl_check_serverhello_tlsext(s) <= 0) {
+        SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_TLSEXT, SSL_R_SERVERHELLO_TLSEXT);
+        return 0;
+    }
+    return 1;
+}
+
 /*-
  * Since the server cache lookup is done early on in the processing of the
  * ClientHello, and other operations depend on the result, we need to handle
@@ -2179,7 +3273,7 @@ int tls1_process_ticket(SSL *s, unsigned char *session_id, int len,
     if (p >= limit)
         return -1;
     /* Skip past DTLS cookie */
-    if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER) {
+    if (SSL_IS_DTLS(s)) {
         i = *(p++);
         p += i;
         if (p >= limit)
@@ -2366,32 +3460,18 @@ typedef struct {
 } tls12_lookup;
 
 static tls12_lookup tls12_md[] = {
-# ifndef OPENSSL_NO_MD5
     {NID_md5, TLSEXT_hash_md5},
-# endif
-# ifndef OPENSSL_NO_SHA
     {NID_sha1, TLSEXT_hash_sha1},
-# endif
-# ifndef OPENSSL_NO_SHA256
     {NID_sha224, TLSEXT_hash_sha224},
     {NID_sha256, TLSEXT_hash_sha256},
-# endif
-# ifndef OPENSSL_NO_SHA512
     {NID_sha384, TLSEXT_hash_sha384},
     {NID_sha512, TLSEXT_hash_sha512}
-# endif
 };
 
 static tls12_lookup tls12_sig[] = {
-# ifndef OPENSSL_NO_RSA
     {EVP_PKEY_RSA, TLSEXT_signature_rsa},
-# endif
-# ifndef OPENSSL_NO_DSA
     {EVP_PKEY_DSA, TLSEXT_signature_dsa},
-# endif
-# ifndef OPENSSL_NO_ECDSA
     {EVP_PKEY_EC, TLSEXT_signature_ecdsa}
-# endif
 };
 
 static int tls12_find_id(int nid, tls12_lookup *table, size_t tlen)
@@ -2404,17 +3484,15 @@ static int tls12_find_id(int nid, tls12_lookup *table, size_t tlen)
     return -1;
 }
 
-# if 0
 static int tls12_find_nid(int id, tls12_lookup *table, size_t tlen)
 {
     size_t i;
     for (i = 0; i < tlen; i++) {
-        if (table[i].id == id)
+        if ((table[i].id) == id)
             return table[i].nid;
     }
-    return -1;
+    return NID_undef;
 }
-# endif
 
 int tls12_get_sigandhash(unsigned char *p, const EVP_PKEY *pk,
                          const EVP_MD *md)
@@ -2443,6 +3521,14 @@ int tls12_get_sigid(const EVP_PKEY *pk)
 const EVP_MD *tls12_get_hash(unsigned char hash_alg)
 {
     switch (hash_alg) {
+# ifndef OPENSSL_NO_MD5
+    case TLSEXT_hash_md5:
+#  ifdef OPENSSL_FIPS
+        if (FIPS_mode())
+            return NULL;
+#  endif
+        return EVP_md5();
+# endif
 # ifndef OPENSSL_NO_SHA
     case TLSEXT_hash_sha1:
         return EVP_sha1();
@@ -2467,83 +3553,274 @@ const EVP_MD *tls12_get_hash(unsigned char hash_alg)
     }
 }
 
+static int tls12_get_pkey_idx(unsigned char sig_alg)
+{
+    switch (sig_alg) {
+# ifndef OPENSSL_NO_RSA
+    case TLSEXT_signature_rsa:
+        return SSL_PKEY_RSA_SIGN;
+# endif
+# ifndef OPENSSL_NO_DSA
+    case TLSEXT_signature_dsa:
+        return SSL_PKEY_DSA_SIGN;
+# endif
+# ifndef OPENSSL_NO_ECDSA
+    case TLSEXT_signature_ecdsa:
+        return SSL_PKEY_ECC;
+# endif
+    }
+    return -1;
+}
+
+/* Convert a 2-byte TLS 1.2 sigalg {hash, sign} at data into NIDs */
+static void tls1_lookup_sigalg(int *phash_nid, int *psign_nid,
+                               int *psignhash_nid, const unsigned char *data)
+{
+    int sign_nid = NID_undef, hash_nid = NID_undef;
+    if (!phash_nid && !psign_nid && !psignhash_nid)
+        return;                 /* caller wants nothing: skip the lookups */
+    if (phash_nid || psignhash_nid) {
+        hash_nid = tls12_find_nid(data[0], tls12_md,
+                                  sizeof(tls12_md) / sizeof(tls12_lookup));
+        if (phash_nid)
+            *phash_nid = hash_nid;
+    }
+    if (psign_nid || psignhash_nid) {
+        sign_nid = tls12_find_nid(data[1], tls12_sig,
+                                  sizeof(tls12_sig) / sizeof(tls12_lookup));
+        if (psign_nid)
+            *psign_nid = sign_nid;
+    }
+    if (psignhash_nid) {
+        /* Unknown part or no combined signature OID: report NID_undef */
+        if (sign_nid == NID_undef || hash_nid == NID_undef
+            || !OBJ_find_sigid_by_algs(psignhash_nid, hash_nid, sign_nid))
+            *psignhash_nid = NID_undef;
+    }
+}
+
+/* Given preference and allowed sigalgs set shared sigalgs */
+static int tls12_do_shared_sigalgs(TLS_SIGALGS *shsig,
+                                   const unsigned char *pref, size_t preflen,
+                                   const unsigned char *allow,
+                                   size_t allowlen)
+{
+    const unsigned char *ptmp, *atmp;
+    size_t i, j, nmatch = 0;
+    for (i = 0, ptmp = pref; i < preflen; i += 2, ptmp += 2) {
+        /* Skip disabled hashes or signature algorithms */
+        if (tls12_get_hash(ptmp[0]) == NULL)
+            continue;
+        if (tls12_get_pkey_idx(ptmp[1]) == -1)
+            continue;
+        for (j = 0, atmp = allow; j < allowlen; j += 2, atmp += 2) {
+            if (ptmp[0] == atmp[0] && ptmp[1] == atmp[1]) {
+                nmatch++;
+                if (shsig) {
+                    shsig->rhash = ptmp[0];
+                    shsig->rsign = ptmp[1];
+                    tls1_lookup_sigalg(&shsig->hash_nid,
+                                       &shsig->sign_nid,
+                                       &shsig->signandhash_nid, ptmp);
+                    shsig++;
+                }
+                break;
+            }
+        }
+    }
+    return nmatch;
+}
+
+/* Rebuild the shared sigalgs list in s->cert; returns 0 if malloc fails */
+static int tls1_set_shared_sigalgs(SSL *s)
+{
+    const unsigned char *pref, *allow, *conf;
+    size_t preflen, allowlen, conflen;
+    size_t nmatch;
+    TLS_SIGALGS *salgs = NULL;
+    CERT *c = s->cert;
+    unsigned int is_suiteb = tls1_suiteb(s);
+    if (c->shared_sigalgs) {
+        OPENSSL_free(c->shared_sigalgs);
+        c->shared_sigalgs = NULL;
+        c->shared_sigalgslen = 0;
+    }
+    /* A client uses client_sigalgs if set; Suite B ignores configured lists */
+    if (!s->server && c->client_sigalgs && !is_suiteb) {
+        conf = c->client_sigalgs;
+        conflen = c->client_sigalgslen;
+    } else if (c->conf_sigalgs && !is_suiteb) {
+        conf = c->conf_sigalgs;
+        conflen = c->conf_sigalgslen;
+    } else
+        conflen = tls12_get_psigalgs(s, &conf);
+    if (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE || is_suiteb) {
+        pref = conf;            /* our list sets the order */
+        preflen = conflen;
+        allow = c->peer_sigalgs;
+        allowlen = c->peer_sigalgslen;
+    } else {
+        allow = conf;
+        allowlen = conflen;
+        pref = c->peer_sigalgs; /* the peer's list sets the order */
+        preflen = c->peer_sigalgslen;
+    }
+    nmatch = tls12_do_shared_sigalgs(NULL, pref, preflen, allow, allowlen);
+    if (nmatch) {               /* first pass only counted; now fill */
+        salgs = OPENSSL_malloc(nmatch * sizeof(TLS_SIGALGS));
+        if (!salgs)
+            return 0;
+        nmatch = tls12_do_shared_sigalgs(salgs, pref, preflen, allow, allowlen);
+    } else {
+        salgs = NULL;
+    }
+    c->shared_sigalgs = salgs;
+    c->shared_sigalgslen = nmatch;
+    return 1;
+}
+
 /* Set preferred digest for each key type */
 
-int tls1_process_sigalgs(SSL *s, const unsigned char *data, int dsize)
+int tls1_save_sigalgs(SSL *s, const unsigned char *data, int dsize)
 {
-    int i, idx;
-    const EVP_MD *md;
     CERT *c = s->cert;
-    /* Extension ignored for TLS versions below 1.2 */
-    if (TLS1_get_version(s) < TLS1_2_VERSION)
+    /* Extension ignored for inappropriate versions */
+    if (!SSL_USE_SIGALGS(s))
         return 1;
     /* Should never happen */
     if (!c)
         return 0;
 
-    c->pkeys[SSL_PKEY_DSA_SIGN].digest = NULL;
-    c->pkeys[SSL_PKEY_RSA_SIGN].digest = NULL;
-    c->pkeys[SSL_PKEY_RSA_ENC].digest = NULL;
-    c->pkeys[SSL_PKEY_ECC].digest = NULL;
+    if (c->peer_sigalgs)
+        OPENSSL_free(c->peer_sigalgs);
+    c->peer_sigalgs = OPENSSL_malloc(dsize);
+    if (!c->peer_sigalgs)
+        return 0;
+    c->peer_sigalgslen = dsize;
+    memcpy(c->peer_sigalgs, data, dsize);
+    return 1;
+}
 
-    for (i = 0; i < dsize; i += 2) {
-        unsigned char hash_alg = data[i], sig_alg = data[i + 1];
+int tls1_process_sigalgs(SSL *s)
+{
+    int idx;
+    size_t i;
+    const EVP_MD *md;
+    CERT *c = s->cert;
+    TLS_SIGALGS *sigptr;
+    if (!tls1_set_shared_sigalgs(s))
+        return 0;
 
-        switch (sig_alg) {
-# ifndef OPENSSL_NO_RSA
-        case TLSEXT_signature_rsa:
-            idx = SSL_PKEY_RSA_SIGN;
-            break;
-# endif
-# ifndef OPENSSL_NO_DSA
-        case TLSEXT_signature_dsa:
-            idx = SSL_PKEY_DSA_SIGN;
-            break;
-# endif
-# ifndef OPENSSL_NO_ECDSA
-        case TLSEXT_signature_ecdsa:
-            idx = SSL_PKEY_ECC;
-            break;
-# endif
-        default:
-            continue;
+# ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+    if (s->cert->cert_flags & SSL_CERT_FLAG_BROKEN_PROTOCOL) {
+        /*
+         * Use first set signature preference to force message digest,
+         * ignoring any peer preferences.
+         */
+        const unsigned char *sigs = NULL;
+        if (s->server)
+            sigs = c->conf_sigalgs;
+        else
+            sigs = c->client_sigalgs;
+        if (sigs) {
+            idx = tls12_get_pkey_idx(sigs[1]);
+            md = tls12_get_hash(sigs[0]);
+            c->pkeys[idx].digest = md;
+            c->pkeys[idx].valid_flags = CERT_PKEY_EXPLICIT_SIGN;
+            if (idx == SSL_PKEY_RSA_SIGN) {
+                c->pkeys[SSL_PKEY_RSA_ENC].valid_flags =
+                    CERT_PKEY_EXPLICIT_SIGN;
+                c->pkeys[SSL_PKEY_RSA_ENC].digest = md;
+            }
         }
+    }
+# endif
 
-        if (c->pkeys[idx].digest == NULL) {
-            md = tls12_get_hash(hash_alg);
-            if (md) {
-                c->pkeys[idx].digest = md;
-                if (idx == SSL_PKEY_RSA_SIGN)
-                    c->pkeys[SSL_PKEY_RSA_ENC].digest = md;
+    for (i = 0, sigptr = c->shared_sigalgs;
+         i < c->shared_sigalgslen; i++, sigptr++) {
+        idx = tls12_get_pkey_idx(sigptr->rsign);
+        if (idx > 0 && c->pkeys[idx].digest == NULL) {
+            md = tls12_get_hash(sigptr->rhash);
+            c->pkeys[idx].digest = md;
+            c->pkeys[idx].valid_flags = CERT_PKEY_EXPLICIT_SIGN;
+            if (idx == SSL_PKEY_RSA_SIGN) {
+                c->pkeys[SSL_PKEY_RSA_ENC].valid_flags =
+                    CERT_PKEY_EXPLICIT_SIGN;
+                c->pkeys[SSL_PKEY_RSA_ENC].digest = md;
             }
         }
 
     }
-
     /*
-     * Set any remaining keys to default values. NOTE: if alg is not
-     * supported it stays as NULL.
+     * In strict mode leave unset digests as NULL to indicate we can't use
+     * the certificate for signing.
      */
+    if (!(s->cert->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT)) {
+        /*
+         * Set any remaining keys to default values. NOTE: if alg is not
+         * supported it stays as NULL.
+         */
 # ifndef OPENSSL_NO_DSA
-    if (!c->pkeys[SSL_PKEY_DSA_SIGN].digest)
-        c->pkeys[SSL_PKEY_DSA_SIGN].digest = EVP_sha1();
+        if (!c->pkeys[SSL_PKEY_DSA_SIGN].digest)
+            c->pkeys[SSL_PKEY_DSA_SIGN].digest = EVP_sha1();
 # endif
 # ifndef OPENSSL_NO_RSA
-    if (!c->pkeys[SSL_PKEY_RSA_SIGN].digest) {
-        c->pkeys[SSL_PKEY_RSA_SIGN].digest = EVP_sha1();
-        c->pkeys[SSL_PKEY_RSA_ENC].digest = EVP_sha1();
-    }
+        if (!c->pkeys[SSL_PKEY_RSA_SIGN].digest) {
+            c->pkeys[SSL_PKEY_RSA_SIGN].digest = EVP_sha1();
+            c->pkeys[SSL_PKEY_RSA_ENC].digest = EVP_sha1();
+        }
 # endif
 # ifndef OPENSSL_NO_ECDSA
-    if (!c->pkeys[SSL_PKEY_ECC].digest)
-        c->pkeys[SSL_PKEY_ECC].digest = EVP_sha1();
+        if (!c->pkeys[SSL_PKEY_ECC].digest)
+            c->pkeys[SSL_PKEY_ECC].digest = EVP_sha1();
 # endif
+    }
     return 1;
 }
 
-#endif
+int SSL_get_sigalgs(SSL *s, int idx,
+                    int *psign, int *phash, int *psignhash,
+                    unsigned char *rsig, unsigned char *rhash)
+{
+    const unsigned char *psig = s->cert->peer_sigalgs;
+    if (psig == NULL)
+        return 0;
+    if (idx >= 0) {
+        idx <<= 1;
+        if (idx >= (int)s->cert->peer_sigalgslen)
+            return 0;
+        psig += idx;
+        if (rhash)
+            *rhash = psig[0];
+        if (rsig)
+            *rsig = psig[1];
+        tls1_lookup_sigalg(phash, psign, psignhash, psig);
+    }
+    return s->cert->peer_sigalgslen / 2;
+}
+/* Report shared sigalg idx (idx < 0: count only); returns count or 0 */
+int SSL_get_shared_sigalgs(SSL *s, int idx,
+                           int *psign, int *phash, int *psignhash,
+                           unsigned char *rsig, unsigned char *rhash)
+{
+    TLS_SIGALGS *shsigalgs = s->cert->shared_sigalgs;
+    if (!shsigalgs || idx >= (int)s->cert->shared_sigalgslen)
+        return 0;               /* no shared list, or idx past its end */
+    /* Never index with a negative idx; NULL outputs are skipped */
+    if (idx >= 0 && phash)
+        *phash = shsigalgs[idx].hash_nid;
+    if (idx >= 0 && psign)
+        *psign = shsigalgs[idx].sign_nid;
+    if (idx >= 0 && psignhash)
+        *psignhash = shsigalgs[idx].signandhash_nid;
+    if (idx >= 0 && rsig)
+        *rsig = shsigalgs[idx].rsign;
+    if (idx >= 0 && rhash)
+        *rhash = shsigalgs[idx].rhash;
+    return s->cert->shared_sigalgslen;
+}
 
-#ifndef OPENSSL_NO_HEARTBEATS
+# ifndef OPENSSL_NO_HEARTBEATS
 int tls1_process_heartbeat(SSL *s)
 {
     unsigned char *p = &s->s3->rrec.data[0], *pl;
@@ -2694,4 +3971,426 @@ err:
 
     return ret;
 }
+# endif
+
+# define MAX_SIGALGLEN   (TLSEXT_hash_num * TLSEXT_signature_num * 2)
+
+typedef struct {
+    size_t sigalgcnt;
+    int sigalgs[MAX_SIGALGLEN];
+} sig_cb_st;
+/* CONF_parse_list() callback: parse one "SIG+HASH" element into *arg */
+static int sig_cb(const char *elem, int len, void *arg)
+{
+    sig_cb_st *sarg = arg;
+    size_t i;
+    char etmp[20], *p;
+    int sig_alg, hash_alg;
+    if (elem == NULL)
+        return 0;
+    if (sarg->sigalgcnt == MAX_SIGALGLEN)   /* output array is full */
+        return 0;
+    if (len > (int)(sizeof(etmp) - 1))
+        return 0;
+    memcpy(etmp, elem, len);
+    etmp[len] = 0;
+    p = strchr(etmp, '+');
+    if (!p)
+        return 0;
+    *p = 0;
+    p++;
+    if (!*p)
+        return 0;
+    /* etmp now holds the signature name and p the digest name */
+    if (!strcmp(etmp, "RSA"))
+        sig_alg = EVP_PKEY_RSA;
+    else if (!strcmp(etmp, "DSA"))
+        sig_alg = EVP_PKEY_DSA;
+    else if (!strcmp(etmp, "ECDSA"))
+        sig_alg = EVP_PKEY_EC;
+    else
+        return 0;
+    /* The digest may be given by short or long object name */
+    hash_alg = OBJ_sn2nid(p);
+    if (hash_alg == NID_undef)
+        hash_alg = OBJ_ln2nid(p);
+    if (hash_alg == NID_undef)
+        return 0;
+    /* Reject repeats; entries are stored as (hash, sig) pairs */
+    for (i = 0; i < sarg->sigalgcnt; i += 2) {
+        if (sarg->sigalgs[i] == hash_alg && sarg->sigalgs[i + 1] == sig_alg)
+            return 0;
+    }
+    sarg->sigalgs[sarg->sigalgcnt++] = hash_alg;
+    sarg->sigalgs[sarg->sigalgcnt++] = sig_alg;
+    return 1;
+}
+
+/*
+ * Set supported signature algorithms based on a colon separated list of the
+ * form sig+hash e.g. RSA+SHA512:DSA+SHA512
+ */
+int tls1_set_sigalgs_list(CERT *c, const char *str, int client)
+{
+    sig_cb_st sig;
+    sig.sigalgcnt = 0;
+    if (!CONF_parse_list(str, ':', 1, sig_cb, &sig))
+        return 0;
+    if (c == NULL)
+        return 1;
+    return tls1_set_sigalgs(c, sig.sigalgs, sig.sigalgcnt, client);
+}
+
+int tls1_set_sigalgs(CERT *c, const int *psig_nids, size_t salglen,
+                     int client)
+{
+    unsigned char *sigalgs, *sptr;
+    int rhash, rsign;
+    size_t i;
+    if (salglen & 1)
+        return 0;
+    sigalgs = OPENSSL_malloc(salglen);
+    if (sigalgs == NULL)
+        return 0;
+    for (i = 0, sptr = sigalgs; i < salglen; i += 2) {
+        rhash = tls12_find_id(*psig_nids++, tls12_md,
+                              sizeof(tls12_md) / sizeof(tls12_lookup));
+        rsign = tls12_find_id(*psig_nids++, tls12_sig,
+                              sizeof(tls12_sig) / sizeof(tls12_lookup));
+
+        if (rhash == -1 || rsign == -1)
+            goto err;
+        *sptr++ = rhash;
+        *sptr++ = rsign;
+    }
+
+    if (client) {
+        if (c->client_sigalgs)
+            OPENSSL_free(c->client_sigalgs);
+        c->client_sigalgs = sigalgs;
+        c->client_sigalgslen = salglen;
+    } else {
+        if (c->conf_sigalgs)
+            OPENSSL_free(c->conf_sigalgs);
+        c->conf_sigalgs = sigalgs;
+        c->conf_sigalgslen = salglen;
+    }
+
+    return 1;
+
+ err:
+    OPENSSL_free(sigalgs);
+    return 0;
+}
+
+static int tls1_check_sig_alg(CERT *c, X509 *x, int default_nid)
+{
+    int sig_nid;
+    size_t i;
+    if (default_nid == -1)
+        return 1;
+    sig_nid = X509_get_signature_nid(x);
+    if (default_nid)
+        return sig_nid == default_nid ? 1 : 0;
+    for (i = 0; i < c->shared_sigalgslen; i++)
+        if (sig_nid == c->shared_sigalgs[i].signandhash_nid)
+            return 1;
+    return 0;
+}
+
+/* Check to see if a certificate issuer name matches list of CA names */
+static int ssl_check_ca_name(STACK_OF(X509_NAME) *names, X509 *x)
+{
+    X509_NAME *nm;
+    int i;
+    nm = X509_get_issuer_name(x);
+    for (i = 0; i < sk_X509_NAME_num(names); i++) {
+        if (!X509_NAME_cmp(nm, sk_X509_NAME_value(names, i)))
+            return 1;
+    }
+    return 0;
+}
+
+/*
+ * Check certificate chain is consistent with TLS extensions and is usable by
+ * server. This serves two purposes: it allows users to check chains before
+ * passing them to the server and it allows the server to check chains before
+ * attempting to use them.
+ */
+
+/* Flags which need to be set for a certificate when strict mode is not set */
+
+# define CERT_PKEY_VALID_FLAGS \
+        (CERT_PKEY_EE_SIGNATURE|CERT_PKEY_EE_PARAM)
+/* Strict mode flags */
+# define CERT_PKEY_STRICT_FLAGS \
+         (CERT_PKEY_VALID_FLAGS|CERT_PKEY_CA_SIGNATURE|CERT_PKEY_CA_PARAM \
+         | CERT_PKEY_ISSUER_NAME|CERT_PKEY_CERT_TYPE)
+
+int tls1_check_chain(SSL *s, X509 *x, EVP_PKEY *pk, STACK_OF(X509) *chain,
+                     int idx)
+{
+    int i;
+    int rv = 0;
+    int check_flags = 0, strict_mode;
+    CERT_PKEY *cpk = NULL;
+    CERT *c = s->cert;
+    unsigned int suiteb_flags = tls1_suiteb(s);
+    /* idx == -1 means checking server chains */
+    if (idx != -1) {
+        /* idx == -2 means checking client certificate chains */
+        if (idx == -2) {
+            cpk = c->key;
+            idx = cpk - c->pkeys;
+        } else
+            cpk = c->pkeys + idx;
+        x = cpk->x509;
+        pk = cpk->privatekey;
+        chain = cpk->chain;
+        strict_mode = c->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT;
+        /* If no cert or key, forget it */
+        if (!x || !pk)
+            goto end;
+# ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL
+        /* Allow any certificate to pass test */
+        if (s->cert->cert_flags & SSL_CERT_FLAG_BROKEN_PROTOCOL) {
+            rv = CERT_PKEY_STRICT_FLAGS | CERT_PKEY_EXPLICIT_SIGN |
+                CERT_PKEY_VALID | CERT_PKEY_SIGN;
+            cpk->valid_flags = rv;
+            return rv;
+        }
+# endif
+    } else {
+        if (!x || !pk)
+            return 0;
+        idx = ssl_cert_type(x, pk);
+        if (idx == -1)
+            return 0;
+        cpk = c->pkeys + idx;
+        if (c->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT)
+            check_flags = CERT_PKEY_STRICT_FLAGS;
+        else
+            check_flags = CERT_PKEY_VALID_FLAGS;
+        strict_mode = 1;
+    }
+
+    if (suiteb_flags) {
+        int ok;
+        if (check_flags)
+            check_flags |= CERT_PKEY_SUITEB;
+        ok = X509_chain_check_suiteb(NULL, x, chain, suiteb_flags);
+        if (ok == X509_V_OK)
+            rv |= CERT_PKEY_SUITEB;
+        else if (!check_flags)
+            goto end;
+    }
+
+    /*
+     * Check all signature algorithms are consistent with signature
+     * algorithms extension if TLS 1.2 or later and strict mode.
+     */
+    if (TLS1_get_version(s) >= TLS1_2_VERSION && strict_mode) {
+        int default_nid;
+        unsigned char rsign = 0;
+        if (c->peer_sigalgs)
+            default_nid = 0;
+        /* If no sigalgs extension use defaults from RFC5246 */
+        else {
+            switch (idx) {
+            case SSL_PKEY_RSA_ENC:
+            case SSL_PKEY_RSA_SIGN:
+            case SSL_PKEY_DH_RSA:
+                rsign = TLSEXT_signature_rsa;
+                default_nid = NID_sha1WithRSAEncryption;
+                break;
+
+            case SSL_PKEY_DSA_SIGN:
+            case SSL_PKEY_DH_DSA:
+                rsign = TLSEXT_signature_dsa;
+                default_nid = NID_dsaWithSHA1;
+                break;
+
+            case SSL_PKEY_ECC:
+                rsign = TLSEXT_signature_ecdsa;
+                default_nid = NID_ecdsa_with_SHA1;
+                break;
+
+            default:
+                default_nid = -1;
+                break;
+            }
+        }
+        /*
+         * If peer sent no signature algorithms extension and we have set
+         * preferred signature algorithms check we support sha1.
+         */
+        if (default_nid > 0 && c->conf_sigalgs) {
+            size_t j;
+            const unsigned char *p = c->conf_sigalgs;
+            for (j = 0; j < c->conf_sigalgslen; j += 2, p += 2) {
+                if (p[0] == TLSEXT_hash_sha1 && p[1] == rsign)
+                    break;
+            }
+            if (j == c->conf_sigalgslen) {
+                if (check_flags)
+                    goto skip_sigs;
+                else
+                    goto end;
+            }
+        }
+        /* Check signature algorithm of each cert in chain */
+        if (!tls1_check_sig_alg(c, x, default_nid)) {
+            if (!check_flags)
+                goto end;
+        } else
+            rv |= CERT_PKEY_EE_SIGNATURE;
+        rv |= CERT_PKEY_CA_SIGNATURE;
+        for (i = 0; i < sk_X509_num(chain); i++) {
+            if (!tls1_check_sig_alg(c, sk_X509_value(chain, i), default_nid)) {
+                if (check_flags) {
+                    rv &= ~CERT_PKEY_CA_SIGNATURE;
+                    break;
+                } else
+                    goto end;
+            }
+        }
+    }
+    /* Else not TLS 1.2, so mark EE and CA signing algorithms OK */
+    else if (check_flags)
+        rv |= CERT_PKEY_EE_SIGNATURE | CERT_PKEY_CA_SIGNATURE;
+ skip_sigs:
+    /* Check cert parameters are consistent */
+    if (tls1_check_cert_param(s, x, check_flags ? 1 : 2))
+        rv |= CERT_PKEY_EE_PARAM;
+    else if (!check_flags)
+        goto end;
+    if (!s->server)
+        rv |= CERT_PKEY_CA_PARAM;
+    /* In strict mode check rest of chain too */
+    else if (strict_mode) {
+        rv |= CERT_PKEY_CA_PARAM;
+        for (i = 0; i < sk_X509_num(chain); i++) {
+            X509 *ca = sk_X509_value(chain, i);
+            if (!tls1_check_cert_param(s, ca, 0)) {
+                if (check_flags) {
+                    rv &= ~CERT_PKEY_CA_PARAM;
+                    break;
+                } else
+                    goto end;
+            }
+        }
+    }
+    if (!s->server && strict_mode) {
+        STACK_OF(X509_NAME) *ca_dn;
+        int check_type = 0;
+        switch (pk->type) {
+        case EVP_PKEY_RSA:
+            check_type = TLS_CT_RSA_SIGN;
+            break;
+        case EVP_PKEY_DSA:
+            check_type = TLS_CT_DSS_SIGN;
+            break;
+        case EVP_PKEY_EC:
+            check_type = TLS_CT_ECDSA_SIGN;
+            break;
+        case EVP_PKEY_DH:
+        case EVP_PKEY_DHX:
+            {
+                int cert_type = X509_certificate_type(x, pk);
+                if (cert_type & EVP_PKS_RSA)
+                    check_type = TLS_CT_RSA_FIXED_DH;
+                if (cert_type & EVP_PKS_DSA)
+                    check_type = TLS_CT_DSS_FIXED_DH;
+            }
+        }
+        if (check_type) {
+            const unsigned char *ctypes;
+            int ctypelen;
+            if (c->ctypes) {
+                ctypes = c->ctypes;
+                ctypelen = (int)c->ctype_num;
+            } else {
+                ctypes = (unsigned char *)s->s3->tmp.ctype;
+                ctypelen = s->s3->tmp.ctype_num;
+            }
+            for (i = 0; i < ctypelen; i++) {
+                if (ctypes[i] == check_type) {
+                    rv |= CERT_PKEY_CERT_TYPE;
+                    break;
+                }
+            }
+            if (!(rv & CERT_PKEY_CERT_TYPE) && !check_flags)
+                goto end;
+        } else
+            rv |= CERT_PKEY_CERT_TYPE;
+
+        ca_dn = s->s3->tmp.ca_names;
+
+        if (!sk_X509_NAME_num(ca_dn))
+            rv |= CERT_PKEY_ISSUER_NAME;
+
+        if (!(rv & CERT_PKEY_ISSUER_NAME)) {
+            if (ssl_check_ca_name(ca_dn, x))
+                rv |= CERT_PKEY_ISSUER_NAME;
+        }
+        if (!(rv & CERT_PKEY_ISSUER_NAME)) {
+            for (i = 0; i < sk_X509_num(chain); i++) {
+                X509 *xtmp = sk_X509_value(chain, i);
+                if (ssl_check_ca_name(ca_dn, xtmp)) {
+                    rv |= CERT_PKEY_ISSUER_NAME;
+                    break;
+                }
+            }
+        }
+        if (!check_flags && !(rv & CERT_PKEY_ISSUER_NAME))
+            goto end;
+    } else
+        rv |= CERT_PKEY_ISSUER_NAME | CERT_PKEY_CERT_TYPE;
+
+    if (!check_flags || (rv & check_flags) == check_flags)
+        rv |= CERT_PKEY_VALID;
+
+ end:
+
+    if (TLS1_get_version(s) >= TLS1_2_VERSION) {
+        if (cpk->valid_flags & CERT_PKEY_EXPLICIT_SIGN)
+            rv |= CERT_PKEY_EXPLICIT_SIGN | CERT_PKEY_SIGN;
+        else if (cpk->digest)
+            rv |= CERT_PKEY_SIGN;
+    } else
+        rv |= CERT_PKEY_SIGN | CERT_PKEY_EXPLICIT_SIGN;
+
+    /*
+     * When checking a CERT_PKEY structure all flags are irrelevant if the
+     * chain is invalid.
+     */
+    if (!check_flags) {
+        if (rv & CERT_PKEY_VALID)
+            cpk->valid_flags = rv;
+        else {
+            /* Preserve explicit sign flag, clear rest */
+            cpk->valid_flags &= CERT_PKEY_EXPLICIT_SIGN;
+            return 0;
+        }
+    }
+    return rv;
+}
+
+/* Set validity of certificates in an SSL structure */
+void tls1_set_cert_validity(SSL *s)
+{
+    tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_RSA_ENC);
+    tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_RSA_SIGN);
+    tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_DSA_SIGN);
+    tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_DH_RSA);
+    tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_DH_DSA);
+    tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_ECC);
+}
+
+/* User level utility function to check a chain is suitable */
+int SSL_check_chain(SSL *s, X509 *x, EVP_PKEY *pk, STACK_OF(X509) *chain)
+{
+    return tls1_check_chain(s, x, pk, chain, -1);
+}
+
 #endif
index 4a1b052..335d57b 100644 (file)
@@ -72,10 +72,13 @@ static const SSL_METHOD *tls1_get_method(int ver)
 }
 
 IMPLEMENT_tls_meth_func(TLS1_2_VERSION, TLSv1_2_method,
-                        ssl3_accept, ssl3_connect, tls1_get_method)
+                        ssl3_accept,
+                        ssl3_connect, tls1_get_method, TLSv1_2_enc_data)
 
     IMPLEMENT_tls_meth_func(TLS1_1_VERSION, TLSv1_1_method,
-                        ssl3_accept, ssl3_connect, tls1_get_method)
+                        ssl3_accept,
+                        ssl3_connect, tls1_get_method, TLSv1_1_enc_data)
 
     IMPLEMENT_tls_meth_func(TLS1_VERSION, TLSv1_method,
-                        ssl3_accept, ssl3_connect, tls1_get_method)
+                        ssl3_accept,
+                        ssl3_connect, tls1_get_method, TLSv1_enc_data)
index 076ec86..8c6b3df 100644 (file)
@@ -78,12 +78,15 @@ static const SSL_METHOD *tls1_get_server_method(int ver)
 
 IMPLEMENT_tls_meth_func(TLS1_2_VERSION, TLSv1_2_server_method,
                         ssl3_accept,
-                        ssl_undefined_function, tls1_get_server_method)
+                        ssl_undefined_function,
+                        tls1_get_server_method, TLSv1_2_enc_data)
 
     IMPLEMENT_tls_meth_func(TLS1_1_VERSION, TLSv1_1_server_method,
                         ssl3_accept,
-                        ssl_undefined_function, tls1_get_server_method)
+                        ssl_undefined_function,
+                        tls1_get_server_method, TLSv1_1_enc_data)
 
     IMPLEMENT_tls_meth_func(TLS1_VERSION, TLSv1_server_method,
                         ssl3_accept,
-                        ssl_undefined_function, tls1_get_server_method)
+                        ssl_undefined_function,
+                        tls1_get_server_method, TLSv1_enc_data)
diff --git a/ssl/t1_trce.c b/ssl/t1_trce.c
new file mode 100644 (file)
index 0000000..c5e21df
--- /dev/null
@@ -0,0 +1,1266 @@
+/* ssl/t1_trce.c */
+/*
+ * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project.
+ */
+/* ====================================================================
+ * Copyright (c) 2012 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include "ssl_locl.h"
+
+#ifndef OPENSSL_NO_SSL_TRACE
+
+/* Packet trace support for OpenSSL */
+
+/*
+ * One row of a lookup table that pairs a numeric protocol value with the
+ * text printed for it in trace output.
+ */
+typedef struct {
+    int num;                    /* numeric value to match */
+    const char *name;           /* text returned for that value */
+} ssl_trace_tbl;
+
+/*
+ * Convenience wrappers that pass the element count of a table along with
+ * the table itself. The table argument must be an actual array (not a
+ * pointer), because the count is computed with sizeof.
+ */
+# define ssl_trace_str(val, tbl) \
+        do_ssl_trace_str(val, tbl, sizeof(tbl)/sizeof(ssl_trace_tbl))
+
+# define ssl_trace_list(bio, indent, msg, msglen, value, table) \
+        do_ssl_trace_list(bio, indent, msg, msglen, value, \
+         table, sizeof(table)/sizeof(ssl_trace_tbl))
+
+/*
+ * Search the first "ntbl" rows of "tbl" for "val" and return the name of
+ * the first matching row, or the string "UNKNOWN" when no row matches.
+ */
+static const char *do_ssl_trace_str(int val, ssl_trace_tbl *tbl, size_t ntbl)
+{
+    size_t idx = 0;
+
+    while (idx < ntbl) {
+        if (tbl[idx].num == val)
+            return tbl[idx].name;
+        idx++;
+    }
+    return "UNKNOWN";
+}
+
+/*
+ * Print every value of a packed list, one per line, as "name (number)".
+ * Each value occupies "vlen" bytes; a two byte value is read big-endian,
+ * any other width uses only the first byte. Returns 0 without printing
+ * anything if "msglen" is not a multiple of "vlen", otherwise 1.
+ */
+static int do_ssl_trace_list(BIO *bio, int indent,
+                             const unsigned char *msg, size_t msglen,
+                             size_t vlen, ssl_trace_tbl *tbl, size_t ntbl)
+{
+    const unsigned char *cur;
+    size_t left;
+
+    if ((msglen % vlen) != 0)
+        return 0;
+    for (cur = msg, left = msglen; left != 0; cur += vlen, left -= vlen) {
+        int item = cur[0];
+
+        if (vlen == 2)
+            item = (item << 8) | cur[1];
+        BIO_indent(bio, indent, 80);
+        BIO_printf(bio, "%s (%d)\n", do_ssl_trace_str(item, tbl, ntbl), item);
+    }
+    return 1;
+}
+
+/*
+ * Version number: names for the 16-bit protocol version values, looked up
+ * by ssl_print_version().
+ */
+
+static ssl_trace_tbl ssl_version_tbl[] = {
+    {SSL2_VERSION, "SSL 2.0"},
+    {SSL3_VERSION, "SSL 3.0"},
+    {TLS1_VERSION, "TLS 1.0"},
+    {TLS1_1_VERSION, "TLS 1.1"},
+    {TLS1_2_VERSION, "TLS 1.2"},
+    {DTLS1_VERSION, "DTLS 1.0"},
+    {DTLS1_2_VERSION, "DTLS 1.2"},
+    {DTLS1_BAD_VER, "DTLS 1.0 (bad)"}
+};
+
+/*
+ * Names for the record type constants (SSL3_RT_* / TLS1_RT_HEARTBEAT).
+ * NOTE(review): presumably indexed by the record header content type; the
+ * user of this table is not visible in this part of the file.
+ */
+static ssl_trace_tbl ssl_content_tbl[] = {
+    {SSL3_RT_CHANGE_CIPHER_SPEC, "ChangeCipherSpec"},
+    {SSL3_RT_ALERT, "Alert"},
+    {SSL3_RT_HANDSHAKE, "Handshake"},
+    {SSL3_RT_APPLICATION_DATA, "ApplicationData"},
+    {TLS1_RT_HEARTBEAT, "HeartBeat"}
+};
+
+/*
+ * Handshake types: names for the handshake message type constants.
+ * Each constant is listed exactly once; lookup returns the first match, so
+ * repeated rows would never be reached.
+ */
+static ssl_trace_tbl ssl_handshake_tbl[] = {
+    {SSL3_MT_HELLO_REQUEST, "HelloRequest"},
+    {SSL3_MT_CLIENT_HELLO, "ClientHello"},
+    {SSL3_MT_SERVER_HELLO, "ServerHello"},
+    {DTLS1_MT_HELLO_VERIFY_REQUEST, "HelloVerifyRequest"},
+    {SSL3_MT_NEWSESSION_TICKET, "NewSessionTicket"},
+    {SSL3_MT_CERTIFICATE, "Certificate"},
+    {SSL3_MT_SERVER_KEY_EXCHANGE, "ServerKeyExchange"},
+    {SSL3_MT_CERTIFICATE_REQUEST, "CertificateRequest"},
+    {SSL3_MT_CLIENT_KEY_EXCHANGE, "ClientKeyExchange"},
+    {SSL3_MT_CERTIFICATE_STATUS, "CertificateStatus"},
+    {SSL3_MT_SERVER_DONE, "ServerHelloDone"},
+    {SSL3_MT_CERTIFICATE_VERIFY, "CertificateVerify"},
+    {SSL3_MT_FINISHED, "Finished"}
+};
+
+/*
+ * Cipher suites: names for the 16-bit cipher suite values. The hello
+ * printers build the lookup key from the two wire bytes, first byte in the
+ * high-order position.
+ */
+static ssl_trace_tbl ssl_ciphers_tbl[] = {
+    {0x0000, "SSL_NULL_WITH_NULL_NULL"},
+    {0x0001, "SSL_RSA_WITH_NULL_MD5"},
+    {0x0002, "SSL_RSA_WITH_NULL_SHA"},
+    {0x0003, "SSL_RSA_EXPORT_WITH_RC4_40_MD5"},
+    {0x0004, "SSL_RSA_WITH_RC4_128_MD5"},
+    {0x0005, "SSL_RSA_WITH_RC4_128_SHA"},
+    {0x0006, "SSL_RSA_EXPORT_WITH_RC2_CBC_40_MD5"},
+    {0x0007, "SSL_RSA_WITH_IDEA_CBC_SHA"},
+    {0x0008, "SSL_RSA_EXPORT_WITH_DES40_CBC_SHA"},
+    {0x0009, "SSL_RSA_WITH_DES_CBC_SHA"},
+    {0x000A, "SSL_RSA_WITH_3DES_EDE_CBC_SHA"},
+    {0x000B, "SSL_DH_DSS_EXPORT_WITH_DES40_CBC_SHA"},
+    {0x000C, "SSL_DH_DSS_WITH_DES_CBC_SHA"},
+    {0x000D, "SSL_DH_DSS_WITH_3DES_EDE_CBC_SHA"},
+    {0x000E, "SSL_DH_RSA_EXPORT_WITH_DES40_CBC_SHA"},
+    {0x000F, "SSL_DH_RSA_WITH_DES_CBC_SHA"},
+    {0x0010, "SSL_DH_RSA_WITH_3DES_EDE_CBC_SHA"},
+    {0x0011, "SSL_DHE_DSS_EXPORT_WITH_DES40_CBC_SHA"},
+    {0x0012, "SSL_DHE_DSS_WITH_DES_CBC_SHA"},
+    {0x0013, "SSL_DHE_DSS_WITH_3DES_EDE_CBC_SHA"},
+    {0x0014, "SSL_DHE_RSA_EXPORT_WITH_DES40_CBC_SHA"},
+    {0x0015, "SSL_DHE_RSA_WITH_DES_CBC_SHA"},
+    {0x0016, "SSL_DHE_RSA_WITH_3DES_EDE_CBC_SHA"},
+    {0x0017, "SSL_DH_anon_EXPORT_WITH_RC4_40_MD5"},
+    {0x0018, "SSL_DH_anon_WITH_RC4_128_MD5"},
+    {0x0019, "SSL_DH_anon_EXPORT_WITH_DES40_CBC_SHA"},
+    {0x001A, "SSL_DH_anon_WITH_DES_CBC_SHA"},
+    {0x001B, "SSL_DH_anon_WITH_3DES_EDE_CBC_SHA"},
+    {0x001D, "SSL_FORTEZZA_KEA_WITH_FORTEZZA_CBC_SHA"},
+    {0x001E, "SSL_FORTEZZA_KEA_WITH_RC4_128_SHA"},
+    {0x001F, "TLS_KRB5_WITH_3DES_EDE_CBC_SHA"},
+    {0x0020, "TLS_KRB5_WITH_RC4_128_SHA"},
+    {0x0021, "TLS_KRB5_WITH_IDEA_CBC_SHA"},
+    {0x0022, "TLS_KRB5_WITH_DES_CBC_MD5"},
+    {0x0023, "TLS_KRB5_WITH_3DES_EDE_CBC_MD5"},
+    {0x0024, "TLS_KRB5_WITH_RC4_128_MD5"},
+    {0x0025, "TLS_KRB5_WITH_IDEA_CBC_MD5"},
+    {0x0026, "TLS_KRB5_EXPORT_WITH_DES_CBC_40_SHA"},
+    {0x0027, "TLS_KRB5_EXPORT_WITH_RC2_CBC_40_SHA"},
+    {0x0028, "TLS_KRB5_EXPORT_WITH_RC4_40_SHA"},
+    {0x0029, "TLS_KRB5_EXPORT_WITH_DES_CBC_40_MD5"},
+    {0x002A, "TLS_KRB5_EXPORT_WITH_RC2_CBC_40_MD5"},
+    {0x002B, "TLS_KRB5_EXPORT_WITH_RC4_40_MD5"},
+    {0x002F, "TLS_RSA_WITH_AES_128_CBC_SHA"},
+    {0x0030, "TLS_DH_DSS_WITH_AES_128_CBC_SHA"},
+    {0x0031, "TLS_DH_RSA_WITH_AES_128_CBC_SHA"},
+    {0x0032, "TLS_DHE_DSS_WITH_AES_128_CBC_SHA"},
+    {0x0033, "TLS_DHE_RSA_WITH_AES_128_CBC_SHA"},
+    {0x0034, "TLS_DH_anon_WITH_AES_128_CBC_SHA"},
+    {0x0035, "TLS_RSA_WITH_AES_256_CBC_SHA"},
+    {0x0036, "TLS_DH_DSS_WITH_AES_256_CBC_SHA"},
+    {0x0037, "TLS_DH_RSA_WITH_AES_256_CBC_SHA"},
+    {0x0038, "TLS_DHE_DSS_WITH_AES_256_CBC_SHA"},
+    {0x0039, "TLS_DHE_RSA_WITH_AES_256_CBC_SHA"},
+    {0x003A, "TLS_DH_anon_WITH_AES_256_CBC_SHA"},
+    {0x003B, "TLS_RSA_WITH_NULL_SHA256"},
+    {0x003C, "TLS_RSA_WITH_AES_128_CBC_SHA256"},
+    {0x003D, "TLS_RSA_WITH_AES_256_CBC_SHA256"},
+    {0x003E, "TLS_DH_DSS_WITH_AES_128_CBC_SHA256"},
+    {0x003F, "TLS_DH_RSA_WITH_AES_128_CBC_SHA256"},
+    {0x0040, "TLS_DHE_DSS_WITH_AES_128_CBC_SHA256"},
+    {0x0041, "TLS_RSA_WITH_CAMELLIA_128_CBC_SHA"},
+    {0x0042, "TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA"},
+    {0x0043, "TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA"},
+    {0x0044, "TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA"},
+    {0x0045, "TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA"},
+    {0x0046, "TLS_DH_anon_WITH_CAMELLIA_128_CBC_SHA"},
+    {0x0067, "TLS_DHE_RSA_WITH_AES_128_CBC_SHA256"},
+    {0x0068, "TLS_DH_DSS_WITH_AES_256_CBC_SHA256"},
+    {0x0069, "TLS_DH_RSA_WITH_AES_256_CBC_SHA256"},
+    {0x006A, "TLS_DHE_DSS_WITH_AES_256_CBC_SHA256"},
+    {0x006B, "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"},
+    {0x006C, "TLS_DH_anon_WITH_AES_128_CBC_SHA256"},
+    {0x006D, "TLS_DH_anon_WITH_AES_256_CBC_SHA256"},
+    {0x0084, "TLS_RSA_WITH_CAMELLIA_256_CBC_SHA"},
+    {0x0085, "TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA"},
+    {0x0086, "TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA"},
+    {0x0087, "TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA"},
+    {0x0088, "TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA"},
+    {0x0089, "TLS_DH_anon_WITH_CAMELLIA_256_CBC_SHA"},
+    {0x008A, "TLS_PSK_WITH_RC4_128_SHA"},
+    {0x008B, "TLS_PSK_WITH_3DES_EDE_CBC_SHA"},
+    {0x008C, "TLS_PSK_WITH_AES_128_CBC_SHA"},
+    {0x008D, "TLS_PSK_WITH_AES_256_CBC_SHA"},
+    {0x008E, "TLS_DHE_PSK_WITH_RC4_128_SHA"},
+    {0x008F, "TLS_DHE_PSK_WITH_3DES_EDE_CBC_SHA"},
+    {0x0090, "TLS_DHE_PSK_WITH_AES_128_CBC_SHA"},
+    {0x0091, "TLS_DHE_PSK_WITH_AES_256_CBC_SHA"},
+    {0x0092, "TLS_RSA_PSK_WITH_RC4_128_SHA"},
+    {0x0093, "TLS_RSA_PSK_WITH_3DES_EDE_CBC_SHA"},
+    {0x0094, "TLS_RSA_PSK_WITH_AES_128_CBC_SHA"},
+    {0x0095, "TLS_RSA_PSK_WITH_AES_256_CBC_SHA"},
+    {0x0096, "TLS_RSA_WITH_SEED_CBC_SHA"},
+    {0x0097, "TLS_DH_DSS_WITH_SEED_CBC_SHA"},
+    {0x0098, "TLS_DH_RSA_WITH_SEED_CBC_SHA"},
+    {0x0099, "TLS_DHE_DSS_WITH_SEED_CBC_SHA"},
+    {0x009A, "TLS_DHE_RSA_WITH_SEED_CBC_SHA"},
+    {0x009B, "TLS_DH_anon_WITH_SEED_CBC_SHA"},
+    {0x009C, "TLS_RSA_WITH_AES_128_GCM_SHA256"},
+    {0x009D, "TLS_RSA_WITH_AES_256_GCM_SHA384"},
+    {0x009E, "TLS_DHE_RSA_WITH_AES_128_GCM_SHA256"},
+    {0x009F, "TLS_DHE_RSA_WITH_AES_256_GCM_SHA384"},
+    {0x00A0, "TLS_DH_RSA_WITH_AES_128_GCM_SHA256"},
+    {0x00A1, "TLS_DH_RSA_WITH_AES_256_GCM_SHA384"},
+    {0x00A2, "TLS_DHE_DSS_WITH_AES_128_GCM_SHA256"},
+    {0x00A3, "TLS_DHE_DSS_WITH_AES_256_GCM_SHA384"},
+    {0x00A4, "TLS_DH_DSS_WITH_AES_128_GCM_SHA256"},
+    {0x00A5, "TLS_DH_DSS_WITH_AES_256_GCM_SHA384"},
+    {0x00A6, "TLS_DH_anon_WITH_AES_128_GCM_SHA256"},
+    {0x00A7, "TLS_DH_anon_WITH_AES_256_GCM_SHA384"},
+    {0x00A8, "TLS_PSK_WITH_AES_128_GCM_SHA256"},
+    {0x00A9, "TLS_PSK_WITH_AES_256_GCM_SHA384"},
+    {0x00AA, "TLS_DHE_PSK_WITH_AES_128_GCM_SHA256"},
+    {0x00AB, "TLS_DHE_PSK_WITH_AES_256_GCM_SHA384"},
+    {0x00AC, "TLS_RSA_PSK_WITH_AES_128_GCM_SHA256"},
+    {0x00AD, "TLS_RSA_PSK_WITH_AES_256_GCM_SHA384"},
+    {0x00AE, "TLS_PSK_WITH_AES_128_CBC_SHA256"},
+    {0x00AF, "TLS_PSK_WITH_AES_256_CBC_SHA384"},
+    {0x00B0, "TLS_PSK_WITH_NULL_SHA256"},
+    {0x00B1, "TLS_PSK_WITH_NULL_SHA384"},
+    {0x00B2, "TLS_DHE_PSK_WITH_AES_128_CBC_SHA256"},
+    {0x00B3, "TLS_DHE_PSK_WITH_AES_256_CBC_SHA384"},
+    {0x00B4, "TLS_DHE_PSK_WITH_NULL_SHA256"},
+    {0x00B5, "TLS_DHE_PSK_WITH_NULL_SHA384"},
+    {0x00B6, "TLS_RSA_PSK_WITH_AES_128_CBC_SHA256"},
+    {0x00B7, "TLS_RSA_PSK_WITH_AES_256_CBC_SHA384"},
+    {0x00B8, "TLS_RSA_PSK_WITH_NULL_SHA256"},
+    {0x00B9, "TLS_RSA_PSK_WITH_NULL_SHA384"},
+    {0x00BA, "TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256"},
+    {0x00BB, "TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA256"},
+    {0x00BC, "TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA256"},
+    {0x00BD, "TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA256"},
+    {0x00BE, "TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256"},
+    {0x00BF, "TLS_DH_anon_WITH_CAMELLIA_128_CBC_SHA256"},
+    {0x00C0, "TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256"},
+    {0x00C1, "TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA256"},
+    {0x00C2, "TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA256"},
+    {0x00C3, "TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA256"},
+    {0x00C4, "TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256"},
+    {0x00C5, "TLS_DH_anon_WITH_CAMELLIA_256_CBC_SHA256"},
+    {0x00FF, "TLS_EMPTY_RENEGOTIATION_INFO_SCSV"},
+    {0xC001, "TLS_ECDH_ECDSA_WITH_NULL_SHA"},
+    {0xC002, "TLS_ECDH_ECDSA_WITH_RC4_128_SHA"},
+    {0xC003, "TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA"},
+    {0xC004, "TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA"},
+    {0xC005, "TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA"},
+    {0xC006, "TLS_ECDHE_ECDSA_WITH_NULL_SHA"},
+    {0xC007, "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA"},
+    {0xC008, "TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA"},
+    {0xC009, "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA"},
+    {0xC00A, "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA"},
+    {0xC00B, "TLS_ECDH_RSA_WITH_NULL_SHA"},
+    {0xC00C, "TLS_ECDH_RSA_WITH_RC4_128_SHA"},
+    {0xC00D, "TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA"},
+    {0xC00E, "TLS_ECDH_RSA_WITH_AES_128_CBC_SHA"},
+    {0xC00F, "TLS_ECDH_RSA_WITH_AES_256_CBC_SHA"},
+    {0xC010, "TLS_ECDHE_RSA_WITH_NULL_SHA"},
+    {0xC011, "TLS_ECDHE_RSA_WITH_RC4_128_SHA"},
+    {0xC012, "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA"},
+    {0xC013, "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA"},
+    {0xC014, "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA"},
+    {0xC015, "TLS_ECDH_anon_WITH_NULL_SHA"},
+    {0xC016, "TLS_ECDH_anon_WITH_RC4_128_SHA"},
+    {0xC017, "TLS_ECDH_anon_WITH_3DES_EDE_CBC_SHA"},
+    {0xC018, "TLS_ECDH_anon_WITH_AES_128_CBC_SHA"},
+    {0xC019, "TLS_ECDH_anon_WITH_AES_256_CBC_SHA"},
+    {0xC01A, "TLS_SRP_SHA_WITH_3DES_EDE_CBC_SHA"},
+    {0xC01B, "TLS_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA"},
+    {0xC01C, "TLS_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA"},
+    {0xC01D, "TLS_SRP_SHA_WITH_AES_128_CBC_SHA"},
+    {0xC01E, "TLS_SRP_SHA_RSA_WITH_AES_128_CBC_SHA"},
+    {0xC01F, "TLS_SRP_SHA_DSS_WITH_AES_128_CBC_SHA"},
+    {0xC020, "TLS_SRP_SHA_WITH_AES_256_CBC_SHA"},
+    {0xC021, "TLS_SRP_SHA_RSA_WITH_AES_256_CBC_SHA"},
+    {0xC022, "TLS_SRP_SHA_DSS_WITH_AES_256_CBC_SHA"},
+    {0xC023, "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256"},
+    {0xC024, "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384"},
+    {0xC025, "TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256"},
+    {0xC026, "TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384"},
+    {0xC027, "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256"},
+    {0xC028, "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384"},
+    {0xC029, "TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256"},
+    {0xC02A, "TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384"},
+    {0xC02B, "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256"},
+    {0xC02C, "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384"},
+    {0xC02D, "TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256"},
+    {0xC02E, "TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384"},
+    {0xC02F, "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256"},
+    {0xC030, "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384"},
+    {0xC031, "TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256"},
+    {0xC032, "TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384"},
+    {0xFEFE, "SSL_RSA_FIPS_WITH_DES_CBC_SHA"},
+    {0xFEFF, "SSL_RSA_FIPS_WITH_3DES_EDE_CBC_SHA"},
+};
+
+/*
+ * Compression methods: names for the one-byte compression method values
+ * printed by the ClientHello and ServerHello printers.
+ */
+static ssl_trace_tbl ssl_comp_tbl[] = {
+    {0x0000, "No Compression"},
+    {0x0001, "Zlib Compression"}
+};
+
+/*
+ * Extensions: names for the hello extension type values, looked up by
+ * ssl_print_extension(). The opaque_prf_input row is only present when the
+ * corresponding TLSEXT_TYPE_ macro is defined.
+ */
+static ssl_trace_tbl ssl_exts_tbl[] = {
+    {TLSEXT_TYPE_server_name, "server_name"},
+    {TLSEXT_TYPE_max_fragment_length, "max_fragment_length"},
+    {TLSEXT_TYPE_client_certificate_url, "client_certificate_url"},
+    {TLSEXT_TYPE_trusted_ca_keys, "trusted_ca_keys"},
+    {TLSEXT_TYPE_truncated_hmac, "truncated_hmac"},
+    {TLSEXT_TYPE_status_request, "status_request"},
+    {TLSEXT_TYPE_user_mapping, "user_mapping"},
+    {TLSEXT_TYPE_client_authz, "client_authz"},
+    {TLSEXT_TYPE_server_authz, "server_authz"},
+    {TLSEXT_TYPE_cert_type, "cert_type"},
+    {TLSEXT_TYPE_elliptic_curves, "elliptic_curves"},
+    {TLSEXT_TYPE_ec_point_formats, "ec_point_formats"},
+    {TLSEXT_TYPE_srp, "srp"},
+    {TLSEXT_TYPE_signature_algorithms, "signature_algorithms"},
+    {TLSEXT_TYPE_use_srtp, "use_srtp"},
+    {TLSEXT_TYPE_heartbeat, "heartbeat"},
+    {TLSEXT_TYPE_session_ticket, "session_ticket"},
+# ifdef TLSEXT_TYPE_opaque_prf_input
+    {TLSEXT_TYPE_opaque_prf_input, "opaque_prf_input"},
+# endif
+    {TLSEXT_TYPE_renegotiate, "renegotiate"},
+    {TLSEXT_TYPE_next_proto_neg, "next_proto_neg"},
+    {TLSEXT_TYPE_padding, "padding"}
+};
+
+/*
+ * Names for the 16-bit curve identifiers printed for the elliptic_curves
+ * extension and for a named curve in a ServerKeyExchange message.
+ */
+static ssl_trace_tbl ssl_curve_tbl[] = {
+    {1, "sect163k1 (K-163)"},
+    {2, "sect163r1"},
+    {3, "sect163r2 (B-163)"},
+    {4, "sect193r1"},
+    {5, "sect193r2"},
+    {6, "sect233k1 (K-233)"},
+    {7, "sect233r1 (B-233)"},
+    {8, "sect239k1"},
+    {9, "sect283k1 (K-283)"},
+    {10, "sect283r1 (B-283)"},
+    {11, "sect409k1 (K-409)"},
+    {12, "sect409r1 (B-409)"},
+    {13, "sect571k1 (K-571)"},
+    {14, "sect571r1 (B-571)"},
+    {15, "secp160k1"},
+    {16, "secp160r1"},
+    {17, "secp160r2"},
+    {18, "secp192k1"},
+    {19, "secp192r1 (P-192)"},
+    {20, "secp224k1"},
+    {21, "secp224r1 (P-224)"},
+    {22, "secp256k1"},
+    {23, "secp256r1 (P-256)"},
+    {24, "secp384r1 (P-384)"},
+    {25, "secp521r1 (P-521)"},
+    {26, "brainpoolP256r1"},
+    {27, "brainpoolP384r1"},
+    {28, "brainpoolP512r1"},
+    {0xFF01, "arbitrary_explicit_prime_curves"},
+    {0xFF02, "arbitrary_explicit_char2_curves"}
+};
+
+/* Names for the one-byte entries of the ec_point_formats extension */
+static ssl_trace_tbl ssl_point_tbl[] = {
+    {0, "uncompressed"},
+    {1, "ansiX962_compressed_prime"},
+    {2, "ansiX962_compressed_char2"}
+};
+
+/*
+ * Names for the first byte (the digest) of each two-byte signature
+ * algorithm pair.
+ */
+static ssl_trace_tbl ssl_md_tbl[] = {
+    {0, "none"},
+    {1, "md5"},
+    {2, "sha1"},
+    {3, "sha224"},
+    {4, "sha256"},
+    {5, "sha384"},
+    {6, "sha512"}
+};
+
+/*
+ * Names for the second byte (the signature type) of each two-byte
+ * signature algorithm pair.
+ */
+static ssl_trace_tbl ssl_sig_tbl[] = {
+    {0, "anonymous"},
+    {1, "rsa"},
+    {2, "dsa"},
+    {3, "ecdsa"}
+};
+
+/* Names for the HeartbeatMode byte carried in the heartbeat extension */
+static ssl_trace_tbl ssl_hb_tbl[] = {
+    {1, "peer_allowed_to_send"},
+    {2, "peer_not_allowed_to_send"}
+};
+
+/*
+ * Names for heartbeat message types.
+ * NOTE(review): presumably used when tracing heartbeat records; the user of
+ * this table is not visible in this part of the file.
+ */
+static ssl_trace_tbl ssl_hb_type_tbl[] = {
+    {1, "heartbeat_request"},
+    {2, "heartbeat_response"}
+};
+
+/*
+ * Names for certificate type values.
+ * NOTE(review): presumably the certificate_types list of a
+ * CertificateRequest message; the user of this table is not visible in this
+ * part of the file.
+ */
+static ssl_trace_tbl ssl_ctype_tbl[] = {
+    {1, "rsa_sign"},
+    {2, "dss_sign"},
+    {3, "rsa_fixed_dh"},
+    {4, "dss_fixed_dh"},
+    {5, "rsa_ephemeral_dh"},
+    {6, "dss_ephemeral_dh"},
+    {20, "fortezza_dms"},
+    {64, "ecdsa_sign"},
+    {65, "rsa_fixed_ecdh"},
+    {66, "ecdsa_fixed_ecdh"}
+};
+
+/*
+ * Names for the TLS1_RT_CRYPTO_* identifiers; the MAC, key and IV rows
+ * combine an item code with a read or write direction flag.
+ * NOTE(review): the user of this table is not visible in this part of the
+ * file.
+ */
+static ssl_trace_tbl ssl_crypto_tbl[] = {
+    {TLS1_RT_CRYPTO_PREMASTER, "Premaster Secret"},
+    {TLS1_RT_CRYPTO_CLIENT_RANDOM, "Client Random"},
+    {TLS1_RT_CRYPTO_SERVER_RANDOM, "Server Random"},
+    {TLS1_RT_CRYPTO_MASTER, "Master Secret"},
+    {TLS1_RT_CRYPTO_MAC | TLS1_RT_CRYPTO_WRITE, "Write Mac Secret"},
+    {TLS1_RT_CRYPTO_MAC | TLS1_RT_CRYPTO_READ, "Read Mac Secret"},
+    {TLS1_RT_CRYPTO_KEY | TLS1_RT_CRYPTO_WRITE, "Write Key"},
+    {TLS1_RT_CRYPTO_KEY | TLS1_RT_CRYPTO_READ, "Read Key"},
+    {TLS1_RT_CRYPTO_IV | TLS1_RT_CRYPTO_WRITE, "Write IV"},
+    {TLS1_RT_CRYPTO_IV | TLS1_RT_CRYPTO_READ, "Read IV"},
+    {TLS1_RT_CRYPTO_FIXED_IV | TLS1_RT_CRYPTO_WRITE, "Write IV (fixed part)"},
+    {TLS1_RT_CRYPTO_FIXED_IV | TLS1_RT_CRYPTO_READ, "Read IV (fixed part)"}
+};
+
+/*
+ * Write one indented line of the form "name (len=N): " followed by the
+ * "msglen" bytes at "msg" as upper-case hex digits and a newline.
+ */
+static void ssl_print_hex(BIO *bio, int indent, const char *name,
+                          const unsigned char *msg, size_t msglen)
+{
+    size_t left;
+
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "%s (len=%d): ", name, (int)msglen);
+    for (left = msglen; left > 0; left--, msg++)
+        BIO_printf(bio, "%02X", msg[0]);
+    BIO_puts(bio, "\n");
+}
+
+/*
+ * Print a length-prefixed byte string found at *pmsg. The prefix is "nlen"
+ * bytes wide (one byte, or two bytes big-endian when nlen > 1). On success
+ * the contents are printed with ssl_print_hex(), *pmsg and *pmsglen are
+ * advanced past prefix and contents, and 1 is returned. Returns 0, leaving
+ * both untouched, if the buffer is too short.
+ */
+static int ssl_print_hexbuf(BIO *bio, int indent,
+                            const char *name, size_t nlen,
+                            const unsigned char **pmsg, size_t *pmsglen)
+{
+    const unsigned char *start = *pmsg;
+    size_t avail = *pmsglen;
+    size_t bodylen, total;
+
+    if (avail < nlen)
+        return 0;
+    bodylen = start[0];
+    if (nlen > 1)
+        bodylen = (bodylen << 8) | start[1];
+    total = nlen + bodylen;
+    if (avail < total)
+        return 0;
+    ssl_print_hex(bio, indent, name, start + nlen, bodylen);
+    *pmsg = start + total;
+    *pmsglen = avail - total;
+    return 1;
+}
+
+/*
+ * Print the two-byte (big-endian) protocol version at *pmsg as
+ * "name=0x<hex> (<text>)" and advance *pmsg / *pmsglen by two bytes.
+ * Returns 0 if fewer than two bytes are available, otherwise 1.
+ */
+static int ssl_print_version(BIO *bio, int indent, const char *name,
+                             const unsigned char **pmsg, size_t *pmsglen)
+{
+    const unsigned char *bytes = *pmsg;
+    int version;
+
+    if (*pmsglen < 2)
+        return 0;
+    version = (bytes[0] << 8) | bytes[1];
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "%s=0x%x (%s)\n",
+               name, version, ssl_trace_str(version, ssl_version_tbl));
+    *pmsg = bytes + 2;
+    *pmsglen -= 2;
+    return 1;
+}
+
+/*
+ * Print the 32-byte hello random value at *pmsg: the first four bytes as a
+ * big-endian "gmt_unix_time" and the remaining 28 as "random_bytes".
+ * Advances *pmsg / *pmsglen by 32 and returns 1, or returns 0 if fewer than
+ * 32 bytes are available.
+ */
+static int ssl_print_random(BIO *bio, int indent,
+                            const unsigned char **pmsg, size_t *pmsglen)
+{
+    unsigned int tm;
+    const unsigned char *p = *pmsg;
+    if (*pmsglen < 32)
+        return 0;
+    /*
+     * Convert each byte to unsigned int before shifting: "p[0] << 24" would
+     * shift a promoted signed int, which is undefined when the top bit of
+     * the byte is set.
+     */
+    tm = ((unsigned int)p[0] << 24) | ((unsigned int)p[1] << 16)
+        | ((unsigned int)p[2] << 8) | (unsigned int)p[3];
+    p += 4;
+    BIO_indent(bio, indent, 80);
+    BIO_puts(bio, "Random:\n");
+    BIO_indent(bio, indent + 2, 80);
+    BIO_printf(bio, "gmt_unix_time=0x%08X\n", tm);
+    ssl_print_hex(bio, indent + 2, "random_bytes", p, 28);
+    *pmsg += 32;
+    *pmsglen -= 32;
+    return 1;
+}
+
+/*
+ * Print a signature found at *pmsg. When SSL_USE_SIGALGS(s) is true the
+ * signature is preceded by a two-byte digest/signature-type pair, which is
+ * printed and consumed first. The signature itself is a byte string with a
+ * two-byte length prefix. Returns 0 on a short buffer, otherwise 1.
+ */
+static int ssl_print_signature(BIO *bio, int indent, SSL *s,
+                               const unsigned char **pmsg, size_t *pmsglen)
+{
+    int hash_id, sig_id;
+
+    if (*pmsglen < 2)
+        return 0;
+    if (SSL_USE_SIGALGS(s)) {
+        hash_id = (*pmsg)[0];
+        sig_id = (*pmsg)[1];
+        BIO_indent(bio, indent, 80);
+        BIO_printf(bio, "Signature Algorithm %s+%s (%d+%d)\n",
+                   ssl_trace_str(hash_id, ssl_md_tbl),
+                   ssl_trace_str(sig_id, ssl_sig_tbl), hash_id, sig_id);
+        *pmsg += 2;
+        *pmsglen -= 2;
+    }
+    return ssl_print_hexbuf(bio, indent, "Signature", 2, pmsg, pmsglen);
+}
+
+/*
+ * Print a single hello extension: a header line with the extension type and
+ * length, followed by a decoded body for the types handled below or an
+ * indented dump for every other type.
+ *
+ * "server" is non-zero when the extension comes from a ServerHello (the
+ * ClientHello printer passes 0); it only affects the renegotiate extension.
+ * "ext" / "extlen" describe the extension body without its type and length
+ * fields.
+ *
+ * Returns 1 on success, 0 if the body length is inconsistent with the
+ * encoding expected for that extension type.
+ */
+static int ssl_print_extension(BIO *bio, int indent, int server, int extype,
+                               const unsigned char *ext, size_t extlen)
+{
+    size_t xlen;
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "extension_type=%s(%d), length=%d\n",
+               ssl_trace_str(extype, ssl_exts_tbl), extype, (int)extlen);
+    switch (extype) {
+    case TLSEXT_TYPE_ec_point_formats:
+        /* one-byte list length followed by one-byte entries */
+        if (extlen < 1)
+            return 0;
+        xlen = ext[0];
+        if (extlen != xlen + 1)
+            return 0;
+        return ssl_trace_list(bio, indent + 2,
+                              ext + 1, xlen, 1, ssl_point_tbl);
+
+    case TLSEXT_TYPE_elliptic_curves:
+        /* two-byte list length followed by two-byte entries */
+        if (extlen < 2)
+            return 0;
+        xlen = (ext[0] << 8) | ext[1];
+        if (extlen != xlen + 2)
+            return 0;
+        return ssl_trace_list(bio, indent + 2,
+                              ext + 2, xlen, 2, ssl_curve_tbl);
+
+    case TLSEXT_TYPE_signature_algorithms:
+
+        /* two-byte list length followed by (digest, signature) byte pairs */
+        if (extlen < 2)
+            return 0;
+        xlen = (ext[0] << 8) | ext[1];
+        if (extlen != xlen + 2)
+            return 0;
+        if (xlen & 1)
+            return 0;
+        ext += 2;
+        while (xlen > 0) {
+            BIO_indent(bio, indent + 2, 80);
+            BIO_printf(bio, "%s+%s (%d+%d)\n",
+                       ssl_trace_str(ext[0], ssl_md_tbl),
+                       ssl_trace_str(ext[1], ssl_sig_tbl), ext[0], ext[1]);
+            xlen -= 2;
+            ext += 2;
+        }
+        break;
+
+    case TLSEXT_TYPE_renegotiate:
+        /* one-byte length followed by the verify data */
+        if (extlen < 1)
+            return 0;
+        xlen = ext[0];
+        if (xlen + 1 != extlen)
+            return 0;
+        ext++;
+        if (xlen) {
+            if (server) {
+                /*
+                 * For a server the data is printed as two equal halves:
+                 * client_verify_data then server_verify_data.
+                 */
+                if (xlen & 1)
+                    return 0;
+                xlen >>= 1;
+            }
+            ssl_print_hex(bio, indent + 4, "client_verify_data", ext, xlen);
+            if (server) {
+                ext += xlen;
+                ssl_print_hex(bio, indent + 4,
+                              "server_verify_data", ext, xlen);
+            }
+        } else {
+            BIO_indent(bio, indent + 4, 80);
+            BIO_puts(bio, "<EMPTY>\n");
+        }
+        break;
+
+    case TLSEXT_TYPE_heartbeat:
+        /* exactly one byte: the heartbeat mode */
+        if (extlen != 1)
+            return 0;
+        BIO_indent(bio, indent + 2, 80);
+        BIO_printf(bio, "HeartbeatMode: %s\n",
+                   ssl_trace_str(ext[0], ssl_hb_tbl));
+        break;
+
+    case TLSEXT_TYPE_session_ticket:
+        /* an empty body prints nothing beyond the header line */
+        if (extlen != 0)
+            ssl_print_hex(bio, indent + 4, "ticket", ext, extlen);
+        break;
+
+    default:
+        /* no decoder for this type: dump the raw body */
+        BIO_dump_indent(bio, (char *)ext, extlen, indent + 2);
+    }
+    return 1;
+}
+
+/*
+ * Print the extensions block that trails a hello message. "msg" / "msglen"
+ * cover everything left after the fixed hello fields: either nothing at all
+ * ("No Extensions") or a two-byte total length followed by a sequence of
+ * extensions, each made of a two-byte type, a two-byte length and a body.
+ * "server" is forwarded to ssl_print_extension().
+ *
+ * Returns 1 on success and 0 if any length field is inconsistent with the
+ * amount of data available.
+ */
+static int ssl_print_extensions(BIO *bio, int indent, int server,
+                                const unsigned char *msg, size_t msglen)
+{
+    size_t extslen;
+    BIO_indent(bio, indent, 80);
+    if (msglen == 0) {
+        BIO_puts(bio, "No Extensions\n");
+        return 1;
+    }
+    /*
+     * Both bytes of the length prefix must be present; a single trailing
+     * byte would otherwise be read past and make "msglen - 2" wrap around.
+     */
+    if (msglen < 2)
+        return 0;
+    extslen = (msg[0] << 8) | msg[1];
+    if (extslen != msglen - 2)
+        return 0;
+    msg += 2;
+    msglen = extslen;
+    BIO_printf(bio, "extensions, length = %d\n", (int)msglen);
+    while (msglen > 0) {
+        int extype;
+        size_t extlen;
+        /* each extension starts with a four byte type/length header */
+        if (msglen < 4)
+            return 0;
+        extype = (msg[0] << 8) | msg[1];
+        extlen = (msg[2] << 8) | msg[3];
+        if (msglen < extlen + 4)
+            return 0;
+        msg += 4;
+        if (!ssl_print_extension(bio, indent + 2, server,
+                                 extype, msg, extlen))
+            return 0;
+        msg += extlen;
+        msglen -= extlen + 4;
+    }
+    return 1;
+}
+
+/*
+ * Print a ClientHello body: version, random, session id, the cookie when
+ * SSL_IS_DTLS(ssl) is true, the cipher suite list, the compression method
+ * list and finally any extensions. Returns 0 as soon as a field does not
+ * fit in the remaining data, otherwise 1.
+ */
+static int ssl_print_client_hello(BIO *bio, SSL *ssl, int indent,
+                                  const unsigned char *msg, size_t msglen)
+{
+    size_t listlen;
+    unsigned int suite;
+
+    if (!ssl_print_version(bio, indent, "client_version", &msg, &msglen)
+        || !ssl_print_random(bio, indent, &msg, &msglen)
+        || !ssl_print_hexbuf(bio, indent, "session_id", 1, &msg, &msglen))
+        return 0;
+    if (SSL_IS_DTLS(ssl)
+        && !ssl_print_hexbuf(bio, indent, "cookie", 1, &msg, &msglen))
+        return 0;
+
+    /* cipher suites: two-byte list length, then two bytes per suite */
+    if (msglen < 2)
+        return 0;
+    listlen = (msg[0] << 8) | msg[1];
+    msg += 2;
+    msglen -= 2;
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "cipher_suites (len=%d)\n", (int)listlen);
+    if (msglen < listlen || listlen & 1)
+        return 0;
+    for (; listlen > 0; listlen -= 2, msg += 2, msglen -= 2) {
+        suite = (msg[0] << 8) | msg[1];
+        BIO_indent(bio, indent + 2, 80);
+        BIO_printf(bio, "{0x%02X, 0x%02X} %s\n",
+                   msg[0], msg[1], ssl_trace_str(suite, ssl_ciphers_tbl));
+    }
+
+    /* compression methods: one-byte list length, then one byte each */
+    if (msglen < 1)
+        return 0;
+    listlen = msg[0];
+    msg++;
+    msglen--;
+    if (msglen < listlen)
+        return 0;
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "compression_methods (len=%d)\n", (int)listlen);
+    for (; listlen > 0; listlen--, msg++, msglen--) {
+        BIO_indent(bio, indent + 2, 80);
+        BIO_printf(bio, "%s (0x%02X)\n",
+                   ssl_trace_str(msg[0], ssl_comp_tbl), msg[0]);
+    }
+
+    return ssl_print_extensions(bio, indent, 0, msg, msglen) ? 1 : 0;
+}
+
+/*
+ * Print a HelloVerifyRequest body: the server version followed by the
+ * cookie (one-byte length prefix). Returns 0 if either field does not fit,
+ * otherwise 1.
+ */
+static int dtls_print_hello_vfyrequest(BIO *bio, int indent,
+                                       const unsigned char *msg,
+                                       size_t msglen)
+{
+    return ssl_print_version(bio, indent, "server_version", &msg, &msglen)
+        && ssl_print_hexbuf(bio, indent, "cookie", 1, &msg, &msglen);
+}
+
+/*
+ * Print a ServerHello body: version, random, session id, the selected
+ * cipher suite, the selected compression method and any extensions.
+ * Returns 0 as soon as a field does not fit in the remaining data,
+ * otherwise 1.
+ */
+static int ssl_print_server_hello(BIO *bio, int indent,
+                                  const unsigned char *msg, size_t msglen)
+{
+    unsigned int suite;
+
+    if (!ssl_print_version(bio, indent, "server_version", &msg, &msglen)
+        || !ssl_print_random(bio, indent, &msg, &msglen)
+        || !ssl_print_hexbuf(bio, indent, "session_id", 1, &msg, &msglen))
+        return 0;
+
+    if (msglen < 2)
+        return 0;
+    suite = (msg[0] << 8) | msg[1];
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "cipher_suite {0x%02X, 0x%02X} %s\n",
+               msg[0], msg[1], ssl_trace_str(suite, ssl_ciphers_tbl));
+    msg += 2;
+    msglen -= 2;
+
+    if (msglen < 1)
+        return 0;
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "compression_method: %s (0x%02X)\n",
+               ssl_trace_str(msg[0], ssl_comp_tbl), msg[0]);
+
+    /* whatever follows the compression byte is the extensions block */
+    return ssl_print_extensions(bio, indent, 1, msg + 1, msglen - 1) ? 1 : 0;
+}
+
+/*
+ * Identify the key exchange of the cipher in ssl->s3->tmp.new_cipher.
+ * The algorithm_mkey bits are tested in the order listed below; the first
+ * bit found set is returned and *pname is pointed at its display name.
+ * If none is set, *pname becomes "UNKNOWN" and 0 is returned.
+ */
+static int ssl_get_keyex(const char **pname, SSL *ssl)
+{
+    static const struct {
+        unsigned long mask;
+        const char *label;
+    } kx_names[] = {
+        {SSL_kRSA, "rsa"},
+        {SSL_kDHr, "dh_rsa"},
+        {SSL_kDHd, "dh_dss"},
+        {SSL_kKRB5, "krb5"},
+        {SSL_kEDH, "edh"},
+        {SSL_kEECDH, "EECDH"},
+        {SSL_kECDHr, "ECDH RSA"},
+        {SSL_kECDHe, "ECDH ECDSA"},
+        {SSL_kPSK, "PSK"},
+        {SSL_kSRP, "SRP"},
+        {SSL_kGOST, "GOST"}
+    };
+    unsigned long alg_k = ssl->s3->tmp.new_cipher->algorithm_mkey;
+    size_t i;
+
+    for (i = 0; i < sizeof(kx_names) / sizeof(kx_names[0]); i++) {
+        if (alg_k & kx_names[i].mask) {
+            *pname = kx_names[i].label;
+            return (int)kx_names[i].mask;
+        }
+    }
+    *pname = "UNKNOWN";
+    return 0;
+}
+
+/*
+ * Print a ClientKeyExchange body. The layout depends on the key exchange
+ * reported by ssl_get_keyex() for the pending cipher; key exchange types
+ * without a case below print only the "KeyExchangeAlgorithm=" line.
+ * Returns 0 if a length-prefixed field does not fit, otherwise 1.
+ */
+static int ssl_print_client_keyex(BIO *bio, int indent, SSL *ssl,
+                                  const unsigned char *msg, size_t msglen)
+{
+    const char *algname;
+    int id;
+    id = ssl_get_keyex(&algname, ssl);
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "KeyExchangeAlgorithm=%s\n", algname);
+    switch (id) {
+
+    case SSL_kRSA:
+        /*
+         * For SSL 3.0 the whole message is printed as the secret; for other
+         * versions it is read as a field with a two-byte length prefix.
+         * NOTE(review): the "Encypted" spelling is part of the emitted
+         * output and is left as is.
+         */
+        if (TLS1_get_version(ssl) == SSL3_VERSION) {
+            ssl_print_hex(bio, indent + 2,
+                          "EncyptedPreMasterSecret", msg, msglen);
+        } else {
+            if (!ssl_print_hexbuf(bio, indent + 2,
+                                  "EncyptedPreMasterSecret", 2,
+                                  &msg, &msglen))
+                return 0;
+        }
+        break;
+
+        /* Implicit parameters only allowed for static DH */
+    case SSL_kDHd:
+    case SSL_kDHr:
+        if (msglen == 0) {
+            BIO_indent(bio, indent + 2, 80);
+            BIO_puts(bio, "implicit\n");
+            break;
+        }
+        /* non-empty message: fall through and print it like ephemeral DH */
+    case SSL_kEDH:
+        if (!ssl_print_hexbuf(bio, indent + 2, "dh_Yc", 2, &msg, &msglen))
+            return 0;
+        break;
+
+    case SSL_kECDHr:
+    case SSL_kECDHe:
+        if (msglen == 0) {
+            BIO_indent(bio, indent + 2, 80);
+            BIO_puts(bio, "implicit\n");
+            break;
+        }
+        /* non-empty message: fall through and print it like ephemeral ECDH */
+    case SSL_kEECDH:
+        if (!ssl_print_hexbuf(bio, indent + 2, "ecdh_Yc", 1, &msg, &msglen))
+            return 0;
+        break;
+    }
+
+    return 1;
+}
+
+/*
+ * Print a ServerKeyExchange body: the key exchange parameters for the
+ * pending cipher (as reported by ssl_get_keyex()) followed by the
+ * signature. Key exchange types without a case below go straight to the
+ * signature. Returns 0 if a field does not fit in the remaining data,
+ * otherwise the result of ssl_print_signature().
+ */
+static int ssl_print_server_keyex(BIO *bio, int indent, SSL *ssl,
+                                  const unsigned char *msg, size_t msglen)
+{
+    const char *algname;
+    int id;
+    id = ssl_get_keyex(&algname, ssl);
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "KeyExchangeAlgorithm=%s\n", algname);
+    switch (id) {
+        /* Should never happen */
+    case SSL_kDHd:
+    case SSL_kDHr:
+    case SSL_kECDHr:
+    case SSL_kECDHe:
+        BIO_indent(bio, indent + 2, 80);
+        BIO_printf(bio, "Unexpected Message\n");
+        break;
+
+    case SSL_kRSA:
+
+        /* two fields, each with a two-byte length prefix */
+        if (!ssl_print_hexbuf(bio, indent + 2, "rsa_modulus", 2,
+                              &msg, &msglen))
+            return 0;
+        if (!ssl_print_hexbuf(bio, indent + 2, "rsa_exponent", 2,
+                              &msg, &msglen))
+            return 0;
+        break;
+
+    case SSL_kEDH:
+        /* three fields, each with a two-byte length prefix */
+        if (!ssl_print_hexbuf(bio, indent + 2, "dh_p", 2, &msg, &msglen))
+            return 0;
+        if (!ssl_print_hexbuf(bio, indent + 2, "dh_g", 2, &msg, &msglen))
+            return 0;
+        if (!ssl_print_hexbuf(bio, indent + 2, "dh_Ys", 2, &msg, &msglen))
+            return 0;
+        break;
+
+    case SSL_kEECDH:
+        /* the first byte selects the curve encoding */
+        if (msglen < 1)
+            return 0;
+        BIO_indent(bio, indent + 2, 80);
+        /*
+         * NOTE(review): for the two explicit curve types only a label is
+         * printed and msg is not advanced, so the signature printer below
+         * starts at the curve type byte in those cases.
+         */
+        if (msg[0] == EXPLICIT_PRIME_CURVE_TYPE)
+            BIO_puts(bio, "explicit_prime\n");
+        else if (msg[0] == EXPLICIT_CHAR2_CURVE_TYPE)
+            BIO_puts(bio, "explicit_char2\n");
+        else if (msg[0] == NAMED_CURVE_TYPE) {
+            int curve;
+            /* type byte, two-byte curve id, then the length-prefixed point */
+            if (msglen < 3)
+                return 0;
+            curve = (msg[1] << 8) | msg[2];
+            BIO_printf(bio, "named_curve: %s (%d)\n",
+                       ssl_trace_str(curve, ssl_curve_tbl), curve);
+            msg += 3;
+            msglen -= 3;
+            if (!ssl_print_hexbuf(bio, indent + 2, "point", 1, &msg, &msglen))
+                return 0;
+        }
+        break;
+    }
+    return ssl_print_signature(bio, indent, ssl, &msg, &msglen);
+}
+
+/*
+ * Trace one certificate entry: a three byte big-endian length followed by
+ * that many bytes, which are given to d2i_X509().
+ *
+ * On success *pmsg is advanced past the entry, *pmsglen is reduced by the
+ * same amount and 1 is returned.  0 is returned, with both left untouched,
+ * when the entry does not fit in *pmsglen.  A certificate that fails to
+ * decode is reported in the output but is not treated as an error.
+ */
+static int ssl_print_certificate(BIO *bio, int indent,
+                                 const unsigned char **pmsg, size_t *pmsglen)
+{
+    size_t msglen = *pmsglen;
+    size_t clen;
+    X509 *x;
+    const unsigned char *p = *pmsg, *q;
+
+    if (msglen < 3)
+        return 0;
+    clen = (p[0] << 16) | (p[1] << 8) | p[2];
+    if (msglen < clen + 3)
+        return 0;
+    q = p + 3;
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "ASN.1Cert, length=%d", (int)clen);
+    x = d2i_X509(NULL, &q, clen);
+    if (!x) {
+        BIO_puts(bio, "<UNPARSEABLE CERTIFICATE>\n");
+    } else {
+        /* Print certificate details followed by its PEM form */
+        BIO_puts(bio, "\n------details-----\n");
+        X509_print_ex(bio, x, XN_FLAG_ONELINE, 0);
+        PEM_write_bio_X509(bio, x);
+        BIO_puts(bio, "------------------\n");
+        X509_free(x);
+        /*
+         * Where the decoder stopped is only meaningful after a successful
+         * decode, so the trailing data check is made here rather than
+         * unconditionally.
+         */
+        if (q != p + 3 + clen)
+            BIO_puts(bio, "<TRAILING GARBAGE AFTER CERTIFICATE>\n");
+    }
+    *pmsg += clen + 3;
+    *pmsglen -= clen + 3;
+    return 1;
+}
+
+/*
+ * Trace a certificate list: a three byte big-endian length that must
+ * account for exactly the rest of the message, followed by entries that
+ * are printed one at a time with ssl_print_certificate().
+ *
+ * Returns 1 when the whole list was consumed, 0 on a length mismatch or
+ * when an entry is rejected.
+ */
+static int ssl_print_certificates(BIO *bio, int indent,
+                                  const unsigned char *msg, size_t msglen)
+{
+    size_t remaining;
+
+    if (msglen < 3)
+        return 0;
+
+    remaining = (msg[0] << 16) | (msg[1] << 8) | msg[2];
+    if (remaining + 3 != msglen)
+        return 0;
+    msg += 3;
+
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "certificate_list, length=%d\n", (int)remaining);
+
+    /* Each call moves msg forward and shrinks remaining */
+    for (;;) {
+        if (remaining == 0)
+            return 1;
+        if (!ssl_print_certificate(bio, indent + 2, &msg, &remaining))
+            return 0;
+    }
+}
+
+/*
+ * Trace the body of a CertificateRequest message: the certificate_types
+ * list, the signature_algorithms list (only when SSL_USE_SIGALGS(s) is
+ * true) and the certificate_authorities list, whose entries are decoded
+ * with d2i_X509_NAME().
+ *
+ * Returns 1 on success, 0 when a length check fails.  A name that fails
+ * to decode is reported in the output but is not treated as an error.
+ */
+static int ssl_print_cert_request(BIO *bio, int indent, SSL *s,
+                                  const unsigned char *msg, size_t msglen)
+{
+    size_t xlen;
+
+    if (msglen < 1)
+        return 0;
+    xlen = msg[0];
+    if (msglen < xlen + 1)
+        return 0;
+    msg++;
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "certificate_types (len=%d)\n", (int)xlen);
+    if (!ssl_trace_list(bio, indent + 2, msg, xlen, 1, ssl_ctype_tbl))
+        return 0;
+    msg += xlen;
+    msglen -= xlen + 1;
+
+    if (SSL_USE_SIGALGS(s)) {
+        if (msglen < 2)
+            return 0;
+        xlen = (msg[0] << 8) | msg[1];
+        /* Entries are two bytes each, so an odd length is malformed */
+        if (msglen < xlen + 2 || (xlen & 1))
+            return 0;
+        msg += 2;
+        /*
+         * Charge the whole list against msglen now: the loop below counts
+         * xlen down to zero, so it cannot be used for this afterwards.
+         */
+        msglen -= xlen + 2;
+        BIO_indent(bio, indent, 80);
+        BIO_printf(bio, "signature_algorithms (len=%d)\n", (int)xlen);
+        while (xlen > 0) {
+            BIO_indent(bio, indent + 2, 80);
+            BIO_printf(bio, "%s+%s (%d+%d)\n",
+                       ssl_trace_str(msg[0], ssl_md_tbl),
+                       ssl_trace_str(msg[1], ssl_sig_tbl), msg[0], msg[1]);
+            xlen -= 2;
+            msg += 2;
+        }
+    }
+
+    /* The two length bytes must be present before they are read */
+    if (msglen < 2)
+        return 0;
+    xlen = (msg[0] << 8) | msg[1];
+    BIO_indent(bio, indent, 80);
+    if (msglen < xlen + 2)
+        return 0;
+    msg += 2;
+    msglen -= 2;
+    BIO_printf(bio, "certificate_authorities (len=%d)\n", (int)xlen);
+    while (xlen > 0) {
+        size_t dlen;
+        X509_NAME *nm;
+        const unsigned char *p;
+
+        if (xlen < 2)
+            return 0;
+        dlen = (msg[0] << 8) | msg[1];
+        if (xlen < dlen + 2)
+            return 0;
+        msg += 2;
+        BIO_indent(bio, indent + 2, 80);
+        BIO_printf(bio, "DistinguishedName (len=%d): ", (int)dlen);
+        /* Decode through a copy so msg itself only moves by dlen below */
+        p = msg;
+        nm = d2i_X509_NAME(NULL, &p, dlen);
+        if (!nm) {
+            BIO_puts(bio, "<UNPARSEABLE DN>\n");
+        } else {
+            X509_NAME_print_ex(bio, nm, 0, XN_FLAG_ONELINE);
+            BIO_puts(bio, "\n");
+            X509_NAME_free(nm);
+        }
+        xlen -= dlen + 2;
+        msg += dlen;
+    }
+    return 1;
+}
+
+/*
+ * Trace the body of a NewSessionTicket message: a four byte big-endian
+ * lifetime hint followed by the ticket, which must use up the rest of the
+ * message.  An empty body is reported as "No Ticket".
+ *
+ * Returns 1 on success, 0 if the body is too short, the ticket field is
+ * rejected by ssl_print_hexbuf() or data is left over after it.
+ */
+static int ssl_print_ticket(BIO *bio, int indent,
+                            const unsigned char *msg, size_t msglen)
+{
+    unsigned int tick_life;
+
+    if (msglen == 0) {
+        BIO_indent(bio, indent + 2, 80);
+        BIO_puts(bio, "No Ticket\n");
+        return 1;
+    }
+    if (msglen < 4)
+        return 0;
+    /*
+     * The top byte is widened to unsigned before shifting: as a promoted
+     * int, a value of 0x80 or more shifted left by 24 would overflow.
+     */
+    tick_life = ((unsigned int)msg[0] << 24) | (msg[1] << 16)
+        | (msg[2] << 8) | msg[3];
+    msglen -= 4;
+    msg += 4;
+    BIO_indent(bio, indent + 2, 80);
+    BIO_printf(bio, "ticket_lifetime_hint=%u\n", tick_life);
+    if (!ssl_print_hexbuf(bio, indent + 2, "ticket", 2, &msg, &msglen))
+        return 0;
+    /* Nothing may follow the ticket */
+    if (msglen)
+        return 0;
+    return 1;
+}
+
+/*
+ * Trace one handshake message: a type byte and a three byte big-endian
+ * length, then (when SSL_IS_DTLS(ssl) is true) eight further header bytes
+ * printed as message_seq, fragment_offset and fragment_length, then the
+ * body, which is passed to the printer matching the type.  Types without
+ * a printer are hex dumped.
+ *
+ * Returns 1 on success, 0 if a header is truncated, the stated length
+ * exceeds the data available or the body printer reports failure.
+ */
+static int ssl_print_handshake(BIO *bio, SSL *ssl,
+                               const unsigned char *msg, size_t msglen,
+                               int indent)
+{
+    const int body_indent = indent + 2;
+    unsigned char htype;
+    size_t hlen;
+    int parsed = 1;
+
+    if (msglen < 4)
+        return 0;
+
+    htype = msg[0];
+    hlen = (msg[1] << 16) | (msg[2] << 8) | msg[3];
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "%s, Length=%d\n",
+               ssl_trace_str(htype, ssl_handshake_tbl), (int)hlen);
+    msglen -= 4;
+    msg += 4;
+
+    if (SSL_IS_DTLS(ssl)) {
+        if (msglen < 8)
+            return 0;
+        BIO_indent(bio, indent, 80);
+        BIO_printf(bio, "message_seq=%d, fragment_offset=%d, "
+                   "fragment_length=%d\n",
+                   (msg[0] << 8) | msg[1],
+                   (msg[2] << 16) | (msg[3] << 8) | msg[4],
+                   (msg[5] << 16) | (msg[6] << 8) | msg[7]);
+        msglen -= 8;
+        msg += 8;
+    }
+
+    if (msglen < hlen)
+        return 0;
+
+    switch (htype) {
+    case SSL3_MT_CLIENT_HELLO:
+        parsed = (ssl_print_client_hello(bio, ssl, body_indent,
+                                         msg, msglen) != 0);
+        break;
+
+    case DTLS1_MT_HELLO_VERIFY_REQUEST:
+        parsed = (dtls_print_hello_vfyrequest(bio, body_indent,
+                                              msg, msglen) != 0);
+        break;
+
+    case SSL3_MT_SERVER_HELLO:
+        parsed = (ssl_print_server_hello(bio, body_indent,
+                                         msg, msglen) != 0);
+        break;
+
+    case SSL3_MT_SERVER_KEY_EXCHANGE:
+        parsed = (ssl_print_server_keyex(bio, body_indent, ssl,
+                                         msg, msglen) != 0);
+        break;
+
+    case SSL3_MT_CLIENT_KEY_EXCHANGE:
+        parsed = (ssl_print_client_keyex(bio, body_indent, ssl,
+                                         msg, msglen) != 0);
+        break;
+
+    case SSL3_MT_CERTIFICATE:
+        parsed = (ssl_print_certificates(bio, body_indent,
+                                         msg, msglen) != 0);
+        break;
+
+    case SSL3_MT_CERTIFICATE_VERIFY:
+        parsed = (ssl_print_signature(bio, body_indent, ssl,
+                                      &msg, &msglen) != 0);
+        break;
+
+    case SSL3_MT_CERTIFICATE_REQUEST:
+        parsed = (ssl_print_cert_request(bio, body_indent, ssl,
+                                         msg, msglen) != 0);
+        break;
+
+    case SSL3_MT_FINISHED:
+        ssl_print_hex(bio, body_indent, "verify_data", msg, msglen);
+        break;
+
+    case SSL3_MT_SERVER_DONE:
+        /* Any body at all is shown as unexpected */
+        if (msglen != 0)
+            ssl_print_hex(bio, body_indent, "unexpected value", msg, msglen);
+        break;
+
+    case SSL3_MT_NEWSESSION_TICKET:
+        parsed = (ssl_print_ticket(bio, body_indent, msg, msglen) != 0);
+        break;
+
+    default:
+        BIO_indent(bio, body_indent, 80);
+        BIO_puts(bio, "Unsupported, hex dump follows:\n");
+        BIO_dump_indent(bio, (char *)msg, msglen, indent + 4);
+        break;
+    }
+
+    return parsed;
+}
+
+/*
+ * Trace a heartbeat message: a type byte, a payload printed through
+ * ssl_print_hexbuf() and whatever remains, printed as padding.
+ *
+ * Returns 1 on success, 0 if fewer than three bytes are supplied or the
+ * payload field is rejected.
+ */
+static int ssl_print_heartbeat(BIO *bio, int indent,
+                               const unsigned char *msg, size_t msglen)
+{
+    const unsigned char *rest;
+    size_t rest_len;
+    int hb_type;
+
+    if (msglen < 3)
+        return 0;
+
+    hb_type = msg[0];
+    rest = msg + 1;
+    rest_len = msglen - 1;
+
+    BIO_indent(bio, indent, 80);
+    BIO_printf(bio, "HeartBeatMessageType: %s\n",
+               ssl_trace_str(hb_type, ssl_hb_type_tbl));
+
+    if (ssl_print_hexbuf(bio, indent, "payload", 2, &rest, &rest_len)) {
+        /* rest/rest_len now describe what follows the payload */
+        ssl_print_hex(bio, indent, "padding", rest, rest_len);
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Look up a cipher in ssl_ciphers_tbl by the low 16 bits of its id and
+ * return whatever ssl_trace_str() yields for it.  Ciphers flagged
+ * SSL_SSLV2 in algorithm_ssl are not looked up; NULL is returned instead.
+ */
+const char *SSL_CIPHER_standard_name(const SSL_CIPHER *c)
+{
+    if ((c->algorithm_ssl & SSL_SSLV2) == 0)
+        return ssl_trace_str(c->id & 0xFFFF, ssl_ciphers_tbl);
+
+    return NULL;
+}
+
+/*
+ * Print a human readable trace of one protocol item to a BIO.
+ *
+ * write_p:      2 prints buf as a hex dump labelled "Session " plus the
+ *               ssl_crypto_tbl name of content_type; otherwise non-zero
+ *               means "Sent" and zero "Received" in the record header.
+ * version:      not used by this function.
+ * content_type: selects how buf is interpreted (record header, handshake,
+ *               change cipher spec, alert or heartbeat); other values
+ *               print only the closing newline.
+ * buf, msglen:  the data and its length.
+ * ssl:          connection, consulted for SSL_IS_DTLS() and by the
+ *               handshake printer.
+ * arg:          used as the BIO to print to.
+ */
+void SSL_trace(int write_p, int version, int content_type,
+               const void *buf, size_t msglen, SSL *ssl, void *arg)
+{
+    const unsigned char *msg = buf;
+    BIO *bio = arg;
+
+    if (write_p == 2) {
+        BIO_puts(bio, "Session ");
+        ssl_print_hex(bio, 0,
+                      ssl_trace_str(content_type, ssl_crypto_tbl),
+                      msg, msglen);
+        return;
+    }
+    switch (content_type) {
+    case SSL3_RT_HEADER:
+        {
+            int hvers = msg[1] << 8 | msg[2];
+            BIO_puts(bio, write_p ? "Sent" : "Received");
+            BIO_printf(bio, " Record\nHeader:\n  Version = %s (0x%x)\n",
+                       ssl_trace_str(hvers, ssl_version_tbl), hvers);
+            if (SSL_IS_DTLS(ssl)) {
+                BIO_printf(bio,
+                           "  epoch=%d, sequence_number=%04x%04x%04x\n",
+                           (msg[3] << 8 | msg[4]),
+                           (msg[5] << 8 | msg[6]),
+                           (msg[7] << 8 | msg[8]), (msg[9] << 8 | msg[10]));
+# if 0
+                /*
+                 * Just print handshake type so we can see what is going on
+                 * during fragmentation.
+                 */
+                BIO_printf(bio, "(%s)\n",
+                           ssl_trace_str(msg[msglen], ssl_handshake_tbl));
+# endif
+            }
+
+            /* The record length is taken from the last two header bytes */
+            BIO_printf(bio, "  Content Type = %s (%d)\n  Length = %d",
+                       ssl_trace_str(msg[0], ssl_content_tbl), msg[0],
+                       msg[msglen - 2] << 8 | msg[msglen - 1]);
+        }
+        break;
+    case SSL3_RT_HANDSHAKE:
+        if (!ssl_print_handshake(bio, ssl, msg, msglen, 4))
+            BIO_printf(bio, "Message length parse error!\n");
+        break;
+
+    case SSL3_RT_CHANGE_CIPHER_SPEC:
+        if (msglen == 1 && msg[0] == 1)
+            BIO_puts(bio, "    change_cipher_spec (1)\n");
+        else
+            ssl_print_hex(bio, 4, "unknown value", msg, msglen);
+        break;
+
+    case SSL3_RT_ALERT:
+        if (msglen != 2)
+            BIO_puts(bio, "    Illegal Alert Length\n");
+        else {
+            /*
+             * NOTE(review): the level byte is passed shifted left by 8;
+             * presumably SSL_alert_type_string_long() expects it in the
+             * high byte - confirm.
+             */
+            BIO_printf(bio, "    Level=%s(%d), description=%s(%d)\n",
+                       SSL_alert_type_string_long(msg[0] << 8),
+                       msg[0], SSL_alert_desc_string_long(msg[1]), msg[1]);
+        }
+        /* An alert is complete here; do not run on into the heartbeat case */
+        break;
+
+    case TLS1_RT_HEARTBEAT:
+        ssl_print_heartbeat(bio, 4, msg, msglen);
+        break;
+
+    }
+
+    BIO_puts(bio, "\n");
+}
+
+#endif
index 69d8186..5929607 100644 (file)
@@ -209,11 +209,9 @@ extern "C" {
 # define TLSEXT_TYPE_status_request              5
 /* ExtensionType values from RFC4681 */
 # define TLSEXT_TYPE_user_mapping                6
-
 /* ExtensionType values from RFC5878 */
 # define TLSEXT_TYPE_client_authz                7
 # define TLSEXT_TYPE_server_authz                8
-
 /* ExtensionType values from RFC6091 */
 # define TLSEXT_TYPE_cert_type           9
 
@@ -233,6 +231,9 @@ extern "C" {
 /* ExtensionType value from RFC5620 */
 # define TLSEXT_TYPE_heartbeat   15
 
+/* ExtensionType value from draft-ietf-tls-applayerprotoneg-00 */
+# define TLSEXT_TYPE_application_layer_protocol_negotiation 16
+
 /*
  * ExtensionType value for TLS padding extension.
  * http://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml
@@ -250,7 +251,7 @@ extern "C" {
  * i.e. build with -DTLSEXT_TYPE_opaque_prf_input=38183
  * using whatever extension number you'd like to try
  */
-#  define TLSEXT_TYPE_opaque_prf_input           ?? */
+#  define TLSEXT_TYPE_opaque_prf_input           ??
 # endif
 
 /* Temporary extension type */
@@ -280,6 +281,9 @@ extern "C" {
 # define TLSEXT_signature_dsa                            2
 # define TLSEXT_signature_ecdsa                          3
 
+/* Total number of different signature algorithms */
+# define TLSEXT_signature_num                            4
+
 # define TLSEXT_hash_none                                0
 # define TLSEXT_hash_md5                                 1
 # define TLSEXT_hash_sha1                                2
@@ -288,6 +292,18 @@ extern "C" {
 # define TLSEXT_hash_sha384                              5
 # define TLSEXT_hash_sha512                              6
 
+/* Total number of different digest algorithms */
+
+# define TLSEXT_hash_num                                 7
+
+/* Flag set for unrecognised algorithms */
+# define TLSEXT_nid_unknown                              0x1000000
+
+/* ECC curves */
+
+# define TLSEXT_curve_P_256                              23
+# define TLSEXT_curve_P_384                              24
+
 # ifndef OPENSSL_NO_TLSEXT
 
 #  define TLSEXT_MAXLEN_host_name 255
@@ -306,6 +322,16 @@ int SSL_export_keying_material(SSL *s, unsigned char *out, size_t olen,
                                const unsigned char *p, size_t plen,
                                int use_context);
 
+int SSL_get_sigalgs(SSL *s, int idx,
+                    int *psign, int *phash, int *psignandhash,
+                    unsigned char *rsig, unsigned char *rhash);
+
+int SSL_get_shared_sigalgs(SSL *s, int idx,
+                           int *psign, int *phash, int *psignandhash,
+                           unsigned char *rsig, unsigned char *rhash);
+
+int SSL_check_chain(SSL *s, X509 *x, EVP_PKEY *pk, STACK_OF(X509) *chain);
+
 #  define SSL_set_tlsext_host_name(s,name) \
 SSL_ctrl(s,SSL_CTRL_SET_TLSEXT_HOSTNAME,TLSEXT_NAMETYPE_host_name,(char *)name)
 
@@ -541,11 +567,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb)
 # define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384        0x0300C032
 
 /*
- * XXX Inconsistency alert: The OpenSSL names of ciphers with ephemeral DH
- * here include the string "DHE", while elsewhere it has always been "EDH".
- * (The alias for the list of all such ciphers also is "EDH".) The
- * specifications speak of "EDH"; maybe we should allow both forms for
- * everything.
+ * XXX Backward compatibility alert: Older versions of OpenSSL gave some
+ * DHE ciphers names with "EDH" instead of "DHE".  Going forward, we should
+ * be using DHE everywhere, though we may indefinitely maintain aliases for
+ * users or configurations that used "EDH".
  */
 # define TLS1_TXT_RSA_EXPORT1024_WITH_RC4_56_MD5         "EXP1024-RC4-MD5"
 # define TLS1_TXT_RSA_EXPORT1024_WITH_RC2_CBC_56_MD5     "EXP1024-RC2-CBC-MD5"
index eca1400..e695073 100644 (file)
@@ -12,6 +12,7 @@ PERL=         perl
 # KRB5 stuff
 KRB5_INCLUDES=
 LIBKRB5=
+TEST=          igetest.c
 
 PEX_LIBS=
 EX_LIBS= #-lnsl -lsocket
@@ -63,6 +64,7 @@ EVPEXTRATEST=evp_extra_test
 IGETEST=       igetest
 JPAKETEST=     jpaketest
 SRPTEST=       srptest
+V3NAMETEST=    v3nametest
 ASN1TEST=      asn1test
 HEARTBEATTEST=  heartbeat_test
 CONSTTIMETEST=  constant_time_test
@@ -78,7 +80,8 @@ EXE=  $(BNTEST)$(EXE_EXT) $(ECTEST)$(EXE_EXT)  $(ECDSATEST)$(EXE_EXT) $(ECDHTEST)
        $(RANDTEST)$(EXE_EXT) $(DHTEST)$(EXE_EXT) $(ENGINETEST)$(EXE_EXT) \
        $(BFTEST)$(EXE_EXT) $(CASTTEST)$(EXE_EXT) $(SSLTEST)$(EXE_EXT) $(EXPTEST)$(EXE_EXT) $(DSATEST)$(EXE_EXT) $(RSATEST)$(EXE_EXT) \
        $(EVPTEST)$(EXE_EXT) $(EVPEXTRATEST)$(EXE_EXT) $(IGETEST)$(EXE_EXT) $(JPAKETEST)$(EXE_EXT) $(SRPTEST)$(EXE_EXT) \
-       $(ASN1TEST)$(EXE_EXT) $(HEARTBEATTEST)$(EXE_EXT) $(CONSTTIMETEST)$(EXE_EXT) $(VERIFYEXTRATEST)$(EXE_EXT)
+       $(ASN1TEST)$(EXE_EXT) $(V3NAMETEST)$(EXE_EXT) $(HEARTBEATTEST)$(EXE_EXT) \
+       $(CONSTTIMETEST)$(EXE_EXT) $(VERIFYEXTRATEST)$(EXE_EXT)
 
 # $(METHTEST)$(EXE_EXT)
 
@@ -90,7 +93,7 @@ OBJ=  $(BNTEST).o $(ECTEST).o  $(ECDSATEST).o $(ECDHTEST).o $(IDEATEST).o \
        $(MDC2TEST).o $(RMDTEST).o \
        $(RANDTEST).o $(DHTEST).o $(ENGINETEST).o $(CASTTEST).o \
        $(BFTEST).o  $(SSLTEST).o  $(DSATEST).o  $(EXPTEST).o $(RSATEST).o \
-       $(EVPTEST).o $(EVPEXTRATEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o \
+       $(EVPTEST).o $(EVPEXTRATEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o $(V3NAMETEST).o \
        $(HEARTBEATTEST).o $(CONSTTIMETEST).o $(VERIFYEXTRATEST).o
 
 SRC=   $(BNTEST).c $(ECTEST).c  $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \
@@ -101,10 +104,10 @@ SRC=      $(BNTEST).c $(ECTEST).c  $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \
        $(RANDTEST).c $(DHTEST).c $(ENGINETEST).c $(CASTTEST).c \
        $(BFTEST).c  $(SSLTEST).c $(DSATEST).c   $(EXPTEST).c $(RSATEST).c \
        $(EVPTEST).c $(EVPEXTRATEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c \
-       $(HEARTBEATTEST).c $(CONSTTIMETEST).c $(VERIFYEXTRATEST).c
+       $(V3NAMETEST).c $(HEARTBEATTEST).c $(CONSTTIMETEST).c $(VERIFYEXTRATEST).c
 
 EXHEADER= 
-HEADER=        $(EXHEADER)
+HEADER=        testutil.h $(EXHEADER)
 
 ALL=    $(GENERAL) $(SRC) $(HEADER)
 
@@ -144,69 +147,70 @@ alltests: \
        test_enc test_x509 test_rsa test_crl test_sid \
        test_gen test_req test_pkcs7 test_verify test_dh test_dsa \
        test_ss test_ca test_engine test_evp test_evp_extra test_ssl test_tsa test_ige \
-       test_jpake test_srp test_cms test_heartbeat test_constant_time test_verify_extra
+       test_jpake test_srp test_cms test_ocsp test_v3name test_heartbeat \
+       test_constant_time test_verify_extra
 
-test_evp:
+test_evp: $(EVPTEST)$(EXE_EXT) evptests.txt
        ../util/shlib_wrap.sh ./$(EVPTEST) evptests.txt
 
-test_evp_extra:
+test_evp_extra: $(EVPEXTRATEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(EVPEXTRATEST)
 
-test_des:
+test_des: $(DESTEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(DESTEST)
 
-test_idea:
+test_idea: $(IDEATEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(IDEATEST)
 
-test_sha:
+test_sha: $(SHATEST)$(EXE_EXT) $(SHA1TEST)$(EXE_EXT) $(SHA256TEST)$(EXE_EXT) $(SHA512TEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(SHATEST)
        ../util/shlib_wrap.sh ./$(SHA1TEST)
        ../util/shlib_wrap.sh ./$(SHA256TEST)
        ../util/shlib_wrap.sh ./$(SHA512TEST)
 
-test_mdc2:
+test_mdc2: $(MDC2TEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(MDC2TEST)
 
-test_md5:
+test_md5: $(MD5TEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(MD5TEST)
 
-test_md4:
+test_md4: $(MD4TEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(MD4TEST)
 
-test_hmac:
+test_hmac: $(HMACTEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(HMACTEST)
 
-test_wp:
+test_wp: $(WPTEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(WPTEST)
 
-test_md2:
+test_md2: $(MD2TEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(MD2TEST)
 
-test_rmd:
+test_rmd: $(RMDTEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(RMDTEST)
 
-test_bf:
+test_bf: $(BFTEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(BFTEST)
 
-test_cast:
+test_cast: $(CASTTEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(CASTTEST)
 
-test_rc2:
+test_rc2: $(RC2TEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(RC2TEST)
 
-test_rc4:
+test_rc4: $(RC4TEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(RC4TEST)
 
-test_rc5:
+test_rc5: $(RC5TEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(RC5TEST)
 
-test_rand:
+test_rand: $(RANDTEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(RANDTEST)
 
-test_enc:
+test_enc: ../apps/openssl$(EXE_EXT) testenc
        @sh ./testenc
 
-test_x509:
+test_x509: ../apps/openssl$(EXE_EXT) tx509 testx509.pem v3-cert1.pem v3-cert2.pem
        echo test normal x509v1 certificate
        sh ./tx509 2>/dev/null
        echo test first x509v3 certificate
@@ -214,76 +218,78 @@ test_x509:
        echo test second x509v3 certificate
        sh ./tx509 v3-cert2.pem 2>/dev/null
 
-test_rsa: $(RSATEST)$(EXE_EXT)
+test_rsa: $(RSATEST)$(EXE_EXT) ../apps/openssl$(EXE_EXT) trsa testrsa.pem
        @sh ./trsa 2>/dev/null
        ../util/shlib_wrap.sh ./$(RSATEST)
 
-test_crl:
+test_crl: ../apps/openssl$(EXE_EXT) tcrl testcrl.pem
        @sh ./tcrl 2>/dev/null
 
-test_sid:
+test_sid: ../apps/openssl$(EXE_EXT) tsid testsid.pem
        @sh ./tsid 2>/dev/null
 
-test_req:
+test_req: ../apps/openssl$(EXE_EXT) treq testreq.pem testreq2.pem
        @sh ./treq 2>/dev/null
        @sh ./treq testreq2.pem 2>/dev/null
 
-test_pkcs7:
+test_pkcs7: ../apps/openssl$(EXE_EXT) tpkcs7 tpkcs7d testp7.pem pkcs7-1.pem
        @sh ./tpkcs7 2>/dev/null
        @sh ./tpkcs7d 2>/dev/null
 
-test_bn:
+test_bn: $(BNTEST)$(EXE_EXT) $(EXPTEST)$(EXE_EXT) bctest
        @echo starting big number library test, could take a while...
        @../util/shlib_wrap.sh ./$(BNTEST) >tmp.bntest
        @echo quit >>tmp.bntest
        @echo "running bc"
-       @<tmp.bntest sh -c "`sh ./bctest ignore`" | $(PERL) -e '$$i=0; while (<STDIN>) {if (/^test (.*)/) {print STDERR "\nverify $$1";} elsif (!/^0\r?$$/) {die "\nFailed! bc: $$_";} else {print STDERR "."; $$i++;}} print STDERR "\n$$i tests passed\n"'
+       @<tmp.bntest sh -c "`sh ./bctest ignore`" | $(PERL) -e '$$i=0; while (<STDIN>) {if (/^test (.*)/) {print STDERR "\nverify $$1";} elsif (!/^0$$/) {die "\nFailed! bc: $$_";} else {print STDERR "."; $$i++;}} print STDERR "\n$$i tests passed\n"'
        @echo 'test a^b%c implementations'
        ../util/shlib_wrap.sh ./$(EXPTEST)
 
-test_ec:
+test_ec: $(ECTEST)$(EXE_EXT)
        @echo 'test elliptic curves'
        ../util/shlib_wrap.sh ./$(ECTEST)
 
-test_ecdsa:
+test_ecdsa: $(ECDSATEST)$(EXE_EXT)
        @echo 'test ecdsa'
        ../util/shlib_wrap.sh ./$(ECDSATEST)
 
-test_ecdh:
+test_ecdh: $(ECDHTEST)$(EXE_EXT)
        @echo 'test ecdh'
        ../util/shlib_wrap.sh ./$(ECDHTEST)
 
-test_verify:
+test_verify: ../apps/openssl$(EXE_EXT)
        @echo "The following command should have some OK's and some failures"
        @echo "There are definitly a few expired certificates"
        ../util/shlib_wrap.sh ../apps/openssl verify -CApath ../certs/demo ../certs/demo/*.pem
 
-test_dh:
+test_dh: $(DHTEST)$(EXE_EXT)
        @echo "Generate a set of DH parameters"
        ../util/shlib_wrap.sh ./$(DHTEST)
 
-test_dsa:
+test_dsa: $(DSATEST)$(EXE_EXT)
        @echo "Generate a set of DSA parameters"
        ../util/shlib_wrap.sh ./$(DSATEST)
        ../util/shlib_wrap.sh ./$(DSATEST) -app2_1
 
-test_gen:
+test_gen testreq.pem: ../apps/openssl$(EXE_EXT) testgen test.cnf
        @echo "Generate and verify a certificate request"
        @sh ./testgen
 
 test_ss keyU.ss certU.ss certCA.ss certP1.ss keyP1.ss certP2.ss keyP2.ss \
-               intP1.ss intP2.ss: testss
+               intP1.ss intP2.ss: testss CAss.cnf Uss.cnf P1ss.cnf P2ss.cnf \
+                                   ../apps/openssl$(EXE_EXT)
        @echo "Generate and certify a test certificate"
        @sh ./testss
        @cat certCA.ss certU.ss > intP1.ss
        @cat certCA.ss certU.ss certP1.ss > intP2.ss
 
-test_engine: 
+test_engine:  $(ENGINETEST)$(EXE_EXT)
        @echo "Manipulate the ENGINE structures"
        ../util/shlib_wrap.sh ./$(ENGINETEST)
 
 test_ssl: keyU.ss certU.ss certCA.ss certP1.ss keyP1.ss certP2.ss keyP2.ss \
-               intP1.ss intP2.ss
+               intP1.ss intP2.ss $(SSLTEST)$(EXE_EXT) testssl testsslproxy \
+               ../apps/server2.pem serverinfo.pem
        @echo "test SSL protocol"
        @if [ -n "$(FIPSCANLIB)" ]; then \
          sh ./testfipsssl keyU.ss certU.ss certCA.ss; \
@@ -293,7 +299,7 @@ test_ssl: keyU.ss certU.ss certCA.ss certP1.ss keyP1.ss certP2.ss keyP2.ss \
        @sh ./testsslproxy keyP1.ss certP1.ss intP1.ss
        @sh ./testsslproxy keyP2.ss certP2.ss intP2.ss
 
-test_ca:
+test_ca: ../apps/openssl$(EXE_EXT) testca CAss.cnf Uss.cnf
        @if ../util/shlib_wrap.sh ../apps/openssl no-rsa; then \
          echo "skipping CA.sh test -- requires RSA"; \
        else \
@@ -305,7 +311,7 @@ test_aes: #$(AESTEST)
 #      @echo "test Rijndael"
 #      ../util/shlib_wrap.sh ./$(AESTEST)
 
-test_tsa:
+test_tsa: ../apps/openssl$(EXE_EXT) testtsa CAtsa.cnf ../util/shlib_wrap.sh
        @if ../util/shlib_wrap.sh ../apps/openssl no-rsa; then \
          echo "skipping testtsa test -- requires RSA"; \
        else \
@@ -320,7 +326,7 @@ test_jpake: $(JPAKETEST)$(EXE_EXT)
        @echo "Test JPAKE"
        ../util/shlib_wrap.sh ./$(JPAKETEST)
 
-test_cms:
+test_cms: ../apps/openssl$(EXE_EXT) cms-test.pl smcont.txt
        @echo "CMS consistency test"
        $(PERL) cms-test.pl
 
@@ -328,6 +334,14 @@ test_srp: $(SRPTEST)$(EXE_EXT)
        @echo "Test SRP"
        ../util/shlib_wrap.sh ./srptest
 
+test_ocsp: ../apps/openssl$(EXE_EXT) tocsp
+       @echo "Test OCSP"
+       @sh ./tocsp
+
+test_v3name: $(V3NAMETEST)$(EXE_EXT)
+       @echo "Test X509v3_check_*"
+       ../util/shlib_wrap.sh ./$(V3NAMETEST)
+
 test_heartbeat: $(HEARTBEATTEST)$(EXE_EXT)
        ../util/shlib_wrap.sh ./$(HEARTBEATTEST)
 
@@ -501,6 +515,9 @@ $(ASN1TEST)$(EXE_EXT): $(ASN1TEST).o $(DLIBCRYPTO)
 $(SRPTEST)$(EXE_EXT): $(SRPTEST).o $(DLIBCRYPTO)
        @target=$(SRPTEST); $(BUILD_CMD)
 
+$(V3NAMETEST)$(EXE_EXT): $(V3NAMETEST).o $(DLIBCRYPTO)
+       @target=$(V3NAMETEST); $(BUILD_CMD)
+
 $(HEARTBEATTEST)$(EXE_EXT): $(HEARTBEATTEST).o $(DLIBCRYPTO)
        @target=$(HEARTBEATTEST); $(BUILD_CMD_STATIC)
 
@@ -800,6 +817,19 @@ ssltest.o: ../include/openssl/ssl3.h ../include/openssl/stack.h
 ssltest.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h
 ssltest.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h
 ssltest.o: ../include/openssl/x509v3.h ssltest.c
+v3nametest.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
+v3nametest.o: ../include/openssl/buffer.h ../include/openssl/conf.h
+v3nametest.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h
+v3nametest.o: ../include/openssl/ec.h ../include/openssl/ecdh.h
+v3nametest.o: ../include/openssl/ecdsa.h ../include/openssl/evp.h
+v3nametest.o: ../include/openssl/lhash.h ../include/openssl/obj_mac.h
+v3nametest.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h
+v3nametest.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h
+v3nametest.o: ../include/openssl/pkcs7.h ../include/openssl/safestack.h
+v3nametest.o: ../include/openssl/sha.h ../include/openssl/stack.h
+v3nametest.o: ../include/openssl/symhacks.h ../include/openssl/x509.h
+v3nametest.o: ../include/openssl/x509_vfy.h ../include/openssl/x509v3.h
+v3nametest.o: v3nametest.c
 verify_extra_test.o: ../include/openssl/asn1.h ../include/openssl/bio.h
 verify_extra_test.o: ../include/openssl/buffer.h ../include/openssl/crypto.h
 verify_extra_test.o: ../include/openssl/e_os2.h ../include/openssl/ec.h
index dfef799..baa3b59 100644 (file)
@@ -58,19 +58,32 @@ my $redir = " 2> cms.err > cms.out";
 # Make VMS work
 if ( $^O eq "VMS" && -f "OSSLX:openssl.exe" ) {
     $ossl_path = "pipe mcr OSSLX:openssl";
+    $null_path = "NL:";
+    # On VMS, the lowest 3 bits of the exit code indicates severity
+    # 1 is success (perl translates it to 0 for $?), 2 is error
+    # (perl doesn't translate it)
+    $failure_code = 512;       # 2 << 8 = 512
 }
 # Make MSYS work
 elsif ( $^O eq "MSWin32" && -f "../apps/openssl.exe" ) {
     $ossl_path = "cmd /c ..\\apps\\openssl";
+    $null_path = "NUL";
+    $failure_code = 256;
 }
 elsif ( -f "../apps/openssl$ENV{EXE_EXT}" ) {
     $ossl_path = "../util/shlib_wrap.sh ../apps/openssl";
+    $null_path = "/dev/null";
+    $failure_code = 256;
 }
 elsif ( -f "..\\out32dll\\openssl.exe" ) {
     $ossl_path = "..\\out32dll\\openssl.exe";
+    $null_path = "NUL";
+    $failure_code = 256;
 }
 elsif ( -f "..\\out32\\openssl.exe" ) {
     $ossl_path = "..\\out32\\openssl.exe";
+    $null_path = "NUL";
+    $failure_code = 256;
 }
 else {
     die "Can't find OpenSSL executable";
@@ -82,8 +95,53 @@ my $smdir    = "smime-certs";
 my $halt_err = 1;
 
 my $badcmd = 0;
+my $no_ec;
+my $no_ec2m;
+my $no_ecdh;
 my $ossl8 = `$ossl_path version -v` =~ /0\.9\.8/;
 
+system ("$ossl_path no-ec > $null_path");
+if ($? == 0)
+       {
+       $no_ec = 1;
+       }
+elsif ($? == $failure_code)
+       {
+       $no_ec = 0;
+       }
+else
+       {
+       die "Error checking for EC support\n";
+       }
+    
+system ("$ossl_path no-ec2m > $null_path");
+if ($? == 0)
+       {
+       $no_ec2m = 1;
+       }
+elsif ($? == $failure_code)
+       {
+       $no_ec2m = 0;
+       }
+else
+       {
+       die "Error checking for EC2M support\n";
+       }
+
+system ("$ossl_path no-ec > $null_path");
+if ($? == 0)
+       {
+       $no_ecdh = 1;
+       }
+elsif ($? == $failure_code)
+       {
+       $no_ecdh = 0;
+       }
+else
+       {
+       die "Error checking for ECDH support\n";
+       }
+    
 my @smime_pkcs7_tests = (
 
     [
@@ -341,6 +399,93 @@ my @smime_cms_comp_tests = (
 
 );
 
+my @smime_cms_param_tests = (
+    [
+        "signed content test streaming PEM format, RSA keys, PSS signature",
+        "-sign -in smcont.txt -outform PEM -nodetach"
+          . " -signer $smdir/smrsa1.pem -keyopt rsa_padding_mode:pss"
+         . " -out test.cms",
+        "-verify -in test.cms -inform PEM "
+          . " \"-CAfile\" $smdir/smroot.pem -out smtst.txt"
+    ],
+
+    [
+        "signed content test streaming PEM format, RSA keys, PSS signature, no attributes",
+        "-sign -in smcont.txt -outform PEM -nodetach -noattr"
+          . " -signer $smdir/smrsa1.pem -keyopt rsa_padding_mode:pss"
+         . " -out test.cms",
+        "-verify -in test.cms -inform PEM "
+          . " \"-CAfile\" $smdir/smroot.pem -out smtst.txt"
+    ],
+
+    [
+        "signed content test streaming PEM format, RSA keys, PSS signature, SHA384 MGF1",
+        "-sign -in smcont.txt -outform PEM -nodetach"
+          . " -signer $smdir/smrsa1.pem -keyopt rsa_padding_mode:pss"
+         . " -keyopt rsa_mgf1_md:sha384 -out test.cms",
+        "-verify -in test.cms -inform PEM "
+          . " \"-CAfile\" $smdir/smroot.pem -out smtst.txt"
+    ],
+
+    [
+"enveloped content test streaming S/MIME format, OAEP default parameters",
+        "-encrypt -in smcont.txt"
+          . " -stream -out test.cms"
+          . " -recip $smdir/smrsa1.pem -keyopt rsa_padding_mode:oaep",
+        "-decrypt -recip $smdir/smrsa1.pem -in test.cms -out smtst.txt"
+    ],
+
+    [
+"enveloped content test streaming S/MIME format, OAEP SHA256",
+        "-encrypt -in smcont.txt"
+          . " -stream -out test.cms"
+          . " -recip $smdir/smrsa1.pem -keyopt rsa_padding_mode:oaep"
+         . " -keyopt rsa_oaep_md:sha256",
+        "-decrypt -recip $smdir/smrsa1.pem -in test.cms -out smtst.txt"
+    ],
+
+    [
+"enveloped content test streaming S/MIME format, ECDH",
+        "-encrypt -in smcont.txt"
+          . " -stream -out test.cms"
+          . " -recip $smdir/smec1.pem",
+        "-decrypt -recip $smdir/smec1.pem -in test.cms -out smtst.txt"
+    ],
+
+    [
+"enveloped content test streaming S/MIME format, ECDH, key identifier",
+        "-encrypt -keyid -in smcont.txt"
+          . " -stream -out test.cms"
+          . " -recip $smdir/smec1.pem",
+        "-decrypt -recip $smdir/smec1.pem -in test.cms -out smtst.txt"
+    ],
+
+    [
+"enveloped content test streaming S/MIME format, ECDH, AES128, SHA256 KDF",
+        "-encrypt -in smcont.txt"
+          . " -stream -out test.cms"
+          . " -recip $smdir/smec1.pem -aes128 -keyopt ecdh_kdf_md:sha256",
+        "-decrypt -recip $smdir/smec1.pem -in test.cms -out smtst.txt"
+    ],
+
+    [
+"enveloped content test streaming S/MIME format, ECDH, K-283, cofactor DH",
+        "-encrypt -in smcont.txt"
+          . " -stream -out test.cms"
+          . " -recip $smdir/smec2.pem -aes128"
+         . " -keyopt ecdh_kdf_md:sha256 -keyopt ecdh_cofactor_mode:1",
+        "-decrypt -recip $smdir/smec2.pem -in test.cms -out smtst.txt"
+    ],
+
+    [
+"enveloped content test streaming S/MIME format, X9.42 DH",
+        "-encrypt -in smcont.txt"
+          . " -stream -out test.cms"
+          . " -recip $smdir/smdh.pem -aes128",
+        "-decrypt -recip $smdir/smdh.pem -in test.cms -out smtst.txt"
+    ]
+);
+
 print "CMS => PKCS#7 compatibility tests\n";
 
 run_smime_tests( \$badcmd, \@smime_pkcs7_tests, $cmscmd, $pk7cmd );
@@ -354,6 +499,9 @@ print "CMS <=> CMS consistency tests\n";
 run_smime_tests( \$badcmd, \@smime_pkcs7_tests, $cmscmd, $cmscmd );
 run_smime_tests( \$badcmd, \@smime_cms_tests,   $cmscmd, $cmscmd );
 
+print "CMS <=> CMS consistency tests, modified key parameters\n";
+run_smime_tests( \$badcmd, \@smime_cms_param_tests,   $cmscmd, $cmscmd );
+
 if ( `$ossl_path version -f` =~ /ZLIB/ ) {
     run_smime_tests( \$badcmd, \@smime_cms_comp_tests, $cmscmd, $cmscmd );
 }
@@ -390,6 +538,21 @@ sub run_smime_tests {
                $rscmd =~ s/-stream//;  
                $rvcmd =~ s/-stream//;
                }
+       if ($no_ec && $tnam =~ /ECDH/)
+               {
+               print "$tnam: skipped, EC disabled\n";
+               next;
+               }
+       if ($no_ecdh && $tnam =~ /ECDH/)
+               {
+               print "$tnam: skipped, ECDH disabled\n";
+               next;
+               }
+       if ($no_ec2m && $tnam =~ /K-283/)
+               {
+               print "$tnam: skipped, EC2M disabled\n";
+               next;
+               }
         system("$scmd$rscmd$redir");
         if ($?) {
             print "$tnam: generation error\n";
index c273707..4e9958b 100644 (file)
@@ -1,4 +1,5 @@
 #cipher:key:iv:plaintext:ciphertext:0/1(decrypt/encrypt)
+#aadcipher:key:iv:plaintext:ciphertext:aad:tag:0/1(decrypt/encrypt)
 #digest:::input:output
 
 # SHA(1) tests (from shatest.c)
@@ -332,3 +333,69 @@ SEED-ECB:00000000000000000000000000000000::000102030405060708090A0B0C0D0E0F:5EBA
 SEED-ECB:000102030405060708090A0B0C0D0E0F::00000000000000000000000000000000:C11F22F20140505084483597E4370F43:1
 SEED-ECB:4706480851E61BE85D74BFB3FD956185::83A2F8A288641FB9A4E9A5CC2F131C7D:EE54D13EBCAE706D226BC3142CD40D4A:1
 SEED-ECB:28DBC3BC49FFD87DCFA509B11D422BE7::B41E6BE2EBA84A148E2EED84593C5EC7:9B9B7BFCD1813CB95D0B3618F40F5122:1
+
+# AES CCM 256 bit key
+aes-256-ccm:1bde3251d41a8b5ea013c195ae128b218b3e0306376357077ef1c1c78548b92e:5b8e40746f6b98e00f1d13ff41:53bd72a97089e312422bf72e242377b3c6ee3e2075389b999c4ef7f28bd2b80a:9a5fcccdb4cf04e7293d2775cc76a488f042382d949b43b7d6bb2b9864786726:c17a32514eb6103f3249e076d4c871dc97e04b286699e54491dc18f6d734d4c0:2024931d73bca480c24a24ece6b6c2bf
+
+# AES GCM test vectors from http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-spec.pdf
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000::::58e2fccefa7e3061367f1d57a4e7455a
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78::ab6e47d42cec13bdf53a67b21257bddf
+aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091473f5985::4d5c2af327cd64a62cf35abd2ba6fab4
+aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091:feedfacedeadbeeffeedfacedeadbeefabaddad2:5bc94fbc3221a5db94fae95ae7121a47
+aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:61353b4c2806934a777ff51fa22a4755699b2a714fcdc6f83766e5f97b6c742373806900e49f24b22b097544d4896b424989b5e1ebac0f07c23f4598:feedfacedeadbeeffeedfacedeadbeefabaddad2:3612d2e79e3b0785561be14aaca2fccb
+aes-128-gcm:feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:8ce24998625615b603a033aca13fb894be9112a5c3a211a8ba262a3cca7e2ca701e4a9a4fba43c90ccdcb281d48c7c6fd62875d2aca417034c34aee5:feedfacedeadbeeffeedfacedeadbeefabaddad2:619cc5aefffe0bfa462af43c1699d050
+aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000::::cd33b28ac773f74ba00ed1f312572435
+aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:98e7247c07f0fe411c267e4384b0f600::2ff58d80033927ab8ef4d4587514f0fb
+aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710acade256::9924a7c8587336bfb118024db8674a14
+aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710:feedfacedeadbeeffeedfacedeadbeefabaddad2:2519498e80f1478f37ba55bd6d27618c
+aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:0f10f599ae14a154ed24b36e25324db8c566632ef2bbb34f8347280fc4507057fddc29df9a471f75c66541d4d4dad1c9e93a19a58e8b473fa0f062f7:feedfacedeadbeeffeedfacedeadbeefabaddad2:65dcc57fcf623a24094fcca40d3533f8
+aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:d27e88681ce3243c4830165a8fdcf9ff1de9a1d8e6b447ef6ef7b79828666e4581e79012af34ddd9e2f037589b292db3e67c036745fa22e7e9b7373b:feedfacedeadbeeffeedfacedeadbeefabaddad2:dcf566ff291c25bbb8568fc3d376a6d9
+aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000::::530f8afbc74536b9a963b4f1c4cb738b
+aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:cea7403d4d606b6e074ec5d3baf39d18::d0d1c8a799996bf0265b98b5d48ab919
+aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad::b094dac5d93471bdec1a502270e3cc6c
+aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662:feedfacedeadbeeffeedfacedeadbeefabaddad2:76fc6ece0f4e1768cddf8853bb2d551b
+aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:c3762df1ca787d32ae47c13bf19844cbaf1ae14d0b976afac52ff7d79bba9de0feb582d33934a4f0954cc2363bc73f7862ac430e64abe499f47c9b1f:feedfacedeadbeeffeedfacedeadbeefabaddad2:3a337dbf46a792c45e454913fe2ea8f2
+aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:5a8def2f0c9e53f1f75d7853659e2a20eeb2b22aafde6419a058ab4f6f746bf40fc0c3b780f244452da3ebf1c5d82cdea2418997200ef82e44ae7e3f:feedfacedeadbeeffeedfacedeadbeefabaddad2:a44a8266ee1c8eb0c8b5d4cf5ae9f19a
+# local add-ons, primarily streaming ghash tests
+# 128 bytes aad
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:::d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad:5fea793a2d6f974d37e68e0cb8ff9492
+# 48 bytes plaintext
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0::9dd0a376b08e40eb00c35f29f9ea61a4
+# 80 bytes plaintext
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d5270291::98885a3a22bd4742fe7b72172193b163
+# 128 bytes plaintext
+aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d527029195b84d1b96c690ff2f2de30bf2ec89e00253786e126504f0dab90c48a30321de3345e6b0461e7c9e6c6b7afedde83f40::cac45f60e31efd3b5a43b98a22ce1aa1
+# 192 bytes plaintext, iv is chosen so that initial counter LSB is 0xFF
+aes-128-gcm:00000000000000000000000000000000:ffffffff000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:56b3373ca9ef6e4a2b64fe1e9a17b61425f10d47a75a5fce13efc6bc784af24f4141bdd48cf7c770887afd573cca5418a9aeffcd7c5ceddfc6a78397b9a85b499da558257267caab2ad0b23ca476a53cb17fb41c4b8b475cb4f3f7165094c229c9e8c4dc0a2a5ff1903e501511221376a1cdb8364c5061a20cae74bc4acd76ceb0abc9fd3217ef9f8c90be402ddf6d8697f4f880dff15bfb7a6b28241ec8fe183c2d59e3f9dfff653c7126f0acb9e64211f42bae12af462b1070bef1ab5e3606::566f8ef683078bfdeeffa869d751a017
+# 80 bytes plaintext, submitted by Intel
+aes-128-gcm:843ffcf5d2b72694d19ed01d01249412:dbcca32ebf9b804617c3aa9e:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f:6268c6fa2a80b2d137467f092f657ac04d89be2beaa623d61b5a868c8f03ff95d3dcee23ad2f1ab3a6c80eaf4b140eb05de3457f0fbc111a6b43d0763aa422a3013cf1dc37fe417d1fbfc449b75d4cc5:00000000000000000000000000000000101112131415161718191a1b1c1d1e1f:3b629ccfbc1119b7319e1dce2cd6fd6d
+
+# AES XTS test vectors from IEEE Std 1619-2007
+aes-128-xts:0000000000000000000000000000000000000000000000000000000000000000:00000000000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000:917cf69ebd68b2ec9b9fe9a3eadda692cd43d2f59598ed858c02c2652fbf922e
+aes-128-xts:1111111111111111111111111111111122222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:c454185e6a16936e39334038acef838bfb186fff7480adc4289382ecd6d394f0
+aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f022222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:af85336b597afc1a900b2eb21ec949d292df4c047e0b21532186a5971a227a89
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:00000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38
fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:01000000000000000000000000000000:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c95
64880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:02000000000000000000000000000000:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c9564880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd:fa762a3680b76007928ed4a4f49a9456031b704782e65e16cecb54ed7d017b5e18abd67b338e81078f21edb7868d901ebe9c731a7c18b5e6dec1d6a72e078ac9a4262f860beefa14f4e821018272e411a951502b6e79066e84252c3346f3aa62344351a291d4bedc7a07618bdea2af63145cc7a4b8d4070691ae890cd65733e7946e9021a1dffc4c59f159425ee6d50ca9b135fa6162cea18a939838dc000fb386fad086acce5ac07cb2ece7fd580b00cfa5e98589631dc25e8e2a3daf2ffdec26531659912c9d8f7a15e5865ea8fb5816d6207052bd7128cd743c12c8118791a4736811935eb982a532349e31dd401e0b660a568cb1a4711f552f55ded59f1f15bf7196b3ca12a91e488ef59d64f3a02bf45239499ac6176ae321c4a211ec545365971c5d3f4f09d4eb139bfdf2073d33180b21002b65cc9865e76cb24cd92c874c24c18350399a936ab3637079295d76c417776b94efce3a0ef7206b15110519655c956cbd8b2489405ee2b09a6b6eebe0c53790a12a8998378b33a5b71159625f4ba49d2a2fdba59fbf0897bc7aabd8d707dc140a80f0f309f835d3da54ab584e501dfa0ee977fec543f74186a802
b9a37adb3e8291eca04d66520d229e60401e7282bef486ae059aa70696e0e305d777140a7a883ecdcb69b9ff938e8a4231864c69ca2c2043bed007ff3e605e014bcf518138dc3a25c5e236171a2d01d6
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fd000000000000000000000000000000:8e41b78c390b5af9d758bb214a67e9f6bf7727b09ac6124084c37611398fa45daad94868600ed391fb1acd4857a95b466e62ef9f4b377244d1c152e7b30d731aad30c716d214b707aed99eb5b5e580b3e887cf7497465651d4b60e6042051da3693c3b78c14489543be8b6ad0ba629565bba202313ba7b0d0c94a3252b676f46cc02ce0f8a7d34c0ed229129673c1f61aed579d08a9203a25aac3a77e9db60267996db38df637356d9dcd1632e369939f2a29d89345c66e05066f1a3677aef18dea4113faeb629e46721a66d0a7e785d3e29af2594eb67dfa982affe0aac058f6e15864269b135418261fc3afb089472cf68c45dd7f231c6249ba0255e1e033833fc4d00a3fe02132d7bc3873614b8aee34273581ea0325c81f0270affa13641d052d36f0757d484014354d02d6883ca15c24d8c3956b1bd027bcf41f151fd8023c5340e5606f37e90fdb87c86fb4fa634b3718a30bace06a66eaf8f63c4aa3b637826a87fe8cfa44282e92cb1615af3a28e53bc74c7cba1a0977be9065d0c1a5dec6c54ae38d37f37aa35283e048e5530a85c4e7a29d7b92ec0c3169cdf2a805c7604bce60049b9fb7b8eaac10f51ae23794ceba68bb58112e293b9b692ca721b37c662f8574ed4dba6f88e170881c82cddc1034a0ca7e284bf0962b6b26292d836fa9f73c1ac770eef0f2d3a1eaf61d3e03555fd424eedd67e18a18094f888:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf1
19b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fe000000000000000000000000000000:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf119b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa06
19ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:ff000000000000000000000000000000:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa0619ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a:3260ae8dad1f4a32c5cafe3ab0eb95549d461a67ceb9e5aa2d3afb62dece0553193ba50c75be251e08d1d08f1088576c7efdfaaf3f459559571e12511753b07af073f35da06af0ce0bbf6b8f5ccc5cea500ec1b211bd51f63b606bf6528796ca12173ba39b8935ee44ccce646f90a45bf9ccc567f0ace13dc2d53ebeedc81f58b2e41179dddf0d5a5c42f5d8506c1a5d2f8f59f3ea873cbcd0eec19acbf325423bd3dcb8c2b1bf1d1eaed0eba7f0698e4314fbeb2f1566d1b9253008cbccf45a2b0d9c5c9c21474f4076e02be26050b99dee4fd68a4cf890e496e4fcae7b70f94ea5a9062da0daeba1993d2ccd1dd3c244b8428801495a58b216547e7e847c46d1d756377b6242d2e5fb83bf752b54e0df71e889f3a2bb0f4c10805bf3c590376e3c24e22ff57f7fa965577375325cea5d920db94b9c336b455f6e894c01866fe9fbb8c8d3f70a2957285f6dfb5dcd8cbf54782f8fe7766d4723819913ac773421e3a31095866bad22c86a6036b2518b2059b4229d18c8c2ccbdf906c6cc6e82464ee57bddb0bebcb1dc645325bfb3e665ef7251082c88ebb1cf203bd779fdd38675713c8daadd17e1cabee432b09787
b6ddf3304e38b731b45df5df51b78fcfb3d32466028d0ba36555e7e11ab0ee0666061d1645d962444bc47a38188930a84b4d561395c73c087021927ca638b7afc8a8679ccb84c26555440ec7f10445cd
+
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ff000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:1c3b3a102f770386e4836c99e370cf9bea00803f5e482357a4ae12d414a3e63b5d31e276f8fe4a8d66b317f9ac683f44680a86ac35adfc3345befecb4bb188fd5776926c49a3095eb108fd1098baec70aaa66999a72a82f27d848b21d4a741b0c5cd4d5fff9dac89aeba122961d03a757123e9870f8acf1000020887891429ca2a3e7a7d7df7b10355165c8b9a6d0a7de8b062c4500dc4cd120c0f7418dae3d0b5781c34803fa75421c790dfe1de1834f280d7667b327f6c8cd7557e12ac3a0f93ec05c52e0493ef31a12d3d9260f79a289d6a379bc70c50841473d1a8cc81ec583e9645e07b8d9670655ba5bbcfecc6dc3966380ad8fecb17b6ba02469a020a84e18e8f84252070c13e9f1f289be54fbc481457778f616015e1327a02b140f1505eb309326d68378f8374595c849d84f4c333ec4423885143cb47bd71c5edae9be69a2ffeceb1bec9de244fbe15992b11b77c040f12bd8f6a975a44a0f90c29a9abc3d4d893927284c58754cce294529f8614dcd2aba991925fedc4ae74ffac6e333b93eb4aff0479da9a410e4450e0
dd7ae4c6e2910900575da401fc07059f645e8b7e9bfdef33943054ff84011493c27b3429eaedb4ed5376441a77ed43851ad77f16f541dfd269d50d6a5f14fb0aab1cbb4c1550be97f7ab4066193c4caa773dad38014bd2092fa755c824bb5e54c4f36ffda9fcea70b9c6e693e148c151
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffff0000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:77a31251618a15e6b92d1d66dffe7b50b50bad552305ba0217a610688eff7e11e1d0225438e093242d6db274fde801d4cae06f2092c728b2478559df58e837c2469ee4a4fa794e4bbc7f39bc026e3cb72c33b0888f25b4acf56a2a9804f1ce6d3d6e1dc6ca181d4b546179d55544aa7760c40d06741539c7e3cd9d2f6650b2013fd0eeb8c2b8e3d8d240ccae2d4c98320a7442e1c8d75a42d6e6cfa4c2eca1798d158c7aecdf82490f24bb9b38e108bcda12c3faf9a21141c3613b58367f922aaa26cd22f23d708dae699ad7cb40a8ad0b6e2784973dcb605684c08b8d6998c69aac049921871ebb65301a4619ca80ecb485a31d744223ce8ddc2394828d6a80470c092f5ba413c3378fa6054255c6f9df4495862bbb3287681f931b687c888abf844dfc8fc28331e579928cd12bd2390ae123cf03818d14dedde5c0c24c8ab018bfca75ca096f2d531f3d1619e785f1ada437cab92e980558b3dce1474afb75bfedbf8ff54cb2618e0244c9ac0d3c66fb51598cd2db11f9be39791abe447c63094f7c453b7ff87cb5bb36b7c79efb08
72d17058b83b15ab0866ad8a58656c5a7e20dbdf308b2461d97c0ec0024a2715055249cf3b478ddd4740de654f75ca686e0d7345c69ed50cdc2a8b332b1f8824108ac937eb050585608ee734097fc09054fbff89eeaeea791f4a7ab1f9868294a4f9e27b42af8100cb9d59cef9645803
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffff00000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:e387aaa58ba483afa7e8eb469778317ecf4cf573aa9d4eac23f2cdf914e4e200a8b490e42ee646802dc6ee2b471b278195d60918ececb44bf79966f83faba0499298ebc699c0c8634715a320bb4f075d622e74c8c932004f25b41e361025b5a87815391f6108fc4afa6a05d9303c6ba68a128a55705d415985832fdeaae6c8e19110e84d1b1f199a2692119edc96132658f09da7c623efcec712537a3d94c0bf5d7e352ec94ae5797fdb377dc1551150721adf15bd26a8efc2fcaad56881fa9e62462c28f30ae1ceaca93c345cf243b73f542e2074a705bd2643bb9f7cc79bb6e7091ea6e232df0f9ad0d6cf502327876d82207abf2115cdacf6d5a48f6c1879a65b115f0f8b3cb3c59d15dd8c769bc014795a1837f3901b5845eb491adfefe097b1fa30a12fc1f65ba22905031539971a10f2f36c321bb51331cdefb39e3964c7ef079994f5b69b2edd83a71ef549971ee93f44eac3938fcdd61d01fa71799da3a8091c4c48aa9ed263ff0749df95d44fef6a0bb578ec69456aa5408ae32c7af08ad7ba8921287e3bbee31b767be06a
0e705c864a769137df28292283ea81a2480241b44d9921cdbec1bc28dc1fda114bd8e5217ac9d8ebafa720e9da4f9ace231cc949e5b96fe76ffc21063fddc83a6b8679c00d35e09576a875305bed5f36ed242c8900dd1fa965bc950dfce09b132263a1eef52dd6888c309f5a7d712826
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffff000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:bf53d2dade78e822a4d949a9bc6766b01b06a8ef70d26748c6a7fc36d80ae4c5520f7c4ab0ac8544424fa405162fef5a6b7f229498063618d39f0003cb5fb8d1c86b643497da1ff945c8d3bedeca4f479702a7a735f043ddb1d6aaade3c4a0ac7ca7f3fa5279bef56f82cd7a2f38672e824814e10700300a055e1630b8f1cb0e919f5e942010a416e2bf48cb46993d3cb6a51c19bacf864785a00bc2ecff15d350875b246ed53e68be6f55bd7e05cfc2b2ed6432198a6444b6d8c247fab941f569768b5c429366f1d3f00f0345b96123d56204c01c63b22ce78baf116e525ed90fdea39fa469494d3866c31e05f295ff21fea8d4e6e13d67e47ce722e9698a1c1048d68ebcde76b86fcf976eab8aa9790268b7068e017a8b9b749409514f1053027fd16c3786ea1bac5f15cb79711ee2abe82f5cf8b13ae73030ef5b9e4457e75d1304f988d62dd6fc4b94ed38ba831da4b7634971b6cd8ec325d9c61c00f1df73627ed3745a5e8489f3a95c69639c32cd6e1d537a85f75cc844726e8a72fc0077ad22000f1d5078f6b866318c668f1a
d03d5a5fced5219f2eabbd0aa5c0f460d183f04404a0d6f469558e81fab24a167905ab4c7878502ad3e38fdbe62a41556cec37325759533ce8f25f367c87bb5578d667ae93f9e2fd99bcbc5f2fbba88cf6516139420fcff3b7361d86322c4bd84c82f335abb152c4a93411373aaa8220
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffffff0000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:64497e5a831e4a932c09be3e5393376daa599548b816031d224bbf50a818ed2350eae7e96087c8a0db51ad290bd00c1ac1620857635bf246c176ab463be30b808da548081ac847b158e1264be25bb0910bbc92647108089415d45fab1b3d2604e8a8eff1ae4020cfa39936b66827b23f371b92200be90251e6d73c5f86de5fd4a950781933d79a28272b782a2ec313efdfcc0628f43d744c2dc2ff3dcb66999b50c7ca895b0c64791eeaa5f29499fb1c026f84ce5b5c72ba1083cddb5ce45434631665c333b60b11593fb253c5179a2c8db813782a004856a1653011e93fb6d876c18366dd8683f53412c0c180f9c848592d593f8609ca736317d356e13e2bff3a9f59cd9aeb19cd482593d8c46128bb32423b37a9adfb482b99453fbe25a41bf6feb4aa0bef5ed24bf73c762978025482c13115e4015aac992e5613a3b5c2f685b84795cb6e9b2656d8c88157e52c42f978d8634c43d06fea928f2822e465aa6576e9bf419384506cc3ce3c54ac1a6f67dc66f3b30191e698380bc999b05abce19dc0c6dcc2dd001ec535ba18deb2df
1a101023108318c75dc98611a09dc48a0acdec676fabdf222f07e026f059b672b56e5cbc8e1d21bbd867dd927212054681d70ea737134cdfce93b6f82ae22423274e58a0821cc5502e2d0ab4585e94de6975be5e0b4efce51cd3e70c25a1fbbbd609d273ad5b0d59631c531f6a0a57b9
+
+aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10:6c1625db4671522d3d7599601de7ca09ed
+aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f1011:d069444b7a7e0cab09e24447d24deb1fedbf
+aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f101112:e5df1351c0544ba1350b3363cd8ef4beedbf9d
+aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10111213:9d84c813f719aa2c7be3f66171c7c5c2edbf9dac
+aes-128-xts:e0e1e2e3e4e5e6e7e8e9eaebecedeeefc0c1c2c3c4c5c6c7c8c9cacbcccdcecf:21436587a90000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:38b45812ef43a05bd957e545907e223b954ab4aaf088303ad910eadf14b42be68b2461149d8c8ba85f992be970bc621f1b06573f63e867bf5875acafa04e42ccbd7bd3c2a0fb1fff791ec5ec36c66ae4ac1e806d81fbf709dbe29e471fad38549c8e66f5345d7c1eb94f405d1ec785cc6f6a68f6254dd8339f9d84057e01a17741990482999516b5611a38f41bb6478e6f173f320805dd71b1932fc333cb9ee39936beea9ad96fa10fb4112b901734ddad40bc1878995f8e11aee7d141a2f5d48b7a4e1e7f0b2c04830e69a4fd1378411c2f287edf48c6c4e5c247a19680f7fe41cefbd49b582106e3616cbbe4dfb2344b2ae9519391f3e0fb4922254b1d6d2d19c6d4d537b3a26f3bcc51588b32f3eca0829b6a5ac72578fb814fb43cf80d64a233e3f997a3f02683342f2b33d25b492536b93becb2f5e1a8b82f5b883342729e8ae09d16938841a21a97fb543eea3bbff59f13c1a18449e398701c1ad51648346cbc04c27bb2da3b93a1372ccae548fb53bee476f9e9c91773b1bb19828394d55d3e1a20ed69113a860b6829ffa847224604435070221b257e8dff783615d2cae4803a93aa4334ab482a0afac9c0ae
da70b45a481df5dec5df8cc0f423c77a5fd46cd312021d4b438862419a791be03bb4d97c0e59578542531ba466a83baf92cefc151b5cc1611a167893819b63fb8a6b18e86de60290fa72b797b0ce59f3
+# AES wrap tests from RFC3394
+id-aes128-wrap:000102030405060708090A0B0C0D0E0F::00112233445566778899AABBCCDDEEFF:1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5
+id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF:96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D
+id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF:64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7
+id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF0001020304050607:031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2
+id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF0001020304050607:A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1
+id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21
index 97e0c3b..7e2d939 100644 (file)
@@ -148,7 +148,8 @@ $ TEST_FILES = "BNTEST,ECTEST,ECDSATEST,ECDHTEST,IDEATEST,"+ -
               "RANDTEST,DHTEST,ENGINETEST,"+ -
               "BFTEST,CASTTEST,SSLTEST,EXPTEST,DSATEST,RSA_TEST,"+ -
               "EVP_TEST,IGETEST,JPAKETEST,SRPTEST,"+ -
-              "ASN1TEST,HEARTBEAT_TEST,CONSTANT_TIME_TEST"
+              "ASN1TEST,V3NAMETEST,HEARTBEAT_TEST,"+ -
+              "CONSTANT_TIME_TEST"
 $! Should we add MTTEST,PQ_TEST,LH_TEST,DIVTEST,TABTEST as well?
 $!
 $! Additional directory information.
@@ -185,6 +186,7 @@ $ T_D_EVP_TEST   := [-.crypto.evp]
 $ T_D_IGETEST    := [-.test]
 $ T_D_JPAKETEST  := [-.crypto.jpake]
 $ T_D_SRPTEST    := [-.crypto.srp]
+$ T_D_V3NAMETEST := [-.crypto.x509v3]
 $ T_D_ASN1TEST   := [-.test]
 $ T_D_HEARTBEAT_TEST := [-.ssl]
 $ T_D_CONSTANT_TIME_TEST := [-.crypto]
diff --git a/test/ocsp-tests/D1.ors b/test/ocsp-tests/D1.ors
new file mode 100644 (file)
index 0000000..3fa4a11
--- /dev/null
@@ -0,0 +1,32 @@
+MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBRf2uQDFpGg
+Ywh4P1y2H9bZ2/BQNBgPMjAxMjEwMjMxMDI1MzZaMHUwczBLMAkGBSsOAwIaBQAE
+FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8
+vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDIzMDcwMDAwWqARGA8yMDEyMTAzMDA4
+MDAwMFowCwYJKoZIhvcNAQEFA4IBAQAJU3hXN7NApN50/vlZTG2p8+QQJp4uaod3
+wyBQ0Ux3DoQZQ9RG6/7Mm4qpOLCCSTh/lJjZ0fD+9eB3gcp/JupN1JrU+dgTyv/Y
+9MOctJz7y+VoU9I+qB8knV4sQCwohAVm8GmA9s4p/rHq5Oymci0SuG/QCfkVxOub
+rI1bWjbHLvvXyvF3PoGMORVHG3SA+jJ9VkHWJyi6brHxY+QR/iYxer8lJsBtpyc7
+q2itFgvax/OHwne3lxsck9q0QgKpmEdJu2LuGyWFIhrEwR3b7ASEu1G/nKClv3dR
+vyOXMm1XIwuUhCjAcpNEKiOMorFwnLS1F8LhfqFWTAFG0JbWpAi8oIID+DCCA/Qw
+ggPwMIIC2KADAgECAhIRISdENsrz1CSWG3VIBwfQERQwDQYJKoZIhvcNAQEFBQAw
+WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV
+BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy
+MDkxOTA3NDA1MFoXDTEyMTIxOTA4NDA1MFowgYUxCzAJBgNVBAYTAkJFMRkwFwYD
+VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu
+ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDIxFzAVBgNV
+BAUTDjIwMTIwOTE5MDk0MDAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAnCgMsBO+IxIqCnXCOfXJoIC3wj+f0s4DV9h2gJBzisWXkaJD2DfNrd0kHUXK
+qVVPUxnA4G5iZu0Z385/KiOt1/P6vQ/Z2/AsEh/8Z/hIyeZCHL31wrSZW4yLeZwi
+M76wPiBHJxPun681HQlVs/OGKSHnbHc1XJAIeA/M8u+lLWqIKB+AJ82TrOqUMj1s
+LjGhQNs84xPliONN5K7DrEy+Y65X/rFxN77Smw+UtcH1GgH2NgaHH8dpt1m25sgm
+UxZWhdx66opB/lbRQwWdGt7MC0kJFaWHDZq64DTuYoekFYSxAFu0nd0EekEHEJEi
+9mquB9cv/96SuEJl8BcUWU/1LwIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P
+AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw
+HQYDVR0OBBYEFF/a5AMWkaBjCHg/XLYf1tnb8FA0MB8GA1UdIwQYMBaAFLCwSv0c
+dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCKRl1iXFmOQtLseDWP
+Y5icDDBGiRi17CGgvIzGJi/ha0PhbO+X0TmQIEnRX3Mu0Er/Mm4RZSjMtJ2iZRh3
+tGf4Dn+jKgKOmgXC3oOG/l8RPHLf0yaPSdn/z0TXtA30vTFBLlFeWnhbfhovea4+
+snPdBxLqWZdtxmiwojgqA7YATCWwavizrBr09YRyDwzgtpZ2BwMruGuFuV9FsEwL
+PCM53yFlrM32oFghyfyE5kYjgnnueKM+pw1kA0jgb1CnVJRrMEN1TXuXDAZLtHKG
+5X/drah1JtkoZhCzxzZ3bYdVDQJ90OHFqM58lwGD6z3XuPKrHDKZKt+CPIsl5g7p
+4J2l
diff --git a/test/ocsp-tests/D1_Cert_EE.pem b/test/ocsp-tests/D1_Cert_EE.pem
new file mode 100644 (file)
index 0000000..c5b993c
--- /dev/null
@@ -0,0 +1,38 @@
+-----BEGIN CERTIFICATE-----
+MIIGujCCBaKgAwIBAgISESG8vx4IzALnkqQG05AvM+2bMA0GCSqGSIb3DQEBBQUA
+MFkxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMS8wLQYD
+VQQDEyZHbG9iYWxTaWduIEV4dGVuZGVkIFZhbGlkYXRpb24gQ0EgLSBHMjAeFw0x
+MjA4MTQxMjM1MDJaFw0xMzA4MTUxMDMxMjlaMIIBCjEdMBsGA1UEDwwUUHJpdmF0
+ZSBPcmdhbml6YXRpb24xDzANBgNVBAUTBjU3ODYxMTETMBEGCysGAQQBgjc8AgED
+EwJVUzEeMBwGCysGAQQBgjc8AgECEw1OZXcgSGFtcHNoaXJlMQswCQYDVQQGEwJV
+UzEWMBQGA1UECAwNTmV3IEhhbXBzaGlyZTETMBEGA1UEBwwKUG9ydHNtb3V0aDEg
+MB4GA1UECRMXVHdvIEludGVybmF0aW9uYWwgRHJpdmUxDTALBgNVBAsMBC5DT00x
+GzAZBgNVBAoMEkdNTyBHbG9iYWxTaWduIEluYzEbMBkGA1UEAwwSd3d3Lmdsb2Jh
+bHNpZ24uY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqx/nHBP4
+6s5KKMDlfZS4qFDiAWsoPSRn6WO4nrUF/G2S3I/AdJ0IcSDOHb48/3APj5alqbgo
+o4IzdG6KLAbENpHMl0L3pHBq/5tJPTi02SbiYUHfp2fhueMauRo8spfEk6fNRnDn
+QpyMFRkYd7Jz+KMerTO1xAcOH+xp0KkcP0i2jFTEuM3LwR0yTms1rry+RryjDDt5
+7W0DLnNFWhyGd6YymzNkCPeL6weV8uk2uYRKKf2XOAzgIpNo3zU6iakZOzlQB9h9
+qRuIks2AU/cZ89cBkDjHua0ezX5rG3/Url33jAT9cR5zCXHWtj7VzlOjDXXnn16b
+L9/AWsvGMNkYHQIDAQABo4ICxzCCAsMwDgYDVR0PAQH/BAQDAgWgMEwGA1UdIARF
+MEMwQQYJKwYBBAGgMgEBMDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2Jh
+bHNpZ24uY29tL3JlcG9zaXRvcnkvMIIBKwYDVR0RBIIBIjCCAR6CEnd3dy5nbG9i
+YWxzaWduLmNvbYIVc3RhdHVzLmdsb2JhbHNpZ24uY29tghF0aC5nbG9iYWxzaWdu
+LmNvbYISZGV2Lmdsb2JhbHNpZ24uY29tghNpbmZvLmdsb2JhbHNpZ24uY29tghZh
+cmNoaXZlLmdsb2JhbHNpZ24uY29tghZzdGF0aWMxLmdsb2JhbHNpZ24uY29tghZz
+dGF0aWMyLmdsb2JhbHNpZ24uY29tghNibG9nLmdsb2JhbHNpZ24uY29tghdzc2xj
+aGVjay5nbG9iYWxzaWduLmNvbYIVc3lzdGVtLmdsb2JhbHNpZ24uY29tghhvcGVy
+YXRpb24uZ2xvYmFsc2lnbi5jb22CDmdsb2JhbHNpZ24uY29tMAkGA1UdEwQCMAAw
+HQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMD8GA1UdHwQ4MDYwNKAyoDCG
+Lmh0dHA6Ly9jcmwuZ2xvYmFsc2lnbi5jb20vZ3MvZ3NleHRlbmR2YWxnMi5jcmww
+gYgGCCsGAQUFBwEBBHwwejBBBggrBgEFBQcwAoY1aHR0cDovL3NlY3VyZS5nbG9i
+YWxzaWduLmNvbS9jYWNlcnQvZ3NleHRlbmR2YWxnMi5jcnQwNQYIKwYBBQUHMAGG
+KWh0dHA6Ly9vY3NwMi5nbG9iYWxzaWduLmNvbS9nc2V4dGVuZHZhbGcyMB0GA1Ud
+DgQWBBSvMoTDlFB0aVgVrNkkS1QSmYfx1zAfBgNVHSMEGDAWgBSwsEr9HHUo+Bxh
+qhP2+sGQPWsWozANBgkqhkiG9w0BAQUFAAOCAQEAgnohm8IRw1ukfc0GmArK3ZLC
+DLGpsefwWMvNrclqwrgtVrBx4pfe5xGAjqyQ2QI8V8a8a1ytVMCSC1AMWiWxawvW
+fw48fHunqtpTYNDyEe1Q+7tTGZ0SQ3HljYY9toVEjAMDhiM0Szl6ERRO5S7BTCen
+mDpWZF8w3ScRRY2UJc8xwWFiYyGWDNzNL1O8R2Y95QIkHUgQpSD3cjl4YvF/Xx/o
+hBEzl884uNAggIyQRu0ImLEetEtHWB2w0pZG3nTAqjOAAAyH2Q8IHoJtjQzvg6fy
+IQEO1C5GoQ7isiKIjKBXVYOm+gKSQXlzwj1BlU/OW6kEe24IiERhAN9ILA24wA==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/D1_Issuer_ICA.pem b/test/ocsp-tests/D1_Issuer_ICA.pem
new file mode 100644 (file)
index 0000000..b650f38
--- /dev/null
@@ -0,0 +1,27 @@
+-----BEGIN CERTIFICATE-----
+MIIEhjCCA26gAwIBAgILBAAAAAABL07hXdQwDQYJKoZIhvcNAQEFBQAwTDEgMB4G
+A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp
+Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTEwNDEzMTAwMDAwWhcNMjIwNDEz
+MTAwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1z
+YTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBFeHRlbmRlZCBWYWxpZGF0aW9uIENBIC0g
+RzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDNoUbMUpq4pbR/WNnN
+2EugcgyXW6aIIMO5PUbc0FxSMPb6WU+FX7DbiLSpXysjSKyr9ZJ4FLYyD/tcaoVb
+AJDgu2X1WvlPZ37HbCnsk8ArysRe2LDb1r4/mwvAj6ldrvcAAqT8umYROHf+IyAl
+VRDFvYK5TLFoxuJwe4NcE2fBofN8C6iZmtDimyUxyCuNQPZSY7GgrVou9Xk2bTUs
+Dt0F5NDiB0i3KF4r1VjVbNAMoQFGAVqPxq9kx1UBXeHRxmxQJaAFrQCrDI1la93r
+wnJUyQ88ABeHIu/buYZ4FlGud9mmKE3zWI2DZ7k0JZscUYBR84OSaqOuR5rW5Isb
+wO2xAgMBAAGjggFaMIIBVjAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB
+/wIBADAdBgNVHQ4EFgQUsLBK/Rx1KPgcYaoT9vrBkD1rFqMwRwYDVR0gBEAwPjA8
+BgRVHSAAMDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2JhbHNpZ24uY29t
+L3JlcG9zaXRvcnkvMDYGA1UdHwQvMC0wK6ApoCeGJWh0dHA6Ly9jcmwuZ2xvYmFs
+c2lnbi5uZXQvcm9vdC1yMi5jcmwwRAYIKwYBBQUHAQEEODA2MDQGCCsGAQUFBzAB
+hihodHRwOi8vb2NzcC5nbG9iYWxzaWduLmNvbS9FeHRlbmRlZFNTTENBMCkGA1Ud
+JQQiMCAGCCsGAQUFBwMBBggrBgEFBQcDAgYKKwYBBAGCNwoDAzAfBgNVHSMEGDAW
+gBSb4gdXZxwewGoG3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAL0m28rZa
+pJWrnlrpK4KbzJBrfHRFIOde2Mcj7ig1sTVlKqVR4FU/9oNntOQ2KbDa7JeVqYoF
+o0X+Iy5SiLQfEICt0oufo1+oxetz3nmIQZgz7qdgGLFGyUAQB5yPClLJExoGbqCb
+LTr2rk/no1E1KlsYBRLlUdy2NmLz4aQP++TPw5S/EauhWTEB8MxT7I9j12yW00gq
+iiPtRVaoZkHqAblH7qFHDBTxI+Egc8p9UHxkOFejj0qcm+ltRc9Ea01gIEBxJbVG
+qmwIft/I+shWKpLLg7h5CZctXqEBzgbttJfJBNxB7+BPNk3kQHNG7BESfIhbNCYl
+TercGL7FG81kwA==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/D2.ors b/test/ocsp-tests/D2.ors
new file mode 100644 (file)
index 0000000..dcbd4d4
--- /dev/null
@@ -0,0 +1,32 @@
+MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTqlwecTarB
+yVdbHxANRLCFYj1mqBgPMjAxMjEwMjMxMDI1MzZaMG4wbDBEMAkGBSsOAwIaBQAE
+FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA
+AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL
+BgkqhkiG9w0BAQUDggEBAEJN4FuPQPnizPIwEj4Q8Ht765gI6QqMNrvj3UykxYeu
+qUajKcqA+V1zaDHTaz+eCQthtmCNKC9T+zVkjGelVsd7Kn2fVKWqp+5wVPI8dVkm
+6Gs/IGZ16HDnQ/siTrY3ILWCRz4Hf6lnHpIErQuQRQyjlGKNcE7RYmjGw4w0bxx8
+vHN/baCMApBL0D0zeBqlpJCMUZqJJ3D1+87HxHYR1MkMZDC9rOPIhlpEP4yL17gx
+ckrPf+w+A/3kC++jVeA3b8Xtr+MaWOFH4xVn6BTxopczZKVl18tSYqgwITlx5/cL
+LpYEdllC0l83E8GRzsOp0SvFxo0NBotgFNZQQujpOzagggQQMIIEDDCCBAgwggLw
+oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF
+MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw
+GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw
+NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu
+di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh
+bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP2QF8p0+Fb7ID
+MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi
+oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL
+866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J
+ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5
+MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw
+yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec
+TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG
+AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ
+BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF
+DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD
+ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7
+8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm
+uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb
+4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa
+YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC
+fBIRKjnK621vAWvc7UR+0hqnZ+U=
diff --git a/test/ocsp-tests/D2_Cert_ICA.pem b/test/ocsp-tests/D2_Cert_ICA.pem
new file mode 100644 (file)
index 0000000..459f98e
--- /dev/null
@@ -0,0 +1,26 @@
+-----BEGIN CERTIFICATE-----
+MIIEdzCCA1+gAwIBAgILBAAAAAABL07hRxAwDQYJKoZIhvcNAQEFBQAwVzELMAkG
+A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv
+b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw0wNjEyMTUwODAw
+MDBaFw0yODAxMjgxMjAwMDBaMEwxIDAeBgNVBAsTF0dsb2JhbFNpZ24gUm9vdCBD
+QSAtIFIyMRMwEQYDVQQKEwpHbG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWdu
+MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAps8kDr4ubyiZRULEqz4h
+VJsL03+EcPoSs8u/h1/Gf4bTsjBc1v2t8Xvc5fhglgmSEPXQU977e35ziKxSiHtK
+pspJpl6op4xaEbx6guu+jOmzrJYlB5dKmSoHL7Qed7+KD7UCfBuWuMW5Oiy81hK5
+61l94tAGhl9eSWq1OV6INOy8eAwImIRsqM1LtKB9DHlN8LgtyyHK1WxbfeGgKYSh
++dOUScskYpEgvN0L1dnM+eonCitzkcadG6zIy+jgoPQvkItN+7A2G/YZeoXgbfJh
+E4hcn+CTClGXilrOr6vV96oJqmC93Nlf33KpYBNeAAHJSvo/pOoHAyECjoLKA8Kb
+jwIDAQABo4IBTTCCAUkwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8w
+HQYDVR0OBBYEFJviB1dnHB7AagbeWbSaLd/cGYYuMEcGA1UdIARAMD4wPAYEVR0g
+ADA0MDIGCCsGAQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBv
+c2l0b3J5LzAzBgNVHR8ELDAqMCigJqAkhiJodHRwOi8vY3JsLmdsb2JhbHNpZ24u
+bmV0L3Jvb3QuY3JsMD0GCCsGAQUFBwEBBDEwLzAtBggrBgEFBQcwAYYhaHR0cDov
+L29jc3AuZ2xvYmFsc2lnbi5jb20vcm9vdHIxMCkGA1UdJQQiMCAGCCsGAQUFBwMB
+BggrBgEFBQcDAgYKKwYBBAGCNwoDAzAfBgNVHSMEGDAWgBRge2YaRQ2XyolQL30E
+zTSo//z9SzANBgkqhkiG9w0BAQUFAAOCAQEAOg/NJk04MAioxvxc2Ah67/ocKgPO
+Mq5EluFSA5UKUtZnr1uWfN0ZizBbNjprbqAVxoKhyzlmAFeLAqJuhfusVVq4FVAa
+kN4JSOyo9lccGDG9xn3IvevCpzlRbaL/HHjeHCcE4c8klegO5NUfsPn7UMrLbp5i
+JniG9cT1eI/dcq9uLtWe3c48y7jHLVRg1+WcAkuGRPBXUSvNCps8sfU6TB2KxfAw
+PmWHxA5fbkqsiqge5/rkM4AVhFZlJZv7njCIy5EWwQXDqSTsIdLVsPy3I0annff3
+xlMSeDe0E3OPN5deBJv5mYuTPiZCl5/9HrXVy4hINKJmoPqsco/dRy+CdA==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/D2_Issuer_Root.pem b/test/ocsp-tests/D2_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..f4ce4ca
--- /dev/null
@@ -0,0 +1,21 @@
+-----BEGIN CERTIFICATE-----
+MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG
+A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv
+b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw
+MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9i
+YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT
+aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDaDuaZ
+jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp
+xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp
+1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG
+snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ
+U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8
+9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E
+BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B
+AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz
+yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE
+38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP
+AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad
+DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME
+HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/D3.ors b/test/ocsp-tests/D3.ors
new file mode 100644 (file)
index 0000000..d66439b
--- /dev/null
@@ -0,0 +1,38 @@
+MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV
+BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML
+Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG
+A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAyMzEwMzkzMFowZjBkMDwwCQYF
+Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y
+ORjRAgMLs8aAABgPMjAxMjEwMjMwOTU5MTJaoBEYDzIwMTIxMDI1MTAzOTMwWjAN
+BgkqhkiG9w0BAQUFAAOCAQEAYaaAzW26JQGFRyawj9ROtnSdJ9QPJ6B/wfpJif8e
+QU9lmKx0zIDdTum3Mc5tfxML71W025UW9jzowAfQ5bZbqa4nwZlWX5Py3hKebeYo
+WiND4pvhS4BRkheSkycEok0bj1FJYWYiJVpnTqKAPnOKrlL4qvGC2IOHk2toS/Je
+iLyoUwxrPtqaXt4Caoa3I70HE3H1QqvPIGIY6V4bxV7Km/xv99QOutkbfANGiNsx
+W7EDB3TRNhldzMnjEwG58X5Pe3xwEVqjCiBL+wQ8JALn08bJzFn9E04aYrqCGc8s
+gw1dgaBoZt+0vbQUN71KEocwMj5mzJqottOyqNwo7FZnBaCCBL4wggS6MIIEtjCC
+Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex
+HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy
+dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl
+cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE
+BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD
+QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD
+VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr
+2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0
+3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz
+uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz
+mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16
+VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD
+VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA
+MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA
+MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS
+r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ
+VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU
+6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE
+STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj
+ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe
+g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo
+5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9
+HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1
+VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a
+wG/pag==
diff --git a/test/ocsp-tests/D3_Cert_EE.pem b/test/ocsp-tests/D3_Cert_EE.pem
new file mode 100644 (file)
index 0000000..f371ed1
--- /dev/null
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFZDCCA0ygAwIBAgIDC7PGMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jv
+b3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZ
+Q0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9y
+dEBjYWNlcnQub3JnMB4XDTEyMDUwNjE4NDY0MVoXDTE0MDUwNjE4NDY0MVowWzEL
+MAkGA1UEBhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYD
+VQQKEwtDQWNlcnQgSW5jLjEXMBUGA1UEAxMOd3d3LmNhY2VydC5vcmcwggEiMA0G
+CSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDeNSAxSFtymeN6rQD69eXIJEnCCP7Z
+24/fdOgxRDSBhfQDUVhdmsuDOvuziOoWGqRxZPcWdMEMRcJ5SrA2aHIstvnaLhUl
+xp2fuaeXx9XMCJ9ZmzHZbH4wqLaU+UlhcSsdkPzapf3N3HaUAW8kT4bHEGzObYVC
+UBxxhpY01EoGRQmnFojzLNF3+0O1npQzXg5MeIWHW/Z+9jE+6odL6IXgg1bvrP4d
+FgoveTcG6BmJu+50RwHaUad7hQuNeS+pNsVzCiDdMF2qoCQXtAGhnEQ9/KHpBD2z
+ISBVIyEbYxdyU/WxnkaOof63Mf/TAgMNzVN9duqEtFyvvMrQY1XkBBwfAgMBAAGj
+ggERMIIBDTAMBgNVHRMBAf8EAjAAMDQGA1UdJQQtMCsGCCsGAQUFBwMCBggrBgEF
+BQcDAQYJYIZIAYb4QgQBBgorBgEEAYI3CgMDMAsGA1UdDwQEAwIFoDAzBggrBgEF
+BQcBAQQnMCUwIwYIKwYBBQUHMAGGF2h0dHA6Ly9vY3NwLmNhY2VydC5vcmcvMIGE
+BgNVHREEfTB7gg53d3cuY2FjZXJ0Lm9yZ4IRc2VjdXJlLmNhY2VydC5vcmeCEnd3
+d21haWwuY2FjZXJ0Lm9yZ4IKY2FjZXJ0Lm9yZ4IOd3d3LmNhY2VydC5uZXSCCmNh
+Y2VydC5uZXSCDnd3dy5jYWNlcnQuY29tggpjYWNlcnQuY29tMA0GCSqGSIb3DQEB
+BQUAA4ICAQA2+uCGX18kZD8gyfj44TlwV4TXJ5BrT0M9qogg2k5u057i+X2ePy3D
+iE2REyLkU+i5ekH5gvTl74uSJKtpSf/hMyJEByyPyIULhlXCl46z2Z60drYzO4ig
+apCdkm0JthVGvk6/hjdaxgBGhUvSTEP5nLNkDa+uYVHJI58wfX2oh9gqxf8VnMJ8
+/A8Zi6mYCWUlFUobNd/ozyDZ6WVntrLib85sAFhds93nkoUYxgx1N9Xg/I31/jcL
+6bqmpRAZcbPtvEom0RyqPLM+AOgySWiYbg1Nl8nKx25C2AuXk63NN4CVwkXpdFF3                                                                                                               
+q5qk1izPruvJ68jNW0pG7nrMQsiY2BCesfGyEzY8vfrMjeR5MLNv5r+obeYFnC1j                                                                                                               
+uYp6JBt+thW+xPFzHYLjohKPwo/NbMOjIUM9gv/Pq3rVRPgWru4/8yYWhrmEK370                                                                                                               
+rtlYBUSGRUdR8xed1Jvs+4qJ3s9t41mLSXvUfwyPsT7eoloUAfw3RhdwOzXoC2P6                                                                                                               
+ftmniyu/b/HuYH1AWK+HFtFi9CHiMIqOJMhj/LnzL9udrQOpir7bVej/mlb3kSRo                                                                                                               
+2lZymKOvuMymMpJkvBvUU/QEbCxWZAkTyqL2qlcQhHv7W366DOFjxDqpthaTRD69                                                                                                               
+T8i/2AnsBDjYFxa47DisIvR57rLmE+fILjSvd94N/IpGs3lSOS5JeA==                                                                                                                       
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/D3_Issuer_Root.pem b/test/ocsp-tests/D3_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..3ccc18e
--- /dev/null
@@ -0,0 +1,83 @@
+-----BEGIN CERTIFICATE-----
+MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdSb290
+IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB
+IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA
+Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO
+BgNVBAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi
+MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ
+ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC
+CgKCAgEAziLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ
+8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6
+zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y
+fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7
+w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc
+G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k
+epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q
+laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ
+QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU
+fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826
+YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w
+ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY
+gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe
+MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0
+IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy
+dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw
+czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0
+dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl
+aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC
+AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg
+b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB
+ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc
+nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg
+18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c
+gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl
+Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY
+sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T
+SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF
+CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum
+GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk
+zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW
+omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVD
+-----END CERTIFICATE-----
+-----BEGIN CERTIFICATE-----
+MIIHWTCCBUGgAwIBAgIDCkGKMA0GCSqGSIb3DQEBCwUAMHkxEDAOBgNVBAoTB1Jv
+b3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZ
+Q0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9y
+dEBjYWNlcnQub3JnMB4XDTExMDUyMzE3NDgwMloXDTIxMDUyMDE3NDgwMlowVDEU
+MBIGA1UEChMLQ0FjZXJ0IEluYy4xHjAcBgNVBAsTFWh0dHA6Ly93d3cuQ0FjZXJ0
+Lm9yZzEcMBoGA1UEAxMTQ0FjZXJ0IENsYXNzIDMgUm9vdDCCAiIwDQYJKoZIhvcN
+AQEBBQADggIPADCCAgoCggIBAKtJNRFIfNImflOUz0Op3SjXQiqL84d4GVh8D57a
+iX3h++tykA10oZZkq5+gJJlz2uJVdscXe/UErEa4w75/ZI0QbCTzYZzA8pD6Ueb1
+aQFjww9W4kpCz+JEjCUoqMV5CX1GuYrz6fM0KQhF5Byfy5QEHIGoFLOYZcRD7E6C
+jQnRvapbjZLQ7N6QxX8KwuPr5jFaXnQ+lzNZ6MMDPWAzv/fRb0fEze5ig1JuLgia
+pNkVGJGmhZJHsK5I6223IeyFGmhyNav/8BBdwPSUp2rVO5J+TJAFfpPBLIukjmJ0
+FXFuC3ED6q8VOJrU0gVyb4z5K+taciX5OUbjchs+BMNkJyIQKopPWKcDrb60LhPt
+XapI19V91Cp7XPpGBFDkzA5CW4zt2/LP/JaT4NsRNlRiNDiPDGCbO5dWOK3z0luL
+oFvqTpa4fNfVoIZwQNORKbeiPK31jLvPGpKK5DR7wNhsX+kKwsOnIJpa3yxdUly6
+R9Wb7yQocDggL9V/KcCyQQNokszgnMyXS0XvOhAKq3A6mJVwrTWx6oUrpByAITGp
+rmB6gCZIALgBwJNjVSKRPFbnr9s6JfOPMVTqJouBWfmh0VMRxXudA/Z0EeBtsSw/
+LIaRmXGapneLNGDRFLQsrJ2vjBDTn8Rq+G8T/HNZ92ZCdB6K4/jc0m+YnMtHmJVA
+BfvpAgMBAAGjggINMIICCTAdBgNVHQ4EFgQUdahxYEyIE/B42Yl3tW3Fid+8sXow
+gaMGA1UdIwSBmzCBmIAUFrUyG9TH8+DmjvO90rA67rI5GNGhfaR7MHkxEDAOBgNV
+BAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAG
+A1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYS
+c3VwcG9ydEBjYWNlcnQub3JnggEAMA8GA1UdEwEB/wQFMAMBAf8wXQYIKwYBBQUH
+AQEEUTBPMCMGCCsGAQUFBzABhhdodHRwOi8vb2NzcC5DQWNlcnQub3JnLzAoBggr
+BgEFBQcwAoYcaHR0cDovL3d3dy5DQWNlcnQub3JnL2NhLmNydDBKBgNVHSAEQzBB
+MD8GCCsGAQQBgZBKMDMwMQYIKwYBBQUHAgEWJWh0dHA6Ly93d3cuQ0FjZXJ0Lm9y
+Zy9pbmRleC5waHA/aWQ9MTAwNAYJYIZIAYb4QgEIBCcWJWh0dHA6Ly93d3cuQ0Fj
+ZXJ0Lm9yZy9pbmRleC5waHA/aWQ9MTAwUAYJYIZIAYb4QgENBEMWQVRvIGdldCB5
+b3VyIG93biBjZXJ0aWZpY2F0ZSBmb3IgRlJFRSwgZ28gdG8gaHR0cDovL3d3dy5D
+QWNlcnQub3JnMA0GCSqGSIb3DQEBCwUAA4ICAQApKIWuRKm5r6R5E/CooyuXYPNc
+7uMvwfbiZqARrjY3OnYVBFPqQvX56sAV2KaC2eRhrnILKVyQQ+hBsuF32wITRHhH
+Va9Y/MyY9kW50SD42CEH/m2qc9SzxgfpCYXMO/K2viwcJdVxjDm1Luq+GIG6sJO4
+D+Pm1yaMMVpyA4RS5qb1MyJFCsgLDYq4Nm+QCaGrvdfVTi5xotSu+qdUK+s1jVq3
+VIgv7nSf7UgWyg1I0JTTrKSi9iTfkuO960NAkW4cGI5WtIIS86mTn9S8nK2cde5a
+lxuV53QtHA+wLJef+6kzOXrnAzqSjiL2jA3k2X4Ndhj3AfnvlpaiVXPAPHG0HRpW
+Q7fDCo1y/OIQCQtBzoyUoPkD/XFzS4pXM+WOdH4VAQDmzEoc53+VGS3FpQyLu7Xt
+hbNc09+4ufLKxw0BFKxwWMWMjTPUnWajGlCVI/xI4AZDEtnNp4Y5LzZyo4AQ5OHz
+0ctbGsDkgJp8E3MGT9ujayQKurMcvEp4u+XjdTilSKeiHq921F73OIZWWonO1sOn
+ebJSoMbxhbQljPI/lrMQ2Y1sVzufb4Y6GIIiNsiwkTjbKqGTqoQ/9SdlrnPVyNXT
+d+pLncdBu8fA46A/5H2kjXPmEkvfoXNzczqA6NXLji/L6hOn1kGLrPo8idck9U60
+4GGSt/M3mMS+lqO3ig==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ISDOSC_D1.ors b/test/ocsp-tests/ISDOSC_D1.ors
new file mode 100644 (file)
index 0000000..66a60a2
--- /dev/null
@@ -0,0 +1,32 @@
+MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBSpTXftIZX0
+lLT9zwVSQC5Jfp3pqhgPMjAxMjEwMTAxNDU0NDNaMHUwczBLMAkGBSsOAwIaBQAE
+FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8
+vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDEwMTMwMDAwWqARGA8yMDEyMTAxNzEz
+MDAwMFowCwYJKoZIhvcNAQEFA4IBAQBw5Z+0ggEddRTIq7cXlMoxG9Nrx4HtutsH
+itIUoZp/rlLoxHsJTo/VmdZvTTGIc7Ok9XuoH61lY/x9glAKsGRjz4Myc9+5rx0O
+675lwmOS+uaf3/hRkicVrVr7Pt2ug3R7OXm2MJrohjNKP8lqtLJ0hHP88a8rotKA
+r9uz/qHm7K4Uh7dRt/Pnu9MPG74tZeFNN4M1ONMEiRdG39FqzFDXWxwQ3NmyC0Wo
+DQn+NklZMknr8mm7IBWpzgU1fTD9R0yv0zdhUZGiEXxvdhm7GJrTET5jS30Ksm5j
+o+n39YVu/vGbjyyYx3+WdeQLEyipaGvldSuJpT+R684/RuFWNetcoIID+DCCA/Qw
+ggPwMIIC2KADAgECAhIRIcYjwu4UNkR1VGrDbSdFei8wDQYJKoZIhvcNAQEFBQAw
+WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV
+BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy
+MDkxOTA3NDAzMVoXDTEyMTIxOTA4NDAzMVowgYUxCzAJBgNVBAYTAkJFMRkwFwYD
+VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu
+ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDExFzAVBgNV
+BAUTDjIwMTIwOTE5MDkzOTAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAxkkb6QhDH3sEDj4zaysjVzYelq9lZ1cso4R2IyQxaoPaG6GkaCmHA4sz6KP+
+m3ADqplibEUBa/mzCxHW8/oy3NhGMFdbezduZrnRFLbzakOTeIo8VEIM3JPfgREv
+CX8nj6Xu7ERD6JO/ZQ9Xr7YVzKKN+3cVZlcMHoGBnOPcO2Sz0AcYyk5m5IsGBRoT
+T86j6Cr9PhOPTVwXL6Wxy1KVHsUZXUwnRacV0O4SHWQ4zM9Sablus9fTbh1CgIqW
+sKDyzVB4yECXkBVeUlA+cuCaRRVHRiR+jPDSgbU62nnNudEpGG7dyoop6IOvXv2O
+ydncWzaukxIVvQ/Ij85kHqs7HQIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P
+AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw
+HQYDVR0OBBYEFKlNd+0hlfSUtP3PBVJALkl+nemqMB8GA1UdIwQYMBaAFLCwSv0c
+dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCe4rZg61Dmwygl/Uae
+BJZog64/FvuB1sfCqKLJTjKOfLcugSTX1TT7bLJbzXRGPQuorI3TIZEOwldIw01d
+DTLlsOCHrfHd+bpxgijxPkUuaA4NYnpvqTEMJqPKOC8QYfKupNjAPSuHvwqvqCfO
+RCe3jY6xQDO0WCTZ8/xMsOkw+J/YEYqALETf2Ug7k5eRL/TvfLd8Sgi7vPfmUeiW
+ptlsbhMOWQoQc+JA3vCI01rrjNq+0kIZ/r8nPGvablRr0Aakk6eDuS2dcReaPwuK
+0xE136pJYiXdQ3SA7uwmlorjxmejavyoPCr23TU74DQEt6hhc6uIcabsa4Y8KvJy
+RI4F
diff --git a/test/ocsp-tests/ISDOSC_D2.ors b/test/ocsp-tests/ISDOSC_D2.ors
new file mode 100644 (file)
index 0000000..664c8d2
--- /dev/null
@@ -0,0 +1,32 @@
+MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTqlwecTarB
+yVdbHxANRLCFYj1mqBgPMjAxMjEwMTEwOTE1MzNaMG4wbDBEMAkGBSsOAwIaBQAE
+FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA
+AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL
+BgkqhkiG9w0BAQUDggEBAF/9ByrCS+pCCK4qovqUAH/yoWckmpLFCzKJGHkErJeY
+FlUbAJuu/Gs0IdLmLp+2VbStjsL4vLtDU2Q4e417C1fm8+ixh+kP7qPRd8cxyMBx
+cmD2m1v0CgbrflCZEC71cTrrWpcW+6jg623lI4Ug3A4zlizbT/f9IrxuV9VB9/G5
+6kPI5dYOVZM0ColIxmJsafuxfr6ONQLPHKTlZJK3SyWebs25006OmrSyfBi0j26j
+WU5d6B2NJZBKqvDVMXxZ0q6QOgKxOs8WD+6DaA1d1f7gTOl45XJZWz5KnRePyRxM
+Fp0ak6XYbE1y2vHE2RWp1w4lcVJ0BUQXWxx+g86F5W2gggQQMIIEDDCCBAgwggLw
+oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF
+MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw
+GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw
+NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu
+di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh
+bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP2QF8p0+Fb7ID
+MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi
+oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL
+866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J
+ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5
+MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw
+yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec
+TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG
+AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ
+BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF
+DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD
+ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7
+8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm
+uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb
+4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa
+YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC
+fBIRKjnK621vAWvc7UR+0hqnZ+Y=
diff --git a/test/ocsp-tests/ISDOSC_D3.ors b/test/ocsp-tests/ISDOSC_D3.ors
new file mode 100644 (file)
index 0000000..ac2bb25
--- /dev/null
@@ -0,0 +1,38 @@
+MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV
+BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML
+Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG
+A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMTEwMTAyMVowZjBkMDwwCQYF
+Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y
+ORjRAgMLs8aAABgPMjAxMjEwMTEwOTUyNDJaoBEYDzIwMTIxMDEzMTAxMDIxWjAN
+BgkqhkiG9w0BAQUFAAOCAQEAWX7faLDXkmIdOv/IKBh7awhPmGUhFPVSrMI4dc9/
+fcPDOYhFwWr9evKT/QdXRGpZY493mfa4Z6eEDxRDTexOloaiaJzVpSeV9hoJUxoS
+8NEWDyi33bDlIJH6zru4kk1LpuSMiSWsvLaeoRhHmW3EPDeadpCa5tYX2yNW5hdP
+iCfphDJ34/hWHHwHP6mLd1wEO1Rw6nymqeDbuLk1FviD/ZWXMGzK8Sv++tmsQ0Tg
+7XrkIPcSrozPKOTCf/1iJVF5KeQVIb0Ju1PvGUKtGaVTX8IZQmer2WQ1D6OOUcsS
+cWA6NSpWmScX/0/uBpXdSDX0AnGUS9SNrPNEolz6rA5OUaCCBL4wggS6MIIEtjCC
+Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex
+HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy
+dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl
+cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE
+BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD
+QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD
+VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr
+2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0
+3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz
+uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz
+mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16
+VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD
+VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA
+MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA
+MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS
+r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ
+VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU
+6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE
+STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj
+ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe
+g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo
+5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9
+HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1
+VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a
+wG/paw==
diff --git a/test/ocsp-tests/ISIC_D1_Issuer_ICA.pem b/test/ocsp-tests/ISIC_D1_Issuer_ICA.pem
new file mode 100644 (file)
index 0000000..b884775
--- /dev/null
@@ -0,0 +1,27 @@
+-----BEGIN CERTIFICATE-----
+MIIEhjCCA26gAwIBAgILBAAAAAABL07hXdQwDQYJKoZIhvcNAQEFBQAwTDEgMB4G
+A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp
+Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTEwNDEzMTAwMDAwWhcNMjIwNDEz
+MTAwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1z
+YTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBFeHRlbmRlZCBWYWxpZGF0aW9uIENBIC0g
+RzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDNoUbMUpq4pbR/WNnN
+2EugcgyXW6aIIMO5PUbc0FxSMPb6WU+FX7DbiLSpXysjSKyr9ZJ4FLYyD/tcaoVb
+AJDgu2X1WvlPZ37HbCnsk8ArysRe2LDb1r4/mwvAj6ldrvcAAqT8umYROHf+IyAl
+VRDFvYK5TLFoxuJwe4NcE2fBofN8C6iZmtDimyUxyCuNQPZSY7GgrVou9Xk2bTUs
+Dt0F5NDiB0i3KF4r1VjVbNAMoQFGAVqPxq9kx1UBXeHRxmxQJaAFrQCrDI1la93r
+wnJUyQ88ABeHIu/buYZ4FlGud9mmKE3zWI2DZ7k0JZscUYBR84OSaqOuR5rW5Isb
+wO2xAgMBAAGjggFaMIIBVjAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB
+/wIBADAdBgNVHQ4EFgQUsLBK/Rx1KPgcYaoT9vrBkD1rFqMwRwYDVR0gBEAwPjA8
+BgRVHSAAMDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2JhbHNpZ24uY29t
+L3JlcG9zaXRvcnkvMDYGA1UdHwQvMC0wK6ApoCeGJWh0dHA6Ly9jcmwuZ2xvYmFs
+c2lnbi5uZXQvcm9vdC1yMi5jcmwwRAYIKwYBBQUHAQEEODA2MDQGCCsGAQUFBzAB
+hihodHRwOi8vb2NzcC5nbG9iYWxzaWduLmNvbS9FeHRlbmRlZFNTTENBMCkGA1Ud
+JQQiMCAGCCsGAQUFBwMBBggrBgEFBQcDAgYKKwYBBAGCNwoDAzAfBgNVHSMEGDAW
+gBSb4gdXZxwewGoG3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAL0m28rZa
+pJWrnlrpK4KbzJBrfHRFIOde2Mcj7ig1sTVlKqVR4FU/9oNntOQ2KbDa7JeVqYoF
+o0X+Iy5SiLQfEICt0oufo1+oxetz3nmIQZgz7qdgGLFGyUAQB5yPClLJExoGbqCb
+LTr2rk/no1E1KlsYBRLlUdy2NmLz4aQP++TPw5S/EauhWTEB8MxT7I9j12yW00gq
+iiPtRVaoZkHqAblH7qFHDBTxI+Egc8p9UHxkOFejj0qcm+ltRc9Ea01gIEBxJbVG
+qmwIft/I+shWKpLLg7h5CZctXqEBzgbttJfJBNxB7+BPNk3kQHNG7BESfIhbNCYl
+TercGL7FG81kwQ==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ISIC_D2_Issuer_Root.pem b/test/ocsp-tests/ISIC_D2_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..22f34cb
--- /dev/null
@@ -0,0 +1,21 @@
+-----BEGIN CERTIFICATE-----
+MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG
+A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv
+b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw
+MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9i
+YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT
+aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDaDuaZ
+jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp
+xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp
+1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG
+snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ
+U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8
+9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E
+BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B
+AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz
+yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE
+38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP
+AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad
+DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME
+HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4Q==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ISIC_D3_Issuer_Root.pem b/test/ocsp-tests/ISIC_D3_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..c1752e6
--- /dev/null
@@ -0,0 +1,41 @@
+-----BEGIN CERTIFICATE-----
+MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdSb290
+IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB
+IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA
+Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO
+BgNVBAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi
+MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ
+ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC
+CgKCAgEAziLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ
+8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6
+zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y
+fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7
+w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc
+G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k
+epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q
+laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ
+QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU
+fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826
+YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w
+ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY
+gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe
+MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0
+IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy
+dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw
+czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0
+dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl
+aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC
+AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg
+b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB
+ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc
+nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg
+18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c
+gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl
+Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY
+sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T
+SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF
+CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum
+GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk
+zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW
+omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVE
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ISIC_ND1_Issuer_ICA.pem b/test/ocsp-tests/ISIC_ND1_Issuer_ICA.pem
new file mode 100644 (file)
index 0000000..25bb90f
--- /dev/null
@@ -0,0 +1,29 @@
+-----BEGIN CERTIFICATE-----
+MIIFBjCCA+6gAwIBAgIQEaO00OyNt3+doM1dLVEvQjANBgkqhkiG9w0BAQUFADCB
+gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV
+BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMDA1MjQwMDAw
+MDBaFw0yMDA1MzAxMDQ4MzhaMIGOMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P
+RE8gQ0EgTGltaXRlZDE0MDIGA1UEAxMrQ09NT0RPIEV4dGVuZGVkIFZhbGlkYXRp
+b24gU2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
+ggEBAMxKljPNJY1n7iiWN4dG8PYEooR/U6qW5h+xAhxu7X0h1Nc8HqLYaS+ot/Wi
+7WRYZOFEZTZJQSABjTsT4gjzDPJXOZM3txyTRIOOvy3xoQV12m7ue28b6naDKHRK
+HCvT9cQDcpOvhs4JjDx11MkKL3Lzrb0OMDyEoXMfAyUUpY/D1vS15N2GevUZumjy
+hVSiMBHK0ZLLO3QGEqA3q2rYVBHfbJoWlLm0p2XGdC0x801S6VVRn8s+oo12mHDS
+b6ZlRS8bhbtbbfnywARmE4R6nc4n2PREnr+svpnba0/bWCGwiSe0jzLWS15ykV7f
+BZ3ZSS/0tm9QH3XLgJ3m0+TR8tMCAwEAAaOCAWkwggFlMB8GA1UdIwQYMBaAFAtY
+5YvGTBU3pECpMKkhvkc2Wlb/MB0GA1UdDgQWBBSIRFH/UCppXi2I9CG62Qzyzsvq
+fDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADA+BgNVHSAENzA1
+MDMGBFUdIAAwKzApBggrBgEFBQcCARYdaHR0cHM6Ly9zZWN1cmUuY29tb2RvLmNv
+bS9DUFMwSQYDVR0fBEIwQDA+oDygOoY4aHR0cDovL2NybC5jb21vZG9jYS5jb20v
+Q09NT0RPQ2VydGlmaWNhdGlvbkF1dGhvcml0eS5jcmwwdAYIKwYBBQUHAQEEaDBm
+MD4GCCsGAQUFBzAChjJodHRwOi8vY3J0LmNvbW9kb2NhLmNvbS9DT01PRE9BZGRU
+cnVzdFNlcnZlckNBLmNydDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2Rv
+Y2EuY29tMA0GCSqGSIb3DQEBBQUAA4IBAQCaQ7+vpHJezX1vf/T8PYy7cOYe3QT9
+P9ydn7+JdpvyhjH8f7PtKpFTLOKqsOPILHH3FYojHPFpLoH7sbxiC6saVBzZIl40
+TKX2Iw9dej3bQ81pfhc3Us1TocIR1FN4J2TViUFNFlW7kMvw2OTd3dMJZEgo/zIj
+hC+Me1UvzymINzR4DzOq/7fylqSbRIC1vmxWVKukgZ4lGChUOn8sY89ZIIwYazgs
+tN3t40DeDDYlV5rA0WCeXgNol64aO+pF11GZSe5EWVYLXrGPaOqKnsrSyaADfnAl
+9DLJTlCDh6I0SD1PNXf82Ijq9n0ezkO21cJqfjhmY03n7jLvDyToKmf7
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ISIC_ND2_Issuer_Root.pem b/test/ocsp-tests/ISIC_ND2_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..129eb4b
--- /dev/null
@@ -0,0 +1,23 @@
+-----BEGIN CERTIFICATE-----
+MIID0DCCArigAwIBAgIQIKTEf93f4cdTYwcTiHdgEjANBgkqhkiG9w0BAQUFADCB
+gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV
+BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMTAxMDEwMDAw
+MDBaFw0zMDEyMzEyMzU5NTlaMIGBMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P
+RE8gQ0EgTGltaXRlZDEnMCUGA1UEAxMeQ09NT0RPIENlcnRpZmljYXRpb24gQXV0
+aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0ECLi3LjkRv3
+UcEbVASY06m/weaKXTuH+7uIzg3jLz8GlvCiKVCZrts7oVewdFFxze1CkU1B/qnI
+2GqGd0S7WWaXUF601CxwRM/aN5VCaTwwxHGzUvAhTaHYujl8HJ6jJJ3ygxaYqhZ8
+Q5sVW7euNJH+1GImGEaaP+vB+fGQV+useg2L23IwambV4EajcNxo2f8ESIl33rXp
++2dtQem8Ob0y2WIC8bGoPW43nOIv4tOiJovGuFVDiOEjPqXSJDlqR6sA1KGzqSX+
+DT+nHbrTUcELpNqsOO9VUCQFZUaTNE8tja3G1CEZ0o7KBWFxB3NH5YoZEr0ETc5O
+nKVIrLsm9wIDAQABo0IwQDAdBgNVHQ4EFgQUC1jli8ZMFTekQKkwqSG+RzZaVv8w
+DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD
+ggEBAC/JxBwHO89hAgCx2SFRdXIDMLDEFh9sAIsQrK/xR9SuEDwMGvjUk2ysEDd8
+t6aDZK3N3w6HM503sMZ7OHKx8xoOo/lVem0DZgMXlUrxsXrfViEGQo+x06iF3u6X
+HWLrp+cxEmbDD6ZLLkGC9/3JG6gbr+48zuOcrigHoSybJMIPIyaDMouGDx8rEkYl
+Fo92kANr3ryqImhrjKGsKxE5pttwwn1y6TPn/CbxdFqR5p2ErPioBhlG5qfpqjQi
+pKGfeq23sqSaM4hxAjwu1nqyH6LKwN0vEJT9s4yEIHlG1QXUEOTS22RPuFvuG8Ug
+R1uUq27UlTMdphVx8fiUylQ5PsI=
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ISIC_ND3_Issuer_Root.pem b/test/ocsp-tests/ISIC_ND3_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..4904a32
--- /dev/null
@@ -0,0 +1,25 @@
+-----BEGIN CERTIFICATE-----
+MIIENjCCAx6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBvMQswCQYDVQQGEwJTRTEU
+MBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFkZFRydXN0IEV4dGVybmFs
+IFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBFeHRlcm5hbCBDQSBSb290
+MB4XDTAwMDUzMDEwNDgzOFoXDTIwMDUzMDEwNDgzOFowbzELMAkGA1UEBhMCU0Ux
+FDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1BZGRUcnVzdCBFeHRlcm5h
+bCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3QgRXh0ZXJuYWwgQ0EgUm9v
+dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALf3GjPm8gAELTngTlvt
+H7xsD821+iO2zt6bETOXpClMfZOfvUq8k+0DGuOPz+VtUFrWlymUWoCwSXrbLpX9
+uMq/NzgtHj6RQa1wVsfwTz/oMp50ysiQVOnGXw94nZpAPA6sYapeFI+eh6FqUNzX
+mk6vBbOmcZSccbNQYArHE504B4YCqOmoaSYYkKtMsE8jqzpPhNjfzp/haW+710LX
+a0Tkx63ubUFfclpxCDezeWWkWaCUN/cALw3CknLa0Dhy2xSoRcRdKn23tNbE7qzN
+E0S3ySvdQwAl+mG5aWpYIxG3pzOPVnVZ9c0p10a3CitlttNCbxWyuHv77+ldU9U0
+WicCAwEAAaOB3DCB2TAdBgNVHQ4EFgQUrb2YejS0Jvf6xCZU7wO94CTLVBowCwYD
+VR0PBAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wgZkGA1UdIwSBkTCBjoAUrb2YejS0
+Jvf6xCZU7wO94CTLVBqhc6RxMG8xCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtBZGRU
+cnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0ZXJuYWwgVFRQIE5ldHdvcmsx
+IjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENBIFJvb3SCAQEwDQYJKoZIhvcN
+AQEFBQADggEBALCb4IUlwtYj4g+WBpKdQZic2YR5gdkeWxQHIzZlj7DYd7usQWxH
+YINRsPkyPef89iYTx4AWpb9a/IfPeHmJIZriTAcKhjW88t5RxNKWt9x+Tu5w/Rw5
+6wwCURQtjr0W4MHfRnXnJK3s9EK0hZNwEGe6nQY1ShjTK3rMUUKhemPR5ruhxSvC
+Nr4TDea9Y355e6cJDUCrat2PisP29owaQgVR1EX1n6diIWgVIEM8med8vSTYqZEX
+c4g/VhsxOBi0cQ+azcgOno4uG+GMmIPLHzHxREzGBHNJdmAPx/i9F4BrLunMTA5a
+mnkPIAou1Z5jJh5VkpTYghdae9C8x49OhgU=
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ISOP_D1.ors b/test/ocsp-tests/ISOP_D1.ors
new file mode 100644 (file)
index 0000000..d28c9ec
--- /dev/null
@@ -0,0 +1,32 @@
+MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBSpTXftIZX0
+lLT9zwVSQC5Jfp3pqhgPMjAxMjEwMTAxMTU1NDVaMHUwczBLMAkGBSsOAwIaBQAE
+FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8
+vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDEwMTAwMDAwWqARGA8yMDEyMTAxNzEw
+MDAwMFowCwYJKoZIhvcNAQEFA4IBAQCaiUf6TuPaSmZR2i3hUwqdEfhjcZkcCXPu
+9diWuDZbaL6ubthfeTwx6OsZ0eM3Q+WPhBNlYQ9Sm8PDUQsQiq3YvuYu+QUisChx
+PN6BUEwFQZAGz+FX2h5+kAmK1M/xZeXMBCXJWJCClagiw5hOJfeV0ue7RUZRVuZv
+am0ZjyIeLsxsIrxghlcaJRosFmYNoM++euu5lvclutv1UQ5yyNxlYy0T/jA9gS07
+WJ/i38+zxnXTuAPOm67p5N1IkEAEg/7OPRIG17Ig1C38NctN74vAOdTU1d/ay05V
+Bz4ZiI9PffkUkPgW2QRQCEjv50i80wYkKH5pIbT/mTk4t53DUK1UoIID+DCCA/Qw
+ggPwMIIC2KADAgECAhIRIcYjwu4UNkR1VGrDbSdFei8wDQYJKoZIhvcNAQEFBQAw
+WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV
+BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy
+MDkxOTA3NDAzMVoXDTEyMTIxOTA4NDAzMVowgYUxCzAJBgNVBAYTAkJFMRkwFwYD
+VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu
+ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDExFzAVBgNV
+BAUTDjIwMTIwOTE5MDkzOTAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAxkkb6QhDH3sEDj4zaysjVzYelq9lZ1cso4R2IyQxaoPaG6GkaCmHA4sz6KP+
+m3ADqplibEUBa/mzCxHW8/oy3NhGMFdbezduZrnRFLbzakOTeIo8VEIM3JPfgREv
+CX8nj6Xu7ERD6JO/ZQ9Xr7YVzKKN+3cVZlcMHoGBnOPcO2Sz0AcYyk5m5IsGBRoT
+T86j6Cr9PhOPTVwXL6Wxy1KVHsUZXUwnRacV0O4SHWQ4zM9Sablus9fTbh1CgIqW
+sKDyzVB4yECXkBVeUlA+cuCaRRVHRiR+jPDSgbU62nnNudEpGG7dyoop6IOvXv2O
+ydncWzaukxIVvQ/Ij85kHqs7HQIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P
+AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw
+HQYDVR0OBBYEFKlNd+0hlfSUtP3PBVJALkl+nemqMB8GA1UdIwQYMBaAFLCwSv0c
+dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCe4rZg61Dmwygl/Uae
+BJZog64/FvuB1sfCqKLJTjKOfLcugSTX1TT7bLJbzXRGPQuorI3TIZEOwldIw01d
+DTLlsOCHrfHd+bpxgijxPkUuaA4NYnpvqTEMJqPKOC8QYfKupNjAPSuHvwqvqCfO
+RCe3jY6xQDO0WCTZ8/xMsOkw+J/YEYqALETf2Ug7k5eRL/TvfLd8Sgi7vPfmUeiW
+ptlsbhMOWQoQc+JA3vCI01rrjNq+0kIZ/r8nPGvablRr0Aakk6eDuS2dcReaPwuK
+0xE136pJYiXdQ3SA7uwmlorjxmejavyoPCr23TU74DQEt6hhc6uIcabsa4Y8KvJy
+RI4G
diff --git a/test/ocsp-tests/ISOP_D2.ors b/test/ocsp-tests/ISOP_D2.ors
new file mode 100644 (file)
index 0000000..b6c541d
--- /dev/null
@@ -0,0 +1,32 @@
+MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBT0zghPr/K8
+jV5hpjGMML9Q+DwzShgPMjAxMjEwMTAxMjA5NTlaMG4wbDBEMAkGBSsOAwIaBQAE
+FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA
+AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL
+BgkqhkiG9w0BAQUDggEBAGZY28eFWl169g7puLnKSeEzi6Ma5/rErOveFRp052ck
+785B83HWkNmW/Bgw7Ws6Y7jBJce6ZQ5TMhwgNP34HuG/mVyn2ZjtCe4KKFBVnZV7
+mHGx93jgKkQvdp4pbNKxZ504eZDp8UOlR9+uwWOWHVObn7o+2N8iWKErSbZ2uX54
+Ajk8Hg/XN5wI4RUtcK3QpZSf3Ren5iit4NInwCpmTOkDz/IVK96BWaEQICq4VlHG
+ziD0H0SlBQCdcSPzZndGoCtIhNyJEL3O2y3Grg4X1XH7VeeyGesuTLEIAEMHJPJD
+TOVNoe5YPRK9Tqb+6jsubw8X/1b72kw3xVgb6MfC0tqgggQQMIIEDDCCBAgwggLw
+oAMCAQICCwQAAAAAAThXoveHMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF
+MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw
+GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw
+NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu
+di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh
+bmNoIDIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMQY/h5DSRT24n
+mMtD19lrn8WZzOoIl+Z9qOsrLLjEQeTMDlL7JPZh5pLaHHb6kSWT+O/RcEwpw6Dq
+H9jtAgDOsGoN7gCK7wJbIvn4MdmkXZqVBcVl3uLuII3v1CPnlc/zoz5d9qXcZKb6
+YuzseyzhDPecQ+7l2NVAUOFUj8GXOZi//bIveMsm+/zSLMfriIC84Uym2QY649SC
+aFNbtF/tR6upvLCLe0b2D1g+OBfGqZasi3QI5uX6lT0gHbCnPhRo3uxG2+S4KL3M
+9sndMByrR5K6QuVf7UqA1vt0CfbA2OUXwcH5x3/TsHxtXDj2F/fWnC9QBBSN5n4I
+G8K7ZpYtAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBT0zghP
+r/K8jV5hpjGMML9Q+DwzSjBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG
+AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ
+BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF
+DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD
+ggEBAGU9HIQImzhTHkQLyA178dUdnF5E3DdzmNtwVV3cxGrFOLMpciMQLioQ/xp5
+t6j5Mshlp59imFylqowRRxRy4aN5TtMCufNh7yHIxI2Dt4O6qpPM946t5CJkMy+k
+63pXz2xFIxaJDzAmzpWzu70OY0jrh3dZa8NR4AvhtoZ8zFE6suva6ZGK7JIoINaA
+j5uyZ0qU+7vFwV1awdReNV6494z/HRjs1n956mNbalB9mKp9XXyfZlix/nN5mTJd
+NlJqz7QjnCzZRM/Gfamzk8L3/CPS3XmSblFyn6SeZ92Vms4PNqZiEUNa2TMKXQR1
+EMiDRMkyfIIMI80VgRvvzCiOt0c=
diff --git a/test/ocsp-tests/ISOP_D3.ors b/test/ocsp-tests/ISOP_D3.ors
new file mode 100644 (file)
index 0000000..a8a2f91
--- /dev/null
@@ -0,0 +1,38 @@
+MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV
+BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML
+Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG
+A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMDEzMjE1OVowZjBkMDwwCQYF
+Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y
+ORjRAgMLs8aAABgPMjAxMjEwMTAxMzA1MjBaoBEYDzIwMTIxMDEyMTMyMTU5WjAN
+BgkqhkiG9w0BAQUFAAOCAQEAH1auyXFf1fOdfShSnAFkg5JsRUvajrilUioTkPIn
+IGYV//huaPNZwZGCC2haZIdUuKB6G2OCXeZVskBTXPjt8/6JmoHgsZeI3x5xKXxZ
+vddLC0PgYp0cA3FqjXR2UCpdBF+GK37rnfZsdW2vD9JaEBXxTV4+ICDAg15ZphJW
+lLGmdP3mQqPURIwamcYam8tntARimgEpA0KgfVue2A+izjcxC7qk9BQYG72Fh3hC
+ZFxi5u6xKNUQ2EBF9KXZyP9d2i/bYCZAUeUSRtir+fsOXHlihYRih9npKyAPwpHd
+NqhwK9NhKed8gmkX3cSaK0arBx7ev7avhM4Dqem+BzppjKCCBL4wggS6MIIEtjCC
+Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex
+HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy
+dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl
+cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE
+BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD
+QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD
+VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr
+2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0
+3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz
+uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz
+mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16
+VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD
+VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA
+MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA
+MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS
+r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ
+VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU
+6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE
+STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj
+ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe
+g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo
+5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9
+HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1
+VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a
+wG/pag==
diff --git a/test/ocsp-tests/ISOP_ND1.ors b/test/ocsp-tests/ISOP_ND1.ors
new file mode 100644 (file)
index 0000000..b230622
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBSIRFH/UCpp
+Xi2I9CG62QzyzsvqfBgPMjAxMjEwMTAwODU0NDVaMHMwcTBJMAkGBSsOAwIaBQAE
+FEi2DTgjjfhFbk7lhD6jlBEYApefBBSIRFH/UCppXi2I9CG62QzyzsvqfAIQIuEz
+IiCgSN8psr+aMcKbB4AAGA8yMDEyMTAxMDA4NTQ0NVqgERgPMjAxMjEwMTQwODU0
+NDVaMA0GCSqGSIb3DQEBBQUAA4IBAQDHKDxWTbAHRXY7HapfhE99T+OSa/AfRYqX
+H9yIeMRa5VftXMyvBFuvVm/qLRwK6mxhkiVIvF/Pk5yxMjbm7xPO26D+WHOdQML4
++M4OX9BO76FjZRin5x+4b0Xo5SuSU1ulqfvSZnx+nG+hMbt/3Y7ODCEUWCYFoXNp
+U+TXTbv2mwJ9AL8Q/zjL4P8NJHzFJBKjEs+AAVRxTY/5RHHKU9dcm7ux/gsWoDUM
+w677Xxzn6icd8mqn72/HmzPnMrLHKKJFe2escbJn7JlV6qbZ9EWbrr+3OH0IJy5I
+E3LcPIsNZ//QEc6vS6J+j8ljV8Xne6rS1EmiOwV9NgubvYwDCm4R
diff --git a/test/ocsp-tests/ISOP_ND2.ors b/test/ocsp-tests/ISOP_ND2.ors
new file mode 100644 (file)
index 0000000..d14efb9
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBQLWOWLxkwV
+N6RAqTCpIb5HNlpW/xgPMjAxMjEwMTAwMDI1NTdaMHMwcTBJMAkGBSsOAwIaBQAE
+FOy+ZAvtiWulchtVZmfKU1ZI9ewTBBQLWOWLxkwVN6RAqTCpIb5HNlpW/wIQEaO0
+0OyNt3+doM1dLVEvQoAAGA8yMDEyMTAxMDAwMjU1N1qgERgPMjAxMjEwMTQwMDI1
+NTdaMA0GCSqGSIb3DQEBBQUAA4IBAQCJRXcrz4wJe7bqWBHULu/QDXVz74OhSNlu
+swI0J4h+UmzJuW1GpdhTwJcTG3ARVwCLKz3evvpvHSumcsop0G3NolryNLP/oGD0
+Vf6PbLrJ8v+NxUNugPbtWM985Ti/B2a+XjbzYlH2vS3KOTL4X1zWSL07IQFNXc2h
+yHBscKpYgt0mZcFZFxN3NTCNpT6IjJzZzTG9xTYZ3hZdMQQ3DYO+/Hv4J+U1/Ybq
+CjuMWRak/0R/BiBDJdGhbThlvV7bNUxYY7DVaOiLER8ptpmhnzlB/vsTAxZqX48J
+mJdv2bxoTby98Pm/BMydEA9qcFqyP1XvqhzIY35ngoS/1XREyW7t
diff --git a/test/ocsp-tests/ISOP_ND3.ors b/test/ocsp-tests/ISOP_ND3.ors
new file mode 100644 (file)
index 0000000..3ee7158
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB1AoBAKCCAc0wggHJBgkrBgEFBQcwAQEEggG6MIIBtjCBn6IWBBStvZh6NLQm
+9/rEJlTvA73gJMtUGhgPMjAxMjEwMDkxNjAxNTNaMHQwcjBKMAkGBSsOAwIaBQAE
+FHyxZlScq9tE7mImFq30ZXv3etWUBBStvZh6NLQm9/rEJlTvA73gJMtUGgIRAKcN
+bJWejX5BTb8DmevkCauAABgPMjAxMjEwMDkxNjAxNTNaoBEYDzIwMTIxMDEzMTYw
+MTUzWjANBgkqhkiG9w0BAQUFAAOCAQEAFnJAzuT8P4KKyTI6sdj5HkQ352qEu5CN
+K9M2kU/eg9kPfwLv8z3yArobwgx+/IDRajbVAKrk8UPCGUqkDc0OiU5c0+jpn+nT
+20VVCtWsBSWDfzKqYln/NGrblhv+/iuFZJpyfud5nWguW5nogPC8IAfgt9FMDMl6
+wlQWLSWEkgAJWvhNR3nzgvyMnuDuMIVQgB9/+vAIxA7nlpEEh6KTswyGqE9+u1yC
+kvrz4PwKZQMT6r1eRCLs6NaagOZT84QHhZ6TAA+QHjfK406KL8F9mFgbGKbW+st2
+QHm+giUhrgZMv+1Yaxe34BjDS439LCPjdZ29On8FeZr3F55T+s3VzA==
diff --git a/test/ocsp-tests/ND1.ors b/test/ocsp-tests/ND1.ors
new file mode 100644 (file)
index 0000000..7452741
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBSIRFH/UCpp
+Xi2I9CG62QzyzsvqfBgPMjAxMjEwMTEwODQxMTNaMHMwcTBJMAkGBSsOAwIaBQAE
+FEi2DTgjjfhFbk7lhD6jlBEYApefBBSIRFH/UCppXi2I9CG62QzyzsvqfAIQIuEz
+IiCgSN8psr+aMcKbB4AAGA8yMDEyMTAxMTA4NDExM1qgERgPMjAxMjEwMTUwODQx
+MTNaMA0GCSqGSIb3DQEBBQUAA4IBAQCNnhlBMxxh9z5AKfzAxiKs90CfxUsqfYfk
+8XlyF9VIfWRfEwzS6MF1pEzLnghRxTAmjrFgK+sxD9wk+S5Mdgw3nbED9DVFH2Hs
+RGKm/t9wkvrYOX6yRQqw6uRvU/5cibMjcyzKB/VQMwk4p4FwSUgBv88A5sTkKr2V
+eYdEm34hg2TZVkipPMBiyTyBLXs8D/9oALtnczg4xlTRSjDUvqoXL5haqY4QK2Pv
+mNwna6ACkwLmSuMe29UQ8IX2PUB4R5Etni5czyiKGxZLm+4NAhuEwWFNEzCyImPc
+087gHGU1zx+qVSlajqMJ/9ZXYjbt7WiWdhOTGEv4VMn8dHhRUs32
diff --git a/test/ocsp-tests/ND1_Cert_EE.pem b/test/ocsp-tests/ND1_Cert_EE.pem
new file mode 100644 (file)
index 0000000..e646162
--- /dev/null
@@ -0,0 +1,36 @@
+-----BEGIN CERTIFICATE-----
+MIIGTTCCBTWgAwIBAgIQIuEzIiCgSN8psr+aMcKbBzANBgkqhkiG9w0BAQUFADCB
+jjELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxNDAyBgNV
+BAMTK0NPTU9ETyBFeHRlbmRlZCBWYWxpZGF0aW9uIFNlY3VyZSBTZXJ2ZXIgQ0Ew
+HhcNMTEwMzMxMDAwMDAwWhcNMTMwNjI3MjM1OTU5WjCCAT8xETAPBgNVBAUTCDA0
+MDU4NjkwMRMwEQYLKwYBBAGCNzwCAQMTAkdCMR0wGwYDVQQPExRQcml2YXRlIE9y
+Z2FuaXphdGlvbjELMAkGA1UEBhMCR0IxDzANBgNVBBETBk01IDNFUTEbMBkGA1UE
+CBMSR3JlYXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRYwFAYDVQQJ
+Ew1UcmFmZm9yZCBSb2FkMRYwFAYDVQQJEw1FeGNoYW5nZSBRdWF5MSUwIwYDVQQJ
+ExwzcmQgRmxvb3IsIDI2IE9mZmljZSBWaWxsYWdlMRowGAYDVQQKExFDT01PRE8g
+Q0EgTGltaXRlZDEaMBgGA1UECxMRQ29tb2RvIEVWIFNHQyBTU0wxGjAYBgNVBAMT
+EXNlY3VyZS5jb21vZG8uY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEA168izw0zK6cChTGFuAwNARwTu1Ky/z+dXHkSmB0tQrAk3bq7mnUPtmQ+td8r
+G2hlhQPd+YXQVYEW3RuopydmdB9wMlEGCCfU2ZqohsC9uut+HenCVbYvn4sSB0KJ
+VdOXLPCEnfdk/FmcNWcYv73HmoJXZjT0THNQmnfpo6mMGAOerenMgNuCpq1buZ8c
+fFUeUY18ZGLZKZyRNM6GPgVA37Dm8Ru+9Cf8/rm7NSIoVWH4BDztM3Y1BZvZ0d4G
+49jRA4MXbhsDEMYzaSCDmaRHSFhCtrGkN2S4A1ZxoSoxQVCLcnnInVd+J0X8J6pa
+Efio/aD6UQBQq29HyTsWVe6BewIDAQABo4IB8TCCAe0wHwYDVR0jBBgwFoAUiERR
+/1AqaV4tiPQhutkM8s7L6nwwHQYDVR0OBBYEFKvAXKp4bYRmxU4SlM8k8FbWiXiL
+MA4GA1UdDwEB/wQEAwIFoDAMBgNVHRMBAf8EAjAAMDQGA1UdJQQtMCsGCCsGAQUF
+BwMBBggrBgEFBQcDAgYKKwYBBAGCNwoDAwYJYIZIAYb4QgQBMEYGA1UdIAQ/MD0w
+OwYMKwYBBAGyMQECAQUBMCswKQYIKwYBBQUHAgEWHWh0dHBzOi8vc2VjdXJlLmNv
+bW9kby5jb20vQ1BTMFMGA1UdHwRMMEowSKBGoESGQmh0dHA6Ly9jcmwuY29tb2Rv
+Y2EuY29tL0NPTU9ET0V4dGVuZGVkVmFsaWRhdGlvblNlY3VyZVNlcnZlckNBLmNy
+bDCBhAYIKwYBBQUHAQEEeDB2ME4GCCsGAQUFBzAChkJodHRwOi8vY3J0LmNvbW9k
+b2NhLmNvbS9DT01PRE9FeHRlbmRlZFZhbGlkYXRpb25TZWN1cmVTZXJ2ZXJDQS5j
+cnQwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmNvbW9kb2NhLmNvbTAzBgNVHREE
+LDAqghFzZWN1cmUuY29tb2RvLmNvbYIVd3d3LnNlY3VyZS5jb21vZG8uY29tMA0G
+CSqGSIb3DQEBBQUAA4IBAQC9SoVG+B40khDWAzlz+G0WDBM3OuqK5n8vY/XxdPS5
+qyv6K05S4VRGR/6PQa1UVzMbnhfLh54OWrpnalRGabpTmKDu8Pa912pzDSzMxg4U
+Rff4/hVLd1n/58q+riLxdtkIigLUjtFfwUrE1H89QODOCb4nw7f9BQaDoug+ovM3
+KO9rxVZ/3TshaxW0mPVM/cMbX+6RrQ7+d1y5fdX/fksCZhOW+P25+FPlaorQEWNa
+s0UZNQ6qVuxB7CPmnLqmLBfAKTbeKcQFxx//0eyyZqCkzIvYUNjeRR0Q7DnxXq4C
+Pj1Y6VcPJDmZOeogte5/vNIdU8Wq55IJJ1G/uKXztwVT
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ND1_Issuer_ICA.pem b/test/ocsp-tests/ND1_Issuer_ICA.pem
new file mode 100644 (file)
index 0000000..3260db3
--- /dev/null
@@ -0,0 +1,29 @@
+-----BEGIN CERTIFICATE-----
+MIIFBjCCA+6gAwIBAgIQEaO00OyNt3+doM1dLVEvQjANBgkqhkiG9w0BAQUFADCB
+gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV
+BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMDA1MjQwMDAw
+MDBaFw0yMDA1MzAxMDQ4MzhaMIGOMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P
+RE8gQ0EgTGltaXRlZDE0MDIGA1UEAxMrQ09NT0RPIEV4dGVuZGVkIFZhbGlkYXRp
+b24gU2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
+ggEBAMxKljPNJY1n7iiWN4dG8PYEooR/U6qW5h+xAhxu7X0h1Nc8HqLYaS+ot/Wi
+7WRYZOFEZTZJQSABjTsT4gjzDPJXOZM3txyTRIOOvy3xoQV12m7ue28b6naDKHRK
+HCvT9cQDcpOvhs4JjDx11MkKL3Lzrb0OMDyEoXMfAyUUpY/D1vS15N2GevUZumjy
+hVSiMBHK0ZLLO3QGEqA3q2rYVBHfbJoWlLm0p2XGdC0x801S6VVRn8s+oo12mHDS
+b6ZlRS8bhbtbbfnywARmE4R6nc4n2PREnr+svpnba0/bWCGwiSe0jzLWS15ykV7f
+BZ3ZSS/0tm9QH3XLgJ3m0+TR8tMCAwEAAaOCAWkwggFlMB8GA1UdIwQYMBaAFAtY
+5YvGTBU3pECpMKkhvkc2Wlb/MB0GA1UdDgQWBBSIRFH/UCppXi2I9CG62Qzyzsvq
+fDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADA+BgNVHSAENzA1
+MDMGBFUdIAAwKzApBggrBgEFBQcCARYdaHR0cHM6Ly9zZWN1cmUuY29tb2RvLmNv
+bS9DUFMwSQYDVR0fBEIwQDA+oDygOoY4aHR0cDovL2NybC5jb21vZG9jYS5jb20v
+Q09NT0RPQ2VydGlmaWNhdGlvbkF1dGhvcml0eS5jcmwwdAYIKwYBBQUHAQEEaDBm
+MD4GCCsGAQUFBzAChjJodHRwOi8vY3J0LmNvbW9kb2NhLmNvbS9DT01PRE9BZGRU
+cnVzdFNlcnZlckNBLmNydDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2Rv
+Y2EuY29tMA0GCSqGSIb3DQEBBQUAA4IBAQCaQ7+vpHJezX1vf/T8PYy7cOYe3QT9
+P9ydn7+JdpvyhjH8f7PtKpFTLOKqsOPILHH3FYojHPFpLoH7sbxiC6saVBzZIl40
+TKX2Iw9dej3bQ81pfhc3Us1TocIR1FN4J2TViUFNFlW7kMvw2OTd3dMJZEgo/zIj
+hC+Me1UvzymINzR4DzOq/7fylqSbRIC1vmxWVKukgZ4lGChUOn8sY89ZIIwYazgs
+tN3t40DeDDYlV5rA0WCeXgNol64aO+pF11GZSe5EWVYLXrGPaOqKnsrSyaADfnAl
+9DLJTlCDh6I0SD1PNXf82Ijq9n0ezkO21cJqfjhmY03n7jLvDyToKmf6
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ND2.ors b/test/ocsp-tests/ND2.ors
new file mode 100644 (file)
index 0000000..24c1cb2
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBQLWOWLxkwV
+N6RAqTCpIb5HNlpW/xgPMjAxMjEwMTAyMzAzMTlaMHMwcTBJMAkGBSsOAwIaBQAE
+FOy+ZAvtiWulchtVZmfKU1ZI9ewTBBQLWOWLxkwVN6RAqTCpIb5HNlpW/wIQEaO0
+0OyNt3+doM1dLVEvQoAAGA8yMDEyMTAxMDIzMDMxOVqgERgPMjAxMjEwMTQyMzAz
+MTlaMA0GCSqGSIb3DQEBBQUAA4IBAQCHn2nGfEUX/EJruMkTgh7GgB0u9cpAepaD
+sPv9gtl3KLUZyR+NbGMIa5/bpoJp0yg1z5VL6CLMusy3AF6Cn2fyaioDxG+yc+gA
+PcPFdEqiIMr+TP8s7qcEiE6WZddSSCqCn90VZSCWkpDhnCjDRwJLBBPU3803fdMz
+oguvyr7y6Koxik8X/iUe8EpSzAvmm4GZL3veTI+x7IezJSrhCS9zM0ZHjySjoDxC
++ljGH0EuWPTmFEqZVGIq3cuahIYzKItUbYnXU6ipi/2p42qbsFeok7eEN0EYsY1a
+vRATHGRmU7Q5HLCq4rQtZC1cis52Mvc9x1W4z/Gt5A3FtgElXXNA
diff --git a/test/ocsp-tests/ND2_Cert_ICA.pem b/test/ocsp-tests/ND2_Cert_ICA.pem
new file mode 100644 (file)
index 0000000..3260db3
--- /dev/null
@@ -0,0 +1,29 @@
+-----BEGIN CERTIFICATE-----
+MIIFBjCCA+6gAwIBAgIQEaO00OyNt3+doM1dLVEvQjANBgkqhkiG9w0BAQUFADCB
+gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV
+BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMDA1MjQwMDAw
+MDBaFw0yMDA1MzAxMDQ4MzhaMIGOMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P
+RE8gQ0EgTGltaXRlZDE0MDIGA1UEAxMrQ09NT0RPIEV4dGVuZGVkIFZhbGlkYXRp
+b24gU2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
+ggEBAMxKljPNJY1n7iiWN4dG8PYEooR/U6qW5h+xAhxu7X0h1Nc8HqLYaS+ot/Wi
+7WRYZOFEZTZJQSABjTsT4gjzDPJXOZM3txyTRIOOvy3xoQV12m7ue28b6naDKHRK
+HCvT9cQDcpOvhs4JjDx11MkKL3Lzrb0OMDyEoXMfAyUUpY/D1vS15N2GevUZumjy
+hVSiMBHK0ZLLO3QGEqA3q2rYVBHfbJoWlLm0p2XGdC0x801S6VVRn8s+oo12mHDS
+b6ZlRS8bhbtbbfnywARmE4R6nc4n2PREnr+svpnba0/bWCGwiSe0jzLWS15ykV7f
+BZ3ZSS/0tm9QH3XLgJ3m0+TR8tMCAwEAAaOCAWkwggFlMB8GA1UdIwQYMBaAFAtY
+5YvGTBU3pECpMKkhvkc2Wlb/MB0GA1UdDgQWBBSIRFH/UCppXi2I9CG62Qzyzsvq
+fDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADA+BgNVHSAENzA1
+MDMGBFUdIAAwKzApBggrBgEFBQcCARYdaHR0cHM6Ly9zZWN1cmUuY29tb2RvLmNv
+bS9DUFMwSQYDVR0fBEIwQDA+oDygOoY4aHR0cDovL2NybC5jb21vZG9jYS5jb20v
+Q09NT0RPQ2VydGlmaWNhdGlvbkF1dGhvcml0eS5jcmwwdAYIKwYBBQUHAQEEaDBm
+MD4GCCsGAQUFBzAChjJodHRwOi8vY3J0LmNvbW9kb2NhLmNvbS9DT01PRE9BZGRU
+cnVzdFNlcnZlckNBLmNydDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2Rv
+Y2EuY29tMA0GCSqGSIb3DQEBBQUAA4IBAQCaQ7+vpHJezX1vf/T8PYy7cOYe3QT9
+P9ydn7+JdpvyhjH8f7PtKpFTLOKqsOPILHH3FYojHPFpLoH7sbxiC6saVBzZIl40
+TKX2Iw9dej3bQ81pfhc3Us1TocIR1FN4J2TViUFNFlW7kMvw2OTd3dMJZEgo/zIj
+hC+Me1UvzymINzR4DzOq/7fylqSbRIC1vmxWVKukgZ4lGChUOn8sY89ZIIwYazgs
+tN3t40DeDDYlV5rA0WCeXgNol64aO+pF11GZSe5EWVYLXrGPaOqKnsrSyaADfnAl
+9DLJTlCDh6I0SD1PNXf82Ijq9n0ezkO21cJqfjhmY03n7jLvDyToKmf6
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ND2_Issuer_Root.pem b/test/ocsp-tests/ND2_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..dc99810
--- /dev/null
@@ -0,0 +1,23 @@
+-----BEGIN CERTIFICATE-----
+MIID0DCCArigAwIBAgIQIKTEf93f4cdTYwcTiHdgEjANBgkqhkiG9w0BAQUFADCB
+gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV
+BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMTAxMDEwMDAw
+MDBaFw0zMDEyMzEyMzU5NTlaMIGBMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P
+RE8gQ0EgTGltaXRlZDEnMCUGA1UEAxMeQ09NT0RPIENlcnRpZmljYXRpb24gQXV0
+aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0ECLi3LjkRv3
+UcEbVASY06m/weaKXTuH+7uIzg3jLz8GlvCiKVCZrts7oVewdFFxze1CkU1B/qnI
+2GqGd0S7WWaXUF601CxwRM/aN5VCaTwwxHGzUvAhTaHYujl8HJ6jJJ3ygxaYqhZ8
+Q5sVW7euNJH+1GImGEaaP+vB+fGQV+useg2L23IwambV4EajcNxo2f8ESIl33rXp
++2dtQem8Ob0y2WIC8bGoPW43nOIv4tOiJovGuFVDiOEjPqXSJDlqR6sA1KGzqSX+
+DT+nHbrTUcELpNqsOO9VUCQFZUaTNE8tja3G1CEZ0o7KBWFxB3NH5YoZEr0ETc5O
+nKVIrLsm9wIDAQABo0IwQDAdBgNVHQ4EFgQUC1jli8ZMFTekQKkwqSG+RzZaVv8w
+DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD
+ggEBAC/JxBwHO89hAgCx2SFRdXIDMLDEFh9sAIsQrK/xR9SuEDwMGvjUk2ysEDd8
+t6aDZK3N3w6HM503sMZ7OHKx8xoOo/lVem0DZgMXlUrxsXrfViEGQo+x06iF3u6X
+HWLrp+cxEmbDD6ZLLkGC9/3JG6gbr+48zuOcrigHoSybJMIPIyaDMouGDx8rEkYl
+Fo92kANr3ryqImhrjKGsKxE5pttwwn1y6TPn/CbxdFqR5p2ErPioBhlG5qfpqjQi
+pKGfeq23sqSaM4hxAjwu1nqyH6LKwN0vEJT9s4yEIHlG1QXUEOTS22RPuFvuG8Ug
+R1uUq27UlTMdphVx8fiUylQ5PsE=
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ND3.ors b/test/ocsp-tests/ND3.ors
new file mode 100644 (file)
index 0000000..2008418
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB1AoBAKCCAc0wggHJBgkrBgEFBQcwAQEEggG6MIIBtjCBn6IWBBStvZh6NLQm
+9/rEJlTvA73gJMtUGhgPMjAxMjEwMTExMTM2NDdaMHQwcjBKMAkGBSsOAwIaBQAE
+FHyxZlScq9tE7mImFq30ZXv3etWUBBStvZh6NLQm9/rEJlTvA73gJMtUGgIRAKcN
+bJWejX5BTb8DmevkCauAABgPMjAxMjEwMTExMTM2NDdaoBEYDzIwMTIxMDE1MTEz
+NjQ3WjANBgkqhkiG9w0BAQUFAAOCAQEAfnj3nh6z+USW6VlDWRytWpNmC1ZRwWlg
+P2+G4UF4HE8bMJkuiFLcZEVYTxlTYv+xAEpSFxdInFM2Q5C+O6pWOZ9NbikeR4oZ
+FTI1kAZ0Uw+YMpVM4ztvKBIpUSqlbi69iNJ9WGF6qzxVeqobSOyrjjwtTsuglUbR
++mshp/SP7Br2IIK+KM1vgsmVExPfGPYANyk7ki/Q8uUnjqkreeSa9WC2iJLGcybW
+YavDhYWALebUGukNeedkloYhdjPboPPxDkKNjakwIG8EkbJK7uXewMOHHOFvFTX3
+K388me8u5iQf4f3fj6ilEgs6f5Szzmb+vklPX0zIny/TVk2+Az7HmA==
diff --git a/test/ocsp-tests/ND3_Cert_EE.pem b/test/ocsp-tests/ND3_Cert_EE.pem
new file mode 100644 (file)
index 0000000..c6a76d6
--- /dev/null
@@ -0,0 +1,34 @@
+-----BEGIN CERTIFICATE-----
+MIIF3TCCBMWgAwIBAgIRAKcNbJWejX5BTb8DmevkCaswDQYJKoZIhvcNAQEFBQAw
+bzELMAkGA1UEBhMCU0UxFDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1B
+ZGRUcnVzdCBFeHRlcm5hbCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3Qg
+RXh0ZXJuYWwgQ0EgUm9vdDAeFw0xMDA1MDQwMDAwMDBaFw0xNTA1MDQyMzU5NTla
+MIIBCjELMAkGA1UEBhMCR0IxDzANBgNVBBETBk01IDNFUTEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRYwFAYDVQQJEw1UcmFm
+Zm9yZCBSb2FkMRYwFAYDVQQJEw1FeGNoYW5nZSBRdWF5MSUwIwYDVQQJExwzcmQg
+Rmxvb3IsIDI2IE9mZmljZSBWaWxsYWdlMRowGAYDVQQKExFDT01PRE8gQ0EgTGlt
+aXRlZDEaMBgGA1UECxMRQ29tb2RvIFByZW1pdW1TU0wxLDAqBgNVBAMTI2FkZHRy
+dXN0ZXh0ZXJuYWxjYXJvb3QuY29tb2RvY2EuY29tMIIBIjANBgkqhkiG9w0BAQEF
+AAOCAQ8AMIIBCgKCAQEAz5MM/mco91yFJNtF3t9c0x/bGds+zGAqJlHBXCR43og+
+3vgsBkCcn5M3PAqmL6XxilpsrEfS6RqtNcLfxwDyl7rr3qpJSM537Km1ZGOTHs0C
+i0JA4YBZFOxBwPO2nHQGD+t9kJx3auFdBLnjJc5Q3jFUmnyJ8D2h3P9BrHgOoIbO
+KYOUc/3zcqE6NttdbiuUMzlad8guhnXlWPCh2NJtNtMLDQxG7DWWDEm/Kt+CdKAR
+jko6kEp7nqBKyujjJoGD2nEtEnuuqiB9n6sgSXR1NGtecJrW8IqIS7hkcsxhGTI9
+jnY73+NiMV3nglejkNseTUdcEi6L94EdifXuVLgEAwIDAQABo4IB1TCCAdEwHwYD
+VR0jBBgwFoAUrb2YejS0Jvf6xCZU7wO94CTLVBowHQYDVR0OBBYEFDXpt6NocCrd
+7XZ2MLUa116TIesKMA4GA1UdDwEB/wQEAwIFoDAMBgNVHRMBAf8EAjAAMB0GA1Ud
+JQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjBGBgNVHSAEPzA9MDsGDCsGAQQBsjEB
+AgEDBDArMCkGCCsGAQUFBwIBFh1odHRwczovL3NlY3VyZS5jb21vZG8ubmV0L0NQ
+UzB7BgNVHR8EdDByMDigNqA0hjJodHRwOi8vY3JsLmNvbW9kb2NhLmNvbS9BZGRU
+cnVzdEV4dGVybmFsQ0FSb290LmNybDA2oDSgMoYwaHR0cDovL2NybC5jb21vZG8u
+bmV0L0FkZFRydXN0RXh0ZXJuYWxDQVJvb3QuY3JsMDQGCCsGAQUFBwEBBCgwJjAk
+BggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2RvY2EuY29tMFcGA1UdEQRQME6C
+I2FkZHRydXN0ZXh0ZXJuYWxjYXJvb3QuY29tb2RvY2EuY29tgid3d3cuYWRkdHJ1
+c3RleHRlcm5hbGNhcm9vdC5jb21vZG9jYS5jb20wDQYJKoZIhvcNAQEFBQADggEB
+AF2TF6xg8ZoBICoiQvjD2Z0SKcJRw1Dhj3HpGzV9F+Y0e/MxCXhYA+340JZxnC2P
+VA968QKFrNwDWiS9Klc+cs4k3HIeiZp3uHw1ezElqXXNa+S1CrSS03FqWeeugSrB
+xpuXCWDJSfD4DJq835hlEuXgxmAjsbuRUjaq1lxwSWnNoBkfMCCAgVlHtFljTlqq
+nwfBZcnj73+yiERgTvhN4gEL59ZzjFliKEUuXHZoe8klhn73cnY+XoRV0e7wU+Xj
+PzLoAhjGkS35hfDQTHdCwNBaN3iI2Q+HBjhfffAYFdK+Jo3kSXq12s7CJD7utAho
+xxRhA0l1ziJgrEubLi6ItNg=
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/ND3_Issuer_Root.pem b/test/ocsp-tests/ND3_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..20585f1
--- /dev/null
@@ -0,0 +1,25 @@
+-----BEGIN CERTIFICATE-----
+MIIENjCCAx6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBvMQswCQYDVQQGEwJTRTEU
+MBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFkZFRydXN0IEV4dGVybmFs
+IFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBFeHRlcm5hbCBDQSBSb290
+MB4XDTAwMDUzMDEwNDgzOFoXDTIwMDUzMDEwNDgzOFowbzELMAkGA1UEBhMCU0Ux
+FDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1BZGRUcnVzdCBFeHRlcm5h
+bCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3QgRXh0ZXJuYWwgQ0EgUm9v
+dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALf3GjPm8gAELTngTlvt
+H7xsD821+iO2zt6bETOXpClMfZOfvUq8k+0DGuOPz+VtUFrWlymUWoCwSXrbLpX9
+uMq/NzgtHj6RQa1wVsfwTz/oMp50ysiQVOnGXw94nZpAPA6sYapeFI+eh6FqUNzX
+mk6vBbOmcZSccbNQYArHE504B4YCqOmoaSYYkKtMsE8jqzpPhNjfzp/haW+710LX
+a0Tkx63ubUFfclpxCDezeWWkWaCUN/cALw3CknLa0Dhy2xSoRcRdKn23tNbE7qzN
+E0S3ySvdQwAl+mG5aWpYIxG3pzOPVnVZ9c0p10a3CitlttNCbxWyuHv77+ldU9U0
+WicCAwEAAaOB3DCB2TAdBgNVHQ4EFgQUrb2YejS0Jvf6xCZU7wO94CTLVBowCwYD
+VR0PBAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wgZkGA1UdIwSBkTCBjoAUrb2YejS0
+Jvf6xCZU7wO94CTLVBqhc6RxMG8xCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtBZGRU
+cnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0ZXJuYWwgVFRQIE5ldHdvcmsx
+IjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENBIFJvb3SCAQEwDQYJKoZIhvcN
+AQEFBQADggEBALCb4IUlwtYj4g+WBpKdQZic2YR5gdkeWxQHIzZlj7DYd7usQWxH
+YINRsPkyPef89iYTx4AWpb9a/IfPeHmJIZriTAcKhjW88t5RxNKWt9x+Tu5w/Rw5
+6wwCURQtjr0W4MHfRnXnJK3s9EK0hZNwEGe6nQY1ShjTK3rMUUKhemPR5ruhxSvC
+Nr4TDea9Y355e6cJDUCrat2PisP29owaQgVR1EX1n6diIWgVIEM8med8vSTYqZEX
+c4g/VhsxOBi0cQ+azcgOno4uG+GMmIPLHzHxREzGBHNJdmAPx/i9F4BrLunMTA5a
+mnkPIAou1Z5jJh5VkpTYghdae9C8x49OhgQ=
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WIKH_D1.ors b/test/ocsp-tests/WIKH_D1.ors
new file mode 100644 (file)
index 0000000..c8ce8d8
--- /dev/null
@@ -0,0 +1,32 @@
+MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBRf2uQDFpGg
+Ywh4P1y2H9bZ2/BQNBgPMjAxMjEwMTExMzI5NDJaMHUwczBLMAkGBSsOAwIaBQAE
+FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSxsEr9HHUo+BxhqhP2+sGQPWsWowISESG8
+vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDExMTAwMDAwWqARGA8yMDEyMTAxODEw
+MDAwMFowCwYJKoZIhvcNAQEFA4IBAQCX3gEX+JVfxuYmxBBxC9sNCi3o76ODIicr
+XMvm0DTO9VSyDBl7LDsMMgNMIDtO3flQSlBNZ2B9ikwyckXOSWXiXzybZVMdA/uq
+NchgkM9aChrlhG0AHZyYe/+dJSmEBFXkIomy+S6YQ7Mcs2s6WxCeWU7gB4XOy1zO
+/CvWjv0WQV1J2lZZ6pkvtECKAEjrVP275LA38HInFbYvVPXWzl4sDcX2TAxwUa4S
+xAJAfwl+B+oZSerZWGRo6KjZuB/OB31cB5n/lABmRez6Obi27D0UUCRv/eSbwOF4
+Ofaa/XzJt7sF7WpVgoR41HI88W7aN4vtcw1zcVsBmfRMUNYZSqtfoIID+DCCA/Qw
+ggPwMIIC2KADAgECAhIRISdENsrz1CSWG3VIBwfQERQwDQYJKoZIhvcNAQEFBQAw
+WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV
+BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy
+MDkxOTA3NDA1MFoXDTEyMTIxOTA4NDA1MFowgYUxCzAJBgNVBAYTAkJFMRkwFwYD
+VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu
+ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDIxFzAVBgNV
+BAUTDjIwMTIwOTE5MDk0MDAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAnCgMsBO+IxIqCnXCOfXJoIC3wj+f0s4DV9h2gJBzisWXkaJD2DfNrd0kHUXK
+qVVPUxnA4G5iZu0Z385/KiOt1/P6vQ/Z2/AsEh/8Z/hIyeZCHL31wrSZW4yLeZwi
+M76wPiBHJxPun681HQlVs/OGKSHnbHc1XJAIeA/M8u+lLWqIKB+AJ82TrOqUMj1s
+LjGhQNs84xPliONN5K7DrEy+Y65X/rFxN77Smw+UtcH1GgH2NgaHH8dpt1m25sgm
+UxZWhdx66opB/lbRQwWdGt7MC0kJFaWHDZq64DTuYoekFYSxAFu0nd0EekEHEJEi
+9mquB9cv/96SuEJl8BcUWU/1LwIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P
+AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw
+HQYDVR0OBBYEFF/a5AMWkaBjCHg/XLYf1tnb8FA0MB8GA1UdIwQYMBaAFLCwSv0c
+dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCKRl1iXFmOQtLseDWP
+Y5icDDBGiRi17CGgvIzGJi/ha0PhbO+X0TmQIEnRX3Mu0Er/Mm4RZSjMtJ2iZRh3
+tGf4Dn+jKgKOmgXC3oOG/l8RPHLf0yaPSdn/z0TXtA30vTFBLlFeWnhbfhovea4+
+snPdBxLqWZdtxmiwojgqA7YATCWwavizrBr09YRyDwzgtpZ2BwMruGuFuV9FsEwL
+PCM53yFlrM32oFghyfyE5kYjgnnueKM+pw1kA0jgb1CnVJRrMEN1TXuXDAZLtHKG
+5X/drah1JtkoZhCzxzZ3bYdVDQJ90OHFqM58lwGD6z3XuPKrHDKZKt+CPIsl5g7p
+4J2l
diff --git a/test/ocsp-tests/WIKH_D2.ors b/test/ocsp-tests/WIKH_D2.ors
new file mode 100644 (file)
index 0000000..1d562fa
--- /dev/null
@@ -0,0 +1,32 @@
+MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTqlwecTarB
+yVdbHxANRLCFYj1mqBgPMjAxMjEwMTExMzMwMTBaMG4wbDBEMAkGBSsOAwIaBQAE
+FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRhe2YaRQ2XyolQL30EzTSo//z9SwILBAAA
+AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL
+BgkqhkiG9w0BAQUDggEBAA0H7bvcULg1GayFtQVrYDyW0feOEMNGLmgaGuwRdrY3
+KuWyNJLUUJKQZnOkdT8A4RpVX8xD4EgVyOqRACUahgdgp0g3QOn+vf2Zyf+NJIgW
+woF5qaJgCOeIOw5O6F4r1vUhp8NvqXHotswgG58Nzz6UMD+uyIgq5o8uzOjryEm6
+wO2X+KvN9sMzkeZhNvAHkgBQL8CG4CggWnzn7At1DmhhsizfhDrosigM4Zr6Sm6z
+v1YfSPznD0b3TQ7RzvpbJPofF2aJXMIMxdKR5pemuevTDR2+JCXjVPsD/ZODFykc
+rsQeqx2vTOIg84PRKboXjCAwHn4rIN7JJtQqebLtD9egggQQMIIEDDCCBAgwggLw
+oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF
+MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw
+GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw
+NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu
+di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh
+bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP2QF8p0+Fb7ID
+MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi
+oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL
+866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J
+ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5
+MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw
+yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec
+TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG
+AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ
+BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF
+DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD
+ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7
+8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm
+uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb
+4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa
+YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC
+fBIRKjnK621vAWvc7UR+0hqnZ+U=
diff --git a/test/ocsp-tests/WIKH_D3.ors b/test/ocsp-tests/WIKH_D3.ors
new file mode 100644 (file)
index 0000000..cbac8e8
--- /dev/null
@@ -0,0 +1,38 @@
+MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV
+BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML
+Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG
+A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMTE0MDYzNlowZjBkMDwwCQYF
+Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBe1MhvUx/Pg5o7zvdKwOu6y
+ORjRAgMLs8aAABgPMjAxMjEwMTExMzU4MTBaoBEYDzIwMTIxMDEzMTQwNjM2WjAN
+BgkqhkiG9w0BAQUFAAOCAQEAjcryO6FUK5+TcPBxJKixVt9q07Xy3qv1e/VFuJ0f
+tnYDcu83Q5yCta49PXaA13nFDFZ445wCDivDBLolS6JKSh+JrLpAxSBzak7Ps8wz
+DPNAtexZz9/hPPzHnGOMlRtew07jk+NX5ZgCxDZGmBHIHOGyab2WoqmpRTll0oP4
+b/DzI3mzrur5lm2NAT3ZJ8bVaWsAJBVTfUye3S4GRWlfGSRVAMk0QHnCkYP42okc
+psIKbvdIoS2gxo6kBTMevxciPV2lPIiSrIWH0IGm7AqGM5+Vz7IdbD6fOQd1I3uw
+O+1NugMYfScB6jCvSW2uESeRZ+qW/HMXQbU1eiH+x88UIKCCBL4wggS6MIIEtjCC
+Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex
+HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy
+dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl
+cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE
+BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD
+QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD
+VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr
+2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0
+3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz
+uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz
+mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16
+VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD
+VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA
+MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA
+MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS
+r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ
+VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU
+6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE
+STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj
+ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe
+g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo
+5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9
+HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1
+VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a
+wG/pag==
diff --git a/test/ocsp-tests/WIKH_ND1.ors b/test/ocsp-tests/WIKH_ND1.ors
new file mode 100644 (file)
index 0000000..a16476f
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBSIRFH/UCpp
+Xi2I9CG62QzyzsvqfBgPMjAxMjEwMTEwODQxMTNaMHMwcTBJMAkGBSsOAwIaBQAE
+FEi2DTgjjfhFbk7lhD6jlBEYApefBBSJRFH/UCppXi2I9CG62QzyzsvqfAIQIuEz
+IiCgSN8psr+aMcKbB4AAGA8yMDEyMTAxMTA4NDExM1qgERgPMjAxMjEwMTUwODQx
+MTNaMA0GCSqGSIb3DQEBBQUAA4IBAQCNnhlBMxxh9z5AKfzAxiKs90CfxUsqfYfk
+8XlyF9VIfWRfEwzS6MF1pEzLnghRxTAmjrFgK+sxD9wk+S5Mdgw3nbED9DVFH2Hs
+RGKm/t9wkvrYOX6yRQqw6uRvU/5cibMjcyzKB/VQMwk4p4FwSUgBv88A5sTkKr2V
+eYdEm34hg2TZVkipPMBiyTyBLXs8D/9oALtnczg4xlTRSjDUvqoXL5haqY4QK2Pv
+mNwna6ACkwLmSuMe29UQ8IX2PUB4R5Etni5czyiKGxZLm+4NAhuEwWFNEzCyImPc
+087gHGU1zx+qVSlajqMJ/9ZXYjbt7WiWdhOTGEv4VMn8dHhRUs32
diff --git a/test/ocsp-tests/WIKH_ND2.ors b/test/ocsp-tests/WIKH_ND2.ors
new file mode 100644 (file)
index 0000000..5aff2ab
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBQLWOWLxkwV
+N6RAqTCpIb5HNlpW/xgPMjAxMjEwMTAyMzAzMTlaMHMwcTBJMAkGBSsOAwIaBQAE
+FOy+ZAvtiWulchtVZmfKU1ZI9ewTBBQMWOWLxkwVN6RAqTCpIb5HNlpW/wIQEaO0
+0OyNt3+doM1dLVEvQoAAGA8yMDEyMTAxMDIzMDMxOVqgERgPMjAxMjEwMTQyMzAz
+MTlaMA0GCSqGSIb3DQEBBQUAA4IBAQCHn2nGfEUX/EJruMkTgh7GgB0u9cpAepaD
+sPv9gtl3KLUZyR+NbGMIa5/bpoJp0yg1z5VL6CLMusy3AF6Cn2fyaioDxG+yc+gA
+PcPFdEqiIMr+TP8s7qcEiE6WZddSSCqCn90VZSCWkpDhnCjDRwJLBBPU3803fdMz
+oguvyr7y6Koxik8X/iUe8EpSzAvmm4GZL3veTI+x7IezJSrhCS9zM0ZHjySjoDxC
++ljGH0EuWPTmFEqZVGIq3cuahIYzKItUbYnXU6ipi/2p42qbsFeok7eEN0EYsY1a
+vRATHGRmU7Q5HLCq4rQtZC1cis52Mvc9x1W4z/Gt5A3FtgElXXNA
diff --git a/test/ocsp-tests/WIKH_ND3.ors b/test/ocsp-tests/WIKH_ND3.ors
new file mode 100644 (file)
index 0000000..4f8a6ea
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB1AoBAKCCAc0wggHJBgkrBgEFBQcwAQEEggG6MIIBtjCBn6IWBBStvZh6NLQm
+9/rEJlTvA73gJMtUGhgPMjAxMjEwMTExMTM2NDdaMHQwcjBKMAkGBSsOAwIaBQAE
+FHyxZlScq9tE7mImFq30ZXv3etWUBBSuvZh6NLQm9/rEJlTvA73gJMtUGgIRAKcN
+bJWejX5BTb8DmevkCauAABgPMjAxMjEwMTExMTM2NDdaoBEYDzIwMTIxMDE1MTEz
+NjQ3WjANBgkqhkiG9w0BAQUFAAOCAQEAfnj3nh6z+USW6VlDWRytWpNmC1ZRwWlg
+P2+G4UF4HE8bMJkuiFLcZEVYTxlTYv+xAEpSFxdInFM2Q5C+O6pWOZ9NbikeR4oZ
+FTI1kAZ0Uw+YMpVM4ztvKBIpUSqlbi69iNJ9WGF6qzxVeqobSOyrjjwtTsuglUbR
++mshp/SP7Br2IIK+KM1vgsmVExPfGPYANyk7ki/Q8uUnjqkreeSa9WC2iJLGcybW
+YavDhYWALebUGukNeedkloYhdjPboPPxDkKNjakwIG8EkbJK7uXewMOHHOFvFTX3
+K388me8u5iQf4f3fj6ilEgs6f5Szzmb+vklPX0zIny/TVk2+Az7HmA==
diff --git a/test/ocsp-tests/WINH_D1.ors b/test/ocsp-tests/WINH_D1.ors
new file mode 100644 (file)
index 0000000..ed627ba
--- /dev/null
@@ -0,0 +1,32 @@
+MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBRf2uQDFpGg
+Ywh4P1y2H9bZ2/BQNBgPMjAxMjEwMTExMzI5NDJaMHUwczBLMAkGBSsOAwIaBQAE
+FKFyDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8
+vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDExMTAwMDAwWqARGA8yMDEyMTAxODEw
+MDAwMFowCwYJKoZIhvcNAQEFA4IBAQCX3gEX+JVfxuYmxBBxC9sNCi3o76ODIicr
+XMvm0DTO9VSyDBl7LDsMMgNMIDtO3flQSlBNZ2B9ikwyckXOSWXiXzybZVMdA/uq
+NchgkM9aChrlhG0AHZyYe/+dJSmEBFXkIomy+S6YQ7Mcs2s6WxCeWU7gB4XOy1zO
+/CvWjv0WQV1J2lZZ6pkvtECKAEjrVP275LA38HInFbYvVPXWzl4sDcX2TAxwUa4S
+xAJAfwl+B+oZSerZWGRo6KjZuB/OB31cB5n/lABmRez6Obi27D0UUCRv/eSbwOF4
+Ofaa/XzJt7sF7WpVgoR41HI88W7aN4vtcw1zcVsBmfRMUNYZSqtfoIID+DCCA/Qw
+ggPwMIIC2KADAgECAhIRISdENsrz1CSWG3VIBwfQERQwDQYJKoZIhvcNAQEFBQAw
+WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV
+BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy
+MDkxOTA3NDA1MFoXDTEyMTIxOTA4NDA1MFowgYUxCzAJBgNVBAYTAkJFMRkwFwYD
+VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu
+ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDIxFzAVBgNV
+BAUTDjIwMTIwOTE5MDk0MDAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAnCgMsBO+IxIqCnXCOfXJoIC3wj+f0s4DV9h2gJBzisWXkaJD2DfNrd0kHUXK
+qVVPUxnA4G5iZu0Z385/KiOt1/P6vQ/Z2/AsEh/8Z/hIyeZCHL31wrSZW4yLeZwi
+M76wPiBHJxPun681HQlVs/OGKSHnbHc1XJAIeA/M8u+lLWqIKB+AJ82TrOqUMj1s
+LjGhQNs84xPliONN5K7DrEy+Y65X/rFxN77Smw+UtcH1GgH2NgaHH8dpt1m25sgm
+UxZWhdx66opB/lbRQwWdGt7MC0kJFaWHDZq64DTuYoekFYSxAFu0nd0EekEHEJEi
+9mquB9cv/96SuEJl8BcUWU/1LwIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P
+AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw
+HQYDVR0OBBYEFF/a5AMWkaBjCHg/XLYf1tnb8FA0MB8GA1UdIwQYMBaAFLCwSv0c
+dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCKRl1iXFmOQtLseDWP
+Y5icDDBGiRi17CGgvIzGJi/ha0PhbO+X0TmQIEnRX3Mu0Er/Mm4RZSjMtJ2iZRh3
+tGf4Dn+jKgKOmgXC3oOG/l8RPHLf0yaPSdn/z0TXtA30vTFBLlFeWnhbfhovea4+
+snPdBxLqWZdtxmiwojgqA7YATCWwavizrBr09YRyDwzgtpZ2BwMruGuFuV9FsEwL
+PCM53yFlrM32oFghyfyE5kYjgnnueKM+pw1kA0jgb1CnVJRrMEN1TXuXDAZLtHKG
+5X/drah1JtkoZhCzxzZ3bYdVDQJ90OHFqM58lwGD6z3XuPKrHDKZKt+CPIsl5g7p
+4J2l
diff --git a/test/ocsp-tests/WINH_D2.ors b/test/ocsp-tests/WINH_D2.ors
new file mode 100644 (file)
index 0000000..b89fcf8
--- /dev/null
@@ -0,0 +1,32 @@
+MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTqlwecTarB
+yVdbHxANRLCFYj1mqBgPMjAxMjEwMTExMzMwMTBaMG4wbDBEMAkGBSsOAwIaBQAE
+FLhXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA
+AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL
+BgkqhkiG9w0BAQUDggEBAA0H7bvcULg1GayFtQVrYDyW0feOEMNGLmgaGuwRdrY3
+KuWyNJLUUJKQZnOkdT8A4RpVX8xD4EgVyOqRACUahgdgp0g3QOn+vf2Zyf+NJIgW
+woF5qaJgCOeIOw5O6F4r1vUhp8NvqXHotswgG58Nzz6UMD+uyIgq5o8uzOjryEm6
+wO2X+KvN9sMzkeZhNvAHkgBQL8CG4CggWnzn7At1DmhhsizfhDrosigM4Zr6Sm6z
+v1YfSPznD0b3TQ7RzvpbJPofF2aJXMIMxdKR5pemuevTDR2+JCXjVPsD/ZODFykc
+rsQeqx2vTOIg84PRKboXjCAwHn4rIN7JJtQqebLtD9egggQQMIIEDDCCBAgwggLw
+oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF
+MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw
+GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw
+NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu
+di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh
+bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP2QF8p0+Fb7ID
+MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi
+oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL
+866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J
+ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5
+MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw
+yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec
+TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG
+AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ
+BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF
+DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD
+ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7
+8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm
+uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb
+4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa
+YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC
+fBIRKjnK621vAWvc7UR+0hqnZ+U=
diff --git a/test/ocsp-tests/WINH_D3.ors b/test/ocsp-tests/WINH_D3.ors
new file mode 100644 (file)
index 0000000..c3d7c94
--- /dev/null
@@ -0,0 +1,38 @@
+MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV
+BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML
+Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG
+A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMTE0MzkxOFowZjBkMDwwCQYF
+Kw4DAhoFAAQUjKTJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y
+ORjRAgMLs8aAABgPMjAxMjEwMTExNDIzMjVaoBEYDzIwMTIxMDEzMTQzOTE4WjAN
+BgkqhkiG9w0BAQUFAAOCAQEAgdrf+v+BwEhG0ghTLMVmuxWprJr/9VFtpKpxQrTo
+egSoW+5JOPCUAStfw3R3u7QM8sJf9bnPorgoCoY1hPKcWNLhvf1Ng3QlVkNa6NcO
+EonbuI4KE9Rhoflpf//pD/3AFKzU+ecRs04KtYezKrUvC1RayGabd7bgtIpdFss4
+ZCZ22riqjFtqD3+2//AHg7VaqiJMKlRt05CMmGe+HKn5PEN9HaeI52nsTf+L1Jeh
+ItnaDPfV76vFHHXyUhR3iIgnqQDCig0q3yj7BQqH50+K+myiMAY+p8cuVqebno1i
+BzXxxpZl/fw1KnTFdEa7p2jtmXw3KZiHAWAddwg1F1tHTaCCBL4wggS6MIIEtjCC
+Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex
+HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy
+dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl
+cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE
+BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD
+QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD
+VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr
+2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0
+3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz
+uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz
+mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16
+VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD
+VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA
+MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA
+MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS
+r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ
+VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU
+6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE
+STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj
+ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe
+g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo
+5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9
+HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1
+VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a
+wG/pag==
diff --git a/test/ocsp-tests/WINH_ND1.ors b/test/ocsp-tests/WINH_ND1.ors
new file mode 100644 (file)
index 0000000..af47552
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBSIRFH/UCpp
+Xi2I9CG62QzyzsvqfBgPMjAxMjEwMTEwODQxMTNaMHMwcTBJMAkGBSsOAwIaBQAE
+FEm2DTgjjfhFbk7lhD6jlBEYApefBBSIRFH/UCppXi2I9CG62QzyzsvqfAIQIuEz
+IiCgSN8psr+aMcKbB4AAGA8yMDEyMTAxMTA4NDExM1qgERgPMjAxMjEwMTUwODQx
+MTNaMA0GCSqGSIb3DQEBBQUAA4IBAQCNnhlBMxxh9z5AKfzAxiKs90CfxUsqfYfk
+8XlyF9VIfWRfEwzS6MF1pEzLnghRxTAmjrFgK+sxD9wk+S5Mdgw3nbED9DVFH2Hs
+RGKm/t9wkvrYOX6yRQqw6uRvU/5cibMjcyzKB/VQMwk4p4FwSUgBv88A5sTkKr2V
+eYdEm34hg2TZVkipPMBiyTyBLXs8D/9oALtnczg4xlTRSjDUvqoXL5haqY4QK2Pv
+mNwna6ACkwLmSuMe29UQ8IX2PUB4R5Etni5czyiKGxZLm+4NAhuEwWFNEzCyImPc
+087gHGU1zx+qVSlajqMJ/9ZXYjbt7WiWdhOTGEv4VMn8dHhRUs32
diff --git a/test/ocsp-tests/WINH_ND2.ors b/test/ocsp-tests/WINH_ND2.ors
new file mode 100644 (file)
index 0000000..99417f7
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBQLWOWLxkwV
+N6RAqTCpIb5HNlpW/xgPMjAxMjEwMTAyMzAzMTlaMHMwcTBJMAkGBSsOAwIaBQAE
+FO2+ZAvtiWulchtVZmfKU1ZI9ewTBBQLWOWLxkwVN6RAqTCpIb5HNlpW/wIQEaO0
+0OyNt3+doM1dLVEvQoAAGA8yMDEyMTAxMDIzMDMxOVqgERgPMjAxMjEwMTQyMzAz
+MTlaMA0GCSqGSIb3DQEBBQUAA4IBAQCHn2nGfEUX/EJruMkTgh7GgB0u9cpAepaD
+sPv9gtl3KLUZyR+NbGMIa5/bpoJp0yg1z5VL6CLMusy3AF6Cn2fyaioDxG+yc+gA
+PcPFdEqiIMr+TP8s7qcEiE6WZddSSCqCn90VZSCWkpDhnCjDRwJLBBPU3803fdMz
+oguvyr7y6Koxik8X/iUe8EpSzAvmm4GZL3veTI+x7IezJSrhCS9zM0ZHjySjoDxC
++ljGH0EuWPTmFEqZVGIq3cuahIYzKItUbYnXU6ipi/2p42qbsFeok7eEN0EYsY1a
+vRATHGRmU7Q5HLCq4rQtZC1cis52Mvc9x1W4z/Gt5A3FtgElXXNA
diff --git a/test/ocsp-tests/WINH_ND3.ors b/test/ocsp-tests/WINH_ND3.ors
new file mode 100644 (file)
index 0000000..73dc42d
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB1AoBAKCCAc0wggHJBgkrBgEFBQcwAQEEggG6MIIBtjCBn6IWBBStvZh6NLQm
+9/rEJlTvA73gJMtUGhgPMjAxMjEwMTExMTM2NDdaMHQwcjBKMAkGBSsOAwIaBQAE
+FH2xZlScq9tE7mImFq30ZXv3etWUBBStvZh6NLQm9/rEJlTvA73gJMtUGgIRAKcN
+bJWejX5BTb8DmevkCauAABgPMjAxMjEwMTExMTM2NDdaoBEYDzIwMTIxMDE1MTEz
+NjQ3WjANBgkqhkiG9w0BAQUFAAOCAQEAfnj3nh6z+USW6VlDWRytWpNmC1ZRwWlg
+P2+G4UF4HE8bMJkuiFLcZEVYTxlTYv+xAEpSFxdInFM2Q5C+O6pWOZ9NbikeR4oZ
+FTI1kAZ0Uw+YMpVM4ztvKBIpUSqlbi69iNJ9WGF6qzxVeqobSOyrjjwtTsuglUbR
++mshp/SP7Br2IIK+KM1vgsmVExPfGPYANyk7ki/Q8uUnjqkreeSa9WC2iJLGcybW
+YavDhYWALebUGukNeedkloYhdjPboPPxDkKNjakwIG8EkbJK7uXewMOHHOFvFTX3
+K388me8u5iQf4f3fj6ilEgs6f5Szzmb+vklPX0zIny/TVk2+Az7HmA==
diff --git a/test/ocsp-tests/WKDOSC_D1.ors b/test/ocsp-tests/WKDOSC_D1.ors
new file mode 100644 (file)
index 0000000..d7566cf
--- /dev/null
@@ -0,0 +1,32 @@
+MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBSpTXftIZX0
+lLT9zwVSQC5Jfp3pqhgPMjAxMjEwMTAxNDU0NDNaMHUwczBLMAkGBSsOAwIaBQAE
+FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8
+vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDEwMTMwMDAwWqARGA8yMDEyMTAxNzEz
+MDAwMFowCwYJKoZIhvcNAQEFA4IBAQBw5Z+0ggEddRTIq7cXlMoxG9Nrx4HtutsH
+itIUoZp/rlLoxHsJTo/VmdZvTTGIc7Ok9XuoH61lY/x9glAKsGRjz4Myc9+5rx0O
+675lwmOS+uaf3/hRkicVrVr7Pt2ug3R7OXm2MJrohjNKP8lqtLJ0hHP88a8rotKA
+r9uz/qHm7K4Uh7dRt/Pnu9MPG74tZeFNN4M1ONMEiRdG39FqzFDXWxwQ3NmyC0Wo
+DQn+NklZMknr8mm7IBWpzgU1fTD9R0yv0zdhUZGiEXxvdhm7GJrTET5jS30Ksm5j
+o+n39YVu/vGbjyyYx3+WdeQLEyipaGvldSuJpT+R684/RuFWNetcoIID+DCCA/Qw
+ggPwMIIC2KADAgECAhIRIcYjwu4UNkR1VGrDbSdFei8wDQYJKoZIhvcNAQEFBQAw
+WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV
+BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy
+MDkxOTA3NDAzMVoXDTEyMTIxOTA4NDAzMVowgYUxCzAJBgNVBAYTAkJFMRkwFwYD
+VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu
+ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDExFzAVBgNV
+BAUTDjIwMTIwOTE5MDkzOTAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAx0kb6QhDH3sEDj4zaysjVzYelq9lZ1cso4R2IyQxaoPaG6GkaCmHA4sz6KP+
+m3ADqplibEUBa/mzCxHW8/oy3NhGMFdbezduZrnRFLbzakOTeIo8VEIM3JPfgREv
+CX8nj6Xu7ERD6JO/ZQ9Xr7YVzKKN+3cVZlcMHoGBnOPcO2Sz0AcYyk5m5IsGBRoT
+T86j6Cr9PhOPTVwXL6Wxy1KVHsUZXUwnRacV0O4SHWQ4zM9Sablus9fTbh1CgIqW
+sKDyzVB4yECXkBVeUlA+cuCaRRVHRiR+jPDSgbU62nnNudEpGG7dyoop6IOvXv2O
+ydncWzaukxIVvQ/Ij85kHqs7HQIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P
+AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw
+HQYDVR0OBBYEFKlNd+0hlfSUtP3PBVJALkl+nemqMB8GA1UdIwQYMBaAFLCwSv0c
+dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCe4rZg61Dmwygl/Uae
+BJZog64/FvuB1sfCqKLJTjKOfLcugSTX1TT7bLJbzXRGPQuorI3TIZEOwldIw01d
+DTLlsOCHrfHd+bpxgijxPkUuaA4NYnpvqTEMJqPKOC8QYfKupNjAPSuHvwqvqCfO
+RCe3jY6xQDO0WCTZ8/xMsOkw+J/YEYqALETf2Ug7k5eRL/TvfLd8Sgi7vPfmUeiW
+ptlsbhMOWQoQc+JA3vCI01rrjNq+0kIZ/r8nPGvablRr0Aakk6eDuS2dcReaPwuK
+0xE136pJYiXdQ3SA7uwmlorjxmejavyoPCr23TU74DQEt6hhc6uIcabsa4Y8KvJy
+RI4G
diff --git a/test/ocsp-tests/WKDOSC_D2.ors b/test/ocsp-tests/WKDOSC_D2.ors
new file mode 100644 (file)
index 0000000..757db75
--- /dev/null
@@ -0,0 +1,32 @@
+MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTqlwecTarB
+yVdbHxANRLCFYj1mqBgPMjAxMjEwMTAxNDU0NDhaMG4wbDBEMAkGBSsOAwIaBQAE
+FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA
+AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL
+BgkqhkiG9w0BAQUDggEBACkGyoGefA2WuktIerofBoPgeyT8Mry57DxF7IEvX8dI
+Adk+MZRo5suYIE2AJty8bohYYiIxS7sZ5nsUM+iyu5cIdmsIwt/YifYsSdHc6DKz
+l3Yh4bS27QX05/Vuok3HmEMsRBmensKATMfvGP+TOwhuFeHWAK8KHSCmUbGZFP3A
+WKtrhRh/qC4qetMt07z/OKZcqHUYegEpO3xqRJ4MdqRJpV1urjdL/852US0mWAOL
+/EPoexWiHiKJmsNy7HAEKFQ+daqdZYM1BTGbS2aj3go/BVqf0xEhRLT0fsdof4Is
+1Cy2ZHGbaVEyOQpXsxUEAqEdJcFRcLFGhdgnUjcQ9lqgggQQMIIEDDCCBAgwggLw
+oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF
+MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw
+GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw
+NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu
+di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh
+bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDQ2QF8p0+Fb7ID
+MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi
+oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL
+866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J
+ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5
+MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw
+yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec
+TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG
+AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ
+BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF
+DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD
+ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7
+8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm
+uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb
+4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa
+YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC
+fBIRKjnK621vAWvc7UR+0hqnZ+U=
diff --git a/test/ocsp-tests/WKDOSC_D3.ors b/test/ocsp-tests/WKDOSC_D3.ors
new file mode 100644 (file)
index 0000000..c33179c
--- /dev/null
@@ -0,0 +1,38 @@
+MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV
+BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML
+Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG
+A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMDE1MTkzOVowZjBkMDwwCQYF
+Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y
+ORjRAgMLs8aAABgPMjAxMjEwMTAxNDU2MTdaoBEYDzIwMTIxMDEyMTUxOTM5WjAN
+BgkqhkiG9w0BAQUFAAOCAQEAH1Bs3glJoAvCHhgVtN4F/avlKA1St74v7yuD1DIu
+cBf/4YRJdxZATXMI8I0TPjSl8L+rRAiUTVd8sPhWQ9XD9WaYKkTEjuQSPp851/81
+zDihz9Kj5Rzo5PYpFsbSps/ALMQSRkrtuX4DCm9fbK7xC+adpbhQDnWW/GXM1+Ob
+lv3pHDQXLh2GQbRsaJBgLeSUxIIE7RWJv1N+Ugi5zF8rja5qnJ9DnkilEqMeXQp8
+SThaI+TOe+KHK+7wTp5QkFNIE5l/uKgvSNIOwLe9HDevlSl1wYF6e+mAz3uoQyJa
+Ucx8FIoV6CIr+wUd+P8CmNXiQ7M59I8gm3FCDiEvWDQGEaCCBL4wggS6MIIEtjCC
+Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex
+HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy
+dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl
+cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE
+BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD
+QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD
+VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQCdxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr
+2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0
+3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz
+uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz
+mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16
+VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD
+VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA
+MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA
+MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS
+r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ
+VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU
+6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE
+STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj
+ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe
+g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo
+5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9
+HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1
+VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a
+wG/pag==
diff --git a/test/ocsp-tests/WKIC_D1_Issuer_ICA.pem b/test/ocsp-tests/WKIC_D1_Issuer_ICA.pem
new file mode 100644 (file)
index 0000000..93fb70d
--- /dev/null
@@ -0,0 +1,27 @@
+-----BEGIN CERTIFICATE-----
+MIIEhjCCA26gAwIBAgILBAAAAAABL07hXdQwDQYJKoZIhvcNAQEFBQAwTDEgMB4G
+A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp
+Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTEwNDEzMTAwMDAwWhcNMjIwNDEz
+MTAwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1z
+YTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBFeHRlbmRlZCBWYWxpZGF0aW9uIENBIC0g
+RzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDOoUbMUpq4pbR/WNnN
+2EugcgyXW6aIIMO5PUbc0FxSMPb6WU+FX7DbiLSpXysjSKyr9ZJ4FLYyD/tcaoVb
+AJDgu2X1WvlPZ37HbCnsk8ArysRe2LDb1r4/mwvAj6ldrvcAAqT8umYROHf+IyAl
+VRDFvYK5TLFoxuJwe4NcE2fBofN8C6iZmtDimyUxyCuNQPZSY7GgrVou9Xk2bTUs
+Dt0F5NDiB0i3KF4r1VjVbNAMoQFGAVqPxq9kx1UBXeHRxmxQJaAFrQCrDI1la93r
+wnJUyQ88ABeHIu/buYZ4FlGud9mmKE3zWI2DZ7k0JZscUYBR84OSaqOuR5rW5Isb
+wO2xAgMBAAGjggFaMIIBVjAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB
+/wIBADAdBgNVHQ4EFgQUsLBK/Rx1KPgcYaoT9vrBkD1rFqMwRwYDVR0gBEAwPjA8
+BgRVHSAAMDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2JhbHNpZ24uY29t
+L3JlcG9zaXRvcnkvMDYGA1UdHwQvMC0wK6ApoCeGJWh0dHA6Ly9jcmwuZ2xvYmFs
+c2lnbi5uZXQvcm9vdC1yMi5jcmwwRAYIKwYBBQUHAQEEODA2MDQGCCsGAQUFBzAB
+hihodHRwOi8vb2NzcC5nbG9iYWxzaWduLmNvbS9FeHRlbmRlZFNTTENBMCkGA1Ud
+JQQiMCAGCCsGAQUFBwMBBggrBgEFBQcDAgYKKwYBBAGCNwoDAzAfBgNVHSMEGDAW
+gBSb4gdXZxwewGoG3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAL0m28rZa
+pJWrnlrpK4KbzJBrfHRFIOde2Mcj7ig1sTVlKqVR4FU/9oNntOQ2KbDa7JeVqYoF
+o0X+Iy5SiLQfEICt0oufo1+oxetz3nmIQZgz7qdgGLFGyUAQB5yPClLJExoGbqCb
+LTr2rk/no1E1KlsYBRLlUdy2NmLz4aQP++TPw5S/EauhWTEB8MxT7I9j12yW00gq
+iiPtRVaoZkHqAblH7qFHDBTxI+Egc8p9UHxkOFejj0qcm+ltRc9Ea01gIEBxJbVG
+qmwIft/I+shWKpLLg7h5CZctXqEBzgbttJfJBNxB7+BPNk3kQHNG7BESfIhbNCYl
+TercGL7FG81kwA==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WKIC_D2_Issuer_Root.pem b/test/ocsp-tests/WKIC_D2_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..61db7ae
--- /dev/null
@@ -0,0 +1,21 @@
+-----BEGIN CERTIFICATE-----
+MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG
+A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv
+b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw
+MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9i
+YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT
+aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDbDuaZ
+jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp
+xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp
+1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG
+snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ
+U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8
+9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E
+BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B
+AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz
+yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE
+38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP
+AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad
+DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME
+HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WKIC_D3_Issuer_Root.pem b/test/ocsp-tests/WKIC_D3_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..f03432b
--- /dev/null
@@ -0,0 +1,41 @@
+-----BEGIN CERTIFICATE-----
+MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdSb290
+IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB
+IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA
+Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO
+BgNVBAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi
+MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ
+ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC
+CgKCAgEAzyLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ
+8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6
+zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y
+fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7
+w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc
+G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k
+epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q
+laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ
+QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU
+fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826
+YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w
+ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY
+gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe
+MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0
+IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy
+dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw
+czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0
+dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl
+aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC
+AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg
+b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB
+ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc
+nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg
+18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c
+gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl
+Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY
+sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T
+SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF
+CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum
+GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk
+zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW
+omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVD
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WKIC_ND1_Issuer_ICA.pem b/test/ocsp-tests/WKIC_ND1_Issuer_ICA.pem
new file mode 100644 (file)
index 0000000..f0d9811
--- /dev/null
@@ -0,0 +1,29 @@
+-----BEGIN CERTIFICATE-----
+MIIFBjCCA+6gAwIBAgIQEaO00OyNt3+doM1dLVEvQjANBgkqhkiG9w0BAQUFADCB
+gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV
+BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMDA1MjQwMDAw
+MDBaFw0yMDA1MzAxMDQ4MzhaMIGOMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P
+RE8gQ0EgTGltaXRlZDE0MDIGA1UEAxMrQ09NT0RPIEV4dGVuZGVkIFZhbGlkYXRp
+b24gU2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
+ggEBAM1KljPNJY1n7iiWN4dG8PYEooR/U6qW5h+xAhxu7X0h1Nc8HqLYaS+ot/Wi
+7WRYZOFEZTZJQSABjTsT4gjzDPJXOZM3txyTRIOOvy3xoQV12m7ue28b6naDKHRK
+HCvT9cQDcpOvhs4JjDx11MkKL3Lzrb0OMDyEoXMfAyUUpY/D1vS15N2GevUZumjy
+hVSiMBHK0ZLLO3QGEqA3q2rYVBHfbJoWlLm0p2XGdC0x801S6VVRn8s+oo12mHDS
+b6ZlRS8bhbtbbfnywARmE4R6nc4n2PREnr+svpnba0/bWCGwiSe0jzLWS15ykV7f
+BZ3ZSS/0tm9QH3XLgJ3m0+TR8tMCAwEAAaOCAWkwggFlMB8GA1UdIwQYMBaAFAtY
+5YvGTBU3pECpMKkhvkc2Wlb/MB0GA1UdDgQWBBSIRFH/UCppXi2I9CG62Qzyzsvq
+fDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADA+BgNVHSAENzA1
+MDMGBFUdIAAwKzApBggrBgEFBQcCARYdaHR0cHM6Ly9zZWN1cmUuY29tb2RvLmNv
+bS9DUFMwSQYDVR0fBEIwQDA+oDygOoY4aHR0cDovL2NybC5jb21vZG9jYS5jb20v
+Q09NT0RPQ2VydGlmaWNhdGlvbkF1dGhvcml0eS5jcmwwdAYIKwYBBQUHAQEEaDBm
+MD4GCCsGAQUFBzAChjJodHRwOi8vY3J0LmNvbW9kb2NhLmNvbS9DT01PRE9BZGRU
+cnVzdFNlcnZlckNBLmNydDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2Rv
+Y2EuY29tMA0GCSqGSIb3DQEBBQUAA4IBAQCaQ7+vpHJezX1vf/T8PYy7cOYe3QT9
+P9ydn7+JdpvyhjH8f7PtKpFTLOKqsOPILHH3FYojHPFpLoH7sbxiC6saVBzZIl40
+TKX2Iw9dej3bQ81pfhc3Us1TocIR1FN4J2TViUFNFlW7kMvw2OTd3dMJZEgo/zIj
+hC+Me1UvzymINzR4DzOq/7fylqSbRIC1vmxWVKukgZ4lGChUOn8sY89ZIIwYazgs
+tN3t40DeDDYlV5rA0WCeXgNol64aO+pF11GZSe5EWVYLXrGPaOqKnsrSyaADfnAl
+9DLJTlCDh6I0SD1PNXf82Ijq9n0ezkO21cJqfjhmY03n7jLvDyToKmf6
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WKIC_ND2_Issuer_Root.pem b/test/ocsp-tests/WKIC_ND2_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..14d35cf
--- /dev/null
@@ -0,0 +1,23 @@
+-----BEGIN CERTIFICATE-----
+MIID0DCCArigAwIBAgIQIKTEf93f4cdTYwcTiHdgEjANBgkqhkiG9w0BAQUFADCB
+gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV
+BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMTAxMDEwMDAw
+MDBaFw0zMDEyMzEyMzU5NTlaMIGBMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P
+RE8gQ0EgTGltaXRlZDEnMCUGA1UEAxMeQ09NT0RPIENlcnRpZmljYXRpb24gQXV0
+aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0UCLi3LjkRv3
+UcEbVASY06m/weaKXTuH+7uIzg3jLz8GlvCiKVCZrts7oVewdFFxze1CkU1B/qnI
+2GqGd0S7WWaXUF601CxwRM/aN5VCaTwwxHGzUvAhTaHYujl8HJ6jJJ3ygxaYqhZ8
+Q5sVW7euNJH+1GImGEaaP+vB+fGQV+useg2L23IwambV4EajcNxo2f8ESIl33rXp
++2dtQem8Ob0y2WIC8bGoPW43nOIv4tOiJovGuFVDiOEjPqXSJDlqR6sA1KGzqSX+
+DT+nHbrTUcELpNqsOO9VUCQFZUaTNE8tja3G1CEZ0o7KBWFxB3NH5YoZEr0ETc5O
+nKVIrLsm9wIDAQABo0IwQDAdBgNVHQ4EFgQUC1jli8ZMFTekQKkwqSG+RzZaVv8w
+DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD
+ggEBAC/JxBwHO89hAgCx2SFRdXIDMLDEFh9sAIsQrK/xR9SuEDwMGvjUk2ysEDd8
+t6aDZK3N3w6HM503sMZ7OHKx8xoOo/lVem0DZgMXlUrxsXrfViEGQo+x06iF3u6X
+HWLrp+cxEmbDD6ZLLkGC9/3JG6gbr+48zuOcrigHoSybJMIPIyaDMouGDx8rEkYl
+Fo92kANr3ryqImhrjKGsKxE5pttwwn1y6TPn/CbxdFqR5p2ErPioBhlG5qfpqjQi
+pKGfeq23sqSaM4hxAjwu1nqyH6LKwN0vEJT9s4yEIHlG1QXUEOTS22RPuFvuG8Ug
+R1uUq27UlTMdphVx8fiUylQ5PsE=
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WKIC_ND3_Issuer_Root.pem b/test/ocsp-tests/WKIC_ND3_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..ba7fb8d
--- /dev/null
@@ -0,0 +1,25 @@
+-----BEGIN CERTIFICATE-----
+MIIENjCCAx6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBvMQswCQYDVQQGEwJTRTEU
+MBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFkZFRydXN0IEV4dGVybmFs
+IFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBFeHRlcm5hbCBDQSBSb290
+MB4XDTAwMDUzMDEwNDgzOFoXDTIwMDUzMDEwNDgzOFowbzELMAkGA1UEBhMCU0Ux
+FDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1BZGRUcnVzdCBFeHRlcm5h
+bCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3QgRXh0ZXJuYWwgQ0EgUm9v
+dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALj3GjPm8gAELTngTlvt
+H7xsD821+iO2zt6bETOXpClMfZOfvUq8k+0DGuOPz+VtUFrWlymUWoCwSXrbLpX9
+uMq/NzgtHj6RQa1wVsfwTz/oMp50ysiQVOnGXw94nZpAPA6sYapeFI+eh6FqUNzX
+mk6vBbOmcZSccbNQYArHE504B4YCqOmoaSYYkKtMsE8jqzpPhNjfzp/haW+710LX
+a0Tkx63ubUFfclpxCDezeWWkWaCUN/cALw3CknLa0Dhy2xSoRcRdKn23tNbE7qzN
+E0S3ySvdQwAl+mG5aWpYIxG3pzOPVnVZ9c0p10a3CitlttNCbxWyuHv77+ldU9U0
+WicCAwEAAaOB3DCB2TAdBgNVHQ4EFgQUrb2YejS0Jvf6xCZU7wO94CTLVBowCwYD
+VR0PBAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wgZkGA1UdIwSBkTCBjoAUrb2YejS0
+Jvf6xCZU7wO94CTLVBqhc6RxMG8xCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtBZGRU
+cnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0ZXJuYWwgVFRQIE5ldHdvcmsx
+IjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENBIFJvb3SCAQEwDQYJKoZIhvcN
+AQEFBQADggEBALCb4IUlwtYj4g+WBpKdQZic2YR5gdkeWxQHIzZlj7DYd7usQWxH
+YINRsPkyPef89iYTx4AWpb9a/IfPeHmJIZriTAcKhjW88t5RxNKWt9x+Tu5w/Rw5
+6wwCURQtjr0W4MHfRnXnJK3s9EK0hZNwEGe6nQY1ShjTK3rMUUKhemPR5ruhxSvC
+Nr4TDea9Y355e6cJDUCrat2PisP29owaQgVR1EX1n6diIWgVIEM8med8vSTYqZEX
+c4g/VhsxOBi0cQ+azcgOno4uG+GMmIPLHzHxREzGBHNJdmAPx/i9F4BrLunMTA5a
+mnkPIAou1Z5jJh5VkpTYghdae9C8x49OhgQ=
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WRID_D1.ors b/test/ocsp-tests/WRID_D1.ors
new file mode 100644 (file)
index 0000000..6589782
--- /dev/null
@@ -0,0 +1,32 @@
+MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBRg2uQDFpGg
+Ywh4P1y2H9bZ2/BQNBgPMjAxMjEwMTExMTI1MjJaMHUwczBLMAkGBSsOAwIaBQAE
+FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8
+vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDExMTAwMDAwWqARGA8yMDEyMTAxODEw
+MDAwMFowCwYJKoZIhvcNAQEFA4IBAQAHQBPHdHWNzaFs5bfBvQcvxBWsDnsCFXNs
+a1fECiWDFNt6Nz4MCBY4rC7n0nhQfvg4m1woNcTAZVO8lacYomwUU/5/XpeFM6yc
+NeFcVbfVXA48GWPANitNQCwyRL5hGfIqNy1I9T1BHlBqYusmJKy65r2iqpmld/hD
+7S1dsCd4fXhjBQQORPmBqhKvWEU08Dh5aoaDAuaZoxRH8B1q+mUs0ODOIu34L84y
+JcxTKccd/HCwI8oxwLoBtyXSHb+dCzc7zSjFvQhbT5dOCvJNNe/fk6+EhMtQ6ybC
+D7p9EShCvU5jAdw54bZWk5wIQSvsWk9axUmYFFLYI3hAaoybpFVroIID+DCCA/Qw
+ggPwMIIC2KADAgECAhIRISdENsrz1CSWG3VIBwfQERQwDQYJKoZIhvcNAQEFBQAw
+WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV
+BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy
+MDkxOTA3NDA1MFoXDTEyMTIxOTA4NDA1MFowgYUxCzAJBgNVBAYTAkJFMRkwFwYD
+VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu
+ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDIxFzAVBgNV
+BAUTDjIwMTIwOTE5MDk0MDAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
+AQEAnCgMsBO+IxIqCnXCOfXJoIC3wj+f0s4DV9h2gJBzisWXkaJD2DfNrd0kHUXK
+qVVPUxnA4G5iZu0Z385/KiOt1/P6vQ/Z2/AsEh/8Z/hIyeZCHL31wrSZW4yLeZwi
+M76wPiBHJxPun681HQlVs/OGKSHnbHc1XJAIeA/M8u+lLWqIKB+AJ82TrOqUMj1s
+LjGhQNs84xPliONN5K7DrEy+Y65X/rFxN77Smw+UtcH1GgH2NgaHH8dpt1m25sgm
+UxZWhdx66opB/lbRQwWdGt7MC0kJFaWHDZq64DTuYoekFYSxAFu0nd0EekEHEJEi
+9mquB9cv/96SuEJl8BcUWU/1LwIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P
+AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw
+HQYDVR0OBBYEFF/a5AMWkaBjCHg/XLYf1tnb8FA0MB8GA1UdIwQYMBaAFLCwSv0c
+dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCKRl1iXFmOQtLseDWP
+Y5icDDBGiRi17CGgvIzGJi/ha0PhbO+X0TmQIEnRX3Mu0Er/Mm4RZSjMtJ2iZRh3
+tGf4Dn+jKgKOmgXC3oOG/l8RPHLf0yaPSdn/z0TXtA30vTFBLlFeWnhbfhovea4+
+snPdBxLqWZdtxmiwojgqA7YATCWwavizrBr09YRyDwzgtpZ2BwMruGuFuV9FsEwL
+PCM53yFlrM32oFghyfyE5kYjgnnueKM+pw1kA0jgb1CnVJRrMEN1TXuXDAZLtHKG
+5X/drah1JtkoZhCzxzZ3bYdVDQJ90OHFqM58lwGD6z3XuPKrHDKZKt+CPIsl5g7p
+4J2l
diff --git a/test/ocsp-tests/WRID_D2.ors b/test/ocsp-tests/WRID_D2.ors
new file mode 100644 (file)
index 0000000..4e11e4b
--- /dev/null
@@ -0,0 +1,32 @@
+MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTrlwecTarB
+yVdbHxANRLCFYj1mqBgPMjAxMjEwMTExMTI1MjVaMG4wbDBEMAkGBSsOAwIaBQAE
+FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA
+AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL
+BgkqhkiG9w0BAQUDggEBAHThkPoy6eA7qX9y5C5b1ElRSwdjzsd15OJSqP2yjQbS
+Ol1K8DWtX0UhTfRH+CrIPoWL40g2HjXtIVeMD6s3hakYimZUenIJ/IRRSVWp+EXU
+MewgTVPz/wJN/9dJIkSbOI/BmpIGlaaBaLwcb39nJjZMq0sXj8jRI5i0isotOAFz
+Zc0R20viBEH099KuGktB2fKKEpVbbWPljTxKzkIBs9SXZBIqd/X2MWzQWcLKzhL0
+oynkvqxTFqNVjjZKcKSXPS/XEUufLrv/E3xQZYAfTJr778kFkyA8JzrXiH6W5DX6
+UbqsnO5DaPZvMDfvlQWETkoS1j+Qgu2mIWzdiw7sPrOgggQQMIIEDDCCBAgwggLw
+oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF
+MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw
+GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw
+NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu
+di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh
+bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP2QF8p0+Fb7ID
+MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi
+oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL
+866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J
+ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5
+MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw
+yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec
+TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG
+AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ
+BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF
+DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD
+ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7
+8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm
+uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb
+4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa
+YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC
+fBIRKjnK621vAWvc7UR+0hqnZ+U=
diff --git a/test/ocsp-tests/WRID_D3.ors b/test/ocsp-tests/WRID_D3.ors
new file mode 100644 (file)
index 0000000..61e2d09
--- /dev/null
@@ -0,0 +1,38 @@
+MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV
+BAYTAlVTMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML
+Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG
+A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMTEzMjE0MVowZjBkMDwwCQYF
+Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y
+ORjRAgMLs8aAABgPMjAxMjEwMTExMjQyMTZaoBEYDzIwMTIxMDEzMTMyMTQxWjAN
+BgkqhkiG9w0BAQUFAAOCAQEAEWd9kKEfaurOXDV98OVtU27TmK4L4MeGEPdkg1i+
+fbPMe1mouWlVm23W6yaM7mM2NMXLW+hTNzqfyMPM7rByXNaFAAniCPTXNO3eJRIA
+Zf0F10OSdBQ/ln4igHQCVZCnXR30/aP5/PMb4u3/LTuC9aW6K7mLXcuCvJztGnXO
+v3r64q/qTGG/b4eS65exykV9riSFuGp1rzLAy5fSYTBWTOBQ679PFjQnL60GkrZA
+Egtxw2ozEDwo+X0WamEouxN8mjX/VQlMdEbykUFDuPD3vZydZ04BV9f18RJZOU9j
+gCwMzd9gb4jUL4ykdWiLmO+YPDWFyNSYEIfnGgk1VvPHuaCCBL4wggS6MIIEtjCC
+Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex
+HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy
+dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl
+cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE
+BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD
+QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD
+VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr
+2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0
+3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz
+uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz
+mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16
+VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD
+VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA
+MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA
+MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS
+r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ
+VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU
+6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE
+STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj
+ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe
+g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo
+5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9
+HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1
+VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a
+wG/pag==
diff --git a/test/ocsp-tests/WRID_ND1.ors b/test/ocsp-tests/WRID_ND1.ors
new file mode 100644 (file)
index 0000000..b6fadc5
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBSJRFH/UCpp
+Xi2I9CG62QzyzsvqfBgPMjAxMjEwMTEwODQxMTNaMHMwcTBJMAkGBSsOAwIaBQAE
+FEi2DTgjjfhFbk7lhD6jlBEYApefBBSIRFH/UCppXi2I9CG62QzyzsvqfAIQIuEz
+IiCgSN8psr+aMcKbB4AAGA8yMDEyMTAxMTA4NDExM1qgERgPMjAxMjEwMTUwODQx
+MTNaMA0GCSqGSIb3DQEBBQUAA4IBAQCNnhlBMxxh9z5AKfzAxiKs90CfxUsqfYfk
+8XlyF9VIfWRfEwzS6MF1pEzLnghRxTAmjrFgK+sxD9wk+S5Mdgw3nbED9DVFH2Hs
+RGKm/t9wkvrYOX6yRQqw6uRvU/5cibMjcyzKB/VQMwk4p4FwSUgBv88A5sTkKr2V
+eYdEm34hg2TZVkipPMBiyTyBLXs8D/9oALtnczg4xlTRSjDUvqoXL5haqY4QK2Pv
+mNwna6ACkwLmSuMe29UQ8IX2PUB4R5Etni5czyiKGxZLm+4NAhuEwWFNEzCyImPc
+087gHGU1zx+qVSlajqMJ/9ZXYjbt7WiWdhOTGEv4VMn8dHhRUs32
diff --git a/test/ocsp-tests/WRID_ND2.ors b/test/ocsp-tests/WRID_ND2.ors
new file mode 100644 (file)
index 0000000..251f0df
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBQMWOWLxkwV
+N6RAqTCpIb5HNlpW/xgPMjAxMjEwMTAyMzAzMTlaMHMwcTBJMAkGBSsOAwIaBQAE
+FOy+ZAvtiWulchtVZmfKU1ZI9ewTBBQLWOWLxkwVN6RAqTCpIb5HNlpW/wIQEaO0
+0OyNt3+doM1dLVEvQoAAGA8yMDEyMTAxMDIzMDMxOVqgERgPMjAxMjEwMTQyMzAz
+MTlaMA0GCSqGSIb3DQEBBQUAA4IBAQCHn2nGfEUX/EJruMkTgh7GgB0u9cpAepaD
+sPv9gtl3KLUZyR+NbGMIa5/bpoJp0yg1z5VL6CLMusy3AF6Cn2fyaioDxG+yc+gA
+PcPFdEqiIMr+TP8s7qcEiE6WZddSSCqCn90VZSCWkpDhnCjDRwJLBBPU3803fdMz
+oguvyr7y6Koxik8X/iUe8EpSzAvmm4GZL3veTI+x7IezJSrhCS9zM0ZHjySjoDxC
++ljGH0EuWPTmFEqZVGIq3cuahIYzKItUbYnXU6ipi/2p42qbsFeok7eEN0EYsY1a
+vRATHGRmU7Q5HLCq4rQtZC1cis52Mvc9x1W4z/Gt5A3FtgElXXNA
diff --git a/test/ocsp-tests/WRID_ND3.ors b/test/ocsp-tests/WRID_ND3.ors
new file mode 100644 (file)
index 0000000..19641f5
--- /dev/null
@@ -0,0 +1,10 @@
+MIIB1AoBAKCCAc0wggHJBgkrBgEFBQcwAQEEggG6MIIBtjCBn6IWBBSuvZh6NLQm
+9/rEJlTvA73gJMtUGhgPMjAxMjEwMTAxMzA3NDZaMHQwcjBKMAkGBSsOAwIaBQAE
+FHyxZlScq9tE7mImFq30ZXv3etWUBBStvZh6NLQm9/rEJlTvA73gJMtUGgIRAKcN
+bJWejX5BTb8DmevkCauAABgPMjAxMjEwMTAxMzA3NDZaoBEYDzIwMTIxMDE0MTMw
+NzQ2WjANBgkqhkiG9w0BAQUFAAOCAQEAA70+GYJoFuUBwIN9KHMqmOOtnmoLBBlm
+HL2Su70ZEqSmL4zTt3iHY3m2YaNYSPphgDlQ4lY8zGAkCSrZ3ulpJun3RRy+gD29
+0ks155tChMbYNZrFm46vKWabBjh2p+623daymlcbgizi5Z+P4oJL68VrOqh+DArE
+MpHH16BTGaF+bAjzTRSbS90xUReqwnnEpRBrmcQVo4uKpSkbyrx7iMLqsJ2vGpgh
+xqj1kNPT9g3+gegmdU9QpFV0l9ZV8X/f0uz5nT4I0NL81d/KDHGx2rd+bftLODeL
+ZAWAzFbr5B5EMqPGoh/SQXpcuVOqMHjh8fi8PBXBcitlIFzdDKXDvA==
diff --git a/test/ocsp-tests/WSNIC_D1_Issuer_ICA.pem b/test/ocsp-tests/WSNIC_D1_Issuer_ICA.pem
new file mode 100644 (file)
index 0000000..3f1c053
--- /dev/null
@@ -0,0 +1,27 @@
+-----BEGIN CERTIFICATE-----
+MIIEhjCCA26gAwIBAgILBAAAAAABL07hXdQwDQYJKoZIhvcNAQEFBQAwTDEgMB4G
+A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp
+Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTEwNDEzMTAwMDAwWhcNMjIwNDEz
+MTAwMDAwWjBZMQswCQYDVQQGEwJVUzEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1z
+YTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBFeHRlbmRlZCBWYWxpZGF0aW9uIENBIC0g
+RzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDNoUbMUpq4pbR/WNnN
+2EugcgyXW6aIIMO5PUbc0FxSMPb6WU+FX7DbiLSpXysjSKyr9ZJ4FLYyD/tcaoVb
+AJDgu2X1WvlPZ37HbCnsk8ArysRe2LDb1r4/mwvAj6ldrvcAAqT8umYROHf+IyAl
+VRDFvYK5TLFoxuJwe4NcE2fBofN8C6iZmtDimyUxyCuNQPZSY7GgrVou9Xk2bTUs
+Dt0F5NDiB0i3KF4r1VjVbNAMoQFGAVqPxq9kx1UBXeHRxmxQJaAFrQCrDI1la93r
+wnJUyQ88ABeHIu/buYZ4FlGud9mmKE3zWI2DZ7k0JZscUYBR84OSaqOuR5rW5Isb
+wO2xAgMBAAGjggFaMIIBVjAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB
+/wIBADAdBgNVHQ4EFgQUsLBK/Rx1KPgcYaoT9vrBkD1rFqMwRwYDVR0gBEAwPjA8
+BgRVHSAAMDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2JhbHNpZ24uY29t
+L3JlcG9zaXRvcnkvMDYGA1UdHwQvMC0wK6ApoCeGJWh0dHA6Ly9jcmwuZ2xvYmFs
+c2lnbi5uZXQvcm9vdC1yMi5jcmwwRAYIKwYBBQUHAQEEODA2MDQGCCsGAQUFBzAB
+hihodHRwOi8vb2NzcC5nbG9iYWxzaWduLmNvbS9FeHRlbmRlZFNTTENBMCkGA1Ud
+JQQiMCAGCCsGAQUFBwMBBggrBgEFBQcDAgYKKwYBBAGCNwoDAzAfBgNVHSMEGDAW
+gBSb4gdXZxwewGoG3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAL0m28rZa
+pJWrnlrpK4KbzJBrfHRFIOde2Mcj7ig1sTVlKqVR4FU/9oNntOQ2KbDa7JeVqYoF
+o0X+Iy5SiLQfEICt0oufo1+oxetz3nmIQZgz7qdgGLFGyUAQB5yPClLJExoGbqCb
+LTr2rk/no1E1KlsYBRLlUdy2NmLz4aQP++TPw5S/EauhWTEB8MxT7I9j12yW00gq
+iiPtRVaoZkHqAblH7qFHDBTxI+Egc8p9UHxkOFejj0qcm+ltRc9Ea01gIEBxJbVG
+qmwIft/I+shWKpLLg7h5CZctXqEBzgbttJfJBNxB7+BPNk3kQHNG7BESfIhbNCYl
+TercGL7FG81kwA==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WSNIC_D2_Issuer_Root.pem b/test/ocsp-tests/WSNIC_D2_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..af1b8b0
--- /dev/null
@@ -0,0 +1,21 @@
+-----BEGIN CERTIFICATE-----
+MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG
+A1UEBhMCVVMxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv
+b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw
+MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAlVTMRkwFwYDVQQKExBHbG9i
+YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT
+aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDaDuaZ
+jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp
+xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp
+1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG
+snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ
+U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8
+9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E
+BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B
+AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz
+yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE
+38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP
+AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad
+DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME
+HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A==
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WSNIC_D3_Issuer_Root.pem b/test/ocsp-tests/WSNIC_D3_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..764797a
--- /dev/null
@@ -0,0 +1,41 @@
+-----BEGIN CERTIFICATE-----
+MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdUZXN0
+IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB
+IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA
+Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO
+BgNVBAoTB1Rlc3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi
+MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ
+ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC
+CgKCAgEAziLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ
+8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6
+zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y
+fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7
+w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc
+G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k
+epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q
+laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ
+QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU
+fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826
+YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w
+ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY
+gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe
+MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0
+IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy
+dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw
+czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0
+dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl
+aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC
+AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg
+b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB
+ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc
+nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg
+18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c
+gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl
+Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY
+sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T
+SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF
+CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum
+GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk
+zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW
+omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVD
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WSNIC_ND1_Issuer_ICA.pem b/test/ocsp-tests/WSNIC_ND1_Issuer_ICA.pem
new file mode 100644 (file)
index 0000000..06b6908
--- /dev/null
@@ -0,0 +1,29 @@
+-----BEGIN CERTIFICATE-----
+MIIFBjCCA+6gAwIBAgIQEaO00OyNt3+doM1dLVEvQjANBgkqhkiG9w0BAQUFADCB
+gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV
+BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMDA1MjQwMDAw
+MDBaFw0yMDA1MzAxMDQ4MzhaMIGOMQswCQYDVQQGEwJVUzEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P
+RE8gQ0EgTGltaXRlZDE0MDIGA1UEAxMrQ09NT0RPIEV4dGVuZGVkIFZhbGlkYXRp
+b24gU2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
+ggEBAMxKljPNJY1n7iiWN4dG8PYEooR/U6qW5h+xAhxu7X0h1Nc8HqLYaS+ot/Wi
+7WRYZOFEZTZJQSABjTsT4gjzDPJXOZM3txyTRIOOvy3xoQV12m7ue28b6naDKHRK
+HCvT9cQDcpOvhs4JjDx11MkKL3Lzrb0OMDyEoXMfAyUUpY/D1vS15N2GevUZumjy
+hVSiMBHK0ZLLO3QGEqA3q2rYVBHfbJoWlLm0p2XGdC0x801S6VVRn8s+oo12mHDS
+b6ZlRS8bhbtbbfnywARmE4R6nc4n2PREnr+svpnba0/bWCGwiSe0jzLWS15ykV7f
+BZ3ZSS/0tm9QH3XLgJ3m0+TR8tMCAwEAAaOCAWkwggFlMB8GA1UdIwQYMBaAFAtY
+5YvGTBU3pECpMKkhvkc2Wlb/MB0GA1UdDgQWBBSIRFH/UCppXi2I9CG62Qzyzsvq
+fDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADA+BgNVHSAENzA1
+MDMGBFUdIAAwKzApBggrBgEFBQcCARYdaHR0cHM6Ly9zZWN1cmUuY29tb2RvLmNv
+bS9DUFMwSQYDVR0fBEIwQDA+oDygOoY4aHR0cDovL2NybC5jb21vZG9jYS5jb20v
+Q09NT0RPQ2VydGlmaWNhdGlvbkF1dGhvcml0eS5jcmwwdAYIKwYBBQUHAQEEaDBm
+MD4GCCsGAQUFBzAChjJodHRwOi8vY3J0LmNvbW9kb2NhLmNvbS9DT01PRE9BZGRU
+cnVzdFNlcnZlckNBLmNydDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2Rv
+Y2EuY29tMA0GCSqGSIb3DQEBBQUAA4IBAQCaQ7+vpHJezX1vf/T8PYy7cOYe3QT9
+P9ydn7+JdpvyhjH8f7PtKpFTLOKqsOPILHH3FYojHPFpLoH7sbxiC6saVBzZIl40
+TKX2Iw9dej3bQ81pfhc3Us1TocIR1FN4J2TViUFNFlW7kMvw2OTd3dMJZEgo/zIj
+hC+Me1UvzymINzR4DzOq/7fylqSbRIC1vmxWVKukgZ4lGChUOn8sY89ZIIwYazgs
+tN3t40DeDDYlV5rA0WCeXgNol64aO+pF11GZSe5EWVYLXrGPaOqKnsrSyaADfnAl
+9DLJTlCDh6I0SD1PNXf82Ijq9n0ezkO21cJqfjhmY03n7jLvDyToKmf6
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WSNIC_ND2_Issuer_Root.pem b/test/ocsp-tests/WSNIC_ND2_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..1b46fcf
--- /dev/null
@@ -0,0 +1,23 @@
+-----BEGIN CERTIFICATE-----
+MIID0DCCArigAwIBAgIQIKTEf93f4cdTYwcTiHdgEjANBgkqhkiG9w0BAQUFADCB
+gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G
+A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV
+BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMTAxMDEwMDAw
+MDBaFw0zMDEyMzEyMzU5NTlaMIGBMQswCQYDVQQGEwJVUzEbMBkGA1UECBMSR3Jl
+YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P
+RE8gQ0EgTGltaXRlZDEnMCUGA1UEAxMeQ09NT0RPIENlcnRpZmljYXRpb24gQXV0
+aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0ECLi3LjkRv3
+UcEbVASY06m/weaKXTuH+7uIzg3jLz8GlvCiKVCZrts7oVewdFFxze1CkU1B/qnI
+2GqGd0S7WWaXUF601CxwRM/aN5VCaTwwxHGzUvAhTaHYujl8HJ6jJJ3ygxaYqhZ8
+Q5sVW7euNJH+1GImGEaaP+vB+fGQV+useg2L23IwambV4EajcNxo2f8ESIl33rXp
++2dtQem8Ob0y2WIC8bGoPW43nOIv4tOiJovGuFVDiOEjPqXSJDlqR6sA1KGzqSX+
+DT+nHbrTUcELpNqsOO9VUCQFZUaTNE8tja3G1CEZ0o7KBWFxB3NH5YoZEr0ETc5O
+nKVIrLsm9wIDAQABo0IwQDAdBgNVHQ4EFgQUC1jli8ZMFTekQKkwqSG+RzZaVv8w
+DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD
+ggEBAC/JxBwHO89hAgCx2SFRdXIDMLDEFh9sAIsQrK/xR9SuEDwMGvjUk2ysEDd8
+t6aDZK3N3w6HM503sMZ7OHKx8xoOo/lVem0DZgMXlUrxsXrfViEGQo+x06iF3u6X
+HWLrp+cxEmbDD6ZLLkGC9/3JG6gbr+48zuOcrigHoSybJMIPIyaDMouGDx8rEkYl
+Fo92kANr3ryqImhrjKGsKxE5pttwwn1y6TPn/CbxdFqR5p2ErPioBhlG5qfpqjQi
+pKGfeq23sqSaM4hxAjwu1nqyH6LKwN0vEJT9s4yEIHlG1QXUEOTS22RPuFvuG8Ug
+R1uUq27UlTMdphVx8fiUylQ5PsE=
+-----END CERTIFICATE-----
diff --git a/test/ocsp-tests/WSNIC_ND3_Issuer_Root.pem b/test/ocsp-tests/WSNIC_ND3_Issuer_Root.pem
new file mode 100644 (file)
index 0000000..4d1f454
--- /dev/null
@@ -0,0 +1,25 @@
+-----BEGIN CERTIFICATE-----
+MIIENjCCAx6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBvMQswCQYDVQQGEwJTRTEU
+MBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFkZFRydXN0IEV4dGVybmFs
+IFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBFeHRlcm5hbCBDQSBSb290
+MB4XDTAwMDUzMDEwNDgzOFoXDTIwMDUzMDEwNDgzOFowbzELMAkGA1UEBhMCVVMx
+FDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1BZGRUcnVzdCBFeHRlcm5h
+bCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3QgRXh0ZXJuYWwgQ0EgUm9v
+dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALf3GjPm8gAELTngTlvt
+H7xsD821+iO2zt6bETOXpClMfZOfvUq8k+0DGuOPz+VtUFrWlymUWoCwSXrbLpX9
+uMq/NzgtHj6RQa1wVsfwTz/oMp50ysiQVOnGXw94nZpAPA6sYapeFI+eh6FqUNzX
+mk6vBbOmcZSccbNQYArHE504B4YCqOmoaSYYkKtMsE8jqzpPhNjfzp/haW+710LX
+a0Tkx63ubUFfclpxCDezeWWkWaCUN/cALw3CknLa0Dhy2xSoRcRdKn23tNbE7qzN
+E0S3ySvdQwAl+mG5aWpYIxG3pzOPVnVZ9c0p10a3CitlttNCbxWyuHv77+ldU9U0
+WicCAwEAAaOB3DCB2TAdBgNVHQ4EFgQUrb2YejS0Jvf6xCZU7wO94CTLVBowCwYD
+VR0PBAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wgZkGA1UdIwSBkTCBjoAUrb2YejS0
+Jvf6xCZU7wO94CTLVBqhc6RxMG8xCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtBZGRU
+cnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0ZXJuYWwgVFRQIE5ldHdvcmsx
+IjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENBIFJvb3SCAQEwDQYJKoZIhvcN
+AQEFBQADggEBALCb4IUlwtYj4g+WBpKdQZic2YR5gdkeWxQHIzZlj7DYd7usQWxH
+YINRsPkyPef89iYTx4AWpb9a/IfPeHmJIZriTAcKhjW88t5RxNKWt9x+Tu5w/Rw5
+6wwCURQtjr0W4MHfRnXnJK3s9EK0hZNwEGe6nQY1ShjTK3rMUUKhemPR5ruhxSvC
+Nr4TDea9Y355e6cJDUCrat2PisP29owaQgVR1EX1n6diIWgVIEM8med8vSTYqZEX
+c4g/VhsxOBi0cQ+azcgOno4uG+GMmIPLHzHxREzGBHNJdmAPx/i9F4BrLunMTA5a
+mnkPIAou1Z5jJh5VkpTYghdae9C8x49OhgQ=
+-----END CERTIFICATE-----
diff --git a/test/serverinfo.pem b/test/serverinfo.pem
new file mode 100644 (file)
index 0000000..cd3020e
--- /dev/null
@@ -0,0 +1,16 @@
+-----BEGIN SERVERINFO FOR CT-----
+ABIAZMevsj4TC5rgwjZNciLGwh15YXoIK9t5aypGJIG4QzyMowmwwDdqxudkUcGa
+DvuqlYL7psO5j4/BIHTe677CAZBBH3Ho2NOM5q1zub4AbfUMlKeufuQgeQ2Tj1oe
+LJLRzrwDnPs=
+-----END SERVERINFO FOR CT-----
+
+-----BEGIN SERVERINFO FOR TACK-----
+8wABTwFMh1Dz+3W6zULWJKjav5TNaFEXL1h98YtCXeyZnORYg4mbKpxH5CMbjpgx
+To3amSqUPF4Ntjc/i9+poutxebYkbgAAAkMcxb8+RaM9YEywaJEGViKJJmpYG/gJ
+HgfGaefI9kKbXSDmP9ntg8dLvDzuyYw14ktM2850Q9WvBiltpekilZxVuT2bFtfs
+cmS++SAK9YOM8RrKhL1TLmrktoBEJZ6z5GTukYdQ8/t1us1C1iSo2r+UzWhRFy9Y
+ffGLQl3smZzkWIOJmyqcR+QjG46YMU6N2pkqlDxeDbY3P4vfqaLrcXm2JG4AAAGN
+xXQJPbdniI9rEydVXb1Cu1yT/t7FBEx6hLxuoypXjCI1wCGpXsd8zEnloR0Ank5h
+VO/874E/BZlItzSPpcmDKl5Def6BrAJTErQlE9npo52S05YWORxJw1+VYBdqQ09A
+x3wA
+-----END SERVERINFO FOR TACK-----
diff --git a/test/smime-certs/ca.cnf b/test/smime-certs/ca.cnf
new file mode 100644 (file)
index 0000000..5e8b108
--- /dev/null
@@ -0,0 +1,66 @@
+#
+# OpenSSL example configuration file for automated certificate creation.
+#
+
+# This definition stops the following lines choking if HOME or CN
+# is undefined.
+HOME                   = .
+RANDFILE               = $ENV::HOME/.rnd
+CN                     = "Not Defined"
+default_ca             = ca
+
+####################################################################
+[ req ]
+default_bits           = 2048
+default_keyfile        = privkey.pem
+# Don't prompt for fields: use those in section directly
+prompt                 = no
+distinguished_name     = req_distinguished_name
+x509_extensions        = v3_ca # The extensions to add to the self signed cert
+string_mask = utf8only
+
+# req_extensions = v3_req # The extensions to add to a certificate request
+
+[ req_distinguished_name ]
+countryName                    = UK
+
+organizationName               = OpenSSL Group
+# Take CN from environment so it can come from a script.
+commonName                     = $ENV::CN
+
+[ usr_cert ]
+
+# These extensions are added when 'ca' signs a request for an end entity
+# certificate
+
+basicConstraints=critical, CA:FALSE
+keyUsage=critical, nonRepudiation, digitalSignature, keyEncipherment
+
+# PKIX recommendations harmless if included in all certificates.
+subjectKeyIdentifier=hash
+authorityKeyIdentifier=keyid
+
+[ dh_cert ]
+
+# These extensions are added when 'ca' signs a request for an end entity
+# DH certificate
+
+basicConstraints=critical, CA:FALSE
+keyUsage=critical, keyAgreement
+
+# PKIX recommendations harmless if included in all certificates.
+subjectKeyIdentifier=hash
+authorityKeyIdentifier=keyid
+
+[ v3_ca ]
+
+
+# Extensions for a typical CA
+
+# PKIX recommendation.
+
+subjectKeyIdentifier=hash
+authorityKeyIdentifier=keyid:always
+basicConstraints = critical,CA:true
+keyUsage = critical, cRLSign, keyCertSign
+
diff --git a/test/smime-certs/mksmime-certs.sh b/test/smime-certs/mksmime-certs.sh
new file mode 100644 (file)
index 0000000..f01f664
--- /dev/null
@@ -0,0 +1,74 @@
+#!/bin/sh
+
+# Utility to recreate S/MIME certificates
+
+OPENSSL=../../apps/openssl
+OPENSSL_CONF=./ca.cnf
+export OPENSSL_CONF
+
+# Root CA: create certificate directly
+CN="Test S/MIME RSA Root" $OPENSSL req -config ca.cnf -x509 -nodes \
+       -keyout smroot.pem -out smroot.pem -newkey rsa:2048 -days 3650
+
+# EE RSA certificates: create request first
+CN="Test S/MIME EE RSA #1" $OPENSSL req -config ca.cnf -nodes \
+       -keyout smrsa1.pem -out req.pem -newkey rsa:2048
+# Sign request: end entity extensions
+$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \
+       -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smrsa1.pem
+
+CN="Test S/MIME EE RSA #2" $OPENSSL req -config ca.cnf -nodes \
+       -keyout smrsa2.pem -out req.pem -newkey rsa:2048
+$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \
+       -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smrsa2.pem
+
+CN="Test S/MIME EE RSA #3" $OPENSSL req -config ca.cnf -nodes \
+       -keyout smrsa3.pem -out req.pem -newkey rsa:2048
+$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \
+       -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smrsa3.pem
+
+# Create DSA parameters
+
+$OPENSSL dsaparam -out dsap.pem 2048
+
+CN="Test S/MIME EE DSA #1" $OPENSSL req -config ca.cnf -nodes \
+       -keyout smdsa1.pem -out req.pem -newkey dsa:dsap.pem
+$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \
+       -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smdsa1.pem
+CN="Test S/MIME EE DSA #2" $OPENSSL req -config ca.cnf -nodes \
+       -keyout smdsa2.pem -out req.pem -newkey dsa:dsap.pem
+$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \
+       -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smdsa2.pem
+CN="Test S/MIME EE DSA #3" $OPENSSL req -config ca.cnf -nodes \
+       -keyout smdsa3.pem -out req.pem -newkey dsa:dsap.pem
+$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \
+       -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smdsa3.pem
+
+# Create EC parameters
+
+$OPENSSL ecparam -out ecp.pem -name P-256
+$OPENSSL ecparam -out ecp2.pem -name K-283
+
+CN="Test S/MIME EE EC #1" $OPENSSL req -config ca.cnf -nodes \
+       -keyout smec1.pem -out req.pem -newkey ec:ecp.pem
+$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \
+       -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smec1.pem
+CN="Test S/MIME EE EC #2" $OPENSSL req -config ca.cnf -nodes \
+       -keyout smec2.pem -out req.pem -newkey ec:ecp2.pem
+$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \
+       -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smec2.pem
+# Create X9.42 DH parameters.
+$OPENSSL genpkey -genparam -algorithm DH -pkeyopt dh_paramgen_type:2 \
+       -out dhp.pem
+# Generate X9.42 DH key.
+$OPENSSL genpkey -paramfile dhp.pem -out smdh.pem
+$OPENSSL pkey -pubout -in smdh.pem -out dhpub.pem
+# Generate dummy request.
+CN="Test S/MIME EE DH #1" $OPENSSL req -config ca.cnf -nodes \
+       -keyout smtmp.pem -out req.pem -newkey rsa:2048
+# Sign request but force public key to DH
+$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \
+       -force_pubkey dhpub.pem \
+       -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smdh.pem
+# Remove temp files.
+rm -f req.pem ecp.pem ecp2.pem dsap.pem dhp.pem dhpub.pem smtmp.pem smroot.srl
diff --git a/test/smime-certs/smdh.pem b/test/smime-certs/smdh.pem
new file mode 100644 (file)
index 0000000..f831b07
--- /dev/null
@@ -0,0 +1,33 @@
+-----BEGIN PRIVATE KEY-----
+MIIBSgIBADCCASsGByqGSM4+AgEwggEeAoGBANQMSgwEcnEZ31kZxa9Ef8qOK/AJ
+9dMlsXMWVYnf/QevGdN/0Aei/j9a8QHG+CvvTm0DOEKhN9QUtABKsYZag865CA7B
+mSdHjQuFqILtzA25sDJ+3+jk9vbss+56ETRll/wasJVLGbmmHNkBMvc1fC1d/sGF
+cEn4zJnQvvFaeMgDAoGAaQD9ZvL8FYsJuNxN6qp5VfnfRqYvyi2PWSqtRKPGGC+V
+thYg49PRjwPOcXzvOsdEOQ7iH9jTiSvnUdwSSEwYTZkSBuQXAgOMJAWOpoXyaRvh
+atziBDoBnWS+/kX5RBhxvS0+em9yfRqAQleuGG+R1mEDihyJc8dWQQPT+O1l4oUC
+FQCJlKsQZ0VBrWPGcUCNa54ZW6TH9QQWAhRR2NMZrQSfWthXDO8Lj5WZ34zQrA==
+-----END PRIVATE KEY-----
+-----BEGIN CERTIFICATE-----
+MIID/zCCAuegAwIBAgIJANv1TSKgememMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv
+TUlNRSBSU0EgUm9vdDAeFw0xMzA4MDIxNDQ5MjlaFw0yMzA2MTExNDQ5MjlaMEQx
+CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRU
+ZXN0IFMvTUlNRSBFRSBESCAjMTCCAbYwggErBgcqhkjOPgIBMIIBHgKBgQDUDEoM
+BHJxGd9ZGcWvRH/KjivwCfXTJbFzFlWJ3/0HrxnTf9AHov4/WvEBxvgr705tAzhC
+oTfUFLQASrGGWoPOuQgOwZknR40LhaiC7cwNubAyft/o5Pb27LPuehE0ZZf8GrCV
+Sxm5phzZATL3NXwtXf7BhXBJ+MyZ0L7xWnjIAwKBgGkA/Wby/BWLCbjcTeqqeVX5
+30amL8otj1kqrUSjxhgvlbYWIOPT0Y8DznF87zrHRDkO4h/Y04kr51HcEkhMGE2Z
+EgbkFwIDjCQFjqaF8mkb4Wrc4gQ6AZ1kvv5F+UQYcb0tPnpvcn0agEJXrhhvkdZh
+A4ociXPHVkED0/jtZeKFAhUAiZSrEGdFQa1jxnFAjWueGVukx/UDgYQAAoGAL1ve
+cgI2awBeJH8ULBhSQpdL224VUDxFPiXzt8Vu5VLnxPv0pfA5En+8VByTuV7u6RSw
+3/78NuTyr/sTyN8YlB1AuXHdTJynA1ICte1xgD4j2ijlq+dv8goOAFt9xkvXx7LD
+umJ/cCignXETcNGfMi8+0s0bpMZyoHRdce8DQ26jYDBeMAwGA1UdEwEB/wQCMAAw
+DgYDVR0PAQH/BAQDAgXgMB0GA1UdDgQWBBQLWk1ffSXH8p3Bqrdjgi/6jzLnwDAf
+BgNVHSMEGDAWgBTffl6IBSQzCN0igQKXzJq3sTMnMDANBgkqhkiG9w0BAQUFAAOC
+AQEAWvJj79MW1/Wq3RIANgAhonsI1jufYqxTH+1M0RU0ZXHulgem77Le2Ls1bizi
+0SbvfpTiiFGkbKonKtO2wvfqwwuptSg3omMI5IjAGxYbyv2KBzIpp1O1LTDk9RbD
+48JMMF01gByi2+NLUQ1MYF+5RqyoRqcyp5x2+Om1GeIM4Q/GRuI4p4dybWy8iC+d
+LeXQfR7HXfh+tAum+WzjfLJwbnWbHmPhTbKB01U4lBp6+r8BGHAtNdPjEHqap4/z
+vVZVXti9ThZ20EhM+VFU3y2wyapeQjhQvw/A2YRES0Ik7BSj3hHfWH/CTbLVQnhu
+Uj6tw18ExOYxqoEGixNLPA5qsQ==
+-----END CERTIFICATE-----
index d5677db..b424f67 100644 (file)
@@ -1,34 +1,47 @@
------BEGIN DSA PRIVATE KEY-----
-MIIBuwIBAAKBgQDFJfsIPOIawMO5biw+AoYUhNVxReBOLQosU3Qv4B8krac0BNr3
-OjSGLh1wZxHqhlAE0QmasTaKojuk20nNWeFnczSz6vDl0IVJEhS8VYor5kt9gLqt
-GcoAgsf4gRDIutJyQDaNn3IVY89uXUVIoexvQeLQDBCgQPC5O8rJdqBwtwIVAK2J
-jt+dqk07eQUE59koYUEKyNorAoGBAI4IEpusf8G14kCHmRtnHXM2tG5EWJDmW6Qt
-wjqvWp1GKUx5WFy1tVWR9nl5rL0Di+kNdENo+SkKj7h3uDulGOI6T0mQYbV2h1IK
-+FMOGnOqvZ8eNTE2n4PGTo5puZ63LBm+QYrQsrNiUY4vakLFQ2rEK/SLwdsDFK4Z
-SJCBQw5zAoGATQlPPF+OeU8nu3rsdXGDiZdJzOkuCce3KQfTABA9C+Dk4CVcvBdd
-YRLGpnykumkNTO1sTO+4/Gphsuje1ujK9td4UEhdYqylCe5QjEMrszDlJtelDQF9
-C0yhdjKGTP0kxofLhsGckcuQvcKEKffT2pDDKJIy4vWQO0UyJl1vjLcCFG2uiGGx
-9fMUZq1v0ePD4Wo0Xkxo
------END DSA PRIVATE KEY-----
+-----BEGIN PRIVATE KEY-----
+MIICZQIBADCCAjkGByqGSM44BAEwggIsAoIBAQCQfLlNdehPnTrGIMhw4rk0uua6
+k1nCG3zcyfXli17BdB2k0HBPaTA3a3ZHfOt1Awy0Uu0wZ3gdPr9z0I64hnJXIGou
+zIanZ7nYRImHtX5JMFbXeyxo1Owd2Zs3oEk9nQUoUsMxvmYC/ghPL5Zx1pPxcHCO
+wzWxoG4yZMjimXOc1/W7zvK/4/g/Cz9fItD3zdcydfgM/hK0/CeYQ21xfhqf4mjK
+v9plnCcWgToGI+7H8VK80MFbkO2QKRz3vP1/TjK6PRm9sEeB5b10+SvGv2j2w+CC
+0fXL4s6n7PtBlm/bww8xL1/Az8kwejUcII1Dc8uNwwISwGbwaGBvl7IHpm21AiEA
+rodZi+nCKZdTL8IgCjX3n0DuhPRkVQPjz/B6VweLW9MCggEAfimkUNwnsGFp7mKM
+zJKhHoQkMB1qJzyIHjDzQ/J1xjfoF6i27afw1/WKboND5eseZhlhA2TO5ZJB6nGx
+DOE9lVQxYVml++cQj6foHh1TVJAgGl4mWuveW/Rz+NEhpK4zVeEsfMrbkBypPByy
+xzF1Z49t568xdIo+e8jLI8FjEdXOIUg4ehB3NY6SL8r4oJ49j/sJWfHcDoWH/LK9
+ZaBF8NpflJe3F40S8RDvM8j2HC+y2Q4QyKk1DXGiH+7yQLGWzr3M73kC3UBnnH0h
+Hxb7ISDCT7dCw/lH1nCbVFBOM0ASI26SSsFSXQrvD2kryRcTZ0KkyyhhoPODWpU+
+TQMsxQQjAiEAkolGvb/76X3vm5Ov09ezqyBYt9cdj/FLH7DyMkxO7X0=
+-----END PRIVATE KEY-----
 -----BEGIN CERTIFICATE-----
-MIIDpDCCAw2gAwIBAgIJAMtotfHYdEsWMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
-BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv
-TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDlaFw0xNjA1MTAxMzUzMDlaMEUx
+MIIFkDCCBHigAwIBAgIJANk5lu6mSyBDMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv
+TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzFaFw0yMzA1MjYxNzI4MzFaMEUx
 CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU
-ZXN0IFMvTUlNRSBFRSBEU0EgIzEwggG3MIIBLAYHKoZIzjgEATCCAR8CgYEAxSX7
-CDziGsDDuW4sPgKGFITVcUXgTi0KLFN0L+AfJK2nNATa9zo0hi4dcGcR6oZQBNEJ
-mrE2iqI7pNtJzVnhZ3M0s+rw5dCFSRIUvFWKK+ZLfYC6rRnKAILH+IEQyLrSckA2
-jZ9yFWPPbl1FSKHsb0Hi0AwQoEDwuTvKyXagcLcCFQCtiY7fnapNO3kFBOfZKGFB
-CsjaKwKBgQCOCBKbrH/BteJAh5kbZx1zNrRuRFiQ5lukLcI6r1qdRilMeVhctbVV
-kfZ5eay9A4vpDXRDaPkpCo+4d7g7pRjiOk9JkGG1dodSCvhTDhpzqr2fHjUxNp+D
-xk6OabmetywZvkGK0LKzYlGOL2pCxUNqxCv0i8HbAxSuGUiQgUMOcwOBhAACgYBN
-CU88X455Tye7eux1cYOJl0nM6S4Jx7cpB9MAED0L4OTgJVy8F11hEsamfKS6aQ1M
-7WxM77j8amGy6N7W6Mr213hQSF1irKUJ7lCMQyuzMOUm16UNAX0LTKF2MoZM/STG
-h8uGwZyRy5C9woQp99PakMMokjLi9ZA7RTImXW+Mt6OBgzCBgDAdBgNVHQ4EFgQU
-4Qfbhpi5yqXaXuCLXj427mR25MkwHwYDVR0jBBgwFoAUE89Lp7uJLrM4Vxd2xput
-aFvl7RcwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBsAwIAYDVR0RBBkwF4EV
-c21pbWVkc2ExQG9wZW5zc2wub3JnMA0GCSqGSIb3DQEBBQUAA4GBAFrdUzKK1pWO
-kd02S423KUBc4GWWyiGlVoEO7WxVhHLJ8sm67X7OtJOwe0UGt+Nc5qLtyJYSirw8
-phjiTdNpQCTJ8+Kc56tWkJ6H7NAI4vTJtPL5BM/EmeYrVSU9JI9xhqpyKw9IBD+n
-hRJ79W9FaiJRvaAOX+TkyTukJrxAWRyv
+ZXN0IFMvTUlNRSBFRSBEU0EgIzEwggNGMIICOQYHKoZIzjgEATCCAiwCggEBAJB8
+uU116E+dOsYgyHDiuTS65rqTWcIbfNzJ9eWLXsF0HaTQcE9pMDdrdkd863UDDLRS
+7TBneB0+v3PQjriGclcgai7MhqdnudhEiYe1fkkwVtd7LGjU7B3ZmzegST2dBShS
+wzG+ZgL+CE8vlnHWk/FwcI7DNbGgbjJkyOKZc5zX9bvO8r/j+D8LP18i0PfN1zJ1
++Az+ErT8J5hDbXF+Gp/iaMq/2mWcJxaBOgYj7sfxUrzQwVuQ7ZApHPe8/X9OMro9
+Gb2wR4HlvXT5K8a/aPbD4ILR9cvizqfs+0GWb9vDDzEvX8DPyTB6NRwgjUNzy43D
+AhLAZvBoYG+XsgembbUCIQCuh1mL6cIpl1MvwiAKNfefQO6E9GRVA+PP8HpXB4tb
+0wKCAQB+KaRQ3CewYWnuYozMkqEehCQwHWonPIgeMPND8nXGN+gXqLbtp/DX9Ypu
+g0Pl6x5mGWEDZM7lkkHqcbEM4T2VVDFhWaX75xCPp+geHVNUkCAaXiZa695b9HP4
+0SGkrjNV4Sx8ytuQHKk8HLLHMXVnj23nrzF0ij57yMsjwWMR1c4hSDh6EHc1jpIv
+yvignj2P+wlZ8dwOhYf8sr1loEXw2l+Ul7cXjRLxEO8zyPYcL7LZDhDIqTUNcaIf
+7vJAsZbOvczveQLdQGecfSEfFvshIMJPt0LD+UfWcJtUUE4zQBIjbpJKwVJdCu8P
+aSvJFxNnQqTLKGGg84NalT5NAyzFA4IBBQACggEAGXSQADbuRIZBjiQ6NikwZl+x
+EDEffIE0RWbvwf1tfWxw4ZvanO/djyz5FePO0AIJDBCLUjr9D32nkmIG1Hu3dWgV
+86knQsM6uFiMSzY9nkJGZOlH3w4NHLE78pk75xR1sg1MEZr4x/t+a/ea9Y4AXklE
+DCcaHtpMGeAx3ZAqSKec+zQOOA73JWP1/gYHGdYyTQpQtwRTsh0Gi5mOOdpoJ0vp
+O83xYbFCZ+ZZKX1RWOjJe2OQBRtw739q1nRga1VMLAT/LFSQsSE3IOp8hiWbjnit
+1SE6q3II2a/aHZH/x4OzszfmtQfmerty3eQSq3bgajfxCsccnRjSbLeNiazRSKNg
+MF4wDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwHQYDVR0OBBYEFNHQYTOO
+xaZ/N68OpxqjHKuatw6sMB8GA1UdIwQYMBaAFMmRUwpjexZbi71E8HaIqSTm5bZs
+MA0GCSqGSIb3DQEBBQUAA4IBAQAAiLociMMXcLkO/uKjAjCIQMrsghrOrxn4ZGBx
+d/mCTeqPxhcrX2UorwxVCKI2+Dmz5dTC2xKprtvkiIadJamJmxYYzeF1pgRriFN3
+MkmMMkTbe/ekSvSeMtHQ2nHDCAJIaA/k9akWfA0+26Ec25/JKMrl3LttllsJMK1z
+Xj7TcQpAIWORKWSNxY/ezM34+9ABHDZB2waubFqS+irlZsn38aZRuUI0K67fuuIt
+17vMUBqQpe2hfNAjpZ8dIpEdAGjQ6izV2uwP1lXbiaK9U4dvUqmwyCIPniX7Hpaf
+0VnX0mEViXMT6vWZTjLBUv0oKmO7xBkWHIaaX6oyF32pK5AO
 -----END CERTIFICATE-----
index ef86c11..648447f 100644 (file)
@@ -1,34 +1,47 @@
------BEGIN DSA PRIVATE KEY-----
-MIIBvAIBAAKBgQDFJfsIPOIawMO5biw+AoYUhNVxReBOLQosU3Qv4B8krac0BNr3
-OjSGLh1wZxHqhlAE0QmasTaKojuk20nNWeFnczSz6vDl0IVJEhS8VYor5kt9gLqt
-GcoAgsf4gRDIutJyQDaNn3IVY89uXUVIoexvQeLQDBCgQPC5O8rJdqBwtwIVAK2J
-jt+dqk07eQUE59koYUEKyNorAoGBAI4IEpusf8G14kCHmRtnHXM2tG5EWJDmW6Qt
-wjqvWp1GKUx5WFy1tVWR9nl5rL0Di+kNdENo+SkKj7h3uDulGOI6T0mQYbV2h1IK
-+FMOGnOqvZ8eNTE2n4PGTo5puZ63LBm+QYrQsrNiUY4vakLFQ2rEK/SLwdsDFK4Z
-SJCBQw5zAoGBAIPmO8BtJ+Yac58trrPwq9b/6VW3jQTWzTLWSH84/QQdqQa+Pz3v
-It/+hHM0daNF5uls8ICsPL1aLXmRx0pHvIyb0aAzYae4T4Jv/COPDMTdKbA1uitJ
-VbkGZrm+LIrs7I9lOkb4T0vI6kL/XdOCXY1469zsqCgJ/O2ibn6mq0nWAhR716o2
-Nf8SimTZYB0/CKje6M5ufA==
------END DSA PRIVATE KEY-----
+-----BEGIN PRIVATE KEY-----
+MIICZAIBADCCAjkGByqGSM44BAEwggIsAoIBAQCQfLlNdehPnTrGIMhw4rk0uua6
+k1nCG3zcyfXli17BdB2k0HBPaTA3a3ZHfOt1Awy0Uu0wZ3gdPr9z0I64hnJXIGou
+zIanZ7nYRImHtX5JMFbXeyxo1Owd2Zs3oEk9nQUoUsMxvmYC/ghPL5Zx1pPxcHCO
+wzWxoG4yZMjimXOc1/W7zvK/4/g/Cz9fItD3zdcydfgM/hK0/CeYQ21xfhqf4mjK
+v9plnCcWgToGI+7H8VK80MFbkO2QKRz3vP1/TjK6PRm9sEeB5b10+SvGv2j2w+CC
+0fXL4s6n7PtBlm/bww8xL1/Az8kwejUcII1Dc8uNwwISwGbwaGBvl7IHpm21AiEA
+rodZi+nCKZdTL8IgCjX3n0DuhPRkVQPjz/B6VweLW9MCggEAfimkUNwnsGFp7mKM
+zJKhHoQkMB1qJzyIHjDzQ/J1xjfoF6i27afw1/WKboND5eseZhlhA2TO5ZJB6nGx
+DOE9lVQxYVml++cQj6foHh1TVJAgGl4mWuveW/Rz+NEhpK4zVeEsfMrbkBypPByy
+xzF1Z49t568xdIo+e8jLI8FjEdXOIUg4ehB3NY6SL8r4oJ49j/sJWfHcDoWH/LK9
+ZaBF8NpflJe3F40S8RDvM8j2HC+y2Q4QyKk1DXGiH+7yQLGWzr3M73kC3UBnnH0h
+Hxb7ISDCT7dCw/lH1nCbVFBOM0ASI26SSsFSXQrvD2kryRcTZ0KkyyhhoPODWpU+
+TQMsxQQiAiAdCUJ5n2Q9hIynN8BMpnRcdfH696BKejGx+2Mr2kfnnA==
+-----END PRIVATE KEY-----
 -----BEGIN CERTIFICATE-----
-MIIDpTCCAw6gAwIBAgIJAMtotfHYdEsXMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
-BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv
-TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDlaFw0xNjA1MTAxMzUzMDlaMEUx
+MIIFkDCCBHigAwIBAgIJANk5lu6mSyBEMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv
+TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzFaFw0yMzA1MjYxNzI4MzFaMEUx
 CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU
-ZXN0IFMvTUlNRSBFRSBEU0EgIzIwggG4MIIBLAYHKoZIzjgEATCCAR8CgYEAxSX7
-CDziGsDDuW4sPgKGFITVcUXgTi0KLFN0L+AfJK2nNATa9zo0hi4dcGcR6oZQBNEJ
-mrE2iqI7pNtJzVnhZ3M0s+rw5dCFSRIUvFWKK+ZLfYC6rRnKAILH+IEQyLrSckA2
-jZ9yFWPPbl1FSKHsb0Hi0AwQoEDwuTvKyXagcLcCFQCtiY7fnapNO3kFBOfZKGFB
-CsjaKwKBgQCOCBKbrH/BteJAh5kbZx1zNrRuRFiQ5lukLcI6r1qdRilMeVhctbVV
-kfZ5eay9A4vpDXRDaPkpCo+4d7g7pRjiOk9JkGG1dodSCvhTDhpzqr2fHjUxNp+D
-xk6OabmetywZvkGK0LKzYlGOL2pCxUNqxCv0i8HbAxSuGUiQgUMOcwOBhQACgYEA
-g+Y7wG0n5hpzny2us/Cr1v/pVbeNBNbNMtZIfzj9BB2pBr4/Pe8i3/6EczR1o0Xm
-6WzwgKw8vVoteZHHSke8jJvRoDNhp7hPgm/8I48MxN0psDW6K0lVuQZmub4siuzs
-j2U6RvhPS8jqQv9d04JdjXjr3OyoKAn87aJufqarSdajgYMwgYAwHQYDVR0OBBYE
-FHsAGNfVltSYUq4hC+YVYwsYtA+dMB8GA1UdIwQYMBaAFBPPS6e7iS6zOFcXdsab
-rWhb5e0XMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/BAQDAgbAMCAGA1UdEQQZMBeB
-FXNtaW1lZHNhMkBvcGVuc3NsLm9yZzANBgkqhkiG9w0BAQUFAAOBgQCx9BtCbaYF
-FXjLClkuKXbESaDZA1biPgY25i00FsUzARuhCpqD2v+0tu5c33ZzIhL6xlvBRU5l
-6Atw/xpZhae+hdBEtxPJoGekLLrHOau7Md3XwDjV4lFgcEJkWZoaSOOIK+4D5jF0
-jZWtHjnwEzuLYlo7ScHSsbcQfjH0M1TP5A==
+ZXN0IFMvTUlNRSBFRSBEU0EgIzIwggNGMIICOQYHKoZIzjgEATCCAiwCggEBAJB8
+uU116E+dOsYgyHDiuTS65rqTWcIbfNzJ9eWLXsF0HaTQcE9pMDdrdkd863UDDLRS
+7TBneB0+v3PQjriGclcgai7MhqdnudhEiYe1fkkwVtd7LGjU7B3ZmzegST2dBShS
+wzG+ZgL+CE8vlnHWk/FwcI7DNbGgbjJkyOKZc5zX9bvO8r/j+D8LP18i0PfN1zJ1
++Az+ErT8J5hDbXF+Gp/iaMq/2mWcJxaBOgYj7sfxUrzQwVuQ7ZApHPe8/X9OMro9
+Gb2wR4HlvXT5K8a/aPbD4ILR9cvizqfs+0GWb9vDDzEvX8DPyTB6NRwgjUNzy43D
+AhLAZvBoYG+XsgembbUCIQCuh1mL6cIpl1MvwiAKNfefQO6E9GRVA+PP8HpXB4tb
+0wKCAQB+KaRQ3CewYWnuYozMkqEehCQwHWonPIgeMPND8nXGN+gXqLbtp/DX9Ypu
+g0Pl6x5mGWEDZM7lkkHqcbEM4T2VVDFhWaX75xCPp+geHVNUkCAaXiZa695b9HP4
+0SGkrjNV4Sx8ytuQHKk8HLLHMXVnj23nrzF0ij57yMsjwWMR1c4hSDh6EHc1jpIv
+yvignj2P+wlZ8dwOhYf8sr1loEXw2l+Ul7cXjRLxEO8zyPYcL7LZDhDIqTUNcaIf
+7vJAsZbOvczveQLdQGecfSEfFvshIMJPt0LD+UfWcJtUUE4zQBIjbpJKwVJdCu8P
+aSvJFxNnQqTLKGGg84NalT5NAyzFA4IBBQACggEAItQlFu0t7Mw1HHROuuwKLS+E
+h2WNNZP96MLQTygOVlqgaJY+1mJLzvl/51LLH6YezX0t89Z2Dm/3SOJEdNrdbIEt
+tbu5rzymXxFhc8uaIYZFhST38oQwJOjM8wFitAQESe6/9HZjkexMqSqx/r5aEKTa
+LBinqA1BJRI72So1/1dv8P99FavPADdj8V7fAccReKEQKnfnwA7mrnD+OlIqFKFn
+3wCGk8Sw7tSJ9g6jgCI+zFwrKn2w+w+iot/Ogxl9yMAtKmAd689IAZr5GPPvV2y0
+KOogCiUYgSTSawZhr+rjyFavfI5dBWzMq4tKx/zAi6MJ+6hGJjJ8jHoT9JAPmaNg
+MF4wDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwHQYDVR0OBBYEFGaxw04k
+qpufeGZC+TTBq8oMnXyrMB8GA1UdIwQYMBaAFMmRUwpjexZbi71E8HaIqSTm5bZs
+MA0GCSqGSIb3DQEBBQUAA4IBAQCk2Xob1ICsdHYx/YsBzY6E1eEwcI4RZbZ3hEXp
+VA72/Mbz60gjv1OwE5Ay4j+xG7IpTio6y2A9ZNepGpzidYcsL/Lx9Sv1LlN0Ukzb
+uk6Czd2sZJp+PFMTTrgCd5rXKnZs/0D84Vci611vGMA1hnUnbAnBBmgLXe9pDNRV
+6mhmCLLjJ4GOr5Wxt/hhknr7V2e1VMx3Q47GZhc0o/gExfhxXA8+gicM0nEYNakD
+2A1F0qDhQGakjuofANHhjdUDqKJ1sxurAy80fqb0ddzJt2el89iXKN+aXx/zEX96
+GI5ON7z/bkVwIi549lUOpWb2Mved61NBzCLKVP7HSuEIsC/I
 -----END CERTIFICATE-----
index eeb848d..77acc5e 100644 (file)
@@ -1,34 +1,47 @@
------BEGIN DSA PRIVATE KEY-----
-MIIBvAIBAAKBgQDFJfsIPOIawMO5biw+AoYUhNVxReBOLQosU3Qv4B8krac0BNr3
-OjSGLh1wZxHqhlAE0QmasTaKojuk20nNWeFnczSz6vDl0IVJEhS8VYor5kt9gLqt
-GcoAgsf4gRDIutJyQDaNn3IVY89uXUVIoexvQeLQDBCgQPC5O8rJdqBwtwIVAK2J
-jt+dqk07eQUE59koYUEKyNorAoGBAI4IEpusf8G14kCHmRtnHXM2tG5EWJDmW6Qt
-wjqvWp1GKUx5WFy1tVWR9nl5rL0Di+kNdENo+SkKj7h3uDulGOI6T0mQYbV2h1IK
-+FMOGnOqvZ8eNTE2n4PGTo5puZ63LBm+QYrQsrNiUY4vakLFQ2rEK/SLwdsDFK4Z
-SJCBQw5zAoGAYzOpPmh8Je1IDauEXhgaLz14wqYUHHcrj2VWVJ6fRm8GhdQFJSI7
-GUk08pgKZSKic2lNqxuzW7/vFxKQ/nvzfytY16b+2i+BR4Q6yvMzCebE1hHVg0Ju
-TwfUMwoFEOhYP6ZwHSUiQl9IBMH9TNJCMwYMxfY+VOrURFsjGTRUgpwCFQCIGt5g
-Y+XZd0Sv69CatDIRYWvaIA==
------END DSA PRIVATE KEY-----
+-----BEGIN PRIVATE KEY-----
+MIICZQIBADCCAjkGByqGSM44BAEwggIsAoIBAQCQfLlNdehPnTrGIMhw4rk0uua6
+k1nCG3zcyfXli17BdB2k0HBPaTA3a3ZHfOt1Awy0Uu0wZ3gdPr9z0I64hnJXIGou
+zIanZ7nYRImHtX5JMFbXeyxo1Owd2Zs3oEk9nQUoUsMxvmYC/ghPL5Zx1pPxcHCO
+wzWxoG4yZMjimXOc1/W7zvK/4/g/Cz9fItD3zdcydfgM/hK0/CeYQ21xfhqf4mjK
+v9plnCcWgToGI+7H8VK80MFbkO2QKRz3vP1/TjK6PRm9sEeB5b10+SvGv2j2w+CC
+0fXL4s6n7PtBlm/bww8xL1/Az8kwejUcII1Dc8uNwwISwGbwaGBvl7IHpm21AiEA
+rodZi+nCKZdTL8IgCjX3n0DuhPRkVQPjz/B6VweLW9MCggEAfimkUNwnsGFp7mKM
+zJKhHoQkMB1qJzyIHjDzQ/J1xjfoF6i27afw1/WKboND5eseZhlhA2TO5ZJB6nGx
+DOE9lVQxYVml++cQj6foHh1TVJAgGl4mWuveW/Rz+NEhpK4zVeEsfMrbkBypPByy
+xzF1Z49t568xdIo+e8jLI8FjEdXOIUg4ehB3NY6SL8r4oJ49j/sJWfHcDoWH/LK9
+ZaBF8NpflJe3F40S8RDvM8j2HC+y2Q4QyKk1DXGiH+7yQLGWzr3M73kC3UBnnH0h
+Hxb7ISDCT7dCw/lH1nCbVFBOM0ASI26SSsFSXQrvD2kryRcTZ0KkyyhhoPODWpU+
+TQMsxQQjAiEArJr6p2zTbhRppQurHGTdmdYHqrDdZH4MCsD9tQCw1xY=
+-----END PRIVATE KEY-----
 -----BEGIN CERTIFICATE-----
-MIIDpDCCAw2gAwIBAgIJAMtotfHYdEsYMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
-BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv
-TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDlaFw0xNjA1MTAxMzUzMDlaMEUx
+MIIFkDCCBHigAwIBAgIJANk5lu6mSyBFMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv
+TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzFaFw0yMzA1MjYxNzI4MzFaMEUx
 CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU
-ZXN0IFMvTUlNRSBFRSBEU0EgIzMwggG3MIIBLAYHKoZIzjgEATCCAR8CgYEAxSX7
-CDziGsDDuW4sPgKGFITVcUXgTi0KLFN0L+AfJK2nNATa9zo0hi4dcGcR6oZQBNEJ
-mrE2iqI7pNtJzVnhZ3M0s+rw5dCFSRIUvFWKK+ZLfYC6rRnKAILH+IEQyLrSckA2
-jZ9yFWPPbl1FSKHsb0Hi0AwQoEDwuTvKyXagcLcCFQCtiY7fnapNO3kFBOfZKGFB
-CsjaKwKBgQCOCBKbrH/BteJAh5kbZx1zNrRuRFiQ5lukLcI6r1qdRilMeVhctbVV
-kfZ5eay9A4vpDXRDaPkpCo+4d7g7pRjiOk9JkGG1dodSCvhTDhpzqr2fHjUxNp+D
-xk6OabmetywZvkGK0LKzYlGOL2pCxUNqxCv0i8HbAxSuGUiQgUMOcwOBhAACgYBj
-M6k+aHwl7UgNq4ReGBovPXjCphQcdyuPZVZUnp9GbwaF1AUlIjsZSTTymAplIqJz
-aU2rG7Nbv+8XEpD+e/N/K1jXpv7aL4FHhDrK8zMJ5sTWEdWDQm5PB9QzCgUQ6Fg/
-pnAdJSJCX0gEwf1M0kIzBgzF9j5U6tREWyMZNFSCnKOBgzCBgDAdBgNVHQ4EFgQU
-VhpVXqQ/EzUMdxLvP7o9EhJ8h70wHwYDVR0jBBgwFoAUE89Lp7uJLrM4Vxd2xput
-aFvl7RcwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBsAwIAYDVR0RBBkwF4EV
-c21pbWVkc2EzQG9wZW5zc2wub3JnMA0GCSqGSIb3DQEBBQUAA4GBACM9e75EQa8m
-k/AZkH/tROqf3yeqijULl9x8FjFatqoY+29OM6oMGM425IqSkKd2ipz7OxO0SShu
-rE0O3edS7DvYBwvhWPviRaYBMyZ4iFJVup+fOzoYK/j/bASxS3BHQBwb2r4rhe25
-OlTyyFEk7DJyW18YFOG97S1P52oQ5f5x
+ZXN0IFMvTUlNRSBFRSBEU0EgIzMwggNGMIICOQYHKoZIzjgEATCCAiwCggEBAJB8
+uU116E+dOsYgyHDiuTS65rqTWcIbfNzJ9eWLXsF0HaTQcE9pMDdrdkd863UDDLRS
+7TBneB0+v3PQjriGclcgai7MhqdnudhEiYe1fkkwVtd7LGjU7B3ZmzegST2dBShS
+wzG+ZgL+CE8vlnHWk/FwcI7DNbGgbjJkyOKZc5zX9bvO8r/j+D8LP18i0PfN1zJ1
++Az+ErT8J5hDbXF+Gp/iaMq/2mWcJxaBOgYj7sfxUrzQwVuQ7ZApHPe8/X9OMro9
+Gb2wR4HlvXT5K8a/aPbD4ILR9cvizqfs+0GWb9vDDzEvX8DPyTB6NRwgjUNzy43D
+AhLAZvBoYG+XsgembbUCIQCuh1mL6cIpl1MvwiAKNfefQO6E9GRVA+PP8HpXB4tb
+0wKCAQB+KaRQ3CewYWnuYozMkqEehCQwHWonPIgeMPND8nXGN+gXqLbtp/DX9Ypu
+g0Pl6x5mGWEDZM7lkkHqcbEM4T2VVDFhWaX75xCPp+geHVNUkCAaXiZa695b9HP4
+0SGkrjNV4Sx8ytuQHKk8HLLHMXVnj23nrzF0ij57yMsjwWMR1c4hSDh6EHc1jpIv
+yvignj2P+wlZ8dwOhYf8sr1loEXw2l+Ul7cXjRLxEO8zyPYcL7LZDhDIqTUNcaIf
+7vJAsZbOvczveQLdQGecfSEfFvshIMJPt0LD+UfWcJtUUE4zQBIjbpJKwVJdCu8P
+aSvJFxNnQqTLKGGg84NalT5NAyzFA4IBBQACggEAcXvtfiJfIZ0wgGpN72ZeGrJ9
+msUXOxow7w3fDbP8r8nfVkBNbfha8rx0eY6fURFVZzIOd8EHGKypcH1gS6eZNucf
+zgsH1g5r5cRahMZmgGXBEBsWrh2IaDG7VSKt+9ghz27EKgjAQCzyHQL5FCJgR2p7
+cv0V4SRqgiAGYlJ191k2WtLOsVd8kX//jj1l8TUgE7TqpuSEpaSyQ4nzJROpZWZp
+N1RwFmCURReykABU/Nzin/+rZnvZrp8WoXSXEqxeB4mShRSaH57xFnJCpRwKJ4qS
+2uhATzJaKH7vu63k3DjftbSBVh+32YXwtHc+BGjs8S2aDtCW3FtDA7Z6J8BIxaNg
+MF4wDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwHQYDVR0OBBYEFMJxatDE
+FCEFGl4uoiQQ1050Ju9RMB8GA1UdIwQYMBaAFMmRUwpjexZbi71E8HaIqSTm5bZs
+MA0GCSqGSIb3DQEBBQUAA4IBAQBGZD1JnMep39KMOhD0iBTmyjhtcnRemckvRask
+pS/CqPwo+M+lPNdxpLU2w9b0QhPnj0yAS/BS1yBjsLGY4DP156k4Q3QOhwsrTmrK
+YOxg0w7DOpkv5g11YLJpHsjSOwg5uIMoefL8mjQK6XOFOmQXHJrUtGulu+fs6FlM
+khGJcW4xYVPK0x/mHvTT8tQaTTkgTdVHObHF5Dyx/F9NMpB3RFguQPk2kT4lJc4i
+Up8T9mLzaxz6xc4wwh8h70Zw81lkGYhX+LRk3sfd/REq9x4QXQNP9t9qU1CgrBzv
+4orzt9cda4r+rleSg2XjWnXzMydE6DuwPVPZlqnLbSYUy660
 -----END CERTIFICATE-----
diff --git a/test/smime-certs/smec1.pem b/test/smime-certs/smec1.pem
new file mode 100644 (file)
index 0000000..75a8626
--- /dev/null
@@ -0,0 +1,22 @@
+-----BEGIN PRIVATE KEY-----
+MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgXzBRX9Z5Ib4LAVAS
+DMlYvkj0SmLmYvWULe2LfyXRmpWhRANCAAS+SIj2FY2DouPRuNDp9WVpsqef58tV
+3gIwV0EOV/xyYTzZhufZi/aBcXugWR1x758x4nHus2uEuEFi3Mr3K3+x
+-----END PRIVATE KEY-----
+-----BEGIN CERTIFICATE-----
+MIICoDCCAYigAwIBAgIJANk5lu6mSyBGMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv
+TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzFaFw0yMzA1MjYxNzI4MzFaMEQx
+CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRU
+ZXN0IFMvTUlNRSBFRSBFQyAjMTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABL5I
+iPYVjYOi49G40On1ZWmyp5/ny1XeAjBXQQ5X/HJhPNmG59mL9oFxe6BZHXHvnzHi
+ce6za4S4QWLcyvcrf7GjYDBeMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/BAQDAgXg
+MB0GA1UdDgQWBBR/ybxC2DI+Jydhx1FMgPbMTmLzRzAfBgNVHSMEGDAWgBTJkVMK
+Y3sWW4u9RPB2iKkk5uW2bDANBgkqhkiG9w0BAQUFAAOCAQEAdk9si83JjtgHHHGy
+WcgWDfM0jzlWBsgFNQ9DwAuB7gJd/LG+5Ocajg5XdA5FXAdKkfwI6be3PdcVs3Bt
+7f/fdKfBxfr9/SvFHnK7PVAX2x1wwS4HglX1lfoyq1boSvsiJOnAX3jsqXJ9TJiV
+FlgRVnhnrw6zz3Xs/9ZDMTENUrqDHPNsDkKEi+9SqIsqDXpMCrGHP4ic+S8Rov1y
+S+0XioMxVyXDp6XcL4PQ/NgHbw5/+UcS0me0atZ6pW68C0vi6xeU5vxojyuZxMI1
+DXXwMhOXWaKff7KNhXDUN0g58iWlnyaCz4XQwFsbbFs88TQ1+e/aj3bbwTxUeyN7
+qtcHJA==
+-----END CERTIFICATE-----
diff --git a/test/smime-certs/smec2.pem b/test/smime-certs/smec2.pem
new file mode 100644 (file)
index 0000000..457297a
--- /dev/null
@@ -0,0 +1,23 @@
+-----BEGIN PRIVATE KEY-----
+MIGPAgEAMBAGByqGSM49AgEGBSuBBAAQBHgwdgIBAQQjhHaq507MOBznelrLG/pl
+brnnJi/iEJUUp+Pm3PEiteXqckmhTANKAAQF2zs6vobmoT+M+P2+9LZ7asvFBNi7
+uCzLYF/8j1Scn/spczoC9vNzVhNw+Lg7dnjNL4EDIyYZLl7E0v69luzbvy+q44/8
+6bQ=
+-----END PRIVATE KEY-----
+-----BEGIN CERTIFICATE-----
+MIICpTCCAY2gAwIBAgIJANk5lu6mSyBHMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv
+TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzFaFw0yMzA1MjYxNzI4MzFaMEQx
+CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRU
+ZXN0IFMvTUlNRSBFRSBFQyAjMjBeMBAGByqGSM49AgEGBSuBBAAQA0oABAXbOzq+
+huahP4z4/b70tntqy8UE2Lu4LMtgX/yPVJyf+ylzOgL283NWE3D4uDt2eM0vgQMj
+JhkuXsTS/r2W7Nu/L6rjj/zptKNgMF4wDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8E
+BAMCBeAwHQYDVR0OBBYEFGf+QSQlkN20PsNN7x+jmQIJBDcXMB8GA1UdIwQYMBaA
+FMmRUwpjexZbi71E8HaIqSTm5bZsMA0GCSqGSIb3DQEBBQUAA4IBAQBaBBryl2Ez
+ftBrGENXMKQP3bBEw4n9ely6HvYQi9IC7HyK0ktz7B2FcJ4z96q38JN3cLxV0DhK
+xT/72pFmQwZVJngvRaol0k1B+bdmM03llxCw/uNNZejixDjHUI9gEfbigehd7QY0
+uYDu4k4O35/z/XPQ6O5Kzw+J2vdzU8GXlMBbWeZWAmEfLGbk3Ux0ouITnSz0ty5P
+rkHTo0uprlFcZAsrsNY5v5iuomYT7ZXAR3sqGZL1zPOKBnyfXeNFUfnKsZW7Fnlq
+IlYBQIjqR1HGxxgCSy66f1oplhxSch4PUpk5tqrs6LeOqc2+xROy1T5YrB3yjVs0
+4ZdCllHZkhop
+-----END CERTIFICATE-----
index a59eb26..d1a253f 100644 (file)
@@ -1,30 +1,49 @@
------BEGIN RSA PRIVATE KEY-----
-MIICXAIBAAKBgQDBV1Z/Q5gPF7lojc8pKUdyz5+Jf2B3vs4he6egekugWnoJduki
-9Lnae/JchB/soIX0co3nLc11NuFFlnAWJNMDJr08l5AHAJLYNHevF5l/f9oDQwvZ
-speKh1xpIAJNqCTzVeQ/ZLx6/GccIXV/xDuKIiovqJTPgR5WPkYKaw++lQIDAQAB
-AoGALXnUj5SflJU4+B2652ydMKUjWl0KnL/VjkyejgGV/j6py8Ybaixz9q8Gv7oY
-JDlRqMC1HfZJCFQDQrHy5VJ+CywA/H9WrqKo/Ch9U4tJAZtkig1Cmay/BAYixVu0
-xBeim10aKF6hxHH4Chg9We+OCuzWBWJhqveNjuDedL/i7JUCQQDlejovcwBUCbhJ
-U12qKOwlaboolWbl7yF3XdckTJZg7+1UqQHZH5jYZlLZyZxiaC92SNV0SyTLJZnS
-Jh5CO+VDAkEA16/pPcuVtMMz/R6SSPpRSIAa1stLs0mFSs3NpR4pdm0n42mu05pO
-1tJEt3a1g7zkreQBf53+Dwb+lA841EkjRwJBAIFmt0DifKDnCkBu/jZh9SfzwsH3
-3Zpzik+hXxxdA7+ODCrdUul449vDd5zQD5t+XKU61QNLDGhxv5e9XvrCg7kCQH/a
-3ldsVF0oDaxxL+QkxoREtCQ5tLEd1u7F2q6Tl56FDE0pe6Ih6bQ8RtG+g9EI60IN
-U7oTrOO5kLWx5E0q4ccCQAZVgoenn9MhRU1agKOCuM6LT2DxReTu4XztJzynej+8
-0J93n3ebanB1MlRpn1XJwhQ7gAC8ImaQKLJK5jdJzFc=
------END RSA PRIVATE KEY-----
+-----BEGIN PRIVATE KEY-----
+MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCyyQXED5HyVWwq
+nXyzmY317yMUJrIfsKvREG2C691dJNHgNg+oq5sjt/fzkyS84AvdOiicAsao4cYL
+DulthaLpbC7msEBhvwAil0FNb5g3ERupe1KuTdUV1UuD/i6S2VoaNXUBBn1rD9Wc
+BBc0lnx/4Wt92eQTI6925pt7ZHPQw2Olp7TQDElyi5qPxCem4uT0g3zbZsWqmmsI
+MXbu+K3dEprzqA1ucKXbxUmZNkMwVs2XCmlLxrRUj8C3/zENtH17HWCznhR/IVcV
+kgIuklkeiDsEhbWvUQumVXR7oPh/CPZAbjGqq5mVueHSHrp7brBVZKHZvoUka28Q
+LWitq1W5AgMBAAECggEASkRnOMKfBeOmQy2Yl6K57eeg0sYgSDnDpd0FINWJ5x9c
+b58FcjOXBodtYKlHIY6QXx3BsM0WaSEge4d+QBi7S+u8r+eXVwNYswXSArDQsk9R
+Bl5MQkvisGciL3pvLmFLpIeASyS/BLJXMbAhU58PqK+jT2wr6idwxBuXivJ3ichu
+ISdT1s2aMmnD86ulCD2DruZ4g0mmk5ffV+Cdj+WWkyvEaJW2GRYov2qdaqwSOxV4
+Yve9qStvEIWAf2cISQjbnw2Ww6Z5ebrqlOz9etkmwIly6DTbrIneBnoqJlFFWGlF
+ghuzc5RE2w1GbcKSOt0qXH44MTf/j0r86dlu7UIxgQKBgQDq0pEaiZuXHi9OQAOp
+PsDEIznCU1bcTDJewANHag5DPEnMKLltTNyLaBRulMypI+CrDbou0nDr29VOzfXx
+mNvi/c7RttOBOx7kXKvu0JUFKe2oIWRsg0KsyMX7UFMVaHFgrW+8DhQc7HK7URiw
+nitOnA7YwIHRF9BMmcWcLFEYBQKBgQDC6LPbXV8COKO0YCfGXPnE7EZGD/p0Q92Z
+8CoSefphEScSdO1IpxFXG7fOZ4x2GQb9q7D3IvaeKAqNjUjkuyxdB30lIWDBwSWw
+fFgsa2SZwD5P60G/ar50YJr6LiF333aUMDVmC9swFfZERAEmGUz2NTrPWQdIx/lu
+PyDtUR75JQKBgHaoCCJ8vl5SJl1IA5GV4Bo8IoeLTSzsY9d09zMy6BoZcMD1Ix2T
+5S2cXhayoegl9PT6bsYSGHVWFCdJ86ktMI826TcXRzDaCvYhzc9THroJQcnfdbtP
+aHWezkv7fsAmkoPjn75K7ubeo+r7Q5qbkg6a1PW58N8TRXIvkackzaVxAoGBALAq
+qh3U+AHG9dgbrPeyo6KkuCOtX39ks8/mbfCDRZYkbb9V5f5r2tVz3R93IlK/7jyr
+yWimtmde46Lrl33922w+T5OW5qBZllo9GWkUrDn3s5qClcuQjJIdmxYTSfbSCJiK
+NkmE39lHkG5FVRB9f71tgTlWS6ox7TYDYxx83NTtAoGAUJPAkGt4yGAN4Pdebv53
+bSEpAAULBHntiqDEOu3lVColHuZIucml/gbTpQDruE4ww4wE7dOhY8Q4wEBVYbRI
+vHkSiWpJUvZCuKG8Foh5pm9hU0qb+rbQV7NhLJ02qn1AMGO3F/WKrHPPY8/b9YhQ
+KfvPCYimQwBjVrEnSntLPR0=
+-----END PRIVATE KEY-----
 -----BEGIN CERTIFICATE-----
-MIICaTCCAdKgAwIBAgIJAP6VN47boiXRMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
-BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv
-TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDdaFw0xNjA1MTExMzUzMDdaMEQx
-CzAJBgNVBAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRU
-ZXN0IFMvTUlNRSBSU0EgUm9vdDCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA
-wVdWf0OYDxe5aI3PKSlHcs+fiX9gd77OIXunoHpLoFp6CXbpIvS52nvyXIQf7KCF
-9HKN5y3NdTbhRZZwFiTTAya9PJeQBwCS2DR3rxeZf3/aA0ML2bKXiodcaSACTagk
-81XkP2S8evxnHCF1f8Q7iiIqL6iUz4EeVj5GCmsPvpUCAwEAAaNjMGEwHQYDVR0O
-BBYEFBPPS6e7iS6zOFcXdsabrWhb5e0XMB8GA1UdIwQYMBaAFBPPS6e7iS6zOFcX
-dsabrWhb5e0XMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMA0GCSqG
-SIb3DQEBBQUAA4GBAIECprq5viDvnDbkyOaiSr9ubMUmWqvycfAJMdPZRKcOZczS
-l+L9R9lF3JSqbt3knOe9u6bGDBOTY2285PdCCuHRVMk2Af1f6El1fqAlRUwNqipp
-r68sWFuRqrcRNtk6QQvXfkOhrqQBuDa7te/OVQLa2lGN9Dr2mQsD8ijctatG
+MIIDbjCCAlagAwIBAgIJAMc+8VKBJ/S9MA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv
+TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MjlaFw0yMzA3MTUxNzI4MjlaMEQx
+CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRU
+ZXN0IFMvTUlNRSBSU0EgUm9vdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
+ggEBALLJBcQPkfJVbCqdfLOZjfXvIxQmsh+wq9EQbYLr3V0k0eA2D6irmyO39/OT
+JLzgC906KJwCxqjhxgsO6W2FoulsLuawQGG/ACKXQU1vmDcRG6l7Uq5N1RXVS4P+
+LpLZWho1dQEGfWsP1ZwEFzSWfH/ha33Z5BMjr3bmm3tkc9DDY6WntNAMSXKLmo/E
+J6bi5PSDfNtmxaqaawgxdu74rd0SmvOoDW5wpdvFSZk2QzBWzZcKaUvGtFSPwLf/
+MQ20fXsdYLOeFH8hVxWSAi6SWR6IOwSFta9RC6ZVdHug+H8I9kBuMaqrmZW54dIe
+untusFVkodm+hSRrbxAtaK2rVbkCAwEAAaNjMGEwHQYDVR0OBBYEFMmRUwpjexZb
+i71E8HaIqSTm5bZsMB8GA1UdIwQYMBaAFMmRUwpjexZbi71E8HaIqSTm5bZsMA8G
+A1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMA0GCSqGSIb3DQEBBQUAA4IB
+AQAwpIVWQey2u/XoQSMSu0jd0EZvU+lhLaFrDy/AHQeG3yX1+SAOM6f6w+efPvyb
+Op1NPI9UkMPb4PCg9YC7jgYokBkvAcI7J4FcuDKMVhyCD3cljp0ouuKruvEf4FBl
+zyQ9pLqA97TuG8g1hLTl8G90NzTRcmKpmhs18BmCxiqHcTfoIpb3QvPkDX8R7LVt
+9BUGgPY+8ELCgw868TuHh/Cnc67gBtRjBp0sCYVzGZmKsO5f1XdHrAZKYN5mEp0C
+7/OqcDoFqORTquLeycg1At/9GqhDEgxNrqA+YEsPbLGAfsNuXUsXs2ubpGsOZxKt
+Emsny2ah6fU2z7PztrUy/A80
 -----END CERTIFICATE-----
index 2cf3148..d0d0b9e 100644 (file)
@@ -1,31 +1,49 @@
------BEGIN RSA PRIVATE KEY-----
-MIICXgIBAAKBgQC6A978j4pmPgUtUQqF+bjh6vdhwGOGZSD7xXgFTMjm88twfv+E
-ixkq2KXSDjD0ZXoQbdOaSbvGRQrIJpG2NGiKAFdYNrP025kCCdh5wF/aEI7KLEm7
-JlHwXpQsuj4wkMgmkFjL3Ty4Z55aNH+2pPQIa0k+ENJXm2gDuhqgBmduAwIDAQAB
-AoGBAJMuYu51aO2THyeHGwt81uOytcCbqGP7eoib62ZOJhxPRGYjpmuqX+R9/V5i
-KiwGavm63JYUx0WO9YP+uIZxm1BUATzkgkS74u5LP6ajhkZh6/Bck1oIYYkbVOXl
-JVrdENuH6U7nupznsyYgONByo+ykFPVUGmutgiaC7NMVo/MxAkEA6KLejWXdCIEn
-xr7hGph9NlvY9xuRIMexRV/WrddcFfCdjI1PciIupgrIkR65M9yr7atm1iU6/aRf
-KOr8rLZsSQJBAMyyXN71NsDNx4BP6rtJ/LJMP0BylznWkA7zWfGCbAYn9VhZVlSY
-Eu9Gyr7quD1ix7G3kInKVYOEEOpockBLz+sCQQCedyMmKjcQLfpMVYW8uhbAynvW
-h36qV5yXZxszO7nMcCTBsxhk5IfmLv5EbCs3+p9avCDGyoGOeUMg+kC33WORAkAg
-oUIarH4o5+SoeJTTfCzTA0KF9H5U0vYt2+73h7HOnWoHxl3zqDZEfEVvf50U8/0f
-QELDJETTbScBJtsnkq43AkEA38etvoZ2i4FJvvo7R/9gWBHVEcrGzcsCBYrNnIR1
-SZLRwHEGaiOK1wxMsWzqp7PJwL9z/M8A8DyOFBx3GPOniA==
------END RSA PRIVATE KEY-----
+-----BEGIN PRIVATE KEY-----
+MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDXr9uzB/20QXKC
+xhkfNnJvl2xl1hzdOcrQmAqo+AAAcA/D49ImuJDVQRaK2bcj54XB26i1kXuOrxID
+3/etUb8yudfx8OAVwh8G0xVA4zhr8uXW85W2tBr4v0Lt+W6lSd6Hmfrk4GmE9LTU
+/vzl9HUPW6SZShN1G0nY6oeUXvLi0vasEUKv3a51T6JFYg4c7qt5RCk/w8kwrQ0D
+orQwCdkOPEIiC4b+nPStF12SVm5bx8rbYzioxuY/PdSebvt0APeqgRxSpCxqYnHs
+CoNeHzSrGXcP0COzFeUOz2tdrhmH09JLbGZs4nbojPxMkjpJSv3/ekDG2CHYxXSH
+XxpJstxZAgMBAAECggEASY4xsJaTEPwY3zxLqPdag2/yibBBW7ivz/9p80HQTlXp
+KnbxXj8nNXLjCytAZ8A3P2t316PrrTdLP4ML5lGwkM4MNPhek00GY79syhozTa0i
+cPHVJt+5Kwee/aVI9JmCiGAczh0yHyOM3+6ttIZvvXMVaSl4BUHvJ0ikQBc5YdzL
+s6VM2gCOR6K6n+39QHDI/T7WwO9FFSNnpWFOCHwAWtyBMlleVj+xeZX8OZ/aT+35
+27yjsGNBftWKku29VDineiQC+o+fZGJs6w4JZHoBSP8TfxP8fRCFVNA281G78Xak
+cEnKXwZ54bpoSa3ThKl+56J6NHkkfRGb8Rgt/ipJYQKBgQD5DKb82mLw85iReqsT
+8bkp408nPOBGz7KYnQsZqAVNGfehM02+dcN5z+w0jOj6GMPLPg5whlEo/O+rt9ze
+j6c2+8/+B4Bt5oqCKoOCIndH68jl65+oUxFkcHYxa3zYKGC9Uvb+x2BtBmYgvDRG
+ew6I2Q3Zyd2ThZhJygUZpsjsbQKBgQDdtNiGTkgWOm+WuqBI1LT5cQfoPfgI7/da
+ZA+37NBUQRe0cM7ddEcNqx7E3uUa1JJOoOYv65VyGI33Ul+evI8h5WE5bupcCEFk
+LolzbMc4YQUlsySY9eUXM8jQtfVtaWhuQaABt97l+9oADkrhA+YNdEu2yiz3T6W+
+msI5AnvkHQKBgDEjuPMdF/aY6dqSjJzjzfgg3KZOUaZHJuML4XvPdjRPUlfhKo7Q
+55/qUZ3Qy8tFBaTderXjGrJurc+A+LiFOaYUq2ZhDosguOWUA9yydjyfnkUXZ6or
+sbvSoM+BeOGhnezdKNT+e90nLRF6cQoTD7war6vwM6L+8hxlGvqDuRNFAoGAD4K8
+d0D4yB1Uez4ZQp8m/iCLRhM3zCBFtNw1QU/fD1Xye5w8zL96zRkAsRNLAgKHLdsR
+355iuTXAkOIBcJCOjveGQsdgvAmT0Zdz5FBi663V91o+IDlryqDD1t40CnCKbtRG
+hng/ruVczg4x7OYh7SUKuwIP/UlkNh6LogNreX0CgYBQF9troLex6X94VTi1V5hu
+iCwzDT6AJj63cS3VRO2ait3ZiLdpKdSNNW2WrlZs8FZr/mVutGEcWho8BugGMWST
+1iZkYwly9Xfjnpd0I00ZIlr2/B3+ZsK8w5cOW5Lpb7frol6+BkDnBjbNZI5kQndn
+zQpuMJliRlrq/5JkIbH6SA==
+-----END PRIVATE KEY-----
 -----BEGIN CERTIFICATE-----
-MIICizCCAfSgAwIBAgIJAMtotfHYdEsTMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
-BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv
-TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDhaFw0xNjA1MTAxMzUzMDhaMEUx
+MIIDbDCCAlSgAwIBAgIJANk5lu6mSyBAMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv
+TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzBaFw0yMzA1MjYxNzI4MzBaMEUx
 CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU
-ZXN0IFMvTUlNRSBFRSBSU0EgIzEwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGB
-ALoD3vyPimY+BS1RCoX5uOHq92HAY4ZlIPvFeAVMyObzy3B+/4SLGSrYpdIOMPRl
-ehBt05pJu8ZFCsgmkbY0aIoAV1g2s/TbmQIJ2HnAX9oQjsosSbsmUfBelCy6PjCQ
-yCaQWMvdPLhnnlo0f7ak9AhrST4Q0lebaAO6GqAGZ24DAgMBAAGjgYMwgYAwHQYD
-VR0OBBYEFE2vMvKz5jrC7Lbdg68XwZ95iL/QMB8GA1UdIwQYMBaAFBPPS6e7iS6z
-OFcXdsabrWhb5e0XMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/BAQDAgXgMCAGA1Ud
-EQQZMBeBFXNtaW1lcnNhMUBvcGVuc3NsLm9yZzANBgkqhkiG9w0BAQUFAAOBgQAi
-O3GOkUl646oLnOimc36i9wxZ1tejsqs8vMjJ0Pym6Uq9FE2JoGzJ6OhB1GOsEVmj
-9cQ5UNQcRYL3cqOFtl6f4Dpu/lhzfbaqgmLjv29G1mS0uuTZrixhlyCXjwcbOkNC
-I/+wvHHENYIK5+T/79M9LaZ2Qk4F9MNE1VMljdz9Qw==
+ZXN0IFMvTUlNRSBFRSBSU0EgIzEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQDXr9uzB/20QXKCxhkfNnJvl2xl1hzdOcrQmAqo+AAAcA/D49ImuJDVQRaK
+2bcj54XB26i1kXuOrxID3/etUb8yudfx8OAVwh8G0xVA4zhr8uXW85W2tBr4v0Lt
++W6lSd6Hmfrk4GmE9LTU/vzl9HUPW6SZShN1G0nY6oeUXvLi0vasEUKv3a51T6JF
+Yg4c7qt5RCk/w8kwrQ0DorQwCdkOPEIiC4b+nPStF12SVm5bx8rbYzioxuY/PdSe
+bvt0APeqgRxSpCxqYnHsCoNeHzSrGXcP0COzFeUOz2tdrhmH09JLbGZs4nbojPxM
+kjpJSv3/ekDG2CHYxXSHXxpJstxZAgMBAAGjYDBeMAwGA1UdEwEB/wQCMAAwDgYD
+VR0PAQH/BAQDAgXgMB0GA1UdDgQWBBTmjc+lrTQuYx/VBOBGjMvufajvhDAfBgNV
+HSMEGDAWgBTJkVMKY3sWW4u9RPB2iKkk5uW2bDANBgkqhkiG9w0BAQUFAAOCAQEA
+dr2IRXcFtlF16kKWs1VTaFIHHNQrfSVHBkhKblPX3f/0s/i3eXgwKUu7Hnb6T3/o
+E8L+e4ioQNhahTLt9ruJNHWA/QDwOfkqM3tshCs2xOD1Cpy7Bd3Dn0YBrHKyNXRK
+WelGp+HetSXJGW4IZJP7iES7Um0DGktLabhZbe25EnthRDBjNnaAmcofHECWESZp
+lEHczGZfS9tRbzOCofxvgLbF64H7wYSyjAe6R8aain0VRbIusiD4tCHX/lOMh9xT
+GNBW8zTL+tV9H1unjPMORLnT0YQ3oAyEND0jCu0ACA1qGl+rzxhF6bQcTUNEbRMu
+9Hjq6s316fk4Ne0EUF3PbA==
 -----END CERTIFICATE-----
index d41f69c..2f17cb2 100644 (file)
@@ -1,31 +1,49 @@
------BEGIN RSA PRIVATE KEY-----
-MIICWwIBAAKBgQCwBfryW4Vu5U9wNIDKspJO/N9YF4CcTlrCUyzVlKgb+8urHlSe
-59i5verR9IOCCXkemjOzZ/3nALTGqYZlnEvHp0Rjk+KdKXnKBIB+SRPpeu3LcXMT
-WPgsThPa0UQxedNKG0g6aG+kLhsDlFBCoxd09jJtSpb9jmroJOq0ZYEHLwIDAQAB
-AoGAKa/w4677Je1W5+r3SYoLDnvi5TkDs4D3C6ipKJgBTEdQz+DqB4w/DpZE4551
-+rkFn1LDxcxuHGRVa+tAMhZW97fwq9YUbjVZEyOz79qrX+BMyl/NbHkf1lIKDo3q
-dWalzQvop7nbzeLC+VmmviwZfLQUbA61AQl3jm4dswT4XykCQQDloDadEv/28NTx
-bvvywvyGuvJkCkEIycm4JrIInvwsd76h/chZ3oymrqzc7hkEtK6kThqlS5y+WXl6
-QzPruTKTAkEAxD2ro/VUoN+scIVaLmn0RBmZ67+9Pdn6pNSfjlK3s0T0EM6/iUWS
-M06l6L9wFS3/ceu1tIifsh9BeqOGTa+udQJARIFnybTBaIqw/NZ/lA1YCVn8tpvY
-iyaoZ6gjtS65TQrsdKeh/i3HCHNUXxUpoZ3F/H7QtD+6o49ODou+EbVOwQJAVmex
-A2gp8wuJKaINqxIL81AybZLnCCzKJ3lXJ5tUNyLNM/lUbGStktm2Q1zHRQwTxV07
-jFn7trn8YrtNjzcjYQJAUKIJRt38A8Jw3HoPT+D0WS2IgxjVL0eYGsZX1lyeammG
-6rfnQ3u5uP7mEK2EH2o8mDUpAE0gclWBU9UkKxJsGA==
------END RSA PRIVATE KEY-----
+-----BEGIN PRIVATE KEY-----
+MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDcYC4tS2Uvn1Z2
+iDgtfkJA5tAqgbN6X4yK02RtVH5xekV9+6+eTt/9S+iFAzAnwqR/UB1R67ETrsWq
+V8u9xLg5fHIwIkmu9/6P31UU9cghO7J1lcrhHvooHaFpcXepPWQacpuBq2VvcKRD
+lDfVmdM5z6eS3dSZPTOMMP/xk4nhZB8mcw27qiccPieS0PZ9EZB63T1gmwaK1Rd5
+U94Pl0+zpDqhViuXmBfiIDWjjz0BzHnHSz5Rg4S3oXF1NcojhptIWyI0r7dgn5J3
+NxC4kgKdjzysxo6iWd0nLgz7h0jUdj79EOis4fg9G4f0EFWyQf7iDxGaA93Y9ePB
+Jv5iFZVZAgMBAAECggEBAILIPX856EHb0KclbhlpfY4grFcdg9LS04grrcTISQW1
+J3p9nBpZ+snKe6I8Yx6lf5PiipPsSLlCliHiWpIzJZVQCkAQiSPiHttpEYgP2IYI
+dH8dtznkdVbLRthZs0bnnPmpHCpW+iqpcYJ9eqkz0cvUNUGOjjWmwWmoRqwp/8CW
+3S1qbkQiCh0Mk2fQeGar76R06kXQ9MKDEj14zyS3rJX+cokjEoMSlH8Sbmdh2mJz
+XlNZcvqmeGJZwQWgbVVHOMUuZaKJiFa+lqvOdppbqSx0AsCRq6vjmjEYQEoOefYK
+3IJM9IvqW5UNx0Cy4kQdjhZFFwMO/ALD3QyF21iP4gECgYEA+isQiaWdaY4UYxwK
+Dg+pnSCKD7UGZUaCUIv9ds3CbntMOONFe0FxPsgcc4jRYQYj1rpQiFB8F11+qXGa
+P/IHcnjr2+mTrNY4I9Bt1Lg+pHSS8QCgzeueFybYMLaSsXUo7tGwpvw6UUb6/YWI
+LNCzZbrCLg1KZjGODhhxtvN45ZkCgYEA4YNSe+GMZlxgsvxbLs86WOm6DzJUPvxN
+bWmni0+Oe0cbevgGEUjDVc895uMFnpvlgO49/C0AYJ+VVbStjIMgAeMnWj6OZoSX
+q49rI8KmKUxKgORZiiaMqGWQ7Rxv68+4S8WANsjFxoUrE6dNV3uYDIUsiSLbZeI8
+38KVTcLohcECgYEAiOdyWHGq0G4xl/9rPUCzCMsa4velNV09yYiiwBZgVgfhsawm
+hQpOSBZJA60XMGqkyEkT81VgY4UF4QLLcD0qeCnWoXWVHFvrQyY4RNZDacpl87/t
+QGO2E2NtolL3umesa+2TJ/8Whw46Iu2llSjtVDm9NGiPk5eA7xPPf1iEi9kCgYAb
+0EmVE91wJoaarLtGS7LDkpgrFacEWbPnAbfzW62UENIX2Y1OBm5pH/Vfi7J+vHWS
+8E9e0eIRCL2vY2hgQy/oa67H151SkZnvQ/IP6Ar8Xvd1bDSK8HQ6tMQqKm63Y9g0
+KDjHCP4znOsSMnk8h/bZ3HcAtvbeWwftBR/LBnYNQQKBgA1leIXLLHRoX0VtS/7e
+y7Xmn7gepj+gDbSuCs5wGtgw0RB/1z/S3QoS2TCbZzKPBo20+ivoRP7gcuFhduFR
+hT8V87esr/QzLVpjLedQDW8Xb7GiO3BsU/gVC9VcngenbL7JObl3NgvdreIYo6+n
+yrLyf+8hjm6H6zkjqiOkHAl+
+-----END PRIVATE KEY-----
 -----BEGIN CERTIFICATE-----
-MIICizCCAfSgAwIBAgIJAMtotfHYdEsUMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
-BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv
-TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDhaFw0xNjA1MTAxMzUzMDhaMEUx
+MIIDbDCCAlSgAwIBAgIJANk5lu6mSyBBMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv
+TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzBaFw0yMzA1MjYxNzI4MzBaMEUx
 CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU
-ZXN0IFMvTUlNRSBFRSBSU0EgIzIwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGB
-ALAF+vJbhW7lT3A0gMqykk7831gXgJxOWsJTLNWUqBv7y6seVJ7n2Lm96tH0g4IJ
-eR6aM7Nn/ecAtMaphmWcS8enRGOT4p0pecoEgH5JE+l67ctxcxNY+CxOE9rRRDF5
-00obSDpob6QuGwOUUEKjF3T2Mm1Klv2Oaugk6rRlgQcvAgMBAAGjgYMwgYAwHQYD
-VR0OBBYEFIL/u+mEvaw7RuKLRuElfVkxSQjYMB8GA1UdIwQYMBaAFBPPS6e7iS6z
-OFcXdsabrWhb5e0XMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/BAQDAgXgMCAGA1Ud
-EQQZMBeBFXNtaW1lcnNhMkBvcGVuc3NsLm9yZzANBgkqhkiG9w0BAQUFAAOBgQC2
-rXR5bm/9RtOMQPleNpd3y6uUX3oy+0CafK5Yl3PMnItjjnKJ0l1/DbLbDj2twehe
-ewaB8CROcBCA3AMLSmGvPKgUCFMGtWam3328M4fBHzon5ka7qDXzM+imkAly/Yx2
-YNdR/aNOug+5sXygHmTSKqiCpQjOIClzXoPVVeEVHw==
+ZXN0IFMvTUlNRSBFRSBSU0EgIzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQDcYC4tS2Uvn1Z2iDgtfkJA5tAqgbN6X4yK02RtVH5xekV9+6+eTt/9S+iF
+AzAnwqR/UB1R67ETrsWqV8u9xLg5fHIwIkmu9/6P31UU9cghO7J1lcrhHvooHaFp
+cXepPWQacpuBq2VvcKRDlDfVmdM5z6eS3dSZPTOMMP/xk4nhZB8mcw27qiccPieS
+0PZ9EZB63T1gmwaK1Rd5U94Pl0+zpDqhViuXmBfiIDWjjz0BzHnHSz5Rg4S3oXF1
+NcojhptIWyI0r7dgn5J3NxC4kgKdjzysxo6iWd0nLgz7h0jUdj79EOis4fg9G4f0
+EFWyQf7iDxGaA93Y9ePBJv5iFZVZAgMBAAGjYDBeMAwGA1UdEwEB/wQCMAAwDgYD
+VR0PAQH/BAQDAgXgMB0GA1UdDgQWBBT0arpyYMHXDPVL7MvzE+lx71L7sjAfBgNV
+HSMEGDAWgBTJkVMKY3sWW4u9RPB2iKkk5uW2bDANBgkqhkiG9w0BAQUFAAOCAQEA
+I8nM42am3aImkZyrw8iGkaGhKyi/dfajSWx6B9izBUh+3FleBnUxxOA+mn7M8C47
+Ne18iaaWK8vEux9KYTIY8BzXQZL1AuZ896cXEc6bGKsME37JSsocfuB5BIGWlYLv
+/ON5/SJ0iVFj4fAp8z7Vn5qxRJj9BhZDxaO1Raa6cz6pm0imJy9v8y01TI6HsK8c
+XJQLs7/U4Qb91K+IDNX/lgW3hzWjifNpIpT5JyY3DUgbkD595LFV5DDMZd0UOqcv
+6cyN42zkX8a0TWr3i5wu7pw4k1oD19RbUyljyleEp0DBauIct4GARdBGgi5y1H2i
+NzYzLAPBkHCMY0Is3KKIBw==
 -----END CERTIFICATE-----
index c8cbe55..14c27f6 100644 (file)
@@ -1,31 +1,49 @@
------BEGIN RSA PRIVATE KEY-----
-MIICXAIBAAKBgQC6syTZtZNe1hRScFc4PUVyVLsr7+C1HDIZnOHmwFoLayX6RHwy
-ep/TkdwiPHnemVLuwvpSjLMLZkXy/J764kSHJrNeVl3UvmCVCOm40hAtK1+F39pM
-h8phkbPPD7i+hwq4/Vs79o46nzwbVKmzgoZBJhZ+codujUSYM3LjJ4aq+wIDAQAB
-AoGAE1Zixrnr3bLGwBMqtYSDIOhtyos59whImCaLr17U9MHQWS+mvYO98if1aQZi
-iQ/QazJ+wvYXxWJ+dEB+JvYwqrGeuAU6He/rAb4OShG4FPVU2D19gzRnaButWMeT
-/1lgXV08hegGBL7RQNaN7b0viFYMcKnSghleMP0/q+Y/oaECQQDkXEwDYJW13X9p
-ijS20ykWdY5lLknjkHRhhOYux0rlhOqsyMZjoUmwI2m0qj9yrIysKhrk4MZaM/uC
-hy0xp3hdAkEA0Uv/UY0Kwsgc+W6YxeypECtg1qCE6FBib8n4iFy/6VcWqhvE5xrs
-OdhKv9/p6aLjLneGd1sU+F8eS9LGyKIbNwJBAJPgbNzXA7uUZriqZb5qeTXxBDfj
-RLfXSHYKAKEULxz3+JvRHB9SR4yHMiFrCdExiZrHXUkPgYLSHLGG5a4824UCQD6T
-9XvhquUARkGCAuWy0/3Eqoihp/t6BWSdQ9Upviu7YUhtUxsyXo0REZB7F4pGrJx5
-GlhXgFaewgUzuUHFzlMCQCzJMMWslWpoLntnR6sMhBMhBFHSw+Y5CbxBmFrdtSkd
-VdtNO1VuDCTxjjW7W3Khj7LX4KZ1ye/5jfAgnnnXisc=
------END RSA PRIVATE KEY-----
+-----BEGIN PRIVATE KEY-----
+MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCyK+BTAOJKJjji
+OhY60NeZjzGGZxEBfCm62n0mwkzusW/V/e63uwj6uOVCFoVBz5doMf3M6QIS2jL3
+Aw6Qs5+vcuLA0gHrqIwjYQz1UZ5ETLKLKbQw6YOIVfsFSTxytUVpfcByrubWiLKX
+63theG1/IVokDK/9/k52Kyt+wcCjuRb7AJQFj2OLDRuWm/gavozkK103gQ+dUq4H
+XamZMtTq1EhQOfc0IUeCOEL6xz4jzlHHfzLdkvb7Enhav2sXDfOmZp/DYf9IqS7l
+vFkkINPVbYFBTexaPZlFwmpGRjkmoyH/w+Jlcpzs+w6p1diWRpaSn62bbkRN49j6
+L2dVb+DfAgMBAAECggEAciwDl6zdVT6g/PbT/+SMA+7qgYHSN+1koEQaJpgjzGEP
+lUUfj8TewCtzXaIoyj9IepBuXryBg6snNXpT/w3bqgYon/7zFBvxkUpDj4A5tvKf
+BuY2fZFlpBvUu1Ju1eKrFCptBBBoA9mc+BUB/ze4ktrAdJFcxZoMlVScjqGB3GdR
+OHw2x9BdWGCJBhiu9VHhAAb/LVWi6xgDumYSWZwN2yovg+7J91t5bsENeBRHycK+
+i5dNFh1umIK9N0SH6bpHPnLHrCRchrQ6ZRRxL4ZBKA9jFRDeI7OOsJuCvhGyJ1se
+snsLjr/Ahg00aiHCcC1SPQ6pmXAVBCG7hf4AX82V4QKBgQDaFDE+Fcpv84mFo4s9
+wn4CZ8ymoNIaf5zPl/gpH7MGots4NT5+Ns+6zzJQ6TEpDjTPx+vDaabP7QGXwVZn
+8NAHYvCQK37b+u9HrOt256YYRDOmnJFSbsJdmqzMEzpTNmQ8GuI37cZCS9CmSMv+
+ab/plcwuv0cJRSC83NN2AFyu1QKBgQDRJzKIBQlpprF9rA0D5ZjLVW4OH18A0Mmm
+oanw7qVutBaM4taFN4M851WnNIROyYIlkk2fNgW57Y4M8LER4zLrjU5HY4lB0BMX
+LQWDbyz4Y7L4lVnnEKfQxWFt9avNZwiCxCxEKy/n/icmVCzc91j9uwKcupdzrN6E
+yzPd1s5y4wKBgQCkJvzmAdsOp9/Fg1RFWcgmIWHvrzBXl+U+ceLveZf1j9K5nYJ7
+2OBGer4iH1XM1I+2M4No5XcWHg3L4FEdDixY0wXHT6Y/CcThS+015Kqmq3fBmyrc
+RNjzQoF9X5/QkSmkAIx1kvpgXtcgw70htRIrToGSUpKzDKDW6NYXhbA+PQKBgDJK
+KH5IJ8E9kYPUMLT1Kc4KVpISvPcnPLVSPdhuqVx69MkfadFSTb4BKbkwiXegQCjk
+isFzbeEM25EE9q6EYKP+sAm+RyyJ6W0zKBY4TynSXyAiWSGUAaXTL+AOqCaVVZiL
+rtEdSUGQ/LzclIT0/HLV2oTw4KWxtTdc3LXEhpNdAoGBAM3LckiHENqtoeK2gVNw
+IPeEuruEqoN4n+XltbEEv6Ymhxrs6T6HSKsEsLhqsUiIvIzH43KMm45SNYTn5eZh
+yzYMXLmervN7c1jJe2Y2MYv6hE+Ypj1xGW4w7s8WNKmVzLv97beisD9AZrS7sXfF
+RvOAi5wVkYylDxV4238MAZIq
+-----END PRIVATE KEY-----
 -----BEGIN CERTIFICATE-----
-MIICizCCAfSgAwIBAgIJAMtotfHYdEsVMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
-BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv
-TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDlaFw0xNjA1MTAxMzUzMDlaMEUx
+MIIDbDCCAlSgAwIBAgIJANk5lu6mSyBCMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV
+BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv
+TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzBaFw0yMzA1MjYxNzI4MzBaMEUx
 CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU
-ZXN0IFMvTUlNRSBFRSBSU0EgIzMwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGB
-ALqzJNm1k17WFFJwVzg9RXJUuyvv4LUcMhmc4ebAWgtrJfpEfDJ6n9OR3CI8ed6Z
-Uu7C+lKMswtmRfL8nvriRIcms15WXdS+YJUI6bjSEC0rX4Xf2kyHymGRs88PuL6H
-Crj9Wzv2jjqfPBtUqbOChkEmFn5yh26NRJgzcuMnhqr7AgMBAAGjgYMwgYAwHQYD
-VR0OBBYEFDsSFjNtYZzd0tTHafNS7tneQQj6MB8GA1UdIwQYMBaAFBPPS6e7iS6z
-OFcXdsabrWhb5e0XMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/BAQDAgXgMCAGA1Ud
-EQQZMBeBFXNtaW1lcnNhM0BvcGVuc3NsLm9yZzANBgkqhkiG9w0BAQUFAAOBgQBE
-tUDB+1Dqigu4p1xtdq7JRK6S+gfA7RWmhz0j2scb2zhpS12h37JLHsidGeKAzZYq
-jUjOrH/j3xcV5AnuJoqImJaN23nzzxtR4qGGX2mrq6EtObzdEGgCUaizsGM+0slJ
-PYxcy8KeY/63B1BpYhj2RjGkL6HrvuAaxVORa3acoA==
+ZXN0IFMvTUlNRSBFRSBSU0EgIzMwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+AoIBAQCyK+BTAOJKJjjiOhY60NeZjzGGZxEBfCm62n0mwkzusW/V/e63uwj6uOVC
+FoVBz5doMf3M6QIS2jL3Aw6Qs5+vcuLA0gHrqIwjYQz1UZ5ETLKLKbQw6YOIVfsF
+STxytUVpfcByrubWiLKX63theG1/IVokDK/9/k52Kyt+wcCjuRb7AJQFj2OLDRuW
+m/gavozkK103gQ+dUq4HXamZMtTq1EhQOfc0IUeCOEL6xz4jzlHHfzLdkvb7Enha
+v2sXDfOmZp/DYf9IqS7lvFkkINPVbYFBTexaPZlFwmpGRjkmoyH/w+Jlcpzs+w6p
+1diWRpaSn62bbkRN49j6L2dVb+DfAgMBAAGjYDBeMAwGA1UdEwEB/wQCMAAwDgYD
+VR0PAQH/BAQDAgXgMB0GA1UdDgQWBBQ6CkW5sa6HrBsWvuPOvMjyL5AnsDAfBgNV
+HSMEGDAWgBTJkVMKY3sWW4u9RPB2iKkk5uW2bDANBgkqhkiG9w0BAQUFAAOCAQEA
+JhcrD7AKafVzlncA3cZ6epAruj1xwcfiE+EbuAaeWEGjoSltmevcjgoIxvijRVcp
+sCbNmHJZ/siQlqzWjjf3yoERvLDqngJZZpQeocMIbLRQf4wgLAuiBcvT52wTE+sa
+VexeETDy5J1OW3wE4A3rkdBp6hLaymlijFNnd5z/bP6w3AcIMWm45yPm0skM8RVr
+O3UstEFYD/iy+p+Y/YZDoxYQSW5Vl+NkpGmc5bzet8gQz4JeXtH3z5zUGoDM4XK7
+tXP3yUi2eecCbyjh/wgaQiVdylr1Kv3mxXcTl+cFO22asDkh0R/y72nTCu5fSILY
+CscFo2Z2pYROGtZDmYqhRw==
 -----END CERTIFICATE-----
index 055269e..1075a4f 100644 (file)
--- a/test/tcrl
+++ b/test/tcrl
@@ -9,70 +9,70 @@ else
 fi
 
 echo testing crl conversions
-cp $t fff.p
+cp $t crl-fff.p
 
 echo "p -> d"
-$cmd -in fff.p -inform p -outform d >f.d
+$cmd -in crl-fff.p -inform p -outform d >crl-f.d
 if [ $? != 0 ]; then exit 1; fi
 #echo "p -> t"
-#$cmd -in fff.p -inform p -outform t >f.t
+#$cmd -in crl-fff.p -inform p -outform t >crl-f.t
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in fff.p -inform p -outform p >f.p
+$cmd -in crl-fff.p -inform p -outform p >crl-f.p
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> d"
-$cmd -in f.d -inform d -outform d >ff.d1
+$cmd -in crl-f.d -inform d -outform d >crl-ff.d1
 if [ $? != 0 ]; then exit 1; fi
 #echo "t -> d"
-#$cmd -in f.t -inform t -outform d >ff.d2
+#$cmd -in crl-f.t -inform t -outform d >crl-ff.d2
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> d"
-$cmd -in f.p -inform p -outform d >ff.d3
+$cmd -in crl-f.p -inform p -outform d >crl-ff.d3
 if [ $? != 0 ]; then exit 1; fi
 
 #echo "d -> t"
-#$cmd -in f.d -inform d -outform t >ff.t1
+#$cmd -in crl-f.d -inform d -outform t >crl-ff.t1
 #if [ $? != 0 ]; then exit 1; fi
 #echo "t -> t"
-#$cmd -in f.t -inform t -outform t >ff.t2
+#$cmd -in crl-f.t -inform t -outform t >crl-ff.t2
 #if [ $? != 0 ]; then exit 1; fi
 #echo "p -> t"
-#$cmd -in f.p -inform p -outform t >ff.t3
+#$cmd -in crl-f.p -inform p -outform t >crl-ff.t3
 #if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> p"
-$cmd -in f.d -inform d -outform p >ff.p1
+$cmd -in crl-f.d -inform d -outform p >crl-ff.p1
 if [ $? != 0 ]; then exit 1; fi
 #echo "t -> p"
-#$cmd -in f.t -inform t -outform p >ff.p2
+#$cmd -in crl-f.t -inform t -outform p >crl-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in f.p -inform p -outform p >ff.p3
+$cmd -in crl-f.p -inform p -outform p >crl-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-cmp fff.p f.p
+cmp crl-fff.p crl-f.p
 if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p1
+cmp crl-fff.p crl-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-#cmp fff.p ff.p2
+#cmp crl-fff.p crl-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p3
+cmp crl-fff.p crl-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-#cmp f.t ff.t1
+#cmp crl-f.t crl-ff.t1
 #if [ $? != 0 ]; then exit 1; fi
-#cmp f.t ff.t2
+#cmp crl-f.t crl-ff.t2
 #if [ $? != 0 ]; then exit 1; fi
-#cmp f.t ff.t3
+#cmp crl-f.t crl-ff.t3
 #if [ $? != 0 ]; then exit 1; fi
 
-cmp f.p ff.p1
+cmp crl-f.p crl-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-#cmp f.p ff.p2
+#cmp crl-f.p crl-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
-cmp f.p ff.p3
+cmp crl-f.p crl-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-/bin/rm -f f.* ff.* fff.*
+/bin/rm -f crl-f.* crl-ff.* crl-fff.*
 exit 0
index f5ce7c0..d9fd52e 100644 (file)
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-testsrc=Makefile
+testsrc=testenc
 test=./p
 cmd="../util/shlib_wrap.sh ../apps/openssl"
 
index 39a7bfa..6e1c818 100644 (file)
@@ -56,7 +56,8 @@ $         tests := -
        test_enc,test_x509,test_rsa,test_crl,test_sid,-
        test_gen,test_req,test_pkcs7,test_verify,test_dh,test_dsa,-
        test_ss,test_ca,test_engine,test_evp,test_ssl,test_tsa,test_ige,-
-       test_jpake,test_srp,test_cms,test_heartbeat,test_constant_time
+       test_jpake,test_srp,test_cms,test_ocsp,test_v3name,test_heartbeat,-
+       test_constant_time
 $      endif
 $      tests = f$edit(tests,"COLLAPSE")
 $
@@ -94,9 +95,10 @@ $    EVPTEST :=      evp_test
 $      IGETEST :=      igetest
 $      JPAKETEST :=    jpaketest
 $      SRPTEST :=      srptest
+$      V3NAMETEST :=   v3nametest
 $      ASN1TEST :=     asn1test
-$      HEARTBEATTEST := heartbeat_test
-$      CONSTTIMETEST := constant_time_test
+$      HEARTBEATTEST :=        heartbeat_test
+$      CONSTTIMETEST :=        constant_time_test
 $!
 $      tests_i = 0
 $ loop_tests:
@@ -368,6 +370,14 @@ $ test_srp:
 $      write sys$output "Test SRP"
 $      mcr 'texe_dir''srptest'
 $      return
+$ test_ocsp:
+$      write sys$output "Test OCSP"
+$      @tocsp.com
+$      return
+$ test_v3name:
+$       write sys$output "Test V3NAME"
+$       mcr 'texe_dir''v3nametest'
+$       return
 $ test_heartbeat:
 $       write sys$output "Test HEARTBEAT"
 $       mcr 'texe_dir''heartbeattest'
index 261097b..747e4ba 100644 (file)
@@ -30,6 +30,8 @@ else
   extra="$4"
 fi
 
+serverinfo="./serverinfo.pem"
+
 #############################################################################
 
 echo test sslv2
@@ -99,6 +101,30 @@ $ssltest -bio_pair -ssl3 -server_auth -client_auth $CA $extra || exit 1
 echo test sslv2/sslv3 via BIO pair
 $ssltest $extra || exit 1
 
+echo test dtlsv1
+$ssltest -dtls1 $extra || exit 1
+
+echo test dtlsv1 with server authentication
+$ssltest -dtls1 -server_auth $CA $extra || exit 1
+
+echo test dtlsv1 with client authentication
+$ssltest -dtls1 -client_auth $CA $extra || exit 1
+
+echo test dtlsv1 with both client and server authentication
+$ssltest -dtls1 -server_auth -client_auth $CA $extra || exit 1
+
+echo test dtlsv1.2
+$ssltest -dtls12 $extra || exit 1
+
+echo test dtlsv1.2 with server authentication
+$ssltest -dtls12 -server_auth $CA $extra || exit 1
+
+echo test dtlsv1.2 with client authentication
+$ssltest -dtls12 -client_auth $CA $extra || exit 1
+
+echo test dtlsv1.2 with both client and server authentication
+$ssltest -dtls12 -server_auth -client_auth $CA $extra || exit 1
+
 if [ $dsa_cert = NO ]; then
   echo 'test sslv2/sslv3 w/o (EC)DHE via BIO pair'
   $ssltest -bio_pair -no_dhe -no_ecdhe $extra || exit 1
@@ -194,6 +220,35 @@ $ssltest -tls1 -cipher PSK -psk abc123 $extra || exit 1
 echo test tls1 with PSK via BIO pair
 $ssltest -bio_pair -tls1 -cipher PSK -psk abc123 $extra || exit 1
 
+#############################################################################
+# Custom Extension tests
+
+echo test tls1 with custom extensions
+$ssltest -bio_pair -tls1 -custom_ext || exit 1
+
+#############################################################################
+# Serverinfo tests
+
+echo test tls1 with serverinfo
+$ssltest -bio_pair -tls1 -serverinfo_file $serverinfo || exit 1
+$ssltest -bio_pair -tls1 -serverinfo_file $serverinfo -serverinfo_sct || exit 1
+$ssltest -bio_pair -tls1 -serverinfo_file $serverinfo -serverinfo_tack || exit 1
+$ssltest -bio_pair -tls1 -serverinfo_file $serverinfo -serverinfo_sct -serverinfo_tack || exit 1
+$ssltest -bio_pair -tls1 -custom_ext -serverinfo_file $serverinfo -serverinfo_sct -serverinfo_tack || exit 1
+
+
+#############################################################################
+# ALPN tests
+
+$ssltest -bio_pair -tls1 -alpn_client foo -alpn_server bar || exit 1
+$ssltest -bio_pair -tls1 -alpn_client foo -alpn_server foo -alpn_expected foo || exit 1
+$ssltest -bio_pair -tls1 -alpn_client foo,bar -alpn_server foo -alpn_expected foo || exit 1
+$ssltest -bio_pair -tls1 -alpn_client bar,foo -alpn_server foo -alpn_expected foo || exit 1
+$ssltest -bio_pair -tls1 -alpn_client bar,foo -alpn_server foo,bar -alpn_expected foo || exit 1
+$ssltest -bio_pair -tls1 -alpn_client bar,foo -alpn_server bar,foo -alpn_expected bar || exit 1
+$ssltest -bio_pair -tls1 -alpn_client foo,bar -alpn_server bar,foo -alpn_expected bar || exit 1
+$ssltest -bio_pair -tls1 -alpn_client baz -alpn_server bar,foo || exit 1
+
 if ../util/shlib_wrap.sh ../apps/openssl no-srp; then
   echo skipping SRP tests
 else
@@ -210,4 +265,12 @@ else
   $ssltest -bio_pair -tls1 -cipher aSRP -srpuser test -srppass abc123 || exit 1
 fi
 
+#############################################################################
+# Multi-buffer tests
+
+if [ -z "$extra" -a `uname -m` = "x86_64" ]; then
+  $ssltest -cipher AES128-SHA    -bytes 8m     || exit 1
+  $ssltest -cipher AES128-SHA256 -bytes 8m     || exit 1
+fi
+
 exit 0
diff --git a/test/tocsp b/test/tocsp
new file mode 100644 (file)
index 0000000..48e81bf
--- /dev/null
@@ -0,0 +1,147 @@
+#!/bin/sh
+
+cmd='../util/shlib_wrap.sh ../apps/openssl'
+ocspdir="ocsp-tests"
+# 19 December 2012 (UTC) so we don't get certificate expiry errors.
+check_time="-attime 1355875200"
+
+test_ocsp () {
+
+       $cmd base64 -d -in $ocspdir/$1 | \
+               $cmd ocsp -respin - -partial_chain $check_time -trusted_first \
+               -CAfile $ocspdir/$2 -verify_other $ocspdir/$2 -CApath /dev/null
+       [ $? != $3 ] && exit 1
+}
+
+
+echo "=== VALID OCSP RESPONSES ==="
+echo "NON-DELEGATED; Intermediate CA -> EE"
+test_ocsp ND1.ors ND1_Issuer_ICA.pem 0
+echo "NON-DELEGATED; Root CA -> Intermediate CA"
+test_ocsp ND2.ors ND2_Issuer_Root.pem 0
+echo "NON-DELEGATED; Root CA -> EE"
+test_ocsp ND3.ors ND3_Issuer_Root.pem 0
+echo "DELEGATED; Intermediate CA -> EE"
+test_ocsp D1.ors D1_Issuer_ICA.pem 0
+echo "DELEGATED; Root CA -> Intermediate CA"
+test_ocsp D2.ors D2_Issuer_Root.pem 0
+echo "DELEGATED; Root CA -> EE"
+test_ocsp D3.ors D3_Issuer_Root.pem 0
+
+echo "=== INVALID SIGNATURE on the OCSP RESPONSE ==="
+echo "NON-DELEGATED; Intermediate CA -> EE"
+test_ocsp ISOP_ND1.ors ND1_Issuer_ICA.pem 1
+echo "NON-DELEGATED; Root CA -> Intermediate CA"
+test_ocsp ISOP_ND2.ors ND2_Issuer_Root.pem 1
+echo "NON-DELEGATED; Root CA -> EE"
+test_ocsp ISOP_ND3.ors ND3_Issuer_Root.pem 1
+echo "DELEGATED; Intermediate CA -> EE"
+test_ocsp ISOP_D1.ors D1_Issuer_ICA.pem 1
+echo "DELEGATED; Root CA -> Intermediate CA"
+test_ocsp ISOP_D2.ors D2_Issuer_Root.pem 1
+echo "DELEGATED; Root CA -> EE"
+test_ocsp ISOP_D3.ors D3_Issuer_Root.pem 1
+
+echo "=== WRONG RESPONDERID in the OCSP RESPONSE ==="
+echo "NON-DELEGATED; Intermediate CA -> EE"
+test_ocsp WRID_ND1.ors ND1_Issuer_ICA.pem 1
+echo "NON-DELEGATED; Root CA -> Intermediate CA"
+test_ocsp WRID_ND2.ors ND2_Issuer_Root.pem 1
+echo "NON-DELEGATED; Root CA -> EE"
+test_ocsp WRID_ND3.ors ND3_Issuer_Root.pem 1
+echo "DELEGATED; Intermediate CA -> EE"
+test_ocsp WRID_D1.ors D1_Issuer_ICA.pem 1
+echo "DELEGATED; Root CA -> Intermediate CA"
+test_ocsp WRID_D2.ors D2_Issuer_Root.pem 1
+echo "DELEGATED; Root CA -> EE"
+test_ocsp WRID_D3.ors D3_Issuer_Root.pem 1
+
+echo "=== WRONG ISSUERNAMEHASH in the OCSP RESPONSE ==="
+echo "NON-DELEGATED; Intermediate CA -> EE"
+test_ocsp WINH_ND1.ors ND1_Issuer_ICA.pem 1
+echo "NON-DELEGATED; Root CA -> Intermediate CA"
+test_ocsp WINH_ND2.ors ND2_Issuer_Root.pem 1
+echo "NON-DELEGATED; Root CA -> EE"
+test_ocsp WINH_ND3.ors ND3_Issuer_Root.pem 1
+echo "DELEGATED; Intermediate CA -> EE"
+test_ocsp WINH_D1.ors D1_Issuer_ICA.pem 1
+echo "DELEGATED; Root CA -> Intermediate CA"
+test_ocsp WINH_D2.ors D2_Issuer_Root.pem 1
+echo "DELEGATED; Root CA -> EE"
+test_ocsp WINH_D3.ors D3_Issuer_Root.pem 1
+
+echo "=== WRONG ISSUERKEYHASH in the OCSP RESPONSE ==="
+echo "NON-DELEGATED; Intermediate CA -> EE"
+test_ocsp WIKH_ND1.ors ND1_Issuer_ICA.pem 1
+echo "NON-DELEGATED; Root CA -> Intermediate CA"
+test_ocsp WIKH_ND2.ors ND2_Issuer_Root.pem 1
+echo "NON-DELEGATED; Root CA -> EE"
+test_ocsp WIKH_ND3.ors ND3_Issuer_Root.pem 1
+echo "DELEGATED; Intermediate CA -> EE"
+test_ocsp WIKH_D1.ors D1_Issuer_ICA.pem 1
+echo "DELEGATED; Root CA -> Intermediate CA"
+test_ocsp WIKH_D2.ors D2_Issuer_Root.pem 1
+echo "DELEGATED; Root CA -> EE"
+test_ocsp WIKH_D3.ors D3_Issuer_Root.pem 1
+
+echo "=== WRONG KEY in the DELEGATED OCSP SIGNING CERTIFICATE ==="
+echo "DELEGATED; Intermediate CA -> EE"
+test_ocsp WKDOSC_D1.ors D1_Issuer_ICA.pem 1
+echo "DELEGATED; Root CA -> Intermediate CA"
+test_ocsp WKDOSC_D2.ors D2_Issuer_Root.pem 1
+echo "DELEGATED; Root CA -> EE"
+test_ocsp WKDOSC_D3.ors D3_Issuer_Root.pem 1
+
+echo "=== INVALID SIGNATURE on the DELEGATED OCSP SIGNING CERTIFICATE ==="
+echo "DELEGATED; Intermediate CA -> EE"
+test_ocsp ISDOSC_D1.ors D1_Issuer_ICA.pem 1
+echo "DELEGATED; Root CA -> Intermediate CA"
+test_ocsp ISDOSC_D2.ors D2_Issuer_Root.pem 1
+echo "DELEGATED; Root CA -> EE"
+test_ocsp ISDOSC_D3.ors D3_Issuer_Root.pem 1
+
+echo "=== WRONG SUBJECT NAME in the ISSUER CERTIFICATE ==="
+echo "NON-DELEGATED; Intermediate CA -> EE"
+test_ocsp ND1.ors WSNIC_ND1_Issuer_ICA.pem 1
+echo "NON-DELEGATED; Root CA -> Intermediate CA"
+test_ocsp ND2.ors WSNIC_ND2_Issuer_Root.pem 1
+echo "NON-DELEGATED; Root CA -> EE"
+test_ocsp ND3.ors WSNIC_ND3_Issuer_Root.pem 1
+echo "DELEGATED; Intermediate CA -> EE"
+test_ocsp D1.ors WSNIC_D1_Issuer_ICA.pem 1
+echo "DELEGATED; Root CA -> Intermediate CA"
+test_ocsp D2.ors WSNIC_D2_Issuer_Root.pem 1
+echo "DELEGATED; Root CA -> EE"
+test_ocsp D3.ors WSNIC_D3_Issuer_Root.pem 1
+
+echo "=== WRONG KEY in the ISSUER CERTIFICATE ==="
+echo "NON-DELEGATED; Intermediate CA -> EE"
+test_ocsp ND1.ors WKIC_ND1_Issuer_ICA.pem 1
+echo "NON-DELEGATED; Root CA -> Intermediate CA"
+test_ocsp ND2.ors WKIC_ND2_Issuer_Root.pem 1
+echo "NON-DELEGATED; Root CA -> EE"
+test_ocsp ND3.ors WKIC_ND3_Issuer_Root.pem 1
+echo "DELEGATED; Intermediate CA -> EE"
+test_ocsp D1.ors WKIC_D1_Issuer_ICA.pem 1
+echo "DELEGATED; Root CA -> Intermediate CA"
+test_ocsp D2.ors WKIC_D2_Issuer_Root.pem 1
+echo "DELEGATED; Root CA -> EE"
+test_ocsp D3.ors WKIC_D3_Issuer_Root.pem 1
+
+echo "=== INVALID SIGNATURE on the ISSUER CERTIFICATE ==="
+# Expect success, because we're explicitly trusting the issuer certificate.
+echo "NON-DELEGATED; Intermediate CA -> EE"
+test_ocsp ND1.ors ISIC_ND1_Issuer_ICA.pem 0
+echo "NON-DELEGATED; Root CA -> Intermediate CA"
+test_ocsp ND2.ors ISIC_ND2_Issuer_Root.pem 0
+echo "NON-DELEGATED; Root CA -> EE"
+test_ocsp ND3.ors ISIC_ND3_Issuer_Root.pem 0
+echo "DELEGATED; Intermediate CA -> EE"
+test_ocsp D1.ors ISIC_D1_Issuer_ICA.pem 0
+echo "DELEGATED; Root CA -> Intermediate CA"
+test_ocsp D2.ors ISIC_D2_Issuer_Root.pem 0
+echo "DELEGATED; Root CA -> EE"
+test_ocsp D3.ors ISIC_D3_Issuer_Root.pem 0
+
+echo "ALL OCSP TESTS SUCCESSFUL"
+exit 0
diff --git a/test/tocsp.com b/test/tocsp.com
new file mode 100644 (file)
index 0000000..3b974c9
--- /dev/null
@@ -0,0 +1,152 @@
+$! TOCSP.COM
+$
+$      cmd = "mcr ''exe_dir'openssl"
+$      ocspdir = "ocsp-tests"
+$      ! 19 December 2012 (UTC) so we don't get certificate expiry errors.
+$      check_time = "-attime 1355875200"
+$
+$ test_ocsp: subroutine
+$      set noon
+$      'cmd' base64 -d -in [.'ocspdir']'p1' -out f.d
+$      'cmd' ocsp -respin f.d -partial_chain 'check_time' -
+             "-CAfile" [.'ocspdir']'p2' -verify_other [.'ocspdir']'p2' -
+             "-CApath" nul:
+$      ! When ocsp exits with 0, the VMS severity becomes 1;
+$      ! when ocsp exits with 1, the VMS severity becomes 2.
+$      ! See the definition of EXIT(n) in the VMS section of e_os.h.
+$      if $severity .ne. 'p3'+1 then exit 2 ! severity error
+$      exit 1
+$      endsubroutine
+$
+$      on error then exit 2
+$      write sys$output "=== VALID OCSP RESPONSES ==="
+$      write sys$output "NON-DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp ND1.ors ND1_Issuer_ICA.pem 0
+$      write sys$output "NON-DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp ND2.ors ND2_Issuer_Root.pem 0
+$      write sys$output "NON-DELEGATED; Root CA -> EE"
+$      call test_ocsp ND3.ors ND3_Issuer_Root.pem 0
+$      write sys$output "DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp D1.ors D1_Issuer_ICA.pem 0
+$      write sys$output "DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp D2.ors D2_Issuer_Root.pem 0
+$      write sys$output "DELEGATED; Root CA -> EE"
+$      call test_ocsp D3.ors D3_Issuer_Root.pem 0
+$
+$      write sys$output "=== INVALID SIGNATURE on the OCSP RESPONSE ==="
+$      write sys$output "NON-DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp ISOP_ND1.ors ND1_Issuer_ICA.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp ISOP_ND2.ors ND2_Issuer_Root.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> EE"
+$      call test_ocsp ISOP_ND3.ors ND3_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp ISOP_D1.ors D1_Issuer_ICA.pem 1
+$      write sys$output "DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp ISOP_D2.ors D2_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Root CA -> EE"
+$      call test_ocsp ISOP_D3.ors D3_Issuer_Root.pem 1
+$
+$      write sys$output "=== WRONG RESPONDERID in the OCSP RESPONSE ==="
+$      write sys$output "NON-DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp WRID_ND1.ors ND1_Issuer_ICA.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp WRID_ND2.ors ND2_Issuer_Root.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> EE"
+$      call test_ocsp WRID_ND3.ors ND3_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp WRID_D1.ors D1_Issuer_ICA.pem 1
+$      write sys$output "DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp WRID_D2.ors D2_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Root CA -> EE"
+$      call test_ocsp WRID_D3.ors D3_Issuer_Root.pem 1
+$
+$      write sys$output "=== WRONG ISSUERNAMEHASH in the OCSP RESPONSE ==="
+$      write sys$output "NON-DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp WINH_ND1.ors ND1_Issuer_ICA.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp WINH_ND2.ors ND2_Issuer_Root.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> EE"
+$      call test_ocsp WINH_ND3.ors ND3_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp WINH_D1.ors D1_Issuer_ICA.pem 1
+$      write sys$output "DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp WINH_D2.ors D2_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Root CA -> EE"
+$      call test_ocsp WINH_D3.ors D3_Issuer_Root.pem 1
+$
+$      write sys$output "=== WRONG ISSUERKEYHASH in the OCSP RESPONSE ==="
+$      write sys$output "NON-DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp WIKH_ND1.ors ND1_Issuer_ICA.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp WIKH_ND2.ors ND2_Issuer_Root.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> EE"
+$      call test_ocsp WIKH_ND3.ors ND3_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp WIKH_D1.ors D1_Issuer_ICA.pem 1
+$      write sys$output "DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp WIKH_D2.ors D2_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Root CA -> EE"
+$      call test_ocsp WIKH_D3.ors D3_Issuer_Root.pem 1
+$
+$      write sys$output "=== WRONG KEY in the DELEGATED OCSP SIGNING CERTIFICATE ==="
+$      write sys$output "DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp WKDOSC_D1.ors D1_Issuer_ICA.pem 1
+$      write sys$output "DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp WKDOSC_D2.ors D2_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Root CA -> EE"
+$      call test_ocsp WKDOSC_D3.ors D3_Issuer_Root.pem 1
+$
+$      write sys$output "=== INVALID SIGNATURE on the DELEGATED OCSP SIGNING CERTIFICATE ==="
+$      write sys$output "DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp ISDOSC_D1.ors D1_Issuer_ICA.pem 1
+$      write sys$output "DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp ISDOSC_D2.ors D2_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Root CA -> EE"
+$      call test_ocsp ISDOSC_D3.ors D3_Issuer_Root.pem 1
+$
+$      write sys$output "=== WRONG SUBJECT NAME in the ISSUER CERTIFICATE ==="
+$      write sys$output "NON-DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp ND1.ors WSNIC_ND1_Issuer_ICA.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp ND2.ors WSNIC_ND2_Issuer_Root.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> EE"
+$      call test_ocsp ND3.ors WSNIC_ND3_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp D1.ors WSNIC_D1_Issuer_ICA.pem 1
+$      write sys$output "DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp D2.ors WSNIC_D2_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Root CA -> EE"
+$      call test_ocsp D3.ors WSNIC_D3_Issuer_Root.pem 1
+$
+$      write sys$output "=== WRONG KEY in the ISSUER CERTIFICATE ==="
+$      write sys$output "NON-DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp ND1.ors WKIC_ND1_Issuer_ICA.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp ND2.ors WKIC_ND2_Issuer_Root.pem 1
+$      write sys$output "NON-DELEGATED; Root CA -> EE"
+$      call test_ocsp ND3.ors WKIC_ND3_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp D1.ors WKIC_D1_Issuer_ICA.pem 1
+$      write sys$output "DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp D2.ors WKIC_D2_Issuer_Root.pem 1
+$      write sys$output "DELEGATED; Root CA -> EE"
+$      call test_ocsp D3.ors WKIC_D3_Issuer_Root.pem 1
+$
+$      write sys$output "=== INVALID SIGNATURE on the ISSUER CERTIFICATE ==="
+$      ! Expect success, because we're explicitly trusting the issuer certificate.
+$      write sys$output "NON-DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp ND1.ors ISIC_ND1_Issuer_ICA.pem 0
+$      write sys$output "NON-DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp ND2.ors ISIC_ND2_Issuer_Root.pem 0
+$      write sys$output "NON-DELEGATED; Root CA -> EE"
+$      call test_ocsp ND3.ors ISIC_ND3_Issuer_Root.pem 0
+$      write sys$output "DELEGATED; Intermediate CA -> EE"
+$      call test_ocsp D1.ors ISIC_D1_Issuer_ICA.pem 0
+$      write sys$output "DELEGATED; Root CA -> Intermediate CA"
+$      call test_ocsp D2.ors ISIC_D2_Issuer_Root.pem 0
+$      write sys$output "DELEGATED; Root CA -> EE"
+$      call test_ocsp D3.ors ISIC_D3_Issuer_Root.pem 0
+$
+$      write sys$output "ALL OCSP TESTS SUCCESSFUL"
+$      exit 1
index 3e435ff..d7029a0 100644 (file)
@@ -9,40 +9,40 @@ else
 fi
 
 echo testing pkcs7 conversions
-cp $t fff.p
+cp $t p7-fff.p
 
 echo "p -> d"
-$cmd -in fff.p -inform p -outform d >f.d
+$cmd -in p7-fff.p -inform p -outform d >p7-f.d
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in fff.p -inform p -outform p >f.p
+$cmd -in p7-fff.p -inform p -outform p >p7-f.p
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> d"
-$cmd -in f.d -inform d -outform d >ff.d1
+$cmd -in p7-f.d -inform d -outform d >p7-ff.d1
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> d"
-$cmd -in f.p -inform p -outform d >ff.d3
+$cmd -in p7-f.p -inform p -outform d >p7-ff.d3
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> p"
-$cmd -in f.d -inform d -outform p >ff.p1
+$cmd -in p7-f.d -inform d -outform p >p7-ff.p1
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in f.p -inform p -outform p >ff.p3
+$cmd -in p7-f.p -inform p -outform p >p7-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-cmp fff.p f.p
+cmp p7-fff.p p7-f.p
 if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p1
+cmp p7-fff.p p7-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p3
+cmp p7-fff.p p7-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-cmp f.p ff.p1
+cmp p7-f.p p7-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-cmp f.p ff.p3
+cmp p7-f.p p7-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-/bin/rm -f f.* ff.* fff.*
+/bin/rm -f p7-f.* p7-ff.* p7-fff.*
 exit 0
index 64fc28e..d4bfbdf 100644 (file)
@@ -9,33 +9,33 @@ else
 fi
 
 echo "testing pkcs7 conversions (2)"
-cp $t fff.p
+cp $t p7d-fff.p
 
 echo "p -> d"
-$cmd -in fff.p -inform p -outform d >f.d
+$cmd -in p7d-fff.p -inform p -outform d >p7d-f.d
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in fff.p -inform p -outform p >f.p
+$cmd -in p7d-fff.p -inform p -outform p >p7d-f.p
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> d"
-$cmd -in f.d -inform d -outform d >ff.d1
+$cmd -in p7d-f.d -inform d -outform d >p7d-ff.d1
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> d"
-$cmd -in f.p -inform p -outform d >ff.d3
+$cmd -in p7d-f.p -inform p -outform d >p7d-ff.d3
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> p"
-$cmd -in f.d -inform d -outform p >ff.p1
+$cmd -in p7d-f.d -inform d -outform p >p7d-ff.p1
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in f.p -inform p -outform p >ff.p3
+$cmd -in p7d-f.p -inform p -outform p >p7d-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-cmp f.p ff.p1
+cmp p7d-f.p p7d-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-cmp f.p ff.p3
+cmp p7d-f.p p7d-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-/bin/rm -f f.* ff.* fff.*
+/bin/rm -f p7d-f.* p7d-ff.* p7d-fff.*
 exit 0
index 77f37dc..420d25e 100644 (file)
--- a/test/treq
+++ b/test/treq
@@ -14,70 +14,70 @@ if $cmd -in $t -inform p -noout -text 2>&1 | fgrep -i 'Unknown Public Key'; then
 fi
 
 echo testing req conversions
-cp $t fff.p
+cp $t req-fff.p
 
 echo "p -> d"
-$cmd -in fff.p -inform p -outform d >f.d
+$cmd -in req-fff.p -inform p -outform d >req-f.d
 if [ $? != 0 ]; then exit 1; fi
 #echo "p -> t"
-#$cmd -in fff.p -inform p -outform t >f.t
+#$cmd -in req-fff.p -inform p -outform t >req-f.t
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in fff.p -inform p -outform p >f.p
+$cmd -in req-fff.p -inform p -outform p >req-f.p
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> d"
-$cmd -verify -in f.d -inform d -outform d >ff.d1
+$cmd -verify -in req-f.d -inform d -outform d >req-ff.d1
 if [ $? != 0 ]; then exit 1; fi
 #echo "t -> d"
-#$cmd -in f.t -inform t -outform d >ff.d2
+#$cmd -in req-f.t -inform t -outform d >req-ff.d2
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> d"
-$cmd -verify -in f.p -inform p -outform d >ff.d3
+$cmd -verify -in req-f.p -inform p -outform d >req-ff.d3
 if [ $? != 0 ]; then exit 1; fi
 
 #echo "d -> t"
-#$cmd -in f.d -inform d -outform t >ff.t1
+#$cmd -in req-f.d -inform d -outform t >req-ff.t1
 #if [ $? != 0 ]; then exit 1; fi
 #echo "t -> t"
-#$cmd -in f.t -inform t -outform t >ff.t2
+#$cmd -in req-f.t -inform t -outform t >req-ff.t2
 #if [ $? != 0 ]; then exit 1; fi
 #echo "p -> t"
-#$cmd -in f.p -inform p -outform t >ff.t3
+#$cmd -in req-f.p -inform p -outform t >req-ff.t3
 #if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> p"
-$cmd -in f.d -inform d -outform p >ff.p1
+$cmd -in req-f.d -inform d -outform p >req-ff.p1
 if [ $? != 0 ]; then exit 1; fi
 #echo "t -> p"
-#$cmd -in f.t -inform t -outform p >ff.p2
+#$cmd -in req-f.t -inform t -outform p >req-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in f.p -inform p -outform p >ff.p3
+$cmd -in req-f.p -inform p -outform p >req-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-cmp fff.p f.p
+cmp req-fff.p req-f.p
 if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p1
+cmp req-fff.p req-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-#cmp fff.p ff.p2
+#cmp req-fff.p req-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p3
+cmp req-fff.p req-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-#cmp f.t ff.t1
+#cmp req-f.t req-ff.t1
 #if [ $? != 0 ]; then exit 1; fi
-#cmp f.t ff.t2
+#cmp req-f.t req-ff.t2
 #if [ $? != 0 ]; then exit 1; fi
-#cmp f.t ff.t3
+#cmp req-f.t req-ff.t3
 #if [ $? != 0 ]; then exit 1; fi
 
-cmp f.p ff.p1
+cmp req-f.p req-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-#cmp f.p ff.p2
+#cmp req-f.p req-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
-cmp f.p ff.p3
+cmp req-f.p req-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-/bin/rm -f f.* ff.* fff.*
+/bin/rm -f req-f.* req-ff.* req-fff.*
 exit 0
index 249ac1d..5a2290f 100644 (file)
--- a/test/trsa
+++ b/test/trsa
@@ -14,70 +14,70 @@ else
 fi
 
 echo testing rsa conversions
-cp $t fff.p
+cp $t rsa-fff.p
 
 echo "p -> d"
-$cmd -in fff.p -inform p -outform d >f.d
+$cmd -in rsa-fff.p -inform p -outform d >rsa-f.d
 if [ $? != 0 ]; then exit 1; fi
 #echo "p -> t"
-#$cmd -in fff.p -inform p -outform t >f.t
+#$cmd -in rsa-fff.p -inform p -outform t >rsa-f.t
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in fff.p -inform p -outform p >f.p
+$cmd -in rsa-fff.p -inform p -outform p >rsa-f.p
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> d"
-$cmd -in f.d -inform d -outform d >ff.d1
+$cmd -in rsa-f.d -inform d -outform d >rsa-ff.d1
 if [ $? != 0 ]; then exit 1; fi
 #echo "t -> d"
-#$cmd -in f.t -inform t -outform d >ff.d2
+#$cmd -in rsa-f.t -inform t -outform d >rsa-ff.d2
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> d"
-$cmd -in f.p -inform p -outform d >ff.d3
+$cmd -in rsa-f.p -inform p -outform d >rsa-ff.d3
 if [ $? != 0 ]; then exit 1; fi
 
 #echo "d -> t"
-#$cmd -in f.d -inform d -outform t >ff.t1
+#$cmd -in rsa-f.d -inform d -outform t >rsa-ff.t1
 #if [ $? != 0 ]; then exit 1; fi
 #echo "t -> t"
-#$cmd -in f.t -inform t -outform t >ff.t2
+#$cmd -in rsa-f.t -inform t -outform t >rsa-ff.t2
 #if [ $? != 0 ]; then exit 1; fi
 #echo "p -> t"
-#$cmd -in f.p -inform p -outform t >ff.t3
+#$cmd -in rsa-f.p -inform p -outform t >rsa-ff.t3
 #if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> p"
-$cmd -in f.d -inform d -outform p >ff.p1
+$cmd -in rsa-f.d -inform d -outform p >rsa-ff.p1
 if [ $? != 0 ]; then exit 1; fi
 #echo "t -> p"
-#$cmd -in f.t -inform t -outform p >ff.p2
+#$cmd -in rsa-f.t -inform t -outform p >rsa-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in f.p -inform p -outform p >ff.p3
+$cmd -in rsa-f.p -inform p -outform p >rsa-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-cmp fff.p f.p
+cmp rsa-fff.p rsa-f.p
 if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p1
+cmp rsa-fff.p rsa-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-#cmp fff.p ff.p2
+#cmp rsa-fff.p rsa-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p3
+cmp rsa-fff.p rsa-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-#cmp f.t ff.t1
+#cmp rsa-f.t rsa-ff.t1
 #if [ $? != 0 ]; then exit 1; fi
-#cmp f.t ff.t2
+#cmp rsa-f.t rsa-ff.t2
 #if [ $? != 0 ]; then exit 1; fi
-#cmp f.t ff.t3
+#cmp rsa-f.t rsa-ff.t3
 #if [ $? != 0 ]; then exit 1; fi
 
-cmp f.p ff.p1
+cmp rsa-f.p rsa-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-#cmp f.p ff.p2
+#cmp rsa-f.p rsa-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
-cmp f.p ff.p3
+cmp rsa-f.p rsa-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-/bin/rm -f f.* ff.* fff.*
+/bin/rm -f rsa-f.* rsa-ff.* rsa-fff.*
 exit 0
index 6adbd53..e1eb503 100644 (file)
--- a/test/tsid
+++ b/test/tsid
@@ -9,70 +9,70 @@ else
 fi
 
 echo testing session-id conversions
-cp $t fff.p
+cp $t sid-fff.p
 
 echo "p -> d"
-$cmd -in fff.p -inform p -outform d >f.d
+$cmd -in sid-fff.p -inform p -outform d >sid-f.d
 if [ $? != 0 ]; then exit 1; fi
 #echo "p -> t"
-#$cmd -in fff.p -inform p -outform t >f.t
+#$cmd -in sid-fff.p -inform p -outform t >sid-f.t
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in fff.p -inform p -outform p >f.p
+$cmd -in sid-fff.p -inform p -outform p >sid-f.p
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> d"
-$cmd -in f.d -inform d -outform d >ff.d1
+$cmd -in sid-f.d -inform d -outform d >sid-ff.d1
 if [ $? != 0 ]; then exit 1; fi
 #echo "t -> d"
-#$cmd -in f.t -inform t -outform d >ff.d2
+#$cmd -in sid-f.t -inform t -outform d >sid-ff.d2
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> d"
-$cmd -in f.p -inform p -outform d >ff.d3
+$cmd -in sid-f.p -inform p -outform d >sid-ff.d3
 if [ $? != 0 ]; then exit 1; fi
 
 #echo "d -> t"
-#$cmd -in f.d -inform d -outform t >ff.t1
+#$cmd -in sid-f.d -inform d -outform t >sid-ff.t1
 #if [ $? != 0 ]; then exit 1; fi
 #echo "t -> t"
-#$cmd -in f.t -inform t -outform t >ff.t2
+#$cmd -in sid-f.t -inform t -outform t >sid-ff.t2
 #if [ $? != 0 ]; then exit 1; fi
 #echo "p -> t"
-#$cmd -in f.p -inform p -outform t >ff.t3
+#$cmd -in sid-f.p -inform p -outform t >sid-ff.t3
 #if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> p"
-$cmd -in f.d -inform d -outform p >ff.p1
+$cmd -in sid-f.d -inform d -outform p >sid-ff.p1
 if [ $? != 0 ]; then exit 1; fi
 #echo "t -> p"
-#$cmd -in f.t -inform t -outform p >ff.p2
+#$cmd -in sid-f.t -inform t -outform p >sid-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in f.p -inform p -outform p >ff.p3
+$cmd -in sid-f.p -inform p -outform p >sid-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-cmp fff.p f.p
+cmp sid-fff.p sid-f.p
 if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p1
+cmp sid-fff.p sid-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-#cmp fff.p ff.p2
+#cmp sid-fff.p sid-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p3
+cmp sid-fff.p sid-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-#cmp f.t ff.t1
+#cmp sid-f.t sid-ff.t1
 #if [ $? != 0 ]; then exit 1; fi
-#cmp f.t ff.t2
+#cmp sid-f.t sid-ff.t2
 #if [ $? != 0 ]; then exit 1; fi
-#cmp f.t ff.t3
+#cmp sid-f.t sid-ff.t3
 #if [ $? != 0 ]; then exit 1; fi
 
-cmp f.p ff.p1
+cmp sid-f.p sid-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-#cmp f.p ff.p2
+#cmp sid-f.p sid-ff.p2
 #if [ $? != 0 ]; then exit 1; fi
-cmp f.p ff.p3
+cmp sid-f.p sid-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-/bin/rm -f f.* ff.* fff.*
+/bin/rm -f sid-f.* sid-ff.* sid-fff.*
 exit 0
index 4a15b98..0ce3b52 100644 (file)
@@ -9,70 +9,70 @@ else
 fi
 
 echo testing X509 conversions
-cp $t fff.p
+cp $t x509-fff.p
 
 echo "p -> d"
-$cmd -in fff.p -inform p -outform d >f.d
+$cmd -in x509-fff.p -inform p -outform d >x509-f.d
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> n"
-$cmd -in fff.p -inform p -outform n >f.n
+$cmd -in x509-fff.p -inform p -outform n >x509-f.n
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in fff.p -inform p -outform p >f.p
+$cmd -in x509-fff.p -inform p -outform p >x509-f.p
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> d"
-$cmd -in f.d -inform d -outform d >ff.d1
+$cmd -in x509-f.d -inform d -outform d >x509-ff.d1
 if [ $? != 0 ]; then exit 1; fi
 echo "n -> d"
-$cmd -in f.n -inform n -outform d >ff.d2
+$cmd -in x509-f.n -inform n -outform d >x509-ff.d2
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> d"
-$cmd -in f.p -inform p -outform d >ff.d3
+$cmd -in x509-f.p -inform p -outform d >x509-ff.d3
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> n"
-$cmd -in f.d -inform d -outform n >ff.n1
+$cmd -in x509-f.d -inform d -outform n >x509-ff.n1
 if [ $? != 0 ]; then exit 1; fi
 echo "n -> n"
-$cmd -in f.n -inform n -outform n >ff.n2
+$cmd -in x509-f.n -inform n -outform n >x509-ff.n2
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> n"
-$cmd -in f.p -inform p -outform n >ff.n3
+$cmd -in x509-f.p -inform p -outform n >x509-ff.n3
 if [ $? != 0 ]; then exit 1; fi
 
 echo "d -> p"
-$cmd -in f.d -inform d -outform p >ff.p1
+$cmd -in x509-f.d -inform d -outform p >x509-ff.p1
 if [ $? != 0 ]; then exit 1; fi
 echo "n -> p"
-$cmd -in f.n -inform n -outform p >ff.p2
+$cmd -in x509-f.n -inform n -outform p >x509-ff.p2
 if [ $? != 0 ]; then exit 1; fi
 echo "p -> p"
-$cmd -in f.p -inform p -outform p >ff.p3
+$cmd -in x509-f.p -inform p -outform p >x509-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-cmp fff.p f.p
+cmp x509-fff.p x509-f.p
 if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p1
+cmp x509-fff.p x509-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p2
+cmp x509-fff.p x509-ff.p2
 if [ $? != 0 ]; then exit 1; fi
-cmp fff.p ff.p3
+cmp x509-fff.p x509-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-cmp f.n ff.n1
+cmp x509-f.n x509-ff.n1
 if [ $? != 0 ]; then exit 1; fi
-cmp f.n ff.n2
+cmp x509-f.n x509-ff.n2
 if [ $? != 0 ]; then exit 1; fi
-cmp f.n ff.n3
+cmp x509-f.n x509-ff.n3
 if [ $? != 0 ]; then exit 1; fi
 
-cmp f.p ff.p1
+cmp x509-f.p x509-ff.p1
 if [ $? != 0 ]; then exit 1; fi
-cmp f.p ff.p2
+cmp x509-f.p x509-ff.p2
 if [ $? != 0 ]; then exit 1; fi
-cmp f.p ff.p3
+cmp x509-f.p x509-ff.p3
 if [ $? != 0 ]; then exit 1; fi
 
-/bin/rm -f f.* ff.* fff.*
+/bin/rm -f x509-f.* x509-ff.* x509-fff.*
 exit 0
diff --git a/test/v3nametest.c b/test/v3nametest.c
new file mode 120000 (symlink)
index 0000000..1d209eb
--- /dev/null
@@ -0,0 +1 @@
+../crypto/x509v3/v3nametest.c
\ No newline at end of file
index 6a20011..6a27c02 100644 (file)
@@ -1,31 +1,58 @@
 #!/usr/bin/perl
 
-
 # Perl c_rehash script, scan all files in a directory
 # and add symbolic links to their hash values.
 
-my $openssl;
-
 my $dir = "/usr/local/ssl";
 my $prefix = "/usr/local/ssl";
 
-if(defined $ENV{OPENSSL}) {
-       $openssl = $ENV{OPENSSL};
-} else {
-       $openssl = "openssl";
-       $ENV{OPENSSL} = $openssl;
+my $openssl = $ENV{OPENSSL} || "openssl";
+my $pwd;
+my $x509hash = "-subject_hash";
+my $crlhash = "-hash";
+my $verbose = 0;
+my $symlink_exists=eval {symlink("",""); 1};
+my $removelinks = 1;
+
+##  Parse flags: -old, -h/-help, -n, -v; "--" ends option processing.
+while ( $ARGV[0] =~ /^-/ ) {
+    my $flag = shift @ARGV;
+    last if ( $flag eq '--');
+    if ( $flag eq '-old') {
+           $x509hash = "-subject_hash_old";
+           $crlhash = "-hash_old";
+    } elsif ( $flag eq '-h' || $flag eq '-help' ) {
+           help();
+    } elsif ( $flag eq '-n' ) {
+           $removelinks = 0;
+    } elsif ( $flag eq '-v' ) {
+           $verbose++;
+    } else {
+           print STDERR "Usage error; try -help.\n";
+           exit 1;
+    }
+}
+
+sub help {
+       print "Usage: c_rehash [-old] [-h] [-help] [-n] [-v] [dirs...]\n";
+       print "   -old use old-style digest\n";
+       print "   -h or -help print this help text\n";
+       print "   -n do not remove existing links\n";
+       print "   -v print files removed and linked\n";
+       exit 0;
 }
 
-my $pwd;
 eval "require Cwd";
 if (defined(&Cwd::getcwd)) {
        $pwd=Cwd::getcwd();
 } else {
-       $pwd=`pwd`; chomp($pwd);
+       $pwd=`pwd`;
+       chomp($pwd);
 }
-my $path_delim = ($pwd =~ /^[a-z]\:/i) ? ';' : ':'; # DOS/Win32 or Unix delimiter?
 
-$ENV{PATH} = "$prefix/bin" . ($ENV{PATH} ? $path_delim . $ENV{PATH} : ""); # prefix our path
+# DOS/Win32 or Unix delimiter?  Prefix our installdir, then search.
+my $path_delim = ($pwd =~ /^[a-z]\:/i) ? ';' : ':';
+$ENV{PATH} = "$prefix/bin" . ($ENV{PATH} ? $path_delim . $ENV{PATH} : "");
 
 if(! -x $openssl) {
        my $found = 0;
@@ -68,14 +95,17 @@ sub hash_dir {
        chdir $_[0];
        opendir(DIR, ".");
        my @flist = readdir(DIR);
-       # Delete any existing symbolic links
-       foreach (grep {/^[\da-f]+\.r{0,1}\d+$/} @flist) {
-               if(-l $_) {
-                       unlink $_;
+       closedir DIR;
+       if ( $removelinks ) {
+               # Delete any existing symbolic links
+               foreach (grep {/^[\da-f]+\.r{0,1}\d+$/} @flist) {
+                       if(-l $_) {
+                               unlink $_;
+                               print "unlink $_" if $verbose;
+                       }
                }
        }
-       closedir DIR;
-       FILE: foreach $fname (grep {/\.pem$/} @flist) {
+       FILE: foreach $fname (grep {/\.(pem)|(crt)|(cer)|(crl)$/} @flist) {
                # Check to see if certificates and/or CRLs present.
                my ($cert, $crl) = check_file($fname);
                if(!$cert && !$crl) {
@@ -117,7 +147,7 @@ sub check_file {
 sub link_hash_cert {
                my $fname = $_[0];
                $fname =~ s/'/'\\''/g;
-               my ($hash, $fprint) = `"$openssl" x509 -hash -fingerprint -noout -in "$fname"`;
+               my ($hash, $fprint) = `"$openssl" x509 $x509hash -fingerprint -noout -in "$fname"`;
                chomp $hash;
                chomp $fprint;
                $fprint =~ s/^.*=//;
@@ -133,16 +163,16 @@ sub link_hash_cert {
                        $suffix++;
                }
                $hash .= ".$suffix";
-               print "$fname => $hash\n";
-               $symlink_exists=eval {symlink("",""); 1};
                if ($symlink_exists) {
                        symlink $fname, $hash;
+                       print "link $fname -> $hash\n" if $verbose;
                } else {
                        open IN,"<$fname" or die "can't open $fname for read";
                        open OUT,">$hash" or die "can't open $hash for write";
                        print OUT <IN>; # does the job for small text files
                        close OUT;
                        close IN;
+                       print "copy $fname -> $hash\n" if $verbose;
                }
                $hashlist{$hash} = $fprint;
 }
@@ -152,7 +182,7 @@ sub link_hash_cert {
 sub link_hash_crl {
                my $fname = $_[0];
                $fname =~ s/'/'\\''/g;
-               my ($hash, $fprint) = `"$openssl" crl -hash -fingerprint -noout -in '$fname'`;
+               my ($hash, $fprint) = `"$openssl" crl $crlhash -fingerprint -noout -in '$fname'`;
                chomp $hash;
                chomp $fprint;
                $fprint =~ s/^.*=//;
@@ -168,12 +198,12 @@ sub link_hash_crl {
                        $suffix++;
                }
                $hash .= ".r$suffix";
-               print "$fname => $hash\n";
-               $symlink_exists=eval {symlink("",""); 1};
                if ($symlink_exists) {
                        symlink $fname, $hash;
+                       print "link $fname -> $hash\n" if $verbose;
                } else {
                        system ("cp", $fname, $hash);
+                       print "cp $fname -> $hash\n" if $verbose;
                }
                $hashlist{$hash} = $fprint;
 }
index bfc4a69..b086ff9 100644 (file)
@@ -1,31 +1,58 @@
 #!/usr/local/bin/perl
 
-
 # Perl c_rehash script, scan all files in a directory
 # and add symbolic links to their hash values.
 
-my $openssl;
-
 my $dir;
 my $prefix;
 
-if(defined $ENV{OPENSSL}) {
-       $openssl = $ENV{OPENSSL};
-} else {
-       $openssl = "openssl";
-       $ENV{OPENSSL} = $openssl;
+my $openssl = $ENV{OPENSSL} || "openssl";
+my $pwd;
+my $x509hash = "-subject_hash";
+my $crlhash = "-hash";
+my $verbose = 0;
+my $symlink_exists=eval {symlink("",""); 1};
+my $removelinks = 1;
+
+##  Parse flags: -old, -h/-help, -n, -v; "--" ends option processing.
+while ( $ARGV[0] =~ /^-/ ) {
+    my $flag = shift @ARGV;
+    last if ( $flag eq '--');
+    if ( $flag eq '-old') {
+           $x509hash = "-subject_hash_old";
+           $crlhash = "-hash_old";
+    } elsif ( $flag eq '-h' || $flag eq '-help' ) {
+           help();
+    } elsif ( $flag eq '-n' ) {
+           $removelinks = 0;
+    } elsif ( $flag eq '-v' ) {
+           $verbose++;
+    } else {
+           print STDERR "Usage error; try -help.\n";
+           exit 1;
+    }
+}
+
+sub help {
+       print "Usage: c_rehash [-old] [-h] [-help] [-n] [-v] [dirs...]\n";
+       print "   -old use old-style digest\n";
+       print "   -h or -help print this help text\n";
+       print "   -n do not remove existing links\n";
+       print "   -v print files removed and linked\n";
+       exit 0;
 }
 
-my $pwd;
 eval "require Cwd";
 if (defined(&Cwd::getcwd)) {
        $pwd=Cwd::getcwd();
 } else {
-       $pwd=`pwd`; chomp($pwd);
+       $pwd=`pwd`;
+       chomp($pwd);
 }
-my $path_delim = ($pwd =~ /^[a-z]\:/i) ? ';' : ':'; # DOS/Win32 or Unix delimiter?
 
-$ENV{PATH} = "$prefix/bin" . ($ENV{PATH} ? $path_delim . $ENV{PATH} : ""); # prefix our path
+# DOS/Win32 or Unix delimiter?  Prefix our installdir, then search.
+my $path_delim = ($pwd =~ /^[a-z]\:/i) ? ';' : ':';
+$ENV{PATH} = "$prefix/bin" . ($ENV{PATH} ? $path_delim . $ENV{PATH} : "");
 
 if(! -x $openssl) {
        my $found = 0;
@@ -68,14 +95,17 @@ sub hash_dir {
        chdir $_[0];
        opendir(DIR, ".");
        my @flist = readdir(DIR);
-       # Delete any existing symbolic links
-       foreach (grep {/^[\da-f]+\.r{0,1}\d+$/} @flist) {
-               if(-l $_) {
-                       unlink $_;
+       closedir DIR;
+       if ( $removelinks ) {
+               # Delete any existing symbolic links
+               foreach (grep {/^[\da-f]+\.r{0,1}\d+$/} @flist) {
+                       if(-l $_) {
+                               unlink $_;
+                               print "unlink $_" if $verbose;
+                       }
                }
        }
-       closedir DIR;
-       FILE: foreach $fname (grep {/\.pem$/} @flist) {
+       FILE: foreach $fname (grep {/\.(pem)|(crt)|(cer)|(crl)$/} @flist) {
                # Check to see if certificates and/or CRLs present.
                my ($cert, $crl) = check_file($fname);
                if(!$cert && !$crl) {
@@ -117,7 +147,7 @@ sub check_file {
 sub link_hash_cert {
                my $fname = $_[0];
                $fname =~ s/'/'\\''/g;
-               my ($hash, $fprint) = `"$openssl" x509 -hash -fingerprint -noout -in "$fname"`;
+               my ($hash, $fprint) = `"$openssl" x509 $x509hash -fingerprint -noout -in "$fname"`;
                chomp $hash;
                chomp $fprint;
                $fprint =~ s/^.*=//;
@@ -133,16 +163,16 @@ sub link_hash_cert {
                        $suffix++;
                }
                $hash .= ".$suffix";
-               print "$fname => $hash\n";
-               $symlink_exists=eval {symlink("",""); 1};
                if ($symlink_exists) {
                        symlink $fname, $hash;
+                       print "link $fname -> $hash\n" if $verbose;
                } else {
                        open IN,"<$fname" or die "can't open $fname for read";
                        open OUT,">$hash" or die "can't open $hash for write";
                        print OUT <IN>; # does the job for small text files
                        close OUT;
                        close IN;
+                       print "copy $fname -> $hash\n" if $verbose;
                }
                $hashlist{$hash} = $fprint;
 }
@@ -152,7 +182,7 @@ sub link_hash_cert {
 sub link_hash_crl {
                my $fname = $_[0];
                $fname =~ s/'/'\\''/g;
-               my ($hash, $fprint) = `"$openssl" crl -hash -fingerprint -noout -in '$fname'`;
+               my ($hash, $fprint) = `"$openssl" crl $crlhash -fingerprint -noout -in '$fname'`;
                chomp $hash;
                chomp $fprint;
                $fprint =~ s/^.*=//;
@@ -168,12 +198,12 @@ sub link_hash_crl {
                        $suffix++;
                }
                $hash .= ".r$suffix";
-               print "$fname => $hash\n";
-               $symlink_exists=eval {symlink("",""); 1};
                if ($symlink_exists) {
                        symlink $fname, $hash;
+                       print "link $fname -> $hash\n" if $verbose;
                } else {
                        system ("cp", $fname, $hash);
+                       print "cp $fname -> $hash\n" if $verbose;
                }
                $hashlist{$hash} = $fprint;
 }
diff --git a/util/copy-if-different.pl b/util/copy-if-different.pl
new file mode 100644 (file)
index 0000000..ec99e08
--- /dev/null
@@ -0,0 +1,78 @@
+#!/usr/local/bin/perl
+
+use strict;
+
+use Fcntl;     # supplies the O_* flags passed to sysopen below
+
+# copy-if-different.pl SOURCE... DEST
+# Copy each SOURCE to DEST unless the target already has identical contents.
+# DEST must be an existing directory when more than one SOURCE is given.
+
+my @filelist;  # glob-expanded arguments: the sources followed by the destination
+
+foreach my $arg (@ARGV) {
+       $arg =~ s|\\|/|g;       # compensate for bug/feature in cygwin glob...
+       foreach (glob $arg)
+               {
+               push @filelist, $_;
+               }
+}
+
+my $fnum = @filelist;  # counted before the destination is popped off below
+
+if ($fnum <= 1)
+       {
+       die "Need at least two filenames";
+       }
+
+my $dest = pop @filelist;      # the last name is the destination
+
+if ($fnum > 2 && ! -d $dest)
+       {
+       die "Destination must be a directory";
+       }
+
+foreach (@filelist)
+       {
+        my $dfile;     # target path for the current source
+       if (-d $dest)
+               {
+               $dfile = $_;
+               $dfile =~ s|^.*[/\\]([^/\\]*)$|$1|;     # keep the part after the last / or \
+               $dfile = "$dest/$dfile";
+               }
+       else
+               {
+               $dfile = $dest;
+               }
+       # If the target is an existing plain file, compare it with the source in 10240-byte reads.
+       my $buf;
+       if (-f $dfile)
+               {
+               sysopen(IN, $_, O_RDONLY|O_BINARY) || die "Can't Open $_";
+               sysopen(OUT, $dfile, O_RDONLY|O_BINARY)
+                 || die "Can't Open $dfile";
+               while (sysread IN, $buf, 10240)
+                       {
+                       my $b2; # corresponding chunk read from the target
+                       goto copy if !sysread(OUT, $b2, 10240) || $buf ne $b2;  # target ran out or differs
+                       }
+               goto copy if sysread(OUT, $buf, 1);     # target has bytes left after the source ended
+               close(IN);
+               close(OUT);
+               print "NOT copying: $_ to $dfile\n";
+               next;
+               }
+      copy:    # reached when the target is not a plain file yet or the comparison found a difference
+       sysopen(IN, $_, O_RDONLY|O_BINARY) || die "Can't Open $_";
+       sysopen(OUT, $dfile, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY)
+                                       || die "Can't Open $dfile";
+       while (sysread IN, $buf, 10240)
+               {
+               syswrite(OUT, $buf, length($buf));      # NOTE(review): return value is not checked
+               }
+       close(IN);
+       close(OUT);
+       print "Copying: $_ to $dfile\n";
+       }
+
index 41f033e..b15407f 100755 (executable)
@@ -4,6 +4,12 @@
 # It is basically a list of all variables from the passed makefile
 #
 
+while ($ARGV[0] =~ /^(\S+)\s*=(.*)$/)  # consume leading NAME=VALUE arguments
+       {
+       $sym{$1} = $2;                  # preset NAME in the %sym table
+       shift;                          # drop the argument from @ARGV
+       }
+
 $s="";
 while (<>)
        {
@@ -33,7 +39,7 @@ while (<>)
                $o =~ s/\s+/ /g;
 
                $o =~ s/\$[({]([^)}]+)[)}]/$sym{$1}/g;
-               $sym{$s}=$o;
+               $sym{$s}=$o if !exists $sym{$s};
                }
        }
 
index b594caf..7f7487d 100755 (executable)
@@ -4282,7 +4282,7 @@ CRYPTO_ccm128_decrypt                   4648      EXIST::FUNCTION:
 CRYPTO_ccm128_aad                       4649   EXIST::FUNCTION:
 CRYPTO_gcm128_init                      4650   EXIST::FUNCTION:
 CRYPTO_gcm128_decrypt                   4651   EXIST::FUNCTION:
-ENGINE_load_rsax                        4652   EXIST::FUNCTION:ENGINE
+ENGINE_load_rsax                        4652   NOEXIST::FUNCTION:
 CRYPTO_gcm128_decrypt_ctr32             4653   EXIST::FUNCTION:
 CRYPTO_gcm128_encrypt_ctr32             4654   EXIST::FUNCTION:
 CRYPTO_gcm128_finish                    4655   EXIST::FUNCTION:
@@ -4314,3 +4314,103 @@ BIO_dgram_sctp_wait_for_dry             4679    EXIST::FUNCTION:SCTP
 BIO_s_datagram_sctp                     4680   EXIST::FUNCTION:DGRAM,SCTP
 BIO_dgram_is_sctp                       4681   EXIST::FUNCTION:SCTP
 BIO_dgram_sctp_notification_cb          4682   EXIST::FUNCTION:SCTP
+i2d_DHxparams                           4683   EXIST::FUNCTION:DH
+EC_curve_nist2nid                       4684   EXIST::FUNCTION:EC
+DH_get_1024_160                         4685   EXIST::FUNCTION:DH
+PEM_write_DHxparams                     4686   EXIST:!WIN16:FUNCTION:DH
+d2i_DHxparams                           4687   EXIST::FUNCTION:DH
+EC_curve_nid2nist                       4688   EXIST::FUNCTION:EC
+DH_get_2048_256                         4689   EXIST::FUNCTION:DH
+PEM_write_bio_DHxparams                 4690   EXIST::FUNCTION:DH
+DH_get_2048_224                         4691   EXIST::FUNCTION:DH
+X509_chain_check_suiteb                 4692   EXIST::FUNCTION:
+X509_chain_up_ref                       4693   EXIST::FUNCTION:
+X509_VERIFY_PARAM_set1_ip_asc           4694   EXIST::FUNCTION:
+X509_CRL_check_suiteb                   4695   EXIST::FUNCTION:
+X509_VERIFY_PARAM_set1_email            4696   EXIST::FUNCTION:
+X509_check_email                        4697   EXIST::FUNCTION:
+X509_check_host                         4698   EXIST::FUNCTION:
+X509_check_ip_asc                       4699   EXIST::FUNCTION:
+X509_get0_signature                     4700   EXIST::FUNCTION:
+X509_get_signature_nid                  4701   EXIST::FUNCTION:
+X509_VERIFY_PARAM_set1_host             4702   EXIST::FUNCTION:
+X509_VERIFY_PARAM_set1_ip               4703   EXIST::FUNCTION:
+X509_check_ip                           4704   EXIST::FUNCTION:
+X509_STORE_set_lookup_crls_cb           4705   EXIST::FUNCTION:
+X509_CRL_diff                           4706   EXIST::FUNCTION:
+X509_CRL_http_nbio                      4707   EXIST::FUNCTION:EVP
+OCSP_REQ_CTX_i2d                        4708   EXIST::FUNCTION:
+OCSP_REQ_CTX_get0_mem_bio               4709   EXIST::FUNCTION:
+X509_STORE_CTX_get0_store               4710   EXIST::FUNCTION:
+X509_REVOKED_dup                        4711   EXIST::FUNCTION:
+CMS_RecipientInfo_encrypt               4712   EXIST::FUNCTION:CMS
+OCSP_REQ_CTX_http                       4713   EXIST::FUNCTION:
+OCSP_REQ_CTX_nbio                       4714   EXIST::FUNCTION:
+X509_http_nbio                          4715   EXIST::FUNCTION:EVP
+OCSP_set_max_response_length            4716   EXIST::FUNCTION:
+OCSP_REQ_CTX_new                        4717   EXIST::FUNCTION:
+OCSP_REQ_CTX_nbio_d2i                   4718   EXIST::FUNCTION:
+EVP_aes_256_wrap                        4719   EXIST::FUNCTION:AES
+CRYPTO_128_wrap                         4720   EXIST::FUNCTION:
+RSA_OAEP_PARAMS_new                     4721   EXIST::FUNCTION:RSA
+CRYPTO_128_unwrap                       4722   EXIST::FUNCTION:
+ECDSA_METHOD_set_name                   4723   EXIST::FUNCTION:ECDSA
+CMS_RecipientInfo_kari_decrypt          4724   EXIST::FUNCTION:CMS
+CMS_SignerInfo_get0_pkey_ctx            4725   EXIST::FUNCTION:CMS
+ECDSA_METHOD_set_flags                  4726   EXIST::FUNCTION:ECDSA
+ECDSA_METHOD_set_sign_setup             4727   EXIST::FUNCTION:ECDSA
+CMS_RecipientInfo_kari_orig_id_cmp      4728   EXIST:!VMS:FUNCTION:CMS
+CMS_RecipInfo_kari_orig_id_cmp          4728   EXIST:VMS:FUNCTION:CMS
+CMS_RecipientInfo_kari_get0_alg         4729   EXIST::FUNCTION:CMS
+EVP_aes_192_wrap                        4730   EXIST::FUNCTION:AES
+EVP_aes_128_cbc_hmac_sha256             4731   EXIST::FUNCTION:AES,SHA256
+DH_compute_key_padded                   4732   EXIST::FUNCTION:DH
+ECDSA_METHOD_set_sign                   4733   EXIST::FUNCTION:ECDSA
+CMS_RecipientEncryptedKey_cert_cmp      4734   EXIST:!VMS:FUNCTION:CMS
+CMS_RecipEncryptedKey_cert_cmp          4734   EXIST:VMS:FUNCTION:CMS
+DH_KDF_X9_42                            4735   EXIST::FUNCTION:DH
+RSA_OAEP_PARAMS_free                    4736   EXIST::FUNCTION:RSA
+EVP_des_ede3_wrap                       4737   EXIST::FUNCTION:DES
+RSA_OAEP_PARAMS_it                      4738   EXIST:!EXPORT_VAR_AS_FUNCTION:VARIABLE:RSA
+RSA_OAEP_PARAMS_it                      4738   EXIST:EXPORT_VAR_AS_FUNCTION:FUNCTION:RSA
+ASN1_TIME_diff                          4739   EXIST::FUNCTION:
+EVP_aes_256_cbc_hmac_sha256             4740   EXIST::FUNCTION:AES,SHA256
+CMS_SignerInfo_get0_signature           4741   EXIST::FUNCTION:CMS
+CMS_RecipientInfo_kari_get0_reks        4742   EXIST:!VMS:FUNCTION:CMS
+CMS_RecipInfo_kari_get0_reks            4742   EXIST:VMS:FUNCTION:CMS
+EVP_aes_128_wrap                        4743   EXIST::FUNCTION:AES
+CMS_SignerInfo_get0_md_ctx              4744   EXIST::FUNCTION:CMS
+OPENSSL_gmtime_diff                     4745   EXIST::FUNCTION:
+CMS_RecipientInfo_kari_set0_pkey        4746   EXIST:!VMS:FUNCTION:CMS
+CMS_RecipInfo_kari_set0_pkey            4746   EXIST:VMS:FUNCTION:CMS
+i2d_RSA_OAEP_PARAMS                     4747   EXIST::FUNCTION:RSA
+d2i_RSA_OAEP_PARAMS                     4748   EXIST::FUNCTION:RSA
+ECDH_KDF_X9_62                          4749   EXIST::FUNCTION:ECDH
+CMS_RecipientInfo_kari_get0_ctx         4750   EXIST::FUNCTION:CMS
+ECDSA_METHOD_new                        4751   EXIST::FUNCTION:ECDSA
+CMS_RecipientInfo_get0_pkey_ctx         4752   EXIST::FUNCTION:CMS
+CMS_RecipientEncryptedKey_get0_id       4753   EXIST:!VMS:FUNCTION:CMS
+CMS_RecipEncryptedKey_get0_id           4753   EXIST:VMS:FUNCTION:CMS
+RSA_padding_check_PKCS1_OAEP_mgf1       4754   EXIST:!VMS:FUNCTION:RSA
+RSA_pad_check_PKCS1_OAEP_mgf1           4754   EXIST:VMS:FUNCTION:RSA
+ECDSA_METHOD_set_verify                 4755   EXIST::FUNCTION:ECDSA
+CMS_SharedInfo_encode                   4756   EXIST::FUNCTION:CMS
+RSA_padding_add_PKCS1_OAEP_mgf1         4757   EXIST::FUNCTION:RSA
+CMS_RecipientInfo_kari_get0_orig_id     4758   EXIST:!VMS:FUNCTION:CMS
+CMS_RecipInfo_kari_get0_orig_id         4758   EXIST:VMS:FUNCTION:CMS
+ECDSA_METHOD_free                       4759   EXIST::FUNCTION:ECDSA
+X509_VERIFY_PARAM_get_count             4760   EXIST::FUNCTION:
+X509_VERIFY_PARAM_get0_name             4761   EXIST::FUNCTION:
+X509_VERIFY_PARAM_get0                  4762   EXIST::FUNCTION:
+X509V3_EXT_free                         4763   EXIST::FUNCTION:
+BIO_hex_string                          4764   EXIST::FUNCTION:
+X509_VERIFY_PARAM_set_hostflags         4765   EXIST::FUNCTION:
+BUF_strnlen                             4766   EXIST::FUNCTION:
+X509_VERIFY_PARAM_get0_peername         4767   EXIST::FUNCTION:
+ECDSA_METHOD_set_app_data               4768   EXIST::FUNCTION:ECDSA
+sk_deep_copy                            4769   EXIST::FUNCTION:
+ECDSA_METHOD_get_app_data               4770   EXIST::FUNCTION:ECDSA
+X509_VERIFY_PARAM_add1_host             4771   EXIST::FUNCTION:
+EC_GROUP_get_mont_data                  4772   EXIST::FUNCTION:EC
+i2d_re_X509_tbs                         4773   EXIST::FUNCTION:
+EVP_PKEY_asn1_set_item                  4774   EXIST::FUNCTION:
index 1eee7aa..9b8abc0 100755 (executable)
@@ -2,8 +2,12 @@
 # A bit of an evil hack but it post processes the file ../MINFO which
 # is generated by `make files` in the top directory.
 # This script outputs one mega makefile that has no shell stuff or any
-# funny stuff
-#
+# funny stuff (if the target is not "copy").
+# If the target is "copy", then it tries to create a makefile that can be
+# safely used with the -j flag and that is compatible with the top-level
+# Makefile, in the sense that it uses the same options and assembler files etc.
+
+use Cwd;
 
 $INSTALLTOP="/usr/local/ssl";
 $OPENSSLDIR="/usr/local/ssl";
@@ -28,6 +32,7 @@ my %mf_import = (
        INSTALLTOP     => \$INSTALLTOP,
        OPENSSLDIR     => \$OPENSSLDIR,
        PLATFORM       => \$mf_platform,
+       CC             => \$mf_cc,
        CFLAG          => \$mf_cflag,
        DEPFLAG        => \$mf_depflag,
        CPUID_OBJ      => \$mf_cpuid_asm,
@@ -43,16 +48,18 @@ my %mf_import = (
        RMD160_ASM_OBJ => \$mf_rmd_asm,
        WP_ASM_OBJ     => \$mf_wp_asm,
        CMLL_ENC       => \$mf_cm_asm,
+       MODES_ASM_OBJ  => \$mf_modes_asm,
+        ENGINES_ASM_OBJ=> \$mf_engines_asm,
        BASEADDR       => \$baseaddr,
        FIPSDIR        => \$fipsdir,
+       EC_ASM         => \$mf_ec_asm,
 );
 
-
 open(IN,"<Makefile") || die "unable to open Makefile!\n";
 while(<IN>) {
     my ($mf_opt, $mf_ref);
     while (($mf_opt, $mf_ref) = each %mf_import) {
-       if (/^$mf_opt\s*=\s*(.*)$/) {
+       if (/^$mf_opt\s*=\s*(.*)$/ && !defined($$mfref)) { # NOTE(review): $mfref is not the loop variable $mf_ref; presumably a typo, so this guard likely never blocks - confirm intent before renaming
            $$mf_ref = $1;
         }
     }
@@ -83,7 +90,8 @@ $infile="MINFO";
        "netware-libc", "CodeWarrior for NetWare - LibC - with WinSock Sockets",
        "netware-libc-bsdsock", "CodeWarrior for NetWare - LibC - with BSD Sockets",
        "default","cc under unix",
-       "auto", "auto detect from top level Makefile"
+       "auto", "auto detect from top level Makefile",
+        "copy", "copy from top level Makefile"
        );
 
 $platform="";
@@ -162,7 +170,7 @@ $mkdir="-mkdir" unless defined $mkdir;
 $ranlib="echo ranlib";
 
 $cc=(defined($VARS{'CC'}))?$VARS{'CC'}:'cc';
-$src_dir=(defined($VARS{'SRC'}))?$VARS{'SRC'}:'.';
+$src_dir=(defined($VARS{'SRC'}))?$VARS{'SRC'}: $platform eq 'copy' ? getcwd() : '.';
 $bin_dir=(defined($VARS{'BIN'}))?$VARS{'BIN'}:'';
 
 # $bin_dir.=$o causes a core dump on my sparc :-(
@@ -172,7 +180,8 @@ $NT=0;
 
 push(@INC,"util/pl","pl");
 
-if ($platform eq "auto") {
+if ($platform eq "auto" || $platform eq 'copy') {
+       $orig_platform = $platform;
        $platform = $mf_platform;
        print STDERR "Imported platform $mf_platform\n";
 }
@@ -300,6 +309,11 @@ else
 ##else
        { $cflags="$c_flags$cflags" if ($c_flags ne ""); }
 
+if ($orig_platform eq 'copy') {        # copy target: replace the compiler settings computed so far
+    $cflags = $mf_cflag;       # presumably the CFLAG value read from the top-level Makefile - confirm
+    $cc = $mf_cc;              # presumably the CC value read from the top-level Makefile - confirm
+}
+
 $ex_libs="$l_flags$ex_libs" if ($l_flags ne "");
 
 
@@ -391,6 +405,14 @@ for (;;)
        }
 close(IN);
 
+if ($orig_platform eq 'copy')
+       {
+       # Remove opensslconf.h so it doesn't get updated if we configure a
+       # different branch.
+       $exheader =~ s/[^ ]+\/opensslconf.h//;
+       $header =~ s/[^ ]+\/opensslconf.h//;
+       }
+
 if ($shlib)
        {
        $extra_install= <<"EOF";
@@ -422,6 +444,7 @@ EOF
        }
 
 $defs= <<"EOF";
+# N.B. You MUST use -j on FreeBSD.
 # This makefile has been automatically generated from the OpenSSL distribution.
 # This single makefile will build the complete OpenSSL distribution and
 # by default leave the 'intertesting' output files in .${o}out and the stuff
@@ -463,7 +486,7 @@ LINK=$link
 LFLAGS=$lflags
 RSC=$rsc
 
-# The output directory for everything intersting
+# The output directory for everything interesting
 OUT_D=$out_dir
 # The output directory for all the temporary muck
 TMP_D=$tmp_dir
@@ -482,13 +505,14 @@ ASM=$bin_dir$asm
 
 # FIPS validated module and support file locations
 
+E_PREMAIN_DSO=fips_premain_dso
+
 FIPSDIR=$fipsdir
 BASEADDR=$baseaddr
 FIPSLIB_D=\$(FIPSDIR)${o}lib
 FIPS_PREMAIN_SRC=\$(FIPSLIB_D)${o}fips_premain.c
 O_FIPSCANISTER=\$(FIPSLIB_D)${o}fipscanister.lib
 FIPS_SHA1_EXE=\$(FIPSDIR)${o}bin${o}fips_standalone_sha1${exep}
-E_PREMAIN_DSO=fips_premain_dso
 PREMAIN_DSO_EXE=\$(BIN_D)${o}fips_premain_dso$exep
 FIPSLINK=\$(PERL) \$(FIPSDIR)${o}bin${o}fipslink.pl
 
@@ -563,8 +587,12 @@ $banner
 \$(INC_D):
        \$(MKDIR) \"\$(INC_D)\"
 
+# This needs to be invoked once, when the makefile is first constructed, or
+# after cleaning.
+init: \$(TMP_D) \$(LIB_D) \$(INC_D) \$(INCO_D) \$(BIN_D) \$(TEST_D) headers
+       \$(PERL) \$(SRC_D)/util/copy-if-different.pl "\$(SRC_D)/crypto/opensslconf.h" "\$(INCO_D)/opensslconf.h"
+
 headers: \$(HEADER) \$(EXHEADER)
-       @
 
 lib: \$(LIBS_DEP) \$(E_SHLIB)
 
@@ -582,11 +610,6 @@ install: all
        \$(CP) apps${o}openssl.cnf \"\$(OPENSSLDIR)\"
 $extra_install
 
-
-test: \$(T_EXE)
-       cd \$(BIN_D)
-       ..${o}ms${o}test
-
 clean:
        \$(RM) \$(TMP_D)$o*.*
 
@@ -594,8 +617,25 @@ vclean:
        \$(RM) \$(TMP_D)$o*.*
        \$(RM) \$(OUT_D)$o*.*
 
+reallyclean:
+       \$(RM) -rf \$(TMP_D)
+       \$(RM) -rf \$(BIN_D)
+       \$(RM) -rf \$(TEST_D)
+       \$(RM) -rf \$(LIB_D)
+       \$(RM) -rf \$(INC_D)
+
+EOF
+
+if ($orig_platform ne 'copy')
+       {
+        $rules .= <<"EOF";
+test: \$(T_EXE)
+       cd \$(BIN_D)
+       ..${o}ms${o}test
+
 EOF
-    
+       }
+
 my $platform_cpp_symbol = "MK1MF_PLATFORM_$platform";
 $platform_cpp_symbol =~ s/-/_/g;
 if (open(IN,"crypto/buildinf.h"))
@@ -632,7 +672,7 @@ printf OUT "  #define DATE \"%s\"\n", scalar gmtime();
 printf OUT "#endif\n";
 close(OUT);
 
-# Strip of trailing ' '
+# Strip off trailing ' '
 foreach (keys %lib_obj) { $lib_obj{$_}=&clean_up_ws($lib_obj{$_}); }
 $test=&clean_up_ws($test);
 $e_exe=&clean_up_ws($e_exe);
@@ -662,10 +702,38 @@ if ($fips)
        {
        $rules.=&cc_compile_target("\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj",
                "\$(FIPS_PREMAIN_SRC)",
-               "-DFINGERPRINT_PREMAIN_DSO_LOAD \$(SHLIB_CFLAGS)", "");
+               "-DFINGERPRINT_PREMAIN_DSO_LOAD \$(APP_CFLAGS)", "");
        $rules.=&do_link_rule("\$(PREMAIN_DSO_EXE)","\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj \$(CRYPTOOBJ) \$(O_FIPSCANISTER)","","\$(EX_LIBS)", 1);
        }
 
+sub fix_asm    # ($asm, $dir): prefix each whitespace-separated name in $asm with $dir/ and drop .o
+       {
+       my($asm, $dir) = @_;
+       # An empty list stays empty; no trailing space is added in that case.
+       return '' if $asm eq '';
+       # Lead with a space so the first name is preceded by whitespace like the others.
+       $asm = " $asm";
+       $asm =~ s/\s+/ $dir\//g;        # each whitespace run becomes a space plus $dir/ (trailing whitespace too)
+       $asm =~ s/\.o//g;               # removes every .o occurrence, not only a suffix
+       $asm =~ s/^ //;                 # drop the space introduced for the first name
+       # The result ends with a space, presumably so callers can concatenate several results.
+       return $asm . ' ';
+       }
+
+if ($orig_platform eq 'copy') {        # copy target: append the names held in the $mf_*_asm variables to the CRYPTO list
+       $lib_obj{CRYPTO} .= fix_asm($mf_md5_asm, 'crypto/md5');
+       $lib_obj{CRYPTO} .= fix_asm($mf_bn_asm, 'crypto/bn');
+       # cpuid is included by the crypto dir
+       #$lib_obj{CRYPTO} .= fix_asm($mf_cpuid_asm, 'crypto');
+       # AES asm files DON'T end up included by the aes dir itself
+       $lib_obj{CRYPTO} .= fix_asm($mf_aes_asm, 'crypto/aes');
+       $lib_obj{CRYPTO} .= fix_asm($mf_sha_asm, 'crypto/sha');
+       $lib_obj{CRYPTO} .= fix_asm($mf_engines_asm, 'engines');
+       $lib_obj{CRYPTO} .= fix_asm($mf_rc4_asm, 'crypto/rc4');
+       $lib_obj{CRYPTO} .= fix_asm($mf_modes_asm, 'crypto/modes');
+       $lib_obj{CRYPTO} .= fix_asm($mf_ec_asm, 'crypto/ec');
+}
+
 foreach (values %lib_nam)
        {
        $lib_obj=$lib_obj{$_};
@@ -741,6 +809,8 @@ foreach (split(" ",$otherlibs))
 
 $rules.=&do_link_rule("\$(BIN_D)$o\$(E_EXE)$exep","\$(E_OBJ)","\$(LIBS_DEP)","\$(L_LIBS) \$(EX_LIBS)", ($fips && !$shlib) ? 2 : 0);
 
+$rules .= get_tests('test/Makefile') if $orig_platform eq 'copy';
+
 print $defs;
 
 if ($platform eq "linux-elf") {
@@ -958,6 +1028,11 @@ sub do_compile_rule
                        {
                        $ret.=&Sasm_compile_target("$to${o}$n$obj",$s,$n);
                        }
+               elsif (defined &special_compile_target and
+                      ($s=special_compile_target($_)))
+                       {
+                       $ret.=$s;
+                       }
                else    { die "no rule for $_"; }
                }
        return($ret);
@@ -968,6 +1043,10 @@ sub do_compile_rule
 sub perlasm_compile_target
        {
        my($target,$source,$bname)=@_;
+
+       return platform_perlasm_compile_target($target, $source, $bname)
+           if defined &platform_perlasm_compile_target;
+
        my($ret);
 
        $bname =~ s/(.*)\.[^\.]$/$1/;
@@ -999,9 +1078,13 @@ sub cc_compile_target
        $ex_flags.=" -DMK1MF_BUILD -D$platform_cpp_symbol" if ($source =~ /cversion/);
        $target =~ s/\//$o/g if $o ne "/";
        $source =~ s/\//$o/g if $o ne "/";
-       $srcd = "\$(SRC_D)$o" unless defined $srcd;
+       $srcd = "\$(SRC_D)$o" unless defined $srcd && $platform ne 'copy';
        $ret ="$target: $srcd$source\n\t";
-       $ret.="\$(CC) ${ofile}$target $ex_flags -c $srcd$source\n\n";
+       $ret.="\$(CC)";
+       $ret.= " -MMD" if $orig_platform eq "copy";
+       $ret.= " ${ofile}$target $ex_flags -c $srcd$source\n\n";
+       $target =~ s/\.o$/.d/;
+       $ret.=".sinclude \"$target\"\n\n" if $orig_platform eq "copy";
        return($ret);
        }
 
@@ -1066,7 +1149,7 @@ sub do_copy_rule
                if ($n =~ /bss_file/)
                        { $pp=".c"; }
                else    { $pp=$p; }
-               $ret.="$to${o}$n$pp: \$(SRC_D)$o$_$pp\n\t\$(CP) \"\$(SRC_D)$o$_$pp\" \"$to${o}$n$pp\"\n\n";
+               $ret.="$to${o}$n$pp: \$(SRC_D)$o$_$pp\n\t\$(PERL) \$(SRC_D)${o}util${o}copy-if-different.pl \"\$(SRC_D)$o$_$pp\" \"$to${o}$n$pp\"\n\n";
                }
        return($ret);
        }
@@ -1119,8 +1202,8 @@ sub read_options
                "no-tlsext" => \$no_tlsext,
                "no-srp" => \$no_srp,
                "no-cms" => \$no_cms,
-               "no-ec2m" => \$no_ec2m,
                "no-jpake" => \$no_jpake,
+               "no-ec2m" => \$no_ec2m,
                "no-ec_nistp_64_gcc_128" => 0,
                "no-err" => \$no_err,
                "no-sock" => \$no_sock,
@@ -1151,9 +1234,12 @@ sub read_options
                "no-montasm" => 0,
                "no-shared" => 0,
                "no-store" => 0,
-               "no-unit-test" => 0,
                "no-zlib" => 0,
                "no-zlib-dynamic" => 0,
+               "no-ssl-trace" => 0,
+               "no-unit-test" => 0,
+               "no-libunbound" => 0,
+               "no-multiblock" => 0,
                "fips" => \$fips
                );
 
index 894f052..c57c7f7 100755 (executable)
@@ -121,8 +121,10 @@ my @known_algorithms = ( "RC2", "RC4", "RC5", "IDEA", "DES", "BF",
                         "SCTP",
                         # SRTP
                         "SRTP",
+                        # SSL TRACE
+                        "SSL_TRACE",
                         # Unit testing
-                        "UNIT_TEST");
+                        "UNIT_TEST");
 
 my $options="";
 open(IN,"<Makefile") || die "unable to open Makefile!\n";
@@ -142,7 +144,7 @@ my $no_ec; my $no_ecdsa; my $no_ecdh; my $no_engine; my $no_hw;
 my $no_fp_api; my $no_static_engine=1; my $no_gmp; my $no_deprecated;
 my $no_rfc3779; my $no_psk; my $no_tlsext; my $no_cms; my $no_capieng;
 my $no_jpake; my $no_srp; my $no_ssl2; my $no_ec2m; my $no_nistp_gcc; 
-my $no_nextprotoneg; my $no_sctp; my $no_srtp;
+my $no_nextprotoneg; my $no_sctp; my $no_srtp; my $no_ssl_trace;
 my $no_unit_test; my $no_ssl3_method;
 
 my $fips;
@@ -239,6 +241,7 @@ foreach (@ARGV, split(/ /, $options))
        elsif (/^no-nextprotoneg$/)     { $no_nextprotoneg=1; }
        elsif (/^no-ssl2$/)     { $no_ssl2=1; }
        elsif (/^no-ssl3-method$/) { $no_ssl3_method=1; }
+       elsif (/^no-ssl-trace$/) { $no_ssl_trace=1; }
        elsif (/^no-capieng$/)  { $no_capieng=1; }
        elsif (/^no-jpake$/)    { $no_jpake=1; }
        elsif (/^no-srp$/)      { $no_srp=1; }
@@ -842,6 +845,7 @@ sub do_defs
                                        $def .= "int PEM_write_bio_$1(void);";
                                        next;
                                } elsif (/^DECLARE_PEM_write\s*\(\s*(\w*)\s*,/ ||
+                                       /^DECLARE_PEM_write_const\s*\(\s*(\w*)\s*,/ ||
                                         /^DECLARE_PEM_write_cb\s*\(\s*(\w*)\s*,/ ) {
                                        # Things not in Win16
                                        $def .=
@@ -1212,6 +1216,7 @@ sub is_valid
                                        { return 0; }
                        if ($keyword eq "SSL2" && $no_ssl2) { return 0; }
                        if ($keyword eq "SSL3_METHOD" && $no_ssl3_method) { return 0; }
+                       if ($keyword eq "SSL_TRACE" && $no_ssl_trace) { return 0; }
                        if ($keyword eq "CAPIENG" && $no_capieng) { return 0; }
                        if ($keyword eq "JPAKE" && $no_jpake) { return 0; }
                        if ($keyword eq "SRP" && $no_srp) { return 0; }
index d87c4fd..09ebebe 100644 (file)
@@ -14,6 +14,7 @@ my $pack_errcode;
 my $load_errcode;
 
 my $errcount;
+my $year = (localtime)[5] + 1900;
 
 while (@ARGV) {
        my $arg = $ARGV[0];
@@ -391,7 +392,7 @@ foreach $lib (keys %csrc)
        } else {
            push @out,
 "/* ====================================================================\n",
-" * Copyright (c) 2001-2011 The OpenSSL Project.  All rights reserved.\n",
+" * Copyright (c) 2001-$year The OpenSSL Project.  All rights reserved.\n",
 " *\n",
 " * Redistribution and use in source and binary forms, with or without\n",
 " * modification, are permitted provided that the following conditions\n",
@@ -584,7 +585,7 @@ EOF
        print OUT <<"EOF";
 /* $cfile */
 /* ====================================================================
- * Copyright (c) 1999-2011 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-$year The OpenSSL Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
index f708610..2bd96cd 100755 (executable)
@@ -90,6 +90,7 @@ while(<IN>) {
 #define sk_${type_thing}_set_cmp_func(st, cmp) SKM_sk_set_cmp_func($type_thing, (st), (cmp))
 #define sk_${type_thing}_dup(st) SKM_sk_dup($type_thing, st)
 #define sk_${type_thing}_pop_free(st, free_func) SKM_sk_pop_free($type_thing, (st), (free_func))
+#define sk_${type_thing}_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy($type_thing, (st), (copy_func), (free_func))
 #define sk_${type_thing}_shift(st) SKM_sk_shift($type_thing, (st))
 #define sk_${type_thing}_pop(st) SKM_sk_pop($type_thing, (st))
 #define sk_${type_thing}_sort(st) SKM_sk_sort($type_thing, (st))
@@ -108,7 +109,8 @@ EOF
 #define sk_${t1}_find(st, val) sk_find(CHECKED_STACK_OF($t1, st), CHECKED_PTR_OF($t2, val))
 #define sk_${t1}_value(st, i) (($t1)sk_value(CHECKED_STACK_OF($t1, st), i))
 #define sk_${t1}_num(st) SKM_sk_num($t1, st)
-#define sk_${t1}_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF($t1, st), CHECKED_SK_FREE_FUNC2($t1, free_func))
+#define sk_${t1}_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF($t1, st), CHECKED_SK_FREE_FUNC($t2, free_func))
+#define sk_${t1}_deep_copy(st, copy_func, free_func) ((STACK_OF($t1) *)sk_deep_copy(CHECKED_STACK_OF($t1, st), CHECKED_SK_COPY_FUNC($t2, copy_func), CHECKED_SK_FREE_FUNC($t2, free_func)))
 #define sk_${t1}_insert(st, val, i) sk_insert(CHECKED_STACK_OF($t1, st), CHECKED_PTR_OF($t2, val), i)
 #define sk_${t1}_free(st) SKM_sk_free(${t1}, st)
 #define sk_${t1}_set(st, i, val) sk_set(CHECKED_STACK_OF($t1, st), i, CHECKED_PTR_OF($t2, val))
index 6d03664..f7161d7 100644 (file)
@@ -18,7 +18,7 @@ $out_def="out32";
 $tmp_def="tmp32";
 $inc_def="inc32";
 #enable max error messages, disable most common warnings
-$cflags="-DWIN32_LEAN_AND_MEAN -q -w-ccc -w-rch -w-pia -w-aus -w-par -w-inl  -c -tWC -tWM -DOPENSSL_SYSNAME_WIN32 -DL_ENDIAN -DDSO_WIN32 -D_stricmp=stricmp -D_strnicmp=strnicmp -D_timeb=timeb -D_ftime=ftime ";
+$cflags="-DWIN32_LEAN_AND_MEAN -q -w-ccc -w-rch -w-pia -w-aus -w-par -w-inl  -c -tWC -tWM -DOPENSSL_SYSNAME_WIN32 -DL_ENDIAN -DDSO_WIN32 -D_stricmp=stricmp -D_strnicmp=strnicmp ";
 if ($debug)
 {
     $cflags.="-Od -y -v -vi- -D_DEBUG";
index b597998..da05e9d 100644 (file)
@@ -46,7 +46,6 @@ if ($FLAVOR =~ /WIN64/)
     # 
     $base_cflags= " $mf_cflag";
     my $f = $shlib || $fips ?' /MD':' /MT';
-    $lib_cflag='/Zl' if (!$shlib);     # remove /DEFAULTLIBs from static lib
     $opt_cflags=$f.' /Ox';
     $dbg_cflags=$f.'d /Od -DDEBUG -D_DEBUG';
     $lflags="/nologo /subsystem:console /opt:ref";
@@ -89,7 +88,7 @@ elsif ($FLAVOR =~ /CE/)
     $wcetgt = $ENV{'TARGETCPU'};       # just shorter name...
     SWITCH: for($wcetgt) {
        /^X86/          && do { $wcecdefs.=" -Dx86 -D_X86_ -D_i386_ -Di_386_";
-                               $wcelflag.=" /machine:IX86";    last; };
+                               $wcelflag.=" /machine:X86";     last; };
        /^ARMV4[IT]/    && do { $wcecdefs.=" -DARM -D_ARM_ -D$wcetgt";
                                $wcecdefs.=" -DTHUMB -D_THUMB_" if($wcetgt=~/T$/);
                                $wcecdefs.=" -QRarch4T -QRinterwork-return";
@@ -114,25 +113,30 @@ elsif ($FLAVOR =~ /CE/)
          $wcelflag.=" /machine:$wcetgt";                       last; };
     }
 
-    $cc='$(CC)';
+    $cc=($ENV{CC} or "cl");
     $base_cflags=' /W3 /WX /GF /Gy /nologo -DUNICODE -D_UNICODE -DOPENSSL_SYSNAME_WINCE -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DDSO_WIN32 -DNO_CHMOD -DOPENSSL_SMALL_FOOTPRINT';
     $base_cflags.=" $wcecdefs";
     $base_cflags.=' -I$(WCECOMPAT)/include'            if (defined($ENV{'WCECOMPAT'}));
     $base_cflags.=' -I$(PORTSDK_LIBPATH)/../../include'        if (defined($ENV{'PORTSDK_LIBPATH'}));
-    $opt_cflags=' /MC /O1i';   # optimize for space, but with intrinsics...
-    $dbg_cflags=' /MC /Od -DDEBUG -D_DEBUG';
+    if (`$cc 2>&1` =~ /Version ([0-9]+)\./ && $1>=14) {
+       $base_cflags.=$shlib?' /MD':' /MT';
+    } else {
+       $base_cflags.=' /MC';
+    }
+    $opt_cflags=' /O1i';       # optimize for space, but with intrinsics...
+    $dbg_cflags=' /Od -DDEBUG -D_DEBUG';
     $lflags="/nologo /opt:ref $wcelflag";
     }
 else   # Win32
     {
     $base_cflags= " $mf_cflag";
     my $f = $shlib || $fips ?' /MD':' /MT';
-    $lib_cflag='/Zl' if (!$shlib);     # remove /DEFAULTLIBs from static lib
     $ff = "/fixed";
     $opt_cflags=$f.' /Ox /O2 /Ob2';
     $dbg_cflags=$f.'d /Od -DDEBUG -D_DEBUG';
     $lflags="/nologo /subsystem:console /opt:ref";
     }
+$lib_cflag='/Zl' if (!$shlib); # remove /DEFAULTLIBs from static lib
 $mlflags='';
 
 $out_def ="out32";     $out_def.="dll"                 if ($shlib);
@@ -165,14 +169,26 @@ $rsc="rc";
 $efile="/out:";
 $exep='.exe';
 if ($no_sock)          { $ex_libs=''; }
-elsif ($FLAVOR =~ /CE/)        { $ex_libs='winsock.lib'; }
+elsif ($FLAVOR =~ /CE/)        { $ex_libs='ws2.lib'; }
 else                   { $ex_libs='ws2_32.lib'; }
 
 if ($FLAVOR =~ /CE/)
        {
-       $ex_libs.=' $(WCECOMPAT)/lib/wcecompatex.lib'   if (defined($ENV{'WCECOMPAT'}));
+       $ex_libs.=' crypt32.lib';       # for e_capi.c
+       if (defined($ENV{WCECOMPAT}))
+               {
+               $ex_libs.= ' $(WCECOMPAT)/lib';
+               if (-f "$ENV{WCECOMPAT}/lib/$ENV{TARGETCPU}/wcecompatex.lib")
+                       {
+                       $ex_libs.='/$(TARGETCPU)/wcecompatex.lib';
+                       }
+               else
+                       {
+                       $ex_libs.='/wcecompatex.lib';
+                       }
+               }
        $ex_libs.=' $(PORTSDK_LIBPATH)/portlib.lib'     if (defined($ENV{'PORTSDK_LIBPATH'}));
-       $ex_libs.=' /nodefaultlib:oldnames.lib coredll.lib corelibc.lib' if ($ENV{'TARGETCPU'} eq "X86");
+       $ex_libs.=' /nodefaultlib coredll.lib corelibc.lib' if ($ENV{'TARGETCPU'} eq "X86");
        }
 else
        {
index 146611a..1d4e9dc 100644 (file)
@@ -26,11 +26,12 @@ else
                { $cflags="-O"; }
        }
 $obj='.o';
+$asm_suffix='.s';
 $ofile='-o ';
 
 # EXE linking stuff
 $link='${CC}';
-$lflags='${CFLAGS}';
+$lflags='${CFLAG}';
 $efile='-o ';
 $exep='';
 $ex_libs="";
@@ -53,6 +54,93 @@ $des_enc_src="";
 $bf_enc_obj="";
 $bf_enc_src="";
 
+# Table of x86_64 perlasm modules, keyed by module basename.  The value is the
+# directory containing the module's asm/<basename>.pl generator script;
+# platform_perlasm_compile_target looks targets up here to build the rule
+# that produces $(TMP_D)/<basename>.s.
+%perl1 = (
+         'md5-x86_64' => 'crypto/md5',
+         'x86_64-mont' => 'crypto/bn',
+         'x86_64-mont5' => 'crypto/bn',
+         'x86_64-gf2m' => 'crypto/bn',
+         'aes-x86_64' => 'crypto/aes',
+         'vpaes-x86_64' => 'crypto/aes',
+         'bsaes-x86_64' => 'crypto/aes',
+         'aesni-x86_64' => 'crypto/aes',
+         'aesni-sha1-x86_64' => 'crypto/aes',
+         'sha1-x86_64' => 'crypto/sha',
+         'e_padlock-x86_64' => 'engines',
+         'rc4-x86_64' => 'crypto/rc4',
+         'rc4-md5-x86_64' => 'crypto/rc4',
+         'ghash-x86_64' => 'crypto/modes',
+         'aesni-gcm-x86_64' => 'crypto/modes',
+         'aesni-sha256-x86_64' => 'crypto/aes',
+          'rsaz-x86_64' => 'crypto/bn',
+          'rsaz-avx2' => 'crypto/bn',
+         'aesni-mb-x86_64' => 'crypto/aes',
+         'sha1-mb-x86_64' => 'crypto/sha',
+         'sha256-mb-x86_64' => 'crypto/sha',
+         'ecp_nistz256-x86_64' => 'crypto/ec',
+         );
+
+# Return the makefile rule (target line plus recipe) that generates the
+# assembler source for a perlasm-built object.
+#
+#   $target  object file name as written in the makefile, e.g.
+#            '$(OBJ_D)/aes-x86_64.o'
+#   $source, $bname
+#            accepted for the caller's benefit but not used here
+#
+# Modules listed in %perl1 get a generic rule that redirects the script's
+# output into $(TMP_D)/<name>.s.  The remaining modules are special-cased.
+# Dies with the target name when no rule is known for $target.
+#
+# If I were feeling more clever, these could probably be extracted
+# from makefiles.
+sub platform_perlasm_compile_target
+       {
+       my ($target, $source, $bname) = @_;
+
+       for my $p (keys %perl1)
+               {
+               if ($target eq "\$(OBJ_D)/$p.o")
+                       {
+                       return << "EOF";
+\$(TMP_D)/$p.s: $perl1{$p}/asm/$p.pl
+       \$(PERL) $perl1{$p}/asm/$p.pl \$(PERLASM_SCHEME) > \$@
+EOF
+                       }
+               }
+       # The cpuid script lives directly in crypto/, not in an asm/ subdirectory.
+       if ($target eq '$(OBJ_D)/x86_64cpuid.o')
+               {
+               return << 'EOF';
+$(TMP_D)/x86_64cpuid.s: crypto/x86_64cpuid.pl
+       $(PERL) crypto/x86_64cpuid.pl $(PERLASM_SCHEME) > $@
+EOF
+               }
+       # Both SHA-2 objects are generated by sha512-x86_64.pl, which is handed
+       # the output file name as an argument instead of having stdout redirected.
+       # NOTE(review): presumably the script picks SHA-256 vs SHA-512 from that
+       # file name -- confirm against crypto/sha/asm/sha512-x86_64.pl.
+       elsif ($target eq '$(OBJ_D)/sha256-x86_64.o')
+               {
+               return << 'EOF';
+$(TMP_D)/sha256-x86_64.s: crypto/sha/asm/sha512-x86_64.pl
+       $(PERL) crypto/sha/asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
+EOF
+               }
+       elsif ($target eq '$(OBJ_D)/sha512-x86_64.o')
+               {
+               return << 'EOF';
+$(TMP_D)/sha512-x86_64.s: crypto/sha/asm/sha512-x86_64.pl
+       $(PERL) crypto/sha/asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
+EOF
+               }
+
+       die $target;
+       }
+
+# Return a hand-written makefile rule for sources that need a non-standard
+# compile step, or undef when $target has no such rule.  The only special
+# case is 'crypto/bn/x86_64-gcc', whose object is built from the C file
+# crypto/bn/asm/x86_64-gcc.c.
+# NOTE(review): the recipe expands $(CFLAGS); confirm the generated makefile
+# defines a variable of that name.
+sub special_compile_target
+       {
+       my ($target) = @_;
+
+       return undef unless $target eq 'crypto/bn/x86_64-gcc';
+
+       # Non-interpolating heredoc: the make variables are emitted verbatim.
+       return << 'EOF';
+$(TMP_D)/x86_64-gcc.o:        crypto/bn/asm/x86_64-gcc.c
+       $(CC) $(CFLAGS) -c -o $@ crypto/bn/asm/x86_64-gcc.c
+EOF
+       }
+
 sub do_lib_rule
        {
        local($obj,$target,$name,$shlib)=@_;
@@ -72,7 +160,7 @@ sub do_link_rule
        {
        local($target,$files,$dep_libs,$libs)=@_;
        local($ret,$_);
-       
+
        $file =~ s/\//$o/g if $o ne '/';
        $n=&bname($target);
        $ret.="$target: $files $dep_libs\n";
@@ -93,4 +181,262 @@ sub which
                }
        }
 
+# Replace every $(NAME) reference in $str -- optionally preceded by "./" --
+# with $(TEST_D)/<value>, for each NAME => value pair in the hash referenced
+# by $tests.  Returns the rewritten string.
+sub fixtests
+  {
+  my ($str, $tests) = @_;
+
+  for my $var (keys %$tests)
+    {
+    my $prog = $tests->{$var};
+    $str =~ s/(\.\/)?\$\($var\)/\$(TEST_D)\/$prog/g;
+    }
+
+  return $str;
+  }
+
+# Rewrite a whitespace-separated dependency list.  Each word is mapped as
+# follows, and the results are joined with single spaces:
+#   - a word that is a key of %$fakes becomes the fake target stored there;
+#   - a word containing no '/' gets '$(TEST_D)/' prepended;
+#   - any other word is kept unchanged.
+sub fixdeps
+ {
+  my ($str, $fakes) = @_;
+
+  my $out = '';
+  for my $word (split(/\s+/, $str))
+    {
+    # Separator only once something has been emitted, so the empty field
+    # produced by leading whitespace does not yield a leading space.
+    $out .= ' ' unless $out eq '';
+    $out .= exists($fakes->{$word}) ? $fakes->{$word}
+          : $word =~ /^[^\/]+$/     ? '$(TEST_D)/' . $word
+          :                           $word;
+    }
+
+  return $out;
+  }
+
+# Make a test recipe run inside the test directory by inserting
+# "cd $(TEST_D) && " after the leading whitespace (and an optional "@") at
+# the very start of $str.  Only that first recipe line is changed; a string
+# that does not begin with whitespace is returned unmodified.
+#
+# This is the variant the original author marked "Compatible with -j...".
+sub fixrules
+  {
+  my ($str) = @_;
+
+  # No /m or /g: the pattern is anchored to the start of the whole string.
+  $str =~ s/^(\s+@?)/$1cd \$(TEST_D) && /;
+  return $str;
+  }
+
+# Build makefile rules that install the files named in @targets from
+# $(SRC_D)/$src into $(TEST_D).  Each rule is followed by a blank line.
+# When $sed is true, the recipe additionally regenerates the copy through
+# sed, rewriting "../apps" to "../$(OUT_D)" and "../util" to "../$(TEST_D)".
+# Returns the concatenated rules ('' when @targets is empty).
+sub copy_scripts
+  {
+  my ($sed, $src, @targets) = @_;
+
+  my @rules;
+  for my $name (@targets)
+    {
+    my $from = "\$(SRC_D)/$src/$name";
+    my $to = "\$(TEST_D)/$name";
+
+    # Copy first so we get file modes...
+    my $rule = "${to}: ${from}\n\tcp ${from} ${to}\n";
+    # ...then overwrite the contents with the sed-edited text if requested.
+    $rule .= "\tsed -e 's/\\.\\.\\/apps/..\\/\$(OUT_D)/' -e 's/\\.\\.\\/util/..\\/\$(TEST_D)/' < ${from} > ${to}\n" if $sed;
+    push @rules, "$rule\n";
+    }
+  return join('', @rules);
+  }
+
+# Parse the test makefile named by $makefile and return, as one string, the
+# makefile text needed to run its tests out of $(TEST_D):
+#   - the test_scripts / ocsp / smime set-up rules,
+#   - copy rules for the support files listed below,
+#   - the "<NAME>TEST = value" variable definitions found in the makefile,
+#   - a "test:" target naming everything listed on the "alltests:" line,
+#   - one rewritten rule per collected target.
+# Reads the globals $no_jpake, $no_ige, $no_md2 and $no_rc5; updates the
+# globals $fake and $scripts.  Dies if $makefile cannot be opened.
+sub get_tests
+  {
+  my ($makefile) = @_;
+
+  open(M, $makefile) || die "Can't open $makefile: $!";
+  my %targets;   # target name => recipe text (one "\n"-terminated line each)
+  my %deps;      # target name => dependency text following the colon
+  my %tests;     # makefile variable matching \S+TEST => its value
+  my %alltests;  # set of names listed on the "alltests:" line
+  my %fakes;     # each name of a multi-target rule => its "_fakeN" stand-in
+  while (my $line = <M>)
+    {
+    chomp $line;
+    # Join backslash-continued lines into one logical line.
+    while ($line =~ /^(.*)\\$/)
+      {
+      $line = $1 . <M>;
+      }
+
+    if ($line =~ /^alltests:(.*)$/)
+      {
+      my @t = split(/\s+/, $1);
+      foreach my $t (@t)
+       {
+       $targets{$t} = '';
+       $alltests{$t} = undef;
+        }
+      }
+
+    # A rule for a known target, or the multi-target test_ss / test_gen rule.
+    if (($line =~ /^(?<t>\S+):(?<d>.*)$/ && exists $targets{$1})
+       || $line =~ /^(?<t>test_(ss|gen) .*):(?<d>.*)/)
+      {
+      my $t = $+{t};
+      my $d = $+{d};
+      # If there are multiple targets stupid FreeBSD make runs the
+      # rules once for each dependency that matches one of the
+      # targets. Running the same rule twice concurrently causes
+      # breakage, so replace with a fake target.
+      if ($t =~ /\s/)
+        {
+       ++$fake;
+       my @targets = split /\s+/, $t;
+       $t = "_fake$fake";
+       foreach my $f (@targets)
+         {
+         $fakes{$f} = $t;
+         }
+       }
+      $deps{$t} = $d;
+      $deps{$t} =~ s/#.*$//;
+      # The recipe runs up to the next empty line (or end of file).
+      for (;;)
+       {
+       $line = <M>;
+       chomp $line;
+       last if $line eq '';
+       $targets{$t} .= "$line\n";
+        }
+      next;
+      }
+
+    if ($line =~ /^(\S+TEST)=\s*(\S+)$/)
+      {
+      $tests{$1} = $2;
+      next;
+      }
+    }
+  close(M);
+
+  delete $alltests{test_jpake} if $no_jpake;
+  # NOTE(review): unlike its neighbours this removes the entry from %targets,
+  # not %alltests, so test_ige stays in the "test:" list -- confirm intended.
+  delete $targets{test_ige} if $no_ige;
+  delete $alltests{test_md2} if $no_md2;
+  delete $alltests{test_rc5} if $no_rc5;
+
+  my $tests;
+  foreach my $t (keys %tests)
+    {
+    $tests .= "$t = $tests{$t}\n";
+    }
+
+  my $each;
+  foreach my $t (keys %targets)
+    {
+    next if $t eq '';
+
+    my $d = $deps{$t};
+    $d =~ s/\.\.\/apps/\$(BIN_D)/g;
+    $d =~ s/\.\.\/util/\$(TEST_D)/g;
+    $d = fixtests($d, \%tests);
+    $d = fixdeps($d, \%fakes);
+
+    my $r = $targets{$t};
+    $r =~ s/\.\.\/apps/..\/\$(BIN_D)/g;
+    $r =~ s/\.\.\/util/..\/\$(TEST_D)/g;
+    $r =~ s/\.\.\/(\S+)/\$(SRC_D)\/$1/g;
+    $r = fixrules($r);
+
+    # Targets without a recipe produce no rule.
+    next if $r eq '';
+
+    $t =~ s/\s+/ \$(TEST_D)\//g;
+
+    $each .= "$t: test_scripts $d\n\t\@echo '$t test started'\n$r\t\@echo '$t test done'\n\n";
+    }
+
+  # FIXME: Might be a clever way to figure out what needs copying
+  my @copies = ( 'bctest',
+                'testgen',
+                'cms-test.pl',
+                'tx509',
+                'test.cnf',
+                'testenc',
+                'tocsp',
+                'testca',
+                'CAss.cnf',
+                'testtsa',
+                'CAtsa.cnf',
+                'Uss.cnf',
+                'P1ss.cnf',
+                'P2ss.cnf',
+                'tcrl',
+                'tsid',
+                'treq',
+                'tpkcs7',
+                'tpkcs7d',
+                'testcrl.pem',
+                'testx509.pem',
+                'v3-cert1.pem',
+                'v3-cert2.pem',
+                'testreq2.pem',
+                'testp7.pem',
+                'pkcs7-1.pem',
+                'trsa',
+                'testrsa.pem',
+                'testsid.pem',
+                'testss',
+                'testssl',
+                'testsslproxy',
+                'serverinfo.pem',
+              );
+  my $copies = copy_scripts(1, 'test', @copies);
+  # smcont.txt is copied verbatim, without the sed path rewrite.
+  $copies .= copy_scripts(0, 'test', ('smcont.txt'));
+
+  my @utils = ( 'shlib_wrap.sh',
+               'opensslwrap.sh',
+             );
+  $copies .= copy_scripts(1, 'util', @utils);
+
+  my @apps = ( 'CA.sh',
+              'openssl.cnf',
+              'server2.pem',
+            );
+  $copies .= copy_scripts(1, 'apps', @apps);
+
+  $copies .= copy_scripts(1, 'crypto/evp', ('evptests.txt'));
+
+  $scripts = "test_scripts: \$(TEST_D)/CA.sh \$(TEST_D)/opensslwrap.sh \$(TEST_D)/openssl.cnf \$(TEST_D)/shlib_wrap.sh ocsp smime\n";
+  $scripts .= "\nocsp:\n\tcp -R test/ocsp-tests \$(TEST_D)\n";
+  $scripts .= "\nsmime:\n\tcp -R test/smime-certs \$(TEST_D)\n";
+
+  # Multi-target rules are referenced through their fake stand-in.
+  my $all = 'test:';
+  foreach my $t (keys %alltests)
+    {
+    if (exists($fakes{$t}))
+      {
+      $all .= " $fakes{$t}";
+      }
+    else
+      {
+      $all .= " $t";
+      }
+    }
+
+  return "$scripts\n$copies\n$tests\n$all\n\n$each";
+  }
+
 1;
index dd1c5e8..5a89913 100755 (executable)
@@ -316,8 +316,55 @@ SSL_CTX_set_next_protos_adv_cb          355        EXIST:VMS:FUNCTION:NEXTPROTONEG
 SSL_get0_next_proto_negotiated          356    EXIST::FUNCTION:NEXTPROTONEG
 SSL_get_selected_srtp_profile           357    EXIST::FUNCTION:SRTP
 SSL_CTX_set_tlsext_use_srtp             358    EXIST::FUNCTION:SRTP
-SSL_select_next_proto                   359    EXIST::FUNCTION:NEXTPROTONEG
+SSL_select_next_proto                   359    EXIST::FUNCTION:TLSEXT
 SSL_get_srtp_profiles                   360    EXIST::FUNCTION:SRTP
 SSL_CTX_set_next_proto_select_cb        361    EXIST:!VMS:FUNCTION:NEXTPROTONEG
 SSL_CTX_set_next_proto_sel_cb           361    EXIST:VMS:FUNCTION:NEXTPROTONEG
 SSL_SESSION_get_compress_id             362    EXIST::FUNCTION:
+SSL_get0_param                          363    EXIST::FUNCTION:
+SSL_CTX_get0_privatekey                 364    EXIST::FUNCTION:
+SSL_get_shared_sigalgs                  365    EXIST::FUNCTION:TLSEXT
+SSL_CONF_CTX_finish                     366    EXIST::FUNCTION:
+DTLS_method                             367    EXIST::FUNCTION:
+DTLS_client_method                      368    EXIST::FUNCTION:
+SSL_CIPHER_standard_name                369    EXIST::FUNCTION:SSL_TRACE
+SSL_set_alpn_protos                     370    EXIST::FUNCTION:
+SSL_CTX_set_srv_supp_data               371    NOEXIST::FUNCTION:
+SSL_CONF_cmd_argv                       372    EXIST::FUNCTION:
+DTLSv1_2_server_method                  373    EXIST::FUNCTION:
+SSL_COMP_set0_compression_methods       374    EXIST:!VMS:FUNCTION:COMP
+SSL_COMP_set0_compress_methods          374    EXIST:VMS:FUNCTION:COMP
+SSL_CTX_set_cert_cb                     375    EXIST::FUNCTION:
+SSL_CTX_add_client_custom_ext           376    EXIST::FUNCTION:TLSEXT
+SSL_is_server                           377    EXIST::FUNCTION:
+SSL_CTX_get0_param                      378    EXIST::FUNCTION:
+SSL_CONF_cmd                            379    EXIST::FUNCTION:
+SSL_CTX_get_ssl_method                  380    EXIST::FUNCTION:
+SSL_CONF_CTX_set_ssl_ctx                381    EXIST::FUNCTION:
+SSL_CIPHER_find                         382    EXIST::FUNCTION:
+SSL_CTX_use_serverinfo                  383    EXIST::FUNCTION:TLSEXT
+DTLSv1_2_client_method                  384    EXIST::FUNCTION:
+SSL_get0_alpn_selected                  385    EXIST::FUNCTION:
+SSL_CONF_CTX_clear_flags                386    EXIST::FUNCTION:
+SSL_CTX_set_alpn_protos                 387    EXIST::FUNCTION:
+SSL_CTX_add_server_custom_ext           389    EXIST::FUNCTION:TLSEXT
+SSL_CTX_get0_certificate                390    EXIST::FUNCTION:
+SSL_CTX_set_alpn_select_cb              391    EXIST::FUNCTION:
+SSL_CONF_cmd_value_type                 392    EXIST::FUNCTION:
+SSL_set_cert_cb                         393    EXIST::FUNCTION:
+SSL_get_sigalgs                         394    EXIST::FUNCTION:TLSEXT
+SSL_CONF_CTX_set1_prefix                395    EXIST::FUNCTION:
+SSL_CONF_CTX_new                        396    EXIST::FUNCTION:
+SSL_CONF_CTX_set_flags                  397    EXIST::FUNCTION:
+SSL_CONF_CTX_set_ssl                    398    EXIST::FUNCTION:
+SSL_check_chain                         399    EXIST::FUNCTION:TLSEXT
+SSL_certs_clear                         400    EXIST::FUNCTION:
+SSL_CONF_CTX_free                       401    EXIST::FUNCTION:
+SSL_trace                               402    EXIST::FUNCTION:SSL_TRACE
+SSL_CTX_set_cli_supp_data               403    NOEXIST::FUNCTION:
+DTLSv1_2_method                         404    EXIST::FUNCTION:
+DTLS_server_method                      405    EXIST::FUNCTION:
+SSL_CTX_use_serverinfo_file             406    EXIST::FUNCTION:STDIO,TLSEXT
+SSL_COMP_free_compression_methods       407    EXIST:!VMS:FUNCTION:COMP
+SSL_COMP_free_compress_methods          407    EXIST:VMS:FUNCTION:COMP
+SSL_extension_supported                 409    EXIST::FUNCTION:TLSEXT