7048212 T4 engine needed for openssl
authorDan Anderson <dan.anderson@oracle.com>
Wed, 20 Jul 2011 10:20:13 -0700
changeset 426 8c675b553a27
parent 425 ddb1d95b4cfc
child 427 1902114e6f0f
7048212 T4 engine needed for openssl
components/openssl/README
components/openssl/openssl-1.0.0/Makefile
components/openssl/openssl-1.0.0/engines/t4/eng_t4.c
components/openssl/openssl-1.0.0/engines/t4/eng_t4_aes_asm.h
components/openssl/openssl-1.0.0/engines/t4/eng_t4_digest.c
components/openssl/openssl-1.0.0/engines/t4/eng_t4_err.c
components/openssl/openssl-1.0.0/engines/t4/eng_t4_err.h
components/openssl/openssl-1.0.0/engines/t4/t4_aes.S
components/openssl/openssl-1.0.0/engines/t4/t4_md5.S
components/openssl/openssl-1.0.0/engines/t4/t4_sha1.S
components/openssl/openssl-1.0.0/engines/t4/t4_sha2.S
components/openssl/openssl-1.0.0/patches/openssl-1.0.0d-t4-engine.sparc-patch
--- a/components/openssl/README	Tue Jul 19 13:59:56 2011 -0700
+++ b/components/openssl/README	Wed Jul 20 10:20:13 2011 -0700
@@ -85,6 +85,11 @@
 This patch is for OpenSSL 1.0.0d.  For newer OpenSSL versions, a newer patch
 may be needed.
 
+openssl-1.0.0d-t4-engine.sparc-patch
+SPARC-only patch.
+Adds a built-in engine, t4, to support the SPARC T4 crypto instructions,
+along with files in the directory engines/t4.
+
 opensslconf.patch
 Modifies opensslconf.h so that it is suitable for both 32bit and 64bit installs.
 OpenSSL either builds for 32bit or 64bit - it doesn't allow for combined 32bit
--- a/components/openssl/openssl-1.0.0/Makefile	Tue Jul 19 13:59:56 2011 -0700
+++ b/components/openssl/openssl-1.0.0/Makefile	Wed Jul 20 10:20:13 2011 -0700
@@ -36,6 +36,7 @@
 
 # Architecture-specific patches
 EXTRA_PATCHES.i386 = $(PATCH_DIR)/openssl-1.0.0d-aesni-v4.i386-patch
+EXTRA_PATCHES.sparc = $(PATCH_DIR)/openssl-1.0.0d-t4-engine.sparc-patch
 EXTRA_PATCHES = $(EXTRA_PATCHES.$(MACH))
 
 include $(WS_TOP)/make-rules/prep.mk
@@ -126,7 +127,10 @@
       $(CP) -fp engines/aesni/aesni-x86*.pl	$(@D)/crypto/aes/asm; \
       $(CP) -fp engines/devcrypto/*.[ch]	$(@D)/engines; \
       $(CP) -fp engines/pkcs11/*.[ch]		$(@D)/crypto/engine; \
-      )
+      $(CP) -fp engines/t4/eng_t4*.[ch]		$(@D)/crypto/engine; \
+      $(CP) -fp engines/t4/t4_aes.S		$(@D)/crypto/aes/asm; \
+      $(CP) -fp engines/t4/t4_md5.S		$(@D)/crypto/md5/asm; \
+      $(CP) -fp engines/t4/t4_sha?.S		$(@D)/crypto/sha/asm; )
 
 build:		$(BUILD_32_and_64)
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.0/engines/t4/eng_t4.c	Wed Jul 20 10:20:13 2011 -0700
@@ -0,0 +1,989 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This engine supports SPARC microprocessors that provide AES and other
+ * cipher and hash instructions, such as the T4 microprocessor.
+ */
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AES_T4) && \
+	!defined(OPENSSL_NO_AES)
+#include <sys/types.h>
+#include <sys/auxv.h>	/* getisax() */
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <openssl/bio.h>
+#include <openssl/aes.h>
+#include <openssl/engine.h>
+#include "eng_t4_aes_asm.h"
+
+#define	T4_LIB_NAME "SPARC T4 engine"
+#include "eng_t4_err.c"
+
+/* Copied from Solaris aes_impl.h */
+#ifndef	MAX_AES_NR
+#define	MAX_AES_NR		14 /* Maximum number of rounds */
+#endif
+#ifndef	MAX_AES_NB
+#define	MAX_AES_NB		4  /* Number of columns comprising a state */
+#endif
+
+/* Index for the supported ciphers */
+typedef enum {
+	T4_AES_128_CBC,
+	T4_AES_192_CBC,
+	T4_AES_256_CBC,
+#ifndef	SOLARIS_NO_AES_CFB128
+	T4_AES_128_CFB128,
+	T4_AES_192_CFB128,
+	T4_AES_256_CFB128,
+#endif	/* !SOLARIS_NO_AES_CFB128 */
+#ifndef	SOLARIS_NO_AES_CTR
+	T4_AES_128_CTR,
+	T4_AES_192_CTR,
+	T4_AES_256_CTR,
+#endif
+	T4_AES_128_ECB,
+	T4_AES_192_ECB,
+	T4_AES_256_ECB,
+	T4_CIPHER_MAX
+} t4_cipher_id;
+
+/* T4 cipher context; must be 8-byte aligned (last field must be uint64_t)  */
+typedef struct t4_cipher_ctx {
+	t4_cipher_id	index;
+	uint64_t	*iv;
+	uint64_t	aligned_iv_buffer[2]; /* use if original IV unaligned */
+	/* Encryption and decryption key schedule are the same: */
+	uint64_t	t4_ks[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
+} t4_cipher_ctx_t;
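+
+/*
+ * Sizing note (worked example): with MAX_AES_NR = 14 and MAX_AES_NB = 4,
+ * t4_ks above holds (14 + 1) * 4 = 60 uint64_t entries, enough for the
+ * largest (AES-256) key schedule.
+ */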
+
+typedef struct t4_cipher {
+	t4_cipher_id	id;
+	int		nid;
+	int		iv_len;
+	int		min_key_len;
+	int		max_key_len;
+	unsigned long	flags;
+} t4_cipher_t;
+
+/* Constants used when creating the ENGINE */
+static const char *ENGINE_T4_ID = "t4";
+static const char *ENGINE_T4_NAME = "SPARC T4 engine support";
+static const char *ENGINE_NO_T4_NAME = "SPARC T4 engine support (no T4)";
+
+
+#if (defined(sun4v) || defined(__sparcv9) || defined(__sparcv8plus) || \
+	defined(__sparcv8)) && !defined(OPENSSL_NO_ASM)
+#define	COMPILE_HW_T4
+static int t4_bind_helper(ENGINE *e, const char *id);
+#pragma inline(t4_bind_helper)
+#endif
+
+/*
+ * This makes the engine "built-in" with OpenSSL.
+ * On non-T4 CPUs this just returns.
+ * Called by ENGINE_load_builtin_engines().
+ */
+void
+ENGINE_load_t4(void)
+{
+#ifdef COMPILE_HW_T4
+	ENGINE *toadd = ENGINE_new();
+	if (toadd != NULL) {
+		if (t4_bind_helper(toadd, ENGINE_T4_ID) != 0) {
+			(void) ENGINE_add(toadd);
+			(void) ENGINE_free(toadd);
+			ERR_clear_error();
+		} else {
+			(void) ENGINE_free(toadd);
+		}
+	}
+#endif
+}
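+
+#ifdef	T4_ENGINE_USAGE_EXAMPLE	/* hypothetical guard; not built by default */
+/*
+ * Illustrative sketch of how an application would select this built-in
+ * engine through the generic OpenSSL 1.0.0 ENGINE API; error handling is
+ * omitted for brevity and the function name is made up for the example.
+ */
+static void
+t4_engine_usage_example(void)
+{
+	ENGINE *e;
+
+	ENGINE_load_builtin_engines();	/* calls ENGINE_load_t4() */
+	if ((e = ENGINE_by_id("t4")) == NULL)
+		return;
+	if (ENGINE_init(e)) {
+		(void) ENGINE_set_default_ciphers(e);
+		(void) ENGINE_set_default_digests(e);
+		/* ... EVP_EncryptInit_ex(ctx, cipher, e, key, iv), etc. ... */
+		(void) ENGINE_finish(e);
+	}
+	(void) ENGINE_free(e);
+}
+#endif	/* T4_ENGINE_USAGE_EXAMPLE */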
+
+
+#ifdef	COMPILE_HW_T4
+static int t4_bind(ENGINE *e);
+#ifndef DYNAMIC_ENGINE
+#pragma inline(t4_bind)
+#endif
+static t4_cipher_id get_cipher_index_by_nid(int nid);
+#pragma inline(get_cipher_index_by_nid)
+static boolean_t t4_aes_instructions_present(void);
+#pragma inline(t4_aes_instructions_present)
+static boolean_t t4_digest_instructions_present(void);
+#pragma inline(t4_digest_instructions_present)
+
+/* Digest registration function, registered with ENGINE_set_digests() */
+int t4_get_all_digests(ENGINE *e, const EVP_MD **digest,
+    const int **nids, int nid);
+
+#ifndef	SOLARIS_NO_AES_CTR
+/*
+ * NIDs for AES counter mode that will be defined during the engine
+ * initialization (because OpenSSL doesn't support CTR mode).
+ */
+static int NID_t4_aes_128_ctr = NID_undef;
+static int NID_t4_aes_192_ctr = NID_undef;
+static int NID_t4_aes_256_ctr = NID_undef;
+
+static int t4_add_NID(char *sn, char *ln);
+static int t4_add_aes_ctr_NIDs(void);
+#pragma inline(t4_add_aes_ctr_NIDs)
+static void t4_free_aes_ctr_NIDs(void);
+#define	T4_FREE_AES_CTR_NIDS	t4_free_aes_ctr_NIDs()
+#else
+#define	T4_FREE_AES_CTR_NIDS
+#endif	/* !SOLARIS_NO_AES_CTR */
+
+/* Static variables */
+/* Can't be const: the NID_t4_aes_*_ctr values are set at engine init time */
+static int t4_cipher_nids[] = {
+	NID_aes_128_cbc,
+	NID_aes_192_cbc,
+	NID_aes_256_cbc,
+#ifndef	SOLARIS_NO_AES_CFB128
+	NID_aes_128_cfb128,
+	NID_aes_192_cfb128,
+	NID_aes_256_cfb128,
+#endif
+#ifndef	SOLARIS_NO_AES_CTR
+	NID_undef, /* NID_t4_aes_128_ctr */
+	NID_undef, /* NID_t4_aes_192_ctr */
+	NID_undef, /* NID_t4_aes_256_ctr */
+#endif
+	NID_aes_128_ecb,
+	NID_aes_192_ecb,
+	NID_aes_256_ecb,
+};
+static const int t4_cipher_count =
+	(sizeof (t4_cipher_nids) / sizeof (t4_cipher_nids[0]));
+
+
+/*
+ * Cipher Table for all supported symmetric ciphers.
+ * Must be in same order as t4_cipher_id.
+ */
+static t4_cipher_t t4_cipher_table[] = {
+	/* ID			NID			IV, min-, max-key, flags */
+	{T4_AES_128_CBC,	NID_aes_128_cbc,	16, 16, 16, 0},
+	{T4_AES_192_CBC,	NID_aes_192_cbc,	16, 24, 24, 0},
+	{T4_AES_256_CBC,	NID_aes_256_cbc,	16, 32, 32, 0},
+#ifndef	SOLARIS_NO_AES_CFB128
+	{T4_AES_128_CFB128,	NID_aes_128_cfb128,	16, 16, 16,
+							EVP_CIPH_NO_PADDING},
+	{T4_AES_192_CFB128,	NID_aes_192_cfb128,	16, 24, 24,
+							EVP_CIPH_NO_PADDING},
+	{T4_AES_256_CFB128,	NID_aes_256_cfb128,	16, 32, 32,
+							EVP_CIPH_NO_PADDING},
+#endif
+#ifndef	SOLARIS_NO_AES_CTR
+	/* We don't know the correct NIDs until the engine is initialized */
+	{T4_AES_128_CTR,	NID_undef,		16, 16, 16,
+							EVP_CIPH_NO_PADDING},
+	{T4_AES_192_CTR,	NID_undef,		16, 24, 24,
+							EVP_CIPH_NO_PADDING},
+	{T4_AES_256_CTR,	NID_undef,		16, 32, 32,
+							EVP_CIPH_NO_PADDING},
+#endif
+	{T4_AES_128_ECB,	NID_aes_128_ecb,	0, 16, 16, 0},
+	{T4_AES_192_ECB,	NID_aes_192_ecb,	0, 24, 24, 0},
+	{T4_AES_256_ECB,	NID_aes_256_ecb,	0, 32, 32, 0},
+};
+
+
+/* Formal declaration for functions in EVP_CIPHER structure */
+static int t4_cipher_init_aes(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+    const unsigned char *iv, int enc);
+
+static int t4_cipher_do_aes_128_cbc(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+static int t4_cipher_do_aes_192_cbc(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+static int t4_cipher_do_aes_256_cbc(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+#ifndef	SOLARIS_NO_AES_CFB128
+static int t4_cipher_do_aes_128_cfb128(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+static int t4_cipher_do_aes_192_cfb128(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+static int t4_cipher_do_aes_256_cfb128(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+#endif
+#ifndef	SOLARIS_NO_AES_CTR
+static int t4_cipher_do_aes_128_ctr(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+static int t4_cipher_do_aes_192_ctr(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+static int t4_cipher_do_aes_256_ctr(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+#endif	/* !SOLARIS_NO_AES_CTR */
+static int t4_cipher_do_aes_128_ecb(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+static int t4_cipher_do_aes_192_ecb(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+static int t4_cipher_do_aes_256_ecb(EVP_CIPHER_CTX *ctx, unsigned char *out,
+    const unsigned char *in, size_t inl);
+
+
+/*
+ * Cipher Algorithms
+ *
+ * OpenSSL's libcrypto EVP stuff. This is how this engine gets wired to EVP.
+ * EVP_CIPHER is defined in evp.h.  To maintain binary compatibility the
+ * definition cannot be modified.
+ * Stuff specific to the t4 engine is kept in t4_cipher_ctx_t, which is
+ * pointed to by the last field, app_data.
+ *
+ * Fields: nid, block_size, key_len, iv_len, flags,
+ *	init(), do_cipher(), cleanup(),
+ *	ctx_size,
+ *	set_asn1_parameters(), get_asn1_parameters(), ctrl(), app_data
+ * For the T4 engine, field app_data points to t4_cipher_ctx_t.
+ */
+
+static const EVP_CIPHER t4_aes_128_cbc = {
+	NID_aes_128_cbc,
+	16, 16, 16,
+	EVP_CIPH_CBC_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_128_cbc, NULL,
+	sizeof (t4_cipher_ctx_t),
+	EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
+	NULL, NULL
+};
+static const EVP_CIPHER t4_aes_192_cbc = {
+	NID_aes_192_cbc,
+	16, 24, 16,
+	EVP_CIPH_CBC_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_192_cbc, NULL,
+	sizeof (t4_cipher_ctx_t),
+	EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
+	NULL, NULL
+};
+static const EVP_CIPHER t4_aes_256_cbc = {
+	NID_aes_256_cbc,
+	16, 32, 16,
+	EVP_CIPH_CBC_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_256_cbc, NULL,
+	sizeof (t4_cipher_ctx_t),
+	EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
+	NULL, NULL
+};
+
+#ifndef	SOLARIS_NO_AES_CFB128
+static const EVP_CIPHER t4_aes_128_cfb128 = {
+	NID_aes_128_cfb128,
+	16, 16, 16,
+	EVP_CIPH_CFB_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_128_cfb128, NULL,
+	sizeof (t4_cipher_ctx_t),
+	EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
+	NULL, NULL
+};
+static const EVP_CIPHER t4_aes_192_cfb128 = {
+	NID_aes_192_cfb128,
+	16, 24, 16,
+	EVP_CIPH_CFB_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_192_cfb128, NULL,
+	sizeof (t4_cipher_ctx_t),
+	EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
+	NULL, NULL
+};
+static const EVP_CIPHER t4_aes_256_cfb128 = {
+	NID_aes_256_cfb128,
+	16, 32, 16,
+	EVP_CIPH_CFB_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_256_cfb128, NULL,
+	sizeof (t4_cipher_ctx_t),
+	EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
+	NULL, NULL
+};
+#endif	/* !SOLARIS_NO_AES_CFB128 */
+
+#ifndef	SOLARIS_NO_AES_CTR
+/*
+ * Counter mode is not defined in OpenSSL.
+ * NID_undef's will be changed to AES counter mode NIDs as soon as they are
+ * created in t4_add_aes_ctr_NIDs() when the engine is initialized.
+ * Note that the need to change these structures during initialization is the
+ * reason why we don't define them with the const keyword.
+ */
+static EVP_CIPHER t4_aes_128_ctr = {
+	NID_undef,
+	16, 16, 16,
+	EVP_CIPH_CBC_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_128_ctr, NULL,
+	sizeof (t4_cipher_ctx_t),
+	EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
+	NULL, NULL
+};
+static EVP_CIPHER t4_aes_192_ctr = {
+	NID_undef,
+	16, 24, 16,
+	EVP_CIPH_CBC_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_192_ctr, NULL,
+	sizeof (t4_cipher_ctx_t),
+	EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
+	NULL, NULL
+};
+static EVP_CIPHER t4_aes_256_ctr = {
+	NID_undef,
+	16, 32, 16,
+	EVP_CIPH_CBC_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_256_ctr, NULL,
+	sizeof (t4_cipher_ctx_t),
+	EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv,
+	NULL, NULL
+};
+#endif	/* !SOLARIS_NO_AES_CTR */
+
+/*
+ * ECB mode doesn't use an Initialization Vector, which is why the
+ * set_asn1_parameters, get_asn1_parameters, and cleanup fields are NULL.
+ */
+static const EVP_CIPHER t4_aes_128_ecb = {
+	NID_aes_128_ecb,
+	16, 16, 0,
+	EVP_CIPH_ECB_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_128_ecb, NULL,
+	sizeof (t4_cipher_ctx_t),
+	NULL, NULL, NULL, NULL
+};
+static const EVP_CIPHER t4_aes_192_ecb = {
+	NID_aes_192_ecb,
+	16, 24, 0,
+	EVP_CIPH_ECB_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_192_ecb, NULL,
+	sizeof (t4_cipher_ctx_t),
+	NULL, NULL, NULL, NULL
+};
+static const EVP_CIPHER t4_aes_256_ecb = {
+	NID_aes_256_ecb,
+	16, 32, 0,
+	EVP_CIPH_ECB_MODE,
+	t4_cipher_init_aes, t4_cipher_do_aes_256_ecb, NULL,
+	sizeof (t4_cipher_ctx_t),
+	NULL, NULL, NULL, NULL
+};
+
+
+/*
+ * Return true if executing on a SPARC processor with AES instruction support,
+ * such as a T4; otherwise false.
+ */
+static boolean_t
+t4_aes_instructions_present(void)
+{
+	uint_t ui;
+
+	(void) getisax(&ui, 1);
+	return ((ui & AV_SPARC_AES) != 0);
+}
+
+
+/*
+ * Return true if executing on a SPARC processor with MD5/SHA{1,256,512}
+ * instruction support, such as a T4; otherwise false.
+ */
+static boolean_t
+t4_digest_instructions_present(void)
+{
+#ifndef	OPENSSL_NO_SHA
+#define	UI_MASK	(AV_SPARC_MD5 | AV_SPARC_SHA1 | AV_SPARC_SHA256 | \
+	AV_SPARC_SHA512)
+#else
+#define	UI_MASK	(AV_SPARC_MD5)
+#endif
+	uint_t		ui;
+
+	(void) getisax(&ui, 1);
+	return ((ui & UI_MASK) == UI_MASK);
+}
+
+
+#ifndef	SOLARIS_NO_AES_CTR
+/* Create a new NID when we have no OID for that mechanism */
+static int
+t4_add_NID(char *sn, char *ln)
+{
+	ASN1_OBJECT	*o;
+	int		nid;
+
+	if ((o = ASN1_OBJECT_create(OBJ_new_nid(1), (unsigned char *)"",
+	    1, sn, ln)) == NULL) {
+		T4err(T4_F_ADD_NID, T4_R_ASN1_OBJECT_CREATE);
+		return (0);
+	}
+
+	/* Will return NID_undef on error */
+	nid = OBJ_add_object(o);
+	ASN1_OBJECT_free(o);
+
+	return (nid);
+}
+
+
+/*
+ * Create new NIDs for AES counter mode.
+ * OpenSSL doesn't support them yet, so we have to define them ourselves here.
+ */
+static int
+t4_add_aes_ctr_NIDs(void)
+{
+	/* Are we already set? */
+	if (NID_t4_aes_256_ctr != NID_undef)
+		return (1);
+
+	/*
+	 * There are no official names for AES counter modes yet so we just
+	 * follow the format of those that exist.
+	 */
+
+	/* Initialize NID_t4_aes_*_ctr and t4_cipher_table[] variables */
+	if ((NID_t4_aes_128_ctr = t4_add_NID("AES-128-CTR", "aes-128-ctr")) ==
+	    NID_undef)
+		return (0);
+	t4_cipher_table[T4_AES_128_CTR].nid =
+	    t4_aes_128_ctr.nid = NID_t4_aes_128_ctr;
+
+	if ((NID_t4_aes_192_ctr = t4_add_NID("AES-192-CTR", "aes-192-ctr")) ==
+	    NID_undef)
+		return (0);
+	t4_cipher_table[T4_AES_192_CTR].nid =
+	    t4_aes_192_ctr.nid = NID_t4_aes_192_ctr;
+
+	if ((NID_t4_aes_256_ctr = t4_add_NID("AES-256-CTR", "aes-256-ctr")) ==
+	    NID_undef)
+		return (0);
+	t4_cipher_table[T4_AES_256_CTR].nid =
+	    t4_aes_256_ctr.nid = NID_t4_aes_256_ctr;
+
+	/* Initialize t4_cipher_nids[] */
+	for (int i = 0; i < t4_cipher_count; ++i) {
+		if (t4_cipher_nids[i] == NID_undef) { /* found */
+			t4_cipher_nids[i] = NID_t4_aes_128_ctr;
+			t4_cipher_nids[++i] = NID_t4_aes_192_ctr;
+			t4_cipher_nids[++i] = NID_t4_aes_256_ctr;
+			break;
+		}
+	}
+
+	return (1);
+}
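+
+#ifdef	T4_ENGINE_NID_EXAMPLE	/* hypothetical guard; not built by default */
+/*
+ * Illustrative sketch: once t4_add_aes_ctr_NIDs() has run, the dynamically
+ * created NIDs resolve through the usual OBJ_* lookups, so the short name
+ * registered above finds the same value stored in NID_t4_aes_128_ctr.
+ */
+static int
+t4_lookup_ctr_nid_example(void)
+{
+	return (OBJ_sn2nid("AES-128-CTR"));	/* NID_undef if not added */
+}
+#endif	/* T4_ENGINE_NID_EXAMPLE */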
+
+
+static void
+t4_free_aes_ctr_NIDs(void)
+{
+	ASN1_OBJECT *o = NULL;
+
+	/* Clear entries in t4_cipher_nids[] */
+	for (int i = 0; i < t4_cipher_count; ++i) {
+		if (t4_cipher_nids[i] == NID_t4_aes_128_ctr) {
+			t4_cipher_nids[i] = NID_undef;
+		} else if (t4_cipher_nids[i] == NID_t4_aes_192_ctr) {
+			t4_cipher_nids[i] = NID_undef;
+		} else if (t4_cipher_nids[i] == NID_t4_aes_256_ctr) {
+			t4_cipher_nids[i] = NID_undef;
+		}
+	}
+
+	/* Clear NID_t4_aes_*_ctr and t4_cipher_table[] variables */
+	if (NID_t4_aes_128_ctr != NID_undef) {
+		o = OBJ_nid2obj(NID_t4_aes_128_ctr);
+		if (o != NULL)
+			ASN1_OBJECT_free(o);
+		NID_t4_aes_128_ctr = NID_undef;
+		t4_cipher_table[T4_AES_128_CTR].nid =
+		    t4_aes_128_ctr.nid = NID_undef;
+	}
+
+	if (NID_t4_aes_192_ctr != NID_undef) {
+		o = OBJ_nid2obj(NID_t4_aes_192_ctr);
+		if (o != NULL)
+			ASN1_OBJECT_free(o);
+		NID_t4_aes_192_ctr = NID_undef;
+		t4_cipher_table[T4_AES_192_CTR].nid =
+		    t4_aes_192_ctr.nid = NID_undef;
+	}
+
+	if (NID_t4_aes_256_ctr != NID_undef) {
+		o = OBJ_nid2obj(NID_t4_aes_256_ctr);
+		if (o != NULL)
+			ASN1_OBJECT_free(o);
+		NID_t4_aes_256_ctr = NID_undef;
+		t4_cipher_table[T4_AES_256_CTR].nid =
+		    t4_aes_256_ctr.nid = NID_undef;
+	}
+}
+#endif	/* !SOLARIS_NO_AES_CTR */
+
+
+/*
+ * Cipher functions
+ */
+
+
+/*
+ * Registered by the ENGINE with ENGINE_set_ciphers().
+ * Finds out how to deal with a particular cipher NID in the ENGINE.
+ */
+/* ARGSUSED */
+static int
+t4_get_all_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
+    const int **nids, int nid)
+{
+	if (cipher == NULL) { /* return a list of all supported ciphers */
+		*nids = (t4_cipher_count > 0) ? t4_cipher_nids : NULL;
+		return (t4_cipher_count);
+	}
+
+	switch (nid) {
+	case NID_aes_128_cbc:
+		*cipher = &t4_aes_128_cbc;
+		break;
+	case NID_aes_192_cbc:
+		*cipher = &t4_aes_192_cbc;
+		break;
+	case NID_aes_256_cbc:
+		*cipher = &t4_aes_256_cbc;
+		break;
+	case NID_aes_128_ecb:
+		*cipher = &t4_aes_128_ecb;
+		break;
+	case NID_aes_192_ecb:
+		*cipher = &t4_aes_192_ecb;
+		break;
+	case NID_aes_256_ecb:
+		*cipher = &t4_aes_256_ecb;
+		break;
+#ifndef	SOLARIS_NO_AES_CFB128
+	case NID_aes_128_cfb128:
+		*cipher = &t4_aes_128_cfb128;
+		break;
+	case NID_aes_192_cfb128:
+		*cipher = &t4_aes_192_cfb128;
+		break;
+	case NID_aes_256_cfb128:
+		*cipher = &t4_aes_256_cfb128;
+		break;
+#endif	/* !SOLARIS_NO_AES_CFB128 */
+
+	default:
+#ifndef	SOLARIS_NO_AES_CTR
+		/* These NIDs aren't constants, so test with "if", not "case" */
+		if (nid == NID_t4_aes_128_ctr) {
+			*cipher = &t4_aes_128_ctr;
+			break;
+		} else if (nid == NID_t4_aes_192_ctr) {
+			*cipher = &t4_aes_192_ctr;
+			break;
+		} else if (nid == NID_t4_aes_256_ctr) {
+			*cipher = &t4_aes_256_ctr;
+			break;
+		} else
+#endif	/* !SOLARIS_NO_AES_CTR */
+		{
+			/* cipher not supported */
+			*cipher = NULL;
+			return (0);
+		}
+	}
+
+	return (1);
+}
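+
+#ifdef	T4_ENGINE_GET_CIPHER_EXAMPLE	/* hypothetical guard; not built */
+/*
+ * Illustrative sketch: t4_get_all_ciphers() is what makes the generic
+ * ENGINE_get_cipher() call below hand back this engine's AES-128-CBC
+ * implementation (t4_aes_128_cbc).
+ */
+static const EVP_CIPHER *
+t4_get_cipher_example(ENGINE *e)
+{
+	return (ENGINE_get_cipher(e, NID_aes_128_cbc));
+}
+#endif	/* T4_ENGINE_GET_CIPHER_EXAMPLE */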
+
+
+/* Called by t4_cipher_init_aes() */
+static t4_cipher_id
+get_cipher_index_by_nid(int nid)
+{
+	t4_cipher_id i;
+
+	for (i = (t4_cipher_id)0; i < T4_CIPHER_MAX; ++i)
+		if (t4_cipher_table[i].nid == nid)
+			return (i);
+	return (T4_CIPHER_MAX);
+}
+
+
+/* ARGSUSED2 */
+static int
+t4_cipher_init_aes(EVP_CIPHER_CTX *ctx, const unsigned char *key,
+    const unsigned char *iv, int enc)
+{
+	t4_cipher_ctx_t	*tctx = ctx->cipher_data;
+	uint64_t	*t4_ks = tctx->t4_ks;
+	t4_cipher_t	*t4_cipher;
+	t4_cipher_id	index;
+	int		key_len = ctx->key_len;
+	uint64_t	aligned_key_buffer[4]; /* 16, 24, or 32 bytes long */
+	uint64_t	*aligned_key;
+
+	if (key == NULL) {
+		T4err(T4_F_CIPHER_INIT_AES, T4_R_CIPHER_KEY);
+		return (0);
+	}
+
+	/* Get the cipher entry index in t4_cipher_table from nid */
+	index = get_cipher_index_by_nid(ctx->cipher->nid);
+	if (index >= T4_CIPHER_MAX) {
+		T4err(T4_F_CIPHER_INIT_AES, T4_R_CIPHER_NID);
+		return (0); /* Error */
+	}
+	t4_cipher = &t4_cipher_table[index];
+
+	/* Check key size and iv size */
+	if (ctx->cipher->iv_len < t4_cipher->iv_len) {
+		T4err(T4_F_CIPHER_INIT_AES, T4_R_IV_LEN_INCORRECT);
+		return (0); /* Error */
+	}
+	if ((key_len < t4_cipher->min_key_len) ||
+	    (key_len > t4_cipher->max_key_len)) {
+		T4err(T4_F_CIPHER_INIT_AES, T4_R_KEY_LEN_INCORRECT);
+		return (0); /* Error */
+	}
+
+	/* Set cipher flags, if any */
+	ctx->flags |= t4_cipher->flags;
+
+	/* Align the key */
+	if (((unsigned long)key & 0x7) == 0) /* already aligned */
+		aligned_key = (uint64_t *)key;
+	else { /* key is not 8-byte aligned */
+#ifdef DEBUG_T4
+		(void) fprintf(stderr, "T4: key is not 8 byte aligned\n");
+#endif
+		(void) memcpy(aligned_key_buffer, key, key_len);
+		aligned_key = aligned_key_buffer;
+	}
+
+
+	/*
+	 * Expand the key schedule.
+	 * Copy original key to start of t4_ks key schedule. Note that the
+	 * encryption and decryption key schedule are the same for T4.
+	 */
+	switch (key_len) {
+		case 16:
+			t4_aes_expand128(&t4_ks[2],
+			    (const uint32_t *)aligned_key);
+			t4_ks[0] = aligned_key[0];
+			t4_ks[1] = aligned_key[1];
+			break;
+		case 24:
+			t4_aes_expand192(&t4_ks[3],
+			    (const uint32_t *)aligned_key);
+			t4_ks[0] = aligned_key[0];
+			t4_ks[1] = aligned_key[1];
+			t4_ks[2] = aligned_key[2];
+			break;
+		case 32:
+			t4_aes_expand256(&t4_ks[4],
+			    (const uint32_t *)aligned_key);
+			t4_ks[0] = aligned_key[0];
+			t4_ks[1] = aligned_key[1];
+			t4_ks[2] = aligned_key[2];
+			t4_ks[3] = aligned_key[3];
+			break;
+		default:
+			T4err(T4_F_CIPHER_INIT_AES, T4_R_CIPHER_KEY);
+			return (0);
+	}
+
+	/* Save index to cipher */
+	tctx->index = index;
+
+	/* Process IV */
+	if (t4_cipher->iv_len <= 0) { /* no IV (such as with ECB mode) */
+		tctx->iv = NULL;
+	} else if (((unsigned long)ctx->iv & 0x7) == 0) { /* already aligned */
+		tctx->iv = (uint64_t *)ctx->iv;
+	} else {
+		/* IV is not 8 byte aligned */
+		(void) memcpy(tctx->aligned_iv_buffer, ctx->iv,
+		    ctx->cipher->iv_len);
+		tctx->iv = tctx->aligned_iv_buffer;
+#ifdef DEBUG_T4
+		(void) fprintf(stderr,
+		    "t4_cipher_init_aes: IV is not 8 byte aligned\n");
+		(void) fprintf(stderr,
+		    "t4_cipher_init_aes: ctx->cipher->iv_len =%d\n",
+		    ctx->cipher->iv_len);
+		(void) fprintf(stderr, "t4_cipher_init_aes: after "
+		    "re-alignment, tctx->iv = %p\n", (void *)tctx->iv);
+#endif	/* DEBUG_T4 */
+	}
+
+	return (1);
+}
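+
+/*
+ * Key schedule layout (worked example): for key_len = 24 (AES-192) the
+ * switch above copies the three 64-bit key words into t4_ks[0..2] and
+ * writes the expanded rounds starting at t4_ks[3], so the schedule always
+ * begins with the raw key followed by the round keys.
+ */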
+
+
+/*
+ * ENCRYPT_UPDATE or DECRYPT_UPDATE
+ */
+#define	T4_CIPHER_DO_AES(t4_cipher_do_aes, t4_aes_load_keys_for_encrypt, \
+    t4_aes_encrypt, t4_aes_load_keys_for_decrypt, t4_aes_decrypt, iv)	\
+static int								\
+t4_cipher_do_aes(EVP_CIPHER_CTX *ctx, unsigned char *out,		\
+    const unsigned char *in, size_t inl)				\
+{									\
+	t4_cipher_ctx_t	*tctx = ctx->cipher_data;			\
+	uint64_t	*t4_ks = tctx->t4_ks;				\
+	unsigned long	outl = inl;					\
+	unsigned char	*bufin_alloc = NULL, *bufout_alloc = NULL;	\
+	unsigned char	*bufin, *bufout;				\
+									\
+	/* "in" and "out" must be 8 byte aligned */			\
+	if (((unsigned long)in & 0x7) == 0) { /* already aligned */	\
+		bufin = (unsigned char *)in;				\
+	} else { /* "in" is not 8 byte aligned */			\
+		if (((unsigned long)out & 0x7) == 0) { /* aligned */	\
+			/* use output buffer for input */		\
+			bufin = out;					\
+		} else {						\
+			bufin = bufin_alloc = OPENSSL_malloc(inl);	\
+			if (bufin_alloc == NULL)			\
+				return (0); /* error */			\
+		}							\
+		(void) memcpy(bufin, in, inl);				\
+	}								\
+									\
+	if (((unsigned long)out & 0x7) == 0) { /* already aligned */	\
+		bufout = out;						\
+	} else { /* "out" is not 8 byte aligned */			\
+		if (bufin_alloc != NULL) {				\
+			/* use allocated input buffer for output */	\
+			bufout = bufin_alloc;				\
+		} else {						\
+			bufout = bufout_alloc = OPENSSL_malloc(outl);	\
+			if (bufout_alloc == NULL) {			\
+				OPENSSL_free(bufin_alloc);		\
+				return (0); /* error */			\
+			}						\
+		}							\
+	}								\
+									\
+	/* Data length must be a multiple of the AES block size. */	\
+	if ((inl & 0xf) != 0) {						\
+		OPENSSL_free(bufout_alloc);				\
+		OPENSSL_free(bufin_alloc);				\
+		T4err(T4_F_CIPHER_DO_AES, T4_R_NOT_BLOCKSIZE_LENGTH);	\
+		return (0);						\
+	}								\
+									\
+	if (ctx->encrypt) {						\
+		t4_aes_load_keys_for_encrypt(t4_ks);			\
+		t4_aes_encrypt(t4_ks, (uint64_t *)bufin,		\
+		    (uint64_t *)bufout, (size_t)inl, iv);		\
+	} else { /* decrypt */						\
+		t4_aes_load_keys_for_decrypt(t4_ks);			\
+		t4_aes_decrypt(t4_ks, (uint64_t *)bufin,		\
+		    (uint64_t *)bufout,					\
+		    (size_t)inl, iv);					\
+	}								\
+									\
+	/* Cleanup */							\
+	if (bufin_alloc != NULL) {					\
+		if (bufout == bufin_alloc)				\
+			(void) memcpy(out, bufout, outl);		\
+		OPENSSL_free(bufin_alloc);				\
+	}								\
+	if (bufout_alloc != NULL) {					\
+		(void) memcpy(out, bufout_alloc, outl);			\
+		OPENSSL_free(bufout_alloc);				\
+	}								\
+									\
+	return (1);							\
+}
+
+
+/* AES CBC mode. */
+T4_CIPHER_DO_AES(t4_cipher_do_aes_128_cbc,
+	t4_aes128_load_keys_for_encrypt, t4_aes128_cbc_encrypt,
+	t4_aes128_load_keys_for_decrypt, t4_aes128_cbc_decrypt, tctx->iv)
+T4_CIPHER_DO_AES(t4_cipher_do_aes_192_cbc,
+	t4_aes192_load_keys_for_encrypt, t4_aes192_cbc_encrypt,
+	t4_aes192_load_keys_for_decrypt, t4_aes192_cbc_decrypt, tctx->iv)
+T4_CIPHER_DO_AES(t4_cipher_do_aes_256_cbc,
+	t4_aes256_load_keys_for_encrypt, t4_aes256_cbc_encrypt,
+	t4_aes256_load_keys_for_decrypt, t4_aes256_cbc_decrypt, tctx->iv)
+
+/*
+ * AES CFB128 mode.
+ * CFB128 decrypt uses load_keys_for_encrypt() because the mode runs the
+ * raw AES encrypt operation for decryption as well.
+ */
+#ifndef	SOLARIS_NO_AES_CFB128
+T4_CIPHER_DO_AES(t4_cipher_do_aes_128_cfb128,
+	t4_aes128_load_keys_for_encrypt, t4_aes128_cfb128_encrypt,
+	t4_aes128_load_keys_for_encrypt, t4_aes128_cfb128_decrypt, tctx->iv)
+T4_CIPHER_DO_AES(t4_cipher_do_aes_192_cfb128,
+	t4_aes192_load_keys_for_encrypt, t4_aes192_cfb128_encrypt,
+	t4_aes192_load_keys_for_encrypt, t4_aes192_cfb128_decrypt, tctx->iv)
+T4_CIPHER_DO_AES(t4_cipher_do_aes_256_cfb128,
+	t4_aes256_load_keys_for_encrypt, t4_aes256_cfb128_encrypt,
+	t4_aes256_load_keys_for_encrypt, t4_aes256_cfb128_decrypt, tctx->iv)
+#endif	/* !SOLARIS_NO_AES_CFB128 */
+
+/* AES CTR mode. */
+#ifndef	SOLARIS_NO_AES_CTR
+T4_CIPHER_DO_AES(t4_cipher_do_aes_128_ctr,
+	t4_aes128_load_keys_for_encrypt, t4_aes128_ctr_crypt,
+	t4_aes128_load_keys_for_decrypt, t4_aes128_ctr_crypt, tctx->iv)
+T4_CIPHER_DO_AES(t4_cipher_do_aes_192_ctr,
+	t4_aes192_load_keys_for_encrypt, t4_aes192_ctr_crypt,
+	t4_aes192_load_keys_for_decrypt, t4_aes192_ctr_crypt, tctx->iv)
+T4_CIPHER_DO_AES(t4_cipher_do_aes_256_ctr,
+	t4_aes256_load_keys_for_encrypt, t4_aes256_ctr_crypt,
+	t4_aes256_load_keys_for_decrypt, t4_aes256_ctr_crypt, tctx->iv)
+#endif	/* !SOLARIS_NO_AES_CTR */
+
+/* AES ECB mode. */
+T4_CIPHER_DO_AES(t4_cipher_do_aes_128_ecb,
+	t4_aes128_load_keys_for_encrypt, t4_aes128_ecb_encrypt,
+	t4_aes128_load_keys_for_decrypt, t4_aes128_ecb_decrypt, NULL)
+T4_CIPHER_DO_AES(t4_cipher_do_aes_192_ecb,
+	t4_aes192_load_keys_for_encrypt, t4_aes192_ecb_encrypt,
+	t4_aes192_load_keys_for_decrypt, t4_aes192_ecb_decrypt, NULL)
+T4_CIPHER_DO_AES(t4_cipher_do_aes_256_ecb,
+	t4_aes256_load_keys_for_encrypt, t4_aes256_ecb_encrypt,
+	t4_aes256_load_keys_for_decrypt, t4_aes256_ecb_decrypt, NULL)
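+
+#ifdef	T4_ENGINE_EVP_EXAMPLE	/* hypothetical guard; not built by default */
+/*
+ * Illustrative sketch: driving one of the generated do_cipher functions
+ * through the standard EVP interface.  The T4_CIPHER_DO_AES body only
+ * accepts data whose length is a multiple of the 16-byte AES block, which
+ * EVP guarantees here by buffering and PKCS padding in CBC mode.  Error
+ * handling is minimal and the function name is made up for the example.
+ */
+static int
+t4_evp_encrypt_example(ENGINE *e, const unsigned char key[16],
+    const unsigned char iv[16], const unsigned char in[32],
+    unsigned char out[48])
+{
+	EVP_CIPHER_CTX ctx;
+	int outl, tmpl, ok;
+
+	EVP_CIPHER_CTX_init(&ctx);
+	ok = EVP_EncryptInit_ex(&ctx, EVP_aes_128_cbc(), e, key, iv) &&
+	    EVP_EncryptUpdate(&ctx, out, &outl, in, 32) &&
+	    EVP_EncryptFinal_ex(&ctx, out + outl, &tmpl);
+	(void) EVP_CIPHER_CTX_cleanup(&ctx);
+	return (ok ? outl + tmpl : 0);
+}
+#endif	/* T4_ENGINE_EVP_EXAMPLE */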
+
+
+/*
+ * Is the t4 engine available?
+ * Passed to ENGINE_set_init_function().
+ */
+/* ARGSUSED */
+static int
+t4_init(ENGINE *e)
+{
+	return (1);
+}
+
+
+/* Passed to ENGINE_set_destroy_function(). */
+/* ARGSUSED */
+static int
+t4_destroy(ENGINE *e)
+{
+	T4_FREE_AES_CTR_NIDS;
+	ERR_unload_t4_strings();
+	return (1);
+}
+
+
+/*
+ * Called by t4_bind_helper().
+ * Note: too early to use T4err() functions on errors.
+ */
+/* ARGSUSED */
+static int
+t4_bind(ENGINE *e)
+{
+	static int aes_engage = -1, digest_engage = -1;
+
+	if (aes_engage == -1) {
+		aes_engage = (t4_aes_instructions_present() != 0);
+	}
+	if (digest_engage == -1) {
+		digest_engage = (t4_digest_instructions_present() != 0);
+	}
+#ifdef DEBUG_T4
+	(void) fprintf(stderr,
+	    "t4_bind: engage aes=%d, digest=%d\n", aes_engage, digest_engage);
+#endif
+
+#ifndef	SOLARIS_NO_AES_CTR
+	/*
+	 * We must do this before we start working with slots since we need all
+	 * NIDs there.
+	 */
+	if (aes_engage) {
+		if (t4_add_aes_ctr_NIDs() == 0) {
+			T4_FREE_AES_CTR_NIDS;
+			return (0);
+		}
+	}
+#endif	/* !SOLARIS_NO_AES_CTR */
+
+#ifdef DEBUG_T4
+	(void) fprintf(stderr, "t4_cipher_count = %d; t4_cipher_nids[] =\n",
+	    t4_cipher_count);
+	for (int i = 0; i < t4_cipher_count; ++i) {
+		(void) fprintf(stderr, " %d", t4_cipher_nids[i]);
+	}
+	(void) fprintf(stderr, "\n");
+#endif	/* DEBUG_T4 */
+
+	/* Register T4 engine ID, name, and functions */
+	if (!ENGINE_set_id(e, ENGINE_T4_ID) ||
+	    !ENGINE_set_name(e,
+	    aes_engage ? ENGINE_T4_NAME : ENGINE_NO_T4_NAME) ||
+	    !ENGINE_set_init_function(e, t4_init) ||
+	    (aes_engage && !ENGINE_set_ciphers(e, t4_get_all_ciphers)) ||
+	    (digest_engage && !ENGINE_set_digests(e, t4_get_all_digests)) ||
+	    !ENGINE_set_destroy_function(e, t4_destroy)) {
+		T4_FREE_AES_CTR_NIDS;
+		return (0);
+	}
+
+	return (1);
+}
+
+
+/*
+ * Called by ENGINE_load_t4().
+ * Note: too early to use T4err() functions on errors.
+ */
+static int
+t4_bind_helper(ENGINE *e, const char *id)
+{
+	if (id != NULL && (strcmp(id, ENGINE_T4_ID) != 0)) {
+		(void) fprintf(stderr, "T4: bad t4 engine ID\n");
+		return (0);
+	}
+	if (!t4_bind(e)) {
+		(void) fprintf(stderr,
+		    "T4: failed to bind t4 engine\n");
+		return (0);
+	}
+
+	return (1);
+}
+
+
+#ifdef DYNAMIC_ENGINE
+IMPLEMENT_DYNAMIC_CHECK_FN()
+IMPLEMENT_DYNAMIC_BIND_FN(t4_bind_helper)
+#endif	/* DYNAMIC_ENGINE */
+#endif	/* COMPILE_HW_T4 */
+#endif	/* !OPENSSL_NO_HW && !OPENSSL_NO_HW_AES_T4 && !OPENSSL_NO_AES */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.0/engines/t4/eng_t4_aes_asm.h	Wed Jul 20 10:20:13 2011 -0700
@@ -0,0 +1,113 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef	ENG_T4_AES_ASM_H
+#define	ENG_T4_AES_ASM_H
+
+/*
+ * SPARC AES assembly language functions.
+ *
+ * Based on Solaris file aes_impl.h.
+ */
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+#if (defined(sun4v) || defined(__sparcv9) || defined(__sparcv8plus) || \
+	defined(__sparcv8)) && ! defined(OPENSSL_NO_ASM)
+
+extern void t4_aes_expand128(uint64_t *rk, const uint32_t *key);
+extern void t4_aes_expand192(uint64_t *rk, const uint32_t *key);
+extern void t4_aes_expand256(uint64_t *rk, const uint32_t *key);
+extern void t4_aes_encrypt128(const uint64_t *rk, const uint32_t *pt,
+    uint32_t *ct);
+extern void t4_aes_encrypt192(const uint64_t *rk, const uint32_t *pt,
+    uint32_t *ct);
+extern void t4_aes_encrypt256(const uint64_t *rk, const uint32_t *pt,
+    uint32_t *ct);
+extern void t4_aes_decrypt128(const uint64_t *rk, const uint32_t *ct,
+    uint32_t *pt);
+extern void t4_aes_decrypt192(const uint64_t *rk, const uint32_t *ct,
+    uint32_t *pt);
+extern void t4_aes_decrypt256(const uint64_t *rk, const uint32_t *ct,
+    uint32_t *pt);
+extern void t4_aes128_load_keys_for_encrypt(uint64_t *ks);
+extern void t4_aes192_load_keys_for_encrypt(uint64_t *ks);
+extern void t4_aes256_load_keys_for_encrypt(uint64_t *ks);
+extern void t4_aes128_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *dummy);
+extern void t4_aes192_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *dummy);
+extern void t4_aes256_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *dummy);
+extern void t4_aes128_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv);
+extern void t4_aes192_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv);
+extern void t4_aes256_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv);
+extern void t4_aes128_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv);
+extern void t4_aes192_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv);
+extern void t4_aes256_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv);
+extern void t4_aes128_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv);
+extern void t4_aes192_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv);
+extern void t4_aes256_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_encrypt, uint64_t *iv);
+extern void t4_aes128_load_keys_for_decrypt(uint64_t *ks);
+extern void t4_aes192_load_keys_for_decrypt(uint64_t *ks);
+extern void t4_aes256_load_keys_for_decrypt(uint64_t *ks);
+extern void t4_aes128_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *dummy);
+extern void t4_aes192_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *dummy);
+extern void t4_aes256_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *dummy);
+extern void t4_aes128_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv);
+extern void t4_aes192_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv);
+extern void t4_aes256_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv);
+extern void t4_aes128_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv);
+extern void t4_aes192_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv);
+extern void t4_aes256_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t *asm_out, size_t amount_to_decrypt, uint64_t *iv);
+
+#endif	/* (sun4v||__sparcv9||__sparcv8plus||__sparcv8) && !OPENSSL_NO_ASM */
+
+#ifdef	__cplusplus
+}
+#endif
+#endif	/* ENG_T4_AES_ASM_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.0/engines/t4/eng_t4_digest.c	Wed Jul 20 10:20:13 2011 -0700
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * The basic framework for this code came from the reference
+ * implementation for MD5 provided in RFC 1321.
+ *
+ * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+ * rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD5 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software for any particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ */
+
+
+/*
+ * This engine supports SPARC microprocessors that provide AES and other
+ * cipher and hash instructions, such as the T4 microprocessor.
+ *
+ * This file implements the MD5, SHA1, and SHA2 message digest operations.
+ */
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_MD_T4)
+#include <sys/types.h>
+#include <sys/auxv.h>		/* getisax() */
+#include <sys/sysmacros.h>	/* IS_P2ALIGNED() */
+#include <sys/byteorder.h>	/* htonl() and friends */
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#ifndef OPENSSL_NO_SHA
+/*
+ * Solaris sys/sha2.h and OpenSSL openssl/sha.h both define
+ * SHA256_CTX, SHA512_CTX, SHA256, SHA384, and SHA512.
+ */
+#define	SHA256_CTX	OPENSSL_SHA256_CTX
+#define	SHA512_CTX	OPENSSL_SHA512_CTX
+#define	SHA256		OPENSSL_SHA256
+#define	SHA512		OPENSSL_SHA512
+#include <openssl/sha.h>
+#undef	SHA256_CTX
+#undef	SHA512_CTX
+#undef	SHA256
+#undef	SHA512
+#endif	/* !OPENSSL_NO_SHA */
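+
+/*
+ * The #define/#undef pairs above rename OpenSSL's SHA symbols only for the
+ * duration of the <openssl/sha.h> include, so the Solaris <sys/sha2.h>
+ * definitions of SHA256_CTX and friends can be used below without clashing.
+ */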
+
+#include <openssl/bio.h>
+#include <openssl/aes.h>
+#include <openssl/engine.h>
+
+/* Solaris digest definitions (must follow openssl/sha.h) */
+#include <sys/md5.h>
+#ifndef OPENSSL_NO_SHA
+#include <sys/sha1.h>
+#define	_SHA2_IMPL	/* Required for SHA*_MECH_INFO_TYPE enum */
+#include <sys/sha2.h>
+#endif	/* !OPENSSL_NO_SHA */
+
+#if (defined(sun4v) || defined(__sparcv9) || defined(__sparcv8plus) || \
+	defined(__sparcv8)) && !defined(OPENSSL_NO_ASM)
+#define	COMPILE_HW_T4
+#endif
+
+#ifdef	COMPILE_HW_T4
+
+/* Copied from OpenSSL md5.h */
+#ifndef MD5_CBLOCK
+#define	MD5_CBLOCK		64
+#endif
+
+/* Padding is at most 64 bytes for MD5/SHA1/SHA256 and 128 bytes for SHA512 */
+static const uint8_t PADDING[128] = { 0x80, 0 /* all zeros */ };
+
+static const int t4_digest_nids[] = {
+	NID_md5,
+#ifndef OPENSSL_NO_SHA
+	NID_sha1,
+	NID_sha256,
+	NID_sha512,
+#endif	/* !OPENSSL_NO_SHA */
+};
+static const int t4_digest_count =
+	(sizeof (t4_digest_nids) / sizeof (t4_digest_nids[0]));
+
+/* Assembly language functions */
+extern void t4_md5_multiblock(MD5_CTX *ctx, const uint8_t *input,
+    unsigned int input_length_in_blocks);
+extern void t4_sha1_multiblock(SHA1_CTX *ctx, const uint8_t *input,
+    size_t nr_blocks);
+extern void t4_sha256_multiblock(SHA2_CTX *ctx, const uint8_t *input,
+    size_t nr_blocks);
+extern void t4_sha512_multiblock(SHA2_CTX *ctx, const uint8_t *input,
+    size_t nr_blocks);
+
+/* Internal functions */
+static void t4_md5_encode(uint8_t *restrict output,
+    const uint32_t *restrict input, size_t input_len);
+#pragma inline(t4_md5_encode)
+static void t4_sha1_256_encode(uint8_t *restrict output,
+    const uint32_t *restrict input, size_t len);
+#pragma inline(t4_sha1_256_encode)
+static void t4_sha512_encode64(uint8_t *restrict output,
+    const uint64_t *restrict input, size_t len);
+#pragma inline(t4_sha512_encode64)
+
+/* Formal declaration for functions in EVP_MD structure */
+static int t4_digest_init_md5(EVP_MD_CTX *ctx);
+static int t4_digest_update_md5(EVP_MD_CTX *ctx, const void *data,
+    size_t count);
+static int t4_digest_final_md5(EVP_MD_CTX *ctx, unsigned char *md);
+static int t4_digest_copy_md5(EVP_MD_CTX *to, const EVP_MD_CTX *from);
+#ifndef OPENSSL_NO_SHA
+static int t4_digest_init_sha1(EVP_MD_CTX *ctx);
+static int t4_digest_update_sha1(EVP_MD_CTX *ctx, const void *data,
+    size_t count);
+static int t4_digest_final_sha1(EVP_MD_CTX *ctx, unsigned char *md);
+static int t4_digest_copy_sha1(EVP_MD_CTX *to, const EVP_MD_CTX *from);
+static int t4_digest_copy_sha2(EVP_MD_CTX *to, const EVP_MD_CTX *from);
+static int t4_digest_init_sha256(EVP_MD_CTX *ctx);
+static int t4_digest_update_sha256(EVP_MD_CTX *ctx, const void *data,
+    size_t count);
+static int t4_digest_final_sha256(EVP_MD_CTX *ctx, unsigned char *md);
+static int t4_digest_init_sha512(EVP_MD_CTX *ctx);
+static int t4_digest_update_sha512(EVP_MD_CTX *ctx, const void *data,
+    size_t count);
+static int t4_digest_final_sha512(EVP_MD_CTX *ctx, unsigned char *md);
+#endif	/* !OPENSSL_NO_SHA */
+
+
+/*
+ * Message Digests (MD5 and SHA*)
+ *
+ * OpenSSL's libcrypto EVP stuff. This is how this engine gets wired to EVP.
+ * EVP_MD is defined in evp.h.  To maintain binary compatibility the
+ * definition cannot be modified.
+ * State specific to the t4 engine is kept in the Solaris digest context
+ * (MD5_CTX, SHA1_CTX, or SHA2_CTX) pointed to by EVP_MD_CTX's md_data field.
+ *
+ * Fields: type, pkey_type, md_size, flags,
+ *	init(), update(), final(),
+ *	copy(), cleanup(), sign(), verify(),
+ *	required_pkey_type, block_size, ctx_size, md_ctrl()
+ */
+static const EVP_MD t4_md5 = {
+	NID_md5, NID_md5WithRSAEncryption, MD5_DIGEST_LENGTH, 0,
+	t4_digest_init_md5, t4_digest_update_md5, t4_digest_final_md5,
+	t4_digest_copy_md5, NULL,
+	EVP_PKEY_RSA_method, MD5_CBLOCK,
+	sizeof (MD5_CTX), NULL
+	};
+
+#ifndef OPENSSL_NO_SHA
+static const EVP_MD t4_sha1 = {
+	NID_sha1, NID_sha1WithRSAEncryption, SHA_DIGEST_LENGTH, 0,
+	t4_digest_init_sha1, t4_digest_update_sha1, t4_digest_final_sha1,
+	t4_digest_copy_sha1, NULL,
+	EVP_PKEY_RSA_method, SHA_CBLOCK,
+	sizeof (SHA1_CTX), NULL
+	};
+
+static const EVP_MD t4_sha256 = {
+	NID_sha256, NID_sha256WithRSAEncryption, SHA256_DIGEST_LENGTH, 0,
+	t4_digest_init_sha256, t4_digest_update_sha256, t4_digest_final_sha256,
+	t4_digest_copy_sha2, NULL,
+	EVP_PKEY_RSA_method, SHA256_CBLOCK,
+	sizeof (SHA2_CTX), NULL
+	};
+
+static const EVP_MD t4_sha512 = {
+	NID_sha512, NID_sha512WithRSAEncryption, SHA512_DIGEST_LENGTH, 0,
+	t4_digest_init_sha512, t4_digest_update_sha512, t4_digest_final_sha512,
+	t4_digest_copy_sha2, NULL,
+	EVP_PKEY_RSA_method, SHA512_CBLOCK,
+	sizeof (SHA2_CTX), NULL
+	};
+#endif	/* !OPENSSL_NO_SHA */
+
+
+/*
+ * Message Digest functions
+ */
+
+/*
+ * Registered by the ENGINE with ENGINE_set_digests().
+ * Finds out how to deal with a particular digest NID in the ENGINE.
+ */
+/* ARGSUSED */
+int
+t4_get_all_digests(ENGINE *e, const EVP_MD **digest,
+    const int **nids, int nid)
+{
+	if (digest == NULL) { /* return a list of all supported digests */
+		*nids = (t4_digest_count > 0) ? t4_digest_nids : NULL;
+		return (t4_digest_count);
+	}
+
+	switch (nid) {
+	case NID_md5:
+		*digest = &t4_md5;
+		break;
+#ifndef OPENSSL_NO_SHA
+	/*
+	 * A special case. For "openssl dgst -dss1 ...",
+	 * OpenSSL calls EVP_get_digestbyname() on "dss1" which ends up
+	 * calling t4_get_all_digests() for NID_dsa. Internally, if an
+	 * engine is not used, OpenSSL uses SHA1_Init() as expected for
+	 * DSA. So, we must return t4_sha1 for NID_dsa as well. Note
+	 * that this must have changed between 0.9.8 and 1.0.0 since we
+	 * did not have the problem with the 0.9.8 version.
+	 */
+	case NID_dsa:
+	case NID_sha1:
+		*digest = &t4_sha1;
+		break;
+	case NID_sha256:
+		*digest = &t4_sha256;
+		break;
+	case NID_sha512:
+		*digest = &t4_sha512;
+		break;
+#endif	/* !OPENSSL_NO_SHA */
+	default:
+		/* digest not supported */
+		*digest = NULL;
+		return (0);
+	}
+
+	return (1);
+}
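+
+#ifdef	T4_ENGINE_DIGEST_EXAMPLE	/* hypothetical guard; not built */
+/*
+ * Illustrative sketch: hashing through EVP with this engine requested
+ * explicitly.  EVP_DigestInit_ex() with a non-NULL ENGINE argument goes
+ * through t4_get_all_digests() to fetch the matching EVP_MD.  The function
+ * name is made up for the example.
+ */
+static int
+t4_evp_md5_example(ENGINE *e, const void *data, size_t len,
+    unsigned char md[MD5_DIGEST_LENGTH])
+{
+	EVP_MD_CTX ctx;
+	unsigned int mdlen;
+	int ok;
+
+	EVP_MD_CTX_init(&ctx);
+	ok = EVP_DigestInit_ex(&ctx, EVP_md5(), e) &&
+	    EVP_DigestUpdate(&ctx, data, len) &&
+	    EVP_DigestFinal_ex(&ctx, md, &mdlen);
+	(void) EVP_MD_CTX_cleanup(&ctx);
+	return (ok);
+}
+#endif	/* T4_ENGINE_DIGEST_EXAMPLE */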
+
+
+/*
+ * MD5 functions
+ */
+static int
+t4_digest_init_md5(EVP_MD_CTX *ctx)
+{
+	MD5_CTX	*md5_ctx = (MD5_CTX *)ctx->md_data;
+
+	md5_ctx->state[0] = 0x67452301U;
+	md5_ctx->state[1] = 0xefcdab89U;
+	md5_ctx->state[2] = 0x98badcfeU;
+	md5_ctx->state[3] = 0x10325476U;
+	md5_ctx->count[0] = md5_ctx->count[1] = 0;
+
+	return (1);
+}
+
+
+/*
+ * Continue a MD5 digest operation, using the message block to update the
+ * context.  MD5 crunches in 64-byte blocks.
+ */
+static int
+t4_digest_update_md5(EVP_MD_CTX *ctx, const void *data, size_t count)
+{
+	MD5_CTX	*md5_ctx = (MD5_CTX *)ctx->md_data;
+
+	uint32_t		i = 0, buf_index, remaining_len;
+	const uint32_t		buf_limit = 64;
+	uint32_t		block_count;
+	const unsigned char	*input = (const unsigned char *)data;
+
+	if (count == 0)
+		return (1);
+
+	/* Compute (number of bytes computed so far) mod 64 */
+	buf_index = (md5_ctx->count[0] >> 3) & 0x3F;
+
+	/* Update number of bits hashed into this MD5 computation so far */
+	if ((md5_ctx->count[0] += (count << 3)) < (count << 3))
+		md5_ctx->count[1]++;
+	md5_ctx->count[1] += (count >> 29);
+
+	remaining_len = buf_limit - buf_index;
+
+	if (count >= remaining_len) {
+		if (buf_index != 0) {
+			(void) memcpy(&md5_ctx->buf_un.buf8[buf_index], input,
+			    remaining_len);
+			t4_md5_multiblock(md5_ctx, md5_ctx->buf_un.buf8, 1);
+			i = remaining_len;
+		}
+
+		block_count = (count - i) >> 6;
+		if (block_count > 0) {
+			t4_md5_multiblock(md5_ctx, &input[i], block_count);
+			i += block_count << 6;
+		}
+
+		if (count == i) {
+			return (1);
+		}
+
+		buf_index = 0;
+	}
+
+	/* Buffer remaining input */
+	(void) memcpy(&md5_ctx->buf_un.buf8[buf_index], &input[i], count - i);
+
+	return (1);
+}
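+
+/*
+ * Worked example of the block arithmetic above: for count = 200 bytes with
+ * buf_index = 16 already buffered, remaining_len = 48, so 48 bytes complete
+ * the buffered block; then (200 - 48) >> 6 = 2 whole 64-byte blocks are
+ * hashed straight from the caller's data, and the trailing 24 bytes are
+ * buffered for the next call.
+ */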
+
+
+/* Convert numbers from big endian to little endian. */
+static void
+t4_md5_encode(uint8_t *restrict output, const uint32_t *restrict input,
+    size_t input_len)
+{
+	size_t		i, j;
+
+	for (i = 0, j = 0; j < input_len; i++, j += sizeof (uint32_t)) {
+		output[j] = input[i] & 0xff;
+		output[j + 1] = (input[i] >> 8)  & 0xff;
+		output[j + 2] = (input[i] >> 16) & 0xff;
+		output[j + 3] = (input[i] >> 24) & 0xff;
+	}
+}
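+
+/*
+ * For example, t4_md5_encode() turns the state word 0x67452301 into the
+ * byte sequence 01 23 45 67, the little-endian layout RFC 1321 prescribes
+ * for MD5 output.
+ */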
+
+
+/* End MD5 digest operation, finalizing message digest and zeroing context. */
+static int
+t4_digest_final_md5(EVP_MD_CTX *ctx, unsigned char *md)
+{
+	MD5_CTX		*md5_ctx = (MD5_CTX *)ctx->md_data;
+	uint8_t		bitcount_le[sizeof (md5_ctx->count)];
+	uint32_t	index = (md5_ctx->count[0] >> 3) & 0x3f;
+
+	/* store bit count, little endian */
+	t4_md5_encode(bitcount_le, md5_ctx->count, sizeof (bitcount_le));
+
+	/* pad out to 56 mod 64 */
+	(void) t4_digest_update_md5(ctx, PADDING,
+	    ((index < 56) ? 56 : 120) - index);
+
+	/* append length (before padding) */
+	(void) t4_digest_update_md5(ctx, bitcount_le, sizeof (bitcount_le));
+
+	/* store state in digest, little endian (see t4_md5_encode() above) */
+	t4_md5_encode(md, md5_ctx->state, sizeof (md5_ctx->state));
+
+	/* zeroize sensitive information */
+	(void) memset(md5_ctx, 0, sizeof (*md5_ctx));
+
+	return (1);
+}
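+
+/*
+ * Worked example of the padding arithmetic above: with index = 60 bytes
+ * buffered, 120 - 60 = 60 PADDING bytes (0x80 then zeros) bring the total
+ * to 56 mod 64, and the 8-byte bit count then completes the final 64-byte
+ * block.
+ */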
+
+
+static int
+t4_digest_copy_md5(EVP_MD_CTX *to, const EVP_MD_CTX *from)
+{
+	(void) memcpy(to->md_data, from->md_data, sizeof (MD5_CTX));
+	return (1);
+}
+
+
+#ifndef OPENSSL_NO_SHA
+/*
+ * SHA1 functions
+ */
+static int
+t4_digest_init_sha1(EVP_MD_CTX *ctx)
+{
+	SHA1_CTX	*sha1_ctx = (SHA1_CTX *)ctx->md_data;
+
+	sha1_ctx->state[0] = 0x67452301U;
+	sha1_ctx->state[1] = 0xefcdab89U;
+	sha1_ctx->state[2] = 0x98badcfeU;
+	sha1_ctx->state[3] = 0x10325476U;
+	sha1_ctx->state[4] = 0xc3d2e1f0U;
+	sha1_ctx->count[0] = sha1_ctx->count[1] = 0;
+
+	return (1);
+}
+
+
+/*
+ * Continue a SHA1 digest operation, using the message block to update the
+ * context.
+ */
+static int
+t4_digest_update_sha1(EVP_MD_CTX *ctx, const void *data, size_t count)
+{
+	SHA1_CTX	*sha1_ctx = (SHA1_CTX *)ctx->md_data;
+	size_t	  i;
+	uint32_t	buf_index, remaining_len;
+	const uint32_t	buf_limit = 64;
+	const uint8_t   *input = data;
+	size_t	  block_count;
+
+	if (count == 0)
+		return (1);
+
+	/* compute number of bytes mod 64 */
+	buf_index = (sha1_ctx->count[1] >> 3) & 0x3F;
+
+	/* update number of bits */
+	if ((sha1_ctx->count[1] += (count << 3)) < (count << 3))
+		sha1_ctx->count[0]++;
+
+	sha1_ctx->count[0] += (count >> 29);
+
+	remaining_len = buf_limit - buf_index;
+
+	i = 0;
+	if (count >= remaining_len) {
+		if (buf_index) {
+			(void) memcpy(&sha1_ctx->buf_un.buf8[buf_index], input,
+			    remaining_len);
+
+			t4_sha1_multiblock(sha1_ctx, sha1_ctx->buf_un.buf8, 1);
+			i = remaining_len;
+		}
+
+		block_count = (count - i) >> 6;
+		if (block_count > 0) {
+			t4_sha1_multiblock(sha1_ctx, &input[i], block_count);
+			i += block_count << 6;
+		}
+
+		if (count == i)
+			return (1);
+
+		buf_index = 0;
+	}
+
+	/* buffer remaining input */
+	(void) memcpy(&sha1_ctx->buf_un.buf8[buf_index], &input[i], count - i);
+
+	return (1);
+}
+
+
+/* Convert numbers from little endian to big endian for SHA1/SHA256. */
+static void
+t4_sha1_256_encode(uint8_t *restrict output,
+    const uint32_t *restrict input, size_t len)
+{
+	size_t		i, j;
+
+	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
+		for (i = 0, j = 0; j < len; i++, j += sizeof (uint32_t)) {
+			/* LINTED E_BAD_PTR_CAST_ALIGN */
+			*((uint32_t *)(output + j)) = htonl(input[i]);
+		}
+	} else { /* Big and little endian independent, but slower */
+		for (i = 0, j = 0; j < len; i++, j += 4) {
+			output[j]	= (input[i] >> 24) & 0xff;
+			output[j + 1]	= (input[i] >> 16) & 0xff;
+			output[j + 2]	= (input[i] >>  8) & 0xff;
+			output[j + 3]	= input[i] & 0xff;
+		}
+	}
+}
+
+
+/* End SHA1 digest operation, finalizing message digest and zeroing context. */
+static int
+t4_digest_final_sha1(EVP_MD_CTX *ctx, unsigned char *md)
+{
+	SHA1_CTX		*sha1_ctx = (SHA1_CTX *)ctx->md_data;
+	uint8_t			bitcount_be[sizeof (sha1_ctx->count)];
+	uint32_t		index = (sha1_ctx->count[1] >> 3) & 0x3f;
+
+	/* store bit count, big endian */
+	t4_sha1_256_encode(bitcount_be, sha1_ctx->count, sizeof (bitcount_be));
+
+	/* pad out to 56 mod 64 */
+	(void) t4_digest_update_sha1(ctx, PADDING,
+	    ((index < 56) ? 56 : 120) - index);
+
+	/* append length (before padding) */
+	(void) t4_digest_update_sha1(ctx, bitcount_be, sizeof (bitcount_be));
+
+	/* store state in digest */
+	t4_sha1_256_encode(md, sha1_ctx->state, sizeof (sha1_ctx->state));
+
+	/* zeroize sensitive information */
+	(void) memset(sha1_ctx, 0, sizeof (*sha1_ctx));
+
+	return (1);
+}
+
+
+static int
+t4_digest_copy_sha1(EVP_MD_CTX *to, const EVP_MD_CTX *from)
+{
+	(void) memcpy(to->md_data, from->md_data, sizeof (SHA1_CTX));
+	return (1);
+}
+#endif	/* !OPENSSL_NO_SHA */
+
+
+#ifndef OPENSSL_NO_SHA
+/* SHA2 (SHA256/SHA512) functions */
+static int
+t4_digest_copy_sha2(EVP_MD_CTX *to, const EVP_MD_CTX *from)
+{
+	(void) memcpy(to->md_data, from->md_data, sizeof (SHA2_CTX));
+	return (1);
+}
+
+
+/* Convert numbers from little endian to big endian for SHA384/SHA512. */
+static void
+t4_sha512_encode64(uint8_t *restrict output,
+    const uint64_t *restrict input, size_t len)
+{
+	size_t		i, j;
+
+	if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
+		for (i = 0, j = 0; j < len; i++, j += sizeof (uint64_t)) {
+			/* LINTED E_BAD_PTR_CAST_ALIGN */
+			*((uint64_t *)(output + j)) = htonll(input[i]);
+		}
+	} else { /* Big and little endian independent, but slower */
+		for (i = 0, j = 0; j < len; i++, j += 8) {
+			output[j]	= (input[i] >> 56) & 0xff;
+			output[j + 1]	= (input[i] >> 48) & 0xff;
+			output[j + 2]	= (input[i] >> 40) & 0xff;
+			output[j + 3]	= (input[i] >> 32) & 0xff;
+			output[j + 4]	= (input[i] >> 24) & 0xff;
+			output[j + 5]	= (input[i] >> 16) & 0xff;
+			output[j + 6]	= (input[i] >>  8) & 0xff;
+			output[j + 7]	= input[i] & 0xff;
+		}
+	}
+}
+
+
+/*
+ * SHA256 functions
+ */
+static int
+t4_digest_init_sha256(EVP_MD_CTX *ctx)
+{
+	SHA2_CTX	*sha2_ctx = (SHA2_CTX *)ctx->md_data;
+
+	sha2_ctx->algotype = SHA256_MECH_INFO_TYPE;
+	sha2_ctx->state.s32[0] = 0x6a09e667U;
+	sha2_ctx->state.s32[1] = 0xbb67ae85U;
+	sha2_ctx->state.s32[2] = 0x3c6ef372U;
+	sha2_ctx->state.s32[3] = 0xa54ff53aU;
+	sha2_ctx->state.s32[4] = 0x510e527fU;
+	sha2_ctx->state.s32[5] = 0x9b05688cU;
+	sha2_ctx->state.s32[6] = 0x1f83d9abU;
+	sha2_ctx->state.s32[7] = 0x5be0cd19U;
+	sha2_ctx->count.c64[0] = sha2_ctx->count.c64[1] = 0;
+
+	return (1);
+}
+
+
+/*
+ * Continue a SHA256 digest operation, using the message block to update the
+ * context.
+ */
+static int
+t4_digest_update_sha256(EVP_MD_CTX *ctx, const void *data, size_t count)
+{
+	SHA2_CTX	*sha2_ctx = (SHA2_CTX *)ctx->md_data;
+	size_t		i;
+	uint32_t	buf_index, remaining_len;
+	const uint32_t	buf_limit = 64;
+	const uint8_t	*input = data;
+	size_t		block_count;
+
+	if (count == 0)
+		return (1);
+
+	/* compute number of bytes mod 64 */
+	buf_index = (sha2_ctx->count.c32[1] >> 3) & 0x3F;
+
+	/* update number of bits */
+	if ((sha2_ctx->count.c32[1] += (count << 3)) < (count << 3))
+		sha2_ctx->count.c32[0]++;
+
+	sha2_ctx->count.c32[0] += (count >> 29);
+
+	remaining_len = buf_limit - buf_index;
+
+	i = 0;
+	if (count >= remaining_len) {
+		if (buf_index) {
+			(void) memcpy(&sha2_ctx->buf_un.buf8[buf_index], input,
+			    remaining_len);
+			t4_sha256_multiblock(sha2_ctx, sha2_ctx->buf_un.buf8,
+			    1);
+			i = remaining_len;
+		}
+
+		block_count = (count - i) >> 6;
+		if (block_count > 0) {
+			t4_sha256_multiblock(sha2_ctx, &input[i], block_count);
+			i += block_count << 6;
+		}
+
+		if (count == i)
+			return (1);
+
+		buf_index = 0;
+	}
+
+	/* buffer remaining input */
+	(void) memcpy(&sha2_ctx->buf_un.buf8[buf_index], &input[i], count - i);
+	return (1);
+}
+
+
+/* End SHA256 digest operation, finalizing message digest and zeroing context */
+static int
+t4_digest_final_sha256(EVP_MD_CTX *ctx, unsigned char *md)
+{
+	SHA2_CTX	*sha2_ctx = (SHA2_CTX *)ctx->md_data;
+	uint8_t		bitcount_be[sizeof (sha2_ctx->count.c32)];
+	uint32_t	index  = (sha2_ctx->count.c32[1] >> 3) & 0x3f;
+
+	t4_sha1_256_encode(bitcount_be, sha2_ctx->count.c32,
+	    sizeof (bitcount_be));
+	(void) t4_digest_update_sha256(ctx, PADDING,
+	    ((index < 56) ? 56 : 120) - index);
+	(void) t4_digest_update_sha256(ctx, bitcount_be, sizeof (bitcount_be));
+	t4_sha1_256_encode(md, sha2_ctx->state.s32,
+	    sizeof (sha2_ctx->state.s32));
+
+	/* zeroize sensitive information */
+	(void) memset(sha2_ctx, 0, sizeof (*sha2_ctx));
+
+	return (1);
+}
+
+
+/*
+ * SHA512 functions
+ */
+static int
+t4_digest_init_sha512(EVP_MD_CTX *ctx)
+{
+	SHA2_CTX	*sha2_ctx = (SHA2_CTX *)ctx->md_data;
+
+	sha2_ctx->algotype = SHA512_MECH_INFO_TYPE;
+	sha2_ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
+	sha2_ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
+	sha2_ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
+	sha2_ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
+	sha2_ctx->state.s64[4] = 0x510e527fade682d1ULL;
+	sha2_ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
+	sha2_ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
+	sha2_ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
+	sha2_ctx->count.c64[0] = sha2_ctx->count.c64[1] = 0;
+
+	return (1);
+}
+
+
+/*
+ * Continue a SHA512 digest operation, using the message block to update the
+ * context.
+ */
+static int
+t4_digest_update_sha512(EVP_MD_CTX *ctx, const void *data, size_t count)
+{
+	SHA2_CTX	*sha2_ctx = (SHA2_CTX *)ctx->md_data;
+	size_t		i;
+	uint32_t	buf_index, remaining_len;
+	const uint32_t	buf_limit = 128;
+	const uint8_t	*input = data;
+	size_t		block_count;
+
+	if (count == 0)
+		return (1);
+
+	/* compute number of bytes mod 128 */
+	buf_index = (sha2_ctx->count.c64[1] >> 3) & 0x7F;
+
+	/* update number of bits */
+	if ((sha2_ctx->count.c64[1] += (count << 3)) < (count << 3))
+		sha2_ctx->count.c64[0]++;
+
+	sha2_ctx->count.c64[0] += (count >> 61);
+
+	remaining_len = buf_limit - buf_index;
+
+	i = 0;
+	if (count >= remaining_len) {
+		if (buf_index) {
+			(void) memcpy(&sha2_ctx->buf_un.buf8[buf_index], input,
+			    remaining_len);
+			t4_sha512_multiblock(sha2_ctx, sha2_ctx->buf_un.buf8,
+			    1);
+			i = remaining_len;
+		}
+
+		block_count = (count - i) >> 7;
+		if (block_count > 0) {
+			t4_sha512_multiblock(sha2_ctx, &input[i], block_count);
+			i += block_count << 7;
+		}
+
+		if (count == i)
+			return (1);
+
+		buf_index = 0;
+	}
+
+	/* buffer remaining input */
+	(void) memcpy(&sha2_ctx->buf_un.buf8[buf_index], &input[i], count - i);
+	return (1);
+}
+
+
+/* End SHA512 digest operation, finalizing message digest and zeroing context */
+static int
+t4_digest_final_sha512(EVP_MD_CTX *ctx, unsigned char *md)
+{
+	SHA2_CTX	*sha2_ctx = (SHA2_CTX *)ctx->md_data;
+	uint8_t		bitcount_be64[sizeof (sha2_ctx->count.c64)];
+	uint32_t	index  = (sha2_ctx->count.c64[1] >> 3) & 0x7f;
+
+	t4_sha512_encode64(bitcount_be64, sha2_ctx->count.c64,
+	    sizeof (bitcount_be64));
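+	/*
+	 * SHA512 pads the message out to 112 mod 128 bytes: e.g.
+	 * index = 120 takes 240 - 120 = 120 bytes of PADDING, after which
+	 * the 16-byte big-endian bit count completes the final 128-byte
+	 * block.
+	 */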
+	(void) t4_digest_update_sha512(ctx, PADDING,
+	    ((index < 112) ? 112 : 240) - index);
+	(void) t4_digest_update_sha512(ctx, bitcount_be64,
+	    sizeof (bitcount_be64));
+	t4_sha512_encode64(md, sha2_ctx->state.s64,
+	    sizeof (sha2_ctx->state.s64));
+
+	/* zeroize sensitive information */
+	(void) memset(sha2_ctx, 0, sizeof (*sha2_ctx));
+
+	return (1);
+}
+
+#endif	/* !OPENSSL_NO_SHA */
+#endif	/* COMPILE_HW_T4 */
+#endif	/* !OPENSSL_NO_HW && !OPENSSL_NO_HW_MD_T4 */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.0/engines/t4/eng_t4_err.c	Wed Jul 20 10:20:13 2011 -0700
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <openssl/err.h>
+#include "eng_t4_err.h"
+
+/* BEGIN ERROR CODES */
+
+#ifndef	OPENSSL_NO_ERR
+
+#define	ERR_FUNC(func)		ERR_PACK(0, func, 0)
+#define	ERR_REASON(reason)	ERR_PACK(0, 0, reason)
+
+static ERR_STRING_DATA t4_str_functs[] = {
+	{ERR_FUNC(T4_F_INIT),			"T4_INIT"},
+	{ERR_FUNC(T4_F_DESTROY),		"T4_DESTROY"},
+	{ERR_FUNC(T4_F_FINISH),			"T4_FINISH"},
+	{ERR_FUNC(T4_F_CIPHER_INIT_AES),	"T4_CIPHER_INIT_AES"},
+	{ERR_FUNC(T4_F_ADD_NID),		"T4_ADD_NID"},
+	{ERR_FUNC(T4_F_GET_ALL_CIPHERS),	"T4_GET_ALL_CIPHERS"},
+	{ERR_FUNC(T4_F_CIPHER_DO_AES),		"T4_CIPHER_DO_AES"},
+	{ERR_FUNC(T4_F_CIPHER_CLEANUP),		"T4_CIPHER_CLEANUP"},
+	{0, NULL}
+};
+
+static ERR_STRING_DATA t4_str_reasons[] = {
+	{ERR_REASON(T4_R_CIPHER_KEY),		"invalid cipher key"},
+	{ERR_REASON(T4_R_CIPHER_NID),		"invalid cipher NID"},
+	{ERR_REASON(T4_R_IV_LEN_INCORRECT),	"IV length incorrect"},
+	{ERR_REASON(T4_R_KEY_LEN_INCORRECT),	"key length incorrect"},
+	{ERR_REASON(T4_R_ASN1_OBJECT_CREATE),	"ASN1_OBJECT_create failed"},
+	{ERR_REASON(T4_R_NOT_BLOCKSIZE_LENGTH),	"data length not multiple of block size"},
+	{0, NULL}
+};
+#endif	/* !OPENSSL_NO_ERR */
+
+
+#ifdef T4_LIB_NAME
+static ERR_STRING_DATA T4_lib_name[] = {
+	{0, T4_LIB_NAME},
+	{0, NULL}
+};
+#endif
+
+static int t4_error_code = 0;
+static int t4_error_init = 1;
+
+
+static void
+ERR_load_t4_strings(void)
+{
+	if (t4_error_code == 0)
+		t4_error_code = ERR_get_next_error_library();
+
+	if (t4_error_init != 0) {
+		t4_error_init = 0;
+
+#ifndef OPENSSL_NO_ERR
+		ERR_load_strings(t4_error_code, t4_str_functs);
+		ERR_load_strings(t4_error_code, t4_str_reasons);
+#endif
+
+#ifdef T4_LIB_NAME
+		T4_lib_name->error =
+		    ERR_PACK(t4_error_code, 0, 0);
+		ERR_load_strings(0, T4_lib_name);
+#endif
+	}
+}
+
+
+static void
+ERR_unload_t4_strings(void)
+{
+	if (t4_error_init == 0) {
+#ifndef OPENSSL_NO_ERR
+		ERR_unload_strings(t4_error_code, t4_str_functs);
+		ERR_unload_strings(t4_error_code, t4_str_reasons);
+#endif
+
+#ifdef T4_LIB_NAME
+		ERR_unload_strings(0, T4_lib_name);
+#endif
+		t4_error_init = 1;
+	}
+}
+
+
+static void
+ERR_t4_error(int function, int reason, char *file, int line)
+{
+	if (t4_error_init != 0) {
+		ERR_load_t4_strings();
+	}
+
+	if (t4_error_code == 0)
+		t4_error_code = ERR_get_next_error_library();
+	ERR_PUT_error(t4_error_code, function, reason, file, line);
+}
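+
+/*
+ * Typical use (hypothetical call site): a routine that detects a bad
+ * key length would report it as
+ *
+ *	T4err(T4_F_CIPHER_INIT_AES, T4_R_KEY_LEN_INCORRECT);
+ *
+ * which expands to ERR_t4_error(..., __FILE__, __LINE__) and loads the
+ * string tables above on first use.
+ */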
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.0/engines/t4/eng_t4_err.h	Wed Jul 20 10:20:13 2011 -0700
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef	ENG_T4_ERR_H
+#define	ENG_T4_ERR_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+static void ERR_unload_t4_strings(void);
+#pragma inline(ERR_unload_t4_strings)
+static void ERR_t4_error(int function, int reason, char *file, int line);
+
+#define	T4err(f, r)	ERR_t4_error((f), (r), __FILE__, __LINE__)
+
+/* Function codes */
+#define	T4_F_INIT				100
+#define	T4_F_DESTROY				101
+#define	T4_F_FINISH				102
+#define	T4_F_CIPHER_INIT_AES			103
+#define	T4_F_ADD_NID				104
+#define	T4_F_GET_ALL_CIPHERS			105
+#define	T4_F_CIPHER_DO_AES			106
+#define	T4_F_CIPHER_CLEANUP			107
+
+/* Reason codes */
+#define	T4_R_CIPHER_KEY				100
+#define	T4_R_CIPHER_NID				101
+#define	T4_R_IV_LEN_INCORRECT			102
+#define	T4_R_KEY_LEN_INCORRECT			103
+#define	T4_R_ASN1_OBJECT_CREATE			104
+#define	T4_R_NOT_BLOCKSIZE_LENGTH		105
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* ENG_T4_ERR_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.0/engines/t4/t4_aes.S	Wed Jul 20 10:20:13 2011 -0700
@@ -0,0 +1,2662 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*LINTLIBRARY*/
+
+#if defined(lint) || defined(__lint)
+
+
+#include <sys/types.h>
+
+/*ARGSUSED*/
+void t4_aes_expand128(uint64_t *rk, const uint32_t *key)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes_expand192(uint64_t *rk, const uint32_t *key)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes_expand256(uint64_t *rk, const uint32_t *key)
+{ return; }
+
+void t4_aes128_load_keys_for_encrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_load_keys_for_encrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_load_keys_for_encrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_ecb_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_cbc_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_ctr_crypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_cfb128_encrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+void t4_aes128_load_keys_for_decrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_load_keys_for_decrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_load_keys_for_decrypt(uint64_t *ks)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_ecb_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_cbc_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes128_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes192_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
+
+/*ARGSUSED*/
+void t4_aes256_cfb128_decrypt(uint64_t *ks, uint64_t *asm_in,
+    uint64_t * asm_out, size_t amount_to_encrypt, uint64_t *iv)
+{ return; }
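+
+/*
+ * Calling convention shared by the routines above: the first argument
+ * is the expanded key schedule, followed by the input and output
+ * buffers, a byte count that must be a multiple of the 16-byte AES
+ * block, and, for the chaining modes, an IV that is updated in place.
+ * The load_keys_for_* routines must be called first so that the round
+ * keys are resident in the floating-point registers consumed by the
+ * round macros below.
+ */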
+
+#else	/* lint || __lint */
+
+#include <sys/asm_linkage.h>
+
+
+	ENTRY(t4_aes_expand128)
+
+!load key
+	ld	[%o1], %f0
+	ld	[%o1 + 0x4], %f1
+	ld	[%o1 + 0x8], %f2
+	ld	[%o1 + 0xc], %f3
+
+!expand the key
+	.byte	0x88, 0xc8, 0x01, 0x02
+	.byte	0x8d, 0xb0, 0xa6, 0x24
+	.byte	0x90, 0xc9, 0x03, 0x06
+	.byte	0x95, 0xb1, 0xa6, 0x28
+	.byte	0x98, 0xca, 0x05, 0x0a
+	.byte	0x9d, 0xb2, 0xa6, 0x2c
+	.byte	0xa0, 0xcb, 0x07, 0x0e
+	.byte	0xa5, 0xb3, 0xa6, 0x30
+	.byte	0xa8, 0xcc, 0x09, 0x12
+	.byte	0xad, 0xb4, 0xa6, 0x34
+	.byte	0xb0, 0xcd, 0x0b, 0x16
+	.byte	0xb5, 0xb5, 0xa6, 0x38
+	.byte	0xb8, 0xce, 0x0d, 0x1a
+	.byte	0xbd, 0xb6, 0xa6, 0x3c
+	.byte	0x82, 0xcf, 0x0f, 0x1e
+	.byte	0x87, 0xb7, 0xa6, 0x21
+	.byte	0x8a, 0xc8, 0x51, 0x03
+	.byte	0x8f, 0xb0, 0xe6, 0x25
+	.byte	0x92, 0xc9, 0x53, 0x07
+	.byte	0x97, 0xb1, 0xe6, 0x29
+
+!copy expanded key back into array
+	std	%f4, [%o0]
+	std	%f6, [%o0 + 0x8]
+	std	%f8, [%o0 + 0x10]
+	std	%f10, [%o0 + 0x18]
+	std	%f12, [%o0 + 0x20]
+	std	%f14, [%o0 + 0x28]
+	std	%f16, [%o0 + 0x30]
+	std	%f18, [%o0 + 0x38]
+	std	%f20, [%o0 + 0x40]
+	std	%f22, [%o0 + 0x48]
+	std	%f24, [%o0 + 0x50]
+	std	%f26, [%o0 + 0x58]
+	std	%f28, [%o0 + 0x60]
+	std	%f30, [%o0 + 0x68]
+	std	%f32, [%o0 + 0x70]
+	std	%f34, [%o0 + 0x78]
+	std	%f36, [%o0 + 0x80]
+	std	%f38, [%o0 + 0x88]
+	std	%f40, [%o0 + 0x90]
+	retl
+	std	%f42, [%o0 + 0x98]
+
+	SET_SIZE(t4_aes_expand128)
+
+
+	ENTRY(t4_aes_expand192)
+
+!load key
+	ld	[%o1], %f0
+	ld	[%o1 + 0x4], %f1
+	ld	[%o1 + 0x8], %f2
+	ld	[%o1 + 0xc], %f3
+	ld	[%o1 + 0x10], %f4
+	ld	[%o1 + 0x14], %f5
+
+!expand the key
+	.byte	0x8c, 0xc8, 0x01, 0x04
+	.byte	0x91, 0xb0, 0xa6, 0x26
+	.byte	0x95, 0xb1, 0x26, 0x28
+	.byte	0x98, 0xc9, 0x83, 0x0a
+	.byte	0x9d, 0xb2, 0x26, 0x2c
+	.byte	0xa1, 0xb2, 0xa6, 0x2e
+	.byte	0xa4, 0xcb, 0x05, 0x10
+	.byte	0xa9, 0xb3, 0xa6, 0x32
+	.byte	0xad, 0xb4, 0x26, 0x34
+	.byte	0xb0, 0xcc, 0x87, 0x16
+	.byte	0xb5, 0xb5, 0x26, 0x38
+	.byte	0xb9, 0xb5, 0xa6, 0x3a
+	.byte	0xbc, 0xce, 0x09, 0x1c
+	.byte	0x83, 0xb6, 0xa6, 0x3e
+	.byte	0x87, 0xb7, 0x26, 0x21
+	.byte	0x8a, 0xcf, 0x8b, 0x03
+	.byte	0x8f, 0xb0, 0x66, 0x25
+	.byte	0x93, 0xb0, 0xe6, 0x27
+	.byte	0x96, 0xc9, 0x4d, 0x09
+	.byte	0x9b, 0xb1, 0xe6, 0x2b
+	.byte	0x9f, 0xb2, 0x66, 0x2d
+	.byte	0xa2, 0xca, 0xcf, 0x0f
+	.byte	0xa7, 0xb3, 0x66, 0x31
+
+!copy expanded key back into array
+	std	%f6, [%o0]
+	std	%f8, [%o0 + 0x8]
+	std	%f10, [%o0 + 0x10]
+	std	%f12, [%o0 + 0x18]
+	std	%f14, [%o0 + 0x20]
+	std	%f16, [%o0 + 0x28]
+	std	%f18, [%o0 + 0x30]
+	std	%f20, [%o0 + 0x38]
+	std	%f22, [%o0 + 0x40]
+	std	%f24, [%o0 + 0x48]
+	std	%f26, [%o0 + 0x50]
+	std	%f28, [%o0 + 0x58]
+	std	%f30, [%o0 + 0x60]
+	std	%f32, [%o0 + 0x68]
+	std	%f34, [%o0 + 0x70]
+	std	%f36, [%o0 + 0x78]
+	std	%f38, [%o0 + 0x80]
+	std	%f40, [%o0 + 0x88]
+	std	%f42, [%o0 + 0x90]
+	std	%f44, [%o0 + 0x98]
+	std	%f46, [%o0 + 0xa0]
+	std	%f48, [%o0 + 0xa8]
+	retl
+	std	%f50, [%o0 + 0xb0]
+
+	SET_SIZE(t4_aes_expand192)
+
+
+	ENTRY(t4_aes_expand256)
+
+!load key
+	ld	[%o1], %f0
+	ld	[%o1 + 0x4], %f1
+	ld	[%o1 + 0x8], %f2
+	ld	[%o1 + 0xc], %f3
+	ld	[%o1 + 0x10], %f4
+	ld	[%o1 + 0x14], %f5
+	ld	[%o1 + 0x18], %f6
+	ld	[%o1 + 0x1c], %f7
+
+!expand the key
+	.byte	0x90, 0xc8, 0x01, 0x06
+	.byte	0x95, 0xb0, 0xa6, 0x28
+	.byte	0x99, 0xb1, 0x26, 0x0a
+	.byte	0x9d, 0xb1, 0xa6, 0x2c
+	.byte	0xa0, 0xca, 0x03, 0x0e
+	.byte	0xa5, 0xb2, 0xa6, 0x30
+	.byte	0xa9, 0xb3, 0x26, 0x12
+	.byte	0xad, 0xb3, 0xa6, 0x34
+	.byte	0xb0, 0xcc, 0x05, 0x16
+	.byte	0xb5, 0xb4, 0xa6, 0x38
+	.byte	0xb9, 0xb5, 0x26, 0x1a
+	.byte	0xbd, 0xb5, 0xa6, 0x3c
+	.byte	0x82, 0xce, 0x07, 0x1e
+	.byte	0x87, 0xb6, 0xa6, 0x21
+	.byte	0x8b, 0xb7, 0x26, 0x03
+	.byte	0x8f, 0xb7, 0xa6, 0x25
+	.byte	0x92, 0xc8, 0x49, 0x07
+	.byte	0x97, 0xb0, 0xe6, 0x29
+	.byte	0x9b, 0xb1, 0x66, 0x0b
+	.byte	0x9f, 0xb1, 0xe6, 0x2d
+	.byte	0xa2, 0xca, 0x4b, 0x0f
+	.byte	0xa7, 0xb2, 0xe6, 0x31
+	.byte	0xab, 0xb3, 0x66, 0x13
+	.byte	0xaf, 0xb3, 0xe6, 0x35
+	.byte	0xb2, 0xcc, 0x4d, 0x17
+	.byte	0xb7, 0xb4, 0xe6, 0x39
+
+!copy expanded key back into array
+	std	%f8, [%o0]
+	std	%f10, [%o0 + 0x8]
+	std	%f12, [%o0 + 0x10]
+	std	%f14, [%o0 + 0x18]
+	std	%f16, [%o0 + 0x20]
+	std	%f18, [%o0 + 0x28]
+	std	%f20, [%o0 + 0x30]
+	std	%f22, [%o0 + 0x38]
+	std	%f24, [%o0 + 0x40]
+	std	%f26, [%o0 + 0x48]
+	std	%f28, [%o0 + 0x50]
+	std	%f30, [%o0 + 0x58]
+	std	%f32, [%o0 + 0x60]
+	std	%f34, [%o0 + 0x68]
+	std	%f36, [%o0 + 0x70]
+	std	%f38, [%o0 + 0x78]
+	std	%f40, [%o0 + 0x80]
+	std	%f42, [%o0 + 0x88]
+	std	%f44, [%o0 + 0x90]
+	std	%f46, [%o0 + 0x98]
+	std	%f48, [%o0 + 0xa0]
+	std	%f50, [%o0 + 0xa8]
+	std	%f52, [%o0 + 0xb0]
+	std	%f54, [%o0 + 0xb8]
+	std	%f56, [%o0 + 0xc0]
+	retl
+	std	%f58, [%o0 + 0xc8]
+
+	SET_SIZE(t4_aes_expand256)
+
+
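+! The .byte quadruples above and in the round macros below are
+! hand-assembled SPARC T4 crypto instructions (aes_kexpand, aes_eround
+! and aes_dround forms), emitted as raw opcodes so that the file still
+! assembles with toolchains that predate the T4 crypto extensions.
+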
+#define	FIRST_TWO_EROUNDS \
+	.byte	0xb2, 0xc8, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xc8, 0xbe, 0x3d ; \
+	.byte	0xba, 0xc9, 0x36, 0x19 ; \
+	.byte	0xbe, 0xc9, 0xb6, 0x39
+
+#define	MID_TWO_EROUNDS \
+	.byte	0xb2, 0xca, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xca, 0xbe, 0x3d ; \
+	.byte	0xba, 0xcb, 0x36, 0x19 ; \
+	.byte	0xbe, 0xcb, 0xb6, 0x39
+
+#define	MID_TWO_EROUNDS_2 \
+	.byte	0x8c, 0xca, 0x04, 0x00 ; \
+	.byte	0x88, 0xca, 0x84, 0x20 ; \
+	.byte	0xb2, 0xca, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xca, 0xbe, 0x3d ; \
+	.byte	0x80, 0xcb, 0x08, 0x06 ; \
+	.byte	0x84, 0xcb, 0x88, 0x26 ; \
+	.byte	0xba, 0xcb, 0x36, 0x19 ; \
+	.byte	0xbe, 0xcb, 0xb6, 0x39
+
+#define	TEN_EROUNDS \
+	.byte	0xb2, 0xcc, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xcc, 0xbe, 0x3d ; \
+	.byte	0xba, 0xcd, 0x36, 0x19 ; \
+	.byte	0xbe, 0xcd, 0xb6, 0x39 ; \
+	.byte	0xb2, 0xce, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xce, 0xbe, 0x3d ; \
+	.byte	0xba, 0xcf, 0x36, 0x19 ; \
+	.byte	0xbe, 0xcf, 0xb6, 0x39 ; \
+	.byte	0xb2, 0xc8, 0x7e, 0x1d ; \
+	.byte	0xb6, 0xc8, 0xfe, 0x3d ; \
+	.byte	0xba, 0xc9, 0x76, 0x19 ; \
+	.byte	0xbe, 0xc9, 0xf6, 0x39 ; \
+	.byte	0xb2, 0xca, 0x7e, 0x1d ; \
+	.byte	0xb6, 0xca, 0xfe, 0x3d ; \
+	.byte	0xba, 0xcb, 0x76, 0x19 ; \
+	.byte	0xbe, 0xcb, 0xf6, 0x39 ; \
+	.byte	0xb2, 0xcc, 0x7e, 0x1d ; \
+	.byte	0xb6, 0xcc, 0xfe, 0x3d ; \
+	.byte	0xba, 0xcd, 0x76, 0x99 ; \
+	.byte	0xbe, 0xcd, 0xf6, 0xb9
+
+#define	TEN_EROUNDS_2 \
+	.byte	0x8c, 0xcc, 0x04, 0x00 ; \
+	.byte	0x88, 0xcc, 0x84, 0x20 ; \
+	.byte	0xb2, 0xcc, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xcc, 0xbe, 0x3d ; \
+	.byte	0x80, 0xcd, 0x08, 0x06 ; \
+	.byte	0x84, 0xcd, 0x88, 0x26 ; \
+	.byte	0xba, 0xcd, 0x36, 0x19 ; \
+	.byte	0xbe, 0xcd, 0xb6, 0x39 ; \
+	.byte	0x8c, 0xce, 0x04, 0x00 ; \
+	.byte	0x88, 0xce, 0x84, 0x20 ; \
+	.byte	0xb2, 0xce, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xce, 0xbe, 0x3d ; \
+	.byte	0x80, 0xcf, 0x08, 0x06 ; \
+	.byte	0x84, 0xcf, 0x88, 0x26 ; \
+	.byte	0xba, 0xcf, 0x36, 0x19 ; \
+	.byte	0xbe, 0xcf, 0xb6, 0x39 ; \
+	.byte	0x8c, 0xc8, 0x44, 0x00 ; \
+	.byte	0x88, 0xc8, 0xc4, 0x20 ; \
+	.byte	0xb2, 0xc8, 0x7e, 0x1d ; \
+	.byte	0xb6, 0xc8, 0xfe, 0x3d ; \
+	.byte	0x80, 0xc9, 0x48, 0x06 ; \
+	.byte	0x84, 0xc9, 0xc8, 0x26 ; \
+	.byte	0xba, 0xc9, 0x76, 0x19 ; \
+	.byte	0xbe, 0xc9, 0xf6, 0x39 ; \
+	.byte	0x8c, 0xca, 0x44, 0x00 ; \
+	.byte	0x88, 0xca, 0xc4, 0x20 ; \
+	.byte	0xb2, 0xca, 0x7e, 0x1d ; \
+	.byte	0xb6, 0xca, 0xfe, 0x3d ; \
+	.byte	0x80, 0xcb, 0x48, 0x06 ; \
+	.byte	0x84, 0xcb, 0xc8, 0x26 ; \
+	.byte	0xba, 0xcb, 0x76, 0x19 ; \
+	.byte	0xbe, 0xcb, 0xf6, 0x39 ; \
+	.byte	0x8c, 0xcc, 0x44, 0x00 ; \
+	.byte	0x88, 0xcc, 0xc4, 0x20 ; \
+	.byte	0xb2, 0xcc, 0x7e, 0x1d ; \
+	.byte	0xb6, 0xcc, 0xfe, 0x3d ; \
+	.byte	0x80, 0xcd, 0x48, 0x86 ; \
+	.byte	0x84, 0xcd, 0xc8, 0xa6 ; \
+	.byte	0xba, 0xcd, 0x76, 0x99 ; \
+	.byte	0xbe, 0xcd, 0xf6, 0xb9
+
+#define	TWELVE_EROUNDS \
+	MID_TWO_EROUNDS	; \
+	TEN_EROUNDS
+
+#define	TWELVE_EROUNDS_2 \
+	MID_TWO_EROUNDS_2	; \
+	TEN_EROUNDS_2
+
+#define	FOURTEEN_EROUNDS \
+	FIRST_TWO_EROUNDS ; \
+	TWELVE_EROUNDS
+
+#define	FOURTEEN_EROUNDS_2 \
+	.byte	0xb0, 0xc8, 0x2c, 0x14 ; \
+	.byte	0xac, 0xc8, 0xac, 0x34 ; \
+	ldd	[%o0 + 0x60], %f20 ; \
+	.byte	0xb2, 0xc8, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xc8, 0xbe, 0x3d ; \
+	.byte	0x80, 0xc9, 0x2c, 0x18 ; \
+	.byte	0x84, 0xc9, 0xac, 0x38 ;\
+	ldd	[%o0 + 0x68], %f22 ; \
+	.byte	0xba, 0xc9, 0x36, 0x19 ; \
+	ldd	[%o0 + 0x70], %f24 ; \
+	.byte	0xbe, 0xc9, 0xb6, 0x39 ; \
+	.byte	0x8c, 0xca, 0x04, 0x00 ; \
+	.byte	0x88, 0xca, 0x84, 0x20 ; \
+	.byte	0xb2, 0xca, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xca, 0xbe, 0x3d ; \
+	.byte	0x80, 0xcb, 0x08, 0x06 ; \
+	.byte	0x84, 0xcb, 0x88, 0x26 ; \
+	.byte	0xba, 0xcb, 0x36, 0x19 ; \
+	.byte	0xbe, 0xcb, 0xb6, 0x39 ; \
+	.byte	0x8c, 0xcc, 0x04, 0x00 ; \
+	.byte	0x88, 0xcc, 0x84, 0x20 ; \
+	.byte	0xb2, 0xcc, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xcc, 0xbe, 0x3d ; \
+	.byte	0x80, 0xcd, 0x08, 0x06 ; \
+	.byte	0x84, 0xcd, 0x88, 0x26 ; \
+	.byte	0xba, 0xcd, 0x36, 0x19 ; \
+	.byte	0xbe, 0xcd, 0xb6, 0x39 ; \
+	.byte	0x8c, 0xce, 0x04, 0x00 ; \
+	.byte	0x88, 0xce, 0x84, 0x20 ; \
+	.byte	0xb2, 0xce, 0x3e, 0x1d ; \
+	.byte	0xb6, 0xce, 0xbe, 0x3d ; \
+	.byte	0x80, 0xcf, 0x08, 0x06 ; \
+	.byte	0x84, 0xcf, 0x88, 0x26 ; \
+	.byte	0xba, 0xcf, 0x36, 0x19 ; \
+	.byte	0xbe, 0xcf, 0xb6, 0x39 ; \
+	.byte	0x8c, 0xc8, 0x44, 0x00 ; \
+	.byte	0x88, 0xc8, 0xc4, 0x20 ; \
+	.byte	0xb2, 0xc8, 0x7e, 0x1d ; \
+	.byte	0xb6, 0xc8, 0xfe, 0x3d ; \
+	.byte	0x80, 0xc9, 0x48, 0x06 ; \
+	.byte	0x84, 0xc9, 0xc8, 0x26 ; \
+	.byte	0xba, 0xc9, 0x76, 0x19 ; \
+	.byte	0xbe, 0xc9, 0xf6, 0x39 ; \
+	.byte	0x8c, 0xca, 0x44, 0x00 ; \
+	.byte	0x88, 0xca, 0xc4, 0x20 ; \
+	.byte	0xb2, 0xca, 0x7e, 0x1d ; \
+	.byte	0xb6, 0xca, 0xfe, 0x3d ; \
+	.byte	0x80, 0xcb, 0x48, 0x06 ; \
+	.byte	0x84, 0xcb, 0xc8, 0x26 ; \
+	.byte	0xba, 0xcb, 0x76, 0x19 ; \
+	.byte	0xbe, 0xcb, 0xf6, 0x39 ; \
+	.byte	0x8c, 0xcc, 0x44, 0x00 ; \
+	.byte	0x88, 0xcc, 0xc4, 0x20 ; \
+	ldd	[%o0 + 0x10], %f0 ; \
+	.byte	0xb2, 0xcc, 0x7e, 0x1d ; \
+	ldd	[%o0 + 0x18], %f2 ; \
+	.byte	0xb6, 0xcc, 0xfe, 0x3d ; \
+	.byte	0xa8, 0xcd, 0x48, 0x86 ; \
+	.byte	0xac, 0xcd, 0xc8, 0xa6 ; \
+	ldd	[%o0 + 0x20], %f4 ; \
+	.byte	0xba, 0xcd, 0x76, 0x99 ; \
+	ldd	[%o0 + 0x28], %f6 ; \
+	.byte	0xbe, 0xcd, 0xf6, 0xb9
+
+#define	FIRST_TWO_DROUNDS \
+	.byte	0xb2, 0xc8, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xc8, 0xbe, 0x7d ; \
+	.byte	0xba, 0xc9, 0x36, 0x59 ; \
+	.byte	0xbe, 0xc9, 0xb6, 0x79
+
+#define	MID_TWO_DROUNDS \
+	.byte	0xb2, 0xca, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xca, 0xbe, 0x7d ; \
+	.byte	0xba, 0xcb, 0x36, 0x59 ; \
+	.byte	0xbe, 0xcb, 0xb6, 0x79
+
+#define	MID_TWO_DROUNDS_2 \
+	.byte	0x8c, 0xca, 0x04, 0x40 ; \
+	.byte	0x88, 0xca, 0x84, 0x60 ; \
+	.byte	0xb2, 0xca, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xca, 0xbe, 0x7d ; \
+	.byte	0x80, 0xcb, 0x08, 0x46 ; \
+	.byte	0x84, 0xcb, 0x88, 0x66 ; \
+	.byte	0xba, 0xcb, 0x36, 0x59 ; \
+	.byte	0xbe, 0xcb, 0xb6, 0x79
+
+#define	TEN_DROUNDS \
+	.byte	0xb2, 0xcc, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xcc, 0xbe, 0x7d ; \
+	.byte	0xba, 0xcd, 0x36, 0x59 ; \
+	.byte	0xbe, 0xcd, 0xb6, 0x79 ; \
+	.byte	0xb2, 0xce, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xce, 0xbe, 0x7d ; \
+	.byte	0xba, 0xcf, 0x36, 0x59 ; \
+	.byte	0xbe, 0xcf, 0xb6, 0x79 ; \
+	.byte	0xb2, 0xc8, 0x7e, 0x5d ; \
+	.byte	0xb6, 0xc8, 0xfe, 0x7d ; \
+	.byte	0xba, 0xc9, 0x76, 0x59 ; \
+	.byte	0xbe, 0xc9, 0xf6, 0x79 ; \
+	.byte	0xb2, 0xca, 0x7e, 0x5d ; \
+	.byte	0xb6, 0xca, 0xfe, 0x7d ; \
+	.byte	0xba, 0xcb, 0x76, 0x59 ; \
+	.byte	0xbe, 0xcb, 0xf6, 0x79 ; \
+	.byte	0xb2, 0xcc, 0x7e, 0x5d ; \
+	.byte	0xb6, 0xcc, 0xfe, 0x7d ; \
+	.byte	0xba, 0xcd, 0x76, 0xd9 ; \
+	.byte	0xbe, 0xcd, 0xf6, 0xf9
+
+#define	TEN_DROUNDS_2 \
+	.byte	0x8c, 0xcc, 0x04, 0x40 ; \
+	.byte	0x88, 0xcc, 0x84, 0x60 ; \
+	.byte	0xb2, 0xcc, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xcc, 0xbe, 0x7d ; \
+	.byte	0x80, 0xcd, 0x08, 0x46 ; \
+	.byte	0x84, 0xcd, 0x88, 0x66 ; \
+	.byte	0xba, 0xcd, 0x36, 0x59 ; \
+	.byte	0xbe, 0xcd, 0xb6, 0x79 ; \
+	.byte	0x8c, 0xce, 0x04, 0x40 ; \
+	.byte	0x88, 0xce, 0x84, 0x60 ; \
+	.byte	0xb2, 0xce, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xce, 0xbe, 0x7d ; \
+	.byte	0x80, 0xcf, 0x08, 0x46 ; \
+	.byte	0x84, 0xcf, 0x88, 0x66 ; \
+	.byte	0xba, 0xcf, 0x36, 0x59 ; \
+	.byte	0xbe, 0xcf, 0xb6, 0x79 ; \
+	.byte	0x8c, 0xc8, 0x44, 0x40 ; \
+	.byte	0x88, 0xc8, 0xc4, 0x60 ; \
+	.byte	0xb2, 0xc8, 0x7e, 0x5d ; \
+	.byte	0xb6, 0xc8, 0xfe, 0x7d ; \
+	.byte	0x80, 0xc9, 0x48, 0x46 ; \
+	.byte	0x84, 0xc9, 0xc8, 0x66 ; \
+	.byte	0xba, 0xc9, 0x76, 0x59 ; \
+	.byte	0xbe, 0xc9, 0xf6, 0x79 ; \
+	.byte	0x8c, 0xca, 0x44, 0x40 ; \
+	.byte	0x88, 0xca, 0xc4, 0x60 ; \
+	.byte	0xb2, 0xca, 0x7e, 0x5d ; \
+	.byte	0xb6, 0xca, 0xfe, 0x7d ; \
+	.byte	0x80, 0xcb, 0x48, 0x46 ; \
+	.byte	0x84, 0xcb, 0xc8, 0x66 ; \
+	.byte	0xba, 0xcb, 0x76, 0x59 ; \
+	.byte	0xbe, 0xcb, 0xf6, 0x79 ; \
+	.byte	0x8c, 0xcc, 0x44, 0x40 ; \
+	.byte	0x88, 0xcc, 0xc4, 0x60 ; \
+	.byte	0xb2, 0xcc, 0x7e, 0x5d ; \
+	.byte	0xb6, 0xcc, 0xfe, 0x7d ; \
+	.byte	0x80, 0xcd, 0x48, 0xc6 ; \
+	.byte	0x84, 0xcd, 0xc8, 0xe6 ; \
+	.byte	0xba, 0xcd, 0x76, 0xd9 ; \
+	.byte	0xbe, 0xcd, 0xf6, 0xf9
+
+#define	TWELVE_DROUNDS \
+	MID_TWO_DROUNDS	; \
+	TEN_DROUNDS
+
+#define	TWELVE_DROUNDS_2 \
+	MID_TWO_DROUNDS_2	; \
+	TEN_DROUNDS_2
+
+#define	FOURTEEN_DROUNDS \
+	FIRST_TWO_DROUNDS ; \
+	TWELVE_DROUNDS
+
+#define	FOURTEEN_DROUNDS_2 \
+	.byte	0xb0, 0xc8, 0x2c, 0x54 ; \
+	.byte	0xac, 0xc8, 0xac, 0x74 ; \
+	ldd	[%o0 + 0x80], %f20 ; \
+	.byte	0xb2, 0xc8, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xc8, 0xbe, 0x7d ; \
+	.byte	0x80, 0xc9, 0x2c, 0x58 ; \
+	.byte	0x84, 0xc9, 0xac, 0x78 ; \
+	ldd	[%o0 + 0x88], %f22 ; \
+	.byte	0xba, 0xc9, 0x36, 0x59 ; \
+	ldd	[%o0 + 0x70], %f24 ; \
+	.byte	0xbe, 0xc9, 0xb6, 0x79 ; \
+	.byte	0x8c, 0xca, 0x04, 0x40 ; \
+	.byte	0x88, 0xca, 0x84, 0x60 ; \
+	.byte	0xb2, 0xca, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xca, 0xbe, 0x7d ; \
+	.byte	0x80, 0xcb, 0x08, 0x46 ; \
+	.byte	0x84, 0xcb, 0x88, 0x66 ; \
+	.byte	0xba, 0xcb, 0x36, 0x59 ; \
+	.byte	0xbe, 0xcb, 0xb6, 0x79 ; \
+	.byte	0x8c, 0xcc, 0x04, 0x40 ; \
+	.byte	0x88, 0xcc, 0x84, 0x60 ; \
+	.byte	0xb2, 0xcc, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xcc, 0xbe, 0x7d ; \
+	.byte	0x80, 0xcd, 0x08, 0x46 ; \
+	.byte	0x84, 0xcd, 0x88, 0x66 ; \
+	.byte	0xba, 0xcd, 0x36, 0x59 ; \
+	.byte	0xbe, 0xcd, 0xb6, 0x79 ; \
+	.byte	0x8c, 0xce, 0x04, 0x40 ; \
+	.byte	0x88, 0xce, 0x84, 0x60 ; \
+	.byte	0xb2, 0xce, 0x3e, 0x5d ; \
+	.byte	0xb6, 0xce, 0xbe, 0x7d ; \
+	.byte	0x80, 0xcf, 0x08, 0x46 ; \
+	.byte	0x84, 0xcf, 0x88, 0x66 ; \
+	.byte	0xba, 0xcf, 0x36, 0x59 ; \
+	.byte	0xbe, 0xcf, 0xb6, 0x79 ; \
+	.byte	0x8c, 0xc8, 0x44, 0x40 ; \
+	.byte	0x88, 0xc8, 0xc4, 0x60 ; \
+	.byte	0xb2, 0xc8, 0x7e, 0x5d ; \
+	.byte	0xb6, 0xc8, 0xfe, 0x7d ; \
+	.byte	0x80, 0xc9, 0x48, 0x46 ; \
+	.byte	0x84, 0xc9, 0xc8, 0x66 ; \
+	.byte	0xba, 0xc9, 0x76, 0x59 ; \
+	.byte	0xbe, 0xc9, 0xf6, 0x79 ; \
+	.byte	0x8c, 0xca, 0x44, 0x40 ; \
+	.byte	0x88, 0xca, 0xc4, 0x60 ; \
+	.byte	0xb2, 0xca, 0x7e, 0x5d ; \
+	.byte	0xb6, 0xca, 0xfe, 0x7d ; \
+	.byte	0x80, 0xcb, 0x48, 0x46 ; \
+	.byte	0x84, 0xcb, 0xc8, 0x66 ; \
+	.byte	0xba, 0xcb, 0x76, 0x59 ; \
+	.byte	0xbe, 0xcb, 0xf6, 0x79 ; \
+	.byte	0x8c, 0xcc, 0x44, 0x40 ; \
+	.byte	0x88, 0xcc, 0xc4, 0x60 ; \
+	ldd	[%o0 + 0xd0], %f0 ; \
+	.byte	0xb2, 0xcc, 0x7e, 0x5d ; \
+	ldd	[%o0 + 0xd8], %f2 ; \
+	.byte	0xb6, 0xcc, 0xfe, 0x7d ; \
+	.byte	0xa8, 0xcd, 0x48, 0xc6 ; \
+	.byte	0xac, 0xcd, 0xc8, 0xe6 ; \
+	ldd	[%o0 + 0xc0], %f4 ; \
+	.byte	0xba, 0xcd, 0x76, 0xd9 ; \
+	ldd	[%o0 + 0xc8], %f6 ; \
+	.byte	0xbe, 0xcd, 0xf6, 0xf9
+
+
+	ENTRY(t4_aes128_load_keys_for_encrypt)
+
+	ldd	[%o0 + 0x10], %f16
+	ldd	[%o0 + 0x18], %f18
+	ldd	[%o0 + 0x20], %f20
+	ldd	[%o0 + 0x28], %f22
+	ldd	[%o0 + 0x30], %f24
+	ldd	[%o0 + 0x38], %f26
+	ldd	[%o0 + 0x40], %f28
+	ldd	[%o0 + 0x48], %f30
+	ldd	[%o0 + 0x50], %f32
+	ldd	[%o0 + 0x58], %f34
+	ldd	[%o0 + 0x60], %f36
+	ldd	[%o0 + 0x68], %f38
+	ldd	[%o0 + 0x70], %f40
+	ldd	[%o0 + 0x78], %f42
+	ldd	[%o0 + 0x80], %f44
+	ldd	[%o0 + 0x88], %f46
+	ldd	[%o0 + 0x90], %f48
+	ldd	[%o0 + 0x98], %f50
+	ldd	[%o0 + 0xa0], %f52
+	retl
+	ldd	[%o0 + 0xa8], %f54
+
+	SET_SIZE(t4_aes128_load_keys_for_encrypt)
+
+
+	ENTRY(t4_aes192_load_keys_for_encrypt)
+
+	ldd	[%o0 + 0x10], %f8
+	ldd	[%o0 + 0x18], %f10
+	ldd	[%o0 + 0x20], %f12
+	ldd	[%o0 + 0x28], %f14
+	ldd	[%o0 + 0x30], %f16
+	ldd	[%o0 + 0x38], %f18
+	ldd	[%o0 + 0x40], %f20
+	ldd	[%o0 + 0x48], %f22
+	ldd	[%o0 + 0x50], %f24
+	ldd	[%o0 + 0x58], %f26
+	ldd	[%o0 + 0x60], %f28
+	ldd	[%o0 + 0x68], %f30
+	ldd	[%o0 + 0x70], %f32
+	ldd	[%o0 + 0x78], %f34
+	ldd	[%o0 + 0x80], %f36
+	ldd	[%o0 + 0x88], %f38
+	ldd	[%o0 + 0x90], %f40
+	ldd	[%o0 + 0x98], %f42
+	ldd	[%o0 + 0xa0], %f44
+	ldd	[%o0 + 0xa8], %f46
+	ldd	[%o0 + 0xb0], %f48
+	ldd	[%o0 + 0xb8], %f50
+	ldd	[%o0 + 0xc0], %f52
+	retl
+	ldd	[%o0 + 0xc8], %f54
+
+	SET_SIZE(t4_aes192_load_keys_for_encrypt)
+
+
+	ENTRY(t4_aes256_load_keys_for_encrypt)
+
+	ldd	[%o0 + 0x10], %f0
+	ldd	[%o0 + 0x18], %f2
+	ldd	[%o0 + 0x20], %f4
+	ldd	[%o0 + 0x28], %f6
+	ldd	[%o0 + 0x30], %f8
+	ldd	[%o0 + 0x38], %f10
+	ldd	[%o0 + 0x40], %f12
+	ldd	[%o0 + 0x48], %f14
+	ldd	[%o0 + 0x50], %f16
+	ldd	[%o0 + 0x58], %f18
+	ldd	[%o0 + 0x60], %f20
+	ldd	[%o0 + 0x68], %f22
+	ldd	[%o0 + 0x70], %f24
+	ldd	[%o0 + 0x78], %f26
+	ldd	[%o0 + 0x80], %f28
+	ldd	[%o0 + 0x88], %f30
+	ldd	[%o0 + 0x90], %f32
+	ldd	[%o0 + 0x98], %f34
+	ldd	[%o0 + 0xa0], %f36
+	ldd	[%o0 + 0xa8], %f38
+	ldd	[%o0 + 0xb0], %f40
+	ldd	[%o0 + 0xb8], %f42
+	ldd	[%o0 + 0xc0], %f44
+	ldd	[%o0 + 0xc8], %f46
+	ldd	[%o0 + 0xd0], %f48
+	ldd	[%o0 + 0xd8], %f50
+	ldd	[%o0 + 0xe0], %f52
+	retl
+	ldd	[%o0 + 0xe8], %f54
+
+	SET_SIZE(t4_aes256_load_keys_for_encrypt)
+
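+! The parallel ECB path below peels off a single 16-byte block when the
+! length is an odd multiple of 16 (the "and %o3, 16" test), then
+! consumes two blocks per iteration with the *_2 round macros so that
+! independent rounds can overlap in the pipeline.  Register budget:
+! AES-128 round keys occupy %f16-%f54 and AES-192 %f8-%f54, leaving low
+! registers free for the second block, while AES-256 fills %f0-%f54,
+! which is why FOURTEEN_EROUNDS_2 and the 256-bit loops reload the
+! clobbered key registers from memory afterwards.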
+
+#define	TEST_PARALLEL_ECB_ENCRYPT
+#ifdef	TEST_PARALLEL_ECB_ENCRYPT
+	ENTRY(t4_aes128_ecb_encrypt)
+
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+	and	%o3, 16, %o4
+	brz	%o4, ecbenc128_loop
+	nop
+
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f60
+	movxtod	%g4, %f62
+
+	TEN_EROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	ecbenc128_loop_end
+	add	%o2, 16, %o2
+
+ecbenc128_loop:
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f0
+	movxtod	%g4, %f2
+	ldx	[%o1 + 16], %g3	!input
+	ldx	[%o1 + 24], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f60
+	movxtod	%g4, %f62
+
+	TEN_EROUNDS_2
+
+	std	%f0, [%o2]
+	std	%f2, [%o2 + 8]
+
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	ecbenc128_loop
+	add	%o2, 32, %o2
+ecbenc128_loop_end:
+	retl
+	nop
+
+	SET_SIZE(t4_aes128_ecb_encrypt)
+
+
+	ENTRY(t4_aes192_ecb_encrypt)
+
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+	and	%o3, 16, %o4
+	brz	%o4, ecbenc192_loop
+	nop
+
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f60
+	movxtod	%g4, %f62
+
+	TWELVE_EROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	ecbenc192_loop_end
+	add	%o2, 16, %o2
+
+ecbenc192_loop:
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f0
+	movxtod	%g4, %f2
+	ldx	[%o1 + 16], %g3	!input
+	ldx	[%o1 + 24], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f60
+	movxtod	%g4, %f62
+
+	TWELVE_EROUNDS_2
+
+	std	%f0, [%o2]
+	std	%f2, [%o2 + 8]
+
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	ecbenc192_loop
+	add	%o2, 32, %o2
+ecbenc192_loop_end:
+	retl
+	nop
+
+	SET_SIZE(t4_aes192_ecb_encrypt)
+
+
+	ENTRY(t4_aes256_ecb_encrypt)
+
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+	and	%o3, 16, %o4
+	brz	%o4, ecbenc256_loop
+	nop
+
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f60
+	movxtod	%g4, %f62
+
+	FOURTEEN_EROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	ecbenc256_loop_end
+	add	%o2, 16, %o2
+
+ecbenc256_loop:
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f20
+	movxtod	%g4, %f22
+	ldx	[%o1 + 16], %g3	!input
+	ldx	[%o1 + 24], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f60
+	movxtod	%g4, %f62
+
+	FOURTEEN_EROUNDS_2
+
+	std	%f20, [%o2]
+	std	%f22, [%o2 + 8]
+
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	ecbenc256_loop
+	add	%o2, 32, %o2
+
+	ldd	[%o0 + 0x60], %f20
+	ldd	[%o0 + 0x68], %f22
+
+ecbenc256_loop_end:
+	retl
+	nop
+
+	SET_SIZE(t4_aes256_ecb_encrypt)
+
+#else
+
+	ENTRY(t4_aes128_ecb_encrypt)
+
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+ecbenc128_loop:
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f60
+	movxtod	%g4, %f62
+
+	TEN_EROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ecbenc128_loop
+	add	%o2, 16, %o2
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes128_ecb_encrypt)
+
+
+	ENTRY(t4_aes192_ecb_encrypt)
+
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+ecbenc192_loop:
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f60
+	movxtod	%g4, %f62
+
+	TWELVE_EROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ecbenc192_loop
+	add	%o2, 16, %o2
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes192_ecb_encrypt)
+
+
+	ENTRY(t4_aes256_ecb_encrypt)
+
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+ecbenc256_loop:
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f60
+	movxtod	%g4, %f62
+
+	FOURTEEN_EROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ecbenc256_loop
+	add	%o2, 16, %o2
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes256_ecb_encrypt)
+#endif
+
+
+	ENTRY(t4_aes128_cbc_encrypt)
+
+	ldd	[%o4], %f60	! IV
+	ldd	[%o4 +8], %f62	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+cbcenc128_loop:
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f56
+	movxtod	%g4, %f58
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	TEN_EROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cbcenc128_loop
+	add	%o2, 16, %o2
+
+	std	%f60, [%o4]
+	retl
+	std	%f62, [%o4 + 8]
+
+	SET_SIZE(t4_aes128_cbc_encrypt)
+
+
+	ENTRY(t4_aes192_cbc_encrypt)
+
+	ldd	[%o4], %f60	! IV
+	ldd	[%o4 + 8], %f62	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+cbcenc192_loop:
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f56
+	movxtod	%g4, %f58
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	TWELVE_EROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cbcenc192_loop
+	add	%o2, 16, %o2
+
+	std	%f60, [%o4]
+	retl
+	std	%f62, [%o4 + 8]
+
+	SET_SIZE(t4_aes192_cbc_encrypt)
+
+
+	ENTRY(t4_aes256_cbc_encrypt)
+
+	ldd	[%o4], %f60	! IV
+	ldd	[%o4 + 8], %f62	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+cbcenc256_loop:
+	ldx	[%o1], %g3	!input
+	ldx	[%o1 + 8], %g4	!input
+	xor	%g1, %g3, %g3	!input ^ ks[0-1]
+	xor	%g2, %g4, %g4	!input ^ ks[0-1]
+	movxtod	%g3, %f56
+	movxtod	%g4, %f58
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	FOURTEEN_EROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cbcenc256_loop
+	add	%o2, 16, %o2
+
+	std	%f60, [%o4]
+	retl
+	std	%f62, [%o4 + 8]
+
+	SET_SIZE(t4_aes256_cbc_encrypt)
+
+
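+! CTR mode treats the low 64 bits of the counter block (%g4) as a
+! big-endian integer, bumping it with inc once per 16-byte block, and
+! stores the updated counter back through %o4 when the loop ends.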
+#define	 TEST_PARALLEL_CTR_CRYPT
+#ifdef	TEST_PARALLEL_CTR_CRYPT
+	ENTRY(t4_aes128_ctr_crypt)
+
+	ldx	[%o4], %g3	! IV
+	ldx	[%o4 +8], %g4	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+	and	%o3, 16, %g5
+	brz	%g5, ctr128_loop
+
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f60
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f62
+	inc	%g4
+
+	TEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	ctr128_loop_end
+	add	%o2, 16, %o2
+
+ctr128_loop:
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f0
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f2
+	inc	%g4
+
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f60
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f62
+	inc	%g4
+
+	TEN_EROUNDS_2
+
+	ldd	[%o1], %f6		!input
+	ldd	[%o1 + 8], %f4		!input
+	ldd	[%o1 + 16], %f56	!input
+	ldd	[%o1 + 24], %f58	!input
+	fxor	%f0, %f6, %f0
+	fxor	%f2, %f4, %f2
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+	std	%f0, [%o2]
+	std	%f2, [%o2 + 8]
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	ctr128_loop
+	add	%o2, 32, %o2
+
+ctr128_loop_end:
+	stx	%g3, [%o4]
+	retl
+	stx	%g4, [%o4 + 8]
+
+	SET_SIZE(t4_aes128_ctr_crypt)
+
+
+	ENTRY(t4_aes192_ctr_crypt)
+
+	ldx	[%o4], %g3	! IV
+	ldx	[%o4 +8], %g4	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+	and	%o3, 16, %g5
+	brz	%g5, ctr192_loop
+
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f60
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f62
+	inc	%g4
+
+	TWELVE_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	ctr192_loop_end
+	add	%o2, 16, %o2
+
+ctr192_loop:
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f0
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f2
+	inc	%g4
+
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f60
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f62
+	inc	%g4
+
+	TWELVE_EROUNDS_2
+
+	ldd	[%o1], %f6		!input
+	ldd	[%o1 + 8], %f4		!input
+	ldd	[%o1 + 16], %f56	!input
+	ldd	[%o1 + 24], %f58	!input
+	fxor	%f0, %f6, %f0
+	fxor	%f2, %f4, %f2
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+	std	%f0, [%o2]
+	std	%f2, [%o2 + 8]
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	ctr192_loop
+	add	%o2, 32, %o2
+
+ctr192_loop_end:
+	stx	%g3, [%o4]
+	retl
+	stx	%g4, [%o4 + 8]
+
+	SET_SIZE(t4_aes192_ctr_crypt)
+
+
+	ENTRY(t4_aes256_ctr_crypt)
+
+	ldx	[%o4], %g3	! IV
+	ldx	[%o4 +8], %g4	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+	and	%o3, 16, %g5
+	brz	%g5, ctr256_loop
+
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f60
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f62
+	inc	%g4
+
+	FOURTEEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	ctr256_loop_end
+	add	%o2, 16, %o2
+
+ctr256_loop:
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f20
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f22
+	inc	%g4
+
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f60
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f62
+	inc	%g4
+
+	FOURTEEN_EROUNDS_2
+
+	ldd	[%o1], %f56		!input
+	ldd	[%o1 + 8], %f58		!input
+	fxor	%f20, %f56, %f20
+	fxor	%f22, %f58, %f22
+	ldd	[%o1 + 16], %f56	!input
+	ldd	[%o1 + 24], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+	std	%f20, [%o2]
+	std	%f22, [%o2 + 8]
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	ctr256_loop
+	add	%o2, 32, %o2
+
+	ldd	[%o0 + 0x60], %f20
+	ldd	[%o0 + 0x68], %f22
+
+ctr256_loop_end:
+	stx	%g3, [%o4]
+	retl
+	stx	%g4, [%o4 + 8]
+
+	SET_SIZE(t4_aes256_ctr_crypt)
+
+#else
+
+	ENTRY(t4_aes128_ctr_crypt)
+
+	ldx	[%o4], %g3	! IV
+	ldx	[%o4 +8], %g4	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+ctr128_loop:
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f60
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f62
+	inc	%g4
+
+	TEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ctr128_loop
+	add	%o2, 16, %o2
+
+	stx	%g3, [%o4]
+	retl
+	stx	%g4, [%o4 + 8]
+
+	SET_SIZE(t4_aes128_ctr_crypt)
+
+	ENTRY(t4_aes192_ctr_crypt)
+
+	ldx	[%o4], %g3	! IV
+	ldx	[%o4 +8], %g4	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+ctr192_loop:
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f60
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f62
+	inc	%g4
+
+	TWELVE_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ctr192_loop
+	add	%o2, 16, %o2
+
+	stx	%g3, [%o4]
+	retl
+	stx	%g4, [%o4 + 8]
+
+	SET_SIZE(t4_aes192_ctr_crypt)
+
+
+	ENTRY(t4_aes256_ctr_crypt)
+
+	ldx	[%o4], %g3	! IV
+	ldx	[%o4 +8], %g4	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+ctr256_loop:
+	xor	%g1, %g3, %g5
+	movxtod	%g5, %f60
+	xor	%g2, %g4, %g5
+	movxtod	%g5, %f62
+	inc	%g4
+
+	FOURTEEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ctr256_loop
+	add	%o2, 16, %o2
+
+	stx	%g3, [%o4]
+	retl
+	stx	%g4, [%o4 + 8]
+
+	SET_SIZE(t4_aes256_ctr_crypt)
+
+#endif
+
+	ENTRY(t4_aes128_cfb128_encrypt)
+
+	ldd	[%o4], %f60	! IV
+	ldd	[%o4 +8], %f62	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+cfb128_128_loop:
+	movxtod	%g1, %f56
+	movxtod	%g2, %f58
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	TEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cfb128_128_loop
+	add	%o2, 16, %o2
+
+	std	%f60, [%o4]
+	retl
+	std	%f62, [%o4 + 8]
+
+	SET_SIZE(t4_aes128_cfb128_encrypt)
+
+
+	ENTRY(t4_aes192_cfb128_encrypt)
+
+	ldd	[%o4], %f60	! IV
+	ldd	[%o4 +8], %f62	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+cfb128_192_loop:
+	movxtod	%g1, %f56
+	movxtod	%g2, %f58
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	TWELVE_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cfb128_192_loop
+	add	%o2, 16, %o2
+
+	std	%f60, [%o4]
+	retl
+	std	%f62, [%o4 + 8]
+
+	SET_SIZE(t4_aes192_cfb128_encrypt)
+
+
+	ENTRY(t4_aes256_cfb128_encrypt)
+
+	ldd	[%o4], %f60	! IV
+	ldd	[%o4 +8], %f62	! IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+cfb128_256_loop:
+	movxtod	%g1, %f56
+	movxtod	%g2, %f58
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	FOURTEEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cfb128_256_loop
+	add	%o2, 16, %o2
+
+	std	%f60, [%o4]
+	retl
+	std	%f62, [%o4 + 8]
+
+	SET_SIZE(t4_aes256_cfb128_encrypt)
+
+
+	ENTRY(t4_aes128_load_keys_for_decrypt)
+
+	ldd	[%o0], %f52
+	ldd	[%o0 + 0x8], %f54
+	ldd	[%o0 + 0x10], %f48
+	ldd	[%o0 + 0x18], %f50
+	ldd	[%o0 + 0x20], %f44
+	ldd	[%o0 + 0x28], %f46
+	ldd	[%o0 + 0x30], %f40
+	ldd	[%o0 + 0x38], %f42
+	ldd	[%o0 + 0x40], %f36
+	ldd	[%o0 + 0x48], %f38
+	ldd	[%o0 + 0x50], %f32
+	ldd	[%o0 + 0x58], %f34
+	ldd	[%o0 + 0x60], %f28
+	ldd	[%o0 + 0x68], %f30
+	ldd	[%o0 + 0x70], %f24
+	ldd	[%o0 + 0x78], %f26
+	ldd	[%o0 + 0x80], %f20
+	ldd	[%o0 + 0x88], %f22
+	ldd	[%o0 + 0x90], %f16
+	retl
+	ldd	[%o0 + 0x98], %f18
+
+	SET_SIZE(t4_aes128_load_keys_for_decrypt)
+
+
+	ENTRY(t4_aes192_load_keys_for_decrypt)
+
+	ldd	[%o0], %f52
+	ldd	[%o0 + 0x8], %f54
+	ldd	[%o0 + 0x10], %f48
+	ldd	[%o0 + 0x18], %f50
+	ldd	[%o0 + 0x20], %f44
+	ldd	[%o0 + 0x28], %f46
+	ldd	[%o0 + 0x30], %f40
+	ldd	[%o0 + 0x38], %f42
+	ldd	[%o0 + 0x40], %f36
+	ldd	[%o0 + 0x48], %f38
+	ldd	[%o0 + 0x50], %f32
+	ldd	[%o0 + 0x58], %f34
+	ldd	[%o0 + 0x60], %f28
+	ldd	[%o0 + 0x68], %f30
+	ldd	[%o0 + 0x70], %f24
+	ldd	[%o0 + 0x78], %f26
+	ldd	[%o0 + 0x80], %f20
+	ldd	[%o0 + 0x88], %f22
+	ldd	[%o0 + 0x90], %f16
+	ldd	[%o0 + 0x98], %f18
+	ldd	[%o0 + 0xa0], %f12
+	ldd	[%o0 + 0xa8], %f14
+	ldd	[%o0 + 0xb0], %f8
+	retl
+	ldd	[%o0 + 0xb8], %f10
+
+	SET_SIZE(t4_aes192_load_keys_for_decrypt)
+
+
+	ENTRY(t4_aes256_load_keys_for_decrypt)
+
+
+	ldd	[%o0], %f52
+	ldd	[%o0 + 0x8], %f54
+	ldd	[%o0 + 0x10], %f48
+	ldd	[%o0 + 0x18], %f50
+	ldd	[%o0 + 0x20], %f44
+	ldd	[%o0 + 0x28], %f46
+	ldd	[%o0 + 0x30], %f40
+	ldd	[%o0 + 0x38], %f42
+	ldd	[%o0 + 0x40], %f36
+	ldd	[%o0 + 0x48], %f38
+	ldd	[%o0 + 0x50], %f32
+	ldd	[%o0 + 0x58], %f34
+	ldd	[%o0 + 0x60], %f28
+	ldd	[%o0 + 0x68], %f30
+	ldd	[%o0 + 0x70], %f24
+	ldd	[%o0 + 0x78], %f26
+	ldd	[%o0 + 0x80], %f20
+	ldd	[%o0 + 0x88], %f22
+	ldd	[%o0 + 0x90], %f16
+	ldd	[%o0 + 0x98], %f18
+	ldd	[%o0 + 0xa0], %f12
+	ldd	[%o0 + 0xa8], %f14
+	ldd	[%o0 + 0xb0], %f8
+	ldd	[%o0 + 0xb8], %f10
+	ldd	[%o0 + 0xc0], %f4
+	ldd	[%o0 + 0xc8], %f6
+	ldd	[%o0 + 0xd0], %f0
+	retl
+	ldd	[%o0 + 0xd8], %f2
+
+	SET_SIZE(t4_aes256_load_keys_for_decrypt)
+
+
+#define	 TEST_PARALLEL_ECB_DECRYPT
+#ifdef	TEST_PARALLEL_ECB_DECRYPT
+	ENTRY(t4_aes128_ecb_decrypt)
+
+	ldx	[%o0 + 0xa0], %g1	!ks[last-1]
+	ldx	[%o0 + 0xa8], %g2	!ks[last]
+	and	%o3, 16, %o4
+	brz	%o4, ecbdec128_loop
+	nop
+
+	ldx	[%o1], %o4
+	ldx	[%o1 + 8], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f60
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f62
+
+	TEN_DROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 0x8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	ecbdec128_loop_end
+	add	%o2, 16, %o2
+
+ecbdec128_loop:
+	ldx	[%o1], %o4
+	ldx	[%o1 + 8], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f0
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f2
+	ldx	[%o1 + 16], %o4
+	ldx	[%o1 + 24], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f60
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f62
+
+	TEN_DROUNDS_2
+
+	std	%f0, [%o2]
+	std	%f2, [%o2 + 8]
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	ecbdec128_loop
+	add	%o2, 32, %o2
+ecbdec128_loop_end:
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes128_ecb_decrypt)
+
+	ENTRY(t4_aes192_ecb_decrypt)
+
+	ldx	[%o0 + 0xc0], %g1	!ks[last-1]
+	ldx	[%o0 + 0xc8], %g2	!ks[last]
+	and	%o3, 16, %o4
+	brz	%o4, ecbdec192_loop
+	nop
+
+	ldx	[%o1], %o4
+	ldx	[%o1 + 8], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f60
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f62
+
+	TWELVE_DROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 0x8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	ecbdec192_loop_end
+	add	%o2, 16, %o2
+
+ecbdec192_loop:
+	ldx	[%o1], %o4
+	ldx	[%o1 + 8], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f0
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f2
+	ldx	[%o1 + 16], %o4
+	ldx	[%o1 + 24], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f60
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f62
+
+	TWELVE_DROUNDS_2
+
+	std	%f0, [%o2]
+	std	%f2, [%o2 + 8]
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	ecbdec192_loop
+	add	%o2, 32, %o2
+ecbdec192_loop_end:
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes192_ecb_decrypt)
+
+
+	ENTRY(t4_aes256_ecb_decrypt)
+
+	ldx	[%o0 + 0xe0], %g1	!ks[last-1]
+	ldx	[%o0 + 0xe8], %g2	!ks[last]
+	and	%o3, 16, %o4
+	brz	%o4, ecbdec256_loop
+	nop
+
+	ldx	[%o1], %o4
+	ldx	[%o1 + 8], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f60
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f62
+
+	FOURTEEN_DROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 0x8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	ecbdec256_loop_end
+	add	%o2, 16, %o2
+
+ecbdec256_loop:
+	ldx	[%o1], %o4
+	ldx	[%o1 + 8], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f20
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f22
+	ldx	[%o1 + 16], %o4
+	ldx	[%o1 + 24], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f60
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f62
+
+	FOURTEEN_DROUNDS_2
+
+	std	%f20, [%o2]
+	std	%f22, [%o2 + 8]
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	ecbdec256_loop
+	add	%o2, 32, %o2
+
+	ldd	[%o0 + 0x80], %f20
+	ldd	[%o0 + 0x88], %f22
+
+ecbdec256_loop_end:
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes256_ecb_decrypt)
+
+#else
+
+	ENTRY(t4_aes128_ecb_decrypt)
+
+	ldx	[%o0 + 0xa0], %g1	!ks[last-1]
+	ldx	[%o0 + 0xa8], %g2	!ks[last]
+
+ecbdec128_loop:
+	ldx	[%o1], %o4
+	ldx	[%o1 + 8], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f60
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f62
+
+	TEN_DROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 0x8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ecbdec128_loop
+	add	%o2, 16, %o2
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes128_ecb_decrypt)
+
+
+	ENTRY(t4_aes192_ecb_decrypt)
+
+	ldx	[%o0 + 0xc0], %g1	!ks[last-1]
+	ldx	[%o0 + 0xc8], %g2	!ks[last]
+
+ecbdec192_loop:
+	ldx	[%o1], %o4
+	ldx	[%o1 + 8], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f60
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f62
+
+	TWELVE_DROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 0x8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ecbdec192_loop
+	add	%o2, 16, %o2
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes192_ecb_decrypt)
+
+
+	ENTRY(t4_aes256_ecb_decrypt)
+
+	ldx	[%o0 + 0xe0], %g1	!ks[last-1]
+	ldx	[%o0 + 0xe8], %g2	!ks[last]
+
+ecbdec256_loop:
+	ldx	[%o1], %o4
+	ldx	[%o1 + 8], %o5
+	xor	%g1, %o4, %g3	!initial ARK
+	movxtod	%g3, %f60
+	xor	%g2, %o5, %g3	!initial ARK
+	movxtod	%g3, %f62
+
+	FOURTEEN_DROUNDS
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 0x8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	ecbdec256_loop
+	add	%o2, 16, %o2
+
+	retl
+	nop
+
+	SET_SIZE(t4_aes256_ecb_decrypt)
+
+#endif
+
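+! CBC decryption needs the previous ciphertext block as well as the
+! current one, so these routines open a register window (save/restore)
+! and carry the pending IV in local registers, storing the final
+! ciphertext block back as the next IV when the loop ends.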
+#define	TEST_PARALLEL_CBC_DECRYPT
+#ifdef	TEST_PARALLEL_CBC_DECRYPT
+	ENTRY(t4_aes128_cbc_decrypt)
+
+	save	%sp, -SA(MINFRAME), %sp
+	ldx	[%i4], %o0		!IV
+	ldx	[%i4 + 8], %o1		!IV
+	ldx	[%i0 + 0xa0], %o2	!ks[last-1]
+	ldx	[%i0 + 0xa8], %o3	!ks[last]
+	and	%i3, 16, %o4
+	brz	%o4, cbcdec128_loop
+	nop
+
+	ldx	[%i1], %o4
+	ldx	[%i1 + 8], %o5
+	xor	%o2, %o4, %i5	!initial ARK
+	movxtod	%i5, %f60
+	xor	%o3, %o5, %i5	!initial ARK
+	movxtod	%i5, %f62
+
+	TEN_DROUNDS
+
+	movxtod	%o0, %f56
+	movxtod	%o1, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2]
+	std	%f62, [%i2 + 0x8]
+
+	add	%i1, 16, %i1
+	subcc	%i3, 16, %i3
+	be	cbcdec128_loop_end
+	add	%i2, 16, %i2
+
+
+cbcdec128_loop:
+	ldx	[%i1], %g4
+	ldx	[%i1 + 8], %g5
+	xor	%o2, %g4, %i5	!initial ARK
+	movxtod	%i5, %f0
+	xor	%o3, %g5, %i5	!initial ARK
+	movxtod	%i5, %f2
+
+	ldx	[%i1 + 16], %o4
+	ldx	[%i1 + 24], %o5
+	xor	%o2, %o4, %i5	!initial ARK
+	movxtod	%i5, %f60
+	xor	%o3, %o5, %i5	!initial ARK
+	movxtod	%i5, %f62
+
+	TEN_DROUNDS_2
+
+	movxtod	%o0, %f6
+	movxtod	%o1, %f4
+	fxor	%f6, %f0, %f0	!add in previous IV
+	fxor	%f4, %f2, %f2
+
+	std	%f0, [%i2]
+	std	%f2, [%i2 + 8]
+
+	movxtod	%g4, %f56
+	movxtod	%g5, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2 + 16]
+	std	%f62, [%i2 + 24]
+
+	add	%i1, 32, %i1
+	subcc	%i3, 32, %i3
+	bne	cbcdec128_loop
+	add	%i2, 32, %i2
+
+cbcdec128_loop_end:
+	stx	%o0, [%i4]
+	stx	%o1, [%i4 + 8]
+	ret
+	restore
+
+	SET_SIZE(t4_aes128_cbc_decrypt)
+
+
+	ENTRY(t4_aes192_cbc_decrypt)
+
+	save	%sp, -SA(MINFRAME), %sp
+	ldx	[%i4], %o0		!IV
+	ldx	[%i4 + 8], %o1		!IV
+	ldx	[%i0 + 0xc0], %o2	!ks[last-1]
+	ldx	[%i0 + 0xc8], %o3	!ks[last]
+	and	%i3, 16, %o4
+	brz	%o4, cbcdec192_loop
+	nop
+
+	ldx	[%i1], %o4
+	ldx	[%i1 + 8], %o5
+	xor	%o2, %o4, %i5	!initial ARK
+	movxtod	%i5, %f60
+	xor	%o3, %o5, %i5	!initial ARK
+	movxtod	%i5, %f62
+
+	TWELVE_DROUNDS
+
+	movxtod	%o0, %f56
+	movxtod	%o1, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2]
+	std	%f62, [%i2 + 0x8]
+
+	add	%i1, 16, %i1
+	subcc	%i3, 16, %i3
+	be	cbcdec192_loop_end
+	add	%i2, 16, %i2
+
+
+cbcdec192_loop:
+	ldx	[%i1], %g4
+	ldx	[%i1 + 8], %g5
+	xor	%o2, %g4, %i5	!initial ARK
+	movxtod	%i5, %f0
+	xor	%o3, %g5, %i5	!initial ARK
+	movxtod	%i5, %f2
+
+	ldx	[%i1 + 16], %o4
+	ldx	[%i1 + 24], %o5
+	xor	%o2, %o4, %i5	!initial ARK
+	movxtod	%i5, %f60
+	xor	%o3, %o5, %i5	!initial ARK
+	movxtod	%i5, %f62
+
+	TWELVE_DROUNDS_2
+
+	movxtod	%o0, %f6
+	movxtod	%o1, %f4
+	fxor	%f6, %f0, %f0	!add in previous IV
+	fxor	%f4, %f2, %f2
+
+	std	%f0, [%i2]
+	std	%f2, [%i2 + 8]
+
+	movxtod	%g4, %f56
+	movxtod	%g5, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2 + 16]
+	std	%f62, [%i2 + 24]
+
+	add	%i1, 32, %i1
+	subcc	%i3, 32, %i3
+	bne	cbcdec192_loop
+	add	%i2, 32, %i2
+
+cbcdec192_loop_end:
+	stx	%o0, [%i4]
+	stx	%o1, [%i4 + 8]
+	ret
+	restore
+
+	SET_SIZE(t4_aes192_cbc_decrypt)
+
+
+	ENTRY(t4_aes256_cbc_decrypt)
+
+	save	%sp, -SA(MINFRAME), %sp
+	mov	%i0, %o0
+	ldx	[%i4], %i0		!IV
+	ldx	[%i4 + 8], %o1		!IV
+	ldx	[%o0 + 0xe0], %o2	!ks[last-1]
+	ldx	[%o0 + 0xe8], %o3	!ks[last]
+	and	%i3, 16, %o4
+	brz	%o4, cbcdec256_loop
+	nop
+
+	ldx	[%i1], %o4
+	ldx	[%i1 + 8], %o5
+	xor	%o2, %o4, %i5	!initial ARK
+	movxtod	%i5, %f60
+	xor	%o3, %o5, %i5	!initial ARK
+	movxtod	%i5, %f62
+
+	FOURTEEN_DROUNDS
+
+	movxtod	%i0, %f56
+	movxtod	%o1, %f58
+	mov	%o4, %i0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2]
+	std	%f62, [%i2 + 0x8]
+
+	add	%i1, 16, %i1
+	subcc	%i3, 16, %i3
+	be	cbcdec256_loop_end
+	add	%i2, 16, %i2
+
+
+cbcdec256_loop:
+	ldx	[%i1], %g4
+	ldx	[%i1 + 8], %g5
+	xor	%o2, %g4, %i5	!initial ARK
+	movxtod	%i5, %f20
+	xor	%o3, %g5, %i5	!initial ARK
+	movxtod	%i5, %f22
+
+	ldx	[%i1 + 16], %o4
+	ldx	[%i1 + 24], %o5
+	xor	%o2, %o4, %i5	!initial ARK
+	movxtod	%i5, %f60
+	xor	%o3, %o5, %i5	!initial ARK
+	movxtod	%i5, %f62
+
+	FOURTEEN_DROUNDS_2
+
+	movxtod	%i0, %f56
+	movxtod	%o1, %f58
+	fxor	%f56, %f20, %f20	!add in previous IV
+	fxor	%f58, %f22, %f22
+
+	std	%f20, [%i2]
+	std	%f22, [%i2 + 8]
+
+	movxtod	%g4, %f56
+	movxtod	%g5, %f58
+	mov	%o4, %i0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2 + 16]
+	std	%f62, [%i2 + 24]
+
+	add	%i1, 32, %i1
+	subcc	%i3, 32, %i3
+	bne	cbcdec256_loop
+	add	%i2, 32, %i2
+
+	ldd	[%o0 + 0x80], %f20
+	ldd	[%o0 + 0x88], %f22
+
+cbcdec256_loop_end:
+	stx	%i0, [%i4]
+	stx	%o1, [%i4 + 8]
+	ret
+	restore
+
+	SET_SIZE(t4_aes256_cbc_decrypt)
+
+#else
+
+	ENTRY(t4_aes128_cbc_decrypt)
+
+	save	%sp, -SA(MINFRAME), %sp
+	ldx	[%i4], %o0		!IV
+	ldx	[%i4 + 8], %o1		!IV
+	ldx	[%i0 + 0xa0], %o2	!ks[last-1]
+	ldx	[%i0 + 0xa8], %o3	!ks[last]
+
+cbcdec128_loop:
+	ldx	[%i1], %o4
+	ldx	[%i1 + 8], %o5
+	xor	%o2, %o4, %i5	!initial ARK
+	movxtod	%i5, %f60
+	xor	%o3, %o5, %i5	!initial ARK
+	movxtod	%i5, %f62
+
+	TEN_DROUNDS
+
+	movxtod	%o0, %f56
+	movxtod	%o1, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2]
+	std	%f62, [%i2 + 0x8]
+
+	add	%i1, 16, %i1
+	subcc	%i3, 16, %i3
+	bne	cbcdec128_loop
+	add	%i2, 16, %i2
+
+	stx	%o0, [%i4]
+	stx	%o1, [%i4 + 8]
+	ret
+	restore
+
+	SET_SIZE(t4_aes128_cbc_decrypt)
+
+
+	ENTRY(t4_aes192_cbc_decrypt)
+
+	save	%sp, -SA(MINFRAME), %sp
+	ldx	[%i4], %o0		!IV
+	ldx	[%i4 + 8], %o1		!IV
+	ldx	[%i0 + 0xc0], %o2	!ks[last-1]
+	ldx	[%i0 + 0xc8], %o3	!ks[last]
+
+cbcdec192_loop:
+	ldx	[%i1], %o4
+	ldx	[%i1 + 8], %o5
+	xor	%o2, %o4, %i5	!initial ARK
+	movxtod	%i5, %f60
+	xor	%o3, %o5, %i5	!initial ARK
+	movxtod	%i5, %f62
+
+	TWELVE_DROUNDS
+
+	movxtod	%o0, %f56
+	movxtod	%o1, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2]
+	std	%f62, [%i2 + 0x8]
+
+	add	%i1, 16, %i1
+	subcc	%i3, 16, %i3
+	bne	cbcdec192_loop
+	add	%i2, 16, %i2
+
+	stx	%o0, [%i4]
+	stx	%o1, [%i4 + 8]
+	ret
+	restore
+
+	SET_SIZE(t4_aes192_cbc_decrypt)
+
+
+	ENTRY(t4_aes256_cbc_decrypt)
+
+	save	%sp, -SA(MINFRAME), %sp
+	ldx	[%i4], %o0		!IV
+	ldx	[%i4 + 8], %o1		!IV
+	ldx	[%i0 + 0xe0], %o2	!ks[last-1]
+	ldx	[%i0 + 0xe8], %o3	!ks[last]
+
+cbcdec256_loop:
+	ldx	[%i1], %o4
+	ldx	[%i1 + 8], %o5
+	xor	%o2, %o4, %i5	!initial ARK
+	movxtod	%i5, %f60
+	xor	%o3, %o5, %i5	!initial ARK
+	movxtod	%i5, %f62
+
+	FOURTEEN_DROUNDS
+
+	movxtod	%o0, %f56
+	movxtod	%o1, %f58
+	mov	%o4, %o0	!save last block as next IV
+	mov	%o5, %o1
+	fxor	%f56, %f60, %f60	!add in previous IV
+	fxor	%f58, %f62, %f62
+
+	std	%f60, [%i2]
+	std	%f62, [%i2 + 0x8]
+
+	add	%i1, 16, %i1
+	subcc	%i3, 16, %i3
+	bne	cbcdec256_loop
+	add	%i2, 16, %i2
+
+	stx	%o0, [%i4]
+	stx	%o1, [%i4 + 8]
+	ret
+	restore
+
+	SET_SIZE(t4_aes256_cbc_decrypt)
+
+#endif
+
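+! CFB128 decryption, like encryption, runs the block cipher forward over
+! the previous ciphertext block.  Because the feedback is ciphertext
+! rather than plaintext, the two-block paths below can form both cipher
+! inputs up front and decrypt a pair of blocks in parallel.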
+#define	TEST_PARALLEL_CFB128_DECRYPT
+#ifdef	TEST_PARALLEL_CFB128_DECRYPT
+
+	ENTRY(t4_aes128_cfb128_decrypt)
+
+	ldd	[%o4], %f56	!IV
+	ldd	[%o4 + 8], %f58	!IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+	and	%o3, 16, %o5
+	brz	%o5, cfb128dec_128_loop
+
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	TEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	cfb128dec_128_loop_end
+	add	%o2, 16, %o2
+
+cfb128dec_128_loop:
+	ldd	[%o1], %f6	!input
+	ldd	[%o1 + 8], %f4	!input
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f6, %f0
+	fxor	%f62, %f4, %f2
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	TEN_EROUNDS_2
+
+	ldd	[%o1], %f6	!input
+	ldd	[%o1 + 8], %f4	!input
+	ldd	[%o1 + 16], %f56	!input
+	ldd	[%o1 + 24], %f58	!input
+
+	fxor	%f60, %f6, %f6
+	fxor	%f62, %f4, %f4
+	fxor	%f0, %f56, %f60
+	fxor	%f2, %f58, %f62
+
+	std	%f6, [%o2]
+	std	%f4, [%o2 + 8]
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	cfb128dec_128_loop
+	add	%o2, 32, %o2
+
+cfb128dec_128_loop_end:
+	std	%f56, [%o4]
+	retl
+	std	%f58, [%o4 + 8]
+
+	SET_SIZE(t4_aes128_cfb128_decrypt)
+
+
+	ENTRY(t4_aes192_cfb128_decrypt)
+
+	ldd	[%o4], %f56	!IV
+	ldd	[%o4 + 8], %f58	!IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+	and	%o3, 16, %o5
+	brz	%o5, cfb128dec_192_loop
+
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	TWELVE_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	cfb128dec_192_loop_end
+	add	%o2, 16, %o2
+
+cfb128dec_192_loop:
+	ldd	[%o1], %f6	!input
+	ldd	[%o1 + 8], %f4	!input
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f6, %f0
+	fxor	%f62, %f4, %f2
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	TWELVE_EROUNDS_2
+
+	ldd	[%o1], %f6	!input
+	ldd	[%o1 + 8], %f4	!input
+	ldd	[%o1 + 16], %f56	!input
+	ldd	[%o1 + 24], %f58	!input
+
+	fxor	%f60, %f6, %f6
+	fxor	%f62, %f4, %f4
+	fxor	%f0, %f56, %f60
+	fxor	%f2, %f58, %f62
+
+	std	%f6, [%o2]
+	std	%f4, [%o2 + 8]
+	std	%f60, [%o2 + 16]
+	std	%f62, [%o2 + 24]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	cfb128dec_192_loop
+	add	%o2, 32, %o2
+
+cfb128dec_192_loop_end:
+	std	%f56, [%o4]
+	retl
+	std	%f58, [%o4 + 8]
+
+	SET_SIZE(t4_aes192_cfb128_decrypt)
+
+
+	ENTRY(t4_aes256_cfb128_decrypt)
+
+	ldd	[%o4], %f56	!IV
+	ldd	[%o4 + 8], %f58	!IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+	and	%o3, 16, %o5
+	brz	%o5, cfb128dec_256_loop
+
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	FOURTEEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	be	cfb128dec_256_loop_end
+	add	%o2, 16, %o2
+
+cfb128dec_256_loop:
+	ldd	[%o1], %f20	!input
+	ldd	[%o1 + 8], %f22	!input
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f20, %f20
+	fxor	%f62, %f22, %f22
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	FOURTEEN_EROUNDS_2
+
+	ldd	[%o1 + 16], %f56	!input
+	ldd	[%o1 + 24], %f58	!input
+	fxor	%f20, %f56, %f20
+	fxor	%f22, %f58, %f22
+	std	%f20, [%o2 + 16]
+	std	%f22, [%o2 + 24]
+
+	ldd	[%o1], %f20	!input
+	ldd	[%o1 + 8], %f22	!input
+
+	fxor	%f60, %f20, %f20
+	fxor	%f62, %f22, %f22
+
+	std	%f20, [%o2]
+	std	%f22, [%o2 + 8]
+
+	add	%o1, 32, %o1
+	subcc	%o3, 32, %o3
+	bne	cfb128dec_256_loop
+	add	%o2, 32, %o2
+
+	ldd	[%o0 + 0x60], %f20
+	ldd	[%o0 + 0x68], %f22
+
+cfb128dec_256_loop_end:
+	std	%f56, [%o4]
+	retl
+	std	%f58, [%o4 + 8]
+
+	SET_SIZE(t4_aes256_cfb128_decrypt)
+
+#else
+	ENTRY(t4_aes128_cfb128_decrypt)
+
+	ldd	[%o4], %f56	!IV
+	ldd	[%o4 + 8], %f58	!IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+cfb128dec_128_loop:
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	TEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cfb128dec_128_loop
+	add	%o2, 16, %o2
+
+	std	%f56, [%o4]
+	retl
+	std	%f58, [%o4 + 8]
+
+	SET_SIZE(t4_aes128_cfb128_decrypt)
+
+
+	ENTRY(t4_aes192_cfb128_decrypt)
+
+	ldd	[%o4], %f56	!IV
+	ldd	[%o4 + 8], %f58	!IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+cfb128dec_192_loop:
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	TWELVE_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cfb128dec_192_loop
+	add	%o2, 16, %o2
+
+	std	%f56, [%o4]
+	retl
+	std	%f58, [%o4 + 8]
+
+	SET_SIZE(t4_aes192_cfb128_decrypt)
+
+
+	ENTRY(t4_aes256_cfb128_decrypt)
+
+	ldd	[%o4], %f56	!IV
+	ldd	[%o4 + 8], %f58	!IV
+	ldx	[%o0], %g1	! ks[0]
+	ldx	[%o0 + 8], %g2	! ks[1]
+
+cfb128dec_256_loop:
+	movxtod	%g1, %f60
+	movxtod	%g2, %f62
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	/* CFB mode uses encryption for the decrypt operation */
+	FOURTEEN_EROUNDS
+
+	ldd	[%o1], %f56	!input
+	ldd	[%o1 + 8], %f58	!input
+	fxor	%f60, %f56, %f60
+	fxor	%f62, %f58, %f62
+
+	std	%f60, [%o2]
+	std	%f62, [%o2 + 8]
+
+	add	%o1, 16, %o1
+	subcc	%o3, 16, %o3
+	bne	cfb128dec_256_loop
+	add	%o2, 16, %o2
+
+	std	%f56, [%o4]
+	retl
+	std	%f58, [%o4 + 8]
+
+	SET_SIZE(t4_aes256_cfb128_decrypt)
+
+#endif
+
+#endif	/* lint || __lint */
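The AES entry points above follow the SPARC calling convention: arguments
arrive in %o0-%o4 (visible as %i0-%i4 after a register-window save) and are
used as key schedule, input, output, byte count, and IV pointer. A minimal
sketch of the matching C declarations, assuming they agree with the real
ones in engines/t4/eng_t4_aes_asm.h (part of this changeset, not shown in
this hunk):

	#include <stdint.h>
	#include <stddef.h>

	/* hypothetical prototypes inferred from the register usage above */
	extern void t4_aes128_cbc_decrypt(uint64_t *ks, uint64_t *in,
	    uint64_t *out, size_t len, uint64_t *iv);
	extern void t4_aes128_cfb128_decrypt(uint64_t *ks, uint64_t *in,
	    uint64_t *out, size_t len, uint64_t *iv);

The 192- and 256-bit variants share the same shape; only the key-schedule
length and the round count differ.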
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.0/engines/t4/t4_md5.S	Wed Jul 20 10:20:13 2011 -0700
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*LINTLIBRARY*/
+
+#if defined(lint) || defined(__lint)
+
+#include <sys/types.h>
+#include <sys/md5.h>
+
+/*ARGSUSED*/
+void
+t4_md5_multiblock(MD5_CTX *ctx, const uint8_t *input,
+    unsigned int input_length_in_blocks)
+{ return; }
+
+#else	/* lint || __lint */
+
+#include <sys/asm_linkage.h>
+
+	ENTRY(t4_md5_multiblock)
+
+!load result from previous digest (stored in ctx)
+	ld	[%o0], %f0
+	ld	[%o0 + 0x4], %f1
+	ld	[%o0 + 0x8], %f2
+	ld	[%o0 + 0xc], %f3
+
+	and	%o1, 7, %o3
+	brnz	%o3, md5_unaligned_input
+	nop
+
+md5_loop:
+
+!load 64 bytes of data
+	ldd	[%o1], %f8		!load 8 bytes of data
+	ldd	[%o1 + 0x8], %f10	!load 8 bytes of data
+	ldd	[%o1 + 0x10], %f12	!load 8 bytes of data
+	ldd	[%o1 + 0x18], %f14	!load 8 bytes of data
+	ldd	[%o1 + 0x20], %f16	!load 8 bytes of data
+	ldd	[%o1 + 0x28], %f18	!load 8 bytes of data
+	ldd	[%o1 + 0x30], %f20	!load 8 bytes of data
+	ldd	[%o1 + 0x38], %f22	!load 8 bytes of data
+
+	.byte	0x81, 0xb0, 0x28, 0x00	!md5 opcode (no assembler mnemonic)
+
+	dec	%o2
+	brnz	%o2, md5_loop
+	add	%o1, 0x40, %o1
+
+!copy digest back into ctx
+	st	%f0, [%o0]
+	st	%f1, [%o0 + 0x4]
+	st	%f2, [%o0 + 0x8]
+	retl
+	st	%f3, [%o0 + 0xc]
+
+md5_unaligned_input:
+	alignaddr %o1, %g0, %g0		! generate %gsr
+	andn	%o1, 7, %o1
+
+md5_unaligned_input_loop:
+	ldd	[%o1], %f8		!load 8 bytes of data
+	ldd	[%o1 + 0x8], %f10	!load 8 bytes of data
+	ldd	[%o1 + 0x10], %f12	!load 8 bytes of data
+	ldd	[%o1 + 0x18], %f14	!load 8 bytes of data
+	ldd	[%o1 + 0x20], %f16	!load 8 bytes of data
+	ldd	[%o1 + 0x28], %f18	!load 8 bytes of data
+	ldd	[%o1 + 0x30], %f20	!load 8 bytes of data
+	ldd	[%o1 + 0x38], %f22	!load 8 bytes of data
+	ldd	[%o1 + 0x40], %f24	!load 8 bytes of data
+	faligndata %f8, %f10, %f8
+	faligndata %f10, %f12, %f10
+	faligndata %f12, %f14, %f12
+	faligndata %f14, %f16, %f14
+	faligndata %f16, %f18, %f16
+	faligndata %f18, %f20, %f18
+	faligndata %f20, %f22, %f20
+	faligndata %f22, %f24, %f22
+
+	.byte	0x81, 0xb0, 0x28, 0x00	!md5 opcode (no assembler mnemonic)
+
+	dec	%o2
+	brnz	%o2, md5_unaligned_input_loop
+	add	%o1, 0x40, %o1
+
+!copy digest back into ctx
+	st	%f0, [%o0]
+	st	%f1, [%o0 + 0x4]
+	st	%f2, [%o0 + 0x8]
+	retl
+	st	%f3, [%o0 + 0xc]
+
+	SET_SIZE(t4_md5_multiblock)
+
+#endif  /* lint || __lint */
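The lint stub above doubles as the C prototype: t4_md5_multiblock() consumes
whole 64-byte blocks and leaves any partial block to the caller. A minimal
usage sketch under that assumption (the real glue in eng_t4_digest.c also
maintains the context's length fields, which this sketch omits):

	#include <sys/types.h>
	#include <sys/md5.h>

	extern void t4_md5_multiblock(MD5_CTX *, const uint8_t *, unsigned int);

	/* hypothetical helper: hand only full 64-byte blocks to the T4 unit */
	static void
	md5_hw_blocks(MD5_CTX *ctx, const uint8_t *buf, size_t len)
	{
		if (len >= 64)
			t4_md5_multiblock(ctx, buf, (unsigned int)(len / 64));
		/* the trailing len % 64 bytes must be buffered by the caller */
	}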
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.0/engines/t4/t4_sha1.S	Wed Jul 20 10:20:13 2011 -0700
@@ -0,0 +1,120 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*LINTLIBRARY*/
+
+#if defined(lint) || defined(__lint)
+
+#include <sys/types.h>
+#include <sys/sha1.h>
+
+/*ARGSUSED*/
+void
+t4_sha1_multiblock(SHA1_CTX *ctx, const uint8_t *input,
+    size_t nr_blocks)
+{ return; }
+
+#else	/* lint || __lint */
+
+#include <sys/asm_linkage.h>
+
+	ENTRY(t4_sha1_multiblock)
+
+!load result from previous digest (stored in ctx)
+	ld	[%o0], %f0
+	ld	[%o0 + 0x4], %f1
+	ld	[%o0 + 0x8], %f2
+	ld	[%o0 + 0xc], %f3
+	ld	[%o0 + 0x10], %f4
+
+	and	%o1, 7, %o3
+	brnz	%o3, sha1_unaligned_input
+	nop
+
+sha1_loop:
+
+!load 64 bytes of data
+	ldd	[%o1], %f8		!load 8 bytes of data
+	ldd	[%o1 + 0x8], %f10	!load 8 bytes of data
+	ldd	[%o1 + 0x10], %f12	!load 8 bytes of data
+	ldd	[%o1 + 0x18], %f14	!load 8 bytes of data
+	ldd	[%o1 + 0x20], %f16	!load 8 bytes of data
+	ldd	[%o1 + 0x28], %f18	!load 8 bytes of data
+	ldd	[%o1 + 0x30], %f20	!load 8 bytes of data
+	ldd	[%o1 + 0x38], %f22	!load 8 bytes of data
+
+	.byte	0x81, 0xb0, 0x28, 0x20	!sha1 opcode (no assembler mnemonic)
+
+	dec	%o2
+	brnz	%o2, sha1_loop
+	add	%o1, 0x40, %o1
+
+!copy digest back into ctx
+	st	%f0, [%o0]
+	st	%f1, [%o0 + 0x4]
+	st	%f2, [%o0 + 0x8]
+	st	%f3, [%o0 + 0xc]
+	retl
+	st	%f4, [%o0 + 0x10]
+
+sha1_unaligned_input:
+	alignaddr %o1, %g0, %g0		! generate %gsr
+	andn	%o1, 7, %o1
+
+sha1_unaligned_input_loop:
+	ldd	[%o1], %f8		!load 8 bytes of data
+	ldd	[%o1 + 0x8], %f10	!load 8 bytes of data
+	ldd	[%o1 + 0x10], %f12	!load 8 bytes of data
+	ldd	[%o1 + 0x18], %f14	!load 8 bytes of data
+	ldd	[%o1 + 0x20], %f16	!load 8 bytes of data
+	ldd	[%o1 + 0x28], %f18	!load 8 bytes of data
+	ldd	[%o1 + 0x30], %f20	!load 8 bytes of data
+	ldd	[%o1 + 0x38], %f22	!load 8 bytes of data
+	ldd	[%o1 + 0x40], %f24	!load 8 bytes of data
+	faligndata %f8, %f10, %f8
+	faligndata %f10, %f12, %f10
+	faligndata %f12, %f14, %f12
+	faligndata %f14, %f16, %f14
+	faligndata %f16, %f18, %f16
+	faligndata %f18, %f20, %f18
+	faligndata %f20, %f22, %f20
+	faligndata %f22, %f24, %f22
+
+	.byte	0x81, 0xb0, 0x28, 0x20	!sha1 opcode (no assembler mnemonic)
+
+	dec	%o2
+	brnz	%o2, sha1_unaligned_input_loop
+	add	%o1, 0x40, %o1
+
+!copy digest back into ctx
+	st	%f0, [%o0]
+	st	%f1, [%o0 + 0x4]
+	st	%f2, [%o0 + 0x8]
+	st	%f3, [%o0 + 0xc]
+	retl
+	st	%f4, [%o0 + 0x10]
+
+	SET_SIZE(t4_sha1_multiblock)
+
+#endif  /* lint || __lint */
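The *_unaligned_input loops above read nine aligned doublewords for every
64 bytes of input: alignaddr latches the misalignment into %gsr, and each
faligndata splices two adjacent registers into one correctly shifted 8-byte
word. On big-endian SPARC that is equivalent to the following C sketch
(illustrative only; the hardware does this per register pair):

	#include <stdint.h>

	/* combine two aligned 8-byte words into one unaligned 8-byte word */
	static uint64_t
	splice(uint64_t lo, uint64_t hi, unsigned off)	/* off = addr & 7 */
	{
		return ((off == 0) ? lo :
		    (lo << (8 * off)) | (hi >> (8 * (8 - off))));
	}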
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.0/engines/t4/t4_sha2.S	Wed Jul 20 10:20:13 2011 -0700
@@ -0,0 +1,275 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*LINTLIBRARY*/
+
+#if defined(lint) || defined(__lint)
+
+#include <sys/types.h>
+#include <sys/sha2.h>
+
+/*ARGSUSED*/
+void
+t4_sha256_multiblock(SHA2_CTX *ctx, const uint8_t *input,
+    size_t nr_blocks)
+{ return; }
+
+/*ARGSUSED*/
+void
+t4_sha512_multiblock(SHA2_CTX *ctx, const uint8_t *input,
+    size_t nr_blocks)
+{ return; }
+
+#else	/* lint || __lint */
+
+#include <sys/asm_linkage.h>
+
+	ENTRY(t4_sha256_multiblock)
+
+	add	%o0, 0x8, %o0		!skip over first field in ctx
+
+!load result from previous digest (stored in ctx)
+	ld	[%o0], %f0
+	ld	[%o0 + 0x4], %f1
+	ld	[%o0 + 0x8], %f2
+	ld	[%o0 + 0xc], %f3
+	ld	[%o0 + 0x10], %f4
+	ld	[%o0 + 0x14], %f5
+	ld	[%o0 + 0x18], %f6
+	ld	[%o0 + 0x1c], %f7
+
+	and	%o1, 7, %o3
+	brnz	%o3, sha256_unaligned_input
+	nop
+
+sha256_loop:
+
+!load 64 bytes of data
+	ldd	[%o1], %f8		!load 8 bytes of data
+	ldd	[%o1 + 0x8], %f10	!load 8 bytes of data
+	ldd	[%o1 + 0x10], %f12	!load 8 bytes of data
+	ldd	[%o1 + 0x18], %f14	!load 8 bytes of data
+	ldd	[%o1 + 0x20], %f16	!load 8 bytes of data
+	ldd	[%o1 + 0x28], %f18	!load 8 bytes of data
+	ldd	[%o1 + 0x30], %f20	!load 8 bytes of data
+	ldd	[%o1 + 0x38], %f22	!load 8 bytes of data
+
+	.byte	0x81, 0xb0, 0x28, 0x40	!sha256 opcode (no assembler mnemonic)
+
+	dec	%o2
+	brnz	%o2, sha256_loop
+	add	%o1, 0x40, %o1
+
+!copy digest back into ctx
+	st	%f0, [%o0]
+	st	%f1, [%o0 + 0x4]
+	st	%f2, [%o0 + 0x8]
+	st	%f3, [%o0 + 0xc]
+	st	%f4, [%o0 + 0x10]
+	st	%f5, [%o0 + 0x14]
+	st	%f6, [%o0 + 0x18]
+	retl
+	st	%f7, [%o0 + 0x1c]
+
+sha256_unaligned_input:
+	alignaddr %o1, %g0, %g0		! generate %gsr
+	andn	%o1, 7, %o1
+
+sha256_unaligned_input_loop:
+	ldd	[%o1], %f8		!load 8 bytes of data
+	ldd	[%o1 + 0x8], %f10	!load 8 bytes of data
+	ldd	[%o1 + 0x10], %f12	!load 8 bytes of data
+	ldd	[%o1 + 0x18], %f14	!load 8 bytes of data
+	ldd	[%o1 + 0x20], %f16	!load 8 bytes of data
+	ldd	[%o1 + 0x28], %f18	!load 8 bytes of data
+	ldd	[%o1 + 0x30], %f20	!load 8 bytes of data
+	ldd	[%o1 + 0x38], %f22	!load 8 bytes of data
+	ldd	[%o1 + 0x40], %f24	!load 8 bytes of data
+	faligndata %f8, %f10, %f8
+	faligndata %f10, %f12, %f10
+	faligndata %f12, %f14, %f12
+	faligndata %f14, %f16, %f14
+	faligndata %f16, %f18, %f16
+	faligndata %f18, %f20, %f18
+	faligndata %f20, %f22, %f20
+	faligndata %f22, %f24, %f22
+
+	.byte	0x81, 0xb0, 0x28, 0x40	!sha256 opcode (no assembler mnemonic)
+
+	dec	%o2
+	brnz	%o2, sha256_unaligned_input_loop
+	add	%o1, 0x40, %o1
+
+!copy digest back into ctx
+	st	%f0, [%o0]
+	st	%f1, [%o0 + 0x4]
+	st	%f2, [%o0 + 0x8]
+	st	%f3, [%o0 + 0xc]
+	st	%f4, [%o0 + 0x10]
+	st	%f5, [%o0 + 0x14]
+	st	%f6, [%o0 + 0x18]
+	retl
+	st	%f7, [%o0 + 0x1c]
+
+	SET_SIZE(t4_sha256_multiblock)
+
+
+	ENTRY(t4_sha512_multiblock)
+
+	add	%o0, 0x8, %o0		!skip over first field in ctx
+
+!load result from previous digest (stored in ctx)
+	ld	[%o0], %f0
+	ld	[%o0 + 0x4], %f1
+	ld	[%o0 + 0x8], %f2
+	ld	[%o0 + 0xc], %f3
+	ld	[%o0 + 0x10], %f4
+	ld	[%o0 + 0x14], %f5
+	ld	[%o0 + 0x18], %f6
+	ld	[%o0 + 0x1c], %f7
+	ld	[%o0 + 0x20], %f8
+	ld	[%o0 + 0x24], %f9
+	ld	[%o0 + 0x28], %f10
+	ld	[%o0 + 0x2c], %f11
+	ld	[%o0 + 0x30], %f12
+	ld	[%o0 + 0x34], %f13
+	ld	[%o0 + 0x38], %f14
+	ld	[%o0 + 0x3c], %f15
+
+	and	%o1, 7, %o3
+	brnz	%o3, sha512_unaligned_input
+	nop
+
+sha512_loop:
+
+!load 128 bytes of data
+	ldd	[%o1], %f16		!load 8 bytes of data
+	ldd	[%o1 + 0x8], %f18	!load 8 bytes of data
+	ldd	[%o1 + 0x10], %f20	!load 8 bytes of data
+	ldd	[%o1 + 0x18], %f22	!load 8 bytes of data
+	ldd	[%o1 + 0x20], %f24	!load 8 bytes of data
+	ldd	[%o1 + 0x28], %f26	!load 8 bytes of data
+	ldd	[%o1 + 0x30], %f28	!load 8 bytes of data
+	ldd	[%o1 + 0x38], %f30	!load 8 bytes of data
+	ldd	[%o1 + 0x40], %f32	!load 8 bytes of data
+	ldd	[%o1 + 0x48], %f34	!load 8 bytes of data
+	ldd	[%o1 + 0x50], %f36	!load 8 bytes of data
+	ldd	[%o1 + 0x58], %f38	!load 8 bytes of data
+	ldd	[%o1 + 0x60], %f40	!load 8 bytes of data
+	ldd	[%o1 + 0x68], %f42	!load 8 bytes of data
+	ldd	[%o1 + 0x70], %f44	!load 8 bytes of data
+	ldd	[%o1 + 0x78], %f46	!load 8 bytes of data
+
+	.byte	0x81, 0xb0, 0x28, 0x60	!sha512 opcode (no assembler mnemonic)
+
+	dec	%o2
+	brnz	%o2, sha512_loop
+	add	%o1, 0x80, %o1
+
+!copy digest back into ctx
+	st	%f0, [%o0]
+	st	%f1, [%o0 + 0x4]
+	st	%f2, [%o0 + 0x8]
+	st	%f3, [%o0 + 0xc]
+	st	%f4, [%o0 + 0x10]
+	st	%f5, [%o0 + 0x14]
+	st	%f6, [%o0 + 0x18]
+	st	%f7, [%o0 + 0x1c]
+	st	%f8, [%o0 + 0x20]
+	st	%f9, [%o0 + 0x24]
+	st	%f10, [%o0 + 0x28]
+	st	%f11, [%o0 + 0x2c]
+	st	%f12, [%o0 + 0x30]
+	st	%f13, [%o0 + 0x34]
+	st	%f14, [%o0 + 0x38]
+	retl
+	st	%f15, [%o0 + 0x3c]
+
+sha512_unaligned_input:
+	alignaddr %o1, %g0, %g0		! generate %gsr
+	andn	%o1, 7, %o1
+
+sha512_unaligned_input_loop:
+	ldd	[%o1], %f16		!load 8 bytes of data
+	ldd	[%o1 + 0x8], %f18	!load 8 bytes of data
+	ldd	[%o1 + 0x10], %f20	!load 8 bytes of data
+	ldd	[%o1 + 0x18], %f22	!load 8 bytes of data
+	ldd	[%o1 + 0x20], %f24	!load 8 bytes of data
+	ldd	[%o1 + 0x28], %f26	!load 8 bytes of data
+	ldd	[%o1 + 0x30], %f28	!load 8 bytes of data
+	ldd	[%o1 + 0x38], %f30	!load 8 bytes of data
+	ldd	[%o1 + 0x40], %f32	!load 8 bytes of data
+	ldd	[%o1 + 0x48], %f34	!load 8 bytes of data
+	ldd	[%o1 + 0x50], %f36	!load 8 bytes of data
+	ldd	[%o1 + 0x58], %f38	!load 8 bytes of data
+	ldd	[%o1 + 0x60], %f40	!load 8 bytes of data
+	ldd	[%o1 + 0x68], %f42	!load 8 bytes of data
+	ldd	[%o1 + 0x70], %f44	!load 8 bytes of data
+	ldd	[%o1 + 0x78], %f46	!load 8 bytes of data
+	ldd	[%o1 + 0x80], %f48	!load 8 bytes of data
+	faligndata %f16, %f18, %f16
+	faligndata %f18, %f20, %f18
+	faligndata %f20, %f22, %f20
+	faligndata %f22, %f24, %f22
+	faligndata %f24, %f26, %f24
+	faligndata %f26, %f28, %f26
+	faligndata %f28, %f30, %f28
+	faligndata %f30, %f32, %f30
+	faligndata %f32, %f34, %f32
+	faligndata %f34, %f36, %f34
+	faligndata %f36, %f38, %f36
+	faligndata %f38, %f40, %f38
+	faligndata %f40, %f42, %f40
+	faligndata %f42, %f44, %f42
+	faligndata %f44, %f46, %f44
+	faligndata %f46, %f48, %f46
+
+	.byte	0x81, 0xb0, 0x28, 0x60	!sha512 opcode (no assembler mnemonic)
+
+	dec	%o2
+	brnz	%o2, sha512_unaligned_input_loop
+	add	%o1, 0x80, %o1
+
+!copy digest back into ctx
+	st	%f0, [%o0]
+	st	%f1, [%o0 + 0x4]
+	st	%f2, [%o0 + 0x8]
+	st	%f3, [%o0 + 0xc]
+	st	%f4, [%o0 + 0x10]
+	st	%f5, [%o0 + 0x14]
+	st	%f6, [%o0 + 0x18]
+	st	%f7, [%o0 + 0x1c]
+	st	%f8, [%o0 + 0x20]
+	st	%f9, [%o0 + 0x24]
+	st	%f10, [%o0 + 0x28]
+	st	%f11, [%o0 + 0x2c]
+	st	%f12, [%o0 + 0x30]
+	st	%f13, [%o0 + 0x34]
+	st	%f14, [%o0 + 0x38]
+	retl
+	st	%f15, [%o0 + 0x3c]
+
+	SET_SIZE(t4_sha512_multiblock)
+
+#endif  /* lint || __lint */
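Both SHA-2 entry points step the context pointer past its first 8 bytes
before touching the state ("skip over first field in ctx"). That matches a
Solaris-style SHA2_CTX in which a 32-bit algorithm tag precedes the state
union and is padded to 8 bytes by the union's 64-bit alignment; the layout
assumed here is sketched below (the authoritative definition lives in
<sys/sha2.h>, and the field names are illustrative):

	#include <stdint.h>

	typedef struct {
		uint32_t algotype;		/* SHA256 vs. SHA512 */
		union {				/* starts at offset 8 */
			uint32_t s32[8];	/* SHA-256 state */
			uint64_t s64[8];	/* SHA-512 state */
		} state;
		/* bit counts and the block buffer follow */
	} sha2_ctx_sketch;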
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.0/patches/openssl-1.0.0d-t4-engine.sparc-patch	Wed Jul 20 10:20:13 2011 -0700
@@ -0,0 +1,173 @@
+#
+# Engine t4 patch (openssl-1.0.0d-t4-engine.sparc-patch).
+# This engine is SPARC-only.
+#
+Index: Configure
+===================================================================
+diff -ru openssl-1.0.0d/Configure openssl-1.0.0d/Configure
+--- openssl-1.0.0d/Configure	2011-05-24 17:02:24.000000000 -0700
++++ openssl-1.0.0d/Configure	2011-06-17 16:53:34.203229852 -0700
+@@ -134,8 +134,8 @@
+ 
+ my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
+ my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::void";
+-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::void";
+-my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void";
++my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o t4_aes.o::t4_md5.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o t4_sha1.o t4_sha2.o:::::::void";
++my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:t4_aes.o::t4_md5.o:t4_sha1.o t4_sha2.o:::::::void";
+ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::::::::void";
+ my $mips3_asm=":bn-mips3.o::::::::::::void";
+ my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o::aes-s390x.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::void";
+Index: crypto/aes/Makefile
+===================================================================
+diff -ru openssl-1.0.0d/crypto/aes/ openssl-1.0.0d/crypto/aes/Makefile
+--- openssl-1.0.0d/crypto/aes/Makefile	2011-05-24 17:03:31.000000000 -0700
++++ openssl-1.0.0d/crypto/aes/Makefile	2011-06-30 17:26:34.980110000 -0700
+@@ -17,6 +17,10 @@
+ ASFLAGS= $(INCLUDES) $(ASFLAG)
+ AFLAGS= $(ASFLAGS)
+ 
++BITS:=	$(shell if grep '^SHARED_LDFLAGS.*=.*-m32' ../../Makefile >/dev/null; \
++		then echo 32; else echo 64; fi)
++ASFLAGSYF= -xregsym=no -K pic -P -xarch=v9v -D_sparcv9 -D_ASM -Dsparc -m$(BITS)
++
+ GENERAL=Makefile
+ #TEST=aestest.c
+ TEST=
+@@ -57,6 +61,10 @@
+ aes-sparcv9.s: asm/aes-sparcv9.pl
+ 	$(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@
+ 
++t4_aes.o: asm/t4_aes.S
++	as $(ASFLAGSYF) -o $@ asm/t4_aes.S
++	elfedit -e 'cap:hw1 -and -cmp vis vis3' $@
++
+ aes-ppc.s:	asm/aes-ppc.pl
+ 	$(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@
+ 
+Index: crypto/engine/Makefile
+===================================================================
+diff -ru openssl-1.0.0d/crypto/engine/Makefile openssl-1.0.0d/crypto/engine/Makefile
+--- openssl-1.0.0d/crypto/engine/Makefile	2011-05-24 17:04:12.000000000 -0700
++++ openssl-1.0.0d/crypto/engine/Makefile	2011-06-17 16:55:13.074884424 -0700
+@@ -22,12 +22,14 @@
+ 	tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \
+ 	tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \
+ 	eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \
++	eng_t4.c eng_t4_digest.c \
+ 	hw_pk11.c hw_pk11_pub.c hw_pk11_uri.c
+ LIBOBJ= eng_err.o eng_lib.o eng_list.o eng_init.o eng_ctrl.o \
+ 	eng_table.o eng_pkey.o eng_fat.o eng_all.o \
+ 	tb_rsa.o tb_dsa.o tb_ecdsa.o tb_dh.o tb_ecdh.o tb_rand.o tb_store.o \
+ 	tb_cipher.o tb_digest.o tb_pkmeth.o tb_asnmth.o \
+ 	eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o \
++	eng_t4.o eng_t4_digest.o \
+ 	hw_pk11.o hw_pk11_pub.o hw_pk11_uri.o
+ 
+ SRC= $(LIBSRC)
+Index: crypto/engine/eng_all.c
+===================================================================
+diff -ru openssl-1.0.0d/crypto/engine/eng_all.c openssl-1.0.0d/crypto/engine/eng_all.c
+--- openssl-1.0.0d/crypto/engine/eng_all.c	2011-05-24 17:02:20.000000000 -0700
++++ openssl-1.0.0d/crypto/engine/eng_all.c	2011-06-22 17:34:25.145829355 -0700
+@@ -61,6 +61,8 @@
+ 
+ void ENGINE_load_builtin_engines(void)
+ 	{
++	/* Engines may depend on CPU capabilities */
++	OPENSSL_cpuid_setup();
+ #if 0
+ 	/* There's no longer any need for an "openssl" ENGINE unless, one day,
+ 	 * it is the *only* way for standard builtin implementations to be be
+@@ -71,6 +73,10 @@
+ #if !defined(OPENSSL_NO_HW) && (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV))
+ 	ENGINE_load_cryptodev();
+ #endif
++#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_T4)
++	ENGINE_load_t4();
++	ENGINE_register_all_complete();
++#endif
+ 	ENGINE_load_dynamic();
+ #ifndef OPENSSL_NO_HW_PKCS11
+ 	ENGINE_load_pk11();
+Index: crypto/engine/engine.h
+===================================================================
+diff -ru openssl-1.0.0d/crypto/engine/engine.h openssl-1.0.0d/crypto/engine/engine.h
+--- openssl-1.0.0d/crypto/engine/engine.h	2011-05-24 17:02:20.000000000 -0700
++++ openssl-1.0.0d/crypto/engine/engine.h	2011-05-24 18:05:29.075766123 -0700
+@@ -344,6 +344,7 @@
+ #endif
+ #endif
+ void ENGINE_load_cryptodev(void);
++void ENGINE_load_t4(void);
+ void ENGINE_load_pk11(void);
+ void ENGINE_load_builtin_engines(void);
+ 
+Index: crypto/md5/Makefile
+===================================================================
+diff -ru openssl-1.0.0d/crypto/md5/Makefile openssl-1.0.0d/crypto/md5/Makefile
+--- openssl-1.0.0d/crypto/md5/Makefile	2011-05-24 17:03:14.000000000 -0700
++++ openssl-1.0.0d/crypto/md5/Makefile	2011-06-30 16:53:53.000000000 -0700
+@@ -17,6 +17,10 @@
+ ASFLAGS= $(INCLUDES) $(ASFLAG)
+ AFLAGS= $(ASFLAGS)
+ 
++BITS:=	$(shell if grep '^SHARED_LDFLAGS.*=.*-m32' ../../Makefile >/dev/null; \
++		then echo 32; else echo 64; fi)
++ASFLAGSYF= -xregsym=no -K pic -P -xarch=v9b -D_sparcv9 -D_ASM -Dsparc -m$(BITS)
++
+ GENERAL=Makefile
+ TEST=md5test.c
+ APPS=
+@@ -52,6 +58,9 @@
+ 	$(CC) $(CFLAGS) -E asm/md5-ia64.S | \
+ 	$(PERL) -ne 's/;\s+/;\n/g; print;' > $@
+ 
++t4_md5.o: asm/t4_md5.S
++	as $(ASFLAGSYF) -o $@ asm/t4_md5.S
++
+ files:
+ 	$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
+ 
+Index: crypto/sha/Makefile
+==================================================================
+diff -ru openssl-1.0.0d/crypto/sha/Makefile openssl-1.0.0d/crypto/sha/Makefile
+--- openssl-1.0.0d/crypto/sha/Makefile	2011-05-24 17:03:18.000000000 -0700
++++ openssl-1.0.0d/crypto/sha/Makefile	2011-06-30 16:53:53.000000000 -0700
+@@ -17,6 +17,10 @@
+ ASFLAGS= $(INCLUDES) $(ASFLAG)
+ AFLAGS= $(ASFLAGS)
+ 
++BITS:=	$(shell if grep '^SHARED_LDFLAGS.*=.*-m32' ../../Makefile >/dev/null; \
++		then echo 32; else echo 64; fi)
++ASFLAGSYF= -xregsym=no -K pic -P -xarch=v9b -D_sparcv9 -D_ASM -Dsparc -m$(BITS)
++
+ GENERAL=Makefile
+ TEST=shatest.c sha1test.c sha256t.c sha512t.c
+ APPS=
+@@ -76,6 +82,12 @@
+ sha256-%.s:	asm/sha512-%.pl;	$(PERL) $< $@
+ sha512-%.s:	asm/sha512-%.pl;	$(PERL) $< $@
+ 
++t4_sha1.o: asm/t4_sha1.S
++	as $(ASFLAGSYF) -o $@ asm/t4_sha1.S
++
++t4_sha2.o: asm/t4_sha2.S
++	as $(ASFLAGSYF) -o $@ asm/t4_sha2.S
++
+ files:
+ 	$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
+ 
+Index: util/libeay.num
+==================================================================
+diff -ru openssl-1.0.0d/util/libeay.num openssl-1.0.0d/util/libeay.num
+--- openssl-1.0.0d/util/libeay.num	2010-07-25 09:56:06.000000000 -0700
++++ openssl-1.0.0d/util/libeay.num	2011-05-25 11:19:15.585211842 -0700
+@@ -4178,6 +4178,7 @@
+ UI_method_set_prompt_constructor        4551	EXIST:!VMS:FUNCTION:
+ UI_method_set_prompt_constructr         4551	EXIST:VMS:FUNCTION:
+ EVP_read_pw_string_min                  4552	EXIST::FUNCTION:
++ENGINE_load_t4                          4553	EXIST::FUNCTION:ENGINE
+ CRYPTO_cts128_encrypt                   4553	EXIST::FUNCTION:
+ CRYPTO_cts128_decrypt_block             4554	EXIST::FUNCTION:
+ CRYPTO_cfb128_1_encrypt                 4555	EXIST::FUNCTION:
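Because the eng_all.c hunk calls ENGINE_load_t4() followed by
ENGINE_register_all_complete(), the engine is picked up automatically by any
application that calls ENGINE_load_builtin_engines(). Selecting it
explicitly is also possible; a minimal sketch, assuming the engine registers
under the id "t4" (the id string is defined in eng_t4.c, not shown here):

	#include <openssl/engine.h>

	int
	use_t4_engine(void)
	{
		ENGINE *e;

		ENGINE_load_builtin_engines();
		if ((e = ENGINE_by_id("t4")) == NULL)
			return (0);	/* engine not built in, or non-SPARC */
		if (!ENGINE_set_default(e,
		    ENGINE_METHOD_CIPHERS | ENGINE_METHOD_DIGESTS)) {
			ENGINE_free(e);
			return (0);
		}
		ENGINE_free(e);
		return (1);
	}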