PSARC 2013/034 OpenSSL 1.0.1
15824599 SUNBT7206151 T4 hash should be embedded in the OpenSSL upstream src
16868728 srp.h header file missing in openssl 1.0.1
--- a/components/openssl/README Thu Jul 11 05:45:11 2013 -0700
+++ b/components/openssl/README Fri Jul 12 07:47:27 2013 -0700
@@ -18,7 +18,7 @@
#
# CDDL HEADER END
#
-# Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
#
Build Layout
@@ -33,6 +33,24 @@
See also comments in all the Makefiles for more information.
+OpenSSL Version
+---
+
+For the non-FIPS build, we currently deliver OpenSSL 1.0.1e with some updates
+from OpenSSL 1.0.2 so that T4 instruction support is embedded in the OpenSSL
+upstream code. As of April 2013, 1.0.2 has not yet been released; therefore,
+we have decided to patch the code.
+The following files/code are copied in from 1.0.2.
+added:
+ components/openssl/openssl-1.0.1/inline-t4/md5-sparcv9.pl
+ components/openssl/openssl-1.0.1/inline-t4/sparc_arch.h
+ components/openssl/openssl-1.0.1/patches/openssl-t4-inline.sparc-patch
+TPNO for OpenSSL 1.0.1e is 13003.
+
+For the FIPS build, we currently deliver OpenSSL 0.9.8y with OpenSSL FIPS module 2.1.
+TPNO for OpenSSL 0.9.8y is 13019.
+
+
The non-fips Build.
---
@@ -98,19 +116,19 @@
- should be ok - original detection seems broken, FPU gets never used
- implementation of atoi()
-
-openssl-1.0.0d-aesni-v4.i386-patch
-X86-only patch.
-Add a built-in engine, aesni, to support X86 AES-NI instructions, along with
-files engines/aesni/aesni-x86[_64].pl.
-This patch is for OpenSSL 1.0.0d. For newer OpenSSL versions, a newer patch
-may be needed.
+31_dtls_version.patch
+Fix DTLS_BAD_VER bug reported after OpenSSL 1.0.1e was released.
openssl-1.0.0d-t4-engine.sparc-patch
SPARC-only patch.
Add a built-in engine, t4, to support SPARC T4 crypto instructions.
along with files in directory engines/t4.
+openssl-t4-inline.sparc-patch
+SPARC-only patch.
+Add support for inline T4 instructions to the OpenSSL upstream code until
+OpenSSL 1.0.2 is released.
+
opensslconf.patch
Modifies opensslconf.h so that it is suitable for both 32bit and 64bit installs.
OpenSSL either builds for 32bit or 64bit - it doesn't allow for combined 32bit
--- a/components/openssl/openssl-1.0.1/Makefile Thu Jul 11 05:45:11 2013 -0700
+++ b/components/openssl/openssl-1.0.1/Makefile Fri Jul 12 07:47:27 2013 -0700
@@ -41,7 +41,8 @@
COMPONENT_BUGDB= utility/openssl
# Architecture-specific patches
-EXTRA_PATCHES.sparc = $(PATCH_DIR)/openssl-1.0.1e-t4-engine.sparc-patch
+EXTRA_PATCHES.sparc = $(PATCH_DIR)/openssl-t4-inline.sparc-patch
+EXTRA_PATCHES.sparc += $(PATCH_DIR)/openssl-1.0.1e-t4-engine.sparc-patch
EXTRA_PATCHES = $(EXTRA_PATCHES.$(MACH))
include $(WS_TOP)/make-rules/prep.mk
@@ -178,7 +179,10 @@
$(LN) -fs $(COMPONENT_DIR)/engines/t4/t4_des.S $(@D)/crypto/des/asm; \
$(LN) -fs $(COMPONENT_DIR)/engines/t4/t4_md5.S $(@D)/crypto/md5/asm; \
$(LN) -fs $(COMPONENT_DIR)/engines/t4/t4_sha?.S $(@D)/crypto/sha/asm; \
- $(LN) -fs $(COMPONENT_DIR)/wanboot-openssl/wanboot-stubs.c $(@D)/crypto; )
+ $(LN) -fs $(COMPONENT_DIR)/wanboot-openssl/wanboot-stubs.c $(@D)/crypto; \
+ $(LN) -fs $(COMPONENT_DIR)/inline-t4/sparc_arch.h $(@D)/crypto/; \
+ $(LN) -fs $(COMPONENT_DIR)/inline-t4/md5-sparcv9.pl $(@D)/crypto/md5/asm; )
+
# OpenSSL for wanboot is built on sparc only.
ifeq ($(MACH), sparc)
@@ -269,6 +273,7 @@
crypto/evp/pmeth_fn.o crypto/evp/pmeth_gn.o crypto/evp/pmeth_lib.o \
crypto/ex_data.o crypto/hmac/hm_ameth.o crypto/hmac/hm_pmeth.o \
crypto/hmac/hmac.o crypto/lhash/lhash.o crypto/md4/md4_dgst.o \
+ crypto/md5/md5-sparcv9.o \
crypto/md5/md5_dgst.o crypto/mem.o crypto/mem_dbg.o crypto/modes/cbc128.o \
crypto/modes/ccm128.o crypto/modes/cfb128.o crypto/modes/ctr128.o \
crypto/modes/gcm128.o crypto/modes/ghash-sparcv9.o crypto/modes/ofb128.o \
--- a/components/openssl/openssl-1.0.1/engines/pkcs11/hw_pk11.c Thu Jul 11 05:45:11 2013 -0700
+++ b/components/openssl/openssl-1.0.1/engines/pkcs11/hw_pk11.c Fri Jul 12 07:47:27 2013 -0700
@@ -332,6 +332,9 @@
static int check_hw_mechanisms(void);
static int nid_in_table(int nid, int *nid_table);
static int hw_aes_instruction_set_present(void);
+#if defined(__sparc)
+static int hw_yf_digest_instruction_present(void);
+#endif
#endif /* SOLARIS_HW_SLOT_SELECTION */
#define TRY_OBJ_DESTROY(sp, obj_hdl, retval, uselock, alg_type) \
@@ -911,13 +914,24 @@
if (!ENGINE_set_id(e, engine_pk11_id) ||
!ENGINE_set_name(e, engine_pk11_name) ||
- !ENGINE_set_ciphers(e, pk11_engine_ciphers) ||
- !ENGINE_set_digests(e, pk11_engine_digests))
+ !ENGINE_set_ciphers(e, pk11_engine_ciphers))
return (0);
if (!ENGINE_set_pkey_meths(e, pk11_engine_pkey_methods))
return (0);
+#if defined(__sparc)
+ /*
+ * Enable hash mechanisms for the pkcs11 engine only if the T4
+ * digest instructions are not present.
+ */
+ if (!hw_yf_digest_instruction_present())
+#endif /* defined(__sparc) */
+ if (!ENGINE_set_digests(e, pk11_engine_digests)) {
+ return (0);
+ }
+
+
#ifndef OPENSSL_NO_RSA
if (pk11_have_rsa == CK_TRUE)
{
@@ -3798,6 +3812,25 @@
return (present);
}
+#if defined(__sparc)
+static int
+hw_yf_digest_instruction_present(void)
+{
+ static int cached_result = -1; /* -1 = not probed yet; 0 or 1 once computed */
+ uint_t ui = 0; /* stays 0 (no feature bits) if getisax() stores nothing */
+ /* Probe once: result is 1 only if MD5, SHA1, SHA256 and SHA512 bits are all set. */
+ if (cached_result == -1) {
+ (void) getisax(&ui, 1);
+ cached_result = ((ui & AV_SPARC_MD5) != 0) &&
+ ((ui & AV_SPARC_SHA1) != 0) &&
+ ((ui & AV_SPARC_SHA256) != 0) &&
+ ((ui & AV_SPARC_SHA512) != 0);
+ }
+ return (cached_result != 0);
+}
+
+#endif /* defined(__sparc) */
+
#endif /* SOLARIS_HW_SLOT_SELECTION */
#endif /* OPENSSL_NO_HW_PK11 */
--- a/components/openssl/openssl-1.0.1/engines/t4/eng_t4.c Thu Jul 11 05:45:11 2013 -0700
+++ b/components/openssl/openssl-1.0.1/engines/t4/eng_t4.c Fri Jul 12 07:47:27 2013 -0700
@@ -168,13 +168,9 @@
static t4_cipher_id get_cipher_index_by_nid(int nid);
#pragma inline(get_cipher_index_by_nid)
static void t4_instructions_present(_Bool *aes_present, _Bool *des_present,
- _Bool *digest_present, _Bool *montmul_present);
+ _Bool *montmul_present);
#pragma inline(t4_instructions_present)
-/* Digest registration function. Called by ENGINE_set_ciphers() */
-int t4_get_all_digests(ENGINE *e, const EVP_MD **digest,
- const int **nids, int nid);
-
/* RSA_METHOD structure used by ENGINE_set_RSA() */
extern RSA_METHOD *t4_RSA(void);
@@ -402,165 +398,27 @@
/*
- * Message Digest variables
- */
-static const int t4_digest_nids[] = {
-#ifndef OPENSSL_NO_MD5
- NID_md5,
-#endif
-#ifndef OPENSSL_NO_SHA
-#ifndef OPENSSL_NO_SHA1
- NID_sha1,
-#endif
-#ifndef OPENSSL_NO_SHA256
- NID_sha224,
- NID_sha256,
-#endif
-#ifndef OPENSSL_NO_SHA512
- NID_sha384,
- NID_sha512,
-#endif
-#endif /* !OPENSSL_NO_SHA */
-};
-static const int t4_digest_count =
- (sizeof (t4_digest_nids) / sizeof (t4_digest_nids[0]));
-
-#ifndef OPENSSL_NO_MD5
-extern const EVP_MD t4_md5;
-#endif
-#ifndef OPENSSL_NO_SHA
-#ifndef OPENSSL_NO_SHA1
-extern const EVP_MD t4_sha1;
-#endif
-#ifndef OPENSSL_NO_SHA256
-extern const EVP_MD t4_sha224;
-extern const EVP_MD t4_sha256;
-#endif
-#ifndef OPENSSL_NO_SHA512
-extern const EVP_MD t4_sha384;
-extern const EVP_MD t4_sha512;
-#endif
-#endif /* !OPENSSL_NO_SHA */
-
-/*
- * Message Digest functions
- */
-
-/*
- * Registered by the ENGINE with ENGINE_set_digests().
- * Finds out how to deal with a particular digest NID in the ENGINE.
- */
-/* ARGSUSED */
-int
-t4_get_all_digests(ENGINE *e, const EVP_MD **digest,
- const int **nids, int nid)
-{
- if (digest == NULL) { /* return a list of all supported digests */
- *nids = (t4_digest_count > 0) ? t4_digest_nids : NULL;
- return (t4_digest_count);
- }
-
- switch (nid) {
-#ifndef OPENSSL_NO_MD5
- case NID_md5:
- *digest = &t4_md5;
- break;
-#endif
-#ifndef OPENSSL_NO_SHA
-#ifndef OPENSSL_NO_SHA1
- /*
- * A special case. For "openssl dgst -dss1 ...",
- * OpenSSL calls EVP_get_digestbyname() on "dss1" which ends up
- * calling t4_get_all_digests() for NID_dsa. Internally, if an
- * engine is not used, OpenSSL uses SHA1_Init() as expected for
- * DSA. So, we must return t4_sha1 for NID_dsa as well. Note
- * that this must have changed between 0.9.8 and 1.0.0 since we
- * did not have the problem with the 0.9.8 version.
- */
- case NID_dsa:
- case NID_sha1:
- *digest = &t4_sha1;
- break;
-#endif
-#ifndef OPENSSL_NO_SHA256
- case NID_sha224:
- *digest = &t4_sha224;
- break;
- case NID_sha256:
- *digest = &t4_sha256;
- break;
-#endif
-#ifndef OPENSSL_NO_SHA512
- case NID_sha384:
- *digest = &t4_sha384;
- break;
- case NID_sha512:
- *digest = &t4_sha512;
- break;
-#endif
-#endif /* !OPENSSL_NO_SHA */
- default:
- /* digest not supported */
- *digest = NULL;
- return (0);
- }
-
- return (1);
-}
-
-
-/*
* Utility Functions
*/
/*
- * Set aes_present, des_present, digest_present and montmul_present
- * to B_FALSE or B_TRUE depending on
- * whether the current SPARC processor supports AES, DES,
- * MD5/SHA1/SHA256/SHA512 and MONTMUL, respectively.
+ * Set aes_present, des_present and montmul_present to B_FALSE or B_TRUE
+ * depending on whether the current SPARC processor supports AES, DES
+ * and MONTMUL, respectively.
*/
static void
t4_instructions_present(_Bool *aes_present, _Bool *des_present,
- _Bool *digest_present, _Bool *montmul_present)
+ _Bool *montmul_present)
{
#ifdef OPENSSL_NO_DES
#undef AV_SPARC_DES
#define AV_SPARC_DES 0
#endif
-#ifdef OPENSSL_NO_MD5
-#undef AV_SPARC_MD5
-#define AV_SPARC_MD5 0
-#endif
-#ifndef OPENSSL_NO_SHA
-#ifdef OPENSSL_NO_SHA1
-#undef AV_SPARC_SHA1
-#define AV_SPARC_SHA1 0
-#endif
-#ifdef OPENSSL_NO_SHA256
-#undef AV_SPARC_SHA256
-#define AV_SPARC_SHA256 0
-#endif
-#ifdef OPENSSL_NO_SHA512
-#undef AV_SPARC_SHA512
-#define AV_SPARC_SHA512 0
-#endif
-#else
-#undef AV_SPARC_SHA1
-#undef AV_SPARC_SHA256
-#undef AV_SPARC_SHA512
-#define AV_SPARC_SHA1 0
-#define AV_SPARC_SHA256 0
-#define AV_SPARC_SHA512 0
-#endif /* !OPENSSL_NO_SHA */
-
-#define DIGEST_MASK (AV_SPARC_MD5 | AV_SPARC_SHA1 | AV_SPARC_SHA256 | \
- AV_SPARC_SHA512)
uint_t ui;
(void) getisax(&ui, 1);
*aes_present = ((ui & AV_SPARC_AES) != 0);
*des_present = ((ui & AV_SPARC_DES) != 0);
- *digest_present = ((ui & DIGEST_MASK) == DIGEST_MASK);
*montmul_present = ((ui & AV_SPARC_MONT) != 0);
}
@@ -931,14 +789,12 @@
static int
t4_bind(ENGINE *e)
{
- _Bool aes_engage, digest_engage, des_engage, montmul_engage;
+ _Bool aes_engage, des_engage, montmul_engage;
- t4_instructions_present(&aes_engage, &des_engage, &digest_engage,
- &montmul_engage);
+ t4_instructions_present(&aes_engage, &des_engage, &montmul_engage);
#ifdef DEBUG_T4
(void) fprintf(stderr,
- "t4_bind: engage aes=%d, des=%d, digest=%d\n",
- aes_engage, des_engage, digest_engage);
+ "t4_bind: engage aes=%d, des=%d\n", aes_engage, des_engage);
#endif
#ifndef OPENSSL_NO_DES
if (!des_engage) { /* Remove DES ciphers from list */
@@ -961,7 +817,6 @@
aes_engage ? ENGINE_T4_NAME: ENGINE_NO_T4_NAME) ||
!ENGINE_set_init_function(e, t4_init) ||
(aes_engage && !ENGINE_set_ciphers(e, t4_get_all_ciphers)) ||
- (digest_engage && !ENGINE_set_digests(e, t4_get_all_digests)) ||
#ifndef OPENSSL_NO_RSA
(montmul_engage && !ENGINE_set_RSA(e, t4_RSA())) ||
#endif /* OPENSSL_NO_RSA */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.1/inline-t4/md5-sparcv9.pl Fri Jul 12 07:47:27 2013 -0700
@@ -0,0 +1,434 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Hardware SPARC T4 support by David S. Miller <davem@davemloft.net>.
+# ====================================================================
+
+# MD5 for SPARCv9, 6.9 cycles per byte on UltraSPARC, >40% faster than
+# code generated by Sun C 5.2.
+
+# SPARC T4 MD5 hardware achieves 3.20 cycles per byte, which is 2.1x
+# faster than software. Multi-process benchmark saturates at 12x
+# single-process result on 8-core processor, or ~11GBps per 2.85GHz
+# socket.
+
+$bits=32;
+for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
+if ($bits==64) { $bias=2047; $frame=192; }
+else { $bias=0; $frame=112; }
+
+$output=shift;
+open STDOUT,">$output";
+
+use integer;
+
+($ctx,$inp,$len)=("%i0","%i1","%i2"); # input arguments
+
+# 64-bit values
+@X=("%o0","%o1","%o2","%o3","%o4","%o5","%o7","%g1","%g2");
+$tx="%g3";
+($AB,$CD)=("%g4","%g5");
+
+# 32-bit values
+@V=($A,$B,$C,$D)=map("%l$_",(0..3));
+($t1,$t2,$t3,$saved_asi)=map("%l$_",(4..7));
+($shr,$shl1,$shl2)=("%i3","%i4","%i5");
+
+my @K=( 0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,
+ 0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
+ 0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,
+ 0x6b901122,0xfd987193,0xa679438e,0x49b40821,
+
+ 0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,
+ 0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
+ 0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,
+ 0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,
+
+ 0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,
+ 0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
+ 0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,
+ 0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,
+
+ 0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,
+ 0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
+ 0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,
+ 0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391, 0 );
+
+sub R0 {
+ my ($i,$a,$b,$c,$d) = @_;
+ my $rot = (7,12,17,22)[$i%4];
+ my $j = ($i+1)/2;
+
+ if ($i&1) {
+ $code.=<<___;
+ srlx @X[$j],$shr,@X[$j] ! align X[`$i+1`]
+ and $b,$t1,$t1 ! round $i
+ sllx @X[$j+1],$shl1,$tx
+ add $t2,$a,$a
+ sllx $tx,$shl2,$tx
+ xor $d,$t1,$t1
+ or $tx,@X[$j],@X[$j]
+ sethi %hi(@K[$i+1]),$t2
+ add $t1,$a,$a
+ or $t2,%lo(@K[$i+1]),$t2
+ sll $a,$rot,$t3
+ add @X[$j],$t2,$t2 ! X[`$i+1`]+K[`$i+1`]
+ srl $a,32-$rot,$a
+ add $b,$t3,$t3
+ xor $b,$c,$t1
+ add $t3,$a,$a
+___
+ } else {
+ $code.=<<___;
+ srlx @X[$j],32,$tx ! extract X[`2*$j+1`]
+ and $b,$t1,$t1 ! round $i
+ add $t2,$a,$a
+ xor $d,$t1,$t1
+ sethi %hi(@K[$i+1]),$t2
+ add $t1,$a,$a
+ or $t2,%lo(@K[$i+1]),$t2
+ sll $a,$rot,$t3
+ add $tx,$t2,$t2 ! X[`2*$j+1`]+K[`$i+1`]
+ srl $a,32-$rot,$a
+ add $b,$t3,$t3
+ xor $b,$c,$t1
+ add $t3,$a,$a
+___
+ }
+}
+
+sub R0_1 {
+ my ($i,$a,$b,$c,$d) = @_;
+ my $rot = (7,12,17,22)[$i%4];
+
+$code.=<<___;
+ srlx @X[0],32,$tx ! extract X[1]
+ and $b,$t1,$t1 ! round $i
+ add $t2,$a,$a
+ xor $d,$t1,$t1
+ sethi %hi(@K[$i+1]),$t2
+ add $t1,$a,$a
+ or $t2,%lo(@K[$i+1]),$t2
+ sll $a,$rot,$t3
+ add $tx,$t2,$t2 ! X[1]+K[`$i+1`]
+ srl $a,32-$rot,$a
+ add $b,$t3,$t3
+ andn $b,$c,$t1
+ add $t3,$a,$a
+___
+}
+
+sub R1 {
+ my ($i,$a,$b,$c,$d) = @_;
+ my $rot = (5,9,14,20)[$i%4];
+ my $j = $i<31 ? (1+5*($i+1))%16 : (5+3*($i+1))%16;
+ my $xi = @X[$j/2];
+
+$code.=<<___ if ($j&1 && ($xi=$tx));
+ srlx @X[$j/2],32,$xi ! extract X[$j]
+___
+$code.=<<___;
+ and $b,$d,$t3 ! round $i
+ add $t2,$a,$a
+ or $t3,$t1,$t1
+ sethi %hi(@K[$i+1]),$t2
+ add $t1,$a,$a
+ or $t2,%lo(@K[$i+1]),$t2
+ sll $a,$rot,$t3
+ add $xi,$t2,$t2 ! X[$j]+K[`$i+1`]
+ srl $a,32-$rot,$a
+ add $b,$t3,$t3
+ `$i<31?"andn":"xor"` $b,$c,$t1
+ add $t3,$a,$a
+___
+}
+
+sub R2 {
+ my ($i,$a,$b,$c,$d) = @_;
+ my $rot = (4,11,16,23)[$i%4];
+ my $j = $i<47 ? (5+3*($i+1))%16 : (0+7*($i+1))%16;
+ my $xi = @X[$j/2];
+
+$code.=<<___ if ($j&1 && ($xi=$tx));
+ srlx @X[$j/2],32,$xi ! extract X[$j]
+___
+$code.=<<___;
+ add $t2,$a,$a ! round $i
+ xor $b,$t1,$t1
+ sethi %hi(@K[$i+1]),$t2
+ add $t1,$a,$a
+ or $t2,%lo(@K[$i+1]),$t2
+ sll $a,$rot,$t3
+ add $xi,$t2,$t2 ! X[$j]+K[`$i+1`]
+ srl $a,32-$rot,$a
+ add $b,$t3,$t3
+ xor $b,$c,$t1
+ add $t3,$a,$a
+___
+}
+
+sub R3 {
+ my ($i,$a,$b,$c,$d) = @_;
+ my $rot = (6,10,15,21)[$i%4];
+ my $j = (0+7*($i+1))%16;
+ my $xi = @X[$j/2];
+
+$code.=<<___;
+ add $t2,$a,$a ! round $i
+___
+$code.=<<___ if ($j&1 && ($xi=$tx));
+ srlx @X[$j/2],32,$xi ! extract X[$j]
+___
+$code.=<<___;
+ orn $b,$d,$t1
+ sethi %hi(@K[$i+1]),$t2
+ xor $c,$t1,$t1
+ or $t2,%lo(@K[$i+1]),$t2
+ add $t1,$a,$a
+ sll $a,$rot,$t3
+ add $xi,$t2,$t2 ! X[$j]+K[`$i+1`]
+ srl $a,32-$rot,$a
+ add $b,$t3,$t3
+ add $t3,$a,$a
+___
+}
+
+$code.=<<___ if ($bits==64);
+.register %g2,#scratch
+.register %g3,#scratch
+___
+$code.=<<___;
+#include "sparc_arch.h"
+
+.section ".text",#alloc,#execinstr
+
+#ifdef __PIC__
+SPARC_PIC_THUNK(%g1)
+#endif
+
+.globl md5_block_asm_data_order
+.align 32
+md5_block_asm_data_order:
+ SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
+ ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1]
+
+ andcc %g1, CFR_MD5, %g0
+ be .Lsoftware
+ nop
+
+ mov 4, %g1
+ andcc %o1, 0x7, %g0
+ lda [%o0 + %g0]0x88, %f0 ! load context
+ lda [%o0 + %g1]0x88, %f1
+ add %o0, 8, %o0
+ lda [%o0 + %g0]0x88, %f2
+ lda [%o0 + %g1]0x88, %f3
+ bne,pn %icc, .Lhwunaligned
+ sub %o0, 8, %o0
+
+.Lhw_loop:
+ ldd [%o1 + 0x00], %f8
+ ldd [%o1 + 0x08], %f10
+ ldd [%o1 + 0x10], %f12
+ ldd [%o1 + 0x18], %f14
+ ldd [%o1 + 0x20], %f16
+ ldd [%o1 + 0x28], %f18
+ ldd [%o1 + 0x30], %f20
+ subcc %o2, 1, %o2 ! done yet?
+ ldd [%o1 + 0x38], %f22
+ add %o1, 0x40, %o1
+ prefetch [%o1 + 63], 20
+
+ .word 0x81b02800 ! MD5
+
+ bne,pt `$bits==64?"%xcc":"%icc"`, .Lhw_loop
+ nop
+
+.Lhwfinish:
+ sta %f0, [%o0 + %g0]0x88 ! store context
+ sta %f1, [%o0 + %g1]0x88
+ add %o0, 8, %o0
+ sta %f2, [%o0 + %g0]0x88
+ sta %f3, [%o0 + %g1]0x88
+ retl
+ nop
+
+.align 8
+.Lhwunaligned:
+ alignaddr %o1, %g0, %o1
+
+ ldd [%o1 + 0x00], %f10
+.Lhwunaligned_loop:
+ ldd [%o1 + 0x08], %f12
+ ldd [%o1 + 0x10], %f14
+ ldd [%o1 + 0x18], %f16
+ ldd [%o1 + 0x20], %f18
+ ldd [%o1 + 0x28], %f20
+ ldd [%o1 + 0x30], %f22
+ ldd [%o1 + 0x38], %f24
+ subcc %o2, 1, %o2 ! done yet?
+ ldd [%o1 + 0x40], %f26
+ add %o1, 0x40, %o1
+ prefetch [%o1 + 63], 20
+
+ faligndata %f10, %f12, %f8
+ faligndata %f12, %f14, %f10
+ faligndata %f14, %f16, %f12
+ faligndata %f16, %f18, %f14
+ faligndata %f18, %f20, %f16
+ faligndata %f20, %f22, %f18
+ faligndata %f22, %f24, %f20
+ faligndata %f24, %f26, %f22
+
+ .word 0x81b02800 ! MD5
+
+ bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
+ for %f26, %f26, %f10 ! %f10=%f26
+
+ ba .Lhwfinish
+ nop
+
+.align 16
+.Lsoftware:
+ save %sp,-$frame,%sp
+
+ rd %asi,$saved_asi
+ wr %g0,0x88,%asi ! ASI_PRIMARY_LITTLE
+ and $inp,7,$shr
+ andn $inp,7,$inp
+
+ sll $shr,3,$shr ! *=8
+ mov 56,$shl2
+ ld [$ctx+0],$A
+ sub $shl2,$shr,$shl2
+ ld [$ctx+4],$B
+ and $shl2,32,$shl1
+ add $shl2,8,$shl2
+ ld [$ctx+8],$C
+ sub $shl2,$shl1,$shl2 ! shr+shl1+shl2==64
+ ld [$ctx+12],$D
+ nop
+
+.Loop:
+ cmp $shr,0 ! was inp aligned?
+ ldxa [$inp+0]%asi,@X[0] ! load little-endian input
+ ldxa [$inp+8]%asi,@X[1]
+ ldxa [$inp+16]%asi,@X[2]
+ ldxa [$inp+24]%asi,@X[3]
+ ldxa [$inp+32]%asi,@X[4]
+ sllx $A,32,$AB ! pack A,B
+ ldxa [$inp+40]%asi,@X[5]
+ sllx $C,32,$CD ! pack C,D
+ ldxa [$inp+48]%asi,@X[6]
+ or $B,$AB,$AB
+ ldxa [$inp+56]%asi,@X[7]
+ or $D,$CD,$CD
+ bnz,a,pn %icc,.+8
+ ldxa [$inp+64]%asi,@X[8]
+
+ srlx @X[0],$shr,@X[0] ! align X[0]
+ sllx @X[1],$shl1,$tx
+ sethi %hi(@K[0]),$t2
+ sllx $tx,$shl2,$tx
+ or $t2,%lo(@K[0]),$t2
+ or $tx,@X[0],@X[0]
+ xor $C,$D,$t1
+ add @X[0],$t2,$t2 ! X[0]+K[0]
+___
+ for ($i=0;$i<15;$i++) { &R0($i,@V); unshift(@V,pop(@V)); }
+ for (;$i<16;$i++) { &R0_1($i,@V); unshift(@V,pop(@V)); }
+ for (;$i<32;$i++) { &R1($i,@V); unshift(@V,pop(@V)); }
+ for (;$i<48;$i++) { &R2($i,@V); unshift(@V,pop(@V)); }
+ for (;$i<64;$i++) { &R3($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+ srlx $AB,32,$t1 ! unpack A,B,C,D and accumulate
+ add $inp,64,$inp ! advance inp
+ srlx $CD,32,$t2
+ add $t1,$A,$A
+ subcc $len,1,$len ! done yet?
+ add $AB,$B,$B
+ add $t2,$C,$C
+ add $CD,$D,$D
+ srl $B,0,$B ! clruw $B
+ bne `$bits==64?"%xcc":"%icc"`,.Loop
+ srl $D,0,$D ! clruw $D
+
+ st $A,[$ctx+0] ! write out ctx
+ st $B,[$ctx+4]
+ st $C,[$ctx+8]
+ st $D,[$ctx+12]
+
+ wr %g0,$saved_asi,%asi
+ ret
+ restore
+.type md5_block_asm_data_order,#function
+.size md5_block_asm_data_order,(.-md5_block_asm_data_order)
+
+.asciz "MD5 block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
+.align 4
+___
+
+# Purpose of these subroutines is to explicitly encode VIS instructions,
+# so that one can compile the module without having to specify VIS
+# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
+# Idea is to reserve for option to produce "universal" binary and let
+# programmer detect if current CPU is VIS capable at run-time.
+sub unvis {
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my $ref,$opf;
+my %visopf = ( "faligndata" => 0x048,
+ "for" => 0x07c );
+
+ $ref = "$mnemonic\t$rs1,$rs2,$rd";
+
+ if ($opf=$visopf{$mnemonic}) {
+ foreach ($rs1,$rs2,$rd) {
+ return $ref if (!/%f([0-9]{1,2})/);
+ $_=$1;
+ if ($1>=32) {
+ return $ref if ($1&1);
+ # re-encode for upper double register addressing
+ $_=($1|$1>>5)&31;
+ }
+ }
+
+ return sprintf ".word\t0x%08x !%s",
+ 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
+ $ref;
+ } else {
+ return $ref;
+ }
+}
+sub unalignaddr {
+my ($mnemonic,$rs1,$rs2,$rd)=@_;
+my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
+my $ref="$mnemonic\t$rs1,$rs2,$rd";
+
+ foreach ($rs1,$rs2,$rd) {
+ if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; }
+ else { return $ref; }
+ }
+ return sprintf ".word\t0x%08x !%s",
+ 0x81b00300|$rd<<25|$rs1<<14|$rs2,
+ $ref;
+}
+
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/ge;
+
+ s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
+ &unvis($1,$2,$3,$4)
+ /ge;
+ s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
+ &unalignaddr($1,$2,$3,$4)
+ /ge;
+
+ print $_,"\n";
+}
+
+close STDOUT;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.1/inline-t4/sparc_arch.h Fri Jul 12 07:47:27 2013 -0700
@@ -0,0 +1,89 @@
+#ifndef __SPARC_ARCH_H__
+#define __SPARC_ARCH_H__
+
+#define SPARCV9_TICK_PRIVILEGED (1<<0)
+#define SPARCV9_PREFER_FPU (1<<1)
+#define SPARCV9_VIS1 (1<<2)
+#define SPARCV9_VIS2 (1<<3) /* reserved */
+#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */
+#define SPARCV9_BLK (1<<5) /* VIS1 block copy */
+#define SPARCV9_VIS3 (1<<6)
+#define SPARCV9_RANDOM (1<<7)
+
+/*
+ * OPENSSL_sparcv9cap_P[1] is copy of Compatibility Feature Register,
+ * %asr26, SPARC-T4 and later. There is no SPARCV9_CFR bit in
+ * OPENSSL_sparcv9cap_P[0], as %cfr copy is sufficient...
+ */
+#define CFR_AES 0x00000001 /* Supports AES opcodes */
+#define CFR_DES 0x00000002 /* Supports DES opcodes */
+#define CFR_KASUMI 0x00000004 /* Supports KASUMI opcodes */
+#define CFR_CAMELLIA 0x00000008 /* Supports CAMELLIA opcodes*/
+#define CFR_MD5 0x00000010 /* Supports MD5 opcodes */
+#define CFR_SHA1 0x00000020 /* Supports SHA1 opcodes */
+#define CFR_SHA256 0x00000040 /* Supports SHA256 opcodes */
+#define CFR_SHA512 0x00000080 /* Supports SHA512 opcodes */
+#define CFR_MPMUL 0x00000100 /* Supports MPMUL opcodes */
+#define CFR_MONTMUL 0x00000200 /* Supports MONTMUL opcodes */
+#define CFR_MONTSQR 0x00000400 /* Supports MONTSQR opcodes */
+#define CFR_CRC32C 0x00000800 /* Supports CRC32C opcodes */
+
+#if defined(OPENSSL_PIC) && !defined(__PIC__)
+# define __PIC__
+#endif
+
+#define SPARC_PIC_THUNK(reg) \
+ .align 32; \
+.Lpic_thunk: \
+ jmp %o7 + 8; \
+ add %o7, reg, reg;
+
+#define SPARC_PIC_THUNK_CALL(reg) \
+ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \
+ call .Lpic_thunk; \
+ or reg, %lo(_GLOBAL_OFFSET_TABLE_+4), reg;
+
+#if 1
+# define SPARC_SETUP_GOT_REG(reg) SPARC_PIC_THUNK_CALL(reg)
+#else
+# define SPARC_SETUP_GOT_REG(reg) \
+ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \
+ call .+8; \
+ or reg,%lo(_GLOBAL_OFFSET_TABLE_+4), reg; \
+ add %o7, reg, reg
+#endif
+
+#if (defined(__GNUC__) && defined(__arch64__)) || \
+ (defined(__SUNPRO_C) && defined(__sparcv9))
+
+# define SPARC_LOAD_ADDRESS(SYM, reg) \
+ setx SYM, %o7, reg;
+# define LDPTR ldx
+
+#else
+
+# define SPARC_LOAD_ADDRESS(SYM, reg) \
+ set SYM, reg;
+# define LDPTR ld
+# define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg)
+
+#endif
+
+#ifdef __PIC__
+# undef SPARC_LOAD_ADDRESS
+# undef SPARC_LOAD_ADDRESS_LEAF
+# define SPARC_LOAD_ADDRESS(SYM, reg) \
+ SPARC_SETUP_GOT_REG(reg); \
+ sethi %hi(SYM), %o7; \
+ or %o7, %lo(SYM), %o7; \
+ LDPTR [reg + %o7], reg;
+#endif
+
+#ifndef SPARC_LOAD_ADDRESS_LEAF
+# define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) \
+ mov %o7, tmp; \
+ SPARC_LOAD_ADDRESS(SYM, reg) \
+ mov tmp, %o7;
+#endif
+
+#endif /* __SPARC_ARCH_H__ */
--- a/components/openssl/openssl-1.0.1/openssl-1.0.1.p5m Thu Jul 11 05:45:11 2013 -0700
+++ b/components/openssl/openssl-1.0.1/openssl-1.0.1.p5m Fri Jul 12 07:47:27 2013 -0700
@@ -113,6 +113,7 @@
file path=usr/include/openssl/buffer.h
file path=usr/include/openssl/camellia.h
file path=usr/include/openssl/cast.h
+file path=usr/include/openssl/cmac.h
file path=usr/include/openssl/cms.h
file path=usr/include/openssl/comp.h
file path=usr/include/openssl/conf.h
@@ -158,6 +159,7 @@
file path=usr/include/openssl/rsa.h
file path=usr/include/openssl/safestack.h
file path=usr/include/openssl/sha.h
+file path=usr/include/openssl/srp.h
file path=usr/include/openssl/srtp.h
file path=usr/include/openssl/ssl.h
file path=usr/include/openssl/ssl2.h
--- a/components/openssl/openssl-1.0.1/patches/openssl-1.0.1e-t4-engine.sparc-patch Thu Jul 11 05:45:11 2013 -0700
+++ b/components/openssl/openssl-1.0.1/patches/openssl-1.0.1e-t4-engine.sparc-patch Fri Jul 12 07:47:27 2013 -0700
@@ -11,10 +11,10 @@
my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:";
my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
--my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
+-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
-my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
-+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o t4_des.o:aes_core.o aes_cbc.o aes-sparcv9.o t4_aes.o::t4_md5.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o t4_sha1.o t4_sha2.o:::::::ghash-sparcv9.o::void";
-+my $sparcv9_fips_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o:des_enc-sparc.o fcrypt_b.o t4_des.o:aes_core.o aes_cbc.o aes-sparcv9.o t4_aes.o::t4_md5.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o t4_sha1.o t4_sha2.o:::::::ghash-sparcv9.o::void";
++my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o t4_des.o:aes_core.o aes_cbc.o aes-sparcv9.o t4_aes.o::md5-sparcv9.o t4_md5.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o t4_sha1.o t4_sha2.o:::::::ghash-sparcv9.o::void";
++my $sparcv9_fips_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o:des_enc-sparc.o fcrypt_b.o t4_des.o:aes_core.o aes_cbc.o aes-sparcv9.o t4_aes.o::md5-sparcv9.o t4_md5.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o t4_sha1.o t4_sha2.o:::::::ghash-sparcv9.o::void";
+my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o t4_des.o:t4_aes.o::t4_md5.o:t4_sha1.o t4_sha2.o:::::::void";
my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::";
@@ -154,9 +154,9 @@
GENERAL=Makefile
TEST=md5test.c
APPS=
-@@ -52,6 +58,10 @@
- $(CC) $(CFLAGS) -E asm/md5-ia64.S | \
- $(PERL) -ne 's/;\s+/;\n/g; print;' > $@
+@@ -55,6 +59,10 @@
+ md5-sparcv9.S: asm/md5-sparcv9.pl
+ $(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS)
+t4_md5.o: asm/t4_md5.S
+ as $(ASFLAGSYF) -o $@ asm/t4_md5.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/openssl/openssl-1.0.1/patches/openssl-t4-inline.sparc-patch Fri Jul 12 07:47:27 2013 -0700
@@ -0,0 +1,854 @@
+#
+# This file adds inline T4 instruction support to OpenSSL upstream code.
+#
+Index: Configure
+===================================================================
+diff -ru openssl-1.0.1e/Configure openssl-1.0.1e/Configure
+--- openssl-1.0.1e/Configure 2011-05-24 17:02:24.000000000 -0700
++++ openssl-1.0.1e/Configure 2011-07-27 10:48:17.817470000 -0700
+@@ -135,7 +135,7 @@
+
+ my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:";
+ my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
+-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
++my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
+ my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
+ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
+ my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::";
+Index: crypto/sparccpuid.S
+===================================================================
+diff -ru openssl-1.0.1e/crypto/sparccpuid.S openssl-1.0.1e/crypto/sparccpuid.S
+--- openssl-1.0.1e/crypto/sparccpuid.S 2011-05-24 17:02:24.000000000 -0700
++++ openssl-1.0.1e/crypto/sparccpuid.S 2011-07-27 10:48:17.817470000 -0700
+@@ -251,6 +251,11 @@
+ ! UltraSPARC IIe 7
+ ! UltraSPARC III 7
+ ! UltraSPARC T1 24
++! SPARC T4 65(*)
++!
++! (*) result has lesser to do with VIS instruction latencies, rdtick
++! appears that slow, but it does the trick in sense that FP and
++! VIS code paths are still slower than integer-only ones.
+ !
+ ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
+ !
+@@ -260,6 +265,8 @@
+ .global _sparcv9_vis1_instrument
+ .align 8
+ _sparcv9_vis1_instrument:
++ .word 0x81b00d80 !fxor %f0,%f0,%f0
++ .word 0x85b08d82 !fxor %f2,%f2,%f2
+ .word 0x91410000 !rd %tick,%o0
+ .word 0x81b00d80 !fxor %f0,%f0,%f0
+ .word 0x85b08d82 !fxor %f2,%f2,%f2
+@@ -314,6 +321,30 @@
+ .type _sparcv9_fmadd_probe,#function
+ .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
+
++.global _sparcv9_rdcfr
++.align 8
++_sparcv9_rdcfr:
++ retl
++ .word 0x91468000 !rd %asr26,%o0
++.type _sparcv9_rdcfr,#function
++.size _sparcv9_rdcfr,.-_sparcv9_rdcfr
++
++.global _sparcv9_vis3_probe
++.align 8
++_sparcv9_vis3_probe:
++ retl
++ .word 0x81b022a0 !xmulx %g0,%g0,%g0
++.type _sparcv9_vis3_probe,#function
++.size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe
++
++.global _sparcv9_random
++.align 8
++_sparcv9_random:
++ retl
++ .word 0x91b002a0 !random %o0
++.type _sparcv9_random,#function
++.size _sparcv9_random,.-_sparcv9_random
++
+ .global OPENSSL_cleanse
+ .align 32
+ OPENSSL_cleanse:
+
+Index: crypto/sparcv9cap.c
+===================================================================
+diff -ru openssl-1.0.1e/crypto/sparcv9cap.c openssl-1.0.1e/crypto/sparcv9cap.c
+--- openssl-1.0.1e/crypto/sparcv9cap.c 2011-05-24 17:02:24.000000000 -0700
++++ openssl-1.0.1e/crypto/sparcv9cap.c 2011-07-27 10:48:17.817470000 -0700
+@@ -6,16 +6,15 @@
+ #include <sys/time.h>
+ #include <openssl/bn.h>
+
+-#define SPARCV9_TICK_PRIVILEGED (1<<0)
+-#define SPARCV9_PREFER_FPU (1<<1)
+-#define SPARCV9_VIS1 (1<<2)
+-#define SPARCV9_VIS2 (1<<3) /* reserved */
+-#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */
++#include "sparc_arch.h"
+
++#if defined(__GNUC__) && defined(__linux)
++__attribute__((visibility("hidden")))
++#endif
+ #ifndef _BOOT
+-static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED;
++unsigned int OPENSSL_sparcv9cap_P[2]={SPARCV9_TICK_PRIVILEGED,0};
+ #else
+-static int OPENSSL_sparcv9cap_P = SPARCV9_VIS1;
++unsigned int OPENSSL_sparcv9cap_P[2]={SPARCV9_VIS1,0};
+ #endif
+
+ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
+@@ -24,7 +23,7 @@
+ int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
+
+ if (num>=8 && !(num&1) &&
+- (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
++ (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
+ (SPARCV9_PREFER_FPU|SPARCV9_VIS1))
+ return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
+ else
+@@ -36,11 +35,16 @@
+ unsigned long _sparcv9_vis1_instrument(void);
+ void _sparcv9_vis2_probe(void);
+ void _sparcv9_fmadd_probe(void);
++unsigned long _sparcv9_rdcfr(void);
++void _sparcv9_vis3_probe(void);
++unsigned long _sparcv9_random(void);
++size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t);
++size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t);
+
+ #ifndef _BOOT
+ unsigned long OPENSSL_rdtsc(void)
+ {
+- if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED)
++ if (OPENSSL_sparcv9cap_P[0]&SPARCV9_TICK_PRIVILEGED)
+ #if defined(__sun) && defined(__SVR4)
+ return gethrtime();
+ #else
+@@ -51,6 +55,25 @@
+ }
+ #endif
+
++size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt)
++ {
++ if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
++ SPARCV9_BLK) /* BLK set and TICK_PRIVILEGED clear */
++ return _sparcv9_vis1_instrument_bus(out,cnt);
++ else
++ return 0;
++ }
++
++size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max)
++ {
++ if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
++ SPARCV9_BLK) /* BLK set and TICK_PRIVILEGED clear */
++ return _sparcv9_vis1_instrument_bus2(out,cnt,max);
++ else
++ return 0;
++ }
++
++
+ #if defined(_BOOT)
+ /*
+ * Hardcoding sparc capabilities for wanboot.
+@@ -58,7 +81,7 @@
+ */
+ void OPENSSL_cpuid_setup(void)
+ {
+- OPENSSL_sparcv9cap_P = SPARCV9_VIS1;
++ OPENSSL_sparcv9cap_P[0] = SPARCV9_VIS1;
+ }
+
+ #elif 0 && defined(__sun) && defined(__SVR4)
+@@ -85,11 +108,11 @@
+ if (!strcmp (name,"SUNW,UltraSPARC") ||
+ !strncmp(name,"SUNW,UltraSPARC-I",17)) /* covers II,III,IV */
+ {
+- OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1;
++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU|SPARCV9_VIS1;
+
+ /* %tick is privileged only on UltraSPARC-I/II, but not IIe */
+ if (name[14]!='\0' && name[17]!='\0' && name[18]!='\0')
+- OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
++ OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
+
+ return DI_WALK_TERMINATE;
+ }
+@@ -96,7 +119,7 @@
+ /* This is expected to catch remaining UltraSPARCs, such as T1 */
+ else if (!strncmp(name,"SUNW,UltraSPARC",15))
+ {
+- OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
++ OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
+
+ return DI_WALK_TERMINATE;
+ }
+@@ -115,7 +138,7 @@
+
+ if ((e=getenv("OPENSSL_sparcv9cap")))
+ {
+- OPENSSL_sparcv9cap_P=strtoul(e,NULL,0);
++ OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0);
+ return;
+ }
+
+@@ -123,17 +146,17 @@
+ {
+ if (strcmp(si,"sun4v"))
+ /* FPU is preferred for all CPUs, but US-T1/2 */
+- OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU;
++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU;
+ }
+
+ if (sysinfo(SI_ISALIST,si,sizeof(si))>0)
+ {
+ if (strstr(si,"+vis"))
+- OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK;
+ if (strstr(si,"+vis2"))
+ {
+- OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
+- OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2;
++ OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
+ return;
+ }
+ }
+@@ -193,12 +216,14 @@
+
+ if ((e=getenv("OPENSSL_sparcv9cap")))
+ {
+- OPENSSL_sparcv9cap_P=strtoul(e,NULL,0);
++ OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0);
++ if ((e=strchr(e,':')))
++ OPENSSL_sparcv9cap_P[1]=strtoul(e+1,NULL,0);
+ return;
+ }
+
+ /* Initial value, fits UltraSPARC-I&II... */
+- OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED;
++ OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED;
+
+ sigfillset(&all_masked);
+ sigdelset(&all_masked,SIGILL);
+@@ -221,20 +246,20 @@
+ if (sigsetjmp(common_jmp,1) == 0)
+ {
+ _sparcv9_rdtick();
+- OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
++ OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
+ }
+
+ if (sigsetjmp(common_jmp,1) == 0)
+ {
+ _sparcv9_vis1_probe();
+- OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK;
+ /* detect UltraSPARC-Tx, see sparccpud.S for details... */
+ if (_sparcv9_vis1_instrument() >= 12)
+- OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU);
++ OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU);
+ else
+ {
+ _sparcv9_vis2_probe();
+- OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2;
+ }
+ }
+
+@@ -241,9 +266,37 @@
+ if (sigsetjmp(common_jmp,1) == 0)
+ {
+ _sparcv9_fmadd_probe();
+- OPENSSL_sparcv9cap_P |= SPARCV9_FMADD;
++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD;
+ }
+
++ /*
++ * VIS3 flag is tested independently from VIS1, unlike VIS2 that is,
++ * because VIS3 defines even integer instructions.
++ */
++ if (sigsetjmp(common_jmp,1) == 0)
++ {
++ _sparcv9_vis3_probe();
++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3;
++ }
++
++ if (sigsetjmp(common_jmp,1) == 0)
++ {
++ (void)_sparcv9_random();
++ OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM;
++ }
++
++ /*
++ * In wait for better solution _sparcv9_rdcfr is masked by
++ * VIS3 flag, because it goes to uninterruptable endless
++ * loop on UltraSPARC II running Solaris. Things might be
++ * different on Linux...
++ */
++ if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) &&
++ sigsetjmp(common_jmp,1) == 0)
++ {
++ OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr();
++ }
++
+ sigaction(SIGBUS,&bus_oact,NULL);
+ sigaction(SIGILL,&ill_oact,NULL);
+
+Index: crypto/md5/Makefile
+===================================================================
+diff -ru openssl-1.0.1e/crypto/md5/Makefile openssl-1.0.1e/crypto/md5/Makefile
+--- openssl-1.0.1e/crypto/md5/Makefile 2011-05-24 17:02:24.000000000 -0700
++++ openssl-1.0.1e/crypto/md5/Makefile 2011-07-27 10:48:17.817470000 -0700
+@@ -52,6 +52,9 @@
+ $(CC) $(CFLAGS) -E asm/md5-ia64.S | \
+ $(PERL) -ne 's/;\s+/;\n/g; print;' > $@
+
++md5-sparcv9.S: asm/md5-sparcv9.pl
++ $(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS)
++
+ files:
+ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
+
+Index: crypto/md5/md5_locl.h
+===================================================================
+diff -ru openssl-1.0.1e/crypto/md5/md5_locl.h openssl-1.0.1e/crypto/md5/md5_locl.h
+--- openssl-1.0.1e/crypto/md5/md5_locl.h 2011-05-24 17:02:24.000000000 -0700
++++ openssl-1.0.1e/crypto/md5/md5_locl.h 2011-07-27 10:48:17.817470000 -0700
+@@ -71,6 +71,8 @@
+ # define md5_block_data_order md5_block_asm_data_order
+ # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
+ # define md5_block_data_order md5_block_asm_data_order
++# elif defined(__sparc) || defined(__sparc__)
++# define md5_block_data_order md5_block_asm_data_order
+ # endif
+ #endif
+
+Index: crypto/sha/Makefile
+===================================================================
+diff -ru openssl-1.0.1e/crypto/sha/Makefile openssl-1.0.1e/crypto/sha/Makefile
+--- openssl-1.0.1e/crypto/sha/Makefile 2011-05-24 17:02:24.000000000 -0700
++++ openssl-1.0.1e/crypto/sha/Makefile 2011-07-27 10:48:17.817470000 -0700
+@@ -66,9 +66,9 @@
+ sha1-x86_64.s: asm/sha1-x86_64.pl; $(PERL) asm/sha1-x86_64.pl $(PERLASM_SCHEME) > $@
+ sha256-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
+ sha512-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
+-sha1-sparcv9.s: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS)
+-sha256-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
+-sha512-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
++sha1-sparcv9.S: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS)
++sha256-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
++sha512-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
+
+ sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@
+ sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
+Index: crypto/sha/asm/sha1-sparcv9.pl
+===================================================================
+diff -ru openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl
+--- openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700
++++ openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700
+@@ -5,6 +5,8 @@
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
++#
++# Hardware SPARC T4 support by David S. Miller <davem@davemloft.net>.
+ # ====================================================================
+
+ # Performance improvement is not really impressive on pre-T1 CPU: +8%
+@@ -18,6 +20,11 @@
+ # ensure scalability on UltraSPARC T1, or rather to avoid decay when
+ # amount of active threads exceeds the number of physical cores.
+
++# SPARC T4 SHA1 hardware achieves 3.72 cycles per byte, which is 3.1x
++# faster than software. Multi-process benchmark saturates at 11x
++# single-process result on 8-core processor, or ~9GBps per 2.85GHz
++# socket.
++
+ $bits=32;
+ for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
+ if ($bits==64) { $bias=2047; $frame=192; }
+@@ -183,11 +190,93 @@
+ .register %g3,#scratch
+ ___
+ $code.=<<___;
++#include "sparc_arch.h"
++
+ .section ".text",#alloc,#execinstr
+
++#ifdef __PIC__
++SPARC_PIC_THUNK(%g1)
++#endif
++
+ .align 32
+ .globl sha1_block_data_order
+ sha1_block_data_order:
++ SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
++ ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1]
++
++ andcc %g1, CFR_SHA1, %g0
++ be .Lsoftware
++ nop
++
++ ld [%o0 + 0x00], %f0 ! load context
++ ld [%o0 + 0x04], %f1
++ ld [%o0 + 0x08], %f2
++ andcc %o1, 0x7, %g0
++ ld [%o0 + 0x0c], %f3
++ bne,pn %icc, .Lhwunaligned
++ ld [%o0 + 0x10], %f4
++
++.Lhw_loop:
++ ldd [%o1 + 0x00], %f8
++ ldd [%o1 + 0x08], %f10
++ ldd [%o1 + 0x10], %f12
++ ldd [%o1 + 0x18], %f14
++ ldd [%o1 + 0x20], %f16
++ ldd [%o1 + 0x28], %f18
++ ldd [%o1 + 0x30], %f20
++ subcc %o2, 1, %o2 ! done yet?
++ ldd [%o1 + 0x38], %f22
++ add %o1, 0x40, %o1
++
++ .word 0x81b02820 ! SHA1
++
++ bne,pt `$bits==64?"%xcc":"%icc"`, .Lhw_loop
++ nop
++
++.Lhwfinish:
++ st %f0, [%o0 + 0x00] ! store context
++ st %f1, [%o0 + 0x04]
++ st %f2, [%o0 + 0x08]
++ st %f3, [%o0 + 0x0c]
++ retl
++ st %f4, [%o0 + 0x10]
++
++.align 8
++.Lhwunaligned:
++ alignaddr %o1, %g0, %o1
++
++ ldd [%o1 + 0x00], %f10
++.Lhwunaligned_loop:
++ ldd [%o1 + 0x08], %f12
++ ldd [%o1 + 0x10], %f14
++ ldd [%o1 + 0x18], %f16
++ ldd [%o1 + 0x20], %f18
++ ldd [%o1 + 0x28], %f20
++ ldd [%o1 + 0x30], %f22
++ ldd [%o1 + 0x38], %f24
++ subcc %o2, 1, %o2 ! done yet?
++ ldd [%o1 + 0x40], %f26
++ add %o1, 0x40, %o1
++
++ faligndata %f10, %f12, %f8
++ faligndata %f12, %f14, %f10
++ faligndata %f14, %f16, %f12
++ faligndata %f16, %f18, %f14
++ faligndata %f18, %f20, %f16
++ faligndata %f20, %f22, %f18
++ faligndata %f22, %f24, %f20
++ faligndata %f24, %f26, %f22
++
++ .word 0x81b02820 ! SHA1
++
++ bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
++ for %f26, %f26, %f10 ! %f10=%f26
++
++ ba .Lhwfinish
++ nop
++
++.align 16
++.Lsoftware:
+ save %sp,-$frame,%sp
+ sllx $len,6,$len
+ add $inp,$len,$len
+@@ -279,6 +368,62 @@
+ .align 4
+ ___
+
+-$code =~ s/\`([^\`]*)\`/eval $1/gem;
+-print $code;
++# Purpose of these subroutines is to explicitly encode VIS instructions,
++# so that one can compile the module without having to specify VIS
++# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
++# Idea is to reserve for option to produce "universal" binary and let
++# programmer detect if current CPU is VIS capable at run-time.
++sub unvis {
++my ($mnemonic,$rs1,$rs2,$rd)=@_;
++my ($ref,$opf);
++my %visopf = ( "faligndata" => 0x048,
++ "for" => 0x07c );
++
++ $ref = "$mnemonic\t$rs1,$rs2,$rd";
++
++ if ($opf=$visopf{$mnemonic}) {
++ foreach ($rs1,$rs2,$rd) {
++ return $ref if (!/%f([0-9]{1,2})/);
++ $_=$1;
++ if ($1>=32) {
++ return $ref if ($1&1);
++ # re-encode for upper double register addressing
++ $_=($1|$1>>5)&31;
++ }
++ }
++
++ return sprintf ".word\t0x%08x !%s",
++ 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
++ $ref;
++ } else {
++ return $ref;
++ }
++}
++sub unalignaddr {
++my ($mnemonic,$rs1,$rs2,$rd)=@_;
++my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
++my $ref="$mnemonic\t$rs1,$rs2,$rd";
++
++ foreach ($rs1,$rs2,$rd) {
++ if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; }
++ else { return $ref; }
++ }
++ return sprintf ".word\t0x%08x !%s",
++ 0x81b00300|$rd<<25|$rs1<<14|$rs2,
++ $ref;
++}
++
++foreach (split("\n",$code)) {
++ s/\`([^\`]*)\`/eval $1/ge;
++
++ s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
++ &unvis($1,$2,$3,$4)
++ /ge;
++ s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
++ &unalignaddr($1,$2,$3,$4)
++ /ge;
++
++ print $_,"\n";
++}
++
+ close STDOUT;
+
+Index: crypto/sha/asm/sha512-sparcv9.pl
+===================================================================
+diff -ru openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl
+--- openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700
++++ openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700
+@@ -5,6 +5,8 @@
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
++#
++# Hardware SPARC T4 support by David S. Miller <davem@davemloft.net>.
+ # ====================================================================
+
+ # SHA256 performance improvement over compiler generated code varies
+@@ -41,6 +43,12 @@
+ # loads are always slower than one 64-bit load. Once again this
+ # is unlike pre-T1 UltraSPARC, where, if scheduled appropriately,
+ # 2x32-bit loads can be as fast as 1x64-bit ones.
++#
++# SPARC T4 SHA256/512 hardware achieves 3.17/2.01 cycles per byte,
++# which is 9.3x/11.1x faster than software. Multi-process benchmark
++# saturates at 11.5x single-process result on 8-core processor, or
++# ~11/16GBps per 2.85GHz socket.
++
+
+ $bits=32;
+ for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
+@@ -386,6 +394,8 @@
+ .register %g3,#scratch
+ ___
+ $code.=<<___;
++#include "sparc_arch.h"
++
+ .section ".text",#alloc,#execinstr
+
+ .align 64
+@@ -457,8 +467,196 @@
+ }
+ $code.=<<___;
+ .size K${label},.-K${label}
++
++#ifdef __PIC__
++SPARC_PIC_THUNK(%g1)
++#endif
++
+ .globl sha${label}_block_data_order
++.align 32
+ sha${label}_block_data_order:
++ SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
++ ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1]
++
++ andcc %g1, CFR_SHA${label}, %g0
++ be .Lsoftware
++ nop
++___
++$code.=<<___ if ($SZ==8); # SHA512
++ ldd [%o0 + 0x00], %f0 ! load context
++ ldd [%o0 + 0x08], %f2
++ ldd [%o0 + 0x10], %f4
++ ldd [%o0 + 0x18], %f6
++ ldd [%o0 + 0x20], %f8
++ ldd [%o0 + 0x28], %f10
++ andcc %o1, 0x7, %g0
++ ldd [%o0 + 0x30], %f12
++ bne,pn %icc, .Lhwunaligned
++ ldd [%o0 + 0x38], %f14
++
++.Lhwaligned_loop:
++ ldd [%o1 + 0x00], %f16
++ ldd [%o1 + 0x08], %f18
++ ldd [%o1 + 0x10], %f20
++ ldd [%o1 + 0x18], %f22
++ ldd [%o1 + 0x20], %f24
++ ldd [%o1 + 0x28], %f26
++ ldd [%o1 + 0x30], %f28
++ ldd [%o1 + 0x38], %f30
++ ldd [%o1 + 0x40], %f32
++ ldd [%o1 + 0x48], %f34
++ ldd [%o1 + 0x50], %f36
++ ldd [%o1 + 0x58], %f38
++ ldd [%o1 + 0x60], %f40
++ ldd [%o1 + 0x68], %f42
++ ldd [%o1 + 0x70], %f44
++ subcc %o2, 1, %o2 ! done yet?
++ ldd [%o1 + 0x78], %f46
++ add %o1, 0x80, %o1
++
++ .word 0x81b02860 ! SHA512
++
++ bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwaligned_loop
++ nop
++
++.Lhwfinish:
++ std %f0, [%o0 + 0x00] ! store context
++ std %f2, [%o0 + 0x08]
++ std %f4, [%o0 + 0x10]
++ std %f6, [%o0 + 0x18]
++ std %f8, [%o0 + 0x20]
++ std %f10, [%o0 + 0x28]
++ std %f12, [%o0 + 0x30]
++ retl
++ std %f14, [%o0 + 0x38]
++
++.align 16
++.Lhwunaligned:
++ alignaddr %o1, %g0, %o1
++
++ ldd [%o1 + 0x00], %f18
++.Lhwunaligned_loop:
++ ldd [%o1 + 0x08], %f20
++ ldd [%o1 + 0x10], %f22
++ ldd [%o1 + 0x18], %f24
++ ldd [%o1 + 0x20], %f26
++ ldd [%o1 + 0x28], %f28
++ ldd [%o1 + 0x30], %f30
++ ldd [%o1 + 0x38], %f32
++ ldd [%o1 + 0x40], %f34
++ ldd [%o1 + 0x48], %f36
++ ldd [%o1 + 0x50], %f38
++ ldd [%o1 + 0x58], %f40
++ ldd [%o1 + 0x60], %f42
++ ldd [%o1 + 0x68], %f44
++ ldd [%o1 + 0x70], %f46
++ ldd [%o1 + 0x78], %f48
++ subcc %o2, 1, %o2 ! done yet?
++ ldd [%o1 + 0x80], %f50
++ add %o1, 0x80, %o1
++
++ faligndata %f18, %f20, %f16
++ faligndata %f20, %f22, %f18
++ faligndata %f22, %f24, %f20
++ faligndata %f24, %f26, %f22
++ faligndata %f26, %f28, %f24
++ faligndata %f28, %f30, %f26
++ faligndata %f30, %f32, %f28
++ faligndata %f32, %f34, %f30
++ faligndata %f34, %f36, %f32
++ faligndata %f36, %f38, %f34
++ faligndata %f38, %f40, %f36
++ faligndata %f40, %f42, %f38
++ faligndata %f42, %f44, %f40
++ faligndata %f44, %f46, %f42
++ faligndata %f46, %f48, %f44
++ faligndata %f48, %f50, %f46
++
++ .word 0x81b02860 ! SHA512
++
++ bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
++ for %f50, %f50, %f18 ! %f18=%f50
++
++ ba .Lhwfinish
++ nop
++___
++$code.=<<___ if ($SZ==4); # SHA256
++ ld [%o0 + 0x00], %f0
++ ld [%o0 + 0x04], %f1
++ ld [%o0 + 0x08], %f2
++ ld [%o0 + 0x0c], %f3
++ ld [%o0 + 0x10], %f4
++ ld [%o0 + 0x14], %f5
++ andcc %o1, 0x7, %g0
++ ld [%o0 + 0x18], %f6
++ bne,pn %icc, .Lhwunaligned
++ ld [%o0 + 0x1c], %f7
++
++.Lhwloop:
++ ldd [%o1 + 0x00], %f8
++ ldd [%o1 + 0x08], %f10
++ ldd [%o1 + 0x10], %f12
++ ldd [%o1 + 0x18], %f14
++ ldd [%o1 + 0x20], %f16
++ ldd [%o1 + 0x28], %f18
++ ldd [%o1 + 0x30], %f20
++ subcc %o2, 1, %o2 ! done yet?
++ ldd [%o1 + 0x38], %f22
++ add %o1, 0x40, %o1
++
++ .word 0x81b02840 ! SHA256
++
++ bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwloop
++ nop
++
++.Lhwfinish:
++ st %f0, [%o0 + 0x00] ! store context
++ st %f1, [%o0 + 0x04]
++ st %f2, [%o0 + 0x08]
++ st %f3, [%o0 + 0x0c]
++ st %f4, [%o0 + 0x10]
++ st %f5, [%o0 + 0x14]
++ st %f6, [%o0 + 0x18]
++ retl
++ st %f7, [%o0 + 0x1c]
++
++.align 8
++.Lhwunaligned:
++ alignaddr %o1, %g0, %o1
++
++ ldd [%o1 + 0x00], %f10
++.Lhwunaligned_loop:
++ ldd [%o1 + 0x08], %f12
++ ldd [%o1 + 0x10], %f14
++ ldd [%o1 + 0x18], %f16
++ ldd [%o1 + 0x20], %f18
++ ldd [%o1 + 0x28], %f20
++ ldd [%o1 + 0x30], %f22
++ ldd [%o1 + 0x38], %f24
++ subcc %o2, 1, %o2 ! done yet?
++ ldd [%o1 + 0x40], %f26
++ add %o1, 0x40, %o1
++
++ faligndata %f10, %f12, %f8
++ faligndata %f12, %f14, %f10
++ faligndata %f14, %f16, %f12
++ faligndata %f16, %f18, %f14
++ faligndata %f18, %f20, %f16
++ faligndata %f20, %f22, %f18
++ faligndata %f22, %f24, %f20
++ faligndata %f24, %f26, %f22
++
++ .word 0x81b02840 ! SHA256
++
++ bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
++ for %f26, %f26, %f10 ! %f10=%f26
++
++ ba .Lhwfinish
++ nop
++___
++$code.=<<___;
++.align 16
++.Lsoftware:
+ save %sp,`-$frame-$locals`,%sp
+ and $inp,`$align-1`,$tmp31
+ sllx $len,`log(16*$SZ)/log(2)`,$len
+@@ -589,6 +787,62 @@
+ .align 4
+ ___
+
+-$code =~ s/\`([^\`]*)\`/eval $1/gem;
+-print $code;
++# Purpose of these subroutines is to explicitly encode VIS instructions,
++# so that one can compile the module without having to specify VIS
++# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
++# Idea is to reserve for option to produce "universal" binary and let
++# programmer detect if current CPU is VIS capable at run-time.
++sub unvis {
++my ($mnemonic,$rs1,$rs2,$rd)=@_;
++my ($ref,$opf);
++my %visopf = ( "faligndata" => 0x048,
++ "for" => 0x07c );
++
++ $ref = "$mnemonic\t$rs1,$rs2,$rd";
++
++ if ($opf=$visopf{$mnemonic}) {
++ foreach ($rs1,$rs2,$rd) {
++ return $ref if (!/%f([0-9]{1,2})/);
++ $_=$1;
++ if ($1>=32) {
++ return $ref if ($1&1);
++ # re-encode for upper double register addressing
++ $_=($1|$1>>5)&31;
++ }
++ }
++
++ return sprintf ".word\t0x%08x !%s",
++ 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
++ $ref;
++ } else {
++ return $ref;
++ }
++}
++sub unalignaddr {
++my ($mnemonic,$rs1,$rs2,$rd)=@_;
++my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
++my $ref="$mnemonic\t$rs1,$rs2,$rd";
++
++ foreach ($rs1,$rs2,$rd) {
++ if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; }
++ else { return $ref; }
++ }
++ return sprintf ".word\t0x%08x !%s",
++ 0x81b00300|$rd<<25|$rs1<<14|$rs2,
++ $ref;
++}
++
++foreach (split("\n",$code)) {
++ s/\`([^\`]*)\`/eval $1/ge;
++
++ s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
++ &unvis($1,$2,$3,$4)
++ /ge;
++ s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
++ &unalignaddr($1,$2,$3,$4)
++ /ge;
++
++ print $_,"\n";
++}
++
+ close STDOUT;
+Index: apps/speed.c
+===================================================================
+diff -ru openssl-1.0.1e/apps/speed.c openssl-1.0.1e/apps/speed.c
+--- openssl-1.0.1e/apps/speed.c 2011-05-24 17:02:24.000000000 -0700
++++ openssl-1.0.1e/apps/speed.c 2011-07-27 10:48:17.817470000 -0700
+@@ -1551,7 +1551,7 @@
+ print_message(names[D_MD5],c[D_MD5][j],lengths[j]);
+ Time_F(START);
+ for (count=0,run=1; COND(c[D_MD5][j]); count++)
+- EVP_Digest(&(buf[0]),(unsigned long)lengths[j],&(md5[0]),NULL,EVP_get_digestbyname("md5"),NULL);
++ MD5(buf,lengths[j],md5);
+ d=Time_F(STOP);
+ print_result(D_MD5,j,count,d);
+ }
+@@ -1591,7 +1591,7 @@
+ print_message(names[D_SHA1],c[D_SHA1][j],lengths[j]);
+ Time_F(START);
+ for (count=0,run=1; COND(c[D_SHA1][j]); count++)
+- EVP_Digest(buf,(unsigned long)lengths[j],&(sha[0]),NULL,EVP_sha1(),NULL);
++ SHA1(buf,lengths[j],sha);
+ d=Time_F(STOP);
+ print_result(D_SHA1,j,count,d);
+ }