components/openssl/openssl-1.0.1/patches/37_openssl-t4-inline.patch
branchs11u2-sru
changeset 4016 7f9e1e7611b8
parent 4013 29dc96079508
child 4019 825fa37cfbe0
equal deleted inserted replaced
4013:29dc96079508 4016:7f9e1e7611b8
     1 #
       
     2 # This file adds inline T4 instruction support to OpenSSL upstream code.
       
     3 # The change was brought in from OpenSSL 1.0.2.
       
     4 #
       
     5 Index: Configure
       
     6 ===================================================================
       
     7 diff -ru openssl-1.0.1e/Configure openssl-1.0.1e/Configure
       
     8 --- openssl-1.0.1e/Configure 2011-05-24 17:02:24.000000000 -0700
       
     9 +++ openssl-1.0.1e/Configure 2011-07-27 10:48:17.817470000 -0700
       
    10 @@ -135,7 +135,7 @@
       
    11 
       
    12  my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:";
       
    13  my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
       
    14 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
       
    15 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
       
    16  my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
       
    17  my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
       
    18  my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::";
       
    19 Index: crypto/sparccpuid.S
       
    20 ===================================================================
       
    21 diff -ru openssl-1.0.1e/crypto/sparccpuid.S openssl-1.0.1e/crypto/sparccpuid.S
       
    22 --- openssl-1.0.1e/crypto/sparccpuid.S 2011-05-24 17:02:24.000000000 -0700
       
    23 +++ openssl-1.0.1e/crypto/sparccpuid.S 2011-07-27 10:48:17.817470000 -0700
       
    24 @@ -1,3 +1,7 @@
       
    25 +#ifdef OPENSSL_FIPSCANISTER
       
    26 +#include <openssl/fipssyms.h>
       
    27 +#endif
       
    28 +
       
    29  #if defined(__SUNPRO_C) && defined(__sparcv9)
       
    30  # define ABI64  /* They've said -xarch=v9 at command line */
       
    31  #elif defined(__GNUC__) && defined(__arch64__)
       
    32 @@ -123,7 +127,7 @@
       
    33  			fmovs	%f1,%f3
       
    34  			fmovs	%f0,%f2
       
    35  
       
    36 -	add	%fp,BIAS,%i0	! return pointer to caller�s top of stack
       
    37 +	add	%fp,BIAS,%i0	! return pointer to the top of the caller stack
       
    38  
       
    39  	ret
       
    40  	restore
       
    41 @@ -235,10 +239,10 @@
       
    42  .global	_sparcv9_vis1_probe
       
    43  .align	8
       
    44  _sparcv9_vis1_probe:
       
    45 +	.word	0x81b00d80	!fxor	%f0,%f0,%f0
       
    46  	add	%sp,BIAS+2,%o1
       
    47 -	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
       
    48  	retl
       
    49 -	.word	0x81b00d80	!fxor	%f0,%f0,%f0
       
    50 +	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
       
    51  .type	_sparcv9_vis1_probe,#function
       
    52  .size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
       
    53  
       
    54 @@ -251,7 +255,12 @@
       
    55  !	UltraSPARC IIe		7
       
    56  !	UltraSPARC III		7
       
    57  !	UltraSPARC T1		24
       
    58 +!	SPARC T4		65(*)
       
    59  !
       
    60 +! (*)	result has lesser to do with VIS instruction latencies, rdtick
       
    61 +!	appears that slow, but it does the trick in sense that FP and
       
    62 +!	VIS code paths are still slower than integer-only ones.
       
    63 +!
       
    64  ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
       
    65  !
       
    66  ! It would be possible to detect specifically US-T1 by instrumenting
       
    67 @@ -260,6 +269,8 @@
       
    68  .global	_sparcv9_vis1_instrument
       
    69  .align	8
       
    70  _sparcv9_vis1_instrument:
       
    71 +	.word	0x81b00d80	!fxor	%f0,%f0,%f0
       
    72 +	.word	0x85b08d82	!fxor	%f2,%f2,%f2
       
    73  	.word	0x91410000	!rd	%tick,%o0
       
    74  	.word	0x81b00d80	!fxor	%f0,%f0,%f0
       
    75  	.word	0x85b08d82	!fxor	%f2,%f2,%f2
       
    76 @@ -314,6 +325,30 @@
       
    77  .type	_sparcv9_fmadd_probe,#function
       
    78  .size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
       
    79  
       
    80 +.global	_sparcv9_rdcfr
       
    81 +.align	8
       
    82 +_sparcv9_rdcfr:				! declared in sparcv9cap.c as unsigned long _sparcv9_rdcfr(void)
       
    83 +	retl				! leaf return; the next word executes in the delay slot
       
    84 +	.word	0x91468000	!rd	%asr26,%o0
       
    85 +.type	_sparcv9_rdcfr,#function
       
    86 +.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
       
    87 +
       
    88 +.global	_sparcv9_vis3_probe
       
    89 +.align	8
       
    90 +_sparcv9_vis3_probe:			! called under sigsetjmp in sparcv9cap.c; a clean return sets the VIS3 flag
       
    91 +	retl				! leaf return; the probe word executes in the delay slot
       
    92 +	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
       
    93 +.type	_sparcv9_vis3_probe,#function
       
    94 +.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
       
    95 +
       
    96 +.global	_sparcv9_random
       
    97 +.align	8
       
    98 +_sparcv9_random:			! declared in sparcv9cap.c as unsigned long _sparcv9_random(void)
       
    99 +	retl				! leaf return; the next word executes in the delay slot
       
   100 +	.word	0x91b002a0	!random	%o0
       
   101 +.type	_sparcv9_random,#function
       
   102 +.size	_sparcv9_random,.-_sparcv9_random	! size measured from this label, not from _sparcv9_vis3_probe
       
   103 +
       
   104  .global	OPENSSL_cleanse
       
   105  .align	32
       
   106  OPENSSL_cleanse:
       
   107 @@ -398,6 +433,102 @@
       
   108  .size	OPENSSL_cleanse,.-OPENSSL_cleanse
       
   109  
       
   110  #ifndef _BOOT
       
   111 +.global	_sparcv9_vis1_instrument_bus
       
   112 +.align	8
       
   113 +_sparcv9_vis1_instrument_bus:
       
   114 +	mov	%o1,%o3					! save cnt
       
   115 +	.word	0x99410000	!rd	%tick,%o4	! tick
       
   116 +	mov	%o4,%o5					! lasttick = tick
       
   117 +	set	0,%g4					! diff
       
   118 +
       
   119 +	andn	%o0,63,%g1
       
   120 +	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
       
   121 +	.word	0x8143e040	!membar	#Sync
       
   122 +	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
       
   123 +	.word	0x8143e040	!membar	#Sync
       
   124 +	ld	[%o0],%o4
       
   125 +	add	%o4,%g4,%g4
       
   126 +	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
       
   127 +
       
   128 +.Loop:	.word	0x99410000	!rd	%tick,%o4
       
   129 +	sub	%o4,%o5,%g4				! diff=tick-lasttick
       
   130 +	mov	%o4,%o5					! lasttick=tick
       
   131 +
       
   132 +	andn	%o0,63,%g1
       
   133 +	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
       
   134 +	.word	0x8143e040	!membar	#Sync
       
   135 +	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
       
   136 +	.word	0x8143e040	!membar	#Sync
       
   137 +	ld	[%o0],%o4
       
   138 +	add	%o4,%g4,%g4
       
   139 +	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
       
   140 +	subcc	%o1,1,%o1				! --$cnt
       
   141 +	bnz	.Loop
       
   142 +	add	%o0,4,%o0				! ++$out
       
   143 +
       
   144 +	retl
       
   145 +	mov	%o3,%o0
       
   146 +.type	_sparcv9_vis1_instrument_bus,#function
       
   147 +.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
       
   148 +
       
   149 +.global	_sparcv9_vis1_instrument_bus2
       
   150 +.align	8
       
   151 +_sparcv9_vis1_instrument_bus2:
       
   152 +	mov	%o1,%o3					! save cnt
       
   153 +	sll	%o1,2,%o1				! cnt*=4
       
   154 +
       
   155 +	.word	0x99410000	!rd	%tick,%o4	! tick
       
   156 +	mov	%o4,%o5					! lasttick = tick
       
   157 +	set	0,%g4					! diff
       
   158 +
       
   159 +	andn	%o0,63,%g1
       
   160 +	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
       
   161 +	.word	0x8143e040	!membar	#Sync
       
   162 +	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
       
   163 +	.word	0x8143e040	!membar	#Sync
       
   164 +	ld	[%o0],%o4
       
   165 +	add	%o4,%g4,%g4
       
   166 +	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
       
   167 +
       
   168 +	.word	0x99410000	!rd	%tick,%o4	! tick
       
   169 +	sub	%o4,%o5,%g4				! diff=tick-lasttick
       
   170 +	mov	%o4,%o5					! lasttick=tick
       
   171 +	mov	%g4,%g5					! lastdiff=diff
       
   172 +.Loop2:
       
   173 +	andn	%o0,63,%g1
       
   174 +	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
       
   175 +	.word	0x8143e040	!membar	#Sync
       
   176 +	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
       
   177 +	.word	0x8143e040	!membar	#Sync
       
   178 +	ld	[%o0],%o4
       
   179 +	add	%o4,%g4,%g4
       
   180 +	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
       
   181 +
       
   182 +	subcc	%o2,1,%o2				! --max
       
   183 +	bz	.Ldone2
       
   184 +	nop
       
   185 +
       
   186 +	.word	0x99410000	!rd	%tick,%o4	! tick
       
   187 +	sub	%o4,%o5,%g4				! diff=tick-lasttick
       
   188 +	mov	%o4,%o5					! lasttick=tick
       
   189 +	cmp	%g4,%g5
       
   190 +	mov	%g4,%g5					! lastdiff=diff
       
   191 +
       
   192 +	.word	0x83408000	!rd	%ccr,%g1
       
   193 +	and	%g1,4,%g1				! isolate zero flag
       
   194 +	xor	%g1,4,%g1				! flip zero flag
       
   195 +
       
   196 +	subcc	%o1,%g1,%o1				! conditional --$cnt
       
   197 +	bnz	.Loop2
       
   198 +	add	%o0,%g1,%o0				! conditional ++$out
       
   199 +
       
   200 +.Ldone2:
       
   201 +	srl	%o1,2,%o1
       
   202 +	retl
       
   203 +	sub	%o3,%o1,%o0
       
   204 +.type	_sparcv9_vis1_instrument_bus2,#function
       
   205 +.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
       
   206 +
       
   207  .section	".init",#alloc,#execinstr
       
   208  	call	solaris_locking_setup
       
   209  	nop
       
   210 Index: crypto/sparcv9cap.c
       
   211 ===================================================================
       
   212 diff -ru openssl-1.0.1e/crypto/sparcv9cap.c openssl-1.0.1e/crypto/sparcv9cap.c
       
   213 --- openssl-1.0.1e/crypto/sparcv9cap.c 2011-05-24 17:02:24.000000000 -0700
       
   214 +++ openssl-1.0.1e/crypto/sparcv9cap.c 2011-07-27 10:48:17.817470000 -0700
       
   215 @@ -4,31 +4,55 @@
       
   216  #include <setjmp.h>
       
   217  #include <signal.h>
       
   218  #include <sys/time.h>
       
   219 +#include <unistd.h>
       
   220  #include <openssl/bn.h>
       
   221  
       
   222 -#define SPARCV9_TICK_PRIVILEGED	(1<<0)
       
   223 -#define SPARCV9_PREFER_FPU	(1<<1)
       
   224 -#define SPARCV9_VIS1		(1<<2)
       
   225 -#define SPARCV9_VIS2		(1<<3)	/* reserved */
       
   226 -#define SPARCV9_FMADD		(1<<4)	/* reserved for SPARC64 V */
       
   227 +#include "sparc_arch.h"
       
   228  
       
   229 +#if defined(__GNUC__) && defined(__linux)
       
   230 +__attribute__((visibility("hidden")))
       
   231 +#endif
       
   232  #ifndef	_BOOT
       
   233 -static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED;
       
   234 +unsigned int OPENSSL_sparcv9cap_P[2]={SPARCV9_TICK_PRIVILEGED,0};
       
   235  #else
       
   236 -static int OPENSSL_sparcv9cap_P = SPARCV9_VIS1;
       
   237 +unsigned int OPENSSL_sparcv9cap_P[2]={SPARCV9_VIS1,0};
       
   238  #endif
       
   239  
       
   240  int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
       
   241  	{
       
   242 +	int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
       
   243  	int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
       
   244  	int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
       
   245  
       
   246 -	if (num>=8 && !(num&1) &&
       
   247 -	    (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
       
   248 -		(SPARCV9_PREFER_FPU|SPARCV9_VIS1))
       
   249 -		return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
       
   250 -	else
       
   251 -		return bn_mul_mont_int(rp,ap,bp,np,n0,num);
       
   252 +	if (!(num&1) && num>=6)
       
   253 +		{
       
   254 +		if ((num&15)==0 && num<=64 &&
       
   255 +		    (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== 
       
   256 +		    			     (CFR_MONTMUL|CFR_MONTSQR))
       
   257 +			{
       
   258 +			typedef int (*bn_mul_mont_f)(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   259 +			int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   260 +			int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   261 +			int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   262 +			int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   263 +			static const bn_mul_mont_f funcs[4] = {
       
   264 +				bn_mul_mont_t4_8,	bn_mul_mont_t4_16,
       
   265 +				bn_mul_mont_t4_24,	bn_mul_mont_t4_32 };
       
   266 +			bn_mul_mont_f worker = funcs[num/16-1];
       
   267 +
       
   268 +			if ((*worker)(rp,ap,bp,np,n0)) return 1;
       
   269 +			/* retry once and fall back */
       
   270 +			if ((*worker)(rp,ap,bp,np,n0)) return 1;
       
   271 +			return bn_mul_mont_vis3(rp,ap,bp,np,n0,num);
       
   272 +			}
       
   273 +		if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3))
       
   274 +			return bn_mul_mont_vis3(rp,ap,bp,np,n0,num);
       
   275 +		else if (num>=8 &&
       
   276 +			(OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
       
   277 +			(SPARCV9_PREFER_FPU|SPARCV9_VIS1))
       
   278 +			return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
       
   279 +		}
       
   280 +	return bn_mul_mont_int(rp,ap,bp,np,n0,num);
       
   281  	}
       
   282  
       
   283  unsigned long	_sparcv9_rdtick(void);
       
   284 @@ -36,11 +60,18 @@
       
   285  unsigned long	_sparcv9_vis1_instrument(void);
       
   286  void		_sparcv9_vis2_probe(void);
       
   287  void		_sparcv9_fmadd_probe(void);
       
   288 +unsigned long	_sparcv9_rdcfr(void);
       
   289 +void		_sparcv9_vis3_probe(void);
       
   290 +unsigned long	_sparcv9_random(void);
       
   291 +#ifndef _BOOT
       
   292 +size_t 	_sparcv9_vis1_instrument_bus(unsigned int *,size_t);
       
   293 +size_t		_sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t);
       
   294 +#endif
       
   295  
       
   296  #ifndef _BOOT
       
   297  unsigned long OPENSSL_rdtsc(void)
       
   298  	{
       
   299 -	if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED)
       
   300 +	if (OPENSSL_sparcv9cap_P[0]&SPARCV9_TICK_PRIVILEGED)
       
   301  #if defined(__sun) && defined(__SVR4)
       
   302  		return gethrtime();
       
   303  #else
       
   304 @@ -49,6 +80,24 @@
       
   305  	else
       
   306  		return _sparcv9_rdtick();
       
   307  	}
       
   308 +
       
   309 +size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt)
       
   310 +	{	/* wrapper: runs the assembly bus probe only when the capability bits permit it */
       
   311 +	if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
       
   312 +			SPARCV9_BLK)	/* BLK must be set and TICK_PRIVILEGED clear: the helper reads %tick and uses block load/store */
       
   313 +		return _sparcv9_vis1_instrument_bus(out,cnt);
       
   314 +	else
       
   315 +		return 0;	/* probe not attempted */
       
   316 +	}
       
   317 +
       
   318 +size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max)
       
   319 +	{	/* wrapper: same capability gate as OPENSSL_instrument_bus, with the extra max argument passed through */
       
   320 +	if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
       
   321 +			SPARCV9_BLK)	/* BLK must be set and TICK_PRIVILEGED clear: the helper reads %tick and uses block load/store */
       
   322 +		return _sparcv9_vis1_instrument_bus2(out,cnt,max);
       
   323 +	else
       
   324 +		return 0;	/* probe not attempted */
       
   325 +	}
       
   326  #endif
       
   327 
       
   328  #if defined(_BOOT)
       
   329 @@ -58,7 +107,7 @@
       
   330   */
       
   331  void OPENSSL_cpuid_setup(void)
       
   332  	{
       
   333 -	OPENSSL_sparcv9cap_P = SPARCV9_VIS1;
       
   334 +	OPENSSL_sparcv9cap_P[0] = SPARCV9_VIS1;
       
   335  	}
       
   336  
       
   337  #elif 0 && defined(__sun) && defined(__SVR4)
       
   338 @@ -85,11 +116,11 @@
       
   339  	if (!strcmp (name,"SUNW,UltraSPARC") ||
       
   340  	    !strncmp(name,"SUNW,UltraSPARC-I",17))  /* covers II,III,IV */
       
   341  		{
       
   342 -		OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1;
       
   343 +		OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU|SPARCV9_VIS1;
       
   344  
       
   345  		/* %tick is privileged only on UltraSPARC-I/II, but not IIe */
       
   346  		if (name[14]!='\0' && name[17]!='\0' && name[18]!='\0')
       
   347 -			OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   348 +			OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   349  
       
   350  		return DI_WALK_TERMINATE;
       
   351  		}
       
   352 @@ -96,7 +127,7 @@
       
   353  	/* This is expected to catch remaining UltraSPARCs, such as T1 */
       
   354  	else if (!strncmp(name,"SUNW,UltraSPARC",15))
       
   355  		{
       
   356 -		OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   357 +		OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   358  
       
   359  		return DI_WALK_TERMINATE;
       
   360  		}
       
   361 @@ -115,7 +146,7 @@
       
   362  
       
   363  	if ((e=getenv("OPENSSL_sparcv9cap")))
       
   364  		{
       
   365 -		OPENSSL_sparcv9cap_P=strtoul(e,NULL,0);
       
   366 +		OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0);
       
   367  		return;
       
   368  		}
       
   369  
       
   370 @@ -123,17 +154,17 @@
       
   371  		{
       
   372  		if (strcmp(si,"sun4v"))
       
   373  			/* FPU is preferred for all CPUs, but US-T1/2 */
       
   374 -			OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU;
       
   375 +			OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU;
       
   376  		}
       
   377  
       
   378  	if (sysinfo(SI_ISALIST,si,sizeof(si))>0)
       
   379  		{
       
   380  		if (strstr(si,"+vis"))
       
   381 -			OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
       
   382 +			OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK;
       
   383  		if (strstr(si,"+vis2"))
       
   384  			{
       
   385 -			OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
       
   386 -			OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   387 +			OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2;
       
   388 +			OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   389  			return;
       
   390  			}
       
   391  		}
       
   392 @@ -193,12 +224,14 @@
       
   393   
       
   394  	if ((e=getenv("OPENSSL_sparcv9cap")))
       
   395  		{
       
   396 -		OPENSSL_sparcv9cap_P=strtoul(e,NULL,0);
       
   397 +		OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0);
       
   398 +		if ((e=strchr(e,':')))
       
   399 +			OPENSSL_sparcv9cap_P[1]=strtoul(e+1,NULL,0);
       
   400  		return;
       
   401  		}
       
   402  
       
   403  	/* Initial value, fits UltraSPARC-I&II... */
       
   404 -	OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED;
       
   405 +	OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED;
       
   406  
       
   407  	sigfillset(&all_masked);
       
   408  	sigdelset(&all_masked,SIGILL);
       
   409 @@ -221,20 +254,20 @@
       
   410  	if (sigsetjmp(common_jmp,1) == 0)
       
   411  		{
       
   412  		_sparcv9_rdtick();
       
   413 -		OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   414 +		OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   415  		}
       
   416  
       
   417  	if (sigsetjmp(common_jmp,1) == 0)
       
   418  		{
       
   419  		_sparcv9_vis1_probe();
       
   420 -		OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
       
   421 +		OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK;
       
   422  		/* detect UltraSPARC-Tx, see sparccpud.S for details... */
       
   423  		if (_sparcv9_vis1_instrument() >= 12)
       
   424 -			OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU);
       
   425 +			OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU);
       
   426  		else
       
   427  			{
       
   428  			_sparcv9_vis2_probe();
       
   429 -			OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
       
   430 +			OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2;
       
   431  			}
       
   432  		}
       
   433  
       
   434 @@ -241,13 +274,53 @@
       
   435  	if (sigsetjmp(common_jmp,1) == 0)
       
   436  		{
       
   437  		_sparcv9_fmadd_probe();
       
   438 -		OPENSSL_sparcv9cap_P |= SPARCV9_FMADD;
       
   439 +		OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD;
       
   440  		}
       
   441  
       
   442 +	/*
       
   443 +	 * VIS3 flag is tested independently from VIS1, unlike VIS2 that is,
       
   444 +	 * because VIS3 defines even integer instructions.
       
   445 +	 */
       
   446 +	if (sigsetjmp(common_jmp,1) == 0)
       
   447 +		{
       
   448 +		_sparcv9_vis3_probe();
       
   449 +		OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3;
       
   450 +		}
       
   451 +
       
   452 +	if (sigsetjmp(common_jmp,1) == 0)
       
   453 +		{
       
   454 +		(void)_sparcv9_random();
       
   455 +		OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM;
       
   456 +		}
       
   457 +
       
   458 +	/*
       
   459 +	 * In wait for better solution _sparcv9_rdcfr is masked by
       
   460 +	 * VIS3 flag, because it goes to uninterruptable endless
       
   461 +	 * loop on UltraSPARC II running Solaris. Things might be
       
   462 +	 * different on Linux...
       
   463 +	 */
       
   464 +	if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) &&
       
   465 +	    sigsetjmp(common_jmp,1) == 0)
       
   466 +		{
       
   467 +		OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr();
       
   468 +		}
       
   469 +
       
   470  	sigaction(SIGBUS,&bus_oact,NULL);
       
   471  	sigaction(SIGILL,&ill_oact,NULL);
       
   472  
       
   473  	sigprocmask(SIG_SETMASK,&oset,NULL);
       
   474 +
       
   475 +	if (sizeof(size_t)==8)
       
   476 +		OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK;
       
   477 +#ifdef __linux
       
   478 +	else
       
   479 +		{
       
   480 +		int ret = syscall(340);
       
   481 +
       
   482 +		if (ret>=0 && ret&1)
       
   483 +			OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK;
       
   484 +		}
       
   485 +#endif
       
   486  	}
       
   487  
       
   488  #endif
       
   489 Index: crypto/md5/Makefile
       
   490 ===================================================================
       
   491 diff -ru openssl-1.0.1e/crypto/md5/Makefile openssl-1.0.1e/crypto/md5/Makefile
       
   492 --- openssl-1.0.1e/crypto/md5/Makefile    2011-05-24 17:02:24.000000000 -0700
       
   493 +++ openssl-1.0.1e/crypto/md5/Makefile    2011-07-27 10:48:17.817470000 -0700
       
   494 @@ -52,6 +52,9 @@
       
   495  	$(CC) $(CFLAGS) -E asm/md5-ia64.S | \
       
   496  	$(PERL) -ne 's/;\s+/;\n/g; print;' > $@
       
   497  
       
   498 +md5-sparcv9.S:	asm/md5-sparcv9.pl
       
   499 +	$(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS)
       
   500 +
       
   501  files:
       
   502  	$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
       
   503  
       
   504 Index: crypto/md5/md5_locl.h
       
   505 ===================================================================
       
   506 diff -ru openssl-1.0.1e/crypto/md5/md5_locl.h openssl-1.0.1e/crypto/md5/md5_locl.h
       
   507 --- openssl-1.0.1e/crypto/md5/md5_locl.h    2011-05-24 17:02:24.000000000 -0700
       
   508 +++ openssl-1.0.1e/crypto/md5/md5_locl.h    2011-07-27 10:48:17.817470000 -0700
       
   509 @@ -71,6 +71,8 @@
       
   510  #  define md5_block_data_order md5_block_asm_data_order
       
   511  # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
       
   512  #  define md5_block_data_order md5_block_asm_data_order
       
   513 +# elif defined(__sparc) || defined(__sparc__)
       
   514 +#  define md5_block_data_order md5_block_asm_data_order
       
   515  # endif
       
   516  #endif
       
   517 
       
   518 Index: crypto/sha/Makefile
       
   519 ===================================================================
       
   520 diff -ru openssl-1.0.1e/crypto/sha/Makefile openssl-1.0.1e/crypto/sha/Makefile
       
   521 --- openssl-1.0.1e/crypto/sha/Makefile    2011-05-24 17:02:24.000000000 -0700
       
   522 +++ openssl-1.0.1e/crypto/sha/Makefile    2011-07-27 10:48:17.817470000 -0700
       
   523 @@ -68,9 +68,9 @@
       
   524  sha1-x86_64.s:	asm/sha1-x86_64.pl;	$(PERL) asm/sha1-x86_64.pl $(PERLASM_SCHEME) > $@
       
   525  sha256-x86_64.s:asm/sha512-x86_64.pl;	$(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
       
   526  sha512-x86_64.s:asm/sha512-x86_64.pl;	$(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
       
   527 -sha1-sparcv9.s:	asm/sha1-sparcv9.pl;	$(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS)
       
   528 -sha256-sparcv9.s:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   529 -sha512-sparcv9.s:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   530 +sha1-sparcv9.S:	asm/sha1-sparcv9.pl;	$(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS)
       
   531 +sha256-sparcv9.S:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   532 +sha512-sparcv9.S:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   533  
       
   534  sha1-ppc.s:	asm/sha1-ppc.pl;	$(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@
       
   535  sha256-ppc.s:	asm/sha512-ppc.pl;	$(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
       
   536 Index: crypto/sha/asm/sha1-sparcv9.pl
       
   537 ===================================================================
       
   538 diff -ru openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl
       
   539 --- openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700
       
   540 +++ openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700
       
   541 @@ -5,6 +5,8 @@
       
   542  # project. The module is, however, dual licensed under OpenSSL and
       
   543  # CRYPTOGAMS licenses depending on where you obtain it. For further
       
   544  # details see http://www.openssl.org/~appro/cryptogams/.
       
   545 +#
       
   546 +# Hardware SPARC T4 support by David S. Miller <[email protected]>.
       
   547  # ====================================================================
       
   548  
       
   549  # Performance improvement is not really impressive on pre-T1 CPU: +8%
       
   550 @@ -18,6 +20,11 @@
       
   551  # ensure scalability on UltraSPARC T1, or rather to avoid decay when
       
   552  # amount of active threads exceeds the number of physical cores.
       
   553  
       
   554 +# SPARC T4 SHA1 hardware achieves 3.72 cycles per byte, which is 3.1x
       
   555 +# faster than software. Multi-process benchmark saturates at 11x
       
   556 +# single-process result on 8-core processor, or ~9GBps per 2.85GHz
       
   557 +# socket.
       
   558 +
       
   559  $bits=32;
       
   560  for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
       
   561  if ($bits==64)	{ $bias=2047; $frame=192; }
       
   562 @@ -183,11 +190,93 @@
       
   563  .register	%g3,#scratch
       
   564  ___
       
   565  $code.=<<___;
       
   566 +#include "sparc_arch.h"
       
   567 +
       
   568  .section	".text",#alloc,#execinstr
       
   569  
       
   570 +#ifdef __PIC__
       
   571 +SPARC_PIC_THUNK(%g1)
       
   572 +#endif
       
   573 +
       
   574  .align	32
       
   575  .globl	sha1_block_data_order
       
   576  sha1_block_data_order:
       
   577 +	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
       
   578 +	ld	[%g1+4],%g1		! OPENSSL_sparcv9cap_P[1]
       
   579 +
       
   580 +	andcc	%g1, CFR_SHA1, %g0
       
   581 +	be	.Lsoftware
       
   582 +	nop
       
   583 +
       
   584 +	ld	[%o0 + 0x00], %f0	! load context
       
   585 +	ld	[%o0 + 0x04], %f1
       
   586 +	ld	[%o0 + 0x08], %f2
       
   587 +	andcc	%o1, 0x7, %g0
       
   588 +	ld	[%o0 + 0x0c], %f3
       
   589 +	bne,pn	%icc, .Lhwunaligned
       
   590 +	 ld	[%o0 + 0x10], %f4
       
   591 +
       
   592 +.Lhw_loop:
       
   593 +	ldd	[%o1 + 0x00], %f8
       
   594 +	ldd	[%o1 + 0x08], %f10
       
   595 +	ldd	[%o1 + 0x10], %f12
       
   596 +	ldd	[%o1 + 0x18], %f14
       
   597 +	ldd	[%o1 + 0x20], %f16
       
   598 +	ldd	[%o1 + 0x28], %f18
       
   599 +	ldd	[%o1 + 0x30], %f20
       
   600 +	subcc	%o2, 1, %o2		! done yet? 
       
   601 +	ldd	[%o1 + 0x38], %f22
       
   602 +	add	%o1, 0x40, %o1
       
   603 +
       
   604 +	.word	0x81b02820		! SHA1
       
   605 +
       
   606 +	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhw_loop
       
   607 +	nop
       
   608 +
       
   609 +.Lhwfinish:
       
   610 +	st	%f0, [%o0 + 0x00]	! store context
       
   611 +	st	%f1, [%o0 + 0x04]
       
   612 +	st	%f2, [%o0 + 0x08]
       
   613 +	st	%f3, [%o0 + 0x0c]
       
   614 +	retl
       
   615 +	st	%f4, [%o0 + 0x10]
       
   616 +
       
   617 +.align	8
       
   618 +.Lhwunaligned:
       
   619 +	alignaddr %o1, %g0, %o1
       
   620 +
       
   621 +	ldd	[%o1 + 0x00], %f10
       
   622 +.Lhwunaligned_loop:
       
   623 +	ldd	[%o1 + 0x08], %f12
       
   624 +	ldd	[%o1 + 0x10], %f14
       
   625 +	ldd	[%o1 + 0x18], %f16
       
   626 +	ldd	[%o1 + 0x20], %f18
       
   627 +	ldd	[%o1 + 0x28], %f20
       
   628 +	ldd	[%o1 + 0x30], %f22
       
   629 +	ldd	[%o1 + 0x38], %f24
       
   630 +	subcc	%o2, 1, %o2		! done yet?
       
   631 +	ldd	[%o1 + 0x40], %f26
       
   632 +	add	%o1, 0x40, %o1
       
   633 +
       
   634 +	faligndata %f10, %f12, %f8
       
   635 +	faligndata %f12, %f14, %f10
       
   636 +	faligndata %f14, %f16, %f12
       
   637 +	faligndata %f16, %f18, %f14
       
   638 +	faligndata %f18, %f20, %f16
       
   639 +	faligndata %f20, %f22, %f18
       
   640 +	faligndata %f22, %f24, %f20
       
   641 +	faligndata %f24, %f26, %f22
       
   642 +
       
   643 +	.word	0x81b02820		! SHA1
       
   644 +
       
   645 +	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
       
   646 +	for	%f26, %f26, %f10	! %f10=%f26
       
   647 +
       
   648 +	ba	.Lhwfinish
       
   649 +	nop
       
   650 +
       
   651 +.align	16
       
   652 +.Lsoftware:
       
   653  	save	%sp,-$frame,%sp
       
   654  	sllx	$len,6,$len
       
   655  	add	$inp,$len,$len
       
   656 @@ -279,6 +368,62 @@
       
   657  .align	4
       
   658  ___
       
   659  
       
   660 -$code =~ s/\`([^\`]*)\`/eval $1/gem;
       
   661 -print $code;
       
   662 +# Purpose of these subroutines is to explicitly encode VIS instructions,
       
   663 +# so that one can compile the module without having to specify VIS
       
   664 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
       
   665 +# Idea is to reserve for option to produce "universal" binary and let
       
   666 +# programmer detect if current CPU is VIS capable at run-time.
       
   667 +sub unvis {
       
   668 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   669 +my ($ref,$opf);
       
   670 +my %visopf = (	"faligndata"	=> 0x048,
       
   671 +		"for"		=> 0x07c	);
       
   672 +
       
   673 +    $ref = "$mnemonic\t$rs1,$rs2,$rd";
       
   674 +
       
   675 +    if ($opf=$visopf{$mnemonic}) {
       
   676 +	foreach ($rs1,$rs2,$rd) {
       
   677 +	    return $ref if (!/%f([0-9]{1,2})/);
       
   678 +	    $_=$1;
       
   679 +	    if ($1>=32) {
       
   680 +		return $ref if ($1&1);
       
   681 +		# re-encode for upper double register addressing
       
   682 +		$_=($1|$1>>5)&31;
       
   683 +	    }
       
   684 +	}
       
   685 +
       
   686 +	return	sprintf ".word\t0x%08x !%s",
       
   687 +			0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
       
   688 +			$ref;
       
   689 +    } else {
       
   690 +	return $ref;
       
   691 +    }
       
   692 +}
       
   693 +sub unalignaddr {
       
   694 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   695 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
       
   696 +my $ref="$mnemonic\t$rs1,$rs2,$rd";
       
   697 +
       
   698 +    foreach ($rs1,$rs2,$rd) {
       
   699 +	if (/%([goli])([0-7])/)	{ $_=$bias{$1}+$2; }
       
   700 +	else			{ return $ref; }
       
   701 +    }
       
   702 +    return  sprintf ".word\t0x%08x !%s",
       
   703 +		    0x81b00300|$rd<<25|$rs1<<14|$rs2,
       
   704 +		    $ref;
       
   705 +}
       
   706 +
       
   707 +foreach (split("\n",$code)) {
       
   708 +	s/\`([^\`]*)\`/eval $1/ge;
       
   709 +
       
   710 +	s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
       
   711 +		&unvis($1,$2,$3,$4)
       
   712 +	 /ge;
       
   713 +	s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
       
   714 +		&unalignaddr($1,$2,$3,$4)
       
   715 +	 /ge;
       
   716 +
       
   717 +	print $_,"\n";
       
   718 +}
       
   719 +
       
   720  close STDOUT;
       
   721 
       
   722 Index: crypto/sha/asm/sha512-sparcv9.pl
       
   723 ===================================================================
       
   724 diff -ru openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl
       
   725 --- openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700
       
   726 +++ openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700
       
   727 @@ -5,6 +5,8 @@
       
   728  # project. The module is, however, dual licensed under OpenSSL and
       
   729  # CRYPTOGAMS licenses depending on where you obtain it. For further
       
   730  # details see http://www.openssl.org/~appro/cryptogams/.
       
   731 +#
       
   732 +# Hardware SPARC T4 support by David S. Miller <[email protected]>.
       
   733  # ====================================================================
       
   734  
       
   735  # SHA256 performance improvement over compiler generated code varies
       
   736 @@ -41,6 +43,12 @@
       
   737  #	loads are always slower than one 64-bit load. Once again this
       
   738  #	is unlike pre-T1 UltraSPARC, where, if scheduled appropriately,
       
   739  #	2x32-bit loads can be as fast as 1x64-bit ones.
       
   740 +#
       
   741 +# SPARC T4 SHA256/512 hardware achieves 3.17/2.01 cycles per byte,
       
   742 +# which is 9.3x/11.1x faster than software. Multi-process benchmark
       
   743 +# saturates at 11.5x single-process result on 8-core processor, or
       
   744 +# ~11/16GBps per 2.85GHz socket.
       
   745 +
       
   746  
       
   747  $bits=32;
       
   748  for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
       
   749 @@ -386,6 +394,8 @@
       
   750  .register	%g3,#scratch
       
   751  ___
       
   752  $code.=<<___;
       
   753 +#include "sparc_arch.h"
       
   754 +
       
   755  .section	".text",#alloc,#execinstr
       
   756  
       
   757  .align	64
       
   758 @@ -457,8 +467,196 @@
       
   759  }
       
   760  $code.=<<___;
       
   761  .size	K${label},.-K${label}
       
   762 +
       
   763 +#ifdef __PIC__
       
   764 +SPARC_PIC_THUNK(%g1)
       
   765 +#endif
       
   766 +
       
   767  .globl	sha${label}_block_data_order
       
   768 +.align	32
       
   769  sha${label}_block_data_order:
       
   770 +	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
       
   771 +	ld	[%g1+4],%g1		! OPENSSL_sparcv9cap_P[1]
       
   772 +
       
   773 +	andcc	%g1, CFR_SHA${label}, %g0
       
   774 +	be	.Lsoftware
       
   775 +	nop
       
   776 +___
       
   777 +$code.=<<___ if ($SZ==8); 		# SHA512
       
   778 +	ldd	[%o0 + 0x00], %f0	! load context
       
   779 +	ldd	[%o0 + 0x08], %f2
       
   780 +	ldd	[%o0 + 0x10], %f4
       
   781 +	ldd	[%o0 + 0x18], %f6
       
   782 +	ldd	[%o0 + 0x20], %f8
       
   783 +	ldd	[%o0 + 0x28], %f10
       
   784 +	andcc	%o1, 0x7, %g0
       
   785 +	ldd	[%o0 + 0x30], %f12
       
   786 +	bne,pn	%icc, .Lhwunaligned
       
   787 +	 ldd	[%o0 + 0x38], %f14
       
   788 +
       
   789 +.Lhwaligned_loop:
       
   790 +	ldd	[%o1 + 0x00], %f16
       
   791 +	ldd	[%o1 + 0x08], %f18
       
   792 +	ldd	[%o1 + 0x10], %f20
       
   793 +	ldd	[%o1 + 0x18], %f22
       
   794 +	ldd	[%o1 + 0x20], %f24
       
   795 +	ldd	[%o1 + 0x28], %f26
       
   796 +	ldd	[%o1 + 0x30], %f28
       
   797 +	ldd	[%o1 + 0x38], %f30
       
   798 +	ldd	[%o1 + 0x40], %f32
       
   799 +	ldd	[%o1 + 0x48], %f34
       
   800 +	ldd	[%o1 + 0x50], %f36
       
   801 +	ldd	[%o1 + 0x58], %f38
       
   802 +	ldd	[%o1 + 0x60], %f40
       
   803 +	ldd	[%o1 + 0x68], %f42
       
   804 +	ldd	[%o1 + 0x70], %f44
       
   805 +	subcc	%o2, 1, %o2		! done yet?
       
   806 +	ldd	[%o1 + 0x78], %f46
       
   807 +	add	%o1, 0x80, %o1
       
   808 +
       
   809 +	.word	0x81b02860		! SHA512
       
   810 +
       
   811 +	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwaligned_loop
       
   812 +	nop
       
   813 +
       
   814 +.Lhwfinish:
       
   815 +	std	%f0, [%o0 + 0x00]	! store context
       
   816 +	std	%f2, [%o0 + 0x08]
       
   817 +	std	%f4, [%o0 + 0x10]
       
   818 +	std	%f6, [%o0 + 0x18]
       
   819 +	std	%f8, [%o0 + 0x20]
       
   820 +	std	%f10, [%o0 + 0x28]
       
   821 +	std	%f12, [%o0 + 0x30]
       
   822 +	retl
       
   823 +	 std	%f14, [%o0 + 0x38]
       
   824 +
       
   825 +.align	16
       
   826 +.Lhwunaligned:
       
   827 +	alignaddr %o1, %g0, %o1
       
   828 +
       
   829 +	ldd	[%o1 + 0x00], %f18
       
   830 +.Lhwunaligned_loop:
       
   831 +	ldd	[%o1 + 0x08], %f20
       
   832 +	ldd	[%o1 + 0x10], %f22
       
   833 +	ldd	[%o1 + 0x18], %f24
       
   834 +	ldd	[%o1 + 0x20], %f26
       
   835 +	ldd	[%o1 + 0x28], %f28
       
   836 +	ldd	[%o1 + 0x30], %f30
       
   837 +	ldd	[%o1 + 0x38], %f32
       
   838 +	ldd	[%o1 + 0x40], %f34
       
   839 +	ldd	[%o1 + 0x48], %f36
       
   840 +	ldd	[%o1 + 0x50], %f38
       
   841 +	ldd	[%o1 + 0x58], %f40
       
   842 +	ldd	[%o1 + 0x60], %f42
       
   843 +	ldd	[%o1 + 0x68], %f44
       
   844 +	ldd	[%o1 + 0x70], %f46
       
   845 +	ldd	[%o1 + 0x78], %f48
       
   846 +	subcc	%o2, 1, %o2		! done yet?
       
   847 +	ldd	[%o1 + 0x80], %f50
       
   848 +	add	%o1, 0x80, %o1
       
   849 +
       
   850 +	faligndata %f18, %f20, %f16
       
   851 +	faligndata %f20, %f22, %f18
       
   852 +	faligndata %f22, %f24, %f20
       
   853 +	faligndata %f24, %f26, %f22
       
   854 +	faligndata %f26, %f28, %f24
       
   855 +	faligndata %f28, %f30, %f26
       
   856 +	faligndata %f30, %f32, %f28
       
   857 +	faligndata %f32, %f34, %f30
       
   858 +	faligndata %f34, %f36, %f32
       
   859 +	faligndata %f36, %f38, %f34
       
   860 +	faligndata %f38, %f40, %f36
       
   861 +	faligndata %f40, %f42, %f38
       
   862 +	faligndata %f42, %f44, %f40
       
   863 +	faligndata %f44, %f46, %f42
       
   864 +	faligndata %f46, %f48, %f44
       
   865 +	faligndata %f48, %f50, %f46
       
   866 +
       
   867 +	.word	0x81b02860		! SHA512
       
   868 +
       
   869 +	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
       
   870 +	for	%f50, %f50, %f18	! %f18=%f50
       
   871 +
       
   872 +	ba	.Lhwfinish
       
   873 +	nop
       
   874 +___
       
   875 +$code.=<<___ if ($SZ==4); 		# SHA256
       
   876 +	ld	[%o0 + 0x00], %f0
       
   877 +	ld	[%o0 + 0x04], %f1
       
   878 +	ld	[%o0 + 0x08], %f2
       
   879 +	ld	[%o0 + 0x0c], %f3
       
   880 +	ld	[%o0 + 0x10], %f4
       
   881 +	ld	[%o0 + 0x14], %f5
       
   882 +	andcc	%o1, 0x7, %g0
       
   883 +	ld	[%o0 + 0x18], %f6
       
   884 +	bne,pn	%icc, .Lhwunaligned
       
   885 +	 ld	[%o0 + 0x1c], %f7
       
   886 +
       
   887 +.Lhwloop:
       
   888 +	ldd	[%o1 + 0x00], %f8
       
   889 +	ldd	[%o1 + 0x08], %f10
       
   890 +	ldd	[%o1 + 0x10], %f12
       
   891 +	ldd	[%o1 + 0x18], %f14
       
   892 +	ldd	[%o1 + 0x20], %f16
       
   893 +	ldd	[%o1 + 0x28], %f18
       
   894 +	ldd	[%o1 + 0x30], %f20
       
   895 +	subcc	%o2, 1, %o2		! done yet?
       
   896 +	ldd	[%o1 + 0x38], %f22
       
   897 +	add	%o1, 0x40, %o1
       
   898 +
       
   899 +	.word	0x81b02840		! SHA256
       
   900 +
       
   901 +	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwloop
       
   902 +	nop
       
   903 +
       
   904 +.Lhwfinish:
       
   905 +	st	%f0, [%o0 + 0x00]	! store context
       
   906 +	st	%f1, [%o0 + 0x04]
       
   907 +	st	%f2, [%o0 + 0x08]
       
   908 +	st	%f3, [%o0 + 0x0c]
       
   909 +	st	%f4, [%o0 + 0x10]
       
   910 +	st	%f5, [%o0 + 0x14]
       
   911 +	st	%f6, [%o0 + 0x18]
       
   912 +	retl
       
   913 +	 st	%f7, [%o0 + 0x1c]
       
   914 +
       
   915 +.align	8
       
   916 +.Lhwunaligned:
       
   917 +	alignaddr %o1, %g0, %o1
       
   918 +
       
   919 +	ldd	[%o1 + 0x00], %f10
       
   920 +.Lhwunaligned_loop:
       
   921 +	ldd	[%o1 + 0x08], %f12
       
   922 +	ldd	[%o1 + 0x10], %f14
       
   923 +	ldd	[%o1 + 0x18], %f16
       
   924 +	ldd	[%o1 + 0x20], %f18
       
   925 +	ldd	[%o1 + 0x28], %f20
       
   926 +	ldd	[%o1 + 0x30], %f22
       
   927 +	ldd	[%o1 + 0x38], %f24
       
   928 +	subcc	%o2, 1, %o2		! done yet?
       
   929 +	ldd	[%o1 + 0x40], %f26
       
   930 +	add	%o1, 0x40, %o1
       
   931 +
       
   932 +	faligndata %f10, %f12, %f8
       
   933 +	faligndata %f12, %f14, %f10
       
   934 +	faligndata %f14, %f16, %f12
       
   935 +	faligndata %f16, %f18, %f14
       
   936 +	faligndata %f18, %f20, %f16
       
   937 +	faligndata %f20, %f22, %f18
       
   938 +	faligndata %f22, %f24, %f20
       
   939 +	faligndata %f24, %f26, %f22
       
   940 +
       
   941 +	.word	0x81b02840		! SHA256
       
   942 +
       
   943 +	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
       
   944 +	for	%f26, %f26, %f10	! %f10=%f26
       
   945 +
       
   946 +	ba	.Lhwfinish
       
   947 +	nop
       
   948 +___
       
   949 +$code.=<<___;
       
   950 +.align	16
       
   951 +.Lsoftware:
       
   952  	save	%sp,`-$frame-$locals`,%sp
       
   953  	and	$inp,`$align-1`,$tmp31
       
   954  	sllx	$len,`log(16*$SZ)/log(2)`,$len
       
   955 @@ -589,6 +787,62 @@
       
   956  .align	4
       
   957  ___
       
   958  
       
   959 -$code =~ s/\`([^\`]*)\`/eval $1/gem;
       
   960 -print $code;
       
   961 +# Purpose of these subroutines is to explicitly encode VIS instructions,
       
   962 +# so that one can compile the module without having to specify VIS
       
   963 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
       
   964 +# Idea is to reserve for option to produce "universal" binary and let
       
   965 +# programmer detect if current CPU is VIS capable at run-time.
       
   966 +sub unvis {
       
   967 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   968 +my ($ref,$opf);
       
   969 +my %visopf = (	"faligndata"	=> 0x048,
       
   970 +		"for"		=> 0x07c	);
       
   971 +
       
   972 +    $ref = "$mnemonic\t$rs1,$rs2,$rd";
       
   973 +
       
   974 +    if ($opf=$visopf{$mnemonic}) {
       
   975 +	foreach ($rs1,$rs2,$rd) {
       
   976 +	    return $ref if (!/%f([0-9]{1,2})/);
       
   977 +	    $_=$1;
       
   978 +	    if ($1>=32) {
       
   979 +		return $ref if ($1&1);
       
   980 +		# re-encode for upper double register addressing
       
   981 +		$_=($1|$1>>5)&31;
       
   982 +	    }
       
   983 +	}
       
   984 +
       
   985 +	return	sprintf ".word\t0x%08x !%s",
       
   986 +			0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
       
   987 +			$ref;
       
   988 +    } else {
       
   989 +	return $ref;
       
   990 +    }
       
   991 +}
       
   992 +sub unalignaddr {
       
   993 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   994 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
       
   995 +my $ref="$mnemonic\t$rs1,$rs2,$rd";
       
   996 +
       
   997 +    foreach ($rs1,$rs2,$rd) {
       
   998 +	if (/%([goli])([0-7])/)	{ $_=$bias{$1}+$2; }
       
   999 +	else			{ return $ref; }
       
  1000 +    }
       
  1001 +    return  sprintf ".word\t0x%08x !%s",
       
  1002 +		    0x81b00300|$rd<<25|$rs1<<14|$rs2,
       
  1003 +		    $ref;
       
  1004 +}
       
  1005 +
       
  1006 +foreach (split("\n",$code)) {
       
  1007 +	s/\`([^\`]*)\`/eval $1/ge;
       
  1008 +
       
  1009 +	s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
       
  1010 +		&unvis($1,$2,$3,$4)
       
  1011 +	 /ge;
       
  1012 +	s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
       
  1013 +		&unalignaddr($1,$2,$3,$4)
       
  1014 +	 /ge;
       
  1015 +
       
  1016 +	print $_,"\n";
       
  1017 +}
       
  1018 +
       
  1019  close STDOUT;
       
  1020 Index: crypto/des/Makefile
       
  1021 ===================================================================
       
  1022 diff -ru openssl-1.0.1e/crypto/des/Makefile.orig openssl-1.0.1e/crypto/des/Makefile
       
  1023 --- a/crypto/des/Makefile
       
  1024 +++ b/crypto/des/Makefile
       
  1025 @@ -61,6 +61,8 @@ des: des.o cbc3_enc.o lib
       
  1026  
       
  1027  des_enc-sparc.S:	asm/des_enc.m4
       
  1028  	m4 -B 8192 asm/des_enc.m4 > des_enc-sparc.S
       
  1029 +dest4-sparcv9.s:	asm/dest4-sparcv9.pl
       
  1030 +	$(PERL) asm/dest4-sparcv9.pl $(CFLAGS) > $@
       
  1031  
       
  1032  des-586.s:	asm/des-586.pl ../perlasm/x86asm.pl ../perlasm/cbc.pl
       
  1033  	$(PERL) asm/des-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@
       
  1034 Index: crypto/evp/e_des.c
       
  1035 ===================================================================
       
  1036 diff -ru openssl-1.0.1e/crypto/evp/e_des.c.orig openssl-1.0.1e/crypto/evp/e_des.c
       
  1037 --- a/crypto/evp/e_des.c
       
  1038 +++ b/crypto/evp/e_des.c
       
  1039 @@ -65,6 +65,30 @@
       
  1040  #include <openssl/des.h>
       
  1041  #include <openssl/rand.h>
       
  1042  
       
  1043 +typedef struct
       
  1044 +	{
       
  1045 +	union { double align; DES_key_schedule ks; } ks;
       
  1046 +	union {
       
  1047 +		void (*cbc)(const void *,void *,size_t,const void *,void *);
       
  1048 +	} stream;
       
  1049 +	} EVP_DES_KEY;
       
  1050 +
       
  1051 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
       
  1052 +/* ---------^^^ this is not a typo, just a way to detect that
       
  1053 + * assembler support was in general requested... */
       
  1054 +#include "sparc_arch.h"
       
  1055 +
       
  1056 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1057 +
       
  1058 +#define SPARC_DES_CAPABLE	(OPENSSL_sparcv9cap_P[1] & CFR_DES)
       
  1059 +
       
  1060 +void	des_t4_key_expand(const void *key, DES_key_schedule *ks);
       
  1061 +void	des_t4_cbc_encrypt(const void *inp,void *out,size_t len,
       
  1062 +				DES_key_schedule *ks,unsigned char iv[8]);
       
  1063 +void	des_t4_cbc_decrypt(const void *inp,void *out,size_t len,
       
  1064 +				DES_key_schedule *ks,unsigned char iv[8]);
       
  1065 +#endif
       
  1066 +
       
  1067  static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1068  			const unsigned char *iv, int enc);
       
  1069  static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr);
       
  1070 @@ -99,6 +123,13 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1071  static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1072  			  const unsigned char *in, size_t inl)
       
  1073  {
       
  1074 +	EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data;
       
  1075 +
       
  1076 +	if (dat->stream.cbc)
       
  1077 +		{
       
  1078 +		(*dat->stream.cbc)(in,out,inl,&dat->ks.ks,ctx->iv);
       
  1079 +		return 1;
       
  1080 +		}
       
  1081  	while(inl>=EVP_MAXCHUNK)
       
  1082  		{
       
  1083  		DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data,
       
  1084 @@ -176,18 +207,18 @@
       
  1085      return 1;
       
  1086      }
       
  1087  
       
  1088 -BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64,
       
  1089 +BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64,
       
  1090  			EVP_CIPH_RAND_KEY, des_init_key, NULL,
       
  1091  			EVP_CIPHER_set_asn1_iv,
       
  1092  			EVP_CIPHER_get_asn1_iv,
       
  1093  			des_ctrl)
       
  1094  
       
  1095 -BLOCK_CIPHER_def_cfb(des,DES_key_schedule,NID_des,8,8,1,
       
  1096 +BLOCK_CIPHER_def_cfb(des,EVP_DES_KEY,NID_des,8,8,1,
       
  1097  		     EVP_CIPH_RAND_KEY, des_init_key,NULL,
       
  1098  		     EVP_CIPHER_set_asn1_iv,
       
  1099  		     EVP_CIPHER_get_asn1_iv,des_ctrl)
       
  1100  
       
  1101 -BLOCK_CIPHER_def_cfb(des,DES_key_schedule,NID_des,8,8,8,
       
  1102 +BLOCK_CIPHER_def_cfb(des,EVP_DES_KEY,NID_des,8,8,8,
       
  1103  		     EVP_CIPH_RAND_KEY,des_init_key,NULL,
       
  1104  		     EVP_CIPHER_set_asn1_iv,
       
  1105  		     EVP_CIPHER_get_asn1_iv,des_ctrl)
       
  1106 @@ -196,8 +227,25 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1107  			const unsigned char *iv, int enc)
       
  1108  	{
       
  1109  	DES_cblock *deskey = (DES_cblock *)key;
       
  1110 +	EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data;
       
  1111 +
       
  1112 +	dat->stream.cbc = NULL;
       
  1113 +#if defined(SPARC_DES_CAPABLE)
       
  1114 +	if (SPARC_DES_CAPABLE)
       
  1115 +		{
       
  1116 +		int mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1117 +
       
  1118 +		if (mode == EVP_CIPH_CBC_MODE)
       
  1119 +			{
       
  1120 +			des_t4_key_expand(key,&dat->ks.ks);
       
  1121 +			dat->stream.cbc = enc ? des_t4_cbc_encrypt :
       
  1122 +						des_t4_cbc_decrypt;
       
  1123 +			return 1;
       
  1124 +			}
       
  1125 +		}
       
  1126 +#endif
       
  1127  #ifdef EVP_CHECK_DES_KEY
       
  1128 -	if(DES_set_key_checked(deskey,ctx->cipher_data) != 0)
       
  1129 +	if(DES_set_key_checked(deskey,&dat->ks.ks) != 0)
       
  1130  		return 0;
       
  1131  #else
       
  1132  	DES_set_key_unchecked(deskey,ctx->cipher_data);
       
  1133 Index: crypto/evp/e_des3.c
       
  1134 ===================================================================
       
  1135 diff -ru openssl-1.0.1e/crypto/evp/e_des3.c.orig openssl-1.0.1e/crypto/evp/e_des3.c
       
  1136 --- a/crypto/evp/e_des3.c
       
  1137 +++ b/crypto/evp/e_des3.c
       
  1138 @@ -65,6 +65,33 @@
       
  1139  #include <openssl/des.h>
       
  1140  #include <openssl/rand.h>
       
  1141  
       
  1142 +typedef struct
       
  1143 +	{
       
  1144 +	union { double align; DES_key_schedule ks[3]; } ks;
       
  1145 +	union {
       
  1146 +		void (*cbc)(const void *,void *,size_t,const void *,void *);
       
  1147 +	} stream;
       
  1148 +	} DES_EDE_KEY;
       
  1149 +#define ks1 ks.ks[0]
       
  1150 +#define ks2 ks.ks[1]
       
  1151 +#define ks3 ks.ks[2]
       
  1152 +
       
  1153 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
       
  1154 +/* ---------^^^ this is not a typo, just a way to detect that
       
  1155 + * assembler support was in general requested... */
       
  1156 +#include "sparc_arch.h"
       
  1157 +
       
  1158 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1159 +
       
  1160 +#define SPARC_DES_CAPABLE	(OPENSSL_sparcv9cap_P[1] & CFR_DES)
       
  1161 +
       
  1162 +void	des_t4_key_expand(const void *key, DES_key_schedule *ks);
       
  1163 +void	des_t4_ede3_cbc_encrypt(const void *inp,void *out,size_t len,
       
  1164 +				DES_key_schedule *ks,unsigned char iv[8]);
       
  1165 +void	des_t4_ede3_cbc_decrypt(const void *inp,void *out,size_t len,
       
  1166 +				DES_key_schedule *ks,unsigned char iv[8]);
       
  1167 +#endif
       
  1168 +
       
  1169  #ifndef OPENSSL_FIPS
       
  1170  
       
  1171  static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1172 @@ -75,13 +100,6 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1173  
       
  1174  static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr);
       
  1175  
       
  1176 -typedef struct
       
  1177 -    {
       
  1178 -    DES_key_schedule ks1;/* key schedule */
       
  1179 -    DES_key_schedule ks2;/* key schedule (for ede) */
       
  1180 -    DES_key_schedule ks3;/* key schedule (for ede3) */
       
  1181 -    } DES_EDE_KEY;
       
  1182 -
       
  1183  #define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data)
       
  1184  
       
  1185  /* Because of various casts and different args can't use IMPLEMENT_BLOCK_CIPHER */
       
  1186 @@ -121,6 +141,8 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1187  static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1188  			      const unsigned char *in, size_t inl)
       
  1189  {
       
  1190 +	DES_EDE_KEY *dat = data(ctx);
       
  1191 +
       
  1192  #ifdef KSSL_DEBUG
       
  1193  	{
       
  1194          int i;
       
  1195 @@ -132,10 +154,16 @@
       
  1196  	printf("\n");
       
  1197  	}
       
  1198  #endif    /* KSSL_DEBUG */
       
  1199 +	if (dat->stream.cbc)
       
  1200 +		{
       
  1201 +		(*dat->stream.cbc)(in,out,inl,&dat->ks,ctx->iv);
       
  1202 +		return 1;
       
  1203 +		}
       
  1204 +
       
  1205  	while (inl>=EVP_MAXCHUNK)
       
  1206  		{
       
  1207  		DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK,
       
  1208 -			     &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3,
       
  1209 +			     &dat->ks1, &dat->ks2, &dat->ks3,
       
  1210  			     (DES_cblock *)ctx->iv, ctx->encrypt);
       
  1211  		inl-=EVP_MAXCHUNK;
       
  1212  		in +=EVP_MAXCHUNK;
       
  1213 @@ -143,7 +169,7 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1214  		}
       
  1215  	if (inl)
       
  1216  		DES_ede3_cbc_encrypt(in, out, (long)inl,
       
  1217 -			     &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3,
       
  1218 +			     &dat->ks1, &dat->ks2, &dat->ks3,
       
  1219                               (DES_cblock *)ctx->iv, ctx->encrypt);
       
  1220  	return 1;
       
  1221  }
       
  1222 @@ -208,9 +234,8 @@ static int des_ede3_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1223      }
       
  1224  
       
  1225  BLOCK_CIPHER_defs(des_ede, DES_EDE_KEY, NID_des_ede, 8, 16, 8, 64,
       
  1226 -			EVP_CIPH_RAND_KEY, des_ede_init_key, NULL, 
       
  1227 -			EVP_CIPHER_set_asn1_iv,
       
  1228 -			EVP_CIPHER_get_asn1_iv,
       
  1229 +			EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1230 +			des_ede_init_key, NULL, NULL, NULL,
       
  1231  			des3_ctrl)
       
  1232  
       
  1233  #define des_ede3_cfb64_cipher des_ede_cfb64_cipher
       
  1234 @@ -219,37 +246,53 @@
       
  1235  #define des_ede3_ecb_cipher des_ede_ecb_cipher
       
  1236  
       
  1237  BLOCK_CIPHER_defs(des_ede3, DES_EDE_KEY, NID_des_ede3, 8, 24, 8, 64,
       
  1238 -			EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, 
       
  1239 -			EVP_CIPHER_set_asn1_iv,
       
  1240 -			EVP_CIPHER_get_asn1_iv,
       
  1241 -			des3_ctrl)
       
  1242 +		EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1243 +		des_ede3_init_key, NULL, NULL, NULL,
       
  1244 +		des3_ctrl)
       
  1245  
       
  1246  BLOCK_CIPHER_def_cfb(des_ede3,DES_EDE_KEY,NID_des_ede3,24,8,1,
       
  1247 -		     EVP_CIPH_RAND_KEY, des_ede3_init_key,NULL,
       
  1248 -		     EVP_CIPHER_set_asn1_iv,
       
  1249 -		     EVP_CIPHER_get_asn1_iv,
       
  1250 -		     des3_ctrl)
       
  1251 +		EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1252 +		des_ede3_init_key, NULL, NULL, NULL,
       
  1253 +		des3_ctrl)
       
  1254  
       
  1255  BLOCK_CIPHER_def_cfb(des_ede3,DES_EDE_KEY,NID_des_ede3,24,8,8,
       
  1256 -		     EVP_CIPH_RAND_KEY, des_ede3_init_key,NULL,
       
  1257 -		     EVP_CIPHER_set_asn1_iv,
       
  1258 -		     EVP_CIPHER_get_asn1_iv,
       
  1259 -		     des3_ctrl)
       
  1260 +		EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1261 +		des_ede3_init_key, NULL, NULL, NULL,
       
  1262 +		des3_ctrl)
       
  1263  
       
  1264  static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1265  			    const unsigned char *iv, int enc)
       
  1266  	{
       
  1267  	DES_cblock *deskey = (DES_cblock *)key;
       
  1268 +	DES_EDE_KEY *dat = data(ctx);
       
  1269 +
       
  1270 +	dat->stream.cbc = NULL;
       
  1271 +#if defined(SPARC_DES_CAPABLE)
       
  1272 +	if (SPARC_DES_CAPABLE)
       
  1273 +		{
       
  1274 +		int mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1275 +
       
  1276 +		if (mode == EVP_CIPH_CBC_MODE)
       
  1277 +			{
       
  1278 +			des_t4_key_expand(&deskey[0],&dat->ks1);
       
  1279 +			des_t4_key_expand(&deskey[1],&dat->ks2);
       
  1280 +			memcpy(&dat->ks3,&dat->ks1,sizeof(dat->ks1));
       
  1281 +			dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt :
       
  1282 +						des_t4_ede3_cbc_decrypt;
       
  1283 +			return 1;
       
  1284 +			}
       
  1285 +		}
       
  1286 +#endif
       
  1287  #ifdef EVP_CHECK_DES_KEY
       
  1288 -	if (DES_set_key_checked(&deskey[0],&data(ctx)->ks1)
       
  1289 -		!! DES_set_key_checked(&deskey[1],&data(ctx)->ks2))
       
  1290 +	if (DES_set_key_checked(&deskey[0],&dat->ks1)
       
  1291 +		|| DES_set_key_checked(&deskey[1],&dat->ks2))
       
  1292  		return 0;
       
  1293  #else
       
  1294 -	DES_set_key_unchecked(&deskey[0],&data(ctx)->ks1);
       
  1295 -	DES_set_key_unchecked(&deskey[1],&data(ctx)->ks2);
       
  1296 +	DES_set_key_unchecked(&deskey[0],&dat->ks1);
       
  1297 +	DES_set_key_unchecked(&deskey[1],&dat->ks2);
       
  1298  #endif
       
  1299 -	memcpy(&data(ctx)->ks3,&data(ctx)->ks1,
       
  1300 -	       sizeof(data(ctx)->ks1));
       
  1301 +	memcpy(&dat->ks3,&dat->ks1,
       
  1302 +		sizeof(dat->ks1));
       
  1303  	return 1;
       
  1304  	}
       
  1305  
       
  1306 @@ -257,6 +300,8 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1307  			     const unsigned char *iv, int enc)
       
  1308  	{
       
  1309  	DES_cblock *deskey = (DES_cblock *)key;
       
  1310 +	DES_EDE_KEY *dat = data(ctx);
       
  1311 +
       
  1312  #ifdef KSSL_DEBUG
       
  1313  	{
       
  1314          int i;
       
  1315 @@ -268,15 +313,32 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1316  	}
       
  1317  #endif	/* KSSL_DEBUG */
       
  1318  
       
  1319 +	dat->stream.cbc = NULL;
       
  1320 +#if defined(SPARC_DES_CAPABLE)
       
  1321 +	if (SPARC_DES_CAPABLE)
       
  1322 +		{
       
  1323 +		int mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1324 +
       
  1325 +		if (mode == EVP_CIPH_CBC_MODE)
       
  1326 +			{
       
  1327 +			des_t4_key_expand(&deskey[0],&dat->ks1);
       
  1328 +			des_t4_key_expand(&deskey[1],&dat->ks2);
       
  1329 +			des_t4_key_expand(&deskey[2],&dat->ks3);
       
  1330 +			dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt :
       
  1331 +						des_t4_ede3_cbc_decrypt;
       
  1332 +			return 1;
       
  1333 +			}
       
  1334 +		}
       
  1335 +#endif
       
  1336  #ifdef EVP_CHECK_DES_KEY
       
  1337 -	if (DES_set_key_checked(&deskey[0],&data(ctx)->ks1)
       
  1338 -		|| DES_set_key_checked(&deskey[1],&data(ctx)->ks2)
       
  1339 -		|| DES_set_key_checked(&deskey[2],&data(ctx)->ks3))
       
  1340 +	if (DES_set_key_checked(&deskey[0],&dat->ks1)
       
  1341 +		|| DES_set_key_checked(&deskey[1],&dat->ks2)
       
  1342 +		|| DES_set_key_checked(&deskey[2],&dat->ks3))
       
  1343  		return 0;
       
  1344  #else
       
  1345 -	DES_set_key_unchecked(&deskey[0],&data(ctx)->ks1);
       
  1346 -	DES_set_key_unchecked(&deskey[1],&data(ctx)->ks2);
       
  1347 -	DES_set_key_unchecked(&deskey[2],&data(ctx)->ks3);
       
  1348 +	DES_set_key_unchecked(&deskey[0],&dat->ks1);
       
  1349 +	DES_set_key_unchecked(&deskey[1],&dat->ks2);
       
  1350 +	DES_set_key_unchecked(&deskey[2],&dat->ks3);
       
  1351  #endif
       
  1352  	return 1;
       
  1353  	}
       
  1354 Index: openssl/crypto/bn/Makefile
       
  1355 ===================================================================
       
  1356 diff -ru openssl-1.0.1e/crypto/bn/Makefile openssl-1.0.1e/crypto/bn/Makefile.new
       
  1357 --- openssl-1.0.1e/crypto/bn/Makefile 2011-05-24 17:02:24.000000000 -0700
       
  1358 +++ openssl-1.0.1e/crypto/bn/Makefile 2011-07-27 10:48:17.817470000 -0700
       
  1359 @@ -77,6 +77,12 @@
       
  1360  	$(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@
       
  1361  sparcv9-mont.s:		asm/sparcv9-mont.pl
       
  1362  	$(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@
       
  1363 +vis3-mont.s:		asm/vis3-mont.pl
       
  1364 +	$(PERL) asm/vis3-mont.pl $(CFLAGS) > $@
       
  1365 +sparct4-mont.S:	asm/sparct4-mont.pl
       
  1366 +	$(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@
       
  1367 +sparcv9-gf2m.S:	asm/sparcv9-gf2m.pl
       
  1368 +	$(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@
       
  1369  
       
  1370  bn-mips3.o:	asm/mips3.s
       
  1371  	@if [ "$(CC)" = "gcc" ]; then \
       
  1372 Index: openssl/crypto/bn/bn_exp.c
       
  1373 ===================================================================
       
  1374 diff -ru openssl-1.0.1e/crypto/bn/bn_exp.c openssl-1.0.1e/crypto/bn/bn_exp.c.new
       
  1375 --- bn_exp.c	2011/10/29 19:25:13	1.38
       
  1376 +++ bn_exp.c	2012/11/17 10:34:11	1.39
       
  1377 @@ -123,8 +123,15 @@
       
  1378  # ifndef alloca
       
  1379  #  define alloca(s) __builtin_alloca((s))
       
  1380  # endif
       
  1381 +#else
       
  1382 +#include <alloca.h>
       
  1383  #endif
       
  1384  
       
  1385 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1386 +# include "sparc_arch.h"
       
  1387 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1388 +#endif
       
  1389 +
       
  1390  /* maximum precomputation table size for *variable* sliding windows */
       
  1391  #define TABLE_SIZE	32
       
  1392  
       
  1393 @@ -467,7 +467,15 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1394  	wstart=bits-1;	/* The top bit of the window */
       
  1395  	wend=0;		/* The bottom bit of the window */
       
  1396  
       
  1397 +#if 1	/* by Shay Gueron's suggestion */
       
  1398 +	j = mont->N.top;	/* borrow j */
       
  1399 +	if (bn_wexpand(r,j) == NULL) goto err;
       
  1400 +	r->d[0] = (0-m->d[0])&BN_MASK2;		/* 2^(top*BN_BITS2) - m */
       
  1401 +	for(i=1;i<j;i++) r->d[i] = (~m->d[i])&BN_MASK2;
       
  1402 +	r->top = j;
       
  1403 +#else
       
  1404  	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
       
  1405 +#endif
       
  1406  	for (;;)
       
  1407  		{
       
  1408  		if (BN_is_bit_set(p,wstart) == 0)
       
  1409 @@ -519,6 +527,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1410  		start=0;
       
  1411  		if (wstart < 0) break;
       
  1412  		}
       
  1413 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1414 +	if (OPENSSL_sparcv9cap_P[0]&(SPARCV9_VIS3|SPARCV9_PREFER_FPU))
       
  1415 + 	{
       
  1416 + 		j = mont->N.top;	/* borrow j */
       
  1417 + 		val[0]->d[0] = 1;	/* borrow val[0] */
       
  1418 + 		for (i=1;i<j;i++) val[0]->d[i] = 0;
       
  1419 + 		val[0]->top = j;
       
  1420 + 		if (!BN_mod_mul_montgomery(rr,r,val[0],mont,ctx)) goto err;
       
  1421 + 		}
       
  1422 + 	else
       
  1423 +#endif
       
  1424  	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
       
  1425  	ret=1;
       
  1426  err:
       
  1427 @@ -528,6 +547,28 @@ err:
       
  1428  	return(ret);
       
  1429  	}
       
  1430  
       
  1431 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1432 +static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
       
  1433 +	{
       
  1434 +	BN_ULONG ret=0;
       
  1435 +	int wordpos;
       
  1436 +
       
  1437 +	wordpos = bitpos/BN_BITS2;
       
  1438 +	bitpos %= BN_BITS2;
       
  1439 +	if (wordpos>=0 && wordpos < a->top)
       
  1440 +		{
       
  1441 +		ret = a->d[wordpos]&BN_MASK2;
       
  1442 +		if (bitpos)
       
  1443 +			{
       
  1444 +			ret >>= bitpos;
       
  1445 +			if (++wordpos < a->top)
       
  1446 +				ret |= a->d[wordpos]<<(BN_BITS2-bitpos);
       
  1447 +			}
       
  1448 +		}
       
  1449 +
       
  1450 +	return ret&BN_MASK2;
       
  1451 +}
       
  1452 +#endif
       
  1453  
       
  1454  /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
       
  1455   * so that accessing any of these table values shows the same access pattern as far
       
  1456 @@ -587,6 +592,9 @@
       
  1457  	int powerbufLen = 0;
       
  1458  	unsigned char *powerbuf=NULL;
       
  1459  	BIGNUM tmp, am;
       
  1460 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc)
       
  1461 +	unsigned int t4=0;
       
  1462 +#endif
       
  1463  
       
  1464  	bn_check_top(a);
       
  1465  	bn_check_top(p);
       
  1466 @@ -621,9 +629,18 @@
       
  1467  
       
  1468  	/* Get the window size to use with size of p. */
       
  1469  	window = BN_window_bits_for_ctime_exponent_size(bits);
       
  1470 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc)
       
  1471 +	if (window>=5 && (top&15)==0 && top<=64 &&
       
  1472 +	    (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))==
       
  1473 +	    			     (CFR_MONTMUL|CFR_MONTSQR) &&
       
  1474 +	    (t4=OPENSSL_sparcv9cap_P[0]))
       
  1475 +		window=5;
       
  1476 +	else
       
  1477 +#endif
       
  1478  #if defined(OPENSSL_BN_ASM_MONT5)
       
  1479  	if (window==6 && bits<=1024) window=5;	/* ~5% improvement of 2048-bit RSA sign */
       
  1480  #endif
       
  1481 +	(void)0;
       
  1482  
       
  1483  	/* Allocate a buffer large enough to hold all of the pre-computed
       
  1484  	 * powers of am, am itself and tmp.
       
  1485 @@ -656,13 +715,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1486  	tmp.flags = am.flags = BN_FLG_STATIC_DATA;
       
  1487  
       
  1488  	/* prepare a^0 in Montgomery domain */
       
  1489 -#if 1
       
  1490 - 	if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx))	goto err;
       
  1491 -#else
       
  1492 +#if 1	/* by Shay Gueron's suggestion */
       
  1493  	tmp.d[0] = (0-m->d[0])&BN_MASK2;	/* 2^(top*BN_BITS2) - m */
       
  1494  	for (i=1;i<top;i++)
       
  1495  		tmp.d[i] = (~m->d[i])&BN_MASK2;
       
  1496  	tmp.top = top;
       
  1497 +#else
       
  1498 +	if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx))	goto err;
       
  1499  #endif
       
  1500  
       
  1501  	/* prepare a^1 in Montgomery domain */
       
  1502 @@ -673,6 +690,121 @@
       
  1503  		}
       
  1504  	else	if (!BN_to_montgomery(&am,a,mont,ctx))		goto err;
       
  1505  
       
  1506 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc)
       
  1507 +    if (t4)
       
  1508 +	{
       
  1509 +	typedef int (*bn_pwr5_mont_f)(BN_ULONG *tp,const BN_ULONG *np,
       
  1510 +			const BN_ULONG *n0,const void *table,int power,int bits);
       
  1511 +	int bn_pwr5_mont_t4_8(BN_ULONG *tp,const BN_ULONG *np,
       
  1512 +			const BN_ULONG *n0,const void *table,int power,int bits);
       
  1513 +	int bn_pwr5_mont_t4_16(BN_ULONG *tp,const BN_ULONG *np,
       
  1514 +			const BN_ULONG *n0,const void *table,int power,int bits);
       
  1515 +	int bn_pwr5_mont_t4_24(BN_ULONG *tp,const BN_ULONG *np,
       
  1516 +			const BN_ULONG *n0,const void *table,int power,int bits);
       
  1517 +	int bn_pwr5_mont_t4_32(BN_ULONG *tp,const BN_ULONG *np,
       
  1518 +			const BN_ULONG *n0,const void *table,int power,int bits);
       
  1519 +	static const bn_pwr5_mont_f pwr5_funcs[4] = {
       
  1520 +			bn_pwr5_mont_t4_8,	bn_pwr5_mont_t4_16,
       
  1521 +			bn_pwr5_mont_t4_24,	bn_pwr5_mont_t4_32 };
       
  1522 +	bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top/16-1];
       
  1523 +
       
  1524 +	typedef int (*bn_mul_mont_f)(BN_ULONG *rp,const BN_ULONG *ap,
       
  1525 +			const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1526 +	int bn_mul_mont_t4_8(BN_ULONG *rp,const BN_ULONG *ap,
       
  1527 +			const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1528 +	int bn_mul_mont_t4_16(BN_ULONG *rp,const BN_ULONG *ap,
       
  1529 +			const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1530 +	int bn_mul_mont_t4_24(BN_ULONG *rp,const BN_ULONG *ap,
       
  1531 +			const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1532 +	int bn_mul_mont_t4_32(BN_ULONG *rp,const BN_ULONG *ap,
       
  1533 +			const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1534 +	static const bn_mul_mont_f mul_funcs[4] = {
       
  1535 +			bn_mul_mont_t4_8,	bn_mul_mont_t4_16,
       
  1536 +			bn_mul_mont_t4_24,	bn_mul_mont_t4_32 };
       
  1537 +	bn_mul_mont_f mul_worker = mul_funcs[top/16-1];
       
  1538 +
       
  1539 +	void bn_mul_mont_vis3(BN_ULONG *rp,const BN_ULONG *ap,
       
  1540 +			const void *bp,const BN_ULONG *np,
       
  1541 +			const BN_ULONG *n0,int num);
       
  1542 +	void bn_mul_mont_t4(BN_ULONG *rp,const BN_ULONG *ap,
       
  1543 +			const void *bp,const BN_ULONG *np,
       
  1544 +			const BN_ULONG *n0,int num);
       
  1545 +	void bn_mul_mont_gather5_t4(BN_ULONG *rp,const BN_ULONG *ap,
       
  1546 +			const void *table,const BN_ULONG *np,
       
  1547 +			const BN_ULONG *n0,int num,int power);
       
  1548 +	void bn_flip_n_scatter5_t4(const BN_ULONG *inp,size_t num,
       
  1549 +			void *table,size_t power);
       
  1550 +	void bn_gather5_t4(BN_ULONG *out,size_t num,
       
  1551 +			void *table,size_t power);
       
  1552 +	void bn_flip_t4(BN_ULONG *dst,BN_ULONG *src,size_t num);
       
  1553 +
       
  1554 +	BN_ULONG *np=mont->N.d, *n0=mont->n0;
       
  1555 +	int stride = 5*(6-(top/16-1));	/* multiple of 5, but less than 32 */
       
  1556 +
       
  1557 +	/* BN_to_montgomery can contaminate words above .top
       
  1558 +	 * [in BN_DEBUG[_DEBUG] build]... */
       
  1559 +	for (i=am.top; i<top; i++)	am.d[i]=0;
       
  1560 +	for (i=tmp.top; i<top; i++)	tmp.d[i]=0;
       
  1561 +
       
  1562 +	bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,0);
       
  1563 +	bn_flip_n_scatter5_t4(am.d,top,powerbuf,1);
       
  1564 +	if (!(*mul_worker)(tmp.d,am.d,am.d,np,n0) &&
       
  1565 +	    !(*mul_worker)(tmp.d,am.d,am.d,np,n0))
       
  1566 +		bn_mul_mont_vis3(tmp.d,am.d,am.d,np,n0,top);
       
  1567 +	bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,2);
       
  1568 +
       
  1569 +	for (i=3; i<32; i++)
       
  1570 +		{
       
  1571 +		/* Calculate a^i = a^(i-1) * a */
       
  1572 +		if (!(*mul_worker)(tmp.d,tmp.d,am.d,np,n0) &&
       
  1573 +		    !(*mul_worker)(tmp.d,tmp.d,am.d,np,n0))
       
  1574 +			bn_mul_mont_vis3(tmp.d,tmp.d,am.d,np,n0,top);
       
  1575 +		bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,i);
       
  1576 +		}
       
  1577 +
       
  1578 +	/* switch to 64-bit domain */ 
       
  1579 +	np = alloca(top*sizeof(BN_ULONG));
       
  1580 +	top /= 2;
       
  1581 +	bn_flip_t4(np,mont->N.d,top);
       
  1582 +
       
  1583 +	bits--;
       
  1584 +	for (wvalue=0, i=bits%5; i>=0; i--,bits--)
       
  1585 +		wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
       
  1586 +	bn_gather5_t4(tmp.d,top,powerbuf,wvalue);
       
  1587 +
       
  1588 +	/* Scan the exponent one window at a time starting from the most
       
  1589 +	 * significant bits.
       
  1590 +	 */
       
  1591 +	while (bits >= 0)
       
  1592 +		{
       
  1593 +		if (bits < stride) stride = bits+1;
       
  1594 +		bits -= stride;
       
  1595 +		wvalue = (bn_get_bits(p,bits+1));
       
  1596 +
       
  1597 +		if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) continue;
       
  1598 +		/* retry once and fall back */
       
  1599 +		if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) continue;
       
  1600 +
       
  1601 +		bits += stride-5;
       
  1602 +		wvalue >>= stride-5;
       
  1603 +		wvalue &= 31;
       
  1604 +		bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1605 +		bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1606 +		bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1607 +		bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1608 +		bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1609 +		bn_mul_mont_gather5_t4(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
       
  1610 +		}
       
  1611 +
       
  1612 +	bn_flip_t4(tmp.d,tmp.d,top);
       
  1613 +	top *= 2;
       
  1614 +	/* back to 32-bit domain */
       
  1615 +	tmp.top=top;
       
  1616 +	bn_correct_top(&tmp);
       
  1617 +	OPENSSL_cleanse(np,top*sizeof(BN_ULONG));
       
  1618 +	}
       
  1619 +    else
       
  1620 +#endif
       
  1621  #if defined(OPENSSL_BN_ASM_MONT5)
       
  1622      /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
       
  1623       * specifically optimization of cache-timing attack countermeasures
       
  1624 @@ -816,6 +990,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1625  	}
       
  1626  
       
  1627   	/* Convert the final result from montgomery to standard format */
       
  1628 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1629 +	if (OPENSSL_sparcv9cap_P[0]&(SPARCV9_VIS3|SPARCV9_PREFER_FPU))
       
  1630 +		{
       
  1631 +		am.d[0] = 1;	/* borrow am */
       
  1632 +		for (i=1;i<top;i++) am.d[i] = 0;
       
  1633 +		if (!BN_mod_mul_montgomery(rr,&tmp,&am,mont,ctx)) goto err;
       
  1634 +		}
       
  1635 +	else
       
  1636 +#endif
       
  1637  	if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err;
       
  1638  	ret=1;
       
  1639  err:
       
  1640 Index: openssl/apps/speed.c
       
  1641 ===================================================================
       
  1642 diff -ru openssl-1.0.1e/apps/speed.c openssl-1.0.1e/apps/speed.c
       
  1643 --- openssl-1.0.1e/apps/speed.c 2011-05-24 17:02:24.000000000 -0700
       
  1644 +++ openssl-1.0.1e/apps/speed.c 2011-07-27 10:48:17.817470000 -0700
       
  1645 @@ -1551,7 +1551,7 @@
       
  1646  			print_message(names[D_MD5],c[D_MD5][j],lengths[j]);
       
  1647  			Time_F(START);
       
  1648  			for (count=0,run=1; COND(c[D_MD5][j]); count++)
       
  1649 -				EVP_Digest(&(buf[0]),(unsigned long)lengths[j],&(md5[0]),NULL,EVP_get_digestbyname("md5"),NULL);
       
  1650 +				MD5(buf,lengths[j],md5);
       
  1651  			d=Time_F(STOP);
       
  1652  			print_result(D_MD5,j,count,d);
       
  1653  			}
       
  1654 @@ -1591,7 +1591,7 @@
       
  1655  			print_message(names[D_SHA1],c[D_SHA1][j],lengths[j]);
       
  1656  			Time_F(START);
       
  1657  			for (count=0,run=1; COND(c[D_SHA1][j]); count++)
       
  1658 -				EVP_Digest(buf,(unsigned long)lengths[j],&(sha[0]),NULL,EVP_sha1(),NULL);
       
  1659 +				SHA1(buf,lengths[j],sha);
       
  1660  			d=Time_F(STOP);
       
  1661  			print_result(D_SHA1,j,count,d);
       
  1662  			}
       
  1663 Index: openssl/crypto/aes/Makefile
       
  1664 ===================================================================
       
  1665 --- Makefile    Thu May  2 13:42:37 2013
       
  1666 +++ Makefile.orig       Thu May  2 13:41:51 2013
       
  1667 @@ -69,6 +69,9 @@
       
  1668  aes-sparcv9.s: asm/aes-sparcv9.pl
       
  1669  	$(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@
       
  1670  
       
  1671 +aest4-sparcv9.s: asm/aest4-sparcv9.pl
       
  1672 +	$(PERL) asm/aest4-sparcv9.pl $(CFLAGS) > $@
       
  1673 +
       
  1674  aes-ppc.s:	asm/aes-ppc.pl
       
  1675  	$(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@
       
  1676 
       
  1677 Index: openssl/crypto/evp/e_aes.c
       
  1678 ===================================================================
       
  1679 --- e_aes.c	Mon Feb 11 07:26:04 2013
       
  1680 +++ e_aes.c.56	Thu May  2 14:26:35 2013
       
  1681 @@ -56,13 +58,12 @@
       
  1682  #include <assert.h>
       
  1683  #include <openssl/aes.h>
       
  1684  #include "evp_locl.h"
       
  1685 -#ifndef OPENSSL_FIPS
       
  1686  #include "modes_lcl.h"
       
  1687  #include <openssl/rand.h>
       
  1688  
       
  1689  typedef struct
       
  1690  	{
       
  1691 -	AES_KEY ks;
       
  1692 +	union { double align; AES_KEY ks; } ks;
       
  1693  	block128_f block;
       
  1694  	union {
       
  1695  		cbc128_f cbc;
       
  1696 @@ -72,7 +73,7 @@
       
  1697  
       
  1698  typedef struct
       
  1699  	{
       
  1700 -	AES_KEY ks;		/* AES key schedule to use */
       
  1701 +	union { double align; AES_KEY ks; } ks;	/* AES key schedule to use */
       
  1702  	int key_set;		/* Set if key initialised */
       
  1703  	int iv_set;		/* Set if an iv is set */
       
  1704  	GCM128_CONTEXT gcm;
       
  1705 @@ -86,7 +87,7 @@
       
  1706  
       
  1707  typedef struct
       
  1708  	{
       
  1709 -	AES_KEY ks1, ks2;	/* AES key schedules to use */
       
  1710 +	union { double align; AES_KEY ks; } ks1, ks2;	/* AES key schedules to use */
       
  1711  	XTS128_CONTEXT xts;
       
  1712  	void     (*stream)(const unsigned char *in,
       
  1713  			unsigned char *out, size_t length,
       
  1714 @@ -96,7 +97,7 @@
       
  1715  
       
  1716  typedef struct
       
  1717  	{
       
  1718 -	AES_KEY ks;		/* AES key schedule to use */
       
  1719 +	union { double align; AES_KEY ks; } ks;	/* AES key schedule to use */
       
  1720  	int key_set;		/* Set if key initialised */
       
  1721  	int iv_set;		/* Set if an iv is set */
       
  1722  	int tag_set;		/* Set if tag is valid */
       
  1723 @@ -160,7 +161,7 @@
       
  1724  	defined(_M_AMD64)	|| defined(_M_X64)	|| \
       
  1725  	defined(__INTEL__)				)
       
  1726  
       
  1727 -extern unsigned int OPENSSL_ia32cap_P[2];
       
  1728 +extern unsigned int OPENSSL_ia32cap_P[];
       
  1729  
       
  1730  #ifdef VPAES_ASM
       
  1731  #define VPAES_CAPABLE	(OPENSSL_ia32cap_P[1]&(1<<(41-32)))
       
  1732 @@ -310,7 +311,7 @@
       
  1733  		return 1;
       
  1734  	if (key)
       
  1735  		{
       
  1736 -		aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
       
  1737 +		aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
       
  1738  		CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
       
  1739  				(block128_f)aesni_encrypt);
       
  1740  		gctx->ctr = (ctr128_f)aesni_ctr32_encrypt_blocks;
       
  1741 @@ -355,19 +356,19 @@
       
  1742  		/* key_len is two AES keys */
       
  1743  		if (enc)
       
  1744  			{
       
  1745 -			aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  1746 +			aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  1747  			xctx->xts.block1 = (block128_f)aesni_encrypt;
       
  1748  			xctx->stream = aesni_xts_encrypt;
       
  1749  			}
       
  1750  		else
       
  1751  			{
       
  1752 -			aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  1753 +			aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  1754  			xctx->xts.block1 = (block128_f)aesni_decrypt;
       
  1755  			xctx->stream = aesni_xts_decrypt;
       
  1756  			}
       
  1757  
       
  1758  		aesni_set_encrypt_key(key + ctx->key_len/2,
       
  1759 -						ctx->key_len * 4, &xctx->ks2);
       
  1760 +						ctx->key_len * 4, &xctx->ks2.ks);
       
  1761  		xctx->xts.block2 = (block128_f)aesni_encrypt;
       
  1762  
       
  1763  		xctx->xts.key1 = &xctx->ks1;
       
  1764 @@ -394,7 +395,7 @@
       
  1765  		return 1;
       
  1766  	if (key)
       
  1767  		{
       
  1768 -		aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
       
  1769 +		aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
       
  1770  		CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  1771  					&cctx->ks, (block128_f)aesni_encrypt);
       
  1772  		cctx->str = enc?(ccm128_f)aesni_ccm64_encrypt_blocks :
       
  1773 @@ -456,6 +457,379 @@
       
  1774  const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
       
  1775  { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; }
       
  1776  
       
  1777 +#elif	defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
       
  1778 +
       
  1779 +#include "sparc_arch.h"
       
  1780 +
       
  1781 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1782 +
       
  1783 +#define	SPARC_AES_CAPABLE	(OPENSSL_sparcv9cap_P[1] & CFR_AES)
       
  1784 +
       
  1785 +void	aes_t4_set_encrypt_key (const unsigned char *key, int bits,
       
  1786 +				AES_KEY *ks);
       
  1787 +void	aes_t4_set_decrypt_key (const unsigned char *key, int bits,
       
  1788 +				AES_KEY *ks);
       
  1789 +void	aes_t4_encrypt (const unsigned char *in, unsigned char *out,
       
  1790 +				const AES_KEY *key);
       
  1791 +void	aes_t4_decrypt (const unsigned char *in, unsigned char *out,
       
  1792 +				const AES_KEY *key);
       
  1793 +/*
       
  1794 + * Key-length specific subroutines were chosen for the following reason.
       
  1795 + * Each SPARC T4 core can execute up to 8 threads which share core's
       
  1796 + * resources. Loading as much key material to registers allows to
       
  1797 + * minimize references to shared memory interface, as well as amount
       
  1798 + * of instructions in inner loops [much needed on T4]. But then having
       
  1799 + * non-key-length specific routines would require conditional branches
       
  1800 + * either in inner loops or on subroutines' entries. Former is hardly
       
  1801 + * acceptable, while latter means code size increase to size occupied
       
  1802 + * by multiple key-length specific subroutines, so why fight?
       
  1803 + */
       
  1804 +void	aes128_t4_cbc_encrypt (const unsigned char *in, unsigned char *out,
       
  1805 +				size_t len, const AES_KEY *key,
       
  1806 +				unsigned char *ivec);
       
  1807 +void	aes128_t4_cbc_decrypt (const unsigned char *in, unsigned char *out,
       
  1808 +				size_t len, const AES_KEY *key,
       
  1809 +				unsigned char *ivec);
       
  1810 +void	aes192_t4_cbc_encrypt (const unsigned char *in, unsigned char *out,
       
  1811 +				size_t len, const AES_KEY *key,
       
  1812 +				unsigned char *ivec);
       
  1813 +void	aes192_t4_cbc_decrypt (const unsigned char *in, unsigned char *out,
       
  1814 +				size_t len, const AES_KEY *key,
       
  1815 +				unsigned char *ivec);
       
  1816 +void	aes256_t4_cbc_encrypt (const unsigned char *in, unsigned char *out,
       
  1817 +				size_t len, const AES_KEY *key,
       
  1818 +				unsigned char *ivec);
       
  1819 +void	aes256_t4_cbc_decrypt (const unsigned char *in, unsigned char *out,
       
  1820 +				size_t len, const AES_KEY *key,
       
  1821 +				unsigned char *ivec);
       
  1822 +void	aes128_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
       
  1823 +				size_t blocks, const AES_KEY *key,
       
  1824 +				unsigned char *ivec);
       
  1825 +void	aes192_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
       
  1826 +				size_t blocks, const AES_KEY *key,
       
  1827 +				unsigned char *ivec);
       
  1828 +void	aes256_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
       
  1829 +				size_t blocks, const AES_KEY *key,
       
  1830 +				unsigned char *ivec);
       
  1831 +
       
  1832 +static int aes_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1833 +		   const unsigned char *iv, int enc)
       
  1834 +	{
       
  1835 +	int ret, mode, bits;
       
  1836 +	EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data;
       
  1837 +
       
  1838 +	mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1839 +	bits = ctx->key_len*8;
       
  1840 +	if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE)
       
  1841 +	    && !enc)
       
  1842 +		{
       
  1843 +		    ret = 0;
       
  1844 +		    aes_t4_set_decrypt_key(key, bits, ctx->cipher_data);
       
  1845 +		    dat->block	= (block128_f)aes_t4_decrypt;
       
  1846 +		    switch (bits) {
       
  1847 +		    case 128:
       
  1848 +			dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?
       
  1849 +						(cbc128_f)aes128_t4_cbc_decrypt :
       
  1850 +						NULL;
       
  1851 +			break;
       
  1852 +		    case 192:
       
  1853 +			dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?
       
  1854 +						(cbc128_f)aes192_t4_cbc_decrypt :
       
  1855 +						NULL;
       
  1856 +			break;
       
  1857 +		    case 256:
       
  1858 +			dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?
       
  1859 +						(cbc128_f)aes256_t4_cbc_decrypt :
       
  1860 +						NULL;
       
  1861 +			break;
       
  1862 +		    default:
       
  1863 +			ret = -1;
       
  1864 +		    }
       
  1865 +		}
       
  1866 +	else	{
       
  1867 +		    ret = 0;
       
  1868 +		    aes_t4_set_encrypt_key(key, bits, ctx->cipher_data);
       
  1869 +		    dat->block	= (block128_f)aes_t4_encrypt;
       
  1870 +		    switch (bits) {
       
  1871 +		    case 128:
       
  1872 +			if (mode==EVP_CIPH_CBC_MODE)
       
  1873 +				dat->stream.cbc	= (cbc128_f)aes128_t4_cbc_encrypt;
       
  1874 +			else if (mode==EVP_CIPH_CTR_MODE)
       
  1875 +				dat->stream.ctr = (ctr128_f)aes128_t4_ctr32_encrypt;
       
  1876 +			else
       
  1877 +				dat->stream.cbc = NULL;
       
  1878 +			break;
       
  1879 +		    case 192:
       
  1880 +			if (mode==EVP_CIPH_CBC_MODE)
       
  1881 +				dat->stream.cbc	= (cbc128_f)aes192_t4_cbc_encrypt;
       
  1882 +			else if (mode==EVP_CIPH_CTR_MODE)
       
  1883 +				dat->stream.ctr = (ctr128_f)aes192_t4_ctr32_encrypt;
       
  1884 +			else
       
  1885 +				dat->stream.cbc = NULL;
       
  1886 +			break;
       
  1887 +		    case 256:
       
  1888 +			if (mode==EVP_CIPH_CBC_MODE)
       
  1889 +				dat->stream.cbc	= (cbc128_f)aes256_t4_cbc_encrypt;
       
  1890 +			else if (mode==EVP_CIPH_CTR_MODE)
       
  1891 +				dat->stream.ctr = (ctr128_f)aes256_t4_ctr32_encrypt;
       
  1892 +			else
       
  1893 +				dat->stream.cbc = NULL;
       
  1894 +			break;
       
  1895 +		    default:
       
  1896 +			ret = -1;
       
  1897 +		    }
       
  1898 +		}
       
  1899 +
       
  1900 +	if(ret < 0)
       
  1901 +		{
       
  1902 +		EVPerr(EVP_F_AES_T4_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED);
       
  1903 +		return 0;
       
  1904 +		}
       
  1905 +
       
  1906 +	return 1;
       
  1907 +	}
       
  1908 +
       
  1909 +#define aes_t4_cbc_cipher aes_cbc_cipher
       
  1910 +static int aes_t4_cbc_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1911 +	const unsigned char *in, size_t len);
       
  1912 +
       
  1913 +#define aes_t4_ecb_cipher aes_ecb_cipher 
       
  1914 +static int aes_t4_ecb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1915 +	const unsigned char *in, size_t len);
       
  1916 +
       
  1917 +#define aes_t4_ofb_cipher aes_ofb_cipher
       
  1918 +static int aes_t4_ofb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1919 +	const unsigned char *in,size_t len);
       
  1920 +
       
  1921 +#define aes_t4_cfb_cipher aes_cfb_cipher
       
  1922 +static int aes_t4_cfb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1923 +	const unsigned char *in,size_t len);
       
  1924 +
       
  1925 +#define aes_t4_cfb8_cipher aes_cfb8_cipher
       
  1926 +static int aes_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1927 +	const unsigned char *in,size_t len);
       
  1928 +
       
  1929 +#define aes_t4_cfb1_cipher aes_cfb1_cipher
       
  1930 +static int aes_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1931 +	const unsigned char *in,size_t len);
       
  1932 +
       
  1933 +#define aes_t4_ctr_cipher aes_ctr_cipher
       
  1934 +static int aes_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1935 +		const unsigned char *in, size_t len);
       
  1936 +
       
  1937 +static int aes_t4_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1938 +                        const unsigned char *iv, int enc)
       
  1939 +	{
       
  1940 +	EVP_AES_GCM_CTX *gctx = ctx->cipher_data;
       
  1941 +	if (!iv && !key)
       
  1942 +		return 1;
       
  1943 +	if (key)
       
  1944 +		{
       
  1945 +		int bits = ctx->key_len * 8;
       
  1946 +		aes_t4_set_encrypt_key(key, bits, &gctx->ks.ks);
       
  1947 +		CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
       
  1948 +				(block128_f)aes_t4_encrypt);
       
  1949 +		switch (bits) {
       
  1950 +		    case 128:
       
  1951 +			gctx->ctr = (ctr128_f)aes128_t4_ctr32_encrypt;
       
  1952 +			break;
       
  1953 +		    case 192:
       
  1954 +			gctx->ctr = (ctr128_f)aes192_t4_ctr32_encrypt;
       
  1955 +			break;
       
  1956 +		    case 256:
       
  1957 +			gctx->ctr = (ctr128_f)aes256_t4_ctr32_encrypt;
       
  1958 +			break;
       
  1959 +		    default:
       
  1960 +			return 0;
       
  1961 +		}
       
  1962 +		/* If we have an iv can set it directly, otherwise use
       
  1963 +		 * saved IV.
       
  1964 +		 */
       
  1965 +		if (iv == NULL && gctx->iv_set)
       
  1966 +			iv = gctx->iv;
       
  1967 +		if (iv)
       
  1968 +			{
       
  1969 +			CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen);
       
  1970 +			gctx->iv_set = 1;
       
  1971 +			}
       
  1972 +		gctx->key_set = 1;
       
  1973 +		}
       
  1974 +	else
       
  1975 +		{
       
  1976 +		/* If key set use IV, otherwise copy */
       
  1977 +		if (gctx->key_set)
       
  1978 +			CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen);
       
  1979 +		else
       
  1980 +			memcpy(gctx->iv, iv, gctx->ivlen);
       
  1981 +		gctx->iv_set = 1;
       
  1982 +		gctx->iv_gen = 0;
       
  1983 +		}
       
  1984 +	return 1;
       
  1985 +	}
       
  1986 +
       
  1987 +#define aes_t4_gcm_cipher aes_gcm_cipher
       
  1988 +static int aes_t4_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1989 +		const unsigned char *in, size_t len);
       
  1990 +
       
  1991 +static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1992 +                        const unsigned char *iv, int enc)
       
  1993 +	{
       
  1994 +	EVP_AES_XTS_CTX *xctx = ctx->cipher_data;
       
  1995 +	if (!iv && !key)
       
  1996 +		return 1;
       
  1997 +
       
  1998 +	if (key)
       
  1999 +		{
       
  2000 +		int bits = ctx->key_len * 4;
       
  2001 +		/* key_len is two AES keys */
       
  2002 +		if (enc)
       
  2003 +			{
       
  2004 +			aes_t4_set_encrypt_key(key, bits, &xctx->ks1.ks);
       
  2005 +			xctx->xts.block1 = (block128_f)aes_t4_encrypt;
       
  2006 +#if 0 /* not yet */
       
  2007 +			switch (bits) {
       
  2008 +			    case 128:
       
  2009 +				xctx->stream = aes128_t4_xts_encrypt;
       
  2010 +				break;
       
  2011 +			    case 192:
       
  2012 +				xctx->stream = aes192_t4_xts_encrypt;
       
  2013 +				break;
       
  2014 +			    case 256:
       
  2015 +				xctx->stream = aes256_t4_xts_encrypt;
       
  2016 +				break;
       
  2017 +			    default:
       
  2018 +				return 0;
       
  2019 +			    }
       
  2020 +#endif
       
  2021 +			}
       
  2022 +		else
       
  2023 +			{
       
  2024 +			aes_t4_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2025 +			xctx->xts.block1 = (block128_f)aes_t4_decrypt;
       
  2026 +#if 0 /* not yet */
       
  2027 +			switch (bits) {
       
  2028 +			    case 128:
       
  2029 +				xctx->stream = aes128_t4_xts_decrypt;
       
  2030 +				break;
       
  2031 +			    case 192:
       
  2032 +				xctx->stream = aes192_t4_xts_decrypt;
       
  2033 +				break;
       
  2034 +			    case 256:
       
  2035 +				xctx->stream = aes256_t4_xts_decrypt;
       
  2036 +				break;
       
  2037 +			    default:
       
  2038 +				return 0;
       
  2039 +			    }
       
  2040 +#endif
       
  2041 +			}
       
  2042 +
       
  2043 +		aes_t4_set_encrypt_key(key + ctx->key_len/2,
       
  2044 +						ctx->key_len * 4, &xctx->ks2.ks);
       
  2045 +		xctx->xts.block2 = (block128_f)aes_t4_encrypt;
       
  2046 +
       
  2047 +		xctx->xts.key1 = &xctx->ks1;
       
  2048 +		}
       
  2049 +
       
  2050 +	if (iv)
       
  2051 +		{
       
  2052 +		xctx->xts.key2 = &xctx->ks2;
       
  2053 +		memcpy(ctx->iv, iv, 16);
       
  2054 +		}
       
  2055 +
       
  2056 +	return 1;
       
  2057 +	}
       
  2058 +
       
  2059 +#define aes_t4_xts_cipher aes_xts_cipher
       
  2060 +static int aes_t4_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  2061 +		const unsigned char *in, size_t len);
       
  2062 +
       
  2063 +static int aes_t4_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  2064 +                        const unsigned char *iv, int enc)
       
  2065 +	{	/* T4 CCM init: key and iv are each optional and may arrive in separate calls; returns 1 on success */
       
  2066 +	EVP_AES_CCM_CTX *cctx = ctx->cipher_data;
       
  2067 +	if (!iv && !key)	/* nothing supplied: nothing to install */
       
  2068 +		return 1;
       
  2069 +	if (key)
       
  2070 +		{
       
  2071 +		int bits = ctx->key_len * 8;	/* key size as passed to the T4 key setup */
       
  2072 +		aes_t4_set_encrypt_key(key, bits, &cctx->ks.ks);
       
  2073 +		CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  2074 +					&cctx->ks, (block128_f)aes_t4_encrypt);
       
  2075 +		cctx->str = NULL;	/* no T4 ccm64 routine yet; set explicitly, as the generic path does */
       
  2076 +#if 0 /* not yet */
       
  2077 +		switch (bits) {
       
  2078 +		    case 128:
       
  2079 +			cctx->str = enc?(ccm128_f)aes128_t4_ccm64_encrypt :
       
  2080 +				(ccm128_f)aes128_t4_ccm64_decrypt;
       
  2081 +			break;
       
  2082 +		    case 192:
       
  2083 +			cctx->str = enc?(ccm128_f)aes192_t4_ccm64_encrypt :
       
  2084 +				(ccm128_f)aes192_t4_ccm64_decrypt;
       
  2085 +			break;
       
  2086 +		    case 256:
       
  2087 +			cctx->str = enc?(ccm128_f)aes256_t4_ccm64_encrypt :
       
  2088 +				(ccm128_f)aes256_t4_ccm64_decrypt;
       
  2089 +			break;
       
  2090 +		    default: return 0;	/* unsupported key size */
       
  2091 +		    }
       
  2092 +#endif
       
  2093 +		cctx->key_set = 1;
       
  2094 +		}
       
  2095 +	if (iv)
       
  2096 +		{
       
  2097 +		memcpy(ctx->iv, iv, 15 - cctx->L);	/* only the first 15 - L bytes of iv are kept */
       
  2098 +		cctx->iv_set = 1;
       
  2099 +		}
       
  2100 +	return 1;
       
  2101 +	}
       
  2102 +
       
  2103 +#define aes_t4_ccm_cipher aes_ccm_cipher	/* the T4 CCM cipher routine is aes_ccm_cipher itself; only the name differs */
       
  2104 +static int aes_t4_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  2105 +		const unsigned char *in, size_t len);	/* after the #define above, this prototype declares aes_ccm_cipher */
       
  2106 +
       
  2107 +#define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
       
  2108 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { /* table using the T4 routines */ \
       
  2109 +	nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \
       
  2110 +	flags|EVP_CIPH_##MODE##_MODE,	\
       
  2111 +	aes_t4_init_key,		/* T4 key setup */ \
       
  2112 +	aes_t4_##mode##_cipher,		\
       
  2113 +	NULL,				\
       
  2114 +	sizeof(EVP_AES_KEY),		/* same context size in both tables */ \
       
  2115 +	NULL,NULL,NULL,NULL }; \
       
  2116 +static const EVP_CIPHER aes_##keylen##_##mode = { /* table using the non-T4 routines */ \
       
  2117 +	nid##_##keylen##_##nmode,blocksize,	\
       
  2118 +	keylen/8,ivlen, \
       
  2119 +	flags|EVP_CIPH_##MODE##_MODE,	\
       
  2120 +	aes_init_key,			\
       
  2121 +	aes_##mode##_cipher,		\
       
  2122 +	NULL,				\
       
  2123 +	sizeof(EVP_AES_KEY),		\
       
  2124 +	NULL,NULL,NULL,NULL }; \
       
  2125 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) /* public accessor for this keylen/mode pair */ \
       
  2126 +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } /* T4 table when SPARC_AES_CAPABLE is non-zero, else the other one */
       
  2127 +
       
  2128 +#define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \
       
  2129 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { /* table using the T4 routines */ \
       
  2130 +	nid##_##keylen##_##mode,blocksize, \
       
  2131 +	(EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, /* key length is doubled for XTS */ \
       
  2132 +	flags|EVP_CIPH_##MODE##_MODE,	\
       
  2133 +	aes_t4_##mode##_init_key,	/* per-mode T4 key setup */ \
       
  2134 +	aes_t4_##mode##_cipher,		\
       
  2135 +	aes_##mode##_cleanup,		/* cleanup is shared with the non-T4 table */ \
       
  2136 +	sizeof(EVP_AES_##MODE##_CTX),	\
       
  2137 +	NULL,NULL,aes_##mode##_ctrl,NULL }; /* ctrl is shared with the non-T4 table */ \
       
  2138 +static const EVP_CIPHER aes_##keylen##_##mode = { /* table using the non-T4 routines */ \
       
  2139 +	nid##_##keylen##_##mode,blocksize, \
       
  2140 +	(EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \
       
  2141 +	flags|EVP_CIPH_##MODE##_MODE,	\
       
  2142 +	aes_##mode##_init_key,		\
       
  2143 +	aes_##mode##_cipher,		\
       
  2144 +	aes_##mode##_cleanup,		\
       
  2145 +	sizeof(EVP_AES_##MODE##_CTX),	\
       
  2146 +	NULL,NULL,aes_##mode##_ctrl,NULL }; \
       
  2147 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) /* public accessor for this keylen/mode pair */ \
       
  2148 +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } /* T4 table when SPARC_AES_CAPABLE is non-zero, else the other one */
       
  2149 +
       
  2150  #else
       
  2151  
       
  2152  #define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
       
  2153 @@ -505,7 +879,7 @@
       
  2154  #ifdef BSAES_CAPABLE
       
  2155  	    if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE)
       
  2156  		{
       
  2157 -		ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
       
  2158 +		ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
       
  2159  		dat->block	= (block128_f)AES_decrypt;
       
  2160  		dat->stream.cbc	= (cbc128_f)bsaes_cbc_encrypt;
       
  2161  		}
       
  2162 @@ -514,7 +888,7 @@
       
  2163  #ifdef VPAES_CAPABLE
       
  2164  	    if (VPAES_CAPABLE)
       
  2165  		{
       
  2166 -		ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
       
  2167 +		ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
       
  2168  		dat->block	= (block128_f)vpaes_decrypt;
       
  2169  		dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?
       
  2170  					(cbc128_f)vpaes_cbc_encrypt :
       
  2171 @@ -523,7 +897,7 @@
       
  2172  	    else
       
  2173  #endif
       
  2174  		{
       
  2175 -		ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks);
       
  2176 +		ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks);
       
  2177  		dat->block	= (block128_f)AES_decrypt;
       
  2178  		dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?
       
  2179  					(cbc128_f)AES_cbc_encrypt :
       
  2180 @@ -533,7 +907,7 @@
       
  2181  #ifdef BSAES_CAPABLE
       
  2182  	    if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE)
       
  2183  		{
       
  2184 -		ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
       
  2185 +		ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
       
  2186  		dat->block	= (block128_f)AES_encrypt;
       
  2187  		dat->stream.ctr	= (ctr128_f)bsaes_ctr32_encrypt_blocks;
       
  2188  		}
       
  2189 @@ -542,7 +916,7 @@
       
  2190  #ifdef VPAES_CAPABLE
       
  2191  	    if (VPAES_CAPABLE)
       
  2192  		{
       
  2193 -		ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
       
  2194 +		ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
       
  2195  		dat->block	= (block128_f)vpaes_encrypt;
       
  2196  		dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?
       
  2197  					(cbc128_f)vpaes_cbc_encrypt :
       
  2198 @@ -551,7 +925,7 @@
       
  2199  	    else
       
  2200  #endif
       
  2201  		{
       
  2202 -		ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks);
       
  2203 +		ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks);
       
  2204  		dat->block	= (block128_f)AES_encrypt;
       
  2205  		dat->stream.cbc	= mode==EVP_CIPH_CBC_MODE ?
       
  2206  					(cbc128_f)AES_cbc_encrypt :
       
  2207 @@ -828,7 +1202,7 @@
       
  2208  #ifdef BSAES_CAPABLE
       
  2209  		if (BSAES_CAPABLE)
       
  2210  			{
       
  2211 -			AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks);
       
  2212 +			AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks);
       
  2213  			CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,
       
  2214  					(block128_f)AES_encrypt);
       
  2215  			gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks;
       
  2216 @@ -839,7 +1213,7 @@
       
  2217  #ifdef VPAES_CAPABLE
       
  2218  		if (VPAES_CAPABLE)
       
  2219  			{
       
  2220 -			vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks);
       
  2221 +			vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks);
       
  2222  			CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks,
       
  2223  					(block128_f)vpaes_encrypt);
       
  2224  			gctx->ctr = NULL;
       
  2225 @@ -849,7 +1223,7 @@
       
  2226  #endif
       
  2227  		(void)0;	/* terminate potentially open 'else' */
       
  2228  
       
  2229 -		AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
       
  2230 +		AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
       
  2231  		CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encrypt);
       
  2232  #ifdef AES_CTR_ASM
       
  2233  		gctx->ctr = (ctr128_f)AES_ctr32_encrypt;
       
  2234 @@ -1080,17 +1454,17 @@
       
  2235  		    {
       
  2236  		    if (enc)
       
  2237  			{
       
  2238 -			vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2239 +			vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2240  			xctx->xts.block1 = (block128_f)vpaes_encrypt;
       
  2241  			}
       
  2242  		    else
       
  2243  			{
       
  2244 -			vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2245 +			vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2246  			xctx->xts.block1 = (block128_f)vpaes_decrypt;
       
  2247  			}
       
  2248  
       
  2249  		    vpaes_set_encrypt_key(key + ctx->key_len/2,
       
  2250 -						ctx->key_len * 4, &xctx->ks2);
       
  2251 +						ctx->key_len * 4, &xctx->ks2.ks);
       
  2252  		    xctx->xts.block2 = (block128_f)vpaes_encrypt;
       
  2253  
       
  2254  		    xctx->xts.key1 = &xctx->ks1;
       
  2255 @@ -1102,17 +1476,17 @@
       
  2256  
       
  2257  		if (enc)
       
  2258  			{
       
  2259 -			AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2260 +			AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2261  			xctx->xts.block1 = (block128_f)AES_encrypt;
       
  2262  			}
       
  2263  		else
       
  2264  			{
       
  2265 -			AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2266 +			AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2267  			xctx->xts.block1 = (block128_f)AES_decrypt;
       
  2268  			}
       
  2269  
       
  2270  		AES_set_encrypt_key(key + ctx->key_len/2,
       
  2271 -						ctx->key_len * 4, &xctx->ks2);
       
  2272 +						ctx->key_len * 4, &xctx->ks2.ks);
       
  2273  		xctx->xts.block2 = (block128_f)AES_encrypt;
       
  2274  
       
  2275  		xctx->xts.key1 = &xctx->ks1;
       
  2276 @@ -1223,7 +1597,7 @@
       
  2277  #ifdef VPAES_CAPABLE
       
  2278  		if (VPAES_CAPABLE)
       
  2279  			{
       
  2280 -			vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks);
       
  2281 +			vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks.ks);
       
  2282  			CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  2283  					&cctx->ks, (block128_f)vpaes_encrypt);
       
  2284  			cctx->str = NULL;
       
  2285 @@ -1231,7 +1605,7 @@
       
  2286  			break;
       
  2287  			}
       
  2288  #endif
       
  2289 -		AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
       
  2290 +		AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
       
  2291  		CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  2292  					&cctx->ks, (block128_f)AES_encrypt);
       
  2293  		cctx->str = NULL;
       
  2294 @@ -1319,5 +1693,4 @@
       
  2295  BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS)
       
  2296  BLOCK_CIPHER_custom(NID_aes,256,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS)
       
  2297  
       
  2298 -#endif
       
  2299  #endif
       
  2300 Index: openssl/crypto/evp/evp.h
       
  2301 ===================================================================
       
  2302 --- evp.h	Mon Feb 11 07:26:04 2013
       
  2303 +++ evp.h.new	Thu May  2 14:31:55 2013
       
  2304 @@ -1256,6 +1256,7 @@
       
  2305  #define EVP_F_AESNI_INIT_KEY				 165
       
  2306  #define EVP_F_AESNI_XTS_CIPHER				 176
       
  2307  #define EVP_F_AES_INIT_KEY				 133
       
  2308 +#define EVP_F_AES_T4_INIT_KEY				 178
       
  2309  #define EVP_F_AES_XTS					 172
       
  2310  #define EVP_F_AES_XTS_CIPHER				 175
       
  2311  #define EVP_F_ALG_MODULE_INIT				 177
       
  2312 Index: openssl/crypto/evp/evp_err.c
       
  2313 ===================================================================
       
  2314 --- evp_err.c	Mon Feb 11 07:26:04 2013
       
  2315 +++ evp_err.c.new	Thu May  2 14:33:24 2013
       
  2316 @@ -73,6 +73,7 @@
       
  2317  {ERR_FUNC(EVP_F_AESNI_INIT_KEY),	"AESNI_INIT_KEY"},
       
  2318  {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER),	"AESNI_XTS_CIPHER"},
       
  2319  {ERR_FUNC(EVP_F_AES_INIT_KEY),	"AES_INIT_KEY"},
       
  2320 +{ERR_FUNC(EVP_F_AES_T4_INIT_KEY),	"AES_T4_INIT_KEY"},
       
  2321  {ERR_FUNC(EVP_F_AES_XTS),	"AES_XTS"},
       
  2322  {ERR_FUNC(EVP_F_AES_XTS_CIPHER),	"AES_XTS_CIPHER"},
       
  2323  {ERR_FUNC(EVP_F_ALG_MODULE_INIT),	"ALG_MODULE_INIT"},