components/openssl/openssl-1.0.1/patches/37_openssl_t4_inline.patch
branchs11u2-sru8-backport
changeset 4020 8e3f49e23eaa
parent 3285 78460de60ed1
equal deleted inserted replaced
3862:0e46f6a07b7f 4020:8e3f49e23eaa
       
     1 #
       
     2 # This file adds inline T4 instruction support to OpenSSL upstream code.
       
     3 # The change was brought in from OpenSSL 1.0.2.
       
     4 #
       
     5 Index: Configure
       
     6 ===================================================================
       
     7 diff -ru openssl-1.0.1e/Configure openssl-1.0.1e/Configure
       
     8 --- openssl-1.0.1e/Configure 2011-05-24 17:02:24.000000000 -0700
       
     9 +++ openssl-1.0.1e/Configure 2011-07-27 10:48:17.817470000 -0700
       
    10 @@ -135,7 +135,7 @@
       
    11 
       
    12  my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:";
       
    13  my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
       
    14 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
       
    15 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
       
    16  my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
       
    17  my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
       
    18  my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::";
       
    19 Index: crypto/sparccpuid.S
       
    20 ===================================================================
       
    21 diff -ru openssl-1.0.1e/crypto/sparccpuid.S openssl-1.0.1e/crypto/sparccpuid.S
       
    22 --- openssl-1.0.1e/crypto/sparccpuid.S 2011-05-24 17:02:24.000000000 -0700
       
    23 +++ openssl-1.0.1e/crypto/sparccpuid.S 2011-07-27 10:48:17.817470000 -0700
       
    24 @@ -1,3 +1,7 @@
       
    25 +#ifdef OPENSSL_FIPSCANISTER
       
    26 +#include <openssl/fipssyms.h>
       
    27 +#endif
       
    28 +
       
    29  #if defined(__SUNPRO_C) && defined(__sparcv9)
       
    30  # define ABI64  /* They've said -xarch=v9 at command line */
       
    31  #elif defined(__GNUC__) && defined(__arch64__)
       
    32 @@ -235,10 +239,10 @@
       
    33  .global	_sparcv9_vis1_probe
       
    34  .align	8
       
    35  _sparcv9_vis1_probe:
       
    36 +	.word	0x81b00d80	!fxor	%f0,%f0,%f0
       
    37  	add	%sp,BIAS+2,%o1
       
    38 -	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
       
    39  	retl
       
    40 -	.word	0x81b00d80	!fxor	%f0,%f0,%f0
       
    41 +	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
       
    42  .type	_sparcv9_vis1_probe,#function
       
    43  .size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
       
    44  
       
    45 @@ -251,7 +255,12 @@
       
    46  !	UltraSPARC IIe		7
       
    47  !	UltraSPARC III		7
       
    48  !	UltraSPARC T1		24
       
    49 +!	SPARC T4		65(*)
       
    50  !
       
     51 +! (*)	result has less to do with VIS instruction latencies; rdtick
       
     52 +!	itself appears to be that slow, but it does the trick in the sense
       
     53 +!	that FP and VIS code paths are still slower than integer-only ones.
       
    54 +!
       
    55  ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
       
    56  !
       
    57  ! It would be possible to detect specifically US-T1 by instrumenting
       
    58 @@ -260,6 +269,8 @@
       
    59  .global	_sparcv9_vis1_instrument
       
    60  .align	8
       
    61  _sparcv9_vis1_instrument:
       
    62 +	.word	0x81b00d80	!fxor	%f0,%f0,%f0
       
    63 +	.word	0x85b08d82	!fxor	%f2,%f2,%f2
       
    64  	.word	0x91410000	!rd	%tick,%o0
       
    65  	.word	0x81b00d80	!fxor	%f0,%f0,%f0
       
    66  	.word	0x85b08d82	!fxor	%f2,%f2,%f2
       
    67 @@ -314,6 +325,30 @@
       
    68  .type	_sparcv9_fmadd_probe,#function
       
    69  .size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
       
    70  
       
     71 +.global	_sparcv9_rdcfr		! unsigned long _sparcv9_rdcfr(void), per the prototype in sparcv9cap.c
       
     72 +.align	8
       
     73 +_sparcv9_rdcfr:			! returns the value read from %asr26 in %o0
       
     74 +	retl				! leaf return; the following word executes in the delay slot
       
     75 +	.word	0x91468000	!rd	%asr26,%o0	! hand-encoded; the caller stores the result in OPENSSL_sparcv9cap_P[1]
       
     76 +.type	_sparcv9_rdcfr,#function
       
     77 +.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
       
    78 +
       
     79 +.global	_sparcv9_vis3_probe	! void _sparcv9_vis3_probe(void), per the prototype in sparcv9cap.c
       
     80 +.align	8
       
     81 +_sparcv9_vis3_probe:		! probe only: executes one VIS3 instruction and returns
       
     82 +	retl				! leaf return; the following word executes in the delay slot
       
     83 +	.word	0x81b022a0	!xmulx	%g0,%g0,%g0	! all operands are %g0, so no register changes; presumably traps if VIS3 is absent (caller guards with sigsetjmp)
       
     84 +.type	_sparcv9_vis3_probe,#function
       
     85 +.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
       
    86 +
       
    87 +.global	_sparcv9_random
       
    88 +.align	8
       
    89 +_sparcv9_random:
       
    90 +	retl
       
    91 +	.word	0x91b002a0	!random	%o0
       
    92 +.type	_sparcv9_random,#function
       
    93 +.size	_sparcv9_random,.-_sparcv9_vis3_probe
       
    94 +
       
    95  .global	OPENSSL_cleanse
       
    96  .align	32
       
    97  OPENSSL_cleanse:
       
    98 @@ -398,6 +433,102 @@
       
    99  .size	OPENSSL_cleanse,.-OPENSSL_cleanse
       
   100  
       
   101  #ifndef _BOOT
       
    102 +.global	_sparcv9_vis1_instrument_bus	! size_t _sparcv9_vis1_instrument_bus(unsigned int *out, size_t cnt)
       
    103 +.align	8
       
    104 +_sparcv9_vis1_instrument_bus:	! adds successive %tick deltas into out[0..cnt-1]; %o0 = out, %o1 = cnt
       
    105 +    mov    %o1,%o3                    ! save cnt
       
    106 +    .word    0x99410000    !rd    %tick,%o4    ! tick
       
    107 +    mov    %o4,%o5                    ! lasttick = tick
       
    108 +    set    0,%g4                    ! diff
       
    109 +
       
    110 +    andn    %o0,63,%g1                ! %g1 = out rounded down to a 64-byte boundary
       
    111 +    .word    0xc1985e00    !ldda    [%g1]0xf0,%f0    ! block load
       
    112 +    .word    0x8143e040    !membar    #Sync
       
    113 +    .word    0xc1b85c00    !stda    %f0,[%g1]0xe0    ! block store and commit
       
    114 +    .word    0x8143e040    !membar    #Sync
       
    115 +    ld    [%o0],%o4                    ! %o4 = current *out
       
    116 +    add    %o4,%g4,%g4                ! %g4 = *out + diff (diff is 0 on this first pass)
       
    117 +    .word    0xc9e2100c    !cas    [%o0],%o4,%g4    ! store %g4 to *out if *out still equals %o4
       
    118 +
       
    119 +.Loop:    .word    0x99410000    !rd    %tick,%o4    ! tick for this iteration
       
    120 +    sub    %o4,%o5,%g4                ! diff=tick-lasttick
       
    121 +    mov    %o4,%o5                    ! lasttick=tick
       
    122 +
       
    123 +    andn    %o0,63,%g1                ! %g1 = out rounded down to a 64-byte boundary
       
    124 +    .word    0xc1985e00    !ldda    [%g1]0xf0,%f0    ! block load
       
    125 +    .word    0x8143e040    !membar    #Sync
       
    126 +    .word    0xc1b85c00    !stda    %f0,[%g1]0xe0    ! block store and commit
       
    127 +    .word    0x8143e040    !membar    #Sync
       
    128 +    ld    [%o0],%o4                    ! %o4 = current *out
       
    129 +    add    %o4,%g4,%g4                ! %g4 = *out + diff
       
    130 +    .word    0xc9e2100c    !cas    [%o0],%o4,%g4    ! store %g4 to *out if *out still equals %o4
       
    131 +    subcc    %o1,1,%o1                ! --$cnt
       
    132 +    bnz    .Loop                    ! repeat until cnt reaches zero; the add below is the delay slot
       
    133 +    add    %o0,4,%o0                ! ++$out
       
    134 +
       
    135 +    retl                            ! leaf return; the mov below is the delay slot
       
    136 +    mov    %o3,%o0                    ! return the cnt saved on entry
       
    137 +.type    _sparcv9_vis1_instrument_bus,#function
       
    138 +.size    _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
       
   139 +
       
    140 +.global    _sparcv9_vis1_instrument_bus2	! size_t _sparcv9_vis1_instrument_bus2(unsigned int *out, size_t cnt, size_t max)
       
    141 +.align    8
       
    142 +_sparcv9_vis1_instrument_bus2:	! %o0 = out, %o1 = cnt, %o2 = max; out advances only when the measured diff changes
       
    143 +    mov    %o1,%o3                    ! save cnt
       
    144 +    sll    %o1,2,%o1                ! cnt*=4
       
    145 +
       
    146 +    .word    0x99410000    !rd    %tick,%o4    ! tick
       
    147 +    mov    %o4,%o5                    ! lasttick = tick
       
    148 +    set    0,%g4                    ! diff
       
    149 +
       
    150 +    andn    %o0,63,%g1                ! %g1 = out rounded down to a 64-byte boundary
       
    151 +    .word    0xc1985e00    !ldda    [%g1]0xf0,%f0    ! block load
       
    152 +    .word    0x8143e040    !membar    #Sync
       
    153 +    .word    0xc1b85c00    !stda    %f0,[%g1]0xe0    ! block store and commit
       
    154 +    .word    0x8143e040    !membar    #Sync
       
    155 +    ld    [%o0],%o4                    ! %o4 = current *out
       
    156 +    add    %o4,%g4,%g4                ! %g4 = *out + diff (diff is 0 on this first pass)
       
    157 +    .word    0xc9e2100c    !cas    [%o0],%o4,%g4    ! store %g4 to *out if *out still equals %o4
       
    158 +
       
    159 +    .word    0x99410000    !rd    %tick,%o4    ! tick
       
    160 +    sub    %o4,%o5,%g4                ! diff=tick-lasttick
       
    161 +    mov    %o4,%o5                    ! lasttick=tick
       
    162 +    mov    %g4,%g5                    ! lastdiff=diff
       
    163 +.Loop2:
       
    164 +    andn    %o0,63,%g1                ! %g1 = out rounded down to a 64-byte boundary
       
    165 +    .word    0xc1985e00    !ldda    [%g1]0xf0,%f0    ! block load
       
    166 +    .word    0x8143e040    !membar    #Sync
       
    167 +    .word    0xc1b85c00    !stda    %f0,[%g1]0xe0    ! block store and commit
       
    168 +    .word    0x8143e040    !membar    #Sync
       
    169 +    ld    [%o0],%o4                    ! %o4 = current *out
       
    170 +    add    %o4,%g4,%g4                ! %g4 = *out + diff
       
    171 +    .word    0xc9e2100c    !cas    [%o0],%o4,%g4    ! store %g4 to *out if *out still equals %o4
       
    172 +
       
    173 +    subcc    %o2,1,%o2                ! --max
       
    174 +    bz    .Ldone2                    ! stop once max passes have been made
       
    175 +    nop
       
    176 +
       
    177 +    .word    0x99410000    !rd    %tick,%o4    ! tick
       
    178 +    sub    %o4,%o5,%g4                ! diff=tick-lasttick
       
    179 +    mov    %o4,%o5                    ! lasttick=tick
       
    180 +    cmp    %g4,%g5                    ! zero flag set when diff equals lastdiff
       
    181 +    mov    %g4,%g5                    ! lastdiff=diff
       
    182 +
       
    183 +    .word    0x83408000    !rd    %ccr,%g1    ! capture the flags left by the cmp above
       
    184 +    and    %g1,4,%g1                ! isolate zero flag
       
    185 +    xor    %g1,4,%g1                ! flip zero flag: %g1 = 4 if diff changed, else 0
       
    186 +
       
    187 +    subcc    %o1,%g1,%o1                ! conditional --$cnt
       
    188 +    bnz    .Loop2                    ! repeat while scaled cnt is non-zero; the add below is the delay slot
       
    189 +    add    %o0,%g1,%o0                ! conditional ++$out
       
    190 +
       
    191 +.Ldone2:
       
    192 +    srl    %o1,2,%o1                ! undo the cnt*=4 scaling: remaining element count
       
    193 +    retl                            ! leaf return; the sub below is the delay slot
       
    194 +    sub    %o3,%o1,%o0                ! return saved cnt minus remaining count
       
    195 +.type    _sparcv9_vis1_instrument_bus2,#function
       
    196 +.size    _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
       
   197 +
       
   198  .section	".init",#alloc,#execinstr
       
   199  	call	solaris_locking_setup
       
   200  	nop
       
   201 Index: crypto/sparcv9cap.c
       
   202 ===================================================================
       
   203 diff -ru openssl-1.0.1e/crypto/sparcv9cap.c openssl-1.0.1e/crypto/sparcv9cap.c
       
   204 --- openssl-1.0.1e/crypto/sparcv9cap.c 2011-05-24 17:02:24.000000000 -0700
       
   205 +++ openssl-1.0.1e/crypto/sparcv9cap.c 2011-07-27 10:48:17.817470000 -0700
       
   206 @@ -4,34 +4,58 @@
       
   207  #include <setjmp.h>
       
   208  #include <signal.h>
       
   209  #include <sys/time.h>
       
   210 +#include <unistd.h>
       
   211  #include <openssl/bn.h>
       
   212  
       
   213 -#define SPARCV9_TICK_PRIVILEGED (1<<0)
       
   214 -#define SPARCV9_PREFER_FPU      (1<<1)
       
   215 -#define SPARCV9_VIS1            (1<<2)
       
   216 -#define SPARCV9_VIS2            (1<<3) /* reserved */
       
   217 -#define SPARCV9_FMADD           (1<<4) /* reserved for SPARC64 V */
       
   218 +#include "sparc_arch.h"
       
   219  
       
   220 +#if defined(__GNUC__) && defined(__linux)
       
   221 +__attribute__((visibility("hidden")))
       
   222 +#endif
       
   223  #ifndef        _BOOT
       
   224 -static int OPENSSL_sparcv9cap_P = SPARCV9_TICK_PRIVILEGED;
       
   225 +unsigned int OPENSSL_sparcv9cap_P[2] = {SPARCV9_TICK_PRIVILEGED, 0};
       
   226  #else
       
   227 -static int OPENSSL_sparcv9cap_P = SPARCV9_VIS1;
       
   228 +unsigned int OPENSSL_sparcv9cap_P[2] = {SPARCV9_VIS1, 0};
       
   229  #endif
       
   230  
       
   231  int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
       
   232                  const BN_ULONG *np, const BN_ULONG *n0, int num)
       
   233  {
       
   234 +    int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
       
   235 +                         const BN_ULONG *np,const BN_ULONG *n0, int num);
       
   236      int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
       
   237                          const BN_ULONG *np, const BN_ULONG *n0, int num);
       
   238      int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
       
   239                          const BN_ULONG *np, const BN_ULONG *n0, int num);
       
   240  
       
   241 -    if (num >= 8 && !(num & 1) &&
       
   242 -        (OPENSSL_sparcv9cap_P & (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) ==
       
   243 -        (SPARCV9_PREFER_FPU | SPARCV9_VIS1))
       
   244 -        return bn_mul_mont_fpu(rp, ap, bp, np, n0, num);
       
   245 -    else
       
   246 -        return bn_mul_mont_int(rp, ap, bp, np, n0, num);
       
   247 +    if (!(num&1) && num>=6) {
       
   248 +        if ((num&15)==0 && num<=64 &&
       
   249 +            (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== 
       
   250 +                             (CFR_MONTMUL|CFR_MONTSQR))
       
   251 +            {
       
   252 +            typedef int (*bn_mul_mont_f)(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   253 +            int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   254 +            int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   255 +            int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   256 +            int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   257 +            static const bn_mul_mont_f funcs[4] = {
       
   258 +                bn_mul_mont_t4_8,    bn_mul_mont_t4_16,
       
   259 +                bn_mul_mont_t4_24,    bn_mul_mont_t4_32 };
       
   260 +            bn_mul_mont_f worker = funcs[num/16-1];
       
   261 +
       
   262 +            if ((*worker)(rp,ap,bp,np,n0)) return 1;
       
   263 +            /* retry once and fall back */
       
   264 +            if ((*worker)(rp,ap,bp,np,n0)) return 1;
       
   265 +            return bn_mul_mont_vis3(rp,ap,bp,np,n0,num);
       
   266 +            }
       
   267 +        if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3))
       
   268 +            return bn_mul_mont_vis3(rp,ap,bp,np,n0,num);
       
   269 +        else if (num>=8 &&
       
   270 +            (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
       
   271 +            (SPARCV9_PREFER_FPU|SPARCV9_VIS1))
       
   272 +            return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
       
   273 +        }
       
   274 +    return bn_mul_mont_int(rp,ap,bp,np,n0,num);
       
   275  }
       
   276  
       
   277  unsigned long _sparcv9_rdtick(void);
       
   278 @@ -39,11 +63,18 @@
       
   279  unsigned long _sparcv9_vis1_instrument(void);
       
   280  void _sparcv9_vis2_probe(void);
       
   281  void _sparcv9_fmadd_probe(void);
       
   282 +unsigned long _sparcv9_rdcfr(void);
       
   283 +void _sparcv9_vis3_probe(void);
       
   284 +unsigned long _sparcv9_random(void);
       
   285 +#ifndef _BOOT
       
   286 +size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t);
       
   287 +size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t);
       
   288 +#endif
       
   289  
       
   290  #ifndef _BOOT
       
   291  unsigned long OPENSSL_rdtsc(void)
       
   292  {
       
   293 -    if (OPENSSL_sparcv9cap_P & SPARCV9_TICK_PRIVILEGED)
       
   294 +    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_TICK_PRIVILEGED)
       
   295  #if defined(__sun) && defined(__SVR4)
       
   296          return gethrtime();
       
   297  #else
       
   298 @@ -52,6 +83,24 @@
       
   299      else
       
   300          return _sparcv9_rdtick();
       
   301  }
       
   302 +
       
    303 +size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt)
       
    304 +{   /* Forwards to _sparcv9_vis1_instrument_bus when the capability bits allow it, else returns 0. */
       
    305 +    if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
       
    306 +            SPARCV9_BLK)   /* true only when BLK is set and TICK_PRIVILEGED is clear */
       
    307 +        return _sparcv9_vis1_instrument_bus(out,cnt);
       
    308 +    else    /* probe not usable on this CPU */
       
    309 +        return 0;
       
    310 +}
       
   311 +
       
    312 +size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max)
       
    313 +{   /* Forwards to _sparcv9_vis1_instrument_bus2 when the capability bits allow it, else returns 0. */
       
    314 +    if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
       
    315 +            SPARCV9_BLK)   /* true only when BLK is set and TICK_PRIVILEGED is clear */
       
    316 +        return _sparcv9_vis1_instrument_bus2(out,cnt,max);
       
    317 +    else    /* probe not usable on this CPU */
       
    318 +        return 0;
       
    319 +}
       
   320  #endif
       
   321 
       
   322  #if defined(_BOOT)
       
   323 @@ -61,7 +110,7 @@
       
   324   */
       
   325  void OPENSSL_cpuid_setup(void)
       
   326         {
       
   327 -       OPENSSL_sparcv9cap_P = SPARCV9_VIS1;
       
   328 +       OPENSSL_sparcv9cap_P[0] = SPARCV9_VIS1;
       
   329         }
       
   330  
       
   331  #elif 0 && defined(__sun) && defined(__SVR4)
       
   332 @@ -90,11 +139,11 @@
       
   333      if (!strcmp(name, "SUNW,UltraSPARC") ||
       
   334          /* covers II,III,IV */
       
   335          !strncmp(name, "SUNW,UltraSPARC-I", 17)) {
       
   336 -        OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU | SPARCV9_VIS1;
       
   337 +        OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU | SPARCV9_VIS1;
       
   338  
       
   339          /* %tick is privileged only on UltraSPARC-I/II, but not IIe */
       
   340          if (name[14] != '\0' && name[17] != '\0' && name[18] != '\0')
       
   341 -            OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   342 +            OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   343  
       
   344          return DI_WALK_TERMINATE;
       
   345      }
       
   346 @@ -100,7 +149,7 @@
       
   347      }
       
   348      /* This is expected to catch remaining UltraSPARCs, such as T1 */
       
   349      else if (!strncmp(name, "SUNW,UltraSPARC", 15)) {
       
   350 -        OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   351 +        OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   352  
       
   353          return DI_WALK_TERMINATE;
       
   354      }
       
   355 @@ -119,7 +168,7 @@
       
   356      trigger = 1;
       
   357  
       
   358      if ((e = getenv("OPENSSL_sparcv9cap"))) {
       
   359 -        OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0);
       
   360 +        OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0);
       
   361          return;
       
   362      }
       
   363  
       
   364 @@ -126,15 +175,15 @@
       
   365      if (sysinfo(SI_MACHINE, si, sizeof(si)) > 0) {
       
   366          if (strcmp(si, "sun4v"))
       
   367              /* FPU is preferred for all CPUs, but US-T1/2 */
       
   368 -            OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU;
       
   369 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU;
       
   370      }
       
   371  
       
   372      if (sysinfo(SI_ISALIST, si, sizeof(si)) > 0) {
       
   373          if (strstr(si, "+vis"))
       
   374 -            OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
       
   375 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK;
       
   376          if (strstr(si, "+vis2")) {
       
   377 -            OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
       
   378 -            OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   379 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2;
       
   380 +            OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   381              return;
       
   382          }
       
   383      }
       
   384 @@ -204,12 +253,14 @@
       
   385      trigger = 1;
       
   386  
       
   387      if ((e = getenv("OPENSSL_sparcv9cap"))) {
       
   388 -        OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0);
       
   389 +        OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0);
       
   390 +        if ((e = strchr(e, ':')))
       
   391 +            OPENSSL_sparcv9cap_P[1] = strtoul(e + 1, NULL, 0);
       
   392          return;
       
   393      }
       
   394  
       
   395      /* Initial value, fits UltraSPARC-I&II... */
       
   396 -    OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU | SPARCV9_TICK_PRIVILEGED;
       
   397 +    OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU | SPARCV9_TICK_PRIVILEGED;
       
   398  
       
   399      sigfillset(&all_masked);
       
   400      sigdelset(&all_masked, SIGILL);
       
   401 @@ -232,18 +283,18 @@
       
   402  
       
   403      if (sigsetjmp(common_jmp, 1) == 0) {
       
   404          _sparcv9_rdtick();
       
   405 -        OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   406 +        OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   407      }
       
   408  
       
   409      if (sigsetjmp(common_jmp, 1) == 0) {
       
   410          _sparcv9_vis1_probe();
       
   411 -        OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
       
   412 +        OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK;
       
   413          /* detect UltraSPARC-Tx, see sparccpud.S for details... */
       
   414          if (_sparcv9_vis1_instrument() >= 12)
       
   415 -            OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU);
       
   416 +            OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU);
       
   417          else {
       
   418              _sparcv9_vis2_probe();
       
   419 -            OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
       
   420 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2;
       
   421          }
       
   422      }
       
   423  
       
   424 @@ -249,13 +300,50 @@
       
   425  
       
   426      if (sigsetjmp(common_jmp, 1) == 0) {
       
   427          _sparcv9_fmadd_probe();
       
   428 -        OPENSSL_sparcv9cap_P |= SPARCV9_FMADD;
       
   429 +        OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD;
       
   430      }
       
   431  
       
   432 +    /*
       
    433 +     * The VIS3 flag is tested independently of VIS1 (unlike VIS2),
       
    434 +     * because VIS3 also defines integer instructions.
       
   435 +     */
       
   436 +    if (sigsetjmp(common_jmp,1) == 0) {
       
   437 +        _sparcv9_vis3_probe();
       
   438 +        OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3;
       
   439 +    }
       
   440 +
       
   441 +    if (sigsetjmp(common_jmp,1) == 0) {
       
   442 +        (void)_sparcv9_random();
       
   443 +        OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM;
       
   444 +    }
       
   445 +
       
   446 +    /*
       
    447 +     * Until a better solution is found, _sparcv9_rdcfr is masked by
       
    448 +     * the VIS3 flag, because it goes into an uninterruptible endless
       
    449 +     * loop on UltraSPARC II running Solaris. Things might be
       
    450 +     * different on Linux...
       
   451 +     */
       
   452 +    if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) &&
       
   453 +        sigsetjmp(common_jmp, 1) == 0) {
       
   454 +        OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr();
       
   455 +    }
       
   456 +
       
   457      sigaction(SIGBUS, &bus_oact, NULL);
       
   458      sigaction(SIGILL, &ill_oact, NULL);
       
   459  
       
   460      sigprocmask(SIG_SETMASK, &oset, NULL);
       
   461 +
       
   462 +    if (sizeof(size_t) == 8)
       
   463 +        OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK;
       
   464 +#ifdef __linux
       
   465 +    else
       
   466 +        {
       
   467 +        int ret = syscall(340);
       
   468 +
       
   469 +        if (ret >= 0 && ret & 1)
       
   470 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK;
       
   471 +        }
       
   472 +#endif
       
   473  }
       
   474  
       
   475  #endif
       
   476 Index: crypto/md5/Makefile
       
   477 ===================================================================
       
   478 diff -ru openssl-1.0.1e/crypto/md5/Makefile openssl-1.0.1e/crypto/md5/Makefile
       
   479 --- openssl-1.0.1e/crypto/md5/Makefile	2011-05-24 17:02:24.000000000 -0700
       
   480 +++ openssl-1.0.1e/crypto/md5/Makefile	2011-07-27 10:48:17.817470000 -0700
       
   481 @@ -52,6 +52,9 @@
       
   482  	$(CC) $(CFLAGS) -E asm/md5-ia64.S | \
       
   483  	$(PERL) -ne 's/;\s+/;\n/g; print;' > $@
       
   484  
       
   485 +md5-sparcv9.S:	asm/md5-sparcv9.pl
       
   486 +	$(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS)
       
   487 +
       
   488  files:
       
   489  	$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
       
   490  
       
   491 Index: crypto/md5/md5_locl.h
       
   492 ===================================================================
       
   493 diff -ru openssl-1.0.1e/crypto/md5/md5_locl.h openssl-1.0.1e/crypto/md5/md5_locl.h
       
   494 --- openssl-1.0.1e/crypto/md5/md5_locl.h    2011-05-24 17:02:24.000000000 -0700
       
   495 +++ openssl-1.0.1e/crypto/md5/md5_locl.h    2011-07-27 10:48:17.817470000 -0700
       
   496 @@ -71,6 +71,8 @@
       
   497  #  define md5_block_data_order md5_block_asm_data_order
       
   498  # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
       
   499  #  define md5_block_data_order md5_block_asm_data_order
       
   500 +# elif defined(__sparc) || defined(__sparc__)
       
   501 +#  define md5_block_data_order md5_block_asm_data_order
       
   502  # endif
       
   503  #endif
       
   504 
       
   505 Index: crypto/sha/Makefile
       
   506 ===================================================================
       
   507 diff -ru openssl-1.0.1e/crypto/sha/Makefile openssl-1.0.1e/crypto/sha/Makefile
       
   508 --- openssl-1.0.1e/crypto/sha/Makefile    2011-05-24 17:02:24.000000000 -0700
       
   509 +++ openssl-1.0.1e/crypto/sha/Makefile    2011-07-27 10:48:17.817470000 -0700
       
   510 @@ -68,9 +68,9 @@
       
   511  sha1-x86_64.s:	asm/sha1-x86_64.pl;	$(PERL) asm/sha1-x86_64.pl $(PERLASM_SCHEME) > $@
       
   512  sha256-x86_64.s:asm/sha512-x86_64.pl;	$(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
       
   513  sha512-x86_64.s:asm/sha512-x86_64.pl;	$(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
       
   514 -sha1-sparcv9.s:	asm/sha1-sparcv9.pl;	$(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS)
       
   515 -sha256-sparcv9.s:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   516 -sha512-sparcv9.s:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   517 +sha1-sparcv9.S:	asm/sha1-sparcv9.pl;	$(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS)
       
   518 +sha256-sparcv9.S:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   519 +sha512-sparcv9.S:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   520  
       
   521  sha1-ppc.s:	asm/sha1-ppc.pl;	$(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@
       
   522  sha256-ppc.s:	asm/sha512-ppc.pl;	$(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
       
   523 Index: crypto/sha/asm/sha1-sparcv9.pl
       
   524 ===================================================================
       
   525 diff -ru openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl
       
   526 --- openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700
       
   527 +++ openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700
       
   528 @@ -5,6 +5,8 @@
       
   529  # project. The module is, however, dual licensed under OpenSSL and
       
   530  # CRYPTOGAMS licenses depending on where you obtain it. For further
       
   531  # details see http://www.openssl.org/~appro/cryptogams/.
       
   532 +#
       
   533 +# Hardware SPARC T4 support by David S. Miller <[email protected]>.
       
   534  # ====================================================================
       
   535  
       
   536  # Performance improvement is not really impressive on pre-T1 CPU: +8%
       
   537 @@ -18,6 +20,11 @@
       
   538  # ensure scalability on UltraSPARC T1, or rather to avoid decay when
       
   539  # amount of active threads exceeds the number of physical cores.
       
   540  
       
   541 +# SPARC T4 SHA1 hardware achieves 3.72 cycles per byte, which is 3.1x
       
   542 +# faster than software. Multi-process benchmark saturates at 11x
       
   543 +# single-process result on 8-core processor, or ~9GBps per 2.85GHz
       
   544 +# socket.
       
   545 +
       
   546  $bits=32;
       
   547  for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
       
   548  if ($bits==64)	{ $bias=2047; $frame=192; }
       
   549 @@ -183,11 +190,93 @@
       
   550  .register	%g3,#scratch
       
   551  ___
       
   552  $code.=<<___;
       
   553 +#include "sparc_arch.h"
       
   554 +
       
   555  .section	".text",#alloc,#execinstr
       
   556  
       
   557 +#ifdef __PIC__
       
   558 +SPARC_PIC_THUNK(%g1)
       
   559 +#endif
       
   560 +
       
   561  .align	32
       
   562  .globl	sha1_block_data_order
       
   563  sha1_block_data_order:
       
   564 +    SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
       
   565 +    ld    [%g1+4],%g1        ! OPENSSL_sparcv9cap_P[1]
       
   566 +
       
   567 +    andcc    %g1, CFR_SHA1, %g0
       
   568 +    be    .Lsoftware
       
   569 +    nop
       
   570 +
       
   571 +    ld    [%o0 + 0x00], %f0    ! load context
       
   572 +    ld    [%o0 + 0x04], %f1
       
   573 +    ld    [%o0 + 0x08], %f2
       
   574 +    andcc    %o1, 0x7, %g0
       
   575 +    ld    [%o0 + 0x0c], %f3
       
   576 +    bne,pn    %icc, .Lhwunaligned
       
   577 +     ld    [%o0 + 0x10], %f4
       
   578 +
       
   579 +.Lhw_loop:
       
   580 +    ldd    [%o1 + 0x00], %f8
       
   581 +    ldd    [%o1 + 0x08], %f10
       
   582 +    ldd    [%o1 + 0x10], %f12
       
   583 +    ldd    [%o1 + 0x18], %f14
       
   584 +    ldd    [%o1 + 0x20], %f16
       
   585 +    ldd    [%o1 + 0x28], %f18
       
   586 +    ldd    [%o1 + 0x30], %f20
       
   587 +    subcc    %o2, 1, %o2        ! done yet? 
       
   588 +    ldd    [%o1 + 0x38], %f22
       
   589 +    add    %o1, 0x40, %o1
       
   590 +
       
   591 +    .word    0x81b02820        ! SHA1
       
   592 +
       
   593 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhw_loop
       
   594 +    nop
       
   595 +
       
   596 +.Lhwfinish:
       
   597 +    st    %f0, [%o0 + 0x00]    ! store context
       
   598 +    st    %f1, [%o0 + 0x04]
       
   599 +    st    %f2, [%o0 + 0x08]
       
   600 +    st    %f3, [%o0 + 0x0c]
       
   601 +    retl
       
   602 +    st    %f4, [%o0 + 0x10]
       
   603 +
       
   604 +.align    8
       
   605 +.Lhwunaligned:
       
   606 +    alignaddr %o1, %g0, %o1
       
   607 +
       
   608 +    ldd    [%o1 + 0x00], %f10
       
   609 +.Lhwunaligned_loop:
       
   610 +    ldd    [%o1 + 0x08], %f12
       
   611 +    ldd    [%o1 + 0x10], %f14
       
   612 +    ldd    [%o1 + 0x18], %f16
       
   613 +    ldd    [%o1 + 0x20], %f18
       
   614 +    ldd    [%o1 + 0x28], %f20
       
   615 +    ldd    [%o1 + 0x30], %f22
       
   616 +    ldd    [%o1 + 0x38], %f24
       
   617 +    subcc    %o2, 1, %o2        ! done yet?
       
   618 +    ldd    [%o1 + 0x40], %f26
       
   619 +    add    %o1, 0x40, %o1
       
   620 +
       
   621 +    faligndata %f10, %f12, %f8
       
   622 +    faligndata %f12, %f14, %f10
       
   623 +    faligndata %f14, %f16, %f12
       
   624 +    faligndata %f16, %f18, %f14
       
   625 +    faligndata %f18, %f20, %f16
       
   626 +    faligndata %f20, %f22, %f18
       
   627 +    faligndata %f22, %f24, %f20
       
   628 +    faligndata %f24, %f26, %f22
       
   629 +
       
   630 +    .word    0x81b02820        ! SHA1
       
   631 +
       
   632 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
       
   633 +    for    %f26, %f26, %f10    ! %f10=%f26
       
   634 +
       
   635 +    ba    .Lhwfinish
       
   636 +    nop
       
   637 +
       
   638 +.align    16
       
   639 +.Lsoftware:
       
   640  	save	%sp,-$frame,%sp
       
   641  	sllx	$len,6,$len
       
   642  	add	$inp,$len,$len
       
   643 @@ -279,6 +368,62 @@
       
   644  .align	4
       
   645  ___
       
   646  
       
   647 -$code =~ s/\`([^\`]*)\`/eval $1/gem;
       
   648 -print $code;
       
   649 +# Purpose of these subroutines is to explicitly encode VIS instructions,
       
   650 +# so that one can compile the module without having to specify VIS
       
   651 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
       
   652 +# Idea is to reserve for option to produce "universal" binary and let
       
   653 +# programmer detect if current CPU is VIS capable at run-time.
       
   654 +sub unvis {
       
   655 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   656 +my ($ref,$opf);
       
   657 +my %visopf = (    "faligndata"    => 0x048,
       
   658 +        "for"        => 0x07c    );
       
   659 +
       
   660 +    $ref = "$mnemonic\t$rs1,$rs2,$rd";
       
   661 +
       
   662 +    if ($opf=$visopf{$mnemonic}) {
       
   663 +    foreach ($rs1,$rs2,$rd) {
       
   664 +        return $ref if (!/%f([0-9]{1,2})/);
       
   665 +        $_=$1;
       
   666 +        if ($1>=32) {
       
   667 +        return $ref if ($1&1);
       
   668 +        # re-encode for upper double register addressing
       
   669 +        $_=($1|$1>>5)&31;
       
   670 +        }
       
   671 +    }
       
   672 +
       
   673 +    return    sprintf ".word\t0x%08x !%s",
       
   674 +            0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
       
   675 +            $ref;
       
   676 +    } else {
       
   677 +    return $ref;
       
   678 +    }
       
   679 +}
       
   680 +sub unalignaddr {
       
   681 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   682 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
       
   683 +my $ref="$mnemonic\t$rs1,$rs2,$rd";
       
   684 +
       
   685 +    foreach ($rs1,$rs2,$rd) {
       
   686 +    if (/%([goli])([0-7])/)    { $_=$bias{$1}+$2; }
       
   687 +    else            { return $ref; }
       
   688 +    }
       
   689 +    return  sprintf ".word\t0x%08x !%s",
       
   690 +            0x81b00300|$rd<<25|$rs1<<14|$rs2,
       
   691 +            $ref;
       
   692 +}
       
   693 +
       
   694 +foreach (split("\n",$code)) {
       
   695 +    s/\`([^\`]*)\`/eval $1/ge;
       
   696 +
       
   697 +    s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
       
   698 +        &unvis($1,$2,$3,$4)
       
   699 +     /ge;
       
   700 +    s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
       
   701 +        &unalignaddr($1,$2,$3,$4)
       
   702 +     /ge;
       
   703 +
       
   704 +    print $_,"\n";
       
   705 +}
       
   706 +
       
   707  close STDOUT;
       
   708 
       
   709 Index: crypto/sha/asm/sha512-sparcv9.pl
       
   710 ===================================================================
       
   711 diff -ru openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl
       
   712 --- openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700
       
   713 +++ openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700
       
   714 @@ -5,6 +5,8 @@
       
   715  # project. The module is, however, dual licensed under OpenSSL and
       
   716  # CRYPTOGAMS licenses depending on where you obtain it. For further
       
   717  # details see http://www.openssl.org/~appro/cryptogams/.
       
   718 +#
       
   719 +# Hardware SPARC T4 support by David S. Miller <davem@davemloft.net>.
       
   720  # ====================================================================
       
   721  
       
   722  # SHA256 performance improvement over compiler generated code varies
       
   723 @@ -41,6 +43,12 @@
       
   724  #	loads are always slower than one 64-bit load. Once again this
       
   725  #	is unlike pre-T1 UltraSPARC, where, if scheduled appropriately,
       
   726  #	2x32-bit loads can be as fast as 1x64-bit ones.
       
   727 +#
       
   728 +# SPARC T4 SHA256/512 hardware achieves 3.17/2.01 cycles per byte,
       
   729 +# which is 9.3x/11.1x faster than software. Multi-process benchmark
       
   730 +# saturates at 11.5x single-process result on 8-core processor, or
       
   731 +# ~11/16GBps per 2.85GHz socket.
       
   732 +
       
   733  
       
   734  $bits=32;
       
   735  for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
       
   736 @@ -386,6 +394,8 @@
       
   737  .register	%g3,#scratch
       
   738  ___
       
   739  $code.=<<___;
       
   740 +#include "sparc_arch.h"
       
   741 +
       
   742  .section	".text",#alloc,#execinstr
       
   743  
       
   744  .align	64
       
   745 @@ -457,8 +467,196 @@
       
   746  }
       
   747  $code.=<<___;
       
   748  .size	K${label},.-K${label}
       
   749 +
       
   750 +#ifdef __PIC__
       
   751 +SPARC_PIC_THUNK(%g1)
       
   752 +#endif
       
   753 +
       
   754  .globl	sha${label}_block_data_order
       
   755 +.align	32
       
   756  sha${label}_block_data_order:
       
   757 +    SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
       
   758 +    ld    [%g1+4],%g1        ! OPENSSL_sparcv9cap_P[1]
       
   759 +
       
   760 +    andcc    %g1, CFR_SHA${label}, %g0
       
   761 +    be    .Lsoftware
       
   762 +    nop
       
   763 +___
       
   764 +$code.=<<___ if ($SZ==8);         # SHA512
       
   765 +    ldd    [%o0 + 0x00], %f0    ! load context
       
   766 +    ldd    [%o0 + 0x08], %f2
       
   767 +    ldd    [%o0 + 0x10], %f4
       
   768 +    ldd    [%o0 + 0x18], %f6
       
   769 +    ldd    [%o0 + 0x20], %f8
       
   770 +    ldd    [%o0 + 0x28], %f10
       
   771 +    andcc    %o1, 0x7, %g0
       
   772 +    ldd    [%o0 + 0x30], %f12
       
   773 +    bne,pn    %icc, .Lhwunaligned
       
   774 +     ldd    [%o0 + 0x38], %f14
       
   775 +
       
   776 +.Lhwaligned_loop:
       
   777 +    ldd    [%o1 + 0x00], %f16
       
   778 +    ldd    [%o1 + 0x08], %f18
       
   779 +    ldd    [%o1 + 0x10], %f20
       
   780 +    ldd    [%o1 + 0x18], %f22
       
   781 +    ldd    [%o1 + 0x20], %f24
       
   782 +    ldd    [%o1 + 0x28], %f26
       
   783 +    ldd    [%o1 + 0x30], %f28
       
   784 +    ldd    [%o1 + 0x38], %f30
       
   785 +    ldd    [%o1 + 0x40], %f32
       
   786 +    ldd    [%o1 + 0x48], %f34
       
   787 +    ldd    [%o1 + 0x50], %f36
       
   788 +    ldd    [%o1 + 0x58], %f38
       
   789 +    ldd    [%o1 + 0x60], %f40
       
   790 +    ldd    [%o1 + 0x68], %f42
       
   791 +    ldd    [%o1 + 0x70], %f44
       
   792 +    subcc    %o2, 1, %o2        ! done yet?
       
   793 +    ldd    [%o1 + 0x78], %f46
       
   794 +    add    %o1, 0x80, %o1
       
   795 +
       
   796 +    .word    0x81b02860        ! SHA512
       
   797 +
       
   798 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhwaligned_loop
       
   799 +    nop
       
   800 +
       
   801 +.Lhwfinish:
       
   802 +    std    %f0, [%o0 + 0x00]    ! store context
       
   803 +    std    %f2, [%o0 + 0x08]
       
   804 +    std    %f4, [%o0 + 0x10]
       
   805 +    std    %f6, [%o0 + 0x18]
       
   806 +    std    %f8, [%o0 + 0x20]
       
   807 +    std    %f10, [%o0 + 0x28]
       
   808 +    std    %f12, [%o0 + 0x30]
       
   809 +    retl
       
   810 +     std    %f14, [%o0 + 0x38]
       
   811 +
       
   812 +.align    16
       
   813 +.Lhwunaligned:
       
   814 +    alignaddr %o1, %g0, %o1
       
   815 +
       
   816 +    ldd    [%o1 + 0x00], %f18
       
   817 +.Lhwunaligned_loop:
       
   818 +    ldd    [%o1 + 0x08], %f20
       
   819 +    ldd    [%o1 + 0x10], %f22
       
   820 +    ldd    [%o1 + 0x18], %f24
       
   821 +    ldd    [%o1 + 0x20], %f26
       
   822 +    ldd    [%o1 + 0x28], %f28
       
   823 +    ldd    [%o1 + 0x30], %f30
       
   824 +    ldd    [%o1 + 0x38], %f32
       
   825 +    ldd    [%o1 + 0x40], %f34
       
   826 +    ldd    [%o1 + 0x48], %f36
       
   827 +    ldd    [%o1 + 0x50], %f38
       
   828 +    ldd    [%o1 + 0x58], %f40
       
   829 +    ldd    [%o1 + 0x60], %f42
       
   830 +    ldd    [%o1 + 0x68], %f44
       
   831 +    ldd    [%o1 + 0x70], %f46
       
   832 +    ldd    [%o1 + 0x78], %f48
       
   833 +    subcc    %o2, 1, %o2        ! done yet?
       
   834 +    ldd    [%o1 + 0x80], %f50
       
   835 +    add    %o1, 0x80, %o1
       
   836 +
       
   837 +    faligndata %f18, %f20, %f16
       
   838 +    faligndata %f20, %f22, %f18
       
   839 +    faligndata %f22, %f24, %f20
       
   840 +    faligndata %f24, %f26, %f22
       
   841 +    faligndata %f26, %f28, %f24
       
   842 +    faligndata %f28, %f30, %f26
       
   843 +    faligndata %f30, %f32, %f28
       
   844 +    faligndata %f32, %f34, %f30
       
   845 +    faligndata %f34, %f36, %f32
       
   846 +    faligndata %f36, %f38, %f34
       
   847 +    faligndata %f38, %f40, %f36
       
   848 +    faligndata %f40, %f42, %f38
       
   849 +    faligndata %f42, %f44, %f40
       
   850 +    faligndata %f44, %f46, %f42
       
   851 +    faligndata %f46, %f48, %f44
       
   852 +    faligndata %f48, %f50, %f46
       
   853 +
       
   854 +    .word    0x81b02860        ! SHA512
       
   855 +
       
   856 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
       
   857 +    for    %f50, %f50, %f18    ! %f18=%f50
       
   858 +
       
   859 +    ba    .Lhwfinish
       
   860 +    nop
       
   861 +___
       
   862 +$code.=<<___ if ($SZ==4);         # SHA256
       
   863 +    ld    [%o0 + 0x00], %f0
       
   864 +    ld    [%o0 + 0x04], %f1
       
   865 +    ld    [%o0 + 0x08], %f2
       
   866 +    ld    [%o0 + 0x0c], %f3
       
   867 +    ld    [%o0 + 0x10], %f4
       
   868 +    ld    [%o0 + 0x14], %f5
       
   869 +    andcc    %o1, 0x7, %g0
       
   870 +    ld    [%o0 + 0x18], %f6
       
   871 +    bne,pn    %icc, .Lhwunaligned
       
   872 +     ld    [%o0 + 0x1c], %f7
       
   873 +
       
   874 +.Lhwloop:
       
   875 +    ldd    [%o1 + 0x00], %f8
       
   876 +    ldd    [%o1 + 0x08], %f10
       
   877 +    ldd    [%o1 + 0x10], %f12
       
   878 +    ldd    [%o1 + 0x18], %f14
       
   879 +    ldd    [%o1 + 0x20], %f16
       
   880 +    ldd    [%o1 + 0x28], %f18
       
   881 +    ldd    [%o1 + 0x30], %f20
       
   882 +    subcc    %o2, 1, %o2        ! done yet?
       
   883 +    ldd    [%o1 + 0x38], %f22
       
   884 +    add    %o1, 0x40, %o1
       
   885 +
       
   886 +    .word    0x81b02840        ! SHA256
       
   887 +
       
   888 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhwloop
       
   889 +    nop
       
   890 +
       
   891 +.Lhwfinish:
       
   892 +    st    %f0, [%o0 + 0x00]    ! store context
       
   893 +    st    %f1, [%o0 + 0x04]
       
   894 +    st    %f2, [%o0 + 0x08]
       
   895 +    st    %f3, [%o0 + 0x0c]
       
   896 +    st    %f4, [%o0 + 0x10]
       
   897 +    st    %f5, [%o0 + 0x14]
       
   898 +    st    %f6, [%o0 + 0x18]
       
   899 +    retl
       
   900 +     st    %f7, [%o0 + 0x1c]
       
   901 +
       
   902 +.align    8
       
   903 +.Lhwunaligned:
       
   904 +    alignaddr %o1, %g0, %o1
       
   905 +
       
   906 +    ldd    [%o1 + 0x00], %f10
       
   907 +.Lhwunaligned_loop:
       
   908 +    ldd    [%o1 + 0x08], %f12
       
   909 +    ldd    [%o1 + 0x10], %f14
       
   910 +    ldd    [%o1 + 0x18], %f16
       
   911 +    ldd    [%o1 + 0x20], %f18
       
   912 +    ldd    [%o1 + 0x28], %f20
       
   913 +    ldd    [%o1 + 0x30], %f22
       
   914 +    ldd    [%o1 + 0x38], %f24
       
   915 +    subcc    %o2, 1, %o2        ! done yet?
       
   916 +    ldd    [%o1 + 0x40], %f26
       
   917 +    add    %o1, 0x40, %o1
       
   918 +
       
   919 +    faligndata %f10, %f12, %f8
       
   920 +    faligndata %f12, %f14, %f10
       
   921 +    faligndata %f14, %f16, %f12
       
   922 +    faligndata %f16, %f18, %f14
       
   923 +    faligndata %f18, %f20, %f16
       
   924 +    faligndata %f20, %f22, %f18
       
   925 +    faligndata %f22, %f24, %f20
       
   926 +    faligndata %f24, %f26, %f22
       
   927 +
       
   928 +    .word    0x81b02840        ! SHA256
       
   929 +
       
   930 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
       
   931 +    for    %f26, %f26, %f10    ! %f10=%f26
       
   932 +
       
   933 +    ba    .Lhwfinish
       
   934 +    nop
       
   935 +___
       
   936 +$code.=<<___;
       
   937 +.align    16
       
   938 +.Lsoftware:
       
   939  	save	%sp,`-$frame-$locals`,%sp
       
   940  	and	$inp,`$align-1`,$tmp31
       
   941  	sllx	$len,`log(16*$SZ)/log(2)`,$len
       
   942 @@ -589,6 +787,62 @@
       
   943  .align	4
       
   944  ___
       
   945  
       
   946 -$code =~ s/\`([^\`]*)\`/eval $1/gem;
       
   947 -print $code;
       
   948 +# Purpose of these subroutines is to explicitly encode VIS instructions,
       
   949 +# so that one can compile the module without having to specify VIS
       
   950 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
       
   951 +# Idea is to reserve for option to produce "universal" binary and let
       
   952 +# programmer detect if current CPU is VIS capable at run-time.
       
   953 +sub unvis {
       
   954 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   955 +my ($ref,$opf);
       
   956 +my %visopf = (    "faligndata"    => 0x048,
       
   957 +        "for"        => 0x07c    );
       
   958 +
       
   959 +    $ref = "$mnemonic\t$rs1,$rs2,$rd";
       
   960 +
       
   961 +    if ($opf=$visopf{$mnemonic}) {
       
   962 +    foreach ($rs1,$rs2,$rd) {
       
   963 +        return $ref if (!/%f([0-9]{1,2})/);
       
   964 +        $_=$1;
       
   965 +        if ($1>=32) {
       
   966 +        return $ref if ($1&1);
       
   967 +        # re-encode for upper double register addressing
       
   968 +        $_=($1|$1>>5)&31;
       
   969 +        }
       
   970 +    }
       
   971 +
       
   972 +    return    sprintf ".word\t0x%08x !%s",
       
   973 +            0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
       
   974 +            $ref;
       
   975 +    } else {
       
   976 +    return $ref;
       
   977 +    }
       
   978 +}
       
   979 +sub unalignaddr {
       
   980 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   981 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
       
   982 +my $ref="$mnemonic\t$rs1,$rs2,$rd";
       
   983 +
       
   984 +    foreach ($rs1,$rs2,$rd) {
       
   985 +    if (/%([goli])([0-7])/)    { $_=$bias{$1}+$2; }
       
   986 +    else            { return $ref; }
       
   987 +    }
       
   988 +    return  sprintf ".word\t0x%08x !%s",
       
   989 +            0x81b00300|$rd<<25|$rs1<<14|$rs2,
       
   990 +            $ref;
       
   991 +}
       
   992 +
       
   993 +foreach (split("\n",$code)) {
       
   994 +    s/\`([^\`]*)\`/eval $1/ge;
       
   995 +
       
   996 +    s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
       
   997 +        &unvis($1,$2,$3,$4)
       
   998 +     /ge;
       
   999 +    s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
       
  1000 +        &unalignaddr($1,$2,$3,$4)
       
  1001 +     /ge;
       
  1002 +
       
  1003 +    print $_,"\n";
       
  1004 +}
       
  1005 +
       
  1006  close STDOUT;
       
  1007 Index: crypto/des/Makefile
       
  1008 ===================================================================
       
  1009 diff -ru openssl-1.0.1e/crypto/des/Makefile.orig openssl-1.0.1e/crypto/des/Makefile
       
  1010 --- a/crypto/des/Makefile
       
  1011 +++ b/crypto/des/Makefile
       
  1012 @@ -61,6 +61,8 @@ des: des.o cbc3_enc.o lib
       
  1013  
       
  1014  des_enc-sparc.S:	asm/des_enc.m4
       
  1015  	m4 -B 8192 asm/des_enc.m4 > des_enc-sparc.S
       
  1016 +dest4-sparcv9.s:	asm/dest4-sparcv9.pl
       
  1017 +	$(PERL) asm/dest4-sparcv9.pl $(CFLAGS) > $@
       
  1018  
       
  1019  des-586.s:	asm/des-586.pl ../perlasm/x86asm.pl ../perlasm/cbc.pl
       
  1020  	$(PERL) asm/des-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@
       
  1021 Index: crypto/evp/e_des.c
       
  1022 ===================================================================
       
  1023 diff -ru openssl-1.0.1e/crypto/evp/e_des.c.orig openssl-1.0.1e/crypto/evp/e_des.c
       
  1024 --- a/crypto/evp/e_des.c
       
  1025 +++ b/crypto/evp/e_des.c
       
  1026 @@ -65,6 +65,30 @@
       
  1027  # include <openssl/des.h>
       
  1028  # include <openssl/rand.h>
       
  1029  
       
  1030 +typedef struct {
       
  1031 +    union { double align; DES_key_schedule ks; } ks;
       
  1032 +    union {
       
  1033 +        void (*cbc)(const void *,void *,size_t,const void *,void *);
       
  1034 +    } stream;
       
  1035 +} EVP_DES_KEY;
       
  1036 +
       
  1037 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
       
  1038 +/* ---------^^^ this is not a typo, just a way to detect that
       
  1039 + * assembler support was in general requested...
       
  1040 + */
       
  1041 +#include "sparc_arch.h"
       
  1042 +
       
  1043 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1044 +
       
  1045 +#define SPARC_DES_CAPABLE    (OPENSSL_sparcv9cap_P[1] & CFR_DES)
       
  1046 +
       
  1047 +void    des_t4_key_expand(const void *key, DES_key_schedule *ks);
       
  1048 +void    des_t4_cbc_encrypt(const void *inp,void *out,size_t len,
       
  1049 +                DES_key_schedule *ks,unsigned char iv[8]);
       
  1050 +void    des_t4_cbc_decrypt(const void *inp,void *out,size_t len,
       
  1051 +                DES_key_schedule *ks,unsigned char iv[8]);
       
  1052 +#endif
       
  1053 +
       
  1054  static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1055                          const unsigned char *iv, int enc);
       
  1056  static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr);
       
  1057 @@ -102,6 +126,12 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1058  static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1059                            const unsigned char *in, size_t inl)
       
  1060  {
       
  1061 +    EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data;
       
  1062 +
       
  1063 +    if (dat->stream.cbc) {
       
  1064 +        (*dat->stream.cbc)(in,out,inl,&dat->ks.ks,ctx->iv);
       
  1065 +        return 1;
       
  1066 +    }
       
  1067      while (inl >= EVP_MAXCHUNK) {
       
  1068          DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data,
       
  1069                           (DES_cblock *)ctx->iv, ctx->encrypt);
       
  1070 @@ -179,16 +209,16 @@
       
  1071      return 1;
       
  1072  }
       
  1073  
       
  1074 -BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64,
       
  1075 +BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64,
       
  1076                    EVP_CIPH_RAND_KEY, des_init_key, NULL,
       
  1077                    EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl)
       
  1078  
       
  1079  
       
  1080 -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 1,
       
  1081 +BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 1,
       
  1082                   EVP_CIPH_RAND_KEY, des_init_key, NULL,
       
  1083                   EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl)
       
  1084  
       
  1085 -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 8,
       
  1086 +BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 8,
       
  1087                       EVP_CIPH_RAND_KEY, des_init_key, NULL,
       
  1088                       EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl)
       
  1089  
       
  1090 @@ -196,8 +226,23 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1091                          const unsigned char *iv, int enc)
       
  1092  {
       
  1093      DES_cblock *deskey = (DES_cblock *)key;
       
  1094 +    EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data;
       
  1095 +
       
  1096 +    dat->stream.cbc = NULL;
       
  1097 +#if defined(SPARC_DES_CAPABLE)
       
  1098 +    if (SPARC_DES_CAPABLE) {
       
  1099 +        int mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1100 +
       
  1101 +        if (mode == EVP_CIPH_CBC_MODE) {
       
  1102 +            des_t4_key_expand(key,&dat->ks.ks);
       
  1103 +            dat->stream.cbc = enc ? des_t4_cbc_encrypt :
       
  1104 +                        des_t4_cbc_decrypt;
       
  1105 +            return 1;
       
  1106 +        }
       
  1107 +    }
       
  1108 +#endif
       
  1109  # ifdef EVP_CHECK_DES_KEY
       
  1110 -    if (DES_set_key_checked(deskey, ctx->cipher_data) != 0)
       
  1111 +    if (DES_set_key_checked(deskey, &dat->ks.ks) != 0)
       
  1112          return 0;
       
  1113  # else
       
  1114      DES_set_key_unchecked(deskey, ctx->cipher_data);
       
  1115 Index: crypto/evp/e_des3.c
       
  1116 ===================================================================
       
  1117 diff -ru openssl-1.0.1e/crypto/evp/e_des3.c.orig openssl-1.0.1e/crypto/evp/e_des3.c
       
  1118 --- a/crypto/evp/e_des3.c
       
  1119 +++ b/crypto/evp/e_des3.c
       
  1120 @@ -65,6 +65,32 @@
       
  1121  # include <openssl/des.h>
       
  1122  # include <openssl/rand.h>
       
  1123  
       
  1124 +typedef struct {
       
  1125 +    union { double align; DES_key_schedule ks[3]; } ks;
       
  1126 +    union {
       
  1127 +        void (*cbc)(const void *,void *,size_t,const void *,void *);
       
  1128 +    } stream;
       
  1129 +} DES_EDE_KEY;
       
  1130 +#define ks1 ks.ks[0]
       
  1131 +#define ks2 ks.ks[1]
       
  1132 +#define ks3 ks.ks[2]
       
  1133 +
       
  1134 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
       
  1135 +/* ---------^^^ this is not a typo, just a way to detect that
       
  1136 + * assembler support was in general requested... */
       
  1137 +#include "sparc_arch.h"
       
  1138 +
       
  1139 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1140 +
       
  1141 +#define SPARC_DES_CAPABLE    (OPENSSL_sparcv9cap_P[1] & CFR_DES)
       
  1142 +
       
  1143 +void    des_t4_key_expand(const void *key, DES_key_schedule *ks);
       
  1144 +void    des_t4_ede3_cbc_encrypt(const void *inp,void *out,size_t len,
       
  1145 +                DES_key_schedule *ks,unsigned char iv[8]);
       
  1146 +void    des_t4_ede3_cbc_decrypt(const void *inp,void *out,size_t len,
       
  1147 +                DES_key_schedule *ks,unsigned char iv[8]);
       
  1148 +#endif
       
  1149 +
       
  1150  # ifndef OPENSSL_FIPS
       
  1151  
       
  1152  static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1153 @@ -75,12 +100,6 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1154 
       
  1155  static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr);
       
  1156 
       
  1157 -typedef struct {
       
  1158 -    DES_key_schedule ks1;       /* key schedule */
       
  1159 -    DES_key_schedule ks2;       /* key schedule (for ede) */
       
  1160 -    DES_key_schedule ks3;       /* key schedule (for ede3) */
       
  1161 -} DES_EDE_KEY;
       
  1162 -
       
  1163  #  define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data)
       
  1164 
       
  1165  /*
       
  1166 @@ -123,6 +117,7 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1167  static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1168                                const unsigned char *in, size_t inl)
       
  1169  {
       
  1170 +    DES_EDE_KEY *dat = data(ctx);
       
  1171  #  ifdef KSSL_DEBUG
       
  1172      {
       
  1173          int i;
       
  1174 @@ -134,11 +155,15 @@
       
  1175          fprintf(stderr, "\n");
       
  1176      }
       
  1177  #  endif                        /* KSSL_DEBUG */
       
  1178 +    if (dat->stream.cbc) {
       
  1179 +        (*dat->stream.cbc)(in,out,inl,&dat->ks,ctx->iv);
       
  1180 +        return 1;
       
  1181 +    }
       
  1182 +
       
  1183      while (inl >= EVP_MAXCHUNK) {
       
  1184          DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK,
       
  1185 -                             &data(ctx)->ks1, &data(ctx)->ks2,
       
  1186 -                             &data(ctx)->ks3, (DES_cblock *)ctx->iv,
       
  1187 -                             ctx->encrypt);
       
  1188 +                             &dat->ks1, &dat->ks2, &dat->ks3,
       
  1189 +                             (DES_cblock *)ctx->iv, ctx->encrypt);
       
  1190          inl -= EVP_MAXCHUNK;
       
  1191          in += EVP_MAXCHUNK;
       
  1192          out += EVP_MAXCHUNK;
       
  1193 @@ -145,9 +170,8 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1194      }
       
  1195      if (inl)
       
  1196          DES_ede3_cbc_encrypt(in, out, (long)inl,
       
  1197 -                             &data(ctx)->ks1, &data(ctx)->ks2,
       
  1198 -                             &data(ctx)->ks3, (DES_cblock *)ctx->iv,
       
  1199 -                             ctx->encrypt);
       
  1200 +                             &dat->ks1, &dat->ks2, &dat->ks3,
       
  1201 +                             (DES_cblock *)ctx->iv, ctx->encrypt);
       
  1202      return 1;
       
  1203  }
       
  1204 
       
  1205 @@ -215,39 +239,58 @@ static int des_ede3_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1206  }
       
  1207 
       
  1208  BLOCK_CIPHER_defs(des_ede, DES_EDE_KEY, NID_des_ede, 8, 16, 8, 64,
       
  1209 -                  EVP_CIPH_RAND_KEY, des_ede_init_key, NULL,
       
  1210 -                  EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl)
       
  1211 +                  EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1212 +                  des_ede_init_key, NULL, NULL, NULL,
       
  1213 +                 des3_ctrl)
       
  1214  #  define des_ede3_cfb64_cipher des_ede_cfb64_cipher
       
  1215  #  define des_ede3_ofb_cipher des_ede_ofb_cipher
       
  1216  #  define des_ede3_cbc_cipher des_ede_cbc_cipher
       
  1217  #  define des_ede3_ecb_cipher des_ede_ecb_cipher
       
  1218      BLOCK_CIPHER_defs(des_ede3, DES_EDE_KEY, NID_des_ede3, 8, 24, 8, 64,
       
  1219 -                  EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL,
       
  1220 -                  EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl)
       
  1221 +        EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1222 +        des_ede3_init_key, NULL, NULL, NULL,
       
  1223 +        des3_ctrl)
       
  1224 
       
  1225      BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 1,
       
  1226 -                     EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL,
       
  1227 -                     EVP_CIPHER_set_asn1_iv,
       
  1228 -                     EVP_CIPHER_get_asn1_iv, des3_ctrl)
       
  1229 +        EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1230 +        des_ede3_init_key, NULL, NULL, NULL,
       
  1231 +        des3_ctrl)
       
  1232 
       
  1233      BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 8,
       
  1234 -                     EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL,
       
  1235 -                     EVP_CIPHER_set_asn1_iv,
       
  1236 -                     EVP_CIPHER_get_asn1_iv, des3_ctrl)
       
  1237 +        EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1238 +        des_ede3_init_key, NULL, NULL, NULL,
       
  1239 +        des3_ctrl)
       
  1240 
       
  1241  static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1242                              const unsigned char *iv, int enc)
       
  1243  {
       
  1244      DES_cblock *deskey = (DES_cblock *)key;
       
  1245 +    DES_EDE_KEY *dat = data(ctx);
       
  1246 +
       
  1247 +    dat->stream.cbc = NULL;
       
  1248 +#if defined(SPARC_DES_CAPABLE)
       
  1249 +    if (SPARC_DES_CAPABLE) {
       
  1250 +        int mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1251 +
       
  1252 +        if (mode == EVP_CIPH_CBC_MODE) {
       
  1253 +            des_t4_key_expand(&deskey[0],&dat->ks1);
       
  1254 +            des_t4_key_expand(&deskey[1],&dat->ks2);
       
  1255 +            memcpy(&dat->ks3,&dat->ks1,sizeof(dat->ks1));
       
  1256 +            dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt :
       
  1257 +                        des_t4_ede3_cbc_decrypt;
       
  1258 +            return 1;
       
  1259 +        }
       
  1260 +    }
       
  1261 +#endif
       
  1262  #  ifdef EVP_CHECK_DES_KEY
       
  1263 -    if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1)
       
  1264 -        ! !DES_set_key_checked(&deskey[1], &data(ctx)->ks2))
       
  1265 +    if (DES_set_key_checked(&deskey[0],&dat->ks1)
       
  1266 +        || DES_set_key_checked(&deskey[1],&dat->ks2))
       
  1267          return 0;
       
  1268  #  else
       
  1269 -    DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1);
       
  1270 -    DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2);
       
  1271 +    DES_set_key_unchecked(&deskey[0],&dat->ks1);
       
  1272 +    DES_set_key_unchecked(&deskey[1],&dat->ks2);
       
  1273  #  endif
       
  1274 -    memcpy(&data(ctx)->ks3, &data(ctx)->ks1, sizeof(data(ctx)->ks1));
       
  1275 +    memcpy(&dat->ks3,&dat->ks1, sizeof(dat->ks1));
       
  1276      return 1;
       
  1277  }
       
  1278 
       
  1279 @@ -255,6 +298,8 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1280                               const unsigned char *iv, int enc)
       
  1281  {
       
  1282      DES_cblock *deskey = (DES_cblock *)key;
       
  1283 +    DES_EDE_KEY *dat = data(ctx);
       
  1284 +
       
  1285  #  ifdef KSSL_DEBUG
       
  1286      {
       
  1287          int i;
       
  1288 @@ -272,15 +317,30 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1289      }
       
  1290  #  endif                        /* KSSL_DEBUG */
       
  1291 
       
  1292 +    dat->stream.cbc = NULL;
       
  1293 +#if defined(SPARC_DES_CAPABLE)
       
  1294 +    if (SPARC_DES_CAPABLE) {
       
  1295 +        int mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1296 +
       
  1297 +        if (mode == EVP_CIPH_CBC_MODE) {
       
  1298 +            des_t4_key_expand(&deskey[0],&dat->ks1);
       
  1299 +            des_t4_key_expand(&deskey[1],&dat->ks2);
       
  1300 +            des_t4_key_expand(&deskey[2],&dat->ks3);
       
  1301 +            dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt :
       
  1302 +                        des_t4_ede3_cbc_decrypt;
       
  1303 +            return 1;
       
  1304 +        }
       
  1305 +    }
       
  1306 +#endif
       
  1307  #  ifdef EVP_CHECK_DES_KEY
       
  1308 -    if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1)
       
  1309 -        || DES_set_key_checked(&deskey[1], &data(ctx)->ks2)
       
  1310 -        || DES_set_key_checked(&deskey[2], &data(ctx)->ks3))
       
  1311 +    if (DES_set_key_checked(&deskey[0],&dat->ks1)
       
  1312 +        || DES_set_key_checked(&deskey[1],&dat->ks2)
       
  1313 +        || DES_set_key_checked(&deskey[2],&dat->ks3))
       
  1314          return 0;
       
  1315  #  else
       
  1316 -    DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1);
       
  1317 -    DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2);
       
  1318 -    DES_set_key_unchecked(&deskey[2], &data(ctx)->ks3);
       
  1319 +    DES_set_key_unchecked(&deskey[0],&dat->ks1);
       
  1320 +    DES_set_key_unchecked(&deskey[1],&dat->ks2);
       
  1321 +    DES_set_key_unchecked(&deskey[2],&dat->ks3);
       
  1322  #  endif
       
  1323      return 1;
       
  1324  }
       
  1325 Index: openssl/crypto/bn/Makefile
       
  1326 ===================================================================
       
  1327 diff -ru openssl-1.0.1e/crypto/bn/Makefile openssl-1.0.1e/crypto/bn/Makefile.new
       
  1328 --- openssl-1.0.1e/crypto/bn/Makefile 2011-05-24 17:02:24.000000000 -0700
       
  1329 +++ openssl-1.0.1e/crypto/bn/Makefile 2011-07-27 10:48:17.817470000 -0700
       
  1330 @@ -77,6 +77,12 @@
       
  1331  	$(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@
       
  1332  sparcv9-mont.s:		asm/sparcv9-mont.pl
       
  1333  	$(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@
       
  1334 +vis3-mont.s:		asm/vis3-mont.pl
       
  1335 +	$(PERL) asm/vis3-mont.pl $(CFLAGS) > $@
       
  1336 +sparct4-mont.S:	asm/sparct4-mont.pl
       
  1337 +	$(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@
       
  1338 +sparcv9-gf2m.S:	asm/sparcv9-gf2m.pl
       
  1339 +	$(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@
       
  1340  
       
  1341  bn-mips3.o:	asm/mips3.s
       
  1342  	@if [ "$(CC)" = "gcc" ]; then \
       
  1343 Index: openssl/crypto/bn/bn_exp.c
       
  1344 ===================================================================
       
  1345 diff -ru openssl-1.0.1e/crypto/bn/bn_exp.c openssl-1.0.1e/crypto/bn/bn_exp.c.new
       
  1346 --- bn_exp.c    2011/10/29 19:25:13    1.38
       
  1347 +++ bn_exp.c    2012/11/17 10:34:11    1.39
       
  1348 @@ -122,8 +122,15 @@
       
  1349  # ifndef alloca
       
  1350  #  define alloca(s) __builtin_alloca((s))
       
  1351  # endif
       
  1352 +#else
       
  1353 +#include <alloca.h>
       
  1354  #endif
       
  1355  
       
  1356 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1357 +# include "sparc_arch.h"
       
  1358 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1359 +#endif
       
  1360 +
       
  1361  /* maximum precomputation table size for *variable* sliding windows */
       
  1362  #define TABLE_SIZE      32
       
  1363  
       
  1364 @@ -464,8 +471,16 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1365      wstart = bits - 1;          /* The top bit of the window */
       
  1366      wend = 0;                   /* The bottom bit of the window */
       
  1367  
       
  1368 +#if 1    /* by Shay Gueron's suggestion */
       
  1369 +    j = mont->N.top;    /* borrow j */
       
  1370 +    if (bn_wexpand(r,j) == NULL) goto err;
       
  1371 +    r->d[0] = (0-m->d[0])&BN_MASK2;        /* 2^(top*BN_BITS2) - m */
       
  1372 +    for(i=1;i<j;i++) r->d[i] = (~m->d[i])&BN_MASK2;
       
  1373 +    r->top = j;
       
  1374 +#else
       
  1375      if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
       
  1376          goto err;
       
  1377 +#endif
       
  1378      for (;;) {
       
  1379          if (BN_is_bit_set(p, wstart) == 0) {
       
  1380              if (!start) {
       
  1381 @@ -515,6 +530,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1382          if (wstart < 0)
       
  1383              break;
       
  1384      }
       
  1385 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1386 +    if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3|SPARCV9_PREFER_FPU)) {
       
  1387 +        j = mont->N.top;    /* borrow j */
       
  1388 +        val[0]->d[0] = 1;    /* borrow val[0] */
       
  1389 +        for (i=1;i<j;i++)
       
  1390 +            val[0]->d[i] = 0;
       
  1391 +        val[0]->top = j;
       
  1392 +        if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx))
       
  1393 +            goto err;
       
  1394 +    } else
       
  1395 +#endif
       
  1396      if (!BN_from_montgomery(rr, r, mont, ctx))
       
  1397          goto err;
       
  1398      ret = 1;
       
  1399 @@ -526,6 +552,26 @@ err:
       
  1400      return (ret);
       
  1401  }
       
  1402 
       
  1403 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1404 +static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) {
       
  1405 +    BN_ULONG ret = 0;
       
  1406 +    int wordpos;
       
  1407 +
       
  1408 +    wordpos = bitpos / BN_BITS2;
       
  1409 +    bitpos %= BN_BITS2;
       
  1410 +    if (wordpos>=0 && wordpos < a->top) {
       
  1411 +        ret = a->d[wordpos]&BN_MASK2;
       
  1412 +        if (bitpos) {
       
  1413 +            ret >>= bitpos;
       
  1414 +            if (++wordpos < a->top)
       
  1415 +                ret |= a->d[wordpos]<<(BN_BITS2-bitpos);
       
  1416 +        }
       
  1417 +    }
       
  1418 +
       
  1419 +    return ret & BN_MASK2;
       
  1420 +}
       
  1421 +#endif
       
  1422 +
       
  1423  /*
       
  1424   * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
       
  1425   * layout so that accessing any of these table values shows the same access
       
  1426 @@ -594,6 +640,9 @@
       
  1427      int powerbufLen = 0;
       
  1428      unsigned char *powerbuf = NULL;
       
  1429      BIGNUM tmp, am;
       
  1430 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc)
       
  1431 +    unsigned int t4=0;
       
  1432 +#endif
       
  1433 
       
  1434      bn_check_top(a);
       
  1435      bn_check_top(p);
       
  1436 @@ -628,10 +677,18 @@
       
  1437 
       
  1438      /* Get the window size to use with size of p. */
       
  1439      window = BN_window_bits_for_ctime_exponent_size(bits);
       
  1440 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc)
       
  1441 +    if (window>=5 && (top&15)==0 && top<=64 &&
       
  1442 +        (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))==
       
  1443 +        (CFR_MONTMUL|CFR_MONTSQR) && (t4=OPENSSL_sparcv9cap_P[0]))
       
  1444 +            window=5;
       
  1445 +    else
       
  1446 +#endif
       
  1447  #if defined(OPENSSL_BN_ASM_MONT5)
       
  1448      if (window == 6 && bits <= 1024)
       
  1449          window = 5;             /* ~5% improvement of 2048-bit RSA sign */
       
  1450  #endif
       
  1451 +    (void) 0;
       
  1452 
       
  1453      /*
       
  1454       * Allocate a buffer large enough to hold all of the pre-computed powers
       
  1455 @@ -670,14 +727,14 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1456      tmp.flags = am.flags = BN_FLG_STATIC_DATA;
       
  1457 
       
  1458      /* prepare a^0 in Montgomery domain */
       
  1459 -#if 1
       
  1460 -    if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
       
  1461 -        goto err;
       
  1462 -#else
       
  1463 +#if 1    /* by Shay Gueron's suggestion */
       
  1464      tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */
       
  1465      for (i = 1; i < top; i++)
       
  1466          tmp.d[i] = (~m->d[i]) & BN_MASK2;
       
  1467      tmp.top = top;
       
  1468 +#else
       
  1469 +    if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx))
       
  1470 +        goto err;
       
  1471  #endif
       
  1472 
       
  1473      /* prepare a^1 in Montgomery domain */
       
  1474 @@ -689,6 +746,122 @@
       
  1475      } else if (!BN_to_montgomery(&am, a, mont, ctx))
       
  1476          goto err;
       
  1477 
       
  1478 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc)
       
  1479 +    if (t4) {
       
  1480 +        typedef int (*bn_pwr5_mont_f)(BN_ULONG *tp,const BN_ULONG *np,
       
  1481 +            const BN_ULONG *n0,const void *table,int power,int bits);
       
  1482 +        int bn_pwr5_mont_t4_8(BN_ULONG *tp,const BN_ULONG *np,
       
  1483 +            const BN_ULONG *n0,const void *table,int power,int bits);
       
  1484 +        int bn_pwr5_mont_t4_16(BN_ULONG *tp,const BN_ULONG *np,
       
  1485 +            const BN_ULONG *n0,const void *table,int power,int bits);
       
  1486 +        int bn_pwr5_mont_t4_24(BN_ULONG *tp,const BN_ULONG *np,
       
  1487 +            const BN_ULONG *n0,const void *table,int power,int bits);
       
  1488 +        int bn_pwr5_mont_t4_32(BN_ULONG *tp,const BN_ULONG *np,
       
  1489 +            const BN_ULONG *n0,const void *table,int power,int bits);
       
  1490 +        static const bn_pwr5_mont_f pwr5_funcs[4] = {
       
  1491 +            bn_pwr5_mont_t4_8,    bn_pwr5_mont_t4_16,
       
  1492 +            bn_pwr5_mont_t4_24,    bn_pwr5_mont_t4_32 };
       
  1493 +        bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top/16-1];
       
  1494 +
       
  1495 +        typedef int (*bn_mul_mont_f)(BN_ULONG *rp,const BN_ULONG *ap,
       
  1496 +            const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1497 +        int bn_mul_mont_t4_8(BN_ULONG *rp,const BN_ULONG *ap,
       
  1498 +            const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1499 +        int bn_mul_mont_t4_16(BN_ULONG *rp,const BN_ULONG *ap,
       
  1500 +            const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1501 +        int bn_mul_mont_t4_24(BN_ULONG *rp,const BN_ULONG *ap,
       
  1502 +            const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1503 +        int bn_mul_mont_t4_32(BN_ULONG *rp,const BN_ULONG *ap,
       
  1504 +            const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1505 +        static const bn_mul_mont_f mul_funcs[4] = {
       
  1506 +            bn_mul_mont_t4_8,    bn_mul_mont_t4_16,
       
  1507 +            bn_mul_mont_t4_24,    bn_mul_mont_t4_32 };
       
  1508 +        bn_mul_mont_f mul_worker = mul_funcs[top/16-1];
       
  1509 +
       
  1510 +        void bn_mul_mont_vis3(BN_ULONG *rp,const BN_ULONG *ap,
       
  1511 +            const void *bp,const BN_ULONG *np,
       
  1512 +            const BN_ULONG *n0,int num);
       
  1513 +        void bn_mul_mont_t4(BN_ULONG *rp,const BN_ULONG *ap,
       
  1514 +            const void *bp,const BN_ULONG *np,
       
  1515 +            const BN_ULONG *n0,int num);
       
  1516 +        void bn_mul_mont_gather5_t4(BN_ULONG *rp,const BN_ULONG *ap,
       
  1517 +            const void *table,const BN_ULONG *np,
       
  1518 +            const BN_ULONG *n0,int num,int power);
       
  1519 +        void bn_flip_n_scatter5_t4(const BN_ULONG *inp,size_t num,
       
  1520 +            void *table,size_t power);
       
  1521 +        void bn_gather5_t4(BN_ULONG *out,size_t num,
       
  1522 +            void *table,size_t power);
       
  1523 +        void bn_flip_t4(BN_ULONG *dst,BN_ULONG *src,size_t num);
       
  1524 +
       
  1525 +        BN_ULONG *np=mont->N.d, *n0=mont->n0;
       
  1526 +        int stride = 5*(6-(top/16-1));    /* multiple of 5, but less than 32 */
       
  1527 +
       
  1528 +        /*
       
  1529 +         * BN_to_montgomery can contaminate words above .top
       
  1530 +         * [in BN_DEBUG[_DEBUG] build]...
       
  1531 +         */
       
  1532 +        for (i=am.top; i<top; i++)    am.d[i]=0;
       
  1533 +        for (i=tmp.top; i<top; i++)    tmp.d[i]=0;
       
  1534 +
       
  1535 +        bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,0);
       
  1536 +        bn_flip_n_scatter5_t4(am.d,top,powerbuf,1);
       
  1537 +        if (!(*mul_worker)(tmp.d,am.d,am.d,np,n0) &&
       
  1538 +        !(*mul_worker)(tmp.d,am.d,am.d,np,n0))
       
  1539 +        bn_mul_mont_vis3(tmp.d,am.d,am.d,np,n0,top);
       
  1540 +        bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,2);
       
  1541 +
       
  1542 +        for (i=3; i<32; i++) {
       
  1543 +        /* Calculate a^i = a^(i-1) * a */
       
  1544 +        if (!(*mul_worker)(tmp.d,tmp.d,am.d,np,n0) &&
       
  1545 +            !(*mul_worker)(tmp.d,tmp.d,am.d,np,n0))
       
  1546 +            bn_mul_mont_vis3(tmp.d,tmp.d,am.d,np,n0,top);
       
  1547 +        bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,i);
       
  1548 +        }
       
  1549 +
       
  1550 +        /* switch to 64-bit domain */
       
  1551 +        np = alloca(top*sizeof(BN_ULONG));
       
  1552 +        top /= 2;
       
  1553 +        bn_flip_t4(np,mont->N.d,top);
       
  1554 +
       
  1555 +        bits--;
       
  1556 +        for (wvalue=0, i=bits%5; i>=0; i--,bits--)
       
  1557 +        wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
       
  1558 +        bn_gather5_t4(tmp.d,top,powerbuf,wvalue);
       
  1559 +
       
  1560 +        /* Scan the exponent one window at a time starting from the most
       
  1561 +         * significant bits.
       
  1562 +         */
       
  1563 +        while (bits >= 0) {
       
  1564 +        if (bits < stride)
       
  1565 +            stride = bits+1;
       
  1566 +        bits -= stride;
       
  1567 +        wvalue = (bn_get_bits(p,bits+1));
       
  1568 +
       
  1569 +        if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride))
       
  1570 +            continue;
       
  1571 +        /* retry once and fall back */
       
  1572 +        if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride))
       
  1573 +            continue;
       
  1574 +
       
  1575 +        bits += stride-5;
       
  1576 +        wvalue >>= stride-5;
       
  1577 +        wvalue &= 31;
       
  1578 +        bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1579 +        bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1580 +        bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1581 +        bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1582 +        bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1583 +        bn_mul_mont_gather5_t4(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
       
  1584 +        }
       
  1585 +
       
  1586 +        bn_flip_t4(tmp.d,tmp.d,top);
       
  1587 +        top *= 2;
       
  1588 +        /* back to 32-bit domain */
       
  1589 +        tmp.top=top;
       
  1590 +        bn_correct_top(&tmp);
       
  1591 +        OPENSSL_cleanse(np,top*sizeof(BN_ULONG));
       
  1592 +    } else
       
  1593 +#endif
       
  1594  #if defined(OPENSSL_BN_ASM_MONT5)
       
  1595      if (window == 5 && top > 1) {
       
  1596          /*
       
  1597 @@ -844,6 +1017,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1598      }
       
  1599 
       
  1600      /* Convert the final result from montgomery to standard format */
       
  1601 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1602 +    if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3|SPARCV9_PREFER_FPU)) {
       
  1603 +        am.d[0] = 1;    /* borrow am */
       
  1604 +        for (i = 1; i < top; i++)
       
  1605 +            am.d[i] = 0;
       
  1606 +        if (!BN_mod_mul_montgomery(rr,&tmp,&am,mont,ctx))
       
  1607 +            goto err;
       
  1608 +    } else
       
  1609 +#endif
       
  1610      if (!BN_from_montgomery(rr, &tmp, mont, ctx))
       
  1611          goto err;
       
  1612      ret = 1;
       
  1613 Index: openssl/apps/speed.c
       
  1614 ===================================================================
       
  1615 diff -ru openssl-1.0.1e/apps/speed.c openssl-1.0.1e/apps/speed.c
       
  1616 --- openssl-1.0.1e/apps/speed.c 2011-05-24 17:02:24.000000000 -0700
       
  1617 +++ openssl-1.0.1e/apps/speed.c 2011-07-27 10:48:17.817470000 -0700
       
  1618 @@ -1586,8 +1586,7 @@
       
  1619              print_message(names[D_MD5], c[D_MD5][j], lengths[j]);
       
  1620              Time_F(START);
       
  1621              for (count = 0, run = 1; COND(c[D_MD5][j]); count++)
       
  1622 -                EVP_Digest(&(buf[0]), (unsigned long)lengths[j], &(md5[0]),
       
  1623 -                           NULL, EVP_get_digestbyname("md5"), NULL);
       
  1624 +                MD5(buf, lengths[j], md5);
       
  1625              d = Time_F(STOP);
       
  1626              print_result(D_MD5, j, count, d);
       
  1627          }
       
  1628 @@ -1622,8 +1621,7 @@
       
  1629              print_message(names[D_SHA1], c[D_SHA1][j], lengths[j]);
       
  1630              Time_F(START);
       
  1631              for (count = 0, run = 1; COND(c[D_SHA1][j]); count++)
       
  1632 -                EVP_Digest(buf, (unsigned long)lengths[j], &(sha[0]), NULL,
       
  1633 -                           EVP_sha1(), NULL);
       
  1634 +                SHA1(buf, lengths[j], sha);
       
  1635              d = Time_F(STOP);
       
  1636              print_result(D_SHA1, j, count, d);
       
  1637          }
       
  1638 Index: openssl/crypto/aes/Makefile
       
  1639 ===================================================================
       
  1640 --- Makefile	Thu May  2 13:42:37 2013
       
  1641 +++ Makefile.orig	Thu May  2 13:41:51 2013
       
  1642 @@ -69,6 +69,9 @@
       
  1643  aes-sparcv9.s: asm/aes-sparcv9.pl
       
  1644  	$(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@
       
  1645  
       
  1646 +aest4-sparcv9.s: asm/aest4-sparcv9.pl
       
  1647 +	$(PERL) asm/aest4-sparcv9.pl $(CFLAGS) > $@
       
  1648 +
       
  1649  aes-ppc.s:	asm/aes-ppc.pl
       
  1650  	$(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@
       
  1651  
       
  1652 Index: openssl/crypto/evp/e_aes.c
       
  1653 ===================================================================
       
  1654 --- e_aes.c    Mon Feb 11 07:26:04 2013
       
  1655 +++ e_aes.c.56    Thu May  2 14:26:35 2013
       
  1656 @@ -56,12 +58,11 @@
       
  1657  # include <assert.h>
       
  1658  # include <openssl/aes.h>
       
  1659  # include "evp_locl.h"
       
  1660 -# ifndef OPENSSL_FIPS
       
  1661  #  include "modes_lcl.h"
       
  1662  #  include <openssl/rand.h>
       
  1663 
       
  1664  typedef struct {
       
  1665 -    AES_KEY ks;
       
  1666 +    union { double align; AES_KEY ks; } ks;
       
  1667      block128_f block;
       
  1668      union {
       
  1669          cbc128_f cbc;
       
  1670 @@ -70,7 +69,7 @@
       
  1671  } EVP_AES_KEY;
       
  1672 
       
  1673  typedef struct {
       
  1674 -    AES_KEY ks;                 /* AES key schedule to use */
       
  1675 +    union { double align; AES_KEY ks; } ks;    /* AES key schedule to use */
       
  1676      int key_set;                /* Set if key initialised */
       
  1677      int iv_set;                 /* Set if an iv is set */
       
  1678      GCM128_CONTEXT gcm;
       
  1679 @@ -83,7 +82,7 @@
       
  1680  } EVP_AES_GCM_CTX;
       
  1681 
       
  1682  typedef struct {
       
  1683 -    AES_KEY ks1, ks2;           /* AES key schedules to use */
       
  1684 +    union { double align; AES_KEY ks; } ks1, ks2;    /* AES key schedules to use */
       
  1685      XTS128_CONTEXT xts;
       
  1686      void (*stream) (const unsigned char *in,
       
  1687                      unsigned char *out, size_t length,
       
  1688 @@ -92,7 +91,7 @@
       
  1689  } EVP_AES_XTS_CTX;
       
  1690 
       
  1691  typedef struct {
       
  1692 -    AES_KEY ks;                 /* AES key schedule to use */
       
  1693 +    union { double align; AES_KEY ks; } ks;    /* AES key schedule to use */
       
  1694      int key_set;                /* Set if key initialised */
       
  1695      int iv_set;                 /* Set if an iv is set */
       
  1696      int tag_set;                /* Set if tag is valid */
       
  1697 @@ -155,7 +154,7 @@
       
  1698          defined(_M_AMD64)       || defined(_M_X64)      || \
       
  1699          defined(__INTEL__)                              )
       
  1700 
       
  1701 -extern unsigned int OPENSSL_ia32cap_P[2];
       
  1702 +extern unsigned int OPENSSL_ia32cap_P[];
       
  1703 
       
  1704  #   ifdef VPAES_ASM
       
  1705  #    define VPAES_CAPABLE   (OPENSSL_ia32cap_P[1]&(1<<(41-32)))
       
  1706 @@ -297,7 +296,7 @@
       
  1707      if (!iv && !key)
       
  1708          return 1;
       
  1709      if (key) {
       
  1710 -        aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
       
  1711 +        aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
       
  1712          CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) aesni_encrypt);
       
  1713          gctx->ctr = (ctr128_f) aesni_ctr32_encrypt_blocks;
       
  1714          /*
       
  1715 @@ -336,17 +335,17 @@
       
  1716      if (key) {
       
  1717          /* key_len is two AES keys */
       
  1718          if (enc) {
       
  1719 -            aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  1720 +            aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  1721              xctx->xts.block1 = (block128_f) aesni_encrypt;
       
  1722              xctx->stream = aesni_xts_encrypt;
       
  1723          } else {
       
  1724 -            aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  1725 +            aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  1726              xctx->xts.block1 = (block128_f) aesni_decrypt;
       
  1727              xctx->stream = aesni_xts_decrypt;
       
  1728          }
       
  1729 
       
  1730          aesni_set_encrypt_key(key + ctx->key_len / 2,
       
  1731 -                              ctx->key_len * 4, &xctx->ks2);
       
  1732 +                              ctx->key_len * 4, &xctx->ks2.ks);
       
  1733          xctx->xts.block2 = (block128_f) aesni_encrypt;
       
  1734 
       
  1735          xctx->xts.key1 = &xctx->ks1;
       
  1736 @@ -371,7 +370,7 @@
       
  1737      if (!iv && !key)
       
  1738          return 1;
       
  1739      if (key) {
       
  1740 -        aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
       
  1741 +        aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
       
  1742          CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  1743                             &cctx->ks, (block128_f) aesni_encrypt);
       
  1744          cctx->str = enc ? (ccm128_f) aesni_ccm64_encrypt_blocks :
       
  1745 @@ -432,6 +431,364 @@
       
  1746  const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
       
  1747  { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; }
       
  1748 
       
  1749 +#elif    defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
       
  1750 +
       
  1751 +#include "sparc_arch.h"
       
  1752 +
       
  1753 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1754 +
       
  1755 +#define    SPARC_AES_CAPABLE    (OPENSSL_sparcv9cap_P[1] & CFR_AES)
       
  1756 +
       
  1757 +void    aes_t4_set_encrypt_key (const unsigned char *key, int bits,
       
  1758 +                AES_KEY *ks);
       
  1759 +void    aes_t4_set_decrypt_key (const unsigned char *key, int bits,
       
  1760 +                AES_KEY *ks);
       
  1761 +void    aes_t4_encrypt (const unsigned char *in, unsigned char *out,
       
  1762 +                const AES_KEY *key);
       
  1763 +void    aes_t4_decrypt (const unsigned char *in, unsigned char *out,
       
  1764 +                const AES_KEY *key);
       
  1765 +/*
       
  1766 + * Key-length specific subroutines were chosen for following reason.
       
  1767 + * Each SPARC T4 core can execute up to 8 threads which share core's
       
  1768 + * resources. Loading as much key material to registers allows to
       
  1769 + * minimize references to shared memory interface, as well as amount
       
  1770 + * of instructions in inner loops [much needed on T4]. But then having
       
  1771 + * non-key-length specific routines would require conditional branches
       
  1772 + * either in inner loops or on subroutines' entries. Former is hardly
       
  1773 + * acceptable, while latter means code size increase to size occupied
       
  1774 + * by multiple key-length specific subroutines, so why fight?
       
  1775 + */
       
  1776 +void    aes128_t4_cbc_encrypt (const unsigned char *in, unsigned char *out,
       
  1777 +                size_t len, const AES_KEY *key,
       
  1778 +                unsigned char *ivec);
       
  1779 +void    aes128_t4_cbc_decrypt (const unsigned char *in, unsigned char *out,
       
  1780 +                size_t len, const AES_KEY *key,
       
  1781 +                unsigned char *ivec);
       
  1782 +void    aes192_t4_cbc_encrypt (const unsigned char *in, unsigned char *out,
       
  1783 +                size_t len, const AES_KEY *key,
       
  1784 +                unsigned char *ivec);
       
  1785 +void    aes192_t4_cbc_decrypt (const unsigned char *in, unsigned char *out,
       
  1786 +                size_t len, const AES_KEY *key,
       
  1787 +                unsigned char *ivec);
       
  1788 +void    aes256_t4_cbc_encrypt (const unsigned char *in, unsigned char *out,
       
  1789 +                size_t len, const AES_KEY *key,
       
  1790 +                unsigned char *ivec);
       
  1791 +void    aes256_t4_cbc_decrypt (const unsigned char *in, unsigned char *out,
       
  1792 +                size_t len, const AES_KEY *key,
       
  1793 +                unsigned char *ivec);
       
  1794 +void    aes128_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
       
  1795 +                size_t blocks, const AES_KEY *key,
       
  1796 +                unsigned char *ivec);
       
  1797 +void    aes192_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
       
  1798 +                size_t blocks, const AES_KEY *key,
       
  1799 +                unsigned char *ivec);
       
  1800 +void    aes256_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
       
  1801 +                size_t blocks, const AES_KEY *key,
       
  1802 +                unsigned char *ivec);
       
  1803 +
       
  1804 +static int aes_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1805 +           const unsigned char *iv, int enc)
       
  1806 +{
       
  1807 +    int ret, mode, bits;
       
  1808 +    EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data;
       
  1809 +
       
  1810 +    mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1811 +    bits = ctx->key_len*8;
       
  1812 +    if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) {
       
  1813 +            ret = 0;
       
  1814 +            aes_t4_set_decrypt_key(key, bits, ctx->cipher_data);
       
  1815 +            dat->block    = (block128_f)aes_t4_decrypt;
       
  1816 +            switch (bits) {
       
  1817 +            case 128:
       
  1818 +            dat->stream.cbc    = mode==EVP_CIPH_CBC_MODE ?
       
  1819 +                        (cbc128_f)aes128_t4_cbc_decrypt :
       
  1820 +                        NULL;
       
  1821 +            break;
       
  1822 +            case 192:
       
  1823 +            dat->stream.cbc    = mode==EVP_CIPH_CBC_MODE ?
       
  1824 +                        (cbc128_f)aes192_t4_cbc_decrypt :
       
  1825 +                        NULL;
       
  1826 +            break;
       
  1827 +            case 256:
       
  1828 +            dat->stream.cbc    = mode==EVP_CIPH_CBC_MODE ?
       
  1829 +                        (cbc128_f)aes256_t4_cbc_decrypt :
       
  1830 +                        NULL;
       
  1831 +            break;
       
  1832 +            default:
       
  1833 +            ret = -1;
       
  1834 +        }
       
  1835 +    } else    {
       
  1836 +            ret = 0;
       
  1837 +            aes_t4_set_encrypt_key(key, bits, ctx->cipher_data);
       
  1838 +            dat->block    = (block128_f)aes_t4_encrypt;
       
  1839 +            switch (bits) {
       
  1840 +            case 128:
       
  1841 +            if (mode==EVP_CIPH_CBC_MODE)
       
  1842 +                dat->stream.cbc    = (cbc128_f)aes128_t4_cbc_encrypt;
       
  1843 +            else if (mode==EVP_CIPH_CTR_MODE)
       
  1844 +                dat->stream.ctr = (ctr128_f)aes128_t4_ctr32_encrypt;
       
  1845 +            else
       
  1846 +                dat->stream.cbc = NULL;
       
  1847 +            break;
       
  1848 +            case 192:
       
  1849 +            if (mode==EVP_CIPH_CBC_MODE)
       
  1850 +                dat->stream.cbc    = (cbc128_f)aes192_t4_cbc_encrypt;
       
  1851 +            else if (mode==EVP_CIPH_CTR_MODE)
       
  1852 +                dat->stream.ctr = (ctr128_f)aes192_t4_ctr32_encrypt;
       
  1853 +            else
       
  1854 +                dat->stream.cbc = NULL;
       
  1855 +            break;
       
  1856 +            case 256:
       
  1857 +            if (mode==EVP_CIPH_CBC_MODE)
       
  1858 +                dat->stream.cbc    = (cbc128_f)aes256_t4_cbc_encrypt;
       
  1859 +            else if (mode==EVP_CIPH_CTR_MODE)
       
  1860 +                dat->stream.ctr = (ctr128_f)aes256_t4_ctr32_encrypt;
       
  1861 +            else
       
  1862 +                dat->stream.cbc = NULL;
       
  1863 +            break;
       
  1864 +            default:
       
  1865 +            ret = -1;
       
  1866 +        }
       
  1867 +    }
       
  1868 +
       
  1869 +    if (ret < 0) {
       
  1870 +        EVPerr(EVP_F_AES_T4_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED);
       
  1871 +        return 0;
       
  1872 +    }
       
  1873 +
       
  1874 +    return 1;
       
  1875 +}
       
  1876 +/* T4 tables use aes_t4_<mode>_cipher names; alias them to aes_<mode>_cipher. */
       
  1877 +#define aes_t4_cbc_cipher aes_cbc_cipher /* prototype declares aes_cbc_cipher */
       
  1878 +static int aes_t4_cbc_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1879 +    const unsigned char *in, size_t len);
       
  1880 +
       
  1881 +#define aes_t4_ecb_cipher aes_ecb_cipher
       
  1882 +static int aes_t4_ecb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1883 +    const unsigned char *in, size_t len);
       
  1884 +
       
  1885 +#define aes_t4_ofb_cipher aes_ofb_cipher
       
  1886 +static int aes_t4_ofb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1887 +    const unsigned char *in,size_t len);
       
  1888 +
       
  1889 +#define aes_t4_cfb_cipher aes_cfb_cipher
       
  1890 +static int aes_t4_cfb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1891 +    const unsigned char *in,size_t len);
       
  1892 +
       
  1893 +#define aes_t4_cfb8_cipher aes_cfb8_cipher
       
  1894 +static int aes_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1895 +    const unsigned char *in,size_t len);
       
  1896 +
       
  1897 +#define aes_t4_cfb1_cipher aes_cfb1_cipher
       
  1898 +static int aes_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1899 +    const unsigned char *in,size_t len);
       
  1900 +
       
  1901 +#define aes_t4_ctr_cipher aes_ctr_cipher
       
  1902 +static int aes_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1903 +        const unsigned char *in, size_t len);
       
  1904 +/* T4 GCM init: set key and/or IV; returns 0 only for an unsupported key size. */
       
  1905 +static int aes_t4_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1906 +                        const unsigned char *iv, int enc)    /* enc is not used */
       
  1907 +{
       
  1908 +    EVP_AES_GCM_CTX *gctx = ctx->cipher_data;
       
  1909 +    if (!iv && !key)    /* nothing supplied: nothing to do */
       
  1910 +        return 1;
       
  1911 +    if (key) {
       
  1912 +        int bits = ctx->key_len * 8;    /* key_len is in bytes */
       
  1913 +        aes_t4_set_encrypt_key(key, bits, &gctx->ks.ks);
       
  1914 +        CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
       
  1915 +                (block128_f)aes_t4_encrypt);
       
  1916 +        switch (bits) {    /* select the CTR routine for this key size */
       
  1917 +        case 128:
       
  1918 +            gctx->ctr = (ctr128_f)aes128_t4_ctr32_encrypt;
       
  1919 +            break;
       
  1920 +         case 192:
       
  1921 +            gctx->ctr = (ctr128_f)aes192_t4_ctr32_encrypt;
       
  1922 +            break;
       
  1923 +         case 256:
       
  1924 +            gctx->ctr = (ctr128_f)aes256_t4_ctr32_encrypt;
       
  1925 +            break;
       
  1926 +         default:    /* not 128/192/256 bits */
       
  1927 +            return 0;
       
  1928 +     }
       
  1929 +        /* If we have an iv can set it directly, otherwise use
       
  1930 +         * saved IV.
       
  1931 +         */
       
  1932 +        if (iv == NULL && gctx->iv_set)
       
  1933 +            iv = gctx->iv;
       
  1934 +        if (iv) {
       
  1935 +            CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen);
       
  1936 +            gctx->iv_set = 1;
       
  1937 +        }
       
  1938 +        gctx->key_set = 1;
       
  1939 +    } else {    /* IV only; iv is non-NULL here because of the early return */
       
  1940 +        /* If key set use IV, otherwise copy */
       
  1941 +        if (gctx->key_set)
       
  1942 +            CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen);
       
  1943 +        else
       
  1944 +            memcpy(gctx->iv, iv, gctx->ivlen);    /* kept until a key arrives */
       
  1945 +        gctx->iv_set = 1;
       
  1946 +        gctx->iv_gen = 0;
       
  1947 +    }
       
  1948 +    return 1;
       
  1949 +}
       
  1950 +/* aes_t4_gcm_cipher is a macro alias, so this prototype is aes_gcm_cipher's. */
       
  1951 +#define aes_t4_gcm_cipher aes_gcm_cipher
       
  1952 +static int aes_t4_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1953 +        const unsigned char *in, size_t len);
       
  1954 +/* T4 XTS init: load key pair and/or IV; stream is cleared (no T4 routine yet). */
       
  1955 +static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1956 +                        const unsigned char *iv, int enc)    /* enc != 0: encrypt */
       
  1957 +{
       
  1958 +    EVP_AES_XTS_CTX *xctx = ctx->cipher_data;
       
  1959 +    if (!iv && !key)    /* nothing supplied: nothing to do */
       
  1960 +        return 1;
       
  1961 +
       
  1962 +    if (key) {
       
  1963 +        int bits = ctx->key_len * 4;    /* key_len is two AES keys */
       
  1964 +        xctx->stream = NULL;    /* only set by the disabled switches below */
       
  1965 +        if (enc) {
       
  1966 +            aes_t4_set_encrypt_key(key, bits, &xctx->ks1.ks);
       
  1967 +            xctx->xts.block1 = (block128_f)aes_t4_encrypt;
       
  1968 +#if 0 /* not yet */
       
  1969 +            switch (bits) {
       
  1970 +            case 128:
       
  1971 +                xctx->stream = aes128_t4_xts_encrypt;
       
  1972 +                break;
       
  1973 +            case 192:
       
  1974 +                xctx->stream = aes192_t4_xts_encrypt;
       
  1975 +                break;
       
  1976 +            case 256:
       
  1977 +                xctx->stream = aes256_t4_xts_encrypt;
       
  1978 +                break;
       
  1979 +            default:
       
  1980 +                return 0;
       
  1981 +            }
       
  1982 +#endif
       
  1983 +        } else {
       
  1984 +            aes_t4_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  1985 +            xctx->xts.block1 = (block128_f)aes_t4_decrypt;
       
  1986 +#if 0 /* not yet */
       
  1987 +            switch (bits) {
       
  1988 +            case 128:
       
  1989 +                xctx->stream = aes128_t4_xts_decrypt;
       
  1990 +                break;
       
  1991 +            case 192:
       
  1992 +                xctx->stream = aes192_t4_xts_decrypt;
       
  1993 +                break;
       
  1994 +            case 256:
       
  1995 +                xctx->stream = aes256_t4_xts_decrypt;
       
  1996 +                break;
       
  1997 +            default:
       
  1998 +                return 0;
       
  1999 +            }
       
  2000 +#endif
       
  2001 +        }
       
  2002 +
       
  2003 +        aes_t4_set_encrypt_key(key + ctx->key_len/2,    /* second half of key */
       
  2004 +                        ctx->key_len * 4, &xctx->ks2.ks);
       
  2005 +        xctx->xts.block2 = (block128_f)aes_t4_encrypt;    /* encrypt in both modes */
       
  2006 +
       
  2007 +        xctx->xts.key1 = &xctx->ks1;    /* set only once a key has been loaded */
       
  2008 +    }
       
  2009 +
       
  2010 +    if (iv) {
       
  2011 +        xctx->xts.key2 = &xctx->ks2;    /* set only once an IV has been supplied */
       
  2012 +        memcpy(ctx->iv, iv, 16);
       
  2013 +    }
       
  2014 +
       
  2015 +    return 1;
       
  2016 +}
       
  2017 +/* aes_t4_xts_cipher is a macro alias, so this prototype is aes_xts_cipher's. */
       
  2018 +#define aes_t4_xts_cipher aes_xts_cipher
       
  2019 +static int aes_t4_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  2020 +        const unsigned char *in, size_t len);
       
  2021 +/* T4 CCM init: load key and/or nonce; str is reset since no T4 routine exists. */
       
  2022 +static int aes_t4_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  2023 +                        const unsigned char *iv, int enc)    /* enc: dead code only */
       
  2024 +{
       
  2025 +    EVP_AES_CCM_CTX *cctx = ctx->cipher_data;
       
  2026 +    if (!iv && !key)    /* nothing supplied: nothing to do */
       
  2027 +        return 1;
       
  2028 +    if (key) {
       
  2029 +        int bits = ctx->key_len * 8;    /* key_len is in bytes */
       
  2030 +        aes_t4_set_encrypt_key(key, bits, &cctx->ks.ks);
       
  2031 +        CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  2032 +                    &cctx->ks, (block128_f)aes_t4_encrypt);
       
  2033 +        cctx->str = NULL;    /* as the generic path does; no T4 ccm64 routine yet */
       
  2034 +#if 0 /* not yet */
       
  2035 +        switch (bits) {
       
  2036 +        case 128:
       
  2037 +            cctx->str = enc?(ccm128_f)aes128_t4_ccm64_encrypt :
       
  2038 +                (ccm128_f)aes128_t4_ccm64_decrypt;
       
  2039 +            break;
       
  2040 +        case 192:
       
  2041 +            cctx->str = enc?(ccm128_f)aes192_t4_ccm64_encrypt :
       
  2042 +                (ccm128_f)aes192_t4_ccm64_decrypt;
       
  2043 +            break;
       
  2044 +        case 256:
       
  2045 +            cctx->str = enc?(ccm128_f)aes256_t4_ccm64_encrypt :
       
  2046 +                (ccm128_f)aes256_t4_ccm64_decrypt;
       
  2047 +            break;
       
  2048 +        default: return 0;
       
  2049 +        }
       
  2050 +#endif
       
  2051 +        cctx->key_set = 1;
       
  2052 +    }
       
  2053 +    if (iv) {
       
  2054 +        memcpy(ctx->iv, iv, 15 - cctx->L);    /* copies 15 - L bytes of iv */
       
  2055 +        cctx->iv_set = 1;
       
  2056 +    }
       
  2057 +    return 1;
       
  2058 +}
       
  2059 +/* aes_t4_ccm_cipher is a macro alias, so this prototype is aes_ccm_cipher's. */
       
  2060 +#define aes_t4_ccm_cipher aes_ccm_cipher
       
  2061 +static int aes_t4_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  2062 +        const unsigned char *in, size_t len);
       
  2063 +/* Emit T4 and generic EVP_CIPHER tables plus a getter picking one at run time. */
       
  2064 +#define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
       
  2065 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \
       
  2066 +    nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \
       
  2067 +    flags|EVP_CIPH_##MODE##_MODE,    \
       
  2068 +    aes_t4_init_key,        \
       
  2069 +    aes_t4_##mode##_cipher,        \
       
  2070 +    NULL,                \
       
  2071 +    sizeof(EVP_AES_KEY),        \
       
  2072 +    NULL,NULL,NULL,NULL }; \
       
  2073 +static const EVP_CIPHER aes_##keylen##_##mode = { \
       
  2074 +    nid##_##keylen##_##nmode,blocksize,    \
       
  2075 +    keylen/8,ivlen, \
       
  2076 +    flags|EVP_CIPH_##MODE##_MODE,    \
       
  2077 +    aes_init_key,            \
       
  2078 +    aes_##mode##_cipher,        \
       
  2079 +    NULL,                \
       
  2080 +    sizeof(EVP_AES_KEY),        \
       
  2081 +    NULL,NULL,NULL,NULL }; \
       
  2082 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
       
  2083 +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; }
       
  2084 +/* Like BLOCK_CIPHER_generic, with per-mode init/cleanup/ctrl; XTS keys are 2x. */
       
  2085 +#define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \
       
  2086 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \
       
  2087 +    nid##_##keylen##_##mode,blocksize, \
       
  2088 +    (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \
       
  2089 +    flags|EVP_CIPH_##MODE##_MODE,    \
       
  2090 +    aes_t4_##mode##_init_key,    \
       
  2091 +    aes_t4_##mode##_cipher,        \
       
  2092 +    aes_##mode##_cleanup,        \
       
  2093 +    sizeof(EVP_AES_##MODE##_CTX),    \
       
  2094 +    NULL,NULL,aes_##mode##_ctrl,NULL }; \
       
  2095 +static const EVP_CIPHER aes_##keylen##_##mode = { \
       
  2096 +    nid##_##keylen##_##mode,blocksize, \
       
  2097 +    (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \
       
  2098 +    flags|EVP_CIPH_##MODE##_MODE,    \
       
  2099 +    aes_##mode##_init_key,        \
       
  2100 +    aes_##mode##_cipher,        \
       
  2101 +    aes_##mode##_cleanup,        \
       
  2102 +    sizeof(EVP_AES_##MODE##_CTX),    \
       
  2103 +    NULL,NULL,aes_##mode##_ctrl,NULL }; \
       
  2104 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
       
  2105 +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; }
       
  2106 +
       
  2107  #  else
       
  2108 
       
  2109  #   define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
       
  2110 @@ -480,7 +837,7 @@
       
  2111          && !enc)
       
  2112  #  ifdef BSAES_CAPABLE
       
  2113          if (BSAES_CAPABLE && mode == EVP_CIPH_CBC_MODE) {
       
  2114 -            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2115 +            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
       
  2116              dat->block = (block128_f) AES_decrypt;
       
  2117              dat->stream.cbc = (cbc128_f) bsaes_cbc_encrypt;
       
  2118          } else
       
  2119 @@ -487,7 +844,7 @@
       
  2120  #  endif
       
  2121  #  ifdef VPAES_CAPABLE
       
  2122          if (VPAES_CAPABLE) {
       
  2123 -            ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2124 +            ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
       
  2125              dat->block = (block128_f) vpaes_decrypt;
       
  2126              dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
       
  2127                  (cbc128_f) vpaes_cbc_encrypt : NULL;
       
  2128 @@ -494,7 +851,7 @@
       
  2129          } else
       
  2130  #  endif
       
  2131          {
       
  2132 -            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2133 +            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
       
  2134              dat->block = (block128_f) AES_decrypt;
       
  2135              dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
       
  2136                  (cbc128_f) AES_cbc_encrypt : NULL;
       
  2137 @@ -508,7 +865,7 @@
       
  2138  #  endif
       
  2139  #  ifdef VPAES_CAPABLE
       
  2140      if (VPAES_CAPABLE) {
       
  2141 -        ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2142 +        ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
       
  2143          dat->block = (block128_f) vpaes_encrypt;
       
  2144          dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
       
  2145              (cbc128_f) vpaes_cbc_encrypt : NULL;
       
  2146 @@ -515,7 +872,7 @@
       
  2147      } else
       
  2148  #  endif
       
  2149      {
       
  2150 -        ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2151 +        ret = AES_set_encrypt_key(key, ctx->key_len*8, &dat->ks.ks);
       
  2152          dat->block = (block128_f) AES_encrypt;
       
  2153          dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
       
  2154              (cbc128_f) AES_cbc_encrypt : NULL;
       
  2155 @@ -810,7 +1167,7 @@
       
  2156          do {
       
  2157  #  ifdef BSAES_CAPABLE
       
  2158              if (BSAES_CAPABLE) {
       
  2159 -                AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
       
  2160 +                AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
       
  2161                  CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
       
  2162                                     (block128_f) AES_encrypt);
       
  2163                  gctx->ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks;
       
  2164 @@ -819,7 +1176,7 @@
       
  2165  #  endif
       
  2166  #  ifdef VPAES_CAPABLE
       
  2167              if (VPAES_CAPABLE) {
       
  2168 -                vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
       
  2169 +                vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
       
  2170                  CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
       
  2171                                     (block128_f) vpaes_encrypt);
       
  2172                  gctx->ctr = NULL;
       
  2173 @@ -828,7 +1185,7 @@
       
  2174  #  endif
       
  2175                  (void)0;        /* terminate potentially open 'else' */
       
  2176 
       
  2177 -            AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
       
  2178 +            AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
       
  2179              CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
       
  2180                                 (block128_f) AES_encrypt);
       
  2181  #  ifdef AES_CTR_ASM
       
  2182 @@ -1049,15 +1406,15 @@
       
  2183  #  ifdef VPAES_CAPABLE
       
  2184              if (VPAES_CAPABLE) {
       
  2185                  if (enc) {
       
  2186 -                    vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2187 +                    vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2188                      xctx->xts.block1 = (block128_f) vpaes_encrypt;
       
  2189                  } else {
       
  2190 -                    vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2191 +                    vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2192                      xctx->xts.block1 = (block128_f) vpaes_decrypt;
       
  2193                  }
       
  2194 
       
  2195                  vpaes_set_encrypt_key(key + ctx->key_len / 2,
       
  2196 -                                      ctx->key_len * 4, &xctx->ks2);
       
  2197 +                                      ctx->key_len * 4, &xctx->ks2.ks);
       
  2198                  xctx->xts.block2 = (block128_f) vpaes_encrypt;
       
  2199 
       
  2200                  xctx->xts.key1 = &xctx->ks1;
       
  2201 @@ -1067,15 +1424,15 @@
       
  2202                  (void)0;        /* terminate potentially open 'else' */
       
  2203 
       
  2204              if (enc) {
       
  2205 -                AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2206 +                AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2207                  xctx->xts.block1 = (block128_f) AES_encrypt;
       
  2208              } else {
       
  2209 -                AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2210 +                AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2211                  xctx->xts.block1 = (block128_f) AES_decrypt;
       
  2212              }
       
  2213 
       
  2214              AES_set_encrypt_key(key + ctx->key_len / 2,
       
  2215 -                                ctx->key_len * 4, &xctx->ks2);
       
  2216 +                                ctx->key_len * 4, &xctx->ks2.ks);
       
  2217              xctx->xts.block2 = (block128_f) AES_encrypt;
       
  2218 
       
  2219              xctx->xts.key1 = &xctx->ks1;
       
  2220 @@ -1196,7 +1553,7 @@
       
  2221          do {
       
  2222  #  ifdef VPAES_CAPABLE
       
  2223              if (VPAES_CAPABLE) {
       
  2224 -                vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
       
  2225 +                vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
       
  2226                  CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  2227                                     &cctx->ks, (block128_f) vpaes_encrypt);
       
  2228                  cctx->str = NULL;
       
  2229 @@ -1204,7 +1561,7 @@
       
  2230                  break;
       
  2231              }
       
  2232  #  endif
       
  2233 -            AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
       
  2234 +            AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
       
  2235              CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  2236                                 &cctx->ks, (block128_f) AES_encrypt);
       
  2237              cctx->str = NULL;
       
  2238 @@ -1285,5 +1642,4 @@
       
  2239                      EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS)
       
  2240      BLOCK_CIPHER_custom(NID_aes, 256, 1, 12, ccm, CCM,
       
  2241                      EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS)
       
  2242 -# endif
       
  2243  #endif
       
  2244 Index: openssl/crypto/evp/evp.h
       
  2245 ===================================================================
       
  2246 --- evp.h    Mon Feb 11 07:26:04 2013
       
  2247 +++ evp.h.new    Thu May  2 14:31:55 2013
       
  2248 @@ -1325,6 +1325,7 @@
       
  2249  # define EVP_F_AESNI_INIT_KEY                             165
       
  2250  # define EVP_F_AESNI_XTS_CIPHER                           176
       
  2251  # define EVP_F_AES_INIT_KEY                               133
       
  2252 +# define EVP_F_AES_T4_INIT_KEY                            178
       
  2253  # define EVP_F_AES_XTS                                    172
       
  2254  # define EVP_F_AES_XTS_CIPHER                             175
       
  2255  # define EVP_F_ALG_MODULE_INIT                            177
       
  2256 Index: openssl/crypto/evp/evp_err.c
       
  2257 ===================================================================
       
  2258 --- evp_err.c	Mon Feb 11 07:26:04 2013
       
  2259 +++ evp_err.c.new	Thu May  2 14:33:24 2013
       
  2260 @@ -73,6 +73,7 @@
       
  2261      {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"},
       
  2262      {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"},
       
  2263      {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"},
       
  2264 +    {ERR_FUNC(EVP_F_AES_T4_INIT_KEY), "AES_T4_INIT_KEY"},
       
  2265      {ERR_FUNC(EVP_F_AES_XTS), "AES_XTS"},
       
  2266      {ERR_FUNC(EVP_F_AES_XTS_CIPHER), "AES_XTS_CIPHER"},
       
  2267      {ERR_FUNC(EVP_F_ALG_MODULE_INIT), "ALG_MODULE_INIT"},