components/openssl/openssl-1.0.1/patches/103-openssl_t4_inline.patch
changeset 4822 1fb8a14c6702
parent 4821 54dafbe33fdb
child 4823 3ef8b7f4d9d8
equal deleted inserted replaced
4821:54dafbe33fdb 4822:1fb8a14c6702
     1 #
       
     2 # This file adds inline T4 instruction support to OpenSSL upstream code.
       
     3 # The change was brought in from OpenSSL 1.0.2.
       
     4 #
       
     5 Index: Configure
       
     6 ===================================================================
       
     7 diff -ru openssl-1.0.1e/Configure openssl-1.0.1e/Configure
       
     8 --- openssl-1.0.1e/Configure 2011-05-24 17:02:24.000000000 -0700
       
     9 +++ openssl-1.0.1e/Configure 2011-07-27 10:48:17.817470000 -0700
       
    10 @@ -135,7 +135,7 @@
       
    11 
       
    12  my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:";
       
    13  my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
       
    14 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
       
    15 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
       
    16  my $fips_sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
       
    17  my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
       
    18  my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
       
    19 Index: crypto/sparccpuid.S
       
    20 ===================================================================
       
    21 diff -ru openssl-1.0.1e/crypto/sparccpuid.S openssl-1.0.1e/crypto/sparccpuid.S
       
    22 --- openssl-1.0.1e/crypto/sparccpuid.S 2011-05-24 17:02:24.000000000 -0700
       
    23 +++ openssl-1.0.1e/crypto/sparccpuid.S 2011-07-27 10:48:17.817470000 -0700
       
    24 @@ -1,3 +1,7 @@
       
    25 +#ifdef OPENSSL_FIPSCANISTER
       
    26 +#include <openssl/fipssyms.h>
       
    27 +#endif
       
    28 +
       
    29  #if defined(__SUNPRO_C) && defined(__sparcv9)
       
    30  # define ABI64  /* They've said -xarch=v9 at command line */
       
    31  #elif defined(__GNUC__) && defined(__arch64__)
       
    32 @@ -241,7 +245,12 @@
       
    33  !	UltraSPARC IIe		7
       
    34  !	UltraSPARC III		7
       
    35  !	UltraSPARC T1		24
       
    36 +!	SPARC T4		65(*)
       
    37  !
       
    38 +! (*)	result has less to do with VIS instruction latencies: rdtick
       
    39 +!	itself appears to be that slow, but it does the trick in the sense
       
    40 +!	that FP and VIS code paths are still slower than integer-only ones.
       
    41 +!
       
    42  ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
       
    43  !
       
    44  ! It would be possible to detect specifically US-T1 by instrumenting
       
    45 @@ -250,6 +259,8 @@
       
    46  .global	_sparcv9_vis1_instrument
       
    47  .align	8
       
    48  _sparcv9_vis1_instrument:
       
    49 +	.word	0x81b00d80	!fxor	%f0,%f0,%f0
       
    50 +	.word	0x85b08d82	!fxor	%f2,%f2,%f2
       
    51  	.word	0x91410000	!rd	%tick,%o0
       
    52  	.word	0x81b00d80	!fxor	%f0,%f0,%f0
       
    53  	.word	0x85b08d82	!fxor	%f2,%f2,%f2
       
    54 @@ -286,6 +297,30 @@
       
    55  .type	_sparcv9_vis1_instrument,#function
       
    56  .size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
       
    57  
       
    58 +.global	_sparcv9_rdcfr
       
    59 +.align	8
       
    60 +_sparcv9_rdcfr:
       
    61 +	retl
       
    62 +	.word	0x91468000	!rd	%asr26,%o0
       
    63 +.type	_sparcv9_rdcfr,#function
       
    64 +.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
       
    65 +
       
    66 +.global	_sparcv9_vis3_probe
       
    67 +.align	8
       
    68 +_sparcv9_vis3_probe:
       
    69 +	retl
       
    70 +	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
       
    71 +.type	_sparcv9_vis3_probe,#function
       
    72 +.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
       
    73 +
       
    74 +.global	_sparcv9_random
       
    75 +.align	8
       
    76 +_sparcv9_random:
       
    77 +	retl				! leaf return; next word sits in the delay slot
       
    78 +	.word	0x91b002a0	!random	%o0
       
    79 +.type	_sparcv9_random,#function
       
    80 +.size	_sparcv9_random,.-_sparcv9_random
       
    81 +
       
    82  .global	OPENSSL_cleanse
       
    83  .align	32
       
    84  OPENSSL_cleanse:
       
    85 @@ -370,6 +405,102 @@
       
    86  .size	OPENSSL_cleanse,.-OPENSSL_cleanse
       
    87  
       
    88  #ifndef _BOOT
       
    89 +.global	_sparcv9_vis1_instrument_bus
       
    90 +.align	8
       
    91 +_sparcv9_vis1_instrument_bus:
       
    92 +    mov    %o1,%o3                    ! save cnt
       
    93 +    .word    0x99410000    !rd    %tick,%o4    ! tick
       
    94 +    mov    %o4,%o5                    ! lasttick = tick
       
    95 +    set    0,%g4                    ! diff
       
    96 +
       
    97 +    andn    %o0,63,%g1
       
    98 +    .word    0xc1985e00    !ldda    [%g1]0xf0,%f0    ! block load
       
    99 +    .word    0x8143e040    !membar    #Sync
       
   100 +    .word    0xc1b85c00    !stda    %f0,[%g1]0xe0    ! block store and commit
       
   101 +    .word    0x8143e040    !membar    #Sync
       
   102 +    ld    [%o0],%o4
       
   103 +    add    %o4,%g4,%g4
       
   104 +    .word    0xc9e2100c    !cas    [%o0],%o4,%g4
       
   105 +
       
   106 +.Loop:    .word    0x99410000    !rd    %tick,%o4
       
   107 +    sub    %o4,%o5,%g4                ! diff=tick-lasttick
       
   108 +    mov    %o4,%o5                    ! lasttick=tick
       
   109 +
       
   110 +    andn    %o0,63,%g1
       
   111 +    .word    0xc1985e00    !ldda    [%g1]0xf0,%f0    ! block load
       
   112 +    .word    0x8143e040    !membar    #Sync
       
   113 +    .word    0xc1b85c00    !stda    %f0,[%g1]0xe0    ! block store and commit
       
   114 +    .word    0x8143e040    !membar    #Sync
       
   115 +    ld    [%o0],%o4
       
   116 +    add    %o4,%g4,%g4
       
   117 +    .word    0xc9e2100c    !cas    [%o0],%o4,%g4
       
   118 +    subcc    %o1,1,%o1                ! --$cnt
       
   119 +    bnz    .Loop
       
   120 +    add    %o0,4,%o0                ! ++$out
       
   121 +
       
   122 +    retl
       
   123 +    mov    %o3,%o0
       
   124 +.type    _sparcv9_vis1_instrument_bus,#function
       
   125 +.size    _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
       
   126 +
       
   127 +.global    _sparcv9_vis1_instrument_bus2
       
   128 +.align    8
       
   129 +_sparcv9_vis1_instrument_bus2:
       
   130 +    mov    %o1,%o3                    ! save cnt
       
   131 +    sll    %o1,2,%o1                ! cnt*=4
       
   132 +
       
   133 +    .word    0x99410000    !rd    %tick,%o4    ! tick
       
   134 +    mov    %o4,%o5                    ! lasttick = tick
       
   135 +    set    0,%g4                    ! diff
       
   136 +
       
   137 +    andn    %o0,63,%g1
       
   138 +    .word    0xc1985e00    !ldda    [%g1]0xf0,%f0    ! block load
       
   139 +    .word    0x8143e040    !membar    #Sync
       
   140 +    .word    0xc1b85c00    !stda    %f0,[%g1]0xe0    ! block store and commit
       
   141 +    .word    0x8143e040    !membar    #Sync
       
   142 +    ld    [%o0],%o4
       
   143 +    add    %o4,%g4,%g4
       
   144 +    .word    0xc9e2100c    !cas    [%o0],%o4,%g4
       
   145 +
       
   146 +    .word    0x99410000    !rd    %tick,%o4    ! tick
       
   147 +    sub    %o4,%o5,%g4                ! diff=tick-lasttick
       
   148 +    mov    %o4,%o5                    ! lasttick=tick
       
   149 +    mov    %g4,%g5                    ! lastdiff=diff
       
   150 +.Loop2:
       
   151 +    andn    %o0,63,%g1
       
   152 +    .word    0xc1985e00    !ldda    [%g1]0xf0,%f0    ! block load
       
   153 +    .word    0x8143e040    !membar    #Sync
       
   154 +    .word    0xc1b85c00    !stda    %f0,[%g1]0xe0    ! block store and commit
       
   155 +    .word    0x8143e040    !membar    #Sync
       
   156 +    ld    [%o0],%o4
       
   157 +    add    %o4,%g4,%g4
       
   158 +    .word    0xc9e2100c    !cas    [%o0],%o4,%g4
       
   159 +
       
   160 +    subcc    %o2,1,%o2                ! --max
       
   161 +    bz    .Ldone2
       
   162 +    nop
       
   163 +
       
   164 +    .word    0x99410000    !rd    %tick,%o4    ! tick
       
   165 +    sub    %o4,%o5,%g4                ! diff=tick-lasttick
       
   166 +    mov    %o4,%o5                    ! lasttick=tick
       
   167 +    cmp    %g4,%g5
       
   168 +    mov    %g4,%g5                    ! lastdiff=diff
       
   169 +
       
   170 +    .word    0x83408000    !rd    %ccr,%g1
       
   171 +    and    %g1,4,%g1                ! isolate zero flag
       
   172 +    xor    %g1,4,%g1                ! flip zero flag
       
   173 +
       
   174 +    subcc    %o1,%g1,%o1                ! conditional --$cnt
       
   175 +    bnz    .Loop2
       
   176 +    add    %o0,%g1,%o0                ! conditional ++$out
       
   177 +
       
   178 +.Ldone2:
       
   179 +    srl    %o1,2,%o1
       
   180 +    retl
       
   181 +    sub    %o3,%o1,%o0
       
   182 +.type    _sparcv9_vis1_instrument_bus2,#function
       
   183 +.size    _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
       
   184 +
       
   185  .section	".init",#alloc,#execinstr
       
   186  	call	solaris_locking_setup
       
   187  	nop
       
   188 Index: crypto/sparcv9cap.c
       
   189 ===================================================================
       
   190 diff -ru openssl-1.0.1e/crypto/sparcv9cap.c openssl-1.0.1e/crypto/sparcv9cap.c
       
   191 --- openssl-1.0.1e/crypto/sparcv9cap.c 2011-05-24 17:02:24.000000000 -0700
       
   192 +++ openssl-1.0.1e/crypto/sparcv9cap.c 2011-07-27 10:48:17.817470000 -0700
       
   193 @@ -3,36 +3,59 @@
       
   194  #include <string.h>
       
   195  #include <setjmp.h>
       
   196  #include <sys/time.h>
       
   197 +#include <unistd.h>
       
   198  #include <openssl/bn.h>
       
   199  #include <sys/auxv.h>
       
   200  
       
   201 -#define SPARCV9_TICK_PRIVILEGED (1<<0)
       
   202 -#define SPARCV9_PREFER_FPU      (1<<1)
       
   203 -#define SPARCV9_VIS1            (1<<2)
       
   204 -#define SPARCV9_VIS2            (1<<3) /* reserved */
       
   205 -#define SPARCV9_FMADD           (1<<4) /* reserved for SPARC64 V */
       
   206 -#define SPARCV9_BLK             (1<<5)
       
   207 +#include "sparc_arch.h"
       
   208  
       
   209 +#if defined(__GNUC__) && defined(__linux)
       
   210 +__attribute__((visibility("hidden")))
       
   211 +#endif
       
   212  #ifndef        _BOOT
       
   213 -static int OPENSSL_sparcv9cap_P = SPARCV9_TICK_PRIVILEGED;
       
   214 +unsigned int OPENSSL_sparcv9cap_P[2] = {SPARCV9_TICK_PRIVILEGED, 0};
       
   215  #else
       
   216 -static int OPENSSL_sparcv9cap_P = SPARCV9_VIS1;
       
   217 +unsigned int OPENSSL_sparcv9cap_P[2] = {SPARCV9_VIS1, 0};
       
   218  #endif
       
   219  
       
   220  int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
       
   221                  const BN_ULONG *np, const BN_ULONG *n0, int num)
       
   222  {
       
   223 +    int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
       
   224 +                         const BN_ULONG *np,const BN_ULONG *n0, int num);
       
   225      int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
       
   226                          const BN_ULONG *np, const BN_ULONG *n0, int num);
       
   227      int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
       
   228                          const BN_ULONG *np, const BN_ULONG *n0, int num);
       
   229  
       
   230 -    if (num >= 8 && !(num & 1) &&
       
   231 -        (OPENSSL_sparcv9cap_P & (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) ==
       
   232 -        (SPARCV9_PREFER_FPU | SPARCV9_VIS1))
       
   233 -        return bn_mul_mont_fpu(rp, ap, bp, np, n0, num);
       
   234 -    else
       
   235 -        return bn_mul_mont_int(rp, ap, bp, np, n0, num);
       
   236 +    if (!(num&1) && num>=6) {
       
   237 +        if ((num&15)==0 && num<=64 &&
       
   238 +            (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== 
       
   239 +                             (CFR_MONTMUL|CFR_MONTSQR))
       
   240 +            {
       
   241 +            typedef int (*bn_mul_mont_f)(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   242 +            int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   243 +            int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   244 +            int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   245 +            int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
       
   246 +            static const bn_mul_mont_f funcs[4] = {
       
   247 +                bn_mul_mont_t4_8,    bn_mul_mont_t4_16,
       
   248 +                bn_mul_mont_t4_24,    bn_mul_mont_t4_32 };
       
   249 +            bn_mul_mont_f worker = funcs[num/16-1];
       
   250 +
       
   251 +            if ((*worker)(rp,ap,bp,np,n0)) return 1;
       
   252 +            /* retry once and fall back */
       
   253 +            if ((*worker)(rp,ap,bp,np,n0)) return 1;
       
   254 +            return bn_mul_mont_vis3(rp,ap,bp,np,n0,num);
       
   255 +            }
       
   256 +        if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3))
       
   257 +            return bn_mul_mont_vis3(rp,ap,bp,np,n0,num);
       
   258 +        else if (num>=8 &&
       
   259 +            (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
       
   260 +            (SPARCV9_PREFER_FPU|SPARCV9_VIS1))
       
   261 +            return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
       
   262 +        }
       
   263 +    return bn_mul_mont_int(rp,ap,bp,np,n0,num);
       
   264  }
       
   265  
       
   266  unsigned long _sparcv9_rdtick(void);
       
   267 @@ -37,11 +60,16 @@
       
   268  
       
   269  unsigned long _sparcv9_rdtick(void);
       
   270  unsigned long _sparcv9_vis1_instrument(void);
       
   271 +unsigned long _sparcv9_rdcfr(void);
       
   272 +#ifndef _BOOT
       
   273 +size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t);
       
   274 +size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t);
       
   275 +#endif
       
   276  
       
   277  #ifndef _BOOT
       
   278  unsigned long OPENSSL_rdtsc(void)
       
   279  {
       
   280 -    if (OPENSSL_sparcv9cap_P & SPARCV9_TICK_PRIVILEGED)
       
   281 +    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_TICK_PRIVILEGED)
       
   282  #if defined(__sun) && defined(__SVR4)
       
   283          return gethrtime();
       
   284  #else
       
   285 @@ -50,6 +80,24 @@
       
   286      else
       
   287          return _sparcv9_rdtick();
       
   288  }
       
   289 +
       
   290 +size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt)
       
   291 +{
       
   292 +    if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
       
   293 +            SPARCV9_BLK)
       
   294 +        return _sparcv9_vis1_instrument_bus(out,cnt);
       
   295 +    else
       
   296 +        return 0;
       
   297 +}
       
   298 +
       
   299 +size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max)
       
   300 +{
       
   301 +    if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) ==
       
   302 +            SPARCV9_BLK)
       
   303 +        return _sparcv9_vis1_instrument_bus2(out,cnt,max);
       
   304 +    else
       
   305 +        return 0;
       
   306 +}
       
   307  #endif
       
   308 
       
   309  #if defined(_BOOT)
       
   310 @@ -59,7 +107,7 @@
       
   311   */
       
   312  void OPENSSL_cpuid_setup(void)
       
   313         {
       
   314 -       OPENSSL_sparcv9cap_P = SPARCV9_VIS1;
       
   315 +       OPENSSL_sparcv9cap_P[0] = SPARCV9_VIS1;
       
   316         }
       
   317  
       
   318  #elif 0 && defined(__sun) && defined(__SVR4)
       
   319 @@ -88,11 +136,11 @@
       
   320      if (!strcmp(name, "SUNW,UltraSPARC") ||
       
   321          /* covers II,III,IV */
       
   322          !strncmp(name, "SUNW,UltraSPARC-I", 17)) {
       
   323 -        OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU | SPARCV9_VIS1;
       
   324 +        OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU | SPARCV9_VIS1;
       
   325  
       
   326          /* %tick is privileged only on UltraSPARC-I/II, but not IIe */
       
   327          if (name[14] != '\0' && name[17] != '\0' && name[18] != '\0')
       
   328 -            OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   329 +            OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   330  
       
   331          return DI_WALK_TERMINATE;
       
   332      }
       
   333 @@ -98,7 +146,7 @@
       
   334      }
       
   335      /* This is expected to catch remaining UltraSPARCs, such as T1 */
       
   336      else if (!strncmp(name, "SUNW,UltraSPARC", 15)) {
       
   337 -        OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   338 +        OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   339  
       
   340          return DI_WALK_TERMINATE;
       
   341      }
       
   342 @@ -117,7 +165,7 @@
       
   343      trigger = 1;
       
   344  
       
   345      if ((e = getenv("OPENSSL_sparcv9cap"))) {
       
   346 -        OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0);
       
   347 +        OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0);
       
   348          return;
       
   349      }
       
   350  
       
   351 @@ -124,15 +172,15 @@
       
   352      if (sysinfo(SI_MACHINE, si, sizeof(si)) > 0) {
       
   353          if (strcmp(si, "sun4v"))
       
   354              /* FPU is preferred for all CPUs, but US-T1/2 */
       
   355 -            OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU;
       
   356 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU;
       
   357      }
       
   358  
       
   359      if (sysinfo(SI_ISALIST, si, sizeof(si)) > 0) {
       
   360          if (strstr(si, "+vis"))
       
   361 -            OPENSSL_sparcv9cap_P |= SPARCV9_VIS1;
       
   362 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK;
       
   363          if (strstr(si, "+vis2")) {
       
   364 -            OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
       
   365 -            OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED;
       
   366 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2;
       
   367 +            OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED;
       
   368              return;
       
   369          }
       
   370      }
       
   371 @@ -195,7 +241,9 @@
       
   372      trigger = 1;
       
   373  
       
   374      if ((e = getenv("OPENSSL_sparcv9cap"))) {
       
   375 -        OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0);
       
   376 +        OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0);
       
   377 +        if ((e = strchr(e, ':')))
       
   378 +            OPENSSL_sparcv9cap_P[1] = strtoul(e + 1, NULL, 0);
       
   379          return;
       
   380      }
       
   381  
       
   382 @@ -202,21 +250,48 @@
       
   383      (void) getisax(&ui, 1);
       
   384  
       
   385      /* Initial value, fits UltraSPARC-I&II... */
       
   386 -    OPENSSL_sparcv9cap_P = SPARCV9_BLK;
       
   387 +    OPENSSL_sparcv9cap_P[0] = SPARCV9_BLK;
       
   388  
       
   389      if (ui & AV_SPARC_VIS) {
       
   390 -        /* detect UltraSPARC-Tx, see sparccpuid.S for details... */
       
   391 +        /* detect UltraSPARC-Tx, see sparccpuid.S for details... */
       
   392          if (_sparcv9_vis1_instrument() < 7)
       
   393 -            OPENSSL_sparcv9cap_P |= SPARCV9_TICK_PRIVILEGED;
       
   394 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_TICK_PRIVILEGED;
       
   395          if (_sparcv9_vis1_instrument() < 12) {
       
   396 -            OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_PREFER_FPU;
       
   397 +            OPENSSL_sparcv9cap_P[0] |= (SPARCV9_VIS1 | SPARCV9_PREFER_FPU);
       
   398              if (ui & AV_SPARC_VIS2)
       
   399 -                OPENSSL_sparcv9cap_P |= SPARCV9_VIS2;
       
   400 -        }
       
   401 +                OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2;
       
   402 +         }
       
   403      }
       
   404  
       
   405      if (ui & AV_SPARC_FMAF)
       
   406 -        OPENSSL_sparcv9cap_P |= SPARCV9_FMADD;
       
   407 +        OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD;
       
   408 +
       
   409 +    /*
       
   410 +     * VIS3 flag is tested independently from VIS1, unlike VIS2 that is,
       
   411 +     * because VIS3 defines even integer instructions.
       
   412 +     */
       
   413 +    if (ui & AV_SPARC_VIS3)
       
   414 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3;
       
   415 +
       
   416 +#define AV_T4_MECHS     (AV_SPARC_AES | AV_SPARC_DES | AV_SPARC_KASUMI | \
       
   417 +                         AV_SPARC_CAMELLIA | AV_SPARC_MD5 | AV_SPARC_SHA1 | \
       
   418 +                         AV_SPARC_SHA256 | AV_SPARC_SHA512 | AV_SPARC_MPMUL | \
       
   419 +                         AV_SPARC_CRC32C)
       
   420 +
       
   421 +    if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) && (ui & AV_T4_MECHS))
       
   422 +        OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr();
       
   423 +
       
   424 +    if (sizeof(size_t) == 8)
       
   425 +        OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK;
       
   426 +#ifdef __linux
       
   427 +    else
       
   428 +        {
       
   429 +        int ret = syscall(340);
       
   430 +
       
   431 +        if (ret >= 0 && ret & 1)
       
   432 +            OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK;
       
   433 +        }
       
   434 +#endif
       
   435  }
       
   436  
       
   437  #endif
       
   438 Index: crypto/md5/Makefile
       
   439 ===================================================================
       
   440 diff -ru openssl-1.0.1e/crypto/md5/Makefile openssl-1.0.1e/crypto/md5/Makefile
       
   441 --- openssl-1.0.1e/crypto/md5/Makefile	2011-05-24 17:02:24.000000000 -0700
       
   442 +++ openssl-1.0.1e/crypto/md5/Makefile	2011-07-27 10:48:17.817470000 -0700
       
   443 @@ -52,6 +52,9 @@
       
   444  	$(CC) $(CFLAGS) -E asm/md5-ia64.S | \
       
   445  	$(PERL) -ne 's/;\s+/;\n/g; print;' > $@
       
   446  
       
   447 +md5-sparcv9.S:	asm/md5-sparcv9.pl
       
   448 +	$(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS)
       
   449 +
       
   450  files:
       
   451  	$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
       
   452  
       
   453 Index: crypto/md5/md5_locl.h
       
   454 ===================================================================
       
   455 diff -ru openssl-1.0.1e/crypto/md5/md5_locl.h openssl-1.0.1e/crypto/md5/md5_locl.h
       
   456 --- openssl-1.0.1e/crypto/md5/md5_locl.h    2011-05-24 17:02:24.000000000 -0700
       
   457 +++ openssl-1.0.1e/crypto/md5/md5_locl.h    2011-07-27 10:48:17.817470000 -0700
       
   458 @@ -71,6 +71,8 @@
       
   459  #  define md5_block_data_order md5_block_asm_data_order
       
   460  # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
       
   461  #  define md5_block_data_order md5_block_asm_data_order
       
   462 +# elif defined(__sparc) || defined(__sparc__)
       
   463 +#  define md5_block_data_order md5_block_asm_data_order
       
   464  # endif
       
   465  #endif
       
   466 
       
   467 Index: crypto/sha/Makefile
       
   468 ===================================================================
       
   469 diff -ru openssl-1.0.1e/crypto/sha/Makefile openssl-1.0.1e/crypto/sha/Makefile
       
   470 --- openssl-1.0.1e/crypto/sha/Makefile    2011-05-24 17:02:24.000000000 -0700
       
   471 +++ openssl-1.0.1e/crypto/sha/Makefile    2011-07-27 10:48:17.817470000 -0700
       
   472 @@ -68,9 +68,9 @@
       
   473  sha1-x86_64.s:	asm/sha1-x86_64.pl;	$(PERL) asm/sha1-x86_64.pl $(PERLASM_SCHEME) > $@
       
   474  sha256-x86_64.s:asm/sha512-x86_64.pl;	$(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
       
   475  sha512-x86_64.s:asm/sha512-x86_64.pl;	$(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@
       
   476 -sha1-sparcv9.s:	asm/sha1-sparcv9.pl;	$(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS)
       
   477 -sha256-sparcv9.s:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   478 -sha512-sparcv9.s:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   479 +sha1-sparcv9.S:	asm/sha1-sparcv9.pl;	$(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS)
       
   480 +sha256-sparcv9.S:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   481 +sha512-sparcv9.S:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS)
       
   482  
       
   483  sha1-ppc.s:	asm/sha1-ppc.pl;	$(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@
       
   484  sha256-ppc.s:	asm/sha512-ppc.pl;	$(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
       
   485 Index: crypto/sha/asm/sha1-sparcv9.pl
       
   486 ===================================================================
       
   487 diff -ru openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl
       
   488 --- openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700
       
   489 +++ openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700
       
   490 @@ -5,6 +5,8 @@
       
   491  # project. The module is, however, dual licensed under OpenSSL and
       
   492  # CRYPTOGAMS licenses depending on where you obtain it. For further
       
   493  # details see http://www.openssl.org/~appro/cryptogams/.
       
   494 +#
       
   495 +# Hardware SPARC T4 support by David S. Miller <[email protected]>.
       
   496  # ====================================================================
       
   497  
       
   498  # Performance improvement is not really impressive on pre-T1 CPU: +8%
       
   499 @@ -18,6 +20,11 @@
       
   500  # ensure scalability on UltraSPARC T1, or rather to avoid decay when
       
   501  # amount of active threads exceeds the number of physical cores.
       
   502  
       
   503 +# SPARC T4 SHA1 hardware achieves 3.72 cycles per byte, which is 3.1x
       
   504 +# faster than software. Multi-process benchmark saturates at 11x
       
   505 +# single-process result on 8-core processor, or ~9GBps per 2.85GHz
       
   506 +# socket.
       
   507 +
       
   508  $bits=32;
       
   509  for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
       
   510  if ($bits==64)	{ $bias=2047; $frame=192; }
       
   511 @@ -183,11 +190,93 @@
       
   512  .register	%g3,#scratch
       
   513  ___
       
   514  $code.=<<___;
       
   515 +#include "sparc_arch.h"
       
   516 +
       
   517  .section	".text",#alloc,#execinstr
       
   518  
       
   519 +#ifdef __PIC__
       
   520 +SPARC_PIC_THUNK(%g1)
       
   521 +#endif
       
   522 +
       
   523  .align	32
       
   524  .globl	sha1_block_data_order
       
   525  sha1_block_data_order:
       
   526 +    SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
       
   527 +    ld    [%g1+4],%g1        ! OPENSSL_sparcv9cap_P[1]
       
   528 +
       
   529 +    andcc    %g1, CFR_SHA1, %g0
       
   530 +    be    .Lsoftware
       
   531 +    nop
       
   532 +
       
   533 +    ld    [%o0 + 0x00], %f0    ! load context
       
   534 +    ld    [%o0 + 0x04], %f1
       
   535 +    ld    [%o0 + 0x08], %f2
       
   536 +    andcc    %o1, 0x7, %g0
       
   537 +    ld    [%o0 + 0x0c], %f3
       
   538 +    bne,pn    %icc, .Lhwunaligned
       
   539 +     ld    [%o0 + 0x10], %f4
       
   540 +
       
   541 +.Lhw_loop:
       
   542 +    ldd    [%o1 + 0x00], %f8
       
   543 +    ldd    [%o1 + 0x08], %f10
       
   544 +    ldd    [%o1 + 0x10], %f12
       
   545 +    ldd    [%o1 + 0x18], %f14
       
   546 +    ldd    [%o1 + 0x20], %f16
       
   547 +    ldd    [%o1 + 0x28], %f18
       
   548 +    ldd    [%o1 + 0x30], %f20
       
   549 +    subcc    %o2, 1, %o2        ! done yet? 
       
   550 +    ldd    [%o1 + 0x38], %f22
       
   551 +    add    %o1, 0x40, %o1
       
   552 +
       
   553 +    .word    0x81b02820        ! SHA1
       
   554 +
       
   555 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhw_loop
       
   556 +    nop
       
   557 +
       
   558 +.Lhwfinish:
       
   559 +    st    %f0, [%o0 + 0x00]    ! store context
       
   560 +    st    %f1, [%o0 + 0x04]
       
   561 +    st    %f2, [%o0 + 0x08]
       
   562 +    st    %f3, [%o0 + 0x0c]
       
   563 +    retl
       
   564 +    st    %f4, [%o0 + 0x10]
       
   565 +
       
   566 +.align    8
       
   567 +.Lhwunaligned:
       
   568 +    alignaddr %o1, %g0, %o1
       
   569 +
       
   570 +    ldd    [%o1 + 0x00], %f10
       
   571 +.Lhwunaligned_loop:
       
   572 +    ldd    [%o1 + 0x08], %f12
       
   573 +    ldd    [%o1 + 0x10], %f14
       
   574 +    ldd    [%o1 + 0x18], %f16
       
   575 +    ldd    [%o1 + 0x20], %f18
       
   576 +    ldd    [%o1 + 0x28], %f20
       
   577 +    ldd    [%o1 + 0x30], %f22
       
   578 +    ldd    [%o1 + 0x38], %f24
       
   579 +    subcc    %o2, 1, %o2        ! done yet?
       
   580 +    ldd    [%o1 + 0x40], %f26
       
   581 +    add    %o1, 0x40, %o1
       
   582 +
       
   583 +    faligndata %f10, %f12, %f8
       
   584 +    faligndata %f12, %f14, %f10
       
   585 +    faligndata %f14, %f16, %f12
       
   586 +    faligndata %f16, %f18, %f14
       
   587 +    faligndata %f18, %f20, %f16
       
   588 +    faligndata %f20, %f22, %f18
       
   589 +    faligndata %f22, %f24, %f20
       
   590 +    faligndata %f24, %f26, %f22
       
   591 +
       
   592 +    .word    0x81b02820        ! SHA1
       
   593 +
       
   594 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
       
   595 +    for    %f26, %f26, %f10    ! %f10=%f26
       
   596 +
       
   597 +    ba    .Lhwfinish
       
   598 +    nop
       
   599 +
       
   600 +.align    16
       
   601 +.Lsoftware:
       
   602  	save	%sp,-$frame,%sp
       
   603  	sllx	$len,6,$len
       
   604  	add	$inp,$len,$len
       
   605 @@ -279,6 +368,62 @@
       
   606  .align	4
       
   607  ___
       
   608  
       
   609 -$code =~ s/\`([^\`]*)\`/eval $1/gem;
       
   610 -print $code;
       
   611 +# Purpose of these subroutines is to explicitly encode VIS instructions,
       
   612 +# so that one can compile the module without having to specify VIS
       
   613 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
       
   614 +# Idea is to reserve for option to produce "universal" binary and let
       
   615 +# programmer detect if current CPU is VIS capable at run-time.
       
   616 +sub unvis {
       
   617 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   618 +my ($ref,$opf);	# list form: both variables are lexical
       
   619 +my %visopf = (    "faligndata"    => 0x048,
       
   620 +        "for"        => 0x07c    );
       
   621 +
       
   622 +    $ref = "$mnemonic\t$rs1,$rs2,$rd";
       
   623 +
       
   624 +    if ($opf=$visopf{$mnemonic}) {
       
   625 +    foreach ($rs1,$rs2,$rd) {
       
   626 +        return $ref if (!/%f([0-9]{1,2})/);
       
   627 +        $_=$1;
       
   628 +        if ($1>=32) {
       
   629 +        return $ref if ($1&1);
       
   630 +        # re-encode for upper double register addressing
       
   631 +        $_=($1|$1>>5)&31;
       
   632 +        }
       
   633 +    }
       
   634 +
       
   635 +    return    sprintf ".word\t0x%08x !%s",
       
   636 +            0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
       
   637 +            $ref;
       
   638 +    } else {
       
   639 +    return $ref;
       
   640 +    }
       
   641 +}
       
   642 +sub unalignaddr {
       
   643 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   644 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
       
   645 +my $ref="$mnemonic\t$rs1,$rs2,$rd";
       
   646 +
       
   647 +    foreach ($rs1,$rs2,$rd) {
       
   648 +    if (/%([goli])([0-7])/)    { $_=$bias{$1}+$2; }
       
   649 +    else            { return $ref; }
       
   650 +    }
       
   651 +    return  sprintf ".word\t0x%08x !%s",
       
   652 +            0x81b00300|$rd<<25|$rs1<<14|$rs2,
       
   653 +            $ref;
       
   654 +}
       
   655 +
       
   656 +foreach (split("\n",$code)) {
       
   657 +    s/\`([^\`]*)\`/eval $1/ge;
       
   658 +
       
   659 +    s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
       
   660 +        &unvis($1,$2,$3,$4)
       
   661 +     /ge;
       
   662 +    s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
       
   663 +        &unalignaddr($1,$2,$3,$4)
       
   664 +     /ge;
       
   665 +
       
   666 +    print $_,"\n";
       
   667 +}
       
   668 +
       
   669  close STDOUT;
       
   670 
       
   671 Index: crypto/sha/asm/sha512-sparcv9.pl
       
   672 ===================================================================
       
   673 diff -ru openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl
       
   674 --- openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700
       
   675 +++ openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700
       
   676 @@ -5,6 +5,8 @@
       
   677  # project. The module is, however, dual licensed under OpenSSL and
       
   678  # CRYPTOGAMS licenses depending on where you obtain it. For further
       
   679  # details see http://www.openssl.org/~appro/cryptogams/.
       
   680 +#
       
   681 +# Hardware SPARC T4 support by David S. Miller <[email protected]>.
       
   682  # ====================================================================
       
   683  
       
   684  # SHA256 performance improvement over compiler generated code varies
       
   685 @@ -41,6 +43,12 @@
       
   686  #	loads are always slower than one 64-bit load. Once again this
       
   687  #	is unlike pre-T1 UltraSPARC, where, if scheduled appropriately,
       
   688  #	2x32-bit loads can be as fast as 1x64-bit ones.
       
   689 +#
       
   690 +# SPARC T4 SHA256/512 hardware achieves 3.17/2.01 cycles per byte,
       
   691 +# which is 9.3x/11.1x faster than software. Multi-process benchmark
       
   692 +# saturates at 11.5x single-process result on 8-core processor, or
       
   693 +# ~11/16GBps per 2.85GHz socket.
       
   694 +
       
   695  
       
   696  $bits=32;
       
   697  for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
       
   698 @@ -386,6 +394,8 @@
       
   699  .register	%g3,#scratch
       
   700  ___
       
   701  $code.=<<___;
       
   702 +#include "sparc_arch.h"
       
   703 +
       
   704  .section	".text",#alloc,#execinstr
       
   705  
       
   706  .align	64
       
   707 @@ -457,8 +467,196 @@
       
   708  }
       
   709  $code.=<<___;
       
   710  .size	K${label},.-K${label}
       
   711 +
       
   712 +#ifdef __PIC__
       
   713 +SPARC_PIC_THUNK(%g1)
       
   714 +#endif
       
   715 +
       
   716  .globl	sha${label}_block_data_order
       
   717 +.align	32
       
   718  sha${label}_block_data_order:
       
   719 +    SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
       
   720 +    ld    [%g1+4],%g1        ! OPENSSL_sparcv9cap_P[1]
       
   721 +
       
   722 +    andcc    %g1, CFR_SHA${label}, %g0
       
   723 +    be    .Lsoftware
       
   724 +    nop
       
   725 +___
       
   726 +$code.=<<___ if ($SZ==8);         # SHA512
       
   727 +    ldd    [%o0 + 0x00], %f0    ! load context
       
   728 +    ldd    [%o0 + 0x08], %f2
       
   729 +    ldd    [%o0 + 0x10], %f4
       
   730 +    ldd    [%o0 + 0x18], %f6
       
   731 +    ldd    [%o0 + 0x20], %f8
       
   732 +    ldd    [%o0 + 0x28], %f10
       
   733 +    andcc    %o1, 0x7, %g0
       
   734 +    ldd    [%o0 + 0x30], %f12
       
   735 +    bne,pn    %icc, .Lhwunaligned
       
   736 +     ldd    [%o0 + 0x38], %f14
       
   737 +
       
   738 +.Lhwaligned_loop:
       
   739 +    ldd    [%o1 + 0x00], %f16
       
   740 +    ldd    [%o1 + 0x08], %f18
       
   741 +    ldd    [%o1 + 0x10], %f20
       
   742 +    ldd    [%o1 + 0x18], %f22
       
   743 +    ldd    [%o1 + 0x20], %f24
       
   744 +    ldd    [%o1 + 0x28], %f26
       
   745 +    ldd    [%o1 + 0x30], %f28
       
   746 +    ldd    [%o1 + 0x38], %f30
       
   747 +    ldd    [%o1 + 0x40], %f32
       
   748 +    ldd    [%o1 + 0x48], %f34
       
   749 +    ldd    [%o1 + 0x50], %f36
       
   750 +    ldd    [%o1 + 0x58], %f38
       
   751 +    ldd    [%o1 + 0x60], %f40
       
   752 +    ldd    [%o1 + 0x68], %f42
       
   753 +    ldd    [%o1 + 0x70], %f44
       
   754 +    subcc    %o2, 1, %o2        ! done yet?
       
   755 +    ldd    [%o1 + 0x78], %f46
       
   756 +    add    %o1, 0x80, %o1
       
   757 +
       
   758 +    .word    0x81b02860        ! SHA512
       
   759 +
       
   760 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhwaligned_loop
       
   761 +    nop
       
   762 +
       
   763 +.Lhwfinish:
       
   764 +    std    %f0, [%o0 + 0x00]    ! store context
       
   765 +    std    %f2, [%o0 + 0x08]
       
   766 +    std    %f4, [%o0 + 0x10]
       
   767 +    std    %f6, [%o0 + 0x18]
       
   768 +    std    %f8, [%o0 + 0x20]
       
   769 +    std    %f10, [%o0 + 0x28]
       
   770 +    std    %f12, [%o0 + 0x30]
       
   771 +    retl
       
   772 +     std    %f14, [%o0 + 0x38]
       
   773 +
       
   774 +.align    16
       
   775 +.Lhwunaligned:
       
   776 +    alignaddr %o1, %g0, %o1
       
   777 +
       
   778 +    ldd    [%o1 + 0x00], %f18
       
   779 +.Lhwunaligned_loop:
       
   780 +    ldd    [%o1 + 0x08], %f20
       
   781 +    ldd    [%o1 + 0x10], %f22
       
   782 +    ldd    [%o1 + 0x18], %f24
       
   783 +    ldd    [%o1 + 0x20], %f26
       
   784 +    ldd    [%o1 + 0x28], %f28
       
   785 +    ldd    [%o1 + 0x30], %f30
       
   786 +    ldd    [%o1 + 0x38], %f32
       
   787 +    ldd    [%o1 + 0x40], %f34
       
   788 +    ldd    [%o1 + 0x48], %f36
       
   789 +    ldd    [%o1 + 0x50], %f38
       
   790 +    ldd    [%o1 + 0x58], %f40
       
   791 +    ldd    [%o1 + 0x60], %f42
       
   792 +    ldd    [%o1 + 0x68], %f44
       
   793 +    ldd    [%o1 + 0x70], %f46
       
   794 +    ldd    [%o1 + 0x78], %f48
       
   795 +    subcc    %o2, 1, %o2        ! done yet?
       
   796 +    ldd    [%o1 + 0x80], %f50
       
   797 +    add    %o1, 0x80, %o1
       
   798 +
       
   799 +    faligndata %f18, %f20, %f16
       
   800 +    faligndata %f20, %f22, %f18
       
   801 +    faligndata %f22, %f24, %f20
       
   802 +    faligndata %f24, %f26, %f22
       
   803 +    faligndata %f26, %f28, %f24
       
   804 +    faligndata %f28, %f30, %f26
       
   805 +    faligndata %f30, %f32, %f28
       
   806 +    faligndata %f32, %f34, %f30
       
   807 +    faligndata %f34, %f36, %f32
       
   808 +    faligndata %f36, %f38, %f34
       
   809 +    faligndata %f38, %f40, %f36
       
   810 +    faligndata %f40, %f42, %f38
       
   811 +    faligndata %f42, %f44, %f40
       
   812 +    faligndata %f44, %f46, %f42
       
   813 +    faligndata %f46, %f48, %f44
       
   814 +    faligndata %f48, %f50, %f46
       
   815 +
       
   816 +    .word    0x81b02860        ! SHA512
       
   817 +
       
   818 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
       
   819 +    for    %f50, %f50, %f18    ! %f18=%f50
       
   820 +
       
   821 +    ba    .Lhwfinish
       
   822 +    nop
       
   823 +___
       
   824 +$code.=<<___ if ($SZ==4);         # SHA256
       
   825 +    ld    [%o0 + 0x00], %f0
       
   826 +    ld    [%o0 + 0x04], %f1
       
   827 +    ld    [%o0 + 0x08], %f2
       
   828 +    ld    [%o0 + 0x0c], %f3
       
   829 +    ld    [%o0 + 0x10], %f4
       
   830 +    ld    [%o0 + 0x14], %f5
       
   831 +    andcc    %o1, 0x7, %g0
       
   832 +    ld    [%o0 + 0x18], %f6
       
   833 +    bne,pn    %icc, .Lhwunaligned
       
   834 +     ld    [%o0 + 0x1c], %f7
       
   835 +
       
   836 +.Lhwloop:
       
   837 +    ldd    [%o1 + 0x00], %f8
       
   838 +    ldd    [%o1 + 0x08], %f10
       
   839 +    ldd    [%o1 + 0x10], %f12
       
   840 +    ldd    [%o1 + 0x18], %f14
       
   841 +    ldd    [%o1 + 0x20], %f16
       
   842 +    ldd    [%o1 + 0x28], %f18
       
   843 +    ldd    [%o1 + 0x30], %f20
       
   844 +    subcc    %o2, 1, %o2        ! done yet?
       
   845 +    ldd    [%o1 + 0x38], %f22
       
   846 +    add    %o1, 0x40, %o1
       
   847 +
       
   848 +    .word    0x81b02840        ! SHA256
       
   849 +
       
   850 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhwloop
       
   851 +    nop
       
   852 +
       
   853 +.Lhwfinish:
       
   854 +    st    %f0, [%o0 + 0x00]    ! store context
       
   855 +    st    %f1, [%o0 + 0x04]
       
   856 +    st    %f2, [%o0 + 0x08]
       
   857 +    st    %f3, [%o0 + 0x0c]
       
   858 +    st    %f4, [%o0 + 0x10]
       
   859 +    st    %f5, [%o0 + 0x14]
       
   860 +    st    %f6, [%o0 + 0x18]
       
   861 +    retl
       
   862 +     st    %f7, [%o0 + 0x1c]
       
   863 +
       
   864 +.align    8
       
   865 +.Lhwunaligned:
       
   866 +    alignaddr %o1, %g0, %o1
       
   867 +
       
   868 +    ldd    [%o1 + 0x00], %f10
       
   869 +.Lhwunaligned_loop:
       
   870 +    ldd    [%o1 + 0x08], %f12
       
   871 +    ldd    [%o1 + 0x10], %f14
       
   872 +    ldd    [%o1 + 0x18], %f16
       
   873 +    ldd    [%o1 + 0x20], %f18
       
   874 +    ldd    [%o1 + 0x28], %f20
       
   875 +    ldd    [%o1 + 0x30], %f22
       
   876 +    ldd    [%o1 + 0x38], %f24
       
   877 +    subcc    %o2, 1, %o2        ! done yet?
       
   878 +    ldd    [%o1 + 0x40], %f26
       
   879 +    add    %o1, 0x40, %o1
       
   880 +
       
   881 +    faligndata %f10, %f12, %f8
       
   882 +    faligndata %f12, %f14, %f10
       
   883 +    faligndata %f14, %f16, %f12
       
   884 +    faligndata %f16, %f18, %f14
       
   885 +    faligndata %f18, %f20, %f16
       
   886 +    faligndata %f20, %f22, %f18
       
   887 +    faligndata %f22, %f24, %f20
       
   888 +    faligndata %f24, %f26, %f22
       
   889 +
       
   890 +    .word    0x81b02840        ! SHA256
       
   891 +
       
   892 +    bne,pt    `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
       
   893 +    for    %f26, %f26, %f10    ! %f10=%f26
       
   894 +
       
   895 +    ba    .Lhwfinish
       
   896 +    nop
       
   897 +___
       
   898 +$code.=<<___;
       
   899 +.align    16
       
   900 +.Lsoftware:
       
   901  	save	%sp,`-$frame-$locals`,%sp
       
   902  	and	$inp,`$align-1`,$tmp31
       
   903  	sllx	$len,`log(16*$SZ)/log(2)`,$len
       
   904 @@ -589,6 +787,62 @@
       
   905  .align	4
       
   906  ___
       
   907  
       
   908 -$code =~ s/\`([^\`]*)\`/eval $1/gem;
       
   909 -print $code;
       
   910 +# Purpose of these subroutines is to explicitly encode VIS instructions,
       
   911 +# so that one can compile the module without having to specify VIS
       
   912 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
       
   913 +# Idea is to reserve for option to produce "universal" binary and let
       
   914 +# programmer detect if current CPU is VIS capable at run-time.
       
   915 +sub unvis {
       
   916 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   917 +my ($ref,$opf);	# list form: both variables are lexical
       
   918 +my %visopf = (    "faligndata"    => 0x048,
       
   919 +        "for"        => 0x07c    );
       
   920 +
       
   921 +    $ref = "$mnemonic\t$rs1,$rs2,$rd";
       
   922 +
       
   923 +    if ($opf=$visopf{$mnemonic}) {
       
   924 +    foreach ($rs1,$rs2,$rd) {
       
   925 +        return $ref if (!/%f([0-9]{1,2})/);
       
   926 +        $_=$1;
       
   927 +        if ($1>=32) {
       
   928 +        return $ref if ($1&1);
       
   929 +        # re-encode for upper double register addressing
       
   930 +        $_=($1|$1>>5)&31;
       
   931 +        }
       
   932 +    }
       
   933 +
       
   934 +    return    sprintf ".word\t0x%08x !%s",
       
   935 +            0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
       
   936 +            $ref;
       
   937 +    } else {
       
   938 +    return $ref;
       
   939 +    }
       
   940 +}
       
   941 +sub unalignaddr {
       
   942 +my ($mnemonic,$rs1,$rs2,$rd)=@_;
       
   943 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
       
   944 +my $ref="$mnemonic\t$rs1,$rs2,$rd";
       
   945 +
       
   946 +    foreach ($rs1,$rs2,$rd) {
       
   947 +    if (/%([goli])([0-7])/)    { $_=$bias{$1}+$2; }
       
   948 +    else            { return $ref; }
       
   949 +    }
       
   950 +    return  sprintf ".word\t0x%08x !%s",
       
   951 +            0x81b00300|$rd<<25|$rs1<<14|$rs2,
       
   952 +            $ref;
       
   953 +}
       
   954 +
       
   955 +foreach (split("\n",$code)) {
       
   956 +    s/\`([^\`]*)\`/eval $1/ge;
       
   957 +
       
   958 +    s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
       
   959 +        &unvis($1,$2,$3,$4)
       
   960 +     /ge;
       
   961 +    s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
       
   962 +        &unalignaddr($1,$2,$3,$4)
       
   963 +     /ge;
       
   964 +
       
   965 +    print $_,"\n";
       
   966 +}
       
   967 +
       
   968  close STDOUT;
       
   969 Index: crypto/des/Makefile
       
   970 ===================================================================
       
   971 diff -ru openssl-1.0.1e/crypto/des/Makefile.orig openssl-1.0.1e/crypto/des/Makefile
       
   972 --- a/crypto/des/Makefile
       
   973 +++ b/crypto/des/Makefile
       
   974 @@ -61,6 +61,8 @@ des: des.o cbc3_enc.o lib
       
   975  
       
   976  des_enc-sparc.S:	asm/des_enc.m4
       
   977  	m4 -B 8192 asm/des_enc.m4 > des_enc-sparc.S
       
   978 +dest4-sparcv9.s:	asm/dest4-sparcv9.pl
       
   979 +	$(PERL) asm/dest4-sparcv9.pl $(CFLAGS) > $@
       
   980  
       
   981  des-586.s:	asm/des-586.pl ../perlasm/x86asm.pl ../perlasm/cbc.pl
       
   982  	$(PERL) asm/des-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@
       
   983 Index: crypto/evp/e_des.c
       
   984 ===================================================================
       
   985 diff -ru openssl-1.0.1e/crypto/evp/e_des.c.orig openssl-1.0.1e/crypto/evp/e_des.c
       
   986 --- a/crypto/evp/e_des.c
       
   987 +++ b/crypto/evp/e_des.c
       
   988 @@ -65,6 +65,30 @@
       
   989  # include <openssl/des.h>
       
   990  # include <openssl/rand.h>
       
   991  
       
   992 +typedef struct {
       
   993 +    union { double align; DES_key_schedule ks; } ks;
       
   994 +    union {
       
   995 +        void (*cbc)(const void *,void *,size_t,const void *,void *);
       
   996 +    } stream;
       
   997 +} EVP_DES_KEY;
       
   998 +
       
   999 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
       
  1000 +/* ---------^^^ this is not a typo, just a way to detect that
       
  1001 + * assembler support was in general requested...
       
  1002 + */
       
  1003 +#include "sparc_arch.h"
       
  1004 +
       
  1005 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1006 +
       
  1007 +#define SPARC_DES_CAPABLE    (OPENSSL_sparcv9cap_P[1] & CFR_DES)
       
  1008 +
       
  1009 +void    des_t4_key_expand(const void *key, DES_key_schedule *ks);
       
  1010 +void    des_t4_cbc_encrypt(const void *inp,void *out,size_t len,
       
  1011 +                DES_key_schedule *ks,unsigned char iv[8]);
       
  1012 +void    des_t4_cbc_decrypt(const void *inp,void *out,size_t len,
       
  1013 +                DES_key_schedule *ks,unsigned char iv[8]);
       
  1014 +#endif
       
  1015 +
       
  1016  static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1017                          const unsigned char *iv, int enc);
       
  1018  static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr);
       
  1019 @@ -102,6 +126,12 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1020  static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1021                            const unsigned char *in, size_t inl)
       
  1022  {
       
  1023 +    EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data;
       
  1024 +
       
  1025 +    if (dat->stream.cbc) {
       
  1026 +        (*dat->stream.cbc)(in,out,inl,&dat->ks.ks,ctx->iv);
       
  1027 +        return 1;
       
  1028 +    }
       
  1029      while (inl >= EVP_MAXCHUNK) {
       
  1030          DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data,
       
  1031                           (DES_cblock *)ctx->iv, ctx->encrypt);
       
  1032 @@ -179,16 +209,16 @@
       
  1033      return 1;
       
  1034  }
       
  1035  
       
  1036 -BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64,
       
  1037 +BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64,
       
  1038                    EVP_CIPH_RAND_KEY, des_init_key, NULL,
       
  1039                    EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl)
       
  1040  
       
  1041  
       
  1042 -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 1,
       
  1043 +BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 1,
       
  1044                   EVP_CIPH_RAND_KEY, des_init_key, NULL,
       
  1045                   EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl)
       
  1046  
       
  1047 -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 8,
       
  1048 +BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 8,
       
  1049                       EVP_CIPH_RAND_KEY, des_init_key, NULL,
       
  1050                       EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl)
       
  1051  
       
  1052 @@ -196,8 +226,23 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1053                          const unsigned char *iv, int enc)
       
  1054  {
       
  1055      DES_cblock *deskey = (DES_cblock *)key;
       
  1056 +    EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data;
       
  1057 +
       
  1058 +    dat->stream.cbc = NULL;
       
  1059 +#if defined(SPARC_DES_CAPABLE)
       
  1060 +    if (SPARC_DES_CAPABLE) {
       
  1061 +        int mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1062 +
       
  1063 +        if (mode == EVP_CIPH_CBC_MODE) {
       
  1064 +            des_t4_key_expand(key,&dat->ks.ks);
       
  1065 +            dat->stream.cbc = enc ? des_t4_cbc_encrypt :
       
  1066 +                        des_t4_cbc_decrypt;
       
  1067 +            return 1;
       
  1068 +        }
       
  1069 +    }
       
  1070 +#endif
       
  1071  # ifdef EVP_CHECK_DES_KEY
       
  1072 -    if (DES_set_key_checked(deskey, ctx->cipher_data) != 0)
       
  1073 +    if (DES_set_key_checked(deskey, &dat->ks.ks) != 0)
       
  1074          return 0;
       
  1075  # else
       
  1076      DES_set_key_unchecked(deskey, ctx->cipher_data);
       
  1077 Index: crypto/evp/e_des3.c
       
  1078 ===================================================================
       
  1079 diff -ru openssl-1.0.1e/crypto/evp/e_des3.c.orig openssl-1.0.1e/crypto/evp/e_des3.c
       
  1080 --- a/crypto/evp/e_des3.c
       
  1081 +++ b/crypto/evp/e_des3.c
       
  1082 @@ -65,6 +65,32 @@
       
  1083  # include <openssl/des.h>
       
  1084  # include <openssl/rand.h>
       
  1085  
       
  1086 +typedef struct {
       
  1087 +    union { double align; DES_key_schedule ks[3]; } ks;
       
  1088 +    union {
       
  1089 +        void (*cbc)(const void *,void *,size_t,const void *,void *);
       
  1090 +    } stream;
       
  1091 +} DES_EDE_KEY;
       
  1092 +#define ks1 ks.ks[0]
       
  1093 +#define ks2 ks.ks[1]
       
  1094 +#define ks3 ks.ks[2]
       
  1095 +
       
  1096 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
       
  1097 +/* ---------^^^ this is not a typo, just a way to detect that
       
  1098 + * assembler support was in general requested... */
       
  1099 +#include "sparc_arch.h"
       
  1100 +
       
  1101 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1102 +
       
  1103 +#define SPARC_DES_CAPABLE    (OPENSSL_sparcv9cap_P[1] & CFR_DES)
       
  1104 +
       
  1105 +void    des_t4_key_expand(const void *key, DES_key_schedule *ks);
       
  1106 +void    des_t4_ede3_cbc_encrypt(const void *inp,void *out,size_t len,
       
  1107 +                DES_key_schedule *ks,unsigned char iv[8]);
       
  1108 +void    des_t4_ede3_cbc_decrypt(const void *inp,void *out,size_t len,
       
  1109 +                DES_key_schedule *ks,unsigned char iv[8]);
       
  1110 +#endif
       
  1111 +
       
  1112  # ifndef OPENSSL_FIPS
       
  1113  
       
  1114  static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1115 @@ -75,12 +100,6 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1116 
       
  1117  static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr);
       
  1118 
       
  1119 -typedef struct {
       
  1120 -    DES_key_schedule ks1;       /* key schedule */
       
  1121 -    DES_key_schedule ks2;       /* key schedule (for ede) */
       
  1122 -    DES_key_schedule ks3;       /* key schedule (for ede3) */
       
  1123 -} DES_EDE_KEY;
       
  1124 -
       
  1125  #  define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data)
       
  1126 
       
  1127  /*
       
  1128 @@ -123,6 +117,7 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1129  static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1130                                const unsigned char *in, size_t inl)
       
  1131  {
       
  1132 +    DES_EDE_KEY *dat = data(ctx);
       
  1133  #  ifdef KSSL_DEBUG
       
  1134      {
       
  1135          int i;
       
  1136 @@ -134,11 +155,15 @@
       
  1137          fprintf(stderr, "\n");
       
  1138      }
       
  1139  #  endif                        /* KSSL_DEBUG */
       
  1140 +    if (dat->stream.cbc) {
       
  1141 +        (*dat->stream.cbc)(in,out,inl,&dat->ks,ctx->iv);
       
  1142 +        return 1;
       
  1143 +    }
       
  1144 +
       
  1145      while (inl >= EVP_MAXCHUNK) {
       
  1146          DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK,
       
  1147 -                             &data(ctx)->ks1, &data(ctx)->ks2,
       
  1148 -                             &data(ctx)->ks3, (DES_cblock *)ctx->iv,
       
  1149 -                             ctx->encrypt);
       
  1150 +                             &dat->ks1, &dat->ks2, &dat->ks3,
       
  1151 +                             (DES_cblock *)ctx->iv, ctx->encrypt);
       
  1152          inl -= EVP_MAXCHUNK;
       
  1153          in += EVP_MAXCHUNK;
       
  1154          out += EVP_MAXCHUNK;
       
  1155 @@ -145,9 +170,8 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1156      }
       
  1157      if (inl)
       
  1158          DES_ede3_cbc_encrypt(in, out, (long)inl,
       
  1159 -                             &data(ctx)->ks1, &data(ctx)->ks2,
       
  1160 -                             &data(ctx)->ks3, (DES_cblock *)ctx->iv,
       
  1161 -                             ctx->encrypt);
       
  1162 +                             &dat->ks1, &dat->ks2, &dat->ks3,
       
  1163 +                             (DES_cblock *)ctx->iv, ctx->encrypt);
       
  1164      return 1;
       
  1165  }
       
  1166 
       
  1167 @@ -215,39 +239,58 @@ static int des_ede3_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1168  }
       
  1169 
       
  1170  BLOCK_CIPHER_defs(des_ede, DES_EDE_KEY, NID_des_ede, 8, 16, 8, 64,
       
  1171 -                  EVP_CIPH_RAND_KEY, des_ede_init_key, NULL,
       
  1172 -                  EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl)
       
  1173 +                  EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1174 +                  des_ede_init_key, NULL, NULL, NULL,
       
  1175 +                 des3_ctrl)
       
  1176  #  define des_ede3_cfb64_cipher des_ede_cfb64_cipher
       
  1177  #  define des_ede3_ofb_cipher des_ede_ofb_cipher
       
  1178  #  define des_ede3_cbc_cipher des_ede_cbc_cipher
       
  1179  #  define des_ede3_ecb_cipher des_ede_ecb_cipher
       
  1180      BLOCK_CIPHER_defs(des_ede3, DES_EDE_KEY, NID_des_ede3, 8, 24, 8, 64,
       
  1181 -                  EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL,
       
  1182 -                  EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl)
       
  1183 +        EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1184 +        des_ede3_init_key, NULL, NULL, NULL,
       
  1185 +        des3_ctrl)
       
  1186 
       
  1187      BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 1,
       
  1188 -                     EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL,
       
  1189 -                     EVP_CIPHER_set_asn1_iv,
       
  1190 -                     EVP_CIPHER_get_asn1_iv, des3_ctrl)
       
  1191 +        EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1192 +        des_ede3_init_key, NULL, NULL, NULL,
       
  1193 +        des3_ctrl)
       
  1194 
       
  1195      BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 8,
       
  1196 -                     EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL,
       
  1197 -                     EVP_CIPHER_set_asn1_iv,
       
  1198 -                     EVP_CIPHER_get_asn1_iv, des3_ctrl)
       
  1199 +        EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1,
       
  1200 +        des_ede3_init_key, NULL, NULL, NULL,
       
  1201 +        des3_ctrl)
       
  1202 
       
  1203  static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1204                              const unsigned char *iv, int enc)
       
  1205  {
       
  1206      DES_cblock *deskey = (DES_cblock *)key;
       
  1207 +    DES_EDE_KEY *dat = data(ctx);
       
  1208 +
       
  1209 +    dat->stream.cbc = NULL;
       
  1210 +#if defined(SPARC_DES_CAPABLE)
       
  1211 +    if (SPARC_DES_CAPABLE) {
       
  1212 +        int mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1213 +
       
  1214 +        if (mode == EVP_CIPH_CBC_MODE) {
       
  1215 +            des_t4_key_expand(&deskey[0],&dat->ks1);
       
  1216 +            des_t4_key_expand(&deskey[1],&dat->ks2);
       
  1217 +            memcpy(&dat->ks3,&dat->ks1,sizeof(dat->ks1));
       
  1218 +            dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt :
       
  1219 +                        des_t4_ede3_cbc_decrypt;
       
  1220 +            return 1;
       
  1221 +        }
       
  1222 +    }
       
  1223 +#endif
       
  1224  #  ifdef EVP_CHECK_DES_KEY
       
  1225 -    if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1)
       
  1226 -        ! !DES_set_key_checked(&deskey[1], &data(ctx)->ks2))
       
  1227 +    if (DES_set_key_checked(&deskey[0],&dat->ks1)
       
  1228 +        || DES_set_key_checked(&deskey[1],&dat->ks2))
       
  1229          return 0;
       
  1230  #  else
       
  1231 -    DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1);
       
  1232 -    DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2);
       
  1233 +    DES_set_key_unchecked(&deskey[0],&dat->ks1);
       
  1234 +    DES_set_key_unchecked(&deskey[1],&dat->ks2);
       
  1235  #  endif
       
  1236 -    memcpy(&data(ctx)->ks3, &data(ctx)->ks1, sizeof(data(ctx)->ks1));
       
  1237 +    memcpy(&dat->ks3,&dat->ks1, sizeof(dat->ks1));
       
  1238      return 1;
       
  1239  }
       
  1240 
       
  1241 @@ -255,6 +298,8 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1242                               const unsigned char *iv, int enc)
       
  1243  {
       
  1244      DES_cblock *deskey = (DES_cblock *)key;
       
  1245 +    DES_EDE_KEY *dat = data(ctx);
       
  1246 +
       
  1247  #  ifdef KSSL_DEBUG
       
  1248      {
       
  1249          int i;
       
  1250 @@ -272,15 +317,30 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1251      }
       
  1252  #  endif                        /* KSSL_DEBUG */
       
  1253 
       
  1254 +    dat->stream.cbc = NULL;
       
  1255 +#if defined(SPARC_DES_CAPABLE)
       
  1256 +    if (SPARC_DES_CAPABLE) {
       
  1257 +        int mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1258 +
       
  1259 +        if (mode == EVP_CIPH_CBC_MODE) {
       
  1260 +            des_t4_key_expand(&deskey[0],&dat->ks1);
       
  1261 +            des_t4_key_expand(&deskey[1],&dat->ks2);
       
  1262 +            des_t4_key_expand(&deskey[2],&dat->ks3);
       
  1263 +            dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt :
       
  1264 +                        des_t4_ede3_cbc_decrypt;
       
  1265 +            return 1;
       
  1266 +        }
       
  1267 +    }
       
  1268 +#endif
       
  1269  #  ifdef EVP_CHECK_DES_KEY
       
  1270 -    if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1)
       
  1271 -        || DES_set_key_checked(&deskey[1], &data(ctx)->ks2)
       
  1272 -        || DES_set_key_checked(&deskey[2], &data(ctx)->ks3))
       
  1273 +    if (DES_set_key_checked(&deskey[0],&dat->ks1)
       
  1274 +        || DES_set_key_checked(&deskey[1],&dat->ks2)
       
  1275 +        || DES_set_key_checked(&deskey[2],&dat->ks3))
       
  1276          return 0;
       
  1277  #  else
       
  1278 -    DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1);
       
  1279 -    DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2);
       
  1280 -    DES_set_key_unchecked(&deskey[2], &data(ctx)->ks3);
       
  1281 +    DES_set_key_unchecked(&deskey[0],&dat->ks1);
       
  1282 +    DES_set_key_unchecked(&deskey[1],&dat->ks2);
       
  1283 +    DES_set_key_unchecked(&deskey[2],&dat->ks3);
       
  1284  #  endif
       
  1285      return 1;
       
  1286  }
       
  1287 Index: openssl/crypto/bn/Makefile
       
  1288 ===================================================================
       
  1289 diff -ru openssl-1.0.1e/crypto/bn/Makefile openssl-1.0.1e/crypto/bn/Makefile.new
       
  1290 --- openssl-1.0.1e/crypto/bn/Makefile 2011-05-24 17:02:24.000000000 -0700
       
  1291 +++ openssl-1.0.1e/crypto/bn/Makefile 2011-07-27 10:48:17.817470000 -0700
       
  1292 @@ -77,6 +77,12 @@
       
  1293  	$(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@
       
  1294  sparcv9-mont.s:		asm/sparcv9-mont.pl
       
  1295  	$(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@
       
  1296 +vis3-mont.s:		asm/vis3-mont.pl
       
  1297 +	$(PERL) asm/vis3-mont.pl $(CFLAGS) > $@
       
  1298 +sparct4-mont.S:	asm/sparct4-mont.pl
       
  1299 +	$(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@
       
  1300 +sparcv9-gf2m.S:	asm/sparcv9-gf2m.pl
       
  1301 +	$(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@
       
  1302  
       
  1303  bn-mips3.o:	asm/mips3.s
       
  1304  	@if [ "$(CC)" = "gcc" ]; then \
       
  1305 Index: openssl/crypto/bn/bn_exp.c
       
  1306 ===================================================================
       
  1307 diff -ru openssl-1.0.1e/crypto/bn/bn_exp.c openssl-1.0.1e/crypto/bn/bn_exp.c.new
       
  1308 --- bn_exp.c    2011/10/29 19:25:13    1.38
       
  1309 +++ bn_exp.c    2012/11/17 10:34:11    1.39
       
  1310 @@ -122,8 +122,15 @@
       
  1311  # ifndef alloca
       
  1312  #  define alloca(s) __builtin_alloca((s))
       
  1313  # endif
       
  1314 +#else
       
  1315 +#include <alloca.h>
       
  1316  #endif
       
  1317  
       
  1318 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1319 +# include "sparc_arch.h"
       
  1320 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1321 +#endif
       
  1322 +
       
  1323  /* maximum precomputation table size for *variable* sliding windows */
       
  1324  #define TABLE_SIZE      32
       
  1325  
       
  1326 @@ -464,8 +471,16 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1327      wstart = bits - 1;          /* The top bit of the window */
       
  1328      wend = 0;                   /* The bottom bit of the window */
       
  1329  
       
  1330 +#if 1    /* by Shay Gueron's suggestion */
       
  1331 +    j = mont->N.top;    /* borrow j */
       
  1332 +    if (bn_wexpand(r,j) == NULL) goto err;
       
  1333 +    r->d[0] = (0-m->d[0])&BN_MASK2;        /* 2^(top*BN_BITS2) - m */
       
  1334 +    for(i=1;i<j;i++) r->d[i] = (~m->d[i])&BN_MASK2;
       
  1335 +    r->top = j;
       
  1336 +#else
       
  1337      if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
       
  1338          goto err;
       
  1339 +#endif
       
  1340      for (;;) {
       
  1341          if (BN_is_bit_set(p, wstart) == 0) {
       
  1342              if (!start) {
       
  1343 @@ -515,6 +530,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1344          if (wstart < 0)
       
  1345              break;
       
  1346      }
       
  1347 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1348 +    if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3|SPARCV9_PREFER_FPU)) {
       
  1349 +        j = mont->N.top;    /* borrow j */
       
  1350 +        val[0]->d[0] = 1;    /* borrow val[0] */
       
  1351 +        for (i=1;i<j;i++)
       
  1352 +            val[0]->d[i] = 0;
       
  1353 +        val[0]->top = j;
       
  1354 +        if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx))
       
  1355 +            goto err;
       
  1356 +    } else
       
  1357 +#endif
       
  1358      if (!BN_from_montgomery(rr, r, mont, ctx))
       
  1359          goto err;
       
  1360      ret = 1;
       
  1361 @@ -526,6 +552,26 @@ err:
       
  1362      return (ret);
       
  1363  }
       
  1364 
       
  1365 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1366 +static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) {
       
  1367 +    BN_ULONG ret = 0;
       
  1368 +    int wordpos;
       
  1369 +
       
  1370 +    wordpos = bitpos / BN_BITS2;
       
  1371 +    bitpos %= BN_BITS2;
       
  1372 +    if (wordpos>=0 && wordpos < a->top) {
       
  1373 +        ret = a->d[wordpos]&BN_MASK2;
       
  1374 +        if (bitpos) {
       
  1375 +            ret >>= bitpos;
       
  1376 +            if (++wordpos < a->top)
       
  1377 +                ret |= a->d[wordpos]<<(BN_BITS2-bitpos);
       
  1378 +        }
       
  1379 +    }
       
  1380 +
       
  1381 +    return ret & BN_MASK2;
       
  1382 +}
       
  1383 +#endif
       
  1384 +
       
  1385  /*
       
  1386   * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
       
  1387   * layout so that accessing any of these table values shows the same access
       
  1388 @@ -594,6 +640,9 @@
       
  1389      int powerbufLen = 0;
       
  1390      unsigned char *powerbuf = NULL;
       
  1391      BIGNUM tmp, am;
       
  1392 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc)
       
  1393 +    unsigned int t4=0;
       
  1394 +#endif
       
  1395 
       
  1396      bn_check_top(a);
       
  1397      bn_check_top(p);
       
  1398 @@ -628,10 +677,18 @@
       
  1399 
       
  1400      /* Get the window size to use with size of p. */
       
  1401      window = BN_window_bits_for_ctime_exponent_size(bits);
       
  1402 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc)
       
  1403 +    if (window>=5 && (top&15)==0 && top<=64 &&
       
  1404 +        (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))==
       
  1405 +        (CFR_MONTMUL|CFR_MONTSQR) && (t4=OPENSSL_sparcv9cap_P[0]))
       
  1406 +            window=5;
       
  1407 +    else
       
  1408 +#endif
       
  1409  #if defined(OPENSSL_BN_ASM_MONT5)
       
  1410      if (window == 6 && bits <= 1024)
       
  1411          window = 5;             /* ~5% improvement of 2048-bit RSA sign */
       
  1412  #endif
       
  1413 +    (void) 0;
       
  1414 
       
  1415      /*
       
  1416       * Allocate a buffer large enough to hold all of the pre-computed powers
       
  1417 @@ -670,14 +727,14 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1418      tmp.flags = am.flags = BN_FLG_STATIC_DATA;
       
  1419 
       
  1420      /* prepare a^0 in Montgomery domain */
       
  1421 -#if 1
       
  1422 -    if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
       
  1423 -        goto err;
       
  1424 -#else
       
  1425 +#if 1    /* by Shay Gueron's suggestion */
       
  1426      tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */
       
  1427      for (i = 1; i < top; i++)
       
  1428          tmp.d[i] = (~m->d[i]) & BN_MASK2;
       
  1429      tmp.top = top;
       
  1430 +#else
       
  1431 +    if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx))
       
  1432 +        goto err;
       
  1433  #endif
       
  1434 
       
  1435      /* prepare a^1 in Montgomery domain */
       
  1436 @@ -689,6 +746,122 @@
       
  1437      } else if (!BN_to_montgomery(&am, a, mont, ctx))
       
  1438          goto err;
       
  1439 
       
  1440 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc)
       
  1441 +    if (t4) {
       
  1442 +        typedef int (*bn_pwr5_mont_f)(BN_ULONG *tp,const BN_ULONG *np,
       
  1443 +            const BN_ULONG *n0,const void *table,int power,int bits);
       
  1444 +        int bn_pwr5_mont_t4_8(BN_ULONG *tp,const BN_ULONG *np,
       
  1445 +            const BN_ULONG *n0,const void *table,int power,int bits);
       
  1446 +        int bn_pwr5_mont_t4_16(BN_ULONG *tp,const BN_ULONG *np,
       
  1447 +            const BN_ULONG *n0,const void *table,int power,int bits);
       
  1448 +        int bn_pwr5_mont_t4_24(BN_ULONG *tp,const BN_ULONG *np,
       
  1449 +            const BN_ULONG *n0,const void *table,int power,int bits);
       
  1450 +        int bn_pwr5_mont_t4_32(BN_ULONG *tp,const BN_ULONG *np,
       
  1451 +            const BN_ULONG *n0,const void *table,int power,int bits);
       
  1452 +        static const bn_pwr5_mont_f pwr5_funcs[4] = {
       
  1453 +            bn_pwr5_mont_t4_8,    bn_pwr5_mont_t4_16,
       
  1454 +            bn_pwr5_mont_t4_24,    bn_pwr5_mont_t4_32 };
       
  1455 +        bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top/16-1];
       
  1456 +
       
  1457 +        typedef int (*bn_mul_mont_f)(BN_ULONG *rp,const BN_ULONG *ap,
       
  1458 +            const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1459 +        int bn_mul_mont_t4_8(BN_ULONG *rp,const BN_ULONG *ap,
       
  1460 +            const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1461 +        int bn_mul_mont_t4_16(BN_ULONG *rp,const BN_ULONG *ap,
       
  1462 +            const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1463 +        int bn_mul_mont_t4_24(BN_ULONG *rp,const BN_ULONG *ap,
       
  1464 +            const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1465 +        int bn_mul_mont_t4_32(BN_ULONG *rp,const BN_ULONG *ap,
       
  1466 +            const void *bp,const BN_ULONG *np,const BN_ULONG *n0);
       
  1467 +        static const bn_mul_mont_f mul_funcs[4] = {
       
  1468 +            bn_mul_mont_t4_8,    bn_mul_mont_t4_16,
       
  1469 +            bn_mul_mont_t4_24,    bn_mul_mont_t4_32 };
       
  1470 +        bn_mul_mont_f mul_worker = mul_funcs[top/16-1];
       
  1471 +
       
  1472 +        void bn_mul_mont_vis3(BN_ULONG *rp,const BN_ULONG *ap,
       
  1473 +            const void *bp,const BN_ULONG *np,
       
  1474 +            const BN_ULONG *n0,int num);
       
  1475 +        void bn_mul_mont_t4(BN_ULONG *rp,const BN_ULONG *ap,
       
  1476 +            const void *bp,const BN_ULONG *np,
       
  1477 +            const BN_ULONG *n0,int num);
       
  1478 +        void bn_mul_mont_gather5_t4(BN_ULONG *rp,const BN_ULONG *ap,
       
  1479 +            const void *table,const BN_ULONG *np,
       
  1480 +            const BN_ULONG *n0,int num,int power);
       
  1481 +        void bn_flip_n_scatter5_t4(const BN_ULONG *inp,size_t num,
       
  1482 +            void *table,size_t power);
       
  1483 +        void bn_gather5_t4(BN_ULONG *out,size_t num,
       
  1484 +            void *table,size_t power);
       
  1485 +        void bn_flip_t4(BN_ULONG *dst,BN_ULONG *src,size_t num);
       
  1486 +
       
  1487 +        BN_ULONG *np=mont->N.d, *n0=mont->n0;
       
  1488 +        int stride = 5*(6-(top/16-1));    /* multiple of 5, but less than 32 */
       
  1489 +
       
  1490 +        /*
       
  1491 +         * BN_to_montgomery can contaminate words above .top
       
  1492 +         * [in BN_DEBUG[_DEBUG] build]...
       
  1493 +         */
       
  1494 +        for (i=am.top; i<top; i++)    am.d[i]=0;
       
  1495 +        for (i=tmp.top; i<top; i++)    tmp.d[i]=0;
       
  1496 +
       
  1497 +        bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,0);
       
  1498 +        bn_flip_n_scatter5_t4(am.d,top,powerbuf,1);
       
  1499 +        if (!(*mul_worker)(tmp.d,am.d,am.d,np,n0) &&
       
  1500 +        !(*mul_worker)(tmp.d,am.d,am.d,np,n0))
       
  1501 +        bn_mul_mont_vis3(tmp.d,am.d,am.d,np,n0,top);
       
  1502 +        bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,2);
       
  1503 +
       
  1504 +        for (i=3; i<32; i++) {
       
  1505 +        /* Calculate a^i = a^(i-1) * a */
       
  1506 +        if (!(*mul_worker)(tmp.d,tmp.d,am.d,np,n0) &&
       
  1507 +            !(*mul_worker)(tmp.d,tmp.d,am.d,np,n0))
       
  1508 +            bn_mul_mont_vis3(tmp.d,tmp.d,am.d,np,n0,top);
       
  1509 +        bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,i);
       
  1510 +        }
       
  1511 +
       
  1512 +        /* switch to 64-bit domain */
       
  1513 +        np = alloca(top*sizeof(BN_ULONG));
       
  1514 +        top /= 2;
       
  1515 +        bn_flip_t4(np,mont->N.d,top);
       
  1516 +
       
  1517 +        bits--;
       
  1518 +        for (wvalue=0, i=bits%5; i>=0; i--,bits--)
       
  1519 +        wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
       
  1520 +        bn_gather5_t4(tmp.d,top,powerbuf,wvalue);
       
  1521 +
       
  1522 +        /* Scan the exponent one window at a time starting from the most
       
  1523 +         * significant bits.
       
  1524 +         */
       
  1525 +        while (bits >= 0) {
       
  1526 +        if (bits < stride)
       
  1527 +            stride = bits+1;
       
  1528 +        bits -= stride;
       
  1529 +        wvalue = (bn_get_bits(p,bits+1));
       
  1530 +
       
  1531 +        if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride))
       
  1532 +            continue;
       
  1533 +        /* retry once and fall back */
       
  1534 +        if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride))
       
  1535 +            continue;
       
  1536 +
       
  1537 +        bits += stride-5;
       
  1538 +        wvalue >>= stride-5;
       
  1539 +        wvalue &= 31;
       
  1540 +        bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1541 +        bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1542 +        bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1543 +        bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1544 +        bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
       
  1545 +        bn_mul_mont_gather5_t4(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
       
  1546 +        }
       
  1547 +
       
  1548 +        bn_flip_t4(tmp.d,tmp.d,top);
       
  1549 +        top *= 2;
       
  1550 +        /* back to 32-bit domain */
       
  1551 +        tmp.top=top;
       
  1552 +        bn_correct_top(&tmp);
       
  1553 +        OPENSSL_cleanse(np,top*sizeof(BN_ULONG));
       
  1554 +    } else
       
  1555 +#endif
       
  1556  #if defined(OPENSSL_BN_ASM_MONT5)
       
  1557      if (window == 5 && top > 1) {
       
  1558          /*
       
  1559 @@ -844,6 +1017,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
       
  1560      }
       
  1561 
       
  1562      /* Convert the final result from montgomery to standard format */
       
  1563 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
       
  1564 +    if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3|SPARCV9_PREFER_FPU)) {
       
  1565 +        am.d[0] = 1;    /* borrow am */
       
  1566 +        for (i = 1; i < top; i++)
       
  1567 +            am.d[i] = 0;
       
  1568 +        if (!BN_mod_mul_montgomery(rr,&tmp,&am,mont,ctx))
       
  1569 +            goto err;
       
  1570 +    } else
       
  1571 +#endif
       
  1572      if (!BN_from_montgomery(rr, &tmp, mont, ctx))
       
  1573          goto err;
       
  1574      ret = 1;
       
  1575 Index: openssl/apps/speed.c
       
  1576 ===================================================================
       
  1577 diff -ru openssl-1.0.1e/apps/speed.c openssl-1.0.1e/apps/speed.c
       
  1578 --- openssl-1.0.1e/apps/speed.c 2011-05-24 17:02:24.000000000 -0700
       
  1579 +++ openssl-1.0.1e/apps/speed.c 2011-07-27 10:48:17.817470000 -0700
       
  1580 @@ -1586,8 +1586,7 @@
       
  1581              print_message(names[D_MD5], c[D_MD5][j], lengths[j]);
       
  1582              Time_F(START);
       
  1583              for (count = 0, run = 1; COND(c[D_MD5][j]); count++)
       
  1584 -                EVP_Digest(&(buf[0]), (unsigned long)lengths[j], &(md5[0]),
       
  1585 -                           NULL, EVP_get_digestbyname("md5"), NULL);
       
  1586 +                MD5(buf, lengths[j], md5);
       
  1587              d = Time_F(STOP);
       
  1588              print_result(D_MD5, j, count, d);
       
  1589          }
       
  1590 @@ -1622,8 +1621,7 @@
       
  1591              print_message(names[D_SHA1], c[D_SHA1][j], lengths[j]);
       
  1592              Time_F(START);
       
  1593              for (count = 0, run = 1; COND(c[D_SHA1][j]); count++)
       
  1594 -                EVP_Digest(buf, (unsigned long)lengths[j], &(sha[0]), NULL,
       
  1595 -                           EVP_sha1(), NULL);
       
  1596 +                SHA1(buf, lengths[j], sha);
       
  1597              d = Time_F(STOP);
       
  1598              print_result(D_SHA1, j, count, d);
       
  1599          }
       
  1600 Index: openssl/crypto/aes/Makefile
       
  1601 ===================================================================
       
  1602 --- Makefile	Thu May  2 13:42:37 2013
       
  1603 +++ Makefile.orig	Thu May  2 13:41:51 2013
       
  1604 @@ -69,6 +69,9 @@
       
  1605  aes-sparcv9.s: asm/aes-sparcv9.pl
       
  1606  	$(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@
       
  1607  
       
  1608 +aest4-sparcv9.s: asm/aest4-sparcv9.pl
       
  1609 +	$(PERL) asm/aest4-sparcv9.pl $(CFLAGS) > $@
       
  1610 +
       
  1611  aes-ppc.s:	asm/aes-ppc.pl
       
  1612  	$(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@
       
  1613  
       
  1614 Index: openssl/crypto/evp/e_aes.c
       
  1615 ===================================================================
       
  1616 --- e_aes.c    Mon Feb 11 07:26:04 2013
       
  1617 +++ e_aes.c.56    Thu May  2 14:26:35 2013
       
  1618 @@ -56,12 +58,11 @@
       
  1619  # include <assert.h>
       
  1620  # include <openssl/aes.h>
       
  1621  # include "evp_locl.h"
       
  1622 -# ifndef OPENSSL_FIPS
       
  1623  #  include "modes_lcl.h"
       
  1624  #  include <openssl/rand.h>
       
  1625 
       
  1626  typedef struct {
       
  1627 -    AES_KEY ks;
       
  1628 +    union { double align; AES_KEY ks; } ks;
       
  1629      block128_f block;
       
  1630      union {
       
  1631          cbc128_f cbc;
       
  1632 @@ -70,7 +69,7 @@
       
  1633  } EVP_AES_KEY;
       
  1634 
       
  1635  typedef struct {
       
  1636 -    AES_KEY ks;                 /* AES key schedule to use */
       
  1637 +    union { double align; AES_KEY ks; } ks;    /* AES key schedule to use */
       
  1638      int key_set;                /* Set if key initialised */
       
  1639      int iv_set;                 /* Set if an iv is set */
       
  1640      GCM128_CONTEXT gcm;
       
  1641 @@ -83,7 +82,7 @@
       
  1642  } EVP_AES_GCM_CTX;
       
  1643 
       
  1644  typedef struct {
       
  1645 -    AES_KEY ks1, ks2;           /* AES key schedules to use */
       
  1646 +    union { double align; AES_KEY ks; } ks1, ks2;    /* AES key schedules to use */
       
  1647      XTS128_CONTEXT xts;
       
  1648      void (*stream) (const unsigned char *in,
       
  1649                      unsigned char *out, size_t length,
       
  1650 @@ -92,7 +91,7 @@
       
  1651  } EVP_AES_XTS_CTX;
       
  1652 
       
  1653  typedef struct {
       
  1654 -    AES_KEY ks;                 /* AES key schedule to use */
       
  1655 +    union { double align; AES_KEY ks; } ks;    /* AES key schedule to use */
       
  1656      int key_set;                /* Set if key initialised */
       
  1657      int iv_set;                 /* Set if an iv is set */
       
  1658      int tag_set;                /* Set if tag is valid */
       
  1659 @@ -155,7 +154,7 @@
       
  1660          defined(_M_AMD64)       || defined(_M_X64)      || \
       
  1661          defined(__INTEL__)                              )
       
  1662 
       
  1663 -extern unsigned int OPENSSL_ia32cap_P[2];
       
  1664 +extern unsigned int OPENSSL_ia32cap_P[];
       
  1665 
       
  1666  #   ifdef VPAES_ASM
       
  1667  #    define VPAES_CAPABLE   (OPENSSL_ia32cap_P[1]&(1<<(41-32)))
       
  1668 @@ -297,7 +296,7 @@
       
  1669      if (!iv && !key)
       
  1670          return 1;
       
  1671      if (key) {
       
  1672 -        aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
       
  1673 +        aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
       
  1674          CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) aesni_encrypt);
       
  1675          gctx->ctr = (ctr128_f) aesni_ctr32_encrypt_blocks;
       
  1676          /*
       
  1677 @@ -336,17 +335,17 @@
       
  1678      if (key) {
       
  1679          /* key_len is two AES keys */
       
  1680          if (enc) {
       
  1681 -            aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  1682 +            aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  1683              xctx->xts.block1 = (block128_f) aesni_encrypt;
       
  1684              xctx->stream = aesni_xts_encrypt;
       
  1685          } else {
       
  1686 -            aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  1687 +            aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  1688              xctx->xts.block1 = (block128_f) aesni_decrypt;
       
  1689              xctx->stream = aesni_xts_decrypt;
       
  1690          }
       
  1691 
       
  1692          aesni_set_encrypt_key(key + ctx->key_len / 2,
       
  1693 -                              ctx->key_len * 4, &xctx->ks2);
       
  1694 +                              ctx->key_len * 4, &xctx->ks2.ks);
       
  1695          xctx->xts.block2 = (block128_f) aesni_encrypt;
       
  1696 
       
  1697          xctx->xts.key1 = &xctx->ks1;
       
  1698 @@ -371,7 +370,7 @@
       
  1699      if (!iv && !key)
       
  1700          return 1;
       
  1701      if (key) {
       
  1702 -        aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
       
  1703 +        aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
       
  1704          CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  1705                             &cctx->ks, (block128_f) aesni_encrypt);
       
  1706          cctx->str = enc ? (ccm128_f) aesni_ccm64_encrypt_blocks :
       
  1707 @@ -432,6 +431,364 @@
       
  1708  const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
       
  1709  { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; }
       
  1710 
       
  1711 +#elif    defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
       
  1712 +
       
  1713 +#include "sparc_arch.h"
       
  1714 +
       
  1715 +extern unsigned int OPENSSL_sparcv9cap_P[];
       
  1716 +
       
  1717 +#define    SPARC_AES_CAPABLE    (OPENSSL_sparcv9cap_P[1] & CFR_AES)
       
  1718 +
       
  1719 +void    aes_t4_set_encrypt_key (const unsigned char *key, int bits,
       
  1720 +                AES_KEY *ks);
       
  1721 +void    aes_t4_set_decrypt_key (const unsigned char *key, int bits,
       
  1722 +                AES_KEY *ks);
       
  1723 +void    aes_t4_encrypt (const unsigned char *in, unsigned char *out,
       
  1724 +                const AES_KEY *key);
       
  1725 +void    aes_t4_decrypt (const unsigned char *in, unsigned char *out,
       
  1726 +                const AES_KEY *key);
       
  1727 +/*
       
  1728 + * Key-length specific subroutines were chosen for the following reason.
       
  1729 + * Each SPARC T4 core can execute up to 8 threads which share core's
       
  1730 + * resources. Loading as much key material to registers allows to
       
  1731 + * minimize references to shared memory interface, as well as amount
       
  1732 + * of instructions in inner loops [much needed on T4]. But then having
       
  1733 + * non-key-length specific routines would require conditional branches
       
  1734 + * either in inner loops or on subroutines' entries. Former is hardly
       
  1735 + * acceptable, while latter means code size increase to size occupied
       
  1736 + * by multiple key-length specific subroutines, so why fight?
       
  1737 + */
       
  1738 +void    aes128_t4_cbc_encrypt (const unsigned char *in, unsigned char *out,
       
  1739 +                size_t len, const AES_KEY *key,
       
  1740 +                unsigned char *ivec);
       
  1741 +void    aes128_t4_cbc_decrypt (const unsigned char *in, unsigned char *out,
       
  1742 +                size_t len, const AES_KEY *key,
       
  1743 +                unsigned char *ivec);
       
  1744 +void    aes192_t4_cbc_encrypt (const unsigned char *in, unsigned char *out,
       
  1745 +                size_t len, const AES_KEY *key,
       
  1746 +                unsigned char *ivec);
       
  1747 +void    aes192_t4_cbc_decrypt (const unsigned char *in, unsigned char *out,
       
  1748 +                size_t len, const AES_KEY *key,
       
  1749 +                unsigned char *ivec);
       
  1750 +void    aes256_t4_cbc_encrypt (const unsigned char *in, unsigned char *out,
       
  1751 +                size_t len, const AES_KEY *key,
       
  1752 +                unsigned char *ivec);
       
  1753 +void    aes256_t4_cbc_decrypt (const unsigned char *in, unsigned char *out,
       
  1754 +                size_t len, const AES_KEY *key,
       
  1755 +                unsigned char *ivec);
       
  1756 +void    aes128_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
       
  1757 +                size_t blocks, const AES_KEY *key,
       
  1758 +                unsigned char *ivec);
       
  1759 +void    aes192_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
       
  1760 +                size_t blocks, const AES_KEY *key,
       
  1761 +                unsigned char *ivec);
       
  1762 +void    aes256_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out,
       
  1763 +                size_t blocks, const AES_KEY *key,
       
  1764 +                unsigned char *ivec);
       
  1765 +
       
  1766 +static int aes_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1767 +           const unsigned char *iv, int enc)
       
  1768 +{
       
  1769 +    int ret, mode, bits;
       
  1770 +    EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data;
       
  1771 +
       
  1772 +    mode = ctx->cipher->flags & EVP_CIPH_MODE;
       
  1773 +    bits = ctx->key_len*8;
       
  1774 +    if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) {
       
  1775 +            ret = 0;
       
  1776 +            aes_t4_set_decrypt_key(key, bits, ctx->cipher_data);
       
  1777 +            dat->block    = (block128_f)aes_t4_decrypt;
       
  1778 +            switch (bits) {
       
  1779 +            case 128:
       
  1780 +            dat->stream.cbc    = mode==EVP_CIPH_CBC_MODE ?
       
  1781 +                        (cbc128_f)aes128_t4_cbc_decrypt :
       
  1782 +                        NULL;
       
  1783 +            break;
       
  1784 +            case 192:
       
  1785 +            dat->stream.cbc    = mode==EVP_CIPH_CBC_MODE ?
       
  1786 +                        (cbc128_f)aes192_t4_cbc_decrypt :
       
  1787 +                        NULL;
       
  1788 +            break;
       
  1789 +            case 256:
       
  1790 +            dat->stream.cbc    = mode==EVP_CIPH_CBC_MODE ?
       
  1791 +                        (cbc128_f)aes256_t4_cbc_decrypt :
       
  1792 +                        NULL;
       
  1793 +            break;
       
  1794 +            default:
       
  1795 +            ret = -1;
       
  1796 +        }
       
  1797 +    } else    {
       
  1798 +            ret = 0;
       
  1799 +            aes_t4_set_encrypt_key(key, bits, ctx->cipher_data);
       
  1800 +            dat->block    = (block128_f)aes_t4_encrypt;
       
  1801 +            switch (bits) {
       
  1802 +            case 128:
       
  1803 +            if (mode==EVP_CIPH_CBC_MODE)
       
  1804 +                dat->stream.cbc    = (cbc128_f)aes128_t4_cbc_encrypt;
       
  1805 +            else if (mode==EVP_CIPH_CTR_MODE)
       
  1806 +                dat->stream.ctr = (ctr128_f)aes128_t4_ctr32_encrypt;
       
  1807 +            else
       
  1808 +                dat->stream.cbc = NULL;
       
  1809 +            break;
       
  1810 +            case 192:
       
  1811 +            if (mode==EVP_CIPH_CBC_MODE)
       
  1812 +                dat->stream.cbc    = (cbc128_f)aes192_t4_cbc_encrypt;
       
  1813 +            else if (mode==EVP_CIPH_CTR_MODE)
       
  1814 +                dat->stream.ctr = (ctr128_f)aes192_t4_ctr32_encrypt;
       
  1815 +            else
       
  1816 +                dat->stream.cbc = NULL;
       
  1817 +            break;
       
  1818 +            case 256:
       
  1819 +            if (mode==EVP_CIPH_CBC_MODE)
       
  1820 +                dat->stream.cbc    = (cbc128_f)aes256_t4_cbc_encrypt;
       
  1821 +            else if (mode==EVP_CIPH_CTR_MODE)
       
  1822 +                dat->stream.ctr = (ctr128_f)aes256_t4_ctr32_encrypt;
       
  1823 +            else
       
  1824 +                dat->stream.cbc = NULL;
       
  1825 +            break;
       
  1826 +            default:
       
  1827 +            ret = -1;
       
  1828 +        }
       
  1829 +    }
       
  1830 +
       
  1831 +    if (ret < 0) {
       
  1832 +        EVPerr(EVP_F_AES_T4_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED);
       
  1833 +        return 0;
       
  1834 +    }
       
  1835 +
       
  1836 +    return 1;
       
  1837 +}
       
  1838 +
       
  1839 +#define aes_t4_cbc_cipher aes_cbc_cipher /* alias: prototype below declares aes_cbc_cipher */
       
  1840 +static int aes_t4_cbc_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1841 +    const unsigned char *in, size_t len);
       
  1842 +
       
  1843 +#define aes_t4_ecb_cipher aes_ecb_cipher /* alias: prototype below declares aes_ecb_cipher */
       
  1844 +static int aes_t4_ecb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1845 +    const unsigned char *in, size_t len);
       
  1846 +
       
  1847 +#define aes_t4_ofb_cipher aes_ofb_cipher /* alias: prototype below declares aes_ofb_cipher */
       
  1848 +static int aes_t4_ofb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1849 +    const unsigned char *in,size_t len);
       
  1850 +
       
  1851 +#define aes_t4_cfb_cipher aes_cfb_cipher /* alias: prototype below declares aes_cfb_cipher */
       
  1852 +static int aes_t4_cfb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1853 +    const unsigned char *in,size_t len);
       
  1854 +
       
  1855 +#define aes_t4_cfb8_cipher aes_cfb8_cipher /* alias: prototype below declares aes_cfb8_cipher */
       
  1856 +static int aes_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1857 +    const unsigned char *in,size_t len);
       
  1858 +
       
  1859 +#define aes_t4_cfb1_cipher aes_cfb1_cipher /* alias: prototype below declares aes_cfb1_cipher */
       
  1860 +static int aes_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out,
       
  1861 +    const unsigned char *in,size_t len);
       
  1862 +
       
  1863 +#define aes_t4_ctr_cipher aes_ctr_cipher /* alias: prototype below declares aes_ctr_cipher */
       
  1864 +static int aes_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1865 +        const unsigned char *in, size_t len);
       
  1866 +
       
  1867 +static int aes_t4_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1868 +                        const unsigned char *iv, int enc)
       
  1869 +{
       
  1870 +    EVP_AES_GCM_CTX *gctx = ctx->cipher_data;
       
  1871 +    const int key_bits = ctx->key_len * 8;
       
  1872 +
       
  1873 +    /* Neither key nor IV supplied: nothing to do. */
       
  1874 +    if (key == NULL && iv == NULL)
       
  1875 +        return 1;
       
  1876 +
       
  1877 +    if (key == NULL) {
       
  1878 +        /* IV only: hand it to GCM if a key is set, otherwise save a copy. */
       
  1879 +        if (gctx->key_set)
       
  1880 +            CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen);
       
  1881 +        else
       
  1882 +            memcpy(gctx->iv, iv, gctx->ivlen);
       
  1883 +        gctx->iv_set = 1;
       
  1884 +        gctx->iv_gen = 0;
       
  1885 +        return 1;
       
  1886 +    }
       
  1887 +
       
  1888 +    /* Build the key schedule and register the single-block routine. */
       
  1889 +    aes_t4_set_encrypt_key(key, key_bits, &gctx->ks.ks);
       
  1890 +    CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)aes_t4_encrypt);
       
  1891 +
       
  1892 +    /* Select the aes<bits>_t4_ctr32_encrypt routine for this key size. */
       
  1893 +    if (key_bits == 128)
       
  1894 +        gctx->ctr = (ctr128_f)aes128_t4_ctr32_encrypt;
       
  1895 +    else if (key_bits == 192)
       
  1896 +        gctx->ctr = (ctr128_f)aes192_t4_ctr32_encrypt;
       
  1897 +    else if (key_bits == 256)
       
  1898 +        gctx->ctr = (ctr128_f)aes256_t4_ctr32_encrypt;
       
  1899 +    else
       
  1900 +        return 0;
       
  1901 +
       
  1902 +    /* Use the IV passed in, or fall back to one saved by an earlier call. */
       
  1903 +    if (iv == NULL && gctx->iv_set)
       
  1904 +        iv = gctx->iv;
       
  1905 +    if (iv != NULL) {
       
  1906 +        CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen);
       
  1907 +        gctx->iv_set = 1;
       
  1908 +    }
       
  1909 +    gctx->key_set = 1;
       
  1910 +    return 1;
       
  1911 +}
       
  1912 +
       
  1913 +#define aes_t4_gcm_cipher aes_gcm_cipher /* alias: prototype below declares aes_gcm_cipher */
       
  1914 +static int aes_t4_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1915 +        const unsigned char *in, size_t len);
       
  1916 +
       
  1917 +static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1918 +                        const unsigned char *iv, int enc)
       
  1919 +{
       
  1920 +    EVP_AES_XTS_CTX *xctx = ctx->cipher_data;
       
  1921 +    /* Set T4 key schedules and/or IV; key and iv may each be NULL. */
       
  1922 +    if (!iv && !key)
       
  1923 +        return 1;
       
  1924 +    if (key) {
       
  1925 +        int bits = ctx->key_len * 4;    /* key_len is two AES keys */
       
  1926 +        xctx->stream = NULL;    /* assigned only by the "not yet" code below */
       
  1927 +        if (enc) {
       
  1928 +            aes_t4_set_encrypt_key(key, bits, &xctx->ks1.ks);
       
  1929 +            xctx->xts.block1 = (block128_f)aes_t4_encrypt;
       
  1930 +#if 0 /* not yet */
       
  1931 +            switch (bits) {
       
  1932 +            case 128:
       
  1933 +                xctx->stream = aes128_t4_xts_encrypt;
       
  1934 +                break;
       
  1935 +            case 192:
       
  1936 +                xctx->stream = aes192_t4_xts_encrypt;
       
  1937 +                break;
       
  1938 +            case 256:
       
  1939 +                xctx->stream = aes256_t4_xts_encrypt;
       
  1940 +                break;
       
  1941 +            default:
       
  1942 +                return 0;
       
  1943 +            }
       
  1944 +#endif
       
  1945 +        } else {
       
  1946 +            aes_t4_set_decrypt_key(key, bits, &xctx->ks1.ks);
       
  1947 +            xctx->xts.block1 = (block128_f)aes_t4_decrypt;
       
  1948 +#if 0 /* not yet */
       
  1949 +            switch (bits) {
       
  1950 +            case 128:
       
  1951 +                xctx->stream = aes128_t4_xts_decrypt;
       
  1952 +                break;
       
  1953 +            case 192:
       
  1954 +                xctx->stream = aes192_t4_xts_decrypt;
       
  1955 +                break;
       
  1956 +            case 256:
       
  1957 +                xctx->stream = aes256_t4_xts_decrypt;
       
  1958 +                break;
       
  1959 +            default:
       
  1960 +                return 0;
       
  1961 +            }
       
  1962 +#endif
       
  1963 +        }
       
  1964 +
       
  1965 +        /* ks2 is always an encrypt schedule, built from the second half of key */
       
  1966 +        aes_t4_set_encrypt_key(key + ctx->key_len/2, bits, &xctx->ks2.ks);
       
  1967 +        xctx->xts.block2 = (block128_f)aes_t4_encrypt;
       
  1968 +
       
  1969 +        xctx->xts.key1 = &xctx->ks1;
       
  1970 +    }
       
  1971 +
       
  1972 +    if (iv) {
       
  1973 +        xctx->xts.key2 = &xctx->ks2;
       
  1974 +        memcpy(ctx->iv, iv, 16);
       
  1975 +    }
       
  1976 +
       
  1977 +    return 1;
       
  1978 +}
       
  1979 +
       
  1980 +#define aes_t4_xts_cipher aes_xts_cipher /* alias: prototype below declares aes_xts_cipher */
       
  1981 +static int aes_t4_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  1982 +        const unsigned char *in, size_t len);
       
  1983 +
       
  1984 +static int aes_t4_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
       
  1985 +                        const unsigned char *iv, int enc)
       
  1986 +{
       
  1987 +    EVP_AES_CCM_CTX *cctx = ctx->cipher_data;
       
  1988 +    if (!iv && !key)    /* nothing supplied: nothing to do */
       
  1989 +        return 1;
       
  1990 +    if (key) {
       
  1991 +        cctx->str = NULL;    /* assigned only by the "not yet" code below */
       
  1992 +        aes_t4_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
       
  1993 +        CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  1994 +                    &cctx->ks, (block128_f)aes_t4_encrypt);
       
  1995 +#if 0 /* not yet */
       
  1996 +        switch (ctx->key_len * 8) {
       
  1997 +        case 128:
       
  1998 +            cctx->str = enc?(ccm128_f)aes128_t4_ccm64_encrypt :
       
  1999 +                (ccm128_f)aes128_t4_ccm64_decrypt;
       
  2000 +            break;
       
  2001 +        case 192:
       
  2002 +            cctx->str = enc?(ccm128_f)aes192_t4_ccm64_encrypt :
       
  2003 +                (ccm128_f)aes192_t4_ccm64_decrypt;
       
  2004 +            break;
       
  2005 +        case 256:
       
  2006 +            cctx->str = enc?(ccm128_f)aes256_t4_ccm64_encrypt :
       
  2007 +                (ccm128_f)aes256_t4_ccm64_decrypt;
       
  2008 +            break;
       
  2009 +        default:
       
  2010 +            return 0;
       
  2011 +        }
       
  2012 +#endif
       
  2013 +        cctx->key_set = 1;
       
  2014 +    }
       
  2015 +    if (iv) {
       
  2016 +        memcpy(ctx->iv, iv, 15 - cctx->L);    /* copies 15 - L bytes of iv */
       
  2017 +        cctx->iv_set = 1;
       
  2018 +    }
       
  2019 +    return 1;
       
  2020 +}
       
  2021 +
       
  2022 +#define aes_t4_ccm_cipher aes_ccm_cipher /* alias: prototype below declares aes_ccm_cipher */
       
  2023 +static int aes_t4_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
       
  2024 +        const unsigned char *in, size_t len);
       
  2025 +
       
  2026 +#define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
       
  2027 +/* Table entry wired to aes_t4_init_key and aes_t4_<mode>_cipher. */ \
       
  2028 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \
       
  2029 +    nid##_##keylen##_##nmode, blocksize, keylen/8, ivlen, \
       
  2030 +    flags | EVP_CIPH_##MODE##_MODE, \
       
  2031 +    aes_t4_init_key, aes_t4_##mode##_cipher, \
       
  2032 +    NULL, sizeof(EVP_AES_KEY), \
       
  2033 +    NULL, NULL, NULL, NULL \
       
  2034 +}; \
       
  2035 +/* Table entry wired to aes_init_key and aes_<mode>_cipher. */ \
       
  2036 +static const EVP_CIPHER aes_##keylen##_##mode = { \
       
  2037 +    nid##_##keylen##_##nmode, blocksize, keylen/8, ivlen, \
       
  2038 +    flags | EVP_CIPH_##MODE##_MODE, \
       
  2039 +    aes_init_key, aes_##mode##_cipher, \
       
  2040 +    NULL, sizeof(EVP_AES_KEY), \
       
  2041 +    NULL, NULL, NULL, NULL \
       
  2042 +}; \
       
  2043 +/* Accessor: the T4 entry when SPARC_AES_CAPABLE, else the aes_* entry. */ \
       
  2044 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
       
  2045 +{ return SPARC_AES_CAPABLE ? &aes_t4_##keylen##_##mode : &aes_##keylen##_##mode; }
       
  2046 +
       
  2047 +#define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \
       
  2048 +/* For XTS mode the key length field is 2*keylen/8, otherwise keylen/8. */ \
       
  2049 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \
       
  2050 +    nid##_##keylen##_##mode, blocksize, \
       
  2051 +    (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE ? 2 : 1)*keylen/8, ivlen, \
       
  2052 +    flags | EVP_CIPH_##MODE##_MODE, \
       
  2053 +    aes_t4_##mode##_init_key, aes_t4_##mode##_cipher, \
       
  2054 +    aes_##mode##_cleanup, sizeof(EVP_AES_##MODE##_CTX), \
       
  2055 +    NULL, NULL, aes_##mode##_ctrl, NULL \
       
  2056 +}; \
       
  2057 +static const EVP_CIPHER aes_##keylen##_##mode = { \
       
  2058 +    nid##_##keylen##_##mode, blocksize, \
       
  2059 +    (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE ? 2 : 1)*keylen/8, ivlen, \
       
  2060 +    flags | EVP_CIPH_##MODE##_MODE, \
       
  2061 +    aes_##mode##_init_key, aes_##mode##_cipher, \
       
  2062 +    aes_##mode##_cleanup, sizeof(EVP_AES_##MODE##_CTX), \
       
  2063 +    NULL, NULL, aes_##mode##_ctrl, NULL \
       
  2064 +}; \
       
  2065 +/* Accessor: the T4 entry when SPARC_AES_CAPABLE, else the aes_* entry. */ \
       
  2066 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
       
  2067 +{ return SPARC_AES_CAPABLE ? &aes_t4_##keylen##_##mode : &aes_##keylen##_##mode; }
       
  2068 +
       
  2069  #  else
       
  2070 
       
  2071  #   define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \
       
  2072 @@ -480,7 +837,7 @@
       
  2073          && !enc)
       
  2074  #  ifdef BSAES_CAPABLE
       
  2075          if (BSAES_CAPABLE && mode == EVP_CIPH_CBC_MODE) {
       
  2076 -            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2077 +            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
       
  2078              dat->block = (block128_f) AES_decrypt;
       
  2079              dat->stream.cbc = (cbc128_f) bsaes_cbc_encrypt;
       
  2080          } else
       
  2081 @@ -487,7 +844,7 @@
       
  2082  #  endif
       
  2083  #  ifdef VPAES_CAPABLE
       
  2084          if (VPAES_CAPABLE) {
       
  2085 -            ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2086 +            ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
       
  2087              dat->block = (block128_f) vpaes_decrypt;
       
  2088              dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
       
  2089                  (cbc128_f) vpaes_cbc_encrypt : NULL;
       
  2090 @@ -494,7 +851,7 @@
       
  2091          } else
       
  2092  #  endif
       
  2093          {
       
  2094 -            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2095 +            ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
       
  2096              dat->block = (block128_f) AES_decrypt;
       
  2097              dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
       
  2098                  (cbc128_f) AES_cbc_encrypt : NULL;
       
  2099 @@ -501,7 +858,7 @@
       
  2100      } else
       
  2101  #  ifdef BSAES_CAPABLE
       
  2102      if (BSAES_CAPABLE && mode == EVP_CIPH_CTR_MODE) {
       
  2103 -        ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2104 +        ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
       
  2105          dat->block = (block128_f) AES_encrypt;
       
  2106          dat->stream.ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks;
       
  2107      } else
       
  2108 @@ -508,7 +865,7 @@
       
  2109  #  endif
       
  2110  #  ifdef VPAES_CAPABLE
       
  2111      if (VPAES_CAPABLE) {
       
  2112 -        ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2113 +        ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks);
       
  2114          dat->block = (block128_f) vpaes_encrypt;
       
  2115          dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
       
  2116              (cbc128_f) vpaes_cbc_encrypt : NULL;
       
  2117 @@ -515,7 +872,7 @@
       
  2118      } else
       
  2119  #  endif
       
  2120      {
       
  2121 -        ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks);
       
  2122 +        ret = AES_set_encrypt_key(key, ctx->key_len*8, &dat->ks.ks);
       
  2123          dat->block = (block128_f) AES_encrypt;
       
  2124          dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ?
       
  2125              (cbc128_f) AES_cbc_encrypt : NULL;
       
  2126 @@ -810,7 +1167,7 @@
       
  2127          do {
       
  2128  #  ifdef BSAES_CAPABLE
       
  2129              if (BSAES_CAPABLE) {
       
  2130 -                AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
       
  2131 +                AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
       
  2132                  CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
       
  2133                                     (block128_f) AES_encrypt);
       
  2134                  gctx->ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks;
       
  2135 @@ -819,7 +1176,7 @@
       
  2136  #  endif
       
  2137  #  ifdef VPAES_CAPABLE
       
  2138              if (VPAES_CAPABLE) {
       
  2139 -                vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
       
  2140 +                vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
       
  2141                  CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
       
  2142                                     (block128_f) vpaes_encrypt);
       
  2143                  gctx->ctr = NULL;
       
  2144 @@ -828,7 +1185,7 @@
       
  2145  #  endif
       
  2146                  (void)0;        /* terminate potentially open 'else' */
       
  2147 
       
  2148 -            AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks);
       
  2149 +            AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
       
  2150              CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
       
  2151                                 (block128_f) AES_encrypt);
       
  2152  #  ifdef AES_CTR_ASM
       
  2153 @@ -1049,15 +1406,15 @@
       
  2154  #  ifdef VPAES_CAPABLE
       
  2155              if (VPAES_CAPABLE) {
       
  2156                  if (enc) {
       
  2157 -                    vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2158 +                    vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2159                      xctx->xts.block1 = (block128_f) vpaes_encrypt;
       
  2160                  } else {
       
  2161 -                    vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2162 +                    vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2163                      xctx->xts.block1 = (block128_f) vpaes_decrypt;
       
  2164                  }
       
  2165 
       
  2166                  vpaes_set_encrypt_key(key + ctx->key_len / 2,
       
  2167 -                                      ctx->key_len * 4, &xctx->ks2);
       
  2168 +                                      ctx->key_len * 4, &xctx->ks2.ks);
       
  2169                  xctx->xts.block2 = (block128_f) vpaes_encrypt;
       
  2170 
       
  2171                  xctx->xts.key1 = &xctx->ks1;
       
  2172 @@ -1067,15 +1424,15 @@
       
  2173                  (void)0;        /* terminate potentially open 'else' */
       
  2174 
       
  2175              if (enc) {
       
  2176 -                AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2177 +                AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2178                  xctx->xts.block1 = (block128_f) AES_encrypt;
       
  2179              } else {
       
  2180 -                AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1);
       
  2181 +                AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks);
       
  2182                  xctx->xts.block1 = (block128_f) AES_decrypt;
       
  2183              }
       
  2184 
       
  2185              AES_set_encrypt_key(key + ctx->key_len / 2,
       
  2186 -                                ctx->key_len * 4, &xctx->ks2);
       
  2187 +                                ctx->key_len * 4, &xctx->ks2.ks);
       
  2188              xctx->xts.block2 = (block128_f) AES_encrypt;
       
  2189 
       
  2190              xctx->xts.key1 = &xctx->ks1;
       
  2191 @@ -1196,7 +1553,7 @@
       
  2192          do {
       
  2193  #  ifdef VPAES_CAPABLE
       
  2194              if (VPAES_CAPABLE) {
       
  2195 -                vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
       
  2196 +                vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
       
  2197                  CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  2198                                     &cctx->ks, (block128_f) vpaes_encrypt);
       
  2199                  cctx->str = NULL;
       
  2200 @@ -1204,7 +1561,7 @@
       
  2201                  break;
       
  2202              }
       
  2203  #  endif
       
  2204 -            AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks);
       
  2205 +            AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks);
       
  2206              CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L,
       
  2207                                 &cctx->ks, (block128_f) AES_encrypt);
       
  2208              cctx->str = NULL;
       
  2209 @@ -1285,5 +1642,4 @@
       
  2210                      EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS)
       
  2211      BLOCK_CIPHER_custom(NID_aes, 256, 1, 12, ccm, CCM,
       
  2212                      EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS)
       
  2213 -# endif
       
  2214  #endif
       
  2215 Index: openssl/crypto/evp/evp.h
       
  2216 ===================================================================
       
  2217 --- evp.h    Mon Feb 11 07:26:04 2013
       
  2218 +++ evp.h.new    Thu May  2 14:31:55 2013
       
  2219 @@ -1325,6 +1325,7 @@
       
  2220  # define EVP_F_AESNI_INIT_KEY                             165
       
  2221  # define EVP_F_AESNI_XTS_CIPHER                           176
       
  2222  # define EVP_F_AES_INIT_KEY                               133
       
  2223 +# define EVP_F_AES_T4_INIT_KEY                            178
       
  2224  # define EVP_F_AES_XTS                                    172
       
  2225  # define EVP_F_AES_XTS_CIPHER                             175
       
  2226  # define EVP_F_ALG_MODULE_INIT                            177
       
  2227 Index: openssl/crypto/evp/evp_err.c
       
  2228 ===================================================================
       
  2229 --- evp_err.c	Mon Feb 11 07:26:04 2013
       
  2230 +++ evp_err.c.new	Thu May  2 14:33:24 2013
       
  2231 @@ -73,6 +73,7 @@
       
  2232      {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"},
       
  2233      {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"},
       
  2234      {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"},
       
  2235 +    {ERR_FUNC(EVP_F_AES_T4_INIT_KEY), "AES_T4_INIT_KEY"},
       
  2236      {ERR_FUNC(EVP_F_AES_XTS), "AES_XTS"},
       
  2237      {ERR_FUNC(EVP_F_AES_XTS_CIPHER), "AES_XTS_CIPHER"},
       
  2238      {ERR_FUNC(EVP_F_ALG_MODULE_INIT), "ALG_MODULE_INIT"},