1 # |
|
2 # This file adds inline T4 instruction support to OpenSSL upstream code. |
|
3 # The change was brought in from OpenSSL 1.0.2. |
|
4 # |
|
5 Index: Configure |
|
6 =================================================================== |
|
7 diff -ru openssl-1.0.1e/Configure openssl-1.0.1e/Configure |
|
8 --- openssl-1.0.1e/Configure 2011-05-24 17:02:24.000000000 -0700 |
|
9 +++ openssl-1.0.1e/Configure 2011-07-27 10:48:17.817470000 -0700 |
|
10 @@ -135,7 +135,7 @@ |
|
11 |
|
12 my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:"; |
|
13 my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; |
|
14 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; |
|
15 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; |
|
16 my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; |
|
17 my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; |
|
18 my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::"; |
|
19 Index: crypto/sparccpuid.S |
|
20 =================================================================== |
|
21 diff -ru openssl-1.0.1e/crypto/sparccpuid.S openssl-1.0.1e/crypto/sparccpuid.S |
|
22 --- openssl-1.0.1e/crypto/sparccpuid.S 2011-05-24 17:02:24.000000000 -0700 |
|
23 +++ openssl-1.0.1e/crypto/sparccpuid.S 2011-07-27 10:48:17.817470000 -0700 |
|
24 @@ -1,3 +1,7 @@ |
|
25 +#ifdef OPENSSL_FIPSCANISTER |
|
26 +#include <openssl/fipssyms.h> |
|
27 +#endif |
|
28 + |
|
29 #if defined(__SUNPRO_C) && defined(__sparcv9) |
|
30 # define ABI64 /* They've said -xarch=v9 at command line */ |
|
31 #elif defined(__GNUC__) && defined(__arch64__) |
|
32 @@ -123,7 +127,7 @@ |
|
33 fmovs %f1,%f3 |
|
34 fmovs %f0,%f2 |
|
35 |
|
36 - add %fp,BIAS,%i0 ! return pointer to caller�s top of stack |
|
37 + add %fp,BIAS,%i0 ! return pointer to top of the caller stack |
|
38 |
|
39 ret |
|
40 restore |
|
41 @@ -235,10 +239,10 @@ |
|
42 .global _sparcv9_vis1_probe |
|
43 .align 8 |
|
44 _sparcv9_vis1_probe: |
|
45 + .word 0x81b00d80 !fxor %f0,%f0,%f0 |
|
46 add %sp,BIAS+2,%o1 |
|
47 - .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 |
|
48 retl |
|
49 - .word 0x81b00d80 !fxor %f0,%f0,%f0 |
|
50 + .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 |
|
51 .type _sparcv9_vis1_probe,#function |
|
52 .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe |
|
53 |
|
54 @@ -251,7 +255,12 @@ |
|
55 ! UltraSPARC IIe 7 |
|
56 ! UltraSPARC III 7 |
|
57 ! UltraSPARC T1 24 |
|
58 +! SPARC T4 65(*) |
|
59 ! |
|
60 +! (*) result has less to do with VIS instruction latencies; rdtick |
|
61 +! appears to be that slow, but it does the trick in the sense that FP |
|
62 +! and VIS code paths are still slower than integer-only ones. |
|
63 +! |
|
64 ! Numbers for T2 and SPARC64 V-VII are more than welcomed. |
|
65 ! |
|
66 ! It would be possible to detect specifically US-T1 by instrumenting |
|
67 @@ -260,6 +269,8 @@ |
|
68 .global _sparcv9_vis1_instrument |
|
69 .align 8 |
|
70 _sparcv9_vis1_instrument: |
|
71 + .word 0x81b00d80 !fxor %f0,%f0,%f0 |
|
72 + .word 0x85b08d82 !fxor %f2,%f2,%f2 |
|
73 .word 0x91410000 !rd %tick,%o0 |
|
74 .word 0x81b00d80 !fxor %f0,%f0,%f0 |
|
75 .word 0x85b08d82 !fxor %f2,%f2,%f2 |
|
76 @@ -314,6 +325,30 @@ |
|
77 .type _sparcv9_fmadd_probe,#function |
|
78 .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe |
|
79 |
|
80 +.global _sparcv9_rdcfr |
|
81 +.align 8 |
|
82 +_sparcv9_rdcfr: |
|
83 + retl |
|
84 + .word 0x91468000 !rd %asr26,%o0 |
|
85 +.type _sparcv9_rdcfr,#function |
|
86 +.size _sparcv9_rdcfr,.-_sparcv9_rdcfr |
|
87 + |
|
88 +.global _sparcv9_vis3_probe |
|
89 +.align 8 |
|
90 +_sparcv9_vis3_probe: |
|
91 + retl |
|
92 + .word 0x81b022a0 !xmulx %g0,%g0,%g0 |
|
93 +.type _sparcv9_vis3_probe,#function |
|
94 +.size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe |
|
95 + |
|
96 +.global _sparcv9_random |
|
97 +.align 8 |
|
98 +_sparcv9_random: |
|
99 + retl ! leaf return; next word is in the delay slot |
|
100 + .word 0x91b002a0 !random %o0 |
|
101 +.type _sparcv9_random,#function |
|
102 +.size _sparcv9_random,.-_sparcv9_random |
|
103 + |
|
104 .global OPENSSL_cleanse |
|
105 .align 32 |
|
106 OPENSSL_cleanse: |
|
107 @@ -398,6 +433,102 @@ |
|
108 .size OPENSSL_cleanse,.-OPENSSL_cleanse |
|
109 |
|
110 #ifndef _BOOT |
|
111 +.global _sparcv9_vis1_instrument_bus |
|
112 +.align 8 |
|
113 +_sparcv9_vis1_instrument_bus: |
|
114 + mov %o1,%o3 ! save cnt |
|
115 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
116 + mov %o4,%o5 ! lasttick = tick |
|
117 + set 0,%g4 ! diff |
|
118 + |
|
119 + andn %o0,63,%g1 |
|
120 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
121 + .word 0x8143e040 !membar #Sync |
|
122 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
123 + .word 0x8143e040 !membar #Sync |
|
124 + ld [%o0],%o4 |
|
125 + add %o4,%g4,%g4 |
|
126 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
127 + |
|
128 +.Loop: .word 0x99410000 !rd %tick,%o4 |
|
129 + sub %o4,%o5,%g4 ! diff=tick-lasttick |
|
130 + mov %o4,%o5 ! lasttick=tick |
|
131 + |
|
132 + andn %o0,63,%g1 |
|
133 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
134 + .word 0x8143e040 !membar #Sync |
|
135 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
136 + .word 0x8143e040 !membar #Sync |
|
137 + ld [%o0],%o4 |
|
138 + add %o4,%g4,%g4 |
|
139 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
140 + subcc %o1,1,%o1 ! --$cnt |
|
141 + bnz .Loop |
|
142 + add %o0,4,%o0 ! ++$out |
|
143 + |
|
144 + retl |
|
145 + mov %o3,%o0 |
|
146 +.type _sparcv9_vis1_instrument_bus,#function |
|
147 +.size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus |
|
148 + |
|
149 +.global _sparcv9_vis1_instrument_bus2 |
|
150 +.align 8 |
|
151 +_sparcv9_vis1_instrument_bus2: |
|
152 + mov %o1,%o3 ! save cnt |
|
153 + sll %o1,2,%o1 ! cnt*=4 |
|
154 + |
|
155 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
156 + mov %o4,%o5 ! lasttick = tick |
|
157 + set 0,%g4 ! diff |
|
158 + |
|
159 + andn %o0,63,%g1 |
|
160 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
161 + .word 0x8143e040 !membar #Sync |
|
162 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
163 + .word 0x8143e040 !membar #Sync |
|
164 + ld [%o0],%o4 |
|
165 + add %o4,%g4,%g4 |
|
166 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
167 + |
|
168 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
169 + sub %o4,%o5,%g4 ! diff=tick-lasttick |
|
170 + mov %o4,%o5 ! lasttick=tick |
|
171 + mov %g4,%g5 ! lastdiff=diff |
|
172 +.Loop2: |
|
173 + andn %o0,63,%g1 |
|
174 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
175 + .word 0x8143e040 !membar #Sync |
|
176 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
177 + .word 0x8143e040 !membar #Sync |
|
178 + ld [%o0],%o4 |
|
179 + add %o4,%g4,%g4 |
|
180 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
181 + |
|
182 + subcc %o2,1,%o2 ! --max |
|
183 + bz .Ldone2 |
|
184 + nop |
|
185 + |
|
186 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
187 + sub %o4,%o5,%g4 ! diff=tick-lasttick |
|
188 + mov %o4,%o5 ! lasttick=tick |
|
189 + cmp %g4,%g5 |
|
190 + mov %g4,%g5 ! lastdiff=diff |
|
191 + |
|
192 + .word 0x83408000 !rd %ccr,%g1 |
|
193 + and %g1,4,%g1 ! isolate zero flag |
|
194 + xor %g1,4,%g1 ! flip zero flag |
|
195 + |
|
196 + subcc %o1,%g1,%o1 ! conditional --$cnt |
|
197 + bnz .Loop2 |
|
198 + add %o0,%g1,%o0 ! conditional ++$out |
|
199 + |
|
200 +.Ldone2: |
|
201 + srl %o1,2,%o1 |
|
202 + retl |
|
203 + sub %o3,%o1,%o0 |
|
204 +.type _sparcv9_vis1_instrument_bus2,#function |
|
205 +.size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2 |
|
206 + |
|
207 .section ".init",#alloc,#execinstr |
|
208 call solaris_locking_setup |
|
209 nop |
|
210 Index: crypto/sparcv9cap.c |
|
211 =================================================================== |
|
212 diff -ru openssl-1.0.1e/crypto/sparcv9cap.c openssl-1.0.1e/crypto/sparcv9cap.c |
|
213 --- openssl-1.0.1e/crypto/sparcv9cap.c 2011-05-24 17:02:24.000000000 -0700 |
|
214 +++ openssl-1.0.1e/crypto/sparcv9cap.c 2011-07-27 10:48:17.817470000 -0700 |
|
215 @@ -4,31 +4,55 @@ |
|
216 #include <setjmp.h> |
|
217 #include <signal.h> |
|
218 #include <sys/time.h> |
|
219 +#include <unistd.h> |
|
220 #include <openssl/bn.h> |
|
221 |
|
222 -#define SPARCV9_TICK_PRIVILEGED (1<<0) |
|
223 -#define SPARCV9_PREFER_FPU (1<<1) |
|
224 -#define SPARCV9_VIS1 (1<<2) |
|
225 -#define SPARCV9_VIS2 (1<<3) /* reserved */ |
|
226 -#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ |
|
227 +#include "sparc_arch.h" |
|
228 |
|
229 +#if defined(__GNUC__) && defined(__linux) |
|
230 +__attribute__((visibility("hidden"))) |
|
231 +#endif |
|
232 #ifndef _BOOT |
|
233 -static int OPENSSL_sparcv9cap_P=SPARCV9_TICK_PRIVILEGED; |
|
234 +unsigned int OPENSSL_sparcv9cap_P[2]={SPARCV9_TICK_PRIVILEGED,0}; |
|
235 #else |
|
236 -static int OPENSSL_sparcv9cap_P = SPARCV9_VIS1; |
|
237 +unsigned int OPENSSL_sparcv9cap_P[2]={SPARCV9_VIS1,0}; |
|
238 #endif |
|
239 |
|
240 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) |
|
241 { |
|
242 + int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); |
|
243 int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); |
|
244 int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); |
|
245 |
|
246 - if (num>=8 && !(num&1) && |
|
247 - (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == |
|
248 - (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) |
|
249 - return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); |
|
250 - else |
|
251 - return bn_mul_mont_int(rp,ap,bp,np,n0,num); |
|
252 + if (!(num&1) && num>=6) |
|
253 + { |
|
254 + if ((num&15)==0 && num<=64 && |
|
255 + (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== |
|
256 + (CFR_MONTMUL|CFR_MONTSQR)) |
|
257 + { |
|
258 + typedef int (*bn_mul_mont_f)(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
259 + int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
260 + int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
261 + int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
262 + int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
263 + static const bn_mul_mont_f funcs[4] = { |
|
264 + bn_mul_mont_t4_8, bn_mul_mont_t4_16, |
|
265 + bn_mul_mont_t4_24, bn_mul_mont_t4_32 }; |
|
266 + bn_mul_mont_f worker = funcs[num/16-1]; |
|
267 + |
|
268 + if ((*worker)(rp,ap,bp,np,n0)) return 1; |
|
269 + /* retry once and fall back */ |
|
270 + if ((*worker)(rp,ap,bp,np,n0)) return 1; |
|
271 + return bn_mul_mont_vis3(rp,ap,bp,np,n0,num); |
|
272 + } |
|
273 + if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3)) |
|
274 + return bn_mul_mont_vis3(rp,ap,bp,np,n0,num); |
|
275 + else if (num>=8 && |
|
276 + (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == |
|
277 + (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) |
|
278 + return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); |
|
279 + } |
|
280 + return bn_mul_mont_int(rp,ap,bp,np,n0,num); |
|
281 } |
|
282 |
|
283 unsigned long _sparcv9_rdtick(void); |
|
284 @@ -36,11 +60,18 @@ |
|
285 unsigned long _sparcv9_vis1_instrument(void); |
|
286 void _sparcv9_vis2_probe(void); |
|
287 void _sparcv9_fmadd_probe(void); |
|
288 +unsigned long _sparcv9_rdcfr(void); |
|
289 +void _sparcv9_vis3_probe(void); |
|
290 +unsigned long _sparcv9_random(void); |
|
291 +#ifndef _BOOT |
|
292 +size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t); |
|
293 +size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t); |
|
294 +#endif |
|
295 |
|
296 #ifndef _BOOT |
|
297 unsigned long OPENSSL_rdtsc(void) |
|
298 { |
|
299 - if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED) |
|
300 + if (OPENSSL_sparcv9cap_P[0]&SPARCV9_TICK_PRIVILEGED) |
|
301 #if defined(__sun) && defined(__SVR4) |
|
302 return gethrtime(); |
|
303 #else |
|
304 @@ -49,6 +80,24 @@ |
|
305 else |
|
306 return _sparcv9_rdtick(); |
|
307 } |
|
308 + |
|
309 +size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt) |
|
310 + { |
|
311 + if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == |
|
312 + SPARCV9_BLK) |
|
313 + return _sparcv9_vis1_instrument_bus(out,cnt); |
|
314 + else |
|
315 + return 0; |
|
316 + } |
|
317 + |
|
318 +size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max) |
|
319 + { |
|
320 + if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == |
|
321 + SPARCV9_BLK) |
|
322 + return _sparcv9_vis1_instrument_bus2(out,cnt,max); |
|
323 + else |
|
324 + return 0; |
|
325 + } |
|
326 #endif |
|
327 |
|
328 #if defined(_BOOT) |
|
329 @@ -58,7 +107,7 @@ |
|
330 */ |
|
331 void OPENSSL_cpuid_setup(void) |
|
332 { |
|
333 - OPENSSL_sparcv9cap_P = SPARCV9_VIS1; |
|
334 + OPENSSL_sparcv9cap_P[0] = SPARCV9_VIS1; |
|
335 } |
|
336 |
|
337 #elif 0 && defined(__sun) && defined(__SVR4) |
|
338 @@ -85,11 +116,11 @@ |
|
339 if (!strcmp (name,"SUNW,UltraSPARC") || |
|
340 !strncmp(name,"SUNW,UltraSPARC-I",17)) /* covers II,III,IV */ |
|
341 { |
|
342 - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; |
|
343 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; |
|
344 |
|
345 /* %tick is privileged only on UltraSPARC-I/II, but not IIe */ |
|
346 if (name[14]!='\0' && name[17]!='\0' && name[18]!='\0') |
|
347 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
348 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
349 |
|
350 return DI_WALK_TERMINATE; |
|
351 } |
|
352 @@ -96,7 +127,7 @@ |
|
353 /* This is expected to catch remaining UltraSPARCs, such as T1 */ |
|
354 else if (!strncmp(name,"SUNW,UltraSPARC",15)) |
|
355 { |
|
356 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
357 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
358 |
|
359 return DI_WALK_TERMINATE; |
|
360 } |
|
361 @@ -115,7 +146,7 @@ |
|
362 |
|
363 if ((e=getenv("OPENSSL_sparcv9cap"))) |
|
364 { |
|
365 - OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); |
|
366 + OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0); |
|
367 return; |
|
368 } |
|
369 |
|
370 @@ -123,17 +154,17 @@ |
|
371 { |
|
372 if (strcmp(si,"sun4v")) |
|
373 /* FPU is preferred for all CPUs, but US-T1/2 */ |
|
374 - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU; |
|
375 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU; |
|
376 } |
|
377 |
|
378 if (sysinfo(SI_ISALIST,si,sizeof(si))>0) |
|
379 { |
|
380 if (strstr(si,"+vis")) |
|
381 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; |
|
382 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK; |
|
383 if (strstr(si,"+vis2")) |
|
384 { |
|
385 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; |
|
386 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
387 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; |
|
388 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
389 return; |
|
390 } |
|
391 } |
|
392 @@ -193,12 +224,14 @@ |
|
393 |
|
394 if ((e=getenv("OPENSSL_sparcv9cap"))) |
|
395 { |
|
396 - OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); |
|
397 + OPENSSL_sparcv9cap_P[0]=strtoul(e,NULL,0); |
|
398 + if ((e=strchr(e,':'))) |
|
399 + OPENSSL_sparcv9cap_P[1]=strtoul(e+1,NULL,0); |
|
400 return; |
|
401 } |
|
402 |
|
403 /* Initial value, fits UltraSPARC-I&II... */ |
|
404 - OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED; |
|
405 + OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU|SPARCV9_TICK_PRIVILEGED; |
|
406 |
|
407 sigfillset(&all_masked); |
|
408 sigdelset(&all_masked,SIGILL); |
|
409 @@ -221,20 +254,20 @@ |
|
410 if (sigsetjmp(common_jmp,1) == 0) |
|
411 { |
|
412 _sparcv9_rdtick(); |
|
413 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
414 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
415 } |
|
416 |
|
417 if (sigsetjmp(common_jmp,1) == 0) |
|
418 { |
|
419 _sparcv9_vis1_probe(); |
|
420 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; |
|
421 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1|SPARCV9_BLK; |
|
422 /* detect UltraSPARC-Tx, see sparccpud.S for details... */ |
|
423 if (_sparcv9_vis1_instrument() >= 12) |
|
424 - OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); |
|
425 + OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1|SPARCV9_PREFER_FPU); |
|
426 else |
|
427 { |
|
428 _sparcv9_vis2_probe(); |
|
429 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; |
|
430 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; |
|
431 } |
|
432 } |
|
433 |
|
434 @@ -241,13 +274,53 @@ |
|
435 if (sigsetjmp(common_jmp,1) == 0) |
|
436 { |
|
437 _sparcv9_fmadd_probe(); |
|
438 - OPENSSL_sparcv9cap_P |= SPARCV9_FMADD; |
|
439 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD; |
|
440 } |
|
441 |
|
442 + /* |
|
443 + * VIS3 flag is tested independently from VIS1, unlike VIS2 that is, |
|
444 + * because VIS3 defines even integer instructions. |
|
445 + */ |
|
446 + if (sigsetjmp(common_jmp,1) == 0) |
|
447 + { |
|
448 + _sparcv9_vis3_probe(); |
|
449 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3; |
|
450 + } |
|
451 + |
|
452 + if (sigsetjmp(common_jmp,1) == 0) |
|
453 + { |
|
454 + (void)_sparcv9_random(); |
|
455 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM; |
|
456 + } |
|
457 + |
|
458 + /* |
|
459 + * Pending a better solution, _sparcv9_rdcfr is masked by the |
|
460 + * VIS3 flag, because it goes into an uninterruptible endless |
|
461 + * loop on UltraSPARC II running Solaris. Things might be |
|
462 + * different on Linux... |
|
463 + */ |
|
464 + if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) && |
|
465 + sigsetjmp(common_jmp,1) == 0) |
|
466 + { |
|
467 + OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr(); |
|
468 + } |
|
469 + |
|
470 sigaction(SIGBUS,&bus_oact,NULL); |
|
471 sigaction(SIGILL,&ill_oact,NULL); |
|
472 |
|
473 sigprocmask(SIG_SETMASK,&oset,NULL); |
|
474 + |
|
475 + if (sizeof(size_t)==8) |
|
476 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; |
|
477 +#ifdef __linux |
|
478 + else |
|
479 + { |
|
480 + int ret = syscall(340); |
|
481 + |
|
482 + if (ret>=0 && ret&1) |
|
483 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; |
|
484 + } |
|
485 +#endif |
|
486 } |
|
487 |
|
488 #endif |
|
489 Index: crypto/md5/Makefile |
|
490 =================================================================== |
|
491 diff -ru openssl-1.0.1e/crypto/md5/Makefile openssl-1.0.1e/crypto/md5/Makefile |
|
492 --- openssl-1.0.1e/crypto/md5/Makefile 2011-05-24 17:02:24.000000000 -0700 |
|
493 +++ openssl-1.0.1e/crypto/md5/Makefile 2011-07-27 10:48:17.817470000 -0700 |
|
494 @@ -52,6 +52,9 @@ |
|
495 $(CC) $(CFLAGS) -E asm/md5-ia64.S | \ |
|
496 $(PERL) -ne 's/;\s+/;\n/g; print;' > $@ |
|
497 |
|
498 +md5-sparcv9.S: asm/md5-sparcv9.pl |
|
499 + $(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS) |
|
500 + |
|
501 files: |
|
502 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO |
|
503 |
|
504 Index: crypto/md5/md5_locl.h |
|
505 =================================================================== |
|
506 diff -ru openssl-1.0.1e/crypto/md5/md5_locl.h openssl-1.0.1e/crypto/md5/md5_locl.h |
|
507 --- openssl-1.0.1e/crypto/md5/md5_locl.h 2011-05-24 17:02:24.000000000 -0700 |
|
508 +++ openssl-1.0.1e/crypto/md5/md5_locl.h 2011-07-27 10:48:17.817470000 -0700 |
|
509 @@ -71,6 +71,8 @@ |
|
510 # define md5_block_data_order md5_block_asm_data_order |
|
511 # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) |
|
512 # define md5_block_data_order md5_block_asm_data_order |
|
513 +# elif defined(__sparc) || defined(__sparc__) |
|
514 +# define md5_block_data_order md5_block_asm_data_order |
|
515 # endif |
|
516 #endif |
|
517 |
|
518 Index: crypto/sha/Makefile |
|
519 =================================================================== |
|
520 diff -ru openssl-1.0.1e/crypto/sha/Makefile openssl-1.0.1e/crypto/sha/Makefile |
|
521 --- openssl-1.0.1e/crypto/sha/Makefile 2011-05-24 17:02:24.000000000 -0700 |
|
522 +++ openssl-1.0.1e/crypto/sha/Makefile 2011-07-27 10:48:17.817470000 -0700 |
|
523 @@ -68,9 +68,9 @@ |
|
524 sha1-x86_64.s: asm/sha1-x86_64.pl; $(PERL) asm/sha1-x86_64.pl $(PERLASM_SCHEME) > $@ |
|
525 sha256-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ |
|
526 sha512-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ |
|
527 -sha1-sparcv9.s: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS) |
|
528 -sha256-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
529 -sha512-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
530 +sha1-sparcv9.S: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS) |
|
531 +sha256-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
532 +sha512-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
533 |
|
534 sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@ |
|
535 sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ |
|
536 Index: crypto/sha/asm/sha1-sparcv9.pl |
|
537 =================================================================== |
|
538 diff -ru openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl |
|
539 --- openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700 |
|
540 +++ openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700 |
|
541 @@ -5,6 +5,8 @@ |
|
542 # project. The module is, however, dual licensed under OpenSSL and |
|
543 # CRYPTOGAMS licenses depending on where you obtain it. For further |
|
544 # details see http://www.openssl.org/~appro/cryptogams/. |
|
545 +# |
|
546 +# Hardware SPARC T4 support by David S. Miller <[email protected]>. |
|
547 # ==================================================================== |
|
548 |
|
549 # Performance improvement is not really impressive on pre-T1 CPU: +8% |
|
550 @@ -18,6 +20,11 @@ |
|
551 # ensure scalability on UltraSPARC T1, or rather to avoid decay when |
|
552 # amount of active threads exceeds the number of physical cores. |
|
553 |
|
554 +# SPARC T4 SHA1 hardware achieves 3.72 cycles per byte, which is 3.1x |
|
555 +# faster than software. Multi-process benchmark saturates at 11x |
|
556 +# single-process result on 8-core processor, or ~9GBps per 2.85GHz |
|
557 +# socket. |
|
558 + |
|
559 $bits=32; |
|
560 for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } |
|
561 if ($bits==64) { $bias=2047; $frame=192; } |
|
562 @@ -183,11 +190,93 @@ |
|
563 .register %g3,#scratch |
|
564 ___ |
|
565 $code.=<<___; |
|
566 +#include "sparc_arch.h" |
|
567 + |
|
568 .section ".text",#alloc,#execinstr |
|
569 |
|
570 +#ifdef __PIC__ |
|
571 +SPARC_PIC_THUNK(%g1) |
|
572 +#endif |
|
573 + |
|
574 .align 32 |
|
575 .globl sha1_block_data_order |
|
576 sha1_block_data_order: |
|
577 + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) |
|
578 + ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1] |
|
579 + |
|
580 + andcc %g1, CFR_SHA1, %g0 |
|
581 + be .Lsoftware |
|
582 + nop |
|
583 + |
|
584 + ld [%o0 + 0x00], %f0 ! load context |
|
585 + ld [%o0 + 0x04], %f1 |
|
586 + ld [%o0 + 0x08], %f2 |
|
587 + andcc %o1, 0x7, %g0 |
|
588 + ld [%o0 + 0x0c], %f3 |
|
589 + bne,pn %icc, .Lhwunaligned |
|
590 + ld [%o0 + 0x10], %f4 |
|
591 + |
|
592 +.Lhw_loop: |
|
593 + ldd [%o1 + 0x00], %f8 |
|
594 + ldd [%o1 + 0x08], %f10 |
|
595 + ldd [%o1 + 0x10], %f12 |
|
596 + ldd [%o1 + 0x18], %f14 |
|
597 + ldd [%o1 + 0x20], %f16 |
|
598 + ldd [%o1 + 0x28], %f18 |
|
599 + ldd [%o1 + 0x30], %f20 |
|
600 + subcc %o2, 1, %o2 ! done yet? |
|
601 + ldd [%o1 + 0x38], %f22 |
|
602 + add %o1, 0x40, %o1 |
|
603 + |
|
604 + .word 0x81b02820 ! SHA1 |
|
605 + |
|
606 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhw_loop |
|
607 + nop |
|
608 + |
|
609 +.Lhwfinish: |
|
610 + st %f0, [%o0 + 0x00] ! store context |
|
611 + st %f1, [%o0 + 0x04] |
|
612 + st %f2, [%o0 + 0x08] |
|
613 + st %f3, [%o0 + 0x0c] |
|
614 + retl |
|
615 + st %f4, [%o0 + 0x10] |
|
616 + |
|
617 +.align 8 |
|
618 +.Lhwunaligned: |
|
619 + alignaddr %o1, %g0, %o1 |
|
620 + |
|
621 + ldd [%o1 + 0x00], %f10 |
|
622 +.Lhwunaligned_loop: |
|
623 + ldd [%o1 + 0x08], %f12 |
|
624 + ldd [%o1 + 0x10], %f14 |
|
625 + ldd [%o1 + 0x18], %f16 |
|
626 + ldd [%o1 + 0x20], %f18 |
|
627 + ldd [%o1 + 0x28], %f20 |
|
628 + ldd [%o1 + 0x30], %f22 |
|
629 + ldd [%o1 + 0x38], %f24 |
|
630 + subcc %o2, 1, %o2 ! done yet? |
|
631 + ldd [%o1 + 0x40], %f26 |
|
632 + add %o1, 0x40, %o1 |
|
633 + |
|
634 + faligndata %f10, %f12, %f8 |
|
635 + faligndata %f12, %f14, %f10 |
|
636 + faligndata %f14, %f16, %f12 |
|
637 + faligndata %f16, %f18, %f14 |
|
638 + faligndata %f18, %f20, %f16 |
|
639 + faligndata %f20, %f22, %f18 |
|
640 + faligndata %f22, %f24, %f20 |
|
641 + faligndata %f24, %f26, %f22 |
|
642 + |
|
643 + .word 0x81b02820 ! SHA1 |
|
644 + |
|
645 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop |
|
646 + for %f26, %f26, %f10 ! %f10=%f26 |
|
647 + |
|
648 + ba .Lhwfinish |
|
649 + nop |
|
650 + |
|
651 +.align 16 |
|
652 +.Lsoftware: |
|
653 save %sp,-$frame,%sp |
|
654 sllx $len,6,$len |
|
655 add $inp,$len,$len |
|
656 @@ -279,6 +368,62 @@ |
|
657 .align 4 |
|
658 ___ |
|
659 |
|
660 -$code =~ s/\`([^\`]*)\`/eval $1/gem; |
|
661 -print $code; |
|
662 +# Purpose of these subroutines is to explicitly encode VIS instructions, |
|
663 +# so that one can compile the module without having to specify VIS |
|
664 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. |
|
665 +# Idea is to reserve for option to produce "universal" binary and let |
|
666 +# programmer detect if current CPU is VIS capable at run-time. |
|
667 +sub unvis { |
|
668 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
669 +my ($ref,$opf); # parenthesized so both are lexical; "my $ref,$opf" declares only $ref |
|
670 +my %visopf = ( "faligndata" => 0x048, |
|
671 + "for" => 0x07c ); |
|
672 + |
|
673 + $ref = "$mnemonic\t$rs1,$rs2,$rd"; |
|
674 + |
|
675 + if ($opf=$visopf{$mnemonic}) { # only mnemonics listed in %visopf are hand-encoded |
|
676 + foreach ($rs1,$rs2,$rd) { |
|
677 + return $ref if (!/%f([0-9]{1,2})/); # not an %fN operand: return the text form |
|
678 + $_=$1; # foreach aliases $_, so the operand variable now holds the bare number |
|
679 + if ($1>=32) { |
|
680 + return $ref if ($1&1); |
|
681 + # re-encode for upper double register addressing |
|
682 + $_=($1|$1>>5)&31; |
|
683 + } |
|
684 + } |
|
685 + |
|
686 + return sprintf ".word\t0x%08x !%s", |
|
687 + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, |
|
688 + $ref; |
|
689 + } else { |
|
690 + return $ref; |
|
691 + } |
|
692 +} |
|
693 +sub unalignaddr { |
|
694 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
695 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); |
|
696 +my $ref="$mnemonic\t$rs1,$rs2,$rd"; |
|
697 + |
|
698 + foreach ($rs1,$rs2,$rd) { |
|
699 + if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; } |
|
700 + else { return $ref; } |
|
701 + } |
|
702 + return sprintf ".word\t0x%08x !%s", |
|
703 + 0x81b00300|$rd<<25|$rs1<<14|$rs2, |
|
704 + $ref; |
|
705 +} |
|
706 + |
|
707 +foreach (split("\n",$code)) { |
|
708 + s/\`([^\`]*)\`/eval $1/ge; |
|
709 + |
|
710 + s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ |
|
711 + &unvis($1,$2,$3,$4) |
|
712 + /ge; |
|
713 + s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ |
|
714 + &unalignaddr($1,$2,$3,$4) |
|
715 + /ge; |
|
716 + |
|
717 + print $_,"\n"; |
|
718 +} |
|
719 + |
|
720 close STDOUT; |
|
721 |
|
722 Index: crypto/sha/asm/sha512-sparcv9.pl |
|
723 =================================================================== |
|
724 diff -ru openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl |
|
725 --- openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700 |
|
726 +++ openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700 |
|
727 @@ -5,6 +5,8 @@ |
|
728 # project. The module is, however, dual licensed under OpenSSL and |
|
729 # CRYPTOGAMS licenses depending on where you obtain it. For further |
|
730 # details see http://www.openssl.org/~appro/cryptogams/. |
|
731 +# |
|
732 +# Hardware SPARC T4 support by David S. Miller <[email protected]>. |
|
733 # ==================================================================== |
|
734 |
|
735 # SHA256 performance improvement over compiler generated code varies |
|
736 @@ -41,6 +43,12 @@ |
|
737 # loads are always slower than one 64-bit load. Once again this |
|
738 # is unlike pre-T1 UltraSPARC, where, if scheduled appropriately, |
|
739 # 2x32-bit loads can be as fast as 1x64-bit ones. |
|
740 +# |
|
741 +# SPARC T4 SHA256/512 hardware achieves 3.17/2.01 cycles per byte, |
|
742 +# which is 9.3x/11.1x faster than software. Multi-process benchmark |
|
743 +# saturates at 11.5x single-process result on 8-core processor, or |
|
744 +# ~11/16GBps per 2.85GHz socket. |
|
745 + |
|
746 |
|
747 $bits=32; |
|
748 for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } |
|
749 @@ -386,6 +394,8 @@ |
|
750 .register %g3,#scratch |
|
751 ___ |
|
752 $code.=<<___; |
|
753 +#include "sparc_arch.h" |
|
754 + |
|
755 .section ".text",#alloc,#execinstr |
|
756 |
|
757 .align 64 |
|
758 @@ -457,8 +467,196 @@ |
|
759 } |
|
760 $code.=<<___; |
|
761 .size K${label},.-K${label} |
|
762 + |
|
763 +#ifdef __PIC__ |
|
764 +SPARC_PIC_THUNK(%g1) |
|
765 +#endif |
|
766 + |
|
767 .globl sha${label}_block_data_order |
|
768 +.align 32 |
|
769 sha${label}_block_data_order: |
|
770 + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) |
|
771 + ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1] |
|
772 + |
|
773 + andcc %g1, CFR_SHA${label}, %g0 |
|
774 + be .Lsoftware |
|
775 + nop |
|
776 +___ |
|
777 +$code.=<<___ if ($SZ==8); # SHA512 |
|
778 + ldd [%o0 + 0x00], %f0 ! load context |
|
779 + ldd [%o0 + 0x08], %f2 |
|
780 + ldd [%o0 + 0x10], %f4 |
|
781 + ldd [%o0 + 0x18], %f6 |
|
782 + ldd [%o0 + 0x20], %f8 |
|
783 + ldd [%o0 + 0x28], %f10 |
|
784 + andcc %o1, 0x7, %g0 |
|
785 + ldd [%o0 + 0x30], %f12 |
|
786 + bne,pn %icc, .Lhwunaligned |
|
787 + ldd [%o0 + 0x38], %f14 |
|
788 + |
|
789 +.Lhwaligned_loop: |
|
790 + ldd [%o1 + 0x00], %f16 |
|
791 + ldd [%o1 + 0x08], %f18 |
|
792 + ldd [%o1 + 0x10], %f20 |
|
793 + ldd [%o1 + 0x18], %f22 |
|
794 + ldd [%o1 + 0x20], %f24 |
|
795 + ldd [%o1 + 0x28], %f26 |
|
796 + ldd [%o1 + 0x30], %f28 |
|
797 + ldd [%o1 + 0x38], %f30 |
|
798 + ldd [%o1 + 0x40], %f32 |
|
799 + ldd [%o1 + 0x48], %f34 |
|
800 + ldd [%o1 + 0x50], %f36 |
|
801 + ldd [%o1 + 0x58], %f38 |
|
802 + ldd [%o1 + 0x60], %f40 |
|
803 + ldd [%o1 + 0x68], %f42 |
|
804 + ldd [%o1 + 0x70], %f44 |
|
805 + subcc %o2, 1, %o2 ! done yet? |
|
806 + ldd [%o1 + 0x78], %f46 |
|
807 + add %o1, 0x80, %o1 |
|
808 + |
|
809 + .word 0x81b02860 ! SHA512 |
|
810 + |
|
811 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwaligned_loop |
|
812 + nop |
|
813 + |
|
814 +.Lhwfinish: |
|
815 + std %f0, [%o0 + 0x00] ! store context |
|
816 + std %f2, [%o0 + 0x08] |
|
817 + std %f4, [%o0 + 0x10] |
|
818 + std %f6, [%o0 + 0x18] |
|
819 + std %f8, [%o0 + 0x20] |
|
820 + std %f10, [%o0 + 0x28] |
|
821 + std %f12, [%o0 + 0x30] |
|
822 + retl |
|
823 + std %f14, [%o0 + 0x38] |
|
824 + |
|
825 +.align 16 |
|
826 +.Lhwunaligned: |
|
827 + alignaddr %o1, %g0, %o1 |
|
828 + |
|
829 + ldd [%o1 + 0x00], %f18 |
|
830 +.Lhwunaligned_loop: |
|
831 + ldd [%o1 + 0x08], %f20 |
|
832 + ldd [%o1 + 0x10], %f22 |
|
833 + ldd [%o1 + 0x18], %f24 |
|
834 + ldd [%o1 + 0x20], %f26 |
|
835 + ldd [%o1 + 0x28], %f28 |
|
836 + ldd [%o1 + 0x30], %f30 |
|
837 + ldd [%o1 + 0x38], %f32 |
|
838 + ldd [%o1 + 0x40], %f34 |
|
839 + ldd [%o1 + 0x48], %f36 |
|
840 + ldd [%o1 + 0x50], %f38 |
|
841 + ldd [%o1 + 0x58], %f40 |
|
842 + ldd [%o1 + 0x60], %f42 |
|
843 + ldd [%o1 + 0x68], %f44 |
|
844 + ldd [%o1 + 0x70], %f46 |
|
845 + ldd [%o1 + 0x78], %f48 |
|
846 + subcc %o2, 1, %o2 ! done yet? |
|
847 + ldd [%o1 + 0x80], %f50 |
|
848 + add %o1, 0x80, %o1 |
|
849 + |
|
850 + faligndata %f18, %f20, %f16 |
|
851 + faligndata %f20, %f22, %f18 |
|
852 + faligndata %f22, %f24, %f20 |
|
853 + faligndata %f24, %f26, %f22 |
|
854 + faligndata %f26, %f28, %f24 |
|
855 + faligndata %f28, %f30, %f26 |
|
856 + faligndata %f30, %f32, %f28 |
|
857 + faligndata %f32, %f34, %f30 |
|
858 + faligndata %f34, %f36, %f32 |
|
859 + faligndata %f36, %f38, %f34 |
|
860 + faligndata %f38, %f40, %f36 |
|
861 + faligndata %f40, %f42, %f38 |
|
862 + faligndata %f42, %f44, %f40 |
|
863 + faligndata %f44, %f46, %f42 |
|
864 + faligndata %f46, %f48, %f44 |
|
865 + faligndata %f48, %f50, %f46 |
|
866 + |
|
867 + .word 0x81b02860 ! SHA512 |
|
868 + |
|
869 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop |
|
870 + for %f50, %f50, %f18 ! %f18=%f50 |
|
871 + |
|
872 + ba .Lhwfinish |
|
873 + nop |
|
874 +___ |
|
875 +$code.=<<___ if ($SZ==4); # SHA256 |
|
876 + ld [%o0 + 0x00], %f0 |
|
877 + ld [%o0 + 0x04], %f1 |
|
878 + ld [%o0 + 0x08], %f2 |
|
879 + ld [%o0 + 0x0c], %f3 |
|
880 + ld [%o0 + 0x10], %f4 |
|
881 + ld [%o0 + 0x14], %f5 |
|
882 + andcc %o1, 0x7, %g0 |
|
883 + ld [%o0 + 0x18], %f6 |
|
884 + bne,pn %icc, .Lhwunaligned |
|
885 + ld [%o0 + 0x1c], %f7 |
|
886 + |
|
887 +.Lhwloop: |
|
888 + ldd [%o1 + 0x00], %f8 |
|
889 + ldd [%o1 + 0x08], %f10 |
|
890 + ldd [%o1 + 0x10], %f12 |
|
891 + ldd [%o1 + 0x18], %f14 |
|
892 + ldd [%o1 + 0x20], %f16 |
|
893 + ldd [%o1 + 0x28], %f18 |
|
894 + ldd [%o1 + 0x30], %f20 |
|
895 + subcc %o2, 1, %o2 ! done yet? |
|
896 + ldd [%o1 + 0x38], %f22 |
|
897 + add %o1, 0x40, %o1 |
|
898 + |
|
899 + .word 0x81b02840 ! SHA256 |
|
900 + |
|
901 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwloop |
|
902 + nop |
|
903 + |
|
904 +.Lhwfinish: |
|
905 + st %f0, [%o0 + 0x00] ! store context |
|
906 + st %f1, [%o0 + 0x04] |
|
907 + st %f2, [%o0 + 0x08] |
|
908 + st %f3, [%o0 + 0x0c] |
|
909 + st %f4, [%o0 + 0x10] |
|
910 + st %f5, [%o0 + 0x14] |
|
911 + st %f6, [%o0 + 0x18] |
|
912 + retl |
|
913 + st %f7, [%o0 + 0x1c] |
|
914 + |
|
915 +.align 8 |
|
916 +.Lhwunaligned: |
|
917 + alignaddr %o1, %g0, %o1 |
|
918 + |
|
919 + ldd [%o1 + 0x00], %f10 |
|
920 +.Lhwunaligned_loop: |
|
921 + ldd [%o1 + 0x08], %f12 |
|
922 + ldd [%o1 + 0x10], %f14 |
|
923 + ldd [%o1 + 0x18], %f16 |
|
924 + ldd [%o1 + 0x20], %f18 |
|
925 + ldd [%o1 + 0x28], %f20 |
|
926 + ldd [%o1 + 0x30], %f22 |
|
927 + ldd [%o1 + 0x38], %f24 |
|
928 + subcc %o2, 1, %o2 ! done yet? |
|
929 + ldd [%o1 + 0x40], %f26 |
|
930 + add %o1, 0x40, %o1 |
|
931 + |
|
932 + faligndata %f10, %f12, %f8 |
|
933 + faligndata %f12, %f14, %f10 |
|
934 + faligndata %f14, %f16, %f12 |
|
935 + faligndata %f16, %f18, %f14 |
|
936 + faligndata %f18, %f20, %f16 |
|
937 + faligndata %f20, %f22, %f18 |
|
938 + faligndata %f22, %f24, %f20 |
|
939 + faligndata %f24, %f26, %f22 |
|
940 + |
|
941 + .word 0x81b02840 ! SHA256 |
|
942 + |
|
943 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop |
|
944 + for %f26, %f26, %f10 ! %f10=%f26 |
|
945 + |
|
946 + ba .Lhwfinish |
|
947 + nop |
|
948 +___ |
|
949 +$code.=<<___; |
|
950 +.align 16 |
|
951 +.Lsoftware: |
|
952 save %sp,`-$frame-$locals`,%sp |
|
953 and $inp,`$align-1`,$tmp31 |
|
954 sllx $len,`log(16*$SZ)/log(2)`,$len |
|
955 @@ -589,6 +787,62 @@ |
|
956 .align 4 |
|
957 ___ |
|
958 |
|
959 -$code =~ s/\`([^\`]*)\`/eval $1/gem; |
|
960 -print $code; |
|
961 +# Purpose of these subroutines is to explicitly encode VIS instructions, |
|
962 +# so that one can compile the module without having to specify VIS |
|
963 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. |
|
964 +# Idea is to reserve for option to produce "universal" binary and let |
|
965 +# programmer detect if current CPU is VIS capable at run-time. |
|
966 +sub unvis { |
|
967 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
968 +my $ref,$opf; |
|
969 +my %visopf = ( "faligndata" => 0x048, |
|
970 + "for" => 0x07c ); |
|
971 + |
|
972 + $ref = "$mnemonic\t$rs1,$rs2,$rd"; |
|
973 + |
|
974 + if ($opf=$visopf{$mnemonic}) { |
|
975 + foreach ($rs1,$rs2,$rd) { |
|
976 + return $ref if (!/%f([0-9]{1,2})/); |
|
977 + $_=$1; |
|
978 + if ($1>=32) { |
|
979 + return $ref if ($1&1); |
|
980 + # re-encode for upper double register addressing |
|
981 + $_=($1|$1>>5)&31; |
|
982 + } |
|
983 + } |
|
984 + |
|
985 + return sprintf ".word\t0x%08x !%s", |
|
986 + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, |
|
987 + $ref; |
|
988 + } else { |
|
989 + return $ref; |
|
990 + } |
|
991 +} |
|
992 +sub unalignaddr { |
|
993 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
994 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); |
|
995 +my $ref="$mnemonic\t$rs1,$rs2,$rd"; |
|
996 + |
|
997 + foreach ($rs1,$rs2,$rd) { |
|
998 + if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; } |
|
999 + else { return $ref; } |
|
1000 + } |
|
1001 + return sprintf ".word\t0x%08x !%s", |
|
1002 + 0x81b00300|$rd<<25|$rs1<<14|$rs2, |
|
1003 + $ref; |
|
1004 +} |
|
1005 + |
|
1006 +foreach (split("\n",$code)) { |
|
1007 + s/\`([^\`]*)\`/eval $1/ge; |
|
1008 + |
|
1009 + s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ |
|
1010 + &unvis($1,$2,$3,$4) |
|
1011 + /ge; |
|
1012 + s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ |
|
1013 + &unalignaddr($1,$2,$3,$4) |
|
1014 + /ge; |
|
1015 + |
|
1016 + print $_,"\n"; |
|
1017 +} |
|
1018 + |
|
1019 close STDOUT; |
|
1020 Index: crypto/des/Makefile |
|
1021 =================================================================== |
|
1022 diff -ru openssl-1.0.1e/crypto/des/Makefile.orig openssl-1.0.1e/crypto/des/Makefile |
|
1023 --- a/crypto/des/Makefile |
|
1024 +++ b/crypto/des/Makefile |
|
1025 @@ -61,6 +61,8 @@ des: des.o cbc3_enc.o lib |
|
1026 |
|
1027 des_enc-sparc.S: asm/des_enc.m4 |
|
1028 m4 -B 8192 asm/des_enc.m4 > des_enc-sparc.S |
|
1029 +dest4-sparcv9.s: asm/dest4-sparcv9.pl |
|
1030 + $(PERL) asm/dest4-sparcv9.pl $(CFLAGS) > $@ |
|
1031 |
|
1032 des-586.s: asm/des-586.pl ../perlasm/x86asm.pl ../perlasm/cbc.pl |
|
1033 $(PERL) asm/des-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@ |
|
1034 Index: crypto/evp/e_des.c |
|
1035 =================================================================== |
|
1036 diff -ru openssl-1.0.1e/crypto/evp/e_des.c.orig openssl-1.0.1e/crypto/evp/e_des.c |
|
1037 --- a/crypto/evp/e_des.c |
|
1038 +++ b/crypto/evp/e_des.c |
|
1039 @@ -65,6 +65,30 @@ |
|
1040 #include <openssl/des.h> |
|
1041 #include <openssl/rand.h> |
|
1042 |
|
1043 +typedef struct |
|
1044 + { |
|
1045 + union { double align; DES_key_schedule ks; } ks; |
|
1046 + union { |
|
1047 + void (*cbc)(const void *,void *,size_t,const void *,void *); |
|
1048 + } stream; |
|
1049 + } EVP_DES_KEY; |
|
1050 + |
|
1051 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) |
|
1052 +/* ---------^^^ this is not a typo, just a way to detect that |
|
1053 + * assembler support was in general requested... */ |
|
1054 +#include "sparc_arch.h" |
|
1055 + |
|
1056 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1057 + |
|
1058 +#define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) |
|
1059 + |
|
1060 +void des_t4_key_expand(const void *key, DES_key_schedule *ks); |
|
1061 +void des_t4_cbc_encrypt(const void *inp,void *out,size_t len, |
|
1062 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1063 +void des_t4_cbc_decrypt(const void *inp,void *out,size_t len, |
|
1064 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1065 +#endif |
|
1066 + |
|
1067 static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1068 const unsigned char *iv, int enc); |
|
1069 static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); |
|
1070 @@ -99,6 +123,13 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1071 static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1072 const unsigned char *in, size_t inl) |
|
1073 { |
|
1074 + EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; |
|
1075 + |
|
1076 + if (dat->stream.cbc) |
|
1077 + { |
|
1078 + (*dat->stream.cbc)(in,out,inl,&dat->ks.ks,ctx->iv); |
|
1079 + return 1; |
|
1080 + } |
|
1081 while(inl>=EVP_MAXCHUNK) |
|
1082 { |
|
1083 DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data, |
|
1084 @@ -176,18 +207,18 @@ |
|
1085 return 1; |
|
1086 } |
|
1087 |
|
1088 -BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64, |
|
1089 +BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64, |
|
1090 EVP_CIPH_RAND_KEY, des_init_key, NULL, |
|
1091 EVP_CIPHER_set_asn1_iv, |
|
1092 EVP_CIPHER_get_asn1_iv, |
|
1093 des_ctrl) |
|
1094 |
|
1095 -BLOCK_CIPHER_def_cfb(des,DES_key_schedule,NID_des,8,8,1, |
|
1096 +BLOCK_CIPHER_def_cfb(des,EVP_DES_KEY,NID_des,8,8,1, |
|
1097 EVP_CIPH_RAND_KEY, des_init_key,NULL, |
|
1098 EVP_CIPHER_set_asn1_iv, |
|
1099 EVP_CIPHER_get_asn1_iv,des_ctrl) |
|
1100 |
|
1101 -BLOCK_CIPHER_def_cfb(des,DES_key_schedule,NID_des,8,8,8, |
|
1102 +BLOCK_CIPHER_def_cfb(des,EVP_DES_KEY,NID_des,8,8,8, |
|
1103 EVP_CIPH_RAND_KEY,des_init_key,NULL, |
|
1104 EVP_CIPHER_set_asn1_iv, |
|
1105 EVP_CIPHER_get_asn1_iv,des_ctrl) |
|
1106 @@ -196,8 +227,25 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1107 const unsigned char *iv, int enc) |
|
1108 { |
|
1109 DES_cblock *deskey = (DES_cblock *)key; |
|
1110 + EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; |
|
1111 + |
|
1112 + dat->stream.cbc = NULL; |
|
1113 +#if defined(SPARC_DES_CAPABLE) |
|
1114 + if (SPARC_DES_CAPABLE) |
|
1115 + { |
|
1116 + int mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1117 + |
|
1118 + if (mode == EVP_CIPH_CBC_MODE) |
|
1119 + { |
|
1120 + des_t4_key_expand(key,&dat->ks.ks); |
|
1121 + dat->stream.cbc = enc ? des_t4_cbc_encrypt : |
|
1122 + des_t4_cbc_decrypt; |
|
1123 + return 1; |
|
1124 + } |
|
1125 + } |
|
1126 +#endif |
|
1127 #ifdef EVP_CHECK_DES_KEY |
|
1128 - if(DES_set_key_checked(deskey,ctx->cipher_data) != 0) |
|
1129 + if(DES_set_key_checked(deskey,dat->ks.ks) != 0) |
|
1130 return 0; |
|
1131 #else |
|
1132 DES_set_key_unchecked(deskey,ctx->cipher_data); |
|
1133 Index: crypto/evp/e_des3.c |
|
1134 =================================================================== |
|
1135 diff -ru openssl-1.0.1e/crypto/evp/e_des3.c.orig openssl-1.0.1e/crypto/evp/e_des3.c |
|
1136 --- a/crypto/evp/e_des3.c |
|
1137 +++ b/crypto/evp/e_des3.c |
|
1138 @@ -65,6 +65,33 @@ |
|
1139 #include <openssl/des.h> |
|
1140 #include <openssl/rand.h> |
|
1141 |
|
1142 +typedef struct |
|
1143 + { |
|
1144 + union { double align; DES_key_schedule ks[3]; } ks; |
|
1145 + union { |
|
1146 + void (*cbc)(const void *,void *,size_t,const void *,void *); |
|
1147 + } stream; |
|
1148 + } DES_EDE_KEY; |
|
1149 +#define ks1 ks.ks[0] |
|
1150 +#define ks2 ks.ks[1] |
|
1151 +#define ks3 ks.ks[2] |
|
1152 + |
|
1153 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) |
|
1154 +/* ---------^^^ this is not a typo, just a way to detect that |
|
1155 + * assembler support was in general requested... */ |
|
1156 +#include "sparc_arch.h" |
|
1157 + |
|
1158 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1159 + |
|
1160 +#define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) |
|
1161 + |
|
1162 +void des_t4_key_expand(const void *key, DES_key_schedule *ks); |
|
1163 +void des_t4_ede3_cbc_encrypt(const void *inp,void *out,size_t len, |
|
1164 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1165 +void des_t4_ede3_cbc_decrypt(const void *inp,void *out,size_t len, |
|
1166 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1167 +#endif |
|
1168 + |
|
1169 #ifndef OPENSSL_FIPS |
|
1170 |
|
1171 static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1172 @@ -75,13 +100,6 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1173 |
|
1174 static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); |
|
1175 |
|
1176 -typedef struct |
|
1177 - { |
|
1178 - DES_key_schedule ks1;/* key schedule */ |
|
1179 - DES_key_schedule ks2;/* key schedule (for ede) */ |
|
1180 - DES_key_schedule ks3;/* key schedule (for ede3) */ |
|
1181 - } DES_EDE_KEY; |
|
1182 - |
|
1183 #define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data) |
|
1184 |
|
1185 /* Because of various casts and different args can't use IMPLEMENT_BLOCK_CIPHER */ |
|
1186 @@ -121,6 +141,8 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1187 static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1188 const unsigned char *in, size_t inl) |
|
1189 { |
|
1190 + DES_EDE_KEY *dat = data(ctx); |
|
1191 + |
|
1192 #ifdef KSSL_DEBUG |
|
1193 { |
|
1194 int i; |
|
1195 @@ -132,10 +154,16 @@ |
|
1196 printf("\n"); |
|
1197 } |
|
1198 #endif /* KSSL_DEBUG */ |
|
1199 + if (dat->stream.cbc) |
|
1200 + { |
|
1201 + (*dat->stream.cbc)(in,out,inl,&dat->ks,ctx->iv); |
|
1202 + return 1; |
|
1203 + } |
|
1204 + |
|
1205 while (inl>=EVP_MAXCHUNK) |
|
1206 { |
|
1207 DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK, |
|
1208 - &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3, |
|
1209 + &dat->ks1, &dat->ks2, &dat->ks3, |
|
1210 (DES_cblock *)ctx->iv, ctx->encrypt); |
|
1211 inl-=EVP_MAXCHUNK; |
|
1212 in +=EVP_MAXCHUNK; |
|
1213 @@ -143,7 +169,7 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1214 } |
|
1215 if (inl) |
|
1216 DES_ede3_cbc_encrypt(in, out, (long)inl, |
|
1217 - &data(ctx)->ks1, &data(ctx)->ks2, &data(ctx)->ks3, |
|
1218 + &dat->ks1, &dat->ks2, &dat->ks3, |
|
1219 (DES_cblock *)ctx->iv, ctx->encrypt); |
|
1220 return 1; |
|
1221 } |
|
1222 @@ -208,9 +234,8 @@ static int des_ede3_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1223 } |
|
1224 |
|
1225 BLOCK_CIPHER_defs(des_ede, DES_EDE_KEY, NID_des_ede, 8, 16, 8, 64, |
|
1226 - EVP_CIPH_RAND_KEY, des_ede_init_key, NULL, |
|
1227 - EVP_CIPHER_set_asn1_iv, |
|
1228 - EVP_CIPHER_get_asn1_iv, |
|
1229 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1230 + des_ede_init_key, NULL, NULL, NULL, |
|
1231 des3_ctrl) |
|
1232 |
|
1233 #define des_ede3_cfb64_cipher des_ede_cfb64_cipher |
|
1234 @@ -219,37 +246,53 @@ |
|
1235 #define des_ede3_ecb_cipher des_ede_ecb_cipher |
|
1236 |
|
1237 BLOCK_CIPHER_defs(des_ede3, DES_EDE_KEY, NID_des_ede3, 8, 24, 8, 64, |
|
1238 - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, |
|
1239 - EVP_CIPHER_set_asn1_iv, |
|
1240 - EVP_CIPHER_get_asn1_iv, |
|
1241 - des3_ctrl) |
|
1242 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1243 + des_ede3_init_key, NULL, NULL, NULL, |
|
1244 + des3_ctrl) |
|
1245 |
|
1246 BLOCK_CIPHER_def_cfb(des_ede3,DES_EDE_KEY,NID_des_ede3,24,8,1, |
|
1247 - EVP_CIPH_RAND_KEY, des_ede3_init_key,NULL, |
|
1248 - EVP_CIPHER_set_asn1_iv, |
|
1249 - EVP_CIPHER_get_asn1_iv, |
|
1250 - des3_ctrl) |
|
1251 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1252 + des_ede3_init_key, NULL, NULL, NULL, |
|
1253 + des3_ctrl) |
|
1254 |
|
1255 BLOCK_CIPHER_def_cfb(des_ede3,DES_EDE_KEY,NID_des_ede3,24,8,8, |
|
1256 - EVP_CIPH_RAND_KEY, des_ede3_init_key,NULL, |
|
1257 - EVP_CIPHER_set_asn1_iv, |
|
1258 - EVP_CIPHER_get_asn1_iv, |
|
1259 - des3_ctrl) |
|
1260 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1261 + des_ede3_init_key, NULL, NULL, NULL, |
|
1262 + des3_ctrl) |
|
1263 |
|
1264 static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1265 const unsigned char *iv, int enc) |
|
1266 { |
|
1267 DES_cblock *deskey = (DES_cblock *)key; |
|
1268 + DES_EDE_KEY *dat = data(ctx); |
|
1269 + |
|
1270 + dat->stream.cbc = NULL; |
|
1271 +#if defined(SPARC_DES_CAPABLE) |
|
1272 + if (SPARC_DES_CAPABLE) |
|
1273 + { |
|
1274 + int mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1275 + |
|
1276 + if (mode == EVP_CIPH_CBC_MODE) |
|
1277 + { |
|
1278 + des_t4_key_expand(&deskey[0],&dat->ks1); |
|
1279 + des_t4_key_expand(&deskey[1],&dat->ks2); |
|
1280 + memcpy(&dat->ks3,&dat->ks1,sizeof(dat->ks1)); |
|
1281 + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : |
|
1282 + des_t4_ede3_cbc_decrypt; |
|
1283 + return 1; |
|
1284 + } |
|
1285 + } |
|
1286 +#endif |
|
1287 #ifdef EVP_CHECK_DES_KEY |
|
1288 - if (DES_set_key_checked(&deskey[0],&data(ctx)->ks1) |
|
1289 - !! DES_set_key_checked(&deskey[1],&data(ctx)->ks2)) |
|
1290 + if (DES_set_key_checked(&deskey[0],&dat->ks1) |
|
1291 + !! DES_set_key_checked(&deskey[1],&dat->ks2)) |
|
1292 return 0; |
|
1293 #else |
|
1294 - DES_set_key_unchecked(&deskey[0],&data(ctx)->ks1); |
|
1295 - DES_set_key_unchecked(&deskey[1],&data(ctx)->ks2); |
|
1296 + DES_set_key_unchecked(&deskey[0],&dat->ks1); |
|
1297 + DES_set_key_unchecked(&deskey[1],&dat->ks2); |
|
1298 #endif |
|
1299 - memcpy(&data(ctx)->ks3,&data(ctx)->ks1, |
|
1300 - sizeof(data(ctx)->ks1)); |
|
1301 + memcpy(&dat->ks3,&dat->ks1, |
|
1302 + sizeof(dat->ks1)); |
|
1303 return 1; |
|
1304 } |
|
1305 |
|
1306 @@ -257,6 +300,8 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1307 const unsigned char *iv, int enc) |
|
1308 { |
|
1309 DES_cblock *deskey = (DES_cblock *)key; |
|
1310 + DES_EDE_KEY *dat = data(ctx); |
|
1311 + |
|
1312 #ifdef KSSL_DEBUG |
|
1313 { |
|
1314 int i; |
|
1315 @@ -268,15 +313,32 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1316 } |
|
1317 #endif /* KSSL_DEBUG */ |
|
1318 |
|
1319 + dat->stream.cbc = NULL; |
|
1320 +#if defined(SPARC_DES_CAPABLE) |
|
1321 + if (SPARC_DES_CAPABLE) |
|
1322 + { |
|
1323 + int mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1324 + |
|
1325 + if (mode == EVP_CIPH_CBC_MODE) |
|
1326 + { |
|
1327 + des_t4_key_expand(&deskey[0],&dat->ks1); |
|
1328 + des_t4_key_expand(&deskey[1],&dat->ks2); |
|
1329 + des_t4_key_expand(&deskey[2],&dat->ks3); |
|
1330 + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : |
|
1331 + des_t4_ede3_cbc_decrypt; |
|
1332 + return 1; |
|
1333 + } |
|
1334 + } |
|
1335 +#endif |
|
1336 #ifdef EVP_CHECK_DES_KEY |
|
1337 - if (DES_set_key_checked(&deskey[0],&data(ctx)->ks1) |
|
1338 - || DES_set_key_checked(&deskey[1],&data(ctx)->ks2) |
|
1339 - || DES_set_key_checked(&deskey[2],&data(ctx)->ks3)) |
|
1340 + if (DES_set_key_checked(&deskey[0],&dat->ks1) |
|
1341 + || DES_set_key_checked(&deskey[1],&dat->ks2) |
|
1342 + || DES_set_key_checked(&deskey[2],&dat->ks3)) |
|
1343 return 0; |
|
1344 #else |
|
1345 - DES_set_key_unchecked(&deskey[0],&data(ctx)->ks1); |
|
1346 - DES_set_key_unchecked(&deskey[1],&data(ctx)->ks2); |
|
1347 - DES_set_key_unchecked(&deskey[2],&data(ctx)->ks3); |
|
1348 + DES_set_key_unchecked(&deskey[0],&dat->ks1); |
|
1349 + DES_set_key_unchecked(&deskey[1],&dat->ks2); |
|
1350 + DES_set_key_unchecked(&deskey[2],&dat->ks3); |
|
1351 #endif |
|
1352 return 1; |
|
1353 } |
|
1354 Index: openssl/crypto/bn/Makefile |
|
1355 =================================================================== |
|
1356 diff -ru openssl-1.0.1e/crypto/bn/Makefile openssl-1.0.1e/crypto/bn/Makefile.new |
|
1357 --- openssl-1.0.1e/crypto/bn/Makefile 2011-05-24 17:02:24.000000000 -0700 |
|
1358 +++ openssl-1.0.1e/crypto/bn/Makefile 2011-07-27 10:48:17.817470000 -0700 |
|
1359 @@ -77,6 +77,12 @@ |
|
1360 $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@ |
|
1361 sparcv9-mont.s: asm/sparcv9-mont.pl |
|
1362 $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@ |
|
1363 +vis3-mont.s: asm/vis3-mont.pl |
|
1364 + $(PERL) asm/vis3-mont.pl $(CFLAGS) > $@ |
|
1365 +sparct4-mont.S: asm/sparct4-mont.pl |
|
1366 + $(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@ |
|
1367 +sparcv9-gf2m.S: asm/sparcv9-gf2m.pl |
|
1368 + $(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@ |
|
1369 |
|
1370 bn-mips3.o: asm/mips3.s |
|
1371 @if [ "$(CC)" = "gcc" ]; then \ |
|
1372 Index: openssl/crypto/bn/bn_exp.c |
|
1373 =================================================================== |
|
1374 diff -ru openssl-1.0.1e/crypto/bn/bn_exp.c openssl-1.0.1e/crypto/bn/bn_exp.c.new |
|
1375 --- bn_exp.c 2011/10/29 19:25:13 1.38 |
|
1376 +++ bn_exp.c 2012/11/17 10:34:11 1.39 |
|
1377 @@ -123,8 +123,15 @@ |
|
1378 # ifndef alloca |
|
1379 # define alloca(s) __builtin_alloca((s)) |
|
1380 # endif |
|
1381 +#else |
|
1382 +#include <alloca.h> |
|
1383 #endif |
|
1384 |
|
1385 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1386 +# include "sparc_arch.h" |
|
1387 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1388 +#endif |
|
1389 + |
|
1390 /* maximum precomputation table size for *variable* sliding windows */ |
|
1391 #define TABLE_SIZE 32 |
|
1392 |
|
1393 @@ -467,7 +467,15 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1394 wstart=bits-1; /* The top bit of the window */ |
|
1395 wend=0; /* The bottom bit of the window */ |
|
1396 |
|
1397 +#if 1 /* by Shay Gueron's suggestion */ |
|
1398 + j = mont->N.top; /* borrow j */ |
|
1399 + if (bn_wexpand(r,j) == NULL) goto err; |
|
1400 + r->d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */ |
|
1401 + for(i=1;i<j;i++) r->d[i] = (~m->d[i])&BN_MASK2; |
|
1402 + r->top = j; |
|
1403 +#else |
|
1404 if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; |
|
1405 +#endif |
|
1406 for (;;) |
|
1407 { |
|
1408 if (BN_is_bit_set(p,wstart) == 0) |
|
1409 @@ -519,6 +527,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1410 start=0; |
|
1411 if (wstart < 0) break; |
|
1412 } |
|
1413 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
|
1414 + if (OPENSSL_sparcv9cap_P[0]&(SPARCV9_VIS3|SPARCV9_PREFER_FPU)) |
|
1415 + { |
|
1416 + j = mont->N.top; /* borrow j */ |
|
1417 + val[0]->d[0] = 1; /* borrow val[0] */ |
|
1418 + for (i=1;i<j;i++) val[0]->d[i] = 0; |
|
1419 + val[0]->top = j; |
|
1420 + if (!BN_mod_mul_montgomery(rr,r,val[0],mont,ctx)) goto err; |
|
1421 + } |
|
1422 + else |
|
1423 +#endif |
|
1424 if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; |
|
1425 ret=1; |
|
1426 err: |
|
1427 @@ -528,6 +547,28 @@ err: |
|
1428 return(ret); |
|
1429 } |
|
1430 |
|
1431 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
|
1432 +static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) |
|
1433 + { |
|
1434 + BN_ULONG ret=0; |
|
1435 + int wordpos; |
|
1436 + |
|
1437 + wordpos = bitpos/BN_BITS2; |
|
1438 + bitpos %= BN_BITS2; |
|
1439 + if (wordpos>=0 && wordpos < a->top) |
|
1440 + { |
|
1441 + ret = a->d[wordpos]&BN_MASK2; |
|
1442 + if (bitpos) |
|
1443 + { |
|
1444 + ret >>= bitpos; |
|
1445 + if (++wordpos < a->top) |
|
1446 + ret |= a->d[wordpos]<<(BN_BITS2-bitpos); |
|
1447 + } |
|
1448 + } |
|
1449 + |
|
1450 + return ret&BN_MASK2; |
|
1451 +} |
|
1452 +#endif |
|
1453 |
|
1454 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout |
|
1455 * so that accessing any of these table values shows the same access pattern as far |
|
1456 @@ -587,6 +592,9 @@ |
|
1457 int powerbufLen = 0; |
|
1458 unsigned char *powerbuf=NULL; |
|
1459 BIGNUM tmp, am; |
|
1460 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1461 + unsigned int t4=0; |
|
1462 +#endif |
|
1463 |
|
1464 bn_check_top(a); |
|
1465 bn_check_top(p); |
|
1466 @@ -621,9 +629,18 @@ |
|
1467 |
|
1468 /* Get the window size to use with size of p. */ |
|
1469 window = BN_window_bits_for_ctime_exponent_size(bits); |
|
1470 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1471 + if (window>=5 && (top&15)==0 && top<=64 && |
|
1472 + (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== |
|
1473 + (CFR_MONTMUL|CFR_MONTSQR) && |
|
1474 + (t4=OPENSSL_sparcv9cap_P[0])) |
|
1475 + window=5; |
|
1476 + else |
|
1477 +#endif |
|
1478 #if defined(OPENSSL_BN_ASM_MONT5) |
|
1479 if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */ |
|
1480 #endif |
|
1481 + (void)0; |
|
1482 |
|
1483 /* Allocate a buffer large enough to hold all of the pre-computed |
|
1484 * powers of am, am itself and tmp. |
|
1485 @@ -656,13 +715,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1486 tmp.flags = am.flags = BN_FLG_STATIC_DATA; |
|
1487 |
|
1488 /* prepare a^0 in Montgomery domain */ |
|
1489 -#if 1 |
|
1490 - if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err; |
|
1491 -#else |
|
1492 +#if 1 /* by Shay Gueron's suggestion */ |
|
1493 tmp.d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */ |
|
1494 for (i=1;i<top;i++) |
|
1495 tmp.d[i] = (~m->d[i])&BN_MASK2; |
|
1496 tmp.top = top; |
|
1497 +#else |
|
1498 + if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err; |
|
1499 #endif |
|
1500 |
|
1501 /* prepare a^1 in Montgomery domain */ |
|
1502 @@ -673,6 +690,121 @@ |
|
1503 } |
|
1504 else if (!BN_to_montgomery(&am,a,mont,ctx)) goto err; |
|
1505 |
|
1506 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1507 + if (t4) |
|
1508 + { |
|
1509 + typedef int (*bn_pwr5_mont_f)(BN_ULONG *tp,const BN_ULONG *np, |
|
1510 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1511 + int bn_pwr5_mont_t4_8(BN_ULONG *tp,const BN_ULONG *np, |
|
1512 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1513 + int bn_pwr5_mont_t4_16(BN_ULONG *tp,const BN_ULONG *np, |
|
1514 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1515 + int bn_pwr5_mont_t4_24(BN_ULONG *tp,const BN_ULONG *np, |
|
1516 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1517 + int bn_pwr5_mont_t4_32(BN_ULONG *tp,const BN_ULONG *np, |
|
1518 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1519 + static const bn_pwr5_mont_f pwr5_funcs[4] = { |
|
1520 + bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16, |
|
1521 + bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32 }; |
|
1522 + bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top/16-1]; |
|
1523 + |
|
1524 + typedef int (*bn_mul_mont_f)(BN_ULONG *rp,const BN_ULONG *ap, |
|
1525 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1526 + int bn_mul_mont_t4_8(BN_ULONG *rp,const BN_ULONG *ap, |
|
1527 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1528 + int bn_mul_mont_t4_16(BN_ULONG *rp,const BN_ULONG *ap, |
|
1529 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1530 + int bn_mul_mont_t4_24(BN_ULONG *rp,const BN_ULONG *ap, |
|
1531 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1532 + int bn_mul_mont_t4_32(BN_ULONG *rp,const BN_ULONG *ap, |
|
1533 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1534 + static const bn_mul_mont_f mul_funcs[4] = { |
|
1535 + bn_mul_mont_t4_8, bn_mul_mont_t4_16, |
|
1536 + bn_mul_mont_t4_24, bn_mul_mont_t4_32 }; |
|
1537 + bn_mul_mont_f mul_worker = mul_funcs[top/16-1]; |
|
1538 + |
|
1539 + void bn_mul_mont_vis3(BN_ULONG *rp,const BN_ULONG *ap, |
|
1540 + const void *bp,const BN_ULONG *np, |
|
1541 + const BN_ULONG *n0,int num); |
|
1542 + void bn_mul_mont_t4(BN_ULONG *rp,const BN_ULONG *ap, |
|
1543 + const void *bp,const BN_ULONG *np, |
|
1544 + const BN_ULONG *n0,int num); |
|
1545 + void bn_mul_mont_gather5_t4(BN_ULONG *rp,const BN_ULONG *ap, |
|
1546 + const void *table,const BN_ULONG *np, |
|
1547 + const BN_ULONG *n0,int num,int power); |
|
1548 + void bn_flip_n_scatter5_t4(const BN_ULONG *inp,size_t num, |
|
1549 + void *table,size_t power); |
|
1550 + void bn_gather5_t4(BN_ULONG *out,size_t num, |
|
1551 + void *table,size_t power); |
|
1552 + void bn_flip_t4(BN_ULONG *dst,BN_ULONG *src,size_t num); |
|
1553 + |
|
1554 + BN_ULONG *np=mont->N.d, *n0=mont->n0; |
|
1555 + int stride = 5*(6-(top/16-1)); /* multiple of 5, but less than 32 */ |
|
1556 + |
|
1557 + /* BN_to_montgomery can contaminate words above .top |
|
1558 + * [in BN_DEBUG[_DEBUG] build]... */ |
|
1559 + for (i=am.top; i<top; i++) am.d[i]=0; |
|
1560 + for (i=tmp.top; i<top; i++) tmp.d[i]=0; |
|
1561 + |
|
1562 + bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,0); |
|
1563 + bn_flip_n_scatter5_t4(am.d,top,powerbuf,1); |
|
1564 + if (!(*mul_worker)(tmp.d,am.d,am.d,np,n0) && |
|
1565 + !(*mul_worker)(tmp.d,am.d,am.d,np,n0)) |
|
1566 + bn_mul_mont_vis3(tmp.d,am.d,am.d,np,n0,top); |
|
1567 + bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,2); |
|
1568 + |
|
1569 + for (i=3; i<32; i++) |
|
1570 + { |
|
1571 + /* Calculate a^i = a^(i-1) * a */ |
|
1572 + if (!(*mul_worker)(tmp.d,tmp.d,am.d,np,n0) && |
|
1573 + !(*mul_worker)(tmp.d,tmp.d,am.d,np,n0)) |
|
1574 + bn_mul_mont_vis3(tmp.d,tmp.d,am.d,np,n0,top); |
|
1575 + bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,i); |
|
1576 + } |
|
1577 + |
|
1578 + /* switch to 64-bit domain */ |
|
1579 + np = alloca(top*sizeof(BN_ULONG)); |
|
1580 + top /= 2; |
|
1581 + bn_flip_t4(np,mont->N.d,top); |
|
1582 + |
|
1583 + bits--; |
|
1584 + for (wvalue=0, i=bits%5; i>=0; i--,bits--) |
|
1585 + wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); |
|
1586 + bn_gather5_t4(tmp.d,top,powerbuf,wvalue); |
|
1587 + |
|
1588 + /* Scan the exponent one window at a time starting from the most |
|
1589 + * significant bits. |
|
1590 + */ |
|
1591 + while (bits >= 0) |
|
1592 + { |
|
1593 + if (bits < stride) stride = bits+1; |
|
1594 + bits -= stride; |
|
1595 + wvalue = (bn_get_bits(p,bits+1)); |
|
1596 + |
|
1597 + if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) continue; |
|
1598 + /* retry once and fall back */ |
|
1599 + if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) continue; |
|
1600 + |
|
1601 + bits += stride-5; |
|
1602 + wvalue >>= stride-5; |
|
1603 + wvalue &= 31; |
|
1604 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1605 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1606 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1607 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1608 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1609 + bn_mul_mont_gather5_t4(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue); |
|
1610 + } |
|
1611 + |
|
1612 + bn_flip_t4(tmp.d,tmp.d,top); |
|
1613 + top *= 2; |
|
1614 + /* back to 32-bit domain */ |
|
1615 + tmp.top=top; |
|
1616 + bn_correct_top(&tmp); |
|
1617 + OPENSSL_cleanse(np,top*sizeof(BN_ULONG)); |
|
1618 + } |
|
1619 + else |
|
1620 +#endif |
|
1621 #if defined(OPENSSL_BN_ASM_MONT5) |
|
1622 /* This optimization uses ideas from http://eprint.iacr.org/2011/239, |
|
1623 * specifically optimization of cache-timing attack countermeasures |
|
1624 @@ -816,6 +990,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1625 } |
|
1626 |
|
1627 /* Convert the final result from montgomery to standard format */ |
|
1628 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
|
1629 + if (OPENSSL_sparcv9cap_P[0]&(SPARCV9_VIS3|SPARCV9_PREFER_FPU)) |
|
1630 + { |
|
1631 + am.d[0] = 1; /* borrow am */ |
|
1632 + for (i=1;i<top;i++) am.d[i] = 0; |
|
1633 + if (!BN_mod_mul_montgomery(rr,&tmp,&am,mont,ctx)) goto err; |
|
1634 + } |
|
1635 + else |
|
1636 +#endif |
|
1637 if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err; |
|
1638 ret=1; |
|
1639 err: |
|
1640 Index: openssl/apps/speed.c |
|
1641 =================================================================== |
|
1642 diff -ru openssl-1.0.1e/apps/speed.c openssl-1.0.1e/apps/speed.c |
|
1643 --- openssl-1.0.1e/apps/speed.c 2011-05-24 17:02:24.000000000 -0700 |
|
1644 +++ openssl-1.0.1e/apps/speed.c 2011-07-27 10:48:17.817470000 -0700 |
|
1645 @@ -1551,7 +1551,7 @@ |
|
1646 print_message(names[D_MD5],c[D_MD5][j],lengths[j]); |
|
1647 Time_F(START); |
|
1648 for (count=0,run=1; COND(c[D_MD5][j]); count++) |
|
1649 - EVP_Digest(&(buf[0]),(unsigned long)lengths[j],&(md5[0]),NULL,EVP_get_digestbyname("md5"),NULL); |
|
1650 + MD5(buf,lengths[j],md5); |
|
1651 d=Time_F(STOP); |
|
1652 print_result(D_MD5,j,count,d); |
|
1653 } |
|
1654 @@ -1591,7 +1591,7 @@ |
|
1655 print_message(names[D_SHA1],c[D_SHA1][j],lengths[j]); |
|
1656 Time_F(START); |
|
1657 for (count=0,run=1; COND(c[D_SHA1][j]); count++) |
|
1658 - EVP_Digest(buf,(unsigned long)lengths[j],&(sha[0]),NULL,EVP_sha1(),NULL); |
|
1659 + SHA1(buf,lengths[j],sha); |
|
1660 d=Time_F(STOP); |
|
1661 print_result(D_SHA1,j,count,d); |
|
1662 } |
|
1663 Index: openssl/crypto/aes/Makefile |
|
1664 =================================================================== |
|
1665 --- Makefile Thu May 2 13:42:37 2013 |
|
1666 +++ Makefile.orig Thu May 2 13:41:51 2013 |
|
1667 @@ -69,6 +69,9 @@ |
|
1668 aes-sparcv9.s: asm/aes-sparcv9.pl |
|
1669 $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@ |
|
1670 |
|
1671 +aest4-sparcv9.s: asm/aest4-sparcv9.pl |
|
1672 + $(PERL) asm/aest4-sparcv9.pl $(CFLAGS) > $@ |
|
1673 + |
|
1674 aes-ppc.s: asm/aes-ppc.pl |
|
1675 $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ |
|
1676 |
|
1677 Index: openssl/crypto/evp/e_aes.c |
|
1678 =================================================================== |
|
1679 --- e_aes.c Mon Feb 11 07:26:04 2013 |
|
1680 +++ e_aes.c.56 Thu May 2 14:26:35 2013 |
|
1681 @@ -56,13 +58,12 @@ |
|
1682 #include <assert.h> |
|
1683 #include <openssl/aes.h> |
|
1684 #include "evp_locl.h" |
|
1685 -#ifndef OPENSSL_FIPS |
|
1686 #include "modes_lcl.h" |
|
1687 #include <openssl/rand.h> |
|
1688 |
|
1689 typedef struct |
|
1690 { |
|
1691 - AES_KEY ks; |
|
1692 + union { double align; AES_KEY ks; } ks; |
|
1693 block128_f block; |
|
1694 union { |
|
1695 cbc128_f cbc; |
|
1696 @@ -72,7 +73,7 @@ |
|
1697 |
|
1698 typedef struct |
|
1699 { |
|
1700 - AES_KEY ks; /* AES key schedule to use */ |
|
1701 + union { double align; AES_KEY ks; } ks; /* AES key schedule to use */ |
|
1702 int key_set; /* Set if key initialised */ |
|
1703 int iv_set; /* Set if an iv is set */ |
|
1704 GCM128_CONTEXT gcm; |
|
1705 @@ -86,7 +87,7 @@ |
|
1706 |
|
1707 typedef struct |
|
1708 { |
|
1709 - AES_KEY ks1, ks2; /* AES key schedules to use */ |
|
1710 + union { double align; AES_KEY ks; } ks1, ks2; /* AES key schedules to use */ |
|
1711 XTS128_CONTEXT xts; |
|
1712 void (*stream)(const unsigned char *in, |
|
1713 unsigned char *out, size_t length, |
|
1714 @@ -96,7 +97,7 @@ |
|
1715 |
|
1716 typedef struct |
|
1717 { |
|
1718 - AES_KEY ks; /* AES key schedule to use */ |
|
1719 + union { double align; AES_KEY ks; } ks; /* AES key schedule to use */ |
|
1720 int key_set; /* Set if key initialised */ |
|
1721 int iv_set; /* Set if an iv is set */ |
|
1722 int tag_set; /* Set if tag is valid */ |
|
1723 @@ -160,7 +161,7 @@ |
|
1724 defined(_M_AMD64) || defined(_M_X64) || \ |
|
1725 defined(__INTEL__) ) |
|
1726 |
|
1727 -extern unsigned int OPENSSL_ia32cap_P[2]; |
|
1728 +extern unsigned int OPENSSL_ia32cap_P[]; |
|
1729 |
|
1730 #ifdef VPAES_ASM |
|
1731 #define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) |
|
1732 @@ -310,7 +311,7 @@ |
|
1733 return 1; |
|
1734 if (key) |
|
1735 { |
|
1736 - aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
|
1737 + aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); |
|
1738 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, |
|
1739 (block128_f)aesni_encrypt); |
|
1740 gctx->ctr = (ctr128_f)aesni_ctr32_encrypt_blocks; |
|
1741 @@ -355,19 +356,19 @@ |
|
1742 /* key_len is two AES keys */ |
|
1743 if (enc) |
|
1744 { |
|
1745 - aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
1746 + aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
1747 xctx->xts.block1 = (block128_f)aesni_encrypt; |
|
1748 xctx->stream = aesni_xts_encrypt; |
|
1749 } |
|
1750 else |
|
1751 { |
|
1752 - aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
1753 + aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
1754 xctx->xts.block1 = (block128_f)aesni_decrypt; |
|
1755 xctx->stream = aesni_xts_decrypt; |
|
1756 } |
|
1757 |
|
1758 aesni_set_encrypt_key(key + ctx->key_len/2, |
|
1759 - ctx->key_len * 4, &xctx->ks2); |
|
1760 + ctx->key_len * 4, &xctx->ks2.ks); |
|
1761 xctx->xts.block2 = (block128_f)aesni_encrypt; |
|
1762 |
|
1763 xctx->xts.key1 = &xctx->ks1; |
|
1764 @@ -394,7 +395,7 @@ |
|
1765 return 1; |
|
1766 if (key) |
|
1767 { |
|
1768 - aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); |
|
1769 + aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); |
|
1770 CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
1771 &cctx->ks, (block128_f)aesni_encrypt); |
|
1772 cctx->str = enc?(ccm128_f)aesni_ccm64_encrypt_blocks : |
|
1773 @@ -456,6 +457,379 @@ |
|
1774 const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ |
|
1775 { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; } |
|
1776 |
|
1777 +#elif defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) |
|
1778 + |
|
1779 +#include "sparc_arch.h" |
|
1780 + |
|
1781 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1782 + |
|
1783 +#define SPARC_AES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_AES) |
|
1784 + |
|
1785 +void aes_t4_set_encrypt_key (const unsigned char *key, int bits, |
|
1786 + AES_KEY *ks); |
|
1787 +void aes_t4_set_decrypt_key (const unsigned char *key, int bits, |
|
1788 + AES_KEY *ks); |
|
1789 +void aes_t4_encrypt (const unsigned char *in, unsigned char *out, |
|
1790 + const AES_KEY *key); |
|
1791 +void aes_t4_decrypt (const unsigned char *in, unsigned char *out, |
|
1792 + const AES_KEY *key); |
|
1793 +/* |
|
1794 + * Key-length specific subroutines were chosen for following reason. |
|
1795 + * Each SPARC T4 core can execute up to 8 threads which share core's |
|
1796 + * resources. Loading as much key material to registers allows to |
|
1797 + * minimize references to shared memory interface, as well as amount |
|
1798 + * of instructions in inner loops [much needed on T4]. But then having |
|
1799 + * non-key-length specific routines would require conditional branches |
|
1800 + * either in inner loops or on subroutines' entries. Former is hardly |
|
1801 + * acceptable, while latter means code size increase to size occupied |
|
1802 + * by multiple key-length specific subroutines, so why fight? |
|
1803 + */ |
|
1804 +void aes128_t4_cbc_encrypt (const unsigned char *in, unsigned char *out, |
|
1805 + size_t len, const AES_KEY *key, |
|
1806 + unsigned char *ivec); |
|
1807 +void aes128_t4_cbc_decrypt (const unsigned char *in, unsigned char *out, |
|
1808 + size_t len, const AES_KEY *key, |
|
1809 + unsigned char *ivec); |
|
1810 +void aes192_t4_cbc_encrypt (const unsigned char *in, unsigned char *out, |
|
1811 + size_t len, const AES_KEY *key, |
|
1812 + unsigned char *ivec); |
|
1813 +void aes192_t4_cbc_decrypt (const unsigned char *in, unsigned char *out, |
|
1814 + size_t len, const AES_KEY *key, |
|
1815 + unsigned char *ivec); |
|
1816 +void aes256_t4_cbc_encrypt (const unsigned char *in, unsigned char *out, |
|
1817 + size_t len, const AES_KEY *key, |
|
1818 + unsigned char *ivec); |
|
1819 +void aes256_t4_cbc_decrypt (const unsigned char *in, unsigned char *out, |
|
1820 + size_t len, const AES_KEY *key, |
|
1821 + unsigned char *ivec); |
|
1822 +void aes128_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out, |
|
1823 + size_t blocks, const AES_KEY *key, |
|
1824 + unsigned char *ivec); |
|
1825 +void aes192_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out, |
|
1826 + size_t blocks, const AES_KEY *key, |
|
1827 + unsigned char *ivec); |
|
1828 +void aes256_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out, |
|
1829 + size_t blocks, const AES_KEY *key, |
|
1830 + unsigned char *ivec); |
|
1831 + |
|
1832 +static int aes_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1833 + const unsigned char *iv, int enc) /* iv is not referenced in this function */ |
|
1834 + { |
|
1835 + int ret, mode, bits; |
|
1836 + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; |
|
1837 + /* Build the T4 key schedule in ctx->cipher_data and pick block/stream routines by mode, direction and key size; returns 1, or 0 after queuing an EVP error for an unsupported key size. */ |
|
1838 + mode = ctx->cipher->flags & EVP_CIPH_MODE; /* keep only the mode bits of the cipher flags */ |
|
1839 + bits = ctx->key_len*8; /* key_len scaled by 8; passed on as the key size and used to dispatch below */ |
|
1840 + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) |
|
1841 + && !enc) /* only ECB/CBC decryption takes the decrypt schedule */ |
|
1842 + { |
|
1843 + ret = 0; |
|
1844 + aes_t4_set_decrypt_key(key, bits, ctx->cipher_data); |
|
1845 + dat->block = (block128_f)aes_t4_decrypt; |
|
1846 + switch (bits) { |
|
1847 + case 128: |
|
1848 + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
1849 + (cbc128_f)aes128_t4_cbc_decrypt : |
|
1850 + NULL; /* ECB: no CBC stream routine installed */ |
|
1851 + break; |
|
1852 + case 192: |
|
1853 + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
1854 + (cbc128_f)aes192_t4_cbc_decrypt : |
|
1855 + NULL; |
|
1856 + break; |
|
1857 + case 256: |
|
1858 + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
1859 + (cbc128_f)aes256_t4_cbc_decrypt : |
|
1860 + NULL; |
|
1861 + break; |
|
1862 + default: |
|
1863 + ret = -1; /* any other key size is reported as a failure below */ |
|
1864 + } |
|
1865 + } |
|
1866 + else { /* every other mode/direction combination uses the encrypt schedule */ |
|
1867 + ret = 0; |
|
1868 + aes_t4_set_encrypt_key(key, bits, ctx->cipher_data); |
|
1869 + dat->block = (block128_f)aes_t4_encrypt; |
|
1870 + switch (bits) { |
|
1871 + case 128: |
|
1872 + if (mode==EVP_CIPH_CBC_MODE) |
|
1873 + dat->stream.cbc = (cbc128_f)aes128_t4_cbc_encrypt; |
|
1874 + else if (mode==EVP_CIPH_CTR_MODE) |
|
1875 + dat->stream.ctr = (ctr128_f)aes128_t4_ctr32_encrypt; |
|
1876 + else |
|
1877 + dat->stream.cbc = NULL; /* modes other than CBC/CTR get no stream routine */ |
|
1878 + break; |
|
1879 + case 192: |
|
1880 + if (mode==EVP_CIPH_CBC_MODE) |
|
1881 + dat->stream.cbc = (cbc128_f)aes192_t4_cbc_encrypt; |
|
1882 + else if (mode==EVP_CIPH_CTR_MODE) |
|
1883 + dat->stream.ctr = (ctr128_f)aes192_t4_ctr32_encrypt; |
|
1884 + else |
|
1885 + dat->stream.cbc = NULL; |
|
1886 + break; |
|
1887 + case 256: |
|
1888 + if (mode==EVP_CIPH_CBC_MODE) |
|
1889 + dat->stream.cbc = (cbc128_f)aes256_t4_cbc_encrypt; |
|
1890 + else if (mode==EVP_CIPH_CTR_MODE) |
|
1891 + dat->stream.ctr = (ctr128_f)aes256_t4_ctr32_encrypt; |
|
1892 + else |
|
1893 + dat->stream.cbc = NULL; |
|
1894 + break; |
|
1895 + default: |
|
1896 + ret = -1; /* any other key size is reported as a failure below */ |
|
1897 + } |
|
1898 + } |
|
1899 + |
|
1900 + if(ret < 0) /* ret is only ever 0 or -1 here; -1 means the key size matched no case */ |
|
1901 + { |
|
1902 + EVPerr(EVP_F_AES_T4_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED); |
|
1903 + return 0; |
|
1904 + } |
|
1905 + |
|
1906 + return 1; |
|
1907 + } |
|
1908 + |
|
1909 +#define aes_t4_cbc_cipher aes_cbc_cipher |
|
1910 +static int aes_t4_cbc_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1911 + const unsigned char *in, size_t len); |
|
1912 + |
|
1913 +#define aes_t4_ecb_cipher aes_ecb_cipher |
|
1914 +static int aes_t4_ecb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1915 + const unsigned char *in, size_t len); |
|
1916 + |
|
1917 +#define aes_t4_ofb_cipher aes_ofb_cipher |
|
1918 +static int aes_t4_ofb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1919 + const unsigned char *in,size_t len); |
|
1920 + |
|
1921 +#define aes_t4_cfb_cipher aes_cfb_cipher |
|
1922 +static int aes_t4_cfb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1923 + const unsigned char *in,size_t len); |
|
1924 + |
|
1925 +#define aes_t4_cfb8_cipher aes_cfb8_cipher |
|
1926 +static int aes_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1927 + const unsigned char *in,size_t len); |
|
1928 + |
|
1929 +#define aes_t4_cfb1_cipher aes_cfb1_cipher |
|
1930 +static int aes_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1931 + const unsigned char *in,size_t len); |
|
1932 + |
|
1933 +#define aes_t4_ctr_cipher aes_ctr_cipher |
|
1934 +static int aes_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1935 + const unsigned char *in, size_t len); |
|
1936 + |
|
1937 +static int aes_t4_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1938 + const unsigned char *iv, int enc) /* enc is not referenced in this function */ |
|
1939 + { /* Key and/or IV setup for GCM using the T4 routines; returns 1, or 0 for an unsupported key size. */ |
|
1940 + EVP_AES_GCM_CTX *gctx = ctx->cipher_data; |
|
1941 + if (!iv && !key) |
|
1942 + return 1; /* neither key nor IV supplied: nothing to do */ |
|
1943 + if (key) |
|
1944 + { |
|
1945 + int bits = ctx->key_len * 8; /* key_len scaled by 8; drives the dispatch below */ |
|
1946 + aes_t4_set_encrypt_key(key, bits, &gctx->ks.ks); |
|
1947 + CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, |
|
1948 + (block128_f)aes_t4_encrypt); |
|
1949 + switch (bits) { /* pick the ctr32 routine matching the key size */ |
|
1950 + case 128: |
|
1951 + gctx->ctr = (ctr128_f)aes128_t4_ctr32_encrypt; |
|
1952 + break; |
|
1953 + case 192: |
|
1954 + gctx->ctr = (ctr128_f)aes192_t4_ctr32_encrypt; |
|
1955 + break; |
|
1956 + case 256: |
|
1957 + gctx->ctr = (ctr128_f)aes256_t4_ctr32_encrypt; |
|
1958 + break; |
|
1959 + default: |
|
1960 + return 0; /* key size matched no case; key_set is left untouched */ |
|
1961 + } |
|
1962 + /* If we have an iv can set it directly, otherwise use |
|
1963 + * saved IV. |
|
1964 + */ |
|
1965 + if (iv == NULL && gctx->iv_set) |
|
1966 + iv = gctx->iv; |
|
1967 + if (iv) |
|
1968 + { |
|
1969 + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); |
|
1970 + gctx->iv_set = 1; |
|
1971 + } |
|
1972 + gctx->key_set = 1; |
|
1973 + } |
|
1974 + else /* reached only with key == NULL and iv != NULL (see first guard) */ |
|
1975 + { |
|
1976 + /* If key set use IV, otherwise copy */ |
|
1977 + if (gctx->key_set) |
|
1978 + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); |
|
1979 + else |
|
1980 + memcpy(gctx->iv, iv, gctx->ivlen); /* stash the IV until a key arrives */ |
|
1981 + gctx->iv_set = 1; |
|
1982 + gctx->iv_gen = 0; |
|
1983 + } |
|
1984 + return 1; |
|
1985 + } |
|
1986 + |
|
1987 +#define aes_t4_gcm_cipher aes_gcm_cipher |
|
1988 +static int aes_t4_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1989 + const unsigned char *in, size_t len); |
|
1990 + |
|
1991 +static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1992 + const unsigned char *iv, int enc) |
|
1993 + { |
|
1994 + EVP_AES_XTS_CTX *xctx = ctx->cipher_data; |
|
1995 + if (!iv && !key) |
|
1996 + return 1; /* neither key nor IV supplied: nothing to do */ |
|
1997 + /* key: schedule ks1 (direction chosen by enc) and ks2 (always encrypt); iv: copied into ctx->iv. Always returns 1 while the stream switches stay disabled. */ |
|
1998 + if (key) |
|
1999 + { |
|
2000 + int bits = ctx->key_len * 4; /* key_len is two AES keys */ |
|
2001 + xctx->stream = NULL; /* no T4 XTS stream routine yet; do not leave the pointer indeterminate */ |
|
2002 + if (enc) |
|
2003 + { |
|
2004 + aes_t4_set_encrypt_key(key, bits, &xctx->ks1.ks); |
|
2005 + xctx->xts.block1 = (block128_f)aes_t4_encrypt; |
|
2006 +#if 0 /* not yet */ |
|
2007 + switch (bits) { |
|
2008 + case 128: |
|
2009 + xctx->stream = aes128_t4_xts_encrypt; |
|
2010 + break; |
|
2011 + case 192: |
|
2012 + xctx->stream = aes192_t4_xts_encrypt; |
|
2013 + break; |
|
2014 + case 256: |
|
2015 + xctx->stream = aes256_t4_xts_encrypt; |
|
2016 + break; |
|
2017 + default: |
|
2018 + return 0; |
|
2019 + } |
|
2020 +#endif |
|
2021 + } |
|
2022 + else |
|
2023 + { |
|
2024 + aes_t4_set_decrypt_key(key, bits, &xctx->ks1.ks); |
|
2025 + xctx->xts.block1 = (block128_f)aes_t4_decrypt; |
|
2026 +#if 0 /* not yet */ |
|
2027 + switch (bits) { |
|
2028 + case 128: |
|
2029 + xctx->stream = aes128_t4_xts_decrypt; |
|
2030 + break; |
|
2031 + case 192: |
|
2032 + xctx->stream = aes192_t4_xts_decrypt; |
|
2033 + break; |
|
2034 + case 256: |
|
2035 + xctx->stream = aes256_t4_xts_decrypt; |
|
2036 + break; |
|
2037 + default: |
|
2038 + return 0; |
|
2039 + } |
|
2040 +#endif |
|
2041 + } |
|
2042 + |
|
2043 + aes_t4_set_encrypt_key(key + ctx->key_len/2, /* second half of the key material */ |
|
2044 + bits, &xctx->ks2.ks); |
|
2045 + xctx->xts.block2 = (block128_f)aes_t4_encrypt; |
|
2046 + |
|
2047 + xctx->xts.key1 = &xctx->ks1; |
|
2048 + } |
|
2049 + |
|
2050 + if (iv) |
|
2051 + { |
|
2052 + xctx->xts.key2 = &xctx->ks2; |
|
2053 + memcpy(ctx->iv, iv, 16); |
|
2054 + } |
|
2055 + |
|
2056 + return 1; |
|
2057 + } |
|
2058 + |
|
2059 +#define aes_t4_xts_cipher aes_xts_cipher |
|
2060 +static int aes_t4_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
2061 + const unsigned char *in, size_t len); |
|
2062 + |
|
2063 +static int aes_t4_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
2064 + const unsigned char *iv, int enc) |
|
2065 + { /* Key and/or IV setup for CCM using the T4 block routine; always returns 1 while the ccm64 switch stays disabled. */ |
|
2066 + EVP_AES_CCM_CTX *cctx = ctx->cipher_data; |
|
2067 + if (!iv && !key) |
|
2068 + return 1; /* neither key nor IV supplied: nothing to do */ |
|
2069 + if (key) |
|
2070 + { |
|
2071 + int bits = ctx->key_len * 8; |
|
2072 + cctx->str = NULL; /* no T4 ccm64 routine yet; matches the generic path, which also sets str to NULL */ |
|
2073 + aes_t4_set_encrypt_key(key, bits, &cctx->ks.ks); |
|
2074 + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f)aes_t4_encrypt); |
|
2075 +#if 0 /* not yet */ |
|
2076 + switch (bits) { |
|
2077 + case 128: |
|
2078 + cctx->str = enc?(ccm128_f)aes128_t4_ccm64_encrypt : |
|
2079 + (ccm128_f)aes128_t4_ccm64_decrypt; |
|
2080 + break; |
|
2081 + case 192: |
|
2082 + cctx->str = enc?(ccm128_f)aes192_t4_ccm64_encrypt : |
|
2083 + (ccm128_f)aes192_t4_ccm64_decrypt; |
|
2084 + break; |
|
2085 + case 256: |
|
2086 + cctx->str = enc?(ccm128_f)aes256_t4_ccm64_encrypt : |
|
2087 + (ccm128_f)aes256_t4_ccm64_decrypt; |
|
2088 + break; |
|
2089 + default: |
|
2090 + return 0; |
|
2091 + } |
|
2092 +#endif |
|
2093 + cctx->key_set = 1; |
|
2094 + } |
|
2095 + if (iv) |
|
2096 + { |
|
2097 + memcpy(ctx->iv, iv, 15 - cctx->L); /* copies 15-L bytes of the caller's IV */ |
|
2098 + cctx->iv_set = 1; |
|
2099 + } |
|
2100 + return 1; |
|
2101 + } |
|
2102 + |
|
2103 +#define aes_t4_ccm_cipher aes_ccm_cipher |
|
2104 +static int aes_t4_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
2105 + const unsigned char *in, size_t len); |
|
2106 + |
|
2107 +#define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ |
|
2108 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \ |
|
2109 + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ |
|
2110 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2111 + aes_t4_init_key, \ |
|
2112 + aes_t4_##mode##_cipher, \ |
|
2113 + NULL, \ |
|
2114 + sizeof(EVP_AES_KEY), \ |
|
2115 + NULL,NULL,NULL,NULL }; \ |
|
2116 +static const EVP_CIPHER aes_##keylen##_##mode = { \ |
|
2117 + nid##_##keylen##_##nmode,blocksize, \ |
|
2118 + keylen/8,ivlen, \ |
|
2119 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2120 + aes_init_key, \ |
|
2121 + aes_##mode##_cipher, \ |
|
2122 + NULL, \ |
|
2123 + sizeof(EVP_AES_KEY), \ |
|
2124 + NULL,NULL,NULL,NULL }; \ |
|
2125 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ |
|
2126 +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } |
|
2127 + |
|
2128 +#define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ |
|
2129 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \ |
|
2130 + nid##_##keylen##_##mode,blocksize, \ |
|
2131 + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ |
|
2132 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2133 + aes_t4_##mode##_init_key, \ |
|
2134 + aes_t4_##mode##_cipher, \ |
|
2135 + aes_##mode##_cleanup, \ |
|
2136 + sizeof(EVP_AES_##MODE##_CTX), \ |
|
2137 + NULL,NULL,aes_##mode##_ctrl,NULL }; \ |
|
2138 +static const EVP_CIPHER aes_##keylen##_##mode = { \ |
|
2139 + nid##_##keylen##_##mode,blocksize, \ |
|
2140 + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ |
|
2141 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2142 + aes_##mode##_init_key, \ |
|
2143 + aes_##mode##_cipher, \ |
|
2144 + aes_##mode##_cleanup, \ |
|
2145 + sizeof(EVP_AES_##MODE##_CTX), \ |
|
2146 + NULL,NULL,aes_##mode##_ctrl,NULL }; \ |
|
2147 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ |
|
2148 +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } |
|
2149 + |
|
2150 #else |
|
2151 |
|
2152 #define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ |
|
2153 @@ -505,7 +879,7 @@ |
|
2154 #ifdef BSAES_CAPABLE |
|
2155 if (BSAES_CAPABLE && mode==EVP_CIPH_CBC_MODE) |
|
2156 { |
|
2157 - ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks); |
|
2158 + ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); |
|
2159 dat->block = (block128_f)AES_decrypt; |
|
2160 dat->stream.cbc = (cbc128_f)bsaes_cbc_encrypt; |
|
2161 } |
|
2162 @@ -514,7 +888,7 @@ |
|
2163 #ifdef VPAES_CAPABLE |
|
2164 if (VPAES_CAPABLE) |
|
2165 { |
|
2166 - ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks); |
|
2167 + ret = vpaes_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); |
|
2168 dat->block = (block128_f)vpaes_decrypt; |
|
2169 dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
2170 (cbc128_f)vpaes_cbc_encrypt : |
|
2171 @@ -523,7 +897,7 @@ |
|
2172 else |
|
2173 #endif |
|
2174 { |
|
2175 - ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks); |
|
2176 + ret = AES_set_decrypt_key(key,ctx->key_len*8,&dat->ks.ks); |
|
2177 dat->block = (block128_f)AES_decrypt; |
|
2178 dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
2179 (cbc128_f)AES_cbc_encrypt : |
|
2180 @@ -533,7 +907,7 @@ |
|
2181 #ifdef BSAES_CAPABLE |
|
2182 if (BSAES_CAPABLE && mode==EVP_CIPH_CTR_MODE) |
|
2183 { |
|
2184 - ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks); |
|
2185 + ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); |
|
2186 dat->block = (block128_f)AES_encrypt; |
|
2187 dat->stream.ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; |
|
2188 } |
|
2189 @@ -542,7 +916,7 @@ |
|
2190 #ifdef VPAES_CAPABLE |
|
2191 if (VPAES_CAPABLE) |
|
2192 { |
|
2193 - ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks); |
|
2194 + ret = vpaes_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); |
|
2195 dat->block = (block128_f)vpaes_encrypt; |
|
2196 dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
2197 (cbc128_f)vpaes_cbc_encrypt : |
|
2198 @@ -551,7 +925,7 @@ |
|
2199 else |
|
2200 #endif |
|
2201 { |
|
2202 - ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks); |
|
2203 + ret = AES_set_encrypt_key(key,ctx->key_len*8,&dat->ks.ks); |
|
2204 dat->block = (block128_f)AES_encrypt; |
|
2205 dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
2206 (cbc128_f)AES_cbc_encrypt : |
|
2207 @@ -828,7 +1202,7 @@ |
|
2208 #ifdef BSAES_CAPABLE |
|
2209 if (BSAES_CAPABLE) |
|
2210 { |
|
2211 - AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); |
|
2212 + AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); |
|
2213 CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, |
|
2214 (block128_f)AES_encrypt); |
|
2215 gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; |
|
2216 @@ -839,7 +1213,7 @@ |
|
2217 #ifdef VPAES_CAPABLE |
|
2218 if (VPAES_CAPABLE) |
|
2219 { |
|
2220 - vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); |
|
2221 + vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks.ks); |
|
2222 CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, |
|
2223 (block128_f)vpaes_encrypt); |
|
2224 gctx->ctr = NULL; |
|
2225 @@ -849,7 +1223,7 @@ |
|
2226 #endif |
|
2227 (void)0; /* terminate potentially open 'else' */ |
|
2228 |
|
2229 - AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
|
2230 + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); |
|
2231 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encrypt); |
|
2232 #ifdef AES_CTR_ASM |
|
2233 gctx->ctr = (ctr128_f)AES_ctr32_encrypt; |
|
2234 @@ -1080,17 +1454,17 @@ |
|
2235 { |
|
2236 if (enc) |
|
2237 { |
|
2238 - vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2239 + vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2240 xctx->xts.block1 = (block128_f)vpaes_encrypt; |
|
2241 } |
|
2242 else |
|
2243 { |
|
2244 - vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2245 + vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2246 xctx->xts.block1 = (block128_f)vpaes_decrypt; |
|
2247 } |
|
2248 |
|
2249 vpaes_set_encrypt_key(key + ctx->key_len/2, |
|
2250 - ctx->key_len * 4, &xctx->ks2); |
|
2251 + ctx->key_len * 4, &xctx->ks2.ks); |
|
2252 xctx->xts.block2 = (block128_f)vpaes_encrypt; |
|
2253 |
|
2254 xctx->xts.key1 = &xctx->ks1; |
|
2255 @@ -1102,17 +1476,17 @@ |
|
2256 |
|
2257 if (enc) |
|
2258 { |
|
2259 - AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2260 + AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2261 xctx->xts.block1 = (block128_f)AES_encrypt; |
|
2262 } |
|
2263 else |
|
2264 { |
|
2265 - AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2266 + AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2267 xctx->xts.block1 = (block128_f)AES_decrypt; |
|
2268 } |
|
2269 |
|
2270 AES_set_encrypt_key(key + ctx->key_len/2, |
|
2271 - ctx->key_len * 4, &xctx->ks2); |
|
2272 + ctx->key_len * 4, &xctx->ks2.ks); |
|
2273 xctx->xts.block2 = (block128_f)AES_encrypt; |
|
2274 |
|
2275 xctx->xts.key1 = &xctx->ks1; |
|
2276 @@ -1223,7 +1597,7 @@ |
|
2277 #ifdef VPAES_CAPABLE |
|
2278 if (VPAES_CAPABLE) |
|
2279 { |
|
2280 - vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks); |
|
2281 + vpaes_set_encrypt_key(key, ctx->key_len*8, &cctx->ks.ks); |
|
2282 CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
2283 &cctx->ks, (block128_f)vpaes_encrypt); |
|
2284 cctx->str = NULL; |
|
2285 @@ -1231,7 +1605,7 @@ |
|
2286 break; |
|
2287 } |
|
2288 #endif |
|
2289 - AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); |
|
2290 + AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); |
|
2291 CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
2292 &cctx->ks, (block128_f)AES_encrypt); |
|
2293 cctx->str = NULL; |
|
2294 @@ -1319,5 +1693,4 @@ |
|
2295 BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) |
|
2296 BLOCK_CIPHER_custom(NID_aes,256,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) |
|
2297 |
|
2298 -#endif |
|
2299 #endif |
|
2300 Index: openssl/crypto/evp/evp.h |
|
2301 =================================================================== |
|
2302 --- evp.h Mon Feb 11 07:26:04 2013 |
|
2303 +++ evp.h.new Thu May 2 14:31:55 2013 |
|
2304 @@ -1256,6 +1256,7 @@ |
|
2305 #define EVP_F_AESNI_INIT_KEY 165 |
|
2306 #define EVP_F_AESNI_XTS_CIPHER 176 |
|
2307 #define EVP_F_AES_INIT_KEY 133 |
|
2308 +#define EVP_F_AES_T4_INIT_KEY 178 |
|
2309 #define EVP_F_AES_XTS 172 |
|
2310 #define EVP_F_AES_XTS_CIPHER 175 |
|
2311 #define EVP_F_ALG_MODULE_INIT 177 |
|
2312 Index: openssl/crypto/evp/evp_err.c |
|
2313 =================================================================== |
|
2314 --- evp_err.c Mon Feb 11 07:26:04 2013 |
|
2315 +++ evp_err.c.new Thu May 2 14:33:24 2013 |
|
2316 @@ -73,6 +73,7 @@ |
|
2317 {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, |
|
2318 {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"}, |
|
2319 {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, |
|
2320 +{ERR_FUNC(EVP_F_AES_T4_INIT_KEY), "AES_T4_INIT_KEY"}, |
|
2321 {ERR_FUNC(EVP_F_AES_XTS), "AES_XTS"}, |
|
2322 {ERR_FUNC(EVP_F_AES_XTS_CIPHER), "AES_XTS_CIPHER"}, |
|
2323 {ERR_FUNC(EVP_F_ALG_MODULE_INIT), "ALG_MODULE_INIT"}, |
|