|
1 # |
|
2 # This file adds inline T4 instruction support to OpenSSL upstream code. |
|
3 # The change was brought in from OpenSSL 1.0.2. |
|
4 # |
|
5 Index: Configure |
|
6 =================================================================== |
|
7 diff -ru openssl-1.0.1e/Configure openssl-1.0.1e/Configure |
|
8 --- openssl-1.0.1e/Configure 2011-05-24 17:02:24.000000000 -0700 |
|
9 +++ openssl-1.0.1e/Configure 2011-07-27 10:48:17.817470000 -0700 |
|
10 @@ -135,7 +135,7 @@ |
|
11 |
|
12 my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:"; |
|
13 my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; |
|
14 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; |
|
15 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; |
|
16 my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; |
|
17 my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; |
|
18 my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::"; |
|
19 Index: crypto/sparccpuid.S |
|
20 =================================================================== |
|
21 diff -ru openssl-1.0.1e/crypto/sparccpuid.S openssl-1.0.1e/crypto/sparccpuid.S |
|
22 --- openssl-1.0.1e/crypto/sparccpuid.S 2011-05-24 17:02:24.000000000 -0700 |
|
23 +++ openssl-1.0.1e/crypto/sparccpuid.S 2011-07-27 10:48:17.817470000 -0700 |
|
24 @@ -1,3 +1,7 @@ |
|
25 +#ifdef OPENSSL_FIPSCANISTER |
|
26 +#include <openssl/fipssyms.h> |
|
27 +#endif |
|
28 + |
|
29 #if defined(__SUNPRO_C) && defined(__sparcv9) |
|
30 # define ABI64 /* They've said -xarch=v9 at command line */ |
|
31 #elif defined(__GNUC__) && defined(__arch64__) |
|
32 @@ -235,10 +239,10 @@ |
|
33 .global _sparcv9_vis1_probe |
|
34 .align 8 |
|
35 _sparcv9_vis1_probe: |
|
36 + .word 0x81b00d80 !fxor %f0,%f0,%f0 |
|
37 add %sp,BIAS+2,%o1 |
|
38 - .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 |
|
39 retl |
|
40 - .word 0x81b00d80 !fxor %f0,%f0,%f0 |
|
41 + .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 |
|
42 .type _sparcv9_vis1_probe,#function |
|
43 .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe |
|
44 |
|
45 @@ -251,7 +255,12 @@ |
|
46 ! UltraSPARC IIe 7 |
|
47 ! UltraSPARC III 7 |
|
48 ! UltraSPARC T1 24 |
|
49 +! SPARC T4 65(*) |
|
50 ! |
|
51 +! (*) result has less to do with VIS instruction latencies than with |
|
52 +! rdtick, which appears to be that slow; it still does the trick in |
|
53 +! the sense that FP and VIS paths remain slower than integer-only ones. |
|
54 +! |
|
55 ! Numbers for T2 and SPARC64 V-VII are more than welcomed. |
|
56 ! |
|
57 ! It would be possible to detect specifically US-T1 by instrumenting |
|
58 @@ -260,6 +269,8 @@ |
|
59 .global _sparcv9_vis1_instrument |
|
60 .align 8 |
|
61 _sparcv9_vis1_instrument: |
|
62 + .word 0x81b00d80 !fxor %f0,%f0,%f0 |
|
63 + .word 0x85b08d82 !fxor %f2,%f2,%f2 |
|
64 .word 0x91410000 !rd %tick,%o0 |
|
65 .word 0x81b00d80 !fxor %f0,%f0,%f0 |
|
66 .word 0x85b08d82 !fxor %f2,%f2,%f2 |
|
67 @@ -314,6 +325,30 @@ |
|
68 .type _sparcv9_fmadd_probe,#function |
|
69 .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe |
|
70 |
|
71 +.global _sparcv9_rdcfr |
|
72 +.align 8 |
|
73 +_sparcv9_rdcfr: ! leaf routine: reads %asr26 into %o0 and returns it |
|
74 + retl ! the hand-encoded read below sits in the delay slot |
|
75 + .word 0x91468000 !rd %asr26,%o0 |
|
76 +.type _sparcv9_rdcfr,#function |
|
77 +.size _sparcv9_rdcfr,.-_sparcv9_rdcfr |
|
78 + |
|
79 +.global _sparcv9_vis3_probe |
|
80 +.align 8 |
|
81 +_sparcv9_vis3_probe: ! probe only: runs xmulx on %g0 operands, no result is kept |
|
82 + retl ! the hand-encoded instruction below sits in the delay slot |
|
83 + .word 0x81b022a0 !xmulx %g0,%g0,%g0 |
|
84 +.type _sparcv9_vis3_probe,#function |
|
85 +.size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe |
|
86 + |
|
87 +.global _sparcv9_random |
|
88 +.align 8 |
|
89 +_sparcv9_random: ! leaf routine: runs the random instruction, result in %o0 |
|
90 + retl ! the hand-encoded instruction below sits in the delay slot |
|
91 + .word 0x91b002a0 !random %o0 |
|
92 +.type _sparcv9_random,#function |
|
93 +.size _sparcv9_random,.-_sparcv9_random ! size is measured from this routine's own label |
|
94 + |
|
95 .global OPENSSL_cleanse |
|
96 .align 32 |
|
97 OPENSSL_cleanse: |
|
98 @@ -398,6 +433,102 @@ |
|
99 .size OPENSSL_cleanse,.-OPENSSL_cleanse |
|
100 |
|
101 #ifndef _BOOT |
|
102 +.global _sparcv9_vis1_instrument_bus |
|
103 +.align 8 |
|
104 +_sparcv9_vis1_instrument_bus: ! in: %o0=out, %o1=cnt; returns the saved cnt in %o0 |
|
105 + mov %o1,%o3 ! save cnt |
|
106 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
107 + mov %o4,%o5 ! lasttick = tick |
|
108 + set 0,%g4 ! diff |
|
109 + |
|
110 + andn %o0,63,%g1 ! clear the low 6 bits of out: 64-byte aligned address |
|
111 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
112 + .word 0x8143e040 !membar #Sync |
|
113 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
114 + .word 0x8143e040 !membar #Sync |
|
115 + ld [%o0],%o4 |
|
116 + add %o4,%g4,%g4 ! candidate value = current word + diff |
|
117 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
118 + |
|
119 +.Loop: .word 0x99410000 !rd %tick,%o4 |
|
120 + sub %o4,%o5,%g4 ! diff=tick-lasttick |
|
121 + mov %o4,%o5 ! lasttick=tick |
|
122 + |
|
123 + andn %o0,63,%g1 |
|
124 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
125 + .word 0x8143e040 !membar #Sync |
|
126 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
127 + .word 0x8143e040 !membar #Sync |
|
128 + ld [%o0],%o4 |
|
129 + add %o4,%g4,%g4 |
|
130 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
131 + subcc %o1,1,%o1 ! --$cnt |
|
132 + bnz .Loop |
|
133 + add %o0,4,%o0 ! ++$out |
|
134 + |
|
135 + retl |
|
136 + mov %o3,%o0 ! return value is the cnt saved on entry |
|
137 +.type _sparcv9_vis1_instrument_bus,#function |
|
138 +.size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus |
|
139 + |
|
140 +.global _sparcv9_vis1_instrument_bus2 |
|
141 +.align 8 |
|
142 +_sparcv9_vis1_instrument_bus2: ! in: %o0=out, %o1=cnt, %o2=max; returns cnt minus the count left over |
|
143 + mov %o1,%o3 ! save cnt |
|
144 + sll %o1,2,%o1 ! cnt*=4 |
|
145 + |
|
146 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
147 + mov %o4,%o5 ! lasttick = tick |
|
148 + set 0,%g4 ! diff |
|
149 + |
|
150 + andn %o0,63,%g1 ! clear the low 6 bits of out: 64-byte aligned address |
|
151 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
152 + .word 0x8143e040 !membar #Sync |
|
153 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
154 + .word 0x8143e040 !membar #Sync |
|
155 + ld [%o0],%o4 |
|
156 + add %o4,%g4,%g4 |
|
157 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
158 + |
|
159 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
160 + sub %o4,%o5,%g4 ! diff=tick-lasttick |
|
161 + mov %o4,%o5 ! lasttick=tick |
|
162 + mov %g4,%g5 ! lastdiff=diff |
|
163 +.Loop2: |
|
164 + andn %o0,63,%g1 |
|
165 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
166 + .word 0x8143e040 !membar #Sync |
|
167 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
168 + .word 0x8143e040 !membar #Sync |
|
169 + ld [%o0],%o4 |
|
170 + add %o4,%g4,%g4 |
|
171 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
172 + |
|
173 + subcc %o2,1,%o2 ! --max |
|
174 + bz .Ldone2 |
|
175 + nop |
|
176 + |
|
177 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
178 + sub %o4,%o5,%g4 ! diff=tick-lasttick |
|
179 + mov %o4,%o5 ! lasttick=tick |
|
180 + cmp %g4,%g5 ! zero flag is set when diff equals lastdiff |
|
181 + mov %g4,%g5 ! lastdiff=diff |
|
182 + |
|
183 + .word 0x83408000 !rd %ccr,%g1 |
|
184 + and %g1,4,%g1 ! isolate zero flag |
|
185 + xor %g1,4,%g1 ! flip zero flag |
|
186 + |
|
187 + subcc %o1,%g1,%o1 ! conditional --$cnt |
|
188 + bnz .Loop2 |
|
189 + add %o0,%g1,%o0 ! conditional ++$out |
|
190 + |
|
191 +.Ldone2: |
|
192 + srl %o1,2,%o1 ! undo the cnt*=4 scaling applied on entry |
|
193 + retl |
|
194 + sub %o3,%o1,%o0 ! return saved cnt minus the remaining count |
|
195 +.type _sparcv9_vis1_instrument_bus2,#function |
|
196 +.size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2 |
|
197 + |
|
198 .section ".init",#alloc,#execinstr |
|
199 call solaris_locking_setup |
|
200 nop |
|
201 Index: crypto/sparcv9cap.c |
|
202 =================================================================== |
|
203 diff -ru openssl-1.0.1e/crypto/sparcv9cap.c openssl-1.0.1e/crypto/sparcv9cap.c |
|
204 --- openssl-1.0.1e/crypto/sparcv9cap.c 2011-05-24 17:02:24.000000000 -0700 |
|
205 +++ openssl-1.0.1e/crypto/sparcv9cap.c 2011-07-27 10:48:17.817470000 -0700 |
|
206 @@ -4,34 +4,58 @@ |
|
207 #include <setjmp.h> |
|
208 #include <signal.h> |
|
209 #include <sys/time.h> |
|
210 +#include <unistd.h> |
|
211 #include <openssl/bn.h> |
|
212 |
|
213 -#define SPARCV9_TICK_PRIVILEGED (1<<0) |
|
214 -#define SPARCV9_PREFER_FPU (1<<1) |
|
215 -#define SPARCV9_VIS1 (1<<2) |
|
216 -#define SPARCV9_VIS2 (1<<3) /* reserved */ |
|
217 -#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ |
|
218 +#include "sparc_arch.h" |
|
219 |
|
220 +#if defined(__GNUC__) && defined(__linux) |
|
221 +__attribute__((visibility("hidden"))) |
|
222 +#endif |
|
223 #ifndef _BOOT |
|
224 -static int OPENSSL_sparcv9cap_P = SPARCV9_TICK_PRIVILEGED; |
|
225 +unsigned int OPENSSL_sparcv9cap_P[2] = {SPARCV9_TICK_PRIVILEGED, 0}; |
|
226 #else |
|
227 -static int OPENSSL_sparcv9cap_P = SPARCV9_VIS1; |
|
228 +unsigned int OPENSSL_sparcv9cap_P[2] = {SPARCV9_VIS1, 0}; |
|
229 #endif |
|
230 |
|
231 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
232 const BN_ULONG *np, const BN_ULONG *n0, int num) |
|
233 { |
|
234 + int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
235 + const BN_ULONG *np,const BN_ULONG *n0, int num); |
|
236 int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
237 const BN_ULONG *np, const BN_ULONG *n0, int num); |
|
238 int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
239 const BN_ULONG *np, const BN_ULONG *n0, int num); |
|
240 |
|
241 - if (num >= 8 && !(num & 1) && |
|
242 - (OPENSSL_sparcv9cap_P & (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) == |
|
243 - (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) |
|
244 - return bn_mul_mont_fpu(rp, ap, bp, np, n0, num); |
|
245 - else |
|
246 - return bn_mul_mont_int(rp, ap, bp, np, n0, num); |
|
247 + if (!(num&1) && num>=6) { /* only even num of at least 6 may take an accelerated path */ |
|
248 + if ((num&15)==0 && num<=64 && |
|
249 + (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== |
|
250 + (CFR_MONTMUL|CFR_MONTSQR)) |
|
251 + { |
|
252 + typedef int (*bn_mul_mont_f)(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
253 + int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
254 + int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
255 + int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
256 + int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
257 + static const bn_mul_mont_f funcs[4] = { |
|
258 + bn_mul_mont_t4_8, bn_mul_mont_t4_16, |
|
259 + bn_mul_mont_t4_24, bn_mul_mont_t4_32 }; |
|
260 + bn_mul_mont_f worker = funcs[num/16-1]; /* num is 16, 32, 48 or 64 here, so the index is 0..3 */ |
|
261 + |
|
262 + if ((*worker)(rp,ap,bp,np,n0)) return 1; |
|
263 + /* retry once and fall back */ |
|
264 + if ((*worker)(rp,ap,bp,np,n0)) return 1; |
|
265 + return bn_mul_mont_vis3(rp,ap,bp,np,n0,num); /* both attempts returned 0 */ |
|
266 + } |
|
267 + if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3)) |
|
268 + return bn_mul_mont_vis3(rp,ap,bp,np,n0,num); |
|
269 + else if (num>=8 && |
|
270 + (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == |
|
271 + (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) |
|
272 + return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); |
|
273 + } |
|
274 + return bn_mul_mont_int(rp,ap,bp,np,n0,num); /* default when no branch above returned */ |
|
275 } |
|
276 |
|
277 unsigned long _sparcv9_rdtick(void); |
|
278 @@ -39,11 +63,18 @@ |
|
279 unsigned long _sparcv9_vis1_instrument(void); |
|
280 void _sparcv9_vis2_probe(void); |
|
281 void _sparcv9_fmadd_probe(void); |
|
282 +unsigned long _sparcv9_rdcfr(void); |
|
283 +void _sparcv9_vis3_probe(void); |
|
284 +unsigned long _sparcv9_random(void); |
|
285 +#ifndef _BOOT |
|
286 +size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t); |
|
287 +size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t); |
|
288 +#endif |
|
289 |
|
290 #ifndef _BOOT |
|
291 unsigned long OPENSSL_rdtsc(void) |
|
292 { |
|
293 - if (OPENSSL_sparcv9cap_P & SPARCV9_TICK_PRIVILEGED) |
|
294 + if (OPENSSL_sparcv9cap_P[0] & SPARCV9_TICK_PRIVILEGED) |
|
295 #if defined(__sun) && defined(__SVR4) |
|
296 return gethrtime(); |
|
297 #else |
|
298 @@ -52,6 +83,24 @@ |
|
299 else |
|
300 return _sparcv9_rdtick(); |
|
301 } |
|
302 + |
|
303 +size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt) |
|
304 +{ /* forwards to the assembly routine, or returns 0 when the capability test below fails */ |
|
305 + if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == |
|
306 + SPARCV9_BLK) /* SPARCV9_BLK set and SPARCV9_TICK_PRIVILEGED clear */ |
|
307 + return _sparcv9_vis1_instrument_bus(out,cnt); |
|
308 + else |
|
309 + return 0; |
|
310 +} |
|
311 + |
|
312 +size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max) |
|
313 +{ /* forwards to the assembly routine, or returns 0 when the capability test below fails */ |
|
314 + if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == |
|
315 + SPARCV9_BLK) /* SPARCV9_BLK set and SPARCV9_TICK_PRIVILEGED clear */ |
|
316 + return _sparcv9_vis1_instrument_bus2(out,cnt,max); |
|
317 + else |
|
318 + return 0; |
|
319 +} |
|
320 #endif |
|
321 |
|
322 #if defined(_BOOT) |
|
323 @@ -61,7 +110,7 @@ |
|
324 */ |
|
325 void OPENSSL_cpuid_setup(void) |
|
326 { |
|
327 - OPENSSL_sparcv9cap_P = SPARCV9_VIS1; |
|
328 + OPENSSL_sparcv9cap_P[0] = SPARCV9_VIS1; |
|
329 } |
|
330 |
|
331 #elif 0 && defined(__sun) && defined(__SVR4) |
|
332 @@ -90,11 +139,11 @@ |
|
333 if (!strcmp(name, "SUNW,UltraSPARC") || |
|
334 /* covers II,III,IV */ |
|
335 !strncmp(name, "SUNW,UltraSPARC-I", 17)) { |
|
336 - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU | SPARCV9_VIS1; |
|
337 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU | SPARCV9_VIS1; |
|
338 |
|
339 /* %tick is privileged only on UltraSPARC-I/II, but not IIe */ |
|
340 if (name[14] != '\0' && name[17] != '\0' && name[18] != '\0') |
|
341 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
342 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
343 |
|
344 return DI_WALK_TERMINATE; |
|
345 } |
|
346 @@ -100,7 +149,7 @@ |
|
347 } |
|
348 /* This is expected to catch remaining UltraSPARCs, such as T1 */ |
|
349 else if (!strncmp(name, "SUNW,UltraSPARC", 15)) { |
|
350 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
351 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
352 |
|
353 return DI_WALK_TERMINATE; |
|
354 } |
|
355 @@ -119,7 +168,7 @@ |
|
356 trigger = 1; |
|
357 |
|
358 if ((e = getenv("OPENSSL_sparcv9cap"))) { |
|
359 - OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0); |
|
360 + OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0); |
|
361 return; |
|
362 } |
|
363 |
|
364 @@ -126,15 +175,15 @@ |
|
365 if (sysinfo(SI_MACHINE, si, sizeof(si)) > 0) { |
|
366 if (strcmp(si, "sun4v")) |
|
367 /* FPU is preferred for all CPUs, but US-T1/2 */ |
|
368 - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU; |
|
369 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU; |
|
370 } |
|
371 |
|
372 if (sysinfo(SI_ISALIST, si, sizeof(si)) > 0) { |
|
373 if (strstr(si, "+vis")) |
|
374 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; |
|
375 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK; |
|
376 if (strstr(si, "+vis2")) { |
|
377 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; |
|
378 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
379 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; |
|
380 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
381 return; |
|
382 } |
|
383 } |
|
384 @@ -204,12 +253,14 @@ |
|
385 trigger = 1; |
|
386 |
|
387 if ((e = getenv("OPENSSL_sparcv9cap"))) { |
|
388 - OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0); |
|
389 + OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0); |
|
390 + if ((e = strchr(e, ':'))) |
|
391 + OPENSSL_sparcv9cap_P[1] = strtoul(e + 1, NULL, 0); |
|
392 return; |
|
393 } |
|
394 |
|
395 /* Initial value, fits UltraSPARC-I&II... */ |
|
396 - OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU | SPARCV9_TICK_PRIVILEGED; |
|
397 + OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU | SPARCV9_TICK_PRIVILEGED; |
|
398 |
|
399 sigfillset(&all_masked); |
|
400 sigdelset(&all_masked, SIGILL); |
|
401 @@ -232,18 +283,18 @@ |
|
402 |
|
403 if (sigsetjmp(common_jmp, 1) == 0) { |
|
404 _sparcv9_rdtick(); |
|
405 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
406 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
407 } |
|
408 |
|
409 if (sigsetjmp(common_jmp, 1) == 0) { |
|
410 _sparcv9_vis1_probe(); |
|
411 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; |
|
412 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK; |
|
413 /* detect UltraSPARC-Tx, see sparccpud.S for details... */ |
|
414 if (_sparcv9_vis1_instrument() >= 12) |
|
415 - OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU); |
|
416 + OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU); |
|
417 else { |
|
418 _sparcv9_vis2_probe(); |
|
419 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; |
|
420 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; |
|
421 } |
|
422 } |
|
423 |
|
424 @@ -249,13 +300,50 @@ |
|
425 |
|
426 if (sigsetjmp(common_jmp, 1) == 0) { |
|
427 _sparcv9_fmadd_probe(); |
|
428 - OPENSSL_sparcv9cap_P |= SPARCV9_FMADD; |
|
429 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD; |
|
430 } |
|
431 |
|
432 + /* |
|
433 + * VIS3 flag is tested independently from VIS1, unlike VIS2 that is, |
|
434 + * because VIS3 defines even integer instructions. |
|
435 + */ |
|
436 + if (sigsetjmp(common_jmp,1) == 0) { |
|
437 + _sparcv9_vis3_probe(); |
|
438 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3; |
|
439 + } |
|
440 + |
|
441 + if (sigsetjmp(common_jmp,1) == 0) { |
|
442 + (void)_sparcv9_random(); |
|
443 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM; |
|
444 + } |
|
445 + |
|
446 + /* |
|
447 + * Pending a better solution, _sparcv9_rdcfr is gated by the |
|
448 + * VIS3 flag, because it goes into an uninterruptible endless |
|
449 + * loop on UltraSPARC II running Solaris. Things might be |
|
450 + * different on Linux... |
|
451 + */ |
|
452 + if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) && |
|
453 + sigsetjmp(common_jmp, 1) == 0) { |
|
454 + OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr(); |
|
455 + } |
|
456 + |
|
457 sigaction(SIGBUS, &bus_oact, NULL); |
|
458 sigaction(SIGILL, &ill_oact, NULL); |
|
459 |
|
460 sigprocmask(SIG_SETMASK, &oset, NULL); |
|
461 + |
|
462 + if (sizeof(size_t) == 8) |
|
463 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; |
|
464 +#ifdef __linux |
|
465 + else |
|
466 + { |
|
467 + int ret = syscall(340); |
|
468 + |
|
469 + if (ret >= 0 && ret & 1) |
|
470 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; |
|
471 + } |
|
472 +#endif |
|
473 } |
|
474 |
|
475 #endif |
|
476 Index: crypto/md5/Makefile |
|
477 =================================================================== |
|
478 diff -ru openssl-1.0.1e/crypto/md5/Makefile openssl-1.0.1e/crypto/md5/Makefile |
|
479 --- openssl-1.0.1e/crypto/md5/Makefile 2011-05-24 17:02:24.000000000 -0700 |
|
480 +++ openssl-1.0.1e/crypto/md5/Makefile 2011-07-27 10:48:17.817470000 -0700 |
|
481 @@ -52,6 +52,9 @@ |
|
482 $(CC) $(CFLAGS) -E asm/md5-ia64.S | \ |
|
483 $(PERL) -ne 's/;\s+/;\n/g; print;' > $@ |
|
484 |
|
485 +md5-sparcv9.S: asm/md5-sparcv9.pl |
|
486 + $(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS) |
|
487 + |
|
488 files: |
|
489 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO |
|
490 |
|
491 Index: crypto/md5/md5_locl.h |
|
492 =================================================================== |
|
493 diff -ru openssl-1.0.1e/crypto/md5/md5_locl.h openssl-1.0.1e/crypto/md5/md5_locl.h |
|
494 --- openssl-1.0.1e/crypto/md5/md5_locl.h 2011-05-24 17:02:24.000000000 -0700 |
|
495 +++ openssl-1.0.1e/crypto/md5/md5_locl.h 2011-07-27 10:48:17.817470000 -0700 |
|
496 @@ -71,6 +71,8 @@ |
|
497 # define md5_block_data_order md5_block_asm_data_order |
|
498 # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) |
|
499 # define md5_block_data_order md5_block_asm_data_order |
|
500 +# elif defined(__sparc) || defined(__sparc__) |
|
501 +# define md5_block_data_order md5_block_asm_data_order |
|
502 # endif |
|
503 #endif |
|
504 |
|
505 Index: crypto/sha/Makefile |
|
506 =================================================================== |
|
507 diff -ru openssl-1.0.1e/crypto/sha/Makefile openssl-1.0.1e/crypto/sha/Makefile |
|
508 --- openssl-1.0.1e/crypto/sha/Makefile 2011-05-24 17:02:24.000000000 -0700 |
|
509 +++ openssl-1.0.1e/crypto/sha/Makefile 2011-07-27 10:48:17.817470000 -0700 |
|
510 @@ -68,9 +68,9 @@ |
|
511 sha1-x86_64.s: asm/sha1-x86_64.pl; $(PERL) asm/sha1-x86_64.pl $(PERLASM_SCHEME) > $@ |
|
512 sha256-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ |
|
513 sha512-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ |
|
514 -sha1-sparcv9.s: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS) |
|
515 -sha256-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
516 -sha512-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
517 +sha1-sparcv9.S: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS) |
|
518 +sha256-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
519 +sha512-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
520 |
|
521 sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@ |
|
522 sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ |
|
523 Index: crypto/sha/asm/sha1-sparcv9.pl |
|
524 =================================================================== |
|
525 diff -ru openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl |
|
526 --- openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700 |
|
527 +++ openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700 |
|
528 @@ -5,6 +5,8 @@ |
|
529 # project. The module is, however, dual licensed under OpenSSL and |
|
530 # CRYPTOGAMS licenses depending on where you obtain it. For further |
|
531 # details see http://www.openssl.org/~appro/cryptogams/. |
|
532 +# |
|
533 +# Hardware SPARC T4 support by David S. Miller <[email protected]>. |
|
534 # ==================================================================== |
|
535 |
|
536 # Performance improvement is not really impressive on pre-T1 CPU: +8% |
|
537 @@ -18,6 +20,11 @@ |
|
538 # ensure scalability on UltraSPARC T1, or rather to avoid decay when |
|
539 # amount of active threads exceeds the number of physical cores. |
|
540 |
|
541 +# SPARC T4 SHA1 hardware achieves 3.72 cycles per byte, which is 3.1x |
|
542 +# faster than software. Multi-process benchmark saturates at 11x |
|
543 +# single-process result on 8-core processor, or ~9GBps per 2.85GHz |
|
544 +# socket. |
|
545 + |
|
546 $bits=32; |
|
547 for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } |
|
548 if ($bits==64) { $bias=2047; $frame=192; } |
|
549 @@ -183,11 +190,93 @@ |
|
550 .register %g3,#scratch |
|
551 ___ |
|
552 $code.=<<___; |
|
553 +#include "sparc_arch.h" |
|
554 + |
|
555 .section ".text",#alloc,#execinstr |
|
556 |
|
557 +#ifdef __PIC__ |
|
558 +SPARC_PIC_THUNK(%g1) |
|
559 +#endif |
|
560 + |
|
561 .align 32 |
|
562 .globl sha1_block_data_order |
|
563 sha1_block_data_order: |
|
564 + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) |
|
565 + ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1] |
|
566 + |
|
567 + andcc %g1, CFR_SHA1, %g0 |
|
568 + be .Lsoftware |
|
569 + nop |
|
570 + |
|
571 + ld [%o0 + 0x00], %f0 ! load context |
|
572 + ld [%o0 + 0x04], %f1 |
|
573 + ld [%o0 + 0x08], %f2 |
|
574 + andcc %o1, 0x7, %g0 |
|
575 + ld [%o0 + 0x0c], %f3 |
|
576 + bne,pn %icc, .Lhwunaligned |
|
577 + ld [%o0 + 0x10], %f4 |
|
578 + |
|
579 +.Lhw_loop: |
|
580 + ldd [%o1 + 0x00], %f8 |
|
581 + ldd [%o1 + 0x08], %f10 |
|
582 + ldd [%o1 + 0x10], %f12 |
|
583 + ldd [%o1 + 0x18], %f14 |
|
584 + ldd [%o1 + 0x20], %f16 |
|
585 + ldd [%o1 + 0x28], %f18 |
|
586 + ldd [%o1 + 0x30], %f20 |
|
587 + subcc %o2, 1, %o2 ! done yet? |
|
588 + ldd [%o1 + 0x38], %f22 |
|
589 + add %o1, 0x40, %o1 |
|
590 + |
|
591 + .word 0x81b02820 ! SHA1 |
|
592 + |
|
593 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhw_loop |
|
594 + nop |
|
595 + |
|
596 +.Lhwfinish: |
|
597 + st %f0, [%o0 + 0x00] ! store context |
|
598 + st %f1, [%o0 + 0x04] |
|
599 + st %f2, [%o0 + 0x08] |
|
600 + st %f3, [%o0 + 0x0c] |
|
601 + retl |
|
602 + st %f4, [%o0 + 0x10] |
|
603 + |
|
604 +.align 8 |
|
605 +.Lhwunaligned: |
|
606 + alignaddr %o1, %g0, %o1 |
|
607 + |
|
608 + ldd [%o1 + 0x00], %f10 |
|
609 +.Lhwunaligned_loop: |
|
610 + ldd [%o1 + 0x08], %f12 |
|
611 + ldd [%o1 + 0x10], %f14 |
|
612 + ldd [%o1 + 0x18], %f16 |
|
613 + ldd [%o1 + 0x20], %f18 |
|
614 + ldd [%o1 + 0x28], %f20 |
|
615 + ldd [%o1 + 0x30], %f22 |
|
616 + ldd [%o1 + 0x38], %f24 |
|
617 + subcc %o2, 1, %o2 ! done yet? |
|
618 + ldd [%o1 + 0x40], %f26 |
|
619 + add %o1, 0x40, %o1 |
|
620 + |
|
621 + faligndata %f10, %f12, %f8 |
|
622 + faligndata %f12, %f14, %f10 |
|
623 + faligndata %f14, %f16, %f12 |
|
624 + faligndata %f16, %f18, %f14 |
|
625 + faligndata %f18, %f20, %f16 |
|
626 + faligndata %f20, %f22, %f18 |
|
627 + faligndata %f22, %f24, %f20 |
|
628 + faligndata %f24, %f26, %f22 |
|
629 + |
|
630 + .word 0x81b02820 ! SHA1 |
|
631 + |
|
632 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop |
|
633 + for %f26, %f26, %f10 ! %f10=%f26 |
|
634 + |
|
635 + ba .Lhwfinish |
|
636 + nop |
|
637 + |
|
638 +.align 16 |
|
639 +.Lsoftware: |
|
640 save %sp,-$frame,%sp |
|
641 sllx $len,6,$len |
|
642 add $inp,$len,$len |
|
643 @@ -279,6 +368,62 @@ |
|
644 .align 4 |
|
645 ___ |
|
646 |
|
647 -$code =~ s/\`([^\`]*)\`/eval $1/gem; |
|
648 -print $code; |
|
649 +# Purpose of these subroutines is to explicitly encode VIS instructions, |
|
650 +# so that one can compile the module without having to specify VIS |
|
651 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. |
|
652 +# Idea is to reserve for option to produce "universal" binary and let |
|
653 +# programmer detect if current CPU is VIS capable at run-time. |
|
654 +sub unvis { # encode a three-operand VIS op as a raw .word, or return the text unchanged |
|
655 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
656 +my ($ref,$opf); # parenthesized so that $opf is lexical too, not a package global |
|
657 +my %visopf = ( "faligndata" => 0x048, |
|
658 + "for" => 0x07c ); |
|
659 + |
|
660 + $ref = "$mnemonic\t$rs1,$rs2,$rd"; |
|
661 + |
|
662 + if ($opf=$visopf{$mnemonic}) { |
|
663 + foreach ($rs1,$rs2,$rd) { # $_ aliases each operand, so assignments rewrite it in place |
|
664 + return $ref if (!/%f([0-9]{1,2})/); # not an %fN operand: emit the mnemonic as is |
|
665 + $_=$1; # keep only the register number |
|
666 + if ($1>=32) { |
|
667 + return $ref if ($1&1); # odd register numbers from 32 up are not encoded |
|
668 + # re-encode for upper double register addressing |
|
669 + $_=($1|$1>>5)&31; |
|
670 + } |
|
671 + } |
|
672 + |
|
673 + return sprintf ".word\t0x%08x !%s", |
|
674 + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, |
|
675 + $ref; |
|
676 + } else { |
|
677 + return $ref; # mnemonic is not in %visopf |
|
678 + } |
|
679 +} |
|
680 +sub unalignaddr { # encode alignaddr as a raw .word, or return the text unchanged |
|
681 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
682 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); # register number of %g0, %o0, %l0, %i0 |
|
683 +my $ref="$mnemonic\t$rs1,$rs2,$rd"; |
|
684 + |
|
685 + foreach ($rs1,$rs2,$rd) { # $_ aliases each operand, so assignments rewrite it in place |
|
686 + if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; } |
|
687 + else { return $ref; } # operand is not an integer register name |
|
688 + } |
|
689 + return sprintf ".word\t0x%08x !%s", |
|
690 + 0x81b00300|$rd<<25|$rs1<<14|$rs2, |
|
691 + $ref; |
|
692 +} |
|
693 + |
|
694 +foreach (split("\n",$code)) { # post-process the generated assembly one line at a time |
|
695 + s/\`([^\`]*)\`/eval $1/ge; # replace each backtick-quoted expression with its evaluated value |
|
696 + |
|
697 + s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ |
|
698 + &unvis($1,$2,$3,$4) |
|
699 + /ge; # three-operand f* instructions on %f registers go through unvis |
|
700 + s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ |
|
701 + &unalignaddr($1,$2,$3,$4) |
|
702 + /ge; # alignaddr on integer registers goes through unalignaddr |
|
703 + |
|
704 + print $_,"\n"; # emit the possibly rewritten line |
|
705 +} |
|
706 + |
|
707 close STDOUT; |
|
708 |
|
709 Index: crypto/sha/asm/sha512-sparcv9.pl |
|
710 =================================================================== |
|
711 diff -ru openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl |
|
712 --- openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700 |
|
713 +++ openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700 |
|
714 @@ -5,6 +5,8 @@ |
|
715 # project. The module is, however, dual licensed under OpenSSL and |
|
716 # CRYPTOGAMS licenses depending on where you obtain it. For further |
|
717 # details see http://www.openssl.org/~appro/cryptogams/. |
|
718 +# |
|
719 +# Hardware SPARC T4 support by David S. Miller <[email protected]>. |
|
720 # ==================================================================== |
|
721 |
|
722 # SHA256 performance improvement over compiler generated code varies |
|
723 @@ -41,6 +43,12 @@ |
|
724 # loads are always slower than one 64-bit load. Once again this |
|
725 # is unlike pre-T1 UltraSPARC, where, if scheduled appropriately, |
|
726 # 2x32-bit loads can be as fast as 1x64-bit ones. |
|
727 +# |
|
728 +# SPARC T4 SHA256/512 hardware achieves 3.17/2.01 cycles per byte, |
|
729 +# which is 9.3x/11.1x faster than software. Multi-process benchmark |
|
730 +# saturates at 11.5x single-process result on 8-core processor, or |
|
731 +# ~11/16GBps per 2.85GHz socket. |
|
732 + |
|
733 |
|
734 $bits=32; |
|
735 for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } |
|
736 @@ -386,6 +394,8 @@ |
|
737 .register %g3,#scratch |
|
738 ___ |
|
739 $code.=<<___; |
|
740 +#include "sparc_arch.h" |
|
741 + |
|
742 .section ".text",#alloc,#execinstr |
|
743 |
|
744 .align 64 |
|
745 @@ -457,8 +467,196 @@ |
|
746 } |
|
747 $code.=<<___; |
|
748 .size K${label},.-K${label} |
|
749 + |
|
750 +#ifdef __PIC__ |
|
751 +SPARC_PIC_THUNK(%g1) |
|
752 +#endif |
|
753 + |
|
754 .globl sha${label}_block_data_order |
|
755 +.align 32 |
|
756 sha${label}_block_data_order: |
|
757 + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) |
|
758 + ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1] |
|
759 + |
|
760 + andcc %g1, CFR_SHA${label}, %g0 |
|
761 + be .Lsoftware |
|
762 + nop |
|
763 +___ |
|
764 +$code.=<<___ if ($SZ==8); # SHA512 |
|
765 + ldd [%o0 + 0x00], %f0 ! load context |
|
766 + ldd [%o0 + 0x08], %f2 |
|
767 + ldd [%o0 + 0x10], %f4 |
|
768 + ldd [%o0 + 0x18], %f6 |
|
769 + ldd [%o0 + 0x20], %f8 |
|
770 + ldd [%o0 + 0x28], %f10 |
|
771 + andcc %o1, 0x7, %g0 |
|
772 + ldd [%o0 + 0x30], %f12 |
|
773 + bne,pn %icc, .Lhwunaligned |
|
774 + ldd [%o0 + 0x38], %f14 |
|
775 + |
|
776 +.Lhwaligned_loop: |
|
777 + ldd [%o1 + 0x00], %f16 |
|
778 + ldd [%o1 + 0x08], %f18 |
|
779 + ldd [%o1 + 0x10], %f20 |
|
780 + ldd [%o1 + 0x18], %f22 |
|
781 + ldd [%o1 + 0x20], %f24 |
|
782 + ldd [%o1 + 0x28], %f26 |
|
783 + ldd [%o1 + 0x30], %f28 |
|
784 + ldd [%o1 + 0x38], %f30 |
|
785 + ldd [%o1 + 0x40], %f32 |
|
786 + ldd [%o1 + 0x48], %f34 |
|
787 + ldd [%o1 + 0x50], %f36 |
|
788 + ldd [%o1 + 0x58], %f38 |
|
789 + ldd [%o1 + 0x60], %f40 |
|
790 + ldd [%o1 + 0x68], %f42 |
|
791 + ldd [%o1 + 0x70], %f44 |
|
792 + subcc %o2, 1, %o2 ! done yet? |
|
793 + ldd [%o1 + 0x78], %f46 |
|
794 + add %o1, 0x80, %o1 |
|
795 + |
|
796 + .word 0x81b02860 ! SHA512 |
|
797 + |
|
798 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwaligned_loop |
|
799 + nop |
|
800 + |
|
801 +.Lhwfinish: |
|
802 + std %f0, [%o0 + 0x00] ! store context |
|
803 + std %f2, [%o0 + 0x08] |
|
804 + std %f4, [%o0 + 0x10] |
|
805 + std %f6, [%o0 + 0x18] |
|
806 + std %f8, [%o0 + 0x20] |
|
807 + std %f10, [%o0 + 0x28] |
|
808 + std %f12, [%o0 + 0x30] |
|
809 + retl |
|
810 + std %f14, [%o0 + 0x38] |
|
811 + |
|
812 +.align 16 |
|
813 +.Lhwunaligned: |
|
814 + alignaddr %o1, %g0, %o1 |
|
815 + |
|
816 + ldd [%o1 + 0x00], %f18 |
|
817 +.Lhwunaligned_loop: |
|
818 + ldd [%o1 + 0x08], %f20 |
|
819 + ldd [%o1 + 0x10], %f22 |
|
820 + ldd [%o1 + 0x18], %f24 |
|
821 + ldd [%o1 + 0x20], %f26 |
|
822 + ldd [%o1 + 0x28], %f28 |
|
823 + ldd [%o1 + 0x30], %f30 |
|
824 + ldd [%o1 + 0x38], %f32 |
|
825 + ldd [%o1 + 0x40], %f34 |
|
826 + ldd [%o1 + 0x48], %f36 |
|
827 + ldd [%o1 + 0x50], %f38 |
|
828 + ldd [%o1 + 0x58], %f40 |
|
829 + ldd [%o1 + 0x60], %f42 |
|
830 + ldd [%o1 + 0x68], %f44 |
|
831 + ldd [%o1 + 0x70], %f46 |
|
832 + ldd [%o1 + 0x78], %f48 |
|
833 + subcc %o2, 1, %o2 ! done yet? |
|
834 + ldd [%o1 + 0x80], %f50 |
|
835 + add %o1, 0x80, %o1 |
|
836 + |
|
837 + faligndata %f18, %f20, %f16 |
|
838 + faligndata %f20, %f22, %f18 |
|
839 + faligndata %f22, %f24, %f20 |
|
840 + faligndata %f24, %f26, %f22 |
|
841 + faligndata %f26, %f28, %f24 |
|
842 + faligndata %f28, %f30, %f26 |
|
843 + faligndata %f30, %f32, %f28 |
|
844 + faligndata %f32, %f34, %f30 |
|
845 + faligndata %f34, %f36, %f32 |
|
846 + faligndata %f36, %f38, %f34 |
|
847 + faligndata %f38, %f40, %f36 |
|
848 + faligndata %f40, %f42, %f38 |
|
849 + faligndata %f42, %f44, %f40 |
|
850 + faligndata %f44, %f46, %f42 |
|
851 + faligndata %f46, %f48, %f44 |
|
852 + faligndata %f48, %f50, %f46 |
|
853 + |
|
854 + .word 0x81b02860 ! SHA512 |
|
855 + |
|
856 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop |
|
857 + for %f50, %f50, %f18 ! %f18=%f50 |
|
858 + |
|
859 + ba .Lhwfinish |
|
860 + nop |
|
861 +___ |
|
862 +$code.=<<___ if ($SZ==4); # SHA256 |
|
863 + ld [%o0 + 0x00], %f0 |
|
864 + ld [%o0 + 0x04], %f1 |
|
865 + ld [%o0 + 0x08], %f2 |
|
866 + ld [%o0 + 0x0c], %f3 |
|
867 + ld [%o0 + 0x10], %f4 |
|
868 + ld [%o0 + 0x14], %f5 |
|
869 + andcc %o1, 0x7, %g0 |
|
870 + ld [%o0 + 0x18], %f6 |
|
871 + bne,pn %icc, .Lhwunaligned |
|
872 + ld [%o0 + 0x1c], %f7 |
|
873 + |
|
874 +.Lhwloop: |
|
875 + ldd [%o1 + 0x00], %f8 |
|
876 + ldd [%o1 + 0x08], %f10 |
|
877 + ldd [%o1 + 0x10], %f12 |
|
878 + ldd [%o1 + 0x18], %f14 |
|
879 + ldd [%o1 + 0x20], %f16 |
|
880 + ldd [%o1 + 0x28], %f18 |
|
881 + ldd [%o1 + 0x30], %f20 |
|
882 + subcc %o2, 1, %o2 ! done yet? |
|
883 + ldd [%o1 + 0x38], %f22 |
|
884 + add %o1, 0x40, %o1 |
|
885 + |
|
886 + .word 0x81b02840 ! SHA256 |
|
887 + |
|
888 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwloop |
|
889 + nop |
|
890 + |
|
891 +.Lhwfinish: |
|
892 + st %f0, [%o0 + 0x00] ! store context |
|
893 + st %f1, [%o0 + 0x04] |
|
894 + st %f2, [%o0 + 0x08] |
|
895 + st %f3, [%o0 + 0x0c] |
|
896 + st %f4, [%o0 + 0x10] |
|
897 + st %f5, [%o0 + 0x14] |
|
898 + st %f6, [%o0 + 0x18] |
|
899 + retl |
|
900 + st %f7, [%o0 + 0x1c] |
|
901 + |
|
902 +.align 8 |
|
903 +.Lhwunaligned: |
|
904 + alignaddr %o1, %g0, %o1 |
|
905 + |
|
906 + ldd [%o1 + 0x00], %f10 |
|
907 +.Lhwunaligned_loop: |
|
908 + ldd [%o1 + 0x08], %f12 |
|
909 + ldd [%o1 + 0x10], %f14 |
|
910 + ldd [%o1 + 0x18], %f16 |
|
911 + ldd [%o1 + 0x20], %f18 |
|
912 + ldd [%o1 + 0x28], %f20 |
|
913 + ldd [%o1 + 0x30], %f22 |
|
914 + ldd [%o1 + 0x38], %f24 |
|
915 + subcc %o2, 1, %o2 ! done yet? |
|
916 + ldd [%o1 + 0x40], %f26 |
|
917 + add %o1, 0x40, %o1 |
|
918 + |
|
919 + faligndata %f10, %f12, %f8 |
|
920 + faligndata %f12, %f14, %f10 |
|
921 + faligndata %f14, %f16, %f12 |
|
922 + faligndata %f16, %f18, %f14 |
|
923 + faligndata %f18, %f20, %f16 |
|
924 + faligndata %f20, %f22, %f18 |
|
925 + faligndata %f22, %f24, %f20 |
|
926 + faligndata %f24, %f26, %f22 |
|
927 + |
|
928 + .word 0x81b02840 ! SHA256 |
|
929 + |
|
930 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop |
|
931 + for %f26, %f26, %f10 ! %f10=%f26 |
|
932 + |
|
933 + ba .Lhwfinish |
|
934 + nop |
|
935 +___ |
|
936 +$code.=<<___; |
|
937 +.align 16 |
|
938 +.Lsoftware: |
|
939 save %sp,`-$frame-$locals`,%sp |
|
940 and $inp,`$align-1`,$tmp31 |
|
941 sllx $len,`log(16*$SZ)/log(2)`,$len |
|
942 @@ -589,6 +787,62 @@ |
|
943 .align 4 |
|
944 ___ |
|
945 |
|
946 -$code =~ s/\`([^\`]*)\`/eval $1/gem; |
|
947 -print $code; |
|
948 +# Purpose of these subroutines is to explicitly encode VIS instructions, |
|
949 +# so that one can compile the module without having to specify VIS |
|
950 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. |
|
951 +# Idea is to reserve for option to produce "universal" binary and let |
|
952 +# programmer detect if current CPU is VIS capable at run-time. |
|
953 +sub unvis { |
|
954 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
955 +my ($ref,$opf); |
|
956 +my %visopf = ( "faligndata" => 0x048, |
|
957 + "for" => 0x07c ); |
|
958 + |
|
959 + $ref = "$mnemonic\t$rs1,$rs2,$rd"; |
|
960 + |
|
961 + if ($opf=$visopf{$mnemonic}) { |
|
962 + foreach ($rs1,$rs2,$rd) { |
|
963 + return $ref if (!/%f([0-9]{1,2})/); |
|
964 + $_=$1; |
|
965 + if ($1>=32) { |
|
966 + return $ref if ($1&1); |
|
967 + # re-encode for upper double register addressing |
|
968 + $_=($1|$1>>5)&31; |
|
969 + } |
|
970 + } |
|
971 + |
|
972 + return sprintf ".word\t0x%08x !%s", |
|
973 + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, |
|
974 + $ref; |
|
975 + } else { |
|
976 + return $ref; |
|
977 + } |
|
978 +} |
|
979 +sub unalignaddr { |
|
980 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
981 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); |
|
982 +my $ref="$mnemonic\t$rs1,$rs2,$rd"; |
|
983 + |
|
984 + foreach ($rs1,$rs2,$rd) { |
|
985 + if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; } |
|
986 + else { return $ref; } |
|
987 + } |
|
988 + return sprintf ".word\t0x%08x !%s", |
|
989 + 0x81b00300|$rd<<25|$rs1<<14|$rs2, |
|
990 + $ref; |
|
991 +} |
|
992 + |
|
993 +foreach (split("\n",$code)) { |
|
994 + s/\`([^\`]*)\`/eval $1/ge; |
|
995 + |
|
996 + s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ |
|
997 + &unvis($1,$2,$3,$4) |
|
998 + /ge; |
|
999 + s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ |
|
1000 + &unalignaddr($1,$2,$3,$4) |
|
1001 + /ge; |
|
1002 + |
|
1003 + print $_,"\n"; |
|
1004 +} |
|
1005 + |
|
1006 close STDOUT; |
|
1007 Index: crypto/des/Makefile |
|
1008 =================================================================== |
|
1009 diff -ru openssl-1.0.1e/crypto/des/Makefile.orig openssl-1.0.1e/crypto/des/Makefile |
|
1010 --- a/crypto/des/Makefile |
|
1011 +++ b/crypto/des/Makefile |
|
1012 @@ -61,6 +61,8 @@ des: des.o cbc3_enc.o lib |
|
1013 |
|
1014 des_enc-sparc.S: asm/des_enc.m4 |
|
1015 m4 -B 8192 asm/des_enc.m4 > des_enc-sparc.S |
|
1016 +dest4-sparcv9.s: asm/dest4-sparcv9.pl |
|
1017 + $(PERL) asm/dest4-sparcv9.pl $(CFLAGS) > $@ |
|
1018 |
|
1019 des-586.s: asm/des-586.pl ../perlasm/x86asm.pl ../perlasm/cbc.pl |
|
1020 $(PERL) asm/des-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@ |
|
1021 Index: crypto/evp/e_des.c |
|
1022 =================================================================== |
|
1023 diff -ru openssl-1.0.1e/crypto/evp/e_des.c.orig openssl-1.0.1e/crypto/evp/e_des.c |
|
1024 --- a/crypto/evp/e_des.c |
|
1025 +++ b/crypto/evp/e_des.c |
|
1026 @@ -65,6 +65,30 @@ |
|
1027 # include <openssl/des.h> |
|
1028 # include <openssl/rand.h> |
|
1029 |
|
1030 +typedef struct { |
|
1031 + union { double align; DES_key_schedule ks; } ks; |
|
1032 + union { |
|
1033 + void (*cbc)(const void *,void *,size_t,const void *,void *); |
|
1034 + } stream; |
|
1035 +} EVP_DES_KEY; |
|
1036 + |
|
1037 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) |
|
1038 +/* ---------^^^ this is not a typo, just a way to detect that |
|
1039 + * assembler support was in general requested... |
|
1040 + */ |
|
1041 +#include "sparc_arch.h" |
|
1042 + |
|
1043 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1044 + |
|
1045 +#define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) |
|
1046 + |
|
1047 +void des_t4_key_expand(const void *key, DES_key_schedule *ks); |
|
1048 +void des_t4_cbc_encrypt(const void *inp,void *out,size_t len, |
|
1049 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1050 +void des_t4_cbc_decrypt(const void *inp,void *out,size_t len, |
|
1051 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1052 +#endif |
|
1053 + |
|
1054 static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1055 const unsigned char *iv, int enc); |
|
1056 static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); |
|
1057 @@ -102,6 +126,12 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1058 static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1059 const unsigned char *in, size_t inl) |
|
1060 { |
|
1061 + EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; |
|
1062 + |
|
1063 + if (dat->stream.cbc) { |
|
1064 + (*dat->stream.cbc)(in,out,inl,&dat->ks.ks,ctx->iv); |
|
1065 + return 1; |
|
1066 + } |
|
1067 while (inl >= EVP_MAXCHUNK) { |
|
1068 DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data, |
|
1069 (DES_cblock *)ctx->iv, ctx->encrypt); |
|
1070 @@ -179,16 +209,16 @@ |
|
1071 return 1; |
|
1072 } |
|
1073 |
|
1074 -BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64, |
|
1075 +BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64, |
|
1076 EVP_CIPH_RAND_KEY, des_init_key, NULL, |
|
1077 EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) |
|
1078 |
|
1079 |
|
1080 -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 1, |
|
1081 +BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 1, |
|
1082 EVP_CIPH_RAND_KEY, des_init_key, NULL, |
|
1083 EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) |
|
1084 |
|
1085 -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 8, |
|
1086 +BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 8, |
|
1087 EVP_CIPH_RAND_KEY, des_init_key, NULL, |
|
1088 EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) |
|
1089 |
|
1090 @@ -196,8 +226,23 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1091 const unsigned char *iv, int enc) |
|
1092 { |
|
1093 DES_cblock *deskey = (DES_cblock *)key; |
|
1094 + EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; |
|
1095 + |
|
1096 + dat->stream.cbc = NULL; |
|
1097 +#if defined(SPARC_DES_CAPABLE) |
|
1098 + if (SPARC_DES_CAPABLE) { |
|
1099 + int mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1100 + |
|
1101 + if (mode == EVP_CIPH_CBC_MODE) { |
|
1102 + des_t4_key_expand(key,&dat->ks.ks); |
|
1103 + dat->stream.cbc = enc ? des_t4_cbc_encrypt : |
|
1104 + des_t4_cbc_decrypt; |
|
1105 + return 1; |
|
1106 + } |
|
1107 + } |
|
1108 +#endif |
|
1109 # ifdef EVP_CHECK_DES_KEY |
|
1110 - if (DES_set_key_checked(deskey, ctx->cipher_data) != 0) |
|
1111 + if (DES_set_key_checked(deskey, &dat->ks.ks) != 0) |
|
1112 return 0; |
|
1113 # else |
|
1114 DES_set_key_unchecked(deskey, ctx->cipher_data); |
|
1115 Index: crypto/evp/e_des3.c |
|
1116 =================================================================== |
|
1117 diff -ru openssl-1.0.1e/crypto/evp/e_des3.c.orig openssl-1.0.1e/crypto/evp/e_des3.c |
|
1118 --- a/crypto/evp/e_des3.c |
|
1119 +++ b/crypto/evp/e_des3.c |
|
1120 @@ -65,6 +65,32 @@ |
|
1121 # include <openssl/des.h> |
|
1122 # include <openssl/rand.h> |
|
1123 |
|
1124 +typedef struct { |
|
1125 + union { double align; DES_key_schedule ks[3]; } ks; |
|
1126 + union { |
|
1127 + void (*cbc)(const void *,void *,size_t,const void *,void *); |
|
1128 + } stream; |
|
1129 +} DES_EDE_KEY; |
|
1130 +#define ks1 ks.ks[0] |
|
1131 +#define ks2 ks.ks[1] |
|
1132 +#define ks3 ks.ks[2] |
|
1133 + |
|
1134 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) |
|
1135 +/* ---------^^^ this is not a typo, just a way to detect that |
|
1136 + * assembler support was in general requested... */ |
|
1137 +#include "sparc_arch.h" |
|
1138 + |
|
1139 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1140 + |
|
1141 +#define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) |
|
1142 + |
|
1143 +void des_t4_key_expand(const void *key, DES_key_schedule *ks); |
|
1144 +void des_t4_ede3_cbc_encrypt(const void *inp,void *out,size_t len, |
|
1145 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1146 +void des_t4_ede3_cbc_decrypt(const void *inp,void *out,size_t len, |
|
1147 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1148 +#endif |
|
1149 + |
|
1150 # ifndef OPENSSL_FIPS |
|
1151 |
|
1152 static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1153 @@ -75,12 +100,6 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1154 |
|
1155 static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); |
|
1156 |
|
1157 -typedef struct { |
|
1158 - DES_key_schedule ks1; /* key schedule */ |
|
1159 - DES_key_schedule ks2; /* key schedule (for ede) */ |
|
1160 - DES_key_schedule ks3; /* key schedule (for ede3) */ |
|
1161 -} DES_EDE_KEY; |
|
1162 - |
|
1163 # define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data) |
|
1164 |
|
1165 /* |
|
1166 @@ -123,6 +117,7 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1167 static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1168 const unsigned char *in, size_t inl) |
|
1169 { |
|
1170 + DES_EDE_KEY *dat = data(ctx); |
|
1171 # ifdef KSSL_DEBUG |
|
1172 { |
|
1173 int i; |
|
1174 @@ -134,11 +155,15 @@ |
|
1175 fprintf(stderr, "\n"); |
|
1176 } |
|
1177 # endif /* KSSL_DEBUG */ |
|
1178 + if (dat->stream.cbc) { |
|
1179 + (*dat->stream.cbc)(in,out,inl,&dat->ks,ctx->iv); |
|
1180 + return 1; |
|
1181 + } |
|
1182 + |
|
1183 while (inl >= EVP_MAXCHUNK) { |
|
1184 DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK, |
|
1185 - &data(ctx)->ks1, &data(ctx)->ks2, |
|
1186 - &data(ctx)->ks3, (DES_cblock *)ctx->iv, |
|
1187 - ctx->encrypt); |
|
1188 + &dat->ks1, &dat->ks2, &dat->ks3, |
|
1189 + (DES_cblock *)ctx->iv, ctx->encrypt); |
|
1190 inl -= EVP_MAXCHUNK; |
|
1191 in += EVP_MAXCHUNK; |
|
1192 out += EVP_MAXCHUNK; |
|
1193 @@ -145,9 +170,8 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1194 } |
|
1195 if (inl) |
|
1196 DES_ede3_cbc_encrypt(in, out, (long)inl, |
|
1197 - &data(ctx)->ks1, &data(ctx)->ks2, |
|
1198 - &data(ctx)->ks3, (DES_cblock *)ctx->iv, |
|
1199 - ctx->encrypt); |
|
1200 + &dat->ks1, &dat->ks2, &dat->ks3, |
|
1201 + (DES_cblock *)ctx->iv, ctx->encrypt); |
|
1202 return 1; |
|
1203 } |
|
1204 |
|
1205 @@ -215,39 +239,58 @@ static int des_ede3_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1206 } |
|
1207 |
|
1208 BLOCK_CIPHER_defs(des_ede, DES_EDE_KEY, NID_des_ede, 8, 16, 8, 64, |
|
1209 - EVP_CIPH_RAND_KEY, des_ede_init_key, NULL, |
|
1210 - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl) |
|
1211 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1212 + des_ede_init_key, NULL, NULL, NULL, |
|
1213 + des3_ctrl) |
|
1214 # define des_ede3_cfb64_cipher des_ede_cfb64_cipher |
|
1215 # define des_ede3_ofb_cipher des_ede_ofb_cipher |
|
1216 # define des_ede3_cbc_cipher des_ede_cbc_cipher |
|
1217 # define des_ede3_ecb_cipher des_ede_ecb_cipher |
|
1218 BLOCK_CIPHER_defs(des_ede3, DES_EDE_KEY, NID_des_ede3, 8, 24, 8, 64, |
|
1219 - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, |
|
1220 - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl) |
|
1221 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1222 + des_ede3_init_key, NULL, NULL, NULL, |
|
1223 + des3_ctrl) |
|
1224 |
|
1225 BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 1, |
|
1226 - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, |
|
1227 - EVP_CIPHER_set_asn1_iv, |
|
1228 - EVP_CIPHER_get_asn1_iv, des3_ctrl) |
|
1229 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1230 + des_ede3_init_key, NULL, NULL, NULL, |
|
1231 + des3_ctrl) |
|
1232 |
|
1233 BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 8, |
|
1234 - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, |
|
1235 - EVP_CIPHER_set_asn1_iv, |
|
1236 - EVP_CIPHER_get_asn1_iv, des3_ctrl) |
|
1237 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1238 + des_ede3_init_key, NULL, NULL, NULL, |
|
1239 + des3_ctrl) |
|
1240 |
|
1241 static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1242 const unsigned char *iv, int enc) |
|
1243 { |
|
1244 DES_cblock *deskey = (DES_cblock *)key; |
|
1245 + DES_EDE_KEY *dat = data(ctx); |
|
1246 + |
|
1247 + dat->stream.cbc = NULL; |
|
1248 +#if defined(SPARC_DES_CAPABLE) |
|
1249 + if (SPARC_DES_CAPABLE) { |
|
1250 + int mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1251 + |
|
1252 + if (mode == EVP_CIPH_CBC_MODE) { |
|
1253 + des_t4_key_expand(&deskey[0],&dat->ks1); |
|
1254 + des_t4_key_expand(&deskey[1],&dat->ks2); |
|
1255 + memcpy(&dat->ks3,&dat->ks1,sizeof(dat->ks1)); |
|
1256 + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : |
|
1257 + des_t4_ede3_cbc_decrypt; |
|
1258 + return 1; |
|
1259 + } |
|
1260 + } |
|
1261 +#endif |
|
1262 # ifdef EVP_CHECK_DES_KEY |
|
1263 - if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1) |
|
1264 - ! !DES_set_key_checked(&deskey[1], &data(ctx)->ks2)) |
|
1265 + if (DES_set_key_checked(&deskey[0],&dat->ks1) |
|
1266 + || DES_set_key_checked(&deskey[1],&dat->ks2)) |
|
1267 return 0; |
|
1268 # else |
|
1269 - DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1); |
|
1270 - DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2); |
|
1271 + DES_set_key_unchecked(&deskey[0],&dat->ks1); |
|
1272 + DES_set_key_unchecked(&deskey[1],&dat->ks2); |
|
1273 # endif |
|
1274 - memcpy(&data(ctx)->ks3, &data(ctx)->ks1, sizeof(data(ctx)->ks1)); |
|
1275 + memcpy(&dat->ks3,&dat->ks1, sizeof(dat->ks1)); |
|
1276 return 1; |
|
1277 } |
|
1278 |
|
1279 @@ -255,6 +298,8 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1280 const unsigned char *iv, int enc) |
|
1281 { |
|
1282 DES_cblock *deskey = (DES_cblock *)key; |
|
1283 + DES_EDE_KEY *dat = data(ctx); |
|
1284 + |
|
1285 # ifdef KSSL_DEBUG |
|
1286 { |
|
1287 int i; |
|
1288 @@ -272,15 +317,30 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1289 } |
|
1290 # endif /* KSSL_DEBUG */ |
|
1291 |
|
1292 + dat->stream.cbc = NULL; |
|
1293 +#if defined(SPARC_DES_CAPABLE) |
|
1294 + if (SPARC_DES_CAPABLE) { |
|
1295 + int mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1296 + |
|
1297 + if (mode == EVP_CIPH_CBC_MODE) { |
|
1298 + des_t4_key_expand(&deskey[0],&dat->ks1); |
|
1299 + des_t4_key_expand(&deskey[1],&dat->ks2); |
|
1300 + des_t4_key_expand(&deskey[2],&dat->ks3); |
|
1301 + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : |
|
1302 + des_t4_ede3_cbc_decrypt; |
|
1303 + return 1; |
|
1304 + } |
|
1305 + } |
|
1306 +#endif |
|
1307 # ifdef EVP_CHECK_DES_KEY |
|
1308 - if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1) |
|
1309 - || DES_set_key_checked(&deskey[1], &data(ctx)->ks2) |
|
1310 - || DES_set_key_checked(&deskey[2], &data(ctx)->ks3)) |
|
1311 + if (DES_set_key_checked(&deskey[0],&dat->ks1) |
|
1312 + || DES_set_key_checked(&deskey[1],&dat->ks2) |
|
1313 + || DES_set_key_checked(&deskey[2],&dat->ks3)) |
|
1314 return 0; |
|
1315 # else |
|
1316 - DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1); |
|
1317 - DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2); |
|
1318 - DES_set_key_unchecked(&deskey[2], &data(ctx)->ks3); |
|
1319 + DES_set_key_unchecked(&deskey[0],&dat->ks1); |
|
1320 + DES_set_key_unchecked(&deskey[1],&dat->ks2); |
|
1321 + DES_set_key_unchecked(&deskey[2],&dat->ks3); |
|
1322 # endif |
|
1323 return 1; |
|
1324 } |
|
1325 Index: openssl/crypto/bn/Makefile |
|
1326 =================================================================== |
|
1327 diff -ru openssl-1.0.1e/crypto/bn/Makefile openssl-1.0.1e/crypto/bn/Makefile.new |
|
1328 --- openssl-1.0.1e/crypto/bn/Makefile 2011-05-24 17:02:24.000000000 -0700 |
|
1329 +++ openssl-1.0.1e/crypto/bn/Makefile 2011-07-27 10:48:17.817470000 -0700 |
|
1330 @@ -77,6 +77,12 @@ |
|
1331 $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@ |
|
1332 sparcv9-mont.s: asm/sparcv9-mont.pl |
|
1333 $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@ |
|
1334 +vis3-mont.s: asm/vis3-mont.pl |
|
1335 + $(PERL) asm/vis3-mont.pl $(CFLAGS) > $@ |
|
1336 +sparct4-mont.S: asm/sparct4-mont.pl |
|
1337 + $(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@ |
|
1338 +sparcv9-gf2m.S: asm/sparcv9-gf2m.pl |
|
1339 + $(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@ |
|
1340 |
|
1341 bn-mips3.o: asm/mips3.s |
|
1342 @if [ "$(CC)" = "gcc" ]; then \ |
|
1343 Index: openssl/crypto/bn/bn_exp.c |
|
1344 =================================================================== |
|
1345 diff -ru openssl-1.0.1e/crypto/bn/bn_exp.c openssl-1.0.1e/crypto/bn/bn_exp.c.new |
|
1346 --- bn_exp.c 2011/10/29 19:25:13 1.38 |
|
1347 +++ bn_exp.c 2012/11/17 10:34:11 1.39 |
|
1348 @@ -122,8 +122,15 @@ |
|
1349 # ifndef alloca |
|
1350 # define alloca(s) __builtin_alloca((s)) |
|
1351 # endif |
|
1352 +#else |
|
1353 +#include <alloca.h> |
|
1354 #endif |
|
1355 |
|
1356 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
|
1357 +# include "sparc_arch.h" |
|
1358 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1359 +#endif |
|
1360 + |
|
1361 /* maximum precomputation table size for *variable* sliding windows */ |
|
1362 #define TABLE_SIZE 32 |
|
1363 |
|
1364 @@ -464,8 +471,16 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1365 wstart = bits - 1; /* The top bit of the window */ |
|
1366 wend = 0; /* The bottom bit of the window */ |
|
1367 |
|
1368 +#if 1 /* by Shay Gueron's suggestion */ |
|
1369 + j = mont->N.top; /* borrow j */ |
|
1370 + if (bn_wexpand(r,j) == NULL) goto err; |
|
1371 + r->d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */ |
|
1372 + for(i=1;i<j;i++) r->d[i] = (~m->d[i])&BN_MASK2; |
|
1373 + r->top = j; |
|
1374 +#else |
|
1375 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) |
|
1376 goto err; |
|
1377 +#endif |
|
1378 for (;;) { |
|
1379 if (BN_is_bit_set(p, wstart) == 0) { |
|
1380 if (!start) { |
|
1381 @@ -515,6 +530,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1382 if (wstart < 0) |
|
1383 break; |
|
1384 } |
|
1385 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
|
1386 + if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3|SPARCV9_PREFER_FPU)) { |
|
1387 + j = mont->N.top; /* borrow j */ |
|
1388 + val[0]->d[0] = 1; /* borrow val[0] */ |
|
1389 + for (i=1;i<j;i++) |
|
1390 + val[0]->d[i] = 0; |
|
1391 + val[0]->top = j; |
|
1392 + if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx)) |
|
1393 + goto err; |
|
1394 + } else |
|
1395 +#endif |
|
1396 if (!BN_from_montgomery(rr, r, mont, ctx)) |
|
1397 goto err; |
|
1398 ret = 1; |
|
1399 @@ -526,6 +552,26 @@ err: |
|
1400 return (ret); |
|
1401 } |
|
1402 |
|
1403 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
|
1404 +static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) { |
|
1405 + BN_ULONG ret = 0; |
|
1406 + int wordpos; |
|
1407 + |
|
1408 + wordpos = bitpos / BN_BITS2; |
|
1409 + bitpos %= BN_BITS2; |
|
1410 + if (wordpos>=0 && wordpos < a->top) { |
|
1411 + ret = a->d[wordpos]&BN_MASK2; |
|
1412 + if (bitpos) { |
|
1413 + ret >>= bitpos; |
|
1414 + if (++wordpos < a->top) |
|
1415 + ret |= a->d[wordpos]<<(BN_BITS2-bitpos); |
|
1416 + } |
|
1417 + } |
|
1418 + |
|
1419 + return ret & BN_MASK2; |
|
1420 +} |
|
1421 +#endif |
|
1422 + |
|
1423 /* |
|
1424 * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific |
|
1425 * layout so that accessing any of these table values shows the same access |
|
1426 @@ -594,6 +640,9 @@ |
|
1427 int powerbufLen = 0; |
|
1428 unsigned char *powerbuf = NULL; |
|
1429 BIGNUM tmp, am; |
|
1430 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1431 + unsigned int t4=0; |
|
1432 +#endif |
|
1433 |
|
1434 bn_check_top(a); |
|
1435 bn_check_top(p); |
|
1436 @@ -628,10 +677,18 @@ |
|
1437 |
|
1438 /* Get the window size to use with size of p. */ |
|
1439 window = BN_window_bits_for_ctime_exponent_size(bits); |
|
1440 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1441 + if (window>=5 && (top&15)==0 && top<=64 && |
|
1442 + (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== |
|
1443 + (CFR_MONTMUL|CFR_MONTSQR) && (t4=OPENSSL_sparcv9cap_P[0])) |
|
1444 + window=5; |
|
1445 + else |
|
1446 +#endif |
|
1447 #if defined(OPENSSL_BN_ASM_MONT5) |
|
1448 if (window == 6 && bits <= 1024) |
|
1449 window = 5; /* ~5% improvement of 2048-bit RSA sign */ |
|
1450 #endif |
|
1451 + (void) 0; |
|
1452 |
|
1453 /* |
|
1454 * Allocate a buffer large enough to hold all of the pre-computed powers |
|
1455 @@ -670,14 +727,14 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1456 tmp.flags = am.flags = BN_FLG_STATIC_DATA; |
|
1457 |
|
1458 /* prepare a^0 in Montgomery domain */ |
|
1459 -#if 1 |
|
1460 - if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) |
|
1461 - goto err; |
|
1462 -#else |
|
1463 +#if 1 /* by Shay Gueron's suggestion */ |
|
1464 tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ |
|
1465 for (i = 1; i < top; i++) |
|
1466 tmp.d[i] = (~m->d[i]) & BN_MASK2; |
|
1467 tmp.top = top; |
|
1468 +#else |
|
1469 + if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) |
|
1470 + goto err; |
|
1471 #endif |
|
1472 |
|
1473 /* prepare a^1 in Montgomery domain */ |
|
1474 @@ -689,6 +746,122 @@ |
|
1475 } else if (!BN_to_montgomery(&am, a, mont, ctx)) |
|
1476 goto err; |
|
1477 |
|
1478 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1479 + if (t4) { |
|
1480 + typedef int (*bn_pwr5_mont_f)(BN_ULONG *tp,const BN_ULONG *np, |
|
1481 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1482 + int bn_pwr5_mont_t4_8(BN_ULONG *tp,const BN_ULONG *np, |
|
1483 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1484 + int bn_pwr5_mont_t4_16(BN_ULONG *tp,const BN_ULONG *np, |
|
1485 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1486 + int bn_pwr5_mont_t4_24(BN_ULONG *tp,const BN_ULONG *np, |
|
1487 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1488 + int bn_pwr5_mont_t4_32(BN_ULONG *tp,const BN_ULONG *np, |
|
1489 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1490 + static const bn_pwr5_mont_f pwr5_funcs[4] = { |
|
1491 + bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16, |
|
1492 + bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32 }; |
|
1493 + bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top/16-1]; |
|
1494 + |
|
1495 + typedef int (*bn_mul_mont_f)(BN_ULONG *rp,const BN_ULONG *ap, |
|
1496 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1497 + int bn_mul_mont_t4_8(BN_ULONG *rp,const BN_ULONG *ap, |
|
1498 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1499 + int bn_mul_mont_t4_16(BN_ULONG *rp,const BN_ULONG *ap, |
|
1500 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1501 + int bn_mul_mont_t4_24(BN_ULONG *rp,const BN_ULONG *ap, |
|
1502 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1503 + int bn_mul_mont_t4_32(BN_ULONG *rp,const BN_ULONG *ap, |
|
1504 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1505 + static const bn_mul_mont_f mul_funcs[4] = { |
|
1506 + bn_mul_mont_t4_8, bn_mul_mont_t4_16, |
|
1507 + bn_mul_mont_t4_24, bn_mul_mont_t4_32 }; |
|
1508 + bn_mul_mont_f mul_worker = mul_funcs[top/16-1]; |
|
1509 + |
|
1510 + void bn_mul_mont_vis3(BN_ULONG *rp,const BN_ULONG *ap, |
|
1511 + const void *bp,const BN_ULONG *np, |
|
1512 + const BN_ULONG *n0,int num); |
|
1513 + void bn_mul_mont_t4(BN_ULONG *rp,const BN_ULONG *ap, |
|
1514 + const void *bp,const BN_ULONG *np, |
|
1515 + const BN_ULONG *n0,int num); |
|
1516 + void bn_mul_mont_gather5_t4(BN_ULONG *rp,const BN_ULONG *ap, |
|
1517 + const void *table,const BN_ULONG *np, |
|
1518 + const BN_ULONG *n0,int num,int power); |
|
1519 + void bn_flip_n_scatter5_t4(const BN_ULONG *inp,size_t num, |
|
1520 + void *table,size_t power); |
|
1521 + void bn_gather5_t4(BN_ULONG *out,size_t num, |
|
1522 + void *table,size_t power); |
|
1523 + void bn_flip_t4(BN_ULONG *dst,BN_ULONG *src,size_t num); |
|
1524 + |
|
1525 + BN_ULONG *np=mont->N.d, *n0=mont->n0; |
|
1526 + int stride = 5*(6-(top/16-1)); /* multiple of 5, but less than 32 */ |
|
1527 + |
|
1528 + /* |
|
1529 + * BN_to_montgomery can contaminate words above .top |
|
1530 + * [in BN_DEBUG[_DEBUG] build]... |
|
1531 + */ |
|
1532 + for (i=am.top; i<top; i++) am.d[i]=0; |
|
1533 + for (i=tmp.top; i<top; i++) tmp.d[i]=0; |
|
1534 + |
|
1535 + bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,0); |
|
1536 + bn_flip_n_scatter5_t4(am.d,top,powerbuf,1); |
|
1537 + if (!(*mul_worker)(tmp.d,am.d,am.d,np,n0) && |
|
1538 + !(*mul_worker)(tmp.d,am.d,am.d,np,n0)) |
|
1539 + bn_mul_mont_vis3(tmp.d,am.d,am.d,np,n0,top); |
|
1540 + bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,2); |
|
1541 + |
|
1542 + for (i=3; i<32; i++) { |
|
1543 + /* Calculate a^i = a^(i-1) * a */ |
|
1544 + if (!(*mul_worker)(tmp.d,tmp.d,am.d,np,n0) && |
|
1545 + !(*mul_worker)(tmp.d,tmp.d,am.d,np,n0)) |
|
1546 + bn_mul_mont_vis3(tmp.d,tmp.d,am.d,np,n0,top); |
|
1547 + bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,i); |
|
1548 + } |
|
1549 + |
|
1550 + /* switch to 64-bit domain */ |
|
1551 + np = alloca(top*sizeof(BN_ULONG)); |
|
1552 + top /= 2; |
|
1553 + bn_flip_t4(np,mont->N.d,top); |
|
1554 + |
|
1555 + bits--; |
|
1556 + for (wvalue=0, i=bits%5; i>=0; i--,bits--) |
|
1557 + wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); |
|
1558 + bn_gather5_t4(tmp.d,top,powerbuf,wvalue); |
|
1559 + |
|
1560 + /* Scan the exponent one window at a time starting from the most |
|
1561 + * significant bits. |
|
1562 + */ |
|
1563 + while (bits >= 0) { |
|
1564 + if (bits < stride) |
|
1565 + stride = bits+1; |
|
1566 + bits -= stride; |
|
1567 + wvalue = (bn_get_bits(p,bits+1)); |
|
1568 + |
|
1569 + if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) |
|
1570 + continue; |
|
1571 + /* retry once and fall back */ |
|
1572 + if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) |
|
1573 + continue; |
|
1574 + |
|
1575 + bits += stride-5; |
|
1576 + wvalue >>= stride-5; |
|
1577 + wvalue &= 31; |
|
1578 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1579 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1580 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1581 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1582 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1583 + bn_mul_mont_gather5_t4(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue); |
|
1584 + } |
|
1585 + |
|
1586 + bn_flip_t4(tmp.d,tmp.d,top); |
|
1587 + top *= 2; |
|
1588 + /* back to 32-bit domain */ |
|
1589 + tmp.top=top; |
|
1590 + bn_correct_top(&tmp); |
|
1591 + OPENSSL_cleanse(np,top*sizeof(BN_ULONG)); |
|
1592 + } else |
|
1593 +#endif |
|
1594 #if defined(OPENSSL_BN_ASM_MONT5) |
|
1595 if (window == 5 && top > 1) { |
|
1596 /* |
|
1597 @@ -844,6 +1017,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1598 } |
|
1599 |
|
1600 /* Convert the final result from montgomery to standard format */ |
|
1601 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
|
1602 + if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3|SPARCV9_PREFER_FPU)) { |
|
1603 + am.d[0] = 1; /* borrow am */ |
|
1604 + for (i = 1; i < top; i++) |
|
1605 + am.d[i] = 0; |
|
1606 + if (!BN_mod_mul_montgomery(rr,&tmp,&am,mont,ctx)) |
|
1607 + goto err; |
|
1608 + } else |
|
1609 +#endif |
|
1610 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) |
|
1611 goto err; |
|
1612 ret = 1; |
|
1613 Index: openssl/apps/speed.c |
|
1614 =================================================================== |
|
1615 diff -ru openssl-1.0.1e/apps/speed.c openssl-1.0.1e/apps/speed.c |
|
1616 --- openssl-1.0.1e/apps/speed.c 2011-05-24 17:02:24.000000000 -0700 |
|
1617 +++ openssl-1.0.1e/apps/speed.c 2011-07-27 10:48:17.817470000 -0700 |
|
1618 @@ -1586,8 +1586,7 @@ |
|
1619 print_message(names[D_MD5], c[D_MD5][j], lengths[j]); |
|
1620 Time_F(START); |
|
1621 for (count = 0, run = 1; COND(c[D_MD5][j]); count++) |
|
1622 - EVP_Digest(&(buf[0]), (unsigned long)lengths[j], &(md5[0]), |
|
1623 - NULL, EVP_get_digestbyname("md5"), NULL); |
|
1624 + MD5(buf, lengths[j], md5); |
|
1625 d = Time_F(STOP); |
|
1626 print_result(D_MD5, j, count, d); |
|
1627 } |
|
1628 @@ -1622,8 +1621,7 @@ |
|
1629 print_message(names[D_SHA1], c[D_SHA1][j], lengths[j]); |
|
1630 Time_F(START); |
|
1631 for (count = 0, run = 1; COND(c[D_SHA1][j]); count++) |
|
1632 - EVP_Digest(buf, (unsigned long)lengths[j], &(sha[0]), NULL, |
|
1633 - EVP_sha1(), NULL); |
|
1634 + SHA1(buf, lengths[j], sha); |
|
1635 d = Time_F(STOP); |
|
1636 print_result(D_SHA1, j, count, d); |
|
1637 } |
|
1638 Index: openssl/crypto/aes/Makefile |
|
1639 =================================================================== |
|
1640 --- Makefile Thu May 2 13:42:37 2013 |
|
1641 +++ Makefile.orig Thu May 2 13:41:51 2013 |
|
1642 @@ -69,6 +69,9 @@ |
|
1643 aes-sparcv9.s: asm/aes-sparcv9.pl |
|
1644 $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@ |
|
1645 |
|
1646 +aest4-sparcv9.s: asm/aest4-sparcv9.pl |
|
1647 + $(PERL) asm/aest4-sparcv9.pl $(CFLAGS) > $@ |
|
1648 + |
|
1649 aes-ppc.s: asm/aes-ppc.pl |
|
1650 $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ |
|
1651 |
|
1652 Index: openssl/crypto/evp/e_aes.c |
|
1653 =================================================================== |
|
1654 --- e_aes.c Mon Feb 11 07:26:04 2013 |
|
1655 +++ e_aes.c.56 Thu May 2 14:26:35 2013 |
|
1656 @@ -56,12 +58,11 @@ |
|
1657 # include <assert.h> |
|
1658 # include <openssl/aes.h> |
|
1659 # include "evp_locl.h" |
|
1660 -# ifndef OPENSSL_FIPS |
|
1661 # include "modes_lcl.h" |
|
1662 # include <openssl/rand.h> |
|
1663 |
|
1664 typedef struct { |
|
1665 - AES_KEY ks; |
|
1666 + union { double align; AES_KEY ks; } ks; |
|
1667 block128_f block; |
|
1668 union { |
|
1669 cbc128_f cbc; |
|
1670 @@ -70,7 +69,7 @@ |
|
1671 } EVP_AES_KEY; |
|
1672 |
|
1673 typedef struct { |
|
1674 - AES_KEY ks; /* AES key schedule to use */ |
|
1675 + union { double align; AES_KEY ks; } ks; /* AES key schedule to use */ |
|
1676 int key_set; /* Set if key initialised */ |
|
1677 int iv_set; /* Set if an iv is set */ |
|
1678 GCM128_CONTEXT gcm; |
|
1679 @@ -83,7 +82,7 @@ |
|
1680 } EVP_AES_GCM_CTX; |
|
1681 |
|
1682 typedef struct { |
|
1683 - AES_KEY ks1, ks2; /* AES key schedules to use */ |
|
1684 + union { double align; AES_KEY ks; } ks1, ks2; /* AES key schedules to use */ |
|
1685 XTS128_CONTEXT xts; |
|
1686 void (*stream) (const unsigned char *in, |
|
1687 unsigned char *out, size_t length, |
|
1688 @@ -92,7 +91,7 @@ |
|
1689 } EVP_AES_XTS_CTX; |
|
1690 |
|
1691 typedef struct { |
|
1692 - AES_KEY ks; /* AES key schedule to use */ |
|
1693 + union { double align; AES_KEY ks; } ks; /* AES key schedule to use */ |
|
1694 int key_set; /* Set if key initialised */ |
|
1695 int iv_set; /* Set if an iv is set */ |
|
1696 int tag_set; /* Set if tag is valid */ |
|
1697 @@ -155,7 +154,7 @@ |
|
1698 defined(_M_AMD64) || defined(_M_X64) || \ |
|
1699 defined(__INTEL__) ) |
|
1700 |
|
1701 -extern unsigned int OPENSSL_ia32cap_P[2]; |
|
1702 +extern unsigned int OPENSSL_ia32cap_P[]; |
|
1703 |
|
1704 # ifdef VPAES_ASM |
|
1705 # define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) |
|
1706 @@ -297,7 +296,7 @@ |
|
1707 if (!iv && !key) |
|
1708 return 1; |
|
1709 if (key) { |
|
1710 - aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
|
1711 + aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); |
|
1712 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) aesni_encrypt); |
|
1713 gctx->ctr = (ctr128_f) aesni_ctr32_encrypt_blocks; |
|
1714 /* |
|
1715 @@ -336,17 +335,17 @@ |
|
1716 if (key) { |
|
1717 /* key_len is two AES keys */ |
|
1718 if (enc) { |
|
1719 - aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
1720 + aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
1721 xctx->xts.block1 = (block128_f) aesni_encrypt; |
|
1722 xctx->stream = aesni_xts_encrypt; |
|
1723 } else { |
|
1724 - aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
1725 + aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
1726 xctx->xts.block1 = (block128_f) aesni_decrypt; |
|
1727 xctx->stream = aesni_xts_decrypt; |
|
1728 } |
|
1729 |
|
1730 aesni_set_encrypt_key(key + ctx->key_len / 2, |
|
1731 - ctx->key_len * 4, &xctx->ks2); |
|
1732 + ctx->key_len * 4, &xctx->ks2.ks); |
|
1733 xctx->xts.block2 = (block128_f) aesni_encrypt; |
|
1734 |
|
1735 xctx->xts.key1 = &xctx->ks1; |
|
1736 @@ -371,7 +370,7 @@ |
|
1737 if (!iv && !key) |
|
1738 return 1; |
|
1739 if (key) { |
|
1740 - aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); |
|
1741 + aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); |
|
1742 CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
1743 &cctx->ks, (block128_f) aesni_encrypt); |
|
1744 cctx->str = enc ? (ccm128_f) aesni_ccm64_encrypt_blocks : |
|
1745 @@ -432,6 +431,364 @@ |
|
1746 const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ |
|
1747 { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; } |
|
1748 |
|
1749 +#elif defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) |
|
1750 + |
|
1751 +#include "sparc_arch.h" |
|
1752 + |
|
1753 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1754 + |
|
1755 +#define SPARC_AES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_AES) |
|
1756 + |
|
1757 +void aes_t4_set_encrypt_key (const unsigned char *key, int bits, |
|
1758 + AES_KEY *ks); |
|
1759 +void aes_t4_set_decrypt_key (const unsigned char *key, int bits, |
|
1760 + AES_KEY *ks); |
|
1761 +void aes_t4_encrypt (const unsigned char *in, unsigned char *out, |
|
1762 + const AES_KEY *key); |
|
1763 +void aes_t4_decrypt (const unsigned char *in, unsigned char *out, |
|
1764 + const AES_KEY *key); |
|
1765 +/* |
|
1766 + * Key-length specific subroutines were chosen for following reason. |
|
1767 + * Each SPARC T4 core can execute up to 8 threads which share core's |
|
1768 + * resources. Loading as much key material to registers allows to |
|
1769 + * minimize references to shared memory interface, as well as amount |
|
1770 + * of instructions in inner loops [much needed on T4]. But then having |
|
1771 + * non-key-length specific routines would require conditional branches |
|
1772 + * either in inner loops or on subroutines' entries. Former is hardly |
|
1773 + * acceptable, while latter means code size increase to size occupied |
|
1774 + * by multiple key-length specific subroutines, so why fight? |
|
1775 + */ |
|
1776 +void aes128_t4_cbc_encrypt (const unsigned char *in, unsigned char *out, |
|
1777 + size_t len, const AES_KEY *key, |
|
1778 + unsigned char *ivec); |
|
1779 +void aes128_t4_cbc_decrypt (const unsigned char *in, unsigned char *out, |
|
1780 + size_t len, const AES_KEY *key, |
|
1781 + unsigned char *ivec); |
|
1782 +void aes192_t4_cbc_encrypt (const unsigned char *in, unsigned char *out, |
|
1783 + size_t len, const AES_KEY *key, |
|
1784 + unsigned char *ivec); |
|
1785 +void aes192_t4_cbc_decrypt (const unsigned char *in, unsigned char *out, |
|
1786 + size_t len, const AES_KEY *key, |
|
1787 + unsigned char *ivec); |
|
1788 +void aes256_t4_cbc_encrypt (const unsigned char *in, unsigned char *out, |
|
1789 + size_t len, const AES_KEY *key, |
|
1790 + unsigned char *ivec); |
|
1791 +void aes256_t4_cbc_decrypt (const unsigned char *in, unsigned char *out, |
|
1792 + size_t len, const AES_KEY *key, |
|
1793 + unsigned char *ivec); |
|
1794 +void aes128_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out, |
|
1795 + size_t blocks, const AES_KEY *key, |
|
1796 + unsigned char *ivec); |
|
1797 +void aes192_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out, |
|
1798 + size_t blocks, const AES_KEY *key, |
|
1799 + unsigned char *ivec); |
|
1800 +void aes256_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out, |
|
1801 + size_t blocks, const AES_KEY *key, |
|
1802 + unsigned char *ivec); |
|
1803 + |
|
1804 +static int aes_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1805 + const unsigned char *iv, int enc) |
|
1806 +{ |
|
1807 + int ret, mode, bits; |
|
1808 + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; |
|
1809 + |
|
1810 + mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1811 + bits = ctx->key_len*8; |
|
1812 + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) { |
|
1813 + ret = 0; |
|
1814 + aes_t4_set_decrypt_key(key, bits, ctx->cipher_data); |
|
1815 + dat->block = (block128_f)aes_t4_decrypt; |
|
1816 + switch (bits) { |
|
1817 + case 128: |
|
1818 + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
1819 + (cbc128_f)aes128_t4_cbc_decrypt : |
|
1820 + NULL; |
|
1821 + break; |
|
1822 + case 192: |
|
1823 + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
1824 + (cbc128_f)aes192_t4_cbc_decrypt : |
|
1825 + NULL; |
|
1826 + break; |
|
1827 + case 256: |
|
1828 + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
1829 + (cbc128_f)aes256_t4_cbc_decrypt : |
|
1830 + NULL; |
|
1831 + break; |
|
1832 + default: |
|
1833 + ret = -1; |
|
1834 + } |
|
1835 + } else { |
|
1836 + ret = 0; |
|
1837 + aes_t4_set_encrypt_key(key, bits, ctx->cipher_data); |
|
1838 + dat->block = (block128_f)aes_t4_encrypt; |
|
1839 + switch (bits) { |
|
1840 + case 128: |
|
1841 + if (mode==EVP_CIPH_CBC_MODE) |
|
1842 + dat->stream.cbc = (cbc128_f)aes128_t4_cbc_encrypt; |
|
1843 + else if (mode==EVP_CIPH_CTR_MODE) |
|
1844 + dat->stream.ctr = (ctr128_f)aes128_t4_ctr32_encrypt; |
|
1845 + else |
|
1846 + dat->stream.cbc = NULL; |
|
1847 + break; |
|
1848 + case 192: |
|
1849 + if (mode==EVP_CIPH_CBC_MODE) |
|
1850 + dat->stream.cbc = (cbc128_f)aes192_t4_cbc_encrypt; |
|
1851 + else if (mode==EVP_CIPH_CTR_MODE) |
|
1852 + dat->stream.ctr = (ctr128_f)aes192_t4_ctr32_encrypt; |
|
1853 + else |
|
1854 + dat->stream.cbc = NULL; |
|
1855 + break; |
|
1856 + case 256: |
|
1857 + if (mode==EVP_CIPH_CBC_MODE) |
|
1858 + dat->stream.cbc = (cbc128_f)aes256_t4_cbc_encrypt; |
|
1859 + else if (mode==EVP_CIPH_CTR_MODE) |
|
1860 + dat->stream.ctr = (ctr128_f)aes256_t4_ctr32_encrypt; |
|
1861 + else |
|
1862 + dat->stream.cbc = NULL; |
|
1863 + break; |
|
1864 + default: |
|
1865 + ret = -1; |
|
1866 + } |
|
1867 + } |
|
1868 + |
|
1869 + if (ret < 0) { |
|
1870 + EVPerr(EVP_F_AES_T4_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED); |
|
1871 + return 0; |
|
1872 + } |
|
1873 + |
|
1874 + return 1; |
|
1875 +} |
|
1876 + |
|
1877 +#define aes_t4_cbc_cipher aes_cbc_cipher |
|
1878 +static int aes_t4_cbc_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1879 + const unsigned char *in, size_t len); |
|
1880 + |
|
1881 +#define aes_t4_ecb_cipher aes_ecb_cipher |
|
1882 +static int aes_t4_ecb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1883 + const unsigned char *in, size_t len); |
|
1884 + |
|
1885 +#define aes_t4_ofb_cipher aes_ofb_cipher |
|
1886 +static int aes_t4_ofb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1887 + const unsigned char *in,size_t len); |
|
1888 + |
|
1889 +#define aes_t4_cfb_cipher aes_cfb_cipher |
|
1890 +static int aes_t4_cfb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1891 + const unsigned char *in,size_t len); |
|
1892 + |
|
1893 +#define aes_t4_cfb8_cipher aes_cfb8_cipher |
|
1894 +static int aes_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1895 + const unsigned char *in,size_t len); |
|
1896 + |
|
1897 +#define aes_t4_cfb1_cipher aes_cfb1_cipher |
|
1898 +static int aes_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1899 + const unsigned char *in,size_t len); |
|
1900 + |
|
1901 +#define aes_t4_ctr_cipher aes_ctr_cipher |
|
1902 +static int aes_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1903 + const unsigned char *in, size_t len); |
|
1904 + |
|
1905 +static int aes_t4_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1906 + const unsigned char *iv, int enc) |
|
1907 +{ |
|
1908 + EVP_AES_GCM_CTX *gctx = ctx->cipher_data; |
|
1909 + if (!iv && !key) |
|
1910 + return 1; |
|
1911 + if (key) { |
|
1912 + int bits = ctx->key_len * 8; |
|
1913 + aes_t4_set_encrypt_key(key, bits, &gctx->ks.ks); |
|
1914 + CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, |
|
1915 + (block128_f)aes_t4_encrypt); |
|
1916 + switch (bits) { |
|
1917 + case 128: |
|
1918 + gctx->ctr = (ctr128_f)aes128_t4_ctr32_encrypt; |
|
1919 + break; |
|
1920 + case 192: |
|
1921 + gctx->ctr = (ctr128_f)aes192_t4_ctr32_encrypt; |
|
1922 + break; |
|
1923 + case 256: |
|
1924 + gctx->ctr = (ctr128_f)aes256_t4_ctr32_encrypt; |
|
1925 + break; |
|
1926 + default: |
|
1927 + return 0; |
|
1928 + } |
|
1929 + /* If we have an iv can set it directly, otherwise use |
|
1930 + * saved IV. |
|
1931 + */ |
|
1932 + if (iv == NULL && gctx->iv_set) |
|
1933 + iv = gctx->iv; |
|
1934 + if (iv) { |
|
1935 + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); |
|
1936 + gctx->iv_set = 1; |
|
1937 + } |
|
1938 + gctx->key_set = 1; |
|
1939 + } else { |
|
1940 + /* If key set use IV, otherwise copy */ |
|
1941 + if (gctx->key_set) |
|
1942 + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); |
|
1943 + else |
|
1944 + memcpy(gctx->iv, iv, gctx->ivlen); |
|
1945 + gctx->iv_set = 1; |
|
1946 + gctx->iv_gen = 0; |
|
1947 + } |
|
1948 + return 1; |
|
1949 +} |
|
1950 + |
|
1951 +#define aes_t4_gcm_cipher aes_gcm_cipher |
|
1952 +static int aes_t4_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1953 + const unsigned char *in, size_t len); |
|
1954 + |
|
1955 +static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1956 + const unsigned char *iv, int enc) |
|
1957 +{ |
|
1958 + EVP_AES_XTS_CTX *xctx = ctx->cipher_data; |
|
1959 + if (!iv && !key) |
|
1960 + return 1; |
|
1961 + |
|
1962 + if (key) { |
|
1963 + int bits = ctx->key_len * 4; |
|
1964 + /* key_len is two AES keys */ |
|
1965 + if (enc) { |
|
1966 + aes_t4_set_encrypt_key(key, bits, &xctx->ks1.ks); |
|
1967 + xctx->xts.block1 = (block128_f)aes_t4_encrypt; |
|
1968 +#if 0 /* not yet */ |
|
1969 + switch (bits) { |
|
1970 + case 128: |
|
1971 + xctx->stream = aes128_t4_xts_encrypt; |
|
1972 + break; |
|
1973 + case 192: |
|
1974 + xctx->stream = aes192_t4_xts_encrypt; |
|
1975 + break; |
|
1976 + case 256: |
|
1977 + xctx->stream = aes256_t4_xts_encrypt; |
|
1978 + break; |
|
1979 + default: |
|
1980 + return 0; |
|
1981 + } |
|
1982 +#endif |
|
1983 + } else { |
|
1984 + aes_t4_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
1985 + xctx->xts.block1 = (block128_f)aes_t4_decrypt; |
|
1986 +#if 0 /* not yet */ |
|
1987 + switch (bits) { |
|
1988 + case 128: |
|
1989 + xctx->stream = aes128_t4_xts_decrypt; |
|
1990 + break; |
|
1991 + case 192: |
|
1992 + xctx->stream = aes192_t4_xts_decrypt; |
|
1993 + break; |
|
1994 + case 256: |
|
1995 + xctx->stream = aes256_t4_xts_decrypt; |
|
1996 + break; |
|
1997 + default: |
|
1998 + return 0; |
|
1999 + } |
|
2000 +#endif |
|
2001 + } |
|
2002 + |
|
2003 + aes_t4_set_encrypt_key(key + ctx->key_len/2, |
|
2004 + ctx->key_len * 4, &xctx->ks2.ks); |
|
2005 + xctx->xts.block2 = (block128_f)aes_t4_encrypt; |
|
2006 + |
|
2007 + xctx->xts.key1 = &xctx->ks1; |
|
2008 + } |
|
2009 + |
|
2010 + if (iv) { |
|
2011 + xctx->xts.key2 = &xctx->ks2; |
|
2012 + memcpy(ctx->iv, iv, 16); |
|
2013 + } |
|
2014 + |
|
2015 + return 1; |
|
2016 +} |
|
2017 + |
|
2018 +#define aes_t4_xts_cipher aes_xts_cipher |
|
2019 +static int aes_t4_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
2020 + const unsigned char *in, size_t len); |
|
2021 + |
|
2022 +static int aes_t4_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
2023 + const unsigned char *iv, int enc) |
|
2024 +{ |
|
2025 + EVP_AES_CCM_CTX *cctx = ctx->cipher_data; |
|
2026 + if (!iv && !key) |
|
2027 + return 1; |
|
2028 + if (key) { |
|
2029 + int bits = ctx->key_len * 8; |
|
2030 + aes_t4_set_encrypt_key(key, bits, &cctx->ks.ks); |
|
2031 + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
2032 + &cctx->ks, (block128_f)aes_t4_encrypt); |
|
2033 +#if 0 /* not yet */ |
|
2034 + switch (bits) { |
|
2035 + case 128: |
|
2036 + cctx->str = enc?(ccm128_f)aes128_t4_ccm64_encrypt : |
|
2037 + (ccm128_f)ae128_t4_ccm64_decrypt; |
|
2038 + break; |
|
2039 + case 192: |
|
2040 + cctx->str = enc?(ccm128_f)aes192_t4_ccm64_encrypt : |
|
2041 + (ccm128_f)ae192_t4_ccm64_decrypt; |
|
2042 + break; |
|
2043 + case 256: |
|
2044 + cctx->str = enc?(ccm128_f)aes256_t4_ccm64_encrypt : |
|
2045 + (ccm128_f)ae256_t4_ccm64_decrypt; |
|
2046 + break; |
|
2047 + default: |
|
2048 + return 0; |
|
2049 + } |
|
2050 +#endif |
|
2051 + cctx->key_set = 1; |
|
2052 + } |
|
2053 + if (iv) { |
|
2054 + memcpy(ctx->iv, iv, 15 - cctx->L); |
|
2055 + cctx->iv_set = 1; |
|
2056 + } |
|
2057 + return 1; |
|
2058 +} |
|
2059 + |
|
2060 +#define aes_t4_ccm_cipher aes_ccm_cipher |
|
2061 +static int aes_t4_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
2062 + const unsigned char *in, size_t len); |
|
2063 + |
|
2064 +#define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ |
|
2065 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \ |
|
2066 + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ |
|
2067 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2068 + aes_t4_init_key, \ |
|
2069 + aes_t4_##mode##_cipher, \ |
|
2070 + NULL, \ |
|
2071 + sizeof(EVP_AES_KEY), \ |
|
2072 + NULL,NULL,NULL,NULL }; \ |
|
2073 +static const EVP_CIPHER aes_##keylen##_##mode = { \ |
|
2074 + nid##_##keylen##_##nmode,blocksize, \ |
|
2075 + keylen/8,ivlen, \ |
|
2076 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2077 + aes_init_key, \ |
|
2078 + aes_##mode##_cipher, \ |
|
2079 + NULL, \ |
|
2080 + sizeof(EVP_AES_KEY), \ |
|
2081 + NULL,NULL,NULL,NULL }; \ |
|
2082 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ |
|
2083 +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } |
|
2084 + |
|
2085 +#define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ |
|
2086 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \ |
|
2087 + nid##_##keylen##_##mode,blocksize, \ |
|
2088 + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ |
|
2089 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2090 + aes_t4_##mode##_init_key, \ |
|
2091 + aes_t4_##mode##_cipher, \ |
|
2092 + aes_##mode##_cleanup, \ |
|
2093 + sizeof(EVP_AES_##MODE##_CTX), \ |
|
2094 + NULL,NULL,aes_##mode##_ctrl,NULL }; \ |
|
2095 +static const EVP_CIPHER aes_##keylen##_##mode = { \ |
|
2096 + nid##_##keylen##_##mode,blocksize, \ |
|
2097 + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ |
|
2098 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2099 + aes_##mode##_init_key, \ |
|
2100 + aes_##mode##_cipher, \ |
|
2101 + aes_##mode##_cleanup, \ |
|
2102 + sizeof(EVP_AES_##MODE##_CTX), \ |
|
2103 + NULL,NULL,aes_##mode##_ctrl,NULL }; \ |
|
2104 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ |
|
2105 +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } |
|
2106 + |
|
2107 # else |
|
2108 |
|
2109 # define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ |
|
2110 @@ -480,7 +837,7 @@ |
|
2111 && !enc) |
|
2112 # ifdef BSAES_CAPABLE |
|
2113 if (BSAES_CAPABLE && mode == EVP_CIPH_CBC_MODE) { |
|
2114 - ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2115 + ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); |
|
2116 dat->block = (block128_f) AES_decrypt; |
|
2117 dat->stream.cbc = (cbc128_f) bsaes_cbc_encrypt; |
|
2118 } else |
|
2119 @@ -487,7 +844,7 @@ |
|
2120 # endif |
|
2121 # ifdef VPAES_CAPABLE |
|
2122 if (VPAES_CAPABLE) { |
|
2123 - ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2124 + ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); |
|
2125 dat->block = (block128_f) vpaes_decrypt; |
|
2126 dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? |
|
2127 (cbc128_f) vpaes_cbc_encrypt : NULL; |
|
2128 @@ -494,7 +851,7 @@ |
|
2129 } else |
|
2130 # endif |
|
2131 { |
|
2132 - ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2133 + ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); |
|
2134 dat->block = (block128_f) AES_decrypt; |
|
2135 dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? |
|
2136 (cbc128_f) AES_cbc_encrypt : NULL; |
|
2137 @@ -508,7 +865,7 @@ |
|
2138 # endif |
|
2139 # ifdef VPAES_CAPABLE |
|
2140 if (VPAES_CAPABLE) { |
|
2141 - ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2142 + ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); |
|
2143 dat->block = (block128_f) vpaes_encrypt; |
|
2144 dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? |
|
2145 (cbc128_f) vpaes_cbc_encrypt : NULL; |
|
2146 @@ -515,7 +872,7 @@ |
|
2147 } else |
|
2148 # endif |
|
2149 { |
|
2150 - ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2151 + ret = AES_set_encrypt_key(key, ctx->key_len*8, &dat->ks.ks); |
|
2152 dat->block = (block128_f) AES_encrypt; |
|
2153 dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? |
|
2154 (cbc128_f) AES_cbc_encrypt : NULL; |
|
2155 @@ -810,7 +1167,7 @@ |
|
2156 do { |
|
2157 # ifdef BSAES_CAPABLE |
|
2158 if (BSAES_CAPABLE) { |
|
2159 - AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
|
2160 + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); |
|
2161 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, |
|
2162 (block128_f) AES_encrypt); |
|
2163 gctx->ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks; |
|
2164 @@ -819,7 +1176,7 @@ |
|
2165 # endif |
|
2166 # ifdef VPAES_CAPABLE |
|
2167 if (VPAES_CAPABLE) { |
|
2168 - vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
|
2169 + vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); |
|
2170 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, |
|
2171 (block128_f) vpaes_encrypt); |
|
2172 gctx->ctr = NULL; |
|
2173 @@ -828,7 +1185,7 @@ |
|
2174 # endif |
|
2175 (void)0; /* terminate potentially open 'else' */ |
|
2176 |
|
2177 - AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
|
2178 + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); |
|
2179 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, |
|
2180 (block128_f) AES_encrypt); |
|
2181 # ifdef AES_CTR_ASM |
|
2182 @@ -1049,15 +1406,15 @@ |
|
2183 # ifdef VPAES_CAPABLE |
|
2184 if (VPAES_CAPABLE) { |
|
2185 if (enc) { |
|
2186 - vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2187 + vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2188 xctx->xts.block1 = (block128_f) vpaes_encrypt; |
|
2189 } else { |
|
2190 - vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2191 + vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2192 xctx->xts.block1 = (block128_f) vpaes_decrypt; |
|
2193 } |
|
2194 |
|
2195 vpaes_set_encrypt_key(key + ctx->key_len / 2, |
|
2196 - ctx->key_len * 4, &xctx->ks2); |
|
2197 + ctx->key_len * 4, &xctx->ks2.ks); |
|
2198 xctx->xts.block2 = (block128_f) vpaes_encrypt; |
|
2199 |
|
2200 xctx->xts.key1 = &xctx->ks1; |
|
2201 @@ -1067,15 +1424,15 @@ |
|
2202 (void)0; /* terminate potentially open 'else' */ |
|
2203 |
|
2204 if (enc) { |
|
2205 - AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2206 + AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2207 xctx->xts.block1 = (block128_f) AES_encrypt; |
|
2208 } else { |
|
2209 - AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2210 + AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2211 xctx->xts.block1 = (block128_f) AES_decrypt; |
|
2212 } |
|
2213 |
|
2214 AES_set_encrypt_key(key + ctx->key_len / 2, |
|
2215 - ctx->key_len * 4, &xctx->ks2); |
|
2216 + ctx->key_len * 4, &xctx->ks2.ks); |
|
2217 xctx->xts.block2 = (block128_f) AES_encrypt; |
|
2218 |
|
2219 xctx->xts.key1 = &xctx->ks1; |
|
2220 @@ -1196,7 +1553,7 @@ |
|
2221 do { |
|
2222 # ifdef VPAES_CAPABLE |
|
2223 if (VPAES_CAPABLE) { |
|
2224 - vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); |
|
2225 + vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); |
|
2226 CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
2227 &cctx->ks, (block128_f) vpaes_encrypt); |
|
2228 cctx->str = NULL; |
|
2229 @@ -1204,7 +1561,7 @@ |
|
2230 break; |
|
2231 } |
|
2232 # endif |
|
2233 - AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); |
|
2234 + AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); |
|
2235 CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
2236 &cctx->ks, (block128_f) AES_encrypt); |
|
2237 cctx->str = NULL; |
|
2238 @@ -1285,5 +1642,4 @@ |
|
2239 EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS) |
|
2240 BLOCK_CIPHER_custom(NID_aes, 256, 1, 12, ccm, CCM, |
|
2241 EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS) |
|
2242 -# endif |
|
2243 #endif |
|
2244 Index: openssl/crypto/evp/evp.h |
|
2245 =================================================================== |
|
2246 --- evp.h Mon Feb 11 07:26:04 2013 |
|
2247 +++ evp.h.new Thu May 2 14:31:55 2013 |
|
2248 @@ -1325,6 +1325,7 @@ |
|
2249 # define EVP_F_AESNI_INIT_KEY 165 |
|
2250 # define EVP_F_AESNI_XTS_CIPHER 176 |
|
2251 # define EVP_F_AES_INIT_KEY 133 |
|
2252 +# define EVP_F_AES_T4_INIT_KEY 178 |
|
2253 # define EVP_F_AES_XTS 172 |
|
2254 # define EVP_F_AES_XTS_CIPHER 175 |
|
2255 # define EVP_F_ALG_MODULE_INIT 177 |
|
2256 Index: openssl/crypto/evp/evp_err.c |
|
2257 =================================================================== |
|
2258 --- evp_err.c Mon Feb 11 07:26:04 2013 |
|
2259 +++ evp_err.c.new Thu May 2 14:33:24 2013 |
|
2260 @@ -73,6 +73,7 @@ |
|
2261 {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, |
|
2262 {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"}, |
|
2263 {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, |
|
2264 + {ERR_FUNC(EVP_F_AES_T4_INIT_KEY), "AES_T4_INIT_KEY"}, |
|
2265 {ERR_FUNC(EVP_F_AES_XTS), "AES_XTS"}, |
|
2266 {ERR_FUNC(EVP_F_AES_XTS_CIPHER), "AES_XTS_CIPHER"}, |
|
2267 {ERR_FUNC(EVP_F_ALG_MODULE_INIT), "ALG_MODULE_INIT"}, |