1 # |
|
2 # This file adds inline T4 instruction support to OpenSSL upstream code. |
|
3 # The change was brought in from OpenSSL 1.0.2. |
|
4 # |
|
5 Index: Configure |
|
6 =================================================================== |
|
7 diff -ru openssl-1.0.1e/Configure openssl-1.0.1e/Configure |
|
8 --- openssl-1.0.1e/Configure 2011-05-24 17:02:24.000000000 -0700 |
|
9 +++ openssl-1.0.1e/Configure 2011-07-27 10:48:17.817470000 -0700 |
|
10 @@ -135,7 +135,7 @@ |
|
11 |
|
12 my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:"; |
|
13 my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; |
|
14 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; |
|
15 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; |
|
16 my $fips_sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; |
|
17 my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; |
|
18 my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; |
|
19 Index: crypto/sparccpuid.S |
|
20 =================================================================== |
|
21 diff -ru openssl-1.0.1e/crypto/sparccpuid.S openssl-1.0.1e/crypto/sparccpuid.S |
|
22 --- openssl-1.0.1e/crypto/sparccpuid.S 2011-05-24 17:02:24.000000000 -0700 |
|
23 +++ openssl-1.0.1e/crypto/sparccpuid.S 2011-07-27 10:48:17.817470000 -0700 |
|
24 @@ -1,3 +1,7 @@ |
|
25 +#ifdef OPENSSL_FIPSCANISTER |
|
26 +#include <openssl/fipssyms.h> |
|
27 +#endif |
|
28 + |
|
29 #if defined(__SUNPRO_C) && defined(__sparcv9) |
|
30 # define ABI64 /* They've said -xarch=v9 at command line */ |
|
31 #elif defined(__GNUC__) && defined(__arch64__) |
|
32 @@ -241,7 +245,12 @@ |
|
33 ! UltraSPARC IIe 7 |
|
34 ! UltraSPARC III 7 |
|
35 ! UltraSPARC T1 24 |
|
36 +! SPARC T4 65(*) |
|
37 ! |
|
38 +! (*) result has less to do with VIS instruction latencies; rdtick |
|
39 +! itself appears to be that slow, but it does the trick in the sense |
|
40 +! that FP and VIS code paths are still slower than integer-only ones. |
|
41 +! |
|
42 ! Numbers for T2 and SPARC64 V-VII are more than welcomed. |
|
43 ! |
|
44 ! It would be possible to detect specifically US-T1 by instrumenting |
|
45 @@ -250,6 +259,8 @@ |
|
46 .global _sparcv9_vis1_instrument |
|
47 .align 8 |
|
48 _sparcv9_vis1_instrument: |
|
49 + .word 0x81b00d80 !fxor %f0,%f0,%f0 |
|
50 + .word 0x85b08d82 !fxor %f2,%f2,%f2 |
|
51 .word 0x91410000 !rd %tick,%o0 |
|
52 .word 0x81b00d80 !fxor %f0,%f0,%f0 |
|
53 .word 0x85b08d82 !fxor %f2,%f2,%f2 |
|
54 @@ -286,6 +297,30 @@ |
|
55 .type _sparcv9_vis1_instrument,#function |
|
56 .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument |
|
57 |
|
58 +.global _sparcv9_rdcfr |
|
59 +.align 8 |
|
60 +_sparcv9_rdcfr: |
|
61 + retl |
|
62 + .word 0x91468000 !rd %asr26,%o0 |
|
63 +.type _sparcv9_rdcfr,#function |
|
64 +.size _sparcv9_rdcfr,.-_sparcv9_rdcfr |
|
65 + |
|
66 +.global _sparcv9_vis3_probe |
|
67 +.align 8 |
|
68 +_sparcv9_vis3_probe: |
|
69 + retl |
|
70 + .word 0x81b022a0 !xmulx %g0,%g0,%g0 |
|
71 +.type _sparcv9_vis3_probe,#function |
|
72 +.size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe |
|
73 + |
|
74 +.global _sparcv9_random |
|
75 +.align 8 |
|
76 +_sparcv9_random: |
|
77 + retl ! leaf return; the .word below sits in its delay slot |
|
78 + .word 0x91b002a0 !random %o0 |
|
79 +.type _sparcv9_random,#function |
|
80 +.size _sparcv9_random,.-_sparcv9_random |
|
81 + |
|
82 .global OPENSSL_cleanse |
|
83 .align 32 |
|
84 OPENSSL_cleanse: |
|
85 @@ -370,6 +405,102 @@ |
|
86 .size OPENSSL_cleanse,.-OPENSSL_cleanse |
|
87 |
|
88 #ifndef _BOOT |
|
89 +.global _sparcv9_vis1_instrument_bus |
|
90 +.align 8 |
|
91 +_sparcv9_vis1_instrument_bus: |
|
92 + mov %o1,%o3 ! save cnt |
|
93 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
94 + mov %o4,%o5 ! lasttick = tick |
|
95 + set 0,%g4 ! diff |
|
96 + |
|
97 + andn %o0,63,%g1 |
|
98 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
99 + .word 0x8143e040 !membar #Sync |
|
100 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
101 + .word 0x8143e040 !membar #Sync |
|
102 + ld [%o0],%o4 |
|
103 + add %o4,%g4,%g4 |
|
104 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
105 + |
|
106 +.Loop: .word 0x99410000 !rd %tick,%o4 |
|
107 + sub %o4,%o5,%g4 ! diff=tick-lasttick |
|
108 + mov %o4,%o5 ! lasttick=tick |
|
109 + |
|
110 + andn %o0,63,%g1 |
|
111 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
112 + .word 0x8143e040 !membar #Sync |
|
113 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
114 + .word 0x8143e040 !membar #Sync |
|
115 + ld [%o0],%o4 |
|
116 + add %o4,%g4,%g4 |
|
117 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
118 + subcc %o1,1,%o1 ! --$cnt |
|
119 + bnz .Loop |
|
120 + add %o0,4,%o0 ! ++$out |
|
121 + |
|
122 + retl |
|
123 + mov %o3,%o0 |
|
124 +.type _sparcv9_vis1_instrument_bus,#function |
|
125 +.size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus |
|
126 + |
|
127 +.global _sparcv9_vis1_instrument_bus2 |
|
128 +.align 8 |
|
129 +_sparcv9_vis1_instrument_bus2: |
|
130 + mov %o1,%o3 ! save cnt |
|
131 + sll %o1,2,%o1 ! cnt*=4 |
|
132 + |
|
133 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
134 + mov %o4,%o5 ! lasttick = tick |
|
135 + set 0,%g4 ! diff |
|
136 + |
|
137 + andn %o0,63,%g1 |
|
138 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
139 + .word 0x8143e040 !membar #Sync |
|
140 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
141 + .word 0x8143e040 !membar #Sync |
|
142 + ld [%o0],%o4 |
|
143 + add %o4,%g4,%g4 |
|
144 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
145 + |
|
146 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
147 + sub %o4,%o5,%g4 ! diff=tick-lasttick |
|
148 + mov %o4,%o5 ! lasttick=tick |
|
149 + mov %g4,%g5 ! lastdiff=diff |
|
150 +.Loop2: |
|
151 + andn %o0,63,%g1 |
|
152 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load |
|
153 + .word 0x8143e040 !membar #Sync |
|
154 + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit |
|
155 + .word 0x8143e040 !membar #Sync |
|
156 + ld [%o0],%o4 |
|
157 + add %o4,%g4,%g4 |
|
158 + .word 0xc9e2100c !cas [%o0],%o4,%g4 |
|
159 + |
|
160 + subcc %o2,1,%o2 ! --max |
|
161 + bz .Ldone2 |
|
162 + nop |
|
163 + |
|
164 + .word 0x99410000 !rd %tick,%o4 ! tick |
|
165 + sub %o4,%o5,%g4 ! diff=tick-lasttick |
|
166 + mov %o4,%o5 ! lasttick=tick |
|
167 + cmp %g4,%g5 |
|
168 + mov %g4,%g5 ! lastdiff=diff |
|
169 + |
|
170 + .word 0x83408000 !rd %ccr,%g1 |
|
171 + and %g1,4,%g1 ! isolate zero flag |
|
172 + xor %g1,4,%g1 ! flip zero flag |
|
173 + |
|
174 + subcc %o1,%g1,%o1 ! conditional --$cnt |
|
175 + bnz .Loop2 |
|
176 + add %o0,%g1,%o0 ! conditional ++$out |
|
177 + |
|
178 +.Ldone2: |
|
179 + srl %o1,2,%o1 |
|
180 + retl |
|
181 + sub %o3,%o1,%o0 |
|
182 +.type _sparcv9_vis1_instrument_bus2,#function |
|
183 +.size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2 |
|
184 + |
|
185 .section ".init",#alloc,#execinstr |
|
186 call solaris_locking_setup |
|
187 nop |
|
188 Index: crypto/sparcv9cap.c |
|
189 =================================================================== |
|
190 diff -ru openssl-1.0.1e/crypto/sparcv9cap.c openssl-1.0.1e/crypto/sparcv9cap.c |
|
191 --- openssl-1.0.1e/crypto/sparcv9cap.c 2011-05-24 17:02:24.000000000 -0700 |
|
192 +++ openssl-1.0.1e/crypto/sparcv9cap.c 2011-07-27 10:48:17.817470000 -0700 |
|
193 @@ -3,36 +3,59 @@ |
|
194 #include <string.h> |
|
195 #include <setjmp.h> |
|
196 #include <sys/time.h> |
|
197 +#include <unistd.h> |
|
198 #include <openssl/bn.h> |
|
199 #include <sys/auxv.h> |
|
200 |
|
201 -#define SPARCV9_TICK_PRIVILEGED (1<<0) |
|
202 -#define SPARCV9_PREFER_FPU (1<<1) |
|
203 -#define SPARCV9_VIS1 (1<<2) |
|
204 -#define SPARCV9_VIS2 (1<<3) /* reserved */ |
|
205 -#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ |
|
206 -#define SPARCV9_BLK (1<<5) |
|
207 +#include "sparc_arch.h" |
|
208 |
|
209 +#if defined(__GNUC__) && defined(__linux) |
|
210 +__attribute__((visibility("hidden"))) |
|
211 +#endif |
|
212 #ifndef _BOOT |
|
213 -static int OPENSSL_sparcv9cap_P = SPARCV9_TICK_PRIVILEGED; |
|
214 +unsigned int OPENSSL_sparcv9cap_P[2] = {SPARCV9_TICK_PRIVILEGED, 0}; |
|
215 #else |
|
216 -static int OPENSSL_sparcv9cap_P = SPARCV9_VIS1; |
|
217 +unsigned int OPENSSL_sparcv9cap_P[2] = {SPARCV9_VIS1, 0}; |
|
218 #endif |
|
219 |
|
220 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
221 const BN_ULONG *np, const BN_ULONG *n0, int num) |
|
222 { |
|
223 + int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
224 + const BN_ULONG *np,const BN_ULONG *n0, int num); |
|
225 int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
226 const BN_ULONG *np, const BN_ULONG *n0, int num); |
|
227 int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
|
228 const BN_ULONG *np, const BN_ULONG *n0, int num); |
|
229 |
|
230 - if (num >= 8 && !(num & 1) && |
|
231 - (OPENSSL_sparcv9cap_P & (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) == |
|
232 - (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) |
|
233 - return bn_mul_mont_fpu(rp, ap, bp, np, n0, num); |
|
234 - else |
|
235 - return bn_mul_mont_int(rp, ap, bp, np, n0, num); |
|
236 + if (!(num&1) && num>=6) { |
|
237 + if ((num&15)==0 && num<=64 && |
|
238 + (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== |
|
239 + (CFR_MONTMUL|CFR_MONTSQR)) |
|
240 + { |
|
241 + typedef int (*bn_mul_mont_f)(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
242 + int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
243 + int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
244 + int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
245 + int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0); |
|
246 + static const bn_mul_mont_f funcs[4] = { |
|
247 + bn_mul_mont_t4_8, bn_mul_mont_t4_16, |
|
248 + bn_mul_mont_t4_24, bn_mul_mont_t4_32 }; |
|
249 + bn_mul_mont_f worker = funcs[num/16-1]; |
|
250 + |
|
251 + if ((*worker)(rp,ap,bp,np,n0)) return 1; |
|
252 + /* retry once and fall back */ |
|
253 + if ((*worker)(rp,ap,bp,np,n0)) return 1; |
|
254 + return bn_mul_mont_vis3(rp,ap,bp,np,n0,num); |
|
255 + } |
|
256 + if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3)) |
|
257 + return bn_mul_mont_vis3(rp,ap,bp,np,n0,num); |
|
258 + else if (num>=8 && |
|
259 + (OPENSSL_sparcv9cap_P[0]&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == |
|
260 + (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) |
|
261 + return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); |
|
262 + } |
|
263 + return bn_mul_mont_int(rp,ap,bp,np,n0,num); |
|
264 } |
|
265 |
|
266 unsigned long _sparcv9_rdtick(void); |
|
267 @@ -37,11 +60,16 @@ |
|
268 |
|
269 unsigned long _sparcv9_rdtick(void); |
|
270 unsigned long _sparcv9_vis1_instrument(void); |
|
271 +unsigned long _sparcv9_rdcfr(void); |
|
272 +#ifndef _BOOT |
|
273 +size_t _sparcv9_vis1_instrument_bus(unsigned int *,size_t); |
|
274 +size_t _sparcv9_vis1_instrument_bus2(unsigned int *,size_t,size_t); |
|
275 +#endif |
|
276 |
|
277 #ifndef _BOOT |
|
278 unsigned long OPENSSL_rdtsc(void) |
|
279 { |
|
280 - if (OPENSSL_sparcv9cap_P & SPARCV9_TICK_PRIVILEGED) |
|
281 + if (OPENSSL_sparcv9cap_P[0] & SPARCV9_TICK_PRIVILEGED) |
|
282 #if defined(__sun) && defined(__SVR4) |
|
283 return gethrtime(); |
|
284 #else |
|
285 @@ -50,6 +80,24 @@ |
|
286 else |
|
287 return _sparcv9_rdtick(); |
|
288 } |
|
289 + |
|
290 +size_t OPENSSL_instrument_bus(unsigned int *out,size_t cnt) |
|
291 +{ |
|
292 + if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == |
|
293 + SPARCV9_BLK) |
|
294 + return _sparcv9_vis1_instrument_bus(out,cnt); |
|
295 + else |
|
296 + return 0; |
|
297 +} |
|
298 + |
|
299 +size_t OPENSSL_instrument_bus2(unsigned int *out,size_t cnt,size_t max) |
|
300 +{ |
|
301 + if ((OPENSSL_sparcv9cap_P[0]&(SPARCV9_TICK_PRIVILEGED|SPARCV9_BLK)) == |
|
302 + SPARCV9_BLK) |
|
303 + return _sparcv9_vis1_instrument_bus2(out,cnt,max); |
|
304 + else |
|
305 + return 0; |
|
306 +} |
|
307 #endif |
|
308 |
|
309 #if defined(_BOOT) |
|
310 @@ -59,7 +107,7 @@ |
|
311 */ |
|
312 void OPENSSL_cpuid_setup(void) |
|
313 { |
|
314 - OPENSSL_sparcv9cap_P = SPARCV9_VIS1; |
|
315 + OPENSSL_sparcv9cap_P[0] = SPARCV9_VIS1; |
|
316 } |
|
317 |
|
318 #elif 0 && defined(__sun) && defined(__SVR4) |
|
319 @@ -88,11 +136,11 @@ |
|
320 if (!strcmp(name, "SUNW,UltraSPARC") || |
|
321 /* covers II,III,IV */ |
|
322 !strncmp(name, "SUNW,UltraSPARC-I", 17)) { |
|
323 - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU | SPARCV9_VIS1; |
|
324 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU | SPARCV9_VIS1; |
|
325 |
|
326 /* %tick is privileged only on UltraSPARC-I/II, but not IIe */ |
|
327 if (name[14] != '\0' && name[17] != '\0' && name[18] != '\0') |
|
328 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
329 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
330 |
|
331 return DI_WALK_TERMINATE; |
|
332 } |
|
333 @@ -98,7 +146,7 @@ |
|
334 } |
|
335 /* This is expected to catch remaining UltraSPARCs, such as T1 */ |
|
336 else if (!strncmp(name, "SUNW,UltraSPARC", 15)) { |
|
337 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
338 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
339 |
|
340 return DI_WALK_TERMINATE; |
|
341 } |
|
342 @@ -117,7 +165,7 @@ |
|
343 trigger = 1; |
|
344 |
|
345 if ((e = getenv("OPENSSL_sparcv9cap"))) { |
|
346 - OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0); |
|
347 + OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0); |
|
348 return; |
|
349 } |
|
350 |
|
351 @@ -124,15 +172,15 @@ |
|
352 if (sysinfo(SI_MACHINE, si, sizeof(si)) > 0) { |
|
353 if (strcmp(si, "sun4v")) |
|
354 /* FPU is preferred for all CPUs, but US-T1/2 */ |
|
355 - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU; |
|
356 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU; |
|
357 } |
|
358 |
|
359 if (sysinfo(SI_ISALIST, si, sizeof(si)) > 0) { |
|
360 if (strstr(si, "+vis")) |
|
361 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; |
|
362 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK; |
|
363 if (strstr(si, "+vis2")) { |
|
364 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; |
|
365 - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; |
|
366 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; |
|
367 + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; |
|
368 return; |
|
369 } |
|
370 } |
|
371 @@ -195,7 +241,9 @@ |
|
372 trigger = 1; |
|
373 |
|
374 if ((e = getenv("OPENSSL_sparcv9cap"))) { |
|
375 - OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0); |
|
376 + OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0); |
|
377 + if ((e = strchr(e, ':'))) |
|
378 + OPENSSL_sparcv9cap_P[1] = strtoul(e + 1, NULL, 0); |
|
379 return; |
|
380 } |
|
381 |
|
382 @@ -202,21 +250,48 @@ |
|
383 (void) getisax(&ui, 1); |
|
384 |
|
385 /* Initial value, fits UltraSPARC-I&II... */ |
|
386 - OPENSSL_sparcv9cap_P = SPARCV9_BLK; |
|
387 + OPENSSL_sparcv9cap_P[0] = SPARCV9_BLK; |
|
388 |
|
389 if (ui & AV_SPARC_VIS) { |
|
390 - /* detect UltraSPARC-Tx, see sparccpuid.S for details... */ |
|
391 + /* detect UltraSPARC-Tx, see sparccpuid.S for details... */ |
|
392 if (_sparcv9_vis1_instrument() < 7) |
|
393 - OPENSSL_sparcv9cap_P |= SPARCV9_TICK_PRIVILEGED; |
|
394 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_TICK_PRIVILEGED; |
|
395 if (_sparcv9_vis1_instrument() < 12) { |
|
396 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1|SPARCV9_PREFER_FPU; |
|
397 + OPENSSL_sparcv9cap_P[0] |= (SPARCV9_VIS1 | SPARCV9_PREFER_FPU); |
|
398 if (ui & AV_SPARC_VIS2) |
|
399 - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; |
|
400 - } |
|
401 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; |
|
402 + } |
|
403 } |
|
404 |
|
405 if (ui & AV_SPARC_FMAF) |
|
406 - OPENSSL_sparcv9cap_P |= SPARCV9_FMADD; |
|
407 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD; |
|
408 + |
|
409 + /* |
|
410 + * VIS3 flag is tested independently from VIS1, unlike VIS2 that is, |
|
411 + * because VIS3 defines even integer instructions. |
|
412 + */ |
|
413 + if (ui & AV_SPARC_VIS3) |
|
414 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3; |
|
415 + |
|
416 +#define AV_T4_MECHS (AV_SPARC_AES | AV_SPARC_DES | AV_SPARC_KASUMI | \ |
|
417 + AV_SPARC_CAMELLIA | AV_SPARC_MD5 | AV_SPARC_SHA1 | \ |
|
418 + AV_SPARC_SHA256 | AV_SPARC_SHA512 | AV_SPARC_MPMUL | \ |
|
419 + AV_SPARC_CRC32C) |
|
420 + |
|
421 + if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3) && (ui & AV_T4_MECHS)) |
|
422 + OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr(); |
|
423 + |
|
424 + if (sizeof(size_t) == 8) |
|
425 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; |
|
426 +#ifdef __linux |
|
427 + else |
|
428 + { |
|
429 + int ret = syscall(340); |
|
430 + |
|
431 + if (ret >= 0 && ret & 1) |
|
432 + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; |
|
433 + } |
|
434 +#endif |
|
435 } |
|
436 |
|
437 #endif |
|
438 Index: crypto/md5/Makefile |
|
439 =================================================================== |
|
440 diff -ru openssl-1.0.1e/crypto/md5/Makefile openssl-1.0.1e/crypto/md5/Makefile |
|
441 --- openssl-1.0.1e/crypto/md5/Makefile 2011-05-24 17:02:24.000000000 -0700 |
|
442 +++ openssl-1.0.1e/crypto/md5/Makefile 2011-07-27 10:48:17.817470000 -0700 |
|
443 @@ -52,6 +52,9 @@ |
|
444 $(CC) $(CFLAGS) -E asm/md5-ia64.S | \ |
|
445 $(PERL) -ne 's/;\s+/;\n/g; print;' > $@ |
|
446 |
|
447 +md5-sparcv9.S: asm/md5-sparcv9.pl |
|
448 + $(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS) |
|
449 + |
|
450 files: |
|
451 $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO |
|
452 |
|
453 Index: crypto/md5/md5_locl.h |
|
454 =================================================================== |
|
455 diff -ru openssl-1.0.1e/crypto/md5/md5_locl.h openssl-1.0.1e/crypto/md5/md5_locl.h |
|
456 --- openssl-1.0.1e/crypto/md5/md5_locl.h 2011-05-24 17:02:24.000000000 -0700 |
|
457 +++ openssl-1.0.1e/crypto/md5/md5_locl.h 2011-07-27 10:48:17.817470000 -0700 |
|
458 @@ -71,6 +71,8 @@ |
|
459 # define md5_block_data_order md5_block_asm_data_order |
|
460 # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) |
|
461 # define md5_block_data_order md5_block_asm_data_order |
|
462 +# elif defined(__sparc) || defined(__sparc__) |
|
463 +# define md5_block_data_order md5_block_asm_data_order |
|
464 # endif |
|
465 #endif |
|
466 |
|
467 Index: crypto/sha/Makefile |
|
468 =================================================================== |
|
469 diff -ru openssl-1.0.1e/crypto/sha/Makefile openssl-1.0.1e/crypto/sha/Makefile |
|
470 --- openssl-1.0.1e/crypto/sha/Makefile 2011-05-24 17:02:24.000000000 -0700 |
|
471 +++ openssl-1.0.1e/crypto/sha/Makefile 2011-07-27 10:48:17.817470000 -0700 |
|
472 @@ -68,9 +68,9 @@ |
|
473 sha1-x86_64.s: asm/sha1-x86_64.pl; $(PERL) asm/sha1-x86_64.pl $(PERLASM_SCHEME) > $@ |
|
474 sha256-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ |
|
475 sha512-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ |
|
476 -sha1-sparcv9.s: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS) |
|
477 -sha256-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
478 -sha512-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
479 +sha1-sparcv9.S: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS) |
|
480 +sha256-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
481 +sha512-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) |
|
482 |
|
483 sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@ |
|
484 sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ |
|
485 Index: crypto/sha/asm/sha1-sparcv9.pl |
|
486 =================================================================== |
|
487 diff -ru openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl |
|
488 --- openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700 |
|
489 +++ openssl-1.0.1e/crypto/sha/asm/sha1-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700 |
|
490 @@ -5,6 +5,8 @@ |
|
491 # project. The module is, however, dual licensed under OpenSSL and |
|
492 # CRYPTOGAMS licenses depending on where you obtain it. For further |
|
493 # details see http://www.openssl.org/~appro/cryptogams/. |
|
494 +# |
|
495 +# Hardware SPARC T4 support by David S. Miller <[email protected]>. |
|
496 # ==================================================================== |
|
497 |
|
498 # Performance improvement is not really impressive on pre-T1 CPU: +8% |
|
499 @@ -18,6 +20,11 @@ |
|
500 # ensure scalability on UltraSPARC T1, or rather to avoid decay when |
|
501 # amount of active threads exceeds the number of physical cores. |
|
502 |
|
503 +# SPARC T4 SHA1 hardware achieves 3.72 cycles per byte, which is 3.1x |
|
504 +# faster than software. Multi-process benchmark saturates at 11x |
|
505 +# single-process result on 8-core processor, or ~9GBps per 2.85GHz |
|
506 +# socket. |
|
507 + |
|
508 $bits=32; |
|
509 for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } |
|
510 if ($bits==64) { $bias=2047; $frame=192; } |
|
511 @@ -183,11 +190,93 @@ |
|
512 .register %g3,#scratch |
|
513 ___ |
|
514 $code.=<<___; |
|
515 +#include "sparc_arch.h" |
|
516 + |
|
517 .section ".text",#alloc,#execinstr |
|
518 |
|
519 +#ifdef __PIC__ |
|
520 +SPARC_PIC_THUNK(%g1) |
|
521 +#endif |
|
522 + |
|
523 .align 32 |
|
524 .globl sha1_block_data_order |
|
525 sha1_block_data_order: |
|
526 + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) |
|
527 + ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1] |
|
528 + |
|
529 + andcc %g1, CFR_SHA1, %g0 |
|
530 + be .Lsoftware |
|
531 + nop |
|
532 + |
|
533 + ld [%o0 + 0x00], %f0 ! load context |
|
534 + ld [%o0 + 0x04], %f1 |
|
535 + ld [%o0 + 0x08], %f2 |
|
536 + andcc %o1, 0x7, %g0 |
|
537 + ld [%o0 + 0x0c], %f3 |
|
538 + bne,pn %icc, .Lhwunaligned |
|
539 + ld [%o0 + 0x10], %f4 |
|
540 + |
|
541 +.Lhw_loop: |
|
542 + ldd [%o1 + 0x00], %f8 |
|
543 + ldd [%o1 + 0x08], %f10 |
|
544 + ldd [%o1 + 0x10], %f12 |
|
545 + ldd [%o1 + 0x18], %f14 |
|
546 + ldd [%o1 + 0x20], %f16 |
|
547 + ldd [%o1 + 0x28], %f18 |
|
548 + ldd [%o1 + 0x30], %f20 |
|
549 + subcc %o2, 1, %o2 ! done yet? |
|
550 + ldd [%o1 + 0x38], %f22 |
|
551 + add %o1, 0x40, %o1 |
|
552 + |
|
553 + .word 0x81b02820 ! SHA1 |
|
554 + |
|
555 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhw_loop |
|
556 + nop |
|
557 + |
|
558 +.Lhwfinish: |
|
559 + st %f0, [%o0 + 0x00] ! store context |
|
560 + st %f1, [%o0 + 0x04] |
|
561 + st %f2, [%o0 + 0x08] |
|
562 + st %f3, [%o0 + 0x0c] |
|
563 + retl |
|
564 + st %f4, [%o0 + 0x10] |
|
565 + |
|
566 +.align 8 |
|
567 +.Lhwunaligned: |
|
568 + alignaddr %o1, %g0, %o1 |
|
569 + |
|
570 + ldd [%o1 + 0x00], %f10 |
|
571 +.Lhwunaligned_loop: |
|
572 + ldd [%o1 + 0x08], %f12 |
|
573 + ldd [%o1 + 0x10], %f14 |
|
574 + ldd [%o1 + 0x18], %f16 |
|
575 + ldd [%o1 + 0x20], %f18 |
|
576 + ldd [%o1 + 0x28], %f20 |
|
577 + ldd [%o1 + 0x30], %f22 |
|
578 + ldd [%o1 + 0x38], %f24 |
|
579 + subcc %o2, 1, %o2 ! done yet? |
|
580 + ldd [%o1 + 0x40], %f26 |
|
581 + add %o1, 0x40, %o1 |
|
582 + |
|
583 + faligndata %f10, %f12, %f8 |
|
584 + faligndata %f12, %f14, %f10 |
|
585 + faligndata %f14, %f16, %f12 |
|
586 + faligndata %f16, %f18, %f14 |
|
587 + faligndata %f18, %f20, %f16 |
|
588 + faligndata %f20, %f22, %f18 |
|
589 + faligndata %f22, %f24, %f20 |
|
590 + faligndata %f24, %f26, %f22 |
|
591 + |
|
592 + .word 0x81b02820 ! SHA1 |
|
593 + |
|
594 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop |
|
595 + for %f26, %f26, %f10 ! %f10=%f26 |
|
596 + |
|
597 + ba .Lhwfinish |
|
598 + nop |
|
599 + |
|
600 +.align 16 |
|
601 +.Lsoftware: |
|
602 save %sp,-$frame,%sp |
|
603 sllx $len,6,$len |
|
604 add $inp,$len,$len |
|
605 @@ -279,6 +368,62 @@ |
|
606 .align 4 |
|
607 ___ |
|
608 |
|
609 -$code =~ s/\`([^\`]*)\`/eval $1/gem; |
|
610 -print $code; |
|
611 +# Purpose of these subroutines is to explicitly encode VIS instructions, |
|
612 +# so that one can compile the module without having to specify VIS |
|
613 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. |
|
614 +# Idea is to reserve for option to produce "universal" binary and let |
|
615 +# programmer detect if current CPU is VIS capable at run-time. |
|
616 +sub unvis { # encode a 3-operand VIS op as .word; returns the input text if it cannot |
|
617 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
618 +my ($ref,$opf); # parenthesized so that both are lexical, not only $ref |
|
619 +my %visopf = ( "faligndata" => 0x048, |
|
620 + "for" => 0x07c ); |
|
621 + |
|
622 + $ref = "$mnemonic\t$rs1,$rs2,$rd"; # original text: fallback result and trailing comment |
|
623 + |
|
624 + if ($opf=$visopf{$mnemonic}) { |
|
625 + foreach ($rs1,$rs2,$rd) { |
|
626 + return $ref if (!/%f([0-9]{1,2})/); # not an %fN operand, leave untouched |
|
627 + $_=$1; |
|
628 + if ($1>=32) { |
|
629 + return $ref if ($1&1); # odd register number >= 32, leave untouched |
|
630 + # re-encode for upper double register addressing |
|
631 + $_=($1|$1>>5)&31; |
|
632 + } |
|
633 + } |
|
634 + |
|
635 + return sprintf ".word\t0x%08x !%s", |
|
636 + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, |
|
637 + $ref; |
|
638 + } else { |
|
639 + return $ref; |
|
640 + } |
|
641 +} |
|
642 +sub unalignaddr { |
|
643 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
644 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); |
|
645 +my $ref="$mnemonic\t$rs1,$rs2,$rd"; |
|
646 + |
|
647 + foreach ($rs1,$rs2,$rd) { |
|
648 + if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; } |
|
649 + else { return $ref; } |
|
650 + } |
|
651 + return sprintf ".word\t0x%08x !%s", |
|
652 + 0x81b00300|$rd<<25|$rs1<<14|$rs2, |
|
653 + $ref; |
|
654 +} |
|
655 + |
|
656 +foreach (split("\n",$code)) { |
|
657 + s/\`([^\`]*)\`/eval $1/ge; |
|
658 + |
|
659 + s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ |
|
660 + &unvis($1,$2,$3,$4) |
|
661 + /ge; |
|
662 + s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ |
|
663 + &unalignaddr($1,$2,$3,$4) |
|
664 + /ge; |
|
665 + |
|
666 + print $_,"\n"; |
|
667 +} |
|
668 + |
|
669 close STDOUT; |
|
670 |
|
671 Index: crypto/sha/asm/sha512-sparcv9.pl |
|
672 =================================================================== |
|
673 diff -ru openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl |
|
674 --- openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-05-24 17:02:24.000000000 -0700 |
|
675 +++ openssl-1.0.1e/crypto/sha/asm/sha512-sparcv9.pl 2011-07-27 10:48:17.817470000 -0700 |
|
676 @@ -5,6 +5,8 @@ |
|
677 # project. The module is, however, dual licensed under OpenSSL and |
|
678 # CRYPTOGAMS licenses depending on where you obtain it. For further |
|
679 # details see http://www.openssl.org/~appro/cryptogams/. |
|
680 +# |
|
681 +# Hardware SPARC T4 support by David S. Miller <[email protected]>. |
|
682 # ==================================================================== |
|
683 |
|
684 # SHA256 performance improvement over compiler generated code varies |
|
685 @@ -41,6 +43,12 @@ |
|
686 # loads are always slower than one 64-bit load. Once again this |
|
687 # is unlike pre-T1 UltraSPARC, where, if scheduled appropriately, |
|
688 # 2x32-bit loads can be as fast as 1x64-bit ones. |
|
689 +# |
|
690 +# SPARC T4 SHA256/512 hardware achieves 3.17/2.01 cycles per byte, |
|
691 +# which is 9.3x/11.1x faster than software. Multi-process benchmark |
|
692 +# saturates at 11.5x single-process result on 8-core processor, or |
|
693 +# ~11/16GBps per 2.85GHz socket. |
|
694 + |
|
695 |
|
696 $bits=32; |
|
697 for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } |
|
698 @@ -386,6 +394,8 @@ |
|
699 .register %g3,#scratch |
|
700 ___ |
|
701 $code.=<<___; |
|
702 +#include "sparc_arch.h" |
|
703 + |
|
704 .section ".text",#alloc,#execinstr |
|
705 |
|
706 .align 64 |
|
707 @@ -457,8 +467,196 @@ |
|
708 } |
|
709 $code.=<<___; |
|
710 .size K${label},.-K${label} |
|
711 + |
|
712 +#ifdef __PIC__ |
|
713 +SPARC_PIC_THUNK(%g1) |
|
714 +#endif |
|
715 + |
|
716 .globl sha${label}_block_data_order |
|
717 +.align 32 |
|
718 sha${label}_block_data_order: |
|
719 + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) |
|
720 + ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1] |
|
721 + |
|
722 + andcc %g1, CFR_SHA${label}, %g0 |
|
723 + be .Lsoftware |
|
724 + nop |
|
725 +___ |
|
726 +$code.=<<___ if ($SZ==8); # SHA512 |
|
727 + ldd [%o0 + 0x00], %f0 ! load context |
|
728 + ldd [%o0 + 0x08], %f2 |
|
729 + ldd [%o0 + 0x10], %f4 |
|
730 + ldd [%o0 + 0x18], %f6 |
|
731 + ldd [%o0 + 0x20], %f8 |
|
732 + ldd [%o0 + 0x28], %f10 |
|
733 + andcc %o1, 0x7, %g0 |
|
734 + ldd [%o0 + 0x30], %f12 |
|
735 + bne,pn %icc, .Lhwunaligned |
|
736 + ldd [%o0 + 0x38], %f14 |
|
737 + |
|
738 +.Lhwaligned_loop: |
|
739 + ldd [%o1 + 0x00], %f16 |
|
740 + ldd [%o1 + 0x08], %f18 |
|
741 + ldd [%o1 + 0x10], %f20 |
|
742 + ldd [%o1 + 0x18], %f22 |
|
743 + ldd [%o1 + 0x20], %f24 |
|
744 + ldd [%o1 + 0x28], %f26 |
|
745 + ldd [%o1 + 0x30], %f28 |
|
746 + ldd [%o1 + 0x38], %f30 |
|
747 + ldd [%o1 + 0x40], %f32 |
|
748 + ldd [%o1 + 0x48], %f34 |
|
749 + ldd [%o1 + 0x50], %f36 |
|
750 + ldd [%o1 + 0x58], %f38 |
|
751 + ldd [%o1 + 0x60], %f40 |
|
752 + ldd [%o1 + 0x68], %f42 |
|
753 + ldd [%o1 + 0x70], %f44 |
|
754 + subcc %o2, 1, %o2 ! done yet? |
|
755 + ldd [%o1 + 0x78], %f46 |
|
756 + add %o1, 0x80, %o1 |
|
757 + |
|
758 + .word 0x81b02860 ! SHA512 |
|
759 + |
|
760 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwaligned_loop |
|
761 + nop |
|
762 + |
|
763 +.Lhwfinish: |
|
764 + std %f0, [%o0 + 0x00] ! store context |
|
765 + std %f2, [%o0 + 0x08] |
|
766 + std %f4, [%o0 + 0x10] |
|
767 + std %f6, [%o0 + 0x18] |
|
768 + std %f8, [%o0 + 0x20] |
|
769 + std %f10, [%o0 + 0x28] |
|
770 + std %f12, [%o0 + 0x30] |
|
771 + retl |
|
772 + std %f14, [%o0 + 0x38] |
|
773 + |
|
774 +.align 16 |
|
775 +.Lhwunaligned: |
|
776 + alignaddr %o1, %g0, %o1 |
|
777 + |
|
778 + ldd [%o1 + 0x00], %f18 |
|
779 +.Lhwunaligned_loop: |
|
780 + ldd [%o1 + 0x08], %f20 |
|
781 + ldd [%o1 + 0x10], %f22 |
|
782 + ldd [%o1 + 0x18], %f24 |
|
783 + ldd [%o1 + 0x20], %f26 |
|
784 + ldd [%o1 + 0x28], %f28 |
|
785 + ldd [%o1 + 0x30], %f30 |
|
786 + ldd [%o1 + 0x38], %f32 |
|
787 + ldd [%o1 + 0x40], %f34 |
|
788 + ldd [%o1 + 0x48], %f36 |
|
789 + ldd [%o1 + 0x50], %f38 |
|
790 + ldd [%o1 + 0x58], %f40 |
|
791 + ldd [%o1 + 0x60], %f42 |
|
792 + ldd [%o1 + 0x68], %f44 |
|
793 + ldd [%o1 + 0x70], %f46 |
|
794 + ldd [%o1 + 0x78], %f48 |
|
795 + subcc %o2, 1, %o2 ! done yet? |
|
796 + ldd [%o1 + 0x80], %f50 |
|
797 + add %o1, 0x80, %o1 |
|
798 + |
|
799 + faligndata %f18, %f20, %f16 |
|
800 + faligndata %f20, %f22, %f18 |
|
801 + faligndata %f22, %f24, %f20 |
|
802 + faligndata %f24, %f26, %f22 |
|
803 + faligndata %f26, %f28, %f24 |
|
804 + faligndata %f28, %f30, %f26 |
|
805 + faligndata %f30, %f32, %f28 |
|
806 + faligndata %f32, %f34, %f30 |
|
807 + faligndata %f34, %f36, %f32 |
|
808 + faligndata %f36, %f38, %f34 |
|
809 + faligndata %f38, %f40, %f36 |
|
810 + faligndata %f40, %f42, %f38 |
|
811 + faligndata %f42, %f44, %f40 |
|
812 + faligndata %f44, %f46, %f42 |
|
813 + faligndata %f46, %f48, %f44 |
|
814 + faligndata %f48, %f50, %f46 |
|
815 + |
|
816 + .word 0x81b02860 ! SHA512 |
|
817 + |
|
818 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop |
|
819 + for %f50, %f50, %f18 ! %f18=%f50 |
|
820 + |
|
821 + ba .Lhwfinish |
|
822 + nop |
|
823 +___ |
|
824 +$code.=<<___ if ($SZ==4); # SHA256 |
|
825 + ld [%o0 + 0x00], %f0 |
|
826 + ld [%o0 + 0x04], %f1 |
|
827 + ld [%o0 + 0x08], %f2 |
|
828 + ld [%o0 + 0x0c], %f3 |
|
829 + ld [%o0 + 0x10], %f4 |
|
830 + ld [%o0 + 0x14], %f5 |
|
831 + andcc %o1, 0x7, %g0 |
|
832 + ld [%o0 + 0x18], %f6 |
|
833 + bne,pn %icc, .Lhwunaligned |
|
834 + ld [%o0 + 0x1c], %f7 |
|
835 + |
|
836 +.Lhwloop: |
|
837 + ldd [%o1 + 0x00], %f8 |
|
838 + ldd [%o1 + 0x08], %f10 |
|
839 + ldd [%o1 + 0x10], %f12 |
|
840 + ldd [%o1 + 0x18], %f14 |
|
841 + ldd [%o1 + 0x20], %f16 |
|
842 + ldd [%o1 + 0x28], %f18 |
|
843 + ldd [%o1 + 0x30], %f20 |
|
844 + subcc %o2, 1, %o2 ! done yet? |
|
845 + ldd [%o1 + 0x38], %f22 |
|
846 + add %o1, 0x40, %o1 |
|
847 + |
|
848 + .word 0x81b02840 ! SHA256 |
|
849 + |
|
850 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwloop |
|
851 + nop |
|
852 + |
|
853 +.Lhwfinish: |
|
854 + st %f0, [%o0 + 0x00] ! store context |
|
855 + st %f1, [%o0 + 0x04] |
|
856 + st %f2, [%o0 + 0x08] |
|
857 + st %f3, [%o0 + 0x0c] |
|
858 + st %f4, [%o0 + 0x10] |
|
859 + st %f5, [%o0 + 0x14] |
|
860 + st %f6, [%o0 + 0x18] |
|
861 + retl |
|
862 + st %f7, [%o0 + 0x1c] |
|
863 + |
|
864 +.align 8 |
|
865 +.Lhwunaligned: |
|
866 + alignaddr %o1, %g0, %o1 |
|
867 + |
|
868 + ldd [%o1 + 0x00], %f10 |
|
869 +.Lhwunaligned_loop: |
|
870 + ldd [%o1 + 0x08], %f12 |
|
871 + ldd [%o1 + 0x10], %f14 |
|
872 + ldd [%o1 + 0x18], %f16 |
|
873 + ldd [%o1 + 0x20], %f18 |
|
874 + ldd [%o1 + 0x28], %f20 |
|
875 + ldd [%o1 + 0x30], %f22 |
|
876 + ldd [%o1 + 0x38], %f24 |
|
877 + subcc %o2, 1, %o2 ! done yet? |
|
878 + ldd [%o1 + 0x40], %f26 |
|
879 + add %o1, 0x40, %o1 |
|
880 + |
|
881 + faligndata %f10, %f12, %f8 |
|
882 + faligndata %f12, %f14, %f10 |
|
883 + faligndata %f14, %f16, %f12 |
|
884 + faligndata %f16, %f18, %f14 |
|
885 + faligndata %f18, %f20, %f16 |
|
886 + faligndata %f20, %f22, %f18 |
|
887 + faligndata %f22, %f24, %f20 |
|
888 + faligndata %f24, %f26, %f22 |
|
889 + |
|
890 + .word 0x81b02840 ! SHA256 |
|
891 + |
|
892 + bne,pt `$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop |
|
893 + for %f26, %f26, %f10 ! %f10=%f26 |
|
894 + |
|
895 + ba .Lhwfinish |
|
896 + nop |
|
897 +___ |
|
898 +$code.=<<___; |
|
899 +.align 16 |
|
900 +.Lsoftware: |
|
901 save %sp,`-$frame-$locals`,%sp |
|
902 and $inp,`$align-1`,$tmp31 |
|
903 sllx $len,`log(16*$SZ)/log(2)`,$len |
|
904 @@ -589,6 +787,62 @@ |
|
905 .align 4 |
|
906 ___ |
|
907 |
|
908 -$code =~ s/\`([^\`]*)\`/eval $1/gem; |
|
909 -print $code; |
|
910 +# Purpose of these subroutines is to explicitly encode VIS instructions, |
|
911 +# so that one can compile the module without having to specify VIS |
|
912 +# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. |
|
913 +# Idea is to reserve for option to produce "universal" binary and let |
|
914 +# programmer detect if current CPU is VIS capable at run-time. |
|
915 +sub unvis { # encode a 3-operand VIS op as a raw .word; returns the plain text form when it cannot be encoded |
|
916 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
917 +my ($ref,$opf); # parenthesised: "my $ref,$opf" declares only $ref and leaves $opf a package global |
|
918 +my %visopf = ( "faligndata" => 0x048, |
|
919 + "for" => 0x07c ); |
|
920 + |
|
921 + $ref = "$mnemonic\t$rs1,$rs2,$rd"; |
|
922 + |
|
923 + if ($opf=$visopf{$mnemonic}) { |
|
924 + foreach ($rs1,$rs2,$rd) { |
|
925 + return $ref if (!/%f([0-9]{1,2})/); # operand is not %fN: return the text unchanged |
|
926 + $_=$1; |
|
927 + if ($1>=32) { |
|
928 + return $ref if ($1&1); # odd register number >= 32: return the text unchanged |
|
929 + # re-encode for upper double register addressing |
|
930 + $_=($1|$1>>5)&31; |
|
931 + } |
|
932 + } |
|
933 + |
|
934 + return sprintf ".word\t0x%08x !%s", |
|
935 + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, |
|
936 + $ref; |
|
937 + } else { |
|
938 + return $ref; |
|
939 + } |
|
940 +} |
|
941 +sub unalignaddr { # encode "alignaddr rs1,rs2,rd" as a raw .word; returns the plain text form if an operand is not %g/%o/%l/%i 0-7 |
|
942 +my ($mnemonic,$rs1,$rs2,$rd)=@_; |
|
943 +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); # base register number for each register-group letter |
|
944 +my $ref="$mnemonic\t$rs1,$rs2,$rd"; |
|
945 + |
|
946 + foreach ($rs1,$rs2,$rd) { |
|
947 + if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; } # overwrite the operand with its numeric register index |
|
948 + else { return $ref; } |
|
949 + } |
|
950 + return sprintf ".word\t0x%08x !%s", |
|
951 + 0x81b00300|$rd<<25|$rs1<<14|$rs2, |
|
952 + $ref; |
|
953 +} |
|
954 + |
|
955 +foreach (split("\n",$code)) { # post-process the generated assembly one line at a time |
|
956 + s/\`([^\`]*)\`/eval $1/ge; # replace each `...` span with the result of evaluating it as Perl |
|
957 + |
|
958 + s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ |
|
959 + &unvis($1,$2,$3,$4) |
|
960 + /ge; # f* mnemonics with three %f operands go through unvis() |
|
961 + s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ |
|
962 + &unalignaddr($1,$2,$3,$4) |
|
963 + /ge; # alignaddr with three integer-register operands goes through unalignaddr() |
|
964 + |
|
965 + print $_,"\n"; |
|
966 +} |
|
967 + |
|
968 close STDOUT; |
|
969 Index: crypto/des/Makefile |
|
970 =================================================================== |
|
971 diff -ru openssl-1.0.1e/crypto/des/Makefile.orig openssl-1.0.1e/crypto/des/Makefile |
|
972 --- a/crypto/des/Makefile |
|
973 +++ b/crypto/des/Makefile |
|
974 @@ -61,6 +61,8 @@ des: des.o cbc3_enc.o lib |
|
975 |
|
976 des_enc-sparc.S: asm/des_enc.m4 |
|
977 m4 -B 8192 asm/des_enc.m4 > des_enc-sparc.S |
|
978 +dest4-sparcv9.s: asm/dest4-sparcv9.pl |
|
979 + $(PERL) asm/dest4-sparcv9.pl $(CFLAGS) > $@ |
|
980 |
|
981 des-586.s: asm/des-586.pl ../perlasm/x86asm.pl ../perlasm/cbc.pl |
|
982 $(PERL) asm/des-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@ |
|
983 Index: crypto/evp/e_des.c |
|
984 =================================================================== |
|
985 diff -ru openssl-1.0.1e/crypto/evp/e_des.c.orig openssl-1.0.1e/crypto/evp/e_des.c |
|
986 --- a/crypto/evp/e_des.c |
|
987 +++ b/crypto/evp/e_des.c |
|
988 @@ -65,6 +65,30 @@ |
|
989 # include <openssl/des.h> |
|
990 # include <openssl/rand.h> |
|
991 |
|
992 +typedef struct { |
|
993 + union { double align; DES_key_schedule ks; } ks; |
|
994 + union { |
|
995 + void (*cbc)(const void *,void *,size_t,const void *,void *); |
|
996 + } stream; |
|
997 +} EVP_DES_KEY; |
|
998 + |
|
999 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) |
|
1000 +/* ---------^^^ this is not a typo, just a way to detect that |
|
1001 + * assembler support was in general requested... |
|
1002 + */ |
|
1003 +#include "sparc_arch.h" |
|
1004 + |
|
1005 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1006 + |
|
1007 +#define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) |
|
1008 + |
|
1009 +void des_t4_key_expand(const void *key, DES_key_schedule *ks); |
|
1010 +void des_t4_cbc_encrypt(const void *inp,void *out,size_t len, |
|
1011 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1012 +void des_t4_cbc_decrypt(const void *inp,void *out,size_t len, |
|
1013 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1014 +#endif |
|
1015 + |
|
1016 static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1017 const unsigned char *iv, int enc); |
|
1018 static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); |
|
1019 @@ -102,6 +126,12 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1020 static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1021 const unsigned char *in, size_t inl) |
|
1022 { |
|
1023 + EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; |
|
1024 + |
|
1025 + if (dat->stream.cbc) { |
|
1026 + (*dat->stream.cbc)(in,out,inl,&dat->ks.ks,ctx->iv); |
|
1027 + return 1; |
|
1028 + } |
|
1029 while (inl >= EVP_MAXCHUNK) { |
|
1030 DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data, |
|
1031 (DES_cblock *)ctx->iv, ctx->encrypt); |
|
1032 @@ -179,16 +209,16 @@ |
|
1033 return 1; |
|
1034 } |
|
1035 |
|
1036 -BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64, |
|
1037 +BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64, |
|
1038 EVP_CIPH_RAND_KEY, des_init_key, NULL, |
|
1039 EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) |
|
1040 |
|
1041 |
|
1042 -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 1, |
|
1043 +BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 1, |
|
1044 EVP_CIPH_RAND_KEY, des_init_key, NULL, |
|
1045 EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) |
|
1046 |
|
1047 -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 8, |
|
1048 +BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 8, |
|
1049 EVP_CIPH_RAND_KEY, des_init_key, NULL, |
|
1050 EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) |
|
1051 |
|
1052 @@ -196,8 +226,23 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1053 const unsigned char *iv, int enc) |
|
1054 { |
|
1055 DES_cblock *deskey = (DES_cblock *)key; |
|
1056 + EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; |
|
1057 + |
|
1058 + dat->stream.cbc = NULL; |
|
1059 +#if defined(SPARC_DES_CAPABLE) |
|
1060 + if (SPARC_DES_CAPABLE) { |
|
1061 + int mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1062 + |
|
1063 + if (mode == EVP_CIPH_CBC_MODE) { |
|
1064 + des_t4_key_expand(key,&dat->ks.ks); |
|
1065 + dat->stream.cbc = enc ? des_t4_cbc_encrypt : |
|
1066 + des_t4_cbc_decrypt; |
|
1067 + return 1; |
|
1068 + } |
|
1069 + } |
|
1070 +#endif |
|
1071 # ifdef EVP_CHECK_DES_KEY |
|
1072 - if (DES_set_key_checked(deskey, ctx->cipher_data) != 0) |
|
1073 + if (DES_set_key_checked(deskey, &dat->ks.ks) != 0) |
|
1074 return 0; |
|
1075 # else |
|
1076 DES_set_key_unchecked(deskey, ctx->cipher_data); |
|
1077 Index: crypto/evp/e_des3.c |
|
1078 =================================================================== |
|
1079 diff -ru openssl-1.0.1e/crypto/evp/e_des3.c.orig openssl-1.0.1e/crypto/evp/e_des3.c |
|
1080 --- a/crypto/evp/e_des3.c |
|
1081 +++ b/crypto/evp/e_des3.c |
|
1082 @@ -65,6 +65,32 @@ |
|
1083 # include <openssl/des.h> |
|
1084 # include <openssl/rand.h> |
|
1085 |
|
1086 +typedef struct { |
|
1087 + union { double align; DES_key_schedule ks[3]; } ks; |
|
1088 + union { |
|
1089 + void (*cbc)(const void *,void *,size_t,const void *,void *); |
|
1090 + } stream; |
|
1091 +} DES_EDE_KEY; |
|
1092 +#define ks1 ks.ks[0] |
|
1093 +#define ks2 ks.ks[1] |
|
1094 +#define ks3 ks.ks[2] |
|
1095 + |
|
1096 +#if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) |
|
1097 +/* ---------^^^ this is not a typo, just a way to detect that |
|
1098 + * assembler support was in general requested... */ |
|
1099 +#include "sparc_arch.h" |
|
1100 + |
|
1101 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1102 + |
|
1103 +#define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) |
|
1104 + |
|
1105 +void des_t4_key_expand(const void *key, DES_key_schedule *ks); |
|
1106 +void des_t4_ede3_cbc_encrypt(const void *inp,void *out,size_t len, |
|
1107 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1108 +void des_t4_ede3_cbc_decrypt(const void *inp,void *out,size_t len, |
|
1109 + DES_key_schedule *ks,unsigned char iv[8]); |
|
1110 +#endif |
|
1111 + |
|
1112 # ifndef OPENSSL_FIPS |
|
1113 |
|
1114 static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1115 @@ -75,12 +100,6 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1116 |
|
1117 static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); |
|
1118 |
|
1119 -typedef struct { |
|
1120 - DES_key_schedule ks1; /* key schedule */ |
|
1121 - DES_key_schedule ks2; /* key schedule (for ede) */ |
|
1122 - DES_key_schedule ks3; /* key schedule (for ede3) */ |
|
1123 -} DES_EDE_KEY; |
|
1124 - |
|
1125 # define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data) |
|
1126 |
|
1127 /* |
|
1128 @@ -123,6 +117,7 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1129 static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1130 const unsigned char *in, size_t inl) |
|
1131 { |
|
1132 + DES_EDE_KEY *dat = data(ctx); |
|
1133 # ifdef KSSL_DEBUG |
|
1134 { |
|
1135 int i; |
|
1136 @@ -134,11 +155,15 @@ |
|
1137 fprintf(stderr, "\n"); |
|
1138 } |
|
1139 # endif /* KSSL_DEBUG */ |
|
1140 + if (dat->stream.cbc) { |
|
1141 + (*dat->stream.cbc)(in,out,inl,&dat->ks,ctx->iv); |
|
1142 + return 1; |
|
1143 + } |
|
1144 + |
|
1145 while (inl >= EVP_MAXCHUNK) { |
|
1146 DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK, |
|
1147 - &data(ctx)->ks1, &data(ctx)->ks2, |
|
1148 - &data(ctx)->ks3, (DES_cblock *)ctx->iv, |
|
1149 - ctx->encrypt); |
|
1150 + &dat->ks1, &dat->ks2, &dat->ks3, |
|
1151 + (DES_cblock *)ctx->iv, ctx->encrypt); |
|
1152 inl -= EVP_MAXCHUNK; |
|
1153 in += EVP_MAXCHUNK; |
|
1154 out += EVP_MAXCHUNK; |
|
1155 @@ -145,9 +170,8 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1156 } |
|
1157 if (inl) |
|
1158 DES_ede3_cbc_encrypt(in, out, (long)inl, |
|
1159 - &data(ctx)->ks1, &data(ctx)->ks2, |
|
1160 - &data(ctx)->ks3, (DES_cblock *)ctx->iv, |
|
1161 - ctx->encrypt); |
|
1162 + &dat->ks1, &dat->ks2, &dat->ks3, |
|
1163 + (DES_cblock *)ctx->iv, ctx->encrypt); |
|
1164 return 1; |
|
1165 } |
|
1166 |
|
1167 @@ -215,39 +239,58 @@ static int des_ede3_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1168 } |
|
1169 |
|
1170 BLOCK_CIPHER_defs(des_ede, DES_EDE_KEY, NID_des_ede, 8, 16, 8, 64, |
|
1171 - EVP_CIPH_RAND_KEY, des_ede_init_key, NULL, |
|
1172 - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl) |
|
1173 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1174 + des_ede_init_key, NULL, NULL, NULL, |
|
1175 + des3_ctrl) |
|
1176 # define des_ede3_cfb64_cipher des_ede_cfb64_cipher |
|
1177 # define des_ede3_ofb_cipher des_ede_ofb_cipher |
|
1178 # define des_ede3_cbc_cipher des_ede_cbc_cipher |
|
1179 # define des_ede3_ecb_cipher des_ede_ecb_cipher |
|
1180 BLOCK_CIPHER_defs(des_ede3, DES_EDE_KEY, NID_des_ede3, 8, 24, 8, 64, |
|
1181 - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, |
|
1182 - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl) |
|
1183 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1184 + des_ede3_init_key, NULL, NULL, NULL, |
|
1185 + des3_ctrl) |
|
1186 |
|
1187 BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 1, |
|
1188 - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, |
|
1189 - EVP_CIPHER_set_asn1_iv, |
|
1190 - EVP_CIPHER_get_asn1_iv, des3_ctrl) |
|
1191 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1192 + des_ede3_init_key, NULL, NULL, NULL, |
|
1193 + des3_ctrl) |
|
1194 |
|
1195 BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 8, |
|
1196 - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, |
|
1197 - EVP_CIPHER_set_asn1_iv, |
|
1198 - EVP_CIPHER_get_asn1_iv, des3_ctrl) |
|
1199 + EVP_CIPH_RAND_KEY|EVP_CIPH_FLAG_FIPS|EVP_CIPH_FLAG_DEFAULT_ASN1, |
|
1200 + des_ede3_init_key, NULL, NULL, NULL, |
|
1201 + des3_ctrl) |
|
1202 |
|
1203 static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1204 const unsigned char *iv, int enc) |
|
1205 { |
|
1206 DES_cblock *deskey = (DES_cblock *)key; |
|
1207 + DES_EDE_KEY *dat = data(ctx); |
|
1208 + |
|
1209 + dat->stream.cbc = NULL; |
|
1210 +#if defined(SPARC_DES_CAPABLE) |
|
1211 + if (SPARC_DES_CAPABLE) { |
|
1212 + int mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1213 + |
|
1214 + if (mode == EVP_CIPH_CBC_MODE) { |
|
1215 + des_t4_key_expand(&deskey[0],&dat->ks1); |
|
1216 + des_t4_key_expand(&deskey[1],&dat->ks2); |
|
1217 + memcpy(&dat->ks3,&dat->ks1,sizeof(dat->ks1)); |
|
1218 + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : |
|
1219 + des_t4_ede3_cbc_decrypt; |
|
1220 + return 1; |
|
1221 + } |
|
1222 + } |
|
1223 +#endif |
|
1224 # ifdef EVP_CHECK_DES_KEY |
|
1225 - if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1) |
|
1226 - ! !DES_set_key_checked(&deskey[1], &data(ctx)->ks2)) |
|
1227 + if (DES_set_key_checked(&deskey[0],&dat->ks1) |
|
1228 + || DES_set_key_checked(&deskey[1],&dat->ks2)) |
|
1229 return 0; |
|
1230 # else |
|
1231 - DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1); |
|
1232 - DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2); |
|
1233 + DES_set_key_unchecked(&deskey[0],&dat->ks1); |
|
1234 + DES_set_key_unchecked(&deskey[1],&dat->ks2); |
|
1235 # endif |
|
1236 - memcpy(&data(ctx)->ks3, &data(ctx)->ks1, sizeof(data(ctx)->ks1)); |
|
1237 + memcpy(&dat->ks3,&dat->ks1, sizeof(dat->ks1)); |
|
1238 return 1; |
|
1239 } |
|
1240 |
|
1241 @@ -255,6 +298,8 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1242 const unsigned char *iv, int enc) |
|
1243 { |
|
1244 DES_cblock *deskey = (DES_cblock *)key; |
|
1245 + DES_EDE_KEY *dat = data(ctx); |
|
1246 + |
|
1247 # ifdef KSSL_DEBUG |
|
1248 { |
|
1249 int i; |
|
1250 @@ -272,15 +317,30 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1251 } |
|
1252 # endif /* KSSL_DEBUG */ |
|
1253 |
|
1254 + dat->stream.cbc = NULL; |
|
1255 +#if defined(SPARC_DES_CAPABLE) |
|
1256 + if (SPARC_DES_CAPABLE) { |
|
1257 + int mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1258 + |
|
1259 + if (mode == EVP_CIPH_CBC_MODE) { |
|
1260 + des_t4_key_expand(&deskey[0],&dat->ks1); |
|
1261 + des_t4_key_expand(&deskey[1],&dat->ks2); |
|
1262 + des_t4_key_expand(&deskey[2],&dat->ks3); |
|
1263 + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : |
|
1264 + des_t4_ede3_cbc_decrypt; |
|
1265 + return 1; |
|
1266 + } |
|
1267 + } |
|
1268 +#endif |
|
1269 # ifdef EVP_CHECK_DES_KEY |
|
1270 - if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1) |
|
1271 - || DES_set_key_checked(&deskey[1], &data(ctx)->ks2) |
|
1272 - || DES_set_key_checked(&deskey[2], &data(ctx)->ks3)) |
|
1273 + if (DES_set_key_checked(&deskey[0],&dat->ks1) |
|
1274 + || DES_set_key_checked(&deskey[1],&dat->ks2) |
|
1275 + || DES_set_key_checked(&deskey[2],&dat->ks3)) |
|
1276 return 0; |
|
1277 # else |
|
1278 - DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1); |
|
1279 - DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2); |
|
1280 - DES_set_key_unchecked(&deskey[2], &data(ctx)->ks3); |
|
1281 + DES_set_key_unchecked(&deskey[0],&dat->ks1); |
|
1282 + DES_set_key_unchecked(&deskey[1],&dat->ks2); |
|
1283 + DES_set_key_unchecked(&deskey[2],&dat->ks3); |
|
1284 # endif |
|
1285 return 1; |
|
1286 } |
|
1287 Index: openssl/crypto/bn/Makefile |
|
1288 =================================================================== |
|
1289 diff -ru openssl-1.0.1e/crypto/bn/Makefile openssl-1.0.1e/crypto/bn/Makefile.new |
|
1290 --- openssl-1.0.1e/crypto/bn/Makefile 2011-05-24 17:02:24.000000000 -0700 |
|
1291 +++ openssl-1.0.1e/crypto/bn/Makefile 2011-07-27 10:48:17.817470000 -0700 |
|
1292 @@ -77,6 +77,12 @@ |
|
1293 $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@ |
|
1294 sparcv9-mont.s: asm/sparcv9-mont.pl |
|
1295 $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@ |
|
1296 +vis3-mont.s: asm/vis3-mont.pl |
|
1297 + $(PERL) asm/vis3-mont.pl $(CFLAGS) > $@ |
|
1298 +sparct4-mont.S: asm/sparct4-mont.pl |
|
1299 + $(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@ |
|
1300 +sparcv9-gf2m.S: asm/sparcv9-gf2m.pl |
|
1301 + $(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@ |
|
1302 |
|
1303 bn-mips3.o: asm/mips3.s |
|
1304 @if [ "$(CC)" = "gcc" ]; then \ |
|
1305 Index: openssl/crypto/bn/bn_exp.c |
|
1306 =================================================================== |
|
1307 diff -ru openssl-1.0.1e/crypto/bn/bn_exp.c openssl-1.0.1e/crypto/bn/bn_exp.c.new |
|
1308 --- bn_exp.c 2011/10/29 19:25:13 1.38 |
|
1309 +++ bn_exp.c 2012/11/17 10:34:11 1.39 |
|
1310 @@ -122,8 +122,15 @@ |
|
1311 # ifndef alloca |
|
1312 # define alloca(s) __builtin_alloca((s)) |
|
1313 # endif |
|
1314 +#else |
|
1315 +#include <alloca.h> |
|
1316 #endif |
|
1317 |
|
1318 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1319 +# include "sparc_arch.h" |
|
1320 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1321 +#endif |
|
1322 + |
|
1323 /* maximum precomputation table size for *variable* sliding windows */ |
|
1324 #define TABLE_SIZE 32 |
|
1325 |
|
1326 @@ -464,8 +471,16 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1327 wstart = bits - 1; /* The top bit of the window */ |
|
1328 wend = 0; /* The bottom bit of the window */ |
|
1329 |
|
1330 +#if 1 /* by Shay Gueron's suggestion */ |
|
1331 + j = mont->N.top; /* borrow j */ |
|
1332 + if (bn_wexpand(r,j) == NULL) goto err; |
|
1333 + r->d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */ |
|
1334 + for(i=1;i<j;i++) r->d[i] = (~m->d[i])&BN_MASK2; |
|
1335 + r->top = j; |
|
1336 +#else |
|
1337 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) |
|
1338 goto err; |
|
1339 +#endif |
|
1340 for (;;) { |
|
1341 if (BN_is_bit_set(p, wstart) == 0) { |
|
1342 if (!start) { |
|
1343 @@ -515,6 +530,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1344 if (wstart < 0) |
|
1345 break; |
|
1346 } |
|
1347 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
|
1348 + if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3|SPARCV9_PREFER_FPU)) { |
|
1349 + j = mont->N.top; /* borrow j */ |
|
1350 + val[0]->d[0] = 1; /* borrow val[0] */ |
|
1351 + for (i=1;i<j;i++) |
|
1352 + val[0]->d[i] = 0; |
|
1353 + val[0]->top = j; |
|
1354 + if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx)) |
|
1355 + goto err; |
|
1356 + } else |
|
1357 +#endif |
|
1358 if (!BN_from_montgomery(rr, r, mont, ctx)) |
|
1359 goto err; |
|
1360 ret = 1; |
|
1361 @@ -526,6 +552,26 @@ err: |
|
1362 return (ret); |
|
1363 } |
|
1364 |
|
1365 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
|
1366 +static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) { /* return one BN_ULONG of bits of a starting at bit bitpos; words at or beyond a->top contribute 0 */ |
|
1367 + BN_ULONG ret = 0; |
|
1368 + int wordpos; |
|
1369 + |
|
1370 + wordpos = bitpos / BN_BITS2; /* index of the word holding the first requested bit */ |
|
1371 + bitpos %= BN_BITS2; /* offset of that bit inside the word */ |
|
1372 + if (wordpos>=0 && wordpos < a->top) { |
|
1373 + ret = a->d[wordpos]&BN_MASK2; |
|
1374 + if (bitpos) { |
|
1375 + ret >>= bitpos; |
|
1376 + if (++wordpos < a->top) /* fill the vacated high bits from the next word, if there is one */ |
|
1377 + ret |= a->d[wordpos]<<(BN_BITS2-bitpos); |
|
1378 + } |
|
1379 + } |
|
1380 + |
|
1381 + return ret & BN_MASK2; |
|
1382 +} |
|
1383 +#endif |
|
1384 + |
|
1385 /* |
|
1386 * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific |
|
1387 * layout so that accessing any of these table values shows the same access |
|
1388 @@ -594,6 +640,9 @@ |
|
1389 int powerbufLen = 0; |
|
1390 unsigned char *powerbuf = NULL; |
|
1391 BIGNUM tmp, am; |
|
1392 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1393 + unsigned int t4=0; |
|
1394 +#endif |
|
1395 |
|
1396 bn_check_top(a); |
|
1397 bn_check_top(p); |
|
1398 @@ -628,10 +677,18 @@ |
|
1399 |
|
1400 /* Get the window size to use with size of p. */ |
|
1401 window = BN_window_bits_for_ctime_exponent_size(bits); |
|
1402 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1403 + if (window>=5 && (top&15)==0 && top<=64 && |
|
1404 + (OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))== |
|
1405 + (CFR_MONTMUL|CFR_MONTSQR) && (t4=OPENSSL_sparcv9cap_P[0])) |
|
1406 + window=5; |
|
1407 + else |
|
1408 +#endif |
|
1409 #if defined(OPENSSL_BN_ASM_MONT5) |
|
1410 if (window == 6 && bits <= 1024) |
|
1411 window = 5; /* ~5% improvement of 2048-bit RSA sign */ |
|
1412 #endif |
|
1413 + (void) 0; |
|
1414 |
|
1415 /* |
|
1416 * Allocate a buffer large enough to hold all of the pre-computed powers |
|
1417 @@ -670,14 +727,14 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1418 tmp.flags = am.flags = BN_FLG_STATIC_DATA; |
|
1419 |
|
1420 /* prepare a^0 in Montgomery domain */ |
|
1421 -#if 1 |
|
1422 - if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) |
|
1423 - goto err; |
|
1424 -#else |
|
1425 +#if 1 /* by Shay Gueron's suggestion */ |
|
1426 tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ |
|
1427 for (i = 1; i < top; i++) |
|
1428 tmp.d[i] = (~m->d[i]) & BN_MASK2; |
|
1429 tmp.top = top; |
|
1430 +#else |
|
1431 + if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) |
|
1432 + goto err; |
|
1433 #endif |
|
1434 |
|
1435 /* prepare a^1 in Montgomery domain */ |
|
1436 @@ -689,6 +746,122 @@ |
|
1437 } else if (!BN_to_montgomery(&am, a, mont, ctx)) |
|
1438 goto err; |
|
1439 |
|
1440 +#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc) |
|
1441 + if (t4) { |
|
1442 + typedef int (*bn_pwr5_mont_f)(BN_ULONG *tp,const BN_ULONG *np, |
|
1443 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1444 + int bn_pwr5_mont_t4_8(BN_ULONG *tp,const BN_ULONG *np, |
|
1445 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1446 + int bn_pwr5_mont_t4_16(BN_ULONG *tp,const BN_ULONG *np, |
|
1447 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1448 + int bn_pwr5_mont_t4_24(BN_ULONG *tp,const BN_ULONG *np, |
|
1449 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1450 + int bn_pwr5_mont_t4_32(BN_ULONG *tp,const BN_ULONG *np, |
|
1451 + const BN_ULONG *n0,const void *table,int power,int bits); |
|
1452 + static const bn_pwr5_mont_f pwr5_funcs[4] = { |
|
1453 + bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16, |
|
1454 + bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32 }; |
|
1455 + bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top/16-1]; |
|
1456 + |
|
1457 + typedef int (*bn_mul_mont_f)(BN_ULONG *rp,const BN_ULONG *ap, |
|
1458 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1459 + int bn_mul_mont_t4_8(BN_ULONG *rp,const BN_ULONG *ap, |
|
1460 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1461 + int bn_mul_mont_t4_16(BN_ULONG *rp,const BN_ULONG *ap, |
|
1462 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1463 + int bn_mul_mont_t4_24(BN_ULONG *rp,const BN_ULONG *ap, |
|
1464 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1465 + int bn_mul_mont_t4_32(BN_ULONG *rp,const BN_ULONG *ap, |
|
1466 + const void *bp,const BN_ULONG *np,const BN_ULONG *n0); |
|
1467 + static const bn_mul_mont_f mul_funcs[4] = { |
|
1468 + bn_mul_mont_t4_8, bn_mul_mont_t4_16, |
|
1469 + bn_mul_mont_t4_24, bn_mul_mont_t4_32 }; |
|
1470 + bn_mul_mont_f mul_worker = mul_funcs[top/16-1]; |
|
1471 + |
|
1472 + void bn_mul_mont_vis3(BN_ULONG *rp,const BN_ULONG *ap, |
|
1473 + const void *bp,const BN_ULONG *np, |
|
1474 + const BN_ULONG *n0,int num); |
|
1475 + void bn_mul_mont_t4(BN_ULONG *rp,const BN_ULONG *ap, |
|
1476 + const void *bp,const BN_ULONG *np, |
|
1477 + const BN_ULONG *n0,int num); |
|
1478 + void bn_mul_mont_gather5_t4(BN_ULONG *rp,const BN_ULONG *ap, |
|
1479 + const void *table,const BN_ULONG *np, |
|
1480 + const BN_ULONG *n0,int num,int power); |
|
1481 + void bn_flip_n_scatter5_t4(const BN_ULONG *inp,size_t num, |
|
1482 + void *table,size_t power); |
|
1483 + void bn_gather5_t4(BN_ULONG *out,size_t num, |
|
1484 + void *table,size_t power); |
|
1485 + void bn_flip_t4(BN_ULONG *dst,BN_ULONG *src,size_t num); |
|
1486 + |
|
1487 + BN_ULONG *np=mont->N.d, *n0=mont->n0; |
|
1488 + int stride = 5*(6-(top/16-1)); /* multiple of 5, but less than 32 */ |
|
1489 + |
|
1490 + /* |
|
1491 + * BN_to_montgomery can contaminate words above .top |
|
1492 + * [in BN_DEBUG[_DEBUG] build]... |
|
1493 + */ |
|
1494 + for (i=am.top; i<top; i++) am.d[i]=0; |
|
1495 + for (i=tmp.top; i<top; i++) tmp.d[i]=0; |
|
1496 + |
|
1497 + bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,0); |
|
1498 + bn_flip_n_scatter5_t4(am.d,top,powerbuf,1); |
|
1499 + if (!(*mul_worker)(tmp.d,am.d,am.d,np,n0) && |
|
1500 + !(*mul_worker)(tmp.d,am.d,am.d,np,n0)) |
|
1501 + bn_mul_mont_vis3(tmp.d,am.d,am.d,np,n0,top); |
|
1502 + bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,2); |
|
1503 + |
|
1504 + for (i=3; i<32; i++) { |
|
1505 + /* Calculate a^i = a^(i-1) * a */ |
|
1506 + if (!(*mul_worker)(tmp.d,tmp.d,am.d,np,n0) && |
|
1507 + !(*mul_worker)(tmp.d,tmp.d,am.d,np,n0)) |
|
1508 + bn_mul_mont_vis3(tmp.d,tmp.d,am.d,np,n0,top); |
|
1509 + bn_flip_n_scatter5_t4(tmp.d,top,powerbuf,i); |
|
1510 + } |
|
1511 + |
|
1512 + /* switch to 64-bit domain */ |
|
1513 + np = alloca(top*sizeof(BN_ULONG)); |
|
1514 + top /= 2; |
|
1515 + bn_flip_t4(np,mont->N.d,top); |
|
1516 + |
|
1517 + bits--; |
|
1518 + for (wvalue=0, i=bits%5; i>=0; i--,bits--) |
|
1519 + wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); |
|
1520 + bn_gather5_t4(tmp.d,top,powerbuf,wvalue); |
|
1521 + |
|
1522 + /* Scan the exponent one window at a time starting from the most |
|
1523 + * significant bits. |
|
1524 + */ |
|
1525 + while (bits >= 0) { |
|
1526 + if (bits < stride) |
|
1527 + stride = bits+1; |
|
1528 + bits -= stride; |
|
1529 + wvalue = (bn_get_bits(p,bits+1)); |
|
1530 + |
|
1531 + if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) |
|
1532 + continue; |
|
1533 + /* retry once and fall back */ |
|
1534 + if ((*pwr5_worker)(tmp.d,np,n0,powerbuf,wvalue,stride)) |
|
1535 + continue; |
|
1536 + |
|
1537 + bits += stride-5; |
|
1538 + wvalue >>= stride-5; |
|
1539 + wvalue &= 31; |
|
1540 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1541 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1542 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1543 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1544 + bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top); |
|
1545 + bn_mul_mont_gather5_t4(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue); |
|
1546 + } |
|
1547 + |
|
1548 + bn_flip_t4(tmp.d,tmp.d,top); |
|
1549 + top *= 2; |
|
1550 + /* back to 32-bit domain */ |
|
1551 + tmp.top=top; |
|
1552 + bn_correct_top(&tmp); |
|
1553 + OPENSSL_cleanse(np,top*sizeof(BN_ULONG)); |
|
1554 + } else |
|
1555 +#endif |
|
1556 #if defined(OPENSSL_BN_ASM_MONT5) |
|
1557 if (window == 5 && top > 1) { |
|
1558 /* |
|
1559 @@ -844,6 +1017,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, |
|
1560 } |
|
1561 |
|
1562 /* Convert the final result from montgomery to standard format */ |
|
1563 +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) |
|
1564 + if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3|SPARCV9_PREFER_FPU)) { |
|
1565 + am.d[0] = 1; /* borrow am */ |
|
1566 + for (i = 1; i < top; i++) |
|
1567 + am.d[i] = 0; |
|
1568 + if (!BN_mod_mul_montgomery(rr,&tmp,&am,mont,ctx)) |
|
1569 + goto err; |
|
1570 + } else |
|
1571 +#endif |
|
1572 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) |
|
1573 goto err; |
|
1574 ret = 1; |
|
1575 Index: openssl/apps/speed.c |
|
1576 =================================================================== |
|
1577 diff -ru openssl-1.0.1e/apps/speed.c openssl-1.0.1e/apps/speed.c |
|
1578 --- openssl-1.0.1e/apps/speed.c 2011-05-24 17:02:24.000000000 -0700 |
|
1579 +++ openssl-1.0.1e/apps/speed.c 2011-07-27 10:48:17.817470000 -0700 |
|
1580 @@ -1586,8 +1586,7 @@ |
|
1581 print_message(names[D_MD5], c[D_MD5][j], lengths[j]); |
|
1582 Time_F(START); |
|
1583 for (count = 0, run = 1; COND(c[D_MD5][j]); count++) |
|
1584 - EVP_Digest(&(buf[0]), (unsigned long)lengths[j], &(md5[0]), |
|
1585 - NULL, EVP_get_digestbyname("md5"), NULL); |
|
1586 + MD5(buf, lengths[j], md5); |
|
1587 d = Time_F(STOP); |
|
1588 print_result(D_MD5, j, count, d); |
|
1589 } |
|
1590 @@ -1622,8 +1621,7 @@ |
|
1591 print_message(names[D_SHA1], c[D_SHA1][j], lengths[j]); |
|
1592 Time_F(START); |
|
1593 for (count = 0, run = 1; COND(c[D_SHA1][j]); count++) |
|
1594 - EVP_Digest(buf, (unsigned long)lengths[j], &(sha[0]), NULL, |
|
1595 - EVP_sha1(), NULL); |
|
1596 + SHA1(buf, lengths[j], sha); |
|
1597 d = Time_F(STOP); |
|
1598 print_result(D_SHA1, j, count, d); |
|
1599 } |
|
1600 Index: openssl/crypto/aes/Makefile |
|
1601 =================================================================== |
|
1602 --- Makefile Thu May 2 13:42:37 2013 |
|
1603 +++ Makefile.orig Thu May 2 13:41:51 2013 |
|
1604 @@ -69,6 +69,9 @@ |
|
1605 aes-sparcv9.s: asm/aes-sparcv9.pl |
|
1606 $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@ |
|
1607 |
|
1608 +aest4-sparcv9.s: asm/aest4-sparcv9.pl |
|
1609 + $(PERL) asm/aest4-sparcv9.pl $(CFLAGS) > $@ |
|
1610 + |
|
1611 aes-ppc.s: asm/aes-ppc.pl |
|
1612 $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ |
|
1613 |
|
1614 Index: openssl/crypto/evp/e_aes.c |
|
1615 =================================================================== |
|
1616 --- e_aes.c Mon Feb 11 07:26:04 2013 |
|
1617 +++ e_aes.c.56 Thu May 2 14:26:35 2013 |
|
1618 @@ -56,12 +58,11 @@ |
|
1619 # include <assert.h> |
|
1620 # include <openssl/aes.h> |
|
1621 # include "evp_locl.h" |
|
1622 -# ifndef OPENSSL_FIPS |
|
1623 # include "modes_lcl.h" |
|
1624 # include <openssl/rand.h> |
|
1625 |
|
1626 typedef struct { |
|
1627 - AES_KEY ks; |
|
1628 + union { double align; AES_KEY ks; } ks; |
|
1629 block128_f block; |
|
1630 union { |
|
1631 cbc128_f cbc; |
|
1632 @@ -70,7 +69,7 @@ |
|
1633 } EVP_AES_KEY; |
|
1634 |
|
1635 typedef struct { |
|
1636 - AES_KEY ks; /* AES key schedule to use */ |
|
1637 + union { double align; AES_KEY ks; } ks; /* AES key schedule to use */ |
|
1638 int key_set; /* Set if key initialised */ |
|
1639 int iv_set; /* Set if an iv is set */ |
|
1640 GCM128_CONTEXT gcm; |
|
1641 @@ -83,7 +82,7 @@ |
|
1642 } EVP_AES_GCM_CTX; |
|
1643 |
|
1644 typedef struct { |
|
1645 - AES_KEY ks1, ks2; /* AES key schedules to use */ |
|
1646 + union { double align; AES_KEY ks; } ks1, ks2; /* AES key schedules to use */ |
|
1647 XTS128_CONTEXT xts; |
|
1648 void (*stream) (const unsigned char *in, |
|
1649 unsigned char *out, size_t length, |
|
1650 @@ -92,7 +91,7 @@ |
|
1651 } EVP_AES_XTS_CTX; |
|
1652 |
|
1653 typedef struct { |
|
1654 - AES_KEY ks; /* AES key schedule to use */ |
|
1655 + union { double align; AES_KEY ks; } ks; /* AES key schedule to use */ |
|
1656 int key_set; /* Set if key initialised */ |
|
1657 int iv_set; /* Set if an iv is set */ |
|
1658 int tag_set; /* Set if tag is valid */ |
|
1659 @@ -155,7 +154,7 @@ |
|
1660 defined(_M_AMD64) || defined(_M_X64) || \ |
|
1661 defined(__INTEL__) ) |
|
1662 |
|
1663 -extern unsigned int OPENSSL_ia32cap_P[2]; |
|
1664 +extern unsigned int OPENSSL_ia32cap_P[]; |
|
1665 |
|
1666 # ifdef VPAES_ASM |
|
1667 # define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) |
|
1668 @@ -297,7 +296,7 @@ |
|
1669 if (!iv && !key) |
|
1670 return 1; |
|
1671 if (key) { |
|
1672 - aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
|
1673 + aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); |
|
1674 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) aesni_encrypt); |
|
1675 gctx->ctr = (ctr128_f) aesni_ctr32_encrypt_blocks; |
|
1676 /* |
|
1677 @@ -336,17 +335,17 @@ |
|
1678 if (key) { |
|
1679 /* key_len is two AES keys */ |
|
1680 if (enc) { |
|
1681 - aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
1682 + aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
1683 xctx->xts.block1 = (block128_f) aesni_encrypt; |
|
1684 xctx->stream = aesni_xts_encrypt; |
|
1685 } else { |
|
1686 - aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
1687 + aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
1688 xctx->xts.block1 = (block128_f) aesni_decrypt; |
|
1689 xctx->stream = aesni_xts_decrypt; |
|
1690 } |
|
1691 |
|
1692 aesni_set_encrypt_key(key + ctx->key_len / 2, |
|
1693 - ctx->key_len * 4, &xctx->ks2); |
|
1694 + ctx->key_len * 4, &xctx->ks2.ks); |
|
1695 xctx->xts.block2 = (block128_f) aesni_encrypt; |
|
1696 |
|
1697 xctx->xts.key1 = &xctx->ks1; |
|
1698 @@ -371,7 +370,7 @@ |
|
1699 if (!iv && !key) |
|
1700 return 1; |
|
1701 if (key) { |
|
1702 - aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); |
|
1703 + aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); |
|
1704 CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
1705 &cctx->ks, (block128_f) aesni_encrypt); |
|
1706 cctx->str = enc ? (ccm128_f) aesni_ccm64_encrypt_blocks : |
|
1707 @@ -432,6 +431,364 @@ |
|
1708 const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ |
|
1709 { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; } |
|
1710 |
|
1711 +#elif defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) |
|
1712 + |
|
1713 +#include "sparc_arch.h" |
|
1714 + |
|
1715 +extern unsigned int OPENSSL_sparcv9cap_P[]; |
|
1716 + |
|
1717 +#define SPARC_AES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_AES) |
|
1718 + |
|
1719 +void aes_t4_set_encrypt_key (const unsigned char *key, int bits, |
|
1720 + AES_KEY *ks); |
|
1721 +void aes_t4_set_decrypt_key (const unsigned char *key, int bits, |
|
1722 + AES_KEY *ks); |
|
1723 +void aes_t4_encrypt (const unsigned char *in, unsigned char *out, |
|
1724 + const AES_KEY *key); |
|
1725 +void aes_t4_decrypt (const unsigned char *in, unsigned char *out, |
|
1726 + const AES_KEY *key); |
|
1727 +/* |
|
1728 + * Key-length specific subroutines were chosen for following reason. |
|
1729 + * Each SPARC T4 core can execute up to 8 threads which share core's |
|
1730 + * resources. Loading as much key material to registers allows to |
|
1731 + * minimize references to shared memory interface, as well as amount |
|
1732 + * of instructions in inner loops [much needed on T4]. But then having |
|
1733 + * non-key-length specific routines would require conditional branches |
|
1734 + * either in inner loops or on subroutines' entries. Former is hardly |
|
1735 + * acceptable, while latter means code size increase to size occupied |
|
1736 + * by multiple key-length specific subroutines, so why fight? |
|
1737 + */ |
|
1738 +void aes128_t4_cbc_encrypt (const unsigned char *in, unsigned char *out, |
|
1739 + size_t len, const AES_KEY *key, |
|
1740 + unsigned char *ivec); |
|
1741 +void aes128_t4_cbc_decrypt (const unsigned char *in, unsigned char *out, |
|
1742 + size_t len, const AES_KEY *key, |
|
1743 + unsigned char *ivec); |
|
1744 +void aes192_t4_cbc_encrypt (const unsigned char *in, unsigned char *out, |
|
1745 + size_t len, const AES_KEY *key, |
|
1746 + unsigned char *ivec); |
|
1747 +void aes192_t4_cbc_decrypt (const unsigned char *in, unsigned char *out, |
|
1748 + size_t len, const AES_KEY *key, |
|
1749 + unsigned char *ivec); |
|
1750 +void aes256_t4_cbc_encrypt (const unsigned char *in, unsigned char *out, |
|
1751 + size_t len, const AES_KEY *key, |
|
1752 + unsigned char *ivec); |
|
1753 +void aes256_t4_cbc_decrypt (const unsigned char *in, unsigned char *out, |
|
1754 + size_t len, const AES_KEY *key, |
|
1755 + unsigned char *ivec); |
|
1756 +void aes128_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out, |
|
1757 + size_t blocks, const AES_KEY *key, |
|
1758 + unsigned char *ivec); |
|
1759 +void aes192_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out, |
|
1760 + size_t blocks, const AES_KEY *key, |
|
1761 + unsigned char *ivec); |
|
1762 +void aes256_t4_ctr32_encrypt (const unsigned char *in, unsigned char *out, |
|
1763 + size_t blocks, const AES_KEY *key, |
|
1764 + unsigned char *ivec); |
|
1765 + |
|
1766 +static int aes_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1767 + const unsigned char *iv, int enc) |
|
1768 +{ |
|
1769 + int ret, mode, bits; |
|
1770 + EVP_AES_KEY *dat = (EVP_AES_KEY *)ctx->cipher_data; |
|
1771 + |
|
1772 + mode = ctx->cipher->flags & EVP_CIPH_MODE; |
|
1773 + bits = ctx->key_len*8; |
|
1774 + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) { |
|
1775 + ret = 0; |
|
1776 + aes_t4_set_decrypt_key(key, bits, ctx->cipher_data); |
|
1777 + dat->block = (block128_f)aes_t4_decrypt; |
|
1778 + switch (bits) { |
|
1779 + case 128: |
|
1780 + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
1781 + (cbc128_f)aes128_t4_cbc_decrypt : |
|
1782 + NULL; |
|
1783 + break; |
|
1784 + case 192: |
|
1785 + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
1786 + (cbc128_f)aes192_t4_cbc_decrypt : |
|
1787 + NULL; |
|
1788 + break; |
|
1789 + case 256: |
|
1790 + dat->stream.cbc = mode==EVP_CIPH_CBC_MODE ? |
|
1791 + (cbc128_f)aes256_t4_cbc_decrypt : |
|
1792 + NULL; |
|
1793 + break; |
|
1794 + default: |
|
1795 + ret = -1; |
|
1796 + } |
|
1797 + } else { |
|
1798 + ret = 0; |
|
1799 + aes_t4_set_encrypt_key(key, bits, ctx->cipher_data); |
|
1800 + dat->block = (block128_f)aes_t4_encrypt; |
|
1801 + switch (bits) { |
|
1802 + case 128: |
|
1803 + if (mode==EVP_CIPH_CBC_MODE) |
|
1804 + dat->stream.cbc = (cbc128_f)aes128_t4_cbc_encrypt; |
|
1805 + else if (mode==EVP_CIPH_CTR_MODE) |
|
1806 + dat->stream.ctr = (ctr128_f)aes128_t4_ctr32_encrypt; |
|
1807 + else |
|
1808 + dat->stream.cbc = NULL; |
|
1809 + break; |
|
1810 + case 192: |
|
1811 + if (mode==EVP_CIPH_CBC_MODE) |
|
1812 + dat->stream.cbc = (cbc128_f)aes192_t4_cbc_encrypt; |
|
1813 + else if (mode==EVP_CIPH_CTR_MODE) |
|
1814 + dat->stream.ctr = (ctr128_f)aes192_t4_ctr32_encrypt; |
|
1815 + else |
|
1816 + dat->stream.cbc = NULL; |
|
1817 + break; |
|
1818 + case 256: |
|
1819 + if (mode==EVP_CIPH_CBC_MODE) |
|
1820 + dat->stream.cbc = (cbc128_f)aes256_t4_cbc_encrypt; |
|
1821 + else if (mode==EVP_CIPH_CTR_MODE) |
|
1822 + dat->stream.ctr = (ctr128_f)aes256_t4_ctr32_encrypt; |
|
1823 + else |
|
1824 + dat->stream.cbc = NULL; |
|
1825 + break; |
|
1826 + default: |
|
1827 + ret = -1; |
|
1828 + } |
|
1829 + } |
|
1830 + |
|
1831 + if (ret < 0) { |
|
1832 + EVPerr(EVP_F_AES_T4_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED); |
|
1833 + return 0; |
|
1834 + } |
|
1835 + |
|
1836 + return 1; |
|
1837 +} |
|
1838 + |
|
1839 +#define aes_t4_cbc_cipher aes_cbc_cipher |
|
1840 +static int aes_t4_cbc_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1841 + const unsigned char *in, size_t len); |
|
1842 + |
|
1843 +#define aes_t4_ecb_cipher aes_ecb_cipher |
|
1844 +static int aes_t4_ecb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1845 + const unsigned char *in, size_t len); |
|
1846 + |
|
1847 +#define aes_t4_ofb_cipher aes_ofb_cipher |
|
1848 +static int aes_t4_ofb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1849 + const unsigned char *in,size_t len); |
|
1850 + |
|
1851 +#define aes_t4_cfb_cipher aes_cfb_cipher |
|
1852 +static int aes_t4_cfb_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1853 + const unsigned char *in,size_t len); |
|
1854 + |
|
1855 +#define aes_t4_cfb8_cipher aes_cfb8_cipher |
|
1856 +static int aes_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1857 + const unsigned char *in,size_t len); |
|
1858 + |
|
1859 +#define aes_t4_cfb1_cipher aes_cfb1_cipher |
|
1860 +static int aes_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx,unsigned char *out, |
|
1861 + const unsigned char *in,size_t len); |
|
1862 + |
|
1863 +#define aes_t4_ctr_cipher aes_ctr_cipher |
|
1864 +static int aes_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1865 + const unsigned char *in, size_t len); |
|
1866 + |
|
1867 +static int aes_t4_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1868 + const unsigned char *iv, int enc) |
|
1869 +{ |
|
1870 + EVP_AES_GCM_CTX *gctx = ctx->cipher_data; |
|
1871 + if (!iv && !key) |
|
1872 + return 1; |
|
1873 + if (key) { |
|
1874 + int bits = ctx->key_len * 8; |
|
1875 + aes_t4_set_encrypt_key(key, bits, &gctx->ks.ks); |
|
1876 + CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, |
|
1877 + (block128_f)aes_t4_encrypt); |
|
1878 + switch (bits) { |
|
1879 + case 128: |
|
1880 + gctx->ctr = (ctr128_f)aes128_t4_ctr32_encrypt; |
|
1881 + break; |
|
1882 + case 192: |
|
1883 + gctx->ctr = (ctr128_f)aes192_t4_ctr32_encrypt; |
|
1884 + break; |
|
1885 + case 256: |
|
1886 + gctx->ctr = (ctr128_f)aes256_t4_ctr32_encrypt; |
|
1887 + break; |
|
1888 + default: |
|
1889 + return 0; |
|
1890 + } |
|
1891 + /* If we have an iv can set it directly, otherwise use |
|
1892 + * saved IV. |
|
1893 + */ |
|
1894 + if (iv == NULL && gctx->iv_set) |
|
1895 + iv = gctx->iv; |
|
1896 + if (iv) { |
|
1897 + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); |
|
1898 + gctx->iv_set = 1; |
|
1899 + } |
|
1900 + gctx->key_set = 1; |
|
1901 + } else { |
|
1902 + /* If key set use IV, otherwise copy */ |
|
1903 + if (gctx->key_set) |
|
1904 + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); |
|
1905 + else |
|
1906 + memcpy(gctx->iv, iv, gctx->ivlen); |
|
1907 + gctx->iv_set = 1; |
|
1908 + gctx->iv_gen = 0; |
|
1909 + } |
|
1910 + return 1; |
|
1911 +} |
|
1912 + |
|
1913 +#define aes_t4_gcm_cipher aes_gcm_cipher |
|
1914 +static int aes_t4_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1915 + const unsigned char *in, size_t len); |
|
1916 + |
|
1917 +static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1918 + const unsigned char *iv, int enc) |
|
1919 +{ |
|
1920 + EVP_AES_XTS_CTX *xctx = ctx->cipher_data; |
|
1921 + if (!iv && !key) |
|
1922 + return 1; |
|
1923 + |
|
1924 + if (key) { |
|
1925 + int bits = ctx->key_len * 4; |
|
1926 + /* key_len is two AES keys */ |
|
1927 + if (enc) { |
|
1928 + aes_t4_set_encrypt_key(key, bits, &xctx->ks1.ks); |
|
1929 + xctx->xts.block1 = (block128_f)aes_t4_encrypt; |
|
1930 +#if 0 /* not yet */ |
|
1931 + switch (bits) { |
|
1932 + case 128: |
|
1933 + xctx->stream = aes128_t4_xts_encrypt; |
|
1934 + break; |
|
1935 + case 192: |
|
1936 + xctx->stream = aes192_t4_xts_encrypt; |
|
1937 + break; |
|
1938 + case 256: |
|
1939 + xctx->stream = aes256_t4_xts_encrypt; |
|
1940 + break; |
|
1941 + default: |
|
1942 + return 0; |
|
1943 + } |
|
1944 +#endif |
|
1945 + } else { |
|
1946 + aes_t4_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
1947 + xctx->xts.block1 = (block128_f)aes_t4_decrypt; |
|
1948 +#if 0 /* not yet */ |
|
1949 + switch (bits) { |
|
1950 + case 128: |
|
1951 + xctx->stream = aes128_t4_xts_decrypt; |
|
1952 + break; |
|
1953 + case 192: |
|
1954 + xctx->stream = aes192_t4_xts_decrypt; |
|
1955 + break; |
|
1956 + case 256: |
|
1957 + xctx->stream = aes256_t4_xts_decrypt; |
|
1958 + break; |
|
1959 + default: |
|
1960 + return 0; |
|
1961 + } |
|
1962 +#endif |
|
1963 + } |
|
1964 + |
|
1965 + aes_t4_set_encrypt_key(key + ctx->key_len/2, |
|
1966 + ctx->key_len * 4, &xctx->ks2.ks); |
|
1967 + xctx->xts.block2 = (block128_f)aes_t4_encrypt; |
|
1968 + |
|
1969 + xctx->xts.key1 = &xctx->ks1; |
|
1970 + } |
|
1971 + |
|
1972 + if (iv) { |
|
1973 + xctx->xts.key2 = &xctx->ks2; |
|
1974 + memcpy(ctx->iv, iv, 16); |
|
1975 + } |
|
1976 + |
|
1977 + return 1; |
|
1978 +} |
|
1979 + |
|
1980 +#define aes_t4_xts_cipher aes_xts_cipher |
|
1981 +static int aes_t4_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
1982 + const unsigned char *in, size_t len); |
|
1983 + |
|
1984 +static int aes_t4_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
|
1985 + const unsigned char *iv, int enc) |
|
1986 +{ |
|
1987 + EVP_AES_CCM_CTX *cctx = ctx->cipher_data; |
|
1988 + if (!iv && !key) |
|
1989 + return 1; |
|
1990 + if (key) { |
|
1991 + int bits = ctx->key_len * 8; |
|
1992 + aes_t4_set_encrypt_key(key, bits, &cctx->ks.ks); |
|
1993 + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
1994 + &cctx->ks, (block128_f)aes_t4_encrypt); |
|
1995 +#if 0 /* not yet */ |
|
1996 + switch (bits) { |
|
1997 + case 128: |
|
1998 + cctx->str = enc?(ccm128_f)aes128_t4_ccm64_encrypt : |
|
1999 + (ccm128_f)ae128_t4_ccm64_decrypt; |
|
2000 + break; |
|
2001 + case 192: |
|
2002 + cctx->str = enc?(ccm128_f)aes192_t4_ccm64_encrypt : |
|
2003 + (ccm128_f)ae192_t4_ccm64_decrypt; |
|
2004 + break; |
|
2005 + case 256: |
|
2006 + cctx->str = enc?(ccm128_f)aes256_t4_ccm64_encrypt : |
|
2007 + (ccm128_f)ae256_t4_ccm64_decrypt; |
|
2008 + break; |
|
2009 + default: |
|
2010 + return 0; |
|
2011 + } |
|
2012 +#endif |
|
2013 + cctx->key_set = 1; |
|
2014 + } |
|
2015 + if (iv) { |
|
2016 + memcpy(ctx->iv, iv, 15 - cctx->L); |
|
2017 + cctx->iv_set = 1; |
|
2018 + } |
|
2019 + return 1; |
|
2020 +} |
|
2021 + |
|
2022 +#define aes_t4_ccm_cipher aes_ccm_cipher |
|
2023 +static int aes_t4_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
|
2024 + const unsigned char *in, size_t len); |
|
2025 + |
|
2026 +#define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ |
|
2027 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \ |
|
2028 + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ |
|
2029 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2030 + aes_t4_init_key, \ |
|
2031 + aes_t4_##mode##_cipher, \ |
|
2032 + NULL, \ |
|
2033 + sizeof(EVP_AES_KEY), \ |
|
2034 + NULL,NULL,NULL,NULL }; \ |
|
2035 +static const EVP_CIPHER aes_##keylen##_##mode = { \ |
|
2036 + nid##_##keylen##_##nmode,blocksize, \ |
|
2037 + keylen/8,ivlen, \ |
|
2038 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2039 + aes_init_key, \ |
|
2040 + aes_##mode##_cipher, \ |
|
2041 + NULL, \ |
|
2042 + sizeof(EVP_AES_KEY), \ |
|
2043 + NULL,NULL,NULL,NULL }; \ |
|
2044 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ |
|
2045 +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } |
|
2046 + |
|
2047 +#define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ |
|
2048 +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \ |
|
2049 + nid##_##keylen##_##mode,blocksize, \ |
|
2050 + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ |
|
2051 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2052 + aes_t4_##mode##_init_key, \ |
|
2053 + aes_t4_##mode##_cipher, \ |
|
2054 + aes_##mode##_cleanup, \ |
|
2055 + sizeof(EVP_AES_##MODE##_CTX), \ |
|
2056 + NULL,NULL,aes_##mode##_ctrl,NULL }; \ |
|
2057 +static const EVP_CIPHER aes_##keylen##_##mode = { \ |
|
2058 + nid##_##keylen##_##mode,blocksize, \ |
|
2059 + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ |
|
2060 + flags|EVP_CIPH_##MODE##_MODE, \ |
|
2061 + aes_##mode##_init_key, \ |
|
2062 + aes_##mode##_cipher, \ |
|
2063 + aes_##mode##_cleanup, \ |
|
2064 + sizeof(EVP_AES_##MODE##_CTX), \ |
|
2065 + NULL,NULL,aes_##mode##_ctrl,NULL }; \ |
|
2066 +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ |
|
2067 +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } |
|
2068 + |
|
2069 # else |
|
2070 |
|
2071 # define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ |
|
2072 @@ -480,7 +837,7 @@ |
|
2073 && !enc) |
|
2074 # ifdef BSAES_CAPABLE |
|
2075 if (BSAES_CAPABLE && mode == EVP_CIPH_CBC_MODE) { |
|
2076 - ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2077 + ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); |
|
2078 dat->block = (block128_f) AES_decrypt; |
|
2079 dat->stream.cbc = (cbc128_f) bsaes_cbc_encrypt; |
|
2080 } else |
|
2081 @@ -487,7 +844,7 @@ |
|
2082 # endif |
|
2083 # ifdef VPAES_CAPABLE |
|
2084 if (VPAES_CAPABLE) { |
|
2085 - ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2086 + ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); |
|
2087 dat->block = (block128_f) vpaes_decrypt; |
|
2088 dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? |
|
2089 (cbc128_f) vpaes_cbc_encrypt : NULL; |
|
2090 @@ -494,7 +851,7 @@ |
|
2091 } else |
|
2092 # endif |
|
2093 { |
|
2094 - ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2095 + ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); |
|
2096 dat->block = (block128_f) AES_decrypt; |
|
2097 dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? |
|
2098 (cbc128_f) AES_cbc_encrypt : NULL; |
|
2099 @@ -501,7 +858,7 @@ |
|
2100 } else |
|
2101 # ifdef BSAES_CAPABLE |
|
2102 if (BSAES_CAPABLE && mode == EVP_CIPH_CTR_MODE) { |
|
2103 - ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2104 + ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); |
|
2105 dat->block = (block128_f) AES_encrypt; |
|
2106 dat->stream.ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks; |
|
2107 } else |
|
2108 @@ -508,7 +865,7 @@ |
|
2109 # endif |
|
2110 # ifdef VPAES_CAPABLE |
|
2111 if (VPAES_CAPABLE) { |
|
2112 - ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2113 + ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); |
|
2114 dat->block = (block128_f) vpaes_encrypt; |
|
2115 dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? |
|
2116 (cbc128_f) vpaes_cbc_encrypt : NULL; |
|
2117 @@ -515,7 +872,7 @@ |
|
2118 } else |
|
2119 # endif |
|
2120 { |
|
2121 - ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); |
|
2122 + ret = AES_set_encrypt_key(key, ctx->key_len*8, &dat->ks.ks); |
|
2123 dat->block = (block128_f) AES_encrypt; |
|
2124 dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? |
|
2125 (cbc128_f) AES_cbc_encrypt : NULL; |
|
2126 @@ -810,7 +1167,7 @@ |
|
2127 do { |
|
2128 # ifdef BSAES_CAPABLE |
|
2129 if (BSAES_CAPABLE) { |
|
2130 - AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
|
2131 + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); |
|
2132 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, |
|
2133 (block128_f) AES_encrypt); |
|
2134 gctx->ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks; |
|
2135 @@ -819,7 +1176,7 @@ |
|
2136 # endif |
|
2137 # ifdef VPAES_CAPABLE |
|
2138 if (VPAES_CAPABLE) { |
|
2139 - vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
|
2140 + vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); |
|
2141 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, |
|
2142 (block128_f) vpaes_encrypt); |
|
2143 gctx->ctr = NULL; |
|
2144 @@ -828,7 +1185,7 @@ |
|
2145 # endif |
|
2146 (void)0; /* terminate potentially open 'else' */ |
|
2147 |
|
2148 - AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
|
2149 + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); |
|
2150 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, |
|
2151 (block128_f) AES_encrypt); |
|
2152 # ifdef AES_CTR_ASM |
|
2153 @@ -1049,15 +1406,15 @@ |
|
2154 # ifdef VPAES_CAPABLE |
|
2155 if (VPAES_CAPABLE) { |
|
2156 if (enc) { |
|
2157 - vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2158 + vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2159 xctx->xts.block1 = (block128_f) vpaes_encrypt; |
|
2160 } else { |
|
2161 - vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2162 + vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2163 xctx->xts.block1 = (block128_f) vpaes_decrypt; |
|
2164 } |
|
2165 |
|
2166 vpaes_set_encrypt_key(key + ctx->key_len / 2, |
|
2167 - ctx->key_len * 4, &xctx->ks2); |
|
2168 + ctx->key_len * 4, &xctx->ks2.ks); |
|
2169 xctx->xts.block2 = (block128_f) vpaes_encrypt; |
|
2170 |
|
2171 xctx->xts.key1 = &xctx->ks1; |
|
2172 @@ -1067,15 +1424,15 @@ |
|
2173 (void)0; /* terminate potentially open 'else' */ |
|
2174 |
|
2175 if (enc) { |
|
2176 - AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2177 + AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2178 xctx->xts.block1 = (block128_f) AES_encrypt; |
|
2179 } else { |
|
2180 - AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); |
|
2181 + AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); |
|
2182 xctx->xts.block1 = (block128_f) AES_decrypt; |
|
2183 } |
|
2184 |
|
2185 AES_set_encrypt_key(key + ctx->key_len / 2, |
|
2186 - ctx->key_len * 4, &xctx->ks2); |
|
2187 + ctx->key_len * 4, &xctx->ks2.ks); |
|
2188 xctx->xts.block2 = (block128_f) AES_encrypt; |
|
2189 |
|
2190 xctx->xts.key1 = &xctx->ks1; |
|
2191 @@ -1196,7 +1553,7 @@ |
|
2192 do { |
|
2193 # ifdef VPAES_CAPABLE |
|
2194 if (VPAES_CAPABLE) { |
|
2195 - vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); |
|
2196 + vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); |
|
2197 CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
2198 &cctx->ks, (block128_f) vpaes_encrypt); |
|
2199 cctx->str = NULL; |
|
2200 @@ -1204,7 +1561,7 @@ |
|
2201 break; |
|
2202 } |
|
2203 # endif |
|
2204 - AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); |
|
2205 + AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); |
|
2206 CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, |
|
2207 &cctx->ks, (block128_f) AES_encrypt); |
|
2208 cctx->str = NULL; |
|
2209 @@ -1285,5 +1642,4 @@ |
|
2210 EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS) |
|
2211 BLOCK_CIPHER_custom(NID_aes, 256, 1, 12, ccm, CCM, |
|
2212 EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS) |
|
2213 -# endif |
|
2214 #endif |
|
2215 Index: openssl/crypto/evp/evp.h |
|
2216 =================================================================== |
|
2217 --- evp.h Mon Feb 11 07:26:04 2013 |
|
2218 +++ evp.h.new Thu May 2 14:31:55 2013 |
|
2219 @@ -1325,6 +1325,7 @@ |
|
2220 # define EVP_F_AESNI_INIT_KEY 165 |
|
2221 # define EVP_F_AESNI_XTS_CIPHER 176 |
|
2222 # define EVP_F_AES_INIT_KEY 133 |
|
2223 +# define EVP_F_AES_T4_INIT_KEY 178 |
|
2224 # define EVP_F_AES_XTS 172 |
|
2225 # define EVP_F_AES_XTS_CIPHER 175 |
|
2226 # define EVP_F_ALG_MODULE_INIT 177 |
|
2227 Index: openssl/crypto/evp/evp_err.c |
|
2228 =================================================================== |
|
2229 --- evp_err.c Mon Feb 11 07:26:04 2013 |
|
2230 +++ evp_err.c.new Thu May 2 14:33:24 2013 |
|
2231 @@ -73,6 +73,7 @@ |
|
2232 {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, |
|
2233 {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"}, |
|
2234 {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, |
|
2235 + {ERR_FUNC(EVP_F_AES_T4_INIT_KEY), "AES_T4_INIT_KEY"}, |
|
2236 {ERR_FUNC(EVP_F_AES_XTS), "AES_XTS"}, |
|
2237 {ERR_FUNC(EVP_F_AES_XTS_CIPHER), "AES_XTS_CIPHER"}, |
|
2238 {ERR_FUNC(EVP_F_ALG_MODULE_INIT), "ALG_MODULE_INIT"}, |
|