|
1 #!/usr/bin/env perl |
|
2 |
|
3 # ==================================================================== |
|
4 # Written by David S. Miller <[email protected]> and Andy Polyakov |
|
5 # <[email protected]>. The module is licensed under 2-clause BSD |
|
6 # license. March 2013. All rights reserved. |
|
7 # ==================================================================== |
|
8 |
|
9 ###################################################################### |
|
10 # DES for SPARC T4. |
|
11 # |
|
12 # As with other hardware-assisted ciphers CBC encrypt results [for |
|
13 # aligned data] are virtually identical to critical path lengths: |
|
14 # |
|
15 # DES Triple-DES |
|
16 # CBC encrypt 4.14/4.15(*) 11.7/11.7 |
|
17 # CBC decrypt 1.77/4.11(**) 6.42/7.47 |
|
18 # |
|
19 # (*) numbers after slash are for |
|
20 # misaligned data; |
|
21 # (**) this is result for largest |
|
22 # block size, unlike all other |
|
23 # cases smaller blocks results |
|
24 # are better[?]; |
|
25 |
|
# Work out the directory this script was invoked from ($dir keeps its
# trailing path separator) and make both that directory and the shared
# perlasm directory searchable before pulling in the SPARCv9 helpers.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = $1;
push @INC, "${dir}", "${dir}../../perlasm";
require "sparcv9_modes.pl";

# NOTE(review): asm_init is not defined in this file; presumably it comes
# from sparcv9_modes.pl and sets $::abibits from the command line - confirm.
&asm_init(@ARGV);

# When generating for a 64-bit ABI, declare %g2 and %g3 as scratch
# registers to the assembler.
if ($::abibits == 64) {
    $code .= <<___;
.register %g2,#scratch
.register %g3,#scratch
___
}

# Everything emitted from here on goes into the text section.
$code .= <<___;
.text
___
|
40 |
|
# des_t4_key_expand
#
# Emits a leaf routine that takes the key pointer in %o0 ($inp) and the
# key-schedule pointer in %o1 ($out).  The 8-byte key is loaded into %f0;
# when $inp is not 8-byte aligned a second doubleword is fetched and the
# two are merged with alignaddr/faligndata.  A chain of sixteen
# des_kexpand instructions then produces sixteen doublewords which are
# stored to $out + 0x00 .. $out + 0x78 (the last store sits in the delay
# slot of retl).
{
my ($inp, $out) = map("%o$_", (0..1));

$code .= <<___;
.align 32
.globl des_t4_key_expand
.type des_t4_key_expand,#function
des_t4_key_expand:
andcc $inp, 0x7, %g0
alignaddr $inp, %g0, $inp
bz,pt %icc, 1f
ldd [$inp + 0x00], %f0
ldd [$inp + 0x08], %f2
faligndata %f0, %f2, %f0
1: des_kexpand %f0, 0, %f0
des_kexpand %f0, 1, %f2
std %f0, [$out + 0x00]
des_kexpand %f2, 3, %f6
std %f2, [$out + 0x08]
des_kexpand %f2, 2, %f4
des_kexpand %f6, 3, %f10
std %f6, [$out + 0x18]
des_kexpand %f6, 2, %f8
std %f4, [$out + 0x10]
des_kexpand %f10, 3, %f14
std %f10, [$out + 0x28]
des_kexpand %f10, 2, %f12
std %f8, [$out + 0x20]
des_kexpand %f14, 1, %f16
std %f14, [$out + 0x38]
des_kexpand %f16, 3, %f20
std %f12, [$out + 0x30]
des_kexpand %f16, 2, %f18
std %f16, [$out + 0x40]
des_kexpand %f20, 3, %f24
std %f20, [$out + 0x50]
des_kexpand %f20, 2, %f22
std %f18, [$out + 0x48]
des_kexpand %f24, 3, %f28
std %f24, [$out + 0x60]
des_kexpand %f24, 2, %f26
std %f22, [$out + 0x58]
des_kexpand %f28, 1, %f30
std %f28, [$out + 0x70]
std %f26, [$out + 0x68]
retl
std %f30, [$out + 0x78]
.size des_t4_key_expand,.-des_t4_key_expand
___
}
|
# CBC routines for single DES and three-key EDE.
#
# All four routines use the same register assignment: %o0..%o4 hold
# inp, out, len, key and ivec, and %g1..%g3 hold the scratch values
# ileft, iright and omask.  They also share the same set-up sequence:
#   - $ileft becomes (inp & 7) * 8, $iright its negation, and $inp is
#     rounded down to an 8-byte boundary; inside the loop a misaligned
#     input block is assembled from two doublewords with sllx/srlx/or;
#   - $omask becomes 0xff >> (out & 7), or 0 when out is 8-byte aligned;
#     a non-zero $omask selects the "2:" path that writes each block with
#     two partial stores instead of a single std;
#   - $len is shifted right by 3 and is decremented once per block.
{
my ($inp, $out, $len, $key, $ivec) = ("%o0", "%o1", "%o2", "%o3", "%o4");
my ($ileft, $iright, $omask)       = ("%g1", "%g2", "%g3");

# des_t4_cbc_encrypt: the chaining value lives in %f0/%f1.  Each input
# block is XORed into it, run through des_ip, eight des_round
# instructions (key schedule at $key + 0x00 .. 0x78, held in %f4..%f34)
# and des_iip, then stored.  The final %f0/%f1 is written back to ivec.
$code .= <<___;
.globl des_t4_cbc_encrypt
.align 32
des_t4_cbc_encrypt:
ld [$ivec + 0], %f0 ! load ivec
ld [$ivec + 4], %f1

and $inp, 7, $ileft
andn $inp, 7, $inp
sll $ileft, 3, $ileft
mov 0xff, $omask
prefetch [$inp], 20
prefetch [$inp + 63], 20
sub %g0, $ileft, $iright
and $out, 7, %g4
alignaddrl $out, %g0, $out
srl $omask, %g4, $omask
srlx $len, 3, $len
movrz %g4, 0, $omask
prefetch [$out], 22

ldd [$key + 0x00], %f4 ! load key schedule
ldd [$key + 0x08], %f6
ldd [$key + 0x10], %f8
ldd [$key + 0x18], %f10
ldd [$key + 0x20], %f12
ldd [$key + 0x28], %f14
ldd [$key + 0x30], %f16
ldd [$key + 0x38], %f18
ldd [$key + 0x40], %f20
ldd [$key + 0x48], %f22
ldd [$key + 0x50], %f24
ldd [$key + 0x58], %f26
ldd [$key + 0x60], %f28
ldd [$key + 0x68], %f30
ldd [$key + 0x70], %f32
ldd [$key + 0x78], %f34

.Ldes_cbc_enc_loop:
ldx [$inp + 0], %g4
brz,pt $ileft, 4f
nop

ldx [$inp + 8], %g5
sllx %g4, $ileft, %g4
srlx %g5, $iright, %g5
or %g5, %g4, %g4
4:
movxtod %g4, %f2
prefetch [$inp + 8+63], 20
add $inp, 8, $inp
fxor %f2, %f0, %f0 ! ^= ivec
prefetch [$out + 63], 22

des_ip %f0, %f0
des_round %f4, %f6, %f0, %f0
des_round %f8, %f10, %f0, %f0
des_round %f12, %f14, %f0, %f0
des_round %f16, %f18, %f0, %f0
des_round %f20, %f22, %f0, %f0
des_round %f24, %f26, %f0, %f0
des_round %f28, %f30, %f0, %f0
des_round %f32, %f34, %f0, %f0
des_iip %f0, %f0

brnz,pn $omask, 2f
sub $len, 1, $len

std %f0, [$out + 0]
brnz,pt $len, .Ldes_cbc_enc_loop
add $out, 8, $out

st %f0, [$ivec + 0] ! write out ivec
retl
st %f1, [$ivec + 4]

.align 16
2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
! and ~4x deterioration
! in inp==out case
faligndata %f0, %f0, %f2 ! handle unaligned output

stda %f2, [$out + $omask]0xc0 ! partial store
add $out, 8, $out
orn %g0, $omask, $omask
stda %f2, [$out + $omask]0xc0 ! partial store

brnz,pt $len, .Ldes_cbc_enc_loop+4
orn %g0, $omask, $omask

st %f0, [$ivec + 0] ! write out ivec
retl
st %f1, [$ivec + 4]
.type des_t4_cbc_encrypt,#function
.size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt

___

# des_t4_cbc_decrypt: the key schedule is loaded in the opposite order
# ($key + 0x78 down to 0x00).  The chaining value lives in %f2/%f3: each
# processed block is XORed with it, after which %f2 is reloaded from %g4
# (the input block just consumed).  The final %f2/%f3 is written back to
# ivec.
$code .= <<___;
.globl des_t4_cbc_decrypt
.align 32
des_t4_cbc_decrypt:
ld [$ivec + 0], %f2 ! load ivec
ld [$ivec + 4], %f3

and $inp, 7, $ileft
andn $inp, 7, $inp
sll $ileft, 3, $ileft
mov 0xff, $omask
prefetch [$inp], 20
prefetch [$inp + 63], 20
sub %g0, $ileft, $iright
and $out, 7, %g4
alignaddrl $out, %g0, $out
srl $omask, %g4, $omask
srlx $len, 3, $len
movrz %g4, 0, $omask
prefetch [$out], 22

ldd [$key + 0x78], %f4 ! load key schedule
ldd [$key + 0x70], %f6
ldd [$key + 0x68], %f8
ldd [$key + 0x60], %f10
ldd [$key + 0x58], %f12
ldd [$key + 0x50], %f14
ldd [$key + 0x48], %f16
ldd [$key + 0x40], %f18
ldd [$key + 0x38], %f20
ldd [$key + 0x30], %f22
ldd [$key + 0x28], %f24
ldd [$key + 0x20], %f26
ldd [$key + 0x18], %f28
ldd [$key + 0x10], %f30
ldd [$key + 0x08], %f32
ldd [$key + 0x00], %f34

.Ldes_cbc_dec_loop:
ldx [$inp + 0], %g4
brz,pt $ileft, 4f
nop

ldx [$inp + 8], %g5
sllx %g4, $ileft, %g4
srlx %g5, $iright, %g5
or %g5, %g4, %g4
4:
movxtod %g4, %f0
prefetch [$inp + 8+63], 20
add $inp, 8, $inp
prefetch [$out + 63], 22

des_ip %f0, %f0
des_round %f4, %f6, %f0, %f0
des_round %f8, %f10, %f0, %f0
des_round %f12, %f14, %f0, %f0
des_round %f16, %f18, %f0, %f0
des_round %f20, %f22, %f0, %f0
des_round %f24, %f26, %f0, %f0
des_round %f28, %f30, %f0, %f0
des_round %f32, %f34, %f0, %f0
des_iip %f0, %f0

fxor %f2, %f0, %f0 ! ^= ivec
movxtod %g4, %f2

brnz,pn $omask, 2f
sub $len, 1, $len

std %f0, [$out + 0]
brnz,pt $len, .Ldes_cbc_dec_loop
add $out, 8, $out

st %f2, [$ivec + 0] ! write out ivec
retl
st %f3, [$ivec + 4]

.align 16
2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
! and ~4x deterioration
! in inp==out case
faligndata %f0, %f0, %f0 ! handle unaligned output

stda %f0, [$out + $omask]0xc0 ! partial store
add $out, 8, $out
orn %g0, $omask, $omask
stda %f0, [$out + $omask]0xc0 ! partial store

brnz,pt $len, .Ldes_cbc_dec_loop+4
orn %g0, $omask, $omask

st %f2, [$ivec + 0] ! write out ivec
retl
st %f3, [$ivec + 4]
.type des_t4_cbc_decrypt,#function
.size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
___

# Why is every EDE pass separated from the next by a des_iip immediately
# followed by a des_ip?  The two are almost, but not quite, inverses of
# each other: applied back to back they leave the two 32-bit words of
# the target register swapped.  That swap is exactly what is needed
# between passes, and the des_iip/des_ip pair is the fastest way to get
# it here.

# des_t4_ede3_cbc_encrypt: three passes per block.  The first pass uses
# the schedule at $key + 0x00 .. 0x78, preloaded into %f4..%f34.  The
# schedules for the second pass ($key + 0x100-0x08 down to 0x100-0x80)
# and the third pass ($key + 0x100+0x00 .. 0x100+0x78) are loaded into
# %f36..%f62 inside the loop, interleaved with the rounds.
$code .= <<___;
.globl des_t4_ede3_cbc_encrypt
.align 32
des_t4_ede3_cbc_encrypt:
ld [$ivec + 0], %f0 ! load ivec
ld [$ivec + 4], %f1

and $inp, 7, $ileft
andn $inp, 7, $inp
sll $ileft, 3, $ileft
mov 0xff, $omask
prefetch [$inp], 20
prefetch [$inp + 63], 20
sub %g0, $ileft, $iright
and $out, 7, %g4
alignaddrl $out, %g0, $out
srl $omask, %g4, $omask
srlx $len, 3, $len
movrz %g4, 0, $omask
prefetch [$out], 22

ldd [$key + 0x00], %f4 ! load key schedule
ldd [$key + 0x08], %f6
ldd [$key + 0x10], %f8
ldd [$key + 0x18], %f10
ldd [$key + 0x20], %f12
ldd [$key + 0x28], %f14
ldd [$key + 0x30], %f16
ldd [$key + 0x38], %f18
ldd [$key + 0x40], %f20
ldd [$key + 0x48], %f22
ldd [$key + 0x50], %f24
ldd [$key + 0x58], %f26
ldd [$key + 0x60], %f28
ldd [$key + 0x68], %f30
ldd [$key + 0x70], %f32
ldd [$key + 0x78], %f34

.Ldes_ede3_cbc_enc_loop:
ldx [$inp + 0], %g4
brz,pt $ileft, 4f
nop

ldx [$inp + 8], %g5
sllx %g4, $ileft, %g4
srlx %g5, $iright, %g5
or %g5, %g4, %g4
4:
movxtod %g4, %f2
prefetch [$inp + 8+63], 20
add $inp, 8, $inp
fxor %f2, %f0, %f0 ! ^= ivec
prefetch [$out + 63], 22

des_ip %f0, %f0
des_round %f4, %f6, %f0, %f0
des_round %f8, %f10, %f0, %f0
des_round %f12, %f14, %f0, %f0
des_round %f16, %f18, %f0, %f0
ldd [$key + 0x100-0x08], %f36
ldd [$key + 0x100-0x10], %f38
des_round %f20, %f22, %f0, %f0
ldd [$key + 0x100-0x18], %f40
ldd [$key + 0x100-0x20], %f42
des_round %f24, %f26, %f0, %f0
ldd [$key + 0x100-0x28], %f44
ldd [$key + 0x100-0x30], %f46
des_round %f28, %f30, %f0, %f0
ldd [$key + 0x100-0x38], %f48
ldd [$key + 0x100-0x40], %f50
des_round %f32, %f34, %f0, %f0
ldd [$key + 0x100-0x48], %f52
ldd [$key + 0x100-0x50], %f54
des_iip %f0, %f0

ldd [$key + 0x100-0x58], %f56
ldd [$key + 0x100-0x60], %f58
des_ip %f0, %f0
ldd [$key + 0x100-0x68], %f60
ldd [$key + 0x100-0x70], %f62
des_round %f36, %f38, %f0, %f0
ldd [$key + 0x100-0x78], %f36
ldd [$key + 0x100-0x80], %f38
des_round %f40, %f42, %f0, %f0
des_round %f44, %f46, %f0, %f0
des_round %f48, %f50, %f0, %f0
ldd [$key + 0x100+0x00], %f40
ldd [$key + 0x100+0x08], %f42
des_round %f52, %f54, %f0, %f0
ldd [$key + 0x100+0x10], %f44
ldd [$key + 0x100+0x18], %f46
des_round %f56, %f58, %f0, %f0
ldd [$key + 0x100+0x20], %f48
ldd [$key + 0x100+0x28], %f50
des_round %f60, %f62, %f0, %f0
ldd [$key + 0x100+0x30], %f52
ldd [$key + 0x100+0x38], %f54
des_round %f36, %f38, %f0, %f0
ldd [$key + 0x100+0x40], %f56
ldd [$key + 0x100+0x48], %f58
des_iip %f0, %f0

ldd [$key + 0x100+0x50], %f60
ldd [$key + 0x100+0x58], %f62
des_ip %f0, %f0
ldd [$key + 0x100+0x60], %f36
ldd [$key + 0x100+0x68], %f38
des_round %f40, %f42, %f0, %f0
ldd [$key + 0x100+0x70], %f40
ldd [$key + 0x100+0x78], %f42
des_round %f44, %f46, %f0, %f0
des_round %f48, %f50, %f0, %f0
des_round %f52, %f54, %f0, %f0
des_round %f56, %f58, %f0, %f0
des_round %f60, %f62, %f0, %f0
des_round %f36, %f38, %f0, %f0
des_round %f40, %f42, %f0, %f0
des_iip %f0, %f0

brnz,pn $omask, 2f
sub $len, 1, $len

std %f0, [$out + 0]
brnz,pt $len, .Ldes_ede3_cbc_enc_loop
add $out, 8, $out

st %f0, [$ivec + 0] ! write out ivec
retl
st %f1, [$ivec + 4]

.align 16
2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
! and ~2x deterioration
! in inp==out case
faligndata %f0, %f0, %f2 ! handle unaligned output

stda %f2, [$out + $omask]0xc0 ! partial store
add $out, 8, $out
orn %g0, $omask, $omask
stda %f2, [$out + $omask]0xc0 ! partial store

brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4
orn %g0, $omask, $omask

st %f0, [$ivec + 0] ! write out ivec
retl
st %f1, [$ivec + 4]
.type des_t4_ede3_cbc_encrypt,#function
.size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt

___

# des_t4_ede3_cbc_decrypt: the passes run over the schedules in the
# opposite order.  The first pass uses $key + 0x100+0x78 down to
# 0x100+0x00 (preloaded into %f4..%f34); the second pass uses
# $key + 0x80+0x00 .. 0x80+0x78 and the third $key + 0x80-0x08 down to
# 0x80-0x80, both loaded into %f36..%f62 inside the loop.  Chaining is
# handled as in des_t4_cbc_decrypt (%f2/%f3 carry the chaining value).
$code .= <<___;
.globl des_t4_ede3_cbc_decrypt
.align 32
des_t4_ede3_cbc_decrypt:
ld [$ivec + 0], %f2 ! load ivec
ld [$ivec + 4], %f3

and $inp, 7, $ileft
andn $inp, 7, $inp
sll $ileft, 3, $ileft
mov 0xff, $omask
prefetch [$inp], 20
prefetch [$inp + 63], 20
sub %g0, $ileft, $iright
and $out, 7, %g4
alignaddrl $out, %g0, $out
srl $omask, %g4, $omask
srlx $len, 3, $len
movrz %g4, 0, $omask
prefetch [$out], 22

ldd [$key + 0x100+0x78], %f4 ! load key schedule
ldd [$key + 0x100+0x70], %f6
ldd [$key + 0x100+0x68], %f8
ldd [$key + 0x100+0x60], %f10
ldd [$key + 0x100+0x58], %f12
ldd [$key + 0x100+0x50], %f14
ldd [$key + 0x100+0x48], %f16
ldd [$key + 0x100+0x40], %f18
ldd [$key + 0x100+0x38], %f20
ldd [$key + 0x100+0x30], %f22
ldd [$key + 0x100+0x28], %f24
ldd [$key + 0x100+0x20], %f26
ldd [$key + 0x100+0x18], %f28
ldd [$key + 0x100+0x10], %f30
ldd [$key + 0x100+0x08], %f32
ldd [$key + 0x100+0x00], %f34

.Ldes_ede3_cbc_dec_loop:
ldx [$inp + 0], %g4
brz,pt $ileft, 4f
nop

ldx [$inp + 8], %g5
sllx %g4, $ileft, %g4
srlx %g5, $iright, %g5
or %g5, %g4, %g4
4:
movxtod %g4, %f0
prefetch [$inp + 8+63], 20
add $inp, 8, $inp
prefetch [$out + 63], 22

des_ip %f0, %f0
des_round %f4, %f6, %f0, %f0
des_round %f8, %f10, %f0, %f0
des_round %f12, %f14, %f0, %f0
des_round %f16, %f18, %f0, %f0
ldd [$key + 0x80+0x00], %f36
ldd [$key + 0x80+0x08], %f38
des_round %f20, %f22, %f0, %f0
ldd [$key + 0x80+0x10], %f40
ldd [$key + 0x80+0x18], %f42
des_round %f24, %f26, %f0, %f0
ldd [$key + 0x80+0x20], %f44
ldd [$key + 0x80+0x28], %f46
des_round %f28, %f30, %f0, %f0
ldd [$key + 0x80+0x30], %f48
ldd [$key + 0x80+0x38], %f50
des_round %f32, %f34, %f0, %f0
ldd [$key + 0x80+0x40], %f52
ldd [$key + 0x80+0x48], %f54
des_iip %f0, %f0

ldd [$key + 0x80+0x50], %f56
ldd [$key + 0x80+0x58], %f58
des_ip %f0, %f0
ldd [$key + 0x80+0x60], %f60
ldd [$key + 0x80+0x68], %f62
des_round %f36, %f38, %f0, %f0
ldd [$key + 0x80+0x70], %f36
ldd [$key + 0x80+0x78], %f38
des_round %f40, %f42, %f0, %f0
des_round %f44, %f46, %f0, %f0
des_round %f48, %f50, %f0, %f0
ldd [$key + 0x80-0x08], %f40
ldd [$key + 0x80-0x10], %f42
des_round %f52, %f54, %f0, %f0
ldd [$key + 0x80-0x18], %f44
ldd [$key + 0x80-0x20], %f46
des_round %f56, %f58, %f0, %f0
ldd [$key + 0x80-0x28], %f48
ldd [$key + 0x80-0x30], %f50
des_round %f60, %f62, %f0, %f0
ldd [$key + 0x80-0x38], %f52
ldd [$key + 0x80-0x40], %f54
des_round %f36, %f38, %f0, %f0
ldd [$key + 0x80-0x48], %f56
ldd [$key + 0x80-0x50], %f58
des_iip %f0, %f0

ldd [$key + 0x80-0x58], %f60
ldd [$key + 0x80-0x60], %f62
des_ip %f0, %f0
ldd [$key + 0x80-0x68], %f36
ldd [$key + 0x80-0x70], %f38
des_round %f40, %f42, %f0, %f0
ldd [$key + 0x80-0x78], %f40
ldd [$key + 0x80-0x80], %f42
des_round %f44, %f46, %f0, %f0
des_round %f48, %f50, %f0, %f0
des_round %f52, %f54, %f0, %f0
des_round %f56, %f58, %f0, %f0
des_round %f60, %f62, %f0, %f0
des_round %f36, %f38, %f0, %f0
des_round %f40, %f42, %f0, %f0
des_iip %f0, %f0

fxor %f2, %f0, %f0 ! ^= ivec
movxtod %g4, %f2

brnz,pn $omask, 2f
sub $len, 1, $len

std %f0, [$out + 0]
brnz,pt $len, .Ldes_ede3_cbc_dec_loop
add $out, 8, $out

st %f2, [$ivec + 0] ! write out ivec
retl
st %f3, [$ivec + 4]

.align 16
2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard
! and ~3x deterioration
! in inp==out case
faligndata %f0, %f0, %f0 ! handle unaligned output

stda %f0, [$out + $omask]0xc0 ! partial store
add $out, 8, $out
orn %g0, $omask, $omask
stda %f0, [$out + $omask]0xc0 ! partial store

brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4
orn %g0, $omask, $omask

st %f2, [$ivec + 0] ! write out ivec
retl
st %f3, [$ivec + 4]
.type des_t4_ede3_cbc_decrypt,#function
.size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
___
}
|
# Append the identification string and restore 4-byte alignment after it.
$code .= <<___;
.asciz "DES for SPARC T4, David S. Miller, Andy Polyakov"
.align 4
___

# NOTE(review): emit_assembler is not defined in this file; presumably it
# comes from sparcv9_modes.pl and prints the accumulated $code - confirm.
&emit_assembler();

# Output is buffered, so a write failure (full disk, closed pipe) may only
# surface when the handle is closed.  Fail loudly instead of leaving a
# truncated assembly file behind with a zero exit status.
close STDOUT or die "error closing STDOUT: $!";