#!/usr/bin/env perl

# ====================================================================
# Written by David S. Miller <[email protected]> and Andy Polyakov
# <[email protected]>. The module is licensed under 2-clause BSD
# license. March 2013. All rights reserved.
# ====================================================================

######################################################################
# DES for SPARC T4.
#
# As with other hardware-assisted ciphers CBC encrypt results [for
# aligned data] are virtually identical to critical path lengths:
#
#		DES		Triple-DES
# CBC encrypt	4.14/4.15(*)	11.7/11.7
# CBC decrypt	1.77/4.11(**)	6.42/7.47
#
#			 (*)	numbers after slash are for
#				misaligned data;
#			 (**)	this is result for largest
#				block size, unlike all other
#				cases smaller blocks results
#				are better[?];

# Work out the directory this script was invoked from (everything up to
# and including the last '/' or '\' in $0) so the shared perlasm helper
# can be found.  When $0 carries no directory component the prefix is
# the empty string, i.e. paths are taken relative to the current
# directory.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# asm_init comes from sparcv9_modes.pl; the code below reads
# $::abibits and $::size_t_cc afterwards.
&asm_init(@ARGV);

# %g2 and %g3 are declared as scratch registers only for the 64-bit ABI.
$code.=<<___ if ($::abibits==64);
.register	%g2,#scratch
.register	%g3,#scratch
___

$code.=<<___;
.text
___

# des_t4_key_expand
#
# Emits the routine that expands one 8-byte DES key into a 16-doubleword
# key schedule.
#
#   %o0 ($inp)  address of the key; the low three address bits are
#               tested and, when non-zero, two doublewords are loaded
#               and merged with alignaddr/faligndata
#   %o1 ($out)  destination of the schedule, written with sixteen
#               std instructions at offsets 0x00 .. 0x78
#               (presumably has to be 8-byte aligned, since plain std
#               is used -- confirm against callers)
#
# The stores are interleaved with the des_kexpand instructions, so they
# are not issued in ascending address order; the last store sits in the
# delay slot of retl.
{ my ($inp,$out)=("%o0","%o1");

$code.=<<___;
.align	32
.globl	des_t4_key_expand
.type	des_t4_key_expand,#function
des_t4_key_expand:
	andcc		$inp, 0x7, %g0
	alignaddr	$inp, %g0, $inp
	bz,pt		%icc, 1f
	ldd		[$inp + 0x00], %f0
	ldd		[$inp + 0x08], %f2
	faligndata	%f0, %f2, %f0
1:	des_kexpand	%f0, 0, %f0
	des_kexpand	%f0, 1, %f2
	std		%f0, [$out + 0x00]
	des_kexpand	%f2, 3, %f6
	std		%f2, [$out + 0x08]
	des_kexpand	%f2, 2, %f4
	des_kexpand	%f6, 3, %f10
	std		%f6, [$out + 0x18]
	des_kexpand	%f6, 2, %f8
	std		%f4, [$out + 0x10]
	des_kexpand	%f10, 3, %f14
	std		%f10, [$out + 0x28]
	des_kexpand	%f10, 2, %f12
	std		%f8, [$out + 0x20]
	des_kexpand	%f14, 1, %f16
	std		%f14, [$out + 0x38]
	des_kexpand	%f16, 3, %f20
	std		%f12, [$out + 0x30]
	des_kexpand	%f16, 2, %f18
	std		%f16, [$out + 0x40]
	des_kexpand	%f20, 3, %f24
	std		%f20, [$out + 0x50]
	des_kexpand	%f20, 2, %f22
	std		%f18, [$out + 0x48]
	des_kexpand	%f24, 3, %f28
	std		%f24, [$out + 0x60]
	des_kexpand	%f24, 2, %f26
	std		%f22, [$out + 0x58]
	des_kexpand	%f28, 1, %f30
	std		%f28, [$out + 0x70]
	std		%f26, [$out + 0x68]
	retl
	std		%f30, [$out + 0x78]
.size	des_t4_key_expand,.-des_t4_key_expand
___
}

# CBC routines: des_t4_cbc_encrypt, des_t4_cbc_decrypt,
# des_t4_ede3_cbc_encrypt and des_t4_ede3_cbc_decrypt.
#
# Register assignment shared by all four routines:
#
#   %o0 ($inp)    input pointer
#   %o1 ($out)    output pointer
#   %o2 ($len)    length in bytes; turned into a count of 8-byte blocks
#                 with "srlx $len, 3", so a trailing partial block is
#                 not processed
#   %o3 ($key)    key schedule (single DES reads offsets 0x00..0x78,
#                 the ede3 routines read offsets 0x00..0x178)
#   %o4 ($ivec)   8-byte IV, loaded on entry and written back on exit
#   %g1 ($ileft)  (inp & 7) * 8, left-shift count for misaligned input
#   %g2 ($iright) 0 - $ileft, right-shift count for the following
#                 doubleword (presumably relies on the shift count
#                 being taken modulo 64 -- confirm)
#   %g3 ($omask)  0xff >> (out & 7), forced to 0 when out is aligned;
#                 byte mask for the partial stores
#
# Common structure:
#
#   * A zero $len branches to .Lcbc_abort, which returns without
#     touching the IV.  The label is defined once, inside
#     des_t4_cbc_encrypt, and shared by the other three routines.
#   * $inp is rounded down to an 8-byte boundary.  When $ileft is
#     non-zero each block is assembled from two ldx loads merged with
#     sllx/srlx/or.
#   * When $omask is zero the block is stored with a single std.
#     Otherwise the code at local label 2 rotates the result with
#     faligndata and issues two masked stda stores, one with $omask and
#     one with its complement, then restores $omask in the delay slot.
#   * The misaligned-output path reloads %g4 with ldxa and re-enters at
#     "loop+4", i.e. past the first ldx of the loop, which must
#     therefore stay the first instruction after the loop label.
#   * Decryption keeps the previous ciphertext block in %f2 and
#     refreshes it from %g4 after the xor, so the IV written back is the
#     last ciphertext block read.
{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  my ($ileft,$iright,$omask) = map("%g$_",(1..3));

# Single DES.  Encryption loads the schedule in ascending order
# (0x00..0x78), decryption in descending order (0x78..0x00); both keep
# it in %f4-%f34 for the whole call.
$code.=<<___;
.globl	des_t4_cbc_encrypt
.align	32
des_t4_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	nop
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.Lcbc_abort:
	retl
	nop

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_cbc_encrypt,#function
.size	des_t4_cbc_encrypt,.-des_t4_cbc_encrypt

.globl	des_t4_cbc_decrypt
.align	32
des_t4_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	nop
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x78], %f4	! load key schedule
	ldd		[$key + 0x70], %f6
	ldd		[$key + 0x68], %f8
	ldd		[$key + 0x60], %f10
	ldd		[$key + 0x58], %f12
	ldd		[$key + 0x50], %f14
	ldd		[$key + 0x48], %f16
	ldd		[$key + 0x40], %f18
	ldd		[$key + 0x38], %f20
	ldd		[$key + 0x30], %f22
	ldd		[$key + 0x28], %f24
	ldd		[$key + 0x20], %f26
	ldd		[$key + 0x18], %f28
	ldd		[$key + 0x10], %f30
	ldd		[$key + 0x08], %f32
	ldd		[$key + 0x00], %f34

.Ldes_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_cbc_decrypt,#function
.size	des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
___

# One might wonder why one has back-to-back des_iip/des_ip pairs
# between EDE passes. Indeed, aren't they the inverse of each other?
# They almost are. The outcome of the pair is the 32-bit words being
# swapped in the target register. Consider a des_iip/des_ip pair as a
# way to perform the due swap; it is actually the fastest way in this
# case.

# Triple DES.  Only the schedule for the first pass stays resident in
# %f4-%f34; the schedules for the second and third passes are reloaded
# into %f36-%f62 for every block, with the loads interleaved between the
# des_round instructions of the preceding pass.
#
#   encrypt: pass 1 reads 0x00..0x78 ascending, pass 2 reads 0xf8..0x80
#            descending (written as 0x100-0x08 .. 0x100-0x80), pass 3
#            reads 0x100..0x178 ascending.
#   decrypt: pass 1 reads 0x178..0x100 descending, pass 2 reads
#            0x80..0xf8 ascending, pass 3 reads 0x78..0x00 descending
#            (written as 0x80-0x08 .. 0x80-0x80).
$code.=<<___;
.globl	des_t4_ede3_cbc_encrypt
.align	32
des_t4_ede3_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	nop
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_ede3_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x100-0x08], %f36
	ldd		[$key + 0x100-0x10], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x100-0x18], %f40
	ldd		[$key + 0x100-0x20], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x100-0x28], %f44
	ldd		[$key + 0x100-0x30], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x100-0x38], %f48
	ldd		[$key + 0x100-0x40], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x100-0x48], %f52
	ldd		[$key + 0x100-0x50], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x100-0x58], %f56
	ldd		[$key + 0x100-0x60], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x100-0x68], %f60
	ldd		[$key + 0x100-0x70], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100-0x78], %f36
	ldd		[$key + 0x100-0x80], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x100+0x00], %f40
	ldd		[$key + 0x100+0x08], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x100+0x10], %f44
	ldd		[$key + 0x100+0x18], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x100+0x20], %f48
	ldd		[$key + 0x100+0x28], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x100+0x30], %f52
	ldd		[$key + 0x100+0x38], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100+0x40], %f56
	ldd		[$key + 0x100+0x48], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x100+0x50], %f60
	ldd		[$key + 0x100+0x58], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x100+0x60], %f36
	ldd		[$key + 0x100+0x68], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x100+0x70], %f40
	ldd		[$key + 0x100+0x78], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~2x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_ede3_cbc_encrypt,#function
.size	des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt

.globl	des_t4_ede3_cbc_decrypt
.align	32
des_t4_ede3_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	nop
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x100+0x78], %f4	! load key schedule
	ldd		[$key + 0x100+0x70], %f6
	ldd		[$key + 0x100+0x68], %f8
	ldd		[$key + 0x100+0x60], %f10
	ldd		[$key + 0x100+0x58], %f12
	ldd		[$key + 0x100+0x50], %f14
	ldd		[$key + 0x100+0x48], %f16
	ldd		[$key + 0x100+0x40], %f18
	ldd		[$key + 0x100+0x38], %f20
	ldd		[$key + 0x100+0x30], %f22
	ldd		[$key + 0x100+0x28], %f24
	ldd		[$key + 0x100+0x20], %f26
	ldd		[$key + 0x100+0x18], %f28
	ldd		[$key + 0x100+0x10], %f30
	ldd		[$key + 0x100+0x08], %f32
	ldd		[$key + 0x100+0x00], %f34

.Ldes_ede3_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x80+0x00], %f36
	ldd		[$key + 0x80+0x08], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x80+0x10], %f40
	ldd		[$key + 0x80+0x18], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x80+0x20], %f44
	ldd		[$key + 0x80+0x28], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x80+0x30], %f48
	ldd		[$key + 0x80+0x38], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x80+0x40], %f52
	ldd		[$key + 0x80+0x48], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x80+0x50], %f56
	ldd		[$key + 0x80+0x58], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x80+0x60], %f60
	ldd		[$key + 0x80+0x68], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80+0x70], %f36
	ldd		[$key + 0x80+0x78], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x80-0x08], %f40
	ldd		[$key + 0x80-0x10], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x80-0x18], %f44
	ldd		[$key + 0x80-0x20], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x80-0x28], %f48
	ldd		[$key + 0x80-0x30], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x80-0x38], %f52
	ldd		[$key + 0x80-0x40], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80-0x48], %f56
	ldd		[$key + 0x80-0x50], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x80-0x58], %f60
	ldd		[$key + 0x80-0x60], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x80-0x68], %f36
	ldd		[$key + 0x80-0x70], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x80-0x78], %f40
	ldd		[$key + 0x80-0x80], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_ede3_cbc_decrypt,#function
.size	des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
___
}

# Identification string appended after the code.
$code.=<<___;
.asciz	"DES for SPARC T4, David S. Miller, Andy Polyakov"
.align	4
___

# emit_assembler comes from sparcv9_modes.pl.
&emit_assembler();

# Buffered write errors (full disk, closed pipe) only surface when the
# handle is closed, so a failed close must not be ignored -- otherwise a
# truncated assembly file would be handed to the build without notice.
close STDOUT or die "error closing STDOUT: $!";