1 #!/usr/bin/env perl |
|
2 |
|
3 # Specific modes implementations for SPARC Architecture 2011. There |
|
4 # is T4 dependency though, an ASI value that is not specified in the |
|
5 # Architecture Manual. But as SPARC universe is rather monocultural, |
|
6 # we imply that processor capable of executing crypto instructions |
|
7 # can handle the ASI in question as well. This means that we ought to |
|
8 # keep eyes open when new processors emerge... |
|
9 # |
|
10 # As for above mentioned ASI. It's so called "block initializing |
|
11 # store" which cancels "read" in "read-update-write" on cache lines. |
|
12 # This is "cooperative" optimization, as it reduces overall pressure |
|
13 # on memory interface. Benefits can't be observed/quantified with |
|
14 # usual benchmarks, on the contrary you can notice that single-thread |
|
15 # performance for parallelizable modes is ~1.5% worse for largest |
|
16 # block sizes [though few percent better for not so long ones]. All |
|
17 # this based on suggestions from David Miller. |
|
18 |
|
# asm_init(@ARGV): scan the compiler/driver flags for a 64-bit target
# (-m64 or -xarch=v9) and set the ABI-dependent globals used by the
# code generators below:
#   $::abibits   - set to 64 when a 64-bit flag is seen (otherwise left
#                  as-is, so a caller may pre-seed it);
#   $::bias      - SPARC V9 stack bias (2047) for 64-bit, 0 for 32-bit;
#   $::frame     - minimal register-window frame size for `save`;
#   $::size_t_cc - condition-code register matching size_t width
#                  ("%xcc" for 64-bit, "%icc" for 32-bit).
sub asm_init {	# to be called with @ARGV as argument
    for (@_) { $::abibits=64 if (/\-m64/ || /\-xarch\=v9/); }
    # Guard against an uninitialized $::abibits: the bare numeric
    # compare would emit an "uninitialized value" warning whenever no
    # 64-bit flag is present.
    if (defined($::abibits) && $::abibits==64) {
	$::bias=2047; $::frame=192; $::size_t_cc="%xcc";
    } else {
	$::bias=0;    $::frame=112; $::size_t_cc="%icc";
    }
}
|
24 |
|
# unified interface: argument registers shared by every entry point
# generated below -- input ptr, output ptr, byte length, key-schedule
# ptr and ivec ptr arrive in %i0..%i4 after each routine's `save`.
my ($inp,$out,$len,$key,$ivec)=map("%i$_",(0..5));
# local variables: scratch %l registers.  NOTE: the map list spans
# %l0..%l7 (eight values) but only the first six get names, so
# $ivoff is %l4 and $blk_init is %l5; %l4..%l7 are also addressed
# literally where needed (e.g. as the CTR counter words).
my ($ileft,$iright,$ooff,$omask,$ivoff,$blk_init)=map("%l$_",(0..7));
|
29 |
|
# Emit ${alg}${bits}_t4_cbc_encrypt(inp, out, len, key, ivec) into
# $::code.  CBC encryption is inherently serial (each block's input
# depends on the previous ciphertext), so only a 1x inner loop is
# generated.  Three code paths are produced:
#   - an aligned-output fast path (16 bytes per iteration),
#   - a "2:" fixup path for byte-unaligned output, using faligndata
#     plus ASI 0xc0 partial stores,
#   - a ".L..cbc_enc_blk" bulk path that uses ASI 0xe2 block-
#     initializing stores when out is 8-byte aligned, len >= 128 and
#     inp != out (see the file header for the T4 ASI caveat).
# $::evp selects IV handling: the EVP interface passes a 32-bit-
# aligned ivec (plain ld/st of four words), while the non-EVP path
# tolerates arbitrary ivec alignment via alignaddr/faligndata and
# the "3:" misaligned-writeback handler.
sub alg_cbc_encrypt_implement {
my ($alg,$bits) = @_;

# prologue; $blk_init doubles as the inp!=out predicate
$::code.=<<___;
.globl	${alg}${bits}_t4_cbc_encrypt
.align	32
${alg}${bits}_t4_cbc_encrypt:
	save	%sp, -$::frame, %sp
	sub	$inp, $out, $blk_init	! $inp!=$out
___
$::code.=<<___ if (!$::evp);	# non-EVP: ivec may be misaligned
	andcc	$ivec, 7, $ivoff
	alignaddr $ivec, %g0, $ivec

	ldd	[$ivec + 0], %f0	! load ivec
	bz,pt	%icc, 1f
	ldd	[$ivec + 8], %f2
	ldd	[$ivec + 16], %f4
	faligndata %f0, %f2, %f0
	faligndata %f2, %f4, %f2
1:
___
$::code.=<<___ if ($::evp);	# EVP: ivec is 32-bit aligned
	ld	[$ivec + 0], %f0
	ld	[$ivec + 4], %f1
	ld	[$ivec + 8], %f2
	ld	[$ivec + 12], %f3
___
$::code.=<<___;
	prefetch [$inp], 20
	prefetch [$inp + 63], 20
	call	_${alg}${bits}_load_enckey
	and	$inp, 7, $ileft
	andn	$inp, 7, $inp
	sll	$ileft, 3, $ileft
	mov	64, $iright
	mov	0xff, $omask
	sub	$iright, $ileft, $iright
	and	$out, 7, $ooff
	cmp	$len, 127
	movrnz	$ooff, 0, $blk_init		! if (	$out&7 ||
	movleu	$::size_t_cc, 0, $blk_init	!	$len<128 ||
	brnz,pn	$blk_init, .L${bits}cbc_enc_blk	!	$inp==$out)
	srl	$omask, $ooff, $omask

	alignaddrl $out, %g0, $out
	srlx	$len, 4, $len
	prefetch [$out], 22

.L${bits}_cbc_enc_loop:
	ldx	[$inp + 0], %o0
	brz,pt	$ileft, 4f
	ldx	[$inp + 8], %o1

	ldx	[$inp + 16], %o2
	sllx	%o0, $ileft, %o0
	srlx	%o1, $iright, %g1
	sllx	%o1, $ileft, %o1
	or	%g1, %o0, %o0
	srlx	%o2, $iright, %o2
	or	%o2, %o1, %o1
4:
	xor	%g4, %o0, %o0		! ^= rk[0]
	xor	%g5, %o1, %o1
	movxtod	%o0, %f12
	movxtod	%o1, %f14

	fxor	%f12, %f0, %f0		! ^= ivec
	fxor	%f14, %f2, %f2
	prefetch [$out + 63], 22
	prefetch [$inp + 16+63], 20
	call	_${alg}${bits}_encrypt_1x
	add	$inp, 16, $inp

	brnz,pn	$ooff, 2f
	sub	$len, 1, $len

	std	%f0, [$out + 0]
	std	%f2, [$out + 8]
	brnz,pt	$len, .L${bits}_cbc_enc_loop
	add	$out, 16, $out
___
$::code.=<<___ if ($::evp);	# write back final ivec (aligned)
	st	%f0, [$ivec + 0]
	st	%f1, [$ivec + 4]
	st	%f2, [$ivec + 8]
	st	%f3, [$ivec + 12]
___
$::code.=<<___ if (!$::evp);	# write back final ivec; 3f if misaligned
	brnz,pn	$ivoff, 3f
	nop

	std	%f0, [$ivec + 0]	! write out ivec
	std	%f2, [$ivec + 8]
___
$::code.=<<___;
	ret
	restore

.align	16
2:	ldxa	[$inp]0x82, %o0		! avoid read-after-write hazard
					! and ~3x deterioration
					! in inp==out case
	faligndata %f0, %f0, %f4	! handle unaligned output
	faligndata %f0, %f2, %f6
	faligndata %f2, %f2, %f8

	stda	%f4, [$out + $omask]0xc0	! partial store
	std	%f6, [$out + 8]
	add	$out, 16, $out
	orn	%g0, $omask, $omask
	stda	%f8, [$out + $omask]0xc0	! partial store

	brnz,pt	$len, .L${bits}_cbc_enc_loop+4
	orn	%g0, $omask, $omask
___
$::code.=<<___ if ($::evp);
	st	%f0, [$ivec + 0]
	st	%f1, [$ivec + 4]
	st	%f2, [$ivec + 8]
	st	%f3, [$ivec + 12]
___
$::code.=<<___ if (!$::evp);	# includes the misaligned-ivec handler "3:"
	brnz,pn	$ivoff, 3f
	nop

	std	%f0, [$ivec + 0]	! write out ivec
	std	%f2, [$ivec + 8]
	ret
	restore

.align	16
3:	alignaddrl $ivec, $ivoff, %g0	! handle unaligned ivec
	mov	0xff, $omask
	srl	$omask, $ivoff, $omask
	faligndata %f0, %f0, %f4
	faligndata %f0, %f2, %f6
	faligndata %f2, %f2, %f8
	stda	%f4, [$ivec + $omask]0xc0
	std	%f6, [$ivec + 8]
	add	$ivec, 16, $ivec
	orn	%g0, $omask, $omask
	stda	%f8, [$ivec + $omask]0xc0
___
$::code.=<<___;
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L${bits}cbc_enc_blk:
	add	$out, $len, $blk_init
	and	$blk_init, 63, $blk_init	! tail
	sub	$len, $blk_init, $len
	add	$blk_init, 15, $blk_init	! round up to 16n
	srlx	$len, 4, $len
	srl	$blk_init, 4, $blk_init

.L${bits}_cbc_enc_blk_loop:
	ldx	[$inp + 0], %o0
	brz,pt	$ileft, 5f
	ldx	[$inp + 8], %o1

	ldx	[$inp + 16], %o2
	sllx	%o0, $ileft, %o0
	srlx	%o1, $iright, %g1
	sllx	%o1, $ileft, %o1
	or	%g1, %o0, %o0
	srlx	%o2, $iright, %o2
	or	%o2, %o1, %o1
5:
	xor	%g4, %o0, %o0		! ^= rk[0]
	xor	%g5, %o1, %o1
	movxtod	%o0, %f12
	movxtod	%o1, %f14

	fxor	%f12, %f0, %f0		! ^= ivec
	fxor	%f14, %f2, %f2
	prefetch [$inp + 16+63], 20
	call	_${alg}${bits}_encrypt_1x
	add	$inp, 16, $inp
	sub	$len, 1, $len

	stda	%f0, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	add	$out, 8, $out
	stda	%f2, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	brnz,pt	$len, .L${bits}_cbc_enc_blk_loop
	add	$out, 8, $out

	membar	#StoreLoad|#StoreStore
	brnz,pt	$blk_init, .L${bits}_cbc_enc_loop
	mov	$blk_init, $len
___
$::code.=<<___ if ($::evp);
	st	%f0, [$ivec + 0]
	st	%f1, [$ivec + 4]
	st	%f2, [$ivec + 8]
	st	%f3, [$ivec + 12]
___
$::code.=<<___ if (!$::evp);	# 3b: misaligned-ivec handler above
	brnz,pn	$ivoff, 3b
	nop

	std	%f0, [$ivec + 0]	! write out ivec
	std	%f2, [$ivec + 8]
___
$::code.=<<___;
	ret
	restore
.type	${alg}${bits}_t4_cbc_encrypt,#function
.size	${alg}${bits}_t4_cbc_encrypt,.-${alg}${bits}_t4_cbc_encrypt
___
}
|
243 |
|
# Emit ${alg}${bits}_t4_cbc_decrypt(inp, out, len, key, ivec) into
# $::code.  Unlike CBC encryption, decryption parallelizes, so a 2x
# interleaved loop processes pairs of blocks and the 1x loop handles
# an odd leading block.  Paths generated:
#   - aligned-output fast paths (.L.._cbc_dec_loop / _loop2x),
#   - "2:" fixup paths for byte-unaligned output (ASI 0xc0 partial
#     stores),
#   - a bulk ".L..cbc_dec_blk" path using ASI 0xe2 block-initializing
#     stores when out is 8-byte aligned, len >= 256 and inp != out.
# The running IV (previous ciphertext block) is kept in %f12/%f14.
# $::evp selects aligned (ld/st) vs alignment-tolerant
# (alignaddr/faligndata) ivec load/writeback.
sub alg_cbc_decrypt_implement {
my ($alg,$bits) = @_;

$::code.=<<___;
.globl	${alg}${bits}_t4_cbc_decrypt
.align	32
${alg}${bits}_t4_cbc_decrypt:
	save	%sp, -$::frame, %sp
	sub	$inp, $out, $blk_init	! $inp!=$out
___
$::code.=<<___ if (!$::evp);	# non-EVP: ivec may be misaligned
	andcc	$ivec, 7, $ivoff
	alignaddr $ivec, %g0, $ivec

	ldd	[$ivec + 0], %f12	! load ivec
	bz,pt	%icc, 1f
	ldd	[$ivec + 8], %f14
	ldd	[$ivec + 16], %f0
	faligndata %f12, %f14, %f12
	faligndata %f14, %f0, %f14
1:
___
$::code.=<<___ if ($::evp);	# EVP: ivec is 32-bit aligned
	ld	[$ivec + 0], %f12	! load ivec
	ld	[$ivec + 4], %f13
	ld	[$ivec + 8], %f14
	ld	[$ivec + 12], %f15
___
$::code.=<<___;
	prefetch [$inp], 20
	prefetch [$inp + 63], 20
	call	_${alg}${bits}_load_deckey
	and	$inp, 7, $ileft
	andn	$inp, 7, $inp
	sll	$ileft, 3, $ileft
	mov	64, $iright
	mov	0xff, $omask
	sub	$iright, $ileft, $iright
	and	$out, 7, $ooff
	cmp	$len, 255
	movrnz	$ooff, 0, $blk_init		! if (	$out&7 ||
	movleu	$::size_t_cc, 0, $blk_init	!	$len<256 ||
	brnz,pn	$blk_init, .L${bits}cbc_dec_blk	!	$inp==$out)
	srl	$omask, $ooff, $omask

	andcc	$len, 16, %g0		! is number of blocks even?
	srlx	$len, 4, $len
	alignaddrl $out, %g0, $out
	bz	%icc, .L${bits}_cbc_dec_loop2x
	prefetch [$out], 22
.L${bits}_cbc_dec_loop:
	ldx	[$inp + 0], %o0
	brz,pt	$ileft, 4f
	ldx	[$inp + 8], %o1

	ldx	[$inp + 16], %o2
	sllx	%o0, $ileft, %o0
	srlx	%o1, $iright, %g1
	sllx	%o1, $ileft, %o1
	or	%g1, %o0, %o0
	srlx	%o2, $iright, %o2
	or	%o2, %o1, %o1
4:
	xor	%g4, %o0, %o2		! ^= rk[0]
	xor	%g5, %o1, %o3
	movxtod	%o2, %f0
	movxtod	%o3, %f2

	prefetch [$out + 63], 22
	prefetch [$inp + 16+63], 20
	call	_${alg}${bits}_decrypt_1x
	add	$inp, 16, $inp

	fxor	%f12, %f0, %f0		! ^= ivec
	fxor	%f14, %f2, %f2
	movxtod	%o0, %f12
	movxtod	%o1, %f14

	brnz,pn	$ooff, 2f
	sub	$len, 1, $len

	std	%f0, [$out + 0]
	std	%f2, [$out + 8]
	brnz,pt	$len, .L${bits}_cbc_dec_loop2x
	add	$out, 16, $out
___
$::code.=<<___ if ($::evp);
	st	%f12, [$ivec + 0]
	st	%f13, [$ivec + 4]
	st	%f14, [$ivec + 8]
	st	%f15, [$ivec + 12]
___
$::code.=<<___ if (!$::evp);
	brnz,pn	$ivoff, .L${bits}_cbc_dec_unaligned_ivec
	nop

	std	%f12, [$ivec + 0]	! write out ivec
	std	%f14, [$ivec + 8]
___
$::code.=<<___;
	ret
	restore

.align	16
2:	ldxa	[$inp]0x82, %o0		! avoid read-after-write hazard
					! and ~3x deterioration
					! in inp==out case
	faligndata %f0, %f0, %f4	! handle unaligned output
	faligndata %f0, %f2, %f6
	faligndata %f2, %f2, %f8

	stda	%f4, [$out + $omask]0xc0	! partial store
	std	%f6, [$out + 8]
	add	$out, 16, $out
	orn	%g0, $omask, $omask
	stda	%f8, [$out + $omask]0xc0	! partial store

	brnz,pt	$len, .L${bits}_cbc_dec_loop2x+4
	orn	%g0, $omask, $omask
___
$::code.=<<___ if ($::evp);
	st	%f12, [$ivec + 0]
	st	%f13, [$ivec + 4]
	st	%f14, [$ivec + 8]
	st	%f15, [$ivec + 12]
___
$::code.=<<___ if (!$::evp);
	brnz,pn	$ivoff, .L${bits}_cbc_dec_unaligned_ivec
	nop

	std	%f12, [$ivec + 0]	! write out ivec
	std	%f14, [$ivec + 8]
___
$::code.=<<___;
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L${bits}_cbc_dec_loop2x:
	ldx	[$inp + 0], %o0
	ldx	[$inp + 8], %o1
	ldx	[$inp + 16], %o2
	brz,pt	$ileft, 4f
	ldx	[$inp + 24], %o3

	ldx	[$inp + 32], %o4
	sllx	%o0, $ileft, %o0
	srlx	%o1, $iright, %g1
	or	%g1, %o0, %o0
	sllx	%o1, $ileft, %o1
	srlx	%o2, $iright, %g1
	or	%g1, %o1, %o1
	sllx	%o2, $ileft, %o2
	srlx	%o3, $iright, %g1
	or	%g1, %o2, %o2
	sllx	%o3, $ileft, %o3
	srlx	%o4, $iright, %o4
	or	%o4, %o3, %o3
4:
	xor	%g4, %o0, %o4		! ^= rk[0]
	xor	%g5, %o1, %o5
	movxtod	%o4, %f0
	movxtod	%o5, %f2
	xor	%g4, %o2, %o4
	xor	%g5, %o3, %o5
	movxtod	%o4, %f4
	movxtod	%o5, %f6

	prefetch [$out + 63], 22
	prefetch [$inp + 32+63], 20
	call	_${alg}${bits}_decrypt_2x
	add	$inp, 32, $inp

	movxtod	%o0, %f8
	movxtod	%o1, %f10
	fxor	%f12, %f0, %f0		! ^= ivec
	fxor	%f14, %f2, %f2
	movxtod	%o2, %f12
	movxtod	%o3, %f14
	fxor	%f8, %f4, %f4
	fxor	%f10, %f6, %f6

	brnz,pn	$ooff, 2f
	sub	$len, 2, $len

	std	%f0, [$out + 0]
	std	%f2, [$out + 8]
	std	%f4, [$out + 16]
	std	%f6, [$out + 24]
	brnz,pt	$len, .L${bits}_cbc_dec_loop2x
	add	$out, 32, $out
___
$::code.=<<___ if ($::evp);
	st	%f12, [$ivec + 0]
	st	%f13, [$ivec + 4]
	st	%f14, [$ivec + 8]
	st	%f15, [$ivec + 12]
___
$::code.=<<___ if (!$::evp);
	brnz,pn	$ivoff, .L${bits}_cbc_dec_unaligned_ivec
	nop

	std	%f12, [$ivec + 0]	! write out ivec
	std	%f14, [$ivec + 8]
___
$::code.=<<___;
	ret
	restore

.align	16
2:	ldxa	[$inp]0x82, %o0		! avoid read-after-write hazard
					! and ~3x deterioration
					! in inp==out case
	faligndata %f0, %f0, %f8	! handle unaligned output
	faligndata %f0, %f2, %f0
	faligndata %f2, %f4, %f2
	faligndata %f4, %f6, %f4
	faligndata %f6, %f6, %f6
	stda	%f8, [$out + $omask]0xc0	! partial store
	std	%f0, [$out + 8]
	std	%f2, [$out + 16]
	std	%f4, [$out + 24]
	add	$out, 32, $out
	orn	%g0, $omask, $omask
	stda	%f6, [$out + $omask]0xc0	! partial store

	brnz,pt	$len, .L${bits}_cbc_dec_loop2x+4
	orn	%g0, $omask, $omask
___
$::code.=<<___ if ($::evp);
	st	%f12, [$ivec + 0]
	st	%f13, [$ivec + 4]
	st	%f14, [$ivec + 8]
	st	%f15, [$ivec + 12]
___
$::code.=<<___ if (!$::evp);
	brnz,pn	$ivoff, .L${bits}_cbc_dec_unaligned_ivec
	nop

	std	%f12, [$ivec + 0]	! write out ivec
	std	%f14, [$ivec + 8]
	ret
	restore

.align	16
.L${bits}_cbc_dec_unaligned_ivec:
	alignaddrl $ivec, $ivoff, %g0	! handle unaligned ivec
	mov	0xff, $omask
	srl	$omask, $ivoff, $omask
	faligndata %f12, %f12, %f0
	faligndata %f12, %f14, %f2
	faligndata %f14, %f14, %f4
	stda	%f0, [$ivec + $omask]0xc0
	std	%f2, [$ivec + 8]
	add	$ivec, 16, $ivec
	orn	%g0, $omask, $omask
	stda	%f4, [$ivec + $omask]0xc0
___
$::code.=<<___;
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L${bits}cbc_dec_blk:
	add	$out, $len, $blk_init
	and	$blk_init, 63, $blk_init	! tail
	sub	$len, $blk_init, $len
	add	$blk_init, 15, $blk_init	! round up to 16n
	srlx	$len, 4, $len
	srl	$blk_init, 4, $blk_init
	sub	$len, 1, $len
	add	$blk_init, 1, $blk_init

.L${bits}_cbc_dec_blk_loop2x:
	ldx	[$inp + 0], %o0
	ldx	[$inp + 8], %o1
	ldx	[$inp + 16], %o2
	brz,pt	$ileft, 5f
	ldx	[$inp + 24], %o3

	ldx	[$inp + 32], %o4
	sllx	%o0, $ileft, %o0
	srlx	%o1, $iright, %g1
	or	%g1, %o0, %o0
	sllx	%o1, $ileft, %o1
	srlx	%o2, $iright, %g1
	or	%g1, %o1, %o1
	sllx	%o2, $ileft, %o2
	srlx	%o3, $iright, %g1
	or	%g1, %o2, %o2
	sllx	%o3, $ileft, %o3
	srlx	%o4, $iright, %o4
	or	%o4, %o3, %o3
5:
	xor	%g4, %o0, %o4		! ^= rk[0]
	xor	%g5, %o1, %o5
	movxtod	%o4, %f0
	movxtod	%o5, %f2
	xor	%g4, %o2, %o4
	xor	%g5, %o3, %o5
	movxtod	%o4, %f4
	movxtod	%o5, %f6

	prefetch [$inp + 32+63], 20
	call	_${alg}${bits}_decrypt_2x
	add	$inp, 32, $inp
	subcc	$len, 2, $len

	movxtod	%o0, %f8
	movxtod	%o1, %f10
	fxor	%f12, %f0, %f0		! ^= ivec
	fxor	%f14, %f2, %f2
	movxtod	%o2, %f12
	movxtod	%o3, %f14
	fxor	%f8, %f4, %f4
	fxor	%f10, %f6, %f6

	stda	%f0, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	add	$out, 8, $out
	stda	%f2, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	add	$out, 8, $out
	stda	%f4, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	add	$out, 8, $out
	stda	%f6, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt	$::size_t_cc, .L${bits}_cbc_dec_blk_loop2x
	add	$out, 8, $out

	add	$blk_init, $len, $len
	andcc	$len, 1, %g0		! is number of blocks even?
	membar	#StoreLoad|#StoreStore
	bnz,pt	%icc, .L${bits}_cbc_dec_loop
	srl	$len, 0, $len
	brnz,pn	$len, .L${bits}_cbc_dec_loop2x
	nop
___
$::code.=<<___ if ($::evp);
	st	%f12, [$ivec + 0]	! write out ivec
	st	%f13, [$ivec + 4]
	st	%f14, [$ivec + 8]
	st	%f15, [$ivec + 12]
___
# FIX: was "brnz,pn $ivoff, 3b" -- this function defines no numeric
# "3:" label, so the assembler would resolve 3b backwards into the
# preceding function's handler, which writes the wrong registers
# (%f0/%f2 instead of this routine's IV in %f12/%f14).  Use the named
# handler, as the other four misaligned-ivec branches above do.
$::code.=<<___ if (!$::evp);
	brnz,pn	$ivoff, .L${bits}_cbc_dec_unaligned_ivec
	nop

	std	%f12, [$ivec + 0]	! write out ivec
	std	%f14, [$ivec + 8]
___
$::code.=<<___;
	ret
	restore
.type	${alg}${bits}_t4_cbc_decrypt,#function
.size	${alg}${bits}_t4_cbc_decrypt,.-${alg}${bits}_t4_cbc_decrypt
___
}
|
601 |
|
# Emit ${alg}${bits}_t4_ctr32_encrypt(inp, out, len, key, ivec) into
# $::code.  The 128-bit counter block is loaded into %l4..%l7; the
# upper 96 bits never change, so they are pre-xored with the first
# round key into %g4/%g5 (with %f14 caching the most significant
# half), and only the least significant 32-bit word (%l7) is
# incremented per block ("srl %l7, 0, %l7" zero-extends it -- clruw).
# Because that word differs for every block, the first cipher round
# is emitted inline ($alg-specific: aes_eround01/23 or camellia_f)
# and the shared helpers are entered past their first round
# (_..._encrypt_1x+8 / _..._encrypt_2x+16).  Path structure (1x/2x
# loops, "2:" unaligned-output fixups, ASI 0xe2 bulk path) mirrors
# the CBC generators above.
sub alg_ctr32_implement {
my ($alg,$bits) = @_;

$::code.=<<___;
.globl	${alg}${bits}_t4_ctr32_encrypt
.align	32
${alg}${bits}_t4_ctr32_encrypt:
	save	%sp, -$::frame, %sp

	prefetch [$inp], 20
	prefetch [$inp + 63], 20
	call	_${alg}${bits}_load_enckey
	sllx	$len, 4, $len

	ld	[$ivec + 0], %l4	! counter
	ld	[$ivec + 4], %l5
	ld	[$ivec + 8], %l6
	ld	[$ivec + 12], %l7

	sllx	%l4, 32, %o5
	or	%l5, %o5, %o5
	sllx	%l6, 32, %g1
	xor	%o5, %g4, %g4		! ^= rk[0]
	xor	%g1, %g5, %g5
	movxtod	%g4, %f14		! most significant 64 bits

	sub	$inp, $out, $blk_init	! $inp!=$out
	and	$inp, 7, $ileft
	andn	$inp, 7, $inp
	sll	$ileft, 3, $ileft
	mov	64, $iright
	mov	0xff, $omask
	sub	$iright, $ileft, $iright
	and	$out, 7, $ooff
	cmp	$len, 255
	movrnz	$ooff, 0, $blk_init		! if (	$out&7 ||
	movleu	$::size_t_cc, 0, $blk_init	!	$len<256 ||
	brnz,pn	$blk_init, .L${bits}_ctr32_blk	!	$inp==$out)
	srl	$omask, $ooff, $omask

	andcc	$len, 16, %g0		! is number of blocks even?
	alignaddrl $out, %g0, $out
	bz	%icc, .L${bits}_ctr32_loop2x
	srlx	$len, 4, $len
.L${bits}_ctr32_loop:
	ldx	[$inp + 0], %o0
	brz,pt	$ileft, 4f
	ldx	[$inp + 8], %o1

	ldx	[$inp + 16], %o2
	sllx	%o0, $ileft, %o0
	srlx	%o1, $iright, %g1
	sllx	%o1, $ileft, %o1
	or	%g1, %o0, %o0
	srlx	%o2, $iright, %o2
	or	%o2, %o1, %o1
4:
	xor	%g5, %l7, %g1		! ^= rk[0]
	add	%l7, 1, %l7
	movxtod	%g1, %f2
	srl	%l7, 0, %l7		! clruw
	prefetch [$out + 63], 22
	prefetch [$inp + 16+63], 20
___
$::code.=<<___ if ($alg eq "aes");	# inline first AES round
	aes_eround01	%f16, %f14, %f2, %f4
	aes_eround23	%f18, %f14, %f2, %f2
___
$::code.=<<___ if ($alg eq "cmll");	# inline first Camellia rounds
	camellia_f	%f16, %f2, %f14, %f2
	camellia_f	%f18, %f14, %f2, %f0
___
$::code.=<<___;
	call	_${alg}${bits}_encrypt_1x+8
	add	$inp, 16, $inp

	movxtod	%o0, %f10
	movxtod	%o1, %f12
	fxor	%f10, %f0, %f0		! ^= inp
	fxor	%f12, %f2, %f2

	brnz,pn	$ooff, 2f
	sub	$len, 1, $len

	std	%f0, [$out + 0]
	std	%f2, [$out + 8]
	brnz,pt	$len, .L${bits}_ctr32_loop2x
	add	$out, 16, $out

	ret
	restore

.align	16
2:	ldxa	[$inp]0x82, %o0		! avoid read-after-write hazard
					! and ~3x deterioration
					! in inp==out case
	faligndata %f0, %f0, %f4	! handle unaligned output
	faligndata %f0, %f2, %f6
	faligndata %f2, %f2, %f8
	stda	%f4, [$out + $omask]0xc0	! partial store
	std	%f6, [$out + 8]
	add	$out, 16, $out
	orn	%g0, $omask, $omask
	stda	%f8, [$out + $omask]0xc0	! partial store

	brnz,pt	$len, .L${bits}_ctr32_loop2x+4
	orn	%g0, $omask, $omask

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L${bits}_ctr32_loop2x:
	ldx	[$inp + 0], %o0
	ldx	[$inp + 8], %o1
	ldx	[$inp + 16], %o2
	brz,pt	$ileft, 4f
	ldx	[$inp + 24], %o3

	ldx	[$inp + 32], %o4
	sllx	%o0, $ileft, %o0
	srlx	%o1, $iright, %g1
	or	%g1, %o0, %o0
	sllx	%o1, $ileft, %o1
	srlx	%o2, $iright, %g1
	or	%g1, %o1, %o1
	sllx	%o2, $ileft, %o2
	srlx	%o3, $iright, %g1
	or	%g1, %o2, %o2
	sllx	%o3, $ileft, %o3
	srlx	%o4, $iright, %o4
	or	%o4, %o3, %o3
4:
	xor	%g5, %l7, %g1		! ^= rk[0]
	add	%l7, 1, %l7
	movxtod	%g1, %f2
	srl	%l7, 0, %l7		! clruw
	xor	%g5, %l7, %g1
	add	%l7, 1, %l7
	movxtod	%g1, %f6
	srl	%l7, 0, %l7		! clruw
	prefetch [$out + 63], 22
	prefetch [$inp + 32+63], 20
___
$::code.=<<___ if ($alg eq "aes");	# inline first AES round, 2 blocks
	aes_eround01	%f16, %f14, %f2, %f8
	aes_eround23	%f18, %f14, %f2, %f2
	aes_eround01	%f16, %f14, %f6, %f10
	aes_eround23	%f18, %f14, %f6, %f6
___
$::code.=<<___ if ($alg eq "cmll");	# inline first Camellia rounds, 2 blocks
	camellia_f	%f16, %f2, %f14, %f2
	camellia_f	%f16, %f6, %f14, %f6
	camellia_f	%f18, %f14, %f2, %f0
	camellia_f	%f18, %f14, %f6, %f4
___
$::code.=<<___;
	call	_${alg}${bits}_encrypt_2x+16
	add	$inp, 32, $inp

	movxtod	%o0, %f8
	movxtod	%o1, %f10
	movxtod	%o2, %f12
	fxor	%f8, %f0, %f0		! ^= inp
	movxtod	%o3, %f8
	fxor	%f10, %f2, %f2
	fxor	%f12, %f4, %f4
	fxor	%f8, %f6, %f6

	brnz,pn	$ooff, 2f
	sub	$len, 2, $len

	std	%f0, [$out + 0]
	std	%f2, [$out + 8]
	std	%f4, [$out + 16]
	std	%f6, [$out + 24]
	brnz,pt	$len, .L${bits}_ctr32_loop2x
	add	$out, 32, $out

	ret
	restore

.align	16
2:	ldxa	[$inp]0x82, %o0		! avoid read-after-write hazard
					! and ~3x deterioration
					! in inp==out case
	faligndata %f0, %f0, %f8	! handle unaligned output
	faligndata %f0, %f2, %f0
	faligndata %f2, %f4, %f2
	faligndata %f4, %f6, %f4
	faligndata %f6, %f6, %f6

	stda	%f8, [$out + $omask]0xc0	! partial store
	std	%f0, [$out + 8]
	std	%f2, [$out + 16]
	std	%f4, [$out + 24]
	add	$out, 32, $out
	orn	%g0, $omask, $omask
	stda	%f6, [$out + $omask]0xc0	! partial store

	brnz,pt	$len, .L${bits}_ctr32_loop2x+4
	orn	%g0, $omask, $omask

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L${bits}_ctr32_blk:
	add	$out, $len, $blk_init
	and	$blk_init, 63, $blk_init	! tail
	sub	$len, $blk_init, $len
	add	$blk_init, 15, $blk_init	! round up to 16n
	srlx	$len, 4, $len
	srl	$blk_init, 4, $blk_init
	sub	$len, 1, $len
	add	$blk_init, 1, $blk_init

.L${bits}_ctr32_blk_loop2x:
	ldx	[$inp + 0], %o0
	ldx	[$inp + 8], %o1
	ldx	[$inp + 16], %o2
	brz,pt	$ileft, 5f
	ldx	[$inp + 24], %o3

	ldx	[$inp + 32], %o4
	sllx	%o0, $ileft, %o0
	srlx	%o1, $iright, %g1
	or	%g1, %o0, %o0
	sllx	%o1, $ileft, %o1
	srlx	%o2, $iright, %g1
	or	%g1, %o1, %o1
	sllx	%o2, $ileft, %o2
	srlx	%o3, $iright, %g1
	or	%g1, %o2, %o2
	sllx	%o3, $ileft, %o3
	srlx	%o4, $iright, %o4
	or	%o4, %o3, %o3
5:
	xor	%g5, %l7, %g1		! ^= rk[0]
	add	%l7, 1, %l7
	movxtod	%g1, %f2
	srl	%l7, 0, %l7		! clruw
	xor	%g5, %l7, %g1
	add	%l7, 1, %l7
	movxtod	%g1, %f6
	srl	%l7, 0, %l7		! clruw
	prefetch [$inp + 32+63], 20
___
$::code.=<<___ if ($alg eq "aes");	# inline first AES round, 2 blocks
	aes_eround01	%f16, %f14, %f2, %f8
	aes_eround23	%f18, %f14, %f2, %f2
	aes_eround01	%f16, %f14, %f6, %f10
	aes_eround23	%f18, %f14, %f6, %f6
___
$::code.=<<___ if ($alg eq "cmll");	# inline first Camellia rounds, 2 blocks
	camellia_f	%f16, %f2, %f14, %f2
	camellia_f	%f16, %f6, %f14, %f6
	camellia_f	%f18, %f14, %f2, %f0
	camellia_f	%f18, %f14, %f6, %f4
___
$::code.=<<___;
	call	_${alg}${bits}_encrypt_2x+16
	add	$inp, 32, $inp
	subcc	$len, 2, $len

	movxtod	%o0, %f8
	movxtod	%o1, %f10
	movxtod	%o2, %f12
	fxor	%f8, %f0, %f0		! ^= inp
	movxtod	%o3, %f8
	fxor	%f10, %f2, %f2
	fxor	%f12, %f4, %f4
	fxor	%f8, %f6, %f6

	stda	%f0, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	add	$out, 8, $out
	stda	%f2, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	add	$out, 8, $out
	stda	%f4, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	add	$out, 8, $out
	stda	%f6, [$out]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt	$::size_t_cc, .L${bits}_ctr32_blk_loop2x
	add	$out, 8, $out

	add	$blk_init, $len, $len
	andcc	$len, 1, %g0		! is number of blocks even?
	membar	#StoreLoad|#StoreStore
	bnz,pt	%icc, .L${bits}_ctr32_loop
	srl	$len, 0, $len
	brnz,pn	$len, .L${bits}_ctr32_loop2x
	nop

	ret
	restore
.type	${alg}${bits}_t4_ctr32_encrypt,#function
.size	${alg}${bits}_t4_ctr32_encrypt,.-${alg}${bits}_t4_ctr32_encrypt
___
}
|
902 |
|
903 sub alg_xts_implement { |
|
904 my ($alg,$bits,$dir) = @_; |
|
905 my ($inp,$out,$len,$key1,$key2,$ivec)=map("%i$_",(0..5)); |
|
906 my $rem=$ivec; |
|
907 |
|
908 $::code.=<<___; |
|
909 .globl ${alg}${bits}_t4_xts_${dir}crypt |
|
910 .align 32 |
|
911 ${alg}${bits}_t4_xts_${dir}crypt: |
|
912 save %sp, -$::frame-16, %sp |
|
913 |
|
914 mov $ivec, %o0 |
|
915 add %fp, $::bias-16, %o1 |
|
916 call ${alg}_t4_encrypt |
|
917 mov $key2, %o2 |
|
918 |
|
919 add %fp, $::bias-16, %l7 |
|
920 ldxa [%l7]0x88, %g2 |
|
921 add %fp, $::bias-8, %l7 |
|
922 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak |
|
923 |
|
924 sethi %hi(0x76543210), %l7 |
|
925 or %l7, %lo(0x76543210), %l7 |
|
926 bmask %l7, %g0, %g0 ! byte swap mask |
|
927 |
|
928 prefetch [$inp], 20 |
|
929 prefetch [$inp + 63], 20 |
|
930 call _${alg}${bits}_load_${dir}ckey |
|
931 and $len, 15, $rem |
|
932 and $len, -16, $len |
|
933 ___ |
|
934 $code.=<<___ if ($dir eq "de"); |
|
935 mov 0, %l7 |
|
936 movrnz $rem, 16, %l7 |
|
937 sub $len, %l7, $len |
|
938 ___ |
|
939 $code.=<<___; |
|
940 |
|
941 sub $inp, $out, $blk_init ! $inp!=$out |
|
942 and $inp, 7, $ileft |
|
943 andn $inp, 7, $inp |
|
944 sll $ileft, 3, $ileft |
|
945 mov 64, $iright |
|
946 mov 0xff, $omask |
|
947 sub $iright, $ileft, $iright |
|
948 and $out, 7, $ooff |
|
949 cmp $len, 255 |
|
950 movrnz $ooff, 0, $blk_init ! if ( $out&7 || |
|
951 movleu $::size_t_cc, 0, $blk_init ! $len<256 || |
|
952 brnz,pn $blk_init, .L${bits}_xts_${dir}blk ! $inp==$out) |
|
953 srl $omask, $ooff, $omask |
|
954 |
|
955 andcc $len, 16, %g0 ! is number of blocks even? |
|
956 ___ |
|
957 $code.=<<___ if ($dir eq "de"); |
|
958 brz,pn $len, .L${bits}_xts_${dir}steal |
|
959 ___ |
|
960 $code.=<<___; |
|
961 alignaddrl $out, %g0, $out |
|
962 bz %icc, .L${bits}_xts_${dir}loop2x |
|
963 srlx $len, 4, $len |
|
964 .L${bits}_xts_${dir}loop: |
|
965 ldx [$inp + 0], %o0 |
|
966 brz,pt $ileft, 4f |
|
967 ldx [$inp + 8], %o1 |
|
968 |
|
969 ldx [$inp + 16], %o2 |
|
970 sllx %o0, $ileft, %o0 |
|
971 srlx %o1, $iright, %g1 |
|
972 sllx %o1, $ileft, %o1 |
|
973 or %g1, %o0, %o0 |
|
974 srlx %o2, $iright, %o2 |
|
975 or %o2, %o1, %o1 |
|
976 4: |
|
977 movxtod %g2, %f12 |
|
978 movxtod %g3, %f14 |
|
979 bshuffle %f12, %f12, %f12 |
|
980 bshuffle %f14, %f14, %f14 |
|
981 |
|
982 xor %g4, %o0, %o0 ! ^= rk[0] |
|
983 xor %g5, %o1, %o1 |
|
984 movxtod %o0, %f0 |
|
985 movxtod %o1, %f2 |
|
986 |
|
987 fxor %f12, %f0, %f0 ! ^= tweak[0] |
|
988 fxor %f14, %f2, %f2 |
|
989 |
|
990 prefetch [$out + 63], 22 |
|
991 prefetch [$inp + 16+63], 20 |
|
992 call _${alg}${bits}_${dir}crypt_1x |
|
993 add $inp, 16, $inp |
|
994 |
|
995 fxor %f12, %f0, %f0 ! ^= tweak[0] |
|
996 fxor %f14, %f2, %f2 |
|
997 |
|
998 srax %g3, 63, %l7 ! next tweak value |
|
999 addcc %g2, %g2, %g2 |
|
1000 and %l7, 0x87, %l7 |
|
1001 addxc %g3, %g3, %g3 |
|
1002 xor %l7, %g2, %g2 |
|
1003 |
|
1004 brnz,pn $ooff, 2f |
|
1005 sub $len, 1, $len |
|
1006 |
|
1007 std %f0, [$out + 0] |
|
1008 std %f2, [$out + 8] |
|
1009 brnz,pt $len, .L${bits}_xts_${dir}loop2x |
|
1010 add $out, 16, $out |
|
1011 |
|
1012 brnz,pn $rem, .L${bits}_xts_${dir}steal |
|
1013 nop |
|
1014 |
|
1015 ret |
|
1016 restore |
|
1017 |
|
1018 .align 16 |
|
1019 2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard |
|
1020 ! and ~3x deterioration |
|
1021 ! in inp==out case |
|
1022 faligndata %f0, %f0, %f4 ! handle unaligned output |
|
1023 faligndata %f0, %f2, %f6 |
|
1024 faligndata %f2, %f2, %f8 |
|
1025 stda %f4, [$out + $omask]0xc0 ! partial store |
|
1026 std %f6, [$out + 8] |
|
1027 add $out, 16, $out |
|
1028 orn %g0, $omask, $omask |
|
1029 stda %f8, [$out + $omask]0xc0 ! partial store |
|
1030 |
|
1031 brnz,pt $len, .L${bits}_xts_${dir}loop2x+4 |
|
1032 orn %g0, $omask, $omask |
|
1033 |
|
1034 brnz,pn $rem, .L${bits}_xts_${dir}steal |
|
1035 nop |
|
1036 |
|
1037 ret |
|
1038 restore |
|
1039 |
|
1040 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
|
1041 .align 32 |
|
1042 .L${bits}_xts_${dir}loop2x: |
|
1043 ldx [$inp + 0], %o0 |
|
1044 ldx [$inp + 8], %o1 |
|
1045 ldx [$inp + 16], %o2 |
|
1046 brz,pt $ileft, 4f |
|
1047 ldx [$inp + 24], %o3 |
|
1048 |
|
1049 ldx [$inp + 32], %o4 |
|
1050 sllx %o0, $ileft, %o0 |
|
1051 srlx %o1, $iright, %g1 |
|
1052 or %g1, %o0, %o0 |
|
1053 sllx %o1, $ileft, %o1 |
|
1054 srlx %o2, $iright, %g1 |
|
1055 or %g1, %o1, %o1 |
|
1056 sllx %o2, $ileft, %o2 |
|
1057 srlx %o3, $iright, %g1 |
|
1058 or %g1, %o2, %o2 |
|
1059 sllx %o3, $ileft, %o3 |
|
1060 srlx %o4, $iright, %o4 |
|
1061 or %o4, %o3, %o3 |
|
1062 4: |
|
1063 movxtod %g2, %f12 |
|
1064 movxtod %g3, %f14 |
|
1065 bshuffle %f12, %f12, %f12 |
|
1066 bshuffle %f14, %f14, %f14 |
|
1067 |
|
1068 srax %g3, 63, %l7 ! next tweak value |
|
1069 addcc %g2, %g2, %g2 |
|
1070 and %l7, 0x87, %l7 |
|
1071 addxc %g3, %g3, %g3 |
|
1072 xor %l7, %g2, %g2 |
|
1073 |
|
1074 movxtod %g2, %f8 |
|
1075 movxtod %g3, %f10 |
|
1076 bshuffle %f8, %f8, %f8 |
|
1077 bshuffle %f10, %f10, %f10 |
|
1078 |
|
1079 xor %g4, %o0, %o0 ! ^= rk[0] |
|
1080 xor %g5, %o1, %o1 |
|
1081 xor %g4, %o2, %o2 ! ^= rk[0] |
|
1082 xor %g5, %o3, %o3 |
|
1083 movxtod %o0, %f0 |
|
1084 movxtod %o1, %f2 |
|
1085 movxtod %o2, %f4 |
|
1086 movxtod %o3, %f6 |
|
1087 |
|
1088 fxor %f12, %f0, %f0 ! ^= tweak[0] |
|
1089 fxor %f14, %f2, %f2 |
|
1090 fxor %f8, %f4, %f4 ! ^= tweak[0] |
|
1091 fxor %f10, %f6, %f6 |
|
1092 |
|
1093 prefetch [$out + 63], 22 |
|
1094 prefetch [$inp + 32+63], 20 |
|
1095 call _${alg}${bits}_${dir}crypt_2x |
|
1096 add $inp, 32, $inp |
|
1097 |
|
1098 movxtod %g2, %f8 |
|
1099 movxtod %g3, %f10 |
|
1100 |
|
1101 srax %g3, 63, %l7 ! next tweak value |
|
1102 addcc %g2, %g2, %g2 |
|
1103 and %l7, 0x87, %l7 |
|
1104 addxc %g3, %g3, %g3 |
|
1105 xor %l7, %g2, %g2 |
|
1106 |
|
1107 bshuffle %f8, %f8, %f8 |
|
1108 bshuffle %f10, %f10, %f10 |
|
1109 |
|
1110 fxor %f12, %f0, %f0 ! ^= tweak[0] |
|
1111 fxor %f14, %f2, %f2 |
|
1112 fxor %f8, %f4, %f4 |
|
1113 fxor %f10, %f6, %f6 |
|
1114 |
|
1115 brnz,pn $ooff, 2f |
|
1116 sub $len, 2, $len |
|
1117 |
|
1118 std %f0, [$out + 0] |
|
1119 std %f2, [$out + 8] |
|
1120 std %f4, [$out + 16] |
|
1121 std %f6, [$out + 24] |
|
1122 brnz,pt $len, .L${bits}_xts_${dir}loop2x |
|
1123 add $out, 32, $out |
|
1124 |
|
1125 fsrc2 %f4, %f0 |
|
1126 fsrc2 %f6, %f2 |
|
1127 brnz,pn $rem, .L${bits}_xts_${dir}steal |
|
1128 nop |
|
1129 |
|
1130 ret |
|
1131 restore |
|
1132 |
|
1133 .align 16 |
|
1134 2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard |
|
1135 ! and ~3x deterioration |
|
1136 ! in inp==out case |
|
1137 faligndata %f0, %f0, %f8 ! handle unaligned output |
|
1138 faligndata %f0, %f2, %f10 |
|
1139 faligndata %f2, %f4, %f12 |
|
1140 faligndata %f4, %f6, %f14 |
|
1141 faligndata %f6, %f6, %f0 |
|
1142 |
|
1143 stda %f8, [$out + $omask]0xc0 ! partial store |
|
1144 std %f10, [$out + 8] |
|
1145 std %f12, [$out + 16] |
|
1146 std %f14, [$out + 24] |
|
1147 add $out, 32, $out |
|
1148 orn %g0, $omask, $omask |
|
1149 stda %f0, [$out + $omask]0xc0 ! partial store |
|
1150 |
|
1151 brnz,pt $len, .L${bits}_xts_${dir}loop2x+4 |
|
1152 orn %g0, $omask, $omask |
|
1153 |
|
1154 fsrc2 %f4, %f0 |
|
1155 fsrc2 %f6, %f2 |
|
1156 brnz,pn $rem, .L${bits}_xts_${dir}steal |
|
1157 nop |
|
1158 |
|
1159 ret |
|
1160 restore |
|
1161 |
|
1162 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
|
1163 .align 32 |
|
1164 .L${bits}_xts_${dir}blk: |
|
1165 add $out, $len, $blk_init |
|
1166 and $blk_init, 63, $blk_init ! tail |
|
1167 sub $len, $blk_init, $len |
|
1168 add $blk_init, 15, $blk_init ! round up to 16n |
|
1169 srlx $len, 4, $len |
|
1170 srl $blk_init, 4, $blk_init |
|
1171 sub $len, 1, $len |
|
1172 add $blk_init, 1, $blk_init |
|
1173 |
|
1174 .L${bits}_xts_${dir}blk2x: |
|
1175 ldx [$inp + 0], %o0 |
|
1176 ldx [$inp + 8], %o1 |
|
1177 ldx [$inp + 16], %o2 |
|
1178 brz,pt $ileft, 5f |
|
1179 ldx [$inp + 24], %o3 |
|
1180 |
|
1181 ldx [$inp + 32], %o4 |
|
1182 sllx %o0, $ileft, %o0 |
|
1183 srlx %o1, $iright, %g1 |
|
1184 or %g1, %o0, %o0 |
|
1185 sllx %o1, $ileft, %o1 |
|
1186 srlx %o2, $iright, %g1 |
|
1187 or %g1, %o1, %o1 |
|
1188 sllx %o2, $ileft, %o2 |
|
1189 srlx %o3, $iright, %g1 |
|
1190 or %g1, %o2, %o2 |
|
1191 sllx %o3, $ileft, %o3 |
|
1192 srlx %o4, $iright, %o4 |
|
1193 or %o4, %o3, %o3 |
|
1194 5: |
|
1195 movxtod %g2, %f12 |
|
1196 movxtod %g3, %f14 |
|
1197 bshuffle %f12, %f12, %f12 |
|
1198 bshuffle %f14, %f14, %f14 |
|
1199 |
|
1200 srax %g3, 63, %l7 ! next tweak value |
|
1201 addcc %g2, %g2, %g2 |
|
1202 and %l7, 0x87, %l7 |
|
1203 addxc %g3, %g3, %g3 |
|
1204 xor %l7, %g2, %g2 |
|
1205 |
|
1206 movxtod %g2, %f8 |
|
1207 movxtod %g3, %f10 |
|
1208 bshuffle %f8, %f8, %f8 |
|
1209 bshuffle %f10, %f10, %f10 |
|
1210 |
|
1211 xor %g4, %o0, %o0 ! ^= rk[0] |
|
1212 xor %g5, %o1, %o1 |
|
1213 xor %g4, %o2, %o2 ! ^= rk[0] |
|
1214 xor %g5, %o3, %o3 |
|
1215 movxtod %o0, %f0 |
|
1216 movxtod %o1, %f2 |
|
1217 movxtod %o2, %f4 |
|
1218 movxtod %o3, %f6 |
|
1219 |
|
1220 fxor %f12, %f0, %f0 ! ^= tweak[0] |
|
1221 fxor %f14, %f2, %f2 |
|
1222 fxor %f8, %f4, %f4 ! ^= tweak[0] |
|
1223 fxor %f10, %f6, %f6 |
|
1224 |
|
1225 prefetch [$inp + 32+63], 20 |
|
1226 call _${alg}${bits}_${dir}crypt_2x |
|
1227 add $inp, 32, $inp |
|
1228 |
|
1229 movxtod %g2, %f8 |
|
1230 movxtod %g3, %f10 |
|
1231 |
|
1232 srax %g3, 63, %l7 ! next tweak value |
|
1233 addcc %g2, %g2, %g2 |
|
1234 and %l7, 0x87, %l7 |
|
1235 addxc %g3, %g3, %g3 |
|
1236 xor %l7, %g2, %g2 |
|
1237 |
|
1238 bshuffle %f8, %f8, %f8 |
|
1239 bshuffle %f10, %f10, %f10 |
|
1240 |
|
1241 fxor %f12, %f0, %f0 ! ^= tweak[0] |
|
1242 fxor %f14, %f2, %f2 |
|
1243 fxor %f8, %f4, %f4 |
|
1244 fxor %f10, %f6, %f6 |
|
1245 |
|
1246 stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific |
|
1247 add $out, 8, $out |
|
1248 stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific |
|
1249 add $out, 8, $out |
|
1250 stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific |
|
1251 add $out, 8, $out |
|
1252 stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific |
|
1253 bgu,pt $::size_t_cc, .L${bits}_xts_${dir}blk2x |
|
1254 add $out, 8, $out |
|
1255 |
|
1256 add $blk_init, $len, $len |
|
1257 andcc $len, 1, %g0 ! is number of blocks even? |
|
1258 membar #StoreLoad|#StoreStore |
|
1259 bnz,pt %icc, .L${bits}_xts_${dir}loop |
|
1260 srl $len, 0, $len |
|
1261 brnz,pn $len, .L${bits}_xts_${dir}loop2x |
|
1262 nop |
|
1263 |
|
1264 fsrc2 %f4, %f0 |
|
1265 fsrc2 %f6, %f2 |
|
1266 brnz,pn $rem, .L${bits}_xts_${dir}steal |
|
1267 nop |
|
1268 |
|
1269 ret |
|
1270 restore |
|
1271 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
|
1272 ___ |
|
1273 $code.=<<___ if ($dir eq "en"); |
|
1274 .align 32 |
|
1275 .L${bits}_xts_${dir}steal: |
|
1276 std %f0, [%fp + $::bias-16] ! copy of output |
|
1277 std %f2, [%fp + $::bias-8] |
|
1278 |
|
1279 srl $ileft, 3, $ileft |
|
1280 add %fp, $::bias-16, %l7 |
|
1281 add $inp, $ileft, $inp ! original $inp+$len&-15 |
|
1282 add $out, $ooff, $out ! original $out+$len&-15 |
|
1283 mov 0, $ileft |
|
1284 nop ! align |
|
1285 |
|
1286 .L${bits}_xts_${dir}stealing: |
|
1287 ldub [$inp + $ileft], %o0 |
|
1288 ldub [%l7 + $ileft], %o1 |
|
1289 dec $rem |
|
1290 stb %o0, [%l7 + $ileft] |
|
1291 stb %o1, [$out + $ileft] |
|
1292 brnz $rem, .L${bits}_xts_${dir}stealing |
|
1293 inc $ileft |
|
1294 |
|
1295 mov %l7, $inp |
|
1296 sub $out, 16, $out |
|
1297 mov 0, $ileft |
|
1298 sub $out, $ooff, $out |
|
1299 ba .L${bits}_xts_${dir}loop ! one more time |
|
1300 mov 1, $len ! $rem is 0 |
|
1301 ___ |
|
1302 $code.=<<___ if ($dir eq "de"); |
|
1303 .align 32 |
|
1304 .L${bits}_xts_${dir}steal: |
|
1305 ldx [$inp + 0], %o0 |
|
1306 brz,pt $ileft, 8f |
|
1307 ldx [$inp + 8], %o1 |
|
1308 |
|
1309 ldx [$inp + 16], %o2 |
|
1310 sllx %o0, $ileft, %o0 |
|
1311 srlx %o1, $iright, %g1 |
|
1312 sllx %o1, $ileft, %o1 |
|
1313 or %g1, %o0, %o0 |
|
1314 srlx %o2, $iright, %o2 |
|
1315 or %o2, %o1, %o1 |
|
1316 8: |
|
1317 srax %g3, 63, %l7 ! next tweak value |
|
1318 addcc %g2, %g2, %o2 |
|
1319 and %l7, 0x87, %l7 |
|
1320 addxc %g3, %g3, %o3 |
|
1321 xor %l7, %o2, %o2 |
|
1322 |
|
1323 movxtod %o2, %f12 |
|
1324 movxtod %o3, %f14 |
|
1325 bshuffle %f12, %f12, %f12 |
|
1326 bshuffle %f14, %f14, %f14 |
|
1327 |
|
1328 xor %g4, %o0, %o0 ! ^= rk[0] |
|
1329 xor %g5, %o1, %o1 |
|
1330 movxtod %o0, %f0 |
|
1331 movxtod %o1, %f2 |
|
1332 |
|
1333 fxor %f12, %f0, %f0 ! ^= tweak[0] |
|
1334 fxor %f14, %f2, %f2 |
|
1335 |
|
1336 call _${alg}${bits}_${dir}crypt_1x |
|
1337 add $inp, 16, $inp |
|
1338 |
|
1339 fxor %f12, %f0, %f0 ! ^= tweak[0] |
|
1340 fxor %f14, %f2, %f2 |
|
1341 |
|
1342 std %f0, [%fp + $::bias-16] |
|
1343 std %f2, [%fp + $::bias-8] |
|
1344 |
|
1345 srl $ileft, 3, $ileft |
|
1346 add %fp, $::bias-16, %l7 |
|
1347 add $inp, $ileft, $inp ! original $inp+$len&-15 |
|
1348 add $out, $ooff, $out ! original $out+$len&-15 |
|
1349 mov 0, $ileft |
|
1350 add $out, 16, $out |
|
1351 nop ! align |
|
1352 |
|
1353 .L${bits}_xts_${dir}stealing: |
|
1354 ldub [$inp + $ileft], %o0 |
|
1355 ldub [%l7 + $ileft], %o1 |
|
1356 dec $rem |
|
1357 stb %o0, [%l7 + $ileft] |
|
1358 stb %o1, [$out + $ileft] |
|
1359 brnz $rem, .L${bits}_xts_${dir}stealing |
|
1360 inc $ileft |
|
1361 |
|
1362 mov %l7, $inp |
|
1363 sub $out, 16, $out |
|
1364 mov 0, $ileft |
|
1365 sub $out, $ooff, $out |
|
1366 ba .L${bits}_xts_${dir}loop ! one more time |
|
1367 mov 1, $len ! $rem is 0 |
|
1368 ___ |
|
1369 $code.=<<___; |
|
1370 ret |
|
1371 restore |
|
1372 .type ${alg}${bits}_t4_xts_${dir}crypt,#function |
|
1373 .size ${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt |
|
1374 ___ |
|
1375 } |
|
1376 |
|
1377 # Purpose of these subroutines is to explicitly encode VIS instructions, |
|
1378 # so that one can compile the module without having to specify VIS |
|
# extensions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
|
1380 # Idea is to reserve for option to produce "universal" binary and let |
|
1381 # programmer detect if current CPU is VIS capable at run-time. |
|
# Encode a 3-operand VIS instruction as a raw ".word" directive so the
# module assembles even without VIS support in the assembler.  Returns
# the ".word 0x... !mnemonic" line on success, or the untouched textual
# instruction when the mnemonic is unknown or an operand cannot be
# parsed/encoded (odd upper-bank double register).
sub unvis {
my ($mnemonic,$rs1,$rs2,$rd)=@_;
my %visopf = ( "faligndata" => 0x048,
	       "bshuffle"   => 0x04c,
	       "fnot2"      => 0x066,
	       "fxor"       => 0x06c,
	       "fsrc2"      => 0x078 );

    my $ref = "$mnemonic\t$rs1,$rs2,$rd";	# textual fallback
    my $opf = $visopf{$mnemonic} or return $ref;

    for my $reg ($rs1, $rs2, $rd) {	# loop var aliases the operands
	$reg =~ /%f([0-9]{1,2})/ or return $ref;
	my $num = $1;
	if ($num >= 32) {
	    return $ref if $num & 1;	# odd upper half is not a double reg
	    # re-encode for upper double register addressing
	    $num = ($num | $num>>5) & 31;
	}
	$reg = $num;			# write back through the alias
    }

    return sprintf ".word\t0x%08x !%s",
		   0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
		   $ref;
}
|
1411 |
|
# Encode a 3-operand VIS3 integer instruction (addxc, umulxhi, bmask,
# alignaddr, ...) as a raw ".word".  Integer registers %g/%o/%l/%i are
# flattened to their 0..31 encoding.  Falls back to the textual form
# for unknown mnemonics or unparsable operands.
sub unvis3 {
my ($mnemonic,$rs1,$rs2,$rd)=@_;
my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
my %visopf = ( "addxc"      => 0x011,
	       "addxccc"    => 0x013,
	       "umulxhi"    => 0x016,
	       "alignaddr"  => 0x018,
	       "bmask"      => 0x019,
	       "alignaddrl" => 0x01a );

    my $ref = "$mnemonic\t$rs1,$rs2,$rd";	# textual fallback
    my $opf = $visopf{$mnemonic} or return $ref;

    for my $reg ($rs1, $rs2, $rd) {	# loop var aliases the operands
	$reg =~ /%([goli])([0-9])/ or return $ref;
	$reg = $bias{$1} + $2;		# flat integer register number
    }

    return sprintf ".word\t0x%08x !%s",
		   0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
		   $ref;
}
|
1438 |
|
# Encode a 4-operand T4 AES round instruction as a raw ".word".  The
# opf for aes_eround01 is 0, so presence in the table must be tested
# with defined(), not truth.  Returns the textual instruction when the
# mnemonic is unknown or an operand cannot be encoded.
sub unaes_round {	# 4-argument instructions
my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_;
my %aesopf = ( "aes_eround01"  => 0,
	       "aes_eround23"  => 1,
	       "aes_dround01"  => 2,
	       "aes_dround23"  => 3,
	       "aes_eround01_l"=> 4,
	       "aes_eround23_l"=> 5,
	       "aes_dround01_l"=> 6,
	       "aes_dround23_l"=> 7,
	       "aes_kexpand1"  => 8 );

    my $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd";	# textual fallback
    my $opf = $aesopf{$mnemonic};
    return $ref unless defined $opf;		# opf 0 is valid!

    # rs3 names an even double register; fold %f32..%f62 into 5 bits
    if ($rs3 =~ /%f([0-6]*[02468])/) {
	$rs3 = ($1 | $1>>5) & 31;
    }

    for my $reg ($rs1, $rs2, $rd) {	# loop var aliases the operands
	$reg =~ /%f([0-9]{1,2})/ or return $ref;
	my $num = $1;
	if ($num >= 32) {
	    return $ref if $num & 1;	# odd upper half is not a double reg
	    # re-encode for upper double register addressing
	    $num = ($num | $num>>5) & 31;
	}
	$reg = $num;
    }

    return sprintf ".word\t0x%08x !%s",
		   2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|$opf<<5|$rs2,
		   $ref;
}
|
1473 |
|
# Encode a 3-operand T4 AES key-expansion instruction (aes_kexpand0/2)
# as a raw ".word".  Returns the textual instruction for unknown
# mnemonics or unencodable operands.
sub unaes_kexpand {	# 3-argument instructions
my ($mnemonic,$rs1,$rs2,$rd)=@_;
my %aesopf = ( "aes_kexpand0" => 0x130,
	       "aes_kexpand2" => 0x131 );

    my $ref = "$mnemonic\t$rs1,$rs2,$rd";	# textual fallback
    my $opf = $aesopf{$mnemonic};
    return $ref unless defined $opf;

    for my $reg ($rs1, $rs2, $rd) {	# loop var aliases the operands
	$reg =~ /%f([0-9]{1,2})/ or return $ref;
	my $num = $1;
	if ($num >= 32) {
	    return $ref if $num & 1;	# odd upper half is not a double reg
	    # re-encode for upper double register addressing
	    $num = ($num | $num>>5) & 31;
	}
	$reg = $num;
    }

    return sprintf ".word\t0x%08x !%s",
		   2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2,
		   $ref;
}
|
1500 |
|
# Encode the 4-operand camellia_f instruction as a raw ".word".  Unlike
# the other encoders there is no mnemonic table: camellia_f has a fixed
# opf of 0xc within the op3=0x19 group, so only operand parsing can
# force the textual fallback.
sub uncamellia_f {	# 4-argument instructions
my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_;

    my $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd";	# textual fallback

    # rs3 names an even double register; fold %f32..%f62 into 5 bits
    if ($rs3 =~ /%f([0-6]*[02468])/) {
	$rs3 = ($1 | $1>>5) & 31;
    }

    for my $reg ($rs1, $rs2, $rd) {	# loop var aliases the operands
	$reg =~ /%f([0-9]{1,2})/ or return $ref;
	my $num = $1;
	if ($num >= 32) {
	    return $ref if $num & 1;	# odd upper half is not a double reg
	    # re-encode for upper double register addressing
	    $num = ($num | $num>>5) & 31;
	}
	$reg = $num;
    }

    return sprintf ".word\t0x%08x !%s",
		   2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|0xc<<5|$rs2,
		   $ref;
}
|
1526 |
|
# Encode a 3-operand Camellia instruction (camellia_fl/camellia_fli)
# as a raw ".word".  Returns the textual instruction for unknown
# mnemonics or unencodable operands.
sub uncamellia3 {	# 3-argument instructions
my ($mnemonic,$rs1,$rs2,$rd)=@_;
my %cmllopf = ( "camellia_fl"  => 0x13c,
		"camellia_fli" => 0x13d );

    my $ref = "$mnemonic\t$rs1,$rs2,$rd";	# textual fallback
    my $opf = $cmllopf{$mnemonic};
    return $ref unless defined $opf;

    for my $reg ($rs1, $rs2, $rd) {	# loop var aliases the operands
	$reg =~ /%f([0-9]{1,2})/ or return $ref;
	my $num = $1;
	if ($num >= 32) {
	    return $ref if $num & 1;	# odd upper half is not a double reg
	    # re-encode for upper double register addressing
	    $num = ($num | $num>>5) & 31;
	}
	$reg = $num;
    }

    return sprintf ".word\t0x%08x !%s",
		   2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2,
		   $ref;
}
|
1553 |
|
# Encode a 2-operand move between integer and FP register files
# (movxtod, movdtox, ...) as a raw ".word".  Integer registers take a
# per-class bias; FP registers >= 32 are folded into 5-bit double
# addressing.  Returns the textual instruction otherwise.
sub unmovxtox {	# 2-argument instructions
my ($mnemonic,$rs,$rd)=@_;
my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24, "f" => 0 );
my %movxopf = ( "movdtox" => 0x110,
		"movstouw"=> 0x111,
		"movstosw"=> 0x113,
		"movxtod" => 0x118,
		"movwtos" => 0x119 );

    my $ref = "$mnemonic\t$rs,$rd";	# textual fallback
    my $opf = $movxopf{$mnemonic};
    return $ref unless defined $opf;

    for my $reg ($rs, $rd) {	# loop var aliases the operands
	$reg =~ /%([fgoli])([0-9]{1,2})/ or return $ref;
	my ($class, $num) = ($1, $2);
	if ($num >= 32) {
	    return $ref if $num & 1;	# odd upper half is not a double reg
	    # re-encode for upper double register addressing
	    $reg = ($num | $num>>5) & 31;
	} else {
	    $reg = $bias{$class} + $num;
	}
    }

    return sprintf ".word\t0x%08x !%s",
		   2<<30|$rd<<25|0x36<<19|$opf<<5|$rs,
		   $ref;
}
|
1584 |
|
# Encode T4 DES instructions (des_round/des_ip/des_iip/des_kexpand) as
# raw ".word" directives so no crypto-capable assembler is required.
# First argument is the mnemonic, the rest are its operands; returns
# either the ".word 0x... !insn" line or, when the mnemonic is unknown
# or an operand cannot be encoded, the untouched textual instruction.
sub undes {
my ($mnemonic)=shift;
my @args=@_;
my ($ref,$opf);
my %desopf = ( "des_round"   => 0b1001,
	       "des_ip"      => 0b100110100,
	       "des_iip"     => 0b100110101,
	       "des_kexpand" => 0b100110110 );

    $ref = "$mnemonic\t".join(",",@_);

    if (defined($opf=$desopf{$mnemonic})) {
	if ($mnemonic eq "des_round") {		# 4-arg
	    foreach (@args[0..3]) {
		return $ref if (!/%f([0-9]{1,2})/);
		$_=$1;
		if ($1>=32) {
		    return $ref if ($1&1);
		    # re-encode for upper double register addressing
		    $_=($1|$1>>5)&31;
		}
	    }
	    return sprintf ".word\t0x%08x !%s",
			   2<<30|0b011001<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<9|$args[3]<<25,
			   $ref;
	} elsif ($mnemonic eq "des_kexpand") {	# 3-arg
	    foreach (@args[0..2]) {
		return $ref if (!/(%f)?([0-9]{1,2})/);
		$_=$2;
		if ($2>=32) {
		    return $ref if ($2&1);
		    # re-encode for upper double register addressing
		    $_=($2|$2>>5)&31;
		}
	    }
	    return sprintf ".word\t0x%08x !%s",
			   2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<25,
			   $ref;
	} else {				# 2-arg: des_ip/des_iip
	    foreach (@args[0..1]) {
		return $ref if (!/%f([0-9]{1,2})/);
		$_=$1;
		if ($1>=32) {
		    # BUGFIX: was "($2&1)" — the regex above has only one
		    # capture group, so $2 was always undef and odd upper
		    # registers (%f33...) were silently mis-encoded instead
		    # of falling back to the textual form.
		    return $ref if ($1&1);
		    # re-encode for upper double register addressing
		    $_=($1|$1>>5)&31;
		}
	    }
	    return sprintf ".word\t0x%08x !%s",
			   2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]<<25,
			   $ref;
	}
    } else {
	return $ref;
    }
}
|
1641 |
|
# Final pass over the assembly accumulated in $::code: expand backticked
# Perl expressions, normalize two-operand "f...2..." forms into the
# three-operand shape the VIS encoder expects, then rewrite crypto/VIS
# mnemonics (which stock assemblers may not know) into raw ".word"
# encodings via the un* helpers above.  Each processed line is printed
# to the currently selected output handle.
sub emit_assembler {
    foreach (split("\n",$::code)) {
	s/\`([^\`]*)\`/eval $1/ge;	# expand `...` build-time expressions

	# two-operand f*2* forms (e.g. fsrc2 %fA,%fB) gain an implicit
	# %f0 first source so unvis() below sees three operands
	s/\b(f[a-z]+2[sd]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})\s*$/$1\t%f0,$2,$3/go;

	# The substitutions below are chained with "or" so at most one
	# encoder fires per line; the more specific mnemonic patterns
	# must stay first (e.g. aes_*/camellia_* before generic [fb]*).
	s/\b(aes_[edk][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
	 &unaes_round($1,$2,$3,$4,$5)
	/geo		or
	s/\b(aes_kexpand[02])\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
	 &unaes_kexpand($1,$2,$3,$4)
	/geo		or
	s/\b(camellia_f)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/
	 &uncamellia_f($1,$2,$3,$4,$5)
	/geo		or
	s/\b(camellia_[^s]+)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
	 &uncamellia3($1,$2,$3,$4)
	/geo		or
	s/\b(des_\w+)\s+(?<rs1>%f[0-9]{1,2}),\s*(?<rs2>[%fx0-9]+)(,\s*(?<rs3>%f[0-9]{1,2})(,\s*(?<rs4>%f[0-9]{1,2}))?)?/
	 &undes($1,$+{rs1},$+{rs2},$+{rs3},$+{rs4})
	/geo		or
	s/\b(mov[ds]to\w+)\s+(%f[0-9]{1,2}),\s*(%[goli][0-7])/
	 &unmovxtox($1,$2,$3)
	/geo		or
	s/\b(mov[xw]to[ds])\s+(%[goli][0-7]),\s*(%f[0-9]{1,2})/
	 &unmovxtox($1,$2,$3)
	/geo		or
	s/\b([fb][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/
	 &unvis($1,$2,$3,$4)
	/geo		or
	s/\b(umulxhi|bmask|addxc[c]{0,2}|alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
	 &unvis3($1,$2,$3,$4)
	/geo;

	print $_,"\n";
    }
}
|
1679 |
|
1;	# file is pulled in via require/do and must return a true value
|