components/openssl/openssl-1.0.1/inline-t4/dest4-sparcv9.pl
changeset 4822 1fb8a14c6702
parent 4821 54dafbe33fdb
child 4823 3ef8b7f4d9d8
equal deleted inserted replaced
4821:54dafbe33fdb 4822:1fb8a14c6702
     1 #!/usr/bin/env perl
       
     2 
       
     3 # ====================================================================
       
     4 # Written by David S. Miller <[email protected]> and Andy Polyakov
       
     5 # <[email protected]>. The module is licensed under 2-clause BSD
       
     6 # license. March 2013. All rights reserved.
       
     7 # ====================================================================
       
     8 
       
     9 ######################################################################
       
    10 # DES for SPARC T4.
       
    11 #
       
    12 # As with other hardware-assisted ciphers CBC encrypt results [for
       
    13 # aligned data] are virtually identical to critical path lengths:
       
    14 #
       
    15 #		DES		Triple-DES
       
    16 # CBC encrypt	4.14/4.15(*)	11.7/11.7
       
    17 # CBC decrypt	1.77/4.11(**)	6.42/7.47
       
    18 #
       
    19 #			 (*)	numbers after slash are for
       
    20 #				misaligned data;
       
    21 #			 (**)	this is result for largest
       
    22 #				block size, unlike all other
       
    23 #				cases smaller blocks results
       
    24 #				are better[?];
       
    25 
       
    # Work out the directory this script was started from so that
    # sparcv9_modes.pl can be picked up either next to the script or from
    # the shared perlasm directory two levels up.  If the script is invoked
    # without any directory component (e.g. "perl dest4-sparcv9.pl") the
    # pattern does not match; fall back to "./" rather than using an
    # undefined or stale $1, which would put "" on the search path.
    $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "./";
    push(@INC,"${dir}","${dir}../../perlasm");
    require "sparcv9_modes.pl";

    # asm_init is not defined in this file (presumably it is provided by
    # sparcv9_modes.pl); it receives the command line arguments unchanged.
    &asm_init(@ARGV);
       
    31 
       
    # When $::abibits is 64, declare %g2 and %g3 as scratch registers via
    # .register directives; nothing is emitted for any other value.
    if ($::abibits==64) {
	$code.=<<___;
.register       %g2,#scratch
.register       %g3,#scratch
___
    }

    # Everything appended to $code after this point goes into .text.
    $code.=".text\n";
       
    40 
       
    # des_t4_key_expand
    #
    # Emits a leaf routine taking the key pointer in %o0 and the schedule
    # pointer in %o1.  The 8-byte key is loaded with ldd; when the pointer is
    # not 8-byte aligned a second doubleword is loaded and the two are merged
    # with faligndata.  A chain of des_kexpand instructions then produces 16
    # doublewords which are stored at offsets 0x00..0x78 from %o1 (128 bytes).
    # The stores are interleaved with the expansion steps, so the instruction
    # order below must be kept exactly as is.
    {
    my $keyp  = "%o0";		# in:  pointer to the key
    my $sched = "%o1";		# out: pointer to the expanded schedule

    $code.=<<"___";
.align	32
.globl	des_t4_key_expand
.type	des_t4_key_expand,#function
des_t4_key_expand:
	andcc		$keyp, 0x7, %g0
	alignaddr	$keyp, %g0, $keyp
	bz,pt		%icc, 1f
	ldd		[$keyp + 0x00], %f0
	ldd		[$keyp + 0x08], %f2
	faligndata	%f0, %f2, %f0
1:	des_kexpand	%f0, 0, %f0
	des_kexpand	%f0, 1, %f2
	std		%f0, [$sched + 0x00]
	des_kexpand	%f2, 3, %f6
	std		%f2, [$sched + 0x08]
	des_kexpand	%f2, 2, %f4
	des_kexpand	%f6, 3, %f10
	std		%f6, [$sched + 0x18]
	des_kexpand	%f6, 2, %f8
	std		%f4, [$sched + 0x10]
	des_kexpand	%f10, 3, %f14
	std		%f10, [$sched + 0x28]
	des_kexpand	%f10, 2, %f12
	std		%f8, [$sched + 0x20]
	des_kexpand	%f14, 1, %f16
	std		%f14, [$sched + 0x38]
	des_kexpand	%f16, 3, %f20
	std		%f12, [$sched + 0x30]
	des_kexpand	%f16, 2, %f18
	std		%f16, [$sched + 0x40]
	des_kexpand	%f20, 3, %f24
	std		%f20, [$sched + 0x50]
	des_kexpand	%f20, 2, %f22
	std		%f18, [$sched + 0x48]
	des_kexpand	%f24, 3, %f28
	std		%f24, [$sched + 0x60]
	des_kexpand	%f24, 2, %f26
	std		%f22, [$sched + 0x58]
	des_kexpand	%f28, 1, %f30
	std		%f28, [$sched + 0x70]
	std		%f26, [$sched + 0x68]
	retl
	std		%f30, [$sched + 0x78]
.size	des_t4_key_expand,.-des_t4_key_expand
___
    }
       
    # CBC entry points: des_t4_cbc_encrypt, des_t4_cbc_decrypt,
    # des_t4_ede3_cbc_encrypt and des_t4_ede3_cbc_decrypt.
    #
    # All four routines take (inp, out, len, key, ivec) in %o0-%o4 and use
    # %g1-%g3 for the input shift counts and the output byte mask.  len is a
    # byte count and is converted to a count of whole 8-byte blocks.  The
    # updated chaining value is written back through ivec on return.
    #
    # Misaligned input is assembled from two 64-bit loads with shifts;
    # misaligned output is written with two partial stores under a byte mask.
    #
    # The processing loops handle one block *before* testing the remaining
    # count, so they must never be entered with a zero block count: the
    # counter would wrap and the loop would run far past the end of both
    # buffers.  Each routine therefore returns early, without touching out
    # or ivec, when len is smaller than one block.
    #
    # NOTE(review): len is examined as a full 64-bit register, exactly as the
    # existing srlx/brnz instructions already do; confirm that callers built
    # for the 32-bit ABI pass it with clean upper bits.
    { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
      my ($ileft,$iright,$omask) = map("%g$_",(1..3));

    $code.=<<___;
.globl	des_t4_cbc_encrypt
.align	32
des_t4_cbc_encrypt:
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	brz,pn		$len, .Ldes_cbc_enc_abort	! fewer than 8 bytes, nothing to do
	nop

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.Ldes_cbc_enc_abort:
	retl
	nop
.type	des_t4_cbc_encrypt,#function
.size	des_t4_cbc_encrypt,.-des_t4_cbc_encrypt

.globl	des_t4_cbc_decrypt
.align	32
des_t4_cbc_decrypt:
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	brz,pn		$len, .Ldes_cbc_dec_abort	! fewer than 8 bytes, nothing to do
	nop

	ldd		[$key + 0x78], %f4	! load key schedule
	ldd		[$key + 0x70], %f6
	ldd		[$key + 0x68], %f8
	ldd		[$key + 0x60], %f10
	ldd		[$key + 0x58], %f12
	ldd		[$key + 0x50], %f14
	ldd		[$key + 0x48], %f16
	ldd		[$key + 0x40], %f18
	ldd		[$key + 0x38], %f20
	ldd		[$key + 0x30], %f22
	ldd		[$key + 0x28], %f24
	ldd		[$key + 0x20], %f26
	ldd		[$key + 0x18], %f28
	ldd		[$key + 0x10], %f30
	ldd		[$key + 0x08], %f32
	ldd		[$key + 0x00], %f34

.Ldes_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.Ldes_cbc_dec_abort:
	retl
	nop
.type	des_t4_cbc_decrypt,#function
.size	des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
___

    # One might wonder why there are back-to-back des_iip/des_ip pairs
    # between the EDE passes. Indeed, aren't they inverses of each other?
    # They almost are. The outcome of the pair is the 32-bit words being
    # swapped in the target register. Consider the des_iip/des_ip pair as a
    # way to perform the due swap; it is actually the fastest way in this case.
    #
    # Only the first pass' key schedule is kept resident in %f4-%f34; the
    # schedules for the second and third pass are reloaded into %f36-%f62 on
    # every iteration, with the loads interleaved between the rounds.

    $code.=<<___;
.globl	des_t4_ede3_cbc_encrypt
.align	32
des_t4_ede3_cbc_encrypt:
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	brz,pn		$len, .Ldes_ede3_cbc_enc_abort	! fewer than 8 bytes, nothing to do
	nop

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_ede3_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x100-0x08], %f36
	ldd		[$key + 0x100-0x10], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x100-0x18], %f40
	ldd		[$key + 0x100-0x20], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x100-0x28], %f44
	ldd		[$key + 0x100-0x30], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x100-0x38], %f48
	ldd		[$key + 0x100-0x40], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x100-0x48], %f52
	ldd		[$key + 0x100-0x50], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x100-0x58], %f56
	ldd		[$key + 0x100-0x60], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x100-0x68], %f60
	ldd		[$key + 0x100-0x70], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100-0x78], %f36
	ldd		[$key + 0x100-0x80], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x100+0x00], %f40
	ldd		[$key + 0x100+0x08], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x100+0x10], %f44
	ldd		[$key + 0x100+0x18], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x100+0x20], %f48
	ldd		[$key + 0x100+0x28], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x100+0x30], %f52
	ldd		[$key + 0x100+0x38], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100+0x40], %f56
	ldd		[$key + 0x100+0x48], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x100+0x50], %f60
	ldd		[$key + 0x100+0x58], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x100+0x60], %f36
	ldd		[$key + 0x100+0x68], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x100+0x70], %f40
	ldd		[$key + 0x100+0x78], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~2x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.Ldes_ede3_cbc_enc_abort:
	retl
	nop
.type	des_t4_ede3_cbc_encrypt,#function
.size	des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt

.globl	des_t4_ede3_cbc_decrypt
.align	32
des_t4_ede3_cbc_decrypt:
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	brz,pn		$len, .Ldes_ede3_cbc_dec_abort	! fewer than 8 bytes, nothing to do
	nop

	ldd		[$key + 0x100+0x78], %f4	! load key schedule
	ldd		[$key + 0x100+0x70], %f6
	ldd		[$key + 0x100+0x68], %f8
	ldd		[$key + 0x100+0x60], %f10
	ldd		[$key + 0x100+0x58], %f12
	ldd		[$key + 0x100+0x50], %f14
	ldd		[$key + 0x100+0x48], %f16
	ldd		[$key + 0x100+0x40], %f18
	ldd		[$key + 0x100+0x38], %f20
	ldd		[$key + 0x100+0x30], %f22
	ldd		[$key + 0x100+0x28], %f24
	ldd		[$key + 0x100+0x20], %f26
	ldd		[$key + 0x100+0x18], %f28
	ldd		[$key + 0x100+0x10], %f30
	ldd		[$key + 0x100+0x08], %f32
	ldd		[$key + 0x100+0x00], %f34

.Ldes_ede3_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x80+0x00], %f36
	ldd		[$key + 0x80+0x08], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x80+0x10], %f40
	ldd		[$key + 0x80+0x18], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x80+0x20], %f44
	ldd		[$key + 0x80+0x28], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x80+0x30], %f48
	ldd		[$key + 0x80+0x38], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x80+0x40], %f52
	ldd		[$key + 0x80+0x48], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x80+0x50], %f56
	ldd		[$key + 0x80+0x58], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x80+0x60], %f60
	ldd		[$key + 0x80+0x68], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80+0x70], %f36
	ldd		[$key + 0x80+0x78], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x80-0x08], %f40
	ldd		[$key + 0x80-0x10], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x80-0x18], %f44
	ldd		[$key + 0x80-0x20], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x80-0x28], %f48
	ldd		[$key + 0x80-0x30], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x80-0x38], %f52
	ldd		[$key + 0x80-0x40], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80-0x48], %f56
	ldd		[$key + 0x80-0x50], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x80-0x58], %f60
	ldd		[$key + 0x80-0x60], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x80-0x68], %f36
	ldd		[$key + 0x80-0x70], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x80-0x78], %f40
	ldd		[$key + 0x80-0x80], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.Ldes_ede3_cbc_dec_abort:
	retl
	nop
.type	des_t4_ede3_cbc_decrypt,#function
.size	des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
___
    }
       
   # Append an identification string to the generated module and bring the
   # location counter back to a 4-byte boundary after it.
   $code.=<<___;
.asciz  "DES for SPARC T4, David S. Miller, Andy Polyakov"
.align  4
___

   # emit_assembler is not defined in this file (presumably it comes from
   # sparcv9_modes.pl and writes the accumulated code to STDOUT).
   &emit_assembler();

   # Buffered write errors (full disk, broken pipe) are only reported when
   # the handle is closed.  Fail loudly so that a truncated assembler file
   # is never mistaken for a successful run.
   close STDOUT or die "error closing STDOUT: $!";