components/openssl/openssl-1.0.1/inline-t4/sparcv9-gf2m.pl
changeset 4822 1fb8a14c6702
parent 4821 54dafbe33fdb
child 4823 3ef8b7f4d9d8
equal deleted inserted replaced
4821:54dafbe33fdb 4822:1fb8a14c6702
     1 #!/usr/bin/env perl
       
     2 #
       
     3 # ====================================================================
       
     4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
       
     5 # project. The module is, however, dual licensed under OpenSSL and
       
     6 # CRYPTOGAMS licenses depending on where you obtain it. For further
       
     7 # details see http://www.openssl.org/~appro/cryptogams/.
       
     8 # ====================================================================
       
     9 #
       
    10 # October 2012
       
    11 #
       
    12 # The module implements bn_GF2m_mul_2x2 polynomial multiplication used
       
    13 # in bn_gf2m.c. It's kind of a low-hanging mechanical port from C for
       
    14 # the time being... Except that it has two code paths: one suitable
       
    15 # for all SPARCv9 processors and one for VIS3-capable ones. The former
       
    16 # delivers ~25-45% more, more for longer keys, heaviest DH and DSA
       
    17 # verify operations on venerable UltraSPARC II. On T4 VIS3 code is
       
    18 # ~100-230% faster than gcc-generated code and ~35-90% faster than
       
    19 # the pure SPARCv9 code path.
       
    20 
       
    21 $bits=32;                       # default to a 32-bit build
       
    22 for (@ARGV)     { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }     # any argument containing -m64 or -xarch=v9 selects a 64-bit build
       
    23 if ($bits==64)  { $bias=2047; $frame=192; }     # 64-bit: offset added to %sp when locating the table, and frame size
       
    24 else            { $bias=0;    $frame=112; }     # 32-bit: no offset, smaller frame
       
    25 
       
    26 $locals=16*8;                   # bytes reserved by "save" for the 16-entry table of 8-byte values (tab[0..15])
       
    27 
       
    28 $code.=<<___;
       
    29 #include <sparc_arch.h>
       
    30 
       
    31 .section        ".text",#alloc,#execinstr
       
    32 ___
       
    33 $code.=<<___ if ($bits==64);    # appended only for 64-bit builds; %g2/%g3 are the registers assigned to @T
       
    34 .register       %g2,#scratch
       
    35 .register       %g3,#scratch
       
    36 ___
       
    37 
       
    38 $tab="%l0";                     # base address of the lookup table, set to %sp+$bias+$frame in the software path
       
    39 
       
    40 @T=("%g2","%g3");               # two temporaries for shifted table entries; the pair is rotated after each unrolled step
       
    41 @i=("%g4","%g5");               # two registers holding a table index, then the loaded entry; rotated after each unrolled step
       
    42 
       
    43 ($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));        # %o0..%o5, used as scratch while the table is built
       
    44 ($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;        # $lo/$hi: low/high 64 bits of the result; $hi shares %o3 with $a8, $a shares %g1 with $lo
       
    45 
       
    46 $code.=<<___;
       
    47 #ifdef __PIC__
       
    48 SPARC_PIC_THUNK(%g1)
       
    49 #endif
       
    50 
       
    51 .globl	bn_GF2m_mul_2x2
       
    52 .align	16
       
    53 bn_GF2m_mul_2x2:
       
    54         SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
       
    55         ld	[%g1+0],%g1             	! OPENSSL_sparcv9cap_P[0]
       
    56 
       
    57         andcc	%g1, SPARCV9_VIS3, %g0
       
    58         bz,pn	%icc,.Lsoftware
       
    59         nop
       
    60 
       
    61 	sllx	%o1, 32, %o1
       
    62 	sllx	%o3, 32, %o3
       
    63 	or	%o2, %o1, %o1
       
    64 	or	%o4, %o3, %o3
       
    65 	.word	0x95b262ab			! xmulx   %o1, %o3, %o2
       
    66 	.word	0x99b262cb			! xmulxhi %o1, %o3, %o4
       
    67 	srlx	%o2, 32, %o1			! 13 cycles later
       
    68 	st	%o2, [%o0+0]
       
    69 	st	%o1, [%o0+4]
       
    70 	srlx	%o4, 32, %o3
       
    71 	st	%o4, [%o0+8]
       
    72 	retl
       
    73 	st	%o3, [%o0+12]
       
    74 
       
    75 .align	16
       
    76 .Lsoftware:
       
    77 	save	%sp,-$frame-$locals,%sp
       
    78 
       
    79 	sllx	%i1,32,$a
       
    80 	mov	-1,$a12
       
    81 	sllx	%i3,32,$b
       
    82 	or	%i2,$a,$a
       
    83 	srlx	$a12,1,$a48			! 0x7fff...
       
    84 	or	%i4,$b,$b
       
    85 	srlx	$a12,2,$a12			! 0x3fff...
       
    86 	add	%sp,$bias+$frame,$tab
       
    87 
       
    88 	sllx	$a,2,$a4
       
    89 	mov	$a,$a1
       
    90 	sllx	$a,1,$a2
       
    91 
       
    92 	srax	$a4,63,@i[1]			! broadcast 61st bit
       
    93 	and	$a48,$a4,$a4			! (a<<2)&0x7fff...
       
    94 	srlx	$a48,2,$a48
       
    95 	srax	$a2,63,@i[0]			! broadcast 62nd bit
       
    96 	and	$a12,$a2,$a2			! (a<<1)&0x3fff...
       
    97 	srax	$a1,63,$lo			! broadcast 63rd bit
       
    98 	and	$a48,$a1,$a1			! (a<<0)&0x1fff...
       
    99 
       
   100 	sllx	$a1,3,$a8
       
   101 	and	$b,$lo,$lo
       
   102 	and	$b,@i[0],@i[0]
       
   103 	and	$b,@i[1],@i[1]
       
   104 
       
   105 	stx	%g0,[$tab+0*8]			! tab[0]=0
       
   106 	xor	$a1,$a2,$a12
       
   107 	stx	$a1,[$tab+1*8]			! tab[1]=a1
       
   108 	stx	$a2,[$tab+2*8]			! tab[2]=a2
       
   109 	 xor	$a4,$a8,$a48
       
   110 	stx	$a12,[$tab+3*8]			! tab[3]=a1^a2
       
   111 	 xor	$a4,$a1,$a1
       
   112 
       
   113 	stx	$a4,[$tab+4*8]			! tab[4]=a4
       
   114 	xor	$a4,$a2,$a2
       
   115 	stx	$a1,[$tab+5*8]			! tab[5]=a1^a4
       
   116 	xor	$a4,$a12,$a12
       
   117 	stx	$a2,[$tab+6*8]			! tab[6]=a2^a4
       
   118 	 xor	$a48,$a1,$a1
       
   119 	stx	$a12,[$tab+7*8]			! tab[7]=a1^a2^a4
       
   120 	 xor	$a48,$a2,$a2
       
   121 
       
   122 	stx	$a8,[$tab+8*8]			! tab[8]=a8
       
   123 	xor	$a48,$a12,$a12
       
   124 	stx	$a1,[$tab+9*8]			! tab[9]=a1^a8
       
   125 	 xor	$a4,$a1,$a1
       
   126 	stx	$a2,[$tab+10*8]			! tab[10]=a2^a8
       
   127 	 xor	$a4,$a2,$a2
       
   128 	stx	$a12,[$tab+11*8]		! tab[11]=a1^a2^a8
       
   129 
       
   130 	xor	$a4,$a12,$a12
       
   131 	stx	$a48,[$tab+12*8]		! tab[12]=a4^a8
       
   132 	 srlx	$lo,1,$hi
       
   133 	stx	$a1,[$tab+13*8]			! tab[13]=a1^a4^a8
       
   134 	 sllx	$lo,63,$lo
       
   135 	stx	$a2,[$tab+14*8]			! tab[14]=a2^a4^a8
       
   136 	 srlx	@i[0],2,@T[0]
       
   137 	stx	$a12,[$tab+15*8]		! tab[15]=a1^a2^a4^a8
       
   138 
       
   139 	sllx	@i[0],62,$a1
       
   140 	 sllx	$b,3,@i[0]
       
   141 	srlx	@i[1],3,@T[1]
       
   142 	 and	@i[0],`0xf<<3`,@i[0]
       
   143 	sllx	@i[1],61,$a2
       
   144 	 ldx	[$tab+@i[0]],@i[0]
       
   145 	 srlx	$b,4-3,@i[1]
       
   146 	xor	@T[0],$hi,$hi
       
   147 	 and	@i[1],`0xf<<3`,@i[1]
       
   148 	xor	$a1,$lo,$lo
       
   149 	 ldx	[$tab+@i[1]],@i[1]
       
   150 	xor	@T[1],$hi,$hi
       
   151 
       
   152 	xor	@i[0],$lo,$lo
       
   153 	srlx	$b,8-3,@i[0]
       
   154 	 xor	$a2,$lo,$lo
       
   155 	and	@i[0],`0xf<<3`,@i[0]
       
   156 ___
       
   157 for($n=1;$n<14;$n++) {	# emit 13 unrolled steps; $n is 14 on exit and is reused by the text appended after the loop
       
   158 $code.=<<___;	# per step: fold entry n (shifted by 4*n bits) into $lo/$hi, load entry n+1, compute the index for 4-bit window n+2 of $b
       
   159 	sllx	@i[1],`$n*4`,@T[0]
       
   160 	ldx	[$tab+@i[0]],@i[0]
       
   161 	srlx	@i[1],`64-$n*4`,@T[1]
       
   162 	xor	@T[0],$lo,$lo
       
   163 	srlx	$b,`($n+2)*4`-3,@i[1]
       
   164 	xor	@T[1],$hi,$hi
       
   165 	and	@i[1],`0xf<<3`,@i[1]
       
   166 ___
       
   167 	push(@i,shift(@i)); push(@T,shift(@T));	# swap the two registers in each pair so the next step consumes the entry just loaded
       
   168 }
       
   169 $code.=<<___;
       
   170 	sllx	@i[1],`$n*4`,@T[0]
       
   171 	ldx	[$tab+@i[0]],@i[0]
       
   172 	srlx	@i[1],`64-$n*4`,@T[1]
       
   173 	xor	@T[0],$lo,$lo
       
   174 
       
   175 	sllx	@i[0],`($n+1)*4`,@T[0]
       
   176 	 xor	@T[1],$hi,$hi
       
   177 	srlx	@i[0],`64-($n+1)*4`,@T[1]
       
   178 	xor	@T[0],$lo,$lo
       
   179 	xor	@T[1],$hi,$hi
       
   180 
       
   181 	srlx	$lo,32,%i1
       
   182 	st	$lo,[%i0+0]
       
   183 	st	%i1,[%i0+4]
       
   184 	srlx	$hi,32,%i2
       
   185 	st	$hi,[%i0+8]
       
   186 	st	%i2,[%i0+12]
       
   187 
       
   188 	ret
       
   189 	restore
       
   190 .type	bn_GF2m_mul_2x2,#function
       
   191 .size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
       
   192 .asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
       
   193 .align	4
       
   194 ___
       
   195 
       
   196 $code =~ s/\`([^\`]*)\`/eval($1)/gem;	# replace every `expr` embedded in the generated text with the value Perl computes for it
       
   197 print $code;	# write the finished assembler source to standard output
       
   198 close STDOUT or die "error closing STDOUT: $!";	# a failed flush would otherwise leave truncated output unnoticed