components/zlib/capabilities/sun4v/sparcv9/longest_match_t4.s
changeset 825 78809aba88fe
child 1129 4de26d59569c
equal deleted inserted replaced
824:3c0f8247e0cd 825:78809aba88fe
       
     1 !
       
     2 ! This file was generated by a compiler that is currently not part of the CBE
       
     3 ! (as the CBE compiler does not generate code for the T4 architecture), and
       
     4 ! then it was modified by hand to remove some unnecessary instructions that
       
     5 ! the compiler generated and the main loop's branches were rearranged for
       
     6 ! fewer taken branches on the most frequent code path. These modifications 
       
     7 ! were made in 7 steps. In each step, a few lines were removed from and added
       
     8 ! to the compiler generated code to produce an equivalent binary. The lines
       
     9 ! that were removed in step <i> are marked by "!<i>" at the beginning of the
       
    10 ! line, the lines added in this step are marked by the same string at the end of
       
    11 ! the line. In other words, let C_i mean the code, after step <i> (C_0 is
       
    12 ! the original, compiler generated code, C_7 is the code in this file).
       
    13 ! To reproduce C_i (0 <= i < 7) first take C_<i+1>, remove the lines that
       
    14 ! end in !<i+1>, and then remove the !<i+1> string from the beginning of those
       
    15 ! lines that start with it. Comparing C_i and C_<i+1> is a simple task, as
       
    16 ! only a few lines have changed.
       
    17 !  If a compiler (e.g. the Oracle Studio 12.3) becomes part of the CBE and
       
    18 ! it is able to generate code as efficient as in this file, the
       
    19 ! longest_match.o file can simply be compiled from longest_match.c.
       
    20 !
       
    21 
       
    22 	.section	".text",#alloc,#execinstr,#progbits
       
    23 	.file	"deflate-t4.c"
       
    24 
       
    25 	.section	".bss",#alloc,#write,#nobits
       
    26 
       
    27 Bbss.bss:
       
    28 
       
    29 	.section	".data",#alloc,#write,#progbits
       
    30 
       
    31 Ddata.data:
       
    32 
       
    33 	.section	".rodata",#alloc,#progbits
       
    34 !
       
    35 ! CONSTANT POOL
       
    36 !
       
    37 
       
    38 Drodata.rodata:
       
    39 
       
    40 	.section	".picdata",#alloc,#write
       
    41 
       
    42 Dpicdata.picdata:
       
    43 
       
    44 	.section	".tbss",#alloc,#write,#tls,#nobits
       
    45 
       
    46 Ttbss.bss:
       
    47 
       
    48 	.section	".tdata",#alloc,#write,#tls,#progbits
       
    49 
       
    50 Ttdata.data:
       
    51 
       
    52 	.section	".rodata1",#alloc,#progbits
       
    53 	.align	8
       
    54 !
       
    55 ! CONSTANT POOL: .L95, a NUL-terminated message string.
       
    56 ! NOTE(review): no instruction in the portion of the file visible here refers to
       
    57 ! .L95, .L147 or .L153; presumably compiler leftovers -- TODO confirm before removing.
       
    58 .L95:
       
    59 	.ascii	"invalid distance too far back\000"
       
    60 	.align	8
       
    61 !
       
    62 ! CONSTANT POOL: .L147, a NUL-terminated message string.
       
    63 !
       
    64 
       
    65 .L147:
       
    66 	.ascii	"invalid distance code\000"
       
    67 	.align	8
       
    68 !
       
    69 ! CONSTANT POOL: .L153, a NUL-terminated message string.
       
    70 !
       
    71 
       
    72 .L153:
       
    73 	.ascii	"invalid literal/length code\000"
       
    74 
       
    75 	.section	".text",#alloc,#execinstr,#progbits
       
    76 /* 000000	   0 */		.align	4
       
    77 ! FILE deflate-t4.c
       
    78 
       
    79 !    1		      !#include <sun_prefetch.h>
       
    80 !    2		      !#include "deflate.h"
       
    81 !    3		      !#define NIL 0
       
    82 !    5		      !uInt longest_match(s, cur_match)
       
    83 !    6		      !    deflate_state *s;
       
    84 !    7		      !    IPos cur_match;                             /* current match */
       
    85 !    8		      !{
       
    86 
       
    87 !
       
    88 ! SUBROUTINE longest_match
       
       ! C prototype (from the embedded source listing above):
       !     uInt longest_match(deflate_state *s, IPos cur_match)
       ! Walks a chain of candidate positions starting at cur_match, comparing the
       ! bytes at each candidate with the bytes at "scan" (%i4), and keeps the
       ! longest match length found in %i2.
       !
       ! In:   %i0 = s, %i1 = cur_match (register names after the save).
       ! Out:  caller's %o0 = min(%i2, [s + 0xa4]), unsigned.  The srl in the
       !       delay slot of "return" runs in the caller's register window, so
       !       its %o2 operand is this routine's %i2.
       ! Side effect: [s + 0xa0] receives the candidate position each time a
       !       strictly longer match is found.
       !
       ! Offsets from s used here; the field names are presumed from zlib's
       ! deflate_state and are NOT visible in this file -- TODO confirm
       ! against deflate.h:
       !   0x44 w_size?       0x4c w_mask?       0x50 window?   0x60 prev?
       !   0x9c strstart?     0xa0 match_start?  0xa4 lookahead?
       !   0xa8 prev_length?  0xac max_chain_length?
       !   0xbc good_match?   0xc0 nice_match?
       ! Constants: 0x102 = 258 and 0x106 = 262 (presumably MAX_MATCH and
       ! MIN_LOOKAHEAD -- TODO confirm).
       !
       ! Register roles after the prologue:
       !   %g1 = [s + 0x50], base pointer for the byte loads
       !   %g4 = lower bound on candidates: 0, or [s+0x9c] - ([s+0x44] - 0x106)
       !   %g5 = remaining chain iterations, from [s + 0xac]
       !   %i2 = best length so far, initially [s + 0xa8]
       !   %i3 = [s + 0xa4] (holds [s+0x44] - 0x106 only until lm_0x38)
       !   %i4 = %g1 + [s + 0x9c] ("scan");  %l7 = %i4 + 0x102 (end of scan)
       !   %l3 = 0x102;  %l4 = [s + 0x4c] (index mask)
       !   %l5 = [s + 0x60], base of a table of 16-bit entries
       !   %l2 = masked candidate index, doubled at lm_0x78 into a byte offset
       !   %o0 = min([s + 0xc0], [s + 0xa4]): length at which the search stops
       !   %o1 = scan[%i2], %o2 = scan[%i2 - 1]: bytes used for quick rejection
       !   %o3 = %g1 + candidate ("match"), %o4 = cursor into scan
       !
       ! The cwb*/cxb* compare-and-branch instructions have no delay slot: the
       ! instruction that follows one is not executed when the branch is taken.
       ! bcs, bne, ba and return do have a delay slot.
       ! Do not append text after a trailing "!<i>" step marker: the procedure
       ! in the file header identifies added lines by how they end.
       ! NOTE(review): the first prefetch uses %i5 as its base, but %i5 is not
       ! written before that point in this routine; %l5 may have been
       ! intended -- TODO confirm.
       ! NOTE(review): %i1 is used as a 64-bit index on the first pass without
       ! zero-extension (step 6 dropped the srl); this presumably relies on the
       ! caller passing a zero-extended value -- TODO confirm.
       
    89 !
       
    90 ! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
       
    91 
       
    92                        	.global longest_match
       
    93                        
       
    94 
       
    95 			longest_match:
       
    96 
       
    97 			.L900000112:
       
    98 	save      %sp, -0xb0, %sp
       
    99 	 ld        [%i0 + 0x4c], %l4	!7
       
   100 	 ldn       [%i0 + 0x60], %l5	!7
       
   101 	 and       %i1, %l4, %l2	!7
       
   102 	 prefetch [%i5 + %l2], #n_reads	!7
       
   103 !7	ld        [%i0 + 0x9c], %l4
       
   104 	 ld        [%i0 + 0x9c], %l1	!7
       
   105 	ld        [%i0 + 0x44], %l6
       
   106 	clr       %g4
       
   107 	ldn       [%i0 + 0x50], %g1
       
   108 	ld        [%i0 + 0xa8], %i2
       
   109 	ld        [%i0 + 0xac], %g5
       
   110 	ld        [%i0 + 0xc0], %o0
       
   111 !5	srl       %l4, 0x0, %l5
       
   112 	ld        [%i0 + 0xbc], %l7
       
   113 	add       %l6, -0x106, %i3
       
   114 !5	add       %g1, %l5, %i4
       
   115 !7	 add       %g1, %l4, %i4	!5
       
   116 	 add       %g1, %l1, %i4	!7
       
   117 !7	cwbleu    %l4, %i3, lm_0x38
       
   118 	 cwbleu    %l1, %i3, lm_0x38	!7
       
   119 !7	sub       %l4, %i3, %g4
       
   120 	 sub       %l1, %i3, %g4	!7
       
   121 		
       
   122 lm_0x38:
       
   123 !7	ld        [%i0 + 0x4c], %l4
       
   124 !4	add       %i2, -0x1, %l3
       
   125 !7	ldn       [%i0 + 0x60], %l5
       
   126 !4	sra       %l3, 0x0, %o2
       
   127 	 add       %i2, -0x1, %o2	!4
       
   128 	ldub      [%i4 + %o2], %o2
       
   129 !3	sra       %i2, 0x0, %l6
       
   130 !3	ldub      [%i4 + %l6], %o1
       
   131 	 ldub      [%i4 + %i2], %o1	!3
       
   132 	cmp       %i2, %l7
       
   133 	add       %i4, 0x102, %l7
       
   134 	ld        [%i0 + 0xa4], %i3
       
   135 	bcs,pn    %icc, lm_0x6c
       
   136 	mov       0x102, %l3
       
   137 ! Fall-through only when %i2 >= [s + 0xbc] (unsigned): divide the chain budget %g5 by 4.
       
   138 	srl       %g5, 0x2, %g5
       
   139 		
       
   140 lm_0x6c:
       
   141 	cmp       %o0, %i3
       
   142 !6	srl       %i1, 0x0, %l0
       
   143 !7	 and       %i1, %l4, %l2	!6
       
   144 	movgu     %icc, %i3, %o0
       
   145 		
       
   146 lm_0x78:
       
   147 !6	and       %i1, %l4, %l2
       
   148 !6	add       %l0, %g1, %o3
       
   149 	 add       %i1, %g1, %o3	!6
       
   150 !3	ldub      [%o3 + %l6], %o5
       
   151 	 ldub      [%o3 + %i2], %o5		!3
       
   152 !1	srl       %l2, 0x0, %o4
       
   153 !1	sllx      %o4, 0x1, %l2
       
   154 	 sllx	  %l2, 0x1, %l2			!1
       
   155 	 add       %l2, %l5, %l1		!1
       
   156 	 prefetch  [%l1 - 0x40], #n_reads	!1
       
   157 	cwbe     %o5, %o1, lm_0x17c_neg
       
   158 ! lm_0x17c: load the next candidate from the 16-bit table; leave the loop if it is <= %g4 (unsigned).
       
   159 lm_0x17c:
       
   160 	lduh      [%l5 + %l2], %i1
       
   161 	cwbleu    %i1, %g4, lm_0x190
       
   162 ! Decrement the chain budget; the delay-slot "and" prepares %l2 for the next pass through lm_0x78.
       
   163 	addcc     %g5, -0x1, %g5
       
   164 	bne,pt    %icc, lm_0x78
       
   165 !6	srl       %i1, 0x0, %l0
       
   166 	 and       %i1, %l4, %l2	!6
       
   167 		
       
   168 lm_0x190:
       
   169 	cmp       %i2, %i3
       
   170 	movgu     %icc, %i3, %i2
       
   171 	return    %i7 + 0x8
       
   172 	srl       %o2, 0x0, %o0
       
   173 ! lm_0x17c_neg: match[%i2] equalled %o1; also require match[%i2 - 1], match[0] and match[1] to equal scan's.
       
   174 lm_0x17c_neg:
       
   175 !3	add       %o3, %l6, %o7
       
   176 	 add       %o3, %i2, %o7		!3
       
   177 	ldub      [%o7 - 0x1], %l1
       
   178 	cwbne     %l1, %o2, lm_0x17c
       
   179 
       
   180 !6	ldub      [%g1 + %l0], %i5
       
   181 	 ldub      [%g1 + %i1], %i5	!6
       
   182 	ldub      [%i4], %o5
       
   183 	cwbne     %i5, %o5, lm_0x17c
       
   184 
       
   185 	ldub      [%i4 + 0x1], %l1
       
   186 	ldub      [%o3 + 0x1], %o4
       
   187 	cwbne     %o4, %l1, lm_0x17c
       
   188 
       
   189 	add       %o3, 0x2, %o3
       
   190 !1	add       %l2, %l5, %l1
       
   191 	add       %i4, 0x2, %o4
       
   192 ! lm_0xc0: compare up to 8 more bytes per pass; %o4 advances after every byte, %o3 by 8 per full pass.
       
   193 lm_0xc0:
       
   194 	ldub      [%o4 + 0x1], %l0
       
   195 	add       %o4, 0x1, %o4
       
   196 	ldub      [%o3 + 0x1], %o7
       
   197 	cwbne     %l0, %o7, lm_0x14c
       
   198 
       
   199 	ldub      [%o4 + 0x1], %i5
       
   200 	add       %o4, 0x1, %o4
       
   201 	ldub      [%o3 + 0x2], %o5
       
   202 	cwbne     %i5, %o5, lm_0x14c
       
   203 
       
   204 	ldub      [%o4 + 0x1], %l0
       
   205 	add       %o4, 0x1, %o4
       
   206 	ldub      [%o3 + 0x3], %o7
       
   207 	cwbne     %l0, %o7, lm_0x14c
       
   208 
       
   209 	ldub      [%o4 + 0x1], %i5
       
   210 	add       %o4, 0x1, %o4
       
   211 	ldub      [%o3 + 0x4], %o5
       
   212 	cwbne     %i5, %o5, lm_0x14c
       
   213 
       
   214 	ldub      [%o4 + 0x1], %l0
       
   215 	add       %o4, 0x1, %o4
       
   216 	ldub      [%o3 + 0x5], %o7
       
   217 	cwbne     %l0, %o7, lm_0x14c
       
   218 
       
   219 	ldub      [%o4 + 0x1], %i5
       
   220 	add       %o4, 0x1, %o4
       
   221 	ldub      [%o3 + 0x6], %o5
       
   222 	cwbne     %i5, %o5, lm_0x14c
       
   223 
       
   224 	ldub      [%o4 + 0x1], %l0
       
   225 	add       %o4, 0x1, %o4
       
   226 	ldub      [%o3 + 0x7], %o7
       
   227 	cwbne     %l0, %o7, lm_0x14c
       
   228 
       
   229 	ldub      [%o4 + 0x1], %i5
       
   230 	add       %o4, 0x1, %o4
       
   231 	ldub      [%o3 + 0x8], %o5
       
   232 	add       %o3, 0x8, %o3
       
   233 	cwbne     %i5, %o5, lm_0x14c
       
   234 ! All 8 bytes matched: go round again while %o4 < %l7 (64-bit unsigned compare).
       
   235 	nop
       
   236 	cxbcs     %o4, %l7, lm_0xc0
       
   237 		
       
   238 lm_0x14c:
       
   239 !1	prefetch  [%l1 - 0x40], #n_reads
       
   240 	sub       %l7, %o4, %l0
       
   241 	sub       %l3, %l0, %o7
       
   242 	cwble     %o7, %i2, lm_0x17c
       
   243 ! %o7 = 0x102 - (%l7 - %o4) exceeds %i2: store the candidate at [s + 0xa0], take %o7 as the new best, stop if it is >= %o0.
       
   244 	st        %i1, [%i0 + 0xa0]
       
   245 	mov       %o7, %i2
       
   246 	cwbge     %o7, %o0, lm_0x190
       
   247 ! Otherwise reload the rejection bytes %o1 / %o2 for the new %i2 and continue with the next candidate.
       
   248 !2	sra       %o7, 0x0, %i1
       
   249 !3	sra       %o7, 0x0, %l6
       
   250 !2	add       %i4, %i1, %l1
       
   251 	 add       %i4, %o7, %l1	!2
       
   252 !2	ldub      [%i4 + %i1], %o1
       
   253 	 ldub      [%i4 + %o7], %o1	!2
       
   254 	 ba	   lm_0x17c
       
   255 	ldub      [%l1 - 0x1], %o2
       
   256 	
       
   257 
       
   258 /* 0x0220	   0 */		.type	longest_match,#function
       
   259 /* 0x0220	   0 */		.size	longest_match,(.-longest_match)
       
   260                        
       
   261 
       
   262 			.L900000113:
       
   263 
       
   264 	.section	".text",#alloc,#execinstr,#progbits
       
   265 /* 000000	   0 */		.align	8
       
   266 /* 000000	     */		.skip	24
       
   267 /* 0x0018	     */		.align	4
       
   268 
       
   269 
       
   270 			.L900000286:
       
   271 
       
   272 	.section	".text",#alloc,#execinstr,#progbits
       
   273 
       
   274 ! Begin Disassembling Ident
       
   275 	.ident	"cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"	! (NO SOURCE LINE)
       
   276 	.ident	"acomp: Sun C 5.12 SunOS_sparc 2011/11/16"	! (/tmp/acomp.1329237379.172468.02.sd:24)
       
   277 	.ident	"iropt: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"	! (/tmp/acomp.1329237379.172468.02.sd:25)
       
   278 	.ident	"cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16"	! (NO SOURCE LINE)
       
   279 ! End Disassembling Ident