1 ! |
|
2 ! This file was generated by a compiler that is currently not part of the CBE |
|
3 ! (as the CBE compiler does not generate code for the T4 architecture), and |
|
4 ! then it was modified by hand to remove some unnecessary instructions that |
|
5 ! the compiler generated and the main loop's branches were rearranged for |
|
6 ! fewer taken branches on the most frequent code path. These modifications |
|
7 ! were made in 7 steps. In each step, a few lines were removed from and added |
|
8 ! to the compiler generated code to produce an equivalent binary. The lines |
|
9 ! that were removed in step <i> are marked by "!<i>" at the beginning of the |
|
10 ! line; the lines added in this step are marked by the same string at the end of |
|
11 ! the line. In other words, let C_i mean the code, after step <i> (C_0 is |
|
12 ! the original, compiler generated code, C_7 is the code in this file) |
|
13 ! To reproduce C_i (0 <= i < 7) first take C_<i+1>, remove the lines that |
|
14 ! end in !<i+1>, and then remove the !<i+1> string from the beginning of those |
|
15 ! lines that start with it. Comparing C_i and C_<i+1> is a simple task, as |
|
16 ! only a few lines have changed. |
|
17 ! If a compiler (e.g. the Oracle Studio 12.3) becomes part of the CBE and |
|
18 ! it is able to generate code as efficient as the code in this file, the |
|
19 ! longest_match.o file can simply be compiled from longest_match.c. |
|
20 ! |
|
21 |
|
22 .section ".text",#alloc,#execinstr,#progbits |
|
23 .file "deflate-t4.c" |
|
24 |
|
! Compiler-emitted section skeleton. Each section below is opened and given
! a placeholder label, but no data is placed in it and none of the labels
! (Bbss.bss, Ddata.data, Drodata.rodata, Dpicdata.picdata, Ttbss.bss,
! Ttdata.data) is referenced by the code visible in this file.
25 .section ".bss",#alloc,#write,#nobits |
|
26 |
|
27 Bbss.bss: |
|
28 |
|
29 .section ".data",#alloc,#write,#progbits |
|
30 |
|
31 Ddata.data: |
|
32 |
|
33 .section ".rodata",#alloc,#progbits |
|
34 ! |
|
35 ! CONSTANT POOL |
|
36 ! |
|
37 |
|
38 Drodata.rodata: |
|
39 |
|
40 .section ".picdata",#alloc,#write |
|
41 |
|
42 Dpicdata.picdata: |
|
43 |
|
44 .section ".tbss",#alloc,#write,#tls,#nobits |
|
45 |
|
46 Ttbss.bss: |
|
47 |
|
48 .section ".tdata",#alloc,#write,#tls,#progbits |
|
49 |
|
50 Ttdata.data: |
|
51 |
|
! Read-only, NUL-terminated message strings, each aligned to 8 bytes.
! NOTE(review): longest_match below never references .L95, .L147 or .L153;
! presumably they are left over from the compilation unit this file was
! generated from -- confirm nothing else needs them before removing.
52 .section ".rodata1",#alloc,#progbits |
|
53 .align 8 |
|
54 ! |
|
55 ! CONSTANT POOL |
|
56 ! |
|
57 |
|
58 .L95: |
|
59 .ascii "invalid distance too far back\000" |
|
60 .align 8 |
|
61 ! |
|
62 ! CONSTANT POOL |
|
63 ! |
|
64 |
|
65 .L147: |
|
66 .ascii "invalid distance code\000" |
|
67 .align 8 |
|
68 ! |
|
69 ! CONSTANT POOL |
|
70 ! |
|
71 |
|
72 .L153: |
|
73 .ascii "invalid literal/length code\000" |
|
74 |
|
! Switch back to .text for the function body; instructions are 4-byte aligned.
75 .section ".text",#alloc,#execinstr,#progbits |
|
76 /* 000000 0 */ .align 4 |
|
77 ! FILE deflate-t4.c |
|
78 |
|
79 ! 1 !#include <sun_prefetch.h> |
|
80 ! 2 !#include "deflate.h" |
|
81 ! 3 !#define NIL 0 |
|
82 ! 5 !uInt longest_match(s, cur_match) |
|
83 ! 6 ! deflate_state *s; |
|
84 ! 7 ! IPos cur_match; /* current match */ |
|
85 ! 8 !{ |
|
86 |
|
87 ! |
|
88 ! SUBROUTINE longest_match |
|
89 ! |
|
90 ! OFFSET SOURCE LINE LABEL INSTRUCTION |
|
91 |
|
92 .global longest_match |
|
93 |
|
94 |
|
! ----------------------------------------------------------------------------
! longest_match -- hand-tuned SPARC T4 version of
!     uInt longest_match(deflate_state *s, IPos cur_match)
! (prototype as quoted from the C source earlier in this file).
!
! In:   %i0 = s, %i1 = cur_match (names valid after the save below).
! Out:  caller's %o0 = min(%i2, %i3) at lm_0x190, unsigned, zero-extended,
!       where %i2 is the best length found and %i3 = [%i0 + 0xa4].
! Side effect: %i1 is stored to [%i0 + 0xa0] each time a longer match is found.
!
! Reading notes:
!   * cwb<cc>/cxb<cc> are compare-and-branch instructions and have no delay
!     slot; bcs, bne, ba and return do, so the instruction that follows each
!     of those executes as part of the branch.
!   * Text starting with "!<i>" is a disabled instruction kept for the
!     step-by-step history described in the file header; a trailing "!<i>"
!     marks the instruction that replaced it.
!   * Field names suggested for the [%i0 + off] loads are guesses based on the
!     usual zlib deflate_state layout -- TODO confirm against deflate.h.
!
! Register roles, as used by the code below:
!   %g1 = base pointer from [%i0 + 0x50]; candidate string = %g1 + %i1
!   %i4 = %g1 + [%i0 + 0x9c]              current string
!   %l7 = %i4 + 0x102 (from lm_0x38 on)   end bound of the compare loop
!   %l3 = 0x102                           maximum length
!   %l4 = mask from [%i0 + 0x4c]; %l5 = halfword table base from [%i0 + 0x60]
!   %l2 = %i1 & %l4, doubled at lm_0x78 to form the table byte offset
!   %g4 = lower limit for %i1; %g5 = remaining chain iterations
!   %i2 = best length so far; %o0 = length at which the search stops early
!   %o1 / %o2 = bytes of the current string at offsets %i2 and %i2 - 1
! ----------------------------------------------------------------------------
95 longest_match: |
|
96 |
|
97 .L900000112: |
|
! Open a new register window with a 0xb0-byte stack frame.
98 save %sp, -0xb0, %sp |
|
! Step 7 hoisted the mask/table loads to the top so that a prefetch of the
! table can be issued before the rest of the setup.
! NOTE(review): this prefetch uses the unscaled index (%l5 + %l2), whereas the
! table entry is later read at %l5 + 2 * index (see lm_0x78 / lm_0x17c).
! A prefetch cannot change results, but it may not target the intended
! line -- confirm whether this is deliberate.
99 ld [%i0 + 0x4c], %l4 !7 |
|
100 ldn [%i0 + 0x60], %l5 !7 |
|
101 and %i1, %l4, %l2 !7 |
|
102 prefetch [%l5 + %l2], #n_reads !7 |
|
103 !7 ld [%i0 + 0x9c], %l4 |
|
! %l1 = [%i0 + 0x9c] (presumably s->strstart), %l6 = [%i0 + 0x44]
! (presumably s->w_size).
104 ld [%i0 + 0x9c], %l1 !7 |
|
105 ld [%i0 + 0x44], %l6 |
|
! %g4 (lower limit for candidates) starts at 0 and is raised below if needed.
106 clr %g4 |
|
107 ldn [%i0 + 0x50], %g1 |
|
! %i2 = initial best length, %g5 = chain budget, %o0 = early-stop length,
! %l7 = threshold compared against %i2 at lm_0x38 (presumably prev_length,
! max_chain_length, nice_match and good_match respectively).
108 ld [%i0 + 0xa8], %i2 |
|
109 ld [%i0 + 0xac], %g5 |
|
110 ld [%i0 + 0xc0], %o0 |
|
111 !5 srl %l4, 0x0, %l5 |
|
112 ld [%i0 + 0xbc], %l7 |
|
! %i3 = %l6 - 0x106 is the largest %l1 for which the limit stays 0;
! %i4 = address of the current string.
113 add %l6, -0x106, %i3 |
|
114 !5 add %g1, %l5, %i4 |
|
115 !7 add %g1, %l4, %i4 !5 |
|
116 add %g1, %l1, %i4 !7 |
|
! If %l1 > %i3 (unsigned) set the limit %g4 = %l1 - %i3; otherwise keep 0.
117 !7 cwbleu %l4, %i3, lm_0x38 |
|
118 cwbleu %l1, %i3, lm_0x38 !7 |
|
119 !7 sub %l4, %i3, %g4 |
|
120 sub %l1, %i3, %g4 !7 |
|
121 |
|
122 lm_0x38: |
|
123 !7 ld [%i0 + 0x4c], %l4 |
|
124 !4 add %i2, -0x1, %l3 |
|
125 !7 ldn [%i0 + 0x60], %l5 |
|
126 !4 sra %l3, 0x0, %o2 |
|
! Cache the two bytes of the current string that a longer match must
! reproduce: %o2 = string[%i2 - 1], %o1 = string[%i2].
127 add %i2, -0x1, %o2 !4 |
|
128 ldub [%i4 + %o2], %o2 |
|
129 !3 sra %i2, 0x0, %l6 |
|
130 !3 ldub [%i4 + %l6], %o1 |
|
131 ldub [%i4 + %i2], %o1 !3 |
|
! Compare %i2 with the threshold still held in %l7; the flags are consumed by
! the bcs below, so %l7 can be reused right away as the end bound %i4 + 0x102.
132 cmp %i2, %l7 |
|
133 add %i4, 0x102, %l7 |
|
! %i3 is reloaded with [%i0 + 0xa4] (presumably s->lookahead).
134 ld [%i0 + 0xa4], %i3 |
|
! If %i2 < threshold (unsigned) skip the srl; the mov sits in the delay slot
! and executes on both paths.
135 bcs,pn %icc, lm_0x6c |
|
136 mov 0x102, %l3 |
|
137 |
|
! %i2 >= threshold: search only a quarter of the chain budget.
138 srl %g5, 0x2, %g5 |
|
139 |
|
140 lm_0x6c: |
|
! %o0 = min(%o0, %i3), unsigned.
141 cmp %o0, %i3 |
|
142 !6 srl %i1, 0x0, %l0 |
|
143 !7 and %i1, %l4, %l2 !6 |
|
144 movgu %icc, %i3, %o0 |
|
145 |
|
! ---- Top of the chain loop: test one candidate position %i1 ----------------
! NOTE(review): step 6 removed the zero-extension of %i1, so on the first
! iteration the address below relies on the caller having passed cur_match
! with clean upper 32 bits -- confirm against the ABI in use. On later
! iterations %i1 comes from the lduh at lm_0x17c, which zero-extends.
146 lm_0x78: |
|
147 !6 and %i1, %l4, %l2 |
|
148 !6 add %l0, %g1, %o3 |
|
! %o3 = address of the candidate; %o5 = candidate[%i2].
149 add %i1, %g1, %o3 !6 |
|
150 !3 ldub [%o3 + %l6], %o5 |
|
151 ldub [%o3 + %i2], %o5 !3 |
|
152 !1 srl %l2, 0x0, %o4 |
|
153 !1 sllx %o4, 0x1, %l2 |
|
! %l2 becomes the byte offset of this candidate's halfword table entry;
! prefetch 0x40 bytes below that entry while the comparison proceeds.
154 sllx %l2, 0x1, %l2 !1 |
|
155 add %l2, %l5, %l1 !1 |
|
156 prefetch [%l1 - 0x40], #n_reads !1 |
|
! Only if the byte at offset %i2 matches is the candidate examined further.
157 cwbe %o5, %o1, lm_0x17c_neg |
|
158 |
|
! ---- Advance to the next candidate in the chain ----------------------------
159 lm_0x17c: |
|
! %i1 = next candidate position, read from the halfword table.
160 lduh [%l5 + %l2], %i1 |
|
! Stop when the candidate is at or below the limit (unsigned).
161 cwbleu %i1, %g4, lm_0x190 |
|
162 |
|
! Decrement the chain budget and loop while it is non-zero; the delay slot
! computes the masked index for the next iteration.
163 addcc %g5, -0x1, %g5 |
|
164 bne,pt %icc, lm_0x78 |
|
165 !6 srl %i1, 0x0, %l0 |
|
166 and %i1, %l4, %l2 !6 |
|
167 |
|
! ---- Exit: return min(%i2, %i3) --------------------------------------------
168 lm_0x190: |
|
169 cmp %i2, %i3 |
|
170 movgu %icc, %i3, %i2 |
|
! return restores the caller's register window before its delay slot runs,
! so %o2 in the srl below is this function's %i2; the srl zero-extends it
! into the caller's %o0 (the return value).
171 return %i7 + 0x8 |
|
172 srl %o2, 0x0, %o0 |
|
173 |
|
! ---- Candidate passed the byte-at-%i2 test: run the remaining quick checks --
174 lm_0x17c_neg: |
|
175 !3 add %o3, %l6, %o7 |
|
! candidate[%i2 - 1] must equal %o2.
176 add %o3, %i2, %o7 !3 |
|
177 ldub [%o7 - 0x1], %l1 |
|
178 cwbne %l1, %o2, lm_0x17c |
|
179 |
|
! candidate[0] must equal string[0].
180 !6 ldub [%g1 + %l0], %i5 |
|
181 ldub [%g1 + %i1], %i5 !6 |
|
182 ldub [%i4], %o5 |
|
183 cwbne %i5, %o5, lm_0x17c |
|
184 |
|
! candidate[1] must equal string[1].
185 ldub [%i4 + 0x1], %l1 |
|
186 ldub [%o3 + 0x1], %o4 |
|
187 cwbne %o4, %l1, lm_0x17c |
|
188 |
|
! Both pointers now address byte 2: %o3 = candidate + 2, %o4 = string + 2.
189 add %o3, 0x2, %o3 |
|
190 !1 add %l2, %l5, %l1 |
|
191 add %i4, 0x2, %o4 |
|
192 |
|
! ---- Byte-by-byte compare, unrolled 8 times --------------------------------
! Each step loads the next string byte through %o4 (which is then advanced by
! one) and the matching candidate byte at a fixed offset from %o3; %o3 itself
! is advanced by 8 once per pass. On a mismatch %o4 addresses the first
! differing string byte.
193 lm_0xc0: |
|
194 ldub [%o4 + 0x1], %l0 |
|
195 add %o4, 0x1, %o4 |
|
196 ldub [%o3 + 0x1], %o7 |
|
197 cwbne %l0, %o7, lm_0x14c |
|
198 |
|
199 ldub [%o4 + 0x1], %i5 |
|
200 add %o4, 0x1, %o4 |
|
201 ldub [%o3 + 0x2], %o5 |
|
202 cwbne %i5, %o5, lm_0x14c |
|
203 |
|
204 ldub [%o4 + 0x1], %l0 |
|
205 add %o4, 0x1, %o4 |
|
206 ldub [%o3 + 0x3], %o7 |
|
207 cwbne %l0, %o7, lm_0x14c |
|
208 |
|
209 ldub [%o4 + 0x1], %i5 |
|
210 add %o4, 0x1, %o4 |
|
211 ldub [%o3 + 0x4], %o5 |
|
212 cwbne %i5, %o5, lm_0x14c |
|
213 |
|
214 ldub [%o4 + 0x1], %l0 |
|
215 add %o4, 0x1, %o4 |
|
216 ldub [%o3 + 0x5], %o7 |
|
217 cwbne %l0, %o7, lm_0x14c |
|
218 |
|
219 ldub [%o4 + 0x1], %i5 |
|
220 add %o4, 0x1, %o4 |
|
221 ldub [%o3 + 0x6], %o5 |
|
222 cwbne %i5, %o5, lm_0x14c |
|
223 |
|
224 ldub [%o4 + 0x1], %l0 |
|
225 add %o4, 0x1, %o4 |
|
226 ldub [%o3 + 0x7], %o7 |
|
227 cwbne %l0, %o7, lm_0x14c |
|
228 |
|
229 ldub [%o4 + 0x1], %i5 |
|
230 add %o4, 0x1, %o4 |
|
231 ldub [%o3 + 0x8], %o5 |
|
232 add %o3, 0x8, %o3 |
|
233 cwbne %i5, %o5, lm_0x14c |
|
234 |
|
! Repeat while %o4 < %l7 (64-bit unsigned compare).
! NOTE(review): the nop presumably keeps the two compare-and-branch
! instructions apart -- confirm the reason before removing it.
235 nop |
|
236 cxbcs %o4, %l7, lm_0xc0 |
|
237 |
|
! ---- Compare finished: compute the match length ----------------------------
238 lm_0x14c: |
|
239 !1 prefetch [%l1 - 0x40], #n_reads |
|
! %o7 = 0x102 - (%l7 - %o4) = number of matching bytes.
240 sub %l7, %o4, %l0 |
|
241 sub %l3, %l0, %o7 |
|
! Not longer than the best so far: move on to the next candidate.
242 cwble %o7, %i2, lm_0x17c |
|
243 |
|
! New best: record the candidate position at [%i0 + 0xa0] (presumably
! s->match_start) and the new length in %i2; stop once it reaches %o0.
244 st %i1, [%i0 + 0xa0] |
|
245 mov %o7, %i2 |
|
246 cwbge %o7, %o0, lm_0x190 |
|
247 |
|
248 !2 sra %o7, 0x0, %i1 |
|
249 !3 sra %o7, 0x0, %l6 |
|
250 !2 add %i4, %i1, %l1 |
|
! Refresh the cached bytes for the new best length: %o1 = string[%i2], and
! %o2 = string[%i2 - 1] is loaded in the delay slot of the branch back.
251 add %i4, %o7, %l1 !2 |
|
252 !2 ldub [%i4 + %i1], %o1 |
|
253 ldub [%i4 + %o7], %o1 !2 |
|
254 ba lm_0x17c |
|
255 ldub [%l1 - 0x1], %o2 |
|
256 |
|
257 |
|
! Symbol type and size (bytes from the longest_match label to here).
258 /* 0x0220 0 */ .type longest_match,#function |
|
259 /* 0x0220 0 */ .size longest_match,(.-longest_match) |
|
260 |
|
261 |
|
262 .L900000113: |
|
263 |
|
! Compiler-emitted trailer: 24 bytes reserved in .text after the function,
! followed by realignment to 4 bytes. Nothing visible in this file refers to
! the reserved area or to the .L900000113 / .L900000286 labels; the purpose
! is not evident from here -- TODO confirm before removing.
264 .section ".text",#alloc,#execinstr,#progbits |
|
265 /* 000000 0 */ .align 8 |
|
266 /* 000000 */ .skip 24 |
|
267 /* 0x0018 */ .align 4 |
|
268 |
|
269 |
|
270 .L900000286: |
|
271 |
|
272 .section ".text",#alloc,#execinstr,#progbits |
|
273 |
|
! Identification strings naming the compiler components and versions that
! produced the original compiler-generated code.
274 ! Begin Disassembling Ident |
|
275 .ident "cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16" ! (NO SOURCE LINE) |
|
276 .ident "acomp: Sun C 5.12 SunOS_sparc 2011/11/16" ! (/tmp/acomp.1329237379.172468.02.sd:24) |
|
277 .ident "iropt: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16" ! (/tmp/acomp.1329237379.172468.02.sd:25) |
|
278 .ident "cg: Sun Compiler Common 12.3 SunOS_sparc 2011/11/16" ! (NO SOURCE LINE) |
|
279 ! End Disassembling Ident |
|