author | Mark J. Nelson <Mark.J.Nelson@Sun.COM> |
Wed, 06 Aug 2008 16:29:39 -0600 | |
changeset 7298 | b69e27387f74 |
parent 6812 | febeba71273d |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
6812 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
0 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
6812 | 21 |
|
0 | 22 |
/* |
6812 | 23 |
* Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
0 | 24 |
* Use is subject to license terms. |
25 |
*/ |
|
26 |
||
7298
b69e27387f74
6733918 Teamware has retired, please welcome your new manager, Mercurial
Mark J. Nelson <Mark.J.Nelson@Sun.COM>
parents:
6812
diff
changeset
|
27 |
.file "strlcpy.s" |
0 | 28 |
|
29 |
/* |
|
30 |
* The strlcpy() function copies at most dstsize-1 characters |
|
31 |
* (dstsize being the size of the string buffer dst) from src |
|
32 |
* to dst, truncating src if necessary. The result is always |
|
33 |
* null-terminated. The function returns strlen(src). Buffer |
|
34 |
* overflow can be checked as follows: |
|
35 |
* |
|
36 |
* if (strlcpy(dst, src, dstsize) >= dstsize) |
|
37 |
* return -1; |
|
38 |
*/ |
|
39 |
||
40 |
#include <sys/asm_linkage.h> |
|
41 |
||
42 |
! strlcpy implementation is similar to that of strcpy, except |
|
43 |
! in this case, the maximum size of the destination must be |
|
44 |
! tracked since it bounds our maximum copy size. However, |
|
45 |
! we must still continue to check for zero since the routine |
|
46 |
! is expected to null-terminate any string that is within |
|
47 |
! the dest size bound. |
|
48 |
! |
|
49 |
! this method starts by checking for and arranging source alignment. |
|
50 |
! Once this has occurred, we copy based upon destination alignment. |
|
51 |
! This is either by xword, word, halfword, or byte. As this occurs, we |
|
52 |
! check for a zero-byte. If one is found, we branch to a method |
|
53 |
! which checks for the exact location of a zero-byte within a |
|
54 |
! larger xword/word/half-word quantity. |
|
55 |
||
56 |
||
57 |
! strlcpy(dst, src, dstsize): entry point and source-alignment loop.
!
! After the save: %i0 = dst, %i1 = src, %i2 = n (dstsize).
! Setup performed here:
!   %g4 = -n, a negative index that counts up toward zero
!   %i3 = src + n, %i2 = dst + n, so that [%i3 + %g4] and [%i2 + %g4]
!         address the current src and dst byte
!   %i4 = (src & 7) - 8, the negated count of bytes until src is
!         dword aligned
! The two "add" instructions that bias the pointers sit in non-annulled
! branch delay slots, so they execute whether or not the branch is taken.
ENTRY(strlcpy) |
|
58 |
||
59 |
.align 32 |
|
60 |
||
61 |
save %sp, -SA(WINDOWSIZE), %sp |
|
62 |
subcc %g0, %i2, %g4 ! %g4 = -n; sets Z when n == 0 |
|
63 |
bz,pn %ncc, .getstrlen ! n == 0, nothing may be stored: only compute strlen |
|
64 |
add %i1, %i2, %i3 ! (delay slot) %i3 = src + n |
|
65 |
andcc %i1, 7, %i4 ! src dword aligned ? |
|
66 |
bz,pn %ncc, .dwordaligned ! yup |
|
67 |
add %i0, %i2, %i2 ! (delay slot) %i2 = dst + n |
|
68 |
sub %i4, 8, %i4 ! %i4 = -(bytes until src aligned) |
|
69 |
||
70 |
! Copy one byte per iteration until src is dword aligned, the source
! string ends, or n expires.
.alignsrc: |
|
71 |
ldub [%i3 + %g4], %l1 ! src[] |
|
72 |
andcc %l1, 0xff, %g0 ! end of src reached (null byte) ? |
|
73 |
stub %l1, [%i2 + %g4] ! dst[] = src[] |
|
74 |
bz,a %ncc, .done ! yes, done |
|
75 |
add %i2, %g4, %i2 ! (annulled slot, taken path only) single dest pointer for strlen |
|
76 |
addcc %g4, 1, %g4 ! src++, dst++, n-- |
|
77 |
bz,pn %ncc, .forcenullunalign ! n == 0, force null byte, compute len |
|
78 |
addcc %i4, 1, %i4 ! (delay slot) src aligned now? |
|
79 |
bnz,a %ncc, .alignsrc ! no, copy another byte |
|
80 |
nop ! pad |
|
81 |
||
82 |
! src is dword aligned from here on.  Build the two 64-bit constants used
! by the zero-byte test (%i4 = 0x0101010101010101 and
! %i5 = %i4 << 7 = 0x8080808080808080), then dispatch on the alignment of
! the current dst address (%i2 + %g4) to the copy loop whose stores match
! that alignment.  %i2 is biased by -8 because every copy loop advances
! %g4 by 8 before it stores.
.dwordaligned: |
|
83 |
sethi %hi(0x01010101), %i4 ! Alan Mycroft's magic1 |
|
84 |
add %i2, %g4, %l0 ! dst |
|
85 |
or %i4, %lo(0x01010101),%i4! finish loading magic1 |
|
86 |
and %l0, 3, %g1 ! dst<1:0> to examine offset |
|
87 |
sllx %i4, 32, %l1 ! spread magic1 |
|
88 |
cmp %g1, 1 ! dst offset of 1 or 5 |
|
89 |
or %i4, %l1, %i4 ! to all 64 bits |
|
90 |
sub %i2, 8, %i2 ! adjust for dest pre-incr in cpy loops |
|
91 |
be,pn %ncc, .storebyte1241 ! store 1, 2, 4, 1 bytes |
|
92 |
sllx %i4, 7, %i5 ! (delay slot, runs on every path) Alan Mycroft's magic2 |
|
93 |
cmp %g1, 3 ! dst offset of 3 or 7 |
|
94 |
be,pn %ncc, .storebyte1421 ! store 1, 4, 2, 1 bytes |
|
95 |
cmp %g1, 2 ! dst halfword aligned ? |
|
96 |
be,pn %ncc, .storehalfword ! yup, store half-word wise |
|
97 |
andcc %l0, 7, %g0 ! dst dword aligned ? |
|
98 |
bnz,pn %ncc, .storeword2 ! no, only word aligned: store word wise |
|
99 |
nop ! ensure loop is 16-byte aligned |
|
100 |
nop ! ensure loop is 16-byte aligned |
|
101 |
||
102 |
! dst is dword aligned: copy one dword per iteration.
! The stx sits in an annulled delay slot, so it executes only when the
! branch back to .storedword is taken, i.e. when the dword just loaded
! contains no zero byte.  Otherwise control falls through with the
! unstored dword still in %l1.
.storedword: |
|
103 |
ldx [%i3 + %g4], %l1 ! src dword |
|
104 |
addcc %g4, 8, %g4 ! n += 8, src += 8, dst += 8 |
|
105 |
bcs,pn %ncc, .lastword ! if counter wraps, last word |
|
106 |
andn %i5, %l1, %g1 ! ~dword & 0x8080808080808080 |
|
107 |
sub %l1, %i4, %l0 ! dword - 0x0101010101010101 |
|
108 |
andcc %l0, %g1, %g0 ! ((dword - 0x0101010101010101) & ~dword & 0x8080808080808080) |
|
109 |
bz,a,pt %ncc, .storedword ! no zero byte if magic expression == 0 |
|
110 |
stx %l1, [%i2 + %g4] ! store dword to dst (address pre-incremented) |
|
111 |
||
112 |
! n has not expired, but src is at the end. we need to push out the |
|
113 |
! remaining src bytes. Since strlen(dst) == strlen(src), we can |
|
114 |
! compute the return value as the difference of final dst pointer |
|
115 |
! and the pointer to the start of dst |
|
116 |
||
117 |
! The dword in %l1 contains a null byte and n has not expired.
! Store its bytes one at a time, most significant byte first, up to and
! including the null.  Each stb sits in a non-annulled delay slot, so the
! null byte itself is written before control reaches .done, where %i2
! points at that null.
.zerobyte: |
|
118 |
add %i2, %g4, %i2 ! pointer to dest string |
|
119 |
srlx %l1, 56, %g1 ! first byte |
|
120 |
andcc %g1, 0xff, %g0 ! end of string ? |
|
121 |
bz,pn %ncc, .done ! yup, copy done, return length |
|
122 |
stb %g1, [%i2] ! store it |
|
123 |
add %i2, 1, %i2 ! dst++ |
|
124 |
srlx %l1, 48, %g1 ! second byte |
|
125 |
andcc %g1, 0xff, %g0 ! end of string ? |
|
126 |
bz,pn %ncc, .done ! yup, copy done, return length |
|
127 |
stb %g1, [%i2] ! store it |
|
128 |
add %i2, 1, %i2 ! dst++ |
|
129 |
srlx %l1, 40, %g1 ! third byte |
|
130 |
andcc %g1, 0xff, %g0 ! end of string ? |
|
131 |
bz,pn %ncc, .done ! yup, copy done, return length |
|
132 |
stb %g1, [%i2] ! store it |
|
133 |
add %i2, 1, %i2 ! dst++ |
|
134 |
srlx %l1, 32, %g1 ! fourth byte |
|
135 |
andcc %g1, 0xff, %g0 ! end of string ? |
|
136 |
bz,pn %ncc, .done ! yup, copy done, return length |
|
137 |
stb %g1, [%i2] ! store it |
|
138 |
add %i2, 1, %i2 ! dst++ |
|
139 |
srlx %l1, 24, %g1 ! fifth byte |
|
140 |
andcc %g1, 0xff, %g0 ! end of string ? |
|
141 |
bz,pn %ncc, .done ! yup, copy done, return length |
|
142 |
stb %g1, [%i2] ! store it |
|
143 |
add %i2, 1, %i2 ! dst++ |
|
144 |
srlx %l1, 16, %g1 ! sixth byte |
|
145 |
andcc %g1, 0xff, %g0 ! end of string ? |
|
146 |
bz,pn %ncc, .done ! yup, copy done, return length |
|
147 |
stb %g1, [%i2] ! store it |
|
148 |
add %i2, 1, %i2 ! dst++ |
|
149 |
srlx %l1, 8, %g1 ! seventh byte |
|
150 |
andcc %g1, 0xff, %g0 ! end of string ? |
|
151 |
bz,pn %ncc, .done ! yup, copy done, return length |
|
152 |
stb %g1, [%i2] ! store it |
|
153 |
stb %l1, [%i2 + 1] ! store eighth byte (the null: the first seven were non-zero) |
|
154 |
add %i2, 1, %i2 ! dst++, now pointing at the null just stored |
|
155 |
||
156 |
! Common exit when the whole source string fit: %i2 points at the
! terminating null written to dst and %i0 still holds the original dst,
! so their difference is the string length.
.done: |
|
157 |
sub %i2, %i0, %i0 ! len = dst - orig dst |
|
158 |
ret ! subroutine done |
|
159 |
restore %i0, %g0, %o0 ! restore register window, return len |
|
160 |
||
161 |
! n expired, so this is the last word. It may contain null bytes. |
|
162 |
! Store bytes until n == 0. If a null byte is encountered during |
|
163 |
! processing of this last src word, we are done. Otherwise continue |
|
164 |
! to scan src until we hit the end, and compute strlen from the |
|
165 |
! difference between the pointer past the last byte of src and the |
|
166 |
! original pointer to the start of src |
|
167 |
||
168 |
! The counter wrapped while stepping over the dword in %l1, so at most
! 8 bytes of dst remain.  Convert to plain pointers (%i2 = dst position,
! %i3 = address of this src dword) and undo the +8 on %g4, which is then
! in the range -8..-1.
! Bytes are stored one at a time.  After each store %g4 is bumped; when it
! reaches zero the byte just stored occupied the last slot of dst, and
! .forcenull overwrites it with the terminator.  %i2 still points at that
! byte because the dst++ comes after the branch.
! NOTE(review): the "bz .forcenull" branches name no condition-code
! register, unlike the %ncc branches elsewhere; presumably harmless since
! %g4 is a small negative count here - confirm.
.lastword: |
|
169 |
add %i2, %g4, %i2 ! we want a single dst pointer here |
|
170 |
sub %g4, 8, %g4 ! undo counter pre-increment |
|
171 |
add %i3, %g4, %i3 ! we want a single src pointer here |
|
172 |
||
173 |
srlx %l1, 56, %g1 ! first byte |
|
174 |
andcc %g1, 0xff, %g0 ! end of src reached ? |
|
175 |
bz,pn %ncc, .done ! yup |
|
176 |
stb %g1, [%i2] ! store it |
|
177 |
inccc %g4 ! n-- |
|
178 |
bz .forcenull ! if n == 0, force null byte, compute len |
|
179 |
srlx %l1, 48, %g1 ! second byte |
|
180 |
add %i2, 1, %i2 ! dst++ |
|
181 |
andcc %g1, 0xff, %g0 ! end of src reached ? |
|
182 |
bz,pn %ncc, .done ! yup |
|
183 |
stb %g1, [%i2] ! store it |
|
184 |
inccc %g4 ! n-- |
|
185 |
bz .forcenull ! if n == 0, force null byte, compute len |
|
186 |
srlx %l1, 40, %g1 ! third byte |
|
187 |
add %i2, 1, %i2 ! dst++ |
|
188 |
andcc %g1, 0xff, %g0 ! end of src reached ? |
|
189 |
bz,pn %ncc, .done ! yup |
|
190 |
stb %g1, [%i2] ! store it |
|
191 |
inccc %g4 ! n-- |
|
192 |
bz .forcenull ! if n == 0, force null byte, compute strlen |
|
193 |
srlx %l1, 32, %g1 ! fourth byte |
|
194 |
add %i2, 1, %i2 ! dst++ |
|
195 |
andcc %g1, 0xff, %g0 ! end of src reached ? |
|
196 |
bz,pn %ncc, .done ! yup |
|
197 |
stb %g1, [%i2] ! store it |
|
198 |
inccc %g4 ! n-- |
|
199 |
bz .forcenull ! if n == 0, force null byte, compute strlen |
|
200 |
srlx %l1, 24, %g1 ! fifth byte |
|
201 |
add %i2, 1, %i2 ! dst++ |
|
202 |
andcc %g1, 0xff, %g0 ! end of src reached ? |
|
203 |
bz,pn %ncc, .done ! yup |
|
204 |
stb %g1, [%i2] ! store it |
|
205 |
inccc %g4 ! n-- |
|
206 |
bz .forcenull ! if n == 0, force null byte, compute strlen |
|
207 |
srlx %l1, 16, %g1 ! sixth byte |
|
208 |
add %i2, 1, %i2 ! dst++ |
|
209 |
andcc %g1, 0xff, %g0 ! end of src reached ? |
|
210 |
bz,pn %ncc, .done ! yup |
|
211 |
stb %g1, [%i2] ! store it |
|
212 |
inccc %g4 ! n-- |
|
213 |
bz .forcenull ! if n == 0, force null byte, compute strlen |
|
214 |
srlx %l1, 8, %g1 ! seventh byte |
|
215 |
add %i2, 1, %i2 ! dst++ |
|
216 |
andcc %g1, 0xff, %g0 ! end of src reached ? |
|
217 |
bz,pn %ncc, .done ! yup |
|
218 |
stb %g1, [%i2] ! store it |
|
219 |
inccc %g4 ! n-- |
|
220 |
bz .forcenull ! if n == 0, force null byte, compute strlen |
|
221 |
andcc %l1, 0xff, %g0 ! end of src reached ? |
|
222 |
add %i2, 1, %i2 ! dst++ |
|
223 |
bz,pn %ncc, .done ! yup |
|
224 |
stb %l1, [%i2] ! store eighth byte; if non-null, execution continues into .forcenull |
|
225 |
||
226 |
! we need to force a null byte in the last position of dst |
|
227 |
! %i2 points to the location |
|
228 |
||
229 |
! dst is full: overwrite the byte at %i2 (the last byte stored) with the
! terminator, then continue into the strlen scan that follows.
.forcenull: |
|
230 |
stb %g0, [%i2] ! force string terminating null byte |
|
231 |
||
232 |
! here: %i1 points to src start |
|
233 |
! %i3 is the current src ptr (8-byte aligned) |
|
234 |
||
235 |
! Nothing more is stored; find the end of src to compute strlen(src).
! Expects %i3 = dword-aligned src pointer, %i4 = 0x0101010101010101 and
! %i5 = 0x8080808080808080.  .searchword2 is the entry used when the
! caller has already loaded the dword at [%i3] into %l1.
! Once a dword containing a null byte is found, %i5 is reused as a
! one-byte mask that walks from the most significant byte downward while
! %i3 advances one byte per step.
.searchword: |
|
236 |
ldx [%i3], %l1 ! src dword |
|
237 |
.searchword2: |
|
238 |
andn %i5, %l1, %g1 ! ~dword & 0x8080808080808080 |
|
239 |
sub %l1, %i4, %l0 ! dword - 0x0101010101010101 |
|
240 |
andcc %l0, %g1, %g0 ! ((dword - 0x0101010101010101) & ~dword & 0x8080808080808080) |
|
241 |
bz,a,pt %ncc, .searchword ! no null byte if expression is 0 |
|
242 |
add %i3, 8, %i3 ! (annulled slot, taken path only) src += 8 |
|
243 |
||
244 |
mov 0xff, %i5 ! create byte mask for null byte scanning |
|
245 |
sllx %i5, 56, %i5 ! mask for 1st byte = 0xff00000000000000 |
|
246 |
.searchbyte: |
|
247 |
andcc %l1, %i5, %g0 ! current byte zero? |
|
248 |
srlx %i5, 8, %i5 ! byte mask for next byte |
|
249 |
bnz,a %ncc, .searchbyte ! current byte != zero, continue search |
|
250 |
add %i3, 1, %i3 ! (annulled slot, taken path only) src++ |
|
251 |
||
252 |
.endfound: |
|
253 |
sub %i3, %i1, %i0 ! len = src - orig src |
|
254 |
ret ! done |
|
255 |
restore %i0, %g0, %o0 ! restore register window, return len |
|
256 |
nop ! align loop on 16-byte |
|
257 |
||
258 |
! Copy loop for dst at offset 3 or 7 within a dword: each src dword is
! written as byte, word, halfword, byte so that every store is naturally
! aligned.  %l0 is first a scratch for the zero-byte test and is then
! overwritten, in the delay slot of the bnz, with the dst address.
.storebyte1421: |
|
259 |
ldx [%i3 + %g4], %l1 ! x = src[] |
|
260 |
addcc %g4, 8, %g4 ! src += 8, dst += 8 |
|
261 |
bcs,pn %ncc, .lastword ! if counter wraps, last word |
|
262 |
andn %i5, %l1, %g1 ! ~x & 0x8080808080808080 |
|
263 |
sub %l1, %i4, %l0 ! x - 0x0101010101010101 |
|
264 |
andcc %l0, %g1, %g0 ! ((x - 0x0101010101010101) & ~x & 0x8080808080808080) |
|
265 |
bnz,pn %ncc, .zerobyte ! x has zero byte, handle end cases |
|
266 |
add %i2, %g4, %l0 ! dst (in pointer form) |
|
267 |
srlx %l1, 56, %g1 ! %g1<7:0> = first byte; word aligned now |
|
268 |
stb %g1, [%l0] ! store first byte |
|
269 |
srlx %l1, 24, %g1 ! %g1<31:0> = bytes 2, 3, 4, 5 |
|
270 |
stw %g1, [%l0 + 1] ! store bytes 2, 3, 4, 5 |
|
271 |
srlx %l1, 8, %g1 ! %g1<15:0> = bytes 6, 7 |
|
272 |
sth %g1, [%l0 + 5] ! store bytes 6, 7 |
|
273 |
ba .storebyte1421 ! next dword |
|
274 |
stb %l1, [%l0 + 7] ! (delay slot) store eighth byte |
|
275 |
||
276 |
! Copy loop for dst at offset 1 or 5 within a dword: each src dword is
! written as byte, halfword, word, byte so that every store is naturally
! aligned.  %l0 is first a scratch for the zero-byte test and is then
! overwritten, in the delay slot of the bnz, with the dst address.
.storebyte1241: |
|
277 |
ldx [%i3 + %g4], %l1 ! x = src[] |
|
278 |
addcc %g4, 8, %g4 ! src += 8, dst += 8 |
|
279 |
bcs,pn %ncc, .lastword ! if counter wraps, last word |
|
280 |
andn %i5, %l1, %g1 ! ~x & 0x8080808080808080 |
|
281 |
sub %l1, %i4, %l0 ! x - 0x0101010101010101 |
|
282 |
andcc %l0, %g1, %g0 ! ((x - 0x0101010101010101) & ~x & 0x8080808080808080) |
|
283 |
bnz,pn %ncc, .zerobyte ! x has zero byte, handle end cases |
|
284 |
add %i2, %g4, %l0 ! dst (in pointer form) |
|
285 |
srlx %l1, 56, %g1 ! %g1<7:0> = first byte; half-word aligned now |
|
286 |
stb %g1, [%l0] ! store first byte |
|
287 |
srlx %l1, 40, %g1 ! %g1<15:0> = bytes 2, 3 |
|
288 |
sth %g1, [%l0 + 1] ! store bytes 2, 3 |
|
289 |
srlx %l1, 8, %g1 ! %g1<31:0> = bytes 4, 5, 6, 7 |
|
290 |
stw %g1, [%l0 + 3] ! store bytes 4, 5, 6, 7 |
|
291 |
ba .storebyte1241 ! next dword |
|
292 |
stb %l1, [%l0 + 7] ! (delay slot) store eighth byte |
|
293 |
||
294 |
! Copy loop for dst that is halfword but not word aligned: each src dword
! is written as halfword, word, halfword so that every store is naturally
! aligned.  %l0 is first a scratch for the zero-byte test and is then
! overwritten, in the delay slot of the bnz, with the dst address.
.storehalfword: |
|
295 |
ldx [%i3 + %g4], %l1 ! x = src[] |
|
296 |
addcc %g4, 8, %g4 ! src += 8, dst += 8 |
|
297 |
bcs,pn %ncc, .lastword ! if counter wraps, last word |
|
298 |
andn %i5, %l1, %g1 ! ~x & 0x8080808080808080 |
|
299 |
sub %l1, %i4, %l0 ! x - 0x0101010101010101 |
|
300 |
andcc %l0, %g1, %g0 ! ((x - 0x0101010101010101) & ~x & 0x8080808080808080) |
|
301 |
bnz,pn %ncc, .zerobyte ! x has zero byte, handle end cases |
|
302 |
add %i2, %g4, %l0 ! dst (in pointer form) |
|
303 |
srlx %l1, 48, %g1 ! %g1<15:0> = bytes 1, 2; word aligned now |
|
304 |
sth %g1, [%l0] ! store bytes 1, 2 |
|
305 |
srlx %l1, 16, %g1 ! %g1<31:0> = bytes 3, 4, 5, 6 |
|
306 |
stw %g1, [%l0 + 2] ! store bytes 3, 4, 5, 6 |
|
307 |
ba .storehalfword ! next dword |
|
308 |
sth %l1, [%l0 + 6] ! (delay slot) store bytes 7, 8 |
|
309 |
nop ! align next loop to 16-byte boundary |
|
310 |
nop ! align next loop to 16-byte boundary |
|
311 |
||
312 |
! Copy loop for dst that is word but not dword aligned: each src dword is
! written as two word stores.  %l0 is first a scratch for the zero-byte
! test and is then overwritten, in the delay slot of the bnz, with the
! dst address.
.storeword2: |
|
313 |
ldx [%i3 + %g4], %l1 ! x = src[] |
|
314 |
addcc %g4, 8, %g4 ! src += 8, dst += 8 |
|
315 |
bcs,pn %ncc, .lastword ! if counter wraps, last word |
|
316 |
andn %i5, %l1, %g1 ! ~x & 0x8080808080808080 |
|
317 |
sub %l1, %i4, %l0 ! x - 0x0101010101010101 |
|
318 |
andcc %l0, %g1, %g0 ! ((x - 0x0101010101010101) & ~x & 0x8080808080808080) |
|
319 |
bnz,pn %ncc, .zerobyte ! x has zero byte, handle end cases |
|
320 |
add %i2, %g4, %l0 ! dst (in pointer form) |
|
321 |
srlx %l1, 32, %g1 ! %g1<31:0> = bytes 1, 2, 3, 4 |
|
322 |
stw %g1, [%l0] ! store bytes 1, 2, 3, 4 |
|
323 |
ba .storeword2 ! next dword |
|
324 |
stw %l1, [%l0 + 4] ! (delay slot) store bytes 5, 6, 7, 8 |
|
325 |
||
326 |
! n expired, i.e. end of destination buffer reached. Force null |
|
327 |
! termination of dst, then scan src until end found for |
|
328 |
! determination of strlen(src) |
|
329 |
! |
|
330 |
! here: %i3 points to current src byte |
|
331 |
! %i2 points one byte past end of dst |
|
332 |
! magic constants not loaded |
|
333 |
||
334 |
! Reached from .alignsrc when the counter %g4 hit zero, i.e. the byte just
! stored filled the last slot of dst.  That byte is overwritten with the
! terminator; execution then continues into .getstrlen.
.forcenullunalign: |
|
335 |
add %i2, %g4, %i2 ! we need a single dst ptr |
|
336 |
stb %g0, [%i2 - 1] ! force string terminating null byte (replaces last byte stored) |
|
337 |
||
338 |
! Compute strlen(src) when nothing more may be stored.  %i3 is the current
! src pointer and may be unaligned.  The magic constants are (re)built
! here, with %i2 used as a scratch register.  Bytes are then examined one
! at a time until %i3 is dword aligned, at which point the dword-wise scan
! at .searchword2 takes over with the first dword already loaded in %l1.
.getstrlen: |
|
339 |
sethi %hi(0x01010101), %i4 ! Alan Mycroft's magic1 |
|
340 |
or %i4, %lo(0x01010101),%i4! finish loading magic1 |
|
341 |
sllx %i4, 32, %i2 ! spread magic1 |
|
342 |
or %i4, %i2, %i4 ! to all 64 bits |
|
343 |
sllx %i4, 7, %i5 ! Alan Mycroft's magic2 |
|
344 |
nop ! align loop to 16-byte boundary |
|
345 |
||
346 |
.getstrlenloop: |
|
347 |
andcc %i3, 7, %g0 ! src dword aligned? |
|
348 |
bz,a,pn %ncc, .searchword2 ! yup, now search a dword at a time |
|
349 |
ldx [%i3], %l1 ! (annulled slot, taken path only) src dword |
|
350 |
ldub [%i3], %l1 ! load src byte |
|
351 |
andcc %l1, 0xff, %g0 ! end of src reached? |
|
352 |
bnz,a %ncc, .getstrlenloop ! no, keep scanning |
|
353 |
add %i3, 1, %i3 ! (annulled slot, taken path only) src++ |
|
354 |
sub %i3, %i1, %i0 ! len = src - orig src |
|
355 |
ret ! done |
|
356 |
restore %i0, %g0, %o0 ! restore register window, return len |
|
357 |
||
358 |
nop ! pad to 16-byte boundary |
|
359 |
nop ! pad to 16-byte boundary |
|
360 |
SET_SIZE(strlcpy) |