author | Mark J. Nelson <Mark.J.Nelson@Sun.COM> |
Wed, 06 Aug 2008 16:29:39 -0600 | |
changeset 7298 | b69e27387f74 |
parent 6812 | febeba71273d |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
6812 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
0 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
6812 | 21 |
|
0 | 22 |
/* |
6812 | 23 |
* Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
0 | 24 |
* Use is subject to license terms. |
25 |
*/ |
|
26 |
||
7298
b69e27387f74
6733918 Teamware has retired, please welcome your new manager, Mercurial
Mark J. Nelson <Mark.J.Nelson@Sun.COM>
parents:
6812
diff
changeset
|
27 |
.file "strncpy.s" |
0 | 28 |
|
29 |
/* |
|
30 |
* strncpy(s1, s2, n) |
|
31 |
* |
|
32 |
* Copy string s2 to s1, truncating or null-padding to always copy n bytes |
|
33 |
* return s1. |
|
34 |
* |
|
35 |
* Fast assembler language version of the following C-program for strncpy |
|
36 |
* which represents the `standard' for the C-library. |
|
37 |
* |
|
38 |
* char * |
|
39 |
* strncpy(char *s1, const char *s2, size_t n) |
|
40 |
* { |
|
41 |
* char *os1 = s1; |
|
42 |
* |
|
43 |
* n++; |
|
44 |
* while ((--n != 0) && ((*s1++ = *s2++) != '\0')) |
|
45 |
* ; |
|
46 |
* if (n != 0) |
|
47 |
* while (--n != 0) |
|
48 |
* *s1++ = '\0'; |
|
49 |
* return (os1); |
|
50 |
* } |
|
51 |
*/ |
|
52 |
||
53 |
#include <sys/asm_linkage.h> |
|
54 |
||
55 |
! strncpy works similarly to strcpy, except that n bytes of s2 |
|
56 |
! are copied to s1. If a null character is reached in s2 yet more |
|
57 |
! bytes remain to be copied, strncpy will copy null bytes into |
|
58 |
! the destination string. |
|
59 |
! |
|
60 |
! This implementation works by first aligning the src ptr and |
|
61 |
! performing small copies until it is aligned. Then, the string |
|
62 |
! is copied based upon destination alignment. (byte, half-word, |
|
63 |
! word, etc.) |
|
64 |
||
65 |
! Register usage (as set up by the instructions below):
!   entry:  %o0 = s1 (dst), %o1 = s2 (src), %o2 = n
!   short path (n < 7, leaf routine, no new register window):
!     %o2 = dst + n, %o3 = src + n, %o4 = -n counting up to 0
!   long path (after the save, the %o values above are seen as %i):
!     %i2 = dst + n, %i3 = src + n, %i4 = -n counting up to 0
!     %l1 = 0x01010101, %i5 = 0x80808080 for the zero-byte test
!     (in the word loops %i2 is biased by -4, because stores use
!     %i4 after it has already been advanced by 4)
!   return: %o0 = original s1
ENTRY(strncpy) |

66 |
||
67 |
.align 32 |

68 |
subcc %g0, %o2, %o4 ! %o4 = -n, negative index that counts up to 0 |

69 |
bz .doneshort ! if n == 0, done |

70 |
cmp %o2, 7 ! n < 7 ? (delay slot; tested by blu below) |

71 |
add %o1, %o2, %o3 ! %o3 = src + n |

72 |
blu .shortcpy ! n < 7, use byte-wise copy |

73 |
add %o0, %o2, %o2 ! %o2 = dst + n (delay slot) |

74 |
andcc %o1, 3, %o5 ! src word aligned ? |

75 |
bz .wordaligned ! yup |

76 |
save %sp, -0x40, %sp ! create new register window (delay slot, runs on both paths) |

77 |
sub %i5, 4, %i5 ! %i5 = -(bytes until src aligned) |

78 |
nop ! align loop on 16-byte boundary |

79 |
nop ! align loop on 16-byte boundary |

80 |
||
81 |
.alignsrc: |

82 |
ldub [%i3 + %i4], %i1 ! src[] |

83 |
stb %i1, [%i2 + %i4] ! dst[] = src[] |

84 |
inccc %i4 ! src++, dst++, n-- |

85 |
bz .done ! n == 0, done |

86 |
tst %i1 ! end of src reached (null byte) ? |

87 |
bz,a .bytepad ! yes, at least one byte to pad here |

88 |
add %i2, %i4, %l0 ! need single dest pointer for fill |

89 |
inccc %i5 ! src aligned now? |

90 |
bnz .alignsrc ! no, copy another byte |

91 |
.empty |

92 |
||
93 |
.wordaligned: |

94 |
add %i2, %i4, %l0 ! dst |

95 |
sethi %hi(0x01010101), %l1 ! Alan Mycroft's magic1 |

96 |
sub %i2, 4, %i2 ! adjust for dest pre-incr in cpy loops |

97 |
or %l1, %lo(0x01010101),%l1! finish loading magic1 |

98 |
andcc %l0, 3, %g1 ! destination word aligned ? |

99 |
bnz .dstnotaligned ! nope |

100 |
sll %l1, 7, %i5 ! create Alan Mycroft's magic2 (0x80808080) |

101 |
||
102 |
.storeword: |

103 |
lduw [%i3 + %i4], %i1 ! load next 32-bit src word (called dword below) |

104 |
addcc %i4, 4, %i4 ! n += 4, src += 4, dst += 4 |

105 |
bcs .lastword ! if counter wraps, last word |

106 |
andn %i5, %i1, %g1 ! ~dword & 0x80808080 |

107 |
sub %i1, %l1, %l0 ! dword - 0x01010101 |

108 |
andcc %l0, %g1, %g0 ! ((dword - 0x01010101) & ~dword & 0x80808080) |

109 |
bz,a .storeword ! no zero byte if magic expression == 0 |

110 |
stw %i1, [%i2 + %i4] ! store word to dst (address pre-incremented) |

111 |
||
112 |
! n has not expired, but src is at the end. we need to push out the |

113 |
! remaining src bytes and then start padding with null bytes |

114 |
||
115 |
.zerobyte: |

116 |
add %i2, %i4, %l0 ! pointer to dest string |

117 |
srl %i1, 24, %g1 ! first byte |

118 |
stb %g1, [%l0] ! store it |

119 |
sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1 |

120 |
sra %g1, 31, %g1 ! byte == 0 ? -1 : 0 |

121 |
andn %i1, %g1, %i1 ! if byte == 0, start padding with null bytes |

122 |
srl %i1, 16, %g1 ! second byte |

123 |
stb %g1, [%l0 + 1] ! store it |

124 |
and %g1, 0xff, %g1 ! isolate byte |

125 |
sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1 |

126 |
sra %g1, 31, %g1 ! byte == 0 ? -1 : 0 |

127 |
andn %i1, %g1, %i1 ! if byte == 0, start padding with null bytes |

128 |
srl %i1, 8, %g1 ! third byte |

129 |
stb %g1, [%l0 + 2] ! store it |

130 |
and %g1, 0xff, %g1 ! isolate byte |

131 |
sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1 |

132 |
sra %g1, 31, %g1 ! byte == 0 ? -1 : 0 |

133 |
andn %i1, %g1, %i1 ! if byte == 0, start padding with null bytes |

134 |
stb %i1, [%l0 + 3] ! store fourth byte |

135 |
addcc %i4, 8, %g0 ! number of pad bytes <= 8 ? (carry set if so) |

136 |
bcs .bytepad ! yes, do simple byte wise fill |

137 |
add %l0, 4, %l0 ! dst += 4 |

138 |
andcc %l0, 3, %l1 ! dst offset relative to word boundary |

139 |
bz .fillaligned ! dst already word aligned |

140 |
||
141 |
! here there is at least one more byte to zero out: otherwise we would |

142 |
! have exited through label .lastword |

143 |
||
144 |
sub %l1, 4, %l1 ! %l1 = -(bytes to align dst to word boundary) |

145 |
.makealigned: |

146 |
stb %g0, [%l0] ! dst[] = 0 |

147 |
addcc %i4, 1, %i4 ! n-- |

148 |
bz .done ! n == 0, we are done |

149 |
addcc %l1, 1, %l1 ! any more byte needed to align |

150 |
bnz .makealigned ! yup, pad another byte |

151 |
add %l0, 1, %l0 ! dst++ |

152 |
nop ! pad to align copy loop below |

153 |
||
154 |
! here we know that there are at least another 4 bytes to pad, since |

155 |
! we don't get here unless there were more than 8 bytes to pad to begin |

156 |
! with, and we have padded at most 3 bytes during dst aligning |

157 |
||
158 |
.fillaligned: |

159 |
add %i4, 3, %i2 ! round up to next word boundary |

160 |
and %i2, -4, %l1 ! pointer to next word boundary |

161 |
and %i2, 4, %i2 ! word count odd ? 4 : 0 |

162 |
stw %g0, [%l0] ! store first word |

163 |
addcc %l1, %i2, %l1 ! dword count == 1 ? |

164 |
add %i4, %i2, %i4 ! if word count odd, n -= 4 |

165 |
bz .bytepad ! if word count == 1, pad bytes left |

166 |
add %l0, %i2, %l0 ! bump dst if word count odd |

167 |
||
168 |
.fillword: |

169 |
addcc %l1, 8, %l1 ! count -= 8 |

170 |
stw %g0, [%l0] ! dst[n] = 0 |

171 |
stw %g0, [%l0 + 4] ! dst[n+4] = 0 |

172 |
add %l0, 8, %l0 ! dst += 8 |

173 |
bcc .fillword ! fill words until count == 0 |

174 |
addcc %i4, 8, %i4 ! n -= 8 |

175 |
bz .done ! if n == 0, we are done |

176 |
.empty |

177 |
||
178 |
.bytepad: |

179 |
and %i4, 1, %i2 ! byte count odd ? 1 : 0 |

180 |
stb %g0, [%l0] ! store first byte |

181 |
addcc %i4, %i2, %i4 ! byte count == 1 ? |

182 |
bz .done ! yup, we are done |

183 |
add %l0, %i2, %l0 ! bump pointer if odd |

184 |
||
185 |
.fillbyte: |

186 |
addcc %i4, 2, %i4 ! n -= 2 |

187 |
stb %g0, [%l0] ! dst[n] = 0 |

188 |
stb %g0, [%l0 + 1] ! dst[n+1] = 0 |

189 |
bnz .fillbyte ! fill until n == 0 |

190 |
add %l0, 2, %l0 ! dst += 2 |

191 |
||
192 |
.done: |

193 |
ret ! done |

194 |
restore %i0, %g0, %o0 ! restore reg window, return dst |

195 |
||
196 |
! this is the last word. It may contain null bytes. store bytes |

197 |
! until n == 0. once a null byte is seen, the rest are stored as null |

198 |
||
199 |
.lastword: |

200 |
sub %i4, 4, %i4 ! undo counter pre-increment |

201 |
add %i2, 4, %i2 ! adjust dst for counter un-bumping |

202 |
||
203 |
srl %i1, 24, %g1 ! first byte |

204 |
stb %g1, [%i2 + %i4] ! store it |

205 |
inccc %i4 ! n-- |

206 |
bz .done ! if n == 0, we're done |

207 |
sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1 |

208 |
sra %g1, 31, %g1 ! byte == 0 ? -1 : 0 |

209 |
andn %i1, %g1, %i1 ! if byte == 0, start padding with null |

210 |
srl %i1, 16, %g1 ! second byte |

211 |
stb %g1, [%i2 + %i4] ! store it |

212 |
inccc %i4 ! n-- |

213 |
bz .done ! if n == 0, we're done |

214 |
and %g1, 0xff, %g1 ! isolate byte |

215 |
sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1 |

216 |
sra %g1, 31, %g1 ! byte == 0 ? -1 : 0 |

217 |
andn %i1, %g1, %i1 ! if byte == 0, start padding with null |

218 |
srl %i1, 8, %g1 ! third byte |

219 |
stb %g1, [%i2 + %i4] ! store it |

220 |
inccc %i4 ! n-- |

221 |
bz .done ! if n == 0, we're done |

222 |
and %g1, 0xff, %g1 ! isolate byte |

223 |
sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1 |

224 |
sra %g1, 31, %g1 ! byte == 0 ? -1 : 0 |

225 |
andn %i1, %g1, %i1 ! if byte == 0, start padding with null |

226 |
ba .done ! here n must be zero, we are done |

227 |
stb %i1, [%i2 + %i4] ! store fourth byte |

228 |
||
229 |
.dstnotaligned: |

230 |
cmp %g1, 2 ! dst half word aligned? |

231 |
be .storehalfword2 ! yup, store half word at a time |

232 |
.empty |

233 |
.storebyte: |

234 |
lduw [%i3 + %i4], %i1 ! x = src[] |

235 |
addcc %i4, 4, %i4 ! src += 4, dst += 4, n -= 4 |

236 |
bcs .lastword ! if counter wraps, last word |

237 |
andn %i5, %i1, %g1 ! ~x & 0x80808080 |

238 |
sub %i1, %l1, %l0 ! x - 0x01010101 |

239 |
andcc %l0, %g1, %g0 ! ((x - 0x01010101) & ~x & 0x80808080) |

240 |
bnz .zerobyte ! end of src found, may need to pad |

241 |
add %i2, %i4, %l0 ! dst (in pointer form) |

242 |
srl %i1, 24, %g1 ! %g1<7:0> = 1st byte; half-word aligned now |

243 |
stb %g1, [%l0] ! store first byte |

244 |
srl %i1, 8, %g1 ! %g1<15:0> = bytes 2, 3 |

245 |
sth %g1, [%l0 + 1] ! store bytes 2, 3 |

246 |
ba .storebyte ! next word |

247 |
stb %i1, [%l0 + 3] ! store fourth byte |

248 |
nop |

249 |
nop |

250 |
||
251 |
.storehalfword: |

252 |
lduw [%i3 + %i4], %i1 ! x = src[] |

253 |
.storehalfword2: |

254 |
addcc %i4, 4, %i4 ! src += 4, dst += 4, n -= 4 |

255 |
bcs .lastword ! if counter wraps, last word |

256 |
andn %i5, %i1, %g1 ! ~x & 0x80808080 |

257 |
sub %i1, %l1, %l0 ! x - 0x01010101 |

258 |
andcc %l0, %g1, %g0 ! ((x - 0x01010101) & ~x & 0x80808080) |

259 |
bnz .zerobyte ! x has zero byte, handle end cases |

260 |
add %i2, %i4, %l0 ! dst (in pointer form) |

261 |
srl %i1, 16, %g1 ! %g1<15:0> = bytes 1, 2 |

262 |
sth %g1, [%l0] ! store bytes 1, 2 |

263 |
ba .storehalfword ! next word |

264 |
sth %i1, [%l0 + 2] ! store bytes 3, 4 |

265 |
||
266 |
.shortcpy: |

267 |
ldub [%o3 + %o4], %o5 ! src[] |

268 |
stb %o5, [%o2 + %o4] ! dst[] = src[] |

269 |
inccc %o4 ! src++, dst++, n-- |

270 |
bz .doneshort ! if n == 0, done |

271 |
tst %o5 ! src[] == 0 ? |

272 |
bnz,a .shortcpy ! nope, next byte |

273 |
nop ! empty delay slot |

274 |
||
275 |
.padbyte: |

276 |
stb %g0, [%o2 + %o4] ! dst[] = 0 |

277 |
.padbyte2: |

278 |
addcc %o4, 1, %o4 ! dst++, n-- |

279 |
bnz,a .padbyte2 ! if n != 0, next byte |

280 |
stb %g0, [%o2 + %o4] ! dst[] = 0 |

281 |
nop ! align label below to 16-byte boundary |

282 |
||
283 |
.doneshort: |

284 |
retl ! return from leaf |

285 |
nop ! empty delay slot |

286 |
SET_SIZE(strncpy) |