author | Mark J. Nelson <Mark.J.Nelson@Sun.COM> |
Wed, 06 Aug 2008 16:29:39 -0600 | |
changeset 7298 | b69e27387f74 |
parent 6812 | febeba71273d |
permissions | -rw-r--r-- |
0 | 1 |
/* |
2 |
* CDDL HEADER START |
|
3 |
* |
|
4 |
* The contents of this file are subject to the terms of the |
|
6812 | 5 |
* Common Development and Distribution License (the "License"). |
6 |
* You may not use this file except in compliance with the License. |
|
0 | 7 |
* |
8 |
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE |
|
9 |
* or http://www.opensolaris.org/os/licensing. |
|
10 |
* See the License for the specific language governing permissions |
|
11 |
* and limitations under the License. |
|
12 |
* |
|
13 |
* When distributing Covered Code, include this CDDL HEADER in each |
|
14 |
* file and include the License file at usr/src/OPENSOLARIS.LICENSE. |
|
15 |
* If applicable, add the following below this CDDL HEADER, with the |
|
16 |
* fields enclosed by brackets "[]" replaced with your own identifying |
|
17 |
* information: Portions Copyright [yyyy] [name of copyright owner] |
|
18 |
* |
|
19 |
* CDDL HEADER END |
|
20 |
*/ |
|
6812 | 21 |
|
0 | 22 |
/* |
6812 | 23 |
* Copyright 2008 Sun Microsystems, Inc. All rights reserved. |
24 |
* Use is subject to license terms. |
|
0 | 25 |
*/ |
26 |
||
7298
b69e27387f74
6733918 Teamware has retired, please welcome your new manager, Mercurial
Mark J. Nelson <Mark.J.Nelson@Sun.COM>
parents:
6812
diff
changeset
|
27 |
.file "__align_cpy_8.s" |
0 | 28 |
|
29 |
/* __align_cpy_8(s1, s2, n) |
|
30 |
* |
|
31 |
* Copy 8-byte aligned source to 8-byte aligned target in multiples of 8 bytes. |
|
32 |
* |
|
33 |
* Input: |
|
34 |
* o0 address of target |
|
35 |
* o1 address of source |
|
36 |
* o2 number of bytes to copy (must be a multiple of 8) |
|
37 |
* Output: |
|
38 |
* o0 address of target |
|
39 |
* Caller's registers that have been changed by this function: |
|
40 |
* o1-o5 |
|
41 |
* |
|
42 |
* Note: |
|
43 |
* This helper routine will not be used by any 32-bit compilations. To do |
|
44 |
* so would break binary compatibility with previous versions of Solaris. |
|
45 |
* |
|
46 |
* Assumptions: |
|
47 |
* Source and target addresses are 8-byte aligned. |
|
48 |
* Bytes to be copied are non-overlapping or _exactly_ overlapping. |
|
49 |
* The number of bytes to be copied is a multiple of 8. |
|
50 |
* Call will _usually_ be made with a byte count of more than 4*8 and |
|
51 |
* less than a few hundred bytes. Legal values are 0 to MAX_SIZE_T. |
|
52 |
* |
|
53 |
* Optimization attempt: |
|
54 |
* Reasonable speed for a generic v9. Going for 32 bytes at a time |
|
55 |
* rather than 16 bytes at a time did not result in a time saving for |
|
56 |
* the number of bytes expected to be copied. No timing runs using other |
|
57 |
* levels of optimization have been tried yet. |
|
58 |
* |
|
59 |
* Even when multiples of 16 bytes were used, the savings by going for 32 bytes |
|
60 |
* at a time were about 2%. Thus, __align_cpy_16 is a second entry point to |
|
61 |
* the same code as __align_cpy_8. |
|
62 |
* |
|
63 |
* Register usage: |
|
64 |
* o1 source address (updated for each read) |
|
65 |
* o2 byte count remaining |
|
66 |
* o3 contents being copied |
|
67 |
* o4 more contents being copied |
|
68 |
* o5 target address |
|
69 |
*/ |
|
70 |
||
71 |
#include <sys/asm_linkage.h> |
|
72 |
||
73 |
/*
 * Shared body for __align_cpy_8 and __align_cpy_16.
 *
 * Flow notes:
 *   - %o2 is biased by -8 before the first test, so from then on it holds
 *     (bytes still to copy) - 8; its sign/zero state picks the exit path.
 *   - The count tests (bpos, bg) are signed comparisons on that value.
 *   - Branches carrying ",a" execute their delay-slot load only when the
 *     branch is taken; the other delay slots always execute.
 *   - %o0 is never written, so it still holds the target address on return.
 */
	ENTRY(__align_cpy_8)
	ENTRY(__align_cpy_16)
	cmp	%o0, %o1		! target == source?
	be,pn	%xcc, .same_buf		!   yes: nothing has to move
	subcc	%o2, 8, %o2		! (delay) %o2 = n - 8; sets cc for the tests below
	bz,pn	%xcc, .load_last8	! n == 8: a single doubleword
	mov	%o0, %o5		! (delay) walk %o5 so %o0 survives as the result
	bpos,a,pt %xcc, .copy16		! n - 8 > 0: 16 or more bytes to move
	ldx	[%o1], %o3		! (delay, taken only) preload first doubleword
.same_buf:
	retl				! identical buffers, or count below 8
	nop

	.align	32
.copy16:				! Loop body: two doublewords per pass.
	subcc	%o2, 16, %o2		! charge this pass; cc is consumed by bg/bz
	ldx	[%o1+8], %o4		! second doubleword (first is already in %o3)
	add	%o1, 16, %o1		! step source
	stx	%o3, [%o5]
	stx	%o4, [%o5+8]
	add	%o5, 16, %o5		! step target (add leaves cc untouched)
	bg,a,pt	%xcc, .copy16		! %o2 > 0: another full 16-byte pass
	ldx	[%o1], %o3		! (delay, taken only) preload next doubleword

	bz,a,pt	%xcc, .store_last8	! %o2 == 0: exactly one doubleword left
	ldx	[%o1], %o3		! (delay, taken only) fetch that doubleword

	retl				! %o2 < 0: everything has been copied
	nop

.load_last8:
	ldx	[%o1], %o3		! the only doubleword when n == 8
.store_last8:
	stx	%o3, [%o5]		! write the trailing doubleword
	retl
	nop

	SET_SIZE(__align_cpy_8)
	SET_SIZE(__align_cpy_16)