1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33#include <stddef.h>
34
35
36
/* Smallest remaining byte count for which memcpy() switches to its
   movem block-copy loop.  That loop moves 44 bytes per iteration, so
   the cut-over is two such transfers.  */
#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2)

/* Select the assembler syntax in which register names are written
   without a prefix; the inline assembly in memcpy() names r10-r13 and
   sp bare.  */
__asm__ (".syntax no_register_prefix");
41
42void *
43memcpy(void *pdst, const void *psrc, size_t pn)
44{
45
46
47
48
49
50
51
52 register void *return_dst __asm__ ("r10") = pdst;
53 register unsigned char *dst __asm__ ("r13") = pdst;
54 register unsigned const char *src __asm__ ("r11") = psrc;
55 register int n __asm__ ("r12") = pn;
56
57
58
59
60 if (((unsigned long) dst & 3) != 0
61
62
63 && n >= 3)
64 {
65 if ((unsigned long) dst & 1)
66 {
67 n--;
68 *dst = *src;
69 src++;
70 dst++;
71 }
72
73 if ((unsigned long) dst & 2)
74 {
75 n -= 2;
76 *(short *) dst = *(short *) src;
77 src += 2;
78 dst += 2;
79 }
80 }
81
82
83 if (n >= MEMCPY_BY_BLOCK_THRESHOLD)
84 {
85
86
87
88
89 __asm__ volatile
90 ("\
91 ;; GCC does promise correct register allocations, but let's \n\
92 ;; make sure it keeps its promises. \n\
93 .ifnc %0-%1-%2,$r13-$r11-$r12 \n\
94 .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\
95 .endif \n\
96 \n\
97 ;; Save the registers we'll use in the movem process \n\
98 ;; on the stack. \n\
99 subq 11*4,sp \n\
100 movem r10,[sp] \n\
101 \n\
102 ;; Now we've got this: \n\
103 ;; r11 - src \n\
104 ;; r13 - dst \n\
105 ;; r12 - n \n\
106 \n\
107 ;; Update n for the first loop. \n\
108 subq 44,r12 \n\
1090: \n\
110"
111#ifdef __arch_common_v10_v32
112
113
114" setf\n"
115#endif
116" movem [r11+],r10 \n\
117 subq 44,r12 \n\
118 bge 0b \n\
119 movem r10,[r13+] \n\
120 \n\
121 ;; Compensate for last loop underflowing n. \n\
122 addq 44,r12 \n\
123 \n\
124 ;; Restore registers from stack. \n\
125 movem [sp+],r10"
126
127
128 : "=r" (dst), "=r" (src), "=r" (n)
129
130
131 : "0" (dst), "1" (src), "2" (n));
132 }
133
134 while (n >= 16)
135 {
136 *(long *) dst = *(long *) src; dst += 4; src += 4;
137 *(long *) dst = *(long *) src; dst += 4; src += 4;
138 *(long *) dst = *(long *) src; dst += 4; src += 4;
139 *(long *) dst = *(long *) src; dst += 4; src += 4;
140
141 n -= 16;
142 }
143
144 switch (n)
145 {
146 case 0:
147 break;
148
149 case 1:
150 *dst = *src;
151 break;
152
153 case 2:
154 *(short *) dst = *(short *) src;
155 break;
156
157 case 3:
158 *(short *) dst = *(short *) src; dst += 2; src += 2;
159 *dst = *src;
160 break;
161
162 case 4:
163 *(long *) dst = *(long *) src;
164 break;
165
166 case 5:
167 *(long *) dst = *(long *) src; dst += 4; src += 4;
168 *dst = *src;
169 break;
170
171 case 6:
172 *(long *) dst = *(long *) src; dst += 4; src += 4;
173 *(short *) dst = *(short *) src;
174 break;
175
176 case 7:
177 *(long *) dst = *(long *) src; dst += 4; src += 4;
178 *(short *) dst = *(short *) src; dst += 2; src += 2;
179 *dst = *src;
180 break;
181
182 case 8:
183 *(long *) dst = *(long *) src; dst += 4; src += 4;
184 *(long *) dst = *(long *) src;
185 break;
186
187 case 9:
188 *(long *) dst = *(long *) src; dst += 4; src += 4;
189 *(long *) dst = *(long *) src; dst += 4; src += 4;
190 *dst = *src;
191 break;
192
193 case 10:
194 *(long *) dst = *(long *) src; dst += 4; src += 4;
195 *(long *) dst = *(long *) src; dst += 4; src += 4;
196 *(short *) dst = *(short *) src;
197 break;
198
199 case 11:
200 *(long *) dst = *(long *) src; dst += 4; src += 4;
201 *(long *) dst = *(long *) src; dst += 4; src += 4;
202 *(short *) dst = *(short *) src; dst += 2; src += 2;
203 *dst = *src;
204 break;
205
206 case 12:
207 *(long *) dst = *(long *) src; dst += 4; src += 4;
208 *(long *) dst = *(long *) src; dst += 4; src += 4;
209 *(long *) dst = *(long *) src;
210 break;
211
212 case 13:
213 *(long *) dst = *(long *) src; dst += 4; src += 4;
214 *(long *) dst = *(long *) src; dst += 4; src += 4;
215 *(long *) dst = *(long *) src; dst += 4; src += 4;
216 *dst = *src;
217 break;
218
219 case 14:
220 *(long *) dst = *(long *) src; dst += 4; src += 4;
221 *(long *) dst = *(long *) src; dst += 4; src += 4;
222 *(long *) dst = *(long *) src; dst += 4; src += 4;
223 *(short *) dst = *(short *) src;
224 break;
225
226 case 15:
227 *(long *) dst = *(long *) src; dst += 4; src += 4;
228 *(long *) dst = *(long *) src; dst += 4; src += 4;
229 *(long *) dst = *(long *) src; dst += 4; src += 4;
230 *(short *) dst = *(short *) src; dst += 2; src += 2;
231 *dst = *src;
232 break;
233 }
234
235 return return_dst;
236}
237