1
2
3
4
5
6
7
8
9
10
11
12
13
14#include "asmdefs.h"
15 /* Register aliases used by the copy routine. NOTE(review): the decimal prefix on each line looks like line-number residue from extraction; it is left untouched. */
16#define dstin x0 /* destination pointer on entry; only read by the instructions in this file */
17#define src x1 /* source pointer; moved by the forward long copy, reused as H_l */
18#define count x2 /* byte count; becomes the remaining-bytes counter in the long copies, reused as G_l */
19#define dst x3 /* dstin rounded down to 16 bytes; store pointer of the forward loop, reused as G_h */
20#define srcend x4 /* src + count; load pointer of the backward loop, reused as H_h */
21#define dstend x5 /* dstin + count; store pointer of the backward loop */
22#define A_l x6 /* A..F: 64-bit register pairs (_l/_h) that carry data between ldp and stp */
23#define A_lw w6 /* 32-bit view of A_l, used for the 4-byte and 1-byte accesses */
24#define A_h x7
25#define B_l x8
26#define B_lw w8 /* 32-bit view of B_l */
27#define B_h x9
28#define C_l x10
29#define C_lw w10 /* 32-bit view of C_l */
30#define C_h x11
31#define D_l x12
32#define D_h x13
33#define E_l x14 /* same register as tmp1 below */
34#define E_h x15
35#define F_l x16
36#define F_h x17
37#define G_l count /* G and H are not extra registers: they overwrite count/dst ... */
38#define G_h dst
39#define H_l src /* ... and src/srcend, so they may only be loaded once those values are no longer needed */
40#define H_h srcend
41#define tmp1 x14 /* scratch; shares x14 with E_l, so the two must never be live at the same time */
42 /* memcpy / memmove: copy count bytes from src to dstin. */
43 /* Both names are introduced by ENTRY_ALIAS/ENTRY below; these macros are not defined in this file (presumably asmdefs.h makes both symbols label this entry point -- confirm). */
44 /* In: x0 = dstin, x1 = src, x2 = count. x0 is not written by any instruction here, so it still holds dstin at ret. */
45 /* The visible instructions write only x1-x17, never reference sp and make no calls. */
46 /* Accesses are issued at arbitrary byte offsets, so this assumes unaligned loads/stores are permitted -- NOTE(review): confirm for the target. */
47 /* Size classes: 0..32 (small), 33..128 (medium), more than 128 (long). */
48 /* Small and medium copies do every load before the first store, so overlapping buffers are handled without a direction check. */
49 /* They cover a range of lengths with fixed-size chunks taken from the start and from the end; the chunks may overlap in the middle. */
50 /* Long copies compare dstin - src with count and run backwards when the destination starts inside the source range. */
51 /* Each long loop handles 64 bytes per iteration: it stores the previously loaded 64 bytes to 16-byte-aligned destination addresses while loading the next 64. */
52 /* The final 64 bytes (forward) or first 64 bytes (backward) are always copied separately from the original end/start pointers. */
53
54
55
56ENTRY_ALIAS (memmove)
57ENTRY (memcpy)
58 PTR_ARG (0) /* PTR_ARG/SIZE_ARG are defined outside this file; presumably they normalise pointer args 0,1 and size arg 2 -- TODO confirm */
59 PTR_ARG (1)
60 SIZE_ARG (2)
61 add srcend, src, count /* srcend = one past the last source byte */
62 add dstend, dstin, count /* dstend = one past the last destination byte */
63 cmp count, 128
64 b.hi L(copy_long) /* unsigned: count > 128 */
65 cmp count, 32
66 b.hi L(copy32_128) /* 33..128 */
67
68 /* Small copies: 0..32 bytes. */
69 cmp count, 16
70 b.lo L(copy16) /* count < 16 */
71 ldp A_l, A_h, [src] /* 16..32: first 16 bytes */
72 ldp D_l, D_h, [srcend, -16] /* last 16 bytes; both loads precede the stores */
73 stp A_l, A_h, [dstin]
74 stp D_l, D_h, [dstend, -16] /* overlaps the previous store whenever count < 32 */
75 ret
76
77 /* count < 16. Bit 3 of count set means 8..15: copy the first and the last 8 bytes. */
78L(copy16):
79 tbz count, 3, L(copy8)
80 ldr A_l, [src]
81 ldr A_h, [srcend, -8]
82 str A_l, [dstin]
83 str A_h, [dstend, -8]
84 ret
85
86 .p2align 3
87 /* count < 8. Bit 2 of count set means 4..7: copy the first and the last 4 bytes. */
88L(copy8):
89 tbz count, 2, L(copy4)
90 ldr A_lw, [src]
91 ldr B_lw, [srcend, -4]
92 str A_lw, [dstin]
93 str B_lw, [dstend, -4]
94 ret
95
96 /* count < 4. Zero returns at once; 1..3 copy bytes 0, count/2 and count-1, i.e. indices {0,0,0}, {0,1,1}, {0,1,2}. */
97L(copy4):
98 cbz count, L(copy0)
99 lsr tmp1, count, 1 /* tmp1 = count / 2: index of the middle byte */
100 ldrb A_lw, [src]
101 ldrb C_lw, [srcend, -1]
102 ldrb B_lw, [src, tmp1]
103 strb A_lw, [dstin]
104 strb B_lw, [dstin, tmp1]
105 strb C_lw, [dstend, -1]
106L(copy0):
107 ret /* also the exit used by copy_long when dstin == src */
108
109 .p2align 4
110 /* Medium copies: 33..128 bytes. The first 32 and last 32 bytes are always loaded up front. */
111L(copy32_128):
112 ldp A_l, A_h, [src]
113 ldp B_l, B_h, [src, 16]
114 ldp C_l, C_h, [srcend, -32]
115 ldp D_l, D_h, [srcend, -16]
116 cmp count, 64
117 b.hi L(copy128) /* 65..128 needs more than these 64 bytes */
118 stp A_l, A_h, [dstin] /* 33..64: first 32 + last 32 bytes cover the whole range */
119 stp B_l, B_h, [dstin, 16]
120 stp C_l, C_h, [dstend, -32]
121 stp D_l, D_h, [dstend, -16]
122 ret
123
124 .p2align 4
125 /* 65..128 bytes: A,B (and C,D from the end) are already loaded; add source bytes 32..63. */
126L(copy128):
127 ldp E_l, E_h, [src, 32]
128 ldp F_l, F_h, [src, 48]
129 cmp count, 96
130 b.ls L(copy96) /* 65..96: first 64 + last 32 bytes are enough */
131 ldp G_l, G_h, [srcend, -64] /* 97..128: G overwrites count and dst, which are not needed again on this path */
132 ldp H_l, H_h, [srcend, -48] /* H overwrites src and srcend; only dstin/dstend are used from here on */
133 stp G_l, G_h, [dstend, -64] /* first stores of this path: every load has already been issued */
134 stp H_l, H_h, [dstend, -48]
135L(copy96):
136 stp A_l, A_h, [dstin]
137 stp B_l, B_h, [dstin, 16]
138 stp E_l, E_h, [dstin, 32]
139 stp F_l, F_h, [dstin, 48]
140 stp C_l, C_h, [dstend, -32]
141 stp D_l, D_h, [dstend, -16]
142 ret
143
144 .p2align 4
145 /* Long copies: more than 128 bytes. */
146L(copy_long):
147 /* Choose the direction from the unsigned distance dstin - src. */
148 sub tmp1, dstin, src
149 cbz tmp1, L(copy0) /* dstin == src: nothing to copy */
150 cmp tmp1, count
151 b.lo L(copy_long_backwards) /* unsigned: dstin lies inside (src, srcend), so a forward copy would overwrite unread source */
152
153 /* Forward copy: store the first 16 bytes unaligned, then continue from dstin rounded down to 16 bytes. */
154 /* src is moved back by the same amount so that src + k and dst + k address corresponding bytes. */
155 ldp D_l, D_h, [src] /* first 16 source bytes */
156 and tmp1, dstin, 15 /* tmp1 = dstin modulo 16 */
157 bic dst, dstin, 15 /* dst = dstin with the low 4 bits cleared */
158 sub src, src, tmp1
159 add count, count, tmp1 /* count = dstend - dst */
160 ldp A_l, A_h, [src, 16]
161 stp D_l, D_h, [dstin] /* unaligned head */
162 ldp B_l, B_h, [src, 32]
163 ldp C_l, C_h, [src, 48]
164 ldp D_l, D_h, [src, 64]! /* pre-index: src += 64 before the load; A..D now hold the bytes for dst+16 .. dst+79 */
165 subs count, count, 128 + 16 /* drop the 16-byte offset, the 64 bytes held in A..D and the 64 tail bytes */
166 b.ls L(copy64_from_end) /* nothing left for the loop */
167
168L(loop64):
169 stp A_l, A_h, [dst, 16] /* store the held 64 bytes while loading the next 64 */
170 ldp A_l, A_h, [src, 16]
171 stp B_l, B_h, [dst, 32]
172 ldp B_l, B_h, [src, 32]
173 stp C_l, C_h, [dst, 48]
174 ldp C_l, C_h, [src, 48]
175 stp D_l, D_h, [dst, 64]! /* pre-index: dst += 64 */
176 ldp D_l, D_h, [src, 64]! /* pre-index: src += 64 */
177 subs count, count, 64
178 b.hi L(loop64)
179
180 /* Store the 64 bytes still held in A..D, and copy the last 64 bytes relative to srcend/dstend. */
181L(copy64_from_end):
182 ldp E_l, E_h, [srcend, -64] /* E_l is x14; tmp1 is no longer needed here */
183 stp A_l, A_h, [dst, 16]
184 ldp A_l, A_h, [srcend, -48] /* A..C are refilled with tail data as soon as they have been stored */
185 stp B_l, B_h, [dst, 32]
186 ldp B_l, B_h, [srcend, -32]
187 stp C_l, C_h, [dst, 48]
188 ldp C_l, C_h, [srcend, -16]
189 stp D_l, D_h, [dst, 64]
190 stp E_l, E_h, [dstend, -64] /* tail stores may overlap bytes already written above */
191 stp A_l, A_h, [dstend, -48]
192 stp B_l, B_h, [dstend, -32]
193 stp C_l, C_h, [dstend, -16]
194 ret
195
196 .p2align 4
197
198 /* Backward copy, taken when dstin lies inside the source range. Mirror image of the forward path: */
199 /* store the last 16 bytes unaligned, round dstend down to 16 bytes, then work towards lower addresses. */
200L(copy_long_backwards):
201 ldp D_l, D_h, [srcend, -16] /* last 16 source bytes */
202 and tmp1, dstend, 15 /* tmp1 = dstend modulo 16 */
203 sub srcend, srcend, tmp1
204 sub count, count, tmp1 /* count = bytes below the rounded-down dstend */
205 ldp A_l, A_h, [srcend, -16]
206 stp D_l, D_h, [dstend, -16] /* unaligned tail; dstend has not been adjusted yet */
207 ldp B_l, B_h, [srcend, -32]
208 ldp C_l, C_h, [srcend, -48]
209 ldp D_l, D_h, [srcend, -64]! /* pre-index: srcend -= 64 before the load */
210 sub dstend, dstend, tmp1 /* dstend is now 16-byte aligned */
211 subs count, count, 128 /* drop the 64 bytes held in A..D and the 64 head bytes */
212 b.ls L(copy64_from_start) /* nothing left for the loop */
213
214L(loop64_backwards):
215 stp A_l, A_h, [dstend, -16] /* store the held 64 bytes while loading the next lower 64 */
216 ldp A_l, A_h, [srcend, -16]
217 stp B_l, B_h, [dstend, -32]
218 ldp B_l, B_h, [srcend, -32]
219 stp C_l, C_h, [dstend, -48]
220 ldp C_l, C_h, [srcend, -48]
221 stp D_l, D_h, [dstend, -64]! /* pre-index: dstend -= 64 */
222 ldp D_l, D_h, [srcend, -64]! /* pre-index: srcend -= 64 */
223 subs count, count, 64
224 b.hi L(loop64_backwards)
225
226 /* Store the 64 bytes still held in A..D, and copy the first 64 bytes relative to src/dstin (neither is modified on the backward path). */
227L(copy64_from_start):
228 ldp G_l, G_h, [src, 48] /* G overwrites count and dst, which are not needed again */
229 stp A_l, A_h, [dstend, -16]
230 ldp A_l, A_h, [src, 32] /* A..C are refilled with head data as soon as they have been stored */
231 stp B_l, B_h, [dstend, -32]
232 ldp B_l, B_h, [src, 16]
233 stp C_l, C_h, [dstend, -48]
234 ldp C_l, C_h, [src]
235 stp D_l, D_h, [dstend, -64]
236 stp G_l, G_h, [dstin, 48] /* head stores may overlap bytes already written above */
237 stp A_l, A_h, [dstin, 32]
238 stp B_l, B_h, [dstin, 16]
239 stp C_l, C_h, [dstin]
240 ret
241
242END (memcpy)
243