1
2
3
4
5
6
7
8
9#include <linux/linkage.h>
10#include <asm/assembler.h>
11
12
13
14
15
16
17
/*
 * L(name) expands to the assembler-local symbol .Lname.  The ## token paste
 * is essential: without it every L(...) collapses to the single symbol ".L",
 * so all label definitions collide and all branches share one target.
 */
#define L(label) .L ## label

/*
 * Register roles.  Only x0-x17 are named here, all of them caller-saved
 * under AAPCS64, so the routine needs no save/restore.
 *
 *   dstin/src/count  incoming arguments (x0, x1, x2)
 *   dst              working destination pointer for the long forward copy
 *   srcend/dstend    one past the last source / destination byte
 *   A..F (_l/_h)     low/high halves of 16-byte data pairs for ldp/stp;
 *                    the _lw names are the 32-bit views of A_l, B_l, C_l
 */
#define dstin	x0
#define src	x1
#define count	x2
#define dst	x3
#define srcend	x4
#define dstend	x5
#define A_l	x6
#define A_lw	w6
#define A_h	x7
#define B_l	x8
#define B_lw	w8
#define B_h	x9
#define C_l	x10
#define C_lw	w10
#define C_h	x11
#define D_l	x12
#define D_h	x13
#define E_l	x14
#define E_h	x15
#define F_l	x16
#define F_h	x17
/*
 * G and H reuse the registers behind count, dst, src and srcend.  Loading
 * into them destroys those values, so they may only be used once the
 * aliased pointer/length is no longer needed on that path.
 */
#define G_l	count
#define G_h	dst
#define H_l	src
#define H_h	srcend
/* tmp1 shares x14 with E_l: the two must never be live at the same time. */
#define tmp1	x14
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/*
 * memcpy / memmove for AArch64: one body published under four symbol names.
 *
 * In:   dstin (x0) = destination, src (x1) = source, count (x2) = byte count
 * Out:  x0 still holds the destination pointer (nothing below writes it)
 * Uses: x1-x17 and the condition flags; the stack is not touched
 *
 * Three size classes are dispatched on count:
 *   0..32    small copies,
 *   33..128  medium copies,
 *   > 128    long copies using a 64-bytes-per-iteration pipelined loop.
 *
 * The small and medium paths issue every load before their first store, so
 * overlapping buffers are handled without any direction check.  Only the
 * long path tests for overlap and, if needed, copies from the end downwards.
 * Many paths copy the head and the tail of the buffer with accesses that
 * may overlap each other in the middle rather than branching on the exact
 * length, which relies on unaligned accesses being permitted.
 */
SYM_FUNC_START_ALIAS(__memmove)
SYM_FUNC_START_WEAK_ALIAS_PI(memmove)
SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_WEAK_PI(memcpy)
	add	srcend, src, count	/* srcend = one past last source byte */
	add	dstend, dstin, count	/* dstend = one past last dest byte */
	cmp	count, 128
	b.hi	L(copy_long)
	cmp	count, 32
	b.hi	L(copy32_128)

	/* Small copies: 0..32 bytes. */
	cmp	count, 16
	b.lo	L(copy16)
	/* 16..32 bytes: first 16 and last 16, possibly overlapping. */
	ldp	A_l, A_h, [src]
	ldp	D_l, D_h, [srcend, -16]
	stp	A_l, A_h, [dstin]
	stp	D_l, D_h, [dstend, -16]
	ret

	/* Copy 8..15 bytes: bit 3 of count is set, so two 8-byte accesses. */
L(copy16):
	tbz	count, 3, L(copy8)
	ldr	A_l, [src]
	ldr	A_h, [srcend, -8]
	str	A_l, [dstin]
	str	A_h, [dstend, -8]
	ret

	.p2align 3
	/* Copy 4..7 bytes: bit 2 of count is set, so two 4-byte accesses. */
L(copy8):
	tbz	count, 2, L(copy4)
	ldr	A_lw, [src]
	ldr	B_lw, [srcend, -4]
	str	A_lw, [dstin]
	str	B_lw, [dstend, -4]
	ret

	/*
	 * Copy 0..3 bytes.  For a non-zero count the first, last and
	 * [count / 2] bytes are copied; together they cover lengths 1, 2
	 * and 3 without branching on the exact length.
	 */
L(copy4):
	cbz	count, L(copy0)
	lsr	tmp1, count, 1		/* tmp1 = count / 2 (0 or 1) */
	ldrb	A_lw, [src]
	ldrb	C_lw, [srcend, -1]
	ldrb	B_lw, [src, tmp1]
	strb	A_lw, [dstin]
	strb	B_lw, [dstin, tmp1]
	strb	C_lw, [dstend, -1]
L(copy0):
	ret

	.p2align 4
	/* Medium copies: 33..128 bytes. */
L(copy32_128):
	ldp	A_l, A_h, [src]
	ldp	B_l, B_h, [src, 16]
	ldp	C_l, C_h, [srcend, -32]
	ldp	D_l, D_h, [srcend, -16]
	cmp	count, 64
	b.hi	L(copy128)
	/* 33..64 bytes: first 32 and last 32 bytes. */
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy 65..128 bytes: first 64 plus last 32 (or last 64 if > 96). */
L(copy128):
	ldp	E_l, E_h, [src, 32]
	ldp	F_l, F_h, [src, 48]
	cmp	count, 96
	b.ls	L(copy96)
	/*
	 * G and H alias count/dst/src/srcend.  The loads below overwrite
	 * them; from here on only dstin and dstend are used as addresses.
	 */
	ldp	G_l, G_h, [srcend, -64]
	ldp	H_l, H_h, [srcend, -48]
	stp	G_l, G_h, [dstend, -64]
	stp	H_l, H_h, [dstend, -48]
L(copy96):
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	E_l, E_h, [dstin, 32]
	stp	F_l, F_h, [dstin, 48]
	stp	C_l, C_h, [dstend, -32]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4
	/* Copy more than 128 bytes. */
L(copy_long):
	/*
	 * tmp1 = dstin - src.  Zero means source and destination coincide,
	 * so there is nothing to do.  An unsigned difference below count
	 * means the destination starts inside the source range, where a
	 * forward copy would overwrite source bytes before reading them:
	 * copy backwards in that case.
	 */
	sub	tmp1, dstin, src
	cbz	tmp1, L(copy0)
	cmp	tmp1, count
	b.lo	L(copy_long_backwards)

	/*
	 * Copy the first 16 bytes unaligned, then round dst down to a
	 * 16-byte boundary and shift src by the same amount so the loop's
	 * stores are 16-byte aligned.
	 */
	ldp	D_l, D_h, [src]
	and	tmp1, dstin, 15		/* tmp1 = misalignment of dstin */
	bic	dst, dstin, 15
	sub	src, src, tmp1
	add	count, count, tmp1	/* count is now 16 too large */
	ldp	A_l, A_h, [src, 16]
	stp	D_l, D_h, [dstin]
	ldp	B_l, B_h, [src, 32]
	ldp	C_l, C_h, [src, 48]
	ldp	D_l, D_h, [src, 64]!	/* pre-index: src += 64 */
	subs	count, count, 128 + 16	/* test and readjust count */
	b.ls	L(copy64_from_end)

	/*
	 * Software-pipelined loop: each pass stores the 64 bytes loaded by
	 * the previous pass while loading the next 64.  The last access of
	 * each kind uses pre-index writeback to advance dst and src by 64.
	 */
L(loop64):
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [src, 16]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [src, 32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [src, 48]
	stp	D_l, D_h, [dst, 64]!
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 64
	b.hi	L(loop64)

	/*
	 * Write the 64 bytes still held in A..D, then copy the final 64
	 * bytes addressed from srcend/dstend (these may overlap bytes that
	 * were already written).  E_l shares x14 with tmp1, which is no
	 * longer needed here.
	 */
L(copy64_from_end):
	ldp	E_l, E_h, [srcend, -64]
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [srcend, -48]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [srcend, -16]
	stp	D_l, D_h, [dst, 64]
	stp	E_l, E_h, [dstend, -64]
	stp	A_l, A_h, [dstend, -48]
	stp	B_l, B_h, [dstend, -32]
	stp	C_l, C_h, [dstend, -16]
	ret

	.p2align 4

	/*
	 * Long backwards copy for overlapping buffers.  Mirror image of the
	 * forward path: copy the last 16 bytes unaligned, then move dstend
	 * down to a 16-byte boundary and shift srcend by the same amount.
	 */
L(copy_long_backwards):
	ldp	D_l, D_h, [srcend, -16]
	and	tmp1, dstend, 15	/* tmp1 = misalignment of dstend */
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	ldp	A_l, A_h, [srcend, -16]
	stp	D_l, D_h, [dstend, -16]	/* dstend not yet adjusted: true tail */
	ldp	B_l, B_h, [srcend, -32]
	ldp	C_l, C_h, [srcend, -48]
	ldp	D_l, D_h, [srcend, -64]!	/* pre-index: srcend -= 64 */
	sub	dstend, dstend, tmp1
	subs	count, count, 128
	b.ls	L(copy64_from_start)

	/* Same pipelining as loop64, walking downwards 64 bytes per pass. */
L(loop64_backwards):
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [srcend, -16]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [srcend, -48]
	stp	D_l, D_h, [dstend, -64]!
	ldp	D_l, D_h, [srcend, -64]!
	subs	count, count, 64
	b.hi	L(loop64_backwards)

	/*
	 * Write the 64 bytes still held in A..D, then copy the first 64
	 * bytes addressed from src/dstin.  src and dstin are unmodified on
	 * this path.  G aliases count and dst; count is no longer needed
	 * and dst is never used by the backwards copy.
	 */
L(copy64_from_start):
	ldp	G_l, G_h, [src, 48]
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [src, 32]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [src, 16]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, -64]
	stp	G_l, G_h, [dstin, 48]
	stp	A_l, A_h, [dstin, 32]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin]
	ret

/*
 * Close the four symbols in the reverse of the order they were opened and
 * export each one.  NOTE(review): the exact weak/alias/_PI semantics come
 * from the macros in <linux/linkage.h>, which are not visible here.
 */
SYM_FUNC_END_PI(memcpy)
EXPORT_SYMBOL(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_END_ALIAS_PI(memmove)
EXPORT_SYMBOL(memmove)
SYM_FUNC_END_ALIAS(__memmove)
EXPORT_SYMBOL(__memmove)
253