1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

/*
 * Mnemonic names for the three arguments of the copy routines below:
 * destination pointer, source pointer and byte count, held in a0, a1 and a2.
 */
#define dst a0
#define src a1
#define len a2
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
 * EXC(inst_reg, addr, handler)
 *
 * Emit the memory access "inst_reg, addr" at local label 9 and add the pair
 * (address of that instruction, handler) to the __ex_table section, then
 * switch back to the previous section.
 *
 * The mnemonic and its register travel together as the first macro argument,
 * e.g. EXC(LOAD t0, UNIT(0)(src), l_exc) has inst_reg = "LOAD t0".
 *
 * NOTE(review): the consumer of __ex_table is not in this file; presumably a
 * fault on the recorded instruction resumes execution at `handler`.
 */
#define EXC(inst_reg,addr,handler)		\
9:	inst_reg, addr;				\
	.section __ex_table,"a";		\
	PTR	9b, handler;			\
	.previous
79
80
81
82
83
/*
 * Operation bindings.  Each generic mnemonic used by the copy code maps to
 * the doubleword form of the instruction, so one copy "unit" is NBYTES = 8
 * bytes and LOG_NBYTES is log2(NBYTES).
 */
#define LOAD   ld
#define LOADL  ldl
#define LOADR  ldr
#define STOREL sdl
#define STORER sdr
#define STORE  sd
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SRA    dsra
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3
99
100
101
102
103
104
/*
 * Discard any existing definitions of t0-t3 and bind t0-t7 to the eight
 * consecutive registers $8-$15.
 *
 * NOTE(review): presumably this overrides names supplied by <asm/regdef.h>
 * whose numbering depends on the ABI -- confirm against that header.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15
117
/*
 * Endian-dependent selection of the partial-unit instructions.
 *
 * LDFIRST/STFIRST are the variants applied at offset FIRST(n), the lowest
 * byte address of unit n; LDREST/STREST are applied at offset REST(n), its
 * highest byte address.  Which of the left/right instructions fills which
 * role is swapped between little- and big-endian builds, as is the direction
 * of SHIFT_DISCARD.
 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#endif

/* Byte offsets of unit number `unit`: first byte, last byte, and an alias. */
#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

/* Low address bits that are non-zero when a pointer is not unit-aligned. */
#define ADDRMASK (NBYTES-1)
137
/*
 * Assembler setup for everything below:
 *   noreorder - the assembler must not reorder instructions or fill branch
 *               delay slots; the code places every delay-slot instruction
 *               explicitly, directly after its branch.
 *   noat      - the assembler must not use AT as a scratch register; the
 *               l_exc fixup reads AT as an input.
 */
	.text
	.set	noreorder
	.set	noat
141
142
143
144
145
146
147
/*
 * memcpy / __memcpy / __copy_user: shared forward copy.
 *
 * In:    dst (a0), src (a1), len (a2).  src may be unaligned and len may
 *        be 0; dst alignment is never tested.
 * Out:   memcpy sets v0 = dst before falling into the shared body; __memcpy
 *        and __copy_user enter the shared body directly and leave v0 alone.
 *        len is counted down as data is copied and is 0 on a fault-free
 *        return.
 * Temps: t0-t3, plus t8 (rem) on the unaligned-source path.
 *
 * Instructions indented by one extra space sit in a branch delay slot and
 * execute whether or not the branch is taken.
 */
	.align	5
LEAF(memcpy)
EXPORT_SYMBOL(memcpy)
	move	v0, dst				/* memcpy return value */
__memcpy:
FEXPORT(__copy_user)
EXPORT_SYMBOL(__copy_user)
	/*
	 * Dispatch on length and source alignment.  Each test is computed in
	 * the delay slot of the branch that precedes it.
	 */
	pref	0, 0(src)
	sltu	t0, len, NBYTES		/* len < 1 unit? */
	bnez	t0, copy_bytes_checklen
	 and	t0, src, ADDRMASK	/* src not unit-aligned? */
	bnez	t0, src_unaligned
	 sltu	t0, len, 4*NBYTES	/* len < 4 units? */
	bnez	t0, less_than_4units
	 sltu	t0, len, 8*NBYTES	/* len < 8 units? */
	bnez	t0, less_than_8units
	 sltu	t0, len, 16*NBYTES	/* len < 16 units? */
	bnez	t0, cleanup_both_aligned
	 sltu	t0, len, 128+1		/* len < 129? */
	bnez	t0, 1f			/* yes: enter the loop without prefetch */
	 sltu	t0, len, 256+1		/* len < 257? */
	bnez	t0, 1f			/* yes: skip the 256-byte prefetch */
	 pref	0, 128(src)		/* delay slot: issued whenever len >= 129 */

	/* Loop entry taken while more than 256 bytes remain. */
2:	pref	0, 256(src)

	/* Loop entry taken when no further prefetch is issued. */
1:
	/*
	 * Copy 16 units (128 bytes) per iteration in groups of four registers.
	 *
	 * Fault fixups: the first load uses l_exc, later loads l_exc_copy, and
	 * loads issued after src/dst were advanced use l_exc_copy_rewind16.
	 * len is reduced by 16*NBYTES before the first store, so the store of
	 * the k-th unit (k = 0..15) names s_exc_p<16-k>u, which adds
	 * (16-k)*NBYTES back to len.
	 */
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 16*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p16u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p15u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p14u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p13u)
EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p12u)
EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p11u)
EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p10u)
	ADD	src, src, 16*NBYTES	/* src advanced: later offsets are negative */
EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p9u)	/* dst not yet advanced */
	ADD	dst, dst, 16*NBYTES
EXC(	LOAD	t0, UNIT(-8)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t1, UNIT(-7)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t2, UNIT(-6)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t3, UNIT(-5)(src),	l_exc_copy_rewind16)
EXC(	STORE	t0, UNIT(-8)(dst),	s_exc_p8u)
EXC(	STORE	t1, UNIT(-7)(dst),	s_exc_p7u)
EXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
EXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
EXC(	LOAD	t0, UNIT(-4)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t1, UNIT(-3)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t2, UNIT(-2)(src),	l_exc_copy_rewind16)
EXC(	LOAD	t3, UNIT(-1)(src),	l_exc_copy_rewind16)
EXC(	STORE	t0, UNIT(-4)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(-3)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(-2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(-1)(dst),	s_exc_p1u)
	sltu	t0, len, 256+1		/* more than 256 bytes left? */
	beqz	t0, 2b			/* yes: loop again with prefetch */
	 sltu	t0, len, 128		/* at least 128 bytes left? */
	beqz	t0, 1b			/* yes: loop again without prefetch */
	 nop
	/* Fewer than 128 bytes remain: fall through to the cleanup code. */
225
226
227
/*
 * Fewer than 16 units remain and src is unit-aligned.  Return if nothing is
 * left; otherwise copy one block of 8 units if that many remain, then fall
 * through to less_than_8units.
 */
cleanup_both_aligned:
	beqz	len, done
	 sltu	t0, len, 8*NBYTES	/* delay slot: len < 8 units? */
	bnez	t0, less_than_8units
	 nop
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 8*NBYTES	/* store fixups add the unstored part back */
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p6u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p5u)
EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p1u)
	ADD	src, src, 8*NBYTES
	beqz	len, done
	 ADD	dst, dst, 8*NBYTES	/* delay slot: runs on both paths */
253
254
255
/*
 * Fewer than 8 units remain and src is unit-aligned.  Copy one block of 4
 * units if that many remain, then fall through to less_than_4units.
 */
less_than_8units:
	sltu	t0, len, 4*NBYTES	/* len < 4 units? */
	bnez	t0, less_than_4units
	 nop
EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES	/* store fixups add the unstored part back */
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
	ADD	src, src, 4*NBYTES
	beqz	len, done
	 ADD	dst, dst, 4*NBYTES	/* delay slot: runs on both paths */
272
273
274
275
/*
 * Fewer than 4 units remain and src is unit-aligned.  Copy up to three
 * single units, leaving after each one as soon as less than a full unit is
 * left; the tail is finished by copy_bytes_checklen.
 */
less_than_4units:
	sltu	t0, len, 1*NBYTES	/* less than one unit? */
	bnez	t0, copy_bytes_checklen
	 nop

	/* 1) Copy one unit, then check the length again. */
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	sltu	t1, len, NBYTES		/* t1 = less than a unit left */
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bnez	t1, copy_bytes_checklen
	 ADD	dst, dst, NBYTES	/* delay slot: runs on both paths */

	/* 2) Copy one unit, then check the length again. */
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	sltu	t1, len, NBYTES		/* t1 = less than a unit left */
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bnez	t1, copy_bytes_checklen
	 ADD	dst, dst, NBYTES	/* delay slot: runs on both paths */

	/*
	 * 3) Third and last unit.  The store sits in the delay slot of the
	 * unconditional branch, after dst was advanced, hence the -NBYTES
	 * offset.
	 */
EXC(	LOAD	t0, 0(src),		l_exc)
	SUB	len, len, NBYTES
	ADD	src, src, NBYTES
	ADD	dst, dst, NBYTES
	b	copy_bytes_checklen
EXC(	 STORE	t0, -NBYTES(dst),	s_exc_p1u)
309
/*
 * src is not unit-aligned.  Copy 4 units per iteration, assembling each unit
 * from an LDFIRST/LDREST pair and writing it with a full-unit STORE.
 * rem (t8) holds the byte count left over once all 4-unit blocks are done;
 * the loop ends when len has been reduced to rem.
 */
src_unaligned:
#define rem t8
	SRL	t0, len, LOG_NBYTES+2	/* t0 = len / (4*NBYTES) */
	beqz	t0, cleanup_src_unaligned	/* no complete 4-unit block */
	 and	rem, len, (4*NBYTES-1)	/* delay slot: rem = len % (4*NBYTES) */
1:
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
	SUB	len, len, 4*NBYTES	/* store fixups add the unstored part back */
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
	ADD	src, src, 4*NBYTES
EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
	bne	len, rem, 1b
	 ADD	dst, dst, 4*NBYTES	/* delay slot: runs every iteration */
338
/*
 * Unaligned source, fewer than 4 units left.  Copy whole units one at a time
 * until only rem = len % NBYTES bytes remain, then fall through to
 * copy_bytes_checklen.
 */
cleanup_src_unaligned:
	beqz	len, done
	 and	rem, len, NBYTES-1	/* delay slot: rem = len % NBYTES */
	beq	rem, len, copy_bytes	/* len < NBYTES: only bytes are left */
	 nop
1:
EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
	ADD	src, src, NBYTES
	bne	len, rem, 1b
	 ADD	dst, dst, NBYTES	/* delay slot: runs every iteration */
352
/*
 * Byte tail.  copy_bytes_checklen returns at once when len is 0; copy_bytes
 * is entered with 0 < len < NBYTES and copies the remaining bytes one at a
 * time with an unrolled sequence.
 */
copy_bytes_checklen:
	beqz	len, done
	 nop
copy_bytes:
	/*
	 * COPY_BYTE(N): load byte N, decrement len, and leave through `done`
	 * when that was the last byte.  The store sits in the delay slot of
	 * the branch, so it is performed on both paths.
	 */
#define COPY_BYTE(N)			\
EXC(	lb	t0, N(src), l_exc);	\
	SUB	len, len, 1;		\
	beqz	len, done;		\
EXC(	 sb	t0, N(dst), s_exc_p1)

	COPY_BYTE(0)
	COPY_BYTE(1)
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
	/*
	 * Byte NBYTES-2 is the last one that can remain, so return without
	 * testing len; the store is in the delay slot of the jr.
	 */
EXC(	lb	t0, NBYTES-2(src), l_exc)
	SUB	len, len, 1
	jr	ra
EXC(	 sb	t0, NBYTES-2(dst), s_exc_p1)
done:
	jr	ra
	 nop
	END(memcpy)
378
/*
 * Fixup code named by the EXC() entries of the loads above.
 *
 * NOTE(review): THREAD_BUADDR is presumably the faulting address recorded by
 * the exception handler, and AT is presumably set by the __copy_user caller
 * (it is never written in this file) -- confirm both against the callers.
 */
l_exc_copy_rewind16:
	/* Undo a 16-unit advance of src and dst, then continue in l_exc_copy. */
	SUB	src, src, 16*NBYTES
	SUB	dst, dst, 16*NBYTES
l_exc_copy:
	/*
	 * Copy byte by byte from src until src equals the address read from
	 * THREAD_BUADDR of the task found via TI_TASK($28).  The lb has its own
	 * table entry and drops to l_exc if it faults first; the sb has no
	 * table entry.
	 */
	LOAD	t0, TI_TASK($28)
	LOAD	t0, THREAD_BUADDR(t0)
1:
EXC(	lb	t1, 0(src),	l_exc)
	ADD	src, src, 1
	sb	t1, 0(dst)
	bne	src, t0, 1b
	 ADD	dst, dst, 1		/* delay slot: runs every iteration */
l_exc:
	/* Return with len = AT - (address read from THREAD_BUADDR). */
	LOAD	t0, TI_TASK($28)
	LOAD	t0, THREAD_BUADDR(t0)
	SUB	len, AT, t0
	jr	ra
	 nop
408
409
/*
 * SEXC(n) defines the store fixup s_exc_p<n>u: return to the caller with
 * n*NBYTES added back to len (the ADD executes in the delay slot of the jr).
 *
 * The label is built by token pasting; it must stay on a continued macro
 * line, otherwise none of the s_exc_p16u ... s_exc_p1u labels named by the
 * EXC() store entries are defined.
 */
#define SEXC(n)				\
s_exc_p ## n ## u:			\
	jr	ra;			\
	 ADD	len, len, n*NBYTES

SEXC(16)
SEXC(15)
SEXC(14)
SEXC(13)
SEXC(12)
SEXC(11)
SEXC(10)
SEXC(9)
SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)
431
/*
 * s_exc_p1: fixup for the byte stores; returns with one byte added back to
 *           len (the ADD executes in the delay slot of the jr).
 * s_exc:    returns with len unchanged.
 */
s_exc_p1:
	jr	ra
	 ADD	len, len, 1
s_exc:
	jr	ra
	 nop
438
/*
 * memmove(a0 = dst, a1 = src, a2 = len); returns dst in v0.
 *
 * When the ranges [dst, dst+len) and [src, src+len) do not overlap, the copy
 * is handed to __memcpy.  Otherwise execution runs off the end of this
 * function into the code that follows it (__rmemcpy).
 */
	.align	5
LEAF(memmove)
EXPORT_SYMBOL(memmove)
	ADD	t0, a0, a2		/* t0 = dst + len */
	ADD	t1, a1, a2		/* t1 = src + len */
	sltu	t0, a1, t0		/* t0 = (src < dst + len) */
	sltu	t1, a0, t1		/* t1 = (dst < src + len) */
	and	t0, t1			/* non-zero only if the ranges overlap */
	beqz	t0, __memcpy		/* no overlap: forward copy is safe */
	 move	v0, a0			/* delay slot: return value */
	beqz	a2, r_out		/* len == 0: return through r_out */
	END(memmove)
451
452
/*
 * __rmemcpy(a0 = dst, a1 = src, a2 = len): byte-at-a-time copy that picks
 * its direction from the pointer order -- downwards from the end when
 * src < dst, upwards otherwise.  Returns with a2 = 0.
 *
 * Both loops decrement a2 before testing it, so a2 must be non-zero when a
 * loop is entered.
 *
 * NOTE(review): the first instruction appears to double as the delay slot of
 * the final branch of memmove, assuming END()/LEAF() emit no instructions or
 * padding in between -- confirm.
 */
LEAF(__rmemcpy)
	 sltu	t0, a1, a0		/* t0 = (src < dst) */
	beqz	t0, r_end_bytes_up	/* src >= dst: copy upwards */
	 nop
	ADD	a0, a2			/* dst = dst + len */
	ADD	a1, a2			/* src = src + len */

	/* Copy downwards, last byte first. */
r_end_bytes:
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	bnez	a2, r_end_bytes
	 SUB	a0, a0, 0x1		/* delay slot: runs every iteration */

r_out:
	jr	ra
	 move	a2, zero		/* delay slot: report nothing left */

	/* Copy upwards, first byte first. */
r_end_bytes_up:
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	bnez	a2, r_end_bytes_up
	 ADD	a0, a0, 0x1		/* delay slot: runs every iteration */

	jr	ra
	 move	a2, zero		/* delay slot: report nothing left */
	END(__rmemcpy)
483