1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <asm/asm.h>
17#include <asm/asm-offsets.h>
18#include <asm/export.h>
19#include <asm/regdef.h>
20
/*
 * Symbolic names for the three argument registers used by every routine
 * in this file: destination pointer, source pointer and byte count.
 */
21#define dst a0
22#define src a1
23#define len a2
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
 * EXC(inst_reg, addr, handler)
 *
 * Emits the instruction "inst_reg, addr" behind a local numeric label, then
 * switches to the __ex_table section, records the pair (that label,
 * referenced as 9b; handler) with PTR_WD, and switches back with .previous.
 * NOTE(review): presumably the kernel fault path looks up the faulting PC
 * in __ex_table and resumes at handler -- confirm against the fixup code.
 */
74#define EXC(inst_reg,addr,handler) \
759: inst_reg, addr; \
76 .section __ex_table,"a"; \
77 PTR_WD 9b, handler; \
78 .previous
79
80
81
82
83
/*
 * Width-neutral operation names bound to the MIPS64 doubleword
 * instructions. One copy unit is NBYTES = 8 bytes and LOG_NBYTES is its
 * base-2 logarithm. SRA, SLL, SLLV and SRLV are defined here but not used
 * directly by the code visible in this file.
 */
84#define LOAD ld
85#define LOADL ldl
86#define LOADR ldr
87#define STOREL sdl
88#define STORER sdr
89#define STORE sd
90#define ADD daddu
91#define SUB dsubu
92#define SRL dsrl
93#define SRA dsra
94#define SLL dsll
95#define SLLV dsllv
96#define SRLV dsrlv
97#define NBYTES 8
98#define LOG_NBYTES 3
99
100
101
102
103
104
/*
 * Bind the temporaries t0-t7 to $8-$15. Any earlier definitions of t0-t3
 * are dropped first so the bindings below always win.
 * NOTE(review): presumably <asm/regdef.h> can map some of these names to
 * other registers depending on the ABI -- confirm.
 */
105#undef t0
106#undef t1
107#undef t2
108#undef t3
109#define t0 $8
110#define t1 $9
111#define t2 $10
112#define t3 $11
113#define t4 $12
114#define t5 $13
115#define t6 $14
116#define t7 $15
117
/*
 * Byte-order dependent choice of the partial-doubleword instructions.
 * The *FIRST name is the instruction applied at FIRST(unit) and the *REST
 * name the one applied at REST(unit): little-endian builds use the "right"
 * form first and the "left" form second, big-endian builds the reverse.
 * SHIFT_DISCARD picks the variable shift that goes with the byte order.
 * STFIRST, STREST and SHIFT_DISCARD are not used by the code visible in
 * this file.
 */
118#ifdef CONFIG_CPU_LITTLE_ENDIAN
119#define LDFIRST LOADR
120#define LDREST LOADL
121#define STFIRST STORER
122#define STREST STOREL
123#define SHIFT_DISCARD SLLV
124#else
125#define LDFIRST LOADL
126#define LDREST LOADR
127#define STFIRST STOREL
128#define STREST STORER
129#define SHIFT_DISCARD SRLV
130#endif
131
/*
 * Byte offsets of copy unit number "unit" relative to a base pointer:
 *   FIRST(unit) - offset of the unit's first byte
 *   REST(unit)  - offset of the unit's last byte
 *   UNIT(unit)  - alias of FIRST(unit), used for whole-unit LOAD/STORE
 * ADDRMASK covers the low address bits that are zero in an NBYTES-aligned
 * address.
 */
132#define FIRST(unit) ((unit)*NBYTES)
133#define REST(unit) (FIRST(unit)+NBYTES-1)
134#define UNIT(unit) FIRST(unit)
135
136#define ADDRMASK (NBYTES-1)
137
138 .text
/* Instruction order is fixed by hand: the instruction written after each
 * branch or jump below is that branch's delay slot. */
139 .set noreorder
/* The assembler must not use $at on its own; AT is read explicitly in l_exc. */
140 .set noat
141
142
143
144
145
146
147
/*
 * memcpy / __raw_copy_from_user / __raw_copy_to_user
 *
 * In:   dst (a0) = destination, src (a1) = source, len (a2) = byte count.
 * Out:  memcpy sets v0 = dst and then falls into the shared body at
 *       __memcpy. The two __raw_copy_* entry points start directly at that
 *       shared body and never write v0.
 *       On the non-faulting paths len has been counted down to zero by the
 *       time the routine returns.
 * Uses: t0-t3 and rem (t8) as scratch.
 *
 * Every load and store wrapped in EXC() names the handler to run if that
 * access faults. Store handlers are called s_exc_p<N>u (N units are added
 * back to len by SEXC(N)) or s_exc_p1 (one byte is added back).
 * NOTE(review): callers of the __raw_copy_* entry points presumably read
 * the number of uncopied bytes from len (a2) -- confirm against callers.
 */
148 .align 5
149LEAF(memcpy)
150EXPORT_SYMBOL(memcpy)
151 move v0, dst /* memcpy only: return value is the original dst */
152__memcpy:
153FEXPORT(__raw_copy_from_user)
154EXPORT_SYMBOL(__raw_copy_from_user)
155FEXPORT(__raw_copy_to_user)
156EXPORT_SYMBOL(__raw_copy_to_user)
157
158
159
160
161
162
163
164
/*
 * Dispatch on length and source alignment. Each sltu/and sits in the delay
 * slot of the preceding branch and computes the condition for the next one.
 */
165 pref 0, 0(src)
166 sltu t0, len, NBYTES /* t0 = (len < one unit) */
167 bnez t0, copy_bytes_checklen
168 and t0, src, ADDRMASK /* delay slot: t0 = misalignment of src */
169 bnez t0, src_unaligned
170 sltu t0, len, 4*NBYTES /* delay slot: t0 = (len < 4 units) */
171 bnez t0, less_than_4units
172 sltu t0, len, 8*NBYTES /* delay slot: t0 = (len < 8 units) */
173 bnez t0, less_than_8units
174 sltu t0, len, 16*NBYTES /* delay slot: t0 = (len < 16 units) */
175 bnez t0, cleanup_both_aligned
176 sltu t0, len, 128+1 /* delay slot: t0 = (len < 129) */
177 bnez t0, 1f /* exactly 128 bytes: enter the loop without prefetching */
178 sltu t0, len, 256+1 /* delay slot: t0 = (len < 257) */
179 bnez t0, 1f /* 129..256 bytes: skip the 256-byte-ahead prefetch */
180 pref 0, 128(src) /* delay slot: runs whenever the branch at "bnez t0, 1f" above is reached */
181
182
/* Loop entry used while more than 256 bytes remain: prefetch 256 bytes ahead. */
1832: pref 0, 256(src)
184
185
/*
 * Main aligned loop: 16 units (128 bytes) per iteration, four units at a
 * time through t0-t3. len is reduced by the whole iteration up front, so a
 * store that faults with N units of the iteration still unwritten uses
 * s_exc_p<N>u. src and dst are advanced part-way through; the second half
 * of the iteration therefore uses negative unit offsets and its loads use
 * l_exc_copy_rewind16, which moves src and dst back by 16 units first.
 */
1861:
187EXC( LOAD t0, UNIT(0)(src), l_exc)
188EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
189EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
190EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
191 SUB len, len, 16*NBYTES
192EXC( STORE t0, UNIT(0)(dst), s_exc_p16u)
193EXC( STORE t1, UNIT(1)(dst), s_exc_p15u)
194EXC( STORE t2, UNIT(2)(dst), s_exc_p14u)
195EXC( STORE t3, UNIT(3)(dst), s_exc_p13u)
196EXC( LOAD t0, UNIT(4)(src), l_exc_copy)
197EXC( LOAD t1, UNIT(5)(src), l_exc_copy)
198EXC( LOAD t2, UNIT(6)(src), l_exc_copy)
199EXC( LOAD t3, UNIT(7)(src), l_exc_copy)
200EXC( STORE t0, UNIT(4)(dst), s_exc_p12u)
201EXC( STORE t1, UNIT(5)(dst), s_exc_p11u)
202EXC( STORE t2, UNIT(6)(dst), s_exc_p10u)
203 ADD src, src, 16*NBYTES /* src now points past this iteration */
204EXC( STORE t3, UNIT(7)(dst), s_exc_p9u)
205 ADD dst, dst, 16*NBYTES /* dst now points past this iteration */
206EXC( LOAD t0, UNIT(-8)(src), l_exc_copy_rewind16)
207EXC( LOAD t1, UNIT(-7)(src), l_exc_copy_rewind16)
208EXC( LOAD t2, UNIT(-6)(src), l_exc_copy_rewind16)
209EXC( LOAD t3, UNIT(-5)(src), l_exc_copy_rewind16)
210EXC( STORE t0, UNIT(-8)(dst), s_exc_p8u)
211EXC( STORE t1, UNIT(-7)(dst), s_exc_p7u)
212EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u)
213EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u)
214EXC( LOAD t0, UNIT(-4)(src), l_exc_copy_rewind16)
215EXC( LOAD t1, UNIT(-3)(src), l_exc_copy_rewind16)
216EXC( LOAD t2, UNIT(-2)(src), l_exc_copy_rewind16)
217EXC( LOAD t3, UNIT(-1)(src), l_exc_copy_rewind16)
218EXC( STORE t0, UNIT(-4)(dst), s_exc_p4u)
219EXC( STORE t1, UNIT(-3)(dst), s_exc_p3u)
220EXC( STORE t2, UNIT(-2)(dst), s_exc_p2u)
221EXC( STORE t3, UNIT(-1)(dst), s_exc_p1u)
222 sltu t0, len, 256+1
223 beqz t0, 2b /* more than 256 bytes left: loop with prefetch */
224 sltu t0, len, 128 /* delay slot: t0 = (len < 128) */
225 beqz t0, 1b /* at least 128 bytes left: loop without prefetch */
226 nop
227
228
229
/* Fewer than 16 units remain (possibly none); src is unit-aligned. */
230cleanup_both_aligned:
231 beqz len, done
232 sltu t0, len, 8*NBYTES /* delay slot: t0 = (len < 8 units) */
233 bnez t0, less_than_8units
234 nop
/* Copy 8 units; len is reduced before the stores, matching s_exc_p8u..p1u. */
235EXC( LOAD t0, UNIT(0)(src), l_exc)
236EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
237EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
238EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
239 SUB len, len, 8*NBYTES
240EXC( STORE t0, UNIT(0)(dst), s_exc_p8u)
241EXC( STORE t1, UNIT(1)(dst), s_exc_p7u)
242EXC( STORE t2, UNIT(2)(dst), s_exc_p6u)
243EXC( STORE t3, UNIT(3)(dst), s_exc_p5u)
244EXC( LOAD t0, UNIT(4)(src), l_exc_copy)
245EXC( LOAD t1, UNIT(5)(src), l_exc_copy)
246EXC( LOAD t2, UNIT(6)(src), l_exc_copy)
247EXC( LOAD t3, UNIT(7)(src), l_exc_copy)
248EXC( STORE t0, UNIT(4)(dst), s_exc_p4u)
249EXC( STORE t1, UNIT(5)(dst), s_exc_p3u)
250EXC( STORE t2, UNIT(6)(dst), s_exc_p2u)
251EXC( STORE t3, UNIT(7)(dst), s_exc_p1u)
252 ADD src, src, 8*NBYTES
253 beqz len, done
254 ADD dst, dst, 8*NBYTES /* delay slot: advance dst on both paths */
255
256
257
/* Fewer than 8 units remain: copy 4 units if at least that many are left. */
258less_than_8units:
259 sltu t0, len, 4*NBYTES
260 bnez t0, less_than_4units
261 nop
262EXC( LOAD t0, UNIT(0)(src), l_exc)
263EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
264EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
265EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
266 SUB len, len, 4*NBYTES
267EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
268EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
269EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
270EXC( STORE t3, UNIT(3)(dst), s_exc_p1u)
271 ADD src, src, 4*NBYTES
272 beqz len, done
273 ADD dst, dst, 4*NBYTES /* delay slot: advance dst on both paths */
274
275
276
277
/*
 * Fewer than 4 units remain: copy up to three single units, leaving the
 * sequence for the byte tail as soon as less than one unit is left.
 */
278less_than_4units:
279 sltu t0, len, 1*NBYTES
280 bnez t0, copy_bytes_checklen
281 nop
282
283
284
/* First single unit. */
285EXC( LOAD t0, 0(src), l_exc)
286 SUB len, len, NBYTES
287 sltu t1, len, 8 /* t1 = (less than one unit left) */
288EXC( STORE t0, 0(dst), s_exc_p1u)
289 ADD src, src, NBYTES
290 bnez t1, copy_bytes_checklen
291 ADD dst, dst, NBYTES /* delay slot */
292
293
294
/* Second single unit. */
295EXC( LOAD t0, 0(src), l_exc)
296 SUB len, len, NBYTES
297 sltu t1, len, 8
298EXC( STORE t0, 0(dst), s_exc_p1u)
299 ADD src, src, NBYTES
300 bnez t1, copy_bytes_checklen
301 ADD dst, dst, NBYTES /* delay slot */
302
303
304
/*
 * Third single unit. dst is advanced before the store, so the store in the
 * delay slot of the unconditional branch addresses -8(dst).
 */
305EXC( LOAD t0, 0(src), l_exc)
306 SUB len, len, NBYTES
307 ADD src, src, NBYTES
308 ADD dst, dst, NBYTES
309 b copy_bytes_checklen
310EXC( STORE t0, -8(dst), s_exc_p1u)
311
/*
 * Source is not unit-aligned. Each unit is assembled with an
 * LDFIRST/LDREST pair on the same register and written with a whole-unit
 * STORE. rem holds the byte count at which the current loop must stop.
 */
312src_unaligned:
313#define rem t8
314 SRL t0, len, LOG_NBYTES+2 /* t0 = number of complete 4-unit groups */
315 beqz t0, cleanup_src_unaligned
316 and rem, len, (4*NBYTES-1) /* delay slot: rem = len mod (4 units) */
/* Copy 4 units per iteration until len == rem. */
3171:
318
319
320
321
322
323
324EXC( LDFIRST t0, FIRST(0)(src), l_exc)
325EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy)
326 SUB len, len, 4*NBYTES
327EXC( LDREST t0, REST(0)(src), l_exc_copy)
328EXC( LDREST t1, REST(1)(src), l_exc_copy)
329EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy)
330EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy)
331EXC( LDREST t2, REST(2)(src), l_exc_copy)
332EXC( LDREST t3, REST(3)(src), l_exc_copy)
333 ADD src, src, 4*NBYTES
334EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
335EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
336EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
337EXC( STORE t3, UNIT(3)(dst), s_exc_p1u)
338 bne len, rem, 1b
339 ADD dst, dst, 4*NBYTES /* delay slot: advance dst every iteration */
340
/* Fewer than 4 units remain with an unaligned source. */
341cleanup_src_unaligned:
342 beqz len, done
343 and rem, len, NBYTES-1 /* delay slot: rem = len mod (one unit) */
344 beq rem, len, copy_bytes /* less than one unit left: bytes only */
345 nop
/* Copy one unit per iteration until len == rem. */
3461:
347EXC( LDFIRST t0, FIRST(0)(src), l_exc)
348EXC( LDREST t0, REST(0)(src), l_exc_copy)
349 SUB len, len, NBYTES
350EXC( STORE t0, 0(dst), s_exc_p1u)
351 ADD src, src, NBYTES
352 bne len, rem, 1b
353 ADD dst, dst, NBYTES /* delay slot: advance dst every iteration */
354
/*
 * Byte tail. Every path arrives here with len < NBYTES, so at most
 * NBYTES-1 bytes are copied: six through COPY_BYTE plus one final byte.
 */
355copy_bytes_checklen:
356 beqz len, done
357 nop
358copy_bytes:
359
/*
 * COPY_BYTE(N): load byte N, decrement len and branch to done when it
 * reaches zero. The store sits in the branch delay slot, so the byte is
 * written whether or not the branch is taken.
 */
360#define COPY_BYTE(N) \
361EXC( lb t0, N(src), l_exc); \
362 SUB len, len, 1; \
363 beqz len, done; \
364EXC( sb t0, N(dst), s_exc_p1)
365
366 COPY_BYTE(0)
367 COPY_BYTE(1)
368 COPY_BYTE(2)
369 COPY_BYTE(3)
370 COPY_BYTE(4)
371 COPY_BYTE(5)
/* Last possible byte (offset NBYTES-2): return with the store in the delay slot. */
372EXC( lb t0, NBYTES-2(src), l_exc)
373 SUB len, len, 1
374 jr ra
375EXC( sb t0, NBYTES-2(dst), s_exc_p1)
376done:
377 jr ra
378 nop
379 END(memcpy)
380
/*
 * Load-fault handlers referenced from the EXC() entries of the copy code.
 *
 * l_exc_copy_rewind16: move src and dst back by 16 units, then fall
 *     through to l_exc_copy.
 * l_exc_copy: copy bytes from src to dst one at a time until src equals
 *     the address read from THREAD_BUADDR of the task found through
 *     TI_TASK($28), or until the lb itself faults (which goes to l_exc).
 *     The sb in this loop has no EXC() entry.
 * l_exc: reload that address and return with len = AT - address.
 *
 * NOTE(review): presumably THREAD_BUADDR holds the faulting user address
 * and the caller preloaded AT with the end of the source range, making len
 * the number of bytes not copied -- confirm against the callers and the
 * fault handler.
 */
381l_exc_copy_rewind16:
382
383 SUB src, src, 16*NBYTES
384 SUB dst, dst, 16*NBYTES
385l_exc_copy:
386
387
388
389
390
391
392
393
394
395
396 LOAD t0, TI_TASK($28)
397 LOAD t0, THREAD_BUADDR(t0) /* t0 = address at which the byte loop stops */
3981:
399EXC( lb t1, 0(src), l_exc)
400 ADD src, src, 1
401 sb t1, 0(dst)
402 bne src, t0, 1b
403 ADD dst, dst, 1 /* delay slot: advance dst every iteration */
404l_exc:
405 LOAD t0, TI_TASK($28)
406 LOAD t0, THREAD_BUADDR(t0)
407 SUB len, AT, t0 /* len = AT - recorded address */
408 jr ra
409 nop
410
411
/*
 * SEXC(n): define the store-fault handler s_exc_p<n>u.
 *
 * The handler returns to the caller and, in the delay slot of the jr, adds
 * n units (n*NBYTES bytes) back to len. The copy loops subtract a whole
 * group of units from len before storing them, so this restores the part
 * of the group that had not been written when the store faulted.
 *
 * The label is built by token pasting (s_exc_p ## n ## u); each line of
 * the macro body must end in a continuation so that the label, the jr and
 * the delay-slot ADD all belong to the macro.
 */
412#define SEXC(n) \
413s_exc_p ## n ## u: \
414 jr ra; \
415 ADD len, len, n*NBYTES
416
/* One handler for every s_exc_p<N>u named by the EXC() store entries. */
417SEXC(16)
418SEXC(15)
419SEXC(14)
420SEXC(13)
421SEXC(12)
422SEXC(11)
423SEXC(10)
424SEXC(9)
425SEXC(8)
426SEXC(7)
427SEXC(6)
428SEXC(5)
429SEXC(4)
430SEXC(3)
431SEXC(2)
432SEXC(1)
433
/*
 * Store-fault handlers for the byte tail.
 * s_exc_p1: return with one byte added back to len (the ADD runs in the
 *           delay slot of the jr).
 * s_exc:    return with len unchanged; no EXC() entry in the code visible
 *           here refers to it.
 */
434s_exc_p1:
435 jr ra
436 ADD len, len, 1
437s_exc:
438 jr ra
439 nop
440
/*
 * memmove
 *
 * In:  a0 = dst, a1 = src, a2 = len.
 * Out: v0 = dst (set in the delay slot of the first branch, so on both
 *      paths).
 *
 * When the ranges [dst, dst+len) and [src, src+len) do not overlap, the
 * copy is handed to __memcpy. Otherwise a zero length returns through
 * r_out, and any other length continues into the code that follows
 * END(memmove).
 * NOTE(review): the last branch has no delay-slot instruction inside this
 * function; assuming END() and LEAF() emit no instructions, its delay slot
 * is the first instruction of __rmemcpy -- confirm.
 */
441 .align 5
442LEAF(memmove)
443EXPORT_SYMBOL(memmove)
444 ADD t0, a0, a2 /* t0 = dst + len */
445 ADD t1, a1, a2 /* t1 = src + len */
446 sltu t0, a1, t0 /* t0 = (src < dst + len) */
447 sltu t1, a0, t1 /* t1 = (dst < src + len) */
448 and t0, t1 /* t0 != 0 only when the two ranges overlap */
449 beqz t0, __memcpy
450 move v0, a0
451 beqz a2, r_out
452 END(memmove)
453
454
/*
 * __rmemcpy: byte-at-a-time copy that picks its direction from the
 * relative position of the pointers.
 *
 * In:  a0 = dst, a1 = src, a2 = len.
 * Out: a2 = 0 on return; a0 and a1 are modified. v0 is not written here.
 *
 * src < dst:  both pointers are moved to the end of their ranges and the
 *             bytes are copied from the last one down to the first.
 * src >= dst: the bytes are copied from the first one up to the last.
 *
 * Both loops decrement a2 before testing it, so they must be entered with
 * a2 != 0; memmove branches to r_out for a zero length before reaching
 * this code.
 */
455LEAF(__rmemcpy)
456 sltu t0, a1, a0 /* t0 = (src < dst) */
457 beqz t0, r_end_bytes_up
458 nop
459 ADD a0, a2 /* a0 = one past the end of the destination */
460 ADD a1, a2 /* a1 = one past the end of the source */
461
/* Descending copy. */
462r_end_bytes:
463 lb t0, -1(a1)
464 SUB a2, a2, 0x1
465 sb t0, -1(a0)
466 SUB a1, a1, 0x1
467 bnez a2, r_end_bytes
468 SUB a0, a0, 0x1 /* delay slot: step dst back every iteration */
469
470r_out:
471 jr ra
472 move a2, zero /* delay slot: report zero bytes remaining */
473
/* Ascending copy. */
474r_end_bytes_up:
475 lb t0, (a1)
476 SUB a2, a2, 0x1
477 sb t0, (a0)
478 ADD a1, a1, 0x1
479 bnez a2, r_end_bytes_up
480 ADD a0, a0, 0x1 /* delay slot: step dst forward every iteration */
481
482 jr ra
483 move a2, zero /* delay slot: report zero bytes remaining */
484 END(__rmemcpy)
485