1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Hand-scheduled MIPS memcpy / __copy_user.
 * Prefetching (PREF) is compiled out on DMA-noncoherent and Malta
 * configurations by dropping CONFIG_CPU_HAS_PREFETCH before the
 * asm headers are pulled in.
 */
#ifdef CONFIG_DMA_NONCOHERENT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_MIPS_MALTA
#undef CONFIG_CPU_HAS_PREFETCH
#endif

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

/* Argument registers: a0 = destination, a1 = source, a2 = byte count. */
#define dst a0
#define src a1
#define len a2
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/*
 * EXC(inst_reg, addr, handler): emit one memory instruction at local
 * label 9 and record a __ex_table entry mapping it to 'handler', so a
 * fault on that access jumps to the fixup instead of oopsing.
 * Note: 'inst_reg' carries both the mnemonic and the register operand
 * (they are separated by whitespace, not a comma, at the call sites),
 * which is why the macro takes only three arguments.
 */
#define EXC(inst_reg,addr,handler) \
9: inst_reg, addr; \
	.section __ex_table,"a"; \
	PTR 9b, handler; \
	.previous
93
94
95
96
/*
 * 64-bit kernels move a doubleword (8 bytes) per "unit"; 32-bit kernels
 * move a word (4 bytes).  The aliases below let one copy body serve
 * both widths.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD ld		/* aligned unit load/store */
#define LOADL ldl	/* left/right partial ops for unaligned units */
#define LOADR ldr
#define STOREL sdl
#define STORER sdr
#define STORE sd
#define ADD daddu	/* 64-bit address/length arithmetic */
#define SUB dsubu
#define SRL dsrl
#define SRA dsra
#define SLL dsll
#define SLLV dsllv
#define SRLV dsrlv
#define NBYTES 8	/* bytes per unit */
#define LOG_NBYTES 3

/*
 * Remap the t-register names onto $8..$15 so eight temporaries
 * (t0..t7) are available to the copy loops under the 64-bit ABI.
 * NOTE(review): presumably needed because the n64 <asm/regdef.h>
 * names differ from o32 — confirm against regdef.h.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0 $8
#define t1 $9
#define t2 $10
#define t3 $11
#define t4 $12
#define t5 $13
#define t6 $14
#define t7 $15

#else

#define LOAD lw
#define LOADL lwl
#define LOADR lwr
#define STOREL swl
#define STORER swr
#define STORE sw
#define ADD addu
#define SUB subu
#define SRL srl
#define SLL sll
#define SRA sra
#define SLLV sllv
#define SRLV srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif

/*
 * Unaligned units are handled with a FIRST/REST instruction pair
 * (lwl/lwr-style).  Which partial op touches the low-address bytes
 * depends on endianness, as does the direction a partial unit must be
 * shifted to discard the bytes beyond 'len' (SHIFT_DISCARD).
 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST LOADL
#define STFIRST STORER
#define STREST STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST LOADR
#define STFIRST STOREL
#define STREST STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)		/* offset of unit's first byte */
#define REST(unit) (FIRST(unit)+NBYTES-1)	/* offset for the REST partial op */
#define UNIT(unit) FIRST(unit)

#define ADDRMASK (NBYTES-1)			/* misalignment within a unit */
176
	.text
	.set	noreorder	/* branch delay slots are scheduled by hand */
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat		/* $at is read as AT by the .Ll_exc fixup */
#else
	.set	at=v1		/* DADDI workaround needs assembler temps;
				   route them to v1 so AT stays untouched */
#endif
184
185
186
187
188
189
190
/*
 * void *memcpy(void *dst (a0), const void *src (a1), size_t len (a2))
 * Returns the original dst in v0.
 *
 * FEXPORT(__copy_user) enters the same body: every load/store that may
 * fault is wrapped in EXC(), so a fault branches to a .Ll_exc / .Ls_exc
 * fixup which returns with len = number of bytes NOT copied.
 * Delay-slot instructions are marked (DS) below; .set noreorder is in
 * effect, so instruction order is load-bearing — do not reorder.
 */
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
	move	v0, dst				/* return value = original dst */
.L__memcpy:
FEXPORT(__copy_user)
#define rem t8					/* residual byte count for each loop */
	R10KCBARRIER(0(ra))
	PREF(	0, 0(src) )
	PREF(	1, 0(dst) )
	sltu	t2, len, NBYTES			/* t2 = (len < one unit) */
	and	t1, dst, ADDRMASK		/* t1 = dst misalignment */
	PREF(	0, 1*32(src) )
	PREF(	1, 1*32(dst) )
	bnez	t2, .Lcopy_bytes_checklen	/* tiny copy: go byte-wise */
	 and	t0, src, ADDRMASK		/* (DS) t0 = src misalignment */
	PREF(	0, 2*32(src) )
	PREF(	1, 2*32(dst) )
	bnez	t1, .Ldst_unaligned		/* align dst first if needed */
	 nop
	bnez	t0, .Lsrc_unaligned_dst_aligned
	/*
	 * (DS) the SRL below is this branch's delay slot; it is safe on
	 * the taken path because .Lsrc_unaligned_dst_aligned recomputes
	 * t0 immediately.
	 */
.Lboth_aligned:
	/* src and dst are unit-aligned: copy 8 units per iteration */
	SRL	t0, len, LOG_NBYTES+3		/* t0 = number of 8-unit chunks */
	beqz	t0, .Lcleanup_both_aligned
	 and	rem, len, (8*NBYTES-1)		/* (DS) rem = len % (8*NBYTES) */
	PREF(	0, 3*32(src) )
	PREF(	1, 3*32(dst) )
	.align	4
1:
	/*
	 * Loads are issued ahead of stores; t0/t1 are reused for units
	 * 6/7 once units 0/1 have been stored.  The .Ls_exc_pNu handler
	 * chosen for each store encodes how many units of this batch are
	 * still unstored at that point.
	 */
	R10KCBARRIER(0(ra))
EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
	SUB	len, len, 8*NBYTES
EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy)
EXC(	LOAD	t7, UNIT(5)(src),	.Ll_exc_copy)
EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p8u)
EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p7u)
EXC(	LOAD	t0, UNIT(6)(src),	.Ll_exc_copy)
EXC(	LOAD	t1, UNIT(7)(src),	.Ll_exc_copy)
	ADD	src, src, 8*NBYTES
	ADD	dst, dst, 8*NBYTES
EXC(	STORE	t2, UNIT(-6)(dst),	.Ls_exc_p6u)	/* offsets negative: */
EXC(	STORE	t3, UNIT(-5)(dst),	.Ls_exc_p5u)	/* dst already advanced */
EXC(	STORE	t4, UNIT(-4)(dst),	.Ls_exc_p4u)
EXC(	STORE	t7, UNIT(-3)(dst),	.Ls_exc_p3u)
EXC(	STORE	t0, UNIT(-2)(dst),	.Ls_exc_p2u)
EXC(	STORE	t1, UNIT(-1)(dst),	.Ls_exc_p1u)
	PREF(	0, 8*32(src) )
	PREF(	1, 8*32(dst) )
	bne	len, rem, 1b			/* loop until only rem bytes left */
	 nop

.Lcleanup_both_aligned:
	/* rem == len < 8*NBYTES here */
	beqz	len, .Ldone
	 sltu	t0, len, 4*NBYTES		/* (DS) fewer than 4 units left? */
	bnez	t0, .Lless_than_4units
	 and	rem, len, (NBYTES-1)		/* (DS) rem = len % NBYTES */
	/* copy one 4-unit chunk */
EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	R10KCBARRIER(0(ra))
EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone
	.set	noreorder
.Lless_than_4units:
	/* copy whole units one at a time until only rem bytes remain */
	beq	rem, len, .Lcopy_bytes		/* no whole unit left */
	 nop
1:
	R10KCBARRIER(0(ra))
EXC(	LOAD	t0, 0(src),		.Ll_exc)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * Aligned tail, 0 < len < NBYTES: load one unit, discard the
	 * bytes beyond len, and store the rest with a single partial
	 * store ending at the last destination byte.
	 */
#define bits t2
	beqz	len, .Ldone
	 ADD	t1, dst, len			/* (DS) t1 = one past last dst byte */
	li	bits, 8*NBYTES
	SLL	rem, len, 3			/* rem = number of bits to keep */
EXC(	LOAD	t0, 0(src),		.Ll_exc)
	SUB	bits, bits, rem			/* bits = number of bits to discard */
	SHIFT_DISCARD t0, t0, bits
EXC(	STREST	t0, -1(t1),		.Ls_exc)
	jr	ra
	 move	len, zero			/* (DS) report full copy */
.Ldst_unaligned:
	/*
	 * Copy the sub-unit prefix (NBYTES - (dst % NBYTES) bytes, known
	 * to be <= len here) so dst becomes unit-aligned, then rejoin
	 * .Lboth_aligned if src ends up aligned too.
	 * t0 = src % NBYTES, t1 = dst % NBYTES (computed at entry).
	 */
#define match rem
EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc)
	ADD	t2, zero, NBYTES		/* t2 = bytes to dst boundary... */
EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy)
	SUB	t2, t2, t1			/* ...= NBYTES - (dst % NBYTES) */
	xor	match, t0, t1			/* 0 iff src/dst equally misaligned */
	R10KCBARRIER(0(ra))
EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
	beq	len, t2, .Ldone			/* prefix was the whole copy */
	 SUB	len, len, t2			/* (DS) */
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned		/* now both unit-aligned */
	 ADD	src, src, t2			/* (DS) */

.Lsrc_unaligned_dst_aligned:
	/* dst is aligned, src is not: LDFIRST/LDREST per unit, 4 units/iter */
	SRL	t0, len, LOG_NBYTES+2		/* t0 = number of 4-unit chunks */
	PREF(	0, 3*32(src) )
	beqz	t0, .Lcleanup_src_unaligned
	 and	rem, len, (4*NBYTES-1)		/* (DS) rem = len % (4*NBYTES) */
	PREF(	1, 3*32(dst) )
1:
	R10KCBARRIER(0(ra))
EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy)
	SUB	len, len, 4*NBYTES
EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy)
EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy)
EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy)
EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy)
EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy)
	PREF(	0, 9*32(src) )
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop					/* load-use scheduling for SB-1 */
#endif
EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u)
	PREF(	1, 9*32(dst) )
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned:
	/* 0 <= len < 4*NBYTES, src still unaligned: one unit at a time */
	beqz	len, .Ldone
	 and	rem, len, NBYTES-1		/* (DS) rem = len % NBYTES */
	beq	rem, len, .Lcopy_bytes		/* no whole unit left */
	 nop
1:
	R10KCBARRIER(0(ra))
EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen:
	beqz	len, .Ldone
	 nop
.Lcopy_bytes:
	/* 0 < len < NBYTES: fully unrolled byte loop, at most NBYTES-1 bytes */
	R10KCBARRIER(0(ra))
	/*
	 * COPY_BYTE(N): copy byte N; the sb sits in the beqz delay slot,
	 * so the byte is stored whether or not this was the last one.
	 */
#define COPY_BYTE(N) \
EXC(	lb	t0, N(src), .Ll_exc); \
	SUB	len, len, 1; \
	beqz	len, .Ldone; \
EXC(	 sb	t0, N(dst), .Ls_exc_p1)

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	/* last possible byte (offset NBYTES-2) — return from the delay slot */
EXC(	lb	t0, NBYTES-2(src), .Ll_exc)
	SUB	len, len, 1
	jr	ra
EXC(	 sb	t0, NBYTES-2(dst), .Ls_exc_p1)
.Ldone:
	jr	ra
	 nop
	END(memcpy)
432
.Ll_exc_copy:
	/*
	 * Load-fault fixup for any load after the first of a batch:
	 * earlier loads succeeded but their bytes were not yet stored,
	 * so re-copy byte-by-byte from src up to the faulting address
	 * (THREAD_BUADDR), then fall into .Ll_exc for the accounting.
	 * TI_TASK($28): current task pointer via thread_info in $28.
	 */
	LOAD	t0, TI_TASK($28)
	 nop					/* load delay */
	LOAD	t0, THREAD_BUADDR(t0)		/* t0 = first faulting address */
1:
EXC(	lb	t1, 0(src),	.Ll_exc)	/* may itself fault near t0 */
	ADD	src, src, 1
	sb	t1, 0(dst)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 1
	bne	src, t0, 1b
	.set	noreorder
.Ll_exc:
	/*
	 * Load-fault accounting: len := AT - fault address = bytes not
	 * copied.  NOTE(review): AT is expected to hold src + len on
	 * entry (set up by the __copy_user invocation stubs in
	 * <asm/uaccess.h> — confirm), which is why this file runs with
	 * .set noat.  The untouched tail of dst is then zero-filled;
	 * src is reused here as the countdown of bytes left to clear.
	 */
	LOAD	t0, TI_TASK($28)
	 nop					/* load delay */
	LOAD	t0, THREAD_BUADDR(t0)		/* t0 = first faulting address */
	 nop
	SUB	len, AT, t0			/* len = uncopied byte count */
	/* dst += (fault addr - src): first dst byte that was never written */
	ADD	dst, t0
	SUB	dst, src
	.set	reorder				/* DADDI_WAR */
	SUB	src, len, 1			/* src = bytes-to-clear counter */
	beqz	len, .Ldone
	.set	noreorder
1:	sb	zero, 0(dst)
	ADD	dst, dst, 1
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	bnez	src, 1b
	 SUB	src, src, 1			/* (DS) */
#else
	/* daddi workaround: decrement via a register constant */
	.set	push
	.set	noat
	li	v1, 1
	bnez	src, 1b
	 SUB	src, src, v1			/* (DS) */
	.set	pop
#endif
	jr	ra
	 nop
491
492
/*
 * SEXC(n): emit the store-fault fixup .Ls_exc_pNu, the EXC() handler
 * for a STORE that faulted while n whole units of the current batch
 * (including the faulting one) were still unstored.  len had already
 * been decremented for the batch, so add n*NBYTES back and return;
 * __copy_user's contract is len = bytes not copied.
 *
 * The label line must token-paste n into ".Ls_exc_p ## n ## u" and
 * carry a line-continuation backslash; without them the macro defines
 * a single bogus ".Ls_exc_p" label and none of the .Ls_exc_p[1-8]u
 * targets referenced by the copy loops exist.
 */
#define SEXC(n)				\
	.set	reorder;		/* DADDI_WAR */ \
.Ls_exc_p ## n ## u:			\
	ADD	len, len, n*NBYTES;	\
	jr	ra;			\
	.set	noreorder

SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)
508
.Ls_exc_p1:
	/*
	 * Byte-store fault in COPY_BYTE: len was decremented before the
	 * sb in the delay slot, so add the one unstored byte back.
	 */
	.set	reorder				/* DADDI_WAR */
	ADD	len, len, 1
	jr	ra
	.set	noreorder
.Ls_exc:
	/*
	 * STFIRST/STREST fault: len had not yet been decremented at
	 * those sites, so it already equals the uncopied byte count.
	 */
	jr	ra
	 nop
517
/*
 * void *memmove(void *dst (a0), const void *src (a1), size_t n (a2))
 * Overlap test: the regions overlap iff src < dst+n AND dst < src+n.
 * Non-overlapping (or zero-length) moves are delegated to memcpy;
 * otherwise control falls through into __rmemcpy below.
 */
	.align	5
LEAF(memmove)
	ADD	t0, a0, a2			/* t0 = dst + n */
	ADD	t1, a1, a2			/* t1 = src + n */
	sltu	t0, a1, t0			/* t0 = (src < dst + n) */
	sltu	t1, a0, t1			/* t1 = (dst < src + n) */
	and	t0, t1				/* t0 = regions overlap */
	beqz	t0, .L__memcpy			/* disjoint: plain memcpy */
	 move	v0, a0				/* (DS) return value = dst */
	beqz	a2, .Lr_out			/* n == 0: nothing to move */
	END(memmove)				/* fall through to __rmemcpy */
529
530
/*
 * __rmemcpy(dst=a0, src=a1, n=a2): overlap-safe byte-at-a-time copy
 * used by memmove.  Copies backwards (high to low) when src < dst,
 * forwards otherwise; either direction is safe for its overlap case.
 * Returns with a2 = 0; v0 was already set by the memmove entry above.
 */
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	sltu	t0, a1, a0			/* t0 = (src < dst) */
	beqz	t0, .Lr_end_bytes_up		/* src >= dst: copy forwards */
	 nop
	/* backwards copy: start just past the end of both buffers */
	ADD	a0, a2
	ADD	a1, a2

.Lr_end_bytes:
	R10KCBARRIER(0(ra))
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	SUB	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes
	.set	noreorder

.Lr_out:
	jr	ra
	 move	a2, zero			/* (DS) */

.Lr_end_bytes_up:
	R10KCBARRIER(0(ra))
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	ADD	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes_up
	.set	noreorder

	jr	ra
	 move	a2, zero			/* (DS) */
	END(__rmemcpy)
567