1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#include <asm/asm.h>
17#include <asm/asm-offsets.h>
18#include <asm/regdef.h>
19
20#define dst a0 /* destination address, held in register a0 */
21#define src a1 /* source address, held in register a1 */
22#define len a2 /* byte count, held in register a2 */
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70 /* EXC(inst_reg, addr, handler): emit the instruction "inst_reg, addr" at local label 9 and */
71 /* add the pair (9b, handler) to the __ex_table section, then return to the previous section. */
72 /* NOTE(review): presumably the kernel's fault handling resumes at `handler` when that instruction faults -- confirm. */
73#define EXC(inst_reg,addr,handler) \
749: inst_reg, addr; \
75 .section __ex_table,"a"; \
76 PTR 9b, handler; \
77 .previous
78
79
80
81 /* Doubleword (64-bit) accesses are selected only when CONFIG_64BIT is defined. */
82#ifdef CONFIG_64BIT
83#define USE_DOUBLE
84#endif
85
86#ifdef USE_DOUBLE
87 /* 64-bit build: one copy unit is 8 bytes and the d-prefixed arithmetic forms are used. */
88#define LOAD ld
89#define LOADL ldl
90#define LOADR ldr
91#define STOREL sdl
92#define STORER sdr
93#define STORE sd
94#define ADD daddu
95#define SUB dsubu
96#define SRL dsrl
97#define SRA dsra
98#define SLL dsll
99#define SLLV dsllv
100#define SRLV dsrlv
101#define NBYTES 8 /* bytes per copy unit */
102#define LOG_NBYTES 3 /* log2(NBYTES) */
103 /* Rebind t0-t3 to $8-$11 and name $12-$15 as t4-t7 for the rest of this file. */
104 /* NOTE(review): presumably regdef.h numbers t0-t3 differently under the 64-bit ABI -- confirm. */
105
106
107
108
109#undef t0
110#undef t1
111#undef t2
112#undef t3
113#define t0 $8
114#define t1 $9
115#define t2 $10
116#define t3 $11
117#define t4 $12
118#define t5 $13
119#define t6 $14
120#define t7 $15
121
122#else
123 /* 32-bit build: one copy unit is 4 bytes. */
124#define LOAD lw
125#define LOADL lwl
126#define LOADR lwr
127#define STOREL swl
128#define STORER swr
129#define STORE sw
130#define ADD addu
131#define SUB subu
132#define SRL srl
133#define SLL sll
134#define SRA sra
135#define SLLV sllv
136#define SRLV srlv
137#define NBYTES 4 /* bytes per copy unit */
138#define LOG_NBYTES 2 /* log2(NBYTES) */
139
140#endif
141 /* Bind the left/right partial load and store instructions to FIRST/REST according to byte order. */
142#ifdef CONFIG_CPU_LITTLE_ENDIAN
143#define LDFIRST LOADR
144#define LDREST LOADL
145#define STFIRST STORER
146#define STREST STOREL
147#define SHIFT_DISCARD SLLV
148#else
149#define LDFIRST LOADL
150#define LDREST LOADR
151#define STFIRST STOREL
152#define STREST STORER
153#define SHIFT_DISCARD SRLV
154#endif
155 /* LDFIRST is issued at offset FIRST(unit), LDREST at offset REST(unit); STFIRST, STREST and SHIFT_DISCARD are not referenced in the visible source. */
156#define FIRST(unit) ((unit)*NBYTES) /* byte offset of the lowest-addressed byte of unit number `unit` */
157#define REST(unit) (FIRST(unit)+NBYTES-1) /* byte offset of the highest-addressed byte of that unit */
158#define UNIT(unit) FIRST(unit) /* offset used for whole-unit LOAD/STORE */
159
160#define ADDRMASK (NBYTES-1) /* low address bits; non-zero after masking means not unit-aligned */
161
162 .text
163 .set noreorder /* delay slots are scheduled by hand: the instruction after every branch/jump below executes with it */
164 .set noat /* AT is read explicitly by the l_exc fixup, so the assembler must not use it as scratch */
165
166 /* memcpy and __copy_user share one body. */
167 /* Inputs: dst (a0), src (a1), len (a2, in bytes). */
168 /* memcpy additionally copies dst into v0 before entering the shared code. */
169 /* len is counted down as data is copied. */
170 /* NOTE(review): presumably __copy_user callers read the final len as the number of bytes not copied -- confirm. */
171
172 .align 5
173LEAF(memcpy)
174 move v0, dst /* v0 = dst, set once on entry */
175__memcpy: /* memmove branches here when the two regions do not overlap */
176FEXPORT(__copy_user)
177 /* Dispatch on len and on the alignment of src. */
178 /* Pattern used throughout: sltu computes a flag, the following bnez tests it, and the */
179 /* instruction after that bnez (its delay slot) already computes the flag for the next test. */
180 /* The alignment of dst is not examined anywhere on this path. */
181
182
183
184
185 pref 0, 0(src)
186 sltu t0, len, NBYTES /* less than one unit in total? */
187 bnez t0, copy_bytes_checklen
188 and t0, src, ADDRMASK /* (delay slot) non-zero if src is not unit-aligned */
189 bnez t0, src_unaligned
190 sltu t0, len, 4*NBYTES /* (delay slot) fewer than 4 units? */
191 bnez t0, less_than_4units
192 sltu t0, len, 8*NBYTES /* (delay slot) fewer than 8 units? */
193 bnez t0, less_than_8units
194 sltu t0, len, 16*NBYTES /* (delay slot) fewer than 16 units? */
195 bnez t0, cleanup_both_aligned
196 sltu t0, len, 128+1 /* (delay slot) len <= 128 bytes? */
197 bnez t0, 1f /* yes: enter the loop without the extra prefetches */
198 sltu t0, len, 256+1 /* (delay slot) len <= 256 bytes? */
199 bnez t0, 1f
200 pref 0, 128(src) /* (delay slot) reached only when len > 128; executes whichever way the bnez goes */
201 /* Loop entry used while more than 256 bytes remain: prefetch 256 bytes ahead of src. */
202 /* NOTE(review): 128 and 256 are byte counts; they equal 16 and 32 units only when NBYTES is 8. */
2032: pref 0, 256(src)
204 /* Loop entry without prefetch. Each pass moves 16 units as four load groups of 4, each followed by its 4 stores. */
205 /* A store fault is routed to s_exc_pNu, where N is the number of units of this pass not yet stored (16 down to 1). */
2061:
207EXC( LOAD t0, UNIT(0)(src), l_exc)
208EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
209EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
210EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
211 SUB len, len, 16*NBYTES /* the whole pass is subtracted before any store is issued */
212EXC( STORE t0, UNIT(0)(dst), s_exc_p16u)
213EXC( STORE t1, UNIT(1)(dst), s_exc_p15u)
214EXC( STORE t2, UNIT(2)(dst), s_exc_p14u)
215EXC( STORE t3, UNIT(3)(dst), s_exc_p13u)
216EXC( LOAD t0, UNIT(4)(src), l_exc_copy)
217EXC( LOAD t1, UNIT(5)(src), l_exc_copy)
218EXC( LOAD t2, UNIT(6)(src), l_exc_copy)
219EXC( LOAD t3, UNIT(7)(src), l_exc_copy)
220EXC( STORE t0, UNIT(4)(dst), s_exc_p12u)
221EXC( STORE t1, UNIT(5)(dst), s_exc_p11u)
222EXC( STORE t2, UNIT(6)(dst), s_exc_p10u)
223 ADD src, src, 16*NBYTES /* src is advanced here; the second half of the pass uses negative offsets */
224EXC( STORE t3, UNIT(7)(dst), s_exc_p9u)
225 ADD dst, dst, 16*NBYTES /* dst likewise, after the last store that uses a positive offset */
226EXC( LOAD t0, UNIT(-8)(src), l_exc_copy)
227EXC( LOAD t1, UNIT(-7)(src), l_exc_copy)
228EXC( LOAD t2, UNIT(-6)(src), l_exc_copy)
229EXC( LOAD t3, UNIT(-5)(src), l_exc_copy)
230EXC( STORE t0, UNIT(-8)(dst), s_exc_p8u)
231EXC( STORE t1, UNIT(-7)(dst), s_exc_p7u)
232EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u)
233EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u)
234EXC( LOAD t0, UNIT(-4)(src), l_exc_copy)
235EXC( LOAD t1, UNIT(-3)(src), l_exc_copy)
236EXC( LOAD t2, UNIT(-2)(src), l_exc_copy)
237EXC( LOAD t3, UNIT(-1)(src), l_exc_copy)
238EXC( STORE t0, UNIT(-4)(dst), s_exc_p4u)
239EXC( STORE t1, UNIT(-3)(dst), s_exc_p3u)
240EXC( STORE t2, UNIT(-2)(dst), s_exc_p2u)
241EXC( STORE t3, UNIT(-1)(dst), s_exc_p1u)
242 sltu t0, len, 256+1 /* at most 256 bytes left? */
243 beqz t0, 2b /* no, more than 256: next pass with prefetch */
244 sltu t0, len, 128 /* (delay slot) fewer than 128 bytes left? */
245 beqz t0, 1b /* no: next pass without prefetch */
246 nop
247 /* Tail for the aligned-src path: copy one group of 8 units if at least that much is left, */
248 /* then fall through to less_than_8units unless len has reached 0. */
249
250cleanup_both_aligned:
251 beqz len, done
252 sltu t0, len, 8*NBYTES /* (delay slot) fewer than 8 units left? */
253 bnez t0, less_than_8units
254 nop
255EXC( LOAD t0, UNIT(0)(src), l_exc)
256EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
257EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
258EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
259 SUB len, len, 8*NBYTES /* subtracted before the stores; fixup label suffix counts units still unstored */
260EXC( STORE t0, UNIT(0)(dst), s_exc_p8u)
261EXC( STORE t1, UNIT(1)(dst), s_exc_p7u)
262EXC( STORE t2, UNIT(2)(dst), s_exc_p6u)
263EXC( STORE t3, UNIT(3)(dst), s_exc_p5u)
264EXC( LOAD t0, UNIT(4)(src), l_exc_copy)
265EXC( LOAD t1, UNIT(5)(src), l_exc_copy)
266EXC( LOAD t2, UNIT(6)(src), l_exc_copy)
267EXC( LOAD t3, UNIT(7)(src), l_exc_copy)
268EXC( STORE t0, UNIT(4)(dst), s_exc_p4u)
269EXC( STORE t1, UNIT(5)(dst), s_exc_p3u)
270EXC( STORE t2, UNIT(6)(dst), s_exc_p2u)
271EXC( STORE t3, UNIT(7)(dst), s_exc_p1u)
272 ADD src, src, 8*NBYTES
273 beqz len, done
274 ADD dst, dst, 8*NBYTES /* (delay slot) executes whether or not the branch to done is taken */
275 /* Fewer than 8 units remain: copy one group of 4 units if at least that much is left, */
276 /* then fall through to less_than_4units unless len has reached 0. */
277
278less_than_8units:
279 sltu t0, len, 4*NBYTES /* fewer than 4 units left? */
280 bnez t0, less_than_4units
281 nop
282EXC( LOAD t0, UNIT(0)(src), l_exc)
283EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
284EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
285EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
286 SUB len, len, 4*NBYTES /* subtracted before the stores */
287EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
288EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
289EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
290EXC( STORE t3, UNIT(3)(dst), s_exc_p1u)
291 ADD src, src, 4*NBYTES
292 beqz len, done
293 ADD dst, dst, 4*NBYTES /* (delay slot) executes whether or not the branch to done is taken */
294 /* Fewer than 4 units remain: copy up to three whole units one at a time, then pass */
295 /* whatever is left (less than one unit) to copy_bytes_checklen. */
296 /* All length tests and the final store offset are written in terms of NBYTES so they */
297 /* follow the unit size chosen by USE_DOUBLE. */
298less_than_4units:
299 sltu t0, len, 1*NBYTES /* not even one whole unit left? */
300 bnez t0, copy_bytes_checklen
301 nop
302 /* First unit. */
303
304
305EXC( LOAD t0, 0(src), l_exc)
306 SUB len, len, NBYTES
307 sltu t1, len, NBYTES /* t1 = no further whole unit remains */
308EXC( STORE t0, 0(dst), s_exc_p1u)
309 ADD src, src, NBYTES
310 bnez t1, copy_bytes_checklen
311 ADD dst, dst, NBYTES /* (delay slot) */
312 /* Second unit. */
313
314
315EXC( LOAD t0, 0(src), l_exc)
316 SUB len, len, NBYTES
317 sltu t1, len, NBYTES /* t1 = no further whole unit remains */
318EXC( STORE t0, 0(dst), s_exc_p1u)
319 ADD src, src, NBYTES
320 bnez t1, copy_bytes_checklen
321 ADD dst, dst, NBYTES /* (delay slot) */
322 /* Third and last possible unit. */
323
324
325EXC( LOAD t0, 0(src), l_exc)
326 SUB len, len, NBYTES
327 ADD src, src, NBYTES
328 ADD dst, dst, NBYTES
329 b copy_bytes_checklen
330EXC( STORE t0, -NBYTES(dst), s_exc_p1u) /* (delay slot) dst was already advanced, so the unit lands one unit back */
331
332src_unaligned:
333#define rem t8
334 SRL t0, len, LOG_NBYTES+2 /* t0 = len / (4*NBYTES), the number of whole 4-unit groups */
335 beqz t0, cleanup_src_unaligned
336 and rem, len, (4*NBYTES-1) /* (delay slot) rem = len % (4*NBYTES), what is left after those groups */
3371:
338 /* Each unit is assembled from an LDFIRST at its first byte and an LDREST at its last byte, */
339 /* so src may have any alignment. The stores are plain whole-unit STOREs. */
340 /* The first load of the pass faults to l_exc, all later loads to l_exc_copy. */
341
342
343
344EXC( LDFIRST t0, FIRST(0)(src), l_exc)
345EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy)
346 SUB len, len, 4*NBYTES /* subtracted before the stores */
347EXC( LDREST t0, REST(0)(src), l_exc_copy)
348EXC( LDREST t1, REST(1)(src), l_exc_copy)
349EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy)
350EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy)
351EXC( LDREST t2, REST(2)(src), l_exc_copy)
352EXC( LDREST t3, REST(3)(src), l_exc_copy)
353 ADD src, src, 4*NBYTES
354EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
355EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
356EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
357EXC( STORE t3, UNIT(3)(dst), s_exc_p1u)
358 bne len, rem, 1b /* repeat until only the remainder is left */
359 ADD dst, dst, 4*NBYTES /* (delay slot) */
360 /* Unaligned-src tail: copy the remaining whole units one at a time, then fall into the byte copier. */
361cleanup_src_unaligned:
362 beqz len, done
363 and rem, len, NBYTES-1 /* (delay slot) rem = len % NBYTES */
364 beq rem, len, copy_bytes /* len < NBYTES: no whole unit left (len is non-zero here) */
365 nop
3661:
367EXC( LDFIRST t0, FIRST(0)(src), l_exc)
368EXC( LDREST t0, REST(0)(src), l_exc_copy)
369 SUB len, len, NBYTES
370EXC( STORE t0, 0(dst), s_exc_p1u)
371 ADD src, src, NBYTES
372 bne len, rem, 1b /* repeat until only the sub-unit remainder is left */
373 ADD dst, dst, NBYTES /* (delay slot) */
374
375copy_bytes_checklen:
376 beqz len, done
377 nop
378copy_bytes:
379 /* COPY_BYTE(N): load byte N, decrement len, branch to done if len is now 0; the sb is the delay slot of that beqz, so the byte is stored either way. */
380#define COPY_BYTE(N) \
381EXC( lb t0, N(src), l_exc); \
382 SUB len, len, 1; \
383 beqz len, done; \
384EXC( sb t0, N(dst), s_exc_p1)
385
386 COPY_BYTE(0)
387 COPY_BYTE(1)
388#ifdef USE_DOUBLE
389 COPY_BYTE(2)
390 COPY_BYTE(3)
391 COPY_BYTE(4)
392 COPY_BYTE(5)
393#endif
394EXC( lb t0, NBYTES-2(src), l_exc) /* final byte slot, index NBYTES-2: at most NBYTES-1 bytes are copied here in total */
395 SUB len, len, 1
396 jr ra
397EXC( sb t0, NBYTES-2(dst), s_exc_p1) /* delay slot of the jr */
398done:
399 jr ra
400 nop
401 END(memcpy)
402
403l_exc_copy:
404 /* Fixup target for every load except the first of a group. */
405 /* Fetches the address stored at THREAD_BUADDR of the task reached through TI_TASK($28), */
406 /* then copies bytes from src to dst one at a time until src equals that address, */
407 /* and falls through into l_exc. */
408 /* The lb has its own __ex_table entry (handler l_exc); the sb has none. */
409 /* NOTE(review): presumably $28 is the thread_info pointer and THREAD_BUADDR holds the faulting address -- confirm. */
410 /* NOTE(review): the loop tests src != t0 only after copying a byte, so it assumes src is below that address on entry -- confirm. */
411
412
413
414 LOAD t0, TI_TASK($28)
415 nop
416 LOAD t0, THREAD_BUADDR(t0) /* t0 = address at which the byte loop stops */
4171:
418EXC( lb t1, 0(src), l_exc)
419 ADD src, src, 1
420 sb t1, 0(dst)
421 bne src, t0, 1b
422 ADD dst, dst, 1 /* (delay slot) */
423l_exc:
424 LOAD t0, TI_TASK($28)
425 nop
426 LOAD t0, THREAD_BUADDR(t0) /* t0 = address stored at THREAD_BUADDR */
427 nop
428 SUB len, AT, t0 /* len = AT - t0 */
429 /* NOTE(review): AT is never written in the visible source -- presumably the __copy_user caller passes the end of the source range in AT; confirm. */
430 /* Advance dst by the distance from src to t0: dst = dst + t0 - src. */
431
432
433
434 ADD dst, t0
435 SUB dst, src
436 /* Store zero bytes to len bytes starting at dst, with src reused as the down-counter. */
437
438
439
440 beqz len, done
441 SUB src, len, 1 /* (delay slot) src = len - 1 */
4421: sb zero, 0(dst)
443 ADD dst, dst, 1
444 bnez src, 1b
445 SUB src, src, 1 /* (delay slot) */
446 jr ra
447 nop
448 /* SEXC(n) defines the store-fault fixup label s_exc_p<n>u named by the EXC(STORE ...) sites. */
449 /* The fixup returns to the caller with n*NBYTES added to len; the ADD is the delay slot of the jr. */
450#define SEXC(n) \
451s_exc_p ## n ## u: \
452 jr ra; \
453 ADD len, len, n*NBYTES
454
455SEXC(16)
456SEXC(15)
457SEXC(14)
458SEXC(13)
459SEXC(12)
460SEXC(11)
461SEXC(10)
462SEXC(9)
463SEXC(8)
464SEXC(7)
465SEXC(6)
466SEXC(5)
467SEXC(4)
468SEXC(3)
469SEXC(2)
470SEXC(1)
471 /* Fixup for a faulting byte store (used by COPY_BYTE and the final sb): return with len += 1. */
472s_exc_p1:
473 jr ra
474 ADD len, len, 1 /* (delay slot) */
475s_exc: /* returns with len unchanged; no EXC site in the visible source names this label */
476 jr ra
477 nop
478 /* memmove: a0 = dst, a1 = src, a2 = len. Uses the forward copy at __memcpy when the regions do not overlap. */
479 .align 5
480LEAF(memmove)
481 ADD t0, a0, a2 /* t0 = dst + len */
482 ADD t1, a1, a2 /* t1 = src + len */
483 sltu t0, a1, t0 /* t0 = (src < dst + len) */
484 sltu t1, a0, t1 /* t1 = (dst < src + len) */
485 and t0, t1 /* t0 = both true, i.e. the regions overlap */
486 beqz t0, __memcpy /* no overlap */
487 move v0, a0 /* (delay slot) v0 = dst, set on both paths */
488 beqz a2, r_out /* len == 0: return immediately */
489 END(memmove)
490 /* No instruction follows the beqz inside memmove: its delay slot is the first instruction of __rmemcpy, and execution falls through into it. */
491
492LEAF(__rmemcpy) /* a0 = dst, a1 = src, a2 = len; byte-at-a-time copy in the direction chosen below */
493 sltu t0, a1, a0 /* t0 = (src < dst); also serves as the delay slot of memmove's last branch */
494 beqz t0, r_end_bytes_up /* src >= dst: copy in ascending order */
495 nop
496 ADD a0, a2 /* dst += len */
497 ADD a1, a2 /* src += len */
498 /* NOTE(review): neither loop checks for len == 0 before its first decrement; memmove tests a2 before falling through -- confirm no direct caller passes 0. */
499r_end_bytes:
500 lb t0, -1(a1) /* descending copy: byte just below the current src */
501 SUB a2, a2, 0x1
502 sb t0, -1(a0)
503 SUB a1, a1, 0x1
504 bnez a2, r_end_bytes
505 SUB a0, a0, 0x1 /* (delay slot) */
506
507r_out:
508 jr ra
509 move a2, zero /* (delay slot) a2 = 0 on return */
510
511r_end_bytes_up:
512 lb t0, (a1) /* ascending copy */
513 SUB a2, a2, 0x1
514 sb t0, (a0)
515 ADD a1, a1, 0x1
516 bnez a2, r_end_bytes_up
517 ADD a0, a0, 0x1 /* (delay slot) */
518
519 jr ra
520 move a2, zero /* (delay slot) a2 = 0 on return */
521 END(__rmemcpy)
522