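/*
 * MIPS assembly: ones'-complement checksum helpers.
 *
 *   csum_partial                    checksum a buffer
 *   __csum_partial_copy_nocheck     copy a buffer and checksum it
 *   __csum_partial_copy_from_user   same, with fault fixups on the accesses
 *   __csum_partial_copy_to_user     same, with fault fixups on the accesses
 *
 * This file is passed through the C preprocessor before it is assembled.
 *
 * NOTE(review): the original licence/copyright header is missing from this
 * copy of the file; restore it from the project history.
 */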
#include <linux/errno.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * 64-bit kernels checksum a doubleword at a time (USE_DOUBLE) and the copy
 * loop below needs eight temporaries.  Drop the existing t0-t3 names
 * (presumably supplied by <asm/regdef.h> -- confirm) and name $8-$15 as
 * t0-t7 so the rest of the file can use t0-t7 unconditionally.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#define USE_DOUBLE
#endif

/*
 * Natural-width operations used by csum_partial: doublewords when
 * USE_DOUBLE is defined, words otherwise.
 *
 *   LOAD    load one NBYTES-sized unit
 *   LOAD32  load 32 bits (lwu, i.e. zero-extended, in the doubleword build)
 *   ADD     unsigned add at the natural width
 *   NBYTES  size of one unit in bytes
 */
#ifdef USE_DOUBLE

#define LOAD	ld
#define LOAD32	lwu
#define ADD	daddu
#define NBYTES	8

#else

#define LOAD	lw
#define LOAD32	lw
#define ADD	addu
#define NBYTES	4

#endif /* USE_DOUBLE */

/* Byte offset of the unit-th NBYTES-sized unit. */
#define UNIT(unit)	((unit)*NBYTES)

/*
 * ADDC(sum, reg): sum += reg with end-around carry, at the natural width.
 *
 * After the add, "sum < reg" (unsigned) holds exactly when the addition
 * wrapped, so sltu produces the carry-out, which is then added back in.
 * Clobbers v1.
 */
#define ADDC(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	ADD	sum, reg;					\
	sltu	v1, sum, reg;					\
	ADD	sum, v1;					\
	.set	pop

/*
 * ADDC32(sum, reg): same as ADDC but always uses the 32-bit addu, whatever
 * the natural width is.  Clobbers v1.
 */
#define ADDC32(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	addu	sum, reg;					\
	sltu	v1, sum, reg;					\
	addu	sum, v1;					\
	.set	pop

/*
 * CSUM_BIGCHUNK1: add the four consecutive units at offset(src) into sum.
 * All four loads are issued first; the values are then summed pairwise
 * before being added to sum.  _t0.._t3 are scratch; v1 is clobbered by ADDC.
 */
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
	LOAD	_t1, (offset + UNIT(1))(src);			\
	LOAD	_t2, (offset + UNIT(2))(src);			\
	LOAD	_t3, (offset + UNIT(3))(src);			\
	ADDC(_t0, _t1);						\
	ADDC(_t2, _t3);						\
	ADDC(sum, _t0);						\
	ADDC(sum, _t2)

/*
 * CSUM_BIGCHUNK: add the 32 bytes at offset(src) into sum.  That is one
 * CSUM_BIGCHUNK1 with 8-byte units, or two of them (the second one 0x10
 * bytes further on) with 4-byte units.
 */
#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif

/*
 * csum_partial - ones'-complement sum of a memory buffer
 *
 * In:
 *   a0 (src)	address of the buffer, any alignment
 *   a1		length of the buffer in bytes
 *   a2		32-bit partial checksum that is added to the result
 * Out:
 *   v0 (sum)	32-bit sum of the buffer plus a2 (end-around carry)
 *
 * Clobbers a0, a1, v1, t0-t4, t7, t8.
 *
 * Outline:
 *   1. Fewer than 8 bytes: go straight to the tail code.
 *   2. Otherwise consume 1, 2, 4, 8 and 16 bytes as needed until src is
 *      32-byte aligned.  If fewer than 56 bytes are left after the
 *      halfword step, skip ahead to the word loop instead.
 *   3. Sum 128-byte blocks in a loop, then at most one 64-byte and one
 *      32-byte block, then whole words.
 *   4. Tail: at most one word, one halfword and one byte, read with
 *      unaligned-safe loads.
 *   5. Fold to 32 bits; if the buffer started on an odd address swap the
 *      bytes within each halfword; finally add a2.
 *
 * The code runs in .set noreorder: the instruction after each branch is its
 * delay slot and always executes.  Delay-slot instructions are indented by
 * one extra space.
 */
#define src a0
#define sum v0

	.text
	.set	noreorder
	.align	5
LEAF(csum_partial)
EXPORT_SYMBOL(csum_partial)
	move	sum, zero
	move	t7, zero			/* t7 = "odd start" flag */

	sltiu	t8, a1, 0x8
	bnez	t8, .Lsmall_csumcpy		/* < 8 bytes in total */
	 move	t2, a1				/* tail code takes its length in t2 */

	andi	t7, src, 0x1			/* odd start address? */

.Lhword_align:
	beqz	t7, .Lword_align
	 andi	t8, src, 0x2

	/* Consume one byte so that src becomes halfword aligned. */
	lbu	t0, (src)
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	t0, t0, 8
#endif
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2

.Lword_align:
	beqz	t8, .Ldword_align
	 sltiu	t8, a1, 56			/* t8 = (remaining < 56) */

	/* Consume one halfword so that src becomes word aligned. */
	lhu	t0, (src)
	LONG_SUBU	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56
	PTR_ADDU	src, src, 0x2

.Ldword_align:
	bnez	t8, .Ldo_end_words		/* short buffer: word loop only */
	 move	t8, a1				/* word loop derives its count from t8 */

	andi	t8, src, 0x4
	beqz	t8, .Lqword_align
	 andi	t8, src, 0x8

	/* Consume one word so that src becomes 8-byte aligned. */
	LOAD32	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

.Lqword_align:
	beqz	t8, .Loword_align
	 andi	t8, src, 0x10

	/* Consume 8 bytes so that src becomes 16-byte aligned. */
#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
#else
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
#endif
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

.Loword_align:
	beqz	t8, .Lbegin_movement
	 LONG_SRL	t8, a1, 0x7		/* t8 = number of 128-byte blocks */

	/* Consume 16 bytes so that src becomes 32-byte aligned. */
#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	ld	t1, 0x08(src)
	ADDC(sum, t0)
	ADDC(sum, t1)
#else
	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7		/* recompute the block count */

.Lbegin_movement:
	beqz	t8, 1f
	 andi	t2, a1, 0x40			/* t2 != 0: a 64-byte block follows */

.Lmove_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	LONG_SUBU	t8, t8, 0x01
	/*
	 * The assembler fills the branch delay slot itself here.
	 * NOTE(review): presumably so that a macro-expanded add never ends
	 * up in a delay slot (see CONFIG_CPU_DADDI_WORKAROUNDS) -- confirm.
	 */
	.set	reorder
	PTR_ADDU	src, src, 0x80
	bnez	t8, .Lmove_128bytes
	.set	noreorder

1:
	beqz	t2, 1f
	 andi	t2, a1, 0x20			/* t2 != 0: a 32-byte block follows */

.Lmove_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	PTR_ADDU	src, src, 0x40

1:
	beqz	t2, .Ldo_end_words
	 andi	t8, a1, 0x1c			/* t8 = bytes left in whole words */

.Lmove_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

.Ldo_end_words:
	beqz	t8, .Lsmall_csumcpy
	 andi	t2, a1, 0x3			/* t2 = bytes left after the words */
	LONG_SRL	t8, t8, 0x2		/* bytes -> words */

.Lend_words:
	LOAD32	t0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, t0)
	.set	reorder				/* assembler fills the delay slot */
	PTR_ADDU	src, src, 0x4
	bnez	t8, .Lend_words
	.set	noreorder

	/*
	 * Tail: t2 holds the number of bytes still to sum.  The loads below
	 * make no assumption about the alignment of src.
	 */
.Lsmall_csumcpy:
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, 1f
	 andi	t0, a1, 2

	/* One word. */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
#ifdef USE_DOUBLE
	dsll	t1, t1, 32			/* keep only the 32 loaded bits, in the upper half */
#endif
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	 andi	t0, a1, 1

	/* One halfword. */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	 sll	t1, t1, 16			/* halfword (or zero) to bits 31..16 */

	/* One byte. */
	lbu	t2, (src)
	 nop					/* NOTE(review): presumably a load delay slot filler */

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

	/* Fold the accumulator to 32 bits with end-around carry. */
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0			/* v1 = low word, moved to the upper half */
	daddu	sum, v1				/* upper half = high word + low word */
	sltu	v1, sum, v1			/* v1 = carry out of that add */
	dsra32	sum, sum, 0			/* move the result down to bits 31..0 */
	addu	sum, v1				/* add the carry back in */
#endif

	/*
	 * If the buffer started on an odd address (t7 != 0), swap the two
	 * bytes of each halfword of the sum.
	 *
	 * NOTE(review): the first line of this #if was missing from the file;
	 * it has been restored as MIPSR2 || MIPSR5 -- confirm against the
	 * project history.
	 */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum				/* v1 = sum with bytes swapped per halfword */
	movn	sum, v1, t7			/* use it only when t7 != 0 */
	.set	pop
#else
	beqz	t7, 1f
	 lui	v1, 0x00ff
	addu	v1, 0x00ff			/* v1 = 0x00ff00ff */
	and	t0, sum, v1
	sll	t0, t0, 8			/* low byte of each halfword, moved up */
	srl	sum, sum, 8
	and	sum, sum, v1			/* high byte of each halfword, moved down */
	or	sum, sum, t0
1:
#endif
	.set	reorder
	/* Add the caller's partial checksum. */
	ADDC32(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)

/*
 * Register roles for the copy-and-checksum routines generated by
 * __BUILD_CSUM_PARTIAL_COPY_USER below.
 */
#define src a0
#define dst a1
#define len a2
#define sum v0
#define odd t8

/*
 * Symbolic values tested by EXC:
 *   LD_INSN / ST_INSN		kind of access being emitted
 *   LEGACY_MODE / EVA_MODE	"mode" argument of the generator macro
 *   USEROP / KERNELOP		"from" and "to" arguments of the generator macro
 */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE 2
#define USEROP 1
#define KERNELOP 2

/*
 * EXC(insn, type, reg, addr): emit one load or store and, where required,
 * an __ex_table entry that pairs the instruction with the .L_exc label.
 *
 * Only usable inside __BUILD_CSUM_PARTIAL_COPY_USER, because it reads that
 * macro's \mode, \from and \to arguments.
 *
 *   \mode == LEGACY_MODE
 *	every access is emitted unchanged and gets a table entry.
 *   otherwise
 *	a load when \from == USEROP, or a store when \to == USEROP, is
 *	emitted through __BUILD_EVA_INSN using the mnemonic with an "e"
 *	appended, and gets a table entry; any other access is emitted
 *	unchanged with no table entry.
 */
#define EXC(insn, type, reg, addr)				\
	.if \mode == LEGACY_MODE;				\
9:		insn reg, addr;					\
		.section __ex_table,"a";			\
		PTR	9b, .L_exc;				\
		.previous;					\
	/* not LEGACY_MODE */					\
	.else;							\
		/* access on the USEROP side of the copy? */	\
		.if ((\from == USEROP) && (type == LD_INSN)) || \
		    ((\to == USEROP) && (type == ST_INSN));	\
9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
			.section __ex_table,"a";		\
			PTR	9b, .L_exc;			\
			.previous;				\
		.else;						\
			/* plain access, no table entry */	\
			insn reg, addr;				\
		.endif;						\
	.endif

/*
 * From here on LOAD is a function-like macro that goes through EXC, so the
 * bare-mnemonic definition used by csum_partial is dropped first.
 *
 * Size-dependent operations for the copy routines.  All the LOAD* and
 * STORE* wrappers expand through EXC; LOADK is the bare load mnemonic.
 *
 *   LOAD / STORE	whole aligned unit
 *   LOADL / LOADR	left / right partial load (ldl/ldr or lwl/lwr)
 *   STOREL / STORER	left / right partial store (sdl/sdr or swl/swr)
 *   LOADBU / STOREB	single byte
 *   LOG_NBYTES		log2(NBYTES)
 */
#undef LOAD

#ifdef USE_DOUBLE

#define LOADK	ld
#define LOAD(reg, addr)		EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sd, ST_INSN, reg, addr)
#define ADD	daddu
#define SUB	dsubu
#define SRL	dsrl
#define SLL	dsll
#define SLLV	dsllv
#define SRLV	dsrlv
#define NBYTES	8
#define LOG_NBYTES 3

#else

#define LOADK	lw
#define LOAD(reg, addr)		EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sw, ST_INSN, reg, addr)
#define ADD	addu
#define SUB	subu
#define SRL	srl
#define SLL	sll
#define SLLV	sllv
#define SRLV	srlv
#define NBYTES	4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

/*
 * Endian-dependent selection for unaligned accesses.
 *
 *   LDFIRST / STFIRST	partial access used with the FIRST() offset
 *   LDREST / STREST	partial access used with the REST() offset
 *   SHIFT_DISCARD	variable shift used to drop unwanted bytes of a unit
 *   SHIFT_DISCARD_REVERT	the opposite shift, undoing SHIFT_DISCARD
 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

/* Offsets of the first and of the last byte of the unit-th unit. */
#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

/* Mask of the address bits that must be zero for a unit-aligned pointer. */
#define ADDRMASK (NBYTES-1)

/*
 * Assembler temporary for the code below: none, unless
 * CONFIG_CPU_DADDI_WORKAROUNDS is set, in which case v1 is handed to the
 * assembler as its temporary.
 */
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

/*
 * __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to
 *
 * Expands to the body of a routine that copies len bytes from src to dst
 * and returns the ones'-complement sum of the copied data.
 *
 * In:
 *   a0 (src)	source address, any alignment
 *   a1 (dst)	destination address, any alignment
 *   a2 (len)	number of bytes, may be 0
 * Out:
 *   v0 (sum)	folded 32-bit sum; the accumulator starts at -1 (all ones)
 *
 * mode, from and to are consumed by EXC and decide which accesses get an
 * exception-table entry.  Clobbers a0-a2, v1, t0-t8.
 *
 * The body is assembled in .set noreorder (set by the surrounding file):
 * the instruction after each branch is its delay slot and always executes.
 * Delay-slot instructions are indented by one extra space.
 */
	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

	li	sum, -1
	move	odd, zero

	/*
	 * Dispatch on length and alignment:
	 *   len < NBYTES		-> byte loop
	 *   dst unaligned		-> align dst first
	 *   dst aligned, src not	-> unaligned-load loop
	 *   both aligned		-> fall through
	 */
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK		/* t1 = misalignment of dst */
	bnez	t2, .Lcopy_bytes_checklen\@
	 and	t0, src, ADDRMASK		/* t0 = misalignment of src */
	andi	odd, dst, 0x1			/* odd destination address? */
	bnez	t1, .Ldst_unaligned\@
	 nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
	/*
	 * The first instruction of .Lboth_aligned doubles as the delay slot
	 * of the branch above; it only writes t0, which the branch target
	 * recomputes.
	 */
.Lboth_aligned\@:
	 SRL	t0, len, LOG_NBYTES+3		/* t0 = len / (8 units) */
	beqz	t0, .Lcleanup_both_aligned\@	/* len < 8*NBYTES */
	 nop
	SUB	len, 8*NBYTES			/* bias len so the loop can test its sign */
	.align	4
1:
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	LOAD(t4, UNIT(4)(src))
	LOAD(t5, UNIT(5)(src))
	LOAD(t6, UNIT(6)(src))
	LOAD(t7, UNIT(7)(src))
	SUB	len, len, 8*NBYTES
	ADD	src, src, 8*NBYTES
	/* Each value is stored before ADDC overwrites it with a pair sum. */
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	STORE(t4, UNIT(4)(dst))
	ADDC(t4, t5)
	STORE(t5, UNIT(5)(dst))
	ADDC(sum, t4)
	STORE(t6, UNIT(6)(dst))
	ADDC(t6, t7)
	STORE(t7, UNIT(7)(dst))
	ADDC(sum, t6)
	.set	reorder				/* assembler fills the delay slot */
	ADD	dst, dst, 8*NBYTES
	bgez	len, 1b
	.set	noreorder
	ADD	len, 8*NBYTES			/* remove the bias again */

	/*
	 * Both pointers aligned, len < 8*NBYTES bytes left.
	 */
.Lcleanup_both_aligned\@:
#define rem t7
	beqz	len, .Ldone\@
	 sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	 and	rem, len, (NBYTES-1)		/* rem = len % NBYTES */

	/* len >= 4*NBYTES: copy four units in one go. */
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* assembler fills the delay slot */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/* rem = len % NBYTES; copy whole units until len == rem. */
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	LOAD(t0, 0(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* assembler fills the delay slot */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * Both pointers aligned, len < NBYTES bytes left.  Load one whole
	 * aligned unit, shift out the bytes beyond len, write the kept bytes
	 * with a single partial store that ends at dst + len - 1, then shift
	 * them back into place before adding them to the sum.
	 */
#define bits t2
	beqz	len, .Ldone\@
	 ADD	t1, dst, len			/* t1 = one past the last dst byte */
	li	bits, 8*NBYTES
	SLL	rem, len, 3			/* rem = number of bits to keep */
	LOAD(t0, 0(src))
	SUB	bits, bits, rem			/* bits = number of bits to discard */
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1))
	SHIFT_DISCARD_REVERT t0, t0, bits
	.set	reorder
	ADDC(sum, t0)
	b	.Ldone\@
	.set	noreorder
.Ldst_unaligned\@:
	/*
	 * dst is not unit aligned and len >= NBYTES.
	 *   t0 = src & ADDRMASK, t1 = dst & ADDRMASK (from the entry code)
	 * Load one unit from src with the unaligned pair, store only the
	 * NBYTES - t1 bytes that bring dst up to a unit boundary, and add
	 * exactly those bytes to the sum.  If src and dst were misaligned by
	 * the same amount, both are aligned afterwards.
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src))
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src))
	SUB	t2, t2, t1			/* t2 = number of bytes stored */
	xor	match, t0, t1			/* 0 when misalignments are equal */
	STFIRST(t3, FIRST(0)(dst))
	SLL	t4, t1, 3			/* t4 = number of bits to discard */
	SHIFT_DISCARD t3, t3, t4
	/* the shift is deliberately not reverted before summing */
	ADDC(sum, t3)
	beq	len, t2, .Ldone\@
	 SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	 ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2		/* t0 = len / (4 units) */
	beqz	t0, .Lcleanup_src_unaligned\@
	 and	rem, len, (4*NBYTES-1)		/* rem = len % (4*NBYTES) */
1:
	/*
	 * Four units per iteration.  The partial loads are interleaved so
	 * that the two loads targeting one register are never adjacent.
	 */
	LDFIRST(t0, FIRST(0)(src))
	LDFIRST(t1, FIRST(1)(src))
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src))
	LDREST(t1, REST(1)(src))
	LDFIRST(t2, FIRST(2)(src))
	LDFIRST(t3, FIRST(3)(src))
	LDREST(t2, REST(2)(src))
	LDREST(t3, REST(3)(src))
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop					/* NOTE(review): presumably a scheduling aid on SB1 */
#endif
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* assembler fills the delay slot */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	 and	rem, len, NBYTES-1		/* rem = len % NBYTES */
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	/* One unit per iteration until only rem bytes are left. */
	LDFIRST(t0, FIRST(0)(src))
	LDREST(t0, REST(0)(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* assembler fills the delay slot */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	 nop
.Lcopy_bytes\@:
	/*
	 * 0 < len < NBYTES.  Copy byte by byte and assemble the bytes into
	 * t2 at the position they would occupy in a unit loaded from memory:
	 * the shift starts at 0 and grows on little-endian, starts at the
	 * top byte and shrinks on big-endian.
	 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
	move	t2, zero			/* partial unit being assembled */
	li	t3, SHIFT_START			/* shift for the next byte */
	/* Copy byte N; leave the chain as soon as len reaches 0. */
#define COPY_BYTE(N)			\
	LOADBU(t0, N(src));		\
	SUB	len, len, 1;		\
	STOREB(t0, N(dst));		\
	SLLV	t0, t0, t3;		\
	addu	t3, SHIFT_INC;		\
	beqz	len, .Lcopy_bytes_done\@;	\
	 or	t2, t0

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	/* Byte NBYTES-2 is the last one possible, so no length test follows. */
	LOADBU(t0, NBYTES-2(src))
	SUB	len, len, 1
	STOREB(t0, NBYTES-2(dst))
	SLLV	t0, t0, t3
	or	t2, t0
.Lcopy_bytes_done\@:
	ADDC(sum, t2)
.Ldone\@:
	/* Fold the accumulator to 32 bits with end-around carry. */
	.set	push
	.set	noat
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0			/* v1 = low word, moved to the upper half */
	daddu	sum, v1				/* upper half = high word + low word */
	sltu	v1, sum, v1			/* v1 = carry out of that add */
	dsra32	sum, sum, 0			/* move the result down to bits 31..0 */
	addu	sum, v1				/* add the carry back in */
#endif

	/*
	 * If dst started on an odd address (odd != 0), swap the two bytes of
	 * each halfword of the sum.
	 *
	 * NOTE(review): the first line of this #if was missing from the file;
	 * it has been restored as MIPSR2 || MIPSR5 -- confirm against the
	 * project history.
	 */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum				/* v1 = sum with bytes swapped per halfword */
	movn	sum, v1, odd			/* use it only when odd != 0 */
	.set	pop
#else
	beqz	odd, 1f
	 lui	v1, 0x00ff
	addu	v1, 0x00ff			/* v1 = 0x00ff00ff */
	and	t0, sum, v1
	sll	t0, t0, 8			/* low byte of each halfword, moved up */
	srl	sum, sum, 8
	and	sum, sum, v1			/* high byte of each halfword, moved down */
	or	sum, sum, t0
1:
#endif
	.set	pop
	.set	reorder
	jr	ra
	.set	noreorder
	.endm

731 .set noreorder
732.L_exc:
733 jr ra
734 li v0, 0
735
736FEXPORT(__csum_partial_copy_nocheck)
737EXPORT_SYMBOL(__csum_partial_copy_nocheck)
738#ifndef CONFIG_EVA
739FEXPORT(__csum_partial_copy_to_user)
740EXPORT_SYMBOL(__csum_partial_copy_to_user)
741FEXPORT(__csum_partial_copy_from_user)
742EXPORT_SYMBOL(__csum_partial_copy_from_user)
743#endif
744__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP
745
746#ifdef CONFIG_EVA
747LEAF(__csum_partial_copy_to_user)
748__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
749END(__csum_partial_copy_to_user)
750
751LEAF(__csum_partial_copy_from_user)
752__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
753END(__csum_partial_copy_from_user)
754#endif
755