/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

#ifndef SELFTEST_CASE
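/* 0 == don't use VMX, 1 == use VMX */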
#define SELFTEST_CASE 0
#endif

#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
#endif

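/*
 * A load or store prefixed with errN gets an exception table entry;
 * a fault on that access branches to the matching .Ldo_errN fixup below.
 */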
 .macro err1
100:
 EX_TABLE(100b,.Ldo_err1)
 .endm

 .macro err2
200:
 EX_TABLE(200b,.Ldo_err2)
 .endm

#ifdef CONFIG_ALTIVEC
 .macro err3
300:
 EX_TABLE(300b,.Ldo_err3)
 .endm

 .macro err4
400:
 EX_TABLE(400b,.Ldo_err4)
 .endm
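/*
 * Fault fixups: restore any saved non-volatile GPRs and leave VMX mode,
 * then reload the original arguments and hand the whole copy over to
 * __copy_tofrom_user_base, which returns the bytes-not-copied count.
 */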
.Ldo_err4:
 ld r16,STK_REG(R16)(r1)
 ld r15,STK_REG(R15)(r1)
 ld r14,STK_REG(R14)(r1)
.Ldo_err3:
 bl exit_vmx_usercopy
 ld r0,STACKFRAMESIZE+16(r1)
 mtlr r0
 b .Lexit
#endif /* CONFIG_ALTIVEC */

.Ldo_err2:
 ld r22,STK_REG(R22)(r1)
 ld r21,STK_REG(R21)(r1)
 ld r20,STK_REG(R20)(r1)
 ld r19,STK_REG(R19)(r1)
 ld r18,STK_REG(R18)(r1)
 ld r17,STK_REG(R17)(r1)
 ld r16,STK_REG(R16)(r1)
 ld r15,STK_REG(R15)(r1)
 ld r14,STK_REG(R14)(r1)
.Lexit:
 addi r1,r1,STACKFRAMESIZE
.Ldo_err1:
 ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
 ld r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
 ld r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
 b __copy_tofrom_user_base

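/*
 * __copy_tofrom_user_power7(to=r3, from=r4, size=r5)
 *
 * Returns 0 in r3 on success. On a fault the original arguments are
 * reloaded and the copy is redone by __copy_tofrom_user_base, which
 * works out how many bytes were left uncopied.
 */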
_GLOBAL(__copy_tofrom_user_power7)
 cmpldi r5,16   /* under 16B: byte/word copy only */
 cmpldi cr1,r5,3328  /* over 3328B: worth using VMX */

 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
 std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
 std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

 blt .Lshort_copy

#ifdef CONFIG_ALTIVEC
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
 bgt cr1,.Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif

.Lnonvmx_copy:
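 /* Get the source 8B aligned */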
 neg r6,r4
 mtocrf 0x01,r6
 clrldi r6,r6,(64-3)

 bf cr7*4+3,1f
err1; lbz r0,0(r4)
 addi r4,r4,1
err1; stb r0,0(r3)
 addi r3,r3,1

1: bf cr7*4+2,2f
err1; lhz r0,0(r4)
 addi r4,r4,2
err1; sth r0,0(r3)
 addi r3,r3,2

2: bf cr7*4+1,3f
err1; lwz r0,0(r4)
 addi r4,r4,4
err1; stw r0,0(r3)
 addi r3,r3,4

3: sub r5,r5,r6
 cmpldi r5,128
 blt 5f

 mflr r0
 stdu r1,-STACKFRAMESIZE(r1)
 std r14,STK_REG(R14)(r1)
 std r15,STK_REG(R15)(r1)
 std r16,STK_REG(R16)(r1)
 std r17,STK_REG(R17)(r1)
 std r18,STK_REG(R18)(r1)
 std r19,STK_REG(R19)(r1)
 std r20,STK_REG(R20)(r1)
 std r21,STK_REG(R21)(r1)
 std r22,STK_REG(R22)(r1)
 std r0,STACKFRAMESIZE+16(r1)

 srdi r6,r5,7
 mtctr r6

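 /* Now do cacheline (128B) sized loads and stores. */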
 .align 5
4:
err2; ld r0,0(r4)
err2; ld r6,8(r4)
err2; ld r7,16(r4)
err2; ld r8,24(r4)
err2; ld r9,32(r4)
err2; ld r10,40(r4)
err2; ld r11,48(r4)
err2; ld r12,56(r4)
err2; ld r14,64(r4)
err2; ld r15,72(r4)
err2; ld r16,80(r4)
err2; ld r17,88(r4)
err2; ld r18,96(r4)
err2; ld r19,104(r4)
err2; ld r20,112(r4)
err2; ld r21,120(r4)
 addi r4,r4,128
err2; std r0,0(r3)
err2; std r6,8(r3)
err2; std r7,16(r3)
err2; std r8,24(r3)
err2; std r9,32(r3)
err2; std r10,40(r3)
err2; std r11,48(r3)
err2; std r12,56(r3)
err2; std r14,64(r3)
err2; std r15,72(r3)
err2; std r16,80(r3)
err2; std r17,88(r3)
err2; std r18,96(r3)
err2; std r19,104(r3)
err2; std r20,112(r3)
err2; std r21,120(r3)
 addi r3,r3,128
 bdnz 4b

 clrldi r5,r5,(64-7)

 ld r14,STK_REG(R14)(r1)
 ld r15,STK_REG(R15)(r1)
 ld r16,STK_REG(R16)(r1)
 ld r17,STK_REG(R17)(r1)
 ld r18,STK_REG(R18)(r1)
 ld r19,STK_REG(R19)(r1)
 ld r20,STK_REG(R20)(r1)
 ld r21,STK_REG(R21)(r1)
 ld r22,STK_REG(R22)(r1)
 addi r1,r1,STACKFRAMESIZE

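 /* Up to 127B to go */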
5: srdi r6,r5,4
 mtocrf 0x01,r6

6: bf cr7*4+1,7f
err1; ld r0,0(r4)
err1; ld r6,8(r4)
err1; ld r7,16(r4)
err1; ld r8,24(r4)
err1; ld r9,32(r4)
err1; ld r10,40(r4)
err1; ld r11,48(r4)
err1; ld r12,56(r4)
 addi r4,r4,64
err1; std r0,0(r3)
err1; std r6,8(r3)
err1; std r7,16(r3)
err1; std r8,24(r3)
err1; std r9,32(r3)
err1; std r10,40(r3)
err1; std r11,48(r3)
err1; std r12,56(r3)
 addi r3,r3,64

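 /* Up to 63B to go */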
7: bf cr7*4+2,8f
err1; ld r0,0(r4)
err1; ld r6,8(r4)
err1; ld r7,16(r4)
err1; ld r8,24(r4)
 addi r4,r4,32
err1; std r0,0(r3)
err1; std r6,8(r3)
err1; std r7,16(r3)
err1; std r8,24(r3)
 addi r3,r3,32

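 /* Up to 31B to go */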
8: bf cr7*4+3,9f
err1; ld r0,0(r4)
err1; ld r6,8(r4)
 addi r4,r4,16
err1; std r0,0(r3)
err1; std r6,8(r3)
 addi r3,r3,16

9: clrldi r5,r5,(64-4)

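 /* Up to 15B to go */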
.Lshort_copy:
 mtocrf 0x01,r5
 bf cr7*4+0,12f
err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
err1; lwz r6,4(r4)
 addi r4,r4,8
err1; stw r0,0(r3)
err1; stw r6,4(r3)
 addi r3,r3,8

12: bf cr7*4+1,13f
err1; lwz r0,0(r4)
 addi r4,r4,4
err1; stw r0,0(r3)
 addi r3,r3,4

13: bf cr7*4+2,14f
err1; lhz r0,0(r4)
 addi r4,r4,2
err1; sth r0,0(r3)
 addi r3,r3,2

14: bf cr7*4+3,15f
err1; lbz r0,0(r4)
err1; stb r0,0(r3)

15: li r3,0
 blr

.Lunwind_stack_nonvmx_copy:
 addi r1,r1,STACKFRAMESIZE
 b .Lnonvmx_copy

.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
 mflr r0
 std r0,16(r1)
 stdu r1,-STACKFRAMESIZE(r1)
 bl enter_vmx_usercopy
 cmpwi cr1,r3,0  /* 0 means VMX is unavailable, fall back below */
 ld r0,STACKFRAMESIZE+16(r1)
 ld r3,STK_REG(R31)(r1)
 ld r4,STK_REG(R30)(r1)
 ld r5,STK_REG(R29)(r1)
 mtlr r0
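 /*
  * We prefetch both the source and destination using enhanced touch
  * instructions. We use a stream ID of 0 for the load side and
  * 1 for the store side.
  */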
 clrrdi r6,r4,7
 clrrdi r9,r3,7
 ori r9,r9,1  /* stream=1 */

 srdi r7,r5,7  /* length in cachelines, capped at 0x3FF */
 cmpldi r7,0x3FF
 ble 1f
 li r7,0x3FF
1: lis r0,0x0E00  /* depth=7 */
 sldi r7,r7,7
 or r7,r7,r0
 ori r10,r7,1  /* stream=1 */

 lis r8,0x8000  /* GO=1 */
 clrldi r8,r8,32

 /* setup read stream 0 */
 dcbt 0,r6,0b01000  /* addr from */
 dcbt 0,r7,0b01010  /* length and depth from */
 /* setup write stream 1 */
 dcbtst 0,r9,0b01000  /* addr to */
 dcbtst 0,r10,0b01010  /* length and depth to */
 eieio
 dcbt 0,r8,0b01010  /* all streams GO */

 beq cr1,.Lunwind_stack_nonvmx_copy
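 /*
  * If source and destination are not relatively aligned we use a
  * slower permute loop.
  */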
 xor r6,r4,r3
 rldicl. r6,r6,0,(64-4)
 bne .Lvmx_unaligned_copy
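 /* Get the destination 16B aligned */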
 neg r6,r3
 mtocrf 0x01,r6
 clrldi r6,r6,(64-4)

 bf cr7*4+3,1f
err3; lbz r0,0(r4)
 addi r4,r4,1
err3; stb r0,0(r3)
 addi r3,r3,1

1: bf cr7*4+2,2f
err3; lhz r0,0(r4)
 addi r4,r4,2
err3; sth r0,0(r3)
 addi r3,r3,2

2: bf cr7*4+1,3f
err3; lwz r0,0(r4)
 addi r4,r4,4
err3; stw r0,0(r3)
 addi r3,r3,4

3: bf cr7*4+0,4f
err3; ld r0,0(r4)
 addi r4,r4,8
err3; std r0,0(r3)
 addi r3,r3,8

4: sub r5,r5,r6
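 /* Get the destination 128B aligned */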
 neg r6,r3
 srdi r7,r6,4
 mtocrf 0x01,r7
 clrldi r6,r6,(64-7)

 li r9,16
 li r10,32
 li r11,48

 bf cr7*4+3,5f
err3; lvx v1,0,r4
 addi r4,r4,16
err3; stvx v1,0,r3
 addi r3,r3,16

5: bf cr7*4+2,6f
err3; lvx v1,0,r4
err3; lvx v0,r4,r9
 addi r4,r4,32
err3; stvx v1,0,r3
err3; stvx v0,r3,r9
 addi r3,r3,32

6: bf cr7*4+1,7f
err3; lvx v3,0,r4
err3; lvx v2,r4,r9
err3; lvx v1,r4,r10
err3; lvx v0,r4,r11
 addi r4,r4,64
err3; stvx v3,0,r3
err3; stvx v2,r3,r9
err3; stvx v1,r3,r10
err3; stvx v0,r3,r11
 addi r3,r3,64

7: sub r5,r5,r6
 srdi r6,r5,7

 std r14,STK_REG(R14)(r1)
 std r15,STK_REG(R15)(r1)
 std r16,STK_REG(R16)(r1)

 li r12,64
 li r14,80
 li r15,96
 li r16,112

 mtctr r6
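 /*
  * Now do cacheline sized loads and stores. By this stage the
  * cacheline stores are also cacheline aligned.
  */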
 .align 5
8:
err4; lvx v7,0,r4
err4; lvx v6,r4,r9
err4; lvx v5,r4,r10
err4; lvx v4,r4,r11
err4; lvx v3,r4,r12
err4; lvx v2,r4,r14
err4; lvx v1,r4,r15
err4; lvx v0,r4,r16
 addi r4,r4,128
err4; stvx v7,0,r3
err4; stvx v6,r3,r9
err4; stvx v5,r3,r10
err4; stvx v4,r3,r11
err4; stvx v3,r3,r12
err4; stvx v2,r3,r14
err4; stvx v1,r3,r15
err4; stvx v0,r3,r16
 addi r3,r3,128
 bdnz 8b

 ld r14,STK_REG(R14)(r1)
 ld r15,STK_REG(R15)(r1)
 ld r16,STK_REG(R16)(r1)

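 /* Up to 127B to go */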
 clrldi r5,r5,(64-7)
 srdi r6,r5,4
 mtocrf 0x01,r6

 bf cr7*4+1,9f
err3; lvx v3,0,r4
err3; lvx v2,r4,r9
err3; lvx v1,r4,r10
err3; lvx v0,r4,r11
 addi r4,r4,64
err3; stvx v3,0,r3
err3; stvx v2,r3,r9
err3; stvx v1,r3,r10
err3; stvx v0,r3,r11
 addi r3,r3,64

9: bf cr7*4+2,10f
err3; lvx v1,0,r4
err3; lvx v0,r4,r9
 addi r4,r4,32
err3; stvx v1,0,r3
err3; stvx v0,r3,r9
 addi r3,r3,32

10: bf cr7*4+3,11f
err3; lvx v1,0,r4
 addi r4,r4,16
err3; stvx v1,0,r3
 addi r3,r3,16

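 /* Up to 15B to go */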
11: clrldi r5,r5,(64-4)
 mtocrf 0x01,r5
 bf cr7*4+0,12f
err3; ld r0,0(r4)
 addi r4,r4,8
err3; std r0,0(r3)
 addi r3,r3,8

12: bf cr7*4+1,13f
err3; lwz r0,0(r4)
 addi r4,r4,4
err3; stw r0,0(r3)
 addi r3,r3,4

13: bf cr7*4+2,14f
err3; lhz r0,0(r4)
 addi r4,r4,2
err3; sth r0,0(r3)
 addi r3,r3,2

14: bf cr7*4+3,15f
err3; lbz r0,0(r4)
err3; stb r0,0(r3)

15: addi r1,r1,STACKFRAMESIZE
 b exit_vmx_usercopy /* tail call optimise */

.Lvmx_unaligned_copy:
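 /* Get the destination 16B aligned */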
 neg r6,r3
 mtocrf 0x01,r6
 clrldi r6,r6,(64-4)

 bf cr7*4+3,1f
err3; lbz r0,0(r4)
 addi r4,r4,1
err3; stb r0,0(r3)
 addi r3,r3,1

1: bf cr7*4+2,2f
err3; lhz r0,0(r4)
 addi r4,r4,2
err3; sth r0,0(r3)
 addi r3,r3,2

2: bf cr7*4+1,3f
err3; lwz r0,0(r4)
 addi r4,r4,4
err3; stw r0,0(r3)
 addi r3,r3,4

3: bf cr7*4+0,4f
err3; lwz r0,0(r4) /* Less chance of a reject with word ops */
err3; lwz r7,4(r4)
 addi r4,r4,8
err3; stw r0,0(r3)
err3; stw r7,4(r3)
 addi r3,r3,8

4: sub r5,r5,r6
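 /* Get the destination 128B aligned */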
 neg r6,r3
 srdi r7,r6,4
 mtocrf 0x01,r7
 clrldi r6,r6,(64-7)

 li r9,16
 li r10,32
 li r11,48

 LVS(v16,0,r4)  /* Setup permute control vector */
err3; lvx v0,0,r4
 addi r4,r4,16

 bf cr7*4+3,5f
err3; lvx v1,0,r4
 VPERM(v8,v0,v1,v16)
 addi r4,r4,16
err3; stvx v8,0,r3
 addi r3,r3,16
 vor v0,v1,v1

5: bf cr7*4+2,6f
err3; lvx v1,0,r4
 VPERM(v8,v0,v1,v16)
err3; lvx v0,r4,r9
 VPERM(v9,v1,v0,v16)
 addi r4,r4,32
err3; stvx v8,0,r3
err3; stvx v9,r3,r9
 addi r3,r3,32

6: bf cr7*4+1,7f
err3; lvx v3,0,r4
 VPERM(v8,v0,v3,v16)
err3; lvx v2,r4,r9
 VPERM(v9,v3,v2,v16)
err3; lvx v1,r4,r10
 VPERM(v10,v2,v1,v16)
err3; lvx v0,r4,r11
 VPERM(v11,v1,v0,v16)
 addi r4,r4,64
err3; stvx v8,0,r3
err3; stvx v9,r3,r9
err3; stvx v10,r3,r10
err3; stvx v11,r3,r11
 addi r3,r3,64

7: sub r5,r5,r6
 srdi r6,r5,7

 std r14,STK_REG(R14)(r1)
 std r15,STK_REG(R15)(r1)
 std r16,STK_REG(R16)(r1)

 li r12,64
 li r14,80
 li r15,96
 li r16,112

 mtctr r6
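 /*
  * Now do cacheline sized loads and stores. By this stage the
  * cacheline stores are also cacheline aligned.
  */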
 .align 5
8:
err4; lvx v7,0,r4
 VPERM(v8,v0,v7,v16)
err4; lvx v6,r4,r9
 VPERM(v9,v7,v6,v16)
err4; lvx v5,r4,r10
 VPERM(v10,v6,v5,v16)
err4; lvx v4,r4,r11
 VPERM(v11,v5,v4,v16)
err4; lvx v3,r4,r12
 VPERM(v12,v4,v3,v16)
err4; lvx v2,r4,r14
 VPERM(v13,v3,v2,v16)
err4; lvx v1,r4,r15
 VPERM(v14,v2,v1,v16)
err4; lvx v0,r4,r16
 VPERM(v15,v1,v0,v16)
 addi r4,r4,128
err4; stvx v8,0,r3
err4; stvx v9,r3,r9
err4; stvx v10,r3,r10
err4; stvx v11,r3,r11
err4; stvx v12,r3,r12
err4; stvx v13,r3,r14
err4; stvx v14,r3,r15
err4; stvx v15,r3,r16
 addi r3,r3,128
 bdnz 8b

 ld r14,STK_REG(R14)(r1)
 ld r15,STK_REG(R15)(r1)
 ld r16,STK_REG(R16)(r1)

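 /* Up to 127B to go */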
 clrldi r5,r5,(64-7)
 srdi r6,r5,4
 mtocrf 0x01,r6

 bf cr7*4+1,9f
err3; lvx v3,0,r4
 VPERM(v8,v0,v3,v16)
err3; lvx v2,r4,r9
 VPERM(v9,v3,v2,v16)
err3; lvx v1,r4,r10
 VPERM(v10,v2,v1,v16)
err3; lvx v0,r4,r11
 VPERM(v11,v1,v0,v16)
 addi r4,r4,64
err3; stvx v8,0,r3
err3; stvx v9,r3,r9
err3; stvx v10,r3,r10
err3; stvx v11,r3,r11
 addi r3,r3,64

9: bf cr7*4+2,10f
err3; lvx v1,0,r4
 VPERM(v8,v0,v1,v16)
err3; lvx v0,r4,r9
 VPERM(v9,v1,v0,v16)
 addi r4,r4,32
err3; stvx v8,0,r3
err3; stvx v9,r3,r9
 addi r3,r3,32

10: bf cr7*4+3,11f
err3; lvx v1,0,r4
 VPERM(v8,v0,v1,v16)
 addi r4,r4,16
err3; stvx v8,0,r3
 addi r3,r3,16

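 /* Up to 15B to go */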
11: clrldi r5,r5,(64-4)
 addi r4,r4,-16 /* Unwind the +16 load offset */
 mtocrf 0x01,r5
 bf cr7*4+0,12f
err3; lwz r0,0(r4) /* Less chance of a reject with word ops */
err3; lwz r6,4(r4)
 addi r4,r4,8
err3; stw r0,0(r3)
err3; stw r6,4(r3)
 addi r3,r3,8

12: bf cr7*4+1,13f
err3; lwz r0,0(r4)
 addi r4,r4,4
err3; stw r0,0(r3)
 addi r3,r3,4

13: bf cr7*4+2,14f
err3; lhz r0,0(r4)
 addi r4,r4,2
err3; sth r0,0(r3)
 addi r3,r3,2

14: bf cr7*4+3,15f
err3; lbz r0,0(r4)
err3; stb r0,0(r3)

15: addi r1,r1,STACKFRAMESIZE
 b exit_vmx_usercopy /* tail call optimise */
#endif /* CONFIG_ALTIVEC */