#include <asm/ppc_asm.h>

#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

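/*
 * Each errN macro tags the user access that follows it with an exception
 * table entry, so a fault in that access branches to the matching
 * .Ldo_errN fixup below.
 */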
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm

	.macro err4
400:
	EX_TABLE(400b,.Ldo_err4)
	.endm

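/*
 * Fault fixups: restore any saved non-volatile registers, leave VMX if it
 * was in use, pop our stack frame, then reload the original (r3, r4, r5)
 * arguments and let __copy_tofrom_user_base finish the copy and work out
 * the return value.
 */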
.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	exit_vmx_usercopy
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif

.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	b	__copy_tofrom_user_base

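/*
 * __copy_tofrom_user_power7(to=r3, from=r4, len=r5)
 *
 * Copies below 16 bytes take .Lshort_copy; with CONFIG_ALTIVEC, copies of
 * 3328 bytes or more go through the VMX path. Returns 0 on success.
 */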
_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
	cmpldi	r5,16
	cmpldi	cr1,r5,3328

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
	bge	cr1,.Lvmx_copy
#else
	cmpldi	r5,16

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
#endif

.Lnonvmx_copy:
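	/* Get the source 8B aligned */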
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7
	mtctr	r6

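	/* Now do cacheline (128B) sized loads and stores. */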
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

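	/* Up to 127B to go */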
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

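	/* Up to 63B to go */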
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

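	/* Up to 31B to go */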
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

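	/* Up to 15B to go */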
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

#ifdef CONFIG_ALTIVEC
.Lvmx_copy:
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_usercopy
	cmpwi	cr1,r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

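	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */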
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

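	/* setup read stream 0 */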
	dcbt	0,r6,0b01000	/* addr from */
	dcbt	0,r7,0b01010	/* length and depth from */
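	/* setup write stream 1 */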
	dcbtst	0,r9,0b01000	/* addr to */
	dcbtst	0,r10,0b01010	/* length and depth to */
	eieio
	dcbt	0,r8,0b01010	/* all streams GO */

	beq	cr1,.Lunwind_stack_nonvmx_copy

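	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */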
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

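	/* Get the destination 16B aligned */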
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

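	/* Get the destination 128B aligned */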
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

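	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */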
	.align	5
8:
err4;	lvx	v7,0,r4
err4;	lvx	v6,r4,r9
err4;	lvx	v5,r4,r10
err4;	lvx	v4,r4,r11
err4;	lvx	v3,r4,r12
err4;	lvx	v2,r4,r14
err4;	lvx	v1,r4,r15
err4;	lvx	v0,r4,r16
	addi	r4,r4,128
err4;	stvx	v7,0,r3
err4;	stvx	v6,r3,r9
err4;	stvx	v5,r3,r10
err4;	stvx	v4,r3,r11
err4;	stvx	v3,r3,r12
err4;	stvx	v2,r3,r14
err4;	stvx	v1,r3,r15
err4;	stvx	v0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

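	/* Up to 127B to go */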
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

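	/* Up to 15B to go */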
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy

.Lvmx_unaligned_copy:
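	/* Get the destination 16B aligned */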
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

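	/* Get the destination 128B aligned */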
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

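	/* Setup the permute control vector for the misaligned source */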
	LVS(v16,0,r4)
err3;	lvx	v0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16
	vor	v0,v1,v1

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

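	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */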
	.align	5
8:
err4;	lvx	v7,0,r4
	VPERM(v8,v0,v7,v16)
err4;	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
err4;	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
err4;	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
err4;	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
err4;	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
err4;	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
err4;	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
err4;	stvx	v8,0,r3
err4;	stvx	v9,r3,r9
err4;	stvx	v10,r3,r10
err4;	stvx	v11,r3,r11
err4;	stvx	v12,r3,r12
err4;	stvx	v13,r3,r14
err4;	stvx	v14,r3,r15
err4;	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

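	/* Up to 127B to go */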
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16

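	/* Up to 15B to go */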
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy
#endif