1
2
3
4
5
6
7
8
9#include <asm/processor.h>
10#include <asm/ppc_asm.h>
11
/*
 * memcpy: copy r5 bytes from the buffer addressed by r4 to the buffer
 * addressed by r3.
 *
 * Two bodies are selected at build time by __LITTLE_ENDIAN__:
 *   - little-endian: a plain byte-at-a-time loop;
 *   - otherwise: a doubleword copy with separate paths for short lengths,
 *     a misaligned destination and a misaligned source.
 * The little-endian loop never writes r3.  The other body saves r3 below
 * the stack pointer on entry and reloads it before each blr.
 *
 * _GLOBAL_TOC, the *_FTR_SECTION* macros, PPC_MTOCRF, STK_REG and
 * STACKFRAMESIZE are defined outside this file; remarks about them below
 * are assumptions to confirm against their definitions.
 */
12 .align 7 /* PowerPC gas: align to 2^7 = 128 bytes */
13_GLOBAL_TOC(memcpy)
/*
 * Alternative pair keyed on CPU_FTR_VMX_COPY.  Judging by the _IFCLR
 * suffix, the first body is used when the feature is clear and the branch
 * to memcpy_power7 otherwise -- TODO confirm.
 */
14BEGIN_FTR_SECTION
15#ifdef __LITTLE_ENDIAN__
16 cmpdi cr7,r5,0 /* cr7 = length compared with 0; tested by beqlr below */
17#else
18 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* stash dest so it can be reloaded before returning */
19#endif
20FTR_SECTION_ELSE
21 b memcpy_power7 /* memcpy_power7 is not defined in this file */
22ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
23#ifdef __LITTLE_ENDIAN__
24
/* Byte loop.  Both pointers are biased by -1 for the update-form accesses. */
25 addi r9,r3,-1 /* r9 = dest - 1; r3 itself is left untouched */
26 addi r4,r4,-1 /* r4 = src - 1 */
27 beqlr cr7 /* length == 0: return immediately */
28 mtctr r5 /* CTR = number of bytes to copy */
291: lbzu r10,1(r4) /* r10 = byte at r4+1, then r4 += 1 */
30 stbu r10,1(r9) /* store it at r9+1, then r9 += 1 */
31 bdnz 1b /* decrement CTR, loop while it is non-zero */
32 blr /* r3 still holds the original dest */
33#else
/*
 * cr7 is presumably loaded with the low four bits of the length here
 * (PPC_MTOCRF with mask 0x01).  Later code tests cr7*4+0, +1, +2 and +3
 * to decide whether to copy 8, 4, 2 and 1 bytes respectively.
 */
34 PPC_MTOCRF(0x01,r5)
35 cmpldi cr1,r5,16 /* cr1 = length vs 16, unsigned */
36 neg r6,r3
37 andi. r6,r6,7 /* r6 = (-dest) & 7 = bytes up to the next 8-byte dest boundary; sets cr0 */
38 dcbt 0,r4 /* cache-touch hint for the start of the source */
39 blt cr1,.Lshort_copy /* fewer than 16 bytes */
40
41
42
43
44
/*
 * Alternative pair: either a nop, or a branch to .Ldst_unaligned taken
 * when r6 != 0 (cr0 from the andi. above).  From the mask/value arguments
 * the nop variant appears to apply to CPUs that have
 * CPU_FTR_UNALIGNED_LD_STD set and CPU_FTR_CP_USE_DCBTZ clear -- TODO
 * confirm against ALT_FTR_SECTION_END.  Execution otherwise falls through
 * to .Ldst_aligned.
 */
45BEGIN_FTR_SECTION
46 nop
47FTR_SECTION_ELSE
48 bne .Ldst_unaligned
49ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
50 CPU_FTR_UNALIGNED_LD_STD)
/*
 * .Ldst_aligned: doubleword copy loop followed by a 4/2/1-byte tail.
 *
 * State on entry (set up by the code that falls through or branches here):
 *   r3 = dest, r4 = src, r5 = remaining length,
 *   cr7 = low four bits of r5, cr1 = r5 compared with 16.
 * The loop moves 16 bytes per iteration and is software-pipelined: each
 * store writes a doubleword loaded earlier.  r3 and r4 are biased so that
 * the update-form ldu/stdu land on the right addresses.
 */
51.Ldst_aligned:
52 addi r3,r3,-16 /* bias dest for the 8(r3) / stdu 16(r3) pattern */
/*
 * The two instructions below divert to .Lsrc_unaligned when src is not
 * 8-byte aligned.  Judging by the _IFCLR suffix they are presumably only
 * active on CPUs without CPU_FTR_UNALIGNED_LD_STD -- TODO confirm.
 */
53BEGIN_FTR_SECTION
54 andi. r0,r4,7 /* r0 = src & 7 (reused by .Lsrc_unaligned) */
55 bne .Lsrc_unaligned
56END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
57 srdi r7,r5,4 /* r7 = length / 16 = loop iterations */
58 ld r9,0(r4) /* preload the first doubleword */
59 addi r4,r4,-8 /* bias src for ldu 16(r4) */
60 mtctr r7
61 andi. r5,r5,7 /* r5 = tail byte count; cr0.EQ if there is no tail (used by beq below) */
62 bf cr7*4+0,2f /* 8s bit of length clear: even doubleword count, enter loop at 2 */
/* Odd doubleword count: shift both pointers by 8 and enter at 1 (or at 3). */
63 addi r3,r3,8
64 addi r4,r4,8
65 mr r8,r9
66 blt cr1,3f /* fewer than 16 bytes: only this one doubleword, skip the loop */
671: ld r9,8(r4)
68 std r8,8(r3)
692: ldu r8,16(r4) /* load and advance src by 16 */
70 stdu r9,16(r3) /* store and advance dest by 16 */
71 bdnz 1b
723: std r8,8(r3) /* store the last doubleword still held in r8 */
73 beq 3f /* no tail bytes (cr0 from the andi. above): return */
74 addi r3,r3,16 /* r3 = first dest byte not yet written */
/*
 * Tail: copy 4, 2 and 1 bytes according to cr7 bits 1, 2 and 3.
 * The next unread source byte is at 8(r4) because r4 still points at the
 * last doubleword loaded.
 */
75.Ldo_tail:
76 bf cr7*4+1,1f
77 lwz r9,8(r4)
78 addi r4,r4,4
79 stw r9,0(r3)
80 addi r3,r3,4
811: bf cr7*4+2,2f
82 lhz r9,8(r4)
83 addi r4,r4,2
84 sth r9,0(r3)
85 addi r3,r3,2
862: bf cr7*4+3,3f
87 lbz r9,8(r4)
88 stb r9,0(r3)
893: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* reload the dest pointer saved at entry as the return value */
90 blr
91
/*
 * .Lsrc_unaligned: dest is 8-byte aligned but src is not.
 *
 * The source is read with aligned doubleword loads starting at src rounded
 * down to an 8-byte boundary.  Each output doubleword is assembled from
 * two neighbouring source doublewords: (sN << r10) | (sN+1 >> r11).
 *
 * State on entry (from .Ldst_aligned):
 *   r0 = src & 7 (non-zero), r3 = dest - 16, r4 = src, r5 = length,
 *   cr7 = low four bits of the length.
 * In the notes below s0, s1, ... are successive source doublewords,
 * "<<" is sld by r10 and ">>" is srd by r11.
 */
92.Lsrc_unaligned:
93 srdi r6,r5,3 /* r6 = number of whole doublewords to produce */
94 addi r5,r5,-16
95 subf r4,r0,r4 /* round src down to an 8-byte boundary */
96 srdi r7,r5,4 /* r7 = (length - 16) / 16 = loop iterations */
97 sldi r10,r0,3 /* r10 = 8 * (src & 7): left-shift amount in bits */
98 cmpdi cr6,r6,3 /* cr6 = doubleword count vs 3; picks the short exits below */
99 andi. r5,r5,7 /* r5 = tail byte count; cr0.EQ if none (tested by "beq 4f" below) */
100 mtctr r7
101 subfic r11,r10,64 /* r11 = 64 - r10: right-shift amount in bits */
102 add r5,r5,r0 /* r5 = tail bytes + source offset; compared with 8 before the tail */
103
104 bt cr7*4+0,0f /* 8s bit of length set: odd doubleword count, use the prologue at 0 */
105
/* Even doubleword count: prime the pipeline and enter the loop at 2. */
106 ld r9,0(r4) /* s0 */
107 ld r0,8(r4) /* s1 */
108 sld r6,r9,r10
109 ldu r9,16(r4) /* s2; r4 += 16 */
110 srd r7,r0,r11
111 sld r8,r0,r10
112 or r7,r7,r6 /* r7 = first output doubleword = s0<< | s1>> */
113 blt cr6,4f /* fewer than 3 doublewords: finish with the stores at 4 and 5 */
114 ld r0,8(r4) /* s3 */
115
116 b 2f
117
/* Odd doubleword count: prime the pipeline and fall into the loop at 1. */
1180: ld r0,0(r4) /* s0 */
119 ldu r9,8(r4) /* s1; r4 += 8 */
120 sld r8,r0,r10
121 addi r3,r3,-8
122 blt cr6,5f /* fewer than 3 doublewords: finish with the single store at 5 */
123 ld r0,8(r4) /* s2 */
124 srd r12,r9,r11
125 sld r6,r9,r10
126 ldu r9,16(r4) /* s3; r4 += 16 */
127 or r12,r8,r12 /* r12 = first output doubleword = s0<< | s1>> */
128 srd r7,r0,r11
129 sld r8,r0,r10
130 addi r3,r3,16
131 beq cr6,3f /* exactly 3 doublewords: skip the loop */
132
133
/* Main loop: two output doublewords (16 bytes) per iteration. */
1341: or r7,r7,r6
135 ld r0,8(r4)
136 std r12,8(r3)
1372: srd r12,r9,r11
138 sld r6,r9,r10
139 ldu r9,16(r4) /* load and advance src by 16 */
140 or r12,r8,r12
141 stdu r7,16(r3) /* store and advance dest by 16 */
142 srd r7,r0,r11
143 sld r8,r0,r10
144 bdnz 1b
145
/* Drain the pipeline: up to three stores, depending on the entry label. */
1463: std r12,8(r3)
147 or r7,r7,r6
1484: std r7,16(r3)
1495: srd r12,r9,r11
150 or r12,r8,r12
151 std r12,24(r3)
152 beq 4f /* no tail bytes: return via the next 4: label further down the file */
153 cmpwi cr1,r5,8 /* does the tail reach past the last source doubleword loaded? */
154 addi r3,r3,32 /* r3 = first dest byte not yet written */
155 sld r9,r9,r10 /* move the unconsumed source bytes to the top of r9 */
156 ble cr1,6f /* no: r9 already holds every remaining byte */
157 ld r0,8(r4) /* yes: fetch one more source doubleword and merge it in */
158 srd r7,r0,r11
159 or r9,r7,r9
/*
 * Tail: r9 holds the remaining bytes in its high-order end.  Each rotldi
 * rotates the next 4, 2 or 1 bytes down into the low-order end, where the
 * following stw, sth or stb stores them.
 */
1606:
161 bf cr7*4+1,1f
162 rotldi r9,r9,32
163 stw r9,0(r3)
164 addi r3,r3,4
1651: bf cr7*4+2,2f
166 rotldi r9,r9,16
167 sth r9,0(r3)
168 addi r3,r3,2
1692: bf cr7*4+3,3f
170 rotldi r9,r9,8
171 stb r9,0(r3)
1723: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* reload the dest pointer saved at entry as the return value */
173 blr
174
/*
 * .Ldst_unaligned: copy the 1..7 leading bytes needed to bring dest up to
 * an 8-byte boundary, then continue at .Ldst_aligned.
 *
 * State on entry: r3 = dest, r4 = src, r5 = length,
 *   r6 = bytes to the next 8-byte dest boundary (computed at entry).
 * r7 is the running offset of the bytes already copied.
 */
175.Ldst_unaligned:
176 PPC_MTOCRF(0x01,r6) /* presumably cr7 = low four bits of r6 -- macro defined elsewhere */
177 subf r5,r6,r5 /* r5 = length left after the alignment bytes */
178 li r7,0
179 cmpldi cr1,r5,16 /* refresh cr1 for the "blt cr1" test in .Ldst_aligned */
180 bf cr7*4+3,1f /* 1s bit of r6: copy one byte */
181 lbz r0,0(r4)
182 stb r0,0(r3)
183 addi r7,r7,1
1841: bf cr7*4+2,2f /* 2s bit of r6: copy a halfword at offset r7 */
185 lhzx r0,r7,r4
186 sthx r0,r7,r3
187 addi r7,r7,2
1882: bf cr7*4+1,3f /* 4s bit of r6: copy a word at offset r7 */
189 lwzx r0,r7,r4
190 stwx r0,r7,r3
1913: PPC_MTOCRF(0x01,r5) /* presumably cr7 = low four bits of the remaining length */
192 add r4,r6,r4 /* step src past the bytes just copied */
193 add r3,r6,r3 /* dest is now 8-byte aligned */
194 b .Ldst_aligned
195
/*
 * .Lshort_copy: lengths below 16 (reached from the "blt cr1" test at entry).
 *
 * cr7 holds the low four bits of the length, so bits 0, 1, 2 and 3 select
 * an 8-, 4-, 2- and 1-byte copy in turn.  The 8-byte step uses two word
 * accesses instead of one doubleword access.
 */
196.Lshort_copy:
197 bf cr7*4+0,1f /* 8s bit: copy 8 bytes as two words */
198 lwz r0,0(r4)
199 lwz r9,4(r4)
200 addi r4,r4,8
201 stw r0,0(r3)
202 stw r9,4(r3)
203 addi r3,r3,8
2041: bf cr7*4+1,2f /* 4s bit: copy one word */
205 lwz r0,0(r4)
206 addi r4,r4,4
207 stw r0,0(r3)
208 addi r3,r3,4
2092: bf cr7*4+2,3f /* 2s bit: copy one halfword */
210 lhz r0,0(r4)
211 addi r4,r4,2
212 sth r0,0(r3)
213 addi r3,r3,2
2143: bf cr7*4+3,4f /* 1s bit: copy one byte */
215 lbz r0,0(r4)
216 stb r0,0(r3)
/* Shared exit: also the target of the "beq 4f" in .Lsrc_unaligned. */
2174: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* reload the dest pointer saved at entry as the return value */
218 blr
219#endif
220