1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28#include <asm/errno.h>
29#include <linux/linkage.h>
30
31
32
33
34
35
36
37
38
39.text
ENTRY(csum_partial)
	/*
	 * unsigned int csum_partial(const unsigned char *buf, int len,
	 *			     unsigned int sum)
	 *
	 * In:  r4 = buf, r5 = len, r6 = sum (incoming partial checksum)
	 * Out: r0 = 32-bit ones'-complement partial sum (not yet folded)
	 *
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.  It is easy
	 * to convert 2-byte alignment to 4-byte alignment for the
	 * unrolled loop, so we align up front and do 32 bytes per
	 * iteration in the main loop.
	 */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	 mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5		! len == 0?
	add	#-1, r5
	bt	9f
	mov.b	@r4+, r0
	extu.b	r0, r0
	addc	r0, r6		! t=0 from previous tst
	! Byte-swap the running sum: an odd start address shifts every
	! subsequent 16-bit word by one byte, fixed up again at 9: below.
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0
	bt	2f
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0	! Fold in the leading 16-bit word.
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! Propagate the carry from addc.
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len >> 5 = number of 32-byte chunks
	tst	r1, r1
	bt/s	4f		! if it's =0, go to 4f
	 clrt
	.align	2
3:
	! Main loop: 8 longwords (32 bytes) per iteration, loads
	! interleaved with addc so the carry chain stays unbroken.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! Save T (carry) across the loop control.
	dt	r1
	bf/s	3b
	 cmp/eq	#1, r0		! Restore carry into T for the next addc.
	! here, we know r1==0
	addc	r1, r6		! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0	! Remaining longwords (len mod 32, whole words)
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1		! r1 = number of remaining longwords
	mov	#0, r2
5:
	addc	r2, r6		! First pass adds 0 + saved carry.
	mov.l	@r4+, r2
	movt	r0
	dt	r1
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6		! Fold in the last longword loaded above.
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5		! At least a 16-bit word left?
	bf	7f
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5		! Exactly two bytes left?
	bt/s	8f
	 clrt
	shll16	r0		! Word sits in the high half when a byte follows.
	addc	r0, r6
7:
	mov.b	@r4+, r0	! Trailing single byte.
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! Absorb the final carry.
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	mov	r6, r0		! Undo the byte-swap done for the odd start.
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	 mov	r6, r0
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
! SRC()/DST() wrap a single user-space load/store in an __ex_table entry:
! if the wrapped access faults, the kernel's exception fixup dispatches to
! the local fixup label (6001f for source faults, 6002f for destination
! faults) defined in the .fixup section below.  The 9999 numeric label is
! re-used per expansion; "9999b" binds to the instruction just emitted.
#define SRC(...) \
	9999: __VA_ARGS__ ; \
	.section __ex_table, "a"; \
	.long 9999b, 6001f ; \
	.previous

! Same as SRC(), but faults are routed to the destination-error fixup.
#define DST(...) \
	9999: __VA_ARGS__ ; \
	.section __ex_table, "a"; \
	.long 9999b, 6002f ; \
	.previous
202
203!
204! r4: const char *SRC
205! r5: char *DST
206! r6: int LEN
207! r7: int SUM
208!
209! on stack:
210! int *SRC_ERR_PTR
211! int *DST_ERR_PTR
212!
213ENTRY(csum_partial_copy_generic)
214 mov.l r5,@-r15
215 mov.l r6,@-r15
216
217 mov
218 mov r4,r1
219 and r0,r1
220 and r5,r0
221 cmp/eq r1,r0
222 bf 3f ! Different alignments, use slow version
223 tst
224 bf 3f ! If not, do it the slow way
225
226 mov
227 tst r0,r5 ! Check dest alignment.
228 bt 2f ! Jump if alignment is ok.
229 add
230 cmp/pz r6 ! Jump if we had at least two bytes.
231 bt/s 1f
232 clrt
233 add
234 bra 4f
235 mov r6,r2
236
2373: ! Handle different src and dest alignments.
238 ! This is not common, so simple byte by byte copy will do.
239 mov r6,r2
240 shlr r6
241 tst r6,r6
242 bt 4f
243 clrt
244 .align 2
2455:
246SRC( mov.b @r4+,r1 )
247SRC( mov.b @r4+,r0 )
248 extu.b r1,r1
249DST( mov.b r1,@r5 )
250DST( mov.b r0,@(1,r5) )
251 extu.b r0,r0
252 add
253
254#ifdef __LITTLE_ENDIAN__
255 shll8 r0
256#else
257 shll8 r1
258#endif
259 or r1,r0
260
261 addc r0,r7
262 movt r0
263 dt r6
264 bf/s 5b
265 cmp/eq
266 mov
267 addc r0, r7
268
269 mov r2, r0
270 tst
271 bt 7f
272 bra 5f
273 clrt
274
275 ! src and dest equally aligned, but to a two byte boundary.
276 ! Handle first two bytes as a special case
277 .align 2
2781:
279SRC( mov.w @r4+,r0 )
280DST( mov.w r0,@r5 )
281 add
282 extu.w r0,r0
283 addc r0,r7
284 mov
285 addc r0,r7
2862:
287 mov r6,r2
288 mov
289 shld r0,r6
290 tst r6,r6
291 bt/s 2f
292 clrt
293 .align 2
2941:
295SRC( mov.l @r4+,r0 )
296SRC( mov.l @r4+,r1 )
297 addc r0,r7
298DST( mov.l r0,@r5 )
299DST( mov.l r1,@(4,r5) )
300 addc r1,r7
301
302SRC( mov.l @r4+,r0 )
303SRC( mov.l @r4+,r1 )
304 addc r0,r7
305DST( mov.l r0,@(8,r5) )
306DST( mov.l r1,@(12,r5) )
307 addc r1,r7
308
309SRC( mov.l @r4+,r0 )
310SRC( mov.l @r4+,r1 )
311 addc r0,r7
312DST( mov.l r0,@(16,r5) )
313DST( mov.l r1,@(20,r5) )
314 addc r1,r7
315
316SRC( mov.l @r4+,r0 )
317SRC( mov.l @r4+,r1 )
318 addc r0,r7
319DST( mov.l r0,@(24,r5) )
320DST( mov.l r1,@(28,r5) )
321 addc r1,r7
322 add
323 movt r0
324 dt r6
325 bf/s 1b
326 cmp/eq
327 mov
328 addc r0,r7
329
3302: mov r2,r6
331 mov
332 and r0,r6
333 cmp/pl r6
334 bf/s 4f
335 clrt
336 shlr2 r6
3373:
338SRC( mov.l @r4+,r0 )
339 addc r0,r7
340DST( mov.l r0,@r5 )
341 add
342 movt r0
343 dt r6
344 bf/s 3b
345 cmp/eq
346 mov
347 addc r0,r7
3484: mov r2,r6
349 mov
350 and r0,r6
351 cmp/pl r6
352 bf 7f
353 mov
354 cmp/hs r1,r6
355 bf 5f
356SRC( mov.w @r4+,r0 )
357DST( mov.w r0,@r5 )
358 extu.w r0,r0
359 add
360 cmp/eq r1,r6
361 bt/s 6f
362 clrt
363 shll16 r0
364 addc r0,r7
3655:
366SRC( mov.b @r4+,r0 )
367DST( mov.b r0,@r5 )
368 extu.b r0,r0
369#ifndef __LITTLE_ENDIAN__
370 shll8 r0
371#endif
3726: addc r0,r7
373 mov
374 addc r0,r7
3757:
3765000:
377
378
379.section .fixup, "ax"
380
3816001:
382 mov.l @(8,r15),r0 ! src_err_ptr
383 mov
384 mov.l r1,@r0
385
386 ! zero the complete destination - computing the rest
387 ! is too much work
388 mov.l @(4,r15),r5 ! dst
389 mov.l @r15,r6 ! len
390 mov
3911: mov.b r7,@r5
392 dt r6
393 bf/s 1b
394 add
395 mov.l 8000f,r0
396 jmp @r0
397 nop
398 .align 2
3998000: .long 5000b
400
4016002:
402 mov.l @(12,r15),r0 ! dst_err_ptr
403 mov
404 mov.l r1,@r0
405 mov.l 8001f,r0
406 jmp @r0
407 nop
408 .align 2
4098001: .long 5000b
410
411.previous
412 add
413 rts
414 mov r7,r0
415