1
2
3
4
5
6
7
8 .globl csum_partial
9 .type csum_partial,@function
10csum_partial:
11
12 ;; r10 - src
13 ;; r11 - length
14 ;; r12 - checksum
15
16 ;; Optimized for large packets
17 subq 10*4, $r11
18 blt _word_loop
19 move.d $r11, $acr
20
21 subq 9*4,$sp
22 clearf c
23 movem $r8,[$sp]
24
25 ;; do a movem checksum
26
27_mloop: movem [$r10+],$r9 ; read 10 longwords
28 ;; Loop count without touching the c flag.
29 addoq -10*4, $acr, $acr
30 ;; perform dword checksumming on the 10 longwords
31
32 addc $r0,$r12
33 addc $r1,$r12
34 addc $r2,$r12
35 addc $r3,$r12
36 addc $r4,$r12
37 addc $r5,$r12
38 addc $r6,$r12
39 addc $r7,$r12
40 addc $r8,$r12
41 addc $r9,$r12
42
43 ;; test $acr without trashing carry.
44 move.d $acr, $acr
45 bpl _mloop
46 ;; r11 <= acr is not really needed in the mloop, just using the dslot
47 ;; to prepare for what is needed after mloop.
48 move.d $acr, $r11
49
50 ;; fold the last carry into r13
51 addc 0, $r12
52 movem [$sp+],$r8 ; restore regs
53
54_word_loop:
55 addq 10*4,$r11 ; compensate for last loop underflowing length
56
57 moveq -1,$r9 ; put 0xffff in r9, faster than move.d 0xffff,r9
58 lsrq 16,$r9
59
60 move.d $r12,$r13
61 lsrq 16,$r13 ; r13 = checksum >> 16
62 and.d $r9,$r12 ; checksum = checksum & 0xffff
63
64_no_fold:
65 subq 2,$r11
66 blt _no_words
67 add.d $r13,$r12 ; checksum += r13
68
69 ;; checksum the rest of the words
70_wloop: subq 2,$r11
71 bge _wloop
72 addu.w [$r10+],$r12
73
74_no_words:
75 addq 2,$r11
76 ;; see if we have one odd byte more
77 bne _do_byte
78 nop
79 ret
80 move.d $r12,$r10
81
82_do_byte:
83 ;; copy and checksum the last byte
84 addu.b [$r10],$r12
85 ret
86 move.d $r12,$r10
87
88 .size csum_partial, .-csum_partial
89