/* linux/arch/sh/lib/checksum.S */
   1/* SPDX-License-Identifier: GPL-2.0+
   2 *
   3 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
   4 *
   5 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   6 *              operating system.  INET is implemented using the  BSD Socket
   7 *              interface as the means of communication with the user level.
   8 *
   9 *              IP/TCP/UDP checksumming routines
  10 *
  11 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
  12 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  13 *              Tom May, <ftom@netcom.com>
  14 *              Pentium Pro/II routines:
  15 *              Alexander Kjeldaas <astor@guardian.no>
  16 *              Finn Arne Gangstad <finnag@guardian.no>
  17 *              Lots of code moved from tcp.c and ip.c; see those files
  18 *              for more names.
  19 *
  20 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
  21 *                           handling.
  22 *              Andi Kleen,  add zeroing on error
  23 *                   converted to pure assembler
  24 *
  25 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
  26 */
  27
  28#include <asm/errno.h>
  29#include <linux/linkage.h>
  30
  31/*
  32 * computes a partial checksum, e.g. for TCP/UDP fragments
  33 */
  34
  35/*      
  36 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
  37 */
  38
  39.text
  40ENTRY(csum_partial)
  41          /*
  42           * Experiments with Ethernet and SLIP connections show that buff
  43           * is aligned on either a 2-byte or 4-byte boundary.  We get at
  44           * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  45           * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  46           * alignment for the unrolled loop.
  47           */
  48        mov     r4, r0
  49        tst     #3, r0          ! Check alignment.
  50        bt/s    2f              ! Jump if alignment is ok.
  51         mov    r4, r7          ! Keep a copy to check for alignment
  52        !
  53        tst     #1, r0          ! Check alignment.
  54        bt      21f             ! Jump if alignment is boundary of 2bytes.
  55
  56        ! buf is odd
  57        tst     r5, r5
  58        add     #-1, r5
  59        bt      9f
  60        mov.b   @r4+, r0
  61        extu.b  r0, r0
  62        addc    r0, r6          ! t=0 from previous tst
  63        mov     r6, r0
  64        shll8   r6
  65        shlr16  r0
  66        shlr8   r0
  67        or      r0, r6
  68        mov     r4, r0
  69        tst     #2, r0
  70        bt      2f
  7121:
  72        ! buf is 2 byte aligned (len could be 0)
  73        add     #-2, r5         ! Alignment uses up two bytes.
  74        cmp/pz  r5              !
  75        bt/s    1f              ! Jump if we had at least two bytes.
  76         clrt
  77        bra     6f
  78         add    #2, r5          ! r5 was < 2.  Deal with it.
  791:
  80        mov.w   @r4+, r0
  81        extu.w  r0, r0
  82        addc    r0, r6
  83        bf      2f
  84        add     #1, r6
  852:
  86        ! buf is 4 byte aligned (len could be 0)
  87        mov     r5, r1
  88        mov     #-5, r0
  89        shld    r0, r1
  90        tst     r1, r1
  91        bt/s    4f              ! if it's =0, go to 4f
  92         clrt
  93        .align  2
  943:
  95        mov.l   @r4+, r0
  96        mov.l   @r4+, r2
  97        mov.l   @r4+, r3
  98        addc    r0, r6
  99        mov.l   @r4+, r0
 100        addc    r2, r6
 101        mov.l   @r4+, r2
 102        addc    r3, r6
 103        mov.l   @r4+, r3
 104        addc    r0, r6
 105        mov.l   @r4+, r0
 106        addc    r2, r6
 107        mov.l   @r4+, r2
 108        addc    r3, r6
 109        addc    r0, r6
 110        addc    r2, r6
 111        movt    r0
 112        dt      r1
 113        bf/s    3b
 114         cmp/eq #1, r0
 115        ! here, we know r1==0
 116        addc    r1, r6                  ! add carry to r6
 1174:
 118        mov     r5, r0
 119        and     #0x1c, r0
 120        tst     r0, r0
 121        bt      6f
 122        ! 4 bytes or more remaining
 123        mov     r0, r1
 124        shlr2   r1
 125        mov     #0, r2
 1265:
 127        addc    r2, r6
 128        mov.l   @r4+, r2
 129        movt    r0
 130        dt      r1
 131        bf/s    5b
 132         cmp/eq #1, r0
 133        addc    r2, r6
 134        addc    r1, r6          ! r1==0 here, so it means add carry-bit
 1356:
 136        ! 3 bytes or less remaining
 137        mov     #3, r0
 138        and     r0, r5
 139        tst     r5, r5
 140        bt      9f              ! if it's =0 go to 9f
 141        mov     #2, r1
 142        cmp/hs  r1, r5
 143        bf      7f
 144        mov.w   @r4+, r0
 145        extu.w  r0, r0
 146        cmp/eq  r1, r5
 147        bt/s    8f
 148         clrt
 149        shll16  r0
 150        addc    r0, r6
 1517:
 152        mov.b   @r4+, r0
 153        extu.b  r0, r0
 154#ifndef __LITTLE_ENDIAN__
 155        shll8   r0
 156#endif
 1578:
 158        addc    r0, r6
 159        mov     #0, r0
 160        addc    r0, r6
 1619:
 162        ! Check if the buffer was misaligned, if so realign sum
 163        mov     r7, r0
 164        tst     #1, r0
 165        bt      10f
 166        mov     r6, r0
 167        shll8   r6
 168        shlr16  r0
 169        shlr8   r0
 170        or      r0, r6
 17110:
 172        rts
 173         mov    r6, r0
 174
 175/*
 176unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, 
 177                                        int sum, int *src_err_ptr, int *dst_err_ptr)
 178 */ 
 179
 180/*
  181 * Copy from src while checksumming, otherwise like csum_partial
 182 *
 183 * The macros SRC and DST specify the type of access for the instruction.
 184 * thus we can call a custom exception handler for all access types.
 185 *
 186 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 187 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 188 *        them all but there's no guarantee.
 189 */
 190
 191#define SRC(...)                        \
 192        9999: __VA_ARGS__ ;             \
 193        .section __ex_table, "a";       \
 194        .long 9999b, 6001f      ;       \
 195        .previous
 196
 197#define DST(...)                        \
 198        9999: __VA_ARGS__ ;             \
 199        .section __ex_table, "a";       \
 200        .long 9999b, 6002f      ;       \
 201        .previous
 202
 203!
 204! r4:   const char *SRC
 205! r5:   char *DST
 206! r6:   int LEN
 207! r7:   int SUM
 208!
 209! on stack:
 210! int *SRC_ERR_PTR
 211! int *DST_ERR_PTR
 212!
 213ENTRY(csum_partial_copy_generic)
 214        mov.l   r5,@-r15
 215        mov.l   r6,@-r15
 216
 217        mov     #3,r0           ! Check src and dest are equally aligned
 218        mov     r4,r1
 219        and     r0,r1
 220        and     r5,r0
 221        cmp/eq  r1,r0
 222        bf      3f              ! Different alignments, use slow version
 223        tst     #1,r0           ! Check dest word aligned
 224        bf      3f              ! If not, do it the slow way
 225
 226        mov     #2,r0
 227        tst     r0,r5           ! Check dest alignment. 
 228        bt      2f              ! Jump if alignment is ok.
 229        add     #-2,r6          ! Alignment uses up two bytes.
 230        cmp/pz  r6              ! Jump if we had at least two bytes.
 231        bt/s    1f
 232         clrt
 233        add     #2,r6           ! r6 was < 2.   Deal with it.
 234        bra     4f
 235         mov    r6,r2
 236
 2373:      ! Handle different src and dest alignments.
 238        ! This is not common, so simple byte by byte copy will do.
 239        mov     r6,r2
 240        shlr    r6
 241        tst     r6,r6
 242        bt      4f
 243        clrt
 244        .align  2
 2455:
 246SRC(    mov.b   @r4+,r1         )
 247SRC(    mov.b   @r4+,r0         )
 248        extu.b  r1,r1
 249DST(    mov.b   r1,@r5          )
 250DST(    mov.b   r0,@(1,r5)      )
 251        extu.b  r0,r0
 252        add     #2,r5
 253
 254#ifdef  __LITTLE_ENDIAN__
 255        shll8   r0
 256#else
 257        shll8   r1
 258#endif
 259        or      r1,r0
 260
 261        addc    r0,r7
 262        movt    r0
 263        dt      r6
 264        bf/s    5b
 265         cmp/eq #1,r0
 266        mov     #0,r0
 267        addc    r0, r7
 268
 269        mov     r2, r0
 270        tst     #1, r0
 271        bt      7f
 272        bra     5f
 273         clrt
 274
 275        ! src and dest equally aligned, but to a two byte boundary.
 276        ! Handle first two bytes as a special case
 277        .align  2
 2781:      
 279SRC(    mov.w   @r4+,r0         )
 280DST(    mov.w   r0,@r5          )
 281        add     #2,r5
 282        extu.w  r0,r0
 283        addc    r0,r7
 284        mov     #0,r0
 285        addc    r0,r7
 2862:
 287        mov     r6,r2
 288        mov     #-5,r0
 289        shld    r0,r6
 290        tst     r6,r6
 291        bt/s    2f
 292         clrt
 293        .align  2
 2941:      
 295SRC(    mov.l   @r4+,r0         )
 296SRC(    mov.l   @r4+,r1         )
 297        addc    r0,r7
 298DST(    mov.l   r0,@r5          )
 299DST(    mov.l   r1,@(4,r5)      )
 300        addc    r1,r7
 301
 302SRC(    mov.l   @r4+,r0         )
 303SRC(    mov.l   @r4+,r1         )
 304        addc    r0,r7
 305DST(    mov.l   r0,@(8,r5)      )
 306DST(    mov.l   r1,@(12,r5)     )
 307        addc    r1,r7
 308
 309SRC(    mov.l   @r4+,r0         )
 310SRC(    mov.l   @r4+,r1         )
 311        addc    r0,r7
 312DST(    mov.l   r0,@(16,r5)     )
 313DST(    mov.l   r1,@(20,r5)     )
 314        addc    r1,r7
 315
 316SRC(    mov.l   @r4+,r0         )
 317SRC(    mov.l   @r4+,r1         )
 318        addc    r0,r7
 319DST(    mov.l   r0,@(24,r5)     )
 320DST(    mov.l   r1,@(28,r5)     )
 321        addc    r1,r7
 322        add     #32,r5
 323        movt    r0
 324        dt      r6
 325        bf/s    1b
 326         cmp/eq #1,r0
 327        mov     #0,r0
 328        addc    r0,r7
 329
 3302:      mov     r2,r6
 331        mov     #0x1c,r0
 332        and     r0,r6
 333        cmp/pl  r6
 334        bf/s    4f
 335         clrt
 336        shlr2   r6
 3373:      
 338SRC(    mov.l   @r4+,r0 )
 339        addc    r0,r7
 340DST(    mov.l   r0,@r5  )
 341        add     #4,r5
 342        movt    r0
 343        dt      r6
 344        bf/s    3b
 345         cmp/eq #1,r0
 346        mov     #0,r0
 347        addc    r0,r7
 3484:      mov     r2,r6
 349        mov     #3,r0
 350        and     r0,r6
 351        cmp/pl  r6
 352        bf      7f
 353        mov     #2,r1
 354        cmp/hs  r1,r6
 355        bf      5f
 356SRC(    mov.w   @r4+,r0 )
 357DST(    mov.w   r0,@r5  )
 358        extu.w  r0,r0
 359        add     #2,r5
 360        cmp/eq  r1,r6
 361        bt/s    6f
 362         clrt
 363        shll16  r0
 364        addc    r0,r7
 3655:      
 366SRC(    mov.b   @r4+,r0 )
 367DST(    mov.b   r0,@r5  )
 368        extu.b  r0,r0
 369#ifndef __LITTLE_ENDIAN__
 370        shll8   r0
 371#endif
 3726:      addc    r0,r7
 373        mov     #0,r0
 374        addc    r0,r7
 3757:
 3765000:
 377
 378# Exception handler:
 379.section .fixup, "ax"                                                   
 380
 3816001:
 382        mov.l   @(8,r15),r0                     ! src_err_ptr
 383        mov     #-EFAULT,r1
 384        mov.l   r1,@r0
 385
 386        ! zero the complete destination - computing the rest
 387        ! is too much work 
 388        mov.l   @(4,r15),r5             ! dst
 389        mov.l   @r15,r6                 ! len
 390        mov     #0,r7
 3911:      mov.b   r7,@r5
 392        dt      r6
 393        bf/s    1b
 394         add    #1,r5
 395        mov.l   8000f,r0
 396        jmp     @r0
 397         nop
 398        .align  2
 3998000:   .long   5000b
 400
 4016002:
 402        mov.l   @(12,r15),r0                    ! dst_err_ptr
 403        mov     #-EFAULT,r1
 404        mov.l   r1,@r0
 405        mov.l   8001f,r0
 406        jmp     @r0
 407         nop
 408        .align  2
 4098001:   .long   5000b
 410
 411.previous
 412        add     #8,r15
 413        rts
 414         mov    r7,r0
 415