linux/arch/x86/um/checksum_32.S
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              IP/TCP/UDP checksumming routines
   7 *
   8 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
   9 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  10 *              Tom May, <ftom@netcom.com>
  11 *              Pentium Pro/II routines:
  12 *              Alexander Kjeldaas <astor@guardian.no>
  13 *              Finn Arne Gangstad <finnag@guardian.no>
  14 *              Lots of code moved from tcp.c and ip.c; see those files
  15 *              for more names.
  16 *
  17 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
  18 *                           handling.
  19 *              Andi Kleen,  add zeroing on error
  20 *                   converted to pure assembler
  21 *
  22 *              This program is free software; you can redistribute it and/or
  23 *              modify it under the terms of the GNU General Public License
  24 *              as published by the Free Software Foundation; either version
  25 *              2 of the License, or (at your option) any later version.
  26 */
  27
  28#include <asm/errno.h>
  29#include <asm/asm.h>
  30                                
  31/*
  32 * computes a partial checksum, e.g. for TCP/UDP fragments
  33 */
  34
  35/*      
  36unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
  37 */
  38                
  39.text
  40.align 4
  41.globl csum_partial
  42                
  43#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
  44
  45          /*            
  46           * Experiments with Ethernet and SLIP connections show that buff
  47           * is aligned on either a 2-byte or 4-byte boundary.  We get at
  48           * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
  49           * Fortunately, it is easy to convert 2-byte alignment to 4-byte
  50           * alignment for the unrolled loop.
  51           */           
  52csum_partial:
  53        pushl %esi
  54        pushl %ebx
  55        movl 20(%esp),%eax      # Function arg: unsigned int sum
  56        movl 16(%esp),%ecx      # Function arg: int len
  57        movl 12(%esp),%esi      # Function arg: unsigned char *buff
  58        testl $2, %esi          # Check alignment.
  59        jz 2f                   # Jump if alignment is ok.
  60        subl $2, %ecx           # Alignment uses up two bytes.
  61        jae 1f                  # Jump if we had at least two bytes.
  62        addl $2, %ecx           # ecx was < 2.  Deal with it.
  63        jmp 4f
  641:      movw (%esi), %bx
  65        addl $2, %esi
  66        addw %bx, %ax
  67        adcl $0, %eax
  682:
  69        movl %ecx, %edx
  70        shrl $5, %ecx
  71        jz 2f
  72        testl %esi, %esi
  731:      movl (%esi), %ebx
  74        adcl %ebx, %eax
  75        movl 4(%esi), %ebx
  76        adcl %ebx, %eax
  77        movl 8(%esi), %ebx
  78        adcl %ebx, %eax
  79        movl 12(%esi), %ebx
  80        adcl %ebx, %eax
  81        movl 16(%esi), %ebx
  82        adcl %ebx, %eax
  83        movl 20(%esi), %ebx
  84        adcl %ebx, %eax
  85        movl 24(%esi), %ebx
  86        adcl %ebx, %eax
  87        movl 28(%esi), %ebx
  88        adcl %ebx, %eax
  89        lea 32(%esi), %esi
  90        dec %ecx
  91        jne 1b
  92        adcl $0, %eax
  932:      movl %edx, %ecx
  94        andl $0x1c, %edx
  95        je 4f
  96        shrl $2, %edx           # This clears CF
  973:      adcl (%esi), %eax
  98        lea 4(%esi), %esi
  99        dec %edx
 100        jne 3b
 101        adcl $0, %eax
 1024:      andl $3, %ecx
 103        jz 7f
 104        cmpl $2, %ecx
 105        jb 5f
 106        movw (%esi),%cx
 107        leal 2(%esi),%esi
 108        je 6f
 109        shll $16,%ecx
 1105:      movb (%esi),%cl
 1116:      addl %ecx,%eax
 112        adcl $0, %eax 
 1137:      
 114        popl %ebx
 115        popl %esi
 116        ret
 117
 118#else
 119
 120/* Version for PentiumII/PPro */
 121
 122csum_partial:
 123        pushl %esi
 124        pushl %ebx
 125        movl 20(%esp),%eax      # Function arg: unsigned int sum
 126        movl 16(%esp),%ecx      # Function arg: int len
 127        movl 12(%esp),%esi      # Function arg: const unsigned char *buf
 128
 129        testl $2, %esi         
 130        jnz 30f                 
 13110:
 132        movl %ecx, %edx
 133        movl %ecx, %ebx
 134        andl $0x7c, %ebx
 135        shrl $7, %ecx
 136        addl %ebx,%esi
 137        shrl $2, %ebx  
 138        negl %ebx
 139        lea 45f(%ebx,%ebx,2), %ebx
 140        testl %esi, %esi
 141        jmp *%ebx
 142
 143        # Handle 2-byte-aligned regions
 14420:     addw (%esi), %ax
 145        lea 2(%esi), %esi
 146        adcl $0, %eax
 147        jmp 10b
 148
 14930:     subl $2, %ecx          
 150        ja 20b                 
 151        je 32f
 152        movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
 153        addl %ebx, %eax
 154        adcl $0, %eax
 155        jmp 80f
 15632:
 157        addw (%esi), %ax        # csumming 2 bytes, 2-aligned
 158        adcl $0, %eax
 159        jmp 80f
 160
 16140: 
 162        addl -128(%esi), %eax
 163        adcl -124(%esi), %eax
 164        adcl -120(%esi), %eax
 165        adcl -116(%esi), %eax   
 166        adcl -112(%esi), %eax   
 167        adcl -108(%esi), %eax
 168        adcl -104(%esi), %eax
 169        adcl -100(%esi), %eax
 170        adcl -96(%esi), %eax
 171        adcl -92(%esi), %eax
 172        adcl -88(%esi), %eax
 173        adcl -84(%esi), %eax
 174        adcl -80(%esi), %eax
 175        adcl -76(%esi), %eax
 176        adcl -72(%esi), %eax
 177        adcl -68(%esi), %eax
 178        adcl -64(%esi), %eax     
 179        adcl -60(%esi), %eax     
 180        adcl -56(%esi), %eax     
 181        adcl -52(%esi), %eax   
 182        adcl -48(%esi), %eax   
 183        adcl -44(%esi), %eax
 184        adcl -40(%esi), %eax
 185        adcl -36(%esi), %eax
 186        adcl -32(%esi), %eax
 187        adcl -28(%esi), %eax
 188        adcl -24(%esi), %eax
 189        adcl -20(%esi), %eax
 190        adcl -16(%esi), %eax
 191        adcl -12(%esi), %eax
 192        adcl -8(%esi), %eax
 193        adcl -4(%esi), %eax
 19445:
 195        lea 128(%esi), %esi
 196        adcl $0, %eax
 197        dec %ecx
 198        jge 40b
 199        movl %edx, %ecx
 20050:     andl $3, %ecx
 201        jz 80f
 202
 203        # Handle the last 1-3 bytes without jumping
 204        notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
 205        movl $0xffffff,%ebx     # by the shll and shrl instructions
 206        shll $3,%ecx
 207        shrl %cl,%ebx
 208        andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
 209        addl %ebx,%eax
 210        adcl $0,%eax
 21180: 
 212        popl %ebx
 213        popl %esi
 214        ret
 215                                
 216#endif
 217