linux/arch/x86/lib/checksum_32.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *                           handling.
 *              Andi Kleen,  add zeroing on error
 *                           converted to pure assembler
 */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */
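/*
 * For orientation, a hedged C sketch of the semantics (an illustration
 * only, not the kernel's C fallback; the raw 32-bit value it returns may
 * differ from the assembly's, but it folds to the same 16-bit Internet
 * checksum):
 *
 *	unsigned int csum_partial_sketch(const unsigned char *buff, int len,
 *					 unsigned int sum)
 *	{
 *		while (len > 1) {
 *			// little-endian 16-bit word, as on x86
 *			unsigned int w = buff[0] | (buff[1] << 8);
 *
 *			sum += w;
 *			if (sum < w)		// end-around carry, like adcl
 *				sum++;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len) {			// trailing odd byte
 *			sum += buff[0];
 *			if (sum < buff[0])
 *				sum++;
 *		}
 *		return sum;
 *	}
 */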

.text

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

        /*
         * Experiments with Ethernet and SLIP connections show that buff
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
         * alignment for the unrolled loop.
         */
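        /*
         * Alignment handling below: an odd start byte is summed on its
         * own and the running sum is rotated left by 8 so that the
         * following aligned word loads still pair the bytes correctly;
         * the final "testb $1, 12(%esp)" / "roll $8" undoes that
         * rotation.  A buffer that is only 2-byte aligned consumes one
         * 16-bit word first to reach the 4-byte alignment the unrolled
         * loop wants.
         */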
SYM_FUNC_START(csum_partial)
        pushl %esi
        pushl %ebx
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: unsigned char *buff
        testl $3, %esi          # Check alignment.
        jz 2f                   # Jump if alignment is ok.
        testl $1, %esi          # Check alignment.
        jz 10f                  # Jump if buff is 2-byte aligned.

        # buf is odd
        dec %ecx
        jl 8f
        movzbl (%esi), %ebx
        adcl %ebx, %eax
        roll $8, %eax
        inc %esi
        testl $2, %esi
        jz 2f
10:
        subl $2, %ecx           # Alignment uses up two bytes.
        jae 1f                  # Jump if we had at least two bytes.
        addl $2, %ecx           # ecx was < 2.  Deal with it.
        jmp 4f
1:      movw (%esi), %bx
        addl $2, %esi
        addw %bx, %ax
        adcl $0, %eax
2:
        movl %ecx, %edx
        shrl $5, %ecx
        jz 2f
        testl %esi, %esi
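        # The testl above only serves to clear CF, so the first adcl
        # below starts without a stale carry.  Each pass of this loop
        # folds 32 bytes into %eax.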
1:      movl (%esi), %ebx
        adcl %ebx, %eax
        movl 4(%esi), %ebx
        adcl %ebx, %eax
        movl 8(%esi), %ebx
        adcl %ebx, %eax
        movl 12(%esi), %ebx
        adcl %ebx, %eax
        movl 16(%esi), %ebx
        adcl %ebx, %eax
        movl 20(%esi), %ebx
        adcl %ebx, %eax
        movl 24(%esi), %ebx
        adcl %ebx, %eax
        movl 28(%esi), %ebx
        adcl %ebx, %eax
        lea 32(%esi), %esi
        dec %ecx
        jne 1b
        adcl $0, %eax
2:      movl %edx, %ecx
        andl $0x1c, %edx
        je 4f
        shrl $2, %edx           # This clears CF
3:      adcl (%esi), %eax
        lea 4(%esi), %esi
        dec %edx
        jne 3b
        adcl $0, %eax
4:      andl $3, %ecx
        jz 7f
        cmpl $2, %ecx
        jb 5f
        movw (%esi),%cx
        leal 2(%esi),%esi
        je 6f
        shll $16,%ecx
5:      movb (%esi),%cl
6:      addl %ecx,%eax
        adcl $0, %eax
7:
        testb $1, 12(%esp)
        jz 8f
        roll $8, %eax
8:
        popl %ebx
        popl %esi
        ret
SYM_FUNC_END(csum_partial)

#else

/* Version for PentiumII/PPro */

SYM_FUNC_START(csum_partial)
        pushl %esi
        pushl %ebx
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: const unsigned char *buf

        testl $3, %esi
        jnz 25f
10:
        movl %ecx, %edx
        movl %ecx, %ebx
        andl $0x7c, %ebx
        shrl $7, %ecx
        addl %ebx,%esi
        shrl $2, %ebx
        negl %ebx
        lea 45f(%ebx,%ebx,2), %ebx
        testl %esi, %esi
        JMP_NOSPEC ebx
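        # %ebx was set to 45f minus 3 bytes per leftover dword (len mod
        # 128, truncated to whole dwords): each "adcl disp8(%esi), %eax"
        # below assembles to 3 bytes, so the indirect jump enters the
        # chain just early enough to sum exactly those dwords.  %esi was
        # already advanced past them, hence the negative displacements,
        # and the testl above cleared CF.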

        # Handle 2-byte-aligned regions
20:     addw (%esi), %ax
        lea 2(%esi), %esi
        adcl $0, %eax
        jmp 10b
25:
        testl $1, %esi
        jz 30f
        # buf is odd
        dec %ecx
        jl 90f
        movzbl (%esi), %ebx
        addl %ebx, %eax
        adcl $0, %eax
        roll $8, %eax
        inc %esi
        testl $2, %esi
        jz 10b

30:     subl $2, %ecx
        ja 20b
        je 32f
        addl $2, %ecx
        jz 80f
        movzbl (%esi),%ebx      # csumming 1 byte, 2-aligned
        addl %ebx, %eax
        adcl $0, %eax
        jmp 80f
32:
        addw (%esi), %ax        # csumming 2 bytes, 2-aligned
        adcl $0, %eax
        jmp 80f

40:
        addl -128(%esi), %eax
        adcl -124(%esi), %eax
        adcl -120(%esi), %eax
        adcl -116(%esi), %eax
        adcl -112(%esi), %eax
        adcl -108(%esi), %eax
        adcl -104(%esi), %eax
        adcl -100(%esi), %eax
        adcl -96(%esi), %eax
        adcl -92(%esi), %eax
        adcl -88(%esi), %eax
        adcl -84(%esi), %eax
        adcl -80(%esi), %eax
        adcl -76(%esi), %eax
        adcl -72(%esi), %eax
        adcl -68(%esi), %eax
        adcl -64(%esi), %eax
        adcl -60(%esi), %eax
        adcl -56(%esi), %eax
        adcl -52(%esi), %eax
        adcl -48(%esi), %eax
        adcl -44(%esi), %eax
        adcl -40(%esi), %eax
        adcl -36(%esi), %eax
        adcl -32(%esi), %eax
        adcl -28(%esi), %eax
        adcl -24(%esi), %eax
        adcl -20(%esi), %eax
        adcl -16(%esi), %eax
        adcl -12(%esi), %eax
        adcl -8(%esi), %eax
        adcl -4(%esi), %eax
45:
        lea 128(%esi), %esi
        adcl $0, %eax
        dec %ecx
        jge 40b
        movl %edx, %ecx
50:     andl $3, %ecx
        jz 80f

        # Handle the last 1-3 bytes without jumping
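        # Worked example for the mask trick below: len&3 == 1 gives
        # %ecx = ~1 << 3, so %cl = 0xf0 and shrl uses only its low
        # 5 bits (16); %ebx ends up 0x0000ff and keeps just the single
        # remaining byte of the final dword.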
        notl %ecx               # 1->2, 2->1, 3->0, higher bits are masked
        movl $0xffffff,%ebx     # by the shll and shrl instructions
        shll $3,%ecx
        shrl %cl,%ebx
        andl -128(%esi),%ebx    # esi is 4-aligned so should be ok
        addl %ebx,%eax
        adcl $0,%eax
80:
        testb $1, 12(%esp)
        jz 90f
        roll $8, %eax
90:
        popl %ebx
        popl %esi
        ret
SYM_FUNC_END(csum_partial)

#endif
EXPORT_SYMBOL(csum_partial)

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst, int len)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 */
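/*
 * Hedged sketch of the calling contract implemented below (the helper
 * name and the memcpy-based body are illustrative only): copy len bytes
 * from src to dst while checksumming them.  The accumulator is seeded
 * with -1, and an all-ones seed can never collapse to 0 under
 * end-around-carry addition, so a successful call never returns 0; the
 * exception fixup at 6001 returns 0 to report a fault instead.
 *
 *	unsigned int csum_and_copy_sketch(const char *src, char *dst, int len)
 *	{
 *		memcpy(dst, src, len);		// may fault; see EXC() below
 *		return csum_partial(dst, len, ~0U);
 *	}
 */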

#define EXC(y...)                       \
        9999: y;                        \
        _ASM_EXTABLE_UA(9999b, 6001f)
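/*
 * EXC(insn) emits insn at a local label and registers that address in
 * the exception table via _ASM_EXTABLE_UA(), so a fault taken on that
 * one instruction resumes at label 6001 below, where the sum is zeroed
 * and the routine returns 0 instead of a checksum.
 */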

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP              12

SYM_FUNC_START(csum_partial_copy_generic)
        subl  $4,%esp
        pushl %edi
        pushl %esi
        pushl %ebx
        movl ARGBASE+12(%esp),%ecx      # len
        movl ARGBASE+4(%esp),%esi       # src
        movl ARGBASE+8(%esp),%edi       # dst

        movl $-1, %eax                  # sum
        testl $2, %edi                  # Check alignment.
        jz 2f                           # Jump if alignment is ok.
        subl $2, %ecx                   # Alignment uses up two bytes.
        jae 1f                          # Jump if we had at least two bytes.
        addl $2, %ecx                   # ecx was < 2.  Deal with it.
        jmp 4f
EXC(1:  movw (%esi), %bx        )
        addl $2, %esi
EXC(    movw %bx, (%edi)        )
        addl $2, %edi
        addw %bx, %ax
        adcl $0, %eax
2:
        movl %ecx, FP(%esp)
        shrl $5, %ecx
        jz 2f
        testl %esi, %esi                # what's wrong with clc?
EXC(1:  movl (%esi), %ebx       )
EXC(    movl 4(%esi), %edx      )
        adcl %ebx, %eax
EXC(    movl %ebx, (%edi)       )
        adcl %edx, %eax
EXC(    movl %edx, 4(%edi)      )

EXC(    movl 8(%esi), %ebx      )
EXC(    movl 12(%esi), %edx     )
        adcl %ebx, %eax
EXC(    movl %ebx, 8(%edi)      )
        adcl %edx, %eax
EXC(    movl %edx, 12(%edi)     )

EXC(    movl 16(%esi), %ebx     )
EXC(    movl 20(%esi), %edx     )
        adcl %ebx, %eax
EXC(    movl %ebx, 16(%edi)     )
        adcl %edx, %eax
EXC(    movl %edx, 20(%edi)     )

EXC(    movl 24(%esi), %ebx     )
EXC(    movl 28(%esi), %edx     )
        adcl %ebx, %eax
EXC(    movl %ebx, 24(%edi)     )
        adcl %edx, %eax
EXC(    movl %edx, 28(%edi)     )

        lea 32(%esi), %esi
        lea 32(%edi), %edi
        dec %ecx
        jne 1b
        adcl $0, %eax
2:      movl FP(%esp), %edx
        movl %edx, %ecx
        andl $0x1c, %edx
        je 4f
        shrl $2, %edx                   # This clears CF
EXC(3:  movl (%esi), %ebx       )
        adcl %ebx, %eax
EXC(    movl %ebx, (%edi)       )
        lea 4(%esi), %esi
        lea 4(%edi), %edi
        dec %edx
        jne 3b
        adcl $0, %eax
4:      andl $3, %ecx
        jz 7f
        cmpl $2, %ecx
        jb 5f
EXC(    movw (%esi), %cx        )
        leal 2(%esi), %esi
EXC(    movw %cx, (%edi)        )
        leal 2(%edi), %edi
        je 6f
        shll $16,%ecx
EXC(5:  movb (%esi), %cl        )
EXC(    movb %cl, (%edi)        )
6:      addl %ecx, %eax
        adcl $0, %eax
7:

# Exception handler:
.section .fixup, "ax"

6001:
        xorl %eax, %eax
        jmp 7b

.previous

        popl %ebx
        popl %esi
        popl %edi
        popl %ecx                       # equivalent to addl $4,%esp
        ret
SYM_FUNC_END(csum_partial_copy_generic)

#else

/* Version for PentiumII/PPro */

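/*
 * ROUND1() opens each 64-byte block with a plain addl: the carry from
 * the previous block has already been folded in by the "adcl $0,%eax"
 * at 3:, and CF is clear on first entry (the testl before JMP_NOSPEC),
 * so starting with addl is safe and breaks the carry dependency.
 * ROUND() continues with adcl so carries propagate through the
 * remaining dwords of the block.
 */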
#define ROUND1(x) \
        EXC(movl x(%esi), %ebx  )       ;       \
        addl %ebx, %eax                 ;       \
        EXC(movl %ebx, x(%edi)  )       ;

#define ROUND(x) \
        EXC(movl x(%esi), %ebx  )       ;       \
        adcl %ebx, %eax                 ;       \
        EXC(movl %ebx, x(%edi)  )       ;

#define ARGBASE 12

SYM_FUNC_START(csum_partial_copy_generic)
        pushl %ebx
        pushl %edi
        pushl %esi
        movl ARGBASE+4(%esp),%esi       #src
        movl ARGBASE+8(%esp),%edi       #dst
        movl ARGBASE+12(%esp),%ecx      #len
        movl $-1, %eax                  #sum
#       movl %ecx, %edx
        movl %ecx, %ebx
        movl %esi, %edx
        shrl $6, %ecx
        andl $0x3c, %ebx
        negl %ebx
        subl %ebx, %esi
        subl %ebx, %edi
        lea  -1(%esi),%edx
        andl $-32,%edx
        lea 3f(%ebx,%ebx), %ebx
        testl %esi, %esi
        JMP_NOSPEC ebx
1:      addl $64,%esi
        addl $64,%edi
        EXC(movb -32(%edx),%bl) ; EXC(movb (%edx),%bl)
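        # The two EXC(movb ...) loads above do not feed the checksum:
        # %edx walks the source a cache line at a time (it was rounded
        # down to a 32-byte boundary above and is advanced by 64 at 3:),
        # so these byte reads appear to act as a software prefetch of
        # the lines the ROUND sequence below is about to copy.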
        ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
        ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
        ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
        ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:      adcl $0,%eax
        addl $64, %edx
        dec %ecx
        jge 1b
4:      movl ARGBASE+12(%esp),%edx      #len
        andl $3, %edx
        jz 7f
        cmpl $2, %edx
        jb 5f
EXC(    movw (%esi), %dx         )
        leal 2(%esi), %esi
EXC(    movw %dx, (%edi)         )
        leal 2(%edi), %edi
        je 6f
        shll $16,%edx
5:
EXC(    movb (%esi), %dl         )
EXC(    movb %dl, (%edi)         )
6:      addl %edx, %eax
        adcl $0, %eax
7:
.section .fixup, "ax"
6001:   xorl %eax, %eax
        jmp  7b
.previous

        popl %esi
        popl %edi
        popl %ebx
        ret
SYM_FUNC_END(csum_partial_copy_generic)

#undef ROUND
#undef ROUND1

#endif
EXPORT_SYMBOL(csum_partial_copy_generic)