linux/arch/x86/lib/csum-copy_64.S
/*
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  32-bit checksum, folded down from the 64-bit running sum;
 *      undefined in case of an exception.
 *
 * Wrappers need to take care of returning a valid sum on exceptions
 * and of zeroing the destination. They should also align the source
 * or destination to 8 bytes.
 */
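/*
 * For reference, the C-level view of this routine. The declaration is
 * roughly the prototype from asm/checksum_64.h; the wrapper below is only
 * an illustrative sketch (csum_from_user_sketch() is a hypothetical name,
 * not the real wrapper in csum-wrappers_64.c):
 *
 *	asmlinkage __wsum
 *	csum_partial_copy_generic(const void *src, void *dst, int len,
 *				  __wsum sum, int *src_err_ptr,
 *				  int *dst_err_ptr);
 *
 *	static __wsum csum_from_user_sketch(const void __user *src,
 *					    void *dst, int len,
 *					    __wsum sum, int *errp)
 *	{
 *		*errp = 0;
 *		sum = csum_partial_copy_generic((__force const void *)src,
 *						dst, len, sum, errp, NULL);
 *		if (unlikely(*errp))
 *			memset(dst, 0, len);	// the promised zeroing
 *		return sum;
 *	}
 */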

        .macro source
10:
        .section __ex_table,"a"
        .align 8
        .quad 10b,.Lbad_source
        .previous
        .endm

        .macro dest
20:
        .section __ex_table,"a"
        .align 8
        .quad 20b,.Lbad_dest
        .previous
        .endm

        .macro ignore L=.Lignore
30:
        .section __ex_table,"a"
        .align 8
        .quad 30b,\L
        .previous
        .endm

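/*
 * Each macro above plants a label on the next instruction and records an
 * (address, fixup) pair in the __ex_table section. If that instruction
 * faults, the page-fault handler resumes at the fixup label instead of
 * oopsing. Roughly the entry layout these .quad pairs fill in (a sketch
 * of the classic absolute-address struct from asm/uaccess.h):
 *
 *	struct exception_table_entry {
 *		unsigned long insn;	// address of the faulting insn
 *		unsigned long fixup;	// address to jump to on a fault
 *	};
 */
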
ENTRY(csum_partial_copy_generic)
        CFI_STARTPROC
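        /*
         * Note: this compare/branch is effectively a no-op -- both paths
         * land at .Lignore. It looks like a leftover from prefetch setup
         * code that was removed.
         */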
        cmpl     $3*64,%edx
        jle      .Lignore

.Lignore:
        subq  $7*8,%rsp
        CFI_ADJUST_CFA_OFFSET 7*8
        movq  %rbx,2*8(%rsp)
        CFI_REL_OFFSET rbx, 2*8
        movq  %r12,3*8(%rsp)
        CFI_REL_OFFSET r12, 3*8
        movq  %r14,4*8(%rsp)
        CFI_REL_OFFSET r14, 4*8
        movq  %r13,5*8(%rsp)
        CFI_REL_OFFSET r13, 5*8
        movq  %rbp,6*8(%rsp)
        CFI_REL_OFFSET rbp, 6*8

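        /* stash the error pointers; %r8/%r9 are reused as scratch below */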
        movq  %r8,(%rsp)
        movq  %r9,1*8(%rsp)

        movl  %ecx,%eax
        movl  %edx,%ecx

        xorl  %r9d,%r9d
        movq  %rcx,%r12

        shrq  $6,%r12
        jz    .Lhandle_tail       /* < 64 */

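        /* clear CF so the first adcq of the chain has no carry in */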
        clc

        /* main loop: checksum and copy in 64-byte blocks */
        /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
        /* r11: temp3, rdx: temp4, r12: loopcnt */
        /* r10: temp5, rbp: temp6, r14: temp7, r13: temp8 */
        .p2align 4
.Lloop:
        source
        movq  (%rdi),%rbx
        source
        movq  8(%rdi),%r8
        source
        movq  16(%rdi),%r11
        source
        movq  24(%rdi),%rdx

        source
        movq  32(%rdi),%r10
        source
        movq  40(%rdi),%rbp
        source
        movq  48(%rdi),%r14
        source
        movq  56(%rdi),%r13

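        /*
         * Prefetches do not normally fault, but the extable entry from
         * "ignore 2f" routes any fault straight to 2: just in case (some
         * early CPUs had prefetch errata).
         */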
        ignore 2f
        prefetcht0 5*64(%rdi)
2:
        adcq  %rbx,%rax
        adcq  %r8,%rax
        adcq  %r11,%rax
        adcq  %rdx,%rax
        adcq  %r10,%rax
        adcq  %rbp,%rax
        adcq  %r14,%rax
        adcq  %r13,%rax

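        /*
         * decl does not touch CF, and the movq/leaq below leave all flags
         * alone, so the jnz still tests this decrement and the next
         * iteration's adcq chain still sees the carry.
         */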
        decl %r12d

        dest
        movq %rbx,(%rsi)
        dest
        movq %r8,8(%rsi)
        dest
        movq %r11,16(%rsi)
        dest
        movq %rdx,24(%rsi)

        dest
        movq %r10,32(%rsi)
        dest
        movq %rbp,40(%rsi)
        dest
        movq %r14,48(%rsi)
        dest
        movq %r13,56(%rsi)

3:

        leaq 64(%rdi),%rdi
        leaq 64(%rsi),%rsi

        jnz   .Lloop

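        /* fold the last carry back in; %r9 is always zero */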
        adcq  %r9,%rax

        /* do the last up to 56 bytes */
.Lhandle_tail:
        /* ecx: count */
        movl %ecx,%r10d
        andl $63,%ecx
        shrl $3,%ecx
        jz       .Lfold
        clc
        .p2align 4
.Lloop_8:
        source
        movq (%rdi),%rbx
        adcq %rbx,%rax
        decl %ecx
        dest
        movq %rbx,(%rsi)
        leaq 8(%rsi),%rsi /* preserve carry */
        leaq 8(%rdi),%rdi
        jnz     .Lloop_8
        adcq %r9,%rax   /* add in carry */

.Lfold:
        /* reduce checksum to 32 bits */
        movl %eax,%ebx
        shrq $32,%rax
        addl %ebx,%eax
        adcl %r9d,%eax
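        /*
         * C equivalent of the fold above (illustrative sketch; u32/u64
         * as in linux/types.h, fold_to_32() is a hypothetical name):
         *
         *	static inline u32 fold_to_32(u64 sum)
         *	{
         *		u32 lo = (u32)sum, hi = (u32)(sum >> 32);
         *		u32 r = lo + hi;	// addl, may wrap
         *		return r + (r < lo);	// adcl: add the carry back in
         *	}
         */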

        /* do the last up to 6 bytes */
.Lhandle_7:
        movl %r10d,%ecx
        andl $7,%ecx
        shrl $1,%ecx
        jz   .Lhandle_1
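        /*
         * %edx is loaded below but apparently never read again; it looks
         * like a leftover from an earlier version of this loop.
         */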
        movl $2,%edx
        xorl %ebx,%ebx
        clc
        .p2align 4
.Lloop_1:
        source
        movw (%rdi),%bx
        adcl %ebx,%eax
        decl %ecx
        dest
        movw %bx,(%rsi)
        leaq 2(%rdi),%rdi
        leaq 2(%rsi),%rsi
        jnz .Lloop_1
        adcl %r9d,%eax  /* add in carry */

        /* handle last odd byte */
.Lhandle_1:
        testl $1,%r10d
        jz    .Lende
        xorl  %ebx,%ebx
        source
        movb (%rdi),%bl
        dest
        movb %bl,(%rsi)
        addl %ebx,%eax
        adcl %r9d,%eax          /* carry */

        CFI_REMEMBER_STATE
.Lende:
        movq 2*8(%rsp),%rbx
        CFI_RESTORE rbx
        movq 3*8(%rsp),%r12
        CFI_RESTORE r12
        movq 4*8(%rsp),%r14
        CFI_RESTORE r14
        movq 5*8(%rsp),%r13
        CFI_RESTORE r13
        movq 6*8(%rsp),%rbp
        CFI_RESTORE rbp
        addq $7*8,%rsp
        CFI_ADJUST_CFA_OFFSET -7*8
        ret
        CFI_RESTORE_STATE

        /* Exception handlers. Very simple; the zeroing is done in the wrappers. */
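        /* (%rsp) holds the saved src_err_ptr, 8(%rsp) the saved dst_err_ptr */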
.Lbad_source:
        movq (%rsp),%rax
        testq %rax,%rax
        jz   .Lende
        movl $-EFAULT,(%rax)
        jmp  .Lende

.Lbad_dest:
        movq 8(%rsp),%rax
        testq %rax,%rax
        jz   .Lende
        movl $-EFAULT,(%rax)
        jmp .Lende
        CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)