linux/arch/x86/lib/csum-copy_64.S
/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On an exception, *src_err_ptr or *dst_err_ptr is set to -EFAULT and
 * the destination is zeroed.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  32-bit checksum, folded from the 64-bit running sum kept in rax;
 *      undefined in case of an exception.
 *
 * Wrappers need to take care of producing a valid sum and of zeroing
 * the destination on exceptions. They should also align the source or
 * destination to 8 bytes.
 */
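
/*
 * For reference, a C-level sketch of this routine as callers see it;
 * the register assignment above is what the SysV AMD64 calling
 * convention produces for a prototype along these lines (cf. the
 * declaration in asm/checksum_64.h):
 *
 *      __wsum csum_partial_copy_generic(const void *src, const void *dst,
 *                                       int len, __wsum sum,
 *                                       int *src_err_ptr, int *dst_err_ptr);
 *
 * rdi = src, rsi = dst, edx = len, ecx = sum, r8/r9 = error pointers.
 */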

        .macro source
10:
        _ASM_EXTABLE(10b, .Lbad_source)
        .endm

        .macro dest
20:
        _ASM_EXTABLE(20b, .Lbad_dest)
        .endm

        .macro ignore L=.Lignore
30:
        _ASM_EXTABLE(30b, \L)
        .endm
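
/*
 * Each macro above plants a numbered local label on the instruction
 * that follows it and records an exception-table entry for that
 * address. For example, "source" followed by "movq (%rdi), %rbx"
 * expands to:
 *
 *      10:     movq (%rdi), %rbx
 *
 * plus a __ex_table entry mapping a fault at 10b to .Lbad_source, so
 * a faulting access branches to the fixup code at the bottom of this
 * file instead of oopsing.
 */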


ENTRY(csum_partial_copy_generic)
        CFI_STARTPROC
        cmpl    $3*64, %edx
        jle     .Lignore

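        /*
         * Note: the jle above branches to the label that immediately
         * follows, so the size check currently has no effect;
         * presumably a leftover from a version that special-cased
         * small buffers here.
         */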
.Lignore:
        subq  $7*8, %rsp
        CFI_ADJUST_CFA_OFFSET 7*8
        movq  %rbx, 2*8(%rsp)
        CFI_REL_OFFSET rbx, 2*8
        movq  %r12, 3*8(%rsp)
        CFI_REL_OFFSET r12, 3*8
        movq  %r14, 4*8(%rsp)
        CFI_REL_OFFSET r14, 4*8
        movq  %r13, 5*8(%rsp)
        CFI_REL_OFFSET r13, 5*8
        movq  %rbp, 6*8(%rsp)
        CFI_REL_OFFSET rbp, 6*8

        movq  %r8, (%rsp)
        movq  %r9, 1*8(%rsp)

        movl  %ecx, %eax
        movl  %edx, %ecx

        xorl  %r9d, %r9d
        movq  %rcx, %r12

        shrq  $6, %r12
        jz      .Lhandle_tail       /* < 64 */

        clc

        /* main loop: checksum and copy in 64-byte blocks */
        /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
        /* r11: temp3, rdx: temp4, r12: loopcnt */
        /* r10: temp5, rbp: temp6, r14: temp7, r13: temp8 */
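        /*
         * The eight loads below are summed with one long adcq chain;
         * the carry out of each 64-bit add feeds the next. The loop
         * bookkeeping uses only decl/leaq/jnz, none of which write CF,
         * so the carry also survives across iterations: the clc above
         * starts the chain clean, and the adcq %r9 (r9 is zero) after
         * the loop folds in the final carry.
         */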
        .p2align 4
.Lloop:
        source
        movq  (%rdi), %rbx
        source
        movq  8(%rdi), %r8
        source
        movq  16(%rdi), %r11
        source
        movq  24(%rdi), %rdx

        source
        movq  32(%rdi), %r10
        source
        movq  40(%rdi), %rbp
        source
        movq  48(%rdi), %r14
        source
        movq  56(%rdi), %r13

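        /*
         * Prefetch the block five cache lines ahead. A prefetch should
         * never fault, but the "ignore" extable entry makes any stray
         * fault branch to the 2: label below, i.e. the prefetch is
         * simply skipped rather than killing the copy.
         */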
        ignore 2f
        prefetcht0 5*64(%rdi)
2:
        adcq  %rbx, %rax
        adcq  %r8, %rax
        adcq  %r11, %rax
        adcq  %rdx, %rax
        adcq  %r10, %rax
        adcq  %rbp, %rax
        adcq  %r14, %rax
        adcq  %r13, %rax

        decl %r12d

        dest
        movq %rbx, (%rsi)
        dest
        movq %r8, 8(%rsi)
        dest
        movq %r11, 16(%rsi)
        dest
        movq %rdx, 24(%rsi)

        dest
        movq %r10, 32(%rsi)
        dest
        movq %rbp, 40(%rsi)
        dest
        movq %r14, 48(%rsi)
        dest
        movq %r13, 56(%rsi)

3:

        leaq 64(%rdi), %rdi
        leaq 64(%rsi), %rsi

        jnz     .Lloop

        adcq  %r9, %rax

        /* do last up to 56 bytes */
.Lhandle_tail:
        /* ecx: count */
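        /*
         * r10d preserves the total length so the word and byte tails
         * below can test its low bits; ecx becomes the number of whole
         * quadwords left after the 64-byte blocks, i.e. (len & 63) >> 3.
         */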
        movl %ecx, %r10d
        andl $63, %ecx
        shrl $3, %ecx
        jz      .Lfold
        clc
        .p2align 4
.Lloop_8:
        source
        movq (%rdi), %rbx
        adcq %rbx, %rax
        decl %ecx
        dest
        movq %rbx, (%rsi)
        leaq 8(%rsi), %rsi /* preserve carry */
        leaq 8(%rdi), %rdi
        jnz     .Lloop_8
        adcq %r9, %rax  /* add in carry */

.Lfold:
        /* reduce checksum to 32 bits */
        movl %eax, %ebx
        shrq $32, %rax
        addl %ebx, %eax
        adcl %r9d, %eax
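
        /*
         * The fold above is the usual one's-complement reduction: add
         * the low and high 32-bit halves of the 64-bit sum, then add
         * the end-around carry via adcl against the zero register.
         * E.g. rax = 0xffffffff00000001 gives ebx = 1, eax = 0xffffffff,
         * addl -> eax = 0 with CF set, adcl -> eax = 1.
         */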

        /* do last up to 6 bytes */
.Lhandle_7:
        movl %r10d, %ecx
        andl $7, %ecx
        shrl $1, %ecx
        jz   .Lhandle_1
        movl $2, %edx
        xorl %ebx, %ebx
        clc
        .p2align 4
.Lloop_1:
        source
        movw (%rdi), %bx
        adcl %ebx, %eax
        decl %ecx
        dest
        movw %bx, (%rsi)
        leaq 2(%rdi), %rdi
        leaq 2(%rsi), %rsi
        jnz .Lloop_1
        adcl %r9d, %eax /* add in carry */

        /* handle last odd byte */
.Lhandle_1:
        testb $1, %r10b
        jz    .Lende
        xorl  %ebx, %ebx
        source
        movb (%rdi), %bl
        dest
        movb %bl, (%rsi)
        addl %ebx, %eax
        adcl %r9d, %eax         /* carry */

        CFI_REMEMBER_STATE
.Lende:
        movq 2*8(%rsp), %rbx
        CFI_RESTORE rbx
        movq 3*8(%rsp), %r12
        CFI_RESTORE r12
        movq 4*8(%rsp), %r14
        CFI_RESTORE r14
        movq 5*8(%rsp), %r13
        CFI_RESTORE r13
        movq 6*8(%rsp), %rbp
        CFI_RESTORE rbp
        addq $7*8, %rsp
        CFI_ADJUST_CFA_OFFSET -7*8
        ret
        CFI_RESTORE_STATE

        /* Exception handlers. Very simple; zeroing is done in the wrappers. */
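/*
 * (%rsp) and 8(%rsp) are the src_err_ptr and dst_err_ptr arguments
 * saved at function entry; either may be NULL, hence the test before
 * the -EFAULT store. The partial sum left in eax is returned as-is
 * and is undefined, as documented in the header comment.
 */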
.Lbad_source:
        movq (%rsp), %rax
        testq %rax, %rax
        jz   .Lende
        movl $-EFAULT, (%rax)
        jmp  .Lende

.Lbad_dest:
        movq 8(%rsp), %rax
        testq %rax, %rax
        jz   .Lende
        movl $-EFAULT, (%rax)
        jmp .Lende
        CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)