linux/arch/x86/lib/memcpy_64.S
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

/*
 * We build a jump to memcpy_orig by default, which gets NOPped out on
 * the majority of x86 CPUs (those that set REP_GOOD). On CPUs that also
 * have the enhanced REP MOVSB/STOSB feature (ERMS), those NOPs are
 * changed into a jmp to memcpy_erms, which does the REP; MOVSB copy.
 */

.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 * rax original destination
 */
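/*
 * Note: %rdi/%rsi/%rdx are simply the first three System V AMD64 argument
 * registers for memcpy(dest, src, count); the original destination is
 * returned in %rax as the C prototype requires.
 */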
ENTRY(__memcpy)
ENTRY(memcpy)
        ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
                      "jmp memcpy_erms", X86_FEATURE_ERMS

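        /*
         * REP_GOOD path: copy count/8 qwords with REP MOVSQ, then the
         * remaining count%8 bytes with REP MOVSB. %rax keeps the original
         * destination as the return value.
         */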
        movq %rdi, %rax
        movq %rdx, %rcx
        shrq $3, %rcx
        andl $7, %edx
        rep movsq
        movl %edx, %ecx
        rep movsb
        ret
ENDPROC(memcpy)
ENDPROC(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
ENTRY(memcpy_erms)
        movq %rdi, %rax
        movq %rdx, %rcx
        rep movsb
        ret
ENDPROC(memcpy_erms)

ENTRY(memcpy_orig)
        movq %rdi, %rax

        cmpq $0x20, %rdx
        jb .Lhandle_tail

        /*
         * Check whether a memory false dependence could occur (a load
         * aliasing an earlier store because the low address bytes match
         * up), then jump to the copy direction that avoids it.
         */
        cmp  %dil, %sil
        jl .Lcopy_backward
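        /*
         * Bias the count down by 0x20 before entering the loop: the loop
         * subtracts another 0x20 at the top, keeps going while that
         * subtraction does not borrow (jae), and the extra 0x20 is added
         * back before the tail is handled.
         */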
        subq $0x20, %rdx
.Lcopy_forward_loop:
        subq $0x20,     %rdx

        /*
         * Move in blocks of 4x8 bytes:
         */
        movq 0*8(%rsi), %r8
        movq 1*8(%rsi), %r9
        movq 2*8(%rsi), %r10
        movq 3*8(%rsi), %r11
        leaq 4*8(%rsi), %rsi

        movq %r8,       0*8(%rdi)
        movq %r9,       1*8(%rdi)
        movq %r10,      2*8(%rdi)
        movq %r11,      3*8(%rdi)
        leaq 4*8(%rdi), %rdi
        jae  .Lcopy_forward_loop
        addl $0x20,     %edx
        jmp  .Lhandle_tail

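        /*
         * Backward copy: point both pointers at the end of their buffers
         * and copy 32-byte blocks back towards the start, then rewind to
         * the head so the common tail code can finish the remainder.
         */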
.Lcopy_backward:
        /*
         * Calculate copy position to tail.
         */
        addq %rdx,      %rsi
        addq %rdx,      %rdi
        subq $0x20,     %rdx
        /*
         * At most 3 ALU operations execute in one cycle, so append NOPs
         * (the .p2align below) to align the loop to a 16-byte chunk.
         */
        .p2align 4
.Lcopy_backward_loop:
        subq $0x20,     %rdx
        movq -1*8(%rsi),        %r8
        movq -2*8(%rsi),        %r9
        movq -3*8(%rsi),        %r10
        movq -4*8(%rsi),        %r11
        leaq -4*8(%rsi),        %rsi
        movq %r8,               -1*8(%rdi)
        movq %r9,               -2*8(%rdi)
        movq %r10,              -3*8(%rdi)
        movq %r11,              -4*8(%rdi)
        leaq -4*8(%rdi),        %rdi
        jae  .Lcopy_backward_loop

        /*
         * Calculate copy position to head.
         */
        addl $0x20,     %edx
        subq %rdx,      %rsi
        subq %rdx,      %rdi
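        /*
         * Tail handling: on every path that reaches this point, %edx holds
         * the 0..31 bytes that still need to be copied.
         */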
.Lhandle_tail:
        cmpl $16,       %edx
        jb   .Lless_16bytes

        /*
         * Move data from 16 bytes to 31 bytes: copy the first 16 and the
         * last 16 bytes of the range. The two windows may overlap in the
         * middle; the overlapping destination bytes just get written
         * twice with the same data.
         */
        movq 0*8(%rsi), %r8
        movq 1*8(%rsi), %r9
        movq -2*8(%rsi, %rdx),  %r10
        movq -1*8(%rsi, %rdx),  %r11
        movq %r8,       0*8(%rdi)
        movq %r9,       1*8(%rdi)
        movq %r10,      -2*8(%rdi, %rdx)
        movq %r11,      -1*8(%rdi, %rdx)
        retq
        .p2align 4
.Lless_16bytes:
        cmpl $8,        %edx
        jb   .Lless_8bytes
        /*
         * Move data from 8 bytes to 15 bytes.
         */
        movq 0*8(%rsi), %r8
        movq -1*8(%rsi, %rdx),  %r9
        movq %r8,       0*8(%rdi)
        movq %r9,       -1*8(%rdi, %rdx)
        retq
        .p2align 4
.Lless_8bytes:
        cmpl $4,        %edx
        jb   .Lless_3bytes

        /*
         * Move data from 4 bytes to 7 bytes.
         */
        movl (%rsi), %ecx
        movl -4(%rsi, %rdx), %r8d
        movl %ecx, (%rdi)
        movl %r8d, -4(%rdi, %rdx)
        retq
        .p2align 4
.Lless_3bytes:
        subl $1, %edx
        jb .Lend                /* borrow: the count was zero */
        /*
         * Move data from 1 to 3 bytes.
         */
        movzbl (%rsi), %ecx
        jz .Lstore_1byte        /* ZF from the subl above; movzbl preserves flags */
        movzbq 1(%rsi), %r8
        movzbq (%rsi, %rdx), %r9
        movb %r8b, 1(%rdi)
        movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
        movb %cl, (%rdi)

.Lend:
        retq
ENDPROC(memcpy_orig)

#ifndef CONFIG_UML

MCSAFE_TEST_CTL

/*
 * __memcpy_mcsafe - memory copy with machine check exception handling
 * Note that we only catch machine checks when reading the source addresses.
 * Writes to the target are posted and don't generate machine checks.
 */
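/*
 * Caller-side usage sketch (illustrative only, not part of this file):
 * the routine returns zero on success or the number of bytes not copied
 * on failure, so callers check the return value, e.g.
 *
 *	if (__memcpy_mcsafe(dst, src, len))
 *		goto copy_failed;
 *
 * where copy_failed is a placeholder for the caller's recovery path.
 */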
ENTRY(__memcpy_mcsafe)
        cmpl $8, %edx
        /* Less than 8 bytes? Go to byte copy loop */
        jb .L_no_whole_words

        /* Check for bad alignment of source */
        testl $7, %esi
        /* Already aligned */
        jz .L_8byte_aligned

        /* Copy one byte at a time until source is 8-byte aligned */
        movl %esi, %ecx
        andl $7, %ecx                   /* low three bits of the source address */
        subl $8, %ecx
        negl %ecx                       /* %ecx = 8 - (%rsi & 7) = leading byte count */
        subl %ecx, %edx                 /* the leading bytes come out of the total */
.L_read_leading_bytes:
        movb (%rsi), %al
        MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
        MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
        movb %al, (%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .L_read_leading_bytes

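        /* Bulk copy: %ecx = number of whole 8-byte words, %edx = trailing byte count */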
.L_8byte_aligned:
        movl %edx, %ecx
        andl $7, %edx
        shrl $3, %ecx
        jz .L_no_whole_words

.L_read_words:
        movq (%rsi), %r8
        MCSAFE_TEST_SRC %rsi 8 .E_read_words
        MCSAFE_TEST_DST %rdi 8 .E_write_words
.L_write_words:
        movq %r8, (%rdi)
        addq $8, %rsi
        addq $8, %rdi
        decl %ecx
        jnz .L_read_words

        /* Any trailing bytes? */
.L_no_whole_words:
        andl %edx, %edx
        jz .L_done_memcpy_trap

        /* Copy trailing bytes */
        movl %edx, %ecx
.L_read_trailing_bytes:
        movb (%rsi), %al
        MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
        MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
.L_write_trailing_bytes:
        movb %al, (%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .L_read_trailing_bytes

        /* Copy successful. Return zero */
.L_done_memcpy_trap:
        xorq %rax, %rax
        ret
ENDPROC(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)

        .section .fixup, "ax"
        /*
         * Return number of bytes not copied for any failure. Note that
         * there is no "tail" handling since the source buffer is 8-byte
         * aligned and poison is cacheline aligned.
         */
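        /*
         * The read fixups fall through to each other: .E_read_words turns
         * the remaining word count in %ecx into bytes, .E_leading_bytes
         * adds the bytes still owed in %edx, and .E_trailing_bytes moves
         * the resulting "bytes not copied" count into %eax.
         */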
.E_read_words:
        shll    $3, %ecx
.E_leading_bytes:
        addl    %edx, %ecx
.E_trailing_bytes:
        mov     %ecx, %eax
        ret

        /*
         * For write fault handling: since the destination may be
         * unaligned, faults on multi-byte writes are handled with a
         * byte-by-byte copy up to the write-protected page.
         */
.E_write_words:
        shll    $3, %ecx
        addl    %edx, %ecx
        movl    %ecx, %edx
        jmp mcsafe_handle_tail

        .previous

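        /*
         * The read-side entries use the machine-check-aware
         * _ASM_EXTABLE_FAULT() variant (see the comment at the top of
         * __memcpy_mcsafe); the write sides only need ordinary fault
         * fixups.
         */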
        _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
        _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
        _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
        _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
        _ASM_EXTABLE(.L_write_words, .E_write_words)
        _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif