linux/arch/powerpc/lib/memcpy_64.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-or-later */
   2/*
   3 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
   4 */
   5#include <asm/processor.h>
   6#include <asm/ppc_asm.h>
   7#include <asm/export.h>
   8#include <asm/asm-compat.h>
   9#include <asm/feature-fixups.h>
  10#include <asm/kasan.h>
  11
  /*
   * Default for SELFTEST_CASE when the build has not already defined it.
   * Within this file the value is only read by the two
   * "test_feature = (SELFTEST_CASE == N)" assignments in the big-endian code;
   * test_feature itself is presumably consumed by the feature-section macros
   * (defined outside this file -- confirm there).
   */
  12#ifndef SELFTEST_CASE
  13/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
  14#define SELFTEST_CASE   0
  15#endif
  16
  /*
   * memcpy
   *
   * Register use as seen in this file:
   *   r3 = destination pointer, r4 = source pointer, r5 = byte count.
   *   Every return path leaves the original destination pointer in r3.
   *
   * The entry instruction is a feature-section alternative keyed on
   * CPU_FTR_VMX_COPY. The macros come from outside this file; going by the
   * _IFCLR suffix, the first body is presumably the one that runs when the
   * feature bit is clear, and the else-body (a branch to memcpy_power7,
   * present only on CONFIG_PPC_BOOK3S_64 builds) the one that runs when it
   * is set -- confirm against the macro definitions.
   *
   * NOTE(review): .align 7 is assumed to mean 2^7 = 128-byte alignment, as
   * gas treats the operand as an exponent on PowerPC -- confirm.
   */
  17        .align  7
  18_GLOBAL_TOC_KASAN(memcpy)
  19BEGIN_FTR_SECTION
  20#ifdef __LITTLE_ENDIAN__
  21        cmpdi   cr7,r5,0                /* cr7 = count vs 0; read by the beqlr in the little-endian loop */
  22#else
  23        std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1)     /* save destination pointer for return value */
  24#endif
  25FTR_SECTION_ELSE
  26#ifdef CONFIG_PPC_BOOK3S_64
  27        b       memcpy_power7
  28#endif
  29ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
  /*
   * Little-endian build: copy one byte per iteration.
   * r9 is used as the destination cursor so that r3 is never modified and
   * still holds the destination pointer at the blr.
   * Both cursors are moved back by one first because lbzu/stbu access
   * base+1 and then write that address back into the base register.
   */
  30#ifdef __LITTLE_ENDIAN__
  31        /* dumb little-endian memcpy that will get replaced at runtime */
  32        addi r9,r3,-1           /* r9 = destination cursor, one byte before dest */
  33        addi r4,r4,-1           /* source cursor, one byte before src */
  34        beqlr cr7               /* count == 0 (cmpdi at entry): return without copying */
  35        mtctr r5                /* CTR = number of bytes to copy */
  361:      lbzu r10,1(r4)
  37        stbu r10,1(r9)
  38        bdnz 1b                 /* decrement CTR, loop while it is non-zero */
  39        blr
  40#else
          /*
           * Big-endian build: classify the copy and dispatch.
           *   cr7 <- low four bits of the length (bit values 8, 4, 2, 1 map to
           *          cr7*4+0 .. cr7*4+3), tested later with bf/bt.
           *   cr1 <- unsigned compare of the length with 16.
           *   r6  <- bytes needed to bring the destination to an 8-byte
           *          boundary, with cr0 set from that value.
           * Lengths below 16 go to .Lshort_copy; otherwise a misaligned
           * destination goes to .Ldst_unaligned (unless that branch has been
           * patched to a nop, see the existing note below) and everything
           * else falls through into .Ldst_aligned.
           */
  41        PPC_MTOCRF(0x01,r5)             /* cr7 = low four bits of the length */
  42        cmpldi  cr1,r5,16               /* cr1 = length vs 16 (unsigned) */
  43        neg     r6,r3           # LS 3 bits = # bytes to 8-byte dest bdry
  44        andi.   r6,r6,7                 /* r6 = 0..7; cr0.eq if dest is already 8-byte aligned */
  45        dcbt    0,r4                    /* cache-touch hint on the start of the source */
  46        blt     cr1,.Lshort_copy        /* fewer than 16 bytes */
  47/* Below we want to nop out the bne if we're on a CPU that has the
  48   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
  49   cleared.
  50   At the time of writing the only CPU that has this combination of bits
  51   set is Power6. */
  52test_feature = (SELFTEST_CASE == 1)
  53BEGIN_FTR_SECTION
  54        nop
  55FTR_SECTION_ELSE
  56        bne     .Ldst_unaligned         /* cr0 from the andi. above: r6 != 0 */
  57ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
  58                    CPU_FTR_UNALIGNED_LD_STD)
  /*
   * Doubleword copy loop plus 0-7 byte tail.
   *
   * On entry: r3 = dest, r4 = src, r5 = bytes left, cr7 = low four bits of
   * r5, cr1 = r5 compared (unsigned) with 16. Reached by fall-through from
   * the prologue (r5 >= 16) or from .Ldst_unaligned, where r5 is at least 9
   * because the original length was >= 16 and at most 7 bytes were peeled
   * off; so whenever cr1 says "less than 16", length bit 8 is set.
   *
   * r3 is biased by -16 and r4 by -8 so the loop can run on update-form
   * ldu/stdu with fixed displacements. Each iteration moves 16 bytes and
   * the loads run one doubleword ahead of the stores, alternating between
   * r8 and r9. CTR = r5 >> 4. If length bit 8 is set there is an odd
   * doubleword: both cursors are bumped by 8 and the loop is entered at 1:
   * instead of 2:.
   *
   * The andi./bne pair is inside a feature section keyed on
   * CPU_FTR_UNALIGNED_LD_STD (presumably kept only when that feature is
   * clear -- confirm against the macro definitions); it diverts sources
   * that are not 8-byte aligned to .Lsrc_unaligned with r0 = src & 7.
   */
  59.Ldst_aligned:
  60        addi    r3,r3,-16               /* bias dest for the 8/16 store displacements below */
  61test_feature = (SELFTEST_CASE == 0)
  62BEGIN_FTR_SECTION
  63        andi.   r0,r4,7                 /* r0 = source misalignment in bytes */
  64        bne     .Lsrc_unaligned
  65END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
  66        srdi    r7,r5,4                 /* r7 = number of 16-byte chunks */
  67        ld      r9,0(r4)                /* preload the first source doubleword */
  68        addi    r4,r4,-8                /* bias src for the 8/16 load displacements below */
  69        mtctr   r7
  70        andi.   r5,r5,7                 /* r5 = tail byte count; cr0.eq if there is no tail */
  71        bf      cr7*4+0,2f              /* length bit 8 clear: even doubleword count, enter at 2: */
  72        addi    r3,r3,8                 /* odd doubleword: shift both cursors up by 8 ... */
  73        addi    r4,r4,8
  74        mr      r8,r9                   /* ... and treat the preloaded value as the pending store */
  75        blt     cr1,3f                  /* fewer than 16 bytes: that single doubleword is all */
  761:      ld      r9,8(r4)
  77        std     r8,8(r3)
  782:      ldu     r8,16(r4)
  79        stdu    r9,16(r3)
  80        bdnz    1b
  813:      std     r8,8(r3)                /* store the last pending doubleword */
  82        beq     3f                      /* cr0 from the andi. above: no tail, go to the return below */
  83        addi    r3,r3,16                /* r3 = first destination byte not yet written */
          /*
           * Tail: cr7 bits for 4, 2 and 1 select a word, halfword and byte
           * move. The source is read at 8(r4) because r4 still points at the
           * last doubleword loaded, 8 bytes behind the next unread byte.
           * No branch to .Ldo_tail appears in the visible part of this file.
           */
  84.Ldo_tail:
  85        bf      cr7*4+1,1f
  86        lwz     r9,8(r4)
  87        addi    r4,r4,4
  88        stw     r9,0(r3)
  89        addi    r3,r3,4
  901:      bf      cr7*4+2,2f
  91        lhz     r9,8(r4)
  92        addi    r4,r4,2
  93        sth     r9,0(r3)
  94        addi    r3,r3,2
  952:      bf      cr7*4+3,3f
  96        lbz     r9,8(r4)
  97        stb     r9,0(r3)
  983:      ld      r3,-STACKFRAMESIZE+STK_REG(R31)(r1)     /* return dest pointer */
  99        blr
 100
 /*
  * Source is not 8-byte aligned.
  *
  * On entry (from .Ldst_aligned): r0 = src & 7 (non-zero), r3 = dest - 16,
  * r4 = src, r5 = bytes left, cr7 = low four bits of r5.
  *
  * r4 is rounded down to an 8-byte boundary so every ld/ldu below uses an
  * aligned address. Each destination doubleword is then built from two
  * neighbouring source doublewords:
  *         d[k] = (s[k] << r10) | (s[k+1] >> r11)
  * where r10 = 8 * (src & 7) and r11 = 64 - r10.
  *
  * cr7 bit for 8 chooses between two lead-ins (even or odd number of
  * doublewords); cr6 (doubleword count vs 3) lets short copies skip the
  * loop. Each loop iteration does two loads and two stores. cr0, set by
  * the andi. in the setup, stays untouched until the "beq 4f" after the
  * loop.
  */
 101.Lsrc_unaligned:
 102        srdi    r6,r5,3                 /* r6 = number of whole doublewords to copy */
 103        addi    r5,r5,-16
 104        subf    r4,r0,r4                /* round src down to an 8-byte boundary */
 105        srdi    r7,r5,4                 /* r7 = (length - 16) / 16, the loop count */
 106        sldi    r10,r0,3                /* r10 = misalignment in bits: left-shift amount */
 107        cmpdi   cr6,r6,3                /* cr6 = doubleword count vs 3, used by the early exits */
 108        andi.   r5,r5,7                 /* r5 = tail byte count; cr0.eq if there is no tail */
 109        mtctr   r7
 110        subfic  r11,r10,64              /* r11 = 64 - r10: right-shift amount */
 111        add     r5,r5,r0                /* tail bytes + misalignment, compared with 8 after the loop */
 112
 113        bt      cr7*4+0,0f              /* length bit 8 set: odd doubleword count, lead-in at 0: */
 114
 115        ld      r9,0(r4)        # 3+2n loads, 2+2n stores
 116        ld      r0,8(r4)
 117        sld     r6,r9,r10
 118        ldu     r9,16(r4)
 119        srd     r7,r0,r11
 120        sld     r8,r0,r10
 121        or      r7,r7,r6
 122        blt     cr6,4f                  /* fewer than 3 doublewords: skip the loop */
 123        ld      r0,8(r4)
 124        # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
 125        b       2f
 126
 1270:      ld      r0,0(r4)        # 4+2n loads, 3+2n stores
 128        ldu     r9,8(r4)
 129        sld     r8,r0,r10
 130        addi    r3,r3,-8
 131        blt     cr6,5f                  /* fewer than 3 doublewords: only one to store, at 5: */
 132        ld      r0,8(r4)
 133        srd     r12,r9,r11
 134        sld     r6,r9,r10
 135        ldu     r9,16(r4)
 136        or      r12,r8,r12
 137        srd     r7,r0,r11
 138        sld     r8,r0,r10
 139        addi    r3,r3,16
 140        beq     cr6,3f                  /* exactly 3 doublewords: skip the loop */
 141
 142        # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
 1431:      or      r7,r7,r6
 144        ld      r0,8(r4)
 145        std     r12,8(r3)
 1462:      srd     r12,r9,r11
 147        sld     r6,r9,r10
 148        ldu     r9,16(r4)
 149        or      r12,r8,r12
 150        stdu    r7,16(r3)
 151        srd     r7,r0,r11
 152        sld     r8,r0,r10
 153        bdnz    1b
 154
 1553:      std     r12,8(r3)
 156        or      r7,r7,r6
 1574:      std     r7,16(r3)
 1585:      srd     r12,r9,r11
 159        or      r12,r8,r12
 160        std     r12,24(r3)              /* last whole destination doubleword */
        /*
         * cr0 still comes from the andi. in the setup: no tail bytes left.
         * "4f" binds to the next 4: label further down the file, which in
         * this source is the return sequence at the end of .Lshort_copy.
         */
 161        beq     4f
 162        cmpwi   cr1,r5,8                /* do the tail bytes reach past the doubleword held in r9? */
 163        addi    r3,r3,32                /* r3 = first destination byte not yet written */
 164        sld     r9,r9,r10               /* left-justify the bytes of r9 not yet stored */
 165        ble     cr1,6f                  /* no: r9 already contains every tail byte */
 166        ld      r0,8(r4)                /* yes: fetch one more source doubleword ... */
 167        srd     r7,r0,r11
 168        or      r9,r7,r9                /* ... and merge its leading bytes into r9 */
        /*
         * Tail bytes sit left-justified in r9. Each rotldi brings the next
         * 4, 2 or 1 bytes down to the low end of r9, which is the part that
         * stw/sth/stb write to memory.
         */
 1696:
 170        bf      cr7*4+1,1f
 171        rotldi  r9,r9,32
 172        stw     r9,0(r3)
 173        addi    r3,r3,4
 1741:      bf      cr7*4+2,2f
 175        rotldi  r9,r9,16
 176        sth     r9,0(r3)
 177        addi    r3,r3,2
 1782:      bf      cr7*4+3,3f
 179        rotldi  r9,r9,8
 180        stb     r9,0(r3)
 1813:      ld      r3,-STACKFRAMESIZE+STK_REG(R31)(r1)     /* return dest pointer */
 182        blr
 183
 /*
  * Destination is not 8-byte aligned.
  *
  * On entry: r6 = number of bytes (1-7) up to the next 8-byte destination
  * boundary, r5 = total length (at least 16, since shorter copies were sent
  * to .Lshort_copy), r3 = dest, r4 = src.
  *
  * The r6 leading bytes are copied as an optional byte, halfword and word,
  * in that order, with r7 tracking the offset copied so far. Going smallest
  * first keeps each store naturally aligned on the destination side.
  * Afterwards both pointers are advanced by r6, cr7 and cr1 are refreshed
  * for the shortened length, and control goes to .Ldst_aligned.
  */
 184.Ldst_unaligned:
 185        PPC_MTOCRF(0x01,r6)             # put #bytes to 8B bdry into cr7
 186        subf    r5,r6,r5                /* r5 = bytes left once the boundary is reached */
 187        li      r7,0                    /* r7 = offset of the next leading byte to copy */
 188        cmpldi  cr1,r5,16               /* cr1 = new length vs 16, tested in .Ldst_aligned */
 189        bf      cr7*4+3,1f              /* r6 bit 1 clear: no single byte */
 190        lbz     r0,0(r4)
 191        stb     r0,0(r3)
 192        addi    r7,r7,1
 1931:      bf      cr7*4+2,2f              /* r6 bit 2 clear: no halfword */
 194        lhzx    r0,r7,r4
 195        sthx    r0,r7,r3
 196        addi    r7,r7,2
 1972:      bf      cr7*4+1,3f              /* r6 bit 4 clear: no word */
 198        lwzx    r0,r7,r4
 199        stwx    r0,r7,r3
 2003:      PPC_MTOCRF(0x01,r5)             /* cr7 = low four bits of the new length */
 201        add     r4,r6,r4                /* step src past the leading bytes */
 202        add     r3,r6,r3                /* dest is now 8-byte aligned */
 203        b       .Ldst_aligned
 204
 /*
  * Copies of fewer than 16 bytes (the prologue branches here when r5 < 16).
  * cr7 holds the low four bits of the length; each set bit triggers one
  * fixed-size move, largest first: 8 bytes (done as two word loads/stores),
  * then 4, 2 and 1. r3 and r4 are advanced after each move, so the
  * destination pointer is reloaded from its stack slot before returning.
  *
  * The return sequence at 4: is also the target of the "beq 4f" in
  * .Lsrc_unaligned, since that is the next 4: label after it in the file.
  */
 205.Lshort_copy:
 206        bf      cr7*4+0,1f              /* length bit 8 clear: skip the 8-byte move */
 207        lwz     r0,0(r4)
 208        lwz     r9,4(r4)
 209        addi    r4,r4,8
 210        stw     r0,0(r3)
 211        stw     r9,4(r3)
 212        addi    r3,r3,8
 2131:      bf      cr7*4+1,2f              /* length bit 4 clear: skip the word */
 214        lwz     r0,0(r4)
 215        addi    r4,r4,4
 216        stw     r0,0(r3)
 217        addi    r3,r3,4
 2182:      bf      cr7*4+2,3f              /* length bit 2 clear: skip the halfword */
 219        lhz     r0,0(r4)
 220        addi    r4,r4,2
 221        sth     r0,0(r3)
 222        addi    r3,r3,2
 2233:      bf      cr7*4+3,4f              /* length bit 1 clear: skip the byte */
 224        lbz     r0,0(r4)
 225        stb     r0,0(r3)
 2264:      ld      r3,-STACKFRAMESIZE+STK_REG(R31)(r1)     /* return dest pointer */
 227        blr
 228#endif
 /*
  * Export memcpy through the EXPORT_SYMBOL and EXPORT_SYMBOL_KASAN macros;
  * both are defined outside this file, so their exact effect is not visible
  * here.
  */
 229EXPORT_SYMBOL(memcpy)
 230EXPORT_SYMBOL_KASAN(memcpy)
 231