/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
#include <asm/ppc_asm.h>
#include <asm/errno.h>
#include <asm/export.h>
/*
 * Exception-table macros.  Each one drops a numbered local label on the
 * load/store that follows it and registers an EX_TABLE fixup, so that a
 * fault or machine check on that single instruction redirects execution
 * to the named recovery label instead of oopsing:
 *
 *   err1 - covers accesses made while no non-volatile regs are live;
 *          recover at .Ldo_err1 (byte-wise retry of the remainder).
 *   err2 - covers accesses made while r14-r22 are spilled on the stack;
 *          recover at .Ldo_err2 (restore regs, then fall into .Ldo_err1).
 *   err3 - covers the byte-wise retry loop itself; recover at .Ldone
 *          (report the exact residual count from CTR).
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:	EX_TABLE(300b,.Ldone)
	.endm
  24
/*
 * Recovery paths.  Return-value contract for copy_mc_generic:
 * r3 = 0 on complete success, otherwise the number of bytes NOT copied.
 */
.Ldo_err2:
	/* Fault hit inside the err2 window: r14-r22 are live on our
	 * stack frame.  Restore them and pop the frame, then fall
	 * through to the byte-wise retry. */
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7			/* r7 = bytes still outstanding */
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b
	li	r3,0			/* retry finished cleanly: success */
	blr

.Ldone:
	/* A byte in the retry loop faulted as well.  CTR still counts
	 * the current byte plus all the ones after it, i.e. exactly the
	 * bytes never copied - return that. */
	mfctr	r3
	blr
  51
  52
/*
 * copy_mc_generic - memory copy that tolerates machine checks / faults.
 *
 * C prototype (callers treat it as):
 *   unsigned long copy_mc_generic(void *dst, const void *src,
 *                                 unsigned long size);
 * In:  r3 = dst, r4 = src, r5 = size in bytes
 * Out: r3 = 0 on success, otherwise the number of bytes not copied
 *
 * Every load/store is covered by an err1/err2/err3 exception-table
 * entry (see the macros above).  r7 shadows the remaining byte count at
 * every step so the recovery code knows exactly how much was left when
 * an access faulted.  r5 holds the count still owed by the *current and
 * later* phases and is trimmed as each phase's bits are consumed.
 */
_GLOBAL(copy_mc_generic)
	mr	r7,r5			/* r7 = bytes remaining (recovery count) */
	cmpldi	r5,16
	blt	.Lshort_copy		/* tiny copy: go straight to the tail */

.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4			/* low 3 bits of r6 = bytes to 8B boundary */
	mtocrf	0x01,r6			/* low nibble of r6 -> cr7 for bf tests */
	clrldi	r6,r6,(64-3)		/* r6 &= 7 (alignment byte count) */

	bf	cr7*4+3,1f		/* nibble bit 0 (value 1): copy 1B */
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f		/* nibble bit 1 (value 2): copy 2B */
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f		/* nibble bit 2 (value 4): copy 4B */
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6		/* drop the alignment bytes from the count */
	cmpldi	r5,128			/* set CR for the blt 5f below */

	/* Spill the non-volatiles used by the unrolled loops.  From here
	 * until the frame is popped, faults must go via err2 so
	 * .Ldo_err2 can restore r14-r22 first. */
	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)	/* LR save slot (never reloaded
						 * here - no calls are made;
						 * presumably kept for unwind/
						 * backtrace) */

	blt	5f			/* < 128B left: skip the cacheline loop */
	srdi	r6,r5,7			/* r6 = number of whole 128B chunks */
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b

	clrldi	r5,r5,(64-7)		/* r5 %= 128 */

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6			/* nibble of r5>>4 -> cr7:
					 * bit1=64B, bit2=32B, bit3=16B chunk */

6:	bf	cr7*4+1,7f		/* 64B chunk present? */
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
	addi	r4,r4,64
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r3,r3,64
	subi	r7,r7,64

	/* Done with the non-volatiles: restore them and pop the frame.
	 * Remaining accesses use only volatile regs, so they fall back
	 * to err1 coverage. */
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 63B to go */
	bf	cr7*4+2,8f		/* 32B chunk present? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r3,r3,32
	subi	r7,r7,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f		/* 16B chunk present? */
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)		/* r5 %= 16 */

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5			/* low nibble of r5 -> cr7:
					 * bit0=8B, bit1=4B, bit2=2B, bit3=1B */
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8
	subi	r7,r7,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0			/* all bytes copied: return 0 */
	blr

EXPORT_SYMBOL_GPL(copy_mc_generic);
 243