linux/arch/powerpc/lib/copyuser_64.S
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public License
   6 * as published by the Free Software Foundation; either version
   7 * 2 of the License, or (at your option) any later version.
   8 */
   9#include <asm/processor.h>
  10#include <asm/ppc_asm.h>
  11#include <asm/export.h>
  12
  13#ifdef __BIG_ENDIAN__
/*
 * sLd/sHd name a doubleword shift by the direction the bytes move in
 * memory instead of by bit direction, so the merge code in
 * .Lsrc_unaligned is written once and works for either byte order.
 */
  14#define sLd sld         /* Shift towards low-numbered address. */
  15#define sHd srd         /* Shift towards high-numbered address. */
  16#else   /* !__BIG_ENDIAN__ */
  17#define sLd srd         /* Shift towards low-numbered address. */
  18#define sHd sld         /* Shift towards high-numbered address. */
  19#endif  /* __BIG_ENDIAN__ */
  20
  21        .align  7
/*
 * __copy_tofrom_user / __copy_tofrom_user_base
 *
 * In:   r3 = destination, r4 = source, r5 = byte count.
 * Out:  r3 = 0 when everything was copied; the fault fixup code further
 *       down returns the number of bytes that were not copied.
 *
 * The three arguments are saved at -24/-16/-8(r1) (below the stack
 * pointer, no frame is created) so the fixup code can work out how far
 * the copy got.
 *
 * Condition register use set up here:
 *   cr1 = count compared with 16
 *   cr7 = low four bits of the count (8/4/2/1-byte tail pieces)
 */
  22_GLOBAL_TOC(__copy_tofrom_user)
  23BEGIN_FTR_SECTION
  24        nop
  25FTR_SECTION_ELSE
  26        b       __copy_tofrom_user_power7       /* NOTE(review): presumably the variant kept when CPU_FTR_VMX_COPY is set - confirm against the FTR macros */
  27ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
  28_GLOBAL(__copy_tofrom_user_base)
  29        /* first check for a whole page copy on a page boundary */
  30        cmpldi  cr1,r5,16       /* cr1: count vs 16, tested by the blt below */
  31        cmpdi   cr6,r5,4096     /* cr6.eq = (count == 4096) */
  32        or      r0,r3,r4        /* combine both pointers for one alignment test */
  33        neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
  34        andi.   r0,r0,4095      /* cr0.eq = both pointers are 4096-byte aligned */
  35        std     r3,-24(r1)      /* save original destination for the fixups */
  36        crand   cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq = aligned && count == 4096 */
  37        std     r4,-16(r1)      /* save original source */
  38        std     r5,-8(r1)       /* save original count */
  39        dcbt    0,r4            /* cache touch hint on the start of the source */
  40        beq     .Lcopy_page_4K  /* whole aligned page: use the dedicated loop */
  41        andi.   r6,r6,7         /* r6 = bytes to 8-byte dest boundary; cr0.eq if none */
  42        PPC_MTOCRF(0x01,r5)     /* cr7 = low four bits of the count */
  43        blt     cr1,.Lshort_copy        /* fewer than 16 bytes */
  44/* Below we want to nop out the bne if we're on a CPU that has the
  45 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
  46 * cleared.
  47 * At the time of writing the only CPU that has this combination of bits
  48 * set is Power6.
  49 */
  50BEGIN_FTR_SECTION
  51        nop
  52FTR_SECTION_ELSE
  53        bne     .Ldst_unaligned /* cr0 from the andi. on r6: destination not 8-byte aligned */
  54ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
  55                    CPU_FTR_UNALIGNED_LD_STD)
  56.Ldst_aligned:
/*
 * Main copy path.  Reached by falling through from the entry code, from
 * .Ldst_unaligned once the destination has been brought to an 8-byte
 * boundary, and from the page-copy fixup.
 *
 * r3 is biased by -16 for the duration of the 32-byte loop so that the
 * loop can be entered in the middle (at 22:) with the first 16 bytes
 * already loaded; the bias is removed again at .Ldo_tail or on the
 * "odd 16 bytes" path.
 *
 * ctr = count / 32, cr1.eq = no 32-byte chunks, cr7 = low bits of count.
 */
  57        addi    r3,r3,-16       /* bias destination, see above */
  58BEGIN_FTR_SECTION
  59        andi.   r0,r4,7         /* r0 = source offset within a doubleword */
  60        bne     .Lsrc_unaligned /* .Lsrc_unaligned expects that offset in r0 */
  61END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
  62        blt     cr1,.Ldo_tail           /* if < 16 bytes to copy */
  63        srdi    r0,r5,5         /* r0 = number of 32-byte chunks */
  64        cmpdi   cr1,r0,0        /* cr1.eq = no 32-byte chunk at all */
  6520:     ld      r7,0(r4)        /* first 16 bytes -> r7/r6 */
  66220:    ld      r6,8(r4)
  67        addi    r4,r4,16
  68        mtctr   r0
  69        andi.   r0,r5,0x10      /* is there an odd 16-byte piece? */
  70        beq     22f             /* no: enter the loop in the middle */
  71        addi    r3,r3,16        /* yes: drop the bias ... */
  72        addi    r4,r4,-16       /* ... and undo the source advance */
  73        mr      r9,r7           /* the 16 bytes become the pending r9/r8 pair */
  74        mr      r8,r6
  75        beq     cr1,72f         /* only those 16 bytes (plus tail) to store */
  7621:     ld      r7,16(r4)       /* loop: 32 bytes per iteration, loads and */
  77221:    ld      r6,24(r4)       /* stores of the two register pairs interleaved */
  78        addi    r4,r4,32
  7970:     std     r9,0(r3)
  80270:    std     r8,8(r3)
  8122:     ld      r9,0(r4)
  82222:    ld      r8,8(r4)
  8371:     std     r7,16(r3)
  84271:    std     r6,24(r3)
  85        addi    r3,r3,32
  86        bdnz    21b
  8772:     std     r9,0(r3)        /* store the last pending 16 bytes */
  88272:    std     r8,8(r3)
  89        andi.   r5,r5,0xf       /* r5 = tail bytes (0..15) */
  90        beq+    3f              /* none: done */
  91        addi    r4,r4,16        /* step source past the 16 bytes just stored */
  92.Ldo_tail:
  93        addi    r3,r3,16        /* removes the -16 bias (direct entry) or skips the 16 bytes stored at 72 */
  94        bf      cr7*4+0,246f    /* cr7 bit 0 <=> count & 8 */
  95244:    ld      r9,0(r4)
  96        addi    r4,r4,8
  97245:    std     r9,0(r3)
  98        addi    r3,r3,8
  99246:    bf      cr7*4+1,1f      /* count & 4 */
 10023:     lwz     r9,0(r4)
 101        addi    r4,r4,4
 10273:     stw     r9,0(r3)
 103        addi    r3,r3,4
 1041:      bf      cr7*4+2,2f      /* count & 2 */
 10544:     lhz     r9,0(r4)
 106        addi    r4,r4,2
 10774:     sth     r9,0(r3)
 108        addi    r3,r3,2
 1092:      bf      cr7*4+3,3f      /* count & 1 */
 11045:     lbz     r9,0(r4)
 11175:     stb     r9,0(r3)
 1123:      li      r3,0            /* success: 0 bytes left uncopied */
 113        blr
 114
 115.Lsrc_unaligned:
/*
 * Destination is 8-byte aligned, source is not.  The source pointer is
 * rounded down to a doubleword boundary, aligned doublewords are loaded
 * and each output doubleword is built from two neighbouring inputs with
 * sLd/sHd.
 *
 * On entry: r0 = source & 7 (non-zero), r3 = destination - 16,
 *           r5 = byte count, cr7 = low four bits of the count.
 * In the loop: r10 = misalignment in bits, r11 = 64 - r10,
 *              r0/r9 = most recently loaded doublewords,
 *              r7/r12 = merged doublewords waiting to be stored.
 * cr0 is set by the andi. on r5 below and is not written again before
 * the "bne 6f" after label 80, which is what decides whether a
 * sub-doubleword tail remains.
 */
 116        srdi    r6,r5,3         /* r6 = count in doublewords */
 117        addi    r5,r5,-16
 118        subf    r4,r0,r4        /* round source down to a doubleword boundary */
 119        srdi    r7,r5,4         /* r7 = (count - 16) / 16 = loop iterations */
 120        sldi    r10,r0,3        /* r10 = misalignment in bits */
 121        cmpldi  cr6,r6,3        /* cr6: doubleword count vs 3 */
 122        andi.   r5,r5,7         /* r5 = count & 7; cr0.eq = no byte tail */
 123        mtctr   r7
 124        subfic  r11,r10,64      /* r11 = 64 - r10 */
 125        add     r5,r5,r0        /* r5 = tail bytes + source offset (cr0 unchanged) */
 126        bt      cr7*4+0,28f     /* count & 8 set: take the odd-doubleword entry */
 127
 12824:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
 12925:     ld      r0,8(r4)
 130        sLd     r6,r9,r10
 13126:     ldu     r9,16(r4)
 132        sHd     r7,r0,r11
 133        sLd     r8,r0,r10
 134        or      r7,r7,r6
 135        blt     cr6,79f         /* fewer than 3 doublewords: only the stores at 79/80 remain */
 13627:     ld      r0,8(r4)
 137        b       2f
 138
 13928:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
 14029:     ldu     r9,8(r4)
 141        sLd     r8,r0,r10
 142        addi    r3,r3,-8        /* one extra doubleword goes out first: shift the store window down */
 143        blt     cr6,5f          /* fewer than 3 doublewords: only the store at 80 remains */
 14430:     ld      r0,8(r4)
 145        sHd     r12,r9,r11
 146        sLd     r6,r9,r10
 14731:     ldu     r9,16(r4)
 148        or      r12,r8,r12
 149        sHd     r7,r0,r11
 150        sLd     r8,r0,r10
 151        addi    r3,r3,16
 152        beq     cr6,78f         /* exactly 3 doublewords: skip the loop */
 153
 1541:      or      r7,r7,r6        /* loop: 16 bytes stored per iteration */
 15532:     ld      r0,8(r4)
 15676:     std     r12,8(r3)
 1572:      sHd     r12,r9,r11
 158        sLd     r6,r9,r10
 15933:     ldu     r9,16(r4)
 160        or      r12,r8,r12
 16177:     stdu    r7,16(r3)
 162        sHd     r7,r0,r11
 163        sLd     r8,r0,r10
 164        bdnz    1b
 165
 16678:     std     r12,8(r3)       /* drain the merged doublewords still in registers */
 167        or      r7,r7,r6
 16879:     std     r7,16(r3)
 1695:      sHd     r12,r9,r11
 170        or      r12,r8,r12
 17180:     std     r12,24(r3)
 172        bne     6f              /* cr0 from "andi. r5,r5,7": a byte tail remains */
 173        li      r3,0            /* success */
 174        blr
 1756:      cmpwi   cr1,r5,8        /* tail + source offset vs 8 */
 176        addi    r3,r3,32        /* r3 -> first byte after the doubleword stored at 80 */
 177        sLd     r9,r9,r10       /* left-over source bytes still held in r9 */
 178        ble     cr1,7f          /* r9 already holds every remaining byte */
 17934:     ld      r0,8(r4)        /* otherwise one more doubleword is needed */
 180        sHd     r7,r0,r11
 181        or      r9,r7,r9
 1827:
/*
 * Store the last 4/2/1 bytes from r9 as selected by cr7.  The rotates
 * bring the next piece into the low-order bits that stw/sth/stb store:
 * before the store on big-endian, after it on little-endian.
 */
 183        bf      cr7*4+1,1f      /* count & 4 */
 184#ifdef __BIG_ENDIAN__
 185        rotldi  r9,r9,32
 186#endif
 18794:     stw     r9,0(r3)
 188#ifdef __LITTLE_ENDIAN__
 189        rotrdi  r9,r9,32
 190#endif
 191        addi    r3,r3,4
 1921:      bf      cr7*4+2,2f      /* count & 2 */
 193#ifdef __BIG_ENDIAN__
 194        rotldi  r9,r9,16
 195#endif
 19695:     sth     r9,0(r3)
 197#ifdef __LITTLE_ENDIAN__
 198        rotrdi  r9,r9,16
 199#endif
 200        addi    r3,r3,2
 2012:      bf      cr7*4+3,3f      /* count & 1 */
 202#ifdef __BIG_ENDIAN__
 203        rotldi  r9,r9,8
 204#endif
 20596:     stb     r9,0(r3)
 206#ifdef __LITTLE_ENDIAN__
 207        rotrdi  r9,r9,8
 208#endif
 2093:      li      r3,0            /* success */
 210        blr
 211
 212.Ldst_unaligned:
/*
 * Destination is not 8-byte aligned: copy the r6 (1..7) leading bytes as
 * a 1-, 2- and/or 4-byte piece, then rejoin .Ldst_aligned with both
 * pointers advanced by r6 and the count reduced accordingly.
 * r7 is the running offset of the next piece; the fault fixups for this
 * section add it to r3.
 */
 213        PPC_MTOCRF(0x01,r6)             /* put #bytes to 8B bdry into cr7 */
 214        subf    r5,r6,r5        /* r5 = count left after the head */
 215        li      r7,0
 216        cmpldi  cr1,r5,16       /* refresh cr1 for the reduced count */
 217        bf      cr7*4+3,1f      /* r6 & 1 */
 21835:     lbz     r0,0(r4)
 21981:     stb     r0,0(r3)
 220        addi    r7,r7,1
 2211:      bf      cr7*4+2,2f      /* r6 & 2 */
 22236:     lhzx    r0,r7,r4
 22382:     sthx    r0,r7,r3
 224        addi    r7,r7,2
 2252:      bf      cr7*4+1,3f      /* r6 & 4 */
 22637:     lwzx    r0,r7,r4
 22783:     stwx    r0,r7,r3
 2283:      PPC_MTOCRF(0x01,r5)     /* cr7 = low four bits of the remaining count */
 229        add     r4,r6,r4
 230        add     r3,r6,r3
 231        b       .Ldst_aligned
 232
 233.Lshort_copy:
/*
 * Fewer than 16 bytes: copy an 8-, 4-, 2- and 1-byte piece as selected by
 * the low four bits of the count in cr7.  The 8-byte piece is moved as
 * two words, so no doubleword access is made on this path.
 */
 234        bf      cr7*4+0,1f      /* count & 8 */
 23538:     lwz     r0,0(r4)
 23639:     lwz     r9,4(r4)
 237        addi    r4,r4,8
 23884:     stw     r0,0(r3)
 23985:     stw     r9,4(r3)
 240        addi    r3,r3,8
 2411:      bf      cr7*4+1,2f      /* count & 4 */
 24240:     lwz     r0,0(r4)
 243        addi    r4,r4,4
 24486:     stw     r0,0(r3)
 245        addi    r3,r3,4
 2462:      bf      cr7*4+2,3f      /* count & 2 */
 24741:     lhz     r0,0(r4)
 248        addi    r4,r4,2
 24987:     sth     r0,0(r3)
 250        addi    r3,r3,2
 2513:      bf      cr7*4+3,4f      /* count & 1 */
 25242:     lbz     r0,0(r4)
 25388:     stb     r0,0(r3)
 2544:      li      r3,0            /* success */
 255        blr
 256
 257/*
 258 * exception handlers follow
 259 * we have to return the number of bytes not copied
 260 * for an exception on a load, the rest is retried one byte at a time and
 *     the count still outstanding is returned if that faults again
 *     (nothing in the code below zeroes the destination)
 261 */
 262
/*
 * Load-fault entry points.  Label 1NN (or 3NN) is the fixup for the load
 * at label NN (or 2NN) in the copy code above.  Each group adds to r3 the
 * distance from its current value to the first destination byte not yet
 * written at that site, then falls through to the common code at 1:.
 */
 263136:
 264137:
 265        add     r3,r3,r7        /* .Ldst_unaligned: r7 = offset of the piece that faulted */
 266        b       1f
 267130:
 268131:
 269        addi    r3,r3,8         /* falls through: +24 in total */
 270120:
 271320:
 272122:
 273322:
 274124:
 275125:
 276126:
 277127:
 278128:
 279129:
 280133:
 281        addi    r3,r3,8         /* falls through: +16 in total */
 282132:
 283        addi    r3,r3,8         /* +8 */
 284121:
 285321:
 286344:
 287134:
 288135:
 289138:
 290139:
 291140:
 292141:
 293142:
 294123:
 295144:
 296145:
 297
 298/*
 299 * here we have had a fault on a load and r3 points to the first
 300 * unmodified byte of the destination
 301 */
 3021:      ld      r6,-24(r1)      /* original destination */
 303        ld      r4,-16(r1)      /* original source */
 304        ld      r5,-8(r1)       /* original count */
 305        subf    r6,r6,r3        /* r6 = bytes already copied */
 306        add     r4,r4,r6        /* r4 = first source byte not yet copied */
 307        subf    r5,r6,r5        /* #bytes left to go */
 308
 309/*
 310 * first see if we can copy any more bytes before hitting another exception
 311 */
 312        mtctr   r5
 31343:     lbz     r0,0(r4)
 314        addi    r4,r4,1
 31589:     stb     r0,0(r3)
 316        addi    r3,r3,1
 317        bdnz    43b
 318        li      r3,0            /* huh? all copied successfully this time? */
 319        blr
 320
 321/*
 322 * here we have trapped again, amount remaining is in ctr.
 323 */
 324143:    mfctr   r3              /* return value = bytes not copied */
 325        blr
 326
 327/*
 328 * exception handlers for stores: we just need to work
 329 * out how many bytes weren't copied
 330 */
/*
 * Label 1NN (or 3NN) is the fixup for the store at label NN (or 2NN).
 * As with the load fixups, each group first advances r3 to the first
 * destination byte that was not written, then falls through to 1:,
 * which returns (original destination + original count) - r3.
 */
 331182:
 332183:
 333        add     r3,r3,r7        /* .Ldst_unaligned: r7 = offset of the piece that faulted */
 334        b       1f
 335371:
 336180:
 337        addi    r3,r3,8         /* falls through: +24 in total */
 338171:
 339177:
 340179:
 341        addi    r3,r3,8         /* falls through: +16 in total */
 342370:
 343372:
 344176:
 345178:
 346        addi    r3,r3,4         /* falls through: +8 in total */
 347185:
 348        addi    r3,r3,4         /* +4 */
 349170:
 350172:
 351345:
 352173:
 353174:
 354175:
 355181:
 356184:
 357186:
 358187:
 359188:
 360189:    
 361194:
 362195:
 363196:
 3641:
 365        ld      r6,-24(r1)      /* original destination */
 366        ld      r5,-8(r1)       /* original count */
 367        add     r6,r6,r5        /* r6 = one past the end of the destination */
 368        subf    r3,r3,r6        /* #bytes not copied */
 369        blr
 370
/*
 * Fault table for the generic copy code above: one entry per load/store
 * label, giving the fixup label to continue at (EX_TABLE itself is
 * defined outside this file - presumably it emits an exception-table
 * entry for the pair).
 * Convention: fixup label = instruction label + 100 (20 -> 120,
 * 220 -> 320, ...).
 * The numeric labels 20..91 are defined a second time in the page-copy
 * code below; these "Nb" references resolve to the nearest earlier
 * definition, so this table has to stay ahead of .Lcopy_page_4K.
 */
 371        EX_TABLE(20b,120b)
 372        EX_TABLE(220b,320b)
 373        EX_TABLE(21b,121b)
 374        EX_TABLE(221b,321b)
 375        EX_TABLE(70b,170b)
 376        EX_TABLE(270b,370b)
 377        EX_TABLE(22b,122b)
 378        EX_TABLE(222b,322b)
 379        EX_TABLE(71b,171b)
 380        EX_TABLE(271b,371b)
 381        EX_TABLE(72b,172b)
 382        EX_TABLE(272b,372b)
 383        EX_TABLE(244b,344b)
 384        EX_TABLE(245b,345b)
 385        EX_TABLE(23b,123b)
 386        EX_TABLE(73b,173b)
 387        EX_TABLE(44b,144b)
 388        EX_TABLE(74b,174b)
 389        EX_TABLE(45b,145b)
 390        EX_TABLE(75b,175b)
 391        EX_TABLE(24b,124b)
 392        EX_TABLE(25b,125b)
 393        EX_TABLE(26b,126b)
 394        EX_TABLE(27b,127b)
 395        EX_TABLE(28b,128b)
 396        EX_TABLE(29b,129b)
 397        EX_TABLE(30b,130b)
 398        EX_TABLE(31b,131b)
 399        EX_TABLE(32b,132b)
 400        EX_TABLE(76b,176b)
 401        EX_TABLE(33b,133b)
 402        EX_TABLE(77b,177b)
 403        EX_TABLE(78b,178b)
 404        EX_TABLE(79b,179b)
 405        EX_TABLE(80b,180b)
 406        EX_TABLE(34b,134b)
 407        EX_TABLE(94b,194b)
 408        EX_TABLE(95b,195b)
 409        EX_TABLE(96b,196b)
 410        EX_TABLE(35b,135b)
 411        EX_TABLE(81b,181b)
 412        EX_TABLE(36b,136b)
 413        EX_TABLE(82b,182b)
 414        EX_TABLE(37b,137b)
 415        EX_TABLE(83b,183b)
 416        EX_TABLE(38b,138b)
 417        EX_TABLE(39b,139b)
 418        EX_TABLE(84b,184b)
 419        EX_TABLE(85b,185b)
 420        EX_TABLE(40b,140b)
 421        EX_TABLE(86b,186b)
 422        EX_TABLE(41b,141b)
 423        EX_TABLE(87b,187b)
 424        EX_TABLE(42b,142b)
 425        EX_TABLE(88b,188b)
 426        EX_TABLE(43b,143b)
 427        EX_TABLE(89b,189b)
 428
 429/*
 430 * Routine to copy a whole page of data, optimized for POWER4.
 431 * On POWER4 it is more than 50% faster than the simple loop
 432 * above (following the .Ldst_aligned label).
 *
 * Entered only when the count is 4096 and both pointers are 4096-byte
 * aligned.  r20-r31 are saved at -120..-32(r1), below the stack pointer
 * and next to the three argument slots; no stack frame is created.
 *
 * The first 3840 bytes go out in five groups of 768 bytes; within a
 * group six streams 128 bytes apart are copied in step (offsets 0, 128,
 * 256, 384, 512, 640).  The last 256 bytes are copied 32 at a time.
 * r3 is biased by -8 so that the update-form stores can be used.
 * r5 is a countdown in 32-byte units: 24 units (768 bytes) are taken
 * off per group, and the 7 left at the end become the ctr of the final
 * loop.
 433 */
 434.Lcopy_page_4K:
 435        std     r31,-32(1)
 436        std     r30,-40(1)
 437        std     r29,-48(1)
 438        std     r28,-56(1)
 439        std     r27,-64(1)
 440        std     r26,-72(1)
 441        std     r25,-80(1)
 442        std     r24,-88(1)
 443        std     r23,-96(1)
 444        std     r22,-104(1)
 445        std     r21,-112(1)
 446        std     r20,-120(1)
 447        li      r5,4096/32 - 1  /* 127 */
 448        addi    r3,r3,-8        /* bias destination for the stdu-based loops */
 449        li      r0,5            /* inner iterations per 768-byte group */
 4500:      addi    r5,r5,-24       /* account for one 768-byte group */
 451        mtctr   r0
 45220:     ld      r22,640(4)      /* prime: first two doublewords of each stream */
 45321:     ld      r21,512(4)
 45422:     ld      r20,384(4)
 45523:     ld      r11,256(4)
 45624:     ld      r9,128(4)
 45725:     ld      r7,0(4)
 45826:     ld      r25,648(4)
 45927:     ld      r24,520(4)
 46028:     ld      r23,392(4)
 46129:     ld      r10,264(4)
 46230:     ld      r8,136(4)
 46331:     ldu     r6,8(4)
 464        cmpwi   r5,24           /* sets cr0 for the "bge 0b" after the inner loop */
 4651:                              /* inner loop: 24 bytes per stream per iteration */
 46632:     std     r22,648(3)
 46733:     std     r21,520(3)
 46834:     std     r20,392(3)
 46935:     std     r11,264(3)
 47036:     std     r9,136(3)
 47137:     std     r7,8(3)
 47238:     ld      r28,648(4)
 47339:     ld      r27,520(4)
 47440:     ld      r26,392(4)
 47541:     ld      r31,264(4)
 47642:     ld      r30,136(4)
 47743:     ld      r29,8(4)
 47844:     std     r25,656(3)
 47945:     std     r24,528(3)
 48046:     std     r23,400(3)
 48147:     std     r10,272(3)
 48248:     std     r8,144(3)
 48349:     std     r6,16(3)
 48450:     ld      r22,656(4)
 48551:     ld      r21,528(4)
 48652:     ld      r20,400(4)
 48753:     ld      r11,272(4)
 48854:     ld      r9,144(4)
 48955:     ld      r7,16(4)
 49056:     std     r28,664(3)
 49157:     std     r27,536(3)
 49258:     std     r26,408(3)
 49359:     std     r31,280(3)
 49460:     std     r30,152(3)
 49561:     stdu    r29,24(3)
 49662:     ld      r25,664(4)
 49763:     ld      r24,536(4)
 49864:     ld      r23,408(4)
 49965:     ld      r10,280(4)
 50066:     ld      r8,152(4)
 50167:     ldu     r6,24(4)
 502        bdnz    1b
 50368:     std     r22,648(3)      /* last doubleword of each stream in this group */
 50469:     std     r21,520(3)
 50570:     std     r20,392(3)
 50671:     std     r11,264(3)
 50772:     std     r9,136(3)
 50873:     std     r7,8(3)
 50974:     addi    r4,r4,640       /* 128 consumed above + 640 = next 768-byte group */
 51075:     addi    r3,r3,648       /* 120 from the stdu's + 648 = 768 */
 511        bge     0b              /* cr0 from "cmpwi r5,24": another full group left */
 512        mtctr   r5              /* remaining 32-byte blocks, minus the one handled at 4: */
 51376:     ld      r7,0(4)
 51477:     ld      r8,8(4)
 51578:     ldu     r9,16(4)
 5163:
 51779:     ld      r10,8(4)
 51880:     std     r7,8(3)
 51981:     ld      r7,16(4)
 52082:     std     r8,16(3)
 52183:     ld      r8,24(4)
 52284:     std     r9,24(3)
 52385:     ldu     r9,32(4)
 52486:     stdu    r10,32(3)
 525        bdnz    3b
 5264:                              /* final 32 bytes */
 52787:     ld      r10,8(4)
 52888:     std     r7,8(3)
 52989:     std     r8,16(3)
 53090:     std     r9,24(3)
 53191:     std     r10,32(3)
 5329:      ld      r20,-120(1)     /* restore the saved registers */
 533        ld      r21,-112(1)
 534        ld      r22,-104(1)
 535        ld      r23,-96(1)
 536        ld      r24,-88(1)
 537        ld      r25,-80(1)
 538        ld      r26,-72(1)
 539        ld      r27,-64(1)
 540        ld      r28,-56(1)
 541        ld      r29,-48(1)
 542        ld      r30,-40(1)
 543        ld      r31,-32(1)
 544        li      r3,0            /* success */
 545        blr
 546
 547/*
 548 * on an exception, reset to the beginning and jump back into the
 549 * standard __copy_tofrom_user
 *
 * The page loop does not track how far it got, so the whole 4096-byte
 * copy is started again through .Ldst_aligned, whose own fixups work
 * out the return value if the fault happens again.
 * cr1 still holds the "count vs 16" compare from function entry; nothing
 * in the page-copy code writes cr1.
 * NOTE(review): cr7 was never loaded from r5 on this path, because the
 * PPC_MTOCRF at entry comes after the branch to .Lcopy_page_4K.  With an
 * aligned source and a count of 4096 the code at .Ldst_aligned does not
 * appear to consult cr7 - confirm before relying on it.
 550 */
 551100:    ld      r20,-120(1)
 552        ld      r21,-112(1)
 553        ld      r22,-104(1)
 554        ld      r23,-96(1)
 555        ld      r24,-88(1)
 556        ld      r25,-80(1)
 557        ld      r26,-72(1)
 558        ld      r27,-64(1)
 559        ld      r28,-56(1)
 560        ld      r29,-48(1)
 561        ld      r30,-40(1)
 562        ld      r31,-32(1)
 563        ld      r3,-24(r1)      /* original destination */
 564        ld      r4,-16(r1)      /* original source */
 565        li      r5,4096         /* this path is only entered with a count of 4096 */
 566        b       .Ldst_aligned
 567
/*
 * Fault table for .Lcopy_page_4K: every labelled instruction 20..91 of
 * the page-copy code uses the single fixup at 100:, which restarts the
 * copy through the generic path.  The "Nb" references here resolve to
 * the page-copy definitions of these labels because those are the
 * nearest earlier ones.
 */
 568        EX_TABLE(20b,100b)
 569        EX_TABLE(21b,100b)
 570        EX_TABLE(22b,100b)
 571        EX_TABLE(23b,100b)
 572        EX_TABLE(24b,100b)
 573        EX_TABLE(25b,100b)
 574        EX_TABLE(26b,100b)
 575        EX_TABLE(27b,100b)
 576        EX_TABLE(28b,100b)
 577        EX_TABLE(29b,100b)
 578        EX_TABLE(30b,100b)
 579        EX_TABLE(31b,100b)
 580        EX_TABLE(32b,100b)
 581        EX_TABLE(33b,100b)
 582        EX_TABLE(34b,100b)
 583        EX_TABLE(35b,100b)
 584        EX_TABLE(36b,100b)
 585        EX_TABLE(37b,100b)
 586        EX_TABLE(38b,100b)
 587        EX_TABLE(39b,100b)
 588        EX_TABLE(40b,100b)
 589        EX_TABLE(41b,100b)
 590        EX_TABLE(42b,100b)
 591        EX_TABLE(43b,100b)
 592        EX_TABLE(44b,100b)
 593        EX_TABLE(45b,100b)
 594        EX_TABLE(46b,100b)
 595        EX_TABLE(47b,100b)
 596        EX_TABLE(48b,100b)
 597        EX_TABLE(49b,100b)
 598        EX_TABLE(50b,100b)
 599        EX_TABLE(51b,100b)
 600        EX_TABLE(52b,100b)
 601        EX_TABLE(53b,100b)
 602        EX_TABLE(54b,100b)
 603        EX_TABLE(55b,100b)
 604        EX_TABLE(56b,100b)
 605        EX_TABLE(57b,100b)
 606        EX_TABLE(58b,100b)
 607        EX_TABLE(59b,100b)
 608        EX_TABLE(60b,100b)
 609        EX_TABLE(61b,100b)
 610        EX_TABLE(62b,100b)
 611        EX_TABLE(63b,100b)
 612        EX_TABLE(64b,100b)
 613        EX_TABLE(65b,100b)
 614        EX_TABLE(66b,100b)
 615        EX_TABLE(67b,100b)
 616        EX_TABLE(68b,100b)
 617        EX_TABLE(69b,100b)
 618        EX_TABLE(70b,100b)
 619        EX_TABLE(71b,100b)
 620        EX_TABLE(72b,100b)
 621        EX_TABLE(73b,100b)
 622        EX_TABLE(74b,100b)
 623        EX_TABLE(75b,100b)
 624        EX_TABLE(76b,100b)
 625        EX_TABLE(77b,100b)
 626        EX_TABLE(78b,100b)
 627        EX_TABLE(79b,100b)
 628        EX_TABLE(80b,100b)
 629        EX_TABLE(81b,100b)
 630        EX_TABLE(82b,100b)
 631        EX_TABLE(83b,100b)
 632        EX_TABLE(84b,100b)
 633        EX_TABLE(85b,100b)
 634        EX_TABLE(86b,100b)
 635        EX_TABLE(87b,100b)
 636        EX_TABLE(88b,100b)
 637        EX_TABLE(89b,100b)
 638        EX_TABLE(90b,100b)
 639        EX_TABLE(91b,100b)
 640
/* Only the patched entry point is exported here; no EXPORT_SYMBOL for __copy_tofrom_user_base appears in this file. */
 641EXPORT_SYMBOL(__copy_tofrom_user)
 642