linux/arch/parisc/lib/lusercopy.S
/*
 *    User Space Access Routines
 *
 *    Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 *    Copyright (C) 2017 Helge Deller <deller@gmx.de>
 *    Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 *
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2, or (at your option)
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


        .text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

        /*
         * get_sr gets the appropriate space value into
         * sr1 for kernel/user space access, depending
         * on the flag stored in the task structure.
         */

        .macro  get_sr
        mfctl       %cr30,%r1
        ldw         TI_SEGMENT(%r1),%r22
        mfsp        %sr3,%r1
        or,<>       %r22,%r0,%r0
        copy        %r0,%r1
        mtsp        %r1,%sr1
        .endm
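
        /*
         * Rough C-level sketch of the macro above (illustrative only;
         * 'segment' stands for the TI_SEGMENT word of struct thread_info,
         * whose address is kept in %cr30):
         *
         *      sr1 = segment ? mfsp(sr3) : 0;
         *
         * i.e. a non-zero segment flag selects the user space id held in
         * sr3, while a zero flag selects kernel space 0.  The or,<> above
         * nullifies the following copy when the flag is non-zero.
         */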

        /*
         * unsigned long lclear_user(void *to, unsigned long n)
         *
         * Returns 0 for success;
         * otherwise, returns the number of bytes not transferred.
         */
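
        /*
         * Hypothetical C-level caller sketch (the real callers are the
         * generic clear_user() paths; 'ubuf' and 'count' are illustrative
         * names, not part of this file):
         *
         *      if (lclear_user(ubuf, count))
         *              return -EFAULT;   (some bytes were left unzeroed)
         */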

ENTRY_CFI(lclear_user)
        comib,=,n   0,%r25,$lclu_done
        get_sr
$lclu_loop:
        addib,<>    -1,%r25,$lclu_loop
1:      stbs,ma     %r0,1(%sr1,%r26)  /* zero one byte, post-increment */

$lclu_done:
        bv          %r0(%r2)
        copy        %r25,%r28

2:      b           $lclu_done
        ldo         1(%r25),%r25  /* fault: undo the loop's pre-decrement */

        ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
ENDPROC_CFI(lclear_user)


        /*
         * long lstrnlen_user(char *s, long n)
         *
         * Returns 0 if exception before zero byte or reaching N,
         *         N+1 if N would be exceeded,
         *         else strlen + 1 (i.e. includes zero byte).
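         *
         * Example: for s = "hi" and n = 10 the result is 3 (strlen + 1);
         * for the same s with n = 1 the result is 2 (N + 1).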
         */

ENTRY_CFI(lstrnlen_user)
        comib,=     0,%r25,$lslen_nzero
        copy        %r26,%r24
        get_sr
1:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_loop:
        comib,=,n   0,%r1,$lslen_done
        addib,<>    -1,%r25,$lslen_loop
2:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_done:
        bv          %r0(%r2)
        sub         %r26,%r24,%r28

$lslen_nzero:
        b           $lslen_done
        ldo         1(%r26),%r26 /* special case for N == 0 */

3:      b           $lslen_done
        copy        %r24,%r26    /* reset r26 so 0 is returned on fault */

        ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
        ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

ENDPROC_CFI(lstrnlen_user)


/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are used to get the best performance for various
 * conditions. In the optimal case, we copy using loops that move 32 or 16
 * bytes at a time using general registers.  Unaligned copies are handled
 * either by aligning the destination and then using a shift-and-write
 * method, or in a few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of
 * memcpy (written in C) is actually quite fast already. This routine is
 * able to beat it by 30-40% for aligned copies because of the loop
 * unrolling, but in some cases the glibc version is still slightly faster.
 * This lends credibility to the claim that gcc can generate very good code
 * as long as we are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that these were only efficient
 *   on old machines (pre-PA8000 processors).
 */
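
/*
 * Rough C sketch of the shift-and-write method used below for the
 * unaligned case (illustrative only; 's', 'd', 'w0', 'w1', 'sh' and
 * 'words' are made-up names).  It assumes 32-bit big-endian words as on
 * PA-RISC, and src % 4 != 0 on entry, so 'sh' is 8, 16 or 24:
 *
 *      unsigned int sh = 32 - 8 * ((unsigned long)src & 3);
 *      unsigned int *s = (unsigned int *)((unsigned long)src & ~3UL);
 *      unsigned int *d = dst;          (dst is already word aligned)
 *      unsigned int w0 = *s++, w1;
 *      while (words--) {
 *              w1 = *s++;
 *              *d++ = (w0 << (32 - sh)) | (w1 >> sh);   (one shrpw)
 *              w0 = w1;
 *      }
 */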

        dst = arg0
        src = arg1
        len = arg2
        end = arg3
        t1  = r19
        t2  = r20
        t3  = r21
        t4  = r22
        srcspc = sr1
        dstspc = sr2

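        /*
         * a0-a3 hold a sliding window of source words for the
         * shift-based unaligned copy further below.
         */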
        t0 = r1
        a1 = t1
        a2 = t2
        a3 = t3
        a0 = t4

        save_src = ret0
        save_dst = ret1
        save_len = r31

ENTRY_CFI(pa_memcpy)
        /* Last destination address */
        add     dst,len,end

        /* short copy with fewer than 16 bytes? */
        cmpib,COND(>>=),n 15,len,.Lbyte_loop

        /* same alignment? */
        xor     src,dst,t0
        extru   t0,31,2,t1
        cmpib,<>,n  0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
        /* only do 64-bit copies if we can get aligned. */
        extru   t0,31,3,t1
        cmpib,<>,n  0,t1,.Lalign_loop32

        /* loop until we are 64-bit aligned */
.Lalign_loop64:
        extru   dst,31,3,t1
        cmpib,=,n       0,t1,.Lcopy_loop_16_start
20:     ldb,ma  1(srcspc,src),t1
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lalign_loop64
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
        ldi     31,t0
.Lcopy_loop_16:
        cmpb,COND(>>=),n t0,len,.Lword_loop

10:     ldd     0(srcspc,src),t1
11:     ldd     8(srcspc,src),t2
        ldo     16(src),src
12:     std,ma  t1,8(dstspc,dst)
13:     std,ma  t2,8(dstspc,dst)
14:     ldd     0(srcspc,src),t1
15:     ldd     8(srcspc,src),t2
        ldo     16(src),src
16:     std,ma  t1,8(dstspc,dst)
17:     std,ma  t2,8(dstspc,dst)

        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
        ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
        ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

        b       .Lcopy_loop_16
        ldo     -32(len),len

.Lword_loop:
        cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:     ldw,ma  4(srcspc,src),t1
21:     stw,ma  t1,4(dstspc,dst)
        b       .Lword_loop
        ldo     -4(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

        /* loop until we are 32-bit aligned */
.Lalign_loop32:
        extru   dst,31,2,t1
        cmpib,=,n       0,t1,.Lcopy_loop_8
20:     ldb,ma  1(srcspc,src),t1
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lalign_loop32
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
        cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:     ldw     0(srcspc,src),t1
11:     ldw     4(srcspc,src),t2
12:     stw,ma  t1,4(dstspc,dst)
13:     stw,ma  t2,4(dstspc,dst)
14:     ldw     8(srcspc,src),t1
15:     ldw     12(srcspc,src),t2
        ldo     16(src),src
16:     stw,ma  t1,4(dstspc,dst)
17:     stw,ma  t2,4(dstspc,dst)

        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
        ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
        ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

        b       .Lcopy_loop_8
        ldo     -16(len),len

.Lbyte_loop:
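        /* Branch is taken only when len == 0; cmpclr nullifies it otherwise. */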
        cmpclr,COND(<>) len,%r0,%r0
        b,n     .Lcopy_done
20:     ldb     0(srcspc,src),t1
        ldo     1(src),src
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lbyte_loop
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

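        /* Return the number of bytes that could not be copied (end - dst). */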
.Lcopy_done:
        bv      %r0(%r2)
        sub     end,dst,ret0


        /* src and dst are not aligned the same way. */
        /* need to go the hard way */
.Lunaligned_copy:
        /* align until dst is 32-bit word aligned */
        extru   dst,31,2,t1
        cmpib,=,n       0,t1,.Lcopy_dstaligned
20:     ldb     0(srcspc,src),t1
        ldo     1(src),src
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lunaligned_copy
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

        /* store src, dst and len in a safe place */
        copy    src,save_src
        copy    dst,save_dst
        copy    len,save_len

        /* len now needs to hold the number of words to copy */
        SHRREG  len,2,len

        /*
         * Copy from a not-aligned src to an aligned dst using shifts.
         * Handles 4 words per loop.
         */

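        /*
         * Set %sar to 32 - 8*(src % 4) so that each shrpw below merges the
         * tail of one aligned source word with the head of the next one.
         */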
        depw,z src,28,2,t0
        subi 32,t0,t0
        mtsar t0
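        /* Dispatch on len % 4 to the matching loop entry point below. */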
        extru len,31,2,t0
        cmpib,= 2,t0,.Lcase2
        /* Make src aligned by rounding it down.  */
        depi 0,31,2,src

        cmpiclr,<> 3,t0,%r0
        b,n .Lcase3
        cmpiclr,<> 1,t0,%r0
        b,n .Lcase1
.Lcase0:
        cmpb,COND(=) %r0,len,.Lcda_finish
        nop

1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b,n .Ldo3
.Lcase1:
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        ldo -1(len),len
        cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a2, a3, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a3, a0, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a0, a1, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a1, a2, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
        ldo -4(len),len
        cmpb,COND(<>) %r0,len,.Ldo4
        nop
.Ldo0:
        shrpw a2, a3, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

.Lcda_rdfault:
.Lcda_finish:
        /* calculate new src, dst and len and jump to byte-copy loop */
        sub     dst,save_dst,t0
        add     save_src,t0,src
        b       .Lbyte_loop
        sub     save_len,t0,len

.Lcase3:
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b .Ldo2
        ldo 1(len),len
.Lcase2:
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b .Ldo1
        ldo 2(len),len


        /* fault exception fixup handlers: */
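        /*
         * Here the second load of a pair has faulted: the word already
         * loaded into t1 still has to be stored before the residue is
         * returned via .Lcopy_done.
         */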
#ifdef CONFIG_64BIT
.Lcopy16_fault:
        b       .Lcopy_done
10:     std,ma  t1,8(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
        b       .Lcopy_done
10:     stw,ma  t1,4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
ENDPROC_CFI(pa_memcpy)

        .end