linux/arch/parisc/lib/lusercopy.S
/*
 *    User Space Access Routines
 *
 *    Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 *    Copyright (C) 2017 Helge Deller <deller@gmx.de>
 *    Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 *
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2, or (at your option)
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


        .text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

        /*
         * get_sr gets the appropriate space value into
         * sr1 for kernel/user space access, depending
         * on the flag stored in the task structure.
         */

        .macro  get_sr
        mfctl       %cr30,%r1            /* thread_info pointer */
        ldw         TI_SEGMENT(%r1),%r22 /* segment flag; 0 means KERNEL_DS */
        mfsp        %sr3,%r1             /* user space id */
        or,<>       %r22,%r0,%r0         /* nullify next insn if flag != 0 */
        copy        %r0,%r1              /* KERNEL_DS: kernel space id is 0 */
        mtsp        %r1,%sr1
        .endm
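
        /*
         * In C terms, get_sr computes roughly the following (a sketch;
         * pick_space() is a hypothetical name, and it assumes KERNEL_DS
         * is the value 0 and that kernel space uses space id 0, as on
         * parisc):
         *
         *      unsigned long pick_space(unsigned long ti_segment,
         *                               unsigned long user_space_id)
         *      {
         *              // segment flag 0 means KERNEL_DS -> space id 0;
         *              // anything else means USER_DS -> the user's sr3
         *              return ti_segment ? user_space_id : 0;
         *      }
         */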

        /*
         * unsigned long lclear_user(void *to, unsigned long n)
         *
         * Returns 0 for success.
         * Otherwise, returns the number of bytes not transferred.
         */

ENTRY_CFI(lclear_user)
        .proc
        .callinfo NO_CALLS
        .entry
        comib,=,n   0,%r25,$lclu_done
        get_sr
$lclu_loop:
        addib,<>    -1,%r25,$lclu_loop
1:      stbs,ma     %r0,1(%sr1,%r26)

$lclu_done:
        bv          %r0(%r2)
        copy        %r25,%r28

2:      b           $lclu_done
        ldo         1(%r25),%r25

        ASM_EXCEPTIONTABLE_ENTRY(1b,2b)

        .exit
ENDPROC_CFI(lclear_user)


        .procend
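
        /*
         * Semantics sketch in C (a model only, not the real code;
         * lclear_user_model() is a hypothetical name, and a faulting
         * store here stands in for the exception-table fixup above):
         *
         *      unsigned long lclear_user_model(unsigned char *to,
         *                                      unsigned long n)
         *      {
         *              while (n) {
         *                      *to++ = 0;      // may fault; the fixup
         *                      n--;            // stops the loop here
         *              }
         *              return n;               // bytes NOT cleared;
         *      }                               // 0 on full success
         */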

        /*
         * long lstrnlen_user(char *s, long n)
         *
         * Returns 0 if an exception occurs before a zero byte is found
         *         or N is reached,
         *         N+1 if N would be exceeded,
         *         else strlen + 1 (i.e. includes the zero byte).
         */

ENTRY_CFI(lstrnlen_user)
        .proc
        .callinfo NO_CALLS
        .entry
        comib,=     0,%r25,$lslen_nzero
        copy        %r26,%r24
        get_sr
1:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_loop:
        comib,=,n   0,%r1,$lslen_done
        addib,<>    -1,%r25,$lslen_loop
2:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_done:
        bv          %r0(%r2)
        sub         %r26,%r24,%r28
        .exit

$lslen_nzero:
        b           $lslen_done
        ldo         1(%r26),%r26 /* special case for N == 0 */

3:      b           $lslen_done
        copy        %r24,%r26    /* reset r26 so 0 is returned on fault */

        ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
        ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

ENDPROC_CFI(lstrnlen_user)

        .procend
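
        /*
         * Return-value sketch in C (a model with the fault path elided;
         * lstrnlen_user_model() is a hypothetical name, and each byte
         * read may fault, which the fixup above turns into a 0 return):
         *
         *      long lstrnlen_user_model(const char *s, long n)
         *      {
         *              long i;
         *
         *              if (n == 0)
         *                      return 1;       // N == 0 special case
         *              for (i = 0; i < n; i++)
         *                      if (s[i] == '\0')
         *                              return i + 1;   // strlen + 1
         *              return n + 1;           // N would be exceeded
         *      }
         */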


/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are used to get the best performance for various
 * conditions. In the optimal case, we copy in loops that move 32 or 16
 * bytes at a time using general registers.  Unaligned copies are handled
 * either by aligning the destination and then using a shift-and-write
 * method, or in a few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of memcpy
 * (written in C) is actually quite fast already. This routine is able to beat
 * it by 30-40% for aligned copies because of the loop unrolling, but in some
 * cases the glibc version is still slightly faster. This lends credibility
 * to the idea that gcc can generate very good code as long as we are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that those were only efficient
 *   on old machines (pre PA8000 processors).
 */
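
/*
 * Overall dispatch, as a hedged C sketch (pa_memcpy_model() is a
 * hypothetical name; exception fixups are elided, and this model
 * byte-copies the misaligned case instead of using the shift-and-merge
 * loop implemented below):
 *
 *      unsigned long pa_memcpy_model(void *dstp, const void *srcp,
 *                                    unsigned long len)
 *      {
 *              unsigned char *d = dstp;
 *              const unsigned char *s = srcp;
 *
 *              if (len >= 16 &&
 *                  (((unsigned long)s ^ (unsigned long)d) & 3) == 0) {
 *                      // same word alignment: align dst, then copy words
 *                      while ((unsigned long)d & 3) {
 *                              *d++ = *s++;
 *                              len--;
 *                      }
 *                      for (; len >= 4; len -= 4) {
 *                              *(unsigned int *)d = *(const unsigned int *)s;
 *                              d += 4;
 *                              s += 4;
 *                      }
 *              }
 *              // tail, and (in this model) the misaligned case
 *              while (len) {
 *                      *d++ = *s++;
 *                      len--;
 *              }
 *              return 0;       // bytes not copied; nonzero only on fault
 *      }
 */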

        dst = arg0
        src = arg1
        len = arg2
        end = arg3
        t1  = r19
        t2  = r20
        t3  = r21
        t4  = r22
        srcspc = sr1
        dstspc = sr2

        t0 = r1
        a1 = t1
        a2 = t2
        a3 = t3
        a0 = t4

        save_src = ret0
        save_dst = ret1
        save_len = r31

ENTRY_CFI(pa_memcpy)
        .proc
        .callinfo NO_CALLS
        .entry

        /* Last destination address */
        add     dst,len,end

        /* short copy with less than 16 bytes? */
        cmpib,COND(>>=),n 15,len,.Lbyte_loop

        /* same alignment? */
        xor     src,dst,t0
        extru   t0,31,2,t1
        cmpib,<>,n  0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
        /* only do 64-bit copies if we can get aligned. */
        extru   t0,31,3,t1
        cmpib,<>,n  0,t1,.Lalign_loop32

        /* loop until we are 64-bit aligned */
.Lalign_loop64:
        extru   dst,31,3,t1
        cmpib,=,n       0,t1,.Lcopy_loop_16_start
20:     ldb,ma  1(srcspc,src),t1
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lalign_loop64
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
        ldi     31,t0
.Lcopy_loop_16:
        cmpb,COND(>>=),n t0,len,.Lword_loop

10:     ldd     0(srcspc,src),t1
11:     ldd     8(srcspc,src),t2
        ldo     16(src),src
12:     std,ma  t1,8(dstspc,dst)
13:     std,ma  t2,8(dstspc,dst)
14:     ldd     0(srcspc,src),t1
15:     ldd     8(srcspc,src),t2
        ldo     16(src),src
16:     std,ma  t1,8(dstspc,dst)
17:     std,ma  t2,8(dstspc,dst)

        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
        ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
        ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

        b       .Lcopy_loop_16
        ldo     -32(len),len

.Lword_loop:
        cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:     ldw,ma  4(srcspc,src),t1
21:     stw,ma  t1,4(dstspc,dst)
        b       .Lword_loop
        ldo     -4(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

        /* loop until we are 32-bit aligned */
.Lalign_loop32:
        extru   dst,31,2,t1
        cmpib,=,n       0,t1,.Lcopy_loop_8
20:     ldb,ma  1(srcspc,src),t1
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lalign_loop32
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
        cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:     ldw     0(srcspc,src),t1
11:     ldw     4(srcspc,src),t2
12:     stw,ma  t1,4(dstspc,dst)
13:     stw,ma  t2,4(dstspc,dst)
14:     ldw     8(srcspc,src),t1
15:     ldw     12(srcspc,src),t2
        ldo     16(src),src
16:     stw,ma  t1,4(dstspc,dst)
17:     stw,ma  t2,4(dstspc,dst)

        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
        ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
        ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

        b       .Lcopy_loop_8
        ldo     -16(len),len

.Lbyte_loop:
        cmpclr,COND(<>) len,%r0,%r0
        b,n     .Lcopy_done
20:     ldb     0(srcspc,src),t1
        ldo     1(src),src
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lbyte_loop
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
        bv      %r0(%r2)
        sub     end,dst,ret0


        /* src and dst are not aligned the same way; need to go the hard way */
.Lunaligned_copy:
        /* align until dst is 32-bit word-aligned */
        extru   dst,31,2,t1
        cmpib,=,n       0,t1,.Lcopy_dstaligned
20:     ldb     0(srcspc,src),t1
        ldo     1(src),src
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lunaligned_copy
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

        /* store src, dst and len in a safe place */
        copy    src,save_src
        copy    dst,save_dst
        copy    len,save_len

        /* len now needs to hold the number of words to copy */
        SHRREG  len,2,len

        /*
         * Copy from an unaligned src to an aligned dst using shifts.
         * Handles 4 words per loop.
         */
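
        /*
         * Shift-and-merge sketch in C (a model of the SHRPW technique
         * used below; shrpw_model() is a hypothetical name, and it
         * assumes 32-bit words as on PA-RISC, where SHRPW extracts 32
         * bits from the 64-bit concatenation hi:lo shifted right by
         * the amount held in the SAR register):
         *
         *      unsigned int shrpw_model(unsigned int hi, unsigned int lo,
         *                               unsigned int sa)
         *      {
         *              unsigned long long pair =
         *                      ((unsigned long long)hi << 32) | lo;
         *              return (unsigned int)(pair >> sa);
         *      }
         *
         * Each iteration below loads one aligned source word and merges
         * it with the previous one to produce one aligned destination
         * word.
         */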

        /* SAR = 32 - 8 * (src & 3): shift amount for the shrpw merges */
        depw,z src,28,2,t0
        subi 32,t0,t0
        mtsar t0
        /* dispatch on (number of words) mod 4 */
        extru len,31,2,t0
        cmpib,= 2,t0,.Lcase2
        /* Make src aligned by rounding it down.  */
        depi 0,31,2,src

        cmpiclr,<> 3,t0,%r0
        b,n .Lcase3
        cmpiclr,<> 1,t0,%r0
        b,n .Lcase1
.Lcase0:
        cmpb,COND(=) %r0,len,.Lcda_finish
        nop

1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b,n .Ldo3
.Lcase1:
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        ldo -1(len),len
        cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a2, a3, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a3, a0, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a0, a1, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a1, a2, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
        ldo -4(len),len
        cmpb,COND(<>) %r0,len,.Ldo4
        nop
.Ldo0:
        shrpw a2, a3, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

.Lcda_rdfault:
.Lcda_finish:
        /* calculate new src, dst and len and jump to byte-copy loop */
        sub     dst,save_dst,t0
        add     save_src,t0,src
        b       .Lbyte_loop
        sub     save_len,t0,len

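        /*
         * Recovery arithmetic, in C terms (a sketch): t0 = dst - save_dst
         * is the number of bytes already stored, so the byte loop resumes
         * with
         *
         *      src = save_src + (dst - save_dst);
         *      len = save_len - (dst - save_dst);
         *
         * which covers both a read fault and the normal .Lcda_finish exit.
         */
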
.Lcase3:
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b .Ldo2
        ldo 1(len),len
.Lcase2:
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b .Ldo1
        ldo 2(len),len


        /* fault exception fixup handlers: */
#ifdef CONFIG_64BIT
.Lcopy16_fault:
        b       .Lcopy_done
10:     std,ma  t1,8(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
        b       .Lcopy_done
10:     stw,ma  t1,4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)

        .exit
ENDPROC_CFI(pa_memcpy)
        .procend

        .end