linux/arch/parisc/lib/lusercopy.S
/*
 *    User Space Access Routines
 *
 *    Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 *    Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 *    Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 *    Copyright (C) 2017 Helge Deller <deller@gmx.de>
 *    Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 *
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2, or (at your option)
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


        .text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

        /*
         * get_sr gets the appropriate space value into
         * sr1 for kernel/user space access, depending
         * on the flag stored in the task structure.
         */

        .macro  get_sr
        mfctl       %cr30,%r1
        ldw         TI_SEGMENT(%r1),%r22
        mfsp        %sr3,%r1
        or,<>       %r22,%r0,%r0
        copy        %r0,%r1
        mtsp        %r1,%sr1
        .endm
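
        /*
         * The "or,<>" nullifies the following "copy" when the segment
         * flag loaded from TI_SEGMENT is non-zero, leaving %sr1 = %sr3
         * (the user space id); a zero flag lets the copy run, so
         * %sr1 = 0, i.e. kernel space.
         */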

        .macro fixup_branch lbl
        ldil        L%\lbl, %r1
        ldo         R%\lbl(%r1), %r1
        bv          %r0(%r1)
        .endm
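
        /*
         * bv has a delay slot, so the instruction written immediately
         * after a fixup_branch invocation executes before the branch to
         * \lbl is taken; the fixup code below places its register
         * adjustments there.
         */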

        /*
         * unsigned long lclear_user(void *to, unsigned long n)
         *
         * Returns 0 for success; otherwise, returns the number of
         * bytes not transferred.
         */

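        /*
         * Roughly equivalent C, for illustration only ("put_user_byte"
         * is a hypothetical helper, not a kernel API; the real loop
         * keeps the store in the addib delay slot):
         *
         *	while (n--)
         *		if (put_user_byte(to++, 0))
         *			return n + 1;	// faulting byte counts as not cleared
         *	return 0;
         */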
ENTRY_CFI(lclear_user)
        .proc
        .callinfo NO_CALLS
        .entry
        comib,=,n   0,%r25,$lclu_done
        get_sr
$lclu_loop:
        addib,<>    -1,%r25,$lclu_loop
1:      stbs,ma     %r0,1(%sr1,%r26)

$lclu_done:
        bv          %r0(%r2)
        copy        %r25,%r28
        .exit
ENDPROC_CFI(lclear_user)

        .section .fixup,"ax"
2:      fixup_branch $lclu_done
        ldo        1(%r25),%r25 /* re-add the byte whose store faulted */
        .previous

        ASM_EXCEPTIONTABLE_ENTRY(1b,2b)

        .procend

        /*
         * long lstrnlen_user(char *s, long n)
         *
         * Returns 0 if an exception occurs before the zero byte or N is reached,
         *         N+1 if N would be exceeded,
         *         else strlen + 1 (i.e. includes the zero byte).
         */

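        /*
         * Illustrative C for the return-value contract ("get_user_byte"
         * is a hypothetical helper, not a kernel API):
         *
         *	long i = 0;
         *	while (i < n) {
         *		char c;
         *		if (get_user_byte(s + i, &c))
         *			return 0;	// fault
         *		i++;
         *		if (c == '\0')
         *			return i;	// strlen + 1
         *	}
         *	return n + 1;		// N would be exceeded
         */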
ENTRY_CFI(lstrnlen_user)
        .proc
        .callinfo NO_CALLS
        .entry
        comib,=     0,%r25,$lslen_nzero
        copy        %r26,%r24
        get_sr
1:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_loop:
        comib,=,n   0,%r1,$lslen_done
        addib,<>    -1,%r25,$lslen_loop
2:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_done:
        bv          %r0(%r2)
        sub         %r26,%r24,%r28
        .exit

$lslen_nzero:
        b           $lslen_done
        ldo         1(%r26),%r26 /* special case for N == 0 */
ENDPROC_CFI(lstrnlen_user)

        .section .fixup,"ax"
3:      fixup_branch $lslen_done
        copy        %r24,%r26    /* reset r26 so 0 is returned on fault */
        .previous

        ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
        ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

        .procend



/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are used to get the best performance for various
 * conditions. In the optimal case, we copy in loops that move 32 or 16 bytes
 * at a time using general registers.  Unaligned copies are handled either by
 * aligning the destination and then using a shift-and-write method, or in a
 * few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of memcpy
 * (written in C) is actually quite fast already. This routine is able to beat
 * it by 30-40% for aligned copies because of the loop unrolling, but in some
 * cases the glibc version is still slightly faster. This lends credibility to
 * the claim that gcc can generate very good code as long as we are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that these were only efficient
 *   on old machines (pre-PA8000 processors).
 */

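/*
 * Illustrative C for the aligned 32-bit fast path (a sketch only, not
 * the generated code; the u32 loads/stores stand in for the ldw/stw
 * pairs of .Lcopy_loop_8 below):
 *
 *	while (len >= 16) {
 *		*(u32 *)(dst +  0) = *(u32 *)(src +  0);
 *		*(u32 *)(dst +  4) = *(u32 *)(src +  4);
 *		*(u32 *)(dst +  8) = *(u32 *)(src +  8);
 *		*(u32 *)(dst + 12) = *(u32 *)(src + 12);
 *		src += 16; dst += 16; len -= 16;
 *	}
 */
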
        dst = arg0
        src = arg1
        len = arg2
        end = arg3
        t1  = r19
        t2  = r20
        t3  = r21
        t4  = r22
        srcspc = sr1
        dstspc = sr2

        t0 = r1
        a1 = t1
        a2 = t2
        a3 = t3
        a0 = t4

        save_src = ret0
        save_dst = ret1
        save_len = r31

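        /*
         * These are assembler aliases; arg0..arg3 and ret0/ret1 map to
         * the standard parisc calling-convention registers (%r26..%r23
         * and %r28/%r29, per asm/assembly.h).
         */
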
ENTRY_CFI(pa_memcpy)
        .proc
        .callinfo NO_CALLS
        .entry

        /* Last destination address */
        add     dst,len,end

        /* short copy with less than 16 bytes? */
        cmpib,COND(>>=),n 15,len,.Lbyte_loop

        /* same alignment? */
        xor     src,dst,t0
        extru   t0,31,2,t1
        cmpib,<>,n  0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
        /* only do 64-bit copies if we can get aligned. */
        extru   t0,31,3,t1
        cmpib,<>,n  0,t1,.Lalign_loop32

        /* loop until we are 64-bit aligned */
.Lalign_loop64:
        extru   dst,31,3,t1
        cmpib,=,n       0,t1,.Lcopy_loop_16_start
20:     ldb,ma  1(srcspc,src),t1
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lalign_loop64
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
        ldi     31,t0
.Lcopy_loop_16:
        /* 32 bytes per iteration */
        cmpb,COND(>>=),n t0,len,.Lword_loop

10:     ldd     0(srcspc,src),t1
11:     ldd     8(srcspc,src),t2
        ldo     16(src),src
12:     std,ma  t1,8(dstspc,dst)
13:     std,ma  t2,8(dstspc,dst)
14:     ldd     0(srcspc,src),t1
15:     ldd     8(srcspc,src),t2
        ldo     16(src),src
16:     std,ma  t1,8(dstspc,dst)
17:     std,ma  t2,8(dstspc,dst)

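        /*
         * Faults on the second load of a pair (11b/15b) are sent to
         * .Lcopy16_fault: t1 was already fetched successfully, so the
         * fixup stores it before bailing out, keeping the returned
         * not-copied byte count exact.
         */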
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
        ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
        ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

        b       .Lcopy_loop_16
        ldo     -32(len),len

.Lword_loop:
        cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:     ldw,ma  4(srcspc,src),t1
21:     stw,ma  t1,4(dstspc,dst)
        b       .Lword_loop
        ldo     -4(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

        /* loop until we are 32-bit aligned */
.Lalign_loop32:
        extru   dst,31,2,t1
        cmpib,=,n       0,t1,.Lcopy_loop_8
20:     ldb,ma  1(srcspc,src),t1
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lalign_loop32
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
        /* 16 bytes per iteration */
        cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:     ldw     0(srcspc,src),t1
11:     ldw     4(srcspc,src),t2
12:     stw,ma  t1,4(dstspc,dst)
13:     stw,ma  t2,4(dstspc,dst)
14:     ldw     8(srcspc,src),t1
15:     ldw     12(srcspc,src),t2
        ldo     16(src),src
16:     stw,ma  t1,4(dstspc,dst)
17:     stw,ma  t2,4(dstspc,dst)

        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
        ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
        ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

        b       .Lcopy_loop_8
        ldo     -16(len),len

.Lbyte_loop:
        cmpclr,COND(<>) len,%r0,%r0     /* done if len == 0 */
        b,n     .Lcopy_done
20:     ldb     0(srcspc,src),t1
        ldo     1(src),src
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lbyte_loop
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

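        /*
         * All fault fixups in pa_memcpy eventually return through here:
         * ret0 = end - dst, the number of bytes not copied.  The sub
         * executes in the delay slot of the bv.
         */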
.Lcopy_done:
        bv      %r0(%r2)
        sub     end,dst,ret0


        /* src and dst are not aligned the same way. */
        /* need to go the hard way */
.Lunaligned_copy:
        /* align until dst is 32-bit word-aligned */
        extru   dst,31,2,t1
        cmpib,=,n       0,t1,.Lcopy_dstaligned
20:     ldb     0(srcspc,src),t1
        ldo     1(src),src
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lunaligned_copy
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

        /* store src, dst and len in safe place */
        copy    src,save_src
        copy    dst,save_dst
        copy    len,save_len

        /* len now needs to give the number of words to copy */
        SHRREG  len,2,len

        /*
         * Copy from a not-aligned src to an aligned dst using shifts.
         * Handles 4 words per loop.
         */

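        /*
         * Conceptually (illustrative big-endian C; sh is the source
         * misalignment in bits and is never 0 on this path):
         *
         *	sh   = 8 * (src & 3);
         *	src &= ~3;		// cf. "depi 0,31,2,src" below
         *	prev = *(u32 *)src;
         *	src += 4;
         *	while (words--) {
         *		u32 next = *(u32 *)src;
         *		*(u32 *)dst = (prev << sh) | (next >> (32 - sh));
         *		src += 4; dst += 4; prev = next;
         *	}
         *
         * The depw,z/subi/mtsar sequence below puts 32 - sh into %sar,
         * so each shrpw extracts one destination word from a pair of
         * adjacent source words.
         */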
        depw,z src,28,2,t0
        subi 32,t0,t0
        mtsar t0
        extru len,31,2,t0
        cmpib,= 2,t0,.Lcase2
        /* Make src aligned by rounding it down.  */
        depi 0,31,2,src

        cmpiclr,<> 3,t0,%r0
        b,n .Lcase3
        cmpiclr,<> 1,t0,%r0
        b,n .Lcase1
.Lcase0:
        cmpb,COND(=) %r0,len,.Lcda_finish
        nop

1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b,n .Ldo3
.Lcase1:
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        ldo -1(len),len
        cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a2, a3, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a3, a0, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a0, a1, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a1, a2, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
        ldo -4(len),len
        cmpb,COND(<>) %r0,len,.Ldo4
        nop
.Ldo0:
        shrpw a2, a3, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

.Lcda_rdfault:
.Lcda_finish:
        /* calculate new src, dst and len and jump to byte-copy loop;
           src was rounded down and read ahead, so it is re-derived from
           the bytes actually stored (dst - save_dst) */
        sub     dst,save_dst,t0
        add     save_src,t0,src
        b       .Lbyte_loop
        sub     save_len,t0,len

.Lcase3:
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b .Ldo2
        ldo 1(len),len
.Lcase2:
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b .Ldo1
        ldo 2(len),len


        /* fault exception fixup handlers: */
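        /*
         * These stubs store the already-loaded first word (the std/stw
         * sits in the branch delay slot) before returning through
         * .Lcopy_done, so partial progress is credited precisely.
         */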
#ifdef CONFIG_64BIT
.Lcopy16_fault:
        b       .Lcopy_done
10:     std,ma  t1,8(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
        b       .Lcopy_done
10:     stw,ma  t1,4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)

        .exit
ENDPROC_CFI(pa_memcpy)
        .procend

        .end
