/*
 *  arch/xtensa/lib/usercopy.S
 *
 *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 *  It needs to remain separate and distinct.  The hal files are part
 *  of the Xtensa link-time HAL, and those files may differ per
 *  processor configuration.  Patching the kernel for another
 *  processor configuration includes replacing the hal files, and we
 *  could lose the special functionality for accessing user-space
 *  memory during such a patch.  We sacrifice a little code space here
 *  in favor of simpler code maintenance.
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */


/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The return value is the number of bytes not copied, so a return of
 * zero indicates success.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If destination is aligned and source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 *   This code tries to use fall-through branches for the common
 *     case of aligned destinations (except for the branches to
 *     the alignment label).
 *
 * Register use:
 *      a0/ return address
 *      a1/ stack pointer
 *      a2/ return value
 *      a3/ src
 *      a4/ length
 *      a5/ dst
 *      a6/ tmp
 *      a7/ tmp
 *      a8/ tmp
 *      a9/ tmp
 *      a10/ tmp
 *      a11/ original length
 */
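
/*
 * Illustrative only: a minimal C-level sketch of the calling contract
 * (hypothetical caller; in practice the arch's user-copy wrappers sit
 * between this routine and generic code):
 *
 *      size_t left = __xtensa_copy_user(dst, src, len);
 *      if (left != 0)
 *              return -EFAULT;         // 'left' trailing bytes not copied
 */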

#include <variant/core.h>

#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src    R, W0, W1
#define SSA8(R) ssa8b R
#else
#define ALIGN(R, W0, W1) src    R, W1, W0
#define SSA8(R) ssa8l R
#endif
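
/*
 * Worked example (little-endian, source offset 1): SSA8(a3) expands to
 * "ssa8l a3", setting SAR = 8 * (a3 & 3) = 8.  ALIGN(R, W0, W1) then
 * emits "src R, W1, W0", a funnel shift of the register pair {W1:W0}
 * right by SAR bits, so R = (W0 >> 8) | (W1 << 24): the unaligned word
 * that starts at byte 1.  The big-endian variants are the mirror image.
 */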

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)       \
9:      insn    reg1, reg2, offset;             \
        .section __ex_table, "a";               \
        .word   9b, handler;                    \
        .previous
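
/*
 * For illustration, EX(l32i, a6, a3, 0, l_fixup) expands (modulo the
 * separating semicolons) to:
 *
 *      9:      l32i    a6, a3, 0
 *              .section __ex_table, "a"
 *              .word   9b, l_fixup
 *              .previous
 *
 * i.e. the possibly-faulting access plus an exception-table entry that
 * maps its address to the fixup handler.
 */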


        .text
        .align  4
        .global __xtensa_copy_user
        .type   __xtensa_copy_user,@function
__xtensa_copy_user:
        entry   sp, 16          # minimal stack frame
        # a2/ dst, a3/ src, a4/ len
        mov     a5, a2          # copy dst so that a2 is return value
        mov     a11, a4         # preserve original len for error case
.Lcommon:
        bbsi.l  a2, 0, .Ldst1mod2 # if dst is 1 mod 2
        bbsi.l  a2, 1, .Ldst2mod4 # if dst is 2 mod 4
.Ldstaligned:   # return here from .Ldst1mod2/.Ldst2mod4 when dst is aligned
        srli    a7, a4, 4       # number of loop iterations with 16B
                                # per iteration
        movi    a8, 3             # if source is also aligned,
        bnone   a3, a8, .Laligned # then use word copy
        SSA8(   a3)             # set shift amount from byte offset
        bnez    a4, .Lsrcunaligned
        movi    a2, 0           # return success for len==0
        retw

/*
 * Destination is unaligned
 */

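/* Example: if dst is 3 mod 4, the one-byte copy below leaves it 0 mod 4
 * and the bbci.l returns straight to .Ldstaligned; if dst is 1 mod 4,
 * the byte copy leaves it 2 mod 4 and execution falls through into
 * .Ldst2mod4.
 */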
.Ldst1mod2:     # dst is only byte aligned
        bltui   a4, 7, .Lbytecopy       # do short copies byte by byte

        # copy 1 byte
        EX(l8ui, a6, a3, 0, l_fixup)
        addi    a3, a3,  1
        EX(s8i, a6, a5,  0, s_fixup)
        addi    a5, a5,  1
        addi    a4, a4, -1
        bbci.l  a5, 1, .Ldstaligned     # if dst is now aligned, then
                                        # return to main algorithm
.Ldst2mod4:     # dst 16-bit aligned
        # copy 2 bytes
        bltui   a4, 6, .Lbytecopy       # do short copies byte by byte
        EX(l8ui, a6, a3, 0, l_fixup)
        EX(l8ui, a7, a3, 1, l_fixup)
        addi    a3, a3,  2
        EX(s8i, a6, a5,  0, s_fixup)
        EX(s8i, a7, a5,  1, s_fixup)
        addi    a5, a5,  2
        addi    a4, a4, -2
        j       .Ldstaligned    # dst is now aligned, return to main algorithm

/*
 * Byte by byte copy
 */
        .align  4
        .byte   0               # 1 mod 4 alignment for LOOPNEZ
                                # (0 mod 4 alignment for LBEG)
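                                # (LOOPNEZ is a 3-byte instruction, so
                                # starting it 1 mod 4 places the LBEG
                                # target 0 mod 4; presumably this keeps
                                # the loop body fetch-aligned)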
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
        beqz    a4, .Lbytecopydone
        add     a7, a3, a4      # a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
        EX(l8ui, a6, a3, 0, l_fixup)
        addi    a3, a3, 1
        EX(s8i, a6, a5, 0, s_fixup)
        addi    a5, a5, 1
#if !XCHAL_HAVE_LOOPS
        blt     a3, a7, .Lnextbyte
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
        movi    a2, 0           # return success for len bytes copied
        retw

/*
 * Destination and source are word-aligned.
 */
        # copy 16 bytes per iteration for word-aligned dst and word-aligned src
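        # Worked example: for len = 23 (10111 binary) the srli above
        # yields one 16-byte iteration; then bit 3 of len is clear (no
        # 8-byte tail) and bits 2, 1 and 0 are set, so 4-, 2- and 1-byte
        # copies finish the remaining 7 bytes.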
        .align  4               # 1 mod 4 alignment for LOOPNEZ
        .byte   0               # (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
        loopnez a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
        beqz    a7, .Loop1done
        slli    a8, a7, 4
        add     a8, a8, a3      # a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
        EX(l32i, a6, a3,  0, l_fixup)
        EX(l32i, a7, a3,  4, l_fixup)
        EX(s32i, a6, a5,  0, s_fixup)
        EX(l32i, a6, a3,  8, l_fixup)
        EX(s32i, a7, a5,  4, s_fixup)
        EX(l32i, a7, a3, 12, l_fixup)
        EX(s32i, a6, a5,  8, s_fixup)
        addi    a3, a3, 16
        EX(s32i, a7, a5, 12, s_fixup)
        addi    a5, a5, 16
#if !XCHAL_HAVE_LOOPS
        blt     a3, a8, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
        bbci.l  a4, 3, .L2
        # copy 8 bytes
        EX(l32i, a6, a3,  0, l_fixup)
        EX(l32i, a7, a3,  4, l_fixup)
        addi    a3, a3,  8
        EX(s32i, a6, a5,  0, s_fixup)
        EX(s32i, a7, a5,  4, s_fixup)
        addi    a5, a5,  8
.L2:
        bbci.l  a4, 2, .L3
        # copy 4 bytes
        EX(l32i, a6, a3,  0, l_fixup)
        addi    a3, a3,  4
        EX(s32i, a6, a5,  0, s_fixup)
        addi    a5, a5,  4
.L3:
        bbci.l  a4, 1, .L4
        # copy 2 bytes
        EX(l16ui, a6, a3,  0, l_fixup)
        addi    a3, a3,  2
        EX(s16i,  a6, a5,  0, s_fixup)
        addi    a5, a5,  2
.L4:
        bbci.l  a4, 0, .L5
        # copy 1 byte
        EX(l8ui, a6, a3,  0, l_fixup)
        EX(s8i,  a6, a5,  0, s_fixup)
.L5:
        movi    a2, 0           # return success for len bytes copied
        retw

/*
 * Destination is aligned, source is unaligned
 */
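
/*
 * The loop below keeps one source word in flight: a6 always holds the
 * most recently loaded word, and each ALIGN merges two adjacent source
 * words through SAR (set by SSA8 above) into one aligned destination
 * word.  Loading one word ahead of the stores also helps hide the
 * load-use latency.
 */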

        .align  4
        .byte   0               # 1 mod 4 alignment for LOOPNEZ
                                # (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
        # copy 16 bytes per iteration for word-aligned dst and unaligned src
        and     a10, a3, a8     # save unalignment offset for below
        sub     a3, a3, a10     # align a3 (to avoid sim warnings only; not needed for hardware)
        EX(l32i, a6, a3, 0, l_fixup)    # load first word
#if XCHAL_HAVE_LOOPS
        loopnez a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
        beqz    a7, .Loop2done
        slli    a10, a7, 4
        add     a10, a10, a3    # a10 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
        EX(l32i, a7, a3,  4, l_fixup)
        EX(l32i, a8, a3,  8, l_fixup)
        ALIGN(  a6, a6, a7)
        EX(s32i, a6, a5,  0, s_fixup)
        EX(l32i, a9, a3, 12, l_fixup)
        ALIGN(  a7, a7, a8)
        EX(s32i, a7, a5,  4, s_fixup)
        EX(l32i, a6, a3, 16, l_fixup)
        ALIGN(  a8, a8, a9)
        EX(s32i, a8, a5,  8, s_fixup)
        addi    a3, a3, 16
        ALIGN(  a9, a9, a6)
        EX(s32i, a9, a5, 12, s_fixup)
        addi    a5, a5, 16
#if !XCHAL_HAVE_LOOPS
        blt     a3, a10, .Loop2
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
        bbci.l  a4, 3, .L12
        # copy 8 bytes
        EX(l32i, a7, a3,  4, l_fixup)
        EX(l32i, a8, a3,  8, l_fixup)
        ALIGN(  a6, a6, a7)
        EX(s32i, a6, a5,  0, s_fixup)
        addi    a3, a3,  8
        ALIGN(  a7, a7, a8)
        EX(s32i, a7, a5,  4, s_fixup)
        addi    a5, a5,  8
        mov     a6, a8
.L12:
        bbci.l  a4, 2, .L13
        # copy 4 bytes
        EX(l32i, a7, a3,  4, l_fixup)
        addi    a3, a3,  4
        ALIGN(  a6, a6, a7)
        EX(s32i, a6, a5,  0, s_fixup)
        addi    a5, a5,  4
        mov     a6, a7
.L13:
        add     a3, a3, a10     # readjust a3 with correct misalignment
        bbci.l  a4, 1, .L14
        # copy 2 bytes
        EX(l8ui, a6, a3,  0, l_fixup)
        EX(l8ui, a7, a3,  1, l_fixup)
        addi    a3, a3,  2
        EX(s8i, a6, a5,  0, s_fixup)
        EX(s8i, a7, a5,  1, s_fixup)
        addi    a5, a5,  2
.L14:
        bbci.l  a4, 0, .L15
        # copy 1 byte
        EX(l8ui, a6, a3,  0, l_fixup)
        EX(s8i,  a6, a5,  0, s_fixup)
.L15:
        movi    a2, 0           # return success for len bytes copied
        retw


        .section .fixup, "ax"
        .align  4

/* a2 = original dst; a5 = current dst; a11 = original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 *
 * Clearing the remaining pieces of kernel memory plugs security
 * holes.  This functionality is the equivalent of the *_zeroing
 * functions that some architectures provide.
 */
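
/*
 * Example: with original len a11 = 100 and a fault taken after 64
 * bytes have been stored (a5 - a2 = 64), both fixups return
 * 100 - 64 = 36 bytes not copied.
 */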

.Lmemset:
        .word   memset

s_fixup:
        sub     a2, a5, a2      /* a2 <-- bytes copied */
        sub     a2, a11, a2     /* a2 <-- bytes not copied */
        retw

l_fixup:
        sub     a2, a5, a2      /* a2 <-- bytes copied */
        sub     a2, a11, a2     /* a2 <-- bytes not copied == return value */

        /* void *memset(void *s, int c, size_t n); */
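        /* With the windowed ABI, callx4 rotates the register window by
         * four, so the a6/a7/a8 set up below arrive in memset as its
         * a2/a3/a4 (s, c, n). */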
        mov     a6, a5          /* s */
        movi    a7, 0           /* c */
        mov     a8, a2          /* n */
        l32r    a4, .Lmemset
        callx4  a4
        /* Ignore memset return value in a6. */
        /* a2 still contains bytes not copied. */
        retw