linux/arch/mips/lib/memcpy.S
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

/*
 * Hack to resolve longstanding prefetch issue
 *
 * Prefetching may be fatal on some systems if we're prefetching beyond the
 * end of memory.  It's also a seriously bad idea on non dma-coherent
 * systems.
 */
#ifdef CONFIG_DMA_NONCOHERENT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_MIPS_MALTA
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_CPU_MIPSR6
#undef CONFIG_CPU_HAS_PREFETCH
#endif

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * include/asm-mips/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */
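
/*
 * For example (illustrative only): if __copy_user is asked for 100 bytes
 * and a fault is taken after 10 bytes have been transferred, it returns
 * with len (a2) set to at most 90, and the caller treats that value as the
 * number of bytes that were not copied.
 */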

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */
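
/*
 * Taken together, these conditions are what let the load fault handlers
 * recover: .Ll_exc_copy re-copies the still readable bytes from the current
 * src up to the faulting address a byte at a time, and .Ll_exc then reports
 * "AT - faulting address" bytes as uncopied.  That count is a safe upper
 * bound because AT still holds src_entry + len (condition 1) and src is
 * never advanced past data that has not reached dst (conditions 2 and 3).
 */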

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
/* Prefetch type */
#define SRC_PREFETCH 1
#define DST_PREFETCH 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn    : Load/store instruction
 * type    : Instruction type
 * reg     : Register
 * addr    : Address
 * handler : Exception handler
 */

#define EXC(insn, type, reg, addr, handler)                     \
        .if \mode == LEGACY_MODE;                               \
9:              insn reg, addr;                                 \
                .section __ex_table,"a";                        \
                PTR     9b, handler;                            \
                .previous;                                      \
        /* This is assembled in EVA mode */                     \
        .else;                                                  \
                /* If loading from user or storing to user */   \
                .if ((\from == USEROP) && (type == LD_INSN)) || \
                    ((\to == USEROP) && (type == ST_INSN));     \
9:                      __BUILD_EVA_INSN(insn##e, reg, addr);   \
                        .section __ex_table,"a";                \
                        PTR     9b, handler;                    \
                        .previous;                              \
                .else;                                          \
                        /*                                      \
                         *  Still in EVA, but no need for       \
                         * exception handler or EVA insn        \
                         */                                     \
                        insn reg, addr;                         \
                .endif;                                         \
        .endif
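
/*
 * For example, in LEGACY_MODE the expansion of
 *      EXC(lw, LD_INSN, t0, 0(src), .Ll_exc)
 * is just
 *      9:      lw t0, 0(src)
 * plus a __ex_table entry pairing label 9b with .Ll_exc, so a fault taken
 * on the lw is redirected to that handler.  In EVA_MODE the lw becomes lwe
 * when the access may touch user space, and stays a plain lw (with no
 * exception table entry) otherwise.
 */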

/*
 * Only on a 64-bit kernel can we make use of 64-bit registers.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOADK ld /* No exception */
#define LOAD(reg, addr, handler)        EXC(ld, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)       EXC(ldl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)       EXC(ldr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)      EXC(sdl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)      EXC(sdr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)       EXC(sd, ST_INSN, reg, addr, handler)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SRA    dsra
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register names from
 * the n64 ABI to the o32 ABI.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0      $8
#define t1      $9
#define t2      $10
#define t3      $11
#define t4      $12
#define t5      $13
#define t6      $14
#define t7      $15

#else

#define LOADK lw /* No exception */
#define LOAD(reg, addr, handler)        EXC(lw, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)       EXC(lwl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)       EXC(lwr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)      EXC(swl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)      EXC(swr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)       EXC(sw, ST_INSN, reg, addr, handler)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SRA    sra
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#define LOADB(reg, addr, handler)       EXC(lb, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)      EXC(sb, ST_INSN, reg, addr, handler)

#define _PREF(hint, addr, type)                                         \
        .if \mode == LEGACY_MODE;                                       \
                PREF(hint, addr);                                       \
        .else;                                                          \
                .if ((\from == USEROP) && (type == SRC_PREFETCH)) ||    \
                    ((\to == USEROP) && (type == DST_PREFETCH));        \
                        /*                                              \
                         * PREFE has only 9 bits for the offset         \
                         * compared to PREF which has 16, so it may     \
                         * need to use the $at register but this        \
                         * register should remain intact because it's   \
                         * used later on. Therefore use $v1.            \
                         */                                             \
                        .set at=v1;                                     \
                        PREFE(hint, addr);                              \
                        .set noat;                                      \
                .else;                                                  \
                        PREF(hint, addr);                               \
                .endif;                                                 \
        .endif

#define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
#define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)
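
/*
 * PREFS prefetches a source cache line with hint 0 (load) and PREFD a
 * destination line with hint 1 (store); _PREF passes the hint through
 * unchanged, only choosing between PREF and the EVA PREFE form.
 */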

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST  LOADL
#define STFIRST STORER
#define STREST  STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST  LOADR
#define STFIRST STOREL
#define STREST  STORER
#define SHIFT_DISCARD SRLV
#endif
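
/*
 * LDFIRST/STFIRST always name the variant applied to the lowest address of
 * a misaligned word (lwr/ldr and swr/sdr on little-endian, lwl/ldl and
 * swl/sdl on big-endian), while LDREST/STREST pick up the remaining bytes
 * at the high end.  SHIFT_DISCARD shifts the bytes past len out of the
 * register (they sit at the high-order end on little-endian and the
 * low-order end on big-endian) before the partial store in the aligned
 * tail code below.
 */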

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)
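
/*
 * E.g. with NBYTES == 8: FIRST(1) == 8 is the offset of the first byte of
 * copy unit 1, REST(1) == 15 the offset of its last byte, and
 * ADDRMASK == 0x7 extracts the sub-unit alignment bits of an address.
 */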

        .text
        .set    noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
        .set    noat
#else
        .set    at=v1
#endif

        .align  5

        /*
         * Macro to build the __copy_user common code
         * Arguments:
         * mode : LEGACY_MODE or EVA_MODE
         * from : Source operand. USEROP or KERNELOP
         * to   : Destination operand. USEROP or KERNELOP
         */
        .macro __BUILD_COPY_USER mode, from, to

        /* initialize __memcpy if this is the first time we execute this macro */
        .ifnotdef __memcpy
        .set __memcpy, 1
        .hidden __memcpy /* make sure it does not leak */
        .endif

        /*
         * Note: dst & src may be unaligned, len may be 0
         * Temps
         */
#define rem t8

        R10KCBARRIER(0(ra))
        /*
         * The "issue break"s below are very approximate.
         * Issue delays for dcache fills will perturb the schedule, as will
         * load queue full replay traps, etc.
         *
         * If len < NBYTES use byte operations.
         */
        PREFS(  0, 0(src) )
        PREFD(  1, 0(dst) )
        sltu    t2, len, NBYTES
        and     t1, dst, ADDRMASK
        PREFS(  0, 1*32(src) )
        PREFD(  1, 1*32(dst) )
        bnez    t2, .Lcopy_bytes_checklen\@
         and    t0, src, ADDRMASK
        PREFS(  0, 2*32(src) )
        PREFD(  1, 2*32(dst) )
#ifndef CONFIG_CPU_MIPSR6
        bnez    t1, .Ldst_unaligned\@
         nop
        bnez    t0, .Lsrc_unaligned_dst_aligned\@
#else
        or      t0, t0, t1
        bnez    t0, .Lcopy_unaligned_bytes\@
#endif
        /*
         * use delay slot for fall-through
         * src and dst are aligned; need to compute rem
         */
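        /*
         * Main loop below: each pass moves 8*NBYTES bytes (64 bytes on a
         * 64-bit kernel, 32 bytes on a 32-bit one), with loads scheduled
         * ahead of the matching stores so cache-fill latency can overlap
         * with the copy.
         */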
.Lboth_aligned\@:
         SRL    t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
        beqz    t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
         and    rem, len, (8*NBYTES-1)   # rem = len % (8*NBYTES)
        PREFS(  0, 3*32(src) )
        PREFD(  1, 3*32(dst) )
        .align  4
1:
        R10KCBARRIER(0(ra))
        LOAD(t0, UNIT(0)(src), .Ll_exc\@)
        LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
        LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
        LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
        SUB     len, len, 8*NBYTES
        LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
        LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
        STORE(t0, UNIT(0)(dst), .Ls_exc_p8u\@)
        STORE(t1, UNIT(1)(dst), .Ls_exc_p7u\@)
        LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
        LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
        ADD     src, src, 8*NBYTES
        ADD     dst, dst, 8*NBYTES
        STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
        STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
        STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
        STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
        STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
        STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
        PREFS(  0, 8*32(src) )
        PREFD(  1, 8*32(dst) )
        bne     len, rem, 1b
         nop

        /*
         * len == rem == the number of bytes left to copy < 8*NBYTES
         */
.Lcleanup_both_aligned\@:
        beqz    len, .Ldone\@
         sltu   t0, len, 4*NBYTES
        bnez    t0, .Lless_than_4units\@
         and    rem, len, (NBYTES-1)    # rem = len % NBYTES
        /*
         * len >= 4*NBYTES
         */
        LOAD( t0, UNIT(0)(src), .Ll_exc\@)
        LOAD( t1, UNIT(1)(src), .Ll_exc_copy\@)
        LOAD( t2, UNIT(2)(src), .Ll_exc_copy\@)
        LOAD( t3, UNIT(3)(src), .Ll_exc_copy\@)
        SUB     len, len, 4*NBYTES
        ADD     src, src, 4*NBYTES
        R10KCBARRIER(0(ra))
        STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
        STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
        STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
        STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
        beqz    len, .Ldone\@
        .set    noreorder
.Lless_than_4units\@:
        /*
         * rem = len % NBYTES
         */
        beq     rem, len, .Lcopy_bytes\@
         nop
1:
        R10KCBARRIER(0(ra))
        LOAD(t0, 0(src), .Ll_exc\@)
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
        STORE(t0, 0(dst), .Ls_exc_p1u\@)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, NBYTES
        bne     rem, len, 1b
        .set    noreorder

#ifndef CONFIG_CPU_MIPSR6
        /*
         * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
         * A loop would do only a byte at a time with possible branch
         * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
         * because can't assume read-access to dst.  Instead, use
         * STREST dst, which doesn't require read access to dst.
         *
         * This code should perform better than a simple loop on modern,
         * wide-issue mips processors because the code has fewer branches and
         * more instruction-level parallelism.
         */
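        /*
         * For instance, with NBYTES == 4 and len == 3: rem is 24 bits to
         * keep and bits is 8 bits to discard.  SHIFT_DISCARD drops the one
         * byte that was read but must not be stored, and STREST then writes
         * just the remaining three bytes, ending at dst + len - 1 (-1(t1)),
         * without ever reading dst.
         */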
#define bits t2
        beqz    len, .Ldone\@
         ADD    t1, dst, len    # t1 is just past last byte of dst
        li      bits, 8*NBYTES
        SLL     rem, len, 3     # rem = number of bits to keep
        LOAD(t0, 0(src), .Ll_exc\@)
        SUB     bits, bits, rem # bits = number of bits to discard
        SHIFT_DISCARD t0, t0, bits
        STREST(t0, -1(t1), .Ls_exc\@)
        jr      ra
         move   len, zero
.Ldst_unaligned\@:
        /*
         * dst is unaligned
         * t0 = src & ADDRMASK
         * t1 = dst & ADDRMASK; t1 > 0
         * len >= NBYTES
         *
         * Copy enough bytes to align dst
         * Set match = (src and dst have same alignment)
         */
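        /*
         * E.g. on a 64-bit kernel with dst misaligned by t1 == 3: one
         * unaligned LDFIRST/LDREST load plus an STFIRST store copies
         * t2 = NBYTES - t1 = 5 bytes, after which dst is NBYTES-aligned.
         * If src had the same misalignment (match == 0), both pointers are
         * now aligned and we branch back to .Lboth_aligned.
         */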
#define match rem
        LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
        ADD     t2, zero, NBYTES
        LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
        SUB     t2, t2, t1      # t2 = number of bytes copied
        xor     match, t0, t1
        R10KCBARRIER(0(ra))
        STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
        beq     len, t2, .Ldone\@
         SUB    len, len, t2
        ADD     dst, dst, t2
        beqz    match, .Lboth_aligned\@
         ADD    src, src, t2

.Lsrc_unaligned_dst_aligned\@:
        SRL     t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
        PREFS(  0, 3*32(src) )
        beqz    t0, .Lcleanup_src_unaligned\@
         and    rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
        PREFD(  1, 3*32(dst) )
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
        R10KCBARRIER(0(ra))
        LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
        LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
        SUB     len, len, 4*NBYTES
        LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
        LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
        LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
        LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
        LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
        LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
        PREFS(  0, 9*32(src) )          # 0 is PREF_LOAD  (not streamed)
        ADD     src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
        nop                             # improves slotting
#endif
        STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
        STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
        STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
        STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
        PREFD(  1, 9*32(dst) )          # 1 is PREF_STORE (not streamed)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
        bne     len, rem, 1b
        .set    noreorder

.Lcleanup_src_unaligned\@:
        beqz    len, .Ldone\@
         and    rem, len, NBYTES-1  # rem = len % NBYTES
        beq     rem, len, .Lcopy_bytes\@
         nop
1:
        R10KCBARRIER(0(ra))
        LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
        LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
        STORE(t0, 0(dst), .Ls_exc_p1u\@)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, NBYTES
        bne     len, rem, 1b
        .set    noreorder

#endif /* !CONFIG_CPU_MIPSR6 */
.Lcopy_bytes_checklen\@:
        beqz    len, .Ldone\@
         nop
.Lcopy_bytes\@:
        /* 0 < len < NBYTES  */
        R10KCBARRIER(0(ra))
#define COPY_BYTE(N)                    \
        LOADB(t0, N(src), .Ll_exc\@);   \
        SUB     len, len, 1;            \
        beqz    len, .Ldone\@;          \
        STOREB(t0, N(dst), .Ls_exc_p1\@)

        COPY_BYTE(0)
        COPY_BYTE(1)
#ifdef USE_DOUBLE
        COPY_BYTE(2)
        COPY_BYTE(3)
        COPY_BYTE(4)
        COPY_BYTE(5)
#endif
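        /*
         * If we get here, exactly one byte is left (len < NBYTES on entry
         * and COPY_BYTE() covered offsets 0 .. NBYTES-3), so it lives at
         * offset NBYTES-2.
         */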
        LOADB(t0, NBYTES-2(src), .Ll_exc\@)
        SUB     len, len, 1
        jr      ra
        STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
.Ldone\@:
        jr      ra
         nop

#ifdef CONFIG_CPU_MIPSR6
.Lcopy_unaligned_bytes\@:
1:
        COPY_BYTE(0)
        COPY_BYTE(1)
        COPY_BYTE(2)
        COPY_BYTE(3)
        COPY_BYTE(4)
        COPY_BYTE(5)
        COPY_BYTE(6)
        COPY_BYTE(7)
        ADD     src, src, 8
        b       1b
         ADD    dst, dst, 8
#endif /* CONFIG_CPU_MIPSR6 */
        .if __memcpy == 1
        END(memcpy)
        .set __memcpy, 0
        .hidden __memcpy
        .endif

.Ll_exc_copy\@:
        /*
         * Copy bytes from src until faulting load address (or until a
         * lb faults)
         *
         * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
         * may be more than a byte beyond the last address.
         * Hence, the lb below may get an exception.
         *
         * Assumes src < THREAD_BUADDR($28)
         */
        LOADK   t0, TI_TASK($28)
         nop
        LOADK   t0, THREAD_BUADDR(t0)
1:
        LOADB(t1, 0(src), .Ll_exc\@)
        ADD     src, src, 1
        sb      t1, 0(dst)      # can't fault -- we're copy_from_user
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 1
        bne     src, t0, 1b
        .set    noreorder
.Ll_exc\@:
        LOADK   t0, TI_TASK($28)
         nop
        LOADK   t0, THREAD_BUADDR(t0)   # t0 is just past last good address
         nop
        SUB     len, AT, t0             # len number of uncopied bytes
        jr      ra
         nop

#define SEXC(n)                                                 \
        .set    reorder;                        /* DADDI_WAR */ \
.Ls_exc_p ## n ## u\@:                                          \
        ADD     len, len, n*NBYTES;                             \
        jr      ra;                                             \
        .set    noreorder
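
/*
 * SEXC(n) builds the store fault handler used when n copy units had not
 * yet been written back: len has already been decremented for the whole
 * block, so the handler adds the n*NBYTES still-uncopied bytes back before
 * returning.
 */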

SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

.Ls_exc_p1\@:
        .set    reorder                         /* DADDI_WAR */
        ADD     len, len, 1
        jr      ra
        .set    noreorder
.Ls_exc\@:
        jr      ra
         nop
        .endm

        .align  5
LEAF(memmove)
EXPORT_SYMBOL(memmove)
        ADD     t0, a0, a2
        ADD     t1, a1, a2
        sltu    t0, a1, t0                      # dst + len <= src -> memcpy
        sltu    t1, a0, t1                      # dst >= src + len -> memcpy
        and     t0, t1
        beqz    t0, .L__memcpy
         move   v0, a0                          /* return value */
        beqz    a2, .Lr_out
        END(memmove)

        /* fall through to __rmemcpy */
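        /*
         * Overlapping buffers are copied one byte at a time: backwards,
         * from the end, when src < dst, and forwards otherwise, so no
         * source byte is clobbered before it has been read.
         */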
LEAF(__rmemcpy)                                 /* a0=dst a1=src a2=len */
         sltu   t0, a1, a0
        beqz    t0, .Lr_end_bytes_up            # src >= dst
         nop
        ADD     a0, a2                          # dst = dst + len
        ADD     a1, a2                          # src = src + len

.Lr_end_bytes:
        R10KCBARRIER(0(ra))
        lb      t0, -1(a1)
        SUB     a2, a2, 0x1
        sb      t0, -1(a0)
        SUB     a1, a1, 0x1
        .set    reorder                         /* DADDI_WAR */
        SUB     a0, a0, 0x1
        bnez    a2, .Lr_end_bytes
        .set    noreorder

.Lr_out:
        jr      ra
         move   a2, zero

.Lr_end_bytes_up:
        R10KCBARRIER(0(ra))
        lb      t0, (a1)
        SUB     a2, a2, 0x1
        sb      t0, (a0)
        ADD     a1, a1, 0x1
        .set    reorder                         /* DADDI_WAR */
        ADD     a0, a0, 0x1
        bnez    a2, .Lr_end_bytes_up
        .set    noreorder

        jr      ra
         move   a2, zero
        END(__rmemcpy)

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
        .align  5
LEAF(memcpy)                                    /* a0=dst a1=src a2=len */
EXPORT_SYMBOL(memcpy)
        move    v0, dst                         /* return value */
.L__memcpy:
FEXPORT(__copy_user)
EXPORT_SYMBOL(__copy_user)
        /* Legacy Mode, user <-> user */
        __BUILD_COPY_USER LEGACY_MODE USEROP USEROP

#ifdef CONFIG_EVA

/*
 * For EVA we need distinct symbols for reading and writing to user space.
 * This is because we need to use specific EVA instructions to perform the
 * virtual <-> physical translation when a virtual address is actually in user
 * space
 */
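
/*
 * Only the accesses that may actually touch user space get the EVA forms
 * and exception table entries: __copy_from_user_eva is built with
 * from=USEROP, to=KERNELOP, so its loads use the *e instructions while its
 * stores are ordinary kernel stores; __copy_to_user_eva is the mirror
 * image, and __copy_in_user_eva uses EVA forms for both.
 */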

/*
 * __copy_from_user (EVA)
 */

LEAF(__copy_from_user_eva)
EXPORT_SYMBOL(__copy_from_user_eva)
        __BUILD_COPY_USER EVA_MODE USEROP KERNELOP
END(__copy_from_user_eva)

/*
 * __copy_to_user (EVA)
 */

LEAF(__copy_to_user_eva)
EXPORT_SYMBOL(__copy_to_user_eva)
        __BUILD_COPY_USER EVA_MODE KERNELOP USEROP
END(__copy_to_user_eva)

/*
 * __copy_in_user (EVA)
 */

LEAF(__copy_in_user_eva)
EXPORT_SYMBOL(__copy_in_user_eva)
        __BUILD_COPY_USER EVA_MODE USEROP USEROP
END(__copy_in_user_eva)

#endif