linux/arch/mips/lib/memcpy.S
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

/*
 * Hack to resolve longstanding prefetch issue
 *
 * Prefetching may be fatal on some systems if we're prefetching beyond the
 * end of memory.  It's also a seriously bad idea on non-DMA-coherent
 * systems.
 */
#ifdef CONFIG_DMA_NONCOHERENT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_MIPS_MALTA
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_CPU_MIPSR6
#undef CONFIG_CPU_HAS_PREFETCH
#endif

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * arch/mips/include/asm/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
/* Prefetch type */
#define SRC_PREFETCH 1
#define DST_PREFETCH 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2
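/*
 * LEGACY_MODE builds the plain kernel copy; EVA_MODE builds the variants
 * that must use the EVA (Enhanced Virtual Addressing) user accessors.
 * USEROP/KERNELOP say whether the source/destination of a copy lives in
 * user or kernel space, which decides below whether an EVA instruction
 * and an exception table entry are needed.
 */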

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn    : Load/store instruction
 * type    : Instruction type
 * reg     : Register
 * addr    : Address
 * handler : Exception handler
 */

#define EXC(insn, type, reg, addr, handler)                     \
        .if \mode == LEGACY_MODE;                               \
9:              insn reg, addr;                                 \
                .section __ex_table,"a";                        \
                PTR     9b, handler;                            \
                .previous;                                      \
        /* This is assembled in EVA mode */                     \
        .else;                                                  \
                /* If loading from user or storing to user */   \
                .if ((\from == USEROP) && (type == LD_INSN)) || \
                    ((\to == USEROP) && (type == ST_INSN));     \
9:                      __BUILD_EVA_INSN(insn##e, reg, addr);   \
                        .section __ex_table,"a";                \
                        PTR     9b, handler;                    \
                        .previous;                              \
                .else;                                          \
                        /*                                      \
                         *  Still in EVA, but no need for       \
                         * exception handler or EVA insn        \
                         */                                     \
                        insn reg, addr;                         \
                .endif;                                         \
        .endif
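/*
 * For example, in LEGACY_MODE
 *         EXC(lw, LD_INSN, t0, 0(src), handler)
 * assembles to "9: lw t0, 0(src)" plus an __ex_table entry that sends a
 * fault at label 9b to handler; in EVA_MODE the same invocation emits the
 * lwe form when the access targets user space.
 */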

/*
 * Only on the 64-bit kernel can we make use of 64-bit registers.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOADK ld /* No exception */
#define LOAD(reg, addr, handler)        EXC(ld, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)       EXC(ldl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)       EXC(ldr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)      EXC(sdl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)      EXC(sdr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)       EXC(sd, ST_INSN, reg, addr, handler)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SRA    dsra
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

/*
 * As we share the code base with the mips32 tree (which uses the o32 ABI
 * register definitions), we need to redefine the register definitions from
 * the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0      $8
#define t1      $9
#define t2      $10
#define t3      $11
#define t4      $12
#define t5      $13
#define t6      $14
#define t7      $15

#else

#define LOADK lw /* No exception */
#define LOAD(reg, addr, handler)        EXC(lw, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)       EXC(lwl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)       EXC(lwr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)      EXC(swl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)      EXC(swr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)       EXC(sw, ST_INSN, reg, addr, handler)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SRA    sra
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#define LOADB(reg, addr, handler)       EXC(lb, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)      EXC(sb, ST_INSN, reg, addr, handler)

#ifdef CONFIG_CPU_HAS_PREFETCH
# define _PREF(hint, addr, type)                                        \
        .if \mode == LEGACY_MODE;                                       \
                kernel_pref(hint, addr);                                \
        .else;                                                          \
                .if ((\from == USEROP) && (type == SRC_PREFETCH)) ||    \
                    ((\to == USEROP) && (type == DST_PREFETCH));        \
                        /*                                              \
                         * PREFE has only 9 bits for the offset         \
                         * compared to PREF which has 16, so it may     \
                         * need to use the $at register, but that       \
                         * register should remain intact because it's   \
                         * used later on. Therefore use $v1.            \
                         */                                             \
                        .set at=v1;                                     \
                        user_pref(hint, addr);                          \
                        .set noat;                                      \
                .else;                                                  \
                        kernel_pref(hint, addr);                        \
                .endif;                                                 \
        .endif
#else
# define _PREF(hint, addr, type)
#endif

#define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
#define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)
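/*
 * PREFS/PREFD prefetch a source/destination cache line; both expand to
 * nothing when CONFIG_CPU_HAS_PREFETCH has been disabled above.
 */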

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST  LOADL
#define STFIRST STORER
#define STREST  STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST  LOADR
#define STFIRST STOREL
#define STREST  STORER
#define SHIFT_DISCARD SRLV
#endif
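/*
 * LDFIRST/STFIRST access the lowest-addressed bytes of an unaligned unit
 * and LDREST/STREST the remaining bytes, so the lwl/lwr (ldl/ldr) pairing
 * flips with endianness.  SHIFT_DISCARD shifts bytes that must not be
 * stored out of a register before a partial STREST.
 */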

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)
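/*
 * FIRST(n)/REST(n) are the offsets of the first and last byte of copy
 * unit n; ADDRMASK extracts an address's offset within a unit.
 */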

        .text
        .set    noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
        .set    noat
#else
        .set    at=v1
#endif

        .align  5

        /*
         * Macro to build the __copy_user common code
         * Arguments:
         * mode : LEGACY_MODE or EVA_MODE
         * from : Source operand. USEROP or KERNELOP
         * to   : Destination operand. USEROP or KERNELOP
         */
        .macro __BUILD_COPY_USER mode, from, to

        /* initialize __memcpy if this is the first time we execute this macro */
        .ifnotdef __memcpy
        .set __memcpy, 1
        .hidden __memcpy /* make sure it does not leak */
        .endif

        /*
         * Note: dst & src may be unaligned, len may be 0
         * Temps
         */
#define rem t8

        R10KCBARRIER(0(ra))
        /*
         * The "issue break"s below are very approximate.
         * Issue delays for dcache fills will perturb the schedule, as will
         * load queue full replay traps, etc.
         *
         * If len < NBYTES use byte operations.
         */
        PREFS(  0, 0(src) )
        PREFD(  1, 0(dst) )
        sltu    t2, len, NBYTES
        and     t1, dst, ADDRMASK
        PREFS(  0, 1*32(src) )
        PREFD(  1, 1*32(dst) )
        bnez    t2, .Lcopy_bytes_checklen\@
         and    t0, src, ADDRMASK
        PREFS(  0, 2*32(src) )
        PREFD(  1, 2*32(dst) )
#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
        bnez    t1, .Ldst_unaligned\@
         nop
        bnez    t0, .Lsrc_unaligned_dst_aligned\@
#else /* CONFIG_CPU_NO_LOAD_STORE_LR */
        or      t0, t0, t1
        bnez    t0, .Lcopy_unaligned_bytes\@
#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
        /*
         * use delay slot for fall-through
         * src and dst are aligned; need to compute rem
         */
.Lboth_aligned\@:
         SRL    t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
        beqz    t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
         and    rem, len, (8*NBYTES-1)   # rem = len % (8*NBYTES)
        PREFS(  0, 3*32(src) )
        PREFD(  1, 3*32(dst) )
        .align  4
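        /*
         * Main unrolled loop: each pass moves 8*NBYTES, with the loads
         * issued well ahead of the matching stores; t0/t1 are reused for
         * units 6 and 7 once units 0 and 1 have been stored.
         */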
1:
        R10KCBARRIER(0(ra))
        LOAD(t0, UNIT(0)(src), .Ll_exc\@)
        LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
        LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
        LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
        SUB     len, len, 8*NBYTES
        LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
        LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
        STORE(t0, UNIT(0)(dst), .Ls_exc_p8u\@)
        STORE(t1, UNIT(1)(dst), .Ls_exc_p7u\@)
        LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
        LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
        ADD     src, src, 8*NBYTES
        ADD     dst, dst, 8*NBYTES
        STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
        STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
        STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
        STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
        STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
        STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
        PREFS(  0, 8*32(src) )
        PREFD(  1, 8*32(dst) )
        bne     len, rem, 1b
         nop

        /*
         * len == rem == the number of bytes left to copy < 8*NBYTES
         */
.Lcleanup_both_aligned\@:
        beqz    len, .Ldone\@
         sltu   t0, len, 4*NBYTES
        bnez    t0, .Lless_than_4units\@
         and    rem, len, (NBYTES-1)    # rem = len % NBYTES
        /*
         * len >= 4*NBYTES
         */
        LOAD( t0, UNIT(0)(src), .Ll_exc\@)
        LOAD( t1, UNIT(1)(src), .Ll_exc_copy\@)
        LOAD( t2, UNIT(2)(src), .Ll_exc_copy\@)
        LOAD( t3, UNIT(3)(src), .Ll_exc_copy\@)
        SUB     len, len, 4*NBYTES
        ADD     src, src, 4*NBYTES
        R10KCBARRIER(0(ra))
        STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
        STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
        STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
        STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
        beqz    len, .Ldone\@
        .set    noreorder
.Lless_than_4units\@:
        /*
         * rem = len % NBYTES
         */
        beq     rem, len, .Lcopy_bytes\@
         nop
1:
        R10KCBARRIER(0(ra))
        LOAD(t0, 0(src), .Ll_exc\@)
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
        STORE(t0, 0(dst), .Ls_exc_p1u\@)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, NBYTES
        bne     rem, len, 1b
        .set    noreorder

#ifndef CONFIG_CPU_NO_LOAD_STORE_LR
        /*
         * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
         * A loop would do only a byte at a time with possible branch
         * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
         * because can't assume read-access to dst.  Instead, use
         * STREST dst, which doesn't require read access to dst.
         *
         * This code should perform better than a simple loop on modern,
         * wide-issue mips processors because the code has fewer branches and
         * more instruction-level parallelism.
         */
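        /*
         * For example, with NBYTES == 4 and len == 3: rem = 24 bits to
         * keep, bits = 8 bits to discard; SHIFT_DISCARD drops the byte
         * that must not be written and STREST stores exactly the three
         * live bytes ending at dst + len - 1.
         */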
#define bits t2
        beqz    len, .Ldone\@
         ADD    t1, dst, len    # t1 is just past last byte of dst
        li      bits, 8*NBYTES
        SLL     rem, len, 3     # rem = number of bits to keep
        LOAD(t0, 0(src), .Ll_exc\@)
        SUB     bits, bits, rem # bits = number of bits to discard
        SHIFT_DISCARD t0, t0, bits
        STREST(t0, -1(t1), .Ls_exc\@)
        jr      ra
         move   len, zero
.Ldst_unaligned\@:
        /*
         * dst is unaligned
         * t0 = src & ADDRMASK
         * t1 = dst & ADDRMASK; t1 > 0
         * len >= NBYTES
         *
         * Copy enough bytes to align dst
         * Set match = (src and dst have same alignment)
         */
#define match rem
        LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
        ADD     t2, zero, NBYTES
        LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
        SUB     t2, t2, t1      # t2 = number of bytes copied
        xor     match, t0, t1
        R10KCBARRIER(0(ra))
        STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
        beq     len, t2, .Ldone\@
         SUB    len, len, t2
        ADD     dst, dst, t2
        beqz    match, .Lboth_aligned\@
         ADD    src, src, t2

.Lsrc_unaligned_dst_aligned\@:
        SRL     t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
        PREFS(  0, 3*32(src) )
        beqz    t0, .Lcleanup_src_unaligned\@
         and    rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
        PREFD(  1, 3*32(dst) )
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
        R10KCBARRIER(0(ra))
        LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
        LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
        SUB     len, len, 4*NBYTES
        LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
        LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
        LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
        LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
        LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
        LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
        PREFS(  0, 9*32(src) )          # 0 is PREF_LOAD  (not streamed)
        ADD     src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
        nop                             # improves slotting
#endif
        STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
        STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
        STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
        STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
        PREFD(  1, 9*32(dst) )          # 1 is PREF_STORE (not streamed)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
        bne     len, rem, 1b
        .set    noreorder

.Lcleanup_src_unaligned\@:
        beqz    len, .Ldone\@
         and    rem, len, NBYTES-1  # rem = len % NBYTES
        beq     rem, len, .Lcopy_bytes\@
         nop
1:
        R10KCBARRIER(0(ra))
        LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
        LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
        STORE(t0, 0(dst), .Ls_exc_p1u\@)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, NBYTES
        bne     len, rem, 1b
        .set    noreorder

#endif /* !CONFIG_CPU_NO_LOAD_STORE_LR */
.Lcopy_bytes_checklen\@:
        beqz    len, .Ldone\@
         nop
.Lcopy_bytes\@:
        /* 0 < len < NBYTES  */
        R10KCBARRIER(0(ra))
#define COPY_BYTE(N)                    \
        LOADB(t0, N(src), .Ll_exc\@);   \
        SUB     len, len, 1;            \
        beqz    len, .Ldone\@;          \
        STOREB(t0, N(dst), .Ls_exc_p1\@)
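        /*
         * COPY_BYTE(N) copies byte N and returns through .Ldone once len
         * hits zero; the store sits in the branch delay slot, so it is
         * still executed for the final byte.
         */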

        COPY_BYTE(0)
        COPY_BYTE(1)
#ifdef USE_DOUBLE
        COPY_BYTE(2)
        COPY_BYTE(3)
        COPY_BYTE(4)
        COPY_BYTE(5)
#endif
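        /* len must have been NBYTES-1: copy the final byte (offset NBYTES-2) */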
        LOADB(t0, NBYTES-2(src), .Ll_exc\@)
        SUB     len, len, 1
        jr      ra
        STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
.Ldone\@:
        jr      ra
         nop

#ifdef CONFIG_CPU_NO_LOAD_STORE_LR
.Lcopy_unaligned_bytes\@:
1:
        COPY_BYTE(0)
        COPY_BYTE(1)
        COPY_BYTE(2)
        COPY_BYTE(3)
        COPY_BYTE(4)
        COPY_BYTE(5)
        COPY_BYTE(6)
        COPY_BYTE(7)
        ADD     src, src, 8
        b       1b
         ADD    dst, dst, 8
#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */
        .if __memcpy == 1
        END(memcpy)
        .set __memcpy, 0
        .hidden __memcpy
        .endif

.Ll_exc_copy\@:
        /*
         * Copy bytes from src until faulting load address (or until a
         * lb faults)
         *
         * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
         * may be more than a byte beyond the last address.
         * Hence, the lb below may get an exception.
         *
         * Assumes src < THREAD_BUADDR($28)
         */
        LOADK   t0, TI_TASK($28)
         nop
        LOADK   t0, THREAD_BUADDR(t0)
1:
        LOADB(t1, 0(src), .Ll_exc\@)
        ADD     src, src, 1
        sb      t1, 0(dst)      # can't fault -- we're copy_from_user
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 1
        bne     src, t0, 1b
        .set    noreorder
.Ll_exc\@:
        LOADK   t0, TI_TASK($28)
         nop
        LOADK   t0, THREAD_BUADDR(t0)   # t0 is just past last good address
         nop
        SUB     len, AT, t0             # len number of uncopied bytes
        jr      ra
         nop

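/*
 * SEXC(n) builds the store-fault handler .Ls_exc_pNu: len was already
 * decremented for the whole group of units being copied, but n of them
 * were never stored, so add n*NBYTES back before returning the uncopied
 * count in len.
 */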
#define SEXC(n)                                                 \
        .set    reorder;                        /* DADDI_WAR */ \
.Ls_exc_p ## n ## u\@:                                          \
        ADD     len, len, n*NBYTES;                             \
        jr      ra;                                             \
        .set    noreorder

SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

.Ls_exc_p1\@:
        .set    reorder                         /* DADDI_WAR */
        ADD     len, len, 1
        jr      ra
        .set    noreorder
.Ls_exc\@:
        jr      ra
         nop
        .endm

#ifndef CONFIG_HAVE_PLAT_MEMCPY
        .align  5
LEAF(memmove)
EXPORT_SYMBOL(memmove)
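        /*
         * t0 ends up non-zero iff [dst, dst+len) and [src, src+len)
         * overlap; non-overlapping buffers take the plain forward
         * memcpy path.
         */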
        ADD     t0, a0, a2
        ADD     t1, a1, a2
        sltu    t0, a1, t0                      # dst + len <= src -> memcpy
        sltu    t1, a0, t1                      # dst >= src + len -> memcpy
        and     t0, t1
        beqz    t0, .L__memcpy
         move   v0, a0                          /* return value */
        beqz    a2, .Lr_out
        END(memmove)

        /* fall through to __rmemcpy */
LEAF(__rmemcpy)                                 /* a0=dst a1=src a2=len */
         sltu   t0, a1, a0
        beqz    t0, .Lr_end_bytes_up            # src >= dst
         nop
        ADD     a0, a2                          # dst = dst + len
        ADD     a1, a2                          # src = src + len

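        /*
         * src < dst: copy backwards from the end so that the overlapping
         * tail of src is read before it is overwritten.
         */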
.Lr_end_bytes:
        R10KCBARRIER(0(ra))
        lb      t0, -1(a1)
        SUB     a2, a2, 0x1
        sb      t0, -1(a0)
        SUB     a1, a1, 0x1
        .set    reorder                         /* DADDI_WAR */
        SUB     a0, a0, 0x1
        bnez    a2, .Lr_end_bytes
        .set    noreorder

.Lr_out:
        jr      ra
         move   a2, zero

.Lr_end_bytes_up:
        R10KCBARRIER(0(ra))
        lb      t0, (a1)
        SUB     a2, a2, 0x1
        sb      t0, (a0)
        ADD     a1, a1, 0x1
        .set    reorder                         /* DADDI_WAR */
        ADD     a0, a0, 0x1
        bnez    a2, .Lr_end_bytes_up
        .set    noreorder

        jr      ra
         move   a2, zero
        END(__rmemcpy)

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
        .align  5
LEAF(memcpy)                                    /* a0=dst a1=src a2=len */
EXPORT_SYMBOL(memcpy)
        move    v0, dst                         /* return value */
.L__memcpy:
FEXPORT(__copy_user)
EXPORT_SYMBOL(__copy_user)
        /* Legacy Mode, user <-> user */
        __BUILD_COPY_USER LEGACY_MODE USEROP USEROP

#endif

#ifdef CONFIG_EVA

/*
 * For EVA we need distinct symbols for reading and writing to user space.
 * This is because we need to use specific EVA instructions to perform the
 * virtual <-> physical translation when a virtual address is actually in user
 * space
 */

/*
 * __copy_from_user (EVA)
 */

LEAF(__copy_from_user_eva)
EXPORT_SYMBOL(__copy_from_user_eva)
        __BUILD_COPY_USER EVA_MODE USEROP KERNELOP
END(__copy_from_user_eva)



/*
 * __copy_to_user (EVA)
 */

LEAF(__copy_to_user_eva)
EXPORT_SYMBOL(__copy_to_user_eva)
__BUILD_COPY_USER EVA_MODE KERNELOP USEROP
END(__copy_to_user_eva)

/*
 * __copy_in_user (EVA)
 */

LEAF(__copy_in_user_eva)
EXPORT_SYMBOL(__copy_in_user_eva)
__BUILD_COPY_USER EVA_MODE USEROP USEROP
END(__copy_in_user_eva)

#endif