linux/arch/mips/lib/memcpy.S
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

/*
 * Hack to resolve longstanding prefetch issue
 *
 * Prefetching may be fatal on some systems if we're prefetching beyond the
 * end of memory.  It's also a seriously bad idea on non-dma-coherent
 * systems.
 */
#ifdef CONFIG_DMA_NONCOHERENT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_MIPS_MALTA
#undef CONFIG_CPU_HAS_PREFETCH
#endif

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * include/asm-mips/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */
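
/*
 * Illustrative sketch (not assembled, names hypothetical): seen from C,
 * the two contracts above amount to roughly
 *
 *      p    = memcpy(dst, src, len);     // copies all len bytes, v0 == dst
 *      left = raw_copy(dst, src, len);   // wrapper around __copy_user:
 *                                        // 0 on success, else the number
 *                                        // of bytes left uncopied
 *
 * "raw_copy" stands in for the arch uaccess wrappers; __copy_user itself
 * is only reached through them because of its non-standard calling
 * convention (len is both an input and the result, in a2).
 */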

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contains the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */
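
/*
 * Worked example (illustrative numbers only): for a 100 byte copy,
 * AT = src_entry + 100.  If a load faults once roughly 40 bytes have been
 * copied, the saved bad address (THREAD_BUADDR) is src_entry + 40, so the
 * load handler reports
 *
 *      len = AT - THREAD_BUADDR = 100 - 40 = 60 uncopied bytes
 *
 * and, because (3) holds, dst_entry + 40 is the first destination byte
 * that still has to be zeroed before returning to the caller.
 */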

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
/* Prefetch type */
#define SRC_PREFETCH 1
#define DST_PREFETCH 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn    : Load/store instruction
 * type    : Instruction type
 * reg     : Register
 * addr    : Address
 * handler : Exception handler
 */

#define EXC(insn, type, reg, addr, handler)                     \
        .if \mode == LEGACY_MODE;                               \
9:              insn reg, addr;                                 \
                .section __ex_table,"a";                        \
                PTR     9b, handler;                            \
                .previous;                                      \
        /* This is assembled in EVA mode */                     \
        .else;                                                  \
                /* If loading from user or storing to user */   \
                .if ((\from == USEROP) && (type == LD_INSN)) || \
                    ((\to == USEROP) && (type == ST_INSN));     \
9:                      __BUILD_EVA_INSN(insn##e, reg, addr);   \
                        .section __ex_table,"a";                \
                        PTR     9b, handler;                    \
                        .previous;                              \
                .else;                                          \
                        /*                                      \
                         *  Still in EVA, but no need for       \
                         * exception handler or EVA insn        \
                         */                                     \
                        insn reg, addr;                         \
                .endif;                                         \
        .endif
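
/*
 * Example expansion (illustrative, label suffixes omitted): in
 * LEGACY_MODE,
 *
 *      LOAD(t0, 0(src), .Ll_exc)
 *
 * goes through EXC(lw, LD_INSN, ...) and assembles to the plain
 * instruction plus an exception table entry:
 *
 * 9:           lw      t0, 0(src)
 *              .section __ex_table, "a"
 *              PTR     9b, .Ll_exc
 *              .previous
 *
 * In EVA_MODE the same macro emits the lwe form (via __BUILD_EVA_INSN)
 * when the access really targets user space, and a bare lw otherwise.
 */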

/*
 * Only on the 64-bit kernel can we make use of 64-bit registers.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOADK ld /* No exception */
#define LOAD(reg, addr, handler)        EXC(ld, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)       EXC(ldl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)       EXC(ldr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)      EXC(sdl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)      EXC(sdr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)       EXC(sd, ST_INSN, reg, addr, handler)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SRA    dsra
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0      $8
#define t1      $9
#define t2      $10
#define t3      $11
#define t4      $12
#define t5      $13
#define t6      $14
#define t7      $15

#else

#define LOADK lw /* No exception */
#define LOAD(reg, addr, handler)        EXC(lw, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)       EXC(lwl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)       EXC(lwr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)      EXC(swl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)      EXC(swr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)       EXC(sw, ST_INSN, reg, addr, handler)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SRA    sra
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#define LOADB(reg, addr, handler)       EXC(lb, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)      EXC(sb, ST_INSN, reg, addr, handler)

#define _PREF(hint, addr, type)                                         \
        .if \mode == LEGACY_MODE;                                       \
                PREF(hint, addr);                                       \
        .else;                                                          \
                .if ((\from == USEROP) && (type == SRC_PREFETCH)) ||    \
                    ((\to == USEROP) && (type == DST_PREFETCH));        \
                        /*                                              \
                         * PREFE has only 9 bits for the offset         \
                         * compared to PREF which has 16, so it may     \
                         * need to use the $at register but this        \
                         * register should remain intact because it's   \
                         * used later on. Therefore use $v1.            \
                         */                                             \
                        .set at=v1;                                     \
                        PREFE(hint, addr);                              \
                        .set noat;                                      \
                .else;                                                  \
                        PREF(hint, addr);                               \
                .endif;                                                 \
        .endif

#define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
#define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)
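
/*
 * Example (illustrative): PREFS(0, 2*32(src)) is a plain
 * "PREF(0, 64(src))" in LEGACY_MODE.  In EVA_MODE with a user-space
 * source it becomes "PREFE(0, 64(src))" bracketed by ".set at=v1" /
 * ".set noat", so that any address synthesis forced by PREFE's short
 * 9-bit offset clobbers $v1 rather than $at.
 */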

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST  LOADL
#define STFIRST STORER
#define STREST  STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST  LOADR
#define STFIRST STOREL
#define STREST  STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)
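
/*
 * Example values (illustrative, 32-bit build, NBYTES == 4):
 * FIRST(0) == 0, REST(0) == 3, ADDRMASK == 3.  An unaligned word is
 * assembled from two partial accesses, e.g. on a little-endian CPU
 *
 *      LDFIRST(t0, FIRST(0)(src), handler)     # lwr t0, 0(src)
 *      LDREST (t0, REST(0)(src),  handler)     # lwl t0, 3(src)
 *
 * which together fill all four bytes of t0 regardless of src & ADDRMASK.
 */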

        .text
        .set    noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
        .set    noat
#else
        .set    at=v1
#endif

        .align  5

        /*
         * Macro to build the __copy_user common code
         * Arguments:
         * mode : LEGACY_MODE or EVA_MODE
         * from : Source operand. USEROP or KERNELOP
         * to   : Destination operand. USEROP or KERNELOP
         */
        .macro __BUILD_COPY_USER mode, from, to

        /* initialize __memcpy if this is the first time we execute this macro */
        .ifnotdef __memcpy
        .set __memcpy, 1
        .hidden __memcpy /* make sure it does not leak */
        .endif

        /*
         * Note: dst & src may be unaligned, len may be 0
         * Temps
         */
#define rem t8

        R10KCBARRIER(0(ra))
        /*
         * The "issue break"s below are very approximate.
         * Issue delays for dcache fills will perturb the schedule, as will
         * load queue full replay traps, etc.
         *
         * If len < NBYTES use byte operations.
         */
        PREFS(  0, 0(src) )
        PREFD(  1, 0(dst) )
        sltu    t2, len, NBYTES
        and     t1, dst, ADDRMASK
        PREFS(  0, 1*32(src) )
        PREFD(  1, 1*32(dst) )
        bnez    t2, .Lcopy_bytes_checklen\@
         and    t0, src, ADDRMASK
        PREFS(  0, 2*32(src) )
        PREFD(  1, 2*32(dst) )
#ifndef CONFIG_CPU_MIPSR6
        bnez    t1, .Ldst_unaligned\@
         nop
        bnez    t0, .Lsrc_unaligned_dst_aligned\@
#else
        or      t0, t0, t1
        bnez    t0, .Lcopy_unaligned_bytes\@
#endif
        /*
         * use delay slot for fall-through
         * src and dst are aligned; need to compute rem
         */
.Lboth_aligned\@:
         SRL    t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
        beqz    t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
         and    rem, len, (8*NBYTES-1)   # rem = len % (8*NBYTES)
        PREFS(  0, 3*32(src) )
        PREFD(  1, 3*32(dst) )
        .align  4
1:
        R10KCBARRIER(0(ra))
        LOAD(t0, UNIT(0)(src), .Ll_exc\@)
        LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
        LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
        LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
        SUB     len, len, 8*NBYTES
        LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
        LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
        STORE(t0, UNIT(0)(dst), .Ls_exc_p8u\@)
        STORE(t1, UNIT(1)(dst), .Ls_exc_p7u\@)
        LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
        LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
        ADD     src, src, 8*NBYTES
        ADD     dst, dst, 8*NBYTES
        STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
        STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
        STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
        STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
        STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
        STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
        PREFS(  0, 8*32(src) )
        PREFD(  1, 8*32(dst) )
        bne     len, rem, 1b
         nop

        /*
         * len == rem == the number of bytes left to copy < 8*NBYTES
         */
.Lcleanup_both_aligned\@:
        beqz    len, .Ldone\@
         sltu   t0, len, 4*NBYTES
        bnez    t0, .Lless_than_4units\@
         and    rem, len, (NBYTES-1)    # rem = len % NBYTES
        /*
         * len >= 4*NBYTES
         */
        LOAD( t0, UNIT(0)(src), .Ll_exc\@)
        LOAD( t1, UNIT(1)(src), .Ll_exc_copy\@)
        LOAD( t2, UNIT(2)(src), .Ll_exc_copy\@)
        LOAD( t3, UNIT(3)(src), .Ll_exc_copy\@)
        SUB     len, len, 4*NBYTES
        ADD     src, src, 4*NBYTES
        R10KCBARRIER(0(ra))
        STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
        STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
        STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
        STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
        beqz    len, .Ldone\@
        .set    noreorder
.Lless_than_4units\@:
        /*
         * rem = len % NBYTES
         */
        beq     rem, len, .Lcopy_bytes\@
         nop
1:
        R10KCBARRIER(0(ra))
        LOAD(t0, 0(src), .Ll_exc\@)
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
        STORE(t0, 0(dst), .Ls_exc_p1u\@)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, NBYTES
        bne     rem, len, 1b
        .set    noreorder

#ifndef CONFIG_CPU_MIPSR6
        /*
         * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
         * A loop would do only a byte at a time with possible branch
         * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
         * because can't assume read-access to dst.  Instead, use
         * STREST dst, which doesn't require read access to dst.
         *
         * This code should perform better than a simple loop on modern,
         * wide-issue mips processors because the code has fewer branches and
         * more instruction-level parallelism.
         */
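        /*
         * Worked example (illustrative, NBYTES == 4): with 3 bytes left,
         * rem = 3*8 = 24 bits to keep and bits = 32 - 24 = 8 bits to
         * discard.  SHIFT_DISCARD drops the one unwanted byte from t0 and
         * STREST writes exactly the remaining 3 bytes ending at
         * dst + len - 1, without ever reading from dst.
         */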
#define bits t2
        beqz    len, .Ldone\@
         ADD    t1, dst, len    # t1 is just past last byte of dst
        li      bits, 8*NBYTES
        SLL     rem, len, 3     # rem = number of bits to keep
        LOAD(t0, 0(src), .Ll_exc\@)
        SUB     bits, bits, rem # bits = number of bits to discard
        SHIFT_DISCARD t0, t0, bits
        STREST(t0, -1(t1), .Ls_exc\@)
        jr      ra
         move   len, zero
.Ldst_unaligned\@:
        /*
         * dst is unaligned
         * t0 = src & ADDRMASK
         * t1 = dst & ADDRMASK; t1 > 0
         * len >= NBYTES
         *
         * Copy enough bytes to align dst
         * Set match = (src and dst have same alignment)
         */
#define match rem
        LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
        ADD     t2, zero, NBYTES
        LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
        SUB     t2, t2, t1      # t2 = number of bytes copied
        xor     match, t0, t1
        R10KCBARRIER(0(ra))
        STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
        beq     len, t2, .Ldone\@
         SUB    len, len, t2
        ADD     dst, dst, t2
        beqz    match, .Lboth_aligned\@
         ADD    src, src, t2

.Lsrc_unaligned_dst_aligned\@:
        SRL     t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
        PREFS(  0, 3*32(src) )
        beqz    t0, .Lcleanup_src_unaligned\@
         and    rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
        PREFD(  1, 3*32(dst) )
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
        R10KCBARRIER(0(ra))
        LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
        LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
        SUB     len, len, 4*NBYTES
        LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
        LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
        LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
        LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
        LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
        LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
        PREFS(  0, 9*32(src) )          # 0 is PREF_LOAD  (not streamed)
        ADD     src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
        nop                             # improves slotting
#endif
        STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
        STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
        STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
        STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
        PREFD(  1, 9*32(dst) )          # 1 is PREF_STORE (not streamed)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
        bne     len, rem, 1b
        .set    noreorder

.Lcleanup_src_unaligned\@:
        beqz    len, .Ldone\@
         and    rem, len, NBYTES-1  # rem = len % NBYTES
        beq     rem, len, .Lcopy_bytes\@
         nop
1:
        R10KCBARRIER(0(ra))
        LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
        LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
        STORE(t0, 0(dst), .Ls_exc_p1u\@)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, NBYTES
        bne     len, rem, 1b
        .set    noreorder

#endif /* !CONFIG_CPU_MIPSR6 */
.Lcopy_bytes_checklen\@:
        beqz    len, .Ldone\@
         nop
.Lcopy_bytes\@:
        /* 0 < len < NBYTES  */
        R10KCBARRIER(0(ra))
#define COPY_BYTE(N)                    \
        LOADB(t0, N(src), .Ll_exc\@);   \
        SUB     len, len, 1;            \
        beqz    len, .Ldone\@;          \
        STOREB(t0, N(dst), .Ls_exc_p1\@)

        COPY_BYTE(0)
        COPY_BYTE(1)
#ifdef USE_DOUBLE
        COPY_BYTE(2)
        COPY_BYTE(3)
        COPY_BYTE(4)
        COPY_BYTE(5)
#endif
        LOADB(t0, NBYTES-2(src), .Ll_exc\@)
        SUB     len, len, 1
        jr      ra
        STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
.Ldone\@:
        jr      ra
         nop

#ifdef CONFIG_CPU_MIPSR6
.Lcopy_unaligned_bytes\@:
1:
        COPY_BYTE(0)
        COPY_BYTE(1)
        COPY_BYTE(2)
        COPY_BYTE(3)
        COPY_BYTE(4)
        COPY_BYTE(5)
        COPY_BYTE(6)
        COPY_BYTE(7)
        ADD     src, src, 8
        b       1b
         ADD    dst, dst, 8
#endif /* CONFIG_CPU_MIPSR6 */
        .if __memcpy == 1
        END(memcpy)
        .set __memcpy, 0
        .hidden __memcpy
        .endif

.Ll_exc_copy\@:
        /*
         * Copy bytes from src until faulting load address (or until a
         * lb faults)
         *
         * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
         * may be more than a byte beyond the last address.
         * Hence, the lb below may get an exception.
         *
         * Assumes src < THREAD_BUADDR($28)
         */
        LOADK   t0, TI_TASK($28)
         nop
        LOADK   t0, THREAD_BUADDR(t0)
1:
        LOADB(t1, 0(src), .Ll_exc\@)
        ADD     src, src, 1
        sb      t1, 0(dst)      # can't fault -- we're copy_from_user
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 1
        bne     src, t0, 1b
        .set    noreorder
.Ll_exc\@:
        LOADK   t0, TI_TASK($28)
         nop
        LOADK   t0, THREAD_BUADDR(t0)   # t0 is just past last good address
         nop
        SUB     len, AT, t0             # len = number of uncopied bytes
        bnez    t6, .Ldone\@    /* Skip the zeroing part if inatomic */
        /*
         * Here's where we rely on src and dst being incremented in tandem,
         *   See (3) above.
         * dst += (fault addr - src) to put dst at first byte to clear
         */
        ADD     dst, t0                 # compute start address in a1
        SUB     dst, src
        /*
         * Clear len bytes starting at dst.  Can't call __bzero because it
         * might modify len.  An inefficient loop for these rare times...
         */
        .set    reorder                         /* DADDI_WAR */
        SUB     src, len, 1
        beqz    len, .Ldone\@
        .set    noreorder
1:      sb      zero, 0(dst)
        ADD     dst, dst, 1
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
        bnez    src, 1b
         SUB    src, src, 1
#else
        .set    push
        .set    noat
        li      v1, 1
        bnez    src, 1b
         SUB    src, src, v1
        .set    pop
#endif
        jr      ra
         nop


#define SEXC(n)                                                 \
        .set    reorder;                        /* DADDI_WAR */ \
.Ls_exc_p ## n ## u\@:                                          \
        ADD     len, len, n*NBYTES;                             \
        jr      ra;                                             \
        .set    noreorder

SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

.Ls_exc_p1\@:
        .set    reorder                         /* DADDI_WAR */
        ADD     len, len, 1
        jr      ra
        .set    noreorder
.Ls_exc\@:
        jr      ra
         nop
        .endm

        .align  5
LEAF(memmove)
        ADD     t0, a0, a2
        ADD     t1, a1, a2
        sltu    t0, a1, t0                      # dst + len <= src -> memcpy
        sltu    t1, a0, t1                      # dst >= src + len -> memcpy
        and     t0, t1
        beqz    t0, .L__memcpy
         move   v0, a0                          /* return value */
        beqz    a2, .Lr_out
        END(memmove)

        /* fall through to __rmemcpy */
LEAF(__rmemcpy)                                 /* a0=dst a1=src a2=len */
         sltu   t0, a1, a0
        beqz    t0, .Lr_end_bytes_up            # src >= dst
         nop
        ADD     a0, a2                          # dst = dst + len
        ADD     a1, a2                          # src = src + len

.Lr_end_bytes:
        R10KCBARRIER(0(ra))
        lb      t0, -1(a1)
        SUB     a2, a2, 0x1
        sb      t0, -1(a0)
        SUB     a1, a1, 0x1
        .set    reorder                         /* DADDI_WAR */
        SUB     a0, a0, 0x1
        bnez    a2, .Lr_end_bytes
        .set    noreorder

.Lr_out:
        jr      ra
         move   a2, zero

.Lr_end_bytes_up:
        R10KCBARRIER(0(ra))
        lb      t0, (a1)
        SUB     a2, a2, 0x1
        sb      t0, (a0)
        ADD     a1, a1, 0x1
        .set    reorder                         /* DADDI_WAR */
        ADD     a0, a0, 0x1
        bnez    a2, .Lr_end_bytes_up
        .set    noreorder

        jr      ra
         move   a2, zero
        END(__rmemcpy)

/*
 * t6 is used as a flag to note inatomic mode.
 */
LEAF(__copy_user_inatomic)
        b       __copy_user_common
        li      t6, 1
        END(__copy_user_inatomic)

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
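
/*
 * Illustrative usage (C level, not assembled): the residue __copy_user
 * leaves in len is what the uaccess helpers ultimately hand back, e.g.
 *
 *      if (copy_from_user(kbuf, ubuf, n))      // non-zero: some bytes
 *              return -EFAULT;                 // were not copied
 *
 * kbuf/ubuf/n are hypothetical names used only for this sketch.
 */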
        .align  5
LEAF(memcpy)                                    /* a0=dst a1=src a2=len */
        move    v0, dst                         /* return value */
.L__memcpy:
FEXPORT(__copy_user)
        li      t6, 0   /* not inatomic */
__copy_user_common:
        /* Legacy Mode, user <-> user */
        __BUILD_COPY_USER LEGACY_MODE USEROP USEROP

#ifdef CONFIG_EVA

/*
 * For EVA we need distinct symbols for reading and writing to user space.
 * This is because we need to use specific EVA instructions to perform the
 * virtual <-> physical translation when a virtual address is actually in user
 * space
 */
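
/*
 * Example (illustrative): __copy_from_user_eva builds the body below with
 * "from = USEROP, to = KERNELOP", so every LOAD/LDFIRST/LDREST goes
 * through the EVA form (lwe/lbe/... plus an __ex_table entry) while the
 * stores to the kernel buffer stay ordinary sw/sd without a handler.
 * __copy_to_user_eva is the mirror image of this.
 */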

LEAF(__copy_user_inatomic_eva)
        b       __copy_from_user_common
        li      t6, 1
        END(__copy_user_inatomic_eva)

/*
 * __copy_from_user (EVA)
 */

LEAF(__copy_from_user_eva)
        li      t6, 0   /* not inatomic */
__copy_from_user_common:
        __BUILD_COPY_USER EVA_MODE USEROP KERNELOP
END(__copy_from_user_eva)


/*
 * __copy_to_user (EVA)
 */

LEAF(__copy_to_user_eva)
__BUILD_COPY_USER EVA_MODE KERNELOP USEROP
END(__copy_to_user_eva)

/*
 * __copy_in_user (EVA)
 */

LEAF(__copy_in_user_eva)
__BUILD_COPY_USER EVA_MODE USEROP USEROP
END(__copy_in_user_eva)

#endif