linux/arch/mips/lib/csum_partial.S
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/export.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0      $8
#define t1      $9
#define t2      $10
#define t3      $11
#define t4      $12
#define t5      $13
#define t6      $14
#define t7      $15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD   ld
#define LOAD32 lwu
#define ADD    daddu
#define NBYTES 8

#else

#define LOAD   lw
#define LOAD32 lw
#define ADD    addu
#define NBYTES 4

#endif /* USE_DOUBLE */

#define UNIT(unit)  ((unit)*NBYTES)

#define ADDC(sum,reg)                                           \
        .set    push;                                           \
        .set    noat;                                           \
        ADD     sum, reg;                                       \
        sltu    v1, sum, reg;                                   \
        ADD     sum, v1;                                        \
        .set    pop

#define ADDC32(sum,reg)                                         \
        .set    push;                                           \
        .set    noat;                                           \
        addu    sum, reg;                                       \
        sltu    v1, sum, reg;                                   \
        addu    sum, v1;                                        \
        .set    pop

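/*
 * A minimal C sketch of what ADDC/ADDC32 compute (illustrative only, not
 * part of the build): a plain add followed by folding the carry back in,
 * i.e. the end-around-carry step of a one's-complement sum.  The helper
 * name is ours, not a kernel API.
 *
 *	static inline unsigned long add_with_carry(unsigned long sum,
 *						   unsigned long val)
 *	{
 *		sum += val;
 *		if (sum < val)		// unsigned add wrapped
 *			sum++;		// fold the carry back in
 *		return sum;
 *	}
 */
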
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)    \
        LOAD    _t0, (offset + UNIT(0))(src);                   \
        LOAD    _t1, (offset + UNIT(1))(src);                   \
        LOAD    _t2, (offset + UNIT(2))(src);                   \
        LOAD    _t3, (offset + UNIT(3))(src);                   \
        ADDC(_t0, _t1);                                         \
        ADDC(_t2, _t3);                                         \
        ADDC(sum, _t0);                                         \
        ADDC(sum, _t2)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)     \
        CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)     \
        CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);   \
        CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

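/*
 * For reference, the C-level prototype this entry point is expected to
 * match (shown here as a hedged reminder; the MIPS checksum.h declaration
 * is authoritative):
 *
 *	__wsum csum_partial(const void *buff, int len, __wsum sum);
 *
 * i.e. a0 = buff, a1 = len, a2 = sum, result returned in v0.
 */
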
#define src a0
#define sum v0

        .text
        .set    noreorder
        .align  5
LEAF(csum_partial)
EXPORT_SYMBOL(csum_partial)
        move    sum, zero
        move    t7, zero

        sltiu   t8, a1, 0x8
        bnez    t8, .Lsmall_csumcpy             /* < 8 bytes to copy */
         move   t2, a1

        andi    t7, src, 0x1                    /* odd buffer? */

.Lhword_align:
        beqz    t7, .Lword_align
         andi   t8, src, 0x2

        lbu     t0, (src)
        LONG_SUBU       a1, a1, 0x1
#ifdef __MIPSEL__
        sll     t0, t0, 8
#endif
        ADDC(sum, t0)
        PTR_ADDU        src, src, 0x1
        andi    t8, src, 0x2

.Lword_align:
        beqz    t8, .Ldword_align
         sltiu  t8, a1, 56

        lhu     t0, (src)
        LONG_SUBU       a1, a1, 0x2
        ADDC(sum, t0)
        sltiu   t8, a1, 56
        PTR_ADDU        src, src, 0x2

.Ldword_align:
        bnez    t8, .Ldo_end_words
         move   t8, a1

        andi    t8, src, 0x4
        beqz    t8, .Lqword_align
         andi   t8, src, 0x8

        LOAD32  t0, 0x00(src)
        LONG_SUBU       a1, a1, 0x4
        ADDC(sum, t0)
        PTR_ADDU        src, src, 0x4
        andi    t8, src, 0x8

.Lqword_align:
        beqz    t8, .Loword_align
         andi   t8, src, 0x10

#ifdef USE_DOUBLE
        ld      t0, 0x00(src)
        LONG_SUBU       a1, a1, 0x8
        ADDC(sum, t0)
#else
        lw      t0, 0x00(src)
        lw      t1, 0x04(src)
        LONG_SUBU       a1, a1, 0x8
        ADDC(sum, t0)
        ADDC(sum, t1)
#endif
        PTR_ADDU        src, src, 0x8
        andi    t8, src, 0x10

.Loword_align:
        beqz    t8, .Lbegin_movement
         LONG_SRL       t8, a1, 0x7

#ifdef USE_DOUBLE
        ld      t0, 0x00(src)
        ld      t1, 0x08(src)
        ADDC(sum, t0)
        ADDC(sum, t1)
#else
        CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
        LONG_SUBU       a1, a1, 0x10
        PTR_ADDU        src, src, 0x10
        LONG_SRL        t8, a1, 0x7

.Lbegin_movement:
        beqz    t8, 1f
         andi   t2, a1, 0x40

.Lmove_128bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
        LONG_SUBU       t8, t8, 0x01
        .set    reorder                         /* DADDI_WAR */
        PTR_ADDU        src, src, 0x80
        bnez    t8, .Lmove_128bytes
        .set    noreorder

1:
        beqz    t2, 1f
         andi   t2, a1, 0x20

.Lmove_64bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
        PTR_ADDU        src, src, 0x40

1:
        beqz    t2, .Ldo_end_words
         andi   t8, a1, 0x1c

.Lmove_32bytes:
        CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
        andi    t8, a1, 0x1c
        PTR_ADDU        src, src, 0x20

.Ldo_end_words:
        beqz    t8, .Lsmall_csumcpy
         andi   t2, a1, 0x3
        LONG_SRL        t8, t8, 0x2

.Lend_words:
        LOAD32  t0, (src)
        LONG_SUBU       t8, t8, 0x1
        ADDC(sum, t0)
        .set    reorder                         /* DADDI_WAR */
        PTR_ADDU        src, src, 0x4
        bnez    t8, .Lend_words
        .set    noreorder

/* unknown src alignment and < 8 bytes to go  */
.Lsmall_csumcpy:
        move    a1, t2

        andi    t0, a1, 4
        beqz    t0, 1f
         andi   t0, a1, 2

        /* Still a full word to go  */
        ulw     t1, (src)
        PTR_ADDIU       src, 4
#ifdef USE_DOUBLE
        dsll    t1, t1, 32                      /* clear lower 32bit */
#endif
        ADDC(sum, t1)

1:      move    t1, zero
        beqz    t0, 1f
         andi   t0, a1, 1

        /* Still a halfword to go  */
        ulhu    t1, (src)
        PTR_ADDIU       src, 2

1:      beqz    t0, 1f
         sll    t1, t1, 16

        lbu     t2, (src)
         nop

#ifdef __MIPSEB__
        sll     t2, t2, 8
#endif
        or      t1, t2

1:      ADDC(sum, t1)

        /* fold checksum */
#ifdef USE_DOUBLE
        dsll32  v1, sum, 0
        daddu   sum, v1
        sltu    v1, sum, v1
        dsra32  sum, sum, 0
        addu    sum, v1
#endif

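/*
 * The fold above, as a hedged C sketch (64-bit/USE_DOUBLE case only):
 * add the two 32-bit halves of the 64-bit accumulator and fold the carry
 * back in, leaving a 32-bit partial checksum.
 *
 *	u64 tmp = sum << 32;
 *	sum += tmp;			// high half now holds low + high
 *	u32 carry = (sum < tmp);	// did the add wrap?
 *	sum = (u32)(sum >> 32) + carry;
 */
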
        /* odd buffer alignment? */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
        .set    push
        .set    arch=mips32r2
        wsbh    v1, sum
        movn    sum, v1, t7
        .set    pop
#else
        beqz    t7, 1f                  /* odd buffer alignment? */
         lui    v1, 0x00ff
        addu    v1, 0x00ff
        and     t0, sum, v1
        sll     t0, t0, 8
        srl     sum, sum, 8
        and     sum, sum, v1
        or      sum, sum, t0
1:
#endif
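/*
 * If the buffer started on an odd address, every byte was accumulated
 * shifted by 8 bits, so the two bytes of each 16-bit word get swapped
 * back here.  A hedged C sketch of the generic (non-wsbh) path above:
 *
 *	if (odd)
 *		sum = ((sum & 0x00ff00ff) << 8) | ((sum >> 8) & 0x00ff00ff);
 */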
        .set    reorder
        /* Add the passed partial csum.  */
        ADDC32(sum, a2)
        jr      ra
        .set    noreorder
        END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 *      csum_partial_copy_nocheck(src, dst, len)
 *      __csum_partial_copy_kernel(src, dst, len)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

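/*
 * Hedged reminder of the C-level view of these entry points (argument
 * order as listed in the comment above; the MIPS checksum.h declarations
 * are authoritative):
 *
 *	__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
 *	__wsum __csum_partial_copy_from_user(const void __user *src,
 *					     void *dst, int len);
 *	__wsum __csum_partial_copy_to_user(const void *src,
 *					   void __user *dst, int len);
 *
 * All of them return 0 if a fault occurs while accessing user memory
 * (see .L_exc below).
 */
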
#define src a0
#define dst a1
#define len a2
#define sum v0
#define odd t8

/*
 * All exception handlers simply return 0.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 * insn    : Load/store instruction
 * type    : Instruction type
 * reg     : Register
 * addr    : Address
 * (the exception handler is always .L_exc)
 */
#define EXC(insn, type, reg, addr)              \
        .if \mode == LEGACY_MODE;               \
9:              insn reg, addr;                 \
                .section __ex_table,"a";        \
                PTR     9b, .L_exc;             \
                .previous;                      \
        /* This is enabled in EVA mode */       \
        .else;                                  \
                /* If loading from user or storing to user */   \
                .if ((\from == USEROP) && (type == LD_INSN)) || \
                    ((\to == USEROP) && (type == ST_INSN));     \
9:                      __BUILD_EVA_INSN(insn##e, reg, addr);   \
                        .section __ex_table,"a";                \
                        PTR     9b, .L_exc;                     \
                        .previous;                              \
                .else;                                          \
                        /* EVA without exception */             \
                        insn reg, addr;                         \
                .endif;                                         \
        .endif

#undef LOAD

#ifdef USE_DOUBLE

#define LOADK   ld /* No exception */
#define LOAD(reg, addr)         EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr)       EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)        EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr)        EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr)       EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)       EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr)       EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr)        EXC(sd, ST_INSN, reg, addr)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK   lw /* No exception */
#define LOAD(reg, addr)         EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr)       EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)        EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr)        EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr)       EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)       EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr)       EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr)        EXC(sw, ST_INSN, reg, addr)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST  LOADL
#define STFIRST STORER
#define STREST  STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST  LOADR
#define STFIRST STOREL
#define STREST  STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
        .set    noat
#else
        .set    at=v1
#endif

        .macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

        li      sum, -1
        move    odd, zero
        /*
         * Note: dst & src may be unaligned, len may be 0
         * Temps
         */
        /*
         * The "issue break"s below are very approximate.
         * Issue delays for dcache fills will perturb the schedule, as will
         * load queue full replay traps, etc.
         *
         * If len < NBYTES use byte operations.
         */
        sltu    t2, len, NBYTES
        and     t1, dst, ADDRMASK
        bnez    t2, .Lcopy_bytes_checklen\@
         and    t0, src, ADDRMASK
        andi    odd, dst, 0x1                   /* odd buffer? */
        bnez    t1, .Ldst_unaligned\@
         nop
        bnez    t0, .Lsrc_unaligned_dst_aligned\@
        /*
         * use delay slot for fall-through
         * src and dst are aligned; need to compute rem
         */
.Lboth_aligned\@:
         SRL    t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
        beqz    t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
         nop
        SUB     len, 8*NBYTES           # subtract here for bgez loop
        .align  4
1:
        LOAD(t0, UNIT(0)(src))
        LOAD(t1, UNIT(1)(src))
        LOAD(t2, UNIT(2)(src))
        LOAD(t3, UNIT(3)(src))
        LOAD(t4, UNIT(4)(src))
        LOAD(t5, UNIT(5)(src))
        LOAD(t6, UNIT(6)(src))
        LOAD(t7, UNIT(7)(src))
        SUB     len, len, 8*NBYTES
        ADD     src, src, 8*NBYTES
        STORE(t0, UNIT(0)(dst))
        ADDC(t0, t1)
        STORE(t1, UNIT(1)(dst))
        ADDC(sum, t0)
        STORE(t2, UNIT(2)(dst))
        ADDC(t2, t3)
        STORE(t3, UNIT(3)(dst))
        ADDC(sum, t2)
        STORE(t4, UNIT(4)(dst))
        ADDC(t4, t5)
        STORE(t5, UNIT(5)(dst))
        ADDC(sum, t4)
        STORE(t6, UNIT(6)(dst))
        ADDC(t6, t7)
        STORE(t7, UNIT(7)(dst))
        ADDC(sum, t6)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 8*NBYTES
        bgez    len, 1b
        .set    noreorder
        ADD     len, 8*NBYTES           # revert len (see above)

        /*
         * len == the number of bytes left to copy < 8*NBYTES
         */
.Lcleanup_both_aligned\@:
#define rem t7
        beqz    len, .Ldone\@
         sltu   t0, len, 4*NBYTES
        bnez    t0, .Lless_than_4units\@
         and    rem, len, (NBYTES-1)    # rem = len % NBYTES
        /*
         * len >= 4*NBYTES
         */
        LOAD(t0, UNIT(0)(src))
        LOAD(t1, UNIT(1)(src))
        LOAD(t2, UNIT(2)(src))
        LOAD(t3, UNIT(3)(src))
        SUB     len, len, 4*NBYTES
        ADD     src, src, 4*NBYTES
        STORE(t0, UNIT(0)(dst))
        ADDC(t0, t1)
        STORE(t1, UNIT(1)(dst))
        ADDC(sum, t0)
        STORE(t2, UNIT(2)(dst))
        ADDC(t2, t3)
        STORE(t3, UNIT(3)(dst))
        ADDC(sum, t2)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
        beqz    len, .Ldone\@
        .set    noreorder
.Lless_than_4units\@:
        /*
         * rem = len % NBYTES
         */
        beq     rem, len, .Lcopy_bytes\@
         nop
1:
        LOAD(t0, 0(src))
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
        STORE(t0, 0(dst))
        ADDC(sum, t0)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, NBYTES
        bne     rem, len, 1b
        .set    noreorder

        /*
         * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
         * A loop would do only a byte at a time with possible branch
         * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
         * because can't assume read-access to dst.  Instead, use
         * STREST dst, which doesn't require read access to dst.
         *
         * This code should perform better than a simple loop on modern,
         * wide-issue mips processors because the code has fewer branches and
         * more instruction-level parallelism.
         */
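        /*
         * Hedged little-endian sketch of the trick used below (load_word,
         * partial_store and add_with_carry are illustrative helpers, not
         * kernel APIs): keep only the 8*len low-order bits of one full-word
         * load, store just those len bytes, and add the kept bits to the
         * running sum.
         *
         *	unsigned long bits = 8 * NBYTES - 8 * len; // bits to discard
         *	unsigned long t0 = load_word(src);
         *	t0 <<= bits;			// SHIFT_DISCARD
         *	partial_store(dst, t0, len);	// STREST, no read of dst
         *	t0 >>= bits;			// SHIFT_DISCARD_REVERT
         *	sum = add_with_carry(sum, t0);
         */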
#define bits t2
        beqz    len, .Ldone\@
         ADD    t1, dst, len    # t1 is just past last byte of dst
        li      bits, 8*NBYTES
        SLL     rem, len, 3     # rem = number of bits to keep
        LOAD(t0, 0(src))
        SUB     bits, bits, rem # bits = number of bits to discard
        SHIFT_DISCARD t0, t0, bits
        STREST(t0, -1(t1))
        SHIFT_DISCARD_REVERT t0, t0, bits
        .set reorder
        ADDC(sum, t0)
        b       .Ldone\@
        .set noreorder
.Ldst_unaligned\@:
        /*
         * dst is unaligned
         * t0 = src & ADDRMASK
         * t1 = dst & ADDRMASK; t1 > 0
         * len >= NBYTES
         *
         * Copy enough bytes to align dst
         * Set match = (src and dst have same alignment)
         */
#define match rem
        LDFIRST(t3, FIRST(0)(src))
        ADD     t2, zero, NBYTES
        LDREST(t3, REST(0)(src))
        SUB     t2, t2, t1      # t2 = number of bytes copied
        xor     match, t0, t1
        STFIRST(t3, FIRST(0)(dst))
        SLL     t4, t1, 3               # t4 = number of bits to discard
        SHIFT_DISCARD t3, t3, t4
        /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
        ADDC(sum, t3)
        beq     len, t2, .Ldone\@
         SUB    len, len, t2
        ADD     dst, dst, t2
        beqz    match, .Lboth_aligned\@
         ADD    src, src, t2

.Lsrc_unaligned_dst_aligned\@:
        SRL     t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
        beqz    t0, .Lcleanup_src_unaligned\@
         and    rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
        LDFIRST(t0, FIRST(0)(src))
        LDFIRST(t1, FIRST(1)(src))
        SUB     len, len, 4*NBYTES
        LDREST(t0, REST(0)(src))
        LDREST(t1, REST(1)(src))
        LDFIRST(t2, FIRST(2)(src))
        LDFIRST(t3, FIRST(3)(src))
        LDREST(t2, REST(2)(src))
        LDREST(t3, REST(3)(src))
        ADD     src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
        nop                             # improves slotting
#endif
        STORE(t0, UNIT(0)(dst))
        ADDC(t0, t1)
        STORE(t1, UNIT(1)(dst))
        ADDC(sum, t0)
        STORE(t2, UNIT(2)(dst))
        ADDC(t2, t3)
        STORE(t3, UNIT(3)(dst))
        ADDC(sum, t2)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, 4*NBYTES
        bne     len, rem, 1b
        .set    noreorder

.Lcleanup_src_unaligned\@:
        beqz    len, .Ldone\@
         and    rem, len, NBYTES-1  # rem = len % NBYTES
        beq     rem, len, .Lcopy_bytes\@
         nop
1:
        LDFIRST(t0, FIRST(0)(src))
        LDREST(t0, REST(0)(src))
        ADD     src, src, NBYTES
        SUB     len, len, NBYTES
        STORE(t0, 0(dst))
        ADDC(sum, t0)
        .set    reorder                         /* DADDI_WAR */
        ADD     dst, dst, NBYTES
        bne     len, rem, 1b
        .set    noreorder

.Lcopy_bytes_checklen\@:
        beqz    len, .Ldone\@
         nop
.Lcopy_bytes\@:
        /* 0 < len < NBYTES  */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
        move    t2, zero        # partial word
        li      t3, SHIFT_START # shift
#define COPY_BYTE(N)                    \
        LOADBU(t0, N(src));             \
        SUB     len, len, 1;            \
        STOREB(t0, N(dst));             \
        SLLV    t0, t0, t3;             \
        addu    t3, SHIFT_INC;          \
        beqz    len, .Lcopy_bytes_done\@; \
         or     t2, t0

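/*
 * What the COPY_BYTE sequence amounts to, as a hedged C sketch for the
 * little-endian case (add_with_carry is the illustrative helper from the
 * ADDC comment above, not a kernel API):
 *
 *	unsigned long t2 = 0;
 *	int shift = 0;
 *	while (len--) {
 *		unsigned char b = *src++;
 *		*dst++ = b;				// copy the byte
 *		t2 |= (unsigned long)b << shift;	// build partial word
 *		shift += 8;
 *	}
 *	sum = add_with_carry(sum, t2);
 */
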
        COPY_BYTE(0)
        COPY_BYTE(1)
#ifdef USE_DOUBLE
        COPY_BYTE(2)
        COPY_BYTE(3)
        COPY_BYTE(4)
        COPY_BYTE(5)
#endif
        LOADBU(t0, NBYTES-2(src))
        SUB     len, len, 1
        STOREB(t0, NBYTES-2(dst))
        SLLV    t0, t0, t3
        or      t2, t0
.Lcopy_bytes_done\@:
        ADDC(sum, t2)
.Ldone\@:
        /* fold checksum */
        .set    push
        .set    noat
#ifdef USE_DOUBLE
        dsll32  v1, sum, 0
        daddu   sum, v1
        sltu    v1, sum, v1
        dsra32  sum, sum, 0
        addu    sum, v1
#endif

#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
        .set    push
        .set    arch=mips32r2
        wsbh    v1, sum
        movn    sum, v1, odd
        .set    pop
#else
        beqz    odd, 1f                 /* odd buffer alignment? */
         lui    v1, 0x00ff
        addu    v1, 0x00ff
        and     t0, sum, v1
        sll     t0, t0, 8
        srl     sum, sum, 8
        and     sum, sum, v1
        or      sum, sum, t0
1:
#endif
        .set    pop
        .set reorder
        jr      ra
        .set noreorder
        .endm

 730
 731        .set noreorder
 732.L_exc:
 733        jr      ra
 734         li     v0, 0
 735
 736FEXPORT(__csum_partial_copy_nocheck)
 737EXPORT_SYMBOL(__csum_partial_copy_nocheck)
 738#ifndef CONFIG_EVA
 739FEXPORT(__csum_partial_copy_to_user)
 740EXPORT_SYMBOL(__csum_partial_copy_to_user)
 741FEXPORT(__csum_partial_copy_from_user)
 742EXPORT_SYMBOL(__csum_partial_copy_from_user)
 743#endif
 744__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP
 745
 746#ifdef CONFIG_EVA
 747LEAF(__csum_partial_copy_to_user)
 748__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
 749END(__csum_partial_copy_to_user)
 750
 751LEAF(__csum_partial_copy_from_user)
 752__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
 753END(__csum_partial_copy_from_user)
 754#endif
 755