linux/arch/score/lib/checksum.S
<<
>>
Prefs
   1/*
   2 * arch/score/lib/csum_partial.S
   3 *
   4 * Score Processor version.
   5 *
   6 * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
   7 *  Lennox Wu <lennox.wu@sunplusct.com>
   8 *  Chen Liqin <liqin.chen@sunplusct.com>
   9 *
  10 * This program is free software; you can redistribute it and/or modify
  11 * it under the terms of the GNU General Public License as published by
  12 * the Free Software Foundation; either version 2 of the License, or
  13 * (at your option) any later version.
  14 *
  15 * This program is distributed in the hope that it will be useful,
  16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 * GNU General Public License for more details.
  19 *
  20 * You should have received a copy of the GNU General Public License
  21 * along with this program; if not, see the file COPYING, or write
  22 * to the Free Software Foundation, Inc.,
  23 * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  24 */
  25#include <linux/linkage.h>
  26
  27#define ADDC(sum,reg)   /* sum += reg, adding any carry out back in */ \
  28        add     sum, sum, reg;  /* plain add; the result may wrap */ \
  29        cmp.c   reg, sum;       /* compare addend with new sum (overwrites the condition flags) */ \
  30        bleu    9f;             /* reg <= sum (unsigned): no wrap, skip the fix-up */ \
  31        addi    sum, 0x1;       /* sum wrapped below reg: add the lost carry back */ \
  329:      /* numeric local label 9: skip target of the bleu above */
  33
  34#define CSUM_BIGCHUNK(src, offset, sum)         /* add the 32 bytes at [src + offset] into sum; clobbers r8-r11 and the flags */ \
  35        lw      r8, [src, offset + 0x00];       /* first half: load four words (bytes 0x00-0x0f) */ \
  36        lw      r9, [src, offset + 0x04];       \
  37        lw      r10, [src, offset + 0x08];      \
  38        lw      r11, [src, offset + 0x0c];      \
  39        ADDC(sum, r8);                          /* accumulate them with carry wrap-around */ \
  40        ADDC(sum, r9);                          \
  41        ADDC(sum, r10);                         \
  42        ADDC(sum, r11);                         \
  43        lw      r8, [src, offset + 0x10];       /* second half: load four more words (bytes 0x10-0x1f) */ \
  44        lw      r9, [src, offset + 0x14];       \
  45        lw      r10, [src, offset + 0x18];      \
  46        lw      r11, [src, offset + 0x1c];      \
  47        ADDC(sum, r8);                          /* src itself is not advanced; the caller does that */ \
  48        ADDC(sum, r9);                          \
  49        ADDC(sum, r10);                         \
  50        ADDC(sum, r11);                         \
  51
  52#define src r4      /* buffer pointer; advanced as bytes are consumed */
  53#define dest r5     /* not referenced by name in the code below; r5 itself is used as the remaining length */
  54#define sum r27     /* running 32-bit checksum accumulator */
  55
  56        .text
  57/* unknown src alignment and < 8 bytes to go
 *
 * small_csumcpy: sum the remaining tail bytes, fold the 32-bit accumulator
 * to 16 bits, add the caller-supplied partial checksum and return.
 *
 * Reached from csum_partial either directly (short buffers) or through
 * small_memcpy once the bulk of the buffer has been summed.
 *
 * On entry (as used by the code below):
 *   r10      remaining length in bytes (copied into r5 first)
 *   src/r4   current buffer pointer
 *   sum/r27  checksum accumulated so far
 *   r25      1 if csum_partial already found the buffer to start on an odd
 *            address, otherwise recomputed here as src & 1
 *   r6       partial checksum to add to the folded result
 *   r3       branch target used to leave (presumably the return address;
 *            confirm against the S+core calling convention)
 * On exit:
 *   r4       folded checksum plus r6
 * Scratch: r5, r8, r9, r26 and the condition flags.
 *
 * NOTE(review): instructions with a ".c" suffix are the ones followed by
 * conditional branches here, so they are assumed to be the flag-setting
 * forms; ADDC contains a cmp.c and therefore also overwrites the flags.
 */
  58small_csumcpy:
  59        mv      r5, r10         /* r5 = remaining length */
  60        ldi     r9, 0x0
  61        cmpi.c  r25, 0x1
  62        beq pass_small_set_t7   /* r25 == 1: odd start already recorded; the "equal" flags also take the beq below */
  63        andri.c r25,r4 , 0x1    /* r25 = src & 1: non-zero when src is not 2-byte aligned */
  64
  65pass_small_set_t7:
  66        beq     aligned         /* r25 was already 1, or src is even: no leading odd byte to handle here */
  67        cmpi.c  r5, 0x0
  68        beq     fold            /* nothing left to sum */
  69        lbu     r9, [src]       /* leading byte at an odd address */
  70        slli    r9,r9, 0x8      /* little endian: goes in the upper byte lane; the fold below swaps bytes when r25 != 0 */
  71        ADDC(sum, r9)
  72        addi    src, 0x1
  73        subi.c  r5, 0x1         /* one byte consumed (the flags set here are not tested) */
  74
  75        /* src is expected to be at least 2-byte aligned from here on */
  76aligned:
  77        andri.c r8, r5, 0x4     /* bit 2 of len set? (means len >= 4, given fewer than 8 bytes remain) */
  78        beq     len_less_4bytes
  79
  80        /* A full word (4 bytes) is left: use one lw if src is 4-byte aligned, otherwise two halfword loads. */
  81        andri.c r8, src, 0x3    /* src & 3 == 0: 4-byte aligned, so lw can be used */
  82        beq     four_byte_aligned
  83        lhu     r9, [src]       /* first halfword of the word */
  84        addi    src, 2
  85        ADDC(sum, r9)
  86        lhu     r9, [src]       /* second halfword of the word */
  87        addi    src, 2
  88        ADDC(sum, r9)
  89        b len_less_4bytes
  90
  91four_byte_aligned:              /* bit 2 of len set and src 4-byte aligned */
  92        lw      r9, [src]
  93        addi    src, 4
  94        ADDC(sum, r9)
  95
  96len_less_4bytes:                /* src 2-byte aligned; only bits 1 and 0 of len are left to handle */
  97        andri.c r8, r5, 0x2     /* a halfword left? */
  98        beq     len_less_2bytes
  99        lhu     r9, [src]
 100        addi    src, 0x2        /* src+=2 */
 101        ADDC(sum, r9)
 102
 103len_less_2bytes:                /* at most one byte left */
 104        andri.c r8, r5, 0x1
 105        beq     fold            /* bit 0 clear: no trailing byte, go straight to fold */
 106        lbu     r9, [src]       /* trailing byte, added unshifted (low byte lane) */
 107
 108fold_ADDC:
 109        ADDC(sum, r9)
 110fold:
 111        /* fold the 32-bit sum to 16 bits: add the low half onto the high half, keep the carry */
 112        slli    r26, sum, 16    /* r26 = low half moved to the top */
 113        add     sum, sum, r26   /* upper 16 bits now hold high + low */
 114        cmp.c   r26, sum        /* did that add wrap? */
 115        srli    sum, sum, 16    /* bring the folded value down (flags from cmp.c are still tested below) */
 116        bleu    1f              /* if r26<=sum: no carry */
 117        addi    sum, 0x1        /* r26>sum: add the carry */
 1181:
 119        /* odd buffer alignment? r25 was set in csum_partial or by the andri.c above */
 120        cmpi.c  r25, 0x0
 121        beq     1f
 122        slli    r26, sum, 8     /* buffer started on an odd address: swap the two bytes of the result */
 123        srli    sum, sum, 8
 124        or      sum, sum, r26
 125        andi    sum, 0xffff     /* keep only the low 16 bits */
 1261:
 127        .set    optimize        /* NOTE(review): assembler mode directive; its exact effect is not evident from this file */
 128        /* Add the passed partial csum. */
 129        ADDC(sum, r6)
 130        mv      r4, sum         /* result is handed back in r4 */
 131        br      r3              /* leave through r3 */
 132        .set    volatile        /* NOTE(review): counterpart of the .set optimize above */
 133
 134        .align  5
/*
 * csum_partial: 32-bit ones'-complement style sum over a byte buffer.
 *
 * Inputs, as used by the code below:
 *   src/r4  buffer address
 *   r5      length in bytes (compared as signed against 8)
 *   r6      partial checksum, added in at the very end by small_csumcpy
 *   r3      branch target used to leave (presumably the return address;
 *           confirm against the S+core calling convention)
 * Output:
 *   r4      16-bit folded sum of the buffer plus r6 (see small_csumcpy)
 * Scratch: r5, r8-r11, r25, r26, sum/r27 and the condition flags.
 *
 * Outline: buffers shorter than 8 bytes go straight to small_csumcpy.
 * Otherwise src is aligned step by step (1, 2, 4, 8, 16 bytes), then
 * 128/64/32-byte chunks are summed with CSUM_BIGCHUNK, then the remaining
 * whole words, and the last 0-3 bytes are left to small_csumcpy.
 * ENTRY/END presumably come from <linux/linkage.h>.
 */
 135ENTRY(csum_partial)
 136        ldi sum, 0                      /* accumulator starts at zero */
 137        ldi r25, 0                      /* r25 = "buffer starts on an odd address" flag */
 138        mv r10, r5                      /* small_csumcpy takes its length from r10 */
 139        cmpi.c  r5, 0x8
 140        blt     small_csumcpy           /* < 8 (signed) bytes to sum */
 141        cmpi.c  r5, 0x0                 /* NOTE(review): len == 0 is already taken by the blt above, so this check looks unreachable - confirm */
 142        beq     out
 143        andri.c r25, src, 0x1           /* odd buffer? r25 = src & 1, used by the final byte swap */
 144
 145        beq     word_align
 146hword_align:                            /* src is odd: consume 1 byte */
 147        lbu     r8, [src]
 148        subi    r5, 0x1
 149        slli    r8, r8, 8               /* upper byte lane; the final fold swaps bytes when r25 != 0 */
 150        ADDC(sum, r8)
 151        addi    src, 0x1
 152
 153word_align:                             /* src is 2-byte aligned */
 154        andri.c r8, src, 0x2            /* already 4-byte aligned? */
 155        beq     dword_align             /* yes: skip the halfword step */
 156        lhu     r8, [src]               /* consume 2 bytes */
 157        subi    r5, 0x2
 158        ADDC(sum, r8)
 159        addi    src, 0x2
 160
 161dword_align:                            /* src is 4-byte aligned */
 162        mv      r26, r5                 /* r26 = len for do_end_words; overwritten below when len >= 56 */
 163        ldi     r8, 56
 164        cmp.c   r8, r5
 165        bgtu    do_end_words            /* 56 > len (unsigned): go straight to the word loop */
 166        andri.c r26, src, 0x4           /* r26 is only scratch for the alignment tests from here */
 167        beq     qword_align
 168        lw      r8, [src]               /* consume 4 bytes */
 169        subi    r5, 0x4
 170        ADDC(sum, r8)
 171        addi    src, 0x4
 172
 173qword_align:                            /* src is 8-byte aligned */
 174        andri.c r26, src, 0x8
 175        beq     oword_align
 176        lw      r8, [src, 0x0]          /* consume 8 bytes */
 177        lw      r9, [src, 0x4]
 178        subi    r5, 0x8                 /* len-=0x8 */
 179        ADDC(sum, r8)
 180        ADDC(sum, r9)
 181        addi    src, 0x8
 182
 183oword_align:                            /* src is 16-byte aligned */
 184        andri.c r26, src, 0x10
 185        beq     begin_movement
 186        lw      r10, [src, 0x08]        /* consume 16 bytes; afterwards bits 0-4 of src are clear */
 187        lw      r11, [src, 0x0c]
 188        lw      r8, [src, 0x00]
 189        lw      r9, [src, 0x04]
 190        ADDC(sum, r10)
 191        ADDC(sum, r11)
 192        ADDC(sum, r8)
 193        ADDC(sum, r9)
 194        subi    r5, 0x10
 195        addi    src, 0x10
 196
 197begin_movement:
 198        srli.c  r26, r5, 0x7            /* r26 = len / 128 = number of 128-byte blocks */
 199        beq     1f                      /* len<128 */
 200
 201/* r26 is the 128-byte block count computed at begin_movement */
 202move_128bytes:
 203        CSUM_BIGCHUNK(src, 0x00, sum)
 204        CSUM_BIGCHUNK(src, 0x20, sum)
 205        CSUM_BIGCHUNK(src, 0x40, sum)
 206        CSUM_BIGCHUNK(src, 0x60, sum)
 207        subi.c  r26, 0x01               /* one block done; r5 is left alone, later steps only test bits of it */
 208        addi    src, 0x80               /* sits between subi.c and bne: assumes plain addi leaves the flags untouched */
 209        bne     move_128bytes
 210
 2111:      /* fewer than 128 bytes left: handle a 64-byte chunk if bit 6 of len is set */
 212        andri.c r10, r5, 0x40
 213        beq     1f
 214
 215move_64bytes:
 216        CSUM_BIGCHUNK(src, 0x00, sum)
 217        CSUM_BIGCHUNK(src, 0x20, sum)
 218        addi    src, 0x40
 219
 2201:                                      /* fewer than 64 bytes left */
 221        andri   r26, r5, 0x1c           /* r26 = len & 0x1c: bytes of whole words left below 32 */
 222        andri.c r10, r5, 0x20           /* a 32-byte chunk left? */
 223        beq     do_end_words            /* decided by the andri.c above */
 224
 225move_32bytes:
 226        CSUM_BIGCHUNK(src, 0x00, sum)
 227        andri   r26, r5, 0x1c           /* same value as computed before the branch above */
 228        addri   src, src, 0x20
 229
 230do_end_words:                           /* remaining whole words */
 231        /* r26 = len (from dword_align, len < 56) or len & 0x1c (from the chunk code above) */
 232        cmpi.c  r26, 0x0
 233        beq     maybe_end_cruft         /* no whole words left */
 234        srli    r26, r26, 0x2           /* bytes -> number of words */
 235
 236end_words:
 237        lw      r8, [src]
 238        subi.c  r26, 0x1                /* unit is 4 byte */
 239        ADDC(sum, r8)                   /* contains cmp.c, so the flags from subi.c are lost here */
 240        addi    src, 0x4
 241        cmpi.c  r26, 0x0                /* hence the explicit re-test of the word counter */
 242        bne     end_words               /* r26!=0 */
 243
 244maybe_end_cruft:                        /* 0-3 trailing bytes left */
 245        andri   r10, r5, 0x3            /* r10 = len & 3, the length small_csumcpy will see */
 246
 247small_memcpy:
 248        mv      r5, r10
 249        j       small_csumcpy           /* sums the tail, folds, adds r6 and returns */
 250
 251out:                                    /* only target of the len == 0 branch near the top */
 252        mv      r4, sum                 /* returns sum as is; r6 is not added on this path */
 253        br      r3
 254
 255END(csum_partial)
 256