linux/arch/c6x/lib/csum_64plus.S
<<
>>
Prefs
   1; SPDX-License-Identifier: GPL-2.0-only
   2;
   3;  linux/arch/c6x/lib/csum_64plus.s
   4;
   5;  Port to the Texas Instruments TMS320C6x architecture
   6;
   7;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
   8;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
   9;
  10#include <linux/linkage.h>
  11
  12;
  13;unsigned int csum_partial_copy(const char *src, char * dst,
  14;                               int len, int sum)
  15;
  16; A4:   src
  17; B4:   dst
  18; A6:   len
  19; B6:   sum
  20; return csum in A4
  21;
  22
  23        .text
  24ENTRY(csum_partial_copy)
            ;; Copy len (A6) bytes from src (A4) to dst (B4) while summing the
            ;; copied data as 16-bit quantities.  Whole 32-bit words go through
            ;; one of two software-pipelined loops (aligned LDW/STW, or
            ;; non-aligned LDNW/STNW), then an optional trailing halfword and an
            ;; optional trailing byte are handled.  The accumulated sum (A9) is
            ;; folded to 16 bits, the caller-supplied sum (B6, kept in A31) is
            ;; added, and the result is returned in A4.
            ;; ILC is saved in B30 on entry and restored on exit.
  25        MVC     .S2     ILC,B30         ; save ILC; restored just before returning
  26
  27        MV      .D1X    B6,A31          ; keep the caller-supplied csum for the end
  28        ZERO    .D1     A9              ; csum (a side)
  29||      ZERO    .D2     B9              ; csum (b side)
  30||      SHRU    .S2X    A6,2,B5         ; B5 = len / 4 = number of whole words
  31
  32        ;; Check alignment and size
  33        AND     .S1     3,A4,A1         ; A1 = src & 3
  34||      AND     .S2     3,B4,B0         ; B0 = dst & 3
  35        OR      .L2X    B0,A1,B0        ; B0 != 0 if src or dst is not word-aligned
  36||      MVC     .S2     B5,ILC          ; ILC = word count for the loops below
  37||      MVK     .D2     1,B2            ; B2 = 1, multiplier used by MPYU in the aligned loop
  38||      MV      .D1X    B5,A1           ; A1 = word count (zero means no whole word)
  39  [!A1] B       .S1     L8              ; no whole word: go straight to the tail code
  40   [B0] BNOP    .S1     L6,5            ; not aligned: use the non-aligned loop at L6
  41
  42        SPLOOP          1
  43
  44        ;; Main loop for aligned words
  45        LDW     .D1T1   *A4++,A7        ; A7 = next source word
  46        NOP     4
  47        MV      .S2X    A7,B7           ; B7 = copy of the word for the b side
  48||      EXTU    .S1     A7,0,16,A16     ; A16 = upper halfword of the word
  49        STW     .D2T2   B7,*B4++        ; store the word to dst
  50||      MPYU    .M2     B7,B2,B8        ; B8 = lower halfword of the word (times B2 = 1)
  51||      ADD     .L1     A16,A9,A9       ; a-side csum += upper halfword
  52        NOP
  53        SPKERNEL        8,0
  54||      ADD     .L2     B8,B9,B9        ; b-side csum += lower halfword
  55
  56        ZERO    .D1     A1              ; A1 = 0 so the test at L6 skips the non-aligned loop
  57||      ADD     .L1X    A9,B9,A9        ; A9 = a-side csum + b-side csum
  58
  59L6:
  60  [!A1] BNOP    .S1     L8,5            ; nothing left for the non-aligned loop: go to the tail
  61
  62        ;; Main loop for non-aligned words
  63        SPLOOP          2
  64 ||     MVK     .L1     1,A2            ; A2 = 1, multiplier used by MPYU below
  65
  66        LDNW    .D1T1   *A4++,A7        ; A7 = next source word (non-aligned load)
  67        NOP             3
  68
  69        NOP
  70        MV      .S2X    A7,B7           ; B7 = copy of the word, to be stored
  71 ||     EXTU    .S1     A7,0,16,A16     ; A16 = upper halfword of the word
  72 ||     MPYU    .M1     A7,A2,A8        ; A8 = lower halfword of the word (times A2 = 1)
  73
  74        ADD     .L1     A16,A9,A9       ; csum += upper halfword
  75        SPKERNEL        6,0
  76 ||     STNW    .D2T2   B7,*B4++        ; store the word to dst (non-aligned store)
  77 ||     ADD     .L1     A8,A9,A9        ; csum += lower halfword
  78
  79L8:     AND     .S2X    2,A6,B5         ; B5 = len & 2
  80        CMPGT   .L2     B5,0,B0         ; B0 = 1 if a trailing halfword remains
  81  [!B0] BNOP    .S1     L82,4           ; no halfword: skip to the last-byte check
  82
  83        ;; Handle the trailing halfword, copied and summed as two bytes
  84        ZERO    .L1     A7
  85||      ZERO    .D1     A8
  86
  87#ifdef CONFIG_CPU_BIG_ENDIAN
  88
  89        LDBU    .D1T1   *A4++,A7        ; A7 = first byte
  90        LDBU    .D1T1   *A4++,A8        ; A8 = second byte
  91        NOP             3
  92        SHL     .S1     A7,8,A0         ; first byte is the high byte of the halfword
  93        ADD     .S1     A8,A9,A9        ; csum += second byte
  94        STB     .D2T1   A7,*B4++        ; copy first byte
  95||      ADD     .S1     A0,A9,A9        ; csum += first byte << 8
  96        STB     .D2T1   A8,*B4++        ; copy second byte
  97
  98#else
  99
 100        LDBU    .D1T1   *A4++,A7        ; A7 = first byte
 101        LDBU    .D1T1   *A4++,A8        ; A8 = second byte
 102        NOP             3
 103        ADD     .S1     A7,A9,A9        ; csum += first byte (low byte of the halfword)
 104        SHL     .S1     A8,8,A0         ; second byte is the high byte of the halfword
 105
 106        STB     .D2T1   A7,*B4++        ; copy first byte
 107||      ADD     .S1     A0,A9,A9        ; csum += second byte << 8
 108        STB     .D2T1   A8,*B4++        ; copy second byte
 109
 110#endif
 111
 112        ;; Handle the trailing byte, if there is one
 113L82:    AND     .S2X    1,A6,B0         ; B0 = len & 1
 114  [!B0] BNOP    .S1     L9,5            ; even length: nothing left, go fold the csum
 115
 116||      ZERO    .L1     A7
 117
 118L83:    LDBU    .D1T1   *A4++,A7        ; A7 = last byte
 119        NOP             4
 120
 121        MV      .L2X    A7,B7           ; B7 = byte to store through the b-side data path
 122
 123#ifdef CONFIG_CPU_BIG_ENDIAN
 124
 125        STB     .D2T2   B7,*B4++        ; copy the last byte
 126||      SHL     .S1     A7,8,A7         ; big endian: the byte counts as a high byte
 127        ADD     .S1     A7,A9,A9        ; csum += byte << 8
 128
 129#else
 130
 131        STB     .D2T2   B7,*B4++        ; copy the last byte
 132||      ADD     .S1     A7,A9,A9        ; little endian: csum += byte
 133
 134#endif
 135
 136        ;; Fold the csum
 137L9:     SHRU    .S2X    A9,16,B0        ; B0 = csum >> 16
 138  [!B0] BNOP    .S1     L10,5           ; upper half already zero: no folding needed
 139
 140L91:    SHRU    .S2X    A9,16,B4        ; B4 = csum >> 16 (dst pointer is no longer needed)
 141||      EXTU    .S1     A9,16,16,A3     ; A3 = csum & 0xffff
 142        ADD     .D1X    A3,B4,A9        ; csum = (csum & 0xffff) + (csum >> 16)
 143
 144        SHRU    .S1     A9,16,A0        ; A0 = carry out of the low 16 bits
 145   [A0] BNOP    .S1     L91,5           ; fold again while the upper half is non-zero
 146
 147L10:    ADD     .D1     A31,A9,A9       ; add the caller-supplied csum (not folded again here)
 148        MV      .D1     A9,A4           ; return value in A4
 149
 150        BNOP    .S2     B3,4            ; return to the address in B3
 151        MVC     .S2     B30,ILC         ; restore ILC; presumably runs in the last branch delay slot
 152ENDPROC(csum_partial_copy)
 153
 154;
 155;unsigned short
 156;ip_fast_csum(unsigned char *iph, unsigned int ihl)
 157;{
 158;       unsigned int checksum = 0;
 159;       unsigned short *tosum = (unsigned short *) iph;
 160;       int len;
 161;
 162;       len = ihl*4;
 163;
 164;       if (len <= 0)
 165;               return 0;
 166;
 167;       while(len) {
 168;               len -= 2;
 169;               checksum += *tosum++;
 170;       }
 171;       if (len & 1)
 172;               checksum += *(unsigned char*) tosum;
 173;
 174;       while(checksum >> 16)
 175;               checksum = (checksum & 0xffff) + (checksum >> 16);
 176;
 177;       return ~checksum;
 178;}
 179;
 180; A4:   iph
 181; B4:   ihl
 182; return checksum in A4
 183;
 184        .text
 185
 186ENTRY(ip_fast_csum)
            ;; Sum ihl (B4) 32-bit words starting at iph (A4) as unsigned
            ;; halfwords, fold the sum to 16 bits and return its one's
            ;; complement, masked to 16 bits, in A4.  Returns 0 when
            ;; ihl * 4 <= 0.  ILC is saved in B30 and restored on exit.
 187        ZERO    .D1     A5              ; A5 = running checksum
 188 ||     MVC     .S2     ILC,B30         ; save ILC; restored before returning
 189        SHL     .S2     B4,2,B0         ; B0 = len = ihl * 4 (bytes)
 190        CMPGT   .L2     B0,0,B1         ; B1 = (len > 0)
 191  [!B1] BNOP    .S1     L15,4           ; len <= 0: go straight to the return sequence
 192  [!B1] ZERO    .D1     A3              ; ... with a result of 0
 193
 194  [!B0] B       .S1     L12             ; branch past the loop if B0 == 0
 195        SHRU    .S2     B0,1,B0         ; B0 = number of halfwords to sum
 196        MVC     .S2     B0,ILC          ; ILC = halfword count for the loop below
 197        NOP     3
 198
 199        SPLOOP  1
 200        LDHU    .D1T1   *A4++,A3        ; A3 = next halfword, zero-extended
 201        NOP     3
 202        NOP
 203        SPKERNEL        5,0
 204 ||     ADD     .L1     A3,A5,A5        ; checksum += halfword
 205
 206L12:    SHRU    .S1     A5,16,A0        ; A0 = checksum >> 16
 207  [!A0] BNOP    .S1     L14,5           ; upper half already zero: no folding needed
 208
 209L13:    SHRU    .S2X    A5,16,B4        ; B4 = checksum >> 16 (ihl is no longer needed)
 210        EXTU    .S1     A5,16,16,A3     ; A3 = checksum & 0xffff
 211        ADD     .D1X    A3,B4,A5        ; checksum = (checksum & 0xffff) + (checksum >> 16)
 212        SHRU    .S1     A5,16,A0        ; A0 = carry out of the low 16 bits
 213  [A0]  BNOP    .S1     L13,5           ; fold again while the upper half is non-zero
 214
 215L14:    NOT     .D1     A5,A3           ; A3 = ~checksum
 216        EXTU    .S1     A3,16,16,A3     ; keep only the low 16 bits
 217
 218L15:    BNOP    .S2     B3,3            ; return to the address in B3
 219        MVC     .S2     B30,ILC         ; restore ILC (placed after the branch; presumably a delay slot)
 220        MV      .D1     A3,A4           ; return value in A4 (also placed after the branch)
 221ENDPROC(ip_fast_csum)
 222
 223;
 224;unsigned short
 225;do_csum(unsigned char *buff, unsigned int len)
 226;{
 227;       int odd, count;
 228;       unsigned int result = 0;
 229;
 230;       if (len <= 0)
 231;               goto out;
 232;       odd = 1 & (unsigned long) buff;
 233;       if (odd) {
 234;#ifdef __LITTLE_ENDIAN
 235;               result += (*buff << 8);
 236;#else
 237;               result = *buff;
 238;#endif
 239;               len--;
 240;               buff++;
 241;       }
 242;       count = len >> 1;               /* nr of 16-bit words.. */
 243;       if (count) {
 244;               if (2 & (unsigned long) buff) {
 245;                       result += *(unsigned short *) buff;
 246;                       count--;
 247;                       len -= 2;
 248;                       buff += 2;
 249;               }
 250;               count >>= 1;            /* nr of 32-bit words.. */
 251;               if (count) {
 252;                       unsigned int carry = 0;
 253;                       do {
 254;                               unsigned int w = *(unsigned int *) buff;
 255;                               count--;
 256;                               buff += 4;
 257;                               result += carry;
 258;                               result += w;
 259;                               carry = (w > result);
 260;                       } while (count);
 261;                       result += carry;
 262;                       result = (result & 0xffff) + (result >> 16);
 263;               }
 264;               if (len & 2) {
 265;                       result += *(unsigned short *) buff;
 266;                       buff += 2;
 267;               }
 268;       }
 269;       if (len & 1)
 270;#ifdef __LITTLE_ENDIAN
 271;               result += *buff;
 272;#else
 273;               result += (*buff << 8);
 274;#endif
 275;       result = (result & 0xffff) + (result >> 16);
 276;       /* add up carry.. */
 277;       result = (result & 0xffff) + (result >> 16);
 278;       if (odd)
 279;               result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
 280;out:
 281;       return result;
 282;}
 283;
 284; A4:   buff
 285; B4:   len
 286; return checksum in A4
 287;
 288
 289ENTRY(do_csum)
            ;; Assembly version of the C routine shown in the comment above.
            ;; Register roles in this routine:
            ;;   A1 = result              A3 = odd flag (buff & 1)
            ;;   B3 = remaining len       A5 = saved return address
            ;;   B0 = count / scratch     A0 = carry / scratch
            ;; B3 is reused for len, so the routine returns through A5.
            ;; NOTE(review): ILC is written below but never saved or restored
            ;; here, unlike csum_partial_copy and ip_fast_csum in this file;
            ;; confirm that ILC is caller-saved in the ABI in use.
 290           CMPGT   .L2     B4,0,B0         ; B0 = (len > 0)
 291   [!B0]   BNOP    .S1     L26,3           ; len <= 0: return 0 via L26
 292           EXTU    .S1     A4,31,31,A0     ; A0 = odd = buff & 1
 293
            ;; NOTE(review): this packet sets A1 and A5, which the code at L26
            ;; needs; it appears to rely on running in the delay slots of the
            ;; branch above when len <= 0 -- confirm against the pipeline rules.
 294           MV      .L1     A0,A3           ; A3 = odd, kept for the final byte swap
 295||         MV      .S1X    B3,A5           ; A5 = return address
 296||         MV      .L2     B4,B3           ; B3 = len
 297||         ZERO    .D1     A1              ; result = 0
 298
 299#ifdef CONFIG_CPU_BIG_ENDIAN
 300   [A0]    SUB     .L2     B3,1,B3         ; odd: len--
 301|| [A0]    LDBU    .D1T1   *A4++,A1        ; odd: result = *buff++
 302#else
 303   [!A0]   BNOP    .S1     L21,5           ; even address: skip the leading byte
 304|| [A0]    LDBU    .D1T1   *A4++,A0        ; odd: A0 = *buff++
 305           SUB     .L2     B3,1,B3         ; len--
 306||         SHL     .S1     A0,8,A1         ; result = byte << 8
 307L21:
 308#endif
 309           SHR     .S2     B3,1,B0         ; B0 = count = number of 16-bit words
 310   [!B0]   BNOP    .S1     L24,3           ; count == 0: only a possible last byte remains
 311           MVK     .L1     2,A0
 312           AND     .L1     A4,A0,A0        ; A0 = buff & 2
 313
 314   [!A0]   BNOP    .S1     L22,5           ; already 4-byte aligned: skip the leading halfword
 315|| [A0]    LDHU    .D1T1   *A4++,A0        ; A0 = leading halfword
 316           SUB     .L2     B0,1,B0         ; count--
 317||         SUB     .S2     B3,2,B3         ; len -= 2
 318||         ADD     .L1     A0,A1,A1        ; result += halfword
 319L22:
 320           SHR     .S2     B0,1,B0         ; B0 = count = number of 32-bit words
 321||         ZERO    .L1     A0              ; carry = 0
 322
 323   [!B0]   BNOP    .S1     L23,5           ; no whole word: skip the loop
 324|| [B0]    MVC     .S2     B0,ILC          ; ILC = word count for the loop below
 325
 326           SPLOOP  3
 327           SPMASK  L1                      ; presumably makes the MV below run only once -- TODO confirm
 328||         MV      .L1     A1,A2           ; A2 = result so far (loop accumulator)
 329||         LDW     .D1T1   *A4++,A1        ; A1 = w = next 32-bit word
 330
 331           NOP     4
 332           ADD     .L1     A0,A1,A0        ; A0 = carry + w
 333           ADD     .L1     A2,A0,A2        ; result += carry + w
 334
 335           SPKERNEL 1,2
 336||         CMPGTU  .L1     A1,A2,A0        ; carry = (w > result), unsigned
 337
 338           ADD     .L1     A0,A2,A6        ; A6 = result + last carry
 339           EXTU    .S1     A6,16,16,A7     ; A7 = A6 & 0xffff
 340           SHRU    .S2X    A6,16,B0        ; B0 = A6 >> 16
 341           NOP             1
 342           ADD     .L1X    A7,B0,A1        ; result = (A6 & 0xffff) + (A6 >> 16)
 343L23:
 344           MVK     .L2     2,B0
 345           AND     .L2     B3,B0,B0        ; B0 = len & 2
 346   [B0]    LDHU    .D1T1   *A4++,A0        ; trailing halfword, if any
 347           NOP     4
 348   [B0]    ADD     .L1     A0,A1,A1        ; result += halfword
 349L24:
 350           EXTU    .S2     B3,31,31,B0     ; B0 = len & 1
 351#ifdef CONFIG_CPU_BIG_ENDIAN
 352   [!B0]   BNOP    .S1     L25,4           ; no trailing byte
 353|| [B0]    LDBU    .D1T1   *A4,A0          ; A0 = last byte
 354           SHL     .S1     A0,8,A0         ; big endian: the byte counts as a high byte
 355           ADD     .L1     A0,A1,A1        ; result += byte << 8
 356L25:
 357#else
 358   [B0]    LDBU    .D1T1   *A4,A0          ; A0 = last byte, if any
 359           NOP     4
 360   [B0]    ADD     .L1     A0,A1,A1        ; little endian: result += byte
 361#endif
 362           EXTU    .S1     A1,16,16,A0     ; A0 = result & 0xffff
 363           SHRU    .S2X    A1,16,B0        ; B0 = result >> 16
 364           NOP     1
 365           ADD     .L1X    A0,B0,A0        ; first fold
 366           SHRU    .S1     A0,16,A1        ; A1 = carry out of the first fold
 367           ADD     .L1     A0,A1,A0        ; add the carry back in
 368           EXTU    .S1     A0,16,16,A1     ; result = low 16 bits
 369           EXTU    .S1     A1,16,24,A2     ; A2 = (result >> 8) & 0xff
 370
 371           EXTU    .S1     A1,24,16,A0     ; A0 = (result & 0xff) << 8
 372||         MV      .L2X    A3,B0           ; B0 = odd
 373
 374   [B0]    OR      .L1     A0,A2,A1        ; odd start address: swap the two result bytes
 375L26:
 376           NOP     1
 377           BNOP    .S2X    A5,4            ; return to the address saved in A5
 378           MV      .L1     A1,A4           ; return value in A4 (placed after the branch; presumably a delay slot)
 379ENDPROC(do_csum)
 380
 381;__wsum csum_partial(const void *buff, int len, __wsum wsum)
 382;{
 383;       unsigned int sum = (__force unsigned int)wsum;
 384;       unsigned int result = do_csum(buff, len);
 385;
 386;       /* add in old sum, and carry.. */
 387;       result += sum;
 388;       if (sum > result)
 389;               result += 1;
 390;       return (__force __wsum)result;
 391;}
 392;
 393ENTRY(csum_partial)
            ;; Call do_csum on the buffer, then add the old sum with an
            ;; end-around carry, as in the C version shown above.  The old sum
            ;; is read from A6.  The return address and the old sum are parked
            ;; in A9 and A8 across the call; do_csum as written in this file
            ;; does not write either register.
 394           MV      .L1X    B3,A9           ; A9 = our return address (CALLP overwrites B3)
 395||         CALLP   .S2     do_csum,B3      ; A4 = do_csum(buff, len)
 396||         MV      .S1     A6,A8           ; A8 = sum (old checksum)
 397           BNOP    .S2X    A9,2            ; return; the three adds below are placed after the branch
 398           ADD     .L1     A8,A4,A1        ; result = do_csum() + sum
 399           CMPGTU  .L1     A8,A1,A0        ; A0 = 1 if the add wrapped (sum > result)
 400           ADD     .L1     A1,A0,A4        ; return result + carry in A4
 401ENDPROC(csum_partial)
 402
 403;unsigned short
 404;ip_compute_csum(unsigned char *buff, unsigned int len)
 405;
 406; A4:   buff
 407; B4:   len
 408; return checksum in A4
 409
 410ENTRY(ip_compute_csum)
            ;; Call do_csum on the buffer and return the one's complement of
            ;; its result, limited to the low 16 bits, in A4.  The return
            ;; address is parked in A9 across the call.
 411           MV      .L1X    B3,A9           ; A9 = our return address (CALLP overwrites B3)
 412||         CALLP   .S2     do_csum,B3      ; A4 = do_csum(buff, len)
 413           BNOP    .S2X    A9,3            ; return; the two instructions below are placed after the branch
 414           NOT     .S1     A4,A4           ; A4 = ~A4
 415           CLR     .S1     A4,16,31,A4     ; clear bits 16..31, keeping a 16-bit checksum
 416ENDPROC(ip_compute_csum)
 417