/* linux/arch/powerpc/lib/checksum_32.S */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */
  10
  11#include <linux/sys.h>
  12#include <asm/processor.h>
  13#include <asm/cache.h>
  14#include <asm/errno.h>
  15#include <asm/ppc_asm.h>
  16#include <asm/export.h>
  17
  18        .text
  19
  20/*
  21 * computes the checksum of a memory block at buff, length len,
  22 * and adds in "sum" (32-bit)
  23 *
  24 * __csum_partial(buff, len, sum)
  25 */
  26_GLOBAL(__csum_partial)
  27        subi    r3,r3,4
  28        srawi.  r6,r4,2         /* Divide len by 4 and also clear carry */
  29        beq     3f              /* if we're doing < 4 bytes */
  30        andi.   r0,r3,2         /* Align buffer to longword boundary */
  31        beq+    1f
  32        lhz     r0,4(r3)        /* do 2 bytes to get aligned */
  33        subi    r4,r4,2
  34        addi    r3,r3,2
  35        srwi.   r6,r4,2         /* # words to do */
  36        adde    r5,r5,r0
  37        beq     3f
  381:      andi.   r6,r6,3         /* Prepare to handle words 4 by 4 */
  39        beq     21f
  40        mtctr   r6
  412:      lwzu    r0,4(r3)
  42        adde    r5,r5,r0
  43        bdnz    2b
  4421:     srwi.   r6,r4,4         /* # blocks of 4 words to do */
  45        beq     3f
  46        lwz     r0,4(r3)
  47        mtctr   r6
  48        lwz     r6,8(r3)
  49        adde    r5,r5,r0
  50        lwz     r7,12(r3)
  51        adde    r5,r5,r6
  52        lwzu    r8,16(r3)
  53        adde    r5,r5,r7
  54        bdz     23f
  5522:     lwz     r0,4(r3)
  56        adde    r5,r5,r8
  57        lwz     r6,8(r3)
  58        adde    r5,r5,r0
  59        lwz     r7,12(r3)
  60        adde    r5,r5,r6
  61        lwzu    r8,16(r3)
  62        adde    r5,r5,r7
  63        bdnz    22b
  6423:     adde    r5,r5,r8
  653:      andi.   r0,r4,2
  66        beq+    4f
  67        lhz     r0,4(r3)
  68        addi    r3,r3,2
  69        adde    r5,r5,r0
  704:      andi.   r0,r4,1
  71        beq+    5f
  72        lbz     r0,4(r3)
  73        slwi    r0,r0,8         /* Upper byte of word */
  74        adde    r5,r5,r0
  755:      addze   r3,r5           /* add in final carry */
  76        blr
  77EXPORT_SYMBOL(__csum_partial)
  78
  79/*
  80 * Computes the checksum of a memory block at src, length len,
  81 * and adds in "sum" (32-bit), while copying the block to dst.
  82 * If an access exception occurs on src or dst, it stores -EFAULT
  83 * to *src_err or *dst_err respectively, and (for an error on
  84 * src) zeroes the rest of dst.
  85 *
  86 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
  87 */
  88#define CSUM_COPY_16_BYTES_WITHEX(n)    \
  898 ## n ## 0:                    \
  90        lwz     r7,4(r4);       \
  918 ## n ## 1:                    \
  92        lwz     r8,8(r4);       \
  938 ## n ## 2:                    \
  94        lwz     r9,12(r4);      \
  958 ## n ## 3:                    \
  96        lwzu    r10,16(r4);     \
  978 ## n ## 4:                    \
  98        stw     r7,4(r6);       \
  99        adde    r12,r12,r7;     \
 1008 ## n ## 5:                    \
 101        stw     r8,8(r6);       \
 102        adde    r12,r12,r8;     \
 1038 ## n ## 6:                    \
 104        stw     r9,12(r6);      \
 105        adde    r12,r12,r9;     \
 1068 ## n ## 7:                    \
 107        stwu    r10,16(r6);     \
 108        adde    r12,r12,r10
 109
 110#define CSUM_COPY_16_BYTES_EXCODE(n)            \
 111        EX_TABLE(8 ## n ## 0b, src_error);      \
 112        EX_TABLE(8 ## n ## 1b, src_error);      \
 113        EX_TABLE(8 ## n ## 2b, src_error);      \
 114        EX_TABLE(8 ## n ## 3b, src_error);      \
 115        EX_TABLE(8 ## n ## 4b, dst_error);      \
 116        EX_TABLE(8 ## n ## 5b, dst_error);      \
 117        EX_TABLE(8 ## n ## 6b, dst_error);      \
 118        EX_TABLE(8 ## n ## 7b, dst_error);
 119
 120        .text
 121        .stabs  "arch/powerpc/lib/",N_SO,0,0,0f
 122        .stabs  "checksum_32.S",N_SO,0,0,0f
 1230:
 124
 125CACHELINE_BYTES = L1_CACHE_BYTES
 126LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 127CACHELINE_MASK = (L1_CACHE_BYTES-1)
 128
 129_GLOBAL(csum_partial_copy_generic)
 130        stwu    r1,-16(r1)
 131        stw     r7,12(r1)
 132        stw     r8,8(r1)
 133
 134        addic   r12,r6,0
 135        addi    r6,r4,-4
 136        neg     r0,r4
 137        addi    r4,r3,-4
 138        andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
 139        crset   4*cr7+eq
 140        beq     58f
 141
 142        cmplw   0,r5,r0                 /* is this more than total to do? */
 143        blt     63f                     /* if not much to do */
 144        rlwinm  r7,r6,3,0x8
 145        rlwnm   r12,r12,r7,0,31 /* odd destination address: rotate one byte */
 146        cmplwi  cr7,r7,0        /* is destination address even ? */
 147        andi.   r8,r0,3                 /* get it word-aligned first */
 148        mtctr   r8
 149        beq+    61f
 150        li      r3,0
 15170:     lbz     r9,4(r4)                /* do some bytes */
 152        addi    r4,r4,1
 153        slwi    r3,r3,8
 154        rlwimi  r3,r9,0,24,31
 15571:     stb     r9,4(r6)
 156        addi    r6,r6,1
 157        bdnz    70b
 158        adde    r12,r12,r3
 15961:     subf    r5,r0,r5
 160        srwi.   r0,r0,2
 161        mtctr   r0
 162        beq     58f
 16372:     lwzu    r9,4(r4)                /* do some words */
 164        adde    r12,r12,r9
 16573:     stwu    r9,4(r6)
 166        bdnz    72b
 167
 16858:     srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
 169        clrlwi  r5,r5,32-LG_CACHELINE_BYTES
 170        li      r11,4
 171        beq     63f
 172
 173        /* Here we decide how far ahead to prefetch the source */
 174        li      r3,4
 175        cmpwi   r0,1
 176        li      r7,0
 177        ble     114f
 178        li      r7,1
 179#if MAX_COPY_PREFETCH > 1
 180        /* Heuristically, for large transfers we prefetch
 181           MAX_COPY_PREFETCH cachelines ahead.  For small transfers
 182           we prefetch 1 cacheline ahead. */
 183        cmpwi   r0,MAX_COPY_PREFETCH
 184        ble     112f
 185        li      r7,MAX_COPY_PREFETCH
 186112:    mtctr   r7
 187111:    dcbt    r3,r4
 188        addi    r3,r3,CACHELINE_BYTES
 189        bdnz    111b
 190#else
 191        dcbt    r3,r4
 192        addi    r3,r3,CACHELINE_BYTES
 193#endif /* MAX_COPY_PREFETCH > 1 */
 194
 195114:    subf    r8,r7,r0
 196        mr      r0,r7
 197        mtctr   r8
 198
 19953:     dcbt    r3,r4
 20054:     dcbz    r11,r6
 201/* the main body of the cacheline loop */
 202        CSUM_COPY_16_BYTES_WITHEX(0)
 203#if L1_CACHE_BYTES >= 32
 204        CSUM_COPY_16_BYTES_WITHEX(1)
 205#if L1_CACHE_BYTES >= 64
 206        CSUM_COPY_16_BYTES_WITHEX(2)
 207        CSUM_COPY_16_BYTES_WITHEX(3)
 208#if L1_CACHE_BYTES >= 128
 209        CSUM_COPY_16_BYTES_WITHEX(4)
 210        CSUM_COPY_16_BYTES_WITHEX(5)
 211        CSUM_COPY_16_BYTES_WITHEX(6)
 212        CSUM_COPY_16_BYTES_WITHEX(7)
 213#endif
 214#endif
 215#endif
 216        bdnz    53b
 217        cmpwi   r0,0
 218        li      r3,4
 219        li      r7,0
 220        bne     114b
 221
 22263:     srwi.   r0,r5,2
 223        mtctr   r0
 224        beq     64f
 22530:     lwzu    r0,4(r4)
 226        adde    r12,r12,r0
 22731:     stwu    r0,4(r6)
 228        bdnz    30b
 229
 23064:     andi.   r0,r5,2
 231        beq+    65f
 23240:     lhz     r0,4(r4)
 233        addi    r4,r4,2
 23441:     sth     r0,4(r6)
 235        adde    r12,r12,r0
 236        addi    r6,r6,2
 23765:     andi.   r0,r5,1
 238        beq+    66f
 23950:     lbz     r0,4(r4)
 24051:     stb     r0,4(r6)
 241        slwi    r0,r0,8
 242        adde    r12,r12,r0
 24366:     addze   r3,r12
 244        addi    r1,r1,16
 245        beqlr+  cr7
 246        rlwinm  r3,r3,8,0,31    /* odd destination address: rotate one byte */
 247        blr
 248
 249/* read fault */
 250src_error:
 251        lwz     r7,12(r1)
 252        addi    r1,r1,16
 253        cmpwi   cr0,r7,0
 254        beqlr
 255        li      r0,-EFAULT
 256        stw     r0,0(r7)
 257        blr
 258/* write fault */
 259dst_error:
 260        lwz     r8,8(r1)
 261        addi    r1,r1,16
 262        cmpwi   cr0,r8,0
 263        beqlr
 264        li      r0,-EFAULT
 265        stw     r0,0(r8)
 266        blr
 267
 268        EX_TABLE(70b, src_error);
 269        EX_TABLE(71b, dst_error);
 270        EX_TABLE(72b, src_error);
 271        EX_TABLE(73b, dst_error);
 272        EX_TABLE(54b, dst_error);
 273
 274/*
 275 * this stuff handles faults in the cacheline loop and branches to either
 276 * src_error (if in read part) or dst_error (if in write part)
 277 */
 278        CSUM_COPY_16_BYTES_EXCODE(0)
 279#if L1_CACHE_BYTES >= 32
 280        CSUM_COPY_16_BYTES_EXCODE(1)
 281#if L1_CACHE_BYTES >= 64
 282        CSUM_COPY_16_BYTES_EXCODE(2)
 283        CSUM_COPY_16_BYTES_EXCODE(3)
 284#if L1_CACHE_BYTES >= 128
 285        CSUM_COPY_16_BYTES_EXCODE(4)
 286        CSUM_COPY_16_BYTES_EXCODE(5)
 287        CSUM_COPY_16_BYTES_EXCODE(6)
 288        CSUM_COPY_16_BYTES_EXCODE(7)
 289#endif
 290#endif
 291#endif
 292
 293        EX_TABLE(30b, src_error);
 294        EX_TABLE(31b, dst_error);
 295        EX_TABLE(40b, src_error);
 296        EX_TABLE(41b, dst_error);
 297        EX_TABLE(50b, src_error);
 298        EX_TABLE(51b, dst_error);
 299
 300EXPORT_SYMBOL(csum_partial_copy_generic)
 301
 302/*
 303 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 304 *                         const struct in6_addr *daddr,
 305 *                         __u32 len, __u8 proto, __wsum sum)
 306 */
 307
 308_GLOBAL(csum_ipv6_magic)
 309        lwz     r8, 0(r3)
 310        lwz     r9, 4(r3)
 311        addc    r0, r7, r8
 312        lwz     r10, 8(r3)
 313        adde    r0, r0, r9
 314        lwz     r11, 12(r3)
 315        adde    r0, r0, r10
 316        lwz     r8, 0(r4)
 317        adde    r0, r0, r11
 318        lwz     r9, 4(r4)
 319        adde    r0, r0, r8
 320        lwz     r10, 8(r4)
 321        adde    r0, r0, r9
 322        lwz     r11, 12(r4)
 323        adde    r0, r0, r10
 324        add     r5, r5, r6      /* assumption: len + proto doesn't carry */
 325        adde    r0, r0, r11
 326        adde    r0, r0, r5
 327        addze   r0, r0
 328        rotlwi  r3, r0, 16
 329        add     r3, r0, r3
 330        not     r3, r3
 331        rlwinm  r3, r3, 16, 16, 31
 332        blr
 333EXPORT_SYMBOL(csum_ipv6_magic)
 334