linux/arch/ia64/lib/ip_fast_csum.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 * Optimized version of the ip_fast_csum() function
   4 * Used for calculating IP header checksum
   5 *
   6 * Return: 16bit checksum, complemented
   7 *
   8 * Inputs:
   9 *      in0: address of buffer to checksum (char *)
  10 *      in1: length of the buffer in 32-bit words (int)
  11 *
  12 * Copyright (C) 2002, 2006 Intel Corp.
  13 * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
  14 */
  15
  16#include <asm/asmmacro.h>
  17#include <asm/export.h>
  18
  19/*
  20 * Since we know that most likely this function is called with buf aligned
  21 * on a 4-byte boundary and 20 bytes in length, we can execute rather quickly
  22 * versus calling the generic version of do_csum, which has lots of overhead in
  23 * handling various alignments and sizes.  However, due to the lack of constraints
  24 * put on the function input arguments, cases with alignment not on 4 bytes or
  25 * size not equal to 20 bytes will be handled by the generic do_csum function.
  26 */
  27
  /*
   * Register aliases used by the routines in this file: in0-in4 name r32-r36
   * and ret0 names r8.  The routines read their arguments through in0-in4;
   * ip_fast_csum leaves its result in ret0.
   * NOTE(review): presumably spelled out here because the fast paths run
   * without an alloc -- confirm.
   */
  28#define in0     r32
  29#define in1     r33
  30#define in2     r34
  31#define in3     r35
  32#define in4     r36
  33#define ret0    r8
  34
  /*
   * ip_fast_csum: sum the 32-bit words of a buffer and return the
   * complemented result.
   *
   *   in0  (r32): buffer address
   *   in1  (r33): buffer length counted in 32-bit words (it is compared
   *               against 5 below and shifted left by 2 before being
   *               handed to do_csum)
   *   ret0 (r8):  result
   *
   * Fast path: a 4-byte aligned buffer of exactly 5 words is summed inline
   * without allocating a register frame; the sum is folded to 16 bits and
   * returned as 0xffff & ~sum.
   * Slow path (.generic): any other size or alignment is passed to do_csum
   * and the bitwise complement of its return value is returned.
   *
   * Scratch registers written: r9, r14, r15, r20-r24, p6, p7.
   */
  35GLOBAL_ENTRY(ip_fast_csum)
  36        .prologue
  37        .body
  38        cmp.ne  p6,p7=5,in1     // p6 = length is not 5 words (20 bytes), p7 = it is
  39        and     r14=3,in0       // r14 = low two address bits, non-zero if not 4-byte aligned
  40        add     r15=4,in0       // second source pointer, one word ahead of in0
  41        ;;
  42        cmp.ne.or.andcm p6,p7=r14,r0    // misaligned: force p6 on and p7 off
  43        ;;
        // The first two loads are predicated on p7, so on the slow path in0
        // is not post-incremented and still holds the original address.
  44(p7)    ld4     r20=[in0],8     // word 0; in0 advances to word 2
  45(p7)    ld4     r21=[r15],8     // word 1; r15 advances to word 3
  46(p6)    br.spnt .generic        // wrong size or alignment: take the slow path
  47        ;;
  48        ld4     r22=[in0],8     // word 2; in0 advances to word 4
  49        ld4     r23=[r15],8     // word 3
  50        ;;
  51        ld4     r24=[in0]       // word 4
  52        add     r20=r20,r21     // pairwise partial sums: words 0+1
  53        add     r22=r22,r23     // words 2+3
  54        ;;
  55        add     r20=r20,r22
  56        ;;
  57        add     r20=r20,r24     // r20 = sum of all five words
  58        ;;
  59        shr.u   ret0=r20,16     // fold 1: add the bits above bit 15 back into the low 16
  60        zxt2    r20=r20
  61        ;;
  62        add     r20=ret0,r20
  63        ;;
  64        shr.u   ret0=r20,16     // fold 2: absorb the carry left by fold 1
  65        zxt2    r20=r20
  66        ;;
  67        add     r20=ret0,r20
  68        ;;
  69        shr.u   ret0=r20,16     // fold 3: absorb any remaining carry
  70        zxt2    r20=r20
  71        ;;
  72        add     r20=ret0,r20
  73        mov     r9=0xffff       // mask for the final 16-bit complement
  74        ;;
  75        andcm   ret0=r9,r20     // ret0 = 0xffff & ~r20
  76        .restore sp             // reset frame state
  77        br.ret.sptk.many b0
  78        ;;
  79
        // Slow path: allocate a register frame, call do_csum(buffer, bytes)
        // and return the complement of what it returns.
  80.generic:
  81        .prologue
  82        .save ar.pfs, r35
  83        alloc   r35=ar.pfs,2,2,2,0      // 2 inputs, 2 locals (r34, r35), 2 outputs; r35 keeps ar.pfs
  84        .save rp, r34
  85        mov     r34=b0                  // keep the return address across the call
  86        .body
  87        dep.z   out1=in1,2,30           // out1 = in1 << 2: word count converted to a byte count
  88        mov     out0=in0                // out0 = buffer address
  89        ;;
  90        br.call.sptk.many b0=do_csum
  91        ;;
  92        andcm   ret0=-1,ret0            // ret0 = ~ret0: complement the do_csum result
  93        mov     ar.pfs=r35              // restore the saved ar.pfs
  94        mov     b0=r34                  // restore the saved return address
  95        br.ret.sptk.many b0
  96END(ip_fast_csum)
  97EXPORT_SYMBOL(ip_fast_csum)
  98
  /*
   * csum_ipv6_magic: add eight 32-bit words read from memory to three scalar
   * arguments, fold the total to 16 bits and return its complement in r8.
   *
   *   in0, in1: addresses of two 16-byte areas, each read as four 32-bit
   *             words (presumably the IPv6 source and destination
   *             addresses -- confirm against the C prototype)
   *   in2:      value whose low 32 bits are byte-reversed before being summed
   *   in3:      value whose low 16 bits are byte-reversed before being summed
   *   in4:      value whose low 32 bits are summed unchanged
   *             (in2/in3/in4 are presumably the length, protocol and
   *             initial sum -- confirm against the C prototype)
   *
   * No alloc and no calls are made here.  in0 and in1 are post-incremented,
   * in2 and in4 are overwritten in place, and r8-r11 and r15-r27 are used
   * as scratch.
   */
  99GLOBAL_ENTRY(csum_ipv6_magic)
 100        ld4     r20=[in0],4             // word 0 of the in0 area
 101        ld4     r21=[in1],4             // word 0 of the in1 area
 102        zxt4    in2=in2                 // keep only the low 32 bits of in2
 103        ;;
 104        ld4     r22=[in0],4             // word 1 of each area
 105        ld4     r23=[in1],4
        // r15 = in2 with the low 16 bits of in3 deposited at bits 32-47
 106        dep     r15=in3,in2,32,16
 107        ;;
 108        ld4     r24=[in0],4             // word 2 of each area
 109        ld4     r25=[in1],4
 110        mux1    r15=r15,@rev            // reverse the order of all eight bytes of r15
 111        add     r16=r20,r21             // partial sum of both word 0 values
 112        add     r17=r22,r23             // partial sum of both word 1 values
 113        zxt4    in4=in4                 // keep only the low 32 bits of in4
 114        ;;
 115        ld4     r26=[in0],4             // word 3 of each area
 116        ld4     r27=[in1],4
        // Shift out the two zero bytes left at the bottom by the reversal:
        // bits 0-15 now hold byte-swapped in3, bits 16-47 byte-swapped in2.
 117        shr.u   r15=r15,16
 118        add     r18=r24,r25             // partial sum of both word 2 values
 119        add     r8=r16,r17
 120        ;;
 121        add     r19=r26,r27             // partial sum of both word 3 values
 122        add     r8=r8,r18
 123        ;;
 124        add     r8=r8,r19               // r8 = sum of all eight loaded words
 125        add     r15=r15,in4             // r15 = byte-swapped in3/in2 fields plus in4
 126        ;;
 127        add     r8=r8,r15               // r8 = full 64-bit total
 128        ;;
 129        shr.u   r10=r8,32       // fold 64 -> 32: add the high word to the low word
 130        zxt4    r11=r8
 131        ;;
 132        add     r8=r10,r11
 133        ;;
 134        shr.u   r10=r8,16       // fold again: add the bits above bit 15 to the low 16
 135        zxt2    r11=r8
 136        ;;
 137        add     r8=r10,r11
 138        ;;
 139        shr.u   r10=r8,16       // final fold absorbs the carry left by the previous one
 140        zxt2    r11=r8
 141        ;;
 142        add     r8=r10,r11
 143        mov     r9=0xffff       // mask for the final 16-bit complement
 144        ;;
 145        andcm   r8=r9,r8        // r8 = 0xffff & ~r8
 146        br.ret.sptk.many b0
 147END(csum_ipv6_magic)
 148EXPORT_SYMBOL(csum_ipv6_magic)
 149