linux/arch/ia64/lib/ip_fast_csum.S
<<
>>
Prefs
   1/*
   2 * Optimized version of the ip_fast_csum() function
   3 * Used for calculating IP header checksum
   4 *
   5 * Return: 16bit checksum, complemented
   6 *
   7 * Inputs:
   8 *      in0: address of buffer to checksum (char *)
   9 *      in1: length of the buffer in 32-bit words (int)
  10 *
  11 * Copyright (C) 2002, 2006 Intel Corp.
  12 * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
  13 */
  14
  15#include <asm/asmmacro.h>
  16
  17/*
  18 * Since we know that most likely this function is called with buf aligned
  19 * on a 4-byte boundary and 20 bytes in length, we can execute rather quickly
  20 * versus calling the generic version of do_csum, which has lots of overhead in
  21 * handling various alignments and sizes.  However, due to lack of constraints
  22 * put on the function input arguments, cases with alignment not on 4-byte or
  23 * size not equal to 20 bytes will be handled by the generic do_csum function.
  24 */
  25
  26#define in0     r32     /* argument 0 as named by the routines in this file */
  27#define in1     r33     /* argument 1 */
  28#define in2     r34     /* argument 2 (read only by csum_ipv6_magic) */
  29#define in3     r35     /* argument 3 (read only by csum_ipv6_magic) */
  30#define in4     r36     /* argument 4 (read only by csum_ipv6_magic) */
  31#define ret0    r8      /* register in which ip_fast_csum leaves its result */
  32
  33GLOBAL_ENTRY(ip_fast_csum)
            //
            // Arguments: in0 = buffer address, in1 = length in 32-bit words.
            // Result:    ret0 (r8) = complemented checksum.
            //
            // Fast path: in1 == 5 (20 bytes) and in0 is 4-byte aligned.  The
            // five words are loaded through two pointers (in0 and r15 = in0 + 4,
            // each stepping by 8), added up in r20, folded three times by 16
            // bits and complemented within 16 bits.
            // Every other case branches to .generic, which calls do_csum.
            //
  34        .prologue
  35        .body
  36        cmp.ne  p6,p7=5,in1     // size other than 20 byte? p6 = yes, p7 = no
  37        and     r14=3,in0       // is it aligned on 4-byte? r14 = low 2 address bits
  38        add     r15=4,in0       // second source pointer
  39        ;;
  40        cmp.ne.or.andcm p6,p7=r14,r0    // misaligned: force p6 on and p7 off
  41        ;;
  42(p7)    ld4     r20=[in0],8     // word 0 (only on the fast path); in0 += 8
  43(p7)    ld4     r21=[r15],8     // word 1 (only on the fast path); r15 += 8
  44(p6)    br.spnt .generic        // wrong size or alignment: use do_csum
  45        ;;
  46        ld4     r22=[in0],8     // word 2
  47        ld4     r23=[r15],8     // word 3
  48        ;;
  49        ld4     r24=[in0]       // word 4
  50        add     r20=r20,r21     // r20 = word0 + word1
  51        add     r22=r22,r23     // r22 = word2 + word3
  52        ;;
  53        add     r20=r20,r22     // r20 = word0 + ... + word3
  54        ;;
  55        add     r20=r20,r24     // r20 = 64-bit sum of all five words
  56        ;;
  57        shr.u   ret0=r20,16     // now need to add the carry: ret0 = bits above 15
  58        zxt2    r20=r20         // r20 = low 16 bits
  59        ;;
  60        add     r20=ret0,r20    // first fold
  61        ;;
  62        shr.u   ret0=r20,16     // add carry again
  63        zxt2    r20=r20
  64        ;;
  65        add     r20=ret0,r20    // second fold
  66        ;;
  67        shr.u   ret0=r20,16     // last carry (0 or 1 for a five-word sum)
  68        zxt2    r20=r20
  69        ;;
  70        add     r20=ret0,r20    // third fold: r20 now fits in 16 bits
  71        mov     r9=0xffff       // 16-bit mask for the complement
  72        ;;
  73        andcm   ret0=r9,r20     // ret0 = 0xffff & ~r20
  74        .restore sp             // reset frame state
  75        br.ret.sptk.many b0     // return to the caller
  76        ;;
  77
            // Slow path: needs its own register frame because it makes a call.
  78.generic:
  79        .prologue
  80        .save ar.pfs, r35       // unwind info: ar.pfs is kept in r35
  81        alloc   r35=ar.pfs,2,2,2,0      // 2 inputs, 2 locals, 2 outputs; old ar.pfs -> r35
  82        .save rp, r34           // unwind info: return pointer is kept in r34
  83        mov     r34=b0          // save b0, the call below overwrites it
  84        .body
  85        dep.z   out1=in1,2,30   // out1 = (low 30 bits of in1) << 2: length in bytes
  86        mov     out0=in0        // out0 = buffer address
  87        ;;
  88        br.call.sptk.many b0=do_csum    // do_csum is defined outside this file
  89        ;;
            // NOTE(review): this complements all 64 bits of do_csum's result,
            // whereas the fast path above masks its result to 16 bits.
            // Presumably callers only look at the low 16 bits -- confirm.
  90        andcm   ret0=-1,ret0    // ret0 = ~ret0
  91        mov     ar.pfs=r35      // restore the caller's ar.pfs
  92        mov     b0=r34          // restore the return address
  93        br.ret.sptk.many b0
  94END(ip_fast_csum)
  95
  96GLOBAL_ENTRY(csum_ipv6_magic)
            //
            // Inputs, as used below:
            //   in0, in1: pointers to two 16-byte blocks, each read as four
            //             32-bit words (presumably the IPv6 source and
            //             destination addresses -- confirm against the C
            //             prototype)
            //   in2:      low 32 bits used, byte-reversed before being added
            //             (presumably the payload length)
            //   in3:      low 16 bits used, byte-reversed before being added
            //             (presumably the next-header protocol)
            //   in4:      low 32 bits added as they are (presumably the
            //             initial partial sum)
            // Result: r8 = 0xffff & ~(sum folded to 16 bits).
            //
            // The sum is accumulated in 64 bits and then folded 64 -> 33 bits
            // followed by three 16-bit folds.  Two 16-bit folds are not enough:
            // a raw sum such as 0x0001_0000_ffff_ffff leaves 0x10000 after the
            // second one, which would be complemented to 0xffff instead of
            // 0xfffe.
            //
  97        ld4     r20=[in0],4     // word 0 of each block
  98        ld4     r21=[in1],4
  99        zxt4    in2=in2         // keep only the low 32 bits of in2
 100        ;;
 101        ld4     r22=[in0],4     // word 1 of each block
 102        ld4     r23=[in1],4
 103        dep     r15=in3,in2,32,16       // r15 = in2 with in3[15:0] placed in bits 32..47
 104        ;;
 105        ld4     r24=[in0],4     // word 2 of each block
 106        ld4     r25=[in1],4
 107        mux1    r15=r15,@rev    // reverse all eight bytes of r15
 108        add     r16=r20,r21     // r16 = sum of the two word-0 values
 109        add     r17=r22,r23     // r17 = sum of the two word-1 values
 110        zxt4    in4=in4         // keep only the low 32 bits of in4
 111        ;;
 112        ld4     r26=[in0],4     // word 3 of each block
 113        ld4     r27=[in1],4
 114        shr.u   r15=r15,16      // drop the two zero bytes; byte-swapped in2 and in3[15:0] remain
 115        add     r18=r24,r25     // r18 = sum of the two word-2 values
 116        add     r8=r16,r17
 117        ;;
 118        add     r19=r26,r27     // r19 = sum of the two word-3 values
 119        add     r8=r8,r18
 120        ;;
 121        add     r8=r8,r19       // r8 = sum of all eight address words
 122        add     r15=r15,in4     // r15 = byte-swapped in2/in3 term + in4
 123        ;;
 124        add     r8=r8,r15       // r8 = complete 64-bit sum
 125        ;;
 126        shr.u   r10=r8,32       // now fold sum into short: high 32 bits ...
 127        zxt4    r11=r8          // ... plus low 32 bits
 128        ;;
 129        add     r8=r10,r11      // at most 33 bits
 130        ;;
 131        shr.u   r10=r8,16       // first 16-bit fold
 132        zxt2    r11=r8
 133        ;;
 134        add     r8=r10,r11      // at most 0x2fffe
 135        ;;
 136        shr.u   r10=r8,16       // second 16-bit fold
 137        zxt2    r11=r8
 138        ;;
 139        add     r8=r10,r11      // at most 0x10000
 140        mov     r9=0xffff       // 16-bit mask for the complement
 141        ;;
            shr.u   r10=r8,16       // third 16-bit fold: absorbs a leftover 0x10000
            zxt2    r11=r8
            ;;
            add     r8=r10,r11      // now guaranteed to fit in 16 bits
            ;;
 142        andcm   r8=r9,r8        // r8 = 0xffff & ~r8
 143        br.ret.sptk.many b0
 144END(csum_ipv6_magic)
 145