linux/arch/ia64/lib/ip_fast_csum.S
<<
>>
Prefs
   1/*
   2 * Optimized version of the ip_fast_csum() function
   3 * Used for calculating IP header checksum
   4 *
   5 * Return: 16bit checksum, complemented
   6 *
   7 * Inputs:
   8 *      in0: address of buffer to checksum (char *)
   9 *      in1: length of the buffer in 32-bit words (int)
  10 *
  11 * Copyright (C) 2002, 2006 Intel Corp.
  12 * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
  13 */
  14
  15#include <asm/asmmacro.h>
  16#include <asm/export.h>
  17
  18/*
  19 * Since this function is most likely called with buf aligned on a 4-byte
  20 * boundary and 20 bytes in length, we can execute rather quickly compared
  21 * to calling the generic do_csum, which has lots of overhead in handling
  22 * various alignments and sizes.  However, due to the lack of constraints
  23 * on the input arguments, a buffer that is not 4-byte aligned or a size
  24 * other than 20 bytes is handled by the generic do_csum function.
  25 */
  26
  27#define in0     r32
  28#define in1     r33
  29#define in2     r34
  30#define in3     r35
  31#define in4     r36
  32#define ret0    r8
  33
  34GLOBAL_ENTRY(ip_fast_csum)
  35        .prologue
  36        .body
  37        cmp.ne  p6,p7=5,in1     // size other than 20 byte?
  38        and     r14=3,in0       // is it aligned on 4-byte?
  39        add     r15=4,in0       // second source pointer
  40        ;;
  41        cmp.ne.or.andcm p6,p7=r14,r0
  42        ;;
  43(p7)    ld4     r20=[in0],8
  44(p7)    ld4     r21=[r15],8
  45(p6)    br.spnt .generic
  46        ;;
  47        ld4     r22=[in0],8
  48        ld4     r23=[r15],8
  49        ;;
  50        ld4     r24=[in0]
  51        add     r20=r20,r21
  52        add     r22=r22,r23
  53        ;;
  54        add     r20=r20,r22
  55        ;;
  56        add     r20=r20,r24
  57        ;;
  58        shr.u   ret0=r20,16     // now need to add the carry
  59        zxt2    r20=r20
  60        ;;
  61        add     r20=ret0,r20
  62        ;;
  63        shr.u   ret0=r20,16     // add carry again
  64        zxt2    r20=r20
  65        ;;
  66        add     r20=ret0,r20
  67        ;;
  68        shr.u   ret0=r20,16
  69        zxt2    r20=r20
  70        ;;
  71        add     r20=ret0,r20
  72        mov     r9=0xffff
  73        ;;
  74        andcm   ret0=r9,r20
  75        .restore sp             // reset frame state
  76        br.ret.sptk.many b0
  77        ;;
  78
  79.generic:
  80        .prologue
  81        .save ar.pfs, r35
  82        alloc   r35=ar.pfs,2,2,2,0
  83        .save rp, r34
  84        mov     r34=b0
  85        .body
  86        dep.z   out1=in1,2,30
  87        mov     out0=in0
  88        ;;
  89        br.call.sptk.many b0=do_csum
  90        ;;
  91        andcm   ret0=-1,ret0
  92        mov     ar.pfs=r35
  93        mov     b0=r34
  94        br.ret.sptk.many b0
  95END(ip_fast_csum)
  96EXPORT_SYMBOL(ip_fast_csum)
  97
  98GLOBAL_ENTRY(csum_ipv6_magic)
  99        ld4     r20=[in0],4
 100        ld4     r21=[in1],4
 101        zxt4    in2=in2
 102        ;;
 103        ld4     r22=[in0],4
 104        ld4     r23=[in1],4
 105        dep     r15=in3,in2,32,16
 106        ;;
 107        ld4     r24=[in0],4
 108        ld4     r25=[in1],4
 109        mux1    r15=r15,@rev
 110        add     r16=r20,r21
 111        add     r17=r22,r23
 112        zxt4    in4=in4
 113        ;;
 114        ld4     r26=[in0],4
 115        ld4     r27=[in1],4
 116        shr.u   r15=r15,16
 117        add     r18=r24,r25
 118        add     r8=r16,r17
 119        ;;
 120        add     r19=r26,r27
 121        add     r8=r8,r18
 122        ;;
 123        add     r8=r8,r19
 124        add     r15=r15,in4
 125        ;;
 126        add     r8=r8,r15
 127        ;;
 128        shr.u   r10=r8,32       // now fold sum into short
 129        zxt4    r11=r8
 130        ;;
 131        add     r8=r10,r11
 132        ;;
 133        shr.u   r10=r8,16       // yeah, keep it rolling
 134        zxt2    r11=r8
 135        ;;
 136        add     r8=r10,r11
 137        ;;
 138        shr.u   r10=r8,16       // three times lucky
 139        zxt2    r11=r8
 140        ;;
 141        add     r8=r10,r11
 142        mov     r9=0xffff
 143        ;;
 144        andcm   r8=r9,r8
 145        br.ret.sptk.many b0
 146END(csum_ipv6_magic)
 147EXPORT_SYMBOL(csum_ipv6_magic)
 148