linux/arch/cris/arch-v10/lib/checksum.S
<<
>>
Prefs
   1/*
   2 * A fast checksum routine using movem
   3 * Copyright (c) 1998-2001 Axis Communications AB
   4 *
   5 * csum_partial(const unsigned char * buff, int len, unsigned int sum)
   6 */
   7
   8        .globl  csum_partial
   9csum_partial:
  10        
            ;; Accumulates the buffer at src into the 32-bit checksum in r12:
            ;; first 40 bytes at a time as longwords with the carry added back in
            ;; (only when the buffer is long enough), then 16-bit words, then one
            ;; trailing odd byte. The result is returned in r10.
            ;;
  11        ;; r10 - src (advanced as data is read, overwritten with the result on return)
  12        ;; r11 - length in bytes
  13        ;; r12 - checksum (incoming partial sum, accumulated in place)
            ;;
            ;; r9 and r13 are used as temporaries. r0 - r8 are saved on the stack
            ;; and restored around the movem loop.
  14
  15        ;; Buffers shorter than 80 bytes skip the movem loop: below that breakeven
  16        ;; length the register save and restore costs more than the loop gains.
  17        ;; The movem loop always consumes 40 bytes per pass, so it must _NOT_ be
            ;; entered with fewer than 40 bytes available. The check below ensures that.
  18        
  19        cmpu.w  80,$r11
  20        blo     _word_loop
  21        nop
  22
  23        ;; need to save the registers we use below in the movem loop
  24        ;; this overhead is why we have a check above for breakeven length
  25        ;; only r0 - r8 have to be saved, the other ones are clobber-able
  26        ;; according to the ABI
            ;; movem $r8 stores r0 through r8, hence the 9 longwords of stack space
  27        
  28        subq    9*4,$sp
  29        movem   $r8,[$sp]
  30        
  31        ;; do a movem checksum
  32
  33        subq    10*4,$r11       ; bias length by one block: r11 = bytes left - 40
  34        
  35_mloop: movem   [$r10+],$r9     ; read 10 longwords into r0 - r9, advancing src
  36
  37        ;; perform dword checksumming on the 10 longwords
            ;; each ax makes the add that follows it include the carry left by
            ;; the add before it
  38        
  39        add.d   $r0,$r12
  40        ax
  41        add.d   $r1,$r12
  42        ax
  43        add.d   $r2,$r12
  44        ax
  45        add.d   $r3,$r12
  46        ax
  47        add.d   $r4,$r12
  48        ax
  49        add.d   $r5,$r12
  50        ax
  51        add.d   $r6,$r12
  52        ax
  53        add.d   $r7,$r12
  54        ax
  55        add.d   $r8,$r12
  56        ax
  57        add.d   $r9,$r12
  58
  59        ;; fold the last carry into the checksum now, so that it does not have
  60        ;; to be looped back into the first add at the top
  61        
  62        ax
  63        addq    0,$r12          ; r12 += carry
  64
  65        subq    10*4,$r11
  66        bge     _mloop          ; loop while at least one more full block remains
  67        nop
  68
  69        addq    10*4,$r11       ; compensate for last loop underflowing length: r11 = 0..39 bytes left
  70
  71        movem   [$sp+],$r8      ; restore r0 - r8 and release the save area
  72
  73_word_loop:
  74        ;; only fold if there is anything to fold.
            ;; NOTE: no nop follows the beq, so the moveq below sits in its delay
            ;; slot and runs on both paths. r9 is only a temporary, so this is harmless.
  75
  76        cmpq    0,$r12
  77        beq     _no_fold
  78
  79        ;; fold the 32-bit checksum by adding its upper and lower 16-bit halves.
            ;; Fewer than 40 words remain at this point, so after the fold the word
            ;; adds below cannot overflow 32 bits and no carry handling is needed.
  80        ;; r9 and r13 can be used as temporaries.
  81        
  82        moveq   -1,$r9          ; put 0xffff in r9, faster than move.d 0xffff,r9
  83        lsrq    16,$r9          ; r9 = 0xffffffff >> 16 = 0xffff
  84        
  85        move.d  $r12,$r13
  86        lsrq    16,$r13         ; r13 = checksum >> 16
  87        and.d   $r9,$r12                ; checksum = checksum & 0xffff
  88        add.d   $r13,$r12               ; checksum += r13
  89
  90_no_fold:
  91        cmpq    2,$r11          ; fewer than 2 bytes left means no whole word to add
  92        blt     _no_words
  93        nop
  94        
  95        ;; checksum the rest of the words
            ;; r11 is biased by -2 so the loop can be closed with bge. The addu.w
            ;; is in the delay slot of the bge, so it runs once per pass, including
            ;; the final pass where the branch falls through.
  96        
  97        subq    2,$r11
  98        
  99_wloop: subq    2,$r11
 100        bge     _wloop
 101        addu.w  [$r10+],$r12    ; delay slot: add the next zero-extended word, advance src
 102        
 103        addq    2,$r11          ; undo the bias: r11 = 0 or 1 bytes left
 104                
 105_no_words:
 106        ;; see if we have one odd byte more
 107        cmpq    1,$r11
 108        beq     _do_byte
 109        nop
 110        ret
 111        move.d  $r12, $r10      ; delay slot of ret: return the checksum in r10
 112
 113_do_byte:       
 114        ;; add the last byte, zero-extended, to the checksum (nothing is copied)
 115        addu.b  [$r10],$r12
 116        ret
 117        move.d  $r12, $r10      ; delay slot of ret: return the checksum in r10
 118                
 119