/* linux/arch/cris/arch-v10/lib/checksumcopy.S */
   1/*
   2 * A fast checksum+copy routine using movem
   3 * Copyright (c) 1998, 2001 Axis Communications AB
   4 *
   5 * Authors:     Bjorn Wesen
   6 * 
   7 * csum_partial_copy_nocheck(const char *src, char *dst,
   8 *                           int len, unsigned int sum)
   9 */
  10
  11        .globl  csum_partial_copy_nocheck
  12csum_partial_copy_nocheck:      
  13        
  14        ;; r10 - src
  15        ;; r11 - dst
  16        ;; r12 - length
  17        ;; r13 - checksum
  18
  19        ;; check for breakeven length between movem and normal word looping versions
  20        ;; we also do _NOT_ want to compute a checksum over more than the 
  21        ;; actual length when length < 40
  22        
  23        cmpu.w  80, $r12
  24        blo     _word_loop
  25        nop
  26
  27        ;; need to save the registers we use below in the movem loop
  28        ;; this overhead is why we have a check above for breakeven length
  29        ;; only r0 - r8 have to be saved, the other ones are clobber-able
  30        ;; according to the ABI
  31        
  32        subq    9*4, $sp
  33        movem   $r8, [$sp]
  34        
  35        ;; do a movem copy and checksum
  36
  37        subq    10*4, $r12      ; update length for the first loop
  38        
  39_mloop: movem   [$r10+],$r9     ; read 10 longwords
  401:      ;; A failing userspace access will have this as PC.
  41        movem   $r9,[$r11+]     ; write 10 longwords
  42
  43        ;; perform dword checksumming on the 10 longwords
  44        
  45        add.d   $r0,$r13
  46        ax
  47        add.d   $r1,$r13
  48        ax
  49        add.d   $r2,$r13
  50        ax
  51        add.d   $r3,$r13
  52        ax
  53        add.d   $r4,$r13
  54        ax
  55        add.d   $r5,$r13
  56        ax
  57        add.d   $r6,$r13
  58        ax
  59        add.d   $r7,$r13
  60        ax
  61        add.d   $r8,$r13
  62        ax
  63        add.d   $r9,$r13
  64
  65        ;; fold the carry into the checksum, to avoid having to loop the carry
  66        ;; back into the top
  67        
  68        ax
  69        addq    0,$r13
  70
  71        subq    10*4,$r12
  72        bge     _mloop
  73        nop
  74
  75        addq    10*4,$r12       ; compensate for last loop underflowing length
  76
  77        movem   [$sp+],$r8      ; restore regs
  78
  79_word_loop:
  80        ;; only fold if there is anything to fold.
  81
  82        cmpq    0,$r13
  83        beq     _no_fold
  84
  85        ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
  86        ;; r9 can be used as temporary.
  87        
  88        move.d  $r13,$r9
  89        lsrq    16,$r9          ; r0 = checksum >> 16
  90        and.d   0xffff,$r13     ; checksum = checksum & 0xffff
  91        add.d   $r9,$r13        ; checksum += r0
  92        
  93_no_fold:
  94        cmpq    2,$r12
  95        blt     _no_words
  96        nop
  97        
  98        ;; copy and checksum the rest of the words
  99        
 100        subq    2,$r12
 101        
 102_wloop: move.w  [$r10+],$r9
 1032:      ;; A failing userspace access will have this as PC.
 104        addu.w  $r9,$r13
 105        subq    2,$r12
 106        bge     _wloop
 107        move.w  $r9,[$r11+]
 108        
 109        addq    2,$r12
 110                
 111_no_words:
 112        ;; see if we have one odd byte more
 113        cmpq    1,$r12
 114        beq     _do_byte
 115        nop
 116        ret
 117        move.d  $r13, $r10
 118
 119_do_byte:       
 120        ;; copy and checksum the last byte
 121        move.b  [$r10],$r9
 1223:      ;; A failing userspace access will have this as PC.
 123        addu.b  $r9,$r13
 124        move.b  $r9,[$r11]
 125        ret
 126        move.d  $r13, $r10
 127