/* linux/arch/mn10300/lib/do_csum.S */
   1/* Optimised simple memory checksum
   2 *
   3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
   4 * Written by David Howells (dhowells@redhat.com)
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public Licence
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the Licence, or (at your option) any later version.
  10 */
  11#include <asm/cache.h>
  12
  13        .section .text
  14        .balign L1_CACHE_BYTES                  # start what follows on a multiple of L1_CACHE_BYTES (presumably supplied by asm/cache.h)
  15
  16###############################################################################
  17#
  18# unsigned int do_csum(const unsigned char *buff, int len)
  19#
    # Adds up len bytes starting at buff in a 32-bit accumulator, feeding every
    # carry back into the sum, then folds the accumulator to 16 bits and leaves
    # that value in d0.  A zero or negative len produces 0.
    #
    # On entry d0 holds buff and d1 holds len.
    #
    # Register use:
    #   a0              read cursor
    #   a1              original buff, consulted once more for the final swap
    #   d1              32-bit accumulator
    #   d2              bytes still to be summed
    #   d3              chunk size (32) in the main loop, scratch in the tail
    #   d0,e0,e1,e3     scratch for loaded data
    #
    # d2 and d3 are pushed on entry and restored by the closing ret; the other
    # registers listed above are overwritten without being saved.
    #
  20###############################################################################
  21        .globl  do_csum
  22        .type   do_csum,@function
  23do_csum:
  24        movm    [d2,d3],(sp)                    # save d2 and d3; the closing ret restores them
  25        mov     d1,d2                           # count
  26        mov     d0,a0                           # buff
  27        mov     a0,a1                           # second copy of buff; a0 moves, a1 keeps the start address
  28        clr     d1                              # accumulator
  29
  30        cmp     +0,d2
  31        ble     do_csum_done                    # check for zero length or negative
  32
  33        # 4-byte align the buffer pointer
  34        btst    +3,a0
  35        beq     do_csum_now_4b_aligned
  36
  37        btst    +1,a0
  38        beq     do_csum_addr_not_odd
            # odd start address: take one byte and add it into bits 15:8
  39        movbu   (a0),d0
  40        inc     a0
  41        asl     +8,d0
  42        add     d0,d1
  43        add     -1,d2
  44
  45do_csum_addr_not_odd:
            # a0 is even here; with fewer than 2 bytes left go straight to
            # the byte tail
  46        cmp     +2,d2
  47        bcs     do_csum_fewer_than_4
  48        btst    +2,a0
  49        beq     do_csum_now_4b_aligned
            # a0 is 2 mod 4: take one halfword to reach a 4-byte boundary
  50        movhu   (a0+),d0
  51        add     d0,d1
  52        add     -2,d2
  53        cmp     +4,d2
  54        bcs     do_csum_fewer_than_4
  55
  56do_csum_now_4b_aligned:
  57        # we want to checksum as much as we can in chunks of 32 bytes
  58        cmp     +31,d2
  59        bls     do_csum_remainder               # 4-byte aligned remainder
  60
  61        add     -32,d2                          # d2 = bytes that will remain after the next chunk
  62        mov     +32,d3                          # chunk size, held in a register for the loop
  63
  64do_csum_loop:
            # Sum eight words per pass.  Only the first add starts a fresh
            # carry chain; every later addc also adds the carry of the one
            # before it, so the loads between the two groups of four are relied
            # on to leave the carry flag untouched.
  65        mov     (a0+),d0
  66        mov     (a0+),e0
  67        mov     (a0+),e1
  68        mov     (a0+),e3
  69        add     d0,d1
  70        addc    e0,d1
  71        addc    e1,d1
  72        addc    e3,d1
  73        mov     (a0+),d0
  74        mov     (a0+),e0
  75        mov     (a0+),e1
  76        mov     (a0+),e3
  77        addc    d0,d1
  78        addc    e0,d1
  79        addc    e1,d1
  80        addc    e3,d1
  81        addc    +0,d1                           # fold the last carry back into the sum
  82
  83        sub     d3,d2                           # a borrow here means fewer than 32 bytes are left
  84        bcc     do_csum_loop
  85
  86        add     d3,d2                           # undo the last sub: d2 = bytes left, 0-31
  87        beq     do_csum_done
  88
  89do_csum_remainder:
  90        # cut 16-31 bytes down to 0-15
  91        cmp     +16,d2
  92        bcs     do_csum_fewer_than_16
  93        mov     (a0+),d0
  94        mov     (a0+),e0
  95        mov     (a0+),e1
  96        mov     (a0+),e3
  97        add     d0,d1
  98        addc    e0,d1
  99        addc    e1,d1
 100        addc    e3,d1
 101        addc    +0,d1                           # fold the last carry back into the sum
 102        add     -16,d2
 103        beq     do_csum_done
 104
 105do_csum_fewer_than_16:
 106        # sum the remaining whole words
            # (three, two or one of them, entered by falling through the labels
            # below; d2 is not adjusted on this path)
 107        cmp     +4,d2
 108        bcs     do_csum_fewer_than_4
 109        cmp     +8,d2
 110        bcs     do_csum_one_word
 111        cmp     +12,d2
 112        bcs     do_csum_two_words
 113        mov     (a0+),d0
 114        add     d0,d1
 115        addc    +0,d1
 116do_csum_two_words:
 117        mov     (a0+),d0
 118        add     d0,d1
 119        addc    +0,d1
 120do_csum_one_word:
 121        mov     (a0+),d0
 122        add     d0,d1
 123        addc    +0,d1
 124
 125do_csum_fewer_than_4:
 126        and     +3,d2                           # bytes past the last whole word: 0-3
 127        beq     do_csum_done
 128        xor_cmp d0,d0,+2,d2                     # d0 = d0 xor d0 = 0, and compare d2 with 2
 129        bcs     do_csum_fewer_than_2            # taken when exactly one byte is left
 130        movhu   (a0+),d0                        # two or three left: take a halfword
 131        and     +1,d2
 132        beq     do_csum_add_last_bit            # exactly two: no trailing byte
 133do_csum_fewer_than_2:
 134        movbu   (a0),d3                         # trailing byte, added into bits 7:0
 135        add     d3,d0
 136do_csum_add_last_bit:
 137        add     d0,d1
 138        addc    +0,d1
 139
 140do_csum_done:
 141        # compress the checksum down to 16 bits
 142        mov     +0xffff0000,d0
 143        and     d1,d0                           # d0 = upper half of the sum, left in bits 31:16
 144        asl     +16,d1                          # d1 = lower half of the sum, moved up to bits 31:16
 145        add     d1,d0                           # add the halves; carry flag = overflow out of 16 bits
 146        addc    +0xffff,d0                      # 0xffff plus the carry ripples +1 into bits 31:16 when the carry is set
 147        lsr     +16,d0                          # bring the 16-bit result down to bits 15:0
 148
 149        # flip the halves of the word result if the buffer was oddly aligned
 150        and     +1,a1                           # bit 0 of the original buff address
 151        beq     do_csum_not_oddly_aligned
 152        swaph   d0,d0                           # exchange bits 15:8 with 7:0
 153
 154do_csum_not_oddly_aligned:
 155        ret     [d2,d3],8                       # restore d2 and d3 and return; result is in d0
 156
 157        .size   do_csum, .-do_csum
 158