linux/arch/arm/lib/csumpartialcopygeneric.S
<<
>>
Prefs
/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */

  /*
   * Symbolic names for the four argument registers, matching the
   * calling convention given in the header comment:
   * r0 = src, r1 = dst, r2 = len, r3 = sum.
   */
src     .req    r0
dst     .req    r1
len     .req    r2
sum     .req    r3

  /*
   * Zero-length request (reached from .Lless8 when len == 0):
   * nothing is copied and the incoming checksum is handed back
   * unchanged in r0.
   *
   * load_regs is a macro that is not defined in this file; presumably
   * it undoes save_regs and returns to the caller - confirm against
   * the file that supplies it.
   */
.Lzero:         mov     r0, sum
                load_regs

  /*
   * Align an unaligned destination pointer.  We know that
   * we have >= 8 bytes here, so we don't need to check
   * the length.  Note that the source pointer hasn't been
   * aligned yet.
   *
   * Called with `blne` from the entry point when (dst & 3) != 0 and
   * returns through lr.  One byte is copied if dst is odd; two more
   * bytes are copied if dst is then still not a multiple of four, so
   * between one and three bytes are consumed in total.
   *
   * len is adjusted with a plain `sub` (no flag update) and the
   * alignment tests use `tst`, so the carry produced by the `adcs`
   * instructions is still in C when control returns to the caller.
   *
   * load1b/load2b and put_byte_0/put_byte_1 are macros that are not
   * defined in this file; presumably they fetch bytes from src and
   * select the byte lane used for the checksum - confirm against the
   * file that supplies them.
   *
   * Scratch registers written here: ip, r8.
   */
.Ldst_unaligned:
                tst     dst, #1
                beq     .Ldst_16bit

                /* dst is odd: move a single byte to make it even */
                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     dst, #2
                moveq   pc, lr                  @ dst is now 32bit aligned

                /* dst is even but not word aligned: move two bytes */
.Ldst_16bit:    load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
                mov     pc, lr                  @ dst is now 32bit aligned

  /*
   * Handle 0 to 7 bytes, with any alignment of source and
   * destination pointers.  Note that when we get here, C = 0
   *
   * The only branch to this label is the `blo` that follows
   * `cmp len, #8` at the entry point, which is taken only with the
   * carry flag clear.
   *
   * Everything is moved with byte loads and stores: one byte first if
   * dst is odd, then two bytes at a time while (len & 6) != 0, then a
   * final byte if len is odd.  A zero length goes to .Lzero; every
   * other path ends at .Ldone.
   *
   * load1b/load2b and put_byte_0/put_byte_1 are macros that are not
   * defined in this file - confirm their semantics against the file
   * that supplies them.
   *
   * Scratch registers written here: ip, r8.
   */
.Lless8:        teq     len, #0                 @ check for zero count
                beq     .Lzero

                /* we must have at least one byte. */
                tst     dst, #1                 @ dst 16-bit aligned
                beq     .Lless8_aligned

                /* Align dst */
                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     len, #6
                beq     .Lless8_byteonly

                /* two bytes per iteration while bits 1-2 of len are set */
1:              load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
.Lless8_aligned:
                tst     len, #6
                bne     1b
.Lless8_byteonly:
                tst     len, #1
                beq     .Ldone
                load1b  r8
                adcs    sum, sum, r8, put_byte_0        @ update checksum
                strb    r8, [dst], #1
                b       .Ldone

  /*
   * Entry point.  FN_ENTRY and save_regs are macros that are not
   * defined in this file; presumably FN_ENTRY emits the entry label
   * for the concrete csum_partial_copy_xxx variant and save_regs
   * stacks the registers this routine uses - confirm against the
   * file that supplies them.
   *
   * In:  r0 = src, r1 = dst, r2 = len, r3 = sum
   *
   * Dispatch:
   *   len < 8         -> .Lless8 (byte-wise copy)
   *   (dst & 3) != 0  -> call .Ldst_unaligned, then continue here
   *   (src & 3) != 0  -> .Lsrc_not_aligned
   *   otherwise       -> fall through to the word-aligned routine
   */
FN_ENTRY
                save_regs

                cmp     len, #8                 @ Ensure that we have at least
                blo     .Lless8                 @ 8 bytes to copy.

                adds    sum, sum, #0            @ C = 0
                tst     dst, #3                 @ Test destination alignment
                blne    .Ldst_unaligned         @ align destination, return here

                /*
                 * Ok, the dst pointer is now 32bit aligned, and we know
                 * that we must have more than 4 bytes to copy.  Note
                 * that C contains the carry from the dst alignment above.
                 */

                tst     src, #3                 @ Test source alignment
                bne     .Lsrc_not_aligned

 /*
  * Routine for src & dst aligned
  *
  * Entered by falling through from the entry code with both pointers
  * word aligned and the running carry in C.  The length is consumed
  * in decreasing chunk sizes:
  *
  *   1:  16 bytes per iteration, (len & ~15) bytes in total
  *   2:  one 8-byte step if bit 3 of len is set
  *   3:  one 4-byte step if bit 2 of len is set
  *   4:  the last (len & 3) bytes, taken from one further word load
  *
  * Every addition is an `adcs`, so the carry out of one word is added
  * into the next.  The instructions between them (`sub` without S,
  * `teq`, `tst`, `mov`, stores, and `ands`/`bics` with these small
  * immediates) leave C untouched, which is why their order matters.
  *
  * load1l/load2l/load4l, push, get_byte_N and put_byte_0 are macros
  * that are not defined in this file; presumably the loadNl macros
  * fetch N words from src and the others select byte lanes in an
  * endian-aware way - confirm against the file that supplies them.
  *
  * Scratch registers written here: ip, r4-r7.
  */

                bics    ip, len, #15            @ ip = bytes in whole 16-byte chunks
                beq     2f

1:              load4l  r4, r5, r6, r7
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b

2:              ands    ip, len, #12            @ any whole words left?
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r4, r5
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                tst     ip, #4
                beq     4f

3:              load1l  r4
                str     r4, [dst], #4
                adcs    sum, sum, r4

4:              ands    len, len, #3            @ trailing 1-3 bytes
                beq     .Ldone
                load1l  r4
                tst     len, #2
                mov     r5, r4, get_byte_0
                beq     .Lexit
                adcs    sum, sum, r4, push #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2

                /*
                 * Shared tail, also branched to by the unaligned-source
                 * paths: r5 holds the candidate last byte, which is
                 * stored and summed only if len is odd.
                 */
.Lexit:         tst     len, #1
                strneb  r5, [dst], #1
                andne   r5, r5, #255
                adcnes  sum, sum, r5, put_byte_0

 /*
  * If the dst pointer was not 16-bit aligned, we
  * need to rotate the checksum here to get around
  * the inefficient byte manipulations in the
  * architecture independent code.
  *
  * Common exit for every non-zero-length path.  The outstanding carry
  * is folded into the sum, then the word at [sp, #0] is tested for
  * being odd.  The original comment labels that word as dst, so
  * save_regs is presumably expected to leave the caller's dst at the
  * top of the stack - confirm against the file that supplies the
  * macro.  The register named `sum` is reused as scratch for that
  * test; the result is already in r0 at that point.
  *
  * Out: r0 = checksum
  */
.Ldone:         adc     r0, sum, #0
                ldr     sum, [sp, #0]           @ dst
                tst     sum, #1
                movne   r0, r0, ror #8
                load_regs

 /*
  * dst is word aligned but src is not.  The carry left by the dst
  * alignment step is folded into sum, src is rounded down to a word
  * boundary, the first word is loaded, and the code dispatches on
  * (src & 3):
  *
  *   1 -> handled below
  *   2 -> .Lsrc2_aligned
  *   3 -> .Lsrc3_aligned
  *
  * Offset-1 path: r4 always holds the not-yet-stored bytes of the
  * previously loaded word (shifted with `pull #8`); each output word
  * is completed by OR-ing in the next source word shifted with
  * `push #24`.  `cmp ip, #2` with ip == 1 leaves C clear, so the carry
  * chain starts from zero without an explicit clear.
  *
  * Chunking matches the aligned routine: 16 bytes per iteration at 1:,
  * then 8 bytes, then 4 bytes, then the last (len & 3) bytes at 4:,
  * which are taken from r4 without a further load.
  *
  * load1l/load2l/load4l, push, pull and get_byte_N are macros that
  * are not defined in this file; presumably push/pull are
  * endian-dependent shifts - confirm against the file that supplies
  * them.
  *
  * Scratch registers written here: ip, r4-r8.
  */
.Lsrc_not_aligned:
                adc     sum, sum, #0            @ include C from dst alignment
                and     ip, src, #3
                bic     src, src, #3
                load1l  r5
                cmp     ip, #2
                beq     .Lsrc2_aligned
                bhi     .Lsrc3_aligned
                mov     r4, r5, pull #8         @ C = 0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, push #24
                mov     r5, r5, pull #8
                orr     r5, r5, r6, push #24
                mov     r6, r6, pull #8
                orr     r6, r6, r7, push #24
                mov     r7, r7, pull #8
                orr     r7, r7, r8, push #24
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, pull #8
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, push #24
                mov     r5, r5, pull #8
                orr     r5, r5, r6, push #24
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, pull #8
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, push #24
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, pull #8
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4, push #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
                b       .Lexit

 /*
  * Source offset 2 within its word (reached from .Lsrc_not_aligned
  * with the first word already in r5).  Same structure as the
  * offset-1 path, but the carried-over part in r4 is produced with
  * `pull #16` and each output word is completed with `push #16`.
  *
  * `adds sum, sum, #0` clears C explicitly: the `cmp ip, #2` that
  * selected this path leaves C set when ip >= 2.
  *
  * Tail at 4: if bit 1 of len is set, r4 is added to the sum and its
  * first two bytes are stored; a third byte, needed only when len is
  * also odd, is fetched with load1b before joining .Lexit.
  *
  * load1b/load1l/load2l/load4l, push, pull and get_byte_N are macros
  * that are not defined in this file - confirm their semantics
  * against the file that supplies them.
  *
  * Scratch registers written here: ip, r4-r8.
  */
.Lsrc2_aligned: mov     r4, r5, pull #16
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, push #16
                mov     r5, r5, pull #16
                orr     r5, r5, r6, push #16
                mov     r6, r6, pull #16
                orr     r6, r6, r7, push #16
                mov     r7, r7, pull #16
                orr     r7, r7, r8, push #16
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, pull #16
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, push #16
                mov     r5, r5, pull #16
                orr     r5, r5, r6, push #16
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, pull #16
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, push #16
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, pull #16
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                tst     len, #1
                beq     .Ldone
                load1b  r5
                b       .Lexit

 /*
  * Source offset 3 within its word (reached from .Lsrc_not_aligned
  * with the first word already in r5).  Same structure as the other
  * unaligned-source paths, but the carried-over part in r4 is
  * produced with `pull #24` and each output word is completed with
  * `push #8`.
  *
  * `adds sum, sum, #0` clears C explicitly: the `cmp ip, #2` that
  * selected this path leaves C set when ip >= 2.
  *
  * Tail at 4: if bit 1 of len is set, the first byte of r4 is stored
  * and r4 is added to the sum, then a further word is loaded to
  * supply the second byte (and a possible third, left in r5 for
  * .Lexit).
  *
  * FN_EXIT closes the routine opened by FN_ENTRY.  It, together with
  * load1l/load2l/load4l, push, pull and get_byte_N, is a macro that
  * is not defined in this file - confirm their semantics against the
  * file that supplies them.
  *
  * Scratch registers written here: ip, r4-r8.
  */
.Lsrc3_aligned: mov     r4, r5, pull #24
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, push #8
                mov     r5, r5, pull #24
                orr     r5, r5, r6, push #8
                mov     r6, r6, pull #24
                orr     r6, r6, r7, push #8
                mov     r7, r7, pull #24
                orr     r7, r7, r8, push #8
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, pull #24
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, push #8
                mov     r5, r5, pull #24
                orr     r5, r5, r6, push #8
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, pull #24
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, push #8
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, pull #24
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                strb    r5, [dst], #1
                adcs    sum, sum, r4
                load1l  r4
                mov     r5, r4, get_byte_0
                strb    r5, [dst], #1
                adcs    sum, sum, r4, push #24
                mov     r5, r4, get_byte_1
                b       .Lexit
FN_EXIT
