linux/arch/blackfin/lib/memcpy.S
<<
>>
Prefs
   1/*
   2 * internal version of memcpy(), issued by the compiler to copy blocks of
   3 * data around. This is really memmove() - it has to be able to deal with
   4 * possible overlaps, because that ambiguity is when the compiler gives up
   5 * and calls a function. We have our own, internal version so that we get
   6 * something we trust, even if the user has redefined the normal symbol.
   7 *
   8 * Copyright 2004-2009 Analog Devices Inc.
   9 *
  10 * Licensed under the Clear BSD license or the GPL-2 (or later)
  11 */
  12
  13#include <linux/linkage.h>
  14
  15/* void *memcpy(void *dest, const void *src, size_t n);
  16 * R0 = To Address (dest) (leave unchanged to form result)
  17 * R1 = From Address (src)
  18 * R2 = count
  19 *
  20 * Note: Favours word alignment
  21 */
  22
  23#ifdef CONFIG_MEMCPY_L1 /* pick the output section for the routine that follows */
  24.section .l1.text       /* used only when CONFIG_MEMCPY_L1 is defined (presumably on-chip L1 code memory - confirm against the linker script) */
  25#else
  26.text                   /* otherwise the routine goes in the ordinary text section */
  27#endif
  28
  29.align 2                /* alignment applied before the entry point below */
  30
  31ENTRY(_memcpy)          /* overlap-safe copy of R2 bytes from R1 (src) to R0 (dst); R0 is never written in this routine */
  32        CC = R2 <=  0;  /* length not positive? */
  33        IF CC JUMP .L_P1L2147483647;    /* Nothing to do: go straight to the shared RTS */
  34
  35        P0 = R0 ;       /* dst write pointer */
  36        P1 = R1 ;       /* src read pointer */
  37        P2 = R2 ;       /* length in bytes, later used as a loop count */
  38
  39        /* check for overlapping data: a reverse copy is needed only when src < dst < src+len */
  40        CC = R1 < R0;   /* src < dst (NOTE(review): no (IU) suffix, so presumably a signed compare of addresses - confirm) */
  41        IF !CC JUMP .Lno_overlap;       /* src >= dst: a forward copy is used */
  42        R3 = R1 + R2;   /* R3 = src + len */
  43        CC = R0 < R3;   /* and dst < src+len */
  44        IF CC JUMP .Lhas_overlap;
  45
  46.Lno_overlap:
  47        /* Check for aligned data.*/
  48
  49        R3 = R1 | R0;   /* merge the address bits of src and dst */
  50        R1 = 0x3;       /* mask for the two low bits; src is already held in P1 */
  51        R3 = R3 & R1;
  52        CC = R3;        /* low bits set on either address? */
  53        IF CC JUMP .Lnot_aligned;       /* P2 still holds the full byte count here */
  54
  55        /* Both addresses are word-aligned, so we can copy
  56        at least part of the data using word copies.*/
  57        P2 = P2 >> 2;   /* P2 = number of whole 4-byte words */
  58        CC = P2 <= 2;
  59        IF !CC JUMP .Lmore_than_seven;  /* taken when there are three or more whole words */
  60        /* two whole words or fewer (at most 11 bytes): just copy byte-by-byte */
  61        P2 = R2;        /* restore the full byte count as the loop count */
  62        LSETUP(.Lthree_start, .Lthree_end) LC0=P2;
  63.Lthree_start:
  64        R3 = B[P1++] (X);
  65.Lthree_end:
  66        B[P0++] = R3;
  67
  68        RTS;
  69
  70.Lmore_than_seven:
  71        /* Reached only with three or more whole words (12+ bytes) to copy. */
  72        P2 += -1;       /* because we unroll one iteration */
  73        LSETUP(.Lword_loops, .Lword_loope) LC0=P2;
  74        I1 = P1;        /* word loads go through I1 */
  75        R3 = [I1++];    /* preload the first word (the unrolled load) */
  76#if ANOMALY_05000202
  77.Lword_loops:
  78        [P0++] = R3;    /* anomaly build: store and load are separate instructions */
  79.Lword_loope:
  80        R3 = [I1++];
  81#else
  82.Lword_loops:
  83.Lword_loope:
  84        MNOP || [P0++] = R3 || R3 = [I1++];     /* same store-then-load step as the #if branch, combined with || */
  85#endif
  86        [P0++] = R3;    /* store the last word loaded (the unrolled store) */
  87        /* Any remaining bytes to copy? */
  88        R3 = 0x3;
  89        R3 = R2 & R3;   /* R3 = low two bits of the original byte count */
  90        CC = R3 == 0;
  91        P1 = I1;        /* in case there's something left, */
  92        IF !CC JUMP .Lbytes_left;
  93        RTS;
  94.Lbytes_left:   P2 = R3;        /* 1-3 trailing bytes; falls through into the byte loop below */
  95.Lnot_aligned:
  96        /* From here, we're copying byte-by-byte. */
  97        LSETUP (.Lbyte_start, .Lbyte_end) LC0=P2;       /* P2 = number of bytes to copy */
  98.Lbyte_start:
  99        R1 = B[P1++] (X);
 100.Lbyte_end:
 101        B[P0++] = R1;
 102
 103.L_P1L2147483647:       /* shared exit; also the target of the length check at entry */
 104        RTS;
 105
 106.Lhas_overlap:
 107        /* Need to reverse the copying, because the
 108         * dst would clobber the src.
 109         * Don't bother to work out alignment for
 110         * the reverse case.
 111         */
 112        P0 = P0 + P2;
 113        P0 += -1;       /* P0 = dst + len - 1 */
 114        P1 = P1 + P2;
 115        P1 += -1;       /* P1 = src + len - 1 */
 116        LSETUP(.Lover_start, .Lover_end) LC0=P2;        /* one iteration per byte */
 117.Lover_start:
 118        R1 = B[P1--] (X);
 119.Lover_end:
 120        B[P0--] = R1;
 121
 122        RTS;
 123
 124ENDPROC(_memcpy)
 125