linux/arch/sh/lib64/memset.S
<<
>>
Prefs
   1/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
   2/* Modified by SuperH, Inc. September 2003 */
   3!
   4! Fast SH memset
   5!
   6! by Toshiyasu Morita (tm@netcom.com)
   7!
   8! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
   9! Copyright 2002 SuperH Ltd.
  10!
  11
  /*
   * SHHI / SHLO: pick the 64-bit register shift instruction by byte order.
   *   little-endian: SHHI = shlld, SHLO = shlrd
   *   otherwise:     SHHI = shlrd, SHLO = shlld
   * In the code visible in this file only SHHI is used (to build the byte
   * mask for the short path of memset); SHLO is defined but not referenced.
   *
   * NOTE(review): no #include is visible here.  If neither __BYTE_ORDER nor
   * __LITTLE_ENDIAN is defined, the preprocessor treats both as 0, the test
   * is true and the little-endian definitions are selected -- confirm that
   * the build defines these macros for big-endian targets.
   */
  12#if __BYTE_ORDER == __LITTLE_ENDIAN
  13#define SHHI shlld
  14#define SHLO shlrd
  15#else
  16#define SHHI shlrd
  17#define SHLO shlld
  18#endif
  19
  20        .section .text..SHmedia32,"ax"
  21        .globl  memset
  22        .type   memset, @function
  23
  24        .align 5
  25
        // ------------------------------------------------------------------
        // memset: fill a byte range with a single byte value.
        //
        // Inputs (as used by the code below):
        //   r2  = start address of the region
        //   r3  = fill value; it is replicated into all 8 bytes of r3
        //   r4  = number of bytes to fill
        //   r18 = return address (moved to tr2 and used by every blink)
        //   NOTE(review): this matches memset(dst, c, n) under the SH-5 C
        //   calling convention -- confirm against the ABI document.
        //
        // Output: r2 is never written, so it still holds the start address
        //         when the routine returns.
        //
        // Registers written: r3, r4, r5, r7, r8, r9, r20, r22-r25,
        //                    tr0, tr1, tr2.
        //
        // Three paths, chosen from r23 = count + (start & 7):
        //   r23 <= 8      : the region lies inside one aligned quadword;
        //                   read-modify-write of that single quadword.
        //   r23/8 in 1..7 : "multiquad" -- head store, tail store and up to
        //                   three pairs of aligned stores working inwards
        //                   from both ends (the stores may overlap).
        //   r23/8 >= 8    : "loop" -- head store, 32 bytes per iteration,
        //                   then five aligned stores before the end and the
        //                   tail store.
        // ------------------------------------------------------------------
  26memset:
  27        pta/l multiquad, tr0      // tr0 = address of multiquad (set up ahead of the branch)
  28        andi r2, 7, r22           // r22 = start & 7: offset of start inside its quadword
  29        ptabs r18, tr2            // tr2 = return address taken from r18
  30        mshflo.b r3,r3,r3         // first step of replicating the fill byte (see next comment)
  31        add r4, r22, r23          // r23 = count + misalignment = end offset from aligned start
  32        mperm.w r3, r63, r3     // Fill pattern now in every byte of r3
  33
  34        movi 8, r9                // r9 = 8: bytes per quadword, also the loop threshold below
  35        bgtu/u r23, r9, tr0 // multiquad: region extends beyond the first aligned quadword
  36
  37        beqi/u r4, 0, tr2       // Return with size 0 - ensures no mem accesses
        // Short path: 1..8 bytes, all inside one aligned quadword.
  38        ldlo.q r2, 0, r7          // r7 = current memory contents at the start address
  39        shlli r4, 2, r4           // r4 = count * 4 (half of the bit count to shift by)
  40        movi -1, r8               // r8 = all ones
  41        SHHI r8, r4, r8           // shift the mask by 4*count bits ...
  42        SHHI r8, r4, r8           // ... twice, 8*count bits in total.
                                  // NOTE(review): presumably split in two so that
                                  // count == 8 never needs a shift amount of 64 -- confirm.
  43        mcmv r7, r8, r3           // merge old data (r7) into the fill pattern under mask r8
                                  // NOTE(review): expected to keep the fill bytes only where
                                  // the mask is clear -- confirm against the mcmv definition.
  44        stlo.q r2, 0, r3          // write the merged data back at the start address
  45        blink tr2, r63            // return
  46
  47multiquad:
  48        pta/l lastquad, tr0       // tr0 = address of lastquad
  49        stlo.q r2, 0, r3          // head: unaligned-low store of the pattern at start
  50        shlri r23, 3, r24         // r24 = r23 / 8
  51        add r2, r4, r5            // r5 = start + count (one past the last byte)
  52        beqi/u r24, 1, tr0 // lastquad: only the head and tail stores are needed
  53        pta/l loop, tr1           // tr1 = address of loop
  54        sub r2, r22, r25          // r25 = start rounded down to a multiple of 8
  55        andi r5, -8, r20   // calculate end address and
  56        addi r20, -7*8, r8 // loop end address; This might overflow, so we need
  57                           // to use a different test before we start the loop
  58        bge/u r24, r9, tr1 // loop: taken when r24 >= 8
        // Medium path (r24 in 2..7): store aligned quadwords inwards from both ends.
  59        st.q r25, 8, r3           // first aligned quadword after the head
  60        st.q r20, -8, r3          // last aligned quadword before the aligned end
  61        shlri r24, 1, r24         // r24 = r24 / 2: number of store pairs required
  62        beqi/u r24, 1, tr0 // lastquad: one pair was enough (r24 was 2 or 3)
  63        st.q r25, 16, r3
  64        st.q r20, -16, r3
  65        beqi/u r24, 2, tr0 // lastquad: two pairs were enough (r24 was 4 or 5)
  66        st.q r25, 24, r3          // third pair (r24 was 6 or 7)
  67        st.q r20, -24, r3
  68lastquad:
  69        sthi.q r5, -1, r3         // tail: unaligned-high store ending at the last byte (r5 - 1)
  70        blink tr2,r63             // return
  71
        // Large path: 32 bytes per iteration starting at r25 + 8; r8 = r20 - 56 is the bound.
  72loop:
  73!!!     alloco r25, 32  // QQQ comment out for short-term fix to SHUK #3895.
  74                        // QQQ commenting out is logically correct, but sub-optimal
  75                        // QQQ Sean McGoogan - 4th April 2003.
  76        st.q r25, 8, r3
  77        st.q r25, 16, r3
  78        st.q r25, 24, r3
  79        st.q r25, 32, r3
  80        addi r25, 32, r25         // advance by the 32 bytes just written
  81        bgeu/l r8, r25, tr1 // loop: repeat while r8 >= r25 (unsigned compare)
  82
        // Finish: five aligned quadwords immediately below the aligned end address
        // (these may overlap what the loop already wrote), then the tail bytes.
  83        st.q r20, -40, r3
  84        st.q r20, -32, r3
  85        st.q r20, -24, r3
  86        st.q r20, -16, r3
  87        st.q r20, -8, r3
  88        sthi.q r5, -1, r3         // tail: unaligned-high store ending at the last byte (r5 - 1)
  89        blink tr2,r63             // return
  90
  91        .size   memset,.-memset
  92