linux/arch/alpha/lib/memset.S
<<
>>
Prefs
   1/*
   2 * linux/arch/alpha/lib/memset.S
   3 *
   4 * This is an efficient (and small) implementation of the C library "memset()"
   5 * function for the alpha.
   6 *
   7 *      (C) Copyright 1996 Linus Torvalds
   8 *
   9 * This routine is "moral-ware": you are free to use it any way you wish, and
  10 * the only obligation I put on you is a moral one: if you make any improvements
  11 * to the routine, please send me your improvements for me to use similarly.
  12 *
  13 * The scheduling comments are according to the EV5 documentation (and done by 
  14 * hand, so they might well be incorrect, please do tell me about it..)
  15 */
  16#include <asm/export.h>
   17        .set noat               /* gas Alpha option: macro expansions may not clobber the $at temporary */
   18        .set noreorder          /* gas Alpha option: the assembler may not re-order instructions */
   19.text
   20        .globl memset           /* alias of ___memset, assigned at the end of this file */
   21        .globl __memset         /* alias of ___memset, assigned at the end of this file */
   22        .globl ___memset        /* byte fill: replicates the low byte of $17, then fills */
   23        .globl __memsetw        /* 16-bit fill: replicates the low word of $17, then fills */
   24        .globl __constant_c_memset /* fill entry that expects $17 to hold the full 64-bit pattern */
  25
   26        .ent ___memset
             /*
              * ___memset / __constant_c_memset
              *
              * Fill $18 bytes starting at address $16.
              *
              * Entry points:
              *   ___memset            $17 holds the fill byte in its low 8 bits;
              *                        it is replicated into all 8 bytes and the
              *                        code falls through to the entry below.
              *   __constant_c_memset  $17 already holds the 64-bit store pattern
              *                        (__memsetw branches here as well).
              *
              * Registers as used by the code below:
              *   $16  destination address; the unaligned-head code advances it
              *        to the next 8-byte boundary
              *   $17  fill value / 64-bit store pattern
              *   $18  byte count; a count <= 0 (signed) stores nothing
              *   $0   copy of the original $16, made before any store
              *   $6   $16 + $18, i.e. the address one past the last byte filled
              *   $1-$5, $7  scratch
              *   $26  return address used by ret
              *
              * Partially covered quadwords at either end are handled by
              * load / merge / store: the bytes outside the requested range are
              * read and written back with the value that was loaded.
              *
              * .frame declares a zero-size frame on $30 with the return
              * address in $26; the routine itself never writes to $30.
              */
   27.align 5
   28___memset:
   29        .frame $30,0,$26,0
   30        .prologue 0
   31
             /*
              * Replicate the low byte of $17 into all eight bytes of $17:
              *   $1  = $17 & 0xff
              *   $17 = low byte of $17 moved to byte 1, OR $1   -> 2 bytes
              *   $17 |= $17 << 16                                -> 4 bytes
              *   $17 |= $17 << 32                                -> 8 bytes
              */
   32        and $17,255,$1          /* E1 */
   33        insbl $17,1,$17         /* .. E0 */
   34        bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
   35        sll $17,16,$1           /* E1 (p-c latency, next cycle) */
   36
   37        bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
   38        sll $17,32,$1           /* E1 (p-c latency, next cycle) */
   39        bis $17,$1,$17          /* E0 (p-c latency, next cycle) */
             /* Load whose destination is $31: the loaded value is discarded,
                so this has no effect on register state; presumably a filler
                to complete the issue pair before the aligned entry point. */
   40        ldq_u $31,0($30)        /* .. E1 */
   41
   42.align 5
   43__constant_c_memset:
             /*
              * $6 = end address ($16 + $18), $0 = original destination,
              * $1 = $16 ^ $6.  Return immediately when the count is <= 0.
              */
   44        addq $18,$16,$6         /* E0 */
   45        bis $16,$16,$0          /* .. E1 */
   46        xor $16,$6,$1           /* E0 */
   47        ble $18,end             /* .. E1 */
   48
             /*
              * If start and end address agree in every bit above the low
              * three, the whole range lies inside one aligned quadword.
              * Otherwise $3 = $16 & 7; zero means the start is already
              * 8-byte aligned and the head fix-up can be skipped.
              */
   49        bic $1,7,$1             /* E0 */
   50        beq $1,within_one_quad  /* .. E1 (note EV5 zero-latency forwarding) */
   51        and $16,7,$3            /* E0 */
   52        beq $3,aligned          /* .. E1 (note EV5 zero-latency forwarding) */
   53
             /*
              * Unaligned head.  Load the aligned quadword containing $16 and
              * remember the original address in $5 for the store.  insql
              * shifts the pattern up so it starts at byte ($16 & 7); mskql
              * clears the loaded quadword from that byte upward, keeping the
              * bytes that precede the start address.  $3 becomes
              * ($16 & 7) - 8, the negated number of bytes this store covers.
              */
   54        ldq_u $4,0($16)         /* E0 */
   55        bis $16,$16,$5          /* .. E1 */
   56        insql $17,$16,$2        /* E0 */
   57        subq $3,8,$3            /* .. E1 */
   58
   59        addq $18,$3,$18         /* E0   $18 is new count ($3 is negative) */
   60        mskql $4,$16,$4         /* .. E1 (and possible load stall) */
   61        subq $16,$3,$16         /* E0   $16 is new aligned destination */
   62        bis $2,$4,$1            /* .. E1 */
   63
             /* Store the merged head quadword through the saved address in
                $5.  The surrounding instructions all target $31, so they
                change no register state (padding around the store). */
   64        bis $31,$31,$31         /* E0 */
   65        ldq_u $31,0($30)        /* .. E1 */
   66        stq_u $1,0($5)          /* E0 */
   67        bis $31,$31,$31         /* .. E1 */
   68
   69.align 4
   70aligned:
             /*
              * $16 is 8-byte aligned here.  Split the remaining count:
              * $3 = number of whole quadwords ($18 >> 3), $18 = leftover
              * bytes ($18 & 7).  $5 becomes the running store pointer.
              * Skip the loop when there is no whole quadword to store.
              */
   71        sra $18,3,$3            /* E0 */
   72        and $18,7,$18           /* .. E1 */
   73        bis $16,$16,$5          /* E0 */
   74        beq $3,no_quad          /* .. E1 */
   75
   76.align 3
   77loop:
             /* Store one full quadword of pattern per iteration, counting
                $3 down to zero and advancing $5 by 8 bytes. */
   78        stq $17,0($5)           /* E0 */
   79        subq $3,1,$3            /* .. E1 */
   80        addq $5,8,$5            /* E0 */
   81        bne $3,loop             /* .. E1 */
   82
   83no_quad:
             /*
              * Tail: $18 (0..7) bytes remain at the aligned address in $5.
              * Nothing to do when $18 is zero.  Otherwise load the quadword,
              * keep its bytes from position ($6 & 7) upward (mskqh), place
              * the pattern in the bytes below that position (insqh), merge
              * and store.  $6 is the end address, so ($6 & 7) equals the
              * number of leftover bytes.  This path returns directly.
              */
   84        bis $31,$31,$31         /* E0 */
   85        beq $18,end             /* .. E1 */
   86        ldq $7,0($5)            /* E0 */
   87        mskqh $7,$6,$2          /* .. E1 (and load stall) */
   88
   89        insqh $17,$6,$4         /* E0 */
   90        bis $2,$4,$1            /* .. E1 */
   91        stq $1,0($5)            /* E0 */
   92        ret $31,($26),1         /* .. E1 */
   93
   94.align 3
   95within_one_quad:
             /*
              * Start and end fall inside the same aligned quadword.
              *   $1 = loaded quadword
              *   $2 = loaded bytes below ($16 & 7) | pattern from there upward
              *   $4 = $2 restricted to the bytes below ($6 & 7)
              *   $2 = loaded bytes from ($6 & 7) upward
              *   $1 = $2 | $4, written back to the same quadword
              * Only the bytes in [$16, $6) end up holding the pattern.
              */
   96        ldq_u $1,0($16)         /* E0 */
   97        insql $17,$16,$2        /* E1 */
   98        mskql $1,$16,$4         /* E0 (after load stall) */
   99        bis $2,$4,$2            /* E0 */
  100
  101        mskql $2,$6,$4          /* E0 */
  102        mskqh $1,$6,$2          /* .. E1 */
  103        bis $2,$4,$1            /* E0 */
  104        stq_u $1,0($16)         /* E0 */
  105
  106end:
             /* Common return through $26; $0 holds the value copied from $16
                at __constant_c_memset entry. */
  107        ret $31,($26),1         /* E1 */
  108        .end ___memset
             /* EXPORT_SYMBOL is not defined in this file; presumably it comes
                from the <asm/export.h> include at the top - confirm. */
  109EXPORT_SYMBOL(___memset)
  110EXPORT_SYMBOL(__constant_c_memset)
 111
  112        .align 5
             /*
              * __memsetw: fill memory with a repeated 16-bit value.
              *
              * The low 16 bits of $17 are inserted at byte offsets 0, 2, 4 and
              * 6 of a quadword and OR-ed together, giving a 64-bit pattern
              * that replaces $17.  Control then branches (not calls) to
              * __constant_c_memset, which performs the fill and returns to
              * this routine's caller through $26.
              *
              * $16 and $18 are passed through untouched, so they carry the
              * meaning __constant_c_memset gives them: destination address
              * and count in bytes (not in 16-bit units).  Scratch: $1-$4.
              *
              * NOTE(review): the head and tail merges in __constant_c_memset
              * shift the pattern by the byte offset of the start / end
              * address, so the 16-bit pattern only stays in phase across the
              * whole range for an even destination address and an even byte
              * count - presumably callers guarantee both; TODO confirm.
              */
  113        .ent __memsetw
  114__memsetw:
  115        .prologue 0
  116
  117        inswl $17,0,$1          /* E0 */
  118        inswl $17,2,$2          /* E0 */
  119        inswl $17,4,$3          /* E0 */
  120        or $1,$2,$1             /* .. E1 */
  121        inswl $17,6,$4          /* E0 */
  122        or $1,$3,$1             /* .. E1 */
  123        or $1,$4,$17            /* E0 */
  124        br __constant_c_memset  /* .. E1 */
  125
  126        .end __memsetw
  127EXPORT_SYMBOL(__memsetw)
 128
  129memset = ___memset              /* memset is a second name for the ___memset entry point */
  130__memset = ___memset            /* __memset likewise resolves to ___memset */
  131        EXPORT_SYMBOL(memset)
  132        EXPORT_SYMBOL(__memset)
 133