linux/arch/alpha/lib/memset.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 * linux/arch/alpha/lib/memset.S
   4 *
   5 * This is an efficient (and small) implementation of the C library "memset()"
   6 * function for the alpha.
   7 *
   8 *      (C) Copyright 1996 Linus Torvalds
   9 *
  10 * This routine is "moral-ware": you are free to use it any way you wish, and
  11 * the only obligation I put on you is a moral one: if you make any improvements
  12 * to the routine, please send me your improvements for me to use similarly.
  13 *
  14 * The scheduling comments are according to the EV5 documentation (and done by 
  15 * hand, so they might well be incorrect, please do tell me about it..)
  16 */
  17#include <asm/export.h>
  18        .set noat               /* assembler may not use the $at temporary implicitly */
  19        .set noreorder          /* ask the assembler to leave the hand-scheduled order alone */
  20.text
  21        .globl memset           /* alias of ___memset (assigned at the bottom of this file) */
  22        .globl __memset         /* alias of ___memset (assigned at the bottom of this file) */
  23        .globl ___memset        /* byte fill: replicates the fill byte, then falls through */
  24        .globl __memset16       /* 16-bit fill: replicates the word, then branches to the shared code */
  25        .globl __constant_c_memset /* shared fill code; expects the pattern already replicated in $17 */
  26
  27        .ent ___memset
  28.align 5
/*
 * ___memset: fill a memory region with a single byte value.
 *
 * Register use, as read from the code below (the argument/return roles
 * match the usual Alpha calling convention -- confirm against the C
 * prototype):
 *   $16   in:  destination address; advanced to the next quadword boundary
 *              once an unaligned head has been handled
 *   $17   in:  fill value; only its low byte is used here, and it is
 *              replicated into all eight byte lanes before falling through
 *   $18   in:  byte count, tested as a signed value (count <= 0 stores
 *              nothing)
 *   $0    out: the original destination address
 *   $1-$7      scratch, overwritten
 *   $26        return address
 *
 * ___memset falls through into __constant_c_memset, which is also an entry
 * point of its own (branched to by __memset16, and exported) for callers
 * that already hold a replicated fill pattern in $17.
 *
 * E0/E1 in the per-line comments presumably name the EV5 integer pipe the
 * author scheduled the instruction for, with ".." marking the second
 * instruction of a dual-issue pair. The no-ops (`bis $31,$31,$31` and
 * `ldq_u $31,0($30)`, both of which target the zero register) look like
 * scheduling/alignment padding.
 */
  29___memset:
  30        .frame $30,0,$26,0      /* frame size 0; return address register is $26 */
  31        .prologue 0
  32
  33        and $17,255,$1          /* E1: $1 = low byte of the fill value */
  34        insbl $17,1,$17         /* .. E0: $17 = that byte moved to byte lane 1 */
  35        bis $17,$1,$17          /* E0 (p-c latency, next cycle): byte in lanes 0-1 */
  36        sll $17,16,$1           /* E1 (p-c latency, next cycle): copy shifted to lanes 2-3 */
  37
  38        bis $17,$1,$17          /* E0 (p-c latency, next cycle): byte in lanes 0-3 */
  39        sll $17,32,$1           /* E1 (p-c latency, next cycle): copy shifted to lanes 4-7 */
  40        bis $17,$1,$17          /* E0 (p-c latency, next cycle): byte in all 8 lanes */
  41        ldq_u $31,0($30)        /* .. E1: no-op (load into the zero register) */
  42
  43.align 5
/*
 * __constant_c_memset: same fill, but $17 must already hold the pattern
 * replicated across the quadword. $6 is set to the end address
 * (dest + count, i.e. one past the last byte) and kept for the tail merge.
 * Three cases:
 *   - dest and the end address lie in the same aligned quadword
 *       -> within_one_quad (single read-modify-write)
 *   - dest is not 8-byte aligned
 *       -> merge the head bytes into the first quadword, adjust $16/$18,
 *          then fall into `aligned`
 *   - otherwise
 *       -> `aligned`: whole-quadword store loop, then a partial tail
 */
  44__constant_c_memset:
  45        addq $18,$16,$6         /* E0: $6 = end address (dest + count) */
  46        bis $16,$16,$0          /* .. E1: return value $0 = dest */
  47        xor $16,$6,$1           /* E0: bits in which dest and end differ */
  48        ble $18,end             /* .. E1: count <= 0, nothing to store */
  49
  50        bic $1,7,$1             /* E0: zero iff dest and end share a quadword */
  51        beq $1,within_one_quad  /* .. E1 (note EV5 zero-latency forwarding) */
  52        and $16,7,$3            /* E0: $3 = dest's byte offset within its quadword */
  53        beq $3,aligned          /* .. E1 (note EV5 zero-latency forwarding) */
  54
  55        ldq_u $4,0($16)         /* E0: load the quadword containing dest */
  56        bis $16,$16,$5          /* .. E1: $5 = original dest, for the stq_u below */
  57        insql $17,$16,$2        /* E0: pattern in the bytes from dest's offset upward */
  58        subq $3,8,$3            /* .. E1: $3 = offset - 8 = -(head byte count) */
  59
  60        addq $18,$3,$18         /* E0   $18 is new count ($3 is negative) */
  61        mskql $4,$16,$4         /* .. E1 (and possible load stall): keep old bytes below dest */
  62        subq $16,$3,$16         /* E0   $16 is new aligned destination */
  63        bis $2,$4,$1            /* .. E1: old low bytes merged with the pattern */
  64
  65        bis $31,$31,$31         /* E0: no-op */
  66        ldq_u $31,0($30)        /* .. E1: no-op */
  67        stq_u $1,0($5)          /* E0: write the merged first quadword back */
  68        bis $31,$31,$31         /* .. E1: no-op */
  69
  70.align 4
  71aligned:
  72        sra $18,3,$3            /* E0: $3 = number of whole quadwords left */
  73        and $18,7,$18           /* .. E1: $18 = trailing bytes after them */
  74        bis $16,$16,$5          /* E0: $5 = store cursor */
  75        beq $3,no_quad          /* .. E1: no whole quadword to store */
  76
  77.align 3
  78loop:
  79        stq $17,0($5)           /* E0: store one quadword of pattern */
  80        subq $3,1,$3            /* .. E1: one fewer quadword to go */
  81        addq $5,8,$5            /* E0: advance the cursor */
  82        bne $3,loop             /* .. E1: repeat until none remain */
  83
  84no_quad:
  85        bis $31,$31,$31         /* E0: no-op */
  86        beq $18,end             /* .. E1: no trailing bytes, done */
  87        ldq $7,0($5)            /* E0: load the quadword holding the tail */
  88        mskqh $7,$6,$2          /* .. E1 (and load stall): keep old bytes from end's offset up */
  89
  90        insqh $17,$6,$4         /* E0: pattern in the bytes below end's offset */
  91        bis $2,$4,$1            /* .. E1: pattern tail merged with the old high bytes */
  92        stq $1,0($5)            /* E0: write the final quadword back */
  93        ret $31,($26),1         /* .. E1: return to the address in $26 */
  94
  95.align 3
  96within_one_quad:
  97        ldq_u $1,0($16)         /* E0: load the quadword containing the whole range */
  98        insql $17,$16,$2        /* E1: pattern in the bytes from dest's offset upward */
  99        mskql $1,$16,$4         /* E0 (after load stall): old bytes below dest */
 100        bis $2,$4,$2            /* E0: old head bytes + pattern through byte 7 */
 101
 102        mskql $2,$6,$4          /* E0: cut that off at end's offset */
 103        mskqh $1,$6,$2          /* .. E1: old bytes from end's offset upward */
 104        bis $2,$4,$1            /* E0: old head + pattern + old tail */
 105        stq_u $1,0($16)         /* E0: write the merged quadword back */
 106
 107end:
 108        ret $31,($26),1         /* E1: return to the address in $26 */
 109        .end ___memset
 110EXPORT_SYMBOL(___memset)
 111EXPORT_SYMBOL(__constant_c_memset)
 112
 113        .align 5
 114        .ent __memset16
/*
 * __memset16: fill memory with a 16-bit value.
 *
 * The low 16 bits of $17 are copied into all four word lanes of a
 * quadword, then control branches to __constant_c_memset, which performs
 * the stores and returns straight to this routine's caller ($26 is not
 * modified here). $16 and $18 are passed through unchanged; the shared
 * code adds $18 to the destination address, so it is consumed as a byte
 * count. $1-$4 are overwritten here before the branch.
 *
 * NOTE(review): the head/tail merging in __constant_c_memset shifts the
 * pattern by the destination's byte offset, so this presumably expects a
 * 2-byte-aligned destination -- confirm against the callers.
 */
 115__memset16:
 116        .prologue 0
 117
 118        inswl $17,0,$1          /* E0: $1 = 16-bit value in bytes 0-1 */
 119        inswl $17,2,$2          /* E0: $2 = 16-bit value in bytes 2-3 */
 120        inswl $17,4,$3          /* E0: $3 = 16-bit value in bytes 4-5 */
 121        or $1,$2,$1             /* .. E1: bytes 0-3 combined */
 122        inswl $17,6,$4          /* E0: $4 = 16-bit value in bytes 6-7 */
 123        or $1,$3,$1             /* .. E1: bytes 0-5 combined */
 124        or $1,$4,$17            /* E0: $17 = value replicated in all four words */
 125        br __constant_c_memset  /* .. E1: continue in the shared fill code (no return here) */
 126
 127        .end __memset16
 128EXPORT_SYMBOL(__memset16)
 129
 130memset = ___memset              /* memset is a second name for the ___memset entry point */
 131__memset = ___memset            /* __memset likewise resolves to ___memset */
 132        EXPORT_SYMBOL(memset)   /* export the memset alias */
 133        EXPORT_SYMBOL(__memset) /* export the __memset alias */
 134