linux/arch/frv/lib/memset.S
<<
>>
Prefs
   1/* memset.S: optimised assembly memset
   2 *
   3 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
   4 * Written by David Howells (dhowells@redhat.com)
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11
  12
  13        .text
  14        .p2align        4
  15
  16###############################################################################
  17#
  18# void *memset(void *p, char ch, size_t count)
  19#
  20# - NOTE: must not use any stack. exception detection performs function return
  21#         to caller's fixup routine, aborting the remainder of the set
  22#         GR4, GR7, GR8, and GR11 must be managed
  23#
    # Fills count bytes starting at p with the low 8 bits of ch.
    #
    # Register usage, as read from the code below:
    #   GR8       - p on entry; never written here, so it is still p on return
    #   GR9       - ch on entry, masked to 8 bits
    #   GR10      - count on entry; never written here
    #   GR11      - not touched here (__memset_user keeps its return address in it)
    #   GR4       - address tracking pointer
    #   GR5       - number of bytes still to be written
    #   GR6       - size subtracted from GR5 after each group of stores
    #   GR7       - step value / index register for the store insns; GR4+GR7 must
    #               be the address being written whenever a store can fault,
    #               because __memset_user_error_handler computes exactly that sum
    #   GR12,GR13 - fill pattern, replicated to 2, 4 and then 8 bytes
    #   ICC3      - set from GR5 so that "beqlr icc3" returns once nothing is left
    #   CC5,CC7   - predicates for the conditional (c-prefixed) instructions
    #
    # An insn with a ".p" suffix is packed with the insn that follows it, so the
    # statement order below is significant.
    #
  24###############################################################################
  25        .globl          memset,__memset_end
  26        .type           memset,@function
  27memset:
  28        orcc.p          gr10,gr0,gr5,icc3               ; GR5 = count
  29        andi            gr9,#0xff,gr9                   ; keep only the low 8 bits of ch
  30        or.p            gr8,gr0,gr4                     ; GR4 = address
  31        beqlr           icc3,#0                         ; count == 0: nothing to do
  32
            # GR7 holds whatever the caller left in it at this point.  The
            # alignment stores below are indexed with GR0, so give GR7 a step of
            # zero before any of them can fault; otherwise the fixup handler's
            # GR4+GR7 would not be the address that failed.
            setlos          #0,gr7
  33        # conditionally write a byte to 2b-align the address
  34        setlos.p        #1,gr6
  35        andicc          gr4,#1,gr0,icc0
  36        ckne            icc0,cc7                        ; cc7 = address is odd
  37        cstb.p          gr9,@(gr4,gr0)          ,cc7,#1
  38        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
  39        cadd.p          gr4,gr6,gr4             ,cc7,#1
  40        beqlr           icc3,#0                         ; done if that was the last byte
  41
  42        # conditionally write 2 bytes (csth) to 4b-align the address
  43        andicc.p        gr4,#2,gr0,icc0
  44        subicc          gr5,#2,gr0,icc1
  45        setlos.p        #2,gr6
  46        ckne            icc0,cc7                        ; cc7 = bit 1 of the address is set
  47        slli.p          gr9,#8,gr12                     ; need to double up the pattern
  48        cknc            icc1,cc5                        ; cc5 = at least 2 bytes remain
  49        or.p            gr9,gr12,gr12
  50        andcr           cc7,cc5,cc7                     ; store only if both conditions hold
  51
  52        csth.p          gr12,@(gr4,gr0)         ,cc7,#1
  53        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
  54        cadd.p          gr4,gr6,gr4             ,cc7,#1
  55        beqlr           icc3,#0
  56
  57        # conditionally write 4 bytes (cst) to 8b-align the address
  58        andicc.p        gr4,#4,gr0,icc0
  59        subicc          gr5,#4,gr0,icc1
  60        setlos.p        #4,gr6
  61        ckne            icc0,cc7                        ; cc7 = bit 2 of the address is set
  62        slli.p          gr12,#16,gr13                   ; need to quadruple-up the pattern
  63        cknc            icc1,cc5                        ; cc5 = at least 4 bytes remain
  64        or.p            gr13,gr12,gr12
  65        andcr           cc7,cc5,cc7                     ; store only if both conditions hold
  66
  67        cst.p           gr12,@(gr4,gr0)         ,cc7,#1
  68        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
  69        cadd.p          gr4,gr6,gr4             ,cc7,#1
  70        beqlr           icc3,#0
  71
  72        or.p            gr12,gr12,gr13                  ; need to octuple-up the pattern
            # (GR13 = GR12; presumably cstdu writes the GR12:GR13 register pair)
  73
  74        # the address is now 8b-aligned - loop around writing 64-byte chunks
            # (if an alignment store was skipped because too few bytes remain,
            # fewer than 8 are left and none of the 8-byte stores below executes)
  75        setlos          #8,gr7
  76        subi.p          gr4,#8,gr4                      ; store with update index does weird stuff
            # GR4 is now 8 below the next address, so GR4+GR7 is the next address
  77        setlos          #64,gr6
  78
  79        subicc          gr5,#64,gr0,icc0
  800:      cknc            icc0,cc7                        ; cc7 = at least 64 bytes remain
  81        cstdu           gr12,@(gr4,gr7)         ,cc7,#1
  82        cstdu           gr12,@(gr4,gr7)         ,cc7,#1
  83        cstdu           gr12,@(gr4,gr7)         ,cc7,#1
  84        cstdu           gr12,@(gr4,gr7)         ,cc7,#1
  85        cstdu           gr12,@(gr4,gr7)         ,cc7,#1
  86        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
  87        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
  88        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
  89        subicc          gr5,#64,gr0,icc0                ; test for another full chunk
  90        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
  91        beqlr           icc3,#0
  92        bnc             icc0,#2,0b                      ; loop while 64 or more bytes remain
  93
  94        # now do 32-byte remnant
  95        subicc.p        gr5,#32,gr0,icc0
  96        setlos          #32,gr6
  97        cknc            icc0,cc7                        ; cc7 = at least 32 bytes remain
  98        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
  99        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
 100        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
 101        setlos          #16,gr6                         ; size for the 16-byte step below
 102        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
 103        subicc          gr5,#16,gr0,icc0                ; test for the 16-byte step below
 104        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
 105        beqlr           icc3,#0
 106
 107        # now do 16-byte remnant
 108        cknc            icc0,cc7                        ; cc7 = at least 16 bytes remain
 109        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
 110        csubcc          gr5,gr6,gr5             ,cc7,#1 ; also set ICC3
 111        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
 112        beqlr           icc3,#0
 113
 114        # now do 8-byte remnant
 115        subicc          gr5,#8,gr0,icc1
 116        cknc            icc1,cc7                        ; cc7 = at least 8 bytes remain
 117        cstdu.p         gr12,@(gr4,gr7)         ,cc7,#1
 118        csubcc          gr5,gr7,gr5             ,cc7,#1 ; also set ICC3
 119        setlos.p        #4,gr7                          ; step for the 4-byte store below
 120        beqlr           icc3,#0
 121
 122        # now do 4-byte remnant
            # GR7 is 4 here; advancing GR4 by 4 keeps GR4+GR7 on the next address
 123        subicc          gr5,#4,gr0,icc0
 124        addi.p          gr4,#4,gr4
 125        cknc            icc0,cc7                        ; cc7 = at least 4 bytes remain
 126        cstu.p          gr12,@(gr4,gr7)         ,cc7,#1
 127        csubcc          gr5,gr7,gr5             ,cc7,#1 ; also set ICC3
 128        subicc.p        gr5,#2,gr0,icc1                 ; test for the 2-byte step below
 129        beqlr           icc3,#0
 130
 131        # now do 2-byte remnant
            # the step drops to 2; advancing GR4 by 2 keeps GR4+GR7 on the next address
 132        setlos          #2,gr7
 133        addi.p          gr4,#2,gr4
 134        cknc            icc1,cc7                        ; cc7 = at least 2 bytes remain
 135        csthu.p         gr12,@(gr4,gr7)         ,cc7,#1
 136        csubcc          gr5,gr7,gr5             ,cc7,#1 ; also set ICC3
 137        subicc.p        gr5,#1,gr0,icc0                 ; test for the 1-byte step below
 138        beqlr           icc3,#0
 139
 140        # now do 1-byte remnant
            # the step drops to 0 and GR4 itself becomes the next address, since
            # the final store is indexed with GR0
 141        setlos          #0,gr7
 142        addi.p          gr4,#2,gr4
 143        cknc            icc0,cc7                        ; cc7 = at least 1 byte remains
 144        cstb.p          gr12,@(gr4,gr0)         ,cc7,#1
 145        bralr                                           ; return; GR8 still holds p
 146__memset_end:
 147
 148        .size           memset, __memset_end-memset
 149
 150###############################################################################
 151#
 152# clear memory in userspace
 153# - return the number of bytes that could not be cleared (0 on complete success)
 154#
 155# long __memset_user(void *p, size_t count)
 156#
    # - p arrives in GR8 and count in GR9; count is moved to GR10 and GR9 is
    #   zeroed so that memset(p, 0, count) can be called directly
    # - LR is kept in GR11 across the call, which memset does not touch
    # - __memset_user_error_lr labels the return address of that call; the
    #   header comment on memset says exception detection returns to the
    #   caller's fixup routine, so presumably the fault code matches on this
    #   address and resumes at __memset_user_error_handler - confirm against
    #   the exception-table code
    #
 157###############################################################################
 158        .globl          __memset_user, __memset_user_error_lr, __memset_user_error_handler
 159        .type           __memset_user,@function
 160__memset_user:
 161        movsg           lr,gr11                 ; keep our return address in GR11
 162
 163        # abuse memset to do the dirty work
 164        or.p            gr9,gr9,gr10            ; GR10 = count (memset's third argument)
 165        setlos          #0,gr9                  ; GR9 = 0, the fill byte
 166        call            memset
 167__memset_user_error_lr:
 168        jmpl.p          @(gr11,gr0)             ; memset completed: return to our caller
 169        setlos          #0,gr8                  ; with 0 bytes left uncleared
 170
 171        # deal with any exception generated by memset
 172        # GR4  - memset's address tracking pointer
 173        # GR7  - memset's step value (index register for store insns)
 174        # GR8  - memset's original start address
 175        # GR10 - memset's original count
    # GR4+GR7 is taken to be the address at which the store failed, so this
    # relies on memset keeping that sum equal to the address being written
 176__memset_user_error_handler:
 177        add.p           gr4,gr7,gr4             ; GR4 = address taken as the failing store
 178        add             gr8,gr10,gr8            ; GR8 = start + count = end of the region
 179        jmpl.p          @(gr11,gr0)             ; return to __memset_user's caller
 180        sub             gr8,gr4,gr8             ; we return the amount left uncleared
 181
 182        .size           __memset_user, .-__memset_user
 183