linux/arch/powerpc/lib/string_64.S
<<
>>
Prefs
   1/*
   2 * This program is free software; you can redistribute it and/or modify
   3 * it under the terms of the GNU General Public License as published by
   4 * the Free Software Foundation; either version 2 of the License, or
   5 * (at your option) any later version.
   6 *
   7 * This program is distributed in the hope that it will be useful,
   8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
   9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  10 * GNU General Public License for more details.
  11 *
  12 * You should have received a copy of the GNU General Public License
  13 * along with this program; if not, write to the Free Software
  14 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  15 *
  16 * Copyright (C) IBM Corporation, 2012
  17 *
  18 * Author: Anton Blanchard <anton@au.ibm.com>
  19 */
  20
  21#include <asm/ppc_asm.h>
  22#include <asm/asm-offsets.h>
  23
  /*
   * TOC slot for the ppc64_caches symbol.  .Llong_clear in __clear_user
   * reads this slot with "ld r5,PPC64_CACHES@toc(r2)" and then indexes the
   * result with DCACHEL1LOGLINESIZE / DCACHEL1LINESIZE.
   */
  24        .section        ".toc","aw"
  25PPC64_CACHES:
  26        .tc             ppc64_caches[TC],ppc64_caches
  27        .section        ".text"    /* back to code for everything that follows */
  28
  29/**
  30 * __clear_user - Zero a block of memory in user space, with less checking.
  31 * @to:   Destination address, in user space.
  32 * @n:    Number of bytes to zero.
  33 *
  34 * Zero a block of memory in user space.  Caller must check
  35 * the specified block with access_ok() before calling this function.
  36 *
  37 * Returns number of bytes that could not be cleared.
  38 * On success, this will be zero.
  39 */
  40
  /*
   * err1 / err2 / err3 - tag the next instruction with a fault fixup.
   *
   * Each macro is written in front of a store on the same source line
   * ("err1;   stb r0,0(r3)").  It drops a numeric local label (100, 200 or
   * 300) at that position, i.e. on the instruction that follows, and then
   * emits one entry into the __ex_table section consisting of two 64-bit
   * values: the address of that label and the address of the matching
   * handler (.Ldo_err1, .Ldo_err2 or .Ldo_err3).  ".previous" switches back
   * to the section that was active before the macro was invoked.
   *
   * NOTE(review): presumably the kernel's exception-table lookup uses these
   * pairs to resume at the handler when the tagged instruction faults -
   * confirm against the exception table documentation for this kernel
   * version.
   */
  41        .macro err1
  42100:
  43        .section __ex_table,"a"
  44        .align 3    /* NOTE(review): presumably 2^3 = 8 byte alignment, matching the .llong entries - confirm */
  45        .llong 100b,.Ldo_err1    /* (tagged instruction, handler) */
  46        .previous
  47        .endm
  48
  49        .macro err2
  50200:
  51        .section __ex_table,"a"
  52        .align 3
  53        .llong 200b,.Ldo_err2    /* (tagged instruction, handler) */
  54        .previous
  55        .endm
  56
  57        .macro err3
  58300:
  59        .section __ex_table,"a"
  60        .align 3
  61        .llong 300b,.Ldo_err3    /* (tagged instruction, handler) */
  62        .previous
  63        .endm
  64
  /*
   * Fixup handlers named by the err1/err2/err3 exception-table entries.
   * The three labels fall through into one another.
   *
   * Register state expected on entry (set up by __clear_user):
   *   r0 = 0, the value being stored
   *   r4 = number of bytes still to clear, counted from the pointer the
   *        handler restarts at
   *   r8 = (.Ldo_err1 only) pointer saved at the start of the current phase
   *   r3 = (.Ldo_err2 only) current destination pointer, in step with r4
   *
   * The handlers redo the remaining r4 bytes one byte at a time.  If one of
   * those byte stores is itself redirected here through err3, r4 at that
   * moment is the count of bytes not cleared and becomes the return value
   * in r3.  If the byte loop runs to completion, r4 has reached 0 and the
   * fall-through into .Ldo_err3 returns 0.
   */
  65.Ldo_err1:
  66        mr      r3,r8    /* restart from the pointer saved in r8 */
  67
  68.Ldo_err2:
  69        mtctr   r4    /* one loop iteration per remaining byte */
  701:
  71err3;   stb     r0,0(r3)    /* a fault here goes to .Ldo_err3 */
  72        addi    r3,r3,1
  73        addi    r4,r4,-1    /* r4 tracks the bytes still not cleared */
  74        bdnz    1b
  75
  76.Ldo_err3:
  77        mr      r3,r4    /* return value: bytes that could not be cleared */
  78        blr
  79
  /*
   * __clear_user - store zeros over r4 bytes starting at address r3.
   *
   * In:   r3 = destination address (@to), r4 = byte count (@n)
   * Out:  r3 = 0 when every store completed (label 10 below); when a tagged
   *       store faults, the .Ldo_err* handlers produce the return value
   *       (number of bytes left uncleared).
   *
   * Register roles inside the routine:
   *   r0  = 0, the value written by every store
   *   r3  = running destination pointer
   *   r4  = bytes remaining
   *   r5  = ppc64_caches address, later bytes up to the next cache line
   *   r6  = scratch: -r3, alignment byte count, loop counts
   *   r7  = value loaded from DCACHEL1LOGLINESIZE (used as a shift count)
   *   r8  = pointer saved at the start of a phase, used by .Ldo_err1
   *   r9  = value loaded from DCACHEL1LINESIZE (used as the dcbz stride)
   *   r10 = 4 * r9 for the size check, then r9 - 1 as a mask
   *   cr7 = low four bits of -r3 first, later low four bits of r4
   *
   * Shape of the code:
   *   - fewer than 32 bytes: .Lshort_clear
   *   - otherwise align to 8 bytes, then pick by remaining size:
   *       < 32  -> .Lshort_clear
   *       > 512 -> .Llong_clear  (dcbz one cache line at a time)
   *       else  -> .Lmedium_clear (32 bytes per iteration)
   *
   * Fault recovery invariants (do not reorder around the tagged stores):
   *   - at every err1 store, r8 is the pointer at the start of the current
   *     phase and r4 is the byte count remaining from r8;
   *   - at every err2 store, r3 and r4 are in step (r4 bytes remain from r3).
   */
  80_GLOBAL(__clear_user)
  81        cmpdi   r4,32
  82        neg     r6,r3    /* low bits of -r3 = bytes needed to reach alignment */
  83        li      r0,0
  84        blt     .Lshort_clear
  85        mr      r8,r3    /* restart point for err1 during the alignment stores */
  86        mtocrf  0x01,r6    /* cr7 = low four bits of -r3; tested bit by bit below */
  87        clrldi  r6,r6,(64-3)    /* r6 = bytes (0..7) needed for 8 byte alignment */
  88
  89        /* Get the destination 8 byte aligned */
  90        bf      cr7*4+3,1f    /* 1s bit clear: no single byte store needed */
  91err1;   stb     r0,0(r3)
  92        addi    r3,r3,1
  93
  941:      bf      cr7*4+2,2f    /* 2s bit */
  95err1;   sth     r0,0(r3)
  96        addi    r3,r3,2
  97
  982:      bf      cr7*4+1,3f    /* 4s bit */
  99err1;   stw     r0,0(r3)
 100        addi    r3,r3,4
 101
 1023:      sub     r4,r4,r6    /* account for the alignment bytes only now, so r4 stayed valid for err1 */
 103
 104        cmpdi   r4,32
 105        cmpdi   cr1,r4,512    /* second compare goes to cr1 so both results are available */
 106        blt     .Lshort_clear
 107        bgt     cr1,.Llong_clear
 108
 109.Lmedium_clear:
 110        srdi    r6,r4,5    /* number of whole 32 byte chunks */
 111        mtctr   r6
 112
 113        /* Do 32 byte chunks */
 1144:
 115err2;   std     r0,0(r3)
 116err2;   std     r0,8(r3)
 117err2;   std     r0,16(r3)
 118err2;   std     r0,24(r3)
 119        addi    r3,r3,32    /* r3/r4 advance together after the whole chunk, as err2 requires */
 120        addi    r4,r4,-32
 121        bdnz    4b
 122
 123.Lshort_clear:
 124        /* up to 31 bytes to go */
 125        cmpdi   r4,16
 126        blt     6f
 127err2;   std     r0,0(r3)
 128err2;   std     r0,8(r3)
 129        addi    r3,r3,16
 130        addi    r4,r4,-16
 131
 132        /* Up to 15 bytes to go */
 1336:      mr      r8,r3    /* new err1 restart point for the tail stores */
 134        clrldi  r4,r4,(64-4)    /* keep only the low four bits of the count */
 135        mtocrf  0x01,r4    /* cr7 = those four bits: 8, 4, 2, 1 byte stores */
 136        bf      cr7*4+0,7f    /* 8s bit */
 137err1;   std     r0,0(r3)
 138        addi    r3,r3,8
 139
 1407:      bf      cr7*4+1,8f    /* 4s bit */
 141err1;   stw     r0,0(r3)
 142        addi    r3,r3,4
 143
 1448:      bf      cr7*4+2,9f    /* 2s bit */
 145err1;   sth     r0,0(r3)
 146        addi    r3,r3,2
 147
 1489:      bf      cr7*4+3,10f    /* 1s bit */
 149err1;   stb     r0,0(r3)
 150
 15110:     li      r3,0    /* success: nothing left uncleared */
 152        blr
 153
 154.Llong_clear:
 155        ld      r5,PPC64_CACHES@toc(r2)    /* r5 = value held in the PPC64_CACHES TOC slot */
 156
 157        bf      cr7*4+0,11f    /* cr7 still holds -r3 from entry: 8s bit set means one more doubleword reaches 16 byte alignment */
 158err2;   std     r0,0(r3)
 159        addi    r3,r3,8
 160        addi    r4,r4,-8
 161
 162        /* Destination is 16 byte aligned, need to get it cacheline aligned */
 16311:     lwz     r7,DCACHEL1LOGLINESIZE(r5)
 164        lwz     r9,DCACHEL1LINESIZE(r5)
 165
 166        /*
 167         * With worst case alignment the long clear loop takes a minimum
 168         * of 1 byte less than 2 cachelines.
 169         */
 170        sldi    r10,r9,2    /* r10 = 4 * line size */
 171        cmpd    r4,r10
 172        blt     .Lmedium_clear    /* fewer than four lines remaining: use the 32 byte loop instead */
 173
 174        neg     r6,r3
 175        addi    r10,r9,-1    /* r10 = line size - 1, reused as a mask below */
 176        and.    r5,r6,r10    /* r5 = bytes up to the next cache line boundary */
 177        beq     13f    /* already on a line boundary */
 178
 179        srdi    r6,r5,4    /* 16 bytes per iteration */
 180        mtctr   r6
 181        mr      r8,r3    /* err1 restart point for this loop */
 18212:
 183err1;   std     r0,0(r3)
 184err1;   std     r0,8(r3)
 185        addi    r3,r3,16
 186        bdnz    12b
 187
 188        sub     r4,r4,r5    /* r4 updated only after the loop, so it stayed valid for err1 */
 189
 19013:     srd     r6,r4,r7    /* number of whole cache lines left */
 191        mtctr   r6
 192        mr      r8,r3    /* err1 restart point for the dcbz loop */
 19314:
 194err1;   dcbz    r0,r3    /* NOTE(review): presumably zeroes the whole cache block at r3 (r9 bytes) - confirm against the Power ISA */
 195        add     r3,r3,r9
 196        bdnz    14b
 197
 198        and     r4,r4,r10    /* bytes left over after the whole lines */
 199
 200        cmpdi   r4,32
 201        blt     .Lshort_clear
 202        b       .Lmedium_clear
 203