linux/arch/alpha/lib/strncpy_from_user.S
<<
>>
Prefs
   1/*
   2 * arch/alpha/lib/strncpy_from_user.S
   3 * Contributed by Richard Henderson (rth@tamu.edu)
   4 *
   5 * Just like strncpy except in the return value:
   6 *
   7 * -EFAULT       if an exception occurs before the terminator is copied.
   8 * N             if the buffer filled, i.e. N bytes were copied and the last one was not a null.
   9 *
  10 * Otherwise the length of the string is returned.
  11 */
  12
  13
  14#include <asm/errno.h>
  15#include <asm/regdef.h>
  16
  17
  18/* Allow an exception for an insn; exit if we get one.

       EX(insn) emits `insn' under the local label 99 and then appends
       one record to the __ex_table section.  The record holds:
         - `.long 99b - .', the offset from the record back to the
           guarded instruction, and
         - an `lda' whose displacement is `$exception-99b', i.e. the
           distance from the guarded instruction to the $exception
           label, and whose register fields name $31 and $0.
       `.previous' then returns to the section that was active before
       the macro was used.

       NOTE(review): the consumer of __ex_table is not in this file.
       Presumably the fault handler resumes at $exception and places
       the error code in $0 (v0): $exception returns without writing
       v0, and the file header promises -EFAULT.  Confirm against the
       Alpha fault-handling code.  */
  19#define EX(x,y...)                      \
  20        99: x,##y;                      \
  21        .section __ex_table,"a";        \
  22        .long 99b - .;                  \
  23        lda $31, $exception-99b($0);    \
  24        .previous
  25
  26
        /* Assembler setup and the declaration of the routine.  The code
           below is scheduled by hand (the e0/e1 notes in the trailing
           comments look like issue-slot annotations -- TODO confirm), so
           reordering by the assembler is switched off.  The routine is
           declared with a frame size of 0.

           Note that the `$aligned' code that follows is placed before
           the `__strncpy_from_user' label: the real entry point is
           further down and branches back up to here.  */
  27        .set noat
  28        .set noreorder
  29        .text
  30
  31        .globl __strncpy_from_user
  32        .ent __strncpy_from_user
  33        .frame $30, 0, $26
  34        .prologue 0
  35
  36        .align 3
  37$aligned:
  38        /* On entry to this basic block:
  39           t0 == the first destination word for masking back in
  40           t1 == the first source word.
                 a1 == source pointer, already advanced by 8; only its low
                       three bits are used here, and on this path they equal
                       the low three bits of a0
                 a2 == number of whole words that precede the word holding
                       the last permitted byte
                 t10 == one-hot mask of the last permitted byte (used by
                       $a_eoc).

                 If the destination was aligned the caller did not load a
                 destination word and t0 is 0; with a zero offset the mskql
                 below clears every byte of t0 anyway.  */
  41
  42        /* Create the 1st output word and detect 0's in the 1st input word.

               t2 becomes the source word with every byte that precedes the
               start of the string forced to 0xff, so that cmpbge cannot
               report a null among them.  t3 is the source word with those
               same leading bytes cleared, and t0 is reduced to just the
               destination bytes below the start offset; t0 | t3 is the word
               to store.  cmpbge sets bit i of t8 when byte i of t2 is zero.

               Exits: a2 == 0 means the count ends inside this word
               ($a_eoc); t8 != 0 means the terminator is in this word
               ($a_eos); otherwise fall into $a_loop.  */
  43        lda     t2, -1          # e1    : build a mask against false zero
  44        mskqh   t2, a1, t2      # e0    :   detection in the src word
  45        mskqh   t1, a1, t3      # e0    :
  46        ornot   t1, t2, t2      # .. e1 :
  47        mskql   t0, a1, t0      # e0    : assemble the first output word
  48        cmpbge  zero, t2, t8    # .. e1 : bits set iff null found
  49        or      t0, t3, t0      # e0    :
  50        beq     a2, $a_eoc      # .. e1 :
  51        bne     t8, $a_eos      # .. e1 :
  52
  53        /* On entry to this basic block:
  54           t0 == a source word not containing a null.

                 Co-aligned main loop.  Each trip stores the word fetched
                 on the previous trip, steps both pointers by one quadword,
                 fetches the next source word and decrements the word
                 counter a2.  The source load is the only access wrapped in
                 EX, i.e. the only one with an exception-table entry.

                 Exits: a2 reaching zero goes to $a_eoc (the word just
                 loaded holds the last permitted byte); a non-zero t8 (a
                 null in the word just loaded) falls through into $a_eos.
                 In both cases the word in t0 has not been stored yet.  */
  55
  56$a_loop:
  57        stq_u   t0, 0(a0)       # e0    :
  58        addq    a0, 8, a0       # .. e1 :
  59        EX( ldq_u t0, 0(a1) )   # e0    :
  60        addq    a1, 8, a1       # .. e1 :
  61        subq    a2, 1, a2       # e0    :
  62        cmpbge  zero, t0, t8    # .. e1 (stall)
  63        beq     a2, $a_eoc      # e1    :
  64        beq     t8, $a_loop     # e1    :
  65
  66        /* Take care of the final (partial) word store.  At this point
  67           the end-of-count bit is set in t8 iff it applies.
  68
  69           On entry to this basic block we have:
  70           t0 == the source word containing the null
  71           t8 == the cmpbge mask that found it.

                 t8 & -t8 isolates the lowest set bit of t8, so t12 becomes
                 a one-hot mask of the last byte that will be written (the
                 first null, or the last permitted byte if that comes
                 first).

                 On exit to $finish_up:
                 t0  == the word that was stored
                 t12 == the one-hot mask of the last byte written
                 a0  == address used for that store.  */
  72
  73$a_eos:
  74        negq    t8, t12         # e0    : find low bit set
  75        and     t8, t12, t12    # e1 (stall)
  76
  77        /* For the sake of the cache, don't read a destination word
  78           if we're not going to need it.  Bit 0x80 of t12 means the
               last byte written is byte 7, i.e. the whole word comes from
               the source.  */
  79        and     t12, 0x80, t6   # e0    :
  80        bne     t6, 1f          # .. e1 (zdb)
  81
  82        /* We're doing a partial word store and so need to combine
  83           our source and original destination words.  t12 | (t12 - 1)
               is the mask of all bytes up to and including the last byte
               written: those bytes are kept from the source (zapnot) and
               removed from the destination word (zap) before the two are
               merged.  */
  84        ldq_u   t1, 0(a0)       # e0    :
  85        subq    t12, 1, t6      # .. e1 :
  86        or      t12, t6, t8     # e0    :
  87        unop                    #
  88        zapnot  t0, t8, t0      # e0    : clear src bytes > null
  89        zap     t1, t8, t1      # .. e1 : clear dst bytes <= null
  90        or      t0, t1, t0      # e1    :
  91
  921:      stq_u   t0, 0(a0)
  93        br      $finish_up
  94
  95        /* Add the end-of-count bit to the eos detection bitmask.
               Reached from the co-aligned path when the word counter a2
               has hit zero, i.e. the word in t0 holds the last permitted
               byte.  t10 is the one-hot mask of that byte; merging it
               into t8 lets $a_eos stop at whichever comes first, a null
               or the count limit.  */
  96$a_eoc:
  97        or      t10, t8, t8
  98        br      $a_eos
  99
 100        /*** The Function Entry Point ***/
        /* Register use as seen in this routine:
             a0 == destination pointer (every store goes through a0)
             a1 == source pointer (every EX-guarded load goes through a1)
             a2 == maximum number of bytes to copy
             v0 == result; it holds the saved start of the destination
                   until $finish_up turns it into a length.
           A count of zero returns 0 straight away via $zerolength.

           Count set-up: after the two adjustments a2 is
           count + (a0 & 7) - 1, the offset of the last permitted byte
           measured from the start of the first destination word.  Its low
           three bits select that byte within its word (t10 = 1 << bits),
           and the remaining bits are the number of words before it, which
           becomes the loop counter left in a2.

           t1 = (a0 ^ a1) & 7 is non-zero when the two pointers have
           different misalignments, in which case $unaligned takes over.  */
 101        .align 3
 102__strncpy_from_user:
 103        mov     a0, v0          # save the string start
 104        beq     a2, $zerolength
 105
 106        /* Are source and destination co-aligned?  */
 107        xor     a0, a1, t1      # e0    :
 108        and     a0, 7, t0       # .. e1 : find dest misalignment
 109        and     t1, 7, t1       # e0    :
 110        addq    a2, t0, a2      # .. e1 : bias count by dest misalignment
 111        subq    a2, 1, a2       # e0    :
 112        and     a2, 7, t2       # e1    :
 113        srl     a2, 3, a2       # e0    : a2 = loop counter = (count - 1)/8
 114        addq    zero, 1, t10    # .. e1 :
 115        sll     t10, t2, t10    # e0    : t10 = bitmask of last count byte
 116        bne     t1, $unaligned  # .. e1 :
 117
 118        /* We are co-aligned; take care of a partial first word.
               The first source word is fetched under EX and a1 is stepped
               past it.  t0 still holds the destination misalignment: when
               it is zero the destination word is not read and $aligned is
               entered with t0 == 0, otherwise t0 is replaced by the
               current destination word.  */
 119
 120        EX( ldq_u t1, 0(a1) )   # e0    : load first src word
 121        addq    a1, 8, a1       # .. e1 :
 122
 123        beq     t0, $aligned    # avoid loading dest word if not needed
 124        ldq_u   t0, 0(a0)       # e0    :
 125        br      $aligned        # .. e1 :
 126
 127
 128/* The source and destination are not co-aligned.  Align the destination
 129   and cope.  We have to be very careful about not reading too much and
 130   causing a SEGV.  */
 131
 132        .align 3
 133$u_head:
 134        /* We know just enough now to be able to assemble the first
 135           full source word.  We can still find a zero at the end of it
 136           that prevents us from outputting the whole thing.
 137
 138           On entry to this basic block:
 139           t0 == the first dest word, unmasked
 140           t1 == the shifted low bits of the first source word
 141           t6 == bytemask that is -1 in dest word bytes

                 a1 has already had the destination misalignment subtracted
                 by $unaligned, so its low three bits are the shift between
                 source and destination byte positions.

                 Steps: fetch the next source word under EX; keep only the
                 destination bytes below the start offset (mskql on a0);
                 move the low bytes of the new word to the top (extqh) and
                 merge them with t1; clear the source bytes below the start
                 offset (mskqh on a0); OR the two halves into the first
                 output word.  For the null test the preserved destination
                 bytes are forced to 0xff through t6 so they cannot be
                 mistaken for a terminator.

                 Exits: a2 == 0 goes to $u_eocfin (count ends in this
                 word); t8 != 0 goes to $u_final (terminator is in this
                 word).  In both cases t0 has not been stored yet.  */
 142
 143        EX( ldq_u t2, 8(a1) )   # e0    : load second src word
 144        addq    a1, 8, a1       # .. e1 :
 145        mskql   t0, a0, t0      # e0    : mask trailing garbage in dst
 146        extqh   t2, a1, t4      # e0    :
 147        or      t1, t4, t1      # e1    : first aligned src word complete
 148        mskqh   t1, a0, t1      # e0    : mask leading garbage in src
 149        or      t0, t1, t0      # e0    : first output word complete
 150        or      t0, t6, t6      # e1    : mask original data for zero test
 151        cmpbge  zero, t6, t8    # e0    :
 152        beq     a2, $u_eocfin   # .. e1 :
 153        bne     t8, $u_final    # e1    :
 154
        /* The first output word is complete and terminator-free: store
           it and step a0 and a2.  The low bytes of t2 were already used
           for that word, so they are forced to 0xff (t6) before t2 is
           tested; a null found now lies in the not-yet-written part of
           t2 and is handled by $u_late_head_exit.  */
 155        lda     t6, -1                  # e1    : mask out the bits we have
 156        mskql   t6, a1, t6              # e0    :   already seen
 157        stq_u   t0, 0(a0)               # e0    : store first output word
 158        or      t6, t2, t2              # .. e1 :
 159        cmpbge  zero, t2, t8            # e0    : find nulls in second partial
 160        addq    a0, 8, a0               # .. e1 :
 161        subq    a2, 1, a2               # e0    :
 162        bne     t8, $u_late_head_exit   # .. e1 :
 163
 164        /* Finally, we've got all the stupid leading edge cases taken care
 165           of and we can set up to enter the main loop.  This establishes
               the $u_loop entry state: t1 holds the leftover high bytes of
               the old word moved to the bottom, t2 the freshly loaded
               word.  If the count is exhausted ($u_eoc) or the new word
               has a null ($u_eos) the loop is skipped.  */
 166
 167        extql   t2, a1, t1      # e0    : position hi-bits of lo word
 168        EX( ldq_u t2, 8(a1) )   # .. e1 : read next high-order source word
 169        addq    a1, 8, a1       # e0    :
 170        cmpbge  zero, t2, t8    # e1 (stall)
 171        beq     a2, $u_eoc      # e1    :
 172        bne     t8, $u_eos      # e1    :
 173
 174        /* Unaligned copy main loop.  In order to avoid reading too much,
 175           the loop is structured to detect zeros in aligned source words.
 176           This has, unfortunately, effectively pulled half of a loop
 177           iteration out into the head and half into the tail, but it does
 178           prevent nastiness from accumulating in the very thing we want
 179           to run as fast as possible.
 180
 181           On entry to this basic block:
 182           t1 == the shifted high-order bits from the previous source word
 183           t2 == the unshifted current source word
 184
 185           We further know that t2 does not contain a null terminator.

                 Per trip: t0 = (low bytes of t2 moved to the top) | t1 is
                 the next destination word; t3 = the remaining high bytes
                 of t2 moved to the bottom, carried into t1 for the next
                 trip.  a1 is stepped between the extqh and the extql; the
                 step is a multiple of 8, so the low three bits that select
                 the shift are unchanged.  The store uses -8(a0) because a0
                 has already been advanced.

                 Exits: a2 reaching zero goes to $u_eoc; a null in the word
                 just loaded (t8 != 0) falls through into $u_eos.  */
 186
 187        .align 3
 188$u_loop:
 189        extqh   t2, a1, t0      # e0    : extract high bits for current word
 190        addq    a1, 8, a1       # .. e1 :
 191        extql   t2, a1, t3      # e0    : extract low bits for next time
 192        addq    a0, 8, a0       # .. e1 :
 193        or      t0, t1, t0      # e0    : current dst word now complete
 194        EX( ldq_u t2, 0(a1) )   # .. e1 : load high word for next time
 195        stq_u   t0, -8(a0)      # e0    : save the current word
 196        mov     t3, t1          # .. e1 :
 197        subq    a2, 1, a2       # e0    :
 198        cmpbge  zero, t2, t8    # .. e1 : test new word for eos
 199        beq     a2, $u_eoc      # e1    :
 200        beq     t8, $u_loop     # e1    :
 201
 202        /* We've found a zero somewhere in the source word we just read.
 203           If it resides in the lower half, we have one (probably partial)
 204           word to write out, and if it resides in the upper half, we
 205           have one full and one partial word left to write out.
 206
 207           On entry to this basic block:
 208           t1 == the shifted high-order bits from the previous source word
 209           t2 == the unshifted current source word.

                 The destination word is assembled exactly as in the loop
                 and tested with cmpbge.  If the terminator is in it, that
                 word is the last one and $u_final writes it.  Otherwise it
                 is stored whole, a0 and a2 are stepped, and the remaining
                 bytes of t2 are handled by $u_late_head_exit.  */
 210$u_eos:
 211        extqh   t2, a1, t0      # e0    :
 212        or      t0, t1, t0      # e1    : first (partial) source word complete
 213
 214        cmpbge  zero, t0, t8    # e0    : is the null in this first bit?
 215        bne     t8, $u_final    # .. e1 (zdb)
 216
 217        stq_u   t0, 0(a0)       # e0    : the null was in the high-order bits
 218        addq    a0, 8, a0       # .. e1 :
 219        subq    a2, 1, a2       # e1    :
 220
        /* Also entered from $u_head.  t0 becomes the not-yet-written high
           bytes of t2 moved down to byte 0, and t8 its null mask.  The
           byte lanes vacated by the shift read as zero and so also set
           bits in t8, but $u_final only acts on the lowest set bit.  When
           a2 == 0 this word holds the last permitted byte, so the cmoveq
           replaces t8 with t8 | t10 to bring in the end-of-count bit.
           Falls through into $u_final.  */
 221$u_late_head_exit:
 222        extql   t2, a1, t0      # .. e0 :
 223        cmpbge  zero, t0, t8    # e0    :
 224        or      t8, t10, t6     # e1    :
 225        cmoveq  a2, t6, t8      # e0    :
 226        nop                     # .. e1 :
 227
 228        /* Take care of a final (probably partial) result word.
 229           On entry to this basic block:
 230           t0 == assembled source word
 231           t8 == cmpbge mask that found the null.

                 This is the unaligned-path counterpart of $a_eos and works
                 the same way.  t12 = t8 & -t8 is the one-hot mask of the
                 last byte to write.  If that is byte 7 (0x80) the whole
                 word comes from the source and the destination is not
                 read.  Otherwise t8 = t12 | (t12 - 1) selects the bytes up
                 to and including the last one: they are kept from the
                 source, cleared in the loaded destination word, and the
                 two are merged.

                 On exit to $finish_up: t0 == word stored, t12 == one-hot
                 mask of the last byte written, a0 == store address.  */
 232$u_final:
 233        negq    t8, t6          # e0    : isolate low bit set
 234        and     t6, t8, t12     # e1    :
 235
 236        and     t12, 0x80, t6   # e0    : avoid dest word load if we can
 237        bne     t6, 1f          # .. e1 (zdb)
 238
 239        ldq_u   t1, 0(a0)       # e0    :
 240        subq    t12, 1, t6      # .. e1 :
 241        or      t6, t12, t8     # e0    :
 242        zapnot  t0, t8, t0      # .. e1 : kill source bytes > null
 243        zap     t1, t8, t1      # e0    : kill dest bytes <= null
 244        or      t0, t1, t0      # e1    :
 245
 2461:      stq_u   t0, 0(a0)       # e0    :
 247        br      $finish_up
 248
        /* End-of-count handling for the unaligned path.

           $u_eoc is reached with a2 == 0 while t1/t2 are in the $u_loop
           entry state (t1 == shifted leftover of the previous word, t2 ==
           current source word).  It assembles the destination word in t0
           and computes its null mask in t8.

           $u_eocfin is also entered directly from $u_head, where t0 and
           t8 are already set up.  It merges t10, the one-hot mask of the
           last permitted byte, into t8 so that $u_final stops at
           whichever comes first, a null or the count limit.  */
 249$u_eoc:                         # end-of-count
 250        extqh   t2, a1, t0
 251        or      t0, t1, t0
 252        cmpbge  zero, t0, t8
 253
 254$u_eocfin:                      # end-of-count, final word
 255        or      t10, t8, t8
 256        br      $u_final
 257
 258        /* Unaligned copy entry point.
               Reached from the function entry when (a0 ^ a1) & 7 != 0.
               a2 and t10 have already been set up there.  The first
               source word is fetched under EX; t4 and t5 receive the
               destination and source misalignments.  */
 259        .align 3
 260$unaligned:
 261
 262        EX( ldq_u t1, 0(a1) )   # e0    : load first source word
 263
 264        and     a0, 7, t4       # .. e1 : find dest misalignment
 265        and     a1, 7, t5       # e0    : find src misalignment
 266
 267        /* Conditionally load the first destination word and a bytemask
 268           with 0xff indicating that the destination byte is sacrosanct.
               With an aligned destination (t4 == 0) nothing is loaded and
               both t0 and t6 stay zero.  */
 269
 270        mov     zero, t0        # .. e1 :
 271        mov     zero, t6        # e0    :
 272        beq     t4, 1f          # .. e1 :
 273        ldq_u   t0, 0(a0)       # e0    :
 274        lda     t6, -1          # .. e1 :
 275        mskql   t6, a0, t6      # e0    :
 2761:
 277        subq    a1, t4, a1      # .. e1 : sub dest misalignment from src addr
 278
 279        /* If source misalignment is larger than dest misalignment, we need
 280           extra startup checks to avoid SEGV.  From here on the low three
               bits of a1 are the byte shift between source and destination
               positions.  When the source misalignment is not the larger
               one (t12 == 0), control goes straight to $u_head.  */
 281
 282        cmplt   t4, t5, t12     # e1    :
 283        extql   t1, a1, t1      # .. e0 : shift src into place
 284        lda     t2, -1          # e0    : for creating masks later
 285        beq     t12, $u_head    # e1    :
 286
        /* Look for a terminator (or the count limit) inside the first
           source word before anything further is read.  t2 starts as 0xff
           in every source byte at or after the string start and is then
           shifted exactly like the data, so a zero byte in t2 marks a
           lane of t1 that holds no string data; t3 flags those lanes and
           they are removed from t8.  When a2 == 0 the end-of-count bit
           t10 is included through the cmoveq.  If nothing remains in t8
           the string continues and $u_head takes over.  */
 287        mskqh   t2, t5, t2      # e0    : begin src byte validity mask
 288        cmpbge  zero, t1, t8    # .. e1 : is there a zero?
 289        extql   t2, a1, t2      # e0    :
 290        or      t8, t10, t5     # .. e1 : test for end-of-count too
 291        cmpbge  zero, t2, t3    # e0    :
 292        cmoveq  a2, t5, t8      # .. e1 :
 293        andnot  t8, t3, t8      # e0    :
 294        beq     t8, $u_head     # .. e1 (zdb)
 295
 296        /* At this point we've found a zero in the first partial word of
 297           the source.  We need to isolate the valid source data and mask
 298           it into the original destination data.  (Incidentally, we know
 299           that we'll need at least one byte of that original dest word.)

               t12 becomes the one-hot mask of the last byte to write and
               t8 the mask of all bytes up to and including it.  Both the
               shifted data (t1, with the lanes below the destination
               offset cleared) and the shifted validity mask (t2) are cut
               down to those bytes; the destination bytes selected by t2
               are cleared and replaced by t1.  The code then falls through
               into $finish_up with t0 == the word stored and t12 == the
               last-byte mask.  */
 300
 301        ldq_u   t0, 0(a0)       # e0    :
 302        negq    t8, t6          # .. e1 : build bitmask of bytes <= zero
 303        mskqh   t1, t4, t1      # e0    :
 304        and     t6, t8, t12     # .. e1 :
 305        subq    t12, 1, t6      # e0    :
 306        or      t6, t12, t8     # e1    :
 307
 308        zapnot  t2, t8, t2      # e0    : prepare source word; mirror changes
 309        zapnot  t1, t8, t1      # .. e1 : to source validity mask
 310
 311        andnot  t0, t2, t0      # e0    : zero place for source to reside
 312        or      t0, t1, t0      # e1    : and put it there
 313        stq_u   t0, 0(a0)       # e0    :
 314
        /* Compute the return value.
           On entry:
           t0  == the last word stored
           t12 == one-hot mask of the last byte written within that word
           a0  == address used for the last store
           v0  == the saved start of the destination.

           t4 is 1 when the last byte written is non-zero (the copy was
           stopped by the count, not by a terminator) and 0 otherwise.
           The byte index of the single bit in t12 is rebuilt from three
           mask tests (0xf0 -> 4, 0xcc -> 2, 0xaa -> 1) and added to a0
           rounded down to a multiple of 8, giving the address of the last
           byte written.  The result is that address, plus t4, minus the
           start: the string length when a terminator was copied, or the
           full count when it was not.  */
 315$finish_up:
 316        zapnot  t0, t12, t4     # was last byte written null?
 317        cmovne  t4, 1, t4
 318
 319        and     t12, 0xf0, t3   # binary search for the address of the
 320        and     t12, 0xcc, t2   # last byte written
 321        and     t12, 0xaa, t1
 322        bic     a0, 7, t0
 323        cmovne  t3, 4, t3
 324        cmovne  t2, 2, t2
 325        cmovne  t1, 1, t1
 326        addq    t0, t3, t0
 327        addq    t1, t2, t1
 328        addq    t0, t1, t0
 329        addq    t0, t4, t0      # add one if we filled the buffer
 330
 331        subq    t0, v0, v0      # find string length
 332        ret
 333
        /* $zerolength: a count of zero returns 0.
           $exception: the target named by every EX record.  It returns
           without writing v0.  NOTE(review): presumably the fault handler
           has already put the error code in v0 by then -- confirm.  */
 334$zerolength:
 335        clr     v0
 336$exception:
 337        ret
 338
 339        .end __strncpy_from_user
 340