/*
 * arch/alpha/lib/strncpy_from_user.S
 * Contributed by Richard Henderson (rth@tamu.edu)
 *
 * Just like strncpy except in the return value:
 *
 * -EFAULT       if an exception occurs before the terminator is copied.
 * N             if the buffer filled.
 *
 * Otherwise the length of the string is returned.
 *
 * Register interface, as used by the code below:
 *
 *	a0	destination address.  Accessed with plain ldq_u/stq_u.
 *	a1	source address.  Every load through it is wrapped in EX().
 *	a2	byte count (N above).  Turned into a word counter on entry.
 *	v0	return value.  Holds the original a0 while the copy runs.
 *
 * Scratch registers touched: t0-t6, t8, t10, t12.  Two of them keep a
 * fixed meaning across the whole routine:
 *
 *	t10	single-bit byte mask marking the last byte allowed by the
 *		count, within the final destination word
 *	t12	single-bit byte mask marking the last byte actually written
 *		(valid on arrival at $finish_up)
 *
 * No stack frame is declared (.frame size 0) and nothing is stored
 * through $30.
 *
 * The "e0" / "e1" / "(stall)" / "(zdb)" tags that open most trailing
 * comments look like the author's instruction-scheduling notes.
 * NOTE(review): presumably issue slots of a dual-issue pipeline; they
 * are kept verbatim and are not verified here.
 */


#include <asm/errno.h>
#include <asm/regdef.h>


/* Allow an exception for an insn; exit if we get one.

   The instruction is emitted at local label 99.  An entry is then added
   to the __ex_table section consisting of a 32-bit self-relative offset
   to that instruction, followed by the encoding of an lda whose
   displacement is the distance from the instruction to $exception and
   whose register fields are $31 and $0.  .previous switches back to the
   section that was active before.
   NOTE(review): how the fault handler decodes the lda word (presumably
   $0 receives the error code and $31 is a discard register) is not
   visible in this file -- confirm against the exception-table code.  */
#define EX(x,y...)			\
	99: x,##y;			\
	.section __ex_table,"a";	\
	.long 99b - .;			\
	lda $31, $exception-99b($0); 	\
	.previous


	.set noat
	.set noreorder
	.text

	.globl __strncpy_from_user
	.ent __strncpy_from_user
	.frame $30, 0, $26
	.prologue 0

	/* The co-aligned code is placed ahead of the entry point; the
	   entry label __strncpy_from_user is further down.  */
	.align 3
$aligned:
	/* On entry to this basic block:
	   t0 == the first destination word for masking back in
	   t1 == the first source word.

	   This path is reached only when a0 and a1 agree in their low
	   three bits, and a1 has been advanced by exactly 8, so a1 still
	   supplies the byte offset used by the mask instructions.  */

	/* Create the 1st output word and detect 0's in the 1st input word.  */
	lda	t2, -1		# e1    : build a mask against false zero
	mskqh	t2, a1, t2	# e0    :   detection in the src word
	mskqh	t1, a1, t3	# e0    : t3 = src word without leading bytes
	ornot	t1, t2, t2	# .. e1 : force leading bytes non-zero
	mskql	t0, a1, t0	# e0    : assemble the first output word
	cmpbge	zero, t2, t8	# .. e1 : bits set iff null found
	or	t0, t3, t0	# e0    :
	beq	a2, $a_eoc	# .. e1 : count ends inside this word
	bne	t8, $a_eos	# .. e1 : null found in this word

	/* On entry to this basic block:
	   t0 == a source word not containing a null.  */

$a_loop:
	stq_u	t0, 0(a0)	# e0    : store the completed word
	addq	a0, 8, a0	# .. e1 :
	EX( ldq_u t0, 0(a1) )	# e0    : next source word, may fault
	addq	a1, 8, a1	# .. e1 :
	subq	a2, 1, a2	# e0    : one fewer word left in the count
	cmpbge	zero, t0, t8	# .. e1 (stall)
	beq	a2, $a_eoc      # e1    : this is the last word allowed
	beq	t8, $a_loop	# e1    : no null yet, keep copying

	/* Take care of the final (partial) word store.  At this point
	   the end-of-count bit is set in t8 iff it applies.

	   On entry to this basic block we have:
	   t0 == the source word containing the null
	   t8 == the cmpbge mask that found it.

	   On exit t12 has exactly one bit set: the lowest set bit of t8,
	   i.e. the byte position of the last byte to be written.  */

$a_eos:
	negq	t8, t12		# e0    : find low bit set
	and	t8, t12, t12	# e1 (stall)

	/* For the sake of the cache, don't read a destination word
	   if we're not going to need it.  (Bit 0x80 means the last byte
	   to write is byte 7, so the whole word is replaced.)  */
	and	t12, 0x80, t6	# e0    :
	bne	t6, 1f		# .. e1 (zdb)

	/* We're doing a partial word store and so need to combine
	   our source and original destination words.  */
	ldq_u	t1, 0(a0)	# e0    :
	subq	t12, 1, t6	# .. e1 : bits below the selected one
	or	t12, t6, t8	# e0    : t8 = mask of bytes to take from src
	unop			#
	zapnot	t0, t8, t0	# e0    : clear src bytes > null
	zap	t1, t8, t1	# .. e1 : clear dst bytes <= null
	or	t0, t1, t0	# e1    :

1:	stq_u	t0, 0(a0)
	br	$finish_up

	/* Add the end-of-count bit to the eos detection bitmask.  */
$a_eoc:
	or	t10, t8, t8
	br	$a_eos

	/*** The Function Entry Point ***/
	.align 3
__strncpy_from_user:
	mov	a0, v0		# save the string start
	beq	a2, $zerolength

	/* Are source and destination co-aligned?

	   While deciding, convert the byte count: with
	   b = count + (a0 & 7) - 1, a2 becomes b >> 3 and t10 becomes
	   1 << (b & 7).  */
	xor	a0, a1, t1	# e0    :
	and	a0, 7, t0	# .. e1 : find dest misalignment
	and	t1, 7, t1	# e0    :
	addq	a2, t0, a2	# .. e1 : bias count by dest misalignment
	subq	a2, 1, a2	# e0    :
	and	a2, 7, t2	# e1    :
	srl	a2, 3, a2	# e0    : a2 = loop counter = (count - 1)/8
	addq	zero, 1, t10	# .. e1 :
	sll	t10, t2, t10	# e0    : t10 = bitmask of last count byte
	bne	t1, $unaligned	# .. e1 :

	/* We are co-aligned; take care of a partial first word.  */

	EX( ldq_u t1, 0(a1) )	# e0    : load first src word
	addq	a1, 8, a1	# .. e1 :

	beq	t0, $aligned	# avoid loading dest word if not needed
	ldq_u	t0, 0(a0)	# e0    :
	br	$aligned	# .. e1 :


/* The source and destination are not co-aligned.  Align the destination
   and cope.  We have to be very careful about not reading too much and
   causing a SEGV.  */

	.align 3
$u_head:
	/* We know just enough now to be able to assemble the first
	   full source word.  We can still find a zero at the end of it
	   that prevents us from outputting the whole thing.

	   On entry to this basic block:
	   t0 == the first dest word, unmasked
	   t1 == the shifted low bits of the first source word
	   t6 == bytemask that is -1 in dest word bytes */

	EX( ldq_u t2, 8(a1) )	# e0    : load second src word
	addq	a1, 8, a1	# .. e1 :
	mskql	t0, a0, t0	# e0    : mask trailing garbage in dst
	extqh	t2, a1, t4	# e0    :
	or	t1, t4, t1	# e1    : first aligned src word complete
	mskqh	t1, a0, t1	# e0    : mask leading garbage in src
	or	t0, t1, t0	# e0    : first output word complete
	or	t0, t6, t6	# e1    : mask original data for zero test
	cmpbge	zero, t6, t8	# e0    :
	beq	a2, $u_eocfin	# .. e1 : count ends inside this word
	bne	t8, $u_final	# e1    : null found in this word

	lda	t6, -1			# e1    : mask out the bits we have
	mskql	t6, a1, t6		# e0    :   already seen
	stq_u	t0, 0(a0)		# e0    : store first output word
	or      t6, t2, t2		# .. e1 :
	cmpbge	zero, t2, t8		# e0    : find nulls in second partial
	addq	a0, 8, a0		# .. e1 :
	subq	a2, 1, a2		# e0    :
	bne	t8, $u_late_head_exit	# .. e1 :

	/* Finally, we've got all the stupid leading edge cases taken care
	   of and we can set up to enter the main loop.  */

	extql	t2, a1, t1	# e0    : position hi-bits of lo word
	EX( ldq_u t2, 8(a1) )	# .. e1 : read next high-order source word
	addq	a1, 8, a1	# e0    :
	cmpbge	zero, t2, t8	# e1 (stall)
	beq	a2, $u_eoc	# e1    :
	bne	t8, $u_eos	# e1    :

	/* Unaligned copy main loop.  In order to avoid reading too much,
	   the loop is structured to detect zeros in aligned source words.
	   This has, unfortunately, effectively pulled half of a loop
	   iteration out into the head and half into the tail, but it does
	   prevent nastiness from accumulating in the very thing we want
	   to run as fast as possible.

	   On entry to this basic block:
	   t1 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word

	   We further know that t2 does not contain a null terminator.  */

	.align 3
$u_loop:
	extqh	t2, a1, t0	# e0    : extract high bits for current word
	addq	a1, 8, a1	# .. e1 :
	extql	t2, a1, t3	# e0    : extract low bits for next time
	addq	a0, 8, a0	# .. e1 :
	or	t0, t1, t0	# e0    : current dst word now complete
	EX( ldq_u t2, 0(a1) )	# .. e1 : load high word for next time
	stq_u	t0, -8(a0)	# e0    : save the current word
	mov	t3, t1		# .. e1 :
	subq	a2, 1, a2	# e0    :
	cmpbge	zero, t2, t8	# .. e1 : test new word for eos
	beq	a2, $u_eoc	# e1    :
	beq	t8, $u_loop	# e1    :

	/* We've found a zero somewhere in the source word we just read.
	   If it resides in the lower half, we have one (probably partial)
	   word to write out, and if it resides in the upper half, we
	   have one full and one partial word left to write out.

	   On entry to this basic block:
	   t1 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word.  */
$u_eos:
	extqh	t2, a1, t0	# e0    :
	or	t0, t1, t0	# e1    : first (partial) source word complete

	cmpbge	zero, t0, t8	# e0    : is the null in this first bit?
	bne	t8, $u_final	# .. e1 (zdb)

	stq_u	t0, 0(a0)	# e0    : the null was in the high-order bits
	addq	a0, 8, a0	# .. e1 :
	subq	a2, 1, a2	# e1    :

	/* Build the last output word from the remaining bytes of t2.
	   The end-of-count bit is merged into the null mask only when the
	   word counter a2 has reached zero.  */
$u_late_head_exit:
	extql	t2, a1, t0	# .. e0 :
	cmpbge	zero, t0, t8	# e0    :
	or	t8, t10, t6	# e1    : t6 = null mask plus end-of-count bit
	cmoveq	a2, t6, t8	# e0    : used only if the count ends here
	nop			# .. e1 :

	/* Take care of a final (probably partial) result word.
	   On entry to this basic block:
	   t0 == assembled source word
	   t8 == cmpbge mask that found the null.

	   Same merge as at $a_eos: t12 ends up with one bit set, marking
	   the last byte to be written.  */
$u_final:
	negq	t8, t6		# e0    : isolate low bit set
	and	t6, t8, t12	# e1    :

	and	t12, 0x80, t6	# e0    : avoid dest word load if we can
	bne	t6, 1f		# .. e1 (zdb)

	ldq_u	t1, 0(a0)	# e0    :
	subq	t12, 1, t6	# .. e1 :
	or	t6, t12, t8	# e0    :
	zapnot	t0, t8, t0	# .. e1 : kill source bytes > null
	zap	t1, t8, t1	# e0    : kill dest bytes <= null
	or	t0, t1, t0	# e1    :

1:	stq_u	t0, 0(a0)	# e0    :
	br	$finish_up

$u_eoc:				# end-of-count
	extqh	t2, a1, t0
	or	t0, t1, t0
	cmpbge	zero, t0, t8

$u_eocfin:			# end-of-count, final word
	or	t10, t8, t8
	br	$u_final

	/* Unaligned copy entry point.  */
	.align 3
$unaligned:

	EX( ldq_u t1, 0(a1) )	# e0    : load first source word

	and	a0, 7, t4	# .. e1 : find dest misalignment
	and	a1, 7, t5	# e0    : find src misalignment

	/* Conditionally load the first destination word and a bytemask
	   with 0xff indicating that the destination byte is sacrosanct.
	   Both stay zero when the destination is already aligned.  */

	mov	zero, t0	# .. e1 :
	mov	zero, t6	# e0    :
	beq	t4, 1f		# .. e1 :
	ldq_u	t0, 0(a0)	# e0    :
	lda	t6, -1		# .. e1 :
	mskql	t6, a0, t6	# e0    :
1:
	subq	a1, t4, a1	# .. e1 : sub dest misalignment from src addr

	/* If source misalignment is larger than dest misalignment, we need
	   extra startup checks to avoid SEGV.  */

	cmplt	t4, t5, t12	# e1    :
	extql	t1, a1, t1	# .. e0 : shift src into place
	lda	t2, -1		# e0    : for creating masks later
	beq	t12, $u_head	# e1    :

	mskqh	t2, t5, t2	# e0    : begin src byte validity mask
	cmpbge	zero, t1, t8	# .. e1 : is there a zero?
	extql	t2, a1, t2	# e0    :
	or	t8, t10, t5	# .. e1 : test for end-of-count too
	cmpbge	zero, t2, t3	# e0    :
	cmoveq	a2, t5, t8	# .. e1 :
	andnot	t8, t3, t8	# e0    : ignore bytes outside the valid mask
	beq	t8, $u_head	# .. e1 (zdb)

	/* At this point we've found a zero in the first partial word of
	   the source.  We need to isolate the valid source data and mask
	   it into the original destination data.  (Incidentally, we know
	   that we'll need at least one byte of that original dest word.) */

	ldq_u	t0, 0(a0)	# e0    :
	negq	t8, t6		# .. e1 : build bitmask of bytes <= zero
	mskqh	t1, t4, t1	# e0    :
	and	t6, t8, t12	# .. e1 :
	subq	t12, 1, t6	# e0    :
	or	t6, t12, t8	# e1    :

	zapnot	t2, t8, t2	# e0    : prepare source word; mirror changes
	zapnot	t1, t8, t1	# .. e1 :   to source validity mask

	andnot	t0, t2, t0	# e0    : zero place for source to reside
	or	t0, t1, t0	# e1    : and put it there
	stq_u	t0, 0(a0)	# e0    :

	/* Compute the return value.  Every path arrives with:
	   a0  == address used for the final store
	   t0  == the word that was stored there
	   t12 == single-bit mask selecting the last byte written
	   v0  == the original destination address

	   The three masks 0xf0 / 0xcc / 0xaa test t12 for bits 2, 1 and 0
	   of the byte index, which is rebuilt as 4 + 2 + 1 as applicable
	   and added to the word-aligned a0.  One more is added when the
	   last byte written is not a null, and the start address is then
	   subtracted.  */
$finish_up:
	zapnot	t0, t12, t4	# was last byte written null?
	cmovne	t4, 1, t4

	and	t12, 0xf0, t3	# binary search for the address of the
	and	t12, 0xcc, t2	# last byte written
	and	t12, 0xaa, t1
	bic	a0, 7, t0
	cmovne	t3, 4, t3
	cmovne	t2, 2, t2
	cmovne	t1, 1, t1
	addq	t0, t3, t0
	addq	t1, t2, t1
	addq	t0, t1, t0
	addq	t0, t4, t0	# add one if we filled the buffer

	subq	t0, v0, v0	# find string length
	ret

	/* A zero count returns 0 without touching either buffer.  */
$zerolength:
	clr	v0
	/* Target named by every EX() table entry.  Nothing happens here
	   except the return; v0 is left as it is on arrival.
	   NOTE(review): the header promises -EFAULT, so the fault fixup
	   presumably loads it into $0 (v0) before resuming here --
	   confirm against the exception-table handler.  */
$exception:
	ret

	.end __strncpy_from_user