linux/arch/arm64/lib/strcmp.S
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2013 ARM Ltd.
   3 * Copyright (C) 2013 Linaro.
   4 *
   5 * This code is based on glibc cortex strings work originally authored by Linaro
   6 * and re-licensed under GPLv2 for the Linux kernel. The original code can
   7 * be found @
   8 *
   9 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
  10 * files/head:/src/aarch64/
  11 *
  12 * This program is free software; you can redistribute it and/or modify
  13 * it under the terms of the GNU General Public License version 2 as
  14 * published by the Free Software Foundation.
  15 *
  16 * This program is distributed in the hope that it will be useful,
  17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 * GNU General Public License for more details.
  20 *
  21 * You should have received a copy of the GNU General Public License
  22 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  23 */
  24
  25#include <linux/linkage.h>
  26#include <asm/assembler.h>
  27
  28/*
  29 * compare two strings
  30 *
  31 * Parameters:
  32 *      x0 - const string 1 pointer
  33 *    x1 - const string 2 pointer
  34 * Returns:
  35 * x0 - an integer less than, equal to, or greater than zero
  36 * if  s1  is  found, respectively, to be less than, to match,
  37 * or be greater than s2.
  38 */
  39
  40#define REP8_01 0x0101010101010101
  41#define REP8_7f 0x7f7f7f7f7f7f7f7f
  42#define REP8_80 0x8080808080808080
  43
  44/* Parameters and result.  */
  45src1            .req    x0
  46src2            .req    x1
  47result          .req    x0
  48
  49/* Internal variables.  */
  50data1           .req    x2
  51data1w          .req    w2
  52data2           .req    x3
  53data2w          .req    w3
  54has_nul         .req    x4
  55diff            .req    x5
  56syndrome        .req    x6
  57tmp1            .req    x7
  58tmp2            .req    x8
  59tmp3            .req    x9
  60zeroones        .req    x10
  61pos             .req    x11
  62
  63ENTRY(strcmp)
  64        eor     tmp1, src1, src2
  65        mov     zeroones, #REP8_01
  66        tst     tmp1, #7
  67        b.ne    .Lmisaligned8
  68        ands    tmp1, src1, #7
  69        b.ne    .Lmutual_align
  70
  71        /*
  72        * NUL detection works on the principle that (X - 1) & (~X) & 0x80
  73        * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
  74        * can be done in parallel across the entire word.
  75        */
  76.Lloop_aligned:
  77        ldr     data1, [src1], #8
  78        ldr     data2, [src2], #8
  79.Lstart_realigned:
  80        sub     tmp1, data1, zeroones
  81        orr     tmp2, data1, #REP8_7f
  82        eor     diff, data1, data2      /* Non-zero if differences found.  */
  83        bic     has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
  84        orr     syndrome, diff, has_nul
  85        cbz     syndrome, .Lloop_aligned
  86        b       .Lcal_cmpresult
  87
  88.Lmutual_align:
  89        /*
  90        * Sources are mutually aligned, but are not currently at an
  91        * alignment boundary.  Round down the addresses and then mask off
  92        * the bytes that preceed the start point.
  93        */
  94        bic     src1, src1, #7
  95        bic     src2, src2, #7
  96        lsl     tmp1, tmp1, #3          /* Bytes beyond alignment -> bits.  */
  97        ldr     data1, [src1], #8
  98        neg     tmp1, tmp1              /* Bits to alignment -64.  */
  99        ldr     data2, [src2], #8
 100        mov     tmp2, #~0
 101        /* Big-endian.  Early bytes are at MSB.  */
 102CPU_BE( lsl     tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
 103        /* Little-endian.  Early bytes are at LSB.  */
 104CPU_LE( lsr     tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
 105
 106        orr     data1, data1, tmp2
 107        orr     data2, data2, tmp2
 108        b       .Lstart_realigned
 109
 110.Lmisaligned8:
 111        /*
 112        * Get the align offset length to compare per byte first.
 113        * After this process, one string's address will be aligned.
 114        */
 115        and     tmp1, src1, #7
 116        neg     tmp1, tmp1
 117        add     tmp1, tmp1, #8
 118        and     tmp2, src2, #7
 119        neg     tmp2, tmp2
 120        add     tmp2, tmp2, #8
 121        subs    tmp3, tmp1, tmp2
 122        csel    pos, tmp1, tmp2, hi /*Choose the maximum. */
 123.Ltinycmp:
 124        ldrb    data1w, [src1], #1
 125        ldrb    data2w, [src2], #1
 126        subs    pos, pos, #1
 127        ccmp    data1w, #1, #0, ne  /* NZCV = 0b0000.  */
 128        ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
 129        b.eq    .Ltinycmp
 130        cbnz    pos, 1f /*find the null or unequal...*/
 131        cmp     data1w, #1
 132        ccmp    data1w, data2w, #0, cs
 133        b.eq    .Lstart_align /*the last bytes are equal....*/
 1341:
 135        sub     result, data1, data2
 136        ret
 137
 138.Lstart_align:
 139        ands    xzr, src1, #7
 140        b.eq    .Lrecal_offset
 141        /*process more leading bytes to make str1 aligned...*/
 142        add     src1, src1, tmp3
 143        add     src2, src2, tmp3
 144        /*load 8 bytes from aligned str1 and non-aligned str2..*/
 145        ldr     data1, [src1], #8
 146        ldr     data2, [src2], #8
 147
 148        sub     tmp1, data1, zeroones
 149        orr     tmp2, data1, #REP8_7f
 150        bic     has_nul, tmp1, tmp2
 151        eor     diff, data1, data2 /* Non-zero if differences found.  */
 152        orr     syndrome, diff, has_nul
 153        cbnz    syndrome, .Lcal_cmpresult
 154        /*How far is the current str2 from the alignment boundary...*/
 155        and     tmp3, tmp3, #7
 156.Lrecal_offset:
 157        neg     pos, tmp3
 158.Lloopcmp_proc:
 159        /*
 160        * Divide the eight bytes into two parts. First,backwards the src2
 161        * to an alignment boundary,load eight bytes from the SRC2 alignment
 162        * boundary,then compare with the relative bytes from SRC1.
 163        * If all 8 bytes are equal,then start the second part's comparison.
 164        * Otherwise finish the comparison.
 165        * This special handle can garantee all the accesses are in the
 166        * thread/task space in avoid to overrange access.
 167        */
 168        ldr     data1, [src1,pos]
 169        ldr     data2, [src2,pos]
 170        sub     tmp1, data1, zeroones
 171        orr     tmp2, data1, #REP8_7f
 172        bic     has_nul, tmp1, tmp2
 173        eor     diff, data1, data2  /* Non-zero if differences found.  */
 174        orr     syndrome, diff, has_nul
 175        cbnz    syndrome, .Lcal_cmpresult
 176
 177        /*The second part process*/
 178        ldr     data1, [src1], #8
 179        ldr     data2, [src2], #8
 180        sub     tmp1, data1, zeroones
 181        orr     tmp2, data1, #REP8_7f
 182        bic     has_nul, tmp1, tmp2
 183        eor     diff, data1, data2  /* Non-zero if differences found.  */
 184        orr     syndrome, diff, has_nul
 185        cbz     syndrome, .Lloopcmp_proc
 186
 187.Lcal_cmpresult:
 188        /*
 189        * reversed the byte-order as big-endian,then CLZ can find the most
 190        * significant zero bits.
 191        */
 192CPU_LE( rev     syndrome, syndrome )
 193CPU_LE( rev     data1, data1 )
 194CPU_LE( rev     data2, data2 )
 195
 196        /*
 197        * For big-endian we cannot use the trick with the syndrome value
 198        * as carry-propagation can corrupt the upper bits if the trailing
 199        * bytes in the string contain 0x01.
 200        * However, if there is no NUL byte in the dword, we can generate
 201        * the result directly.  We ca not just subtract the bytes as the
 202        * MSB might be significant.
 203        */
 204CPU_BE( cbnz    has_nul, 1f )
 205CPU_BE( cmp     data1, data2 )
 206CPU_BE( cset    result, ne )
 207CPU_BE( cneg    result, result, lo )
 208CPU_BE( ret )
 209CPU_BE( 1: )
 210        /*Re-compute the NUL-byte detection, using a byte-reversed value. */
 211CPU_BE( rev     tmp3, data1 )
 212CPU_BE( sub     tmp1, tmp3, zeroones )
 213CPU_BE( orr     tmp2, tmp3, #REP8_7f )
 214CPU_BE( bic     has_nul, tmp1, tmp2 )
 215CPU_BE( rev     has_nul, has_nul )
 216CPU_BE( orr     syndrome, diff, has_nul )
 217
 218        clz     pos, syndrome
 219        /*
 220        * The MS-non-zero bit of the syndrome marks either the first bit
 221        * that is different, or the top bit of the first zero byte.
 222        * Shifting left now will bring the critical information into the
 223        * top bits.
 224        */
 225        lsl     data1, data1, pos
 226        lsl     data2, data2, pos
 227        /*
 228        * But we need to zero-extend (char is unsigned) the value and then
 229        * perform a signed 32-bit subtraction.
 230        */
 231        lsr     data1, data1, #56
 232        sub     result, data1, data2, lsr #56
 233        ret
 234ENDPIPROC(strcmp)
 235