/* linux/arch/arm64/lib/strcmp.S */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2012-2022, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
 */
   8
   9#include <linux/linkage.h>
  10#include <asm/assembler.h>
  11
  12/* Assumptions:
  13 *
  14 * ARMv8-a, AArch64.
  15 * MTE compatible.
  16 */
  17
  18#define L(label) .L ## label
  19
  20#define REP8_01 0x0101010101010101
  21#define REP8_7f 0x7f7f7f7f7f7f7f7f
  22
  23#define src1            x0
  24#define src2            x1
  25#define result          x0
  26
  27#define data1           x2
  28#define data1w          w2
  29#define data2           x3
  30#define data2w          w3
  31#define has_nul         x4
  32#define diff            x5
  33#define off1            x5
  34#define syndrome        x6
  35#define tmp             x6
  36#define data3           x7
  37#define zeroones        x8
  38#define shift           x9
  39#define off2            x10
  40
  41/* On big-endian early bytes are at MSB and on little-endian LSB.
  42   LS_FW means shifting towards early bytes.  */
  43#ifdef __AARCH64EB__
  44# define LS_FW lsl
  45#else
  46# define LS_FW lsr
  47#endif
  48
  49/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
  50   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
  51   can be done in parallel across the entire word.
  52   Since carry propagation makes 0x1 bytes before a NUL byte appear
  53   NUL too in big-endian, byte-reverse the data before the NUL check.  */
  54
  55
  56SYM_FUNC_START(__pi_strcmp)
  57        sub     off2, src2, src1
  58        mov     zeroones, REP8_01
  59        and     tmp, src1, 7
  60        tst     off2, 7
  61        b.ne    L(misaligned8)
  62        cbnz    tmp, L(mutual_align)
  63
  64        .p2align 4
  65
  66L(loop_aligned):
  67        ldr     data2, [src1, off2]
  68        ldr     data1, [src1], 8
  69L(start_realigned):
  70#ifdef __AARCH64EB__
  71        rev     tmp, data1
  72        sub     has_nul, tmp, zeroones
  73        orr     tmp, tmp, REP8_7f
  74#else
  75        sub     has_nul, data1, zeroones
  76        orr     tmp, data1, REP8_7f
  77#endif
  78        bics    has_nul, has_nul, tmp   /* Non-zero if NUL terminator.  */
  79        ccmp    data1, data2, 0, eq
  80        b.eq    L(loop_aligned)
  81#ifdef __AARCH64EB__
  82        rev     has_nul, has_nul
  83#endif
  84        eor     diff, data1, data2
  85        orr     syndrome, diff, has_nul
  86L(end):
  87#ifndef __AARCH64EB__
  88        rev     syndrome, syndrome
  89        rev     data1, data1
  90        rev     data2, data2
  91#endif
  92        clz     shift, syndrome
  93        /* The most-significant-non-zero bit of the syndrome marks either the
  94           first bit that is different, or the top bit of the first zero byte.
  95           Shifting left now will bring the critical information into the
  96           top bits.  */
  97        lsl     data1, data1, shift
  98        lsl     data2, data2, shift
  99        /* But we need to zero-extend (char is unsigned) the value and then
 100           perform a signed 32-bit subtraction.  */
 101        lsr     data1, data1, 56
 102        sub     result, data1, data2, lsr 56
 103        ret
 104
 105        .p2align 4
 106
 107L(mutual_align):
 108        /* Sources are mutually aligned, but are not currently at an
 109           alignment boundary.  Round down the addresses and then mask off
 110           the bytes that precede the start point.  */
 111        bic     src1, src1, 7
 112        ldr     data2, [src1, off2]
 113        ldr     data1, [src1], 8
 114        neg     shift, src2, lsl 3      /* Bits to alignment -64.  */
 115        mov     tmp, -1
 116        LS_FW   tmp, tmp, shift
 117        orr     data1, data1, tmp
 118        orr     data2, data2, tmp
 119        b       L(start_realigned)
 120
 121L(misaligned8):
 122        /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
 123           checking to make sure that we don't access beyond the end of SRC2.  */
 124        cbz     tmp, L(src1_aligned)
 125L(do_misaligned):
 126        ldrb    data1w, [src1], 1
 127        ldrb    data2w, [src2], 1
 128        cmp     data1w, 0
 129        ccmp    data1w, data2w, 0, ne   /* NZCV = 0b0000.  */
 130        b.ne    L(done)
 131        tst     src1, 7
 132        b.ne    L(do_misaligned)
 133
 134L(src1_aligned):
 135        neg     shift, src2, lsl 3
 136        bic     src2, src2, 7
 137        ldr     data3, [src2], 8
 138#ifdef __AARCH64EB__
 139        rev     data3, data3
 140#endif
 141        lsr     tmp, zeroones, shift
 142        orr     data3, data3, tmp
 143        sub     has_nul, data3, zeroones
 144        orr     tmp, data3, REP8_7f
 145        bics    has_nul, has_nul, tmp
 146        b.ne    L(tail)
 147
 148        sub     off1, src2, src1
 149
 150        .p2align 4
 151
 152L(loop_unaligned):
 153        ldr     data3, [src1, off1]
 154        ldr     data2, [src1, off2]
 155#ifdef __AARCH64EB__
 156        rev     data3, data3
 157#endif
 158        sub     has_nul, data3, zeroones
 159        orr     tmp, data3, REP8_7f
 160        ldr     data1, [src1], 8
 161        bics    has_nul, has_nul, tmp
 162        ccmp    data1, data2, 0, eq
 163        b.eq    L(loop_unaligned)
 164
 165        lsl     tmp, has_nul, shift
 166#ifdef __AARCH64EB__
 167        rev     tmp, tmp
 168#endif
 169        eor     diff, data1, data2
 170        orr     syndrome, diff, tmp
 171        cbnz    syndrome, L(end)
 172L(tail):
 173        ldr     data1, [src1]
 174        neg     shift, shift
 175        lsr     data2, data3, shift
 176        lsr     has_nul, has_nul, shift
 177#ifdef __AARCH64EB__
 178        rev     data2, data2
 179        rev     has_nul, has_nul
 180#endif
 181        eor     diff, data1, data2
 182        orr     syndrome, diff, has_nul
 183        b       L(end)
 184
 185L(done):
 186        sub     result, data1, data2
 187        ret
 188SYM_FUNC_END(__pi_strcmp)
 189SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
 190EXPORT_SYMBOL_NOKASAN(strcmp)
 191