linux/arch/arm64/lib/memcmp.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-only */
   2/*
   3 * Copyright (c) 2013-2021, Arm Limited.
   4 *
   5 * Adapted from the original at:
   6 * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
   7 */
   8
   9#include <linux/linkage.h>
  10#include <asm/assembler.h>
  11
  12/* Assumptions:
  13 *
  14 * ARMv8-a, AArch64, unaligned accesses.
  15 */
  16
  17#define L(label) .L ## label      /* L(foo) pastes to .Lfoo, a file-local label name.  */
  18
  19/* Parameters and result.  */
  20#define src1            x0      /* First buffer; advanced as data is consumed.  */
  21#define src2            x1      /* Second buffer; advanced in step with src1.  */
  22#define limit           x2      /* Byte count on entry, then a biased remaining count.  */
  23#define result          w0      /* Return value; 32-bit view of x0, so it overwrites src1.  */
  24
  25/* Internal variables.  */
  26#define data1           x3      /* Data loaded from src1 (first 8 bytes of a 16-byte pair).  */
  27#define data1w          w3      /* 32-bit view of data1, used by the 4-byte and 1-byte loads.  */
  28#define data1h          x4      /* Second 8 bytes of a 16-byte load from src1.  */
  29#define data2           x5      /* Data loaded from src2 (first 8 bytes of a 16-byte pair).  */
  30#define data2w          w5      /* 32-bit view of data2, used by the 4-byte and 1-byte loads.  */
  31#define data2h          x6      /* Second 8 bytes of a 16-byte load from src2.  */
  32#define tmp1            x7      /* Misalignment of src1 (src1 & 15) in the alignment step.  */
  33#define tmp2            x8      /* Not referenced by the code visible in this file.  */
  34
  35SYM_FUNC_START_WEAK_PI(memcmp)
            /* memcmp: compare the first `limit` bytes at src1 and src2.
               In:   x0 = src1, x1 = src2, x2 = limit (byte count).
               Out:  w0 = 0 when all bytes match; otherwise negative if the
                     first differing byte of src1 is the smaller one (compared
                     as unsigned) and positive if it is the larger one.  The
                     4/8/16-byte paths return exactly -1 or 1; the byte loop
                     returns the difference of the two bytes.
               Uses: x3-x7 and the condition flags; x0-x2 are overwritten.
                     sp is never referenced.
               Tails are handled by re-reading bytes that were already
               compared (overlapping loads that end exactly at src + limit),
               not by loading past the end of either buffer.  */
  36        subs    limit, limit, 8         /* limit = n - 8; borrow means n < 8.  */
  37        b.lo    L(less8)
  38
            /* n >= 8: compare the first 8 bytes; src1/src2 advance to offset 8.  */
  39        ldr     data1, [src1], 8
  40        ldr     data2, [src2], 8
  41        cmp     data1, data2
  42        b.ne    L(return)
  43
  44        subs    limit, limit, 8         /* limit = n - 16.  */
  45        b.gt    L(more16)               /* Signed test: taken when n - 16 > 0.  */
  46
            /* 8 <= n <= 16: limit is in [-8..0], so src + limit addresses the
               last 8 bytes of each buffer (src already points at offset 8).
               These may overlap the 8 bytes compared above.  */
  47        ldr     data1, [src1, limit]
  48        ldr     data2, [src2, limit]
  49        b       L(return)
  50
  51L(more16):
            /* n > 16: compare bytes 8..15; src1/src2 advance to offset 16.  */
  52        ldr     data1, [src1], 8
  53        ldr     data2, [src2], 8
  54        cmp     data1, data2
  55        bne     L(return)
  56
  57        /* Jump directly to comparing the last 16 bytes for 32 byte (or less)
  58           strings.  */
  59        subs    limit, limit, 16        /* limit = n - 32.  */
  60        b.ls    L(last_bytes)           /* Unsigned <=: borrow or zero.  */
  61
  62        /* We overlap loads between 0-32 bytes at either side of SRC1 when we
  63           try to align, so limit it only to strings larger than 128 bytes.  */
            /* limit holds n - 32 here, so "limit > 96" is "n > 128".  */
  64        cmp     limit, 96
  65        b.ls    L(loop16)
  66
  67        /* Align src1 and adjust src2 with bytes not yet done.  */
            /* src1 is rounded down to a 16-byte boundary and src2 is moved back
               by the same amount.  limit grows by tmp1 so the bytes stepped back
               over (already compared above) are counted again.  */
  68        and     tmp1, src1, 15
  69        add     limit, limit, tmp1
  70        sub     src1, src1, tmp1
  71        sub     src2, src2, tmp1
  72
  73        /* Loop performing 16 bytes per iteration using aligned src1.
  74           Limit is pre-decremented by 16 and must be larger than zero.
  75           Exit if <= 16 bytes left to do or if the data is not equal.  */
  76        .p2align 4
  77L(loop16):
  78        ldp     data1, data1h, [src1], 16
  79        ldp     data2, data2h, [src2], 16
  80        subs    limit, limit, 16
            /* Chained conditions: Z ends up set only if limit is still > 0 (hi)
               and both 8-byte halves match.  When a ccmp's condition fails it
               forces NZCV = 0, i.e. "ne", which ends the loop.  */
  81        ccmp    data1, data2, 0, hi
  82        ccmp    data1h, data2h, 0, eq
  83        b.eq    L(loop16)
  84
            /* Loop exit: either a mismatch in the pair just loaded or limit <= 0.
               Check the first half, then the second.  L(return) only looks at
               data1/data2, so the second halves are moved there first.  */
  85        cmp     data1, data2
  86        bne     L(return)
  87        mov     data1, data1h
  88        mov     data2, data2h
  89        cmp     data1, data2
  90        bne     L(return)
  91
  92        /* Compare last 1-16 bytes using unaligned access.  */
  93L(last_bytes):
            /* limit is <= 0 here, so adding it steps src back such that the
               16-byte loads end exactly at the end of each buffer.  */
  94        add     src1, src1, limit
  95        add     src2, src2, limit
  96        ldp     data1, data1h, [src1]
  97        ldp     data2, data2h, [src2]
  98        cmp     data1, data2
  99        bne     L(return)
 100        mov     data1, data1h
 101        mov     data2, data2h
 102        cmp     data1, data2            /* Falls through into L(return).  */
 103
 104        /* Compare data bytes and set return value to 0, -1 or 1.  */
 105L(return):
 106#ifndef __AARCH64EB__
            /* Little-endian only: byte-reverse so the byte at the lowest address
               becomes the most significant one.  The unsigned compare below then
               orders the words by their first differing byte in memory.  */
 107        rev     data1, data1
 108        rev     data2, data2
 109#endif
 110        cmp     data1, data2
 111L(ret_eq):
            /* result = 0 if Z is set, else 1, negated to -1 when data1 < data2
               (unsigned, "lo").  L(less4) branches here directly with Z set,
               which yields 0.  */
 112        cset    result, ne
 113        cneg    result, result, lo
 114        ret
 115
 116        .p2align 4
 117        /* Compare up to 8 bytes.  Limit is [-8..-1].  */
 118L(less8):
 119        adds    limit, limit, 4         /* limit = n - 4; no carry means n < 4.  */
 120        b.lo    L(less4)
            /* 4 <= n <= 7: compare the first 4 bytes.  The 32-bit loads zero the
               upper halves of data1/data2, so the 64-bit rev/cmp at L(return)
               still orders the two words correctly.  */
 121        ldr     data1w, [src1], 4
 122        ldr     data2w, [src2], 4
 123        cmp     data1w, data2w
 124        b.ne    L(return)
 125        sub     limit, limit, 4         /* Re-bias to n - 8 for the adds below.  */
 126L(less4):
 127        adds    limit, limit, 4         /* limit = bytes still to compare (0..3).  */
 128        beq     L(ret_eq)               /* Nothing left: return 0.  */
 129L(byte_loop):
 130        ldrb    data1w, [src1], 1
 131        ldrb    data2w, [src2], 1
 132        subs    limit, limit, 1
 133        ccmp    data1w, data2w, 0, ne   /* NZCV = 0b0000.  */
 134        b.eq    L(byte_loop)
            /* Loop ends when the bytes differ or limit reached 0.  The difference
               of the last byte pair is the result (0 if they were equal).  */
 135        sub     result, data1w, data2w
 136        ret
 137
 138SYM_FUNC_END_PI(memcmp)
 139EXPORT_SYMBOL_NOKASAN(memcmp)
 140