linux/arch/arc/lib/strcmp.S
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle per word
   (one cycle per byte in the byte loop) by forcing double-word alignment
   of source 1, unrolling by a factor of two, and speculatively loading
   the second word / byte of source 1; however, that would increase the
   overhead for loop setup / finish, and strcmp might often terminate
   early.  */

#include <linux/linkage.h>

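/* Roughly, the aligned word loop below behaves like this C sketch
   (illustrative only; the variable names are not taken from this file):

        unsigned long w1, w2, zero_mask;

        while (1) {
                w1 = *s1++;
                w2 = *s2++;
                zero_mask = (w1 - 0x01010101UL) & ~w1 & 0x80808080UL;
                if (zero_mask)
                        break;          // w1 contains a NUL byte
                if (w1 != w2)
                        break;          // first differing word
        }

   The code after the loop then locates the first NUL or differing byte
   and returns a positive, zero or negative result accordingly.  */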
ENTRY_CFI(strcmp)
        or      r2,r0,r1
        bmsk_s  r2,r2,1
        brne    r2,0,.Lcharloop
        mov_s   r12,0x01010101
        ror     r5,r12
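        /* r12 = 0x01010101 and r5 = ror(r12) = 0x80808080: a word x
           contains a zero byte iff (x - 0x01010101) & ~x & 0x80808080
           is non-zero (borrow propagation may additionally flag 0x01
           bytes above a real zero; see the big endian .Lfound0 below).  */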
.Lwordloop:
        ld.ab   r2,[r0,4]
        ld.ab   r3,[r1,4]
        nop_s
        sub     r4,r2,r12
        bic     r4,r4,r2
        and     r4,r4,r5
        brne    r4,0,.Lfound0
        breq    r2,r3,.Lwordloop
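        /* The words differ and r2 contains no zero byte.  On little
           endian the first differing string byte is the least
           significant differing byte, so reduce the comparison to that
           byte before comparing; on big endian an unsigned compare of
           the whole words already orders the strings correctly.  */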
#ifdef  __LITTLE_ENDIAN__
        xor     r0,r2,r3        ; mask for difference
        sub_s   r1,r0,1
        bic_s   r0,r0,r1        ; mask for least significant difference bit
        sub     r1,r5,r0
        xor     r0,r5,r1        ; mask for least significant difference byte
        and_s   r2,r2,r0
        and_s   r3,r3,r0
#endif /* LITTLE ENDIAN */
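        /* Return 1 if r2 > r3 (unsigned), otherwise set bit 31 to make
           the result negative; bset.lo executes in the delay slot of
           the jump.  */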
        cmp_s   r2,r3
        mov_s   r0,1
        j_s.d   [blink]
        bset.lo r0,r0,31

        .balign 4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
        xor     r0,r2,r3        ; mask for difference
        or      r0,r0,r4        ; or in zero indicator
        sub_s   r1,r0,1
        bic_s   r0,r0,r1        ; mask for least significant difference bit
        sub     r1,r5,r0
        xor     r0,r5,r1        ; mask for least significant difference byte
        and_s   r2,r2,r0
        and_s   r3,r3,r0
        sub.f   r0,r2,r3
        mov.hi  r0,1
        j_s.d   [blink]
        bset.lo r0,r0,31
#else /* BIG ENDIAN */
        /* The zero-detection above can mis-detect 0x01 bytes as zeroes
           because of carry propagation from a less significant zero byte.
           We can compensate for this by checking that bit 0 is zero.
           This compensation is not necessary in the step where we
           get a low estimate for r2, because in any affected bytes
           we already have 0x00 or 0x01, which will remain unchanged
           when bit 7 is cleared.  */
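        /* For instance, for r2 = 0x41010041 the test yields
           r4 = 0x00808000: both the 0x00 byte and the 0x01 byte above
           it are flagged, but only the 0x00 byte terminates the string.
           A real zero byte has bit 0 clear, whereas a mis-detected 0x01
           byte has bit 0 set, hence the filtering against r2 >> 1.  */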
        .balign 4
.Lfound0:
        lsr     r0,r4,8
        lsr_s   r1,r2
        bic_s   r2,r2,r0        ; get low estimate for r2 and get ...
        bic_s   r0,r0,r1        ; <this is the adjusted mask for zeros>
        or_s    r3,r3,r0        ; ... high estimate r3 so that r2 > r3 will ...
        cmp_s   r3,r2           ; ... be independent of trailing garbage
        or_s    r2,r2,r0        ; likewise for r3 > r2
        bic_s   r3,r3,r0
        rlc     r0,0            ; r0 := r2 > r3 ? 1 : 0
        cmp_s   r2,r3
        j_s.d   [blink]
        bset.lo r0,r0,31
#endif /* ENDIAN */
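
        /* Byte-at-a-time fallback, used when either string pointer is
           not 32-bit aligned; returns the difference between the first
           pair of mismatching (or terminating) bytes.  */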
        .balign 4
.Lcharloop:
        ldb.ab  r2,[r0,1]
        ldb.ab  r3,[r1,1]
        nop_s
        breq    r2,0,.Lcmpend
        breq    r2,r3,.Lcharloop
.Lcmpend:
        j_s.d   [blink]
        sub     r0,r2,r3
END_CFI(strcmp)