uboot/arch/x86/lib/string.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc.
   4 * This file is part of the GNU C Library.
   5 * Copyright (c) 2011 The Chromium OS Authors.
   6 */
   7
   8/* From glibc-2.14, sysdeps/i386/memset.c */
   9
  10#include <linux/types.h>
  11#include <linux/compiler.h>
  12#include <asm/string.h>
  13
/*
 * Word type used for the bulk of a fill: memset() aligns the destination
 * to sizeof(op_t) and then stores len / sizeof(op_t) words with "stosl".
 */
typedef uint32_t op_t;
  15
  16void *memset(void *dstpp, int c, size_t len)
  17{
  18        int d0;
  19        unsigned long int dstp = (unsigned long int) dstpp;
  20
  21        /* This explicit register allocation improves code very much indeed. */
  22        register op_t x asm("ax");
  23
  24        x = (unsigned char) c;
  25
  26        /* Clear the direction flag, so filling will move forward.  */
  27        asm volatile("cld");
  28
  29        /* This threshold value is optimal.  */
  30        if (len >= 12) {
  31                /* Fill X with four copies of the char we want to fill with. */
  32                x |= (x << 8);
  33                x |= (x << 16);
  34
  35                /* Adjust LEN for the bytes handled in the first loop.  */
  36                len -= (-dstp) % sizeof(op_t);
  37
  38                /*
  39                 * There are at least some bytes to set. No need to test for
  40                 * LEN == 0 in this alignment loop.
  41                 */
  42
  43                /* Fill bytes until DSTP is aligned on a longword boundary. */
  44                asm volatile(
  45                        "rep\n"
  46                        "stosb" /* %0, %2, %3 */ :
  47                        "=D" (dstp), "=c" (d0) :
  48                        "0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) :
  49                        "memory");
  50
  51                /* Fill longwords.  */
  52                asm volatile(
  53                        "rep\n"
  54                        "stosl" /* %0, %2, %3 */ :
  55                        "=D" (dstp), "=c" (d0) :
  56                        "0" (dstp), "1" (len / sizeof(op_t)), "a" (x) :
  57                        "memory");
  58                len %= sizeof(op_t);
  59        }
  60
  61        /* Write the last few bytes. */
  62        asm volatile(
  63                "rep\n"
  64                "stosb" /* %0, %2, %3 */ :
  65                "=D" (dstp), "=c" (d0) :
  66                "0" (dstp), "1" (len), "a" (x) :
  67                "memory");
  68
  69        return dstpp;
  70}
  71
/* Shortest length for which memcpy() aligns the destination and copies by word. */
#define OP_T_THRES      8
/* Size in bytes of one op_t word; memcpy() aligns the destination to this. */
#define OPSIZ   (sizeof(op_t))
  74
  75#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)                             \
  76do {                                                                      \
  77        int __d0;                                                         \
  78        asm volatile(                                                     \
  79                /* Clear the direction flag, so copying goes forward.  */ \
  80                "cld\n"                                                   \
  81                /* Copy bytes.  */                                        \
  82                "rep\n"                                                   \
  83                "movsb" :                                                 \
  84                "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :               \
  85                "0" (dst_bp), "1" (src_bp), "2" (nbytes) :                \
  86                "memory");                                                \
  87} while (0)
  88
  89#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)                \
  90do {                                                                      \
  91        int __d0;                                                         \
  92        asm volatile(                                                     \
  93                /* Clear the direction flag, so copying goes forward.  */ \
  94                "cld\n"                                                   \
  95                /* Copy longwords.  */                                    \
  96                "rep\n"                                                   \
  97                "movsl" :                                                 \
  98                "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :               \
  99                "0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) :          \
 100                "memory");                                                \
 101        (nbytes_left) = (nbytes) % 4;                                     \
 102} while (0)
 103
 104void *memcpy(void *dstpp, const void *srcpp, size_t len)
 105{
 106        unsigned long int dstp = (long int)dstpp;
 107        unsigned long int srcp = (long int)srcpp;
 108
 109        /* Copy from the beginning to the end.  */
 110
 111        /* If there not too few bytes to copy, use word copy.  */
 112        if (len >= OP_T_THRES) {
 113                /* Copy just a few bytes to make DSTP aligned.  */
 114                len -= (-dstp) % OPSIZ;
 115                BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);
 116
 117                /* Copy from SRCP to DSTP taking advantage of the known
 118                 * alignment of DSTP.  Number of bytes remaining is put
 119                 * in the third argument, i.e. in LEN.  This number may
 120                 * vary from machine to machine.
 121                 */
 122                WORD_COPY_FWD(dstp, srcp, len, len);
 123
 124                /* Fall out and copy the tail.  */
 125        }
 126
 127        /* There are just a few bytes to copy. Use byte memory operations. */
 128        BYTE_COPY_FWD(dstp, srcp, len);
 129
 130        return dstpp;
 131}
 132
 133void *memmove(void *dest, const void *src, size_t n)
 134{
 135        int d0, d1, d2, d3, d4, d5;
 136        char *ret = dest;
 137
 138        __asm__ __volatile__(
 139                /* Handle more 16 bytes in loop */
 140                "cmp $0x10, %0\n\t"
 141                "jb     1f\n\t"
 142
 143                /* Decide forward/backward copy mode */
 144                "cmp %2, %1\n\t"
 145                "jb     2f\n\t"
 146
 147                /*
 148                 * movs instruction have many startup latency
 149                 * so we handle small size by general register.
 150                 */
 151                "cmp  $680, %0\n\t"
 152                "jb 3f\n\t"
 153                /* movs instruction is only good for aligned case */
 154                "mov %1, %3\n\t"
 155                "xor %2, %3\n\t"
 156                "and $0xff, %3\n\t"
 157                "jz 4f\n\t"
 158                "3:\n\t"
 159                "sub $0x10, %0\n\t"
 160
 161                /* We gobble 16 bytes forward in each loop */
 162                "3:\n\t"
 163                "sub $0x10, %0\n\t"
 164                "mov 0*4(%1), %3\n\t"
 165                "mov 1*4(%1), %4\n\t"
 166                "mov  %3, 0*4(%2)\n\t"
 167                "mov  %4, 1*4(%2)\n\t"
 168                "mov 2*4(%1), %3\n\t"
 169                "mov 3*4(%1), %4\n\t"
 170                "mov  %3, 2*4(%2)\n\t"
 171                "mov  %4, 3*4(%2)\n\t"
 172                "lea  0x10(%1), %1\n\t"
 173                "lea  0x10(%2), %2\n\t"
 174                "jae 3b\n\t"
 175                "add $0x10, %0\n\t"
 176                "jmp 1f\n\t"
 177
 178                /* Handle data forward by movs */
 179                ".p2align 4\n\t"
 180                "4:\n\t"
 181                "mov -4(%1, %0), %3\n\t"
 182                "lea -4(%2, %0), %4\n\t"
 183                "shr $2, %0\n\t"
 184                "rep movsl\n\t"
 185                "mov %3, (%4)\n\t"
 186                "jmp 11f\n\t"
 187                /* Handle data backward by movs */
 188                ".p2align 4\n\t"
 189                "6:\n\t"
 190                "mov (%1), %3\n\t"
 191                "mov %2, %4\n\t"
 192                "lea -4(%1, %0), %1\n\t"
 193                "lea -4(%2, %0), %2\n\t"
 194                "shr $2, %0\n\t"
 195                "std\n\t"
 196                "rep movsl\n\t"
 197                "mov %3,(%4)\n\t"
 198                "cld\n\t"
 199                "jmp 11f\n\t"
 200
 201                /* Start to prepare for backward copy */
 202                ".p2align 4\n\t"
 203                "2:\n\t"
 204                "cmp  $680, %0\n\t"
 205                "jb 5f\n\t"
 206                "mov %1, %3\n\t"
 207                "xor %2, %3\n\t"
 208                "and $0xff, %3\n\t"
 209                "jz 6b\n\t"
 210
 211                /* Calculate copy position to tail */
 212                "5:\n\t"
 213                "add %0, %1\n\t"
 214                "add %0, %2\n\t"
 215                "sub $0x10, %0\n\t"
 216
 217                /* We gobble 16 bytes backward in each loop */
 218                "7:\n\t"
 219                "sub $0x10, %0\n\t"
 220
 221                "mov -1*4(%1), %3\n\t"
 222                "mov -2*4(%1), %4\n\t"
 223                "mov  %3, -1*4(%2)\n\t"
 224                "mov  %4, -2*4(%2)\n\t"
 225                "mov -3*4(%1), %3\n\t"
 226                "mov -4*4(%1), %4\n\t"
 227                "mov  %3, -3*4(%2)\n\t"
 228                "mov  %4, -4*4(%2)\n\t"
 229                "lea  -0x10(%1), %1\n\t"
 230                "lea  -0x10(%2), %2\n\t"
 231                "jae 7b\n\t"
 232                /* Calculate copy position to head */
 233                "add $0x10, %0\n\t"
 234                "sub %0, %1\n\t"
 235                "sub %0, %2\n\t"
 236
 237                /* Move data from 8 bytes to 15 bytes */
 238                ".p2align 4\n\t"
 239                "1:\n\t"
 240                "cmp $8, %0\n\t"
 241                "jb 8f\n\t"
 242                "mov 0*4(%1), %3\n\t"
 243                "mov 1*4(%1), %4\n\t"
 244                "mov -2*4(%1, %0), %5\n\t"
 245                "mov -1*4(%1, %0), %1\n\t"
 246
 247                "mov  %3, 0*4(%2)\n\t"
 248                "mov  %4, 1*4(%2)\n\t"
 249                "mov  %5, -2*4(%2, %0)\n\t"
 250                "mov  %1, -1*4(%2, %0)\n\t"
 251                "jmp 11f\n\t"
 252
 253                /* Move data from 4 bytes to 7 bytes */
 254                ".p2align 4\n\t"
 255                "8:\n\t"
 256                "cmp $4, %0\n\t"
 257                "jb 9f\n\t"
 258                "mov 0*4(%1), %3\n\t"
 259                "mov -1*4(%1, %0), %4\n\t"
 260                "mov  %3, 0*4(%2)\n\t"
 261                "mov  %4, -1*4(%2, %0)\n\t"
 262                "jmp 11f\n\t"
 263
 264                /* Move data from 2 bytes to 3 bytes */
 265                ".p2align 4\n\t"
 266                "9:\n\t"
 267                "cmp $2, %0\n\t"
 268                "jb 10f\n\t"
 269                "movw 0*2(%1), %%dx\n\t"
 270                "movw -1*2(%1, %0), %%bx\n\t"
 271                "movw %%dx, 0*2(%2)\n\t"
 272                "movw %%bx, -1*2(%2, %0)\n\t"
 273                "jmp 11f\n\t"
 274
 275                /* Move data for 1 byte */
 276                ".p2align 4\n\t"
 277                "10:\n\t"
 278                "cmp $1, %0\n\t"
 279                "jb 11f\n\t"
 280                "movb (%1), %%cl\n\t"
 281                "movb %%cl, (%2)\n\t"
 282                ".p2align 4\n\t"
 283                "11:"
 284                : "=&c" (d0), "=&S" (d1), "=&D" (d2),
 285                  "=r" (d3), "=r" (d4), "=r"(d5)
 286                : "0" (n),
 287                 "1" (src),
 288                 "2" (dest)
 289                : "memory");
 290
 291        return ret;
 292}
 293