uboot/arch/x86/lib/string.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 1991,1992,1993,1997,1998,2003, 2005 Free Software Foundation, Inc.
   3 * This file is part of the GNU C Library.
   4 * Copyright (c) 2011 The Chromium OS Authors.
   5 *
   6 * SPDX-License-Identifier:     GPL-2.0+
   7 */
   8
   9/* From glibc-2.14, sysdeps/i386/memset.c */
  10
  11#include <linux/types.h>
  12#include <linux/compiler.h>
  13#include <asm/string.h>
  14
  15typedef uint32_t op_t;      /* word unit: sizeof(op_t) sets the alignment and word count in memset() and, via OPSIZ, the alignment in memcpy() */
  16
  17void *memset(void *dstpp, int c, size_t len)
  18{
  19        int d0;
  20        unsigned long int dstp = (unsigned long int) dstpp;
  21
  22        /* This explicit register allocation improves code very much indeed. */
  23        register op_t x asm("ax");
  24
  25        x = (unsigned char) c;
  26
  27        /* Clear the direction flag, so filling will move forward.  */
  28        asm volatile("cld");
  29
  30        /* This threshold value is optimal.  */
  31        if (len >= 12) {
  32                /* Fill X with four copies of the char we want to fill with. */
  33                x |= (x << 8);
  34                x |= (x << 16);
  35
  36                /* Adjust LEN for the bytes handled in the first loop.  */
  37                len -= (-dstp) % sizeof(op_t);
  38
  39                /*
  40                 * There are at least some bytes to set. No need to test for
  41                 * LEN == 0 in this alignment loop.
  42                 */
  43
  44                /* Fill bytes until DSTP is aligned on a longword boundary. */
  45                asm volatile(
  46                        "rep\n"
  47                        "stosb" /* %0, %2, %3 */ :
  48                        "=D" (dstp), "=c" (d0) :
  49                        "0" (dstp), "1" ((-dstp) % sizeof(op_t)), "a" (x) :
  50                        "memory");
  51
  52                /* Fill longwords.  */
  53                asm volatile(
  54                        "rep\n"
  55                        "stosl" /* %0, %2, %3 */ :
  56                        "=D" (dstp), "=c" (d0) :
  57                        "0" (dstp), "1" (len / sizeof(op_t)), "a" (x) :
  58                        "memory");
  59                len %= sizeof(op_t);
  60        }
  61
  62        /* Write the last few bytes. */
  63        asm volatile(
  64                "rep\n"
  65                "stosb" /* %0, %2, %3 */ :
  66                "=D" (dstp), "=c" (d0) :
  67                "0" (dstp), "1" (len), "a" (x) :
  68                "memory");
  69
  70        return dstpp;
  71}
  72
  73#define OP_T_THRES      8       /* memcpy() takes the word-copy path only when len >= this */
  74#define OPSIZ   (sizeof(op_t))  /* bytes per op_t word; memcpy() aligns DSTP to a multiple of it */
  75
  76#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)                             \
  77do {                                                                      \
  78        int __d0;                                                         \
  79        asm volatile(                                                     \
  80                /* Clear the direction flag, so copying goes forward.  */ \
  81                "cld\n"                                                   \
  82                /* Copy bytes.  */                                        \
  83                "rep\n"                                                   \
  84                "movsb" :                                                 \
  85                "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :               \
  86                "0" (dst_bp), "1" (src_bp), "2" (nbytes) :                \
  87                "memory");                                                \
  88} while (0)
  89
  90#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)                \
  91do {                                                                      \
  92        int __d0;                                                         \
  93        asm volatile(                                                     \
  94                /* Clear the direction flag, so copying goes forward.  */ \
  95                "cld\n"                                                   \
  96                /* Copy longwords.  */                                    \
  97                "rep\n"                                                   \
  98                "movsl" :                                                 \
  99                "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :               \
 100                "0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) :          \
 101                "memory");                                                \
 102        (nbytes_left) = (nbytes) % 4;                                     \
 103} while (0)
 104
 105void *memcpy(void *dstpp, const void *srcpp, size_t len)
 106{
 107        unsigned long int dstp = (long int)dstpp;
 108        unsigned long int srcp = (long int)srcpp;
 109
 110        /* Copy from the beginning to the end.  */
 111
 112        /* If there not too few bytes to copy, use word copy.  */
 113        if (len >= OP_T_THRES) {
 114                /* Copy just a few bytes to make DSTP aligned.  */
 115                len -= (-dstp) % OPSIZ;
 116                BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);
 117
 118                /* Copy from SRCP to DSTP taking advantage of the known
 119                 * alignment of DSTP.  Number of bytes remaining is put
 120                 * in the third argument, i.e. in LEN.  This number may
 121                 * vary from machine to machine.
 122                 */
 123                WORD_COPY_FWD(dstp, srcp, len, len);
 124
 125                /* Fall out and copy the tail.  */
 126        }
 127
 128        /* There are just a few bytes to copy. Use byte memory operations. */
 129        BYTE_COPY_FWD(dstp, srcp, len);
 130
 131        return dstpp;
 132}
 133
 134void *memmove(void *dest, const void *src, size_t n)
 135{
 136        int d0, d1, d2, d3, d4, d5;
 137        char *ret = dest;
 138
 139        __asm__ __volatile__(
 140                /* Handle more 16 bytes in loop */
 141                "cmp $0x10, %0\n\t"
 142                "jb     1f\n\t"
 143
 144                /* Decide forward/backward copy mode */
 145                "cmp %2, %1\n\t"
 146                "jb     2f\n\t"
 147
 148                /*
 149                 * movs instruction have many startup latency
 150                 * so we handle small size by general register.
 151                 */
 152                "cmp  $680, %0\n\t"
 153                "jb 3f\n\t"
 154                /* movs instruction is only good for aligned case */
 155                "mov %1, %3\n\t"
 156                "xor %2, %3\n\t"
 157                "and $0xff, %3\n\t"
 158                "jz 4f\n\t"
 159                "3:\n\t"
 160                "sub $0x10, %0\n\t"
 161
 162                /* We gobble 16 bytes forward in each loop */
 163                "3:\n\t"
 164                "sub $0x10, %0\n\t"
 165                "mov 0*4(%1), %3\n\t"
 166                "mov 1*4(%1), %4\n\t"
 167                "mov  %3, 0*4(%2)\n\t"
 168                "mov  %4, 1*4(%2)\n\t"
 169                "mov 2*4(%1), %3\n\t"
 170                "mov 3*4(%1), %4\n\t"
 171                "mov  %3, 2*4(%2)\n\t"
 172                "mov  %4, 3*4(%2)\n\t"
 173                "lea  0x10(%1), %1\n\t"
 174                "lea  0x10(%2), %2\n\t"
 175                "jae 3b\n\t"
 176                "add $0x10, %0\n\t"
 177                "jmp 1f\n\t"
 178
 179                /* Handle data forward by movs */
 180                ".p2align 4\n\t"
 181                "4:\n\t"
 182                "mov -4(%1, %0), %3\n\t"
 183                "lea -4(%2, %0), %4\n\t"
 184                "shr $2, %0\n\t"
 185                "rep movsl\n\t"
 186                "mov %3, (%4)\n\t"
 187                "jmp 11f\n\t"
 188                /* Handle data backward by movs */
 189                ".p2align 4\n\t"
 190                "6:\n\t"
 191                "mov (%1), %3\n\t"
 192                "mov %2, %4\n\t"
 193                "lea -4(%1, %0), %1\n\t"
 194                "lea -4(%2, %0), %2\n\t"
 195                "shr $2, %0\n\t"
 196                "std\n\t"
 197                "rep movsl\n\t"
 198                "mov %3,(%4)\n\t"
 199                "cld\n\t"
 200                "jmp 11f\n\t"
 201
 202                /* Start to prepare for backward copy */
 203                ".p2align 4\n\t"
 204                "2:\n\t"
 205                "cmp  $680, %0\n\t"
 206                "jb 5f\n\t"
 207                "mov %1, %3\n\t"
 208                "xor %2, %3\n\t"
 209                "and $0xff, %3\n\t"
 210                "jz 6b\n\t"
 211
 212                /* Calculate copy position to tail */
 213                "5:\n\t"
 214                "add %0, %1\n\t"
 215                "add %0, %2\n\t"
 216                "sub $0x10, %0\n\t"
 217
 218                /* We gobble 16 bytes backward in each loop */
 219                "7:\n\t"
 220                "sub $0x10, %0\n\t"
 221
 222                "mov -1*4(%1), %3\n\t"
 223                "mov -2*4(%1), %4\n\t"
 224                "mov  %3, -1*4(%2)\n\t"
 225                "mov  %4, -2*4(%2)\n\t"
 226                "mov -3*4(%1), %3\n\t"
 227                "mov -4*4(%1), %4\n\t"
 228                "mov  %3, -3*4(%2)\n\t"
 229                "mov  %4, -4*4(%2)\n\t"
 230                "lea  -0x10(%1), %1\n\t"
 231                "lea  -0x10(%2), %2\n\t"
 232                "jae 7b\n\t"
 233                /* Calculate copy position to head */
 234                "add $0x10, %0\n\t"
 235                "sub %0, %1\n\t"
 236                "sub %0, %2\n\t"
 237
 238                /* Move data from 8 bytes to 15 bytes */
 239                ".p2align 4\n\t"
 240                "1:\n\t"
 241                "cmp $8, %0\n\t"
 242                "jb 8f\n\t"
 243                "mov 0*4(%1), %3\n\t"
 244                "mov 1*4(%1), %4\n\t"
 245                "mov -2*4(%1, %0), %5\n\t"
 246                "mov -1*4(%1, %0), %1\n\t"
 247
 248                "mov  %3, 0*4(%2)\n\t"
 249                "mov  %4, 1*4(%2)\n\t"
 250                "mov  %5, -2*4(%2, %0)\n\t"
 251                "mov  %1, -1*4(%2, %0)\n\t"
 252                "jmp 11f\n\t"
 253
 254                /* Move data from 4 bytes to 7 bytes */
 255                ".p2align 4\n\t"
 256                "8:\n\t"
 257                "cmp $4, %0\n\t"
 258                "jb 9f\n\t"
 259                "mov 0*4(%1), %3\n\t"
 260                "mov -1*4(%1, %0), %4\n\t"
 261                "mov  %3, 0*4(%2)\n\t"
 262                "mov  %4, -1*4(%2, %0)\n\t"
 263                "jmp 11f\n\t"
 264
 265                /* Move data from 2 bytes to 3 bytes */
 266                ".p2align 4\n\t"
 267                "9:\n\t"
 268                "cmp $2, %0\n\t"
 269                "jb 10f\n\t"
 270                "movw 0*2(%1), %%dx\n\t"
 271                "movw -1*2(%1, %0), %%bx\n\t"
 272                "movw %%dx, 0*2(%2)\n\t"
 273                "movw %%bx, -1*2(%2, %0)\n\t"
 274                "jmp 11f\n\t"
 275
 276                /* Move data for 1 byte */
 277                ".p2align 4\n\t"
 278                "10:\n\t"
 279                "cmp $1, %0\n\t"
 280                "jb 11f\n\t"
 281                "movb (%1), %%cl\n\t"
 282                "movb %%cl, (%2)\n\t"
 283                ".p2align 4\n\t"
 284                "11:"
 285                : "=&c" (d0), "=&S" (d1), "=&D" (d2),
 286                  "=r" (d3), "=r" (d4), "=r"(d5)
 287                : "0" (n),
 288                 "1" (src),
 289                 "2" (dest)
 290                : "memory");
 291
 292        return ret;
 293}
 294