linux/arch/tile/lib/memset_32.c
<<
>>
Prefs
   1/*
   2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
   3 *
   4 *   This program is free software; you can redistribute it and/or
   5 *   modify it under the terms of the GNU General Public License
   6 *   as published by the Free Software Foundation, version 2.
   7 *
   8 *   This program is distributed in the hope that it will be useful, but
   9 *   WITHOUT ANY WARRANTY; without even the implied warranty of
  10 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11 *   NON INFRINGEMENT.  See the GNU General Public License for
  12 *   more details.
  13 */
  14
  15#include <linux/types.h>
  16#include <linux/string.h>
  17#include <linux/module.h>
  18#include <arch/chip.h>
  19
  20void *memset(void *s, int c, size_t n)
  21{
            /* Fill the 'n' bytes starting at 's' with the byte value of 'c'
             * and return 's'.  The byte loops store through a uint8_t
             * pointer, so only the low 8 bits of 'c' are written there.
             *
             * Outline:
             *  - n < BYTE_CUTOFF: plain byte-at-a-time loop.
             *  - otherwise: byte stores at the head until the pointer is
             *    4-byte aligned, byte stores at the tail until the length
             *    is a multiple of 4, then 32-bit stores for the middle.
             *  - if the middle holds at least one whole cache line, word
             *    stores first bring the pointer to a cache-line boundary,
             *    then whole lines are filled in batches, each line being
             *    passed to __insn_wh64() before it is written.
             *
             * Locals: out8/out32 are the byte and word store cursors, n32
             * counts the words still to be stored, v32 is the fill pattern
             * used for word stores, and to_align32 is the number of words
             * between out32 and the next cache-line boundary.
             */
  22        uint32_t *out32;
  23        int n32;
  24        uint32_t v16, v32;
  25        uint8_t *out8 = s;
  26        int to_align32;
  27
  28        /* Experimentation shows that a trivial tight loop is a win up until
  29         * around a size of 20, where writing a word at a time starts to win.
             *
             * NOTE(review): BYTE_CUTOFF is defined inside the function body
             * and no #undef follows in this function, so the macro remains
             * visible to any code after it in the translation unit.
  30         */
  31#define BYTE_CUTOFF 20
  32
  33#if BYTE_CUTOFF < 3
  34        /* This must be at least this big: the head-alignment loop further
  35         * down stores up to 3 bytes without checking 'n'.
  36         */
  37#error "BYTE_CUTOFF is too small"
  38#endif
  39
  40        if (n < BYTE_CUTOFF) {
  41                /* Strangely, this turns out to be the tightest way to
  42                 * write this loop.
  43                 */
  44                if (n != 0) {
  45                        do {
  46                                /* Strangely, combining these into one line
  47                                 * performs worse.
  48                                 */
  49                                *out8 = c;
  50                                out8++;
  51                        } while (--n != 0);
  52                }
  53
  54                return s;
  55        }
  56
  57        /* Align 'out8' to a 4-byte boundary.  At most 3 bytes are stored
             * here and we know n >= BYTE_CUTOFF >= 3, so this won't write past
             * the end.
             */
  58        while (((uintptr_t) out8 & 3) != 0) {
  59                *out8++ = c;
  60                --n;
  61        }
  62
  63        /* Align 'n': store the trailing (n & 3) bytes one at a time,
             * working backwards from the end, so that the remaining length
             * is a multiple of 4.
             */
  64        while (n & 3)
  65                out8[--n] = c;
  66
            /* 'out8' is now 4-byte aligned and 'n' is a multiple of 4, so
             * the rest of the buffer is exactly n / 4 whole words.
             */
  67        out32 = (uint32_t *) out8;
  68        n32 = n >> 2;
  69
  70        /* Tile input byte out to 32 bits: v16 is built from 'c' with
             * intlb, then v32 from v16 with intlh.
             * NOTE(review): presumably intlb/intlh interleave the bytes /
             * halfwords of their two operands; confirm against the Tile
             * intrinsics reference.
             */
  71        v16 = __insn_intlb(c, c);
  72        v32 = __insn_intlh(v16, v16);
  73
  74        /* This must be at least 8 or the following loop doesn't work.
             * NOTE(review): the '& (CACHE_LINE_SIZE_IN_WORDS - 1)' masks
             * below only act as a modulo if this value is a power of two,
             * and the unrolled fill loop assumes it is a multiple of 4;
             * confirm for every supported chip.
             */
  75#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4)
  76
  77        /* Determine how many words we need to emit before the 'out32'
  78         * pointer becomes aligned modulo the cache line size.
             *
             * (uintptr_t)out32 >> 2 is the word index of 'out32'; negating
             * it and masking with (line size in words - 1) gives the
             * distance to the next line boundary, or 0 if 'out32' already
             * sits on one.
  79         */
  80        to_align32 =
  81                (-((uintptr_t)out32 >> 2)) & (CACHE_LINE_SIZE_IN_WORDS - 1);
  82
  83        /* Only bother aligning and using wh64 if there is at least
  84         * one full cache line to process.  This check also prevents
  85         * overrunning the end of the buffer with alignment words.
             *
             * In other words: after the 'to_align32' alignment words, at
             * least CACHE_LINE_SIZE_IN_WORDS words must still remain.
             * 'n32' is a signed int, so the right-hand side can be negative
             * for short fills.
             * NOTE(review): this relies on the comparison being carried out
             * in signed arithmetic, i.e. on CHIP_L2_LINE_SIZE() having a
             * signed type; confirm in <arch/chip.h>.
  86         */
  87        if (to_align32 <= n32 - CACHE_LINE_SIZE_IN_WORDS) {
  88                int lines_left;
  89
  90                /* Align out32 mod the cache line size so we can use wh64. */
  91                n32 -= to_align32;
  92                for (; to_align32 != 0; to_align32--) {
  93                        *out32 = v32;
  94                        out32++;
  95                }
  96
  97                /* Use unsigned divide to turn this into a right shift.
                     * The guard above ensures n32 is at least one full line
                     * here, so lines_left >= 1, which the do/while loop
                     * below depends on.
                     */
  98                lines_left = (unsigned)n32 / CACHE_LINE_SIZE_IN_WORDS;
  99
 100                do {
 101                        /* Only wh64 a few lines at a time, so we don't
 102                         * exceed the maximum number of victim lines.
                             *
                             * 'x' is the size of this batch in cache lines:
                             * the smaller of the lines still to do and
                             * CHIP_MAX_OUTSTANDING_VICTIMS().
 103                         */
 104                        int x = ((lines_left < CHIP_MAX_OUTSTANDING_VICTIMS())
 105                                  ? lines_left
 106                                  : CHIP_MAX_OUTSTANDING_VICTIMS());
 107                        uint32_t *wh = out32;
 108                        int i = x;
 109                        int j;
 110
 111                        lines_left -= x;
 112
                            /* Issue wh64 on each of the 'x' lines that the
                             * fill loop below is about to overwrite in full.
                             * NOTE(review): presumably wh64 is a write hint
                             * that avoids fetching the line's old contents;
                             * confirm against the Tile ISA documentation.
                             */
 113                        do {
 114                                __insn_wh64(wh);
 115                                wh += CACHE_LINE_SIZE_IN_WORDS;
 116                        } while (--i);
 117
                            /* Each iteration stores 4 words, so
                             * x * (CACHE_LINE_SIZE_IN_WORDS / 4) iterations
                             * fill exactly the 'x' lines of this batch.
                             */
 118                        for (j = x * (CACHE_LINE_SIZE_IN_WORDS / 4);
 119                             j != 0; j--) {
 120                                *out32++ = v32;
 121                                *out32++ = v32;
 122                                *out32++ = v32;
 123                                *out32++ = v32;
 124                        }
 125                } while (lines_left != 0);
 126
 127                /* We processed all full lines above, so only this many
 128                 * words remain to be processed.
 129                 */
 130                n32 &= CACHE_LINE_SIZE_IN_WORDS - 1;
 131        }
 132
 133        /* Now handle any leftover values: either the words after the last
             * full cache line, or every word when the cache-line path above
             * was skipped.
             */
 134        if (n32 != 0) {
 135                do {
 136                        *out32 = v32;
 137                        out32++;
 138                } while (--n32 != 0);
 139        }
 140
 141        return s;
 142}
 143EXPORT_SYMBOL(memset);
 144