1
2
3
4
5
6
7
8
9
10
11
12
13
14
15#include <linux/types.h>
16#include <linux/string.h>
17#include <linux/module.h>
18
19
20
21#define word_t uint64_t
22
23#if CHIP_L2_LINE_SIZE() != 64 && CHIP_L2_LINE_SIZE() != 128
24#error "Assumes 64 or 128 byte line size"
25#endif
26
27
28#define PREFETCH_LINES_AHEAD 3
29
30
31
32
33
34#define ST(p, v) (*(p) = (v))
35#define LD(p) (*(p))
36
37#ifndef USERCOPY_FUNC
38#define ST1 ST
39#define ST2 ST
40#define ST4 ST
41#define ST8 ST
42#define LD1 LD
43#define LD2 LD
44#define LD4 LD
45#define LD8 LD
46#define RETVAL dstv
47void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n)
48#else
49
50
51
52
53#define RETVAL 0
54int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
55#endif
56{
57 char *__restrict dst1 = (char *)dstv;
58 const char *__restrict src1 = (const char *)srcv;
59 const char *__restrict src1_end;
60 const char *__restrict prefetch;
61 word_t *__restrict dst8;
62 word_t final;
63 long i;
64
65 if (n < 16) {
66 for (; n; n--)
67 ST1(dst1++, LD1(src1++));
68 return RETVAL;
69 }
70
71
72
73
74
75 src1_end = src1 + n - 1;
76
77
78 prefetch = src1;
79 for (i = 0; i < PREFETCH_LINES_AHEAD; i++) {
80 __insn_prefetch(prefetch);
81 prefetch += CHIP_L2_LINE_SIZE();
82 prefetch = (prefetch > src1_end) ? prefetch : src1;
83 }
84
85
86 for (; (uintptr_t)dst1 & (sizeof(word_t) - 1); n--)
87 ST1(dst1++, LD1(src1++));
88
89
90 dst8 = (word_t *)dst1;
91
92 if (__builtin_expect((uintptr_t)src1 & (sizeof(word_t) - 1), 0)) {
93
94
95
96
97
98
99
100
101 const word_t *__restrict src8 =
102 (const word_t *)((uintptr_t)src1 & -sizeof(word_t));
103 word_t b;
104
105 word_t a = LD8(src8++);
106 for (; n >= sizeof(word_t); n -= sizeof(word_t)) {
107 b = LD8(src8++);
108 a = __insn_dblalign(a, b, src1);
109 ST8(dst8++, a);
110 a = b;
111 }
112
113 if (n == 0)
114 return RETVAL;
115
116 b = ((const char *)src8 <= src1_end) ? *src8 : 0;
117
118
119
120
121
122 final = __insn_dblalign(a, b, src1);
123 } else {
124
125
126 const word_t* __restrict src8 = (const word_t *)src1;
127
128
129 if (n >= CHIP_L2_LINE_SIZE()) {
130
131 for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1);
132 n -= sizeof(word_t))
133 ST8(dst8++, LD8(src8++));
134
135 for (; n >= CHIP_L2_LINE_SIZE(); ) {
136 __insn_wh64(dst8);
137
138
139
140
141
142 __insn_prefetch(prefetch);
143 prefetch += CHIP_L2_LINE_SIZE();
144 prefetch = (prefetch > src1_end) ? prefetch :
145 (const char *)src8;
146
147
148
149
150
151
152#define COPY_WORD(offset) ({ ST8(dst8+offset, LD8(src8+offset)); n -= 8; })
153 COPY_WORD(0);
154 COPY_WORD(1);
155 COPY_WORD(2);
156 COPY_WORD(3);
157 COPY_WORD(4);
158 COPY_WORD(5);
159 COPY_WORD(6);
160 COPY_WORD(7);
161#if CHIP_L2_LINE_SIZE() == 128
162 COPY_WORD(8);
163 COPY_WORD(9);
164 COPY_WORD(10);
165 COPY_WORD(11);
166 COPY_WORD(12);
167 COPY_WORD(13);
168 COPY_WORD(14);
169 COPY_WORD(15);
170#elif CHIP_L2_LINE_SIZE() != 64
171# error Fix code that assumes particular L2 cache line sizes
172#endif
173
174 dst8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
175 src8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
176 }
177 }
178
179 for (; n >= sizeof(word_t); n -= sizeof(word_t))
180 ST8(dst8++, LD8(src8++));
181
182 if (__builtin_expect(n == 0, 1))
183 return RETVAL;
184
185 final = LD8(src8);
186 }
187
188
189 dst1 = (char *)dst8;
190#ifndef __BIG_ENDIAN__
191 if (n & 4) {
192 ST4((uint32_t *)dst1, final);
193 dst1 += 4;
194 final >>= 32;
195 n &= 3;
196 }
197 if (n & 2) {
198 ST2((uint16_t *)dst1, final);
199 dst1 += 2;
200 final >>= 16;
201 n &= 1;
202 }
203 if (n)
204 ST1((uint8_t *)dst1, final);
205#else
206 if (n & 4) {
207 ST4((uint32_t *)dst1, final >> 32);
208 dst1 += 4;
209 }
210 else
211 {
212 final >>= 32;
213 }
214 if (n & 2) {
215 ST2((uint16_t *)dst1, final >> 16);
216 dst1 += 2;
217 }
218 else
219 {
220 final >>= 16;
221 }
222 if (n & 1)
223 ST1((uint8_t *)dst1, final >> 8);
224#endif
225
226 return RETVAL;
227}
228
229#ifdef USERCOPY_FUNC
230#undef ST1
231#undef ST2
232#undef ST4
233#undef ST8
234#undef LD1
235#undef LD2
236#undef LD4
237#undef LD8
238#undef USERCOPY_FUNC
239#endif
240