1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include <linux/types.h>
21
22
23
24
/* Unit moved by a single load/store in the word-copy routines. */
#define op_t	unsigned long
/* Size of one op_t, in bytes. */
#define OPSIZ	(sizeof(op_t))

/* Plain char alias; not referenced in the visible part of this file. */
#define reg_char	char

/*
 * Combine two words into one: `lo` shifted right by `lo_shift` bits,
 * OR-ed with `hi` shifted left by `hi_shift` bits.  The word-copy code
 * calls this with the two shift counts adding up to the width of op_t.
 */
#define MERGE(lo, lo_shift, hi, hi_shift) \
	(((lo) >> (lo_shift)) | ((hi) << (hi_shift)))
32
33
34
/*
 * Copy `nbytes` bytes, one at a time and front to back, from the address
 * held in `src_bp` to the address held in `dst_bp`.
 *
 * `dst_bp` and `src_bp` must be modifiable integer lvalues holding
 * addresses; both are advanced by the number of bytes copied.  `nbytes`
 * is evaluated exactly once, before either address is changed.
 */
#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)				\
do {									\
	size_t __remaining;						\
									\
	for (__remaining = (nbytes); __remaining != 0; __remaining--) {	\
		unsigned char __byte = *(unsigned char *)(src_bp);	\
									\
		(src_bp) += 1;						\
		*(unsigned char *)(dst_bp) = __byte;			\
		(dst_bp) += 1;						\
	}								\
} while (0)
46
47
48
49
50
51
52
/*
 * Copy as many whole op_t words as fit in `nbytes` bytes from the address
 * in `src_bp` to the address in `dst_bp`, front to back.
 *
 * The helper is chosen by the alignment of `src_bp` alone; `dst_bp` is
 * handed to the helpers as-is and they store whole words through it.
 * Afterwards both addresses have been advanced past the copied words and
 * `nbytes_left` holds the count of trailing bytes still to be copied.
 *
 * `dst_bp` and `src_bp` must be modifiable integer lvalues.  `nbytes` is
 * evaluated several times; `nbytes_left` is written last, so it may name
 * the same object as `nbytes`.
 */
#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)		\
do {									\
	if ((src_bp) % OPSIZ != 0)					\
		_wordcopy_fwd_dest_aligned(dst_bp, src_bp,		\
					   (nbytes) / OPSIZ);		\
	else								\
		_wordcopy_fwd_aligned(dst_bp, src_bp,			\
				      (nbytes) / OPSIZ);		\
	(src_bp) += (nbytes) & -OPSIZ;					\
	(dst_bp) += (nbytes) & -OPSIZ;					\
	(nbytes_left) = (nbytes) % OPSIZ;				\
} while (0)
63
64
65
/*
 * Minimum copy length, in bytes, at which memcpy() aligns the destination
 * and copies whole words; shorter copies are done byte by byte.
 */
#define OP_T_THRES 16
67
68
69
70
71
72static void _wordcopy_fwd_aligned(long int dstp, long int srcp, size_t len)
73{
74 while (len > 7) {
75 register op_t a0, a1, a2, a3, a4, a5, a6, a7;
76
77 a0 = ((op_t *) srcp)[0];
78 a1 = ((op_t *) srcp)[1];
79 a2 = ((op_t *) srcp)[2];
80 a3 = ((op_t *) srcp)[3];
81 a4 = ((op_t *) srcp)[4];
82 a5 = ((op_t *) srcp)[5];
83 a6 = ((op_t *) srcp)[6];
84 a7 = ((op_t *) srcp)[7];
85 ((op_t *) dstp)[0] = a0;
86 ((op_t *) dstp)[1] = a1;
87 ((op_t *) dstp)[2] = a2;
88 ((op_t *) dstp)[3] = a3;
89 ((op_t *) dstp)[4] = a4;
90 ((op_t *) dstp)[5] = a5;
91 ((op_t *) dstp)[6] = a6;
92 ((op_t *) dstp)[7] = a7;
93
94 srcp += 8 * OPSIZ;
95 dstp += 8 * OPSIZ;
96 len -= 8;
97 }
98 while (len > 0) {
99 *(op_t *)dstp = *(op_t *)srcp;
100
101 srcp += OPSIZ;
102 dstp += OPSIZ;
103 len -= 1;
104 }
105}
106
107
108
109
110
111
112static void _wordcopy_fwd_dest_aligned(long int dstp, long int srcp,
113 size_t len)
114{
115 op_t ap;
116 int sh_1, sh_2;
117
118
119
120
121 sh_1 = 8 * (srcp % OPSIZ);
122 sh_2 = 8 * OPSIZ - sh_1;
123
124
125
126 srcp &= -OPSIZ;
127 ap = ((op_t *) srcp)[0];
128 srcp += OPSIZ;
129
130 while (len > 3) {
131 op_t a0, a1, a2, a3;
132
133 a0 = ((op_t *) srcp)[0];
134 a1 = ((op_t *) srcp)[1];
135 a2 = ((op_t *) srcp)[2];
136 a3 = ((op_t *) srcp)[3];
137 ((op_t *) dstp)[0] = MERGE(ap, sh_1, a0, sh_2);
138 ((op_t *) dstp)[1] = MERGE(a0, sh_1, a1, sh_2);
139 ((op_t *) dstp)[2] = MERGE(a1, sh_1, a2, sh_2);
140 ((op_t *) dstp)[3] = MERGE(a2, sh_1, a3, sh_2);
141
142 ap = a3;
143 srcp += 4 * OPSIZ;
144 dstp += 4 * OPSIZ;
145 len -= 4;
146 }
147 while (len > 0) {
148 register op_t a0;
149
150 a0 = ((op_t *) srcp)[0];
151 ((op_t *) dstp)[0] = MERGE(ap, sh_1, a0, sh_2);
152
153 ap = a0;
154 srcp += OPSIZ;
155 dstp += OPSIZ;
156 len -= 1;
157 }
158}
159
160void *memcpy(void *dstpp, const void *srcpp, size_t len)
161{
162 unsigned long int dstp = (long int) dstpp;
163 unsigned long int srcp = (long int) srcpp;
164
165
166
167
168 if (len >= OP_T_THRES) {
169
170 len -= (-dstp) % OPSIZ;
171 BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);
172
173
174
175
176
177
178
179
180
181
182
183 WORD_COPY_FWD(dstp, srcp, len, len);
184
185
186 }
187
188
189 BYTE_COPY_FWD(dstp, srcp, len);
190
191 return dstpp;
192}
193
/*
 * memcpyb - copy @len bytes from @srcpp to @dstpp one byte at a time
 * @dstpp: destination buffer
 * @srcpp: source buffer
 * @len:   number of bytes to copy
 *
 * Unlike memcpy() in this file, no word-sized accesses are issued here,
 * whatever the length or alignment.
 *
 * Returns @dstpp.
 */
void *memcpyb(void *dstpp, const void *srcpp, unsigned len)
{
	unsigned long int to = (long int) dstpp;
	unsigned long int from = (long int) srcpp;

	BYTE_COPY_FWD(to, from, len);

	return dstpp;
}
203