/*
 * strncmp() for AArch64 (GNU as syntax, run through the C preprocessor).
 *
 * NOTE(review): the header comment that originally preceded the includes
 * (presumably licence and attribution) is missing from this copy; restore
 * it from the upstream file before merging.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Local-label helper: L(foo) expands to .Lfoo.  The ".L" prefix keeps the
 * label out of the object file's symbol table.  The "## label" paste is
 * essential: without it every L(...) would name the same symbol ".L".
 */
#define L(label) .L ## label

/* Byte-replicated 64-bit patterns: 0x01, 0x7f and 0x80 in every byte. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  result shares x0 with src1. */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		x0

/*
 * Internal variables.  The w-register names are the 32-bit views of the
 * same registers as data1/data2.
 */
#define data1		x3
#define data1w		w3
#define data2		x4
#define data2w		w4
#define has_nul		x5
#define diff		x6
#define syndrome	x7
#define tmp1		x8
#define tmp2		x9
#define tmp3		x10
#define zeroones	x11
#define pos		x12
#define limit_wd	x13
#define mask		x14
#define endloop		x15
/* count aliases mask (x14): the two must never be live at the same time. */
#define count		mask

/*
 * int strncmp(const char *src1, const char *src2, size_t limit)
 *
 * In:    x0 = src1, x1 = src2, x2 = limit (maximum number of bytes compared)
 * Out:   x0 = negative, zero or positive according to the first differing
 *        byte (compared as unsigned char); zero if the strings are equal up
 *        to a NUL or up to limit bytes.
 * Clobb: x1-x15 and the condition flags.  Leaf routine, no stack use.
 *
 * Strategy: strings are compared 8 bytes (one dword) at a time whenever
 * that cannot read across a 4 KiB page boundary further than a bytewise
 * compare would; otherwise bytes are compared one at a time.
 *
 * NUL detection works on the principle that
 *     (X - 0x01..01) & ~(X | 0x7f..7f)
 * is non-zero iff some byte of X is zero, which tests all eight bytes of a
 * dword in parallel.
 *
 * NOTE(review): immediates and ccmp/tbz operands in this routine were
 * reconstructed after being lost from this copy of the file; diff against
 * the upstream source before merging.
 */
SYM_FUNC_START_WEAK_PI(strncmp)
	cbz	limit, L(ret0)			/* Nothing to compare. */
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7			/* Same offset within a dword? */
	and	count, src1, #7			/* Flags from tst are preserved. */
	b.ne	L(misaligned8)
	cbnz	count, L(mutual_align)
	/* Number of dwords to examine, minus one. */
	sub	limit_wd, limit, #1		/* limit != 0, so no underflow. */
	lsr	limit_wd, limit_wd, #3		/* Bytes -> dwords. */

	/*
	 * Main loop: both pointers are 8-byte aligned.  Exits when the dword
	 * budget is exhausted, a difference is seen, or data1 contains a NUL.
	 */
	.p2align 4
L(loop_aligned):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned):
	subs	limit_wd, limit_wd, #1
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2		/* Non-zero if the dwords differ. */
	csinv	endloop, diff, xzr, pl		/* ~0 once limit_wd went negative. */
	bics	has_nul, tmp1, tmp2		/* Non-zero if data1 has a NUL. */
	ccmp	endloop, #0, #0, eq		/* No NUL: Z = (endloop == 0). */
	b.eq	L(loop_aligned)
	/* End of main loop. */

	/* Budget not exhausted: we stopped on a NUL or a difference. */
	tbz	limit_wd, #63, L(not_limit)

	/* limit % 8 == 0 => every byte of the last dword is significant. */
	ands	limit, limit, #7
	b.eq	L(not_limit)

	/* Discard the bytes of the last dword that lie beyond the limit. */
	lsl	limit, limit, #3		/* Bytes -> bits. */
	mov	mask, #~0
#ifdef __AARCH64EB__
	lsr	mask, mask, limit
#else
	lsl	mask, mask, limit
#endif
	bic	data1, data1, mask
	bic	data2, data2, mask

	/* Mark the first out-of-range byte as a terminator in the syndrome. */
	orr	has_nul, has_nul, mask

	/*
	 * Common exit: data1/data2 hold the dwords containing the deciding
	 * byte; diff and has_nul locate it.
	 */
L(not_limit):
	orr	syndrome, diff, has_nul

#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	/*
	 * After byte reversal the most significant set bit of the syndrome
	 * marks either the first differing bit or the top bit of the first
	 * NUL byte.  Shifting left by that amount brings the deciding byte
	 * to the top of each dword.
	 */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* Zero-extend the top bytes (char compared as unsigned), subtract. */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/*
	 * Big-endian: the has_nul value computed on the raw dword cannot be
	 * used for positioning, because the borrow from a zero byte can turn
	 * an earlier (more significant) 0x01 byte into a false NUL marker.
	 *
	 * If there is no NUL (and no limit mask) the dwords can be compared
	 * directly.  A plain subtraction is not enough since only the sign
	 * of the 32-bit result matters to the caller.
	 */
	cbnz	has_nul, 1f
	cmp	data1, data2
	cset	result, ne
	cneg	result, result, lo
	ret
1:
	/* Recompute the NUL detection on a byte-reversed copy of data1. */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/*
	 * The most significant set bit of the syndrome marks either the first
	 * differing bit or the top bit of the first NUL byte.  Shifting left
	 * by that amount brings the deciding byte to the top of each dword.
	 */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* Zero-extend the top bytes (char compared as unsigned), subtract. */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

L(mutual_align):
	/*
	 * Both sources share the same non-zero offset (count) within a dword.
	 * Round both addresses down, load the enclosing dwords and force the
	 * bytes that precede the real start to 0xff in both, so they neither
	 * differ nor look like a NUL.  The limit bookkeeping is adjusted for
	 * the extra leading bytes without overflowing when limit is close to
	 * the maximum 64-bit value.
	 */
	bic	src1, src1, #7
	bic	src2, src2, #7
	ldr	data1, [src1], #8
	neg	tmp3, count, lsl #3		/* 64 - 8 * count (mod 64). */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
	sub	limit_wd, limit, #1		/* limit != 0, so no underflow. */
#ifdef __AARCH64EB__
	/* Big-endian: the early bytes are at the MSB end. */
	lsl	tmp2, tmp2, tmp3		/* Register shift uses tmp3 & 63. */
#else
	/* Little-endian: the early bytes are at the LSB end. */
	lsr	tmp2, tmp2, tmp3		/* Register shift uses tmp3 & 63. */
#endif
	and	tmp3, limit_wd, #7
	lsr	limit_wd, limit_wd, #3
	/* Only the low 3 bits of limit are used later, so overflow is harmless. */
	add	limit, limit, count
	add	tmp3, tmp3, count
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	add	limit_wd, limit_wd, tmp3, lsr #3
	b	L(start_realigned)

	.p2align 4
	/* Sources are not mutually aligned.  Below 16 bytes, go bytewise. */
L(misaligned8):
	cmp	limit, #16
	b.hs	L(try_misaligned_words)

L(byte_loop):
	/* Loop while bytes remain, data1 is not NUL and the bytes are equal. */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	ccmp	data1w, #1, #0, hi		/* Bytes left: C = (data1 != 0). */
	ccmp	data1w, data2w, #0, cs		/* Else NZCV = 0, i.e. "ne". */
	b.eq	L(byte_loop)
L(done):
	/* ldrb zero-extended both bytes, so this is an unsigned-char diff. */
	sub	result, data1, data2
	ret

	/*
	 * Align src1 to a dword with a bytewise compare, then run the dword
	 * loop with src1 aligned and src2 unaligned.
	 */
L(try_misaligned_words):
	lsr	limit_wd, limit, #3
	cbz	count, L(do_misaligned)		/* src1 is already aligned. */

	neg	count, count
	and	count, count, #7		/* Bytes up to src1's alignment. */
	sub	limit, limit, count
	lsr	limit_wd, limit, #3

	/* Compare count bytes one at a time; also re-entered near a page end. */
L(page_end_loop):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	cmp	data1w, #1			/* C = (data1 != 0). */
	ccmp	data1w, data2w, #0, cs		/* NUL forces "ne". */
	b.ne	L(done)
	subs	count, count, #1
	b.hi	L(page_end_loop)

L(do_misaligned):
	/*
	 * Set count for the next bytewise pass (one dword's worth).  Unlike
	 * the aligned loop, one dword fewer is fetched here; the tail is
	 * handled separately in done_loop.
	 */
	mov	count, #8
	subs	limit_wd, limit_wd, #1
	b.lo	L(done_loop)
L(loop_misaligned):
	/* src2 in the last 8 bytes of a 4 KiB page?  Then go bytewise. */
	and	tmp2, src2, #0xff8
	eor	tmp2, tmp2, #0xff8
	cbz	tmp2, L(page_end_loop)

	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2		/* Non-zero if the dwords differ. */
	bics	has_nul, tmp1, tmp2		/* Non-zero if data1 has a NUL. */
	ccmp	diff, #0, #0, eq		/* No NUL: Z = (diff == 0). */
	b.ne	L(not_limit)
	subs	limit_wd, limit_wd, #1
	b.pl	L(loop_misaligned)

L(done_loop):
	/* Whole dwords are exhausted; limit & 7 bytes may still remain. */
	and	limit, limit, #7
	cbz	limit, L(not_limit)
	/*
	 * Re-read the last dword so that it ends exactly at the limit: step
	 * back 8 bytes and index forward by the remaining byte count.
	 */
	sub	src1, src1, 8
	sub	src2, src2, 8
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2		/* Non-zero if the dwords differ. */
	bics	has_nul, tmp1, tmp2		/* Non-zero if data1 has a NUL. */
	ccmp	diff, #0, #0, eq		/* No NUL: Z = (diff == 0). */
	b.ne	L(not_limit)

L(ret0):
	mov	result, #0
	ret

SYM_FUNC_END_PI(strncmp)
EXPORT_SYMBOL_NOHWKASAN(strncmp)