1
2
3
4
5
6
7
8
9#include <linux/linkage.h>
10#include <asm/assembler.h>
11
12
13
14
15
16
17
/*
 * L(name) builds a file-local label. The argument must be pasted onto the
 * ".L" prefix; without the paste every L(x) would expand to the same bare
 * ".L" token and all labels/branch targets in this file would collide.
 */
#define L(label) .L ## label

/*
 * 0x01 and 0x7f replicated into each of the eight bytes of a dword.
 * Used together for the word-at-a-time NUL test:
 *   (X - REP8_01) & ~(X | REP8_7f)  is non-zero iff some byte of X is zero.
 */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
22
23
/* Arguments on entry; the result is returned in x0, reusing src1's register. */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		x0

/*
 * Scratch registers used by the routine.  The "w" names are the 32-bit
 * views of the corresponding 64-bit data registers.
 */
#define data1		x3
#define data1w		w3
#define data2		x4
#define data2w		w4
#define has_nul		x5
#define diff		x6
#define syndrome	x7
#define tmp1		x8
#define tmp2		x9
#define tmp3		x10
#define zeroones	x11
#define pos		x12
#define mask		x13
#define endloop		x14
/* count shares x13 with mask, and offset shares x12 with pos. */
#define count		mask
#define offset		pos
#define neg_offset	x15
47
48
49
50
51
52
/*
 * Endian-neutral shift mnemonics.
 *   LS_FW: lsr on little-endian builds, lsl on big-endian builds.
 *   LS_BK: the opposite direction of LS_FW.
 * NOTE(review): presumably "FW" moves data towards the earlier-in-memory
 * bytes of a loaded dword and "BK" towards the later ones - confirm.
 */
#ifndef __AARCH64EB__
#define LS_FW	lsr
#define LS_BK	lsl
#else
#define LS_FW	lsl
#define LS_BK	lsr
#endif
60
/*
 * __pi_strncmp - compare two strings, looking at no more than `limit` bytes.
 * Also exported as a weak alias under the name strncmp.
 *
 * In:    x0 = src1, x1 = src2, x2 = limit (byte count)
 * Out:   x0 = 0 if no difference was found before a NUL in src1 or before
 *             the limit; otherwise a value whose sign gives the ordering
 *             of the first differing bytes (compared as unsigned).
 * Uses:  x3-x15 and the condition flags; sp is never touched.
 *
 * Strategy:
 *   - src1 and src2 share the same offset within a dword: compare 8 bytes
 *     per iteration from aligned addresses (L(loop_aligned)).
 *   - otherwise, short limits (< 16) use a byte loop; longer ones align
 *     src1 bytewise and then compare aligned dwords of src1 against dwords
 *     of src2 reassembled from aligned 16-byte loads (L(loop_misaligned)).
 */
SYM_FUNC_START(__pi_strncmp)
	cbz	limit, L(ret0)			/* Nothing to compare.  */
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7			/* Same offset within a dword?  */
	and	count, src1, #7			/* src1's offset within its dword.  */
	b.ne	L(misaligned8)
	cbnz	count, L(mutual_align)

	/*
	 * NUL detection: (X - 0x01..01) & ~(X | 0x7f..7f) is non-zero iff
	 * some byte of X is zero, so eight bytes are tested at once.
	 */
	.p2align 4
L(loop_aligned):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned):
	subs	limit, limit, #8
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2		/* Non-zero if the dwords differ.  */
	csinv	endloop, diff, xzr, hi		/* All ones once limit is used up.  */
	bics	has_nul, tmp1, tmp2		/* Non-zero if data1 holds a NUL.  */
	ccmp	endloop, #0, #0, eq		/* No NUL: test endloop; else force NE.  */
	b.eq	L(loop_aligned)
	/* Fall through: difference, NUL, or limit reached.  */

L(full_check):
#ifndef __AARCH64EB__
	orr	syndrome, diff, has_nul
	add	limit, limit, 8			/* Undo the last subs.  */
L(syndrome_check):
	/*
	 * Find the first marked byte and check whether it lies inside the
	 * remaining limit.  Byte-reversing puts the earliest byte at the top
	 * so clz yields its bit position.
	 */
	rev	syndrome, syndrome
	rev	data1, data1
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	cmp	limit, pos, lsr #3		/* HI if the byte is within limit.  */
	lsl	data2, data2, pos
	/*
	 * Zero-extend the top byte of each dword and subtract, so the bytes
	 * are compared as unsigned values.
	 */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	csel	result, result, xzr, hi		/* Beyond the limit: equal.  */
	ret
#else
	/* Bit 63 of limit clear: the last subs did not go negative.  */
	tbz	limit, #63, L(not_limit)
	add	tmp1, limit, 8			/* Undo the last subs.  */
	cbz	limit, L(not_limit)

	/* Clear the bytes that lie beyond the limit in both dwords.  */
	lsl	limit, tmp1, #3			/* Bytes -> bits.  */
	mov	mask, #~0
	lsr	mask, mask, limit
	bic	data1, data1, mask
	bic	data2, data2, mask

	/* Force a NUL marker so the slow path below is taken.  */
	orr	has_nul, has_nul, mask

L(not_limit):
	/*
	 * With no NUL in the dword the big-endian values can be compared
	 * directly as unsigned integers, giving -1, 0 or 1.
	 */
	cbnz	has_nul, 1f
	cmp	data1, data2
	cset	result, ne
	cneg	result, result, lo
	ret
1:
	/* Redo the NUL detection on a byte-reversed copy of data1.  */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/*
	 * pos is the bit position of the first difference or NUL marker;
	 * shifting left by it brings the deciding bits to the top.
	 */
L(end_quick):
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* Zero-extend the top bytes and subtract (unsigned byte compare).  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

L(mutual_align):
	/*
	 * Both pointers have the same non-zero offset within a dword.  Round
	 * them down, load the containing dwords and set the bytes that come
	 * before the start of the strings to 0xff in both, so they compare
	 * equal and cannot look like a NUL.
	 */
	bic	src1, src1, #7
	bic	src2, src2, #7
	ldr	data1, [src1], #8
	neg	tmp3, count, lsl #3		/* -(8 * count); shifts use it mod 64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
	LS_FW	tmp2, tmp2, tmp3		/* Ones over the leading bytes.  */
	/* Account for the extra leading bytes, saturating on overflow.  */
	adds	limit, limit, count
	csinv	limit, limit, xzr, lo
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	b	L(start_realigned)

	.p2align 4
	/* Differently aligned pointers: limits below 16 use the byte loop.  */
L(misaligned8):
	cmp	limit, #16
	b.hs	L(try_misaligned_words)

L(byte_loop):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	ccmp	data1w, #1, #0, hi		/* Limit left: C = (byte != NUL).  */
	ccmp	data1w, data2w, #0, cs		/* Non-NUL: compare; else force NE.  */
	b.eq	L(byte_loop)
L(done):
	sub	result, data1, data2
	ret

	/* Compare bytewise until src1 is dword aligned, then go dword-wise.  */
L(try_misaligned_words):
	cbz	count, L(src1_aligned)

	neg	count, count
	and	count, count, #7		/* Bytes up to src1's next dword.  */
	sub	limit, limit, count

L(page_end_loop):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	cmp	data1w, #1			/* C = (byte != NUL).  */
	ccmp	data1w, data2w, #0, cs		/* Non-NUL: compare; else force NE.  */
	b.ne	L(done)
	subs	count, count, #1
	b.hi	L(page_end_loop)

	/*
	 * src1 is now dword aligned and src2 is not.  src2 is read with
	 * aligned 16-byte loads into tmp1:tmp2 and each aligned dword of
	 * src1 is compared against the matching bytes in one of three ways:
	 *   STEP_A: a full dword assembled from tmp1 and tmp2;
	 *   STEP_B: the first part of data1 against the tail of tmp2;
	 *   STEP_C: the rest of data1 against the head of the next tmp1.
	 * Splitting B and C means the next 16-byte block of src2 is loaded
	 * only after the bytes before it were found to hold no NUL.
	 */
L(src1_aligned):
	/*
	 * offset = src2's misalignment in bits.  The upper bits need no
	 * masking because register shifts use the amount modulo 64.
	 */
	lsl	offset, src2, #3
	bic	src2, src2, #0xf
	mov	mask, -1
	neg	neg_offset, offset
	ldr	data1, [src1], #8
	ldp	tmp1, tmp2, [src2], #16
	LS_BK	mask, mask, neg_offset		/* Ones over the STEP_C bytes.  */
	and	neg_offset, neg_offset, #63	/* Real value needed for cmp below.  */
	/* src2 starts in the upper dword: tmp1 holds nothing useful.  */
	tbnz	offset, 6, L(misaligned_mid_loop)

L(loop_misaligned):
	/* STEP_A: full 8-byte compare, src2 data spans tmp1 and tmp2.  */
	LS_FW	data2, tmp1, offset
	LS_BK	tmp1, tmp2, neg_offset
	subs	limit, limit, #8
	orr	data2, data2, tmp1		/* 8 src2 bytes from two registers.  */
	sub	has_nul, data1, zeroones
	eor	diff, data1, data2		/* Non-zero if the dwords differ.  */
	orr	tmp3, data1, #REP8_7f
	csinv	endloop, diff, xzr, hi		/* All ones once limit is used up.  */
	bic	has_nul, has_nul, tmp3		/* Non-zero if data1 holds a NUL.  */
	orr	tmp3, endloop, has_nul
	cbnz	tmp3, L(full_check)

	ldr	data1, [src1], #8
L(misaligned_mid_loop):
	/* STEP_B: first part of data1 against the tail of tmp2.  */
	LS_FW	data2, tmp2, offset
#ifdef __AARCH64EB__
	/*
	 * Big-endian: run the NUL test on a byte-reversed copy so that the
	 * resulting has_nul can be reused in STEP_C and in the syndrome.
	 */
	rev	tmp3, data1
	#define data1_fixed tmp3
#else
	#define data1_fixed data1
#endif
	sub	has_nul, data1_fixed, zeroones
	orr	tmp3, data1_fixed, #REP8_7f
	eor	diff, data2, data1		/* Non-zero if the dwords differ.  */
	bic	has_nul, has_nul, tmp3		/* Non-zero if data1 holds a NUL.  */
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	cmp	limit, neg_offset, lsr #3	/* HI if limit exceeds STEP_B bytes.  */
	orr	syndrome, diff, has_nul
	bic	syndrome, syndrome, mask	/* Ignore the later (STEP_C) bytes.  */
	csinv	tmp3, syndrome, xzr, hi		/* All ones once limit is used up.  */
	cbnz	tmp3, L(syndrome_check)

	/* STEP_C: rest of data1 against the head of the next tmp1.  */
	ldp	tmp1, tmp2, [src2], #16
	cmp	limit, #8
	LS_BK	data2, tmp1, neg_offset
	eor	diff, data2, data1		/* Non-zero if the dwords differ.  */
	orr	syndrome, diff, has_nul
	and	syndrome, syndrome, mask	/* Ignore the earlier (STEP_B) bytes.  */
	csinv	tmp3, syndrome, xzr, hi		/* All ones once limit is used up.  */
	cbnz	tmp3, L(syndrome_check)

	ldr	data1, [src1], #8
	sub	limit, limit, #8
	b	L(loop_misaligned)

#ifdef __AARCH64EB__
L(syndrome_check):
	/* Marked bit inside the limit: extract the result; else return 0.  */
	clz	pos, syndrome
	cmp	pos, limit, lsl #3
	b.lo	L(end_quick)
#endif

L(ret0):
	mov	result, #0
	ret
SYM_FUNC_END(__pi_strncmp)
SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp)
EXPORT_SYMBOL_NOKASAN(strncmp)
311