1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/module.h>
30#include <linux/types.h>
31#include <linux/string.h>
32#include <linux/ctype.h>
33#include <linux/textsearch.h>
34#include <linux/textsearch_fsm.h>
35
36struct ts_fsm
37{
38 unsigned int ntokens;
39 struct ts_fsm_token tokens[0];
40};
41
42
43#define _A 0x100
44#define _W 0x200
45
46
47static const u16 token_map[TS_FSM_TYPE_MAX+1] = {
48 [TS_FSM_SPECIFIC] = 0,
49 [TS_FSM_WILDCARD] = _W,
50 [TS_FSM_CNTRL] = _C,
51 [TS_FSM_LOWER] = _L,
52 [TS_FSM_UPPER] = _U,
53 [TS_FSM_PUNCT] = _P,
54 [TS_FSM_SPACE] = _S,
55 [TS_FSM_DIGIT] = _D,
56 [TS_FSM_XDIGIT] = _D | _X,
57 [TS_FSM_ALPHA] = _U | _L,
58 [TS_FSM_ALNUM] = _U | _L | _D,
59 [TS_FSM_PRINT] = _P | _U | _L | _D | _SP,
60 [TS_FSM_GRAPH] = _P | _U | _L | _D,
61 [TS_FSM_ASCII] = _A,
62};
63
64static const u16 token_lookup_tbl[256] = {
65_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
66_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
67_W|_A|_C, _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C|_S,
68_W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C, _W|_A|_C,
69_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
70_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
71_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
72_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
73_W|_A|_S|_SP, _W|_A|_P, _W|_A|_P, _W|_A|_P,
74_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
75_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
76_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
77_W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D,
78_W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D,
79_W|_A|_D, _W|_A|_D, _W|_A|_P, _W|_A|_P,
80_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
81_W|_A|_P, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X,
82_W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U,
83_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
84_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
85_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
86_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
87_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_P,
88_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
89_W|_A|_P, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X,
90_W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L,
91_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
92_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
93_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
94_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
95_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_P,
96_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_C,
97_W, _W, _W, _W,
98_W, _W, _W, _W,
99_W, _W, _W, _W,
100_W, _W, _W, _W,
101_W, _W, _W, _W,
102_W, _W, _W, _W,
103_W, _W, _W, _W,
104_W, _W, _W, _W,
105_W|_S|_SP, _W|_P, _W|_P, _W|_P,
106_W|_P, _W|_P, _W|_P, _W|_P,
107_W|_P, _W|_P, _W|_P, _W|_P,
108_W|_P, _W|_P, _W|_P, _W|_P,
109_W|_P, _W|_P, _W|_P, _W|_P,
110_W|_P, _W|_P, _W|_P, _W|_P,
111_W|_P, _W|_P, _W|_P, _W|_P,
112_W|_P, _W|_P, _W|_P, _W|_P,
113_W|_U, _W|_U, _W|_U, _W|_U,
114_W|_U, _W|_U, _W|_U, _W|_U,
115_W|_U, _W|_U, _W|_U, _W|_U,
116_W|_U, _W|_U, _W|_U, _W|_U,
117_W|_U, _W|_U, _W|_U, _W|_U,
118_W|_U, _W|_U, _W|_U, _W|_P,
119_W|_U, _W|_U, _W|_U, _W|_U,
120_W|_U, _W|_U, _W|_U, _W|_L,
121_W|_L, _W|_L, _W|_L, _W|_L,
122_W|_L, _W|_L, _W|_L, _W|_L,
123_W|_L, _W|_L, _W|_L, _W|_L,
124_W|_L, _W|_L, _W|_L, _W|_L,
125_W|_L, _W|_L, _W|_L, _W|_L,
126_W|_L, _W|_L, _W|_L, _W|_P,
127_W|_L, _W|_L, _W|_L, _W|_L,
128_W|_L, _W|_L, _W|_L, _W|_L};
129
130static inline int match_token(struct ts_fsm_token *t, u8 d)
131{
132 if (t->type)
133 return (token_lookup_tbl[d] & t->type) != 0;
134 else
135 return t->value == d;
136}
137
138static unsigned int fsm_find(struct ts_config *conf, struct ts_state *state)
139{
140 struct ts_fsm *fsm = ts_config_priv(conf);
141 struct ts_fsm_token *cur = NULL, *next;
142 unsigned int match_start, block_idx = 0, tok_idx;
143 unsigned block_len = 0, strict, consumed = state->offset;
144 const u8 *data;
145
146#define GET_NEXT_BLOCK() \
147({ consumed += block_idx; \
148 block_idx = 0; \
149 block_len = conf->get_next_block(consumed, &data, conf, state); })
150
151#define TOKEN_MISMATCH() \
152 do { \
153 if (strict) \
154 goto no_match; \
155 block_idx++; \
156 goto startover; \
157 } while(0)
158
159#define end_of_data() unlikely(block_idx >= block_len && !GET_NEXT_BLOCK())
160
161 if (end_of_data())
162 goto no_match;
163
164 strict = fsm->tokens[0].recur != TS_FSM_HEAD_IGNORE;
165
166startover:
167 match_start = consumed + block_idx;
168
169 for (tok_idx = 0; tok_idx < fsm->ntokens; tok_idx++) {
170 cur = &fsm->tokens[tok_idx];
171
172 if (likely(tok_idx < (fsm->ntokens - 1)))
173 next = &fsm->tokens[tok_idx + 1];
174 else
175 next = NULL;
176
177 switch (cur->recur) {
178 case TS_FSM_SINGLE:
179 if (end_of_data())
180 goto no_match;
181
182 if (!match_token(cur, data[block_idx]))
183 TOKEN_MISMATCH();
184 break;
185
186 case TS_FSM_PERHAPS:
187 if (end_of_data() ||
188 !match_token(cur, data[block_idx]))
189 continue;
190 break;
191
192 case TS_FSM_MULTI:
193 if (end_of_data())
194 goto no_match;
195
196 if (!match_token(cur, data[block_idx]))
197 TOKEN_MISMATCH();
198
199 block_idx++;
200
201
202 case TS_FSM_ANY:
203 if (next == NULL)
204 goto found_match;
205
206 if (end_of_data())
207 continue;
208
209 while (!match_token(next, data[block_idx])) {
210 if (!match_token(cur, data[block_idx]))
211 TOKEN_MISMATCH();
212 block_idx++;
213 if (end_of_data())
214 goto no_match;
215 }
216 continue;
217
218
219
220
221
222 case TS_FSM_HEAD_IGNORE:
223 if (end_of_data())
224 continue;
225
226 while (!match_token(next, data[block_idx])) {
227
228
229
230
231
232
233 if (!match_token(cur, data[block_idx]))
234 goto no_match;
235
236 block_idx++;
237 if (end_of_data())
238 goto no_match;
239 }
240
241 match_start = consumed + block_idx;
242 continue;
243 }
244
245 block_idx++;
246 }
247
248 if (end_of_data())
249 goto found_match;
250
251no_match:
252 return UINT_MAX;
253
254found_match:
255 state->offset = consumed + block_idx;
256 return match_start;
257}
258
259static struct ts_config *fsm_init(const void *pattern, unsigned int len,
260 gfp_t gfp_mask, int flags)
261{
262 int i, err = -EINVAL;
263 struct ts_config *conf;
264 struct ts_fsm *fsm;
265 struct ts_fsm_token *tokens = (struct ts_fsm_token *) pattern;
266 unsigned int ntokens = len / sizeof(*tokens);
267 size_t priv_size = sizeof(*fsm) + len;
268
269 if (len % sizeof(struct ts_fsm_token) || ntokens < 1)
270 goto errout;
271
272 if (flags & TS_IGNORECASE)
273 goto errout;
274
275 for (i = 0; i < ntokens; i++) {
276 struct ts_fsm_token *t = &tokens[i];
277
278 if (t->type > TS_FSM_TYPE_MAX || t->recur > TS_FSM_RECUR_MAX)
279 goto errout;
280
281 if (t->recur == TS_FSM_HEAD_IGNORE &&
282 (i != 0 || i == (ntokens - 1)))
283 goto errout;
284 }
285
286 conf = alloc_ts_config(priv_size, gfp_mask);
287 if (IS_ERR(conf))
288 return conf;
289
290 conf->flags = flags;
291 fsm = ts_config_priv(conf);
292 fsm->ntokens = ntokens;
293 memcpy(fsm->tokens, pattern, len);
294
295 for (i = 0; i < fsm->ntokens; i++) {
296 struct ts_fsm_token *t = &fsm->tokens[i];
297 t->type = token_map[t->type];
298 }
299
300 return conf;
301
302errout:
303 return ERR_PTR(err);
304}
305
306static void *fsm_get_pattern(struct ts_config *conf)
307{
308 struct ts_fsm *fsm = ts_config_priv(conf);
309 return fsm->tokens;
310}
311
312static unsigned int fsm_get_pattern_len(struct ts_config *conf)
313{
314 struct ts_fsm *fsm = ts_config_priv(conf);
315 return fsm->ntokens * sizeof(struct ts_fsm_token);
316}
317
318static struct ts_ops fsm_ops = {
319 .name = "fsm",
320 .find = fsm_find,
321 .init = fsm_init,
322 .get_pattern = fsm_get_pattern,
323 .get_pattern_len = fsm_get_pattern_len,
324 .owner = THIS_MODULE,
325 .list = LIST_HEAD_INIT(fsm_ops.list)
326};
327
328static int __init init_fsm(void)
329{
330 return textsearch_register(&fsm_ops);
331}
332
333static void __exit exit_fsm(void)
334{
335 textsearch_unregister(&fsm_ops);
336}
337
338MODULE_LICENSE("GPL");
339
340module_init(init_fsm);
341module_exit(exit_fsm);
342