1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/init.h>
12#include <linux/module.h>
13#include <linux/netlink.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter/nf_tables.h>
16#include <net/netfilter/nf_tables_core.h>
17#include <uapi/linux/netfilter/nf_tables.h>
18#include <linux/bitmap.h>
19#include <linux/bitops.h>
20
21#include <linux/compiler.h>
22#include <asm/fpu/api.h>
23
24#include "nft_set_pipapo_avx2.h"
25#include "nft_set_pipapo.h"
26
/* Ratio between XSAVE_YMM_SIZE and the width of an unsigned long. The lookup
 * functions in this file walk lookup tables and bitmaps this many longs at a
 * time, issuing one YMM load/store per step.
 */
#define NFT_PIPAPO_LONGS_PER_M256	(XSAVE_YMM_SIZE / BITS_PER_LONG)

/* Load memory operand @mem into register %ymm<regno> using vmovntdqa, the
 * non-temporal flavour of the aligned vector load.
 */
#define NFT_PIPAPO_AVX2_LOAD(regno, mem)				\
	asm volatile("vmovntdqa %0, %%ymm" #regno : : "m" (mem))

/* Load into %ymm<regno> the bucket selected by value @val in group @grp of
 * lookup table @table, for 4-bit groups. Buckets are @stride longs apart.
 */
#define NFT_PIPAPO_AVX2_BUCKET_LOAD4(regno, table, grp, val, stride)	\
	NFT_PIPAPO_AVX2_LOAD(regno,					\
			     table[((grp) * NFT_PIPAPO_BUCKETS(4) +	\
				    (val)) * (stride)])

/* Same as NFT_PIPAPO_AVX2_BUCKET_LOAD4(), for 8-bit groups */
#define NFT_PIPAPO_AVX2_BUCKET_LOAD8(regno, table, grp, val, stride)	\
	NFT_PIPAPO_AVX2_LOAD(regno,					\
			     table[((grp) * NFT_PIPAPO_BUCKETS(8) +	\
				    (val)) * (stride)])

/* Bitwise AND of %ymm<src1> and %ymm<src2>, result in %ymm<out> */
#define NFT_PIPAPO_AVX2_AND(out, src1, src2)				\
	asm volatile("vpand %ymm" #src1 ", %ymm" #src2 ", %ymm" #out)

/* Jump to @target if %ymm<regno> has no bit set: vptest of a register against
 * itself sets ZF only when the register is all zeroes.
 */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(regno, target)			\
	asm_volatile_goto("vptest %%ymm" #regno ", %%ymm" #regno ";"	\
			  "je %l[" #target "]" : : : : target)

/* Store %ymm<regno> to memory operand @mem with an aligned vector store */
#define NFT_PIPAPO_AVX2_STORE(mem, regno)				\
	asm volatile("vmovdqa %%ymm" #regno ", %0" : "=m" (mem))

/* Clear %ymm<regno> by XORing it with itself */
#define NFT_PIPAPO_AVX2_ZERO(regno)					\
	asm volatile("vpxor %ymm" #regno ", %ymm" #regno ", %ymm" #regno)

/* Per-CPU flag selecting which half of the scratch area holds the current
 * result map; read and toggled by nft_pipapo_avx2_lookup().
 */
static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
76
77
78
79
80
81
82
/**
 * nft_pipapo_avx2_prepare() - Set up register state for a lookup
 *
 * Clears %ymm15. The lookup functions in this file store %ymm15 into the
 * result map on their "nomatch" path, so it serves as a ready-made all-zero
 * vector for the duration of a lookup.
 */
static void nft_pipapo_avx2_prepare(void)
{
	NFT_PIPAPO_AVX2_ZERO(15);
}
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102static void nft_pipapo_avx2_fill(unsigned long *data, int start, int len)
103{
104 int offset = start % BITS_PER_LONG;
105 unsigned long mask;
106
107 data += start / BITS_PER_LONG;
108
109 if (likely(len == 1)) {
110 *data |= BIT(offset);
111 return;
112 }
113
114 if (likely(len < BITS_PER_LONG || offset)) {
115 if (likely(len + offset <= BITS_PER_LONG)) {
116 *data |= GENMASK(len - 1 + offset, offset);
117 return;
118 }
119
120 *data |= ~0UL << offset;
121 len -= BITS_PER_LONG - offset;
122 data++;
123
124 if (len <= BITS_PER_LONG) {
125 mask = ~0UL >> (BITS_PER_LONG - len);
126 *data |= mask;
127 return;
128 }
129 }
130
131 memset(data, 0xff, len / BITS_PER_BYTE);
132 data += len / BITS_PER_LONG;
133
134 len %= BITS_PER_LONG;
135 if (len)
136 *data |= ~0UL >> (BITS_PER_LONG - len);
137}
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156static int nft_pipapo_avx2_refill(int offset, unsigned long *map,
157 unsigned long *dst,
158 union nft_pipapo_map_bucket *mt, bool last)
159{
160 int ret = -1;
161
162#define NFT_PIPAPO_AVX2_REFILL_ONE_WORD(x) \
163 do { \
164 while (map[(x)]) { \
165 int r = __builtin_ctzl(map[(x)]); \
166 int i = (offset + (x)) * BITS_PER_LONG + r; \
167 \
168 if (last) \
169 return i; \
170 \
171 nft_pipapo_avx2_fill(dst, mt[i].to, mt[i].n); \
172 \
173 if (ret == -1) \
174 ret = mt[i].to; \
175 \
176 map[(x)] &= ~(1UL << r); \
177 } \
178 } while (0)
179
180 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(0);
181 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(1);
182 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(2);
183 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(3);
184#undef NFT_PIPAPO_AVX2_REFILL_ONE_WORD
185
186 return ret;
187}
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill,
219 struct nft_pipapo_field *f, int offset,
220 const u8 *pkt, bool first, bool last)
221{
222 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
223 u8 pg[2] = { pkt[0] >> 4, pkt[0] & 0xf };
224 unsigned long *lt = f->lt, bsize = f->bsize;
225
226 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
227 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
228 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
229
230 if (first) {
231 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
232 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
233 NFT_PIPAPO_AVX2_AND(4, 0, 1);
234 } else {
235 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
236 NFT_PIPAPO_AVX2_LOAD(2, map[i_ul]);
237 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
238 NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nothing);
239 NFT_PIPAPO_AVX2_AND(3, 0, 1);
240 NFT_PIPAPO_AVX2_AND(4, 2, 3);
241 }
242
243 NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch);
244 NFT_PIPAPO_AVX2_STORE(map[i_ul], 4);
245
246 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
247 if (last)
248 return b;
249
250 if (unlikely(ret == -1))
251 ret = b / XSAVE_YMM_SIZE;
252
253 continue;
254nomatch:
255 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
256nothing:
257 ;
258 }
259
260 return ret;
261}
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill,
281 struct nft_pipapo_field *f, int offset,
282 const u8 *pkt, bool first, bool last)
283{
284 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
285 u8 pg[4] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf };
286 unsigned long *lt = f->lt, bsize = f->bsize;
287
288 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
289 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
290 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
291
292 if (first) {
293 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
294 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
295 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 2, pg[2], bsize);
296 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 3, pg[3], bsize);
297 NFT_PIPAPO_AVX2_AND(4, 0, 1);
298 NFT_PIPAPO_AVX2_AND(5, 2, 3);
299 NFT_PIPAPO_AVX2_AND(7, 4, 5);
300 } else {
301 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
302
303 NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
304
305 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize);
306 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize);
307 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize);
308 NFT_PIPAPO_AVX2_AND(5, 0, 1);
309
310 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
311
312 NFT_PIPAPO_AVX2_AND(6, 2, 3);
313 NFT_PIPAPO_AVX2_AND(7, 4, 5);
314
315 NFT_PIPAPO_AVX2_AND(7, 6, 7);
316 }
317
318
319 NFT_PIPAPO_AVX2_NOMATCH_GOTO(7, nomatch);
320 NFT_PIPAPO_AVX2_STORE(map[i_ul], 7);
321
322 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
323 if (last)
324 return b;
325
326 if (unlikely(ret == -1))
327 ret = b / XSAVE_YMM_SIZE;
328
329 continue;
330nomatch:
331 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
332nothing:
333 ;
334 }
335
336 return ret;
337}
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill,
357 struct nft_pipapo_field *f, int offset,
358 const u8 *pkt, bool first, bool last)
359{
360 u8 pg[8] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf,
361 pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf,
362 };
363 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
364 unsigned long *lt = f->lt, bsize = f->bsize;
365
366 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
367 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
368 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
369
370 if (first) {
371 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
372 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
373 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 2, pg[2], bsize);
374 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 3, pg[3], bsize);
375 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 4, pg[4], bsize);
376 NFT_PIPAPO_AVX2_AND(5, 0, 1);
377 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 5, pg[5], bsize);
378 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 6, pg[6], bsize);
379 NFT_PIPAPO_AVX2_AND(8, 2, 3);
380 NFT_PIPAPO_AVX2_AND(9, 4, 5);
381 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 7, pg[7], bsize);
382 NFT_PIPAPO_AVX2_AND(11, 6, 7);
383 NFT_PIPAPO_AVX2_AND(12, 8, 9);
384 NFT_PIPAPO_AVX2_AND(13, 10, 11);
385
386
387 NFT_PIPAPO_AVX2_AND(1, 12, 13);
388 } else {
389 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
390 NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
391 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize);
392 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize);
393 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize);
394
395 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
396
397 NFT_PIPAPO_AVX2_AND(5, 0, 1);
398 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 4, pg[4], bsize);
399 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize);
400 NFT_PIPAPO_AVX2_AND(8, 2, 3);
401 NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt, 6, pg[6], bsize);
402 NFT_PIPAPO_AVX2_AND(10, 4, 5);
403 NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt, 7, pg[7], bsize);
404 NFT_PIPAPO_AVX2_AND(12, 6, 7);
405 NFT_PIPAPO_AVX2_AND(13, 8, 9);
406 NFT_PIPAPO_AVX2_AND(14, 10, 11);
407
408
409 NFT_PIPAPO_AVX2_AND(1, 12, 13);
410 NFT_PIPAPO_AVX2_AND(1, 1, 14);
411 }
412
413 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nomatch);
414 NFT_PIPAPO_AVX2_STORE(map[i_ul], 1);
415
416 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
417 if (last)
418 return b;
419
420 if (unlikely(ret == -1))
421 ret = b / XSAVE_YMM_SIZE;
422
423 continue;
424
425nomatch:
426 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
427nothing:
428 ;
429 }
430
431 return ret;
432}
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill,
452 struct nft_pipapo_field *f, int offset,
453 const u8 *pkt, bool first, bool last)
454{
455 u8 pg[12] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf,
456 pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf,
457 pkt[4] >> 4, pkt[4] & 0xf, pkt[5] >> 4, pkt[5] & 0xf,
458 };
459 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
460 unsigned long *lt = f->lt, bsize = f->bsize;
461
462 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
463 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
464 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
465
466 if (!first)
467 NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);
468
469 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 0, pg[0], bsize);
470 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize);
471 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize);
472
473 if (!first) {
474 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
475 NFT_PIPAPO_AVX2_AND(1, 1, 0);
476 }
477
478 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize);
479 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 4, pg[4], bsize);
480 NFT_PIPAPO_AVX2_AND(6, 2, 3);
481 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize);
482 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 6, pg[6], bsize);
483 NFT_PIPAPO_AVX2_AND(9, 1, 4);
484 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 7, pg[7], bsize);
485 NFT_PIPAPO_AVX2_AND(11, 5, 6);
486 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 8, pg[8], bsize);
487 NFT_PIPAPO_AVX2_AND(13, 7, 8);
488 NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 9, pg[9], bsize);
489
490 NFT_PIPAPO_AVX2_AND(0, 9, 10);
491 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 10, pg[10], bsize);
492 NFT_PIPAPO_AVX2_AND(2, 11, 12);
493 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 11, pg[11], bsize);
494 NFT_PIPAPO_AVX2_AND(4, 13, 14);
495 NFT_PIPAPO_AVX2_AND(5, 0, 1);
496
497 NFT_PIPAPO_AVX2_AND(6, 2, 3);
498
499
500 NFT_PIPAPO_AVX2_AND(7, 4, 5);
501 NFT_PIPAPO_AVX2_AND(8, 6, 7);
502
503 NFT_PIPAPO_AVX2_NOMATCH_GOTO(8, nomatch);
504 NFT_PIPAPO_AVX2_STORE(map[i_ul], 8);
505
506 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
507 if (last)
508 return b;
509
510 if (unlikely(ret == -1))
511 ret = b / XSAVE_YMM_SIZE;
512
513 continue;
514nomatch:
515 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
516nothing:
517 ;
518 }
519
520 return ret;
521}
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill,
541 struct nft_pipapo_field *f, int offset,
542 const u8 *pkt, bool first, bool last)
543{
544 u8 pg[32] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf,
545 pkt[2] >> 4, pkt[2] & 0xf, pkt[3] >> 4, pkt[3] & 0xf,
546 pkt[4] >> 4, pkt[4] & 0xf, pkt[5] >> 4, pkt[5] & 0xf,
547 pkt[6] >> 4, pkt[6] & 0xf, pkt[7] >> 4, pkt[7] & 0xf,
548 pkt[8] >> 4, pkt[8] & 0xf, pkt[9] >> 4, pkt[9] & 0xf,
549 pkt[10] >> 4, pkt[10] & 0xf, pkt[11] >> 4, pkt[11] & 0xf,
550 pkt[12] >> 4, pkt[12] & 0xf, pkt[13] >> 4, pkt[13] & 0xf,
551 pkt[14] >> 4, pkt[14] & 0xf, pkt[15] >> 4, pkt[15] & 0xf,
552 };
553 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
554 unsigned long *lt = f->lt, bsize = f->bsize;
555
556 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
557 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
558 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
559
560 if (!first)
561 NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);
562
563 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 0, pg[0], bsize);
564 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize);
565 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize);
566 NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize);
567 if (!first) {
568 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
569 NFT_PIPAPO_AVX2_AND(1, 1, 0);
570 }
571
572 NFT_PIPAPO_AVX2_AND(5, 2, 3);
573 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 4, pg[4], bsize);
574 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 5, pg[5], bsize);
575 NFT_PIPAPO_AVX2_AND(8, 1, 4);
576 NFT_PIPAPO_AVX2_BUCKET_LOAD4(9, lt, 6, pg[6], bsize);
577 NFT_PIPAPO_AVX2_AND(10, 5, 6);
578 NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt, 7, pg[7], bsize);
579 NFT_PIPAPO_AVX2_AND(12, 7, 8);
580 NFT_PIPAPO_AVX2_BUCKET_LOAD4(13, lt, 8, pg[8], bsize);
581 NFT_PIPAPO_AVX2_AND(14, 9, 10);
582
583 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 9, pg[9], bsize);
584 NFT_PIPAPO_AVX2_AND(1, 11, 12);
585 NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 10, pg[10], bsize);
586 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 11, pg[11], bsize);
587 NFT_PIPAPO_AVX2_AND(4, 13, 14);
588 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 12, pg[12], bsize);
589 NFT_PIPAPO_AVX2_BUCKET_LOAD4(6, lt, 13, pg[13], bsize);
590 NFT_PIPAPO_AVX2_AND(7, 0, 1);
591 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 14, pg[14], bsize);
592 NFT_PIPAPO_AVX2_AND(9, 2, 3);
593 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 15, pg[15], bsize);
594 NFT_PIPAPO_AVX2_AND(11, 4, 5);
595 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 16, pg[16], bsize);
596 NFT_PIPAPO_AVX2_AND(13, 6, 7);
597 NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 17, pg[17], bsize);
598
599 NFT_PIPAPO_AVX2_AND(0, 8, 9);
600 NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 18, pg[18], bsize);
601 NFT_PIPAPO_AVX2_AND(2, 10, 11);
602 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 19, pg[19], bsize);
603 NFT_PIPAPO_AVX2_AND(4, 12, 13);
604 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 20, pg[20], bsize);
605 NFT_PIPAPO_AVX2_AND(6, 14, 0);
606 NFT_PIPAPO_AVX2_AND(7, 1, 2);
607 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 21, pg[21], bsize);
608 NFT_PIPAPO_AVX2_AND(9, 3, 4);
609 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 22, pg[22], bsize);
610 NFT_PIPAPO_AVX2_AND(11, 5, 6);
611 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 23, pg[23], bsize);
612 NFT_PIPAPO_AVX2_AND(13, 7, 8);
613
614 NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 24, pg[24], bsize);
615 NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 25, pg[25], bsize);
616 NFT_PIPAPO_AVX2_AND(1, 9, 10);
617 NFT_PIPAPO_AVX2_AND(2, 11, 12);
618 NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 26, pg[26], bsize);
619 NFT_PIPAPO_AVX2_AND(4, 13, 14);
620 NFT_PIPAPO_AVX2_BUCKET_LOAD4(5, lt, 27, pg[27], bsize);
621 NFT_PIPAPO_AVX2_AND(6, 0, 1);
622 NFT_PIPAPO_AVX2_BUCKET_LOAD4(7, lt, 28, pg[28], bsize);
623 NFT_PIPAPO_AVX2_BUCKET_LOAD4(8, lt, 29, pg[29], bsize);
624 NFT_PIPAPO_AVX2_AND(9, 2, 3);
625 NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 30, pg[30], bsize);
626 NFT_PIPAPO_AVX2_AND(11, 4, 5);
627 NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 31, pg[31], bsize);
628
629 NFT_PIPAPO_AVX2_AND(0, 6, 7);
630 NFT_PIPAPO_AVX2_AND(1, 8, 9);
631 NFT_PIPAPO_AVX2_AND(2, 10, 11);
632 NFT_PIPAPO_AVX2_AND(3, 12, 0);
633
634
635 NFT_PIPAPO_AVX2_AND(4, 1, 2);
636 NFT_PIPAPO_AVX2_AND(5, 3, 4);
637
638 NFT_PIPAPO_AVX2_NOMATCH_GOTO(5, nomatch);
639 NFT_PIPAPO_AVX2_STORE(map[i_ul], 5);
640
641 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
642 if (last)
643 return b;
644
645 if (unlikely(ret == -1))
646 ret = b / XSAVE_YMM_SIZE;
647
648 continue;
649nomatch:
650 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
651nothing:
652 ;
653 }
654
655 return ret;
656}
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill,
676 struct nft_pipapo_field *f, int offset,
677 const u8 *pkt, bool first, bool last)
678{
679 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
680 unsigned long *lt = f->lt, bsize = f->bsize;
681
682 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
683 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
684 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
685
686 if (first) {
687 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 0, pkt[0], bsize);
688 } else {
689 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
690 NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
691 NFT_PIPAPO_AVX2_AND(2, 0, 1);
692 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
693 }
694
695 NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nomatch);
696 NFT_PIPAPO_AVX2_STORE(map[i_ul], 2);
697
698 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
699 if (last)
700 return b;
701
702 if (unlikely(ret == -1))
703 ret = b / XSAVE_YMM_SIZE;
704
705 continue;
706nomatch:
707 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
708nothing:
709 ;
710 }
711
712 return ret;
713}
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill,
733 struct nft_pipapo_field *f, int offset,
734 const u8 *pkt, bool first, bool last)
735{
736 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
737 unsigned long *lt = f->lt, bsize = f->bsize;
738
739 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
740 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
741 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
742
743 if (first) {
744 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
745 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize);
746 NFT_PIPAPO_AVX2_AND(4, 0, 1);
747 } else {
748 NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);
749 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 0, pkt[0], bsize);
750 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);
751
752
753 NFT_PIPAPO_AVX2_AND(3, 0, 1);
754 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
755 NFT_PIPAPO_AVX2_AND(4, 3, 2);
756 }
757
758
759 NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch);
760 NFT_PIPAPO_AVX2_STORE(map[i_ul], 4);
761
762 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
763 if (last)
764 return b;
765
766 if (unlikely(ret == -1))
767 ret = b / XSAVE_YMM_SIZE;
768
769 continue;
770nomatch:
771 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
772nothing:
773 ;
774 }
775
776 return ret;
777}
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill,
797 struct nft_pipapo_field *f, int offset,
798 const u8 *pkt, bool first, bool last)
799{
800 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
801 unsigned long *lt = f->lt, bsize = f->bsize;
802
803 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
804 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
805 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
806
807 if (first) {
808 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
809 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize);
810 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 2, pkt[2], bsize);
811 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 3, pkt[3], bsize);
812
813
814 NFT_PIPAPO_AVX2_AND(4, 0, 1);
815 NFT_PIPAPO_AVX2_AND(5, 2, 3);
816 NFT_PIPAPO_AVX2_AND(0, 4, 5);
817 } else {
818 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
819 NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
820 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);
821 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize);
822 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize);
823
824 NFT_PIPAPO_AVX2_AND(5, 0, 1);
825 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
826 NFT_PIPAPO_AVX2_AND(6, 2, 3);
827
828
829 NFT_PIPAPO_AVX2_AND(7, 4, 5);
830 NFT_PIPAPO_AVX2_AND(0, 6, 7);
831 }
832
833 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nomatch);
834 NFT_PIPAPO_AVX2_STORE(map[i_ul], 0);
835
836 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
837 if (last)
838 return b;
839
840 if (unlikely(ret == -1))
841 ret = b / XSAVE_YMM_SIZE;
842
843 continue;
844
845nomatch:
846 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
847nothing:
848 ;
849 }
850
851 return ret;
852}
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
872 struct nft_pipapo_field *f, int offset,
873 const u8 *pkt, bool first, bool last)
874{
875 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
876 unsigned long *lt = f->lt, bsize = f->bsize;
877
878 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
879 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
880 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
881
882 if (first) {
883 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
884 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize);
885 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 2, pkt[2], bsize);
886 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 3, pkt[3], bsize);
887 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize);
888
889 NFT_PIPAPO_AVX2_AND(5, 0, 1);
890 NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize);
891 NFT_PIPAPO_AVX2_AND(7, 2, 3);
892
893
894 NFT_PIPAPO_AVX2_AND(0, 4, 5);
895 NFT_PIPAPO_AVX2_AND(1, 6, 7);
896 NFT_PIPAPO_AVX2_AND(4, 0, 1);
897 } else {
898 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
899 NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
900 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);
901 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize);
902 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize);
903
904 NFT_PIPAPO_AVX2_AND(5, 0, 1);
905 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
906
907 NFT_PIPAPO_AVX2_AND(6, 2, 3);
908 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 4, pkt[4], bsize);
909 NFT_PIPAPO_AVX2_AND(0, 4, 5);
910 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 5, pkt[5], bsize);
911 NFT_PIPAPO_AVX2_AND(2, 6, 7);
912
913
914 NFT_PIPAPO_AVX2_AND(3, 0, 1);
915 NFT_PIPAPO_AVX2_AND(4, 2, 3);
916 }
917
918 NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch);
919 NFT_PIPAPO_AVX2_STORE(map[i_ul], 4);
920
921 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
922 if (last)
923 return b;
924
925 if (unlikely(ret == -1))
926 ret = b / XSAVE_YMM_SIZE;
927
928 continue;
929
930nomatch:
931 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
932nothing:
933 ;
934 }
935
936 return ret;
937}
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
957 struct nft_pipapo_field *f, int offset,
958 const u8 *pkt, bool first, bool last)
959{
960 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
961 unsigned long *lt = f->lt, bsize = f->bsize;
962
963 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
964 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
965 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
966
967 if (!first)
968 NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);
969
970 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 0, pkt[0], bsize);
971 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);
972 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize);
973 if (!first) {
974 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
975 NFT_PIPAPO_AVX2_AND(1, 1, 0);
976 }
977 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize);
978
979 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 4, pkt[4], bsize);
980 NFT_PIPAPO_AVX2_AND(6, 1, 2);
981 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 5, pkt[5], bsize);
982 NFT_PIPAPO_AVX2_AND(0, 3, 4);
983 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 6, pkt[6], bsize);
984
985 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 7, pkt[7], bsize);
986 NFT_PIPAPO_AVX2_AND(3, 5, 6);
987 NFT_PIPAPO_AVX2_AND(4, 0, 1);
988 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 8, pkt[8], bsize);
989
990 NFT_PIPAPO_AVX2_AND(6, 2, 3);
991 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 9, pkt[9], bsize);
992 NFT_PIPAPO_AVX2_AND(0, 4, 5);
993 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 10, pkt[10], bsize);
994 NFT_PIPAPO_AVX2_AND(2, 6, 7);
995 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 11, pkt[11], bsize);
996 NFT_PIPAPO_AVX2_AND(4, 0, 1);
997 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 12, pkt[12], bsize);
998 NFT_PIPAPO_AVX2_AND(6, 2, 3);
999 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 13, pkt[13], bsize);
1000 NFT_PIPAPO_AVX2_AND(0, 4, 5);
1001 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 14, pkt[14], bsize);
1002 NFT_PIPAPO_AVX2_AND(2, 6, 7);
1003 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 15, pkt[15], bsize);
1004 NFT_PIPAPO_AVX2_AND(4, 0, 1);
1005
1006
1007 NFT_PIPAPO_AVX2_AND(5, 2, 3);
1008 NFT_PIPAPO_AVX2_AND(6, 4, 5);
1009
1010 NFT_PIPAPO_AVX2_NOMATCH_GOTO(6, nomatch);
1011 NFT_PIPAPO_AVX2_STORE(map[i_ul], 6);
1012
1013 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
1014 if (last)
1015 return b;
1016
1017 if (unlikely(ret == -1))
1018 ret = b / XSAVE_YMM_SIZE;
1019
1020 continue;
1021
1022nomatch:
1023 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
1024nothing:
1025 ;
1026 }
1027
1028 return ret;
1029}
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
1049 struct nft_pipapo_field *f, int offset,
1050 const u8 *pkt, bool first, bool last)
1051{
1052 unsigned long *lt = f->lt, bsize = f->bsize;
1053 int i, ret = -1, b;
1054
1055 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
1056
1057 if (first)
1058 memset(map, 0xff, bsize * sizeof(*map));
1059
1060 for (i = offset; i < bsize; i++) {
1061 if (f->bb == 8)
1062 pipapo_and_field_buckets_8bit(f, map, pkt);
1063 else
1064 pipapo_and_field_buckets_4bit(f, map, pkt);
1065 NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
1066
1067 b = pipapo_refill(map, bsize, f->rules, fill, f->mt, last);
1068
1069 if (last)
1070 return b;
1071
1072 if (ret == -1)
1073 ret = b / XSAVE_YMM_SIZE;
1074 }
1075
1076 return ret;
1077}
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
1088 struct nft_set_estimate *est)
1089{
1090 if (!(features & NFT_SET_INTERVAL) ||
1091 desc->field_count < NFT_PIPAPO_MIN_FIELDS)
1092 return false;
1093
1094 if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_AVX))
1095 return false;
1096
1097 est->size = pipapo_estimate_size(desc);
1098 if (!est->size)
1099 return false;
1100
1101 est->lookup = NFT_SET_CLASS_O_LOG_N;
1102
1103 est->space = NFT_SET_CLASS_O_N;
1104
1105 return true;
1106}
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
1123 const u32 *key, const struct nft_set_ext **ext)
1124{
1125 struct nft_pipapo *priv = nft_set_priv(set);
1126 unsigned long *res, *fill, *scratch;
1127 u8 genmask = nft_genmask_cur(net);
1128 const u8 *rp = (const u8 *)key;
1129 struct nft_pipapo_match *m;
1130 struct nft_pipapo_field *f;
1131 bool map_index;
1132 int i, ret = 0;
1133
1134 m = rcu_dereference(priv->match);
1135
1136
1137 kernel_fpu_begin();
1138
1139 scratch = *raw_cpu_ptr(m->scratch_aligned);
1140 if (unlikely(!scratch)) {
1141 kernel_fpu_end();
1142 return false;
1143 }
1144 map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
1145
1146 res = scratch + (map_index ? m->bsize_max : 0);
1147 fill = scratch + (map_index ? 0 : m->bsize_max);
1148
1149
1150
1151 nft_pipapo_avx2_prepare();
1152
1153next_match:
1154 nft_pipapo_for_each_field(f, i, m) {
1155 bool last = i == m->field_count - 1, first = !i;
1156
1157#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \
1158 (ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \
1159 ret, rp, \
1160 first, last))
1161
1162 if (likely(f->bb == 8)) {
1163 if (f->groups == 1) {
1164 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 1);
1165 } else if (f->groups == 2) {
1166 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 2);
1167 } else if (f->groups == 4) {
1168 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 4);
1169 } else if (f->groups == 6) {
1170 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 6);
1171 } else if (f->groups == 16) {
1172 NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
1173 } else {
1174 ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
1175 ret, rp,
1176 first, last);
1177 }
1178 } else {
1179 if (f->groups == 2) {
1180 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 2);
1181 } else if (f->groups == 4) {
1182 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 4);
1183 } else if (f->groups == 8) {
1184 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 8);
1185 } else if (f->groups == 12) {
1186 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 12);
1187 } else if (f->groups == 32) {
1188 NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
1189 } else {
1190 ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
1191 ret, rp,
1192 first, last);
1193 }
1194 }
1195 NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
1196
1197#undef NFT_SET_PIPAPO_AVX2_LOOKUP
1198
1199 if (ret < 0)
1200 goto out;
1201
1202 if (last) {
1203 *ext = &f->mt[ret].e->ext;
1204 if (unlikely(nft_set_elem_expired(*ext) ||
1205 !nft_set_elem_active(*ext, genmask))) {
1206 ret = 0;
1207 goto next_match;
1208 }
1209
1210 goto out;
1211 }
1212
1213 swap(res, fill);
1214 rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
1215 }
1216
1217out:
1218 if (i % 2)
1219 raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
1220 kernel_fpu_end();
1221
1222 return ret >= 0;
1223}
1224