1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/init.h>
12#include <linux/module.h>
13#include <linux/netlink.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter/nf_tables.h>
16#include <net/netfilter/nf_tables_core.h>
17#include <uapi/linux/netfilter/nf_tables.h>
18#include <linux/bitmap.h>
19#include <linux/bitops.h>
20
21#include <linux/compiler.h>
22#include <asm/fpu/api.h>
23
24#include "nft_set_pipapo_avx2.h"
25#include "nft_set_pipapo.h"
26
/* Step, in unsigned longs, used below to walk bitmaps and lookup tables one
 * vector load/store at a time.
 * NOTE(review): assumed to equal the number of longs held by one 256-bit YMM
 * register -- confirm XSAVE_YMM_SIZE / BITS_PER_LONG for the target.
 */
#define NFT_PIPAPO_LONGS_PER_M256	(XSAVE_YMM_SIZE / BITS_PER_LONG)

/* Load memory operand @loc into %ymm<reg> with vmovntdqa, the load form that
 * carries a non-temporal hint.
 *
 * @reg is a literal register number pasted into the instruction text. No
 * output or clobber is declared, so the compiler is not told that the register
 * changes: register usage is managed by hand by the callers.
 */
#define NFT_PIPAPO_AVX2_LOAD(reg, loc)					\
	asm volatile("vmovntdqa %0, %%ymm" #reg : : "m" (loc))

/* Load into %ymm<reg> the lookup table entry for value @v of 4-bit group
 * @group: each group spans NFT_PIPAPO_BUCKETS(4) entries, and consecutive
 * entries are @bsize longs apart in @lt.
 */
#define NFT_PIPAPO_AVX2_BUCKET_LOAD4(reg, lt, group, v, bsize)		\
	NFT_PIPAPO_AVX2_LOAD(reg,					\
			     lt[((group) * NFT_PIPAPO_BUCKETS(4) +	\
				 (v)) * (bsize)])
/* Same as above, for 8-bit groups: NFT_PIPAPO_BUCKETS(8) entries per group */
#define NFT_PIPAPO_AVX2_BUCKET_LOAD8(reg, lt, group, v, bsize)		\
	NFT_PIPAPO_AVX2_LOAD(reg,					\
			     lt[((group) * NFT_PIPAPO_BUCKETS(8) +	\
				 (v)) * (bsize)])

/* Bitwise AND of %ymm<a> and %ymm<b>, result in %ymm<dst> */
#define NFT_PIPAPO_AVX2_AND(dst, a, b)					\
	asm volatile("vpand %ymm" #a ", %ymm" #b ", %ymm" #dst)

/* Test %ymm<reg> against itself and jump to @label if it is all zeroes */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label)			\
	asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";"	\
			  "je %l[" #label "]" : : : : label)

/* Store %ymm<reg> to memory operand @loc with vmovdqa.
 * NOTE(review): vmovdqa is the aligned move form, so @loc is presumably
 * expected to be 32-byte aligned -- confirm against the scratch allocation.
 */
#define NFT_PIPAPO_AVX2_STORE(loc, reg)					\
	asm volatile("vmovdqa %%ymm" #reg ", %0" : "=m" (loc))

/* Zero %ymm<reg> by XOR-ing it with itself */
#define NFT_PIPAPO_AVX2_ZERO(reg)					\
	asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)

/* Per-CPU flag read by nft_pipapo_avx2_lookup() to pick which half of the
 * scratch area is used as result map and which as fill map, and toggled there
 * on exit.
 */
static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
76
77
78
79
80
81
82
/**
 * nft_pipapo_avx2_prepare() - Zero %ymm15 ahead of a lookup
 *
 * The per-field lookup functions in this file store %ymm15 to a bitmap chunk
 * on their "nomatch" path, so they expect it to hold all zeroes.
 */
static void nft_pipapo_avx2_prepare(void)
{
	NFT_PIPAPO_AVX2_ZERO(15);
}
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102static void nft_pipapo_avx2_fill(unsigned long *data, int start, int len)
103{
104 int offset = start % BITS_PER_LONG;
105 unsigned long mask;
106
107 data += start / BITS_PER_LONG;
108
109 if (likely(len == 1)) {
110 *data |= BIT(offset);
111 return;
112 }
113
114 if (likely(len < BITS_PER_LONG || offset)) {
115 if (likely(len + offset <= BITS_PER_LONG)) {
116 *data |= GENMASK(len - 1 + offset, offset);
117 return;
118 }
119
120 *data |= ~0UL << offset;
121 len -= BITS_PER_LONG - offset;
122 data++;
123
124 if (len <= BITS_PER_LONG) {
125 mask = ~0UL >> (BITS_PER_LONG - len);
126 *data |= mask;
127 return;
128 }
129 }
130
131 memset(data, 0xff, len / BITS_PER_BYTE);
132 data += len / BITS_PER_LONG;
133
134 len %= BITS_PER_LONG;
135 if (len)
136 *data |= ~0UL >> (BITS_PER_LONG - len);
137}
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155static int nft_pipapo_avx2_refill(int offset, unsigned long *map,
156 unsigned long *dst,
157 union nft_pipapo_map_bucket *mt, bool last)
158{
159 int ret = -1;
160
161#define NFT_PIPAPO_AVX2_REFILL_ONE_WORD(x) \
162 do { \
163 while (map[(x)]) { \
164 int r = __builtin_ctzl(map[(x)]); \
165 int i = (offset + (x)) * BITS_PER_LONG + r; \
166 \
167 if (last) \
168 return i; \
169 \
170 nft_pipapo_avx2_fill(dst, mt[i].to, mt[i].n); \
171 \
172 if (ret == -1) \
173 ret = mt[i].to; \
174 \
175 map[(x)] &= ~(1UL << r); \
176 } \
177 } while (0)
178
179 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(0);
180 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(1);
181 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(2);
182 NFT_PIPAPO_AVX2_REFILL_ONE_WORD(3);
183#undef NFT_PIPAPO_AVX2_REFILL_ONE_WORD
184
185 return ret;
186}
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
/**
 * nft_pipapo_avx2_lookup_4b_2() - AVX2 lookup for a field of 2 four-bit groups
 * @map:	Result bitmap of the previous field; ignored on input if @first.
 *		Each processed chunk is overwritten with this field's result
 * @fill:	Bitmap passed to nft_pipapo_avx2_refill() as destination
 * @f:		Field: f->lt is the lookup table, f->mt the mapping table,
 *		f->bsize the distance in longs between lookup table entries
 * @offset:	Index of the first vector-sized chunk to process
 * @pkt:	Key data for this field: pkt[0] is split into two nibbles,
 *		high nibble first
 * @first:	Don't AND with @map: only the two bucket values are combined
 * @last:	Return the refill result for the first matching chunk at once
 *
 * Return: -1 if no chunk has bits left set. Otherwise, with @last, the value
 * from nft_pipapo_avx2_refill() for the first matching chunk; without @last,
 * that value for the first matching chunk divided by XSAVE_YMM_SIZE.
 */
static int nft_pipapo_avx2_lookup_4b_2(unsigned long *map, unsigned long *fill,
				       struct nft_pipapo_field *f, int offset,
				       const u8 *pkt, bool first, bool last)
{
	int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
	u8 pg[2] = { pkt[0] >> 4, pkt[0] & 0xf };
	unsigned long *lt = f->lt, bsize = f->bsize;

	lt += offset * NFT_PIPAPO_LONGS_PER_M256;
	for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
		int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;

		if (first) {
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
			NFT_PIPAPO_AVX2_AND(4, 0, 1);
		} else {
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
			NFT_PIPAPO_AVX2_LOAD(2, map[i_ul]);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
			/* Previous result is empty here: leave chunk as is */
			NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nothing);
			NFT_PIPAPO_AVX2_AND(3, 0, 1);
			NFT_PIPAPO_AVX2_AND(4, 2, 3);
		}

		/* Final result for this chunk is in %ymm4 */
		NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch);
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 4);

		b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
		if (last)
			return b;

		/* Only the first matching chunk sets the return value */
		if (unlikely(ret == -1))
			ret = b / XSAVE_YMM_SIZE;

		continue;
nomatch:
		/* %ymm15 is expected to be zero, see nft_pipapo_avx2_prepare() */
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
nothing:
		;
	}

	return ret;
}
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
/**
 * nft_pipapo_avx2_lookup_4b_4() - AVX2 lookup for a field of 4 four-bit groups
 * @map:	Result bitmap of the previous field; ignored on input if @first.
 *		Each processed chunk is overwritten with this field's result
 * @fill:	Bitmap passed to nft_pipapo_avx2_refill() as destination
 * @f:		Field: f->lt is the lookup table, f->mt the mapping table,
 *		f->bsize the distance in longs between lookup table entries
 * @offset:	Index of the first vector-sized chunk to process
 * @pkt:	Key data for this field: pkt[0] and pkt[1] are split into four
 *		nibbles, high nibble first
 * @first:	Don't AND with @map: only the bucket values are combined
 * @last:	Return the refill result for the first matching chunk at once
 *
 * Return: -1 if no chunk has bits left set. Otherwise, with @last, the value
 * from nft_pipapo_avx2_refill() for the first matching chunk; without @last,
 * that value for the first matching chunk divided by XSAVE_YMM_SIZE.
 */
static int nft_pipapo_avx2_lookup_4b_4(unsigned long *map, unsigned long *fill,
				       struct nft_pipapo_field *f, int offset,
				       const u8 *pkt, bool first, bool last)
{
	int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
	u8 pg[4] = { pkt[0] >> 4, pkt[0] & 0xf, pkt[1] >> 4, pkt[1] & 0xf };
	unsigned long *lt = f->lt, bsize = f->bsize;

	lt += offset * NFT_PIPAPO_LONGS_PER_M256;
	for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
		int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;

		if (first) {
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(1, lt, 1, pg[1], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 2, pg[2], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 3, pg[3], bsize);
			NFT_PIPAPO_AVX2_AND(4, 0, 1);
			NFT_PIPAPO_AVX2_AND(5, 2, 3);
			NFT_PIPAPO_AVX2_AND(7, 4, 5);
		} else {
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(0, lt, 0, pg[0], bsize);

			/* %ymm1: result of the previous field */
			NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);

			NFT_PIPAPO_AVX2_BUCKET_LOAD4(2, lt, 1, pg[1], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(3, lt, 2, pg[2], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(4, lt, 3, pg[3], bsize);
			NFT_PIPAPO_AVX2_AND(5, 0, 1);

			/* Previous result is empty here: leave chunk as is */
			NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);

			NFT_PIPAPO_AVX2_AND(6, 2, 3);
			NFT_PIPAPO_AVX2_AND(7, 4, 5);

			/* Depends on both results computed just above */
			NFT_PIPAPO_AVX2_AND(7, 6, 7);
		}

		/* Final result for this chunk is in %ymm7 */
		NFT_PIPAPO_AVX2_NOMATCH_GOTO(7, nomatch);
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 7);

		b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
		if (last)
			return b;

		/* Only the first matching chunk sets the return value */
		if (unlikely(ret == -1))
			ret = b / XSAVE_YMM_SIZE;

		continue;
nomatch:
		/* %ymm15 is expected to be zero, see nft_pipapo_avx2_prepare() */
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
nothing:
		;
	}

	return ret;
}
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
/**
 * nft_pipapo_avx2_lookup_4b_8() - AVX2 lookup for a field of 8 four-bit groups
 * @map:	Result bitmap of the previous field; ignored on input if @first.
 *		Each processed chunk is overwritten with this field's result
 * @fill:	Bitmap passed to nft_pipapo_avx2_refill() as destination
 * @f:		Field: f->lt is the lookup table, f->mt the mapping table,
 *		f->bsize the distance in longs between lookup table entries
 * @offset:	Index of the first vector-sized chunk to process
 * @pkt:	Key data for this field: pkt[0] to pkt[3] are split into eight
 *		nibbles, high nibble first
 * @first:	Don't AND with @map: only the bucket values are combined
 * @last:	Return the refill result for the first matching chunk at once
 *
 * Loads and ANDs are interleaved, with intermediate results spread over
 * %ymm0 to %ymm14; the statement order matters as registers are reused.
 *
 * Return: -1 if no chunk has bits left set. Otherwise, with @last, the value
 * from nft_pipapo_avx2_refill() for the first matching chunk; without @last,
 * that value for the first matching chunk divided by XSAVE_YMM_SIZE.
 */
static int nft_pipapo_avx2_lookup_4b_8(unsigned long *map, unsigned long *fill,
				       struct nft_pipapo_field *f, int offset,
				       const u8 *pkt, bool first, bool last)
{
	u8 pg[8] = {  pkt[0] >> 4,  pkt[0] & 0xf,  pkt[1] >> 4,  pkt[1] & 0xf,
		      pkt[2] >> 4,  pkt[2] & 0xf,  pkt[3] >> 4,  pkt[3] & 0xf,
		   };
	int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
	unsigned long *lt = f->lt, bsize = f->bsize;

	lt += offset * NFT_PIPAPO_LONGS_PER_M256;
	for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
		int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;

		if (first) {
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(0,  lt, 0, pg[0], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(1,  lt, 1, pg[1], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(2,  lt, 2, pg[2], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(3,  lt, 3, pg[3], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(4,  lt, 4, pg[4], bsize);
			NFT_PIPAPO_AVX2_AND(5,   0,  1);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(6,  lt, 5, pg[5], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(7,  lt, 6, pg[6], bsize);
			NFT_PIPAPO_AVX2_AND(8,   2,  3);
			NFT_PIPAPO_AVX2_AND(9,   4,  5);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 7, pg[7], bsize);
			NFT_PIPAPO_AVX2_AND(11,  6,  7);
			NFT_PIPAPO_AVX2_AND(12,  8,  9);
			NFT_PIPAPO_AVX2_AND(13, 10, 11);

			/* Depends on both results computed just above */
			NFT_PIPAPO_AVX2_AND(1,  12, 13);
		} else {
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(0,  lt, 0, pg[0], bsize);
			/* %ymm1: result of the previous field */
			NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(2,  lt, 1, pg[1], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(3,  lt, 2, pg[2], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(4,  lt, 3, pg[3], bsize);

			/* Previous result is empty here: leave chunk as is */
			NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);

			NFT_PIPAPO_AVX2_AND(5,   0,  1);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(6,  lt, 4, pg[4], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(7,  lt, 5, pg[5], bsize);
			NFT_PIPAPO_AVX2_AND(8,   2,  3);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(9,  lt, 6, pg[6], bsize);
			NFT_PIPAPO_AVX2_AND(10,  4,  5);
			NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt, 7, pg[7], bsize);
			NFT_PIPAPO_AVX2_AND(12,  6,  7);
			NFT_PIPAPO_AVX2_AND(13,  8,  9);
			NFT_PIPAPO_AVX2_AND(14, 10, 11);

			/* Each of these depends on the result right before */
			NFT_PIPAPO_AVX2_AND(1,  12, 13);
			NFT_PIPAPO_AVX2_AND(1,   1, 14);
		}

		/* Final result for this chunk is in %ymm1 */
		NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nomatch);
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 1);

		b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
		if (last)
			return b;

		/* Only the first matching chunk sets the return value */
		if (unlikely(ret == -1))
			ret = b / XSAVE_YMM_SIZE;

		continue;

nomatch:
		/* %ymm15 is expected to be zero, see nft_pipapo_avx2_prepare() */
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
nothing:
		;
	}

	return ret;
}
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
/**
 * nft_pipapo_avx2_lookup_4b_12() - AVX2 lookup for a field of 12 four-bit groups
 * @map:	Result bitmap of the previous field; ignored on input if @first.
 *		Each processed chunk is overwritten with this field's result
 * @fill:	Bitmap passed to nft_pipapo_avx2_refill() as destination
 * @f:		Field: f->lt is the lookup table, f->mt the mapping table,
 *		f->bsize the distance in longs between lookup table entries
 * @offset:	Index of the first vector-sized chunk to process
 * @pkt:	Key data for this field: pkt[0] to pkt[5] are split into twelve
 *		nibbles, high nibble first
 * @first:	Don't load and AND @map: only the bucket values are combined
 * @last:	Return the refill result for the first matching chunk at once
 *
 * Registers %ymm0 to %ymm14 are reused several times while combining the
 * twelve buckets, so the order of loads and ANDs must be kept as it is.
 *
 * Return: -1 if no chunk has bits left set. Otherwise, with @last, the value
 * from nft_pipapo_avx2_refill() for the first matching chunk; without @last,
 * that value for the first matching chunk divided by XSAVE_YMM_SIZE.
 */
static int nft_pipapo_avx2_lookup_4b_12(unsigned long *map, unsigned long *fill,
					struct nft_pipapo_field *f, int offset,
					const u8 *pkt, bool first, bool last)
{
	u8 pg[12] = {  pkt[0] >> 4,  pkt[0] & 0xf,  pkt[1] >> 4,  pkt[1] & 0xf,
		       pkt[2] >> 4,  pkt[2] & 0xf,  pkt[3] >> 4,  pkt[3] & 0xf,
		       pkt[4] >> 4,  pkt[4] & 0xf,  pkt[5] >> 4,  pkt[5] & 0xf,
		    };
	int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
	unsigned long *lt = f->lt, bsize = f->bsize;

	lt += offset * NFT_PIPAPO_LONGS_PER_M256;
	for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
		int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;

		/* %ymm0: result of the previous field, if any */
		if (!first)
			NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);

		NFT_PIPAPO_AVX2_BUCKET_LOAD4(1,  lt,  0,  pg[0], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(2,  lt,  1,  pg[1], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(3,  lt,  2,  pg[2], bsize);

		if (!first) {
			/* Previous result is empty here: leave chunk as is */
			NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
			NFT_PIPAPO_AVX2_AND(1, 1, 0);
		}

		NFT_PIPAPO_AVX2_BUCKET_LOAD4(4,  lt,  3,  pg[3], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(5,  lt,  4,  pg[4], bsize);
		NFT_PIPAPO_AVX2_AND(6,   2,  3);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(7,  lt,  5,  pg[5], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(8,  lt,  6,  pg[6], bsize);
		NFT_PIPAPO_AVX2_AND(9,   1,  4);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt,  7,  pg[7], bsize);
		NFT_PIPAPO_AVX2_AND(11,  5,  6);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt,  8,  pg[8], bsize);
		NFT_PIPAPO_AVX2_AND(13,  7,  8);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt,  9,  pg[9], bsize);

		/* From here on, %ymm0 to %ymm8 are reused */
		NFT_PIPAPO_AVX2_AND(0,   9, 10);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(1,  lt, 10, pg[10], bsize);
		NFT_PIPAPO_AVX2_AND(2,  11, 12);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(3,  lt, 11, pg[11], bsize);
		NFT_PIPAPO_AVX2_AND(4,  13, 14);
		NFT_PIPAPO_AVX2_AND(5,   0,  1);

		NFT_PIPAPO_AVX2_AND(6,   2,  3);

		/* Each of these depends on results computed right before */
		NFT_PIPAPO_AVX2_AND(7,   4,  5);
		NFT_PIPAPO_AVX2_AND(8,   6,  7);

		/* Final result for this chunk is in %ymm8 */
		NFT_PIPAPO_AVX2_NOMATCH_GOTO(8, nomatch);
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 8);

		b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
		if (last)
			return b;

		/* Only the first matching chunk sets the return value */
		if (unlikely(ret == -1))
			ret = b / XSAVE_YMM_SIZE;

		continue;
nomatch:
		/* %ymm15 is expected to be zero, see nft_pipapo_avx2_prepare() */
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
nothing:
		;
	}

	return ret;
}
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
/**
 * nft_pipapo_avx2_lookup_4b_32() - AVX2 lookup for a field of 32 four-bit groups
 * @map:	Result bitmap of the previous field; ignored on input if @first.
 *		Each processed chunk is overwritten with this field's result
 * @fill:	Bitmap passed to nft_pipapo_avx2_refill() as destination
 * @f:		Field: f->lt is the lookup table, f->mt the mapping table,
 *		f->bsize the distance in longs between lookup table entries
 * @offset:	Index of the first vector-sized chunk to process
 * @pkt:	Key data for this field: pkt[0] to pkt[15] are split into 32
 *		nibbles, high nibble first
 * @first:	Don't load and AND @map: only the bucket values are combined
 * @last:	Return the refill result for the first matching chunk at once
 *
 * Registers %ymm0 to %ymm14 are recycled in several rounds while the 32
 * buckets are combined, so the order of loads and ANDs must be kept as it is:
 * a register may only be reloaded once its previous value was consumed.
 *
 * Return: -1 if no chunk has bits left set. Otherwise, with @last, the value
 * from nft_pipapo_avx2_refill() for the first matching chunk; without @last,
 * that value for the first matching chunk divided by XSAVE_YMM_SIZE.
 */
static int nft_pipapo_avx2_lookup_4b_32(unsigned long *map, unsigned long *fill,
					struct nft_pipapo_field *f, int offset,
					const u8 *pkt, bool first, bool last)
{
	u8 pg[32] = {  pkt[0] >> 4,  pkt[0] & 0xf,  pkt[1] >> 4,  pkt[1] & 0xf,
		       pkt[2] >> 4,  pkt[2] & 0xf,  pkt[3] >> 4,  pkt[3] & 0xf,
		       pkt[4] >> 4,  pkt[4] & 0xf,  pkt[5] >> 4,  pkt[5] & 0xf,
		       pkt[6] >> 4,  pkt[6] & 0xf,  pkt[7] >> 4,  pkt[7] & 0xf,
		       pkt[8] >> 4,  pkt[8] & 0xf,  pkt[9] >> 4,  pkt[9] & 0xf,
		      pkt[10] >> 4, pkt[10] & 0xf, pkt[11] >> 4, pkt[11] & 0xf,
		      pkt[12] >> 4, pkt[12] & 0xf, pkt[13] >> 4, pkt[13] & 0xf,
		      pkt[14] >> 4, pkt[14] & 0xf, pkt[15] >> 4, pkt[15] & 0xf,
		    };
	int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
	unsigned long *lt = f->lt, bsize = f->bsize;

	lt += offset * NFT_PIPAPO_LONGS_PER_M256;
	for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
		int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;

		/* %ymm0: result of the previous field, if any */
		if (!first)
			NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);

		NFT_PIPAPO_AVX2_BUCKET_LOAD4(1,  lt,  0,  pg[0], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(2,  lt,  1,  pg[1], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(3,  lt,  2,  pg[2], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(4,  lt,  3,  pg[3], bsize);
		if (!first) {
			/* Previous result is empty here: leave chunk as is */
			NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
			NFT_PIPAPO_AVX2_AND(1, 1, 0);
		}

		NFT_PIPAPO_AVX2_AND(5,   2,  3);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(6,  lt,  4,  pg[4], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(7,  lt,  5,  pg[5], bsize);
		NFT_PIPAPO_AVX2_AND(8,   1,  4);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(9,  lt,  6,  pg[6], bsize);
		NFT_PIPAPO_AVX2_AND(10,  5,  6);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(11, lt,  7,  pg[7], bsize);
		NFT_PIPAPO_AVX2_AND(12,  7,  8);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(13, lt,  8,  pg[8], bsize);
		NFT_PIPAPO_AVX2_AND(14,  9, 10);

		/* Second round over %ymm0 to %ymm14 */
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(0,  lt,  9,  pg[9], bsize);
		NFT_PIPAPO_AVX2_AND(1,  11, 12);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(2,  lt, 10, pg[10], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(3,  lt, 11, pg[11], bsize);
		NFT_PIPAPO_AVX2_AND(4,  13, 14);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(5,  lt, 12, pg[12], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(6,  lt, 13, pg[13], bsize);
		NFT_PIPAPO_AVX2_AND(7,   0,  1);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(8,  lt, 14, pg[14], bsize);
		NFT_PIPAPO_AVX2_AND(9,   2,  3);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 15, pg[15], bsize);
		NFT_PIPAPO_AVX2_AND(11,  4,  5);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 16, pg[16], bsize);
		NFT_PIPAPO_AVX2_AND(13,  6,  7);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 17, pg[17], bsize);

		/* Third round */
		NFT_PIPAPO_AVX2_AND(0,   8,  9);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(1,  lt, 18, pg[18], bsize);
		NFT_PIPAPO_AVX2_AND(2,  10, 11);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(3,  lt, 19, pg[19], bsize);
		NFT_PIPAPO_AVX2_AND(4,  12, 13);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(5,  lt, 20, pg[20], bsize);
		NFT_PIPAPO_AVX2_AND(6,  14,  0);
		NFT_PIPAPO_AVX2_AND(7,   1,  2);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(8,  lt, 21, pg[21], bsize);
		NFT_PIPAPO_AVX2_AND(9,   3,  4);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 22, pg[22], bsize);
		NFT_PIPAPO_AVX2_AND(11,  5,  6);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 23, pg[23], bsize);
		NFT_PIPAPO_AVX2_AND(13,  7,  8);

		/* Fourth round */
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(14, lt, 24, pg[24], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(0,  lt, 25, pg[25], bsize);
		NFT_PIPAPO_AVX2_AND(1,   9, 10);
		NFT_PIPAPO_AVX2_AND(2,  11, 12);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(3,  lt, 26, pg[26], bsize);
		NFT_PIPAPO_AVX2_AND(4,  13, 14);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(5,  lt, 27, pg[27], bsize);
		NFT_PIPAPO_AVX2_AND(6,   0,  1);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(7,  lt, 28, pg[28], bsize);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(8,  lt, 29, pg[29], bsize);
		NFT_PIPAPO_AVX2_AND(9,   2,  3);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(10, lt, 30, pg[30], bsize);
		NFT_PIPAPO_AVX2_AND(11,  4,  5);
		NFT_PIPAPO_AVX2_BUCKET_LOAD4(12, lt, 31, pg[31], bsize);

		/* All buckets loaded: reduce the partial results */
		NFT_PIPAPO_AVX2_AND(0,   6,  7);
		NFT_PIPAPO_AVX2_AND(1,   8,  9);
		NFT_PIPAPO_AVX2_AND(2,  10, 11);
		NFT_PIPAPO_AVX2_AND(3,  12,  0);

		/* Each of these depends on results computed right before */
		NFT_PIPAPO_AVX2_AND(4,   1,  2);
		NFT_PIPAPO_AVX2_AND(5,   3,  4);

		/* Final result for this chunk is in %ymm5 */
		NFT_PIPAPO_AVX2_NOMATCH_GOTO(5, nomatch);
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 5);

		b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
		if (last)
			return b;

		/* Only the first matching chunk sets the return value */
		if (unlikely(ret == -1))
			ret = b / XSAVE_YMM_SIZE;

		continue;
nomatch:
		/* %ymm15 is expected to be zero, see nft_pipapo_avx2_prepare() */
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
nothing:
		;
	}

	return ret;
}
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
/**
 * nft_pipapo_avx2_lookup_8b_1() - AVX2 lookup for a field of one eight-bit group
 * @map:	Result bitmap of the previous field; ignored on input if @first.
 *		Each processed chunk is overwritten with this field's result
 * @fill:	Bitmap passed to nft_pipapo_avx2_refill() as destination
 * @f:		Field: f->lt is the lookup table, f->mt the mapping table,
 *		f->bsize the distance in longs between lookup table entries
 * @offset:	Index of the first vector-sized chunk to process
 * @pkt:	Key data for this field: pkt[0] selects the bucket
 * @first:	Don't AND with @map: the bucket value alone is the result
 * @last:	Return the refill result for the first matching chunk at once
 *
 * Return: -1 if no chunk has bits left set. Otherwise, with @last, the value
 * from nft_pipapo_avx2_refill() for the first matching chunk; without @last,
 * that value for the first matching chunk divided by XSAVE_YMM_SIZE.
 */
static int nft_pipapo_avx2_lookup_8b_1(unsigned long *map, unsigned long *fill,
				       struct nft_pipapo_field *f, int offset,
				       const u8 *pkt, bool first, bool last)
{
	int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
	unsigned long *lt = f->lt, bsize = f->bsize;

	lt += offset * NFT_PIPAPO_LONGS_PER_M256;
	for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
		int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;

		if (first) {
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 0, pkt[0], bsize);
		} else {
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
			/* %ymm1: result of the previous field */
			NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
			NFT_PIPAPO_AVX2_AND(2, 0, 1);
			/* Previous result is empty here: leave chunk as is */
			NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
		}

		/* Final result for this chunk is in %ymm2 */
		NFT_PIPAPO_AVX2_NOMATCH_GOTO(2, nomatch);
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 2);

		b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
		if (last)
			return b;

		/* Only the first matching chunk sets the return value */
		if (unlikely(ret == -1))
			ret = b / XSAVE_YMM_SIZE;

		continue;
nomatch:
		/* %ymm15 is expected to be zero, see nft_pipapo_avx2_prepare() */
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
nothing:
		;
	}

	return ret;
}
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
/**
 * nft_pipapo_avx2_lookup_8b_2() - AVX2 lookup for a field of 2 eight-bit groups
 * @map:	Result bitmap of the previous field; ignored on input if @first.
 *		Each processed chunk is overwritten with this field's result
 * @fill:	Bitmap passed to nft_pipapo_avx2_refill() as destination
 * @f:		Field: f->lt is the lookup table, f->mt the mapping table,
 *		f->bsize the distance in longs between lookup table entries
 * @offset:	Index of the first vector-sized chunk to process
 * @pkt:	Key data for this field: pkt[0] and pkt[1] select the buckets
 * @first:	Don't AND with @map: only the two bucket values are combined
 * @last:	Return the refill result for the first matching chunk at once
 *
 * Return: -1 if no chunk has bits left set. Otherwise, with @last, the value
 * from nft_pipapo_avx2_refill() for the first matching chunk; without @last,
 * that value for the first matching chunk divided by XSAVE_YMM_SIZE.
 */
static int nft_pipapo_avx2_lookup_8b_2(unsigned long *map, unsigned long *fill,
				       struct nft_pipapo_field *f, int offset,
				       const u8 *pkt, bool first, bool last)
{
	int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
	unsigned long *lt = f->lt, bsize = f->bsize;

	lt += offset * NFT_PIPAPO_LONGS_PER_M256;
	for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
		int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;

		if (first) {
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize);
			NFT_PIPAPO_AVX2_AND(4, 0, 1);
		} else {
			/* %ymm0: result of the previous field */
			NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 0, pkt[0], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);

			/* Depends on the loads issued just above */
			NFT_PIPAPO_AVX2_AND(3, 0, 1);
			/* Previous result is empty here: leave chunk as is */
			NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
			NFT_PIPAPO_AVX2_AND(4, 3, 2);
		}

		/* Final result for this chunk is in %ymm4 */
		NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch);
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 4);

		b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
		if (last)
			return b;

		/* Only the first matching chunk sets the return value */
		if (unlikely(ret == -1))
			ret = b / XSAVE_YMM_SIZE;

		continue;
nomatch:
		/* %ymm15 is expected to be zero, see nft_pipapo_avx2_prepare() */
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
nothing:
		;
	}

	return ret;
}
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
/**
 * nft_pipapo_avx2_lookup_8b_4() - AVX2 lookup for a field of 4 eight-bit groups
 * @map:	Result bitmap of the previous field; ignored on input if @first.
 *		Each processed chunk is overwritten with this field's result
 * @fill:	Bitmap passed to nft_pipapo_avx2_refill() as destination
 * @f:		Field: f->lt is the lookup table, f->mt the mapping table,
 *		f->bsize the distance in longs between lookup table entries
 * @offset:	Index of the first vector-sized chunk to process
 * @pkt:	Key data for this field: pkt[0] to pkt[3] select the buckets
 * @first:	Don't AND with @map: only the bucket values are combined
 * @last:	Return the refill result for the first matching chunk at once
 *
 * Return: -1 if no chunk has bits left set. Otherwise, with @last, the value
 * from nft_pipapo_avx2_refill() for the first matching chunk; without @last,
 * that value for the first matching chunk divided by XSAVE_YMM_SIZE.
 */
static int nft_pipapo_avx2_lookup_8b_4(unsigned long *map, unsigned long *fill,
				       struct nft_pipapo_field *f, int offset,
				       const u8 *pkt, bool first, bool last)
{
	int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
	unsigned long *lt = f->lt, bsize = f->bsize;

	lt += offset * NFT_PIPAPO_LONGS_PER_M256;
	for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
		int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;

		if (first) {
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(0,  lt, 0, pkt[0], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(1,  lt, 1, pkt[1], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(2,  lt, 2, pkt[2], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(3,  lt, 3, pkt[3], bsize);

			/* Depend on the loads issued just above */
			NFT_PIPAPO_AVX2_AND(4, 0, 1);
			NFT_PIPAPO_AVX2_AND(5, 2, 3);
			NFT_PIPAPO_AVX2_AND(0, 4, 5);
		} else {
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(0,  lt, 0, pkt[0], bsize);
			/* %ymm1: result of the previous field */
			NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(2,  lt, 1, pkt[1], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(3,  lt, 2, pkt[2], bsize);
			NFT_PIPAPO_AVX2_BUCKET_LOAD8(4,  lt, 3, pkt[3], bsize);

			NFT_PIPAPO_AVX2_AND(5, 0, 1);
			/* Previous result is empty here: leave chunk as is */
			NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
			NFT_PIPAPO_AVX2_AND(6, 2, 3);

			/* Each of these depends on results computed right before */
			NFT_PIPAPO_AVX2_AND(7, 4, 5);
			NFT_PIPAPO_AVX2_AND(0, 6, 7);
		}

		/* Final result for this chunk is in %ymm0 */
		NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nomatch);
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 0);

		b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
		if (last)
			return b;

		/* Only the first matching chunk sets the return value */
		if (unlikely(ret == -1))
			ret = b / XSAVE_YMM_SIZE;

		continue;

nomatch:
		/* %ymm15 is expected to be zero, see nft_pipapo_avx2_prepare() */
		NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
nothing:
		;
	}

	return ret;
}
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill,
871 struct nft_pipapo_field *f, int offset,
872 const u8 *pkt, bool first, bool last)
873{
874 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
875 unsigned long *lt = f->lt, bsize = f->bsize;
876
877 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
878 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
879 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
880
881 if (first) {
882 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
883 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 1, pkt[1], bsize);
884 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 2, pkt[2], bsize);
885 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 3, pkt[3], bsize);
886 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize);
887
888 NFT_PIPAPO_AVX2_AND(5, 0, 1);
889 NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize);
890 NFT_PIPAPO_AVX2_AND(7, 2, 3);
891
892
893 NFT_PIPAPO_AVX2_AND(0, 4, 5);
894 NFT_PIPAPO_AVX2_AND(1, 6, 7);
895 NFT_PIPAPO_AVX2_AND(4, 0, 1);
896 } else {
897 NFT_PIPAPO_AVX2_BUCKET_LOAD8(0, lt, 0, pkt[0], bsize);
898 NFT_PIPAPO_AVX2_LOAD(1, map[i_ul]);
899 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);
900 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize);
901 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize);
902
903 NFT_PIPAPO_AVX2_AND(5, 0, 1);
904 NFT_PIPAPO_AVX2_NOMATCH_GOTO(1, nothing);
905
906 NFT_PIPAPO_AVX2_AND(6, 2, 3);
907 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 4, pkt[4], bsize);
908 NFT_PIPAPO_AVX2_AND(0, 4, 5);
909 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 5, pkt[5], bsize);
910 NFT_PIPAPO_AVX2_AND(2, 6, 7);
911
912
913 NFT_PIPAPO_AVX2_AND(3, 0, 1);
914 NFT_PIPAPO_AVX2_AND(4, 2, 3);
915 }
916
917 NFT_PIPAPO_AVX2_NOMATCH_GOTO(4, nomatch);
918 NFT_PIPAPO_AVX2_STORE(map[i_ul], 4);
919
920 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
921 if (last)
922 return b;
923
924 if (unlikely(ret == -1))
925 ret = b / XSAVE_YMM_SIZE;
926
927 continue;
928
929nomatch:
930 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
931nothing:
932 ;
933 }
934
935 return ret;
936}
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill,
956 struct nft_pipapo_field *f, int offset,
957 const u8 *pkt, bool first, bool last)
958{
959 int i, ret = -1, m256_size = f->bsize / NFT_PIPAPO_LONGS_PER_M256, b;
960 unsigned long *lt = f->lt, bsize = f->bsize;
961
962 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
963 for (i = offset; i < m256_size; i++, lt += NFT_PIPAPO_LONGS_PER_M256) {
964 int i_ul = i * NFT_PIPAPO_LONGS_PER_M256;
965
966 if (!first)
967 NFT_PIPAPO_AVX2_LOAD(0, map[i_ul]);
968
969 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 0, pkt[0], bsize);
970 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 1, pkt[1], bsize);
971 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 2, pkt[2], bsize);
972 if (!first) {
973 NFT_PIPAPO_AVX2_NOMATCH_GOTO(0, nothing);
974 NFT_PIPAPO_AVX2_AND(1, 1, 0);
975 }
976 NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 3, pkt[3], bsize);
977
978 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 4, pkt[4], bsize);
979 NFT_PIPAPO_AVX2_AND(6, 1, 2);
980 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 5, pkt[5], bsize);
981 NFT_PIPAPO_AVX2_AND(0, 3, 4);
982 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 6, pkt[6], bsize);
983
984 NFT_PIPAPO_AVX2_BUCKET_LOAD8(2, lt, 7, pkt[7], bsize);
985 NFT_PIPAPO_AVX2_AND(3, 5, 6);
986 NFT_PIPAPO_AVX2_AND(4, 0, 1);
987 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 8, pkt[8], bsize);
988
989 NFT_PIPAPO_AVX2_AND(6, 2, 3);
990 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 9, pkt[9], bsize);
991 NFT_PIPAPO_AVX2_AND(0, 4, 5);
992 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 10, pkt[10], bsize);
993 NFT_PIPAPO_AVX2_AND(2, 6, 7);
994 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 11, pkt[11], bsize);
995 NFT_PIPAPO_AVX2_AND(4, 0, 1);
996 NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 12, pkt[12], bsize);
997 NFT_PIPAPO_AVX2_AND(6, 2, 3);
998 NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 13, pkt[13], bsize);
999 NFT_PIPAPO_AVX2_AND(0, 4, 5);
1000 NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 14, pkt[14], bsize);
1001 NFT_PIPAPO_AVX2_AND(2, 6, 7);
1002 NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 15, pkt[15], bsize);
1003 NFT_PIPAPO_AVX2_AND(4, 0, 1);
1004
1005
1006 NFT_PIPAPO_AVX2_AND(5, 2, 3);
1007 NFT_PIPAPO_AVX2_AND(6, 4, 5);
1008
1009 NFT_PIPAPO_AVX2_NOMATCH_GOTO(6, nomatch);
1010 NFT_PIPAPO_AVX2_STORE(map[i_ul], 6);
1011
1012 b = nft_pipapo_avx2_refill(i_ul, &map[i_ul], fill, f->mt, last);
1013 if (last)
1014 return b;
1015
1016 if (unlikely(ret == -1))
1017 ret = b / XSAVE_YMM_SIZE;
1018
1019 continue;
1020
1021nomatch:
1022 NFT_PIPAPO_AVX2_STORE(map[i_ul], 15);
1023nothing:
1024 ;
1025 }
1026
1027 return ret;
1028}
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
1048 struct nft_pipapo_field *f, int offset,
1049 const u8 *pkt, bool first, bool last)
1050{
1051 unsigned long *lt = f->lt, bsize = f->bsize;
1052 int i, ret = -1, b;
1053
1054 lt += offset * NFT_PIPAPO_LONGS_PER_M256;
1055
1056 if (first)
1057 memset(map, 0xff, bsize * sizeof(*map));
1058
1059 for (i = offset; i < bsize; i++) {
1060 if (f->bb == 8)
1061 pipapo_and_field_buckets_8bit(f, map, pkt);
1062 else
1063 pipapo_and_field_buckets_4bit(f, map, pkt);
1064 NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
1065
1066 b = pipapo_refill(map, bsize, f->rules, fill, f->mt, last);
1067
1068 if (last)
1069 return b;
1070
1071 if (ret == -1)
1072 ret = b / XSAVE_YMM_SIZE;
1073 }
1074
1075 return ret;
1076}
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
1087 struct nft_set_estimate *est)
1088{
1089 if (!(features & NFT_SET_INTERVAL) ||
1090 desc->field_count < NFT_PIPAPO_MIN_FIELDS)
1091 return false;
1092
1093 if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_AVX))
1094 return false;
1095
1096 est->size = pipapo_estimate_size(desc);
1097 if (!est->size)
1098 return false;
1099
1100 est->lookup = NFT_SET_CLASS_O_LOG_N;
1101
1102 est->space = NFT_SET_CLASS_O_N;
1103
1104 return true;
1105}
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
/**
 * nft_pipapo_avx2_lookup() - Set lookup entry point, AVX2 version
 * @net:	Network namespace, used to get the current generation mask
 * @set:	Set to look up in
 * @key:	Key data: fields are read from it one after the other
 * @ext:	On a match, set to the extension of the matching element
 *
 * Falls back to nft_pipapo_lookup() if the FPU can't be used in the current
 * context. Otherwise, each field is handed to the lookup function matching
 * its group width (f->bb) and number of groups (f->groups), or to
 * nft_pipapo_avx2_lookup_slow() if there is none. The value returned for a
 * field is passed as offset to the lookup of the next one.
 *
 * Return: true if an element that is active and not expired matches,
 * false otherwise, or if no scratch area is available for this CPU.
 */
bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
			    const u32 *key, const struct nft_set_ext **ext)
{
	struct nft_pipapo *priv = nft_set_priv(set);
	unsigned long *res, *fill, *scratch;
	u8 genmask = nft_genmask_cur(net);
	const u8 *rp = (const u8 *)key;
	struct nft_pipapo_match *m;
	struct nft_pipapo_field *f;
	bool map_index;
	int i, ret = 0;

	/* Vector registers can't be touched here: use the generic lookup */
	if (unlikely(!irq_fpu_usable()))
		return nft_pipapo_lookup(net, set, key, ext);

	m = rcu_dereference(priv->match);

	/* Everything from here to kernel_fpu_end() may use YMM registers.
	 * NOTE(review): a zero mask is passed, presumably because no extra FPU
	 * control state needs to be initialised for the integer vector
	 * instructions used in this file -- confirm against the
	 * kernel_fpu_begin_mask() documentation.
	 */
	kernel_fpu_begin_mask(0);

	scratch = *raw_cpu_ptr(m->scratch_aligned);
	if (unlikely(!scratch)) {
		kernel_fpu_end();
		return false;
	}
	map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);

	/* The scratch area holds two maps of m->bsize_max longs each: the
	 * per-CPU index tells which one is the result map to start from.
	 */
	res  = scratch + (map_index ? m->bsize_max : 0);
	fill = scratch + (map_index ? 0 : m->bsize_max);

	/* The result map is not initialised here: lookups for the first field
	 * don't read it.
	 */

	nft_pipapo_avx2_prepare();

next_match:
	nft_pipapo_for_each_field(f, i, m) {
		bool last = i == m->field_count - 1, first = !i;

#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n)				\
		(ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f,	\
							 ret, rp,	\
							 first, last))

		if (likely(f->bb == 8)) {
			if (f->groups == 1) {
				NFT_SET_PIPAPO_AVX2_LOOKUP(8, 1);
			} else if (f->groups == 2) {
				NFT_SET_PIPAPO_AVX2_LOOKUP(8, 2);
			} else if (f->groups == 4) {
				NFT_SET_PIPAPO_AVX2_LOOKUP(8, 4);
			} else if (f->groups == 6) {
				NFT_SET_PIPAPO_AVX2_LOOKUP(8, 6);
			} else if (f->groups == 16) {
				NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16);
			} else {
				ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
								  ret, rp,
								  first, last);
			}
		} else {
			if (f->groups == 2) {
				NFT_SET_PIPAPO_AVX2_LOOKUP(4, 2);
			} else if (f->groups == 4) {
				NFT_SET_PIPAPO_AVX2_LOOKUP(4, 4);
			} else if (f->groups == 8) {
				NFT_SET_PIPAPO_AVX2_LOOKUP(4, 8);
			} else if (f->groups == 12) {
				NFT_SET_PIPAPO_AVX2_LOOKUP(4, 12);
			} else if (f->groups == 32) {
				NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32);
			} else {
				ret = nft_pipapo_avx2_lookup_slow(res, fill, f,
								  ret, rp,
								  first, last);
			}
		}
		NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;

#undef NFT_SET_PIPAPO_AVX2_LOOKUP

		/* No match for this field: no match at all */
		if (ret < 0)
			goto out;

		if (last) {
			/* For the last field, ret is used as index into f->mt */
			*ext = &f->mt[ret].e->ext;
			if (unlikely(nft_set_elem_expired(*ext) ||
				     !nft_set_elem_active(*ext, genmask))) {
				/* NOTE(review): this restarts from the first
				 * field with ret reset to zero, but rp, res
				 * and fill keep their current values --
				 * confirm this is what is intended.
				 */
				ret = 0;
				goto next_match;
			}

			goto out;
		}

		/* The map just filled is the input for the next field */
		swap(res, fill);
		rp += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
	}

out:
	/* Flip the per-CPU index if the field counter is odd on exit.
	 * NOTE(review): presumably keeps the stored index in step with the
	 * number of swap() calls above -- confirm.
	 */
	if (i % 2)
		raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
	kernel_fpu_end();

	return ret >= 0;
}
1231