1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331#include <linux/kernel.h>
332#include <linux/init.h>
333#include <linux/log2.h>
334#include <linux/module.h>
335#include <linux/netlink.h>
336#include <linux/netfilter.h>
337#include <linux/netfilter/nf_tables.h>
338#include <net/netfilter/nf_tables_core.h>
339#include <uapi/linux/netfilter/nf_tables.h>
340#include <net/ipv6.h>
341#include <linux/bitmap.h>
342#include <linux/bitops.h>
343
344
345#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT
346
347
348#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr))
349#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE)
350
351
352#define NFT_PIPAPO_GROUP_BITS 4
353#define NFT_PIPAPO_GROUPS_PER_BYTE (BITS_PER_BYTE / NFT_PIPAPO_GROUP_BITS)
354
355
356#define NFT_PIPAPO_GROUPS_PADDED_SIZE(x) \
357 (round_up((x) / NFT_PIPAPO_GROUPS_PER_BYTE, sizeof(u32)))
358#define NFT_PIPAPO_GROUPS_PADDING(x) \
359 (NFT_PIPAPO_GROUPS_PADDED_SIZE((x)) - (x) / NFT_PIPAPO_GROUPS_PER_BYTE)
360
361
362#define NFT_PIPAPO_BUCKETS (1 << NFT_PIPAPO_GROUP_BITS)
363
364
365#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2))
366
367
368
369
370#if BITS_PER_LONG == 64
371#define NFT_PIPAPO_MAP_TOBITS 32
372#else
373#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS)
374#endif
375
376
377#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \
378 - (1UL << NFT_PIPAPO_MAP_NBITS))
379
380#define nft_pipapo_for_each_field(field, index, match) \
381 for ((field) = (match)->f, (index) = 0; \
382 (index) < (match)->field_count; \
383 (index)++, (field)++)
384
385
386
387
388
389
390
391union nft_pipapo_map_bucket {
392 struct {
393#if BITS_PER_LONG == 64
394 static_assert(NFT_PIPAPO_MAP_TOBITS <= 32);
395 u32 to;
396
397 static_assert(NFT_PIPAPO_MAP_NBITS <= 32);
398 u32 n;
399#else
400 unsigned long to:NFT_PIPAPO_MAP_TOBITS;
401 unsigned long n:NFT_PIPAPO_MAP_NBITS;
402#endif
403 };
404 struct nft_pipapo_elem *e;
405};
406
407
408
409
410
411
412
413
414
415struct nft_pipapo_field {
416 int groups;
417 unsigned long rules;
418 size_t bsize;
419 unsigned long *lt;
420 union nft_pipapo_map_bucket *mt;
421};
422
423
424
425
426
427
428
429
430
431struct nft_pipapo_match {
432 int field_count;
433 unsigned long * __percpu *scratch;
434 size_t bsize_max;
435 struct rcu_head rcu;
436 struct nft_pipapo_field f[0];
437};
438
439
440static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
441
442
443
444
445
446
447
448
449
450
451struct nft_pipapo {
452 struct nft_pipapo_match __rcu *match;
453 struct nft_pipapo_match *clone;
454 int groups;
455 int width;
456 bool dirty;
457 unsigned long last_gc;
458};
459
460struct nft_pipapo_elem;
461
462
463
464
465
466struct nft_pipapo_elem {
467 struct nft_set_ext ext;
468};
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487static int pipapo_refill(unsigned long *map, int len, int rules,
488 unsigned long *dst, union nft_pipapo_map_bucket *mt,
489 bool match_only)
490{
491 unsigned long bitset;
492 int k, ret = -1;
493
494 for (k = 0; k < len; k++) {
495 bitset = map[k];
496 while (bitset) {
497 unsigned long t = bitset & -bitset;
498 int r = __builtin_ctzl(bitset);
499 int i = k * BITS_PER_LONG + r;
500
501 if (unlikely(i >= rules)) {
502 map[k] = 0;
503 return -1;
504 }
505
506 if (unlikely(match_only)) {
507 bitmap_clear(map, i, 1);
508 return i;
509 }
510
511 ret = 0;
512
513 bitmap_set(dst, mt[i].to, mt[i].n);
514
515 bitset ^= t;
516 }
517 map[k] = 0;
518 }
519
520 return ret;
521}
522
523
524
525
526
527
528
529
530
531
532
533
534static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
535 const u32 *key, const struct nft_set_ext **ext)
536{
537 struct nft_pipapo *priv = nft_set_priv(set);
538 unsigned long *res_map, *fill_map;
539 u8 genmask = nft_genmask_cur(net);
540 const u8 *rp = (const u8 *)key;
541 struct nft_pipapo_match *m;
542 struct nft_pipapo_field *f;
543 bool map_index;
544 int i;
545
546 local_bh_disable();
547
548 map_index = raw_cpu_read(nft_pipapo_scratch_index);
549
550 m = rcu_dereference(priv->match);
551
552 if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
553 goto out;
554
555 res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
556 fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
557
558 memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
559
560 nft_pipapo_for_each_field(f, i, m) {
561 bool last = i == m->field_count - 1;
562 unsigned long *lt = f->lt;
563 int b, group;
564
565
566
567
568 for (group = 0; group < f->groups; group += 2) {
569 u8 v;
570
571 v = *rp >> 4;
572 __bitmap_and(res_map, res_map, lt + v * f->bsize,
573 f->bsize * BITS_PER_LONG);
574 lt += f->bsize * NFT_PIPAPO_BUCKETS;
575
576 v = *rp & 0x0f;
577 rp++;
578 __bitmap_and(res_map, res_map, lt + v * f->bsize,
579 f->bsize * BITS_PER_LONG);
580 lt += f->bsize * NFT_PIPAPO_BUCKETS;
581 }
582
583
584
585
586
587
588
589
590next_match:
591 b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
592 last);
593 if (b < 0) {
594 raw_cpu_write(nft_pipapo_scratch_index, map_index);
595 local_bh_enable();
596
597 return false;
598 }
599
600 if (last) {
601 *ext = &f->mt[b].e->ext;
602 if (unlikely(nft_set_elem_expired(*ext) ||
603 !nft_set_elem_active(*ext, genmask)))
604 goto next_match;
605
606
607
608
609
610
611 raw_cpu_write(nft_pipapo_scratch_index, map_index);
612 local_bh_enable();
613
614 return true;
615 }
616
617
618
619
620
621 map_index = !map_index;
622 swap(res_map, fill_map);
623
624 rp += NFT_PIPAPO_GROUPS_PADDING(f->groups);
625 }
626
627out:
628 local_bh_enable();
629 return false;
630}
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645static struct nft_pipapo_elem *pipapo_get(const struct net *net,
646 const struct nft_set *set,
647 const u8 *data, u8 genmask)
648{
649 struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
650 struct nft_pipapo *priv = nft_set_priv(set);
651 struct nft_pipapo_match *m = priv->clone;
652 unsigned long *res_map, *fill_map = NULL;
653 struct nft_pipapo_field *f;
654 int i;
655
656 res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), GFP_ATOMIC);
657 if (!res_map) {
658 ret = ERR_PTR(-ENOMEM);
659 goto out;
660 }
661
662 fill_map = kcalloc(m->bsize_max, sizeof(*res_map), GFP_ATOMIC);
663 if (!fill_map) {
664 ret = ERR_PTR(-ENOMEM);
665 goto out;
666 }
667
668 memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
669
670 nft_pipapo_for_each_field(f, i, m) {
671 bool last = i == m->field_count - 1;
672 unsigned long *lt = f->lt;
673 int b, group;
674
675
676
677
678 for (group = 0; group < f->groups; group++) {
679 u8 v;
680
681 if (group % 2) {
682 v = *data & 0x0f;
683 data++;
684 } else {
685 v = *data >> 4;
686 }
687 __bitmap_and(res_map, res_map, lt + v * f->bsize,
688 f->bsize * BITS_PER_LONG);
689
690 lt += f->bsize * NFT_PIPAPO_BUCKETS;
691 }
692
693
694
695
696
697
698
699
700next_match:
701 b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
702 last);
703 if (b < 0)
704 goto out;
705
706 if (last) {
707 if (nft_set_elem_expired(&f->mt[b].e->ext) ||
708 (genmask &&
709 !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
710 goto next_match;
711
712 ret = f->mt[b].e;
713 goto out;
714 }
715
716 data += NFT_PIPAPO_GROUPS_PADDING(f->groups);
717
718
719
720
721
722
723 swap(res_map, fill_map);
724 }
725
726out:
727 kfree(fill_map);
728 kfree(res_map);
729 return ret;
730}
731
732
733
734
735
736
737
738
739void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
740 const struct nft_set_elem *elem, unsigned int flags)
741{
742 return pipapo_get(net, set, (const u8 *)elem->key.val.data,
743 nft_genmask_cur(net));
744}
745
746
747
748
749
750
751
752
753
754
755
756
757
758static int pipapo_resize(struct nft_pipapo_field *f, int old_rules, int rules)
759{
760 long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p;
761 union nft_pipapo_map_bucket *new_mt, *old_mt = f->mt;
762 size_t new_bucket_size, copy;
763 int group, bucket;
764
765 new_bucket_size = DIV_ROUND_UP(rules, BITS_PER_LONG);
766
767 if (new_bucket_size == f->bsize)
768 goto mt;
769
770 if (new_bucket_size > f->bsize)
771 copy = f->bsize;
772 else
773 copy = new_bucket_size;
774
775 new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS * new_bucket_size *
776 sizeof(*new_lt), GFP_KERNEL);
777 if (!new_lt)
778 return -ENOMEM;
779
780 new_p = new_lt;
781 old_p = old_lt;
782 for (group = 0; group < f->groups; group++) {
783 for (bucket = 0; bucket < NFT_PIPAPO_BUCKETS; bucket++) {
784 memcpy(new_p, old_p, copy * sizeof(*new_p));
785 new_p += copy;
786 old_p += copy;
787
788 if (new_bucket_size > f->bsize)
789 new_p += new_bucket_size - f->bsize;
790 else
791 old_p += f->bsize - new_bucket_size;
792 }
793 }
794
795mt:
796 new_mt = kvmalloc(rules * sizeof(*new_mt), GFP_KERNEL);
797 if (!new_mt) {
798 kvfree(new_lt);
799 return -ENOMEM;
800 }
801
802 memcpy(new_mt, f->mt, min(old_rules, rules) * sizeof(*new_mt));
803 if (rules > old_rules) {
804 memset(new_mt + old_rules, 0,
805 (rules - old_rules) * sizeof(*new_mt));
806 }
807
808 if (new_lt) {
809 f->bsize = new_bucket_size;
810 f->lt = new_lt;
811 kvfree(old_lt);
812 }
813
814 f->mt = new_mt;
815 kvfree(old_mt);
816
817 return 0;
818}
819
820
821
822
823
824
825
826
827static void pipapo_bucket_set(struct nft_pipapo_field *f, int rule, int group,
828 int v)
829{
830 unsigned long *pos;
831
832 pos = f->lt + f->bsize * NFT_PIPAPO_BUCKETS * group;
833 pos += f->bsize * v;
834
835 __set_bit(rule, pos);
836}
837
838
839
840
841
842
843
844
845
846
847
848
849static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
850 int mask_bits)
851{
852 int rule = f->rules++, group, ret;
853
854 ret = pipapo_resize(f, f->rules - 1, f->rules);
855 if (ret)
856 return ret;
857
858 for (group = 0; group < f->groups; group++) {
859 int i, v;
860 u8 mask;
861
862 if (group % 2)
863 v = k[group / 2] & 0x0f;
864 else
865 v = k[group / 2] >> 4;
866
867 if (mask_bits >= (group + 1) * 4) {
868
869 pipapo_bucket_set(f, rule, group, v);
870 } else if (mask_bits <= group * 4) {
871
872 for (i = 0; i < NFT_PIPAPO_BUCKETS; i++)
873 pipapo_bucket_set(f, rule, group, i);
874 } else {
875
876 mask = 0x0f >> (mask_bits - group * 4);
877 for (i = 0; i < NFT_PIPAPO_BUCKETS; i++) {
878 if ((i & ~mask) == (v & ~mask))
879 pipapo_bucket_set(f, rule, group, i);
880 }
881 }
882 }
883
884 return 1;
885}
886
887
888
889
890
891
892
893
894
895
896
897static bool pipapo_step_diff(u8 *base, int step, int len)
898{
899
900#ifdef __BIG_ENDIAN__
901 return !(BIT(step % BITS_PER_BYTE) & base[step / BITS_PER_BYTE]);
902#else
903 return !(BIT(step % BITS_PER_BYTE) &
904 base[len - 1 - step / BITS_PER_BYTE]);
905#endif
906}
907
908
909
910
911
912
913
914
915
916
917
918
919static bool pipapo_step_after_end(const u8 *base, const u8 *end, int step,
920 int len)
921{
922 u8 tmp[NFT_PIPAPO_MAX_BYTES];
923 int i;
924
925 memcpy(tmp, base, len);
926
927
928 for (i = 0; i <= step; i++)
929#ifdef __BIG_ENDIAN__
930 tmp[i / BITS_PER_BYTE] |= BIT(i % BITS_PER_BYTE);
931#else
932 tmp[len - 1 - i / BITS_PER_BYTE] |= BIT(i % BITS_PER_BYTE);
933#endif
934
935 return memcmp(tmp, end, len) > 0;
936}
937
938
939
940
941
942
943
944static void pipapo_base_sum(u8 *base, int step, int len)
945{
946 bool carry = false;
947 int i;
948
949
950#ifdef __BIG_ENDIAN__
951 for (i = step / BITS_PER_BYTE; i < len; i++) {
952#else
953 for (i = len - 1 - step / BITS_PER_BYTE; i >= 0; i--) {
954#endif
955 if (carry)
956 base[i]++;
957 else
958 base[i] += 1 << (step % BITS_PER_BYTE);
959
960 if (base[i])
961 break;
962
963 carry = true;
964 }
965}
966
967
968
969
970
971
972
973
974
975
976
977
978
979static int pipapo_expand(struct nft_pipapo_field *f,
980 const u8 *start, const u8 *end, int len)
981{
982 int step, masks = 0, bytes = DIV_ROUND_UP(len, BITS_PER_BYTE);
983 u8 base[NFT_PIPAPO_MAX_BYTES];
984
985 memcpy(base, start, bytes);
986 while (memcmp(base, end, bytes) <= 0) {
987 int err;
988
989 step = 0;
990 while (pipapo_step_diff(base, step, bytes)) {
991 if (pipapo_step_after_end(base, end, step, bytes))
992 break;
993
994 step++;
995 if (step >= len) {
996 if (!masks) {
997 pipapo_insert(f, base, 0);
998 masks = 1;
999 }
1000 goto out;
1001 }
1002 }
1003
1004 err = pipapo_insert(f, base, len - step);
1005
1006 if (err < 0)
1007 return err;
1008
1009 masks++;
1010 pipapo_base_sum(base, step, bytes);
1011 }
1012out:
1013 return masks;
1014}
1015
1016
1017
1018
1019
1020
1021
1022
1023static void pipapo_map(struct nft_pipapo_match *m,
1024 union nft_pipapo_map_bucket map[NFT_PIPAPO_MAX_FIELDS],
1025 struct nft_pipapo_elem *e)
1026{
1027 struct nft_pipapo_field *f;
1028 int i, j;
1029
1030 for (i = 0, f = m->f; i < m->field_count - 1; i++, f++) {
1031 for (j = 0; j < map[i].n; j++) {
1032 f->mt[map[i].to + j].to = map[i + 1].to;
1033 f->mt[map[i].to + j].n = map[i + 1].n;
1034 }
1035 }
1036
1037
1038 for (j = 0; j < map[i].n; j++)
1039 f->mt[map[i].to + j].e = e;
1040}
1041
1042
1043
1044
1045
1046
1047
1048
1049static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
1050 unsigned long bsize_max)
1051{
1052 int i;
1053
1054 for_each_possible_cpu(i) {
1055 unsigned long *scratch;
1056
1057 scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2,
1058 GFP_KERNEL, cpu_to_node(i));
1059 if (!scratch) {
1060
1061
1062
1063
1064
1065
1066
1067 return -ENOMEM;
1068 }
1069
1070 kfree(*per_cpu_ptr(clone->scratch, i));
1071
1072 *per_cpu_ptr(clone->scratch, i) = scratch;
1073 }
1074
1075 return 0;
1076}
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
1088 const struct nft_set_elem *elem,
1089 struct nft_set_ext **ext2)
1090{
1091 const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
1092 union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
1093 const u8 *start = (const u8 *)elem->key.val.data, *end;
1094 struct nft_pipapo_elem *e = elem->priv, *dup;
1095 struct nft_pipapo *priv = nft_set_priv(set);
1096 struct nft_pipapo_match *m = priv->clone;
1097 u8 genmask = nft_genmask_next(net);
1098 struct nft_pipapo_field *f;
1099 int i, bsize_max, err = 0;
1100
1101 dup = pipapo_get(net, set, start, genmask);
1102 if (PTR_ERR(dup) == -ENOENT) {
1103 if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) {
1104 end = (const u8 *)nft_set_ext_key_end(ext)->data;
1105 dup = pipapo_get(net, set, end, nft_genmask_next(net));
1106 } else {
1107 end = start;
1108 }
1109 }
1110
1111 if (PTR_ERR(dup) != -ENOENT) {
1112 if (IS_ERR(dup))
1113 return PTR_ERR(dup);
1114 *ext2 = &dup->ext;
1115 return -EEXIST;
1116 }
1117
1118
1119 nft_pipapo_for_each_field(f, i, m) {
1120 const u8 *start_p = start, *end_p = end;
1121
1122 if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX)
1123 return -ENOSPC;
1124
1125 if (memcmp(start_p, end_p,
1126 f->groups / NFT_PIPAPO_GROUPS_PER_BYTE) > 0)
1127 return -EINVAL;
1128
1129 start_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
1130 end_p += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
1131 }
1132
1133
1134 priv->dirty = true;
1135
1136 bsize_max = m->bsize_max;
1137
1138 nft_pipapo_for_each_field(f, i, m) {
1139 int ret;
1140
1141 rulemap[i].to = f->rules;
1142
1143 ret = memcmp(start, end,
1144 f->groups / NFT_PIPAPO_GROUPS_PER_BYTE);
1145 if (!ret) {
1146 ret = pipapo_insert(f, start,
1147 f->groups * NFT_PIPAPO_GROUP_BITS);
1148 } else {
1149 ret = pipapo_expand(f, start, end,
1150 f->groups * NFT_PIPAPO_GROUP_BITS);
1151 }
1152
1153 if (f->bsize > bsize_max)
1154 bsize_max = f->bsize;
1155
1156 rulemap[i].n = ret;
1157
1158 start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
1159 end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
1160 }
1161
1162 if (!*get_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) {
1163 put_cpu_ptr(m->scratch);
1164
1165 err = pipapo_realloc_scratch(m, bsize_max);
1166 if (err)
1167 return err;
1168
1169 m->bsize_max = bsize_max;
1170 } else {
1171 put_cpu_ptr(m->scratch);
1172 }
1173
1174 *ext2 = &e->ext;
1175
1176 pipapo_map(m, rulemap, e);
1177
1178 return 0;
1179}
1180
1181
1182
1183
1184
1185
1186
1187static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
1188{
1189 struct nft_pipapo_field *dst, *src;
1190 struct nft_pipapo_match *new;
1191 int i;
1192
1193 new = kmalloc(sizeof(*new) + sizeof(*dst) * old->field_count,
1194 GFP_KERNEL);
1195 if (!new)
1196 return ERR_PTR(-ENOMEM);
1197
1198 new->field_count = old->field_count;
1199 new->bsize_max = old->bsize_max;
1200
1201 new->scratch = alloc_percpu(*new->scratch);
1202 if (!new->scratch)
1203 goto out_scratch;
1204
1205 for_each_possible_cpu(i)
1206 *per_cpu_ptr(new->scratch, i) = NULL;
1207
1208 if (pipapo_realloc_scratch(new, old->bsize_max))
1209 goto out_scratch_realloc;
1210
1211 rcu_head_init(&new->rcu);
1212
1213 src = old->f;
1214 dst = new->f;
1215
1216 for (i = 0; i < old->field_count; i++) {
1217 memcpy(dst, src, offsetof(struct nft_pipapo_field, lt));
1218
1219 dst->lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS *
1220 src->bsize * sizeof(*dst->lt),
1221 GFP_KERNEL);
1222 if (!dst->lt)
1223 goto out_lt;
1224
1225 memcpy(dst->lt, src->lt,
1226 src->bsize * sizeof(*dst->lt) *
1227 src->groups * NFT_PIPAPO_BUCKETS);
1228
1229 dst->mt = kvmalloc(src->rules * sizeof(*src->mt), GFP_KERNEL);
1230 if (!dst->mt)
1231 goto out_mt;
1232
1233 memcpy(dst->mt, src->mt, src->rules * sizeof(*src->mt));
1234 src++;
1235 dst++;
1236 }
1237
1238 return new;
1239
1240out_mt:
1241 kvfree(dst->lt);
1242out_lt:
1243 for (dst--; i > 0; i--) {
1244 kvfree(dst->mt);
1245 kvfree(dst->lt);
1246 dst--;
1247 }
1248out_scratch_realloc:
1249 for_each_possible_cpu(i)
1250 kfree(*per_cpu_ptr(new->scratch, i));
1251out_scratch:
1252 free_percpu(new->scratch);
1253 kfree(new);
1254
1255 return ERR_PTR(-ENOMEM);
1256}
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287static int pipapo_rules_same_key(struct nft_pipapo_field *f, int first)
1288{
1289 struct nft_pipapo_elem *e = NULL;
1290 int r;
1291
1292 for (r = first; r < f->rules; r++) {
1293 if (r != first && e != f->mt[r].e)
1294 return r - first;
1295
1296 e = f->mt[r].e;
1297 }
1298
1299 if (r != first)
1300 return r - first;
1301
1302 return 0;
1303}
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343static void pipapo_unmap(union nft_pipapo_map_bucket *mt, int rules,
1344 int start, int n, int to_offset, bool is_last)
1345{
1346 int i;
1347
1348 memmove(mt + start, mt + start + n, (rules - start - n) * sizeof(*mt));
1349 memset(mt + rules - n, 0, n * sizeof(*mt));
1350
1351 if (is_last)
1352 return;
1353
1354 for (i = start; i < rules - n; i++)
1355 mt[i].to -= to_offset;
1356}
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395static void pipapo_drop(struct nft_pipapo_match *m,
1396 union nft_pipapo_map_bucket rulemap[])
1397{
1398 struct nft_pipapo_field *f;
1399 int i;
1400
1401 nft_pipapo_for_each_field(f, i, m) {
1402 int g;
1403
1404 for (g = 0; g < f->groups; g++) {
1405 unsigned long *pos;
1406 int b;
1407
1408 pos = f->lt + g * NFT_PIPAPO_BUCKETS * f->bsize;
1409
1410 for (b = 0; b < NFT_PIPAPO_BUCKETS; b++) {
1411 bitmap_cut(pos, pos, rulemap[i].to,
1412 rulemap[i].n,
1413 f->bsize * BITS_PER_LONG);
1414
1415 pos += f->bsize;
1416 }
1417 }
1418
1419 pipapo_unmap(f->mt, f->rules, rulemap[i].to, rulemap[i].n,
1420 rulemap[i + 1].n, i == m->field_count - 1);
1421 if (pipapo_resize(f, f->rules, f->rules - rulemap[i].n)) {
1422
1423
1424
1425 ;
1426 }
1427 f->rules -= rulemap[i].n;
1428 }
1429}
1430
1431
1432
1433
1434
1435
1436static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
1437{
1438 struct nft_pipapo *priv = nft_set_priv(set);
1439 int rules_f0, first_rule = 0;
1440
1441 while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
1442 union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
1443 struct nft_pipapo_field *f;
1444 struct nft_pipapo_elem *e;
1445 int i, start, rules_fx;
1446
1447 start = first_rule;
1448 rules_fx = rules_f0;
1449
1450 nft_pipapo_for_each_field(f, i, m) {
1451 rulemap[i].to = start;
1452 rulemap[i].n = rules_fx;
1453
1454 if (i < m->field_count - 1) {
1455 rules_fx = f->mt[start].n;
1456 start = f->mt[start].to;
1457 }
1458 }
1459
1460
1461 f--;
1462 i--;
1463 e = f->mt[rulemap[i].to].e;
1464 if (nft_set_elem_expired(&e->ext) &&
1465 !nft_set_elem_mark_busy(&e->ext)) {
1466 priv->dirty = true;
1467 pipapo_drop(m, rulemap);
1468
1469 rcu_barrier();
1470 nft_set_elem_destroy(set, e, true);
1471
1472
1473
1474
1475 } else {
1476 first_rule += rules_f0;
1477 }
1478 }
1479
1480 priv->last_gc = jiffies;
1481}
1482
1483
1484
1485
1486
1487static void pipapo_free_fields(struct nft_pipapo_match *m)
1488{
1489 struct nft_pipapo_field *f;
1490 int i;
1491
1492 nft_pipapo_for_each_field(f, i, m) {
1493 kvfree(f->lt);
1494 kvfree(f->mt);
1495 }
1496}
1497
1498
1499
1500
1501
1502static void pipapo_reclaim_match(struct rcu_head *rcu)
1503{
1504 struct nft_pipapo_match *m;
1505 int i;
1506
1507 m = container_of(rcu, struct nft_pipapo_match, rcu);
1508
1509 for_each_possible_cpu(i)
1510 kfree(*per_cpu_ptr(m->scratch, i));
1511
1512 free_percpu(m->scratch);
1513
1514 pipapo_free_fields(m);
1515
1516 kfree(m);
1517}
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530static void pipapo_commit(const struct nft_set *set)
1531{
1532 struct nft_pipapo *priv = nft_set_priv(set);
1533 struct nft_pipapo_match *new_clone, *old;
1534
1535 if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
1536 pipapo_gc(set, priv->clone);
1537
1538 if (!priv->dirty)
1539 return;
1540
1541 new_clone = pipapo_clone(priv->clone);
1542 if (IS_ERR(new_clone))
1543 return;
1544
1545 priv->dirty = false;
1546
1547 old = rcu_access_pointer(priv->match);
1548 rcu_assign_pointer(priv->match, priv->clone);
1549 if (old)
1550 call_rcu(&old->rcu, pipapo_reclaim_match);
1551
1552 priv->clone = new_clone;
1553}
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567static void nft_pipapo_activate(const struct net *net,
1568 const struct nft_set *set,
1569 const struct nft_set_elem *elem)
1570{
1571 struct nft_pipapo_elem *e;
1572
1573 e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
1574 if (IS_ERR(e))
1575 return;
1576
1577 nft_set_elem_change_active(net, set, &e->ext);
1578 nft_set_elem_clear_busy(&e->ext);
1579
1580 pipapo_commit(set);
1581}
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
1597 const u8 *data, const struct nft_set_ext *ext)
1598{
1599 struct nft_pipapo_elem *e;
1600
1601 e = pipapo_get(net, set, data, nft_genmask_next(net));
1602 if (IS_ERR(e))
1603 return NULL;
1604
1605 nft_set_elem_change_active(net, set, &e->ext);
1606
1607 return e;
1608}
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618static void *nft_pipapo_deactivate(const struct net *net,
1619 const struct nft_set *set,
1620 const struct nft_set_elem *elem)
1621{
1622 const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
1623
1624 return pipapo_deactivate(net, set, (const u8 *)elem->key.val.data, ext);
1625}
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set,
1646 void *elem)
1647{
1648 struct nft_pipapo_elem *e = elem;
1649
1650 return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext),
1651 &e->ext);
1652}
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701static int pipapo_get_boundaries(struct nft_pipapo_field *f, int first_rule,
1702 int rule_count, u8 *left, u8 *right)
1703{
1704 u8 *l = left, *r = right;
1705 int g, mask_len = 0;
1706
1707 for (g = 0; g < f->groups; g++) {
1708 int b, x0, x1;
1709
1710 x0 = -1;
1711 x1 = -1;
1712 for (b = 0; b < NFT_PIPAPO_BUCKETS; b++) {
1713 unsigned long *pos;
1714
1715 pos = f->lt + (g * NFT_PIPAPO_BUCKETS + b) * f->bsize;
1716 if (test_bit(first_rule, pos) && x0 == -1)
1717 x0 = b;
1718 if (test_bit(first_rule + rule_count - 1, pos))
1719 x1 = b;
1720 }
1721
1722 if (g % 2) {
1723 *(l++) |= x0 & 0x0f;
1724 *(r++) |= x1 & 0x0f;
1725 } else {
1726 *l |= x0 << 4;
1727 *r |= x1 << 4;
1728 }
1729
1730 if (x1 - x0 == 0)
1731 mask_len += 4;
1732 else if (x1 - x0 == 1)
1733 mask_len += 3;
1734 else if (x1 - x0 == 3)
1735 mask_len += 2;
1736 else if (x1 - x0 == 7)
1737 mask_len += 1;
1738 }
1739
1740 return mask_len;
1741}
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753static bool pipapo_match_field(struct nft_pipapo_field *f,
1754 int first_rule, int rule_count,
1755 const u8 *start, const u8 *end)
1756{
1757 u8 right[NFT_PIPAPO_MAX_BYTES] = { 0 };
1758 u8 left[NFT_PIPAPO_MAX_BYTES] = { 0 };
1759
1760 pipapo_get_boundaries(f, first_rule, rule_count, left, right);
1761
1762 return !memcmp(start, left, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE) &&
1763 !memcmp(end, right, f->groups / NFT_PIPAPO_GROUPS_PER_BYTE);
1764}
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
1778 const struct nft_set_elem *elem)
1779{
1780 struct nft_pipapo *priv = nft_set_priv(set);
1781 struct nft_pipapo_match *m = priv->clone;
1782 struct nft_pipapo_elem *e = elem->priv;
1783 int rules_f0, first_rule = 0;
1784 const u8 *data;
1785
1786 data = (const u8 *)nft_set_ext_key(&e->ext);
1787
1788 e = pipapo_get(net, set, data, 0);
1789 if (IS_ERR(e))
1790 return;
1791
1792 while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
1793 union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
1794 const u8 *match_start, *match_end;
1795 struct nft_pipapo_field *f;
1796 int i, start, rules_fx;
1797
1798 match_start = data;
1799 match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
1800
1801 start = first_rule;
1802 rules_fx = rules_f0;
1803
1804 nft_pipapo_for_each_field(f, i, m) {
1805 if (!pipapo_match_field(f, start, rules_fx,
1806 match_start, match_end))
1807 break;
1808
1809 rulemap[i].to = start;
1810 rulemap[i].n = rules_fx;
1811
1812 rules_fx = f->mt[start].n;
1813 start = f->mt[start].to;
1814
1815 match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
1816 match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f->groups);
1817 }
1818
1819 if (i == m->field_count) {
1820 priv->dirty = true;
1821 pipapo_drop(m, rulemap);
1822 pipapo_commit(set);
1823 return;
1824 }
1825
1826 first_rule += rules_f0;
1827 }
1828}
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
1841 struct nft_set_iter *iter)
1842{
1843 struct nft_pipapo *priv = nft_set_priv(set);
1844 struct nft_pipapo_match *m;
1845 struct nft_pipapo_field *f;
1846 int i, r;
1847
1848 rcu_read_lock();
1849 m = rcu_dereference(priv->match);
1850
1851 if (unlikely(!m))
1852 goto out;
1853
1854 for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
1855 ;
1856
1857 for (r = 0; r < f->rules; r++) {
1858 struct nft_pipapo_elem *e;
1859 struct nft_set_elem elem;
1860
1861 if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
1862 continue;
1863
1864 if (iter->count < iter->skip)
1865 goto cont;
1866
1867 e = f->mt[r].e;
1868 if (nft_set_elem_expired(&e->ext))
1869 goto cont;
1870
1871 elem.priv = e;
1872
1873 iter->err = iter->fn(ctx, set, iter, &elem);
1874 if (iter->err < 0)
1875 goto out;
1876
1877cont:
1878 iter->count++;
1879 }
1880
1881out:
1882 rcu_read_unlock();
1883}
1884
1885
1886
1887
1888
1889
1890
1891
1892static u64 nft_pipapo_privsize(const struct nlattr * const nla[],
1893 const struct nft_set_desc *desc)
1894{
1895 return sizeof(struct nft_pipapo);
1896}
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915static bool nft_pipapo_estimate(const struct nft_set_desc *desc, u32 features,
1916 struct nft_set_estimate *est)
1917{
1918 unsigned long entry_size;
1919 int i;
1920
1921 if (!(features & NFT_SET_INTERVAL) || desc->field_count <= 1)
1922 return false;
1923
1924 for (i = 0, entry_size = 0; i < desc->field_count; i++) {
1925 unsigned long rules;
1926
1927 if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES)
1928 return false;
1929
1930
1931
1932
1933
1934 rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2;
1935 entry_size += rules * NFT_PIPAPO_BUCKETS / BITS_PER_BYTE;
1936 entry_size += rules * sizeof(union nft_pipapo_map_bucket);
1937 }
1938
1939
1940 est->size = desc->size * entry_size;
1941 if (est->size && div_u64(est->size, desc->size) != entry_size)
1942 return false;
1943
1944 est->size += sizeof(struct nft_pipapo) +
1945 sizeof(struct nft_pipapo_match) * 2;
1946
1947 est->size += sizeof(struct nft_pipapo_field) * desc->field_count;
1948
1949 est->lookup = NFT_SET_CLASS_O_LOG_N;
1950
1951 est->space = NFT_SET_CLASS_O_N;
1952
1953 return true;
1954}
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968static int nft_pipapo_init(const struct nft_set *set,
1969 const struct nft_set_desc *desc,
1970 const struct nlattr * const nla[])
1971{
1972 struct nft_pipapo *priv = nft_set_priv(set);
1973 struct nft_pipapo_match *m;
1974 struct nft_pipapo_field *f;
1975 int err, i;
1976
1977 if (desc->field_count > NFT_PIPAPO_MAX_FIELDS)
1978 return -EINVAL;
1979
1980 m = kmalloc(sizeof(*priv->match) + sizeof(*f) * desc->field_count,
1981 GFP_KERNEL);
1982 if (!m)
1983 return -ENOMEM;
1984
1985 m->field_count = desc->field_count;
1986 m->bsize_max = 0;
1987
1988 m->scratch = alloc_percpu(unsigned long *);
1989 if (!m->scratch) {
1990 err = -ENOMEM;
1991 goto out_free;
1992 }
1993 for_each_possible_cpu(i)
1994 *per_cpu_ptr(m->scratch, i) = NULL;
1995
1996 rcu_head_init(&m->rcu);
1997
1998 nft_pipapo_for_each_field(f, i, m) {
1999 f->groups = desc->field_len[i] * NFT_PIPAPO_GROUPS_PER_BYTE;
2000 priv->groups += f->groups;
2001
2002 priv->width += round_up(desc->field_len[i], sizeof(u32));
2003
2004 f->bsize = 0;
2005 f->rules = 0;
2006 f->lt = NULL;
2007 f->mt = NULL;
2008 }
2009
2010
2011 priv->clone = pipapo_clone(m);
2012 if (IS_ERR(priv->clone)) {
2013 err = PTR_ERR(priv->clone);
2014 goto out_free;
2015 }
2016
2017 priv->dirty = false;
2018
2019 rcu_assign_pointer(priv->match, m);
2020
2021 return 0;
2022
2023out_free:
2024 free_percpu(m->scratch);
2025 kfree(m);
2026
2027 return err;
2028}
2029
2030
2031
2032
2033
2034static void nft_pipapo_destroy(const struct nft_set *set)
2035{
2036 struct nft_pipapo *priv = nft_set_priv(set);
2037 struct nft_pipapo_match *m;
2038 struct nft_pipapo_field *f;
2039 int i, r, cpu;
2040
2041 m = rcu_dereference_protected(priv->match, true);
2042 if (m) {
2043 rcu_barrier();
2044
2045 for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
2046 ;
2047
2048 for (r = 0; r < f->rules; r++) {
2049 struct nft_pipapo_elem *e;
2050
2051 if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
2052 continue;
2053
2054 e = f->mt[r].e;
2055
2056 nft_set_elem_destroy(set, e, true);
2057 }
2058
2059 for_each_possible_cpu(cpu)
2060 kfree(*per_cpu_ptr(m->scratch, cpu));
2061 free_percpu(m->scratch);
2062
2063 pipapo_free_fields(m);
2064 kfree(m);
2065 priv->match = NULL;
2066 }
2067
2068 if (priv->clone) {
2069 for_each_possible_cpu(cpu)
2070 kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
2071 free_percpu(priv->clone->scratch);
2072
2073 pipapo_free_fields(priv->clone);
2074 kfree(priv->clone);
2075 priv->clone = NULL;
2076 }
2077}
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088static void nft_pipapo_gc_init(const struct nft_set *set)
2089{
2090 struct nft_pipapo *priv = nft_set_priv(set);
2091
2092 priv->last_gc = jiffies;
2093}
2094
2095struct nft_set_type nft_set_pipapo_type __read_mostly = {
2096 .owner = THIS_MODULE,
2097 .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT |
2098 NFT_SET_TIMEOUT,
2099 .ops = {
2100 .lookup = nft_pipapo_lookup,
2101 .insert = nft_pipapo_insert,
2102 .activate = nft_pipapo_activate,
2103 .deactivate = nft_pipapo_deactivate,
2104 .flush = nft_pipapo_flush,
2105 .remove = nft_pipapo_remove,
2106 .walk = nft_pipapo_walk,
2107 .get = nft_pipapo_get,
2108 .privsize = nft_pipapo_privsize,
2109 .estimate = nft_pipapo_estimate,
2110 .init = nft_pipapo_init,
2111 .destroy = nft_pipapo_destroy,
2112 .gc_init = nft_pipapo_gc_init,
2113 .elemsize = offsetof(struct nft_pipapo_elem, ext),
2114 },
2115};
2116