1
2
3
4
5
6
7
8#include <linux/kernel.h>
9#include <linux/init.h>
10#include <linux/module.h>
11#include <linux/list.h>
12#include <linux/log2.h>
13#include <linux/jhash.h>
14#include <linux/netlink.h>
15#include <linux/workqueue.h>
16#include <linux/rhashtable.h>
17#include <linux/netfilter.h>
18#include <linux/netfilter/nf_tables.h>
19#include <net/netfilter/nf_tables_core.h>
20
21
22#define NFT_RHASH_ELEMENT_HINT 3
23
24struct nft_rhash {
25 struct rhashtable ht;
26 struct delayed_work gc_work;
27};
28
29struct nft_rhash_elem {
30 struct rhash_head node;
31 struct nft_set_ext ext;
32};
33
34struct nft_rhash_cmp_arg {
35 const struct nft_set *set;
36 const u32 *key;
37 u8 genmask;
38};
39
40static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
41{
42 const struct nft_rhash_cmp_arg *arg = data;
43
44 return jhash(arg->key, len, seed);
45}
46
47static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed)
48{
49 const struct nft_rhash_elem *he = data;
50
51 return jhash(nft_set_ext_key(&he->ext), len, seed);
52}
53
54static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
55 const void *ptr)
56{
57 const struct nft_rhash_cmp_arg *x = arg->key;
58 const struct nft_rhash_elem *he = ptr;
59
60 if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
61 return 1;
62 if (nft_set_elem_expired(&he->ext))
63 return 1;
64 if (!nft_set_elem_active(&he->ext, x->genmask))
65 return 1;
66 return 0;
67}
68
69static const struct rhashtable_params nft_rhash_params = {
70 .head_offset = offsetof(struct nft_rhash_elem, node),
71 .hashfn = nft_rhash_key,
72 .obj_hashfn = nft_rhash_obj,
73 .obj_cmpfn = nft_rhash_cmp,
74 .automatic_shrinking = true,
75};
76
77INDIRECT_CALLABLE_SCOPE
78bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
79 const u32 *key, const struct nft_set_ext **ext)
80{
81 struct nft_rhash *priv = nft_set_priv(set);
82 const struct nft_rhash_elem *he;
83 struct nft_rhash_cmp_arg arg = {
84 .genmask = nft_genmask_cur(net),
85 .set = set,
86 .key = key,
87 };
88
89 he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
90 if (he != NULL)
91 *ext = &he->ext;
92
93 return !!he;
94}
95
96static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
97 const struct nft_set_elem *elem, unsigned int flags)
98{
99 struct nft_rhash *priv = nft_set_priv(set);
100 struct nft_rhash_elem *he;
101 struct nft_rhash_cmp_arg arg = {
102 .genmask = nft_genmask_cur(net),
103 .set = set,
104 .key = elem->key.val.data,
105 };
106
107 he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
108 if (he != NULL)
109 return he;
110
111 return ERR_PTR(-ENOENT);
112}
113
114static bool nft_rhash_update(struct nft_set *set, const u32 *key,
115 void *(*new)(struct nft_set *,
116 const struct nft_expr *,
117 struct nft_regs *regs),
118 const struct nft_expr *expr,
119 struct nft_regs *regs,
120 const struct nft_set_ext **ext)
121{
122 struct nft_rhash *priv = nft_set_priv(set);
123 struct nft_rhash_elem *he, *prev;
124 struct nft_rhash_cmp_arg arg = {
125 .genmask = NFT_GENMASK_ANY,
126 .set = set,
127 .key = key,
128 };
129
130 he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
131 if (he != NULL)
132 goto out;
133
134 he = new(set, expr, regs);
135 if (he == NULL)
136 goto err1;
137
138 prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
139 nft_rhash_params);
140 if (IS_ERR(prev))
141 goto err2;
142
143
144 if (prev) {
145 nft_set_elem_destroy(set, he, true);
146 he = prev;
147 }
148
149out:
150 *ext = &he->ext;
151 return true;
152
153err2:
154 nft_set_elem_destroy(set, he, true);
155err1:
156 return false;
157}
158
159static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
160 const struct nft_set_elem *elem,
161 struct nft_set_ext **ext)
162{
163 struct nft_rhash *priv = nft_set_priv(set);
164 struct nft_rhash_elem *he = elem->priv;
165 struct nft_rhash_cmp_arg arg = {
166 .genmask = nft_genmask_next(net),
167 .set = set,
168 .key = elem->key.val.data,
169 };
170 struct nft_rhash_elem *prev;
171
172 prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
173 nft_rhash_params);
174 if (IS_ERR(prev))
175 return PTR_ERR(prev);
176 if (prev) {
177 *ext = &prev->ext;
178 return -EEXIST;
179 }
180 return 0;
181}
182
183static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
184 const struct nft_set_elem *elem)
185{
186 struct nft_rhash_elem *he = elem->priv;
187
188 nft_set_elem_change_active(net, set, &he->ext);
189 nft_set_elem_clear_busy(&he->ext);
190}
191
192static bool nft_rhash_flush(const struct net *net,
193 const struct nft_set *set, void *priv)
194{
195 struct nft_rhash_elem *he = priv;
196
197 if (!nft_set_elem_mark_busy(&he->ext) ||
198 !nft_is_active(net, &he->ext)) {
199 nft_set_elem_change_active(net, set, &he->ext);
200 return true;
201 }
202 return false;
203}
204
205static void *nft_rhash_deactivate(const struct net *net,
206 const struct nft_set *set,
207 const struct nft_set_elem *elem)
208{
209 struct nft_rhash *priv = nft_set_priv(set);
210 struct nft_rhash_elem *he;
211 struct nft_rhash_cmp_arg arg = {
212 .genmask = nft_genmask_next(net),
213 .set = set,
214 .key = elem->key.val.data,
215 };
216
217 rcu_read_lock();
218 he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
219 if (he != NULL &&
220 !nft_rhash_flush(net, set, he))
221 he = NULL;
222
223 rcu_read_unlock();
224
225 return he;
226}
227
228static void nft_rhash_remove(const struct net *net,
229 const struct nft_set *set,
230 const struct nft_set_elem *elem)
231{
232 struct nft_rhash *priv = nft_set_priv(set);
233 struct nft_rhash_elem *he = elem->priv;
234
235 rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
236}
237
238static bool nft_rhash_delete(const struct nft_set *set,
239 const u32 *key)
240{
241 struct nft_rhash *priv = nft_set_priv(set);
242 struct nft_rhash_cmp_arg arg = {
243 .genmask = NFT_GENMASK_ANY,
244 .set = set,
245 .key = key,
246 };
247 struct nft_rhash_elem *he;
248
249 he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
250 if (he == NULL)
251 return false;
252
253 return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
254}
255
256static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
257 struct nft_set_iter *iter)
258{
259 struct nft_rhash *priv = nft_set_priv(set);
260 struct nft_rhash_elem *he;
261 struct rhashtable_iter hti;
262 struct nft_set_elem elem;
263
264 rhashtable_walk_enter(&priv->ht, &hti);
265 rhashtable_walk_start(&hti);
266
267 while ((he = rhashtable_walk_next(&hti))) {
268 if (IS_ERR(he)) {
269 if (PTR_ERR(he) != -EAGAIN) {
270 iter->err = PTR_ERR(he);
271 break;
272 }
273
274 continue;
275 }
276
277 if (iter->count < iter->skip)
278 goto cont;
279 if (nft_set_elem_expired(&he->ext))
280 goto cont;
281 if (!nft_set_elem_active(&he->ext, iter->genmask))
282 goto cont;
283
284 elem.priv = he;
285
286 iter->err = iter->fn(ctx, set, iter, &elem);
287 if (iter->err < 0)
288 break;
289
290cont:
291 iter->count++;
292 }
293 rhashtable_walk_stop(&hti);
294 rhashtable_walk_exit(&hti);
295}
296
297static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
298 struct nft_set_ext *ext)
299{
300 struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext);
301 struct nft_expr *expr;
302 u32 size;
303
304 nft_setelem_expr_foreach(expr, elem_expr, size) {
305 if (expr->ops->gc &&
306 expr->ops->gc(read_pnet(&set->net), expr))
307 return true;
308 }
309
310 return false;
311}
312
313static void nft_rhash_gc(struct work_struct *work)
314{
315 struct nft_set *set;
316 struct nft_rhash_elem *he;
317 struct nft_rhash *priv;
318 struct nft_set_gc_batch *gcb = NULL;
319 struct rhashtable_iter hti;
320
321 priv = container_of(work, struct nft_rhash, gc_work.work);
322 set = nft_set_container_of(priv);
323
324 rhashtable_walk_enter(&priv->ht, &hti);
325 rhashtable_walk_start(&hti);
326
327 while ((he = rhashtable_walk_next(&hti))) {
328 if (IS_ERR(he)) {
329 if (PTR_ERR(he) != -EAGAIN)
330 break;
331 continue;
332 }
333
334 if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
335 nft_rhash_expr_needs_gc_run(set, &he->ext))
336 goto needs_gc_run;
337
338 if (!nft_set_elem_expired(&he->ext))
339 continue;
340needs_gc_run:
341 if (nft_set_elem_mark_busy(&he->ext))
342 continue;
343
344 gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
345 if (gcb == NULL)
346 break;
347 rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
348 atomic_dec(&set->nelems);
349 nft_set_gc_batch_add(gcb, he);
350 }
351 rhashtable_walk_stop(&hti);
352 rhashtable_walk_exit(&hti);
353
354 he = nft_set_catchall_gc(set);
355 if (he) {
356 gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
357 if (gcb)
358 nft_set_gc_batch_add(gcb, he);
359 }
360 nft_set_gc_batch_complete(gcb);
361 queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
362 nft_set_gc_interval(set));
363}
364
365static u64 nft_rhash_privsize(const struct nlattr * const nla[],
366 const struct nft_set_desc *desc)
367{
368 return sizeof(struct nft_rhash);
369}
370
371static void nft_rhash_gc_init(const struct nft_set *set)
372{
373 struct nft_rhash *priv = nft_set_priv(set);
374
375 queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
376 nft_set_gc_interval(set));
377}
378
379static int nft_rhash_init(const struct nft_set *set,
380 const struct nft_set_desc *desc,
381 const struct nlattr * const tb[])
382{
383 struct nft_rhash *priv = nft_set_priv(set);
384 struct rhashtable_params params = nft_rhash_params;
385 int err;
386
387 params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT;
388 params.key_len = set->klen;
389
390 err = rhashtable_init(&priv->ht, ¶ms);
391 if (err < 0)
392 return err;
393
394 INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
395 if (set->flags & NFT_SET_TIMEOUT)
396 nft_rhash_gc_init(set);
397
398 return 0;
399}
400
401static void nft_rhash_elem_destroy(void *ptr, void *arg)
402{
403 nft_set_elem_destroy(arg, ptr, true);
404}
405
406static void nft_rhash_destroy(const struct nft_set *set)
407{
408 struct nft_rhash *priv = nft_set_priv(set);
409
410 cancel_delayed_work_sync(&priv->gc_work);
411 rcu_barrier();
412 rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
413 (void *)set);
414}
415
416
417#define NFT_MAX_BUCKETS (1U << 31)
418
419static u32 nft_hash_buckets(u32 size)
420{
421 u64 val = div_u64((u64)size * 4, 3);
422
423 if (val >= NFT_MAX_BUCKETS)
424 return NFT_MAX_BUCKETS;
425
426 return roundup_pow_of_two(val);
427}
428
429static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
430 struct nft_set_estimate *est)
431{
432 est->size = ~0;
433 est->lookup = NFT_SET_CLASS_O_1;
434 est->space = NFT_SET_CLASS_O_N;
435
436 return true;
437}
438
439struct nft_hash {
440 u32 seed;
441 u32 buckets;
442 struct hlist_head table[];
443};
444
445struct nft_hash_elem {
446 struct hlist_node node;
447 struct nft_set_ext ext;
448};
449
450INDIRECT_CALLABLE_SCOPE
451bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
452 const u32 *key, const struct nft_set_ext **ext)
453{
454 struct nft_hash *priv = nft_set_priv(set);
455 u8 genmask = nft_genmask_cur(net);
456 const struct nft_hash_elem *he;
457 u32 hash;
458
459 hash = jhash(key, set->klen, priv->seed);
460 hash = reciprocal_scale(hash, priv->buckets);
461 hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
462 if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) &&
463 nft_set_elem_active(&he->ext, genmask)) {
464 *ext = &he->ext;
465 return true;
466 }
467 }
468 return false;
469}
470
471static void *nft_hash_get(const struct net *net, const struct nft_set *set,
472 const struct nft_set_elem *elem, unsigned int flags)
473{
474 struct nft_hash *priv = nft_set_priv(set);
475 u8 genmask = nft_genmask_cur(net);
476 struct nft_hash_elem *he;
477 u32 hash;
478
479 hash = jhash(elem->key.val.data, set->klen, priv->seed);
480 hash = reciprocal_scale(hash, priv->buckets);
481 hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
482 if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
483 nft_set_elem_active(&he->ext, genmask))
484 return he;
485 }
486 return ERR_PTR(-ENOENT);
487}
488
489INDIRECT_CALLABLE_SCOPE
490bool nft_hash_lookup_fast(const struct net *net,
491 const struct nft_set *set,
492 const u32 *key, const struct nft_set_ext **ext)
493{
494 struct nft_hash *priv = nft_set_priv(set);
495 u8 genmask = nft_genmask_cur(net);
496 const struct nft_hash_elem *he;
497 u32 hash, k1, k2;
498
499 k1 = *key;
500 hash = jhash_1word(k1, priv->seed);
501 hash = reciprocal_scale(hash, priv->buckets);
502 hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
503 k2 = *(u32 *)nft_set_ext_key(&he->ext)->data;
504 if (k1 == k2 &&
505 nft_set_elem_active(&he->ext, genmask)) {
506 *ext = &he->ext;
507 return true;
508 }
509 }
510 return false;
511}
512
513static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,
514 const struct nft_set_ext *ext)
515{
516 const struct nft_data *key = nft_set_ext_key(ext);
517 u32 hash, k1;
518
519 if (set->klen == 4) {
520 k1 = *(u32 *)key;
521 hash = jhash_1word(k1, priv->seed);
522 } else {
523 hash = jhash(key, set->klen, priv->seed);
524 }
525 hash = reciprocal_scale(hash, priv->buckets);
526
527 return hash;
528}
529
530static int nft_hash_insert(const struct net *net, const struct nft_set *set,
531 const struct nft_set_elem *elem,
532 struct nft_set_ext **ext)
533{
534 struct nft_hash_elem *this = elem->priv, *he;
535 struct nft_hash *priv = nft_set_priv(set);
536 u8 genmask = nft_genmask_next(net);
537 u32 hash;
538
539 hash = nft_jhash(set, priv, &this->ext);
540 hlist_for_each_entry(he, &priv->table[hash], node) {
541 if (!memcmp(nft_set_ext_key(&this->ext),
542 nft_set_ext_key(&he->ext), set->klen) &&
543 nft_set_elem_active(&he->ext, genmask)) {
544 *ext = &he->ext;
545 return -EEXIST;
546 }
547 }
548 hlist_add_head_rcu(&this->node, &priv->table[hash]);
549 return 0;
550}
551
552static void nft_hash_activate(const struct net *net, const struct nft_set *set,
553 const struct nft_set_elem *elem)
554{
555 struct nft_hash_elem *he = elem->priv;
556
557 nft_set_elem_change_active(net, set, &he->ext);
558}
559
560static bool nft_hash_flush(const struct net *net,
561 const struct nft_set *set, void *priv)
562{
563 struct nft_hash_elem *he = priv;
564
565 nft_set_elem_change_active(net, set, &he->ext);
566 return true;
567}
568
569static void *nft_hash_deactivate(const struct net *net,
570 const struct nft_set *set,
571 const struct nft_set_elem *elem)
572{
573 struct nft_hash *priv = nft_set_priv(set);
574 struct nft_hash_elem *this = elem->priv, *he;
575 u8 genmask = nft_genmask_next(net);
576 u32 hash;
577
578 hash = nft_jhash(set, priv, &this->ext);
579 hlist_for_each_entry(he, &priv->table[hash], node) {
580 if (!memcmp(nft_set_ext_key(&he->ext), &elem->key.val,
581 set->klen) &&
582 nft_set_elem_active(&he->ext, genmask)) {
583 nft_set_elem_change_active(net, set, &he->ext);
584 return he;
585 }
586 }
587 return NULL;
588}
589
590static void nft_hash_remove(const struct net *net,
591 const struct nft_set *set,
592 const struct nft_set_elem *elem)
593{
594 struct nft_hash_elem *he = elem->priv;
595
596 hlist_del_rcu(&he->node);
597}
598
599static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
600 struct nft_set_iter *iter)
601{
602 struct nft_hash *priv = nft_set_priv(set);
603 struct nft_hash_elem *he;
604 struct nft_set_elem elem;
605 int i;
606
607 for (i = 0; i < priv->buckets; i++) {
608 hlist_for_each_entry_rcu(he, &priv->table[i], node) {
609 if (iter->count < iter->skip)
610 goto cont;
611 if (!nft_set_elem_active(&he->ext, iter->genmask))
612 goto cont;
613
614 elem.priv = he;
615
616 iter->err = iter->fn(ctx, set, iter, &elem);
617 if (iter->err < 0)
618 return;
619cont:
620 iter->count++;
621 }
622 }
623}
624
625static u64 nft_hash_privsize(const struct nlattr * const nla[],
626 const struct nft_set_desc *desc)
627{
628 return sizeof(struct nft_hash) +
629 (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
630}
631
632static int nft_hash_init(const struct nft_set *set,
633 const struct nft_set_desc *desc,
634 const struct nlattr * const tb[])
635{
636 struct nft_hash *priv = nft_set_priv(set);
637
638 priv->buckets = nft_hash_buckets(desc->size);
639 get_random_bytes(&priv->seed, sizeof(priv->seed));
640
641 return 0;
642}
643
644static void nft_hash_destroy(const struct nft_set *set)
645{
646 struct nft_hash *priv = nft_set_priv(set);
647 struct nft_hash_elem *he;
648 struct hlist_node *next;
649 int i;
650
651 for (i = 0; i < priv->buckets; i++) {
652 hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
653 hlist_del_rcu(&he->node);
654 nft_set_elem_destroy(set, he, true);
655 }
656 }
657}
658
659static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
660 struct nft_set_estimate *est)
661{
662 if (!desc->size)
663 return false;
664
665 if (desc->klen == 4)
666 return false;
667
668 est->size = sizeof(struct nft_hash) +
669 (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
670 (u64)desc->size * sizeof(struct nft_hash_elem);
671 est->lookup = NFT_SET_CLASS_O_1;
672 est->space = NFT_SET_CLASS_O_N;
673
674 return true;
675}
676
677static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features,
678 struct nft_set_estimate *est)
679{
680 if (!desc->size)
681 return false;
682
683 if (desc->klen != 4)
684 return false;
685
686 est->size = sizeof(struct nft_hash) +
687 (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
688 (u64)desc->size * sizeof(struct nft_hash_elem);
689 est->lookup = NFT_SET_CLASS_O_1;
690 est->space = NFT_SET_CLASS_O_N;
691
692 return true;
693}
694
695const struct nft_set_type nft_set_rhash_type = {
696 .features = NFT_SET_MAP | NFT_SET_OBJECT |
697 NFT_SET_TIMEOUT | NFT_SET_EVAL,
698 .ops = {
699 .privsize = nft_rhash_privsize,
700 .elemsize = offsetof(struct nft_rhash_elem, ext),
701 .estimate = nft_rhash_estimate,
702 .init = nft_rhash_init,
703 .gc_init = nft_rhash_gc_init,
704 .destroy = nft_rhash_destroy,
705 .insert = nft_rhash_insert,
706 .activate = nft_rhash_activate,
707 .deactivate = nft_rhash_deactivate,
708 .flush = nft_rhash_flush,
709 .remove = nft_rhash_remove,
710 .lookup = nft_rhash_lookup,
711 .update = nft_rhash_update,
712 .delete = nft_rhash_delete,
713 .walk = nft_rhash_walk,
714 .get = nft_rhash_get,
715 },
716};
717
718const struct nft_set_type nft_set_hash_type = {
719 .features = NFT_SET_MAP | NFT_SET_OBJECT,
720 .ops = {
721 .privsize = nft_hash_privsize,
722 .elemsize = offsetof(struct nft_hash_elem, ext),
723 .estimate = nft_hash_estimate,
724 .init = nft_hash_init,
725 .destroy = nft_hash_destroy,
726 .insert = nft_hash_insert,
727 .activate = nft_hash_activate,
728 .deactivate = nft_hash_deactivate,
729 .flush = nft_hash_flush,
730 .remove = nft_hash_remove,
731 .lookup = nft_hash_lookup,
732 .walk = nft_hash_walk,
733 .get = nft_hash_get,
734 },
735};
736
737const struct nft_set_type nft_set_hash_fast_type = {
738 .features = NFT_SET_MAP | NFT_SET_OBJECT,
739 .ops = {
740 .privsize = nft_hash_privsize,
741 .elemsize = offsetof(struct nft_hash_elem, ext),
742 .estimate = nft_hash_fast_estimate,
743 .init = nft_hash_init,
744 .destroy = nft_hash_destroy,
745 .insert = nft_hash_insert,
746 .activate = nft_hash_activate,
747 .deactivate = nft_hash_deactivate,
748 .flush = nft_hash_flush,
749 .remove = nft_hash_remove,
750 .lookup = nft_hash_lookup_fast,
751 .walk = nft_hash_walk,
752 .get = nft_hash_get,
753 },
754};
755