1
2
3
4
5
6
7
8
9#include <linux/kernel.h>
10#include <linux/netfilter.h>
11#include <net/protocol.h>
12#include <linux/init.h>
13#include <linux/skbuff.h>
14#include <linux/wait.h>
15#include <linux/module.h>
16#include <linux/interrupt.h>
17#include <linux/if.h>
18#include <linux/netdevice.h>
19#include <linux/netfilter_ipv6.h>
20#include <linux/inetdevice.h>
21#include <linux/proc_fs.h>
22#include <linux/mutex.h>
23#include <linux/mm.h>
24#include <linux/rcupdate.h>
25#include <net/net_namespace.h>
26#include <net/sock.h>
27
28#include "nf_internals.h"
29
/* RCU-annotated pointer to the IPv6 helper operations, exported to GPL modules. */
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);

/* Per-CPU boolean exported to GPL modules; it is not read or written in this file. */
DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);

#ifdef HAVE_JUMP_LABEL
/*
 * One static key per (protocol family, hook number).  The key is incremented
 * when a hook is registered and decremented when one is unregistered.
 */
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif

/* Taken by the register/unregister paths while they replace a hook array. */
static DEFINE_MUTEX(nf_hook_mutex);
42
43
/* Upper bound on the number of entries a single hook array may hold. */
#define MAX_HOOK_COUNT 1024

/* Writer-side dereference of a hook array pointer; caller must hold nf_hook_mutex. */
#define nf_entry_dereference(e) \
	rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
48
49static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
50{
51 struct nf_hook_entries *e;
52 size_t alloc = sizeof(*e) +
53 sizeof(struct nf_hook_entry) * num +
54 sizeof(struct nf_hook_ops *) * num +
55 sizeof(struct nf_hook_entries_rcu_head);
56
57 if (num == 0)
58 return NULL;
59
60 e = kvzalloc(alloc, GFP_KERNEL);
61 if (e)
62 e->num_hook_entries = num;
63 return e;
64}
65
66static void __nf_hook_entries_free(struct rcu_head *h)
67{
68 struct nf_hook_entries_rcu_head *head;
69
70 head = container_of(h, struct nf_hook_entries_rcu_head, head);
71 kvfree(head->allocation);
72}
73
74static void nf_hook_entries_free(struct nf_hook_entries *e)
75{
76 struct nf_hook_entries_rcu_head *head;
77 struct nf_hook_ops **ops;
78 unsigned int num;
79
80 if (!e)
81 return;
82
83 num = e->num_hook_entries;
84 ops = nf_hook_entries_get_hook_ops(e);
85 head = (void *)&ops[num];
86 head->allocation = e;
87 call_rcu(&head->head, __nf_hook_entries_free);
88}
89
90static unsigned int accept_all(void *priv,
91 struct sk_buff *skb,
92 const struct nf_hook_state *state)
93{
94 return NF_ACCEPT;
95}
96
/*
 * Placeholder ops.  An ops slot pointing here marks an entry whose hook has
 * been unregistered; the grow/shrink/validate code skips such slots.
 */
static const struct nf_hook_ops dummy_ops = {
	.hook = accept_all,
	.priority = INT_MIN,
};
101
102static struct nf_hook_entries *
103nf_hook_entries_grow(const struct nf_hook_entries *old,
104 const struct nf_hook_ops *reg)
105{
106 unsigned int i, alloc_entries, nhooks, old_entries;
107 struct nf_hook_ops **orig_ops = NULL;
108 struct nf_hook_ops **new_ops;
109 struct nf_hook_entries *new;
110 bool inserted = false;
111
112 alloc_entries = 1;
113 old_entries = old ? old->num_hook_entries : 0;
114
115 if (old) {
116 orig_ops = nf_hook_entries_get_hook_ops(old);
117
118 for (i = 0; i < old_entries; i++) {
119 if (orig_ops[i] != &dummy_ops)
120 alloc_entries++;
121 }
122 }
123
124 if (alloc_entries > MAX_HOOK_COUNT)
125 return ERR_PTR(-E2BIG);
126
127 new = allocate_hook_entries_size(alloc_entries);
128 if (!new)
129 return ERR_PTR(-ENOMEM);
130
131 new_ops = nf_hook_entries_get_hook_ops(new);
132
133 i = 0;
134 nhooks = 0;
135 while (i < old_entries) {
136 if (orig_ops[i] == &dummy_ops) {
137 ++i;
138 continue;
139 }
140
141 if (inserted || reg->priority > orig_ops[i]->priority) {
142 new_ops[nhooks] = (void *)orig_ops[i];
143 new->hooks[nhooks] = old->hooks[i];
144 i++;
145 } else {
146 new_ops[nhooks] = (void *)reg;
147 new->hooks[nhooks].hook = reg->hook;
148 new->hooks[nhooks].priv = reg->priv;
149 inserted = true;
150 }
151 nhooks++;
152 }
153
154 if (!inserted) {
155 new_ops[nhooks] = (void *)reg;
156 new->hooks[nhooks].hook = reg->hook;
157 new->hooks[nhooks].priv = reg->priv;
158 }
159
160 return new;
161}
162
/*
 * Debug aid: warn if the live entries of @hooks are not sorted by
 * non-decreasing priority.  Placeholder (dummy_ops) slots are ignored.
 * Compiles to nothing unless CONFIG_DEBUG_KERNEL is set.
 */
static void hooks_validate(const struct nf_hook_entries *hooks)
{
#ifdef CONFIG_DEBUG_KERNEL
	struct nf_hook_ops **ops_tbl = nf_hook_entries_get_hook_ops(hooks);
	int highest = INT_MIN;
	size_t idx;

	for (idx = 0; idx < hooks->num_hook_entries; idx++) {
		const struct nf_hook_ops *cur = ops_tbl[idx];

		if (cur == &dummy_ops)
			continue;

		WARN_ON(cur->priority < highest);

		if (highest < cur->priority)
			highest = cur->priority;
	}
#endif
}
183
184int nf_hook_entries_insert_raw(struct nf_hook_entries __rcu **pp,
185 const struct nf_hook_ops *reg)
186{
187 struct nf_hook_entries *new_hooks;
188 struct nf_hook_entries *p;
189
190 p = rcu_dereference_raw(*pp);
191 new_hooks = nf_hook_entries_grow(p, reg);
192 if (IS_ERR(new_hooks))
193 return PTR_ERR(new_hooks);
194
195 hooks_validate(new_hooks);
196
197 rcu_assign_pointer(*pp, new_hooks);
198
199 BUG_ON(p == new_hooks);
200 nf_hook_entries_free(p);
201 return 0;
202}
203EXPORT_SYMBOL_GPL(nf_hook_entries_insert_raw);
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
222 struct nf_hook_entries __rcu **pp)
223{
224 unsigned int i, j, skip = 0, hook_entries;
225 struct nf_hook_entries *new = NULL;
226 struct nf_hook_ops **orig_ops;
227 struct nf_hook_ops **new_ops;
228
229 if (WARN_ON_ONCE(!old))
230 return NULL;
231
232 orig_ops = nf_hook_entries_get_hook_ops(old);
233 for (i = 0; i < old->num_hook_entries; i++) {
234 if (orig_ops[i] == &dummy_ops)
235 skip++;
236 }
237
238
239 hook_entries = old->num_hook_entries;
240 if (skip == hook_entries)
241 goto out_assign;
242
243 if (skip == 0)
244 return NULL;
245
246 hook_entries -= skip;
247 new = allocate_hook_entries_size(hook_entries);
248 if (!new)
249 return NULL;
250
251 new_ops = nf_hook_entries_get_hook_ops(new);
252 for (i = 0, j = 0; i < old->num_hook_entries; i++) {
253 if (orig_ops[i] == &dummy_ops)
254 continue;
255 new->hooks[j] = old->hooks[i];
256 new_ops[j] = (void *)orig_ops[i];
257 j++;
258 }
259 hooks_validate(new);
260out_assign:
261 rcu_assign_pointer(*pp, new);
262 return old;
263}
264
265static struct nf_hook_entries __rcu **
266nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
267 struct net_device *dev)
268{
269 switch (pf) {
270 case NFPROTO_NETDEV:
271 break;
272#ifdef CONFIG_NETFILTER_FAMILY_ARP
273 case NFPROTO_ARP:
274 if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
275 return NULL;
276 return net->nf.hooks_arp + hooknum;
277#endif
278#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
279 case NFPROTO_BRIDGE:
280 if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
281 return NULL;
282 return net->nf.hooks_bridge + hooknum;
283#endif
284 case NFPROTO_IPV4:
285 if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
286 return NULL;
287 return net->nf.hooks_ipv4 + hooknum;
288 case NFPROTO_IPV6:
289 if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
290 return NULL;
291 return net->nf.hooks_ipv6 + hooknum;
292#if IS_ENABLED(CONFIG_DECNET)
293 case NFPROTO_DECNET:
294 if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
295 return NULL;
296 return net->nf.hooks_decnet + hooknum;
297#endif
298 default:
299 WARN_ON_ONCE(1);
300 return NULL;
301 }
302
303#ifdef CONFIG_NETFILTER_INGRESS
304 if (hooknum == NF_NETDEV_INGRESS) {
305 if (dev && dev_net(dev) == net)
306 return &dev->nf_hooks_ingress;
307 }
308#endif
309 WARN_ON_ONCE(1);
310 return NULL;
311}
312
313static int __nf_register_net_hook(struct net *net, int pf,
314 const struct nf_hook_ops *reg)
315{
316 struct nf_hook_entries *p, *new_hooks;
317 struct nf_hook_entries __rcu **pp;
318
319 if (pf == NFPROTO_NETDEV) {
320#ifndef CONFIG_NETFILTER_INGRESS
321 if (reg->hooknum == NF_NETDEV_INGRESS)
322 return -EOPNOTSUPP;
323#endif
324 if (reg->hooknum != NF_NETDEV_INGRESS ||
325 !reg->dev || dev_net(reg->dev) != net)
326 return -EINVAL;
327 }
328
329 pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
330 if (!pp)
331 return -EINVAL;
332
333 mutex_lock(&nf_hook_mutex);
334
335 p = nf_entry_dereference(*pp);
336 new_hooks = nf_hook_entries_grow(p, reg);
337
338 if (!IS_ERR(new_hooks))
339 rcu_assign_pointer(*pp, new_hooks);
340
341 mutex_unlock(&nf_hook_mutex);
342 if (IS_ERR(new_hooks))
343 return PTR_ERR(new_hooks);
344
345 hooks_validate(new_hooks);
346#ifdef CONFIG_NETFILTER_INGRESS
347 if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
348 net_inc_ingress_queue();
349#endif
350#ifdef HAVE_JUMP_LABEL
351 static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
352#endif
353 BUG_ON(p == new_hooks);
354 nf_hook_entries_free(p);
355 return 0;
356}
357
358
359
360
361
362
363
364
365
366
367static bool nf_remove_net_hook(struct nf_hook_entries *old,
368 const struct nf_hook_ops *unreg)
369{
370 struct nf_hook_ops **orig_ops;
371 unsigned int i;
372
373 orig_ops = nf_hook_entries_get_hook_ops(old);
374 for (i = 0; i < old->num_hook_entries; i++) {
375 if (orig_ops[i] != unreg)
376 continue;
377 WRITE_ONCE(old->hooks[i].hook, accept_all);
378 WRITE_ONCE(orig_ops[i], &dummy_ops);
379 return true;
380 }
381
382 return false;
383}
384
385static void __nf_unregister_net_hook(struct net *net, int pf,
386 const struct nf_hook_ops *reg)
387{
388 struct nf_hook_entries __rcu **pp;
389 struct nf_hook_entries *p;
390
391 pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
392 if (!pp)
393 return;
394
395 mutex_lock(&nf_hook_mutex);
396
397 p = nf_entry_dereference(*pp);
398 if (WARN_ON_ONCE(!p)) {
399 mutex_unlock(&nf_hook_mutex);
400 return;
401 }
402
403 if (nf_remove_net_hook(p, reg)) {
404#ifdef CONFIG_NETFILTER_INGRESS
405 if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
406 net_dec_ingress_queue();
407#endif
408#ifdef HAVE_JUMP_LABEL
409 static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]);
410#endif
411 } else {
412 WARN_ONCE(1, "hook not found, pf %d num %d", pf, reg->hooknum);
413 }
414
415 p = __nf_hook_entries_try_shrink(p, pp);
416 mutex_unlock(&nf_hook_mutex);
417 if (!p)
418 return;
419
420 nf_queue_nf_hook_drop(net);
421 nf_hook_entries_free(p);
422}
423
424void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
425{
426 if (reg->pf == NFPROTO_INET) {
427 __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
428 __nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
429 } else {
430 __nf_unregister_net_hook(net, reg->pf, reg);
431 }
432}
433EXPORT_SYMBOL(nf_unregister_net_hook);
434
435void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp,
436 const struct nf_hook_ops *reg)
437{
438 struct nf_hook_entries *p;
439
440 p = rcu_dereference_raw(*pp);
441 if (nf_remove_net_hook(p, reg)) {
442 p = __nf_hook_entries_try_shrink(p, pp);
443 nf_hook_entries_free(p);
444 }
445}
446EXPORT_SYMBOL_GPL(nf_hook_entries_delete_raw);
447
448int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
449{
450 int err;
451
452 if (reg->pf == NFPROTO_INET) {
453 err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
454 if (err < 0)
455 return err;
456
457 err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
458 if (err < 0) {
459 __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
460 return err;
461 }
462 } else {
463 err = __nf_register_net_hook(net, reg->pf, reg);
464 if (err < 0)
465 return err;
466 }
467
468 return 0;
469}
470EXPORT_SYMBOL(nf_register_net_hook);
471
472int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
473 unsigned int n)
474{
475 unsigned int i;
476 int err = 0;
477
478 for (i = 0; i < n; i++) {
479 err = nf_register_net_hook(net, ®[i]);
480 if (err)
481 goto err;
482 }
483 return err;
484
485err:
486 if (i > 0)
487 nf_unregister_net_hooks(net, reg, i);
488 return err;
489}
490EXPORT_SYMBOL(nf_register_net_hooks);
491
492void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
493 unsigned int hookcount)
494{
495 unsigned int i;
496
497 for (i = 0; i < hookcount; i++)
498 nf_unregister_net_hook(net, ®[i]);
499}
500EXPORT_SYMBOL(nf_unregister_net_hooks);
501
502
503
504int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
505 const struct nf_hook_entries *e, unsigned int s)
506{
507 unsigned int verdict;
508 int ret;
509
510 for (; s < e->num_hook_entries; s++) {
511 verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
512 switch (verdict & NF_VERDICT_MASK) {
513 case NF_ACCEPT:
514 break;
515 case NF_DROP:
516 kfree_skb(skb);
517 ret = NF_DROP_GETERR(verdict);
518 if (ret == 0)
519 ret = -EPERM;
520 return ret;
521 case NF_QUEUE:
522 ret = nf_queue(skb, state, e, s, verdict);
523 if (ret == 1)
524 continue;
525 return ret;
526 default:
527
528
529
530 return 0;
531 }
532 }
533
534 return 1;
535}
536EXPORT_SYMBOL(nf_hook_slow);
537
538
539int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
540{
541 if (writable_len > skb->len)
542 return 0;
543
544
545 if (!skb_cloned(skb)) {
546 if (writable_len <= skb_headlen(skb))
547 return 1;
548 } else if (skb_clone_writable(skb, writable_len))
549 return 1;
550
551 if (writable_len <= skb_headlen(skb))
552 writable_len = 0;
553 else
554 writable_len -= skb_headlen(skb);
555
556 return !!__pskb_pull_tail(skb, writable_len);
557}
558EXPORT_SYMBOL(skb_make_writable);
559
560
561
562
/* RCU-annotated hook pointer, exported to GPL modules; not dereferenced in this file. */
struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfnl_ct_hook);

/*
 * RCU-annotated pointer to the conntrack callbacks.  This file calls its
 * ->destroy and ->get_tuple_skb members under rcu_read_lock().
 */
struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);
568
569#if IS_ENABLED(CONFIG_NF_CONNTRACK)
570
571
572
/*
 * RCU-annotated function pointer called by nf_ct_attach(); it may be NULL,
 * in which case nf_ct_attach() does nothing.
 */
void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
		__rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);

/* RCU-annotated NAT hook pointer, exported to GPL modules; not dereferenced in this file. */
struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_hook);
579
580void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
581{
582 void (*attach)(struct sk_buff *, const struct sk_buff *);
583
584 if (skb->_nfct) {
585 rcu_read_lock();
586 attach = rcu_dereference(ip_ct_attach);
587 if (attach)
588 attach(new, skb);
589 rcu_read_unlock();
590 }
591}
592EXPORT_SYMBOL(nf_ct_attach);
593
594void nf_conntrack_destroy(struct nf_conntrack *nfct)
595{
596 struct nf_ct_hook *ct_hook;
597
598 rcu_read_lock();
599 ct_hook = rcu_dereference(nf_ct_hook);
600 BUG_ON(ct_hook == NULL);
601 ct_hook->destroy(nfct);
602 rcu_read_unlock();
603}
604EXPORT_SYMBOL(nf_conntrack_destroy);
605
606bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
607 const struct sk_buff *skb)
608{
609 struct nf_ct_hook *ct_hook;
610 bool ret = false;
611
612 rcu_read_lock();
613 ct_hook = rcu_dereference(nf_ct_hook);
614 if (ct_hook)
615 ret = ct_hook->get_tuple_skb(dst_tuple, skb);
616 rcu_read_unlock();
617 return ret;
618}
619EXPORT_SYMBOL(nf_ct_get_tuple_skb);
620
621
/* Conntrack zone initialised with the default zone id and direction; exported to GPL modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
	.id = NF_CT_DEFAULT_ZONE_ID,
	.dir = NF_CT_DEFAULT_ZONE_DIR,
};
EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
627#endif
628
629static void __net_init
630__netfilter_net_init(struct nf_hook_entries __rcu **e, int max)
631{
632 int h;
633
634 for (h = 0; h < max; h++)
635 RCU_INIT_POINTER(e[h], NULL);
636}
637
638static int __net_init netfilter_net_init(struct net *net)
639{
640 __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
641 __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
642#ifdef CONFIG_NETFILTER_FAMILY_ARP
643 __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
644#endif
645#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
646 __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
647#endif
648#if IS_ENABLED(CONFIG_DECNET)
649 __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
650#endif
651
652#ifdef CONFIG_PROC_FS
653 net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
654 net->proc_net);
655 if (!net->nf.proc_netfilter) {
656 if (!net_eq(net, &init_net))
657 pr_err("cannot create netfilter proc entry");
658
659 return -ENOMEM;
660 }
661#endif
662
663 return 0;
664}
665
666static void __net_exit netfilter_net_exit(struct net *net)
667{
668 remove_proc_entry("netfilter", net->proc_net);
669}
670
/* Per-namespace init/exit callbacks, registered by netfilter_init(). */
static struct pernet_operations netfilter_net_ops = {
	.init = netfilter_net_init,
	.exit = netfilter_net_exit,
};
675
676int __init netfilter_init(void)
677{
678 int ret;
679
680 ret = register_pernet_subsys(&netfilter_net_ops);
681 if (ret < 0)
682 goto err;
683
684 ret = netfilter_log_init();
685 if (ret < 0)
686 goto err_pernet;
687
688 return 0;
689err_pernet:
690 unregister_pernet_subsys(&netfilter_net_ops);
691err:
692 return ret;
693}
694