linux/net/netfilter/core.c
<<
>>
Prefs
   1/* netfilter.c: look after the filters for various protocols.
   2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
   3 *
   4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
   5 * way.
   6 *
   7 * This code is GPL.
   8 */
   9#include <linux/kernel.h>
  10#include <linux/netfilter.h>
  11#include <net/protocol.h>
  12#include <linux/init.h>
  13#include <linux/skbuff.h>
  14#include <linux/wait.h>
  15#include <linux/module.h>
  16#include <linux/interrupt.h>
  17#include <linux/if.h>
  18#include <linux/netdevice.h>
  19#include <linux/netfilter_ipv6.h>
  20#include <linux/inetdevice.h>
  21#include <linux/proc_fs.h>
  22#include <linux/mutex.h>
  23#include <linux/mm.h>
  24#include <linux/rcupdate.h>
  25#include <net/net_namespace.h>
  26#include <net/sock.h>
  27
  28#include "nf_internals.h"
  29
    /*
     * Pointer to the IPv6 ops table.  It is __rcu-annotated, so it is meant
     * to be read and updated through the RCU accessors.
     * NOTE(review): nothing in this file assigns it - confirm who sets it.
     */
   30const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
   31EXPORT_SYMBOL_GPL(nf_ipv6_ops);
   32
    /*
     * Per-CPU boolean flag, exported for use by other modules.
     * NOTE(review): it is not read or written in this file; presumably it
     * marks that the current CPU is already duplicating an skb - confirm.
     */
   33DEFINE_PER_CPU(bool, nf_skb_duplicated);
   34EXPORT_SYMBOL_GPL(nf_skb_duplicated);
   35
   36#ifdef HAVE_JUMP_LABEL
    /*
     * One static key per [protocol family][hook number].  The key is
     * incremented when a hook is registered and decremented when a hook is
     * removed.
     */
   37struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
   38EXPORT_SYMBOL(nf_hooks_needed);
   39#endif
   40
    /* Held while a hook blob pointer is dereferenced for update or replaced. */
   41static DEFINE_MUTEX(nf_hook_mutex);
   42
   43/* max hooks per family/hooknum */
   44#define MAX_HOOK_COUNT          1024
   45
    /*
     * Dereference an __rcu hook blob pointer on the update side; lockdep
     * checks that nf_hook_mutex is held by the caller.
     */
   46#define nf_entry_dereference(e) \
   47        rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
  48
  49static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
  50{
  51        struct nf_hook_entries *e;
  52        size_t alloc = sizeof(*e) +
  53                       sizeof(struct nf_hook_entry) * num +
  54                       sizeof(struct nf_hook_ops *) * num +
  55                       sizeof(struct nf_hook_entries_rcu_head);
  56
  57        if (num == 0)
  58                return NULL;
  59
  60        e = kvzalloc(alloc, GFP_KERNEL);
  61        if (e)
  62                e->num_hook_entries = num;
  63        return e;
  64}
  65
  66static void __nf_hook_entries_free(struct rcu_head *h)
  67{
  68        struct nf_hook_entries_rcu_head *head;
  69
  70        head = container_of(h, struct nf_hook_entries_rcu_head, head);
  71        kvfree(head->allocation);
  72}
  73
  74static void nf_hook_entries_free(struct nf_hook_entries *e)
  75{
  76        struct nf_hook_entries_rcu_head *head;
  77        struct nf_hook_ops **ops;
  78        unsigned int num;
  79
  80        if (!e)
  81                return;
  82
  83        num = e->num_hook_entries;
  84        ops = nf_hook_entries_get_hook_ops(e);
  85        head = (void *)&ops[num];
  86        head->allocation = e;
  87        call_rcu(&head->head, __nf_hook_entries_free);
  88}
  89
  90static unsigned int accept_all(void *priv,
  91                               struct sk_buff *skb,
  92                               const struct nf_hook_state *state)
  93{
  94        return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
  95}
  96
  97static const struct nf_hook_ops dummy_ops = {
  98        .hook = accept_all,
  99        .priority = INT_MIN,
 100};
 101
 102static struct nf_hook_entries *
 103nf_hook_entries_grow(const struct nf_hook_entries *old,
 104                     const struct nf_hook_ops *reg)
 105{
 106        unsigned int i, alloc_entries, nhooks, old_entries;
 107        struct nf_hook_ops **orig_ops = NULL;
 108        struct nf_hook_ops **new_ops;
 109        struct nf_hook_entries *new;
 110        bool inserted = false;
 111
 112        alloc_entries = 1;
 113        old_entries = old ? old->num_hook_entries : 0;
 114
 115        if (old) {
 116                orig_ops = nf_hook_entries_get_hook_ops(old);
 117
 118                for (i = 0; i < old_entries; i++) {
 119                        if (orig_ops[i] != &dummy_ops)
 120                                alloc_entries++;
 121                }
 122        }
 123
 124        if (alloc_entries > MAX_HOOK_COUNT)
 125                return ERR_PTR(-E2BIG);
 126
 127        new = allocate_hook_entries_size(alloc_entries);
 128        if (!new)
 129                return ERR_PTR(-ENOMEM);
 130
 131        new_ops = nf_hook_entries_get_hook_ops(new);
 132
 133        i = 0;
 134        nhooks = 0;
 135        while (i < old_entries) {
 136                if (orig_ops[i] == &dummy_ops) {
 137                        ++i;
 138                        continue;
 139                }
 140
 141                if (reg->nat_hook && orig_ops[i]->nat_hook) {
 142                        kvfree(new);
 143                        return ERR_PTR(-EBUSY);
 144                }
 145
 146                if (inserted || reg->priority > orig_ops[i]->priority) {
 147                        new_ops[nhooks] = (void *)orig_ops[i];
 148                        new->hooks[nhooks] = old->hooks[i];
 149                        i++;
 150                } else {
 151                        new_ops[nhooks] = (void *)reg;
 152                        new->hooks[nhooks].hook = reg->hook;
 153                        new->hooks[nhooks].priv = reg->priv;
 154                        inserted = true;
 155                }
 156                nhooks++;
 157        }
 158
 159        if (!inserted) {
 160                new_ops[nhooks] = (void *)reg;
 161                new->hooks[nhooks].hook = reg->hook;
 162                new->hooks[nhooks].priv = reg->priv;
 163        }
 164
 165        return new;
 166}
 167
 168static void hooks_validate(const struct nf_hook_entries *hooks)
 169{
 170#ifdef CONFIG_DEBUG_KERNEL
 171        struct nf_hook_ops **orig_ops;
 172        int prio = INT_MIN;
 173        size_t i = 0;
 174
 175        orig_ops = nf_hook_entries_get_hook_ops(hooks);
 176
 177        for (i = 0; i < hooks->num_hook_entries; i++) {
 178                if (orig_ops[i] == &dummy_ops)
 179                        continue;
 180
 181                WARN_ON(orig_ops[i]->priority < prio);
 182
 183                if (orig_ops[i]->priority > prio)
 184                        prio = orig_ops[i]->priority;
 185        }
 186#endif
 187}
 188
 189/*
 190 * __nf_hook_entries_try_shrink - try to shrink hook array
 191 *
 192 * @pp -- location of hook blob
 193 *
 194 * Hook unregistration must always succeed, so to-be-removed hooks
 195 * are replaced by a dummy one that will just move to next hook.
 196 *
 197 * This counts the current dummy hooks, attempts to allocate new blob,
 198 * copies the live hooks, then replaces and discards old one.
 199 *
 200 * return values:
 201 *
 202 * Returns address to free, or NULL.
 203 */
 204static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
 205{
 206        struct nf_hook_entries *old, *new = NULL;
 207        unsigned int i, j, skip = 0, hook_entries;
 208        struct nf_hook_ops **orig_ops;
 209        struct nf_hook_ops **new_ops;
 210
 211        old = nf_entry_dereference(*pp);
 212        if (WARN_ON_ONCE(!old))
 213                return NULL;
 214
 215        orig_ops = nf_hook_entries_get_hook_ops(old);
 216        for (i = 0; i < old->num_hook_entries; i++) {
 217                if (orig_ops[i] == &dummy_ops)
 218                        skip++;
 219        }
 220
 221        /* if skip == hook_entries all hooks have been removed */
 222        hook_entries = old->num_hook_entries;
 223        if (skip == hook_entries)
 224                goto out_assign;
 225
 226        if (skip == 0)
 227                return NULL;
 228
 229        hook_entries -= skip;
 230        new = allocate_hook_entries_size(hook_entries);
 231        if (!new)
 232                return NULL;
 233
 234        new_ops = nf_hook_entries_get_hook_ops(new);
 235        for (i = 0, j = 0; i < old->num_hook_entries; i++) {
 236                if (orig_ops[i] == &dummy_ops)
 237                        continue;
 238                new->hooks[j] = old->hooks[i];
 239                new_ops[j] = (void *)orig_ops[i];
 240                j++;
 241        }
 242        hooks_validate(new);
 243out_assign:
 244        rcu_assign_pointer(*pp, new);
 245        return old;
 246}
 247
 248static struct nf_hook_entries __rcu **
 249nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
 250                   struct net_device *dev)
 251{
 252        switch (pf) {
 253        case NFPROTO_NETDEV:
 254                break;
 255#ifdef CONFIG_NETFILTER_FAMILY_ARP
 256        case NFPROTO_ARP:
 257                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
 258                        return NULL;
 259                return net->nf.hooks_arp + hooknum;
 260#endif
 261#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
 262        case NFPROTO_BRIDGE:
 263                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
 264                        return NULL;
 265                return net->nf.hooks_bridge + hooknum;
 266#endif
 267        case NFPROTO_IPV4:
 268                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
 269                        return NULL;
 270                return net->nf.hooks_ipv4 + hooknum;
 271        case NFPROTO_IPV6:
 272                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
 273                        return NULL;
 274                return net->nf.hooks_ipv6 + hooknum;
 275#if IS_ENABLED(CONFIG_DECNET)
 276        case NFPROTO_DECNET:
 277                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
 278                        return NULL;
 279                return net->nf.hooks_decnet + hooknum;
 280#endif
 281        default:
 282                WARN_ON_ONCE(1);
 283                return NULL;
 284        }
 285
 286#ifdef CONFIG_NETFILTER_INGRESS
 287        if (hooknum == NF_NETDEV_INGRESS) {
 288                if (dev && dev_net(dev) == net)
 289                        return &dev->nf_hooks_ingress;
 290        }
 291#endif
 292        WARN_ON_ONCE(1);
 293        return NULL;
 294}
 295
 296static int __nf_register_net_hook(struct net *net, int pf,
 297                                  const struct nf_hook_ops *reg)
 298{
 299        struct nf_hook_entries *p, *new_hooks;
 300        struct nf_hook_entries __rcu **pp;
 301
 302        if (pf == NFPROTO_NETDEV) {
 303#ifndef CONFIG_NETFILTER_INGRESS
 304                if (reg->hooknum == NF_NETDEV_INGRESS)
 305                        return -EOPNOTSUPP;
 306#endif
 307                if (reg->hooknum != NF_NETDEV_INGRESS ||
 308                    !reg->dev || dev_net(reg->dev) != net)
 309                        return -EINVAL;
 310        }
 311
 312        pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
 313        if (!pp)
 314                return -EINVAL;
 315
 316        mutex_lock(&nf_hook_mutex);
 317
 318        p = nf_entry_dereference(*pp);
 319        new_hooks = nf_hook_entries_grow(p, reg);
 320
 321        if (!IS_ERR(new_hooks))
 322                rcu_assign_pointer(*pp, new_hooks);
 323
 324        mutex_unlock(&nf_hook_mutex);
 325        if (IS_ERR(new_hooks))
 326                return PTR_ERR(new_hooks);
 327
 328        hooks_validate(new_hooks);
 329#ifdef CONFIG_NETFILTER_INGRESS
 330        if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
 331                net_inc_ingress_queue();
 332#endif
 333#ifdef HAVE_JUMP_LABEL
 334        static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
 335#endif
 336        BUG_ON(p == new_hooks);
 337        nf_hook_entries_free(p);
 338        return 0;
 339}
 340
 341/*
 342 * nf_remove_net_hook - remove a hook from blob
 343 *
 344 * @oldp: current address of hook blob
 345 * @unreg: hook to unregister
 346 *
 347 * This cannot fail, hook unregistration must always succeed.
 348 * Therefore replace the to-be-removed hook with a dummy hook.
 349 */
 350static void nf_remove_net_hook(struct nf_hook_entries *old,
 351                               const struct nf_hook_ops *unreg, int pf)
 352{
 353        struct nf_hook_ops **orig_ops;
 354        bool found = false;
 355        unsigned int i;
 356
 357        orig_ops = nf_hook_entries_get_hook_ops(old);
 358        for (i = 0; i < old->num_hook_entries; i++) {
 359                if (orig_ops[i] != unreg)
 360                        continue;
 361                WRITE_ONCE(old->hooks[i].hook, accept_all);
 362                WRITE_ONCE(orig_ops[i], &dummy_ops);
 363                found = true;
 364                break;
 365        }
 366
 367        if (found) {
 368#ifdef CONFIG_NETFILTER_INGRESS
 369                if (pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
 370                        net_dec_ingress_queue();
 371#endif
 372#ifdef HAVE_JUMP_LABEL
 373                static_key_slow_dec(&nf_hooks_needed[pf][unreg->hooknum]);
 374#endif
 375        } else {
 376                WARN_ONCE(1, "hook not found, pf %d num %d", pf, unreg->hooknum);
 377        }
 378}
 379
 380static void __nf_unregister_net_hook(struct net *net, int pf,
 381                                     const struct nf_hook_ops *reg)
 382{
 383        struct nf_hook_entries __rcu **pp;
 384        struct nf_hook_entries *p;
 385
 386        pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
 387        if (!pp)
 388                return;
 389
 390        mutex_lock(&nf_hook_mutex);
 391
 392        p = nf_entry_dereference(*pp);
 393        if (WARN_ON_ONCE(!p)) {
 394                mutex_unlock(&nf_hook_mutex);
 395                return;
 396        }
 397
 398        nf_remove_net_hook(p, reg, pf);
 399
 400        p = __nf_hook_entries_try_shrink(pp);
 401        mutex_unlock(&nf_hook_mutex);
 402        if (!p)
 403                return;
 404
 405        nf_queue_nf_hook_drop(net);
 406        nf_hook_entries_free(p);
 407}
 408
 409void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 410{
 411        if (reg->pf == NFPROTO_INET) {
 412                __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
 413                __nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
 414        } else {
 415                __nf_unregister_net_hook(net, reg->pf, reg);
 416        }
 417}
 418EXPORT_SYMBOL(nf_unregister_net_hook);
 419
 420int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 421{
 422        int err;
 423
 424        if (reg->pf == NFPROTO_INET) {
 425                err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
 426                if (err < 0)
 427                        return err;
 428
 429                err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
 430                if (err < 0) {
 431                        __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
 432                        return err;
 433                }
 434        } else {
 435                err = __nf_register_net_hook(net, reg->pf, reg);
 436                if (err < 0)
 437                        return err;
 438        }
 439
 440        return 0;
 441}
 442EXPORT_SYMBOL(nf_register_net_hook);
 443
 444int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 445                          unsigned int n)
 446{
 447        unsigned int i;
 448        int err = 0;
 449
 450        for (i = 0; i < n; i++) {
 451                err = nf_register_net_hook(net, &reg[i]);
 452                if (err)
 453                        goto err;
 454        }
 455        return err;
 456
 457err:
 458        if (i > 0)
 459                nf_unregister_net_hooks(net, reg, i);
 460        return err;
 461}
 462EXPORT_SYMBOL(nf_register_net_hooks);
 463
 464void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 465                             unsigned int hookcount)
 466{
 467        unsigned int i;
 468
 469        for (i = 0; i < hookcount; i++)
 470                nf_unregister_net_hook(net, &reg[i]);
 471}
 472EXPORT_SYMBOL(nf_unregister_net_hooks);
 473
 474/* Returns 1 if okfn() needs to be executed by the caller,
 475 * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
 476int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 477                 const struct nf_hook_entries *e, unsigned int s)
 478{
 479        unsigned int verdict;
 480        int ret;
 481
 482        for (; s < e->num_hook_entries; s++) {
 483                verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
 484                switch (verdict & NF_VERDICT_MASK) {
 485                case NF_ACCEPT:
 486                        break;
 487                case NF_DROP:
 488                        kfree_skb(skb);
 489                        ret = NF_DROP_GETERR(verdict);
 490                        if (ret == 0)
 491                                ret = -EPERM;
 492                        return ret;
 493                case NF_QUEUE:
 494                        ret = nf_queue(skb, state, e, s, verdict);
 495                        if (ret == 1)
 496                                continue;
 497                        return ret;
 498                default:
 499                        /* Implicit handling for NF_STOLEN, as well as any other
 500                         * non conventional verdicts.
 501                         */
 502                        return 0;
 503                }
 504        }
 505
 506        return 1;
 507}
 508EXPORT_SYMBOL(nf_hook_slow);
 509
 510
 511int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
 512{
 513        if (writable_len > skb->len)
 514                return 0;
 515
 516        /* Not exclusive use of packet?  Must copy. */
 517        if (!skb_cloned(skb)) {
 518                if (writable_len <= skb_headlen(skb))
 519                        return 1;
 520        } else if (skb_clone_writable(skb, writable_len))
 521                return 1;
 522
 523        if (writable_len <= skb_headlen(skb))
 524                writable_len = 0;
 525        else
 526                writable_len -= skb_headlen(skb);
 527
 528        return !!__pskb_pull_tail(skb, writable_len);
 529}
 530EXPORT_SYMBOL(skb_make_writable);
 531
  532/* This needs to be compiled in any case to avoid dependencies between the
  533 * nfnetlink_queue code and nf_conntrack.
     *
     * The pointer is __rcu-annotated, so it is meant to be accessed through
     * the RCU accessors; nothing in this file assigns it.
  534 */
  535struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
  536EXPORT_SYMBOL_GPL(nfnl_ct_hook);
 537
 538#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 539/* This does not belong here, but locally generated errors need it if connection
 540   tracking in use: without this, connection may not be in hash table, and hence
 541   manufactured ICMP or RST packets will not be associated with it. */
 542void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
 543                __rcu __read_mostly;
 544EXPORT_SYMBOL(ip_ct_attach);
 545
 546void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
 547{
 548        void (*attach)(struct sk_buff *, const struct sk_buff *);
 549
 550        if (skb->_nfct) {
 551                rcu_read_lock();
 552                attach = rcu_dereference(ip_ct_attach);
 553                if (attach)
 554                        attach(new, skb);
 555                rcu_read_unlock();
 556        }
 557}
 558EXPORT_SYMBOL(nf_ct_attach);
 559
 560void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
 561EXPORT_SYMBOL(nf_ct_destroy);
 562
 563void nf_conntrack_destroy(struct nf_conntrack *nfct)
 564{
 565        void (*destroy)(struct nf_conntrack *);
 566
 567        rcu_read_lock();
 568        destroy = rcu_dereference(nf_ct_destroy);
 569        BUG_ON(destroy == NULL);
 570        destroy(nfct);
 571        rcu_read_unlock();
 572}
 573EXPORT_SYMBOL(nf_conntrack_destroy);
 574
  575/* Built-in default zone used e.g. by modules. */
    /* Constant object: default zone id and default zone direction. */
  576const struct nf_conntrack_zone nf_ct_zone_dflt = {
  577        .id     = NF_CT_DEFAULT_ZONE_ID,
  578        .dir    = NF_CT_DEFAULT_ZONE_DIR,
  579};
  580EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
 581#endif /* CONFIG_NF_CONNTRACK */
 582
  583#ifdef CONFIG_NF_NAT_NEEDED
    /*
     * Optional callback taking an skb and a flowi; starts out NULL and is
     * not assigned or called in this file.
     * NOTE(review): presumably installed by the NAT code so that session
     * decoding can account for NAT - confirm against the setter.
     */
  584void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
  585EXPORT_SYMBOL(nf_nat_decode_session_hook);
  586#endif
 587
 588static void __net_init
 589__netfilter_net_init(struct nf_hook_entries __rcu **e, int max)
 590{
 591        int h;
 592
 593        for (h = 0; h < max; h++)
 594                RCU_INIT_POINTER(e[h], NULL);
 595}
 596
 597static int __net_init netfilter_net_init(struct net *net)
 598{
 599        __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
 600        __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
 601#ifdef CONFIG_NETFILTER_FAMILY_ARP
 602        __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
 603#endif
 604#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
 605        __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
 606#endif
 607#if IS_ENABLED(CONFIG_DECNET)
 608        __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
 609#endif
 610
 611#ifdef CONFIG_PROC_FS
 612        net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
 613                                                net->proc_net);
 614        if (!net->nf.proc_netfilter) {
 615                if (!net_eq(net, &init_net))
 616                        pr_err("cannot create netfilter proc entry");
 617
 618                return -ENOMEM;
 619        }
 620#endif
 621
 622        return 0;
 623}
 624
    /*
     * Per-namespace teardown: remove the "netfilter" entry from the
     * namespace's proc_net directory (created by netfilter_net_init()
     * when CONFIG_PROC_FS is enabled).
     */
  625static void __net_exit netfilter_net_exit(struct net *net)
  626{
  627        remove_proc_entry("netfilter", net->proc_net);
  628}
 629
    /* Per-namespace init/exit callbacks, registered by netfilter_init(). */
  630static struct pernet_operations netfilter_net_ops = {
  631        .init = netfilter_net_init,
  632        .exit = netfilter_net_exit,
  633};
 634
 635int __init netfilter_init(void)
 636{
 637        int ret;
 638
 639        ret = register_pernet_subsys(&netfilter_net_ops);
 640        if (ret < 0)
 641                goto err;
 642
 643        ret = netfilter_log_init();
 644        if (ret < 0)
 645                goto err_pernet;
 646
 647        return 0;
 648err_pernet:
 649        unregister_pernet_subsys(&netfilter_net_ops);
 650err:
 651        return ret;
 652}
 653