linux/net/netfilter/nfnetlink.c
<<
>>
Prefs
   1/* Netfilter messages via netlink socket. Allows for user space
   2 * protocol helpers and general trouble making from userspace.
   3 *
   4 * (C) 2001 by Jay Schulist <jschlst@samba.org>,
   5 * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
   6 * (C) 2005,2007 by Pablo Neira Ayuso <pablo@netfilter.org>
   7 *
   8 * Initial netfilter messages via netlink development funded and
   9 * generally made possible by Network Robots, Inc. (www.networkrobots.com)
  10 *
  11 * Further development of this code funded by Astaro AG (http://www.astaro.com)
  12 *
  13 * This software may be used and distributed according to the terms
  14 * of the GNU General Public License, incorporated herein by reference.
  15 */
  16
  17#include <linux/module.h>
  18#include <linux/types.h>
  19#include <linux/socket.h>
  20#include <linux/kernel.h>
  21#include <linux/string.h>
  22#include <linux/sockios.h>
  23#include <linux/net.h>
  24#include <linux/skbuff.h>
  25#include <asm/uaccess.h>
  26#include <net/sock.h>
  27#include <linux/init.h>
  28
  29#include <net/netlink.h>
  30#include <linux/netfilter/nfnetlink.h>
  31
  32MODULE_LICENSE("GPL");
  33MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
  34MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
  35
  36#define nfnl_dereference_protected(id) \
  37        rcu_dereference_protected(table[(id)].subsys, \
  38                                  lockdep_nfnl_is_held((id)))
  39
  40static char __initdata nfversion[] = "0.30";
  41
  42static struct {
  43        struct mutex                            mutex;
  44        const struct nfnetlink_subsystem __rcu  *subsys;
  45} table[NFNL_SUBSYS_COUNT];
  46
  47static const int nfnl_group2type[NFNLGRP_MAX+1] = {
  48        [NFNLGRP_CONNTRACK_NEW]         = NFNL_SUBSYS_CTNETLINK,
  49        [NFNLGRP_CONNTRACK_UPDATE]      = NFNL_SUBSYS_CTNETLINK,
  50        [NFNLGRP_CONNTRACK_DESTROY]     = NFNL_SUBSYS_CTNETLINK,
  51        [NFNLGRP_CONNTRACK_EXP_NEW]     = NFNL_SUBSYS_CTNETLINK_EXP,
  52        [NFNLGRP_CONNTRACK_EXP_UPDATE]  = NFNL_SUBSYS_CTNETLINK_EXP,
  53        [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP,
  54        [NFNLGRP_NFTABLES]              = NFNL_SUBSYS_NFTABLES,
  55        [NFNLGRP_ACCT_QUOTA]            = NFNL_SUBSYS_ACCT,
  56        [NFNLGRP_NFTRACE]               = NFNL_SUBSYS_NFTABLES,
  57};
  58
  59void nfnl_lock(__u8 subsys_id)
  60{
  61        mutex_lock(&table[subsys_id].mutex);
  62}
  63EXPORT_SYMBOL_GPL(nfnl_lock);
  64
  65void nfnl_unlock(__u8 subsys_id)
  66{
  67        mutex_unlock(&table[subsys_id].mutex);
  68}
  69EXPORT_SYMBOL_GPL(nfnl_unlock);
  70
  71#ifdef CONFIG_PROVE_LOCKING
  72bool lockdep_nfnl_is_held(u8 subsys_id)
  73{
  74        return lockdep_is_held(&table[subsys_id].mutex);
  75}
  76EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held);
  77#endif
  78
  79int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
  80{
  81        nfnl_lock(n->subsys_id);
  82        if (table[n->subsys_id].subsys) {
  83                nfnl_unlock(n->subsys_id);
  84                return -EBUSY;
  85        }
  86        rcu_assign_pointer(table[n->subsys_id].subsys, n);
  87        nfnl_unlock(n->subsys_id);
  88
  89        return 0;
  90}
  91EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
  92
  93int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n)
  94{
  95        nfnl_lock(n->subsys_id);
  96        table[n->subsys_id].subsys = NULL;
  97        nfnl_unlock(n->subsys_id);
  98        synchronize_rcu();
  99        return 0;
 100}
 101EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
 102
 103static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
 104{
 105        u_int8_t subsys_id = NFNL_SUBSYS_ID(type);
 106
 107        if (subsys_id >= NFNL_SUBSYS_COUNT)
 108                return NULL;
 109
 110        return rcu_dereference(table[subsys_id].subsys);
 111}
 112
 113static inline const struct nfnl_callback *
 114nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss)
 115{
 116        u_int8_t cb_id = NFNL_MSG_TYPE(type);
 117
 118        if (cb_id >= ss->cb_count)
 119                return NULL;
 120
 121        return &ss->cb[cb_id];
 122}
 123
 124int nfnetlink_has_listeners(struct net *net, unsigned int group)
 125{
 126        return netlink_has_listeners(net->nfnl, group);
 127}
 128EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
 129
 130int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
 131                   unsigned int group, int echo, gfp_t flags)
 132{
 133        return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags);
 134}
 135EXPORT_SYMBOL_GPL(nfnetlink_send);
 136
 137int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error)
 138{
 139        return netlink_set_err(net->nfnl, portid, group, error);
 140}
 141EXPORT_SYMBOL_GPL(nfnetlink_set_err);
 142
 143int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
 144                      int flags)
 145{
 146        return netlink_unicast(net->nfnl, skb, portid, flags);
 147}
 148EXPORT_SYMBOL_GPL(nfnetlink_unicast);
 149
 150/* Process one complete nfnetlink message. */
 151static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 152{
 153        struct net *net = sock_net(skb->sk);
 154        const struct nfnl_callback *nc;
 155        const struct nfnetlink_subsystem *ss;
 156        int type, err;
 157
 158        /* All the messages must at least contain nfgenmsg */
 159        if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
 160                return 0;
 161
 162        type = nlh->nlmsg_type;
 163replay:
 164        rcu_read_lock();
 165        ss = nfnetlink_get_subsys(type);
 166        if (!ss) {
 167#ifdef CONFIG_MODULES
 168                rcu_read_unlock();
 169                request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
 170                rcu_read_lock();
 171                ss = nfnetlink_get_subsys(type);
 172                if (!ss)
 173#endif
 174                {
 175                        rcu_read_unlock();
 176                        return -EINVAL;
 177                }
 178        }
 179
 180        nc = nfnetlink_find_client(type, ss);
 181        if (!nc) {
 182                rcu_read_unlock();
 183                return -EINVAL;
 184        }
 185
 186        {
 187                int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
 188                u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
 189                struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
 190                struct nlattr *attr = (void *)nlh + min_len;
 191                int attrlen = nlh->nlmsg_len - min_len;
 192                __u8 subsys_id = NFNL_SUBSYS_ID(type);
 193
 194                err = nla_parse(cda, ss->cb[cb_id].attr_count,
 195                                attr, attrlen, ss->cb[cb_id].policy);
 196                if (err < 0) {
 197                        rcu_read_unlock();
 198                        return err;
 199                }
 200
 201                if (nc->call_rcu) {
 202                        err = nc->call_rcu(net, net->nfnl, skb, nlh,
 203                                           (const struct nlattr **)cda);
 204                        rcu_read_unlock();
 205                } else {
 206                        rcu_read_unlock();
 207                        nfnl_lock(subsys_id);
 208                        if (nfnl_dereference_protected(subsys_id) != ss ||
 209                            nfnetlink_find_client(type, ss) != nc)
 210                                err = -EAGAIN;
 211                        else if (nc->call)
 212                                err = nc->call(net, net->nfnl, skb, nlh,
 213                                               (const struct nlattr **)cda);
 214                        else
 215                                err = -EINVAL;
 216                        nfnl_unlock(subsys_id);
 217                }
 218                if (err == -EAGAIN)
 219                        goto replay;
 220                return err;
 221        }
 222}
 223
 224struct nfnl_err {
 225        struct list_head        head;
 226        struct nlmsghdr         *nlh;
 227        int                     err;
 228};
 229
 230static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
 231{
 232        struct nfnl_err *nfnl_err;
 233
 234        nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL);
 235        if (nfnl_err == NULL)
 236                return -ENOMEM;
 237
 238        nfnl_err->nlh = nlh;
 239        nfnl_err->err = err;
 240        list_add_tail(&nfnl_err->head, list);
 241
 242        return 0;
 243}
 244
 245static void nfnl_err_del(struct nfnl_err *nfnl_err)
 246{
 247        list_del(&nfnl_err->head);
 248        kfree(nfnl_err);
 249}
 250
 251static void nfnl_err_reset(struct list_head *err_list)
 252{
 253        struct nfnl_err *nfnl_err, *next;
 254
 255        list_for_each_entry_safe(nfnl_err, next, err_list, head)
 256                nfnl_err_del(nfnl_err);
 257}
 258
 259static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
 260{
 261        struct nfnl_err *nfnl_err, *next;
 262
 263        list_for_each_entry_safe(nfnl_err, next, err_list, head) {
 264                netlink_ack(skb, nfnl_err->nlh, nfnl_err->err);
 265                nfnl_err_del(nfnl_err);
 266        }
 267}
 268
 269enum {
 270        NFNL_BATCH_FAILURE      = (1 << 0),
 271        NFNL_BATCH_DONE         = (1 << 1),
 272        NFNL_BATCH_REPLAY       = (1 << 2),
 273};
 274
 275static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
 276                                u_int16_t subsys_id)
 277{
 278        struct sk_buff *oskb = skb;
 279        struct net *net = sock_net(skb->sk);
 280        const struct nfnetlink_subsystem *ss;
 281        const struct nfnl_callback *nc;
 282        static LIST_HEAD(err_list);
 283        u32 status;
 284        int err;
 285
 286        if (subsys_id >= NFNL_SUBSYS_COUNT)
 287                return netlink_ack(skb, nlh, -EINVAL);
 288replay:
 289        status = 0;
 290
 291        skb = netlink_skb_clone(oskb, GFP_KERNEL);
 292        if (!skb)
 293                return netlink_ack(oskb, nlh, -ENOMEM);
 294
 295        nfnl_lock(subsys_id);
 296        ss = nfnl_dereference_protected(subsys_id);
 297        if (!ss) {
 298#ifdef CONFIG_MODULES
 299                nfnl_unlock(subsys_id);
 300                request_module("nfnetlink-subsys-%d", subsys_id);
 301                nfnl_lock(subsys_id);
 302                ss = nfnl_dereference_protected(subsys_id);
 303                if (!ss)
 304#endif
 305                {
 306                        nfnl_unlock(subsys_id);
 307                        netlink_ack(oskb, nlh, -EOPNOTSUPP);
 308                        return kfree_skb(skb);
 309                }
 310        }
 311
 312        if (!ss->commit || !ss->abort) {
 313                nfnl_unlock(subsys_id);
 314                netlink_ack(oskb, nlh, -EOPNOTSUPP);
 315                return kfree_skb(skb);
 316        }
 317
 318        while (skb->len >= nlmsg_total_size(0)) {
 319                int msglen, type;
 320
 321                nlh = nlmsg_hdr(skb);
 322                err = 0;
 323
 324                if (nlh->nlmsg_len < NLMSG_HDRLEN ||
 325                    skb->len < nlh->nlmsg_len ||
 326                    nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
 327                        nfnl_err_reset(&err_list);
 328                        status |= NFNL_BATCH_FAILURE;
 329                        goto done;
 330                }
 331
 332                /* Only requests are handled by the kernel */
 333                if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
 334                        err = -EINVAL;
 335                        goto ack;
 336                }
 337
 338                type = nlh->nlmsg_type;
 339                if (type == NFNL_MSG_BATCH_BEGIN) {
 340                        /* Malformed: Batch begin twice */
 341                        nfnl_err_reset(&err_list);
 342                        status |= NFNL_BATCH_FAILURE;
 343                        goto done;
 344                } else if (type == NFNL_MSG_BATCH_END) {
 345                        status |= NFNL_BATCH_DONE;
 346                        goto done;
 347                } else if (type < NLMSG_MIN_TYPE) {
 348                        err = -EINVAL;
 349                        goto ack;
 350                }
 351
 352                /* We only accept a batch with messages for the same
 353                 * subsystem.
 354                 */
 355                if (NFNL_SUBSYS_ID(type) != subsys_id) {
 356                        err = -EINVAL;
 357                        goto ack;
 358                }
 359
 360                nc = nfnetlink_find_client(type, ss);
 361                if (!nc) {
 362                        err = -EINVAL;
 363                        goto ack;
 364                }
 365
 366                {
 367                        int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
 368                        u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
 369                        struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
 370                        struct nlattr *attr = (void *)nlh + min_len;
 371                        int attrlen = nlh->nlmsg_len - min_len;
 372
 373                        err = nla_parse(cda, ss->cb[cb_id].attr_count,
 374                                        attr, attrlen, ss->cb[cb_id].policy);
 375                        if (err < 0)
 376                                goto ack;
 377
 378                        if (nc->call_batch) {
 379                                err = nc->call_batch(net, net->nfnl, skb, nlh,
 380                                                     (const struct nlattr **)cda);
 381                        }
 382
 383                        /* The lock was released to autoload some module, we
 384                         * have to abort and start from scratch using the
 385                         * original skb.
 386                         */
 387                        if (err == -EAGAIN) {
 388                                status |= NFNL_BATCH_REPLAY;
 389                                goto next;
 390                        }
 391                }
 392ack:
 393                if (nlh->nlmsg_flags & NLM_F_ACK || err) {
 394                        /* Errors are delivered once the full batch has been
 395                         * processed, this avoids that the same error is
 396                         * reported several times when replaying the batch.
 397                         */
 398                        if (nfnl_err_add(&err_list, nlh, err) < 0) {
 399                                /* We failed to enqueue an error, reset the
 400                                 * list of errors and send OOM to userspace
 401                                 * pointing to the batch header.
 402                                 */
 403                                nfnl_err_reset(&err_list);
 404                                netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
 405                                status |= NFNL_BATCH_FAILURE;
 406                                goto done;
 407                        }
 408                        /* We don't stop processing the batch on errors, thus,
 409                         * userspace gets all the errors that the batch
 410                         * triggers.
 411                         */
 412                        if (err)
 413                                status |= NFNL_BATCH_FAILURE;
 414                }
 415next:
 416                msglen = NLMSG_ALIGN(nlh->nlmsg_len);
 417                if (msglen > skb->len)
 418                        msglen = skb->len;
 419                skb_pull(skb, msglen);
 420        }
 421done:
 422        if (status & NFNL_BATCH_REPLAY) {
 423                ss->abort(net, oskb);
 424                nfnl_err_reset(&err_list);
 425                nfnl_unlock(subsys_id);
 426                kfree_skb(skb);
 427                goto replay;
 428        } else if (status == NFNL_BATCH_DONE) {
 429                ss->commit(net, oskb);
 430        } else {
 431                ss->abort(net, oskb);
 432        }
 433
 434        nfnl_err_deliver(&err_list, oskb);
 435        nfnl_unlock(subsys_id);
 436        kfree_skb(skb);
 437}
 438
 439static void nfnetlink_rcv(struct sk_buff *skb)
 440{
 441        struct nlmsghdr *nlh = nlmsg_hdr(skb);
 442        u_int16_t res_id;
 443        int msglen;
 444
 445        if (nlh->nlmsg_len < NLMSG_HDRLEN ||
 446            skb->len < nlh->nlmsg_len)
 447                return;
 448
 449        if (!netlink_net_capable(skb, CAP_NET_ADMIN)) {
 450                netlink_ack(skb, nlh, -EPERM);
 451                return;
 452        }
 453
 454        if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) {
 455                struct nfgenmsg *nfgenmsg;
 456
 457                msglen = NLMSG_ALIGN(nlh->nlmsg_len);
 458                if (msglen > skb->len)
 459                        msglen = skb->len;
 460
 461                if (nlh->nlmsg_len < NLMSG_HDRLEN ||
 462                    skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
 463                        return;
 464
 465                nfgenmsg = nlmsg_data(nlh);
 466                skb_pull(skb, msglen);
 467                /* Work around old nft using host byte order */
 468                if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES)
 469                        res_id = NFNL_SUBSYS_NFTABLES;
 470                else
 471                        res_id = ntohs(nfgenmsg->res_id);
 472                nfnetlink_rcv_batch(skb, nlh, res_id);
 473        } else {
 474                netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
 475        }
 476}
 477
 478#ifdef CONFIG_MODULES
 479static int nfnetlink_bind(struct net *net, int group)
 480{
 481        const struct nfnetlink_subsystem *ss;
 482        int type;
 483
 484        if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX)
 485                return 0;
 486
 487        type = nfnl_group2type[group];
 488
 489        rcu_read_lock();
 490        ss = nfnetlink_get_subsys(type << 8);
 491        rcu_read_unlock();
 492        if (!ss)
 493                request_module("nfnetlink-subsys-%d", type);
 494        return 0;
 495}
 496#endif
 497
 498static int __net_init nfnetlink_net_init(struct net *net)
 499{
 500        struct sock *nfnl;
 501        struct netlink_kernel_cfg cfg = {
 502                .groups = NFNLGRP_MAX,
 503                .input  = nfnetlink_rcv,
 504#ifdef CONFIG_MODULES
 505                .bind   = nfnetlink_bind,
 506#endif
 507        };
 508
 509        nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg);
 510        if (!nfnl)
 511                return -ENOMEM;
 512        net->nfnl_stash = nfnl;
 513        rcu_assign_pointer(net->nfnl, nfnl);
 514        return 0;
 515}
 516
 517static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
 518{
 519        struct net *net;
 520
 521        list_for_each_entry(net, net_exit_list, exit_list)
 522                RCU_INIT_POINTER(net->nfnl, NULL);
 523        synchronize_net();
 524        list_for_each_entry(net, net_exit_list, exit_list)
 525                netlink_kernel_release(net->nfnl_stash);
 526}
 527
 528static struct pernet_operations nfnetlink_net_ops = {
 529        .init           = nfnetlink_net_init,
 530        .exit_batch     = nfnetlink_net_exit_batch,
 531};
 532
 533static int __init nfnetlink_init(void)
 534{
 535        int i;
 536
 537        for (i = NFNLGRP_NONE + 1; i <= NFNLGRP_MAX; i++)
 538                BUG_ON(nfnl_group2type[i] == NFNL_SUBSYS_NONE);
 539
 540        for (i=0; i<NFNL_SUBSYS_COUNT; i++)
 541                mutex_init(&table[i].mutex);
 542
 543        pr_info("Netfilter messages via NETLINK v%s.\n", nfversion);
 544        return register_pernet_subsys(&nfnetlink_net_ops);
 545}
 546
 547static void __exit nfnetlink_exit(void)
 548{
 549        pr_info("Removing netfilter NETLINK layer.\n");
 550        unregister_pernet_subsys(&nfnetlink_net_ops);
 551}
 552module_init(nfnetlink_init);
 553module_exit(nfnetlink_exit);
 554