linux/net/netlink/af_netlink.c
/*
 * NETLINK      Kernel-user communication protocol.
 *
 *              Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
 *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *                              Patrick McHardy <kaber@trash.net>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *                               - inc module use count of module that owns
 *                                 the kernel socket in case userspace opens
 *                                 socket of same protocol
 *                               - remove all module support, since netlink is
 *                                 mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/if_arp.h>
#include <linux/rhashtable.h>
#include <asm/cacheflush.h>
#include <linux/hash.h>
#include <linux/genetlink.h>
#include <linux/net_namespace.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#include "af_netlink.h"

struct listeners {
        struct rcu_head         rcu;
        unsigned long           masks[0];
};

/* state bits */
#define NETLINK_S_CONGESTED             0x0

static inline int netlink_is_kernel(struct sock *sk)
{
        return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
}

struct netlink_table *nl_table __read_mostly;
EXPORT_SYMBOL_GPL(nl_table);

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS];

static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = {
        "nlk_cb_mutex-ROUTE",
        "nlk_cb_mutex-1",
        "nlk_cb_mutex-USERSOCK",
        "nlk_cb_mutex-FIREWALL",
        "nlk_cb_mutex-SOCK_DIAG",
        "nlk_cb_mutex-NFLOG",
        "nlk_cb_mutex-XFRM",
        "nlk_cb_mutex-SELINUX",
        "nlk_cb_mutex-ISCSI",
        "nlk_cb_mutex-AUDIT",
        "nlk_cb_mutex-FIB_LOOKUP",
        "nlk_cb_mutex-CONNECTOR",
        "nlk_cb_mutex-NETFILTER",
        "nlk_cb_mutex-IP6_FW",
        "nlk_cb_mutex-DNRTMSG",
        "nlk_cb_mutex-KOBJECT_UEVENT",
        "nlk_cb_mutex-GENERIC",
        "nlk_cb_mutex-17",
        "nlk_cb_mutex-SCSITRANSPORT",
        "nlk_cb_mutex-ECRYPTFS",
        "nlk_cb_mutex-RDMA",
        "nlk_cb_mutex-CRYPTO",
        "nlk_cb_mutex-SMC",
        "nlk_cb_mutex-23",
        "nlk_cb_mutex-24",
        "nlk_cb_mutex-25",
        "nlk_cb_mutex-26",
        "nlk_cb_mutex-27",
        "nlk_cb_mutex-28",
        "nlk_cb_mutex-29",
        "nlk_cb_mutex-30",
        "nlk_cb_mutex-31",
        "nlk_cb_mutex-MAX_LINKS"
};

static int netlink_dump(struct sock *sk);
static void netlink_skb_destructor(struct sk_buff *skb);

/* nl_table locking explained:
 * Lookup and traversal are protected with an RCU read-side lock. Insertion
 * and removal are protected with a per-bucket lock while using RCU list
 * modification primitives, and may run in parallel to RCU-protected lookups.
 * Destruction of the Netlink socket may only occur *after* nl_table_lock has
 * been acquired - either during or after the socket has been removed from
 * the list - and after an RCU grace period.
 */
DEFINE_RWLOCK(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))

static BLOCKING_NOTIFIER_HEAD(netlink_chain);

static DEFINE_SPINLOCK(netlink_tap_lock);
static struct list_head netlink_tap_all __read_mostly;

static const struct rhashtable_params netlink_rhashtable_params;

static inline u32 netlink_group_mask(u32 group)
{
        return group ? 1 << (group - 1) : 0;
}

static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
                                           gfp_t gfp_mask)
{
        unsigned int len = skb_end_offset(skb);
        struct sk_buff *new;

        new = alloc_skb(len, gfp_mask);
        if (new == NULL)
                return NULL;

        NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
        NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
        NETLINK_CB(new).creds = NETLINK_CB(skb).creds;

        skb_put_data(new, skb->data, len);
        return new;
}

int netlink_add_tap(struct netlink_tap *nt)
{
        if (unlikely(nt->dev->type != ARPHRD_NETLINK))
                return -EINVAL;

        spin_lock(&netlink_tap_lock);
        list_add_rcu(&nt->list, &netlink_tap_all);
        spin_unlock(&netlink_tap_lock);

        __module_get(nt->module);

        return 0;
}
EXPORT_SYMBOL_GPL(netlink_add_tap);

static int __netlink_remove_tap(struct netlink_tap *nt)
{
        bool found = false;
        struct netlink_tap *tmp;

        spin_lock(&netlink_tap_lock);

        list_for_each_entry(tmp, &netlink_tap_all, list) {
                if (nt == tmp) {
                        list_del_rcu(&nt->list);
                        found = true;
                        goto out;
                }
        }

        pr_warn("__netlink_remove_tap: %p not found\n", nt);
out:
        spin_unlock(&netlink_tap_lock);

        if (found)
                module_put(nt->module);

        return found ? 0 : -ENODEV;
}

int netlink_remove_tap(struct netlink_tap *nt)
{
        int ret;

        ret = __netlink_remove_tap(nt);
        synchronize_net();

        return ret;
}
EXPORT_SYMBOL_GPL(netlink_remove_tap);

static bool netlink_filter_tap(const struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        /* We take the more conservative approach and
         * whitelist socket protocols that may pass.
         */
        switch (sk->sk_protocol) {
        case NETLINK_ROUTE:
        case NETLINK_USERSOCK:
        case NETLINK_SOCK_DIAG:
        case NETLINK_NFLOG:
        case NETLINK_XFRM:
        case NETLINK_FIB_LOOKUP:
        case NETLINK_NETFILTER:
        case NETLINK_GENERIC:
                return true;
        }

        return false;
}

static int __netlink_deliver_tap_skb(struct sk_buff *skb,
                                     struct net_device *dev)
{
        struct sk_buff *nskb;
        struct sock *sk = skb->sk;
        int ret = -ENOMEM;

        dev_hold(dev);

        if (is_vmalloc_addr(skb->head))
                nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
        else
                nskb = skb_clone(skb, GFP_ATOMIC);
        if (nskb) {
                nskb->dev = dev;
                nskb->protocol = htons((u16) sk->sk_protocol);
                nskb->pkt_type = netlink_is_kernel(sk) ?
                                 PACKET_KERNEL : PACKET_USER;
                skb_reset_network_header(nskb);
                ret = dev_queue_xmit(nskb);
                if (unlikely(ret > 0))
                        ret = net_xmit_errno(ret);
        }

        dev_put(dev);
        return ret;
}

static void __netlink_deliver_tap(struct sk_buff *skb)
{
        int ret;
        struct netlink_tap *tmp;

        if (!netlink_filter_tap(skb))
                return;

        list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
                ret = __netlink_deliver_tap_skb(skb, tmp->dev);
                if (unlikely(ret))
                        break;
        }
}

static void netlink_deliver_tap(struct sk_buff *skb)
{
        rcu_read_lock();

        if (unlikely(!list_empty(&netlink_tap_all)))
                __netlink_deliver_tap(skb);

        rcu_read_unlock();
}

static void netlink_deliver_tap_kernel(struct sock *dst, struct sock *src,
                                       struct sk_buff *skb)
{
        if (!(netlink_is_kernel(dst) && netlink_is_kernel(src)))
                netlink_deliver_tap(skb);
}

static void netlink_overrun(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
                if (!test_and_set_bit(NETLINK_S_CONGESTED,
                                      &nlk_sk(sk)->state)) {
                        sk->sk_err = ENOBUFS;
                        sk->sk_error_report(sk);
                }
        }
        atomic_inc(&sk->sk_drops);
}

static void netlink_rcv_wake(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (skb_queue_empty(&sk->sk_receive_queue))
                clear_bit(NETLINK_S_CONGESTED, &nlk->state);
        if (!test_bit(NETLINK_S_CONGESTED, &nlk->state))
                wake_up_interruptible(&nlk->wait);
}

static void netlink_skb_destructor(struct sk_buff *skb)
{
        if (is_vmalloc_addr(skb->head)) {
                if (!skb->cloned ||
                    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
                        vfree(skb->head);

                skb->head = NULL;
        }
        if (skb->sk != NULL)
                sock_rfree(skb);
}

static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
        WARN_ON(skb->sk != NULL);
        skb->sk = sk;
        skb->destructor = netlink_skb_destructor;
        atomic_add(skb->truesize, &sk->sk_rmem_alloc);
        sk_mem_charge(sk, skb->truesize);
}

static void netlink_sock_destruct(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->cb_running) {
                if (nlk->cb.done)
                        nlk->cb.done(&nlk->cb);
                module_put(nlk->cb.module);
                kfree_skb(nlk->cb.skb);
        }

        skb_queue_purge(&sk->sk_receive_queue);

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
                return;
        }

        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
        WARN_ON(nlk_sk(sk)->groups);
}

static void netlink_sock_destruct_work(struct work_struct *work)
{
        struct netlink_sock *nlk = container_of(work, struct netlink_sock,
                                                work);

        sk_free(&nlk->sk);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 * SMP. Look, when several writers sleep and a reader wakes them up, all but
 * one immediately hit the write lock and grab all the CPUs. Exclusive sleep
 * solves this, _but_ remember, it adds useless work on UP machines.
 */

void netlink_table_grab(void)
        __acquires(nl_table_lock)
{
        might_sleep();

        write_lock_irq(&nl_table_lock);

        if (atomic_read(&nl_table_users)) {
                DECLARE_WAITQUEUE(wait, current);

                add_wait_queue_exclusive(&nl_table_wait, &wait);
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&nl_table_users) == 0)
                                break;
                        write_unlock_irq(&nl_table_lock);
                        schedule();
                        write_lock_irq(&nl_table_lock);
                }

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nl_table_wait, &wait);
        }
}

void netlink_table_ungrab(void)
        __releases(nl_table_lock)
{
        write_unlock_irq(&nl_table_lock);
        wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
        /* read_lock() synchronizes us with netlink_table_grab() */

        read_lock(&nl_table_lock);
        atomic_inc(&nl_table_users);
        read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
        if (atomic_dec_and_test(&nl_table_users))
                wake_up(&nl_table_wait);
}
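
/* A minimal usage sketch for the two locking levels above (the function
 * names are real; the surrounding bodies are illustrative only).  Readers
 * pin the table with a counted reference and may sleep; writers grab the
 * table and wait for every reader to drain:
 *
 *      static void example_reader(void)
 *      {
 *              netlink_lock_table();
 *              // walk nl_table[] entries; sleeping is fine here, but
 *              // calling netlink_table_grab() would deadlock
 *              netlink_unlock_table();
 *      }
 *
 *      static void example_writer(void)
 *      {
 *              netlink_table_grab();   // waits for all readers to leave
 *              // mutate nl_table[] (e.g. listeners, mc_list)
 *              netlink_table_ungrab(); // wakes the next waiter
 *      }
 */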

struct netlink_compare_arg
{
        possible_net_t pnet;
        u32 portid;
};

/* Doing sizeof directly may yield 4 extra bytes on 64-bit. */
#define netlink_compare_arg_len \
        (offsetof(struct netlink_compare_arg, portid) + sizeof(u32))

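/* Worked example for netlink_compare_arg_len, assuming CONFIG_NET_NS on a
 * 64-bit build: possible_net_t then holds one pointer, so
 * offsetof(struct netlink_compare_arg, portid) == 8 and the key length is
 * 8 + 4 == 12 bytes, while sizeof(struct netlink_compare_arg) is padded up
 * to 16 for pointer alignment.  Hashing sizeof() bytes would mix four bytes
 * of padding into the key; the memset() in netlink_compare_arg_init() below
 * plus this truncated length keep the hash input fully defined.
 */
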
static inline int netlink_compare(struct rhashtable_compare_arg *arg,
                                  const void *ptr)
{
        const struct netlink_compare_arg *x = arg->key;
        const struct netlink_sock *nlk = ptr;

        return nlk->portid != x->portid ||
               !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
}

static void netlink_compare_arg_init(struct netlink_compare_arg *arg,
                                     struct net *net, u32 portid)
{
        memset(arg, 0, sizeof(*arg));
        write_pnet(&arg->pnet, net);
        arg->portid = portid;
}

static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
                                     struct net *net)
{
        struct netlink_compare_arg arg;

        netlink_compare_arg_init(&arg, net, portid);
        return rhashtable_lookup_fast(&table->hash, &arg,
                                      netlink_rhashtable_params);
}

static int __netlink_insert(struct netlink_table *table, struct sock *sk)
{
        struct netlink_compare_arg arg;

        netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
        return rhashtable_lookup_insert_key(&table->hash, &arg,
                                            &nlk_sk(sk)->node,
                                            netlink_rhashtable_params);
}

static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
        struct netlink_table *table = &nl_table[protocol];
        struct sock *sk;

        rcu_read_lock();
        sk = __netlink_lookup(table, portid, net);
        if (sk)
                sock_hold(sk);
        rcu_read_unlock();

        return sk;
}

static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
        unsigned long mask;
        unsigned int i;
        struct listeners *listeners;

        listeners = nl_deref_protected(tbl->listeners);
        if (!listeners)
                return;

        for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
                mask = 0;
                sk_for_each_bound(sk, &tbl->mc_list) {
                        if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
                                mask |= nlk_sk(sk)->groups[i];
                }
                listeners->masks[i] = mask;
        }
        /* this function is only called with the netlink table "grabbed", which
         * makes sure updates are visible before bind or setsockopt return. */
}

static int netlink_insert(struct sock *sk, u32 portid)
{
        struct netlink_table *table = &nl_table[sk->sk_protocol];
        int err;

        lock_sock(sk);

        err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
        if (nlk_sk(sk)->bound)
                goto err;

        err = -ENOMEM;
        if (BITS_PER_LONG > 32 &&
            unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
                goto err;

        nlk_sk(sk)->portid = portid;
        sock_hold(sk);

        err = __netlink_insert(table, sk);
        if (err) {
                /* In case the hashtable backend returns with -EBUSY
                 * from here, it must not escape to the caller.
                 */
                if (unlikely(err == -EBUSY))
                        err = -EOVERFLOW;
                if (err == -EEXIST)
                        err = -EADDRINUSE;
                sock_put(sk);
                goto err;
        }

        /* We need to ensure that the socket is hashed and visible. */
        smp_wmb();
        nlk_sk(sk)->bound = portid;

err:
        release_sock(sk);
        return err;
}

static void netlink_remove(struct sock *sk)
{
        struct netlink_table *table;

        table = &nl_table[sk->sk_protocol];
        if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node,
                                    netlink_rhashtable_params)) {
                WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
                __sock_put(sk);
        }

        netlink_table_grab();
        if (nlk_sk(sk)->subscriptions) {
                __sk_del_bind_node(sk);
                netlink_update_listeners(sk);
        }
        if (sk->sk_protocol == NETLINK_GENERIC)
                atomic_inc(&genl_sk_destructing_cnt);
        netlink_table_ungrab();
}

static struct proto netlink_proto = {
        .name     = "NETLINK",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
                            struct mutex *cb_mutex, int protocol,
                            int kern)
{
        struct sock *sk;
        struct netlink_sock *nlk;

        sock->ops = &netlink_ops;

        sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);

        nlk = nlk_sk(sk);
        if (cb_mutex) {
                nlk->cb_mutex = cb_mutex;
        } else {
                nlk->cb_mutex = &nlk->cb_def_mutex;
                mutex_init(nlk->cb_mutex);
                lockdep_set_class_and_name(nlk->cb_mutex,
                                           nlk_cb_mutex_keys + protocol,
                                           nlk_cb_mutex_key_strings[protocol]);
        }
        init_waitqueue_head(&nlk->wait);

        sk->sk_destruct = netlink_sock_destruct;
        sk->sk_protocol = protocol;
        return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol,
                          int kern)
{
        struct module *module = NULL;
        struct mutex *cb_mutex;
        struct netlink_sock *nlk;
        int (*bind)(struct net *net, int group);
        void (*unbind)(struct net *net, int group);
        int err = 0;

        sock->state = SS_UNCONNECTED;

        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
                return -ESOCKTNOSUPPORT;

        if (protocol < 0 || protocol >= MAX_LINKS)
                return -EPROTONOSUPPORT;

        netlink_lock_table();
#ifdef CONFIG_MODULES
        if (!nl_table[protocol].registered) {
                netlink_unlock_table();
                request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
                netlink_lock_table();
        }
#endif
        if (nl_table[protocol].registered &&
            try_module_get(nl_table[protocol].module))
                module = nl_table[protocol].module;
        else
                err = -EPROTONOSUPPORT;
        cb_mutex = nl_table[protocol].cb_mutex;
        bind = nl_table[protocol].bind;
        unbind = nl_table[protocol].unbind;
        netlink_unlock_table();

        if (err < 0)
                goto out;

        err = __netlink_create(net, sock, cb_mutex, protocol, kern);
        if (err < 0)
                goto out_module;

        local_bh_disable();
        sock_prot_inuse_add(net, &netlink_proto, 1);
        local_bh_enable();

        nlk = nlk_sk(sock->sk);
        nlk->module = module;
        nlk->netlink_bind = bind;
        nlk->netlink_unbind = unbind;
out:
        return err;

out_module:
        module_put(module);
        goto out;
}

static void deferred_put_nlk_sk(struct rcu_head *head)
{
        struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
        struct sock *sk = &nlk->sk;

        if (!refcount_dec_and_test(&sk->sk_refcnt))
                return;

        if (nlk->cb_running && nlk->cb.done) {
                INIT_WORK(&nlk->work, netlink_sock_destruct_work);
                schedule_work(&nlk->work);
                return;
        }

        sk_free(sk);
}

static int netlink_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk;

        if (!sk)
                return 0;

        netlink_remove(sk);
        sock_orphan(sk);
        nlk = nlk_sk(sk);

        /*
         * OK. Socket is unlinked, any packets that arrive now
         * will be purged.
         */

        /* must not acquire netlink_table_lock in any way again before unbind
         * and notifying genetlink is done as otherwise it might deadlock
         */
        if (nlk->netlink_unbind) {
                int i;

                for (i = 0; i < nlk->ngroups; i++)
                        if (test_bit(i, nlk->groups))
                                nlk->netlink_unbind(sock_net(sk), i + 1);
        }
        if (sk->sk_protocol == NETLINK_GENERIC &&
            atomic_dec_return(&genl_sk_destructing_cnt) == 0)
                wake_up(&genl_sk_destructing_waitq);

        sock->sk = NULL;
        wake_up_interruptible_all(&nlk->wait);

        skb_queue_purge(&sk->sk_write_queue);

        if (nlk->portid && nlk->bound) {
                struct netlink_notify n = {
                                                .net = sock_net(sk),
                                                .protocol = sk->sk_protocol,
                                                .portid = nlk->portid,
                                          };
                blocking_notifier_call_chain(&netlink_chain,
                                NETLINK_URELEASE, &n);
        }

        module_put(nlk->module);

        if (netlink_is_kernel(sk)) {
                netlink_table_grab();
                BUG_ON(nl_table[sk->sk_protocol].registered == 0);
                if (--nl_table[sk->sk_protocol].registered == 0) {
                        struct listeners *old;

                        old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
                        RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
                        kfree_rcu(old, rcu);
                        nl_table[sk->sk_protocol].module = NULL;
                        nl_table[sk->sk_protocol].bind = NULL;
                        nl_table[sk->sk_protocol].unbind = NULL;
                        nl_table[sk->sk_protocol].flags = 0;
                        nl_table[sk->sk_protocol].registered = 0;
                }
                netlink_table_ungrab();
        }

        kfree(nlk->groups);
        nlk->groups = NULL;

        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
        local_bh_enable();
        call_rcu(&nlk->rcu, deferred_put_nlk_sk);
        return 0;
}

static int netlink_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct netlink_table *table = &nl_table[sk->sk_protocol];
        s32 portid = task_tgid_vnr(current);
        int err;
        s32 rover = -4096;
        bool ok;

retry:
        cond_resched();
        rcu_read_lock();
        ok = !__netlink_lookup(table, portid, net);
        rcu_read_unlock();
        if (!ok) {
                /* Bind collision, search negative portid values. */
                if (rover == -4096)
                        /* rover will be in range [S32_MIN, -4097] */
                        rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN);
                else if (rover >= -4096)
                        rover = -4097;
                portid = rover--;
                goto retry;
        }

        err = netlink_insert(sk, portid);
        if (err == -EADDRINUSE)
                goto retry;

        /* If 2 threads race to autobind, that is fine.  */
        if (err == -EBUSY)
                err = 0;

        return err;
}
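
/* Userspace view of the autobind path above, as a hedged illustration:
 * binding with nl_pid == 0 (or sending on an unbound socket) lets the
 * kernel pick the port id - the thread group id when free, otherwise a
 * negative value from the rover range - and getsockname() reveals the
 * choice.  Error handling elided:
 *
 *      int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *      struct sockaddr_nl sa = { .nl_family = AF_NETLINK }; // nl_pid == 0
 *      socklen_t slen = sizeof(sa);
 *
 *      bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *      getsockname(fd, (struct sockaddr *)&sa, &slen);
 *      // sa.nl_pid now holds the kernel-assigned port id
 */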

/**
 * __netlink_ns_capable - General netlink message capability test
 * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from had
 * the capability @cap in the user namespace @user_ns when the netlink
 * socket was created, and that the sender of the message has it as well.
 */
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
                        struct user_namespace *user_ns, int cap)
{
        return ((nsp->flags & NETLINK_SKB_DST) ||
                file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
                ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(__netlink_ns_capable);

/**
 * netlink_ns_capable - General netlink message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from had
 * the capability @cap in the user namespace @user_ns when the netlink
 * socket was created, and that the sender of the message has it as well.
 */
bool netlink_ns_capable(const struct sk_buff *skb,
                        struct user_namespace *user_ns, int cap)
{
        return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
}
EXPORT_SYMBOL(netlink_ns_capable);

/**
 * netlink_capable - Netlink global message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from had
 * the capability @cap when the netlink socket was created, and that the
 * sender of the message has it in all user namespaces.
 */
bool netlink_capable(const struct sk_buff *skb, int cap)
{
        return netlink_ns_capable(skb, &init_user_ns, cap);
}
EXPORT_SYMBOL(netlink_capable);

/**
 * netlink_net_capable - Netlink network namespace message capability test
 * @skb: socket buffer holding a netlink command from userspace
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket we received the message from had
 * the capability @cap over the network namespace of the socket when it was
 * created, and that the sender of the message has it as well.
 */
bool netlink_net_capable(const struct sk_buff *skb, int cap)
{
        return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
}
EXPORT_SYMBOL(netlink_net_capable);
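
/* A short usage sketch for the helpers above, in the shape of a message
 * handler (the handler itself is hypothetical; the helper and capability
 * are real):
 *
 *      static int example_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
 *      {
 *              // require CAP_NET_ADMIN relative to the owner of the
 *              // socket's network namespace
 *              if (!netlink_net_capable(skb, CAP_NET_ADMIN))
 *                      return -EPERM;
 *              // ... act on the request ...
 *              return 0;
 *      }
 */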

static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
{
        return (nl_table[sock->sk->sk_protocol].flags & flag) ||
                ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->subscriptions && !subscriptions)
                __sk_del_bind_node(sk);
        else if (!nlk->subscriptions && subscriptions)
                sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
        nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int groups;
        unsigned long *new_groups;
        int err = 0;

        netlink_table_grab();

        groups = nl_table[sk->sk_protocol].groups;
        if (!nl_table[sk->sk_protocol].registered) {
                err = -ENOENT;
                goto out_unlock;
        }

        if (nlk->ngroups >= groups)
                goto out_unlock;

        new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
        if (new_groups == NULL) {
                err = -ENOMEM;
                goto out_unlock;
        }
        memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
               NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

        nlk->groups = new_groups;
        nlk->ngroups = groups;
 out_unlock:
        netlink_table_ungrab();
        return err;
}

static void netlink_undo_bind(int group, long unsigned int groups,
                              struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int undo;

        if (!nlk->netlink_unbind)
                return;

        for (undo = 0; undo < group; undo++)
                if (test_bit(undo, &groups))
                        nlk->netlink_unbind(sock_net(sk), undo + 1);
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr,
                        int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
        int err;
        long unsigned int groups = nladdr->nl_groups;
        bool bound;

        if (addr_len < sizeof(struct sockaddr_nl))
                return -EINVAL;

        if (nladdr->nl_family != AF_NETLINK)
                return -EINVAL;

        /* Only the superuser is allowed to listen to multicasts */
        if (groups) {
                if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
        }

        bound = nlk->bound;
        if (bound) {
                /* Ensure nlk->portid is up-to-date. */
                smp_rmb();

                if (nladdr->nl_pid != nlk->portid)
                        return -EINVAL;
        }

        if (nlk->netlink_bind && groups) {
                int group;

                for (group = 0; group < nlk->ngroups; group++) {
                        if (!test_bit(group, &groups))
                                continue;
                        err = nlk->netlink_bind(net, group + 1);
                        if (!err)
                                continue;
                        netlink_undo_bind(group, groups, sk);
                        return err;
                }
        }

        /* No need for barriers here as we return to user-space without
         * using any of the bound attributes.
         */
        if (!bound) {
                err = nladdr->nl_pid ?
                        netlink_insert(sk, nladdr->nl_pid) :
                        netlink_autobind(sock);
                if (err) {
                        netlink_undo_bind(nlk->ngroups, groups, sk);
                        return err;
                }
        }

        if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
                return 0;

        netlink_table_grab();
        netlink_update_subscriptions(sk, nlk->subscriptions +
                                         hweight32(groups) -
                                         hweight32(nlk->groups[0]));
        nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
        netlink_update_listeners(sk);
        netlink_table_ungrab();

        return 0;
}
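
/* Userspace counterpart of netlink_bind() above, for orientation: nl_groups
 * is a bitmask of the first 32 multicast groups, group n occupying bit n-1
 * exactly as computed by netlink_group_mask().  Illustrative only, error
 * handling elided:
 *
 *      int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *      struct sockaddr_nl sa = {
 *              .nl_family = AF_NETLINK,
 *              .nl_groups = (1 << (RTNLGRP_LINK - 1)) |
 *                           (1 << (RTNLGRP_IPV4_IFADDR - 1)),
 *      };
 *
 *      bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 */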

static int netlink_connect(struct socket *sock, struct sockaddr *addr,
                           int alen, int flags)
{
        int err = 0;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

        if (alen < sizeof(addr->sa_family))
                return -EINVAL;

        if (addr->sa_family == AF_UNSPEC) {
                sk->sk_state    = NETLINK_UNCONNECTED;
                nlk->dst_portid = 0;
                nlk->dst_group  = 0;
                return 0;
        }
        if (addr->sa_family != AF_NETLINK)
                return -EINVAL;

        if ((nladdr->nl_groups || nladdr->nl_pid) &&
            !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
                return -EPERM;

        /* No need for barriers here as we return to user-space without
         * using any of the bound attributes.
         */
        if (!nlk->bound)
                err = netlink_autobind(sock);

        if (err == 0) {
                sk->sk_state    = NETLINK_CONNECTED;
                nlk->dst_portid = nladdr->nl_pid;
                nlk->dst_group  = ffs(nladdr->nl_groups);
        }

        return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
                           int *addr_len, int peer)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

        nladdr->nl_family = AF_NETLINK;
        nladdr->nl_pad = 0;
        *addr_len = sizeof(*nladdr);

        if (peer) {
                nladdr->nl_pid = nlk->dst_portid;
                nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
        } else {
                nladdr->nl_pid = nlk->portid;
                nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
        }
        return 0;
}

static int netlink_ioctl(struct socket *sock, unsigned int cmd,
                         unsigned long arg)
{
        /* try to hand this ioctl down to the NIC drivers.
         */
        return -ENOIOCTLCMD;
}

static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
        struct sock *sock;
        struct netlink_sock *nlk;

        sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
        if (!sock)
                return ERR_PTR(-ECONNREFUSED);

        /* Don't bother queuing skb if kernel socket has no input function */
        nlk = nlk_sk(sock);
        if (sock->sk_state == NETLINK_CONNECTED &&
            nlk->dst_portid != nlk_sk(ssk)->portid) {
                sock_put(sock);
                return ERR_PTR(-ECONNREFUSED);
        }
        return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
        struct inode *inode = file_inode(filp);
        struct sock *sock;

        if (!S_ISSOCK(inode->i_mode))
                return ERR_PTR(-ENOTSOCK);

        sock = SOCKET_I(inode)->sk;
        if (sock->sk_family != AF_NETLINK)
                return ERR_PTR(-EINVAL);

        sock_hold(sock);
        return sock;
}

static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
                                               int broadcast)
{
        struct sk_buff *skb;
        void *data;

        if (size <= NLMSG_GOODSIZE || broadcast)
                return alloc_skb(size, GFP_KERNEL);

        size = SKB_DATA_ALIGN(size) +
               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

        data = vmalloc(size);
        if (data == NULL)
                return NULL;

        skb = __build_skb(data, size);
        if (skb == NULL)
                vfree(data);
        else
                skb->destructor = netlink_skb_destructor;

        return skb;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination, just all
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
                      long *timeo, struct sock *ssk)
{
        struct netlink_sock *nlk;

        nlk = nlk_sk(sk);

        if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
             test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
                DECLARE_WAITQUEUE(wait, current);
                if (!*timeo) {
                        if (!ssk || netlink_is_kernel(ssk))
                                netlink_overrun(sk);
                        sock_put(sk);
                        kfree_skb(skb);
                        return -EAGAIN;
                }

                __set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&nlk->wait, &wait);

                if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
                     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
                    !sock_flag(sk, SOCK_DEAD))
                        *timeo = schedule_timeout(*timeo);

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nlk->wait, &wait);
                sock_put(sk);

                if (signal_pending(current)) {
                        kfree_skb(skb);
                        return sock_intr_errno(*timeo);
                }
                return 1;
        }
        netlink_skb_set_owner_r(skb, sk);
        return 0;
}
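
/* The 0 / 1 / < 0 contract documented above is consumed with a retry loop;
 * netlink_unicast() below is the canonical caller.  The sketched shape:
 *
 *      retry:
 *              sk = netlink_getsockbyportid(ssk, portid);  // takes a ref
 *              ...
 *              err = netlink_attachskb(sk, skb, &timeo, ssk);
 *              if (err == 1)
 *                      goto retry;     // ref dropped while we slept
 *              if (err)
 *                      return err;     // skb freed, ref dropped
 *              return netlink_sendskb(sk, skb);
 */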

static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = skb->len;

        netlink_deliver_tap(skb);

        skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk);
        return len;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = __netlink_sendskb(sk, skb);

        sock_put(sk);
        return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
        kfree_skb(skb);
        sock_put(sk);
}

static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
        int delta;

        WARN_ON(skb->sk != NULL);
        delta = skb->end - skb->tail;
        if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
                return skb;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, allocation);
                if (!nskb)
                        return skb;
                consume_skb(skb);
                skb = nskb;
        }

        pskb_expand_head(skb, 0, -delta,
                         (allocation & ~__GFP_DIRECT_RECLAIM) |
                         __GFP_NOWARN | __GFP_NORETRY);
        return skb;
}

static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
                                  struct sock *ssk)
{
        int ret;
        struct netlink_sock *nlk = nlk_sk(sk);

        ret = -ECONNREFUSED;
        if (nlk->netlink_rcv != NULL) {
                ret = skb->len;
                netlink_skb_set_owner_r(skb, sk);
                NETLINK_CB(skb).sk = ssk;
                netlink_deliver_tap_kernel(sk, ssk, skb);
                nlk->netlink_rcv(skb);
                consume_skb(skb);
        } else {
                kfree_skb(skb);
        }
        sock_put(sk);
        return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
                    u32 portid, int nonblock)
{
        struct sock *sk;
        int err;
        long timeo;

        skb = netlink_trim(skb, gfp_any());

        timeo = sock_sndtimeo(ssk, nonblock);
retry:
        sk = netlink_getsockbyportid(ssk, portid);
        if (IS_ERR(sk)) {
                kfree_skb(skb);
                return PTR_ERR(sk);
        }
        if (netlink_is_kernel(sk))
                return netlink_unicast_kernel(sk, skb, ssk);

        if (sk_filter(sk, skb)) {
                err = skb->len;
                kfree_skb(skb);
                sock_put(sk);
                return err;
        }

        err = netlink_attachskb(sk, skb, &timeo, ssk);
        if (err == 1)
                goto retry;
        if (err)
                return err;

        return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);

int netlink_has_listeners(struct sock *sk, unsigned int group)
{
        int res = 0;
        struct listeners *listeners;

        BUG_ON(!netlink_is_kernel(sk));

        rcu_read_lock();
        listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

        if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
                res = test_bit(group - 1, listeners->masks);

        rcu_read_unlock();

        return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);
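
/* Typical use of netlink_has_listeners(): a kernel-side producer can skip
 * building a notification entirely when nobody subscribed.  MY_GRP and the
 * surrounding code are hypothetical; the helpers are real:
 *
 *      if (!netlink_has_listeners(kernel_sk, MY_GRP))
 *              return;         // no receivers, skip the allocation
 *      skb = nlmsg_new(payload_size, GFP_KERNEL);
 *      ...
 *      netlink_broadcast(kernel_sk, skb, 0, MY_GRP, GFP_KERNEL);
 */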

static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
            !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
                netlink_skb_set_owner_r(skb, sk);
                __netlink_sendskb(sk, skb);
                return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
        }
        return -1;
}

struct netlink_broadcast_data {
        struct sock *exclude_sk;
        struct net *net;
        u32 portid;
        u32 group;
        int failure;
        int delivery_failure;
        int congested;
        int delivered;
        gfp_t allocation;
        struct sk_buff *skb, *skb2;
        int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
        void *tx_data;
};

static void do_one_broadcast(struct sock *sk,
                                    struct netlink_broadcast_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int val;

        if (p->exclude_sk == sk)
                return;

        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                return;

        if (!net_eq(sock_net(sk), p->net)) {
                if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
                        return;

                if (!peernet_has_id(sock_net(sk), p->net))
                        return;

                if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
                                     CAP_NET_BROADCAST))
                        return;
        }

        if (p->failure) {
                netlink_overrun(sk);
                return;
        }

        sock_hold(sk);
        if (p->skb2 == NULL) {
                if (skb_shared(p->skb)) {
                        p->skb2 = skb_clone(p->skb, p->allocation);
                } else {
                        p->skb2 = skb_get(p->skb);
                        /*
                         * skb ownership may have been set when
                         * delivered to a previous socket.
                         */
                        skb_orphan(p->skb2);
                }
        }
        if (p->skb2 == NULL) {
                netlink_overrun(sk);
                /* Clone failed. Notify ALL listeners. */
                p->failure = 1;
                if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
                goto out;
        }
        if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
                goto out;
        }
        if (sk_filter(sk, p->skb2)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
                goto out;
        }
        NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
        if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED)
                NETLINK_CB(p->skb2).nsid_is_set = true;
        val = netlink_broadcast_deliver(sk, p->skb2);
        if (val < 0) {
                netlink_overrun(sk);
                if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else {
                p->congested |= val;
                p->delivered = 1;
                p->skb2 = NULL;
        }
out:
        sock_put(sk);
}

int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
        u32 group, gfp_t allocation,
        int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
        void *filter_data)
{
        struct net *net = sock_net(ssk);
        struct netlink_broadcast_data info;
        struct sock *sk;

        skb = netlink_trim(skb, allocation);

        info.exclude_sk = ssk;
        info.net = net;
        info.portid = portid;
        info.group = group;
        info.failure = 0;
        info.delivery_failure = 0;
        info.congested = 0;
        info.delivered = 0;
        info.allocation = allocation;
        info.skb = skb;
        info.skb2 = NULL;
        info.tx_filter = filter;
        info.tx_data = filter_data;

        /* While we sleep in clone, do not allow the socket list to change */

        netlink_lock_table();

        sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
                do_one_broadcast(sk, &info);

        consume_skb(skb);

        netlink_unlock_table();

        if (info.delivery_failure) {
                kfree_skb(info.skb2);
                return -ENOBUFS;
        }
        consume_skb(info.skb2);

        if (info.delivered) {
                if (info.congested && gfpflags_allow_blocking(allocation))
                        yield();
                return 0;
        }
        return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast_filtered);

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
                      u32 group, gfp_t allocation)
{
        return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
                NULL, NULL);
}
EXPORT_SYMBOL(netlink_broadcast);
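
/* Return-value handling for netlink_broadcast(), sketched for a
 * hypothetical caller: -ESRCH only means "no current listeners" and is
 * usually harmless, while -ENOBUFS reports a delivery failure (surfaced
 * only when receivers enabled NETLINK_BROADCAST_ERROR):
 *
 *      err = netlink_broadcast(kernel_sk, skb, 0, MY_GRP, GFP_KERNEL);
 *      if (err == -ESRCH)
 *              err = 0;        // nobody subscribed; not an error
 *      else if (err < 0)
 *              pr_debug("notification dropped: %d\n", err);
 */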

struct netlink_set_err_data {
        struct sock *exclude_sk;
        u32 portid;
        u32 group;
        int code;
};

static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int ret = 0;

        if (sk == p->exclude_sk)
                goto out;

        if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
                goto out;

        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
                ret = 1;
                goto out;
        }

        sk->sk_err = p->code;
        sk->sk_error_report(sk);
out:
        return ret;
}

/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @portid: the PORTID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
 * NETLINK_NO_ENOBUFS socket option.
 */
int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
        struct netlink_set_err_data info;
        struct sock *sk;
        int ret = 0;

        info.exclude_sk = ssk;
        info.portid = portid;
        info.group = group;
        /* sk->sk_err wants a positive error value */
        info.code = -code;

        read_lock(&nl_table_lock);

        sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
                ret += do_one_set_err(sk, &info);

        read_unlock(&nl_table_lock);
        return ret;
}
EXPORT_SYMBOL(netlink_set_err);
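
/* One plausible pairing with the broadcast path, shown as a hypothetical
 * sketch: after a failed multicast, push the error to the remaining group
 * members.  Note the negative code, as documented above:
 *
 *      if (netlink_broadcast(kernel_sk, skb, 0, MY_GRP, GFP_KERNEL) < 0)
 *              netlink_set_err(kernel_sk, 0, MY_GRP, -ENOBUFS);
 */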

/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
                                     unsigned int group,
                                     int is_new)
{
        int old, new = !!is_new, subscriptions;

        old = test_bit(group - 1, nlk->groups);
        subscriptions = nlk->subscriptions - old + new;
        if (new)
                __set_bit(group - 1, nlk->groups);
        else
                __clear_bit(group - 1, nlk->groups);
        netlink_update_subscriptions(&nlk->sk, subscriptions);
        netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int val = 0;
        int err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (optlen >= sizeof(int) &&
            get_user(val, (unsigned int __user *)optval))
                return -EFAULT;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (val)
                        nlk->flags |= NETLINK_F_RECV_PKTINFO;
                else
                        nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
                err = 0;
                break;
        case NETLINK_ADD_MEMBERSHIP:
        case NETLINK_DROP_MEMBERSHIP: {
                if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
                if (!val || val - 1 >= nlk->ngroups)
                        return -EINVAL;
                if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
                        err = nlk->netlink_bind(sock_net(sk), val);
                        if (err)
                                return err;
                }
                netlink_table_grab();
                netlink_update_socket_mc(nlk, val,
                                         optname == NETLINK_ADD_MEMBERSHIP);
                netlink_table_ungrab();
                if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
                        nlk->netlink_unbind(sock_net(sk), val);

                err = 0;
                break;
        }
        case NETLINK_BROADCAST_ERROR:
                if (val)
                        nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
                else
                        nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (val) {
                        nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
                        clear_bit(NETLINK_S_CONGESTED, &nlk->state);
                        wake_up_interruptible(&nlk->wait);
                } else {
                        nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
                }
                err = 0;
                break;
        case NETLINK_LISTEN_ALL_NSID:
                if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
                        return -EPERM;

                if (val)
                        nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
                else
                        nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
                err = 0;
                break;
        case NETLINK_CAP_ACK:
                if (val)
                        nlk->flags |= NETLINK_F_CAP_ACK;
                else
                        nlk->flags &= ~NETLINK_F_CAP_ACK;
                err = 0;
                break;
        case NETLINK_EXT_ACK:
                if (val)
                        nlk->flags |= NETLINK_F_EXT_ACK;
                else
                        nlk->flags &= ~NETLINK_F_EXT_ACK;
                err = 0;
                break;
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}
1669
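    /*
     * A minimal userspace sketch (illustrative, not part of the kernel
     * build) of driving the NETLINK_ADD_MEMBERSHIP case above; the
     * RTNLGRP_LINK group on NETLINK_ROUTE is just one example.
     *
     *	#include <sys/socket.h>
     *	#include <linux/netlink.h>
     *	#include <linux/rtnetlink.h>
     *
     *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
     *	int group = RTNLGRP_LINK;
     *
     *	// validated against nlk->ngroups and applied to the socket's
     *	// membership bitmap under the netlink table lock (see above)
     *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
     *		   &group, sizeof(group));
     */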
1670static int netlink_getsockopt(struct socket *sock, int level, int optname,
1671                              char __user *optval, int __user *optlen)
1672{
1673        struct sock *sk = sock->sk;
1674        struct netlink_sock *nlk = nlk_sk(sk);
1675        int len, val, err;
1676
1677        if (level != SOL_NETLINK)
1678                return -ENOPROTOOPT;
1679
1680        if (get_user(len, optlen))
1681                return -EFAULT;
1682        if (len < 0)
1683                return -EINVAL;
1684
1685        switch (optname) {
1686        case NETLINK_PKTINFO:
1687                if (len < sizeof(int))
1688                        return -EINVAL;
1689                len = sizeof(int);
1690                val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0;
1691                if (put_user(len, optlen) ||
1692                    put_user(val, (int __user *)optval))
1693                        return -EFAULT;
1694                err = 0;
1695                break;
1696        case NETLINK_BROADCAST_ERROR:
1697                if (len < sizeof(int))
1698                        return -EINVAL;
1699                len = sizeof(int);
1700                val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0;
1701                if (put_user(len, optlen) ||
1702                    put_user(val, (int __user *)optval))
1703                        return -EFAULT;
1704                err = 0;
1705                break;
1706        case NETLINK_NO_ENOBUFS:
1707                if (len < sizeof(int))
1708                        return -EINVAL;
1709                len = sizeof(int);
1710                val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0;
1711                if (put_user(len, optlen) ||
1712                    put_user(val, (int __user *)optval))
1713                        return -EFAULT;
1714                err = 0;
1715                break;
1716        case NETLINK_LIST_MEMBERSHIPS: {
1717                int pos, idx, shift;
1718
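                    /* Copy the membership bitmap to userspace in 32-bit
                     * chunks, independent of sizeof(unsigned long); the
                     * required size is always reported via optlen so a
                     * caller with a short buffer can retry.
                     */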
1719                err = 0;
1720                netlink_lock_table();
1721                for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
1722                        if (len - pos < sizeof(u32))
1723                                break;
1724
1725                        idx = pos / sizeof(unsigned long);
1726                        shift = (pos % sizeof(unsigned long)) * 8;
1727                        if (put_user((u32)(nlk->groups[idx] >> shift),
1728                                     (u32 __user *)(optval + pos))) {
1729                                err = -EFAULT;
1730                                break;
1731                        }
1732                }
1733                if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
1734                        err = -EFAULT;
1735                netlink_unlock_table();
1736                break;
1737        }
1738        case NETLINK_CAP_ACK:
1739                if (len < sizeof(int))
1740                        return -EINVAL;
1741                len = sizeof(int);
1742                val = nlk->flags & NETLINK_F_CAP_ACK ? 1 : 0;
1743                if (put_user(len, optlen) ||
1744                    put_user(val, (int __user *)optval))
1745                        return -EFAULT;
1746                err = 0;
1747                break;
1748        case NETLINK_EXT_ACK:
1749                if (len < sizeof(int))
1750                        return -EINVAL;
1751                len = sizeof(int);
1752                val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0;
1753                if (put_user(len, optlen) || put_user(val, (int __user *)optval))
1754                        return -EFAULT;
1755                err = 0;
1756                break;
1757        default:
1758                err = -ENOPROTOOPT;
1759        }
1760        return err;
1761}
1762
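    /*
     * A minimal userspace sketch of the two-call pattern the
     * NETLINK_LIST_MEMBERSHIPS case above supports: a zero-length first
     * call copies nothing but still reports the required size, and a
     * second call fetches the bitmap.
     *
     *	socklen_t len = 0;
     *	getsockopt(fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len);
     *
     *	uint32_t *groups = malloc(len);
     *	getsockopt(fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, groups, &len);
     */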
1763static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1764{
1765        struct nl_pktinfo info;
1766
1767        info.group = NETLINK_CB(skb).dst_group;
1768        put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1769}
1770
1771static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg,
1772                                         struct sk_buff *skb)
1773{
1774        if (!NETLINK_CB(skb).nsid_is_set)
1775                return;
1776
1777        put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int),
1778                 &NETLINK_CB(skb).nsid);
1779}
1780
1781static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1782{
1783        struct sock *sk = sock->sk;
1784        struct netlink_sock *nlk = nlk_sk(sk);
1785        DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
1786        u32 dst_portid;
1787        u32 dst_group;
1788        struct sk_buff *skb;
1789        int err;
1790        struct scm_cookie scm;
1791        u32 netlink_skb_flags = 0;
1792
1793        if (msg->msg_flags&MSG_OOB)
1794                return -EOPNOTSUPP;
1795
1796        err = scm_send(sock, msg, &scm, true);
1797        if (err < 0)
1798                return err;
1799
1800        if (msg->msg_namelen) {
1801                err = -EINVAL;
                    /* reject short addresses before reading sockaddr_nl fields */
                    if (msg->msg_namelen < sizeof(struct sockaddr_nl))
                            goto out;
1802                if (addr->nl_family != AF_NETLINK)
1803                        goto out;
1804                dst_portid = addr->nl_pid;
1805                dst_group = ffs(addr->nl_groups);
1806                err =  -EPERM;
1807                if ((dst_group || dst_portid) &&
1808                    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
1809                        goto out;
1810                netlink_skb_flags |= NETLINK_SKB_DST;
1811        } else {
1812                dst_portid = nlk->dst_portid;
1813                dst_group = nlk->dst_group;
1814        }
1815
1816        if (!nlk->bound) {
1817                err = netlink_autobind(sock);
1818                if (err)
1819                        goto out;
1820        } else {
1821                /* Ensure nlk is hashed and visible. */
1822                smp_rmb();
1823        }
1824
1825        err = -EMSGSIZE;
1826        if (len > sk->sk_sndbuf - 32)
1827                goto out;
1828        err = -ENOBUFS;
1829        skb = netlink_alloc_large_skb(len, dst_group);
1830        if (skb == NULL)
1831                goto out;
1832
1833        NETLINK_CB(skb).portid  = nlk->portid;
1834        NETLINK_CB(skb).dst_group = dst_group;
1835        NETLINK_CB(skb).creds   = scm.creds;
1836        NETLINK_CB(skb).flags   = netlink_skb_flags;
1837
1838        err = -EFAULT;
1839        if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
1840                kfree_skb(skb);
1841                goto out;
1842        }
1843
1844        err = security_netlink_send(sk, skb);
1845        if (err) {
1846                kfree_skb(skb);
1847                goto out;
1848        }
1849
1850        if (dst_group) {
1851                refcount_inc(&skb->users);
1852                netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
1853        }
1854        err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
1855
1856out:
1857        scm_destroy(&scm);
1858        return err;
1859}
1860
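    /*
     * A minimal userspace sketch of feeding the sendmsg() path above: a
     * single NLMSG_NOOP request addressed to the kernel (nl_pid 0, no
     * multicast group), with an ACK requested so the kernel replies via
     * netlink_ack().
     *
     *	struct sockaddr_nl dst = { .nl_family = AF_NETLINK };
     *	struct nlmsghdr nlh = {
     *		.nlmsg_len   = NLMSG_LENGTH(0),
     *		.nlmsg_type  = NLMSG_NOOP,
     *		.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
     *	};
     *	struct iovec iov = { &nlh, nlh.nlmsg_len };
     *	struct msghdr msg = {
     *		.msg_name = &dst, .msg_namelen = sizeof(dst),
     *		.msg_iov = &iov, .msg_iovlen = 1,
     *	};
     *	sendmsg(fd, &msg, 0);
     */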
1861static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1862                           int flags)
1863{
1864        struct scm_cookie scm;
1865        struct sock *sk = sock->sk;
1866        struct netlink_sock *nlk = nlk_sk(sk);
1867        int noblock = flags&MSG_DONTWAIT;
1868        size_t copied;
1869        struct sk_buff *skb, *data_skb;
1870        int err, ret;
1871
1872        if (flags&MSG_OOB)
1873                return -EOPNOTSUPP;
1874
1875        copied = 0;
1876
1877        skb = skb_recv_datagram(sk, flags, noblock, &err);
1878        if (skb == NULL)
1879                goto out;
1880
1881        data_skb = skb;
1882
1883#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
1884        if (unlikely(skb_shinfo(skb)->frag_list)) {
1885                /*
1886                 * If this skb has a frag_list, it means we have to use the
1887                 * frag_list skb's data for compat tasks and the regular
1888                 * skb's data for normal (non-compat) tasks.
1889                 *
1890                 * If we need to send the compat skb, assign it to the
1891                 * 'data_skb' variable so that it will be used below for data
1892                 * copying. We keep 'skb' for everything else, including
1893                 * freeing both later.
1894                 */
1895                if (flags & MSG_CMSG_COMPAT)
1896                        data_skb = skb_shinfo(skb)->frag_list;
1897        }
1898#endif
1899
1900        /* Record the max length of recvmsg() calls for future allocations */
1901        nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
1902        nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
1903                                     SKB_WITH_OVERHEAD(32768));
1904
1905        copied = data_skb->len;
1906        if (len < copied) {
1907                msg->msg_flags |= MSG_TRUNC;
1908                copied = len;
1909        }
1910
1911        skb_reset_transport_header(data_skb);
1912        err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
1913
1914        if (msg->msg_name) {
1915                DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
1916                addr->nl_family = AF_NETLINK;
1917                addr->nl_pad    = 0;
1918                addr->nl_pid    = NETLINK_CB(skb).portid;
1919                addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
1920                msg->msg_namelen = sizeof(*addr);
1921        }
1922
1923        if (nlk->flags & NETLINK_F_RECV_PKTINFO)
1924                netlink_cmsg_recv_pktinfo(msg, skb);
1925        if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
1926                netlink_cmsg_listen_all_nsid(sk, msg, skb);
1927
1928        memset(&scm, 0, sizeof(scm));
1929        scm.creds = *NETLINK_CREDS(skb);
1930        if (flags & MSG_TRUNC)
1931                copied = data_skb->len;
1932
1933        skb_free_datagram(sk, skb);
1934
1935        if (nlk->cb_running &&
1936            atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1937                ret = netlink_dump(sk);
1938                if (ret) {
1939                        sk->sk_err = -ret;
1940                        sk->sk_error_report(sk);
1941                }
1942        }
1943
1944        scm_recv(sock, msg, &scm, flags);
1945out:
1946        netlink_rcv_wake(sk);
1947        return err ? : copied;
1948}
1949
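    /*
     * A minimal userspace sketch of the MSG_TRUNC handling above:
     * peeking with MSG_PEEK | MSG_TRUNC returns the full queued length
     * without consuming the datagram, so the real read can use a buffer
     * that is guaranteed to be large enough.
     *
     *	char probe;
     *	ssize_t need = recv(fd, &probe, 1, MSG_PEEK | MSG_TRUNC);
     *	void *buf = malloc(need);
     *	recv(fd, buf, need, 0);
     */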
1950static void netlink_data_ready(struct sock *sk)
1951{
1952        BUG();
1953}
1954
1955/*
1956 *      We export these functions to other modules. They provide a
1957 *      complete set of kernel non-blocking support for message
1958 *      queueing.
1959 */
1960
1961struct sock *
1962__netlink_kernel_create(struct net *net, int unit, struct module *module,
1963                        struct netlink_kernel_cfg *cfg)
1964{
1965        struct socket *sock;
1966        struct sock *sk;
1967        struct netlink_sock *nlk;
1968        struct listeners *listeners = NULL;
1969        struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
1970        unsigned int groups;
1971
1972        BUG_ON(!nl_table);
1973
1974        if (unit < 0 || unit >= MAX_LINKS)
1975                return NULL;
1976
1977        if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1978                return NULL;
1979
1980        if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
1981                goto out_sock_release_nosk;
1982
1983        sk = sock->sk;
1984
1985        if (!cfg || cfg->groups < 32)
1986                groups = 32;
1987        else
1988                groups = cfg->groups;
1989
1990        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1991        if (!listeners)
1992                goto out_sock_release;
1993
1994        sk->sk_data_ready = netlink_data_ready;
1995        if (cfg && cfg->input)
1996                nlk_sk(sk)->netlink_rcv = cfg->input;
1997
1998        if (netlink_insert(sk, 0))
1999                goto out_sock_release;
2000
2001        nlk = nlk_sk(sk);
2002        nlk->flags |= NETLINK_F_KERNEL_SOCKET;
2003
2004        netlink_table_grab();
2005        if (!nl_table[unit].registered) {
2006                nl_table[unit].groups = groups;
2007                rcu_assign_pointer(nl_table[unit].listeners, listeners);
2008                nl_table[unit].cb_mutex = cb_mutex;
2009                nl_table[unit].module = module;
2010                if (cfg) {
2011                        nl_table[unit].bind = cfg->bind;
2012                        nl_table[unit].unbind = cfg->unbind;
2013                        nl_table[unit].flags = cfg->flags;
2014                        if (cfg->compare)
2015                                nl_table[unit].compare = cfg->compare;
2016                }
2017                nl_table[unit].registered = 1;
2018        } else {
2019                kfree(listeners);
2020                nl_table[unit].registered++;
2021        }
2022        netlink_table_ungrab();
2023        return sk;
2024
2025out_sock_release:
2026        kfree(listeners);
2027        netlink_kernel_release(sk);
2028        return NULL;
2029
2030out_sock_release_nosk:
2031        sock_release(sock);
2032        return NULL;
2033}
2034EXPORT_SYMBOL(__netlink_kernel_create);
2035
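    /*
     * A minimal in-kernel sketch of the usual entry point, the
     * netlink_kernel_create() inline wrapper (which passes THIS_MODULE
     * to the function above); MY_PROTO and my_input are placeholders.
     *
     *	static void my_input(struct sk_buff *skb)
     *	{
     *		// runs via netlink_unicast() in the sender's context
     *	}
     *
     *	struct netlink_kernel_cfg cfg = {
     *		.input	= my_input,
     *		.groups	= 32,
     *	};
     *	struct sock *nls = netlink_kernel_create(&init_net, MY_PROTO, &cfg);
     */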
2036void
2037netlink_kernel_release(struct sock *sk)
2038{
2039        if (sk == NULL || sk->sk_socket == NULL)
2040                return;
2041
2042        sock_release(sk->sk_socket);
2043}
2044EXPORT_SYMBOL(netlink_kernel_release);
2045
2046int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
2047{
2048        struct listeners *new, *old;
2049        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
2050
2051        if (groups < 32)
2052                groups = 32;
2053
2054        if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
2055                new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
2056                if (!new)
2057                        return -ENOMEM;
2058                old = nl_deref_protected(tbl->listeners);
2059                memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
2060                rcu_assign_pointer(tbl->listeners, new);
2061
2062                kfree_rcu(old, rcu);
2063        }
2064        tbl->groups = groups;
2065
2066        return 0;
2067}
2068
2069/**
2070 * netlink_change_ngroups - change number of multicast groups
2071 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
2072 * @groups: The new number of groups.
2073 *
2074 * This changes the number of multicast groups that are
2075 * available on a certain netlink family. Note that it is not
2076 * possible to change the number of groups to below 32. Also
2077 * note that it does not implicitly call
2078 * netlink_clear_multicast_users() when the number of groups
2079 * is reduced.
2080 */
2081int netlink_change_ngroups(struct sock *sk, unsigned int groups)
2082{
2083        int err;
2084
2085        netlink_table_grab();
2086        err = __netlink_change_ngroups(sk, groups);
2087        netlink_table_ungrab();
2088
2089        return err;
2090}
2091
2092void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
2093{
2094        struct sock *sk;
2095        struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
2096
2097        sk_for_each_bound(sk, &tbl->mc_list)
2098                netlink_update_socket_mc(nlk_sk(sk), group, 0);
2099}
2100
2101struct nlmsghdr *
2102__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
2103{
2104        struct nlmsghdr *nlh;
2105        int size = nlmsg_msg_size(len);
2106
2107        nlh = skb_put(skb, NLMSG_ALIGN(size));
2108        nlh->nlmsg_type = type;
2109        nlh->nlmsg_len = size;
2110        nlh->nlmsg_flags = flags;
2111        nlh->nlmsg_pid = portid;
2112        nlh->nlmsg_seq = seq;
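            /* clear padding between payload and aligned message end */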
2113        if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
2114                memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
2115        return nlh;
2116}
2117EXPORT_SYMBOL(__nlmsg_put);
2118
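    /*
     * A minimal in-kernel sketch of the usual wrappers around
     * __nlmsg_put() from <net/netlink.h>; MY_MSG_TYPE and payload_size
     * are placeholders.
     *
     *	struct sk_buff *skb = nlmsg_new(payload_size, GFP_KERNEL);
     *	struct nlmsghdr *nlh;
     *
     *	if (!skb)
     *		return -ENOBUFS;
     *	nlh = nlmsg_put(skb, portid, seq, MY_MSG_TYPE, payload_size, 0);
     *	if (!nlh) {
     *		kfree_skb(skb);
     *		return -EMSGSIZE;
     *	}
     *	// ... fill nlmsg_data(nlh) or add nla_put() attributes ...
     *	nlmsg_end(skb, nlh);
     */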
2119/*
2120 * It looks a bit ugly.
2121 * It would be better to create a kernel thread.
2122 */
2123
2124static int netlink_dump(struct sock *sk)
2125{
2126        struct netlink_sock *nlk = nlk_sk(sk);
2127        struct netlink_callback *cb;
2128        struct sk_buff *skb = NULL;
2129        struct nlmsghdr *nlh;
2130        struct module *module;
2131        int len, err = -ENOBUFS;
2132        int alloc_min_size;
2133        int alloc_size;
2134
2135        mutex_lock(nlk->cb_mutex);
2136        if (!nlk->cb_running) {
2137                err = -EINVAL;
2138                goto errout_skb;
2139        }
2140
2141        if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2142                goto errout_skb;
2143
2144        /* NLMSG_GOODSIZE is small to avoid high order allocations being
2145         * required, but it makes sense to _attempt_ a 16 KiB allocation
2146         * to reduce the number of system calls on dump operations, if the
2147         * user has ever provided a big enough buffer.
2148         */
2149        cb = &nlk->cb;
2150        alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2151
2152        if (alloc_min_size < nlk->max_recvmsg_len) {
2153                alloc_size = nlk->max_recvmsg_len;
2154                skb = alloc_skb(alloc_size,
2155                                (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
2156                                __GFP_NOWARN | __GFP_NORETRY);
2157        }
2158        if (!skb) {
2159                alloc_size = alloc_min_size;
2160                skb = alloc_skb(alloc_size, GFP_KERNEL);
2161        }
2162        if (!skb)
2163                goto errout_skb;
2164
2165        /* Trim skb to the allocated size. The user is expected to provide
2166         * a buffer as large as max(min_dump_alloc, 16 KiB) (max_recvmsg_len
2167         * is capped at netlink_recvmsg()). The dump packs as many smaller
2168         * messages as fit within the allocated skb. The skb is typically
2169         * allocated with more space than required (it can be nearly 2x the
2170         * requested size when rounding up to the next power of two).
2171         * Letting the dump use the excess space would make it hard for a
2172         * user to size a reasonable static buffer based on the expected
2173         * largest dump of a single netdev; the outcome would be MSG_TRUNC.
2174         */
2175        skb_reserve(skb, skb_tailroom(skb) - alloc_size);
2176        netlink_skb_set_owner_r(skb, sk);
2177
2178        len = cb->dump(skb, cb);
2179
2180        if (len > 0) {
2181                mutex_unlock(nlk->cb_mutex);
2182
2183                if (sk_filter(sk, skb))
2184                        kfree_skb(skb);
2185                else
2186                        __netlink_sendskb(sk, skb);
2187                return 0;
2188        }
2189
2190        nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
2191        if (!nlh)
2192                goto errout_skb;
2193
2194        nl_dump_check_consistent(cb, nlh);
2195
2196        memcpy(nlmsg_data(nlh), &len, sizeof(len));
2197
2198        if (sk_filter(sk, skb))
2199                kfree_skb(skb);
2200        else
2201                __netlink_sendskb(sk, skb);
2202
2203        if (cb->done)
2204                cb->done(cb);
2205
2206        nlk->cb_running = false;
2207        module = cb->module;
2208        skb = cb->skb;
2209        mutex_unlock(nlk->cb_mutex);
2210        module_put(module);
2211        consume_skb(skb);
2212        return 0;
2213
2214errout_skb:
2215        mutex_unlock(nlk->cb_mutex);
2216        kfree_skb(skb);
2217        return err;
2218}
2219
2220int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2221                         const struct nlmsghdr *nlh,
2222                         struct netlink_dump_control *control)
2223{
2224        struct netlink_callback *cb;
2225        struct sock *sk;
2226        struct netlink_sock *nlk;
2227        int ret;
2228
2229        refcount_inc(&skb->users);
2230
2231        sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
2232        if (sk == NULL) {
2233                ret = -ECONNREFUSED;
2234                goto error_free;
2235        }
2236
2237        nlk = nlk_sk(sk);
2238        mutex_lock(nlk->cb_mutex);
2239        /* A dump is in progress... */
2240        if (nlk->cb_running) {
2241                ret = -EBUSY;
2242                goto error_unlock;
2243        }
2244        /* take a reference on the module that cb->dump belongs to */
2245        if (!try_module_get(control->module)) {
2246                ret = -EPROTONOSUPPORT;
2247                goto error_unlock;
2248        }
2249
2250        cb = &nlk->cb;
2251        memset(cb, 0, sizeof(*cb));
2252        cb->start = control->start;
2253        cb->dump = control->dump;
2254        cb->done = control->done;
2255        cb->nlh = nlh;
2256        cb->data = control->data;
2257        cb->module = control->module;
2258        cb->min_dump_alloc = control->min_dump_alloc;
2259        cb->skb = skb;
2260
2261        nlk->cb_running = true;
2262
2263        mutex_unlock(nlk->cb_mutex);
2264
2265        if (cb->start)
2266                cb->start(cb);
2267
2268        ret = netlink_dump(sk);
2269        sock_put(sk);
2270
2271        if (ret)
2272                return ret;
2273
2274        /* We successfully started a dump; by returning -EINTR we signal
2275         * the caller not to send an ACK even if one was requested.
2276         */
2277        return -EINTR;
2278
2279error_unlock:
2280        sock_put(sk);
2281        mutex_unlock(nlk->cb_mutex);
2282error_free:
2283        kfree_skb(skb);
2284        return ret;
2285}
2286EXPORT_SYMBOL(__netlink_dump_start);
2287
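    /*
     * A minimal in-kernel sketch of starting a dump from a request
     * handler through the netlink_dump_start() inline wrapper (which
     * fills in control->module); nlsk, my_dump and my_done are
     * placeholders.  Propagating the -EINTR return keeps the caller
     * from sending an ACK for the request that started the dump.
     *
     *	if (nlh->nlmsg_flags & NLM_F_DUMP) {
     *		struct netlink_dump_control c = {
     *			.dump = my_dump,
     *			.done = my_done,
     *		};
     *		return netlink_dump_start(nlsk, skb, nlh, &c);
     *	}
     */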
2288void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
2289                 const struct netlink_ext_ack *extack)
2290{
2291        struct sk_buff *skb;
2292        struct nlmsghdr *rep;
2293        struct nlmsgerr *errmsg;
2294        size_t payload = sizeof(*errmsg);
2295        size_t tlvlen = 0;
2296        struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
2297        unsigned int flags = 0;
2298
2299        /* Error messages get the original request appended, unless the user
2300         * requests that the error message be capped; extra error data is
2301         * included if requested.
2302         */
2303        if (err) {
2304                if (!(nlk->flags & NETLINK_F_CAP_ACK))
2305                        payload += nlmsg_len(nlh);
2306                else
2307                        flags |= NLM_F_CAPPED;
2308                if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
2309                        if (extack->_msg)
2310                                tlvlen += nla_total_size(strlen(extack->_msg) + 1);
2311                        if (extack->bad_attr)
2312                                tlvlen += nla_total_size(sizeof(u32));
2313                }
2314        } else {
2315                flags |= NLM_F_CAPPED;
2316
2317                if (nlk->flags & NETLINK_F_EXT_ACK &&
2318                    extack && extack->cookie_len)
2319                        tlvlen += nla_total_size(extack->cookie_len);
2320        }
2321
2322        if (tlvlen)
2323                flags |= NLM_F_ACK_TLVS;
2324
2325        skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
2326        if (!skb) {
2327                struct sock *sk;
2328
2329                sk = netlink_lookup(sock_net(in_skb->sk),
2330                                    in_skb->sk->sk_protocol,
2331                                    NETLINK_CB(in_skb).portid);
2332                if (sk) {
2333                        sk->sk_err = ENOBUFS;
2334                        sk->sk_error_report(sk);
2335                        sock_put(sk);
2336                }
2337                return;
2338        }
2339
2340        rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
2341                          NLMSG_ERROR, payload, flags);
2342        errmsg = nlmsg_data(rep);
2343        errmsg->error = err;
2344        memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
2345
2346        if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
2347                if (err) {
2348                        if (extack->_msg)
2349                                WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
2350                                                       extack->_msg));
2351                        if (extack->bad_attr &&
2352                            !WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
2353                                     (u8 *)extack->bad_attr >= in_skb->data +
2354                                                               in_skb->len))
2355                                WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
2356                                                    (u8 *)extack->bad_attr -
2357                                                    in_skb->data));
2358                } else {
2359                        if (extack->cookie_len)
2360                                WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
2361                                                extack->cookie_len,
2362                                                extack->cookie));
2363                }
2364        }
2365
2366        nlmsg_end(skb, rep);
2367
2368        netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
2369}
2370EXPORT_SYMBOL(netlink_ack);
2371
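    /*
     * A minimal in-kernel sketch of how a handler feeds the extack TLVs
     * emitted above: NL_SET_ERR_MSG() records a string that becomes
     * NLMSGERR_ATTR_MSG when the requester enabled NETLINK_EXT_ACK.
     * my_doit and the attribute check are placeholders.
     *
     *	static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
     *			   struct netlink_ext_ack *extack)
     *	{
     *		if (!required_attr_present(nlh)) {
     *			NL_SET_ERR_MSG(extack, "required attribute missing");
     *			return -EINVAL;
     *		}
     *		return 0;
     *	}
     */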
2372int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
2373                                                   struct nlmsghdr *,
2374                                                   struct netlink_ext_ack *))
2375{
2376        struct netlink_ext_ack extack = {};
2377        struct nlmsghdr *nlh;
2378        int err;
2379
2380        while (skb->len >= nlmsg_total_size(0)) {
2381                int msglen;
2382
2383                nlh = nlmsg_hdr(skb);
2384                err = 0;
2385
2386                if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
2387                        return 0;
2388
2389                /* Only requests are handled by the kernel */
2390                if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
2391                        goto ack;
2392
2393                /* Skip control messages */
2394                if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
2395                        goto ack;
2396
2397                err = cb(skb, nlh, &extack);
2398                if (err == -EINTR)
2399                        goto skip;
2400
2401ack:
2402                if (nlh->nlmsg_flags & NLM_F_ACK || err)
2403                        netlink_ack(skb, nlh, err, &extack);
2404
2405skip:
2406                msglen = NLMSG_ALIGN(nlh->nlmsg_len);
2407                if (msglen > skb->len)
2408                        msglen = skb->len;
2409                skb_pull(skb, msglen);
2410        }
2411
2412        return 0;
2413}
2414EXPORT_SYMBOL(netlink_rcv_skb);
2415
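    /*
     * The usual shape of a protocol's input callback built on
     * netlink_rcv_skb(), as rtnetlink does; my_rcv and my_doit (a
     * per-message handler with the cb signature above) are placeholders.
     *
     *	static void my_rcv(struct sk_buff *skb)
     *	{
     *		netlink_rcv_skb(skb, &my_doit);
     *	}
     */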
2416/**
2417 * nlmsg_notify - send a notification netlink message
2418 * @sk: netlink socket to use
2419 * @skb: notification message
2420 * @portid: destination netlink portid for reports or 0
2421 * @group: destination multicast group or 0
2422 * @report: 1 to report back, 0 to disable
2423 * @flags: allocation flags
2424 */
2425int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
2426                 unsigned int group, int report, gfp_t flags)
2427{
2428        int err = 0;
2429
2430        if (group) {
2431                int exclude_portid = 0;
2432
2433                if (report) {
2434                        refcount_inc(&skb->users);
2435                        exclude_portid = portid;
2436                }
2437
2438        /* errors are reported via the destination's sk->sk_err, but
2439         * delivery errors are propagated if NETLINK_BROADCAST_ERROR is set */
2440                err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
2441        }
2442
2443        if (report) {
2444                int err2;
2445
2446                err2 = nlmsg_unicast(sk, skb, portid);
2447                if (!err || err == -ESRCH)
2448                        err = err2;
2449        }
2450
2451        return err;
2452}
2453EXPORT_SYMBOL(nlmsg_notify);
2454
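    /*
     * A minimal in-kernel sketch of the common calling convention, as
     * used by rtnl_notify(): report back to the requester only when
     * NLM_F_ECHO was set on the request.
     *
     *	int report = nlmsg_report(nlh);
     *
     *	nlmsg_notify(sk, skb, portid, group, report, GFP_KERNEL);
     */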
2455#ifdef CONFIG_PROC_FS
2456struct nl_seq_iter {
2457        struct seq_net_private p;
2458        struct rhashtable_iter hti;
2459        int link;
2460};
2461
2462static int netlink_walk_start(struct nl_seq_iter *iter)
2463{
2464        int err;
2465
2466        err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti,
2467                                   GFP_KERNEL);
2468        if (err) {
2469                iter->link = MAX_LINKS;
2470                return err;
2471        }
2472
2473        err = rhashtable_walk_start(&iter->hti);
2474        return err == -EAGAIN ? 0 : err;
2475}
2476
2477static void netlink_walk_stop(struct nl_seq_iter *iter)
2478{
2479        rhashtable_walk_stop(&iter->hti);
2480        rhashtable_walk_exit(&iter->hti);
2481}
2482
2483static void *__netlink_seq_next(struct seq_file *seq)
2484{
2485        struct nl_seq_iter *iter = seq->private;
2486        struct netlink_sock *nlk;
2487
2488        do {
2489                for (;;) {
2490                        int err;
2491
2492                        nlk = rhashtable_walk_next(&iter->hti);
2493
2494                        if (IS_ERR(nlk)) {
2495                                if (PTR_ERR(nlk) == -EAGAIN)
2496                                        continue;
2497
2498                                return nlk;
2499                        }
2500
2501                        if (nlk)
2502                                break;
2503
2504                        netlink_walk_stop(iter);
2505                        if (++iter->link >= MAX_LINKS)
2506                                return NULL;
2507
2508                        err = netlink_walk_start(iter);
2509                        if (err)
2510                                return ERR_PTR(err);
2511                }
2512        } while (sock_net(&nlk->sk) != seq_file_net(seq));
2513
2514        return nlk;
2515}
2516
2517static void *netlink_seq_start(struct seq_file *seq, loff_t *posp)
2518{
2519        struct nl_seq_iter *iter = seq->private;
2520        void *obj = SEQ_START_TOKEN;
2521        loff_t pos;
2522        int err;
2523
2524        iter->link = 0;
2525
2526        err = netlink_walk_start(iter);
2527        if (err)
2528                return ERR_PTR(err);
2529
2530        for (pos = *posp; pos && obj && !IS_ERR(obj); pos--)
2531                obj = __netlink_seq_next(seq);
2532
2533        return obj;
2534}
2535
2536static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2537{
2538        ++*pos;
2539        return __netlink_seq_next(seq);
2540}
2541
2542static void netlink_seq_stop(struct seq_file *seq, void *v)
2543{
2544        struct nl_seq_iter *iter = seq->private;
2545
2546        if (iter->link >= MAX_LINKS)
2547                return;
2548
2549        netlink_walk_stop(iter);
2550}
2551
2552
2553static int netlink_seq_show(struct seq_file *seq, void *v)
2554{
2555        if (v == SEQ_START_TOKEN) {
2556                seq_puts(seq,
2557                         "sk       Eth Pid    Groups   "
2558                         "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
2559        } else {
2560                struct sock *s = v;
2561                struct netlink_sock *nlk = nlk_sk(s);
2562
2563                seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
2564                           s,
2565                           s->sk_protocol,
2566                           nlk->portid,
2567                           nlk->groups ? (u32)nlk->groups[0] : 0,
2568                           sk_rmem_alloc_get(s),
2569                           sk_wmem_alloc_get(s),
2570                           nlk->cb_running,
2571                           refcount_read(&s->sk_refcnt),
2572                           atomic_read(&s->sk_drops),
2573                           sock_i_ino(s)
2574                        );
2575
2576        }
2577        return 0;
2578}
2579
2580static const struct seq_operations netlink_seq_ops = {
2581        .start  = netlink_seq_start,
2582        .next   = netlink_seq_next,
2583        .stop   = netlink_seq_stop,
2584        .show   = netlink_seq_show,
2585};
2586
2587
2588static int netlink_seq_open(struct inode *inode, struct file *file)
2589{
2590        return seq_open_net(inode, file, &netlink_seq_ops,
2591                                sizeof(struct nl_seq_iter));
2592}
2593
2594static const struct file_operations netlink_seq_fops = {
2595        .owner          = THIS_MODULE,
2596        .open           = netlink_seq_open,
2597        .read           = seq_read,
2598        .llseek         = seq_lseek,
2599        .release        = seq_release_net,
2600};
2601
2602#endif
2603
2604int netlink_register_notifier(struct notifier_block *nb)
2605{
2606        return blocking_notifier_chain_register(&netlink_chain, nb);
2607}
2608EXPORT_SYMBOL(netlink_register_notifier);
2609
2610int netlink_unregister_notifier(struct notifier_block *nb)
2611{
2612        return blocking_notifier_chain_unregister(&netlink_chain, nb);
2613}
2614EXPORT_SYMBOL(netlink_unregister_notifier);
2615
2616static const struct proto_ops netlink_ops = {
2617        .family =       PF_NETLINK,
2618        .owner =        THIS_MODULE,
2619        .release =      netlink_release,
2620        .bind =         netlink_bind,
2621        .connect =      netlink_connect,
2622        .socketpair =   sock_no_socketpair,
2623        .accept =       sock_no_accept,
2624        .getname =      netlink_getname,
2625        .poll =         datagram_poll,
2626        .ioctl =        netlink_ioctl,
2627        .listen =       sock_no_listen,
2628        .shutdown =     sock_no_shutdown,
2629        .setsockopt =   netlink_setsockopt,
2630        .getsockopt =   netlink_getsockopt,
2631        .sendmsg =      netlink_sendmsg,
2632        .recvmsg =      netlink_recvmsg,
2633        .mmap =         sock_no_mmap,
2634        .sendpage =     sock_no_sendpage,
2635};
2636
2637static const struct net_proto_family netlink_family_ops = {
2638        .family = PF_NETLINK,
2639        .create = netlink_create,
2640        .owner  = THIS_MODULE,  /* for consistency 8) */
2641};
2642
2643static int __net_init netlink_net_init(struct net *net)
2644{
2645#ifdef CONFIG_PROC_FS
2646        if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
2647                return -ENOMEM;
2648#endif
2649        return 0;
2650}
2651
2652static void __net_exit netlink_net_exit(struct net *net)
2653{
2654#ifdef CONFIG_PROC_FS
2655        remove_proc_entry("netlink", net->proc_net);
2656#endif
2657}
2658
2659static void __init netlink_add_usersock_entry(void)
2660{
2661        struct listeners *listeners;
2662        int groups = 32;
2663
2664        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2665        if (!listeners)
2666                panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2667
2668        netlink_table_grab();
2669
2670        nl_table[NETLINK_USERSOCK].groups = groups;
2671        rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2672        nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2673        nl_table[NETLINK_USERSOCK].registered = 1;
2674        nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
2675
2676        netlink_table_ungrab();
2677}
2678
2679static struct pernet_operations __net_initdata netlink_net_ops = {
2680        .init = netlink_net_init,
2681        .exit = netlink_net_exit,
2682};
2683
2684static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
2685{
2686        const struct netlink_sock *nlk = data;
2687        struct netlink_compare_arg arg;
2688
2689        netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
2690        return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
2691}
2692
2693static const struct rhashtable_params netlink_rhashtable_params = {
2694        .head_offset = offsetof(struct netlink_sock, node),
2695        .key_len = netlink_compare_arg_len,
2696        .obj_hashfn = netlink_hash,
2697        .obj_cmpfn = netlink_compare,
2698        .automatic_shrinking = true,
2699};
2700
2701static int __init netlink_proto_init(void)
2702{
2703        int i;
2704        int err = proto_register(&netlink_proto, 0);
2705
2706        if (err != 0)
2707                goto out;
2708
2709        BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2710
2711        nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2712        if (!nl_table)
2713                goto panic;
2714
2715        for (i = 0; i < MAX_LINKS; i++) {
2716                if (rhashtable_init(&nl_table[i].hash,
2717                                    &netlink_rhashtable_params) < 0) {
2718                        while (--i >= 0)
2719                                rhashtable_destroy(&nl_table[i].hash);
2720                        kfree(nl_table);
2721                        goto panic;
2722                }
2723        }
2724
2725        INIT_LIST_HEAD(&netlink_tap_all);
2726
2727        netlink_add_usersock_entry();
2728
2729        sock_register(&netlink_family_ops);
2730        register_pernet_subsys(&netlink_net_ops);
2731        /* The netlink device handler may be needed early. */
2732        rtnetlink_init();
2733out:
2734        return err;
2735panic:
2736        panic("netlink_init: Cannot allocate nl_table\n");
2737}
2738
2739core_initcall(netlink_proto_init);
2740