linux/net/netlink/af_netlink.c
   1/*
   2 * NETLINK      Kernel-user communication protocol.
   3 *
   4 *              Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
   5 *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
   6 *                              Patrick McHardy <kaber@trash.net>
   7 *
   8 *              This program is free software; you can redistribute it and/or
   9 *              modify it under the terms of the GNU General Public License
  10 *              as published by the Free Software Foundation; either version
  11 *              2 of the License, or (at your option) any later version.
  12 *
  13 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
  14 *                               added netlink_proto_exit
  15 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
  16 *                               use nlk_sk, as sk->protinfo is on a diet 8)
  17 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
  18 *                               - inc module use count of module that owns
  19 *                                 the kernel socket in case userspace opens
  20 *                                 socket of same protocol
  21 *                               - remove all module support, since netlink is
  22 *                                 mandatory if CONFIG_NET=y these days
  23 */
  24
  25#include <linux/module.h>
  26
  27#include <linux/capability.h>
  28#include <linux/kernel.h>
  29#include <linux/init.h>
  30#include <linux/signal.h>
  31#include <linux/sched.h>
  32#include <linux/errno.h>
  33#include <linux/string.h>
  34#include <linux/stat.h>
  35#include <linux/socket.h>
  36#include <linux/un.h>
  37#include <linux/fcntl.h>
  38#include <linux/termios.h>
  39#include <linux/sockios.h>
  40#include <linux/net.h>
  41#include <linux/fs.h>
  42#include <linux/slab.h>
  43#include <asm/uaccess.h>
  44#include <linux/skbuff.h>
  45#include <linux/netdevice.h>
  46#include <linux/rtnetlink.h>
  47#include <linux/proc_fs.h>
  48#include <linux/seq_file.h>
  49#include <linux/notifier.h>
  50#include <linux/security.h>
  51#include <linux/jhash.h>
  52#include <linux/jiffies.h>
  53#include <linux/random.h>
  54#include <linux/bitops.h>
  55#include <linux/mm.h>
  56#include <linux/types.h>
  57#include <linux/audit.h>
  58#include <linux/mutex.h>
  59#include <linux/vmalloc.h>
  60#include <linux/if_arp.h>
  61#include <asm/cacheflush.h>
  62
  63#include <net/net_namespace.h>
  64#include <net/sock.h>
  65#include <net/scm.h>
  66#include <net/netlink.h>
  67
  68#include "af_netlink.h"
  69
  70struct listeners {
  71        struct rcu_head         rcu;
  72        unsigned long           masks[0];
  73};
  74
  75/* state bits */
  76#define NETLINK_CONGESTED       0x0
  77
  78/* flags */
  79#define NETLINK_KERNEL_SOCKET   0x1
  80#define NETLINK_RECV_PKTINFO    0x2
  81#define NETLINK_BROADCAST_SEND_ERROR    0x4
  82#define NETLINK_RECV_NO_ENOBUFS 0x8
  83
  84static inline int netlink_is_kernel(struct sock *sk)
  85{
  86        return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
  87}
  88
  89struct netlink_table *nl_table;
  90EXPORT_SYMBOL_GPL(nl_table);
  91
  92static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
  93
  94static int netlink_dump(struct sock *sk);
  95static void netlink_skb_destructor(struct sk_buff *skb);
  96
  97DEFINE_RWLOCK(nl_table_lock);
  98EXPORT_SYMBOL_GPL(nl_table_lock);
  99static atomic_t nl_table_users = ATOMIC_INIT(0);
 100
  101#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))
 102
 103static ATOMIC_NOTIFIER_HEAD(netlink_chain);
 104
 105static DEFINE_SPINLOCK(netlink_tap_lock);
 106static struct list_head netlink_tap_all __read_mostly;
 107
 108static inline u32 netlink_group_mask(u32 group)
 109{
 110        return group ? 1 << (group - 1) : 0;
 111}
 112
 113static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
 114{
 115        return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
 116}
 117
 118int netlink_add_tap(struct netlink_tap *nt)
 119{
 120        if (unlikely(nt->dev->type != ARPHRD_NETLINK))
 121                return -EINVAL;
 122
 123        spin_lock(&netlink_tap_lock);
 124        list_add_rcu(&nt->list, &netlink_tap_all);
 125        spin_unlock(&netlink_tap_lock);
 126
 127        if (nt->module)
 128                __module_get(nt->module);
 129
 130        return 0;
 131}
 132EXPORT_SYMBOL_GPL(netlink_add_tap);
 133
 134int __netlink_remove_tap(struct netlink_tap *nt)
 135{
 136        bool found = false;
 137        struct netlink_tap *tmp;
 138
 139        spin_lock(&netlink_tap_lock);
 140
 141        list_for_each_entry(tmp, &netlink_tap_all, list) {
 142                if (nt == tmp) {
 143                        list_del_rcu(&nt->list);
 144                        found = true;
 145                        goto out;
 146                }
 147        }
 148
 149        pr_warn("__netlink_remove_tap: %p not found\n", nt);
 150out:
 151        spin_unlock(&netlink_tap_lock);
 152
 153        if (found && nt->module)
 154                module_put(nt->module);
 155
 156        return found ? 0 : -ENODEV;
 157}
 158EXPORT_SYMBOL_GPL(__netlink_remove_tap);
 159
 160int netlink_remove_tap(struct netlink_tap *nt)
 161{
 162        int ret;
 163
 164        ret = __netlink_remove_tap(nt);
 165        synchronize_net();
 166
 167        return ret;
 168}
 169EXPORT_SYMBOL_GPL(netlink_remove_tap);
 170
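/* Taps (e.g. the nlmon device) receive a clone of every message handed to a
 * user-space socket; delivery stops at the first tap whose device rejects
 * the clone.
 */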
 171static int __netlink_deliver_tap_skb(struct sk_buff *skb,
 172                                     struct net_device *dev)
 173{
 174        struct sk_buff *nskb;
 175        int ret = -ENOMEM;
 176
 177        dev_hold(dev);
 178        nskb = skb_clone(skb, GFP_ATOMIC);
 179        if (nskb) {
 180                nskb->dev = dev;
 181                ret = dev_queue_xmit(nskb);
 182                if (unlikely(ret > 0))
 183                        ret = net_xmit_errno(ret);
 184        }
 185
 186        dev_put(dev);
 187        return ret;
 188}
 189
 190static void __netlink_deliver_tap(struct sk_buff *skb)
 191{
 192        int ret;
 193        struct netlink_tap *tmp;
 194
 195        list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
 196                ret = __netlink_deliver_tap_skb(skb, tmp->dev);
 197                if (unlikely(ret))
 198                        break;
 199        }
 200}
 201
 202static void netlink_deliver_tap(struct sk_buff *skb)
 203{
 204        rcu_read_lock();
 205
 206        if (unlikely(!list_empty(&netlink_tap_all)))
 207                __netlink_deliver_tap(skb);
 208
 209        rcu_read_unlock();
 210}
 211
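/* Receive queue overrun: unless the socket opted out with
 * NETLINK_RECV_NO_ENOBUFS, mark it congested and report ENOBUFS to the
 * owner; the drop is counted either way.
 */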
 212static void netlink_overrun(struct sock *sk)
 213{
 214        struct netlink_sock *nlk = nlk_sk(sk);
 215
 216        if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
 217                if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
 218                        sk->sk_err = ENOBUFS;
 219                        sk->sk_error_report(sk);
 220                }
 221        }
 222        atomic_inc(&sk->sk_drops);
 223}
 224
 225static void netlink_rcv_wake(struct sock *sk)
 226{
 227        struct netlink_sock *nlk = nlk_sk(sk);
 228
 229        if (skb_queue_empty(&sk->sk_receive_queue))
 230                clear_bit(NETLINK_CONGESTED, &nlk->state);
 231        if (!test_bit(NETLINK_CONGESTED, &nlk->state))
 232                wake_up_interruptible(&nlk->wait);
 233}
 234
 235#ifdef CONFIG_NETLINK_MMAP
 236static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
 237{
 238        return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
 239}
 240
 241static bool netlink_rx_is_mmaped(struct sock *sk)
 242{
 243        return nlk_sk(sk)->rx_ring.pg_vec != NULL;
 244}
 245
 246static bool netlink_tx_is_mmaped(struct sock *sk)
 247{
 248        return nlk_sk(sk)->tx_ring.pg_vec != NULL;
 249}
 250
 251static __pure struct page *pgvec_to_page(const void *addr)
 252{
 253        if (is_vmalloc_addr(addr))
 254                return vmalloc_to_page(addr);
 255        else
 256                return virt_to_page(addr);
 257}
 258
 259static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
 260{
 261        unsigned int i;
 262
 263        for (i = 0; i < len; i++) {
 264                if (pg_vec[i] != NULL) {
 265                        if (is_vmalloc_addr(pg_vec[i]))
 266                                vfree(pg_vec[i]);
 267                        else
 268                                free_pages((unsigned long)pg_vec[i], order);
 269                }
 270        }
 271        kfree(pg_vec);
 272}
 273
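/* Allocate one ring block: try physically contiguous pages without retrying,
 * fall back to vmalloc(), and finally retry the contiguous allocation with
 * __GFP_NORETRY cleared.
 */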
 274static void *alloc_one_pg_vec_page(unsigned long order)
 275{
 276        void *buffer;
 277        gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
 278                          __GFP_NOWARN | __GFP_NORETRY;
 279
 280        buffer = (void *)__get_free_pages(gfp_flags, order);
 281        if (buffer != NULL)
 282                return buffer;
 283
 284        buffer = vzalloc((1 << order) * PAGE_SIZE);
 285        if (buffer != NULL)
 286                return buffer;
 287
 288        gfp_flags &= ~__GFP_NORETRY;
 289        return (void *)__get_free_pages(gfp_flags, order);
 290}
 291
 292static void **alloc_pg_vec(struct netlink_sock *nlk,
 293                           struct nl_mmap_req *req, unsigned int order)
 294{
 295        unsigned int block_nr = req->nm_block_nr;
 296        unsigned int i;
 297        void **pg_vec, *ptr;
 298
 299        pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
 300        if (pg_vec == NULL)
 301                return NULL;
 302
 303        for (i = 0; i < block_nr; i++) {
 304                pg_vec[i] = ptr = alloc_one_pg_vec_page(order);
 305                if (pg_vec[i] == NULL)
 306                        goto err1;
 307        }
 308
 309        return pg_vec;
 310err1:
 311        free_pg_vec(pg_vec, order, block_nr);
 312        return NULL;
 313}
 314
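/* Set up (or, with a zeroed request when closing, tear down) the rx or tx
 * ring.  The request is validated and the block vector allocated before
 * taking pg_vec_lock; the new vector is then swapped in and the old queue
 * purged.  A rough user-space sketch for this kernel generation's
 * NETLINK_RX_RING option (values are only an example):
 *
 *	struct nl_mmap_req req = {
 *		.nm_block_size	= 4096,
 *		.nm_block_nr	= 64,
 *		.nm_frame_size	= 2048,
 *		.nm_frame_nr	= 64 * 4096 / 2048,
 *	};
 *	setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req));
 *	ring = mmap(NULL, req.nm_block_size * req.nm_block_nr,
 *		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */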
 315static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
 316                            bool closing, bool tx_ring)
 317{
 318        struct netlink_sock *nlk = nlk_sk(sk);
 319        struct netlink_ring *ring;
 320        struct sk_buff_head *queue;
 321        void **pg_vec = NULL;
 322        unsigned int order = 0;
 323        int err;
 324
 325        ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
 326        queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
 327
 328        if (!closing) {
 329                if (atomic_read(&nlk->mapped))
 330                        return -EBUSY;
 331                if (atomic_read(&ring->pending))
 332                        return -EBUSY;
 333        }
 334
 335        if (req->nm_block_nr) {
 336                if (ring->pg_vec != NULL)
 337                        return -EBUSY;
 338
 339                if ((int)req->nm_block_size <= 0)
 340                        return -EINVAL;
 341                if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
 342                        return -EINVAL;
 343                if (req->nm_frame_size < NL_MMAP_HDRLEN)
 344                        return -EINVAL;
 345                if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
 346                        return -EINVAL;
 347
 348                ring->frames_per_block = req->nm_block_size /
 349                                         req->nm_frame_size;
 350                if (ring->frames_per_block == 0)
 351                        return -EINVAL;
 352                if (ring->frames_per_block * req->nm_block_nr !=
 353                    req->nm_frame_nr)
 354                        return -EINVAL;
 355
 356                order = get_order(req->nm_block_size);
 357                pg_vec = alloc_pg_vec(nlk, req, order);
 358                if (pg_vec == NULL)
 359                        return -ENOMEM;
 360        } else {
 361                if (req->nm_frame_nr)
 362                        return -EINVAL;
 363        }
 364
 365        err = -EBUSY;
 366        mutex_lock(&nlk->pg_vec_lock);
 367        if (closing || atomic_read(&nlk->mapped) == 0) {
 368                err = 0;
 369                spin_lock_bh(&queue->lock);
 370
 371                ring->frame_max         = req->nm_frame_nr - 1;
 372                ring->head              = 0;
 373                ring->frame_size        = req->nm_frame_size;
 374                ring->pg_vec_pages      = req->nm_block_size / PAGE_SIZE;
 375
 376                swap(ring->pg_vec_len, req->nm_block_nr);
 377                swap(ring->pg_vec_order, order);
 378                swap(ring->pg_vec, pg_vec);
 379
 380                __skb_queue_purge(queue);
 381                spin_unlock_bh(&queue->lock);
 382
 383                WARN_ON(atomic_read(&nlk->mapped));
 384        }
 385        mutex_unlock(&nlk->pg_vec_lock);
 386
 387        if (pg_vec)
 388                free_pg_vec(pg_vec, order, req->nm_block_nr);
 389        return err;
 390}
 391
 392static void netlink_mm_open(struct vm_area_struct *vma)
 393{
 394        struct file *file = vma->vm_file;
 395        struct socket *sock = file->private_data;
 396        struct sock *sk = sock->sk;
 397
 398        if (sk)
 399                atomic_inc(&nlk_sk(sk)->mapped);
 400}
 401
 402static void netlink_mm_close(struct vm_area_struct *vma)
 403{
 404        struct file *file = vma->vm_file;
 405        struct socket *sock = file->private_data;
 406        struct sock *sk = sock->sk;
 407
 408        if (sk)
 409                atomic_dec(&nlk_sk(sk)->mapped);
 410}
 411
 412static const struct vm_operations_struct netlink_mmap_ops = {
 413        .open   = netlink_mm_open,
 414        .close  = netlink_mm_close,
 415};
 416
 417static int netlink_mmap(struct file *file, struct socket *sock,
 418                        struct vm_area_struct *vma)
 419{
 420        struct sock *sk = sock->sk;
 421        struct netlink_sock *nlk = nlk_sk(sk);
 422        struct netlink_ring *ring;
 423        unsigned long start, size, expected;
 424        unsigned int i;
 425        int err = -EINVAL;
 426
 427        if (vma->vm_pgoff)
 428                return -EINVAL;
 429
 430        mutex_lock(&nlk->pg_vec_lock);
 431
 432        expected = 0;
 433        for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
 434                if (ring->pg_vec == NULL)
 435                        continue;
 436                expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
 437        }
 438
 439        if (expected == 0)
 440                goto out;
 441
 442        size = vma->vm_end - vma->vm_start;
 443        if (size != expected)
 444                goto out;
 445
 446        start = vma->vm_start;
 447        for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
 448                if (ring->pg_vec == NULL)
 449                        continue;
 450
 451                for (i = 0; i < ring->pg_vec_len; i++) {
 452                        struct page *page;
 453                        void *kaddr = ring->pg_vec[i];
 454                        unsigned int pg_num;
 455
 456                        for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
 457                                page = pgvec_to_page(kaddr);
 458                                err = vm_insert_page(vma, start, page);
 459                                if (err < 0)
 460                                        goto out;
 461                                start += PAGE_SIZE;
 462                                kaddr += PAGE_SIZE;
 463                        }
 464                }
 465        }
 466
 467        atomic_inc(&nlk->mapped);
 468        vma->vm_ops = &netlink_mmap_ops;
 469        err = 0;
 470out:
 471        mutex_unlock(&nlk->pg_vec_lock);
 472        return err;
 473}
 474
 475static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
 476{
 477#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
 478        struct page *p_start, *p_end;
 479
 480        /* First page is flushed through netlink_{get,set}_status */
  481        p_start = pgvec_to_page((void *)hdr + PAGE_SIZE);
 482        p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
 483        while (p_start <= p_end) {
 484                flush_dcache_page(p_start);
 485                p_start++;
 486        }
 487#endif
 488}
 489
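/* The per-frame status word is the ownership handshake with user space.
 * Reads and writes of it are ordered with memory barriers, and the header
 * page is flushed for architectures with non-coherent caches.
 */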
 490static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
 491{
 492        smp_rmb();
 493        flush_dcache_page(pgvec_to_page(hdr));
 494        return hdr->nm_status;
 495}
 496
 497static void netlink_set_status(struct nl_mmap_hdr *hdr,
 498                               enum nl_mmap_status status)
 499{
 500        hdr->nm_status = status;
 501        flush_dcache_page(pgvec_to_page(hdr));
 502        smp_wmb();
 503}
 504
 505static struct nl_mmap_hdr *
 506__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
 507{
 508        unsigned int pg_vec_pos, frame_off;
 509
 510        pg_vec_pos = pos / ring->frames_per_block;
 511        frame_off  = pos % ring->frames_per_block;
 512
 513        return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
 514}
 515
 516static struct nl_mmap_hdr *
 517netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
 518                     enum nl_mmap_status status)
 519{
 520        struct nl_mmap_hdr *hdr;
 521
 522        hdr = __netlink_lookup_frame(ring, pos);
 523        if (netlink_get_status(hdr) != status)
 524                return NULL;
 525
 526        return hdr;
 527}
 528
 529static struct nl_mmap_hdr *
 530netlink_current_frame(const struct netlink_ring *ring,
 531                      enum nl_mmap_status status)
 532{
 533        return netlink_lookup_frame(ring, ring->head, status);
 534}
 535
 536static struct nl_mmap_hdr *
 537netlink_previous_frame(const struct netlink_ring *ring,
 538                       enum nl_mmap_status status)
 539{
 540        unsigned int prev;
 541
 542        prev = ring->head ? ring->head - 1 : ring->frame_max;
 543        return netlink_lookup_frame(ring, prev, status);
 544}
 545
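/* The ring head advances one frame at a time and wraps at frame_max.
 * netlink_forward_ring() additionally skips frames user space flagged
 * NL_MMAP_STATUS_SKIP, stopping at the first frame in any other state or
 * after one full lap.
 */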
 546static void netlink_increment_head(struct netlink_ring *ring)
 547{
 548        ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
 549}
 550
 551static void netlink_forward_ring(struct netlink_ring *ring)
 552{
 553        unsigned int head = ring->head, pos = head;
 554        const struct nl_mmap_hdr *hdr;
 555
 556        do {
 557                hdr = __netlink_lookup_frame(ring, pos);
 558                if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
 559                        break;
 560                if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
 561                        break;
 562                netlink_increment_head(ring);
 563        } while (ring->head != head);
 564}
 565
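/* A dump may only continue while the rx ring has room: both the current
 * frame and the frame half a ring ahead of it must still be unused.
 */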
 566static bool netlink_dump_space(struct netlink_sock *nlk)
 567{
 568        struct netlink_ring *ring = &nlk->rx_ring;
 569        struct nl_mmap_hdr *hdr;
 570        unsigned int n;
 571
 572        hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
 573        if (hdr == NULL)
 574                return false;
 575
 576        n = ring->head + ring->frame_max / 2;
 577        if (n > ring->frame_max)
 578                n -= ring->frame_max;
 579
 580        hdr = __netlink_lookup_frame(ring, n);
 581
 582        return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
 583}
 584
 585static unsigned int netlink_poll(struct file *file, struct socket *sock,
 586                                 poll_table *wait)
 587{
 588        struct sock *sk = sock->sk;
 589        struct netlink_sock *nlk = nlk_sk(sk);
 590        unsigned int mask;
 591        int err;
 592
 593        if (nlk->rx_ring.pg_vec != NULL) {
 594                /* Memory mapped sockets don't call recvmsg(), so flow control
 595                 * for dumps is performed here. A dump is allowed to continue
 596                 * if at least half the ring is unused.
 597                 */
 598                while (nlk->cb != NULL && netlink_dump_space(nlk)) {
 599                        err = netlink_dump(sk);
 600                        if (err < 0) {
 601                                sk->sk_err = err;
 602                                sk->sk_error_report(sk);
 603                                break;
 604                        }
 605                }
 606                netlink_rcv_wake(sk);
 607        }
 608
 609        mask = datagram_poll(file, sock, wait);
 610
 611        spin_lock_bh(&sk->sk_receive_queue.lock);
 612        if (nlk->rx_ring.pg_vec) {
 613                netlink_forward_ring(&nlk->rx_ring);
 614                if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
 615                        mask |= POLLIN | POLLRDNORM;
 616        }
 617        spin_unlock_bh(&sk->sk_receive_queue.lock);
 618
 619        spin_lock_bh(&sk->sk_write_queue.lock);
 620        if (nlk->tx_ring.pg_vec) {
 621                if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
 622                        mask |= POLLOUT | POLLWRNORM;
 623        }
 624        spin_unlock_bh(&sk->sk_write_queue.lock);
 625
 626        return mask;
 627}
 628
 629static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
 630{
 631        return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
 632}
 633
 634static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
 635                                   struct netlink_ring *ring,
 636                                   struct nl_mmap_hdr *hdr)
 637{
 638        unsigned int size;
 639        void *data;
 640
 641        size = ring->frame_size - NL_MMAP_HDRLEN;
 642        data = (void *)hdr + NL_MMAP_HDRLEN;
 643
 644        skb->head       = data;
 645        skb->data       = data;
 646        skb_reset_tail_pointer(skb);
 647        skb->end        = skb->tail + size;
 648        skb->len        = 0;
 649
 650        skb->destructor = netlink_skb_destructor;
 651        NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
 652        NETLINK_CB(skb).sk = sk;
 653}
 654
 655static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
 656                                u32 dst_portid, u32 dst_group,
 657                                struct sock_iocb *siocb)
 658{
 659        struct netlink_sock *nlk = nlk_sk(sk);
 660        struct netlink_ring *ring;
 661        struct nl_mmap_hdr *hdr;
 662        struct sk_buff *skb;
 663        unsigned int maxlen;
 664        bool excl = true;
 665        int err = 0, len = 0;
 666
 667        /* Netlink messages are validated by the receiver before processing.
 668         * In order to avoid userspace changing the contents of the message
 669         * after validation, the socket and the ring may only be used by a
 670         * single process, otherwise we fall back to copying.
 671         */
 672        if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
 673            atomic_read(&nlk->mapped) > 1)
 674                excl = false;
 675
 676        mutex_lock(&nlk->pg_vec_lock);
 677
 678        ring   = &nlk->tx_ring;
 679        maxlen = ring->frame_size - NL_MMAP_HDRLEN;
 680
 681        do {
 682                hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
 683                if (hdr == NULL) {
 684                        if (!(msg->msg_flags & MSG_DONTWAIT) &&
 685                            atomic_read(&nlk->tx_ring.pending))
 686                                schedule();
 687                        continue;
 688                }
 689                if (hdr->nm_len > maxlen) {
 690                        err = -EINVAL;
 691                        goto out;
 692                }
 693
 694                netlink_frame_flush_dcache(hdr);
 695
 696                if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
 697                        skb = alloc_skb_head(GFP_KERNEL);
 698                        if (skb == NULL) {
 699                                err = -ENOBUFS;
 700                                goto out;
 701                        }
 702                        sock_hold(sk);
 703                        netlink_ring_setup_skb(skb, sk, ring, hdr);
 704                        NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
 705                        __skb_put(skb, hdr->nm_len);
 706                        netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
 707                        atomic_inc(&ring->pending);
 708                } else {
 709                        skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
 710                        if (skb == NULL) {
 711                                err = -ENOBUFS;
 712                                goto out;
 713                        }
 714                        __skb_put(skb, hdr->nm_len);
 715                        memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
 716                        netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
 717                }
 718
 719                netlink_increment_head(ring);
 720
 721                NETLINK_CB(skb).portid    = nlk->portid;
 722                NETLINK_CB(skb).dst_group = dst_group;
 723                NETLINK_CB(skb).creds     = siocb->scm->creds;
 724
 725                err = security_netlink_send(sk, skb);
 726                if (err) {
 727                        kfree_skb(skb);
 728                        goto out;
 729                }
 730
 731                if (unlikely(dst_group)) {
 732                        atomic_inc(&skb->users);
 733                        netlink_broadcast(sk, skb, dst_portid, dst_group,
 734                                          GFP_KERNEL);
 735                }
 736                err = netlink_unicast(sk, skb, dst_portid,
 737                                      msg->msg_flags & MSG_DONTWAIT);
 738                if (err < 0)
 739                        goto out;
 740                len += err;
 741
 742        } while (hdr != NULL ||
 743                 (!(msg->msg_flags & MSG_DONTWAIT) &&
 744                  atomic_read(&nlk->tx_ring.pending)));
 745
 746        if (len > 0)
 747                err = len;
 748out:
 749        mutex_unlock(&nlk->pg_vec_lock);
 750        return err;
 751}
 752
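/* rx completion for mmaped sockets: if the skb data already lives in the
 * ring, only the frame header needs to be filled in before the frame is
 * flipped to VALID.  netlink_ring_set_copied() instead claims a frame,
 * marks it COPY and queues the skb, telling user space to fall back to
 * recvmsg() for this message.
 */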
 753static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
 754{
 755        struct nl_mmap_hdr *hdr;
 756
 757        hdr = netlink_mmap_hdr(skb);
 758        hdr->nm_len     = skb->len;
 759        hdr->nm_group   = NETLINK_CB(skb).dst_group;
 760        hdr->nm_pid     = NETLINK_CB(skb).creds.pid;
 761        hdr->nm_uid     = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
 762        hdr->nm_gid     = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
 763        netlink_frame_flush_dcache(hdr);
 764        netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
 765
 766        NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
 767        kfree_skb(skb);
 768}
 769
 770static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
 771{
 772        struct netlink_sock *nlk = nlk_sk(sk);
 773        struct netlink_ring *ring = &nlk->rx_ring;
 774        struct nl_mmap_hdr *hdr;
 775
 776        spin_lock_bh(&sk->sk_receive_queue.lock);
 777        hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
 778        if (hdr == NULL) {
 779                spin_unlock_bh(&sk->sk_receive_queue.lock);
 780                kfree_skb(skb);
 781                netlink_overrun(sk);
 782                return;
 783        }
 784        netlink_increment_head(ring);
 785        __skb_queue_tail(&sk->sk_receive_queue, skb);
 786        spin_unlock_bh(&sk->sk_receive_queue.lock);
 787
 788        hdr->nm_len     = skb->len;
 789        hdr->nm_group   = NETLINK_CB(skb).dst_group;
 790        hdr->nm_pid     = NETLINK_CB(skb).creds.pid;
 791        hdr->nm_uid     = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
 792        hdr->nm_gid     = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
 793        netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
 794}
 795
 796#else /* CONFIG_NETLINK_MMAP */
 797#define netlink_skb_is_mmaped(skb)      false
 798#define netlink_rx_is_mmaped(sk)        false
 799#define netlink_tx_is_mmaped(sk)        false
 800#define netlink_mmap                    sock_no_mmap
 801#define netlink_poll                    datagram_poll
 802#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb)     0
 803#endif /* CONFIG_NETLINK_MMAP */
 804
 805static void netlink_destroy_callback(struct netlink_callback *cb)
 806{
 807        kfree_skb(cb->skb);
 808        kfree(cb);
 809}
 810
 811static void netlink_consume_callback(struct netlink_callback *cb)
 812{
 813        consume_skb(cb->skb);
 814        kfree(cb);
 815}
 816
 817static void netlink_skb_destructor(struct sk_buff *skb)
 818{
 819#ifdef CONFIG_NETLINK_MMAP
 820        struct nl_mmap_hdr *hdr;
 821        struct netlink_ring *ring;
 822        struct sock *sk;
 823
 824        /* If a packet from the kernel to userspace was freed because of an
 825         * error without being delivered to userspace, the kernel must reset
 826         * the status. In the direction userspace to kernel, the status is
 827         * always reset here after the packet was processed and freed.
 828         */
 829        if (netlink_skb_is_mmaped(skb)) {
 830                hdr = netlink_mmap_hdr(skb);
 831                sk = NETLINK_CB(skb).sk;
 832
 833                if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
 834                        netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
 835                        ring = &nlk_sk(sk)->tx_ring;
 836                } else {
 837                        if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
 838                                hdr->nm_len = 0;
 839                                netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
 840                        }
 841                        ring = &nlk_sk(sk)->rx_ring;
 842                }
 843
 844                WARN_ON(atomic_read(&ring->pending) == 0);
 845                atomic_dec(&ring->pending);
 846                sock_put(sk);
 847
 848                skb->head = NULL;
 849        }
 850#endif
 851        if (is_vmalloc_addr(skb->head)) {
 852                if (!skb->cloned ||
 853                    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
 854                        vfree(skb->head);
 855
 856                skb->head = NULL;
 857        }
 858        if (skb->sk != NULL)
 859                sock_rfree(skb);
 860}
 861
 862static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 863{
 864        WARN_ON(skb->sk != NULL);
 865        skb->sk = sk;
 866        skb->destructor = netlink_skb_destructor;
 867        atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 868        sk_mem_charge(sk, skb->truesize);
 869}
 870
 871static void netlink_sock_destruct(struct sock *sk)
 872{
 873        struct netlink_sock *nlk = nlk_sk(sk);
 874
 875        if (nlk->cb) {
 876                if (nlk->cb->done)
 877                        nlk->cb->done(nlk->cb);
 878
 879                module_put(nlk->cb->module);
 880                netlink_destroy_callback(nlk->cb);
 881        }
 882
 883        skb_queue_purge(&sk->sk_receive_queue);
 884#ifdef CONFIG_NETLINK_MMAP
 885        if (1) {
 886                struct nl_mmap_req req;
 887
 888                memset(&req, 0, sizeof(req));
 889                if (nlk->rx_ring.pg_vec)
 890                        netlink_set_ring(sk, &req, true, false);
 891                memset(&req, 0, sizeof(req));
 892                if (nlk->tx_ring.pg_vec)
 893                        netlink_set_ring(sk, &req, true, true);
 894        }
 895#endif /* CONFIG_NETLINK_MMAP */
 896
 897        if (!sock_flag(sk, SOCK_DEAD)) {
 898                printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
 899                return;
 900        }
 901
 902        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
 903        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 904        WARN_ON(nlk_sk(sk)->groups);
 905}
 906
 907/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 908 * SMP. Look, when several writers sleep and reader wakes them up, all but one
 909 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 910 * this, _but_ remember, it adds useless work on UP machines.
 911 */
 912
 913void netlink_table_grab(void)
 914        __acquires(nl_table_lock)
 915{
 916        might_sleep();
 917
 918        write_lock_irq(&nl_table_lock);
 919
 920        if (atomic_read(&nl_table_users)) {
 921                DECLARE_WAITQUEUE(wait, current);
 922
 923                add_wait_queue_exclusive(&nl_table_wait, &wait);
 924                for (;;) {
 925                        set_current_state(TASK_UNINTERRUPTIBLE);
 926                        if (atomic_read(&nl_table_users) == 0)
 927                                break;
 928                        write_unlock_irq(&nl_table_lock);
 929                        schedule();
 930                        write_lock_irq(&nl_table_lock);
 931                }
 932
 933                __set_current_state(TASK_RUNNING);
 934                remove_wait_queue(&nl_table_wait, &wait);
 935        }
 936}
 937
 938void netlink_table_ungrab(void)
 939        __releases(nl_table_lock)
 940{
 941        write_unlock_irq(&nl_table_lock);
 942        wake_up(&nl_table_wait);
 943}
 944
 945static inline void
 946netlink_lock_table(void)
 947{
 948        /* read_lock() synchronizes us to netlink_table_grab */
 949
 950        read_lock(&nl_table_lock);
 951        atomic_inc(&nl_table_users);
 952        read_unlock(&nl_table_lock);
 953}
 954
 955static inline void
 956netlink_unlock_table(void)
 957{
 958        if (atomic_dec_and_test(&nl_table_users))
 959                wake_up(&nl_table_wait);
 960}
 961
 962static bool netlink_compare(struct net *net, struct sock *sk)
 963{
 964        return net_eq(sock_net(sk), net);
 965}
 966
 967static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
 968{
 969        struct netlink_table *table = &nl_table[protocol];
 970        struct nl_portid_hash *hash = &table->hash;
 971        struct hlist_head *head;
 972        struct sock *sk;
 973
 974        read_lock(&nl_table_lock);
 975        head = nl_portid_hashfn(hash, portid);
 976        sk_for_each(sk, head) {
 977                if (table->compare(net, sk) &&
 978                    (nlk_sk(sk)->portid == portid)) {
 979                        sock_hold(sk);
 980                        goto found;
 981                }
 982        }
 983        sk = NULL;
 984found:
 985        read_unlock(&nl_table_lock);
 986        return sk;
 987}
 988
 989static struct hlist_head *nl_portid_hash_zalloc(size_t size)
 990{
 991        if (size <= PAGE_SIZE)
 992                return kzalloc(size, GFP_ATOMIC);
 993        else
 994                return (struct hlist_head *)
 995                        __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
 996                                         get_order(size));
 997}
 998
 999static void nl_portid_hash_free(struct hlist_head *table, size_t size)
1000{
1001        if (size <= PAGE_SIZE)
1002                kfree(table);
1003        else
1004                free_pages((unsigned long)table, get_order(size));
1005}
1006
1007static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
1008{
1009        unsigned int omask, mask, shift;
1010        size_t osize, size;
1011        struct hlist_head *otable, *table;
1012        int i;
1013
1014        omask = mask = hash->mask;
1015        osize = size = (mask + 1) * sizeof(*table);
1016        shift = hash->shift;
1017
1018        if (grow) {
1019                if (++shift > hash->max_shift)
1020                        return 0;
1021                mask = mask * 2 + 1;
1022                size *= 2;
1023        }
1024
1025        table = nl_portid_hash_zalloc(size);
1026        if (!table)
1027                return 0;
1028
1029        otable = hash->table;
1030        hash->table = table;
1031        hash->mask = mask;
1032        hash->shift = shift;
1033        get_random_bytes(&hash->rnd, sizeof(hash->rnd));
1034
1035        for (i = 0; i <= omask; i++) {
1036                struct sock *sk;
1037                struct hlist_node *tmp;
1038
1039                sk_for_each_safe(sk, tmp, &otable[i])
1040                        __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
1041        }
1042
1043        nl_portid_hash_free(otable, osize);
1044        hash->rehash_time = jiffies + 10 * 60 * HZ;
1045        return 1;
1046}
1047
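/* Grow the portid hash once the average chain length exceeds one; otherwise,
 * when the chain just walked was longer than average, rehash with a fresh
 * random seed at most once every ten minutes.
 */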
1048static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
1049{
1050        int avg = hash->entries >> hash->shift;
1051
1052        if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
1053                return 1;
1054
1055        if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
1056                nl_portid_hash_rehash(hash, 0);
1057                return 1;
1058        }
1059
1060        return 0;
1061}
1062
1063static const struct proto_ops netlink_ops;
1064
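/* Recompute the per-group listener bitmaps of this protocol from all sockets
 * bound to multicast groups; called with the netlink table grabbed.
 */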
1065static void
1066netlink_update_listeners(struct sock *sk)
1067{
1068        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1069        unsigned long mask;
1070        unsigned int i;
1071        struct listeners *listeners;
1072
1073        listeners = nl_deref_protected(tbl->listeners);
1074        if (!listeners)
1075                return;
1076
1077        for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
1078                mask = 0;
1079                sk_for_each_bound(sk, &tbl->mc_list) {
1080                        if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
1081                                mask |= nlk_sk(sk)->groups[i];
1082                }
1083                listeners->masks[i] = mask;
1084        }
1085        /* this function is only called with the netlink table "grabbed", which
1086         * makes sure updates are visible before bind or setsockopt return. */
1087}
1088
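/* Bind sk to portid in the hash: fails with -EADDRINUSE if the portid is
 * already taken and with -EBUSY if the socket is already bound.  A long
 * chain may trigger a rehash before the socket is inserted.
 */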
1089static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
1090{
1091        struct netlink_table *table = &nl_table[sk->sk_protocol];
1092        struct nl_portid_hash *hash = &table->hash;
1093        struct hlist_head *head;
1094        int err = -EADDRINUSE;
1095        struct sock *osk;
1096        int len;
1097
1098        netlink_table_grab();
1099        head = nl_portid_hashfn(hash, portid);
1100        len = 0;
1101        sk_for_each(osk, head) {
1102                if (table->compare(net, osk) &&
1103                    (nlk_sk(osk)->portid == portid))
1104                        break;
1105                len++;
1106        }
1107        if (osk)
1108                goto err;
1109
1110        err = -EBUSY;
1111        if (nlk_sk(sk)->portid)
1112                goto err;
1113
1114        err = -ENOMEM;
1115        if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
1116                goto err;
1117
1118        if (len && nl_portid_hash_dilute(hash, len))
1119                head = nl_portid_hashfn(hash, portid);
1120        hash->entries++;
1121        nlk_sk(sk)->portid = portid;
1122        sk_add_node(sk, head);
1123        err = 0;
1124
1125err:
1126        netlink_table_ungrab();
1127        return err;
1128}
1129
1130static void netlink_remove(struct sock *sk)
1131{
1132        netlink_table_grab();
1133        if (sk_del_node_init(sk))
1134                nl_table[sk->sk_protocol].hash.entries--;
1135        if (nlk_sk(sk)->subscriptions)
1136                __sk_del_bind_node(sk);
1137        netlink_table_ungrab();
1138}
1139
1140static struct proto netlink_proto = {
1141        .name     = "NETLINK",
1142        .owner    = THIS_MODULE,
1143        .obj_size = sizeof(struct netlink_sock),
1144};
1145
1146static int __netlink_create(struct net *net, struct socket *sock,
1147                            struct mutex *cb_mutex, int protocol)
1148{
1149        struct sock *sk;
1150        struct netlink_sock *nlk;
1151
1152        sock->ops = &netlink_ops;
1153
1154        sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
1155        if (!sk)
1156                return -ENOMEM;
1157
1158        sock_init_data(sock, sk);
1159
1160        nlk = nlk_sk(sk);
1161        if (cb_mutex) {
1162                nlk->cb_mutex = cb_mutex;
1163        } else {
1164                nlk->cb_mutex = &nlk->cb_def_mutex;
1165                mutex_init(nlk->cb_mutex);
1166        }
1167        init_waitqueue_head(&nlk->wait);
1168#ifdef CONFIG_NETLINK_MMAP
1169        mutex_init(&nlk->pg_vec_lock);
1170#endif
1171
1172        sk->sk_destruct = netlink_sock_destruct;
1173        sk->sk_protocol = protocol;
1174        return 0;
1175}
1176
1177static int netlink_create(struct net *net, struct socket *sock, int protocol,
1178                          int kern)
1179{
1180        struct module *module = NULL;
1181        struct mutex *cb_mutex;
1182        struct netlink_sock *nlk;
1183        void (*bind)(int group);
1184        int err = 0;
1185
1186        sock->state = SS_UNCONNECTED;
1187
1188        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
1189                return -ESOCKTNOSUPPORT;
1190
1191        if (protocol < 0 || protocol >= MAX_LINKS)
1192                return -EPROTONOSUPPORT;
1193
1194        netlink_lock_table();
1195#ifdef CONFIG_MODULES
1196        if (!nl_table[protocol].registered) {
1197                netlink_unlock_table();
1198                request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
1199                netlink_lock_table();
1200        }
1201#endif
1202        if (nl_table[protocol].registered &&
1203            try_module_get(nl_table[protocol].module))
1204                module = nl_table[protocol].module;
1205        else
1206                err = -EPROTONOSUPPORT;
1207        cb_mutex = nl_table[protocol].cb_mutex;
1208        bind = nl_table[protocol].bind;
1209        netlink_unlock_table();
1210
1211        if (err < 0)
1212                goto out;
1213
1214        err = __netlink_create(net, sock, cb_mutex, protocol);
1215        if (err < 0)
1216                goto out_module;
1217
1218        local_bh_disable();
1219        sock_prot_inuse_add(net, &netlink_proto, 1);
1220        local_bh_enable();
1221
1222        nlk = nlk_sk(sock->sk);
1223        nlk->module = module;
1224        nlk->netlink_bind = bind;
1225out:
1226        return err;
1227
1228out_module:
1229        module_put(module);
1230        goto out;
1231}
1232
1233static int netlink_release(struct socket *sock)
1234{
1235        struct sock *sk = sock->sk;
1236        struct netlink_sock *nlk;
1237
1238        if (!sk)
1239                return 0;
1240
1241        netlink_remove(sk);
1242        sock_orphan(sk);
1243        nlk = nlk_sk(sk);
1244
1245        /*
1246         * OK. Socket is unlinked, any packets that arrive now
1247         * will be purged.
1248         */
1249
1250        sock->sk = NULL;
1251        wake_up_interruptible_all(&nlk->wait);
1252
1253        skb_queue_purge(&sk->sk_write_queue);
1254
1255        if (nlk->portid) {
1256                struct netlink_notify n = {
1257                                                .net = sock_net(sk),
1258                                                .protocol = sk->sk_protocol,
1259                                                .portid = nlk->portid,
1260                                          };
1261                atomic_notifier_call_chain(&netlink_chain,
1262                                NETLINK_URELEASE, &n);
1263        }
1264
1265        module_put(nlk->module);
1266
1267        netlink_table_grab();
1268        if (netlink_is_kernel(sk)) {
1269                BUG_ON(nl_table[sk->sk_protocol].registered == 0);
1270                if (--nl_table[sk->sk_protocol].registered == 0) {
1271                        struct listeners *old;
1272
1273                        old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
1274                        RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
1275                        kfree_rcu(old, rcu);
1276                        nl_table[sk->sk_protocol].module = NULL;
1277                        nl_table[sk->sk_protocol].bind = NULL;
1278                        nl_table[sk->sk_protocol].flags = 0;
1279                        nl_table[sk->sk_protocol].registered = 0;
1280                }
1281        } else if (nlk->subscriptions) {
1282                netlink_update_listeners(sk);
1283        }
1284        netlink_table_ungrab();
1285
1286        kfree(nlk->groups);
1287        nlk->groups = NULL;
1288
1289        local_bh_disable();
1290        sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
1291        local_bh_enable();
1292        sock_put(sk);
1293        return 0;
1294}
1295
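/* Pick a portid automatically: start with the thread group id and, on
 * collision, walk negative values starting at -4097 until netlink_insert()
 * succeeds.
 */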
1296static int netlink_autobind(struct socket *sock)
1297{
1298        struct sock *sk = sock->sk;
1299        struct net *net = sock_net(sk);
1300        struct netlink_table *table = &nl_table[sk->sk_protocol];
1301        struct nl_portid_hash *hash = &table->hash;
1302        struct hlist_head *head;
1303        struct sock *osk;
1304        s32 portid = task_tgid_vnr(current);
1305        int err;
1306        static s32 rover = -4097;
1307
1308retry:
1309        cond_resched();
1310        netlink_table_grab();
1311        head = nl_portid_hashfn(hash, portid);
1312        sk_for_each(osk, head) {
1313                if (!table->compare(net, osk))
1314                        continue;
1315                if (nlk_sk(osk)->portid == portid) {
1316                        /* Bind collision, search negative portid values. */
1317                        portid = rover--;
1318                        if (rover > -4097)
1319                                rover = -4097;
1320                        netlink_table_ungrab();
1321                        goto retry;
1322                }
1323        }
1324        netlink_table_ungrab();
1325
1326        err = netlink_insert(sk, net, portid);
1327        if (err == -EADDRINUSE)
1328                goto retry;
1329
1330        /* If 2 threads race to autobind, that is fine.  */
1331        if (err == -EBUSY)
1332                err = 0;
1333
1334        return err;
1335}
1336
1337static inline int netlink_capable(const struct socket *sock, unsigned int flag)
1338{
1339        return (nl_table[sock->sk->sk_protocol].flags & flag) ||
1340                ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
1341}
1342
1343static void
1344netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
1345{
1346        struct netlink_sock *nlk = nlk_sk(sk);
1347
1348        if (nlk->subscriptions && !subscriptions)
1349                __sk_del_bind_node(sk);
1350        else if (!nlk->subscriptions && subscriptions)
1351                sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
1352        nlk->subscriptions = subscriptions;
1353}
1354
1355static int netlink_realloc_groups(struct sock *sk)
1356{
1357        struct netlink_sock *nlk = nlk_sk(sk);
1358        unsigned int groups;
1359        unsigned long *new_groups;
1360        int err = 0;
1361
1362        netlink_table_grab();
1363
1364        groups = nl_table[sk->sk_protocol].groups;
1365        if (!nl_table[sk->sk_protocol].registered) {
1366                err = -ENOENT;
1367                goto out_unlock;
1368        }
1369
1370        if (nlk->ngroups >= groups)
1371                goto out_unlock;
1372
1373        new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
1374        if (new_groups == NULL) {
1375                err = -ENOMEM;
1376                goto out_unlock;
1377        }
1378        memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
1379               NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
1380
1381        nlk->groups = new_groups;
1382        nlk->ngroups = groups;
1383 out_unlock:
1384        netlink_table_ungrab();
1385        return err;
1386}
1387
1388static int netlink_bind(struct socket *sock, struct sockaddr *addr,
1389                        int addr_len)
1390{
1391        struct sock *sk = sock->sk;
1392        struct net *net = sock_net(sk);
1393        struct netlink_sock *nlk = nlk_sk(sk);
1394        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
1395        int err;
1396
1397        if (addr_len < sizeof(struct sockaddr_nl))
1398                return -EINVAL;
1399
1400        if (nladdr->nl_family != AF_NETLINK)
1401                return -EINVAL;
1402
 1403        /* Only superuser is allowed to listen to multicasts */
1404        if (nladdr->nl_groups) {
1405                if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
1406                        return -EPERM;
1407                err = netlink_realloc_groups(sk);
1408                if (err)
1409                        return err;
1410        }
1411
1412        if (nlk->portid) {
1413                if (nladdr->nl_pid != nlk->portid)
1414                        return -EINVAL;
1415        } else {
1416                err = nladdr->nl_pid ?
1417                        netlink_insert(sk, net, nladdr->nl_pid) :
1418                        netlink_autobind(sock);
1419                if (err)
1420                        return err;
1421        }
1422
1423        if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
1424                return 0;
1425
1426        netlink_table_grab();
1427        netlink_update_subscriptions(sk, nlk->subscriptions +
1428                                         hweight32(nladdr->nl_groups) -
1429                                         hweight32(nlk->groups[0]));
1430        nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
1431        netlink_update_listeners(sk);
1432        netlink_table_ungrab();
1433
1434        if (nlk->netlink_bind && nlk->groups[0]) {
1435                int i;
1436
1437                for (i=0; i<nlk->ngroups; i++) {
1438                        if (test_bit(i, nlk->groups))
1439                                nlk->netlink_bind(i);
1440                }
1441        }
1442
1443        return 0;
1444}
1445
1446static int netlink_connect(struct socket *sock, struct sockaddr *addr,
1447                           int alen, int flags)
1448{
1449        int err = 0;
1450        struct sock *sk = sock->sk;
1451        struct netlink_sock *nlk = nlk_sk(sk);
1452        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
1453
1454        if (alen < sizeof(addr->sa_family))
1455                return -EINVAL;
1456
1457        if (addr->sa_family == AF_UNSPEC) {
1458                sk->sk_state    = NETLINK_UNCONNECTED;
1459                nlk->dst_portid = 0;
1460                nlk->dst_group  = 0;
1461                return 0;
1462        }
1463        if (addr->sa_family != AF_NETLINK)
1464                return -EINVAL;
1465
1466        /* Only superuser is allowed to send multicasts */
1467        if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
1468                return -EPERM;
1469
1470        if (!nlk->portid)
1471                err = netlink_autobind(sock);
1472
1473        if (err == 0) {
1474                sk->sk_state    = NETLINK_CONNECTED;
1475                nlk->dst_portid = nladdr->nl_pid;
1476                nlk->dst_group  = ffs(nladdr->nl_groups);
1477        }
1478
1479        return err;
1480}
1481
1482static int netlink_getname(struct socket *sock, struct sockaddr *addr,
1483                           int *addr_len, int peer)
1484{
1485        struct sock *sk = sock->sk;
1486        struct netlink_sock *nlk = nlk_sk(sk);
1487        DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
1488
1489        nladdr->nl_family = AF_NETLINK;
1490        nladdr->nl_pad = 0;
1491        *addr_len = sizeof(*nladdr);
1492
1493        if (peer) {
1494                nladdr->nl_pid = nlk->dst_portid;
1495                nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
1496        } else {
1497                nladdr->nl_pid = nlk->portid;
1498                nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
1499        }
1500        return 0;
1501}
1502
1503static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
1504{
1505        struct sock *sock;
1506        struct netlink_sock *nlk;
1507
1508        sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
1509        if (!sock)
1510                return ERR_PTR(-ECONNREFUSED);
1511
1512        /* Don't bother queuing skb if kernel socket has no input function */
1513        nlk = nlk_sk(sock);
1514        if (sock->sk_state == NETLINK_CONNECTED &&
1515            nlk->dst_portid != nlk_sk(ssk)->portid) {
1516                sock_put(sock);
1517                return ERR_PTR(-ECONNREFUSED);
1518        }
1519        return sock;
1520}
1521
1522struct sock *netlink_getsockbyfilp(struct file *filp)
1523{
1524        struct inode *inode = file_inode(filp);
1525        struct sock *sock;
1526
1527        if (!S_ISSOCK(inode->i_mode))
1528                return ERR_PTR(-ENOTSOCK);
1529
1530        sock = SOCKET_I(inode)->sk;
1531        if (sock->sk_family != AF_NETLINK)
1532                return ERR_PTR(-EINVAL);
1533
1534        sock_hold(sock);
1535        return sock;
1536}
1537
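/* Large unicast messages (above NLMSG_GOODSIZE) are backed by vmalloc()ed
 * memory to avoid high-order allocations; broadcast skbs keep the regular
 * alloc_skb() path.
 */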
1538static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
1539                                               int broadcast)
1540{
1541        struct sk_buff *skb;
1542        void *data;
1543
1544        if (size <= NLMSG_GOODSIZE || broadcast)
1545                return alloc_skb(size, GFP_KERNEL);
1546
1547        size = SKB_DATA_ALIGN(size) +
1548               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1549
1550        data = vmalloc(size);
1551        if (data == NULL)
1552                return NULL;
1553
1554        skb = build_skb(data, size);
1555        if (skb == NULL)
1556                vfree(data);
1557        else {
1558                skb->head_frag = 0;
1559                skb->destructor = netlink_skb_destructor;
1560        }
1561
1562        return skb;
1563}
1564
1565/*
1566 * Attach a skb to a netlink socket.
1567 * The caller must hold a reference to the destination socket. On error, the
 1568 * reference is dropped. The skb is not sent to the destination; only the
 1569 * error checks are performed and memory in the queue is reserved.
1570 * Return values:
1571 * < 0: error. skb freed, reference to sock dropped.
1572 * 0: continue
1573 * 1: repeat lookup - reference dropped while waiting for socket memory.
1574 */
1575int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
1576                      long *timeo, struct sock *ssk)
1577{
1578        struct netlink_sock *nlk;
1579
1580        nlk = nlk_sk(sk);
1581
1582        if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1583             test_bit(NETLINK_CONGESTED, &nlk->state)) &&
1584            !netlink_skb_is_mmaped(skb)) {
1585                DECLARE_WAITQUEUE(wait, current);
1586                if (!*timeo) {
1587                        if (!ssk || netlink_is_kernel(ssk))
1588                                netlink_overrun(sk);
1589                        sock_put(sk);
1590                        kfree_skb(skb);
1591                        return -EAGAIN;
1592                }
1593
1594                __set_current_state(TASK_INTERRUPTIBLE);
1595                add_wait_queue(&nlk->wait, &wait);
1596
1597                if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1598                     test_bit(NETLINK_CONGESTED, &nlk->state)) &&
1599                    !sock_flag(sk, SOCK_DEAD))
1600                        *timeo = schedule_timeout(*timeo);
1601
1602                __set_current_state(TASK_RUNNING);
1603                remove_wait_queue(&nlk->wait, &wait);
1604                sock_put(sk);
1605
1606                if (signal_pending(current)) {
1607                        kfree_skb(skb);
1608                        return sock_intr_errno(*timeo);
1609                }
1610                return 1;
1611        }
1612        netlink_skb_set_owner_r(skb, sk);
1613        return 0;
1614}
1615
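/* Final delivery to a user-space socket: mirror the skb to any taps, then
 * either complete the mmap ring hand-off or queue it on sk_receive_queue,
 * and wake the reader via sk_data_ready().
 */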
1616static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1617{
1618        int len = skb->len;
1619
1620        netlink_deliver_tap(skb);
1621
1622#ifdef CONFIG_NETLINK_MMAP
1623        if (netlink_skb_is_mmaped(skb))
1624                netlink_queue_mmaped_skb(sk, skb);
1625        else if (netlink_rx_is_mmaped(sk))
1626                netlink_ring_set_copied(sk, skb);
1627        else
1628#endif /* CONFIG_NETLINK_MMAP */
1629                skb_queue_tail(&sk->sk_receive_queue, skb);
1630        sk->sk_data_ready(sk, len);
1631        return len;
1632}
1633
1634int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1635{
1636        int len = __netlink_sendskb(sk, skb);
1637
1638        sock_put(sk);
1639        return len;
1640}
1641
1642void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
1643{
1644        kfree_skb(skb);
1645        sock_put(sk);
1646}
1647
1648static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
1649{
1650        int delta;
1651
1652        WARN_ON(skb->sk != NULL);
1653        if (netlink_skb_is_mmaped(skb))
1654                return skb;
1655
1656        delta = skb->end - skb->tail;
1657        if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
1658                return skb;
1659
1660        if (skb_shared(skb)) {
1661                struct sk_buff *nskb = skb_clone(skb, allocation);
1662                if (!nskb)
1663                        return skb;
1664                consume_skb(skb);
1665                skb = nskb;
1666        }
1667
1668        if (!pskb_expand_head(skb, 0, -delta, allocation))
1669                skb->truesize -= delta;
1670
1671        return skb;
1672}
1673
1674static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
1675                                  struct sock *ssk)
1676{
1677        int ret;
1678        struct netlink_sock *nlk = nlk_sk(sk);
1679
1680        ret = -ECONNREFUSED;
1681        if (nlk->netlink_rcv != NULL) {
1682                /* We could do a netlink_deliver_tap(skb) here as well
1683                 * but since this is intended for the kernel only, we
1684                 * should rather let it stay under the hood.
1685                 */
1686
1687                ret = skb->len;
1688                netlink_skb_set_owner_r(skb, sk);
1689                NETLINK_CB(skb).sk = ssk;
1690                nlk->netlink_rcv(skb);
1691                consume_skb(skb);
1692        } else {
1693                kfree_skb(skb);
1694        }
1695        sock_put(sk);
1696        return ret;
1697}
1698
1699int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
1700                    u32 portid, int nonblock)
1701{
1702        struct sock *sk;
1703        int err;
1704        long timeo;
1705
1706        skb = netlink_trim(skb, gfp_any());
1707
1708        timeo = sock_sndtimeo(ssk, nonblock);
1709retry:
1710        sk = netlink_getsockbyportid(ssk, portid);
1711        if (IS_ERR(sk)) {
1712                kfree_skb(skb);
1713                return PTR_ERR(sk);
1714        }
1715        if (netlink_is_kernel(sk))
1716                return netlink_unicast_kernel(sk, skb, ssk);
1717
1718        if (sk_filter(sk, skb)) {
1719                err = skb->len;
1720                kfree_skb(skb);
1721                sock_put(sk);
1722                return err;
1723        }
1724
1725        err = netlink_attachskb(sk, skb, &timeo, ssk);
1726        if (err == 1)
1727                goto retry;
1728        if (err)
1729                return err;
1730
1731        return netlink_sendskb(sk, skb);
1732}
1733EXPORT_SYMBOL(netlink_unicast);
1734
1735struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
1736                                  u32 dst_portid, gfp_t gfp_mask)
1737{
1738#ifdef CONFIG_NETLINK_MMAP
1739        struct sock *sk = NULL;
1740        struct sk_buff *skb;
1741        struct netlink_ring *ring;
1742        struct nl_mmap_hdr *hdr;
1743        unsigned int maxlen;
1744
1745        sk = netlink_getsockbyportid(ssk, dst_portid);
1746        if (IS_ERR(sk))
1747                goto out;
1748
1749        ring = &nlk_sk(sk)->rx_ring;
1750        /* fast-path without atomic ops for common case: non-mmaped receiver */
1751        if (ring->pg_vec == NULL)
1752                goto out_put;
1753
1754        skb = alloc_skb_head(gfp_mask);
1755        if (skb == NULL)
1756                goto err1;
1757
1758        spin_lock_bh(&sk->sk_receive_queue.lock);
1759        /* check again under lock */
1760        if (ring->pg_vec == NULL)
1761                goto out_free;
1762
1763        maxlen = ring->frame_size - NL_MMAP_HDRLEN;
1764        if (maxlen < size)
1765                goto out_free;
1766
1767        netlink_forward_ring(ring);
1768        hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
1769        if (hdr == NULL)
1770                goto err2;
1771        netlink_ring_setup_skb(skb, sk, ring, hdr);
1772        netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
1773        atomic_inc(&ring->pending);
1774        netlink_increment_head(ring);
1775
1776        spin_unlock_bh(&sk->sk_receive_queue.lock);
1777        return skb;
1778
1779err2:
1780        kfree_skb(skb);
1781        spin_unlock_bh(&sk->sk_receive_queue.lock);
1782        netlink_overrun(sk);
1783err1:
1784        sock_put(sk);
1785        return NULL;
1786
1787out_free:
1788        kfree_skb(skb);
1789        spin_unlock_bh(&sk->sk_receive_queue.lock);
1790out_put:
1791        sock_put(sk);
1792out:
1793#endif
1794        return alloc_skb(size, gfp_mask);
1795}
1796EXPORT_SYMBOL_GPL(netlink_alloc_skb);
1797
1798int netlink_has_listeners(struct sock *sk, unsigned int group)
1799{
1800        int res = 0;
1801        struct listeners *listeners;
1802
1803        BUG_ON(!netlink_is_kernel(sk));
1804
1805        rcu_read_lock();
1806        listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
1807
1808        if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
1809                res = test_bit(group - 1, listeners->masks);
1810
1811        rcu_read_unlock();
1812
1813        return res;
1814}
1815EXPORT_SYMBOL_GPL(netlink_has_listeners);
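/*
 * Usage sketch (illustrative only): a subsystem that emits multicast
 * notifications can check netlink_has_listeners() first and skip building
 * the message when nobody is subscribed. MY_MCAST_GRP, my_kernel_sk and
 * build_my_notification() are hypothetical.
 *
 *	if (netlink_has_listeners(my_kernel_sk, MY_MCAST_GRP)) {
 *		struct sk_buff *skb = build_my_notification(GFP_KERNEL);
 *
 *		if (skb)
 *			netlink_broadcast(my_kernel_sk, skb, 0, MY_MCAST_GRP,
 *					  GFP_KERNEL);
 *	}
 */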
1816
1817static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
1818{
1819        struct netlink_sock *nlk = nlk_sk(sk);
1820
1821        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
1822            !test_bit(NETLINK_CONGESTED, &nlk->state)) {
1823                netlink_skb_set_owner_r(skb, sk);
1824                __netlink_sendskb(sk, skb);
1825                return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
1826        }
1827        return -1;
1828}
1829
1830struct netlink_broadcast_data {
1831        struct sock *exclude_sk;
1832        struct net *net;
1833        u32 portid;
1834        u32 group;
1835        int failure;
1836        int delivery_failure;
1837        int congested;
1838        int delivered;
1839        gfp_t allocation;
1840        struct sk_buff *skb, *skb2;
1841        int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
1842        void *tx_data;
1843};
1844
1845static int do_one_broadcast(struct sock *sk,
1846                                   struct netlink_broadcast_data *p)
1847{
1848        struct netlink_sock *nlk = nlk_sk(sk);
1849        int val;
1850
1851        if (p->exclude_sk == sk)
1852                goto out;
1853
1854        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1855            !test_bit(p->group - 1, nlk->groups))
1856                goto out;
1857
1858        if (!net_eq(sock_net(sk), p->net))
1859                goto out;
1860
1861        if (p->failure) {
1862                netlink_overrun(sk);
1863                goto out;
1864        }
1865
1866        sock_hold(sk);
1867        if (p->skb2 == NULL) {
1868                if (skb_shared(p->skb)) {
1869                        p->skb2 = skb_clone(p->skb, p->allocation);
1870                } else {
1871                        p->skb2 = skb_get(p->skb);
1872                        /*
1873                         * skb ownership may have been set when
1874                         * delivered to a previous socket.
1875                         */
1876                        skb_orphan(p->skb2);
1877                }
1878        }
1879        if (p->skb2 == NULL) {
1880                netlink_overrun(sk);
1881                /* Clone failed. Notify ALL listeners. */
1882                p->failure = 1;
1883                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1884                        p->delivery_failure = 1;
1885        } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1886                kfree_skb(p->skb2);
1887                p->skb2 = NULL;
1888        } else if (sk_filter(sk, p->skb2)) {
1889                kfree_skb(p->skb2);
1890                p->skb2 = NULL;
1891        } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
1892                netlink_overrun(sk);
1893                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1894                        p->delivery_failure = 1;
1895        } else {
1896                p->congested |= val;
1897                p->delivered = 1;
1898                p->skb2 = NULL;
1899        }
1900        sock_put(sk);
1901
1902out:
1903        return 0;
1904}
1905
1906int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
1907        u32 group, gfp_t allocation,
1908        int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
1909        void *filter_data)
1910{
1911        struct net *net = sock_net(ssk);
1912        struct netlink_broadcast_data info;
1913        struct sock *sk;
1914
1915        skb = netlink_trim(skb, allocation);
1916
1917        info.exclude_sk = ssk;
1918        info.net = net;
1919        info.portid = portid;
1920        info.group = group;
1921        info.failure = 0;
1922        info.delivery_failure = 0;
1923        info.congested = 0;
1924        info.delivered = 0;
1925        info.allocation = allocation;
1926        info.skb = skb;
1927        info.skb2 = NULL;
1928        info.tx_filter = filter;
1929        info.tx_data = filter_data;
1930
1931        /* While we sleep in clone, do not allow the socket list to change */
1932
1933        netlink_lock_table();
1934
1935        sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
1936                do_one_broadcast(sk, &info);
1937
1938        consume_skb(skb);
1939
1940        netlink_unlock_table();
1941
1942        if (info.delivery_failure) {
1943                kfree_skb(info.skb2);
1944                return -ENOBUFS;
1945        }
1946        consume_skb(info.skb2);
1947
1948        if (info.delivered) {
1949                if (info.congested && (allocation & __GFP_WAIT))
1950                        yield();
1951                return 0;
1952        }
1953        return -ESRCH;
1954}
1955EXPORT_SYMBOL(netlink_broadcast_filtered);
1956
1957int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
1958                      u32 group, gfp_t allocation)
1959{
1960        return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
1961                NULL, NULL);
1962}
1963EXPORT_SYMBOL(netlink_broadcast);
1964
1965struct netlink_set_err_data {
1966        struct sock *exclude_sk;
1967        u32 portid;
1968        u32 group;
1969        int code;
1970};
1971
1972static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
1973{
1974        struct netlink_sock *nlk = nlk_sk(sk);
1975        int ret = 0;
1976
1977        if (sk == p->exclude_sk)
1978                goto out;
1979
1980        if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
1981                goto out;
1982
1983        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1984            !test_bit(p->group - 1, nlk->groups))
1985                goto out;
1986
1987        if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
1988                ret = 1;
1989                goto out;
1990        }
1991
1992        sk->sk_err = p->code;
1993        sk->sk_error_report(sk);
1994out:
1995        return ret;
1996}
1997
1998/**
1999 * netlink_set_err - report error to broadcast listeners
2000 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
2001 * @portid: the PORTID of a process that we want to skip (if any)
2002 * @group: the broadcast group that will notice the error
2003 * @code: error code, must be negative (as usual in kernelspace)
2004 *
2005 * This function returns the number of broadcast listeners that have set the
2006 * NETLINK_RECV_NO_ENOBUFS socket option.
2007 */
2008int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
2009{
2010        struct netlink_set_err_data info;
2011        struct sock *sk;
2012        int ret = 0;
2013
2014        info.exclude_sk = ssk;
2015        info.portid = portid;
2016        info.group = group;
2017        /* sk->sk_err wants a positive error value */
2018        info.code = -code;
2019
2020        read_lock(&nl_table_lock);
2021
2022        sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
2023                ret += do_one_set_err(sk, &info);
2024
2025        read_unlock(&nl_table_lock);
2026        return ret;
2027}
2028EXPORT_SYMBOL(netlink_set_err);
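/*
 * Usage sketch (illustrative only): when a multicast notification cannot
 * be delivered, a protocol may use netlink_set_err() to raise the error
 * on every member of the group except the sender. my_kernel_sk and
 * MY_MCAST_GRP are hypothetical.
 *
 *	err = netlink_broadcast(my_kernel_sk, skb, 0, MY_MCAST_GRP, GFP_KERNEL);
 *	if (err == -ENOBUFS)
 *		netlink_set_err(my_kernel_sk, 0, MY_MCAST_GRP, -ENOBUFS);
 */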
2029
2030/* must be called with netlink table grabbed */
2031static void netlink_update_socket_mc(struct netlink_sock *nlk,
2032                                     unsigned int group,
2033                                     int is_new)
2034{
2035        int old, new = !!is_new, subscriptions;
2036
2037        old = test_bit(group - 1, nlk->groups);
2038        subscriptions = nlk->subscriptions - old + new;
2039        if (new)
2040                __set_bit(group - 1, nlk->groups);
2041        else
2042                __clear_bit(group - 1, nlk->groups);
2043        netlink_update_subscriptions(&nlk->sk, subscriptions);
2044        netlink_update_listeners(&nlk->sk);
2045}
2046
2047static int netlink_setsockopt(struct socket *sock, int level, int optname,
2048                              char __user *optval, unsigned int optlen)
2049{
2050        struct sock *sk = sock->sk;
2051        struct netlink_sock *nlk = nlk_sk(sk);
2052        unsigned int val = 0;
2053        int err;
2054
2055        if (level != SOL_NETLINK)
2056                return -ENOPROTOOPT;
2057
2058        if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
2059            optlen >= sizeof(int) &&
2060            get_user(val, (unsigned int __user *)optval))
2061                return -EFAULT;
2062
2063        switch (optname) {
2064        case NETLINK_PKTINFO:
2065                if (val)
2066                        nlk->flags |= NETLINK_RECV_PKTINFO;
2067                else
2068                        nlk->flags &= ~NETLINK_RECV_PKTINFO;
2069                err = 0;
2070                break;
2071        case NETLINK_ADD_MEMBERSHIP:
2072        case NETLINK_DROP_MEMBERSHIP: {
2073                if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
2074                        return -EPERM;
2075                err = netlink_realloc_groups(sk);
2076                if (err)
2077                        return err;
2078                if (!val || val - 1 >= nlk->ngroups)
2079                        return -EINVAL;
2080                netlink_table_grab();
2081                netlink_update_socket_mc(nlk, val,
2082                                         optname == NETLINK_ADD_MEMBERSHIP);
2083                netlink_table_ungrab();
2084
2085                if (nlk->netlink_bind)
2086                        nlk->netlink_bind(val);
2087
2088                err = 0;
2089                break;
2090        }
2091        case NETLINK_BROADCAST_ERROR:
2092                if (val)
2093                        nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
2094                else
2095                        nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
2096                err = 0;
2097                break;
2098        case NETLINK_NO_ENOBUFS:
2099                if (val) {
2100                        nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
2101                        clear_bit(NETLINK_CONGESTED, &nlk->state);
2102                        wake_up_interruptible(&nlk->wait);
2103                } else {
2104                        nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
2105                }
2106                err = 0;
2107                break;
2108#ifdef CONFIG_NETLINK_MMAP
2109        case NETLINK_RX_RING:
2110        case NETLINK_TX_RING: {
2111                struct nl_mmap_req req;
2112
2113                /* Rings might consume more memory than the queue limits allow,
2114                 * so require CAP_NET_ADMIN.
2115                 */
2116                if (!capable(CAP_NET_ADMIN))
2117                        return -EPERM;
2118                if (optlen < sizeof(req))
2119                        return -EINVAL;
2120                if (copy_from_user(&req, optval, sizeof(req)))
2121                        return -EFAULT;
2122                err = netlink_set_ring(sk, &req, false,
2123                                       optname == NETLINK_TX_RING);
2124                break;
2125        }
2126#endif /* CONFIG_NETLINK_MMAP */
2127        default:
2128                err = -ENOPROTOOPT;
2129        }
2130        return err;
2131}
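/*
 * Userspace-side sketch (illustrative only): the options handled above are
 * set with setsockopt() at the SOL_NETLINK level, e.g. joining multicast
 * group 1 and opting out of ENOBUFS errors on a NETLINK_USERSOCK socket:
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_USERSOCK);
 *	unsigned int grp = 1, on = 1;
 *
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp));
 *	setsockopt(fd, SOL_NETLINK, NETLINK_NO_ENOBUFS, &on, sizeof(on));
 */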
2132
2133static int netlink_getsockopt(struct socket *sock, int level, int optname,
2134                              char __user *optval, int __user *optlen)
2135{
2136        struct sock *sk = sock->sk;
2137        struct netlink_sock *nlk = nlk_sk(sk);
2138        int len, val, err;
2139
2140        if (level != SOL_NETLINK)
2141                return -ENOPROTOOPT;
2142
2143        if (get_user(len, optlen))
2144                return -EFAULT;
2145        if (len < 0)
2146                return -EINVAL;
2147
2148        switch (optname) {
2149        case NETLINK_PKTINFO:
2150                if (len < sizeof(int))
2151                        return -EINVAL;
2152                len = sizeof(int);
2153                val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
2154                if (put_user(len, optlen) ||
2155                    put_user(val, optval))
2156                        return -EFAULT;
2157                err = 0;
2158                break;
2159        case NETLINK_BROADCAST_ERROR:
2160                if (len < sizeof(int))
2161                        return -EINVAL;
2162                len = sizeof(int);
2163                val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
2164                if (put_user(len, optlen) ||
2165                    put_user(val, optval))
2166                        return -EFAULT;
2167                err = 0;
2168                break;
2169        case NETLINK_NO_ENOBUFS:
2170                if (len < sizeof(int))
2171                        return -EINVAL;
2172                len = sizeof(int);
2173                val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
2174                if (put_user(len, optlen) ||
2175                    put_user(val, optval))
2176                        return -EFAULT;
2177                err = 0;
2178                break;
2179        default:
2180                err = -ENOPROTOOPT;
2181        }
2182        return err;
2183}
2184
2185static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
2186{
2187        struct nl_pktinfo info;
2188
2189        info.group = NETLINK_CB(skb).dst_group;
2190        put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
2191}
2192
2193static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
2194                           struct msghdr *msg, size_t len)
2195{
2196        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
2197        struct sock *sk = sock->sk;
2198        struct netlink_sock *nlk = nlk_sk(sk);
2199        struct sockaddr_nl *addr = msg->msg_name;
2200        u32 dst_portid;
2201        u32 dst_group;
2202        struct sk_buff *skb;
2203        int err;
2204        struct scm_cookie scm;
2205
2206        if (msg->msg_flags&MSG_OOB)
2207                return -EOPNOTSUPP;
2208
2209        if (NULL == siocb->scm)
2210                siocb->scm = &scm;
2211
2212        err = scm_send(sock, msg, siocb->scm, true);
2213        if (err < 0)
2214                return err;
2215
2216        if (msg->msg_namelen) {
2217                err = -EINVAL;
2218                if (addr->nl_family != AF_NETLINK)
2219                        goto out;
2220                dst_portid = addr->nl_pid;
2221                dst_group = ffs(addr->nl_groups);
2222                err =  -EPERM;
2223                if ((dst_group || dst_portid) &&
2224                    !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
2225                        goto out;
2226        } else {
2227                dst_portid = nlk->dst_portid;
2228                dst_group = nlk->dst_group;
2229        }
2230
2231        if (!nlk->portid) {
2232                err = netlink_autobind(sock);
2233                if (err)
2234                        goto out;
2235        }
2236
2237        if (netlink_tx_is_mmaped(sk) &&
2238            msg->msg_iov->iov_base == NULL) {
2239                err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
2240                                           siocb);
2241                goto out;
2242        }
2243
2244        err = -EMSGSIZE;
2245        if (len > sk->sk_sndbuf - 32)
2246                goto out;
2247        err = -ENOBUFS;
2248        skb = netlink_alloc_large_skb(len, dst_group);
2249        if (skb == NULL)
2250                goto out;
2251
2252        NETLINK_CB(skb).portid  = nlk->portid;
2253        NETLINK_CB(skb).dst_group = dst_group;
2254        NETLINK_CB(skb).creds   = siocb->scm->creds;
2255
2256        err = -EFAULT;
2257        if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
2258                kfree_skb(skb);
2259                goto out;
2260        }
2261
2262        err = security_netlink_send(sk, skb);
2263        if (err) {
2264                kfree_skb(skb);
2265                goto out;
2266        }
2267
2268        if (dst_group) {
2269                atomic_inc(&skb->users);
2270                netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
2271        }
2272        err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
2273
2274out:
2275        scm_destroy(siocb->scm);
2276        return err;
2277}
2278
2279static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
2280                           struct msghdr *msg, size_t len,
2281                           int flags)
2282{
2283        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
2284        struct scm_cookie scm;
2285        struct sock *sk = sock->sk;
2286        struct netlink_sock *nlk = nlk_sk(sk);
2287        int noblock = flags&MSG_DONTWAIT;
2288        size_t copied;
2289        struct sk_buff *skb, *data_skb;
2290        int err, ret;
2291
2292        if (flags&MSG_OOB)
2293                return -EOPNOTSUPP;
2294
2295        copied = 0;
2296
2297        skb = skb_recv_datagram(sk, flags, noblock, &err);
2298        if (skb == NULL)
2299                goto out;
2300
2301        data_skb = skb;
2302
2303#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
2304        if (unlikely(skb_shinfo(skb)->frag_list)) {
2305                /*
2306                 * If this skb has a frag_list, it means that we
2307                 * will have to use the frag_list skb's data for compat tasks
2308                 * and the regular skb's data for normal (non-compat) tasks.
2309                 *
2310                 * If we need to send the compat skb, assign it to the
2311                 * 'data_skb' variable so that it will be used below for data
2312                 * copying. We keep 'skb' for everything else, including
2313                 * freeing both later.
2314                 */
2315                if (flags & MSG_CMSG_COMPAT)
2316                        data_skb = skb_shinfo(skb)->frag_list;
2317        }
2318#endif
2319
2320        msg->msg_namelen = 0;
2321
2322        copied = data_skb->len;
2323        if (len < copied) {
2324                msg->msg_flags |= MSG_TRUNC;
2325                copied = len;
2326        }
2327
2328        skb_reset_transport_header(data_skb);
2329        err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
2330
2331        if (msg->msg_name) {
2332                struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
2333                addr->nl_family = AF_NETLINK;
2334                addr->nl_pad    = 0;
2335                addr->nl_pid    = NETLINK_CB(skb).portid;
2336                addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
2337                msg->msg_namelen = sizeof(*addr);
2338        }
2339
2340        if (nlk->flags & NETLINK_RECV_PKTINFO)
2341                netlink_cmsg_recv_pktinfo(msg, skb);
2342
2343        if (NULL == siocb->scm) {
2344                memset(&scm, 0, sizeof(scm));
2345                siocb->scm = &scm;
2346        }
2347        siocb->scm->creds = *NETLINK_CREDS(skb);
2348        if (flags & MSG_TRUNC)
2349                copied = data_skb->len;
2350
2351        skb_free_datagram(sk, skb);
2352
2353        if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
2354                ret = netlink_dump(sk);
2355                if (ret) {
2356                        sk->sk_err = ret;
2357                        sk->sk_error_report(sk);
2358                }
2359        }
2360
2361        scm_recv(sock, msg, siocb->scm, flags);
2362out:
2363        netlink_rcv_wake(sk);
2364        return err ? : copied;
2365}
2366
2367static void netlink_data_ready(struct sock *sk, int len)
2368{
2369        BUG();
2370}
2371
2372/*
2373 *      We export these functions to other modules. They provide a
2374 *      complete set of non-blocking kernel primitives for message
2375 *      queueing.
2376 */
2377
2378struct sock *
2379__netlink_kernel_create(struct net *net, int unit, struct module *module,
2380                        struct netlink_kernel_cfg *cfg)
2381{
2382        struct socket *sock;
2383        struct sock *sk;
2384        struct netlink_sock *nlk;
2385        struct listeners *listeners = NULL;
2386        struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
2387        unsigned int groups;
2388
2389        BUG_ON(!nl_table);
2390
2391        if (unit < 0 || unit >= MAX_LINKS)
2392                return NULL;
2393
2394        if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
2395                return NULL;
2396
2397        /*
2398         * We only need to hold a reference on the net from sk, but must not
2399         * get_net() it. Besides, we cannot get and then put the net here.
2400         * So we create the socket inside init_net and then move it to net.
2401         */
2402
2403        if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
2404                goto out_sock_release_nosk;
2405
2406        sk = sock->sk;
2407        sk_change_net(sk, net);
2408
2409        if (!cfg || cfg->groups < 32)
2410                groups = 32;
2411        else
2412                groups = cfg->groups;
2413
2414        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2415        if (!listeners)
2416                goto out_sock_release;
2417
2418        sk->sk_data_ready = netlink_data_ready;
2419        if (cfg && cfg->input)
2420                nlk_sk(sk)->netlink_rcv = cfg->input;
2421
2422        if (netlink_insert(sk, net, 0))
2423                goto out_sock_release;
2424
2425        nlk = nlk_sk(sk);
2426        nlk->flags |= NETLINK_KERNEL_SOCKET;
2427
2428        netlink_table_grab();
2429        if (!nl_table[unit].registered) {
2430                nl_table[unit].groups = groups;
2431                rcu_assign_pointer(nl_table[unit].listeners, listeners);
2432                nl_table[unit].cb_mutex = cb_mutex;
2433                nl_table[unit].module = module;
2434                if (cfg) {
2435                        nl_table[unit].bind = cfg->bind;
2436                        nl_table[unit].flags = cfg->flags;
2437                        if (cfg->compare)
2438                                nl_table[unit].compare = cfg->compare;
2439                }
2440                nl_table[unit].registered = 1;
2441        } else {
2442                kfree(listeners);
2443                nl_table[unit].registered++;
2444        }
2445        netlink_table_ungrab();
2446        return sk;
2447
2448out_sock_release:
2449        kfree(listeners);
2450        netlink_kernel_release(sk);
2451        return NULL;
2452
2453out_sock_release_nosk:
2454        sock_release(sock);
2455        return NULL;
2456}
2457EXPORT_SYMBOL(__netlink_kernel_create);
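/*
 * Usage sketch (illustrative only): most callers go through the
 * netlink_kernel_create() wrapper from <linux/netlink.h>, which fills in
 * THIS_MODULE and calls __netlink_kernel_create(). MY_NETLINK_PROTO and
 * my_netlink_rcv() are hypothetical.
 *
 *	struct netlink_kernel_cfg cfg = {
 *		.groups	= 32,
 *		.input	= my_netlink_rcv,
 *	};
 *	struct sock *nlsk;
 *
 *	nlsk = netlink_kernel_create(net, MY_NETLINK_PROTO, &cfg);
 *	if (!nlsk)
 *		return -ENOMEM;
 *	...
 *	netlink_kernel_release(nlsk);
 */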
2458
2459void
2460netlink_kernel_release(struct sock *sk)
2461{
2462        sk_release_kernel(sk);
2463}
2464EXPORT_SYMBOL(netlink_kernel_release);
2465
2466int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
2467{
2468        struct listeners *new, *old;
2469        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
2470
2471        if (groups < 32)
2472                groups = 32;
2473
2474        if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
2475                new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
2476                if (!new)
2477                        return -ENOMEM;
2478                old = nl_deref_protected(tbl->listeners);
2479                memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
2480                rcu_assign_pointer(tbl->listeners, new);
2481
2482                kfree_rcu(old, rcu);
2483        }
2484        tbl->groups = groups;
2485
2486        return 0;
2487}
2488
2489/**
2490 * netlink_change_ngroups - change number of multicast groups
2491 *
2492 * This changes the number of multicast groups that are available
2493 * on a certain netlink family. Note that it is not possible to
2494 * change the number of groups to below 32. Also note that it does
2495 * not implicitly call netlink_clear_multicast_users() when the
2496 * number of groups is reduced.
2497 *
2498 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
2499 * @groups: The new number of groups.
2500 */
2501int netlink_change_ngroups(struct sock *sk, unsigned int groups)
2502{
2503        int err;
2504
2505        netlink_table_grab();
2506        err = __netlink_change_ngroups(sk, groups);
2507        netlink_table_ungrab();
2508
2509        return err;
2510}
2511
2512void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
2513{
2514        struct sock *sk;
2515        struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
2516
2517        sk_for_each_bound(sk, &tbl->mc_list)
2518                netlink_update_socket_mc(nlk_sk(sk), group, 0);
2519}
2520
2521/**
2522 * netlink_clear_multicast_users - kick off multicast listeners
2523 *
2524 * This function removes all listeners from the given group.
2525 * @ksk: The kernel netlink socket, as returned by
2526 *      netlink_kernel_create().
2527 * @group: The multicast group to clear.
2528 */
2529void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
2530{
2531        netlink_table_grab();
2532        __netlink_clear_multicast_users(ksk, group);
2533        netlink_table_ungrab();
2534}
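/*
 * Usage sketch (illustrative only): a family that shrinks its group space
 * is expected to clear the dropped groups itself, since
 * netlink_change_ngroups() does not do so implicitly. my_kernel_sk,
 * old_ngroups and new_ngroups are hypothetical.
 *
 *	for (grp = new_ngroups + 1; grp <= old_ngroups; grp++)
 *		netlink_clear_multicast_users(my_kernel_sk, grp);
 *	netlink_change_ngroups(my_kernel_sk, new_ngroups);
 */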
2535
2536struct nlmsghdr *
2537__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
2538{
2539        struct nlmsghdr *nlh;
2540        int size = nlmsg_msg_size(len);
2541
2542        nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
2543        nlh->nlmsg_type = type;
2544        nlh->nlmsg_len = size;
2545        nlh->nlmsg_flags = flags;
2546        nlh->nlmsg_pid = portid;
2547        nlh->nlmsg_seq = seq;
2548        if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
2549                memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
2550        return nlh;
2551}
2552EXPORT_SYMBOL(__nlmsg_put);
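/*
 * Usage sketch (illustrative only): __nlmsg_put() is normally reached via
 * the nlmsg_put() helper from <net/netlink.h>, which checks the remaining
 * tailroom first. MY_MSG_TYPE and struct my_payload are hypothetical.
 *
 *	struct sk_buff *skb = nlmsg_new(sizeof(struct my_payload), GFP_KERNEL);
 *	struct nlmsghdr *nlh;
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	nlh = nlmsg_put(skb, portid, seq, MY_MSG_TYPE,
 *			sizeof(struct my_payload), 0);
 *	if (!nlh) {
 *		nlmsg_free(skb);
 *		return -EMSGSIZE;
 *	}
 *	memcpy(nlmsg_data(nlh), &payload, sizeof(payload));
 *	nlmsg_end(skb, nlh);
 */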
2553
2554/*
2555 * It looks a bit ugly.
2556 * It would be better to create a kernel thread.
2557 */
2558
2559static int netlink_dump(struct sock *sk)
2560{
2561        struct netlink_sock *nlk = nlk_sk(sk);
2562        struct netlink_callback *cb;
2563        struct sk_buff *skb = NULL;
2564        struct nlmsghdr *nlh;
2565        int len, err = -ENOBUFS;
2566        int alloc_size;
2567
2568        mutex_lock(nlk->cb_mutex);
2569
2570        cb = nlk->cb;
2571        if (cb == NULL) {
2572                err = -EINVAL;
2573                goto errout_skb;
2574        }
2575
2576        alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2577
2578        if (!netlink_rx_is_mmaped(sk) &&
2579            atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2580                goto errout_skb;
2581        skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
2582        if (!skb)
2583                goto errout_skb;
2584        netlink_skb_set_owner_r(skb, sk);
2585
2586        len = cb->dump(skb, cb);
2587
2588        if (len > 0) {
2589                mutex_unlock(nlk->cb_mutex);
2590
2591                if (sk_filter(sk, skb))
2592                        kfree_skb(skb);
2593                else
2594                        __netlink_sendskb(sk, skb);
2595                return 0;
2596        }
2597
2598        nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
2599        if (!nlh)
2600                goto errout_skb;
2601
2602        nl_dump_check_consistent(cb, nlh);
2603
2604        memcpy(nlmsg_data(nlh), &len, sizeof(len));
2605
2606        if (sk_filter(sk, skb))
2607                kfree_skb(skb);
2608        else
2609                __netlink_sendskb(sk, skb);
2610
2611        if (cb->done)
2612                cb->done(cb);
2613        nlk->cb = NULL;
2614        mutex_unlock(nlk->cb_mutex);
2615
2616        module_put(cb->module);
2617        netlink_consume_callback(cb);
2618        return 0;
2619
2620errout_skb:
2621        mutex_unlock(nlk->cb_mutex);
2622        kfree_skb(skb);
2623        return err;
2624}
2625
2626int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2627                         const struct nlmsghdr *nlh,
2628                         struct netlink_dump_control *control)
2629{
2630        struct netlink_callback *cb;
2631        struct sock *sk;
2632        struct netlink_sock *nlk;
2633        int ret;
2634
2635        cb = kzalloc(sizeof(*cb), GFP_KERNEL);
2636        if (cb == NULL)
2637                return -ENOBUFS;
2638
2639        /* Memory mapped dump requests need to be copied to avoid looping
2640         * on the pending state in netlink_mmap_sendmsg() while the CB holds
2641         * a reference to the skb.
2642         */
2643        if (netlink_skb_is_mmaped(skb)) {
2644                skb = skb_copy(skb, GFP_KERNEL);
2645                if (skb == NULL) {
2646                        kfree(cb);
2647                        return -ENOBUFS;
2648                }
2649        } else
2650                atomic_inc(&skb->users);
2651
2652        cb->dump = control->dump;
2653        cb->done = control->done;
2654        cb->nlh = nlh;
2655        cb->data = control->data;
2656        cb->module = control->module;
2657        cb->min_dump_alloc = control->min_dump_alloc;
2658        cb->skb = skb;
2659
2660        sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
2661        if (sk == NULL) {
2662                netlink_destroy_callback(cb);
2663                return -ECONNREFUSED;
2664        }
2665        nlk = nlk_sk(sk);
2666
2667        mutex_lock(nlk->cb_mutex);
2668        /* A dump is in progress... */
2669        if (nlk->cb) {
2670                mutex_unlock(nlk->cb_mutex);
2671                netlink_destroy_callback(cb);
2672                ret = -EBUSY;
2673                goto out;
2674        }
2675        /* take a reference on the module that cb->dump belongs to */
2676        if (!try_module_get(cb->module)) {
2677                mutex_unlock(nlk->cb_mutex);
2678                netlink_destroy_callback(cb);
2679                ret = -EPROTONOSUPPORT;
2680                goto out;
2681        }
2682
2683        nlk->cb = cb;
2684        mutex_unlock(nlk->cb_mutex);
2685
2686        ret = netlink_dump(sk);
2687out:
2688        sock_put(sk);
2689
2690        if (ret)
2691                return ret;
2692
2693        /* We successfully started a dump; by returning -EINTR we
2694         * signal that no ACK should be sent even if one was requested.
2695         */
2696        return -EINTR;
2697}
2698EXPORT_SYMBOL(__netlink_dump_start);
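/*
 * Usage sketch (illustrative only): request handlers usually call the
 * netlink_dump_start() wrapper, which fills in THIS_MODULE before invoking
 * __netlink_dump_start(), and pass the -EINTR result back so that no ACK
 * is generated for a successfully started dump. my_dump() and
 * my_dump_done() are hypothetical.
 *
 *	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 *		struct netlink_dump_control c = {
 *			.dump	= my_dump,
 *			.done	= my_dump_done,
 *		};
 *		return netlink_dump_start(my_kernel_sk, skb, nlh, &c);
 *	}
 */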
2699
2700void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
2701{
2702        struct sk_buff *skb;
2703        struct nlmsghdr *rep;
2704        struct nlmsgerr *errmsg;
2705        size_t payload = sizeof(*errmsg);
2706
2707        /* error messages get the original request appended */
2708        if (err)
2709                payload += nlmsg_len(nlh);
2710
2711        skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
2712                                NETLINK_CB(in_skb).portid, GFP_KERNEL);
2713        if (!skb) {
2714                struct sock *sk;
2715
2716                sk = netlink_lookup(sock_net(in_skb->sk),
2717                                    in_skb->sk->sk_protocol,
2718                                    NETLINK_CB(in_skb).portid);
2719                if (sk) {
2720                        sk->sk_err = ENOBUFS;
2721                        sk->sk_error_report(sk);
2722                        sock_put(sk);
2723                }
2724                return;
2725        }
2726
2727        rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
2728                          NLMSG_ERROR, payload, 0);
2729        errmsg = nlmsg_data(rep);
2730        errmsg->error = err;
2731        memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
2732        netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
2733}
2734EXPORT_SYMBOL(netlink_ack);
2735
2736int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
2737                                                     struct nlmsghdr *))
2738{
2739        struct nlmsghdr *nlh;
2740        int err;
2741
2742        while (skb->len >= nlmsg_total_size(0)) {
2743                int msglen;
2744
2745                nlh = nlmsg_hdr(skb);
2746                err = 0;
2747
2748                if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
2749                        return 0;
2750
2751                /* Only requests are handled by the kernel */
2752                if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
2753                        goto ack;
2754
2755                /* Skip control messages */
2756                if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
2757                        goto ack;
2758
2759                err = cb(skb, nlh);
2760                if (err == -EINTR)
2761                        goto skip;
2762
2763ack:
2764                if (nlh->nlmsg_flags & NLM_F_ACK || err)
2765                        netlink_ack(skb, nlh, err);
2766
2767skip:
2768                msglen = NLMSG_ALIGN(nlh->nlmsg_len);
2769                if (msglen > skb->len)
2770                        msglen = skb->len;
2771                skb_pull(skb, msglen);
2772        }
2773
2774        return 0;
2775}
2776EXPORT_SYMBOL(netlink_rcv_skb);
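/*
 * Usage sketch (illustrative only): a kernel socket's cfg->input hook
 * typically hands every skb to netlink_rcv_skb() with a per-protocol doit
 * function; the loop above then takes care of ACK generation. my_doit()
 * is hypothetical.
 *
 *	static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_netlink_rcv(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, &my_doit);
 *	}
 */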
2777
2778/**
2779 * nlmsg_notify - send a notification netlink message
2780 * @sk: netlink socket to use
2781 * @skb: notification message
2782 * @portid: destination netlink portid for reports or 0
2783 * @group: destination multicast group or 0
2784 * @report: 1 to report back, 0 to disable
2785 * @flags: allocation flags
2786 */
2787int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
2788                 unsigned int group, int report, gfp_t flags)
2789{
2790        int err = 0;
2791
2792        if (group) {
2793                int exclude_portid = 0;
2794
2795                if (report) {
2796                        atomic_inc(&skb->users);
2797                        exclude_portid = portid;
2798                }
2799
2800                /* errors are reported via the destination sk->sk_err, but
2801                 * delivery errors are propagated if NETLINK_BROADCAST_ERROR is set */
2802                err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
2803        }
2804
2805        if (report) {
2806                int err2;
2807
2808                err2 = nlmsg_unicast(sk, skb, portid);
2809                if (!err || err == -ESRCH)
2810                        err = err2;
2811        }
2812
2813        return err;
2814}
2815EXPORT_SYMBOL(nlmsg_notify);
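/*
 * Usage sketch (illustrative only): a typical notification path builds the
 * message once and lets nlmsg_notify() decide between multicast and the
 * unicast echo requested via NLM_F_ECHO. MY_GRP and build_my_event() are
 * hypothetical.
 *
 *	struct sk_buff *skb = build_my_event(GFP_KERNEL);
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	return nlmsg_notify(my_kernel_sk, skb, req_portid, MY_GRP,
 *			    nlmsg_report(nlh), GFP_KERNEL);
 */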
2816
2817#ifdef CONFIG_PROC_FS
2818struct nl_seq_iter {
2819        struct seq_net_private p;
2820        int link;
2821        int hash_idx;
2822};
2823
2824static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
2825{
2826        struct nl_seq_iter *iter = seq->private;
2827        int i, j;
2828        struct sock *s;
2829        loff_t off = 0;
2830
2831        for (i = 0; i < MAX_LINKS; i++) {
2832                struct nl_portid_hash *hash = &nl_table[i].hash;
2833
2834                for (j = 0; j <= hash->mask; j++) {
2835                        sk_for_each(s, &hash->table[j]) {
2836                                if (sock_net(s) != seq_file_net(seq))
2837                                        continue;
2838                                if (off == pos) {
2839                                        iter->link = i;
2840                                        iter->hash_idx = j;
2841                                        return s;
2842                                }
2843                                ++off;
2844                        }
2845                }
2846        }
2847        return NULL;
2848}
2849
2850static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
2851        __acquires(nl_table_lock)
2852{
2853        read_lock(&nl_table_lock);
2854        return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2855}
2856
2857static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2858{
2859        struct sock *s;
2860        struct nl_seq_iter *iter;
2861        struct net *net;
2862        int i, j;
2863
2864        ++*pos;
2865
2866        if (v == SEQ_START_TOKEN)
2867                return netlink_seq_socket_idx(seq, 0);
2868
2869        net = seq_file_net(seq);
2870        iter = seq->private;
2871        s = v;
2872        do {
2873                s = sk_next(s);
2874        } while (s && !nl_table[s->sk_protocol].compare(net, s));
2875        if (s)
2876                return s;
2877
2878        i = iter->link;
2879        j = iter->hash_idx + 1;
2880
2881        do {
2882                struct nl_portid_hash *hash = &nl_table[i].hash;
2883
2884                for (; j <= hash->mask; j++) {
2885                        s = sk_head(&hash->table[j]);
2886
2887                        while (s && !nl_table[s->sk_protocol].compare(net, s))
2888                                s = sk_next(s);
2889                        if (s) {
2890                                iter->link = i;
2891                                iter->hash_idx = j;
2892                                return s;
2893                        }
2894                }
2895
2896                j = 0;
2897        } while (++i < MAX_LINKS);
2898
2899        return NULL;
2900}
2901
2902static void netlink_seq_stop(struct seq_file *seq, void *v)
2903        __releases(nl_table_lock)
2904{
2905        read_unlock(&nl_table_lock);
2906}
2907
2908
2909static int netlink_seq_show(struct seq_file *seq, void *v)
2910{
2911        if (v == SEQ_START_TOKEN) {
2912                seq_puts(seq,
2913                         "sk       Eth Pid    Groups   "
2914                         "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
2915        } else {
2916                struct sock *s = v;
2917                struct netlink_sock *nlk = nlk_sk(s);
2918
2919                seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
2920                           s,
2921                           s->sk_protocol,
2922                           nlk->portid,
2923                           nlk->groups ? (u32)nlk->groups[0] : 0,
2924                           sk_rmem_alloc_get(s),
2925                           sk_wmem_alloc_get(s),
2926                           nlk->cb,
2927                           atomic_read(&s->sk_refcnt),
2928                           atomic_read(&s->sk_drops),
2929                           sock_i_ino(s)
2930                        );
2931
2932        }
2933        return 0;
2934}
2935
2936static const struct seq_operations netlink_seq_ops = {
2937        .start  = netlink_seq_start,
2938        .next   = netlink_seq_next,
2939        .stop   = netlink_seq_stop,
2940        .show   = netlink_seq_show,
2941};
2942
2943
2944static int netlink_seq_open(struct inode *inode, struct file *file)
2945{
2946        return seq_open_net(inode, file, &netlink_seq_ops,
2947                                sizeof(struct nl_seq_iter));
2948}
2949
2950static const struct file_operations netlink_seq_fops = {
2951        .owner          = THIS_MODULE,
2952        .open           = netlink_seq_open,
2953        .read           = seq_read,
2954        .llseek         = seq_lseek,
2955        .release        = seq_release_net,
2956};
2957
2958#endif
2959
2960int netlink_register_notifier(struct notifier_block *nb)
2961{
2962        return atomic_notifier_chain_register(&netlink_chain, nb);
2963}
2964EXPORT_SYMBOL(netlink_register_notifier);
2965
2966int netlink_unregister_notifier(struct notifier_block *nb)
2967{
2968        return atomic_notifier_chain_unregister(&netlink_chain, nb);
2969}
2970EXPORT_SYMBOL(netlink_unregister_notifier);
2971
2972static const struct proto_ops netlink_ops = {
2973        .family =       PF_NETLINK,
2974        .owner =        THIS_MODULE,
2975        .release =      netlink_release,
2976        .bind =         netlink_bind,
2977        .connect =      netlink_connect,
2978        .socketpair =   sock_no_socketpair,
2979        .accept =       sock_no_accept,
2980        .getname =      netlink_getname,
2981        .poll =         netlink_poll,
2982        .ioctl =        sock_no_ioctl,
2983        .listen =       sock_no_listen,
2984        .shutdown =     sock_no_shutdown,
2985        .setsockopt =   netlink_setsockopt,
2986        .getsockopt =   netlink_getsockopt,
2987        .sendmsg =      netlink_sendmsg,
2988        .recvmsg =      netlink_recvmsg,
2989        .mmap =         netlink_mmap,
2990        .sendpage =     sock_no_sendpage,
2991};
2992
2993static const struct net_proto_family netlink_family_ops = {
2994        .family = PF_NETLINK,
2995        .create = netlink_create,
2996        .owner  = THIS_MODULE,  /* for consistency 8) */
2997};
2998
2999static int __net_init netlink_net_init(struct net *net)
3000{
3001#ifdef CONFIG_PROC_FS
3002        if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
3003                return -ENOMEM;
3004#endif
3005        return 0;
3006}
3007
3008static void __net_exit netlink_net_exit(struct net *net)
3009{
3010#ifdef CONFIG_PROC_FS
3011        remove_proc_entry("netlink", net->proc_net);
3012#endif
3013}
3014
3015static void __init netlink_add_usersock_entry(void)
3016{
3017        struct listeners *listeners;
3018        int groups = 32;
3019
3020        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
3021        if (!listeners)
3022                panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
3023
3024        netlink_table_grab();
3025
3026        nl_table[NETLINK_USERSOCK].groups = groups;
3027        rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
3028        nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
3029        nl_table[NETLINK_USERSOCK].registered = 1;
3030        nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
3031
3032        netlink_table_ungrab();
3033}
3034
3035static struct pernet_operations __net_initdata netlink_net_ops = {
3036        .init = netlink_net_init,
3037        .exit = netlink_net_exit,
3038};
3039
3040static int __init netlink_proto_init(void)
3041{
3042        int i;
3043        unsigned long limit;
3044        unsigned int order;
3045        int err = proto_register(&netlink_proto, 0);
3046
3047        if (err != 0)
3048                goto out;
3049
3050        BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
3051
3052        nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
3053        if (!nl_table)
3054                goto panic;
3055
3056        if (totalram_pages >= (128 * 1024))
3057                limit = totalram_pages >> (21 - PAGE_SHIFT);
3058        else
3059                limit = totalram_pages >> (23 - PAGE_SHIFT);
3060
3061        order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
3062        limit = (1UL << order) / sizeof(struct hlist_head);
3063        order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
3064
3065        for (i = 0; i < MAX_LINKS; i++) {
3066                struct nl_portid_hash *hash = &nl_table[i].hash;
3067
3068                hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
3069                if (!hash->table) {
3070                        while (i-- > 0)
3071                                nl_portid_hash_free(nl_table[i].hash.table,
3072                                                 1 * sizeof(*hash->table));
3073                        kfree(nl_table);
3074                        goto panic;
3075                }
3076                hash->max_shift = order;
3077                hash->shift = 0;
3078                hash->mask = 0;
3079                hash->rehash_time = jiffies;
3080
3081                nl_table[i].compare = netlink_compare;
3082        }
3083
3084        INIT_LIST_HEAD(&netlink_tap_all);
3085
3086        netlink_add_usersock_entry();
3087
3088        sock_register(&netlink_family_ops);
3089        register_pernet_subsys(&netlink_net_ops);
3090        /* The netlink device handler may be needed early. */
3091        rtnetlink_init();
3092out:
3093        return err;
3094panic:
3095        panic("netlink_init: Cannot allocate nl_table\n");
3096}
3097
3098core_initcall(netlink_proto_init);
3099