linux/net/netlink/af_netlink.c
/*
 * NETLINK      Kernel-user communication protocol.
 *
 *              Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
 *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *                              Patrick McHardy <kaber@trash.net>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *                               - inc module use count of module that owns
 *                                 the kernel socket in case userspace opens
 *                                 socket of same protocol
 *                               - remove all module support, since netlink is
 *                                 mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#include "af_netlink.h"

struct listeners {
        struct rcu_head         rcu;
        unsigned long           masks[0];
};

/* state bits */
#define NETLINK_CONGESTED       0x0

/* flags */
#define NETLINK_KERNEL_SOCKET   0x1
#define NETLINK_RECV_PKTINFO    0x2
#define NETLINK_BROADCAST_SEND_ERROR    0x4
#define NETLINK_RECV_NO_ENOBUFS 0x8

static inline int netlink_is_kernel(struct sock *sk)
{
        return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct netlink_table *nl_table;
EXPORT_SYMBOL_GPL(nl_table);

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);
static void netlink_skb_destructor(struct sk_buff *skb);

DEFINE_RWLOCK(nl_table_lock);
EXPORT_SYMBOL_GPL(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

static inline u32 netlink_group_mask(u32 group)
{
        return group ? 1 << (group - 1) : 0;
}
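
/* Worked example of the mapping above: netlink_group_mask(1) == 0x00000001,
 * netlink_group_mask(5) == 0x00000010 and netlink_group_mask(0) == 0 ("no
 * group"). Only the first 32 groups are addressable through the legacy
 * sockaddr_nl bitmap; membership in higher-numbered groups is tracked in the
 * per-socket groups bitmap (see netlink_realloc_groups() below).
 */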

static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
{
        return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
}

static void netlink_overrun(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
                if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
                        sk->sk_err = ENOBUFS;
                        sk->sk_error_report(sk);
                }
        }
        atomic_inc(&sk->sk_drops);
}

static void netlink_rcv_wake(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (skb_queue_empty(&sk->sk_receive_queue))
                clear_bit(NETLINK_CONGESTED, &nlk->state);
        if (!test_bit(NETLINK_CONGESTED, &nlk->state))
                wake_up_interruptible(&nlk->wait);
}

#ifdef CONFIG_NETLINK_MMAP
static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
{
        return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
}

static bool netlink_rx_is_mmaped(struct sock *sk)
{
        return nlk_sk(sk)->rx_ring.pg_vec != NULL;
}

static bool netlink_tx_is_mmaped(struct sock *sk)
{
        return nlk_sk(sk)->tx_ring.pg_vec != NULL;
}

static __pure struct page *pgvec_to_page(const void *addr)
{
        if (is_vmalloc_addr(addr))
                return vmalloc_to_page(addr);
        else
                return virt_to_page(addr);
}

static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
{
        unsigned int i;

        for (i = 0; i < len; i++) {
                if (pg_vec[i] != NULL) {
                        if (is_vmalloc_addr(pg_vec[i]))
                                vfree(pg_vec[i]);
                        else
                                free_pages((unsigned long)pg_vec[i], order);
                }
        }
        kfree(pg_vec);
}

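/* Allocation strategy, in decreasing order of preference: physically
 * contiguous pages but giving up early (__GFP_NORETRY), then vmalloc'ed
 * memory that is only virtually contiguous, and as a last resort physically
 * contiguous pages again with the allocator allowed to retry harder.
 */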
static void *alloc_one_pg_vec_page(unsigned long order)
{
        void *buffer;
        gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
                          __GFP_NOWARN | __GFP_NORETRY;

        buffer = (void *)__get_free_pages(gfp_flags, order);
        if (buffer != NULL)
                return buffer;

        buffer = vzalloc((1 << order) * PAGE_SIZE);
        if (buffer != NULL)
                return buffer;

        gfp_flags &= ~__GFP_NORETRY;
        return (void *)__get_free_pages(gfp_flags, order);
}

static void **alloc_pg_vec(struct netlink_sock *nlk,
                           struct nl_mmap_req *req, unsigned int order)
{
        unsigned int block_nr = req->nm_block_nr;
        unsigned int i;
        void **pg_vec;

        pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
        if (pg_vec == NULL)
                return NULL;

        for (i = 0; i < block_nr; i++) {
                pg_vec[i] = alloc_one_pg_vec_page(order);
                if (pg_vec[i] == NULL)
                        goto err1;
        }

        return pg_vec;
err1:
        free_pg_vec(pg_vec, order, block_nr);
        return NULL;
}

static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
                            bool closing, bool tx_ring)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        struct netlink_ring *ring;
        struct sk_buff_head *queue;
        void **pg_vec = NULL;
        unsigned int order = 0;
        int err;

        ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
        queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

        if (!closing) {
                if (atomic_read(&nlk->mapped))
                        return -EBUSY;
                if (atomic_read(&ring->pending))
                        return -EBUSY;
        }

        if (req->nm_block_nr) {
                if (ring->pg_vec != NULL)
                        return -EBUSY;

                if ((int)req->nm_block_size <= 0)
                        return -EINVAL;
                if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
                        return -EINVAL;
                if (req->nm_frame_size < NL_MMAP_HDRLEN)
                        return -EINVAL;
                if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
                        return -EINVAL;

                ring->frames_per_block = req->nm_block_size /
                                         req->nm_frame_size;
                if (ring->frames_per_block == 0)
                        return -EINVAL;
                if (ring->frames_per_block * req->nm_block_nr !=
                    req->nm_frame_nr)
                        return -EINVAL;

                order = get_order(req->nm_block_size);
                pg_vec = alloc_pg_vec(nlk, req, order);
                if (pg_vec == NULL)
                        return -ENOMEM;
        } else {
                if (req->nm_frame_nr)
                        return -EINVAL;
        }

        err = -EBUSY;
        mutex_lock(&nlk->pg_vec_lock);
        if (closing || atomic_read(&nlk->mapped) == 0) {
                err = 0;
                spin_lock_bh(&queue->lock);

                ring->frame_max         = req->nm_frame_nr - 1;
                ring->head              = 0;
                ring->frame_size        = req->nm_frame_size;
                ring->pg_vec_pages      = req->nm_block_size / PAGE_SIZE;

                swap(ring->pg_vec_len, req->nm_block_nr);
                swap(ring->pg_vec_order, order);
                swap(ring->pg_vec, pg_vec);

                __skb_queue_purge(queue);
                spin_unlock_bh(&queue->lock);

                WARN_ON(atomic_read(&nlk->mapped));
        }
        mutex_unlock(&nlk->pg_vec_lock);

        if (pg_vec)
                free_pg_vec(pg_vec, order, req->nm_block_nr);
        return err;
}
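
/* A minimal sketch (not part of this file's build) of how user space is
 * expected to configure and map the ring that netlink_set_ring() above
 * validates. Error handling is omitted; the nl_mmap_req layout and the
 * NETLINK_RX_RING sockopt are assumed from this kernel generation's
 * <linux/netlink.h> (CONFIG_NETLINK_MMAP).
 */
#if 0   /* illustrative user space sketch */
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/netlink.h>

static void *setup_rx_ring(int fd, unsigned int ring_size)
{
        struct nl_mmap_req req = {
                .nm_block_size  = 4096,                 /* multiple of PAGE_SIZE */
                .nm_block_nr    = ring_size / 4096,
                .nm_frame_size  = 1024,                 /* NL_MMAP_MSG_ALIGNMENT aligned */
                .nm_frame_nr    = ring_size / 1024,     /* frames_per_block * block_nr */
        };

        /* lands in netlink_set_ring(), which checks the geometry above */
        setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req));
        /* lands in netlink_mmap(), which inserts the pg_vec pages */
        return mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
}
#endif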

static void netlink_mm_open(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct socket *sock = file->private_data;
        struct sock *sk = sock->sk;

        if (sk)
                atomic_inc(&nlk_sk(sk)->mapped);
}

static void netlink_mm_close(struct vm_area_struct *vma)
{
        struct file *file = vma->vm_file;
        struct socket *sock = file->private_data;
        struct sock *sk = sock->sk;

        if (sk)
                atomic_dec(&nlk_sk(sk)->mapped);
}

static const struct vm_operations_struct netlink_mmap_ops = {
        .open   = netlink_mm_open,
        .close  = netlink_mm_close,
};

static int netlink_mmap(struct file *file, struct socket *sock,
                        struct vm_area_struct *vma)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct netlink_ring *ring;
        unsigned long start, size, expected;
        unsigned int i;
        int err = -EINVAL;

        if (vma->vm_pgoff)
                return -EINVAL;

        mutex_lock(&nlk->pg_vec_lock);

        expected = 0;
        for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
                if (ring->pg_vec == NULL)
                        continue;
                expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
        }

        if (expected == 0)
                goto out;

        size = vma->vm_end - vma->vm_start;
        if (size != expected)
                goto out;

        start = vma->vm_start;
        for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
                if (ring->pg_vec == NULL)
                        continue;

                for (i = 0; i < ring->pg_vec_len; i++) {
                        struct page *page;
                        void *kaddr = ring->pg_vec[i];
                        unsigned int pg_num;

                        for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
                                page = pgvec_to_page(kaddr);
                                err = vm_insert_page(vma, start, page);
                                if (err < 0)
                                        goto out;
                                start += PAGE_SIZE;
                                kaddr += PAGE_SIZE;
                        }
                }
        }

        atomic_inc(&nlk->mapped);
        vma->vm_ops = &netlink_mmap_ops;
        err = 0;
out:
        mutex_unlock(&nlk->pg_vec_lock);
        return err;
}

static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
{
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
        struct page *p_start, *p_end;

        /* First page is flushed through netlink_{get,set}_status */
        p_start = pgvec_to_page((void *)hdr + PAGE_SIZE);
        p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
        while (p_start <= p_end) {
                flush_dcache_page(p_start);
                p_start++;
        }
#endif
}

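/* The nm_status word is the handshake point between kernel and user space:
 * netlink_get_status() issues a read barrier before loading it and
 * netlink_set_status() issues a write barrier after storing it, so frame
 * payload accesses are not reordered past the status transition. The
 * flush_dcache_page() calls keep the first page of the frame coherent on
 * architectures with aliasing data caches; the remaining pages are handled
 * by netlink_frame_flush_dcache() above.
 */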
static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
{
        smp_rmb();
        flush_dcache_page(pgvec_to_page(hdr));
        return hdr->nm_status;
}

static void netlink_set_status(struct nl_mmap_hdr *hdr,
                               enum nl_mmap_status status)
{
        hdr->nm_status = status;
        flush_dcache_page(pgvec_to_page(hdr));
        smp_wmb();
}

static struct nl_mmap_hdr *
__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
{
        unsigned int pg_vec_pos, frame_off;

        pg_vec_pos = pos / ring->frames_per_block;
        frame_off  = pos % ring->frames_per_block;

        return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
}

static struct nl_mmap_hdr *
netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
                     enum nl_mmap_status status)
{
        struct nl_mmap_hdr *hdr;

        hdr = __netlink_lookup_frame(ring, pos);
        if (netlink_get_status(hdr) != status)
                return NULL;

        return hdr;
}

static struct nl_mmap_hdr *
netlink_current_frame(const struct netlink_ring *ring,
                      enum nl_mmap_status status)
{
        return netlink_lookup_frame(ring, ring->head, status);
}

static struct nl_mmap_hdr *
netlink_previous_frame(const struct netlink_ring *ring,
                       enum nl_mmap_status status)
{
        unsigned int prev;

        prev = ring->head ? ring->head - 1 : ring->frame_max;
        return netlink_lookup_frame(ring, prev, status);
}

static void netlink_increment_head(struct netlink_ring *ring)
{
        ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
}

static void netlink_forward_ring(struct netlink_ring *ring)
{
        unsigned int head = ring->head;
        const struct nl_mmap_hdr *hdr;

        do {
                /* re-examine the frame at the current head each iteration */
                hdr = __netlink_lookup_frame(ring, ring->head);
                if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
                        break;
                if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
                        break;
                netlink_increment_head(ring);
        } while (ring->head != head);
}

static bool netlink_dump_space(struct netlink_sock *nlk)
{
        struct netlink_ring *ring = &nlk->rx_ring;
        struct nl_mmap_hdr *hdr;
        unsigned int n;

        hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
        if (hdr == NULL)
                return false;

        n = ring->head + ring->frame_max / 2;
        if (n > ring->frame_max)
                n -= ring->frame_max;

        hdr = __netlink_lookup_frame(ring, n);

        return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
}

static unsigned int netlink_poll(struct file *file, struct socket *sock,
                                 poll_table *wait)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int mask;
        int err;

        if (nlk->rx_ring.pg_vec != NULL) {
                /* Memory mapped sockets don't call recvmsg(), so flow control
                 * for dumps is performed here. A dump is allowed to continue
                 * if at least half the ring is unused.
                 */
                while (nlk->cb != NULL && netlink_dump_space(nlk)) {
                        err = netlink_dump(sk);
                        if (err < 0) {
                                /* sk_err holds a positive errno value */
                                sk->sk_err = -err;
                                sk->sk_error_report(sk);
                                break;
                        }
                }
                netlink_rcv_wake(sk);
        }

        mask = datagram_poll(file, sock, wait);

        spin_lock_bh(&sk->sk_receive_queue.lock);
        if (nlk->rx_ring.pg_vec) {
                netlink_forward_ring(&nlk->rx_ring);
                if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
                        mask |= POLLIN | POLLRDNORM;
        }
        spin_unlock_bh(&sk->sk_receive_queue.lock);

        spin_lock_bh(&sk->sk_write_queue.lock);
        if (nlk->tx_ring.pg_vec) {
                if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
                        mask |= POLLOUT | POLLWRNORM;
        }
        spin_unlock_bh(&sk->sk_write_queue.lock);

        return mask;
}
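
/* A minimal sketch (not part of the build) of the user space consumer loop
 * that pairs with netlink_poll() above: wait for POLLIN, consume frames the
 * kernel marked VALID (or COPY for oversized messages, which must then be
 * fetched via recvmsg()), and hand frames back by resetting them to UNUSED.
 * process_msg() is a placeholder; geometry arguments match setup_rx_ring().
 */
#if 0   /* illustrative user space sketch */
#include <poll.h>
#include <linux/netlink.h>

extern void process_msg(const void *buf, unsigned int len);    /* placeholder */

void rx_ring_loop(int fd, void *ring, unsigned int frame_size,
                  unsigned int frame_nr)
{
        unsigned int head = 0;

        for (;;) {
                struct nl_mmap_hdr *hdr = ring + head * frame_size;
                struct pollfd pfd = { .fd = fd, .events = POLLIN };

                if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
                        poll(&pfd, 1, -1);      /* ends up in netlink_poll() */

                if (hdr->nm_status == NL_MMAP_STATUS_VALID)
                        process_msg((void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);

                /* release the frame back to the kernel */
                hdr->nm_status = NL_MMAP_STATUS_UNUSED;
                head = (head + 1) % frame_nr;
        }
}
#endif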

static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
{
        return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
}

static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
                                   struct netlink_ring *ring,
                                   struct nl_mmap_hdr *hdr)
{
        unsigned int size;
        void *data;

        size = ring->frame_size - NL_MMAP_HDRLEN;
        data = (void *)hdr + NL_MMAP_HDRLEN;

        skb->head       = data;
        skb->data       = data;
        skb_reset_tail_pointer(skb);
        skb->end        = skb->tail + size;
        skb->len        = 0;

        skb->destructor = netlink_skb_destructor;
        NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
        NETLINK_CB(skb).sk = sk;
}

static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
                                u32 dst_portid, u32 dst_group,
                                struct sock_iocb *siocb)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        struct netlink_ring *ring;
        struct nl_mmap_hdr *hdr;
        struct sk_buff *skb;
        unsigned int maxlen;
        bool excl = true;
        int err = 0, len = 0;

        /* Netlink messages are validated by the receiver before processing.
         * In order to avoid userspace changing the contents of the message
         * after validation, the socket and the ring may only be used by a
         * single process, otherwise we fall back to copying.
         */
        if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
            atomic_read(&nlk->mapped) > 1)
                excl = false;

        mutex_lock(&nlk->pg_vec_lock);

        ring   = &nlk->tx_ring;
        maxlen = ring->frame_size - NL_MMAP_HDRLEN;

        do {
                hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
                if (hdr == NULL) {
                        if (!(msg->msg_flags & MSG_DONTWAIT) &&
                            atomic_read(&nlk->tx_ring.pending))
                                schedule();
                        continue;
                }
                if (hdr->nm_len > maxlen) {
                        err = -EINVAL;
                        goto out;
                }

                netlink_frame_flush_dcache(hdr);

                if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
                        skb = alloc_skb_head(GFP_KERNEL);
                        if (skb == NULL) {
                                err = -ENOBUFS;
                                goto out;
                        }
                        sock_hold(sk);
                        netlink_ring_setup_skb(skb, sk, ring, hdr);
                        NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
                        __skb_put(skb, hdr->nm_len);
                        netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
                        atomic_inc(&ring->pending);
                } else {
                        skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
                        if (skb == NULL) {
                                err = -ENOBUFS;
                                goto out;
                        }
                        __skb_put(skb, hdr->nm_len);
                        memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
                        netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
                }

                netlink_increment_head(ring);

                NETLINK_CB(skb).portid    = nlk->portid;
                NETLINK_CB(skb).dst_group = dst_group;
                NETLINK_CB(skb).creds     = siocb->scm->creds;

                err = security_netlink_send(sk, skb);
                if (err) {
                        kfree_skb(skb);
                        goto out;
                }

                if (unlikely(dst_group)) {
                        atomic_inc(&skb->users);
                        netlink_broadcast(sk, skb, dst_portid, dst_group,
                                          GFP_KERNEL);
                }
                err = netlink_unicast(sk, skb, dst_portid,
                                      msg->msg_flags & MSG_DONTWAIT);
                if (err < 0)
                        goto out;
                len += err;

        } while (hdr != NULL ||
                 (!(msg->msg_flags & MSG_DONTWAIT) &&
                  atomic_read(&nlk->tx_ring.pending)));

        if (len > 0)
                err = len;
out:
        mutex_unlock(&nlk->pg_vec_lock);
        return err;
}

static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
{
        struct nl_mmap_hdr *hdr;

        hdr = netlink_mmap_hdr(skb);
        hdr->nm_len     = skb->len;
        hdr->nm_group   = NETLINK_CB(skb).dst_group;
        hdr->nm_pid     = NETLINK_CB(skb).creds.pid;
        hdr->nm_uid     = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
        hdr->nm_gid     = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
        netlink_frame_flush_dcache(hdr);
        netlink_set_status(hdr, NL_MMAP_STATUS_VALID);

        NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
        kfree_skb(skb);
}

static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        struct netlink_ring *ring = &nlk->rx_ring;
        struct nl_mmap_hdr *hdr;

        spin_lock_bh(&sk->sk_receive_queue.lock);
        hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
        if (hdr == NULL) {
                spin_unlock_bh(&sk->sk_receive_queue.lock);
                kfree_skb(skb);
                netlink_overrun(sk);
                return;
        }
        netlink_increment_head(ring);
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        spin_unlock_bh(&sk->sk_receive_queue.lock);

        hdr->nm_len     = skb->len;
        hdr->nm_group   = NETLINK_CB(skb).dst_group;
        hdr->nm_pid     = NETLINK_CB(skb).creds.pid;
        hdr->nm_uid     = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
        hdr->nm_gid     = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
        netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
}

#else /* CONFIG_NETLINK_MMAP */
#define netlink_skb_is_mmaped(skb)      false
#define netlink_rx_is_mmaped(sk)        false
#define netlink_tx_is_mmaped(sk)        false
#define netlink_mmap                    sock_no_mmap
#define netlink_poll                    datagram_poll
#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb)     0
#endif /* CONFIG_NETLINK_MMAP */

static void netlink_destroy_callback(struct netlink_callback *cb)
{
        kfree_skb(cb->skb);
        kfree(cb);
}

static void netlink_consume_callback(struct netlink_callback *cb)
{
        consume_skb(cb->skb);
        kfree(cb);
}

static void netlink_skb_destructor(struct sk_buff *skb)
{
#ifdef CONFIG_NETLINK_MMAP
        struct nl_mmap_hdr *hdr;
        struct netlink_ring *ring;
        struct sock *sk;

        /* If a packet from the kernel to userspace was freed because of an
         * error without being delivered to userspace, the kernel must reset
         * the status. In the direction userspace to kernel, the status is
         * always reset here after the packet was processed and freed.
         */
        if (netlink_skb_is_mmaped(skb)) {
                hdr = netlink_mmap_hdr(skb);
                sk = NETLINK_CB(skb).sk;

                if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
                        netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
                        ring = &nlk_sk(sk)->tx_ring;
                } else {
                        if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
                                hdr->nm_len = 0;
                                netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
                        }
                        ring = &nlk_sk(sk)->rx_ring;
                }

                WARN_ON(atomic_read(&ring->pending) == 0);
                atomic_dec(&ring->pending);
                sock_put(sk);

                skb->head = NULL;
        }
#endif
        if (skb->sk != NULL)
                sock_rfree(skb);
}

static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
        WARN_ON(skb->sk != NULL);
        skb->sk = sk;
        skb->destructor = netlink_skb_destructor;
        atomic_add(skb->truesize, &sk->sk_rmem_alloc);
        sk_mem_charge(sk, skb->truesize);
}

static void netlink_sock_destruct(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->cb) {
                if (nlk->cb->done)
                        nlk->cb->done(nlk->cb);

                module_put(nlk->cb->module);
                netlink_destroy_callback(nlk->cb);
        }

        skb_queue_purge(&sk->sk_receive_queue);
#ifdef CONFIG_NETLINK_MMAP
        {
                struct nl_mmap_req req;

                memset(&req, 0, sizeof(req));
                if (nlk->rx_ring.pg_vec)
                        netlink_set_ring(sk, &req, true, false);
                memset(&req, 0, sizeof(req));
                if (nlk->tx_ring.pg_vec)
                        netlink_set_ring(sk, &req, true, true);
        }
#endif /* CONFIG_NETLINK_MMAP */

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
                return;
        }

        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
        WARN_ON(nlk_sk(sk)->groups);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 * SMP. Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

void netlink_table_grab(void)
        __acquires(nl_table_lock)
{
        might_sleep();

        write_lock_irq(&nl_table_lock);

        if (atomic_read(&nl_table_users)) {
                DECLARE_WAITQUEUE(wait, current);

                add_wait_queue_exclusive(&nl_table_wait, &wait);
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&nl_table_users) == 0)
                                break;
                        write_unlock_irq(&nl_table_lock);
                        schedule();
                        write_lock_irq(&nl_table_lock);
                }

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nl_table_wait, &wait);
        }
}

void netlink_table_ungrab(void)
        __releases(nl_table_lock)
{
        write_unlock_irq(&nl_table_lock);
        wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
        /* read_lock() synchronizes us to netlink_table_grab */

        read_lock(&nl_table_lock);
        atomic_inc(&nl_table_users);
        read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
        if (atomic_dec_and_test(&nl_table_users))
                wake_up(&nl_table_wait);
}
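
/* Usage pattern of the two-tier scheme above: read-mostly paths bracket
 * their work with netlink_lock_table()/netlink_unlock_table(), which only
 * bumps nl_table_users under a briefly held read lock. Writers call
 * netlink_table_grab(), which takes the write lock and then sleeps until
 * the user count drains to zero, so grabbing the table excludes both lock
 * holders and counted users.
 */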

static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
        struct nl_portid_hash *hash = &nl_table[protocol].hash;
        struct hlist_head *head;
        struct sock *sk;

        read_lock(&nl_table_lock);
        head = nl_portid_hashfn(hash, portid);
        sk_for_each(sk, head) {
                if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) {
                        sock_hold(sk);
                        goto found;
                }
        }
        sk = NULL;
found:
        read_unlock(&nl_table_lock);
        return sk;
}

static struct hlist_head *nl_portid_hash_zalloc(size_t size)
{
        if (size <= PAGE_SIZE)
                return kzalloc(size, GFP_ATOMIC);
        else
                return (struct hlist_head *)
                        __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                         get_order(size));
}

static void nl_portid_hash_free(struct hlist_head *table, size_t size)
{
        if (size <= PAGE_SIZE)
                kfree(table);
        else
                free_pages((unsigned long)table, get_order(size));
}

static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
{
        unsigned int omask, mask, shift;
        size_t osize, size;
        struct hlist_head *otable, *table;
        int i;

        omask = mask = hash->mask;
        osize = size = (mask + 1) * sizeof(*table);
        shift = hash->shift;

        if (grow) {
                if (++shift > hash->max_shift)
                        return 0;
                mask = mask * 2 + 1;
                size *= 2;
        }

        table = nl_portid_hash_zalloc(size);
        if (!table)
                return 0;

        otable = hash->table;
        hash->table = table;
        hash->mask = mask;
        hash->shift = shift;
        get_random_bytes(&hash->rnd, sizeof(hash->rnd));

        for (i = 0; i <= omask; i++) {
                struct sock *sk;
                struct hlist_node *tmp;

                sk_for_each_safe(sk, tmp, &otable[i])
                        __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
        }

        nl_portid_hash_free(otable, osize);
        hash->rehash_time = jiffies + 10 * 60 * HZ;
        return 1;
}

static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
{
        int avg = hash->entries >> hash->shift;

        if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
                return 1;

        if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
                nl_portid_hash_rehash(hash, 0);
                return 1;
        }

        return 0;
}
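
/* Rehash policy implemented above: grow the table (and re-seed hash->rnd)
 * once the average chain length exceeds one entry per bucket; otherwise, if
 * an insertion walked a chain longer than average, re-seed at the same size,
 * rate limited to once every ten minutes via rehash_time, which defuses
 * pathological or adversarial bucket clustering.
 */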

static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
        unsigned long mask;
        unsigned int i;
        struct listeners *listeners;

        listeners = nl_deref_protected(tbl->listeners);
        if (!listeners)
                return;

        for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
                mask = 0;
                sk_for_each_bound(sk, &tbl->mc_list) {
                        if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
                                mask |= nlk_sk(sk)->groups[i];
                }
                listeners->masks[i] = mask;
        }
        /* this function is only called with the netlink table "grabbed", which
         * makes sure updates are visible before bind or setsockopt return. */
}

static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
{
        struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        int err = -EADDRINUSE;
        struct sock *osk;
        int len;

        netlink_table_grab();
        head = nl_portid_hashfn(hash, portid);
        len = 0;
        sk_for_each(osk, head) {
                if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid))
                        break;
                len++;
        }
        if (osk)
                goto err;

        err = -EBUSY;
        if (nlk_sk(sk)->portid)
                goto err;

        err = -ENOMEM;
        if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
                goto err;

        if (len && nl_portid_hash_dilute(hash, len))
                head = nl_portid_hashfn(hash, portid);
        hash->entries++;
        nlk_sk(sk)->portid = portid;
        sk_add_node(sk, head);
        err = 0;

err:
        netlink_table_ungrab();
        return err;
}

static void netlink_remove(struct sock *sk)
{
        netlink_table_grab();
        if (sk_del_node_init(sk))
                nl_table[sk->sk_protocol].hash.entries--;
        if (nlk_sk(sk)->subscriptions)
                __sk_del_bind_node(sk);
        netlink_table_ungrab();
}

static struct proto netlink_proto = {
        .name     = "NETLINK",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
                            struct mutex *cb_mutex, int protocol)
{
        struct sock *sk;
        struct netlink_sock *nlk;

        sock->ops = &netlink_ops;

        sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);

        nlk = nlk_sk(sk);
        if (cb_mutex) {
                nlk->cb_mutex = cb_mutex;
        } else {
                nlk->cb_mutex = &nlk->cb_def_mutex;
                mutex_init(nlk->cb_mutex);
        }
        init_waitqueue_head(&nlk->wait);
#ifdef CONFIG_NETLINK_MMAP
        mutex_init(&nlk->pg_vec_lock);
#endif

        sk->sk_destruct = netlink_sock_destruct;
        sk->sk_protocol = protocol;
        return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol,
                          int kern)
{
        struct module *module = NULL;
        struct mutex *cb_mutex;
        struct netlink_sock *nlk;
        void (*bind)(int group);
        int err = 0;

        sock->state = SS_UNCONNECTED;

        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
                return -ESOCKTNOSUPPORT;

        if (protocol < 0 || protocol >= MAX_LINKS)
                return -EPROTONOSUPPORT;

        netlink_lock_table();
#ifdef CONFIG_MODULES
        if (!nl_table[protocol].registered) {
                netlink_unlock_table();
                request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
                netlink_lock_table();
        }
#endif
        if (nl_table[protocol].registered &&
            try_module_get(nl_table[protocol].module))
                module = nl_table[protocol].module;
        else
                err = -EPROTONOSUPPORT;
        cb_mutex = nl_table[protocol].cb_mutex;
        bind = nl_table[protocol].bind;
        netlink_unlock_table();

        if (err < 0)
                goto out;

        err = __netlink_create(net, sock, cb_mutex, protocol);
        if (err < 0)
                goto out_module;

        local_bh_disable();
        sock_prot_inuse_add(net, &netlink_proto, 1);
        local_bh_enable();

        nlk = nlk_sk(sock->sk);
        nlk->module = module;
        nlk->netlink_bind = bind;
out:
        return err;

out_module:
        module_put(module);
        goto out;
}

static int netlink_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk;

        if (!sk)
                return 0;

        netlink_remove(sk);
        sock_orphan(sk);
        nlk = nlk_sk(sk);

        /*
         * OK. Socket is unlinked, any packets that arrive now
         * will be purged.
         */

        sock->sk = NULL;
        wake_up_interruptible_all(&nlk->wait);

        skb_queue_purge(&sk->sk_write_queue);

        if (nlk->portid) {
                struct netlink_notify n = {
                                                .net = sock_net(sk),
                                                .protocol = sk->sk_protocol,
                                                .portid = nlk->portid,
                                          };
                atomic_notifier_call_chain(&netlink_chain,
                                NETLINK_URELEASE, &n);
        }

        module_put(nlk->module);

        netlink_table_grab();
        if (netlink_is_kernel(sk)) {
                BUG_ON(nl_table[sk->sk_protocol].registered == 0);
                if (--nl_table[sk->sk_protocol].registered == 0) {
                        struct listeners *old;

                        old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
                        RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
                        kfree_rcu(old, rcu);
                        nl_table[sk->sk_protocol].module = NULL;
                        nl_table[sk->sk_protocol].bind = NULL;
                        nl_table[sk->sk_protocol].flags = 0;
                        nl_table[sk->sk_protocol].registered = 0;
                }
        } else if (nlk->subscriptions) {
                netlink_update_listeners(sk);
        }
        netlink_table_ungrab();

        kfree(nlk->groups);
        nlk->groups = NULL;

        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
        local_bh_enable();
        sock_put(sk);
        return 0;
}

static int netlink_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        struct sock *osk;
        s32 portid = task_tgid_vnr(current);
        int err;
        static s32 rover = -4097;

retry:
        cond_resched();
        netlink_table_grab();
        head = nl_portid_hashfn(hash, portid);
        sk_for_each(osk, head) {
                if (!net_eq(sock_net(osk), net))
                        continue;
                if (nlk_sk(osk)->portid == portid) {
                        /* Bind collision, search negative portid values. */
                        portid = rover--;
                        if (rover > -4097)
                                rover = -4097;
                        netlink_table_ungrab();
                        goto retry;
                }
        }
        netlink_table_ungrab();

        err = netlink_insert(sk, net, portid);
        if (err == -EADDRINUSE)
                goto retry;

        /* If 2 threads race to autobind, that is fine.  */
        if (err == -EBUSY)
                err = 0;

        return err;
}
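
/* A minimal sketch (not part of the build) of the user space behaviour that
 * triggers netlink_autobind(): binding with nl_pid == 0 (or sending from an
 * unbound socket) makes the kernel pick a unique port id, starting from the
 * thread group id and falling back to negative values on collision.
 */
#if 0   /* illustrative user space sketch */
#include <sys/socket.h>
#include <linux/netlink.h>

int autobound_socket(void)
{
        struct sockaddr_nl sa = { .nl_family = AF_NETLINK };    /* nl_pid 0 */
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

        /* nl_pid == 0 requests kernel assignment via netlink_autobind() */
        bind(fd, (struct sockaddr *)&sa, sizeof(sa));
        return fd;
}
#endif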

static inline int netlink_capable(const struct socket *sock, unsigned int flag)
{
        return (nl_table[sock->sk->sk_protocol].flags & flag) ||
                ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->subscriptions && !subscriptions)
                __sk_del_bind_node(sk);
        else if (!nlk->subscriptions && subscriptions)
                sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
        nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int groups;
        unsigned long *new_groups;
        int err = 0;

        netlink_table_grab();

        groups = nl_table[sk->sk_protocol].groups;
        if (!nl_table[sk->sk_protocol].registered) {
                err = -ENOENT;
                goto out_unlock;
        }

        if (nlk->ngroups >= groups)
                goto out_unlock;

        new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
        if (new_groups == NULL) {
                err = -ENOMEM;
                goto out_unlock;
        }
        memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
               NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

        nlk->groups = new_groups;
        nlk->ngroups = groups;
 out_unlock:
        netlink_table_ungrab();
        return err;
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr,
                        int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
        int err;

        if (addr_len < sizeof(struct sockaddr_nl))
                return -EINVAL;

        if (nladdr->nl_family != AF_NETLINK)
                return -EINVAL;

        /* Only the superuser is allowed to listen to multicasts */
        if (nladdr->nl_groups) {
                if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
        }

        if (nlk->portid) {
                if (nladdr->nl_pid != nlk->portid)
                        return -EINVAL;
        } else {
                err = nladdr->nl_pid ?
                        netlink_insert(sk, net, nladdr->nl_pid) :
                        netlink_autobind(sock);
                if (err)
                        return err;
        }

        if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
                return 0;

        netlink_table_grab();
        netlink_update_subscriptions(sk, nlk->subscriptions +
                                         hweight32(nladdr->nl_groups) -
                                         hweight32(nlk->groups[0]));
        nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
        netlink_update_listeners(sk);
        netlink_table_ungrab();

        if (nlk->netlink_bind && nlk->groups[0]) {
                int i;

                for (i = 0; i < nlk->ngroups; i++) {
                        if (test_bit(i, nlk->groups))
                                nlk->netlink_bind(i);
                }
        }

        return 0;
}

static int netlink_connect(struct socket *sock, struct sockaddr *addr,
                           int alen, int flags)
{
        int err = 0;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

        if (alen < sizeof(addr->sa_family))
                return -EINVAL;

        if (addr->sa_family == AF_UNSPEC) {
                sk->sk_state    = NETLINK_UNCONNECTED;
                nlk->dst_portid = 0;
                nlk->dst_group  = 0;
                return 0;
        }
        if (addr->sa_family != AF_NETLINK)
                return -EINVAL;

        /* Only the superuser is allowed to send multicasts */
        if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
                return -EPERM;

        if (!nlk->portid)
                err = netlink_autobind(sock);

        if (err == 0) {
                sk->sk_state    = NETLINK_CONNECTED;
                nlk->dst_portid = nladdr->nl_pid;
                nlk->dst_group  = ffs(nladdr->nl_groups);
        }

        return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
                           int *addr_len, int peer)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

        nladdr->nl_family = AF_NETLINK;
        nladdr->nl_pad = 0;
        *addr_len = sizeof(*nladdr);

        if (peer) {
                nladdr->nl_pid = nlk->dst_portid;
                nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
        } else {
                nladdr->nl_pid = nlk->portid;
                nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
        }
        return 0;
}

static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
        struct sock *sock;
        struct netlink_sock *nlk;

        sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
        if (!sock)
                return ERR_PTR(-ECONNREFUSED);

        /* Don't bother queuing skb if kernel socket has no input function */
        nlk = nlk_sk(sock);
        if (sock->sk_state == NETLINK_CONNECTED &&
            nlk->dst_portid != nlk_sk(ssk)->portid) {
                sock_put(sock);
                return ERR_PTR(-ECONNREFUSED);
        }
        return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
        struct inode *inode = file_inode(filp);
        struct sock *sock;

        if (!S_ISSOCK(inode->i_mode))
                return ERR_PTR(-ENOTSOCK);

        sock = SOCKET_I(inode)->sk;
        if (sock->sk_family != AF_NETLINK)
                return ERR_PTR(-EINVAL);

        sock_hold(sock);
        return sock;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; all error
 * checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
                      long *timeo, struct sock *ssk)
{
        struct netlink_sock *nlk;

        nlk = nlk_sk(sk);

        if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
             test_bit(NETLINK_CONGESTED, &nlk->state)) &&
            !netlink_skb_is_mmaped(skb)) {
                DECLARE_WAITQUEUE(wait, current);
                if (!*timeo) {
                        if (!ssk || netlink_is_kernel(ssk))
                                netlink_overrun(sk);
                        sock_put(sk);
                        kfree_skb(skb);
                        return -EAGAIN;
                }

                __set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&nlk->wait, &wait);

                if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
                     test_bit(NETLINK_CONGESTED, &nlk->state)) &&
                    !sock_flag(sk, SOCK_DEAD))
                        *timeo = schedule_timeout(*timeo);

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nlk->wait, &wait);
                sock_put(sk);

                if (signal_pending(current)) {
                        kfree_skb(skb);
                        return sock_intr_errno(*timeo);
                }
                return 1;
        }
        netlink_skb_set_owner_r(skb, sk);
        return 0;
}

static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = skb->len;

#ifdef CONFIG_NETLINK_MMAP
        if (netlink_skb_is_mmaped(skb))
                netlink_queue_mmaped_skb(sk, skb);
        else if (netlink_rx_is_mmaped(sk))
                netlink_ring_set_copied(sk, skb);
        else
#endif /* CONFIG_NETLINK_MMAP */
                skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk, len);
        return len;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = __netlink_sendskb(sk, skb);

        sock_put(sk);
        return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
        kfree_skb(skb);
        sock_put(sk);
}

static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
        int delta;

        WARN_ON(skb->sk != NULL);
        if (netlink_skb_is_mmaped(skb))
                return skb;

        delta = skb->end - skb->tail;
        if (delta * 2 < skb->truesize)
                return skb;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, allocation);
                if (!nskb)
                        return skb;
                consume_skb(skb);
                skb = nskb;
        }

        if (!pskb_expand_head(skb, 0, -delta, allocation))
                skb->truesize -= delta;

        return skb;
}

static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
                                  struct sock *ssk)
{
        int ret;
        struct netlink_sock *nlk = nlk_sk(sk);

        ret = -ECONNREFUSED;
        if (nlk->netlink_rcv != NULL) {
                ret = skb->len;
                netlink_skb_set_owner_r(skb, sk);
                NETLINK_CB(skb).sk = ssk;
                nlk->netlink_rcv(skb);
                consume_skb(skb);
        } else {
                kfree_skb(skb);
        }
        sock_put(sk);
        return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
                    u32 portid, int nonblock)
{
        struct sock *sk;
        int err;
        long timeo;

        skb = netlink_trim(skb, gfp_any());

        timeo = sock_sndtimeo(ssk, nonblock);
retry:
        sk = netlink_getsockbyportid(ssk, portid);
        if (IS_ERR(sk)) {
                kfree_skb(skb);
                return PTR_ERR(sk);
        }
        if (netlink_is_kernel(sk))
                return netlink_unicast_kernel(sk, skb, ssk);

        if (sk_filter(sk, skb)) {
                err = skb->len;
                kfree_skb(skb);
                sock_put(sk);
                return err;
        }

        err = netlink_attachskb(sk, skb, &timeo, ssk);
        if (err == 1)
                goto retry;
        if (err)
                return err;

        return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);
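
/* A minimal sketch (not part of this file) of a typical kernel-side caller
 * of netlink_unicast(): build a message with the nlmsg helpers and fire it
 * at a user space port id. MY_MSG_TYPE and the payload are placeholders.
 */
#if 0   /* illustrative kernel code sketch */
static int send_reply(struct sock *nl_sk, u32 dst_portid,
                      const void *payload, size_t len)
{
        struct sk_buff *skb;
        struct nlmsghdr *nlh;

        skb = nlmsg_new(len, GFP_KERNEL);
        if (!skb)
                return -ENOMEM;

        nlh = nlmsg_put(skb, 0, 0, MY_MSG_TYPE, len, 0);
        if (!nlh) {
                kfree_skb(skb);
                return -EMSGSIZE;
        }
        memcpy(nlmsg_data(nlh), payload, len);

        /* consumes the skb; returns bytes delivered or a negative error */
        return netlink_unicast(nl_sk, skb, dst_portid, MSG_DONTWAIT);
}
#endif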

struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
                                  u32 dst_portid, gfp_t gfp_mask)
{
#ifdef CONFIG_NETLINK_MMAP
        struct sock *sk = NULL;
        struct sk_buff *skb;
        struct netlink_ring *ring;
        struct nl_mmap_hdr *hdr;
        unsigned int maxlen;

        sk = netlink_getsockbyportid(ssk, dst_portid);
        if (IS_ERR(sk))
                goto out;

        ring = &nlk_sk(sk)->rx_ring;
        /* fast-path without atomic ops for common case: non-mmaped receiver */
        if (ring->pg_vec == NULL)
                goto out_put;

        skb = alloc_skb_head(gfp_mask);
        if (skb == NULL)
                goto err1;

        spin_lock_bh(&sk->sk_receive_queue.lock);
        /* check again under lock */
        if (ring->pg_vec == NULL)
                goto out_free;

        maxlen = ring->frame_size - NL_MMAP_HDRLEN;
        if (maxlen < size)
                goto out_free;

        netlink_forward_ring(ring);
        hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
        if (hdr == NULL)
                goto err2;
        netlink_ring_setup_skb(skb, sk, ring, hdr);
        netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
        atomic_inc(&ring->pending);
        netlink_increment_head(ring);

        spin_unlock_bh(&sk->sk_receive_queue.lock);
        return skb;

err2:
        kfree_skb(skb);
        spin_unlock_bh(&sk->sk_receive_queue.lock);
        netlink_overrun(sk);
err1:
        sock_put(sk);
        return NULL;

out_free:
        kfree_skb(skb);
        spin_unlock_bh(&sk->sk_receive_queue.lock);
out_put:
        sock_put(sk);
out:
#endif
        return alloc_skb(size, gfp_mask);
}
EXPORT_SYMBOL_GPL(netlink_alloc_skb);
1648
1649int netlink_has_listeners(struct sock *sk, unsigned int group)
1650{
1651        int res = 0;
1652        struct listeners *listeners;
1653
1654        BUG_ON(!netlink_is_kernel(sk));
1655
1656        rcu_read_lock();
1657        listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
1658
1659        if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
1660                res = test_bit(group - 1, listeners->masks);
1661
1662        rcu_read_unlock();
1663
1664        return res;
1665}
1666EXPORT_SYMBOL_GPL(netlink_has_listeners);
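
/*
 * Sketch of the cheap pre-check this helper enables: skip building a
 * notification when a (hypothetical) multicast group has no subscribers,
 * mirroring the pattern used by e.g. rtnetlink before allocating.
 */
#define MY_MCAST_GRP 1                          /* hypothetical group */

static void my_notify_sketch(struct sock *nlsk, gfp_t gfp)
{
        struct sk_buff *skb;

        if (!netlink_has_listeners(nlsk, MY_MCAST_GRP))
                return;         /* nobody listening, skip the work */

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
        if (!skb)
                return;
        /* ... fill in the message, then multicast it ... */
        netlink_broadcast(nlsk, skb, 0, MY_MCAST_GRP, gfp);
}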
1667
1668static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
1669{
1670        struct netlink_sock *nlk = nlk_sk(sk);
1671
1672        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
1673            !test_bit(NETLINK_CONGESTED, &nlk->state)) {
1674                netlink_skb_set_owner_r(skb, sk);
1675                __netlink_sendskb(sk, skb);
1676                return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
1677        }
1678        return -1;
1679}
1680
1681struct netlink_broadcast_data {
1682        struct sock *exclude_sk;
1683        struct net *net;
1684        u32 portid;
1685        u32 group;
1686        int failure;
1687        int delivery_failure;
1688        int congested;
1689        int delivered;
1690        gfp_t allocation;
1691        struct sk_buff *skb, *skb2;
1692        int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
1693        void *tx_data;
1694};
1695
1696static int do_one_broadcast(struct sock *sk,
1697                                   struct netlink_broadcast_data *p)
1698{
1699        struct netlink_sock *nlk = nlk_sk(sk);
1700        int val;
1701
1702        if (p->exclude_sk == sk)
1703                goto out;
1704
1705        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1706            !test_bit(p->group - 1, nlk->groups))
1707                goto out;
1708
1709        if (!net_eq(sock_net(sk), p->net))
1710                goto out;
1711
1712        if (p->failure) {
1713                netlink_overrun(sk);
1714                goto out;
1715        }
1716
1717        sock_hold(sk);
1718        if (p->skb2 == NULL) {
1719                if (skb_shared(p->skb)) {
1720                        p->skb2 = skb_clone(p->skb, p->allocation);
1721                } else {
1722                        p->skb2 = skb_get(p->skb);
1723                        /*
1724                         * skb ownership may have been set when
1725                         * delivered to a previous socket.
1726                         */
1727                        skb_orphan(p->skb2);
1728                }
1729        }
1730        if (p->skb2 == NULL) {
1731                netlink_overrun(sk);
1732                /* Clone failed. Notify ALL listeners. */
1733                p->failure = 1;
1734                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1735                        p->delivery_failure = 1;
1736        } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1737                kfree_skb(p->skb2);
1738                p->skb2 = NULL;
1739        } else if (sk_filter(sk, p->skb2)) {
1740                kfree_skb(p->skb2);
1741                p->skb2 = NULL;
1742        } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
1743                netlink_overrun(sk);
1744                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1745                        p->delivery_failure = 1;
1746        } else {
1747                p->congested |= val;
1748                p->delivered = 1;
1749                p->skb2 = NULL;
1750        }
1751        sock_put(sk);
1752
1753out:
1754        return 0;
1755}
1756
1757int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
1758        u32 group, gfp_t allocation,
1759        int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
1760        void *filter_data)
1761{
1762        struct net *net = sock_net(ssk);
1763        struct netlink_broadcast_data info;
1764        struct sock *sk;
1765
1766        skb = netlink_trim(skb, allocation);
1767
1768        info.exclude_sk = ssk;
1769        info.net = net;
1770        info.portid = portid;
1771        info.group = group;
1772        info.failure = 0;
1773        info.delivery_failure = 0;
1774        info.congested = 0;
1775        info.delivered = 0;
1776        info.allocation = allocation;
1777        info.skb = skb;
1778        info.skb2 = NULL;
1779        info.tx_filter = filter;
1780        info.tx_data = filter_data;
1781
1782        /* While we sleep in clone, do not allow the socket list to change */
1783
1784        netlink_lock_table();
1785
1786        sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
1787                do_one_broadcast(sk, &info);
1788
1789        consume_skb(skb);
1790
1791        netlink_unlock_table();
1792
1793        if (info.delivery_failure) {
1794                kfree_skb(info.skb2);
1795                return -ENOBUFS;
1796        }
1797        consume_skb(info.skb2);
1798
1799        if (info.delivered) {
1800                if (info.congested && (allocation & __GFP_WAIT))
1801                        yield();
1802                return 0;
1803        }
1804        return -ESRCH;
1805}
1806EXPORT_SYMBOL(netlink_broadcast_filtered);
1807
1808int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
1809                      u32 group, gfp_t allocation)
1810{
1811        return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
1812                NULL, NULL);
1813}
1814EXPORT_SYMBOL(netlink_broadcast);
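
/*
 * Sketch of interpreting the return value of netlink_broadcast(): -ESRCH
 * only means nobody was subscribed, which is usually not an error for the
 * sender, while -ENOBUFS is reported only when a receiver has enabled
 * NETLINK_BROADCAST_ERROR.  Group 1 is hypothetical.
 */
static int my_broadcast_sketch(struct sock *nlsk, struct sk_buff *skb)
{
        int err = netlink_broadcast(nlsk, skb, 0, 1, GFP_KERNEL);

        return (err == -ESRCH) ? 0 : err;
}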
1815
1816struct netlink_set_err_data {
1817        struct sock *exclude_sk;
1818        u32 portid;
1819        u32 group;
1820        int code;
1821};
1822
1823static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
1824{
1825        struct netlink_sock *nlk = nlk_sk(sk);
1826        int ret = 0;
1827
1828        if (sk == p->exclude_sk)
1829                goto out;
1830
1831        if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
1832                goto out;
1833
1834        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1835            !test_bit(p->group - 1, nlk->groups))
1836                goto out;
1837
1838        if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
1839                ret = 1;
1840                goto out;
1841        }
1842
1843        sk->sk_err = p->code;
1844        sk->sk_error_report(sk);
1845out:
1846        return ret;
1847}
1848
1849/**
1850 * netlink_set_err - report error to broadcast listeners
1851 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
1852 * @portid: the PORTID of a process that we want to skip (if any)
1853 * @group: the broadcast group that will notice the error
1854 * @code: error code, must be negative (as usual in kernelspace)
1855 *
1856 * This function returns the number of broadcast listeners that have set the
1857 * NETLINK_RECV_NO_ENOBUFS socket option.
1858 */
1859int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
1860{
1861        struct netlink_set_err_data info;
1862        struct sock *sk;
1863        int ret = 0;
1864
1865        info.exclude_sk = ssk;
1866        info.portid = portid;
1867        info.group = group;
1868        /* sk->sk_err wants a positive error value */
1869        info.code = -code;
1870
1871        read_lock(&nl_table_lock);
1872
1873        sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
1874                ret += do_one_set_err(sk, &info);
1875
1876        read_unlock(&nl_table_lock);
1877        return ret;
1878}
1879EXPORT_SYMBOL(netlink_set_err);
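
/*
 * Sketch of the typical netlink_set_err() call: report an overrun to every
 * member of a hypothetical group 1.  The return value is the number of
 * listeners that opted out of the report via NETLINK_NO_ENOBUFS.
 */
static int my_set_err_sketch(struct sock *nlsk)
{
        return netlink_set_err(nlsk, 0, 1, -ENOBUFS);
}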
1880
1881/* must be called with netlink table grabbed */
1882static void netlink_update_socket_mc(struct netlink_sock *nlk,
1883                                     unsigned int group,
1884                                     int is_new)
1885{
1886        int old, new = !!is_new, subscriptions;
1887
1888        old = test_bit(group - 1, nlk->groups);
1889        subscriptions = nlk->subscriptions - old + new;
1890        if (new)
1891                __set_bit(group - 1, nlk->groups);
1892        else
1893                __clear_bit(group - 1, nlk->groups);
1894        netlink_update_subscriptions(&nlk->sk, subscriptions);
1895        netlink_update_listeners(&nlk->sk);
1896}
1897
1898static int netlink_setsockopt(struct socket *sock, int level, int optname,
1899                              char __user *optval, unsigned int optlen)
1900{
1901        struct sock *sk = sock->sk;
1902        struct netlink_sock *nlk = nlk_sk(sk);
1903        unsigned int val = 0;
1904        int err;
1905
1906        if (level != SOL_NETLINK)
1907                return -ENOPROTOOPT;
1908
1909        if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
1910            optlen >= sizeof(int) &&
1911            get_user(val, (unsigned int __user *)optval))
1912                return -EFAULT;
1913
1914        switch (optname) {
1915        case NETLINK_PKTINFO:
1916                if (val)
1917                        nlk->flags |= NETLINK_RECV_PKTINFO;
1918                else
1919                        nlk->flags &= ~NETLINK_RECV_PKTINFO;
1920                err = 0;
1921                break;
1922        case NETLINK_ADD_MEMBERSHIP:
1923        case NETLINK_DROP_MEMBERSHIP: {
1924                if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
1925                        return -EPERM;
1926                err = netlink_realloc_groups(sk);
1927                if (err)
1928                        return err;
1929                if (!val || val - 1 >= nlk->ngroups)
1930                        return -EINVAL;
1931                netlink_table_grab();
1932                netlink_update_socket_mc(nlk, val,
1933                                         optname == NETLINK_ADD_MEMBERSHIP);
1934                netlink_table_ungrab();
1935
1936                if (nlk->netlink_bind)
1937                        nlk->netlink_bind(val);
1938
1939                err = 0;
1940                break;
1941        }
1942        case NETLINK_BROADCAST_ERROR:
1943                if (val)
1944                        nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
1945                else
1946                        nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
1947                err = 0;
1948                break;
1949        case NETLINK_NO_ENOBUFS:
1950                if (val) {
1951                        nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
1952                        clear_bit(NETLINK_CONGESTED, &nlk->state);
1953                        wake_up_interruptible(&nlk->wait);
1954                } else {
1955                        nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
1956                }
1957                err = 0;
1958                break;
1959#ifdef CONFIG_NETLINK_MMAP
1960        case NETLINK_RX_RING:
1961        case NETLINK_TX_RING: {
1962                struct nl_mmap_req req;
1963
1964                /* Rings might consume more memory than the queue limits allow,
1965                 * so require CAP_NET_ADMIN.
1966                 */
1967                if (!capable(CAP_NET_ADMIN))
1968                        return -EPERM;
1969                if (optlen < sizeof(req))
1970                        return -EINVAL;
1971                if (copy_from_user(&req, optval, sizeof(req)))
1972                        return -EFAULT;
1973                err = netlink_set_ring(sk, &req, false,
1974                                       optname == NETLINK_TX_RING);
1975                break;
1976        }
1977#endif /* CONFIG_NETLINK_MMAP */
1978        default:
1979                err = -ENOPROTOOPT;
1980        }
1981        return err;
1982}
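
/*
 * Userspace view of the membership options handled above (sketch; the
 * group number is hypothetical):
 *
 *      int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_USERSOCK);
 *      unsigned int grp = 1;
 *
 *      setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp));
 *      ...
 *      setsockopt(fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &grp, sizeof(grp));
 */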
1983
1984static int netlink_getsockopt(struct socket *sock, int level, int optname,
1985                              char __user *optval, int __user *optlen)
1986{
1987        struct sock *sk = sock->sk;
1988        struct netlink_sock *nlk = nlk_sk(sk);
1989        int len, val, err;
1990
1991        if (level != SOL_NETLINK)
1992                return -ENOPROTOOPT;
1993
1994        if (get_user(len, optlen))
1995                return -EFAULT;
1996        if (len < 0)
1997                return -EINVAL;
1998
1999        switch (optname) {
2000        case NETLINK_PKTINFO:
2001                if (len < sizeof(int))
2002                        return -EINVAL;
2003                len = sizeof(int);
2004                val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
2005                if (put_user(len, optlen) ||
2006                    put_user(val, optval))
2007                        return -EFAULT;
2008                err = 0;
2009                break;
2010        case NETLINK_BROADCAST_ERROR:
2011                if (len < sizeof(int))
2012                        return -EINVAL;
2013                len = sizeof(int);
2014                val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
2015                if (put_user(len, optlen) ||
2016                    put_user(val, optval))
2017                        return -EFAULT;
2018                err = 0;
2019                break;
2020        case NETLINK_NO_ENOBUFS:
2021                if (len < sizeof(int))
2022                        return -EINVAL;
2023                len = sizeof(int);
2024                val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
2025                if (put_user(len, optlen) ||
2026                    put_user(val, optval))
2027                        return -EFAULT;
2028                err = 0;
2029                break;
2030        default:
2031                err = -ENOPROTOOPT;
2032        }
2033        return err;
2034}
2035
2036static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
2037{
2038        struct nl_pktinfo info;
2039
2040        info.group = NETLINK_CB(skb).dst_group;
2041        put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
2042}
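
/*
 * Userspace side of the ancillary data above (sketch): once
 * NETLINK_PKTINFO is enabled, the destination group of each message
 * arrives as a control message on recvmsg().  handle_group() is
 * hypothetical.
 *
 *      struct cmsghdr *cmsg;
 *
 *      for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
 *           cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *              if (cmsg->cmsg_level == SOL_NETLINK &&
 *                  cmsg->cmsg_type == NETLINK_PKTINFO) {
 *                      struct nl_pktinfo *pi = (void *)CMSG_DATA(cmsg);
 *                      handle_group(pi->group);
 *              }
 *      }
 */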
2043
2044static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
2045                           struct msghdr *msg, size_t len)
2046{
2047        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
2048        struct sock *sk = sock->sk;
2049        struct netlink_sock *nlk = nlk_sk(sk);
2050        struct sockaddr_nl *addr = msg->msg_name;
2051        u32 dst_portid;
2052        u32 dst_group;
2053        struct sk_buff *skb;
2054        int err;
2055        struct scm_cookie scm;
2056
2057        if (msg->msg_flags&MSG_OOB)
2058                return -EOPNOTSUPP;
2059
2060        if (!siocb->scm)
2061                siocb->scm = &scm;
2062
2063        err = scm_send(sock, msg, siocb->scm, true);
2064        if (err < 0)
2065                return err;
2066
2067        if (msg->msg_namelen) {
2068                err = -EINVAL;
2069                if (addr->nl_family != AF_NETLINK)
2070                        goto out;
2071                dst_portid = addr->nl_pid;
2072                dst_group = ffs(addr->nl_groups);
2073                err = -EPERM;
2074                if ((dst_group || dst_portid) &&
2075                    !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
2076                        goto out;
2077        } else {
2078                dst_portid = nlk->dst_portid;
2079                dst_group = nlk->dst_group;
2080        }
2081
2082        if (!nlk->portid) {
2083                err = netlink_autobind(sock);
2084                if (err)
2085                        goto out;
2086        }
2087
2088        if (netlink_tx_is_mmaped(sk) &&
2089            msg->msg_iov->iov_base == NULL) {
2090                err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
2091                                           siocb);
2092                goto out;
2093        }
2094
2095        err = -EMSGSIZE;
2096        if (len > sk->sk_sndbuf - 32)
2097                goto out;
2098        err = -ENOBUFS;
2099        skb = alloc_skb(len, GFP_KERNEL);
2100        if (skb == NULL)
2101                goto out;
2102
2103        NETLINK_CB(skb).portid  = nlk->portid;
2104        NETLINK_CB(skb).dst_group = dst_group;
2105        NETLINK_CB(skb).creds   = siocb->scm->creds;
2106
2107        err = -EFAULT;
2108        if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
2109                kfree_skb(skb);
2110                goto out;
2111        }
2112
2113        err = security_netlink_send(sk, skb);
2114        if (err) {
2115                kfree_skb(skb);
2116                goto out;
2117        }
2118
2119        if (dst_group) {
2120                atomic_inc(&skb->users);
2121                netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
2122        }
2123        err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
2124
2125out:
2126        scm_destroy(siocb->scm);
2127        return err;
2128}
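
/*
 * Userspace counterpart of the path above (sketch): an explicit msg_name
 * selects unicast via nl_pid (0 addresses the kernel) and/or multicast
 * via nl_groups; buf/buflen stand in for a prepared netlink message.
 *
 *      struct sockaddr_nl dst = { .nl_family = AF_NETLINK };
 *      struct iovec iov = { buf, buflen };
 *      struct msghdr msg = {
 *              .msg_name = &dst, .msg_namelen = sizeof(dst),
 *              .msg_iov = &iov, .msg_iovlen = 1,
 *      };
 *
 *      sendmsg(fd, &msg, 0);
 */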
2129
2130static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
2131                           struct msghdr *msg, size_t len,
2132                           int flags)
2133{
2134        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
2135        struct scm_cookie scm;
2136        struct sock *sk = sock->sk;
2137        struct netlink_sock *nlk = nlk_sk(sk);
2138        int noblock = flags&MSG_DONTWAIT;
2139        size_t copied;
2140        struct sk_buff *skb, *data_skb;
2141        int err, ret;
2142
2143        if (flags&MSG_OOB)
2144                return -EOPNOTSUPP;
2145
2146        copied = 0;
2147
2148        skb = skb_recv_datagram(sk, flags, noblock, &err);
2149        if (skb == NULL)
2150                goto out;
2151
2152        data_skb = skb;
2153
2154#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
2155        if (unlikely(skb_shinfo(skb)->frag_list)) {
2156                /*
2157                 * If this skb has a frag_list, then here that means that we
2158                 * will have to use the frag_list skb's data for compat tasks
2159                 * and the regular skb's data for normal (non-compat) tasks.
2160                 *
2161                 * If we need to send the compat skb, assign it to the
2162                 * 'data_skb' variable so that it will be used below for data
2163                 * copying. We keep 'skb' for everything else, including
2164                 * freeing both later.
2165                 */
2166                if (flags & MSG_CMSG_COMPAT)
2167                        data_skb = skb_shinfo(skb)->frag_list;
2168        }
2169#endif
2170
2171        msg->msg_namelen = 0;
2172
2173        copied = data_skb->len;
2174        if (len < copied) {
2175                msg->msg_flags |= MSG_TRUNC;
2176                copied = len;
2177        }
2178
2179        skb_reset_transport_header(data_skb);
2180        err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
2181
2182        if (msg->msg_name) {
2183                struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
2184                addr->nl_family = AF_NETLINK;
2185                addr->nl_pad    = 0;
2186                addr->nl_pid    = NETLINK_CB(skb).portid;
2187                addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
2188                msg->msg_namelen = sizeof(*addr);
2189        }
2190
2191        if (nlk->flags & NETLINK_RECV_PKTINFO)
2192                netlink_cmsg_recv_pktinfo(msg, skb);
2193
2194        if (!siocb->scm) {
2195                memset(&scm, 0, sizeof(scm));
2196                siocb->scm = &scm;
2197        }
2198        siocb->scm->creds = *NETLINK_CREDS(skb);
2199        if (flags & MSG_TRUNC)
2200                copied = data_skb->len;
2201
2202        skb_free_datagram(sk, skb);
2203
2204        if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
2205                ret = netlink_dump(sk);
2206                if (ret) {
2207                        sk->sk_err = ret;
2208                        sk->sk_error_report(sk);
2209                }
2210        }
2211
2212        scm_recv(sock, msg, siocb->scm, flags);
2213out:
2214        netlink_rcv_wake(sk);
2215        return err ? : copied;
2216}
2217
2218static void netlink_data_ready(struct sock *sk, int len)
2219{
2220        BUG();
2221}
2222
2223/*
2224 *      We export these functions to other modules.  They provide a
2225 *      complete set of non-blocking support for kernel message
2226 *      queueing.
2227 */
2228
2229struct sock *
2230__netlink_kernel_create(struct net *net, int unit, struct module *module,
2231                        struct netlink_kernel_cfg *cfg)
2232{
2233        struct socket *sock;
2234        struct sock *sk;
2235        struct netlink_sock *nlk;
2236        struct listeners *listeners = NULL;
2237        struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
2238        unsigned int groups;
2239
2240        BUG_ON(!nl_table);
2241
2242        if (unit < 0 || unit >= MAX_LINKS)
2243                return NULL;
2244
2245        if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
2246                return NULL;
2247
2248        /*
2249         * We only need a reference on the net from sk, but must not
2250         * get_net() it: we cannot get and then put the net here.  So we
2251         * create the socket inside init_net and then move it to net.
2252         */
2253
2254        if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
2255                goto out_sock_release_nosk;
2256
2257        sk = sock->sk;
2258        sk_change_net(sk, net);
2259
2260        if (!cfg || cfg->groups < 32)
2261                groups = 32;
2262        else
2263                groups = cfg->groups;
2264
2265        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2266        if (!listeners)
2267                goto out_sock_release;
2268
2269        sk->sk_data_ready = netlink_data_ready;
2270        if (cfg && cfg->input)
2271                nlk_sk(sk)->netlink_rcv = cfg->input;
2272
2273        if (netlink_insert(sk, net, 0))
2274                goto out_sock_release;
2275
2276        nlk = nlk_sk(sk);
2277        nlk->flags |= NETLINK_KERNEL_SOCKET;
2278
2279        netlink_table_grab();
2280        if (!nl_table[unit].registered) {
2281                nl_table[unit].groups = groups;
2282                rcu_assign_pointer(nl_table[unit].listeners, listeners);
2283                nl_table[unit].cb_mutex = cb_mutex;
2284                nl_table[unit].module = module;
2285                if (cfg) {
2286                        nl_table[unit].bind = cfg->bind;
2287                        nl_table[unit].flags = cfg->flags;
2288                }
2289                nl_table[unit].registered = 1;
2290        } else {
2291                kfree(listeners);
2292                nl_table[unit].registered++;
2293        }
2294        netlink_table_ungrab();
2295        return sk;
2296
2297out_sock_release:
2298        kfree(listeners);
2299        netlink_kernel_release(sk);
2300        return NULL;
2301
2302out_sock_release_nosk:
2303        sock_release(sock);
2304        return NULL;
2305}
2306EXPORT_SYMBOL(__netlink_kernel_create);
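
/*
 * Sketch of the usual way into this function, via the
 * netlink_kernel_create() wrapper from <linux/netlink.h>.  my_input()
 * and the reuse of NETLINK_USERSOCK are purely illustrative.
 */
static void my_input(struct sk_buff *skb)
{
        /* called from netlink_unicast_kernel() with the skb on loan */
}

static struct sock *my_kernel_sock_sketch(struct net *net)
{
        struct netlink_kernel_cfg cfg = {
                .groups = 32,
                .input  = my_input,
        };

        return netlink_kernel_create(net, NETLINK_USERSOCK, &cfg);
}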
2307
2308void
2309netlink_kernel_release(struct sock *sk)
2310{
2311        sk_release_kernel(sk);
2312}
2313EXPORT_SYMBOL(netlink_kernel_release);
2314
2315int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
2316{
2317        struct listeners *new, *old;
2318        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
2319
2320        if (groups < 32)
2321                groups = 32;
2322
2323        if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
2324                new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
2325                if (!new)
2326                        return -ENOMEM;
2327                old = nl_deref_protected(tbl->listeners);
2328                memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
2329                rcu_assign_pointer(tbl->listeners, new);
2330
2331                kfree_rcu(old, rcu);
2332        }
2333        tbl->groups = groups;
2334
2335        return 0;
2336}
2337
2338/**
2339 * netlink_change_ngroups - change number of multicast groups
2340 *
2341 * This changes the number of multicast groups that are available
2342 * on a certain netlink family. Note that it is not possible to
2343 * change the number of groups to below 32. Also note that it does
2344 * not implicitly call netlink_clear_multicast_users() when the
2345 * number of groups is reduced.
2346 *
2347 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
2348 * @groups: The new number of groups.
2349 */
2350int netlink_change_ngroups(struct sock *sk, unsigned int groups)
2351{
2352        int err;
2353
2354        netlink_table_grab();
2355        err = __netlink_change_ngroups(sk, groups);
2356        netlink_table_ungrab();
2357
2358        return err;
2359}
2360
2361void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
2362{
2363        struct sock *sk;
2364        struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
2365
2366        sk_for_each_bound(sk, &tbl->mc_list)
2367                netlink_update_socket_mc(nlk_sk(sk), group, 0);
2368}
2369
2370/**
2371 * netlink_clear_multicast_users - remove all listeners from a group
2372 *
2373 * This function removes all listeners from the given group.
2374 * @ksk: The kernel netlink socket, as returned by
2375 *      netlink_kernel_create().
2376 * @group: The multicast group to clear.
2377 */
2378void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
2379{
2380        netlink_table_grab();
2381        __netlink_clear_multicast_users(ksk, group);
2382        netlink_table_ungrab();
2383}
2384
2385struct nlmsghdr *
2386__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
2387{
2388        struct nlmsghdr *nlh;
2389        int size = nlmsg_msg_size(len);
2390
2391        nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
2392        nlh->nlmsg_type = type;
2393        nlh->nlmsg_len = size;
2394        nlh->nlmsg_flags = flags;
2395        nlh->nlmsg_pid = portid;
2396        nlh->nlmsg_seq = seq;
2397        if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
2398                memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
2399        return nlh;
2400}
2401EXPORT_SYMBOL(__nlmsg_put);
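
/*
 * Worked example of the padding logic above: for a 5-byte payload,
 * size = NLMSG_HDRLEN (16) + 5 = 21 and NLMSG_ALIGN(21) = 24, so
 * skb_put() reserves 24 bytes, nlmsg_len stays 21, and the memset()
 * zeroes the 3 trailing pad bytes.
 */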
2402
2403/*
2404 * This looks a bit ugly.
2405 * It would be cleaner to create a kernel thread.
2406 */
2407
2408static int netlink_dump(struct sock *sk)
2409{
2410        struct netlink_sock *nlk = nlk_sk(sk);
2411        struct netlink_callback *cb;
2412        struct sk_buff *skb = NULL;
2413        struct nlmsghdr *nlh;
2414        int len, err = -ENOBUFS;
2415        int alloc_size;
2416
2417        mutex_lock(nlk->cb_mutex);
2418
2419        cb = nlk->cb;
2420        if (cb == NULL) {
2421                err = -EINVAL;
2422                goto errout_skb;
2423        }
2424
2425        alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2426
2427        if (!netlink_rx_is_mmaped(sk) &&
2428            atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2429                goto errout_skb;
2430        skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
2431        if (!skb)
2432                goto errout_skb;
2433        netlink_skb_set_owner_r(skb, sk);
2434
2435        len = cb->dump(skb, cb);
2436
2437        if (len > 0) {
2438                mutex_unlock(nlk->cb_mutex);
2439
2440                if (sk_filter(sk, skb))
2441                        kfree_skb(skb);
2442                else
2443                        __netlink_sendskb(sk, skb);
2444                return 0;
2445        }
2446
2447        nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
2448        if (!nlh)
2449                goto errout_skb;
2450
2451        nl_dump_check_consistent(cb, nlh);
2452
2453        memcpy(nlmsg_data(nlh), &len, sizeof(len));
2454
2455        if (sk_filter(sk, skb))
2456                kfree_skb(skb);
2457        else
2458                __netlink_sendskb(sk, skb);
2459
2460        if (cb->done)
2461                cb->done(cb);
2462        nlk->cb = NULL;
2463        mutex_unlock(nlk->cb_mutex);
2464
2465        module_put(cb->module);
2466        netlink_consume_callback(cb);
2467        return 0;
2468
2469errout_skb:
2470        mutex_unlock(nlk->cb_mutex);
2471        kfree_skb(skb);
2472        return err;
2473}
2474
2475int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2476                         const struct nlmsghdr *nlh,
2477                         struct netlink_dump_control *control)
2478{
2479        struct netlink_callback *cb;
2480        struct sock *sk;
2481        struct netlink_sock *nlk;
2482        int ret;
2483
2484        cb = kzalloc(sizeof(*cb), GFP_KERNEL);
2485        if (cb == NULL)
2486                return -ENOBUFS;
2487
2488        /* Memory mapped dump requests need to be copied to avoid looping
2489         * on the pending state in netlink_mmap_sendmsg() while the CB holds
2490         * a reference to the skb.
2491         */
2492        if (netlink_skb_is_mmaped(skb)) {
2493                skb = skb_copy(skb, GFP_KERNEL);
2494                if (skb == NULL) {
2495                        kfree(cb);
2496                        return -ENOBUFS;
2497                }
2498        } else
2499                atomic_inc(&skb->users);
2500
2501        cb->dump = control->dump;
2502        cb->done = control->done;
2503        cb->nlh = nlh;
2504        cb->data = control->data;
2505        cb->module = control->module;
2506        cb->min_dump_alloc = control->min_dump_alloc;
2507        cb->skb = skb;
2508
2509        sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
2510        if (sk == NULL) {
2511                netlink_destroy_callback(cb);
2512                return -ECONNREFUSED;
2513        }
2514        nlk = nlk_sk(sk);
2515
2516        mutex_lock(nlk->cb_mutex);
2517        /* A dump is in progress... */
2518        if (nlk->cb) {
2519                mutex_unlock(nlk->cb_mutex);
2520                netlink_destroy_callback(cb);
2521                ret = -EBUSY;
2522                goto out;
2523        }
2524        /* take a reference on the module that cb->dump belongs to */
2525        if (!try_module_get(cb->module)) {
2526                mutex_unlock(nlk->cb_mutex);
2527                netlink_destroy_callback(cb);
2528                ret = -EPROTONOSUPPORT;
2529                goto out;
2530        }
2531
2532        nlk->cb = cb;
2533        mutex_unlock(nlk->cb_mutex);
2534
2535        ret = netlink_dump(sk);
2536out:
2537        sock_put(sk);
2538
2539        if (ret)
2540                return ret;
2541
2542        /* We successfully started a dump; by returning -EINTR we
2543         * signal that no ACK should be sent even if one was requested.
2544         */
2545        return -EINTR;
2546}
2547EXPORT_SYMBOL(__netlink_dump_start);
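
/*
 * Sketch of starting a dump from a request handler via the usual
 * netlink_dump_start() wrapper.  my_dump() and my_dump_req_sketch() are
 * hypothetical.
 */
static int my_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        /* fill skb; return skb->len while more is pending, 0 when done */
        return 0;
}

static int my_dump_req_sketch(struct sock *nlsk, struct sk_buff *skb,
                              struct nlmsghdr *nlh)
{
        struct netlink_dump_control c = {
                .dump = my_dump,
        };

        /* -EINTR on success suppresses the automatic ACK, see above */
        return netlink_dump_start(nlsk, skb, nlh, &c);
}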
2548
2549void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
2550{
2551        struct sk_buff *skb;
2552        struct nlmsghdr *rep;
2553        struct nlmsgerr *errmsg;
2554        size_t payload = sizeof(*errmsg);
2555
2556        /* error messages get the original request appended */
2557        if (err)
2558                payload += nlmsg_len(nlh);
2559
2560        skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
2561                                NETLINK_CB(in_skb).portid, GFP_KERNEL);
2562        if (!skb) {
2563                struct sock *sk;
2564
2565                sk = netlink_lookup(sock_net(in_skb->sk),
2566                                    in_skb->sk->sk_protocol,
2567                                    NETLINK_CB(in_skb).portid);
2568                if (sk) {
2569                        sk->sk_err = ENOBUFS;
2570                        sk->sk_error_report(sk);
2571                        sock_put(sk);
2572                }
2573                return;
2574        }
2575
2576        rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
2577                          NLMSG_ERROR, payload, 0);
2578        errmsg = nlmsg_data(rep);
2579        errmsg->error = err;
2580        memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
2581        netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
2582}
2583EXPORT_SYMBOL(netlink_ack);
2584
2585int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
2586                                                     struct nlmsghdr *))
2587{
2588        struct nlmsghdr *nlh;
2589        int err;
2590
2591        while (skb->len >= nlmsg_total_size(0)) {
2592                int msglen;
2593
2594                nlh = nlmsg_hdr(skb);
2595                err = 0;
2596
2597                if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
2598                        return 0;
2599
2600                /* Only requests are handled by the kernel */
2601                if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
2602                        goto ack;
2603
2604                /* Skip control messages */
2605                if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
2606                        goto ack;
2607
2608                err = cb(skb, nlh);
2609                if (err == -EINTR)
2610                        goto skip;
2611
2612ack:
2613                if (nlh->nlmsg_flags & NLM_F_ACK || err)
2614                        netlink_ack(skb, nlh, err);
2615
2616skip:
2617                msglen = NLMSG_ALIGN(nlh->nlmsg_len);
2618                if (msglen > skb->len)
2619                        msglen = skb->len;
2620                skb_pull(skb, msglen);
2621        }
2622
2623        return 0;
2624}
2625EXPORT_SYMBOL(netlink_rcv_skb);
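
/*
 * Sketch of the usual pairing: a cfg->input hook feeds every request to a
 * per-message handler through netlink_rcv_skb(), which takes care of ACKs
 * and of skipping control messages.  my_msg_handler() is hypothetical.
 */
static int my_msg_handler(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        return 0;       /* 0 on success, negative errno to NACK */
}

static void my_rcv_sketch(struct sk_buff *skb)
{
        netlink_rcv_skb(skb, my_msg_handler);
}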
2626
2627/**
2628 * nlmsg_notify - send a notification netlink message
2629 * @sk: netlink socket to use
2630 * @skb: notification message
2631 * @portid: destination netlink portid for reports or 0
2632 * @group: destination multicast group or 0
2633 * @report: 1 to report back, 0 to disable
2634 * @flags: allocation flags
2635 */
2636int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
2637                 unsigned int group, int report, gfp_t flags)
2638{
2639        int err = 0;
2640
2641        if (group) {
2642                int exclude_portid = 0;
2643
2644                if (report) {
2645                        atomic_inc(&skb->users);
2646                        exclude_portid = portid;
2647                }
2648
2649                /* errors are reported via the destination sk->sk_err, but
2650                 * delivery errors propagate only if NETLINK_BROADCAST_ERROR is set */
2651                err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
2652        }
2653
2654        if (report) {
2655                int err2;
2656
2657                err2 = nlmsg_unicast(sk, skb, portid);
2658                if (!err || err == -ESRCH)
2659                        err = err2;
2660        }
2661
2662        return err;
2663}
2664EXPORT_SYMBOL(nlmsg_notify);
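
/*
 * Sketch of the typical caller, mirroring rtnl_notify(): echo back to the
 * requester when NLM_F_ECHO was set and multicast to a hypothetical
 * group 1.
 */
static int my_notify_users_sketch(struct sock *nlsk, struct sk_buff *skb,
                                  struct nlmsghdr *nlh, u32 portid)
{
        int report = nlmsg_report(nlh); /* NLM_F_ECHO requested? */

        return nlmsg_notify(nlsk, skb, portid, 1, report, GFP_KERNEL);
}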
2665
2666#ifdef CONFIG_PROC_FS
2667struct nl_seq_iter {
2668        struct seq_net_private p;
2669        int link;
2670        int hash_idx;
2671};
2672
2673static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
2674{
2675        struct nl_seq_iter *iter = seq->private;
2676        int i, j;
2677        struct sock *s;
2678        loff_t off = 0;
2679
2680        for (i = 0; i < MAX_LINKS; i++) {
2681                struct nl_portid_hash *hash = &nl_table[i].hash;
2682
2683                for (j = 0; j <= hash->mask; j++) {
2684                        sk_for_each(s, &hash->table[j]) {
2685                                if (sock_net(s) != seq_file_net(seq))
2686                                        continue;
2687                                if (off == pos) {
2688                                        iter->link = i;
2689                                        iter->hash_idx = j;
2690                                        return s;
2691                                }
2692                                ++off;
2693                        }
2694                }
2695        }
2696        return NULL;
2697}
2698
2699static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
2700        __acquires(nl_table_lock)
2701{
2702        read_lock(&nl_table_lock);
2703        return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2704}
2705
2706static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2707{
2708        struct sock *s;
2709        struct nl_seq_iter *iter;
2710        int i, j;
2711
2712        ++*pos;
2713
2714        if (v == SEQ_START_TOKEN)
2715                return netlink_seq_socket_idx(seq, 0);
2716
2717        iter = seq->private;
2718        s = v;
2719        do {
2720                s = sk_next(s);
2721        } while (s && sock_net(s) != seq_file_net(seq));
2722        if (s)
2723                return s;
2724
2725        i = iter->link;
2726        j = iter->hash_idx + 1;
2727
2728        do {
2729                struct nl_portid_hash *hash = &nl_table[i].hash;
2730
2731                for (; j <= hash->mask; j++) {
2732                        s = sk_head(&hash->table[j]);
2733                        while (s && sock_net(s) != seq_file_net(seq))
2734                                s = sk_next(s);
2735                        if (s) {
2736                                iter->link = i;
2737                                iter->hash_idx = j;
2738                                return s;
2739                        }
2740                }
2741
2742                j = 0;
2743        } while (++i < MAX_LINKS);
2744
2745        return NULL;
2746}
2747
2748static void netlink_seq_stop(struct seq_file *seq, void *v)
2749        __releases(nl_table_lock)
2750{
2751        read_unlock(&nl_table_lock);
2752}
2753
2754
2755static int netlink_seq_show(struct seq_file *seq, void *v)
2756{
2757        if (v == SEQ_START_TOKEN) {
2758                seq_puts(seq,
2759                         "sk       Eth Pid    Groups   "
2760                         "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
2761        } else {
2762                struct sock *s = v;
2763                struct netlink_sock *nlk = nlk_sk(s);
2764
2765                seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
2766                           s,
2767                           s->sk_protocol,
2768                           nlk->portid,
2769                           nlk->groups ? (u32)nlk->groups[0] : 0,
2770                           sk_rmem_alloc_get(s),
2771                           sk_wmem_alloc_get(s),
2772                           nlk->cb,
2773                           atomic_read(&s->sk_refcnt),
2774                           atomic_read(&s->sk_drops),
2775                           sock_i_ino(s)
2776                        );
2777
2778        }
2779        return 0;
2780}
2781
2782static const struct seq_operations netlink_seq_ops = {
2783        .start  = netlink_seq_start,
2784        .next   = netlink_seq_next,
2785        .stop   = netlink_seq_stop,
2786        .show   = netlink_seq_show,
2787};
2788
2789
2790static int netlink_seq_open(struct inode *inode, struct file *file)
2791{
2792        return seq_open_net(inode, file, &netlink_seq_ops,
2793                                sizeof(struct nl_seq_iter));
2794}
2795
2796static const struct file_operations netlink_seq_fops = {
2797        .owner          = THIS_MODULE,
2798        .open           = netlink_seq_open,
2799        .read           = seq_read,
2800        .llseek         = seq_lseek,
2801        .release        = seq_release_net,
2802};
2803
2804#endif
2805
2806int netlink_register_notifier(struct notifier_block *nb)
2807{
2808        return atomic_notifier_chain_register(&netlink_chain, nb);
2809}
2810EXPORT_SYMBOL(netlink_register_notifier);
2811
2812int netlink_unregister_notifier(struct notifier_block *nb)
2813{
2814        return atomic_notifier_chain_unregister(&netlink_chain, nb);
2815}
2816EXPORT_SYMBOL(netlink_unregister_notifier);
2817
2818static const struct proto_ops netlink_ops = {
2819        .family =       PF_NETLINK,
2820        .owner =        THIS_MODULE,
2821        .release =      netlink_release,
2822        .bind =         netlink_bind,
2823        .connect =      netlink_connect,
2824        .socketpair =   sock_no_socketpair,
2825        .accept =       sock_no_accept,
2826        .getname =      netlink_getname,
2827        .poll =         netlink_poll,
2828        .ioctl =        sock_no_ioctl,
2829        .listen =       sock_no_listen,
2830        .shutdown =     sock_no_shutdown,
2831        .setsockopt =   netlink_setsockopt,
2832        .getsockopt =   netlink_getsockopt,
2833        .sendmsg =      netlink_sendmsg,
2834        .recvmsg =      netlink_recvmsg,
2835        .mmap =         netlink_mmap,
2836        .sendpage =     sock_no_sendpage,
2837};
2838
2839static const struct net_proto_family netlink_family_ops = {
2840        .family = PF_NETLINK,
2841        .create = netlink_create,
2842        .owner  = THIS_MODULE,  /* for consistency 8) */
2843};
2844
2845static int __net_init netlink_net_init(struct net *net)
2846{
2847#ifdef CONFIG_PROC_FS
2848        if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
2849                return -ENOMEM;
2850#endif
2851        return 0;
2852}
2853
2854static void __net_exit netlink_net_exit(struct net *net)
2855{
2856#ifdef CONFIG_PROC_FS
2857        remove_proc_entry("netlink", net->proc_net);
2858#endif
2859}
2860
2861static void __init netlink_add_usersock_entry(void)
2862{
2863        struct listeners *listeners;
2864        int groups = 32;
2865
2866        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2867        if (!listeners)
2868                panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2869
2870        netlink_table_grab();
2871
2872        nl_table[NETLINK_USERSOCK].groups = groups;
2873        rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2874        nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2875        nl_table[NETLINK_USERSOCK].registered = 1;
2876        nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
2877
2878        netlink_table_ungrab();
2879}
2880
2881static struct pernet_operations __net_initdata netlink_net_ops = {
2882        .init = netlink_net_init,
2883        .exit = netlink_net_exit,
2884};
2885
2886static int __init netlink_proto_init(void)
2887{
2888        int i;
2889        unsigned long limit;
2890        unsigned int order;
2891        int err = proto_register(&netlink_proto, 0);
2892
2893        if (err != 0)
2894                goto out;
2895
2896        BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2897
2898        nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2899        if (!nl_table)
2900                goto panic;
2901
2902        if (totalram_pages >= (128 * 1024))
2903                limit = totalram_pages >> (21 - PAGE_SHIFT);
2904        else
2905                limit = totalram_pages >> (23 - PAGE_SHIFT);
2906
2907        order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
2908        limit = (1UL << order) / sizeof(struct hlist_head);
2909        order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
2910
2911        for (i = 0; i < MAX_LINKS; i++) {
2912                struct nl_portid_hash *hash = &nl_table[i].hash;
2913
2914                hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
2915                if (!hash->table) {
2916                        while (i-- > 0)
2917                                nl_portid_hash_free(nl_table[i].hash.table,
2918                                                 1 * sizeof(*hash->table));
2919                        kfree(nl_table);
2920                        goto panic;
2921                }
2922                hash->max_shift = order;
2923                hash->shift = 0;
2924                hash->mask = 0;
2925                hash->rehash_time = jiffies;
2926        }
2927
2928        netlink_add_usersock_entry();
2929
2930        sock_register(&netlink_family_ops);
2931        register_pernet_subsys(&netlink_net_ops);
2932        /* The netlink device handler may be needed early. */
2933        rtnetlink_init();
2934out:
2935        return err;
2936panic:
2937        panic("netlink_init: Cannot allocate nl_table\n");
2938}
2939
2940core_initcall(netlink_proto_init);
2941