linux/net/netlink/af_netlink.c
/*
 * NETLINK      Kernel-user communication protocol.
 *
 *              Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
 *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *                               - inc module use count of module that owns
 *                                 the kernel socket in case userspace opens
 *                                 socket of same protocol
 *                               - remove all module support, since netlink is
 *                                 mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#define NLGRPSZ(x)      (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x)   (NLGRPSZ(x)/sizeof(unsigned long))

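/*
 * Example (illustrative, not used by the code below): on a 64-bit box,
 * NLGRPSZ(32) rounds 32 group bits up to one long (64 bits) and yields
 * 8 bytes, so NLGRPLONGS(32) == 1; NLGRPSZ(65) yields 16 bytes and
 * NLGRPLONGS(65) == 2.
 */
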
struct netlink_sock {
        /* struct sock has to be the first member of netlink_sock */
        struct sock             sk;
        u32                     pid;
        u32                     dst_pid;
        u32                     dst_group;
        u32                     flags;
        u32                     subscriptions;
        u32                     ngroups;
        unsigned long           *groups;
        unsigned long           state;
        wait_queue_head_t       wait;
        struct netlink_callback *cb;
        struct mutex            *cb_mutex;
        struct mutex            cb_def_mutex;
        void                    (*netlink_rcv)(struct sk_buff *skb);
        struct module           *module;
};

struct listeners {
        struct rcu_head         rcu;
        unsigned long           masks[0];
};

#define NETLINK_KERNEL_SOCKET   0x1
#define NETLINK_RECV_PKTINFO    0x2
#define NETLINK_BROADCAST_SEND_ERROR    0x4
#define NETLINK_RECV_NO_ENOBUFS 0x8

static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
        return container_of(sk, struct netlink_sock, sk);
}

static inline int netlink_is_kernel(struct sock *sk)
{
        return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct nl_pid_hash {
        struct hlist_head *table;
        unsigned long rehash_time;

        unsigned int mask;
        unsigned int shift;

        unsigned int entries;
        unsigned int max_shift;

        u32 rnd;
};

struct netlink_table {
        struct nl_pid_hash hash;
        struct hlist_head mc_list;
        struct listeners __rcu *listeners;
        unsigned int nl_nonroot;
        unsigned int groups;
        struct mutex *cb_mutex;
        struct module *module;
        int registered;
};

static struct netlink_table *nl_table;

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);

static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

static u32 netlink_group_mask(u32 group)
{
        return group ? 1 << (group - 1) : 0;
}

static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
{
        return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
}
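
/*
 * Example (illustrative): netlink_group_mask(3) == 0x4, i.e. 1-based
 * multicast group 3 corresponds to bit 2 of the legacy 32-bit mask,
 * while group 0 ("no group") yields an empty mask.  nl_pid_hashfn()
 * picks a bucket by hashing the pid with the table's random seed and
 * masking the result to the current table size.
 */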

static void netlink_sock_destruct(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->cb) {
                if (nlk->cb->done)
                        nlk->cb->done(nlk->cb);
                netlink_destroy_callback(nlk->cb);
        }

        skb_queue_purge(&sk->sk_receive_queue);

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
                return;
        }

        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
        WARN_ON(nlk_sk(sk)->groups);
}

/* Sleeping on this lock without WQ_FLAG_EXCLUSIVE is fine on UP but _very_
 * bad on SMP: when several writers sleep and the reader wakes them up, all
 * but one immediately hit the write lock and grab all the CPUs. Exclusive
 * sleep solves this, _but_ remember that it adds useless work on UP
 * machines.
 */

void netlink_table_grab(void)
        __acquires(nl_table_lock)
{
        might_sleep();

        write_lock_irq(&nl_table_lock);

        if (atomic_read(&nl_table_users)) {
                DECLARE_WAITQUEUE(wait, current);

                add_wait_queue_exclusive(&nl_table_wait, &wait);
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&nl_table_users) == 0)
                                break;
                        write_unlock_irq(&nl_table_lock);
                        schedule();
                        write_lock_irq(&nl_table_lock);
                }

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nl_table_wait, &wait);
        }
}

void netlink_table_ungrab(void)
        __releases(nl_table_lock)
{
        write_unlock_irq(&nl_table_lock);
        wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
        /* read_lock() synchronizes us with netlink_table_grab() */

        read_lock(&nl_table_lock);
        atomic_inc(&nl_table_users);
        read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
        if (atomic_dec_and_test(&nl_table_users))
                wake_up(&nl_table_wait);
}
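
/*
 * Usage sketch (illustrative): writers bracket table updates with
 * netlink_table_grab()/netlink_table_ungrab(), which sleep until the
 * readers registered via netlink_lock_table() have drained:
 *
 *      netlink_table_grab();
 *      nl_table[protocol].groups = new_count;  // exclusive access here
 *      netlink_table_ungrab();
 */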

static inline struct sock *netlink_lookup(struct net *net, int protocol,
                                          u32 pid)
{
        struct nl_pid_hash *hash = &nl_table[protocol].hash;
        struct hlist_head *head;
        struct sock *sk;
        struct hlist_node *node;

        read_lock(&nl_table_lock);
        head = nl_pid_hashfn(hash, pid);
        sk_for_each(sk, node, head) {
                if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
                        sock_hold(sk);
                        goto found;
                }
        }
        sk = NULL;
found:
        read_unlock(&nl_table_lock);
        return sk;
}

static inline struct hlist_head *nl_pid_hash_zalloc(size_t size)
{
        if (size <= PAGE_SIZE)
                return kzalloc(size, GFP_ATOMIC);
        else
                return (struct hlist_head *)
                        __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                         get_order(size));
}

static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
{
        if (size <= PAGE_SIZE)
                kfree(table);
        else
                free_pages((unsigned long)table, get_order(size));
}

static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
{
        unsigned int omask, mask, shift;
        size_t osize, size;
        struct hlist_head *otable, *table;
        int i;

        omask = mask = hash->mask;
        osize = size = (mask + 1) * sizeof(*table);
        shift = hash->shift;

        if (grow) {
                if (++shift > hash->max_shift)
                        return 0;
                mask = mask * 2 + 1;
                size *= 2;
        }

        table = nl_pid_hash_zalloc(size);
        if (!table)
                return 0;

        otable = hash->table;
        hash->table = table;
        hash->mask = mask;
        hash->shift = shift;
        get_random_bytes(&hash->rnd, sizeof(hash->rnd));

        for (i = 0; i <= omask; i++) {
                struct sock *sk;
                struct hlist_node *node, *tmp;

                sk_for_each_safe(sk, node, tmp, &otable[i])
                        __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
        }

        nl_pid_hash_free(otable, osize);
        hash->rehash_time = jiffies + 10 * 60 * HZ;
        return 1;
}

static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
{
        int avg = hash->entries >> hash->shift;

        if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
                return 1;

        if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
                nl_pid_hash_rehash(hash, 0);
                return 1;
        }

        return 0;
}
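
/*
 * Example (illustrative): with shift == 4 (16 buckets) and 40 hashed
 * sockets, avg == 40 >> 4 == 2, so the next colliding insert grows the
 * table to 32 buckets.  If the average load is fine but the chain just
 * walked is longer than avg, a same-size rehash with a fresh random
 * seed redistributes the chains instead, at most once per the
 * 10-minute rehash_time window.
 */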

static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
        struct hlist_node *node;
        unsigned long mask;
        unsigned int i;

        for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
                mask = 0;
                sk_for_each_bound(sk, node, &tbl->mc_list) {
                        if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
                                mask |= nlk_sk(sk)->groups[i];
                }
                tbl->listeners->masks[i] = mask;
        }
        /* this function is only called with the netlink table "grabbed", which
         * makes sure updates are visible before bind or setsockopt return. */
}

static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
{
        struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        int err = -EADDRINUSE;
        struct sock *osk;
        struct hlist_node *node;
        int len;

        netlink_table_grab();
        head = nl_pid_hashfn(hash, pid);
        len = 0;
        sk_for_each(osk, node, head) {
                if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
                        break;
                len++;
        }
        if (node)
                goto err;

        err = -EBUSY;
        if (nlk_sk(sk)->pid)
                goto err;

        err = -ENOMEM;
        if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
                goto err;

        if (len && nl_pid_hash_dilute(hash, len))
                head = nl_pid_hashfn(hash, pid);
        hash->entries++;
        nlk_sk(sk)->pid = pid;
        sk_add_node(sk, head);
        err = 0;

err:
        netlink_table_ungrab();
        return err;
}

static void netlink_remove(struct sock *sk)
{
        netlink_table_grab();
        if (sk_del_node_init(sk))
                nl_table[sk->sk_protocol].hash.entries--;
        if (nlk_sk(sk)->subscriptions)
                __sk_del_bind_node(sk);
        netlink_table_ungrab();
}

static struct proto netlink_proto = {
        .name     = "NETLINK",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
                            struct mutex *cb_mutex, int protocol)
{
        struct sock *sk;
        struct netlink_sock *nlk;

        sock->ops = &netlink_ops;

        sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);

        nlk = nlk_sk(sk);
        if (cb_mutex)
                nlk->cb_mutex = cb_mutex;
        else {
                nlk->cb_mutex = &nlk->cb_def_mutex;
                mutex_init(nlk->cb_mutex);
        }
        init_waitqueue_head(&nlk->wait);

        sk->sk_destruct = netlink_sock_destruct;
        sk->sk_protocol = protocol;
        return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol,
                          int kern)
{
        struct module *module = NULL;
        struct mutex *cb_mutex;
        struct netlink_sock *nlk;
        int err = 0;

        sock->state = SS_UNCONNECTED;

        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
                return -ESOCKTNOSUPPORT;

        if (protocol < 0 || protocol >= MAX_LINKS)
                return -EPROTONOSUPPORT;

        netlink_lock_table();
#ifdef CONFIG_MODULES
        if (!nl_table[protocol].registered) {
                netlink_unlock_table();
                request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
                netlink_lock_table();
        }
#endif
        if (nl_table[protocol].registered &&
            try_module_get(nl_table[protocol].module))
                module = nl_table[protocol].module;
        else
                err = -EPROTONOSUPPORT;
        cb_mutex = nl_table[protocol].cb_mutex;
        netlink_unlock_table();

        if (err < 0)
                goto out;

        err = __netlink_create(net, sock, cb_mutex, protocol);
        if (err < 0)
                goto out_module;

        local_bh_disable();
        sock_prot_inuse_add(net, &netlink_proto, 1);
        local_bh_enable();

        nlk = nlk_sk(sock->sk);
        nlk->module = module;
out:
        return err;

out_module:
        module_put(module);
        goto out;
}

static int netlink_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk;

        if (!sk)
                return 0;

        netlink_remove(sk);
        sock_orphan(sk);
        nlk = nlk_sk(sk);

        /*
         * OK. Socket is unlinked, any packets that arrive now
         * will be purged.
         */

        sock->sk = NULL;
        wake_up_interruptible_all(&nlk->wait);

        skb_queue_purge(&sk->sk_write_queue);

        if (nlk->pid) {
                struct netlink_notify n = {
                                                .net = sock_net(sk),
                                                .protocol = sk->sk_protocol,
                                                .pid = nlk->pid,
                                          };
                atomic_notifier_call_chain(&netlink_chain,
                                NETLINK_URELEASE, &n);
        }

        module_put(nlk->module);

        netlink_table_grab();
        if (netlink_is_kernel(sk)) {
                BUG_ON(nl_table[sk->sk_protocol].registered == 0);
                if (--nl_table[sk->sk_protocol].registered == 0) {
                        kfree(nl_table[sk->sk_protocol].listeners);
                        nl_table[sk->sk_protocol].module = NULL;
                        nl_table[sk->sk_protocol].registered = 0;
                }
        } else if (nlk->subscriptions)
                netlink_update_listeners(sk);
        netlink_table_ungrab();

        kfree(nlk->groups);
        nlk->groups = NULL;

        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
        local_bh_enable();
        sock_put(sk);
        return 0;
}

static int netlink_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        struct sock *osk;
        struct hlist_node *node;
        s32 pid = task_tgid_vnr(current);
        int err;
        static s32 rover = -4097;

retry:
        cond_resched();
        netlink_table_grab();
        head = nl_pid_hashfn(hash, pid);
        sk_for_each(osk, node, head) {
                if (!net_eq(sock_net(osk), net))
                        continue;
                if (nlk_sk(osk)->pid == pid) {
                        /* Bind collision, search negative pid values. */
                        pid = rover--;
                        if (rover > -4097)
                                rover = -4097;
                        netlink_table_ungrab();
                        goto retry;
                }
        }
        netlink_table_ungrab();

        err = netlink_insert(sk, net, pid);
        if (err == -EADDRINUSE)
                goto retry;

        /* If 2 threads race to autobind, that is fine.  */
        if (err == -EBUSY)
                err = 0;

        return err;
}
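
/*
 * Illustrative note: autobinding is what userspace gets when it sends
 * before calling bind().  A hypothetical sequence:
 *
 *      int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *      sendto(fd, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));
 *
 * With no explicit bind(), netlink_sendmsg() calls netlink_autobind(),
 * which claims the caller's tgid as the pid, or a negative rover value
 * on collision.
 */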

static inline int netlink_capable(struct socket *sock, unsigned int flag)
{
        return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
               capable(CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->subscriptions && !subscriptions)
                __sk_del_bind_node(sk);
        else if (!nlk->subscriptions && subscriptions)
                sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
        nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int groups;
        unsigned long *new_groups;
        int err = 0;

        netlink_table_grab();

        groups = nl_table[sk->sk_protocol].groups;
        if (!nl_table[sk->sk_protocol].registered) {
                err = -ENOENT;
                goto out_unlock;
        }

        if (nlk->ngroups >= groups)
                goto out_unlock;

        new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
        if (new_groups == NULL) {
                err = -ENOMEM;
                goto out_unlock;
        }
        memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
               NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

        nlk->groups = new_groups;
        nlk->ngroups = groups;
 out_unlock:
        netlink_table_ungrab();
        return err;
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr,
                        int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
        int err;

        if (nladdr->nl_family != AF_NETLINK)
                return -EINVAL;

        /* Only the superuser is allowed to listen to multicasts */
        if (nladdr->nl_groups) {
                if (!netlink_capable(sock, NL_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
        }

        if (nlk->pid) {
                if (nladdr->nl_pid != nlk->pid)
                        return -EINVAL;
        } else {
                err = nladdr->nl_pid ?
                        netlink_insert(sk, net, nladdr->nl_pid) :
                        netlink_autobind(sock);
                if (err)
                        return err;
        }

        if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
                return 0;

        netlink_table_grab();
        netlink_update_subscriptions(sk, nlk->subscriptions +
                                         hweight32(nladdr->nl_groups) -
                                         hweight32(nlk->groups[0]));
        nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
        netlink_update_listeners(sk);
        netlink_table_ungrab();

        return 0;
}
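
/*
 * Userspace counterpart (illustrative sketch; RTMGRP_LINK is just one
 * example of a legacy 32-bit group mask):
 *
 *      struct sockaddr_nl snl = {
 *              .nl_family = AF_NETLINK,
 *              .nl_pid    = 0,                 // 0: let the kernel pick
 *              .nl_groups = RTMGRP_LINK,       // needs CAP_NET_ADMIN unless
 *                                              // nl_nonroot permits it
 *      };
 *      bind(fd, (struct sockaddr *)&snl, sizeof(snl));
 */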

static int netlink_connect(struct socket *sock, struct sockaddr *addr,
                           int alen, int flags)
{
        int err = 0;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

        if (alen < sizeof(addr->sa_family))
                return -EINVAL;

        if (addr->sa_family == AF_UNSPEC) {
                sk->sk_state    = NETLINK_UNCONNECTED;
                nlk->dst_pid    = 0;
                nlk->dst_group  = 0;
                return 0;
        }
        if (addr->sa_family != AF_NETLINK)
                return -EINVAL;

        /* Only the superuser is allowed to send to multicasts */
        if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
                return -EPERM;

        if (!nlk->pid)
                err = netlink_autobind(sock);

        if (err == 0) {
                sk->sk_state    = NETLINK_CONNECTED;
                nlk->dst_pid    = nladdr->nl_pid;
                nlk->dst_group  = ffs(nladdr->nl_groups);
        }

        return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
                           int *addr_len, int peer)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

        nladdr->nl_family = AF_NETLINK;
        nladdr->nl_pad = 0;
        *addr_len = sizeof(*nladdr);

        if (peer) {
                nladdr->nl_pid = nlk->dst_pid;
                nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
        } else {
                nladdr->nl_pid = nlk->pid;
                nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
        }
        return 0;
}

static void netlink_overrun(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
                if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
                        sk->sk_err = ENOBUFS;
                        sk->sk_error_report(sk);
                }
        }
        atomic_inc(&sk->sk_drops);
}

static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
{
        struct sock *sock;
        struct netlink_sock *nlk;

        sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
        if (!sock)
                return ERR_PTR(-ECONNREFUSED);

        /* A connected destination only accepts messages from its peer */
        nlk = nlk_sk(sock);
        if (sock->sk_state == NETLINK_CONNECTED &&
            nlk->dst_pid != nlk_sk(ssk)->pid) {
                sock_put(sock);
                return ERR_PTR(-ECONNREFUSED);
        }
        return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct sock *sock;

        if (!S_ISSOCK(inode->i_mode))
                return ERR_PTR(-ENOTSOCK);

        sock = SOCKET_I(inode)->sk;
        if (sock->sk_family != AF_NETLINK)
                return ERR_PTR(-EINVAL);

        sock_hold(sock);
        return sock;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; only the
 * error checks are performed and memory in the receive queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
                      long *timeo, struct sock *ssk)
{
        struct netlink_sock *nlk;

        nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
            test_bit(0, &nlk->state)) {
                DECLARE_WAITQUEUE(wait, current);
                if (!*timeo) {
                        if (!ssk || netlink_is_kernel(ssk))
                                netlink_overrun(sk);
                        sock_put(sk);
                        kfree_skb(skb);
                        return -EAGAIN;
                }

                __set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&nlk->wait, &wait);

                if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
                     test_bit(0, &nlk->state)) &&
                    !sock_flag(sk, SOCK_DEAD))
                        *timeo = schedule_timeout(*timeo);

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nlk->wait, &wait);
                sock_put(sk);

                if (signal_pending(current)) {
                        kfree_skb(skb);
                        return sock_intr_errno(*timeo);
                }
                return 1;
        }
        skb_set_owner_r(skb, sk);
        return 0;
}
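
/*
 * Calling pattern (illustrative, mirroring netlink_unicast() below):
 * a return of 1 means the socket reference was dropped while sleeping
 * for receive-queue space, so the caller must repeat the lookup:
 *
 *      retry:
 *              sk = netlink_getsockbypid(ssk, pid);
 *              if (IS_ERR(sk)) {
 *                      kfree_skb(skb);
 *                      return PTR_ERR(sk);
 *              }
 *              err = netlink_attachskb(sk, skb, &timeo, ssk);
 *              if (err == 1)
 *                      goto retry;
 *              if (err)
 *                      return err;     // skb already freed
 *              return netlink_sendskb(sk, skb);
 */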

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk, len);
        sock_put(sk);
        return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
        kfree_skb(skb);
        sock_put(sk);
}

static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
                                           gfp_t allocation)
{
        int delta;

        skb_orphan(skb);

        delta = skb->end - skb->tail;
        if (delta * 2 < skb->truesize)
                return skb;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, allocation);
                if (!nskb)
                        return skb;
                kfree_skb(skb);
                skb = nskb;
        }

        if (!pskb_expand_head(skb, 0, -delta, allocation))
                skb->truesize -= delta;

        return skb;
}

static inline void netlink_rcv_wake(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (skb_queue_empty(&sk->sk_receive_queue))
                clear_bit(0, &nlk->state);
        if (!test_bit(0, &nlk->state))
                wake_up_interruptible(&nlk->wait);
}

static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
{
        int ret;
        struct netlink_sock *nlk = nlk_sk(sk);

        ret = -ECONNREFUSED;
        if (nlk->netlink_rcv != NULL) {
                ret = skb->len;
                skb_set_owner_r(skb, sk);
                nlk->netlink_rcv(skb);
        }
        kfree_skb(skb);
        sock_put(sk);
        return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
                    u32 pid, int nonblock)
{
        struct sock *sk;
        int err;
        long timeo;

        skb = netlink_trim(skb, gfp_any());

        timeo = sock_sndtimeo(ssk, nonblock);
retry:
        sk = netlink_getsockbypid(ssk, pid);
        if (IS_ERR(sk)) {
                kfree_skb(skb);
                return PTR_ERR(sk);
        }
        if (netlink_is_kernel(sk))
                return netlink_unicast_kernel(sk, skb);

        if (sk_filter(sk, skb)) {
                err = skb->len;
                kfree_skb(skb);
                sock_put(sk);
                return err;
        }

        err = netlink_attachskb(sk, skb, &timeo, ssk);
        if (err == 1)
                goto retry;
        if (err)
                return err;

        return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);
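
/*
 * Kernel-side usage sketch (illustrative; dst_pid and seq would come
 * from the request being answered, and kernel_sk from
 * netlink_kernel_create()):
 *
 *      skb = nlmsg_new(payload_len, GFP_KERNEL);
 *      nlh = nlmsg_put(skb, 0, seq, NLMSG_DONE, payload_len, 0);
 *      memcpy(nlmsg_data(nlh), payload, payload_len);
 *      netlink_unicast(kernel_sk, skb, dst_pid, MSG_DONTWAIT);
 */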

int netlink_has_listeners(struct sock *sk, unsigned int group)
{
        int res = 0;
        struct listeners *listeners;

        BUG_ON(!netlink_is_kernel(sk));

        rcu_read_lock();
        listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

        if (group - 1 < nl_table[sk->sk_protocol].groups)
                res = test_bit(group - 1, listeners->masks);

        rcu_read_unlock();

        return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

static inline int netlink_broadcast_deliver(struct sock *sk,
                                            struct sk_buff *skb)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
            !test_bit(0, &nlk->state)) {
                skb_set_owner_r(skb, sk);
                skb_queue_tail(&sk->sk_receive_queue, skb);
                sk->sk_data_ready(sk, skb->len);
                return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
        }
        return -1;
}

struct netlink_broadcast_data {
        struct sock *exclude_sk;
        struct net *net;
        u32 pid;
        u32 group;
        int failure;
        int delivery_failure;
        int congested;
        int delivered;
        gfp_t allocation;
        struct sk_buff *skb, *skb2;
        int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
        void *tx_data;
};

static inline int do_one_broadcast(struct sock *sk,
                                   struct netlink_broadcast_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int val;

        if (p->exclude_sk == sk)
                goto out;

        if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (!net_eq(sock_net(sk), p->net))
                goto out;

        if (p->failure) {
                netlink_overrun(sk);
                goto out;
        }

        sock_hold(sk);
        if (p->skb2 == NULL) {
                if (skb_shared(p->skb)) {
                        p->skb2 = skb_clone(p->skb, p->allocation);
                } else {
                        p->skb2 = skb_get(p->skb);
                        /*
                         * skb ownership may have been set when
                         * delivered to a previous socket.
                         */
                        skb_orphan(p->skb2);
                }
        }
        if (p->skb2 == NULL) {
                netlink_overrun(sk);
                /* Clone failed. Notify ALL listeners. */
                p->failure = 1;
                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if (sk_filter(sk, p->skb2)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
                netlink_overrun(sk);
                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else {
                p->congested |= val;
                p->delivered = 1;
                p->skb2 = NULL;
        }
        sock_put(sk);

out:
        return 0;
}

int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
        u32 group, gfp_t allocation,
        int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
        void *filter_data)
{
        struct net *net = sock_net(ssk);
        struct netlink_broadcast_data info;
        struct hlist_node *node;
        struct sock *sk;

        skb = netlink_trim(skb, allocation);

        info.exclude_sk = ssk;
        info.net = net;
        info.pid = pid;
        info.group = group;
        info.failure = 0;
        info.delivery_failure = 0;
        info.congested = 0;
        info.delivered = 0;
        info.allocation = allocation;
        info.skb = skb;
        info.skb2 = NULL;
        info.tx_filter = filter;
        info.tx_data = filter_data;

        /* While we sleep in clone, do not allow the socket list to change */

        netlink_lock_table();

        sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
                do_one_broadcast(sk, &info);

        consume_skb(skb);

        netlink_unlock_table();

        if (info.delivery_failure) {
                kfree_skb(info.skb2);
                return -ENOBUFS;
        } else
                consume_skb(info.skb2);

        if (info.delivered) {
                if (info.congested && (allocation & __GFP_WAIT))
                        yield();
                return 0;
        }
        return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast_filtered);

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
                      u32 group, gfp_t allocation)
{
        return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
                NULL, NULL);
}
EXPORT_SYMBOL(netlink_broadcast);
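
/*
 * Usage sketch (illustrative; MY_GRP and build_event_skb() are
 * hypothetical): a subsystem can skip building an event message
 * entirely when nobody is subscribed:
 *
 *      if (netlink_has_listeners(kernel_sk, MY_GRP)) {
 *              skb = build_event_skb();
 *              netlink_broadcast(kernel_sk, skb, 0, MY_GRP, GFP_KERNEL);
 *      }
 */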

struct netlink_set_err_data {
        struct sock *exclude_sk;
        u32 pid;
        u32 group;
        int code;
};

static inline int do_one_set_err(struct sock *sk,
                                 struct netlink_set_err_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int ret = 0;

        if (sk == p->exclude_sk)
                goto out;

        if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
                goto out;

        if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
                ret = 1;
                goto out;
        }

        sk->sk_err = p->code;
        sk->sk_error_report(sk);
out:
        return ret;
}

/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @pid: the PID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
 * NETLINK_RECV_NO_ENOBUFS socket option.
 */
int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
        struct netlink_set_err_data info;
        struct hlist_node *node;
        struct sock *sk;
        int ret = 0;

        info.exclude_sk = ssk;
        info.pid = pid;
        info.group = group;
        /* sk->sk_err wants a positive error value */
        info.code = -code;

        read_lock(&nl_table_lock);

        sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
                ret += do_one_set_err(sk, &info);

        read_unlock(&nl_table_lock);
        return ret;
}
EXPORT_SYMBOL(netlink_set_err);

/* must be called with the netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
                                     unsigned int group,
                                     int is_new)
{
        int old, new = !!is_new, subscriptions;

        old = test_bit(group - 1, nlk->groups);
        subscriptions = nlk->subscriptions - old + new;
        if (new)
                __set_bit(group - 1, nlk->groups);
        else
                __clear_bit(group - 1, nlk->groups);
        netlink_update_subscriptions(&nlk->sk, subscriptions);
        netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int val = 0;
        int err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (optlen >= sizeof(int) &&
            get_user(val, (unsigned int __user *)optval))
                return -EFAULT;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (val)
                        nlk->flags |= NETLINK_RECV_PKTINFO;
                else
                        nlk->flags &= ~NETLINK_RECV_PKTINFO;
                err = 0;
                break;
        case NETLINK_ADD_MEMBERSHIP:
        case NETLINK_DROP_MEMBERSHIP: {
                if (!netlink_capable(sock, NL_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
                if (!val || val - 1 >= nlk->ngroups)
                        return -EINVAL;
                netlink_table_grab();
                netlink_update_socket_mc(nlk, val,
                                         optname == NETLINK_ADD_MEMBERSHIP);
                netlink_table_ungrab();
                err = 0;
                break;
        }
        case NETLINK_BROADCAST_ERROR:
                if (val)
                        nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
                else
                        nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (val) {
                        nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
                        clear_bit(0, &nlk->state);
                        wake_up_interruptible(&nlk->wait);
                } else
                        nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
                err = 0;
                break;
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}
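
/*
 * Userspace counterpart (illustrative; MY_GRP is a hypothetical 1-based
 * group number, unlike the 32-bit mask used by bind()):
 *
 *      unsigned int grp = MY_GRP;
 *      setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *                 &grp, sizeof(grp));
 */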

static int netlink_getsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        int len, val, err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        case NETLINK_BROADCAST_ERROR:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}

static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
        struct nl_pktinfo info;

        info.group = NETLINK_CB(skb).dst_group;
        put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}

static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
                           struct msghdr *msg, size_t len)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *addr = msg->msg_name;
        u32 dst_pid;
        u32 dst_group;
        struct sk_buff *skb;
        int err;
        struct scm_cookie scm;

        if (msg->msg_flags&MSG_OOB)
                return -EOPNOTSUPP;

        if (NULL == siocb->scm) {
                siocb->scm = &scm;
                memset(&scm, 0, sizeof(scm));
        }
        err = scm_send(sock, msg, siocb->scm);
        if (err < 0)
                return err;

        if (msg->msg_namelen) {
                err = -EINVAL;
                if (addr->nl_family != AF_NETLINK)
                        goto out;
                dst_pid = addr->nl_pid;
                dst_group = ffs(addr->nl_groups);
                err = -EPERM;
                if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
                        goto out;
        } else {
                dst_pid = nlk->dst_pid;
                dst_group = nlk->dst_group;
        }

        if (!nlk->pid) {
                err = netlink_autobind(sock);
                if (err)
                        goto out;
        }

        err = -EMSGSIZE;
        if (len > sk->sk_sndbuf - 32)
                goto out;
        err = -ENOBUFS;
        skb = alloc_skb(len, GFP_KERNEL);
        if (skb == NULL)
                goto out;

        NETLINK_CB(skb).pid     = nlk->pid;
        NETLINK_CB(skb).dst_group = dst_group;
        NETLINK_CB(skb).loginuid = audit_get_loginuid(current);
        NETLINK_CB(skb).sessionid = audit_get_sessionid(current);
        security_task_getsecid(current, &(NETLINK_CB(skb).sid));
        memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));

        /* What can I do? Netlink is asynchronous, so we have to save the
           current capabilities in order to check them when this message
           is delivered to the corresponding kernel module.   --ANK (980802)
         */

        err = -EFAULT;
        if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
                kfree_skb(skb);
                goto out;
        }

        err = security_netlink_send(sk, skb);
        if (err) {
                kfree_skb(skb);
                goto out;
        }

        if (dst_group) {
                atomic_inc(&skb->users);
                netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
        }
        err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);

out:
        scm_destroy(siocb->scm);
        return err;
}

static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
                           struct msghdr *msg, size_t len,
                           int flags)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct scm_cookie scm;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        int noblock = flags&MSG_DONTWAIT;
        size_t copied;
        struct sk_buff *skb, *data_skb;
        int err, ret;

        if (flags&MSG_OOB)
                return -EOPNOTSUPP;

        copied = 0;

        skb = skb_recv_datagram(sk, flags, noblock, &err);
        if (skb == NULL)
                goto out;

        data_skb = skb;

#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
        if (unlikely(skb_shinfo(skb)->frag_list)) {
                /*
                 * If this skb has a frag_list, we have to use the
                 * frag_list skb's data for compat tasks and the regular
                 * skb's data for normal (non-compat) tasks.
                 *
                 * If we need to send the compat skb, assign it to the
                 * 'data_skb' variable so that it will be used below for data
                 * copying. We keep 'skb' for everything else, including
                 * freeing both later.
                 */
                if (flags & MSG_CMSG_COMPAT)
                        data_skb = skb_shinfo(skb)->frag_list;
        }
#endif

        msg->msg_namelen = 0;

        copied = data_skb->len;
        if (len < copied) {
                msg->msg_flags |= MSG_TRUNC;
                copied = len;
        }

        skb_reset_transport_header(data_skb);
        err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);

        if (msg->msg_name) {
                struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
                addr->nl_family = AF_NETLINK;
                addr->nl_pad    = 0;
                addr->nl_pid    = NETLINK_CB(skb).pid;
                addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
                msg->msg_namelen = sizeof(*addr);
        }

        if (nlk->flags & NETLINK_RECV_PKTINFO)
                netlink_cmsg_recv_pktinfo(msg, skb);

        if (NULL == siocb->scm) {
                memset(&scm, 0, sizeof(scm));
                siocb->scm = &scm;
        }
        siocb->scm->creds = *NETLINK_CREDS(skb);
        if (flags & MSG_TRUNC)
                copied = data_skb->len;

        skb_free_datagram(sk, skb);

        if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
                ret = netlink_dump(sk);
                if (ret) {
                        sk->sk_err = ret;
                        sk->sk_error_report(sk);
                }
        }

        scm_recv(sock, msg, siocb->scm, flags);
out:
        netlink_rcv_wake(sk);
        return err ? : copied;
}

static void netlink_data_ready(struct sock *sk, int len)
{
        BUG();
}

/*
 *      We export these functions to other modules. They provide a
 *      complete set of kernel non-blocking support for message
 *      queueing.
 */

struct sock *
netlink_kernel_create(struct net *net, int unit, unsigned int groups,
                      void (*input)(struct sk_buff *skb),
                      struct mutex *cb_mutex, struct module *module)
{
        struct socket *sock;
        struct sock *sk;
        struct netlink_sock *nlk;
        struct listeners *listeners = NULL;

        BUG_ON(!nl_table);

        if (unit < 0 || unit >= MAX_LINKS)
                return NULL;

        if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
                return NULL;

        /*
         * We only need a reference on the net from sk, without get_net()ing
         * it, but we also cannot get and then put the net here.  So we
         * create the socket inside init_net and then move it to net.
         */

        if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
                goto out_sock_release_nosk;

        sk = sock->sk;
        sk_change_net(sk, net);

        if (groups < 32)
                groups = 32;

        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
        if (!listeners)
                goto out_sock_release;

        sk->sk_data_ready = netlink_data_ready;
        if (input)
                nlk_sk(sk)->netlink_rcv = input;

        if (netlink_insert(sk, net, 0))
                goto out_sock_release;

        nlk = nlk_sk(sk);
        nlk->flags |= NETLINK_KERNEL_SOCKET;

        netlink_table_grab();
        if (!nl_table[unit].registered) {
                nl_table[unit].groups = groups;
                rcu_assign_pointer(nl_table[unit].listeners, listeners);
                nl_table[unit].cb_mutex = cb_mutex;
                nl_table[unit].module = module;
                nl_table[unit].registered = 1;
        } else {
                kfree(listeners);
                nl_table[unit].registered++;
        }
        netlink_table_ungrab();
        return sk;

out_sock_release:
        kfree(listeners);
        netlink_kernel_release(sk);
        return NULL;

out_sock_release_nosk:
        sock_release(sock);
        return NULL;
}
EXPORT_SYMBOL(netlink_kernel_create);
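
/*
 * Module-side usage sketch (illustrative; NETLINK_USERSOCK stands in
 * for a real protocol number and the my_* names are hypothetical):
 *
 *      static struct sock *my_sk;
 *
 *      static void my_input(struct sk_buff *skb)
 *      {
 *              struct nlmsghdr *nlh = nlmsg_hdr(skb);
 *              // called synchronously from netlink_unicast_kernel()
 *      }
 *
 *      static int __init my_init(void)
 *      {
 *              my_sk = netlink_kernel_create(&init_net, NETLINK_USERSOCK,
 *                                            0, my_input, NULL, THIS_MODULE);
 *              return my_sk ? 0 : -ENOMEM;
 *      }
 *
 *      static void __exit my_exit(void)
 *      {
 *              netlink_kernel_release(my_sk);
 *      }
 */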
1569
1570
1571void
1572netlink_kernel_release(struct sock *sk)
1573{
1574        sk_release_kernel(sk);
1575}
1576EXPORT_SYMBOL(netlink_kernel_release);
1577
1578
1579static void listeners_free_rcu(struct rcu_head *head)
1580{
1581        kfree(container_of(head, struct listeners, rcu));
1582}
1583
1584int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1585{
1586        struct listeners *new, *old;
1587        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1588
1589        if (groups < 32)
1590                groups = 32;
1591
1592        if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1593                new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1594                if (!new)
1595                        return -ENOMEM;
1596                old = rcu_dereference_raw(tbl->listeners);
1597                memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1598                rcu_assign_pointer(tbl->listeners, new);
1599
1600                call_rcu(&old->rcu, listeners_free_rcu);
1601        }
1602        tbl->groups = groups;
1603
1604        return 0;
1605}
1606
1607/**
1608 * netlink_change_ngroups - change number of multicast groups
1609 *
1610 * This changes the number of multicast groups that are available
1611 * on a certain netlink family. Note that it is not possible to
1612 * change the number of groups to below 32. Also note that it does
1613 * not implicitly call netlink_clear_multicast_users() when the
1614 * number of groups is reduced.
1615 *
1616 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
1617 * @groups: The new number of groups.
1618 */
1619int netlink_change_ngroups(struct sock *sk, unsigned int groups)
1620{
1621        int err;
1622
1623        netlink_table_grab();
1624        err = __netlink_change_ngroups(sk, groups);
1625        netlink_table_ungrab();
1626
1627        return err;
1628}
1629
1630void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1631{
1632        struct sock *sk;
1633        struct hlist_node *node;
1634        struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
1635
1636        sk_for_each_bound(sk, node, &tbl->mc_list)
1637                netlink_update_socket_mc(nlk_sk(sk), group, 0);
1638}
1639
1640/**
1641 * netlink_clear_multicast_users - kick off multicast listeners
1642 *
1643 * This function removes all listeners from the given group.
1644 * @ksk: The kernel netlink socket, as returned by
1645 *      netlink_kernel_create().
1646 * @group: The multicast group to clear.
1647 */
1648void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1649{
1650        netlink_table_grab();
1651        __netlink_clear_multicast_users(ksk, group);
1652        netlink_table_ungrab();
1653}
1654
1655void netlink_set_nonroot(int protocol, unsigned int flags)
1656{
1657        if ((unsigned int)protocol < MAX_LINKS)
1658                nl_table[protocol].nl_nonroot = flags;
1659}
1660EXPORT_SYMBOL(netlink_set_nonroot);
1661
1662static void netlink_destroy_callback(struct netlink_callback *cb)
1663{
1664        kfree_skb(cb->skb);
1665        kfree(cb);
1666}
1667
1668/*
1669 * It looks a bit ugly.
1670 * It would be better to create kernel thread.
1671 */
1672
1673static int netlink_dump(struct sock *sk)
1674{
1675        struct netlink_sock *nlk = nlk_sk(sk);
1676        struct netlink_callback *cb;
1677        struct sk_buff *skb;
1678        struct nlmsghdr *nlh;
1679        int len, err = -ENOBUFS;
1680
1681        skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
1682        if (!skb)
1683                goto errout;
1684
1685        mutex_lock(nlk->cb_mutex);
1686
1687        cb = nlk->cb;
1688        if (cb == NULL) {
1689                err = -EINVAL;
1690                goto errout_skb;
1691        }
1692
1693        len = cb->dump(skb, cb);
1694
1695        if (len > 0) {
1696                mutex_unlock(nlk->cb_mutex);
1697
1698                if (sk_filter(sk, skb))
1699                        kfree_skb(skb);
1700                else {
1701                        skb_queue_tail(&sk->sk_receive_queue, skb);
1702                        sk->sk_data_ready(sk, skb->len);
1703                }
1704                return 0;
1705        }
1706
1707        nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1708        if (!nlh)
1709                goto errout_skb;
1710
1711        memcpy(nlmsg_data(nlh), &len, sizeof(len));
1712
1713        if (sk_filter(sk, skb))
1714                kfree_skb(skb);
1715        else {
1716                skb_queue_tail(&sk->sk_receive_queue, skb);
1717                sk->sk_data_ready(sk, skb->len);
1718        }
1719
1720        if (cb->done)
1721                cb->done(cb);
1722        nlk->cb = NULL;
1723        mutex_unlock(nlk->cb_mutex);
1724
1725        netlink_destroy_callback(cb);
1726        return 0;
1727
1728errout_skb:
1729        mutex_unlock(nlk->cb_mutex);
1730        kfree_skb(skb);
1731errout:
1732        return err;
1733}
1734
1735int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1736                       const struct nlmsghdr *nlh,
1737                       int (*dump)(struct sk_buff *skb,
1738                                   struct netlink_callback *),
1739                       int (*done)(struct netlink_callback *))
1740{
1741        struct netlink_callback *cb;
1742        struct sock *sk;
1743        struct netlink_sock *nlk;
1744        int ret;
1745
1746        cb = kzalloc(sizeof(*cb), GFP_KERNEL);
1747        if (cb == NULL)
1748                return -ENOBUFS;
1749
1750        cb->dump = dump;
1751        cb->done = done;
1752        cb->nlh = nlh;
1753        atomic_inc(&skb->users);
1754        cb->skb = skb;
1755
1756        sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid);
1757        if (sk == NULL) {
1758                netlink_destroy_callback(cb);
1759                return -ECONNREFUSED;
1760        }
1761        nlk = nlk_sk(sk);
1762        /* A dump is already in progress; only one may run at a time */
1763        mutex_lock(nlk->cb_mutex);
1764        if (nlk->cb) {
1765                mutex_unlock(nlk->cb_mutex);
1766                netlink_destroy_callback(cb);
1767                sock_put(sk);
1768                return -EBUSY;
1769        }
1770        nlk->cb = cb;
1771        mutex_unlock(nlk->cb_mutex);
1772
1773        ret = netlink_dump(sk);
1774
1775        sock_put(sk);
1776
1777        if (ret)
1778                return ret;
1779
1780        /* We successfully started a dump; by returning -EINTR we
1781         * signal the caller not to send an ACK even if requested.
1782         */
1783        return -EINTR;
1784}
1785EXPORT_SYMBOL(netlink_dump_start);
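
/*
 * Example (hypothetical sketch): the usual way a per-message handler
 * hands a table dump over to netlink_dump_start(); my_dump(), the
 * record-filling logic and my_handle_request() are illustrative.
 */
static int my_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	/* Fill skb with as many records as fit, using cb->args[] to
	 * remember the position; return the bytes used (> 0) to be
	 * called again, or 0 once the table is exhausted. */
	return 0;
}

static int my_handle_request(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	if (nlh->nlmsg_flags & NLM_F_DUMP)
		/* returns -EINTR on success so that netlink_rcv_skb()
		 * below knows not to send an ACK */
		return netlink_dump_start(my_sock, skb, nlh, my_dump, NULL);
	/* ... non-dump requests would be handled here ... */
	return 0;
}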
1786
1787void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1788{
1789        struct sk_buff *skb;
1790        struct nlmsghdr *rep;
1791        struct nlmsgerr *errmsg;
1792        size_t payload = sizeof(*errmsg);
1793
1794        /* error messages get the original request appended */
1795        if (err)
1796                payload += nlmsg_len(nlh);
1797
1798        skb = nlmsg_new(payload, GFP_KERNEL);
1799        if (!skb) {
1800                struct sock *sk;
1801
1802                sk = netlink_lookup(sock_net(in_skb->sk),
1803                                    in_skb->sk->sk_protocol,
1804                                    NETLINK_CB(in_skb).pid);
1805                if (sk) {
1806                        sk->sk_err = ENOBUFS;
1807                        sk->sk_error_report(sk);
1808                        sock_put(sk);
1809                }
1810                return;
1811        }
1812
1813        rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1814                          NLMSG_ERROR, payload, 0);
1815        errmsg = nlmsg_data(rep);
1816        errmsg->error = err;
1817        memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1818        netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1819}
1820EXPORT_SYMBOL(netlink_ack);
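
/*
 * Note: a plain ACK is just an NLMSG_ERROR message whose error field
 * is zero and which echoes only the request header; when err != 0 the
 * full original request is appended after the error code instead.
 */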
1821
1822int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1823                                                     struct nlmsghdr *))
1824{
1825        struct nlmsghdr *nlh;
1826        int err;
1827
1828        while (skb->len >= nlmsg_total_size(0)) {
1829                int msglen;
1830
1831                nlh = nlmsg_hdr(skb);
1832                err = 0;
1833
1834                if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1835                        return 0;
1836
1837                /* Only requests are handled by the kernel */
1838                if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
1839                        goto ack;
1840
1841                /* Skip control messages */
1842                if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
1843                        goto ack;
1844
1845                err = cb(skb, nlh);
1846                if (err == -EINTR)
1847                        goto skip;
1848
1849ack:
1850                if (nlh->nlmsg_flags & NLM_F_ACK || err)
1851                        netlink_ack(skb, nlh, err);
1852
1853skip:
1854                msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1855                if (msglen > skb->len)
1856                        msglen = skb->len;
1857                skb_pull(skb, msglen);
1858        }
1859
1860        return 0;
1861}
1862EXPORT_SYMBOL(netlink_rcv_skb);
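
/*
 * Example (hypothetical sketch): a kernel socket's input callback, as
 * passed to netlink_kernel_create(), typically just lets this helper
 * walk the queue and generate ACKs and error replies, delegating each
 * message to a handler like my_handle_request() sketched earlier.
 */
static void my_netlink_rcv(struct sk_buff *skb)
{
	netlink_rcv_skb(skb, &my_handle_request);
}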
1863
1864/**
1865 * nlmsg_notify - send a notification netlink message
1866 * @sk: netlink socket to use
1867 * @skb: notification message
1868 * @pid: destination netlink pid for reports or 0
1869 * @group: destination multicast group or 0
1870 * @report: 1 to report back, 0 to disable
1871 * @flags: allocation flags
1872 */
1873int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
1874                 unsigned int group, int report, gfp_t flags)
1875{
1876        int err = 0;
1877
1878        if (group) {
1879                int exclude_pid = 0;
1880
1881                if (report) {
1882                        atomic_inc(&skb->users);
1883                        exclude_pid = pid;
1884                }
1885
1886                /* errors are reported via the destination sk->sk_err, but
1887                 * delivery errors propagate if NETLINK_BROADCAST_ERROR is set */
1888                err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
1889        }
1890
1891        if (report) {
1892                int err2;
1893
1894                err2 = nlmsg_unicast(sk, skb, pid);
1895                if (!err || err == -ESRCH)
1896                        err = err2;
1897        }
1898
1899        return err;
1900}
1901EXPORT_SYMBOL(nlmsg_notify);
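
/*
 * Example (hypothetical sketch): the rtnetlink-style notification
 * pattern -- multicast an event and echo it back to the requester only
 * if NLM_F_ECHO was set on the request; MY_GRP is illustrative.
 */
#define MY_GRP 1	/* illustrative multicast group number */

static int my_notify(struct sk_buff *skb, const struct nlmsghdr *req, u32 pid)
{
	int report = req ? !!(req->nlmsg_flags & NLM_F_ECHO) : 0;

	return nlmsg_notify(my_sock, skb, pid, MY_GRP, report, GFP_KERNEL);
}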
1902
1903#ifdef CONFIG_PROC_FS
1904struct nl_seq_iter {
1905        struct seq_net_private p;
1906        int link;
1907        int hash_idx;
1908};
1909
1910static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1911{
1912        struct nl_seq_iter *iter = seq->private;
1913        int i, j;
1914        struct sock *s;
1915        struct hlist_node *node;
1916        loff_t off = 0;
1917
1918        for (i = 0; i < MAX_LINKS; i++) {
1919                struct nl_pid_hash *hash = &nl_table[i].hash;
1920
1921                for (j = 0; j <= hash->mask; j++) {
1922                        sk_for_each(s, node, &hash->table[j]) {
1923                                if (sock_net(s) != seq_file_net(seq))
1924                                        continue;
1925                                if (off == pos) {
1926                                        iter->link = i;
1927                                        iter->hash_idx = j;
1928                                        return s;
1929                                }
1930                                ++off;
1931                        }
1932                }
1933        }
1934        return NULL;
1935}
1936
1937static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
1938        __acquires(nl_table_lock)
1939{
1940        read_lock(&nl_table_lock);
1941        return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1942}
1943
1944static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1945{
1946        struct sock *s;
1947        struct nl_seq_iter *iter;
1948        int i, j;
1949
1950        ++*pos;
1951
1952        if (v == SEQ_START_TOKEN)
1953                return netlink_seq_socket_idx(seq, 0);
1954
1955        iter = seq->private;
1956        s = v;
1957        do {
1958                s = sk_next(s);
1959        } while (s && sock_net(s) != seq_file_net(seq));
1960        if (s)
1961                return s;
1962
1963        i = iter->link;
1964        j = iter->hash_idx + 1;
1965
1966        do {
1967                struct nl_pid_hash *hash = &nl_table[i].hash;
1968
1969                for (; j <= hash->mask; j++) {
1970                        s = sk_head(&hash->table[j]);
1971                        while (s && sock_net(s) != seq_file_net(seq))
1972                                s = sk_next(s);
1973                        if (s) {
1974                                iter->link = i;
1975                                iter->hash_idx = j;
1976                                return s;
1977                        }
1978                }
1979
1980                j = 0;
1981        } while (++i < MAX_LINKS);
1982
1983        return NULL;
1984}
1985
1986static void netlink_seq_stop(struct seq_file *seq, void *v)
1987        __releases(nl_table_lock)
1988{
1989        read_unlock(&nl_table_lock);
1990}
1991
1992
1993static int netlink_seq_show(struct seq_file *seq, void *v)
1994{
1995        if (v == SEQ_START_TOKEN)
1996                seq_puts(seq,
1997                         "sk       Eth Pid    Groups   "
1998                         "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
1999        else {
2000                struct sock *s = v;
2001                struct netlink_sock *nlk = nlk_sk(s);
2002
2003                seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d %-8lu\n",
2004                           s,
2005                           s->sk_protocol,
2006                           nlk->pid,
2007                           nlk->groups ? (u32)nlk->groups[0] : 0,
2008                           sk_rmem_alloc_get(s),
2009                           sk_wmem_alloc_get(s),
2010                           nlk->cb,
2011                           atomic_read(&s->sk_refcnt),
2012                           atomic_read(&s->sk_drops),
2013                           sock_i_ino(s)
2014                        );
2015
2016        }
2017        return 0;
2018}
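
/*
 * Reading /proc/net/netlink thus yields one row per socket: the sock
 * address, protocol number ("Eth"), bound pid, the low 32 group bits,
 * receive/send queue bytes, the active dump callback (if any), the
 * socket refcount, the drop count and the inode number.
 */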
2019
2020static const struct seq_operations netlink_seq_ops = {
2021        .start  = netlink_seq_start,
2022        .next   = netlink_seq_next,
2023        .stop   = netlink_seq_stop,
2024        .show   = netlink_seq_show,
2025};
2026
2027
2028static int netlink_seq_open(struct inode *inode, struct file *file)
2029{
2030        return seq_open_net(inode, file, &netlink_seq_ops,
2031                                sizeof(struct nl_seq_iter));
2032}
2033
2034static const struct file_operations netlink_seq_fops = {
2035        .owner          = THIS_MODULE,
2036        .open           = netlink_seq_open,
2037        .read           = seq_read,
2038        .llseek         = seq_lseek,
2039        .release        = seq_release_net,
2040};
2041
2042#endif
2043
2044int netlink_register_notifier(struct notifier_block *nb)
2045{
2046        return atomic_notifier_chain_register(&netlink_chain, nb);
2047}
2048EXPORT_SYMBOL(netlink_register_notifier);
2049
2050int netlink_unregister_notifier(struct notifier_block *nb)
2051{
2052        return atomic_notifier_chain_unregister(&netlink_chain, nb);
2053}
2054EXPORT_SYMBOL(netlink_unregister_notifier);
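
/*
 * Example (hypothetical sketch): cleaning up per-pid state when a
 * userspace socket goes away; the chain currently fires
 * NETLINK_URELEASE events carrying a struct netlink_notify with the
 * namespace, pid and protocol of the released socket.
 */
static int my_netlink_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	struct netlink_notify *n = ptr;

	if (event == NETLINK_URELEASE && n->protocol == MY_PROTO) {
		/* drop any state keyed on (n->net, n->pid) here */
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_nl_notifier = {
	.notifier_call	= my_netlink_event,
};
/* register/unregister in the module's init/exit paths:
 * netlink_register_notifier(&my_nl_notifier) and
 * netlink_unregister_notifier(&my_nl_notifier). */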
2055
2056static const struct proto_ops netlink_ops = {
2057        .family =       PF_NETLINK,
2058        .owner =        THIS_MODULE,
2059        .release =      netlink_release,
2060        .bind =         netlink_bind,
2061        .connect =      netlink_connect,
2062        .socketpair =   sock_no_socketpair,
2063        .accept =       sock_no_accept,
2064        .getname =      netlink_getname,
2065        .poll =         datagram_poll,
2066        .ioctl =        sock_no_ioctl,
2067        .listen =       sock_no_listen,
2068        .shutdown =     sock_no_shutdown,
2069        .setsockopt =   netlink_setsockopt,
2070        .getsockopt =   netlink_getsockopt,
2071        .sendmsg =      netlink_sendmsg,
2072        .recvmsg =      netlink_recvmsg,
2073        .mmap =         sock_no_mmap,
2074        .sendpage =     sock_no_sendpage,
2075};
2076
2077static const struct net_proto_family netlink_family_ops = {
2078        .family = PF_NETLINK,
2079        .create = netlink_create,
2080        .owner  = THIS_MODULE,  /* for consistency 8) */
2081};
2082
2083static int __net_init netlink_net_init(struct net *net)
2084{
2085#ifdef CONFIG_PROC_FS
2086        if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
2087                return -ENOMEM;
2088#endif
2089        return 0;
2090}
2091
2092static void __net_exit netlink_net_exit(struct net *net)
2093{
2094#ifdef CONFIG_PROC_FS
2095        proc_net_remove(net, "netlink");
2096#endif
2097}
2098
2099static void __init netlink_add_usersock_entry(void)
2100{
2101        struct listeners *listeners;
2102        int groups = 32;
2103
2104        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2105        if (!listeners)
2106                panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2107
2108        netlink_table_grab();
2109
2110        nl_table[NETLINK_USERSOCK].groups = groups;
2111        rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2112        nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2113        nl_table[NETLINK_USERSOCK].registered = 1;
2114
2115        netlink_table_ungrab();
2116}
2117
2118static struct pernet_operations __net_initdata netlink_net_ops = {
2119        .init = netlink_net_init,
2120        .exit = netlink_net_exit,
2121};
2122
2123static int __init netlink_proto_init(void)
2124{
2125        struct sk_buff *dummy_skb;
2126        int i;
2127        unsigned long limit;
2128        unsigned int order;
2129        int err = proto_register(&netlink_proto, 0);
2130
2131        if (err != 0)
2132                goto out;
2133
2134        BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
2135
2136        nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2137        if (!nl_table)
2138                goto panic;
2139
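	/*
	 * Size the per-protocol pid hash tables from available RAM:
	 * budget roughly one page of hash memory per 2MB of RAM on
	 * machines with at least 128K pages (512MB at 4KB pages), one
	 * per 8MB below that, then store log2 of the resulting bucket
	 * count in max_shift as the ceiling for later rehash growth.
	 */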
2140        if (totalram_pages >= (128 * 1024))
2141                limit = totalram_pages >> (21 - PAGE_SHIFT);
2142        else
2143                limit = totalram_pages >> (23 - PAGE_SHIFT);
2144
2145        order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
2146        limit = (1UL << order) / sizeof(struct hlist_head);
2147        order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
2148
2149        for (i = 0; i < MAX_LINKS; i++) {
2150                struct nl_pid_hash *hash = &nl_table[i].hash;
2151
2152                hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
2153                if (!hash->table) {
2154                        while (i-- > 0)
2155                                nl_pid_hash_free(nl_table[i].hash.table,
2156                                                 1 * sizeof(*hash->table));
2157                        kfree(nl_table);
2158                        goto panic;
2159                }
2160                hash->max_shift = order;
2161                hash->shift = 0;
2162                hash->mask = 0;
2163                hash->rehash_time = jiffies;
2164        }
2165
2166        netlink_add_usersock_entry();
2167
2168        sock_register(&netlink_family_ops);
2169        register_pernet_subsys(&netlink_net_ops);
2170        /* The netlink device handler may be needed early. */
2171        rtnetlink_init();
2172out:
2173        return err;
2174panic:
2175        panic("netlink_init: Cannot allocate nl_table\n");
2176}
2177
2178core_initcall(netlink_proto_init);
2179