/* linux/net/core/net_namespace.c */
   1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   2
   3#include <linux/workqueue.h>
   4#include <linux/rtnetlink.h>
   5#include <linux/cache.h>
   6#include <linux/slab.h>
   7#include <linux/list.h>
   8#include <linux/delay.h>
   9#include <linux/sched.h>
  10#include <linux/idr.h>
  11#include <linux/rculist.h>
  12#include <linux/nsproxy.h>
  13#include <linux/fs.h>
  14#include <linux/proc_ns.h>
  15#include <linux/file.h>
  16#include <linux/export.h>
  17#include <linux/user_namespace.h>
  18#include <linux/net_namespace.h>
  19#ifndef __GENKSYMS__
  20#include <net/sock.h>
  21#endif
  22#include <net/netlink.h>
  23#include <net/net_namespace.h>
  24#include <net/netns/generic.h>
  25
  26/*
  27 *      Our network namespace constructor/destructor lists
  28 */
  29
static LIST_HEAD(pernet_list);
/* Device pernet_operations live from here to the tail of pernet_list;
 * subsystems are inserted before this marker (see register_pernet_subsys()).
 */
static struct list_head *first_device = &pernet_list;
/* Serializes namespace setup/teardown and pernet (un)registration. */
DEFINE_MUTEX(net_mutex);

/* All live network namespaces: writers add/remove under RTNL (see
 * copy_net_ns()/cleanup_net()), readers may walk it under RCU.
 */
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

/* The initial network namespace, owner of all boot-time devices. */
struct net init_net = {
	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

/* Current capacity of per-net generic arrays; grows when a pernet
 * subsystem registers with a larger id (register_pernet_operations()).
 */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
  46static struct net_generic *net_alloc_generic(void)
  47{
  48        struct net_generic *ng;
  49        size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
  50
  51        ng = kzalloc(generic_size, GFP_KERNEL);
  52        if (ng)
  53                ng->len = max_gen_ptrs;
  54
  55        return ng;
  56}
  57
/*
 * Install @data as generic pointer number @id of @net, growing the
 * net->gen array first when @id does not fit yet.
 *
 * Must be called with net_mutex held.  Ids are 1-based (slot id - 1);
 * id 0 is rejected.  Returns 0 or -ENOMEM.
 */
static int net_assign_generic(struct net *net, int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id == 0);

	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&net_mutex));
	ng = old_ng;
	/* Fast path: the current array is already big enough. */
	if (old_ng->len >= id)
		goto assign;

	ng = net_alloc_generic();
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * The net_generic explores the net->gen array inside rcu
	 * read section. Besides once set the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));

	rcu_assign_pointer(net->gen, ng);
	kfree_rcu(old_ng, rcu);
assign:
	ng->ptr[id - 1] = data;
	return 0;
}
  94
/*
 * Run one pernet_operations' init path for @net: allocate and register
 * the per-net data blob when ->id/->size are set, then call ->init().
 * On failure the blob is freed again and the error is returned.
 * NOTE(review): a stale pointer may remain in net->gen after the
 * kfree() here -- presumably never dereferenced because the failed
 * subsystem does not look itself up; confirm before relying on it.
 */
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
	int err = -ENOMEM;
	void *data = NULL;

	if (ops->id && ops->size) {
		/* Private per-net area, later fetched via net_generic(). */
		data = kzalloc(ops->size, GFP_KERNEL);
		if (!data)
			goto out;

		err = net_assign_generic(net, *ops->id, data);
		if (err)
			goto cleanup;
	}
	err = 0;
	if (ops->init)
		err = ops->init(net);
	if (!err)
		return 0;

cleanup:
	kfree(data);

out:
	return err;
}
 121
 122static void ops_free(const struct pernet_operations *ops, struct net *net)
 123{
 124        if (ops->id && ops->size) {
 125                int id = *ops->id;
 126                kfree(net_generic(net, id));
 127        }
 128}
 129
 130static void ops_exit_list(const struct pernet_operations *ops,
 131                          struct list_head *net_exit_list)
 132{
 133        struct net *net;
 134        if (ops->exit) {
 135                list_for_each_entry(net, net_exit_list, exit_list)
 136                        ops->exit(net);
 137        }
 138        if (ops->exit_batch)
 139                ops->exit_batch(net_exit_list);
 140}
 141
 142static void ops_free_list(const struct pernet_operations *ops,
 143                          struct list_head *net_exit_list)
 144{
 145        struct net *net;
 146        if (ops->size && ops->id) {
 147                list_for_each_entry(net, net_exit_list, exit_list)
 148                        ops_free(ops, net);
 149        }
 150}
 151
 152/* should be called with nsid_lock held */
 153static int alloc_netid(struct net *net, struct net *peer, int reqid)
 154{
 155        int min = 0, max = 0;
 156
 157        if (reqid >= 0) {
 158                min = reqid;
 159                max = reqid + 1;
 160        }
 161
 162        return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
 163}
 164
 165/* This function is used by idr_for_each(). If net is equal to peer, the
 166 * function returns the id so that idr_for_each() stops. Because we cannot
 167 * returns the id 0 (idr_for_each() will not stop), we return the magic value
 168 * NET_ID_ZERO (-1) for it.
 169 */
 170#define NET_ID_ZERO -1
 171static int net_eq_idr(int id, void *net, void *peer)
 172{
 173        if (net_eq(net, peer))
 174                return id ? : NET_ID_ZERO;
 175        return 0;
 176}
 177
/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
 * is set to true, thus the caller knows that the new id must be notified via
 * rtnl.
 */
static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
	/* Linear scan of the idr looking for an existing @peer mapping. */
	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
	bool alloc_it = *alloc;

	*alloc = false;

	/* Magic value for id 0. */
	if (id == NET_ID_ZERO)
		return 0;
	if (id > 0)
		return id;

	if (alloc_it) {
		/* Not mapped yet: grab the lowest free id. */
		id = alloc_netid(net, peer, -1);
		*alloc = true;
		return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
	}

	return NETNSA_NSID_NOT_ASSIGNED;
}
 203
 204/* should be called with nsid_lock held */
 205static int __peernet2id(struct net *net, struct net *peer)
 206{
 207        bool no = false;
 208
 209        return __peernet2id_alloc(net, peer, &no);
 210}
 211
 212static void rtnl_net_notifyid(struct net *net, int cmd, int id);
 213/* This function returns the id of a peer netns. If no id is assigned, one will
 214 * be allocated and returned.
 215 */
 216int peernet2id_alloc(struct net *net, struct net *peer)
 217{
 218        unsigned long flags;
 219        bool alloc;
 220        int id;
 221
 222        spin_lock_irqsave(&net->nsid_lock, flags);
 223        alloc = atomic_read(&peer->count) == 0 ? false : true;
 224        id = __peernet2id_alloc(net, peer, &alloc);
 225        spin_unlock_irqrestore(&net->nsid_lock, flags);
 226        if (alloc && id >= 0)
 227                rtnl_net_notifyid(net, RTM_NEWNSID, id);
 228        return id;
 229}
 230EXPORT_SYMBOL_GPL(peernet2id_alloc);
 231
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
	unsigned long flags;
	int id;

	spin_lock_irqsave(&net->nsid_lock, flags);
	id = __peernet2id(net, peer);	/* lookup only, no allocation */
	spin_unlock_irqrestore(&net->nsid_lock, flags);
	return id;
}
EXPORT_SYMBOL(peernet2id);
 244
 245/* This function returns true is the peer netns has an id assigned into the
 246 * current netns.
 247 */
 248bool peernet_has_id(struct net *net, struct net *peer)
 249{
 250        return peernet2id(net, peer) >= 0;
 251}
 252
/*
 * Look up the peer namespace with nsid @id inside @net and return it
 * with a reference held, or NULL when the id is unassigned or
 * maybe_get_net() declines to pin the peer.
 */
struct net *get_net_ns_by_id(struct net *net, int id)
{
	unsigned long flags;
	struct net *peer;

	if (id < 0)
		return NULL;

	rcu_read_lock();
	spin_lock_irqsave(&net->nsid_lock, flags);
	peer = idr_find(&net->netns_ids, id);
	if (peer)
		peer = maybe_get_net(peer);
	spin_unlock_irqrestore(&net->nsid_lock, flags);
	rcu_read_unlock();

	return peer;
}
 271
/* Charge one net namespace against current_euid()'s
 * UCOUNT_NET_NAMESPACES count in @ns; NULL means the charge failed
 * (copy_net_ns() maps that to -ENOSPC).
 */
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}

/* Return the charge taken by inc_net_namespaces(). */
static void dec_net_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}
 281
/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
	/* Must be called with net_mutex held */
	const struct pernet_operations *ops, *saved_ops;
	int error = 0;
	LIST_HEAD(net_exit_list);

	atomic_set(&net->count, 1);	/* initial active reference */
	atomic_set(&net->passive, 1);	/* keeps struct net allocated; see net_drop_ns() */
	net->dev_base_seq = 1;
	net->user_ns = user_ns;
	idr_init(&net->netns_ids);
	spin_lock_init(&net->nsid_lock);

	/* Run every registered pernet init in registration order. */
	list_for_each_entry(ops, &pernet_list, list) {
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
	}
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_add(&net->exit_list, &net_exit_list);
	saved_ops = ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	/* Wait for any outstanding RCU callbacks touching this net. */
	rcu_barrier();
	goto out;
}
 323
 324
 325#ifdef CONFIG_NET_NS
static struct kmem_cache *net_cachep;	/* slab cache for struct net */
static struct workqueue_struct *netns_wq;	/* runs net_cleanup_work */
 328
 329static struct net *net_alloc(void)
 330{
 331        struct net *net = NULL;
 332        struct net_generic *ng;
 333
 334        ng = net_alloc_generic();
 335        if (!ng)
 336                goto out;
 337
 338        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
 339        if (!net)
 340                goto out_free;
 341
 342        rcu_assign_pointer(net->gen, ng);
 343out:
 344        return net;
 345
 346out_free:
 347        kfree(ng);
 348        goto out;
 349}
 350
/* Final release of a namespace, called from net_drop_ns() once the
 * passive refcount drops to zero: free the generic array and the net.
 */
static void net_free(struct net *net)
{
	kfree(rcu_access_pointer(net->gen));
	kmem_cache_free(net_cachep, net);
}
 356
 357void net_drop_ns(void *p)
 358{
 359        struct net *ns = p;
 360        if (ns && atomic_dec_and_test(&ns->passive))
 361                net_free(ns);
 362}
 363
/*
 * Create (or share) a network namespace for clone()/unshare().
 * Without CLONE_NEWNET the caller simply gets another reference to
 * @old_net.  Otherwise a fresh namespace is allocated, charged to
 * @user_ns's ucounts, initialized under net_mutex and published on
 * net_namespace_list under RTNL.  Returns the new net or an ERR_PTR.
 */
struct net *copy_net_ns(unsigned long flags,
			struct user_namespace *user_ns, struct net *old_net)
{
	struct ucounts *ucounts;
	struct net *net;
	int rv;

	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);

	ucounts = inc_net_namespaces(user_ns);
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	net = net_alloc();
	if (!net) {
		dec_net_namespaces(ucounts);
		return ERR_PTR(-ENOMEM);
	}

	get_user_ns(user_ns);

	mutex_lock(&net_mutex);
	net->ucounts = ucounts;
	rv = setup_net(net, user_ns);
	if (rv == 0) {
		/* Make the namespace visible to for_each_net() walkers. */
		rtnl_lock();
		list_add_tail_rcu(&net->list, &net_namespace_list);
		rtnl_unlock();
	}
	mutex_unlock(&net_mutex);
	if (rv < 0) {
		/* Undo the charge, the user_ns ref and the passive ref. */
		dec_net_namespaces(ucounts);
		put_user_ns(user_ns);
		net_drop_ns(net);
		return ERR_PTR(rv);
	}
	return net;
}
 403
static DEFINE_SPINLOCK(cleanup_list_lock);	/* protects cleanup_list */
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
 406
/*
 * Workqueue handler dismantling dead namespaces.  Everything queued on
 * cleanup_list by __put_net() is torn down in one batch, so a single
 * RCU grace period and one pass over the pernet exit hooks covers the
 * whole batch.
 */
static void cleanup_net(struct work_struct *work)
{
	const struct pernet_operations *ops;
	struct net *net, *tmp;
	struct list_head net_kill_list;
	LIST_HEAD(net_exit_list);

	/* Atomically snapshot the list of namespaces to cleanup */
	spin_lock_irq(&cleanup_list_lock);
	list_replace_init(&cleanup_list, &net_kill_list);
	spin_unlock_irq(&cleanup_list_lock);

	mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
	list_for_each_entry(net, &net_kill_list, cleanup_list) {
		list_del_rcu(&net->list);
		list_add_tail(&net->exit_list, &net_exit_list);
		/* Remove the nsids this dead net still holds in every
		 * other namespace and notify userspace of each removal.
		 */
		for_each_net(tmp) {
			int id;

			spin_lock_irq(&tmp->nsid_lock);
			id = __peernet2id(tmp, net);
			if (id >= 0)
				idr_remove(&tmp->netns_ids, id);
			spin_unlock_irq(&tmp->nsid_lock);
			if (id >= 0)
				rtnl_net_notifyid(tmp, RTM_DELNSID, id);
		}
		/* Drop the dead net's own id table. */
		spin_lock_irq(&net->nsid_lock);
		idr_destroy(&net->netns_ids);
		spin_unlock_irq(&net->nsid_lock);

	}
	rtnl_unlock();

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 */
	synchronize_rcu();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	/* Free the net generic variables */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	mutex_unlock(&net_mutex);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
		dec_net_namespaces(net->ucounts);
		put_user_ns(net->user_ns);
		net_drop_ns(net);
	}
}
static DECLARE_WORK(net_cleanup_work, cleanup_net);
 475
void __put_net(struct net *net)
{
	/* Cleanup the network namespace in process context */
	unsigned long flags;

	spin_lock_irqsave(&cleanup_list_lock, flags);
	list_add(&net->cleanup_list, &cleanup_list);
	spin_unlock_irqrestore(&cleanup_list_lock, flags);

	/* cleanup_net() drains the whole cleanup_list in one batch */
	queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
 488
/*
 * Resolve an open namespace file descriptor (/proc/<pid>/ns/net) to
 * its network namespace, returned with an extra reference, or an
 * ERR_PTR on failure.
 */
struct net *get_net_ns_by_fd(int fd)
{
	struct proc_ns *ei;
	struct file *file;
	struct net *net;

	file = proc_ns_fget(fd);
	if (IS_ERR(file))
		return ERR_CAST(file);

	ei = get_proc_ns(file_inode(file));
	/* Reject fds that refer to some other namespace type. */
	if (ei->ns_ops == &netns_operations)
		net = get_net(ei->ns);
	else
		net = ERR_PTR(-EINVAL);

	fput(file);
	return net;
}
 508
#else
/* !CONFIG_NET_NS: namespace fds cannot name anything but init_net. */
struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
#endif
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
 516
/*
 * Return, with a reference held, the net namespace of the task with
 * virtual pid @pid, or ERR_PTR(-ESRCH) when no such live task exists.
 */
struct net *get_net_ns_by_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct net *net;

	/* Lookup the network namespace */
	net = ERR_PTR(-ESRCH);
	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk) {
		struct nsproxy *nsproxy;
		task_lock(tsk);
		nsproxy = tsk->nsproxy;
		/* nsproxy may already be NULL (presumably an exiting
		 * task); keep the -ESRCH result in that case.
		 */
		if (nsproxy)
			net = get_net(nsproxy->net_ns);
		task_unlock(tsk);
	}
	rcu_read_unlock();
	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 538
/* Allocate the /proc inode number that identifies this namespace. */
static __net_init int net_ns_net_init(struct net *net)
{
	return proc_alloc_inum(&net->proc_inum);
}

/* Release the /proc inode number on namespace teardown. */
static __net_exit void net_ns_net_exit(struct net *net)
{
	proc_free_inum(net->proc_inum);
}

/* Pernet ops providing the proc inum for every namespace. */
static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};
 553
 554static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
 555        [NETNSA_NONE]           = { .type = NLA_UNSPEC },
 556        [NETNSA_NSID]           = { .type = NLA_S32 },
 557        [NETNSA_PID]            = { .type = NLA_U32 },
 558        [NETNSA_FD]             = { .type = NLA_U32 },
 559};
 560
/*
 * RTM_NEWNSID handler: assign the id given in NETNSA_NSID to the peer
 * namespace identified by NETNSA_PID or NETNSA_FD.
 */
static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	unsigned long flags;
	struct net *peer;
	int nsid, err;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (!tb[NETNSA_NSID])
		return -EINVAL;
	nsid = nla_get_s32(tb[NETNSA_NSID]);

	/* The peer may be designated either by pid or by namespace fd. */
	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;
	if (IS_ERR(peer))
		return PTR_ERR(peer);

	spin_lock_irqsave(&net->nsid_lock, flags);
	/* Each peer gets at most one id; refuse re-assignment. */
	if (__peernet2id(net, peer) >= 0) {
		spin_unlock_irqrestore(&net->nsid_lock, flags);
		err = -EEXIST;
		goto out;
	}

	err = alloc_netid(net, peer, nsid);
	spin_unlock_irqrestore(&net->nsid_lock, flags);
	if (err >= 0) {
		/* err holds the new id: announce it, then report success */
		rtnl_net_notifyid(net, RTM_NEWNSID, err);
		err = 0;
	}
out:
	put_net(peer);
	return err;
}
 603
 604static int rtnl_net_get_size(void)
 605{
 606        return NLMSG_ALIGN(sizeof(struct rtgenmsg))
 607               + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
 608               ;
 609}
 610
/*
 * Append one nsid message (rtgenmsg header + NETNSA_NSID attribute)
 * to @skb.  Returns 0 on success or -EMSGSIZE when @skb lacks room.
 */
static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
			 int cmd, struct net *net, int nsid)
{
	struct nlmsghdr *nlh;
	struct rtgenmsg *rth;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
	if (!nlh)
		return -EMSGSIZE;

	rth = nlmsg_data(nlh);
	rth->rtgen_family = AF_UNSPEC;

	if (nla_put_s32(skb, NETNSA_NSID, nsid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	/* roll back the partially constructed message */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
 634
/*
 * RTM_GETNSID handler: report the nsid of the peer identified by
 * NETNSA_PID or NETNSA_FD back to the requesting socket.
 */
static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct sk_buff *msg;
	struct net *peer;
	int err, id;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;

	if (IS_ERR(peer))
		return PTR_ERR(peer);

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	/* may be NETNSA_NSID_NOT_ASSIGNED; reported to userspace as-is */
	id = peernet2id(net, peer);
	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
			    RTM_NEWNSID, net, id);
	if (err < 0)
		goto err_out;

	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	put_net(peer);
	return err;
}
 678
/* State threaded through idr_for_each() while dumping nsids. */
struct rtnl_net_dump_cb {
	struct net *net;	/* namespace whose id table is dumped */
	struct sk_buff *skb;	/* message under construction */
	struct netlink_callback *cb;
	int idx;		/* index of the current entry */
	int s_idx;		/* first index to emit (dump resume point) */
};
 686
 687static int rtnl_net_dumpid_one(int id, void *peer, void *data)
 688{
 689        struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
 690        int ret;
 691
 692        if (net_cb->idx < net_cb->s_idx)
 693                goto cont;
 694
 695        ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
 696                            net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
 697                            RTM_NEWNSID, net_cb->net, id);
 698        if (ret < 0)
 699                return ret;
 700
 701cont:
 702        net_cb->idx++;
 703        return 0;
 704}
 705
/* RTM_GETNSID dump handler: one message per assigned nsid. */
static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct rtnl_net_dump_cb net_cb = {
		.net = net,
		.skb = skb,
		.cb = cb,
		.idx = 0,
		.s_idx = cb->args[0],	/* resume where the last pass stopped */
	};
	unsigned long flags;

	spin_lock_irqsave(&net->nsid_lock, flags);
	idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
	spin_unlock_irqrestore(&net->nsid_lock, flags);

	/* remember progress for the next invocation */
	cb->args[0] = net_cb.idx;
	return skb->len;
}
 725
/* Broadcast @cmd (RTM_NEWNSID/RTM_DELNSID) for nsid @id to RTNLGRP_NSID. */
static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
	struct sk_buff *msg;
	int err = -ENOMEM;

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg)
		goto out;

	err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
	if (err < 0)
		goto err_out;

	rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
	return;

err_out:
	nlmsg_free(msg);
out:
	/* record the failure on the rtnetlink socket error state */
	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}
 747
/*
 * Boot-time setup: create the struct net slab cache and the cleanup
 * workqueue, initialize init_net, and register the nsid rtnetlink
 * handlers.  Panics on failure -- networking cannot run without it.
 */
static int __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
					SMP_CACHE_BYTES,
					SLAB_PANIC, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

	mutex_lock(&net_mutex);
	if (setup_net(&init_net, &init_user_ns))
		panic("Could not setup the initial network namespace");

	/* publish init_net on the global namespace list */
	rtnl_lock();
	list_add_tail_rcu(&init_net.list, &net_namespace_list);
	rtnl_unlock();

	mutex_unlock(&net_mutex);

	register_pernet_subsys(&net_ns_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
		      NULL);

	return 0;
}

/* earliest initcall level */
pure_initcall(net_ns_init);
 789
 790#ifdef CONFIG_NET_NS
/*
 * Add @ops to @list and run its init hooks for every existing
 * namespace.  On any failure, every namespace initialized so far is
 * unwound and @ops removed again.  Called with net_mutex held.
 */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	list_add_tail(&ops->list, list);
	if (ops->init || (ops->id && ops->size)) {
		for_each_net(net) {
			error = ops_init(ops, net);
			if (error)
				goto out_undo;
			/* remember for the potential unwind below */
			list_add_tail(&net->exit_list, &net_exit_list);
		}
	}
	return 0;

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
	return error;
}
 816
/*
 * Remove @ops from the pernet list and run its exit/free hooks for
 * every existing namespace.  Called with net_mutex held.
 */
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	struct net *net;
	LIST_HEAD(net_exit_list);

	list_del(&ops->list);
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
}
 828
#else

/* !CONFIG_NET_NS: only init_net exists, so init/exit it directly. */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	return ops_init(ops, &init_net);
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	LIST_HEAD(net_exit_list);
	list_add(&init_net.exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
}

#endif /* CONFIG_NET_NS */
 846
/* ida handing out the generic ids stored behind pernet_operations->id */
static DEFINE_IDA(net_generic_ids);

/*
 * Allocate a generic id for @ops (when it wants one) and register it on
 * @list, undoing the id allocation if registration fails.  Called with
 * net_mutex held.
 */
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	if (ops->id) {
again:
		/* ids start at 1; id 0 is rejected by net_assign_generic() */
		error = ida_get_new_above(&net_generic_ids, 1, ops->id);
		if (error < 0) {
			if (error == -EAGAIN) {
				/* preload more ida memory and retry */
				ida_pre_get(&net_generic_ids, GFP_KERNEL);
				goto again;
			}
			return error;
		}
		/* Grow future net_generic arrays to fit this id. */
		max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
	}
	error = __register_pernet_operations(list, ops);
	if (error) {
		/* Let in-flight RCU readers finish before recycling the id. */
		rcu_barrier();
		if (ops->id)
			ida_remove(&net_generic_ids, *ops->id);
	}

	return error;
}
 875
 876static void unregister_pernet_operations(struct pernet_operations *ops)
 877{
 878        
 879        __unregister_pernet_operations(ops);
 880        rcu_barrier();
 881        if (ops->id)
 882                ida_remove(&net_generic_ids, *ops->id);
 883}
 884
 885/**
 886 *      register_pernet_subsys - register a network namespace subsystem
 887 *      @ops:  pernet operations structure for the subsystem
 888 *
 889 *      Register a subsystem which has init and exit functions
 890 *      that are called when network namespaces are created and
 891 *      destroyed respectively.
 892 *
 893 *      When registered all network namespace init functions are
 894 *      called for every existing network namespace.  Allowing kernel
 895 *      modules to have a race free view of the set of network namespaces.
 896 *
 897 *      When a new network namespace is created all of the init
 898 *      methods are called in the order in which they were registered.
 899 *
 900 *      When a network namespace is destroyed all of the exit methods
 901 *      are called in the reverse of the order with which they were
 902 *      registered.
 903 */
 904int register_pernet_subsys(struct pernet_operations *ops)
 905{
 906        int error;
 907        mutex_lock(&net_mutex);
 908        error =  register_pernet_operations(first_device, ops);
 909        mutex_unlock(&net_mutex);
 910        return error;
 911}
 912EXPORT_SYMBOL_GPL(register_pernet_subsys);
 913
/**
 *      unregister_pernet_subsys - unregister a network namespace subsystem
 *      @ops: pernet operations structure to manipulate
 *
 *      Remove the pernet operations structure from the list to be
 *      used when network namespaces are created or destroyed.  In
 *      addition run the exit method for all existing network
 *      namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	/* net_mutex serializes against namespace creation/teardown */
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 930
 931/**
 932 *      register_pernet_device - register a network namespace device
 933 *      @ops:  pernet operations structure for the subsystem
 934 *
 935 *      Register a device which has init and exit functions
 936 *      that are called when network namespaces are created and
 937 *      destroyed respectively.
 938 *
 939 *      When registered all network namespace init functions are
 940 *      called for every existing network namespace.  Allowing kernel
 941 *      modules to have a race free view of the set of network namespaces.
 942 *
 943 *      When a new network namespace is created all of the init
 944 *      methods are called in the order in which they were registered.
 945 *
 946 *      When a network namespace is destroyed all of the exit methods
 947 *      are called in the reverse of the order with which they were
 948 *      registered.
 949 */
int register_pernet_device(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error = register_pernet_operations(&pernet_list, ops);
	/* Remember the first device ops registered: subsystems are
	 * inserted before it (see register_pernet_subsys()).
	 */
	if (!error && (first_device == &pernet_list))
		first_device = &ops->list;
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
 961
 962/**
 963 *      unregister_pernet_device - unregister a network namespace netdevice
 964 *      @ops: pernet operations structure to manipulate
 965 *
 966 *      Remove the pernet operations structure from the list to be
 967 *      used when network namespaces are created or destroyed.  In
 968 *      addition run the exit method for all existing network
 969 *      namespaces.
 970 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	/* If this was the first device, advance the marker past it. */
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
 980
 981#ifdef CONFIG_NET_NS
/* proc_ns_operations->get: grab a reference to @task's net namespace. */
static void *netns_get(struct task_struct *task)
{
	struct net *net = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	/* nsproxy may be NULL (presumably for exiting tasks); then
	 * NULL is returned to the caller.
	 */
	if (nsproxy)
		net = get_net(nsproxy->net_ns);
	task_unlock(task);

	return net;
}
 995
/* proc_ns_operations->put: drop the reference taken by netns_get(). */
static void netns_put(void *ns)
{
	put_net(ns);
}
1000
/*
 * proc_ns_operations->install -- setns(2) for network namespaces.
 * The caller needs CAP_SYS_ADMIN over both the target namespace's
 * owning user namespace and its own current user namespace.
 */
static int netns_install(struct nsproxy *nsproxy, void *ns)
{
	struct net *net = ns;

	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	/* swap nsproxy's old net reference for a new one on @net */
	put_net(nsproxy->net_ns);
	nsproxy->net_ns = get_net(net);
	return 0;
}
1013
1014static unsigned int netns_inum(void *ns)
1015{
1016        struct net *net = ns;
1017        return net->proc_inum;
1018}
1019
/* Hooks backing /proc/<pid>/ns/net files (see get_net_ns_by_fd()). */
const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
	.inum		= netns_inum,
};
1028#endif
1029