linux/net/core/net_namespace.c
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
 *      Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
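/*
 * Subsystems (register_pernet_subsys) are linked in before first_device
 * and devices (register_pernet_device) after it, so subsystem init
 * methods always run before device init methods and, on teardown,
 * device exit methods run before subsystem exit methods.
 */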
static struct list_head *first_device = &pernet_list;
DEFINE_MUTEX(net_mutex);

LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

struct net init_net = {
        .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

static bool init_net_initialized;

#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */

static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

static struct net_generic *net_alloc_generic(void)
{
        struct net_generic *ng;
        size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

        ng = kzalloc(generic_size, GFP_KERNEL);
        if (ng)
                ng->len = max_gen_ptrs;

        return ng;
}

static int net_assign_generic(struct net *net, int id, void *data)
{
        struct net_generic *ng, *old_ng;

        BUG_ON(!mutex_is_locked(&net_mutex));
        BUG_ON(id == 0);

        old_ng = rcu_dereference_protected(net->gen,
                                           lockdep_is_held(&net_mutex));
        ng = old_ng;
        if (old_ng->len >= id)
                goto assign;

        ng = net_alloc_generic();
        if (ng == NULL)
                return -ENOMEM;

        /*
         * Some synchronisation notes:
         *
         * net_generic() walks the net->gen array inside an rcu
         * read-side section. Moreover, once set, the net->gen->ptr[x]
         * pointer never changes (see rules in netns/generic.h).
         *
         * So we simply duplicate this array and schedule the old copy
         * for kfree after a grace period.
         */

        memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));

        rcu_assign_pointer(net->gen, ng);
        kfree_rcu(old_ng, rcu);
assign:
        ng->ptr[id - 1] = data;
        return 0;
}
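
/*
 * The reader side of this scheme lives in <net/netns/generic.h>. A
 * minimal sketch of what net_generic() does there (paraphrased, not
 * copied verbatim from that header):
 *
 *	static inline void *net_generic(const struct net *net, int id)
 *	{
 *		struct net_generic *ng;
 *		void *ptr;
 *
 *		rcu_read_lock();
 *		ng = rcu_dereference(net->gen);
 *		ptr = ng->ptr[id - 1];
 *		rcu_read_unlock();
 *
 *		return ptr;
 *	}
 *
 * Readers never block the writer: net_assign_generic() above swaps in
 * a larger copy of the array and frees the old one only after a grace
 * period.
 */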

static int ops_init(const struct pernet_operations *ops, struct net *net)
{
        int err = -ENOMEM;
        void *data = NULL;

        if (ops->id && ops->size) {
                data = kzalloc(ops->size, GFP_KERNEL);
                if (!data)
                        goto out;

                err = net_assign_generic(net, *ops->id, data);
                if (err)
                        goto cleanup;
        }
        err = 0;
        if (ops->init)
                err = ops->init(net);
        if (!err)
                return 0;

cleanup:
        kfree(data);

out:
        return err;
}

static void ops_free(const struct pernet_operations *ops, struct net *net)
{
        if (ops->id && ops->size) {
                int id = *ops->id;
                kfree(net_generic(net, id));
        }
}

static void ops_exit_list(const struct pernet_operations *ops,
                          struct list_head *net_exit_list)
{
        struct net *net;
        if (ops->exit) {
                list_for_each_entry(net, net_exit_list, exit_list)
                        ops->exit(net);
        }
        if (ops->exit_batch)
                ops->exit_batch(net_exit_list);
}

static void ops_free_list(const struct pernet_operations *ops,
                          struct list_head *net_exit_list)
{
        struct net *net;
        if (ops->size && ops->id) {
                list_for_each_entry(net, net_exit_list, exit_list)
                        ops_free(ops, net);
        }
}

/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
        int min = 0, max = 0;

        if (reqid >= 0) {
                min = reqid;
                max = reqid + 1;
        }

        return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}

/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * return the id 0 (idr_for_each() will not stop), we return the magic value
 * NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
        if (net_eq(net, peer))
                return id ? : NET_ID_ZERO;
        return 0;
}

/* Should be called with nsid_lock held. If a new id is assigned, the bool
 * alloc is set to true, so the caller knows that the new id must be notified
 * via rtnl.
 */
static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
        int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
        bool alloc_it = *alloc;

        *alloc = false;

        /* Magic value for id 0. */
        if (id == NET_ID_ZERO)
                return 0;
        if (id > 0)
                return id;

        if (alloc_it) {
                id = alloc_netid(net, peer, -1);
                *alloc = true;
                return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
        }

        return NETNSA_NSID_NOT_ASSIGNED;
}

/* should be called with nsid_lock held */
static int __peernet2id(struct net *net, struct net *peer)
{
        bool no = false;

        return __peernet2id_alloc(net, peer, &no);
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id);
/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.
 */
int peernet2id_alloc(struct net *net, struct net *peer)
{
        unsigned long flags;
        bool alloc;
        int id;

        if (atomic_read(&net->count) == 0)
                return NETNSA_NSID_NOT_ASSIGNED;
        spin_lock_irqsave(&net->nsid_lock, flags);
        alloc = atomic_read(&peer->count) == 0 ? false : true;
        id = __peernet2id_alloc(net, peer, &alloc);
        spin_unlock_irqrestore(&net->nsid_lock, flags);
        if (alloc && id >= 0)
                rtnl_net_notifyid(net, RTM_NEWNSID, id);
        return id;
}

/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
        unsigned long flags;
        int id;

        spin_lock_irqsave(&net->nsid_lock, flags);
        id = __peernet2id(net, peer);
        spin_unlock_irqrestore(&net->nsid_lock, flags);
        return id;
}
EXPORT_SYMBOL(peernet2id);

/* This function returns true if the peer netns has an id assigned into the
 * current netns.
 */
bool peernet_has_id(struct net *net, struct net *peer)
{
        return peernet2id(net, peer) >= 0;
}

struct net *get_net_ns_by_id(struct net *net, int id)
{
        unsigned long flags;
        struct net *peer;

        if (id < 0)
                return NULL;

        rcu_read_lock();
        spin_lock_irqsave(&net->nsid_lock, flags);
        peer = idr_find(&net->netns_ids, id);
        if (peer)
                get_net(peer);
        spin_unlock_irqrestore(&net->nsid_lock, flags);
        rcu_read_unlock();

        return peer;
}

/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
        /* Must be called with net_mutex held */
        const struct pernet_operations *ops, *saved_ops;
        int error = 0;
        LIST_HEAD(net_exit_list);

        atomic_set(&net->count, 1);
        atomic_set(&net->passive, 1);
        net->dev_base_seq = 1;
        net->user_ns = user_ns;
        idr_init(&net->netns_ids);
        spin_lock_init(&net->nsid_lock);

        list_for_each_entry(ops, &pernet_list, list) {
                error = ops_init(ops, net);
                if (error < 0)
                        goto out_undo;
        }
out:
        return error;

out_undo:
        /* Walk through the list backwards calling the exit functions
         * for the pernet modules whose init functions did not fail.
         */
        list_add(&net->exit_list, &net_exit_list);
        saved_ops = ops;
        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);

        ops = saved_ops;
        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
                ops_free_list(ops, &net_exit_list);

        rcu_barrier();
        goto out;
}


#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
        return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}

static void dec_net_namespaces(struct ucounts *ucounts)
{
        dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}

static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

static struct net *net_alloc(void)
{
        struct net *net = NULL;
        struct net_generic *ng;

        ng = net_alloc_generic();
        if (!ng)
                goto out;

        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
        if (!net)
                goto out_free;

        rcu_assign_pointer(net->gen, ng);
out:
        return net;

out_free:
        kfree(ng);
        goto out;
}

static void net_free(struct net *net)
{
        kfree(rcu_access_pointer(net->gen));
        kmem_cache_free(net_cachep, net);
}

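/* net->count keeps a namespace alive and usable; net->passive only
 * keeps the struct net memory itself around, so code that still holds
 * a pointer can look at it until the final net_drop_ns() frees it.
 */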
void net_drop_ns(void *p)
{
        struct net *ns = p;
        if (ns && atomic_dec_and_test(&ns->passive))
                net_free(ns);
}

struct net *copy_net_ns(unsigned long flags,
                        struct user_namespace *user_ns, struct net *old_net)
{
        struct ucounts *ucounts;
        struct net *net;
        int rv;

        if (!(flags & CLONE_NEWNET))
                return get_net(old_net);

        ucounts = inc_net_namespaces(user_ns);
        if (!ucounts)
                return ERR_PTR(-ENOSPC);

        net = net_alloc();
        if (!net) {
                dec_net_namespaces(ucounts);
                return ERR_PTR(-ENOMEM);
        }

        get_user_ns(user_ns);

        mutex_lock(&net_mutex);
        net->ucounts = ucounts;
        rv = setup_net(net, user_ns);
        if (rv == 0) {
                rtnl_lock();
                list_add_tail_rcu(&net->list, &net_namespace_list);
                rtnl_unlock();
        }
        mutex_unlock(&net_mutex);
        if (rv < 0) {
                dec_net_namespaces(ucounts);
                put_user_ns(user_ns);
                net_drop_ns(net);
                return ERR_PTR(rv);
        }
        return net;
}

static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */

static void cleanup_net(struct work_struct *work)
{
        const struct pernet_operations *ops;
        struct net *net, *tmp;
        struct list_head net_kill_list;
        LIST_HEAD(net_exit_list);

        /* Atomically snapshot the list of namespaces to clean up */
        spin_lock_irq(&cleanup_list_lock);
        list_replace_init(&cleanup_list, &net_kill_list);
        spin_unlock_irq(&cleanup_list_lock);

        mutex_lock(&net_mutex);

        /* Don't let anyone else find us. */
        rtnl_lock();
        list_for_each_entry(net, &net_kill_list, cleanup_list) {
                list_del_rcu(&net->list);
                list_add_tail(&net->exit_list, &net_exit_list);
                for_each_net(tmp) {
                        int id;

                        spin_lock_irq(&tmp->nsid_lock);
                        id = __peernet2id(tmp, net);
                        if (id >= 0)
                                idr_remove(&tmp->netns_ids, id);
                        spin_unlock_irq(&tmp->nsid_lock);
                        if (id >= 0)
                                rtnl_net_notifyid(tmp, RTM_DELNSID, id);
                }
                spin_lock_irq(&net->nsid_lock);
                idr_destroy(&net->netns_ids);
                spin_unlock_irq(&net->nsid_lock);
        }
        rtnl_unlock();

        /*
         * Another CPU might be rcu-iterating the list, wait for it.
         * This needs to be before calling the exit() notifiers, so
         * the rcu_barrier() below isn't sufficient alone.
         */
        synchronize_rcu();

        /* Run all of the network namespace exit methods */
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);

        /* Free the net generic variables */
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_free_list(ops, &net_exit_list);

        mutex_unlock(&net_mutex);

        /* Ensure there are no outstanding rcu callbacks using this
         * network namespace.
         */
        rcu_barrier();

        /* Finally it is safe to free my network namespace structure */
        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
                list_del_init(&net->exit_list);
                dec_net_namespaces(net->ucounts);
                put_user_ns(net->user_ns);
                net_drop_ns(net);
        }
}
static DECLARE_WORK(net_cleanup_work, cleanup_net);

void __put_net(struct net *net)
{
        /* Clean up the network namespace in process context */
        unsigned long flags;

        spin_lock_irqsave(&cleanup_list_lock, flags);
        list_add(&net->cleanup_list, &cleanup_list);
        spin_unlock_irqrestore(&cleanup_list_lock, flags);

        queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);

struct net *get_net_ns_by_fd(int fd)
{
        struct file *file;
        struct ns_common *ns;
        struct net *net;

        file = proc_ns_fget(fd);
        if (IS_ERR(file))
                return ERR_CAST(file);

        ns = get_proc_ns(file_inode(file));
        if (ns->ops == &netns_operations)
                net = get_net(container_of(ns, struct net, ns));
        else
                net = ERR_PTR(-EINVAL);

        fput(file);
        return net;
}

#else
struct net *get_net_ns_by_fd(int fd)
{
        return ERR_PTR(-EINVAL);
}
#endif
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);

struct net *get_net_ns_by_pid(pid_t pid)
{
        struct task_struct *tsk;
        struct net *net;

        /* Look up the network namespace */
        net = ERR_PTR(-ESRCH);
        rcu_read_lock();
        tsk = find_task_by_vpid(pid);
        if (tsk) {
                struct nsproxy *nsproxy;
                task_lock(tsk);
                nsproxy = tsk->nsproxy;
                if (nsproxy)
                        net = get_net(nsproxy->net_ns);
                task_unlock(tsk);
        }
        rcu_read_unlock();
        return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
        net->ns.ops = &netns_operations;
#endif
        return ns_alloc_inum(&net->ns);
}

static __net_exit void net_ns_net_exit(struct net *net)
{
        ns_free_inum(&net->ns);
}

static struct pernet_operations __net_initdata net_ns_ops = {
        .init = net_ns_net_init,
        .exit = net_ns_net_exit,
};

static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
        [NETNSA_NONE]           = { .type = NLA_UNSPEC },
        [NETNSA_NSID]           = { .type = NLA_S32 },
        [NETNSA_PID]            = { .type = NLA_U32 },
        [NETNSA_FD]             = { .type = NLA_U32 },
};

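/* RTM_NEWNSID handler: lets userspace (e.g. "ip netns set <name> <id>")
 * assign an id for a peer namespace in the current one.
 */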
static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr *tb[NETNSA_MAX + 1];
        unsigned long flags;
        struct net *peer;
        int nsid, err;

        err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
                          rtnl_net_policy);
        if (err < 0)
                return err;
        if (!tb[NETNSA_NSID])
                return -EINVAL;
        nsid = nla_get_s32(tb[NETNSA_NSID]);

        if (tb[NETNSA_PID])
                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
        else if (tb[NETNSA_FD])
                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
        else
                return -EINVAL;
        if (IS_ERR(peer))
                return PTR_ERR(peer);

        spin_lock_irqsave(&net->nsid_lock, flags);
        if (__peernet2id(net, peer) >= 0) {
                spin_unlock_irqrestore(&net->nsid_lock, flags);
                err = -EEXIST;
                goto out;
        }

        err = alloc_netid(net, peer, nsid);
        spin_unlock_irqrestore(&net->nsid_lock, flags);
        if (err >= 0) {
                rtnl_net_notifyid(net, RTM_NEWNSID, err);
                err = 0;
        }
out:
        put_net(peer);
        return err;
}

static int rtnl_net_get_size(void)
{
        return NLMSG_ALIGN(sizeof(struct rtgenmsg))
               + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
               ;
}

static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
                         int cmd, struct net *net, int nsid)
{
        struct nlmsghdr *nlh;
        struct rtgenmsg *rth;

        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
        if (!nlh)
                return -EMSGSIZE;

        rth = nlmsg_data(nlh);
        rth->rtgen_family = AF_UNSPEC;

        if (nla_put_s32(skb, NETNSA_NSID, nsid))
                goto nla_put_failure;

        nlmsg_end(skb, nlh);
        return 0;

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}

static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr *tb[NETNSA_MAX + 1];
        struct sk_buff *msg;
        struct net *peer;
        int err, id;

        err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
                          rtnl_net_policy);
        if (err < 0)
                return err;
        if (tb[NETNSA_PID])
                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
        else if (tb[NETNSA_FD])
                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
        else
                return -EINVAL;

        if (IS_ERR(peer))
                return PTR_ERR(peer);

        msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
        if (!msg) {
                err = -ENOMEM;
                goto out;
        }

        id = peernet2id(net, peer);
        err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
                            RTM_NEWNSID, net, id);
        if (err < 0)
                goto err_out;

        err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
        goto out;

err_out:
        nlmsg_free(msg);
out:
        put_net(peer);
        return err;
}

struct rtnl_net_dump_cb {
        struct net *net;
        struct sk_buff *skb;
        struct netlink_callback *cb;
        int idx;
        int s_idx;
};

static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
        struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
        int ret;

        if (net_cb->idx < net_cb->s_idx)
                goto cont;

        ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
                            net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
                            RTM_NEWNSID, net_cb->net, id);
        if (ret < 0)
                return ret;

cont:
        net_cb->idx++;
        return 0;
}

static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct rtnl_net_dump_cb net_cb = {
                .net = net,
                .skb = skb,
                .cb = cb,
                .idx = 0,
                .s_idx = cb->args[0],
        };
        unsigned long flags;

        spin_lock_irqsave(&net->nsid_lock, flags);
        idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
        spin_unlock_irqrestore(&net->nsid_lock, flags);

        cb->args[0] = net_cb.idx;
        return skb->len;
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
        struct sk_buff *msg;
        int err = -ENOMEM;

        msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
        if (!msg)
                goto out;

        err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
        if (err < 0)
                goto err_out;

        rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
        return;

err_out:
        nlmsg_free(msg);
out:
        rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}

static int __init net_ns_init(void)
{
        struct net_generic *ng;

#ifdef CONFIG_NET_NS
        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
                                        SMP_CACHE_BYTES,
                                        SLAB_PANIC, NULL);

        /* Create workqueue for cleanup */
        netns_wq = create_singlethread_workqueue("netns");
        if (!netns_wq)
                panic("Could not create netns workq");
#endif

        ng = net_alloc_generic();
        if (!ng)
                panic("Could not allocate generic netns");

        rcu_assign_pointer(init_net.gen, ng);

        mutex_lock(&net_mutex);
        if (setup_net(&init_net, &init_user_ns))
                panic("Could not setup the initial network namespace");

        init_net_initialized = true;

        rtnl_lock();
        list_add_tail_rcu(&init_net.list, &net_namespace_list);
        rtnl_unlock();

        mutex_unlock(&net_mutex);

        register_pernet_subsys(&net_ns_ops);

        rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
                      NULL);

        return 0;
}

pure_initcall(net_ns_init);

#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
                                        struct pernet_operations *ops)
{
        struct net *net;
        int error;
        LIST_HEAD(net_exit_list);

        list_add_tail(&ops->list, list);
        if (ops->init || (ops->id && ops->size)) {
                for_each_net(net) {
                        error = ops_init(ops, net);
                        if (error)
                                goto out_undo;
                        list_add_tail(&net->exit_list, &net_exit_list);
                }
        }
        return 0;

out_undo:
        /* If I have an error, clean up all namespaces I initialized */
        list_del(&ops->list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
        return error;
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
        struct net *net;
        LIST_HEAD(net_exit_list);

        list_del(&ops->list);
        for_each_net(net)
                list_add_tail(&net->exit_list, &net_exit_list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
}

#else

static int __register_pernet_operations(struct list_head *list,
                                        struct pernet_operations *ops)
{
        if (!init_net_initialized) {
                list_add_tail(&ops->list, list);
                return 0;
        }

        return ops_init(ops, &init_net);
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
        if (!init_net_initialized) {
                list_del(&ops->list);
        } else {
                LIST_HEAD(net_exit_list);
                list_add(&init_net.exit_list, &net_exit_list);
                ops_exit_list(ops, &net_exit_list);
                ops_free_list(ops, &net_exit_list);
        }
}

#endif /* CONFIG_NET_NS */

static DEFINE_IDA(net_generic_ids);

static int register_pernet_operations(struct list_head *list,
                                      struct pernet_operations *ops)
{
        int error;

        if (ops->id) {
again:
                error = ida_get_new_above(&net_generic_ids, 1, ops->id);
                if (error < 0) {
                        if (error == -EAGAIN) {
                                ida_pre_get(&net_generic_ids, GFP_KERNEL);
                                goto again;
                        }
                        return error;
                }
                max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
        }
        error = __register_pernet_operations(list, ops);
        if (error) {
                rcu_barrier();
                if (ops->id)
                        ida_remove(&net_generic_ids, *ops->id);
        }

        return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
        __unregister_pernet_operations(ops);
        rcu_barrier();
        if (ops->id)
                ida_remove(&net_generic_ids, *ops->id);
}

/**
 *      register_pernet_subsys - register a network namespace subsystem
 *      @ops:  pernet operations structure for the subsystem
 *
 *      Register a subsystem which has init and exit functions
 *      that are called when network namespaces are created and
 *      destroyed respectively.
 *
 *      When registered all network namespace init functions are
 *      called for every existing network namespace, allowing kernel
 *      modules to have a race-free view of the set of network namespaces.
 *
 *      When a new network namespace is created all of the init
 *      methods are called in the order in which they were registered.
 *
 *      When a network namespace is destroyed all of the exit methods
 *      are called in the reverse of the order with which they were
 *      registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
        int error;
        mutex_lock(&net_mutex);
        error = register_pernet_operations(first_device, ops);
        mutex_unlock(&net_mutex);
        return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
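
/*
 * A typical user of the pernet machinery looks roughly like the sketch
 * below (illustrative only; the "foo" names are made up for this
 * example):
 *
 *	struct foo_net {
 *		int some_state;
 *	};
 *
 *	static int foo_net_id;
 *
 *	static int __net_init foo_net_init(struct net *net)
 *	{
 *		struct foo_net *fn = net_generic(net, foo_net_id);
 *
 *		fn->some_state = 0;
 *		return 0;
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init = foo_net_init,
 *		.id   = &foo_net_id,
 *		.size = sizeof(struct foo_net),
 *	};
 *
 *	err = register_pernet_subsys(&foo_net_ops);
 *
 * With .id and .size set, ops_init() above allocates the per-namespace
 * storage and net_assign_generic() publishes it before ->init runs, so
 * foo_net_init() can already retrieve it via net_generic().
 */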

/**
 *      unregister_pernet_subsys - unregister a network namespace subsystem
 *      @ops: pernet operations structure to manipulate
 *
 *      Remove the pernet operations structure from the list to be
 *      used when network namespaces are created or destroyed.  In
 *      addition run the exit method for all existing network
 *      namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
        mutex_lock(&net_mutex);
        unregister_pernet_operations(ops);
        mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

/**
 *      register_pernet_device - register a network namespace device
 *      @ops:  pernet operations structure for the subsystem
 *
 *      Register a device which has init and exit functions
 *      that are called when network namespaces are created and
 *      destroyed respectively.
 *
 *      When registered all network namespace init functions are
 *      called for every existing network namespace, allowing kernel
 *      modules to have a race-free view of the set of network namespaces.
 *
 *      When a new network namespace is created all of the init
 *      methods are called in the order in which they were registered.
 *
 *      When a network namespace is destroyed all of the exit methods
 *      are called in the reverse of the order with which they were
 *      registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
        int error;
        mutex_lock(&net_mutex);
        error = register_pernet_operations(&pernet_list, ops);
        if (!error && (first_device == &pernet_list))
                first_device = &ops->list;
        mutex_unlock(&net_mutex);
        return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 *      unregister_pernet_device - unregister a network namespace netdevice
 *      @ops: pernet operations structure to manipulate
 *
 *      Remove the pernet operations structure from the list to be
 *      used when network namespaces are created or destroyed.  In
 *      addition run the exit method for all existing network
 *      namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
        mutex_lock(&net_mutex);
        if (&ops->list == first_device)
                first_device = first_device->next;
        unregister_pernet_operations(ops);
        mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

#ifdef CONFIG_NET_NS
static struct ns_common *netns_get(struct task_struct *task)
{
        struct net *net = NULL;
        struct nsproxy *nsproxy;

        task_lock(task);
        nsproxy = task->nsproxy;
        if (nsproxy)
                net = get_net(nsproxy->net_ns);
        task_unlock(task);

        return net ? &net->ns : NULL;
}

static inline struct net *to_net_ns(struct ns_common *ns)
{
        return container_of(ns, struct net, ns);
}

static void netns_put(struct ns_common *ns)
{
        put_net(to_net_ns(ns));
}

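/* Called from setns(2): the caller must hold CAP_SYS_ADMIN both over
 * the target namespace's owning user namespace and in its own current
 * user namespace before it may switch to this netns.
 */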
static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
        struct net *net = to_net_ns(ns);

        if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
            !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
                return -EPERM;

        put_net(nsproxy->net_ns);
        nsproxy->net_ns = get_net(net);
        return 0;
}

static struct user_namespace *netns_owner(struct ns_common *ns)
{
        return to_net_ns(ns)->user_ns;
}

const struct proc_ns_operations netns_operations = {
        .name           = "net",
        .type           = CLONE_NEWNET,
        .get            = netns_get,
        .put            = netns_put,
        .install        = netns_install,
        .owner          = netns_owner,
};
#endif