linux/net/core/net_namespace.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   3
   4#include <linux/workqueue.h>
   5#include <linux/rtnetlink.h>
   6#include <linux/cache.h>
   7#include <linux/slab.h>
   8#include <linux/list.h>
   9#include <linux/delay.h>
  10#include <linux/sched.h>
  11#include <linux/idr.h>
  12#include <linux/rculist.h>
  13#include <linux/nsproxy.h>
  14#include <linux/fs.h>
  15#include <linux/proc_ns.h>
  16#include <linux/file.h>
  17#include <linux/export.h>
  18#include <linux/user_namespace.h>
  19#include <linux/net_namespace.h>
  20#include <linux/sched/task.h>
  21#include <linux/uidgid.h>
  22#include <linux/cookie.h>
  23
  24#include <net/sock.h>
  25#include <net/netlink.h>
  26#include <net/net_namespace.h>
  27#include <net/netns/generic.h>
  28
  29/*
  30 *      Our network namespace constructor/destructor lists
  31 */
  32
/*
 * pernet_list: the list of pernet_operations that setup_net() walks forward
 * (calling ops_init()) and cleanup_net() walks in reverse.
 * first_device: a position inside pernet_list, initially the list head
 * itself. Its users are outside this excerpt -- presumably it marks where
 * the device-level operations start; confirm against the registration code.
 */
  33static LIST_HEAD(pernet_list);
  34static struct list_head *first_device = &pernet_list;
  35
/*
 * Global list of network namespaces. setup_net() appends to it and
 * cleanup_net() removes from it, both under net_rwsem held for writing.
 */
  36LIST_HEAD(net_namespace_list);
  37EXPORT_SYMBOL_GPL(net_namespace_list);
  38
  39/* Protects net_namespace_list. Nests inside rtnl_lock() */
  40DECLARE_RWSEM(net_rwsem);
  41EXPORT_SYMBOL_GPL(net_rwsem);
  42
  43#ifdef CONFIG_KEYS
/*
 * Statically allocated key_tag starting with a usage count of 1.
 * NOTE(review): presumably installed as init_net.key_domain by code outside
 * this excerpt -- confirm.
 */
  44static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
  45#endif
  46
/* The statically allocated network namespace object, exported to modules. */
  47struct net init_net;
  48EXPORT_SYMBOL(init_net);
  49
/* Guarded by pernet_ops_rwsem (see below); its users are outside this excerpt. */
  50static bool init_net_initialized;
  51/*
  52 * pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
  53 * init_net_initialized and first_device pointer.
  54 * This is an internal net namespace object. Please don't use it
  55 * outside.
 *
 * In this file copy_net_ns() and cleanup_net() hold it for reading while
 * they run the pernet init/exit methods, and net_ns_barrier() briefly takes
 * it for writing to wait for them.
  56 */
  57DECLARE_RWSEM(pernet_ops_rwsem);
  58EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
  59
/*
 * sizeof(struct net_generic) expressed in pointer-sized units, rounded up.
 * net_assign_generic() BUG()s on any id below this value and only copies
 * ptr[] entries from this index onwards.
 */
  60#define MIN_PERNET_OPS_ID       \
  61        ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
  62
/* Initial value of max_gen_ptrs. */
  63#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */
  64
/* Number of ptr[] slots net_alloc_generic() sizes a new net_generic for. */
  65static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
  66
/* Cookie generator; setup_net() takes net->net_cookie from it. */
  67DEFINE_COOKIE(net_cookie);
  68
  69static struct net_generic *net_alloc_generic(void)
  70{
  71        struct net_generic *ng;
  72        unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
  73
  74        ng = kzalloc(generic_size, GFP_KERNEL);
  75        if (ng)
  76                ng->s.len = max_gen_ptrs;
  77
  78        return ng;
  79}
  80
  81static int net_assign_generic(struct net *net, unsigned int id, void *data)
  82{
  83        struct net_generic *ng, *old_ng;
  84
  85        BUG_ON(id < MIN_PERNET_OPS_ID);
  86
  87        old_ng = rcu_dereference_protected(net->gen,
  88                                           lockdep_is_held(&pernet_ops_rwsem));
  89        if (old_ng->s.len > id) {
  90                old_ng->ptr[id] = data;
  91                return 0;
  92        }
  93
  94        ng = net_alloc_generic();
  95        if (!ng)
  96                return -ENOMEM;
  97
  98        /*
  99         * Some synchronisation notes:
 100         *
 101         * The net_generic explores the net->gen array inside rcu
 102         * read section. Besides once set the net->gen->ptr[x]
 103         * pointer never changes (see rules in netns/generic.h).
 104         *
 105         * That said, we simply duplicate this array and schedule
 106         * the old copy for kfree after a grace period.
 107         */
 108
 109        memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
 110               (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
 111        ng->ptr[id] = data;
 112
 113        rcu_assign_pointer(net->gen, ng);
 114        kfree_rcu(old_ng, s.rcu);
 115        return 0;
 116}
 117
 118static int ops_init(const struct pernet_operations *ops, struct net *net)
 119{
 120        int err = -ENOMEM;
 121        void *data = NULL;
 122
 123        if (ops->id && ops->size) {
 124                data = kzalloc(ops->size, GFP_KERNEL);
 125                if (!data)
 126                        goto out;
 127
 128                err = net_assign_generic(net, *ops->id, data);
 129                if (err)
 130                        goto cleanup;
 131        }
 132        err = 0;
 133        if (ops->init)
 134                err = ops->init(net);
 135        if (!err)
 136                return 0;
 137
 138cleanup:
 139        kfree(data);
 140
 141out:
 142        return err;
 143}
 144
 145static void ops_pre_exit_list(const struct pernet_operations *ops,
 146                              struct list_head *net_exit_list)
 147{
 148        struct net *net;
 149
 150        if (ops->pre_exit) {
 151                list_for_each_entry(net, net_exit_list, exit_list)
 152                        ops->pre_exit(net);
 153        }
 154}
 155
 156static void ops_exit_list(const struct pernet_operations *ops,
 157                          struct list_head *net_exit_list)
 158{
 159        struct net *net;
 160        if (ops->exit) {
 161                list_for_each_entry(net, net_exit_list, exit_list) {
 162                        ops->exit(net);
 163                        cond_resched();
 164                }
 165        }
 166        if (ops->exit_batch)
 167                ops->exit_batch(net_exit_list);
 168}
 169
 170static void ops_free_list(const struct pernet_operations *ops,
 171                          struct list_head *net_exit_list)
 172{
 173        struct net *net;
 174        if (ops->size && ops->id) {
 175                list_for_each_entry(net, net_exit_list, exit_list)
 176                        kfree(net_generic(net, *ops->id));
 177        }
 178}
 179
 180/* should be called with nsid_lock held */
 181static int alloc_netid(struct net *net, struct net *peer, int reqid)
 182{
 183        int min = 0, max = 0;
 184
 185        if (reqid >= 0) {
 186                min = reqid;
 187                max = reqid + 1;
 188        }
 189
 190        return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
 191}
 192
 193/* This function is used by idr_for_each(). If net is equal to peer, the
 194 * function returns the id so that idr_for_each() stops. Because we cannot
 195 * returns the id 0 (idr_for_each() will not stop), we return the magic value
 196 * NET_ID_ZERO (-1) for it.
 197 */
 198#define NET_ID_ZERO -1
 199static int net_eq_idr(int id, void *net, void *peer)
 200{
 201        if (net_eq(net, peer))
 202                return id ? : NET_ID_ZERO;
 203        return 0;
 204}
 205
 206/* Must be called from RCU-critical section or with nsid_lock held */
 207static int __peernet2id(const struct net *net, struct net *peer)
 208{
 209        int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
 210
 211        /* Magic value for id 0. */
 212        if (id == NET_ID_ZERO)
 213                return 0;
 214        if (id > 0)
 215                return id;
 216
 217        return NETNSA_NSID_NOT_ASSIGNED;
 218}
 219
 220static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
 221                              struct nlmsghdr *nlh, gfp_t gfp);
 222/* This function returns the id of a peer netns. If no id is assigned, one will
 223 * be allocated and returned.
 224 */
 225int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
 226{
 227        int id;
 228
 229        if (refcount_read(&net->ns.count) == 0)
 230                return NETNSA_NSID_NOT_ASSIGNED;
 231
 232        spin_lock_bh(&net->nsid_lock);
 233        id = __peernet2id(net, peer);
 234        if (id >= 0) {
 235                spin_unlock_bh(&net->nsid_lock);
 236                return id;
 237        }
 238
 239        /* When peer is obtained from RCU lists, we may race with
 240         * its cleanup. Check whether it's alive, and this guarantees
 241         * we never hash a peer back to net->netns_ids, after it has
 242         * just been idr_remove()'d from there in cleanup_net().
 243         */
 244        if (!maybe_get_net(peer)) {
 245                spin_unlock_bh(&net->nsid_lock);
 246                return NETNSA_NSID_NOT_ASSIGNED;
 247        }
 248
 249        id = alloc_netid(net, peer, -1);
 250        spin_unlock_bh(&net->nsid_lock);
 251
 252        put_net(peer);
 253        if (id < 0)
 254                return NETNSA_NSID_NOT_ASSIGNED;
 255
 256        rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
 257
 258        return id;
 259}
 260EXPORT_SYMBOL_GPL(peernet2id_alloc);
 261
 262/* This function returns, if assigned, the id of a peer netns. */
 263int peernet2id(const struct net *net, struct net *peer)
 264{
 265        int id;
 266
 267        rcu_read_lock();
 268        id = __peernet2id(net, peer);
 269        rcu_read_unlock();
 270
 271        return id;
 272}
 273EXPORT_SYMBOL(peernet2id);
 274
 275/* This function returns true is the peer netns has an id assigned into the
 276 * current netns.
 277 */
 278bool peernet_has_id(const struct net *net, struct net *peer)
 279{
 280        return peernet2id(net, peer) >= 0;
 281}
 282
 283struct net *get_net_ns_by_id(const struct net *net, int id)
 284{
 285        struct net *peer;
 286
 287        if (id < 0)
 288                return NULL;
 289
 290        rcu_read_lock();
 291        peer = idr_find(&net->netns_ids, id);
 292        if (peer)
 293                peer = maybe_get_net(peer);
 294        rcu_read_unlock();
 295
 296        return peer;
 297}
 298EXPORT_SYMBOL_GPL(get_net_ns_by_id);
 299
 300/*
 301 * setup_net runs the initializers for the network namespace object.
 302 */
 303static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 304{
 305        /* Must be called with pernet_ops_rwsem held */
 306        const struct pernet_operations *ops, *saved_ops;
 307        int error = 0;
 308        LIST_HEAD(net_exit_list);
 309
 310        refcount_set(&net->ns.count, 1);
 311        ref_tracker_dir_init(&net->refcnt_tracker, 128);
 312
 313        refcount_set(&net->passive, 1);
 314        get_random_bytes(&net->hash_mix, sizeof(u32));
 315        preempt_disable();
 316        net->net_cookie = gen_cookie_next(&net_cookie);
 317        preempt_enable();
 318        net->dev_base_seq = 1;
 319        net->user_ns = user_ns;
 320        idr_init(&net->netns_ids);
 321        spin_lock_init(&net->nsid_lock);
 322        mutex_init(&net->ipv4.ra_mutex);
 323
 324        list_for_each_entry(ops, &pernet_list, list) {
 325                error = ops_init(ops, net);
 326                if (error < 0)
 327                        goto out_undo;
 328        }
 329        down_write(&net_rwsem);
 330        list_add_tail_rcu(&net->list, &net_namespace_list);
 331        up_write(&net_rwsem);
 332out:
 333        return error;
 334
 335out_undo:
 336        /* Walk through the list backwards calling the exit functions
 337         * for the pernet modules whose init functions did not fail.
 338         */
 339        list_add(&net->exit_list, &net_exit_list);
 340        saved_ops = ops;
 341        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 342                ops_pre_exit_list(ops, &net_exit_list);
 343
 344        synchronize_rcu();
 345
 346        ops = saved_ops;
 347        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 348                ops_exit_list(ops, &net_exit_list);
 349
 350        ops = saved_ops;
 351        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 352                ops_free_list(ops, &net_exit_list);
 353
 354        rcu_barrier();
 355        goto out;
 356}
 357
 358static int __net_init net_defaults_init_net(struct net *net)
 359{
 360        net->core.sysctl_somaxconn = SOMAXCONN;
 361        net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
 362
 363        return 0;
 364}
 365
 366static struct pernet_operations net_defaults_ops = {
 367        .init = net_defaults_init_net,
 368};
 369
 370static __init int net_defaults_init(void)
 371{
 372        if (register_pernet_subsys(&net_defaults_ops))
 373                panic("Cannot initialize net default settings");
 374
 375        return 0;
 376}
 377
 378core_initcall(net_defaults_init);
 379
 380#ifdef CONFIG_NET_NS
 381static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
 382{
 383        return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
 384}
 385
 386static void dec_net_namespaces(struct ucounts *ucounts)
 387{
 388        dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
 389}
 390
 391static struct kmem_cache *net_cachep __ro_after_init;
 392static struct workqueue_struct *netns_wq;
 393
 394static struct net *net_alloc(void)
 395{
 396        struct net *net = NULL;
 397        struct net_generic *ng;
 398
 399        ng = net_alloc_generic();
 400        if (!ng)
 401                goto out;
 402
 403        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
 404        if (!net)
 405                goto out_free;
 406
 407#ifdef CONFIG_KEYS
 408        net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL);
 409        if (!net->key_domain)
 410                goto out_free_2;
 411        refcount_set(&net->key_domain->usage, 1);
 412#endif
 413
 414        rcu_assign_pointer(net->gen, ng);
 415out:
 416        return net;
 417
 418#ifdef CONFIG_KEYS
 419out_free_2:
 420        kmem_cache_free(net_cachep, net);
 421        net = NULL;
 422#endif
 423out_free:
 424        kfree(ng);
 425        goto out;
 426}
 427
 428static void net_free(struct net *net)
 429{
 430        if (refcount_dec_and_test(&net->passive)) {
 431                kfree(rcu_access_pointer(net->gen));
 432                kmem_cache_free(net_cachep, net);
 433        }
 434}
 435
 436void net_drop_ns(void *p)
 437{
 438        struct net *net = (struct net *)p;
 439
 440        if (net)
 441                net_free(net);
 442}
 443
 444struct net *copy_net_ns(unsigned long flags,
 445                        struct user_namespace *user_ns, struct net *old_net)
 446{
 447        struct ucounts *ucounts;
 448        struct net *net;
 449        int rv;
 450
 451        if (!(flags & CLONE_NEWNET))
 452                return get_net(old_net);
 453
 454        ucounts = inc_net_namespaces(user_ns);
 455        if (!ucounts)
 456                return ERR_PTR(-ENOSPC);
 457
 458        net = net_alloc();
 459        if (!net) {
 460                rv = -ENOMEM;
 461                goto dec_ucounts;
 462        }
 463        refcount_set(&net->passive, 1);
 464        net->ucounts = ucounts;
 465        get_user_ns(user_ns);
 466
 467        rv = down_read_killable(&pernet_ops_rwsem);
 468        if (rv < 0)
 469                goto put_userns;
 470
 471        rv = setup_net(net, user_ns);
 472
 473        up_read(&pernet_ops_rwsem);
 474
 475        if (rv < 0) {
 476put_userns:
 477#ifdef CONFIG_KEYS
 478                key_remove_domain(net->key_domain);
 479#endif
 480                put_user_ns(user_ns);
 481                net_free(net);
 482dec_ucounts:
 483                dec_net_namespaces(ucounts);
 484                return ERR_PTR(rv);
 485        }
 486        return net;
 487}
 488
 489/**
 490 * net_ns_get_ownership - get sysfs ownership data for @net
 491 * @net: network namespace in question (can be NULL)
 492 * @uid: kernel user ID for sysfs objects
 493 * @gid: kernel group ID for sysfs objects
 494 *
 495 * Returns the uid/gid pair of root in the user namespace associated with the
 496 * given network namespace.
 497 */
 498void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
 499{
 500        if (net) {
 501                kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
 502                kgid_t ns_root_gid = make_kgid(net->user_ns, 0);
 503
 504                if (uid_valid(ns_root_uid))
 505                        *uid = ns_root_uid;
 506
 507                if (gid_valid(ns_root_gid))
 508                        *gid = ns_root_gid;
 509        } else {
 510                *uid = GLOBAL_ROOT_UID;
 511                *gid = GLOBAL_ROOT_GID;
 512        }
 513}
 514EXPORT_SYMBOL_GPL(net_ns_get_ownership);
 515
 516static void unhash_nsid(struct net *net, struct net *last)
 517{
 518        struct net *tmp;
 519        /* This function is only called from cleanup_net() work,
 520         * and this work is the only process, that may delete
 521         * a net from net_namespace_list. So, when the below
 522         * is executing, the list may only grow. Thus, we do not
 523         * use for_each_net_rcu() or net_rwsem.
 524         */
 525        for_each_net(tmp) {
 526                int id;
 527
 528                spin_lock_bh(&tmp->nsid_lock);
 529                id = __peernet2id(tmp, net);
 530                if (id >= 0)
 531                        idr_remove(&tmp->netns_ids, id);
 532                spin_unlock_bh(&tmp->nsid_lock);
 533                if (id >= 0)
 534                        rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
 535                                          GFP_KERNEL);
 536                if (tmp == last)
 537                        break;
 538        }
 539        spin_lock_bh(&net->nsid_lock);
 540        idr_destroy(&net->netns_ids);
 541        spin_unlock_bh(&net->nsid_lock);
 542}
 543
 544static LLIST_HEAD(cleanup_list);
 545
 546static void cleanup_net(struct work_struct *work)
 547{
 548        const struct pernet_operations *ops;
 549        struct net *net, *tmp, *last;
 550        struct llist_node *net_kill_list;
 551        LIST_HEAD(net_exit_list);
 552
 553        /* Atomically snapshot the list of namespaces to cleanup */
 554        net_kill_list = llist_del_all(&cleanup_list);
 555
 556        down_read(&pernet_ops_rwsem);
 557
 558        /* Don't let anyone else find us. */
 559        down_write(&net_rwsem);
 560        llist_for_each_entry(net, net_kill_list, cleanup_list)
 561                list_del_rcu(&net->list);
 562        /* Cache last net. After we unlock rtnl, no one new net
 563         * added to net_namespace_list can assign nsid pointer
 564         * to a net from net_kill_list (see peernet2id_alloc()).
 565         * So, we skip them in unhash_nsid().
 566         *
 567         * Note, that unhash_nsid() does not delete nsid links
 568         * between net_kill_list's nets, as they've already
 569         * deleted from net_namespace_list. But, this would be
 570         * useless anyway, as netns_ids are destroyed there.
 571         */
 572        last = list_last_entry(&net_namespace_list, struct net, list);
 573        up_write(&net_rwsem);
 574
 575        llist_for_each_entry(net, net_kill_list, cleanup_list) {
 576                unhash_nsid(net, last);
 577                list_add_tail(&net->exit_list, &net_exit_list);
 578        }
 579
 580        /* Run all of the network namespace pre_exit methods */
 581        list_for_each_entry_reverse(ops, &pernet_list, list)
 582                ops_pre_exit_list(ops, &net_exit_list);
 583
 584        /*
 585         * Another CPU might be rcu-iterating the list, wait for it.
 586         * This needs to be before calling the exit() notifiers, so
 587         * the rcu_barrier() below isn't sufficient alone.
 588         * Also the pre_exit() and exit() methods need this barrier.
 589         */
 590        synchronize_rcu();
 591
 592        /* Run all of the network namespace exit methods */
 593        list_for_each_entry_reverse(ops, &pernet_list, list)
 594                ops_exit_list(ops, &net_exit_list);
 595
 596        /* Free the net generic variables */
 597        list_for_each_entry_reverse(ops, &pernet_list, list)
 598                ops_free_list(ops, &net_exit_list);
 599
 600        up_read(&pernet_ops_rwsem);
 601
 602        /* Ensure there are no outstanding rcu callbacks using this
 603         * network namespace.
 604         */
 605        rcu_barrier();
 606
 607        /* Finally it is safe to free my network namespace structure */
 608        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 609                list_del_init(&net->exit_list);
 610                dec_net_namespaces(net->ucounts);
 611#ifdef CONFIG_KEYS
 612                key_remove_domain(net->key_domain);
 613#endif
 614                put_user_ns(net->user_ns);
 615                net_free(net);
 616        }
 617}
 618
 619/**
 620 * net_ns_barrier - wait until concurrent net_cleanup_work is done
 621 *
 622 * cleanup_net runs from work queue and will first remove namespaces
 623 * from the global list, then run net exit functions.
 624 *
 625 * Call this in module exit path to make sure that all netns
 626 * ->exit ops have been invoked before the function is removed.
 627 */
 628void net_ns_barrier(void)
 629{
 630        down_write(&pernet_ops_rwsem);
 631        up_write(&pernet_ops_rwsem);
 632}
 633EXPORT_SYMBOL(net_ns_barrier);
 634
 635static DECLARE_WORK(net_cleanup_work, cleanup_net);
 636
 637void __put_net(struct net *net)
 638{
 639        ref_tracker_dir_exit(&net->refcnt_tracker);
 640        /* Cleanup the network namespace in process context */
 641        if (llist_add(&net->cleanup_list, &cleanup_list))
 642                queue_work(netns_wq, &net_cleanup_work);
 643}
 644EXPORT_SYMBOL_GPL(__put_net);
 645
 646/**
 647 * get_net_ns - increment the refcount of the network namespace
 648 * @ns: common namespace (net)
 649 *
 650 * Returns the net's common namespace.
 651 */
 652struct ns_common *get_net_ns(struct ns_common *ns)
 653{
 654        return &get_net(container_of(ns, struct net, ns))->ns;
 655}
 656EXPORT_SYMBOL_GPL(get_net_ns);
 657
 658struct net *get_net_ns_by_fd(int fd)
 659{
 660        struct file *file;
 661        struct ns_common *ns;
 662        struct net *net;
 663
 664        file = proc_ns_fget(fd);
 665        if (IS_ERR(file))
 666                return ERR_CAST(file);
 667
 668        ns = get_proc_ns(file_inode(file));
 669        if (ns->ops == &netns_operations)
 670                net = get_net(container_of(ns, struct net, ns));
 671        else
 672                net = ERR_PTR(-EINVAL);
 673
 674        fput(file);
 675        return net;
 676}
 677EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
 678#endif
 679
 680struct net *get_net_ns_by_pid(pid_t pid)
 681{
 682        struct task_struct *tsk;
 683        struct net *net;
 684
 685        /* Lookup the network namespace */
 686        net = ERR_PTR(-ESRCH);
 687        rcu_read_lock();
 688        tsk = find_task_by_vpid(pid);
 689        if (tsk) {
 690                struct nsproxy *nsproxy;
 691                task_lock(tsk);
 692                nsproxy = tsk->nsproxy;
 693                if (nsproxy)
 694                        net = get_net(nsproxy->net_ns);
 695                task_unlock(tsk);
 696        }
 697        rcu_read_unlock();
 698        return net;
 699}
 700EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 701
 702static __net_init int net_ns_net_init(struct net *net)
 703{
 704#ifdef CONFIG_NET_NS
 705        net->ns.ops = &netns_operations;
 706#endif
 707        return ns_alloc_inum(&net->ns);
 708}
 709
 710static __net_exit void net_ns_net_exit(struct net *net)
 711{
 712        ns_free_inum(&net->ns);
 713}
 714
 715static struct pernet_operations __net_initdata net_ns_ops = {
 716        .init = net_ns_net_init,
 717        .exit = net_ns_net_exit,
 718};
 719
/*
 * Attribute policy passed to the nlmsg_parse_deprecated*() calls of the nsid
 * netlink handlers in this file (rtnl_net_newid(),
 * rtnl_net_valid_getid_req(), rtnl_valid_dump_net_req()).
 * The nsid attributes are signed 32-bit; pid and fd are unsigned 32-bit.
 */
 720static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
 721        [NETNSA_NONE]           = { .type = NLA_UNSPEC },
 722        [NETNSA_NSID]           = { .type = NLA_S32 },
 723        [NETNSA_PID]            = { .type = NLA_U32 },
 724        [NETNSA_FD]             = { .type = NLA_U32 },
 725        [NETNSA_TARGET_NSID]    = { .type = NLA_S32 },
 726};
 727
 728static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
 729                          struct netlink_ext_ack *extack)
 730{
 731        struct net *net = sock_net(skb->sk);
 732        struct nlattr *tb[NETNSA_MAX + 1];
 733        struct nlattr *nla;
 734        struct net *peer;
 735        int nsid, err;
 736
 737        err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb,
 738                                     NETNSA_MAX, rtnl_net_policy, extack);
 739        if (err < 0)
 740                return err;
 741        if (!tb[NETNSA_NSID]) {
 742                NL_SET_ERR_MSG(extack, "nsid is missing");
 743                return -EINVAL;
 744        }
 745        nsid = nla_get_s32(tb[NETNSA_NSID]);
 746
 747        if (tb[NETNSA_PID]) {
 748                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
 749                nla = tb[NETNSA_PID];
 750        } else if (tb[NETNSA_FD]) {
 751                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
 752                nla = tb[NETNSA_FD];
 753        } else {
 754                NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
 755                return -EINVAL;
 756        }
 757        if (IS_ERR(peer)) {
 758                NL_SET_BAD_ATTR(extack, nla);
 759                NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
 760                return PTR_ERR(peer);
 761        }
 762
 763        spin_lock_bh(&net->nsid_lock);
 764        if (__peernet2id(net, peer) >= 0) {
 765                spin_unlock_bh(&net->nsid_lock);
 766                err = -EEXIST;
 767                NL_SET_BAD_ATTR(extack, nla);
 768                NL_SET_ERR_MSG(extack,
 769                               "Peer netns already has a nsid assigned");
 770                goto out;
 771        }
 772
 773        err = alloc_netid(net, peer, nsid);
 774        spin_unlock_bh(&net->nsid_lock);
 775        if (err >= 0) {
 776                rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
 777                                  nlh, GFP_KERNEL);
 778                err = 0;
 779        } else if (err == -ENOSPC && nsid >= 0) {
 780                err = -EEXIST;
 781                NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
 782                NL_SET_ERR_MSG(extack, "The specified nsid is already used");
 783        }
 784out:
 785        put_net(peer);
 786        return err;
 787}
 788
 789static int rtnl_net_get_size(void)
 790{
 791        return NLMSG_ALIGN(sizeof(struct rtgenmsg))
 792               + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
 793               + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */
 794               ;
 795}
 796
/* Inputs for rtnl_net_fill() when it builds one nsid netlink message. */
 797struct net_fill_args {
 798        u32 portid;     /* passed to nlmsg_put() as the port id */
 799        u32 seq;        /* passed to nlmsg_put() as the sequence number */
 800        int flags;      /* passed to nlmsg_put() as the message flags */
 801        int cmd;        /* passed to nlmsg_put() as the message type */
 802        int nsid;       /* value of the NETNSA_NSID attribute */
 803        bool add_ref;   /* when true, NETNSA_CURRENT_NSID is added as well */
 804        int ref_nsid;   /* value of NETNSA_CURRENT_NSID; used only if add_ref */
 805};
 806
 807static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args)
 808{
 809        struct nlmsghdr *nlh;
 810        struct rtgenmsg *rth;
 811
 812        nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth),
 813                        args->flags);
 814        if (!nlh)
 815                return -EMSGSIZE;
 816
 817        rth = nlmsg_data(nlh);
 818        rth->rtgen_family = AF_UNSPEC;
 819
 820        if (nla_put_s32(skb, NETNSA_NSID, args->nsid))
 821                goto nla_put_failure;
 822
 823        if (args->add_ref &&
 824            nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid))
 825                goto nla_put_failure;
 826
 827        nlmsg_end(skb, nlh);
 828        return 0;
 829
 830nla_put_failure:
 831        nlmsg_cancel(skb, nlh);
 832        return -EMSGSIZE;
 833}
 834
 835static int rtnl_net_valid_getid_req(struct sk_buff *skb,
 836                                    const struct nlmsghdr *nlh,
 837                                    struct nlattr **tb,
 838                                    struct netlink_ext_ack *extack)
 839{
 840        int i, err;
 841
 842        if (!netlink_strict_get_check(skb))
 843                return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg),
 844                                              tb, NETNSA_MAX, rtnl_net_policy,
 845                                              extack);
 846
 847        err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
 848                                            NETNSA_MAX, rtnl_net_policy,
 849                                            extack);
 850        if (err)
 851                return err;
 852
 853        for (i = 0; i <= NETNSA_MAX; i++) {
 854                if (!tb[i])
 855                        continue;
 856
 857                switch (i) {
 858                case NETNSA_PID:
 859                case NETNSA_FD:
 860                case NETNSA_NSID:
 861                case NETNSA_TARGET_NSID:
 862                        break;
 863                default:
 864                        NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request");
 865                        return -EINVAL;
 866                }
 867        }
 868
 869        return 0;
 870}
 871
 872static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
 873                          struct netlink_ext_ack *extack)
 874{
 875        struct net *net = sock_net(skb->sk);
 876        struct nlattr *tb[NETNSA_MAX + 1];
 877        struct net_fill_args fillargs = {
 878                .portid = NETLINK_CB(skb).portid,
 879                .seq = nlh->nlmsg_seq,
 880                .cmd = RTM_NEWNSID,
 881        };
 882        struct net *peer, *target = net;
 883        struct nlattr *nla;
 884        struct sk_buff *msg;
 885        int err;
 886
 887        err = rtnl_net_valid_getid_req(skb, nlh, tb, extack);
 888        if (err < 0)
 889                return err;
 890        if (tb[NETNSA_PID]) {
 891                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
 892                nla = tb[NETNSA_PID];
 893        } else if (tb[NETNSA_FD]) {
 894                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
 895                nla = tb[NETNSA_FD];
 896        } else if (tb[NETNSA_NSID]) {
 897                peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID]));
 898                if (!peer)
 899                        peer = ERR_PTR(-ENOENT);
 900                nla = tb[NETNSA_NSID];
 901        } else {
 902                NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
 903                return -EINVAL;
 904        }
 905
 906        if (IS_ERR(peer)) {
 907                NL_SET_BAD_ATTR(extack, nla);
 908                NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
 909                return PTR_ERR(peer);
 910        }
 911
 912        if (tb[NETNSA_TARGET_NSID]) {
 913                int id = nla_get_s32(tb[NETNSA_TARGET_NSID]);
 914
 915                target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id);
 916                if (IS_ERR(target)) {
 917                        NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
 918                        NL_SET_ERR_MSG(extack,
 919                                       "Target netns reference is invalid");
 920                        err = PTR_ERR(target);
 921                        goto out;
 922                }
 923                fillargs.add_ref = true;
 924                fillargs.ref_nsid = peernet2id(net, peer);
 925        }
 926
 927        msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
 928        if (!msg) {
 929                err = -ENOMEM;
 930                goto out;
 931        }
 932
 933        fillargs.nsid = peernet2id(target, peer);
 934        err = rtnl_net_fill(msg, &fillargs);
 935        if (err < 0)
 936                goto err_out;
 937
 938        err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
 939        goto out;
 940
 941err_out:
 942        nlmsg_free(msg);
 943out:
 944        if (fillargs.add_ref)
 945                put_net(target);
 946        put_net(peer);
 947        return err;
 948}
 949
/* State handed to rtnl_net_dumpid_one() while nsids are being dumped. */
 950struct rtnl_net_dump_cb {
 951        struct net *tgt_net;    /* not used in the visible code; presumably the namespace whose ids are dumped -- confirm */
 952        struct net *ref_net;    /* namespace used to compute fillargs.ref_nsid when add_ref is set */
 953        struct sk_buff *skb;    /* buffer the messages are appended to */
 954        struct net_fill_args fillargs;  /* per-entry message parameters */
 955        int idx;        /* number of idr entries visited so far */
 956        int s_idx;      /* entries with idx below this are skipped */
 957};
 958
 959/* Runs in RCU-critical section. */
 960static int rtnl_net_dumpid_one(int id, void *peer, void *data)
 961{
 962        struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
 963        int ret;
 964
 965        if (net_cb->idx < net_cb->s_idx)
 966                goto cont;
 967
 968        net_cb->fillargs.nsid = id;
 969        if (net_cb->fillargs.add_ref)
 970                net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer);
 971        ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs);
 972        if (ret < 0)
 973                return ret;
 974
 975cont:
 976        net_cb->idx++;
 977        return 0;
 978}
 979
 980static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk,
 981                                   struct rtnl_net_dump_cb *net_cb,
 982                                   struct netlink_callback *cb)
 983{
 984        struct netlink_ext_ack *extack = cb->extack;
 985        struct nlattr *tb[NETNSA_MAX + 1];
 986        int err, i;
 987
 988        err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
 989                                            NETNSA_MAX, rtnl_net_policy,
 990                                            extack);
 991        if (err < 0)
 992                return err;
 993
 994        for (i = 0; i <= NETNSA_MAX; i++) {
 995                if (!tb[i])
 996                        continue;
 997
 998                if (i == NETNSA_TARGET_NSID) {
 999                        struct net *net;
1000
1001                        net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i]));
1002                        if (IS_ERR(net)) {
1003                                NL_SET_BAD_ATTR(extack, tb[i]);
1004                                NL_SET_ERR_MSG(extack,
1005                                               "Invalid target network namespace id");
1006                                return PTR_ERR(net);
1007                        }
1008                        net_cb->fillargs.add_ref = true;
1009                        net_cb->ref_net = net_cb->tgt_net;
1010                        net_cb->tgt_net = net;
1011                } else {
1012                        NL_SET_BAD_ATTR(extack, tb[i]);
1013                        NL_SET_ERR_MSG(extack,
1014                                       "Unsupported attribute in dump request");
1015                        return -EINVAL;
1016                }
1017        }
1018
1019        return 0;
1020}
1021
1022static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
1023{
1024        struct rtnl_net_dump_cb net_cb = {
1025                .tgt_net = sock_net(skb->sk),
1026                .skb = skb,
1027                .fillargs = {
1028                        .portid = NETLINK_CB(cb->skb).portid,
1029                        .seq = cb->nlh->nlmsg_seq,
1030                        .flags = NLM_F_MULTI,
1031                        .cmd = RTM_NEWNSID,
1032                },
1033                .idx = 0,
1034                .s_idx = cb->args[0],
1035        };
1036        int err = 0;
1037
1038        if (cb->strict_check) {
1039                err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb);
1040                if (err < 0)
1041                        goto end;
1042        }
1043
1044        rcu_read_lock();
1045        idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
1046        rcu_read_unlock();
1047
1048        cb->args[0] = net_cb.idx;
1049end:
1050        if (net_cb.fillargs.add_ref)
1051                put_net(net_cb.tgt_net);
1052        return err < 0 ? err : skb->len;
1053}
1054
1055static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
1056                              struct nlmsghdr *nlh, gfp_t gfp)
1057{
1058        struct net_fill_args fillargs = {
1059                .portid = portid,
1060                .seq = nlh ? nlh->nlmsg_seq : 0,
1061                .cmd = cmd,
1062                .nsid = id,
1063        };
1064        struct sk_buff *msg;
1065        int err = -ENOMEM;
1066
1067        msg = nlmsg_new(rtnl_net_get_size(), gfp);
1068        if (!msg)
1069                goto out;
1070
1071        err = rtnl_net_fill(msg, &fillargs);
1072        if (err < 0)
1073                goto err_out;
1074
1075        rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp);
1076        return;
1077
1078err_out:
1079        nlmsg_free(msg);
1080out:
1081        rtnl_set_sk_err(net, RTNLGRP_NSID, err);
1082}
1083
1084void __init net_ns_init(void)
1085{
1086        struct net_generic *ng;
1087
1088#ifdef CONFIG_NET_NS
1089        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
1090                                        SMP_CACHE_BYTES,
1091                                        SLAB_PANIC|SLAB_ACCOUNT, NULL);
1092
1093        /* Create workqueue for cleanup */
1094        netns_wq = create_singlethread_workqueue("netns");
1095        if (!netns_wq)
1096                panic("Could not create netns workq");
1097#endif
1098
1099        ng = net_alloc_generic();
1100        if (!ng)
1101                panic("Could not allocate generic netns");
1102
1103        rcu_assign_pointer(init_net.gen, ng);
1104
1105#ifdef CONFIG_KEYS
1106        init_net.key_domain = &init_net_key_domain;
1107#endif
1108        down_write(&pernet_ops_rwsem);
1109        if (setup_net(&init_net, &init_user_ns))
1110                panic("Could not setup the initial network namespace");
1111
1112        init_net_initialized = true;
1113        up_write(&pernet_ops_rwsem);
1114
1115        if (register_pernet_subsys(&net_ns_ops))
1116                panic("Could not register network namespace subsystems");
1117
1118        rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
1119                      RTNL_FLAG_DOIT_UNLOCKED);
1120        rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
1121                      RTNL_FLAG_DOIT_UNLOCKED);
1122}
1123
/*
 * Run the teardown sequence of @ops over the namespaces queued on
 * @net_exit_list: ops_pre_exit_list(), an RCU grace period, then
 * ops_exit_list() followed by ops_free_list().
 */
static void free_exit_list(struct pernet_operations *ops,
                           struct list_head *net_exit_list)
{
        ops_pre_exit_list(ops, net_exit_list);

        /* Let a grace period elapse between the pre-exit and exit stages. */
        synchronize_rcu();

        ops_exit_list(ops, net_exit_list);
        ops_free_list(ops, net_exit_list);
}
1131
1132#ifdef CONFIG_NET_NS
1133static int __register_pernet_operations(struct list_head *list,
1134                                        struct pernet_operations *ops)
1135{
1136        struct net *net;
1137        int error;
1138        LIST_HEAD(net_exit_list);
1139
1140        list_add_tail(&ops->list, list);
1141        if (ops->init || (ops->id && ops->size)) {
1142                /* We held write locked pernet_ops_rwsem, and parallel
1143                 * setup_net() and cleanup_net() are not possible.
1144                 */
1145                for_each_net(net) {
1146                        error = ops_init(ops, net);
1147                        if (error)
1148                                goto out_undo;
1149                        list_add_tail(&net->exit_list, &net_exit_list);
1150                }
1151        }
1152        return 0;
1153
1154out_undo:
1155        /* If I have an error cleanup all namespaces I initialized */
1156        list_del(&ops->list);
1157        free_exit_list(ops, &net_exit_list);
1158        return error;
1159}
1160
1161static void __unregister_pernet_operations(struct pernet_operations *ops)
1162{
1163        struct net *net;
1164        LIST_HEAD(net_exit_list);
1165
1166        list_del(&ops->list);
1167        /* See comment in __register_pernet_operations() */
1168        for_each_net(net)
1169                list_add_tail(&net->exit_list, &net_exit_list);
1170
1171        free_exit_list(ops, &net_exit_list);
1172}
1173
1174#else
1175
1176static int __register_pernet_operations(struct list_head *list,
1177                                        struct pernet_operations *ops)
1178{
1179        if (!init_net_initialized) {
1180                list_add_tail(&ops->list, list);
1181                return 0;
1182        }
1183
1184        return ops_init(ops, &init_net);
1185}
1186
1187static void __unregister_pernet_operations(struct pernet_operations *ops)
1188{
1189        if (!init_net_initialized) {
1190                list_del(&ops->list);
1191        } else {
1192                LIST_HEAD(net_exit_list);
1193                list_add(&init_net.exit_list, &net_exit_list);
1194                free_exit_list(ops, &net_exit_list);
1195        }
1196}
1197
1198#endif /* CONFIG_NET_NS */
1199
1200static DEFINE_IDA(net_generic_ids);
1201
1202static int register_pernet_operations(struct list_head *list,
1203                                      struct pernet_operations *ops)
1204{
1205        int error;
1206
1207        if (ops->id) {
1208                error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
1209                                GFP_KERNEL);
1210                if (error < 0)
1211                        return error;
1212                *ops->id = error;
1213                max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
1214        }
1215        error = __register_pernet_operations(list, ops);
1216        if (error) {
1217                rcu_barrier();
1218                if (ops->id)
1219                        ida_free(&net_generic_ids, *ops->id);
1220        }
1221
1222        return error;
1223}
1224
1225static void unregister_pernet_operations(struct pernet_operations *ops)
1226{
1227        __unregister_pernet_operations(ops);
1228        rcu_barrier();
1229        if (ops->id)
1230                ida_free(&net_generic_ids, *ops->id);
1231}
1232
1233/**
1234 *      register_pernet_subsys - register a network namespace subsystem
1235 *      @ops:  pernet operations structure for the subsystem
1236 *
1237 *      Register a subsystem which has init and exit functions
1238 *      that are called when network namespaces are created and
1239 *      destroyed respectively.
1240 *
1241 *      When registered all network namespace init functions are
1242 *      called for every existing network namespace.  Allowing kernel
1243 *      modules to have a race free view of the set of network namespaces.
1244 *
1245 *      When a new network namespace is created all of the init
1246 *      methods are called in the order in which they were registered.
1247 *
1248 *      When a network namespace is destroyed all of the exit methods
1249 *      are called in the reverse of the order with which they were
1250 *      registered.
1251 */
1252int register_pernet_subsys(struct pernet_operations *ops)
1253{
1254        int error;
1255        down_write(&pernet_ops_rwsem);
1256        error =  register_pernet_operations(first_device, ops);
1257        up_write(&pernet_ops_rwsem);
1258        return error;
1259}
1260EXPORT_SYMBOL_GPL(register_pernet_subsys);
1261
1262/**
1263 *      unregister_pernet_subsys - unregister a network namespace subsystem
1264 *      @ops: pernet operations structure to manipulate
1265 *
1266 *      Remove the pernet operations structure from the list to be
1267 *      used when network namespaces are created or destroyed.  In
1268 *      addition run the exit method for all existing network
1269 *      namespaces.
1270 */
1271void unregister_pernet_subsys(struct pernet_operations *ops)
1272{
1273        down_write(&pernet_ops_rwsem);
1274        unregister_pernet_operations(ops);
1275        up_write(&pernet_ops_rwsem);
1276}
1277EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
1278
1279/**
1280 *      register_pernet_device - register a network namespace device
1281 *      @ops:  pernet operations structure for the subsystem
1282 *
1283 *      Register a device which has init and exit functions
1284 *      that are called when network namespaces are created and
1285 *      destroyed respectively.
1286 *
1287 *      When registered all network namespace init functions are
1288 *      called for every existing network namespace.  Allowing kernel
1289 *      modules to have a race free view of the set of network namespaces.
1290 *
1291 *      When a new network namespace is created all of the init
1292 *      methods are called in the order in which they were registered.
1293 *
1294 *      When a network namespace is destroyed all of the exit methods
1295 *      are called in the reverse of the order with which they were
1296 *      registered.
1297 */
1298int register_pernet_device(struct pernet_operations *ops)
1299{
1300        int error;
1301        down_write(&pernet_ops_rwsem);
1302        error = register_pernet_operations(&pernet_list, ops);
1303        if (!error && (first_device == &pernet_list))
1304                first_device = &ops->list;
1305        up_write(&pernet_ops_rwsem);
1306        return error;
1307}
1308EXPORT_SYMBOL_GPL(register_pernet_device);
1309
1310/**
1311 *      unregister_pernet_device - unregister a network namespace netdevice
1312 *      @ops: pernet operations structure to manipulate
1313 *
1314 *      Remove the pernet operations structure from the list to be
1315 *      used when network namespaces are created or destroyed.  In
1316 *      addition run the exit method for all existing network
1317 *      namespaces.
1318 */
1319void unregister_pernet_device(struct pernet_operations *ops)
1320{
1321        down_write(&pernet_ops_rwsem);
1322        if (&ops->list == first_device)
1323                first_device = first_device->next;
1324        unregister_pernet_operations(ops);
1325        up_write(&pernet_ops_rwsem);
1326}
1327EXPORT_SYMBOL_GPL(unregister_pernet_device);
1328
1329#ifdef CONFIG_NET_NS
1330static struct ns_common *netns_get(struct task_struct *task)
1331{
1332        struct net *net = NULL;
1333        struct nsproxy *nsproxy;
1334
1335        task_lock(task);
1336        nsproxy = task->nsproxy;
1337        if (nsproxy)
1338                net = get_net(nsproxy->net_ns);
1339        task_unlock(task);
1340
1341        return net ? &net->ns : NULL;
1342}
1343
1344static inline struct net *to_net_ns(struct ns_common *ns)
1345{
1346        return container_of(ns, struct net, ns);
1347}
1348
/* ->put handler of netns_operations: put_net() on the owning struct net. */
static void netns_put(struct ns_common *ns)
{
        struct net *net = to_net_ns(ns);

        put_net(net);
}
1353
1354static int netns_install(struct nsset *nsset, struct ns_common *ns)
1355{
1356        struct nsproxy *nsproxy = nsset->nsproxy;
1357        struct net *net = to_net_ns(ns);
1358
1359        if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
1360            !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
1361                return -EPERM;
1362
1363        put_net(nsproxy->net_ns);
1364        nsproxy->net_ns = get_net(net);
1365        return 0;
1366}
1367
1368static struct user_namespace *netns_owner(struct ns_common *ns)
1369{
1370        return to_net_ns(ns)->user_ns;
1371}
1372
1373const struct proc_ns_operations netns_operations = {
1374        .name           = "net",
1375        .type           = CLONE_NEWNET,
1376        .get            = netns_get,
1377        .put            = netns_put,
1378        .install        = netns_install,
1379        .owner          = netns_owner,
1380};
1381#endif
1382