linux/net/core/net_namespace.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   3
   4#include <linux/workqueue.h>
   5#include <linux/rtnetlink.h>
   6#include <linux/cache.h>
   7#include <linux/slab.h>
   8#include <linux/list.h>
   9#include <linux/delay.h>
  10#include <linux/sched.h>
  11#include <linux/idr.h>
  12#include <linux/rculist.h>
  13#include <linux/nsproxy.h>
  14#include <linux/fs.h>
  15#include <linux/proc_ns.h>
  16#include <linux/file.h>
  17#include <linux/export.h>
  18#include <linux/user_namespace.h>
  19#include <linux/net_namespace.h>
  20#include <linux/sched/task.h>
  21#include <linux/uidgid.h>
  22#include <linux/cookie.h>
  23
  24#include <net/sock.h>
  25#include <net/netlink.h>
  26#include <net/net_namespace.h>
  27#include <net/netns/generic.h>
  28
/*
 *      Our network namespace constructor/destructor lists
 */

/* List of pernet_operations. setup_net() walks it forwards to run the init
 * hooks; cleanup_net() walks it backwards to run the exit hooks.
 */
static LIST_HEAD(pernet_list);
/* Initially points at the list head itself.
 * NOTE(review): presumably marks where device-class operations start in
 * pernet_list; the code that updates it is not visible here - confirm.
 */
static struct list_head *first_device = &pernet_list;
  35
/* Network namespaces that completed setup_net(). Entries are appended by
 * setup_net() and unlinked by cleanup_net(), both under net_rwsem held for
 * writing.
 */
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
  38
/* Protects net_namespace_list. Nests inside rtnl_lock() */
DECLARE_RWSEM(net_rwsem);
EXPORT_SYMBOL_GPL(net_rwsem);
  42
#ifdef CONFIG_KEYS
/* Statically allocated key domain tag for init_net, starting with one
 * reference.
 */
static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
#endif

/* The initial network namespace. It is defined statically with a reference
 * count of one and an empty device list; the remaining fields are filled in
 * at run time (e.g. by setup_net()).
 */
struct net init_net = {
        .count          = REFCOUNT_INIT(1),
        .dev_base_head  = LIST_HEAD_INIT(init_net.dev_base_head),
#ifdef CONFIG_KEYS
        .key_domain     = &init_net_key_domain,
#endif
};
EXPORT_SYMBOL(init_net);
  55
/* Covered by pernet_ops_rwsem, see below.
 * NOTE(review): the code that sets this flag is not visible here.
 */
static bool init_net_initialized;
/*
 * pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
 * init_net_initialized and first_device pointer.
 * This is an internal net namespace object. Please don't use it
 * outside of this file.
 */
DECLARE_RWSEM(pernet_ops_rwsem);
EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
  65
/* Size of struct net_generic rounded up to whole pointer-sized slots.
 * net_assign_generic() refuses (BUG_ON) any id below this value and starts
 * copying ptr[] entries from this index.
 */
#define MIN_PERNET_OPS_ID       \
        ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */

/* Number of ptr[] slots net_alloc_generic() allocates for a new array. */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
  72
  73DEFINE_COOKIE(net_cookie);
  74
  75u64 __net_gen_cookie(struct net *net)
  76{
  77        while (1) {
  78                u64 res = atomic64_read(&net->net_cookie);
  79
  80                if (res)
  81                        return res;
  82                res = gen_cookie_next(&net_cookie);
  83                atomic64_cmpxchg(&net->net_cookie, 0, res);
  84        }
  85}
  86
  87static struct net_generic *net_alloc_generic(void)
  88{
  89        struct net_generic *ng;
  90        unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
  91
  92        ng = kzalloc(generic_size, GFP_KERNEL);
  93        if (ng)
  94                ng->s.len = max_gen_ptrs;
  95
  96        return ng;
  97}
  98
  99static int net_assign_generic(struct net *net, unsigned int id, void *data)
 100{
 101        struct net_generic *ng, *old_ng;
 102
 103        BUG_ON(id < MIN_PERNET_OPS_ID);
 104
 105        old_ng = rcu_dereference_protected(net->gen,
 106                                           lockdep_is_held(&pernet_ops_rwsem));
 107        if (old_ng->s.len > id) {
 108                old_ng->ptr[id] = data;
 109                return 0;
 110        }
 111
 112        ng = net_alloc_generic();
 113        if (ng == NULL)
 114                return -ENOMEM;
 115
 116        /*
 117         * Some synchronisation notes:
 118         *
 119         * The net_generic explores the net->gen array inside rcu
 120         * read section. Besides once set the net->gen->ptr[x]
 121         * pointer never changes (see rules in netns/generic.h).
 122         *
 123         * That said, we simply duplicate this array and schedule
 124         * the old copy for kfree after a grace period.
 125         */
 126
 127        memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
 128               (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
 129        ng->ptr[id] = data;
 130
 131        rcu_assign_pointer(net->gen, ng);
 132        kfree_rcu(old_ng, s.rcu);
 133        return 0;
 134}
 135
 136static int ops_init(const struct pernet_operations *ops, struct net *net)
 137{
 138        int err = -ENOMEM;
 139        void *data = NULL;
 140
 141        if (ops->id && ops->size) {
 142                data = kzalloc(ops->size, GFP_KERNEL);
 143                if (!data)
 144                        goto out;
 145
 146                err = net_assign_generic(net, *ops->id, data);
 147                if (err)
 148                        goto cleanup;
 149        }
 150        err = 0;
 151        if (ops->init)
 152                err = ops->init(net);
 153        if (!err)
 154                return 0;
 155
 156cleanup:
 157        kfree(data);
 158
 159out:
 160        return err;
 161}
 162
 163static void ops_free(const struct pernet_operations *ops, struct net *net)
 164{
 165        if (ops->id && ops->size) {
 166                kfree(net_generic(net, *ops->id));
 167        }
 168}
 169
 170static void ops_pre_exit_list(const struct pernet_operations *ops,
 171                              struct list_head *net_exit_list)
 172{
 173        struct net *net;
 174
 175        if (ops->pre_exit) {
 176                list_for_each_entry(net, net_exit_list, exit_list)
 177                        ops->pre_exit(net);
 178        }
 179}
 180
 181static void ops_exit_list(const struct pernet_operations *ops,
 182                          struct list_head *net_exit_list)
 183{
 184        struct net *net;
 185        if (ops->exit) {
 186                list_for_each_entry(net, net_exit_list, exit_list)
 187                        ops->exit(net);
 188        }
 189        if (ops->exit_batch)
 190                ops->exit_batch(net_exit_list);
 191}
 192
 193static void ops_free_list(const struct pernet_operations *ops,
 194                          struct list_head *net_exit_list)
 195{
 196        struct net *net;
 197        if (ops->size && ops->id) {
 198                list_for_each_entry(net, net_exit_list, exit_list)
 199                        ops_free(ops, net);
 200        }
 201}
 202
 203/* should be called with nsid_lock held */
 204static int alloc_netid(struct net *net, struct net *peer, int reqid)
 205{
 206        int min = 0, max = 0;
 207
 208        if (reqid >= 0) {
 209                min = reqid;
 210                max = reqid + 1;
 211        }
 212
 213        return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
 214}
 215
 216/* This function is used by idr_for_each(). If net is equal to peer, the
 217 * function returns the id so that idr_for_each() stops. Because we cannot
 218 * returns the id 0 (idr_for_each() will not stop), we return the magic value
 219 * NET_ID_ZERO (-1) for it.
 220 */
 221#define NET_ID_ZERO -1
 222static int net_eq_idr(int id, void *net, void *peer)
 223{
 224        if (net_eq(net, peer))
 225                return id ? : NET_ID_ZERO;
 226        return 0;
 227}
 228
 229/* Must be called from RCU-critical section or with nsid_lock held */
 230static int __peernet2id(const struct net *net, struct net *peer)
 231{
 232        int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
 233
 234        /* Magic value for id 0. */
 235        if (id == NET_ID_ZERO)
 236                return 0;
 237        if (id > 0)
 238                return id;
 239
 240        return NETNSA_NSID_NOT_ASSIGNED;
 241}
 242
 243static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
 244                              struct nlmsghdr *nlh, gfp_t gfp);
 245/* This function returns the id of a peer netns. If no id is assigned, one will
 246 * be allocated and returned.
 247 */
 248int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
 249{
 250        int id;
 251
 252        if (refcount_read(&net->count) == 0)
 253                return NETNSA_NSID_NOT_ASSIGNED;
 254
 255        spin_lock_bh(&net->nsid_lock);
 256        id = __peernet2id(net, peer);
 257        if (id >= 0) {
 258                spin_unlock_bh(&net->nsid_lock);
 259                return id;
 260        }
 261
 262        /* When peer is obtained from RCU lists, we may race with
 263         * its cleanup. Check whether it's alive, and this guarantees
 264         * we never hash a peer back to net->netns_ids, after it has
 265         * just been idr_remove()'d from there in cleanup_net().
 266         */
 267        if (!maybe_get_net(peer)) {
 268                spin_unlock_bh(&net->nsid_lock);
 269                return NETNSA_NSID_NOT_ASSIGNED;
 270        }
 271
 272        id = alloc_netid(net, peer, -1);
 273        spin_unlock_bh(&net->nsid_lock);
 274
 275        put_net(peer);
 276        if (id < 0)
 277                return NETNSA_NSID_NOT_ASSIGNED;
 278
 279        rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
 280
 281        return id;
 282}
 283EXPORT_SYMBOL_GPL(peernet2id_alloc);
 284
 285/* This function returns, if assigned, the id of a peer netns. */
 286int peernet2id(const struct net *net, struct net *peer)
 287{
 288        int id;
 289
 290        rcu_read_lock();
 291        id = __peernet2id(net, peer);
 292        rcu_read_unlock();
 293
 294        return id;
 295}
 296EXPORT_SYMBOL(peernet2id);
 297
 298/* This function returns true is the peer netns has an id assigned into the
 299 * current netns.
 300 */
 301bool peernet_has_id(const struct net *net, struct net *peer)
 302{
 303        return peernet2id(net, peer) >= 0;
 304}
 305
 306struct net *get_net_ns_by_id(const struct net *net, int id)
 307{
 308        struct net *peer;
 309
 310        if (id < 0)
 311                return NULL;
 312
 313        rcu_read_lock();
 314        peer = idr_find(&net->netns_ids, id);
 315        if (peer)
 316                peer = maybe_get_net(peer);
 317        rcu_read_unlock();
 318
 319        return peer;
 320}
 321
 322/*
 323 * setup_net runs the initializers for the network namespace object.
 324 */
 325static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 326{
 327        /* Must be called with pernet_ops_rwsem held */
 328        const struct pernet_operations *ops, *saved_ops;
 329        int error = 0;
 330        LIST_HEAD(net_exit_list);
 331
 332        refcount_set(&net->count, 1);
 333        refcount_set(&net->passive, 1);
 334        get_random_bytes(&net->hash_mix, sizeof(u32));
 335        net->dev_base_seq = 1;
 336        net->user_ns = user_ns;
 337        idr_init(&net->netns_ids);
 338        spin_lock_init(&net->nsid_lock);
 339        mutex_init(&net->ipv4.ra_mutex);
 340
 341        list_for_each_entry(ops, &pernet_list, list) {
 342                error = ops_init(ops, net);
 343                if (error < 0)
 344                        goto out_undo;
 345        }
 346        down_write(&net_rwsem);
 347        list_add_tail_rcu(&net->list, &net_namespace_list);
 348        up_write(&net_rwsem);
 349out:
 350        return error;
 351
 352out_undo:
 353        /* Walk through the list backwards calling the exit functions
 354         * for the pernet modules whose init functions did not fail.
 355         */
 356        list_add(&net->exit_list, &net_exit_list);
 357        saved_ops = ops;
 358        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 359                ops_pre_exit_list(ops, &net_exit_list);
 360
 361        synchronize_rcu();
 362
 363        ops = saved_ops;
 364        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 365                ops_exit_list(ops, &net_exit_list);
 366
 367        ops = saved_ops;
 368        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 369                ops_free_list(ops, &net_exit_list);
 370
 371        rcu_barrier();
 372        goto out;
 373}
 374
 375static int __net_init net_defaults_init_net(struct net *net)
 376{
 377        net->core.sysctl_somaxconn = SOMAXCONN;
 378        return 0;
 379}
 380
 381static struct pernet_operations net_defaults_ops = {
 382        .init = net_defaults_init_net,
 383};
 384
 385static __init int net_defaults_init(void)
 386{
 387        if (register_pernet_subsys(&net_defaults_ops))
 388                panic("Cannot initialize net default settings");
 389
 390        return 0;
 391}
 392
 393core_initcall(net_defaults_init);
 394
 395#ifdef CONFIG_NET_NS
 396static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
 397{
 398        return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
 399}
 400
/* Undo inc_net_namespaces(): drop one UCOUNT_NET_NAMESPACES from @ucounts. */
static void dec_net_namespaces(struct ucounts *ucounts)
{
        dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}
 405
/* Slab cache that struct net objects are allocated from (see net_alloc()
 * and net_free()).
 */
static struct kmem_cache *net_cachep __ro_after_init;
/* Workqueue on which __put_net() queues net_cleanup_work. */
static struct workqueue_struct *netns_wq;
 408
 409static struct net *net_alloc(void)
 410{
 411        struct net *net = NULL;
 412        struct net_generic *ng;
 413
 414        ng = net_alloc_generic();
 415        if (!ng)
 416                goto out;
 417
 418        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
 419        if (!net)
 420                goto out_free;
 421
 422#ifdef CONFIG_KEYS
 423        net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL);
 424        if (!net->key_domain)
 425                goto out_free_2;
 426        refcount_set(&net->key_domain->usage, 1);
 427#endif
 428
 429        rcu_assign_pointer(net->gen, ng);
 430out:
 431        return net;
 432
 433#ifdef CONFIG_KEYS
 434out_free_2:
 435        kmem_cache_free(net_cachep, net);
 436        net = NULL;
 437#endif
 438out_free:
 439        kfree(ng);
 440        goto out;
 441}
 442
 443static void net_free(struct net *net)
 444{
 445        kfree(rcu_access_pointer(net->gen));
 446        kmem_cache_free(net_cachep, net);
 447}
 448
 449void net_drop_ns(void *p)
 450{
 451        struct net *ns = p;
 452        if (ns && refcount_dec_and_test(&ns->passive))
 453                net_free(ns);
 454}
 455
 456struct net *copy_net_ns(unsigned long flags,
 457                        struct user_namespace *user_ns, struct net *old_net)
 458{
 459        struct ucounts *ucounts;
 460        struct net *net;
 461        int rv;
 462
 463        if (!(flags & CLONE_NEWNET))
 464                return get_net(old_net);
 465
 466        ucounts = inc_net_namespaces(user_ns);
 467        if (!ucounts)
 468                return ERR_PTR(-ENOSPC);
 469
 470        net = net_alloc();
 471        if (!net) {
 472                rv = -ENOMEM;
 473                goto dec_ucounts;
 474        }
 475        refcount_set(&net->passive, 1);
 476        net->ucounts = ucounts;
 477        get_user_ns(user_ns);
 478
 479        rv = down_read_killable(&pernet_ops_rwsem);
 480        if (rv < 0)
 481                goto put_userns;
 482
 483        rv = setup_net(net, user_ns);
 484
 485        up_read(&pernet_ops_rwsem);
 486
 487        if (rv < 0) {
 488put_userns:
 489                key_remove_domain(net->key_domain);
 490                put_user_ns(user_ns);
 491                net_drop_ns(net);
 492dec_ucounts:
 493                dec_net_namespaces(ucounts);
 494                return ERR_PTR(rv);
 495        }
 496        return net;
 497}
 498
 499/**
 500 * net_ns_get_ownership - get sysfs ownership data for @net
 501 * @net: network namespace in question (can be NULL)
 502 * @uid: kernel user ID for sysfs objects
 503 * @gid: kernel group ID for sysfs objects
 504 *
 505 * Returns the uid/gid pair of root in the user namespace associated with the
 506 * given network namespace.
 507 */
 508void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
 509{
 510        if (net) {
 511                kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
 512                kgid_t ns_root_gid = make_kgid(net->user_ns, 0);
 513
 514                if (uid_valid(ns_root_uid))
 515                        *uid = ns_root_uid;
 516
 517                if (gid_valid(ns_root_gid))
 518                        *gid = ns_root_gid;
 519        } else {
 520                *uid = GLOBAL_ROOT_UID;
 521                *gid = GLOBAL_ROOT_GID;
 522        }
 523}
 524EXPORT_SYMBOL_GPL(net_ns_get_ownership);
 525
 526static void unhash_nsid(struct net *net, struct net *last)
 527{
 528        struct net *tmp;
 529        /* This function is only called from cleanup_net() work,
 530         * and this work is the only process, that may delete
 531         * a net from net_namespace_list. So, when the below
 532         * is executing, the list may only grow. Thus, we do not
 533         * use for_each_net_rcu() or net_rwsem.
 534         */
 535        for_each_net(tmp) {
 536                int id;
 537
 538                spin_lock_bh(&tmp->nsid_lock);
 539                id = __peernet2id(tmp, net);
 540                if (id >= 0)
 541                        idr_remove(&tmp->netns_ids, id);
 542                spin_unlock_bh(&tmp->nsid_lock);
 543                if (id >= 0)
 544                        rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
 545                                          GFP_KERNEL);
 546                if (tmp == last)
 547                        break;
 548        }
 549        spin_lock_bh(&net->nsid_lock);
 550        idr_destroy(&net->netns_ids);
 551        spin_unlock_bh(&net->nsid_lock);
 552}
 553
 554static LLIST_HEAD(cleanup_list);
 555
 556static void cleanup_net(struct work_struct *work)
 557{
 558        const struct pernet_operations *ops;
 559        struct net *net, *tmp, *last;
 560        struct llist_node *net_kill_list;
 561        LIST_HEAD(net_exit_list);
 562
 563        /* Atomically snapshot the list of namespaces to cleanup */
 564        net_kill_list = llist_del_all(&cleanup_list);
 565
 566        down_read(&pernet_ops_rwsem);
 567
 568        /* Don't let anyone else find us. */
 569        down_write(&net_rwsem);
 570        llist_for_each_entry(net, net_kill_list, cleanup_list)
 571                list_del_rcu(&net->list);
 572        /* Cache last net. After we unlock rtnl, no one new net
 573         * added to net_namespace_list can assign nsid pointer
 574         * to a net from net_kill_list (see peernet2id_alloc()).
 575         * So, we skip them in unhash_nsid().
 576         *
 577         * Note, that unhash_nsid() does not delete nsid links
 578         * between net_kill_list's nets, as they've already
 579         * deleted from net_namespace_list. But, this would be
 580         * useless anyway, as netns_ids are destroyed there.
 581         */
 582        last = list_last_entry(&net_namespace_list, struct net, list);
 583        up_write(&net_rwsem);
 584
 585        llist_for_each_entry(net, net_kill_list, cleanup_list) {
 586                unhash_nsid(net, last);
 587                list_add_tail(&net->exit_list, &net_exit_list);
 588        }
 589
 590        /* Run all of the network namespace pre_exit methods */
 591        list_for_each_entry_reverse(ops, &pernet_list, list)
 592                ops_pre_exit_list(ops, &net_exit_list);
 593
 594        /*
 595         * Another CPU might be rcu-iterating the list, wait for it.
 596         * This needs to be before calling the exit() notifiers, so
 597         * the rcu_barrier() below isn't sufficient alone.
 598         * Also the pre_exit() and exit() methods need this barrier.
 599         */
 600        synchronize_rcu();
 601
 602        /* Run all of the network namespace exit methods */
 603        list_for_each_entry_reverse(ops, &pernet_list, list)
 604                ops_exit_list(ops, &net_exit_list);
 605
 606        /* Free the net generic variables */
 607        list_for_each_entry_reverse(ops, &pernet_list, list)
 608                ops_free_list(ops, &net_exit_list);
 609
 610        up_read(&pernet_ops_rwsem);
 611
 612        /* Ensure there are no outstanding rcu callbacks using this
 613         * network namespace.
 614         */
 615        rcu_barrier();
 616
 617        /* Finally it is safe to free my network namespace structure */
 618        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 619                list_del_init(&net->exit_list);
 620                dec_net_namespaces(net->ucounts);
 621                key_remove_domain(net->key_domain);
 622                put_user_ns(net->user_ns);
 623                net_drop_ns(net);
 624        }
 625}
 626
/**
 * net_ns_barrier - wait until concurrent net_cleanup_work is done
 *
 * cleanup_net runs from work queue and will first remove namespaces
 * from the global list, then run net exit functions.
 *
 * Call this in module exit path to make sure that all netns
 * ->exit ops have been invoked before the function is removed.
 *
 * cleanup_net() holds pernet_ops_rwsem for reading while it runs the exit
 * methods, so taking and releasing the write side here waits for it.
 */
void net_ns_barrier(void)
{
        down_write(&pernet_ops_rwsem);
        up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL(net_ns_barrier);
 642
 643static DECLARE_WORK(net_cleanup_work, cleanup_net);
 644
 645void __put_net(struct net *net)
 646{
 647        /* Cleanup the network namespace in process context */
 648        if (llist_add(&net->cleanup_list, &cleanup_list))
 649                queue_work(netns_wq, &net_cleanup_work);
 650}
 651EXPORT_SYMBOL_GPL(__put_net);
 652
 653struct net *get_net_ns_by_fd(int fd)
 654{
 655        struct file *file;
 656        struct ns_common *ns;
 657        struct net *net;
 658
 659        file = proc_ns_fget(fd);
 660        if (IS_ERR(file))
 661                return ERR_CAST(file);
 662
 663        ns = get_proc_ns(file_inode(file));
 664        if (ns->ops == &netns_operations)
 665                net = get_net(container_of(ns, struct net, ns));
 666        else
 667                net = ERR_PTR(-EINVAL);
 668
 669        fput(file);
 670        return net;
 671}
 672
 673#else
/* Stub used when CONFIG_NET_NS is disabled: always fails with -EINVAL. */
struct net *get_net_ns_by_fd(int fd)
{
        return ERR_PTR(-EINVAL);
}
 678#endif
 679EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
 680
 681struct net *get_net_ns_by_pid(pid_t pid)
 682{
 683        struct task_struct *tsk;
 684        struct net *net;
 685
 686        /* Lookup the network namespace */
 687        net = ERR_PTR(-ESRCH);
 688        rcu_read_lock();
 689        tsk = find_task_by_vpid(pid);
 690        if (tsk) {
 691                struct nsproxy *nsproxy;
 692                task_lock(tsk);
 693                nsproxy = tsk->nsproxy;
 694                if (nsproxy)
 695                        net = get_net(nsproxy->net_ns);
 696                task_unlock(tsk);
 697        }
 698        rcu_read_unlock();
 699        return net;
 700}
 701EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 702
/* Per-namespace init hook: with CONFIG_NET_NS, point the ns_common at
 * netns_operations; then allocate the namespace's inode number. Returns the
 * result of ns_alloc_inum().
 */
static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
        net->ns.ops = &netns_operations;
#endif
        return ns_alloc_inum(&net->ns);
}

/* Per-namespace exit hook: release the inode number taken at init. */
static __net_exit void net_ns_net_exit(struct net *net)
{
        ns_free_inum(&net->ns);
}

/* pernet operations pairing the two hooks above. */
static struct pernet_operations __net_initdata net_ns_ops = {
        .init = net_ns_net_init,
        .exit = net_ns_net_exit,
};
 720
/* Netlink attribute policy used when parsing the nsid requests below:
 * nsids are signed 32-bit values, pid and fd are unsigned 32-bit values.
 */
static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
        [NETNSA_NONE]           = { .type = NLA_UNSPEC },
        [NETNSA_NSID]           = { .type = NLA_S32 },
        [NETNSA_PID]            = { .type = NLA_U32 },
        [NETNSA_FD]             = { .type = NLA_U32 },
        [NETNSA_TARGET_NSID]    = { .type = NLA_S32 },
};
 728
 729static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
 730                          struct netlink_ext_ack *extack)
 731{
 732        struct net *net = sock_net(skb->sk);
 733        struct nlattr *tb[NETNSA_MAX + 1];
 734        struct nlattr *nla;
 735        struct net *peer;
 736        int nsid, err;
 737
 738        err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb,
 739                                     NETNSA_MAX, rtnl_net_policy, extack);
 740        if (err < 0)
 741                return err;
 742        if (!tb[NETNSA_NSID]) {
 743                NL_SET_ERR_MSG(extack, "nsid is missing");
 744                return -EINVAL;
 745        }
 746        nsid = nla_get_s32(tb[NETNSA_NSID]);
 747
 748        if (tb[NETNSA_PID]) {
 749                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
 750                nla = tb[NETNSA_PID];
 751        } else if (tb[NETNSA_FD]) {
 752                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
 753                nla = tb[NETNSA_FD];
 754        } else {
 755                NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
 756                return -EINVAL;
 757        }
 758        if (IS_ERR(peer)) {
 759                NL_SET_BAD_ATTR(extack, nla);
 760                NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
 761                return PTR_ERR(peer);
 762        }
 763
 764        spin_lock_bh(&net->nsid_lock);
 765        if (__peernet2id(net, peer) >= 0) {
 766                spin_unlock_bh(&net->nsid_lock);
 767                err = -EEXIST;
 768                NL_SET_BAD_ATTR(extack, nla);
 769                NL_SET_ERR_MSG(extack,
 770                               "Peer netns already has a nsid assigned");
 771                goto out;
 772        }
 773
 774        err = alloc_netid(net, peer, nsid);
 775        spin_unlock_bh(&net->nsid_lock);
 776        if (err >= 0) {
 777                rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
 778                                  nlh, GFP_KERNEL);
 779                err = 0;
 780        } else if (err == -ENOSPC && nsid >= 0) {
 781                err = -EEXIST;
 782                NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
 783                NL_SET_ERR_MSG(extack, "The specified nsid is already used");
 784        }
 785out:
 786        put_net(peer);
 787        return err;
 788}
 789
 790static int rtnl_net_get_size(void)
 791{
 792        return NLMSG_ALIGN(sizeof(struct rtgenmsg))
 793               + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
 794               + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */
 795               ;
 796}
 797
/* Arguments consumed by rtnl_net_fill() when building one nsid message. */
struct net_fill_args {
        u32 portid;     /* passed to nlmsg_put() */
        u32 seq;        /* passed to nlmsg_put() */
        int flags;      /* passed to nlmsg_put() */
        int cmd;        /* message type passed to nlmsg_put() */
        int nsid;       /* value of the NETNSA_NSID attribute */
        bool add_ref;   /* also emit NETNSA_CURRENT_NSID when true */
        int ref_nsid;   /* value of NETNSA_CURRENT_NSID, used if add_ref */
};
 807
 808static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args)
 809{
 810        struct nlmsghdr *nlh;
 811        struct rtgenmsg *rth;
 812
 813        nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth),
 814                        args->flags);
 815        if (!nlh)
 816                return -EMSGSIZE;
 817
 818        rth = nlmsg_data(nlh);
 819        rth->rtgen_family = AF_UNSPEC;
 820
 821        if (nla_put_s32(skb, NETNSA_NSID, args->nsid))
 822                goto nla_put_failure;
 823
 824        if (args->add_ref &&
 825            nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid))
 826                goto nla_put_failure;
 827
 828        nlmsg_end(skb, nlh);
 829        return 0;
 830
 831nla_put_failure:
 832        nlmsg_cancel(skb, nlh);
 833        return -EMSGSIZE;
 834}
 835
 836static int rtnl_net_valid_getid_req(struct sk_buff *skb,
 837                                    const struct nlmsghdr *nlh,
 838                                    struct nlattr **tb,
 839                                    struct netlink_ext_ack *extack)
 840{
 841        int i, err;
 842
 843        if (!netlink_strict_get_check(skb))
 844                return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg),
 845                                              tb, NETNSA_MAX, rtnl_net_policy,
 846                                              extack);
 847
 848        err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
 849                                            NETNSA_MAX, rtnl_net_policy,
 850                                            extack);
 851        if (err)
 852                return err;
 853
 854        for (i = 0; i <= NETNSA_MAX; i++) {
 855                if (!tb[i])
 856                        continue;
 857
 858                switch (i) {
 859                case NETNSA_PID:
 860                case NETNSA_FD:
 861                case NETNSA_NSID:
 862                case NETNSA_TARGET_NSID:
 863                        break;
 864                default:
 865                        NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request");
 866                        return -EINVAL;
 867                }
 868        }
 869
 870        return 0;
 871}
 872
 873static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
 874                          struct netlink_ext_ack *extack)
 875{
 876        struct net *net = sock_net(skb->sk);
 877        struct nlattr *tb[NETNSA_MAX + 1];
 878        struct net_fill_args fillargs = {
 879                .portid = NETLINK_CB(skb).portid,
 880                .seq = nlh->nlmsg_seq,
 881                .cmd = RTM_NEWNSID,
 882        };
 883        struct net *peer, *target = net;
 884        struct nlattr *nla;
 885        struct sk_buff *msg;
 886        int err;
 887
 888        err = rtnl_net_valid_getid_req(skb, nlh, tb, extack);
 889        if (err < 0)
 890                return err;
 891        if (tb[NETNSA_PID]) {
 892                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
 893                nla = tb[NETNSA_PID];
 894        } else if (tb[NETNSA_FD]) {
 895                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
 896                nla = tb[NETNSA_FD];
 897        } else if (tb[NETNSA_NSID]) {
 898                peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID]));
 899                if (!peer)
 900                        peer = ERR_PTR(-ENOENT);
 901                nla = tb[NETNSA_NSID];
 902        } else {
 903                NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
 904                return -EINVAL;
 905        }
 906
 907        if (IS_ERR(peer)) {
 908                NL_SET_BAD_ATTR(extack, nla);
 909                NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
 910                return PTR_ERR(peer);
 911        }
 912
 913        if (tb[NETNSA_TARGET_NSID]) {
 914                int id = nla_get_s32(tb[NETNSA_TARGET_NSID]);
 915
 916                target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id);
 917                if (IS_ERR(target)) {
 918                        NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
 919                        NL_SET_ERR_MSG(extack,
 920                                       "Target netns reference is invalid");
 921                        err = PTR_ERR(target);
 922                        goto out;
 923                }
 924                fillargs.add_ref = true;
 925                fillargs.ref_nsid = peernet2id(net, peer);
 926        }
 927
 928        msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
 929        if (!msg) {
 930                err = -ENOMEM;
 931                goto out;
 932        }
 933
 934        fillargs.nsid = peernet2id(target, peer);
 935        err = rtnl_net_fill(msg, &fillargs);
 936        if (err < 0)
 937                goto err_out;
 938
 939        err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
 940        goto out;
 941
 942err_out:
 943        nlmsg_free(msg);
 944out:
 945        if (fillargs.add_ref)
 946                put_net(target);
 947        put_net(peer);
 948        return err;
 949}
 950
/* State handed to rtnl_net_dumpid_one() while dumping nsids. */
struct rtnl_net_dump_cb {
        /* NOTE(review): presumably the namespace whose ids are dumped; its
         * use is not visible in this part of the file - confirm.
         */
        struct net *tgt_net;
        /* namespace used to compute ref_nsid when fillargs.add_ref is set */
        struct net *ref_net;
        /* message buffer the entries are appended to */
        struct sk_buff *skb;
        /* template passed to rtnl_net_fill(); nsid/ref_nsid set per entry */
        struct net_fill_args fillargs;
        /* index of the entry currently being visited */
        int idx;
        /* entries with idx below this are skipped */
        int s_idx;
};
 959
 960/* Runs in RCU-critical section. */
 961static int rtnl_net_dumpid_one(int id, void *peer, void *data)
 962{
 963        struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
 964        int ret;
 965
 966        if (net_cb->idx < net_cb->s_idx)
 967                goto cont;
 968
 969        net_cb->fillargs.nsid = id;
 970        if (net_cb->fillargs.add_ref)
 971                net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer);
 972        ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs);
 973        if (ret < 0)
 974                return ret;
 975
 976cont:
 977        net_cb->idx++;
 978        return 0;
 979}
 980
 981static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk,
 982                                   struct rtnl_net_dump_cb *net_cb,
 983                                   struct netlink_callback *cb)
 984{
 985        struct netlink_ext_ack *extack = cb->extack;
 986        struct nlattr *tb[NETNSA_MAX + 1];
 987        int err, i;
 988
 989        err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
 990                                            NETNSA_MAX, rtnl_net_policy,
 991                                            extack);
 992        if (err < 0)
 993                return err;
 994
 995        for (i = 0; i <= NETNSA_MAX; i++) {
 996                if (!tb[i])
 997                        continue;
 998
 999                if (i == NETNSA_TARGET_NSID) {
1000                        struct net *net;
1001
1002                        net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i]));
1003                        if (IS_ERR(net)) {
1004                                NL_SET_BAD_ATTR(extack, tb[i]);
1005                                NL_SET_ERR_MSG(extack,
1006                                               "Invalid target network namespace id");
1007                                return PTR_ERR(net);
1008                        }
1009                        net_cb->fillargs.add_ref = true;
1010                        net_cb->ref_net = net_cb->tgt_net;
1011                        net_cb->tgt_net = net;
1012                } else {
1013                        NL_SET_BAD_ATTR(extack, tb[i]);
1014                        NL_SET_ERR_MSG(extack,
1015                                       "Unsupported attribute in dump request");
1016                        return -EINVAL;
1017                }
1018        }
1019
1020        return 0;
1021}
1022
1023static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
1024{
1025        struct rtnl_net_dump_cb net_cb = {
1026                .tgt_net = sock_net(skb->sk),
1027                .skb = skb,
1028                .fillargs = {
1029                        .portid = NETLINK_CB(cb->skb).portid,
1030                        .seq = cb->nlh->nlmsg_seq,
1031                        .flags = NLM_F_MULTI,
1032                        .cmd = RTM_NEWNSID,
1033                },
1034                .idx = 0,
1035                .s_idx = cb->args[0],
1036        };
1037        int err = 0;
1038
1039        if (cb->strict_check) {
1040                err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb);
1041                if (err < 0)
1042                        goto end;
1043        }
1044
1045        rcu_read_lock();
1046        idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
1047        rcu_read_unlock();
1048
1049        cb->args[0] = net_cb.idx;
1050end:
1051        if (net_cb.fillargs.add_ref)
1052                put_net(net_cb.tgt_net);
1053        return err < 0 ? err : skb->len;
1054}
1055
1056static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
1057                              struct nlmsghdr *nlh, gfp_t gfp)
1058{
1059        struct net_fill_args fillargs = {
1060                .portid = portid,
1061                .seq = nlh ? nlh->nlmsg_seq : 0,
1062                .cmd = cmd,
1063                .nsid = id,
1064        };
1065        struct sk_buff *msg;
1066        int err = -ENOMEM;
1067
1068        msg = nlmsg_new(rtnl_net_get_size(), gfp);
1069        if (!msg)
1070                goto out;
1071
1072        err = rtnl_net_fill(msg, &fillargs);
1073        if (err < 0)
1074                goto err_out;
1075
1076        rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp);
1077        return;
1078
1079err_out:
1080        nlmsg_free(msg);
1081out:
1082        rtnl_set_sk_err(net, RTNLGRP_NSID, err);
1083}
1084
1085static int __init net_ns_init(void)
1086{
1087        struct net_generic *ng;
1088
1089#ifdef CONFIG_NET_NS
1090        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
1091                                        SMP_CACHE_BYTES,
1092                                        SLAB_PANIC|SLAB_ACCOUNT, NULL);
1093
1094        /* Create workqueue for cleanup */
1095        netns_wq = create_singlethread_workqueue("netns");
1096        if (!netns_wq)
1097                panic("Could not create netns workq");
1098#endif
1099
1100        ng = net_alloc_generic();
1101        if (!ng)
1102                panic("Could not allocate generic netns");
1103
1104        rcu_assign_pointer(init_net.gen, ng);
1105
1106        preempt_disable();
1107        __net_gen_cookie(&init_net);
1108        preempt_enable();
1109
1110        down_write(&pernet_ops_rwsem);
1111        if (setup_net(&init_net, &init_user_ns))
1112                panic("Could not setup the initial network namespace");
1113
1114        init_net_initialized = true;
1115        up_write(&pernet_ops_rwsem);
1116
1117        if (register_pernet_subsys(&net_ns_ops))
1118                panic("Could not register network namespace subsystems");
1119
1120        rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
1121                      RTNL_FLAG_DOIT_UNLOCKED);
1122        rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
1123                      RTNL_FLAG_DOIT_UNLOCKED);
1124
1125        return 0;
1126}
1127
1128pure_initcall(net_ns_init);
1129
1130#ifdef CONFIG_NET_NS
1131static int __register_pernet_operations(struct list_head *list,
1132                                        struct pernet_operations *ops)
1133{
1134        struct net *net;
1135        int error;
1136        LIST_HEAD(net_exit_list);
1137
1138        list_add_tail(&ops->list, list);
1139        if (ops->init || (ops->id && ops->size)) {
1140                /* We held write locked pernet_ops_rwsem, and parallel
1141                 * setup_net() and cleanup_net() are not possible.
1142                 */
1143                for_each_net(net) {
1144                        error = ops_init(ops, net);
1145                        if (error)
1146                                goto out_undo;
1147                        list_add_tail(&net->exit_list, &net_exit_list);
1148                }
1149        }
1150        return 0;
1151
1152out_undo:
1153        /* If I have an error cleanup all namespaces I initialized */
1154        list_del(&ops->list);
1155        ops_pre_exit_list(ops, &net_exit_list);
1156        synchronize_rcu();
1157        ops_exit_list(ops, &net_exit_list);
1158        ops_free_list(ops, &net_exit_list);
1159        return error;
1160}
1161
1162static void __unregister_pernet_operations(struct pernet_operations *ops)
1163{
1164        struct net *net;
1165        LIST_HEAD(net_exit_list);
1166
1167        list_del(&ops->list);
1168        /* See comment in __register_pernet_operations() */
1169        for_each_net(net)
1170                list_add_tail(&net->exit_list, &net_exit_list);
1171        ops_pre_exit_list(ops, &net_exit_list);
1172        synchronize_rcu();
1173        ops_exit_list(ops, &net_exit_list);
1174        ops_free_list(ops, &net_exit_list);
1175}
1176
1177#else
1178
1179static int __register_pernet_operations(struct list_head *list,
1180                                        struct pernet_operations *ops)
1181{
1182        if (!init_net_initialized) {
1183                list_add_tail(&ops->list, list);
1184                return 0;
1185        }
1186
1187        return ops_init(ops, &init_net);
1188}
1189
1190static void __unregister_pernet_operations(struct pernet_operations *ops)
1191{
1192        if (!init_net_initialized) {
1193                list_del(&ops->list);
1194        } else {
1195                LIST_HEAD(net_exit_list);
1196                list_add(&init_net.exit_list, &net_exit_list);
1197                ops_pre_exit_list(ops, &net_exit_list);
1198                synchronize_rcu();
1199                ops_exit_list(ops, &net_exit_list);
1200                ops_free_list(ops, &net_exit_list);
1201        }
1202}
1203
1204#endif /* CONFIG_NET_NS */
1205
1206static DEFINE_IDA(net_generic_ids);
1207
1208static int register_pernet_operations(struct list_head *list,
1209                                      struct pernet_operations *ops)
1210{
1211        int error;
1212
1213        if (ops->id) {
1214                error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
1215                                GFP_KERNEL);
1216                if (error < 0)
1217                        return error;
1218                *ops->id = error;
1219                max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
1220        }
1221        error = __register_pernet_operations(list, ops);
1222        if (error) {
1223                rcu_barrier();
1224                if (ops->id)
1225                        ida_free(&net_generic_ids, *ops->id);
1226        }
1227
1228        return error;
1229}
1230
1231static void unregister_pernet_operations(struct pernet_operations *ops)
1232{
1233        __unregister_pernet_operations(ops);
1234        rcu_barrier();
1235        if (ops->id)
1236                ida_free(&net_generic_ids, *ops->id);
1237}
1238
1239/**
1240 *      register_pernet_subsys - register a network namespace subsystem
1241 *      @ops:  pernet operations structure for the subsystem
1242 *
1243 *      Register a subsystem which has init and exit functions
1244 *      that are called when network namespaces are created and
1245 *      destroyed respectively.
1246 *
1247 *      When registered all network namespace init functions are
1248 *      called for every existing network namespace.  Allowing kernel
1249 *      modules to have a race free view of the set of network namespaces.
1250 *
1251 *      When a new network namespace is created all of the init
1252 *      methods are called in the order in which they were registered.
1253 *
1254 *      When a network namespace is destroyed all of the exit methods
1255 *      are called in the reverse of the order with which they were
1256 *      registered.
1257 */
1258int register_pernet_subsys(struct pernet_operations *ops)
1259{
1260        int error;
1261        down_write(&pernet_ops_rwsem);
1262        error =  register_pernet_operations(first_device, ops);
1263        up_write(&pernet_ops_rwsem);
1264        return error;
1265}
1266EXPORT_SYMBOL_GPL(register_pernet_subsys);
1267
1268/**
1269 *      unregister_pernet_subsys - unregister a network namespace subsystem
1270 *      @ops: pernet operations structure to manipulate
1271 *
1272 *      Remove the pernet operations structure from the list to be
1273 *      used when network namespaces are created or destroyed.  In
1274 *      addition run the exit method for all existing network
1275 *      namespaces.
1276 */
1277void unregister_pernet_subsys(struct pernet_operations *ops)
1278{
1279        down_write(&pernet_ops_rwsem);
1280        unregister_pernet_operations(ops);
1281        up_write(&pernet_ops_rwsem);
1282}
1283EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
1284
1285/**
1286 *      register_pernet_device - register a network namespace device
1287 *      @ops:  pernet operations structure for the subsystem
1288 *
1289 *      Register a device which has init and exit functions
1290 *      that are called when network namespaces are created and
1291 *      destroyed respectively.
1292 *
1293 *      When registered all network namespace init functions are
1294 *      called for every existing network namespace.  Allowing kernel
1295 *      modules to have a race free view of the set of network namespaces.
1296 *
1297 *      When a new network namespace is created all of the init
1298 *      methods are called in the order in which they were registered.
1299 *
1300 *      When a network namespace is destroyed all of the exit methods
1301 *      are called in the reverse of the order with which they were
1302 *      registered.
1303 */
1304int register_pernet_device(struct pernet_operations *ops)
1305{
1306        int error;
1307        down_write(&pernet_ops_rwsem);
1308        error = register_pernet_operations(&pernet_list, ops);
1309        if (!error && (first_device == &pernet_list))
1310                first_device = &ops->list;
1311        up_write(&pernet_ops_rwsem);
1312        return error;
1313}
1314EXPORT_SYMBOL_GPL(register_pernet_device);
1315
1316/**
1317 *      unregister_pernet_device - unregister a network namespace netdevice
1318 *      @ops: pernet operations structure to manipulate
1319 *
1320 *      Remove the pernet operations structure from the list to be
1321 *      used when network namespaces are created or destroyed.  In
1322 *      addition run the exit method for all existing network
1323 *      namespaces.
1324 */
1325void unregister_pernet_device(struct pernet_operations *ops)
1326{
1327        down_write(&pernet_ops_rwsem);
1328        if (&ops->list == first_device)
1329                first_device = first_device->next;
1330        unregister_pernet_operations(ops);
1331        up_write(&pernet_ops_rwsem);
1332}
1333EXPORT_SYMBOL_GPL(unregister_pernet_device);
1334
1335#ifdef CONFIG_NET_NS
1336static struct ns_common *netns_get(struct task_struct *task)
1337{
1338        struct net *net = NULL;
1339        struct nsproxy *nsproxy;
1340
1341        task_lock(task);
1342        nsproxy = task->nsproxy;
1343        if (nsproxy)
1344                net = get_net(nsproxy->net_ns);
1345        task_unlock(task);
1346
1347        return net ? &net->ns : NULL;
1348}
1349
1350static inline struct net *to_net_ns(struct ns_common *ns)
1351{
1352        return container_of(ns, struct net, ns);
1353}
1354
/* Drop a reference on the network namespace that embeds @ns. */
static void netns_put(struct ns_common *ns)
{
        struct net *net = to_net_ns(ns);

        put_net(net);
}
1359
1360static int netns_install(struct nsset *nsset, struct ns_common *ns)
1361{
1362        struct nsproxy *nsproxy = nsset->nsproxy;
1363        struct net *net = to_net_ns(ns);
1364
1365        if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
1366            !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
1367                return -EPERM;
1368
1369        put_net(nsproxy->net_ns);
1370        nsproxy->net_ns = get_net(net);
1371        return 0;
1372}
1373
1374static struct user_namespace *netns_owner(struct ns_common *ns)
1375{
1376        return to_net_ns(ns)->user_ns;
1377}
1378
1379const struct proc_ns_operations netns_operations = {
1380        .name           = "net",
1381        .type           = CLONE_NEWNET,
1382        .get            = netns_get,
1383        .put            = netns_put,
1384        .install        = netns_install,
1385        .owner          = netns_owner,
1386};
1387#endif
1388