linux/net/core/net_namespace.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   3
   4#include <linux/workqueue.h>
   5#include <linux/rtnetlink.h>
   6#include <linux/cache.h>
   7#include <linux/slab.h>
   8#include <linux/list.h>
   9#include <linux/delay.h>
  10#include <linux/sched.h>
  11#include <linux/idr.h>
  12#include <linux/rculist.h>
  13#include <linux/nsproxy.h>
  14#include <linux/fs.h>
  15#include <linux/proc_ns.h>
  16#include <linux/file.h>
  17#include <linux/export.h>
  18#include <linux/user_namespace.h>
  19#include <linux/net_namespace.h>
  20#include <linux/sched/task.h>
  21#include <linux/uidgid.h>
  22#include <linux/cookie.h>
  23
  24#include <net/sock.h>
  25#include <net/netlink.h>
  26#include <net/net_namespace.h>
  27#include <net/netns/generic.h>
  28
  29/*
  30 *      Our network namespace constructor/destructor lists
 *
 *      pernet_list holds the registered struct pernet_operations.
 *      setup_net() walks it forwards to initialise a namespace and
 *      cleanup_net() walks it in reverse to tear namespaces down.
  31 */
  32
  33static LIST_HEAD(pernet_list);
/* NOTE(review): starts out pointing at the list head itself; its users are
 * outside this excerpt - confirm its meaning against them.
 */
  34static struct list_head *first_device = &pernet_list;
  35
/* Every namespace that setup_net() completed and cleanup_net() has not yet
 * unlinked.
 */
  36LIST_HEAD(net_namespace_list);
  37EXPORT_SYMBOL_GPL(net_namespace_list);
  38
  39/* Protects net_namespace_list. Nests inside rtnl_lock() */
  40DECLARE_RWSEM(net_rwsem);
  41EXPORT_SYMBOL_GPL(net_rwsem);
  42
  43#ifdef CONFIG_KEYS
/* Key tag referenced by init_net.key_domain; its usage count starts at 1. */
  44static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
  45#endif
  46
/*
 * Statically defined namespace object. Its ns reference count starts at 1
 * and its dev_base_head is initialised as an empty list at build time.
 */
  47struct net init_net = {
  48        .ns.count       = REFCOUNT_INIT(1),
  49        .dev_base_head  = LIST_HEAD_INIT(init_net.dev_base_head),
  50#ifdef CONFIG_KEYS
  51        .key_domain     = &init_net_key_domain,
  52#endif
  53};
  54EXPORT_SYMBOL(init_net);
  55
  56static bool init_net_initialized;
  57/*
  58 * pernet_ops_rwsem: protects pernet_list, net_generic_ids,
  59 * init_net_initialized and the first_device pointer.
  60 * This is an internal net namespace object. Please don't use it
  61 * outside.
 *
 * In this file copy_net_ns() and cleanup_net() take it for reading around
 * the pernet init/exit walks, and net_ns_barrier() takes it for writing.
  62 */
  63DECLARE_RWSEM(pernet_ops_rwsem);
  64EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
  65
/*
 * Size of struct net_generic expressed in pointer-sized units, rounded up.
 * net_assign_generic() refuses ids below this value and copies the ptr[]
 * slots starting from this index.
 */
  66#define MIN_PERNET_OPS_ID       \
  67        ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
  68
  69#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */
  70
/* Number of ptr[] slots net_alloc_generic() sizes a new array for. */
  71static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
  72
/* Cookie generator; setup_net() draws net->net_cookie from it. */
  73DEFINE_COOKIE(net_cookie);
  74
  75static struct net_generic *net_alloc_generic(void)
  76{
  77        struct net_generic *ng;
  78        unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
  79
  80        ng = kzalloc(generic_size, GFP_KERNEL);
  81        if (ng)
  82                ng->s.len = max_gen_ptrs;
  83
  84        return ng;
  85}
  86
  87static int net_assign_generic(struct net *net, unsigned int id, void *data)
  88{
  89        struct net_generic *ng, *old_ng;
  90
  91        BUG_ON(id < MIN_PERNET_OPS_ID);
  92
  93        old_ng = rcu_dereference_protected(net->gen,
  94                                           lockdep_is_held(&pernet_ops_rwsem));
  95        if (old_ng->s.len > id) {
  96                old_ng->ptr[id] = data;
  97                return 0;
  98        }
  99
 100        ng = net_alloc_generic();
 101        if (!ng)
 102                return -ENOMEM;
 103
 104        /*
 105         * Some synchronisation notes:
 106         *
 107         * The net_generic explores the net->gen array inside rcu
 108         * read section. Besides once set the net->gen->ptr[x]
 109         * pointer never changes (see rules in netns/generic.h).
 110         *
 111         * That said, we simply duplicate this array and schedule
 112         * the old copy for kfree after a grace period.
 113         */
 114
 115        memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
 116               (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
 117        ng->ptr[id] = data;
 118
 119        rcu_assign_pointer(net->gen, ng);
 120        kfree_rcu(old_ng, s.rcu);
 121        return 0;
 122}
 123
 124static int ops_init(const struct pernet_operations *ops, struct net *net)
 125{
 126        int err = -ENOMEM;
 127        void *data = NULL;
 128
 129        if (ops->id && ops->size) {
 130                data = kzalloc(ops->size, GFP_KERNEL);
 131                if (!data)
 132                        goto out;
 133
 134                err = net_assign_generic(net, *ops->id, data);
 135                if (err)
 136                        goto cleanup;
 137        }
 138        err = 0;
 139        if (ops->init)
 140                err = ops->init(net);
 141        if (!err)
 142                return 0;
 143
 144cleanup:
 145        kfree(data);
 146
 147out:
 148        return err;
 149}
 150
 151static void ops_pre_exit_list(const struct pernet_operations *ops,
 152                              struct list_head *net_exit_list)
 153{
 154        struct net *net;
 155
 156        if (ops->pre_exit) {
 157                list_for_each_entry(net, net_exit_list, exit_list)
 158                        ops->pre_exit(net);
 159        }
 160}
 161
 162static void ops_exit_list(const struct pernet_operations *ops,
 163                          struct list_head *net_exit_list)
 164{
 165        struct net *net;
 166        if (ops->exit) {
 167                list_for_each_entry(net, net_exit_list, exit_list) {
 168                        ops->exit(net);
 169                        cond_resched();
 170                }
 171        }
 172        if (ops->exit_batch)
 173                ops->exit_batch(net_exit_list);
 174}
 175
 176static void ops_free_list(const struct pernet_operations *ops,
 177                          struct list_head *net_exit_list)
 178{
 179        struct net *net;
 180        if (ops->size && ops->id) {
 181                list_for_each_entry(net, net_exit_list, exit_list)
 182                        kfree(net_generic(net, *ops->id));
 183        }
 184}
 185
 186/* should be called with nsid_lock held */
 187static int alloc_netid(struct net *net, struct net *peer, int reqid)
 188{
 189        int min = 0, max = 0;
 190
 191        if (reqid >= 0) {
 192                min = reqid;
 193                max = reqid + 1;
 194        }
 195
 196        return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
 197}
 198
 199/* This function is used by idr_for_each(). If net is equal to peer, the
 200 * function returns the id so that idr_for_each() stops. Because we cannot
 201 * returns the id 0 (idr_for_each() will not stop), we return the magic value
 202 * NET_ID_ZERO (-1) for it.
 203 */
 204#define NET_ID_ZERO -1
 205static int net_eq_idr(int id, void *net, void *peer)
 206{
 207        if (net_eq(net, peer))
 208                return id ? : NET_ID_ZERO;
 209        return 0;
 210}
 211
 212/* Must be called from RCU-critical section or with nsid_lock held */
 213static int __peernet2id(const struct net *net, struct net *peer)
 214{
 215        int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
 216
 217        /* Magic value for id 0. */
 218        if (id == NET_ID_ZERO)
 219                return 0;
 220        if (id > 0)
 221                return id;
 222
 223        return NETNSA_NSID_NOT_ASSIGNED;
 224}
 225
 226static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
 227                              struct nlmsghdr *nlh, gfp_t gfp);
 228/* This function returns the id of a peer netns. If no id is assigned, one will
 229 * be allocated and returned.
 230 */
 231int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
 232{
 233        int id;
 234
 235        if (refcount_read(&net->ns.count) == 0)
 236                return NETNSA_NSID_NOT_ASSIGNED;
 237
 238        spin_lock_bh(&net->nsid_lock);
 239        id = __peernet2id(net, peer);
 240        if (id >= 0) {
 241                spin_unlock_bh(&net->nsid_lock);
 242                return id;
 243        }
 244
 245        /* When peer is obtained from RCU lists, we may race with
 246         * its cleanup. Check whether it's alive, and this guarantees
 247         * we never hash a peer back to net->netns_ids, after it has
 248         * just been idr_remove()'d from there in cleanup_net().
 249         */
 250        if (!maybe_get_net(peer)) {
 251                spin_unlock_bh(&net->nsid_lock);
 252                return NETNSA_NSID_NOT_ASSIGNED;
 253        }
 254
 255        id = alloc_netid(net, peer, -1);
 256        spin_unlock_bh(&net->nsid_lock);
 257
 258        put_net(peer);
 259        if (id < 0)
 260                return NETNSA_NSID_NOT_ASSIGNED;
 261
 262        rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
 263
 264        return id;
 265}
 266EXPORT_SYMBOL_GPL(peernet2id_alloc);
 267
 268/* This function returns, if assigned, the id of a peer netns. */
 269int peernet2id(const struct net *net, struct net *peer)
 270{
 271        int id;
 272
 273        rcu_read_lock();
 274        id = __peernet2id(net, peer);
 275        rcu_read_unlock();
 276
 277        return id;
 278}
 279EXPORT_SYMBOL(peernet2id);
 280
 281/* This function returns true is the peer netns has an id assigned into the
 282 * current netns.
 283 */
 284bool peernet_has_id(const struct net *net, struct net *peer)
 285{
 286        return peernet2id(net, peer) >= 0;
 287}
 288
 289struct net *get_net_ns_by_id(const struct net *net, int id)
 290{
 291        struct net *peer;
 292
 293        if (id < 0)
 294                return NULL;
 295
 296        rcu_read_lock();
 297        peer = idr_find(&net->netns_ids, id);
 298        if (peer)
 299                peer = maybe_get_net(peer);
 300        rcu_read_unlock();
 301
 302        return peer;
 303}
 304
 305/*
 306 * setup_net runs the initializers for the network namespace object.
 307 */
 308static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 309{
 310        /* Must be called with pernet_ops_rwsem held */
 311        const struct pernet_operations *ops, *saved_ops;
 312        int error = 0;
 313        LIST_HEAD(net_exit_list);
 314
 315        refcount_set(&net->ns.count, 1);
 316        ref_tracker_dir_init(&net->refcnt_tracker, 128);
 317
 318        refcount_set(&net->passive, 1);
 319        get_random_bytes(&net->hash_mix, sizeof(u32));
 320        preempt_disable();
 321        net->net_cookie = gen_cookie_next(&net_cookie);
 322        preempt_enable();
 323        net->dev_base_seq = 1;
 324        net->user_ns = user_ns;
 325        idr_init(&net->netns_ids);
 326        spin_lock_init(&net->nsid_lock);
 327        mutex_init(&net->ipv4.ra_mutex);
 328
 329        list_for_each_entry(ops, &pernet_list, list) {
 330                error = ops_init(ops, net);
 331                if (error < 0)
 332                        goto out_undo;
 333        }
 334        down_write(&net_rwsem);
 335        list_add_tail_rcu(&net->list, &net_namespace_list);
 336        up_write(&net_rwsem);
 337out:
 338        return error;
 339
 340out_undo:
 341        /* Walk through the list backwards calling the exit functions
 342         * for the pernet modules whose init functions did not fail.
 343         */
 344        list_add(&net->exit_list, &net_exit_list);
 345        saved_ops = ops;
 346        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 347                ops_pre_exit_list(ops, &net_exit_list);
 348
 349        synchronize_rcu();
 350
 351        ops = saved_ops;
 352        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 353                ops_exit_list(ops, &net_exit_list);
 354
 355        ops = saved_ops;
 356        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 357                ops_free_list(ops, &net_exit_list);
 358
 359        rcu_barrier();
 360        goto out;
 361}
 362
 363static int __net_init net_defaults_init_net(struct net *net)
 364{
 365        net->core.sysctl_somaxconn = SOMAXCONN;
 366        return 0;
 367}
 368
 369static struct pernet_operations net_defaults_ops = {
 370        .init = net_defaults_init_net,
 371};
 372
 373static __init int net_defaults_init(void)
 374{
 375        if (register_pernet_subsys(&net_defaults_ops))
 376                panic("Cannot initialize net default settings");
 377
 378        return 0;
 379}
 380
 381core_initcall(net_defaults_init);
 382
 383#ifdef CONFIG_NET_NS
 384static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
 385{
 386        return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
 387}
 388
 389static void dec_net_namespaces(struct ucounts *ucounts)
 390{
 391        dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
 392}
 393
 394static struct kmem_cache *net_cachep __ro_after_init;
 395static struct workqueue_struct *netns_wq;
 396
 397static struct net *net_alloc(void)
 398{
 399        struct net *net = NULL;
 400        struct net_generic *ng;
 401
 402        ng = net_alloc_generic();
 403        if (!ng)
 404                goto out;
 405
 406        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
 407        if (!net)
 408                goto out_free;
 409
 410#ifdef CONFIG_KEYS
 411        net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL);
 412        if (!net->key_domain)
 413                goto out_free_2;
 414        refcount_set(&net->key_domain->usage, 1);
 415#endif
 416
 417        rcu_assign_pointer(net->gen, ng);
 418out:
 419        return net;
 420
 421#ifdef CONFIG_KEYS
 422out_free_2:
 423        kmem_cache_free(net_cachep, net);
 424        net = NULL;
 425#endif
 426out_free:
 427        kfree(ng);
 428        goto out;
 429}
 430
 431static void net_free(struct net *net)
 432{
 433        if (refcount_dec_and_test(&net->passive)) {
 434                kfree(rcu_access_pointer(net->gen));
 435                kmem_cache_free(net_cachep, net);
 436        }
 437}
 438
 439void net_drop_ns(void *p)
 440{
 441        struct net *net = (struct net *)p;
 442
 443        if (net)
 444                net_free(net);
 445}
 446
 447struct net *copy_net_ns(unsigned long flags,
 448                        struct user_namespace *user_ns, struct net *old_net)
 449{
 450        struct ucounts *ucounts;
 451        struct net *net;
 452        int rv;
 453
 454        if (!(flags & CLONE_NEWNET))
 455                return get_net(old_net);
 456
 457        ucounts = inc_net_namespaces(user_ns);
 458        if (!ucounts)
 459                return ERR_PTR(-ENOSPC);
 460
 461        net = net_alloc();
 462        if (!net) {
 463                rv = -ENOMEM;
 464                goto dec_ucounts;
 465        }
 466        refcount_set(&net->passive, 1);
 467        net->ucounts = ucounts;
 468        get_user_ns(user_ns);
 469
 470        rv = down_read_killable(&pernet_ops_rwsem);
 471        if (rv < 0)
 472                goto put_userns;
 473
 474        rv = setup_net(net, user_ns);
 475
 476        up_read(&pernet_ops_rwsem);
 477
 478        if (rv < 0) {
 479put_userns:
 480#ifdef CONFIG_KEYS
 481                key_remove_domain(net->key_domain);
 482#endif
 483                put_user_ns(user_ns);
 484                net_free(net);
 485dec_ucounts:
 486                dec_net_namespaces(ucounts);
 487                return ERR_PTR(rv);
 488        }
 489        return net;
 490}
 491
 492/**
 493 * net_ns_get_ownership - get sysfs ownership data for @net
 494 * @net: network namespace in question (can be NULL)
 495 * @uid: kernel user ID for sysfs objects
 496 * @gid: kernel group ID for sysfs objects
 497 *
 498 * Returns the uid/gid pair of root in the user namespace associated with the
 499 * given network namespace.
 500 */
 501void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
 502{
 503        if (net) {
 504                kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
 505                kgid_t ns_root_gid = make_kgid(net->user_ns, 0);
 506
 507                if (uid_valid(ns_root_uid))
 508                        *uid = ns_root_uid;
 509
 510                if (gid_valid(ns_root_gid))
 511                        *gid = ns_root_gid;
 512        } else {
 513                *uid = GLOBAL_ROOT_UID;
 514                *gid = GLOBAL_ROOT_GID;
 515        }
 516}
 517EXPORT_SYMBOL_GPL(net_ns_get_ownership);
 518
 519static void unhash_nsid(struct net *net, struct net *last)
 520{
 521        struct net *tmp;
 522        /* This function is only called from cleanup_net() work,
 523         * and this work is the only process, that may delete
 524         * a net from net_namespace_list. So, when the below
 525         * is executing, the list may only grow. Thus, we do not
 526         * use for_each_net_rcu() or net_rwsem.
 527         */
 528        for_each_net(tmp) {
 529                int id;
 530
 531                spin_lock_bh(&tmp->nsid_lock);
 532                id = __peernet2id(tmp, net);
 533                if (id >= 0)
 534                        idr_remove(&tmp->netns_ids, id);
 535                spin_unlock_bh(&tmp->nsid_lock);
 536                if (id >= 0)
 537                        rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
 538                                          GFP_KERNEL);
 539                if (tmp == last)
 540                        break;
 541        }
 542        spin_lock_bh(&net->nsid_lock);
 543        idr_destroy(&net->netns_ids);
 544        spin_unlock_bh(&net->nsid_lock);
 545}
 546
 547static LLIST_HEAD(cleanup_list);
 548
 549static void cleanup_net(struct work_struct *work)
 550{
 551        const struct pernet_operations *ops;
 552        struct net *net, *tmp, *last;
 553        struct llist_node *net_kill_list;
 554        LIST_HEAD(net_exit_list);
 555
 556        /* Atomically snapshot the list of namespaces to cleanup */
 557        net_kill_list = llist_del_all(&cleanup_list);
 558
 559        down_read(&pernet_ops_rwsem);
 560
 561        /* Don't let anyone else find us. */
 562        down_write(&net_rwsem);
 563        llist_for_each_entry(net, net_kill_list, cleanup_list)
 564                list_del_rcu(&net->list);
 565        /* Cache last net. After we unlock rtnl, no one new net
 566         * added to net_namespace_list can assign nsid pointer
 567         * to a net from net_kill_list (see peernet2id_alloc()).
 568         * So, we skip them in unhash_nsid().
 569         *
 570         * Note, that unhash_nsid() does not delete nsid links
 571         * between net_kill_list's nets, as they've already
 572         * deleted from net_namespace_list. But, this would be
 573         * useless anyway, as netns_ids are destroyed there.
 574         */
 575        last = list_last_entry(&net_namespace_list, struct net, list);
 576        up_write(&net_rwsem);
 577
 578        llist_for_each_entry(net, net_kill_list, cleanup_list) {
 579                unhash_nsid(net, last);
 580                list_add_tail(&net->exit_list, &net_exit_list);
 581        }
 582
 583        /* Run all of the network namespace pre_exit methods */
 584        list_for_each_entry_reverse(ops, &pernet_list, list)
 585                ops_pre_exit_list(ops, &net_exit_list);
 586
 587        /*
 588         * Another CPU might be rcu-iterating the list, wait for it.
 589         * This needs to be before calling the exit() notifiers, so
 590         * the rcu_barrier() below isn't sufficient alone.
 591         * Also the pre_exit() and exit() methods need this barrier.
 592         */
 593        synchronize_rcu();
 594
 595        /* Run all of the network namespace exit methods */
 596        list_for_each_entry_reverse(ops, &pernet_list, list)
 597                ops_exit_list(ops, &net_exit_list);
 598
 599        /* Free the net generic variables */
 600        list_for_each_entry_reverse(ops, &pernet_list, list)
 601                ops_free_list(ops, &net_exit_list);
 602
 603        up_read(&pernet_ops_rwsem);
 604
 605        /* Ensure there are no outstanding rcu callbacks using this
 606         * network namespace.
 607         */
 608        rcu_barrier();
 609
 610        /* Finally it is safe to free my network namespace structure */
 611        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 612                list_del_init(&net->exit_list);
 613                dec_net_namespaces(net->ucounts);
 614#ifdef CONFIG_KEYS
 615                key_remove_domain(net->key_domain);
 616#endif
 617                put_user_ns(net->user_ns);
 618                net_free(net);
 619        }
 620}
 621
 622/**
 623 * net_ns_barrier - wait until concurrent net_cleanup_work is done
 624 *
 625 * cleanup_net runs from work queue and will first remove namespaces
 626 * from the global list, then run net exit functions.
 627 *
 628 * Call this in module exit path to make sure that all netns
 629 * ->exit ops have been invoked before the function is removed.
 630 */
 631void net_ns_barrier(void)
 632{
 633        down_write(&pernet_ops_rwsem);
 634        up_write(&pernet_ops_rwsem);
 635}
 636EXPORT_SYMBOL(net_ns_barrier);
 637
 638static DECLARE_WORK(net_cleanup_work, cleanup_net);
 639
 640void __put_net(struct net *net)
 641{
 642        ref_tracker_dir_exit(&net->refcnt_tracker);
 643        /* Cleanup the network namespace in process context */
 644        if (llist_add(&net->cleanup_list, &cleanup_list))
 645                queue_work(netns_wq, &net_cleanup_work);
 646}
 647EXPORT_SYMBOL_GPL(__put_net);
 648
 649/**
 650 * get_net_ns - increment the refcount of the network namespace
 651 * @ns: common namespace (net)
 652 *
 653 * Returns the net's common namespace.
 654 */
 655struct ns_common *get_net_ns(struct ns_common *ns)
 656{
 657        return &get_net(container_of(ns, struct net, ns))->ns;
 658}
 659EXPORT_SYMBOL_GPL(get_net_ns);
 660
 661struct net *get_net_ns_by_fd(int fd)
 662{
 663        struct file *file;
 664        struct ns_common *ns;
 665        struct net *net;
 666
 667        file = proc_ns_fget(fd);
 668        if (IS_ERR(file))
 669                return ERR_CAST(file);
 670
 671        ns = get_proc_ns(file_inode(file));
 672        if (ns->ops == &netns_operations)
 673                net = get_net(container_of(ns, struct net, ns));
 674        else
 675                net = ERR_PTR(-EINVAL);
 676
 677        fput(file);
 678        return net;
 679}
 680EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
 681#endif
 682
 683struct net *get_net_ns_by_pid(pid_t pid)
 684{
 685        struct task_struct *tsk;
 686        struct net *net;
 687
 688        /* Lookup the network namespace */
 689        net = ERR_PTR(-ESRCH);
 690        rcu_read_lock();
 691        tsk = find_task_by_vpid(pid);
 692        if (tsk) {
 693                struct nsproxy *nsproxy;
 694                task_lock(tsk);
 695                nsproxy = tsk->nsproxy;
 696                if (nsproxy)
 697                        net = get_net(nsproxy->net_ns);
 698                task_unlock(tsk);
 699        }
 700        rcu_read_unlock();
 701        return net;
 702}
 703EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 704
 705static __net_init int net_ns_net_init(struct net *net)
 706{
 707#ifdef CONFIG_NET_NS
 708        net->ns.ops = &netns_operations;
 709#endif
 710        return ns_alloc_inum(&net->ns);
 711}
 712
 713static __net_exit void net_ns_net_exit(struct net *net)
 714{
 715        ns_free_inum(&net->ns);
 716}
 717
 718static struct pernet_operations __net_initdata net_ns_ops = {
 719        .init = net_ns_net_init,
 720        .exit = net_ns_net_exit,
 721};
 722
/*
 * Attribute policy handed to the nlmsg_parse_deprecated*() calls of the
 * nsid netlink handlers in this file: namespace ids are signed 32-bit
 * values, pid and fd references are unsigned 32-bit values.
 */
 723static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
 724        [NETNSA_NONE]           = { .type = NLA_UNSPEC },
 725        [NETNSA_NSID]           = { .type = NLA_S32 },
 726        [NETNSA_PID]            = { .type = NLA_U32 },
 727        [NETNSA_FD]             = { .type = NLA_U32 },
 728        [NETNSA_TARGET_NSID]    = { .type = NLA_S32 },
 729};
 730
 731static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
 732                          struct netlink_ext_ack *extack)
 733{
 734        struct net *net = sock_net(skb->sk);
 735        struct nlattr *tb[NETNSA_MAX + 1];
 736        struct nlattr *nla;
 737        struct net *peer;
 738        int nsid, err;
 739
 740        err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb,
 741                                     NETNSA_MAX, rtnl_net_policy, extack);
 742        if (err < 0)
 743                return err;
 744        if (!tb[NETNSA_NSID]) {
 745                NL_SET_ERR_MSG(extack, "nsid is missing");
 746                return -EINVAL;
 747        }
 748        nsid = nla_get_s32(tb[NETNSA_NSID]);
 749
 750        if (tb[NETNSA_PID]) {
 751                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
 752                nla = tb[NETNSA_PID];
 753        } else if (tb[NETNSA_FD]) {
 754                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
 755                nla = tb[NETNSA_FD];
 756        } else {
 757                NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
 758                return -EINVAL;
 759        }
 760        if (IS_ERR(peer)) {
 761                NL_SET_BAD_ATTR(extack, nla);
 762                NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
 763                return PTR_ERR(peer);
 764        }
 765
 766        spin_lock_bh(&net->nsid_lock);
 767        if (__peernet2id(net, peer) >= 0) {
 768                spin_unlock_bh(&net->nsid_lock);
 769                err = -EEXIST;
 770                NL_SET_BAD_ATTR(extack, nla);
 771                NL_SET_ERR_MSG(extack,
 772                               "Peer netns already has a nsid assigned");
 773                goto out;
 774        }
 775
 776        err = alloc_netid(net, peer, nsid);
 777        spin_unlock_bh(&net->nsid_lock);
 778        if (err >= 0) {
 779                rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
 780                                  nlh, GFP_KERNEL);
 781                err = 0;
 782        } else if (err == -ENOSPC && nsid >= 0) {
 783                err = -EEXIST;
 784                NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
 785                NL_SET_ERR_MSG(extack, "The specified nsid is already used");
 786        }
 787out:
 788        put_net(peer);
 789        return err;
 790}
 791
 792static int rtnl_net_get_size(void)
 793{
 794        return NLMSG_ALIGN(sizeof(struct rtgenmsg))
 795               + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
 796               + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */
 797               ;
 798}
 799
 800struct net_fill_args {
 801        u32 portid;
 802        u32 seq;
 803        int flags;
 804        int cmd;
 805        int nsid;
 806        bool add_ref;
 807        int ref_nsid;
 808};
 809
 810static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args)
 811{
 812        struct nlmsghdr *nlh;
 813        struct rtgenmsg *rth;
 814
 815        nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth),
 816                        args->flags);
 817        if (!nlh)
 818                return -EMSGSIZE;
 819
 820        rth = nlmsg_data(nlh);
 821        rth->rtgen_family = AF_UNSPEC;
 822
 823        if (nla_put_s32(skb, NETNSA_NSID, args->nsid))
 824                goto nla_put_failure;
 825
 826        if (args->add_ref &&
 827            nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid))
 828                goto nla_put_failure;
 829
 830        nlmsg_end(skb, nlh);
 831        return 0;
 832
 833nla_put_failure:
 834        nlmsg_cancel(skb, nlh);
 835        return -EMSGSIZE;
 836}
 837
 838static int rtnl_net_valid_getid_req(struct sk_buff *skb,
 839                                    const struct nlmsghdr *nlh,
 840                                    struct nlattr **tb,
 841                                    struct netlink_ext_ack *extack)
 842{
 843        int i, err;
 844
 845        if (!netlink_strict_get_check(skb))
 846                return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg),
 847                                              tb, NETNSA_MAX, rtnl_net_policy,
 848                                              extack);
 849
 850        err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
 851                                            NETNSA_MAX, rtnl_net_policy,
 852                                            extack);
 853        if (err)
 854                return err;
 855
 856        for (i = 0; i <= NETNSA_MAX; i++) {
 857                if (!tb[i])
 858                        continue;
 859
 860                switch (i) {
 861                case NETNSA_PID:
 862                case NETNSA_FD:
 863                case NETNSA_NSID:
 864                case NETNSA_TARGET_NSID:
 865                        break;
 866                default:
 867                        NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request");
 868                        return -EINVAL;
 869                }
 870        }
 871
 872        return 0;
 873}
 874
 875static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
 876                          struct netlink_ext_ack *extack)
 877{
 878        struct net *net = sock_net(skb->sk);
 879        struct nlattr *tb[NETNSA_MAX + 1];
 880        struct net_fill_args fillargs = {
 881                .portid = NETLINK_CB(skb).portid,
 882                .seq = nlh->nlmsg_seq,
 883                .cmd = RTM_NEWNSID,
 884        };
 885        struct net *peer, *target = net;
 886        struct nlattr *nla;
 887        struct sk_buff *msg;
 888        int err;
 889
 890        err = rtnl_net_valid_getid_req(skb, nlh, tb, extack);
 891        if (err < 0)
 892                return err;
 893        if (tb[NETNSA_PID]) {
 894                peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
 895                nla = tb[NETNSA_PID];
 896        } else if (tb[NETNSA_FD]) {
 897                peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
 898                nla = tb[NETNSA_FD];
 899        } else if (tb[NETNSA_NSID]) {
 900                peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID]));
 901                if (!peer)
 902                        peer = ERR_PTR(-ENOENT);
 903                nla = tb[NETNSA_NSID];
 904        } else {
 905                NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
 906                return -EINVAL;
 907        }
 908
 909        if (IS_ERR(peer)) {
 910                NL_SET_BAD_ATTR(extack, nla);
 911                NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
 912                return PTR_ERR(peer);
 913        }
 914
 915        if (tb[NETNSA_TARGET_NSID]) {
 916                int id = nla_get_s32(tb[NETNSA_TARGET_NSID]);
 917
 918                target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id);
 919                if (IS_ERR(target)) {
 920                        NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
 921                        NL_SET_ERR_MSG(extack,
 922                                       "Target netns reference is invalid");
 923                        err = PTR_ERR(target);
 924                        goto out;
 925                }
 926                fillargs.add_ref = true;
 927                fillargs.ref_nsid = peernet2id(net, peer);
 928        }
 929
 930        msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
 931        if (!msg) {
 932                err = -ENOMEM;
 933                goto out;
 934        }
 935
 936        fillargs.nsid = peernet2id(target, peer);
 937        err = rtnl_net_fill(msg, &fillargs);
 938        if (err < 0)
 939                goto err_out;
 940
 941        err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
 942        goto out;
 943
 944err_out:
 945        nlmsg_free(msg);
 946out:
 947        if (fillargs.add_ref)
 948                put_net(target);
 949        put_net(peer);
 950        return err;
 951}
 952
 953struct rtnl_net_dump_cb {
 954        struct net *tgt_net;
 955        struct net *ref_net;
 956        struct sk_buff *skb;
 957        struct net_fill_args fillargs;
 958        int idx;
 959        int s_idx;
 960};
 961
 962/* Runs in RCU-critical section. */
 963static int rtnl_net_dumpid_one(int id, void *peer, void *data)
 964{
 965        struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
 966        int ret;
 967
 968        if (net_cb->idx < net_cb->s_idx)
 969                goto cont;
 970
 971        net_cb->fillargs.nsid = id;
 972        if (net_cb->fillargs.add_ref)
 973                net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer);
 974        ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs);
 975        if (ret < 0)
 976                return ret;
 977
 978cont:
 979        net_cb->idx++;
 980        return 0;
 981}
 982
 983static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk,
 984                                   struct rtnl_net_dump_cb *net_cb,
 985                                   struct netlink_callback *cb)
 986{
 987        struct netlink_ext_ack *extack = cb->extack;
 988        struct nlattr *tb[NETNSA_MAX + 1];
 989        int err, i;
 990
 991        err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
 992                                            NETNSA_MAX, rtnl_net_policy,
 993                                            extack);
 994        if (err < 0)
 995                return err;
 996
 997        for (i = 0; i <= NETNSA_MAX; i++) {
 998                if (!tb[i])
 999                        continue;
1000
1001                if (i == NETNSA_TARGET_NSID) {
1002                        struct net *net;
1003
1004                        net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i]));
1005                        if (IS_ERR(net)) {
1006                                NL_SET_BAD_ATTR(extack, tb[i]);
1007                                NL_SET_ERR_MSG(extack,
1008                                               "Invalid target network namespace id");
1009                                return PTR_ERR(net);
1010                        }
1011                        net_cb->fillargs.add_ref = true;
1012                        net_cb->ref_net = net_cb->tgt_net;
1013                        net_cb->tgt_net = net;
1014                } else {
1015                        NL_SET_BAD_ATTR(extack, tb[i]);
1016                        NL_SET_ERR_MSG(extack,
1017                                       "Unsupported attribute in dump request");
1018                        return -EINVAL;
1019                }
1020        }
1021
1022        return 0;
1023}
1024
1025static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
1026{
1027        struct rtnl_net_dump_cb net_cb = {
1028                .tgt_net = sock_net(skb->sk),
1029                .skb = skb,
1030                .fillargs = {
1031                        .portid = NETLINK_CB(cb->skb).portid,
1032                        .seq = cb->nlh->nlmsg_seq,
1033                        .flags = NLM_F_MULTI,
1034                        .cmd = RTM_NEWNSID,
1035                },
1036                .idx = 0,
1037                .s_idx = cb->args[0],
1038        };
1039        int err = 0;
1040
1041        if (cb->strict_check) {
1042                err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb);
1043                if (err < 0)
1044                        goto end;
1045        }
1046
1047        rcu_read_lock();
1048        idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
1049        rcu_read_unlock();
1050
1051        cb->args[0] = net_cb.idx;
1052end:
1053        if (net_cb.fillargs.add_ref)
1054                put_net(net_cb.tgt_net);
1055        return err < 0 ? err : skb->len;
1056}
1057
1058static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
1059                              struct nlmsghdr *nlh, gfp_t gfp)
1060{
1061        struct net_fill_args fillargs = {
1062                .portid = portid,
1063                .seq = nlh ? nlh->nlmsg_seq : 0,
1064                .cmd = cmd,
1065                .nsid = id,
1066        };
1067        struct sk_buff *msg;
1068        int err = -ENOMEM;
1069
1070        msg = nlmsg_new(rtnl_net_get_size(), gfp);
1071        if (!msg)
1072                goto out;
1073
1074        err = rtnl_net_fill(msg, &fillargs);
1075        if (err < 0)
1076                goto err_out;
1077
1078        rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp);
1079        return;
1080
1081err_out:
1082        nlmsg_free(msg);
1083out:
1084        rtnl_set_sk_err(net, RTNLGRP_NSID, err);
1085}
1086
1087static int __init net_ns_init(void)
1088{
1089        struct net_generic *ng;
1090
1091#ifdef CONFIG_NET_NS
1092        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
1093                                        SMP_CACHE_BYTES,
1094                                        SLAB_PANIC|SLAB_ACCOUNT, NULL);
1095
1096        /* Create workqueue for cleanup */
1097        netns_wq = create_singlethread_workqueue("netns");
1098        if (!netns_wq)
1099                panic("Could not create netns workq");
1100#endif
1101
1102        ng = net_alloc_generic();
1103        if (!ng)
1104                panic("Could not allocate generic netns");
1105
1106        rcu_assign_pointer(init_net.gen, ng);
1107
1108        down_write(&pernet_ops_rwsem);
1109        if (setup_net(&init_net, &init_user_ns))
1110                panic("Could not setup the initial network namespace");
1111
1112        init_net_initialized = true;
1113        up_write(&pernet_ops_rwsem);
1114
1115        if (register_pernet_subsys(&net_ns_ops))
1116                panic("Could not register network namespace subsystems");
1117
1118        rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
1119                      RTNL_FLAG_DOIT_UNLOCKED);
1120        rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
1121                      RTNL_FLAG_DOIT_UNLOCKED);
1122
1123        return 0;
1124}
1125
1126pure_initcall(net_ns_init);
1127
/*
 * Tear @ops down for every namespace queued on @net_exit_list.
 *
 * The stages run strictly in this order: pre-exit, an RCU grace period
 * (synchronize_rcu()), exit, and finally the free stage.
 */
static void free_exit_list(struct pernet_operations *ops,
                           struct list_head *net_exit_list)
{
        ops_pre_exit_list(ops, net_exit_list);

        /* Wait for a grace period between the pre-exit and exit stages. */
        synchronize_rcu();

        ops_exit_list(ops, net_exit_list);
        ops_free_list(ops, net_exit_list);
}
1135
1136#ifdef CONFIG_NET_NS
1137static int __register_pernet_operations(struct list_head *list,
1138                                        struct pernet_operations *ops)
1139{
1140        struct net *net;
1141        int error;
1142        LIST_HEAD(net_exit_list);
1143
1144        list_add_tail(&ops->list, list);
1145        if (ops->init || (ops->id && ops->size)) {
1146                /* We held write locked pernet_ops_rwsem, and parallel
1147                 * setup_net() and cleanup_net() are not possible.
1148                 */
1149                for_each_net(net) {
1150                        error = ops_init(ops, net);
1151                        if (error)
1152                                goto out_undo;
1153                        list_add_tail(&net->exit_list, &net_exit_list);
1154                }
1155        }
1156        return 0;
1157
1158out_undo:
1159        /* If I have an error cleanup all namespaces I initialized */
1160        list_del(&ops->list);
1161        free_exit_list(ops, &net_exit_list);
1162        return error;
1163}
1164
1165static void __unregister_pernet_operations(struct pernet_operations *ops)
1166{
1167        struct net *net;
1168        LIST_HEAD(net_exit_list);
1169
1170        list_del(&ops->list);
1171        /* See comment in __register_pernet_operations() */
1172        for_each_net(net)
1173                list_add_tail(&net->exit_list, &net_exit_list);
1174
1175        free_exit_list(ops, &net_exit_list);
1176}
1177
1178#else
1179
1180static int __register_pernet_operations(struct list_head *list,
1181                                        struct pernet_operations *ops)
1182{
1183        if (!init_net_initialized) {
1184                list_add_tail(&ops->list, list);
1185                return 0;
1186        }
1187
1188        return ops_init(ops, &init_net);
1189}
1190
1191static void __unregister_pernet_operations(struct pernet_operations *ops)
1192{
1193        if (!init_net_initialized) {
1194                list_del(&ops->list);
1195        } else {
1196                LIST_HEAD(net_exit_list);
1197                list_add(&init_net.exit_list, &net_exit_list);
1198                free_exit_list(ops, &net_exit_list);
1199        }
1200}
1201
1202#endif /* CONFIG_NET_NS */
1203
1204static DEFINE_IDA(net_generic_ids);
1205
1206static int register_pernet_operations(struct list_head *list,
1207                                      struct pernet_operations *ops)
1208{
1209        int error;
1210
1211        if (ops->id) {
1212                error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
1213                                GFP_KERNEL);
1214                if (error < 0)
1215                        return error;
1216                *ops->id = error;
1217                max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
1218        }
1219        error = __register_pernet_operations(list, ops);
1220        if (error) {
1221                rcu_barrier();
1222                if (ops->id)
1223                        ida_free(&net_generic_ids, *ops->id);
1224        }
1225
1226        return error;
1227}
1228
1229static void unregister_pernet_operations(struct pernet_operations *ops)
1230{
1231        __unregister_pernet_operations(ops);
1232        rcu_barrier();
1233        if (ops->id)
1234                ida_free(&net_generic_ids, *ops->id);
1235}
1236
1237/**
1238 *      register_pernet_subsys - register a network namespace subsystem
1239 *      @ops:  pernet operations structure for the subsystem
1240 *
1241 *      Register a subsystem which has init and exit functions
1242 *      that are called when network namespaces are created and
1243 *      destroyed respectively.
1244 *
1245 *      When registered all network namespace init functions are
1246 *      called for every existing network namespace.  Allowing kernel
1247 *      modules to have a race free view of the set of network namespaces.
1248 *
1249 *      When a new network namespace is created all of the init
1250 *      methods are called in the order in which they were registered.
1251 *
1252 *      When a network namespace is destroyed all of the exit methods
1253 *      are called in the reverse of the order with which they were
1254 *      registered.
1255 */
1256int register_pernet_subsys(struct pernet_operations *ops)
1257{
1258        int error;
1259        down_write(&pernet_ops_rwsem);
1260        error =  register_pernet_operations(first_device, ops);
1261        up_write(&pernet_ops_rwsem);
1262        return error;
1263}
1264EXPORT_SYMBOL_GPL(register_pernet_subsys);
1265
1266/**
1267 *      unregister_pernet_subsys - unregister a network namespace subsystem
1268 *      @ops: pernet operations structure to manipulate
1269 *
1270 *      Remove the pernet operations structure from the list to be
1271 *      used when network namespaces are created or destroyed.  In
1272 *      addition run the exit method for all existing network
1273 *      namespaces.
1274 */
1275void unregister_pernet_subsys(struct pernet_operations *ops)
1276{
1277        down_write(&pernet_ops_rwsem);
1278        unregister_pernet_operations(ops);
1279        up_write(&pernet_ops_rwsem);
1280}
1281EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
1282
1283/**
1284 *      register_pernet_device - register a network namespace device
1285 *      @ops:  pernet operations structure for the subsystem
1286 *
1287 *      Register a device which has init and exit functions
1288 *      that are called when network namespaces are created and
1289 *      destroyed respectively.
1290 *
1291 *      When registered all network namespace init functions are
1292 *      called for every existing network namespace.  Allowing kernel
1293 *      modules to have a race free view of the set of network namespaces.
1294 *
1295 *      When a new network namespace is created all of the init
1296 *      methods are called in the order in which they were registered.
1297 *
1298 *      When a network namespace is destroyed all of the exit methods
1299 *      are called in the reverse of the order with which they were
1300 *      registered.
1301 */
1302int register_pernet_device(struct pernet_operations *ops)
1303{
1304        int error;
1305        down_write(&pernet_ops_rwsem);
1306        error = register_pernet_operations(&pernet_list, ops);
1307        if (!error && (first_device == &pernet_list))
1308                first_device = &ops->list;
1309        up_write(&pernet_ops_rwsem);
1310        return error;
1311}
1312EXPORT_SYMBOL_GPL(register_pernet_device);
1313
1314/**
1315 *      unregister_pernet_device - unregister a network namespace netdevice
1316 *      @ops: pernet operations structure to manipulate
1317 *
1318 *      Remove the pernet operations structure from the list to be
1319 *      used when network namespaces are created or destroyed.  In
1320 *      addition run the exit method for all existing network
1321 *      namespaces.
1322 */
1323void unregister_pernet_device(struct pernet_operations *ops)
1324{
1325        down_write(&pernet_ops_rwsem);
1326        if (&ops->list == first_device)
1327                first_device = first_device->next;
1328        unregister_pernet_operations(ops);
1329        up_write(&pernet_ops_rwsem);
1330}
1331EXPORT_SYMBOL_GPL(unregister_pernet_device);
1332
1333#ifdef CONFIG_NET_NS
1334static struct ns_common *netns_get(struct task_struct *task)
1335{
1336        struct net *net = NULL;
1337        struct nsproxy *nsproxy;
1338
1339        task_lock(task);
1340        nsproxy = task->nsproxy;
1341        if (nsproxy)
1342                net = get_net(nsproxy->net_ns);
1343        task_unlock(task);
1344
1345        return net ? &net->ns : NULL;
1346}
1347
1348static inline struct net *to_net_ns(struct ns_common *ns)
1349{
1350        return container_of(ns, struct net, ns);
1351}
1352
/* proc_ns "put" hook: drop one reference on the namespace behind @ns. */
static void netns_put(struct ns_common *ns)
{
        struct net *net = to_net_ns(ns);

        put_net(net);
}
1357
1358static int netns_install(struct nsset *nsset, struct ns_common *ns)
1359{
1360        struct nsproxy *nsproxy = nsset->nsproxy;
1361        struct net *net = to_net_ns(ns);
1362
1363        if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
1364            !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
1365                return -EPERM;
1366
1367        put_net(nsproxy->net_ns);
1368        nsproxy->net_ns = get_net(net);
1369        return 0;
1370}
1371
1372static struct user_namespace *netns_owner(struct ns_common *ns)
1373{
1374        return to_net_ns(ns)->user_ns;
1375}
1376
1377const struct proc_ns_operations netns_operations = {
1378        .name           = "net",
1379        .type           = CLONE_NEWNET,
1380        .get            = netns_get,
1381        .put            = netns_put,
1382        .install        = netns_install,
1383        .owner          = netns_owner,
1384};
1385#endif
1386