linux/net/core/net_namespace.c
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
 *      Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
static DEFINE_MUTEX(net_mutex);

LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

struct net init_net = {
        .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */

static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

static struct net_generic *net_alloc_generic(void)
{
        struct net_generic *ng;
        size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

        ng = kzalloc(generic_size, GFP_KERNEL);
        if (ng)
                ng->len = max_gen_ptrs;

        return ng;
}

static int net_assign_generic(struct net *net, int id, void *data)
{
        struct net_generic *ng, *old_ng;

        BUG_ON(!mutex_is_locked(&net_mutex));
        BUG_ON(id == 0);

        old_ng = rcu_dereference_protected(net->gen,
                                           lockdep_is_held(&net_mutex));
        ng = old_ng;
        if (old_ng->len >= id)
                goto assign;

        ng = net_alloc_generic();
        if (ng == NULL)
                return -ENOMEM;

        /*
         * Some synchronisation notes:
         *
         * net_generic() walks the net->gen array inside an RCU
         * read-side section. Moreover, once set, a net->gen->ptr[x]
         * pointer never changes (see the rules in netns/generic.h).
         *
         * So we simply duplicate the array and schedule the old
         * copy for kfree after an RCU grace period.
         */

        memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));

        rcu_assign_pointer(net->gen, ng);
        kfree_rcu(old_ng, rcu);
assign:
        ng->ptr[id - 1] = data;
        return 0;
}
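
/*
 * Reader-side sketch (illustration only, not part of this file; the
 * "foo" names are hypothetical): a pernet user that registered with
 * ->id/->size looks its private data back up with net_generic(),
 * which performs the rcu_read_lock()/rcu_dereference() of net->gen
 * itself, so the caller needs no extra locking. Once assigned, the
 * ptr[id - 1] slot for a given namespace never changes.
 *
 *      static int foo_net_id __read_mostly;
 *
 *      struct foo_net {
 *              int sessions;
 *      };
 *
 *      static void foo_count_session(struct net *net)
 *      {
 *              struct foo_net *fn = net_generic(net, foo_net_id);
 *
 *              fn->sessions++;
 *      }
 */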

static int ops_init(const struct pernet_operations *ops, struct net *net)
{
        int err = -ENOMEM;
        void *data = NULL;

        if (ops->id && ops->size) {
                data = kzalloc(ops->size, GFP_KERNEL);
                if (!data)
                        goto out;

                err = net_assign_generic(net, *ops->id, data);
                if (err)
                        goto cleanup;
        }
        err = 0;
        if (ops->init)
                err = ops->init(net);
        if (!err)
                return 0;

cleanup:
        kfree(data);

out:
        return err;
}

static void ops_free(const struct pernet_operations *ops, struct net *net)
{
        if (ops->id && ops->size) {
                int id = *ops->id;
                kfree(net_generic(net, id));
        }
}

static void ops_exit_list(const struct pernet_operations *ops,
                          struct list_head *net_exit_list)
{
        struct net *net;
        if (ops->exit) {
                list_for_each_entry(net, net_exit_list, exit_list)
                        ops->exit(net);
        }
        if (ops->exit_batch)
                ops->exit_batch(net_exit_list);
}

static void ops_free_list(const struct pernet_operations *ops,
                          struct list_head *net_exit_list)
{
        struct net *net;
        if (ops->size && ops->id) {
                list_for_each_entry(net, net_exit_list, exit_list)
                        ops_free(ops, net);
        }
}

/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
        /* Must be called with net_mutex held */
        const struct pernet_operations *ops, *saved_ops;
        int error = 0;
        LIST_HEAD(net_exit_list);

        atomic_set(&net->count, 1);
        atomic_set(&net->passive, 1);
        net->dev_base_seq = 1;
        net->user_ns = user_ns;

#ifdef NETNS_REFCNT_DEBUG
        atomic_set(&net->use_count, 0);
#endif

        list_for_each_entry(ops, &pernet_list, list) {
                error = ops_init(ops, net);
                if (error < 0)
                        goto out_undo;
        }
out:
        return error;

out_undo:
        /* Walk through the list backwards calling the exit functions
         * for the pernet modules whose init functions did not fail.
         */
        list_add(&net->exit_list, &net_exit_list);
        saved_ops = ops;
        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);

        ops = saved_ops;
        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
                ops_free_list(ops, &net_exit_list);

        rcu_barrier();
        goto out;
}


#ifdef CONFIG_NET_NS
static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

static struct net *net_alloc(void)
{
        struct net *net = NULL;
        struct net_generic *ng;

        ng = net_alloc_generic();
        if (!ng)
                goto out;

        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
        if (!net)
                goto out_free;

        rcu_assign_pointer(net->gen, ng);
out:
        return net;

out_free:
        kfree(ng);
        goto out;
}

static void net_free(struct net *net)
{
#ifdef NETNS_REFCNT_DEBUG
        if (unlikely(atomic_read(&net->use_count) != 0)) {
                pr_emerg("network namespace not free! Usage: %d\n",
                         atomic_read(&net->use_count));
                return;
        }
#endif
        kfree(net->gen);
        kmem_cache_free(net_cachep, net);
}

void net_drop_ns(void *p)
{
        struct net *ns = p;
        if (ns && atomic_dec_and_test(&ns->passive))
                net_free(ns);
}

struct net *copy_net_ns(unsigned long flags,
                        struct user_namespace *user_ns, struct net *old_net)
{
        struct net *net;
        int rv;

        if (!(flags & CLONE_NEWNET))
                return get_net(old_net);

        net = net_alloc();
        if (!net)
                return ERR_PTR(-ENOMEM);

        get_user_ns(user_ns);

        mutex_lock(&net_mutex);
        rv = setup_net(net, user_ns);
        if (rv == 0) {
                rtnl_lock();
                list_add_tail_rcu(&net->list, &net_namespace_list);
                rtnl_unlock();
        }
        mutex_unlock(&net_mutex);
        if (rv < 0) {
                put_user_ns(user_ns);
                net_drop_ns(net);
                return ERR_PTR(rv);
        }
        return net;
}

static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */

static void cleanup_net(struct work_struct *work)
{
        const struct pernet_operations *ops;
        struct net *net, *tmp;
        LIST_HEAD(net_kill_list);
        LIST_HEAD(net_exit_list);

        /* Atomically snapshot the list of namespaces to cleanup */
        spin_lock_irq(&cleanup_list_lock);
        list_replace_init(&cleanup_list, &net_kill_list);
        spin_unlock_irq(&cleanup_list_lock);

        mutex_lock(&net_mutex);

        /* Don't let anyone else find us. */
        rtnl_lock();
        list_for_each_entry(net, &net_kill_list, cleanup_list) {
                list_del_rcu(&net->list);
                list_add_tail(&net->exit_list, &net_exit_list);
        }
        rtnl_unlock();

        /*
         * Another CPU might be rcu-iterating the list, wait for it.
         * This needs to be before calling the exit() notifiers, so
         * the rcu_barrier() below isn't sufficient alone.
         */
        synchronize_rcu();

        /* Run all of the network namespace exit methods */
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_exit_list(ops, &net_exit_list);

        /* Free the net generic variables */
        list_for_each_entry_reverse(ops, &pernet_list, list)
                ops_free_list(ops, &net_exit_list);

        mutex_unlock(&net_mutex);

        /* Ensure there are no outstanding rcu callbacks using this
         * network namespace.
         */
        rcu_barrier();

        /* Finally it is safe to free my network namespace structure */
        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
                list_del_init(&net->exit_list);
                put_user_ns(net->user_ns);
                net_drop_ns(net);
        }
}
static DECLARE_WORK(net_cleanup_work, cleanup_net);

void __put_net(struct net *net)
{
        /* Cleanup the network namespace in process context */
        unsigned long flags;

        spin_lock_irqsave(&cleanup_list_lock, flags);
        list_add(&net->cleanup_list, &cleanup_list);
        spin_unlock_irqrestore(&cleanup_list_lock, flags);

        queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);
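
/*
 * Refcounting sketch (illustration only; the "foo" names are
 * hypothetical): code that caches a struct net pointer pins it with
 * get_net() and releases it with put_net(). The final put_net() lands
 * in __put_net() above, which queues cleanup_net() on the "netns"
 * workqueue so the namespace is torn down in process context.
 *
 *      struct foo {
 *              struct net *net;
 *      };
 *
 *      static void foo_bind(struct foo *f, struct net *net)
 *      {
 *              f->net = get_net(net);
 *      }
 *
 *      static void foo_release(struct foo *f)
 *      {
 *              put_net(f->net);
 *      }
 */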

struct net *get_net_ns_by_fd(int fd)
{
        struct proc_ns *ei;
        struct file *file;
        struct net *net;

        file = proc_ns_fget(fd);
        if (IS_ERR(file))
                return ERR_CAST(file);

        ei = get_proc_ns(file_inode(file));
        if (ei->ns_ops == &netns_operations)
                net = get_net(ei->ns);
        else
                net = ERR_PTR(-EINVAL);

        fput(file);
        return net;
}

#else
struct net *get_net_ns_by_fd(int fd)
{
        return ERR_PTR(-EINVAL);
}
#endif

struct net *get_net_ns_by_pid(pid_t pid)
{
        struct task_struct *tsk;
        struct net *net;

        /* Lookup the network namespace */
        net = ERR_PTR(-ESRCH);
        rcu_read_lock();
        tsk = find_task_by_vpid(pid);
        if (tsk) {
                struct nsproxy *nsproxy;
                nsproxy = task_nsproxy(tsk);
                if (nsproxy)
                        net = get_net(nsproxy->net_ns);
        }
        rcu_read_unlock();
        return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
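
/*
 * Caller-side sketch (illustration only; foo_use_ns_fd() is
 * hypothetical): both get_net_ns_by_fd() and get_net_ns_by_pid()
 * return a struct net with a reference held, or an ERR_PTR() on
 * failure, so callers must check with IS_ERR() and balance the
 * reference with put_net() when they are done.
 *
 *      static int foo_use_ns_fd(int fd)
 *      {
 *              struct net *net;
 *
 *              net = get_net_ns_by_fd(fd);
 *              if (IS_ERR(net))
 *                      return PTR_ERR(net);
 *
 *              ... operate inside that namespace ...
 *
 *              put_net(net);
 *              return 0;
 *      }
 */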

static __net_init int net_ns_net_init(struct net *net)
{
        return proc_alloc_inum(&net->proc_inum);
}

static __net_exit void net_ns_net_exit(struct net *net)
{
        proc_free_inum(net->proc_inum);
}

static struct pernet_operations __net_initdata net_ns_ops = {
        .init = net_ns_net_init,
        .exit = net_ns_net_exit,
};

static int __init net_ns_init(void)
{
        struct net_generic *ng;

#ifdef CONFIG_NET_NS
        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
                                        SMP_CACHE_BYTES,
                                        SLAB_PANIC, NULL);

        /* Create workqueue for cleanup */
        netns_wq = create_singlethread_workqueue("netns");
        if (!netns_wq)
                panic("Could not create netns workq");
#endif

        ng = net_alloc_generic();
        if (!ng)
                panic("Could not allocate generic netns");

        rcu_assign_pointer(init_net.gen, ng);

        mutex_lock(&net_mutex);
        if (setup_net(&init_net, &init_user_ns))
                panic("Could not setup the initial network namespace");

        rtnl_lock();
        list_add_tail_rcu(&init_net.list, &net_namespace_list);
        rtnl_unlock();

        mutex_unlock(&net_mutex);

        register_pernet_subsys(&net_ns_ops);

        return 0;
}

pure_initcall(net_ns_init);

#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
                                        struct pernet_operations *ops)
{
        struct net *net;
        int error;
        LIST_HEAD(net_exit_list);

        list_add_tail(&ops->list, list);
        if (ops->init || (ops->id && ops->size)) {
                for_each_net(net) {
                        error = ops_init(ops, net);
                        if (error)
                                goto out_undo;
                        list_add_tail(&net->exit_list, &net_exit_list);
                }
        }
        return 0;

out_undo:
        /* If I have an error cleanup all namespaces I initialized */
        list_del(&ops->list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
        return error;
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
        struct net *net;
        LIST_HEAD(net_exit_list);

        list_del(&ops->list);
        for_each_net(net)
                list_add_tail(&net->exit_list, &net_exit_list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
}

#else

static int __register_pernet_operations(struct list_head *list,
                                        struct pernet_operations *ops)
{
        return ops_init(ops, &init_net);
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
        LIST_HEAD(net_exit_list);
        list_add(&init_net.exit_list, &net_exit_list);
        ops_exit_list(ops, &net_exit_list);
        ops_free_list(ops, &net_exit_list);
}

#endif /* CONFIG_NET_NS */

static DEFINE_IDA(net_generic_ids);

static int register_pernet_operations(struct list_head *list,
                                      struct pernet_operations *ops)
{
        int error;

        if (ops->id) {
again:
                error = ida_get_new_above(&net_generic_ids, 1, ops->id);
                if (error < 0) {
                        if (error == -EAGAIN) {
                                ida_pre_get(&net_generic_ids, GFP_KERNEL);
                                goto again;
                        }
                        return error;
                }
                max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
        }
        error = __register_pernet_operations(list, ops);
        if (error) {
                rcu_barrier();
                if (ops->id)
                        ida_remove(&net_generic_ids, *ops->id);
        }

        return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
        __unregister_pernet_operations(ops);
        rcu_barrier();
        if (ops->id)
                ida_remove(&net_generic_ids, *ops->id);
}

/**
 *      register_pernet_subsys - register a network namespace subsystem
 *      @ops:  pernet operations structure for the subsystem
 *
 *      Register a subsystem which has init and exit functions
 *      that are called when network namespaces are created and
 *      destroyed respectively.
 *
 *      When registered, the init function is called for every existing
 *      network namespace, allowing kernel modules to have a race-free
 *      view of the set of network namespaces.
 *
 *      When a new network namespace is created all of the init
 *      methods are called in the order in which they were registered.
 *
 *      When a network namespace is destroyed all of the exit methods
 *      are called in the reverse of the order in which they were
 *      registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
        int error;
        mutex_lock(&net_mutex);
        error = register_pernet_operations(first_device, ops);
        mutex_unlock(&net_mutex);
        return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
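
/*
 * Usage sketch (illustration only; the "foo" names are hypothetical):
 * a subsystem registers a pernet_operations with init/exit hooks from
 * its module init, and unregisters it on module exit. The init hook
 * then runs for every existing namespace at registration time and for
 * each namespace created afterwards; the exit hook runs on namespace
 * teardown and, for all live namespaces, at unregistration.
 *
 *      static int __net_init foo_net_init(struct net *net)
 *      {
 *              return 0;
 *      }
 *
 *      static void __net_exit foo_net_exit(struct net *net)
 *      {
 *      }
 *
 *      static struct pernet_operations foo_net_ops = {
 *              .init = foo_net_init,
 *              .exit = foo_net_exit,
 *      };
 *
 *      static int __init foo_init(void)
 *      {
 *              return register_pernet_subsys(&foo_net_ops);
 *      }
 *
 *      static void __exit foo_exit(void)
 *      {
 *              unregister_pernet_subsys(&foo_net_ops);
 *      }
 */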

/**
 *      unregister_pernet_subsys - unregister a network namespace subsystem
 *      @ops: pernet operations structure to manipulate
 *
 *      Remove the pernet operations structure from the list to be
 *      used when network namespaces are created or destroyed.  In
 *      addition run the exit method for all existing network
 *      namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
        mutex_lock(&net_mutex);
        unregister_pernet_operations(ops);
        mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

/**
 *      register_pernet_device - register a network namespace device
 *      @ops:  pernet operations structure for the subsystem
 *
 *      Register a device which has init and exit functions
 *      that are called when network namespaces are created and
 *      destroyed respectively.
 *
 *      When registered, the init function is called for every existing
 *      network namespace, allowing kernel modules to have a race-free
 *      view of the set of network namespaces.
 *
 *      When a new network namespace is created all of the init
 *      methods are called in the order in which they were registered.
 *
 *      When a network namespace is destroyed all of the exit methods
 *      are called in the reverse of the order in which they were
 *      registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
        int error;
        mutex_lock(&net_mutex);
        error = register_pernet_operations(&pernet_list, ops);
        if (!error && (first_device == &pernet_list))
                first_device = &ops->list;
        mutex_unlock(&net_mutex);
        return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
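
/*
 * Usage sketch (illustration only; the "bar" names are hypothetical):
 * a pernet device can have this file allocate its per-namespace state
 * by supplying ->id and ->size. ops_init() then kzallocs the buffer
 * and stores it via net_assign_generic() before calling ->init, so
 * the init hook (and any later code) can reach it through
 * net_generic(net, bar_net_id).
 *
 *      static int bar_net_id __read_mostly;
 *
 *      struct bar_net {
 *              struct list_head devices;
 *      };
 *
 *      static int __net_init bar_net_init(struct net *net)
 *      {
 *              struct bar_net *bn = net_generic(net, bar_net_id);
 *
 *              INIT_LIST_HEAD(&bn->devices);
 *              return 0;
 *      }
 *
 *      static struct pernet_operations bar_net_ops = {
 *              .init = bar_net_init,
 *              .id   = &bar_net_id,
 *              .size = sizeof(struct bar_net),
 *      };
 *
 * ...registered with register_pernet_device(&bar_net_ops) from the
 * driver's module init.
 */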

/**
 *      unregister_pernet_device - unregister a network namespace netdevice
 *      @ops: pernet operations structure to manipulate
 *
 *      Remove the pernet operations structure from the list to be
 *      used when network namespaces are created or destroyed.  In
 *      addition run the exit method for all existing network
 *      namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
        mutex_lock(&net_mutex);
        if (&ops->list == first_device)
                first_device = first_device->next;
        unregister_pernet_operations(ops);
        mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

#ifdef CONFIG_NET_NS
static void *netns_get(struct task_struct *task)
{
        struct net *net = NULL;
        struct nsproxy *nsproxy;

        rcu_read_lock();
        nsproxy = task_nsproxy(task);
        if (nsproxy)
                net = get_net(nsproxy->net_ns);
        rcu_read_unlock();

        return net;
}

static void netns_put(void *ns)
{
        put_net(ns);
}

static int netns_install(struct nsproxy *nsproxy, void *ns)
{
        struct net *net = ns;

        if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
            !nsown_capable(CAP_SYS_ADMIN))
                return -EPERM;

        put_net(nsproxy->net_ns);
        nsproxy->net_ns = get_net(net);
        return 0;
}

static unsigned int netns_inum(void *ns)
{
        struct net *net = ns;
        return net->proc_inum;
}

const struct proc_ns_operations netns_operations = {
        .name           = "net",
        .type           = CLONE_NEWNET,
        .get            = netns_get,
        .put            = netns_put,
        .install        = netns_install,
        .inum           = netns_inum,
};
#endif