/* linux/net/core/net_namespace.c */
   1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   2
   3#include <linux/workqueue.h>
   4#include <linux/rtnetlink.h>
   5#include <linux/cache.h>
   6#include <linux/slab.h>
   7#include <linux/list.h>
   8#include <linux/delay.h>
   9#include <linux/sched.h>
  10#include <linux/idr.h>
  11#include <linux/rculist.h>
  12#include <linux/nsproxy.h>
  13#include <linux/proc_fs.h>
  14#include <linux/file.h>
  15#include <linux/export.h>
  16#include <net/net_namespace.h>
  17#include <net/netns/generic.h>
  18
  19/*
  20 *      Our network namespace constructor/destructor lists
  21 */
  22
  23static LIST_HEAD(pernet_list);
  24static struct list_head *first_device = &pernet_list;
  25static DEFINE_MUTEX(net_mutex);
  26
  27LIST_HEAD(net_namespace_list);
  28EXPORT_SYMBOL_GPL(net_namespace_list);
  29
  30struct net init_net = {
  31        .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
  32};
  33EXPORT_SYMBOL(init_net);
  34
  35#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */
  36
  37static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
  38
  39static struct net_generic *net_alloc_generic(void)
  40{
  41        struct net_generic *ng;
  42        size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
  43
  44        ng = kzalloc(generic_size, GFP_KERNEL);
  45        if (ng)
  46                ng->len = max_gen_ptrs;
  47
  48        return ng;
  49}
  50
  51static int net_assign_generic(struct net *net, int id, void *data)
  52{
  53        struct net_generic *ng, *old_ng;
  54
  55        BUG_ON(!mutex_is_locked(&net_mutex));
  56        BUG_ON(id == 0);
  57
  58        old_ng = rcu_dereference_protected(net->gen,
  59                                           lockdep_is_held(&net_mutex));
  60        ng = old_ng;
  61        if (old_ng->len >= id)
  62                goto assign;
  63
  64        ng = net_alloc_generic();
  65        if (ng == NULL)
  66                return -ENOMEM;
  67
  68        /*
  69         * Some synchronisation notes:
  70         *
  71         * The net_generic explores the net->gen array inside rcu
  72         * read section. Besides once set the net->gen->ptr[x]
  73         * pointer never changes (see rules in netns/generic.h).
  74         *
  75         * That said, we simply duplicate this array and schedule
  76         * the old copy for kfree after a grace period.
  77         */
  78
  79        memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
  80
  81        rcu_assign_pointer(net->gen, ng);
  82        kfree_rcu(old_ng, rcu);
  83assign:
  84        ng->ptr[id - 1] = data;
  85        return 0;
  86}
  87
  88static int ops_init(const struct pernet_operations *ops, struct net *net)
  89{
  90        int err = -ENOMEM;
  91        void *data = NULL;
  92
  93        if (ops->id && ops->size) {
  94                data = kzalloc(ops->size, GFP_KERNEL);
  95                if (!data)
  96                        goto out;
  97
  98                err = net_assign_generic(net, *ops->id, data);
  99                if (err)
 100                        goto cleanup;
 101        }
 102        err = 0;
 103        if (ops->init)
 104                err = ops->init(net);
 105        if (!err)
 106                return 0;
 107
 108cleanup:
 109        kfree(data);
 110
 111out:
 112        return err;
 113}
 114
 115static void ops_free(const struct pernet_operations *ops, struct net *net)
 116{
 117        if (ops->id && ops->size) {
 118                int id = *ops->id;
 119                kfree(net_generic(net, id));
 120        }
 121}
 122
 123static void ops_exit_list(const struct pernet_operations *ops,
 124                          struct list_head *net_exit_list)
 125{
 126        struct net *net;
 127        if (ops->exit) {
 128                list_for_each_entry(net, net_exit_list, exit_list)
 129                        ops->exit(net);
 130        }
 131        if (ops->exit_batch)
 132                ops->exit_batch(net_exit_list);
 133}
 134
 135static void ops_free_list(const struct pernet_operations *ops,
 136                          struct list_head *net_exit_list)
 137{
 138        struct net *net;
 139        if (ops->size && ops->id) {
 140                list_for_each_entry(net, net_exit_list, exit_list)
 141                        ops_free(ops, net);
 142        }
 143}
 144
 145/*
 146 * setup_net runs the initializers for the network namespace object.
 147 */
 148static __net_init int setup_net(struct net *net)
 149{
 150        /* Must be called with net_mutex held */
 151        const struct pernet_operations *ops, *saved_ops;
 152        int error = 0;
 153        LIST_HEAD(net_exit_list);
 154
 155        atomic_set(&net->count, 1);
 156        atomic_set(&net->passive, 1);
 157        net->dev_base_seq = 1;
 158
 159#ifdef NETNS_REFCNT_DEBUG
 160        atomic_set(&net->use_count, 0);
 161#endif
 162
 163        list_for_each_entry(ops, &pernet_list, list) {
 164                error = ops_init(ops, net);
 165                if (error < 0)
 166                        goto out_undo;
 167        }
 168out:
 169        return error;
 170
 171out_undo:
 172        /* Walk through the list backwards calling the exit functions
 173         * for the pernet modules whose init functions did not fail.
 174         */
 175        list_add(&net->exit_list, &net_exit_list);
 176        saved_ops = ops;
 177        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 178                ops_exit_list(ops, &net_exit_list);
 179
 180        ops = saved_ops;
 181        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 182                ops_free_list(ops, &net_exit_list);
 183
 184        rcu_barrier();
 185        goto out;
 186}
 187
 188
 189#ifdef CONFIG_NET_NS
 190static struct kmem_cache *net_cachep;
 191static struct workqueue_struct *netns_wq;
 192
 193static struct net *net_alloc(void)
 194{
 195        struct net *net = NULL;
 196        struct net_generic *ng;
 197
 198        ng = net_alloc_generic();
 199        if (!ng)
 200                goto out;
 201
 202        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
 203        if (!net)
 204                goto out_free;
 205
 206        rcu_assign_pointer(net->gen, ng);
 207out:
 208        return net;
 209
 210out_free:
 211        kfree(ng);
 212        goto out;
 213}
 214
 215static void net_free(struct net *net)
 216{
 217#ifdef NETNS_REFCNT_DEBUG
 218        if (unlikely(atomic_read(&net->use_count) != 0)) {
 219                pr_emerg("network namespace not free! Usage: %d\n",
 220                         atomic_read(&net->use_count));
 221                return;
 222        }
 223#endif
 224        kfree(net->gen);
 225        kmem_cache_free(net_cachep, net);
 226}
 227
 228void net_drop_ns(void *p)
 229{
 230        struct net *ns = p;
 231        if (ns && atomic_dec_and_test(&ns->passive))
 232                net_free(ns);
 233}
 234
 235struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 236{
 237        struct net *net;
 238        int rv;
 239
 240        if (!(flags & CLONE_NEWNET))
 241                return get_net(old_net);
 242
 243        net = net_alloc();
 244        if (!net)
 245                return ERR_PTR(-ENOMEM);
 246        mutex_lock(&net_mutex);
 247        rv = setup_net(net);
 248        if (rv == 0) {
 249                rtnl_lock();
 250                list_add_tail_rcu(&net->list, &net_namespace_list);
 251                rtnl_unlock();
 252        }
 253        mutex_unlock(&net_mutex);
 254        if (rv < 0) {
 255                net_drop_ns(net);
 256                return ERR_PTR(rv);
 257        }
 258        return net;
 259}
 260
 261static DEFINE_SPINLOCK(cleanup_list_lock);
 262static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
 263
 264static void cleanup_net(struct work_struct *work)
 265{
 266        const struct pernet_operations *ops;
 267        struct net *net, *tmp;
 268        LIST_HEAD(net_kill_list);
 269        LIST_HEAD(net_exit_list);
 270
 271        /* Atomically snapshot the list of namespaces to cleanup */
 272        spin_lock_irq(&cleanup_list_lock);
 273        list_replace_init(&cleanup_list, &net_kill_list);
 274        spin_unlock_irq(&cleanup_list_lock);
 275
 276        mutex_lock(&net_mutex);
 277
 278        /* Don't let anyone else find us. */
 279        rtnl_lock();
 280        list_for_each_entry(net, &net_kill_list, cleanup_list) {
 281                list_del_rcu(&net->list);
 282                list_add_tail(&net->exit_list, &net_exit_list);
 283        }
 284        rtnl_unlock();
 285
 286        /*
 287         * Another CPU might be rcu-iterating the list, wait for it.
 288         * This needs to be before calling the exit() notifiers, so
 289         * the rcu_barrier() below isn't sufficient alone.
 290         */
 291        synchronize_rcu();
 292
 293        /* Run all of the network namespace exit methods */
 294        list_for_each_entry_reverse(ops, &pernet_list, list)
 295                ops_exit_list(ops, &net_exit_list);
 296
 297        /* Free the net generic variables */
 298        list_for_each_entry_reverse(ops, &pernet_list, list)
 299                ops_free_list(ops, &net_exit_list);
 300
 301        mutex_unlock(&net_mutex);
 302
 303        /* Ensure there are no outstanding rcu callbacks using this
 304         * network namespace.
 305         */
 306        rcu_barrier();
 307
 308        /* Finally it is safe to free my network namespace structure */
 309        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 310                list_del_init(&net->exit_list);
 311                net_drop_ns(net);
 312        }
 313}
 314static DECLARE_WORK(net_cleanup_work, cleanup_net);
 315
 316void __put_net(struct net *net)
 317{
 318        /* Cleanup the network namespace in process context */
 319        unsigned long flags;
 320
 321        spin_lock_irqsave(&cleanup_list_lock, flags);
 322        list_add(&net->cleanup_list, &cleanup_list);
 323        spin_unlock_irqrestore(&cleanup_list_lock, flags);
 324
 325        queue_work(netns_wq, &net_cleanup_work);
 326}
 327EXPORT_SYMBOL_GPL(__put_net);
 328
 329struct net *get_net_ns_by_fd(int fd)
 330{
 331        struct proc_inode *ei;
 332        struct file *file;
 333        struct net *net;
 334
 335        file = proc_ns_fget(fd);
 336        if (IS_ERR(file))
 337                return ERR_CAST(file);
 338
 339        ei = PROC_I(file->f_dentry->d_inode);
 340        if (ei->ns_ops == &netns_operations)
 341                net = get_net(ei->ns);
 342        else
 343                net = ERR_PTR(-EINVAL);
 344
 345        fput(file);
 346        return net;
 347}
 348
 349#else
 350struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 351{
 352        if (flags & CLONE_NEWNET)
 353                return ERR_PTR(-EINVAL);
 354        return old_net;
 355}
 356
 357struct net *get_net_ns_by_fd(int fd)
 358{
 359        return ERR_PTR(-EINVAL);
 360}
 361#endif
 362
 363struct net *get_net_ns_by_pid(pid_t pid)
 364{
 365        struct task_struct *tsk;
 366        struct net *net;
 367
 368        /* Lookup the network namespace */
 369        net = ERR_PTR(-ESRCH);
 370        rcu_read_lock();
 371        tsk = find_task_by_vpid(pid);
 372        if (tsk) {
 373                struct nsproxy *nsproxy;
 374                nsproxy = task_nsproxy(tsk);
 375                if (nsproxy)
 376                        net = get_net(nsproxy->net_ns);
 377        }
 378        rcu_read_unlock();
 379        return net;
 380}
 381EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 382
 383static int __init net_ns_init(void)
 384{
 385        struct net_generic *ng;
 386
 387#ifdef CONFIG_NET_NS
 388        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
 389                                        SMP_CACHE_BYTES,
 390                                        SLAB_PANIC, NULL);
 391
 392        /* Create workqueue for cleanup */
 393        netns_wq = create_singlethread_workqueue("netns");
 394        if (!netns_wq)
 395                panic("Could not create netns workq");
 396#endif
 397
 398        ng = net_alloc_generic();
 399        if (!ng)
 400                panic("Could not allocate generic netns");
 401
 402        rcu_assign_pointer(init_net.gen, ng);
 403
 404        mutex_lock(&net_mutex);
 405        if (setup_net(&init_net))
 406                panic("Could not setup the initial network namespace");
 407
 408        rtnl_lock();
 409        list_add_tail_rcu(&init_net.list, &net_namespace_list);
 410        rtnl_unlock();
 411
 412        mutex_unlock(&net_mutex);
 413
 414        return 0;
 415}
 416
 417pure_initcall(net_ns_init);
 418
 419#ifdef CONFIG_NET_NS
 420static int __register_pernet_operations(struct list_head *list,
 421                                        struct pernet_operations *ops)
 422{
 423        struct net *net;
 424        int error;
 425        LIST_HEAD(net_exit_list);
 426
 427        list_add_tail(&ops->list, list);
 428        if (ops->init || (ops->id && ops->size)) {
 429                for_each_net(net) {
 430                        error = ops_init(ops, net);
 431                        if (error)
 432                                goto out_undo;
 433                        list_add_tail(&net->exit_list, &net_exit_list);
 434                }
 435        }
 436        return 0;
 437
 438out_undo:
 439        /* If I have an error cleanup all namespaces I initialized */
 440        list_del(&ops->list);
 441        ops_exit_list(ops, &net_exit_list);
 442        ops_free_list(ops, &net_exit_list);
 443        return error;
 444}
 445
 446static void __unregister_pernet_operations(struct pernet_operations *ops)
 447{
 448        struct net *net;
 449        LIST_HEAD(net_exit_list);
 450
 451        list_del(&ops->list);
 452        for_each_net(net)
 453                list_add_tail(&net->exit_list, &net_exit_list);
 454        ops_exit_list(ops, &net_exit_list);
 455        ops_free_list(ops, &net_exit_list);
 456}
 457
 458#else
 459
 460static int __register_pernet_operations(struct list_head *list,
 461                                        struct pernet_operations *ops)
 462{
 463        return ops_init(ops, &init_net);
 464}
 465
 466static void __unregister_pernet_operations(struct pernet_operations *ops)
 467{
 468        LIST_HEAD(net_exit_list);
 469        list_add(&init_net.exit_list, &net_exit_list);
 470        ops_exit_list(ops, &net_exit_list);
 471        ops_free_list(ops, &net_exit_list);
 472}
 473
 474#endif /* CONFIG_NET_NS */
 475
 476static DEFINE_IDA(net_generic_ids);
 477
 478static int register_pernet_operations(struct list_head *list,
 479                                      struct pernet_operations *ops)
 480{
 481        int error;
 482
 483        if (ops->id) {
 484again:
 485                error = ida_get_new_above(&net_generic_ids, 1, ops->id);
 486                if (error < 0) {
 487                        if (error == -EAGAIN) {
 488                                ida_pre_get(&net_generic_ids, GFP_KERNEL);
 489                                goto again;
 490                        }
 491                        return error;
 492                }
 493                max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
 494        }
 495        error = __register_pernet_operations(list, ops);
 496        if (error) {
 497                rcu_barrier();
 498                if (ops->id)
 499                        ida_remove(&net_generic_ids, *ops->id);
 500        }
 501
 502        return error;
 503}
 504
 505static void unregister_pernet_operations(struct pernet_operations *ops)
 506{
 507        
 508        __unregister_pernet_operations(ops);
 509        rcu_barrier();
 510        if (ops->id)
 511                ida_remove(&net_generic_ids, *ops->id);
 512}
 513
 514/**
 515 *      register_pernet_subsys - register a network namespace subsystem
 516 *      @ops:  pernet operations structure for the subsystem
 517 *
 518 *      Register a subsystem which has init and exit functions
 519 *      that are called when network namespaces are created and
 520 *      destroyed respectively.
 521 *
 522 *      When registered all network namespace init functions are
 523 *      called for every existing network namespace.  Allowing kernel
 524 *      modules to have a race free view of the set of network namespaces.
 525 *
 526 *      When a new network namespace is created all of the init
 527 *      methods are called in the order in which they were registered.
 528 *
 529 *      When a network namespace is destroyed all of the exit methods
 530 *      are called in the reverse of the order with which they were
 531 *      registered.
 532 */
 533int register_pernet_subsys(struct pernet_operations *ops)
 534{
 535        int error;
 536        mutex_lock(&net_mutex);
 537        error =  register_pernet_operations(first_device, ops);
 538        mutex_unlock(&net_mutex);
 539        return error;
 540}
 541EXPORT_SYMBOL_GPL(register_pernet_subsys);
 542
 543/**
 544 *      unregister_pernet_subsys - unregister a network namespace subsystem
 545 *      @ops: pernet operations structure to manipulate
 546 *
 547 *      Remove the pernet operations structure from the list to be
 548 *      used when network namespaces are created or destroyed.  In
 549 *      addition run the exit method for all existing network
 550 *      namespaces.
 551 */
 552void unregister_pernet_subsys(struct pernet_operations *ops)
 553{
 554        mutex_lock(&net_mutex);
 555        unregister_pernet_operations(ops);
 556        mutex_unlock(&net_mutex);
 557}
 558EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 559
 560/**
 561 *      register_pernet_device - register a network namespace device
 562 *      @ops:  pernet operations structure for the subsystem
 563 *
 564 *      Register a device which has init and exit functions
 565 *      that are called when network namespaces are created and
 566 *      destroyed respectively.
 567 *
 568 *      When registered all network namespace init functions are
 569 *      called for every existing network namespace.  Allowing kernel
 570 *      modules to have a race free view of the set of network namespaces.
 571 *
 572 *      When a new network namespace is created all of the init
 573 *      methods are called in the order in which they were registered.
 574 *
 575 *      When a network namespace is destroyed all of the exit methods
 576 *      are called in the reverse of the order with which they were
 577 *      registered.
 578 */
 579int register_pernet_device(struct pernet_operations *ops)
 580{
 581        int error;
 582        mutex_lock(&net_mutex);
 583        error = register_pernet_operations(&pernet_list, ops);
 584        if (!error && (first_device == &pernet_list))
 585                first_device = &ops->list;
 586        mutex_unlock(&net_mutex);
 587        return error;
 588}
 589EXPORT_SYMBOL_GPL(register_pernet_device);
 590
 591/**
 592 *      unregister_pernet_device - unregister a network namespace netdevice
 593 *      @ops: pernet operations structure to manipulate
 594 *
 595 *      Remove the pernet operations structure from the list to be
 596 *      used when network namespaces are created or destroyed.  In
 597 *      addition run the exit method for all existing network
 598 *      namespaces.
 599 */
 600void unregister_pernet_device(struct pernet_operations *ops)
 601{
 602        mutex_lock(&net_mutex);
 603        if (&ops->list == first_device)
 604                first_device = first_device->next;
 605        unregister_pernet_operations(ops);
 606        mutex_unlock(&net_mutex);
 607}
 608EXPORT_SYMBOL_GPL(unregister_pernet_device);
 609
 610#ifdef CONFIG_NET_NS
 611static void *netns_get(struct task_struct *task)
 612{
 613        struct net *net = NULL;
 614        struct nsproxy *nsproxy;
 615
 616        rcu_read_lock();
 617        nsproxy = task_nsproxy(task);
 618        if (nsproxy)
 619                net = get_net(nsproxy->net_ns);
 620        rcu_read_unlock();
 621
 622        return net;
 623}
 624
 625static void netns_put(void *ns)
 626{
 627        put_net(ns);
 628}
 629
 630static int netns_install(struct nsproxy *nsproxy, void *ns)
 631{
 632        put_net(nsproxy->net_ns);
 633        nsproxy->net_ns = get_net(ns);
 634        return 0;
 635}
 636
 637const struct proc_ns_operations netns_operations = {
 638        .name           = "net",
 639        .type           = CLONE_NEWNET,
 640        .get            = netns_get,
 641        .put            = netns_put,
 642        .install        = netns_install,
 643};
 644#endif
 645