linux/net/core/net_namespace.c
<<
>>
Prefs
   1#include <linux/workqueue.h>
   2#include <linux/rtnetlink.h>
   3#include <linux/cache.h>
   4#include <linux/slab.h>
   5#include <linux/list.h>
   6#include <linux/delay.h>
   7#include <linux/sched.h>
   8#include <linux/idr.h>
   9#include <linux/rculist.h>
  10#include <linux/nsproxy.h>
  11#include <linux/proc_fs.h>
  12#include <linux/file.h>
  13#include <net/net_namespace.h>
  14#include <net/netns/generic.h>
  15
  16/*
  17 *      Our network namespace constructor/destructor lists
  18 */
  19
  20static LIST_HEAD(pernet_list);
  21static struct list_head *first_device = &pernet_list;
  22static DEFINE_MUTEX(net_mutex);
  23
  24LIST_HEAD(net_namespace_list);
  25EXPORT_SYMBOL_GPL(net_namespace_list);
  26
  27struct net init_net;
  28EXPORT_SYMBOL(init_net);
  29
  30#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */
  31
  32static int net_assign_generic(struct net *net, int id, void *data)
  33{
  34        struct net_generic *ng, *old_ng;
  35
  36        BUG_ON(!mutex_is_locked(&net_mutex));
  37        BUG_ON(id == 0);
  38
  39        old_ng = rcu_dereference_protected(net->gen,
  40                                           lockdep_is_held(&net_mutex));
  41        ng = old_ng;
  42        if (old_ng->len >= id)
  43                goto assign;
  44
  45        ng = kzalloc(sizeof(struct net_generic) +
  46                        id * sizeof(void *), GFP_KERNEL);
  47        if (ng == NULL)
  48                return -ENOMEM;
  49
  50        /*
  51         * Some synchronisation notes:
  52         *
  53         * The net_generic explores the net->gen array inside rcu
  54         * read section. Besides once set the net->gen->ptr[x]
  55         * pointer never changes (see rules in netns/generic.h).
  56         *
  57         * That said, we simply duplicate this array and schedule
  58         * the old copy for kfree after a grace period.
  59         */
  60
  61        ng->len = id;
  62        memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
  63
  64        rcu_assign_pointer(net->gen, ng);
  65        kfree_rcu(old_ng, rcu);
  66assign:
  67        ng->ptr[id - 1] = data;
  68        return 0;
  69}
  70
  71static int ops_init(const struct pernet_operations *ops, struct net *net)
  72{
  73        int err;
  74        if (ops->id && ops->size) {
  75                void *data = kzalloc(ops->size, GFP_KERNEL);
  76                if (!data)
  77                        return -ENOMEM;
  78
  79                err = net_assign_generic(net, *ops->id, data);
  80                if (err) {
  81                        kfree(data);
  82                        return err;
  83                }
  84        }
  85        if (ops->init)
  86                return ops->init(net);
  87        return 0;
  88}
  89
  90static void ops_free(const struct pernet_operations *ops, struct net *net)
  91{
  92        if (ops->id && ops->size) {
  93                int id = *ops->id;
  94                kfree(net_generic(net, id));
  95        }
  96}
  97
  98static void ops_exit_list(const struct pernet_operations *ops,
  99                          struct list_head *net_exit_list)
 100{
 101        struct net *net;
 102        if (ops->exit) {
 103                list_for_each_entry(net, net_exit_list, exit_list)
 104                        ops->exit(net);
 105        }
 106        if (ops->exit_batch)
 107                ops->exit_batch(net_exit_list);
 108}
 109
 110static void ops_free_list(const struct pernet_operations *ops,
 111                          struct list_head *net_exit_list)
 112{
 113        struct net *net;
 114        if (ops->size && ops->id) {
 115                list_for_each_entry(net, net_exit_list, exit_list)
 116                        ops_free(ops, net);
 117        }
 118}
 119
 120/*
 121 * setup_net runs the initializers for the network namespace object.
 122 */
 123static __net_init int setup_net(struct net *net)
 124{
 125        /* Must be called with net_mutex held */
 126        const struct pernet_operations *ops, *saved_ops;
 127        int error = 0;
 128        LIST_HEAD(net_exit_list);
 129
 130        atomic_set(&net->count, 1);
 131        atomic_set(&net->passive, 1);
 132
 133#ifdef NETNS_REFCNT_DEBUG
 134        atomic_set(&net->use_count, 0);
 135#endif
 136
 137        list_for_each_entry(ops, &pernet_list, list) {
 138                error = ops_init(ops, net);
 139                if (error < 0)
 140                        goto out_undo;
 141        }
 142out:
 143        return error;
 144
 145out_undo:
 146        /* Walk through the list backwards calling the exit functions
 147         * for the pernet modules whose init functions did not fail.
 148         */
 149        list_add(&net->exit_list, &net_exit_list);
 150        saved_ops = ops;
 151        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 152                ops_exit_list(ops, &net_exit_list);
 153
 154        ops = saved_ops;
 155        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 156                ops_free_list(ops, &net_exit_list);
 157
 158        rcu_barrier();
 159        goto out;
 160}
 161
 162static struct net_generic *net_alloc_generic(void)
 163{
 164        struct net_generic *ng;
 165        size_t generic_size = sizeof(struct net_generic) +
 166                INITIAL_NET_GEN_PTRS * sizeof(void *);
 167
 168        ng = kzalloc(generic_size, GFP_KERNEL);
 169        if (ng)
 170                ng->len = INITIAL_NET_GEN_PTRS;
 171
 172        return ng;
 173}
 174
 175#ifdef CONFIG_NET_NS
 176static struct kmem_cache *net_cachep;
 177static struct workqueue_struct *netns_wq;
 178
 179static struct net *net_alloc(void)
 180{
 181        struct net *net = NULL;
 182        struct net_generic *ng;
 183
 184        ng = net_alloc_generic();
 185        if (!ng)
 186                goto out;
 187
 188        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
 189        if (!net)
 190                goto out_free;
 191
 192        rcu_assign_pointer(net->gen, ng);
 193out:
 194        return net;
 195
 196out_free:
 197        kfree(ng);
 198        goto out;
 199}
 200
 201static void net_free(struct net *net)
 202{
 203#ifdef NETNS_REFCNT_DEBUG
 204        if (unlikely(atomic_read(&net->use_count) != 0)) {
 205                printk(KERN_EMERG "network namespace not free! Usage: %d\n",
 206                        atomic_read(&net->use_count));
 207                return;
 208        }
 209#endif
 210        kfree(net->gen);
 211        kmem_cache_free(net_cachep, net);
 212}
 213
 214void net_drop_ns(void *p)
 215{
 216        struct net *ns = p;
 217        if (ns && atomic_dec_and_test(&ns->passive))
 218                net_free(ns);
 219}
 220
 221struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 222{
 223        struct net *net;
 224        int rv;
 225
 226        if (!(flags & CLONE_NEWNET))
 227                return get_net(old_net);
 228
 229        net = net_alloc();
 230        if (!net)
 231                return ERR_PTR(-ENOMEM);
 232        mutex_lock(&net_mutex);
 233        rv = setup_net(net);
 234        if (rv == 0) {
 235                rtnl_lock();
 236                list_add_tail_rcu(&net->list, &net_namespace_list);
 237                rtnl_unlock();
 238        }
 239        mutex_unlock(&net_mutex);
 240        if (rv < 0) {
 241                net_drop_ns(net);
 242                return ERR_PTR(rv);
 243        }
 244        return net;
 245}
 246
 247static DEFINE_SPINLOCK(cleanup_list_lock);
 248static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
 249
 250static void cleanup_net(struct work_struct *work)
 251{
 252        const struct pernet_operations *ops;
 253        struct net *net, *tmp;
 254        LIST_HEAD(net_kill_list);
 255        LIST_HEAD(net_exit_list);
 256
 257        /* Atomically snapshot the list of namespaces to cleanup */
 258        spin_lock_irq(&cleanup_list_lock);
 259        list_replace_init(&cleanup_list, &net_kill_list);
 260        spin_unlock_irq(&cleanup_list_lock);
 261
 262        mutex_lock(&net_mutex);
 263
 264        /* Don't let anyone else find us. */
 265        rtnl_lock();
 266        list_for_each_entry(net, &net_kill_list, cleanup_list) {
 267                list_del_rcu(&net->list);
 268                list_add_tail(&net->exit_list, &net_exit_list);
 269        }
 270        rtnl_unlock();
 271
 272        /*
 273         * Another CPU might be rcu-iterating the list, wait for it.
 274         * This needs to be before calling the exit() notifiers, so
 275         * the rcu_barrier() below isn't sufficient alone.
 276         */
 277        synchronize_rcu();
 278
 279        /* Run all of the network namespace exit methods */
 280        list_for_each_entry_reverse(ops, &pernet_list, list)
 281                ops_exit_list(ops, &net_exit_list);
 282
 283        /* Free the net generic variables */
 284        list_for_each_entry_reverse(ops, &pernet_list, list)
 285                ops_free_list(ops, &net_exit_list);
 286
 287        mutex_unlock(&net_mutex);
 288
 289        /* Ensure there are no outstanding rcu callbacks using this
 290         * network namespace.
 291         */
 292        rcu_barrier();
 293
 294        /* Finally it is safe to free my network namespace structure */
 295        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 296                list_del_init(&net->exit_list);
 297                net_drop_ns(net);
 298        }
 299}
 300static DECLARE_WORK(net_cleanup_work, cleanup_net);
 301
 302void __put_net(struct net *net)
 303{
 304        /* Cleanup the network namespace in process context */
 305        unsigned long flags;
 306
 307        spin_lock_irqsave(&cleanup_list_lock, flags);
 308        list_add(&net->cleanup_list, &cleanup_list);
 309        spin_unlock_irqrestore(&cleanup_list_lock, flags);
 310
 311        queue_work(netns_wq, &net_cleanup_work);
 312}
 313EXPORT_SYMBOL_GPL(__put_net);
 314
 315struct net *get_net_ns_by_fd(int fd)
 316{
 317        struct proc_inode *ei;
 318        struct file *file;
 319        struct net *net;
 320
 321        file = proc_ns_fget(fd);
 322        if (IS_ERR(file))
 323                return ERR_CAST(file);
 324
 325        ei = PROC_I(file->f_dentry->d_inode);
 326        if (ei->ns_ops == &netns_operations)
 327                net = get_net(ei->ns);
 328        else
 329                net = ERR_PTR(-EINVAL);
 330
 331        fput(file);
 332        return net;
 333}
 334
 335#else
 336struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 337{
 338        if (flags & CLONE_NEWNET)
 339                return ERR_PTR(-EINVAL);
 340        return old_net;
 341}
 342
 343struct net *get_net_ns_by_fd(int fd)
 344{
 345        return ERR_PTR(-EINVAL);
 346}
 347#endif
 348
 349struct net *get_net_ns_by_pid(pid_t pid)
 350{
 351        struct task_struct *tsk;
 352        struct net *net;
 353
 354        /* Lookup the network namespace */
 355        net = ERR_PTR(-ESRCH);
 356        rcu_read_lock();
 357        tsk = find_task_by_vpid(pid);
 358        if (tsk) {
 359                struct nsproxy *nsproxy;
 360                nsproxy = task_nsproxy(tsk);
 361                if (nsproxy)
 362                        net = get_net(nsproxy->net_ns);
 363        }
 364        rcu_read_unlock();
 365        return net;
 366}
 367EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 368
 369static int __init net_ns_init(void)
 370{
 371        struct net_generic *ng;
 372
 373#ifdef CONFIG_NET_NS
 374        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
 375                                        SMP_CACHE_BYTES,
 376                                        SLAB_PANIC, NULL);
 377
 378        /* Create workqueue for cleanup */
 379        netns_wq = create_singlethread_workqueue("netns");
 380        if (!netns_wq)
 381                panic("Could not create netns workq");
 382#endif
 383
 384        ng = net_alloc_generic();
 385        if (!ng)
 386                panic("Could not allocate generic netns");
 387
 388        rcu_assign_pointer(init_net.gen, ng);
 389
 390        mutex_lock(&net_mutex);
 391        if (setup_net(&init_net))
 392                panic("Could not setup the initial network namespace");
 393
 394        rtnl_lock();
 395        list_add_tail_rcu(&init_net.list, &net_namespace_list);
 396        rtnl_unlock();
 397
 398        mutex_unlock(&net_mutex);
 399
 400        return 0;
 401}
 402
 403pure_initcall(net_ns_init);
 404
 405#ifdef CONFIG_NET_NS
 406static int __register_pernet_operations(struct list_head *list,
 407                                        struct pernet_operations *ops)
 408{
 409        struct net *net;
 410        int error;
 411        LIST_HEAD(net_exit_list);
 412
 413        list_add_tail(&ops->list, list);
 414        if (ops->init || (ops->id && ops->size)) {
 415                for_each_net(net) {
 416                        error = ops_init(ops, net);
 417                        if (error)
 418                                goto out_undo;
 419                        list_add_tail(&net->exit_list, &net_exit_list);
 420                }
 421        }
 422        return 0;
 423
 424out_undo:
 425        /* If I have an error cleanup all namespaces I initialized */
 426        list_del(&ops->list);
 427        ops_exit_list(ops, &net_exit_list);
 428        ops_free_list(ops, &net_exit_list);
 429        return error;
 430}
 431
 432static void __unregister_pernet_operations(struct pernet_operations *ops)
 433{
 434        struct net *net;
 435        LIST_HEAD(net_exit_list);
 436
 437        list_del(&ops->list);
 438        for_each_net(net)
 439                list_add_tail(&net->exit_list, &net_exit_list);
 440        ops_exit_list(ops, &net_exit_list);
 441        ops_free_list(ops, &net_exit_list);
 442}
 443
 444#else
 445
 446static int __register_pernet_operations(struct list_head *list,
 447                                        struct pernet_operations *ops)
 448{
 449        int err = 0;
 450        err = ops_init(ops, &init_net);
 451        if (err)
 452                ops_free(ops, &init_net);
 453        return err;
 454        
 455}
 456
 457static void __unregister_pernet_operations(struct pernet_operations *ops)
 458{
 459        LIST_HEAD(net_exit_list);
 460        list_add(&init_net.exit_list, &net_exit_list);
 461        ops_exit_list(ops, &net_exit_list);
 462        ops_free_list(ops, &net_exit_list);
 463}
 464
 465#endif /* CONFIG_NET_NS */
 466
 467static DEFINE_IDA(net_generic_ids);
 468
 469static int register_pernet_operations(struct list_head *list,
 470                                      struct pernet_operations *ops)
 471{
 472        int error;
 473
 474        if (ops->id) {
 475again:
 476                error = ida_get_new_above(&net_generic_ids, 1, ops->id);
 477                if (error < 0) {
 478                        if (error == -EAGAIN) {
 479                                ida_pre_get(&net_generic_ids, GFP_KERNEL);
 480                                goto again;
 481                        }
 482                        return error;
 483                }
 484        }
 485        error = __register_pernet_operations(list, ops);
 486        if (error) {
 487                rcu_barrier();
 488                if (ops->id)
 489                        ida_remove(&net_generic_ids, *ops->id);
 490        }
 491
 492        return error;
 493}
 494
 495static void unregister_pernet_operations(struct pernet_operations *ops)
 496{
 497        
 498        __unregister_pernet_operations(ops);
 499        rcu_barrier();
 500        if (ops->id)
 501                ida_remove(&net_generic_ids, *ops->id);
 502}
 503
 504/**
 505 *      register_pernet_subsys - register a network namespace subsystem
 506 *      @ops:  pernet operations structure for the subsystem
 507 *
 508 *      Register a subsystem which has init and exit functions
 509 *      that are called when network namespaces are created and
 510 *      destroyed respectively.
 511 *
 512 *      When registered all network namespace init functions are
 513 *      called for every existing network namespace.  Allowing kernel
 514 *      modules to have a race free view of the set of network namespaces.
 515 *
 516 *      When a new network namespace is created all of the init
 517 *      methods are called in the order in which they were registered.
 518 *
 519 *      When a network namespace is destroyed all of the exit methods
 520 *      are called in the reverse of the order with which they were
 521 *      registered.
 522 */
 523int register_pernet_subsys(struct pernet_operations *ops)
 524{
 525        int error;
 526        mutex_lock(&net_mutex);
 527        error =  register_pernet_operations(first_device, ops);
 528        mutex_unlock(&net_mutex);
 529        return error;
 530}
 531EXPORT_SYMBOL_GPL(register_pernet_subsys);
 532
 533/**
 534 *      unregister_pernet_subsys - unregister a network namespace subsystem
 535 *      @ops: pernet operations structure to manipulate
 536 *
 537 *      Remove the pernet operations structure from the list to be
 538 *      used when network namespaces are created or destroyed.  In
 539 *      addition run the exit method for all existing network
 540 *      namespaces.
 541 */
 542void unregister_pernet_subsys(struct pernet_operations *ops)
 543{
 544        mutex_lock(&net_mutex);
 545        unregister_pernet_operations(ops);
 546        mutex_unlock(&net_mutex);
 547}
 548EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 549
 550/**
 551 *      register_pernet_device - register a network namespace device
 552 *      @ops:  pernet operations structure for the subsystem
 553 *
 554 *      Register a device which has init and exit functions
 555 *      that are called when network namespaces are created and
 556 *      destroyed respectively.
 557 *
 558 *      When registered all network namespace init functions are
 559 *      called for every existing network namespace.  Allowing kernel
 560 *      modules to have a race free view of the set of network namespaces.
 561 *
 562 *      When a new network namespace is created all of the init
 563 *      methods are called in the order in which they were registered.
 564 *
 565 *      When a network namespace is destroyed all of the exit methods
 566 *      are called in the reverse of the order with which they were
 567 *      registered.
 568 */
 569int register_pernet_device(struct pernet_operations *ops)
 570{
 571        int error;
 572        mutex_lock(&net_mutex);
 573        error = register_pernet_operations(&pernet_list, ops);
 574        if (!error && (first_device == &pernet_list))
 575                first_device = &ops->list;
 576        mutex_unlock(&net_mutex);
 577        return error;
 578}
 579EXPORT_SYMBOL_GPL(register_pernet_device);
 580
 581/**
 582 *      unregister_pernet_device - unregister a network namespace netdevice
 583 *      @ops: pernet operations structure to manipulate
 584 *
 585 *      Remove the pernet operations structure from the list to be
 586 *      used when network namespaces are created or destroyed.  In
 587 *      addition run the exit method for all existing network
 588 *      namespaces.
 589 */
 590void unregister_pernet_device(struct pernet_operations *ops)
 591{
 592        mutex_lock(&net_mutex);
 593        if (&ops->list == first_device)
 594                first_device = first_device->next;
 595        unregister_pernet_operations(ops);
 596        mutex_unlock(&net_mutex);
 597}
 598EXPORT_SYMBOL_GPL(unregister_pernet_device);
 599
 600#ifdef CONFIG_NET_NS
 601static void *netns_get(struct task_struct *task)
 602{
 603        struct net *net = NULL;
 604        struct nsproxy *nsproxy;
 605
 606        rcu_read_lock();
 607        nsproxy = task_nsproxy(task);
 608        if (nsproxy)
 609                net = get_net(nsproxy->net_ns);
 610        rcu_read_unlock();
 611
 612        return net;
 613}
 614
 615static void netns_put(void *ns)
 616{
 617        put_net(ns);
 618}
 619
 620static int netns_install(struct nsproxy *nsproxy, void *ns)
 621{
 622        put_net(nsproxy->net_ns);
 623        nsproxy->net_ns = get_net(ns);
 624        return 0;
 625}
 626
 627const struct proc_ns_operations netns_operations = {
 628        .name           = "net",
 629        .type           = CLONE_NEWNET,
 630        .get            = netns_get,
 631        .put            = netns_put,
 632        .install        = netns_install,
 633};
 634#endif
 635