linux/net/core/net_namespace.c
<<
>>
Prefs
   1#include <linux/workqueue.h>
   2#include <linux/rtnetlink.h>
   3#include <linux/cache.h>
   4#include <linux/slab.h>
   5#include <linux/list.h>
   6#include <linux/delay.h>
   7#include <linux/sched.h>
   8#include <linux/idr.h>
   9#include <linux/rculist.h>
  10#include <linux/nsproxy.h>
  11#include <linux/proc_fs.h>
  12#include <linux/file.h>
  13#include <linux/export.h>
  14#include <net/net_namespace.h>
  15#include <net/netns/generic.h>
  16
  17/*
  18 *      Our network namespace constructor/destructor lists
  19 */
  20
  21static LIST_HEAD(pernet_list);
  22static struct list_head *first_device = &pernet_list;
  23static DEFINE_MUTEX(net_mutex);
  24
  25LIST_HEAD(net_namespace_list);
  26EXPORT_SYMBOL_GPL(net_namespace_list);
  27
  28struct net init_net;
  29EXPORT_SYMBOL(init_net);
  30
  31#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */
  32
  33static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
  34
  35static struct net_generic *net_alloc_generic(void)
  36{
  37        struct net_generic *ng;
  38        size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
  39
  40        ng = kzalloc(generic_size, GFP_KERNEL);
  41        if (ng)
  42                ng->len = max_gen_ptrs;
  43
  44        return ng;
  45}
  46
  47static int net_assign_generic(struct net *net, int id, void *data)
  48{
  49        struct net_generic *ng, *old_ng;
  50
  51        BUG_ON(!mutex_is_locked(&net_mutex));
  52        BUG_ON(id == 0);
  53
  54        old_ng = rcu_dereference_protected(net->gen,
  55                                           lockdep_is_held(&net_mutex));
  56        ng = old_ng;
  57        if (old_ng->len >= id)
  58                goto assign;
  59
  60        ng = net_alloc_generic();
  61        if (ng == NULL)
  62                return -ENOMEM;
  63
  64        /*
  65         * Some synchronisation notes:
  66         *
  67         * The net_generic explores the net->gen array inside rcu
  68         * read section. Besides once set the net->gen->ptr[x]
  69         * pointer never changes (see rules in netns/generic.h).
  70         *
  71         * That said, we simply duplicate this array and schedule
  72         * the old copy for kfree after a grace period.
  73         */
  74
  75        memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
  76
  77        rcu_assign_pointer(net->gen, ng);
  78        kfree_rcu(old_ng, rcu);
  79assign:
  80        ng->ptr[id - 1] = data;
  81        return 0;
  82}
  83
  84static int ops_init(const struct pernet_operations *ops, struct net *net)
  85{
  86        int err = -ENOMEM;
  87        void *data = NULL;
  88
  89        if (ops->id && ops->size) {
  90                data = kzalloc(ops->size, GFP_KERNEL);
  91                if (!data)
  92                        goto out;
  93
  94                err = net_assign_generic(net, *ops->id, data);
  95                if (err)
  96                        goto cleanup;
  97        }
  98        err = 0;
  99        if (ops->init)
 100                err = ops->init(net);
 101        if (!err)
 102                return 0;
 103
 104cleanup:
 105        kfree(data);
 106
 107out:
 108        return err;
 109}
 110
 111static void ops_free(const struct pernet_operations *ops, struct net *net)
 112{
 113        if (ops->id && ops->size) {
 114                int id = *ops->id;
 115                kfree(net_generic(net, id));
 116        }
 117}
 118
 119static void ops_exit_list(const struct pernet_operations *ops,
 120                          struct list_head *net_exit_list)
 121{
 122        struct net *net;
 123        if (ops->exit) {
 124                list_for_each_entry(net, net_exit_list, exit_list)
 125                        ops->exit(net);
 126        }
 127        if (ops->exit_batch)
 128                ops->exit_batch(net_exit_list);
 129}
 130
 131static void ops_free_list(const struct pernet_operations *ops,
 132                          struct list_head *net_exit_list)
 133{
 134        struct net *net;
 135        if (ops->size && ops->id) {
 136                list_for_each_entry(net, net_exit_list, exit_list)
 137                        ops_free(ops, net);
 138        }
 139}
 140
 141/*
 142 * setup_net runs the initializers for the network namespace object.
 143 */
 144static __net_init int setup_net(struct net *net)
 145{
 146        /* Must be called with net_mutex held */
 147        const struct pernet_operations *ops, *saved_ops;
 148        int error = 0;
 149        LIST_HEAD(net_exit_list);
 150
 151        atomic_set(&net->count, 1);
 152        atomic_set(&net->passive, 1);
 153        net->dev_base_seq = 1;
 154
 155#ifdef NETNS_REFCNT_DEBUG
 156        atomic_set(&net->use_count, 0);
 157#endif
 158
 159        list_for_each_entry(ops, &pernet_list, list) {
 160                error = ops_init(ops, net);
 161                if (error < 0)
 162                        goto out_undo;
 163        }
 164out:
 165        return error;
 166
 167out_undo:
 168        /* Walk through the list backwards calling the exit functions
 169         * for the pernet modules whose init functions did not fail.
 170         */
 171        list_add(&net->exit_list, &net_exit_list);
 172        saved_ops = ops;
 173        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 174                ops_exit_list(ops, &net_exit_list);
 175
 176        ops = saved_ops;
 177        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 178                ops_free_list(ops, &net_exit_list);
 179
 180        rcu_barrier();
 181        goto out;
 182}
 183
 184
 185#ifdef CONFIG_NET_NS
 186static struct kmem_cache *net_cachep;
 187static struct workqueue_struct *netns_wq;
 188
 189static struct net *net_alloc(void)
 190{
 191        struct net *net = NULL;
 192        struct net_generic *ng;
 193
 194        ng = net_alloc_generic();
 195        if (!ng)
 196                goto out;
 197
 198        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
 199        if (!net)
 200                goto out_free;
 201
 202        rcu_assign_pointer(net->gen, ng);
 203out:
 204        return net;
 205
 206out_free:
 207        kfree(ng);
 208        goto out;
 209}
 210
 211static void net_free(struct net *net)
 212{
 213#ifdef NETNS_REFCNT_DEBUG
 214        if (unlikely(atomic_read(&net->use_count) != 0)) {
 215                printk(KERN_EMERG "network namespace not free! Usage: %d\n",
 216                        atomic_read(&net->use_count));
 217                return;
 218        }
 219#endif
 220        kfree(net->gen);
 221        kmem_cache_free(net_cachep, net);
 222}
 223
 224void net_drop_ns(void *p)
 225{
 226        struct net *ns = p;
 227        if (ns && atomic_dec_and_test(&ns->passive))
 228                net_free(ns);
 229}
 230
 231struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 232{
 233        struct net *net;
 234        int rv;
 235
 236        if (!(flags & CLONE_NEWNET))
 237                return get_net(old_net);
 238
 239        net = net_alloc();
 240        if (!net)
 241                return ERR_PTR(-ENOMEM);
 242        mutex_lock(&net_mutex);
 243        rv = setup_net(net);
 244        if (rv == 0) {
 245                rtnl_lock();
 246                list_add_tail_rcu(&net->list, &net_namespace_list);
 247                rtnl_unlock();
 248        }
 249        mutex_unlock(&net_mutex);
 250        if (rv < 0) {
 251                net_drop_ns(net);
 252                return ERR_PTR(rv);
 253        }
 254        return net;
 255}
 256
 257static DEFINE_SPINLOCK(cleanup_list_lock);
 258static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
 259
 260static void cleanup_net(struct work_struct *work)
 261{
 262        const struct pernet_operations *ops;
 263        struct net *net, *tmp;
 264        LIST_HEAD(net_kill_list);
 265        LIST_HEAD(net_exit_list);
 266
 267        /* Atomically snapshot the list of namespaces to cleanup */
 268        spin_lock_irq(&cleanup_list_lock);
 269        list_replace_init(&cleanup_list, &net_kill_list);
 270        spin_unlock_irq(&cleanup_list_lock);
 271
 272        mutex_lock(&net_mutex);
 273
 274        /* Don't let anyone else find us. */
 275        rtnl_lock();
 276        list_for_each_entry(net, &net_kill_list, cleanup_list) {
 277                list_del_rcu(&net->list);
 278                list_add_tail(&net->exit_list, &net_exit_list);
 279        }
 280        rtnl_unlock();
 281
 282        /*
 283         * Another CPU might be rcu-iterating the list, wait for it.
 284         * This needs to be before calling the exit() notifiers, so
 285         * the rcu_barrier() below isn't sufficient alone.
 286         */
 287        synchronize_rcu();
 288
 289        /* Run all of the network namespace exit methods */
 290        list_for_each_entry_reverse(ops, &pernet_list, list)
 291                ops_exit_list(ops, &net_exit_list);
 292
 293        /* Free the net generic variables */
 294        list_for_each_entry_reverse(ops, &pernet_list, list)
 295                ops_free_list(ops, &net_exit_list);
 296
 297        mutex_unlock(&net_mutex);
 298
 299        /* Ensure there are no outstanding rcu callbacks using this
 300         * network namespace.
 301         */
 302        rcu_barrier();
 303
 304        /* Finally it is safe to free my network namespace structure */
 305        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 306                list_del_init(&net->exit_list);
 307                net_drop_ns(net);
 308        }
 309}
 310static DECLARE_WORK(net_cleanup_work, cleanup_net);
 311
 312void __put_net(struct net *net)
 313{
 314        /* Cleanup the network namespace in process context */
 315        unsigned long flags;
 316
 317        spin_lock_irqsave(&cleanup_list_lock, flags);
 318        list_add(&net->cleanup_list, &cleanup_list);
 319        spin_unlock_irqrestore(&cleanup_list_lock, flags);
 320
 321        queue_work(netns_wq, &net_cleanup_work);
 322}
 323EXPORT_SYMBOL_GPL(__put_net);
 324
 325struct net *get_net_ns_by_fd(int fd)
 326{
 327        struct proc_inode *ei;
 328        struct file *file;
 329        struct net *net;
 330
 331        file = proc_ns_fget(fd);
 332        if (IS_ERR(file))
 333                return ERR_CAST(file);
 334
 335        ei = PROC_I(file->f_dentry->d_inode);
 336        if (ei->ns_ops == &netns_operations)
 337                net = get_net(ei->ns);
 338        else
 339                net = ERR_PTR(-EINVAL);
 340
 341        fput(file);
 342        return net;
 343}
 344
 345#else
 346struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 347{
 348        if (flags & CLONE_NEWNET)
 349                return ERR_PTR(-EINVAL);
 350        return old_net;
 351}
 352
 353struct net *get_net_ns_by_fd(int fd)
 354{
 355        return ERR_PTR(-EINVAL);
 356}
 357#endif
 358
 359struct net *get_net_ns_by_pid(pid_t pid)
 360{
 361        struct task_struct *tsk;
 362        struct net *net;
 363
 364        /* Lookup the network namespace */
 365        net = ERR_PTR(-ESRCH);
 366        rcu_read_lock();
 367        tsk = find_task_by_vpid(pid);
 368        if (tsk) {
 369                struct nsproxy *nsproxy;
 370                nsproxy = task_nsproxy(tsk);
 371                if (nsproxy)
 372                        net = get_net(nsproxy->net_ns);
 373        }
 374        rcu_read_unlock();
 375        return net;
 376}
 377EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 378
 379static int __init net_ns_init(void)
 380{
 381        struct net_generic *ng;
 382
 383#ifdef CONFIG_NET_NS
 384        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
 385                                        SMP_CACHE_BYTES,
 386                                        SLAB_PANIC, NULL);
 387
 388        /* Create workqueue for cleanup */
 389        netns_wq = create_singlethread_workqueue("netns");
 390        if (!netns_wq)
 391                panic("Could not create netns workq");
 392#endif
 393
 394        ng = net_alloc_generic();
 395        if (!ng)
 396                panic("Could not allocate generic netns");
 397
 398        rcu_assign_pointer(init_net.gen, ng);
 399
 400        mutex_lock(&net_mutex);
 401        if (setup_net(&init_net))
 402                panic("Could not setup the initial network namespace");
 403
 404        rtnl_lock();
 405        list_add_tail_rcu(&init_net.list, &net_namespace_list);
 406        rtnl_unlock();
 407
 408        mutex_unlock(&net_mutex);
 409
 410        return 0;
 411}
 412
 413pure_initcall(net_ns_init);
 414
 415#ifdef CONFIG_NET_NS
 416static int __register_pernet_operations(struct list_head *list,
 417                                        struct pernet_operations *ops)
 418{
 419        struct net *net;
 420        int error;
 421        LIST_HEAD(net_exit_list);
 422
 423        list_add_tail(&ops->list, list);
 424        if (ops->init || (ops->id && ops->size)) {
 425                for_each_net(net) {
 426                        error = ops_init(ops, net);
 427                        if (error)
 428                                goto out_undo;
 429                        list_add_tail(&net->exit_list, &net_exit_list);
 430                }
 431        }
 432        return 0;
 433
 434out_undo:
 435        /* If I have an error cleanup all namespaces I initialized */
 436        list_del(&ops->list);
 437        ops_exit_list(ops, &net_exit_list);
 438        ops_free_list(ops, &net_exit_list);
 439        return error;
 440}
 441
 442static void __unregister_pernet_operations(struct pernet_operations *ops)
 443{
 444        struct net *net;
 445        LIST_HEAD(net_exit_list);
 446
 447        list_del(&ops->list);
 448        for_each_net(net)
 449                list_add_tail(&net->exit_list, &net_exit_list);
 450        ops_exit_list(ops, &net_exit_list);
 451        ops_free_list(ops, &net_exit_list);
 452}
 453
 454#else
 455
 456static int __register_pernet_operations(struct list_head *list,
 457                                        struct pernet_operations *ops)
 458{
 459        return ops_init(ops, &init_net);
 460}
 461
 462static void __unregister_pernet_operations(struct pernet_operations *ops)
 463{
 464        LIST_HEAD(net_exit_list);
 465        list_add(&init_net.exit_list, &net_exit_list);
 466        ops_exit_list(ops, &net_exit_list);
 467        ops_free_list(ops, &net_exit_list);
 468}
 469
 470#endif /* CONFIG_NET_NS */
 471
 472static DEFINE_IDA(net_generic_ids);
 473
 474static int register_pernet_operations(struct list_head *list,
 475                                      struct pernet_operations *ops)
 476{
 477        int error;
 478
 479        if (ops->id) {
 480again:
 481                error = ida_get_new_above(&net_generic_ids, 1, ops->id);
 482                if (error < 0) {
 483                        if (error == -EAGAIN) {
 484                                ida_pre_get(&net_generic_ids, GFP_KERNEL);
 485                                goto again;
 486                        }
 487                        return error;
 488                }
 489                max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
 490        }
 491        error = __register_pernet_operations(list, ops);
 492        if (error) {
 493                rcu_barrier();
 494                if (ops->id)
 495                        ida_remove(&net_generic_ids, *ops->id);
 496        }
 497
 498        return error;
 499}
 500
 501static void unregister_pernet_operations(struct pernet_operations *ops)
 502{
 503        
 504        __unregister_pernet_operations(ops);
 505        rcu_barrier();
 506        if (ops->id)
 507                ida_remove(&net_generic_ids, *ops->id);
 508}
 509
 510/**
 511 *      register_pernet_subsys - register a network namespace subsystem
 512 *      @ops:  pernet operations structure for the subsystem
 513 *
 514 *      Register a subsystem which has init and exit functions
 515 *      that are called when network namespaces are created and
 516 *      destroyed respectively.
 517 *
 518 *      When registered all network namespace init functions are
 519 *      called for every existing network namespace.  Allowing kernel
 520 *      modules to have a race free view of the set of network namespaces.
 521 *
 522 *      When a new network namespace is created all of the init
 523 *      methods are called in the order in which they were registered.
 524 *
 525 *      When a network namespace is destroyed all of the exit methods
 526 *      are called in the reverse of the order with which they were
 527 *      registered.
 528 */
 529int register_pernet_subsys(struct pernet_operations *ops)
 530{
 531        int error;
 532        mutex_lock(&net_mutex);
 533        error =  register_pernet_operations(first_device, ops);
 534        mutex_unlock(&net_mutex);
 535        return error;
 536}
 537EXPORT_SYMBOL_GPL(register_pernet_subsys);
 538
 539/**
 540 *      unregister_pernet_subsys - unregister a network namespace subsystem
 541 *      @ops: pernet operations structure to manipulate
 542 *
 543 *      Remove the pernet operations structure from the list to be
 544 *      used when network namespaces are created or destroyed.  In
 545 *      addition run the exit method for all existing network
 546 *      namespaces.
 547 */
 548void unregister_pernet_subsys(struct pernet_operations *ops)
 549{
 550        mutex_lock(&net_mutex);
 551        unregister_pernet_operations(ops);
 552        mutex_unlock(&net_mutex);
 553}
 554EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 555
 556/**
 557 *      register_pernet_device - register a network namespace device
 558 *      @ops:  pernet operations structure for the subsystem
 559 *
 560 *      Register a device which has init and exit functions
 561 *      that are called when network namespaces are created and
 562 *      destroyed respectively.
 563 *
 564 *      When registered all network namespace init functions are
 565 *      called for every existing network namespace.  Allowing kernel
 566 *      modules to have a race free view of the set of network namespaces.
 567 *
 568 *      When a new network namespace is created all of the init
 569 *      methods are called in the order in which they were registered.
 570 *
 571 *      When a network namespace is destroyed all of the exit methods
 572 *      are called in the reverse of the order with which they were
 573 *      registered.
 574 */
 575int register_pernet_device(struct pernet_operations *ops)
 576{
 577        int error;
 578        mutex_lock(&net_mutex);
 579        error = register_pernet_operations(&pernet_list, ops);
 580        if (!error && (first_device == &pernet_list))
 581                first_device = &ops->list;
 582        mutex_unlock(&net_mutex);
 583        return error;
 584}
 585EXPORT_SYMBOL_GPL(register_pernet_device);
 586
 587/**
 588 *      unregister_pernet_device - unregister a network namespace netdevice
 589 *      @ops: pernet operations structure to manipulate
 590 *
 591 *      Remove the pernet operations structure from the list to be
 592 *      used when network namespaces are created or destroyed.  In
 593 *      addition run the exit method for all existing network
 594 *      namespaces.
 595 */
 596void unregister_pernet_device(struct pernet_operations *ops)
 597{
 598        mutex_lock(&net_mutex);
 599        if (&ops->list == first_device)
 600                first_device = first_device->next;
 601        unregister_pernet_operations(ops);
 602        mutex_unlock(&net_mutex);
 603}
 604EXPORT_SYMBOL_GPL(unregister_pernet_device);
 605
 606#ifdef CONFIG_NET_NS
 607static void *netns_get(struct task_struct *task)
 608{
 609        struct net *net = NULL;
 610        struct nsproxy *nsproxy;
 611
 612        rcu_read_lock();
 613        nsproxy = task_nsproxy(task);
 614        if (nsproxy)
 615                net = get_net(nsproxy->net_ns);
 616        rcu_read_unlock();
 617
 618        return net;
 619}
 620
 621static void netns_put(void *ns)
 622{
 623        put_net(ns);
 624}
 625
 626static int netns_install(struct nsproxy *nsproxy, void *ns)
 627{
 628        put_net(nsproxy->net_ns);
 629        nsproxy->net_ns = get_net(ns);
 630        return 0;
 631}
 632
 633const struct proc_ns_operations netns_operations = {
 634        .name           = "net",
 635        .type           = CLONE_NEWNET,
 636        .get            = netns_get,
 637        .put            = netns_put,
 638        .install        = netns_install,
 639};
 640#endif
 641