linux/kernel/nsproxy.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *  Copyright (C) 2006 IBM Corporation
   4 *
   5 *  Author: Serge Hallyn <serue@us.ibm.com>
   6 *
   7 *  Jun 2006 - namespaces support
   8 *             OpenVZ, SWsoft Inc.
   9 *             Pavel Emelianov <xemul@openvz.org>
  10 */
  11
  12#include <linux/slab.h>
  13#include <linux/export.h>
  14#include <linux/nsproxy.h>
  15#include <linux/init_task.h>
  16#include <linux/mnt_namespace.h>
  17#include <linux/utsname.h>
  18#include <linux/pid_namespace.h>
  19#include <net/net_namespace.h>
  20#include <linux/ipc_namespace.h>
  21#include <linux/time_namespace.h>
  22#include <linux/fs_struct.h>
  23#include <linux/proc_fs.h>
  24#include <linux/proc_ns.h>
  25#include <linux/file.h>
  26#include <linux/syscalls.h>
  27#include <linux/cgroup.h>
  28#include <linux/perf_event.h>
  29
/* Slab cache used in this file to allocate and free struct nsproxy objects. */
static struct kmem_cache *nsproxy_cachep;
  31
/*
 * Statically initialized nsproxy whose members point at the init_*
 * namespace objects; its reference count starts at 1.  Members guarded
 * by a config option are only initialized when that option is enabled.
 *
 * mnt_ns is NULL in this initializer.
 * NOTE(review): presumably it is assigned once the initial mount
 * namespace has been set up -- confirm against the mount init code.
 */
struct nsproxy init_nsproxy = {
	.count			= ATOMIC_INIT(1),
	.uts_ns			= &init_uts_ns,
#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
	.ipc_ns			= &init_ipc_ns,
#endif
	.mnt_ns			= NULL,
	.pid_ns_for_children	= &init_pid_ns,
#ifdef CONFIG_NET
	.net_ns			= &init_net,
#endif
#ifdef CONFIG_CGROUPS
	.cgroup_ns		= &init_cgroup_ns,
#endif
#ifdef CONFIG_TIME_NS
	.time_ns		= &init_time_ns,
	.time_ns_for_children	= &init_time_ns,
#endif
};
  51
  52static inline struct nsproxy *create_nsproxy(void)
  53{
  54        struct nsproxy *nsproxy;
  55
  56        nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
  57        if (nsproxy)
  58                atomic_set(&nsproxy->count, 1);
  59        return nsproxy;
  60}
  61
  62/*
  63 * Create new nsproxy and all of its the associated namespaces.
  64 * Return the newly created nsproxy.  Do not attach this to the task,
  65 * leave it to the caller to do proper locking and attach it to task.
  66 */
  67static struct nsproxy *create_new_namespaces(unsigned long flags,
  68        struct task_struct *tsk, struct user_namespace *user_ns,
  69        struct fs_struct *new_fs)
  70{
  71        struct nsproxy *new_nsp;
  72        int err;
  73
  74        new_nsp = create_nsproxy();
  75        if (!new_nsp)
  76                return ERR_PTR(-ENOMEM);
  77
  78        new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
  79        if (IS_ERR(new_nsp->mnt_ns)) {
  80                err = PTR_ERR(new_nsp->mnt_ns);
  81                goto out_ns;
  82        }
  83
  84        new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
  85        if (IS_ERR(new_nsp->uts_ns)) {
  86                err = PTR_ERR(new_nsp->uts_ns);
  87                goto out_uts;
  88        }
  89
  90        new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
  91        if (IS_ERR(new_nsp->ipc_ns)) {
  92                err = PTR_ERR(new_nsp->ipc_ns);
  93                goto out_ipc;
  94        }
  95
  96        new_nsp->pid_ns_for_children =
  97                copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
  98        if (IS_ERR(new_nsp->pid_ns_for_children)) {
  99                err = PTR_ERR(new_nsp->pid_ns_for_children);
 100                goto out_pid;
 101        }
 102
 103        new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
 104                                            tsk->nsproxy->cgroup_ns);
 105        if (IS_ERR(new_nsp->cgroup_ns)) {
 106                err = PTR_ERR(new_nsp->cgroup_ns);
 107                goto out_cgroup;
 108        }
 109
 110        new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
 111        if (IS_ERR(new_nsp->net_ns)) {
 112                err = PTR_ERR(new_nsp->net_ns);
 113                goto out_net;
 114        }
 115
 116        new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns,
 117                                        tsk->nsproxy->time_ns_for_children);
 118        if (IS_ERR(new_nsp->time_ns_for_children)) {
 119                err = PTR_ERR(new_nsp->time_ns_for_children);
 120                goto out_time;
 121        }
 122        new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns);
 123
 124        return new_nsp;
 125
 126out_time:
 127        put_net(new_nsp->net_ns);
 128out_net:
 129        put_cgroup_ns(new_nsp->cgroup_ns);
 130out_cgroup:
 131        if (new_nsp->pid_ns_for_children)
 132                put_pid_ns(new_nsp->pid_ns_for_children);
 133out_pid:
 134        if (new_nsp->ipc_ns)
 135                put_ipc_ns(new_nsp->ipc_ns);
 136out_ipc:
 137        if (new_nsp->uts_ns)
 138                put_uts_ns(new_nsp->uts_ns);
 139out_uts:
 140        if (new_nsp->mnt_ns)
 141                put_mnt_ns(new_nsp->mnt_ns);
 142out_ns:
 143        kmem_cache_free(nsproxy_cachep, new_nsp);
 144        return ERR_PTR(err);
 145}
 146
 147/*
 148 * called from clone.  This now handles copy for nsproxy and all
 149 * namespaces therein.
 150 */
 151int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 152{
 153        struct nsproxy *old_ns = tsk->nsproxy;
 154        struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
 155        struct nsproxy *new_ns;
 156
 157        if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 158                              CLONE_NEWPID | CLONE_NEWNET |
 159                              CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
 160                if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
 161                        get_nsproxy(old_ns);
 162                        return 0;
 163                }
 164        } else if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 165                return -EPERM;
 166
 167        /*
 168         * CLONE_NEWIPC must detach from the undolist: after switching
 169         * to a new ipc namespace, the semaphore arrays from the old
 170         * namespace are unreachable.  In clone parlance, CLONE_SYSVSEM
 171         * means share undolist with parent, so we must forbid using
 172         * it along with CLONE_NEWIPC.
 173         */
 174        if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==
 175                (CLONE_NEWIPC | CLONE_SYSVSEM))
 176                return -EINVAL;
 177
 178        new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
 179        if (IS_ERR(new_ns))
 180                return  PTR_ERR(new_ns);
 181
 182        timens_on_fork(new_ns, tsk);
 183
 184        tsk->nsproxy = new_ns;
 185        return 0;
 186}
 187
/*
 * Release every namespace reference held by @ns and return @ns itself
 * to the nsproxy slab cache.
 *
 * The mnt, uts, ipc, pid and both time namespace pointers are only put
 * when non-NULL; cgroup_ns and net_ns are handed to their put helpers
 * unconditionally.
 */
void free_nsproxy(struct nsproxy *ns)
{
	if (ns->mnt_ns)
		put_mnt_ns(ns->mnt_ns);
	if (ns->uts_ns)
		put_uts_ns(ns->uts_ns);
	if (ns->ipc_ns)
		put_ipc_ns(ns->ipc_ns);
	if (ns->pid_ns_for_children)
		put_pid_ns(ns->pid_ns_for_children);
	if (ns->time_ns)
		put_time_ns(ns->time_ns);
	if (ns->time_ns_for_children)
		put_time_ns(ns->time_ns_for_children);
	put_cgroup_ns(ns->cgroup_ns);
	put_net(ns->net_ns);
	kmem_cache_free(nsproxy_cachep, ns);
}
 206
 207/*
 208 * Called from unshare. Unshare all the namespaces part of nsproxy.
 209 * On success, returns the new nsproxy.
 210 */
 211int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 212        struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
 213{
 214        struct user_namespace *user_ns;
 215        int err = 0;
 216
 217        if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 218                               CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
 219                               CLONE_NEWTIME)))
 220                return 0;
 221
 222        user_ns = new_cred ? new_cred->user_ns : current_user_ns();
 223        if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 224                return -EPERM;
 225
 226        *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
 227                                         new_fs ? new_fs : current->fs);
 228        if (IS_ERR(*new_nsp)) {
 229                err = PTR_ERR(*new_nsp);
 230                goto out;
 231        }
 232
 233out:
 234        return err;
 235}
 236
 237void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
 238{
 239        struct nsproxy *ns;
 240
 241        might_sleep();
 242
 243        task_lock(p);
 244        ns = p->nsproxy;
 245        p->nsproxy = new;
 246        task_unlock(p);
 247
 248        if (ns)
 249                put_nsproxy(ns);
 250}
 251
/*
 * Detach task @p from its namespaces: its nsproxy pointer is replaced by
 * NULL via switch_task_namespaces(), which also drops the reference on
 * the previous nsproxy.
 */
void exit_task_namespaces(struct task_struct *p)
{
	switch_task_namespaces(p, NULL);
}
 256
 257static int check_setns_flags(unsigned long flags)
 258{
 259        if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 260                                 CLONE_NEWNET | CLONE_NEWTIME | CLONE_NEWUSER |
 261                                 CLONE_NEWPID | CLONE_NEWCGROUP)))
 262                return -EINVAL;
 263
 264#ifndef CONFIG_USER_NS
 265        if (flags & CLONE_NEWUSER)
 266                return -EINVAL;
 267#endif
 268#ifndef CONFIG_PID_NS
 269        if (flags & CLONE_NEWPID)
 270                return -EINVAL;
 271#endif
 272#ifndef CONFIG_UTS_NS
 273        if (flags & CLONE_NEWUTS)
 274                return -EINVAL;
 275#endif
 276#ifndef CONFIG_IPC_NS
 277        if (flags & CLONE_NEWIPC)
 278                return -EINVAL;
 279#endif
 280#ifndef CONFIG_CGROUPS
 281        if (flags & CLONE_NEWCGROUP)
 282                return -EINVAL;
 283#endif
 284#ifndef CONFIG_NET_NS
 285        if (flags & CLONE_NEWNET)
 286                return -EINVAL;
 287#endif
 288#ifndef CONFIG_TIME_NS
 289        if (flags & CLONE_NEWTIME)
 290                return -EINVAL;
 291#endif
 292
 293        return 0;
 294}
 295
 296static void put_nsset(struct nsset *nsset)
 297{
 298        unsigned flags = nsset->flags;
 299
 300        if (flags & CLONE_NEWUSER)
 301                put_cred(nsset_cred(nsset));
 302        /*
 303         * We only created a temporary copy if we attached to more than just
 304         * the mount namespace.
 305         */
 306        if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS))
 307                free_fs_struct(nsset->fs);
 308        if (nsset->nsproxy)
 309                free_nsproxy(nsset->nsproxy);
 310}
 311
 312static int prepare_nsset(unsigned flags, struct nsset *nsset)
 313{
 314        struct task_struct *me = current;
 315
 316        nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs);
 317        if (IS_ERR(nsset->nsproxy))
 318                return PTR_ERR(nsset->nsproxy);
 319
 320        if (flags & CLONE_NEWUSER)
 321                nsset->cred = prepare_creds();
 322        else
 323                nsset->cred = current_cred();
 324        if (!nsset->cred)
 325                goto out;
 326
 327        /* Only create a temporary copy of fs_struct if we really need to. */
 328        if (flags == CLONE_NEWNS) {
 329                nsset->fs = me->fs;
 330        } else if (flags & CLONE_NEWNS) {
 331                nsset->fs = copy_fs_struct(me->fs);
 332                if (!nsset->fs)
 333                        goto out;
 334        }
 335
 336        nsset->flags = flags;
 337        return 0;
 338
 339out:
 340        put_nsset(nsset);
 341        return -ENOMEM;
 342}
 343
/*
 * Ask namespace @ns to install itself into @nsset through its
 * ops->install() callback and return that callback's result.
 */
static inline int validate_ns(struct nsset *nsset, struct ns_common *ns)
{
	return ns->ops->install(nsset, ns);
}
 348
/*
 * This is the inverse operation to unshare().
 * Ordering is equivalent to the standard ordering used everywhere else
 * during unshare and process creation. The switch to the new set of
 * namespaces occurs at the point of no return after installation of
 * all requested namespaces was successful in commit_nsset().
 *
 * @nsset: prepared nsset; nsset->flags selects the namespaces to install.
 * @pid:   struct pid identifying the target task.
 *
 * Returns 0 on success, -ESRCH if the target task, its nsproxy or its
 * active pid namespace is gone, -EPERM if ptrace_may_access() denies
 * read access to the target, or the first error returned by a
 * namespace's install callback.
 */
static int validate_nsset(struct nsset *nsset, struct pid *pid)
{
	int ret = 0;
	unsigned flags = nsset->flags;
	struct user_namespace *user_ns = NULL;
	struct pid_namespace *pid_ns = NULL;
	struct nsproxy *nsp;
	struct task_struct *tsk;

	/* Take a "snapshot" of the target task's namespaces. */
	rcu_read_lock();
	tsk = pid_task(pid, PIDTYPE_PID);
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}

	if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) {
		rcu_read_unlock();
		return -EPERM;
	}

	/* Pin the target's nsproxy while holding its task lock. */
	task_lock(tsk);
	nsp = tsk->nsproxy;
	if (nsp)
		get_nsproxy(nsp);
	task_unlock(tsk);
	if (!nsp) {
		rcu_read_unlock();
		return -ESRCH;
	}

#ifdef CONFIG_PID_NS
	if (flags & CLONE_NEWPID) {
		/*
		 * The pid namespace installed is the target's active one,
		 * not nsp->pid_ns_for_children.
		 */
		pid_ns = task_active_pid_ns(tsk);
		if (unlikely(!pid_ns)) {
			rcu_read_unlock();
			ret = -ESRCH;
			goto out;
		}
		get_pid_ns(pid_ns);
	}
#endif

#ifdef CONFIG_USER_NS
	/* The user namespace comes from the target's cred, not its nsproxy. */
	if (flags & CLONE_NEWUSER)
		user_ns = get_user_ns(__task_cred(tsk)->user_ns);
#endif
	rcu_read_unlock();

	/*
	 * Install requested namespaces. The caller will have
	 * verified earlier that the requested namespaces are
	 * supported on this kernel. We don't report errors here
	 * if a namespace is requested that isn't supported.
	 */
#ifdef CONFIG_USER_NS
	if (flags & CLONE_NEWUSER) {
		ret = validate_ns(nsset, &user_ns->ns);
		if (ret)
			goto out;
	}
#endif

	if (flags & CLONE_NEWNS) {
		ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns));
		if (ret)
			goto out;
	}

#ifdef CONFIG_UTS_NS
	if (flags & CLONE_NEWUTS) {
		ret = validate_ns(nsset, &nsp->uts_ns->ns);
		if (ret)
			goto out;
	}
#endif

#ifdef CONFIG_IPC_NS
	if (flags & CLONE_NEWIPC) {
		ret = validate_ns(nsset, &nsp->ipc_ns->ns);
		if (ret)
			goto out;
	}
#endif

#ifdef CONFIG_PID_NS
	if (flags & CLONE_NEWPID) {
		ret = validate_ns(nsset, &pid_ns->ns);
		if (ret)
			goto out;
	}
#endif

#ifdef CONFIG_CGROUPS
	if (flags & CLONE_NEWCGROUP) {
		ret = validate_ns(nsset, &nsp->cgroup_ns->ns);
		if (ret)
			goto out;
	}
#endif

#ifdef CONFIG_NET_NS
	if (flags & CLONE_NEWNET) {
		ret = validate_ns(nsset, &nsp->net_ns->ns);
		if (ret)
			goto out;
	}
#endif

#ifdef CONFIG_TIME_NS
	if (flags & CLONE_NEWTIME) {
		ret = validate_ns(nsset, &nsp->time_ns->ns);
		if (ret)
			goto out;
	}
#endif

out:
	/* Drop the references taken for the snapshot above. */
	if (pid_ns)
		put_pid_ns(pid_ns);
	if (nsp)
		put_nsproxy(nsp);
	put_user_ns(user_ns);

	return ret;
}
 483
 484/*
 485 * This is the point of no return. There are just a few namespaces
 486 * that do some actual work here and it's sufficiently minimal that
 487 * a separate ns_common operation seems unnecessary for now.
 488 * Unshare is doing the same thing. If we'll end up needing to do
 489 * more in a given namespace or a helper here is ultimately not
 490 * exported anymore a simple commit handler for each namespace
 491 * should be added to ns_common.
 492 */
 493static void commit_nsset(struct nsset *nsset)
 494{
 495        unsigned flags = nsset->flags;
 496        struct task_struct *me = current;
 497
 498#ifdef CONFIG_USER_NS
 499        if (flags & CLONE_NEWUSER) {
 500                /* transfer ownership */
 501                commit_creds(nsset_cred(nsset));
 502                nsset->cred = NULL;
 503        }
 504#endif
 505
 506        /* We only need to commit if we have used a temporary fs_struct. */
 507        if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) {
 508                set_fs_root(me->fs, &nsset->fs->root);
 509                set_fs_pwd(me->fs, &nsset->fs->pwd);
 510        }
 511
 512#ifdef CONFIG_IPC_NS
 513        if (flags & CLONE_NEWIPC)
 514                exit_sem(me);
 515#endif
 516
 517#ifdef CONFIG_TIME_NS
 518        if (flags & CLONE_NEWTIME)
 519                timens_commit(me, nsset->nsproxy->time_ns);
 520#endif
 521
 522        /* transfer ownership */
 523        switch_task_namespaces(me, nsset->nsproxy);
 524        nsset->nsproxy = NULL;
 525}
 526
 527SYSCALL_DEFINE2(setns, int, fd, int, flags)
 528{
 529        struct file *file;
 530        struct ns_common *ns = NULL;
 531        struct nsset nsset = {};
 532        int err = 0;
 533
 534        file = fget(fd);
 535        if (!file)
 536                return -EBADF;
 537
 538        if (proc_ns_file(file)) {
 539                ns = get_proc_ns(file_inode(file));
 540                if (flags && (ns->ops->type != flags))
 541                        err = -EINVAL;
 542                flags = ns->ops->type;
 543        } else if (!IS_ERR(pidfd_pid(file))) {
 544                err = check_setns_flags(flags);
 545        } else {
 546                err = -EINVAL;
 547        }
 548        if (err)
 549                goto out;
 550
 551        err = prepare_nsset(flags, &nsset);
 552        if (err)
 553                goto out;
 554
 555        if (proc_ns_file(file))
 556                err = validate_ns(&nsset, ns);
 557        else
 558                err = validate_nsset(&nsset, file->private_data);
 559        if (!err) {
 560                commit_nsset(&nsset);
 561                perf_event_namespaces(current);
 562        }
 563        put_nsset(&nsset);
 564out:
 565        fput(file);
 566        return err;
 567}
 568
/*
 * Create the slab cache for struct nsproxy, with SLAB_PANIC and
 * SLAB_ACCOUNT set.  Always returns 0.
 */
int __init nsproxy_cache_init(void)
{
	nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC|SLAB_ACCOUNT);
	return 0;
}
 574