linux/kernel/nsproxy.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *  Copyright (C) 2006 IBM Corporation
   4 *
   5 *  Author: Serge Hallyn <serue@us.ibm.com>
   6 *
   7 *  Jun 2006 - namespaces support
   8 *             OpenVZ, SWsoft Inc.
   9 *             Pavel Emelianov <xemul@openvz.org>
  10 */
  11
  12#include <linux/slab.h>
  13#include <linux/export.h>
  14#include <linux/nsproxy.h>
  15#include <linux/init_task.h>
  16#include <linux/mnt_namespace.h>
  17#include <linux/utsname.h>
  18#include <linux/pid_namespace.h>
  19#include <net/net_namespace.h>
  20#include <linux/ipc_namespace.h>
  21#include <linux/time_namespace.h>
  22#include <linux/fs_struct.h>
  23#include <linux/proc_fs.h>
  24#include <linux/proc_ns.h>
  25#include <linux/file.h>
  26#include <linux/syscalls.h>
  27#include <linux/cgroup.h>
  28#include <linux/perf_event.h>
  29
/* Slab cache that struct nsproxy objects are allocated from; set up in nsproxy_cache_init(). */
static struct kmem_cache *nsproxy_cachep;
  31
  32struct nsproxy init_nsproxy = {
  33        .count                  = ATOMIC_INIT(1),
  34        .uts_ns                 = &init_uts_ns,
  35#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
  36        .ipc_ns                 = &init_ipc_ns,
  37#endif
  38        .mnt_ns                 = NULL,
  39        .pid_ns_for_children    = &init_pid_ns,
  40#ifdef CONFIG_NET
  41        .net_ns                 = &init_net,
  42#endif
  43#ifdef CONFIG_CGROUPS
  44        .cgroup_ns              = &init_cgroup_ns,
  45#endif
  46#ifdef CONFIG_TIME_NS
  47        .time_ns                = &init_time_ns,
  48        .time_ns_for_children   = &init_time_ns,
  49#endif
  50};
  51
  52static inline struct nsproxy *create_nsproxy(void)
  53{
  54        struct nsproxy *nsproxy;
  55
  56        nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
  57        if (nsproxy)
  58                atomic_set(&nsproxy->count, 1);
  59        return nsproxy;
  60}
  61
  62/*
  63 * Create new nsproxy and all of its the associated namespaces.
  64 * Return the newly created nsproxy.  Do not attach this to the task,
  65 * leave it to the caller to do proper locking and attach it to task.
  66 */
  67static struct nsproxy *create_new_namespaces(unsigned long flags,
  68        struct task_struct *tsk, struct user_namespace *user_ns,
  69        struct fs_struct *new_fs)
  70{
  71        struct nsproxy *new_nsp;
  72        int err;
  73
  74        new_nsp = create_nsproxy();
  75        if (!new_nsp)
  76                return ERR_PTR(-ENOMEM);
  77
  78        new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
  79        if (IS_ERR(new_nsp->mnt_ns)) {
  80                err = PTR_ERR(new_nsp->mnt_ns);
  81                goto out_ns;
  82        }
  83
  84        new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
  85        if (IS_ERR(new_nsp->uts_ns)) {
  86                err = PTR_ERR(new_nsp->uts_ns);
  87                goto out_uts;
  88        }
  89
  90        new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
  91        if (IS_ERR(new_nsp->ipc_ns)) {
  92                err = PTR_ERR(new_nsp->ipc_ns);
  93                goto out_ipc;
  94        }
  95
  96        new_nsp->pid_ns_for_children =
  97                copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
  98        if (IS_ERR(new_nsp->pid_ns_for_children)) {
  99                err = PTR_ERR(new_nsp->pid_ns_for_children);
 100                goto out_pid;
 101        }
 102
 103        new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
 104                                            tsk->nsproxy->cgroup_ns);
 105        if (IS_ERR(new_nsp->cgroup_ns)) {
 106                err = PTR_ERR(new_nsp->cgroup_ns);
 107                goto out_cgroup;
 108        }
 109
 110        new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
 111        if (IS_ERR(new_nsp->net_ns)) {
 112                err = PTR_ERR(new_nsp->net_ns);
 113                goto out_net;
 114        }
 115
 116        new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns,
 117                                        tsk->nsproxy->time_ns_for_children);
 118        if (IS_ERR(new_nsp->time_ns_for_children)) {
 119                err = PTR_ERR(new_nsp->time_ns_for_children);
 120                goto out_time;
 121        }
 122        new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns);
 123
 124        return new_nsp;
 125
 126out_time:
 127        put_net(new_nsp->net_ns);
 128out_net:
 129        put_cgroup_ns(new_nsp->cgroup_ns);
 130out_cgroup:
 131        if (new_nsp->pid_ns_for_children)
 132                put_pid_ns(new_nsp->pid_ns_for_children);
 133out_pid:
 134        if (new_nsp->ipc_ns)
 135                put_ipc_ns(new_nsp->ipc_ns);
 136out_ipc:
 137        if (new_nsp->uts_ns)
 138                put_uts_ns(new_nsp->uts_ns);
 139out_uts:
 140        if (new_nsp->mnt_ns)
 141                put_mnt_ns(new_nsp->mnt_ns);
 142out_ns:
 143        kmem_cache_free(nsproxy_cachep, new_nsp);
 144        return ERR_PTR(err);
 145}
 146
 147/*
 148 * called from clone.  This now handles copy for nsproxy and all
 149 * namespaces therein.
 150 */
 151int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 152{
 153        struct nsproxy *old_ns = tsk->nsproxy;
 154        struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
 155        struct nsproxy *new_ns;
 156        int ret;
 157
 158        if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 159                              CLONE_NEWPID | CLONE_NEWNET |
 160                              CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
 161                if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
 162                        get_nsproxy(old_ns);
 163                        return 0;
 164                }
 165        } else if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 166                return -EPERM;
 167
 168        /*
 169         * CLONE_NEWIPC must detach from the undolist: after switching
 170         * to a new ipc namespace, the semaphore arrays from the old
 171         * namespace are unreachable.  In clone parlance, CLONE_SYSVSEM
 172         * means share undolist with parent, so we must forbid using
 173         * it along with CLONE_NEWIPC.
 174         */
 175        if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==
 176                (CLONE_NEWIPC | CLONE_SYSVSEM)) 
 177                return -EINVAL;
 178
 179        new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
 180        if (IS_ERR(new_ns))
 181                return  PTR_ERR(new_ns);
 182
 183        ret = timens_on_fork(new_ns, tsk);
 184        if (ret) {
 185                free_nsproxy(new_ns);
 186                return ret;
 187        }
 188
 189        tsk->nsproxy = new_ns;
 190        return 0;
 191}
 192
/*
 * Release every namespace reference held by @ns and hand the nsproxy
 * itself back to nsproxy_cachep.
 *
 * This does not look at @ns->count; callers in this file only get here
 * once they own the last reference or for an nsproxy that was never
 * published to a task.
 */
void free_nsproxy(struct nsproxy *ns)
{
        /* These slots are checked for NULL before being dropped. */
        if (ns->mnt_ns)
                put_mnt_ns(ns->mnt_ns);
        if (ns->uts_ns)
                put_uts_ns(ns->uts_ns);
        if (ns->ipc_ns)
                put_ipc_ns(ns->ipc_ns);
        if (ns->pid_ns_for_children)
                put_pid_ns(ns->pid_ns_for_children);
        /* The active and the for-children time namespace each hold a reference. */
        if (ns->time_ns)
                put_time_ns(ns->time_ns);
        if (ns->time_ns_for_children)
                put_time_ns(ns->time_ns_for_children);
        /* cgroup and net namespaces are dropped unconditionally. */
        put_cgroup_ns(ns->cgroup_ns);
        put_net(ns->net_ns);
        kmem_cache_free(nsproxy_cachep, ns);
}
 211
 212/*
 213 * Called from unshare. Unshare all the namespaces part of nsproxy.
 214 * On success, returns the new nsproxy.
 215 */
 216int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 217        struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
 218{
 219        struct user_namespace *user_ns;
 220        int err = 0;
 221
 222        if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 223                               CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
 224                               CLONE_NEWTIME)))
 225                return 0;
 226
 227        user_ns = new_cred ? new_cred->user_ns : current_user_ns();
 228        if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 229                return -EPERM;
 230
 231        *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
 232                                         new_fs ? new_fs : current->fs);
 233        if (IS_ERR(*new_nsp)) {
 234                err = PTR_ERR(*new_nsp);
 235                goto out;
 236        }
 237
 238out:
 239        return err;
 240}
 241
 242void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
 243{
 244        struct nsproxy *ns;
 245
 246        might_sleep();
 247
 248        task_lock(p);
 249        ns = p->nsproxy;
 250        p->nsproxy = new;
 251        task_unlock(p);
 252
 253        if (ns && atomic_dec_and_test(&ns->count))
 254                free_nsproxy(ns);
 255}
 256
/*
 * Detach task @p from its nsproxy by installing NULL in its place; the
 * task's reference on the old nsproxy is dropped by
 * switch_task_namespaces().
 */
void exit_task_namespaces(struct task_struct *p)
{
        switch_task_namespaces(p, NULL);
}
 261
 262static int check_setns_flags(unsigned long flags)
 263{
 264        if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 265                                 CLONE_NEWNET | CLONE_NEWTIME | CLONE_NEWUSER |
 266                                 CLONE_NEWPID | CLONE_NEWCGROUP)))
 267                return -EINVAL;
 268
 269#ifndef CONFIG_USER_NS
 270        if (flags & CLONE_NEWUSER)
 271                return -EINVAL;
 272#endif
 273#ifndef CONFIG_PID_NS
 274        if (flags & CLONE_NEWPID)
 275                return -EINVAL;
 276#endif
 277#ifndef CONFIG_UTS_NS
 278        if (flags & CLONE_NEWUTS)
 279                return -EINVAL;
 280#endif
 281#ifndef CONFIG_IPC_NS
 282        if (flags & CLONE_NEWIPC)
 283                return -EINVAL;
 284#endif
 285#ifndef CONFIG_CGROUPS
 286        if (flags & CLONE_NEWCGROUP)
 287                return -EINVAL;
 288#endif
 289#ifndef CONFIG_NET_NS
 290        if (flags & CLONE_NEWNET)
 291                return -EINVAL;
 292#endif
 293#ifndef CONFIG_TIME_NS
 294        if (flags & CLONE_NEWTIME)
 295                return -EINVAL;
 296#endif
 297
 298        return 0;
 299}
 300
 301static void put_nsset(struct nsset *nsset)
 302{
 303        unsigned flags = nsset->flags;
 304
 305        if (flags & CLONE_NEWUSER)
 306                put_cred(nsset_cred(nsset));
 307        /*
 308         * We only created a temporary copy if we attached to more than just
 309         * the mount namespace.
 310         */
 311        if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS))
 312                free_fs_struct(nsset->fs);
 313        if (nsset->nsproxy)
 314                free_nsproxy(nsset->nsproxy);
 315}
 316
 317static int prepare_nsset(unsigned flags, struct nsset *nsset)
 318{
 319        struct task_struct *me = current;
 320
 321        nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs);
 322        if (IS_ERR(nsset->nsproxy))
 323                return PTR_ERR(nsset->nsproxy);
 324
 325        if (flags & CLONE_NEWUSER)
 326                nsset->cred = prepare_creds();
 327        else
 328                nsset->cred = current_cred();
 329        if (!nsset->cred)
 330                goto out;
 331
 332        /* Only create a temporary copy of fs_struct if we really need to. */
 333        if (flags == CLONE_NEWNS) {
 334                nsset->fs = me->fs;
 335        } else if (flags & CLONE_NEWNS) {
 336                nsset->fs = copy_fs_struct(me->fs);
 337                if (!nsset->fs)
 338                        goto out;
 339        }
 340
 341        nsset->flags = flags;
 342        return 0;
 343
 344out:
 345        put_nsset(nsset);
 346        return -ENOMEM;
 347}
 348
/*
 * Run the ->install() operation of @ns's namespace type against @nsset
 * and return its result (0 is treated as success by the callers here).
 */
static inline int validate_ns(struct nsset *nsset, struct ns_common *ns)
{
        return ns->ops->install(nsset, ns);
}
 353
/*
 * This is the inverse operation to unshare().
 * Ordering is equivalent to the standard ordering used everywhere else
 * during unshare and process creation. The switch to the new set of
 * namespaces occurs at the point of no return after installation of
 * all requested namespaces was successful in commit_nsset().
 *
 * @nsset: prepared namespace set; @nsset->flags selects which of the
 *         target's namespaces get installed into it.
 * @pid:   struct pid identifying the target task.
 *
 * Returns 0 on success, -ESRCH if the target task, its nsproxy or its
 * active pid namespace cannot be found, -EPERM if the
 * ptrace_may_access() check fails, or the first error returned by a
 * namespace's install operation.
 */
static int validate_nsset(struct nsset *nsset, struct pid *pid)
{
        int ret = 0;
        unsigned flags = nsset->flags;
        struct user_namespace *user_ns = NULL;
        struct pid_namespace *pid_ns = NULL;
        struct nsproxy *nsp;
        struct task_struct *tsk;

        /* Take a "snapshot" of the target task's namespaces. */
        rcu_read_lock();
        tsk = pid_task(pid, PIDTYPE_PID);
        if (!tsk) {
                rcu_read_unlock();
                return -ESRCH;
        }

        /* The caller must pass a PTRACE_MODE_READ_REALCREDS check on the target. */
        if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) {
                rcu_read_unlock();
                return -EPERM;
        }

        /*
         * Read the target's nsproxy under task_lock() and take a reference
         * while still holding the lock, so it can be used after unlocking.
         */
        task_lock(tsk);
        nsp = tsk->nsproxy;
        if (nsp)
                get_nsproxy(nsp);
        task_unlock(tsk);
        if (!nsp) {
                rcu_read_unlock();
                return -ESRCH;
        }

#ifdef CONFIG_PID_NS
        if (flags & CLONE_NEWPID) {
                pid_ns = task_active_pid_ns(tsk);
                if (unlikely(!pid_ns)) {
                        /* The out: path does not unlock, so do it here. */
                        rcu_read_unlock();
                        ret = -ESRCH;
                        goto out;
                }
                get_pid_ns(pid_ns);
        }
#endif

#ifdef CONFIG_USER_NS
        if (flags & CLONE_NEWUSER)
                user_ns = get_user_ns(__task_cred(tsk)->user_ns);
#endif
        /* Everything needed from the target is now pinned by a reference. */
        rcu_read_unlock();

        /*
         * Install requested namespaces. The caller will have
         * verified earlier that the requested namespaces are
         * supported on this kernel. We don't report errors here
         * if a namespace is requested that isn't supported.
         */
#ifdef CONFIG_USER_NS
        if (flags & CLONE_NEWUSER) {
                ret = validate_ns(nsset, &user_ns->ns);
                if (ret)
                        goto out;
        }
#endif

        if (flags & CLONE_NEWNS) {
                ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns));
                if (ret)
                        goto out;
        }

#ifdef CONFIG_UTS_NS
        if (flags & CLONE_NEWUTS) {
                ret = validate_ns(nsset, &nsp->uts_ns->ns);
                if (ret)
                        goto out;
        }
#endif

#ifdef CONFIG_IPC_NS
        if (flags & CLONE_NEWIPC) {
                ret = validate_ns(nsset, &nsp->ipc_ns->ns);
                if (ret)
                        goto out;
        }
#endif

#ifdef CONFIG_PID_NS
        if (flags & CLONE_NEWPID) {
                /* Uses the target's active pid namespace pinned above. */
                ret = validate_ns(nsset, &pid_ns->ns);
                if (ret)
                        goto out;
        }
#endif

#ifdef CONFIG_CGROUPS
        if (flags & CLONE_NEWCGROUP) {
                ret = validate_ns(nsset, &nsp->cgroup_ns->ns);
                if (ret)
                        goto out;
        }
#endif

#ifdef CONFIG_NET_NS
        if (flags & CLONE_NEWNET) {
                ret = validate_ns(nsset, &nsp->net_ns->ns);
                if (ret)
                        goto out;
        }
#endif

#ifdef CONFIG_TIME_NS
        if (flags & CLONE_NEWTIME) {
                ret = validate_ns(nsset, &nsp->time_ns->ns);
                if (ret)
                        goto out;
        }
#endif

out:
        /* Drop the snapshot references taken above, on success and failure alike. */
        if (pid_ns)
                put_pid_ns(pid_ns);
        if (nsp)
                put_nsproxy(nsp);
        /* user_ns is still NULL here unless CLONE_NEWUSER was requested. */
        put_user_ns(user_ns);

        return ret;
}
 488
 489/*
 490 * This is the point of no return. There are just a few namespaces
 491 * that do some actual work here and it's sufficiently minimal that
 492 * a separate ns_common operation seems unnecessary for now.
 493 * Unshare is doing the same thing. If we'll end up needing to do
 494 * more in a given namespace or a helper here is ultimately not
 495 * exported anymore a simple commit handler for each namespace
 496 * should be added to ns_common.
 497 */
 498static void commit_nsset(struct nsset *nsset)
 499{
 500        unsigned flags = nsset->flags;
 501        struct task_struct *me = current;
 502
 503#ifdef CONFIG_USER_NS
 504        if (flags & CLONE_NEWUSER) {
 505                /* transfer ownership */
 506                commit_creds(nsset_cred(nsset));
 507                nsset->cred = NULL;
 508        }
 509#endif
 510
 511        /* We only need to commit if we have used a temporary fs_struct. */
 512        if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) {
 513                set_fs_root(me->fs, &nsset->fs->root);
 514                set_fs_pwd(me->fs, &nsset->fs->pwd);
 515        }
 516
 517#ifdef CONFIG_IPC_NS
 518        if (flags & CLONE_NEWIPC)
 519                exit_sem(me);
 520#endif
 521
 522#ifdef CONFIG_TIME_NS
 523        if (flags & CLONE_NEWTIME)
 524                timens_commit(me, nsset->nsproxy->time_ns);
 525#endif
 526
 527        /* transfer ownership */
 528        switch_task_namespaces(me, nsset->nsproxy);
 529        nsset->nsproxy = NULL;
 530}
 531
 532SYSCALL_DEFINE2(setns, int, fd, int, flags)
 533{
 534        struct file *file;
 535        struct ns_common *ns = NULL;
 536        struct nsset nsset = {};
 537        int err = 0;
 538
 539        file = fget(fd);
 540        if (!file)
 541                return -EBADF;
 542
 543        if (proc_ns_file(file)) {
 544                ns = get_proc_ns(file_inode(file));
 545                if (flags && (ns->ops->type != flags))
 546                        err = -EINVAL;
 547                flags = ns->ops->type;
 548        } else if (!IS_ERR(pidfd_pid(file))) {
 549                err = check_setns_flags(flags);
 550        } else {
 551                err = -EINVAL;
 552        }
 553        if (err)
 554                goto out;
 555
 556        err = prepare_nsset(flags, &nsset);
 557        if (err)
 558                goto out;
 559
 560        if (proc_ns_file(file))
 561                err = validate_ns(&nsset, ns);
 562        else
 563                err = validate_nsset(&nsset, file->private_data);
 564        if (!err) {
 565                commit_nsset(&nsset);
 566                perf_event_namespaces(current);
 567        }
 568        put_nsset(&nsset);
 569out:
 570        fput(file);
 571        return err;
 572}
 573
/*
 * Boot-time setup: create the slab cache used for struct nsproxy
 * allocations (with SLAB_PANIC) and store it in nsproxy_cachep.
 * Always returns 0.
 */
int __init nsproxy_cache_init(void)
{
        nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
        return 0;
}
 579