linux/kernel/user_namespace.c
   1/*
   2 *  This program is free software; you can redistribute it and/or
   3 *  modify it under the terms of the GNU General Public License as
   4 *  published by the Free Software Foundation, version 2 of the
   5 *  License.
   6 */
   7
   8#include <linux/export.h>
   9#include <linux/nsproxy.h>
  10#include <linux/slab.h>
  11#include <linux/user_namespace.h>
  12#include <linux/proc_ns.h>
  13#include <linux/highuid.h>
  14#include <linux/cred.h>
  15#include <linux/securebits.h>
  16#include <linux/keyctl.h>
  17#include <linux/key-type.h>
  18#include <keys/user-type.h>
  19#include <linux/seq_file.h>
  20#include <linux/fs.h>
  21#include <linux/uaccess.h>
  22#include <linux/ctype.h>
  23#include <linux/projid.h>
  24#include <linux/fs_struct.h>
  25
  26static struct kmem_cache *user_ns_cachep __read_mostly;
  27static DEFINE_MUTEX(userns_state_mutex);
  28
  29static bool new_idmap_permitted(const struct file *file,
  30                                struct user_namespace *ns, int cap_setid,
  31                                struct uid_gid_map *map);
  32
  33static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
  34{
  35        /* Start with the same capabilities as init, but these are useless for
  36         * doing anything, as the capabilities are bound to the new user namespace.
  37         */
  38        cred->securebits = SECUREBITS_DEFAULT;
  39        cred->cap_inheritable = CAP_EMPTY_SET;
  40        cred->cap_permitted = CAP_FULL_SET;
  41        cred->cap_effective = CAP_FULL_SET;
  42        cred->cap_bset = CAP_FULL_SET;
  43#ifdef CONFIG_KEYS
  44        key_put(cred->request_key_auth);
  45        cred->request_key_auth = NULL;
  46#endif
  47        /* tgcred will be cleared in our caller because CLONE_THREAD won't be set */
  48        cred->user_ns = user_ns;
  49}
  50
  51/*
  52 * Create a new user namespace, deriving the creator from the user in the
  53 * passed credentials, and replacing that user with the new root user for the
  54 * new namespace.
  55 *
  56 * This is called by copy_creds(), which will finish setting the target task's
  57 * credentials.
  58 */
  59int create_user_ns(struct cred *new)
  60{
  61        struct user_namespace *ns, *parent_ns = new->user_ns;
  62        kuid_t owner = new->euid;
  63        kgid_t group = new->egid;
  64        int ret;
  65
  66        if (parent_ns->level > 32)
  67                return -EUSERS;
  68
  69        /*
  70         * Verify that we can not violate the file-access policy
  71         * specified by the root directory, by verifying that the
  72         * root directory is at the root of the mount namespace,
  73         * which allows all files to be accessed.
  74         */
  75        if (current_chrooted())
  76                return -EPERM;
  77
  78        /* The creator needs a mapping in the parent user namespace
  79         * or else we won't be able to reasonably tell userspace who
  80         * created a user_namespace.
  81         */
  82        if (!kuid_has_mapping(parent_ns, owner) ||
  83            !kgid_has_mapping(parent_ns, group))
  84                return -EPERM;
  85
  86        ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
  87        if (!ns)
  88                return -ENOMEM;
  89
  90        ret = ns_alloc_inum(&ns->ns);
  91        if (ret) {
  92                kmem_cache_free(user_ns_cachep, ns);
  93                return ret;
  94        }
  95        ns->ns.ops = &userns_operations;
  96
  97        atomic_set(&ns->count, 1);
  98        /* Leave the new->user_ns reference with the new user namespace. */
  99        ns->parent = parent_ns;
 100        ns->level = parent_ns->level + 1;
 101        ns->owner = owner;
 102        ns->group = group;
 103
 104        /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
 105        mutex_lock(&userns_state_mutex);
 106        ns->flags = parent_ns->flags;
 107        mutex_unlock(&userns_state_mutex);
 108
 109        set_cred_user_ns(new, ns);
 110
 111#ifdef CONFIG_PERSISTENT_KEYRINGS
 112        init_rwsem(&ns->persistent_keyring_register_sem);
 113#endif
 114        return 0;
 115}
 116
 117int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
 118{
 119        struct cred *cred;
 120        int err = -ENOMEM;
 121
 122        if (!(unshare_flags & CLONE_NEWUSER))
 123                return 0;
 124
 125        cred = prepare_creds();
 126        if (cred) {
 127                err = create_user_ns(cred);
 128                if (err)
 129                        put_cred(cred);
 130                else
 131                        *new_cred = cred;
 132        }
 133
 134        return err;
 135}
 136
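/*
 * Illustrative userspace sketch (not part of this file): clone(2) with
 * CLONE_NEWUSER reaches create_user_ns() via copy_creds(), while unshare(2)
 * with CLONE_NEWUSER goes through unshare_userns() above.  Assumes the usual
 * glibc wrappers.
 *
 *     #define _GNU_SOURCE
 *     #include <sched.h>      // unshare(), CLONE_NEWUSER
 *     #include <stdio.h>
 *
 *     int main(void)
 *     {
 *             if (unshare(CLONE_NEWUSER) == -1) {
 *                     perror("unshare");  // e.g. EPERM when chrooted
 *                     return 1;
 *             }
 *             // The task now has a full capability set, but only with
 *             // respect to the new (still unmapped) user namespace.
 *             return 0;
 *     }
 */
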
 137void free_user_ns(struct user_namespace *ns)
 138{
 139        struct user_namespace *parent;
 140
 141        do {
 142                parent = ns->parent;
 143#ifdef CONFIG_PERSISTENT_KEYRINGS
 144                key_put(ns->persistent_keyring_register);
 145#endif
 146                ns_free_inum(&ns->ns);
 147                kmem_cache_free(user_ns_cachep, ns);
 148                ns = parent;
 149        } while (atomic_dec_and_test(&parent->count));
 150}
 151EXPORT_SYMBOL(free_user_ns);
 152
 153static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
 154{
 155        unsigned idx, extents;
 156        u32 first, last, id2;
 157
 158        id2 = id + count - 1;
 159
 160        /* Find the matching extent */
 161        extents = map->nr_extents;
 162        smp_rmb();
 163        for (idx = 0; idx < extents; idx++) {
 164                first = map->extent[idx].first;
 165                last = first + map->extent[idx].count - 1;
 166                if (id >= first && id <= last &&
 167                    (id2 >= first && id2 <= last))
 168                        break;
 169        }
 170        /* Map the id or note failure */
 171        if (idx < extents)
 172                id = (id - first) + map->extent[idx].lower_first;
 173        else
 174                id = (u32) -1;
 175
 176        return id;
 177}
 178
 179static u32 map_id_down(struct uid_gid_map *map, u32 id)
 180{
 181        unsigned idx, extents;
 182        u32 first, last;
 183
 184        /* Find the matching extent */
 185        extents = map->nr_extents;
 186        smp_rmb();
 187        for (idx = 0; idx < extents; idx++) {
 188                first = map->extent[idx].first;
 189                last = first + map->extent[idx].count - 1;
 190                if (id >= first && id <= last)
 191                        break;
 192        }
 193        /* Map the id or note failure */
 194        if (idx < extents)
 195                id = (id - first) + map->extent[idx].lower_first;
 196        else
 197                id = (u32) -1;
 198
 199        return id;
 200}
 201
 202static u32 map_id_up(struct uid_gid_map *map, u32 id)
 203{
 204        unsigned idx, extents;
 205        u32 first, last;
 206
 207        /* Find the matching extent */
 208        extents = map->nr_extents;
 209        smp_rmb();
 210        for (idx = 0; idx < extents; idx++) {
 211                first = map->extent[idx].lower_first;
 212                last = first + map->extent[idx].count - 1;
 213                if (id >= first && id <= last)
 214                        break;
 215        }
 216        /* Map the id or note failure */
 217        if (idx < extents)
 218                id = (id - first) + map->extent[idx].first;
 219        else
 220                id = (u32) -1;
 221
 222        return id;
 223}
 224
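/*
 * Worked example (illustrative, assuming a single hypothetical extent
 * { .first = 0, .lower_first = 100000, .count = 65536 }):
 *
 *     map_id_down(map, 1000)   == (1000 - 0) + 100000    == 101000
 *     map_id_up(map, 101000)   == (101000 - 100000) + 0  == 1000
 *     map_id_down(map, 70000)  == (u32) -1   // outside first..first+count-1
 *
 * map_id_range_down() additionally requires the whole range
 * [id, id + count - 1] to fit inside a single extent.
 */
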
 225/**
 226 *      make_kuid - Map a user-namespace uid pair into a kuid.
 227 *      @ns:  User namespace that the uid is in
 228 *      @uid: User identifier
 229 *
 230 *      Maps a user-namespace uid pair into a kernel internal kuid,
 231 *      and returns that kuid.
 232 *
 233 *      When there is no mapping defined for the user-namespace uid
 234 *      pair INVALID_UID is returned.  Callers are expected to test
 235 *      for and handle INVALID_UID being returned.  INVALID_UID
 236 *      may be tested for using uid_valid().
 237 */
 238kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
 239{
 240        /* Map the uid to a global kernel uid */
 241        return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
 242}
 243EXPORT_SYMBOL(make_kuid);
 244
 245/**
 246 *      from_kuid - Create a uid from a kuid user-namespace pair.
 247 *      @targ: The user namespace we want a uid in.
 248 *      @kuid: The kernel internal uid to start with.
 249 *
 250 *      Map @kuid into the user-namespace specified by @targ and
 251 *      return the resulting uid.
 252 *
 253 *      There is always a mapping into the initial user_namespace.
 254 *
 255 *      If @kuid has no mapping in @targ (uid_t)-1 is returned.
 256 */
 257uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
 258{
 259        /* Map the uid from a global kernel uid */
 260        return map_id_up(&targ->uid_map, __kuid_val(kuid));
 261}
 262EXPORT_SYMBOL(from_kuid);
 263
 264/**
 265 *      from_kuid_munged - Create a uid from a kuid user-namespace pair.
 266 *      @targ: The user namespace we want a uid in.
 267 *      @kuid: The kernel internal uid to start with.
 268 *
 269 *      Map @kuid into the user-namespace specified by @targ and
 270 *      return the resulting uid.
 271 *
 272 *      There is always a mapping into the initial user_namespace.
 273 *
 274 *      Unlike from_kuid, from_kuid_munged never fails and always
 275 *      returns a valid uid.  This makes from_kuid_munged appropriate
 276 *      for use in syscalls like stat and getuid where failing the
 277 *      system call and failing to provide a valid uid are not
 278 *      options.
 279 *
 280 *      If @kuid has no mapping in @targ overflowuid is returned.
 281 */
 282uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
 283{
 284        uid_t uid;
 285        uid = from_kuid(targ, kuid);
 286
 287        if (uid == (uid_t) -1)
 288                uid = overflowuid;
 289        return uid;
 290}
 291EXPORT_SYMBOL(from_kuid_munged);
 292
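/*
 * Usage sketch (illustrative, assuming a namespace whose uid_map holds the
 * single extent "0 100000 65536" and the default overflowuid of 65534):
 *
 *     kuid_t k = make_kuid(ns, 0);                    // kernel uid 100000
 *     uid_t  u = from_kuid(ns, k);                    // 0 again inside ns
 *     uid_t  e = from_kuid(ns, KUIDT_INIT(5));        // unmapped: (uid_t) -1
 *     uid_t  m = from_kuid_munged(ns, KUIDT_INIT(5)); // unmapped: 65534
 */
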
 293/**
 294 *      make_kgid - Map a user-namespace gid pair into a kgid.
 295 *      @ns:  User namespace that the gid is in
 296 *      @gid: group identifier
 297 *
 298 *      Maps a user-namespace gid pair into a kernel internal kgid,
 299 *      and returns that kgid.
 300 *
 301 *      When there is no mapping defined for the user-namespace gid
 302 *      pair INVALID_GID is returned.  Callers are expected to test
 303 *      for and handle INVALID_GID being returned.  INVALID_GID may be
 304 *      tested for using gid_valid().
 305 */
 306kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
 307{
 308        /* Map the gid to a global kernel gid */
 309        return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
 310}
 311EXPORT_SYMBOL(make_kgid);
 312
 313/**
 314 *      from_kgid - Create a gid from a kgid user-namespace pair.
 315 *      @targ: The user namespace we want a gid in.
 316 *      @kgid: The kernel internal gid to start with.
 317 *
 318 *      Map @kgid into the user-namespace specified by @targ and
 319 *      return the resulting gid.
 320 *
 321 *      There is always a mapping into the initial user_namespace.
 322 *
 323 *      If @kgid has no mapping in @targ (gid_t)-1 is returned.
 324 */
 325gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
 326{
 327        /* Map the gid from a global kernel gid */
 328        return map_id_up(&targ->gid_map, __kgid_val(kgid));
 329}
 330EXPORT_SYMBOL(from_kgid);
 331
 332/**
 333 *      from_kgid_munged - Create a gid from a kgid user-namespace pair.
 334 *      @targ: The user namespace we want a gid in.
 335 *      @kgid: The kernel internal gid to start with.
 336 *
 337 *      Map @kgid into the user-namespace specified by @targ and
 338 *      return the resulting gid.
 339 *
 340 *      There is always a mapping into the initial user_namespace.
 341 *
 342 *      Unlike from_kgid, from_kgid_munged never fails and always
 343 *      returns a valid gid.  This makes from_kgid_munged appropriate
 344 *      for use in syscalls like stat and getgid where failing the
 345 *      system call and failing to provide a valid gid are not options.
 346 *
 347 *      If @kgid has no mapping in @targ overflowgid is returned.
 348 */
 349gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
 350{
 351        gid_t gid;
 352        gid = from_kgid(targ, kgid);
 353
 354        if (gid == (gid_t) -1)
 355                gid = overflowgid;
 356        return gid;
 357}
 358EXPORT_SYMBOL(from_kgid_munged);
 359
 360/**
 361 *      make_kprojid - Map a user-namespace projid pair into a kprojid.
 362 *      @ns:  User namespace that the projid is in
 363 *      @projid: Project identifier
 364 *
 365 *      Maps a user-namespace projid pair into a kernel internal kprojid,
 366 *      and returns that kprojid.
 367 *
 368 *      When there is no mapping defined for the user-namespace projid
 369 *      pair INVALID_PROJID is returned.  Callers are expected to test
 370 *      for and handle INVALID_PROJID being returned.  INVALID_PROJID
 371 *      may be tested for using projid_valid().
 372 */
 373kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
 374{
 375        /* Map the projid to a global kernel projid */
 376        return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
 377}
 378EXPORT_SYMBOL(make_kprojid);
 379
 380/**
 381 *      from_kprojid - Create a projid from a kprojid user-namespace pair.
 382 *      @targ: The user namespace we want a projid in.
 383 *      @kprojid: The kernel internal project identifier to start with.
 384 *
 385 *      Map @kprojid into the user-namespace specified by @targ and
 386 *      return the resulting projid.
 387 *
 388 *      There is always a mapping into the initial user_namespace.
 389 *
 390 *      If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 391 */
 392projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
 393{
 394        /* Map the projid from a global kernel projid */
 395        return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
 396}
 397EXPORT_SYMBOL(from_kprojid);
 398
 399/**
 400 *      from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
 401 *      @targ: The user namespace we want a projid in.
 402 *      @kprojid: The kernel internal projid to start with.
 403 *
 404 *      Map @kprojid into the user-namespace specified by @targ and
 405 *      return the resulting projid.
 406 *
 407 *      There is always a mapping into the initial user_namespace.
 408 *
 409 *      Unlike from_kprojid, from_kprojid_munged never fails and always
 410 *      returns a valid projid.  This makes from_kprojid_munged
 411 *      appropriate for use in syscalls like stat, where failing the
 412 *      system call and failing to provide a valid projid are not
 413 *      options.
 414 *
 415 *      If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
 416 */
 417projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
 418{
 419        projid_t projid;
 420        projid = from_kprojid(targ, kprojid);
 421
 422        if (projid == (projid_t) -1)
 423                projid = OVERFLOW_PROJID;
 424        return projid;
 425}
 426EXPORT_SYMBOL(from_kprojid_munged);
 427
 428
 429static int uid_m_show(struct seq_file *seq, void *v)
 430{
 431        struct user_namespace *ns = seq->private;
 432        struct uid_gid_extent *extent = v;
 433        struct user_namespace *lower_ns;
 434        uid_t lower;
 435
 436        lower_ns = seq_user_ns(seq);
 437        if ((lower_ns == ns) && lower_ns->parent)
 438                lower_ns = lower_ns->parent;
 439
 440        lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
 441
 442        seq_printf(seq, "%10u %10u %10u\n",
 443                extent->first,
 444                lower,
 445                extent->count);
 446
 447        return 0;
 448}
 449
 450static int gid_m_show(struct seq_file *seq, void *v)
 451{
 452        struct user_namespace *ns = seq->private;
 453        struct uid_gid_extent *extent = v;
 454        struct user_namespace *lower_ns;
 455        gid_t lower;
 456
 457        lower_ns = seq_user_ns(seq);
 458        if ((lower_ns == ns) && lower_ns->parent)
 459                lower_ns = lower_ns->parent;
 460
 461        lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
 462
 463        seq_printf(seq, "%10u %10u %10u\n",
 464                extent->first,
 465                lower,
 466                extent->count);
 467
 468        return 0;
 469}
 470
 471static int projid_m_show(struct seq_file *seq, void *v)
 472{
 473        struct user_namespace *ns = seq->private;
 474        struct uid_gid_extent *extent = v;
 475        struct user_namespace *lower_ns;
 476        projid_t lower;
 477
 478        lower_ns = seq_user_ns(seq);
 479        if ((lower_ns == ns) && lower_ns->parent)
 480                lower_ns = lower_ns->parent;
 481
 482        lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));
 483
 484        seq_printf(seq, "%10u %10u %10u\n",
 485                extent->first,
 486                lower,
 487                extent->count);
 488
 489        return 0;
 490}
 491
 492static void *m_start(struct seq_file *seq, loff_t *ppos,
 493                     struct uid_gid_map *map)
 494{
 495        struct uid_gid_extent *extent = NULL;
 496        loff_t pos = *ppos;
 497
 498        if (pos < map->nr_extents)
 499                extent = &map->extent[pos];
 500
 501        return extent;
 502}
 503
 504static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
 505{
 506        struct user_namespace *ns = seq->private;
 507
 508        return m_start(seq, ppos, &ns->uid_map);
 509}
 510
 511static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
 512{
 513        struct user_namespace *ns = seq->private;
 514
 515        return m_start(seq, ppos, &ns->gid_map);
 516}
 517
 518static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
 519{
 520        struct user_namespace *ns = seq->private;
 521
 522        return m_start(seq, ppos, &ns->projid_map);
 523}
 524
 525static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
 526{
 527        (*pos)++;
 528        return seq->op->start(seq, pos);
 529}
 530
 531static void m_stop(struct seq_file *seq, void *v)
 532{
 533        return;
 534}
 535
 536const struct seq_operations proc_uid_seq_operations = {
 537        .start = uid_m_start,
 538        .stop = m_stop,
 539        .next = m_next,
 540        .show = uid_m_show,
 541};
 542
 543const struct seq_operations proc_gid_seq_operations = {
 544        .start = gid_m_start,
 545        .stop = m_stop,
 546        .next = m_next,
 547        .show = gid_m_show,
 548};
 549
 550const struct seq_operations proc_projid_seq_operations = {
 551        .start = projid_m_start,
 552        .stop = m_stop,
 553        .next = m_next,
 554        .show = projid_m_show,
 555};
 556
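/*
 * Output sketch (illustrative): reading /proc/<pid>/uid_map, gid_map or
 * projid_map prints one line per extent through the show callbacks above,
 * as three right-aligned columns: the first id inside the namespace, the
 * corresponding id in the reader's (usually the parent) namespace, and the
 * length of the range, e.g.:
 *
 *              0     100000      65536
 */
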
 557static bool mappings_overlap(struct uid_gid_map *new_map,
 558                             struct uid_gid_extent *extent)
 559{
 560        u32 upper_first, lower_first, upper_last, lower_last;
 561        unsigned idx;
 562
 563        upper_first = extent->first;
 564        lower_first = extent->lower_first;
 565        upper_last = upper_first + extent->count - 1;
 566        lower_last = lower_first + extent->count - 1;
 567
 568        for (idx = 0; idx < new_map->nr_extents; idx++) {
 569                u32 prev_upper_first, prev_lower_first;
 570                u32 prev_upper_last, prev_lower_last;
 571                struct uid_gid_extent *prev;
 572
 573                prev = &new_map->extent[idx];
 574
 575                prev_upper_first = prev->first;
 576                prev_lower_first = prev->lower_first;
 577                prev_upper_last = prev_upper_first + prev->count - 1;
 578                prev_lower_last = prev_lower_first + prev->count - 1;
 579
 580                /* Does the upper range intersect a previous extent? */
 581                if ((prev_upper_first <= upper_last) &&
 582                    (prev_upper_last >= upper_first))
 583                        return true;
 584
 585                /* Does the lower range intersect a previous extent? */
 586                if ((prev_lower_first <= lower_last) &&
 587                    (prev_lower_last >= lower_first))
 588                        return true;
 589        }
 590        return false;
 591}
 592
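/*
 * Worked example (illustrative): given an already accepted extent
 * { .first = 0, .lower_first = 100000, .count = 1000 }, a new extent
 * { .first = 2000, .lower_first = 100500, .count = 10 } is rejected:
 * its upper range 2000..2009 is clear of 0..999, but its lower range
 * 100500..100509 intersects 100000..100999, so mappings_overlap()
 * returns true.
 */
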
 593static ssize_t map_write(struct file *file, const char __user *buf,
 594                         size_t count, loff_t *ppos,
 595                         int cap_setid,
 596                         struct uid_gid_map *map,
 597                         struct uid_gid_map *parent_map)
 598{
 599        struct seq_file *seq = file->private_data;
 600        struct user_namespace *ns = seq->private;
 601        struct uid_gid_map new_map;
 602        unsigned idx;
 603        struct uid_gid_extent *extent = NULL;
 604        unsigned long page = 0;
 605        char *kbuf, *pos, *next_line;
 606        ssize_t ret = -EINVAL;
 607
 608        /*
 609         * The userns_state_mutex serializes all writes to any given map.
 610         *
 611         * Any map is only ever written once.
 612         *
 613         * An id map fits within 1 cache line on most architectures.
 614         *
 615         * On read nothing needs to be done unless you are on an
 616         * architecture with a crazy cache coherency model like alpha.
 617         *
 618         * There is a one time data dependency between reading the
 619         * count of the extents and the values of the extents.  The
 620         * desired behavior is to see the values of the extents that
 621         * were written before the count of the extents.
 622         *
 623         * To achieve this, smp_wmb() is used to guarantee the write
 624         * order and smp_rmb() guarantees that we don't have crazy
 625         * architectures returning stale data.
 626         */
 627        mutex_lock(&userns_state_mutex);
 628
 629        ret = -EPERM;
 630        /* Only allow one successful write to the map */
 631        if (map->nr_extents != 0)
 632                goto out;
 633
 634        /*
 635         * Adjusting namespace settings requires capabilities on the target.
 636         */
 637        if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
 638                goto out;
 639
 640        /* Get a buffer */
 641        ret = -ENOMEM;
 642        page = __get_free_page(GFP_TEMPORARY);
 643        kbuf = (char *) page;
 644        if (!page)
 645                goto out;
 646
 647        /* Only allow < page size writes at the beginning of the file */
 648        ret = -EINVAL;
 649        if ((*ppos != 0) || (count >= PAGE_SIZE))
 650                goto out;
 651
 652        /* Slurp in the user data */
 653        ret = -EFAULT;
 654        if (copy_from_user(kbuf, buf, count))
 655                goto out;
 656        kbuf[count] = '\0';
 657
 658        /* Parse the user data */
 659        ret = -EINVAL;
 660        pos = kbuf;
 661        new_map.nr_extents = 0;
 662        for (; pos; pos = next_line) {
 663                extent = &new_map.extent[new_map.nr_extents];
 664
 665                /* Find the end of line and ensure I don't look past it */
 666                next_line = strchr(pos, '\n');
 667                if (next_line) {
 668                        *next_line = '\0';
 669                        next_line++;
 670                        if (*next_line == '\0')
 671                                next_line = NULL;
 672                }
 673
 674                pos = skip_spaces(pos);
 675                extent->first = simple_strtoul(pos, &pos, 10);
 676                if (!isspace(*pos))
 677                        goto out;
 678
 679                pos = skip_spaces(pos);
 680                extent->lower_first = simple_strtoul(pos, &pos, 10);
 681                if (!isspace(*pos))
 682                        goto out;
 683
 684                pos = skip_spaces(pos);
 685                extent->count = simple_strtoul(pos, &pos, 10);
 686                if (*pos && !isspace(*pos))
 687                        goto out;
 688
 689                /* Verify there is no trailing junk on the line */
 690                pos = skip_spaces(pos);
 691                if (*pos != '\0')
 692                        goto out;
 693
 694                /* Verify we have been given valid starting values */
 695                if ((extent->first == (u32) -1) ||
 696                    (extent->lower_first == (u32) -1))
 697                        goto out;
 698
 699                /* Verify count is not zero and does not cause the
 700                 * extent to wrap
 701                 */
 702                if ((extent->first + extent->count) <= extent->first)
 703                        goto out;
 704                if ((extent->lower_first + extent->count) <=
 705                     extent->lower_first)
 706                        goto out;
 707
 708                /* Do the ranges in extent overlap any previous extents? */
 709                if (mappings_overlap(&new_map, extent))
 710                        goto out;
 711
 712                new_map.nr_extents++;
 713
 714                /* Fail if the file contains too many extents */
 715                if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
 716                    (next_line != NULL))
 717                        goto out;
 718        }
 719        /* Be very certain the new map actually exists */
 720        if (new_map.nr_extents == 0)
 721                goto out;
 722
 723        ret = -EPERM;
 724        /* Validate that the user is allowed to use the user ids being mapped to. */
 725        if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
 726                goto out;
 727
 728        /* Map the lower ids from the parent user namespace to the
 729         * kernel global id space.
 730         */
 731        for (idx = 0; idx < new_map.nr_extents; idx++) {
 732                u32 lower_first;
 733                extent = &new_map.extent[idx];
 734
 735                lower_first = map_id_range_down(parent_map,
 736                                                extent->lower_first,
 737                                                extent->count);
 738
 739                /* Fail if we can not map the specified extent to
 740                 * the kernel global id space.
 741                 */
 742                if (lower_first == (u32) -1)
 743                        goto out;
 744
 745                extent->lower_first = lower_first;
 746        }
 747
 748        /* Install the map */
 749        memcpy(map->extent, new_map.extent,
 750                new_map.nr_extents*sizeof(new_map.extent[0]));
 751        smp_wmb();
 752        map->nr_extents = new_map.nr_extents;
 753
 754        *ppos = count;
 755        ret = count;
 756out:
 757        mutex_unlock(&userns_state_mutex);
 758        if (page)
 759                free_page(page);
 760        return ret;
 761}
 762
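/*
 * Format sketch (illustrative): each line accepted by map_write() is
 * "<first> <lower_first> <count>".  A single-extent uid_map that maps ids
 * 0..65535 in the namespace onto 100000..165535 in the parent looks like:
 *
 *     0 100000 65536
 *
 * As enforced above, the whole map must arrive in one write of less than
 * PAGE_SIZE bytes at offset 0, may contain at most UID_GID_MAP_MAX_EXTENTS
 * extents, and can only be written once per map.
 */
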
 763ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
 764                           size_t size, loff_t *ppos)
 765{
 766        struct seq_file *seq = file->private_data;
 767        struct user_namespace *ns = seq->private;
 768        struct user_namespace *seq_ns = seq_user_ns(seq);
 769
 770        if (!ns->parent)
 771                return -EPERM;
 772
 773        if ((seq_ns != ns) && (seq_ns != ns->parent))
 774                return -EPERM;
 775
 776        return map_write(file, buf, size, ppos, CAP_SETUID,
 777                         &ns->uid_map, &ns->parent->uid_map);
 778}
 779
 780ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
 781                           size_t size, loff_t *ppos)
 782{
 783        struct seq_file *seq = file->private_data;
 784        struct user_namespace *ns = seq->private;
 785        struct user_namespace *seq_ns = seq_user_ns(seq);
 786
 787        if (!ns->parent)
 788                return -EPERM;
 789
 790        if ((seq_ns != ns) && (seq_ns != ns->parent))
 791                return -EPERM;
 792
 793        return map_write(file, buf, size, ppos, CAP_SETGID,
 794                         &ns->gid_map, &ns->parent->gid_map);
 795}
 796
 797ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
 798                              size_t size, loff_t *ppos)
 799{
 800        struct seq_file *seq = file->private_data;
 801        struct user_namespace *ns = seq->private;
 802        struct user_namespace *seq_ns = seq_user_ns(seq);
 803
 804        if (!ns->parent)
 805                return -EPERM;
 806
 807        if ((seq_ns != ns) && (seq_ns != ns->parent))
 808                return -EPERM;
 809
 810        /* Anyone can set any valid project id; no capability is needed */
 811        return map_write(file, buf, size, ppos, -1,
 812                         &ns->projid_map, &ns->parent->projid_map);
 813}
 814
 815static bool new_idmap_permitted(const struct file *file,
 816                                struct user_namespace *ns, int cap_setid,
 817                                struct uid_gid_map *new_map)
 818{
 819        const struct cred *cred = file->f_cred;
 820        /* Don't allow mappings that would allow anything that wouldn't
 821         * be allowed without the establishment of unprivileged mappings.
 822         */
 823        if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
 824            uid_eq(ns->owner, cred->euid)) {
 825                u32 id = new_map->extent[0].lower_first;
 826                if (cap_setid == CAP_SETUID) {
 827                        kuid_t uid = make_kuid(ns->parent, id);
 828                        if (uid_eq(uid, cred->euid))
 829                                return true;
 830                } else if (cap_setid == CAP_SETGID) {
 831                        kgid_t gid = make_kgid(ns->parent, id);
 832                        if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
 833                            gid_eq(gid, cred->egid))
 834                                return true;
 835                }
 836        }
 837
 838        /* Allow anyone to set a mapping that doesn't require privilege */
 839        if (!cap_valid(cap_setid))
 840                return true;
 841
 842        /* Allow the specified ids if we have the appropriate capability
 843         * (CAP_SETUID or CAP_SETGID) over the parent user namespace,
 844         * and the opener of the id file also had the appropriate capability.
 845         */
 846        if (ns_capable(ns->parent, cap_setid) &&
 847            file_ns_capable(file, ns->parent, cap_setid))
 848                return true;
 849
 850        return false;
 851}
 852
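/*
 * Example (illustrative): a process that created the namespace and whose
 * effective uid appears as 1000 in the parent namespace may, without holding
 * CAP_SETUID there, install exactly
 *
 *     0 1000 1
 *
 * in its uid_map: a single extent of count 1 whose lower id is its own euid,
 * as checked in the first branch above.  The analogous gid_map case also
 * requires setgroups to have been denied first, and anything wider requires
 * CAP_SETUID or CAP_SETGID over the parent user namespace.
 */
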
 853int proc_setgroups_show(struct seq_file *seq, void *v)
 854{
 855        struct user_namespace *ns = seq->private;
 856        unsigned long userns_flags = ACCESS_ONCE(ns->flags);
 857
 858        seq_printf(seq, "%s\n",
 859                   (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
 860                   "allow" : "deny");
 861        return 0;
 862}
 863
 864ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
 865                             size_t count, loff_t *ppos)
 866{
 867        struct seq_file *seq = file->private_data;
 868        struct user_namespace *ns = seq->private;
 869        char kbuf[8], *pos;
 870        bool setgroups_allowed;
 871        ssize_t ret;
 872
 873        /* Only allow a very narrow range of strings to be written */
 874        ret = -EINVAL;
 875        if ((*ppos != 0) || (count >= sizeof(kbuf)))
 876                goto out;
 877
 878        /* What was written? */
 879        ret = -EFAULT;
 880        if (copy_from_user(kbuf, buf, count))
 881                goto out;
 882        kbuf[count] = '\0';
 883        pos = kbuf;
 884
 885        /* What is being requested? */
 886        ret = -EINVAL;
 887        if (strncmp(pos, "allow", 5) == 0) {
 888                pos += 5;
 889                setgroups_allowed = true;
 890        }
 891        else if (strncmp(pos, "deny", 4) == 0) {
 892                pos += 4;
 893                setgroups_allowed = false;
 894        }
 895        else
 896                goto out;
 897
 898        /* Verify there is no trailing junk on the line */
 899        pos = skip_spaces(pos);
 900        if (*pos != '\0')
 901                goto out;
 902
 903        ret = -EPERM;
 904        mutex_lock(&userns_state_mutex);
 905        if (setgroups_allowed) {
 906                /* Enabling setgroups after setgroups has been disabled
 907                 * is not allowed.
 908                 */
 909                if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
 910                        goto out_unlock;
 911        } else {
 912                /* Permanently disabling setgroups after setgroups has
 913                 * been enabled by writing the gid_map is not allowed.
 914                 */
 915                if (ns->gid_map.nr_extents != 0)
 916                        goto out_unlock;
 917                ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
 918        }
 919        mutex_unlock(&userns_state_mutex);
 920
 921        /* Report a successful write */
 922        *ppos = count;
 923        ret = count;
 924out:
 925        return ret;
 926out_unlock:
 927        mutex_unlock(&userns_state_mutex);
 928        goto out;
 929}
 930
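/*
 * Userspace sketch (illustrative, not part of this file): an unprivileged
 * task mapping its own gid must write "deny" before writing gid_map, since
 * once gid_map is populated the transition to "deny" above fails with
 * -EPERM (as does re-enabling with "allow" after a "deny").
 *
 *     // <pid> is a placeholder for the task that ran unshare(CLONE_NEWUSER)
 *     int fd = open("/proc/<pid>/setgroups", O_WRONLY);
 *     write(fd, "deny", 4);
 *     close(fd);
 *     fd = open("/proc/<pid>/gid_map", O_WRONLY);
 *     write(fd, "0 1000 1", 8);   // assumes egid 1000 in the parent namespace
 *     close(fd);
 */
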
 931bool userns_may_setgroups(const struct user_namespace *ns)
 932{
 933        bool allowed;
 934
 935        mutex_lock(&userns_state_mutex);
 936        /* It is not safe to use setgroups until a gid mapping in
 937         * the user namespace has been established.
 938         */
 939        allowed = ns->gid_map.nr_extents != 0;
 940        /* Is setgroups allowed? */
 941        allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
 942        mutex_unlock(&userns_state_mutex);
 943
 944        return allowed;
 945}
 946
 947static inline struct user_namespace *to_user_ns(struct ns_common *ns)
 948{
 949        return container_of(ns, struct user_namespace, ns);
 950}
 951
 952static struct ns_common *userns_get(struct task_struct *task)
 953{
 954        struct user_namespace *user_ns;
 955
 956        rcu_read_lock();
 957        user_ns = get_user_ns(__task_cred(task)->user_ns);
 958        rcu_read_unlock();
 959
 960        return user_ns ? &user_ns->ns : NULL;
 961}
 962
 963static void userns_put(struct ns_common *ns)
 964{
 965        put_user_ns(to_user_ns(ns));
 966}
 967
 968static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 969{
 970        struct user_namespace *user_ns = to_user_ns(ns);
 971        struct cred *cred;
 972
 973        /* Don't allow gaining capabilities by reentering
 974         * the same user namespace.
 975         */
 976        if (user_ns == current_user_ns())
 977                return -EINVAL;
 978
 979        /* Threaded processes may not enter a different user namespace */
 980        if (atomic_read(&current->mm->mm_users) > 1)
 981                return -EINVAL;
 982
 983        if (current->fs->users != 1)
 984                return -EINVAL;
 985
 986        if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 987                return -EPERM;
 988
 989        cred = prepare_creds();
 990        if (!cred)
 991                return -ENOMEM;
 992
 993        put_user_ns(cred->user_ns);
 994        set_cred_user_ns(cred, get_user_ns(user_ns));
 995
 996        return commit_creds(cred);
 997}
 998
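/*
 * Userspace sketch (illustrative, not part of this file): joining an existing
 * user namespace with setns(2) lands in userns_install() above, so the caller
 * must be single-threaded, must not share its fs_struct, and needs
 * CAP_SYS_ADMIN in the target namespace.
 *
 *     #define _GNU_SOURCE
 *     #include <sched.h>   // setns(), CLONE_NEWUSER
 *     #include <fcntl.h>   // open()
 *
 *     int fd = open("/proc/<pid>/ns/user", O_RDONLY);  // <pid>: target task
 *     if (fd >= 0 && setns(fd, CLONE_NEWUSER) == -1)
 *             ;   // EINVAL or EPERM per the checks above
 */
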
 999const struct proc_ns_operations userns_operations = {
1000        .name           = "user",
1001        .type           = CLONE_NEWUSER,
1002        .get            = userns_get,
1003        .put            = userns_put,
1004        .install        = userns_install,
1005};
1006
1007static __init int user_namespaces_init(void)
1008{
1009        user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
1010        return 0;
1011}
1012subsys_initcall(user_namespaces_init);
1013