linux/kernel/user_namespace.c
<<
>>
Prefs
   1/*
   2 *  This program is free software; you can redistribute it and/or
   3 *  modify it under the terms of the GNU General Public License as
   4 *  published by the Free Software Foundation, version 2 of the
   5 *  License.
   6 */
   7
   8#include <linux/export.h>
   9#include <linux/nsproxy.h>
  10#include <linux/slab.h>
  11#include <linux/user_namespace.h>
  12#include <linux/proc_ns.h>
  13#include <linux/highuid.h>
  14#include <linux/cred.h>
  15#include <linux/securebits.h>
  16#include <linux/keyctl.h>
  17#include <linux/key-type.h>
  18#include <keys/user-type.h>
  19#include <linux/seq_file.h>
  20#include <linux/fs.h>
  21#include <linux/uaccess.h>
  22#include <linux/ctype.h>
  23#include <linux/projid.h>
  24#include <linux/fs_struct.h>
  25
  26static struct kmem_cache *user_ns_cachep __read_mostly;
  27static DEFINE_MUTEX(userns_state_mutex);
  28
  29static bool new_idmap_permitted(const struct file *file,
  30                                struct user_namespace *ns, int cap_setid,
  31                                struct uid_gid_map *map);
  32
  33static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
  34{
  35        /* Start with the same capabilities as init but useless for doing
  36         * anything as the capabilities are bound to the new user namespace.
  37         */
  38        cred->securebits = SECUREBITS_DEFAULT;
  39        cred->cap_inheritable = CAP_EMPTY_SET;
  40        cred->cap_permitted = CAP_FULL_SET;
  41        cred->cap_effective = CAP_FULL_SET;
  42        cred->cap_ambient = CAP_EMPTY_SET;
  43        cred->cap_bset = CAP_FULL_SET;
  44#ifdef CONFIG_KEYS
  45        key_put(cred->request_key_auth);
  46        cred->request_key_auth = NULL;
  47#endif
  48        /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
  49        cred->user_ns = user_ns;
  50}
  51
  52/*
  53 * Create a new user namespace, deriving the creator from the user in the
  54 * passed credentials, and replacing that user with the new root user for the
  55 * new namespace.
  56 *
  57 * This is called by copy_creds(), which will finish setting the target task's
  58 * credentials.
  59 */
  60int create_user_ns(struct cred *new)
  61{
  62        struct user_namespace *ns, *parent_ns = new->user_ns;
  63        kuid_t owner = new->euid;
  64        kgid_t group = new->egid;
  65        int ret;
  66
  67        if (parent_ns->level > 32)
  68                return -EUSERS;
  69
  70        /*
  71         * Verify that we can not violate the policy of which files
  72         * may be accessed that is specified by the root directory,
  73         * by verifing that the root directory is at the root of the
  74         * mount namespace which allows all files to be accessed.
  75         */
  76        if (current_chrooted())
  77                return -EPERM;
  78
  79        /* The creator needs a mapping in the parent user namespace
  80         * or else we won't be able to reasonably tell userspace who
  81         * created a user_namespace.
  82         */
  83        if (!kuid_has_mapping(parent_ns, owner) ||
  84            !kgid_has_mapping(parent_ns, group))
  85                return -EPERM;
  86
  87        ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
  88        if (!ns)
  89                return -ENOMEM;
  90
  91        ret = ns_alloc_inum(&ns->ns);
  92        if (ret) {
  93                kmem_cache_free(user_ns_cachep, ns);
  94                return ret;
  95        }
  96        ns->ns.ops = &userns_operations;
  97
  98        atomic_set(&ns->count, 1);
  99        /* Leave the new->user_ns reference with the new user namespace. */
 100        ns->parent = parent_ns;
 101        ns->level = parent_ns->level + 1;
 102        ns->owner = owner;
 103        ns->group = group;
 104
 105        /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
 106        mutex_lock(&userns_state_mutex);
 107        ns->flags = parent_ns->flags;
 108        mutex_unlock(&userns_state_mutex);
 109
 110        set_cred_user_ns(new, ns);
 111
 112#ifdef CONFIG_PERSISTENT_KEYRINGS
 113        init_rwsem(&ns->persistent_keyring_register_sem);
 114#endif
 115        return 0;
 116}
 117
 118int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
 119{
 120        struct cred *cred;
 121        int err = -ENOMEM;
 122
 123        if (!(unshare_flags & CLONE_NEWUSER))
 124                return 0;
 125
 126        cred = prepare_creds();
 127        if (cred) {
 128                err = create_user_ns(cred);
 129                if (err)
 130                        put_cred(cred);
 131                else
 132                        *new_cred = cred;
 133        }
 134
 135        return err;
 136}
 137
 138void free_user_ns(struct user_namespace *ns)
 139{
 140        struct user_namespace *parent;
 141
 142        do {
 143                parent = ns->parent;
 144#ifdef CONFIG_PERSISTENT_KEYRINGS
 145                key_put(ns->persistent_keyring_register);
 146#endif
 147                ns_free_inum(&ns->ns);
 148                kmem_cache_free(user_ns_cachep, ns);
 149                ns = parent;
 150        } while (atomic_dec_and_test(&parent->count));
 151}
 152EXPORT_SYMBOL(free_user_ns);
 153
 154static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
 155{
 156        unsigned idx, extents;
 157        u32 first, last, id2;
 158
 159        id2 = id + count - 1;
 160
 161        /* Find the matching extent */
 162        extents = map->nr_extents;
 163        smp_rmb();
 164        for (idx = 0; idx < extents; idx++) {
 165                first = map->extent[idx].first;
 166                last = first + map->extent[idx].count - 1;
 167                if (id >= first && id <= last &&
 168                    (id2 >= first && id2 <= last))
 169                        break;
 170        }
 171        /* Map the id or note failure */
 172        if (idx < extents)
 173                id = (id - first) + map->extent[idx].lower_first;
 174        else
 175                id = (u32) -1;
 176
 177        return id;
 178}
 179
 180static u32 map_id_down(struct uid_gid_map *map, u32 id)
 181{
 182        unsigned idx, extents;
 183        u32 first, last;
 184
 185        /* Find the matching extent */
 186        extents = map->nr_extents;
 187        smp_rmb();
 188        for (idx = 0; idx < extents; idx++) {
 189                first = map->extent[idx].first;
 190                last = first + map->extent[idx].count - 1;
 191                if (id >= first && id <= last)
 192                        break;
 193        }
 194        /* Map the id or note failure */
 195        if (idx < extents)
 196                id = (id - first) + map->extent[idx].lower_first;
 197        else
 198                id = (u32) -1;
 199
 200        return id;
 201}
 202
 203static u32 map_id_up(struct uid_gid_map *map, u32 id)
 204{
 205        unsigned idx, extents;
 206        u32 first, last;
 207
 208        /* Find the matching extent */
 209        extents = map->nr_extents;
 210        smp_rmb();
 211        for (idx = 0; idx < extents; idx++) {
 212                first = map->extent[idx].lower_first;
 213                last = first + map->extent[idx].count - 1;
 214                if (id >= first && id <= last)
 215                        break;
 216        }
 217        /* Map the id or note failure */
 218        if (idx < extents)
 219                id = (id - first) + map->extent[idx].first;
 220        else
 221                id = (u32) -1;
 222
 223        return id;
 224}
 225
 226/**
 227 *      make_kuid - Map a user-namespace uid pair into a kuid.
 228 *      @ns:  User namespace that the uid is in
 229 *      @uid: User identifier
 230 *
 231 *      Maps a user-namespace uid pair into a kernel internal kuid,
 232 *      and returns that kuid.
 233 *
 234 *      When there is no mapping defined for the user-namespace uid
 235 *      pair INVALID_UID is returned.  Callers are expected to test
 236 *      for and handle INVALID_UID being returned.  INVALID_UID
 237 *      may be tested for using uid_valid().
 238 */
 239kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
 240{
 241        /* Map the uid to a global kernel uid */
 242        return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
 243}
 244EXPORT_SYMBOL(make_kuid);
 245
 246/**
 247 *      from_kuid - Create a uid from a kuid user-namespace pair.
 248 *      @targ: The user namespace we want a uid in.
 249 *      @kuid: The kernel internal uid to start with.
 250 *
 251 *      Map @kuid into the user-namespace specified by @targ and
 252 *      return the resulting uid.
 253 *
 254 *      There is always a mapping into the initial user_namespace.
 255 *
 256 *      If @kuid has no mapping in @targ (uid_t)-1 is returned.
 257 */
 258uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
 259{
 260        /* Map the uid from a global kernel uid */
 261        return map_id_up(&targ->uid_map, __kuid_val(kuid));
 262}
 263EXPORT_SYMBOL(from_kuid);
 264
 265/**
 266 *      from_kuid_munged - Create a uid from a kuid user-namespace pair.
 267 *      @targ: The user namespace we want a uid in.
 268 *      @kuid: The kernel internal uid to start with.
 269 *
 270 *      Map @kuid into the user-namespace specified by @targ and
 271 *      return the resulting uid.
 272 *
 273 *      There is always a mapping into the initial user_namespace.
 274 *
 275 *      Unlike from_kuid from_kuid_munged never fails and always
 276 *      returns a valid uid.  This makes from_kuid_munged appropriate
 277 *      for use in syscalls like stat and getuid where failing the
 278 *      system call and failing to provide a valid uid are not an
 279 *      options.
 280 *
 281 *      If @kuid has no mapping in @targ overflowuid is returned.
 282 */
 283uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
 284{
 285        uid_t uid;
 286        uid = from_kuid(targ, kuid);
 287
 288        if (uid == (uid_t) -1)
 289                uid = overflowuid;
 290        return uid;
 291}
 292EXPORT_SYMBOL(from_kuid_munged);
 293
 294/**
 295 *      make_kgid - Map a user-namespace gid pair into a kgid.
 296 *      @ns:  User namespace that the gid is in
 297 *      @gid: group identifier
 298 *
 299 *      Maps a user-namespace gid pair into a kernel internal kgid,
 300 *      and returns that kgid.
 301 *
 302 *      When there is no mapping defined for the user-namespace gid
 303 *      pair INVALID_GID is returned.  Callers are expected to test
 304 *      for and handle INVALID_GID being returned.  INVALID_GID may be
 305 *      tested for using gid_valid().
 306 */
 307kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
 308{
 309        /* Map the gid to a global kernel gid */
 310        return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
 311}
 312EXPORT_SYMBOL(make_kgid);
 313
 314/**
 315 *      from_kgid - Create a gid from a kgid user-namespace pair.
 316 *      @targ: The user namespace we want a gid in.
 317 *      @kgid: The kernel internal gid to start with.
 318 *
 319 *      Map @kgid into the user-namespace specified by @targ and
 320 *      return the resulting gid.
 321 *
 322 *      There is always a mapping into the initial user_namespace.
 323 *
 324 *      If @kgid has no mapping in @targ (gid_t)-1 is returned.
 325 */
 326gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
 327{
 328        /* Map the gid from a global kernel gid */
 329        return map_id_up(&targ->gid_map, __kgid_val(kgid));
 330}
 331EXPORT_SYMBOL(from_kgid);
 332
 333/**
 334 *      from_kgid_munged - Create a gid from a kgid user-namespace pair.
 335 *      @targ: The user namespace we want a gid in.
 336 *      @kgid: The kernel internal gid to start with.
 337 *
 338 *      Map @kgid into the user-namespace specified by @targ and
 339 *      return the resulting gid.
 340 *
 341 *      There is always a mapping into the initial user_namespace.
 342 *
 343 *      Unlike from_kgid from_kgid_munged never fails and always
 344 *      returns a valid gid.  This makes from_kgid_munged appropriate
 345 *      for use in syscalls like stat and getgid where failing the
 346 *      system call and failing to provide a valid gid are not options.
 347 *
 348 *      If @kgid has no mapping in @targ overflowgid is returned.
 349 */
 350gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
 351{
 352        gid_t gid;
 353        gid = from_kgid(targ, kgid);
 354
 355        if (gid == (gid_t) -1)
 356                gid = overflowgid;
 357        return gid;
 358}
 359EXPORT_SYMBOL(from_kgid_munged);
 360
 361/**
 362 *      make_kprojid - Map a user-namespace projid pair into a kprojid.
 363 *      @ns:  User namespace that the projid is in
 364 *      @projid: Project identifier
 365 *
 366 *      Maps a user-namespace uid pair into a kernel internal kuid,
 367 *      and returns that kuid.
 368 *
 369 *      When there is no mapping defined for the user-namespace projid
 370 *      pair INVALID_PROJID is returned.  Callers are expected to test
 371 *      for and handle handle INVALID_PROJID being returned.  INVALID_PROJID
 372 *      may be tested for using projid_valid().
 373 */
 374kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
 375{
 376        /* Map the uid to a global kernel uid */
 377        return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
 378}
 379EXPORT_SYMBOL(make_kprojid);
 380
 381/**
 382 *      from_kprojid - Create a projid from a kprojid user-namespace pair.
 383 *      @targ: The user namespace we want a projid in.
 384 *      @kprojid: The kernel internal project identifier to start with.
 385 *
 386 *      Map @kprojid into the user-namespace specified by @targ and
 387 *      return the resulting projid.
 388 *
 389 *      There is always a mapping into the initial user_namespace.
 390 *
 391 *      If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 392 */
 393projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
 394{
 395        /* Map the uid from a global kernel uid */
 396        return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
 397}
 398EXPORT_SYMBOL(from_kprojid);
 399
 400/**
 401 *      from_kprojid_munged - Create a projiid from a kprojid user-namespace pair.
 402 *      @targ: The user namespace we want a projid in.
 403 *      @kprojid: The kernel internal projid to start with.
 404 *
 405 *      Map @kprojid into the user-namespace specified by @targ and
 406 *      return the resulting projid.
 407 *
 408 *      There is always a mapping into the initial user_namespace.
 409 *
 410 *      Unlike from_kprojid from_kprojid_munged never fails and always
 411 *      returns a valid projid.  This makes from_kprojid_munged
 412 *      appropriate for use in syscalls like stat and where
 413 *      failing the system call and failing to provide a valid projid are
 414 *      not an options.
 415 *
 416 *      If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
 417 */
 418projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
 419{
 420        projid_t projid;
 421        projid = from_kprojid(targ, kprojid);
 422
 423        if (projid == (projid_t) -1)
 424                projid = OVERFLOW_PROJID;
 425        return projid;
 426}
 427EXPORT_SYMBOL(from_kprojid_munged);
 428
 429
 430static int uid_m_show(struct seq_file *seq, void *v)
 431{
 432        struct user_namespace *ns = seq->private;
 433        struct uid_gid_extent *extent = v;
 434        struct user_namespace *lower_ns;
 435        uid_t lower;
 436
 437        lower_ns = seq_user_ns(seq);
 438        if ((lower_ns == ns) && lower_ns->parent)
 439                lower_ns = lower_ns->parent;
 440
 441        lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
 442
 443        seq_printf(seq, "%10u %10u %10u\n",
 444                extent->first,
 445                lower,
 446                extent->count);
 447
 448        return 0;
 449}
 450
 451static int gid_m_show(struct seq_file *seq, void *v)
 452{
 453        struct user_namespace *ns = seq->private;
 454        struct uid_gid_extent *extent = v;
 455        struct user_namespace *lower_ns;
 456        gid_t lower;
 457
 458        lower_ns = seq_user_ns(seq);
 459        if ((lower_ns == ns) && lower_ns->parent)
 460                lower_ns = lower_ns->parent;
 461
 462        lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
 463
 464        seq_printf(seq, "%10u %10u %10u\n",
 465                extent->first,
 466                lower,
 467                extent->count);
 468
 469        return 0;
 470}
 471
 472static int projid_m_show(struct seq_file *seq, void *v)
 473{
 474        struct user_namespace *ns = seq->private;
 475        struct uid_gid_extent *extent = v;
 476        struct user_namespace *lower_ns;
 477        projid_t lower;
 478
 479        lower_ns = seq_user_ns(seq);
 480        if ((lower_ns == ns) && lower_ns->parent)
 481                lower_ns = lower_ns->parent;
 482
 483        lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));
 484
 485        seq_printf(seq, "%10u %10u %10u\n",
 486                extent->first,
 487                lower,
 488                extent->count);
 489
 490        return 0;
 491}
 492
 493static void *m_start(struct seq_file *seq, loff_t *ppos,
 494                     struct uid_gid_map *map)
 495{
 496        struct uid_gid_extent *extent = NULL;
 497        loff_t pos = *ppos;
 498
 499        if (pos < map->nr_extents)
 500                extent = &map->extent[pos];
 501
 502        return extent;
 503}
 504
 505static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
 506{
 507        struct user_namespace *ns = seq->private;
 508
 509        return m_start(seq, ppos, &ns->uid_map);
 510}
 511
 512static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
 513{
 514        struct user_namespace *ns = seq->private;
 515
 516        return m_start(seq, ppos, &ns->gid_map);
 517}
 518
 519static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
 520{
 521        struct user_namespace *ns = seq->private;
 522
 523        return m_start(seq, ppos, &ns->projid_map);
 524}
 525
 526static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
 527{
 528        (*pos)++;
 529        return seq->op->start(seq, pos);
 530}
 531
 532static void m_stop(struct seq_file *seq, void *v)
 533{
 534        return;
 535}
 536
 537const struct seq_operations proc_uid_seq_operations = {
 538        .start = uid_m_start,
 539        .stop = m_stop,
 540        .next = m_next,
 541        .show = uid_m_show,
 542};
 543
 544const struct seq_operations proc_gid_seq_operations = {
 545        .start = gid_m_start,
 546        .stop = m_stop,
 547        .next = m_next,
 548        .show = gid_m_show,
 549};
 550
 551const struct seq_operations proc_projid_seq_operations = {
 552        .start = projid_m_start,
 553        .stop = m_stop,
 554        .next = m_next,
 555        .show = projid_m_show,
 556};
 557
 558static bool mappings_overlap(struct uid_gid_map *new_map,
 559                             struct uid_gid_extent *extent)
 560{
 561        u32 upper_first, lower_first, upper_last, lower_last;
 562        unsigned idx;
 563
 564        upper_first = extent->first;
 565        lower_first = extent->lower_first;
 566        upper_last = upper_first + extent->count - 1;
 567        lower_last = lower_first + extent->count - 1;
 568
 569        for (idx = 0; idx < new_map->nr_extents; idx++) {
 570                u32 prev_upper_first, prev_lower_first;
 571                u32 prev_upper_last, prev_lower_last;
 572                struct uid_gid_extent *prev;
 573
 574                prev = &new_map->extent[idx];
 575
 576                prev_upper_first = prev->first;
 577                prev_lower_first = prev->lower_first;
 578                prev_upper_last = prev_upper_first + prev->count - 1;
 579                prev_lower_last = prev_lower_first + prev->count - 1;
 580
 581                /* Does the upper range intersect a previous extent? */
 582                if ((prev_upper_first <= upper_last) &&
 583                    (prev_upper_last >= upper_first))
 584                        return true;
 585
 586                /* Does the lower range intersect a previous extent? */
 587                if ((prev_lower_first <= lower_last) &&
 588                    (prev_lower_last >= lower_first))
 589                        return true;
 590        }
 591        return false;
 592}
 593
 594static ssize_t map_write(struct file *file, const char __user *buf,
 595                         size_t count, loff_t *ppos,
 596                         int cap_setid,
 597                         struct uid_gid_map *map,
 598                         struct uid_gid_map *parent_map)
 599{
 600        struct seq_file *seq = file->private_data;
 601        struct user_namespace *ns = seq->private;
 602        struct uid_gid_map new_map;
 603        unsigned idx;
 604        struct uid_gid_extent *extent = NULL;
 605        char *kbuf = NULL, *pos, *next_line;
 606        ssize_t ret = -EINVAL;
 607
 608        /*
 609         * The userns_state_mutex serializes all writes to any given map.
 610         *
 611         * Any map is only ever written once.
 612         *
 613         * An id map fits within 1 cache line on most architectures.
 614         *
 615         * On read nothing needs to be done unless you are on an
 616         * architecture with a crazy cache coherency model like alpha.
 617         *
 618         * There is a one time data dependency between reading the
 619         * count of the extents and the values of the extents.  The
 620         * desired behavior is to see the values of the extents that
 621         * were written before the count of the extents.
 622         *
 623         * To achieve this smp_wmb() is used on guarantee the write
 624         * order and smp_rmb() is guaranteed that we don't have crazy
 625         * architectures returning stale data.
 626         */
 627        mutex_lock(&userns_state_mutex);
 628
 629        ret = -EPERM;
 630        /* Only allow one successful write to the map */
 631        if (map->nr_extents != 0)
 632                goto out;
 633
 634        /*
 635         * Adjusting namespace settings requires capabilities on the target.
 636         */
 637        if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
 638                goto out;
 639
 640        /* Only allow < page size writes at the beginning of the file */
 641        ret = -EINVAL;
 642        if ((*ppos != 0) || (count >= PAGE_SIZE))
 643                goto out;
 644
 645        /* Slurp in the user data */
 646        kbuf = memdup_user_nul(buf, count);
 647        if (IS_ERR(kbuf)) {
 648                ret = PTR_ERR(kbuf);
 649                kbuf = NULL;
 650                goto out;
 651        }
 652
 653        /* Parse the user data */
 654        ret = -EINVAL;
 655        pos = kbuf;
 656        new_map.nr_extents = 0;
 657        for (; pos; pos = next_line) {
 658                extent = &new_map.extent[new_map.nr_extents];
 659
 660                /* Find the end of line and ensure I don't look past it */
 661                next_line = strchr(pos, '\n');
 662                if (next_line) {
 663                        *next_line = '\0';
 664                        next_line++;
 665                        if (*next_line == '\0')
 666                                next_line = NULL;
 667                }
 668
 669                pos = skip_spaces(pos);
 670                extent->first = simple_strtoul(pos, &pos, 10);
 671                if (!isspace(*pos))
 672                        goto out;
 673
 674                pos = skip_spaces(pos);
 675                extent->lower_first = simple_strtoul(pos, &pos, 10);
 676                if (!isspace(*pos))
 677                        goto out;
 678
 679                pos = skip_spaces(pos);
 680                extent->count = simple_strtoul(pos, &pos, 10);
 681                if (*pos && !isspace(*pos))
 682                        goto out;
 683
 684                /* Verify there is not trailing junk on the line */
 685                pos = skip_spaces(pos);
 686                if (*pos != '\0')
 687                        goto out;
 688
 689                /* Verify we have been given valid starting values */
 690                if ((extent->first == (u32) -1) ||
 691                    (extent->lower_first == (u32) -1))
 692                        goto out;
 693
 694                /* Verify count is not zero and does not cause the
 695                 * extent to wrap
 696                 */
 697                if ((extent->first + extent->count) <= extent->first)
 698                        goto out;
 699                if ((extent->lower_first + extent->count) <=
 700                     extent->lower_first)
 701                        goto out;
 702
 703                /* Do the ranges in extent overlap any previous extents? */
 704                if (mappings_overlap(&new_map, extent))
 705                        goto out;
 706
 707                new_map.nr_extents++;
 708
 709                /* Fail if the file contains too many extents */
 710                if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
 711                    (next_line != NULL))
 712                        goto out;
 713        }
 714        /* Be very certaint the new map actually exists */
 715        if (new_map.nr_extents == 0)
 716                goto out;
 717
 718        ret = -EPERM;
 719        /* Validate the user is allowed to use user id's mapped to. */
 720        if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
 721                goto out;
 722
 723        /* Map the lower ids from the parent user namespace to the
 724         * kernel global id space.
 725         */
 726        for (idx = 0; idx < new_map.nr_extents; idx++) {
 727                u32 lower_first;
 728                extent = &new_map.extent[idx];
 729
 730                lower_first = map_id_range_down(parent_map,
 731                                                extent->lower_first,
 732                                                extent->count);
 733
 734                /* Fail if we can not map the specified extent to
 735                 * the kernel global id space.
 736                 */
 737                if (lower_first == (u32) -1)
 738                        goto out;
 739
 740                extent->lower_first = lower_first;
 741        }
 742
 743        /* Install the map */
 744        memcpy(map->extent, new_map.extent,
 745                new_map.nr_extents*sizeof(new_map.extent[0]));
 746        smp_wmb();
 747        map->nr_extents = new_map.nr_extents;
 748
 749        *ppos = count;
 750        ret = count;
 751out:
 752        mutex_unlock(&userns_state_mutex);
 753        kfree(kbuf);
 754        return ret;
 755}
 756
 757ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
 758                           size_t size, loff_t *ppos)
 759{
 760        struct seq_file *seq = file->private_data;
 761        struct user_namespace *ns = seq->private;
 762        struct user_namespace *seq_ns = seq_user_ns(seq);
 763
 764        if (!ns->parent)
 765                return -EPERM;
 766
 767        if ((seq_ns != ns) && (seq_ns != ns->parent))
 768                return -EPERM;
 769
 770        return map_write(file, buf, size, ppos, CAP_SETUID,
 771                         &ns->uid_map, &ns->parent->uid_map);
 772}
 773
 774ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
 775                           size_t size, loff_t *ppos)
 776{
 777        struct seq_file *seq = file->private_data;
 778        struct user_namespace *ns = seq->private;
 779        struct user_namespace *seq_ns = seq_user_ns(seq);
 780
 781        if (!ns->parent)
 782                return -EPERM;
 783
 784        if ((seq_ns != ns) && (seq_ns != ns->parent))
 785                return -EPERM;
 786
 787        return map_write(file, buf, size, ppos, CAP_SETGID,
 788                         &ns->gid_map, &ns->parent->gid_map);
 789}
 790
 791ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
 792                              size_t size, loff_t *ppos)
 793{
 794        struct seq_file *seq = file->private_data;
 795        struct user_namespace *ns = seq->private;
 796        struct user_namespace *seq_ns = seq_user_ns(seq);
 797
 798        if (!ns->parent)
 799                return -EPERM;
 800
 801        if ((seq_ns != ns) && (seq_ns != ns->parent))
 802                return -EPERM;
 803
 804        /* Anyone can set any valid project id no capability needed */
 805        return map_write(file, buf, size, ppos, -1,
 806                         &ns->projid_map, &ns->parent->projid_map);
 807}
 808
 809static bool new_idmap_permitted(const struct file *file,
 810                                struct user_namespace *ns, int cap_setid,
 811                                struct uid_gid_map *new_map)
 812{
 813        const struct cred *cred = file->f_cred;
 814        /* Don't allow mappings that would allow anything that wouldn't
 815         * be allowed without the establishment of unprivileged mappings.
 816         */
 817        if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
 818            uid_eq(ns->owner, cred->euid)) {
 819                u32 id = new_map->extent[0].lower_first;
 820                if (cap_setid == CAP_SETUID) {
 821                        kuid_t uid = make_kuid(ns->parent, id);
 822                        if (uid_eq(uid, cred->euid))
 823                                return true;
 824                } else if (cap_setid == CAP_SETGID) {
 825                        kgid_t gid = make_kgid(ns->parent, id);
 826                        if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
 827                            gid_eq(gid, cred->egid))
 828                                return true;
 829                }
 830        }
 831
 832        /* Allow anyone to set a mapping that doesn't require privilege */
 833        if (!cap_valid(cap_setid))
 834                return true;
 835
 836        /* Allow the specified ids if we have the appropriate capability
 837         * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
 838         * And the opener of the id file also had the approprpiate capability.
 839         */
 840        if (ns_capable(ns->parent, cap_setid) &&
 841            file_ns_capable(file, ns->parent, cap_setid))
 842                return true;
 843
 844        return false;
 845}
 846
 847int proc_setgroups_show(struct seq_file *seq, void *v)
 848{
 849        struct user_namespace *ns = seq->private;
 850        unsigned long userns_flags = ACCESS_ONCE(ns->flags);
 851
 852        seq_printf(seq, "%s\n",
 853                   (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
 854                   "allow" : "deny");
 855        return 0;
 856}
 857
 858ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
 859                             size_t count, loff_t *ppos)
 860{
 861        struct seq_file *seq = file->private_data;
 862        struct user_namespace *ns = seq->private;
 863        char kbuf[8], *pos;
 864        bool setgroups_allowed;
 865        ssize_t ret;
 866
 867        /* Only allow a very narrow range of strings to be written */
 868        ret = -EINVAL;
 869        if ((*ppos != 0) || (count >= sizeof(kbuf)))
 870                goto out;
 871
 872        /* What was written? */
 873        ret = -EFAULT;
 874        if (copy_from_user(kbuf, buf, count))
 875                goto out;
 876        kbuf[count] = '\0';
 877        pos = kbuf;
 878
 879        /* What is being requested? */
 880        ret = -EINVAL;
 881        if (strncmp(pos, "allow", 5) == 0) {
 882                pos += 5;
 883                setgroups_allowed = true;
 884        }
 885        else if (strncmp(pos, "deny", 4) == 0) {
 886                pos += 4;
 887                setgroups_allowed = false;
 888        }
 889        else
 890                goto out;
 891
 892        /* Verify there is not trailing junk on the line */
 893        pos = skip_spaces(pos);
 894        if (*pos != '\0')
 895                goto out;
 896
 897        ret = -EPERM;
 898        mutex_lock(&userns_state_mutex);
 899        if (setgroups_allowed) {
 900                /* Enabling setgroups after setgroups has been disabled
 901                 * is not allowed.
 902                 */
 903                if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
 904                        goto out_unlock;
 905        } else {
 906                /* Permanently disabling setgroups after setgroups has
 907                 * been enabled by writing the gid_map is not allowed.
 908                 */
 909                if (ns->gid_map.nr_extents != 0)
 910                        goto out_unlock;
 911                ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
 912        }
 913        mutex_unlock(&userns_state_mutex);
 914
 915        /* Report a successful write */
 916        *ppos = count;
 917        ret = count;
 918out:
 919        return ret;
 920out_unlock:
 921        mutex_unlock(&userns_state_mutex);
 922        goto out;
 923}
 924
 925bool userns_may_setgroups(const struct user_namespace *ns)
 926{
 927        bool allowed;
 928
 929        mutex_lock(&userns_state_mutex);
 930        /* It is not safe to use setgroups until a gid mapping in
 931         * the user namespace has been established.
 932         */
 933        allowed = ns->gid_map.nr_extents != 0;
 934        /* Is setgroups allowed? */
 935        allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
 936        mutex_unlock(&userns_state_mutex);
 937
 938        return allowed;
 939}
 940
 941static inline struct user_namespace *to_user_ns(struct ns_common *ns)
 942{
 943        return container_of(ns, struct user_namespace, ns);
 944}
 945
 946static struct ns_common *userns_get(struct task_struct *task)
 947{
 948        struct user_namespace *user_ns;
 949
 950        rcu_read_lock();
 951        user_ns = get_user_ns(__task_cred(task)->user_ns);
 952        rcu_read_unlock();
 953
 954        return user_ns ? &user_ns->ns : NULL;
 955}
 956
 957static void userns_put(struct ns_common *ns)
 958{
 959        put_user_ns(to_user_ns(ns));
 960}
 961
 962static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
 963{
 964        struct user_namespace *user_ns = to_user_ns(ns);
 965        struct cred *cred;
 966
 967        /* Don't allow gaining capabilities by reentering
 968         * the same user namespace.
 969         */
 970        if (user_ns == current_user_ns())
 971                return -EINVAL;
 972
 973        /* Tasks that share a thread group must share a user namespace */
 974        if (!thread_group_empty(current))
 975                return -EINVAL;
 976
 977        if (current->fs->users != 1)
 978                return -EINVAL;
 979
 980        if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 981                return -EPERM;
 982
 983        cred = prepare_creds();
 984        if (!cred)
 985                return -ENOMEM;
 986
 987        put_user_ns(cred->user_ns);
 988        set_cred_user_ns(cred, get_user_ns(user_ns));
 989
 990        return commit_creds(cred);
 991}
 992
 993const struct proc_ns_operations userns_operations = {
 994        .name           = "user",
 995        .type           = CLONE_NEWUSER,
 996        .get            = userns_get,
 997        .put            = userns_put,
 998        .install        = userns_install,
 999};
1000
1001static __init int user_namespaces_init(void)
1002{
1003        user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
1004        return 0;
1005}
1006subsys_initcall(user_namespaces_init);
1007