linux/kernel/ucount.c
<<
>>
Prefs
   1/*
   2 *  This program is free software; you can redistribute it and/or
   3 *  modify it under the terms of the GNU General Public License as
   4 *  published by the Free Software Foundation, version 2 of the
   5 *  License.
   6 */
   7
   8#include <linux/stat.h>
   9#include <linux/sysctl.h>
  10#include <linux/slab.h>
  11#include <linux/cred.h>
  12#include <linux/hash.h>
  13#include <linux/kmemleak.h>
  14#include <linux/user_namespace.h>
  15
  16#define UCOUNTS_HASHTABLE_BITS 10
  17static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
  18static DEFINE_SPINLOCK(ucounts_lock);
  19
  20#define ucounts_hashfn(ns, uid)                                         \
  21        hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \
  22                  UCOUNTS_HASHTABLE_BITS)
  23#define ucounts_hashentry(ns, uid)      \
  24        (ucounts_hashtable + ucounts_hashfn(ns, uid))
  25
  26
  27#ifdef CONFIG_SYSCTL
  28static struct ctl_table_set *
  29set_lookup(struct ctl_table_root *root)
  30{
  31        return &current_user_ns()->set;
  32}
  33
  34static int set_is_seen(struct ctl_table_set *set)
  35{
  36        return &current_user_ns()->set == set;
  37}
  38
  39static int set_permissions(struct ctl_table_header *head,
  40                                  struct ctl_table *table)
  41{
  42        struct user_namespace *user_ns =
  43                container_of(head->set, struct user_namespace, set);
  44        int mode;
  45
  46        /* Allow users with CAP_SYS_RESOURCE unrestrained access */
  47        if (ns_capable(user_ns, CAP_SYS_RESOURCE))
  48                mode = (table->mode & S_IRWXU) >> 6;
  49        else
  50        /* Allow all others at most read-only access */
  51                mode = table->mode & S_IROTH;
  52        return (mode << 6) | (mode << 3) | mode;
  53}
  54
  55static struct ctl_table_root set_root = {
  56        .lookup = set_lookup,
  57        .permissions = set_permissions,
  58};
  59
  60static int zero = 0;
  61static int int_max = INT_MAX;
  62#define UCOUNT_ENTRY(name)                              \
  63        {                                               \
  64                .procname       = name,                 \
  65                .maxlen         = sizeof(int),          \
  66                .mode           = 0644,                 \
  67                .proc_handler   = proc_dointvec_minmax, \
  68                .extra1         = &zero,                \
  69                .extra2         = &int_max,             \
  70        }
  71static struct ctl_table user_table[] = {
  72        UCOUNT_ENTRY("max_user_namespaces"),
  73        UCOUNT_ENTRY("max_pid_namespaces"),
  74        UCOUNT_ENTRY("max_uts_namespaces"),
  75        UCOUNT_ENTRY("max_ipc_namespaces"),
  76        UCOUNT_ENTRY("max_net_namespaces"),
  77        UCOUNT_ENTRY("max_mnt_namespaces"),
  78        UCOUNT_ENTRY("max_cgroup_namespaces"),
  79#ifdef CONFIG_INOTIFY_USER
  80        UCOUNT_ENTRY("max_inotify_instances"),
  81        UCOUNT_ENTRY("max_inotify_watches"),
  82#endif
  83        { }
  84};
  85#endif /* CONFIG_SYSCTL */
  86
  87bool setup_userns_sysctls(struct user_namespace *ns)
  88{
  89#ifdef CONFIG_SYSCTL
  90        struct ctl_table *tbl;
  91        setup_sysctl_set(&ns->set, &set_root, set_is_seen);
  92        tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
  93        if (tbl) {
  94                int i;
  95                for (i = 0; i < UCOUNT_COUNTS; i++) {
  96                        tbl[i].data = &ns->ucount_max[i];
  97                }
  98                ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
  99        }
 100        if (!ns->sysctls) {
 101                kfree(tbl);
 102                retire_sysctl_set(&ns->set);
 103                return false;
 104        }
 105#endif
 106        return true;
 107}
 108
 109void retire_userns_sysctls(struct user_namespace *ns)
 110{
 111#ifdef CONFIG_SYSCTL
 112        struct ctl_table *tbl;
 113
 114        tbl = ns->sysctls->ctl_table_arg;
 115        unregister_sysctl_table(ns->sysctls);
 116        retire_sysctl_set(&ns->set);
 117        kfree(tbl);
 118#endif
 119}
 120
 121static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
 122{
 123        struct ucounts *ucounts;
 124
 125        hlist_for_each_entry(ucounts, hashent, node) {
 126                if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns))
 127                        return ucounts;
 128        }
 129        return NULL;
 130}
 131
 132static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
 133{
 134        struct hlist_head *hashent = ucounts_hashentry(ns, uid);
 135        struct ucounts *ucounts, *new;
 136
 137        spin_lock_irq(&ucounts_lock);
 138        ucounts = find_ucounts(ns, uid, hashent);
 139        if (!ucounts) {
 140                spin_unlock_irq(&ucounts_lock);
 141
 142                new = kzalloc(sizeof(*new), GFP_KERNEL);
 143                if (!new)
 144                        return NULL;
 145
 146                new->ns = ns;
 147                new->uid = uid;
 148                new->count = 0;
 149
 150                spin_lock_irq(&ucounts_lock);
 151                ucounts = find_ucounts(ns, uid, hashent);
 152                if (ucounts) {
 153                        kfree(new);
 154                } else {
 155                        hlist_add_head(&new->node, hashent);
 156                        ucounts = new;
 157                }
 158        }
 159        if (ucounts->count == INT_MAX)
 160                ucounts = NULL;
 161        else
 162                ucounts->count += 1;
 163        spin_unlock_irq(&ucounts_lock);
 164        return ucounts;
 165}
 166
 167static void put_ucounts(struct ucounts *ucounts)
 168{
 169        unsigned long flags;
 170
 171        spin_lock_irqsave(&ucounts_lock, flags);
 172        ucounts->count -= 1;
 173        if (!ucounts->count)
 174                hlist_del_init(&ucounts->node);
 175        else
 176                ucounts = NULL;
 177        spin_unlock_irqrestore(&ucounts_lock, flags);
 178
 179        kfree(ucounts);
 180}
 181
 182static inline bool atomic_inc_below(atomic_t *v, int u)
 183{
 184        int c, old;
 185        c = atomic_read(v);
 186        for (;;) {
 187                if (unlikely(c >= u))
 188                        return false;
 189                old = atomic_cmpxchg(v, c, c+1);
 190                if (likely(old == c))
 191                        return true;
 192                c = old;
 193        }
 194}
 195
 196struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
 197                           enum ucount_type type)
 198{
 199        struct ucounts *ucounts, *iter, *bad;
 200        struct user_namespace *tns;
 201        ucounts = get_ucounts(ns, uid);
 202        for (iter = ucounts; iter; iter = tns->ucounts) {
 203                int max;
 204                tns = iter->ns;
 205                max = READ_ONCE(tns->ucount_max[type]);
 206                if (!atomic_inc_below(&iter->ucount[type], max))
 207                        goto fail;
 208        }
 209        return ucounts;
 210fail:
 211        bad = iter;
 212        for (iter = ucounts; iter != bad; iter = iter->ns->ucounts)
 213                atomic_dec(&iter->ucount[type]);
 214
 215        put_ucounts(ucounts);
 216        return NULL;
 217}
 218
 219void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
 220{
 221        struct ucounts *iter;
 222        for (iter = ucounts; iter; iter = iter->ns->ucounts) {
 223                int dec = atomic_dec_if_positive(&iter->ucount[type]);
 224                WARN_ON_ONCE(dec < 0);
 225        }
 226        put_ucounts(ucounts);
 227}
 228
 229static __init int user_namespace_sysctl_init(void)
 230{
 231#ifdef CONFIG_SYSCTL
 232        static struct ctl_table_header *user_header;
 233        static struct ctl_table empty[1];
 234        /*
 235         * It is necessary to register the user directory in the
 236         * default set so that registrations in the child sets work
 237         * properly.
 238         */
 239        user_header = register_sysctl("user", empty);
 240        kmemleak_ignore(user_header);
 241        BUG_ON(!user_header);
 242        BUG_ON(!setup_userns_sysctls(&init_user_ns));
 243#endif
 244        return 0;
 245}
 246subsys_initcall(user_namespace_sysctl_init);
 247