linux/security/device_cgroup.c
<<
>>
Prefs
   1/*
   2 * device_cgroup.c - device cgroup subsystem
   3 *
   4 * Copyright 2007 IBM Corp
   5 */
   6
   7#include <linux/device_cgroup.h>
   8#include <linux/cgroup.h>
   9#include <linux/ctype.h>
  10#include <linux/list.h>
  11#include <linux/uaccess.h>
  12#include <linux/seq_file.h>
  13#include <linux/slab.h>
  14#include <linux/rcupdate.h>
  15#include <linux/mutex.h>
  16
/* Access bits of an exception; they are OR-ed together into a mask. */
#define ACC_MKNOD 1
#define ACC_READ  2
#define ACC_WRITE 4
/* every access bit set */
#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE)

/* Device type of an exception; tested as bit flags by the matchers. */
#define DEV_BLOCK 1
#define DEV_CHAR  2
#define DEV_ALL   4  /* this represents all devices */
  25
/* Serializes every update of a group's behavior and exception list. */
static DEFINE_MUTEX(devcgroup_mutex);

/*
 * Default policy of a device cgroup; the exception list is interpreted
 * relative to it.
 */
enum devcg_behavior {
        /* group is not online (state set at allocation and when going offline) */
        DEVCG_DEFAULT_NONE,
        /* everything is allowed unless an exception matches, even partially */
        DEVCG_DEFAULT_ALLOW,
        /* everything is denied unless an exception matches completely */
        DEVCG_DEFAULT_DENY,
};
  33
/*
 * exception list locking rules:
 * hold devcgroup_mutex for update/read.
 * hold rcu_read_lock() for read.
 */

/* One entry of a device cgroup's exception list. */
struct dev_exception_item {
        /* device numbers; ~0 stands for "all" */
        u32 major, minor;
        /* DEV_BLOCK, DEV_CHAR or DEV_ALL */
        short type;
        /* mask of ACC_READ, ACC_WRITE and ACC_MKNOD */
        short access;
        /* link in dev_cgroup.exceptions */
        struct list_head list;
        /* used by kfree_rcu() when the entry is removed */
        struct rcu_head rcu;
};
  47
/* Per-cgroup state of the devices controller. */
struct dev_cgroup {
        /* embedded subsystem state; container_of() maps it back to this struct */
        struct cgroup_subsys_state css;
        /* list of struct dev_exception_item, linked through ->list */
        struct list_head exceptions;
        /* default policy; DEVCG_DEFAULT_NONE while the group is not online */
        enum devcg_behavior behavior;
        /* temporary list for pending propagation operations */
        struct list_head propagate_pending;
};
  55
  56static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
  57{
  58        return container_of(s, struct dev_cgroup, css);
  59}
  60
  61static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
  62{
  63        return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
  64}
  65
  66static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
  67{
  68        return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
  69}
  70
  71struct cgroup_subsys devices_subsys;
  72
  73static int devcgroup_can_attach(struct cgroup *new_cgrp,
  74                                struct cgroup_taskset *set)
  75{
  76        struct task_struct *task = cgroup_taskset_first(set);
  77
  78        if (current != task && !capable(CAP_SYS_ADMIN))
  79                return -EPERM;
  80        return 0;
  81}
  82
  83/*
  84 * called under devcgroup_mutex
  85 */
  86static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
  87{
  88        struct dev_exception_item *ex, *tmp, *new;
  89
  90        lockdep_assert_held(&devcgroup_mutex);
  91
  92        list_for_each_entry(ex, orig, list) {
  93                new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
  94                if (!new)
  95                        goto free_and_exit;
  96                list_add_tail(&new->list, dest);
  97        }
  98
  99        return 0;
 100
 101free_and_exit:
 102        list_for_each_entry_safe(ex, tmp, dest, list) {
 103                list_del(&ex->list);
 104                kfree(ex);
 105        }
 106        return -ENOMEM;
 107}
 108
 109/*
 110 * called under devcgroup_mutex
 111 */
 112static int dev_exception_add(struct dev_cgroup *dev_cgroup,
 113                             struct dev_exception_item *ex)
 114{
 115        struct dev_exception_item *excopy, *walk;
 116
 117        lockdep_assert_held(&devcgroup_mutex);
 118
 119        excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
 120        if (!excopy)
 121                return -ENOMEM;
 122
 123        list_for_each_entry(walk, &dev_cgroup->exceptions, list) {
 124                if (walk->type != ex->type)
 125                        continue;
 126                if (walk->major != ex->major)
 127                        continue;
 128                if (walk->minor != ex->minor)
 129                        continue;
 130
 131                walk->access |= ex->access;
 132                kfree(excopy);
 133                excopy = NULL;
 134        }
 135
 136        if (excopy != NULL)
 137                list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions);
 138        return 0;
 139}
 140
 141/*
 142 * called under devcgroup_mutex
 143 */
 144static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
 145                             struct dev_exception_item *ex)
 146{
 147        struct dev_exception_item *walk, *tmp;
 148
 149        lockdep_assert_held(&devcgroup_mutex);
 150
 151        list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) {
 152                if (walk->type != ex->type)
 153                        continue;
 154                if (walk->major != ex->major)
 155                        continue;
 156                if (walk->minor != ex->minor)
 157                        continue;
 158
 159                walk->access &= ~ex->access;
 160                if (!walk->access) {
 161                        list_del_rcu(&walk->list);
 162                        kfree_rcu(walk, rcu);
 163                }
 164        }
 165}
 166
 167static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
 168{
 169        struct dev_exception_item *ex, *tmp;
 170
 171        list_for_each_entry_safe(ex, tmp, &dev_cgroup->exceptions, list) {
 172                list_del_rcu(&ex->list);
 173                kfree_rcu(ex, rcu);
 174        }
 175}
 176
 177/**
 178 * dev_exception_clean - frees all entries of the exception list
 179 * @dev_cgroup: dev_cgroup with the exception list to be cleaned
 180 *
 181 * called under devcgroup_mutex
 182 */
 183static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
 184{
 185        lockdep_assert_held(&devcgroup_mutex);
 186
 187        __dev_exception_clean(dev_cgroup);
 188}
 189
 190static inline bool is_devcg_online(const struct dev_cgroup *devcg)
 191{
 192        return (devcg->behavior != DEVCG_DEFAULT_NONE);
 193}
 194
 195/**
 196 * devcgroup_online - initializes devcgroup's behavior and exceptions based on
 197 *                    parent's
 198 * @cgroup: cgroup getting online
 199 * returns 0 in case of success, error code otherwise
 200 */
 201static int devcgroup_online(struct cgroup *cgroup)
 202{
 203        struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
 204        int ret = 0;
 205
 206        mutex_lock(&devcgroup_mutex);
 207        dev_cgroup = cgroup_to_devcgroup(cgroup);
 208        if (cgroup->parent)
 209                parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent);
 210
 211        if (parent_dev_cgroup == NULL)
 212                dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
 213        else {
 214                ret = dev_exceptions_copy(&dev_cgroup->exceptions,
 215                                          &parent_dev_cgroup->exceptions);
 216                if (!ret)
 217                        dev_cgroup->behavior = parent_dev_cgroup->behavior;
 218        }
 219        mutex_unlock(&devcgroup_mutex);
 220
 221        return ret;
 222}
 223
 224static void devcgroup_offline(struct cgroup *cgroup)
 225{
 226        struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);
 227
 228        mutex_lock(&devcgroup_mutex);
 229        dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
 230        mutex_unlock(&devcgroup_mutex);
 231}
 232
 233/*
 234 * called from kernel/cgroup.c with cgroup_lock() held.
 235 */
 236static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
 237{
 238        struct dev_cgroup *dev_cgroup;
 239
 240        dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
 241        if (!dev_cgroup)
 242                return ERR_PTR(-ENOMEM);
 243        INIT_LIST_HEAD(&dev_cgroup->exceptions);
 244        INIT_LIST_HEAD(&dev_cgroup->propagate_pending);
 245        dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
 246
 247        return &dev_cgroup->css;
 248}
 249
 250static void devcgroup_css_free(struct cgroup *cgroup)
 251{
 252        struct dev_cgroup *dev_cgroup;
 253
 254        dev_cgroup = cgroup_to_devcgroup(cgroup);
 255        __dev_exception_clean(dev_cgroup);
 256        kfree(dev_cgroup);
 257}
 258
/*
 * Identify which control file is being accessed; stored in the .private
 * field of the cftype entries and passed as "filetype" to the update path.
 */
#define DEVCG_ALLOW 1
#define DEVCG_DENY 2
#define DEVCG_LIST 3

/* size of the buffers that receive a formatted major or minor number */
#define MAJMINLEN 13
/* size of the buffer that receives the access letters ("rwm" plus NUL) */
#define ACCLEN 4
 265
 266static void set_access(char *acc, short access)
 267{
 268        int idx = 0;
 269        memset(acc, 0, ACCLEN);
 270        if (access & ACC_READ)
 271                acc[idx++] = 'r';
 272        if (access & ACC_WRITE)
 273                acc[idx++] = 'w';
 274        if (access & ACC_MKNOD)
 275                acc[idx++] = 'm';
 276}
 277
 278static char type_to_char(short type)
 279{
 280        if (type == DEV_ALL)
 281                return 'a';
 282        if (type == DEV_CHAR)
 283                return 'c';
 284        if (type == DEV_BLOCK)
 285                return 'b';
 286        return 'X';
 287}
 288
 289static void set_majmin(char *str, unsigned m)
 290{
 291        if (m == ~0)
 292                strcpy(str, "*");
 293        else
 294                sprintf(str, "%u", m);
 295}
 296
 297static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
 298                                struct seq_file *m)
 299{
 300        struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
 301        struct dev_exception_item *ex;
 302        char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
 303
 304        rcu_read_lock();
 305        /*
 306         * To preserve the compatibility:
 307         * - Only show the "all devices" when the default policy is to allow
 308         * - List the exceptions in case the default policy is to deny
 309         * This way, the file remains as a "whitelist of devices"
 310         */
 311        if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
 312                set_access(acc, ACC_MASK);
 313                set_majmin(maj, ~0);
 314                set_majmin(min, ~0);
 315                seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL),
 316                           maj, min, acc);
 317        } else {
 318                list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
 319                        set_access(acc, ex->access);
 320                        set_majmin(maj, ex->major);
 321                        set_majmin(min, ex->minor);
 322                        seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type),
 323                                   maj, min, acc);
 324                }
 325        }
 326        rcu_read_unlock();
 327
 328        return 0;
 329}
 330
 331/**
 332 * match_exception      - iterates the exception list trying to match a rule
 333 *                        based on type, major, minor and access type. It is
 334 *                        considered a match if an exception is found that
 335 *                        will contain the entire range of provided parameters.
 336 * @exceptions: list of exceptions
 337 * @type: device type (DEV_BLOCK or DEV_CHAR)
 338 * @major: device file major number, ~0 to match all
 339 * @minor: device file minor number, ~0 to match all
 340 * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
 341 *
 342 * returns: true in case it matches an exception completely
 343 */
 344static bool match_exception(struct list_head *exceptions, short type,
 345                            u32 major, u32 minor, short access)
 346{
 347        struct dev_exception_item *ex;
 348
 349        list_for_each_entry_rcu(ex, exceptions, list) {
 350                if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
 351                        continue;
 352                if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
 353                        continue;
 354                if (ex->major != ~0 && ex->major != major)
 355                        continue;
 356                if (ex->minor != ~0 && ex->minor != minor)
 357                        continue;
 358                /* provided access cannot have more than the exception rule */
 359                if (access & (~ex->access))
 360                        continue;
 361                return true;
 362        }
 363        return false;
 364}
 365
 366/**
 367 * match_exception_partial - iterates the exception list trying to match a rule
 368 *                           based on type, major, minor and access type. It is
 369 *                           considered a match if an exception's range is
 370 *                           found to contain *any* of the devices specified by
 371 *                           provided parameters. This is used to make sure no
 372 *                           extra access is being granted that is forbidden by
 373 *                           any of the exception list.
 374 * @exceptions: list of exceptions
 375 * @type: device type (DEV_BLOCK or DEV_CHAR)
 376 * @major: device file major number, ~0 to match all
 377 * @minor: device file minor number, ~0 to match all
 378 * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD)
 379 *
 380 * returns: true in case the provided range mat matches an exception completely
 381 */
 382static bool match_exception_partial(struct list_head *exceptions, short type,
 383                                    u32 major, u32 minor, short access)
 384{
 385        struct dev_exception_item *ex;
 386
 387        list_for_each_entry_rcu(ex, exceptions, list) {
 388                if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
 389                        continue;
 390                if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR))
 391                        continue;
 392                /*
 393                 * We must be sure that both the exception and the provided
 394                 * range aren't masking all devices
 395                 */
 396                if (ex->major != ~0 && major != ~0 && ex->major != major)
 397                        continue;
 398                if (ex->minor != ~0 && minor != ~0 && ex->minor != minor)
 399                        continue;
 400                /*
 401                 * In order to make sure the provided range isn't matching
 402                 * an exception, all its access bits shouldn't match the
 403                 * exception's access bits
 404                 */
 405                if (!(access & ex->access))
 406                        continue;
 407                return true;
 408        }
 409        return false;
 410}
 411
 412/**
 413 * verify_new_ex - verifies if a new exception is part of what is allowed
 414 *                 by a dev cgroup based on the default policy +
 415 *                 exceptions. This is used to make sure a child cgroup
 416 *                 won't have more privileges than its parent
 417 * @dev_cgroup: dev cgroup to be tested against
 418 * @refex: new exception
 419 * @behavior: behavior of the exception's dev_cgroup
 420 */
 421static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
 422                          struct dev_exception_item *refex,
 423                          enum devcg_behavior behavior)
 424{
 425        bool match = false;
 426
 427        rcu_lockdep_assert(rcu_read_lock_held() ||
 428                           lockdep_is_held(&devcgroup_mutex),
 429                           "device_cgroup:verify_new_ex called without proper synchronization");
 430
 431        if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
 432                if (behavior == DEVCG_DEFAULT_ALLOW) {
 433                        /*
 434                         * new exception in the child doesn't matter, only
 435                         * adding extra restrictions
 436                         */ 
 437                        return true;
 438                } else {
 439                        /*
 440                         * new exception in the child will add more devices
 441                         * that can be acessed, so it can't match any of
 442                         * parent's exceptions, even slightly
 443                         */ 
 444                        match = match_exception_partial(&dev_cgroup->exceptions,
 445                                                        refex->type,
 446                                                        refex->major,
 447                                                        refex->minor,
 448                                                        refex->access);
 449
 450                        if (match)
 451                                return false;
 452                        return true;
 453                }
 454        } else {
 455                /*
 456                 * Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore
 457                 * the new exception will add access to more devices and must
 458                 * be contained completely in an parent's exception to be
 459                 * allowed
 460                 */
 461                match = match_exception(&dev_cgroup->exceptions, refex->type,
 462                                        refex->major, refex->minor,
 463                                        refex->access);
 464
 465                if (match)
 466                        /* parent has an exception that matches the proposed */
 467                        return true;
 468                else
 469                        return false;
 470        }
 471        return false;
 472}
 473
 474/*
 475 * parent_has_perm:
 476 * when adding a new allow rule to a device exception list, the rule
 477 * must be allowed in the parent device
 478 */
 479static int parent_has_perm(struct dev_cgroup *childcg,
 480                                  struct dev_exception_item *ex)
 481{
 482        struct cgroup *pcg = childcg->css.cgroup->parent;
 483        struct dev_cgroup *parent;
 484
 485        if (!pcg)
 486                return 1;
 487        parent = cgroup_to_devcgroup(pcg);
 488        return verify_new_ex(parent, ex, childcg->behavior);
 489}
 490
 491/*
 492 * parent_allows_removal - check if the parent cgroup allows an exception to
 493 *                         be removed
 494 * @childcg: child cgroup from where the exception will be removed
 495 * @ex: exception being removed
 496 */
 497static bool parent_allows_removal(struct dev_cgroup *childcg,
 498                                  struct dev_exception_item *ex)
 499{
 500        struct cgroup *pcg = childcg->css.cgroup->parent;
 501        struct dev_cgroup *parent;
 502
 503        if (!pcg)
 504                return true;
 505        parent = cgroup_to_devcgroup(pcg);
 506
 507        if (childcg->behavior == DEVCG_DEFAULT_DENY)
 508                /* It's always allowed to remove access to devices */
 509                return true;
 510
 511        /*
 512         * Make sure you're not removing part or a whole exception existing in
 513         * the parent cgroup
 514         */
 515        return !match_exception_partial(&parent->exceptions, ex->type,
 516                                        ex->major, ex->minor, ex->access);
 517}
 518
 519/**
 520 * may_allow_all - checks if it's possible to change the behavior to
 521 *                 allow based on parent's rules.
 522 * @parent: device cgroup's parent
 523 * returns: != 0 in case it's allowed, 0 otherwise
 524 */
 525static inline int may_allow_all(struct dev_cgroup *parent)
 526{
 527        if (!parent)
 528                return 1;
 529        return parent->behavior == DEVCG_DEFAULT_ALLOW;
 530}
 531
 532/**
 533 * revalidate_active_exceptions - walks through the active exception list and
 534 *                                revalidates the exceptions based on parent's
 535 *                                behavior and exceptions. The exceptions that
 536 *                                are no longer valid will be removed.
 537 *                                Called with devcgroup_mutex held.
 538 * @devcg: cgroup which exceptions will be checked
 539 *
 540 * This is one of the three key functions for hierarchy implementation.
 541 * This function is responsible for re-evaluating all the cgroup's active
 542 * exceptions due to a parent's exception change.
 543 * Refer to Documentation/cgroups/devices.txt for more details.
 544 */
 545static void revalidate_active_exceptions(struct dev_cgroup *devcg)
 546{
 547        struct dev_exception_item *ex;
 548        struct list_head *this, *tmp;
 549
 550        list_for_each_safe(this, tmp, &devcg->exceptions) {
 551                ex = container_of(this, struct dev_exception_item, list);
 552                if (!parent_has_perm(devcg, ex))
 553                        dev_exception_rm(devcg, ex);
 554        }
 555}
 556
 557/**
 558 * get_online_devcg - walks the cgroup tree and fills a list with the online
 559 *                    groups
 560 * @root: cgroup used as starting point
 561 * @online: list that will be filled with online groups
 562 *
 563 * Must be called with devcgroup_mutex held. Grabs RCU lock.
 564 * Because devcgroup_mutex is held, no devcg will become online or offline
 565 * during the tree walk (see devcgroup_online, devcgroup_offline)
 566 * A separated list is needed because propagate_behavior() and
 567 * propagate_exception() need to allocate memory and can block.
 568 */
 569static void get_online_devcg(struct cgroup *root, struct list_head *online)
 570{
 571        struct cgroup *pos;
 572        struct dev_cgroup *devcg;
 573
 574        lockdep_assert_held(&devcgroup_mutex);
 575
 576        rcu_read_lock();
 577        cgroup_for_each_descendant_pre(pos, root) {
 578                devcg = cgroup_to_devcgroup(pos);
 579                if (is_devcg_online(devcg))
 580                        list_add_tail(&devcg->propagate_pending, online);
 581        }
 582        rcu_read_unlock();
 583}
 584
 585/**
 586 * propagate_exception - propagates a new exception to the children
 587 * @devcg_root: device cgroup that added a new exception
 588 * @ex: new exception to be propagated
 589 *
 590 * returns: 0 in case of success, != 0 in case of error
 591 */
 592static int propagate_exception(struct dev_cgroup *devcg_root,
 593                               struct dev_exception_item *ex)
 594{
 595        struct cgroup *root = devcg_root->css.cgroup;
 596        struct dev_cgroup *devcg, *parent, *tmp;
 597        int rc = 0;
 598        LIST_HEAD(pending);
 599
 600        get_online_devcg(root, &pending);
 601
 602        list_for_each_entry_safe(devcg, tmp, &pending, propagate_pending) {
 603                parent = cgroup_to_devcgroup(devcg->css.cgroup->parent);
 604
 605                /*
 606                 * in case both root's behavior and devcg is allow, a new
 607                 * restriction means adding to the exception list
 608                 */
 609                if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW &&
 610                    devcg->behavior == DEVCG_DEFAULT_ALLOW) {
 611                        rc = dev_exception_add(devcg, ex);
 612                        if (rc)
 613                                break;
 614                } else {
 615                        /*
 616                         * in the other possible cases:
 617                         * root's behavior: allow, devcg's: deny
 618                         * root's behavior: deny, devcg's: deny
 619                         * the exception will be removed
 620                         */
 621                        dev_exception_rm(devcg, ex);
 622                }
 623                revalidate_active_exceptions(devcg);
 624
 625                list_del_init(&devcg->propagate_pending);
 626        }
 627        return rc;
 628}
 629
 630static inline bool has_children(struct dev_cgroup *devcgroup)
 631{
 632        struct cgroup *cgrp = devcgroup->css.cgroup;
 633
 634        return !list_empty(&cgrp->children);
 635}
 636
 637/*
 638 * Modify the exception list using allow/deny rules.
 639 * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
 640 * so we can give a container CAP_MKNOD to let it create devices but not
 641 * modify the exception list.
 642 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
 643 * us to also grant CAP_SYS_ADMIN to containers without giving away the
 644 * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN
 645 *
 646 * Taking rules away is always allowed (given CAP_SYS_ADMIN).  Granting
 647 * new access is only allowed if you're in the top-level cgroup, or your
 648 * parent cgroup has the access you're asking for.
 649 */
 650static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 651                                   int filetype, const char *buffer)
 652{
 653        const char *b;
 654        char temp[12];          /* 11 + 1 characters needed for a u32 */
 655        int count, rc = 0;
 656        struct dev_exception_item ex;
 657        struct cgroup *p = devcgroup->css.cgroup;
 658        struct dev_cgroup *parent = NULL;
 659
 660        if (!capable(CAP_SYS_ADMIN))
 661                return -EPERM;
 662
 663        if (p->parent)
 664                parent = cgroup_to_devcgroup(p->parent);
 665
 666        memset(&ex, 0, sizeof(ex));
 667        b = buffer;
 668
 669        switch (*b) {
 670        case 'a':
 671                switch (filetype) {
 672                case DEVCG_ALLOW:
 673                        if (has_children(devcgroup))
 674                                return -EINVAL;
 675
 676                        if (!may_allow_all(parent))
 677                                return -EPERM;
 678                        dev_exception_clean(devcgroup);
 679                        devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
 680                        if (!parent)
 681                                break;
 682
 683                        rc = dev_exceptions_copy(&devcgroup->exceptions,
 684                                                 &parent->exceptions);
 685                        if (rc)
 686                                return rc;
 687                        break;
 688                case DEVCG_DENY:
 689                        if (has_children(devcgroup))
 690                                return -EINVAL;
 691
 692                        dev_exception_clean(devcgroup);
 693                        devcgroup->behavior = DEVCG_DEFAULT_DENY;
 694                        break;
 695                default:
 696                        return -EINVAL;
 697                }
 698                return 0;
 699        case 'b':
 700                ex.type = DEV_BLOCK;
 701                break;
 702        case 'c':
 703                ex.type = DEV_CHAR;
 704                break;
 705        default:
 706                return -EINVAL;
 707        }
 708        b++;
 709        if (!isspace(*b))
 710                return -EINVAL;
 711        b++;
 712        if (*b == '*') {
 713                ex.major = ~0;
 714                b++;
 715        } else if (isdigit(*b)) {
 716                memset(temp, 0, sizeof(temp));
 717                for (count = 0; count < sizeof(temp) - 1; count++) {
 718                        temp[count] = *b;
 719                        b++;
 720                        if (!isdigit(*b))
 721                                break;
 722                }
 723                rc = kstrtou32(temp, 10, &ex.major);
 724                if (rc)
 725                        return -EINVAL;
 726        } else {
 727                return -EINVAL;
 728        }
 729        if (*b != ':')
 730                return -EINVAL;
 731        b++;
 732
 733        /* read minor */
 734        if (*b == '*') {
 735                ex.minor = ~0;
 736                b++;
 737        } else if (isdigit(*b)) {
 738                memset(temp, 0, sizeof(temp));
 739                for (count = 0; count < sizeof(temp) - 1; count++) {
 740                        temp[count] = *b;
 741                        b++;
 742                        if (!isdigit(*b))
 743                                break;
 744                }
 745                rc = kstrtou32(temp, 10, &ex.minor);
 746                if (rc)
 747                        return -EINVAL;
 748        } else {
 749                return -EINVAL;
 750        }
 751        if (!isspace(*b))
 752                return -EINVAL;
 753        for (b++, count = 0; count < 3; count++, b++) {
 754                switch (*b) {
 755                case 'r':
 756                        ex.access |= ACC_READ;
 757                        break;
 758                case 'w':
 759                        ex.access |= ACC_WRITE;
 760                        break;
 761                case 'm':
 762                        ex.access |= ACC_MKNOD;
 763                        break;
 764                case '\n':
 765                case '\0':
 766                        count = 3;
 767                        break;
 768                default:
 769                        return -EINVAL;
 770                }
 771        }
 772
 773        switch (filetype) {
 774        case DEVCG_ALLOW:
 775                /*
 776                 * If the default policy is to allow by default, try to remove
 777                 * an matching exception instead. And be silent about it: we
 778                 * don't want to break compatibility
 779                 */
 780                if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
 781                        /* Check if the parent allows removing it first */
 782                        if (!parent_allows_removal(devcgroup, &ex))
 783                                return -EPERM;
 784                        dev_exception_rm(devcgroup, &ex);
 785                        break;
 786                }
 787
 788                if (!parent_has_perm(devcgroup, &ex))
 789                        return -EPERM;
 790                rc = dev_exception_add(devcgroup, &ex);
 791                break;
 792        case DEVCG_DENY:
 793                /*
 794                 * If the default policy is to deny by default, try to remove
 795                 * an matching exception instead. And be silent about it: we
 796                 * don't want to break compatibility
 797                 */
 798                if (devcgroup->behavior == DEVCG_DEFAULT_DENY)
 799                        dev_exception_rm(devcgroup, &ex);
 800                else
 801                        rc = dev_exception_add(devcgroup, &ex);
 802
 803                if (rc)
 804                        break;
 805                /* we only propagate new restrictions */
 806                rc = propagate_exception(devcgroup, &ex);
 807                break;
 808        default:
 809                rc = -EINVAL;
 810        }
 811        return rc;
 812}
 813
 814static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
 815                                  const char *buffer)
 816{
 817        int retval;
 818
 819        mutex_lock(&devcgroup_mutex);
 820        retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
 821                                         cft->private, buffer);
 822        mutex_unlock(&devcgroup_mutex);
 823        return retval;
 824}
 825
/*
 * Control files of the devices controller. "allow" and "deny" share one
 * write handler and are told apart by their .private value; "list" is
 * read-only.
 */
static struct cftype dev_cgroup_files[] = {
        {
                .name = "allow",
                .write_string  = devcgroup_access_write,
                .private = DEVCG_ALLOW,
        },
        {
                .name = "deny",
                .write_string = devcgroup_access_write,
                .private = DEVCG_DENY,
        },
        {
                .name = "list",
                .read_seq_string = devcgroup_seq_read,
                .private = DEVCG_LIST,
        },
        { }     /* terminate */
};
 844
/*
 * Descriptor of the "devices" cgroup subsystem: wires the callbacks and
 * the control files defined in this file into the cgroup core.
 */
struct cgroup_subsys devices_subsys = {
        .name = "devices",
        .can_attach = devcgroup_can_attach,
        .css_alloc = devcgroup_css_alloc,
        .css_free = devcgroup_css_free,
        .css_online = devcgroup_online,
        .css_offline = devcgroup_offline,
        .subsys_id = devices_subsys_id,
        .base_cftypes = dev_cgroup_files,
};
 855
 856/**
 857 * __devcgroup_check_permission - checks if an inode operation is permitted
 858 * @dev_cgroup: the dev cgroup to be tested against
 859 * @type: device type
 860 * @major: device major number
 861 * @minor: device minor number
 862 * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD
 863 *
 864 * returns 0 on success, -EPERM case the operation is not permitted
 865 */
 866static int __devcgroup_check_permission(short type, u32 major, u32 minor,
 867                                        short access)
 868{
 869        struct dev_cgroup *dev_cgroup;
 870        bool rc;
 871
 872        rcu_read_lock();
 873        dev_cgroup = task_devcgroup(current);
 874        if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW)
 875                /* Can't match any of the exceptions, even partially */
 876                rc = !match_exception_partial(&dev_cgroup->exceptions,
 877                                              type, major, minor, access);
 878        else
 879                /* Need to match completely one exception to be allowed */
 880                rc = match_exception(&dev_cgroup->exceptions, type, major,
 881                                     minor, access);
 882        rcu_read_unlock();
 883
 884        if (!rc)
 885                return -EPERM;
 886
 887        return 0;
 888}
 889
 890int __devcgroup_inode_permission(struct inode *inode, int mask)
 891{
 892        short type, access = 0;
 893
 894        if (S_ISBLK(inode->i_mode))
 895                type = DEV_BLOCK;
 896        if (S_ISCHR(inode->i_mode))
 897                type = DEV_CHAR;
 898        if (mask & MAY_WRITE)
 899                access |= ACC_WRITE;
 900        if (mask & MAY_READ)
 901                access |= ACC_READ;
 902
 903        return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
 904                        access);
 905}
 906
 907int devcgroup_inode_mknod(int mode, dev_t dev)
 908{
 909        short type;
 910
 911        if (!S_ISBLK(mode) && !S_ISCHR(mode))
 912                return 0;
 913
 914        if (S_ISBLK(mode))
 915                type = DEV_BLOCK;
 916        else
 917                type = DEV_CHAR;
 918
 919        return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
 920                        ACC_MKNOD);
 921
 922}
 923