linux/block/blk-cgroup.c
   1/*
   2 * Common Block IO controller cgroup interface
   3 *
   4 * Based on ideas and code from CFQ, CFS and BFQ:
   5 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
   6 *
   7 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
   8 *                    Paolo Valente <paolo.valente@unimore.it>
   9 *
  10 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
  11 *                    Nauman Rafique <nauman@google.com>
  12 */
  13#include <linux/ioprio.h>
  14#include <linux/seq_file.h>
  15#include <linux/kdev_t.h>
  16#include <linux/module.h>
  17#include <linux/err.h>
  18#include <linux/blkdev.h>
  19#include <linux/slab.h>
  20#include "blk-cgroup.h"
  21#include <linux/genhd.h>
  22
  23#define MAX_KEY_LEN 100
  24
  25static DEFINE_SPINLOCK(blkio_list_lock);
  26static LIST_HEAD(blkio_list);
  27
  28struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
  29EXPORT_SYMBOL_GPL(blkio_root_cgroup);
  30
  31static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
  32                                                  struct cgroup *);
  33static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
  34                              struct task_struct *, bool);
  35static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
  36                           struct cgroup *, struct task_struct *, bool);
  37static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
  38static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
  39
  40/* for encoding cft->private value on file */
  41#define BLKIOFILE_PRIVATE(x, val)       (((x) << 16) | (val))
  42/* What policy owns the file, proportional or throttle */
  43#define BLKIOFILE_POLICY(val)           (((val) >> 16) & 0xffff)
  44#define BLKIOFILE_ATTR(val)             ((val) & 0xffff)
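/*
 * Worked example: BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
 * BLKIO_THROTL_read_bps_device) stores the policy id in bits 16-31 and the
 * per-policy file id in bits 0-15, so a cftype handler can later recover
 * both halves with BLKIOFILE_POLICY(cft->private) and
 * BLKIOFILE_ATTR(cft->private).
 */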
  45
  46struct cgroup_subsys blkio_subsys = {
  47        .name = "blkio",
  48        .create = blkiocg_create,
  49        .can_attach = blkiocg_can_attach,
  50        .attach = blkiocg_attach,
  51        .destroy = blkiocg_destroy,
  52        .populate = blkiocg_populate,
  53#ifdef CONFIG_BLK_CGROUP
  54        /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
  55        .subsys_id = blkio_subsys_id,
  56#endif
  57        .use_id = 1,
  58        .module = THIS_MODULE,
  59};
  60EXPORT_SYMBOL_GPL(blkio_subsys);
  61
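/* Like blkio_policy_delete_node() below, callers must hold blkcg->lock. */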
  62static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
  63                                            struct blkio_policy_node *pn)
  64{
  65        list_add(&pn->node, &blkcg->policy_list);
  66}
  67
  68static inline bool cftype_blkg_same_policy(struct cftype *cft,
  69                        struct blkio_group *blkg)
  70{
  71        enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
  72
  73        if (blkg->plid == plid)
  74                return 1;
  75
  76        return 0;
  77}
  78
  79/* Determines if policy node matches cgroup file being accessed */
  80static inline bool pn_matches_cftype(struct cftype *cft,
  81                        struct blkio_policy_node *pn)
  82{
  83        enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
  84        int fileid = BLKIOFILE_ATTR(cft->private);
  85
  86        return (plid == pn->plid && fileid == pn->fileid);
  87}
  88
  89/* Must be called with blkcg->lock held */
  90static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
  91{
  92        list_del(&pn->node);
  93}
  94
  95/* Must be called with blkcg->lock held */
  96static struct blkio_policy_node *
  97blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev,
  98                enum blkio_policy_id plid, int fileid)
  99{
 100        struct blkio_policy_node *pn;
 101
 102        list_for_each_entry(pn, &blkcg->policy_list, node) {
 103                if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid)
 104                        return pn;
 105        }
 106
 107        return NULL;
 108}
 109
 110struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 111{
 112        return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
 113                            struct blkio_cgroup, css);
 114}
 115EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
 116
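/*
 * In-tree callers such as CFQ invoke this under rcu_read_lock() while
 * resolving the current task's group.
 */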
 117struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
 118{
 119        return container_of(task_subsys_state(tsk, blkio_subsys_id),
 120                            struct blkio_cgroup, css);
 121}
 122EXPORT_SYMBOL_GPL(task_blkio_cgroup);
 123
 124static inline void
 125blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
 126{
 127        struct blkio_policy_type *blkiop;
 128
 129        list_for_each_entry(blkiop, &blkio_list, list) {
 130                /* If this policy does not own the blkg, do not send updates */
 131                if (blkiop->plid != blkg->plid)
 132                        continue;
 133                if (blkiop->ops.blkio_update_group_weight_fn)
 134                        blkiop->ops.blkio_update_group_weight_fn(blkg->key,
 135                                                        blkg, weight);
 136        }
 137}
 138
 139static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
 140                                int fileid)
 141{
 142        struct blkio_policy_type *blkiop;
 143
 144        list_for_each_entry(blkiop, &blkio_list, list) {
 145
 146                /* If this policy does not own the blkg, do not send updates */
 147                if (blkiop->plid != blkg->plid)
 148                        continue;
 149
 150                if (fileid == BLKIO_THROTL_read_bps_device
 151                    && blkiop->ops.blkio_update_group_read_bps_fn)
 152                        blkiop->ops.blkio_update_group_read_bps_fn(blkg->key,
 153                                                                blkg, bps);
 154
 155                if (fileid == BLKIO_THROTL_write_bps_device
 156                    && blkiop->ops.blkio_update_group_write_bps_fn)
 157                        blkiop->ops.blkio_update_group_write_bps_fn(blkg->key,
 158                                                                blkg, bps);
 159        }
 160}
 161
 162static inline void blkio_update_group_iops(struct blkio_group *blkg,
 163                        unsigned int iops, int fileid)
 164{
 165        struct blkio_policy_type *blkiop;
 166
 167        list_for_each_entry(blkiop, &blkio_list, list) {
 168
 169                /* If this policy does not own the blkg, do not send updates */
 170                if (blkiop->plid != blkg->plid)
 171                        continue;
 172
 173                if (fileid == BLKIO_THROTL_read_iops_device
 174                    && blkiop->ops.blkio_update_group_read_iops_fn)
 175                        blkiop->ops.blkio_update_group_read_iops_fn(blkg->key,
 176                                                                blkg, iops);
 177
 178                if (fileid == BLKIO_THROTL_write_iops_device
 179                    && blkiop->ops.blkio_update_group_write_iops_fn)
 180                        blkiop->ops.blkio_update_group_write_iops_fn(blkg->key,
  181                                                                blkg, iops);
 182        }
 183}
 184
 185/*
 186 * Add to the appropriate stat variable depending on the request type.
 187 * This should be called with the blkg->stats_lock held.
 188 */
 189static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
 190                                bool sync)
 191{
 192        if (direction)
 193                stat[BLKIO_STAT_WRITE] += add;
 194        else
 195                stat[BLKIO_STAT_READ] += add;
 196        if (sync)
 197                stat[BLKIO_STAT_SYNC] += add;
 198        else
 199                stat[BLKIO_STAT_ASYNC] += add;
 200}
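/*
 * Example: blkio_add_stat(stats->stat_arr[BLKIO_STAT_QUEUED], 1, true, false)
 * bumps both the WRITE and the ASYNC slot, since each request is counted once
 * by direction and once by sync/async class.
 */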
 201
 202/*
 203 * Decrements the appropriate stat variable if non-zero depending on the
 204 * request type. Panics on value being zero.
 205 * This should be called with the blkg->stats_lock held.
 206 */
 207static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
 208{
 209        if (direction) {
 210                BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
 211                stat[BLKIO_STAT_WRITE]--;
 212        } else {
 213                BUG_ON(stat[BLKIO_STAT_READ] == 0);
 214                stat[BLKIO_STAT_READ]--;
 215        }
 216        if (sync) {
 217                BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
 218                stat[BLKIO_STAT_SYNC]--;
 219        } else {
 220                BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
 221                stat[BLKIO_STAT_ASYNC]--;
 222        }
 223}
 224
 225#ifdef CONFIG_DEBUG_BLK_CGROUP
 226/* This should be called with the blkg->stats_lock held. */
 227static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
 228                                                struct blkio_group *curr_blkg)
 229{
 230        if (blkio_blkg_waiting(&blkg->stats))
 231                return;
 232        if (blkg == curr_blkg)
 233                return;
 234        blkg->stats.start_group_wait_time = sched_clock();
 235        blkio_mark_blkg_waiting(&blkg->stats);
 236}
 237
 238/* This should be called with the blkg->stats_lock held. */
 239static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
 240{
 241        unsigned long long now;
 242
 243        if (!blkio_blkg_waiting(stats))
 244                return;
 245
 246        now = sched_clock();
 247        if (time_after64(now, stats->start_group_wait_time))
 248                stats->group_wait_time += now - stats->start_group_wait_time;
 249        blkio_clear_blkg_waiting(stats);
 250}
 251
 252/* This should be called with the blkg->stats_lock held. */
 253static void blkio_end_empty_time(struct blkio_group_stats *stats)
 254{
 255        unsigned long long now;
 256
 257        if (!blkio_blkg_empty(stats))
 258                return;
 259
 260        now = sched_clock();
 261        if (time_after64(now, stats->start_empty_time))
 262                stats->empty_time += now - stats->start_empty_time;
 263        blkio_clear_blkg_empty(stats);
 264}
 265
 266void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
 267{
 268        unsigned long flags;
 269
 270        spin_lock_irqsave(&blkg->stats_lock, flags);
 271        BUG_ON(blkio_blkg_idling(&blkg->stats));
 272        blkg->stats.start_idle_time = sched_clock();
 273        blkio_mark_blkg_idling(&blkg->stats);
 274        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 275}
 276EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);
 277
 278void blkiocg_update_idle_time_stats(struct blkio_group *blkg)
 279{
 280        unsigned long flags;
 281        unsigned long long now;
 282        struct blkio_group_stats *stats;
 283
 284        spin_lock_irqsave(&blkg->stats_lock, flags);
 285        stats = &blkg->stats;
 286        if (blkio_blkg_idling(stats)) {
 287                now = sched_clock();
 288                if (time_after64(now, stats->start_idle_time))
 289                        stats->idle_time += now - stats->start_idle_time;
 290                blkio_clear_blkg_idling(stats);
 291        }
 292        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 293}
 294EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);
 295
 296void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg)
 297{
 298        unsigned long flags;
 299        struct blkio_group_stats *stats;
 300
 301        spin_lock_irqsave(&blkg->stats_lock, flags);
 302        stats = &blkg->stats;
 303        stats->avg_queue_size_sum +=
 304                        stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
 305                        stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
 306        stats->avg_queue_size_samples++;
 307        blkio_update_group_wait_time(stats);
 308        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 309}
 310EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
 311
 312void blkiocg_set_start_empty_time(struct blkio_group *blkg)
 313{
 314        unsigned long flags;
 315        struct blkio_group_stats *stats;
 316
 317        spin_lock_irqsave(&blkg->stats_lock, flags);
 318        stats = &blkg->stats;
 319
 320        if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
 321                        stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
 322                spin_unlock_irqrestore(&blkg->stats_lock, flags);
 323                return;
 324        }
 325
 326        /*
  327         * The group is already marked empty. This can happen if the cfqq got
  328         * a new request in the parent group and moved to this group while
  329         * being added to the service tree. Just ignore the event and move on.
 330         */
  331        if (blkio_blkg_empty(stats)) {
 332                spin_unlock_irqrestore(&blkg->stats_lock, flags);
 333                return;
 334        }
 335
 336        stats->start_empty_time = sched_clock();
 337        blkio_mark_blkg_empty(stats);
 338        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 339}
 340EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);
 341
 342void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
 343                        unsigned long dequeue)
 344{
 345        blkg->stats.dequeue += dequeue;
 346}
 347EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
 348#else
 349static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
 350                                        struct blkio_group *curr_blkg) {}
 351static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {}
 352#endif
 353
 354void blkiocg_update_io_add_stats(struct blkio_group *blkg,
 355                        struct blkio_group *curr_blkg, bool direction,
 356                        bool sync)
 357{
 358        unsigned long flags;
 359
 360        spin_lock_irqsave(&blkg->stats_lock, flags);
 361        blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
 362                        sync);
 363        blkio_end_empty_time(&blkg->stats);
 364        blkio_set_start_group_wait_time(blkg, curr_blkg);
 365        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 366}
 367EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
 368
 369void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 370                                                bool direction, bool sync)
 371{
 372        unsigned long flags;
 373
 374        spin_lock_irqsave(&blkg->stats_lock, flags);
 375        blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED],
 376                                        direction, sync);
 377        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 378}
 379EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
 380
 381void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
 382                                unsigned long unaccounted_time)
 383{
 384        unsigned long flags;
 385
 386        spin_lock_irqsave(&blkg->stats_lock, flags);
 387        blkg->stats.time += time;
 388        blkg->stats.unaccounted_time += unaccounted_time;
 389        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 390}
 391EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
 392
 393void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
 394                                uint64_t bytes, bool direction, bool sync)
 395{
 396        struct blkio_group_stats *stats;
 397        unsigned long flags;
 398
 399        spin_lock_irqsave(&blkg->stats_lock, flags);
 400        stats = &blkg->stats;
 401        stats->sectors += bytes >> 9;
 402        blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
 403                        sync);
 404        blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
 405                        direction, sync);
 406        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 407}
 408EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
 409
 410void blkiocg_update_completion_stats(struct blkio_group *blkg,
 411        uint64_t start_time, uint64_t io_start_time, bool direction, bool sync)
 412{
 413        struct blkio_group_stats *stats;
 414        unsigned long flags;
 415        unsigned long long now = sched_clock();
 416
 417        spin_lock_irqsave(&blkg->stats_lock, flags);
 418        stats = &blkg->stats;
 419        if (time_after64(now, io_start_time))
 420                blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
 421                                now - io_start_time, direction, sync);
 422        if (time_after64(io_start_time, start_time))
 423                blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
 424                                io_start_time - start_time, direction, sync);
 425        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 426}
 427EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
 428
 429void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
 430                                        bool sync)
 431{
 432        unsigned long flags;
 433
 434        spin_lock_irqsave(&blkg->stats_lock, flags);
 435        blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
 436                        sync);
 437        spin_unlock_irqrestore(&blkg->stats_lock, flags);
 438}
 439EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
 440
 441void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 442                struct blkio_group *blkg, void *key, dev_t dev,
 443                enum blkio_policy_id plid)
 444{
 445        unsigned long flags;
 446
 447        spin_lock_irqsave(&blkcg->lock, flags);
 448        spin_lock_init(&blkg->stats_lock);
 449        rcu_assign_pointer(blkg->key, key);
 450        blkg->blkcg_id = css_id(&blkcg->css);
 451        hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
 452        blkg->plid = plid;
 453        spin_unlock_irqrestore(&blkcg->lock, flags);
 454        /* Need to take css reference ? */
 455        cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
 456        blkg->dev = dev;
 457}
 458EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);
 459
 460static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
 461{
 462        hlist_del_init_rcu(&blkg->blkcg_node);
 463        blkg->blkcg_id = 0;
 464}
 465
 466/*
 467 * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
  468 * indicating that blkio_group was unhashed by the time we got to it.
 469 */
 470int blkiocg_del_blkio_group(struct blkio_group *blkg)
 471{
 472        struct blkio_cgroup *blkcg;
 473        unsigned long flags;
 474        struct cgroup_subsys_state *css;
 475        int ret = 1;
 476
 477        rcu_read_lock();
 478        css = css_lookup(&blkio_subsys, blkg->blkcg_id);
 479        if (css) {
 480                blkcg = container_of(css, struct blkio_cgroup, css);
 481                spin_lock_irqsave(&blkcg->lock, flags);
 482                if (!hlist_unhashed(&blkg->blkcg_node)) {
 483                        __blkiocg_del_blkio_group(blkg);
 484                        ret = 0;
 485                }
 486                spin_unlock_irqrestore(&blkcg->lock, flags);
 487        }
 488
 489        rcu_read_unlock();
 490        return ret;
 491}
 492EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
 493
 494/* called under rcu_read_lock(). */
 495struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
 496{
 497        struct blkio_group *blkg;
 498        struct hlist_node *n;
 499        void *__key;
 500
 501        hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
 502                __key = blkg->key;
 503                if (__key == key)
 504                        return blkg;
 505        }
 506
 507        return NULL;
 508}
 509EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
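/*
 * The key compared above is the opaque per-device pointer that the owning
 * policy passed to blkiocg_add_blkio_group() (for example CFQ's cfq_data or
 * blk-throttle's throtl_data), not a device number.
 */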
 510
 511static int
 512blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 513{
 514        struct blkio_cgroup *blkcg;
 515        struct blkio_group *blkg;
 516        struct blkio_group_stats *stats;
 517        struct hlist_node *n;
 518        uint64_t queued[BLKIO_STAT_TOTAL];
 519        int i;
 520#ifdef CONFIG_DEBUG_BLK_CGROUP
 521        bool idling, waiting, empty;
 522        unsigned long long now = sched_clock();
 523#endif
 524
 525        blkcg = cgroup_to_blkio_cgroup(cgroup);
 526        spin_lock_irq(&blkcg->lock);
 527        hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
 528                spin_lock(&blkg->stats_lock);
 529                stats = &blkg->stats;
 530#ifdef CONFIG_DEBUG_BLK_CGROUP
 531                idling = blkio_blkg_idling(stats);
 532                waiting = blkio_blkg_waiting(stats);
 533                empty = blkio_blkg_empty(stats);
 534#endif
 535                for (i = 0; i < BLKIO_STAT_TOTAL; i++)
 536                        queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
 537                memset(stats, 0, sizeof(struct blkio_group_stats));
 538                for (i = 0; i < BLKIO_STAT_TOTAL; i++)
 539                        stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
 540#ifdef CONFIG_DEBUG_BLK_CGROUP
 541                if (idling) {
 542                        blkio_mark_blkg_idling(stats);
 543                        stats->start_idle_time = now;
 544                }
 545                if (waiting) {
 546                        blkio_mark_blkg_waiting(stats);
 547                        stats->start_group_wait_time = now;
 548                }
 549                if (empty) {
 550                        blkio_mark_blkg_empty(stats);
 551                        stats->start_empty_time = now;
 552                }
 553#endif
 554                spin_unlock(&blkg->stats_lock);
 555        }
 556        spin_unlock_irq(&blkcg->lock);
 557        return 0;
 558}
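/*
 * The BLKIO_STAT_QUEUED counters are deliberately carried across the reset:
 * requests already queued will still be decremented on dispatch, and zeroing
 * them here would trip the BUG_ON()s in blkio_check_and_dec_stat().
 */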
 559
 560static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str,
 561                                int chars_left, bool diskname_only)
 562{
 563        snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev));
 564        chars_left -= strlen(str);
 565        if (chars_left <= 0) {
 566                printk(KERN_WARNING
  567                        "Possibly incorrect cgroup stat display format\n");
 568                return;
 569        }
 570        if (diskname_only)
 571                return;
 572        switch (type) {
 573        case BLKIO_STAT_READ:
 574                strlcat(str, " Read", chars_left);
 575                break;
 576        case BLKIO_STAT_WRITE:
 577                strlcat(str, " Write", chars_left);
 578                break;
 579        case BLKIO_STAT_SYNC:
 580                strlcat(str, " Sync", chars_left);
 581                break;
 582        case BLKIO_STAT_ASYNC:
 583                strlcat(str, " Async", chars_left);
 584                break;
 585        case BLKIO_STAT_TOTAL:
 586                strlcat(str, " Total", chars_left);
 587                break;
 588        default:
 589                strlcat(str, " Invalid", chars_left);
 590        }
 591}
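/*
 * Worked example: for dev = MKDEV(8, 16) and type == BLKIO_STAT_READ the key
 * becomes "8:16 Read"; with diskname_only set, the result is just "8:16".
 */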
 592
 593static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
 594                                struct cgroup_map_cb *cb, dev_t dev)
 595{
 596        blkio_get_key_name(0, dev, str, chars_left, true);
 597        cb->fill(cb, str, val);
 598        return val;
 599}
 600
 601/* This should be called with blkg->stats_lock held */
 602static uint64_t blkio_get_stat(struct blkio_group *blkg,
 603                struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
 604{
 605        uint64_t disk_total;
 606        char key_str[MAX_KEY_LEN];
 607        enum stat_sub_type sub_type;
 608
 609        if (type == BLKIO_STAT_TIME)
 610                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 611                                        blkg->stats.time, cb, dev);
 612        if (type == BLKIO_STAT_SECTORS)
 613                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 614                                        blkg->stats.sectors, cb, dev);
 615#ifdef CONFIG_DEBUG_BLK_CGROUP
 616        if (type == BLKIO_STAT_UNACCOUNTED_TIME)
 617                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 618                                        blkg->stats.unaccounted_time, cb, dev);
 619        if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
 620                uint64_t sum = blkg->stats.avg_queue_size_sum;
 621                uint64_t samples = blkg->stats.avg_queue_size_samples;
 622                if (samples)
 623                        do_div(sum, samples);
 624                else
 625                        sum = 0;
 626                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev);
 627        }
 628        if (type == BLKIO_STAT_GROUP_WAIT_TIME)
 629                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 630                                        blkg->stats.group_wait_time, cb, dev);
 631        if (type == BLKIO_STAT_IDLE_TIME)
 632                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 633                                        blkg->stats.idle_time, cb, dev);
 634        if (type == BLKIO_STAT_EMPTY_TIME)
 635                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 636                                        blkg->stats.empty_time, cb, dev);
 637        if (type == BLKIO_STAT_DEQUEUE)
 638                return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 639                                        blkg->stats.dequeue, cb, dev);
 640#endif
 641
 642        for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
 643                        sub_type++) {
 644                blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
 645                cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]);
 646        }
 647        disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] +
 648                        blkg->stats.stat_arr[type][BLKIO_STAT_WRITE];
 649        blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
 650        cb->fill(cb, key_str, disk_total);
 651        return disk_total;
 652}
 653
 654static int blkio_check_dev_num(dev_t dev)
 655{
 656        int part = 0;
 657        struct gendisk *disk;
 658
 659        disk = get_gendisk(dev, &part);
 660        if (!disk || part)
 661                return -ENODEV;
 662
 663        return 0;
 664}
 665
 666static int blkio_policy_parse_and_set(char *buf,
 667        struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
 668{
 669        char *s[4], *p, *major_s = NULL, *minor_s = NULL;
 670        int ret;
 671        unsigned long major, minor, temp;
 672        int i = 0;
 673        dev_t dev;
 674        u64 bps, iops;
 675
 676        memset(s, 0, sizeof(s));
 677
 678        while ((p = strsep(&buf, " ")) != NULL) {
 679                if (!*p)
 680                        continue;
 681
 682                s[i++] = p;
 683
  684                /* Stop after three fields so we do not parse arbitrary extra input */
 685                if (i == 3)
 686                        break;
 687        }
 688
 689        if (i != 2)
 690                return -EINVAL;
 691
 692        p = strsep(&s[0], ":");
 693        if (p != NULL)
 694                major_s = p;
 695        else
 696                return -EINVAL;
 697
 698        minor_s = s[0];
 699        if (!minor_s)
 700                return -EINVAL;
 701
 702        ret = strict_strtoul(major_s, 10, &major);
 703        if (ret)
 704                return -EINVAL;
 705
 706        ret = strict_strtoul(minor_s, 10, &minor);
 707        if (ret)
 708                return -EINVAL;
 709
 710        dev = MKDEV(major, minor);
 711
 712        ret = blkio_check_dev_num(dev);
 713        if (ret)
 714                return ret;
 715
 716        newpn->dev = dev;
 717
 718        if (s[1] == NULL)
 719                return -EINVAL;
 720
 721        switch (plid) {
 722        case BLKIO_POLICY_PROP:
 723                ret = strict_strtoul(s[1], 10, &temp);
 724                if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
 725                        temp > BLKIO_WEIGHT_MAX)
 726                        return -EINVAL;
 727
 728                newpn->plid = plid;
 729                newpn->fileid = fileid;
 730                newpn->val.weight = temp;
 731                break;
 732        case BLKIO_POLICY_THROTL:
  733                switch (fileid) {
 734                case BLKIO_THROTL_read_bps_device:
 735                case BLKIO_THROTL_write_bps_device:
 736                        ret = strict_strtoull(s[1], 10, &bps);
 737                        if (ret)
 738                                return -EINVAL;
 739
 740                        newpn->plid = plid;
 741                        newpn->fileid = fileid;
 742                        newpn->val.bps = bps;
 743                        break;
 744                case BLKIO_THROTL_read_iops_device:
 745                case BLKIO_THROTL_write_iops_device:
 746                        ret = strict_strtoull(s[1], 10, &iops);
 747                        if (ret)
 748                                return -EINVAL;
 749
 750                        if (iops > THROTL_IOPS_MAX)
 751                                return -EINVAL;
 752
 753                        newpn->plid = plid;
 754                        newpn->fileid = fileid;
 755                        newpn->val.iops = (unsigned int)iops;
 756                        break;
 757                }
 758                break;
 759        default:
 760                BUG();
 761        }
 762
 763        return 0;
 764}
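/*
 * Input format example (device numbers are illustrative): writing
 * "8:16 1048576" to blkio.throttle.read_bps_device is split into major "8",
 * minor "16" and value "1048576"; a value of 0 is treated later as a request
 * to delete the rule (see blkio_delete_rule_command()).
 */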
 765
 766unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
 767                              dev_t dev)
 768{
 769        struct blkio_policy_node *pn;
 770
 771        pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
 772                                BLKIO_PROP_weight_device);
 773        if (pn)
 774                return pn->val.weight;
 775        else
 776                return blkcg->weight;
 777}
 778EXPORT_SYMBOL_GPL(blkcg_get_weight);
 779
 780uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
 781{
 782        struct blkio_policy_node *pn;
 783
 784        pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
 785                                BLKIO_THROTL_read_bps_device);
 786        if (pn)
 787                return pn->val.bps;
 788        else
 789                return -1;
 790}
 791
 792uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
 793{
 794        struct blkio_policy_node *pn;
 795        pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
 796                                BLKIO_THROTL_write_bps_device);
 797        if (pn)
 798                return pn->val.bps;
 799        else
 800                return -1;
 801}
 802
 803unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
 804{
 805        struct blkio_policy_node *pn;
 806
 807        pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
 808                                BLKIO_THROTL_read_iops_device);
 809        if (pn)
 810                return pn->val.iops;
 811        else
 812                return -1;
 813}
 814
 815unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
 816{
 817        struct blkio_policy_node *pn;
 818        pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
 819                                BLKIO_THROTL_write_iops_device);
 820        if (pn)
 821                return pn->val.iops;
 822        else
 823                return -1;
 824}
 825
 826/* Checks whether user asked for deleting a policy rule */
 827static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
 828{
  829        switch (pn->plid) {
 830        case BLKIO_POLICY_PROP:
 831                if (pn->val.weight == 0)
 832                        return 1;
 833                break;
 834        case BLKIO_POLICY_THROTL:
  835                switch (pn->fileid) {
 836                case BLKIO_THROTL_read_bps_device:
 837                case BLKIO_THROTL_write_bps_device:
 838                        if (pn->val.bps == 0)
 839                                return 1;
 840                        break;
 841                case BLKIO_THROTL_read_iops_device:
 842                case BLKIO_THROTL_write_iops_device:
 843                        if (pn->val.iops == 0)
 844                                return 1;
 845                }
 846                break;
 847        default:
 848                BUG();
 849        }
 850
 851        return 0;
 852}
 853
 854static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
 855                                        struct blkio_policy_node *newpn)
 856{
  857        switch (oldpn->plid) {
 858        case BLKIO_POLICY_PROP:
 859                oldpn->val.weight = newpn->val.weight;
 860                break;
 861        case BLKIO_POLICY_THROTL:
  862                switch (newpn->fileid) {
 863                case BLKIO_THROTL_read_bps_device:
 864                case BLKIO_THROTL_write_bps_device:
 865                        oldpn->val.bps = newpn->val.bps;
 866                        break;
 867                case BLKIO_THROTL_read_iops_device:
 868                case BLKIO_THROTL_write_iops_device:
 869                        oldpn->val.iops = newpn->val.iops;
 870                }
 871                break;
 872        default:
 873                BUG();
 874        }
 875}
 876
 877/*
 878 * Some rules/values in blkg have changed. Propagate those to respective
 879 * policies.
 880 */
 881static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
 882                struct blkio_group *blkg, struct blkio_policy_node *pn)
 883{
 884        unsigned int weight, iops;
 885        u64 bps;
 886
  887        switch (pn->plid) {
 888        case BLKIO_POLICY_PROP:
 889                weight = pn->val.weight ? pn->val.weight :
 890                                blkcg->weight;
 891                blkio_update_group_weight(blkg, weight);
 892                break;
 893        case BLKIO_POLICY_THROTL:
  894                switch (pn->fileid) {
 895                case BLKIO_THROTL_read_bps_device:
 896                case BLKIO_THROTL_write_bps_device:
 897                        bps = pn->val.bps ? pn->val.bps : (-1);
 898                        blkio_update_group_bps(blkg, bps, pn->fileid);
 899                        break;
 900                case BLKIO_THROTL_read_iops_device:
 901                case BLKIO_THROTL_write_iops_device:
 902                        iops = pn->val.iops ? pn->val.iops : (-1);
 903                        blkio_update_group_iops(blkg, iops, pn->fileid);
 904                        break;
 905                }
 906                break;
 907        default:
 908                BUG();
 909        }
 910}
 911
 912/*
 913 * A policy node rule has been updated. Propagate this update to all the
 914 * block groups which might be affected by this update.
 915 */
 916static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg,
 917                                struct blkio_policy_node *pn)
 918{
 919        struct blkio_group *blkg;
 920        struct hlist_node *n;
 921
 922        spin_lock(&blkio_list_lock);
 923        spin_lock_irq(&blkcg->lock);
 924
 925        hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
 926                if (pn->dev != blkg->dev || pn->plid != blkg->plid)
 927                        continue;
 928                blkio_update_blkg_policy(blkcg, blkg, pn);
 929        }
 930
 931        spin_unlock_irq(&blkcg->lock);
 932        spin_unlock(&blkio_list_lock);
 933}
 934
 935static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
 936                                       const char *buffer)
 937{
 938        int ret = 0;
 939        char *buf;
 940        struct blkio_policy_node *newpn, *pn;
 941        struct blkio_cgroup *blkcg;
 942        int keep_newpn = 0;
 943        enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
 944        int fileid = BLKIOFILE_ATTR(cft->private);
 945
 946        buf = kstrdup(buffer, GFP_KERNEL);
 947        if (!buf)
 948                return -ENOMEM;
 949
 950        newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
 951        if (!newpn) {
 952                ret = -ENOMEM;
 953                goto free_buf;
 954        }
 955
 956        ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
 957        if (ret)
 958                goto free_newpn;
 959
 960        blkcg = cgroup_to_blkio_cgroup(cgrp);
 961
 962        spin_lock_irq(&blkcg->lock);
 963
 964        pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
 965        if (!pn) {
 966                if (!blkio_delete_rule_command(newpn)) {
 967                        blkio_policy_insert_node(blkcg, newpn);
 968                        keep_newpn = 1;
 969                }
 970                spin_unlock_irq(&blkcg->lock);
 971                goto update_io_group;
 972        }
 973
 974        if (blkio_delete_rule_command(newpn)) {
 975                blkio_policy_delete_node(pn);
 976                spin_unlock_irq(&blkcg->lock);
 977                goto update_io_group;
 978        }
 979        spin_unlock_irq(&blkcg->lock);
 980
 981        blkio_update_policy_rule(pn, newpn);
 982
 983update_io_group:
 984        blkio_update_policy_node_blkg(blkcg, newpn);
 985
 986free_newpn:
 987        if (!keep_newpn)
 988                kfree(newpn);
 989free_buf:
 990        kfree(buf);
 991        return ret;
 992}
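/*
 * Summary of the write path above: a non-zero value with no existing node
 * inserts a new rule, a zero value deletes the matching rule, and anything
 * else updates the existing node in place; in every case the affected
 * blkio_groups are refreshed via blkio_update_policy_node_blkg().
 */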
 993
 994static void
 995blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
 996{
  997        switch (pn->plid) {
 998                case BLKIO_POLICY_PROP:
 999                        if (pn->fileid == BLKIO_PROP_weight_device)
1000                                seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
1001                                        MINOR(pn->dev), pn->val.weight);
1002                        break;
1003                case BLKIO_POLICY_THROTL:
 1004                        switch (pn->fileid) {
1005                        case BLKIO_THROTL_read_bps_device:
1006                        case BLKIO_THROTL_write_bps_device:
1007                                seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
1008                                        MINOR(pn->dev), pn->val.bps);
1009                                break;
1010                        case BLKIO_THROTL_read_iops_device:
1011                        case BLKIO_THROTL_write_iops_device:
1012                                seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
1013                                        MINOR(pn->dev), pn->val.iops);
1014                                break;
1015                        }
1016                        break;
1017                default:
1018                        BUG();
1019        }
1020}
1021
1022/* cgroup files which read their data from policy nodes end up here */
1023static void blkio_read_policy_node_files(struct cftype *cft,
1024                        struct blkio_cgroup *blkcg, struct seq_file *m)
1025{
1026        struct blkio_policy_node *pn;
1027
1028        if (!list_empty(&blkcg->policy_list)) {
1029                spin_lock_irq(&blkcg->lock);
1030                list_for_each_entry(pn, &blkcg->policy_list, node) {
1031                        if (!pn_matches_cftype(cft, pn))
1032                                continue;
1033                        blkio_print_policy_node(m, pn);
1034                }
1035                spin_unlock_irq(&blkcg->lock);
1036        }
1037}
1038
1039static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
1040                                struct seq_file *m)
1041{
1042        struct blkio_cgroup *blkcg;
1043        enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1044        int name = BLKIOFILE_ATTR(cft->private);
1045
1046        blkcg = cgroup_to_blkio_cgroup(cgrp);
1047
 1048        switch (plid) {
1049        case BLKIO_POLICY_PROP:
 1050                switch (name) {
1051                case BLKIO_PROP_weight_device:
1052                        blkio_read_policy_node_files(cft, blkcg, m);
1053                        return 0;
1054                default:
1055                        BUG();
1056                }
1057                break;
1058        case BLKIO_POLICY_THROTL:
 1059                switch (name) {
1060                case BLKIO_THROTL_read_bps_device:
1061                case BLKIO_THROTL_write_bps_device:
1062                case BLKIO_THROTL_read_iops_device:
1063                case BLKIO_THROTL_write_iops_device:
1064                        blkio_read_policy_node_files(cft, blkcg, m);
1065                        return 0;
1066                default:
1067                        BUG();
1068                }
1069                break;
1070        default:
1071                BUG();
1072        }
1073
1074        return 0;
1075}
1076
1077static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
1078                struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
1079                bool show_total)
1080{
1081        struct blkio_group *blkg;
1082        struct hlist_node *n;
1083        uint64_t cgroup_total = 0;
1084
1085        rcu_read_lock();
1086        hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
1087                if (blkg->dev) {
1088                        if (!cftype_blkg_same_policy(cft, blkg))
1089                                continue;
1090                        spin_lock_irq(&blkg->stats_lock);
1091                        cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
1092                                                type);
1093                        spin_unlock_irq(&blkg->stats_lock);
1094                }
1095        }
1096        if (show_total)
1097                cb->fill(cb, "Total", cgroup_total);
1098        rcu_read_unlock();
1099        return 0;
1100}
1101
1102/* All map kind of cgroup file get serviced by this function */
1103static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
1104                                struct cgroup_map_cb *cb)
1105{
1106        struct blkio_cgroup *blkcg;
1107        enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1108        int name = BLKIOFILE_ATTR(cft->private);
1109
1110        blkcg = cgroup_to_blkio_cgroup(cgrp);
1111
 1112        switch (plid) {
1113        case BLKIO_POLICY_PROP:
 1114                switch (name) {
1115                case BLKIO_PROP_time:
1116                        return blkio_read_blkg_stats(blkcg, cft, cb,
1117                                                BLKIO_STAT_TIME, 0);
1118                case BLKIO_PROP_sectors:
1119                        return blkio_read_blkg_stats(blkcg, cft, cb,
1120                                                BLKIO_STAT_SECTORS, 0);
1121                case BLKIO_PROP_io_service_bytes:
1122                        return blkio_read_blkg_stats(blkcg, cft, cb,
1123                                                BLKIO_STAT_SERVICE_BYTES, 1);
1124                case BLKIO_PROP_io_serviced:
1125                        return blkio_read_blkg_stats(blkcg, cft, cb,
1126                                                BLKIO_STAT_SERVICED, 1);
1127                case BLKIO_PROP_io_service_time:
1128                        return blkio_read_blkg_stats(blkcg, cft, cb,
1129                                                BLKIO_STAT_SERVICE_TIME, 1);
1130                case BLKIO_PROP_io_wait_time:
1131                        return blkio_read_blkg_stats(blkcg, cft, cb,
1132                                                BLKIO_STAT_WAIT_TIME, 1);
1133                case BLKIO_PROP_io_merged:
1134                        return blkio_read_blkg_stats(blkcg, cft, cb,
1135                                                BLKIO_STAT_MERGED, 1);
1136                case BLKIO_PROP_io_queued:
1137                        return blkio_read_blkg_stats(blkcg, cft, cb,
1138                                                BLKIO_STAT_QUEUED, 1);
1139#ifdef CONFIG_DEBUG_BLK_CGROUP
1140                case BLKIO_PROP_unaccounted_time:
1141                        return blkio_read_blkg_stats(blkcg, cft, cb,
1142                                                BLKIO_STAT_UNACCOUNTED_TIME, 0);
1143                case BLKIO_PROP_dequeue:
1144                        return blkio_read_blkg_stats(blkcg, cft, cb,
1145                                                BLKIO_STAT_DEQUEUE, 0);
1146                case BLKIO_PROP_avg_queue_size:
1147                        return blkio_read_blkg_stats(blkcg, cft, cb,
1148                                                BLKIO_STAT_AVG_QUEUE_SIZE, 0);
1149                case BLKIO_PROP_group_wait_time:
1150                        return blkio_read_blkg_stats(blkcg, cft, cb,
1151                                                BLKIO_STAT_GROUP_WAIT_TIME, 0);
1152                case BLKIO_PROP_idle_time:
1153                        return blkio_read_blkg_stats(blkcg, cft, cb,
1154                                                BLKIO_STAT_IDLE_TIME, 0);
1155                case BLKIO_PROP_empty_time:
1156                        return blkio_read_blkg_stats(blkcg, cft, cb,
1157                                                BLKIO_STAT_EMPTY_TIME, 0);
1158#endif
1159                default:
1160                        BUG();
1161                }
1162                break;
1163        case BLKIO_POLICY_THROTL:
 1164                switch (name) {
1165                case BLKIO_THROTL_io_service_bytes:
1166                        return blkio_read_blkg_stats(blkcg, cft, cb,
1167                                                BLKIO_STAT_SERVICE_BYTES, 1);
1168                case BLKIO_THROTL_io_serviced:
1169                        return blkio_read_blkg_stats(blkcg, cft, cb,
1170                                                BLKIO_STAT_SERVICED, 1);
1171                default:
1172                        BUG();
1173                }
1174                break;
1175        default:
1176                BUG();
1177        }
1178
1179        return 0;
1180}
1181
1182static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val)
1183{
1184        struct blkio_group *blkg;
1185        struct hlist_node *n;
1186        struct blkio_policy_node *pn;
1187
1188        if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
1189                return -EINVAL;
1190
1191        spin_lock(&blkio_list_lock);
1192        spin_lock_irq(&blkcg->lock);
1193        blkcg->weight = (unsigned int)val;
1194
1195        hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
1196                pn = blkio_policy_search_node(blkcg, blkg->dev,
1197                                BLKIO_POLICY_PROP, BLKIO_PROP_weight_device);
1198                if (pn)
1199                        continue;
1200
1201                blkio_update_group_weight(blkg, blkcg->weight);
1202        }
1203        spin_unlock_irq(&blkcg->lock);
1204        spin_unlock(&blkio_list_lock);
1205        return 0;
1206}
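/*
 * Groups that carry a per-device BLKIO_PROP_weight_device rule are skipped
 * above, so writing blkio.weight only changes groups without a more specific
 * per-device override.
 */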
1207
 1208static u64 blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft) {
1209        struct blkio_cgroup *blkcg;
1210        enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1211        int name = BLKIOFILE_ATTR(cft->private);
1212
1213        blkcg = cgroup_to_blkio_cgroup(cgrp);
1214
 1215        switch (plid) {
1216        case BLKIO_POLICY_PROP:
 1217                switch (name) {
1218                case BLKIO_PROP_weight:
1219                        return (u64)blkcg->weight;
1220                }
1221                break;
1222        default:
1223                BUG();
1224        }
1225        return 0;
1226}
1227
1228static int
1229blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
1230{
1231        struct blkio_cgroup *blkcg;
1232        enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1233        int name = BLKIOFILE_ATTR(cft->private);
1234
1235        blkcg = cgroup_to_blkio_cgroup(cgrp);
1236
 1237        switch (plid) {
1238        case BLKIO_POLICY_PROP:
 1239                switch (name) {
1240                case BLKIO_PROP_weight:
1241                        return blkio_weight_write(blkcg, val);
1242                }
1243                break;
1244        default:
1245                BUG();
1246        }
1247
1248        return 0;
1249}
1250
1251struct cftype blkio_files[] = {
1252        {
1253                .name = "weight_device",
1254                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1255                                BLKIO_PROP_weight_device),
1256                .read_seq_string = blkiocg_file_read,
1257                .write_string = blkiocg_file_write,
1258                .max_write_len = 256,
1259        },
1260        {
1261                .name = "weight",
1262                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1263                                BLKIO_PROP_weight),
1264                .read_u64 = blkiocg_file_read_u64,
1265                .write_u64 = blkiocg_file_write_u64,
1266        },
1267        {
1268                .name = "time",
1269                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1270                                BLKIO_PROP_time),
1271                .read_map = blkiocg_file_read_map,
1272        },
1273        {
1274                .name = "sectors",
1275                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1276                                BLKIO_PROP_sectors),
1277                .read_map = blkiocg_file_read_map,
1278        },
1279        {
1280                .name = "io_service_bytes",
1281                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1282                                BLKIO_PROP_io_service_bytes),
1283                .read_map = blkiocg_file_read_map,
1284        },
1285        {
1286                .name = "io_serviced",
1287                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1288                                BLKIO_PROP_io_serviced),
1289                .read_map = blkiocg_file_read_map,
1290        },
1291        {
1292                .name = "io_service_time",
1293                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1294                                BLKIO_PROP_io_service_time),
1295                .read_map = blkiocg_file_read_map,
1296        },
1297        {
1298                .name = "io_wait_time",
1299                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1300                                BLKIO_PROP_io_wait_time),
1301                .read_map = blkiocg_file_read_map,
1302        },
1303        {
1304                .name = "io_merged",
1305                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1306                                BLKIO_PROP_io_merged),
1307                .read_map = blkiocg_file_read_map,
1308        },
1309        {
1310                .name = "io_queued",
1311                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1312                                BLKIO_PROP_io_queued),
1313                .read_map = blkiocg_file_read_map,
1314        },
1315        {
1316                .name = "reset_stats",
1317                .write_u64 = blkiocg_reset_stats,
1318        },
1319#ifdef CONFIG_BLK_DEV_THROTTLING
1320        {
1321                .name = "throttle.read_bps_device",
1322                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1323                                BLKIO_THROTL_read_bps_device),
1324                .read_seq_string = blkiocg_file_read,
1325                .write_string = blkiocg_file_write,
1326                .max_write_len = 256,
1327        },
1328
1329        {
1330                .name = "throttle.write_bps_device",
1331                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1332                                BLKIO_THROTL_write_bps_device),
1333                .read_seq_string = blkiocg_file_read,
1334                .write_string = blkiocg_file_write,
1335                .max_write_len = 256,
1336        },
1337
1338        {
1339                .name = "throttle.read_iops_device",
1340                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1341                                BLKIO_THROTL_read_iops_device),
1342                .read_seq_string = blkiocg_file_read,
1343                .write_string = blkiocg_file_write,
1344                .max_write_len = 256,
1345        },
1346
1347        {
1348                .name = "throttle.write_iops_device",
1349                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1350                                BLKIO_THROTL_write_iops_device),
1351                .read_seq_string = blkiocg_file_read,
1352                .write_string = blkiocg_file_write,
1353                .max_write_len = 256,
1354        },
1355        {
1356                .name = "throttle.io_service_bytes",
1357                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1358                                BLKIO_THROTL_io_service_bytes),
1359                .read_map = blkiocg_file_read_map,
1360        },
1361        {
1362                .name = "throttle.io_serviced",
1363                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1364                                BLKIO_THROTL_io_serviced),
1365                .read_map = blkiocg_file_read_map,
1366        },
1367#endif /* CONFIG_BLK_DEV_THROTTLING */
1368
1369#ifdef CONFIG_DEBUG_BLK_CGROUP
1370        {
1371                .name = "avg_queue_size",
1372                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1373                                BLKIO_PROP_avg_queue_size),
1374                .read_map = blkiocg_file_read_map,
1375        },
1376        {
1377                .name = "group_wait_time",
1378                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1379                                BLKIO_PROP_group_wait_time),
1380                .read_map = blkiocg_file_read_map,
1381        },
1382        {
1383                .name = "idle_time",
1384                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1385                                BLKIO_PROP_idle_time),
1386                .read_map = blkiocg_file_read_map,
1387        },
1388        {
1389                .name = "empty_time",
1390                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1391                                BLKIO_PROP_empty_time),
1392                .read_map = blkiocg_file_read_map,
1393        },
1394        {
1395                .name = "dequeue",
1396                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1397                                BLKIO_PROP_dequeue),
1398                .read_map = blkiocg_file_read_map,
1399        },
1400        {
1401                .name = "unaccounted_time",
1402                .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1403                                BLKIO_PROP_unaccounted_time),
1404                .read_map = blkiocg_file_read_map,
1405        },
1406#endif
1407};
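/*
 * Usage sketch (paths assume the blkio controller mounted at /cgroup/blkio):
 *   echo 500 > /cgroup/blkio/grp1/blkio.weight
 *   echo "8:16 1048576" > /cgroup/blkio/grp1/blkio.throttle.read_bps_device
 * The map-style statistics files (io_service_bytes, io_serviced, ...) are
 * rendered by blkiocg_file_read_map() above.
 */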
1408
1409static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1410{
1411        return cgroup_add_files(cgroup, subsys, blkio_files,
1412                                ARRAY_SIZE(blkio_files));
1413}
1414
1415static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1416{
1417        struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
1418        unsigned long flags;
1419        struct blkio_group *blkg;
1420        void *key;
1421        struct blkio_policy_type *blkiop;
1422        struct blkio_policy_node *pn, *pntmp;
1423
1424        rcu_read_lock();
1425        do {
1426                spin_lock_irqsave(&blkcg->lock, flags);
1427
1428                if (hlist_empty(&blkcg->blkg_list)) {
1429                        spin_unlock_irqrestore(&blkcg->lock, flags);
1430                        break;
1431                }
1432
1433                blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
1434                                        blkcg_node);
1435                key = rcu_dereference(blkg->key);
1436                __blkiocg_del_blkio_group(blkg);
1437
1438                spin_unlock_irqrestore(&blkcg->lock, flags);
1439
1440                /*
1441                 * This blkio_group is being unlinked as associated cgroup is
1442                 * going away. Let all the IO controlling policies know about
1443                 * this event.
1444                 */
1445                spin_lock(&blkio_list_lock);
1446                list_for_each_entry(blkiop, &blkio_list, list) {
1447                        if (blkiop->plid != blkg->plid)
1448                                continue;
1449                        blkiop->ops.blkio_unlink_group_fn(key, blkg);
1450                }
1451                spin_unlock(&blkio_list_lock);
1452        } while (1);
1453
1454        list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
1455                blkio_policy_delete_node(pn);
1456                kfree(pn);
1457        }
1458
1459        free_css_id(&blkio_subsys, &blkcg->css);
1460        rcu_read_unlock();
1461        if (blkcg != &blkio_root_cgroup)
1462                kfree(blkcg);
1463}
1464
1465static struct cgroup_subsys_state *
1466blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1467{
1468        struct blkio_cgroup *blkcg;
1469        struct cgroup *parent = cgroup->parent;
1470
1471        if (!parent) {
1472                blkcg = &blkio_root_cgroup;
1473                goto done;
1474        }
1475
1476        blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
1477        if (!blkcg)
1478                return ERR_PTR(-ENOMEM);
1479
1480        blkcg->weight = BLKIO_WEIGHT_DEFAULT;
1481done:
1482        spin_lock_init(&blkcg->lock);
1483        INIT_HLIST_HEAD(&blkcg->blkg_list);
1484
1485        INIT_LIST_HEAD(&blkcg->policy_list);
1486        return &blkcg->css;
1487}
1488
1489/*
1490 * We cannot support shared io contexts, as we have no mean to support
1491 * two tasks with the same ioc in two different groups without major rework
1492 * of the main cic data structures.  For now we allow a task to change
1493 * its cgroup only if it's the only owner of its ioc.
1494 */
1495static int blkiocg_can_attach(struct cgroup_subsys *subsys,
1496                                struct cgroup *cgroup, struct task_struct *tsk,
1497                                bool threadgroup)
1498{
1499        struct io_context *ioc;
1500        int ret = 0;
1501
1502        /* task_lock() is needed to avoid races with exit_io_context() */
1503        task_lock(tsk);
1504        ioc = tsk->io_context;
1505        if (ioc && atomic_read(&ioc->nr_tasks) > 1)
1506                ret = -EINVAL;
1507        task_unlock(tsk);
1508
1509        return ret;
1510}
1511
1512static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
1513                                struct cgroup *prev, struct task_struct *tsk,
1514                                bool threadgroup)
1515{
1516        struct io_context *ioc;
1517
1518        task_lock(tsk);
1519        ioc = tsk->io_context;
1520        if (ioc)
1521                ioc->cgroup_changed = 1;
1522        task_unlock(tsk);
1523}
1524
1525void blkio_policy_register(struct blkio_policy_type *blkiop)
1526{
1527        spin_lock(&blkio_list_lock);
1528        list_add_tail(&blkiop->list, &blkio_list);
1529        spin_unlock(&blkio_list_lock);
1530}
1531EXPORT_SYMBOL_GPL(blkio_policy_register);
1532
1533void blkio_policy_unregister(struct blkio_policy_type *blkiop)
1534{
1535        spin_lock(&blkio_list_lock);
1536        list_del_init(&blkiop->list);
1537        spin_unlock(&blkio_list_lock);
1538}
1539EXPORT_SYMBOL_GPL(blkio_policy_unregister);
1540
1541static int __init init_cgroup_blkio(void)
1542{
1543        return cgroup_load_subsys(&blkio_subsys);
1544}
1545
1546static void __exit exit_cgroup_blkio(void)
1547{
1548        cgroup_unload_subsys(&blkio_subsys);
1549}
1550
1551module_init(init_cgroup_blkio);
1552module_exit(exit_cgroup_blkio);
1553MODULE_LICENSE("GPL");
1554