linux/include/linux/blk-cgroup.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *                    Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *                    Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
#include <linux/percpu.h>
#include <linux/percpu_counter.h>
#include <linux/u64_stats_sync.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>
#include <linux/fs.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH     (INT_MAX / 2)

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX         UINT_MAX

#ifdef CONFIG_BLK_CGROUP

enum blkg_iostat_type {
        BLKG_IOSTAT_READ,
        BLKG_IOSTAT_WRITE,
        BLKG_IOSTAT_DISCARD,

        BLKG_IOSTAT_NR,
};

struct blkcg_gq;

struct blkcg {
        struct cgroup_subsys_state      css;
        spinlock_t                      lock;
        refcount_t                      online_pin;

        struct radix_tree_root          blkg_tree;
        struct blkcg_gq __rcu           *blkg_hint;
        struct hlist_head               blkg_list;

        struct blkcg_policy_data        *cpd[BLKCG_MAX_POLS];

        struct list_head                all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
        struct list_head                cgwb_list;
#endif
};

struct blkg_iostat {
        u64                             bytes[BLKG_IOSTAT_NR];
        u64                             ios[BLKG_IOSTAT_NR];
};

struct blkg_iostat_set {
        struct u64_stats_sync           sync;
        struct blkg_iostat              cur;
        struct blkg_iostat              last;
};

/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q).  This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods.  A policy can allocate a private data
 * area by allocating a larger structure which embeds blkg_policy_data at
 * the beginning.
 */
struct blkg_policy_data {
        /* the blkg and policy id this per-policy data belongs to */
        struct blkcg_gq                 *blkg;
        int                             plid;
};
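/*
 * Example (illustrative sketch, not part of the kernel API): a policy
 * usually embeds blkg_policy_data at the start of its own per-blkg
 * structure so the two can be converted with container_of().  The names
 * below (struct example_blkg_data, pd_to_example) are hypothetical.
 *
 *	struct example_blkg_data {
 *		struct blkg_policy_data pd;
 *		u64 nr_dispatched;
 *	};
 *
 *	static struct example_blkg_data *pd_to_example(struct blkg_policy_data *pd)
 *	{
 *		return pd ? container_of(pd, struct example_blkg_data, pd) : NULL;
 *	}
 *
 * The same embedding pattern is used with blkcg_policy_data for per-blkcg
 * state allocated by cpd_alloc_fn().
 */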

/*
 * Policies that need to keep per-blkcg data which is independent of any
 * request_queue associated with it should implement cpd_alloc/free_fn()
 * methods.  A policy can allocate a private data area by allocating a
 * larger structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
        /* the blkcg and policy id this per-policy data belongs to */
        struct blkcg                    *blkcg;
        int                             plid;
};

/* association between a blk cgroup and a request queue */
struct blkcg_gq {
        /* Pointer to the associated request_queue */
        struct request_queue            *q;
        struct list_head                q_node;
        struct hlist_node               blkcg_node;
        struct blkcg                    *blkcg;

        /*
         * Each blkg gets congested separately and the congestion state is
         * propagated to the matching bdi_writeback_congested.
         */
        struct bdi_writeback_congested  *wb_congested;

        /* all non-root blkcg_gq's are guaranteed to have access to parent */
        struct blkcg_gq                 *parent;

        /* reference count */
        struct percpu_ref               refcnt;

        /* is this blkg online? protected by both blkcg and q locks */
        bool                            online;

        struct blkg_iostat_set __percpu *iostat_cpu;
        struct blkg_iostat_set          iostat;

        struct blkg_policy_data         *pd[BLKCG_MAX_POLS];

        spinlock_t                      async_bio_lock;
        struct bio_list                 async_bios;
        struct work_struct              async_bio_work;

        atomic_t                        use_delay;
        atomic64_t                      delay_nsec;
        atomic64_t                      delay_start;
        u64                             last_delay;
        int                             last_use;

        struct rcu_head                 rcu_head;
};

typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
                                struct request_queue *q, struct blkcg *blkcg);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
                                      size_t size);

struct blkcg_policy {
        int                             plid;
        /* cgroup files for the policy */
        struct cftype                   *dfl_cftypes;
        struct cftype                   *legacy_cftypes;

        /* operations */
        blkcg_pol_alloc_cpd_fn          *cpd_alloc_fn;
        blkcg_pol_init_cpd_fn           *cpd_init_fn;
        blkcg_pol_free_cpd_fn           *cpd_free_fn;
        blkcg_pol_bind_cpd_fn           *cpd_bind_fn;

        blkcg_pol_alloc_pd_fn           *pd_alloc_fn;
        blkcg_pol_init_pd_fn            *pd_init_fn;
        blkcg_pol_online_pd_fn          *pd_online_fn;
        blkcg_pol_offline_pd_fn         *pd_offline_fn;
        blkcg_pol_free_pd_fn            *pd_free_fn;
        blkcg_pol_reset_pd_stats_fn     *pd_reset_stats_fn;
        blkcg_pol_stat_pd_fn            *pd_stat_fn;
};
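/*
 * Example (hypothetical sketch, loosely modeled on existing policies such
 * as blk-throttle): a policy fills in a blkcg_policy, registers it once at
 * init time, and then activates it per request_queue.  example_pd_alloc()
 * and friends are made-up names; struct example_blkg_data and
 * pd_to_example() are the hypothetical helpers sketched above.
 *
 *	static struct blkg_policy_data *example_pd_alloc(gfp_t gfp,
 *			struct request_queue *q, struct blkcg *blkcg)
 *	{
 *		struct example_blkg_data *ed;
 *
 *		ed = kzalloc_node(sizeof(*ed), gfp, q->node);
 *		return ed ? &ed->pd : NULL;
 *	}
 *
 *	static void example_pd_free(struct blkg_policy_data *pd)
 *	{
 *		kfree(pd_to_example(pd));
 *	}
 *
 *	static struct blkcg_policy blkcg_policy_example = {
 *		.pd_alloc_fn	= example_pd_alloc,
 *		.pd_free_fn	= example_pd_free,
 *	};
 *
 * Registration would typically be blkcg_policy_register(&blkcg_policy_example)
 * at init time, paired with blkcg_activate_policy(q, ...) when the policy is
 * enabled on a queue and blkcg_policy_unregister() on exit.
 */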

extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;
extern bool blkcg_debug_stats;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
                                      struct request_queue *q, bool update_hint);
struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
                                      struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
                                    struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
                          const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
                             const struct blkcg_policy *pol);

const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
                       u64 (*prfill)(struct seq_file *,
                                     struct blkg_policy_data *, int),
                       const struct blkcg_policy *pol, int data,
                       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);

struct blkg_conf_ctx {
        struct gendisk                  *disk;
        struct blkcg_gq                 *blkg;
        char                            *body;
};

struct gendisk *blkcg_conf_get_disk(char **inputp);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
                   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);

/**
 * blkcg_css - find the current css
 *
 * Find the css associated with either the kthread or the current task.
 * This may return a dying css, so it is up to the caller to use tryget logic
 * to confirm it is alive and well.
 */
static inline struct cgroup_subsys_state *blkcg_css(void)
{
        struct cgroup_subsys_state *css;

        css = kthread_blkcg();
        if (css)
                return css;
        return task_css(current, io_cgrp_id);
}
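/*
 * Example (illustrative sketch): because blkcg_css() may return a css that
 * is already dying, a caller that needs to hold on to it is expected to use
 * tryget logic before relying on the reference:
 *
 *	struct cgroup_subsys_state *css;
 *
 *	rcu_read_lock();
 *	css = blkcg_css();
 *	if (css_tryget_online(css)) {
 *		... use css_to_blkcg(css) ...
 *		css_put(css);
 *	}
 *	rcu_read_unlock();
 */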

static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
        return css ? container_of(css, struct blkcg, css) : NULL;
}

/**
 * __bio_blkcg - internal, inconsistent version to get blkcg
 *
 * DO NOT USE.
 * This function is inconsistent and consequently is dangerous to use.  The
 * first part of the function returns a blkcg where a reference is owned by the
 * bio.  This means it does not need to be rcu protected as it cannot go away
 * with the bio owning a reference to it.  However, the latter potentially gets
 * it from task_css().  This can race against task migration and the cgroup
 * dying.  It is also semantically different as it must be called rcu protected
 * and is susceptible to failure when trying to get a reference to it.
 * Therefore, it is not ok to assume that *_get() will always succeed on the
 * blkcg returned here.
 */
static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
        if (bio && bio->bi_blkg)
                return bio->bi_blkg->blkcg;
        return css_to_blkcg(blkcg_css());
}

/**
 * bio_blkcg - grab the blkcg associated with a bio
 * @bio: target bio
 *
 * This returns the blkcg associated with a bio, %NULL if not associated.
 * Callers are expected to either handle %NULL or know association has been
 * done prior to calling this.
 */
static inline struct blkcg *bio_blkcg(struct bio *bio)
{
        if (bio && bio->bi_blkg)
                return bio->bi_blkg->blkcg;
        return NULL;
}

static inline bool blk_cgroup_congested(void)
{
        struct cgroup_subsys_state *css;
        bool ret = false;

        rcu_read_lock();
        css = kthread_blkcg();
        if (!css)
                css = task_css(current, io_cgrp_id);
        while (css) {
                if (atomic_read(&css->cgroup->congestion_count)) {
                        ret = true;
                        break;
                }
                css = css->parent;
        }
        rcu_read_unlock();
        return ret;
}

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @bio: bio of interest
 * @return: true if this bio needs to be submitted with the root blkg context.
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as if
 * it were attached to the root blkg, and then backcharge to the actual owning
 * blkg.  The idea is that we do bio_blkcg() to look up the actual context for
 * the bio and attach the appropriate blkg to the bio.  Then we call this
 * helper and, if it returns true, issue the bio with the root blkg for that
 * queue and do any backcharging to the originating cgroup once the IO is
 * complete.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
        return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}

/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg.  Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
        return css_to_blkcg(blkcg->css.parent);
}

/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update the lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations.  Looks up the blkg for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and the lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
                                             struct request_queue *q,
                                             bool update_hint)
{
        struct blkcg_gq *blkg;

        if (blkcg == &blkcg_root)
                return q->root_blkg;

        blkg = rcu_dereference(blkcg->blkg_hint);
        if (blkg && blkg->q == q)
                return blkg;

        return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
                                           struct request_queue *q)
{
        WARN_ON_ONCE(!rcu_read_lock_held());
        return __blkg_lookup(blkcg, q, false);
}
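/*
 * Example (illustrative sketch): a typical lookup runs under the RCU read
 * lock and, if the blkg must outlive the RCU section, pins it with
 * blkg_tryget()/blkg_put() (both defined further down in this header):
 *
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(blkcg, q);
 *	if (blkg && !blkg_tryget(blkg))
 *		blkg = NULL;
 *	rcu_read_unlock();
 *
 *	if (blkg) {
 *		... use blkg ...
 *		blkg_put(blkg);
 *	}
 */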

/**
 * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 * @q: request_queue of interest
 *
 * Lookup blkg for @q at the root level. See also blkg_lookup().
 */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
        return q->root_blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to the private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
                                                  struct blkcg_policy *pol)
{
        return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
                                                     struct blkcg_policy *pol)
{
        return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data.  Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
        return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
        return cpd ? cpd->blkcg : NULL;
}

extern void blkcg_destroy_blkgs(struct blkcg *blkcg);

/**
 * blkcg_pin_online - pin online state
 * @blkcg: blkcg of interest
 *
 * While pinned, a blkcg is kept online.  This is primarily used to
 * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
 * while an associated cgwb is still active.
 */
static inline void blkcg_pin_online(struct blkcg *blkcg)
{
        refcount_inc(&blkcg->online_pin);
}

/**
 * blkcg_unpin_online - unpin online state
 * @blkcg: blkcg of interest
 *
 * This is primarily used to impedance-match blkg and cgwb lifetimes so
 * that blkg doesn't go offline while an associated cgwb is still active.
 * When this count goes to zero, all active cgwbs have finished so the
 * blkcg can continue destruction by calling blkcg_destroy_blkgs().
 */
static inline void blkcg_unpin_online(struct blkcg *blkcg)
{
        do {
                if (!refcount_dec_and_test(&blkcg->online_pin))
                        break;
                blkcg_destroy_blkgs(blkcg);
                blkcg = blkcg_parent(blkcg);
        } while (blkcg);
}
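/*
 * Example (illustrative sketch): the pin/unpin pair brackets the lifetime
 * of an object, such as a cgwb, that must not see its blkcg go offline:
 *
 *	blkcg_pin_online(blkcg);
 *	... set up and use the writeback structure ...
 *	blkcg_unpin_online(blkcg);
 *
 * The final unpin walks up the hierarchy, calling blkcg_destroy_blkgs() on
 * each ancestor whose pin count drops to zero.
 */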

/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
        return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
        percpu_ref_get(&blkg->refcnt);
}

/**
 * blkg_tryget - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
 * of freeing this blkg, so we can only use it if the refcnt is not zero.
 */
static inline bool blkg_tryget(struct blkcg_gq *blkg)
{
        return blkg && percpu_ref_tryget(&blkg->refcnt);
}

/**
 * blkg_tryget_closest - try and get a blkg ref on the closest blkg
 * @blkg: blkg to get
 *
 * This needs to be called RCU protected.  As the failure mode here is to walk
 * up the blkg tree, this ensures that the blkg->parent pointers are always
 * valid.  This returns the blkg that it ended up taking a reference on or
 * %NULL if no reference was taken.
 */
static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
{
        struct blkcg_gq *ret_blkg = NULL;

        WARN_ON_ONCE(!rcu_read_lock_held());

        while (blkg) {
                if (blkg_tryget(blkg)) {
                        ret_blkg = blkg;
                        break;
                }
                blkg = blkg->parent;
        }

        return ret_blkg;
}
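/*
 * Example (illustrative sketch): walking up to the closest live blkg during
 * association, with the reference dropped once the caller is done.
 * candidate_blkg is a hypothetical pointer obtained from a lookup such as
 * blkg_lookup_create().
 *
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg = blkg_tryget_closest(candidate_blkg);
 *	rcu_read_unlock();
 *
 *	if (blkg) {
 *		... use blkg ...
 *		blkg_put(blkg);
 *	}
 */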

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
        percpu_ref_put(&blkg->refcnt);
}

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip a subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)           \
        css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)   \
                if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),    \
                                              (p_blkg)->q, false)))

/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead.  Synchronization rules are the same.  @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)          \
        css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)  \
                if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),    \
                                              (p_blkg)->q, false)))
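/*
 * Example (illustrative sketch): iterating a blkg subtree under the RCU
 * read lock, e.g. to apply a configuration change to every descendant.
 * parent_blkg is a hypothetical starting blkg; also hold the blkcg or
 * queue lock if the walk must see exactly the online blkgs.
 *
 *	struct cgroup_subsys_state *pos_css;
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(blkg, pos_css, parent_blkg) {
 *		... update per-blkg state of blkg ...
 *	}
 *	rcu_read_unlock();
 */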

#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
                           struct bio *bio);
#else
static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
                                  struct bio *bio) { return false; }
#endif

bool __blkcg_punt_bio_submit(struct bio *bio);

static inline bool blkcg_punt_bio_submit(struct bio *bio)
{
        if (bio->bi_opf & REQ_CGROUP_PUNT)
                return __blkcg_punt_bio_submit(bio);
        else
                return false;
}

static inline void blkcg_bio_issue_init(struct bio *bio)
{
        bio_issue_init(&bio->bi_issue, bio_sectors(bio));
}

static inline bool blkcg_bio_issue_check(struct request_queue *q,
                                         struct bio *bio)
{
        struct blkcg_gq *blkg;
        bool throtl = false;

        rcu_read_lock();

        if (!bio->bi_blkg) {
                char b[BDEVNAME_SIZE];

                WARN_ONCE(1,
                          "no blkg associated for bio on block-device: %s\n",
                          bio_devname(bio, b));
                bio_associate_blkg(bio);
        }

        blkg = bio->bi_blkg;

        throtl = blk_throtl_bio(q, blkg, bio);

        if (!throtl) {
                struct blkg_iostat_set *bis;
                int rwd, cpu;

                if (op_is_discard(bio->bi_opf))
                        rwd = BLKG_IOSTAT_DISCARD;
                else if (op_is_write(bio->bi_opf))
                        rwd = BLKG_IOSTAT_WRITE;
                else
                        rwd = BLKG_IOSTAT_READ;

                cpu = get_cpu();
                bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
                u64_stats_update_begin(&bis->sync);

                /*
                 * If the bio is flagged with BIO_CGROUP_ACCT it means this is a
                 * split bio and we would have already accounted for the size of
                 * the bio.
                 */
                if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
                        bio_set_flag(bio, BIO_CGROUP_ACCT);
                        bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
                }
                bis->cur.ios[rwd]++;

                u64_stats_update_end(&bis->sync);
                if (cgroup_subsys_on_dfl(io_cgrp_subsys))
                        cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu);
                put_cpu();
        }

        blkcg_bio_issue_init(bio);

        rcu_read_unlock();
        return !throtl;
}

static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
        if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
                return;
        if (atomic_add_return(1, &blkg->use_delay) == 1)
                atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
        int old = atomic_read(&blkg->use_delay);

        if (WARN_ON_ONCE(old < 0))
                return 0;
        if (old == 0)
                return 0;

        /*
         * We do this song and dance because we can race with somebody else
         * adding or removing delay.  If we just did an atomic_dec we'd end up
         * negative and we'd already be in trouble.  We need to subtract 1 and
         * then check to see if we were the last delay so we can drop the
         * congestion count on the cgroup.
         */
        while (old) {
                int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
                if (cur == old)
                        break;
                old = cur;
        }

        if (old == 0)
                return 0;
        if (old == 1)
                atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
        return 1;
}

/**
 * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount
 * @blkg: target blkg
 * @delay: delay duration in nsecs
 *
 * When enabled with this function, the delay is not decayed and must be
 * explicitly cleared with blkcg_clear_delay(). Must not be mixed with
 * blkcg_[un]use_delay() and blkcg_add_delay() usages.
 */
static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)
{
        int old = atomic_read(&blkg->use_delay);

        /* We only want 1 person setting the congestion count for this blkg. */
        if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old)
                atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);

        atomic64_set(&blkg->delay_nsec, delay);
}

/**
 * blkcg_clear_delay - Disable allocator delay mechanism
 * @blkg: target blkg
 *
 * Disable use_delay mechanism. See blkcg_set_delay().
 */
static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
        int old = atomic_read(&blkg->use_delay);

        /* We only want 1 person clearing the congestion count for this blkg. */
        if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old)
                atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
}
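/*
 * Example (illustrative sketch): a policy that wants a fixed, non-decaying
 * throttling delay pairs the two helpers and keeps them separate from the
 * blkcg_use_delay()/blkcg_unuse_delay() counters:
 *
 *	blkcg_set_delay(blkg, NSEC_PER_SEC / 10);
 *	... congested period ...
 *	blkcg_clear_delay(blkg);
 *
 * The accumulated delay is then charged to the offending tasks through
 * blkcg_schedule_throttle() / blkcg_maybe_throttle_current() declared below.
 */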

void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);
#else   /* CONFIG_BLK_CGROUP */

struct blkcg {
};

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

struct blkcg_policy {
};

#define blkcg_root_css  ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))

static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }

#ifdef CONFIG_BLOCK

static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
                                        const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
                                           const struct blkcg_policy *pol) { }

static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
                                                  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline bool blkcg_bio_issue_check(struct request_queue *q,
                                         struct bio *bio) { return true; }

#define blk_queue_for_each_rl(rl, q)    \
        for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif  /* CONFIG_BLOCK */
#endif  /* CONFIG_BLK_CGROUP */
#endif  /* _BLK_CGROUP_H */