linux/include/linux/backing-dev.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * include/linux/backing-dev.h
 *
 * low-level device information and state which is propagated up through
 * to high-level code.
 */

#ifndef _LINUX_BACKING_DEV_H
#define _LINUX_BACKING_DEV_H

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/device.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>

static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
{
        kref_get(&bdi->refcnt);
        return bdi;
}

struct backing_dev_info *bdi_get_by_id(u64 id);
void bdi_put(struct backing_dev_info *bdi);

__printf(2, 3)
int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...);
__printf(2, 0)
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt,
                    va_list args);
void bdi_set_owner(struct backing_dev_info *bdi, struct device *owner);
void bdi_unregister(struct backing_dev_info *bdi);

struct backing_dev_info *bdi_alloc(int node_id);

void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
void wb_wakeup_delayed(struct bdi_writeback *wb);

void wb_wait_for_completion(struct wb_completion *done);

extern spinlock_t bdi_lock;
extern struct list_head bdi_list;

extern struct workqueue_struct *bdi_wq;
extern struct workqueue_struct *bdi_async_bio_wq;

static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
{
        return test_bit(WB_has_dirty_io, &wb->state);
}

static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
{
        /*
         * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are
         * any dirty wbs.  See wb_update_write_bandwidth().
         */
        return atomic_long_read(&bdi->tot_write_bandwidth);
}

static inline void __add_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item, s64 amount)
{
        percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
}

static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        __add_wb_stat(wb, item, 1);
}

static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        __add_wb_stat(wb, item, -1);
}

static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_read_positive(&wb->stat[item]);
}

static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_sum_positive(&wb->stat[item]);
}

extern void wb_writeout_inc(struct bdi_writeback *wb);

/*
 * maximal error of a stat counter.
 */
static inline unsigned long wb_stat_error(void)
{
#ifdef CONFIG_SMP
        return nr_cpu_ids * WB_STAT_BATCH;
#else
        return 1;
#endif
}
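
/*
 * Illustrative sketch, not part of this header's API: reading an
 * approximate per-wb counter while allowing for the worst-case percpu
 * error, in the style of the dirty-throttling code.  The helper name and
 * the @limit parameter are hypothetical.
 */
static inline bool example_wb_over_limit(struct bdi_writeback *wb,
                                         unsigned long limit)
{
        unsigned long reclaimable = wb_stat(wb, WB_RECLAIMABLE);

        /* wb_stat() reads percpu counters which may lag by up to wb_stat_error() */
        return reclaimable + wb_stat_error() > limit;
}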

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);

/*
 * Flags in backing_dev_info::capabilities
 *
 * BDI_CAP_WRITEBACK:           Supports dirty page writeback, and dirty pages
 *                              should contribute to accounting
 * BDI_CAP_WRITEBACK_ACCT:      Automatically account writeback pages
 * BDI_CAP_STRICTLIMIT:         Keep number of dirty pages below bdi threshold
 */
#define BDI_CAP_WRITEBACK               (1 << 0)
#define BDI_CAP_WRITEBACK_ACCT          (1 << 1)
#define BDI_CAP_STRICTLIMIT             (1 << 2)
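
/*
 * Illustrative sketch, not part of this header: how a driver-like caller
 * might allocate, flag and register a bdi using the declarations above.
 * example_setup_bdi() and @unit are hypothetical; error handling is kept
 * to the minimum.
 */
static inline struct backing_dev_info *example_setup_bdi(int unit)
{
        struct backing_dev_info *bdi;

        bdi = bdi_alloc(NUMA_NO_NODE);
        if (!bdi)
                return NULL;

        /* write back dirty pages on this bdi and account them automatically */
        bdi->capabilities = BDI_CAP_WRITEBACK | BDI_CAP_WRITEBACK_ACCT;

        if (bdi_register(bdi, "example%d", unit)) {
                bdi_put(bdi);
                return NULL;
        }
        return bdi;
}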

extern struct backing_dev_info noop_backing_dev_info;

/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @wb: bdi_writeback of interest
 *
 * Determine whether there is writeback waiting to be handled against a
 * bdi_writeback.
 */
static inline bool writeback_in_progress(struct bdi_writeback *wb)
{
        return test_bit(WB_writeback_running, &wb->state);
}

static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{
        struct super_block *sb;

        if (!inode)
                return &noop_backing_dev_info;

        sb = inode->i_sb;
#ifdef CONFIG_BLOCK
        if (sb_is_blkdev_sb(sb))
                return I_BDEV(inode)->bd_disk->bdi;
#endif
        return sb->s_bdi;
}

static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
{
        return wb->congested & cong_bits;
}

long congestion_wait(int sync, long timeout);
long wait_iff_congested(int sync, long timeout);

static inline bool mapping_can_writeback(struct address_space *mapping)
{
        return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK;
}

static inline int bdi_sched_wait(void *word)
{
        schedule();
        return 0;
}

#ifdef CONFIG_CGROUP_WRITEBACK

struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css);
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css,
                                    gfp_t gfp);
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct blkcg *blkcg);
int inode_congested(struct inode *inode, int cong_bits);

/**
 * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
 * @inode: inode of interest
 *
 * Cgroup writeback requires support from the filesystem.  Also, both memcg and
 * iocg have to be on the default hierarchy.  Test whether all conditions are
 * met.
 *
 * Note that the test result may change dynamically on the same inode
 * depending on how memcg and iocg are configured.
 */
static inline bool inode_cgwb_enabled(struct inode *inode)
{
        struct backing_dev_info *bdi = inode_to_bdi(inode);

        return cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
                cgroup_subsys_on_dfl(io_cgrp_subsys) &&
                (bdi->capabilities & BDI_CAP_WRITEBACK) &&
                (inode->i_sb->s_iflags & SB_I_CGROUPWB);
}

/**
 * wb_find_current - find wb for %current on a bdi
 * @bdi: bdi of interest
 *
 * Find the wb of @bdi which matches both the memcg and blkcg of %current.
 * Must be called under rcu_read_lock() which protects the returned wb.
 * Returns NULL if not found.
 */
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        struct cgroup_subsys_state *memcg_css;
        struct bdi_writeback *wb;

        memcg_css = task_css(current, memory_cgrp_id);
        if (!memcg_css->parent)
                return &bdi->wb;

        wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);

        /*
         * %current's blkcg equals the effective blkcg of its memcg.  No
         * need to use the relatively expensive cgroup_get_e_css().
         */
        if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
                return wb;
        return NULL;
}

/**
 * wb_get_create_current - get or create wb for %current on a bdi
 * @bdi: bdi of interest
 * @gfp: allocation mask
 *
 * Equivalent to wb_get_create() on %current's memcg.  This function is
 * called from a relatively hot path and optimizes the common cases using
 * wb_find_current().
 */
static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        struct bdi_writeback *wb;

        rcu_read_lock();
        wb = wb_find_current(bdi);
        if (wb && unlikely(!wb_tryget(wb)))
                wb = NULL;
        rcu_read_unlock();

        if (unlikely(!wb)) {
                struct cgroup_subsys_state *memcg_css;

                memcg_css = task_get_css(current, memory_cgrp_id);
                wb = wb_get_create(bdi, memcg_css, gfp);
                css_put(memcg_css);
        }
        return wb;
}
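
/*
 * Illustrative sketch, not part of this header: the get/put pattern
 * expected around wb_get_create_current().  The returned wb carries a
 * reference which the caller drops with wb_put().  The helper name and
 * the accounting done on the wb are hypothetical stand-ins; GFP_KERNEL
 * assumes process context.
 */
static inline void example_use_current_wb(struct backing_dev_info *bdi)
{
        struct bdi_writeback *wb;

        wb = wb_get_create_current(bdi, GFP_KERNEL);
        if (!wb)
                return;

        inc_wb_stat(wb, WB_DIRTIED);    /* stand-in for real accounting */
        wb_put(wb);
}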

/**
 * inode_to_wb_is_valid - test whether an inode has a wb associated
 * @inode: inode of interest
 *
 * Returns %true if @inode has a wb associated.  May be called without any
 * locking.
 */
static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return inode->i_wb;
}

/**
 * inode_to_wb - determine the wb of an inode
 * @inode: inode of interest
 *
 * Returns the wb @inode is currently associated with.  The caller must be
 * holding either @inode->i_lock, the i_pages lock, or the
 * associated wb's list_lock.
 */
static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
        WARN_ON_ONCE(debug_locks &&
                     (!lockdep_is_held(&inode->i_lock) &&
                      !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
                      !lockdep_is_held(&inode->i_wb->list_lock)));
#endif
        return inode->i_wb;
}

static inline struct bdi_writeback *inode_to_wb_wbc(
                                struct inode *inode,
                                struct writeback_control *wbc)
{
        /*
         * If wbc does not have inode attached, it means cgroup writeback was
         * disabled when wbc started. Just use the default wb in that case.
         */
        return wbc->wb ? wbc->wb : &inode_to_bdi(inode)->wb;
}

/**
 * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
 * @inode: target inode
 * @cookie: output param, to be passed to the end function
 *
 * The caller wants to access the wb associated with @inode but isn't
 * holding inode->i_lock, the i_pages lock or wb->list_lock.  This
 * function determines the wb associated with @inode and ensures that the
 * association doesn't change until the transaction is finished with
 * unlocked_inode_to_wb_end().
 *
 * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and
 * can't sleep during the transaction.  IRQs may or may not be disabled on
 * return.
 */
static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
{
        rcu_read_lock();

        /*
         * Paired with store_release in inode_switch_wbs_work_fn() and
         * ensures that we see the new wb if we see cleared I_WB_SWITCH.
         */
        cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;

        if (unlikely(cookie->locked))
                xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags);

        /*
         * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
         * lock.  inode_to_wb() will bark.  Deref directly.
         */
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_end - end inode wb access transaction
 * @inode: target inode
 * @cookie: @cookie from unlocked_inode_to_wb_begin()
 */
static inline void unlocked_inode_to_wb_end(struct inode *inode,
                                            struct wb_lock_cookie *cookie)
{
        if (unlikely(cookie->locked))
                xa_unlock_irqrestore(&inode->i_mapping->i_pages, cookie->flags);

        rcu_read_unlock();
}
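
/*
 * Illustrative sketch, not part of this header: the begin/end transaction
 * pattern documented above, pinning the inode->wb association without
 * taking i_lock.  The helper name and the sampled counter are hypothetical
 * stand-ins for a real access.  Nothing in the transaction may sleep.
 */
static inline s64 example_sample_wb_written(struct inode *inode)
{
        struct wb_lock_cookie cookie = {};
        struct bdi_writeback *wb;
        s64 written;

        wb = unlocked_inode_to_wb_begin(inode, &cookie);
        written = wb_stat(wb, WB_WRITTEN);      /* stand-in for real access */
        unlocked_inode_to_wb_end(inode, &cookie);

        return written;
}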

#else   /* CONFIG_CGROUP_WRITEBACK */

static inline bool inode_cgwb_enabled(struct inode *inode)
{
        return false;
}

static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        return &bdi->wb;
}

static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        return &bdi->wb;
}

static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return true;
}

static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
        return &inode_to_bdi(inode)->wb;
}

static inline struct bdi_writeback *inode_to_wb_wbc(
                                struct inode *inode,
                                struct writeback_control *wbc)
{
        return inode_to_wb(inode);
}

static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
{
        return inode_to_wb(inode);
}

static inline void unlocked_inode_to_wb_end(struct inode *inode,
                                            struct wb_lock_cookie *cookie)
{
}

static inline void wb_memcg_offline(struct mem_cgroup *memcg)
{
}

static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
}

static inline int inode_congested(struct inode *inode, int cong_bits)
{
        return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
}

#endif  /* CONFIG_CGROUP_WRITEBACK */

static inline int inode_read_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_sync_congested);
}

static inline int inode_write_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_async_congested);
}

static inline int inode_rw_congested(struct inode *inode)
{
        return inode_congested(inode, (1 << WB_sync_congested) |
                                      (1 << WB_async_congested));
}

static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
{
        return wb_congested(&bdi->wb, cong_bits);
}

static inline int bdi_read_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_sync_congested);
}

static inline int bdi_write_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_async_congested);
}

static inline int bdi_rw_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, (1 << WB_sync_congested) |
                                  (1 << WB_async_congested));
}
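
/*
 * Illustrative sketch, not part of this header: the classic back-off used
 * by reclaim-style loops when a device looks write-congested.  The helper
 * name is hypothetical; HZ/10 is merely a commonly seen timeout, not a
 * requirement.
 */
static inline void example_backoff_if_congested(struct backing_dev_info *bdi)
{
        if (bdi_write_congested(bdi))
                congestion_wait(BLK_RW_ASYNC, HZ / 10);
}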

const char *bdi_dev_name(struct backing_dev_info *bdi);

#endif  /* _LINUX_BACKING_DEV_H */