linux/include/linux/backing-dev.h
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * include/linux/backing-dev.h
 *
 * low-level device information and state which is propagated up through
 * to high-level code.
 */

#ifndef _LINUX_BACKING_DEV_H
#define _LINUX_BACKING_DEV_H

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>

static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
{
        kref_get(&bdi->refcnt);
        return bdi;
}

void bdi_put(struct backing_dev_info *bdi);

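/*
 * Refcounting sketch (illustrative only, not taken from a particular
 * caller): code that wants to keep using a bdi beyond the context it
 * obtained it from pins it with bdi_get() and pairs that with bdi_put():
 *
 *	struct backing_dev_info *bdi;
 *
 *	bdi = bdi_get(sb->s_bdi);
 *	... bdi stays valid here even if the superblock goes away ...
 *	bdi_put(bdi);
 */
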
__printf(2, 3)
int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...);
__printf(2, 0)
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt,
                    va_list args);
int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner);
void bdi_unregister(struct backing_dev_info *bdi);

struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id);
static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask)
{
        return bdi_alloc_node(gfp_mask, NUMA_NO_NODE);
}

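/*
 * Lifecycle sketch (illustrative; "foo" and foo_minor are placeholder
 * names, not taken from any driver): a driver allocates a bdi, registers
 * it under a device name, and drops its reference when the device goes
 * away.
 *
 *	struct backing_dev_info *bdi;
 *	int err;
 *
 *	bdi = bdi_alloc(GFP_KERNEL);
 *	if (!bdi)
 *		return -ENOMEM;
 *	err = bdi_register(bdi, "foo%d", foo_minor);
 *	if (err) {
 *		bdi_put(bdi);
 *		return err;
 *	}
 *
 *	... the device is live and writeback may target the bdi ...
 *
 *	bdi_unregister(bdi);
 *	bdi_put(bdi);
 */
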
void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
void wb_wakeup_delayed(struct bdi_writeback *wb);

extern spinlock_t bdi_lock;
extern struct list_head bdi_list;

extern struct workqueue_struct *bdi_wq;

static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
{
        return test_bit(WB_has_dirty_io, &wb->state);
}

static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
{
        /*
         * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are
         * any dirty wbs.  See wb_update_write_bandwidth().
         */
        return atomic_long_read(&bdi->tot_write_bandwidth);
}

static inline void __add_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item, s64 amount)
{
        percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
}

static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        __add_wb_stat(wb, item, 1);
}

static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        __add_wb_stat(wb, item, -1);
}

static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_read_positive(&wb->stat[item]);
}

static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_sum_positive(&wb->stat[item]);
}

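/*
 * Usage sketch (illustrative only): writeback accounting bumps these
 * per-cpu counters as pages change state, and throttling code reads them
 * back with the cheap, approximate accessor:
 *
 *	inc_wb_stat(wb, WB_WRITEBACK);		when a page enters writeback
 *	dec_wb_stat(wb, WB_WRITEBACK);		when writeback completes
 *
 *	dirty = wb_stat(wb, WB_RECLAIMABLE) + wb_stat(wb, WB_WRITEBACK);
 *
 * wb_stat() only reads the batched approximation; wb_stat_sum() folds in
 * all per-cpu deltas and is correspondingly more expensive.
 */
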
extern void wb_writeout_inc(struct bdi_writeback *wb);

/*
 * maximal error of a stat counter.
 */
static inline unsigned long wb_stat_error(void)
{
#ifdef CONFIG_SMP
        return nr_cpu_ids * WB_STAT_BATCH;
#else
        return 1;
#endif
}

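/*
 * Illustrative use of wb_stat_error() (a sketch, not lifted verbatim from
 * the throttling code; "limit" is a hypothetical threshold): because
 * wb_stat() may be off by up to nr_cpu_ids * WB_STAT_BATCH, a caller
 * comparing a stat against a limit only pays for the exact wb_stat_sum()
 * when the approximate value lands within that error margin:
 *
 *	if (wb_stat(wb, WB_RECLAIMABLE) + wb_stat_error() < limit)
 *		return;				clearly below the limit
 *	if (wb_stat_sum(wb, WB_RECLAIMABLE) < limit)
 *		return;				precisely below the limit
 */
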
int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);

/*
 * Flags in backing_dev_info::capabilities
 *
 * The first three flags control whether dirty pages will contribute to the
 * VM's accounting and whether writepages() should be called for dirty pages
 * (something that would not, for example, be appropriate for ramfs)
 *
 * WARNING: these flags are closely related and should not normally be
 * used separately.  The BDI_CAP_NO_ACCT_AND_WRITEBACK combines these
 * three flags into a single convenience macro.
 *
 * BDI_CAP_NO_ACCT_DIRTY:  Dirty pages shouldn't contribute to accounting
 * BDI_CAP_NO_WRITEBACK:   Don't write pages back
 * BDI_CAP_NO_ACCT_WB:     Don't automatically account writeback pages
 * BDI_CAP_STRICTLIMIT:    Keep number of dirty pages below bdi threshold.
 *
 * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback.
 * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be
 *                         inefficient.
 */
#define BDI_CAP_NO_ACCT_DIRTY   0x00000001
#define BDI_CAP_NO_WRITEBACK    0x00000002
#define BDI_CAP_NO_ACCT_WB      0x00000004
#define BDI_CAP_STABLE_WRITES   0x00000008
#define BDI_CAP_STRICTLIMIT     0x00000010
#define BDI_CAP_CGROUP_WRITEBACK 0x00000020
#define BDI_CAP_SYNCHRONOUS_IO  0x00000040

#define BDI_CAP_NO_ACCT_AND_WRITEBACK \
        (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB)

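/*
 * Usage sketch (illustrative; the ramfs case is the one the comment above
 * alludes to): a backing device whose pages can never be cleaned by
 * writeback opts out of dirty accounting and writeback in one go, while a
 * device that must see stable page contents during I/O (e.g. one that
 * checksums data in flight) additionally sets BDI_CAP_STABLE_WRITES:
 *
 *	bdi->capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK;	ramfs-style
 *	bdi->capabilities |= BDI_CAP_STABLE_WRITES;		checksumming device
 */
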
extern struct backing_dev_info noop_backing_dev_info;

/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @wb: bdi_writeback of interest
 *
 * Determine whether there is writeback waiting to be handled against a
 * bdi_writeback.
 */
static inline bool writeback_in_progress(struct bdi_writeback *wb)
{
        return test_bit(WB_writeback_running, &wb->state);
}

static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{
        struct super_block *sb;

        if (!inode)
                return &noop_backing_dev_info;

        sb = inode->i_sb;
#ifdef CONFIG_BLOCK
        if (sb_is_blkdev_sb(sb))
                return I_BDEV(inode)->bd_bdi;
#endif
        return sb->s_bdi;
}

static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
{
        struct backing_dev_info *bdi = wb->bdi;

        if (bdi->congested_fn)
                return bdi->congested_fn(bdi->congested_data, cong_bits);
        return wb->congested->state & cong_bits;
}

long congestion_wait(int sync, long timeout);
long wait_iff_congested(int sync, long timeout);

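/*
 * Usage sketch (illustrative): memory reclaim and filesystem code back off
 * briefly when the backing device is congested instead of spinning on it.
 * The sync argument takes the BLK_RW_SYNC/BLK_RW_ASYNC constants from
 * linux/blkdev.h and the timeout is in jiffies:
 *
 *	if (bdi_write_congested(bdi))
 *		congestion_wait(BLK_RW_ASYNC, HZ / 10);
 *
 * bdi_write_congested() and friends are defined near the end of this
 * header.
 */
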
static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi)
{
        return bdi->capabilities & BDI_CAP_SYNCHRONOUS_IO;
}

static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi)
{
        return bdi->capabilities & BDI_CAP_STABLE_WRITES;
}

static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK);
}

static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY);
}

static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
{
        /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */
        return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB |
                                      BDI_CAP_NO_WRITEBACK));
}

static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
        return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host));
}

static inline bool mapping_cap_account_dirty(struct address_space *mapping)
{
        return bdi_cap_account_dirty(inode_to_bdi(mapping->host));
}

static inline int bdi_sched_wait(void *word)
{
        schedule();
        return 0;
}

#ifdef CONFIG_CGROUP_WRITEBACK

struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
void wb_congested_put(struct bdi_writeback_congested *congested);
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css,
                                    gfp_t gfp);
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct blkcg *blkcg);
int inode_congested(struct inode *inode, int cong_bits);

/**
 * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
 * @inode: inode of interest
 *
 * cgroup writeback requires support from both the bdi and filesystem.
 * Also, both memcg and blkcg have to be on the default hierarchy.  Test
 * whether all conditions are met.
 *
 * Note that the test result may change dynamically on the same inode
 * depending on how memcg and blkcg are configured.
 */
static inline bool inode_cgwb_enabled(struct inode *inode)
{
        struct backing_dev_info *bdi = inode_to_bdi(inode);

        return cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
                cgroup_subsys_on_dfl(io_cgrp_subsys) &&
                bdi_cap_account_dirty(bdi) &&
                (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
                (inode->i_sb->s_iflags & SB_I_CGROUPWB);
}

/**
 * wb_find_current - find wb for %current on a bdi
 * @bdi: bdi of interest
 *
 * Find the wb of @bdi which matches both the memcg and blkcg of %current.
 * Must be called under rcu_read_lock() which protects the returned wb.
 * Returns %NULL if not found.
 */
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        struct cgroup_subsys_state *memcg_css;
        struct bdi_writeback *wb;

        memcg_css = task_css(current, memory_cgrp_id);
        if (!memcg_css->parent)
                return &bdi->wb;

        wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);

        /*
         * %current's blkcg equals the effective blkcg of its memcg.  No
         * need to use the relatively expensive cgroup_get_e_css().
         */
        if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
                return wb;
        return NULL;
}

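/*
 * Usage sketch (illustrative): the returned wb is protected only by RCU,
 * so a caller either finishes with it inside the same read-side critical
 * section or takes its own reference before leaving it:
 *
 *	rcu_read_lock();
 *	wb = wb_find_current(bdi);
 *	if (wb && wb_tryget(wb))
 *		... wb is pinned and remains usable after unlock ...
 *	rcu_read_unlock();
 *
 * wb_get_create_current() below wraps exactly this pattern and falls back
 * to creating the wb when the lookup fails.
 */
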
/**
 * wb_get_create_current - get or create wb for %current on a bdi
 * @bdi: bdi of interest
 * @gfp: allocation mask
 *
 * Equivalent to wb_get_create() on %current's memcg.  This function is
 * called from a relatively hot path and optimizes the common cases using
 * wb_find_current().
 */
static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        struct bdi_writeback *wb;

        rcu_read_lock();
        wb = wb_find_current(bdi);
        if (wb && unlikely(!wb_tryget(wb)))
                wb = NULL;
        rcu_read_unlock();

        if (unlikely(!wb)) {
                struct cgroup_subsys_state *memcg_css;

                memcg_css = task_get_css(current, memory_cgrp_id);
                wb = wb_get_create(bdi, memcg_css, gfp);
                css_put(memcg_css);
        }
        return wb;
}

/**
 * inode_to_wb_is_valid - test whether an inode has a wb associated
 * @inode: inode of interest
 *
 * Returns %true if @inode has a wb associated.  May be called without any
 * locking.
 */
static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return inode->i_wb;
}

/**
 * inode_to_wb - determine the wb of an inode
 * @inode: inode of interest
 *
 * Returns the wb @inode is currently associated with.  The caller must be
 * holding either @inode->i_lock, the i_pages lock, or the
 * associated wb's list_lock.
 */
static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
        WARN_ON_ONCE(debug_locks &&
                     (!lockdep_is_held(&inode->i_lock) &&
                      !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
                      !lockdep_is_held(&inode->i_wb->list_lock)));
#endif
        return inode->i_wb;
}

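/*
 * Usage sketch (illustrative): inode_to_wb() asserts the locking rule
 * above via lockdep, so a typical caller looks like:
 *
 *	spin_lock(&inode->i_lock);
 *	wb = inode_to_wb(inode);
 *	... operate on wb while the association cannot change ...
 *	spin_unlock(&inode->i_lock);
 *
 * Callers that cannot take any of those locks should use the
 * unlocked_inode_to_wb_begin()/_end() transaction below instead.
 */
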
/**
 * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
 * @inode: target inode
 * @cookie: output param, to be passed to the end function
 *
 * The caller wants to access the wb associated with @inode but isn't
 * holding inode->i_lock, the i_pages lock or wb->list_lock.  This
 * function determines the wb associated with @inode and ensures that the
 * association doesn't change until the transaction is finished with
 * unlocked_inode_to_wb_end().
 *
 * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and
 * can't sleep during the transaction.  IRQs may or may not be disabled on
 * return.
 */
static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
{
        rcu_read_lock();

        /*
         * Paired with store_release in inode_switch_wbs_work_fn() and
         * ensures that we see the new wb if we see cleared I_WB_SWITCH.
         */
        cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;

        if (unlikely(cookie->locked))
                xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags);

        /*
         * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
         * lock.  inode_to_wb() will bark.  Deref directly.
         */
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_end - end inode wb access transaction
 * @inode: target inode
 * @cookie: @cookie from unlocked_inode_to_wb_begin()
 */
static inline void unlocked_inode_to_wb_end(struct inode *inode,
                                            struct wb_lock_cookie *cookie)
{
        if (unlikely(cookie->locked))
                xa_unlock_irqrestore(&inode->i_mapping->i_pages, cookie->flags);

        rcu_read_unlock();
}

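/*
 * Usage sketch (illustrative): the transaction brackets a short,
 * non-sleeping section that only needs the inode->wb association to stay
 * stable, e.g. to update per-wb statistics:
 *
 *	struct wb_lock_cookie cookie = {};
 *	struct bdi_writeback *wb;
 *
 *	wb = unlocked_inode_to_wb_begin(inode, &cookie);
 *	inc_wb_stat(wb, WB_WRITEBACK);
 *	unlocked_inode_to_wb_end(inode, &cookie);
 *
 * No sleeping between begin and end, and no assumptions about the IRQ
 * state, since the i_pages lock may have been taken with irqsave.
 */
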
#else   /* CONFIG_CGROUP_WRITEBACK */

static inline bool inode_cgwb_enabled(struct inode *inode)
{
        return false;
}

static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
        refcount_inc(&bdi->wb_congested->refcnt);
        return bdi->wb_congested;
}

static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
        if (refcount_dec_and_test(&congested->refcnt))
                kfree(congested);
}

static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        return &bdi->wb;
}

static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        return &bdi->wb;
}

static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return true;
}

static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
        return &inode_to_bdi(inode)->wb;
}

static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
{
        return inode_to_wb(inode);
}

static inline void unlocked_inode_to_wb_end(struct inode *inode,
                                            struct wb_lock_cookie *cookie)
{
}

static inline void wb_memcg_offline(struct mem_cgroup *memcg)
{
}

static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
}

static inline int inode_congested(struct inode *inode, int cong_bits)
{
        return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
}

#endif  /* CONFIG_CGROUP_WRITEBACK */

static inline int inode_read_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_sync_congested);
}

static inline int inode_write_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_async_congested);
}

static inline int inode_rw_congested(struct inode *inode)
{
        return inode_congested(inode, (1 << WB_sync_congested) |
                                      (1 << WB_async_congested));
}

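/*
 * Usage sketch (illustrative): readahead and other optional-I/O paths use
 * these predicates to skip work while the inode's backing device is backed
 * up, rather than piling more requests onto it:
 *
 *	if (inode_read_congested(inode))
 *		return;				skip optional readahead
 */
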
static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
{
        return wb_congested(&bdi->wb, cong_bits);
}

static inline int bdi_read_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_sync_congested);
}

static inline int bdi_write_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_async_congested);
}

static inline int bdi_rw_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, (1 << WB_sync_congested) |
                                  (1 << WB_async_congested));
}

#endif  /* _LINUX_BACKING_DEV_H */