/* SPDX-License-Identifier: GPL-2.0 */
/*
 * include/linux/backing-dev.h
 *
 * low-level device information and state which is propagated up through
 * to high-level code.
 */

#ifndef _LINUX_BACKING_DEV_H
#define _LINUX_BACKING_DEV_H

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>

static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
{
        kref_get(&bdi->refcnt);
        return bdi;
}

void bdi_put(struct backing_dev_info *bdi);

__printf(2, 3)
int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...);
__printf(2, 0)
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt,
                    va_list args);
int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner);
void bdi_unregister(struct backing_dev_info *bdi);

struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id);
static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask)
{
        return bdi_alloc_node(gfp_mask, NUMA_NO_NODE);
}

void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
void wb_wakeup_delayed(struct bdi_writeback *wb);

extern spinlock_t bdi_lock;
extern struct list_head bdi_list;

extern struct workqueue_struct *bdi_wq;
extern struct workqueue_struct *bdi_async_bio_wq;

static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
{
        return test_bit(WB_has_dirty_io, &wb->state);
}

static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
{
        /*
         * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are
         * any dirty wbs.  See wb_update_write_bandwidth().
         */
        return atomic_long_read(&bdi->tot_write_bandwidth);
}

static inline void __add_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item, s64 amount)
{
        percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
}

static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        __add_wb_stat(wb, item, 1);
}

static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        __add_wb_stat(wb, item, -1);
}

static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_read_positive(&wb->stat[item]);
}

static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_sum_positive(&wb->stat[item]);
}

extern void wb_writeout_inc(struct bdi_writeback *wb);

/*
 * maximal error of a stat counter.
 */
static inline unsigned long wb_stat_error(void)
{
#ifdef CONFIG_SMP
        return nr_cpu_ids * WB_STAT_BATCH;
#else
        return 1;
#endif
}
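
/*
 * Illustrative sketch (not part of the original header): the per-cpu stat
 * counters above are updated in batches of WB_STAT_BATCH, so a cheap
 * wb_stat() read can be off by up to wb_stat_error().  An approximate
 * reader can therefore leave that much slack before falling back to the
 * exact but expensive wb_stat_sum(); "limit" below is a hypothetical
 * caller-defined threshold:
 *
 *	if (wb_stat(wb, WB_RECLAIMABLE) < limit - wb_stat_error())
 *		return true;
 *	return wb_stat_sum(wb, WB_RECLAIMABLE) < limit;
 */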

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);

/*
 * Flags in backing_dev_info::capability
 *
 * The first three flags control whether dirty pages will contribute to the
 * VM's accounting and whether writepages() should be called for dirty pages
 * (something that would not, for example, be appropriate for ramfs).
 *
 * WARNING: these flags are closely related and should not normally be
 * used separately.  BDI_CAP_NO_ACCT_AND_WRITEBACK combines these
 * three flags into a single convenience macro.
 *
 * BDI_CAP_NO_ACCT_DIRTY:  Dirty pages shouldn't contribute to accounting
 * BDI_CAP_NO_WRITEBACK:   Don't write pages back
 * BDI_CAP_NO_ACCT_WB:     Don't automatically account writeback pages
 * BDI_CAP_STABLE_WRITES:  Pages must not be modified while under writeback
 * BDI_CAP_STRICTLIMIT:    Keep number of dirty pages below bdi threshold.
 *
 * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback.
 * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be
 *                         inefficient.
 */
#define BDI_CAP_NO_ACCT_DIRTY   0x00000001
#define BDI_CAP_NO_WRITEBACK    0x00000002
#define BDI_CAP_NO_ACCT_WB      0x00000004
#define BDI_CAP_STABLE_WRITES   0x00000008
#define BDI_CAP_STRICTLIMIT     0x00000010
#define BDI_CAP_CGROUP_WRITEBACK 0x00000020
#define BDI_CAP_SYNCHRONOUS_IO  0x00000040

#define BDI_CAP_NO_ACCT_AND_WRITEBACK \
        (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB)

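/*
 * Illustrative sketch (not part of the original header): a RAM-backed
 * pseudo-filesystem whose pages can never be cleaned would typically mark
 * its bdi with the combined flag so the VM neither accounts its dirty
 * pages nor tries to write them back:
 *
 *	bdi->capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK;
 */
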
extern struct backing_dev_info noop_backing_dev_info;

/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @wb: bdi_writeback of interest
 *
 * Determine whether there is writeback waiting to be handled against a
 * bdi_writeback.
 */
static inline bool writeback_in_progress(struct bdi_writeback *wb)
{
        return test_bit(WB_writeback_running, &wb->state);
}

static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{
        struct super_block *sb;

        if (!inode)
                return &noop_backing_dev_info;

        sb = inode->i_sb;
#ifdef CONFIG_BLOCK
        if (sb_is_blkdev_sb(sb))
                return I_BDEV(inode)->bd_bdi;
#endif
        return sb->s_bdi;
}

static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
{
        struct backing_dev_info *bdi = wb->bdi;

        if (bdi->congested_fn)
                return bdi->congested_fn(bdi->congested_data, cong_bits);
        return wb->congested->state & cong_bits;
}

long congestion_wait(int sync, long timeout);
long wait_iff_congested(int sync, long timeout);
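
/*
 * Illustrative sketch (not part of the original header): callers that find
 * the backing device congested typically back off for a bounded time
 * before retrying, e.g.:
 *
 *	if (bdi_write_congested(bdi))
 *		congestion_wait(BLK_RW_ASYNC, HZ / 10);
 */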

static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi)
{
        return bdi->capabilities & BDI_CAP_SYNCHRONOUS_IO;
}

static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi)
{
        return bdi->capabilities & BDI_CAP_STABLE_WRITES;
}

static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK);
}

static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY);
}

static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
{
        /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */
        return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB |
                                      BDI_CAP_NO_WRITEBACK));
}

static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
        return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host));
}

static inline bool mapping_cap_account_dirty(struct address_space *mapping)
{
        return bdi_cap_account_dirty(inode_to_bdi(mapping->host));
}

static inline int bdi_sched_wait(void *word)
{
        schedule();
        return 0;
}

#ifdef CONFIG_CGROUP_WRITEBACK

struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
void wb_congested_put(struct bdi_writeback_congested *congested);
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css,
                                    gfp_t gfp);
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct blkcg *blkcg);
int inode_congested(struct inode *inode, int cong_bits);

/**
 * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
 * @inode: inode of interest
 *
 * cgroup writeback requires support from both the bdi and filesystem.
 * Also, both memcg and iocg have to be on the default hierarchy.  Test
 * whether all conditions are met.
 *
 * Note that the test result may change dynamically on the same inode
 * depending on how memcg and iocg are configured.
 */
static inline bool inode_cgwb_enabled(struct inode *inode)
{
        struct backing_dev_info *bdi = inode_to_bdi(inode);

        return cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
                cgroup_subsys_on_dfl(io_cgrp_subsys) &&
                bdi_cap_account_dirty(bdi) &&
                (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
                (inode->i_sb->s_iflags & SB_I_CGROUPWB);
}

/**
 * wb_find_current - find wb for %current on a bdi
 * @bdi: bdi of interest
 *
 * Find the wb of @bdi which matches both the memcg and blkcg of %current.
 * Must be called under rcu_read_lock(), which protects the returned wb.
 * Returns NULL if not found.
 */
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        struct cgroup_subsys_state *memcg_css;
        struct bdi_writeback *wb;

        memcg_css = task_css(current, memory_cgrp_id);
        if (!memcg_css->parent)
                return &bdi->wb;

        wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);

        /*
         * %current's blkcg equals the effective blkcg of its memcg.  No
         * need to use the relatively expensive cgroup_get_e_css().
         */
        if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
                return wb;
        return NULL;
}

/**
 * wb_get_create_current - get or create wb for %current on a bdi
 * @bdi: bdi of interest
 * @gfp: allocation mask
 *
 * Equivalent to wb_get_create() on %current's memcg.  This function is
 * called from a relatively hot path and optimizes the common cases using
 * wb_find_current().
 */
static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        struct bdi_writeback *wb;

        rcu_read_lock();
        wb = wb_find_current(bdi);
        if (wb && unlikely(!wb_tryget(wb)))
                wb = NULL;
        rcu_read_unlock();

        if (unlikely(!wb)) {
                struct cgroup_subsys_state *memcg_css;

                memcg_css = task_get_css(current, memory_cgrp_id);
                wb = wb_get_create(bdi, memcg_css, gfp);
                css_put(memcg_css);
        }
        return wb;
}
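
/*
 * Illustrative sketch (not part of the original header): a hypothetical
 * caller on the dirtying path might look up %current's wb, charge its
 * stats, and drop the reference again (wb_put() is declared in
 * backing-dev-defs.h):
 *
 *	struct bdi_writeback *wb;
 *
 *	wb = wb_get_create_current(bdi, GFP_ATOMIC);
 *	if (wb) {
 *		inc_wb_stat(wb, WB_RECLAIMABLE);
 *		wb_put(wb);
 *	}
 */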

/**
 * inode_to_wb_is_valid - test whether an inode has a wb associated
 * @inode: inode of interest
 *
 * Returns %true if @inode has a wb associated.  May be called without any
 * locking.
 */
static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return inode->i_wb;
}

/**
 * inode_to_wb - determine the wb of an inode
 * @inode: inode of interest
 *
 * Returns the wb @inode is currently associated with.  The caller must be
 * holding either @inode->i_lock, the i_pages lock, or the
 * associated wb's list_lock.
 */
static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
        WARN_ON_ONCE(debug_locks &&
                     (!lockdep_is_held(&inode->i_lock) &&
                      !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
                      !lockdep_is_held(&inode->i_wb->list_lock)));
#endif
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
 * @inode: target inode
 * @cookie: output param, to be passed to the end function
 *
 * The caller wants to access the wb associated with @inode but isn't
 * holding inode->i_lock, the i_pages lock or wb->list_lock.  This
 * function determines the wb associated with @inode and ensures that the
 * association doesn't change until the transaction is finished with
 * unlocked_inode_to_wb_end().
 *
 * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and
 * can't sleep during the transaction.  IRQs may or may not be disabled on
 * return.
 */
static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
{
        rcu_read_lock();

        /*
         * Paired with store_release in inode_switch_wbs_work_fn() and
         * ensures that we see the new wb if we see cleared I_WB_SWITCH.
         */
        cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;

        if (unlikely(cookie->locked))
                xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags);

        /*
         * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
         * lock.  inode_to_wb() will bark.  Deref directly.
         */
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_end - end inode wb access transaction
 * @inode: target inode
 * @cookie: @cookie from unlocked_inode_to_wb_begin()
 */
static inline void unlocked_inode_to_wb_end(struct inode *inode,
                                            struct wb_lock_cookie *cookie)
{
        if (unlikely(cookie->locked))
                xa_unlock_irqrestore(&inode->i_mapping->i_pages, cookie->flags);

        rcu_read_unlock();
}
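
/*
 * Illustrative sketch (not part of the original header): the lockless
 * pattern pairs the two helpers above and must not sleep in between,
 * e.g. when updating per-wb statistics:
 *
 *	struct wb_lock_cookie cookie = {};
 *	struct bdi_writeback *wb;
 *
 *	wb = unlocked_inode_to_wb_begin(inode, &cookie);
 *	inc_wb_stat(wb, WB_WRITEBACK);
 *	unlocked_inode_to_wb_end(inode, &cookie);
 */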

#else   /* CONFIG_CGROUP_WRITEBACK */

static inline bool inode_cgwb_enabled(struct inode *inode)
{
        return false;
}

static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
        refcount_inc(&bdi->wb_congested->refcnt);
        return bdi->wb_congested;
}

static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
        if (refcount_dec_and_test(&congested->refcnt))
                kfree(congested);
}

static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        return &bdi->wb;
}

static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        return &bdi->wb;
}

static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return true;
}

static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
        return &inode_to_bdi(inode)->wb;
}

static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
{
        return inode_to_wb(inode);
}

static inline void unlocked_inode_to_wb_end(struct inode *inode,
                                            struct wb_lock_cookie *cookie)
{
}

static inline void wb_memcg_offline(struct mem_cgroup *memcg)
{
}

static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
}

static inline int inode_congested(struct inode *inode, int cong_bits)
{
        return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
}

#endif  /* CONFIG_CGROUP_WRITEBACK */

static inline int inode_read_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_sync_congested);
}

static inline int inode_write_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_async_congested);
}

static inline int inode_rw_congested(struct inode *inode)
{
        return inode_congested(inode, (1 << WB_sync_congested) |
                                      (1 << WB_async_congested));
}

static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
{
        return wb_congested(&bdi->wb, cong_bits);
}

static inline int bdi_read_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_sync_congested);
}

static inline int bdi_write_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_async_congested);
}

static inline int bdi_rw_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, (1 << WB_sync_congested) |
                                  (1 << WB_async_congested));
}
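
/*
 * Illustrative sketch (not part of the original header): optional work
 * such as readahead is commonly skipped when the backing device is
 * already congested for reads:
 *
 *	if (inode_read_congested(mapping->host))
 *		return;
 */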

#endif  /* _LINUX_BACKING_DEV_H */