/*
 * include/linux/backing-dev.h
 *
 * low-level device information and state which is propagated up through
 * to high-level code.
 */

#ifndef _LINUX_BACKING_DEV_H
#define _LINUX_BACKING_DEV_H

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>

int __must_check bdi_init(struct backing_dev_info *bdi);
void bdi_exit(struct backing_dev_info *bdi);

__printf(3, 4)
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
                const char *fmt, ...);
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner);
void bdi_unregister(struct backing_dev_info *bdi);

int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
void bdi_destroy(struct backing_dev_info *bdi);

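/*
 * Illustrative sketch, not part of the original header: a driver embedding
 * a backing_dev_info (in a hypothetical "my_dev" structure) would typically
 * pair the declarations above along these lines:
 *
 *        err = bdi_init(&my_dev->bdi);
 *        if (err)
 *                return err;
 *        err = bdi_register(&my_dev->bdi, NULL, "mydev-%d", my_dev->id);
 *        if (err) {
 *                bdi_exit(&my_dev->bdi);
 *                return err;
 *        }
 *        ...
 *        bdi_unregister(&my_dev->bdi);
 *        bdi_exit(&my_dev->bdi);
 *
 * bdi_setup_and_register()/bdi_destroy() bundle similar init/register and
 * unregister/teardown steps for callers that don't need the finer-grained
 * interface.
 */
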
void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
                        bool range_cyclic, enum wb_reason reason);
void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
void wb_wakeup_delayed(struct bdi_writeback *wb);

extern spinlock_t bdi_lock;
extern struct list_head bdi_list;

extern struct workqueue_struct *bdi_wq;

static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
{
        return test_bit(WB_has_dirty_io, &wb->state);
}

static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
{
        /*
         * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are
         * any dirty wbs.  See wb_update_write_bandwidth().
         */
        return atomic_long_read(&bdi->tot_write_bandwidth);
}

static inline void __add_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item, s64 amount)
{
        __percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH);
}

static inline void __inc_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item)
{
        __add_wb_stat(wb, item, 1);
}

static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __inc_wb_stat(wb, item);
        local_irq_restore(flags);
}

static inline void __dec_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item)
{
        __add_wb_stat(wb, item, -1);
}

static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __dec_wb_stat(wb, item);
        local_irq_restore(flags);
}

static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_read_positive(&wb->stat[item]);
}

static inline s64 __wb_stat_sum(struct bdi_writeback *wb,
                                enum wb_stat_item item)
{
        return percpu_counter_sum_positive(&wb->stat[item]);
}

static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
{
        s64 sum;
        unsigned long flags;

        local_irq_save(flags);
        sum = __wb_stat_sum(wb, item);
        local_irq_restore(flags);

        return sum;
}

extern void wb_writeout_inc(struct bdi_writeback *wb);

/*
 * maximal error of a stat counter.
 */
static inline unsigned long wb_stat_error(struct bdi_writeback *wb)
{
#ifdef CONFIG_SMP
        return nr_cpu_ids * WB_STAT_BATCH;
#else
        return 1;
#endif
}
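
/*
 * Illustrative note, not part of the original header: the WB_* counters are
 * batched percpu counters, so a plain wb_stat() read is cheap but can be off
 * by up to wb_stat_error(); wb_stat_sum() gives the precise (and more
 * expensive) value.  A hypothetical check against some limit might read:
 *
 *        if (wb_stat(wb, WB_RECLAIMABLE) + wb_stat_error(wb) > limit)
 *                nr = wb_stat_sum(wb, WB_RECLAIMABLE);
 *
 * i.e. only pay for the precise sum when the cheap read is close to the
 * limit ("limit" and "nr" are hypothetical here).
 */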

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);

/*
 * Flags in backing_dev_info::capability
 *
 * The first three flags control whether dirty pages will contribute to the
 * VM's accounting and whether writepages() should be called for dirty pages
 * (something that would not, for example, be appropriate for ramfs).
 *
 * WARNING: these flags are closely related and should not normally be
 * used separately.  BDI_CAP_NO_ACCT_AND_WRITEBACK combines these
 * three flags into a single convenience macro.
 *
 * BDI_CAP_NO_ACCT_DIRTY:  Dirty pages shouldn't contribute to accounting
 * BDI_CAP_NO_WRITEBACK:   Don't write pages back
 * BDI_CAP_NO_ACCT_WB:     Don't automatically account writeback pages
 * BDI_CAP_STABLE_WRITES:  Pages should not be modified while under writeback
 *                         (see bdi_cap_stable_pages_required()).
 * BDI_CAP_STRICTLIMIT:    Keep number of dirty pages below bdi threshold.
 *
 * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback.
 */
#define BDI_CAP_NO_ACCT_DIRTY   0x00000001
#define BDI_CAP_NO_WRITEBACK    0x00000002
#define BDI_CAP_NO_ACCT_WB      0x00000004
#define BDI_CAP_STABLE_WRITES   0x00000008
#define BDI_CAP_STRICTLIMIT     0x00000010
#define BDI_CAP_CGROUP_WRITEBACK 0x00000020

#define BDI_CAP_NO_ACCT_AND_WRITEBACK \
        (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB)

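/*
 * Illustrative sketch, not part of the original header: a memory-backed
 * pseudo filesystem that neither writes pages back nor wants them in the
 * dirty accounting would typically mark its (hypothetical) bdi with
 *
 *        my_bdi->capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK;
 *
 * while a device that requires pages to stay stable during writeback would
 * additionally set BDI_CAP_STABLE_WRITES.
 */
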
extern struct backing_dev_info noop_backing_dev_info;

/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @wb: bdi_writeback of interest
 *
 * Determine whether there is writeback waiting to be handled against a
 * bdi_writeback.
 */
static inline bool writeback_in_progress(struct bdi_writeback *wb)
{
        return test_bit(WB_writeback_running, &wb->state);
}

static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{
        struct super_block *sb;

        if (!inode)
                return &noop_backing_dev_info;

        sb = inode->i_sb;
#ifdef CONFIG_BLOCK
        if (sb_is_blkdev_sb(sb))
                return blk_get_backing_dev_info(I_BDEV(inode));
#endif
        return sb->s_bdi;
}

static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
{
        struct backing_dev_info *bdi = wb->bdi;

        if (bdi->congested_fn)
                return bdi->congested_fn(bdi->congested_data, cong_bits);
        return wb->congested->state & cong_bits;
}

long congestion_wait(int sync, long timeout);
long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
int pdflush_proc_obsolete(struct ctl_table *table, int write,
                void __user *buffer, size_t *lenp, loff_t *ppos);

static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi)
{
        return bdi->capabilities & BDI_CAP_STABLE_WRITES;
}

static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK);
}

static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY);
}

static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
{
        /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */
        return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB |
                                      BDI_CAP_NO_WRITEBACK));
}

static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
        return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host));
}

static inline bool mapping_cap_account_dirty(struct address_space *mapping)
{
        return bdi_cap_account_dirty(inode_to_bdi(mapping->host));
}

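/*
 * Illustrative note, not part of the original header: these predicates are
 * what the dirty/writeback accounting paths consult; page dirtying, for
 * example, only updates the per-wb dirty counters for mappings where
 * mapping_cap_account_dirty() is true, which is how ramfs-style mappings
 * stay out of the dirty limits.
 */
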
static inline int bdi_sched_wait(void *word)
{
        schedule();
        return 0;
}

#ifdef CONFIG_CGROUP_WRITEBACK

struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
void wb_congested_put(struct bdi_writeback_congested *congested);
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css,
                                    gfp_t gfp);
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct blkcg *blkcg);
int inode_congested(struct inode *inode, int cong_bits);

/**
 * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
 * @inode: inode of interest
 *
 * cgroup writeback requires support from both the bdi and filesystem.
 * Also, both memcg and blkcg have to be on the default hierarchy.  Test
 * whether all conditions are met.
 *
 * Note that the test result may change dynamically on the same inode
 * depending on how memcg and blkcg are configured.
 */
static inline bool inode_cgwb_enabled(struct inode *inode)
{
        struct backing_dev_info *bdi = inode_to_bdi(inode);

        return cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
                cgroup_subsys_on_dfl(io_cgrp_subsys) &&
                bdi_cap_account_dirty(bdi) &&
                (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
                (inode->i_sb->s_iflags & SB_I_CGROUPWB);
}
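
/*
 * Illustrative note, not part of the original header: of the conditions
 * tested above, BDI_CAP_CGROUP_WRITEBACK is set by the backing device's
 * owner (e.g. the block layer sets it on request queue bdis), SB_I_CGROUPWB
 * is set by the filesystem at mount time, and the cgroup_subsys_on_dfl()
 * checks depend only on how the memory and io controllers are mounted.
 */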

/**
 * wb_find_current - find wb for %current on a bdi
 * @bdi: bdi of interest
 *
 * Find the wb of @bdi which matches both the memcg and blkcg of %current.
 * Must be called under rcu_read_lock() which protects the returned wb.
 * Returns NULL if not found.
 */
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        struct cgroup_subsys_state *memcg_css;
        struct bdi_writeback *wb;

        memcg_css = task_css(current, memory_cgrp_id);
        if (!memcg_css->parent)
                return &bdi->wb;

        wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);

        /*
         * %current's blkcg equals the effective blkcg of its memcg.  No
         * need to use the relatively expensive cgroup_get_e_css().
         */
        if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
                return wb;
        return NULL;
}
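
/*
 * Illustrative sketch, not part of the original header: a caller that needs
 * the wb beyond the RCU read section has to take its own reference, with
 * wb_tryget()/wb_put() from backing-dev-defs.h, e.g.:
 *
 *        rcu_read_lock();
 *        wb = wb_find_current(bdi);
 *        if (wb && !wb_tryget(wb))
 *                wb = NULL;
 *        rcu_read_unlock();
 *        ...
 *        if (wb)
 *                wb_put(wb);
 *
 * This is the same pattern wb_get_create_current() below uses for its
 * fast path.
 */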

/**
 * wb_get_create_current - get or create wb for %current on a bdi
 * @bdi: bdi of interest
 * @gfp: allocation mask
 *
 * Equivalent to wb_get_create() on %current's memcg.  This function is
 * called from a relatively hot path and optimizes the common cases using
 * wb_find_current().
 */
static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        struct bdi_writeback *wb;

        rcu_read_lock();
        wb = wb_find_current(bdi);
        if (wb && unlikely(!wb_tryget(wb)))
                wb = NULL;
        rcu_read_unlock();

        if (unlikely(!wb)) {
                struct cgroup_subsys_state *memcg_css;

                memcg_css = task_get_css(current, memory_cgrp_id);
                wb = wb_get_create(bdi, memcg_css, gfp);
                css_put(memcg_css);
        }
        return wb;
}

/**
 * inode_to_wb_is_valid - test whether an inode has a wb associated
 * @inode: inode of interest
 *
 * Returns %true if @inode has a wb associated.  May be called without any
 * locking.
 */
static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return inode->i_wb;
}

/**
 * inode_to_wb - determine the wb of an inode
 * @inode: inode of interest
 *
 * Returns the wb @inode is currently associated with.  The caller must be
 * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the
 * associated wb's list_lock.
 */
static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
        WARN_ON_ONCE(debug_locks &&
                     (!lockdep_is_held(&inode->i_lock) &&
                      !lockdep_is_held(&inode->i_mapping->tree_lock) &&
                      !lockdep_is_held(&inode->i_wb->list_lock)));
#endif
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
 * @inode: target inode
 * @lockedp: temp bool output param, to be passed to the end function
 *
 * The caller wants to access the wb associated with @inode but isn't
 * holding inode->i_lock, mapping->tree_lock or wb->list_lock.  This
 * function determines the wb associated with @inode and ensures that the
 * association doesn't change until the transaction is finished with
 * unlocked_inode_to_wb_end().
 *
 * The caller must call unlocked_inode_to_wb_end() with *@lockedp
 * afterwards and can't sleep during the transaction.  IRQs may or may not
 * be disabled on return.
 */
static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
{
        rcu_read_lock();

        /*
         * Paired with store_release in inode_switch_wb_work_fn() and
         * ensures that we see the new wb if we see cleared I_WB_SWITCH.
         */
        *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;

        if (unlikely(*lockedp))
                spin_lock_irq(&inode->i_mapping->tree_lock);

        /*
         * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
         * inode_to_wb()'s lockdep check would bark here, so deref directly.
         */
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_end - end inode wb access transaction
 * @inode: target inode
 * @locked: *@lockedp from unlocked_inode_to_wb_begin()
 */
static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
{
        if (unlikely(locked))
                spin_unlock_irq(&inode->i_mapping->tree_lock);

        rcu_read_unlock();
}
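
/*
 * Illustrative sketch, not part of the original header: code that needs the
 * inode's wb but holds none of the locks required by inode_to_wb() would
 * typically bracket the access with the begin/end pair above:
 *
 *        struct bdi_writeback *wb;
 *        bool locked;
 *
 *        wb = unlocked_inode_to_wb_begin(inode, &locked);
 *        nr = wb_stat(wb, WB_WRITEBACK);
 *        unlocked_inode_to_wb_end(inode, locked);
 *
 * The access in the middle (here a hypothetical stat read into "nr") must
 * not sleep, per the kernel-doc of unlocked_inode_to_wb_begin().
 */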

#else   /* CONFIG_CGROUP_WRITEBACK */

static inline bool inode_cgwb_enabled(struct inode *inode)
{
        return false;
}

static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
        atomic_inc(&bdi->wb_congested->refcnt);
        return bdi->wb_congested;
}

static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
        if (atomic_dec_and_test(&congested->refcnt))
                kfree(congested);
}

static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        return &bdi->wb;
}

static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        return &bdi->wb;
}

static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return true;
}

static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
        return &inode_to_bdi(inode)->wb;
}

static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
{
        return inode_to_wb(inode);
}

static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
{
}

static inline void wb_memcg_offline(struct mem_cgroup *memcg)
{
}

static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
}

static inline int inode_congested(struct inode *inode, int cong_bits)
{
        return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
}

#endif  /* CONFIG_CGROUP_WRITEBACK */

static inline int inode_read_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_sync_congested);
}

static inline int inode_write_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_async_congested);
}

static inline int inode_rw_congested(struct inode *inode)
{
        return inode_congested(inode, (1 << WB_sync_congested) |
                                      (1 << WB_async_congested));
}

static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
{
        return wb_congested(&bdi->wb, cong_bits);
}

static inline int bdi_read_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_sync_congested);
}

static inline int bdi_write_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_async_congested);
}

static inline int bdi_rw_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, (1 << WB_sync_congested) |
                                  (1 << WB_async_congested));
}
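
/*
 * Illustrative note, not part of the original header: the *_congested()
 * helpers are non-blocking queries; callers that want to throttle usually
 * pair them with congestion_wait(), e.g. a writeout path might back off
 * with something like:
 *
 *        if (bdi_write_congested(bdi))
 *                congestion_wait(BLK_RW_ASYNC, HZ / 50);
 *
 * where BLK_RW_ASYNC comes from the block layer headers and the timeout is
 * purely an illustrative value.
 */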

#endif  /* _LINUX_BACKING_DEV_H */