/*
 * include/linux/backing-dev.h
 *
 * low-level device information and state which is propagated up through
 * to high-level code.
 */

#ifndef _LINUX_BACKING_DEV_H
#define _LINUX_BACKING_DEV_H

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>

int __must_check bdi_init(struct backing_dev_info *bdi);
void bdi_exit(struct backing_dev_info *bdi);

__printf(3, 4)
int bdi_register(struct backing_dev_info *bdi, struct device *parent,
                const char *fmt, ...);
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
void bdi_unregister(struct backing_dev_info *bdi);

int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
void bdi_destroy(struct backing_dev_info *bdi);

void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
                        bool range_cyclic, enum wb_reason reason);
void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
void wb_wakeup_delayed(struct bdi_writeback *wb);

extern spinlock_t bdi_lock;
extern struct list_head bdi_list;

extern struct workqueue_struct *bdi_wq;

static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
{
        return test_bit(WB_has_dirty_io, &wb->state);
}

static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
{
        /*
         * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are
         * any dirty wbs.  See wb_update_write_bandwidth().
         */
        return atomic_long_read(&bdi->tot_write_bandwidth);
}

static inline void __add_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item, s64 amount)
{
        __percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH);
}

static inline void __inc_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item)
{
        __add_wb_stat(wb, item, 1);
}

static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __inc_wb_stat(wb, item);
        local_irq_restore(flags);
}

static inline void __dec_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item)
{
        __add_wb_stat(wb, item, -1);
}

static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __dec_wb_stat(wb, item);
        local_irq_restore(flags);
}

static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_read_positive(&wb->stat[item]);
}

static inline s64 __wb_stat_sum(struct bdi_writeback *wb,
                                enum wb_stat_item item)
{
        return percpu_counter_sum_positive(&wb->stat[item]);
}

static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
{
        s64 sum;
        unsigned long flags;

        local_irq_save(flags);
        sum = __wb_stat_sum(wb, item);
        local_irq_restore(flags);

        return sum;
}

extern void wb_writeout_inc(struct bdi_writeback *wb);

/*
 * maximal error of a stat counter.
 */
static inline unsigned long wb_stat_error(struct bdi_writeback *wb)
{
#ifdef CONFIG_SMP
        return nr_cpu_ids * WB_STAT_BATCH;
#else
        return 1;
#endif
}
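
/*
 * Illustrative sketch (not part of this header's API): how a caller might
 * trade cost for accuracy when reading a wb stat.  wb_stat() only reads
 * the cheap approximate value, which can be off by up to wb_stat_error(),
 * while wb_stat_sum() folds in every CPU's local delta.  The helper name
 * and threshold check below are hypothetical.
 */
static inline bool example_wb_reclaimable_over(struct bdi_writeback *wb,
                                               s64 thresh)
{
        s64 nr = wb_stat(wb, WB_RECLAIMABLE);   /* approximate, cheap */

        if (nr + (s64)wb_stat_error(wb) < thresh)
                return false;                    /* clearly below threshold */

        /* close call - take the exact but more expensive sum */
        return wb_stat_sum(wb, WB_RECLAIMABLE) >= thresh;
}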

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);

/*
 * Flags in backing_dev_info::capabilities
 *
 * The first three flags control whether dirty pages will contribute to the
 * VM's accounting and whether writepages() should be called for dirty pages
 * (something that would not, for example, be appropriate for ramfs).
 *
 * WARNING: these flags are closely related and should not normally be
 * used separately.  BDI_CAP_NO_ACCT_AND_WRITEBACK combines the three as a
 * single convenience macro.
 *
 * BDI_CAP_NO_ACCT_DIRTY:  Dirty pages shouldn't contribute to accounting
 * BDI_CAP_NO_WRITEBACK:   Don't write pages back
 * BDI_CAP_NO_ACCT_WB:     Don't automatically account writeback pages
 * BDI_CAP_STRICTLIMIT:    Keep number of dirty pages below bdi threshold.
 *
 * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback.
 */
#define BDI_CAP_NO_ACCT_DIRTY   0x00000001
#define BDI_CAP_NO_WRITEBACK    0x00000002
#define BDI_CAP_NO_ACCT_WB      0x00000004
#define BDI_CAP_STABLE_WRITES   0x00000008
#define BDI_CAP_STRICTLIMIT     0x00000010
#define BDI_CAP_CGROUP_WRITEBACK 0x00000020

#define BDI_CAP_NO_ACCT_AND_WRITEBACK \
        (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB)
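
/*
 * Illustrative sketch (hypothetical code, not part of this header): a
 * RAM-backed filesystem such as ramfs has no backing store to write to,
 * so its bdi would typically opt out of dirty accounting and writeback
 * entirely via the combined flag.
 */
static inline void example_mark_bdi_memory_backed(struct backing_dev_info *bdi)
{
        /* no accounting, no writeback - pages never need to be written out */
        bdi->capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK;
}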

extern struct backing_dev_info noop_backing_dev_info;

/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @wb: bdi_writeback of interest
 *
 * Determine whether there is writeback waiting to be handled against a
 * bdi_writeback.
 */
static inline bool writeback_in_progress(struct bdi_writeback *wb)
{
        return test_bit(WB_writeback_running, &wb->state);
}

static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{
        struct super_block *sb;

        if (!inode)
                return &noop_backing_dev_info;

        sb = inode->i_sb;
#ifdef CONFIG_BLOCK
        if (sb_is_blkdev_sb(sb))
                return blk_get_backing_dev_info(I_BDEV(inode));
#endif
        return sb->s_bdi;
}

static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
{
        struct backing_dev_info *bdi = wb->bdi;

        if (bdi->congested_fn)
                return bdi->congested_fn(bdi->congested_data, cong_bits);
        return wb->congested->state & cong_bits;
}

long congestion_wait(int sync, long timeout);
long wait_iff_congested(struct zone *zone, int sync, long timeout);
int pdflush_proc_obsolete(struct ctl_table *table, int write,
                void __user *buffer, size_t *lenp, loff_t *ppos);

static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi)
{
        return bdi->capabilities & BDI_CAP_STABLE_WRITES;
}

static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK);
}

static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY);
}

static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
{
        /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */
        return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB |
                                      BDI_CAP_NO_WRITEBACK));
}

static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
        return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host));
}

static inline bool mapping_cap_account_dirty(struct address_space *mapping)
{
        return bdi_cap_account_dirty(inode_to_bdi(mapping->host));
}

static inline int bdi_sched_wait(void *word)
{
        schedule();
        return 0;
}

#ifdef CONFIG_CGROUP_WRITEBACK

struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
void wb_congested_put(struct bdi_writeback_congested *congested);
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css,
                                    gfp_t gfp);
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct blkcg *blkcg);
int inode_congested(struct inode *inode, int cong_bits);

/**
 * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
 * @inode: inode of interest
 *
 * cgroup writeback requires support from both the bdi and filesystem.
 * Also, both memcg and iocg have to be on the default hierarchy.  Test
 * whether all conditions are met.
 *
 * Note that the test result may change dynamically on the same inode
 * depending on how memcg and iocg are configured.
 */
static inline bool inode_cgwb_enabled(struct inode *inode)
{
        struct backing_dev_info *bdi = inode_to_bdi(inode);

        return cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
                cgroup_subsys_on_dfl(io_cgrp_subsys) &&
                bdi_cap_account_dirty(bdi) &&
                (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
                (inode->i_sb->s_iflags & SB_I_CGROUPWB);
}

/**
 * wb_find_current - find wb for %current on a bdi
 * @bdi: bdi of interest
 *
 * Find the wb of @bdi which matches both the memcg and blkcg of %current.
 * Must be called under rcu_read_lock() which protects the returned wb.
 * Returns NULL if not found.
 */
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        struct cgroup_subsys_state *memcg_css;
        struct bdi_writeback *wb;

        memcg_css = task_css(current, memory_cgrp_id);
        if (!memcg_css->parent)
                return &bdi->wb;

        wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);

        /*
         * %current's blkcg equals the effective blkcg of its memcg.  No
         * need to use the relatively expensive cgroup_get_e_css().
         */
        if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
                return wb;
        return NULL;
}

/**
 * wb_get_create_current - get or create wb for %current on a bdi
 * @bdi: bdi of interest
 * @gfp: allocation mask
 *
 * Equivalent to wb_get_create() on %current's memcg.  This function is
 * called from a relatively hot path and optimizes the common cases using
 * wb_find_current().
 */
static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        struct bdi_writeback *wb;

        rcu_read_lock();
        wb = wb_find_current(bdi);
        if (wb && unlikely(!wb_tryget(wb)))
                wb = NULL;
        rcu_read_unlock();

        if (unlikely(!wb)) {
                struct cgroup_subsys_state *memcg_css;

                memcg_css = task_get_css(current, memory_cgrp_id);
                wb = wb_get_create(bdi, memcg_css, gfp);
                css_put(memcg_css);
        }
        return wb;
}

/**
 * inode_to_wb_is_valid - test whether an inode has a wb associated
 * @inode: inode of interest
 *
 * Returns %true if @inode has a wb associated.  May be called without any
 * locking.
 */
static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return inode->i_wb;
}

/**
 * inode_to_wb - determine the wb of an inode
 * @inode: inode of interest
 *
 * Returns the wb @inode is currently associated with.  The caller must be
 * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the
 * associated wb's list_lock.
 */
static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
        WARN_ON_ONCE(debug_locks &&
                     (!lockdep_is_held(&inode->i_lock) &&
                      !lockdep_is_held(&inode->i_mapping->tree_lock) &&
                      !lockdep_is_held(&inode->i_wb->list_lock)));
#endif
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
 * @inode: target inode
 * @lockedp: temp bool output param, to be passed to the end function
 *
 * The caller wants to access the wb associated with @inode but isn't
 * holding inode->i_lock, mapping->tree_lock or wb->list_lock.  This
 * function determines the wb associated with @inode and ensures that the
 * association doesn't change until the transaction is finished with
 * unlocked_inode_to_wb_end().
 *
 * The caller must call unlocked_inode_to_wb_end() with *@lockedp
 * afterwards and can't sleep during the transaction.  IRQs may or may not
 * be disabled on return.
 */
static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
{
        rcu_read_lock();

        /*
         * Paired with store_release in inode_switch_wb_work_fn() and
         * ensures that we see the new wb if we see cleared I_WB_SWITCH.
         */
        *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;

        if (unlikely(*lockedp))
                spin_lock_irq(&inode->i_mapping->tree_lock);

        /*
         * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
         * The lockdep check in inode_to_wb() would complain, so dereference
         * i_wb directly.
         */
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_end - end inode wb access transaction
 * @inode: target inode
 * @locked: *@lockedp from unlocked_inode_to_wb_begin()
 */
static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
{
        if (unlikely(locked))
                spin_unlock_irq(&inode->i_mapping->tree_lock);

        rcu_read_unlock();
}
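
/*
 * Illustrative sketch (hypothetical caller, not part of this header): the
 * begin/end pair pins the inode->wb association across a short,
 * non-sleeping section, e.g. a stat update, without taking i_lock,
 * tree_lock or the wb's list_lock up front.
 */
static inline void example_account_inode_dirtied(struct inode *inode)
{
        struct bdi_writeback *wb;
        bool locked;

        wb = unlocked_inode_to_wb_begin(inode, &locked);
        inc_wb_stat(wb, WB_DIRTIED);    /* must not sleep in here */
        unlocked_inode_to_wb_end(inode, locked);
}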

#else   /* CONFIG_CGROUP_WRITEBACK */

static inline bool inode_cgwb_enabled(struct inode *inode)
{
        return false;
}

static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
        atomic_inc(&bdi->wb_congested->refcnt);
        return bdi->wb_congested;
}

static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
        if (atomic_dec_and_test(&congested->refcnt))
                kfree(congested);
}

static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        return &bdi->wb;
}

static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        return &bdi->wb;
}

static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return true;
}

static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
        return &inode_to_bdi(inode)->wb;
}

static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
{
        return inode_to_wb(inode);
}

static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
{
}

static inline void wb_memcg_offline(struct mem_cgroup *memcg)
{
}

static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
}

static inline int inode_congested(struct inode *inode, int cong_bits)
{
        return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
}

#endif  /* CONFIG_CGROUP_WRITEBACK */

static inline int inode_read_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_sync_congested);
}

static inline int inode_write_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_async_congested);
}

static inline int inode_rw_congested(struct inode *inode)
{
        return inode_congested(inode, (1 << WB_sync_congested) |
                                      (1 << WB_async_congested));
}

static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
{
        return wb_congested(&bdi->wb, cong_bits);
}

static inline int bdi_read_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_sync_congested);
}

static inline int bdi_write_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_async_congested);
}

static inline int bdi_rw_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, (1 << WB_sync_congested) |
                                  (1 << WB_async_congested));
}
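
/*
 * Illustrative sketch (hypothetical caller, not part of this header): a
 * background writer can poll the async congestion bit and back off with
 * congestion_wait() rather than piling more I/O onto a congested device.
 * The HZ/10 timeout is an arbitrary example value.
 */
static inline void example_throttle_on_congestion(struct inode *inode)
{
        while (inode_write_congested(inode))
                congestion_wait(BLK_RW_ASYNC, HZ / 10);
}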

#endif  /* _LINUX_BACKING_DEV_H */