/*
 * include/linux/backing-dev.h
 *
 * low-level device information and state which is propagated up through
 * to high-level code.
 */

#ifndef _LINUX_BACKING_DEV_H
#define _LINUX_BACKING_DEV_H

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>

static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
{
        kref_get(&bdi->refcnt);
        return bdi;
}

void bdi_put(struct backing_dev_info *bdi);

__printf(2, 3)
int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...);
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt,
                    va_list args);
int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner);
void bdi_unregister(struct backing_dev_info *bdi);

struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id);
static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask)
{
        return bdi_alloc_node(gfp_mask, NUMA_NO_NODE);
}

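/*
 * Illustrative sketch (not part of this header): a typical driver-side
 * lifecycle for a standalone bdi using the helpers above.  The format
 * string "example%d", the instance_id variable and the error handling are
 * hypothetical; real callers also set up bdi->ra_pages, bdi->capabilities,
 * etc. as appropriate.
 *
 *      struct backing_dev_info *bdi;
 *
 *      bdi = bdi_alloc(GFP_KERNEL);
 *      if (!bdi)
 *              return -ENOMEM;
 *      if (bdi_register(bdi, "example%d", instance_id)) {
 *              bdi_put(bdi);
 *              return -ENODEV;
 *      }
 *      ...
 *      bdi_unregister(bdi);
 *      bdi_put(bdi);           (drops the reference taken by bdi_alloc())
 */
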
void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
                        bool range_cyclic, enum wb_reason reason);
void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
void wb_wakeup_delayed(struct bdi_writeback *wb);

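/*
 * Illustrative sketch (not part of this header): kicking a wb's flusher.
 * The over-threshold condition is hypothetical and the reason code is only
 * an example; in-tree callers (e.g. the dirty throttling and reclaim
 * paths) make similar decisions before starting writeback.
 *
 *      if (over_background_threshold)
 *              wb_start_background_writeback(wb);
 *      else
 *              wb_start_writeback(wb, nr_pages, true, WB_REASON_VMSCAN);
 */
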
extern spinlock_t bdi_lock;
extern struct list_head bdi_list;

extern struct workqueue_struct *bdi_wq;

static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
{
        return test_bit(WB_has_dirty_io, &wb->state);
}

static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
{
        /*
         * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are
         * any dirty wbs.  See wb_update_write_bandwidth().
         */
        return atomic_long_read(&bdi->tot_write_bandwidth);
}

static inline void __add_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item, s64 amount)
{
        __percpu_counter_add(&wb->stat[item], amount, WB_STAT_BATCH);
}

static inline void __inc_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item)
{
        __add_wb_stat(wb, item, 1);
}

static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __inc_wb_stat(wb, item);
        local_irq_restore(flags);
}

static inline void __dec_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item)
{
        __add_wb_stat(wb, item, -1);
}

static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        unsigned long flags;

        local_irq_save(flags);
        __dec_wb_stat(wb, item);
        local_irq_restore(flags);
}

static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_read_positive(&wb->stat[item]);
}

static inline s64 __wb_stat_sum(struct bdi_writeback *wb,
                                enum wb_stat_item item)
{
        return percpu_counter_sum_positive(&wb->stat[item]);
}

static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
{
        s64 sum;
        unsigned long flags;

        local_irq_save(flags);
        sum = __wb_stat_sum(wb, item);
        local_irq_restore(flags);

        return sum;
}

extern void wb_writeout_inc(struct bdi_writeback *wb);

/*
 * maximal error of a stat counter.
 */
static inline unsigned long wb_stat_error(struct bdi_writeback *wb)
{
#ifdef CONFIG_SMP
        return nr_cpu_ids * WB_STAT_BATCH;
#else
        return 1;
#endif
}

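/*
 * Illustrative sketch (not part of this header): how the per-wb stats are
 * typically consumed.  The unlocked reader wb_stat() is cheap but may be
 * off by up to wb_stat_error(), so comparisons against a threshold should
 * allow for that slack; wb_stat_sum() gives an exact (but more expensive)
 * value.  The "limit" variable is hypothetical.
 *
 *      inc_wb_stat(wb, WB_RECLAIMABLE);        (a page became dirty)
 *      dec_wb_stat(wb, WB_RECLAIMABLE);        (it was cleaned)
 *
 *      if (wb_stat(wb, WB_RECLAIMABLE) + wb_stat_error(wb) > limit)
 *              over_limit = wb_stat_sum(wb, WB_RECLAIMABLE) > limit;
 */
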
int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);

/*
 * Flags in backing_dev_info::capability
 *
 * The first three flags control whether dirty pages will contribute to the
 * VM's accounting and whether writepages() should be called for dirty pages
 * (something that would not, for example, be appropriate for ramfs).
 *
 * WARNING: these flags are closely related and should not normally be
 * used separately.  BDI_CAP_NO_ACCT_AND_WRITEBACK combines these
 * three flags into a single convenience macro.
 *
 * BDI_CAP_NO_ACCT_DIRTY:  Dirty pages shouldn't contribute to accounting
 * BDI_CAP_NO_WRITEBACK:   Don't write pages back
 * BDI_CAP_NO_ACCT_WB:     Don't automatically account writeback pages
 * BDI_CAP_STABLE_WRITES:  Device requires that pages are not modified while
 *                         they are under writeback (stable page writes).
 * BDI_CAP_STRICTLIMIT:    Keep number of dirty pages below bdi threshold.
 *
 * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback.
 */
#define BDI_CAP_NO_ACCT_DIRTY   0x00000001
#define BDI_CAP_NO_WRITEBACK    0x00000002
#define BDI_CAP_NO_ACCT_WB      0x00000004
#define BDI_CAP_STABLE_WRITES   0x00000008
#define BDI_CAP_STRICTLIMIT     0x00000010
#define BDI_CAP_CGROUP_WRITEBACK 0x00000020

#define BDI_CAP_NO_ACCT_AND_WRITEBACK \
        (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB)

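/*
 * Illustrative sketch (not part of this header): how the capability bits
 * are typically combined.  A RAM-backed filesystem that never writes pages
 * back would opt out of accounting and writeback entirely, while a device
 * with end-to-end checksums would ask for stable page writes.  The bdi
 * variables below are hypothetical.
 *
 *      ramfs_like_bdi->capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK;
 *      checksumming_bdi->capabilities |= BDI_CAP_STABLE_WRITES;
 */
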
extern struct backing_dev_info noop_backing_dev_info;

/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @wb: bdi_writeback of interest
 *
 * Determine whether there is writeback waiting to be handled against a
 * bdi_writeback.
 */
static inline bool writeback_in_progress(struct bdi_writeback *wb)
{
        return test_bit(WB_writeback_running, &wb->state);
}

static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{
        struct super_block *sb;

        if (!inode)
                return &noop_backing_dev_info;

        sb = inode->i_sb;
#ifdef CONFIG_BLOCK
        if (sb_is_blkdev_sb(sb))
                return I_BDEV(inode)->bd_bdi;
#endif
        return sb->s_bdi;
}

static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
{
        struct backing_dev_info *bdi = wb->bdi;

        if (bdi->congested_fn)
                return bdi->congested_fn(bdi->congested_data, cong_bits);
        return wb->congested->state & cong_bits;
}

long congestion_wait(int sync, long timeout);
long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
int pdflush_proc_obsolete(struct ctl_table *table, int write,
                void __user *buffer, size_t *lenp, loff_t *ppos);

static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi)
{
        return bdi->capabilities & BDI_CAP_STABLE_WRITES;
}

static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK);
}

static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY);
}

static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
{
        /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */
        return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB |
                                      BDI_CAP_NO_WRITEBACK));
}

static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
        return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host));
}

static inline bool mapping_cap_account_dirty(struct address_space *mapping)
{
        return bdi_cap_account_dirty(inode_to_bdi(mapping->host));
}

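/*
 * Illustrative sketch (not part of this header): the mapping_cap_*()
 * helpers are the usual way mm/fs code gates dirty accounting and
 * writeback per mapping.  The function below is hypothetical.
 *
 *      static void example_account_dirty(struct address_space *mapping)
 *      {
 *              if (!mapping_cap_account_dirty(mapping))
 *                      return;
 *              ... update dirty statistics ...
 *      }
 */
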
static inline int bdi_sched_wait(void *word)
{
        schedule();
        return 0;
}

#ifdef CONFIG_CGROUP_WRITEBACK

struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
void wb_congested_put(struct bdi_writeback_congested *congested);
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css,
                                    gfp_t gfp);
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct blkcg *blkcg);
int inode_congested(struct inode *inode, int cong_bits);

/**
 * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
 * @inode: inode of interest
 *
 * cgroup writeback requires support from both the bdi and filesystem.
 * Also, both memcg and iocg have to be on the default hierarchy.  Test
 * whether all conditions are met.
 *
 * Note that the test result may change dynamically on the same inode
 * depending on how memcg and iocg are configured.
 */
static inline bool inode_cgwb_enabled(struct inode *inode)
{
        struct backing_dev_info *bdi = inode_to_bdi(inode);

        return cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
                cgroup_subsys_on_dfl(io_cgrp_subsys) &&
                bdi_cap_account_dirty(bdi) &&
                (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
                (inode->i_sb->s_iflags & SB_I_CGROUPWB);
}

/**
 * wb_find_current - find wb for %current on a bdi
 * @bdi: bdi of interest
 *
 * Find the wb of @bdi which matches both the memcg and blkcg of %current.
 * Must be called under rcu_read_lock() which protects the returned wb.
 * Returns NULL if not found.
 */
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        struct cgroup_subsys_state *memcg_css;
        struct bdi_writeback *wb;

        memcg_css = task_css(current, memory_cgrp_id);
        if (!memcg_css->parent)
                return &bdi->wb;

        wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);

        /*
         * %current's blkcg equals the effective blkcg of its memcg.  No
         * need to use the relatively expensive cgroup_get_e_css().
         */
        if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
                return wb;
        return NULL;
}

/**
 * wb_get_create_current - get or create wb for %current on a bdi
 * @bdi: bdi of interest
 * @gfp: allocation mask
 *
 * Equivalent to wb_get_create() on %current's memcg.  This function is
 * called from a relatively hot path and optimizes the common cases using
 * wb_find_current().
 */
static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        struct bdi_writeback *wb;

        rcu_read_lock();
        wb = wb_find_current(bdi);
        if (wb && unlikely(!wb_tryget(wb)))
                wb = NULL;
        rcu_read_unlock();

        if (unlikely(!wb)) {
                struct cgroup_subsys_state *memcg_css;

                memcg_css = task_get_css(current, memory_cgrp_id);
                wb = wb_get_create(bdi, memcg_css, gfp);
                css_put(memcg_css);
        }
        return wb;
}

/**
 * inode_to_wb_is_valid - test whether an inode has a wb associated
 * @inode: inode of interest
 *
 * Returns %true if @inode has a wb associated.  May be called without any
 * locking.
 */
static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return inode->i_wb;
}

/**
 * inode_to_wb - determine the wb of an inode
 * @inode: inode of interest
 *
 * Returns the wb @inode is currently associated with.  The caller must be
 * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the
 * associated wb's list_lock.
 */
static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
        WARN_ON_ONCE(debug_locks &&
                     (!lockdep_is_held(&inode->i_lock) &&
                      !lockdep_is_held(&inode->i_mapping->tree_lock) &&
                      !lockdep_is_held(&inode->i_wb->list_lock)));
#endif
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
 * @inode: target inode
 * @lockedp: temp bool output param, to be passed to the end function
 *
 * The caller wants to access the wb associated with @inode but isn't
 * holding inode->i_lock, mapping->tree_lock or wb->list_lock.  This
 * function determines the wb associated with @inode and ensures that the
 * association doesn't change until the transaction is finished with
 * unlocked_inode_to_wb_end().
 *
 * The caller must call unlocked_inode_to_wb_end() with *@lockedp
 * afterwards and can't sleep during the transaction.  IRQs may or may not
 * be disabled on return.
 */
static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
{
        rcu_read_lock();

        /*
         * Paired with store_release in inode_switch_wb_work_fn() and
         * ensures that we see the new wb if we see cleared I_WB_SWITCH.
         */
        *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;

        if (unlikely(*lockedp))
                spin_lock_irq(&inode->i_mapping->tree_lock);

        /*
         * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
         * inode_to_wb()'s lockdep check would complain here, so dereference
         * i_wb directly.
         */
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_end - end inode wb access transaction
 * @inode: target inode
 * @locked: *@lockedp from unlocked_inode_to_wb_begin()
 */
static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
{
        if (unlikely(locked))
                spin_unlock_irq(&inode->i_mapping->tree_lock);

        rcu_read_unlock();
}

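/*
 * Illustrative sketch (not part of this header): the begin/end pair forms
 * a short, non-sleeping transaction around wb accesses made without any of
 * the locks inode_to_wb() requires, e.g. when updating per-wb statistics.
 * The stat update shown is only an example.
 *
 *      struct bdi_writeback *wb;
 *      bool locked;
 *
 *      wb = unlocked_inode_to_wb_begin(inode, &locked);
 *      inc_wb_stat(wb, WB_DIRTIED);
 *      unlocked_inode_to_wb_end(inode, locked);
 */
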
#else   /* CONFIG_CGROUP_WRITEBACK */

static inline bool inode_cgwb_enabled(struct inode *inode)
{
        return false;
}

static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
        atomic_inc(&bdi->wb_congested->refcnt);
        return bdi->wb_congested;
}

static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
        if (atomic_dec_and_test(&congested->refcnt))
                kfree(congested);
}

static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        return &bdi->wb;
}

static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        return &bdi->wb;
}

static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return true;
}

static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
        return &inode_to_bdi(inode)->wb;
}

static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
{
        return inode_to_wb(inode);
}

static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
{
}

static inline void wb_memcg_offline(struct mem_cgroup *memcg)
{
}

static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
}

static inline int inode_congested(struct inode *inode, int cong_bits)
{
        return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
}

#endif  /* CONFIG_CGROUP_WRITEBACK */

static inline int inode_read_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_sync_congested);
}

static inline int inode_write_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_async_congested);
}

static inline int inode_rw_congested(struct inode *inode)
{
        return inode_congested(inode, (1 << WB_sync_congested) |
                                      (1 << WB_async_congested));
}

static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
{
        return wb_congested(&bdi->wb, cong_bits);
}

static inline int bdi_read_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_sync_congested);
}

static inline int bdi_write_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_async_congested);
}

static inline int bdi_rw_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, (1 << WB_sync_congested) |
                                  (1 << WB_async_congested));
}

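/*
 * Illustrative sketch (not part of this header): a common way for callers
 * that generate writeback I/O to back off while a device is congested.
 * HZ / 10 is just an example timeout.
 *
 *      while (bdi_write_congested(bdi))
 *              congestion_wait(BLK_RW_ASYNC, HZ / 10);
 */
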
#endif  /* _LINUX_BACKING_DEV_H */