   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 * include/linux/writeback.h
   4 */
   5#ifndef WRITEBACK_H
   6#define WRITEBACK_H
   7
   8#include <linux/sched.h>
   9#include <linux/workqueue.h>
  10#include <linux/fs.h>
  11#include <linux/flex_proportions.h>
  12#include <linux/backing-dev-defs.h>
  13#include <linux/blk_types.h>
  14
  15struct bio;
  16
  17DECLARE_PER_CPU(int, dirty_throttle_leaks);
  18
  19/*
  20 * The 1/4 region under the global dirty thresh is for smooth dirty throttling:
  21 *
  22 *      (thresh - thresh/DIRTY_FULL_SCOPE, thresh)
  23 *
  24 * Further beyond, all dirtier tasks will enter a loop waiting (possibly long
  25 * time) for the dirty pages to drop, unless written enough pages.
  26 *
  27 * The global dirty threshold is normally equal to the global dirty limit,
  28 * except when the system suddenly allocates a lot of anonymous memory and
  29 * knocks down the global dirty threshold quickly, in which case the global
  30 * dirty limit will follow down slowly to prevent livelocking all dirtier tasks.
  31 */
  32#define DIRTY_SCOPE             8
  33#define DIRTY_FULL_SCOPE        (DIRTY_SCOPE / 2)
  34
  35struct backing_dev_info;
  36
  37/*
  38 * fs/fs-writeback.c
  39 */
/* Sync policy for one writeback pass; stored in writeback_control->sync_mode. */
enum writeback_sync_modes {
	WB_SYNC_NONE,	/* Don't wait on anything */
	WB_SYNC_ALL,	/* Wait on every mapping */
};
  44
/*
 * why some writeback work was initiated
 */
enum wb_reason {
	WB_REASON_BACKGROUND,
	WB_REASON_VMSCAN,
	WB_REASON_SYNC,
	WB_REASON_PERIODIC,
	WB_REASON_LAPTOP_TIMER,
	WB_REASON_FREE_MORE_MEM,
	WB_REASON_FS_FREE_SPACE,
	/*
	 * There is no bdi forker thread any more and works are done
	 * by emergency worker, however, this is TPs userland visible
	 * and we'll be exposing exactly the same information,
	 * so it has a mismatch name.
	 */
	WB_REASON_FORKER_THREAD,

	WB_REASON_MAX,	/* sentinel: number of reasons; must stay last */
};
  66
/*
 * A control structure which tells the writeback code what to do.  These are
 * always on the stack, and hence need no locking.  They are always initialised
 * in a manner such that unspecified fields are set to zero.
 */
struct writeback_control {
	long nr_to_write;		/* Write this many pages, and decrement
					   this for each page written */
	long pages_skipped;		/* Pages which were not written */

	/*
	 * For a_ops->writepages(): if start or end are non-zero then this is
	 * a hint that the filesystem need only write out the pages inside that
	 * byterange.  The byte at `end' is included in the writeout request.
	 */
	loff_t range_start;
	loff_t range_end;

	enum writeback_sync_modes sync_mode;	/* WB_SYNC_NONE or WB_SYNC_ALL */

	unsigned for_kupdate:1;		/* A kupdate writeback */
	unsigned for_background:1;	/* A background writeback */
	unsigned tagged_writepages:1;	/* tag-and-write to avoid livelock */
	unsigned for_reclaim:1;		/* Invoked from the page allocator */
	unsigned range_cyclic:1;	/* range_start is cyclic */
	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
#ifdef CONFIG_CGROUP_WRITEBACK
	struct bdi_writeback *wb;	/* wb this writeback is issued under */
	struct inode *inode;		/* inode being written out */

	/* foreign inode detection, see wbc_detach_inode() */
	int wb_id;			/* current wb id */
	int wb_lcand_id;		/* last foreign candidate wb id */
	int wb_tcand_id;		/* this foreign candidate wb id */
	size_t wb_bytes;		/* bytes written by current wb */
	size_t wb_lcand_bytes;		/* bytes written by last candidate */
	size_t wb_tcand_bytes;		/* bytes written by this candidate */
#endif
};
 106
 107static inline int wbc_to_write_flags(struct writeback_control *wbc)
 108{
 109        if (wbc->sync_mode == WB_SYNC_ALL)
 110                return REQ_SYNC;
 111        else if (wbc->for_kupdate || wbc->for_background)
 112                return REQ_BACKGROUND;
 113
 114        return 0;
 115}
 116
/*
 * A wb_domain represents a domain that wb's (bdi_writeback's) belong to
 * and are measured against each other in.  There always is one global
 * domain, global_wb_domain, that every wb in the system is a member of.
 * This allows measuring the relative bandwidth of each wb to distribute
 * dirtyable memory accordingly.
 */
struct wb_domain {
	spinlock_t lock;	/* protects dirty_limit and dirty_limit_tstamp */

	/*
	 * Scale the writeback cache size proportional to the relative
	 * writeout speed.
	 *
	 * We do this by keeping a floating proportion between BDIs, based
	 * on page writeback completions [end_page_writeback()]. Those
	 * devices that write out pages fastest will get the larger share,
	 * while the slower will get a smaller share.
	 *
	 * We use page writeout completions because we are interested in
	 * getting rid of dirty pages. Having them written out is the
	 * primary goal.
	 *
	 * We introduce a concept of time, a period over which we measure
	 * these events, because demand can/will vary over time. The length
	 * of this period itself is measured in page writeback completions.
	 */
	struct fprop_global completions;
	struct timer_list period_timer; /* timer for aging of completions */
	unsigned long period_time;

	/*
	 * The dirtyable memory and dirty threshold could be suddenly
	 * knocked down by a large amount (eg. on the startup of KVM in a
	 * swapless system). This may throw the system into deep dirty
	 * exceeded state and throttle heavy/light dirtiers alike. To
	 * retain good responsiveness, maintain global_dirty_limit for
	 * tracking slowly down to the knocked down dirty threshold.
	 *
	 * Both fields are protected by ->lock.
	 */
	unsigned long dirty_limit_tstamp;
	unsigned long dirty_limit;
};
 161
 162/**
 163 * wb_domain_size_changed - memory available to a wb_domain has changed
 164 * @dom: wb_domain of interest
 165 *
 166 * This function should be called when the amount of memory available to
 167 * @dom has changed.  It resets @dom's dirty limit parameters to prevent
 168 * the past values which don't match the current configuration from skewing
 169 * dirty throttling.  Without this, when memory size of a wb_domain is
 170 * greatly reduced, the dirty throttling logic may allow too many pages to
 171 * be dirtied leading to consecutive unnecessary OOMs and may get stuck in
 172 * that situation.
 173 */
 174static inline void wb_domain_size_changed(struct wb_domain *dom)
 175{
 176        spin_lock(&dom->lock);
 177        dom->dirty_limit_tstamp = jiffies;
 178        dom->dirty_limit = 0;
 179        spin_unlock(&dom->lock);
 180}
 181
 182/*
 183 * fs/fs-writeback.c
 */
 185struct bdi_writeback;
 186void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
 187void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
 188                                                        enum wb_reason reason);
 189bool try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
 190bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
 191                                   enum wb_reason reason);
 192void sync_inodes_sb(struct super_block *);
 193void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
 194void inode_wait_for_writeback(struct inode *inode);
 195
 196/* writeback.h requires fs.h; it, too, is not included from here. */
/*
 * Block (uninterruptibly) until the __I_NEW bit in inode->i_state clears.
 * might_sleep() documents that callers must be in sleepable context.
 */
static inline void wait_on_inode(struct inode *inode)
{
	might_sleep();
	wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE);
}
 202
 203#ifdef CONFIG_CGROUP_WRITEBACK
 204
 205#include <linux/cgroup.h>
 206#include <linux/bio.h>
 207
 208void __inode_attach_wb(struct inode *inode, struct page *page);
 209void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 210                                 struct inode *inode)
 211        __releases(&inode->i_lock);
 212void wbc_detach_inode(struct writeback_control *wbc);
 213void wbc_account_io(struct writeback_control *wbc, struct page *page,
 214                    size_t bytes);
 215void cgroup_writeback_umount(void);
 216
 217/**
 218 * inode_attach_wb - associate an inode with its wb
 219 * @inode: inode of interest
 220 * @page: page being dirtied (may be NULL)
 221 *
 222 * If @inode doesn't have its wb, associate it with the wb matching the
 223 * memcg of @page or, if @page is NULL, %current.  May be called w/ or w/o
 224 * @inode->i_lock.
 225 */
 226static inline void inode_attach_wb(struct inode *inode, struct page *page)
 227{
 228        if (!inode->i_wb)
 229                __inode_attach_wb(inode, page);
 230}
 231
 232/**
 233 * inode_detach_wb - disassociate an inode from its wb
 234 * @inode: inode of interest
 235 *
 236 * @inode is being freed.  Detach from its wb.
 237 */
 238static inline void inode_detach_wb(struct inode *inode)
 239{
 240        if (inode->i_wb) {
 241                WARN_ON_ONCE(!(inode->i_state & I_CLEAR));
 242                wb_put(inode->i_wb);
 243                inode->i_wb = NULL;
 244        }
 245}
 246
 247/**
 248 * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite
 249 * @wbc: writeback_control of interest
 250 * @inode: target inode
 251 *
 252 * This function is to be used by __filemap_fdatawrite_range(), which is an
 253 * alternative entry point into writeback code, and first ensures @inode is
 254 * associated with a bdi_writeback and attaches it to @wbc.
 255 */
static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
					       struct inode *inode)
{
	spin_lock(&inode->i_lock);
	inode_attach_wb(inode, NULL);		/* ensure inode->i_wb is set */
	wbc_attach_and_unlock_inode(wbc, inode);	/* drops i_lock */
}
 263
 264/**
 265 * wbc_init_bio - writeback specific initializtion of bio
 266 * @wbc: writeback_control for the writeback in progress
 267 * @bio: bio to be initialized
 268 *
 269 * @bio is a part of the writeback in progress controlled by @wbc.  Perform
 270 * writeback specific initialization.  This is used to apply the cgroup
 271 * writeback context.
 272 */
 273static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 274{
 275        /*
 276         * pageout() path doesn't attach @wbc to the inode being written
 277         * out.  This is intentional as we don't want the function to block
 278         * behind a slow cgroup.  Ultimately, we want pageout() to kick off
 279         * regular writeback instead of writing things out itself.
 280         */
 281        if (wbc->wb)
 282                bio_associate_blkcg(bio, wbc->wb->blkcg_css);
 283}
 284
 285#else   /* CONFIG_CGROUP_WRITEBACK */
 286
/* !CONFIG_CGROUP_WRITEBACK: no per-cgroup wb, so attaching is a no-op. */
static inline void inode_attach_wb(struct inode *inode, struct page *page)
{
}
 290
/* !CONFIG_CGROUP_WRITEBACK: nothing was attached, so nothing to detach. */
static inline void inode_detach_wb(struct inode *inode)
{
}
 294
/*
 * !CONFIG_CGROUP_WRITEBACK: no wb to attach, but the unlock contract of
 * the real implementation (caller enters with i_lock held) must be kept.
 */
static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
					       struct inode *inode)
	__releases(&inode->i_lock)
{
	spin_unlock(&inode->i_lock);
}
 301
/* !CONFIG_CGROUP_WRITEBACK: no-op; this variant never takes i_lock. */
static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
					       struct inode *inode)
{
}
 306
/* !CONFIG_CGROUP_WRITEBACK: no wb/foreign-inode state to tear down. */
static inline void wbc_detach_inode(struct writeback_control *wbc)
{
}
 310
/* !CONFIG_CGROUP_WRITEBACK: no blkcg context to apply to the bio. */
static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
{
}
 314
/* !CONFIG_CGROUP_WRITEBACK: no per-wb byte accounting. */
static inline void wbc_account_io(struct writeback_control *wbc,
				  struct page *page, size_t bytes)
{
}
 319
/* !CONFIG_CGROUP_WRITEBACK: nothing to flush on umount. */
static inline void cgroup_writeback_umount(void)
{
}
 323
 324#endif  /* CONFIG_CGROUP_WRITEBACK */
 325
 326/*
 327 * mm/page-writeback.c
 328 */
 329#ifdef CONFIG_BLOCK
 330void laptop_io_completion(struct backing_dev_info *info);
 331void laptop_sync_completion(void);
 332void laptop_mode_sync(struct work_struct *work);
 333void laptop_mode_timer_fn(unsigned long data);
 334#else
/* !CONFIG_BLOCK: laptop mode writeback hooks compile out to a no-op. */
static inline void laptop_sync_completion(void) { }
 336#endif
 337bool node_dirty_ok(struct pglist_data *pgdat);
 338int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
 339#ifdef CONFIG_CGROUP_WRITEBACK
 340void wb_domain_exit(struct wb_domain *dom);
 341#endif
 342
 343extern struct wb_domain global_wb_domain;
 344
 345/* These are exported to sysctl. */
 346extern int dirty_background_ratio;
 347extern unsigned long dirty_background_bytes;
 348extern int vm_dirty_ratio;
 349extern unsigned long vm_dirty_bytes;
 350extern unsigned int dirty_writeback_interval;
 351extern unsigned int dirty_expire_interval;
 352extern unsigned int dirtytime_expire_interval;
 353extern int vm_highmem_is_dirtyable;
 354extern int block_dump;
 355extern int laptop_mode;
 356
 357extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
 358                void __user *buffer, size_t *lenp,
 359                loff_t *ppos);
 360extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
 361                void __user *buffer, size_t *lenp,
 362                loff_t *ppos);
 363extern int dirty_ratio_handler(struct ctl_table *table, int write,
 364                void __user *buffer, size_t *lenp,
 365                loff_t *ppos);
 366extern int dirty_bytes_handler(struct ctl_table *table, int write,
 367                void __user *buffer, size_t *lenp,
 368                loff_t *ppos);
 369int dirtytime_interval_handler(struct ctl_table *table, int write,
 370                               void __user *buffer, size_t *lenp, loff_t *ppos);
 371
 372struct ctl_table;
 373int dirty_writeback_centisecs_handler(struct ctl_table *, int,
 374                                      void __user *, size_t *, loff_t *);
 375
 376void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
 377unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
 378
 379void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
 380void balance_dirty_pages_ratelimited(struct address_space *mapping);
 381bool wb_over_bg_thresh(struct bdi_writeback *wb);
 382
 383typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
 384                                void *data);
 385
 386int generic_writepages(struct address_space *mapping,
 387                       struct writeback_control *wbc);
 388void tag_pages_for_writeback(struct address_space *mapping,
 389                             pgoff_t start, pgoff_t end);
 390int write_cache_pages(struct address_space *mapping,
 391                      struct writeback_control *wbc, writepage_t writepage,
 392                      void *data);
 393int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
 394void writeback_set_ratelimit(void);
 395void tag_pages_for_writeback(struct address_space *mapping,
 396                             pgoff_t start, pgoff_t end);
 397
 398void account_page_redirty(struct page *page);
 399
 400void sb_mark_inode_writeback(struct inode *inode);
 401void sb_clear_inode_writeback(struct inode *inode);
 402
 403#endif          /* WRITEBACK_H */
 404