linux/include/linux/backing-dev-defs.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_BACKING_DEV_DEFS_H
#define __LINUX_BACKING_DEV_DEFS_H

#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/percpu_counter.h>
#include <linux/percpu-refcount.h>
#include <linux/flex_proportions.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/refcount.h>

struct page;
struct device;
struct dentry;

/*
 * Bits in bdi_writeback.state
 */
enum wb_state {
        WB_registered,          /* bdi_register() was done */
        WB_writeback_running,   /* Writeback is in progress */
        WB_has_dirty_io,        /* Dirty inodes on ->b_{dirty|io|more_io} */
        WB_start_all,           /* nr_pages == 0 (all) work pending */
};

enum wb_congested_state {
        WB_async_congested,     /* The async (write) queue is getting full */
        WB_sync_congested,      /* The sync queue is getting full */
};

enum wb_stat_item {
        WB_RECLAIMABLE,
        WB_WRITEBACK,
        WB_DIRTIED,
        WB_WRITTEN,
        NR_WB_STAT_ITEMS
};

#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
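
/*
 * Illustrative sketch (not part of this header): the wb->stat[] counters
 * declared below are per-CPU, and updates are expected to pass
 * WB_STAT_BATCH as the percpu_counter batch size so that cross-CPU
 * synchronization stays cheap.  The helper name here is only an example:
 *
 *      static inline void add_wb_stat(struct bdi_writeback *wb,
 *                                     enum wb_stat_item item, s64 amount)
 *      {
 *              percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
 *      }
 */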

/*
 * why some writeback work was initiated
 */
enum wb_reason {
        WB_REASON_BACKGROUND,
        WB_REASON_VMSCAN,
        WB_REASON_SYNC,
        WB_REASON_PERIODIC,
        WB_REASON_LAPTOP_TIMER,
        WB_REASON_FS_FREE_SPACE,
        /*
         * There is no bdi forker thread any more and such work is now
         * done by the emergency worker.  However, the name is visible
         * to userland via tracepoints and we keep exposing exactly the
         * same information, so the mismatched name is retained.
         */
        WB_REASON_FORKER_THREAD,
        WB_REASON_FOREIGN_FLUSH,

        WB_REASON_MAX,
};

struct wb_completion {
        atomic_t                cnt;
        wait_queue_head_t       *waitq;
};

#define __WB_COMPLETION_INIT(_waitq)    \
        (struct wb_completion){ .cnt = ATOMIC_INIT(1), .waitq = (_waitq) }

/*
 * If one wants to wait for one or more wb_writeback_works, each work's
 * ->done should be set to a wb_completion defined using the following
 * macro.  Once all work items are issued with wb_queue_work(), the caller
 * can wait for the completion of all using wb_wait_for_completion().  Work
 * items which are waited upon aren't freed automatically on completion.
 */
#define WB_COMPLETION_INIT(bdi)         __WB_COMPLETION_INIT(&(bdi)->wb_waitq)

#define DEFINE_WB_COMPLETION(cmpl, bdi) \
        struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)
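
/*
 * Illustrative usage sketch (not part of this header), assuming the
 * wb_writeback_work type and the wb_queue_work()/wb_wait_for_completion()
 * helpers referred to in the comment above are visible to the caller.  As
 * noted, the waited-upon work item is not freed automatically, so a
 * stack-allocated one is fine:
 *
 *      DEFINE_WB_COMPLETION(done, bdi);
 *      struct wb_writeback_work work = {
 *              .nr_pages       = LONG_MAX,
 *              .sync_mode      = WB_SYNC_ALL,
 *              .done           = &done,
 *      };
 *
 *      wb_queue_work(wb, &work);
 *      wb_wait_for_completion(&done);
 */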

/*
 * Each wb (bdi_writeback) can perform writeback operations, is measured
 * and throttled independently.  Without cgroup writeback, each bdi
 * (backing_dev_info) is served by its embedded bdi->wb.
 *
 * On the default hierarchy, blkcg implicitly enables memcg.  This allows
 * using memcg's page ownership for attributing writeback IOs, and every
 * memcg - blkcg combination can be served by its own wb by assigning a
 * dedicated wb to each memcg, which enables isolation across different
 * cgroups and propagation of IO back pressure down from the IO layer up
 * to the tasks which are generating the dirty pages to be written back.
 *
 * A cgroup wb is indexed on its bdi by the ID of the associated memcg,
 * refcounted with the number of inodes attached to it, and pins the memcg
 * and the corresponding blkcg.  As the corresponding blkcg for a memcg may
 * change as blkcg is disabled and enabled higher up in the hierarchy, a wb
 * is tested for blkcg after lookup and removed from the index on mismatch
 * so that a new wb for the combination can be created.
 */
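
/*
 * Illustrative sketch (not part of this header) of the lookup-and-verify
 * pattern described above: look the wb up by memcg ID, then confirm that
 * its blkcg association is still current and that it can still be pinned;
 * on mismatch the caller creates a fresh wb for the combination.  The
 * helper below is hypothetical, not the in-tree implementation.
 *
 *      struct bdi_writeback *lookup_cgwb(struct backing_dev_info *bdi,
 *                                        struct cgroup_subsys_state *memcg_css,
 *                                        struct cgroup_subsys_state *blkcg_css)
 *      {
 *              struct bdi_writeback *wb;
 *
 *              rcu_read_lock();
 *              wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
 *              if (wb && (wb->blkcg_css != blkcg_css || !wb_tryget(wb)))
 *                      wb = NULL;
 *              rcu_read_unlock();
 *              return wb;
 *      }
 */
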
struct bdi_writeback {
        struct backing_dev_info *bdi;   /* our parent bdi */

        unsigned long state;            /* Always use atomic bitops on this */
        unsigned long last_old_flush;   /* last old data flush */

        struct list_head b_dirty;       /* dirty inodes */
        struct list_head b_io;          /* parked for writeback */
        struct list_head b_more_io;     /* parked for more writeback */
        struct list_head b_dirty_time;  /* time stamps are dirty */
        spinlock_t list_lock;           /* protects the b_* lists */

        atomic_t writeback_inodes;      /* number of inodes under writeback */
        struct percpu_counter stat[NR_WB_STAT_ITEMS];

        unsigned long congested;        /* WB_[a]sync_congested flags */

        unsigned long bw_time_stamp;    /* last time write bw is updated */
        unsigned long dirtied_stamp;
        unsigned long written_stamp;    /* pages written at bw_time_stamp */
        unsigned long write_bandwidth;  /* the estimated write bandwidth */
        unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */

        /*
         * The base dirty throttle rate, recalculated every 200ms.
         * The dirtying rate of every task on this bdi is kept below it.
         * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
         * in small steps and is much smoother and more stable than the
         * latter.
         */
        unsigned long dirty_ratelimit;
        unsigned long balanced_dirty_ratelimit;

        struct fprop_local_percpu completions;
        int dirty_exceeded;
        enum wb_reason start_all_reason;

        spinlock_t work_lock;           /* protects work_list & dwork scheduling */
        struct list_head work_list;
        struct delayed_work dwork;      /* work item used for writeback */
        struct delayed_work bw_dwork;   /* work item used for bandwidth estimate */

        unsigned long dirty_sleep;      /* last wait */

        struct list_head bdi_node;      /* anchored at bdi->wb_list */

#ifdef CONFIG_CGROUP_WRITEBACK
        struct percpu_ref refcnt;       /* used only for !root wb's */
        struct fprop_local_percpu memcg_completions;
        struct cgroup_subsys_state *memcg_css; /* the associated memcg */
        struct cgroup_subsys_state *blkcg_css; /* and blkcg */
        struct list_head memcg_node;    /* anchored at memcg->cgwb_list */
        struct list_head blkcg_node;    /* anchored at blkcg->cgwb_list */
        struct list_head b_attached;    /* attached inodes, protected by list_lock */
        struct list_head offline_node;  /* anchored at offline_cgwbs */

        union {
                struct work_struct release_work;
                struct rcu_head rcu;
        };
#endif
};

struct backing_dev_info {
        u64 id;
        struct rb_node rb_node; /* keyed by ->id */
        struct list_head bdi_list;
        unsigned long ra_pages; /* max readahead in PAGE_SIZE units */
        unsigned long io_pages; /* max allowed IO size */

        struct kref refcnt;     /* Reference counter for the structure */
        unsigned int capabilities; /* Device capabilities */
        unsigned int min_ratio;
        unsigned int max_ratio, max_prop_frac;

        /*
         * Sum of avg_write_bw of wbs with dirty inodes.  > 0 if there are
         * any dirty wbs, which is depended upon by bdi_has_dirty().
         */
        atomic_long_t tot_write_bandwidth;

        struct bdi_writeback wb;  /* the root writeback info for this bdi */
        struct list_head wb_list; /* list of all wbs */
#ifdef CONFIG_CGROUP_WRITEBACK
        struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
        struct mutex cgwb_release_mutex;  /* protect shutdown of wb structs */
        struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
#endif
        wait_queue_head_t wb_waitq;

        struct device *dev;
        char dev_name[64];
        struct device *owner;

        struct timer_list laptop_mode_wb_timer;

#ifdef CONFIG_DEBUG_FS
        struct dentry *debug_dir;
#endif
};

enum {
        BLK_RW_ASYNC    = 0,
        BLK_RW_SYNC     = 1,
};

void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
void set_bdi_congested(struct backing_dev_info *bdi, int sync);
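
/*
 * Illustrative sketch (not part of this header): a filesystem or driver
 * whose async write queue is filling up may mark its bdi congested and
 * clear the flag again once the queue drains.  The condition checked
 * below is only an example:
 *
 *      if (device_queue_nearly_full(dev))
 *              set_bdi_congested(bdi, BLK_RW_ASYNC);
 *      else
 *              clear_bdi_congested(bdi, BLK_RW_ASYNC);
 */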

struct wb_lock_cookie {
        bool locked;
        unsigned long flags;
};

#ifdef CONFIG_CGROUP_WRITEBACK

/**
 * wb_tryget - try to increment a wb's refcount
 * @wb: bdi_writeback to get
 */
static inline bool wb_tryget(struct bdi_writeback *wb)
{
        if (wb != &wb->bdi->wb)
                return percpu_ref_tryget(&wb->refcnt);
        return true;
}

/**
 * wb_get - increment a wb's refcount
 * @wb: bdi_writeback to get
 */
static inline void wb_get(struct bdi_writeback *wb)
{
        if (wb != &wb->bdi->wb)
                percpu_ref_get(&wb->refcnt);
}

/**
 * wb_put_many - decrement a wb's refcount
 * @wb: bdi_writeback to put
 * @nr: number of references to put
 */
static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
{
        if (WARN_ON_ONCE(!wb->bdi)) {
                /*
                 * A driver bug might cause a file to be removed before bdi was
                 * initialized.
                 */
                return;
        }

        if (wb != &wb->bdi->wb)
                percpu_ref_put_many(&wb->refcnt, nr);
}

/**
 * wb_put - decrement a wb's refcount
 * @wb: bdi_writeback to put
 */
static inline void wb_put(struct bdi_writeback *wb)
{
        wb_put_many(wb, 1);
}

/**
 * wb_dying - is a wb dying?
 * @wb: bdi_writeback of interest
 *
 * Returns whether @wb is unlinked and being drained.
 */
static inline bool wb_dying(struct bdi_writeback *wb)
{
        return percpu_ref_is_dying(&wb->refcnt);
}
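
/*
 * Illustrative usage sketch (not part of this header): code that wants to
 * operate on a cgroup wb it does not already hold pins it first and drops
 * the reference when done; with !CONFIG_CGROUP_WRITEBACK these calls
 * degenerate to no-ops on the sole embedded bdi->wb.
 *
 *      if (wb_tryget(wb)) {
 *              ... operate on @wb ...
 *              wb_put(wb);
 *      }
 */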

#else   /* CONFIG_CGROUP_WRITEBACK */

static inline bool wb_tryget(struct bdi_writeback *wb)
{
        return true;
}

static inline void wb_get(struct bdi_writeback *wb)
{
}

static inline void wb_put(struct bdi_writeback *wb)
{
}

static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
{
}

static inline bool wb_dying(struct bdi_writeback *wb)
{
        return false;
}

#endif  /* CONFIG_CGROUP_WRITEBACK */

#endif  /* __LINUX_BACKING_DEV_DEFS_H */