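// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/page-writeback.c
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level: dirty accounting, dirty throttling and the
 * write_cache_pages() machinery.
 */
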
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/sched/signal.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"

/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many pages
 * by raising pause time to max_pause when falls below it.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/* The following parameters are exported via /proc/sys/vm */

/*
 * Start background writeback (via writeback threads) at this percentage
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks
 */
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty
 */
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in
 * jiffies: a full sync is triggered after this time elapses without any
 * disk activity.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

struct wb_domain global_wb_domain;

/* consolidated parameters for balance_dirty_pages() and its subroutines */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;
	struct dirty_throttle_control *gdtc;	/* only set in memcg dtc's */
#endif
	struct bdi_writeback	*wb;
	struct fprop_local_percpu *wb_completions;

	unsigned long		avail;		/* dirtyable */
	unsigned long		dirty;		/* file_dirty + writeback */
	unsigned long		thresh;		/* dirty threshold */
	unsigned long		bg_thresh;	/* dirty background threshold */

	unsigned long		wb_dirty;	/* per-wb counterparts */
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;
};

/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrarily chosen number. The longer the period, the slower fractions will
 * reflect changes in current writeout rate.
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)

#ifdef CONFIG_CGROUP_WRITEBACK

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.dom = &global_wb_domain,		\
				.wb_completions = &(__wb)->completions

#define GDTC_INIT_NO_WB		.dom = &global_wb_domain

#define MDTC_INIT(__wb, __gdtc)	.wb = (__wb),				\
				.dom = mem_cgroup_wb_domain(__wb),	\
				.wb_completions = &(__wb)->memcg_completions, \
				.gdtc = __gdtc

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return mdtc->gdtc;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return &wb->memcg_completions;
}

static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	unsigned long this_bw = wb->avg_write_bandwidth;
	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
	unsigned long long min = wb->bdi->min_ratio;
	unsigned long long max = wb->bdi->max_ratio;

	/*
	 * @wb may already be clean by the time control reaches here and
	 * the total may not include its bw.
	 */
	if (this_bw < tot_bw) {
		if (min) {
			min *= this_bw;
			min = div64_ul(min, tot_bw);
		}
		if (max < 100) {
			max *= this_bw;
			max = div64_ul(max, tot_bw);
		}
	}

	*minp = min;
	*maxp = max;
}

#else	/* CONFIG_CGROUP_WRITEBACK */

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.wb_completions = &(__wb)->completions
#define GDTC_INIT_NO_WB
#define MDTC_INIT(__wb, __gdtc)

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return false;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return &global_wb_domain;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return NULL;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return NULL;
}

static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	*minp = wb->bdi->min_ratio;
	*maxp = wb->bdi->max_ratio;
}

#endif	/* CONFIG_CGROUP_WRITEBACK */

/*
 * In a memory zone, there is a certain amount of pages we consider
 * available for the page cache, which is essentially the number of
 * free and reclaimable pages, minus some zone reserves to protect
 * lowmem and the ability to uphold the zone's watermarks without
 * requiring writeback.
 *
 * This number of dirtyable pages is the base value of which the
 * user-configurable dirty ratio is the effective number of pages that
 * are allowed to be actually dirtied.  Per individual zone, or
 * globally by using the sum of dirtyable pages over all zones.
 *
 * Because the user is allowed to specify the dirty limit globally as
 * absolute number of bytes, calculating the per-zone dirty limit can
 * require translating the configured limit into a percentage of
 * global dirtyable memory first.
 */

/**
 * node_dirtyable_memory - number of dirtyable pages in a node
 * @pgdat: the node
 *
 * Return: the node's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-node dirty limits.
 */
static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
{
	unsigned long nr_pages = 0;
	int z;

	for (z = 0; z < MAX_NR_ZONES; z++) {
		struct zone *zone = pgdat->node_zones + z;

		if (!populated_zone(zone))
			continue;

		nr_pages += zone_page_state(zone, NR_FREE_PAGES);
	}

	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	nr_pages -= min(nr_pages, pgdat->totalreserve_pages);

	nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
	nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);

	return nr_pages;
}

static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;
	int i;

	for_each_node_state(node, N_HIGH_MEMORY) {
		for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
			struct zone *z;
			unsigned long nr_pages;

			if (!is_highmem_idx(i))
				continue;

			z = &NODE_DATA(node)->node_zones[i];
			if (!populated_zone(z))
				continue;

			nr_pages = zone_page_state(z, NR_FREE_PAGES);
			/* watch for underflows */
			nr_pages -= min(nr_pages, high_wmark_pages(z));
			nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
			nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
			x += nr_pages;
		}
	}

	/*
	 * Unreclaimable memory (kernel memory or anonymous memory
	 * without swap) can bring down the dirtyable pages below
	 * the zone's dirty balance reserve and the above calculation
	 * will underflow.  However we still want to add in nodes
	 * which are below threshold (negative values) to get a more
	 * accurate calculation but make sure that the total never
	 * underflows.
	 */
	if ((long)x < 0)
		x = 0;

	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory. This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(x, total);
#else
	return 0;
#endif
}

/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Return: the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */
static unsigned long global_dirtyable_memory(void)
{
	unsigned long x;

	x = global_zone_page_state(NR_FREE_PAGES);
	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	x -= min(x, totalreserve_pages);

	x += global_node_page_state(NR_INACTIVE_FILE);
	x += global_node_page_state(NR_ACTIVE_FILE);

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}

/**
 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
 * @dtc: dirty_throttle_control of interest
 *
 * Calculates dirty limits for either global domain or memcg domain based
 * on the dirty_{background_}ratio / dirty_{background_}bytes pairs.  The
 * results are stored in @dtc->thresh and @dtc->bg_thresh.
 */
static void domain_dirty_limits(struct dirty_throttle_control *dtc)
{
	const unsigned long available_memory = dtc->avail;
	struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
	unsigned long bytes = vm_dirty_bytes;
	unsigned long bg_bytes = dirty_background_bytes;
	/* convert ratios to per-PAGE_SIZE for higher precision */
	unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
	unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
	unsigned long thresh;
	unsigned long bg_thresh;
	struct task_struct *tsk;

	/* gdtc is !NULL iff @dtc is for memcg domain */
	if (gdtc) {
		unsigned long global_avail = gdtc->avail;

		/*
		 * The byte settings can't be applied directly to memcg
		 * domains.  Convert them to ratios by scaling against
		 * globally available memory.  As the ratios are in
		 * per-PAGE_SIZE, they can be obtained by dividing bytes by
		 * number of pages.
		 */
		if (bytes)
			ratio = min(DIV_ROUND_UP(bytes, global_avail),
				    PAGE_SIZE);
		if (bg_bytes)
			bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
				       PAGE_SIZE);
		bytes = bg_bytes = 0;
	}

	if (bytes)
		thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
	else
		thresh = (ratio * available_memory) / PAGE_SIZE;

	if (bg_bytes)
		bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
	else
		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;

	if (bg_thresh >= thresh)
		bg_thresh = thresh / 2;
	tsk = current;
	if (rt_task(tsk)) {
		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
		thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
	}
	dtc->thresh = thresh;
	dtc->bg_thresh = bg_thresh;

	/* we should eventually report the domain in the TP */
	if (!gdtc)
		trace_global_dirty_state(bg_thresh, thresh);
}

/**
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 * @pbackground: out parameter for bg_thresh
 * @pdirty: out parameter for thresh
 *
 * Calculate bg_thresh and thresh for global_wb_domain.  See
 * domain_dirty_limits() for details.
 */
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };

	gdtc.avail = global_dirtyable_memory();
	domain_dirty_limits(&gdtc);

	*pbackground = gdtc.bg_thresh;
	*pdirty = gdtc.thresh;
}

/**
 * node_dirty_limit - maximum number of dirty pages allowed in a node
 * @pgdat: the node
 *
 * Return: the maximum number of dirty pages allowed in a node, based
 * on the node's dirtyable memory.
 */
static unsigned long node_dirty_limit(struct pglist_data *pgdat)
{
	unsigned long node_memory = node_dirtyable_memory(pgdat);
	struct task_struct *tsk = current;
	unsigned long dirty;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
			node_memory / global_dirtyable_memory();
	else
		dirty = vm_dirty_ratio * node_memory / 100;

	if (rt_task(tsk))
		dirty += dirty / 4;

	return dirty;
}

/**
 * node_dirty_ok - tells whether a node is within its dirty limits
 * @pgdat: the node to check
 *
 * Return: %true when the dirty pages in @pgdat are within the node's
 * dirty limit, %false if the limit is exceeded.
 */
bool node_dirty_ok(struct pglist_data *pgdat)
{
	unsigned long limit = node_dirty_limit(pgdat);
	unsigned long nr_pages = 0;

	nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
	nr_pages += node_page_state(pgdat, NR_WRITEBACK);

	return nr_pages <= limit;
}

int dirty_background_ratio_handler(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_bytes = 0;
	return ret;
}

int dirty_background_bytes_handler(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_ratio = 0;
	return ret;
}

int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	int old_ratio = vm_dirty_ratio;
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
		writeback_set_ratelimit();
		vm_dirty_bytes = 0;
	}
	return ret;
}

int dirty_bytes_handler(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned long old_bytes = vm_dirty_bytes;
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
		writeback_set_ratelimit();
		vm_dirty_ratio = 0;
	}
	return ret;
}

static unsigned long wp_next_time(unsigned long cur_time)
{
	cur_time += VM_COMPLETIONS_PERIOD_LEN;
	/* Avoid 0 to indicate "time is not set" */
	if (!cur_time)
		return 1;
	return cur_time;
}

static void wb_domain_writeout_inc(struct wb_domain *dom,
				   struct fprop_local_percpu *completions,
				   unsigned int max_prop_frac)
{
	__fprop_inc_percpu_max(&dom->completions, completions,
			       max_prop_frac);
	/* First event after period switching was turned off? */
	if (unlikely(!dom->period_time)) {
		/*
		 * We can race with other wb_domain_writeout_inc calls here but
		 * it does not cause any harm since the resulting time when
		 * the timer will fire and what is in period_time will be
		 * roughly the same.
		 */
		dom->period_time = wp_next_time(jiffies);
		mod_timer(&dom->period_timer, dom->period_time);
	}
}

/*
 * Increment @wb's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
static inline void __wb_writeout_inc(struct bdi_writeback *wb)
{
	struct wb_domain *cgdom;

	inc_wb_stat(wb, WB_WRITTEN);
	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
			       wb->bdi->max_prop_frac);

	cgdom = mem_cgroup_wb_domain(wb);
	if (cgdom)
		wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
				       wb->bdi->max_prop_frac);
}

void wb_writeout_inc(struct bdi_writeback *wb)
{
	unsigned long flags;

	local_irq_save(flags);
	__wb_writeout_inc(wb);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(wb_writeout_inc);

/*
 * On idle system, we can be called long after we scheduled because we use
 * deferred timers so count with missed periods.
 */
static void writeout_period(struct timer_list *t)
{
	struct wb_domain *dom = from_timer(dom, t, period_timer);
	int miss_periods = (jiffies - dom->period_time) /
						 VM_COMPLETIONS_PERIOD_LEN;

	if (fprop_new_period(&dom->completions, miss_periods + 1)) {
		dom->period_time = wp_next_time(dom->period_time +
				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
		mod_timer(&dom->period_timer, dom->period_time);
	} else {
		/*
		 * Aging has zeroed all fractions. Stop wasting CPU on period
		 * updates.
		 */
		dom->period_time = 0;
	}
}

int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
{
	memset(dom, 0, sizeof(*dom));

	spin_lock_init(&dom->lock);

	timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE);

	dom->dirty_limit_tstamp = jiffies;

	return fprop_global_init(&dom->completions, gfp);
}

#ifdef CONFIG_CGROUP_WRITEBACK
void wb_domain_exit(struct wb_domain *dom)
{
	del_timer_sync(&dom->period_timer);
	fprop_global_destroy(&dom->completions);
}
#endif

/*
 * bdi_min_ratio keeps the minimum dirty throttling ratio reserved for
 * registered backing devices, which, from a wider perspective, covers the
 * whole world.
 */
static unsigned int bdi_min_ratio;

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;

	spin_lock_bh(&bdi_lock);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}

int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
{
	int ret = 0;

	if (max_ratio > 100)
		return -EINVAL;

	spin_lock_bh(&bdi_lock);
	if (bdi->min_ratio > max_ratio) {
		ret = -EINVAL;
	} else {
		bdi->max_ratio = max_ratio;
		bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}
EXPORT_SYMBOL(bdi_set_max_ratio);

static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	return (thresh + bg_thresh) / 2;
}

static unsigned long hard_dirty_limit(struct wb_domain *dom,
				      unsigned long thresh)
{
	return max(thresh, dom->dirty_limit);
}

/*
 * Memory which can be further allocated to a memcg domain is capped by
 * system-wide clean memory excluding the amount being used in the domain.
 */
static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
			    unsigned long filepages, unsigned long headroom)
{
	struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
	unsigned long clean = filepages - min(filepages, mdtc->dirty);
	unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
	unsigned long other_clean = global_clean - min(global_clean, clean);

	mdtc->avail = filepages + min(headroom, other_clean);
}
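
/**
 * __wb_calc_thresh - @wb's share of dirty throttling threshold
 * @dtc: dirty_throttle_context of interest
 *
 * Note that balance_dirty_pages() will only seriously take this threshold
 * as a hard limit when sleeping max_pause per page is not enough to keep
 * the dirty pages under control.  For example, when the device is completely
 * stalled due to some error conditions, or when there are 1000 dd tasks
 * writing to a slow 10MB/s USB key.  In the other normal situations, it acts
 * more gently by throttling the tasks more (rather than completely blocking
 * them) when the wb dirty pages go high.
 *
 * It allocates high/low dirty limits to fast/slow devices, in order to
 * prevent:
 * - starving fast devices
 * - piling up dirty pages (that will take long time to sync) on slow devices
 *
 * The wb's share of dirty limit will be adapting to its throughput and
 * bandwidth and writeout rate, so a light dirtier wb always gets a lower
 * threshold than a heavy one.
 *
 * Return: @wb's dirty limit in pages.  The term "dirty" in the context of
 * dirty balancing includes all PG_dirty and PG_writeback pages.
 */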
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	u64 wb_thresh;
	unsigned long numerator, denominator;
	unsigned long wb_min_ratio, wb_max_ratio;

	/*
	 * Calculate this BDI's share of the thresh ratio.
	 */
	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
			      &numerator, &denominator);

	wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
	wb_thresh *= numerator;
	wb_thresh = div64_ul(wb_thresh, denominator);

	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);

	wb_thresh += (thresh * wb_min_ratio) / 100;
	if (wb_thresh > (thresh * wb_max_ratio) / 100)
		wb_thresh = thresh * wb_max_ratio / 100;

	return wb_thresh;
}

unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
					       .thresh = thresh };
	return __wb_calc_thresh(&gdtc);
}
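
/*
 * Dirty position control curve:
 *
 *                            setpoint - dirty 3
 *         f(dirty) := 1.0 + (----------------)
 *                            limit - setpoint
 *
 * it's a 3rd order polynomial that subjects to
 *
 * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
 * (2) f(setpoint) = 1.0 => the balance point
 * (3) f(limit)    = 0   => the hard limit
 * (4) df/dx      <= 0	 => negative feedback control
 * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
 *     => fast response on large errors; small oscillation near setpoint
 */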
static long long pos_ratio_polynom(unsigned long setpoint,
				   unsigned long dirty,
				   unsigned long limit)
{
	long long pos_ratio;
	long x;

	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
		      (limit - setpoint) | 1);
	pos_ratio = x;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;

	return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
}
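
/*
 * Dirty position control.
 *
 * We want the dirty pages to be balanced around the global/wb setpoints.
 * When the number of dirty pages is higher/lower than the setpoint, the
 * dirty position control ratio (and hence task dirty ratelimit) will be
 * decreased/increased to bring the dirty pages back to the setpoint.
 *
 *	pos_ratio = 1 << RATELIMIT_CALC_SHIFT
 *
 *	if (dirty < setpoint) scale up   pos_ratio
 *	if (dirty > setpoint) scale down pos_ratio
 *
 *	if (wb_dirty < wb_setpoint) scale up   pos_ratio
 *	if (wb_dirty > wb_setpoint) scale down pos_ratio
 *
 *	task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
 *
 * (o) global control line: the 3rd order polynomial from
 *     pos_ratio_polynom(), anchored at 2.0 for freerun, 1.0 at the setpoint
 *     (halfway between freerun and limit) and 0 at the hard limit.
 *
 * (o) wb control line: a linear function of wb_dirty passing through
 *     (wb_setpoint, 1.0) with a slope proportional to the device's write
 *     bandwidth, so a device whose own dirty pages pile up is throttled
 *     progressively harder regardless of the global state.
 */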
static void wb_position_ratio(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long wb_thresh = dtc->wb_thresh;
	unsigned long x_intercept;
	unsigned long setpoint;		/* dirty pages' target balance point */
	unsigned long wb_setpoint;
	unsigned long span;
	long long pos_ratio;		/* for scaling up/down the rate limit */
	long x;

	dtc->pos_ratio = 0;

	if (unlikely(dtc->dirty >= limit))
		return;

	/*
	 * global setpoint
	 *
	 * See comment for pos_ratio_polynom().
	 */
	setpoint = (freerun + limit) / 2;
	pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);

	/*
	 * The strictlimit feature is a tool preventing mistrusted filesystems
	 * (e.g. fuse) from growing a large number of dirty pages before
	 * throttling.  For such filesystems balance_dirty_pages always checks
	 * wb counters against wb limits, even if the global "nr_dirty" is
	 * under "freerun", so pos_ratio here is based on wb_dirty and
	 * wb_thresh instead of the global counters.
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		long long wb_pos_ratio;

		if (dtc->wb_dirty < 8) {
			dtc->pos_ratio = min_t(long long, pos_ratio * 2,
					   2 << RATELIMIT_CALC_SHIFT);
			return;
		}

		if (dtc->wb_dirty >= wb_thresh)
			return;

		wb_setpoint = dirty_freerun_ceiling(wb_thresh,
						    dtc->wb_bg_thresh);

		if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
			return;

		wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
						 wb_thresh);

		/*
		 * Typically, for strictlimit case, wb_setpoint << setpoint
		 * and pos_ratio >> wb_pos_ratio, so the wb counters are the
		 * limiting factor.  Still take the min so that exceeded
		 * global limits (e.g. due to activities on other wb's) keep
		 * precedence over a wb that is itself below its own limit.
		 */
		dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
		return;
	}

	/*
	 * We have computed the basic pos_ratio above based on the global
	 * situation.  If the wb is over/under its share of dirty pages,
	 * scale pos_ratio further down/up with the main wb control line: a
	 * linear function of wb_dirty that passes through (wb_setpoint, 1.0)
	 * with slope -1 / (8 * write_bw), i.e.
	 *
	 *	x_intercept = wb_setpoint + 8 * write_bw
	 */
	if (unlikely(wb_thresh > dtc->thresh))
		wb_thresh = dtc->thresh;
	/*
	 * It's very possible that wb_thresh is close to 0 not because the
	 * device is slow, but that it has remained inactive for long time.
	 * Honour such devices a reasonable good (hopefully IO efficient)
	 * threshold, so that the occasional writes won't be blocked and active
	 * writes can rampup the threshold quickly.
	 */
	wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
	/*
	 * scale global setpoint to wb's:
	 *	wb_setpoint = setpoint * wb_thresh / thresh
	 */
	x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
	wb_setpoint = setpoint * (u64)x >> 16;
	/*
	 * Use span=(8*write_bw) in single wb case as indicated by
	 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case.
	 *
	 *        wb_thresh                    thresh - wb_thresh
	 * span = --------- * (8 * write_bw) + ------------------ * wb_thresh
	 *          thresh                           thresh
	 */
	span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
	x_intercept = wb_setpoint + span;

	if (dtc->wb_dirty < x_intercept - span / 4) {
		pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
				      (x_intercept - wb_setpoint) | 1);
	} else
		pos_ratio /= 4;

	/*
	 * wb reserve area, safeguard against dirty pool underrun and disk idle
	 * It may push the desired control point of global dirty pages higher
	 * than setpoint.
	 */
	x_intercept = wb_thresh / 2;
	if (dtc->wb_dirty < x_intercept) {
		if (dtc->wb_dirty > x_intercept / 8)
			pos_ratio = div_u64(pos_ratio * x_intercept,
					    dtc->wb_dirty);
		else
			pos_ratio *= 8;
	}

	dtc->pos_ratio = pos_ratio;
}

static void wb_update_write_bandwidth(struct bdi_writeback *wb,
				      unsigned long elapsed,
				      unsigned long written)
{
	const unsigned long period = roundup_pow_of_two(3 * HZ);
	unsigned long avg = wb->avg_write_bandwidth;
	unsigned long old = wb->write_bandwidth;
	u64 bw;

	/*
	 * bw = written * HZ / elapsed
	 *
	 *                   bw * elapsed + write_bandwidth * (period - elapsed)
	 * write_bandwidth = ---------------------------------------------------
	 *                                          period
	 *
	 * @written may have decreased due to account_page_redirty().
	 * Avoid underflowing @bw calculation.
	 */
	bw = written - min(written, wb->written_stamp);
	bw *= HZ;
	if (unlikely(elapsed > period)) {
		bw = div64_ul(bw, elapsed);
		avg = bw;
		goto out;
	}
	bw += (u64)wb->write_bandwidth * (period - elapsed);
	bw >>= ilog2(period);

	/*
	 * one more level of smoothing, for filtering out sudden spikes
	 */
	if (avg > old && old >= (unsigned long)bw)
		avg -= (avg - old) >> 3;

	if (avg < old && old <= (unsigned long)bw)
		avg += (old - avg) >> 3;

out:
	/* keep avg > 0 to guarantee that tot > 0 if there are dirty wbs */
	avg = max(avg, 1LU);
	if (wb_has_dirty_io(wb)) {
		long delta = avg - wb->avg_write_bandwidth;
		WARN_ON_ONCE(atomic_long_add_return(delta,
					&wb->bdi->tot_write_bandwidth) <= 0);
	}
	wb->write_bandwidth = bw;
	wb->avg_write_bandwidth = avg;
}

static void update_dirty_limit(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	unsigned long limit = dom->dirty_limit;

	/*
	 * Follow up in one step.
	 */
	if (limit < thresh) {
		limit = thresh;
		goto update;
	}

	/*
	 * Follow down slowly. Use the higher one as the target, because thresh
	 * may drop below dirty. This is exactly the reason to introduce
	 * dom->dirty_limit which is guaranteed to lie above the dirty pages.
	 */
	thresh = max(thresh, dtc->dirty);
	if (limit > thresh) {
		limit -= (limit - thresh) >> 5;
		goto update;
	}
	return;
update:
	dom->dirty_limit = limit;
}

static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
				    unsigned long now)
{
	struct wb_domain *dom = dtc_dom(dtc);

	/*
	 * check locklessly first to optimize away locking for the most time
	 */
	if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
		return;

	spin_lock(&dom->lock);
	if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
		update_dirty_limit(dtc);
		dom->dirty_limit_tstamp = now;
	}
	spin_unlock(&dom->lock);
}
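
/*
 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal wb tasks will be curbed at or below it in long term.
 * Obviously it should be around (write_bw / N) when there are N dd tasks.
 */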
static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
				      unsigned long dirtied,
				      unsigned long elapsed)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long dirty = dtc->dirty;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long setpoint = (freerun + limit) / 2;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long dirty_ratelimit = wb->dirty_ratelimit;
	unsigned long dirty_rate;
	unsigned long task_ratelimit;
	unsigned long balanced_dirty_ratelimit;
	unsigned long step;
	unsigned long x;
	unsigned long shift;

	/*
	 * The dirty rate will match the writeout rate in long term, except
	 * when dirty pages are truncated by userspace or re-dirtied by FS.
	 */
	dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;

	/*
	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
	 */
	task_ratelimit = (u64)dirty_ratelimit *
					dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
	task_ratelimit++; /* it helps rampup dirty_ratelimit when dirty=0 */

	/*
	 * A linear estimation of the "balanced" throttle rate. The theory is,
	 * if there are N dd tasks, each throttled at task_ratelimit, the wb's
	 * dirty rate will be measured to be
	 *
	 *	dirty_rate = N * task_ratelimit
	 *
	 * So the below keeps the dirty rate matched to the write bandwidth:
	 *
	 *	balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate
	 *				 ~= write_bw / N
	 *
	 * Estimation errors in task_ratelimit show up in both uses above and
	 * largely cancel out, which makes this estimate self-correcting.
	 */
	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
					   dirty_rate | 1);

	/*
	 * balanced_dirty_ratelimit ~= (write_bw / N) <= write_bw
	 */
	if (unlikely(balanced_dirty_ratelimit > write_bw))
		balanced_dirty_ratelimit = write_bw;

	/*
	 * We could move dirty_ratelimit straight to balanced_dirty_ratelimit,
	 * but that estimate fluctuates with the workload, which would make
	 * the smooth dirty_ratelimit jittery.  Instead, only update
	 * dirty_ratelimit when both balanced_dirty_ratelimit and
	 * task_ratelimit lie on the same side of it, and then only step part
	 * of the way towards the closer of the two.  Systematic errors are
	 * thus corrected while random fluctuations get filtered out.
	 */
	step = 0;

	/*
	 * For strictlimit case, calculations above were based on wb counters
	 * and limits (starting from pos_ratio = wb_position_ratio()).
	 * Hence, to calculate "step" properly, we have to use wb_dirty as
	 * "dirty" and wb_setpoint as "setpoint".
	 *
	 * We rampup dirty_ratelimit forcibly if wb_dirty is low because
	 * it's possible that wb_thresh is close to zero due to inactivity
	 * of backing device.
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		dirty = dtc->wb_dirty;
		if (dtc->wb_dirty < 8)
			setpoint = dtc->wb_dirty + 1;
		else
			setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
	}

	if (dirty < setpoint) {
		x = min3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit < x)
			step = x - dirty_ratelimit;
	} else {
		x = max3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit > x)
			step = dirty_ratelimit - x;
	}

	/*
	 * Don't pursue 100% rate matching. It's impossible since the balanced
	 * rate itself is constantly fluctuating. So decrease the track speed
	 * when it gets close to the target. Helps eliminate pointless tremors.
	 */
	shift = dirty_ratelimit / (2 * step + 1);
	if (shift < BITS_PER_LONG)
		step = DIV_ROUND_UP(step >> shift, 8);
	else
		step = 0;

	if (dirty_ratelimit < balanced_dirty_ratelimit)
		dirty_ratelimit += step;
	else
		dirty_ratelimit -= step;

	wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
	wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;

	trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
}

static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
				  struct dirty_throttle_control *mdtc,
				  unsigned long start_time,
				  bool update_ratelimit)
{
	struct bdi_writeback *wb = gdtc->wb;
	unsigned long now = jiffies;
	unsigned long elapsed = now - wb->bw_time_stamp;
	unsigned long dirtied;
	unsigned long written;

	lockdep_assert_held(&wb->list_lock);

	/*
	 * rate-limit, only update once every 200ms.
	 */
	if (elapsed < BANDWIDTH_INTERVAL)
		return;

	dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
	written = percpu_counter_read(&wb->stat[WB_WRITTEN]);

	/*
	 * Skip quiet periods when disk bandwidth is under-utilized.
	 * (at least 1s idle time between two flusher runs)
	 */
	if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
		goto snapshot;

	if (update_ratelimit) {
		domain_update_bandwidth(gdtc, now);
		wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);

		/*
		 * @mdtc is always NULL if !CGROUP_WRITEBACK but the
		 * compiler has no way to figure that out.  Help it.
		 */
		if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
			domain_update_bandwidth(mdtc, now);
			wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
		}
	}
	wb_update_write_bandwidth(wb, elapsed, written);

snapshot:
	wb->dirtied_stamp = dirtied;
	wb->written_stamp = written;
	wb->bw_time_stamp = now;
}

void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };

	__wb_update_bandwidth(&gdtc, NULL, start_time, false);
}

/*
 * After a task dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to start dirty throttling.
 *
 * If dirty_poll_interval is too low, big NUMA machines will call the expensive
 * global page state functions too often. So scale it near-sqrt to the safety
 * margin (the number of pages we may dirty without exceeding the dirty
 * limits).
 */
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	if (thresh > dirty)
		return 1UL << (ilog2(thresh - dirty) >> 1);

	return 1;
}

static unsigned long wb_max_pause(struct bdi_writeback *wb,
				  unsigned long wb_dirty)
{
	unsigned long bw = wb->avg_write_bandwidth;
	unsigned long t;

	/*
	 * Limit pause time for small memory systems. If sleeping for too long
	 * time, a small pool of dirty/writeback pages may go empty and disk go
	 * idle.
	 *
	 * 8 serves as the safety ratio.
	 */
	t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
	t++;

	return min_t(unsigned long, t, MAX_PAUSE);
}

static long wb_min_pause(struct bdi_writeback *wb,
			 long max_pause,
			 unsigned long task_ratelimit,
			 unsigned long dirty_ratelimit,
			 int *nr_dirtied_pause)
{
	long hi = ilog2(wb->avg_write_bandwidth);
	long lo = ilog2(wb->dirty_ratelimit);
	long t;		/* target pause */
	long pause;	/* estimated next pause */
	int pages;	/* target nr_dirtied_pause */

	/* target for 10ms pause on 1-dd case */
	t = max(1, HZ / 100);

	/*
	 * Scale up pause time for concurrent dirtiers in order to reduce CPU
	 * overheads.
	 *
	 * (N * 10ms) on 2^N concurrent tasks.
	 */
	if (hi > lo)
		t += (hi - lo) * (10 * HZ) / 1024;

	/*
	 * This is a bit convoluted. We try to base the next nr_dirtied_pause
	 * on the much more stable dirty_ratelimit. However the next pause time
	 * will be computed based on task_ratelimit and the two rate limits may
	 * depart considerably at some time. Especially if task_ratelimit goes
	 * below dirty_ratelimit/2 and the target pause is max_pause, the next
	 * pause time will be max_pause*2 _trimmed down_ to max_pause, so the
	 * task won't be throttled as faithfully as expected, which could
	 * eventually bring down dirty_ratelimit.
	 *
	 * We apply two rules to fix it up:
	 * 1) try to estimate the next pause time and if necessary, use a lower
	 *    nr_dirtied_pause so as not to exceed max_pause. When this happens,
	 *    nr_dirtied_pause will be "dancing" with task_ratelimit.
	 * 2) limit the target pause time to max_pause/2, so that the normal
	 *    small fluctuations of task_ratelimit won't trigger rule (1) and
	 *    nr_dirtied_pause will remain as stable as dirty_ratelimit.
	 */
	t = min(t, 1 + max_pause / 2);
	pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);

	/*
	 * Tiny nr_dirtied_pause is found to hurt I/O performance in the test
	 * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}.
	 * When the 16 consecutive reads are often interrupted by some dirty
	 * throttling pause during the async writes, cfq will go into idles
	 * (deadline is fine). So push nr_dirtied_pause as high as possible
	 * until reaches DIRTY_POLL_THRESH=32 pages.
	 */
	if (pages < DIRTY_POLL_THRESH) {
		t = max_pause;
		pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
		if (pages > DIRTY_POLL_THRESH) {
			pages = DIRTY_POLL_THRESH;
			t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
		}
	}

	pause = HZ * pages / (task_ratelimit + 1);
	if (pause > max_pause) {
		t = max_pause;
		pages = task_ratelimit * t / roundup_pow_of_two(HZ);
	}

	*nr_dirtied_pause = pages;
	/*
	 * The minimal pause time will normally be half the target pause time.
	 */
	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
}

static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long wb_reclaimable;

	/*
	 * wb_thresh is not treated as some limiting factor as
	 * dirty_thresh, due to reasons
	 * - in JBOD setup, wb_thresh can fluctuate a lot
	 * - in a system with HDD and USB key, the USB key may somehow
	 *   go into state (wb_dirty >> wb_thresh) either because
	 *   wb_dirty starts high, or because wb_thresh drops low.
	 *   In this case we don't want to hard throttle the USB key
	 *   dirtiers for 100 seconds until wb_dirty drops under
	 *   wb_thresh. Instead the auxiliary wb control line in
	 *   wb_position_ratio() will let the dirtier task progress
	 *   at some rate <= (write_bw / 2) for bringing down wb_dirty.
	 */
	dtc->wb_thresh = __wb_calc_thresh(dtc);
	dtc->wb_bg_thresh = dtc->thresh ?
		div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;

	/*
	 * In order to avoid the stacked BDI deadlock we need
	 * to ensure we accurately count the 'dirty' pages when
	 * the threshold is low.
	 *
	 * Otherwise it would be possible to get thresh+n pages
	 * reported dirty, even though there are thresh-m pages
	 * actually dirty; with m+n sitting in the percpu
	 * deferred_io.
	 */
	if (dtc->wb_thresh < 2 * wb_stat_error()) {
		wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
		dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
	} else {
		wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
		dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
	}
}
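
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 */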
static void balance_dirty_pages(struct bdi_writeback *wb,
				unsigned long pages_dirtied)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;
	struct dirty_throttle_control *sdtc;
	unsigned long nr_reclaimable;	/* = file_dirty */
	long period;
	long pause;
	long max_pause;
	long min_pause;
	int nr_dirtied_pause;
	bool dirty_exceeded = false;
	unsigned long task_ratelimit;
	unsigned long dirty_ratelimit;
	struct backing_dev_info *bdi = wb->bdi;
	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
	unsigned long start_time = jiffies;

	for (;;) {
		unsigned long now = jiffies;
		unsigned long dirty, thresh, bg_thresh;
		unsigned long m_dirty = 0;	/* stop bogus uninit warnings */
		unsigned long m_thresh = 0;
		unsigned long m_bg_thresh = 0;

		nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
		gdtc->avail = global_dirtyable_memory();
		gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);

		domain_dirty_limits(gdtc);

		if (unlikely(strictlimit)) {
			wb_dirty_limits(gdtc);

			dirty = gdtc->wb_dirty;
			thresh = gdtc->wb_thresh;
			bg_thresh = gdtc->wb_bg_thresh;
		} else {
			dirty = gdtc->dirty;
			thresh = gdtc->thresh;
			bg_thresh = gdtc->bg_thresh;
		}

		if (mdtc) {
			unsigned long filepages, headroom, writeback;

			/*
			 * If @wb belongs to !root memcg, repeat the same
			 * basic calculations for the memcg domain.
			 */
			mem_cgroup_wb_stats(wb, &filepages, &headroom,
					    &mdtc->dirty, &writeback);
			mdtc->dirty += writeback;
			mdtc_calc_avail(mdtc, filepages, headroom);

			domain_dirty_limits(mdtc);

			if (unlikely(strictlimit)) {
				wb_dirty_limits(mdtc);
				m_dirty = mdtc->wb_dirty;
				m_thresh = mdtc->wb_thresh;
				m_bg_thresh = mdtc->wb_bg_thresh;
			} else {
				m_dirty = mdtc->dirty;
				m_thresh = mdtc->thresh;
				m_bg_thresh = mdtc->bg_thresh;
			}
		}

		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the wb limits are ramping up in case of !strictlimit.
		 *
		 * In strictlimit case make decision based on the wb counters
		 * and limits. Small writeouts when the wb limits are ramping
		 * up are the price we consciously pay for strictlimit-ing.
		 *
		 * If memcg domain is in effect, @dirty should be under
		 * both global and memcg freerun ceilings.
		 */
		if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
		    (!mdtc ||
		     m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
			unsigned long intv;
			unsigned long m_intv;

free_running:
			intv = dirty_poll_interval(dirty, thresh);
			m_intv = ULONG_MAX;

			current->dirty_paused_when = now;
			current->nr_dirtied = 0;
			if (mdtc)
				m_intv = dirty_poll_interval(m_dirty, m_thresh);
			current->nr_dirtied_pause = min(intv, m_intv);
			break;
		}

		if (unlikely(!writeback_in_progress(wb)))
			wb_start_background_writeback(wb);

		mem_cgroup_flush_foreign(wb);

		/*
		 * Calculate global domain's pos_ratio and select the
		 * global dtc by default.
		 */
		if (!strictlimit) {
			wb_dirty_limits(gdtc);

			if ((current->flags & PF_LOCAL_THROTTLE) &&
			    gdtc->wb_dirty <
			    dirty_freerun_ceiling(gdtc->wb_thresh,
						  gdtc->wb_bg_thresh))
				/*
				 * LOCAL_THROTTLE tasks must not be throttled
				 * when below the per-wb freerun ceiling.
				 */
				goto free_running;
		}

		dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
			((gdtc->dirty > gdtc->thresh) || strictlimit);

		wb_position_ratio(gdtc);
		sdtc = gdtc;

		if (mdtc) {
			/*
			 * If memcg domain is in effect, calculate its
			 * pos_ratio.  @wb should satisfy constraints from
			 * both global and memcg domains.  Choose the one
			 * with the lower pos_ratio.
			 */
			if (!strictlimit) {
				wb_dirty_limits(mdtc);

				if ((current->flags & PF_LOCAL_THROTTLE) &&
				    mdtc->wb_dirty <
				    dirty_freerun_ceiling(mdtc->wb_thresh,
							  mdtc->wb_bg_thresh))
					/*
					 * LOCAL_THROTTLE tasks must not be
					 * throttled when below the per-wb
					 * freerun ceiling.
					 */
					goto free_running;
			}
			dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
				((mdtc->dirty > mdtc->thresh) || strictlimit);

			wb_position_ratio(mdtc);
			if (mdtc->pos_ratio < gdtc->pos_ratio)
				sdtc = mdtc;
		}

		if (dirty_exceeded && !wb->dirty_exceeded)
			wb->dirty_exceeded = 1;

		if (time_is_before_jiffies(wb->bw_time_stamp +
					   BANDWIDTH_INTERVAL)) {
			spin_lock(&wb->list_lock);
			__wb_update_bandwidth(gdtc, mdtc, start_time, true);
			spin_unlock(&wb->list_lock);
		}

		/* throttle according to the chosen dtc */
		dirty_ratelimit = wb->dirty_ratelimit;
		task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
							RATELIMIT_CALC_SHIFT;
		max_pause = wb_max_pause(wb, sdtc->wb_dirty);
		min_pause = wb_min_pause(wb, max_pause,
					 task_ratelimit, dirty_ratelimit,
					 &nr_dirtied_pause);

		if (unlikely(task_ratelimit == 0)) {
			period = max_pause;
			pause = max_pause;
			goto pause;
		}
		period = HZ * pages_dirtied / task_ratelimit;
		pause = period;
		if (current->dirty_paused_when)
			pause -= now - current->dirty_paused_when;
		/*
		 * For less than 1s think time (ext3/4 may block the dirtier
		 * for up to 800ms from time to time on 1-HDD; so does xfs,
		 * however at much less frequency), try to compensate it in
		 * future periods by updating the virtual time; otherwise just
		 * do a reset, as the task may have been sleeping or otherwise
		 * not dirtying for a long while.
		 */
		if (pause < min_pause) {
			trace_balance_dirty_pages(wb,
						  sdtc->thresh,
						  sdtc->bg_thresh,
						  sdtc->dirty,
						  sdtc->wb_thresh,
						  sdtc->wb_dirty,
						  dirty_ratelimit,
						  task_ratelimit,
						  pages_dirtied,
						  period,
						  min(pause, 0L),
						  start_time);
			if (pause < -HZ) {
				current->dirty_paused_when = now;
				current->nr_dirtied = 0;
			} else if (period) {
				current->dirty_paused_when += period;
				current->nr_dirtied = 0;
			} else if (current->nr_dirtied_pause <= pages_dirtied)
				current->nr_dirtied_pause += pages_dirtied;
			break;
		}
		if (unlikely(pause > max_pause)) {
			/* for occasional dropped task_ratelimit */
			now += min(pause - max_pause, max_pause);
			pause = max_pause;
		}

pause:
		trace_balance_dirty_pages(wb,
					  sdtc->thresh,
					  sdtc->bg_thresh,
					  sdtc->dirty,
					  sdtc->wb_thresh,
					  sdtc->wb_dirty,
					  dirty_ratelimit,
					  task_ratelimit,
					  pages_dirtied,
					  period,
					  pause,
					  start_time);
		__set_current_state(TASK_KILLABLE);
		wb->dirty_sleep = now;
		io_schedule_timeout(pause);

		current->dirty_paused_when = now + pause;
		current->nr_dirtied = 0;
		current->nr_dirtied_pause = nr_dirtied_pause;

		/*
		 * This is typically equal to (dirty < thresh) and can also
		 * keep "1000+ dd on a slow USB stick" under control.
		 */
		if (task_ratelimit)
			break;

		/*
		 * In the case of an unresponsive NFS server and the NFS dirty
		 * pages exceeding dirty_thresh, give the other good wb's a
		 * pipe to pass through: once @wb's own dirty pages drop under
		 * the per-wb stat error margin, there is nothing left worth
		 * throttling on, so break out rather than block forever on a
		 * stuck device.
		 */
		if (sdtc->wb_dirty <= wb_stat_error())
			break;

		if (fatal_signal_pending(current))
			break;
	}

	if (!dirty_exceeded && wb->dirty_exceeded)
		wb->dirty_exceeded = 0;

	if (writeback_in_progress(wb))
		return;

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if (laptop_mode)
		return;

	if (nr_reclaimable > gdtc->bg_thresh)
		wb_start_background_writeback(wb);
}

static DEFINE_PER_CPU(int, bdp_ratelimits);

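/*
 * Normal tasks are throttled by
 *	loop {
 *		dirty tsk->nr_dirtied_pause pages;
 *		take a snap in balance_dirty_pages();
 *	}
 * However, if every task exits immediately when it has dirtied
 * (tsk->nr_dirtied_pause - 1) pages, those pages escape throttling
 * entirely.  So the dirty quota "leaked" by exiting tasks is kept in this
 * per-cpu counter and handed over to later dirtiers passing through
 * balance_dirty_pages_ratelimited(), so the leaked pages still get
 * throttled eventually.
 */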
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
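
/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * Once we're over the dirty memory limit we decrease the ratelimiting
 * by a lot, to prevent individual processes from overshooting the limit
 * by (ratelimit_pages) each.
 */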
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;
	int ratelimit;
	int *p;

	if (!bdi_cap_account_dirty(bdi))
		return;

	if (inode_cgwb_enabled(inode))
		wb = wb_get_create_current(bdi, GFP_KERNEL);
	if (!wb)
		wb = &bdi->wb;

	ratelimit = current->nr_dirtied_pause;
	if (wb->dirty_exceeded)
		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));

	preempt_disable();
	/*
	 * This prevents one CPU to accumulate too many dirtied pages without
	 * calling into balance_dirty_pages(), which can happen when there are
	 * 1000+ tasks, all of them start dirtying pages at exactly the same
	 * time, hence all honoured too large initial nr_dirtied_pause.
	 */
	p = this_cpu_ptr(&bdp_ratelimits);
	if (unlikely(current->nr_dirtied >= ratelimit))
		*p = 0;
	else if (unlikely(*p >= ratelimit_pages)) {
		*p = 0;
		ratelimit = 0;
	}
	/*
	 * Pick up the dirtied pages by the exited tasks. This avoids lots of
	 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
	 * the dirty throttling and livelock other long-run dirtiers.
	 */
	p = this_cpu_ptr(&dirty_throttle_leaks);
	if (*p > 0 && current->nr_dirtied < ratelimit) {
		unsigned long nr_pages_dirtied;
		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
		*p -= nr_pages_dirtied;
		current->nr_dirtied += nr_pages_dirtied;
	}
	preempt_enable();

	if (unlikely(current->nr_dirtied >= ratelimit))
		balance_dirty_pages(wb, current->nr_dirtied);

	wb_put(wb);
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);

/**
 * wb_over_bg_thresh - does @wb need to be written back?
 * @wb: bdi_writeback of interest
 *
 * Determines whether background writeback should keep writing @wb or it's
 * clean enough.
 *
 * Return: %true if writeback should continue.
 */
bool wb_over_bg_thresh(struct bdi_writeback *wb)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;

	/*
	 * Similar to balance_dirty_pages() but ignores pages being written
	 * as we're trying to decide whether to put more under writeback.
	 */
	gdtc->avail = global_dirtyable_memory();
	gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
	domain_dirty_limits(gdtc);

	if (gdtc->dirty > gdtc->bg_thresh)
		return true;

	if (wb_stat(wb, WB_RECLAIMABLE) >
	    wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
		return true;

	if (mdtc) {
		unsigned long filepages, headroom, writeback;

		mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
				    &writeback);
		mdtc_calc_avail(mdtc, filepages, headroom);
		domain_dirty_limits(mdtc);	/* ditto, ignore writeback */

		if (mdtc->dirty > mdtc->bg_thresh)
			return true;

		if (wb_stat(wb, WB_RECLAIMABLE) >
		    wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
			return true;
	}

	return false;
}

/*
 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
 */
int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
		void *buffer, size_t *length, loff_t *ppos)
{
	unsigned int old_interval = dirty_writeback_interval;
	int ret;

	ret = proc_dointvec(table, write, buffer, length, ppos);

	/*
	 * Writing 0 to dirty_writeback_interval will disable periodic
	 * writeback and a different non-zero value will wakeup the flusher
	 * threads to prod themselves into noticing the new interval.
	 */
	if (!ret && write && dirty_writeback_interval &&
		dirty_writeback_interval != old_interval)
		wakeup_flusher_threads(WB_REASON_PERIODIC);

	return ret;
}

#ifdef CONFIG_BLOCK
void laptop_mode_timer_fn(struct timer_list *t)
{
	struct backing_dev_info *backing_dev_info =
		from_timer(backing_dev_info, t, laptop_mode_wb_timer);

	wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER);
}

/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already scheduled
 * then push it back - the user is still using the disk.
 */
void laptop_io_completion(struct backing_dev_info *info)
{
	mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
}

/*
 * We're in laptop mode and we've just synced. The sync's writes will have
 * caused another writeback to be scheduled by laptop_io_completion.
 * Nothing needs to be written back anymore, so we unschedule the writeback.
 */
void laptop_sync_completion(void)
{
	struct backing_dev_info *bdi;

	rcu_read_lock();

	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
		del_timer(&bdi->laptop_mode_wb_timer);

	rcu_read_unlock();
}
#endif

/*
 * If ratelimit_pages is too high then we can get into dirty-data overload
 * if a large number of processes all perform writes at the same time.
 * If it is too low then SMP machines will call the (expensive) global page
 * state functions too often.
 *
 * Here we set ratelimit_pages to a level which ensures that when all CPUs are
 * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
 * thresholds.
 */
void writeback_set_ratelimit(void)
{
	struct wb_domain *dom = &global_wb_domain;
	unsigned long background_thresh;
	unsigned long dirty_thresh;

	global_dirty_limits(&background_thresh, &dirty_thresh);
	dom->dirty_limit = dirty_thresh;
	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
	if (ratelimit_pages < 16)
		ratelimit_pages = 16;
}

static int page_writeback_cpu_online(unsigned int cpu)
{
	writeback_set_ratelimit();
	return 0;
}

/*
 * Called early on to tune the page writeback dirty limits.
 *
 * We used to scale dirty pages according to how total memory
 * related to pages that could be allocated for buffering.
 *
 * However, that was when we used "dirty_ratio" to scale with
 * all memory, and we don't do that any more. "dirty_ratio"
 * is now applied to total non-HIGHPAGE memory, and as such we can't
 * get into the old insane situation any more where we had
 * large amounts of dirty pages compared to a small amount of
 * non-HIGHMEM memory, which would cause the system to OOM.
 */
void __init page_writeback_init(void)
{
	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));

	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
			  page_writeback_cpu_online, NULL);
	cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
			  page_writeback_cpu_online);
}
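
/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
 * that write_cache_pages (or whoever calls this function) will then use
 * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
 * used to avoid livelocking of writeback by a process steadily creating new
 * dirty pages in the file (thus it is important for this function to be quick
 * so that it can tag pages faster than a dirtying process can create them).
 */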
void tag_pages_for_writeback(struct address_space *mapping,
			     pgoff_t start, pgoff_t end)
{
	XA_STATE(xas, &mapping->i_pages, start);
	unsigned int tagged = 0;
	void *page;

	xas_lock_irq(&xas);
	xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
		xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
		if (++tagged % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
}
EXPORT_SYMBOL(tag_pages_for_writeback);
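
/**
 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 *
 * To avoid livelocks (when other process dirties new pages), we first tag
 * pages which should be written back with TOWRITE tag and only then start
 * writing them.  For data-integrity sync we have to be careful so that we do
 * not miss some pages (e.g., because some other process has cleared TOWRITE
 * tag we set).  The rule we follow is that TOWRITE tag can be cleared only
 * by the process clearing the DIRTY tag (and submitting the page for IO).
 *
 * Return: %0 on success, negative error code otherwise
 */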
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	int ret = 0;
	int done = 0;
	int error;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int range_whole = 0;
	xa_mark_t tag;

	pagevec_init(&pvec);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
		tag_pages_for_writeback(mapping, index, end);
		tag = PAGECACHE_TAG_TOWRITE;
	} else {
		tag = PAGECACHE_TAG_DIRTY;
	}
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			done_index = page->index;

			lock_page(page);

			/*
			 * Page truncated or invalidated. We can freely skip it
			 * then, even for data integrity operations: the page
			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
			 * even if there is now a new, dirty page at the same
			 * pagecache address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
			error = (*writepage)(page, wbc, data);
			if (unlikely(error)) {
				/*
				 * Handle errors according to the type of
				 * writeback. There's no need to continue for
				 * background writeback. Just push done_index
				 * past this page so media errors won't choke
				 * writeout for the entire file. For integrity
				 * writeback, we must process the entire dirty
				 * set regardless of errors because the fs may
				 * still have state to clear for each page. In
				 * that case we continue processing and return
				 * the first error.
				 */
				if (error == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					error = 0;
				} else if (wbc->sync_mode != WB_SYNC_ALL) {
					ret = error;
					done_index = page->index + 1;
					done = 1;
					break;
				}
				if (!ret)
					ret = error;
			}

			/*
			 * We stop writing back only if we are not doing
			 * integrity sync. In case of integrity sync we have to
			 * keep going until we have written all the pages
			 * we tagged for writeback prior to entering this loop.
			 */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	/*
	 * If we hit the last page and there is more work to be done: wrap
	 * back the index back to the start of the file for the next
	 * time we are called.
	 */
	if (wbc->range_cyclic && !done)
		done_index = 0;
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
EXPORT_SYMBOL(write_cache_pages);

/*
 * Function used by generic_writepages to call the real writepage
 * function and set the mapping flags on error
 */
static int __writepage(struct page *page, struct writeback_control *wbc,
		       void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

/**
 * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 *
 * This is a library function, which implements the writepages()
 * address_space_operation.
 *
 * Return: %0 on success, negative error code otherwise
 */
int generic_writepages(struct address_space *mapping,
		       struct writeback_control *wbc)
{
	struct blk_plug plug;
	int ret;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	blk_start_plug(&plug);
	ret = write_cache_pages(mapping, wbc, __writepage, mapping);
	blk_finish_plug(&plug);
	return ret;
}

EXPORT_SYMBOL(generic_writepages);

int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	int ret;

	if (wbc->nr_to_write <= 0)
		return 0;
	while (1) {
		if (mapping->a_ops->writepages)
			ret = mapping->a_ops->writepages(mapping, wbc);
		else
			ret = generic_writepages(mapping, wbc);
		if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
			break;
		cond_resched();
		congestion_wait(BLK_RW_ASYNC, HZ/50);
	}
	return ret;
}

/**
 * write_one_page - write out a single page and wait on I/O
 * @page: the page to write
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this
 * function returns.
 *
 * Return: %0 on success, negative error code otherwise
 */
int write_one_page(struct page *page)
{
	struct address_space *mapping = page->mapping;
	int ret = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	BUG_ON(!PageLocked(page));

	wait_on_page_writeback(page);

	if (clear_page_dirty_for_io(page)) {
		get_page(page);
		ret = mapping->a_ops->writepage(page, &wbc);
		if (ret == 0)
			wait_on_page_writeback(page);
		put_page(page);
	} else {
		unlock_page(page);
	}

	if (!ret)
		ret = filemap_check_errors(mapping);
	return ret;
}
EXPORT_SYMBOL(write_one_page);

/*
 * For address_spaces which do not use buffers nor write back.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (!PageDirty(page))
		return !TestSetPageDirty(page);
	return 0;
}

/*
 * Helper function for set_page_dirty family.
 *
 * Caller must hold lock_page_memcg().
 *
 * NOTE: This relies on being atomic wrt interrupts.
 */
void account_page_dirtied(struct page *page, struct address_space *mapping)
{
	struct inode *inode = mapping->host;

	trace_writeback_dirty_page(page, mapping);

	if (mapping_cap_account_dirty(mapping)) {
		struct bdi_writeback *wb;

		inode_attach_wb(inode, page);
		wb = inode_to_wb(inode);

		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		__inc_node_page_state(page, NR_DIRTIED);
		inc_wb_stat(wb, WB_RECLAIMABLE);
		inc_wb_stat(wb, WB_DIRTIED);
		task_io_account_write(PAGE_SIZE);
		current->nr_dirtied++;
		this_cpu_inc(bdp_ratelimits);

		mem_cgroup_track_foreign_dirty(page, wb);
	}
}

/*
 * Helper function for deaccounting dirty page without writeback.
 *
 * Caller must hold lock_page_memcg().
 */
void account_page_cleaned(struct page *page, struct address_space *mapping,
			  struct bdi_writeback *wb)
{
	if (mapping_cap_account_dirty(mapping)) {
		dec_lruvec_page_state(page, NR_FILE_DIRTY);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		dec_wb_stat(wb, WB_RECLAIMABLE);
		task_io_account_cancelled_write(PAGE_SIZE);
	}
}

/*
 * For address_spaces which do not use buffers.  Just tag the page as dirty in
 * the xarray.
 *
 * This is also used when a single buffer is being dirtied: we want to set the
 * page dirty in that case, but not all the buffers.  This is a "bottom-up"
 * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
 *
 * The caller must ensure this doesn't race with truncation.  Most will simply
 * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
 * the pte lock held, which also locks the mapping.
 */
int __set_page_dirty_nobuffers(struct page *page)
{
	lock_page_memcg(page);
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);
		unsigned long flags;

		if (!mapping) {
			unlock_page_memcg(page);
			return 1;
		}

		xa_lock_irqsave(&mapping->i_pages, flags);
		BUG_ON(page_mapping(page) != mapping);
		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		__xa_set_mark(&mapping->i_pages, page_index(page),
				   PAGECACHE_TAG_DIRTY);
		xa_unlock_irqrestore(&mapping->i_pages, flags);
		unlock_page_memcg(page);

		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	unlock_page_memcg(page);
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

/*
 * Call this whenever redirtying a page, to de-account the dirty counters
 * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written
 * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to
 * systematic errors in balanced_dirty_ratelimit and the dirty pages position
 * control.
 */
void account_page_redirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping && mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		wb = unlocked_inode_to_wb_begin(inode, &cookie);
		current->nr_dirtied--;
		dec_node_page_state(page, NR_DIRTIED);
		dec_wb_stat(wb, WB_DIRTIED);
		unlocked_inode_to_wb_end(inode, &cookie);
	}
}
EXPORT_SYMBOL(account_page_redirty);

/*
 * When a writepage implementation decides that it doesn't want to write this
 * page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage() and it should then unlock the page and return 0
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	int ret;

	wbc->pages_skipped++;
	ret = __set_page_dirty_nobuffers(page);
	account_page_redirty(page);
	return ret;
}
EXPORT_SYMBOL(redirty_page_for_writepage);

/*
 * Dirty a page.
 *
 * For pages with a mapping this should be done under the page lock
 * for the benefit of asynchronous memory errors who prefer a consistent
 * dirty state. This rule can be broken in some special cases,
 * but should be better not to.
 *
 * If the mapping doesn't provide a set_page_dirty a_op, then
 * just fall through and assume that it wants buffer_heads.
 */
int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	page = compound_head(page);
	if (likely(mapping)) {
		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
		/*
		 * readahead/lru_deactivate_page could remain
		 * PG_readahead/PG_reclaim due to race with end_page_writeback.
		 * About readahead, if the page is written, the flags would be
		 * reset. So no problem.
		 * About lru_deactivate_page, if the page is redirtied, the
		 * flag will be reset. So no problem. But if the page is used
		 * by readahead it will confuse readahead and make it restart
		 * the size rampup process. But it's a trivial problem.
		 */
		if (PageReclaim(page))
			ClearPageReclaim(page);
#ifdef CONFIG_BLOCK
		if (!spd)
			spd = __set_page_dirty_buffers;
#endif
		return (*spd)(page);
	}
	if (!PageDirty(page)) {
		if (!TestSetPageDirty(page))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(set_page_dirty);

/*
 * set_page_dirty() is racy if the caller has no reference against
 * page->mapping->host, and if the page is unlocked.  This is because another
 * CPU could truncate the page off the mapping and then free the mapping.
 *
 * Usually, the page _is_ locked, or the caller is a user-space process which
 * holds a reference on the inode by having an open file.
 *
 * In other cases, the page should be locked before running set_page_dirty().
 */
int set_page_dirty_lock(struct page *page)
{
	int ret;

	lock_page(page);
	ret = set_page_dirty(page);
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(set_page_dirty_lock);

/*
 * This cancels just the dirty bit on the kernel page itself, it does NOT
 * actually remove dirty bits on any mmap's that may be around. It also
 * leaves the page tagged dirty, so any sync activity will still find it on
 * the dirty lists, and in particular, clear_page_dirty_for_io() will still
 * look at the dirty bits in the VM.
 *
 * Doing this should *normally* only ever be done when a page is truncated,
 * and is not actually mapped anywhere at all. However, fs/buffer.c does
 * this when it notices that somebody has cleaned out all the buffers on a
 * page without actually doing it through the VM.
 */
void __cancel_dirty_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		lock_page_memcg(page);
		wb = unlocked_inode_to_wb_begin(inode, &cookie);

		if (TestClearPageDirty(page))
			account_page_cleaned(page, mapping, wb);

		unlocked_inode_to_wb_end(inode, &cookie);
		unlock_page_memcg(page);
	} else {
		ClearPageDirty(page);
	}
}
EXPORT_SYMBOL(__cancel_dirty_page);
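
/*
 * Clear a page's dirty flag, while caring for dirty memory accounting.
 * Returns true if the page was previously dirty.
 *
 * This is for preparing to put the page under writeout.  We leave the page
 * tagged as dirty in the xarray so that a concurrent write-for-sync can
 * discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage implementation
 * will run either set_page_writeback() or set_page_dirty(), at which stage we
 * bring the page's dirty flag and xarray dirty tag back into sync.
 *
 * This incoherency between the page's dirty flag and xarray tag is
 * unfortunate, but it only exists while the page is locked.
 */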
int clear_page_dirty_for_io(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret = 0;

	VM_BUG_ON_PAGE(!PageLocked(page), page);

	if (mapping && mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		/*
		 * We use this sequence to make sure that
		 *  (a) we account for dirty stats properly
		 *  (b) we tell the low-level filesystem to
		 *      mark the whole page dirty if it was
		 *      dirty in a pagetable. Only to then
		 *  (c) clean the page again and return 1 to
		 *      cause the writeback.
		 *
		 * This way we avoid all nasty races with the
		 * dirty bit in multiple places and clearing
		 * them concurrently from different threads.
		 *
		 * Note! Normally the "set_page_dirty(page)"
		 * has no effect on the actual dirty bit - since
		 * that will already usually be set. But we
		 * need the side effects, and it can help us
		 * avoid races.
		 *
		 * We basically use the page "master dirty bit"
		 * as a serialization point for all the different
		 * threads doing their things.
		 */
		if (page_mkclean(page))
			set_page_dirty(page);
		/*
		 * We carefully synchronise fault handlers against
		 * installing a dirty pte and marking the page dirty
		 * at this point.  We do this by having them hold the
		 * page lock while dirtying the page, and pages are
		 * always locked coming in here, so we get the desired
		 * exclusion.
		 */
		wb = unlocked_inode_to_wb_begin(inode, &cookie);
		if (TestClearPageDirty(page)) {
			dec_lruvec_page_state(page, NR_FILE_DIRTY);
			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
			dec_wb_stat(wb, WB_RECLAIMABLE);
			ret = 1;
		}
		unlocked_inode_to_wb_end(inode, &cookie);
		return ret;
	}
	return TestClearPageDirty(page);
}
EXPORT_SYMBOL(clear_page_dirty_for_io);

int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	struct mem_cgroup *memcg;
	struct lruvec *lruvec;
	int ret;

	memcg = lock_page_memcg(page);
	lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
	if (mapping && mapping_use_writeback_tags(mapping)) {
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		xa_lock_irqsave(&mapping->i_pages, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi)) {
				struct bdi_writeback *wb = inode_to_wb(inode);

				dec_wb_stat(wb, WB_WRITEBACK);
				__wb_writeout_inc(wb);
			}
		}

		if (mapping->host && !mapping_tagged(mapping,
						     PAGECACHE_TAG_WRITEBACK))
			sb_clear_inode_writeback(mapping->host);

		xa_unlock_irqrestore(&mapping->i_pages, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	/*
	 * NOTE: Page might be free now! Writeback doesn't hold a page
	 * reference on its own, it relies on truncation to wait for
	 * the clearing of PG_writeback. The below can only access
	 * page state that is static across allocation cycles.
	 */
	if (ret) {
		dec_lruvec_state(lruvec, NR_WRITEBACK);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		inc_node_page_state(page, NR_WRITTEN);
	}
	__unlock_page_memcg(memcg);
	return ret;
}

int __test_set_page_writeback(struct page *page, bool keep_write)
{
	struct address_space *mapping = page_mapping(page);
	int ret, access_ret;

	lock_page_memcg(page);
	if (mapping && mapping_use_writeback_tags(mapping)) {
		XA_STATE(xas, &mapping->i_pages, page_index(page));
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		xas_lock_irqsave(&xas, flags);
		xas_load(&xas);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			bool on_wblist;

			on_wblist = mapping_tagged(mapping,
						   PAGECACHE_TAG_WRITEBACK);

			xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi))
				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);

			/*
			 * We can come through here when swapping anonymous
			 * pages, so we don't necessarily have an inode to track
			 * for sync.
			 */
			if (mapping->host && !on_wblist)
				sb_mark_inode_writeback(mapping->host);
		}
		if (!PageDirty(page))
			xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
		if (!keep_write)
			xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
		xas_unlock_irqrestore(&xas, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret) {
		inc_lruvec_page_state(page, NR_WRITEBACK);
		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
	}
	unlock_page_memcg(page);
	access_ret = arch_make_page_accessible(page);
	/*
	 * If writeback has been triggered on a page that cannot be made
	 * accessible, it is too late to recover here.
	 */
	VM_BUG_ON_PAGE(access_ret != 0, page);

	return ret;
}
EXPORT_SYMBOL(__test_set_page_writeback);

/*
 * Wait for a page to complete writeback
 */
void wait_on_page_writeback(struct page *page)
{
	if (PageWriteback(page)) {
		trace_wait_on_page_writeback(page, page_mapping(page));
		wait_on_page_bit(page, PG_writeback);
	}
}
EXPORT_SYMBOL_GPL(wait_on_page_writeback);

/**
 * wait_for_stable_page() - wait for writeback to finish, if necessary.
 * @page:	The page to wait on.
 *
 * This function determines if the given page is related to a backing device
 * that requires page contents to be held stable during writeback.  If so, then
 * it will wait for any pending writeback to complete.
 */
void wait_for_stable_page(struct page *page)
{
	if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
		wait_on_page_writeback(page);
}
EXPORT_SYMBOL_GPL(wait_for_stable_page);