// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/page-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 *
 * 10Apr2002	Andrew Morton
 *		Initial version
 */
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/sched/signal.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"

/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many pages
 * by raising pause time to max_pause when falls below it.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/* The following parameters are exported via /proc/sys/vm */

/*
 * Start background writeback (via writeback threads) at this percentage
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks
 */
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty
 */
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */

/*
 * Flag that puts the machine in "laptop mode".  Also used as the delay, in
 * jiffies, before scheduled laptop-mode writeback (see laptop_io_completion()).
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

struct wb_domain global_wb_domain;

/* consolidated parameters for balance_dirty_pages() and its subroutines */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;
	struct dirty_throttle_control *gdtc;	/* only set in memcg dtc's */
#endif
	struct bdi_writeback	*wb;
	struct fprop_local_percpu *wb_completions;

	unsigned long		avail;		/* dirtyable */
	unsigned long		dirty;		/* file_dirty + writeback */
	unsigned long		thresh;		/* dirty threshold */
	unsigned long		bg_thresh;	/* dirty background threshold */

	unsigned long		wb_dirty;	/* per-wb counterparts */
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;
};

/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrary time period of 3 seconds.
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)

#ifdef CONFIG_CGROUP_WRITEBACK

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.dom = &global_wb_domain,		\
				.wb_completions = &(__wb)->completions

#define GDTC_INIT_NO_WB		.dom = &global_wb_domain

#define MDTC_INIT(__wb, __gdtc)	.wb = (__wb),				\
				.dom = mem_cgroup_wb_domain(__wb),	\
				.wb_completions = &(__wb)->memcg_completions, \
				.gdtc = __gdtc

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return mdtc->gdtc;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return &wb->memcg_completions;
}

static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	unsigned long this_bw = wb->avg_write_bandwidth;
	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
	unsigned long long min = wb->bdi->min_ratio;
	unsigned long long max = wb->bdi->max_ratio;

	/*
	 * @wb may already be clean by the time control reaches here and
	 * the total may not include its bw.
	 */
	if (this_bw < tot_bw) {
		if (min) {
			min *= this_bw;
			min = div64_ul(min, tot_bw);
		}
		if (max < 100) {
			max *= this_bw;
			max = div64_ul(max, tot_bw);
		}
	}

	*minp = min;
	*maxp = max;
}

#else	/* CONFIG_CGROUP_WRITEBACK */

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.wb_completions = &(__wb)->completions
#define GDTC_INIT_NO_WB
#define MDTC_INIT(__wb, __gdtc)

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return false;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return &global_wb_domain;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return NULL;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return NULL;
}

static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	*minp = wb->bdi->min_ratio;
	*maxp = wb->bdi->max_ratio;
}

#endif	/* CONFIG_CGROUP_WRITEBACK */

/*
 * In a memory zone, there is a certain amount of pages we consider
 * available for the page cache, which is essentially the number of
 * free and reclaimable pages, minus some zone reserves to protect
 * lowmem and the ability to uphold the zone's watermarks without
 * requiring writeback.
 *
 * This number of dirtyable pages is the base value of which the
 * user-configurable dirty ratio is the effective number of pages that
 * are allowed to be actually dirtied.  Per individual node, or
 * globally by using the sum of dirtyable pages over all nodes.
 *
 * Because the user is allowed to specify the dirty limit globally as
 * absolute number of bytes, calculating the per-node dirty limit can
 * require translating the configured limit into a percentage of
 * global dirtyable memory first.
 */

/**
 * node_dirtyable_memory - number of dirtyable pages in a node
 * @pgdat: the node
 *
 * Return: the node's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-node dirty limits.
 */
static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
{
	unsigned long nr_pages = 0;
	int z;

	for (z = 0; z < MAX_NR_ZONES; z++) {
		struct zone *zone = pgdat->node_zones + z;

		if (!populated_zone(zone))
			continue;

		nr_pages += zone_page_state(zone, NR_FREE_PAGES);
	}

	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	nr_pages -= min(nr_pages, pgdat->totalreserve_pages);

	nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
	nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);

	return nr_pages;
}

static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;
	int i;

	for_each_node_state(node, N_HIGH_MEMORY) {
		for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
			struct zone *z;
			unsigned long nr_pages;

			if (!is_highmem_idx(i))
				continue;

			z = &NODE_DATA(node)->node_zones[i];
			if (!populated_zone(z))
				continue;

			nr_pages = zone_page_state(z, NR_FREE_PAGES);
			/* watch for underflows */
			nr_pages -= min(nr_pages, high_wmark_pages(z));
			nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
			nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
			x += nr_pages;
		}
	}

	/*
	 * Unreclaimable memory (kernel memory or anonymous memory
	 * without swap) can bring down the dirtyable pages below
	 * the zone's dirty balance reserve and the above calculation
	 * will underflow.  Make sure the total never goes negative.
	 */
	if ((long)x < 0)
		x = 0;

	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory. This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(x, total);
#else
	return 0;
#endif
}

/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Return: the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */
static unsigned long global_dirtyable_memory(void)
{
	unsigned long x;

	x = global_zone_page_state(NR_FREE_PAGES);
	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	x -= min(x, totalreserve_pages);

	x += global_node_page_state(NR_INACTIVE_FILE);
	x += global_node_page_state(NR_ACTIVE_FILE);

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}
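
/*
 * Illustrative arithmetic (made-up numbers): with 1,000,000 free pages,
 * 50,000 reserved pages and 2,000,000 file LRU pages, the globally
 * dirtyable pool above works out to 1,000,000 - 50,000 + 2,000,000 =
 * 2,950,000 pages, before the highmem adjustment and the "+ 1" guard.
 */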

/**
 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
 * @dtc: dirty_throttle_control of interest
 *
 * Calculate @dtc->thresh and ->bg_thresh considering
 * vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}.  The caller
 * must ensure that @dtc->avail is set before calling this function.  The
 * dirty limits will be lifted by 1/4 for real-time tasks.
 */
static void domain_dirty_limits(struct dirty_throttle_control *dtc)
{
	const unsigned long available_memory = dtc->avail;
	struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
	unsigned long bytes = vm_dirty_bytes;
	unsigned long bg_bytes = dirty_background_bytes;
	/* convert ratios to per-PAGE_SIZE for higher precision */
	unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
	unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
	unsigned long thresh;
	unsigned long bg_thresh;
	struct task_struct *tsk;

	/* gdtc is !NULL iff @dtc is for memcg domain */
	if (gdtc) {
		unsigned long global_avail = gdtc->avail;

		/*
		 * The byte settings can't be applied directly to memcg
		 * domains.  Convert them to ratios by scaling against
		 * globally available memory.  As the ratios are in
		 * per-PAGE_SIZE units, they can be obtained by dividing
		 * bytes by the number of globally available pages.
		 */
		if (bytes)
			ratio = min(DIV_ROUND_UP(bytes, global_avail),
				    PAGE_SIZE);
		if (bg_bytes)
			bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
				       PAGE_SIZE);
		bytes = bg_bytes = 0;
	}

	if (bytes)
		thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
	else
		thresh = (ratio * available_memory) / PAGE_SIZE;

	if (bg_bytes)
		bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
	else
		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;

	if (bg_thresh >= thresh)
		bg_thresh = thresh / 2;
	tsk = current;
	if (rt_task(tsk)) {
		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
		thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
	}
	dtc->thresh = thresh;
	dtc->bg_thresh = bg_thresh;

	/* we should eventually report the domain in the TP */
	if (!gdtc)
		trace_global_dirty_state(bg_thresh, thresh);
}
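
/*
 * Quick check of the fixed-point math above (illustrative numbers): with
 * vm_dirty_ratio = 20 and PAGE_SIZE = 4096, ratio = 20 * 4096 / 100 = 819
 * "per-PAGE_SIZE" units, so available_memory = 1,000,000 pages gives
 * thresh = 819 * 1,000,000 / 4096 ~= 199,951 pages, i.e. just under 20%.
 * Carrying ratios in units of 1/PAGE_SIZE rather than 1/100 is what lets
 * byte-based and percentage-based limits share one code path without
 * losing integer precision.
 */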

/**
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 * @pbackground: out parameter for bg_thresh
 * @pdirty: out parameter for thresh
 *
 * Calculate bg_thresh and thresh for global_wb_domain.  See
 * domain_dirty_limits() for details.
 */
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };

	gdtc.avail = global_dirtyable_memory();
	domain_dirty_limits(&gdtc);

	*pbackground = gdtc.bg_thresh;
	*pdirty = gdtc.thresh;
}

/**
 * node_dirty_limit - maximum number of dirty pages allowed in a node
 * @pgdat: the node
 *
 * Return: the maximum number of dirty pages allowed in a node, based
 * on the node's dirtyable memory.
 */
static unsigned long node_dirty_limit(struct pglist_data *pgdat)
{
	unsigned long node_memory = node_dirtyable_memory(pgdat);
	struct task_struct *tsk = current;
	unsigned long dirty;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
			node_memory / global_dirtyable_memory();
	else
		dirty = vm_dirty_ratio * node_memory / 100;

	if (rt_task(tsk))
		dirty += dirty / 4;

	return dirty;
}

/**
 * node_dirty_ok - tells whether a node is within its dirty limits
 * @pgdat: the node to check
 *
 * Return: %true when the dirty pages in @pgdat are within the node's
 * dirty limit, %false if the limit is exceeded.
 */
bool node_dirty_ok(struct pglist_data *pgdat)
{
	unsigned long limit = node_dirty_limit(pgdat);
	unsigned long nr_pages = 0;

	nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
	nr_pages += node_page_state(pgdat, NR_WRITEBACK);

	return nr_pages <= limit;
}

int dirty_background_ratio_handler(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_bytes = 0;
	return ret;
}

int dirty_background_bytes_handler(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_ratio = 0;
	return ret;
}

int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	int old_ratio = vm_dirty_ratio;
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
		writeback_set_ratelimit();
		vm_dirty_bytes = 0;
	}
	return ret;
}

int dirty_bytes_handler(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned long old_bytes = vm_dirty_bytes;
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
		writeback_set_ratelimit();
		vm_dirty_ratio = 0;
	}
	return ret;
}

static unsigned long wp_next_time(unsigned long cur_time)
{
	cur_time += VM_COMPLETIONS_PERIOD_LEN;
	/* 0 has a special meaning (timer not armed), avoid wrapping to it */
	if (!cur_time)
		return 1;
	return cur_time;
}

static void wb_domain_writeout_inc(struct wb_domain *dom,
				   struct fprop_local_percpu *completions,
				   unsigned int max_prop_frac)
{
	__fprop_inc_percpu_max(&dom->completions, completions,
			       max_prop_frac);
	/* First event after period switching was turned off? */
	if (unlikely(!dom->period_time)) {
		/*
		 * We can race with other __wb_writeout_inc calls here but
		 * it does not cause any harm since the resulting time when
		 * timer will fire and what is in period_time will be
		 * roughly the same.
		 */
		dom->period_time = wp_next_time(jiffies);
		mod_timer(&dom->period_timer, dom->period_time);
	}
}

/*
 * Increment @wb's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
static inline void __wb_writeout_inc(struct bdi_writeback *wb)
{
	struct wb_domain *cgdom;

	inc_wb_stat(wb, WB_WRITTEN);
	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
			       wb->bdi->max_prop_frac);

	cgdom = mem_cgroup_wb_domain(wb);
	if (cgdom)
		wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
				       wb->bdi->max_prop_frac);
}

void wb_writeout_inc(struct bdi_writeback *wb)
{
	unsigned long flags;

	local_irq_save(flags);
	__wb_writeout_inc(wb);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(wb_writeout_inc);

/*
 * On idle system, we can be called long after we scheduled because we use
 * deferred timers so count with missed periods.
 */
static void writeout_period(struct timer_list *t)
{
	struct wb_domain *dom = from_timer(dom, t, period_timer);
	int miss_periods = (jiffies - dom->period_time) /
						 VM_COMPLETIONS_PERIOD_LEN;

	if (fprop_new_period(&dom->completions, miss_periods + 1)) {
		dom->period_time = wp_next_time(dom->period_time +
				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
		mod_timer(&dom->period_timer, dom->period_time);
	} else {
		/*
		 * Aging has zero effect any more, so we can disable it
		 * until the timer is rearmed in wb_domain_writeout_inc().
		 */
		dom->period_time = 0;
	}
}

int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
{
	memset(dom, 0, sizeof(*dom));

	spin_lock_init(&dom->lock);

	timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE);

	dom->dirty_limit_tstamp = jiffies;

	return fprop_global_init(&dom->completions, gfp);
}

#ifdef CONFIG_CGROUP_WRITEBACK
void wb_domain_exit(struct wb_domain *dom)
{
	del_timer_sync(&dom->period_timer);
	fprop_global_destroy(&dom->completions);
}
#endif

/*
 * bdi_min_ratio keeps the sum of the minimum dirty shares of all
 * registered backing devices, which, for obvious reasons, can not
 * exceed 100%.
 */
static unsigned int bdi_min_ratio;

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;

	spin_lock_bh(&bdi_lock);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}

int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
{
	int ret = 0;

	if (max_ratio > 100)
		return -EINVAL;

	spin_lock_bh(&bdi_lock);
	if (bdi->min_ratio > max_ratio) {
		ret = -EINVAL;
	} else {
		bdi->max_ratio = max_ratio;
		bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}
EXPORT_SYMBOL(bdi_set_max_ratio);

static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	return (thresh + bg_thresh) / 2;
}

static unsigned long hard_dirty_limit(struct wb_domain *dom,
				      unsigned long thresh)
{
	return max(thresh, dom->dirty_limit);
}

/*
 * Memory which can be further allocated to a memcg domain is capped by
 * system-wide clean memory excluding the amount being used in the domain.
 */
static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
			    unsigned long filepages, unsigned long headroom)
{
	struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
	unsigned long clean = filepages - min(filepages, mdtc->dirty);
	unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
	unsigned long other_clean = global_clean - min(global_clean, clean);

	mdtc->avail = filepages + min(headroom, other_clean);
}

/**
 * __wb_calc_thresh - @wb's share of dirty throttling threshold
 * @dtc: dirty_throttle_context of interest
 *
 * Note that balance_dirty_pages() will only seriously take it as a hard limit
 * when sleeping max_pause per page is not enough to keep the dirty pages under
 * control. For example, when the device is completely stalled due to some error
 * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
 * In the other normal situations, it acts more gently by throttling the tasks
 * more (rather than completely block them) when the wb dirty pages go high.
 *
 * It allocates high/low dirty limits to fast/slow devices, in order to prevent
 * - starving fast devices
 * - piling up dirty pages (that will take long time to sync) on slow devices
 *
 * The wb's share of dirty limit will be adapting to its throughput and
 * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
 *
 * Return: @wb's dirty limit in pages. The term "dirty" in the context of
 * dirty balancing includes all PG_dirty and PG_writeback pages.
 */
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	u64 wb_thresh;
	unsigned long numerator, denominator;
	unsigned long wb_min_ratio, wb_max_ratio;

	/*
	 * Calculate this wb's share of the thresh ratio.
	 */
	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
			      &numerator, &denominator);

	wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
	wb_thresh *= numerator;
	wb_thresh = div64_ul(wb_thresh, denominator);

	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);

	wb_thresh += (thresh * wb_min_ratio) / 100;
	if (wb_thresh > (thresh * wb_max_ratio) / 100)
		wb_thresh = thresh * wb_max_ratio / 100;

	return wb_thresh;
}

unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
					       .thresh = thresh };
	return __wb_calc_thresh(&gdtc);
}

/*
 *                           setpoint - dirty 3
 *        f(dirty) := 1.0 + (----------------)
 *                           limit - setpoint
 *
 * it's a 3rd order polynomial that subjects to
 *
 * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
 * (2) f(setpoint) = 1.0 => the balance point
 * (3) f(limit)    = 0   => the hard limit
 * (4) df/dx      <= 0	 => negative feedback control
 * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
 *     => fast response on large errors; small oscillation near setpoint
 */
static long long pos_ratio_polynom(unsigned long setpoint,
				   unsigned long dirty,
				   unsigned long limit)
{
	long long pos_ratio;
	long x;

	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
		      (limit - setpoint) | 1);
	pos_ratio = x;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;

	return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
}
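
/*
 * Sanity check of the polynomial (shift factor dropped for readability):
 * f(dirty) = 1 + ((setpoint - dirty) / (limit - setpoint))^3, hence
 * f(setpoint) = 1 (balance point), f(limit) = 0 (hard stop) and, since
 * setpoint - freerun = limit - setpoint, f(freerun) = 2 (fastest rampup).
 * The clamp keeps rounding artifacts inside [0, 2].
 */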

/*
 * Dirty position control.
 *
 * (o) global/wb setpoints
 *
 * We want the dirty pages be balanced around the global/wb setpoints.
 * When the number of dirty pages is higher/lower than the setpoint, the
 * dirty position control ratio (and hence task dirty ratelimit) will be
 * decreased/increased to bring the dirty pages back to the setpoint.
 *
 *     pos_ratio = wb_position_ratio()
 *     task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
 *
 *     if (dirty < setpoint) scale up   pos_ratio
 *     if (dirty > setpoint) scale down pos_ratio
 *
 *     if (wb_dirty < wb_setpoint) scale up   pos_ratio
 *     if (wb_dirty > wb_setpoint) scale down pos_ratio
 *
 * (o) global control line
 *
 * The global control line is the 3rd order polynomial from
 * pos_ratio_polynom(), stretched over the [freerun, limit] range: it
 * starts at 2.0 at the freerun ceiling, passes 1.0 at the setpoint midway
 * between freerun and limit, and drops to 0 at the hard dirty limit.
 *
 * (o) wb control line
 *
 * The wb control line is a linear correction superimposed on the global
 * one, centred on wb_setpoint, whose slope is chosen so that it spans
 * roughly 8 seconds worth of the wb's average write bandwidth around the
 * setpoint (in the single-wb case), degrading to a flat pos_ratio / 4 far
 * beyond that span.  It scales pos_ratio further down/up when the wb is
 * over/under its own share of the dirty pages.
 */
static void wb_position_ratio(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long wb_thresh = dtc->wb_thresh;
	unsigned long x_intercept;
	unsigned long setpoint;		/* dirty pages' target balance point */
	unsigned long wb_setpoint;
	unsigned long span;
	long long pos_ratio;		/* for scaling up/down the rate limit */
	long x;

	dtc->pos_ratio = 0;

	if (unlikely(dtc->dirty >= limit))
		return;

	/*
	 * global setpoint
	 *
	 * See comment for pos_ratio_polynom().
	 */
	setpoint = (freerun + limit) / 2;
	pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);

	/*
	 * The strictlimit feature is a tool preventing mistrusted filesystems
	 * from growing a large number of dirty pages before throttling. For
	 * such filesystems balance_dirty_pages always checks wb counters
	 * against wb limits, even if global "nr_dirty" is under "freerun".
	 * This is especially important for fuse which sets bdi->capabilities =
	 * BDI_CAP_STRICTLIMIT.
	 *
	 * Here we calculate pos_ratio based on wb_dirty and wb_thresh.  We
	 * cannot use global counters in these calculations because we want
	 * to throttle a process writing to a strictlimit wb much earlier
	 * than the global "freerun" is reached.
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		long long wb_pos_ratio;

		if (dtc->wb_dirty < 8) {
			dtc->pos_ratio = min_t(long long, pos_ratio * 2,
					       2 << RATELIMIT_CALC_SHIFT);
			return;
		}

		if (dtc->wb_dirty >= wb_thresh)
			return;

		wb_setpoint = dirty_freerun_ceiling(wb_thresh,
						    dtc->wb_bg_thresh);

		if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
			return;

		wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
						 wb_thresh);

		/*
		 * Typically, for strictlimit case, wb_setpoint << setpoint
		 * and pos_ratio >> wb_pos_ratio. In other words, global
		 * state ("dirty") is not the limiting factor and we have to
		 * make the decision based on wb counters. But there is an
		 * important case when global pos_ratio should get precedence:
		 * global limits are exceeded (e.g. due to activity on other
		 * wb's) while the given strictlimit wb is below its limit.
		 *
		 * Note that min() below somewhat changes the dynamics of the
		 * control system: normally pos_ratio can be well over 3 when
		 * globally at freerun and the wb is well below its setpoint,
		 * whereas the maximum here is 2.  This may want tweaking if
		 * the control system proves too slow to adapt.
		 */
		dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
		return;
	}

	/*
	 * We have computed the basic pos_ratio above based on the global
	 * situation. If the wb is over/under its share of dirty pages, we
	 * want to scale pos_ratio further down/up. That is done by the
	 * following mechanism: a linear wb control line
	 *
	 *                        x_intercept - wb_dirty
	 *        f(wb_dirty) := --------------------------
	 *                        x_intercept - wb_setpoint
	 *
	 * that subjects to f(wb_setpoint) = 1.0 and, in the single wb case,
	 * a slope of -1 / (8 * write_bw), i.e.
	 * x_intercept = wb_setpoint + 8 * write_bw.
	 */
	if (unlikely(wb_thresh > dtc->thresh))
		wb_thresh = dtc->thresh;
	/*
	 * It's very possible that wb_thresh is close to 0 not because the
	 * device is slow, but that it has remained inactive for long time.
	 * Honour such devices a reasonable good (hopefully IO efficient)
	 * threshold, so that the occasional writes won't be blocked and active
	 * writes can rampup the threshold quickly.
	 */
	wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
	/*
	 * scale global setpoint to wb's:
	 *	wb_setpoint = setpoint * wb_thresh / thresh
	 */
	x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
	wb_setpoint = setpoint * (u64)x >> 16;
	/*
	 * Use span=(8*write_bw) in single wb case as indicated by
	 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case.
	 *
	 *        wb_thresh                    thresh - wb_thresh
	 * span = --------- * (8 * write_bw) + ------------------ * wb_thresh
	 *          thresh                           thresh
	 */
	span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
	x_intercept = wb_setpoint + span;

	if (dtc->wb_dirty < x_intercept - span / 4) {
		pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
				      (x_intercept - wb_setpoint) | 1);
	} else
		pos_ratio /= 4;

	/*
	 * wb reserve area, safeguard against dirty pool underrun and disk idle
	 * It may push the desired control point of global dirty pages higher
	 * than setpoint.
	 */
	x_intercept = wb_thresh / 2;
	if (dtc->wb_dirty < x_intercept) {
		if (dtc->wb_dirty > x_intercept / 8)
			pos_ratio = div_u64(pos_ratio * x_intercept,
					    dtc->wb_dirty);
		else
			pos_ratio *= 8;
	}

	dtc->pos_ratio = pos_ratio;
}

static void wb_update_write_bandwidth(struct bdi_writeback *wb,
				      unsigned long elapsed,
				      unsigned long written)
{
	const unsigned long period = roundup_pow_of_two(3 * HZ);
	unsigned long avg = wb->avg_write_bandwidth;
	unsigned long old = wb->write_bandwidth;
	u64 bw;

	/*
	 * bw = written * HZ / elapsed
	 *
	 *                   bw * elapsed + write_bandwidth * (period - elapsed)
	 * write_bandwidth = ---------------------------------------------------
	 *                                          period
	 *
	 * @written may have decreased due to account_page_redirty().
	 * Avoid underflowing @bw calculation.
	 */
	bw = written - min(written, wb->written_stamp);
	bw *= HZ;
	if (unlikely(elapsed > period)) {
		bw = div64_ul(bw, elapsed);
		avg = bw;
		goto out;
	}
	bw += (u64)wb->write_bandwidth * (period - elapsed);
	bw >>= ilog2(period);

	/*
	 * one more level of smoothing, for filtering out sudden spikes
	 */
	if (avg > old && old >= (unsigned long)bw)
		avg -= (avg - old) >> 3;

	if (avg < old && old <= (unsigned long)bw)
		avg += (old - avg) >> 3;

out:
	/* keep avg > 0 to avoid dividing by zero */
	avg = max(avg, 1LU);
	if (wb_has_dirty_io(wb)) {
		long delta = avg - wb->avg_write_bandwidth;
		WARN_ON_ONCE(atomic_long_add_return(delta,
					&wb->bdi->tot_write_bandwidth) <= 0);
	}
	wb->write_bandwidth = bw;
	wb->avg_write_bandwidth = avg;
}
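
/*
 * Worked example of the blend above (illustrative numbers): with
 * period = 4096 jiffies (~3s at HZ=1000), elapsed = 1024 and a measured
 * 100 MB/s against a previous estimate of 60 MB/s, the new estimate is
 * (100 * 1024 + 60 * 3072) / 4096 = 70 MB/s, a quarter step toward the
 * sample, so a single noisy interval cannot swing the estimate far.
 */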

static void update_dirty_limit(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	unsigned long limit = dom->dirty_limit;

	/*
	 * Follow up in one step.
	 */
	if (limit < thresh) {
		limit = thresh;
		goto update;
	}

	/*
	 * Follow down slowly. Use the higher one as the target, because thresh
	 * may drop below dirty. This is exactly the reason to introduce
	 * dom->dirty_limit which is guaranteed to lie above the dirty pages.
	 */
	thresh = max(thresh, dtc->dirty);
	if (limit > thresh) {
		limit -= (limit - thresh) >> 5;
		goto update;
	}
	return;
update:
	dom->dirty_limit = limit;
}

static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
				    unsigned long now)
{
	struct wb_domain *dom = dtc_dom(dtc);

	/*
	 * check locklessly first to optimize away locking for the most time
	 */
	if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
		return;

	spin_lock(&dom->lock);
	if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
		update_dirty_limit(dtc);
		dom->dirty_limit_tstamp = now;
	}
	spin_unlock(&dom->lock);
}

/*
 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal wb tasks will be curbed at or below it in long term.
 * Obviously it should be around (write_bw / N) when there are N dd tasks.
 */
static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
				      unsigned long dirtied,
				      unsigned long elapsed)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long dirty = dtc->dirty;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long setpoint = (freerun + limit) / 2;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long dirty_ratelimit = wb->dirty_ratelimit;
	unsigned long dirty_rate;
	unsigned long task_ratelimit;
	unsigned long balanced_dirty_ratelimit;
	unsigned long step;
	unsigned long x;
	unsigned long shift;

	/*
	 * The dirty rate will match the writeout rate in long term, except
	 * when dirty pages are truncated by userspace or re-dirtied by FS.
	 */
	dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;

	/*
	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
	 */
	task_ratelimit = (u64)dirty_ratelimit *
					dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
	task_ratelimit++; /* it helps rampup dirty_ratelimit from tiny values */

	/*
	 * A linear estimation of the "balanced" throttle rate. The theory is,
	 * if there are N dd tasks, each throttled at task_ratelimit, the wb's
	 * dirty rate will be measured to be (N * task_ratelimit). So the
	 * balanced rate, which is the wb's estimated write bandwidth divided
	 * by N, can be worked out as
	 *
	 *                                                 write_bw
	 * balanced_dirty_ratelimit = task_ratelimit * ----------
	 *                                                dirty_rate
	 *
	 * This estimation converges even when tasks were throttled at a wrong
	 * rate: if tasks run slower than expected, the measured dirty rate
	 * drops and the next balanced rate is scaled up accordingly, and vice
	 * versa.
	 */
	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
					   dirty_rate | 1);

	/*
	 * balanced_dirty_ratelimit ~= write_bw / N <= write_bw
	 */
	if (unlikely(balanced_dirty_ratelimit > write_bw))
		balanced_dirty_ratelimit = write_bw;

	/*
	 * We could safely do this and return immediately:
	 *
	 *	wb->dirty_ratelimit = balanced_dirty_ratelimit;
	 *
	 * However to get a more stable dirty_ratelimit, the below elaborated
	 * code makes use of task_ratelimit to filter out singular points and
	 * limit the step size.
	 *
	 * The below code essentially only uses the relative value of
	 *
	 *	task_ratelimit - dirty_ratelimit
	 *	= (pos_ratio - 1) * dirty_ratelimit
	 *
	 * which reflects the direction and size of the dirty position error.
	 * When dirty_ratelimit sits below balanced_dirty_ratelimit, the dirty
	 * pages tend to sink under the setpoint; task_ratelimit will then be
	 * above dirty_ratelimit and we step dirty_ratelimit up, and vice
	 * versa.  min3/max3-filtering over the three rates keeps singular
	 * points (sudden spikes of dirty_rate or write_bw) from dragging
	 * dirty_ratelimit around.
	 */
	step = 0;

	/*
	 * For strictlimit case, calculations above were based on wb counters
	 * and limits (starting from pos_ratio = wb_position_ratio() and up to
	 * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
	 * Hence, to calculate "step" properly, we have to use wb_dirty as
	 * "dirty" and wb_setpoint as "setpoint".
	 *
	 * We rampup dirty_ratelimit forcibly if wb_dirty is low because
	 * it's possible that wb_thresh is close to zero due to inactivity
	 * of the backing device.
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		dirty = dtc->wb_dirty;
		if (dtc->wb_dirty < 8)
			setpoint = dtc->wb_dirty + 1;
		else
			setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
	}

	if (dirty < setpoint) {
		x = min3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit < x)
			step = x - dirty_ratelimit;
	} else {
		x = max3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit > x)
			step = dirty_ratelimit - x;
	}

	/*
	 * Don't pursue 100% rate matching. It's impossible since the balanced
	 * rate itself is constantly fluctuating. So decrease the track speed
	 * when it gets close to the target. Helps eliminate pointless tremors.
	 */
	shift = dirty_ratelimit / (2 * step + 1);
	if (shift < BITS_PER_LONG)
		step = DIV_ROUND_UP(step >> shift, 8);
	else
		step = 0;

	if (dirty_ratelimit < balanced_dirty_ratelimit)
		dirty_ratelimit += step;
	else
		dirty_ratelimit -= step;

	wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
	wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;

	trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
}

static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
				  struct dirty_throttle_control *mdtc,
				  unsigned long start_time,
				  bool update_ratelimit)
{
	struct bdi_writeback *wb = gdtc->wb;
	unsigned long now = jiffies;
	unsigned long elapsed = now - wb->bw_time_stamp;
	unsigned long dirtied;
	unsigned long written;

	lockdep_assert_held(&wb->list_lock);

	/*
	 * rate-limit, only update once every 200ms.
	 */
	if (elapsed < BANDWIDTH_INTERVAL)
		return;

	dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
	written = percpu_counter_read(&wb->stat[WB_WRITTEN]);

	/*
	 * Skip quiet periods when disk bandwidth is under-utilized.
	 * (at least 1s idle time between two flusher runs)
	 */
	if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
		goto snapshot;

	if (update_ratelimit) {
		domain_update_bandwidth(gdtc, now);
		wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);

		/*
		 * @mdtc is always NULL if !CGROUP_WRITEBACK but the
		 * compiler has no way to figure that out.  Help it.
		 */
		if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
			domain_update_bandwidth(mdtc, now);
			wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
		}
	}
	wb_update_write_bandwidth(wb, elapsed, written);

snapshot:
	wb->dirtied_stamp = dirtied;
	wb->written_stamp = written;
	wb->bw_time_stamp = now;
}

void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };

	__wb_update_bandwidth(&gdtc, NULL, start_time, false);
}

/*
 * After a task dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to start dirty throttling.
 *
 * If dirty_poll_interval is too low, big NUMA machines will call the expensive
 * global_zone_page_state() too often. So scale it near-sqrt to the safety
 * margin (the number of pages we may dirty without exceeding the dirty
 * limits).
 */
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	if (thresh > dirty)
		return 1UL << (ilog2(thresh - dirty) >> 1);

	return 1;
}
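
/*
 * Example: with a safety margin of thresh - dirty = 1024 pages,
 * ilog2(1024) = 10, so the task may dirty 1 << 5 = 32 more pages before
 * polling the global counters again, roughly the square root of the
 * margin.
 */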

static unsigned long wb_max_pause(struct bdi_writeback *wb,
				  unsigned long wb_dirty)
{
	unsigned long bw = wb->avg_write_bandwidth;
	unsigned long t;

	/*
	 * Limit pause time for small memory systems. If sleeping for too long
	 * time, a small pool of dirty/writeback pages may go empty and disk go
	 * idle.
	 *
	 * 8 serves as the safety ratio.
	 */
	t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
	t++;

	return min_t(unsigned long, t, MAX_PAUSE);
}
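
/*
 * Example (illustrative numbers): at bw = 12800 pages/s (~50 MB/s with 4k
 * pages) and HZ = 1000, roundup_pow_of_two(1 + HZ / 8) = 128, so
 * t ~= wb_dirty / 101 jiffies; 10,000 dirty pages yield a ~100ms cap,
 * comfortably below the 200ms MAX_PAUSE ceiling.
 */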

static long wb_min_pause(struct bdi_writeback *wb,
			 long max_pause,
			 unsigned long task_ratelimit,
			 unsigned long dirty_ratelimit,
			 int *nr_dirtied_pause)
{
	long hi = ilog2(wb->avg_write_bandwidth);
	long lo = ilog2(wb->dirty_ratelimit);
	long t;		/* target pause */
	long pause;	/* estimated next pause */
	int pages;	/* target nr_dirtied_pause */

	/* target for 10ms pause on 1-dd case */
	t = max(1, HZ / 100);

	/*
	 * Scale up pause time for concurrent dirtiers in order to reduce CPU
	 * overheads.
	 *
	 * (N * 10ms) on 2^N concurrent tasks.
	 */
	if (hi > lo)
		t += (hi - lo) * (10 * HZ) / 1024;

	/*
	 * This is a bit convoluted. We try to base the next nr_dirtied_pause
	 * on the much more stable dirty_ratelimit. However the next pause time
	 * will be computed based on task_ratelimit and the two rate limits may
	 * depart considerably at some time. Especially if task_ratelimit goes
	 * below dirty_ratelimit/2 and the target pause is max_pause, the next
	 * pause time will be max_pause*2 _trimmed down_ to max_pause. As a
	 * result task_ratelimit won't be executed faithfully, which could
	 * eventually bring down dirty_ratelimit.
	 *
	 * We apply two rules to fix it up:
	 * 1) try to estimate the next pause time and if necessary, use a lower
	 *    nr_dirtied_pause so as not to exceed max_pause. When this happens,
	 *    nr_dirtied_pause will be "dancing" with task_ratelimit.
	 * 2) limit the target pause time to max_pause/2, so that the normal
	 *    small fluctuations of task_ratelimit won't trigger rule (1) and
	 *    nr_dirtied_pause will remain as stable as dirty_ratelimit.
	 */
	t = min(t, 1 + max_pause / 2);
	pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);

	/*
	 * Tiny nr_dirtied_pause is found to hurt I/O performance for workloads
	 * that mix sync reads with async writes: overly frequent short
	 * throttling pauses break up otherwise contiguous request streams.
	 * So keep nr_dirtied_pause no smaller than DIRTY_POLL_THRESH.
	 */
	if (pages < DIRTY_POLL_THRESH) {
		t = max_pause;
		pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
		if (pages > DIRTY_POLL_THRESH) {
			pages = DIRTY_POLL_THRESH;
			t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
		}
	}

	pause = HZ * pages / (task_ratelimit + 1);
	if (pause > max_pause) {
		t = max_pause;
		pages = task_ratelimit * t / roundup_pow_of_two(HZ);
	}

	*nr_dirtied_pause = pages;
	/*
	 * The minimal pause time will normally be half the target pause time.
	 */
	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
}

static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long wb_reclaimable;

	/*
	 * wb_thresh is not treated as some limiting factor as
	 * dirty_thresh, due to reasons
	 * - in JBOD setup, wb_thresh can fluctuate a lot
	 * - in a system with HDD and USB key, the USB key may somehow
	 *   go into state (wb_dirty >> wb_thresh) either because
	 *   wb_dirty starts high, or because wb_thresh drops low.
	 *   In this case we don't want to hard throttle the USB key
	 *   dirtiers for 100 seconds until wb_dirty drops under
	 *   wb_thresh. Instead the auxiliary wb control line in
	 *   wb_position_ratio() will let the dirtier task progress
	 *   at some rate <= (write_bw / 2) for bringing down wb_dirty.
	 */
	dtc->wb_thresh = __wb_calc_thresh(dtc);
	dtc->wb_bg_thresh = dtc->thresh ?
		div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;

	/*
	 * In order to avoid the stacked BDI deadlock we need
	 * to ensure we accurately count the 'dirty' pages when
	 * the threshold is low.
	 *
	 * Otherwise it would be possible to get thresh+n pages
	 * reported dirty, even though there are thresh-m pages
	 * actually dirty; with m+n sitting in the percpu
	 * deferred_pages.
	 */
	if (dtc->wb_thresh < 2 * wb_stat_error()) {
		wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
		dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
	} else {
		wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
		dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
	}
}

/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 */
static void balance_dirty_pages(struct bdi_writeback *wb,
				unsigned long pages_dirtied)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;
	struct dirty_throttle_control *sdtc;
	unsigned long nr_reclaimable;
	long period;
	long pause;
	long max_pause;
	long min_pause;
	int nr_dirtied_pause;
	bool dirty_exceeded = false;
	unsigned long task_ratelimit;
	unsigned long dirty_ratelimit;
	struct backing_dev_info *bdi = wb->bdi;
	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
	unsigned long start_time = jiffies;

	for (;;) {
		unsigned long now = jiffies;
		unsigned long dirty, thresh, bg_thresh;
		unsigned long m_dirty = 0;	/* stop bogus uninit warnings */
		unsigned long m_thresh = 0;
		unsigned long m_bg_thresh = 0;

		nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
		gdtc->avail = global_dirtyable_memory();
		gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);

		domain_dirty_limits(gdtc);

		if (unlikely(strictlimit)) {
			wb_dirty_limits(gdtc);

			dirty = gdtc->wb_dirty;
			thresh = gdtc->wb_thresh;
			bg_thresh = gdtc->wb_bg_thresh;
		} else {
			dirty = gdtc->dirty;
			thresh = gdtc->thresh;
			bg_thresh = gdtc->bg_thresh;
		}

		if (mdtc) {
			unsigned long filepages, headroom, writeback;

			/*
			 * If @wb belongs to !root memcg, repeat the same
			 * basic calculations for the memcg domain.
			 */
			mem_cgroup_wb_stats(wb, &filepages, &headroom,
					    &mdtc->dirty, &writeback);
			mdtc->dirty += writeback;
			mdtc_calc_avail(mdtc, filepages, headroom);

			domain_dirty_limits(mdtc);

			if (unlikely(strictlimit)) {
				wb_dirty_limits(mdtc);
				m_dirty = mdtc->wb_dirty;
				m_thresh = mdtc->wb_thresh;
				m_bg_thresh = mdtc->wb_bg_thresh;
			} else {
				m_dirty = mdtc->dirty;
				m_thresh = mdtc->thresh;
				m_bg_thresh = mdtc->bg_thresh;
			}
		}

		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the wb limits are ramping up in case of !strictlimit.
		 *
		 * In strictlimit case make decision based on the wb counters
		 * and limits. Small writeouts when the wb limits are ramping
		 * up are the price we consciously pay for strictlimit-ing.
		 *
		 * If memcg domain is in effect, @dirty should be under
		 * both global and memcg freerun ceilings.
		 */
		if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
		    (!mdtc ||
		     m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
			unsigned long intv;
			unsigned long m_intv;

free_running:
			intv = dirty_poll_interval(dirty, thresh);
			m_intv = ULONG_MAX;

			current->dirty_paused_when = now;
			current->nr_dirtied = 0;
			if (mdtc)
				m_intv = dirty_poll_interval(m_dirty, m_thresh);
			current->nr_dirtied_pause = min(intv, m_intv);
			break;
		}

		if (unlikely(!writeback_in_progress(wb)))
			wb_start_background_writeback(wb);

		mem_cgroup_flush_foreign(wb);

		/*
		 * Calculate global domain's pos_ratio and select the
		 * global dtc by default.
		 */
		if (!strictlimit) {
			wb_dirty_limits(gdtc);

			if ((current->flags & PF_LOCAL_THROTTLE) &&
			    gdtc->wb_dirty <
			    dirty_freerun_ceiling(gdtc->wb_thresh,
						  gdtc->wb_bg_thresh))
				/*
				 * LOCAL_THROTTLE tasks must not be throttled
				 * when below the per-wb freerun ceiling.
				 */
				goto free_running;
		}

		dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
			((gdtc->dirty > gdtc->thresh) || strictlimit);

		wb_position_ratio(gdtc);
		sdtc = gdtc;

		if (mdtc) {
			/*
			 * If memcg domain is in effect, calculate its
			 * pos_ratio.  @wb should satisfy constraints from
			 * both global and memcg domains.  Choose the one
			 * w/ lower pos_ratio.
			 */
			if (!strictlimit) {
				wb_dirty_limits(mdtc);

				if ((current->flags & PF_LOCAL_THROTTLE) &&
				    mdtc->wb_dirty <
				    dirty_freerun_ceiling(mdtc->wb_thresh,
							  mdtc->wb_bg_thresh))
					/*
					 * LOCAL_THROTTLE tasks must not be
					 * throttled when below the per-wb
					 * freerun ceiling.
					 */
					goto free_running;
			}
			dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
				((mdtc->dirty > mdtc->thresh) || strictlimit);

			wb_position_ratio(mdtc);
			if (mdtc->pos_ratio < gdtc->pos_ratio)
				sdtc = mdtc;
		}

		if (dirty_exceeded && !wb->dirty_exceeded)
			wb->dirty_exceeded = 1;

		if (time_is_before_jiffies(wb->bw_time_stamp +
					   BANDWIDTH_INTERVAL)) {
			spin_lock(&wb->list_lock);
			__wb_update_bandwidth(gdtc, mdtc, start_time, true);
			spin_unlock(&wb->list_lock);
		}

		/* throttle according to the chosen dtc */
		dirty_ratelimit = wb->dirty_ratelimit;
		task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
							RATELIMIT_CALC_SHIFT;
		max_pause = wb_max_pause(wb, sdtc->wb_dirty);
		min_pause = wb_min_pause(wb, max_pause,
					 task_ratelimit, dirty_ratelimit,
					 &nr_dirtied_pause);

		if (unlikely(task_ratelimit == 0)) {
			period = max_pause;
			pause = max_pause;
			goto pause;
		}
		period = HZ * pages_dirtied / task_ratelimit;
		pause = period;
		if (current->dirty_paused_when)
			pause -= now - current->dirty_paused_when;
		/*
		 * For less than 1s think time (ext3/4 may block the dirtier
		 * for up to 800ms from time to time on 1-HDD; so does xfs,
		 * however at much less frequency), try to compensate it in
		 * future periods by updating the virtual time; otherwise just
		 * do a reset, as the task is likely too slow.
		 */
		if (pause < min_pause) {
			trace_balance_dirty_pages(wb,
						  sdtc->thresh,
						  sdtc->bg_thresh,
						  sdtc->dirty,
						  sdtc->wb_thresh,
						  sdtc->wb_dirty,
						  dirty_ratelimit,
						  task_ratelimit,
						  pages_dirtied,
						  period,
						  min(pause, 0L),
						  start_time);
			if (pause < -HZ) {
				current->dirty_paused_when = now;
				current->nr_dirtied = 0;
			} else if (period) {
				current->dirty_paused_when += period;
				current->nr_dirtied = 0;
			} else if (current->nr_dirtied_pause <= pages_dirtied)
				current->nr_dirtied_pause += pages_dirtied;
			break;
		}
		if (unlikely(pause > max_pause)) {
			/* for occasional dropped task_ratelimit */
			now += min(pause - max_pause, max_pause);
			pause = max_pause;
		}

pause:
		trace_balance_dirty_pages(wb,
					  sdtc->thresh,
					  sdtc->bg_thresh,
					  sdtc->dirty,
					  sdtc->wb_thresh,
					  sdtc->wb_dirty,
					  dirty_ratelimit,
					  task_ratelimit,
					  pages_dirtied,
					  period,
					  pause,
					  start_time);
		__set_current_state(TASK_KILLABLE);
		wb->dirty_sleep = now;
		io_schedule_timeout(pause);

		current->dirty_paused_when = now + pause;
		current->nr_dirtied = 0;
		current->nr_dirtied_pause = nr_dirtied_pause;

		/*
		 * This is typically equal to (dirty < thresh) and can also
		 * keep "1000+ dd on a slow USB stick" under control.
		 */
		if (task_ratelimit)
			break;

		/*
		 * In the case of an unresponsive NFS server and the NFS dirty
		 * pages exceeds dirty_thresh, give the other good wb's a pipe
		 * to go through, so that tasks on them still remain responsive.
		 *
		 * In theory 1 page is enough to keep the consumer-producer
		 * pipe going: the flusher cleans 1 page => the task dirties 1
		 * more page. However wb_dirty has accounting errors.  So use
		 * the larger and more IO friendly wb_stat_error.
		 */
		if (sdtc->wb_dirty <= wb_stat_error())
			break;

		if (fatal_signal_pending(current))
			break;
	}

	if (!dirty_exceeded && wb->dirty_exceeded)
		wb->dirty_exceeded = 0;

	if (writeback_in_progress(wb))
		return;

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if (laptop_mode)
		return;

	if (nr_reclaimable > gdtc->bg_thresh)
		wb_start_background_writeback(wb);
}

static DEFINE_PER_CPU(int, bdp_ratelimits);

/*
 * Normal tasks are throttled by
 *	loop {
 *		dirty tsk->nr_dirtied_pause pages;
 *		take a snap in balance_dirty_pages();
 *	}
 * However there is a worst case. If every task exit immediately when dirtied
 * (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never be
 * called to throttle the page dirties. The solution is to save the not yet
 * throttled page dirties in dirty_throttle_leaks on task exit and charge them
 * to the entering task.
 */
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;

/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * Once we're over the dirty memory limit we decrease the ratelimiting
 * by a lot, to prevent individual processes from overshooting the limit
 * by (ratelimit_pages) each.
 */
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;
	int ratelimit;
	int *p;

	if (!(bdi->capabilities & BDI_CAP_WRITEBACK))
		return;

	if (inode_cgwb_enabled(inode))
		wb = wb_get_create_current(bdi, GFP_KERNEL);
	if (!wb)
		wb = &bdi->wb;

	ratelimit = current->nr_dirtied_pause;
	if (wb->dirty_exceeded)
		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));

	preempt_disable();
	/*
	 * This prevents one CPU to accumulate too many dirtied pages without
	 * calling into balance_dirty_pages(), which can happen when there are
	 * 1000+ tasks, all of them start dirtying pages at exactly the same
	 * time, hence all honoured too large initial task->nr_dirtied_pause.
	 */
	p = this_cpu_ptr(&bdp_ratelimits);
	if (unlikely(current->nr_dirtied >= ratelimit))
		*p = 0;
	else if (unlikely(*p >= ratelimit_pages)) {
		*p = 0;
		ratelimit = 0;
	}
	/*
	 * Pick up the dirtied pages by the exited tasks. This avoids lots of
	 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
	 * the dirty throttling and livelock other long-run dirtiers.
	 */
	p = this_cpu_ptr(&dirty_throttle_leaks);
	if (*p > 0 && current->nr_dirtied < ratelimit) {
		unsigned long nr_pages_dirtied;
		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
		*p -= nr_pages_dirtied;
		current->nr_dirtied += nr_pages_dirtied;
	}
	preempt_enable();

	if (unlikely(current->nr_dirtied >= ratelimit))
		balance_dirty_pages(wb, current->nr_dirtied);

	wb_put(wb);
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);

/**
 * wb_over_bg_thresh - does @wb need to be written back?
 * @wb: bdi_writeback of interest
 *
 * Determines whether background writeback should keep writing @wb or it's
 * already clean enough.
 *
 * Return: %true if writeback should continue.
 */
bool wb_over_bg_thresh(struct bdi_writeback *wb)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;
	unsigned long reclaimable;
	unsigned long thresh;

	/*
	 * Similar to balance_dirty_pages() but ignores pages being written
	 * as we're trying to decide whether to put more under writeback.
	 */
	gdtc->avail = global_dirtyable_memory();
	gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
	domain_dirty_limits(gdtc);

	if (gdtc->dirty > gdtc->bg_thresh)
		return true;

	thresh = wb_calc_thresh(gdtc->wb, gdtc->bg_thresh);
	if (thresh < 2 * wb_stat_error())
		reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
	else
		reclaimable = wb_stat(wb, WB_RECLAIMABLE);

	if (reclaimable > thresh)
		return true;

	if (mdtc) {
		unsigned long filepages, headroom, writeback;

		mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
				    &writeback);
		mdtc_calc_avail(mdtc, filepages, headroom);
		domain_dirty_limits(mdtc);	/* ditto, ignore writeback */

		if (mdtc->dirty > mdtc->bg_thresh)
			return true;

		thresh = wb_calc_thresh(mdtc->wb, mdtc->bg_thresh);
		if (thresh < 2 * wb_stat_error())
			reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
		else
			reclaimable = wb_stat(wb, WB_RECLAIMABLE);

		if (reclaimable > thresh)
			return true;
	}

	return false;
}

/*
 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
 */
int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
		void *buffer, size_t *length, loff_t *ppos)
{
	unsigned int old_interval = dirty_writeback_interval;
	int ret;

	ret = proc_dointvec(table, write, buffer, length, ppos);

	/*
	 * Writing 0 to dirty_writeback_interval will disable periodic
	 * writeback and a different non-zero value will wake up the flusher
	 * threads so that the change takes effect immediately rather than
	 * after the next (possibly long) periodic writeback round.
	 */
	if (!ret && write && dirty_writeback_interval &&
		dirty_writeback_interval != old_interval)
		wakeup_flusher_threads(WB_REASON_PERIODIC);

	return ret;
}

#ifdef CONFIG_BLOCK
void laptop_mode_timer_fn(struct timer_list *t)
{
	struct backing_dev_info *backing_dev_info =
		from_timer(backing_dev_info, t, laptop_mode_wb_timer);

	wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER);
}

/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already scheduled
 * then push it back - the user is still using the disk.
 */
void laptop_io_completion(struct backing_dev_info *info)
{
	mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
}

/*
 * We're in laptop mode and we've just synced. The sync's writes will have
 * caused another writeback to be scheduled by laptop_io_completion.
 * Nothing needs to be written back anymore, so we unschedule the writeback.
 */
void laptop_sync_completion(void)
{
	struct backing_dev_info *bdi;

	rcu_read_lock();

	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
		del_timer(&bdi->laptop_mode_wb_timer);

	rcu_read_unlock();
}
#endif

/*
 * If ratelimit_pages is too high then we can get into dirty-data overload
 * if a large number of processes all perform writes at the same time.
 *
 * Here we set ratelimit_pages to a level which ensures that when all CPUs are
 * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
 * thresholds before writeback cuts in.
 */
void writeback_set_ratelimit(void)
{
	struct wb_domain *dom = &global_wb_domain;
	unsigned long background_thresh;
	unsigned long dirty_thresh;

	global_dirty_limits(&background_thresh, &dirty_thresh);
	dom->dirty_limit = dirty_thresh;
	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
	if (ratelimit_pages < 16)
		ratelimit_pages = 16;
}
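
/*
 * Example: dirty_thresh = 200,000 pages on a 4-CPU machine yields
 * ratelimit_pages = 200,000 / (4 * 32) = 1562, so even with every CPU
 * dirtying flat out the overshoot between checks is bounded by
 * 4 * 1562 / 200,000 ~= 3% of the threshold.
 */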

static int page_writeback_cpu_online(unsigned int cpu)
{
	writeback_set_ratelimit();
	return 0;
}

/*
 * Called early on to tune the page writeback dirty limits.
 *
 * We used to scale dirty pages according to how total memory
 * related to pages that could be allocated for buffers.
 *
 * However, that was when we used "dirty_ratio" to scale with
 * all memory, and we don't do that any more. "dirty_ratio"
 * is now applied to total non-HIGHPAGE memory, and as such we can't
 * get into the old insane situation any more where we had
 * large amounts of dirty pages compared to a small amount of
 * non-HIGHMEM memory.
 *
 * But we might still want to scale the dirty_ratio by how
 * much memory the box has..
 */
void __init page_writeback_init(void)
{
	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));

	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
			  page_writeback_cpu_online, NULL);
	cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
			  page_writeback_cpu_online);
}

/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
 * that write_cache_pages (or whoever calls this function) will then use
 * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
 * used to avoid livelocking of writeback by a process steadily creating new
 * dirty pages in the file (thus it is important for this function to be quick
 * so that it can tag pages faster than a dirtying process can create them).
 */
void tag_pages_for_writeback(struct address_space *mapping,
			     pgoff_t start, pgoff_t end)
{
	XA_STATE(xas, &mapping->i_pages, start);
	unsigned int tagged = 0;
	void *page;

	xas_lock_irq(&xas);
	xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
		xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
		if (++tagged % XA_CHECK_SCHED)
			continue;

		xas_pause(&xas);
		xas_unlock_irq(&xas);
		cond_resched();
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);
}
EXPORT_SYMBOL(tag_pages_for_writeback);
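
/*
 * Typical use (sketch): a range writeback walker first tags the window and
 * then only consumes TOWRITE-tagged pages, so pages dirtied after this call
 * cannot livelock the walk:
 *
 *	tag_pages_for_writeback(mapping, start, end);
 *	(then iterate pages marked PAGECACHE_TAG_TOWRITE and write them out)
 *
 * This is exactly what write_cache_pages() below does for WB_SYNC_ALL and
 * tagged_writepages writeback.
 */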

/**
 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 *
 * To avoid livelocks (when other process dirties new pages), we first tag
 * pages which should be written back with TOWRITE tag and only then start
 * writing them. For data-integrity sync we have to be careful so that we do
 * not miss some pages (e.g., because some other process has cleared TOWRITE
 * tag we set). The rule we follow is that TOWRITE tag can be cleared only
 * by the process clearing the DIRTY tag (and submitting the page for IO).
 *
 * To avoid deadlocks between range_cyclic writeback and callers that hold
 * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
 * we do not loop back to the start of the file. Doing so causes a page
 * lock/page writeback access order inversion - we should only ever lock
 * multiple pages in ascending page->index order, and looping back to the start
 * of the file violates that rule and causes deadlocks.
 *
 * Return: %0 on success, negative error code otherwise
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	int ret = 0;
	int done = 0;
	int error;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int range_whole = 0;
	xa_mark_t tag;

	pagevec_init(&pvec);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
		tag_pages_for_writeback(mapping, index, end);
		tag = PAGECACHE_TAG_TOWRITE;
	} else {
		tag = PAGECACHE_TAG_DIRTY;
	}
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			done_index = page->index;

			lock_page(page);

			/*
			 * Page truncated or invalidated. We can freely skip it
			 * then, even for data integrity operations: the page
			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
			 * even if there is now a new, dirty page at the same
			 * pagecache address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
			error = (*writepage)(page, wbc, data);
			if (unlikely(error)) {
				/*
				 * Handle errors according to the type of
				 * writeback. There's no need to continue for
				 * background writeback. Just push done_index
				 * past this page so media errors won't choke
				 * writeout for the entire file. For integrity
				 * writeback, we must process the entire dirty
				 * set regardless of errors because the fs may
				 * still have state to clear for each page. In
				 * that case we continue processing and return
				 * the first error.
				 */
				if (error == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					error = 0;
				} else if (wbc->sync_mode != WB_SYNC_ALL) {
					ret = error;
					done_index = page->index + 1;
					done = 1;
					break;
				}
				if (!ret)
					ret = error;
			}

			/*
			 * We stop writing back only if we are not doing
			 * integrity sync. In case of integrity sync we have to
			 * keep going until we have written all the pages
			 * we tagged for writeback prior to entering this loop.
			 */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	/*
	 * If we hit the last page and there is more work to be done: wrap
	 * back the index back to the start of the file for the next
	 * time we are called.
	 */
	if (wbc->range_cyclic && !done)
		done_index = 0;
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
EXPORT_SYMBOL(write_cache_pages);

/*
 * Function used by generic_writepages to call the real writepage
 * function and set the mapping flags on error
 */
static int __writepage(struct page *page, struct writeback_control *wbc,
		       void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

/**
 * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 *
 * This is a library function, which implements the writepages()
 * address_space_operation.
 *
 * Return: %0 on success, negative error code otherwise
 */
int generic_writepages(struct address_space *mapping,
		       struct writeback_control *wbc)
{
	struct blk_plug plug;
	int ret;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	blk_start_plug(&plug);
	ret = write_cache_pages(mapping, wbc, __writepage, mapping);
	blk_finish_plug(&plug);
	return ret;
}

EXPORT_SYMBOL(generic_writepages);

int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	int ret;

	if (wbc->nr_to_write <= 0)
		return 0;
	while (1) {
		if (mapping->a_ops->writepages)
			ret = mapping->a_ops->writepages(mapping, wbc);
		else
			ret = generic_writepages(mapping, wbc);
		if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
			break;
		cond_resched();
		congestion_wait(BLK_RW_ASYNC, HZ/50);
	}
	return ret;
}

/**
 * write_one_page - write out a single page and wait on I/O
 * @page: the page to write
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this
 * function returns.
 *
 * Return: %0 on success, negative error code otherwise
 */
int write_one_page(struct page *page)
{
	struct address_space *mapping = page->mapping;
	int ret = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	BUG_ON(!PageLocked(page));

	wait_on_page_writeback(page);

	if (clear_page_dirty_for_io(page)) {
		get_page(page);
		ret = mapping->a_ops->writepage(page, &wbc);
		if (ret == 0)
			wait_on_page_writeback(page);
		put_page(page);
	} else {
		unlock_page(page);
	}

	if (!ret)
		ret = filemap_check_errors(mapping);
	return ret;
}
EXPORT_SYMBOL(write_one_page);

/*
 * For address_spaces which do not use buffers nor write back.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (!PageDirty(page))
		return !TestSetPageDirty(page);
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_no_writeback);

/*
 * Helper function for set_page_dirty family.
 *
 * Caller must hold lock_page_memcg().
 *
 * NOTE: This relies on being atomic wrt interrupts.
 */
static void account_page_dirtied(struct page *page,
		struct address_space *mapping)
{
	struct inode *inode = mapping->host;

	trace_writeback_dirty_page(page, mapping);

	if (mapping_can_writeback(mapping)) {
		struct bdi_writeback *wb;

		inode_attach_wb(inode, page);
		wb = inode_to_wb(inode);

		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		__inc_node_page_state(page, NR_DIRTIED);
		inc_wb_stat(wb, WB_RECLAIMABLE);
		inc_wb_stat(wb, WB_DIRTIED);
		task_io_account_write(PAGE_SIZE);
		current->nr_dirtied++;
		__this_cpu_inc(bdp_ratelimits);

		mem_cgroup_track_foreign_dirty(page, wb);
	}
}

/*
 * Helper function for deaccounting dirty page without writeback.
 *
 * Caller must hold lock_page_memcg().
 */
void account_page_cleaned(struct page *page, struct address_space *mapping,
			  struct bdi_writeback *wb)
{
	if (mapping_can_writeback(mapping)) {
		dec_lruvec_page_state(page, NR_FILE_DIRTY);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		dec_wb_stat(wb, WB_RECLAIMABLE);
		task_io_account_cancelled_write(PAGE_SIZE);
	}
}

/*
 * Mark the page dirty, and set it dirty in the page cache, and mark the inode
 * dirty.
 *
 * If warn is true, then emit a warning if the page is not uptodate and has
 * not been truncated.
 *
 * The caller must hold lock_page_memcg().
 */
void __set_page_dirty(struct page *page, struct address_space *mapping,
		      int warn)
{
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	if (page->mapping) {	/* Race with truncate? */
		WARN_ON_ONCE(warn && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		__xa_set_mark(&mapping->i_pages, page_index(page),
				PAGECACHE_TAG_DIRTY);
	}
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

/*
 * For address_spaces which do not use buffers.  Just tag the page as dirty in
 * the xarray.
 *
 * This is also used when a single buffer is being dirtied: we want to set the
 * page dirty in that case, but not all the buffers.  This is a "bottom-up"
 * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
 *
 * The caller must ensure this doesn't race with truncation.  Most will simply
 * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
 * the pte lock held, which also locks the mapping.
 */
int __set_page_dirty_nobuffers(struct page *page)
{
	lock_page_memcg(page);
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);

		if (!mapping) {
			unlock_page_memcg(page);
			return 1;
		}
		__set_page_dirty(page, mapping, !PagePrivate(page));
		unlock_page_memcg(page);

		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	unlock_page_memcg(page);
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

/*
 * Call this whenever redirtying a page, to de-account the dirty counters
 * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written
 * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to
 * systematic errors in balanced_dirty_ratelimit and the dirty pages position
 * control.
 */
void account_page_redirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping && mapping_can_writeback(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		wb = unlocked_inode_to_wb_begin(inode, &cookie);
		current->nr_dirtied--;
		dec_node_page_state(page, NR_DIRTIED);
		dec_wb_stat(wb, WB_DIRTIED);
		unlocked_inode_to_wb_end(inode, &cookie);
	}
}
EXPORT_SYMBOL(account_page_redirty);

/*
 * When a writepage implementation decides that it doesn't want to write this
 * page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage() and it should then unlock the page and return 0
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	int ret;

	wbc->pages_skipped++;
	ret = __set_page_dirty_nobuffers(page);
	account_page_redirty(page);
	return ret;
}
EXPORT_SYMBOL(redirty_page_for_writepage);

/*
 * Dirty a page.
 *
 * For pages with a mapping this should be done under the page lock for the
 * benefit of asynchronous memory errors who prefer a consistent dirty state.
 * This rule can be broken in some special cases, but should be better not to.
 */
int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	page = compound_head(page);
	if (likely(mapping)) {
		/*
		 * readahead/lru_deactivate_page could remain
		 * PG_readahead/PG_reclaim due to race with end_page_writeback
		 * About readahead, if the page is written, the flags would be
		 * reset. So no problem.
		 * About lru_deactivate_page, if the page is redirtied, the flag
		 * will be reset. So no problem. but if the page is used by readahead
		 * it will confuse readahead and make it restart the size rampup
		 * process. But it's a trivial problem.
		 */
		if (PageReclaim(page))
			ClearPageReclaim(page);
		return mapping->a_ops->set_page_dirty(page);
	}
	if (!PageDirty(page)) {
		if (!TestSetPageDirty(page))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(set_page_dirty);

/*
 * set_page_dirty() is racy if the caller has no reference against
 * page->mapping->host, and if the page is unlocked.  This is because another
 * CPU could truncate the page off the mapping and then free the mapping.
 *
 * Usually, the page _is_ locked, or the caller is a user-space process which
 * holds a reference on the inode by having an open file.
 *
 * In other cases, the page should be locked before running set_page_dirty().
 */
int set_page_dirty_lock(struct page *page)
{
	int ret;

	lock_page(page);
	ret = set_page_dirty(page);
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(set_page_dirty_lock);

/*
 * This cancels just the dirty bit on the kernel page itself, it does NOT
 * actually remove dirty bits on any mmap's that may be around. It also
 * leaves the page tagged dirty, so any sync activity will still find it on
 * the dirty lists, and in particular, clear_page_dirty_for_io() will still
 * look at the dirty bits in the VM.
 *
 * Doing this should *normally* only ever be done when a page is truncated,
 * and is not actually mapped anywhere at all. However, fs/buffer.c does
 * this when it notices that somebody has cleaned out all the buffers on a
 * page without actually doing it through the VM.
 */
void __cancel_dirty_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (mapping_can_writeback(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		lock_page_memcg(page);
		wb = unlocked_inode_to_wb_begin(inode, &cookie);

		if (TestClearPageDirty(page))
			account_page_cleaned(page, mapping, wb);

		unlocked_inode_to_wb_end(inode, &cookie);
		unlock_page_memcg(page);
	} else {
		ClearPageDirty(page);
	}
}
EXPORT_SYMBOL(__cancel_dirty_page);

/*
 * Clear a page's dirty flag, while caring for dirty memory accounting.
 * Returns true if the page was previously dirty.
 *
 * This is for preparing to put the page under writeout.  We leave the page
 * tagged as dirty in the xarray so that a concurrent write-for-sync can
 * discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage implementation
 * will run either set_page_writeback() or set_page_dirty(), at which stage we
 * bring the page's dirty flag and xarray dirty tag back into sync.
 *
 * This incoherency between the page's dirty flag and xarray tag is
 * unfortunate, but it only exists while the page is locked.
 */
int clear_page_dirty_for_io(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret = 0;

	VM_BUG_ON_PAGE(!PageLocked(page), page);

	if (mapping && mapping_can_writeback(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		/*
		 * Yes, Virginia, this is indeed insane.
		 *
		 * We use this sequence to make sure that
		 *  (a) we account for dirty stats properly
		 *  (b) we tell the low-level filesystem to
		 *      mark the whole page dirty if it was
		 *      dirty in a pagetable. Only to then
		 *  (c) clean the page again and return 1 to
		 *      cause the writeback.
		 *
		 * This way we avoid all nasty races with the
		 * dirty bit in multiple places and clearing
		 * them concurrently from different threads.
		 *
		 * Note! Normally the "set_page_dirty(page)"
		 * has no effect on the actual dirty bit - since
		 * that will already usually be set. But we
		 * need the side effects, and it can help us
		 * avoid races.
		 *
		 * We basically use the page "master dirty bit"
		 * as a serialization point for all the different
		 * threads doing their things.
		 */
		if (page_mkclean(page))
			set_page_dirty(page);
		/*
		 * We carefully synchronise fault handlers against
		 * installing a dirty pte and marking the page dirty
		 * at this point.  We do this by having them hold the
		 * page lock while dirtying the page, and pages are
		 * always locked coming in here, so we get the desired
		 * exclusion.
		 */
		wb = unlocked_inode_to_wb_begin(inode, &cookie);
		if (TestClearPageDirty(page)) {
			dec_lruvec_page_state(page, NR_FILE_DIRTY);
			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
			dec_wb_stat(wb, WB_RECLAIMABLE);
			ret = 1;
		}
		unlocked_inode_to_wb_end(inode, &cookie);
		return ret;
	}
	return TestClearPageDirty(page);
}
EXPORT_SYMBOL(clear_page_dirty_for_io);

int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	lock_page_memcg(page);
	if (mapping && mapping_use_writeback_tags(mapping)) {
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		xa_lock_irqsave(&mapping->i_pages, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
				struct bdi_writeback *wb = inode_to_wb(inode);

				dec_wb_stat(wb, WB_WRITEBACK);
				__wb_writeout_inc(wb);
			}
		}

		if (mapping->host && !mapping_tagged(mapping,
						     PAGECACHE_TAG_WRITEBACK))
			sb_clear_inode_writeback(mapping->host);

		xa_unlock_irqrestore(&mapping->i_pages, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	if (ret) {
		dec_lruvec_page_state(page, NR_WRITEBACK);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		inc_node_page_state(page, NR_WRITTEN);
	}
	unlock_page_memcg(page);
	return ret;
}

int __test_set_page_writeback(struct page *page, bool keep_write)
{
	struct address_space *mapping = page_mapping(page);
	int ret, access_ret;

	lock_page_memcg(page);
	if (mapping && mapping_use_writeback_tags(mapping)) {
		XA_STATE(xas, &mapping->i_pages, page_index(page));
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		xas_lock_irqsave(&xas, flags);
		xas_load(&xas);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			bool on_wblist;

			on_wblist = mapping_tagged(mapping,
						   PAGECACHE_TAG_WRITEBACK);

			xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT)
				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);

			/*
			 * We can come through here when swapping anonymous
			 * pages, so we don't necessarily have an inode to
			 * track for sync.
			 */
			if (mapping->host && !on_wblist)
				sb_mark_inode_writeback(mapping->host);
		}
		if (!PageDirty(page))
			xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
		if (!keep_write)
			xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
		xas_unlock_irqrestore(&xas, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret) {
		inc_lruvec_page_state(page, NR_WRITEBACK);
		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
	}
	unlock_page_memcg(page);
	access_ret = arch_make_page_accessible(page);
	/*
	 * If writeback has been triggered on a page that cannot be made
	 * accessible, it is too late to recover here.
	 */
	VM_BUG_ON_PAGE(access_ret != 0, page);

	return ret;
}
EXPORT_SYMBOL(__test_set_page_writeback);

/*
 * Wait for a page to complete writeback
 */
void wait_on_page_writeback(struct page *page)
{
	while (PageWriteback(page)) {
		trace_wait_on_page_writeback(page, page_mapping(page));
		wait_on_page_bit(page, PG_writeback);
	}
}
EXPORT_SYMBOL_GPL(wait_on_page_writeback);

/*
 * Wait for a page to complete writeback.  Returns -EINTR if we get a
 * fatal signal while waiting.
 */
int wait_on_page_writeback_killable(struct page *page)
{
	while (PageWriteback(page)) {
		trace_wait_on_page_writeback(page, page_mapping(page));
		if (wait_on_page_bit_killable(page, PG_writeback))
			return -EINTR;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable);

/**
 * wait_for_stable_page() - wait for writeback to finish, if necessary.
 * @page:	The page to wait on.
 *
 * This function determines if the given page is related to a backing device
 * that requires page contents to be held stable during writeback.  If so, then
 * it will wait for any pending writeback to complete.
 */
void wait_for_stable_page(struct page *page)
{
	page = thp_head(page);
	if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
		wait_on_page_writeback(page);
}
EXPORT_SYMBOL_GPL(wait_for_stable_page);