/*
 * mm/page-writeback.c
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level: dirty page accounting, the dirty throttling done
 * by balance_dirty_pages(), and the write bandwidth estimation that
 * drives it.
 */
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/sched/signal.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"

/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many
 * pages by raising pause time to max_pause when falls below it.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/* Parameters exported via /proc/sys/vm: */

/*
 * Start background writeback (via writeback threads) at this percentage
 * of dirtyable memory.
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory.
 */
unsigned long dirty_background_bytes;

/*
 * Free highmem will not be counted as dirtyable memory unless
 * vm_highmem_is_dirtyable is set.
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage of
 * dirtyable memory.
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory.
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks, in centiseconds.
 */
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty, in centiseconds.
 */
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode"; when non-zero it also serves
 * as the flush delay, in jiffies, used by laptop_io_completion().
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* The wb_domain covering system-wide (non-memcg) dirty state. */
struct wb_domain global_wb_domain;

/*
 * Snapshot of the domain- and wb-level dirty state and limits that
 * balance_dirty_pages() works against.
 */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;
	struct dirty_throttle_control *gdtc;	/* only set in memcg dtc's */
#endif
	struct bdi_writeback	*wb;
	struct fprop_local_percpu *wb_completions;

	unsigned long		avail;		/* dirtyable */
	unsigned long		dirty;		/* file_dirty + write + nfs */
	unsigned long		thresh;		/* dirty threshold */
	unsigned long		bg_thresh;	/* dirty background threshold */

	unsigned long		wb_dirty;	/* per-wb counterparts */
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;
};

/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrarily chosen number. The longer the period, the slower fractions
 * will reflect changes in the current writeout rate.
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)

#ifdef CONFIG_CGROUP_WRITEBACK

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.dom = &global_wb_domain,		\
				.wb_completions = &(__wb)->completions

#define GDTC_INIT_NO_WB		.dom = &global_wb_domain

#define MDTC_INIT(__wb, __gdtc)	.wb = (__wb),				\
				.dom = mem_cgroup_wb_domain(__wb),	\
				.wb_completions = &(__wb)->memcg_completions, \
				.gdtc = __gdtc
168
169static bool mdtc_valid(struct dirty_throttle_control *dtc)
170{
171 return dtc->dom;
172}
173
174static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
175{
176 return dtc->dom;
177}
178
179static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
180{
181 return mdtc->gdtc;
182}
183
184static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
185{
186 return &wb->memcg_completions;
187}
188
189static void wb_min_max_ratio(struct bdi_writeback *wb,
190 unsigned long *minp, unsigned long *maxp)
191{
192 unsigned long this_bw = wb->avg_write_bandwidth;
193 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
194 unsigned long long min = wb->bdi->min_ratio;
195 unsigned long long max = wb->bdi->max_ratio;
196
197
198
199
200
201 if (this_bw < tot_bw) {
202 if (min) {
203 min *= this_bw;
204 min = div64_ul(min, tot_bw);
205 }
206 if (max < 100) {
207 max *= this_bw;
208 max = div64_ul(max, tot_bw);
209 }
210 }
211
212 *minp = min;
213 *maxp = max;
214}
215
216#else
217
218#define GDTC_INIT(__wb) .wb = (__wb), \
219 .wb_completions = &(__wb)->completions
220#define GDTC_INIT_NO_WB
221#define MDTC_INIT(__wb, __gdtc)
222
223static bool mdtc_valid(struct dirty_throttle_control *dtc)
224{
225 return false;
226}
227
228static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
229{
230 return &global_wb_domain;
231}
232
233static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
234{
235 return NULL;
236}
237
238static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
239{
240 return NULL;
241}
242
243static void wb_min_max_ratio(struct bdi_writeback *wb,
244 unsigned long *minp, unsigned long *maxp)
245{
246 *minp = wb->bdi->min_ratio;
247 *maxp = wb->bdi->max_ratio;
248}
249
250#endif
251
/*
 * In a memory zone, there is a certain amount of pages we consider
 * available for the page cache, which is essentially the number of
 * free and reclaimable pages, minus some zone reserves to protect
 * lowmem and the ability to uphold the zone's watermarks without
 * requiring writeback.
 *
 * This number of dirtyable pages is the base value on which the
 * user-configurable dirty ratio is applied, either per node or
 * globally as the sum over all nodes.
 */

/**
 * node_dirtyable_memory - number of dirtyable pages in a node
 * @pgdat: the node
 *
 * Return: the node's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-node dirty limits.
 */
277static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
278{
279 unsigned long nr_pages = 0;
280 int z;
281
282 for (z = 0; z < MAX_NR_ZONES; z++) {
283 struct zone *zone = pgdat->node_zones + z;
284
285 if (!populated_zone(zone))
286 continue;
287
288 nr_pages += zone_page_state(zone, NR_FREE_PAGES);
289 }
290
291
292
293
294
295
296 nr_pages -= min(nr_pages, pgdat->totalreserve_pages);
297
298 nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
299 nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);
300
301 return nr_pages;
302}
303
304static unsigned long highmem_dirtyable_memory(unsigned long total)
305{
306#ifdef CONFIG_HIGHMEM
307 int node;
308 unsigned long x = 0;
309 int i;
310
311 for_each_node_state(node, N_HIGH_MEMORY) {
312 for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
313 struct zone *z;
314 unsigned long nr_pages;
315
316 if (!is_highmem_idx(i))
317 continue;
318
319 z = &NODE_DATA(node)->node_zones[i];
320 if (!populated_zone(z))
321 continue;
322
323 nr_pages = zone_page_state(z, NR_FREE_PAGES);
324
325 nr_pages -= min(nr_pages, high_wmark_pages(z));
326 nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
327 nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
328 x += nr_pages;
329 }
330 }
331
332
333
334
335
336
337
338
339
340
341 if ((long)x < 0)
342 x = 0;
343
344
345
346
347
348
349
350 return min(x, total);
351#else
352 return 0;
353#endif
354}
355
/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Return: the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */
362static unsigned long global_dirtyable_memory(void)
363{
364 unsigned long x;
365
366 x = global_zone_page_state(NR_FREE_PAGES);
367
368
369
370
371
372 x -= min(x, totalreserve_pages);
373
374 x += global_node_page_state(NR_INACTIVE_FILE);
375 x += global_node_page_state(NR_ACTIVE_FILE);
376
377 if (!vm_highmem_is_dirtyable)
378 x -= highmem_dirtyable_memory(x);
379
380 return x + 1;
381}
382
/**
 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
 * @dtc: dirty_throttle_control of interest
 *
 * Calculate @dtc->thresh and ->bg_thresh considering
 * vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}.  The caller
 * must ensure that @dtc->avail is set before calling this function.  The
 * dirty limits are lifted by 1/4 for real-time tasks and for tasks
 * flagged PF_LESS_THROTTLE.
 */
393static void domain_dirty_limits(struct dirty_throttle_control *dtc)
394{
395 const unsigned long available_memory = dtc->avail;
396 struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
397 unsigned long bytes = vm_dirty_bytes;
398 unsigned long bg_bytes = dirty_background_bytes;
399
400 unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
401 unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
402 unsigned long thresh;
403 unsigned long bg_thresh;
404 struct task_struct *tsk;
405
406
407 if (gdtc) {
408 unsigned long global_avail = gdtc->avail;
409
410
411
412
413
414
415
416
417 if (bytes)
418 ratio = min(DIV_ROUND_UP(bytes, global_avail),
419 PAGE_SIZE);
420 if (bg_bytes)
421 bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
422 PAGE_SIZE);
423 bytes = bg_bytes = 0;
424 }
425
426 if (bytes)
427 thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
428 else
429 thresh = (ratio * available_memory) / PAGE_SIZE;
430
431 if (bg_bytes)
432 bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
433 else
434 bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
435
436 if (bg_thresh >= thresh)
437 bg_thresh = thresh / 2;
438 tsk = current;
439 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
440 bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
441 thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
442 }
443 dtc->thresh = thresh;
444 dtc->bg_thresh = bg_thresh;
445
446
447 if (!gdtc)
448 trace_global_dirty_state(bg_thresh, thresh);
449}
450
/**
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 * @pbackground: out parameter for bg_thresh
 * @pdirty: out parameter for thresh
 *
 * Calculate bg_thresh and thresh for global_wb_domain.  See
 * domain_dirty_limits() for details.
 */
459void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
460{
461 struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
462
463 gdtc.avail = global_dirtyable_memory();
464 domain_dirty_limits(&gdtc);
465
466 *pbackground = gdtc.bg_thresh;
467 *pdirty = gdtc.thresh;
468}
469
/**
 * node_dirty_limit - maximum number of dirty pages allowed in a node
 * @pgdat: the node
 *
 * Return: the maximum number of dirty pages allowed in a node, based
 * on the node's dirtyable memory.
 */
477static unsigned long node_dirty_limit(struct pglist_data *pgdat)
478{
479 unsigned long node_memory = node_dirtyable_memory(pgdat);
480 struct task_struct *tsk = current;
481 unsigned long dirty;
482
483 if (vm_dirty_bytes)
484 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
485 node_memory / global_dirtyable_memory();
486 else
487 dirty = vm_dirty_ratio * node_memory / 100;
488
489 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
490 dirty += dirty / 4;
491
492 return dirty;
493}
494
/**
 * node_dirty_ok - tells whether a node is within its dirty limits
 * @pgdat: the node to check
 *
 * Return: %true when the dirty pages in @pgdat are within the node's
 * dirty limit, %false if the limit is exceeded.
 */
502bool node_dirty_ok(struct pglist_data *pgdat)
503{
504 unsigned long limit = node_dirty_limit(pgdat);
505 unsigned long nr_pages = 0;
506
507 nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
508 nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
509 nr_pages += node_page_state(pgdat, NR_WRITEBACK);
510
511 return nr_pages <= limit;
512}
513
514int dirty_background_ratio_handler(struct ctl_table *table, int write,
515 void __user *buffer, size_t *lenp,
516 loff_t *ppos)
517{
518 int ret;
519
520 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
521 if (ret == 0 && write)
522 dirty_background_bytes = 0;
523 return ret;
524}
525
526int dirty_background_bytes_handler(struct ctl_table *table, int write,
527 void __user *buffer, size_t *lenp,
528 loff_t *ppos)
529{
530 int ret;
531
532 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
533 if (ret == 0 && write)
534 dirty_background_ratio = 0;
535 return ret;
536}
537
538int dirty_ratio_handler(struct ctl_table *table, int write,
539 void __user *buffer, size_t *lenp,
540 loff_t *ppos)
541{
542 int old_ratio = vm_dirty_ratio;
543 int ret;
544
545 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
546 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
547 writeback_set_ratelimit();
548 vm_dirty_bytes = 0;
549 }
550 return ret;
551}
552
553int dirty_bytes_handler(struct ctl_table *table, int write,
554 void __user *buffer, size_t *lenp,
555 loff_t *ppos)
556{
557 unsigned long old_bytes = vm_dirty_bytes;
558 int ret;
559
560 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
561 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
562 writeback_set_ratelimit();
563 vm_dirty_ratio = 0;
564 }
565 return ret;
566}
567
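/*
 * Advance the writeout period timestamp.  0 means "period timer is not
 * armed", so never return it as a valid time.
 */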
568static unsigned long wp_next_time(unsigned long cur_time)
569{
570 cur_time += VM_COMPLETIONS_PERIOD_LEN;
571
572 if (!cur_time)
573 return 1;
574 return cur_time;
575}
576
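/*
 * Account a single writeout completion towards the domain's completion
 * proportions, and make sure the aging period timer is running.
 */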
577static void wb_domain_writeout_inc(struct wb_domain *dom,
578 struct fprop_local_percpu *completions,
579 unsigned int max_prop_frac)
580{
581 __fprop_inc_percpu_max(&dom->completions, completions,
582 max_prop_frac);
583
584 if (unlikely(!dom->period_time)) {
585
586
587
588
589
590
591 dom->period_time = wp_next_time(jiffies);
592 mod_timer(&dom->period_timer, dom->period_time);
593 }
594}
595
/*
 * Increment @wb's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
600static inline void __wb_writeout_inc(struct bdi_writeback *wb)
601{
602 struct wb_domain *cgdom;
603
604 inc_wb_stat(wb, WB_WRITTEN);
605 wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
606 wb->bdi->max_prop_frac);
607
608 cgdom = mem_cgroup_wb_domain(wb);
609 if (cgdom)
610 wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
611 wb->bdi->max_prop_frac);
612}
613
614void wb_writeout_inc(struct bdi_writeback *wb)
615{
616 unsigned long flags;
617
618 local_irq_save(flags);
619 __wb_writeout_inc(wb);
620 local_irq_restore(flags);
621}
622EXPORT_SYMBOL_GPL(wb_writeout_inc);
623
624
625
626
627
628static void writeout_period(struct timer_list *t)
629{
630 struct wb_domain *dom = from_timer(dom, t, period_timer);
631 int miss_periods = (jiffies - dom->period_time) /
632 VM_COMPLETIONS_PERIOD_LEN;
633
634 if (fprop_new_period(&dom->completions, miss_periods + 1)) {
635 dom->period_time = wp_next_time(dom->period_time +
636 miss_periods * VM_COMPLETIONS_PERIOD_LEN);
637 mod_timer(&dom->period_timer, dom->period_time);
638 } else {
639
640
641
642
643 dom->period_time = 0;
644 }
645}
646
647int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
648{
649 memset(dom, 0, sizeof(*dom));
650
651 spin_lock_init(&dom->lock);
652
653 timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE);
654
655 dom->dirty_limit_tstamp = jiffies;
656
657 return fprop_global_init(&dom->completions, gfp);
658}
659
660#ifdef CONFIG_CGROUP_WRITEBACK
661void wb_domain_exit(struct wb_domain *dom)
662{
663 del_timer_sync(&dom->period_timer);
664 fprop_global_destroy(&dom->completions);
665}
666#endif
667
668
669
670
671
672
673static unsigned int bdi_min_ratio;
674
675int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
676{
677 int ret = 0;
678
679 spin_lock_bh(&bdi_lock);
680 if (min_ratio > bdi->max_ratio) {
681 ret = -EINVAL;
682 } else {
683 min_ratio -= bdi->min_ratio;
684 if (bdi_min_ratio + min_ratio < 100) {
685 bdi_min_ratio += min_ratio;
686 bdi->min_ratio += min_ratio;
687 } else {
688 ret = -EINVAL;
689 }
690 }
691 spin_unlock_bh(&bdi_lock);
692
693 return ret;
694}
695
696int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
697{
698 int ret = 0;
699
700 if (max_ratio > 100)
701 return -EINVAL;
702
703 spin_lock_bh(&bdi_lock);
704 if (bdi->min_ratio > max_ratio) {
705 ret = -EINVAL;
706 } else {
707 bdi->max_ratio = max_ratio;
708 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
709 }
710 spin_unlock_bh(&bdi_lock);
711
712 return ret;
713}
714EXPORT_SYMBOL(bdi_set_max_ratio);
715
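/*
 * Dirty counts below this point (midway between the background and hard
 * thresholds) run free, i.e. without any throttling.
 */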
716static unsigned long dirty_freerun_ceiling(unsigned long thresh,
717 unsigned long bg_thresh)
718{
719 return (thresh + bg_thresh) / 2;
720}
721
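/*
 * The hard throttling limit: the larger of the configured threshold and
 * the domain's slowly-tracking dirty_limit.
 */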
722static unsigned long hard_dirty_limit(struct wb_domain *dom,
723 unsigned long thresh)
724{
725 return max(thresh, dom->dirty_limit);
726}
727
728
729
730
731
732static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
733 unsigned long filepages, unsigned long headroom)
734{
735 struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
736 unsigned long clean = filepages - min(filepages, mdtc->dirty);
737 unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
738 unsigned long other_clean = global_clean - min(global_clean, clean);
739
740 mdtc->avail = filepages + min(headroom, other_clean);
741}
742
/**
 * __wb_calc_thresh - @wb's share of the dirty throttling threshold
 * @dtc: dirty_throttle_control of interest
 *
 * Distribute the domain's dirty threshold among its writeback contexts in
 * proportion to their recent writeout completions (and the bdi's min/max
 * ratios), so that fast devices get a larger share and slow devices cannot
 * pile up more dirty pages than they can write back in reasonable time.
 *
 * Note that balance_dirty_pages() only treats this as a hard limit when
 * sleeping max_pause per page is not enough to keep the dirty pages under
 * control; normally it throttles tasks progressively as the wb's dirty
 * count approaches it.
 *
 * Return: @wb's dirty limit.
 */
764static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
765{
766 struct wb_domain *dom = dtc_dom(dtc);
767 unsigned long thresh = dtc->thresh;
768 u64 wb_thresh;
769 unsigned long numerator, denominator;
770 unsigned long wb_min_ratio, wb_max_ratio;
771
772
773
774
775 fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
776 &numerator, &denominator);
777
778 wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
779 wb_thresh *= numerator;
780 wb_thresh = div64_ul(wb_thresh, denominator);
781
782 wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
783
784 wb_thresh += (thresh * wb_min_ratio) / 100;
785 if (wb_thresh > (thresh * wb_max_ratio) / 100)
786 wb_thresh = thresh * wb_max_ratio / 100;
787
788 return wb_thresh;
789}
790
791unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
792{
793 struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
794 .thresh = thresh };
795 return __wb_calc_thresh(&gdtc);
796}
797
/*
 * Dirty position control: a 3rd order polynomial of the distance to the
 * setpoint,
 *
 *                            setpoint - dirty 3
 *         f(dirty) := 1.0 + (----------------)
 *                            limit - setpoint
 *
 * which satisfies
 *
 * (1) f(freerun)  = 2.0 => ramp up dirty_ratelimit reasonably fast
 * (2) f(setpoint) = 1.0 => the balance point
 * (3) f(limit)    = 0   => the hard limit
 * (4) df/dx       < 0   => negative feedback control
 */
812static long long pos_ratio_polynom(unsigned long setpoint,
813 unsigned long dirty,
814 unsigned long limit)
815{
816 long long pos_ratio;
817 long x;
818
819 x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
820 (limit - setpoint) | 1);
821 pos_ratio = x;
822 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
823 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
824 pos_ratio += 1 << RATELIMIT_CALC_SHIFT;
825
826 return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
827}
/*
 * Dirty position control.
 *
 * (o) global/wb setpoints
 *
 * We want the dirty pages to be balanced around the global/wb setpoints.
 * When the number of dirty pages is higher/lower than the setpoint, the
 * dirty position control ratio (and hence the task dirty ratelimit) will
 * be decreased/increased to bring the dirty pages back to the setpoint:
 *
 *     pos_ratio = wb_position_ratio()
 *     task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
 *
 *     if (dirty < setpoint)    scale up   pos_ratio
 *     if (dirty > setpoint)    scale down pos_ratio
 *
 *     if (wb_dirty < wb_setpoint) scale up   pos_ratio
 *     if (wb_dirty > wb_setpoint) scale down pos_ratio
 *
 * pos_ratio is clamped to [0, 2], so task_ratelimit stays within
 * [0, 2 * dirty_ratelimit].
 *
 * (o) global control line
 *
 * The global pos_ratio is the 3rd order polynomial of pos_ratio_polynom():
 * it starts at 2.0 at the freerun ceiling, crosses 1.0 at
 * setpoint = (freerun + limit) / 2 and drops to 0 at the hard limit.
 *
 * (o) wb control line
 *
 * On top of that, a per-wb control line scales pos_ratio down further as
 * wb_dirty climbs above the wb's own setpoint, so that fast devices are
 * not starved and slow devices do not accumulate excessive dirty pages.
 */
904static void wb_position_ratio(struct dirty_throttle_control *dtc)
905{
906 struct bdi_writeback *wb = dtc->wb;
907 unsigned long write_bw = wb->avg_write_bandwidth;
908 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
909 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
910 unsigned long wb_thresh = dtc->wb_thresh;
911 unsigned long x_intercept;
912 unsigned long setpoint;
913 unsigned long wb_setpoint;
914 unsigned long span;
915 long long pos_ratio;
916 long x;
917
918 dtc->pos_ratio = 0;
919
920 if (unlikely(dtc->dirty >= limit))
921 return;
922
923
924
925
926
927
928 setpoint = (freerun + limit) / 2;
929 pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
957 long long wb_pos_ratio;
958
959 if (dtc->wb_dirty < 8) {
960 dtc->pos_ratio = min_t(long long, pos_ratio * 2,
961 2 << RATELIMIT_CALC_SHIFT);
962 return;
963 }
964
965 if (dtc->wb_dirty >= wb_thresh)
966 return;
967
968 wb_setpoint = dirty_freerun_ceiling(wb_thresh,
969 dtc->wb_bg_thresh);
970
971 if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
972 return;
973
974 wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
975 wb_thresh);
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998 dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
999 return;
1000 }
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033 if (unlikely(wb_thresh > dtc->thresh))
1034 wb_thresh = dtc->thresh;
1035
1036
1037
1038
1039
1040
1041
1042 wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
1043
1044
1045
1046
1047 x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
1048 wb_setpoint = setpoint * (u64)x >> 16;
1049
1050
1051
1052
1053
1054
1055
1056
1057 span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
1058 x_intercept = wb_setpoint + span;
1059
1060 if (dtc->wb_dirty < x_intercept - span / 4) {
1061 pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
1062 (x_intercept - wb_setpoint) | 1);
1063 } else
1064 pos_ratio /= 4;
1065
1066
1067
1068
1069
1070
1071 x_intercept = wb_thresh / 2;
1072 if (dtc->wb_dirty < x_intercept) {
1073 if (dtc->wb_dirty > x_intercept / 8)
1074 pos_ratio = div_u64(pos_ratio * x_intercept,
1075 dtc->wb_dirty);
1076 else
1077 pos_ratio *= 8;
1078 }
1079
1080 dtc->pos_ratio = pos_ratio;
1081}
1082
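/*
 * Estimate the wb's write bandwidth from the pages completed since the last
 * sample and blend it into a running average over a ~3 second period, with
 * an extra level of smoothing to filter out sudden spikes.
 */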
1083static void wb_update_write_bandwidth(struct bdi_writeback *wb,
1084 unsigned long elapsed,
1085 unsigned long written)
1086{
1087 const unsigned long period = roundup_pow_of_two(3 * HZ);
1088 unsigned long avg = wb->avg_write_bandwidth;
1089 unsigned long old = wb->write_bandwidth;
1090 u64 bw;
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102 bw = written - min(written, wb->written_stamp);
1103 bw *= HZ;
1104 if (unlikely(elapsed > period)) {
1105 bw = div64_ul(bw, elapsed);
1106 avg = bw;
1107 goto out;
1108 }
1109 bw += (u64)wb->write_bandwidth * (period - elapsed);
1110 bw >>= ilog2(period);
1111
1112
1113
1114
1115 if (avg > old && old >= (unsigned long)bw)
1116 avg -= (avg - old) >> 3;
1117
1118 if (avg < old && old <= (unsigned long)bw)
1119 avg += (old - avg) >> 3;
1120
1121out:
1122
1123 avg = max(avg, 1LU);
1124 if (wb_has_dirty_io(wb)) {
1125 long delta = avg - wb->avg_write_bandwidth;
1126 WARN_ON_ONCE(atomic_long_add_return(delta,
1127 &wb->bdi->tot_write_bandwidth) <= 0);
1128 }
1129 wb->write_bandwidth = bw;
1130 wb->avg_write_bandwidth = avg;
1131}
1132
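/*
 * Track the global dirty limit: follow an increasing threshold immediately,
 * but let the limit decay only gradually (and never below the current number
 * of dirty pages) so that throttling stays smooth when the threshold drops.
 */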
1133static void update_dirty_limit(struct dirty_throttle_control *dtc)
1134{
1135 struct wb_domain *dom = dtc_dom(dtc);
1136 unsigned long thresh = dtc->thresh;
1137 unsigned long limit = dom->dirty_limit;
1138
1139
1140
1141
1142 if (limit < thresh) {
1143 limit = thresh;
1144 goto update;
1145 }
1146
1147
1148
1149
1150
1151
1152 thresh = max(thresh, dtc->dirty);
1153 if (limit > thresh) {
1154 limit -= (limit - thresh) >> 5;
1155 goto update;
1156 }
1157 return;
1158update:
1159 dom->dirty_limit = limit;
1160}
1161
1162static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
1163 unsigned long now)
1164{
1165 struct wb_domain *dom = dtc_dom(dtc);
1166
1167
1168
1169
1170 if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
1171 return;
1172
1173 spin_lock(&dom->lock);
1174 if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
1175 update_dirty_limit(dtc);
1176 dom->dirty_limit_tstamp = now;
1177 }
1178 spin_unlock(&dom->lock);
1179}
1180
/*
 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal wb tasks will be curbed at or below it in the long term.
 * Obviously it should be around (write_bw / N) when there are N dd tasks.
 */
1187static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
1188 unsigned long dirtied,
1189 unsigned long elapsed)
1190{
1191 struct bdi_writeback *wb = dtc->wb;
1192 unsigned long dirty = dtc->dirty;
1193 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
1194 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
1195 unsigned long setpoint = (freerun + limit) / 2;
1196 unsigned long write_bw = wb->avg_write_bandwidth;
1197 unsigned long dirty_ratelimit = wb->dirty_ratelimit;
1198 unsigned long dirty_rate;
1199 unsigned long task_ratelimit;
1200 unsigned long balanced_dirty_ratelimit;
1201 unsigned long step;
1202 unsigned long x;
1203 unsigned long shift;
1204
1205
1206
1207
1208
1209 dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;
1210
1211
1212
1213
1214 task_ratelimit = (u64)dirty_ratelimit *
1215 dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
1216 task_ratelimit++;

	/*
	 * A linear estimation of the "balanced" throttle rate.  If there are
	 * N tasks dirtying pages, each throttled at task_ratelimit, the wb's
	 * measured dirty_rate will be roughly N * task_ratelimit.  The rate
	 * that would keep the dirty count steady at the current write
	 * bandwidth is therefore
	 *
	 *	balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate
	 *
	 * which is exactly what is computed below; the result is then used
	 * as feedback to step dirty_ratelimit towards the balanced value.
	 */
1248 balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
1249 dirty_rate | 1);
1250
1251
1252
1253 if (unlikely(balanced_dirty_ratelimit > write_bw))
1254 balanced_dirty_ratelimit = write_bw;
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290 step = 0;
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
1304 dirty = dtc->wb_dirty;
1305 if (dtc->wb_dirty < 8)
1306 setpoint = dtc->wb_dirty + 1;
1307 else
1308 setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
1309 }
1310
1311 if (dirty < setpoint) {
1312 x = min3(wb->balanced_dirty_ratelimit,
1313 balanced_dirty_ratelimit, task_ratelimit);
1314 if (dirty_ratelimit < x)
1315 step = x - dirty_ratelimit;
1316 } else {
1317 x = max3(wb->balanced_dirty_ratelimit,
1318 balanced_dirty_ratelimit, task_ratelimit);
1319 if (dirty_ratelimit > x)
1320 step = dirty_ratelimit - x;
1321 }
1322
1323
1324
1325
1326
1327
1328 shift = dirty_ratelimit / (2 * step + 1);
1329 if (shift < BITS_PER_LONG)
1330 step = DIV_ROUND_UP(step >> shift, 8);
1331 else
1332 step = 0;
1333
1334 if (dirty_ratelimit < balanced_dirty_ratelimit)
1335 dirty_ratelimit += step;
1336 else
1337 dirty_ratelimit -= step;
1338
1339 wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
1340 wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
1341
1342 trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
1343}
1344
1345static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
1346 struct dirty_throttle_control *mdtc,
1347 unsigned long start_time,
1348 bool update_ratelimit)
1349{
1350 struct bdi_writeback *wb = gdtc->wb;
1351 unsigned long now = jiffies;
1352 unsigned long elapsed = now - wb->bw_time_stamp;
1353 unsigned long dirtied;
1354 unsigned long written;
1355
1356 lockdep_assert_held(&wb->list_lock);
1357
1358
1359
1360
1361 if (elapsed < BANDWIDTH_INTERVAL)
1362 return;
1363
1364 dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
1365 written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
1366
1367
1368
1369
1370
1371 if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
1372 goto snapshot;
1373
1374 if (update_ratelimit) {
1375 domain_update_bandwidth(gdtc, now);
1376 wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
1377
1378
1379
1380
1381
1382 if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
1383 domain_update_bandwidth(mdtc, now);
1384 wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
1385 }
1386 }
1387 wb_update_write_bandwidth(wb, elapsed, written);
1388
1389snapshot:
1390 wb->dirtied_stamp = dirtied;
1391 wb->written_stamp = written;
1392 wb->bw_time_stamp = now;
1393}
1394
1395void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
1396{
1397 struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
1398
1399 __wb_update_bandwidth(&gdtc, NULL, start_time, false);
1400}
1401
/*
 * After a task dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to start dirty throttling.
 *
 * If dirty_poll_interval is too low, big NUMA machines will call the
 * expensive global page state sampling too often.  So scale it near-sqrt
 * to the safety margin (the number of pages we may dirty without exceeding
 * the dirty limits).
 */
1410static unsigned long dirty_poll_interval(unsigned long dirty,
1411 unsigned long thresh)
1412{
1413 if (thresh > dirty)
1414 return 1UL << (ilog2(thresh - dirty) >> 1);
1415
1416 return 1;
1417}
1418
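/*
 * Upper limit on the pause in balance_dirty_pages(): keep it short enough,
 * relative to the wb's dirty pool and write bandwidth, that the device does
 * not run dry of dirty pages while the task sleeps, and never exceed
 * MAX_PAUSE.
 */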
1419static unsigned long wb_max_pause(struct bdi_writeback *wb,
1420 unsigned long wb_dirty)
1421{
1422 unsigned long bw = wb->avg_write_bandwidth;
1423 unsigned long t;
1424
1425
1426
1427
1428
1429
1430
1431
1432 t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
1433 t++;
1434
1435 return min_t(unsigned long, t, MAX_PAUSE);
1436}
1437
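/*
 * Pick a minimum pause (and the matching nr_dirtied_pause) that yields
 * reasonably large writeback chunks while keeping individual sleeps well
 * below max_pause.
 */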
1438static long wb_min_pause(struct bdi_writeback *wb,
1439 long max_pause,
1440 unsigned long task_ratelimit,
1441 unsigned long dirty_ratelimit,
1442 int *nr_dirtied_pause)
1443{
1444 long hi = ilog2(wb->avg_write_bandwidth);
1445 long lo = ilog2(wb->dirty_ratelimit);
1446 long t;
1447 long pause;
1448 int pages;
1449
1450
1451 t = max(1, HZ / 100);
1452
1453
1454
1455
1456
1457
1458
1459 if (hi > lo)
1460 t += (hi - lo) * (10 * HZ) / 1024;
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480 t = min(t, 1 + max_pause / 2);
1481 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491 if (pages < DIRTY_POLL_THRESH) {
1492 t = max_pause;
1493 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1494 if (pages > DIRTY_POLL_THRESH) {
1495 pages = DIRTY_POLL_THRESH;
1496 t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
1497 }
1498 }
1499
1500 pause = HZ * pages / (task_ratelimit + 1);
1501 if (pause > max_pause) {
1502 t = max_pause;
1503 pages = task_ratelimit * t / roundup_pow_of_two(HZ);
1504 }
1505
1506 *nr_dirtied_pause = pages;
1507
1508
1509
1510 return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
1511}
1512
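/*
 * Compute the wb's portion of the dirty thresholds and sample its dirty
 * page counts, using the more precise (but expensive) summed per-CPU
 * counters when the threshold is small enough for counter errors to matter.
 */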
1513static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
1514{
1515 struct bdi_writeback *wb = dtc->wb;
1516 unsigned long wb_reclaimable;
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531 dtc->wb_thresh = __wb_calc_thresh(dtc);
1532 dtc->wb_bg_thresh = dtc->thresh ?
1533 div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545 if (dtc->wb_thresh < 2 * wb_stat_error()) {
1546 wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
1547 dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
1548 } else {
1549 wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
1550 dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
1551 }
1552}
1553
/*
 * balance_dirty_pages() must be called by processes which are generating
 * dirty data.  It looks at the number of dirty pages in the machine and
 * will force the caller to wait once crossing the
 * (background_thresh + dirty_thresh) / 2 freerun ceiling.
 * If we're over background_thresh then the writeback threads are woken
 * to perform some writeout.
 */
1561static void balance_dirty_pages(struct bdi_writeback *wb,
1562 unsigned long pages_dirtied)
1563{
1564 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1565 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1566 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1567 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1568 &mdtc_stor : NULL;
1569 struct dirty_throttle_control *sdtc;
1570 unsigned long nr_reclaimable;
1571 long period;
1572 long pause;
1573 long max_pause;
1574 long min_pause;
1575 int nr_dirtied_pause;
1576 bool dirty_exceeded = false;
1577 unsigned long task_ratelimit;
1578 unsigned long dirty_ratelimit;
1579 struct backing_dev_info *bdi = wb->bdi;
1580 bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
1581 unsigned long start_time = jiffies;
1582
1583 for (;;) {
1584 unsigned long now = jiffies;
1585 unsigned long dirty, thresh, bg_thresh;
1586 unsigned long m_dirty = 0;
1587 unsigned long m_thresh = 0;
1588 unsigned long m_bg_thresh = 0;
1589
1590
1591
1592
1593
1594
1595
1596 nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
1597 global_node_page_state(NR_UNSTABLE_NFS);
1598 gdtc->avail = global_dirtyable_memory();
1599 gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
1600
1601 domain_dirty_limits(gdtc);
1602
1603 if (unlikely(strictlimit)) {
1604 wb_dirty_limits(gdtc);
1605
1606 dirty = gdtc->wb_dirty;
1607 thresh = gdtc->wb_thresh;
1608 bg_thresh = gdtc->wb_bg_thresh;
1609 } else {
1610 dirty = gdtc->dirty;
1611 thresh = gdtc->thresh;
1612 bg_thresh = gdtc->bg_thresh;
1613 }
1614
1615 if (mdtc) {
1616 unsigned long filepages, headroom, writeback;
1617
1618
1619
1620
1621
1622 mem_cgroup_wb_stats(wb, &filepages, &headroom,
1623 &mdtc->dirty, &writeback);
1624 mdtc->dirty += writeback;
1625 mdtc_calc_avail(mdtc, filepages, headroom);
1626
1627 domain_dirty_limits(mdtc);
1628
1629 if (unlikely(strictlimit)) {
1630 wb_dirty_limits(mdtc);
1631 m_dirty = mdtc->wb_dirty;
1632 m_thresh = mdtc->wb_thresh;
1633 m_bg_thresh = mdtc->wb_bg_thresh;
1634 } else {
1635 m_dirty = mdtc->dirty;
1636 m_thresh = mdtc->thresh;
1637 m_bg_thresh = mdtc->bg_thresh;
1638 }
1639 }
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653 if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
1654 (!mdtc ||
1655 m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
1656 unsigned long intv = dirty_poll_interval(dirty, thresh);
1657 unsigned long m_intv = ULONG_MAX;
1658
1659 current->dirty_paused_when = now;
1660 current->nr_dirtied = 0;
1661 if (mdtc)
1662 m_intv = dirty_poll_interval(m_dirty, m_thresh);
1663 current->nr_dirtied_pause = min(intv, m_intv);
1664 break;
1665 }
1666
1667 if (unlikely(!writeback_in_progress(wb)))
1668 wb_start_background_writeback(wb);
1669
1670 mem_cgroup_flush_foreign(wb);
1671
1672
1673
1674
1675
1676 if (!strictlimit)
1677 wb_dirty_limits(gdtc);
1678
1679 dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
1680 ((gdtc->dirty > gdtc->thresh) || strictlimit);
1681
1682 wb_position_ratio(gdtc);
1683 sdtc = gdtc;
1684
1685 if (mdtc) {
1686
1687
1688
1689
1690
1691
1692 if (!strictlimit)
1693 wb_dirty_limits(mdtc);
1694
1695 dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
1696 ((mdtc->dirty > mdtc->thresh) || strictlimit);
1697
1698 wb_position_ratio(mdtc);
1699 if (mdtc->pos_ratio < gdtc->pos_ratio)
1700 sdtc = mdtc;
1701 }
1702
1703 if (dirty_exceeded && !wb->dirty_exceeded)
1704 wb->dirty_exceeded = 1;
1705
1706 if (time_is_before_jiffies(wb->bw_time_stamp +
1707 BANDWIDTH_INTERVAL)) {
1708 spin_lock(&wb->list_lock);
1709 __wb_update_bandwidth(gdtc, mdtc, start_time, true);
1710 spin_unlock(&wb->list_lock);
1711 }
1712
1713
1714 dirty_ratelimit = wb->dirty_ratelimit;
1715 task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
1716 RATELIMIT_CALC_SHIFT;
1717 max_pause = wb_max_pause(wb, sdtc->wb_dirty);
1718 min_pause = wb_min_pause(wb, max_pause,
1719 task_ratelimit, dirty_ratelimit,
1720 &nr_dirtied_pause);
1721
1722 if (unlikely(task_ratelimit == 0)) {
1723 period = max_pause;
1724 pause = max_pause;
1725 goto pause;
1726 }
1727 period = HZ * pages_dirtied / task_ratelimit;
1728 pause = period;
1729 if (current->dirty_paused_when)
1730 pause -= now - current->dirty_paused_when;
1731
1732
1733
1734
1735
1736
1737
1738 if (pause < min_pause) {
1739 trace_balance_dirty_pages(wb,
1740 sdtc->thresh,
1741 sdtc->bg_thresh,
1742 sdtc->dirty,
1743 sdtc->wb_thresh,
1744 sdtc->wb_dirty,
1745 dirty_ratelimit,
1746 task_ratelimit,
1747 pages_dirtied,
1748 period,
1749 min(pause, 0L),
1750 start_time);
1751 if (pause < -HZ) {
1752 current->dirty_paused_when = now;
1753 current->nr_dirtied = 0;
1754 } else if (period) {
1755 current->dirty_paused_when += period;
1756 current->nr_dirtied = 0;
1757 } else if (current->nr_dirtied_pause <= pages_dirtied)
1758 current->nr_dirtied_pause += pages_dirtied;
1759 break;
1760 }
1761 if (unlikely(pause > max_pause)) {
1762
1763 now += min(pause - max_pause, max_pause);
1764 pause = max_pause;
1765 }
1766
1767pause:
1768 trace_balance_dirty_pages(wb,
1769 sdtc->thresh,
1770 sdtc->bg_thresh,
1771 sdtc->dirty,
1772 sdtc->wb_thresh,
1773 sdtc->wb_dirty,
1774 dirty_ratelimit,
1775 task_ratelimit,
1776 pages_dirtied,
1777 period,
1778 pause,
1779 start_time);
1780 __set_current_state(TASK_KILLABLE);
1781 wb->dirty_sleep = now;
1782 io_schedule_timeout(pause);
1783
1784 current->dirty_paused_when = now + pause;
1785 current->nr_dirtied = 0;
1786 current->nr_dirtied_pause = nr_dirtied_pause;
1787
1788
1789
1790
1791
1792 if (task_ratelimit)
1793 break;
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805 if (sdtc->wb_dirty <= wb_stat_error())
1806 break;
1807
1808 if (fatal_signal_pending(current))
1809 break;
1810 }
1811
1812 if (!dirty_exceeded && wb->dirty_exceeded)
1813 wb->dirty_exceeded = 0;
1814
1815 if (writeback_in_progress(wb))
1816 return;
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826 if (laptop_mode)
1827 return;
1828
1829 if (nr_reclaimable > gdtc->bg_thresh)
1830 wb_start_background_writeback(wb);
1831}
1832
1833static DEFINE_PER_CPU(int, bdp_ratelimits);
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
1850
/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * Sampling the global dirty state is expensive on large machines, so the
 * check is rate-limited via per-task and per-CPU counters.  Once over the
 * dirty memory limit the ratelimit is reduced sharply, to prevent individual
 * processes from overshooting the limit by (ratelimit_pages) each.
 */
1864void balance_dirty_pages_ratelimited(struct address_space *mapping)
1865{
1866 struct inode *inode = mapping->host;
1867 struct backing_dev_info *bdi = inode_to_bdi(inode);
1868 struct bdi_writeback *wb = NULL;
1869 int ratelimit;
1870 int *p;
1871
1872 if (!bdi_cap_account_dirty(bdi))
1873 return;
1874
1875 if (inode_cgwb_enabled(inode))
1876 wb = wb_get_create_current(bdi, GFP_KERNEL);
1877 if (!wb)
1878 wb = &bdi->wb;
1879
1880 ratelimit = current->nr_dirtied_pause;
1881 if (wb->dirty_exceeded)
1882 ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
1883
1884 preempt_disable();
1885
1886
1887
1888
1889
1890
1891 p = this_cpu_ptr(&bdp_ratelimits);
1892 if (unlikely(current->nr_dirtied >= ratelimit))
1893 *p = 0;
1894 else if (unlikely(*p >= ratelimit_pages)) {
1895 *p = 0;
1896 ratelimit = 0;
1897 }
1898
1899
1900
1901
1902
1903 p = this_cpu_ptr(&dirty_throttle_leaks);
1904 if (*p > 0 && current->nr_dirtied < ratelimit) {
1905 unsigned long nr_pages_dirtied;
1906 nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
1907 *p -= nr_pages_dirtied;
1908 current->nr_dirtied += nr_pages_dirtied;
1909 }
1910 preempt_enable();
1911
1912 if (unlikely(current->nr_dirtied >= ratelimit))
1913 balance_dirty_pages(wb, current->nr_dirtied);
1914
1915 wb_put(wb);
1916}
1917EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
1918
/**
 * wb_over_bg_thresh - does @wb need to be written back?
 * @wb: bdi_writeback of interest
 *
 * Determines whether background writeback should keep writing @wb or it's
 * clean enough.
 *
 * Return: %true if writeback should continue.
 */
1928bool wb_over_bg_thresh(struct bdi_writeback *wb)
1929{
1930 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1931 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1932 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1933 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1934 &mdtc_stor : NULL;
1935
1936
1937
1938
1939
1940 gdtc->avail = global_dirtyable_memory();
1941 gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
1942 global_node_page_state(NR_UNSTABLE_NFS);
1943 domain_dirty_limits(gdtc);
1944
1945 if (gdtc->dirty > gdtc->bg_thresh)
1946 return true;
1947
1948 if (wb_stat(wb, WB_RECLAIMABLE) >
1949 wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
1950 return true;
1951
1952 if (mdtc) {
1953 unsigned long filepages, headroom, writeback;
1954
1955 mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
1956 &writeback);
1957 mdtc_calc_avail(mdtc, filepages, headroom);
1958 domain_dirty_limits(mdtc);
1959
1960 if (mdtc->dirty > mdtc->bg_thresh)
1961 return true;
1962
1963 if (wb_stat(wb, WB_RECLAIMABLE) >
1964 wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
1965 return true;
1966 }
1967
1968 return false;
1969}
1970
1971
1972
1973
1974int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
1975 void __user *buffer, size_t *length, loff_t *ppos)
1976{
1977 unsigned int old_interval = dirty_writeback_interval;
1978 int ret;
1979
1980 ret = proc_dointvec(table, write, buffer, length, ppos);
1981
1982
1983
1984
1985
1986
1987
1988
1989 if (!ret && write && dirty_writeback_interval &&
1990 dirty_writeback_interval != old_interval)
1991 wakeup_flusher_threads(WB_REASON_PERIODIC);
1992
1993 return ret;
1994}
1995
1996#ifdef CONFIG_BLOCK
1997void laptop_mode_timer_fn(struct timer_list *t)
1998{
1999 struct backing_dev_info *backing_dev_info =
2000 from_timer(backing_dev_info, t, laptop_mode_wb_timer);
2001
2002 wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER);
2003}
2004
2005
2006
2007
2008
2009
2010void laptop_io_completion(struct backing_dev_info *info)
2011{
2012 mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
2013}
2014
2015
2016
2017
2018
2019
2020void laptop_sync_completion(void)
2021{
2022 struct backing_dev_info *bdi;
2023
2024 rcu_read_lock();
2025
2026 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
2027 del_timer(&bdi->laptop_mode_wb_timer);
2028
2029 rcu_read_unlock();
2030}
2031#endif
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044void writeback_set_ratelimit(void)
2045{
2046 struct wb_domain *dom = &global_wb_domain;
2047 unsigned long background_thresh;
2048 unsigned long dirty_thresh;
2049
2050 global_dirty_limits(&background_thresh, &dirty_thresh);
2051 dom->dirty_limit = dirty_thresh;
2052 ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
2053 if (ratelimit_pages < 16)
2054 ratelimit_pages = 16;
2055}
2056
2057static int page_writeback_cpu_online(unsigned int cpu)
2058{
2059 writeback_set_ratelimit();
2060 return 0;
2061}
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081void __init page_writeback_init(void)
2082{
2083 BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
2084
2085 cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
2086 page_writeback_cpu_online, NULL);
2087 cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
2088 page_writeback_cpu_online);
2089}
2090
/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have the DIRTY tag set with a special TOWRITE tag.  The idea
 * is that write_cache_pages (or whoever calls this function) will then use
 * the TOWRITE tag to identify pages eligible for writeback.  This mechanism
 * is used to avoid livelocking of writeback by a process steadily creating
 * new dirty pages in the file (thus it is important for this function to be
 * quick so that it can tag pages faster than a dirtying process can create
 * them).
 */
2105void tag_pages_for_writeback(struct address_space *mapping,
2106 pgoff_t start, pgoff_t end)
2107{
2108 XA_STATE(xas, &mapping->i_pages, start);
2109 unsigned int tagged = 0;
2110 void *page;
2111
2112 xas_lock_irq(&xas);
2113 xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
2114 xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
2115 if (++tagged % XA_CHECK_SCHED)
2116 continue;
2117
2118 xas_pause(&xas);
2119 xas_unlock_irq(&xas);
2120 cond_resched();
2121 xas_lock_irq(&xas);
2122 }
2123 xas_unlock_irq(&xas);
2124}
2125EXPORT_SYMBOL(tag_pages_for_writeback);
2126
/**
 * write_cache_pages - walk the list of dirty pages of the given address
 * space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to the writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().
 * fsync() and msync() need to guarantee that all the data which was dirty at
 * the time the call was made gets new I/O started against it.  If
 * wbc->sync_mode is WB_SYNC_ALL then we were called for data integrity and
 * must wait for existing I/O to complete.
 *
 * To avoid livelocks (when other processes dirty new pages), we first tag
 * pages which should be written back with the TOWRITE tag and only then
 * start writing them.  For data-integrity sync we have to be careful not to
 * miss pages; the rule is that the TOWRITE tag can only be cleared by the
 * process clearing the DIRTY tag (and submitting the page for I/O).
 *
 * Return: %0 on success, negative error code otherwise.
 */
2158int write_cache_pages(struct address_space *mapping,
2159 struct writeback_control *wbc, writepage_t writepage,
2160 void *data)
2161{
2162 int ret = 0;
2163 int done = 0;
2164 int error;
2165 struct pagevec pvec;
2166 int nr_pages;
2167 pgoff_t uninitialized_var(writeback_index);
2168 pgoff_t index;
2169 pgoff_t end;
2170 pgoff_t done_index;
2171 int range_whole = 0;
2172 xa_mark_t tag;
2173
2174 pagevec_init(&pvec);
2175 if (wbc->range_cyclic) {
2176 writeback_index = mapping->writeback_index;
2177 index = writeback_index;
2178 end = -1;
2179 } else {
2180 index = wbc->range_start >> PAGE_SHIFT;
2181 end = wbc->range_end >> PAGE_SHIFT;
2182 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2183 range_whole = 1;
2184 }
2185 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
2186 tag_pages_for_writeback(mapping, index, end);
2187 tag = PAGECACHE_TAG_TOWRITE;
2188 } else {
2189 tag = PAGECACHE_TAG_DIRTY;
2190 }
2191 done_index = index;
2192 while (!done && (index <= end)) {
2193 int i;
2194
2195 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
2196 tag);
2197 if (nr_pages == 0)
2198 break;
2199
2200 for (i = 0; i < nr_pages; i++) {
2201 struct page *page = pvec.pages[i];
2202
2203 done_index = page->index;
2204
2205 lock_page(page);
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215 if (unlikely(page->mapping != mapping)) {
2216continue_unlock:
2217 unlock_page(page);
2218 continue;
2219 }
2220
2221 if (!PageDirty(page)) {
2222
2223 goto continue_unlock;
2224 }
2225
2226 if (PageWriteback(page)) {
2227 if (wbc->sync_mode != WB_SYNC_NONE)
2228 wait_on_page_writeback(page);
2229 else
2230 goto continue_unlock;
2231 }
2232
2233 BUG_ON(PageWriteback(page));
2234 if (!clear_page_dirty_for_io(page))
2235 goto continue_unlock;
2236
2237 trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
2238 error = (*writepage)(page, wbc, data);
2239 if (unlikely(error)) {
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252 if (error == AOP_WRITEPAGE_ACTIVATE) {
2253 unlock_page(page);
2254 error = 0;
2255 } else if (wbc->sync_mode != WB_SYNC_ALL) {
2256 ret = error;
2257 done_index = page->index + 1;
2258 done = 1;
2259 break;
2260 }
2261 if (!ret)
2262 ret = error;
2263 }
2264
2265
2266
2267
2268
2269
2270
2271 if (--wbc->nr_to_write <= 0 &&
2272 wbc->sync_mode == WB_SYNC_NONE) {
2273 done = 1;
2274 break;
2275 }
2276 }
2277 pagevec_release(&pvec);
2278 cond_resched();
2279 }
2280
2281
2282
2283
2284
2285
2286 if (wbc->range_cyclic && !done)
2287 done_index = 0;
2288 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2289 mapping->writeback_index = done_index;
2290
2291 return ret;
2292}
2293EXPORT_SYMBOL(write_cache_pages);
2294
2295
2296
2297
2298
2299static int __writepage(struct page *page, struct writeback_control *wbc,
2300 void *data)
2301{
2302 struct address_space *mapping = data;
2303 int ret = mapping->a_ops->writepage(page, wbc);
2304 mapping_set_error(mapping, ret);
2305 return ret;
2306}
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318int generic_writepages(struct address_space *mapping,
2319 struct writeback_control *wbc)
2320{
2321 struct blk_plug plug;
2322 int ret;
2323
2324
2325 if (!mapping->a_ops->writepage)
2326 return 0;
2327
2328 blk_start_plug(&plug);
2329 ret = write_cache_pages(mapping, wbc, __writepage, mapping);
2330 blk_finish_plug(&plug);
2331 return ret;
2332}
2333
2334EXPORT_SYMBOL(generic_writepages);
2335
2336int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
2337{
2338 int ret;
2339
2340 if (wbc->nr_to_write <= 0)
2341 return 0;
2342 while (1) {
2343 if (mapping->a_ops->writepages)
2344 ret = mapping->a_ops->writepages(mapping, wbc);
2345 else
2346 ret = generic_writepages(mapping, wbc);
2347 if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
2348 break;
2349 cond_resched();
2350 congestion_wait(BLK_RW_ASYNC, HZ/50);
2351 }
2352 return ret;
2353}
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366int write_one_page(struct page *page)
2367{
2368 struct address_space *mapping = page->mapping;
2369 int ret = 0;
2370 struct writeback_control wbc = {
2371 .sync_mode = WB_SYNC_ALL,
2372 .nr_to_write = 1,
2373 };
2374
2375 BUG_ON(!PageLocked(page));
2376
2377 wait_on_page_writeback(page);
2378
2379 if (clear_page_dirty_for_io(page)) {
2380 get_page(page);
2381 ret = mapping->a_ops->writepage(page, &wbc);
2382 if (ret == 0)
2383 wait_on_page_writeback(page);
2384 put_page(page);
2385 } else {
2386 unlock_page(page);
2387 }
2388
2389 if (!ret)
2390 ret = filemap_check_errors(mapping);
2391 return ret;
2392}
2393EXPORT_SYMBOL(write_one_page);
2394
2395
2396
2397
2398int __set_page_dirty_no_writeback(struct page *page)
2399{
2400 if (!PageDirty(page))
2401 return !TestSetPageDirty(page);
2402 return 0;
2403}
2404
2405
2406
2407
2408
2409
2410
2411
2412void account_page_dirtied(struct page *page, struct address_space *mapping)
2413{
2414 struct inode *inode = mapping->host;
2415
2416 trace_writeback_dirty_page(page, mapping);
2417
2418 if (mapping_cap_account_dirty(mapping)) {
2419 struct bdi_writeback *wb;
2420
2421 inode_attach_wb(inode, page);
2422 wb = inode_to_wb(inode);
2423
2424 __inc_lruvec_page_state(page, NR_FILE_DIRTY);
2425 __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2426 __inc_node_page_state(page, NR_DIRTIED);
2427 inc_wb_stat(wb, WB_RECLAIMABLE);
2428 inc_wb_stat(wb, WB_DIRTIED);
2429 task_io_account_write(PAGE_SIZE);
2430 current->nr_dirtied++;
2431 this_cpu_inc(bdp_ratelimits);
2432
2433 mem_cgroup_track_foreign_dirty(page, wb);
2434 }
2435}
2436
2437
2438
2439
2440
2441
2442void account_page_cleaned(struct page *page, struct address_space *mapping,
2443 struct bdi_writeback *wb)
2444{
2445 if (mapping_cap_account_dirty(mapping)) {
2446 dec_lruvec_page_state(page, NR_FILE_DIRTY);
2447 dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2448 dec_wb_stat(wb, WB_RECLAIMABLE);
2449 task_io_account_cancelled_write(PAGE_SIZE);
2450 }
2451}
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465int __set_page_dirty_nobuffers(struct page *page)
2466{
2467 lock_page_memcg(page);
2468 if (!TestSetPageDirty(page)) {
2469 struct address_space *mapping = page_mapping(page);
2470 unsigned long flags;
2471
2472 if (!mapping) {
2473 unlock_page_memcg(page);
2474 return 1;
2475 }
2476
2477 xa_lock_irqsave(&mapping->i_pages, flags);
2478 BUG_ON(page_mapping(page) != mapping);
2479 WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
2480 account_page_dirtied(page, mapping);
2481 __xa_set_mark(&mapping->i_pages, page_index(page),
2482 PAGECACHE_TAG_DIRTY);
2483 xa_unlock_irqrestore(&mapping->i_pages, flags);
2484 unlock_page_memcg(page);
2485
2486 if (mapping->host) {
2487
2488 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
2489 }
2490 return 1;
2491 }
2492 unlock_page_memcg(page);
2493 return 0;
2494}
2495EXPORT_SYMBOL(__set_page_dirty_nobuffers);
2496
2497
2498
2499
2500
2501
2502
2503
2504void account_page_redirty(struct page *page)
2505{
2506 struct address_space *mapping = page->mapping;
2507
2508 if (mapping && mapping_cap_account_dirty(mapping)) {
2509 struct inode *inode = mapping->host;
2510 struct bdi_writeback *wb;
2511 struct wb_lock_cookie cookie = {};
2512
2513 wb = unlocked_inode_to_wb_begin(inode, &cookie);
2514 current->nr_dirtied--;
2515 dec_node_page_state(page, NR_DIRTIED);
2516 dec_wb_stat(wb, WB_DIRTIED);
2517 unlocked_inode_to_wb_end(inode, &cookie);
2518 }
2519}
2520EXPORT_SYMBOL(account_page_redirty);
2521
2522
2523
2524
2525
2526
2527int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
2528{
2529 int ret;
2530
2531 wbc->pages_skipped++;
2532 ret = __set_page_dirty_nobuffers(page);
2533 account_page_redirty(page);
2534 return ret;
2535}
2536EXPORT_SYMBOL(redirty_page_for_writepage);
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549int set_page_dirty(struct page *page)
2550{
2551 struct address_space *mapping = page_mapping(page);
2552
2553 page = compound_head(page);
2554 if (likely(mapping)) {
2555 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566 if (PageReclaim(page))
2567 ClearPageReclaim(page);
2568#ifdef CONFIG_BLOCK
2569 if (!spd)
2570 spd = __set_page_dirty_buffers;
2571#endif
2572 return (*spd)(page);
2573 }
2574 if (!PageDirty(page)) {
2575 if (!TestSetPageDirty(page))
2576 return 1;
2577 }
2578 return 0;
2579}
2580EXPORT_SYMBOL(set_page_dirty);
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592int set_page_dirty_lock(struct page *page)
2593{
2594 int ret;
2595
2596 lock_page(page);
2597 ret = set_page_dirty(page);
2598 unlock_page(page);
2599 return ret;
2600}
2601EXPORT_SYMBOL(set_page_dirty_lock);
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616void __cancel_dirty_page(struct page *page)
2617{
2618 struct address_space *mapping = page_mapping(page);
2619
2620 if (mapping_cap_account_dirty(mapping)) {
2621 struct inode *inode = mapping->host;
2622 struct bdi_writeback *wb;
2623 struct wb_lock_cookie cookie = {};
2624
2625 lock_page_memcg(page);
2626 wb = unlocked_inode_to_wb_begin(inode, &cookie);
2627
2628 if (TestClearPageDirty(page))
2629 account_page_cleaned(page, mapping, wb);
2630
2631 unlocked_inode_to_wb_end(inode, &cookie);
2632 unlock_page_memcg(page);
2633 } else {
2634 ClearPageDirty(page);
2635 }
2636}
2637EXPORT_SYMBOL(__cancel_dirty_page);
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653int clear_page_dirty_for_io(struct page *page)
2654{
2655 struct address_space *mapping = page_mapping(page);
2656 int ret = 0;
2657
2658 VM_BUG_ON_PAGE(!PageLocked(page), page);
2659
2660 if (mapping && mapping_cap_account_dirty(mapping)) {
2661 struct inode *inode = mapping->host;
2662 struct bdi_writeback *wb;
2663 struct wb_lock_cookie cookie = {};
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690 if (page_mkclean(page))
2691 set_page_dirty(page);
2692
2693
2694
2695
2696
2697
2698
2699
2700 wb = unlocked_inode_to_wb_begin(inode, &cookie);
2701 if (TestClearPageDirty(page)) {
2702 dec_lruvec_page_state(page, NR_FILE_DIRTY);
2703 dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2704 dec_wb_stat(wb, WB_RECLAIMABLE);
2705 ret = 1;
2706 }
2707 unlocked_inode_to_wb_end(inode, &cookie);
2708 return ret;
2709 }
2710 return TestClearPageDirty(page);
2711}
2712EXPORT_SYMBOL(clear_page_dirty_for_io);
2713
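/*
 * Clear a page's writeback state and update the writeback tag, wb stats and
 * vm counters accordingly.  Returns the old writeback state.
 */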
2714int test_clear_page_writeback(struct page *page)
2715{
2716 struct address_space *mapping = page_mapping(page);
2717 struct mem_cgroup *memcg;
2718 struct lruvec *lruvec;
2719 int ret;
2720
2721 memcg = lock_page_memcg(page);
2722 lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
2723 if (mapping && mapping_use_writeback_tags(mapping)) {
2724 struct inode *inode = mapping->host;
2725 struct backing_dev_info *bdi = inode_to_bdi(inode);
2726 unsigned long flags;
2727
2728 xa_lock_irqsave(&mapping->i_pages, flags);
2729 ret = TestClearPageWriteback(page);
2730 if (ret) {
2731 __xa_clear_mark(&mapping->i_pages, page_index(page),
2732 PAGECACHE_TAG_WRITEBACK);
2733 if (bdi_cap_account_writeback(bdi)) {
2734 struct bdi_writeback *wb = inode_to_wb(inode);
2735
2736 dec_wb_stat(wb, WB_WRITEBACK);
2737 __wb_writeout_inc(wb);
2738 }
2739 }
2740
2741 if (mapping->host && !mapping_tagged(mapping,
2742 PAGECACHE_TAG_WRITEBACK))
2743 sb_clear_inode_writeback(mapping->host);
2744
2745 xa_unlock_irqrestore(&mapping->i_pages, flags);
2746 } else {
2747 ret = TestClearPageWriteback(page);
2748 }
2749
2750
2751
2752
2753
2754
2755 if (ret) {
2756 dec_lruvec_state(lruvec, NR_WRITEBACK);
2757 dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2758 inc_node_page_state(page, NR_WRITTEN);
2759 }
2760 __unlock_page_memcg(memcg);
2761 return ret;
2762}
2763
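/*
 * Mark a page as under writeback, updating the writeback tag and wb stats,
 * and clearing the TOWRITE tag unless @keep_write is set.  Returns the old
 * writeback state.
 */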
2764int __test_set_page_writeback(struct page *page, bool keep_write)
2765{
2766 struct address_space *mapping = page_mapping(page);
2767 int ret, access_ret;
2768
2769 lock_page_memcg(page);
2770 if (mapping && mapping_use_writeback_tags(mapping)) {
2771 XA_STATE(xas, &mapping->i_pages, page_index(page));
2772 struct inode *inode = mapping->host;
2773 struct backing_dev_info *bdi = inode_to_bdi(inode);
2774 unsigned long flags;
2775
2776 xas_lock_irqsave(&xas, flags);
2777 xas_load(&xas);
2778 ret = TestSetPageWriteback(page);
2779 if (!ret) {
2780 bool on_wblist;
2781
2782 on_wblist = mapping_tagged(mapping,
2783 PAGECACHE_TAG_WRITEBACK);
2784
2785 xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
2786 if (bdi_cap_account_writeback(bdi))
2787 inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
2788
2789
2790
2791
2792
2793
2794 if (mapping->host && !on_wblist)
2795 sb_mark_inode_writeback(mapping->host);
2796 }
2797 if (!PageDirty(page))
2798 xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
2799 if (!keep_write)
2800 xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
2801 xas_unlock_irqrestore(&xas, flags);
2802 } else {
2803 ret = TestSetPageWriteback(page);
2804 }
2805 if (!ret) {
2806 inc_lruvec_page_state(page, NR_WRITEBACK);
2807 inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2808 }
2809 unlock_page_memcg(page);
2810 access_ret = arch_make_page_accessible(page);
2811
2812
2813
2814
2815 VM_BUG_ON_PAGE(access_ret != 0, page);
2816
2817 return ret;
2818
2819}
2820EXPORT_SYMBOL(__test_set_page_writeback);
2821
2822
2823
2824
2825void wait_on_page_writeback(struct page *page)
2826{
2827 if (PageWriteback(page)) {
2828 trace_wait_on_page_writeback(page, page_mapping(page));
2829 wait_on_page_bit(page, PG_writeback);
2830 }
2831}
2832EXPORT_SYMBOL_GPL(wait_on_page_writeback);
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842void wait_for_stable_page(struct page *page)
2843{
2844 if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
2845 wait_on_page_writeback(page);
2846}
2847EXPORT_SYMBOL_GPL(wait_for_stable_page);
2848