1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/kernel.h>
15#include <linux/export.h>
16#include <linux/spinlock.h>
17#include <linux/fs.h>
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/slab.h>
21#include <linux/pagemap.h>
22#include <linux/writeback.h>
23#include <linux/init.h>
24#include <linux/backing-dev.h>
25#include <linux/task_io_accounting_ops.h>
26#include <linux/blkdev.h>
27#include <linux/mpage.h>
28#include <linux/rmap.h>
29#include <linux/percpu.h>
30#include <linux/smp.h>
31#include <linux/sysctl.h>
32#include <linux/cpu.h>
33#include <linux/syscalls.h>
34#include <linux/buffer_head.h>
35#include <linux/pagevec.h>
36#include <linux/timer.h>
37#include <linux/sched/rt.h>
38#include <linux/sched/signal.h>
39#include <linux/mm_inline.h>
40#include <trace/events/writeback.h>
41
42#include "internal.h"
43
44
45
46
47#define MAX_PAUSE max(HZ/5, 1)
48
49
50
51
52
53#define DIRTY_POLL_THRESH (128 >> (PAGE_SHIFT - 10))
54
55
56
57
58#define BANDWIDTH_INTERVAL max(HZ/5, 1)
59
60#define RATELIMIT_CALC_SHIFT 10
61
62
63
64
65
66static long ratelimit_pages = 32;
67
68
69
70
71
72
73int dirty_background_ratio = 10;
74
75
76
77
78
79unsigned long dirty_background_bytes;
80
81
82
83
84
85int vm_highmem_is_dirtyable;
86
87
88
89
90int vm_dirty_ratio = 20;
91
92
93
94
95
96unsigned long vm_dirty_bytes;
97
98
99
100
101unsigned int dirty_writeback_interval = 5 * 100;
102
103EXPORT_SYMBOL_GPL(dirty_writeback_interval);
104
105
106
107
108unsigned int dirty_expire_interval = 30 * 100;
109
110
111
112
113int block_dump;
114
115
116
117
118
119int laptop_mode;
120
121EXPORT_SYMBOL(laptop_mode);
122
123
124
125struct wb_domain global_wb_domain;
126
127
/*
 * Working state for the dirty throttling machinery.  One instance
 * describes a (wb, wb_domain) pair: either the global domain or, with
 * CONFIG_CGROUP_WRITEBACK, a memcg domain.  balance_dirty_pages() fills
 * these in and hands them to the threshold/position calculations.
 */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain *dom;			/* domain being throttled */
	struct dirty_throttle_control *gdtc;	/* global dtc; NULL if this IS the global one */
#endif
	struct bdi_writeback *wb;		/* writeback instance being throttled */
	struct fprop_local_percpu *wb_completions; /* wb's completion proportion in @dom */

	unsigned long avail;		/* dirtyable pages in the domain */
	unsigned long dirty;		/* file_dirty + writeback + unstable_nfs */
	unsigned long thresh;		/* hard dirty threshold */
	unsigned long bg_thresh;	/* background writeback threshold */

	unsigned long wb_dirty;		/* per-wb counterparts of the above */
	unsigned long wb_thresh;
	unsigned long wb_bg_thresh;

	unsigned long pos_ratio;	/* throttle scaling factor, see wb_position_ratio() */
};
147
148
149
150
151
152
153#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
154
155#ifdef CONFIG_CGROUP_WRITEBACK
156
157#define GDTC_INIT(__wb) .wb = (__wb), \
158 .dom = &global_wb_domain, \
159 .wb_completions = &(__wb)->completions
160
161#define GDTC_INIT_NO_WB .dom = &global_wb_domain
162
163#define MDTC_INIT(__wb, __gdtc) .wb = (__wb), \
164 .dom = mem_cgroup_wb_domain(__wb), \
165 .wb_completions = &(__wb)->memcg_completions, \
166 .gdtc = __gdtc
167
168static bool mdtc_valid(struct dirty_throttle_control *dtc)
169{
170 return dtc->dom;
171}
172
173static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
174{
175 return dtc->dom;
176}
177
178static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
179{
180 return mdtc->gdtc;
181}
182
183static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
184{
185 return &wb->memcg_completions;
186}
187
static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	unsigned long this_bw = wb->avg_write_bandwidth;
	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
	unsigned long long min = wb->bdi->min_ratio;
	unsigned long long max = wb->bdi->max_ratio;

	/*
	 * @wb may already be clean by the time the following snapshot of
	 * bandwidths is taken and this function can get called with
	 * tot_bw == 0.  Scale @wb's share of the bdi's min/max ratios by
	 * its share of the bdi's total write bandwidth; skip scaling when
	 * this_bw >= tot_bw (racy snapshot) to avoid inflating the ratios.
	 */
	if (this_bw < tot_bw) {
		if (min) {
			min *= this_bw;
			do_div(min, tot_bw);
		}
		/* max == 100 means "unrestricted" - leave it alone */
		if (max < 100) {
			max *= this_bw;
			do_div(max, tot_bw);
		}
	}

	*minp = min;
	*maxp = max;
}
214
215#else
216
217#define GDTC_INIT(__wb) .wb = (__wb), \
218 .wb_completions = &(__wb)->completions
219#define GDTC_INIT_NO_WB
220#define MDTC_INIT(__wb, __gdtc)
221
/* No cgroup writeback: there is never a valid memcg dtc. */
static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return false;
}
226
/* No cgroup writeback: everything throttles against the global domain. */
static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return &global_wb_domain;
}
231
/* No cgroup writeback: a memcg dtc never exists, so no paired global dtc. */
static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return NULL;
}
236
/* No cgroup writeback: no per-memcg completion tracking. */
static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return NULL;
}
241
242static void wb_min_max_ratio(struct bdi_writeback *wb,
243 unsigned long *minp, unsigned long *maxp)
244{
245 *minp = wb->bdi->min_ratio;
246 *maxp = wb->bdi->max_ratio;
247}
248
249#endif
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
/**
 * node_dirtyable_memory - number of dirtyable pages in a node
 * @pgdat: the node
 *
 * Return: the node's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-node dirty limits.
 */
static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
{
	unsigned long nr_pages = 0;
	int z;

	for (z = 0; z < MAX_NR_ZONES; z++) {
		struct zone *zone = pgdat->node_zones + z;

		if (!populated_zone(zone))
			continue;

		nr_pages += zone_page_state(zone, NR_FREE_PAGES);
	}

	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	nr_pages -= min(nr_pages, pgdat->totalreserve_pages);

	nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
	nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);

	return nr_pages;
}
302
/* Sum of free + file pages in all highmem zones, capped at @total. */
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;
	int i;

	for_each_node_state(node, N_HIGH_MEMORY) {
		for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
			struct zone *z;
			unsigned long nr_pages;

			if (!is_highmem_idx(i))
				continue;

			z = &NODE_DATA(node)->node_zones[i];
			if (!populated_zone(z))
				continue;

			nr_pages = zone_page_state(z, NR_FREE_PAGES);
			/* watermark pages are not dirtyable */
			nr_pages -= min(nr_pages, high_wmark_pages(z));
			nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
			nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
			x += nr_pages;
		}
	}

	/*
	 * Unreclaimable memory (kernel memory or anonymous memory
	 * without swap) can bring down the dirtyable pages below
	 * the zone's dirty balance reserve and the above calculation
	 * will underflow.  Treat the result as zero in that case.
	 */
	if ((long)x < 0)
		x = 0;

	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory.  This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(x, total);
#else
	return 0;
#endif
}
354
355
356
357
358
359
360
/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Return: the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 * The "+ 1" avoids ever returning zero (callers divide by this value).
 */
static unsigned long global_dirtyable_memory(void)
{
	unsigned long x;

	x = global_zone_page_state(NR_FREE_PAGES);
	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	x -= min(x, totalreserve_pages);

	x += global_node_page_state(NR_INACTIVE_FILE);
	x += global_node_page_state(NR_ACTIVE_FILE);

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}
381
382
383
384
385
386
387
388
389
390
391
/**
 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
 * @dtc: dirty_throttle_control of interest
 *
 * Calculates dirty limits based on either vm_dirty_{bytes,ratio} and
 * dirty_background_{bytes,ratio} (mutually exclusive pairs).  Fills
 * @dtc->thresh and @dtc->bg_thresh.  Also emits the global dirty state
 * tracepoint when computing the global limits.
 */
static void domain_dirty_limits(struct dirty_throttle_control *dtc)
{
	const unsigned long available_memory = dtc->avail;
	struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
	unsigned long bytes = vm_dirty_bytes;
	unsigned long bg_bytes = dirty_background_bytes;
	/* convert ratios to per-PAGE_SIZE for finer granularity */
	unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
	unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
	unsigned long thresh;
	unsigned long bg_thresh;
	struct task_struct *tsk;

	/* gdtc is !NULL iff @dtc is for memcg domain */
	if (gdtc) {
		unsigned long global_avail = gdtc->avail;

		/*
		 * The byte settings can't be applied directly to memcg
		 * domains.  Convert them to ratios by scaling against
		 * globally available memory.  As the ratios are in
		 * per-PAGE_SIZE, they can be obtained by dividing bytes by
		 * number of pages.
		 */
		if (bytes)
			ratio = min(DIV_ROUND_UP(bytes, global_avail),
				    PAGE_SIZE);
		if (bg_bytes)
			bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
				       PAGE_SIZE);
		bytes = bg_bytes = 0;
	}

	if (bytes)
		thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
	else
		thresh = (ratio * available_memory) / PAGE_SIZE;

	if (bg_bytes)
		bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
	else
		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;

	/* background writeback must kick in before the hard limit */
	if (bg_thresh >= thresh)
		bg_thresh = thresh / 2;
	tsk = current;
	/* boost limits for flusher threads and realtime tasks */
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
		thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
	}
	dtc->thresh = thresh;
	dtc->bg_thresh = bg_thresh;

	/* we should eventually report the domain in the TP */
	if (!gdtc)
		trace_global_dirty_state(bg_thresh, thresh);
}
449
450
451
452
453
454
455
456
457
/**
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 * @pbackground: out parameter for bg_thresh
 * @pdirty: out parameter for thresh
 *
 * Calculate bg_thresh and thresh for global_wb_domain.  See
 * domain_dirty_limits() for details.
 */
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };

	gdtc.avail = global_dirtyable_memory();
	domain_dirty_limits(&gdtc);

	*pbackground = gdtc.bg_thresh;
	*pdirty = gdtc.thresh;
}
468
469
470
471
472
473
474
475
/**
 * node_dirty_limit - maximum number of dirty pages allowed in a node
 * @pgdat: the node
 *
 * Return: the maximum number of dirty pages allowed in a node, based
 * on the node's dirtyable memory.  vm_dirty_bytes is pro-rated by the
 * node's share of global dirtyable memory; vm_dirty_ratio applies
 * directly.  Lightly-throttled and realtime tasks get a 25% boost.
 */
static unsigned long node_dirty_limit(struct pglist_data *pgdat)
{
	unsigned long node_memory = node_dirtyable_memory(pgdat);
	struct task_struct *tsk = current;
	unsigned long dirty;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
			node_memory / global_dirtyable_memory();
	else
		dirty = vm_dirty_ratio * node_memory / 100;

	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
		dirty += dirty / 4;

	return dirty;
}
493
494
495
496
497
498
499
500
501bool node_dirty_ok(struct pglist_data *pgdat)
502{
503 unsigned long limit = node_dirty_limit(pgdat);
504 unsigned long nr_pages = 0;
505
506 nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
507 nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
508 nr_pages += node_page_state(pgdat, NR_WRITEBACK);
509
510 return nr_pages <= limit;
511}
512
513int dirty_background_ratio_handler(struct ctl_table *table, int write,
514 void __user *buffer, size_t *lenp,
515 loff_t *ppos)
516{
517 int ret;
518
519 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
520 if (ret == 0 && write)
521 dirty_background_bytes = 0;
522 return ret;
523}
524
525int dirty_background_bytes_handler(struct ctl_table *table, int write,
526 void __user *buffer, size_t *lenp,
527 loff_t *ppos)
528{
529 int ret;
530
531 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
532 if (ret == 0 && write)
533 dirty_background_ratio = 0;
534 return ret;
535}
536
537int dirty_ratio_handler(struct ctl_table *table, int write,
538 void __user *buffer, size_t *lenp,
539 loff_t *ppos)
540{
541 int old_ratio = vm_dirty_ratio;
542 int ret;
543
544 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
545 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
546 writeback_set_ratelimit();
547 vm_dirty_bytes = 0;
548 }
549 return ret;
550}
551
552int dirty_bytes_handler(struct ctl_table *table, int write,
553 void __user *buffer, size_t *lenp,
554 loff_t *ppos)
555{
556 unsigned long old_bytes = vm_dirty_bytes;
557 int ret;
558
559 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
560 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
561 writeback_set_ratelimit();
562 vm_dirty_ratio = 0;
563 }
564 return ret;
565}
566
567static unsigned long wp_next_time(unsigned long cur_time)
568{
569 cur_time += VM_COMPLETIONS_PERIOD_LEN;
570
571 if (!cur_time)
572 return 1;
573 return cur_time;
574}
575
/* Account one page-writeout completion to @completions within @dom. */
static void wb_domain_writeout_inc(struct wb_domain *dom,
				   struct fprop_local_percpu *completions,
				   unsigned int max_prop_frac)
{
	__fprop_inc_percpu_max(&dom->completions, completions,
			       max_prop_frac);
	/* First event after period switching was turned off? */
	if (unlikely(!dom->period_time)) {
		/*
		 * We can race with other __bdi_writeout_inc calls here but
		 * it does not cause any harm since the resulting time when
		 * timer will fire and what is in writeout period buffer will
		 * be roughly the same.
		 */
		dom->period_time = wp_next_time(jiffies);
		mod_timer(&dom->period_timer, dom->period_time);
	}
}
594
595
596
597
598
/*
 * Increment @wb's writeout completion count and the global writeout
 * completion count.  Called from test_clear_page_writeback().
 * Accounts to the global domain and, if present, to the wb's memcg
 * domain as well.
 */
static inline void __wb_writeout_inc(struct bdi_writeback *wb)
{
	struct wb_domain *cgdom;

	inc_wb_stat(wb, WB_WRITTEN);
	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
			       wb->bdi->max_prop_frac);

	cgdom = mem_cgroup_wb_domain(wb);
	if (cgdom)
		wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
				       wb->bdi->max_prop_frac);
}
612
/* IRQ-safe wrapper around __wb_writeout_inc(). */
void wb_writeout_inc(struct bdi_writeback *wb)
{
	unsigned long irq_flags;

	local_irq_save(irq_flags);
	__wb_writeout_inc(wb);
	local_irq_restore(irq_flags);
}
621EXPORT_SYMBOL_GPL(wb_writeout_inc);
622
623
624
625
626
/*
 * On idle system, we can be called long after we scheduled because we use
 * deferred timers so count with missed periods.
 */
static void writeout_period(struct timer_list *t)
{
	struct wb_domain *dom = from_timer(dom, t, period_timer);
	int miss_periods = (jiffies - dom->period_time) /
				 VM_COMPLETIONS_PERIOD_LEN;

	if (fprop_new_period(&dom->completions, miss_periods + 1)) {
		dom->period_time = wp_next_time(dom->period_time +
				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
		mod_timer(&dom->period_timer, dom->period_time);
	} else {
		/*
		 * Aging has zeroed all fractions. Stop wasting CPU on period
		 * updates.  period_time == 0 re-arms the timer on the next
		 * writeout completion (see wb_domain_writeout_inc()).
		 */
		dom->period_time = 0;
	}
}
645
/*
 * Initialize a wb_domain: zero state, deferrable period timer and the
 * flex-proportions completion tracker.  Returns 0 or -ENOMEM from
 * fprop_global_init() (percpu allocation).
 */
int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
{
	memset(dom, 0, sizeof(*dom));

	spin_lock_init(&dom->lock);

	timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE);

	dom->dirty_limit_tstamp = jiffies;

	return fprop_global_init(&dom->completions, gfp);
}
658
659#ifdef CONFIG_CGROUP_WRITEBACK
/* Tear down a wb_domain: stop the period timer, free percpu state. */
void wb_domain_exit(struct wb_domain *dom)
{
	del_timer_sync(&dom->period_timer);
	fprop_global_destroy(&dom->completions);
}
665#endif
666
667
668
669
670
671
672static unsigned int bdi_min_ratio;
673
int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;

	spin_lock_bh(&bdi_lock);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		/*
		 * Apply the change as a delta so the global sum of reserved
		 * min ratios (bdi_min_ratio) stays consistent; the sum of
		 * all bdi min ratios must stay below 100%.
		 */
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}
694
695int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
696{
697 int ret = 0;
698
699 if (max_ratio > 100)
700 return -EINVAL;
701
702 spin_lock_bh(&bdi_lock);
703 if (bdi->min_ratio > max_ratio) {
704 ret = -EINVAL;
705 } else {
706 bdi->max_ratio = max_ratio;
707 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
708 }
709 spin_unlock_bh(&bdi_lock);
710
711 return ret;
712}
713EXPORT_SYMBOL(bdi_set_max_ratio);
714
/*
 * Midpoint between the background and hard dirty thresholds.  Below
 * this many dirty pages, tasks dirty memory without being throttled.
 */
static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	unsigned long sum = thresh + bg_thresh;

	return sum / 2;
}
720
721static unsigned long hard_dirty_limit(struct wb_domain *dom,
722 unsigned long thresh)
723{
724 return max(thresh, dom->dirty_limit);
725}
726
727
728
729
730
/*
 * Compute a memcg domain's dirtyable memory: its file pages plus however
 * much of its headroom is actually backed by clean memory elsewhere in
 * the system (a memcg can't dirty more than the globally clean pages
 * outside of it).
 */
static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
			    unsigned long filepages, unsigned long headroom)
{
	struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
	unsigned long clean = filepages - min(filepages, mdtc->dirty);
	unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
	unsigned long other_clean = global_clean - min(global_clean, clean);

	mdtc->avail = filepages + min(headroom, other_clean);
}
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
/**
 * __wb_calc_thresh - @wb's share of dirty throttling threshold
 * @dtc: dirty_throttle_control of interest
 *
 * Return: @wb's dirty limit in pages.  The term "dirty" here includes
 * writeback pages.  The wb's share is derived from its recent fraction
 * of the domain's writeout completions, then adjusted by the bdi
 * min/max ratios so a slow device cannot hold a large share of the
 * global threshold hostage.
 */
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	u64 wb_thresh;
	long numerator, denominator;
	unsigned long wb_min_ratio, wb_max_ratio;

	/*
	 * Calculate this BDI's share of the thresh ratio.
	 */
	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
			      &numerator, &denominator);

	wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
	wb_thresh *= numerator;
	do_div(wb_thresh, denominator);

	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);

	wb_thresh += (thresh * wb_min_ratio) / 100;
	if (wb_thresh > (thresh * wb_max_ratio) / 100)
		wb_thresh = thresh * wb_max_ratio / 100;

	return wb_thresh;
}
789
/* Public wrapper: @wb's share of @thresh in the global domain. */
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
					       .thresh = thresh };
	return __wb_calc_thresh(&gdtc);
}
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
/*
 * Third-order polynomial used as the throttle position feedback curve:
 *
 *                       setpoint - dirty 3
 *        f(dirty) := 1.0 + (----------------)
 *                       limit - setpoint
 *
 * it's a 3rd order polynomial that subjects to
 *
 * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
 * (2) f(setpoint) = 1.0 => the balance point
 * (3) f(limit)    = 0   => the hard limit
 *
 * Result is in fixed point, scaled by 2^RATELIMIT_CALC_SHIFT and
 * clamped to [0, 2.0].  The "| 1" avoids division by zero when
 * limit == setpoint.
 */
static long long pos_ratio_polynom(unsigned long setpoint,
					  unsigned long dirty,
					  unsigned long limit)
{
	long long pos_ratio;
	long x;

	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
		      (limit - setpoint) | 1);
	pos_ratio = x;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;

	return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
}
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
/*
 * Compute dtc->pos_ratio, the scaling factor applied to a task's dirty
 * ratelimit based on how far the dirty counts have drifted from their
 * setpoints.  Two feedback curves are combined: a global (domain-level)
 * 3rd-order polynomial around the global setpoint, and a per-wb linear
 * ramp around the wb's own setpoint.  pos_ratio is fixed point,
 * 1 << RATELIMIT_CALC_SHIFT == 1.0.
 */
static void wb_position_ratio(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long wb_thresh = dtc->wb_thresh;
	unsigned long x_intercept;
	unsigned long setpoint;		/* dirty pages' target balance point */
	unsigned long wb_setpoint;
	unsigned long span;
	long long pos_ratio;		/* for scaling up/down the rate limit */
	long x;

	dtc->pos_ratio = 0;

	/* at or over the hard limit: stop the task completely */
	if (unlikely(dtc->dirty >= limit))
		return;

	/*
	 * global setpoint: halfway between freerun ceiling and limit.
	 *
	 * See comment for pos_ratio_polynom().
	 */
	setpoint = (freerun + limit) / 2;
	pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);

	/*
	 * The strictlimit feature is a tool preventing mistrusted
	 * filesystems from growing a large number of dirty pages before
	 * throttling.  For such filesystems balance_dirty_pages always
	 * checks wb counters against wb limits.  Even if global "nr_dirty"
	 * is under "freerun", we have to throttle tasks writing to such
	 * filesystems once the wb counters are over the wb limits.  The
	 * wb's pos_ratio is calculated the same way as the global one but
	 * on wb_setpoint/wb_dirty/wb_thresh, and the final pos_ratio is
	 * the minimum of the two.
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		long long wb_pos_ratio;

		/* very few wb-dirty pages: ramp up quickly (capped at 2.0) */
		if (dtc->wb_dirty < 8) {
			dtc->pos_ratio = min_t(long long, pos_ratio * 2,
					       2 << RATELIMIT_CALC_SHIFT);
			return;
		}

		if (dtc->wb_dirty >= wb_thresh)
			return;

		wb_setpoint = dirty_freerun_ceiling(wb_thresh,
						    dtc->wb_bg_thresh);

		if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
			return;

		wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
						 wb_thresh);

		/*
		 * Typically, for strictlimit case, wb_setpoint << setpoint
		 * and pos_ratio >> wb_pos_ratio.  In the other words global
		 * state ("dirty") is not limiting factor and we have to
		 * make decision based on wb counters.  But there is an
		 * important case when global pos_ratio should get precedence:
		 * global limits are exceeded (e.g. due to activities on
		 * other wb's) while given strictlimit wb is below limit.
		 * So take the min of the two.
		 */
		dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
		return;
	}

	/*
	 * We have computed basic pos_ratio above based on global situation.
	 * If the wb is over/under its share of dirty pages, we want to scale
	 * pos_ratio further down/up.  That is done by the following mechanism:
	 * a linear ramp f(wb_dirty) around wb_setpoint, with slope
	 * proportional to the wb's recent write bandwidth.
	 */

	/* sanity: the per-wb threshold can never exceed the domain one */
	if (unlikely(wb_thresh > dtc->thresh))
		wb_thresh = dtc->thresh;
	/*
	 * It's very possible that wb_thresh is close to 0 not because the
	 * device is slow, but that it has remained inactive for long time.
	 * Honour such devices a reasonable good (hopefully IO efficient)
	 * threshold, so that the occasional writes won't be blocked and
	 * active writes can rampup the threshold quickly.
	 */
	wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
	/*
	 * scale global setpoint to wb's: wb_setpoint = setpoint * wb_thresh/thresh
	 */
	x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
	wb_setpoint = setpoint * (u64)x >> 16;
	/*
	 * Use span=(8*write_bw) in single wb case as indicated by
	 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case.
	 */
	span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
	x_intercept = wb_setpoint + span;

	if (dtc->wb_dirty < x_intercept - span / 4) {
		pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
				      (x_intercept - wb_setpoint) | 1);
	} else
		pos_ratio /= 4;

	/*
	 * wb reserve area, safeguard against dirty pool underrun and disk idle
	 * It may push the desired control point of global dirty pages higher
	 * than setpoint.
	 */
	x_intercept = wb_thresh / 2;
	if (dtc->wb_dirty < x_intercept) {
		if (dtc->wb_dirty > x_intercept / 8)
			pos_ratio = div_u64(pos_ratio * x_intercept,
					    dtc->wb_dirty);
		else
			pos_ratio *= 8;
	}

	dtc->pos_ratio = pos_ratio;
}
1081
/*
 * Estimate @wb's write bandwidth from the pages written during the last
 * @elapsed jiffies, smoothed over an (up to) 3-second rolling period,
 * and maintain a further-damped long-term average avg_write_bandwidth.
 */
static void wb_update_write_bandwidth(struct bdi_writeback *wb,
				      unsigned long elapsed,
				      unsigned long written)
{
	const unsigned long period = roundup_pow_of_two(3 * HZ);
	unsigned long avg = wb->avg_write_bandwidth;
	unsigned long old = wb->write_bandwidth;
	u64 bw;

	/*
	 * bw = written * HZ / elapsed
	 *
	 *                   bw * elapsed + write_bandwidth * (period - elapsed)
	 * write_bandwidth = ---------------------------------------------------
	 *                                          period
	 *
	 * @written may have decreased due to account_page_redirty().
	 * Avoid underflowing @bw calculation.
	 */
	bw = written - min(written, wb->written_stamp);
	bw *= HZ;
	if (unlikely(elapsed > period)) {
		/* sample window longer than the period: use the raw rate */
		do_div(bw, elapsed);
		avg = bw;
		goto out;
	}
	bw += (u64)wb->write_bandwidth * (period - elapsed);
	bw >>= ilog2(period);

	/*
	 * one more level of smoothing, for filtering out sudden spikes
	 */
	if (avg > old && old >= (unsigned long)bw)
		avg -= (avg - old) >> 3;

	if (avg < old && old <= (unsigned long)bw)
		avg += (old - avg) >> 3;

out:
	/* keep avg > 0 to guarantee that tot > 0 if there are dirty wbs */
	avg = max(avg, 1LU);
	if (wb_has_dirty_io(wb)) {
		long delta = avg - wb->avg_write_bandwidth;
		WARN_ON_ONCE(atomic_long_add_return(delta,
					&wb->bdi->tot_write_bandwidth) <= 0);
	}
	wb->write_bandwidth = bw;
	wb->avg_write_bandwidth = avg;
}
1131
/*
 * Track the domain-wide dirty_limit: it follows thresh up instantly but
 * decays toward (thresh, dirty) slowly (1/32 per interval), so a sudden
 * drop of the configured threshold doesn't instantly put every dirtier
 * over the hard limit.
 */
static void update_dirty_limit(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	unsigned long limit = dom->dirty_limit;

	/*
	 * Follow up in one step.
	 */
	if (limit < thresh) {
		limit = thresh;
		goto update;
	}

	/*
	 * Follow down slowly. Use the higher one as the target, because
	 * thresh may drop below dirty. This is exactly the reason to
	 * introduce dom->dirty_limit which is guaranteed to lie above the
	 * dirty pages.
	 */
	thresh = max(thresh, dtc->dirty);
	if (limit > thresh) {
		limit -= (limit - thresh) >> 5;
		goto update;
	}
	return;
update:
	dom->dirty_limit = limit;
}
1160
/* Rate-limited, lock-protected update of the domain's dirty_limit. */
static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
				    unsigned long now)
{
	struct wb_domain *dom = dtc_dom(dtc);

	/*
	 * check locklessly first to optimize away locking for the most time
	 */
	if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
		return;

	spin_lock(&dom->lock);
	/* recheck under the lock - another CPU may have just updated */
	if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
		update_dirty_limit(dtc);
		dom->dirty_limit_tstamp = now;
	}
	spin_unlock(&dom->lock);
}
1179
1180
1181
1182
1183
1184
1185
/*
 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal wb tasks will be curbed at or below it in long term.
 * Obviously it should be around (write_bw / N) when there are N dd tasks.
 */
static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
				      unsigned long dirtied,
				      unsigned long elapsed)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long dirty = dtc->dirty;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long setpoint = (freerun + limit) / 2;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long dirty_ratelimit = wb->dirty_ratelimit;
	unsigned long dirty_rate;
	unsigned long task_ratelimit;
	unsigned long balanced_dirty_ratelimit;
	unsigned long step;
	unsigned long x;
	unsigned long shift;

	/*
	 * The dirty rate will match the writeout rate in long term, except
	 * when dirty pages are truncated by userspace or re-dirtied by FS.
	 */
	dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;

	/*
	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
	 */
	task_ratelimit = (u64)dirty_ratelimit *
					dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
	task_ratelimit++; /* it helps rampup dirty_ratelimit from tiny values */

	/*
	 * A linear estimation of the "balanced" throttle rate. The theory is,
	 * if there are N dd tasks, each throttled at task_ratelimit, the wb's
	 * dirty_rate will be measured to be (N * task_ratelimit). So the
	 * balanced rate, which keeps dirty_rate == write_bw, is:
	 *
	 *	balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate
	 *
	 * The "| 1" avoids division by zero.  Estimation errors from the
	 * various sampled counters largely cancel out of the quotient.
	 */
	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
					   dirty_rate | 1);

	/*
	 * balanced_dirty_ratelimit ~ write_bw / N, so it can't exceed write_bw
	 */
	if (unlikely(balanced_dirty_ratelimit > write_bw))
		balanced_dirty_ratelimit = write_bw;

	/*
	 * We could safely do dirty_ratelimit = balanced_dirty_ratelimit
	 * if there are not-too-large errors. However dirty_ratelimit should
	 * stay reasonably steady, so restrict updates to a single "step"
	 * computed below, and only move when dirty is on the wrong side of
	 * the setpoint AND dirty_ratelimit is on the wrong side of
	 * balanced_dirty_ratelimit - this filters out singular points and
	 * dampens oscillation.
	 */
	step = 0;

	/*
	 * For strictlimit case, calculations above were based on wb counters
	 * and limits (starting from pos_ratio = wb_position_ratio() and up to
	 * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
	 * Hence, to calculate "step" properly, we have to use wb_dirty as
	 * "dirty" and wb_setpoint as "setpoint".
	 *
	 * We rampup dirty_ratelimit forcibly if wb_dirty is low because
	 * it's possible that wb_thresh is close to zero due to inactivity
	 * of backing device.
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		dirty = dtc->wb_dirty;
		if (dtc->wb_dirty < 8)
			setpoint = dtc->wb_dirty + 1;
		else
			setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
	}

	if (dirty < setpoint) {
		x = min3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit < x)
			step = x - dirty_ratelimit;
	} else {
		x = max3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit > x)
			step = dirty_ratelimit - x;
	}

	/*
	 * Don't pursue 100% rate matching. It's impossible since the balanced
	 * rate itself is constantly fluctuating. So decrease the track speed
	 * when it gets close to the target. Helps eliminate pointless tremors.
	 */
	shift = dirty_ratelimit / (2 * step + 1);
	if (shift < BITS_PER_LONG)
		step = DIV_ROUND_UP(step >> shift, 8);
	else
		step = 0;

	if (dirty_ratelimit < balanced_dirty_ratelimit)
		dirty_ratelimit += step;
	else
		dirty_ratelimit -= step;

	wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
	wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;

	trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
}
1343
/*
 * Periodically (every BANDWIDTH_INTERVAL) refresh @wb's write bandwidth
 * estimate and, when @update_ratelimit, the global/memcg dirty limits
 * and the wb's dirty ratelimit.  Caller must hold wb->list_lock.
 */
static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
				  struct dirty_throttle_control *mdtc,
				  unsigned long start_time,
				  bool update_ratelimit)
{
	struct bdi_writeback *wb = gdtc->wb;
	unsigned long now = jiffies;
	unsigned long elapsed = now - wb->bw_time_stamp;
	unsigned long dirtied;
	unsigned long written;

	lockdep_assert_held(&wb->list_lock);

	/*
	 * rate-limit, only update once every 200ms.
	 */
	if (elapsed < BANDWIDTH_INTERVAL)
		return;

	dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
	written = percpu_counter_read(&wb->stat[WB_WRITTEN]);

	/*
	 * Skip quiet periods when disk bandwidth is under-utilized.
	 * (at least 1s idle time between two flusher runs)
	 */
	if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
		goto snapshot;

	if (update_ratelimit) {
		domain_update_bandwidth(gdtc, now);
		wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);

		/*
		 * @mdtc is always NULL if !CGROUP_WRITEBACK but the
		 * compiler has no way to figure that out.  Help it.
		 */
		if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
			domain_update_bandwidth(mdtc, now);
			wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
		}
	}
	wb_update_write_bandwidth(wb, elapsed, written);

snapshot:
	wb->dirtied_stamp = dirtied;
	wb->written_stamp = written;
	wb->bw_time_stamp = now;
}
1393
/* Public wrapper: refresh @wb's write bandwidth only (no ratelimits). */
void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };

	__wb_update_bandwidth(&gdtc, NULL, start_time, false);
}
1400
1401
1402
1403
1404
1405
1406
1407
1408
/*
 * After a task dirties this many pages, balance_dirty_pages_ratelimited()
 * will look at the dirty limits again.  Roughly sqrt(gap) pages, so the
 * closer we get to the threshold the more frequently we check.
 */
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	unsigned long gap;

	/* at or above the threshold: recheck after every page */
	if (dirty >= thresh)
		return 1;

	gap = thresh - dirty;
	return 1UL << (ilog2(gap) >> 1);
}
1417
static unsigned long wb_max_pause(struct bdi_writeback *wb,
				  unsigned long wb_dirty)
{
	unsigned long bw = wb->avg_write_bandwidth;
	unsigned long t;

	/*
	 * Limit pause time for small memory systems. If sleeping for too long
	 * time, a small pool of dirty/writeback pages may go empty and disk go
	 * idle.
	 *
	 * 8 serves as the safety ratio.
	 */
	t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
	t++;

	return min_t(unsigned long, t, MAX_PAUSE);
}
1436
/*
 * Compute the longest acceptable pause for one throttle sleep, and the
 * number of pages a task may dirty before it must be throttled again
 * (*nr_dirtied_pause).  Returns the min pause in jiffies.
 */
static long wb_min_pause(struct bdi_writeback *wb,
			 long max_pause,
			 unsigned long task_ratelimit,
			 unsigned long dirty_ratelimit,
			 int *nr_dirtied_pause)
{
	long hi = ilog2(wb->avg_write_bandwidth);
	long lo = ilog2(wb->dirty_ratelimit);
	long t;		/* target pause */
	long pause;	/* estimated next pause */
	int pages;	/* target nr_dirtied_pause */

	/* target for 10ms pause on 1-dd case */
	t = max(1, HZ / 100);

	/*
	 * Scale up pause time for concurrent dirtiers in order to reduce CPU
	 * overheads.
	 *
	 * (N * 10ms) on 2^N concurrent tasks.
	 */
	if (hi > lo)
		t += (hi - lo) * (10 * HZ) / 1024;

	/*
	 * This is a bit convoluted. We try to base the next nr_dirtied_pause
	 * on the much more stable dirty_ratelimit. However the next pause time
	 * will be computed based on task_ratelimit and the two rate limits may
	 * depart considerably at some time. Especially if task_ratelimit goes
	 * below dirty_ratelimit/2 and the target pause is max_pause, the next
	 * pause time will be max_pause*2 _trimmed down_ to max_pause. As a
	 * result task_ratelimit won't be executed faithfully, which could
	 * eventually bring down dirty_ratelimit.
	 *
	 * We apply two rules to fix it up:
	 * 1) try to estimate the next pause time and if necessary, use a lower
	 *    nr_dirtied_pause so as not to exceed max_pause. When this happens,
	 *    nr_dirtied_pause will be "dancing" with task_ratelimit.
	 * 2) limit the target pause time to max_pause/2, so that the normal
	 *    small fluctuations of task_ratelimit won't trigger rule (1) and
	 *    nr_dirtied_pause will remain as stable as dirty_ratelimit.
	 */
	t = min(t, 1 + max_pause / 2);
	pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);

	/*
	 * Tiny nr_dirtied_pause is found to hurt I/O performance in the test
	 * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}.
	 * When the 16 consecutive reads are often interrupted by some dirty
	 * throttling pause during the async writes, cfq will go into idles
	 * (deadline is fine). So push nr_dirtied_pause as high as possible
	 * until reaches DIRTY_POLL_THRESH=32 pages.
	 */
	if (pages < DIRTY_POLL_THRESH) {
		t = max_pause;
		pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
		if (pages > DIRTY_POLL_THRESH) {
			pages = DIRTY_POLL_THRESH;
			t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
		}
	}

	pause = HZ * pages / (task_ratelimit + 1);
	if (pause > max_pause) {
		t = max_pause;
		pages = task_ratelimit * t / roundup_pow_of_two(HZ);
	}

	*nr_dirtied_pause = pages;
	/*
	 * The minimal pause time will normally be half the target pause time.
	 */
	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
}
1511
/* Fill @dtc's per-wb threshold and dirty-count fields. */
static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long wb_reclaimable;

	/*
	 * wb_thresh is not treated as some limiting factor as
	 * dirty_thresh, due to reasons
	 * - in JBOD setup, wb_thresh can fluctuate a lot
	 * - in a system with HDD and USB key, the USB key may somehow
	 *   go into state (wb_dirty >> wb_thresh) either because
	 *   wb_dirty starts high, or because wb_thresh drops low.
	 *   In this case we don't want to hard throttle the USB key
	 *   dirtiers for 100 seconds until wb_dirty drops under
	 *   wb_thresh. Instead the auxiliary wb control line in
	 *   wb_position_ratio() will let the dirtier task progress
	 *   at some rate <= (write_bw / 2) for bringing down wb_dirty.
	 */
	dtc->wb_thresh = __wb_calc_thresh(dtc);
	dtc->wb_bg_thresh = dtc->thresh ?
		div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;

	/*
	 * In order to avoid the stacked BDI deadlock we need
	 * to ensure we accurately count the 'dirty' pages when
	 * the threshold is low.
	 *
	 * Otherwise it would be possible to get thresh+n pages
	 * reported dirty, even though there are thresh-m pages
	 * actually dirty; with m+n sitting in the percpu
	 * deltas.
	 */
	if (dtc->wb_thresh < 2 * wb_stat_error()) {
		wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
		dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
	} else {
		wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
		dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
	}
}
1552
1553
1554
1555
1556
1557
1558
1559
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 */
static void balance_dirty_pages(struct bdi_writeback *wb,
				unsigned long pages_dirtied)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;
	struct dirty_throttle_control *sdtc;
	unsigned long nr_reclaimable;	/* = file_dirty + unstable_nfs */
	long period;
	long pause;
	long max_pause;
	long min_pause;
	int nr_dirtied_pause;
	bool dirty_exceeded = false;
	unsigned long task_ratelimit;
	unsigned long dirty_ratelimit;
	struct backing_dev_info *bdi = wb->bdi;
	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
	unsigned long start_time = jiffies;

	for (;;) {
		unsigned long now = jiffies;
		unsigned long dirty, thresh, bg_thresh;
		unsigned long m_dirty = 0;	/* stop bogus uninit warnings */
		unsigned long m_thresh = 0;
		unsigned long m_bg_thresh = 0;

		/*
		 * Unstable writes are a feature of certain networked
		 * filesystems (i.e. NFS) in which data may have been
		 * written to the server's write cache, but has not yet
		 * been flushed to permanent storage.
		 */
		nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
					global_node_page_state(NR_UNSTABLE_NFS);
		gdtc->avail = global_dirtyable_memory();
		gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);

		domain_dirty_limits(gdtc);

		if (unlikely(strictlimit)) {
			wb_dirty_limits(gdtc);

			dirty = gdtc->wb_dirty;
			thresh = gdtc->wb_thresh;
			bg_thresh = gdtc->wb_bg_thresh;
		} else {
			dirty = gdtc->dirty;
			thresh = gdtc->thresh;
			bg_thresh = gdtc->bg_thresh;
		}

		if (mdtc) {
			unsigned long filepages, headroom, writeback;

			/*
			 * If @wb belongs to !root memcg, repeat the same
			 * basic calculations for the memcg domain.
			 */
			mem_cgroup_wb_stats(wb, &filepages, &headroom,
					    &mdtc->dirty, &writeback);
			mdtc->dirty += writeback;
			mdtc_calc_avail(mdtc, filepages, headroom);

			domain_dirty_limits(mdtc);

			if (unlikely(strictlimit)) {
				wb_dirty_limits(mdtc);
				m_dirty = mdtc->wb_dirty;
				m_thresh = mdtc->wb_thresh;
				m_bg_thresh = mdtc->wb_bg_thresh;
			} else {
				m_dirty = mdtc->dirty;
				m_thresh = mdtc->thresh;
				m_bg_thresh = mdtc->bg_thresh;
			}
		}

		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the wb limits are ramping up in case of !strictlimit.
		 *
		 * In strictlimit case make decision based on the wb counters
		 * and limits. Small writeouts when the wb limits are ramping
		 * up are the price we consciously pay for strictlimit-ing.
		 *
		 * If memcg domain is in effect, @dirty should be under
		 * both global and memcg freerun ceilings.
		 */
		if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
		    (!mdtc ||
		     m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
			unsigned long intv = dirty_poll_interval(dirty, thresh);
			unsigned long m_intv = ULONG_MAX;

			current->dirty_paused_when = now;
			current->nr_dirtied = 0;
			if (mdtc)
				m_intv = dirty_poll_interval(m_dirty, m_thresh);
			current->nr_dirtied_pause = min(intv, m_intv);
			break;
		}

		if (unlikely(!writeback_in_progress(wb)))
			wb_start_background_writeback(wb);

		/*
		 * Calculate global domain's pos_ratio and select the
		 * global dtc by default.
		 */
		if (!strictlimit)
			wb_dirty_limits(gdtc);

		dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
			((gdtc->dirty > gdtc->thresh) || strictlimit);

		wb_position_ratio(gdtc);
		sdtc = gdtc;

		if (mdtc) {
			/*
			 * If memcg domain is in effect, calculate its
			 * pos_ratio.  @wb should satisfy constraints from
			 * both global and memcg domains.  Choose the one
			 * w/ lower pos_ratio.
			 */
			if (!strictlimit)
				wb_dirty_limits(mdtc);

			dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
				((mdtc->dirty > mdtc->thresh) || strictlimit);

			wb_position_ratio(mdtc);
			if (mdtc->pos_ratio < gdtc->pos_ratio)
				sdtc = mdtc;
		}

		if (dirty_exceeded && !wb->dirty_exceeded)
			wb->dirty_exceeded = 1;

		if (time_is_before_jiffies(wb->bw_time_stamp +
					   BANDWIDTH_INTERVAL)) {
			spin_lock(&wb->list_lock);
			__wb_update_bandwidth(gdtc, mdtc, start_time, true);
			spin_unlock(&wb->list_lock);
		}

		/* throttle according to the chosen dtc */
		dirty_ratelimit = wb->dirty_ratelimit;
		task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
							RATELIMIT_CALC_SHIFT;
		max_pause = wb_max_pause(wb, sdtc->wb_dirty);
		min_pause = wb_min_pause(wb, max_pause,
					 task_ratelimit, dirty_ratelimit,
					 &nr_dirtied_pause);

		if (unlikely(task_ratelimit == 0)) {
			period = max_pause;
			pause = max_pause;
			goto pause;
		}
		period = HZ * pages_dirtied / task_ratelimit;
		pause = period;
		if (current->dirty_paused_when)
			pause -= now - current->dirty_paused_when;
		/*
		 * For less than 1s think time (ext3/4 may block the dirtier
		 * for up to 800ms from time to time on 1-HDD; so does xfs,
		 * however at much less frequency), try to compensate it in
		 * future periods by updating the virtual time; otherwise just
		 * do a reset, as it may be a light dirtier.
		 */
		if (pause < min_pause) {
			trace_balance_dirty_pages(wb,
						  sdtc->thresh,
						  sdtc->bg_thresh,
						  sdtc->dirty,
						  sdtc->wb_thresh,
						  sdtc->wb_dirty,
						  dirty_ratelimit,
						  task_ratelimit,
						  pages_dirtied,
						  period,
						  min(pause, 0L),
						  start_time);
			if (pause < -HZ) {
				current->dirty_paused_when = now;
				current->nr_dirtied = 0;
			} else if (period) {
				current->dirty_paused_when += period;
				current->nr_dirtied = 0;
			} else if (current->nr_dirtied_pause <= pages_dirtied)
				current->nr_dirtied_pause += pages_dirtied;
			break;
		}
		if (unlikely(pause > max_pause)) {
			/* for occasional dropped task_ratelimit */
			now += min(pause - max_pause, max_pause);
			pause = max_pause;
		}

pause:
		trace_balance_dirty_pages(wb,
					  sdtc->thresh,
					  sdtc->bg_thresh,
					  sdtc->dirty,
					  sdtc->wb_thresh,
					  sdtc->wb_dirty,
					  dirty_ratelimit,
					  task_ratelimit,
					  pages_dirtied,
					  period,
					  pause,
					  start_time);
		__set_current_state(TASK_KILLABLE);
		wb->dirty_sleep = now;
		io_schedule_timeout(pause);

		current->dirty_paused_when = now + pause;
		current->nr_dirtied = 0;
		current->nr_dirtied_pause = nr_dirtied_pause;

		/*
		 * This is typically equal to (dirty < thresh) and can also
		 * keep "1000+ dd on a slow USB stick" under control.
		 */
		if (task_ratelimit)
			break;

		/*
		 * In the case of an unresponding NFS server and the NFS dirty
		 * pages exceeds dirty_thresh, give the other good wb's a pipe
		 * to go through, so that tasks on them still remain responsive.
		 *
		 * In theory 1 page is enough to keep the consumer-producer
		 * pipe going: the flusher cleans 1 page => the task dirties 1
		 * more page. However wb_dirty has accounting errors.  So use
		 * the larger and more IO friendly wb_stat_error.
		 */
		if (sdtc->wb_dirty <= wb_stat_error())
			break;

		if (fatal_signal_pending(current))
			break;
	}

	if (!dirty_exceeded && wb->dirty_exceeded)
		wb->dirty_exceeded = 0;

	if (writeback_in_progress(wb))
		return;

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if (laptop_mode)
		return;

	if (nr_reclaimable > gdtc->bg_thresh)
		wb_start_background_writeback(wb);
}
1829
/*
 * Per-CPU count of recently dirtied pages (bumped in account_page_dirtied()).
 * balance_dirty_pages_ratelimited() zeroes it once it crosses
 * ratelimit_pages, forcing a trip into balance_dirty_pages() so no single
 * CPU can accumulate unbounded dirtying between throttle checks.
 */
static DEFINE_PER_CPU(int, bdp_ratelimits);
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
/*
 * Per-CPU pool of dirtied pages that "leaked" past throttling (deposited
 * elsewhere, e.g. when a task exits with a nonzero nr_dirtied).
 * balance_dirty_pages_ratelimited() folds these back into the current
 * task's nr_dirtied so short-lived tasks cannot escape dirty throttling.
 */
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  Most of the time this is a cheap per-task
 * counter check; the expensive balance_dirty_pages() path is entered only
 * once current->nr_dirtied reaches the task's allowed pause quota.
 */
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;
	int ratelimit;
	int *p;

	if (!bdi_cap_account_dirty(bdi))
		return;

	/*
	 * With cgroup writeback enabled, throttle against the current
	 * task's memcg wb; fall back to the bdi-embedded wb if one cannot
	 * be found or created.
	 */
	if (inode_cgwb_enabled(inode))
		wb = wb_get_create_current(bdi, GFP_KERNEL);
	if (!wb)
		wb = &bdi->wb;

	ratelimit = current->nr_dirtied_pause;
	if (wb->dirty_exceeded)
		/* Limits exceeded: re-check after at most 32KB of dirtying. */
		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));

	preempt_disable();
	/*
	 * This prevents one CPU from accumulating too many dirtied pages
	 * without entering balance_dirty_pages(), which can happen when
	 * many tasks all honour overly large nr_dirtied_pause values at
	 * the same time.
	 */
	p = this_cpu_ptr(&bdp_ratelimits);
	if (unlikely(current->nr_dirtied >= ratelimit))
		*p = 0;
	else if (unlikely(*p >= ratelimit_pages)) {
		*p = 0;
		ratelimit = 0;	/* force the balance call below */
	}
	/*
	 * Pick up dirtied pages deposited in dirty_throttle_leaks and
	 * charge them to the current task, so that dirtying which slipped
	 * past throttling is still paid for eventually.
	 */
	p = this_cpu_ptr(&dirty_throttle_leaks);
	if (*p > 0 && current->nr_dirtied < ratelimit) {
		unsigned long nr_pages_dirtied;
		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
		*p -= nr_pages_dirtied;
		current->nr_dirtied += nr_pages_dirtied;
	}
	preempt_enable();

	if (unlikely(current->nr_dirtied >= ratelimit))
		balance_dirty_pages(wb, current->nr_dirtied);

	wb_put(wb);	/* drop the ref from wb_get_create_current(), if any */
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
1915
1916
1917
1918
1919
1920
1921
1922
/**
 * wb_over_bg_thresh - does @wb need to be written back?
 * @wb: bdi_writeback of interest
 *
 * Determines whether background writeback should keep writing @wb or it's
 * clean enough.  Checks both the global domain and, with cgroup writeback
 * enabled, the memcg domain.  Returns %true if writeback should continue.
 */
bool wb_over_bg_thresh(struct bdi_writeback *wb)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;

	/*
	 * Similar to balance_dirty_pages() but ignores pages being written
	 * as we're trying to decide whether to put more under writeback.
	 */
	gdtc->avail = global_dirtyable_memory();
	gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
		      global_node_page_state(NR_UNSTABLE_NFS);
	domain_dirty_limits(gdtc);

	if (gdtc->dirty > gdtc->bg_thresh)
		return true;

	if (wb_stat(wb, WB_RECLAIMABLE) >
	    wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
		return true;

	if (mdtc) {
		unsigned long filepages, headroom, writeback;

		/* Repeat the checks against the memcg's own domain. */
		mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
				    &writeback);
		mdtc_calc_avail(mdtc, filepages, headroom);
		domain_dirty_limits(mdtc);	/* also updates ->dirty */

		if (mdtc->dirty > mdtc->bg_thresh)
			return true;

		if (wb_stat(wb, WB_RECLAIMABLE) >
		    wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
			return true;
	}

	return false;
}
1965
1966
1967
1968
1969int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
1970 void __user *buffer, size_t *length, loff_t *ppos)
1971{
1972 unsigned int old_interval = dirty_writeback_interval;
1973 int ret;
1974
1975 ret = proc_dointvec(table, write, buffer, length, ppos);
1976
1977
1978
1979
1980
1981
1982
1983
1984 if (!ret && write && dirty_writeback_interval &&
1985 dirty_writeback_interval != old_interval)
1986 wakeup_flusher_threads(WB_REASON_PERIODIC);
1987
1988 return ret;
1989}
1990
1991#ifdef CONFIG_BLOCK
1992void laptop_mode_timer_fn(struct timer_list *t)
1993{
1994 struct backing_dev_info *backing_dev_info =
1995 from_timer(backing_dev_info, t, laptop_mode_wb_timer);
1996
1997 wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER);
1998}
1999
2000
2001
2002
2003
2004
/*
 * In laptop mode, I/O has just completed on @info's device: (re-)arm the
 * per-bdi writeback timer to fire laptop_mode jiffies from now.  Each
 * later completion pushes the deadline further out, so writeback is only
 * triggered once the device has been idle for a full laptop_mode period.
 */
void laptop_io_completion(struct backing_dev_info *info)
{
	mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
}
2009
2010
2011
2012
2013
2014
/*
 * We're in laptop mode and we've just synced: everything dirty has been
 * written, so cancel the pending laptop-mode writeback timers on all
 * backing devices — there is nothing left for them to flush.
 */
void laptop_sync_completion(void)
{
	struct backing_dev_info *bdi;

	/* RCU protects the bdi_list traversal against concurrent unregister */
	rcu_read_lock();

	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
		del_timer(&bdi->laptop_mode_wb_timer);

	rcu_read_unlock();
}
2026#endif
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039void writeback_set_ratelimit(void)
2040{
2041 struct wb_domain *dom = &global_wb_domain;
2042 unsigned long background_thresh;
2043 unsigned long dirty_thresh;
2044
2045 global_dirty_limits(&background_thresh, &dirty_thresh);
2046 dom->dirty_limit = dirty_thresh;
2047 ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
2048 if (ratelimit_pages < 16)
2049 ratelimit_pages = 16;
2050}
2051
/*
 * CPU hotplug callback: ratelimit_pages depends on num_online_cpus(), so
 * rescale it whenever a CPU comes or goes (registered for both the online
 * and dead cpuhp states in page_writeback_init()).
 */
static int page_writeback_cpu_online(unsigned int cpu)
{
	writeback_set_ratelimit();
	return 0;
}
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
/*
 * Boot-time initialization of the writeback machinery: set up the global
 * wb_domain (failure here is fatal, hence BUG_ON) and hook CPU hotplug so
 * ratelimit_pages tracks the online CPU count.  The same callback serves
 * both the online and dead states.
 */
void __init page_writeback_init(void)
{
	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));

	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
			  page_writeback_cpu_online, NULL);
	cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
			  page_writeback_cpu_online);
}
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * Tag every DIRTY page in [@start, @end] with PAGECACHE_TAG_TOWRITE so a
 * subsequent data-integrity pass in write_cache_pages() only writes pages
 * that were dirty when the pass started — pages dirtied afterwards are not
 * TOWRITE-tagged and cannot livelock the walk.  The i_pages lock is
 * dropped and a reschedule point taken every WRITEBACK_TAG_BATCH tags to
 * bound lock hold time and latency.
 */
void tag_pages_for_writeback(struct address_space *mapping,
			     pgoff_t start, pgoff_t end)
{
#define WRITEBACK_TAG_BATCH 4096
	unsigned long tagged = 0;
	struct radix_tree_iter iter;
	void **slot;

	xa_lock_irq(&mapping->i_pages);
	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start,
							PAGECACHE_TAG_DIRTY) {
		if (iter.index > end)
			break;
		radix_tree_iter_tag_set(&mapping->i_pages, &iter,
							PAGECACHE_TAG_TOWRITE);
		tagged++;
		if ((tagged % WRITEBACK_TAG_BATCH) != 0)
			continue;
		/* resume the iteration after re-taking the lock */
		slot = radix_tree_iter_resume(slot, &iter);
		xa_unlock_irq(&mapping->i_pages);
		cond_resched();
		xa_lock_irq(&mapping->i_pages);
	}
	xa_unlock_irq(&mapping->i_pages);
}
EXPORT_SYMBOL(tag_pages_for_writeback);
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
/**
 * write_cache_pages - walk the dirty pages of an address_space and write them
 * @mapping: address space structure to write
 * @wbc: writeback control; *@wbc->nr_to_write is decremented per page written
 * @writepage: function called for each page
 * @data: opaque data passed through to @writepage
 *
 * Walks the dirty pages of @mapping in ascending index order, clears each
 * page's dirty bit and hands it to @writepage.  Returns the first real
 * error from @writepage, or 0.
 *
 * For data-integrity callers (WB_SYNC_ALL or ->tagged_writepages) the
 * target range is first tagged with PAGECACHE_TAG_TOWRITE and only tagged
 * pages are written, so pages dirtied during the walk cannot livelock it.
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	int ret = 0;
	int done = 0;		/* set when the main loop must terminate */
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;

	pagevec_init(&pvec);
	if (wbc->range_cyclic) {
		/* resume where the previous cyclic pass stopped */
		writeback_index = mapping->writeback_index;
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			done_index = page->index;

			lock_page(page);

			/*
			 * The page may have been truncated or otherwise
			 * detached from @mapping after the lookup but before
			 * we took its lock; skip it in that case.  Holding
			 * the page lock keeps ->mapping stable for the
			 * checks below.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone else cleaned it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					/*
					 * Not a real error: the fs declined
					 * to write this page now.  Treat as
					 * success and keep going.
					 */
					unlock_page(page);
					ret = 0;
				} else {
					/*
					 * Real error: remember the page after
					 * the failing one so a retry can
					 * resume there, and stop the walk so
					 * the error reaches the caller.
					 */
					done_index = page->index + 1;
					done = 1;
					break;
				}
			}

			/*
			 * Non-integrity writeback stops once the nr_to_write
			 * quota is exhausted.  Integrity writeback must keep
			 * going and write every tagged page, or data needed
			 * by a sync could be left dirty.
			 */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic: we hit the end of the file without having
		 * wrapped yet; restart from index 0 up to where this pass
		 * originally started.
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
EXPORT_SYMBOL(write_cache_pages);
2292
2293
2294
2295
2296
2297static int __writepage(struct page *page, struct writeback_control *wbc,
2298 void *data)
2299{
2300 struct address_space *mapping = data;
2301 int ret = mapping->a_ops->writepage(page, wbc);
2302 mapping_set_error(mapping, ret);
2303 return ret;
2304}
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314int generic_writepages(struct address_space *mapping,
2315 struct writeback_control *wbc)
2316{
2317 struct blk_plug plug;
2318 int ret;
2319
2320
2321 if (!mapping->a_ops->writepage)
2322 return 0;
2323
2324 blk_start_plug(&plug);
2325 ret = write_cache_pages(mapping, wbc, __writepage, mapping);
2326 blk_finish_plug(&plug);
2327 return ret;
2328}
2329
2330EXPORT_SYMBOL(generic_writepages);
2331
2332int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
2333{
2334 int ret;
2335
2336 if (wbc->nr_to_write <= 0)
2337 return 0;
2338 while (1) {
2339 if (mapping->a_ops->writepages)
2340 ret = mapping->a_ops->writepages(mapping, wbc);
2341 else
2342 ret = generic_writepages(mapping, wbc);
2343 if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
2344 break;
2345 cond_resched();
2346 congestion_wait(BLK_RW_ASYNC, HZ/50);
2347 }
2348 return ret;
2349}
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
/**
 * write_one_page - write out a single page and wait on I/O
 * @page: the page to write
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * Returns 0 on success; otherwise the ->writepage error, or any error
 * previously recorded against the mapping (filemap_check_errors()).
 */
int write_one_page(struct page *page)
{
	struct address_space *mapping = page->mapping;
	int ret = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	BUG_ON(!PageLocked(page));

	/* wait for any in-flight writeback before starting our own */
	wait_on_page_writeback(page);

	if (clear_page_dirty_for_io(page)) {
		/* hold a ref across writepage, which may drop the lock */
		get_page(page);
		ret = mapping->a_ops->writepage(page, &wbc);
		if (ret == 0)
			wait_on_page_writeback(page);
		put_page(page);
	} else {
		/* page was not dirty; ->writepage won't unlock, so we do */
		unlock_page(page);
	}

	if (!ret)
		ret = filemap_check_errors(mapping);
	return ret;
}
EXPORT_SYMBOL(write_one_page);
2388
2389
2390
2391
/*
 * set_page_dirty() implementation for pages that don't participate in
 * writeback (e.g. special mappings): just set PG_dirty, with no radix-tree
 * tagging or dirty accounting.  Returns 1 if the page was newly dirtied.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (PageDirty(page))
		return 0;
	return !TestSetPageDirty(page);
}
2398
2399
2400
2401
2402
2403
2404
2405
/*
 * Account a newly dirtied page: bump the memcg/zone/node dirty counters,
 * the per-wb statistics, the task's I/O accounting and its dirty-throttle
 * bookkeeping (current->nr_dirtied, bdp_ratelimits).
 *
 * NOTE(review): the non-atomic __inc_* updates rely on caller-provided
 * exclusion — the caller in this file (__set_page_dirty_nobuffers) holds
 * lock_page_memcg() and the i_pages lock; confirm for any new caller.
 */
void account_page_dirtied(struct page *page, struct address_space *mapping)
{
	struct inode *inode = mapping->host;

	trace_writeback_dirty_page(page, mapping);

	if (mapping_cap_account_dirty(mapping)) {
		struct bdi_writeback *wb;

		/* associate the inode with a wb before charging stats to it */
		inode_attach_wb(inode, page);
		wb = inode_to_wb(inode);

		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		__inc_node_page_state(page, NR_DIRTIED);
		inc_wb_stat(wb, WB_RECLAIMABLE);
		inc_wb_stat(wb, WB_DIRTIED);
		task_io_account_write(PAGE_SIZE);
		current->nr_dirtied++;
		this_cpu_inc(bdp_ratelimits);
	}
}
EXPORT_SYMBOL(account_page_dirtied);
2429
2430
2431
2432
2433
2434
/*
 * De-account a page that is being cleaned without writeback: reverse the
 * counters bumped in account_page_dirtied() (except NR_DIRTIED/WB_DIRTIED,
 * which track cumulative dirtying) and credit the task's cancelled-write
 * accounting.
 */
void account_page_cleaned(struct page *page, struct address_space *mapping,
			  struct bdi_writeback *wb)
{
	if (mapping_cap_account_dirty(mapping)) {
		dec_lruvec_page_state(page, NR_FILE_DIRTY);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		dec_wb_stat(wb, WB_RECLAIMABLE);
		task_io_account_cancelled_write(PAGE_SIZE);
	}
}
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
/*
 * For address_spaces which do not use buffers: set PG_dirty, account the
 * dirtying, tag the page dirty in the radix tree and mark the inode
 * I_DIRTY_PAGES.  Returns 1 if the page was newly dirtied, 0 if it was
 * already dirty.
 *
 * A page with no mapping (already truncated) is treated as successfully
 * dirtied with nothing further to do.  NOTE(review): callers are expected
 * to prevent concurrent truncation while this runs (typically via the page
 * lock) — confirm for new callers.
 */
int __set_page_dirty_nobuffers(struct page *page)
{
	lock_page_memcg(page);
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);
		unsigned long flags;

		if (!mapping) {
			unlock_page_memcg(page);
			return 1;
		}

		/* accounting and tree tagging are done under the tree lock */
		xa_lock_irqsave(&mapping->i_pages, flags);
		BUG_ON(page_mapping(page) != mapping);
		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		radix_tree_tag_set(&mapping->i_pages, page_index(page),
				   PAGECACHE_TAG_DIRTY);
		xa_unlock_irqrestore(&mapping->i_pages, flags);
		unlock_page_memcg(page);

		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	unlock_page_memcg(page);
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);
2489
2490
2491
2492
2493
2494
2495
2496
/*
 * Call this whenever redirtying a page, to de-account the cumulative
 * dirtying counters (NR_DIRTIED, WB_DIRTIED, task->nr_dirtied), so that
 * they match the number of dirtied pages that will actually be written out
 * and the dirty throttling doesn't over-charge the task.
 */
void account_page_redirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping && mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		/* pin the inode's wb association while touching wb stats */
		wb = unlocked_inode_to_wb_begin(inode, &cookie);
		current->nr_dirtied--;
		dec_node_page_state(page, NR_DIRTIED);
		dec_wb_stat(wb, WB_DIRTIED);
		unlocked_inode_to_wb_end(inode, &cookie);
	}
}
EXPORT_SYMBOL(account_page_redirty);
2514
2515
2516
2517
2518
2519
2520int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
2521{
2522 int ret;
2523
2524 wbc->pages_skipped++;
2525 ret = __set_page_dirty_nobuffers(page);
2526 account_page_redirty(page);
2527 return ret;
2528}
2529EXPORT_SYMBOL(redirty_page_for_writepage);
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
/*
 * Dirty a page.  For pages with a mapping this dispatches to the
 * mapping's ->set_page_dirty (defaulting to __set_page_dirty_buffers when
 * CONFIG_BLOCK); for pages without a mapping it just sets PG_dirty.
 * Returns 1 if the page was newly dirtied.
 */
int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	page = compound_head(page);
	if (likely(mapping)) {
		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
		/*
		 * PG_reclaim doubles as the readahead flag.  A page being
		 * re-dirtied is no longer an immediate reclaim candidate,
		 * so clear the flag here; NOTE(review): presumably this
		 * avoids a stale PG_reclaim left over from a race with
		 * end_page_writeback() confusing readahead — confirm.
		 */
		if (PageReclaim(page))
			ClearPageReclaim(page);
#ifdef CONFIG_BLOCK
		if (!spd)
			spd = __set_page_dirty_buffers;
#endif
		return (*spd)(page);
	}
	if (!PageDirty(page)) {
		if (!TestSetPageDirty(page))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(set_page_dirty);
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
/*
 * set_page_dirty() variant for callers that don't already hold the page
 * lock: take it around the call, which also serializes against truncation.
 * Returns the set_page_dirty() result.
 */
int set_page_dirty_lock(struct page *page)
{
	int newly_dirty;

	lock_page(page);
	newly_dirty = set_page_dirty(page);
	unlock_page(page);

	return newly_dirty;
}
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
/*
 * Clear a page's dirty flag without writing it back, undoing the dirty
 * accounting if the flag was actually set.  For mappings that don't do
 * dirty accounting this degenerates to a plain ClearPageDirty().
 */
void __cancel_dirty_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		/* keep memcg stats and the wb association stable */
		lock_page_memcg(page);
		wb = unlocked_inode_to_wb_begin(inode, &cookie);

		if (TestClearPageDirty(page))
			account_page_cleaned(page, mapping, wb);

		unlocked_inode_to_wb_end(inode, &cookie);
		unlock_page_memcg(page);
	} else {
		ClearPageDirty(page);
	}
}
EXPORT_SYMBOL(__cancel_dirty_page);
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
/*
 * Clear a page's dirty flag in preparation for writeout, while caring for
 * dirty memory accounting.  Returns 1 if the page was dirty.
 *
 * The page is deliberately left tagged DIRTY in the radix tree, so that a
 * concurrent write-for-sync can still find it via a tagged lookup; the tag
 * is only cleared when the page actually enters writeback (see
 * __test_set_page_writeback()).
 */
int clear_page_dirty_for_io(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret = 0;

	BUG_ON(!PageLocked(page));

	if (mapping && mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		struct wb_lock_cookie cookie = {};

		/*
		 * Transfer any pte dirty state into PG_dirty *before*
		 * clearing it below: page_mkclean() write-protects the
		 * page's ptes and reports whether any were dirty.  Doing
		 * this first, under the page lock, means a pte dirtied
		 * before this point re-dirties the page here rather than
		 * its dirty state being lost; a fault after this point can
		 * only dirty a pte again, which the *next*
		 * clear_page_dirty_for_io() will observe.
		 * NOTE(review): this summarizes the classic race argument —
		 * see the upstream comment in mm/page-writeback.c for the
		 * full reasoning.
		 */
		if (page_mkclean(page))
			set_page_dirty(page);

		/*
		 * Now clear PG_dirty and, if it was set, reverse the
		 * reclaimable-dirty accounting under the wb association
		 * cookie so the stats go to the right wb.
		 */
		wb = unlocked_inode_to_wb_begin(inode, &cookie);
		if (TestClearPageDirty(page)) {
			dec_lruvec_page_state(page, NR_FILE_DIRTY);
			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
			dec_wb_stat(wb, WB_RECLAIMABLE);
			ret = 1;
		}
		unlocked_inode_to_wb_end(inode, &cookie);
		return ret;
	}
	return TestClearPageDirty(page);
}
EXPORT_SYMBOL(clear_page_dirty_for_io);
2706
/*
 * Clear PG_writeback on @page and keep the bookkeeping consistent: drop
 * the radix-tree WRITEBACK tag, update per-wb stats and writeout
 * completions, clear the superblock's inode-writeback mark when this was
 * the mapping's last page under writeback, and fix up memcg/zone/node
 * counters.  Returns the old PG_writeback state.
 */
int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	struct mem_cgroup *memcg;
	struct lruvec *lruvec;
	int ret;

	memcg = lock_page_memcg(page);
	lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
	if (mapping && mapping_use_writeback_tags(mapping)) {
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		xa_lock_irqsave(&mapping->i_pages, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi)) {
				struct bdi_writeback *wb = inode_to_wb(inode);

				dec_wb_stat(wb, WB_WRITEBACK);
				__wb_writeout_inc(wb);
			}
		}

		/* last writeback page gone: the inode is no longer syncing */
		if (mapping->host && !mapping_tagged(mapping,
						     PAGECACHE_TAG_WRITEBACK))
			sb_clear_inode_writeback(mapping->host);

		xa_unlock_irqrestore(&mapping->i_pages, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	/*
	 * The counter updates happen while the memcg is still locked
	 * (lock_page_memcg above) so lruvec stats stay coherent with the
	 * page's memcg association.
	 */
	if (ret) {
		dec_lruvec_state(lruvec, NR_WRITEBACK);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		inc_node_page_state(page, NR_WRITTEN);
	}
	__unlock_page_memcg(memcg);
	return ret;
}
2756
/*
 * Set PG_writeback on @page and keep the bookkeeping consistent: tag the
 * page WRITEBACK in the radix tree, bump per-wb stats, mark the inode as
 * having pages under writeback if it didn't already, and clear the DIRTY
 * tag (the page just got cleaned) plus — unless @keep_write — the TOWRITE
 * tag.  Returns the old PG_writeback state.
 */
int __test_set_page_writeback(struct page *page, bool keep_write)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	lock_page_memcg(page);
	if (mapping && mapping_use_writeback_tags(mapping)) {
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		xa_lock_irqsave(&mapping->i_pages, flags);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			bool on_wblist;

			on_wblist = mapping_tagged(mapping,
						   PAGECACHE_TAG_WRITEBACK);

			radix_tree_tag_set(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi))
				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);

			/*
			 * This was the mapping's first page under writeback:
			 * flag the inode so sync can find and wait on it.
			 */
			if (mapping->host && !on_wblist)
				sb_mark_inode_writeback(mapping->host);
		}
		if (!PageDirty(page))
			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
		if (!keep_write)
			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_TOWRITE);
		xa_unlock_irqrestore(&mapping->i_pages, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret) {
		inc_lruvec_page_state(page, NR_WRITEBACK);
		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
	}
	unlock_page_memcg(page);
	return ret;

}
EXPORT_SYMBOL(__test_set_page_writeback);
2808
2809
2810
2811
2812
/*
 * Return whether any page in @mapping's page cache carries @tag
 * (PAGECACHE_TAG_DIRTY / _WRITEBACK / _TOWRITE) in the radix tree.
 */
int mapping_tagged(struct address_space *mapping, int tag)
{
	return radix_tree_tagged(&mapping->i_pages, tag);
}
EXPORT_SYMBOL(mapping_tagged);
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827void wait_for_stable_page(struct page *page)
2828{
2829 if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
2830 wait_on_page_writeback(page);
2831}
2832EXPORT_SYMBOL_GPL(wait_for_stable_page);
2833