/*
 * mm/page-writeback.c
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 */
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/sched/signal.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"

/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many pages
 * by raising pause time to max_pause when falls below it.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/*
 * Start background writeback (via writeback threads) at this percentage
 * of dirtyable memory.
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory.
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true.
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage.
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory.
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks, in centiseconds
 * (hundredths of a second).
 */
unsigned int dirty_writeback_interval = 5 * 100;

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty,
 * in centiseconds.
 */
unsigned int dirty_expire_interval = 30 * 100;

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
 * a zero value means disabled.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

struct wb_domain global_wb_domain;

/* consolidated parameters for balance_dirty_pages() and its subroutines */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;
	struct dirty_throttle_control *gdtc;	/* only set in memcg dtc's */
#endif
	struct bdi_writeback	*wb;
	struct fprop_local_percpu *wb_completions;

	unsigned long		avail;		/* dirtyable */
	unsigned long		dirty;		/* file_dirty + write + nfs */
	unsigned long		thresh;		/* dirty threshold */
	unsigned long		bg_thresh;	/* dirty background threshold */

	unsigned long		wb_dirty;	/* per-wb counterparts */
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;
};

/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrarily chosen number. The longer the period, the slower fractions will
 * reflect changes in current writeout rate.
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)

#ifdef CONFIG_CGROUP_WRITEBACK

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.dom = &global_wb_domain,		\
				.wb_completions = &(__wb)->completions

#define GDTC_INIT_NO_WB		.dom = &global_wb_domain

#define MDTC_INIT(__wb, __gdtc)	.wb = (__wb),				\
				.dom = mem_cgroup_wb_domain(__wb),	\
				.wb_completions = &(__wb)->memcg_completions, \
				.gdtc = __gdtc

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return mdtc->gdtc;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return &wb->memcg_completions;
}

static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	unsigned long this_bw = wb->avg_write_bandwidth;
	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
	unsigned long long min = wb->bdi->min_ratio;
	unsigned long long max = wb->bdi->max_ratio;

	/*
	 * Scale the bdi's min/max ratios by this wb's share of the
	 * bdi's total write bandwidth.
	 */
	if (this_bw < tot_bw) {
		if (min) {
			min *= this_bw;
			do_div(min, tot_bw);
		}
		if (max < 100) {
			max *= this_bw;
			do_div(max, tot_bw);
		}
	}

	*minp = min;
	*maxp = max;
}

#else	/* CONFIG_CGROUP_WRITEBACK */

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.wb_completions = &(__wb)->completions
#define GDTC_INIT_NO_WB
#define MDTC_INIT(__wb, __gdtc)

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return false;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return &global_wb_domain;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return NULL;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return NULL;
}

static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	*minp = wb->bdi->min_ratio;
	*maxp = wb->bdi->max_ratio;
}

#endif	/* CONFIG_CGROUP_WRITEBACK */

/**
 * node_dirtyable_memory - number of dirtyable pages in a node
 * @pgdat: the node
 *
 * Return: the node's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-node dirty limits.
 */
static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
{
	unsigned long nr_pages = 0;
	int z;

	for (z = 0; z < MAX_NR_ZONES; z++) {
		struct zone *zone = pgdat->node_zones + z;

		if (!populated_zone(zone))
			continue;

		nr_pages += zone_page_state(zone, NR_FREE_PAGES);
	}

	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	nr_pages -= min(nr_pages, pgdat->totalreserve_pages);

	nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
	nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);

	return nr_pages;
}
302
303static unsigned long highmem_dirtyable_memory(unsigned long total)
304{
305#ifdef CONFIG_HIGHMEM
306 int node;
307 unsigned long x = 0;
308 int i;
309
310 for_each_node_state(node, N_HIGH_MEMORY) {
311 for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
312 struct zone *z;
313 unsigned long nr_pages;
314
315 if (!is_highmem_idx(i))
316 continue;
317
318 z = &NODE_DATA(node)->node_zones[i];
319 if (!populated_zone(z))
320 continue;
321
322 nr_pages = zone_page_state(z, NR_FREE_PAGES);
323
324 nr_pages -= min(nr_pages, high_wmark_pages(z));
325 nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
326 nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
327 x += nr_pages;
328 }
329 }
330
331
332
333
334
335
336
337
338
339
340 if ((long)x < 0)
341 x = 0;
342
343
344
345
346
347
348
349 return min(x, total);
350#else
351 return 0;
352#endif
353}

/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Return: the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */
static unsigned long global_dirtyable_memory(void)
{
	unsigned long x;

	x = global_zone_page_state(NR_FREE_PAGES);
	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	x -= min(x, totalreserve_pages);

	x += global_node_page_state(NR_INACTIVE_FILE);
	x += global_node_page_state(NR_ACTIVE_FILE);

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}
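
/*
 * For illustration (hypothetical numbers, not from the original source):
 * with 1,000,000 free pages, 50,000 reserved pages, 200,000 inactive-file
 * and 300,000 active-file pages, and no highmem, global_dirtyable_memory()
 * returns 1,000,000 - 50,000 + 200,000 + 300,000 + 1 = 1,450,001 pages.
 */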
381
382
383
384
385
386
387
388
389
390
391
392static void domain_dirty_limits(struct dirty_throttle_control *dtc)
393{
394 const unsigned long available_memory = dtc->avail;
395 struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
396 unsigned long bytes = vm_dirty_bytes;
397 unsigned long bg_bytes = dirty_background_bytes;
398
399 unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
400 unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
401 unsigned long thresh;
402 unsigned long bg_thresh;
403 struct task_struct *tsk;
404
405
406 if (gdtc) {
407 unsigned long global_avail = gdtc->avail;
408
409
410
411
412
413
414
415
416 if (bytes)
417 ratio = min(DIV_ROUND_UP(bytes, global_avail),
418 PAGE_SIZE);
419 if (bg_bytes)
420 bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
421 PAGE_SIZE);
422 bytes = bg_bytes = 0;
423 }
424
425 if (bytes)
426 thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
427 else
428 thresh = (ratio * available_memory) / PAGE_SIZE;
429
430 if (bg_bytes)
431 bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
432 else
433 bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;
434
435 if (bg_thresh >= thresh)
436 bg_thresh = thresh / 2;
437 tsk = current;
438 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
439 bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
440 thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
441 }
442 dtc->thresh = thresh;
443 dtc->bg_thresh = bg_thresh;
444
445
446 if (!gdtc)
447 trace_global_dirty_state(bg_thresh, thresh);
448}
449
450
451
452
453
454
455
456
457
458void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
459{
460 struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
461
462 gdtc.avail = global_dirtyable_memory();
463 domain_dirty_limits(&gdtc);
464
465 *pbackground = gdtc.bg_thresh;
466 *pdirty = gdtc.thresh;
467}

/**
 * node_dirty_limit - maximum number of dirty pages allowed in a node
 * @pgdat: the node
 *
 * Return: the maximum number of dirty pages allowed in a node, based
 * on the node's dirtyable memory.
 */
static unsigned long node_dirty_limit(struct pglist_data *pgdat)
{
	unsigned long node_memory = node_dirtyable_memory(pgdat);
	struct task_struct *tsk = current;
	unsigned long dirty;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
			node_memory / global_dirtyable_memory();
	else
		dirty = vm_dirty_ratio * node_memory / 100;

	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
		dirty += dirty / 4;

	return dirty;
}
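
/*
 * For illustration (hypothetical numbers): with vm_dirty_ratio = 20 and a
 * node holding 1,000,000 dirtyable pages, node_dirty_limit() returns
 * 200,000 pages, raised to 250,000 for PF_LESS_THROTTLE or realtime tasks.
 */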

/**
 * node_dirty_ok - tells whether a node is within its dirty limits
 * @pgdat: the node to check
 *
 * Return: %true when the dirty pages in @pgdat are within the node's
 * dirty limit, %false if the limit is exceeded.
 */
bool node_dirty_ok(struct pglist_data *pgdat)
{
	unsigned long limit = node_dirty_limit(pgdat);
	unsigned long nr_pages = 0;

	nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
	nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
	nr_pages += node_page_state(pgdat, NR_WRITEBACK);

	return nr_pages <= limit;
}
512
513int dirty_background_ratio_handler(struct ctl_table *table, int write,
514 void __user *buffer, size_t *lenp,
515 loff_t *ppos)
516{
517 int ret;
518
519 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
520 if (ret == 0 && write)
521 dirty_background_bytes = 0;
522 return ret;
523}
524
525int dirty_background_bytes_handler(struct ctl_table *table, int write,
526 void __user *buffer, size_t *lenp,
527 loff_t *ppos)
528{
529 int ret;
530
531 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
532 if (ret == 0 && write)
533 dirty_background_ratio = 0;
534 return ret;
535}
536
537int dirty_ratio_handler(struct ctl_table *table, int write,
538 void __user *buffer, size_t *lenp,
539 loff_t *ppos)
540{
541 int old_ratio = vm_dirty_ratio;
542 int ret;
543
544 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
545 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
546 writeback_set_ratelimit();
547 vm_dirty_bytes = 0;
548 }
549 return ret;
550}
551
552int dirty_bytes_handler(struct ctl_table *table, int write,
553 void __user *buffer, size_t *lenp,
554 loff_t *ppos)
555{
556 unsigned long old_bytes = vm_dirty_bytes;
557 int ret;
558
559 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
560 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
561 writeback_set_ratelimit();
562 vm_dirty_ratio = 0;
563 }
564 return ret;
565}
566
567static unsigned long wp_next_time(unsigned long cur_time)
568{
569 cur_time += VM_COMPLETIONS_PERIOD_LEN;
570
571 if (!cur_time)
572 return 1;
573 return cur_time;
574}
575
576static void wb_domain_writeout_inc(struct wb_domain *dom,
577 struct fprop_local_percpu *completions,
578 unsigned int max_prop_frac)
579{
580 __fprop_inc_percpu_max(&dom->completions, completions,
581 max_prop_frac);
582
583 if (unlikely(!dom->period_time)) {
		/*
		 * We can race with other __wb_writeout_inc calls here but
		 * it does not cause any harm since the resulting time when
		 * the timer will fire and what is in writeout_period_time
		 * will be roughly the same.
		 */
590 dom->period_time = wp_next_time(jiffies);
591 mod_timer(&dom->period_timer, dom->period_time);
592 }
593}

/*
 * Increment @wb's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
599static inline void __wb_writeout_inc(struct bdi_writeback *wb)
600{
601 struct wb_domain *cgdom;
602
603 inc_wb_stat(wb, WB_WRITTEN);
604 wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
605 wb->bdi->max_prop_frac);
606
607 cgdom = mem_cgroup_wb_domain(wb);
608 if (cgdom)
609 wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
610 wb->bdi->max_prop_frac);
611}
612
613void wb_writeout_inc(struct bdi_writeback *wb)
614{
615 unsigned long flags;
616
617 local_irq_save(flags);
618 __wb_writeout_inc(wb);
619 local_irq_restore(flags);
620}
621EXPORT_SYMBOL_GPL(wb_writeout_inc);

/*
 * On idle system, we can be called long after we scheduled because we use
 * deferred timers so count with missed periods.
 */
627static void writeout_period(struct timer_list *t)
628{
629 struct wb_domain *dom = from_timer(dom, t, period_timer);
630 int miss_periods = (jiffies - dom->period_time) /
631 VM_COMPLETIONS_PERIOD_LEN;
632
633 if (fprop_new_period(&dom->completions, miss_periods + 1)) {
634 dom->period_time = wp_next_time(dom->period_time +
635 miss_periods * VM_COMPLETIONS_PERIOD_LEN);
636 mod_timer(&dom->period_timer, dom->period_time);
637 } else {
638
639
640
641
642 dom->period_time = 0;
643 }
644}
645
646int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
647{
648 memset(dom, 0, sizeof(*dom));
649
650 spin_lock_init(&dom->lock);
651
652 timer_setup(&dom->period_timer, writeout_period, TIMER_DEFERRABLE);
653
654 dom->dirty_limit_tstamp = jiffies;
655
656 return fprop_global_init(&dom->completions, gfp);
657}
658
659#ifdef CONFIG_CGROUP_WRITEBACK
660void wb_domain_exit(struct wb_domain *dom)
661{
662 del_timer_sync(&dom->period_timer);
663 fprop_global_destroy(&dom->completions);
664}
665#endif

/*
 * bdi_min_ratio keeps the sum of the minimum dirty shares of all
 * registered backing devices, which, for obvious reasons, can not
 * exceed 100%.
 */
672static unsigned int bdi_min_ratio;
673
674int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
675{
676 int ret = 0;
677
678 spin_lock_bh(&bdi_lock);
679 if (min_ratio > bdi->max_ratio) {
680 ret = -EINVAL;
681 } else {
682 min_ratio -= bdi->min_ratio;
683 if (bdi_min_ratio + min_ratio < 100) {
684 bdi_min_ratio += min_ratio;
685 bdi->min_ratio += min_ratio;
686 } else {
687 ret = -EINVAL;
688 }
689 }
690 spin_unlock_bh(&bdi_lock);
691
692 return ret;
693}
694
695int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
696{
697 int ret = 0;
698
699 if (max_ratio > 100)
700 return -EINVAL;
701
702 spin_lock_bh(&bdi_lock);
703 if (bdi->min_ratio > max_ratio) {
704 ret = -EINVAL;
705 } else {
706 bdi->max_ratio = max_ratio;
707 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
708 }
709 spin_unlock_bh(&bdi_lock);
710
711 return ret;
712}
713EXPORT_SYMBOL(bdi_set_max_ratio);

static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	return (thresh + bg_thresh) / 2;
}

static unsigned long hard_dirty_limit(struct wb_domain *dom,
				      unsigned long thresh)
{
	return max(thresh, dom->dirty_limit);
}

/*
 * Memory which can be further allocated to a memcg domain is capped by
 * system-wide clean memory excluding the amount being used in the domain.
 */
static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
			    unsigned long filepages, unsigned long headroom)
{
	struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
	unsigned long clean = filepages - min(filepages, mdtc->dirty);
	unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
	unsigned long other_clean = global_clean - min(global_clean, clean);

	mdtc->avail = filepages + min(headroom, other_clean);
}
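
/*
 * Worked example for mdtc_calc_avail() (hypothetical numbers): with
 * filepages = 1000, headroom = 200, mdtc->dirty = 300, gdtc->avail = 10000
 * and gdtc->dirty = 4000, the memcg's clean file pages are 700, the global
 * clean pages are 6000, so other groups hold 5300 clean pages.  The memcg's
 * available memory becomes 1000 + min(200, 5300) = 1200 pages, i.e. headroom
 * is only granted to the extent that clean pages exist elsewhere to reclaim.
 */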

/**
 * __wb_calc_thresh - @wb's share of dirty throttling threshold
 * @dtc: dirty_throttle_control of interest
 *
 * Note that balance_dirty_pages() will only seriously take this threshold
 * as a hard limit when sleeping max_pause per page is not enough to keep
 * the dirty pages under control.  For example, when the device is completely
 * stalled due to some error conditions, or when there are 1000 dd tasks
 * writing to a slow 10MB/s USB key.
 *
 * Higher/lower shares of the dirty limit are allocated to faster/slower
 * devices, in order to prevent
 * - starving fast devices
 * - piling up dirty pages (that will take long time to sync) on slow devices
 *
 * The wb's share of the dirty limit adapts to its writeout throughput and
 * is further bounded by the bdi min_ratio/max_ratio parameters, if set.
 *
 * Return: @wb's dirty limit in pages.
 */
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	u64 wb_thresh;
	long numerator, denominator;
	unsigned long wb_min_ratio, wb_max_ratio;

	/*
	 * Calculate this wb's share of the thresh ratio.
	 */
	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
			      &numerator, &denominator);

	wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
	wb_thresh *= numerator;
	do_div(wb_thresh, denominator);

	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);

	wb_thresh += (thresh * wb_min_ratio) / 100;
	if (wb_thresh > (thresh * wb_max_ratio) / 100)
		wb_thresh = thresh * wb_max_ratio / 100;

	return wb_thresh;
}

unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
					       .thresh = thresh };
	return __wb_calc_thresh(&gdtc);
}

/*
 *                           setpoint - dirty 3
 *        f(dirty) := 1.0 + (----------------)
 *                             limit - setpoint
 *
 * it's a 3rd order polynomial that subjects to
 *
 * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
 * (2) f(setpoint) = 1.0 => the balance point
 * (3) f(limit)    = 0   => the hard limit
 * (4) df/dx      <= 0	 => negative feedback control
 * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
 *     => fast response on large errors; small oscillation near setpoint
 */
static long long pos_ratio_polynom(unsigned long setpoint,
				   unsigned long dirty,
				   unsigned long limit)
{
	long long pos_ratio;
	long x;

	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
		      (limit - setpoint) | 1);
	pos_ratio = x;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;

	return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
}
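
/*
 * Worked example for pos_ratio_polynom() (illustrative numbers, not from the
 * original source): with x = (setpoint - dirty) / (limit - setpoint), the
 * function returns (1 + x^3) << RATELIMIT_CALC_SHIFT, clamped to [0, 2].
 * So at dirty == setpoint it yields 1.0 (1024), at dirty == limit it yields
 * 0, and at dirty == freerun (where setpoint is the midpoint of freerun and
 * limit, hence x == 1) it yields the maximum 2.0 (2048).
 */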

/*
 * Dirty position control.
 *
 * (o) global/wb setpoints
 *
 * We want the dirty pages be balanced around the global/wb setpoints.
 * When the number of dirty pages is higher/lower than the setpoint, the
 * dirty position control ratio (and hence task dirty ratelimit) will be
 * decreased/increased to bring the dirty pages back to the setpoint.
 *
 *     pos_ratio = 1 << RATELIMIT_CALC_SHIFT
 *
 *     if (dirty < setpoint) scale up   pos_ratio
 *     if (dirty > setpoint) scale down pos_ratio
 *
 *     if (wb_dirty < wb_setpoint) scale up   pos_ratio
 *     if (wb_dirty > wb_setpoint) scale down pos_ratio
 *
 *     task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
 *
 * (o) global control line
 *
 * The global control line is the third order polynomial computed by
 * pos_ratio_polynom() over the (freerun, limit) range: it starts at 2.0
 * when the dirty count sits at the freerun ceiling, crosses 1.0 at the
 * setpoint midway between freerun and limit, and drops to 0 at the hard
 * dirty limit.
 *
 * (o) wb control line
 *
 * On top of that, a per-wb control line of similar shape pulls each
 * writeback device toward its own setpoint, which is scaled by the wb's
 * share of the dirty threshold.  Its span depends on the device's write
 * bandwidth, so that slow devices are confined to proportionally smaller
 * amounts of dirty pages.
 */
903static void wb_position_ratio(struct dirty_throttle_control *dtc)
904{
905 struct bdi_writeback *wb = dtc->wb;
906 unsigned long write_bw = wb->avg_write_bandwidth;
907 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
908 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
909 unsigned long wb_thresh = dtc->wb_thresh;
910 unsigned long x_intercept;
911 unsigned long setpoint;
912 unsigned long wb_setpoint;
913 unsigned long span;
914 long long pos_ratio;
915 long x;
916
917 dtc->pos_ratio = 0;
918
919 if (unlikely(dtc->dirty >= limit))
920 return;
921
922
923
924
925
926
927 setpoint = (freerun + limit) / 2;
928 pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
956 long long wb_pos_ratio;
957
958 if (dtc->wb_dirty < 8) {
959 dtc->pos_ratio = min_t(long long, pos_ratio * 2,
960 2 << RATELIMIT_CALC_SHIFT);
961 return;
962 }
963
964 if (dtc->wb_dirty >= wb_thresh)
965 return;
966
967 wb_setpoint = dirty_freerun_ceiling(wb_thresh,
968 dtc->wb_bg_thresh);
969
970 if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
971 return;
972
973 wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
974 wb_thresh);
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997 dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
998 return;
999 }
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032 if (unlikely(wb_thresh > dtc->thresh))
1033 wb_thresh = dtc->thresh;
1034
1035
1036
1037
1038
1039
1040
1041 wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
1042
1043
1044
1045
1046 x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
1047 wb_setpoint = setpoint * (u64)x >> 16;
1048
1049
1050
1051
1052
1053
1054
1055
1056 span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
1057 x_intercept = wb_setpoint + span;
1058
1059 if (dtc->wb_dirty < x_intercept - span / 4) {
1060 pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
1061 (x_intercept - wb_setpoint) | 1);
1062 } else
1063 pos_ratio /= 4;
1064
1065
1066
1067
1068
1069
1070 x_intercept = wb_thresh / 2;
1071 if (dtc->wb_dirty < x_intercept) {
1072 if (dtc->wb_dirty > x_intercept / 8)
1073 pos_ratio = div_u64(pos_ratio * x_intercept,
1074 dtc->wb_dirty);
1075 else
1076 pos_ratio *= 8;
1077 }
1078
1079 dtc->pos_ratio = pos_ratio;
1080}
1081
1082static void wb_update_write_bandwidth(struct bdi_writeback *wb,
1083 unsigned long elapsed,
1084 unsigned long written)
1085{
1086 const unsigned long period = roundup_pow_of_two(3 * HZ);
1087 unsigned long avg = wb->avg_write_bandwidth;
1088 unsigned long old = wb->write_bandwidth;
1089 u64 bw;
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101 bw = written - min(written, wb->written_stamp);
1102 bw *= HZ;
1103 if (unlikely(elapsed > period)) {
1104 do_div(bw, elapsed);
1105 avg = bw;
1106 goto out;
1107 }
1108 bw += (u64)wb->write_bandwidth * (period - elapsed);
1109 bw >>= ilog2(period);
1110
1111
1112
1113
1114 if (avg > old && old >= (unsigned long)bw)
1115 avg -= (avg - old) >> 3;
1116
1117 if (avg < old && old <= (unsigned long)bw)
1118 avg += (old - avg) >> 3;
1119
1120out:
1121
1122 avg = max(avg, 1LU);
1123 if (wb_has_dirty_io(wb)) {
1124 long delta = avg - wb->avg_write_bandwidth;
1125 WARN_ON_ONCE(atomic_long_add_return(delta,
1126 &wb->bdi->tot_write_bandwidth) <= 0);
1127 }
1128 wb->write_bandwidth = bw;
1129 wb->avg_write_bandwidth = avg;
1130}
1131
1132static void update_dirty_limit(struct dirty_throttle_control *dtc)
1133{
1134 struct wb_domain *dom = dtc_dom(dtc);
1135 unsigned long thresh = dtc->thresh;
1136 unsigned long limit = dom->dirty_limit;
1137
1138
1139
1140
1141 if (limit < thresh) {
1142 limit = thresh;
1143 goto update;
1144 }
1145
1146
1147
1148
1149
1150
1151 thresh = max(thresh, dtc->dirty);
1152 if (limit > thresh) {
1153 limit -= (limit - thresh) >> 5;
1154 goto update;
1155 }
1156 return;
1157update:
1158 dom->dirty_limit = limit;
1159}
1160
1161static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
1162 unsigned long now)
1163{
1164 struct wb_domain *dom = dtc_dom(dtc);
1165
1166
1167
1168
1169 if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
1170 return;
1171
1172 spin_lock(&dom->lock);
1173 if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
1174 update_dirty_limit(dtc);
1175 dom->dirty_limit_tstamp = now;
1176 }
1177 spin_unlock(&dom->lock);
1178}
1179
1180
1181
1182
1183
1184
1185
1186static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
1187 unsigned long dirtied,
1188 unsigned long elapsed)
1189{
1190 struct bdi_writeback *wb = dtc->wb;
1191 unsigned long dirty = dtc->dirty;
1192 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
1193 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
1194 unsigned long setpoint = (freerun + limit) / 2;
1195 unsigned long write_bw = wb->avg_write_bandwidth;
1196 unsigned long dirty_ratelimit = wb->dirty_ratelimit;
1197 unsigned long dirty_rate;
1198 unsigned long task_ratelimit;
1199 unsigned long balanced_dirty_ratelimit;
1200 unsigned long step;
1201 unsigned long x;
1202 unsigned long shift;
1203
1204
1205
1206
1207
1208 dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;
1209
1210
1211
1212
1213 task_ratelimit = (u64)dirty_ratelimit *
1214 dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
1215 task_ratelimit++;
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247 balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
1248 dirty_rate | 1);
1249
1250
1251
1252 if (unlikely(balanced_dirty_ratelimit > write_bw))
1253 balanced_dirty_ratelimit = write_bw;
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289 step = 0;
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
1303 dirty = dtc->wb_dirty;
1304 if (dtc->wb_dirty < 8)
1305 setpoint = dtc->wb_dirty + 1;
1306 else
1307 setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
1308 }
1309
1310 if (dirty < setpoint) {
1311 x = min3(wb->balanced_dirty_ratelimit,
1312 balanced_dirty_ratelimit, task_ratelimit);
1313 if (dirty_ratelimit < x)
1314 step = x - dirty_ratelimit;
1315 } else {
1316 x = max3(wb->balanced_dirty_ratelimit,
1317 balanced_dirty_ratelimit, task_ratelimit);
1318 if (dirty_ratelimit > x)
1319 step = dirty_ratelimit - x;
1320 }
1321
1322
1323
1324
1325
1326
1327 shift = dirty_ratelimit / (2 * step + 1);
1328 if (shift < BITS_PER_LONG)
1329 step = DIV_ROUND_UP(step >> shift, 8);
1330 else
1331 step = 0;
1332
1333 if (dirty_ratelimit < balanced_dirty_ratelimit)
1334 dirty_ratelimit += step;
1335 else
1336 dirty_ratelimit -= step;
1337
1338 wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
1339 wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
1340
1341 trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
1342}
1343
1344static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
1345 struct dirty_throttle_control *mdtc,
1346 unsigned long start_time,
1347 bool update_ratelimit)
1348{
1349 struct bdi_writeback *wb = gdtc->wb;
1350 unsigned long now = jiffies;
1351 unsigned long elapsed = now - wb->bw_time_stamp;
1352 unsigned long dirtied;
1353 unsigned long written;
1354
1355 lockdep_assert_held(&wb->list_lock);
1356
1357
1358
1359
1360 if (elapsed < BANDWIDTH_INTERVAL)
1361 return;
1362
1363 dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
1364 written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
1365
1366
1367
1368
1369
1370 if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
1371 goto snapshot;
1372
1373 if (update_ratelimit) {
1374 domain_update_bandwidth(gdtc, now);
1375 wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
1376
1377
1378
1379
1380
1381 if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
1382 domain_update_bandwidth(mdtc, now);
1383 wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
1384 }
1385 }
1386 wb_update_write_bandwidth(wb, elapsed, written);
1387
1388snapshot:
1389 wb->dirtied_stamp = dirtied;
1390 wb->written_stamp = written;
1391 wb->bw_time_stamp = now;
1392}
1393
1394void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
1395{
1396 struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
1397
1398 __wb_update_bandwidth(&gdtc, NULL, start_time, false);
1399}

/*
 * After a task dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to start dirty throttling.
 *
 * If dirty_poll_interval is too low, big NUMA machines will call the expensive
 * global_zone_page_state() too often so the current interval is a compromise.
 */
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	if (thresh > dirty)
		return 1UL << (ilog2(thresh - dirty) >> 1);

	return 1;
}
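
/*
 * Worked example (hypothetical numbers): if the task is 32768 pages below
 * the dirty threshold, ilog2(32768) = 15, so dirty_poll_interval() returns
 * 1 << 7 = 128 pages: the further below the threshold, the more pages a task
 * may dirty before balance_dirty_pages() is consulted again.
 */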
1417
1418static unsigned long wb_max_pause(struct bdi_writeback *wb,
1419 unsigned long wb_dirty)
1420{
1421 unsigned long bw = wb->avg_write_bandwidth;
1422 unsigned long t;
1423
1424
1425
1426
1427
1428
1429
1430
1431 t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
1432 t++;
1433
1434 return min_t(unsigned long, t, MAX_PAUSE);
1435}
1436
1437static long wb_min_pause(struct bdi_writeback *wb,
1438 long max_pause,
1439 unsigned long task_ratelimit,
1440 unsigned long dirty_ratelimit,
1441 int *nr_dirtied_pause)
1442{
1443 long hi = ilog2(wb->avg_write_bandwidth);
1444 long lo = ilog2(wb->dirty_ratelimit);
1445 long t;
1446 long pause;
1447 int pages;
1448
1449
1450 t = max(1, HZ / 100);
1451
1452
1453
1454
1455
1456
1457
1458 if (hi > lo)
1459 t += (hi - lo) * (10 * HZ) / 1024;
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479 t = min(t, 1 + max_pause / 2);
1480 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490 if (pages < DIRTY_POLL_THRESH) {
1491 t = max_pause;
1492 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1493 if (pages > DIRTY_POLL_THRESH) {
1494 pages = DIRTY_POLL_THRESH;
1495 t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
1496 }
1497 }
1498
1499 pause = HZ * pages / (task_ratelimit + 1);
1500 if (pause > max_pause) {
1501 t = max_pause;
1502 pages = task_ratelimit * t / roundup_pow_of_two(HZ);
1503 }
1504
1505 *nr_dirtied_pause = pages;
1506
1507
1508
1509 return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
1510}
1511
1512static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
1513{
1514 struct bdi_writeback *wb = dtc->wb;
1515 unsigned long wb_reclaimable;
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530 dtc->wb_thresh = __wb_calc_thresh(dtc);
1531 dtc->wb_bg_thresh = dtc->thresh ?
1532 div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544 if (dtc->wb_thresh < 2 * wb_stat_error()) {
1545 wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
1546 dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
1547 } else {
1548 wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
1549 dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
1550 }
1551}

/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 */
1560static void balance_dirty_pages(struct bdi_writeback *wb,
1561 unsigned long pages_dirtied)
1562{
1563 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1564 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1565 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1566 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1567 &mdtc_stor : NULL;
1568 struct dirty_throttle_control *sdtc;
1569 unsigned long nr_reclaimable;
1570 long period;
1571 long pause;
1572 long max_pause;
1573 long min_pause;
1574 int nr_dirtied_pause;
1575 bool dirty_exceeded = false;
1576 unsigned long task_ratelimit;
1577 unsigned long dirty_ratelimit;
1578 struct backing_dev_info *bdi = wb->bdi;
1579 bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
1580 unsigned long start_time = jiffies;
1581
1582 for (;;) {
1583 unsigned long now = jiffies;
1584 unsigned long dirty, thresh, bg_thresh;
1585 unsigned long m_dirty = 0;
1586 unsigned long m_thresh = 0;
1587 unsigned long m_bg_thresh = 0;
1588
1589
1590
1591
1592
1593
1594
1595 nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
1596 global_node_page_state(NR_UNSTABLE_NFS);
1597 gdtc->avail = global_dirtyable_memory();
1598 gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
1599
1600 domain_dirty_limits(gdtc);
1601
1602 if (unlikely(strictlimit)) {
1603 wb_dirty_limits(gdtc);
1604
1605 dirty = gdtc->wb_dirty;
1606 thresh = gdtc->wb_thresh;
1607 bg_thresh = gdtc->wb_bg_thresh;
1608 } else {
1609 dirty = gdtc->dirty;
1610 thresh = gdtc->thresh;
1611 bg_thresh = gdtc->bg_thresh;
1612 }
1613
1614 if (mdtc) {
1615 unsigned long filepages, headroom, writeback;
1616
1617
1618
1619
1620
1621 mem_cgroup_wb_stats(wb, &filepages, &headroom,
1622 &mdtc->dirty, &writeback);
1623 mdtc->dirty += writeback;
1624 mdtc_calc_avail(mdtc, filepages, headroom);
1625
1626 domain_dirty_limits(mdtc);
1627
1628 if (unlikely(strictlimit)) {
1629 wb_dirty_limits(mdtc);
1630 m_dirty = mdtc->wb_dirty;
1631 m_thresh = mdtc->wb_thresh;
1632 m_bg_thresh = mdtc->wb_bg_thresh;
1633 } else {
1634 m_dirty = mdtc->dirty;
1635 m_thresh = mdtc->thresh;
1636 m_bg_thresh = mdtc->bg_thresh;
1637 }
1638 }
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652 if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
1653 (!mdtc ||
1654 m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
1655 unsigned long intv = dirty_poll_interval(dirty, thresh);
1656 unsigned long m_intv = ULONG_MAX;
1657
1658 current->dirty_paused_when = now;
1659 current->nr_dirtied = 0;
1660 if (mdtc)
1661 m_intv = dirty_poll_interval(m_dirty, m_thresh);
1662 current->nr_dirtied_pause = min(intv, m_intv);
1663 break;
1664 }
1665
1666 if (unlikely(!writeback_in_progress(wb)))
1667 wb_start_background_writeback(wb);
1668
1669
1670
1671
1672
1673 if (!strictlimit)
1674 wb_dirty_limits(gdtc);
1675
1676 dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
1677 ((gdtc->dirty > gdtc->thresh) || strictlimit);
1678
1679 wb_position_ratio(gdtc);
1680 sdtc = gdtc;
1681
1682 if (mdtc) {
1683
1684
1685
1686
1687
1688
1689 if (!strictlimit)
1690 wb_dirty_limits(mdtc);
1691
1692 dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
1693 ((mdtc->dirty > mdtc->thresh) || strictlimit);
1694
1695 wb_position_ratio(mdtc);
1696 if (mdtc->pos_ratio < gdtc->pos_ratio)
1697 sdtc = mdtc;
1698 }
1699
1700 if (dirty_exceeded && !wb->dirty_exceeded)
1701 wb->dirty_exceeded = 1;
1702
1703 if (time_is_before_jiffies(wb->bw_time_stamp +
1704 BANDWIDTH_INTERVAL)) {
1705 spin_lock(&wb->list_lock);
1706 __wb_update_bandwidth(gdtc, mdtc, start_time, true);
1707 spin_unlock(&wb->list_lock);
1708 }
1709
1710
1711 dirty_ratelimit = wb->dirty_ratelimit;
1712 task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
1713 RATELIMIT_CALC_SHIFT;
1714 max_pause = wb_max_pause(wb, sdtc->wb_dirty);
1715 min_pause = wb_min_pause(wb, max_pause,
1716 task_ratelimit, dirty_ratelimit,
1717 &nr_dirtied_pause);
1718
1719 if (unlikely(task_ratelimit == 0)) {
1720 period = max_pause;
1721 pause = max_pause;
1722 goto pause;
1723 }
1724 period = HZ * pages_dirtied / task_ratelimit;
1725 pause = period;
1726 if (current->dirty_paused_when)
1727 pause -= now - current->dirty_paused_when;
1728
1729
1730
1731
1732
1733
1734
1735 if (pause < min_pause) {
1736 trace_balance_dirty_pages(wb,
1737 sdtc->thresh,
1738 sdtc->bg_thresh,
1739 sdtc->dirty,
1740 sdtc->wb_thresh,
1741 sdtc->wb_dirty,
1742 dirty_ratelimit,
1743 task_ratelimit,
1744 pages_dirtied,
1745 period,
1746 min(pause, 0L),
1747 start_time);
1748 if (pause < -HZ) {
1749 current->dirty_paused_when = now;
1750 current->nr_dirtied = 0;
1751 } else if (period) {
1752 current->dirty_paused_when += period;
1753 current->nr_dirtied = 0;
1754 } else if (current->nr_dirtied_pause <= pages_dirtied)
1755 current->nr_dirtied_pause += pages_dirtied;
1756 break;
1757 }
1758 if (unlikely(pause > max_pause)) {
1759
1760 now += min(pause - max_pause, max_pause);
1761 pause = max_pause;
1762 }
1763
1764pause:
1765 trace_balance_dirty_pages(wb,
1766 sdtc->thresh,
1767 sdtc->bg_thresh,
1768 sdtc->dirty,
1769 sdtc->wb_thresh,
1770 sdtc->wb_dirty,
1771 dirty_ratelimit,
1772 task_ratelimit,
1773 pages_dirtied,
1774 period,
1775 pause,
1776 start_time);
1777 __set_current_state(TASK_KILLABLE);
1778 wb->dirty_sleep = now;
1779 io_schedule_timeout(pause);
1780
1781 current->dirty_paused_when = now + pause;
1782 current->nr_dirtied = 0;
1783 current->nr_dirtied_pause = nr_dirtied_pause;
1784
1785
1786
1787
1788
1789 if (task_ratelimit)
1790 break;
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802 if (sdtc->wb_dirty <= wb_stat_error())
1803 break;
1804
1805 if (fatal_signal_pending(current))
1806 break;
1807 }
1808
1809 if (!dirty_exceeded && wb->dirty_exceeded)
1810 wb->dirty_exceeded = 0;
1811
1812 if (writeback_in_progress(wb))
1813 return;
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823 if (laptop_mode)
1824 return;
1825
1826 if (nr_reclaimable > gdtc->bg_thresh)
1827 wb_start_background_writeback(wb);
1828}

static DEFINE_PER_CPU(int, bdp_ratelimits);

/*
 * Normally a task is throttled in balance_dirty_pages() after dirtying
 * tsk->nr_dirtied_pause pages.  A task that exits (or otherwise stops
 * dirtying) before reaching that count would leave its dirtied pages
 * unaccounted for, so the leftover count is parked here and handed over
 * to whichever task dirties pages on this CPU next.
 */
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;

/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * On really big machines, checking the global counters is expensive, so try
 * to avoid calling balance_dirty_pages() too often (ratelimiting).  But once
 * we're over the dirty memory limit we decrease the ratelimiting by a lot, to
 * prevent individual processes from overshooting the limit by
 * (ratelimit_pages) each.
 */
1861void balance_dirty_pages_ratelimited(struct address_space *mapping)
1862{
1863 struct inode *inode = mapping->host;
1864 struct backing_dev_info *bdi = inode_to_bdi(inode);
1865 struct bdi_writeback *wb = NULL;
1866 int ratelimit;
1867 int *p;
1868
1869 if (!bdi_cap_account_dirty(bdi))
1870 return;
1871
1872 if (inode_cgwb_enabled(inode))
1873 wb = wb_get_create_current(bdi, GFP_KERNEL);
1874 if (!wb)
1875 wb = &bdi->wb;
1876
1877 ratelimit = current->nr_dirtied_pause;
1878 if (wb->dirty_exceeded)
1879 ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
1880
1881 preempt_disable();
1882
1883
1884
1885
1886
1887
1888 p = this_cpu_ptr(&bdp_ratelimits);
1889 if (unlikely(current->nr_dirtied >= ratelimit))
1890 *p = 0;
1891 else if (unlikely(*p >= ratelimit_pages)) {
1892 *p = 0;
1893 ratelimit = 0;
1894 }
1895
1896
1897
1898
1899
1900 p = this_cpu_ptr(&dirty_throttle_leaks);
1901 if (*p > 0 && current->nr_dirtied < ratelimit) {
1902 unsigned long nr_pages_dirtied;
1903 nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
1904 *p -= nr_pages_dirtied;
1905 current->nr_dirtied += nr_pages_dirtied;
1906 }
1907 preempt_enable();
1908
1909 if (unlikely(current->nr_dirtied >= ratelimit))
1910 balance_dirty_pages(wb, current->nr_dirtied);
1911
1912 wb_put(wb);
1913}
1914EXPORT_SYMBOL(balance_dirty_pages_ratelimited);

/**
 * wb_over_bg_thresh - does @wb need to be written back?
 * @wb: bdi_writeback of interest
 *
 * Determines whether background writeback threshold of @wb is exceeded
 * to trigger background writeout.
 *
 * Return: %true if writeback should be started, %false otherwise.
 */
1925bool wb_over_bg_thresh(struct bdi_writeback *wb)
1926{
1927 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1928 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1929 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1930 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1931 &mdtc_stor : NULL;
1932
1933
1934
1935
1936
1937 gdtc->avail = global_dirtyable_memory();
1938 gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
1939 global_node_page_state(NR_UNSTABLE_NFS);
1940 domain_dirty_limits(gdtc);
1941
1942 if (gdtc->dirty > gdtc->bg_thresh)
1943 return true;
1944
1945 if (wb_stat(wb, WB_RECLAIMABLE) >
1946 wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
1947 return true;
1948
1949 if (mdtc) {
1950 unsigned long filepages, headroom, writeback;
1951
1952 mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
1953 &writeback);
1954 mdtc_calc_avail(mdtc, filepages, headroom);
1955 domain_dirty_limits(mdtc);
1956
1957 if (mdtc->dirty > mdtc->bg_thresh)
1958 return true;
1959
1960 if (wb_stat(wb, WB_RECLAIMABLE) >
1961 wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
1962 return true;
1963 }
1964
1965 return false;
1966}
1967
1968
1969
1970
1971int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
1972 void __user *buffer, size_t *length, loff_t *ppos)
1973{
1974 unsigned int old_interval = dirty_writeback_interval;
1975 int ret;
1976
1977 ret = proc_dointvec(table, write, buffer, length, ppos);
1978
1979
1980
1981
1982
1983
1984
1985
1986 if (!ret && write && dirty_writeback_interval &&
1987 dirty_writeback_interval != old_interval)
1988 wakeup_flusher_threads(WB_REASON_PERIODIC);
1989
1990 return ret;
1991}
1992
1993#ifdef CONFIG_BLOCK
1994void laptop_mode_timer_fn(struct timer_list *t)
1995{
1996 struct backing_dev_info *backing_dev_info =
1997 from_timer(backing_dev_info, t, laptop_mode_wb_timer);
1998
1999 wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER);
2000}
2001
2002
2003
2004
2005
2006
2007void laptop_io_completion(struct backing_dev_info *info)
2008{
2009 mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
2010}
2011
2012
2013
2014
2015
2016
2017void laptop_sync_completion(void)
2018{
2019 struct backing_dev_info *bdi;
2020
2021 rcu_read_lock();
2022
2023 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
2024 del_timer(&bdi->laptop_mode_wb_timer);
2025
2026 rcu_read_unlock();
2027}
2028#endif

/*
 * If ratelimit_pages is too high then we can get into dirty-data overload
 * if a large number of processes all perform writes at the same time.
 *
 * Here we set ratelimit_pages to a level which ensures that when all CPUs are
 * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
 * thresholds before writeback cuts in.
 */
void writeback_set_ratelimit(void)
{
	struct wb_domain *dom = &global_wb_domain;
	unsigned long background_thresh;
	unsigned long dirty_thresh;

	global_dirty_limits(&background_thresh, &dirty_thresh);
	dom->dirty_limit = dirty_thresh;
	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
	if (ratelimit_pages < 16)
		ratelimit_pages = 16;
}
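
/*
 * For illustration (hypothetical numbers): with a dirty threshold of
 * 102400 pages (400MB with 4K pages) on an 8-CPU machine, ratelimit_pages
 * becomes 102400 / (8 * 32) = 400, i.e. each CPU may dirty roughly 400
 * pages between balance_dirty_pages() checks.
 */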
2053
2054static int page_writeback_cpu_online(unsigned int cpu)
2055{
2056 writeback_set_ratelimit();
2057 return 0;
2058}

/*
 * Called early on to tune the page writeback dirty limits.
 *
 * We used to scale dirty pages according to how total memory related to pages
 * that could be allocated for buffers.  These days "dirty_ratio" applies to
 * total dirtyable memory directly, so no such calibration is needed; all that
 * remains here is setting up the global wb_domain, the per-CPU ratelimit and
 * the CPU hotplug callbacks that keep the ratelimit up to date.
 */
2078void __init page_writeback_init(void)
2079{
2080 BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
2081
2082 cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
2083 page_writeback_cpu_online, NULL);
2084 cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
2085 page_writeback_cpu_online);
2086}

/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have DIRTY tag set with a special TOWRITE tag.  The idea is
 * that write_cache_pages (or whoever calls this function) will then use
 * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
 * used to avoid livelocking of writeback by a process steadily creating new
 * dirty pages in the file (thus it is important for this function to be quick
 * so that it can tag pages faster than a dirtying process can create them).
 */
2102void tag_pages_for_writeback(struct address_space *mapping,
2103 pgoff_t start, pgoff_t end)
2104{
2105 XA_STATE(xas, &mapping->i_pages, start);
2106 unsigned int tagged = 0;
2107 void *page;
2108
2109 xas_lock_irq(&xas);
2110 xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
2111 xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
2112 if (++tagged % XA_CHECK_SCHED)
2113 continue;
2114
2115 xas_pause(&xas);
2116 xas_unlock_irq(&xas);
2117 cond_resched();
2118 xas_lock_irq(&xas);
2119 }
2120 xas_unlock_irq(&xas);
2121}
2122EXPORT_SYMBOL(tag_pages_for_writeback);

/**
 * write_cache_pages - walk the list of dirty pages of the given address space
 * and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().
 * fsync() and msync() need to guarantee that all the data which was dirty at
 * the time the call was made get new I/O started against them.  If
 * wbc->sync_mode is WB_SYNC_ALL then we were called for data integrity and we
 * must wait for existing IO to complete.
 *
 * To avoid livelocks (when other process dirties new pages), we first tag
 * pages which should be written back with TOWRITE tag and only then start
 * writing them.  For data-integrity sync we have to be careful so that we do
 * not miss some pages (e.g., because some other process has cleared TOWRITE
 * tag we set).  The rule we follow is that TOWRITE tag can be cleared only
 * by the process clearing the DIRTY tag (and submitting the page for I/O).
 *
 * Return: %0 on success, negative error code otherwise.
 */
2155int write_cache_pages(struct address_space *mapping,
2156 struct writeback_control *wbc, writepage_t writepage,
2157 void *data)
2158{
2159 int ret = 0;
2160 int done = 0;
2161 int error;
2162 struct pagevec pvec;
2163 int nr_pages;
2164 pgoff_t uninitialized_var(writeback_index);
2165 pgoff_t index;
2166 pgoff_t end;
2167 pgoff_t done_index;
2168 int range_whole = 0;
2169 xa_mark_t tag;
2170
2171 pagevec_init(&pvec);
2172 if (wbc->range_cyclic) {
2173 writeback_index = mapping->writeback_index;
2174 index = writeback_index;
2175 end = -1;
2176 } else {
2177 index = wbc->range_start >> PAGE_SHIFT;
2178 end = wbc->range_end >> PAGE_SHIFT;
2179 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2180 range_whole = 1;
2181 }
2182 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2183 tag = PAGECACHE_TAG_TOWRITE;
2184 else
2185 tag = PAGECACHE_TAG_DIRTY;
2186 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2187 tag_pages_for_writeback(mapping, index, end);
2188 done_index = index;
2189 while (!done && (index <= end)) {
2190 int i;
2191
2192 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
2193 tag);
2194 if (nr_pages == 0)
2195 break;
2196
2197 for (i = 0; i < nr_pages; i++) {
2198 struct page *page = pvec.pages[i];
2199
2200 done_index = page->index;
2201
2202 lock_page(page);
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212 if (unlikely(page->mapping != mapping)) {
2213continue_unlock:
2214 unlock_page(page);
2215 continue;
2216 }
2217
2218 if (!PageDirty(page)) {
2219
2220 goto continue_unlock;
2221 }
2222
2223 if (PageWriteback(page)) {
2224 if (wbc->sync_mode != WB_SYNC_NONE)
2225 wait_on_page_writeback(page);
2226 else
2227 goto continue_unlock;
2228 }
2229
2230 BUG_ON(PageWriteback(page));
2231 if (!clear_page_dirty_for_io(page))
2232 goto continue_unlock;
2233
2234 trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
2235 error = (*writepage)(page, wbc, data);
2236 if (unlikely(error)) {
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249 if (error == AOP_WRITEPAGE_ACTIVATE) {
2250 unlock_page(page);
2251 error = 0;
2252 } else if (wbc->sync_mode != WB_SYNC_ALL) {
2253 ret = error;
2254 done_index = page->index + 1;
2255 done = 1;
2256 break;
2257 }
2258 if (!ret)
2259 ret = error;
2260 }
2261
2262
2263
2264
2265
2266
2267
2268 if (--wbc->nr_to_write <= 0 &&
2269 wbc->sync_mode == WB_SYNC_NONE) {
2270 done = 1;
2271 break;
2272 }
2273 }
2274 pagevec_release(&pvec);
2275 cond_resched();
2276 }
2277
2278
2279
2280
2281
2282
2283 if (wbc->range_cyclic && !done)
2284 done_index = 0;
2285 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2286 mapping->writeback_index = done_index;
2287
2288 return ret;
2289}
2290EXPORT_SYMBOL(write_cache_pages);
2291
2292
2293
2294
2295
2296static int __writepage(struct page *page, struct writeback_control *wbc,
2297 void *data)
2298{
2299 struct address_space *mapping = data;
2300 int ret = mapping->a_ops->writepage(page, wbc);
2301 mapping_set_error(mapping, ret);
2302 return ret;
2303}
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315int generic_writepages(struct address_space *mapping,
2316 struct writeback_control *wbc)
2317{
2318 struct blk_plug plug;
2319 int ret;
2320
2321
2322 if (!mapping->a_ops->writepage)
2323 return 0;
2324
2325 blk_start_plug(&plug);
2326 ret = write_cache_pages(mapping, wbc, __writepage, mapping);
2327 blk_finish_plug(&plug);
2328 return ret;
2329}
2330
2331EXPORT_SYMBOL(generic_writepages);
2332
2333int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
2334{
2335 int ret;
2336
2337 if (wbc->nr_to_write <= 0)
2338 return 0;
2339 while (1) {
2340 if (mapping->a_ops->writepages)
2341 ret = mapping->a_ops->writepages(mapping, wbc);
2342 else
2343 ret = generic_writepages(mapping, wbc);
2344 if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
2345 break;
2346 cond_resched();
2347 congestion_wait(BLK_RW_ASYNC, HZ/50);
2348 }
2349 return ret;
2350}

/**
 * write_one_page - write out a single page and wait on I/O
 * @page: the page to write
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this
 * function returns.
 *
 * Return: %0 on success, negative error code otherwise.
 */
2363int write_one_page(struct page *page)
2364{
2365 struct address_space *mapping = page->mapping;
2366 int ret = 0;
2367 struct writeback_control wbc = {
2368 .sync_mode = WB_SYNC_ALL,
2369 .nr_to_write = 1,
2370 };
2371
2372 BUG_ON(!PageLocked(page));
2373
2374 wait_on_page_writeback(page);
2375
2376 if (clear_page_dirty_for_io(page)) {
2377 get_page(page);
2378 ret = mapping->a_ops->writepage(page, &wbc);
2379 if (ret == 0)
2380 wait_on_page_writeback(page);
2381 put_page(page);
2382 } else {
2383 unlock_page(page);
2384 }
2385
2386 if (!ret)
2387 ret = filemap_check_errors(mapping);
2388 return ret;
2389}
2390EXPORT_SYMBOL(write_one_page);
2391
2392
2393
2394
2395int __set_page_dirty_no_writeback(struct page *page)
2396{
2397 if (!PageDirty(page))
2398 return !TestSetPageDirty(page);
2399 return 0;
2400}
2401
2402
2403
2404
2405
2406
2407
2408
2409void account_page_dirtied(struct page *page, struct address_space *mapping)
2410{
2411 struct inode *inode = mapping->host;
2412
2413 trace_writeback_dirty_page(page, mapping);
2414
2415 if (mapping_cap_account_dirty(mapping)) {
2416 struct bdi_writeback *wb;
2417
2418 inode_attach_wb(inode, page);
2419 wb = inode_to_wb(inode);
2420
2421 __inc_lruvec_page_state(page, NR_FILE_DIRTY);
2422 __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2423 __inc_node_page_state(page, NR_DIRTIED);
2424 inc_wb_stat(wb, WB_RECLAIMABLE);
2425 inc_wb_stat(wb, WB_DIRTIED);
2426 task_io_account_write(PAGE_SIZE);
2427 current->nr_dirtied++;
2428 this_cpu_inc(bdp_ratelimits);
2429 }
2430}
2431EXPORT_SYMBOL(account_page_dirtied);
2432
2433
2434
2435
2436
2437
2438void account_page_cleaned(struct page *page, struct address_space *mapping,
2439 struct bdi_writeback *wb)
2440{
2441 if (mapping_cap_account_dirty(mapping)) {
2442 dec_lruvec_page_state(page, NR_FILE_DIRTY);
2443 dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2444 dec_wb_stat(wb, WB_RECLAIMABLE);
2445 task_io_account_cancelled_write(PAGE_SIZE);
2446 }
2447}
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461int __set_page_dirty_nobuffers(struct page *page)
2462{
2463 lock_page_memcg(page);
2464 if (!TestSetPageDirty(page)) {
2465 struct address_space *mapping = page_mapping(page);
2466 unsigned long flags;
2467
2468 if (!mapping) {
2469 unlock_page_memcg(page);
2470 return 1;
2471 }
2472
2473 xa_lock_irqsave(&mapping->i_pages, flags);
2474 BUG_ON(page_mapping(page) != mapping);
2475 WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
2476 account_page_dirtied(page, mapping);
2477 __xa_set_mark(&mapping->i_pages, page_index(page),
2478 PAGECACHE_TAG_DIRTY);
2479 xa_unlock_irqrestore(&mapping->i_pages, flags);
2480 unlock_page_memcg(page);
2481
2482 if (mapping->host) {
2483
2484 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
2485 }
2486 return 1;
2487 }
2488 unlock_page_memcg(page);
2489 return 0;
2490}
2491EXPORT_SYMBOL(__set_page_dirty_nobuffers);
2492
2493
2494
2495
2496
2497
2498
2499
2500void account_page_redirty(struct page *page)
2501{
2502 struct address_space *mapping = page->mapping;
2503
2504 if (mapping && mapping_cap_account_dirty(mapping)) {
2505 struct inode *inode = mapping->host;
2506 struct bdi_writeback *wb;
2507 struct wb_lock_cookie cookie = {};
2508
2509 wb = unlocked_inode_to_wb_begin(inode, &cookie);
2510 current->nr_dirtied--;
2511 dec_node_page_state(page, NR_DIRTIED);
2512 dec_wb_stat(wb, WB_DIRTIED);
2513 unlocked_inode_to_wb_end(inode, &cookie);
2514 }
2515}
2516EXPORT_SYMBOL(account_page_redirty);
2517
2518
2519
2520
2521
2522
2523int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
2524{
2525 int ret;
2526
2527 wbc->pages_skipped++;
2528 ret = __set_page_dirty_nobuffers(page);
2529 account_page_redirty(page);
2530 return ret;
2531}
2532EXPORT_SYMBOL(redirty_page_for_writepage);
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545int set_page_dirty(struct page *page)
2546{
2547 struct address_space *mapping = page_mapping(page);
2548
2549 page = compound_head(page);
2550 if (likely(mapping)) {
2551 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562 if (PageReclaim(page))
2563 ClearPageReclaim(page);
2564#ifdef CONFIG_BLOCK
2565 if (!spd)
2566 spd = __set_page_dirty_buffers;
2567#endif
2568 return (*spd)(page);
2569 }
2570 if (!PageDirty(page)) {
2571 if (!TestSetPageDirty(page))
2572 return 1;
2573 }
2574 return 0;
2575}
2576EXPORT_SYMBOL(set_page_dirty);
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588int set_page_dirty_lock(struct page *page)
2589{
2590 int ret;
2591
2592 lock_page(page);
2593 ret = set_page_dirty(page);
2594 unlock_page(page);
2595 return ret;
2596}
2597EXPORT_SYMBOL(set_page_dirty_lock);
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612void __cancel_dirty_page(struct page *page)
2613{
2614 struct address_space *mapping = page_mapping(page);
2615
2616 if (mapping_cap_account_dirty(mapping)) {
2617 struct inode *inode = mapping->host;
2618 struct bdi_writeback *wb;
2619 struct wb_lock_cookie cookie = {};
2620
2621 lock_page_memcg(page);
2622 wb = unlocked_inode_to_wb_begin(inode, &cookie);
2623
2624 if (TestClearPageDirty(page))
2625 account_page_cleaned(page, mapping, wb);
2626
2627 unlocked_inode_to_wb_end(inode, &cookie);
2628 unlock_page_memcg(page);
2629 } else {
2630 ClearPageDirty(page);
2631 }
2632}
2633EXPORT_SYMBOL(__cancel_dirty_page);
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649int clear_page_dirty_for_io(struct page *page)
2650{
2651 struct address_space *mapping = page_mapping(page);
2652 int ret = 0;
2653
2654 BUG_ON(!PageLocked(page));
2655
2656 if (mapping && mapping_cap_account_dirty(mapping)) {
2657 struct inode *inode = mapping->host;
2658 struct bdi_writeback *wb;
2659 struct wb_lock_cookie cookie = {};
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686 if (page_mkclean(page))
2687 set_page_dirty(page);
2688
2689
2690
2691
2692
2693
2694
2695
2696 wb = unlocked_inode_to_wb_begin(inode, &cookie);
2697 if (TestClearPageDirty(page)) {
2698 dec_lruvec_page_state(page, NR_FILE_DIRTY);
2699 dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2700 dec_wb_stat(wb, WB_RECLAIMABLE);
2701 ret = 1;
2702 }
2703 unlocked_inode_to_wb_end(inode, &cookie);
2704 return ret;
2705 }
2706 return TestClearPageDirty(page);
2707}
2708EXPORT_SYMBOL(clear_page_dirty_for_io);
2709
2710int test_clear_page_writeback(struct page *page)
2711{
2712 struct address_space *mapping = page_mapping(page);
2713 struct mem_cgroup *memcg;
2714 struct lruvec *lruvec;
2715 int ret;
2716
2717 memcg = lock_page_memcg(page);
2718 lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
2719 if (mapping && mapping_use_writeback_tags(mapping)) {
2720 struct inode *inode = mapping->host;
2721 struct backing_dev_info *bdi = inode_to_bdi(inode);
2722 unsigned long flags;
2723
2724 xa_lock_irqsave(&mapping->i_pages, flags);
2725 ret = TestClearPageWriteback(page);
2726 if (ret) {
2727 __xa_clear_mark(&mapping->i_pages, page_index(page),
2728 PAGECACHE_TAG_WRITEBACK);
2729 if (bdi_cap_account_writeback(bdi)) {
2730 struct bdi_writeback *wb = inode_to_wb(inode);
2731
2732 dec_wb_stat(wb, WB_WRITEBACK);
2733 __wb_writeout_inc(wb);
2734 }
2735 }
2736
2737 if (mapping->host && !mapping_tagged(mapping,
2738 PAGECACHE_TAG_WRITEBACK))
2739 sb_clear_inode_writeback(mapping->host);
2740
2741 xa_unlock_irqrestore(&mapping->i_pages, flags);
2742 } else {
2743 ret = TestClearPageWriteback(page);
2744 }
2745
2746
2747
2748
2749
2750
2751 if (ret) {
2752 dec_lruvec_state(lruvec, NR_WRITEBACK);
2753 dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2754 inc_node_page_state(page, NR_WRITTEN);
2755 }
2756 __unlock_page_memcg(memcg);
2757 return ret;
2758}
2759
2760int __test_set_page_writeback(struct page *page, bool keep_write)
2761{
2762 struct address_space *mapping = page_mapping(page);
2763 int ret;
2764
2765 lock_page_memcg(page);
2766 if (mapping && mapping_use_writeback_tags(mapping)) {
2767 XA_STATE(xas, &mapping->i_pages, page_index(page));
2768 struct inode *inode = mapping->host;
2769 struct backing_dev_info *bdi = inode_to_bdi(inode);
2770 unsigned long flags;
2771
2772 xas_lock_irqsave(&xas, flags);
2773 xas_load(&xas);
2774 ret = TestSetPageWriteback(page);
2775 if (!ret) {
2776 bool on_wblist;
2777
2778 on_wblist = mapping_tagged(mapping,
2779 PAGECACHE_TAG_WRITEBACK);
2780
2781 xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
2782 if (bdi_cap_account_writeback(bdi))
2783 inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
2784
2785
2786
2787
2788
2789
2790 if (mapping->host && !on_wblist)
2791 sb_mark_inode_writeback(mapping->host);
2792 }
2793 if (!PageDirty(page))
2794 xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
2795 if (!keep_write)
2796 xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
2797 xas_unlock_irqrestore(&xas, flags);
2798 } else {
2799 ret = TestSetPageWriteback(page);
2800 }
2801 if (!ret) {
2802 inc_lruvec_page_state(page, NR_WRITEBACK);
2803 inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2804 }
2805 unlock_page_memcg(page);
2806 return ret;
2807
2808}
2809EXPORT_SYMBOL(__test_set_page_writeback);

/**
 * wait_for_stable_page() - wait for writeback to finish, if necessary.
 * @page:	The page to wait on.
 *
 * This function determines if the given page is related to a backing device
 * that requires page contents to be held stable during writeback.  If so, then
 * it will wait for any pending writeback to complete.
 */
2819void wait_for_stable_page(struct page *page)
2820{
2821 if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
2822 wait_on_page_writeback(page);
2823}
2824EXPORT_SYMBOL_GPL(wait_for_stable_page);
2825