/*
 * mm/page-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 *
 * 10Apr2002	Andrew Morton
 *		Initial version
 */
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/sched/signal.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"
/* Sleep at most 200ms at a time in balance_dirty_pages(). */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many pages
 * by raising pause time more than the dirtied pages.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/* Estimate write bandwidth at 200ms intervals. */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

/* Fixed-point shift used in pos_ratio and ratelimit calculations. */
#define RATELIMIT_CALC_SHIFT	10
/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to throttle the dirtying task.
 */
static long ratelimit_pages = 32;

/*
 * Start background writeback (via writeback threads) at this percentage of
 * dirtyable memory.
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory.
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true.
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage of
 * dirtyable memory.
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory.
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks, in centiseconds
 * (hundredths of a second): 5 * 100 means every 5 seconds.
 */
unsigned int dirty_writeback_interval = 5 * 100;

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty, in centiseconds.
 */
unsigned int dirty_expire_interval = 30 * 100;

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in
 * jiffies: a non-zero value means we wait that long after I/O stops before
 * starting delayed writeback.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

struct wb_domain global_wb_domain;
/* consolidated parameters for balance_dirty_pages() and its subroutines */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;
	struct dirty_throttle_control *gdtc;	/* only set in memcg dtc's */
#endif
	struct bdi_writeback	*wb;
	struct fprop_local_percpu *wb_completions;

	unsigned long		avail;		/* dirtyable */
	unsigned long		dirty;		/* file_dirty + write + nfs */
	unsigned long		thresh;		/* dirty threshold */
	unsigned long		bg_thresh;	/* dirty background threshold */

	unsigned long		wb_dirty;	/* per-wb counterparts */
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;
};

/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrary time we choose to be about 3 seconds.
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
#ifdef CONFIG_CGROUP_WRITEBACK

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.dom = &global_wb_domain,		\
				.wb_completions = &(__wb)->completions

#define GDTC_INIT_NO_WB		.dom = &global_wb_domain

#define MDTC_INIT(__wb, __gdtc)	.wb = (__wb),				\
				.dom = mem_cgroup_wb_domain(__wb),	\
				.wb_completions = &(__wb)->memcg_completions, \
				.gdtc = __gdtc

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return mdtc->gdtc;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return &wb->memcg_completions;
}

static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	unsigned long this_bw = wb->avg_write_bandwidth;
	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
	unsigned long long min = wb->bdi->min_ratio;
	unsigned long long max = wb->bdi->max_ratio;

	/*
	 * @wb may already be clean by the time control reaches here and
	 * the total may not include its bw.
	 */
	if (this_bw < tot_bw) {
		if (min) {
			min *= this_bw;
			do_div(min, tot_bw);
		}
		if (max < 100) {
			max *= this_bw;
			do_div(max, tot_bw);
		}
	}

	*minp = min;
	*maxp = max;
}
#else

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.wb_completions = &(__wb)->completions
#define GDTC_INIT_NO_WB
#define MDTC_INIT(__wb, __gdtc)

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return false;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return &global_wb_domain;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return NULL;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return NULL;
}

static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	*minp = wb->bdi->min_ratio;
	*maxp = wb->bdi->max_ratio;
}

#endif
/**
 * node_dirtyable_memory - number of dirtyable pages in a node
 * @pgdat: the node
 *
 * Return the node's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-node dirty limits.
 */
static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
{
	unsigned long nr_pages = 0;
	int z;

	for (z = 0; z < MAX_NR_ZONES; z++) {
		struct zone *zone = pgdat->node_zones + z;

		if (!populated_zone(zone))
			continue;

		nr_pages += zone_page_state(zone, NR_FREE_PAGES);
	}

	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	nr_pages -= min(nr_pages, pgdat->totalreserve_pages);

	nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
	nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);

	return nr_pages;
}
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;
	int i;

	for_each_node_state(node, N_HIGH_MEMORY) {
		for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
			struct zone *z;
			unsigned long nr_pages;

			if (!is_highmem_idx(i))
				continue;

			z = &NODE_DATA(node)->node_zones[i];
			if (!populated_zone(z))
				continue;

			nr_pages = zone_page_state(z, NR_FREE_PAGES);
			/* watch for underflows */
			nr_pages -= min(nr_pages, high_wmark_pages(z));
			nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
			nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
			x += nr_pages;
		}
	}

	/*
	 * Unreclaimable memory (kernel memory or anonymous memory
	 * without swap) can bring down the dirtyable pages below
	 * the zone's dirty balance reserve and the above calculation
	 * will underflow.  However we still want to add in nodes
	 * which are below threshold (negative values) to get a more
	 * accurate calculation but make sure that the total never
	 * underflows.
	 */
	if ((long)x < 0)
		x = 0;

	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory.  This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(x, total);
#else
	return 0;
#endif
}
/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Return the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */
static unsigned long global_dirtyable_memory(void)
{
	unsigned long x;

	x = global_zone_page_state(NR_FREE_PAGES);
	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	x -= min(x, totalreserve_pages);

	x += global_node_page_state(NR_INACTIVE_FILE);
	x += global_node_page_state(NR_ACTIVE_FILE);

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}
/**
 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
 * @dtc: dirty_throttle_control of interest
 *
 * Calculates the dirty and background thresholds for @dtc's domain, based
 * on either the {vm_dirty,dirty_background}_bytes or _ratio sysctls.  For
 * a memcg domain the byte settings are first converted to ratios against
 * the globally available memory.
 */
static void domain_dirty_limits(struct dirty_throttle_control *dtc)
{
	const unsigned long available_memory = dtc->avail;
	struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
	unsigned long bytes = vm_dirty_bytes;
	unsigned long bg_bytes = dirty_background_bytes;
	/* convert ratios to per-PAGE_SIZE for higher precision */
	unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
	unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
	unsigned long thresh;
	unsigned long bg_thresh;
	struct task_struct *tsk;

	/* gdtc is !NULL iff @dtc is for memcg domain */
	if (gdtc) {
		unsigned long global_avail = gdtc->avail;

		/*
		 * The byte settings can't be applied directly to memcg
		 * domains.  Convert them to ratios by scaling against
		 * globally available memory.  As the ratios are in
		 * per-PAGE_SIZE granularity, precision is maintained
		 * even for small memcg domains.
		 */
		if (bytes)
			ratio = min(DIV_ROUND_UP(bytes, global_avail),
				    PAGE_SIZE);
		if (bg_bytes)
			bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
				       PAGE_SIZE);
		bytes = bg_bytes = 0;
	}

	if (bytes)
		thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
	else
		thresh = (ratio * available_memory) / PAGE_SIZE;

	if (bg_bytes)
		bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
	else
		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;

	if (bg_thresh >= thresh)
		bg_thresh = thresh / 2;
	tsk = current;
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
		thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
	}
	dtc->thresh = thresh;
	dtc->bg_thresh = bg_thresh;

	/* we should eventually report the domain in the TP */
	if (!gdtc)
		trace_global_dirty_state(bg_thresh, thresh);
}
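/*
 * Illustrative arithmetic for domain_dirty_limits() (not from the original
 * source): with 4K pages, vm_dirty_ratio = 20 and dirty_background_ratio = 10,
 * ratio becomes 20 * 4096 / 100 = 819 "per-page" units.  On a domain with
 * avail = 262144 pages (1GB), thresh = 819 * 262144 / 4096 = 52416 pages
 * (~205MB, just under 20%) and bg_thresh = 409 * 262144 / 4096 = 26176 pages
 * (~10%).
 */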
/**
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 * @pbackground: out parameter for bg_thresh
 * @pdirty: out parameter for thresh
 *
 * Calculate bg_thresh and thresh for global_wb_domain.  See
 * domain_dirty_limits() for details.
 */
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };

	gdtc.avail = global_dirtyable_memory();
	domain_dirty_limits(&gdtc);

	*pbackground = gdtc.bg_thresh;
	*pdirty = gdtc.thresh;
}
/**
 * node_dirty_limit - maximum number of dirty pages allowed in a node
 * @pgdat: the node
 *
 * Return the maximum number of dirty pages allowed in a node, based
 * on the node's dirtyable memory.
 */
static unsigned long node_dirty_limit(struct pglist_data *pgdat)
{
	unsigned long node_memory = node_dirtyable_memory(pgdat);
	struct task_struct *tsk = current;
	unsigned long dirty;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
			node_memory / global_dirtyable_memory();
	else
		dirty = vm_dirty_ratio * node_memory / 100;

	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
		dirty += dirty / 4;

	return dirty;
}
/**
 * node_dirty_ok - tells whether a node is within its dirty limits
 * @pgdat: the node to check
 *
 * Return true when the dirty pages in @pgdat are within the node's
 * dirty limit, false if the limit is exceeded.
 */
bool node_dirty_ok(struct pglist_data *pgdat)
{
	unsigned long limit = node_dirty_limit(pgdat);
	unsigned long nr_pages = 0;

	nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
	nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
	nr_pages += node_page_state(pgdat, NR_WRITEBACK);

	return nr_pages <= limit;
}
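/*
 * Illustrative arithmetic for node_dirty_limit() (not from the original
 * source): with vm_dirty_bytes == 0, vm_dirty_ratio = 20 and a node holding
 * 100000 dirtyable pages, the limit is 20 * 100000 / 100 = 20000 pages;
 * a PF_LESS_THROTTLE or realtime task gets a 25% boost to 25000 pages.
 */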
513
514int dirty_background_ratio_handler(struct ctl_table *table, int write,
515 void __user *buffer, size_t *lenp,
516 loff_t *ppos)
517{
518 int ret;
519
520 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
521 if (ret == 0 && write)
522 dirty_background_bytes = 0;
523 return ret;
524}
525
526int dirty_background_bytes_handler(struct ctl_table *table, int write,
527 void __user *buffer, size_t *lenp,
528 loff_t *ppos)
529{
530 int ret;
531
532 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
533 if (ret == 0 && write)
534 dirty_background_ratio = 0;
535 return ret;
536}
537
538int dirty_ratio_handler(struct ctl_table *table, int write,
539 void __user *buffer, size_t *lenp,
540 loff_t *ppos)
541{
542 int old_ratio = vm_dirty_ratio;
543 int ret;
544
545 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
546 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
547 writeback_set_ratelimit();
548 vm_dirty_bytes = 0;
549 }
550 return ret;
551}
552
553int dirty_bytes_handler(struct ctl_table *table, int write,
554 void __user *buffer, size_t *lenp,
555 loff_t *ppos)
556{
557 unsigned long old_bytes = vm_dirty_bytes;
558 int ret;
559
560 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
561 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
562 writeback_set_ratelimit();
563 vm_dirty_ratio = 0;
564 }
565 return ret;
566}
static unsigned long wp_next_time(unsigned long cur_time)
{
	cur_time += VM_COMPLETIONS_PERIOD_LEN;
	/* 0 has a special meaning (an inactive timer), so skip over it */
	if (!cur_time)
		return 1;
	return cur_time;
}
static void wb_domain_writeout_inc(struct wb_domain *dom,
				   struct fprop_local_percpu *completions,
				   unsigned int max_prop_frac)
{
	__fprop_inc_percpu_max(&dom->completions, completions,
			       max_prop_frac);
	/* First event after period switching was turned off? */
	if (unlikely(!dom->period_time)) {
		/*
		 * We can race with other __bdi_writeout_inc calls here but
		 * it does not cause any harm since the resulting time when
		 * timer will fire and what is in writeout_period_time will
		 * be similar.
		 */
		dom->period_time = wp_next_time(jiffies);
		mod_timer(&dom->period_timer, dom->period_time);
	}
}
/*
 * Increment @wb's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
static inline void __wb_writeout_inc(struct bdi_writeback *wb)
{
	struct wb_domain *cgdom;

	inc_wb_stat(wb, WB_WRITTEN);
	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
			       wb->bdi->max_prop_frac);

	cgdom = mem_cgroup_wb_domain(wb);
	if (cgdom)
		wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
				       wb->bdi->max_prop_frac);
}

void wb_writeout_inc(struct bdi_writeback *wb)
{
	unsigned long flags;

	local_irq_save(flags);
	__wb_writeout_inc(wb);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(wb_writeout_inc);
/*
 * On idle system, we can be called long after we scheduled because we use
 * deferred timers so count with missed periods.
 */
static void writeout_period(unsigned long t)
{
	struct wb_domain *dom = (void *)t;
	int miss_periods = (jiffies - dom->period_time) /
				 VM_COMPLETIONS_PERIOD_LEN;

	if (fprop_new_period(&dom->completions, miss_periods + 1)) {
		dom->period_time = wp_next_time(dom->period_time +
				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
		mod_timer(&dom->period_timer, dom->period_time);
	} else {
		/*
		 * Aging has zeroed all fractions. Stop wasting CPU on period
		 * updates.
		 */
		dom->period_time = 0;
	}
}
int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
{
	memset(dom, 0, sizeof(*dom));

	spin_lock_init(&dom->lock);

	setup_deferrable_timer(&dom->period_timer, writeout_period,
			       (unsigned long)dom);

	dom->dirty_limit_tstamp = jiffies;

	return fprop_global_init(&dom->completions, gfp);
}

#ifdef CONFIG_CGROUP_WRITEBACK
void wb_domain_exit(struct wb_domain *dom)
{
	del_timer_sync(&dom->period_timer);
	fprop_global_destroy(&dom->completions);
}
#endif
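/*
 * A note on the completion proportions (illustrative summary, based on
 * lib/flex_proportions.c behaviour): each writeout completion bumps the
 * wb's local event count and the domain-wide count; every
 * VM_COMPLETIONS_PERIOD_LEN (~3s) the period timer ages the counts so that
 * older completions weigh exponentially less.  The resulting fraction
 * "recent completions by this wb / recent completions in the domain" is
 * what __wb_calc_thresh() uses to split the dirty threshold among wbs.
 */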
/*
 * bdi_min_ratio keeps the sum of the minimum dirty shares of all
 * registered backing devices; the sum must stay below 100%.
 */
static unsigned int bdi_min_ratio;

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;

	spin_lock_bh(&bdi_lock);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
{
	int ret = 0;

	if (max_ratio > 100)
		return -EINVAL;

	spin_lock_bh(&bdi_lock);
	if (bdi->min_ratio > max_ratio) {
		ret = -EINVAL;
	} else {
		bdi->max_ratio = max_ratio;
		bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}
EXPORT_SYMBOL(bdi_set_max_ratio);
/*
 * Below the freerun ceiling, tasks are allowed to dirty pages without being
 * throttled at all.  It is the mid point between the background and the
 * hard dirty thresholds.
 */
static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	return (thresh + bg_thresh) / 2;
}

/* The domain-wide hard limit never drops below the current threshold. */
static unsigned long hard_dirty_limit(struct wb_domain *dom,
				      unsigned long thresh)
{
	return max(thresh, dom->dirty_limit);
}
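/*
 * Illustrative control band (not from the original source): continuing the
 * numbers above, with thresh = 52416 and bg_thresh = 26176 pages, the freerun
 * ceiling is (52416 + 26176) / 2 = 39296 pages -- below that, dirtiers are
 * never paused.  The throttling setpoint used later sits halfway between
 * freerun and the hard limit: (39296 + 52416) / 2 = 45856 pages.
 */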
/*
 * Memory which can be further allocated to a memcg domain is capped by
 * system-wide clean memory excluding the amount being used in the domain.
 */
static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
			    unsigned long filepages, unsigned long headroom)
{
	struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
	unsigned long clean = filepages - min(filepages, mdtc->dirty);
	unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
	unsigned long other_clean = global_clean - min(global_clean, clean);

	mdtc->avail = filepages + min(headroom, other_clean);
}
/**
 * __wb_calc_thresh - @wb's share of dirty throttling threshold
 * @dtc: dirty_throttle_context of interest
 *
 * Returns @wb's dirty limit in pages. The term "dirty" in the context of
 * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
 *
 * Note that balance_dirty_pages() will only seriously take it as a hard limit
 * when sleeping max_pause per page is not enough to keep the dirty pages under
 * control. For example, when the device is completely stalled due to some error
 * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
 * In the other normal situations, it acts more gently by throttling the tasks
 * more (rather than completely block them) when the wb dirty pages go high.
 *
 * It allocates high/low dirty limits to fast/slow devices, in order to prevent
 * - starving fast devices
 * - piling up dirty pages (that will take long time to sync) on slow devices
 *
 * The wb's share of dirty limit will be adapting to its throughput and
 * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
 */
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	u64 wb_thresh;
	long numerator, denominator;
	unsigned long wb_min_ratio, wb_max_ratio;

	/*
	 * Calculate this wb's share of the thresh ratio.
	 */
	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
			      &numerator, &denominator);

	wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
	wb_thresh *= numerator;
	do_div(wb_thresh, denominator);

	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);

	wb_thresh += (thresh * wb_min_ratio) / 100;
	if (wb_thresh > (thresh * wb_max_ratio) / 100)
		wb_thresh = thresh * wb_max_ratio / 100;

	return wb_thresh;
}

unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
					       .thresh = thresh };
	return __wb_calc_thresh(&gdtc);
}
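/*
 * Worked example for __wb_calc_thresh() (illustrative, not from the original
 * source): with no min/max ratios configured and a wb that contributed 30% of
 * the domain's recent writeout completions (numerator/denominator = 0.3),
 * a global thresh of 52416 pages yields wb_thresh ~= 15724 pages.  A device
 * that stops completing writeback sees its fraction -- and hence its share
 * of the dirty budget -- decay towards zero across aging periods.
 */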
/*
 * Dirty position control curve:
 *
 *                            setpoint - dirty 3
 *         f(dirty) := 1.0 + (----------------)
 *                            limit - setpoint
 *
 * it's a 3rd order polynomial that subjects to
 *
 * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
 * (2) f(setpoint) = 1.0 => the balance point
 * (3) f(limit)    = 0   => the hard limit
 * (4) df/dx      < 0	 => negative feedback control
 * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
 *     => fast response on large errors; small oscillation near setpoint
 */
static long long pos_ratio_polynom(unsigned long setpoint,
				   unsigned long dirty,
				   unsigned long limit)
{
	long long pos_ratio;
	long x;

	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
		      (limit - setpoint) | 1);
	pos_ratio = x;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;

	return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
}
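/*
 * Worked fixed-point example (illustrative): with RATELIMIT_CALC_SHIFT = 10,
 * "1.0" is represented as 1024.  At dirty == setpoint, x = 0 and the function
 * returns 1024; at dirty == limit, x = -1024 and the cubic term cancels the
 * +1024, returning 0; at dirty == freerun (where setpoint - dirty equals
 * limit - setpoint), x = 1024 and the result saturates at the clamp of
 * 2 << 10 = 2048, i.e. 2.0.
 */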
/*
 * Dirty position control.
 *
 * (o) global/wb setpoints
 *
 * We want the dirty pages be balanced around the global/wb setpoints.
 * When the number of dirty pages is higher/lower than the setpoint, the
 * dirty position control ratio (and hence task dirty ratelimit) will be
 * decreased/increased to bring the dirty pages back to the setpoint.
 *
 *     pos_ratio = wb_position_ratio()
 *     task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
 *
 *     if (dirty < setpoint) scale up   pos_ratio
 *     if (dirty > setpoint) scale down pos_ratio
 *
 *     if (wb_dirty < wb_setpoint) scale up   pos_ratio
 *     if (wb_dirty > wb_setpoint) scale down pos_ratio
 *
 * The global control line is the third order polynomial computed by
 * pos_ratio_polynom(), which gently pushes the dirty count back towards the
 * setpoint.  The per-wb control line is a downward-sloping linear function
 * that reacts much more aggressively, since wb_dirty fluctuates much faster
 * than the global dirty count.
 */
static void wb_position_ratio(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long wb_thresh = dtc->wb_thresh;
	unsigned long x_intercept;
	unsigned long setpoint;		/* dirty pages' target balance point */
	unsigned long wb_setpoint;
	unsigned long span;
	long long pos_ratio;		/* for scaling up/down the rate limit */
	long x;

	dtc->pos_ratio = 0;

	if (unlikely(dtc->dirty >= limit))
		return;

	/*
	 * global setpoint
	 *
	 * See comment for pos_ratio_polynom().
	 */
	setpoint = (freerun + limit) / 2;
	pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);

	/*
	 * The strictlimit feature is a tool preventing mistrusted filesystems
	 * from growing a large number of dirty pages before throttling. For
	 * such filesystems balance_dirty_pages always checks wb counters
	 * against wb limits. Even if global "nr_dirty" is under "freerun".
	 * This is especially important for fuse which sets bdi->max_ratio to
	 * 1% by default.
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		long long wb_pos_ratio;

		if (dtc->wb_dirty < 8) {
			dtc->pos_ratio = min_t(long long, pos_ratio * 2,
					       2 << RATELIMIT_CALC_SHIFT);
			return;
		}

		if (dtc->wb_dirty >= wb_thresh)
			return;

		wb_setpoint = dirty_freerun_ceiling(wb_thresh,
						    dtc->wb_bg_thresh);

		if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
			return;

		wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
						 wb_thresh);

		/*
		 * Typically, for strictlimit case, wb_setpoint << setpoint
		 * and pos_ratio >> wb_pos_ratio. In other words the global
		 * state ("dirty") is not the limiting factor and we have to
		 * make our decision based on wb counters.  But there is an
		 * important case when the global pos_ratio should get
		 * precedence: the global limits are exceeded.  Taking the
		 * min of the two keeps both limits honoured.
		 */
		dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
		return;
	}

	/*
	 * We have computed our global position ratio; now derive the wb's
	 * control line, which throttles harder as wb_dirty approaches
	 * x_intercept and protects against dirty pool underrun below
	 * wb_thresh / 2.
	 */
	if (unlikely(wb_thresh > dtc->thresh))
		wb_thresh = dtc->thresh;
	/*
	 * It's very possible that wb_thresh is close to 0 not because the
	 * device is slow, but that it has remained inactive for long time.
	 * Honour such devices a reasonable good (hopefully IO efficient)
	 * threshold, so that the occasional writes won't be blocked and active
	 * writes can rampup the threshold quickly.
	 */
	wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
	/*
	 * scale global setpoint to wb's:
	 *	wb_setpoint = setpoint * wb_thresh / thresh
	 */
	x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
	wb_setpoint = setpoint * (u64)x >> 16;
	/*
	 * Use span=(8*write_bw) in single wb case as indicated by
	 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case.
	 *
	 *        wb_thresh                    thresh - wb_thresh
	 * span = --------- * (8 * write_bw) + ------------------ * wb_thresh
	 *          thresh                           thresh
	 */
	span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
	x_intercept = wb_setpoint + span;

	if (dtc->wb_dirty < x_intercept - span / 4) {
		pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
				      (x_intercept - wb_setpoint) | 1);
	} else
		pos_ratio /= 4;

	/*
	 * wb reserve area, safeguard against dirty pool underrun and disk idle
	 * It may push the desired control point of global dirty pages higher
	 * than setpoint.
	 */
	x_intercept = wb_thresh / 2;
	if (dtc->wb_dirty < x_intercept) {
		if (dtc->wb_dirty > x_intercept / 8)
			pos_ratio = div_u64(pos_ratio * x_intercept,
					    dtc->wb_dirty);
		else
			pos_ratio *= 8;
	}

	dtc->pos_ratio = pos_ratio;
}
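/*
 * Illustrative behaviour of the wb control line above (not from the original
 * source): at wb_dirty == wb_setpoint the linear factor is exactly 1.0, so
 * only the global curve matters; the factor falls to 0.25 as wb_dirty
 * approaches x_intercept - span/4 and stays at 1/4 beyond it.  Conversely,
 * inside the reserve area (wb_dirty < wb_thresh / 2) pos_ratio is boosted by
 * up to 8x so a briefly-idle device refills its dirty pool quickly.
 */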
static void wb_update_write_bandwidth(struct bdi_writeback *wb,
				      unsigned long elapsed,
				      unsigned long written)
{
	const unsigned long period = roundup_pow_of_two(3 * HZ);
	unsigned long avg = wb->avg_write_bandwidth;
	unsigned long old = wb->write_bandwidth;
	u64 bw;

	/*
	 * bw = written * HZ / elapsed
	 *
	 *                   bw * elapsed + write_bandwidth * (period - elapsed)
	 * write_bandwidth = ---------------------------------------------------
	 *                                          period
	 *
	 * @written may have decreased due to account_page_redirty().
	 * Avoid underflowing @bw calculation.
	 */
	bw = written - min(written, wb->written_stamp);
	bw *= HZ;
	if (unlikely(elapsed > period)) {
		do_div(bw, elapsed);
		avg = bw;
		goto out;
	}
	bw += (u64)wb->write_bandwidth * (period - elapsed);
	bw >>= ilog2(period);

	/*
	 * one more level of smoothing, for filtering out sudden spikes
	 */
	if (avg > old && old >= (unsigned long)bw)
		avg -= (avg - old) >> 3;

	if (avg < old && old <= (unsigned long)bw)
		avg += (old - avg) >> 3;

out:
	/* keep avg > 0 to guarantee that tot > 0 if there are dirty wbs */
	avg = max(avg, 1LU);
	if (wb_has_dirty_io(wb)) {
		long delta = avg - wb->avg_write_bandwidth;
		WARN_ON_ONCE(atomic_long_add_return(delta,
					&wb->bdi->tot_write_bandwidth) <= 0);
	}
	wb->write_bandwidth = bw;
	wb->avg_write_bandwidth = avg;
}
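/*
 * Worked example for the bandwidth estimate (illustrative): with HZ = 1000,
 * period = roundup_pow_of_two(3000) = 4096 jiffies.  If 5120 pages were
 * written in elapsed = 200 jiffies, the instantaneous rate is
 * 5120 * 1000 / 200 = 25600 pages/s, and the blended estimate is
 * (5120 * 1000 + old_bw * 3896) >> 12 -- i.e. the new sample only carries a
 * weight of elapsed/period, which keeps the estimate stable across bursts.
 */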
static void update_dirty_limit(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	unsigned long limit = dom->dirty_limit;

	/*
	 * Follow up in one step.
	 */
	if (limit < thresh) {
		limit = thresh;
		goto update;
	}

	/*
	 * Follow down slowly. Use the higher one as the target, because thresh
	 * may drop below dirty. This is exactly the reason to introduce
	 * dom->dirty_limit which is guaranteed to lie above the dirty pages.
	 */
	thresh = max(thresh, dtc->dirty);
	if (limit > thresh) {
		limit -= (limit - thresh) >> 5;
		goto update;
	}
	return;
update:
	dom->dirty_limit = limit;
}
static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
				    unsigned long now)
{
	struct wb_domain *dom = dtc_dom(dtc);

	/*
	 * check locklessly first to optimize away locking for the most time
	 */
	if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
		return;

	spin_lock(&dom->lock);
	if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
		update_dirty_limit(dtc);
		dom->dirty_limit_tstamp = now;
	}
	spin_unlock(&dom->lock);
}
/*
 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal wb tasks will be curbed at or below it in long term.
 * Obviously it should be around (write_bw / N) when there are N dd tasks.
 */
static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
				      unsigned long dirtied,
				      unsigned long elapsed)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long dirty = dtc->dirty;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long setpoint = (freerun + limit) / 2;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long dirty_ratelimit = wb->dirty_ratelimit;
	unsigned long dirty_rate;
	unsigned long task_ratelimit;
	unsigned long balanced_dirty_ratelimit;
	unsigned long step;
	unsigned long x;
	unsigned long shift;

	/*
	 * The dirty rate will match the writeout rate in long term, except
	 * when dirty pages are truncated by userspace or re-dirtied by FS.
	 */
	dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;

	/*
	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
	 */
	task_ratelimit = (u64)dirty_ratelimit *
					dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
	task_ratelimit++; /* it helps rampup dirty_ratelimit from tiny values */

	/*
	 * A linear estimation of the "balanced" throttle rate. The theory is,
	 * if there are N dd tasks, each throttled at task_ratelimit, the wb's
	 * dirty_rate will be measured to be (N * task_ratelimit). So the below
	 * formula will yield the balanced rate limit (write_bw / N).
	 *
	 * Note that the expanded form is not a pure rate feedback:
	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate)		     (1)
	 * but also takes pos_ratio into account:
	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate) * pos_ratio  (2)
	 *
	 * (1) is not realistic because pos_ratio also takes part in balancing
	 * the dirty rate: when the dirty count sits off the setpoint,
	 * pos_ratio is already scaling every task's rate up or down, and the
	 * feedback must account for that to converge on (write_bw / N).
	 */
	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
					   dirty_rate | 1);

	/*
	 * balanced_dirty_ratelimit ~= (write_bw / N) <= write_bw
	 */
	if (unlikely(balanced_dirty_ratelimit > write_bw))
		balanced_dirty_ratelimit = write_bw;

	/*
	 * We could safely do this and return immediately:
	 *
	 *	wb->dirty_ratelimit = balanced_dirty_ratelimit;
	 *
	 * However to get a stable dirty_ratelimit, the below elaborated
	 * code makes use of task_ratelimit to filter out singular points and
	 * limit the step size.
	 *
	 * dirty_ratelimit will follow balanced_dirty_ratelimit iff
	 * task_ratelimit is on the same side of dirty_ratelimit, too.
	 * For example, when
	 * - dirty_ratelimit > balanced_dirty_ratelimit
	 * - dirty_ratelimit > task_ratelimit (dirty pages are above setpoint)
	 * lowering dirty_ratelimit will help meet both the position and rate
	 * control targets. Otherwise, don't update dirty_ratelimit if it will
	 * only help meet the rate target. After all, what the users ultimately
	 * feel and care are stable dirty rate and small position error.
	 */
	step = 0;

	/*
	 * For strictlimit case, calculations above were based on wb counters
	 * and limits (starting from pos_ratio = wb_position_ratio()).
	 * Hence, to calculate "step" properly, we have to use wb_dirty as
	 * "dirty" and wb_setpoint as "setpoint".
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		dirty = dtc->wb_dirty;
		if (dtc->wb_dirty < 8)
			setpoint = dtc->wb_dirty + 1;
		else
			setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
	}

	if (dirty < setpoint) {
		x = min3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit < x)
			step = x - dirty_ratelimit;
	} else {
		x = max3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit > x)
			step = dirty_ratelimit - x;
	}

	/*
	 * Don't pursue 100% rate matching. It's impossible since the balanced
	 * rate itself is constantly fluctuating. So decrease the track speed
	 * when it gets close to the target. Helps eliminate pointless tremors.
	 */
	shift = dirty_ratelimit / (2 * step + 1);
	if (shift < BITS_PER_LONG)
		step = DIV_ROUND_UP(step >> shift, 8);
	else
		step = 0;

	if (dirty_ratelimit < balanced_dirty_ratelimit)
		dirty_ratelimit += step;
	else
		dirty_ratelimit -= step;

	wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
	wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;

	trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
}
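/*
 * Worked example of the rate feedback (illustrative): suppose
 * write_bw = 25600 pages/s and four dd tasks are each being throttled at
 * task_ratelimit = r.  The measured dirty_rate will be about 4 * r, so
 * balanced_dirty_ratelimit = r * 25600 / (4 * r) = 6400 pages/s, i.e.
 * write_bw / N, independent of the current r.  The step logic then walks
 * wb->dirty_ratelimit towards that value gradually instead of jumping.
 */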
static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
				  struct dirty_throttle_control *mdtc,
				  unsigned long start_time,
				  bool update_ratelimit)
{
	struct bdi_writeback *wb = gdtc->wb;
	unsigned long now = jiffies;
	unsigned long elapsed = now - wb->bw_time_stamp;
	unsigned long dirtied;
	unsigned long written;

	lockdep_assert_held(&wb->list_lock);

	/*
	 * rate-limit, only update once every 200ms.
	 */
	if (elapsed < BANDWIDTH_INTERVAL)
		return;

	dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
	written = percpu_counter_read(&wb->stat[WB_WRITTEN]);

	/*
	 * Skip quiet periods when disk bandwidth is under-utilized.
	 * (at least 1s idle time between two flusher runs)
	 */
	if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
		goto snapshot;

	if (update_ratelimit) {
		domain_update_bandwidth(gdtc, now);
		wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);

		/*
		 * @mdtc is always NULL if !CGROUP_WRITEBACK but the
		 * compiler has no way to figure that out.  Help it.
		 */
		if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
			domain_update_bandwidth(mdtc, now);
			wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
		}
	}
	wb_update_write_bandwidth(wb, elapsed, written);

snapshot:
	wb->dirtied_stamp = dirtied;
	wb->written_stamp = written;
	wb->bw_time_stamp = now;
}

void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };

	__wb_update_bandwidth(&gdtc, NULL, start_time, false);
}
/*
 * After a task dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to start dirty throttling.
 *
 * If dirty_poll_interval is too low, big NUMA machines will call the expensive
 * global_zone_page_state() too often. So scale it near-sqrt to the safety
 * margin (the number of pages we may dirty without exceeding the dirty
 * limits).
 */
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	if (thresh > dirty)
		return 1UL << (ilog2(thresh - dirty) >> 1);

	return 1;
}
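/*
 * Worked example (illustrative): with a safety margin of
 * thresh - dirty = 16384 pages, ilog2(16384) = 14, so the poll interval is
 * 1 << (14 >> 1) = 128 pages; at a margin of 256 pages it shrinks to
 * 1 << 4 = 16 pages, checking the limits far more frequently as the system
 * approaches its threshold.
 */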
static unsigned long wb_max_pause(struct bdi_writeback *wb,
				  unsigned long wb_dirty)
{
	unsigned long bw = wb->avg_write_bandwidth;
	unsigned long t;

	/*
	 * Limit pause time for small memory systems. If sleeping for too long
	 * time, a small pool of dirty/writeback pages may go empty and disk go
	 * idle.
	 *
	 * 8 serves as the safety ratio.
	 */
	t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
	t++;

	return min_t(unsigned long, t, MAX_PAUSE);
}
static long wb_min_pause(struct bdi_writeback *wb,
			 long max_pause,
			 unsigned long task_ratelimit,
			 unsigned long dirty_ratelimit,
			 int *nr_dirtied_pause)
{
	long hi = ilog2(wb->avg_write_bandwidth);
	long lo = ilog2(wb->dirty_ratelimit);
	long t;		/* target pause */
	long pause;	/* estimated next pause */
	int pages;	/* target nr_dirtied_pause */

	/* target for 10ms pause on 1-dd case */
	t = max(1, HZ / 100);

	/*
	 * Scale up pause time for concurrent dirtiers in order to reduce CPU
	 * overheads.
	 *
	 * (N * 10ms) on 2^N concurrent tasks.
	 */
	if (hi > lo)
		t += (hi - lo) * (10 * HZ) / 1024;

	/*
	 * This is a bit convoluted. We try to base the next nr_dirtied_pause
	 * on the much more stable dirty_ratelimit. However the next pause time
	 * will be computed based on task_ratelimit and the two rate limits may
	 * depart considerably at some time. So clamp the target pause to no
	 * more than half the max pause, to keep the two estimates consistent.
	 */
	t = min(t, 1 + max_pause / 2);
	pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);

	/*
	 * Tiny nr_dirtied_pause is found to hurt I/O performance in the test
	 * case fio-mmap-randwrite-64k, which does 16*{sync read, async write}.
	 * When the 16 consecutive reads are often interrupted by some dirty
	 * throttling pause during the async writes, the I/O scheduler may idle
	 * and hurt throughput.  So take a larger pause below the poll
	 * threshold.
	 */
	if (pages < DIRTY_POLL_THRESH) {
		t = max_pause;
		pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
		if (pages > DIRTY_POLL_THRESH) {
			pages = DIRTY_POLL_THRESH;
			t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
		}
	}

	pause = HZ * pages / (task_ratelimit + 1);
	if (pause > max_pause) {
		t = max_pause;
		pages = task_ratelimit * t / roundup_pow_of_two(HZ);
	}

	*nr_dirtied_pause = pages;
	/*
	 * The minimal pause time will normally be half the target pause time.
	 */
	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
}
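/*
 * Worked example (illustrative): at HZ = 1000, the base target pause is
 * max(1, 1000/100) = 10 jiffies.  If nr_dirtied_pause works out to 32 pages
 * and task_ratelimit is 1000 pages/s, the estimated pause in
 * balance_dirty_pages() becomes HZ * 32 / 1001 ~= 31 jiffies, i.e. roughly
 * 32ms of sleep for every 32 pages dirtied.
 */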
static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long wb_reclaimable;

	/*
	 * wb_thresh is not treated as some limiting factor as
	 * dirty_thresh, due to reasons
	 * - in JBOD setup, wb_thresh can fluctuate a lot
	 * - in a system with HDD and USB key, the USB key may somehow
	 *   go into state (wb_dirty >> wb_thresh) either because
	 *   wb_dirty starts high, or because wb_thresh drops low.
	 *   In this case we don't want to hard throttle the USB key
	 *   dirtiers for 100 seconds until wb_dirty drops under
	 *   wb_thresh. Instead the auxiliary wb control line in
	 *   wb_position_ratio() will let the dirtier task progress
	 *   at some rate <= (write_bw / 2) for bringing down wb_dirty.
	 */
	dtc->wb_thresh = __wb_calc_thresh(dtc);
	dtc->wb_bg_thresh = dtc->thresh ?
		div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;

	/*
	 * In order to avoid the stacked BDI deadlock we need
	 * to ensure we accurately count the 'dirty' pages when
	 * the threshold is low.
	 *
	 * Otherwise it would be possible to get thresh+n pages
	 * reported dirty, even though there are thresh-m pages
	 * actually dirty; with m+n sitting in the percpu
	 * deltas.
	 */
	if (dtc->wb_thresh < 2 * wb_stat_error(wb)) {
		wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
		dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
	} else {
		wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
		dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
	}
}
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 */
static void balance_dirty_pages(struct address_space *mapping,
				struct bdi_writeback *wb,
				unsigned long pages_dirtied)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;
	struct dirty_throttle_control *sdtc;
	unsigned long nr_reclaimable;	/* = file_dirty + unstable_nfs */
	long period;
	long pause;
	long max_pause;
	long min_pause;
	int nr_dirtied_pause;
	bool dirty_exceeded = false;
	unsigned long task_ratelimit;
	unsigned long dirty_ratelimit;
	struct backing_dev_info *bdi = wb->bdi;
	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
	unsigned long start_time = jiffies;

	for (;;) {
		unsigned long now = jiffies;
		unsigned long dirty, thresh, bg_thresh;
		unsigned long m_dirty = 0;	/* stop bogus uninit warnings */
		unsigned long m_thresh = 0;
		unsigned long m_bg_thresh = 0;

		/*
		 * Unstable writes are a feature of certain networked
		 * filesystems (i.e. NFS) in which data may have been
		 * written to the server's write cache, but has not yet
		 * been flushed to permanent storage.
		 */
		nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
					global_node_page_state(NR_UNSTABLE_NFS);
		gdtc->avail = global_dirtyable_memory();
		gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);

		domain_dirty_limits(gdtc);

		if (unlikely(strictlimit)) {
			wb_dirty_limits(gdtc);

			dirty = gdtc->wb_dirty;
			thresh = gdtc->wb_thresh;
			bg_thresh = gdtc->wb_bg_thresh;
		} else {
			dirty = gdtc->dirty;
			thresh = gdtc->thresh;
			bg_thresh = gdtc->bg_thresh;
		}

		if (mdtc) {
			unsigned long filepages, headroom, writeback;

			/*
			 * If @wb belongs to !root memcg, repeat the same
			 * basic calculations for the memcg domain.
			 */
			mem_cgroup_wb_stats(wb, &filepages, &headroom,
					    &mdtc->dirty, &writeback);
			mdtc->dirty += writeback;
			mdtc_calc_avail(mdtc, filepages, headroom);

			domain_dirty_limits(mdtc);

			if (unlikely(strictlimit)) {
				wb_dirty_limits(mdtc);
				m_dirty = mdtc->wb_dirty;
				m_thresh = mdtc->wb_thresh;
				m_bg_thresh = mdtc->wb_bg_thresh;
			} else {
				m_dirty = mdtc->dirty;
				m_thresh = mdtc->thresh;
				m_bg_thresh = mdtc->bg_thresh;
			}
		}

		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the wb limits are ramping up in case of !strictlimit.
		 *
		 * In strictlimit case make decision based on the wb counters
		 * and limits. Small writeouts when the wb limits are ramping
		 * up are the price we consciously pay for strictlimit-ing.
		 *
		 * If memcg domain is in effect, @dirty should be under
		 * both global and memcg freerun ceilings.
		 */
		if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
		    (!mdtc ||
		     m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
			unsigned long intv = dirty_poll_interval(dirty, thresh);
			unsigned long m_intv = ULONG_MAX;

			current->dirty_paused_when = now;
			current->nr_dirtied = 0;
			if (mdtc)
				m_intv = dirty_poll_interval(m_dirty, m_thresh);
			current->nr_dirtied_pause = min(intv, m_intv);
			break;
		}

		if (unlikely(!writeback_in_progress(wb)))
			wb_start_background_writeback(wb);

		/*
		 * Calculate global domain's pos_ratio and select the
		 * global dtc by default.
		 */
		if (!strictlimit)
			wb_dirty_limits(gdtc);

		dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
			((gdtc->dirty > gdtc->thresh) || strictlimit);

		wb_position_ratio(gdtc);
		sdtc = gdtc;

		if (mdtc) {
			/*
			 * If memcg domain is in effect, calculate its
			 * pos_ratio.  @wb should satisfy constraints from
			 * both global and memcg domains.  Choose the one
			 * with the slower pos_ratio.
			 */
			if (!strictlimit)
				wb_dirty_limits(mdtc);

			dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
				((mdtc->dirty > mdtc->thresh) || strictlimit);

			wb_position_ratio(mdtc);
			if (mdtc->pos_ratio < gdtc->pos_ratio)
				sdtc = mdtc;
		}

		if (dirty_exceeded && !wb->dirty_exceeded)
			wb->dirty_exceeded = 1;

		if (time_is_before_jiffies(wb->bw_time_stamp +
					   BANDWIDTH_INTERVAL)) {
			spin_lock(&wb->list_lock);
			__wb_update_bandwidth(gdtc, mdtc, start_time, true);
			spin_unlock(&wb->list_lock);
		}

		/* throttle according to the estimated wb dirty ratelimit */
		dirty_ratelimit = wb->dirty_ratelimit;
		task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
							RATELIMIT_CALC_SHIFT;
		max_pause = wb_max_pause(wb, sdtc->wb_dirty);
		min_pause = wb_min_pause(wb, max_pause,
					 task_ratelimit, dirty_ratelimit,
					 &nr_dirtied_pause);

		if (unlikely(task_ratelimit == 0)) {
			period = max_pause;
			pause = max_pause;
			goto pause;
		}
		period = HZ * pages_dirtied / task_ratelimit;
		pause = period;
		if (current->dirty_paused_when)
			pause -= now - current->dirty_paused_when;
		/*
		 * For less than 1s think time (ext3/4 may block the dirtier
		 * for up to 800ms from time to time on 1-HDD; so does xfs,
		 * however at much less frequency), try to compensate it in
		 * future periods by updating the virtual time; otherwise just
		 * do a reset, as the task is likely to be idle.
		 */
		if (pause < min_pause) {
			trace_balance_dirty_pages(wb,
						  sdtc->thresh,
						  sdtc->bg_thresh,
						  sdtc->dirty,
						  sdtc->wb_thresh,
						  sdtc->wb_dirty,
						  dirty_ratelimit,
						  task_ratelimit,
						  pages_dirtied,
						  period,
						  min(pause, 0L),
						  start_time);
			if (pause < -HZ) {
				current->dirty_paused_when = now;
				current->nr_dirtied = 0;
			} else if (period) {
				current->dirty_paused_when += period;
				current->nr_dirtied = 0;
			} else if (current->nr_dirtied_pause <= pages_dirtied)
				current->nr_dirtied_pause += pages_dirtied;
			break;
		}
		if (unlikely(pause > max_pause)) {
			/* for occasional dropped task_ratelimit */
			now += min(pause - max_pause, max_pause);
			pause = max_pause;
		}

pause:
		trace_balance_dirty_pages(wb,
					  sdtc->thresh,
					  sdtc->bg_thresh,
					  sdtc->dirty,
					  sdtc->wb_thresh,
					  sdtc->wb_dirty,
					  dirty_ratelimit,
					  task_ratelimit,
					  pages_dirtied,
					  period,
					  pause,
					  start_time);
		__set_current_state(TASK_KILLABLE);
		wb->dirty_sleep = now;
		io_schedule_timeout(pause);

		current->dirty_paused_when = now + pause;
		current->nr_dirtied = 0;
		current->nr_dirtied_pause = nr_dirtied_pause;

		/*
		 * This is typically equal to (dirty < thresh) and can also
		 * keep "1000+ dd on a slow USB stick" under control.
		 */
		if (task_ratelimit)
			break;

		/*
		 * In the case of an unresponsive NFS server and the NFS dirty
		 * pages exceeds dirty_thresh, give the other good wb's a pipe
		 * to go through, so that tasks on them still remain responsive.
		 *
		 * In theory 1 page is enough to keep the consumer-producer
		 * pipe going: the flusher cleans 1 page => the task dirties 1
		 * more page. However wb_dirty has accounting errors.  So use
		 * the larger and more IO friendly wb_stat_error.
		 */
		if (sdtc->wb_dirty <= wb_stat_error(wb))
			break;

		if (fatal_signal_pending(current))
			break;
	}

	if (!dirty_exceeded && wb->dirty_exceeded)
		wb->dirty_exceeded = 0;

	if (writeback_in_progress(wb))
		return;

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if (laptop_mode)
		return;

	if (nr_reclaimable > gdtc->bg_thresh)
		wb_start_background_writeback(wb);
}
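/*
 * The throttling loop above, in outline (illustrative summary, not from the
 * original source):
 *
 *	loop {
 *		recompute thresholds and dirty counts;
 *		if (below the freerun ceiling)
 *			break;			(no throttling needed)
 *		pos_ratio = wb_position_ratio();
 *		task_ratelimit = dirty_ratelimit * pos_ratio;
 *		pause = HZ * pages_dirtied / task_ratelimit;
 *		sleep(clamp(pause, min_pause, max_pause));
 *	}
 */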
static DEFINE_PER_CPU(int, bdp_ratelimits);

/*
 * Normal tasks are throttled by
 *	loop {
 *		dirty tsk->nr_dirtied_pause pages;
 *		take a snap in balance_dirty_pages();
 *	}
 * However pages can also be dirtied in excess of a task's quota, e.g. by
 * short-lived tasks that exit before being throttled.  Those "leaked"
 * dirties are collected in this per-CPU counter so that they can later be
 * charged to tasks with spare quota instead of escaping the throttling.
 */
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;

/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * On really big machines, get_writeback_state is expensive, so try to avoid
 * calling it too often (ratelimiting).  But once we're over the dirty memory
 * limit we decrease the ratelimiting by a lot, to prevent individual processes
 * from overshooting the limit by (ratelimit_pages) each.
 */
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;
	int ratelimit;
	int *p;

	if (!bdi_cap_account_dirty(bdi))
		return;

	if (inode_cgwb_enabled(inode))
		wb = wb_get_create_current(bdi, GFP_KERNEL);
	if (!wb)
		wb = &bdi->wb;

	ratelimit = current->nr_dirtied_pause;
	if (wb->dirty_exceeded)
		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));

	preempt_disable();
	/*
	 * This prevents one CPU to accumulate too many dirtied pages without
	 * calling into balance_dirty_pages(), which can happen when there are
	 * 1000+ tasks, all of them start dirtying pages at exactly the same
	 * time, hence all honoured too large initial task->nr_dirtied_pause.
	 */
	p = this_cpu_ptr(&bdp_ratelimits);
	if (unlikely(current->nr_dirtied >= ratelimit))
		*p = 0;
	else if (unlikely(*p >= ratelimit_pages)) {
		*p = 0;
		ratelimit = 0;
	}
	/*
	 * Pick up the dirtied pages by the exited tasks. This avoids lots of
	 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
	 * the dirty throttling and livelock other long-run dirtiers.
	 */
	p = this_cpu_ptr(&dirty_throttle_leaks);
	if (*p > 0 && current->nr_dirtied < ratelimit) {
		unsigned long nr_pages_dirtied;
		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
		*p -= nr_pages_dirtied;
		current->nr_dirtied += nr_pages_dirtied;
	}
	preempt_enable();

	if (unlikely(current->nr_dirtied >= ratelimit))
		balance_dirty_pages(mapping, wb, current->nr_dirtied);

	wb_put(wb);
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
/**
 * wb_over_bg_thresh - does @wb need to be written back?
 * @wb: bdi_writeback of interest
 *
 * Determines whether background writeback should keep writing @wb or it's
 * clean enough.  Returns %true if writeback should continue.
 */
bool wb_over_bg_thresh(struct bdi_writeback *wb)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;

	/*
	 * Similar to balance_dirty_pages() but ignores pages being written
	 * as we're trying to decide whether to put more under writeback.
	 */
	gdtc->avail = global_dirtyable_memory();
	gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
		      global_node_page_state(NR_UNSTABLE_NFS);
	domain_dirty_limits(gdtc);

	if (gdtc->dirty > gdtc->bg_thresh)
		return true;

	if (wb_stat(wb, WB_RECLAIMABLE) >
	    wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
		return true;

	if (mdtc) {
		unsigned long filepages, headroom, writeback;

		mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
				    &writeback);
		mdtc_calc_avail(mdtc, filepages, headroom);
		domain_dirty_limits(mdtc);	/* ditto, ignore writeback */

		if (mdtc->dirty > mdtc->bg_thresh)
			return true;

		if (wb_stat(wb, WB_RECLAIMABLE) >
		    wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
			return true;
	}

	return false;
}
/*
 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
 */
int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
	void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec(table, write, buffer, length, ppos);
	return 0;
}
#ifdef CONFIG_BLOCK
void laptop_mode_timer_fn(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;
	int nr_pages = global_node_page_state(NR_FILE_DIRTY) +
		global_node_page_state(NR_UNSTABLE_NFS);
	struct bdi_writeback *wb;

	/*
	 * We want to write everything out, not just down to the dirty
	 * threshold
	 */
	if (!bdi_has_dirty_io(q->backing_dev_info))
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(wb, &q->backing_dev_info->wb_list, bdi_node)
		if (wb_has_dirty_io(wb))
			wb_start_writeback(wb, nr_pages, true,
					   WB_REASON_LAPTOP_TIMER);
	rcu_read_unlock();
}

/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already scheduled
 * then push it back - the user is still using the disk.
 */
void laptop_io_completion(struct backing_dev_info *info)
{
	mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
}

/*
 * We're in laptop mode and we've just synced. The sync's writes will have
 * caused another writeback to be scheduled by laptop_io_completion.
 * Nothing needs to be written back anymore, so we unschedule the writeback.
 */
void laptop_sync_completion(void)
{
	struct backing_dev_info *bdi;

	rcu_read_lock();

	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
		del_timer(&bdi->laptop_mode_wb_timer);

	rcu_read_unlock();
}
#endif
/*
 * If ratelimit_pages is too high then we can get into dirty-data overload
 * if a large number of processes all perform writes at the same time.
 * If it is too low then SMP machines will call the (expensive)
 * get_writeback_state too often.
 *
 * Here we set ratelimit_pages to a level which ensures that when all CPUs are
 * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
 * thresholds before writeback cuts in.
 */
void writeback_set_ratelimit(void)
{
	struct wb_domain *dom = &global_wb_domain;
	unsigned long background_thresh;
	unsigned long dirty_thresh;

	global_dirty_limits(&background_thresh, &dirty_thresh);
	dom->dirty_limit = dirty_thresh;
	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
	if (ratelimit_pages < 16)
		ratelimit_pages = 16;
}
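/*
 * Worked example (illustrative): with dirty_thresh = 52416 pages and 8
 * online CPUs, ratelimit_pages = 52416 / (8 * 32) = 204 pages, so each CPU
 * revisits the dirty limits after at most ~200 locally dirtied pages; the
 * floor of 16 pages keeps tiny systems from polling on every page.
 */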
static int page_writeback_cpu_online(unsigned int cpu)
{
	writeback_set_ratelimit();
	return 0;
}

/*
 * Called early on to tune the page writeback dirty limits.
 *
 * We used to scale dirty pages according to how total memory
 * related to pages that could be allocated for buffers.
 *
 * However, that was when we used "dirty_ratio" to scale with
 * all memory, and we don't do that any more. "dirty_ratio"
 * is now applied to total non-HIGHPAGE memory, and as such we can't
 * get into the old insane situation any more where we had
 * large amounts of dirty pages compared to a small amount of
 * non-HIGHMEM memory.
 *
 * But we might still want to scale the dirty_ratio by how
 * much memory the box has..
 */
void __init page_writeback_init(void)
{
	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));

	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
			  page_writeback_cpu_online, NULL);
	cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
			  page_writeback_cpu_online);
}
/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
 * that write_cache_pages (or whoever calls this function) will then use
 * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
 * used to avoid livelocking of writeback by a process steadily creating new
 * dirty pages in the file (thus it is important for this function to be quick
 * so that it can tag pages faster than a dirtying process can create them).
 */
void tag_pages_for_writeback(struct address_space *mapping,
			     pgoff_t start, pgoff_t end)
{
#define WRITEBACK_TAG_BATCH 4096
	unsigned long tagged = 0;
	struct radix_tree_iter iter;
	void **slot;

	spin_lock_irq(&mapping->tree_lock);
	radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, start,
							PAGECACHE_TAG_DIRTY) {
		if (iter.index > end)
			break;
		radix_tree_iter_tag_set(&mapping->page_tree, &iter,
							PAGECACHE_TAG_TOWRITE);
		tagged++;
		if ((tagged % WRITEBACK_TAG_BATCH) != 0)
			continue;
		slot = radix_tree_iter_resume(slot, &iter);
		spin_unlock_irq(&mapping->tree_lock);
		cond_resched();
		spin_lock_irq(&mapping->tree_lock);
	}
	spin_unlock_irq(&mapping->tree_lock);
}
EXPORT_SYMBOL(tag_pages_for_writeback);
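/*
 * Note on the batching above (illustrative summary): the tree lock is
 * dropped and cond_resched() is called after every WRITEBACK_TAG_BATCH
 * (4096) tagged pages, so tagging a multi-gigabyte dirty file cannot
 * monopolise the CPU or hold mapping->tree_lock for an unbounded time.
 */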
/**
 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 *
 * To avoid livelocks (when other process dirties new pages), we first tag
 * pages which should be written back with TOWRITE tag and only then start
 * writing them. For data-integrity sync we have to be careful so that we do
 * not miss some pages (e.g., because some other process has cleared TOWRITE
 * tag we set). The rule we follow is that TOWRITE tag can be cleared only
 * by write_one_page (and the page must be locked).
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping. However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end) {
				/*
				 * can't be range_cyclic (1st pass) because
				 * end == -1 in that case.
				 */
				done = 1;
				break;
			}

			done_index = page->index;

			lock_page(page);

			/*
			 * Page truncated or invalidated. We can freely skip it
			 * then, even for data integrity operations: the page
			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
			 * even if there is now a new, dirty page at the same
			 * pagecache address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
				} else {
					/*
					 * done_index is set past this page,
					 * so media errors will not choke
					 * background writeout for the entire
					 * file. This has consequences for
					 * range_cyclic semantics (ie. it may
					 * not be suitable for data integrity
					 * writeout).
					 */
					done_index = page->index + 1;
					done = 1;
					break;
				}
			}

			/*
			 * We stop writing back only if we are not doing
			 * integrity sync. In case of integrity sync we have to
			 * keep going until we have written all the pages
			 * we tagged for writeback prior to entering this loop.
			 */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
EXPORT_SYMBOL(write_cache_pages);
/*
 * Function used by generic_writepages to call the real writepage
 * function and set the mapping flags on error
 */
static int __writepage(struct page *page, struct writeback_control *wbc,
		       void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}
/**
 * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 *
 * This is a library function, which implements the writepages()
 * address_space_operation.
 */
int generic_writepages(struct address_space *mapping,
		       struct writeback_control *wbc)
{
	struct blk_plug plug;
	int ret;

	/* deal with chardevs and other special file */
	if (!mapping->a_ops->writepage)
		return 0;

	blk_start_plug(&plug);
	ret = write_cache_pages(mapping, wbc, __writepage, mapping);
	blk_finish_plug(&plug);
	return ret;
}

EXPORT_SYMBOL(generic_writepages);
int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	int ret;

	if (wbc->nr_to_write <= 0)
		return 0;
	while (1) {
		if (mapping->a_ops->writepages)
			ret = mapping->a_ops->writepages(mapping, wbc);
		else
			ret = generic_writepages(mapping, wbc);
		if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
			break;
		cond_resched();
		congestion_wait(BLK_RW_ASYNC, HZ/50);
	}
	return ret;
}
/**
 * write_one_page - write out a single page and wait on I/O
 * @page: the page to write
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this
 * function returns.
 */
int write_one_page(struct page *page)
{
	struct address_space *mapping = page->mapping;
	int ret = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	BUG_ON(!PageLocked(page));

	wait_on_page_writeback(page);

	if (clear_page_dirty_for_io(page)) {
		get_page(page);
		ret = mapping->a_ops->writepage(page, &wbc);
		if (ret == 0)
			wait_on_page_writeback(page);
		put_page(page);
	} else {
		unlock_page(page);
	}

	if (!ret)
		ret = filemap_check_errors(mapping);
	return ret;
}
EXPORT_SYMBOL(write_one_page);
/*
 * For address_spaces which do not use buffers nor write back.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (!PageDirty(page))
		return !TestSetPageDirty(page);
	return 0;
}
/*
 * Helper function for set_page_dirty family.
 *
 * Caller must hold lock_page_memcg().
 *
 * NOTE: This relies on being atomic wrt interrupts.
 */
void account_page_dirtied(struct page *page, struct address_space *mapping)
{
	struct inode *inode = mapping->host;

	trace_writeback_dirty_page(page, mapping);

	if (mapping_cap_account_dirty(mapping)) {
		struct bdi_writeback *wb;

		inode_attach_wb(inode, page);
		wb = inode_to_wb(inode);

		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		__inc_node_page_state(page, NR_DIRTIED);
		inc_wb_stat(wb, WB_RECLAIMABLE);
		inc_wb_stat(wb, WB_DIRTIED);
		task_io_account_write(PAGE_SIZE);
		current->nr_dirtied++;
		this_cpu_inc(bdp_ratelimits);
	}
}
EXPORT_SYMBOL(account_page_dirtied);
/*
 * Helper function for deaccounting dirty page without writeback.
 *
 * Caller must hold lock_page_memcg().
 */
void account_page_cleaned(struct page *page, struct address_space *mapping,
			  struct bdi_writeback *wb)
{
	if (mapping_cap_account_dirty(mapping)) {
		dec_lruvec_page_state(page, NR_FILE_DIRTY);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		dec_wb_stat(wb, WB_RECLAIMABLE);
		task_io_account_cancelled_write(PAGE_SIZE);
	}
}
/*
 * For address_spaces which do not use buffers.  Just tag the page as dirty in
 * its radix tree.
 *
 * This is also used when a single buffer is being dirtied: we want to set the
 * page dirty in that case, but not all the buffers.  This is a "bottom-up"
 * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
 *
 * The caller must ensure this doesn't race with truncation.  Most will simply
 * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
 * the pte lock held, which also locks the mapping.
 */
int __set_page_dirty_nobuffers(struct page *page)
{
	lock_page_memcg(page);
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);
		unsigned long flags;

		if (!mapping) {
			unlock_page_memcg(page);
			return 1;
		}

		spin_lock_irqsave(&mapping->tree_lock, flags);
		BUG_ON(page_mapping(page) != mapping);
		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		radix_tree_tag_set(&mapping->page_tree, page_index(page),
				   PAGECACHE_TAG_DIRTY);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
		unlock_page_memcg(page);

		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	unlock_page_memcg(page);
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);
/*
 * Call this whenever redirtying a page, to de-account the dirty counters
 * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written
 * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to
 * systematic errors in balanced_dirty_ratelimit and the dirty pages position
 * control.
 */
void account_page_redirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping && mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		bool locked;

		wb = unlocked_inode_to_wb_begin(inode, &locked);
		current->nr_dirtied--;
		dec_node_page_state(page, NR_DIRTIED);
		dec_wb_stat(wb, WB_DIRTIED);
		unlocked_inode_to_wb_end(inode, locked);
	}
}
EXPORT_SYMBOL(account_page_redirty);
/*
 * When a writepage implementation decides that it doesn't want to write this
 * page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage() and it should then unlock the page and return 0
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	int ret;

	wbc->pages_skipped++;
	ret = __set_page_dirty_nobuffers(page);
	account_page_redirty(page);
	return ret;
}
EXPORT_SYMBOL(redirty_page_for_writepage);
/*
 * Dirty a page.
 *
 * For pages with a mapping this should be done under the page lock
 * for the benefit of asynchronous memory errors who prefer a consistent
 * dirty state. This rule can be broken in some special cases,
 * but should be better not to.
 *
 * If the mapping doesn't provide a set_page_dirty a_op, then
 * just fall through and assume that it wants buffer_heads.
 */
int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	page = compound_head(page);
	if (likely(mapping)) {
		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
		/*
		 * readahead/lru_deactivate_page could remain
		 * PG_readahead/PG_reclaim due to race with end_swap_bio_read.
		 * About readahead, if the page is written, the flags would be
		 * reset. So no problem.
		 * About lru_deactivate_page, if the page is redirtied, the
		 * flag will be reset. So no problem. But if the page is used
		 * by readahead it will confuse readahead and make it restart
		 * the size rampup process. But it's a trivial problem.
		 */
		if (PageReclaim(page))
			ClearPageReclaim(page);
#ifdef CONFIG_BLOCK
		if (!spd)
			spd = __set_page_dirty_buffers;
#endif
		return (*spd)(page);
	}
	if (!PageDirty(page)) {
		if (!TestSetPageDirty(page))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(set_page_dirty);
/*
 * set_page_dirty() is racy if the caller has no reference against
 * page->mapping->host, and if the page is unlocked.  This is because another
 * CPU could truncate the page off the mapping and then free the mapping.
 *
 * Usually, the page _is_ locked, or the caller is a user-space process which
 * holds a reference on the inode by having an open file.
 *
 * In other cases, the page should be locked before running set_page_dirty().
 */
int set_page_dirty_lock(struct page *page)
{
	int ret;

	lock_page(page);
	ret = set_page_dirty(page);
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(set_page_dirty_lock);
/*
 * This cancels just the dirty bit on the kernel page itself, it does NOT
 * actually remove dirty bits on any mmap's that may be around. It also
 * leaves the page tagged dirty, so any sync activity will still find it on
 * the dirty lists, and in particular, clear_page_dirty_for_io() will still
 * look at the dirty bits in the VM.
 *
 * Doing this should *normally* only ever be done when a page is truncated,
 * and is not actually mapped anywhere at all.
 */
void cancel_dirty_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		bool locked;

		lock_page_memcg(page);
		wb = unlocked_inode_to_wb_begin(inode, &locked);

		if (TestClearPageDirty(page))
			account_page_cleaned(page, mapping, wb);

		unlocked_inode_to_wb_end(inode, locked);
		unlock_page_memcg(page);
	} else {
		ClearPageDirty(page);
	}
}
EXPORT_SYMBOL(cancel_dirty_page);
/*
 * Clear a page's dirty flag, while caring for dirty memory accounting.
 * Returns true if the page was previously dirty.
 *
 * This is for preparing to put the page under writeout.  We leave the page
 * tagged as dirty in the radix tree so that a concurrent write-for-sync
 * can discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage
 * implementation will run either set_page_writeback() or set_page_dirty(),
 * at which stage we bring the page's dirty flag and radix-tree dirty tag
 * back into sync.
 *
 * This incoherency between the page's dirty flag and radix-tree tag is
 * unfortunate, but it only exists while the page is locked.
 */
int clear_page_dirty_for_io(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret = 0;

	BUG_ON(!PageLocked(page));

	if (mapping && mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		bool locked;

		/*
		 * Yes, Virginia, this is indeed insane.
		 *
		 * We use this sequence to make sure that
		 *  (a) we account for dirty stats properly
		 *  (b) we tell the low-level filesystem to
		 *      mark the whole page dirty if it was
		 *      dirty in a pagetable. Only to then
		 *  (c) clean the page again and return 1 to
		 *      cause the writeback.
		 *
		 * This way we avoid all nasty races with the
		 * dirty bit in multiple places and clearing
		 * them concurrently from different threads.
		 *
		 * Note! Normally the "set_page_dirty(page)"
		 * has no effect on the actual dirty bit - since
		 * that will already usually be set. But we
		 * need the side effects, and it can help us
		 * avoid races.
		 *
		 * We basically use the page "master dirty bit"
		 * as a serialization point for all the different
		 * threads doing their things.
		 */
		if (page_mkclean(page))
			set_page_dirty(page);
		/*
		 * We carefully synchronise fault handlers against
		 * installing a dirty pte and marking the page dirty
		 * at this point.  We do this by having them hold the
		 * page lock while dirtying the page, and pages are
		 * always locked coming in here, so we get the desired
		 * exclusion.
		 */
		wb = unlocked_inode_to_wb_begin(inode, &locked);
		if (TestClearPageDirty(page)) {
			dec_lruvec_page_state(page, NR_FILE_DIRTY);
			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
			dec_wb_stat(wb, WB_RECLAIMABLE);
			ret = 1;
		}
		unlocked_inode_to_wb_end(inode, locked);
		return ret;
	}
	return TestClearPageDirty(page);
}
EXPORT_SYMBOL(clear_page_dirty_for_io);
int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	struct mem_cgroup *memcg;
	struct lruvec *lruvec;
	int ret;

	memcg = lock_page_memcg(page);
	lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
	if (mapping && mapping_use_writeback_tags(mapping)) {
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi)) {
				struct bdi_writeback *wb = inode_to_wb(inode);

				dec_wb_stat(wb, WB_WRITEBACK);
				__wb_writeout_inc(wb);
			}
		}

		if (mapping->host && !mapping_tagged(mapping,
						     PAGECACHE_TAG_WRITEBACK))
			sb_clear_inode_writeback(mapping->host);

		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	/*
	 * NOTE: Page might be free now! Writeback doesn't hold a page
	 * reference on its own, it relies on truncation to wait for
	 * the clearing of PG_writeback. The below can only access
	 * page state that is static across allocation cycles.
	 */
	if (ret) {
		dec_lruvec_state(lruvec, NR_WRITEBACK);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		inc_node_page_state(page, NR_WRITTEN);
	}
	__unlock_page_memcg(memcg);
	return ret;
}
int __test_set_page_writeback(struct page *page, bool keep_write)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	lock_page_memcg(page);
	if (mapping && mapping_use_writeback_tags(mapping)) {
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			bool on_wblist;

			on_wblist = mapping_tagged(mapping,
						   PAGECACHE_TAG_WRITEBACK);

			radix_tree_tag_set(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi))
				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);

			/*
			 * We can come through here when swapping anonymous
			 * pages, so we don't necessarily have an inode to
			 * track for sync.
			 */
			if (mapping->host && !on_wblist)
				sb_mark_inode_writeback(mapping->host);
		}
		if (!PageDirty(page))
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_DIRTY);
		if (!keep_write)
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_TOWRITE);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret) {
		inc_lruvec_page_state(page, NR_WRITEBACK);
		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
	}
	unlock_page_memcg(page);
	return ret;
}
EXPORT_SYMBOL(__test_set_page_writeback);
/*
 * Return true if any of the pages in the mapping are marked with the passed
 * tag.
 */
int mapping_tagged(struct address_space *mapping, int tag)
{
	return radix_tree_tagged(&mapping->page_tree, tag);
}
EXPORT_SYMBOL(mapping_tagged);
/**
 * wait_for_stable_page() - wait for writeback to finish, if necessary.
 * @page:	The page to wait on.
 *
 * This function determines if the given page is related to a backing device
 * that requires page contents to be held stable during writeback.  If so, then
 * it will wait for any pending writeback to complete.
 */
void wait_for_stable_page(struct page *page)
{
	if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
		wait_on_page_writeback(page);
}
EXPORT_SYMBOL_GPL(wait_for_stable_page);