1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/kernel.h>
15#include <linux/export.h>
16#include <linux/spinlock.h>
17#include <linux/fs.h>
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/slab.h>
21#include <linux/pagemap.h>
22#include <linux/writeback.h>
23#include <linux/init.h>
24#include <linux/backing-dev.h>
25#include <linux/task_io_accounting_ops.h>
26#include <linux/blkdev.h>
27#include <linux/mpage.h>
28#include <linux/rmap.h>
29#include <linux/percpu.h>
30#include <linux/notifier.h>
31#include <linux/smp.h>
32#include <linux/sysctl.h>
33#include <linux/cpu.h>
34#include <linux/syscalls.h>
35#include <linux/buffer_head.h>
36#include <linux/pagevec.h>
37#include <linux/timer.h>
38#include <linux/sched/rt.h>
39#include <linux/mm_inline.h>
40#include <trace/events/writeback.h>
41
42#include "internal.h"
43
44
45
46
/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many pages
 * by raising pause time to max_pause when falls below it.
 */
#define DIRTY_POLL_THRESH (128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL max(HZ/5, 1)

/* fixed-point shift used by the pos_ratio / dirty ratelimit calculations */
#define RATELIMIT_CALC_SHIFT 10

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;
67
68
69
70
71
72
/*
 * Start background writeback (via writeback threads) at this percentage of
 * dirtyable memory.
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory.
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true.
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage.
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory.
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks, in centiseconds
 * (5 * 100 centiseconds == 5 seconds).
 */
unsigned int dirty_writeback_interval = 5 * 100;

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty, in centiseconds.
 */
unsigned int dirty_expire_interval = 30 * 100;

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in
 * jiffies: a full sync is triggered after this time elapses without any
 * disk activity.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

/* the default wb_domain covering all dirtyable memory system-wide */
struct wb_domain global_wb_domain;
126
127
/* consolidated parameters for balance_dirty_pages() and its subroutines */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain *dom;
	struct dirty_throttle_control *gdtc;	/* only set on mdtc's: its paired gdtc */
#endif
	struct bdi_writeback *wb;
	struct fprop_local_percpu *wb_completions;

	unsigned long avail;		/* dirtyable pages */
	unsigned long dirty;		/* domain-wide dirty pages */
	unsigned long thresh;		/* dirty (throttling) threshold */
	unsigned long bg_thresh;	/* dirty background threshold */

	unsigned long wb_dirty;		/* per-wb counterparts of the above */
	unsigned long wb_thresh;
	unsigned long wb_bg_thresh;

	unsigned long pos_ratio;	/* throttle scaling, RATELIMIT_CALC_SHIFT fixed point */
};
147
148
149
150
151
152
/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrarily chosen number. The longer the period, the slower fractions will
 * reflect changes in current writeout rate.
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)

#ifdef CONFIG_CGROUP_WRITEBACK

/* initializer for a global-domain dirty_throttle_control */
#define GDTC_INIT(__wb) .wb = (__wb), \
	.dom = &global_wb_domain, \
	.wb_completions = &(__wb)->completions

#define GDTC_INIT_NO_WB .dom = &global_wb_domain

/* initializer for a memcg-domain dirty_throttle_control */
#define MDTC_INIT(__wb, __gdtc) .wb = (__wb), \
	.dom = mem_cgroup_wb_domain(__wb), \
	.wb_completions = &(__wb)->memcg_completions, \
	.gdtc = __gdtc

/* an mdtc is valid only when a memcg wb_domain was attached */
static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

/* return the global dtc paired with @mdtc (NULL when @mdtc is itself a gdtc) */
static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return mdtc->gdtc;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return &wb->memcg_completions;
}

/*
 * Scale @wb's bdi min/max ratio down by @wb's share (by average write
 * bandwidth) of the bdi's total bandwidth.
 */
static void wb_min_max_ratio(struct bdi_writeback *wb,
	unsigned long *minp, unsigned long *maxp)
{
	unsigned long this_bw = wb->avg_write_bandwidth;
	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
	unsigned long long min = wb->bdi->min_ratio;
	unsigned long long max = wb->bdi->max_ratio;

	/*
	 * @wb may already be clean by the time control reaches here and
	 * the total may not include its bw.
	 */
	if (this_bw < tot_bw) {
		if (min) {
			min *= this_bw;
			do_div(min, tot_bw);
		}
		if (max < 100) {
			max *= this_bw;
			do_div(max, tot_bw);
		}
	}

	*minp = min;
	*maxp = max;
}

#else	/* CONFIG_CGROUP_WRITEBACK */

#define GDTC_INIT(__wb) .wb = (__wb), \
	.wb_completions = &(__wb)->completions
#define GDTC_INIT_NO_WB
#define MDTC_INIT(__wb, __gdtc)

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return false;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return &global_wb_domain;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return NULL;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return NULL;
}

/* without cgroup writeback there is one wb per bdi; no scaling needed */
static void wb_min_max_ratio(struct bdi_writeback *wb,
	unsigned long *minp, unsigned long *maxp)
{
	*minp = wb->bdi->min_ratio;
	*maxp = wb->bdi->max_ratio;
}

#endif	/* CONFIG_CGROUP_WRITEBACK */
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
/**
 * node_dirtyable_memory - number of dirtyable pages in a node
 * @pgdat: the node
 *
 * Returns the node's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-node dirty limits.
 */
static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
{
	unsigned long nr_pages = 0;
	int z;

	for (z = 0; z < MAX_NR_ZONES; z++) {
		struct zone *zone = pgdat->node_zones + z;

		if (!populated_zone(zone))
			continue;

		nr_pages += zone_page_state(zone, NR_FREE_PAGES);
	}

	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	nr_pages -= min(nr_pages, pgdat->totalreserve_pages);

	nr_pages += node_page_state(pgdat, NR_INACTIVE_FILE);
	nr_pages += node_page_state(pgdat, NR_ACTIVE_FILE);

	return nr_pages;
}
302
/**
 * highmem_dirtyable_memory - amount of dirtyable highmem
 * @total: amount of dirtyable pages of all zones
 *
 * Returns the number of pages that are dirtyable in high memory, which is
 * subtracted from @total by the caller unless vm_highmem_is_dirtyable.
 */
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;
	int i;

	for_each_node_state(node, N_HIGH_MEMORY) {
		for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
			struct zone *z;
			unsigned long nr_pages;

			if (!is_highmem_idx(i))
				continue;

			z = &NODE_DATA(node)->node_zones[i];
			if (!populated_zone(z))
				continue;

			nr_pages = zone_page_state(z, NR_FREE_PAGES);
			/* watermark pages are not dirtyable */
			nr_pages -= min(nr_pages, high_wmark_pages(z));
			nr_pages += zone_page_state(z, NR_ZONE_INACTIVE_FILE);
			nr_pages += zone_page_state(z, NR_ZONE_ACTIVE_FILE);
			x += nr_pages;
		}
	}

	/*
	 * Unreclaimable memory (kernel memory or anonymous memory
	 * without swap) can bring down the dirtyable pages below
	 * the zone's dirty balance reserve and the above calculation
	 * will underflow.  However we still want to add in zones
	 * which are below threshold (negative values) to get a more
	 * accurate calculation but make sure that the total never
	 * underflows.
	 */
	if ((long)x < 0)
		x = 0;

	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory. This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(x, total);
#else
	return 0;
#endif
}
354
355
356
357
358
359
360
/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Returns the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */
static unsigned long global_dirtyable_memory(void)
{
	unsigned long x;

	x = global_page_state(NR_FREE_PAGES);

	/*
	 * Pages reserved for the kernel should not be considered
	 * dirtyable, to prevent a situation where reclaim has to
	 * clean pages in order to balance the zones.
	 */
	x -= min(x, totalreserve_pages);

	x += global_node_page_state(NR_INACTIVE_FILE);
	x += global_node_page_state(NR_ACTIVE_FILE);

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}
381
382
383
384
385
386
387
388
389
390
391
/**
 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
 * @dtc: dirty_throttle_control of interest
 *
 * Calculates @dtc->thresh and ->bg_thresh considering
 * vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}.  The caller
 * must ensure that @dtc->avail is set before calling this function.  The
 * dirty limits are lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and
 * real-time tasks.
 */
static void domain_dirty_limits(struct dirty_throttle_control *dtc)
{
	const unsigned long available_memory = dtc->avail;
	struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
	unsigned long bytes = vm_dirty_bytes;
	unsigned long bg_bytes = dirty_background_bytes;
	/* convert ratios to per-PAGE_SIZE for higher precision */
	unsigned long ratio = (vm_dirty_ratio * PAGE_SIZE) / 100;
	unsigned long bg_ratio = (dirty_background_ratio * PAGE_SIZE) / 100;
	unsigned long thresh;
	unsigned long bg_thresh;
	struct task_struct *tsk;

	/* gdtc is !NULL iff @dtc is for a memcg domain */
	if (gdtc) {
		unsigned long global_avail = gdtc->avail;

		/*
		 * The byte settings can't be applied directly to memcg
		 * domains.  Convert them to ratios by scaling against
		 * globally available memory.  As the ratios are in
		 * per-PAGE_SIZE units, they can be obtained by dividing
		 * bytes by the number of pages.
		 */
		if (bytes)
			ratio = min(DIV_ROUND_UP(bytes, global_avail),
				    PAGE_SIZE);
		if (bg_bytes)
			bg_ratio = min(DIV_ROUND_UP(bg_bytes, global_avail),
				       PAGE_SIZE);
		bytes = bg_bytes = 0;
	}

	if (bytes)
		thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
	else
		thresh = (ratio * available_memory) / PAGE_SIZE;

	if (bg_bytes)
		bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
	else
		bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;

	/* the background threshold must stay below the throttle threshold */
	if (bg_thresh >= thresh)
		bg_thresh = thresh / 2;
	tsk = current;
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
		thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
	}
	dtc->thresh = thresh;
	dtc->bg_thresh = bg_thresh;

	/* we should eventually report the domain in the TP */
	if (!gdtc)
		trace_global_dirty_state(bg_thresh, thresh);
}
449
450
451
452
453
454
455
456
457
/**
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 * @pbackground: out parameter for bg_thresh
 * @pdirty: out parameter for thresh
 *
 * Calculate bg_thresh and thresh for global_wb_domain.  See
 * domain_dirty_limits() for details.
 */
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };

	gdtc.avail = global_dirtyable_memory();
	domain_dirty_limits(&gdtc);

	*pbackground = gdtc.bg_thresh;
	*pdirty = gdtc.thresh;
}
468
469
470
471
472
473
474
475
/**
 * node_dirty_limit - maximum number of dirty pages allowed in a node
 * @pgdat: the node
 *
 * Returns the maximum number of dirty pages allowed in a node, based
 * on the node's dirtyable memory.
 */
static unsigned long node_dirty_limit(struct pglist_data *pgdat)
{
	unsigned long node_memory = node_dirtyable_memory(pgdat);
	struct task_struct *tsk = current;
	unsigned long dirty;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
			node_memory / global_dirtyable_memory();
	else
		dirty = vm_dirty_ratio * node_memory / 100;

	/* less-throttled (e.g. nfsd) and realtime tasks get a 25% higher limit */
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
		dirty += dirty / 4;

	return dirty;
}
493
494
495
496
497
498
499
500
/**
 * node_dirty_ok - tells whether a node is within its dirty limits
 * @pgdat: the node to check
 *
 * Returns %true when the dirty pages in @pgdat are within the node's
 * dirty limit, %false if the limit is exceeded.
 */
bool node_dirty_ok(struct pglist_data *pgdat)
{
	unsigned long limit = node_dirty_limit(pgdat);
	unsigned long nr_pages = 0;

	nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
	nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
	nr_pages += node_page_state(pgdat, NR_WRITEBACK);

	return nr_pages <= limit;
}
512
513int dirty_background_ratio_handler(struct ctl_table *table, int write,
514 void __user *buffer, size_t *lenp,
515 loff_t *ppos)
516{
517 int ret;
518
519 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
520 if (ret == 0 && write)
521 dirty_background_bytes = 0;
522 return ret;
523}
524
525int dirty_background_bytes_handler(struct ctl_table *table, int write,
526 void __user *buffer, size_t *lenp,
527 loff_t *ppos)
528{
529 int ret;
530
531 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
532 if (ret == 0 && write)
533 dirty_background_ratio = 0;
534 return ret;
535}
536
537int dirty_ratio_handler(struct ctl_table *table, int write,
538 void __user *buffer, size_t *lenp,
539 loff_t *ppos)
540{
541 int old_ratio = vm_dirty_ratio;
542 int ret;
543
544 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
545 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
546 writeback_set_ratelimit();
547 vm_dirty_bytes = 0;
548 }
549 return ret;
550}
551
552int dirty_bytes_handler(struct ctl_table *table, int write,
553 void __user *buffer, size_t *lenp,
554 loff_t *ppos)
555{
556 unsigned long old_bytes = vm_dirty_bytes;
557 int ret;
558
559 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
560 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
561 writeback_set_ratelimit();
562 vm_dirty_ratio = 0;
563 }
564 return ret;
565}
566
567static unsigned long wp_next_time(unsigned long cur_time)
568{
569 cur_time += VM_COMPLETIONS_PERIOD_LEN;
570
571 if (!cur_time)
572 return 1;
573 return cur_time;
574}
575
576static void wb_domain_writeout_inc(struct wb_domain *dom,
577 struct fprop_local_percpu *completions,
578 unsigned int max_prop_frac)
579{
580 __fprop_inc_percpu_max(&dom->completions, completions,
581 max_prop_frac);
582
583 if (!unlikely(dom->period_time)) {
584
585
586
587
588
589
590 dom->period_time = wp_next_time(jiffies);
591 mod_timer(&dom->period_timer, dom->period_time);
592 }
593}
594
595
596
597
598
/*
 * Increment @wb's writeout completion count and the global writeout
 * completion count.  Caller must hold off interrupts (see wb_writeout_inc()).
 */
static inline void __wb_writeout_inc(struct bdi_writeback *wb)
{
	struct wb_domain *cgdom;

	__inc_wb_stat(wb, WB_WRITTEN);
	wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
			       wb->bdi->max_prop_frac);

	/* repeat for the memcg wb_domain, if one is attached */
	cgdom = mem_cgroup_wb_domain(wb);
	if (cgdom)
		wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
				       wb->bdi->max_prop_frac);
}
612
/* irq-safe wrapper around __wb_writeout_inc() */
void wb_writeout_inc(struct bdi_writeback *wb)
{
	unsigned long flags;

	local_irq_save(flags);
	__wb_writeout_inc(wb);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(wb_writeout_inc);
622
623
624
625
626
/*
 * Timer callback: age the writeout completion proportions.
 *
 * On an idle system we can be called long after we scheduled because we use
 * deferred timers, so count in the missed periods.
 */
static void writeout_period(unsigned long t)
{
	struct wb_domain *dom = (void *)t;
	int miss_periods = (jiffies - dom->period_time) /
						 VM_COMPLETIONS_PERIOD_LEN;

	if (fprop_new_period(&dom->completions, miss_periods + 1)) {
		dom->period_time = wp_next_time(dom->period_time +
				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
		mod_timer(&dom->period_timer, dom->period_time);
	} else {
		/*
		 * Aging has zeroed all fractions. Stop wasting CPU on period
		 * updates.
		 */
		dom->period_time = 0;
	}
}
645
/*
 * Initialize @dom: zero all fields, set up the lock, the deferrable
 * completion-aging timer and the completion proportions.  Returns the
 * result of fprop_global_init() (0 on success).
 */
int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
{
	memset(dom, 0, sizeof(*dom));

	spin_lock_init(&dom->lock);

	init_timer_deferrable(&dom->period_timer);
	dom->period_timer.function = writeout_period;
	dom->period_timer.data = (unsigned long)dom;

	dom->dirty_limit_tstamp = jiffies;

	return fprop_global_init(&dom->completions, gfp);
}
660
#ifdef CONFIG_CGROUP_WRITEBACK
/* tear down a domain initialized with wb_domain_init() */
void wb_domain_exit(struct wb_domain *dom)
{
	del_timer_sync(&dom->period_timer);
	fprop_global_destroy(&dom->completions);
}
#endif
668
669
670
671
672
673
/*
 * bdi_min_ratio keeps the sum of the minimum dirty shares of all registered
 * backing devices, which, for obvious reasons, can not exceed 100%.
 */
static unsigned int bdi_min_ratio;

/*
 * Set @bdi's guaranteed minimum share of the dirty limit.  Fails with
 * -EINVAL when the new minimum exceeds @bdi's max_ratio or would push the
 * sum of all minimums to 100% or above.
 */
int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;

	spin_lock_bh(&bdi_lock);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		/* work with the delta against the current setting */
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}
696
697int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
698{
699 int ret = 0;
700
701 if (max_ratio > 100)
702 return -EINVAL;
703
704 spin_lock_bh(&bdi_lock);
705 if (bdi->min_ratio > max_ratio) {
706 ret = -EINVAL;
707 } else {
708 bdi->max_ratio = max_ratio;
709 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
710 }
711 spin_unlock_bh(&bdi_lock);
712
713 return ret;
714}
715EXPORT_SYMBOL(bdi_set_max_ratio);
716
/*
 * Dirty memory below the midpoint of the background and hard thresholds is
 * "free run" space: tasks dirtying there are not throttled at all.
 */
static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	unsigned long midpoint = (thresh + bg_thresh) / 2;

	return midpoint;
}
722
/*
 * The hard throttling limit: the larger of the domain-wide dirty_limit
 * (maintained by update_dirty_limit()) and the freshly computed @thresh.
 */
static unsigned long hard_dirty_limit(struct wb_domain *dom,
				      unsigned long thresh)
{
	return max(thresh, dom->dirty_limit);
}
728
729
730
731
732
/*
 * Compute the memcg domain's dirtyable memory: its own file pages plus as
 * much of its allowed @headroom as is actually backed by clean memory
 * elsewhere in the system.
 */
static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
			    unsigned long filepages, unsigned long headroom)
{
	struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
	unsigned long clean = filepages - min(filepages, mdtc->dirty);
	unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
	unsigned long other_clean = global_clean - min(global_clean, clean);

	mdtc->avail = filepages + min(headroom, other_clean);
}
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
/**
 * __wb_calc_thresh - @wb's share of the dirty throttling threshold
 * @dtc: dirty_throttle_control of interest
 *
 * Returns @dtc->wb's dirty limit in pages, computed as its share of
 * @dtc->thresh proportional to the wb's fraction of recent writeout
 * completions, then adjusted by the bdi's min/max ratio settings.
 *
 * Allocating high/low limits to fast/slow devices prevents starving fast
 * devices and prevents piling up dirty pages (which take long to sync) on
 * slow devices.
 */
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	u64 wb_thresh;
	long numerator, denominator;
	unsigned long wb_min_ratio, wb_max_ratio;

	/*
	 * Calculate this wb's share of the thresh ratio.
	 */
	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
			      &numerator, &denominator);

	wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
	wb_thresh *= numerator;
	do_div(wb_thresh, denominator);

	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);

	wb_thresh += (thresh * wb_min_ratio) / 100;
	if (wb_thresh > (thresh * wb_max_ratio) / 100)
		wb_thresh = thresh * wb_max_ratio / 100;

	return wb_thresh;
}
791
/* @wb's share of @thresh within the global wb_domain */
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
					       .thresh = thresh };
	return __wb_calc_thresh(&gdtc);
}
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
/*
 *                            setpoint - dirty 3
 *        f(dirty) := 1.0 + (----------------)
 *                            limit - setpoint
 *
 * A 3rd order polynomial such that
 *
 * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
 * (2) f(setpoint) = 1.0 => the balance goal
 * (3) f(limit)    = 0   => the hard limit
 * (4) df/dx      <= 0   => negative feedback control
 * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
 *     => fast response on large errors; small oscillation near setpoint
 *
 * Result is in RATELIMIT_CALC_SHIFT fixed point, clamped to [0, 2.0].
 */
static long long pos_ratio_polynom(unsigned long setpoint,
				   unsigned long dirty,
				   unsigned long limit)
{
	long long pos_ratio;
	long x;

	/* "| 1" avoids a division by zero when limit == setpoint */
	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
		      (limit - setpoint) | 1);
	pos_ratio = x;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;

	return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
}
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
/*
 * Dirty position control.
 *
 * We want the dirty pages to be balanced around the global/wb setpoints.
 * When the number of dirty pages is higher/lower than the setpoint, the
 * dirty position control ratio (and hence task dirty ratelimit) will be
 * decreased/increased to bring the dirty pages back to the setpoint:
 *
 *   pos_ratio = 2.0 at the freerun ceiling
 *   pos_ratio = 1.0 at the setpoint
 *   pos_ratio = 0   at the hard limit
 *
 * The global control curve follows pos_ratio_polynom(); a second, roughly
 * linear, per-wb control line then scales the result further based on how
 * far wb_dirty is from the wb's own setpoint.  The final value is written
 * to @dtc->pos_ratio in RATELIMIT_CALC_SHIFT fixed point.
 */
static void wb_position_ratio(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long wb_thresh = dtc->wb_thresh;
	unsigned long x_intercept;
	unsigned long setpoint;		/* dirty pages' target balance point */
	unsigned long wb_setpoint;
	unsigned long span;
	long long pos_ratio;		/* for scaling up/down the rate limit */
	long x;

	dtc->pos_ratio = 0;

	if (unlikely(dtc->dirty >= limit))
		return;

	/*
	 * global setpoint
	 *
	 * See comment for pos_ratio_polynom().
	 */
	setpoint = (freerun + limit) / 2;
	pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);

	/*
	 * The strictlimit feature is a tool preventing mistrusted filesystems
	 * from growing a large number of dirty pages before throttling. For
	 * such filesystems balance_dirty_pages always checks wb counters
	 * against wb limits. Even if global "nr_dirty" is under "freerun".
	 *
	 * Here we calculate a wb-local pos_ratio from wb_dirty and wb_thresh
	 * and combine it with the global one.
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		long long wb_pos_ratio;

		/* rampup: with very few wb dirty pages, throttle gently */
		if (dtc->wb_dirty < 8) {
			dtc->pos_ratio = min_t(long long, pos_ratio * 2,
					       2 << RATELIMIT_CALC_SHIFT);
			return;
		}

		if (dtc->wb_dirty >= wb_thresh)
			return;

		wb_setpoint = dirty_freerun_ceiling(wb_thresh,
						    dtc->wb_bg_thresh);

		if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
			return;

		wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
						 wb_thresh);

		/*
		 * Typically, for strictlimit case, wb_setpoint << setpoint
		 * and pos_ratio >> wb_pos_ratio. In other words, the global
		 * state ("dirty") is not the limiting factor and we have to
		 * make a decision based on wb counters. But there is an
		 * important case when global pos_ratio should get precedence:
		 * global limits are exceeded (e.g. due to activities on other
		 * wb's) while the given strictlimit wb is below its limit.
		 *
		 * Note that min() below somewhat changes the dynamics of the
		 * control system: the maximum pos_ratio becomes 2 instead of
		 * potentially well over 3.  Tweak if the control system is
		 * observed to adapt too slowly.
		 */
		dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
		return;
	}

	/*
	 * We have computed the basic pos_ratio above based on the global
	 * situation. If the wb is over/under its share of dirty pages, we
	 * want to scale pos_ratio further down/up. That is done by the
	 * following per-wb, roughly linear, control line.
	 */
	if (unlikely(wb_thresh > dtc->thresh))
		wb_thresh = dtc->thresh;
	/*
	 * It's very possible that wb_thresh is close to 0 not because the
	 * device is slow, but because it has remained inactive for a long
	 * time.  Honour such devices a reasonably good (hopefully IO
	 * efficient) threshold, so that occasional writes won't be blocked
	 * and active writes can ramp up the threshold quickly.
	 */
	wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
	/*
	 * scale the global setpoint to the wb's:
	 *	wb_setpoint = setpoint * wb_thresh / thresh
	 */
	x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
	wb_setpoint = setpoint * (u64)x >> 16;
	/*
	 * Use span=(8*write_bw) in the single-wb case as indicated by
	 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in the JBOD
	 * case:
	 *
	 *        wb_thresh                    thresh - wb_thresh
	 * span = --------- * (8 * write_bw) + ------------------ * wb_thresh
	 *          thresh                           thresh
	 */
	span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
	x_intercept = wb_setpoint + span;

	if (dtc->wb_dirty < x_intercept - span / 4) {
		pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
				      (x_intercept - wb_setpoint) | 1);
	} else
		pos_ratio /= 4;

	/*
	 * wb reserve area, safeguard against dirty pool underrun and disk
	 * idle.  It may push the desired control point of global dirty pages
	 * higher than setpoint.
	 */
	x_intercept = wb_thresh / 2;
	if (dtc->wb_dirty < x_intercept) {
		if (dtc->wb_dirty > x_intercept / 8)
			pos_ratio = div_u64(pos_ratio * x_intercept,
					    dtc->wb_dirty);
		else
			pos_ratio *= 8;
	}

	dtc->pos_ratio = pos_ratio;
}
1083
/*
 * Estimate @wb's write bandwidth from the pages written during @elapsed,
 * smoothed over a ~3s period, and maintain the doubly-smoothed
 * avg_write_bandwidth alongside it.
 */
static void wb_update_write_bandwidth(struct bdi_writeback *wb,
				      unsigned long elapsed,
				      unsigned long written)
{
	const unsigned long period = roundup_pow_of_two(3 * HZ);
	unsigned long avg = wb->avg_write_bandwidth;
	unsigned long old = wb->write_bandwidth;
	u64 bw;

	/*
	 * bw = written * HZ / elapsed
	 *
	 *                   bw * elapsed + write_bandwidth * (period - elapsed)
	 * write_bandwidth = ---------------------------------------------------
	 *                                          period
	 *
	 * @written may have decreased due to account_page_redirty().
	 * Avoid underflowing the @bw calculation.
	 */
	bw = written - min(written, wb->written_stamp);
	bw *= HZ;
	if (unlikely(elapsed > period)) {
		/* sample is older than a full period: use it directly */
		do_div(bw, elapsed);
		avg = bw;
		goto out;
	}
	bw += (u64)wb->write_bandwidth * (period - elapsed);
	bw >>= ilog2(period);

	/*
	 * one more level of smoothing, for filtering out sudden spikes
	 */
	if (avg > old && old >= (unsigned long)bw)
		avg -= (avg - old) >> 3;

	if (avg < old && old <= (unsigned long)bw)
		avg += (old - avg) >> 3;

out:
	/* keep avg > 0 to guarantee that tot > 0 if there are dirty wbs */
	avg = max(avg, 1LU);
	if (wb_has_dirty_io(wb)) {
		long delta = avg - wb->avg_write_bandwidth;
		WARN_ON_ONCE(atomic_long_add_return(delta,
					&wb->bdi->tot_write_bandwidth) <= 0);
	}
	wb->write_bandwidth = bw;
	wb->avg_write_bandwidth = avg;
}
1133
/*
 * Maintain the domain-wide hard dirty limit: raise it to @dtc->thresh
 * immediately, but lower it only gradually (1/32 of the gap per update) so
 * that it is guaranteed to stay above the current number of dirty pages.
 */
static void update_dirty_limit(struct dirty_throttle_control *dtc)
{
	struct wb_domain *dom = dtc_dom(dtc);
	unsigned long thresh = dtc->thresh;
	unsigned long limit = dom->dirty_limit;

	/*
	 * Follow up in one step.
	 */
	if (limit < thresh) {
		limit = thresh;
		goto update;
	}

	/*
	 * Follow down slowly. Use the higher one as the target, because
	 * thresh may drop below dirty. This is exactly the reason to
	 * introduce dom->dirty_limit which is guaranteed to lie above the
	 * dirty pages.
	 */
	thresh = max(thresh, dtc->dirty);
	if (limit > thresh) {
		limit -= (limit - thresh) >> 5;
		goto update;
	}
	return;
update:
	dom->dirty_limit = limit;
}
1162
/* refresh the domain dirty limit at most once per BANDWIDTH_INTERVAL */
static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
				    unsigned long now)
{
	struct wb_domain *dom = dtc_dom(dtc);

	/*
	 * check locklessly first to optimize away locking for the most time
	 */
	if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
		return;

	spin_lock(&dom->lock);
	/* re-check under the lock: someone may have just updated */
	if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
		update_dirty_limit(dtc);
		dom->dirty_limit_tstamp = now;
	}
	spin_unlock(&dom->lock);
}
1181
1182
1183
1184
1185
1186
1187
/*
 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal wb tasks will be curbed at or below it in long term.
 * Obviously it should be around (write_bw / N) when there are N dd tasks.
 */
static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
				      unsigned long dirtied,
				      unsigned long elapsed)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long dirty = dtc->dirty;
	unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
	unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
	unsigned long setpoint = (freerun + limit) / 2;
	unsigned long write_bw = wb->avg_write_bandwidth;
	unsigned long dirty_ratelimit = wb->dirty_ratelimit;
	unsigned long dirty_rate;
	unsigned long task_ratelimit;
	unsigned long balanced_dirty_ratelimit;
	unsigned long step;
	unsigned long x;
	unsigned long shift;

	/*
	 * The dirty rate will match the writeout rate in the long term,
	 * except when dirty pages are truncated by userspace or re-dirtied
	 * by the FS.
	 */
	dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;

	/*
	 * task_ratelimit reflects each dd's dirty rate for the past 200ms.
	 */
	task_ratelimit = (u64)dirty_ratelimit *
					dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
	task_ratelimit++; /* it helps rampup dirty_ratelimit from tiny values */

	/*
	 * A linear estimation of the "balanced" throttle rate. The theory is,
	 * if there are N dd tasks, each throttled at task_ratelimit, the wb's
	 * dirty_rate will be measured as (N * task_ratelimit). So the below
	 * formula yields balanced_dirty_ratelimit ~= (write_bw / N), which
	 * keeps the dirty rate matched to the writeout rate regardless of
	 * dirty position errors (those are handled by pos_ratio).
	 */
	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
					   dirty_rate | 1);

	/*
	 * balanced_dirty_ratelimit ~= (write_bw / N) <= write_bw
	 */
	if (unlikely(balanced_dirty_ratelimit > write_bw))
		balanced_dirty_ratelimit = write_bw;

	/*
	 * We could safely do
	 *	wb->dirty_ratelimit = balanced_dirty_ratelimit;
	 * and return immediately.  However, to get a more stable
	 * dirty_ratelimit, the code below uses task_ratelimit to filter out
	 * singular points and limit the step size: dirty_ratelimit only
	 * moves toward balanced_dirty_ratelimit when task_ratelimit agrees
	 * on the direction.
	 */
	step = 0;

	/*
	 * For the strictlimit case, calculations above were based on wb
	 * counters and limits (starting from pos_ratio).  Hence, to compute
	 * "step" properly, we have to use wb_dirty as "dirty" and a
	 * wb-local setpoint.
	 *
	 * We ramp up dirty_ratelimit forcibly if wb_dirty is low because
	 * it's possible that wb_thresh is close to zero due to inactivity
	 * of the backing device.
	 */
	if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		dirty = dtc->wb_dirty;
		if (dtc->wb_dirty < 8)
			setpoint = dtc->wb_dirty + 1;
		else
			setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
	}

	if (dirty < setpoint) {
		x = min3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit < x)
			step = x - dirty_ratelimit;
	} else {
		x = max3(wb->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit > x)
			step = dirty_ratelimit - x;
	}

	/*
	 * Don't pursue 100% rate matching. It's impossible since the
	 * balanced rate itself is constantly fluctuating. So decrease the
	 * track speed when it gets close to the target. Helps eliminate
	 * pointless tremors.
	 */
	shift = dirty_ratelimit / (2 * step + 1);
	if (shift < BITS_PER_LONG)
		step = DIV_ROUND_UP(step >> shift, 8);
	else
		step = 0;

	if (dirty_ratelimit < balanced_dirty_ratelimit)
		dirty_ratelimit += step;
	else
		dirty_ratelimit -= step;

	wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
	wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;

	trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
}
1345
/*
 * Update @wb's bandwidth estimate and, when @update_ratelimit, the domain
 * dirty limits and per-wb dirty ratelimits.  Caller must hold wb->list_lock.
 */
static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
				  struct dirty_throttle_control *mdtc,
				  unsigned long start_time,
				  bool update_ratelimit)
{
	struct bdi_writeback *wb = gdtc->wb;
	unsigned long now = jiffies;
	unsigned long elapsed = now - wb->bw_time_stamp;
	unsigned long dirtied;
	unsigned long written;

	lockdep_assert_held(&wb->list_lock);

	/*
	 * rate-limit, only update once every 200ms.
	 */
	if (elapsed < BANDWIDTH_INTERVAL)
		return;

	dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
	written = percpu_counter_read(&wb->stat[WB_WRITTEN]);

	/*
	 * Skip quiet periods when disk bandwidth is under-utilized.
	 * (at least 1s idle time between two flusher runs)
	 */
	if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
		goto snapshot;

	if (update_ratelimit) {
		domain_update_bandwidth(gdtc, now);
		wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);

		/*
		 * @mdtc is always NULL if !CGROUP_WRITEBACK but the
		 * compiler has no way to recognize that and optimize the
		 * branch away; the IS_ENABLED() check helps it.
		 */
		if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
			domain_update_bandwidth(mdtc, now);
			wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
		}
	}
	wb_update_write_bandwidth(wb, elapsed, written);

snapshot:
	wb->dirtied_stamp = dirtied;
	wb->written_stamp = written;
	wb->bw_time_stamp = now;
}
1395
/* update @wb's write bandwidth only; no ratelimit/domain limit updates */
void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };

	__wb_update_bandwidth(&gdtc, NULL, start_time, false);
}
1402
1403
1404
1405
1406
1407
1408
1409
1410
/*
 * Number of pages a task may dirty before re-entering the balancing loop,
 * scaled roughly with the square root of the safety margin (pages left
 * below @thresh); at least 1 when at or over the threshold.
 */
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	if (dirty >= thresh)
		return 1;

	return 1UL << (ilog2(thresh - dirty) >> 1);
}
1419
/* longest single sleep allowed in balance_dirty_pages() for this wb */
static unsigned long wb_max_pause(struct bdi_writeback *wb,
				  unsigned long wb_dirty)
{
	unsigned long bw = wb->avg_write_bandwidth;
	unsigned long t;

	/*
	 * Limit pause time for small memory systems. If sleeping for too
	 * long, a small pool of dirty/writeback pages may go empty and the
	 * disk go idle.
	 *
	 * 8 serves as the safety ratio.
	 */
	t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
	t++;

	return min_t(unsigned long, t, MAX_PAUSE);
}
1438
/*
 * Compute the minimum pause (returned) and the number of pages a task may
 * dirty before pausing again (*nr_dirtied_pause) for balance_dirty_pages().
 */
static long wb_min_pause(struct bdi_writeback *wb,
			 long max_pause,
			 unsigned long task_ratelimit,
			 unsigned long dirty_ratelimit,
			 int *nr_dirtied_pause)
{
	long hi = ilog2(wb->avg_write_bandwidth);
	long lo = ilog2(wb->dirty_ratelimit);
	long t;		/* target pause */
	long pause;	/* estimated next pause */
	int pages;	/* target nr_dirtied_pause */

	/* target for 10ms pause on 1-dd case */
	t = max(1, HZ / 100);

	/*
	 * Scale up pause time for concurrent dirtiers in order to reduce CPU
	 * overheads.
	 *
	 * (N * 10ms) on 2^N concurrent tasks.
	 */
	if (hi > lo)
		t += (hi - lo) * (10 * HZ) / 1024;

	/*
	 * Derive nr_dirtied_pause from the more stable dirty_ratelimit (the
	 * actual pause below is then based on task_ratelimit, which may
	 * depart from it considerably at times).  Cap the target pause so
	 * the settled pause stays within (max_pause/4, max_pause).
	 */
	t = min(t, 1 + max_pause / 2);
	pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);

	/*
	 * Tiny nr_dirtied_pause is found to hurt I/O performance in the test
	 * case fio-mmap-randwrite-64k: when consecutive reads are frequently
	 * interrupted by dirty throttling pauses, the IO scheduler idles
	 * waiting for the next read.  So enforce a floor of
	 * DIRTY_POLL_THRESH pages where feasible.
	 */
	if (pages < DIRTY_POLL_THRESH) {
		t = max_pause;
		pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
		if (pages > DIRTY_POLL_THRESH) {
			pages = DIRTY_POLL_THRESH;
			t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
		}
	}

	/* estimate the next pause; shrink the dirty budget if it overshoots */
	pause = HZ * pages / (task_ratelimit + 1);
	if (pause > max_pause) {
		t = max_pause;
		pages = task_ratelimit * t / roundup_pow_of_two(HZ);
	}

	*nr_dirtied_pause = pages;
	/*
	 * The minimal pause will normally be half the target pause time.
	 */
	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
}
1513
/*
 * Fill in @dtc's wb_thresh, wb_bg_thresh and wb_dirty from current counters.
 * @dtc->thresh and ->bg_thresh must already be set by domain_dirty_limits().
 */
static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
{
	struct bdi_writeback *wb = dtc->wb;
	unsigned long wb_reclaimable;

	/*
	 * wb_thresh is not treated as some limiting factor as
	 * dirty_thresh, due to reasons
	 * - in JBOD setup, wb_thresh can fluctuate a lot
	 * - in a system with HDD and USB key, the USB key may somehow
	 *   go into state (wb_dirty >> wb_thresh) either because
	 *   wb_dirty starts high, or because wb_thresh drops low.
	 *   In this case we don't want to hard throttle the USB key
	 *   dirtiers for 100 seconds until wb_dirty drops under
	 *   wb_thresh. Instead the auxiliary wb control line in
	 *   wb_position_ratio() will let the dirtier task progress
	 *   at some rate <= (write_bw / 2) for bringing down wb_dirty.
	 */
	dtc->wb_thresh = __wb_calc_thresh(dtc);
	dtc->wb_bg_thresh = dtc->thresh ?
		div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;

	/*
	 * In order to avoid the stacked BDI deadlock we need
	 * to ensure we accurately count the 'dirty' pages when
	 * the threshold is low.
	 *
	 * Otherwise it would be possible to get thresh+n pages
	 * reported dirty, even though there are thresh-m pages
	 * actually dirty; with m+n sitting in the percpu
	 * deltas.
	 */
	if (dtc->wb_thresh < 2 * wb_stat_error(wb)) {
		wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
		dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
	} else {
		wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
		dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
	}
}
1554
1555
1556
1557
1558
1559
1560
1561
/*
 * balance_dirty_pages() must be called by processes which are generating
 * dirty data.  It looks at the number of dirty pages in the machine and
 * will force the caller to wait once crossing the
 * (background_thresh + dirty_thresh) / 2 freerun ceiling.  If we're over
 * `background_thresh' then the writeback threads are woken to perform some
 * writeout.
 */
static void balance_dirty_pages(struct address_space *mapping,
				struct bdi_writeback *wb,
				unsigned long pages_dirtied)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;
	struct dirty_throttle_control *sdtc;	/* the stricter of gdtc/mdtc */
	unsigned long nr_reclaimable;	/* = file_dirty + unstable_nfs */
	long period;
	long pause;
	long max_pause;
	long min_pause;
	int nr_dirtied_pause;
	bool dirty_exceeded = false;
	unsigned long task_ratelimit;
	unsigned long dirty_ratelimit;
	struct backing_dev_info *bdi = wb->bdi;
	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
	unsigned long start_time = jiffies;

	for (;;) {
		unsigned long now = jiffies;
		unsigned long dirty, thresh, bg_thresh;
		unsigned long m_dirty = 0;	/* stop bogus uninit warnings */
		unsigned long m_thresh = 0;
		unsigned long m_bg_thresh = 0;

		/*
		 * Unstable writes are a feature of certain networked
		 * filesystems (i.e. NFS) in which data may have been
		 * written to the server's write cache, but has not yet
		 * been flushed to permanent storage.
		 */
		nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
					global_node_page_state(NR_UNSTABLE_NFS);
		gdtc->avail = global_dirtyable_memory();
		gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);

		domain_dirty_limits(gdtc);

		if (unlikely(strictlimit)) {
			/* strictlimit decides based on wb counters/limits */
			wb_dirty_limits(gdtc);

			dirty = gdtc->wb_dirty;
			thresh = gdtc->wb_thresh;
			bg_thresh = gdtc->wb_bg_thresh;
		} else {
			dirty = gdtc->dirty;
			thresh = gdtc->thresh;
			bg_thresh = gdtc->bg_thresh;
		}

		if (mdtc) {
			unsigned long filepages, headroom, writeback;

			/*
			 * If @wb belongs to !root memcg, repeat the same
			 * basic calculations for the memcg domain.
			 */
			mem_cgroup_wb_stats(wb, &filepages, &headroom,
					    &mdtc->dirty, &writeback);
			mdtc->dirty += writeback;
			mdtc_calc_avail(mdtc, filepages, headroom);

			domain_dirty_limits(mdtc);

			if (unlikely(strictlimit)) {
				wb_dirty_limits(mdtc);
				m_dirty = mdtc->wb_dirty;
				m_thresh = mdtc->wb_thresh;
				m_bg_thresh = mdtc->wb_bg_thresh;
			} else {
				m_dirty = mdtc->dirty;
				m_thresh = mdtc->thresh;
				m_bg_thresh = mdtc->bg_thresh;
			}
		}

		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the wb limits are ramping up in case of !strictlimit.
		 *
		 * In strictlimit case make decision based on the wb counters
		 * and limits. Small writeouts when the wb limits are ramping
		 * up are the price we consciously pay for strictlimit-ing.
		 *
		 * If memcg domain is in effect, @dirty should be under
		 * both global and memcg freerun ceilings.
		 */
		if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
		    (!mdtc ||
		     m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
			unsigned long intv = dirty_poll_interval(dirty, thresh);
			unsigned long m_intv = ULONG_MAX;

			current->dirty_paused_when = now;
			current->nr_dirtied = 0;
			if (mdtc)
				m_intv = dirty_poll_interval(m_dirty, m_thresh);
			current->nr_dirtied_pause = min(intv, m_intv);
			break;
		}

		if (unlikely(!writeback_in_progress(wb)))
			wb_start_background_writeback(wb);

		/*
		 * Calculate global domain's pos_ratio and select the
		 * global dtc by default.
		 */
		if (!strictlimit)
			wb_dirty_limits(gdtc);

		dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
			((gdtc->dirty > gdtc->thresh) || strictlimit);

		wb_position_ratio(gdtc);
		sdtc = gdtc;

		if (mdtc) {
			/*
			 * If memcg domain is in effect, calculate its
			 * pos_ratio.  @wb should satisfy constraints from
			 * both global and memcg domains.  Choose the one
			 * with the lower pos_ratio.
			 */
			if (!strictlimit)
				wb_dirty_limits(mdtc);

			dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
				((mdtc->dirty > mdtc->thresh) || strictlimit);

			wb_position_ratio(mdtc);
			if (mdtc->pos_ratio < gdtc->pos_ratio)
				sdtc = mdtc;
		}

		if (dirty_exceeded && !wb->dirty_exceeded)
			wb->dirty_exceeded = 1;

		if (time_is_before_jiffies(wb->bw_time_stamp +
					   BANDWIDTH_INTERVAL)) {
			spin_lock(&wb->list_lock);
			__wb_update_bandwidth(gdtc, mdtc, start_time, true);
			spin_unlock(&wb->list_lock);
		}

		/* throttle according to the chosen dtc */
		dirty_ratelimit = wb->dirty_ratelimit;
		task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
							RATELIMIT_CALC_SHIFT;
		max_pause = wb_max_pause(wb, sdtc->wb_dirty);
		min_pause = wb_min_pause(wb, max_pause,
					 task_ratelimit, dirty_ratelimit,
					 &nr_dirtied_pause);

		if (unlikely(task_ratelimit == 0)) {
			period = max_pause;
			pause = max_pause;
			goto pause;
		}
		period = HZ * pages_dirtied / task_ratelimit;
		pause = period;
		if (current->dirty_paused_when)
			pause -= now - current->dirty_paused_when;
		/*
		 * For less than 1s think time (ext3/4 may block the dirtier
		 * for up to 800ms from time to time on 1-HDD; so does xfs,
		 * however at much less frequency), try to compensate it in
		 * future periods by updating the virtual time; otherwise just
		 * do a reset, as it may be a light dirtier.
		 */
		if (pause < min_pause) {
			trace_balance_dirty_pages(wb,
						  sdtc->thresh,
						  sdtc->bg_thresh,
						  sdtc->dirty,
						  sdtc->wb_thresh,
						  sdtc->wb_dirty,
						  dirty_ratelimit,
						  task_ratelimit,
						  pages_dirtied,
						  period,
						  min(pause, 0L),
						  start_time);
			if (pause < -HZ) {
				current->dirty_paused_when = now;
				current->nr_dirtied = 0;
			} else if (period) {
				current->dirty_paused_when += period;
				current->nr_dirtied = 0;
			} else if (current->nr_dirtied_pause <= pages_dirtied)
				current->nr_dirtied_pause += pages_dirtied;
			break;
		}
		if (unlikely(pause > max_pause)) {
			/* for occasional dropped task_ratelimit */
			now += min(pause - max_pause, max_pause);
			pause = max_pause;
		}

pause:
		trace_balance_dirty_pages(wb,
					  sdtc->thresh,
					  sdtc->bg_thresh,
					  sdtc->dirty,
					  sdtc->wb_thresh,
					  sdtc->wb_dirty,
					  dirty_ratelimit,
					  task_ratelimit,
					  pages_dirtied,
					  period,
					  pause,
					  start_time);
		__set_current_state(TASK_KILLABLE);
		io_schedule_timeout(pause);

		current->dirty_paused_when = now + pause;
		current->nr_dirtied = 0;
		current->nr_dirtied_pause = nr_dirtied_pause;

		/*
		 * This is typically equal to (dirty < thresh) and can also
		 * keep "1000+ dd on a slow USB stick" under control.
		 */
		if (task_ratelimit)
			break;

		/*
		 * In the case of an unresponding NFS server and the NFS dirty
		 * pages exceeds dirty_thresh, give the other good wb's a pipe
		 * to go through, so that tasks on them still remain responsive.
		 *
		 * In theory 1 page is enough to keep the consumer-producer
		 * pipe going: the flusher cleans 1 page => the task dirties 1
		 * more page. However wb_dirty has accounting errors.  So use
		 * the larger and more IO friendly wb_stat_error.
		 */
		if (sdtc->wb_dirty <= wb_stat_error(wb))
			break;

		if (fatal_signal_pending(current))
			break;
	}

	if (!dirty_exceeded && wb->dirty_exceeded)
		wb->dirty_exceeded = 0;

	if (writeback_in_progress(wb))
		return;

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk
	 * activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if (laptop_mode)
		return;

	if (nr_reclaimable > gdtc->bg_thresh)
		wb_start_background_writeback(wb);
}
1831
/*
 * Per-CPU count of pages dirtied since this CPU's counter last tripped
 * ratelimit_pages.  Incremented in account_page_dirtied() and checked in
 * balance_dirty_pages_ratelimited() so that one CPU cannot accumulate too
 * many dirtied pages without ever entering balance_dirty_pages().
 */
static DEFINE_PER_CPU(int, bdp_ratelimits);
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
/*
 * Per-CPU pool of "leaked" dirtied pages — pages charged to tasks that
 * never paid for them (presumably tasks that exited before throttling;
 * the incrementing site is outside this file's visible scope).
 * balance_dirty_pages_ratelimited() drains this pool into the current
 * task's nr_dirtied so the pages are still throttled eventually.
 */
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * The expensive balance_dirty_pages() is only entered once the task's
 * nr_dirtied reaches its nr_dirtied_pause budget; that budget is cut
 * sharply while @wb is over its dirty limits.
 */
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;
	int ratelimit;
	int *p;

	if (!bdi_cap_account_dirty(bdi))
		return;

	/* use the memcg-aware wb for this task when cgroup writeback is on */
	if (inode_cgwb_enabled(inode))
		wb = wb_get_create_current(bdi, GFP_KERNEL);
	if (!wb)
		wb = &bdi->wb;

	ratelimit = current->nr_dirtied_pause;
	if (wb->dirty_exceeded)
		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));

	preempt_disable();
	/*
	 * This prevents one CPU to accumulate too many dirtied pages without
	 * calling into balance_dirty_pages(), which can happen when there are
	 * 1000+ tasks, all of them start dirtying pages at exactly the same
	 * time, hence all honoured too large initial task->nr_dirtied_pause.
	 */
	p = this_cpu_ptr(&bdp_ratelimits);
	if (unlikely(current->nr_dirtied >= ratelimit))
		*p = 0;
	else if (unlikely(*p >= ratelimit_pages)) {
		*p = 0;
		ratelimit = 0;
	}
	/*
	 * Pick up the dirtied pages accumulated in dirty_throttle_leaks and
	 * charge them to the current task, so they cannot be used to escape
	 * dirty throttling and livelock other long-run dirtiers.
	 */
	p = this_cpu_ptr(&dirty_throttle_leaks);
	if (*p > 0 && current->nr_dirtied < ratelimit) {
		unsigned long nr_pages_dirtied;
		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
		*p -= nr_pages_dirtied;
		current->nr_dirtied += nr_pages_dirtied;
	}
	preempt_enable();

	if (unlikely(current->nr_dirtied >= ratelimit))
		balance_dirty_pages(mapping, wb, current->nr_dirtied);

	wb_put(wb);	/* pairs with wb_get_create_current() */
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
1917
1918
1919
1920
1921
1922
1923
1924
/**
 * wb_over_bg_thresh - does @wb need to be written back?
 * @wb: bdi_writeback of interest
 *
 * Determines whether background writeback should keep writing @wb or it's
 * clean enough.  Checks both the global domain and, if cgroup writeback is
 * active, the memcg domain.  Returns %true if writeback should continue.
 */
bool wb_over_bg_thresh(struct bdi_writeback *wb)
{
	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
	struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
	struct dirty_throttle_control * const gdtc = &gdtc_stor;
	struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
						     &mdtc_stor : NULL;

	/*
	 * Similar to balance_dirty_pages() but ignores pages being written
	 * as we're trying to decide whether to put more under writeback.
	 */
	gdtc->avail = global_dirtyable_memory();
	gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
		      global_node_page_state(NR_UNSTABLE_NFS);
	domain_dirty_limits(gdtc);

	if (gdtc->dirty > gdtc->bg_thresh)
		return true;

	if (wb_stat(wb, WB_RECLAIMABLE) >
	    wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
		return true;

	if (mdtc) {
		unsigned long filepages, headroom, writeback;

		mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
				    &writeback);
		mdtc_calc_avail(mdtc, filepages, headroom);
		domain_dirty_limits(mdtc);	/* ditto, ignore writeback */

		if (mdtc->dirty > mdtc->bg_thresh)
			return true;

		if (wb_stat(wb, WB_RECLAIMABLE) >
		    wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
			return true;
	}

	return false;
}
1967
1968
1969
1970
1971int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
1972 void __user *buffer, size_t *length, loff_t *ppos)
1973{
1974 proc_dointvec(table, write, buffer, length, ppos);
1975 return 0;
1976}
1977
1978#ifdef CONFIG_BLOCK
#ifdef CONFIG_BLOCK
/*
 * Laptop-mode flush timer.  When it fires, start writeback of all dirty
 * pages in the system (the current NR_FILE_DIRTY + NR_UNSTABLE_NFS total)
 * on every wb of the queue's backing device that has dirty IO.
 * @data is the request_queue cast to unsigned long (old timer API).
 */
void laptop_mode_timer_fn(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;
	int nr_pages = global_node_page_state(NR_FILE_DIRTY) +
		global_node_page_state(NR_UNSTABLE_NFS);
	struct bdi_writeback *wb;

	/*
	 * We want to write everything this time, but if the bdi has
	 * nothing dirty there is no point waking anything up.
	 */
	if (!bdi_has_dirty_io(&q->backing_dev_info))
		return;

	/* wb_list is RCU protected; walk it without the bdi lock */
	rcu_read_lock();
	list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node)
		if (wb_has_dirty_io(wb))
			wb_start_writeback(wb, nr_pages, true,
					   WB_REASON_LAPTOP_TIMER);
	rcu_read_unlock();
}
2000
2001
2002
2003
2004
2005
/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already
 * scheduled then push it back — the user is still using the disk.
 */
void laptop_io_completion(struct backing_dev_info *info)
{
	mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
}
2010
2011
2012
2013
2014
2015
/*
 * We're in laptop mode and we've just synced.  The sync's writes will
 * have caused another writeback to be scheduled by laptop_io_completion.
 * Stop them: cancel the pending laptop-mode timer on every bdi.
 */
void laptop_sync_completion(void)
{
	struct backing_dev_info *bdi;

	rcu_read_lock();	/* bdi_list is RCU protected */

	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
		del_timer(&bdi->laptop_mode_wb_timer);

	rcu_read_unlock();
}
2027#endif
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
/*
 * Recompute ratelimit_pages — the per-CPU number of dirtied pages that
 * forces a call into balance_dirty_pages() (see bdp_ratelimits) — from
 * the current dirty threshold and the number of online CPUs, and refresh
 * the cached global_wb_domain.dirty_limit.  Floored at 16 pages so tasks
 * still call in reasonably often on small machines.
 */
void writeback_set_ratelimit(void)
{
	struct wb_domain *dom = &global_wb_domain;
	unsigned long background_thresh;
	unsigned long dirty_thresh;

	global_dirty_limits(&background_thresh, &dirty_thresh);
	dom->dirty_limit = dirty_thresh;
	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
	if (ratelimit_pages < 16)
		ratelimit_pages = 16;
}
2052
/*
 * CPU hotplug callback (registered for both the online and dead
 * transitions in page_writeback_init()): ratelimit_pages depends on
 * num_online_cpus(), so recompute it whenever a CPU comes or goes.
 */
static int page_writeback_cpu_online(unsigned int cpu)
{
	writeback_set_ratelimit();
	return 0;
}
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
/*
 * Boot-time setup: initialize the global wb_domain and register CPU
 * hotplug callbacks so that ratelimit_pages (which depends on
 * num_online_cpus()) stays current as CPUs come and go.
 */
void __init page_writeback_init(void)
{
	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));

	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
			  page_writeback_cpu_online, NULL);
	cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
			  page_writeback_cpu_online);
}
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have DIRTY tag set with a special TOWRITE tag.  The idea is
 * that write_cache_pages (or whoever calls this function) will then use
 * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
 * used to avoid livelocking of writeback by a process steadily creating new
 * dirty pages in the file (thus it is important for this function to be quick
 * so that it can tag pages faster than a dirtying process can create them).
 */
void tag_pages_for_writeback(struct address_space *mapping,
			     pgoff_t start, pgoff_t end)
{
#define WRITEBACK_TAG_BATCH 4096
	unsigned long tagged;

	/* tag in batches, dropping tree_lock in between to bound latency */
	do {
		spin_lock_irq(&mapping->tree_lock);
		tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
				&start, end, WRITEBACK_TAG_BATCH,
				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
		spin_unlock_irq(&mapping->tree_lock);
		WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
		cond_resched();
		/* We check 'start' to handle wrapping when end == ~0UL */
	} while (tagged >= WRITEBACK_TAG_BATCH && start);
}
EXPORT_SYMBOL(tag_pages_for_writeback);
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
/**
 * write_cache_pages - walk the list of dirty pages of the given address space
 * and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 *
 * To avoid livelocks (when other process dirties new pages), we first tag
 * pages which should be written back with TOWRITE tag and only then start
 * writing them.  For data-integrity sync we have to be careful so that we do
 * not miss some pages (e.g., because some other process has cleared TOWRITE
 * tag we set).  The rule we follow is that TOWRITE tag can be cleared only
 * by the process clearing the DIRTY tag (and submitting the page for IO).
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping.  However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end) {
				/*
				 * can't be range_cyclic (1st pass) because
				 * end == -1 in that case.
				 */
				done = 1;
				break;
			}

			done_index = page->index;

			lock_page(page);

			/*
			 * Page truncated or invalidated.  We can freely skip
			 * it then, even for data integrity operations: the
			 * page has disappeared concurrently, so there could
			 * be no real expectation of this data integrity
			 * operation even if there is now a new, dirty page at
			 * the same pagecache address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
				} else {
					/*
					 * done_index is set past this page,
					 * so media errors will not choke
					 * background writeout for the entire
					 * file.  This has consequences for
					 * range_cyclic semantics (ie. it may
					 * not be suitable for data integrity
					 * writeout).
					 */
					done_index = page->index + 1;
					done = 1;
					break;
				}
			}

			/*
			 * We stop writing back only if we are not doing
			 * integrity sync.  In case of integrity sync we have
			 * to keep going until we have written all the pages
			 * we tagged for writeback prior to entering this loop.
			 */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done:
		 * wrap back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
EXPORT_SYMBOL(write_cache_pages);
2300
2301
2302
2303
2304
2305static int __writepage(struct page *page, struct writeback_control *wbc,
2306 void *data)
2307{
2308 struct address_space *mapping = data;
2309 int ret = mapping->a_ops->writepage(page, wbc);
2310 mapping_set_error(mapping, ret);
2311 return ret;
2312}
2313
2314
2315
2316
2317
2318
2319
2320
2321
/**
 * generic_writepages - walk the list of dirty pages of the given address
 * space and writepage() all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 *
 * This is a library function, which implements the writepages()
 * address_space_operation.
 */
int generic_writepages(struct address_space *mapping,
		       struct writeback_control *wbc)
{
	struct blk_plug plug;
	int ret;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	/* plug the block layer so adjacent page writes can be merged */
	blk_start_plug(&plug);
	ret = write_cache_pages(mapping, wbc, __writepage, mapping);
	blk_finish_plug(&plug);
	return ret;
}

EXPORT_SYMBOL(generic_writepages);
2339
2340int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
2341{
2342 int ret;
2343
2344 if (wbc->nr_to_write <= 0)
2345 return 0;
2346 if (mapping->a_ops->writepages)
2347 ret = mapping->a_ops->writepages(mapping, wbc);
2348 else
2349 ret = generic_writepages(mapping, wbc);
2350 return ret;
2351}
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
/**
 * write_one_page - write out a single page and optionally wait on I/O
 * @page: the page to write
 * @wait: if true, wait on writeback before and after writing
 *
 * The page must be locked by the caller and will be unlocked upon return.
 * Returns 0 on success, the ->writepage() error code on failure, or -EIO
 * when @wait is set and the page ends up with PageError.
 */
int write_one_page(struct page *page, int wait)
{
	struct address_space *mapping = page->mapping;
	int ret = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	BUG_ON(!PageLocked(page));

	if (wait)
		wait_on_page_writeback(page);

	if (clear_page_dirty_for_io(page)) {
		/* hold a reference across ->writepage(), which unlocks */
		get_page(page);
		ret = mapping->a_ops->writepage(page, &wbc);
		if (ret == 0 && wait) {
			wait_on_page_writeback(page);
			if (PageError(page))
				ret = -EIO;
		}
		put_page(page);
	} else {
		/* page was clean — nothing to write, just unlock */
		unlock_page(page);
	}
	return ret;
}
EXPORT_SYMBOL(write_one_page);
2391
2392
2393
2394
/*
 * For address_spaces which do not use buffers nor write back: just set
 * the page dirty bit, with no dirty accounting.
 *
 * Returns 1 if this call transitioned the page from clean to dirty,
 * 0 otherwise.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (PageDirty(page))
		return 0;

	return !TestSetPageDirty(page);
}
2401
2402
2403
2404
2405
2406
2407
2408
/*
 * Helper function for the set_page_dirty family: charge a newly dirtied
 * page to the memcg, node, zone, wb and task dirty statistics.
 *
 * NOTE(review): callers in this file (__set_page_dirty_nobuffers) invoke
 * this under lock_page_memcg() and mapping->tree_lock; the __inc_* stat
 * variants rely on that irq-safe context.
 */
void account_page_dirtied(struct page *page, struct address_space *mapping)
{
	struct inode *inode = mapping->host;

	trace_writeback_dirty_page(page, mapping);

	if (mapping_cap_account_dirty(mapping)) {
		struct bdi_writeback *wb;

		inode_attach_wb(inode, page);
		wb = inode_to_wb(inode);

		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY);
		__inc_node_page_state(page, NR_FILE_DIRTY);
		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		__inc_node_page_state(page, NR_DIRTIED);
		__inc_wb_stat(wb, WB_RECLAIMABLE);
		__inc_wb_stat(wb, WB_DIRTIED);
		task_io_account_write(PAGE_SIZE);
		/* feed the dirty throttling machinery (see bdp_ratelimits) */
		current->nr_dirtied++;
		this_cpu_inc(bdp_ratelimits);
	}
}
EXPORT_SYMBOL(account_page_dirtied);
2433
2434
2435
2436
2437
2438
/*
 * Helper function for deaccounting a dirty page without writeback —
 * the inverse of the dirty charges made in account_page_dirtied().
 *
 * NOTE(review): the caller in this file (cancel_dirty_page) holds
 * lock_page_memcg() around this call.
 */
void account_page_cleaned(struct page *page, struct address_space *mapping,
			  struct bdi_writeback *wb)
{
	if (mapping_cap_account_dirty(mapping)) {
		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
		dec_node_page_state(page, NR_FILE_DIRTY);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		dec_wb_stat(wb, WB_RECLAIMABLE);
		task_io_account_cancelled_write(PAGE_SIZE);
	}
}
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
/*
 * For address_spaces which do not use buffers: set the page dirty bit,
 * account the page and tag it dirty in the radix tree, then mark the
 * inode I_DIRTY_PAGES.
 *
 * Returns 1 if this call dirtied the page, 0 if it was dirty already.
 *
 * Lock order: lock_page_memcg() outside, mapping->tree_lock (irq-safe)
 * inside.  Pages without a mapping (e.g. truncated concurrently — see
 * the !mapping check) are only flagged, never accounted.
 */
int __set_page_dirty_nobuffers(struct page *page)
{
	lock_page_memcg(page);
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);
		unsigned long flags;

		if (!mapping) {
			unlock_page_memcg(page);
			return 1;
		}

		spin_lock_irqsave(&mapping->tree_lock, flags);
		BUG_ON(page_mapping(page) != mapping);
		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		radix_tree_tag_set(&mapping->page_tree, page_index(page),
				   PAGECACHE_TAG_DIRTY);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
		unlock_page_memcg(page);

		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	unlock_page_memcg(page);
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);
2494
2495
2496
2497
2498
2499
2500
2501
/*
 * Call this whenever redirtying a page, to de-account the dirty counters
 * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the
 * written counters in the long term.  Mismatches would introduce
 * systematic errors into the dirty throttling machinery.
 */
void account_page_redirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping && mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		bool locked;

		wb = unlocked_inode_to_wb_begin(inode, &locked);
		current->nr_dirtied--;
		dec_node_page_state(page, NR_DIRTIED);
		dec_wb_stat(wb, WB_DIRTIED);
		unlocked_inode_to_wb_end(inode, locked);
	}
}
EXPORT_SYMBOL(account_page_redirty);
2519
2520
2521
2522
2523
2524
/*
 * When a writepage implementation decides that it doesn't want to write
 * this page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage(), then unlock the page and return 0.
 *
 * The page is re-dirtied first, then the dirtied-page counters are
 * backed out so the skip doesn't perturb dirty throttling.
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	int ret;

	wbc->pages_skipped++;
	ret = __set_page_dirty_nobuffers(page);
	account_page_redirty(page);
	return ret;
}
EXPORT_SYMBOL(redirty_page_for_writepage);
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
/*
 * Dirty a page.
 *
 * For pages with a mapping, this should be done under the page lock for
 * the benefit of asynchronous memory errors who prefer a consistent
 * dirty state.  If the mapping doesn't provide a set_page_dirty a_op,
 * then just fall through and assume that it wants buffer_heads.
 *
 * Returns 1 if this call dirtied the page, 0 otherwise (via the a_op's
 * own convention when a mapping exists).
 */
int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	page = compound_head(page);
	if (likely(mapping)) {
		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
		/*
		 * PG_reclaim can be left over from a racing
		 * end_page_writeback(); clear it here so that a page that
		 * is being redirtied is not mistakenly treated as being
		 * reclaimed (which would confuse readahead, since
		 * PG_readahead shares the same bit).
		 */
		if (PageReclaim(page))
			ClearPageReclaim(page);
#ifdef CONFIG_BLOCK
		if (!spd)
			spd = __set_page_dirty_buffers;
#endif
		return (*spd)(page);
	}
	if (!PageDirty(page)) {
		if (!TestSetPageDirty(page))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(set_page_dirty);
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
/**
 * set_page_dirty_lock - like set_page_dirty(), but takes the page lock
 * @page: the page to dirty
 *
 * Wraps set_page_dirty() in lock_page()/unlock_page() for callers that
 * reach here without the page already locked.  Returns whatever
 * set_page_dirty() returned.
 */
int set_page_dirty_lock(struct page *page)
{
	int dirtied;

	lock_page(page);
	dirtied = set_page_dirty(page);
	unlock_page(page);

	return dirtied;
}
EXPORT_SYMBOL(set_page_dirty_lock);
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
/*
 * Clear a page's dirty flag while undoing the dirty accounting done in
 * account_page_dirtied() — unlike clear_page_dirty_for_io(), no writeback
 * is implied.  For mappings that don't account dirty pages, only the
 * page flag is cleared.
 */
void cancel_dirty_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		bool locked;

		/* memcg lock outside, wb association pinned inside */
		lock_page_memcg(page);
		wb = unlocked_inode_to_wb_begin(inode, &locked);

		if (TestClearPageDirty(page))
			account_page_cleaned(page, mapping, wb);

		unlocked_inode_to_wb_end(inode, locked);
		unlock_page_memcg(page);
	} else {
		ClearPageDirty(page);
	}
}
EXPORT_SYMBOL(cancel_dirty_page);
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
/*
 * Clear a page's dirty flag, while caring for dirty memory accounting.
 * Returns true if the page was previously dirty.
 *
 * This is for preparing to put the page under writeout.  We leave the
 * page tagged as dirty in the radix tree so that a concurrent
 * write-for-sync can discover it via a tagged walk.  The tag is cleared
 * when the page is set under writeback (__test_set_page_writeback).
 */
int clear_page_dirty_for_io(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret = 0;

	BUG_ON(!PageLocked(page));

	if (mapping && mapping_cap_account_dirty(mapping)) {
		struct inode *inode = mapping->host;
		struct bdi_writeback *wb;
		bool locked;

		/*
		 * Yes, Virginia, this is indeed insane.
		 *
		 * We use this sequence to make sure that
		 *  (a) we account for dirty stats properly
		 *  (b) we tell the low-level filesystem to
		 *      mark the whole page dirty if it was
		 *      dirty in a pagetable.  Only to then
		 *  (c) clean the page again and return 1 to
		 *      cause the writeback.
		 *
		 * This way we avoid all nasty races with the
		 * dirty bit in multiple places and clearing
		 * them concurrently from different threads.
		 *
		 * Note! Normally the "set_page_dirty(page)"
		 * has no effect on the actual dirty bit — since
		 * that will already usually be set.  But we
		 * need the side effects, and it can help us
		 * avoid races.
		 *
		 * We basically use the page "master dirty bit"
		 * as a serialization point for all the different
		 * threads doing their things.
		 */
		if (page_mkclean(page))
			set_page_dirty(page);
		/*
		 * Fault handlers dirty the page while holding the page
		 * lock, and the page is always locked coming in here,
		 * so we get the desired exclusion against a dirty pte
		 * being installed concurrently.
		 */
		wb = unlocked_inode_to_wb_begin(inode, &locked);
		if (TestClearPageDirty(page)) {
			mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
			dec_node_page_state(page, NR_FILE_DIRTY);
			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
			dec_wb_stat(wb, WB_RECLAIMABLE);
			ret = 1;
		}
		unlocked_inode_to_wb_end(inode, locked);
		return ret;
	}
	return TestClearPageDirty(page);
}
EXPORT_SYMBOL(clear_page_dirty_for_io);
2712
/*
 * Clear PageWriteback and, for tag-using mappings, the WRITEBACK radix
 * tree tag, updating memcg/node/zone/wb writeback statistics.  Returns
 * the previous writeback state of the page.
 */
int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	lock_page_memcg(page);
	if (mapping && mapping_use_writeback_tags(mapping)) {
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi)) {
				struct bdi_writeback *wb = inode_to_wb(inode);

				__dec_wb_stat(wb, WB_WRITEBACK);
				__wb_writeout_inc(wb);
			}
		}

		/* last writeback page gone: the inode is no longer syncing */
		if (mapping->host && !mapping_tagged(mapping,
						     PAGECACHE_TAG_WRITEBACK))
			sb_clear_inode_writeback(mapping->host);

		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	if (ret) {
		mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
		dec_node_page_state(page, NR_WRITEBACK);
		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
		inc_node_page_state(page, NR_WRITTEN);
	}
	unlock_page_memcg(page);
	return ret;
}
2755
/*
 * Set PageWriteback and, for tag-using mappings, move the page's radix
 * tree tags from DIRTY (and, unless @keep_write, TOWRITE) to WRITEBACK,
 * updating the writeback statistics.  Returns the previous writeback
 * state of the page.
 */
int __test_set_page_writeback(struct page *page, bool keep_write)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	lock_page_memcg(page);
	if (mapping && mapping_use_writeback_tags(mapping)) {
		struct inode *inode = mapping->host;
		struct backing_dev_info *bdi = inode_to_bdi(inode);
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			bool on_wblist;

			on_wblist = mapping_tagged(mapping,
						   PAGECACHE_TAG_WRITEBACK);

			radix_tree_tag_set(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi))
				__inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);

			/*
			 * First writeback page of this inode: mark the
			 * inode as under writeback for sync_inodes_sb().
			 */
			if (mapping->host && !on_wblist)
				sb_mark_inode_writeback(mapping->host);
		}
		if (!PageDirty(page))
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_DIRTY);
		if (!keep_write)
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_TOWRITE);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret) {
		mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
		inc_node_page_state(page, NR_WRITEBACK);
		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
	}
	unlock_page_memcg(page);
	return ret;

}
EXPORT_SYMBOL(__test_set_page_writeback);
2811
2812
2813
2814
2815
/*
 * Return true if any of the pages in the mapping are marked with the
 * passed radix tree tag.
 */
int mapping_tagged(struct address_space *mapping, int tag)
{
	return radix_tree_tagged(&mapping->page_tree, tag);
}
EXPORT_SYMBOL(mapping_tagged);
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830void wait_for_stable_page(struct page *page)
2831{
2832 if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
2833 wait_on_page_writeback(page);
2834}
2835EXPORT_SYMBOL_GPL(wait_for_stable_page);
2836