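/*
 * mm/page-writeback.c
 *
 * Dirty pagecache accounting and balancing: derives the global,
 * per-writeback and per-memcg dirty thresholds from the sysctl tunables
 * and throttles page-dirtying tasks so dirty memory stays within them.
 * Also contains the generic ->writepages walker and the page dirty /
 * writeback flag helpers.
 */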
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"
#define MAX_PAUSE		max(HZ/5, 1)

#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10

static long ratelimit_pages = 32;
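
/*
 * Dirty memory limits (settable via sysctl):
 *
 * dirty_background_ratio/_bytes: amount of dirtyable memory at which
 * background writeback by the flusher threads starts.  Only one of the
 * ratio and bytes forms is in effect at a time.
 *
 * vm_highmem_is_dirtyable: whether highmem counts as dirtyable memory.
 *
 * vm_dirty_ratio/_bytes: amount of dirtyable memory at which the
 * dirtying task itself is throttled in balance_dirty_pages().
 */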
int dirty_background_ratio = 10;

unsigned long dirty_background_bytes;

int vm_highmem_is_dirtyable;

int vm_dirty_ratio = 20;

unsigned long vm_dirty_bytes;
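
/*
 * Writeback intervals, in hundredths of a second: how often the periodic
 * writeback work runs, and how old dirty data must be before that work
 * writes it out.
 */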
unsigned int dirty_writeback_interval = 5 * 100;

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

unsigned int dirty_expire_interval = 30 * 100;
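
/*
 * block_dump: when set, log which task dirtied which inode (debug aid).
 * laptop_mode: batch writeback around other disk activity so the disk
 * can stay spun down for longer.
 */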
int block_dump;

int laptop_mode;

EXPORT_SYMBOL(laptop_mode);
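
/* The global dirty throttling domain; memcgs get their own wb_domains. */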
struct wb_domain global_wb_domain;
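
/*
 * dirty_throttle_control bundles everything balance_dirty_pages() needs
 * to throttle against one wb_domain: the domain-wide dirtyable/dirty
 * counts and thresholds, their per-writeback (wb_*) counterparts, and
 * the resulting pos_ratio feedback factor.
 */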
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;
	struct dirty_throttle_control *gdtc;
#endif
	struct bdi_writeback	*wb;
	struct fprop_local_percpu *wb_completions;

	unsigned long		avail;
	unsigned long		dirty;
	unsigned long		thresh;
	unsigned long		bg_thresh;

	unsigned long		wb_dirty;
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;
};
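
/*
 * Length of the period over which writeout completion fractions are aged,
 * and initializers for dirty_throttle_control against the global domain
 * (GDTC_INIT*) and a memcg domain (MDTC_INIT).
 */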
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)

#ifdef CONFIG_CGROUP_WRITEBACK

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.dom = &global_wb_domain,		\
				.wb_completions = &(__wb)->completions

#define GDTC_INIT_NO_WB		.dom = &global_wb_domain

#define MDTC_INIT(__wb, __gdtc)	.wb = (__wb),				\
				.dom = mem_cgroup_wb_domain(__wb),	\
				.wb_completions = &(__wb)->memcg_completions, \
				.gdtc = __gdtc

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return dtc->dom;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return mdtc->gdtc;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return &wb->memcg_completions;
}
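
/*
 * Scale @wb->bdi's min/max ratios by @wb's share of the bdi's total
 * write bandwidth, so that cgroup writebacks split the per-bdi limits
 * roughly in proportion to how much writeback each is doing.
 */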
static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	unsigned long this_bw = wb->avg_write_bandwidth;
	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
	unsigned long long min = wb->bdi->min_ratio;
	unsigned long long max = wb->bdi->max_ratio;

	if (this_bw < tot_bw) {
		if (min) {
			min *= this_bw;
			do_div(min, tot_bw);
		}
		if (max < 100) {
			max *= this_bw;
			do_div(max, tot_bw);
		}
	}

	*minp = min;
	*maxp = max;
}

#else

#define GDTC_INIT(__wb)		.wb = (__wb),				\
				.wb_completions = &(__wb)->completions
#define GDTC_INIT_NO_WB
#define MDTC_INIT(__wb, __gdtc)

static bool mdtc_valid(struct dirty_throttle_control *dtc)
{
	return false;
}

static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
{
	return &global_wb_domain;
}

static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
{
	return NULL;
}

static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
{
	return NULL;
}

static void wb_min_max_ratio(struct bdi_writeback *wb,
			     unsigned long *minp, unsigned long *maxp)
{
	*minp = wb->bdi->min_ratio;
	*maxp = wb->bdi->max_ratio;
}

#endif
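
/*
 * zone_dirtyable_memory - number of pages in @zone that may be dirtied:
 * free pages above the zone's reserves plus the inactive and active
 * file LRU pages.  This is the base for the per-zone dirty limits.
 */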
static unsigned long zone_dirtyable_memory(struct zone *zone)
{
	unsigned long nr_pages;

	nr_pages = zone_page_state(zone, NR_FREE_PAGES);

	nr_pages -= min(nr_pages, zone->totalreserve_pages);

	nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
	nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);

	return nr_pages;
}
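
/*
 * highmem_dirtyable_memory - dirtyable highmem, summed over all nodes.
 * The sum can underflow when much of highmem is unreclaimable, so a
 * negative result is treated as zero, and the total is clamped to
 * @total so highmem can never appear to exceed overall dirtyable memory.
 */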
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;

	for_each_node_state(node, N_HIGH_MEMORY) {
		struct zone *z = &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];

		x += zone_dirtyable_memory(z);
	}

	if ((long)x < 0)
		x = 0;

	return min(x, total);
#else
	return 0;
#endif
}
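
/*
 * global_dirtyable_memory - amount of memory that may be dirtied globally:
 * free pages above the total reserves plus the file LRU pages, minus
 * highmem unless vm_highmem_is_dirtyable.  Returns at least 1 so that
 * callers never divide by zero.
 */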
static unsigned long global_dirtyable_memory(void)
{
	unsigned long x;

	x = global_page_state(NR_FREE_PAGES);

	x -= min(x, totalreserve_pages);

	x += global_page_state(NR_INACTIVE_FILE);
	x += global_page_state(NR_ACTIVE_FILE);

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;
}
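
/*
 * domain_dirty_limits - compute @dtc->thresh and @dtc->bg_thresh.
 *
 * Applies vm_dirty_{ratio,bytes} and dirty_background_{ratio,bytes} to
 * @dtc->avail.  For a memcg domain the global byte settings are first
 * converted into ratios against the global dirtyable memory.  The limits
 * of PF_LESS_THROTTLE and realtime tasks are boosted by 25%, and the
 * background threshold is always kept below the dirty threshold.
 */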
static void domain_dirty_limits(struct dirty_throttle_control *dtc)
{
	const unsigned long available_memory = dtc->avail;
	struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
	unsigned long bytes = vm_dirty_bytes;
	unsigned long bg_bytes = dirty_background_bytes;
	unsigned long ratio = vm_dirty_ratio;
	unsigned long bg_ratio = dirty_background_ratio;
	unsigned long thresh;
	unsigned long bg_thresh;
	struct task_struct *tsk;

	if (gdtc) {
		unsigned long global_avail = gdtc->avail;

		if (bytes)
			ratio = min(DIV_ROUND_UP(bytes, PAGE_SIZE) * 100 /
				    global_avail, 100UL);
		if (bg_bytes)
			bg_ratio = min(DIV_ROUND_UP(bg_bytes, PAGE_SIZE) * 100 /
				       global_avail, 100UL);
		bytes = bg_bytes = 0;
	}

	if (bytes)
		thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
	else
		thresh = (ratio * available_memory) / 100;

	if (bg_bytes)
		bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
	else
		bg_thresh = (bg_ratio * available_memory) / 100;

	if (bg_thresh >= thresh)
		bg_thresh = thresh / 2;
	tsk = current;
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		bg_thresh += bg_thresh / 4;
		thresh += thresh / 4;
	}
	dtc->thresh = thresh;
	dtc->bg_thresh = bg_thresh;

	if (!gdtc)
		trace_global_dirty_state(bg_thresh, thresh);
}
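
/*
 * global_dirty_limits - background-writeback and dirty-throttling
 * thresholds for the global domain, in pages, based on the current
 * amount of dirtyable memory.
 */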
429void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
430{
431 struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
432
433 gdtc.avail = global_dirtyable_memory();
434 domain_dirty_limits(&gdtc);
435
436 *pbackground = gdtc.bg_thresh;
437 *pdirty = gdtc.thresh;
438}
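
/*
 * zone_dirty_limit - maximum number of dirty pages allowed in a zone:
 * the zone's share of the global dirty limit, proportional to its share
 * of dirtyable memory, with the usual 25% boost for PF_LESS_THROTTLE
 * and realtime tasks.
 */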
447static unsigned long zone_dirty_limit(struct zone *zone)
448{
449 unsigned long zone_memory = zone_dirtyable_memory(zone);
450 struct task_struct *tsk = current;
451 unsigned long dirty;
452
453 if (vm_dirty_bytes)
454 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
455 zone_memory / global_dirtyable_memory();
456 else
457 dirty = vm_dirty_ratio * zone_memory / 100;
458
459 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
460 dirty += dirty / 4;
461
462 return dirty;
463}
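
/*
 * zone_dirty_ok - tells whether a zone is within its dirty limit,
 * counting dirty, unstable NFS and writeback pages.
 */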
472bool zone_dirty_ok(struct zone *zone)
473{
474 unsigned long limit = zone_dirty_limit(zone);
475
476 return zone_page_state(zone, NR_FILE_DIRTY) +
477 zone_page_state(zone, NR_UNSTABLE_NFS) +
478 zone_page_state(zone, NR_WRITEBACK) <= limit;
479}
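
/*
 * The sysctl handlers below keep the ratio and bytes forms of each limit
 * mutually exclusive (writing one clears the other) and recompute
 * ratelimit_pages when the foreground dirty limit changes.
 */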
481int dirty_background_ratio_handler(struct ctl_table *table, int write,
482 void __user *buffer, size_t *lenp,
483 loff_t *ppos)
484{
485 int ret;
486
487 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
488 if (ret == 0 && write)
489 dirty_background_bytes = 0;
490 return ret;
491}
492
493int dirty_background_bytes_handler(struct ctl_table *table, int write,
494 void __user *buffer, size_t *lenp,
495 loff_t *ppos)
496{
497 int ret;
498
499 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
500 if (ret == 0 && write)
501 dirty_background_ratio = 0;
502 return ret;
503}
504
505int dirty_ratio_handler(struct ctl_table *table, int write,
506 void __user *buffer, size_t *lenp,
507 loff_t *ppos)
508{
509 int old_ratio = vm_dirty_ratio;
510 int ret;
511
512 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
513 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
514 writeback_set_ratelimit();
515 vm_dirty_bytes = 0;
516 }
517 return ret;
518}
519
520int dirty_bytes_handler(struct ctl_table *table, int write,
521 void __user *buffer, size_t *lenp,
522 loff_t *ppos)
523{
524 unsigned long old_bytes = vm_dirty_bytes;
525 int ret;
526
527 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
528 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
529 writeback_set_ratelimit();
530 vm_dirty_ratio = 0;
531 }
532 return ret;
533}
534
535static unsigned long wp_next_time(unsigned long cur_time)
536{
537 cur_time += VM_COMPLETIONS_PERIOD_LEN;
538
539 if (!cur_time)
540 return 1;
541 return cur_time;
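
/*
 * Record one writeout completion in @completions within @dom and restart
 * the proportions aging timer if it had gone idle.
 */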
544static void wb_domain_writeout_inc(struct wb_domain *dom,
545 struct fprop_local_percpu *completions,
546 unsigned int max_prop_frac)
547{
548 __fprop_inc_percpu_max(&dom->completions, completions,
549 max_prop_frac);
550
551 if (!unlikely(dom->period_time)) {
552
553
554
555
556
557
558 dom->period_time = wp_next_time(jiffies);
559 mod_timer(&dom->period_timer, dom->period_time);
560 }
561}
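
/*
 * Count a completed writeout: bumps WB_WRITTEN and the completion
 * proportions in the global domain and, with cgroup writeback in use,
 * in the memcg domain as well.  Callers must have irqs disabled;
 * wb_writeout_inc() is the irq-safe wrapper.
 */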
567static inline void __wb_writeout_inc(struct bdi_writeback *wb)
568{
569 struct wb_domain *cgdom;
570
571 __inc_wb_stat(wb, WB_WRITTEN);
572 wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
573 wb->bdi->max_prop_frac);
574
575 cgdom = mem_cgroup_wb_domain(wb);
576 if (cgdom)
577 wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
578 wb->bdi->max_prop_frac);
579}
580
581void wb_writeout_inc(struct bdi_writeback *wb)
582{
583 unsigned long flags;
584
585 local_irq_save(flags);
586 __wb_writeout_inc(wb);
587 local_irq_restore(flags);
588}
589EXPORT_SYMBOL_GPL(wb_writeout_inc);
590
591
592
593
594
595static void writeout_period(unsigned long t)
596{
597 struct wb_domain *dom = (void *)t;
598 int miss_periods = (jiffies - dom->period_time) /
599 VM_COMPLETIONS_PERIOD_LEN;
600
601 if (fprop_new_period(&dom->completions, miss_periods + 1)) {
602 dom->period_time = wp_next_time(dom->period_time +
603 miss_periods * VM_COMPLETIONS_PERIOD_LEN);
604 mod_timer(&dom->period_timer, dom->period_time);
605 } else {
606
607
608
609
610 dom->period_time = 0;
611 }
612}
613
614int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
615{
616 memset(dom, 0, sizeof(*dom));
617
618 spin_lock_init(&dom->lock);
619
620 init_timer_deferrable(&dom->period_timer);
621 dom->period_timer.function = writeout_period;
622 dom->period_timer.data = (unsigned long)dom;
623
624 dom->dirty_limit_tstamp = jiffies;
625
626 return fprop_global_init(&dom->completions, gfp);
627}
628
629#ifdef CONFIG_CGROUP_WRITEBACK
630void wb_domain_exit(struct wb_domain *dom)
631{
632 del_timer_sync(&dom->period_timer);
633 fprop_global_destroy(&dom->completions);
634}
635#endif
636
637
638
639
640
641
642static unsigned int bdi_min_ratio;
643
644int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
645{
646 int ret = 0;
647
648 spin_lock_bh(&bdi_lock);
649 if (min_ratio > bdi->max_ratio) {
650 ret = -EINVAL;
651 } else {
652 min_ratio -= bdi->min_ratio;
653 if (bdi_min_ratio + min_ratio < 100) {
654 bdi_min_ratio += min_ratio;
655 bdi->min_ratio += min_ratio;
656 } else {
657 ret = -EINVAL;
658 }
659 }
660 spin_unlock_bh(&bdi_lock);
661
662 return ret;
663}
664
665int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
666{
667 int ret = 0;
668
669 if (max_ratio > 100)
670 return -EINVAL;
671
672 spin_lock_bh(&bdi_lock);
673 if (bdi->min_ratio > max_ratio) {
674 ret = -EINVAL;
675 } else {
676 bdi->max_ratio = max_ratio;
677 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
678 }
679 spin_unlock_bh(&bdi_lock);
680
681 return ret;
682}
683EXPORT_SYMBOL(bdi_set_max_ratio);
684
685static unsigned long dirty_freerun_ceiling(unsigned long thresh,
686 unsigned long bg_thresh)
687{
688 return (thresh + bg_thresh) / 2;
689}
690
691static unsigned long hard_dirty_limit(struct wb_domain *dom,
692 unsigned long thresh)
693{
694 return max(thresh, dom->dirty_limit);
695}
696
697
698
699
700
701static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
702 unsigned long filepages, unsigned long headroom)
703{
704 struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
705 unsigned long clean = filepages - min(filepages, mdtc->dirty);
706 unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
707 unsigned long other_clean = global_clean - min(global_clean, clean);
708
709 mdtc->avail = filepages + min(headroom, other_clean);
710}
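
/*
 * __wb_calc_thresh - @wb's share of the domain's dirty threshold.
 *
 * The threshold is split among the domain's writebacks in proportion to
 * their recent writeout completions and then clamped by the bdi's
 * min/max ratios.  Because the proportions adapt over roughly the
 * completions aging period, a wb may temporarily sit above its share.
 */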
733static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
734{
735 struct wb_domain *dom = dtc_dom(dtc);
736 unsigned long thresh = dtc->thresh;
737 u64 wb_thresh;
738 long numerator, denominator;
739 unsigned long wb_min_ratio, wb_max_ratio;
740
741
742
743
744 fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
745 &numerator, &denominator);
746
747 wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
748 wb_thresh *= numerator;
749 do_div(wb_thresh, denominator);
750
751 wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
752
753 wb_thresh += (thresh * wb_min_ratio) / 100;
754 if (wb_thresh > (thresh * wb_max_ratio) / 100)
755 wb_thresh = thresh * wb_max_ratio / 100;
756
757 return wb_thresh;
758}
759
760unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
761{
762 struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
763 .thresh = thresh };
764 return __wb_calc_thresh(&gdtc);
765}
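
/*
 * Cubic position feedback curve: returns 1.0 (in RATELIMIT_CALC_SHIFT
 * fixed point) when @dirty is at @setpoint, falls towards 0 as @dirty
 * approaches @limit, rises towards 2.0 below the setpoint, and is
 * clamped to [0, 2].
 */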
781static long long pos_ratio_polynom(unsigned long setpoint,
782 unsigned long dirty,
783 unsigned long limit)
784{
785 long long pos_ratio;
786 long x;
787
788 x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
789 (limit - setpoint) | 1);
790 pos_ratio = x;
791 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
792 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
793 pos_ratio += 1 << RATELIMIT_CALC_SHIFT;
794
795 return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
796}
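
/*
 * wb_position_ratio - compute @dtc->pos_ratio, the feedback factor applied
 * to the base dirty ratelimit.  It combines the global cubic curve around
 * the global setpoint with a per-wb linear correction around the wb
 * setpoint (plus extra scaling when wb_dirty drops very low), so tasks
 * are slowed as dirty pages rise above the setpoints and released as
 * they fall back.  In BDI_CAP_STRICTLIMIT mode the per-wb limits dominate.
 */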
873static void wb_position_ratio(struct dirty_throttle_control *dtc)
874{
875 struct bdi_writeback *wb = dtc->wb;
876 unsigned long write_bw = wb->avg_write_bandwidth;
877 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
878 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
879 unsigned long wb_thresh = dtc->wb_thresh;
880 unsigned long x_intercept;
881 unsigned long setpoint;
882 unsigned long wb_setpoint;
883 unsigned long span;
884 long long pos_ratio;
885 long x;
886
887 dtc->pos_ratio = 0;
888
889 if (unlikely(dtc->dirty >= limit))
890 return;
891
892
893
894
895
896
897 setpoint = (freerun + limit) / 2;
898 pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
926 long long wb_pos_ratio;
927
928 if (dtc->wb_dirty < 8) {
929 dtc->pos_ratio = min_t(long long, pos_ratio * 2,
930 2 << RATELIMIT_CALC_SHIFT);
931 return;
932 }
933
934 if (dtc->wb_dirty >= wb_thresh)
935 return;
936
937 wb_setpoint = dirty_freerun_ceiling(wb_thresh,
938 dtc->wb_bg_thresh);
939
940 if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
941 return;
942
943 wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
944 wb_thresh);
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967 dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
968 return;
969 }
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002 if (unlikely(wb_thresh > dtc->thresh))
1003 wb_thresh = dtc->thresh;
1004
1005
1006
1007
1008
1009
1010
1011 wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
1012
1013
1014
1015
1016 x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
1017 wb_setpoint = setpoint * (u64)x >> 16;
1018
1019
1020
1021
1022
1023
1024
1025
1026 span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
1027 x_intercept = wb_setpoint + span;
1028
1029 if (dtc->wb_dirty < x_intercept - span / 4) {
1030 pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
1031 (x_intercept - wb_setpoint) | 1);
1032 } else
1033 pos_ratio /= 4;
1034
1035
1036
1037
1038
1039
1040 x_intercept = wb_thresh / 2;
1041 if (dtc->wb_dirty < x_intercept) {
1042 if (dtc->wb_dirty > x_intercept / 8)
1043 pos_ratio = div_u64(pos_ratio * x_intercept,
1044 dtc->wb_dirty);
1045 else
1046 pos_ratio *= 8;
1047 }
1048
1049 dtc->pos_ratio = pos_ratio;
1050}
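
/*
 * Re-estimate @wb's write bandwidth from the pages written since the
 * last sample, smoothed over a roughly 3 second period;
 * avg_write_bandwidth follows the estimate with additional hysteresis.
 */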
1052static void wb_update_write_bandwidth(struct bdi_writeback *wb,
1053 unsigned long elapsed,
1054 unsigned long written)
1055{
1056 const unsigned long period = roundup_pow_of_two(3 * HZ);
1057 unsigned long avg = wb->avg_write_bandwidth;
1058 unsigned long old = wb->write_bandwidth;
1059 u64 bw;
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071 bw = written - min(written, wb->written_stamp);
1072 bw *= HZ;
1073 if (unlikely(elapsed > period)) {
1074 do_div(bw, elapsed);
1075 avg = bw;
1076 goto out;
1077 }
1078 bw += (u64)wb->write_bandwidth * (period - elapsed);
1079 bw >>= ilog2(period);
1080
1081
1082
1083
1084 if (avg > old && old >= (unsigned long)bw)
1085 avg -= (avg - old) >> 3;
1086
1087 if (avg < old && old <= (unsigned long)bw)
1088 avg += (old - avg) >> 3;
1089
1090out:
1091
1092 avg = max(avg, 1LU);
1093 if (wb_has_dirty_io(wb)) {
1094 long delta = avg - wb->avg_write_bandwidth;
1095 WARN_ON_ONCE(atomic_long_add_return(delta,
1096 &wb->bdi->tot_write_bandwidth) <= 0);
1097 }
1098 wb->write_bandwidth = bw;
1099 wb->avg_write_bandwidth = avg;
1100}
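
/*
 * Track the domain-wide hard dirty limit: it is raised to the current
 * threshold immediately, but only decays slowly (1/32 per update) so a
 * sudden drop of the threshold does not instantly strand pages dirtied
 * under the old, higher limit.
 */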
1102static void update_dirty_limit(struct dirty_throttle_control *dtc)
1103{
1104 struct wb_domain *dom = dtc_dom(dtc);
1105 unsigned long thresh = dtc->thresh;
1106 unsigned long limit = dom->dirty_limit;
1107
1108
1109
1110
1111 if (limit < thresh) {
1112 limit = thresh;
1113 goto update;
1114 }
1115
1116
1117
1118
1119
1120
1121 thresh = max(thresh, dtc->dirty);
1122 if (limit > thresh) {
1123 limit -= (limit - thresh) >> 5;
1124 goto update;
1125 }
1126 return;
1127update:
1128 dom->dirty_limit = limit;
1129}
1130
1131static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
1132 unsigned long now)
1133{
1134 struct wb_domain *dom = dtc_dom(dtc);
1135
1136
1137
1138
1139 if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
1140 return;
1141
1142 spin_lock(&dom->lock);
1143 if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
1144 update_dirty_limit(dtc);
1145 dom->dirty_limit_tstamp = now;
1146 }
1147 spin_unlock(&dom->lock);
1148}
1149
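
/*
 * Adjust wb->dirty_ratelimit towards the "balanced" rate at which this
 * wb's tasks should dirty pages, i.e. task_ratelimit * write_bw /
 * dirty_rate.  The ratelimit is only stepped when the balanced estimate
 * and the position feedback agree on the direction, which keeps it
 * stable against short-term fluctuations.
 */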
1156static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
1157 unsigned long dirtied,
1158 unsigned long elapsed)
1159{
1160 struct bdi_writeback *wb = dtc->wb;
1161 unsigned long dirty = dtc->dirty;
1162 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
1163 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
1164 unsigned long setpoint = (freerun + limit) / 2;
1165 unsigned long write_bw = wb->avg_write_bandwidth;
1166 unsigned long dirty_ratelimit = wb->dirty_ratelimit;
1167 unsigned long dirty_rate;
1168 unsigned long task_ratelimit;
1169 unsigned long balanced_dirty_ratelimit;
1170 unsigned long step;
1171 unsigned long x;
1172
1173
1174
1175
1176
1177 dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;
1178
1179
1180
1181
1182 task_ratelimit = (u64)dirty_ratelimit *
1183 dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
1184 task_ratelimit++;
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216 balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
1217 dirty_rate | 1);
1218
1219
1220
1221 if (unlikely(balanced_dirty_ratelimit > write_bw))
1222 balanced_dirty_ratelimit = write_bw;
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258 step = 0;
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
1272 dirty = dtc->wb_dirty;
1273 if (dtc->wb_dirty < 8)
1274 setpoint = dtc->wb_dirty + 1;
1275 else
1276 setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
1277 }
1278
1279 if (dirty < setpoint) {
1280 x = min3(wb->balanced_dirty_ratelimit,
1281 balanced_dirty_ratelimit, task_ratelimit);
1282 if (dirty_ratelimit < x)
1283 step = x - dirty_ratelimit;
1284 } else {
1285 x = max3(wb->balanced_dirty_ratelimit,
1286 balanced_dirty_ratelimit, task_ratelimit);
1287 if (dirty_ratelimit > x)
1288 step = dirty_ratelimit - x;
1289 }
1290
1291
1292
1293
1294
1295
1296 step >>= dirty_ratelimit / (2 * step + 1);
1297
1298
1299
1300 step = (step + 7) / 8;
1301
1302 if (dirty_ratelimit < balanced_dirty_ratelimit)
1303 dirty_ratelimit += step;
1304 else
1305 dirty_ratelimit -= step;
1306
1307 wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
1308 wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
1309
1310 trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
1311}
1312
1313static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
1314 struct dirty_throttle_control *mdtc,
1315 unsigned long start_time,
1316 bool update_ratelimit)
1317{
1318 struct bdi_writeback *wb = gdtc->wb;
1319 unsigned long now = jiffies;
1320 unsigned long elapsed = now - wb->bw_time_stamp;
1321 unsigned long dirtied;
1322 unsigned long written;
1323
1324 lockdep_assert_held(&wb->list_lock);
1325
1326
1327
1328
1329 if (elapsed < BANDWIDTH_INTERVAL)
1330 return;
1331
1332 dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
1333 written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
1334
1335
1336
1337
1338
1339 if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
1340 goto snapshot;
1341
1342 if (update_ratelimit) {
1343 domain_update_bandwidth(gdtc, now);
1344 wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
1345
1346
1347
1348
1349
1350 if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
1351 domain_update_bandwidth(mdtc, now);
1352 wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
1353 }
1354 }
1355 wb_update_write_bandwidth(wb, elapsed, written);
1356
1357snapshot:
1358 wb->dirtied_stamp = dirtied;
1359 wb->written_stamp = written;
1360 wb->bw_time_stamp = now;
1361}
1362
1363void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
1364{
1365 struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
1366
1367 __wb_update_bandwidth(&gdtc, NULL, start_time, false);
1368}
1369
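
/*
 * Number of pages a task may dirty before taking another look at the
 * dirty state: roughly the square root of the remaining margin below
 * the threshold, so polling gets more frequent as the limit approaches.
 */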
1378static unsigned long dirty_poll_interval(unsigned long dirty,
1379 unsigned long thresh)
1380{
1381 if (thresh > dirty)
1382 return 1UL << (ilog2(thresh - dirty) >> 1);
1383
1384 return 1;
1385}
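
/*
 * Cap a single throttle sleep so that the wb's dirty pages cannot all be
 * written out while the task sleeps (which would leave the disk idle),
 * and never sleep longer than MAX_PAUSE.
 */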
1387static unsigned long wb_max_pause(struct bdi_writeback *wb,
1388 unsigned long wb_dirty)
1389{
1390 unsigned long bw = wb->avg_write_bandwidth;
1391 unsigned long t;
1392
1393
1394
1395
1396
1397
1398
1399
1400 t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
1401 t++;
1402
1403 return min_t(unsigned long, t, MAX_PAUSE);
1404}
1405
1406static long wb_min_pause(struct bdi_writeback *wb,
1407 long max_pause,
1408 unsigned long task_ratelimit,
1409 unsigned long dirty_ratelimit,
1410 int *nr_dirtied_pause)
1411{
1412 long hi = ilog2(wb->avg_write_bandwidth);
1413 long lo = ilog2(wb->dirty_ratelimit);
1414 long t;
1415 long pause;
1416 int pages;
1417
1418
1419 t = max(1, HZ / 100);
1420
1421
1422
1423
1424
1425
1426
1427 if (hi > lo)
1428 t += (hi - lo) * (10 * HZ) / 1024;
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448 t = min(t, 1 + max_pause / 2);
1449 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459 if (pages < DIRTY_POLL_THRESH) {
1460 t = max_pause;
1461 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1462 if (pages > DIRTY_POLL_THRESH) {
1463 pages = DIRTY_POLL_THRESH;
1464 t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
1465 }
1466 }
1467
1468 pause = HZ * pages / (task_ratelimit + 1);
1469 if (pause > max_pause) {
1470 t = max_pause;
1471 pages = task_ratelimit * t / roundup_pow_of_two(HZ);
1472 }
1473
1474 *nr_dirtied_pause = pages;
1475
1476
1477
1478 return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
1479}
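
/*
 * Fill in @dtc's per-wb dirty threshold, background threshold and dirty
 * count.  When wb_thresh is very low, the exact (but more expensive)
 * wb_stat_sum() is used so per-CPU counter errors do not dominate.
 */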
1481static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
1482{
1483 struct bdi_writeback *wb = dtc->wb;
1484 unsigned long wb_reclaimable;
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499 dtc->wb_thresh = __wb_calc_thresh(dtc);
1500 dtc->wb_bg_thresh = dtc->thresh ?
1501 div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513 if (dtc->wb_thresh < 2 * wb_stat_error(wb)) {
1514 wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
1515 dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
1516 } else {
1517 wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
1518 dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
1519 }
1520}
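
/*
 * balance_dirty_pages() is called by page-dirtying tasks once they have
 * used up their quota of newly dirtied pages.  It re-evaluates the global
 * (and, with cgroup writeback, the memcg) dirty state, kicks background
 * writeback when needed, and sleeps the task in ratelimit-sized pauses
 * until the dirty counts head back towards their setpoints.
 */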
1529static void balance_dirty_pages(struct address_space *mapping,
1530 struct bdi_writeback *wb,
1531 unsigned long pages_dirtied)
1532{
1533 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1534 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1535 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1536 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1537 &mdtc_stor : NULL;
1538 struct dirty_throttle_control *sdtc;
1539 unsigned long nr_reclaimable;
1540 long period;
1541 long pause;
1542 long max_pause;
1543 long min_pause;
1544 int nr_dirtied_pause;
1545 bool dirty_exceeded = false;
1546 unsigned long task_ratelimit;
1547 unsigned long dirty_ratelimit;
1548 struct backing_dev_info *bdi = wb->bdi;
1549 bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
1550 unsigned long start_time = jiffies;
1551
1552 for (;;) {
1553 unsigned long now = jiffies;
1554 unsigned long dirty, thresh, bg_thresh;
1555 unsigned long m_dirty = 0;
1556 unsigned long m_thresh = 0;
1557 unsigned long m_bg_thresh = 0;
1558
1559
1560
1561
1562
1563
1564
1565 nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
1566 global_page_state(NR_UNSTABLE_NFS);
1567 gdtc->avail = global_dirtyable_memory();
1568 gdtc->dirty = nr_reclaimable + global_page_state(NR_WRITEBACK);
1569
1570 domain_dirty_limits(gdtc);
1571
1572 if (unlikely(strictlimit)) {
1573 wb_dirty_limits(gdtc);
1574
1575 dirty = gdtc->wb_dirty;
1576 thresh = gdtc->wb_thresh;
1577 bg_thresh = gdtc->wb_bg_thresh;
1578 } else {
1579 dirty = gdtc->dirty;
1580 thresh = gdtc->thresh;
1581 bg_thresh = gdtc->bg_thresh;
1582 }
1583
1584 if (mdtc) {
1585 unsigned long filepages, headroom, writeback;
1586
1587
1588
1589
1590
1591 mem_cgroup_wb_stats(wb, &filepages, &headroom,
1592 &mdtc->dirty, &writeback);
1593 mdtc->dirty += writeback;
1594 mdtc_calc_avail(mdtc, filepages, headroom);
1595
1596 domain_dirty_limits(mdtc);
1597
1598 if (unlikely(strictlimit)) {
1599 wb_dirty_limits(mdtc);
1600 m_dirty = mdtc->wb_dirty;
1601 m_thresh = mdtc->wb_thresh;
1602 m_bg_thresh = mdtc->wb_bg_thresh;
1603 } else {
1604 m_dirty = mdtc->dirty;
1605 m_thresh = mdtc->thresh;
1606 m_bg_thresh = mdtc->bg_thresh;
1607 }
1608 }
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622 if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
1623 (!mdtc ||
1624 m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
1625 unsigned long intv = dirty_poll_interval(dirty, thresh);
1626 unsigned long m_intv = ULONG_MAX;
1627
1628 current->dirty_paused_when = now;
1629 current->nr_dirtied = 0;
1630 if (mdtc)
1631 m_intv = dirty_poll_interval(m_dirty, m_thresh);
1632 current->nr_dirtied_pause = min(intv, m_intv);
1633 break;
1634 }
1635
1636 if (unlikely(!writeback_in_progress(wb)))
1637 wb_start_background_writeback(wb);
1638
1639
1640
1641
1642
1643 if (!strictlimit)
1644 wb_dirty_limits(gdtc);
1645
1646 dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
1647 ((gdtc->dirty > gdtc->thresh) || strictlimit);
1648
1649 wb_position_ratio(gdtc);
1650 sdtc = gdtc;
1651
1652 if (mdtc) {
1653
1654
1655
1656
1657
1658
1659 if (!strictlimit)
1660 wb_dirty_limits(mdtc);
1661
1662 dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
1663 ((mdtc->dirty > mdtc->thresh) || strictlimit);
1664
1665 wb_position_ratio(mdtc);
1666 if (mdtc->pos_ratio < gdtc->pos_ratio)
1667 sdtc = mdtc;
1668 }
1669
1670 if (dirty_exceeded && !wb->dirty_exceeded)
1671 wb->dirty_exceeded = 1;
1672
1673 if (time_is_before_jiffies(wb->bw_time_stamp +
1674 BANDWIDTH_INTERVAL)) {
1675 spin_lock(&wb->list_lock);
1676 __wb_update_bandwidth(gdtc, mdtc, start_time, true);
1677 spin_unlock(&wb->list_lock);
1678 }
1679
1680
1681 dirty_ratelimit = wb->dirty_ratelimit;
1682 task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
1683 RATELIMIT_CALC_SHIFT;
1684 max_pause = wb_max_pause(wb, sdtc->wb_dirty);
1685 min_pause = wb_min_pause(wb, max_pause,
1686 task_ratelimit, dirty_ratelimit,
1687 &nr_dirtied_pause);
1688
1689 if (unlikely(task_ratelimit == 0)) {
1690 period = max_pause;
1691 pause = max_pause;
1692 goto pause;
1693 }
1694 period = HZ * pages_dirtied / task_ratelimit;
1695 pause = period;
1696 if (current->dirty_paused_when)
1697 pause -= now - current->dirty_paused_when;
1698
1699
1700
1701
1702
1703
1704
1705 if (pause < min_pause) {
1706 trace_balance_dirty_pages(wb,
1707 sdtc->thresh,
1708 sdtc->bg_thresh,
1709 sdtc->dirty,
1710 sdtc->wb_thresh,
1711 sdtc->wb_dirty,
1712 dirty_ratelimit,
1713 task_ratelimit,
1714 pages_dirtied,
1715 period,
1716 min(pause, 0L),
1717 start_time);
1718 if (pause < -HZ) {
1719 current->dirty_paused_when = now;
1720 current->nr_dirtied = 0;
1721 } else if (period) {
1722 current->dirty_paused_when += period;
1723 current->nr_dirtied = 0;
1724 } else if (current->nr_dirtied_pause <= pages_dirtied)
1725 current->nr_dirtied_pause += pages_dirtied;
1726 break;
1727 }
1728 if (unlikely(pause > max_pause)) {
1729
1730 now += min(pause - max_pause, max_pause);
1731 pause = max_pause;
1732 }
1733
1734pause:
1735 trace_balance_dirty_pages(wb,
1736 sdtc->thresh,
1737 sdtc->bg_thresh,
1738 sdtc->dirty,
1739 sdtc->wb_thresh,
1740 sdtc->wb_dirty,
1741 dirty_ratelimit,
1742 task_ratelimit,
1743 pages_dirtied,
1744 period,
1745 pause,
1746 start_time);
1747 __set_current_state(TASK_KILLABLE);
1748 io_schedule_timeout(pause);
1749
1750 current->dirty_paused_when = now + pause;
1751 current->nr_dirtied = 0;
1752 current->nr_dirtied_pause = nr_dirtied_pause;
1753
1754
1755
1756
1757
1758 if (task_ratelimit)
1759 break;
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771 if (sdtc->wb_dirty <= wb_stat_error(wb))
1772 break;
1773
1774 if (fatal_signal_pending(current))
1775 break;
1776 }
1777
1778 if (!dirty_exceeded && wb->dirty_exceeded)
1779 wb->dirty_exceeded = 0;
1780
1781 if (writeback_in_progress(wb))
1782 return;
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792 if (laptop_mode)
1793 return;
1794
1795 if (nr_reclaimable > gdtc->bg_thresh)
1796 wb_start_background_writeback(wb);
1797}
1798
1799static DEFINE_PER_CPU(int, bdp_ratelimits);
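
/*
 * Pages dirtied over a task's quota but never throttled (for example
 * because the task exited first) are remembered per CPU here and
 * credited against later dirtiers, so they are not lost from the
 * ratelimiting accounting.
 */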
1815DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
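
/*
 * balance_dirty_pages_ratelimited - balance dirty memory state.
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each
 * page which was newly dirtied.  The dirty state is checked at a
 * per-task and per-CPU ratelimited frequency, and balance_dirty_pages()
 * is entered once the task's quota is used up.
 */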
1830void balance_dirty_pages_ratelimited(struct address_space *mapping)
1831{
1832 struct inode *inode = mapping->host;
1833 struct backing_dev_info *bdi = inode_to_bdi(inode);
1834 struct bdi_writeback *wb = NULL;
1835 int ratelimit;
1836 int *p;
1837
1838 if (!bdi_cap_account_dirty(bdi))
1839 return;
1840
1841 if (inode_cgwb_enabled(inode))
1842 wb = wb_get_create_current(bdi, GFP_KERNEL);
1843 if (!wb)
1844 wb = &bdi->wb;
1845
1846 ratelimit = current->nr_dirtied_pause;
1847 if (wb->dirty_exceeded)
1848 ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
1849
1850 preempt_disable();
1851
1852
1853
1854
1855
1856
1857 p = this_cpu_ptr(&bdp_ratelimits);
1858 if (unlikely(current->nr_dirtied >= ratelimit))
1859 *p = 0;
1860 else if (unlikely(*p >= ratelimit_pages)) {
1861 *p = 0;
1862 ratelimit = 0;
1863 }
1864
1865
1866
1867
1868
1869 p = this_cpu_ptr(&dirty_throttle_leaks);
1870 if (*p > 0 && current->nr_dirtied < ratelimit) {
1871 unsigned long nr_pages_dirtied;
1872 nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
1873 *p -= nr_pages_dirtied;
1874 current->nr_dirtied += nr_pages_dirtied;
1875 }
1876 preempt_enable();
1877
1878 if (unlikely(current->nr_dirtied >= ratelimit))
1879 balance_dirty_pages(mapping, wb, current->nr_dirtied);
1880
1881 wb_put(wb);
1882}
1883EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
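
/*
 * wb_over_bg_thresh - does @wb need to be written back?
 *
 * Returns true if dirty pages exceed the background threshold in the
 * global domain, in @wb's memcg domain, or for @wb itself.
 */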
1892bool wb_over_bg_thresh(struct bdi_writeback *wb)
1893{
1894 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1895 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1896 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1897 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1898 &mdtc_stor : NULL;
1899
1900
1901
1902
1903
1904 gdtc->avail = global_dirtyable_memory();
1905 gdtc->dirty = global_page_state(NR_FILE_DIRTY) +
1906 global_page_state(NR_UNSTABLE_NFS);
1907 domain_dirty_limits(gdtc);
1908
1909 if (gdtc->dirty > gdtc->bg_thresh)
1910 return true;
1911
1912 if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(gdtc))
1913 return true;
1914
1915 if (mdtc) {
1916 unsigned long filepages, headroom, writeback;
1917
1918 mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
1919 &writeback);
1920 mdtc_calc_avail(mdtc, filepages, headroom);
1921 domain_dirty_limits(mdtc);
1922
1923 if (mdtc->dirty > mdtc->bg_thresh)
1924 return true;
1925
1926 if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(mdtc))
1927 return true;
1928 }
1929
1930 return false;
1931}
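
/*
 * Called by the VM when reclaim is generating too much writeback: sleep
 * until the number of writeback plus unstable NFS pages drops below the
 * dirty threshold (with a 10% margin).  FS/IO-restricted allocations are
 * let through after a single wait to avoid deadlocks.
 */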
1933void throttle_vm_writeout(gfp_t gfp_mask)
1934{
1935 unsigned long background_thresh;
1936 unsigned long dirty_thresh;
1937
1938 for ( ; ; ) {
1939 global_dirty_limits(&background_thresh, &dirty_thresh);
1940 dirty_thresh = hard_dirty_limit(&global_wb_domain, dirty_thresh);
1941
1942
1943
1944
1945
1946 dirty_thresh += dirty_thresh / 10;
1947
1948 if (global_page_state(NR_UNSTABLE_NFS) +
1949 global_page_state(NR_WRITEBACK) <= dirty_thresh)
1950 break;
1951 congestion_wait(BLK_RW_ASYNC, HZ/10);
1952
1953
1954
1955
1956
1957
1958 if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
1959 break;
1960 }
1961}
1962
1963
1964
1965
1966int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
1967 void __user *buffer, size_t *length, loff_t *ppos)
1968{
1969 proc_dointvec(table, write, buffer, length, ppos);
1970 return 0;
1971}
1972
1973#ifdef CONFIG_BLOCK
1974void laptop_mode_timer_fn(unsigned long data)
1975{
1976 struct request_queue *q = (struct request_queue *)data;
1977 int nr_pages = global_page_state(NR_FILE_DIRTY) +
1978 global_page_state(NR_UNSTABLE_NFS);
1979 struct bdi_writeback *wb;
1980
1981
1982
1983
1984
1985 if (!bdi_has_dirty_io(&q->backing_dev_info))
1986 return;
1987
1988 rcu_read_lock();
1989 list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node)
1990 if (wb_has_dirty_io(wb))
1991 wb_start_writeback(wb, nr_pages, true,
1992 WB_REASON_LAPTOP_TIMER);
1993 rcu_read_unlock();
1994}
1995
1996
1997
1998
1999
2000
2001void laptop_io_completion(struct backing_dev_info *info)
2002{
2003 mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
2004}
2005
2006
2007
2008
2009
2010
2011void laptop_sync_completion(void)
2012{
2013 struct backing_dev_info *bdi;
2014
2015 rcu_read_lock();
2016
2017 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
2018 del_timer(&bdi->laptop_mode_wb_timer);
2019
2020 rcu_read_unlock();
2021}
2022#endif
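
/*
 * Recompute ratelimit_pages so that, with all CPUs dirtying concurrently,
 * the per-CPU leeway stays a small fraction of the global dirty
 * threshold.  Called at boot, on CPU hotplug and when the limits change.
 */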
2035void writeback_set_ratelimit(void)
2036{
2037 struct wb_domain *dom = &global_wb_domain;
2038 unsigned long background_thresh;
2039 unsigned long dirty_thresh;
2040
2041 global_dirty_limits(&background_thresh, &dirty_thresh);
2042 dom->dirty_limit = dirty_thresh;
2043 ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
2044 if (ratelimit_pages < 16)
2045 ratelimit_pages = 16;
2046}
2047
2048static int
2049ratelimit_handler(struct notifier_block *self, unsigned long action,
2050 void *hcpu)
2051{
2052
2053 switch (action & ~CPU_TASKS_FROZEN) {
2054 case CPU_ONLINE:
2055 case CPU_DEAD:
2056 writeback_set_ratelimit();
2057 return NOTIFY_OK;
2058 default:
2059 return NOTIFY_DONE;
2060 }
2061}
2062
2063static struct notifier_block ratelimit_nb = {
2064 .notifier_call = ratelimit_handler,
2065 .next = NULL,
2066};
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086void __init page_writeback_init(void)
2087{
2088 BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
2089
2090 writeback_set_ratelimit();
2091 register_cpu_notifier(&ratelimit_nb);
2092}
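
/*
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages().
 *
 * Tags all currently dirty pages in @start..@end with TOWRITE so that a
 * subsequent write_cache_pages() pass only writes pages dirtied before
 * this call, avoiding livelock when pages are dirtied as fast as they
 * are written.  Tagging is batched to bound tree_lock hold times.
 */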
2111void tag_pages_for_writeback(struct address_space *mapping,
2112 pgoff_t start, pgoff_t end)
2113{
2114#define WRITEBACK_TAG_BATCH 4096
2115 unsigned long tagged;
2116
2117 do {
2118 spin_lock_irq(&mapping->tree_lock);
2119 tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
2120 &start, end, WRITEBACK_TAG_BATCH,
2121 PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
2122 spin_unlock_irq(&mapping->tree_lock);
2123 WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
2124 cond_resched();
2125
2126 } while (tagged >= WRITEBACK_TAG_BATCH && start);
2127}
2128EXPORT_SYMBOL(tag_pages_for_writeback);
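
/*
 * write_cache_pages - walk the dirty pages of @mapping and write each of
 * them out via @writepage.
 *
 * For range_cyclic writeback the walk resumes at mapping->writeback_index
 * and wraps around once; data-integrity callers work from the TOWRITE tag
 * set up by tag_pages_for_writeback() so newly dirtied pages cannot
 * livelock them.  Returns the first ->writepage error, if any.
 */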
2152int write_cache_pages(struct address_space *mapping,
2153 struct writeback_control *wbc, writepage_t writepage,
2154 void *data)
2155{
2156 int ret = 0;
2157 int done = 0;
2158 struct pagevec pvec;
2159 int nr_pages;
2160 pgoff_t uninitialized_var(writeback_index);
2161 pgoff_t index;
2162 pgoff_t end;
2163 pgoff_t done_index;
2164 int cycled;
2165 int range_whole = 0;
2166 int tag;
2167
2168 pagevec_init(&pvec, 0);
2169 if (wbc->range_cyclic) {
2170 writeback_index = mapping->writeback_index;
2171 index = writeback_index;
2172 if (index == 0)
2173 cycled = 1;
2174 else
2175 cycled = 0;
2176 end = -1;
2177 } else {
2178 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2179 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2180 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2181 range_whole = 1;
2182 cycled = 1;
2183 }
2184 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2185 tag = PAGECACHE_TAG_TOWRITE;
2186 else
2187 tag = PAGECACHE_TAG_DIRTY;
2188retry:
2189 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2190 tag_pages_for_writeback(mapping, index, end);
2191 done_index = index;
2192 while (!done && (index <= end)) {
2193 int i;
2194
2195 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2196 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2197 if (nr_pages == 0)
2198 break;
2199
2200 for (i = 0; i < nr_pages; i++) {
2201 struct page *page = pvec.pages[i];
2202
2203
2204
2205
2206
2207
2208
2209
2210 if (page->index > end) {
2211
2212
2213
2214
2215 done = 1;
2216 break;
2217 }
2218
2219 done_index = page->index;
2220
2221 lock_page(page);
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231 if (unlikely(page->mapping != mapping)) {
2232continue_unlock:
2233 unlock_page(page);
2234 continue;
2235 }
2236
2237 if (!PageDirty(page)) {
2238
2239 goto continue_unlock;
2240 }
2241
2242 if (PageWriteback(page)) {
2243 if (wbc->sync_mode != WB_SYNC_NONE)
2244 wait_on_page_writeback(page);
2245 else
2246 goto continue_unlock;
2247 }
2248
2249 BUG_ON(PageWriteback(page));
2250 if (!clear_page_dirty_for_io(page))
2251 goto continue_unlock;
2252
2253 trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
2254 ret = (*writepage)(page, wbc, data);
2255 if (unlikely(ret)) {
2256 if (ret == AOP_WRITEPAGE_ACTIVATE) {
2257 unlock_page(page);
2258 ret = 0;
2259 } else {
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269 done_index = page->index + 1;
2270 done = 1;
2271 break;
2272 }
2273 }
2274
2275
2276
2277
2278
2279
2280
2281 if (--wbc->nr_to_write <= 0 &&
2282 wbc->sync_mode == WB_SYNC_NONE) {
2283 done = 1;
2284 break;
2285 }
2286 }
2287 pagevec_release(&pvec);
2288 cond_resched();
2289 }
2290 if (!cycled && !done) {
2291
2292
2293
2294
2295
2296 cycled = 1;
2297 index = 0;
2298 end = writeback_index - 1;
2299 goto retry;
2300 }
2301 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2302 mapping->writeback_index = done_index;
2303
2304 return ret;
2305}
2306EXPORT_SYMBOL(write_cache_pages);
2307
2308
2309
2310
2311
2312static int __writepage(struct page *page, struct writeback_control *wbc,
2313 void *data)
2314{
2315 struct address_space *mapping = data;
2316 int ret = mapping->a_ops->writepage(page, wbc);
2317 mapping_set_error(mapping, ret);
2318 return ret;
2319}
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329int generic_writepages(struct address_space *mapping,
2330 struct writeback_control *wbc)
2331{
2332 struct blk_plug plug;
2333 int ret;
2334
2335
2336 if (!mapping->a_ops->writepage)
2337 return 0;
2338
2339 blk_start_plug(&plug);
2340 ret = write_cache_pages(mapping, wbc, __writepage, mapping);
2341 blk_finish_plug(&plug);
2342 return ret;
2343}
2344
2345EXPORT_SYMBOL(generic_writepages);
2346
2347int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
2348{
2349 int ret;
2350
2351 if (wbc->nr_to_write <= 0)
2352 return 0;
2353 if (mapping->a_ops->writepages)
2354 ret = mapping->a_ops->writepages(mapping, wbc);
2355 else
2356 ret = generic_writepages(mapping, wbc);
2357 return ret;
2358}
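
/*
 * write_one_page - write out a single page and optionally wait on I/O.
 * @page: the page to write (must be locked)
 * @wait: if true, wait on writeback before and after writing
 *
 * The page is unlocked on return.
 */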
2369int write_one_page(struct page *page, int wait)
2370{
2371 struct address_space *mapping = page->mapping;
2372 int ret = 0;
2373 struct writeback_control wbc = {
2374 .sync_mode = WB_SYNC_ALL,
2375 .nr_to_write = 1,
2376 };
2377
2378 BUG_ON(!PageLocked(page));
2379
2380 if (wait)
2381 wait_on_page_writeback(page);
2382
2383 if (clear_page_dirty_for_io(page)) {
2384 page_cache_get(page);
2385 ret = mapping->a_ops->writepage(page, &wbc);
2386 if (ret == 0 && wait) {
2387 wait_on_page_writeback(page);
2388 if (PageError(page))
2389 ret = -EIO;
2390 }
2391 page_cache_release(page);
2392 } else {
2393 unlock_page(page);
2394 }
2395 return ret;
2396}
2397EXPORT_SYMBOL(write_one_page);
2398
2399
2400
2401
2402int __set_page_dirty_no_writeback(struct page *page)
2403{
2404 if (!PageDirty(page))
2405 return !TestSetPageDirty(page);
2406 return 0;
2407}
2408
2409
2410
2411
2412
2413
2414
2415
2416void account_page_dirtied(struct page *page, struct address_space *mapping,
2417 struct mem_cgroup *memcg)
2418{
2419 struct inode *inode = mapping->host;
2420
2421 trace_writeback_dirty_page(page, mapping);
2422
2423 if (mapping_cap_account_dirty(mapping)) {
2424 struct bdi_writeback *wb;
2425
2426 inode_attach_wb(inode, page);
2427 wb = inode_to_wb(inode);
2428
2429 mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
2430 __inc_zone_page_state(page, NR_FILE_DIRTY);
2431 __inc_zone_page_state(page, NR_DIRTIED);
2432 __inc_wb_stat(wb, WB_RECLAIMABLE);
2433 __inc_wb_stat(wb, WB_DIRTIED);
2434 task_io_account_write(PAGE_CACHE_SIZE);
2435 current->nr_dirtied++;
2436 this_cpu_inc(bdp_ratelimits);
2437 }
2438}
2439EXPORT_SYMBOL(account_page_dirtied);
2440
2441
2442
2443
2444
2445
2446void account_page_cleaned(struct page *page, struct address_space *mapping,
2447 struct mem_cgroup *memcg, struct bdi_writeback *wb)
2448{
2449 if (mapping_cap_account_dirty(mapping)) {
2450 mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
2451 dec_zone_page_state(page, NR_FILE_DIRTY);
2452 dec_wb_stat(wb, WB_RECLAIMABLE);
2453 task_io_account_cancelled_write(PAGE_CACHE_SIZE);
2454 }
2455}
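
/*
 * For address_spaces which do not use buffers: mark the page dirty, tag
 * it in the radix tree, account it and dirty the inode.  The caller
 * should hold the page lock or otherwise pin the mapping to keep
 * truncation away.
 */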
2469int __set_page_dirty_nobuffers(struct page *page)
2470{
2471 struct mem_cgroup *memcg;
2472
2473 memcg = mem_cgroup_begin_page_stat(page);
2474 if (!TestSetPageDirty(page)) {
2475 struct address_space *mapping = page_mapping(page);
2476 unsigned long flags;
2477
2478 if (!mapping) {
2479 mem_cgroup_end_page_stat(memcg);
2480 return 1;
2481 }
2482
2483 spin_lock_irqsave(&mapping->tree_lock, flags);
2484 BUG_ON(page_mapping(page) != mapping);
2485 WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
2486 account_page_dirtied(page, mapping, memcg);
2487 radix_tree_tag_set(&mapping->page_tree, page_index(page),
2488 PAGECACHE_TAG_DIRTY);
2489 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2490 mem_cgroup_end_page_stat(memcg);
2491
2492 if (mapping->host) {
2493
2494 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
2495 }
2496 return 1;
2497 }
2498 mem_cgroup_end_page_stat(memcg);
2499 return 0;
2500}
2501EXPORT_SYMBOL(__set_page_dirty_nobuffers);
2502
2503
2504
2505
2506
2507
2508
2509
2510void account_page_redirty(struct page *page)
2511{
2512 struct address_space *mapping = page->mapping;
2513
2514 if (mapping && mapping_cap_account_dirty(mapping)) {
2515 struct inode *inode = mapping->host;
2516 struct bdi_writeback *wb;
2517 bool locked;
2518
2519 wb = unlocked_inode_to_wb_begin(inode, &locked);
2520 current->nr_dirtied--;
2521 dec_zone_page_state(page, NR_DIRTIED);
2522 dec_wb_stat(wb, WB_DIRTIED);
2523 unlocked_inode_to_wb_end(inode, locked);
2524 }
2525}
2526EXPORT_SYMBOL(account_page_redirty);
2527
2528
2529
2530
2531
2532
2533int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
2534{
2535 int ret;
2536
2537 wbc->pages_skipped++;
2538 ret = __set_page_dirty_nobuffers(page);
2539 account_page_redirty(page);
2540 return ret;
2541}
2542EXPORT_SYMBOL(redirty_page_for_writepage);
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555int set_page_dirty(struct page *page)
2556{
2557 struct address_space *mapping = page_mapping(page);
2558
2559 if (likely(mapping)) {
2560 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571 if (PageReclaim(page))
2572 ClearPageReclaim(page);
2573#ifdef CONFIG_BLOCK
2574 if (!spd)
2575 spd = __set_page_dirty_buffers;
2576#endif
2577 return (*spd)(page);
2578 }
2579 if (!PageDirty(page)) {
2580 if (!TestSetPageDirty(page))
2581 return 1;
2582 }
2583 return 0;
2584}
2585EXPORT_SYMBOL(set_page_dirty);
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597int set_page_dirty_lock(struct page *page)
2598{
2599 int ret;
2600
2601 lock_page(page);
2602 ret = set_page_dirty(page);
2603 unlock_page(page);
2604 return ret;
2605}
2606EXPORT_SYMBOL(set_page_dirty_lock);
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621void cancel_dirty_page(struct page *page)
2622{
2623 struct address_space *mapping = page_mapping(page);
2624
2625 if (mapping_cap_account_dirty(mapping)) {
2626 struct inode *inode = mapping->host;
2627 struct bdi_writeback *wb;
2628 struct mem_cgroup *memcg;
2629 bool locked;
2630
2631 memcg = mem_cgroup_begin_page_stat(page);
2632 wb = unlocked_inode_to_wb_begin(inode, &locked);
2633
2634 if (TestClearPageDirty(page))
2635 account_page_cleaned(page, mapping, memcg, wb);
2636
2637 unlocked_inode_to_wb_end(inode, locked);
2638 mem_cgroup_end_page_stat(memcg);
2639 } else {
2640 ClearPageDirty(page);
2641 }
2642}
2643EXPORT_SYMBOL(cancel_dirty_page);
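
/*
 * clear_page_dirty_for_io - prepare a dirty page for writeback.
 *
 * Clears the page's dirty bit and the corresponding dirty accounting,
 * first write-protecting userspace mappings via page_mkclean() so that
 * any re-dirtying through a mapping is noticed rather than lost.
 * Returns nonzero if the page was dirty.
 */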
2659int clear_page_dirty_for_io(struct page *page)
2660{
2661 struct address_space *mapping = page_mapping(page);
2662 int ret = 0;
2663
2664 BUG_ON(!PageLocked(page));
2665
2666 if (mapping && mapping_cap_account_dirty(mapping)) {
2667 struct inode *inode = mapping->host;
2668 struct bdi_writeback *wb;
2669 struct mem_cgroup *memcg;
2670 bool locked;
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697 if (page_mkclean(page))
2698 set_page_dirty(page);
2699
2700
2701
2702
2703
2704
2705
2706
2707 memcg = mem_cgroup_begin_page_stat(page);
2708 wb = unlocked_inode_to_wb_begin(inode, &locked);
2709 if (TestClearPageDirty(page)) {
2710 mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
2711 dec_zone_page_state(page, NR_FILE_DIRTY);
2712 dec_wb_stat(wb, WB_RECLAIMABLE);
2713 ret = 1;
2714 }
2715 unlocked_inode_to_wb_end(inode, locked);
2716 mem_cgroup_end_page_stat(memcg);
2717 return ret;
2718 }
2719 return TestClearPageDirty(page);
2720}
2721EXPORT_SYMBOL(clear_page_dirty_for_io);
2722
2723int test_clear_page_writeback(struct page *page)
2724{
2725 struct address_space *mapping = page_mapping(page);
2726 struct mem_cgroup *memcg;
2727 int ret;
2728
2729 memcg = mem_cgroup_begin_page_stat(page);
2730 if (mapping) {
2731 struct inode *inode = mapping->host;
2732 struct backing_dev_info *bdi = inode_to_bdi(inode);
2733 unsigned long flags;
2734
2735 spin_lock_irqsave(&mapping->tree_lock, flags);
2736 ret = TestClearPageWriteback(page);
2737 if (ret) {
2738 radix_tree_tag_clear(&mapping->page_tree,
2739 page_index(page),
2740 PAGECACHE_TAG_WRITEBACK);
2741 if (bdi_cap_account_writeback(bdi)) {
2742 struct bdi_writeback *wb = inode_to_wb(inode);
2743
2744 __dec_wb_stat(wb, WB_WRITEBACK);
2745 __wb_writeout_inc(wb);
2746 }
2747 }
2748 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2749 } else {
2750 ret = TestClearPageWriteback(page);
2751 }
2752 if (ret) {
2753 mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
2754 dec_zone_page_state(page, NR_WRITEBACK);
2755 inc_zone_page_state(page, NR_WRITTEN);
2756 }
2757 mem_cgroup_end_page_stat(memcg);
2758 return ret;
2759}
2760
2761int __test_set_page_writeback(struct page *page, bool keep_write)
2762{
2763 struct address_space *mapping = page_mapping(page);
2764 struct mem_cgroup *memcg;
2765 int ret;
2766
2767 memcg = mem_cgroup_begin_page_stat(page);
2768 if (mapping) {
2769 struct inode *inode = mapping->host;
2770 struct backing_dev_info *bdi = inode_to_bdi(inode);
2771 unsigned long flags;
2772
2773 spin_lock_irqsave(&mapping->tree_lock, flags);
2774 ret = TestSetPageWriteback(page);
2775 if (!ret) {
2776 radix_tree_tag_set(&mapping->page_tree,
2777 page_index(page),
2778 PAGECACHE_TAG_WRITEBACK);
2779 if (bdi_cap_account_writeback(bdi))
2780 __inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
2781 }
2782 if (!PageDirty(page))
2783 radix_tree_tag_clear(&mapping->page_tree,
2784 page_index(page),
2785 PAGECACHE_TAG_DIRTY);
2786 if (!keep_write)
2787 radix_tree_tag_clear(&mapping->page_tree,
2788 page_index(page),
2789 PAGECACHE_TAG_TOWRITE);
2790 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2791 } else {
2792 ret = TestSetPageWriteback(page);
2793 }
2794 if (!ret) {
2795 mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
2796 inc_zone_page_state(page, NR_WRITEBACK);
2797 }
2798 mem_cgroup_end_page_stat(memcg);
2799 return ret;
2800
2801}
2802EXPORT_SYMBOL(__test_set_page_writeback);
2803
2804
2805
2806
2807
2808int mapping_tagged(struct address_space *mapping, int tag)
2809{
2810 return radix_tree_tagged(&mapping->page_tree, tag);
2811}
2812EXPORT_SYMBOL(mapping_tagged);
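
/*
 * wait_for_stable_page() - wait for writeback to finish, if necessary.
 *
 * If the page's backing device requires stable pages while under
 * writeback, wait until writeback completes before letting the caller
 * modify the page.
 */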
2822void wait_for_stable_page(struct page *page)
2823{
2824 if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
2825 wait_on_page_writeback(page);
2826}
2827EXPORT_SYMBOL_GPL(wait_for_stable_page);
2828