/*
 * mm/page-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level.
 *
 * 10Apr2002	Andrew Morton
 *		Initial version
 */
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"

/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many pages
 * by raising pause time to max_pause when falls below it.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/* The following parameters are exported to sysctl via /proc/sys/vm */

/*
 * Start background writeback (via writeback threads) at this percentage
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks
 */
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty
 */
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
 * a full sync is triggered after this time elapses without any disk activity.
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

struct wb_domain global_wb_domain;

/* consolidated parameters for balance_dirty_pages() and its subroutines */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;
	struct dirty_throttle_control *gdtc;	/* only set in memcg dtc's */
#endif
	struct bdi_writeback	*wb;
	struct fprop_local_percpu *wb_completions;

	unsigned long		avail;		/* dirtyable */
	unsigned long		dirty;		/* file_dirty + write + nfs */
	unsigned long		thresh;		/* dirty threshold */
	unsigned long		bg_thresh;	/* dirty background threshold */

	unsigned long		wb_dirty;	/* per-wb counterparts */
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;
};

/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrarily chosen number. The longer the period, the slower fractions will
 * reflect changes in current writeout rate.
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
154
155#ifdef CONFIG_CGROUP_WRITEBACK
156
157#define GDTC_INIT(__wb) .wb = (__wb), \
158 .dom = &global_wb_domain, \
159 .wb_completions = &(__wb)->completions
160
161#define GDTC_INIT_NO_WB .dom = &global_wb_domain
162
163#define MDTC_INIT(__wb, __gdtc) .wb = (__wb), \
164 .dom = mem_cgroup_wb_domain(__wb), \
165 .wb_completions = &(__wb)->memcg_completions, \
166 .gdtc = __gdtc
167
168static bool mdtc_valid(struct dirty_throttle_control *dtc)
169{
170 return dtc->dom;
171}
172
173static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
174{
175 return dtc->dom;
176}
177
178static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
179{
180 return mdtc->gdtc;
181}
182
183static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
184{
185 return &wb->memcg_completions;
186}
187
188static void wb_min_max_ratio(struct bdi_writeback *wb,
189 unsigned long *minp, unsigned long *maxp)
190{
191 unsigned long this_bw = wb->avg_write_bandwidth;
192 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
193 unsigned long long min = wb->bdi->min_ratio;
194 unsigned long long max = wb->bdi->max_ratio;

	/*
	 * @wb may already be clean by the time control reaches here and
	 * the total may not include its bw.
	 */
200 if (this_bw < tot_bw) {
201 if (min) {
202 min *= this_bw;
203 do_div(min, tot_bw);
204 }
205 if (max < 100) {
206 max *= this_bw;
207 do_div(max, tot_bw);
208 }
209 }
210
211 *minp = min;
212 *maxp = max;
213}
214
215#else
216
217#define GDTC_INIT(__wb) .wb = (__wb), \
218 .wb_completions = &(__wb)->completions
219#define GDTC_INIT_NO_WB
220#define MDTC_INIT(__wb, __gdtc)
221
222static bool mdtc_valid(struct dirty_throttle_control *dtc)
223{
224 return false;
225}
226
227static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
228{
229 return &global_wb_domain;
230}
231
232static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
233{
234 return NULL;
235}
236
237static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
238{
239 return NULL;
240}
241
242static void wb_min_max_ratio(struct bdi_writeback *wb,
243 unsigned long *minp, unsigned long *maxp)
244{
245 *minp = wb->bdi->min_ratio;
246 *maxp = wb->bdi->max_ratio;
247}
248
249#endif

/*
 * In a memory zone, there is a certain amount of pages we consider
 * available for the page cache, which is essentially the number of
 * free and reclaimable pages, minus some zone reserves to protect
 * lowmem and the ability to uphold the zone's watermarks without
 * requiring writeback.
 *
 * This number of dirtyable pages is the base value of which the
 * user-configurable dirty ratio is the effective number of pages that
 * are allowed to be actually dirtied.  Per individual zone, or
 * globally by using the sum of dirtyable pages over all zones.
 *
 * Because the user is allowed to specify the dirty limit globally as
 * absolute number of bytes, calculating the per-zone dirty limit can
 * require translating the configured limit into a percentage of
 * global dirtyable memory first.
 */

/**
 * zone_dirtyable_memory - number of dirtyable pages in a zone
 * @zone: the zone
 *
 * Returns the zone's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-zone dirty limits.
 */
276static unsigned long zone_dirtyable_memory(struct zone *zone)
277{
278 unsigned long nr_pages;
279
280 nr_pages = zone_page_state(zone, NR_FREE_PAGES);
281 nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
282
283 nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
284 nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
285
286 return nr_pages;
287}
288
289static unsigned long highmem_dirtyable_memory(unsigned long total)
290{
291#ifdef CONFIG_HIGHMEM
292 int node;
293 unsigned long x = 0;
294
295 for_each_node_state(node, N_HIGH_MEMORY) {
296 struct zone *z = &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
297
298 x += zone_dirtyable_memory(z);
299 }

	/*
	 * Unreclaimable memory (kernel memory or anonymous memory
	 * pinned in memory) cannot be subtracted from the dirtyable
	 * memory.  In pathological cases the summed-up value can wrap
	 * around and look negative when interpreted as a signed value;
	 * treat that as "no dirtyable highmem".
	 */
	if ((long)x < 0)
		x = 0;

	/*
	 * Make sure that the number of highmem pages is never larger
	 * than the number of the total dirtyable memory. This can only
	 * occur in very strange VM situations but we want to make sure
	 * that this does not occur.
	 */
	return min(x, total);
319#else
320 return 0;
321#endif
322}

/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Returns the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */
330static unsigned long global_dirtyable_memory(void)
331{
332 unsigned long x;
333
334 x = global_page_state(NR_FREE_PAGES);
335 x -= min(x, dirty_balance_reserve);
336
337 x += global_page_state(NR_INACTIVE_FILE);
338 x += global_page_state(NR_ACTIVE_FILE);
339
340 if (!vm_highmem_is_dirtyable)
341 x -= highmem_dirtyable_memory(x);
342
343 return x + 1;
344}

/**
 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
 * @dtc: dirty_throttle_control of interest
 *
 * Calculate @dtc->thresh and ->bg_thresh considering
 * vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}.  The caller
 * must ensure that @dtc->avail is set before calling this function.  The
 * dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and
 * real-time tasks.
 */
356static void domain_dirty_limits(struct dirty_throttle_control *dtc)
357{
358 const unsigned long available_memory = dtc->avail;
359 struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
360 unsigned long bytes = vm_dirty_bytes;
361 unsigned long bg_bytes = dirty_background_bytes;
362 unsigned long ratio = vm_dirty_ratio;
363 unsigned long bg_ratio = dirty_background_ratio;
364 unsigned long thresh;
365 unsigned long bg_thresh;
366 struct task_struct *tsk;
367
368
369 if (gdtc) {
370 unsigned long global_avail = gdtc->avail;
371
372
373
374
375
376
377 if (bytes)
378 ratio = min(DIV_ROUND_UP(bytes, PAGE_SIZE) * 100 /
379 global_avail, 100UL);
380 if (bg_bytes)
381 bg_ratio = min(DIV_ROUND_UP(bg_bytes, PAGE_SIZE) * 100 /
382 global_avail, 100UL);
383 bytes = bg_bytes = 0;
384 }
385
386 if (bytes)
387 thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
388 else
389 thresh = (ratio * available_memory) / 100;
390
391 if (bg_bytes)
392 bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
393 else
394 bg_thresh = (bg_ratio * available_memory) / 100;
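	/*
	 * Worked example (assuming 4KiB pages): with roughly 1M dirtyable
	 * pages (~4GiB) and the defaults vm_dirty_ratio = 20 and
	 * dirty_background_ratio = 10, this yields thresh ~= 200k pages
	 * (~800MiB) and bg_thresh ~= 100k pages (~400MiB).
	 */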
395
396 if (bg_thresh >= thresh)
397 bg_thresh = thresh / 2;
398 tsk = current;
399 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
400 bg_thresh += bg_thresh / 4;
401 thresh += thresh / 4;
402 }
403 dtc->thresh = thresh;
404 dtc->bg_thresh = bg_thresh;
405
406
407 if (!gdtc)
408 trace_global_dirty_state(bg_thresh, thresh);
409}
410
/**
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 * @pbackground: out parameter for bg_thresh
 * @pdirty: out parameter for thresh
 *
 * Calculate bg_thresh and thresh for global_wb_domain.  See
 * domain_dirty_limits() for details.
 */
419void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
420{
421 struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
422
423 gdtc.avail = global_dirtyable_memory();
424 domain_dirty_limits(&gdtc);
425
426 *pbackground = gdtc.bg_thresh;
427 *pdirty = gdtc.thresh;
428}
429
/**
 * zone_dirty_limit - maximum number of dirty pages allowed in a zone
 * @zone: the zone
 *
 * Returns the maximum number of dirty pages allowed in a zone, based
 * on the zone's dirtyable memory.
 */
437static unsigned long zone_dirty_limit(struct zone *zone)
438{
439 unsigned long zone_memory = zone_dirtyable_memory(zone);
440 struct task_struct *tsk = current;
441 unsigned long dirty;
442
443 if (vm_dirty_bytes)
444 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
445 zone_memory / global_dirtyable_memory();
446 else
447 dirty = vm_dirty_ratio * zone_memory / 100;
448
449 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
450 dirty += dirty / 4;
451
452 return dirty;
453}
454
/**
 * zone_dirty_ok - tells whether a zone is within its dirty limits
 * @zone: the zone to check
 *
 * Returns %true when the dirty pages in @zone are within the zone's
 * dirty limit, %false if the limit is exceeded.
 */
462bool zone_dirty_ok(struct zone *zone)
463{
464 unsigned long limit = zone_dirty_limit(zone);
465
466 return zone_page_state(zone, NR_FILE_DIRTY) +
467 zone_page_state(zone, NR_UNSTABLE_NFS) +
468 zone_page_state(zone, NR_WRITEBACK) <= limit;
469}
470
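/*
 * sysctl handler for /proc/sys/vm/dirty_background_ratio: a successful
 * write clears dirty_background_bytes, so that only one of the two knobs
 * is in effect at a time.  dirty_background_bytes_handler() below is the
 * mirror image.
 */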
471int dirty_background_ratio_handler(struct ctl_table *table, int write,
472 void __user *buffer, size_t *lenp,
473 loff_t *ppos)
474{
475 int ret;
476
477 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
478 if (ret == 0 && write)
479 dirty_background_bytes = 0;
480 return ret;
481}
482
483int dirty_background_bytes_handler(struct ctl_table *table, int write,
484 void __user *buffer, size_t *lenp,
485 loff_t *ppos)
486{
487 int ret;
488
489 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
490 if (ret == 0 && write)
491 dirty_background_ratio = 0;
492 return ret;
493}
494
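/*
 * sysctl handler for /proc/sys/vm/dirty_ratio: on a successful change it
 * clears vm_dirty_bytes and recomputes ratelimit_pages via
 * writeback_set_ratelimit().  dirty_bytes_handler() below is the mirror
 * image for /proc/sys/vm/dirty_bytes.
 */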
495int dirty_ratio_handler(struct ctl_table *table, int write,
496 void __user *buffer, size_t *lenp,
497 loff_t *ppos)
498{
499 int old_ratio = vm_dirty_ratio;
500 int ret;
501
502 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
503 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
504 writeback_set_ratelimit();
505 vm_dirty_bytes = 0;
506 }
507 return ret;
508}
509
510int dirty_bytes_handler(struct ctl_table *table, int write,
511 void __user *buffer, size_t *lenp,
512 loff_t *ppos)
513{
514 unsigned long old_bytes = vm_dirty_bytes;
515 int ret;
516
517 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
518 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
519 writeback_set_ratelimit();
520 vm_dirty_ratio = 0;
521 }
522 return ret;
523}
524
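/*
 * Compute the next expiry of the writeout-completion aging timer.
 * A period_time of 0 means "timer inactive", so never return 0.
 */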
525static unsigned long wp_next_time(unsigned long cur_time)
526{
527 cur_time += VM_COMPLETIONS_PERIOD_LEN;
528
529 if (!cur_time)
530 return 1;
531 return cur_time;
532}
533
534static void wb_domain_writeout_inc(struct wb_domain *dom,
535 struct fprop_local_percpu *completions,
536 unsigned int max_prop_frac)
537{
538 __fprop_inc_percpu_max(&dom->completions, completions,
539 max_prop_frac);
540
541 if (!unlikely(dom->period_time)) {
542
543
544
545
546
547
548 dom->period_time = wp_next_time(jiffies);
549 mod_timer(&dom->period_timer, dom->period_time);
550 }
551}

/*
 * Increment @wb's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
557static inline void __wb_writeout_inc(struct bdi_writeback *wb)
558{
559 struct wb_domain *cgdom;
560
561 __inc_wb_stat(wb, WB_WRITTEN);
562 wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
563 wb->bdi->max_prop_frac);
564
565 cgdom = mem_cgroup_wb_domain(wb);
566 if (cgdom)
567 wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
568 wb->bdi->max_prop_frac);
569}
570
571void wb_writeout_inc(struct bdi_writeback *wb)
572{
573 unsigned long flags;
574
575 local_irq_save(flags);
576 __wb_writeout_inc(wb);
577 local_irq_restore(flags);
578}
579EXPORT_SYMBOL_GPL(wb_writeout_inc);
580
/*
 * On idle system, we can be called long after we scheduled because we use
 * deferred timers so count with missed periods.
 */
585static void writeout_period(unsigned long t)
586{
587 struct wb_domain *dom = (void *)t;
588 int miss_periods = (jiffies - dom->period_time) /
589 VM_COMPLETIONS_PERIOD_LEN;
590
591 if (fprop_new_period(&dom->completions, miss_periods + 1)) {
592 dom->period_time = wp_next_time(dom->period_time +
593 miss_periods * VM_COMPLETIONS_PERIOD_LEN);
594 mod_timer(&dom->period_timer, dom->period_time);
595 } else {
596
597
598
599
600 dom->period_time = 0;
601 }
602}
603
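/*
 * Initialize a wb_domain: zero the structure, set up the deferrable aging
 * timer and the completion-fraction bookkeeping used for proportional
 * writeout accounting.
 */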
604int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
605{
606 memset(dom, 0, sizeof(*dom));
607
608 spin_lock_init(&dom->lock);
609
610 init_timer_deferrable(&dom->period_timer);
611 dom->period_timer.function = writeout_period;
612 dom->period_timer.data = (unsigned long)dom;
613
614 dom->dirty_limit_tstamp = jiffies;
615
616 return fprop_global_init(&dom->completions, gfp);
617}
618
619#ifdef CONFIG_CGROUP_WRITEBACK
620void wb_domain_exit(struct wb_domain *dom)
621{
622 del_timer_sync(&dom->period_timer);
623 fprop_global_destroy(&dom->completions);
624}
625#endif
626
/*
 * bdi_min_ratio keeps the minimum bandwidth ratio reserved across all
 * registered backing devices.  The sum of the per-bdi min_ratio settings,
 * for obvious reasons, can not exceed 100%.
 */
632static unsigned int bdi_min_ratio;
633
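/*
 * Set @bdi's minimum share of the writeback bandwidth.  The request is
 * rejected if it would exceed the bdi's max_ratio or push the sum of all
 * minimum ratios to 100% or more.
 */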
634int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
635{
636 int ret = 0;
637
638 spin_lock_bh(&bdi_lock);
639 if (min_ratio > bdi->max_ratio) {
640 ret = -EINVAL;
641 } else {
642 min_ratio -= bdi->min_ratio;
643 if (bdi_min_ratio + min_ratio < 100) {
644 bdi_min_ratio += min_ratio;
645 bdi->min_ratio += min_ratio;
646 } else {
647 ret = -EINVAL;
648 }
649 }
650 spin_unlock_bh(&bdi_lock);
651
652 return ret;
653}
654
655int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
656{
657 int ret = 0;
658
659 if (max_ratio > 100)
660 return -EINVAL;
661
662 spin_lock_bh(&bdi_lock);
663 if (bdi->min_ratio > max_ratio) {
664 ret = -EINVAL;
665 } else {
666 bdi->max_ratio = max_ratio;
667 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
668 }
669 spin_unlock_bh(&bdi_lock);
670
671 return ret;
672}
673EXPORT_SYMBOL(bdi_set_max_ratio);
674
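/*
 * The dirty page count below which a task is left entirely unthrottled:
 * halfway between the background and the hard dirty thresholds.
 */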
675static unsigned long dirty_freerun_ceiling(unsigned long thresh,
676 unsigned long bg_thresh)
677{
678 return (thresh + bg_thresh) / 2;
679}
680
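/*
 * The hard limit never drops below the domain-wide dirty_limit, which is
 * only lowered gradually by update_dirty_limit().
 */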
681static unsigned long hard_dirty_limit(struct wb_domain *dom,
682 unsigned long thresh)
683{
684 return max(thresh, dom->dirty_limit);
685}
686
/*
 * Memory which can be further allocated to a memcg domain is capped by
 * system-wide clean memory excluding the amount being used in the domain.
 */
691static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
692 unsigned long filepages, unsigned long headroom)
693{
694 struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
695 unsigned long clean = filepages - min(filepages, mdtc->dirty);
696 unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
697 unsigned long other_clean = global_clean - min(global_clean, clean);
698
699 mdtc->avail = filepages + min(headroom, other_clean);
700}
701
/**
 * __wb_calc_thresh - @wb's share of dirty throttling threshold
 * @dtc: dirty_throttle_context of interest
 *
 * Returns @wb's dirty limit in pages. The term "dirty" in the context of
 * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
 *
 * Note that balance_dirty_pages() will only seriously take it as a hard limit
 * when sleeping max_pause per page is not enough to keep the dirty pages under
 * control. For example, when the device is completely stalled due to some
 * error conditions, or when there are 1000 dd tasks writing to a slow 10MB/s
 * USB key.
 *
 * It allocates high/low dirty limits to fast/slow devices, in order to prevent
 * - starving fast devices
 * - piling up dirty pages (that will take long time to sync) on slow devices
 *
 * The wb's share of dirty limit will be adapting to its throughput and
 * bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
 */
723static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
724{
725 struct wb_domain *dom = dtc_dom(dtc);
726 unsigned long thresh = dtc->thresh;
727 u64 wb_thresh;
728 long numerator, denominator;
729 unsigned long wb_min_ratio, wb_max_ratio;
730
731
732
733
734 fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
735 &numerator, &denominator);
736
737 wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
738 wb_thresh *= numerator;
739 do_div(wb_thresh, denominator);
740
741 wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
742
743 wb_thresh += (thresh * wb_min_ratio) / 100;
744 if (wb_thresh > (thresh * wb_max_ratio) / 100)
745 wb_thresh = thresh * wb_max_ratio / 100;
746
747 return wb_thresh;
748}
749
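/*
 * Convenience wrapper around __wb_calc_thresh() for callers that only have
 * a bdi_writeback and a global dirty threshold.
 */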
750unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
751{
752 struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
753 .thresh = thresh };
754 return __wb_calc_thresh(&gdtc);
755}
756
/*
 *                           setpoint - dirty 3
 *        f(dirty) := 1.0 + (----------------)
 *                           limit - setpoint
 *
 * it's a 3rd order polynomial that subjects to
 *
 * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
 * (2) f(setpoint) = 1.0 => the balance goal
 * (3) f(limit)    = 0   => the hard limit
 *
 * if (dirty < setpoint) scale up pos_ratio, to fill the deficit
 * if (dirty > setpoint) scale down pos_ratio, to reduce the excess
 */
771static long long pos_ratio_polynom(unsigned long setpoint,
772 unsigned long dirty,
773 unsigned long limit)
774{
775 long long pos_ratio;
776 long x;
777
778 x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
779 (limit - setpoint) | 1);
780 pos_ratio = x;
781 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
782 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
783 pos_ratio += 1 << RATELIMIT_CALC_SHIFT;
784
785 return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
786}
787
/*
 * Dirty position control.
 *
 * (o) global/wb setpoints
 *
 * We want the dirty pages to be balanced around the global/wb setpoints.
 * When the number of dirty pages is higher/lower than the setpoint, the
 * dirty position control ratio (and hence task dirty ratelimit) will be
 * decreased/increased to bring the dirty pages back to the setpoint.
 *
 *     pos_ratio = wb_position_ratio()
 *     task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
 *
 *     if (dirty < setpoint) scale up   pos_ratio
 *     if (dirty > setpoint) scale down pos_ratio
 *
 *     if (wb_dirty < wb_setpoint) scale up   pos_ratio
 *     if (wb_dirty > wb_setpoint) scale down pos_ratio
 *
 *     task_ratelimit is capped to 0 when the dirty pages reach @limit.
 *
 * The global control line is the third order polynomial described above
 * pos_ratio_polynom(), anchored at (setpoint, 1.0) and (limit, 0).  The
 * per-wb control line is a negative-slope linear function anchored at
 * (wb_setpoint, 1.0), with its slope depending on the write bandwidth and
 * the wb's share of the dirty threshold, so that fast devices get a wide
 * safety margin while slow or overloaded devices are throttled close to
 * their own share.
 */
863static void wb_position_ratio(struct dirty_throttle_control *dtc)
864{
865 struct bdi_writeback *wb = dtc->wb;
866 unsigned long write_bw = wb->avg_write_bandwidth;
867 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
868 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
869 unsigned long wb_thresh = dtc->wb_thresh;
870 unsigned long x_intercept;
871 unsigned long setpoint;
872 unsigned long wb_setpoint;
873 unsigned long span;
874 long long pos_ratio;
875 long x;
876
877 dtc->pos_ratio = 0;
878
879 if (unlikely(dtc->dirty >= limit))
880 return;
881
882
883
884
885
886
887 setpoint = (freerun + limit) / 2;
888 pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);
889
	/*
	 * The strictlimit feature is a tool preventing mistrusted filesystems
	 * from growing a large number of dirty pages before throttling. For
	 * such filesystems balance_dirty_pages always checks wb counters
	 * against wb limits. Even if global "nr_dirty" is under "freerun".
	 * This is especially important for fuse which sets bdi->max_ratio to
	 * 1% by default. Without strictlimit feature, fuse writeback may
	 * consume arbitrary amount of RAM because it is accounted in
	 * NR_WRITEBACK_TEMP which is not involved in calculating "nr_dirty".
	 *
	 * Here, in wb_position_ratio(), we calculate pos_ratio based on
	 * two values: wb_dirty and wb_thresh. They are about the same as the
	 * global "dirty" and "thresh" but scaled down to the wb's share, so
	 * the wb is throttled well before the global limits are reached, and
	 * the final pos_ratio is the lower of the global and the wb value.
	 */
915 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
916 long long wb_pos_ratio;
917
918 if (dtc->wb_dirty < 8) {
919 dtc->pos_ratio = min_t(long long, pos_ratio * 2,
920 2 << RATELIMIT_CALC_SHIFT);
921 return;
922 }
923
924 if (dtc->wb_dirty >= wb_thresh)
925 return;
926
927 wb_setpoint = dirty_freerun_ceiling(wb_thresh,
928 dtc->wb_bg_thresh);
929
930 if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
931 return;
932
933 wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
934 wb_thresh);
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957 dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
958 return;
959 }
960
	/*
	 * We have computed basic pos_ratio above based on global situation. If
	 * the wb is over/under its share of dirty pages, we want to scale
	 * pos_ratio further down/up. That is done by the following mechanism.
	 */

	/*
	 * wb setpoint
	 *
	 *        f(wb_dirty) := 1.0 + k * (wb_dirty - wb_setpoint)
	 *
	 *                        x_intercept - wb_dirty
	 *                     := --------------------------
	 *                        x_intercept - wb_setpoint
	 *
	 * The main wb control line is a linear function that subjects to
	 *
	 * (1) f(wb_setpoint) = 1.0
	 * (2) k = - 1 / (8 * write_bw)  (in single wb case)
	 *     or equally: x_intercept = wb_setpoint + 8 * write_bw
	 *
	 * For single wb case, the dirty pages are observed to fluctuate
	 * regularly within range
	 *        [wb_setpoint - write_bw/2, wb_setpoint + write_bw/2]
	 * for various filesystems, where (2) can yield in a reasonable 12.5%
	 * fluctuation range for pos_ratio.
	 *
	 * For JBOD case, wb_thresh (not wb_dirty!) could fluctuate up to its
	 * own size, so move the slope over accordingly and choose a slope that
	 * yields 100% pos_ratio fluctuation on suddenly doubled wb_thresh.
	 */
992 if (unlikely(wb_thresh > dtc->thresh))
993 wb_thresh = dtc->thresh;
994
995
996
997
998
999
1000
1001 wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
	/*
	 * scale global setpoint to wb's:
	 *	wb_setpoint = setpoint * wb_thresh / thresh
	 */
1006 x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
1007 wb_setpoint = setpoint * (u64)x >> 16;

	/*
	 * Use span=(8*write_bw) in single wb case as indicated by
	 * (thresh - wb_thresh ~= 0) and transit to wb_thresh in JBOD case.
	 *
	 *        wb_thresh                    thresh - wb_thresh
	 * span = --------- * (8 * write_bw) + ------------------ * wb_thresh
	 *          thresh                            thresh
	 */
1016 span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
1017 x_intercept = wb_setpoint + span;
1018
1019 if (dtc->wb_dirty < x_intercept - span / 4) {
1020 pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
1021 (x_intercept - wb_setpoint) | 1);
1022 } else
1023 pos_ratio /= 4;
1024
1025
1026
1027
1028
1029
1030 x_intercept = wb_thresh / 2;
1031 if (dtc->wb_dirty < x_intercept) {
1032 if (dtc->wb_dirty > x_intercept / 8)
1033 pos_ratio = div_u64(pos_ratio * x_intercept,
1034 dtc->wb_dirty);
1035 else
1036 pos_ratio *= 8;
1037 }
1038
1039 dtc->pos_ratio = pos_ratio;
1040}
1041
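/*
 * Track the write bandwidth of @wb as a moving average over a ~3 second
 * window: write_bandwidth follows the raw measurement, avg_write_bandwidth
 * adds one more level of smoothing on top of it.
 */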
1042static void wb_update_write_bandwidth(struct bdi_writeback *wb,
1043 unsigned long elapsed,
1044 unsigned long written)
1045{
1046 const unsigned long period = roundup_pow_of_two(3 * HZ);
1047 unsigned long avg = wb->avg_write_bandwidth;
1048 unsigned long old = wb->write_bandwidth;
1049 u64 bw;
1050
	/*
	 * bw = written * HZ / elapsed
	 *
	 *                   bw * elapsed + write_bandwidth * (period - elapsed)
	 * write_bandwidth = ---------------------------------------------------
	 *                                          period
	 *
	 * @written may have decreased due to account_page_redirty().
	 * Avoid underflowing @bw calculation.
	 */
1061 bw = written - min(written, wb->written_stamp);
1062 bw *= HZ;
1063 if (unlikely(elapsed > period)) {
1064 do_div(bw, elapsed);
1065 avg = bw;
1066 goto out;
1067 }
1068 bw += (u64)wb->write_bandwidth * (period - elapsed);
1069 bw >>= ilog2(period);
1070
1071
1072
1073
1074 if (avg > old && old >= (unsigned long)bw)
1075 avg -= (avg - old) >> 3;
1076
1077 if (avg < old && old <= (unsigned long)bw)
1078 avg += (old - avg) >> 3;
1079
1080out:
1081
1082 avg = max(avg, 1LU);
1083 if (wb_has_dirty_io(wb)) {
1084 long delta = avg - wb->avg_write_bandwidth;
1085 WARN_ON_ONCE(atomic_long_add_return(delta,
1086 &wb->bdi->tot_write_bandwidth) <= 0);
1087 }
1088 wb->write_bandwidth = bw;
1089 wb->avg_write_bandwidth = avg;
1090}
1091
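/*
 * Track the domain-wide hard dirty limit: follow a rising threshold
 * immediately, but only decay towards a falling one gradually (1/32 per
 * update) so that dom->dirty_limit stays above the current dirty count.
 */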
1092static void update_dirty_limit(struct dirty_throttle_control *dtc)
1093{
1094 struct wb_domain *dom = dtc_dom(dtc);
1095 unsigned long thresh = dtc->thresh;
1096 unsigned long limit = dom->dirty_limit;
1097
1098
1099
1100
1101 if (limit < thresh) {
1102 limit = thresh;
1103 goto update;
1104 }
1105
1106
1107
1108
1109
1110
1111 thresh = max(thresh, dtc->dirty);
1112 if (limit > thresh) {
1113 limit -= (limit - thresh) >> 5;
1114 goto update;
1115 }
1116 return;
1117update:
1118 dom->dirty_limit = limit;
1119}
1120
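/*
 * Refresh the domain dirty limit at most once per BANDWIDTH_INTERVAL,
 * rechecking the timestamp under dom->lock to avoid duplicate updates.
 */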
1121static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
1122 unsigned long now)
1123{
1124 struct wb_domain *dom = dtc_dom(dtc);
1125
1126
1127
1128
1129 if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
1130 return;
1131
1132 spin_lock(&dom->lock);
1133 if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
1134 update_dirty_limit(dtc);
1135 dom->dirty_limit_tstamp = now;
1136 }
1137 spin_unlock(&dom->lock);
1138}
1139
/*
 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal wb tasks will be curbed at or below it in long term.
 * Obviously it should be around (write_bw / N) when there are N dd tasks.
 */
1146static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
1147 unsigned long dirtied,
1148 unsigned long elapsed)
1149{
1150 struct bdi_writeback *wb = dtc->wb;
1151 unsigned long dirty = dtc->dirty;
1152 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
1153 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
1154 unsigned long setpoint = (freerun + limit) / 2;
1155 unsigned long write_bw = wb->avg_write_bandwidth;
1156 unsigned long dirty_ratelimit = wb->dirty_ratelimit;
1157 unsigned long dirty_rate;
1158 unsigned long task_ratelimit;
1159 unsigned long balanced_dirty_ratelimit;
1160 unsigned long step;
1161 unsigned long x;
1162
1163
1164
1165
1166
1167 dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;
1168
1169
1170
1171
1172 task_ratelimit = (u64)dirty_ratelimit *
1173 dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
1174 task_ratelimit++;
1175
	/*
	 * A linear estimation of the "balanced" throttle rate. The theory is,
	 * if there are N dd tasks, each throttled at task_ratelimit, the wb's
	 * dirty rate will be measured to be (N * task_ratelimit). So the below
	 * formula will yield the balanced rate limit (write_bw / N).
	 *
	 * Note that the expanded form is not a pure rate feedback:
	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate)		     (1)
	 * but also takes pos_ratio into account:
	 *	rate_(i+1) = rate_(i) * (write_bw / dirty_rate) * pos_ratio  (2)
	 *
	 * (1) is not realistic because pos_ratio also takes part in balancing
	 * the dirty rate: when dirty pages sit away from the setpoint,
	 * pos_ratio deviates from 1 and already pushes the dirty rate back.
	 * Feeding pos_ratio in as well, as (2) does, keeps the two feedback
	 * loops from fighting each other and lets dirty_ratelimit converge
	 * to the balanced value (write_bw / N) while pos_ratio alone tracks
	 * the position error around the setpoint.
	 */
1206 balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
1207 dirty_rate | 1);
1208
1209
1210
1211 if (unlikely(balanced_dirty_ratelimit > write_bw))
1212 balanced_dirty_ratelimit = write_bw;
1213

	/*
	 * We could safely do dirty_ratelimit = balanced_dirty_ratelimit;
	 * however to get a more stable dirty_ratelimit, the below mechanism is
	 * used: only move dirty_ratelimit when both the estimated rate and the
	 * momentary task rate agree that it is off in the same direction.
	 *
	 *	task_ratelimit - dirty_ratelimit
	 *	= (pos_ratio - 1) * dirty_ratelimit
	 * is a measure of the position error, while
	 *	balanced_dirty_ratelimit - dirty_ratelimit
	 * is a measure of the rate error.  If the two errors have the same
	 * sign, dirty_ratelimit is obviously off and is stepped towards
	 * balanced_dirty_ratelimit (but never past task_ratelimit, to avoid
	 * overshooting while the dirty pages are still away from the
	 * setpoint).  If they disagree, the deviation is likely a transient
	 * caused by the position error and dirty_ratelimit is left alone,
	 * which filters out most of the fluctuation.
	 */
1248 step = 0;
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
1262 dirty = dtc->wb_dirty;
1263 if (dtc->wb_dirty < 8)
1264 setpoint = dtc->wb_dirty + 1;
1265 else
1266 setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
1267 }
1268
1269 if (dirty < setpoint) {
1270 x = min3(wb->balanced_dirty_ratelimit,
1271 balanced_dirty_ratelimit, task_ratelimit);
1272 if (dirty_ratelimit < x)
1273 step = x - dirty_ratelimit;
1274 } else {
1275 x = max3(wb->balanced_dirty_ratelimit,
1276 balanced_dirty_ratelimit, task_ratelimit);
1277 if (dirty_ratelimit > x)
1278 step = dirty_ratelimit - x;
1279 }
1280
1281
1282
1283
1284
1285
1286 step >>= dirty_ratelimit / (2 * step + 1);
1287
1288
1289
1290 step = (step + 7) / 8;
1291
1292 if (dirty_ratelimit < balanced_dirty_ratelimit)
1293 dirty_ratelimit += step;
1294 else
1295 dirty_ratelimit -= step;
1296
1297 wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
1298 wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
1299
1300 trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
1301}
1302
1303static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
1304 struct dirty_throttle_control *mdtc,
1305 unsigned long start_time,
1306 bool update_ratelimit)
1307{
1308 struct bdi_writeback *wb = gdtc->wb;
1309 unsigned long now = jiffies;
1310 unsigned long elapsed = now - wb->bw_time_stamp;
1311 unsigned long dirtied;
1312 unsigned long written;
1313
1314 lockdep_assert_held(&wb->list_lock);
1315
1316
1317
1318
1319 if (elapsed < BANDWIDTH_INTERVAL)
1320 return;
1321
1322 dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
1323 written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
1324
1325
1326
1327
1328
1329 if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
1330 goto snapshot;
1331
1332 if (update_ratelimit) {
1333 domain_update_bandwidth(gdtc, now);
1334 wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
1335
1336
1337
1338
1339
1340 if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
1341 domain_update_bandwidth(mdtc, now);
1342 wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
1343 }
1344 }
1345 wb_update_write_bandwidth(wb, elapsed, written);
1346
1347snapshot:
1348 wb->dirtied_stamp = dirtied;
1349 wb->written_stamp = written;
1350 wb->bw_time_stamp = now;
1351}
1352
1353void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
1354{
1355 struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
1356
1357 __wb_update_bandwidth(&gdtc, NULL, start_time, false);
1358}
1359
/*
 * After a task dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to start dirty throttling.
 *
 * If dirty_poll_interval is too low, big NUMA machines will call the expensive
 * global_page_state() too often. So scale it near-sqrt to the safety margin
 * (the number of pages we may dirty without exceeding the dirty limits).
 */
1368static unsigned long dirty_poll_interval(unsigned long dirty,
1369 unsigned long thresh)
1370{
1371 if (thresh > dirty)
1372 return 1UL << (ilog2(thresh - dirty) >> 1);
1373
1374 return 1;
1375}
1376
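/*
 * Longest pause allowed in balance_dirty_pages(): scale with the time it
 * would take the device to write back the wb's dirty pages, capped at
 * MAX_PAUSE.
 */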
1377static unsigned long wb_max_pause(struct bdi_writeback *wb,
1378 unsigned long wb_dirty)
1379{
1380 unsigned long bw = wb->avg_write_bandwidth;
1381 unsigned long t;
1382
1383
1384
1385
1386
1387
1388
1389
1390 t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
1391 t++;
1392
1393 return min_t(unsigned long, t, MAX_PAUSE);
1394}
1395
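/*
 * Choose the target pause (and the matching nr_dirtied_pause budget) for
 * one balance_dirty_pages() cycle: roughly 10ms for a single dirtier,
 * stretched for many concurrent dirtiers, and never more than half of
 * max_pause.
 */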
1396static long wb_min_pause(struct bdi_writeback *wb,
1397 long max_pause,
1398 unsigned long task_ratelimit,
1399 unsigned long dirty_ratelimit,
1400 int *nr_dirtied_pause)
1401{
1402 long hi = ilog2(wb->avg_write_bandwidth);
1403 long lo = ilog2(wb->dirty_ratelimit);
1404 long t;
1405 long pause;
1406 int pages;
1407
1408
1409 t = max(1, HZ / 100);
1410
1411
1412
1413
1414
1415
1416
1417 if (hi > lo)
1418 t += (hi - lo) * (10 * HZ) / 1024;
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438 t = min(t, 1 + max_pause / 2);
1439 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449 if (pages < DIRTY_POLL_THRESH) {
1450 t = max_pause;
1451 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1452 if (pages > DIRTY_POLL_THRESH) {
1453 pages = DIRTY_POLL_THRESH;
1454 t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
1455 }
1456 }
1457
1458 pause = HZ * pages / (task_ratelimit + 1);
1459 if (pause > max_pause) {
1460 t = max_pause;
1461 pages = task_ratelimit * t / roundup_pow_of_two(HZ);
1462 }
1463
1464 *nr_dirtied_pause = pages;
1465
1466
1467
1468 return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
1469}
1470
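/*
 * Fill in the per-writeback-context members of @dtc: wb_thresh, wb_bg_thresh
 * and wb_dirty, using the exact (but expensive) per-CPU sums when the
 * threshold is small enough for counter errors to matter.
 */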
1471static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
1472{
1473 struct bdi_writeback *wb = dtc->wb;
1474 unsigned long wb_reclaimable;
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489 dtc->wb_thresh = __wb_calc_thresh(dtc);
1490 dtc->wb_bg_thresh = dtc->thresh ?
1491 div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
1492
	/*
	 * In order to avoid the stacked BDI deadlock we need
	 * to ensure we accurately count the 'dirty' pages when
	 * the threshold is low.
	 *
	 * Otherwise it would be possible to get thresh+n pages
	 * reported dirty, even though there are thresh-m pages
	 * actually dirty; with m+n sitting in the percpu
	 * deltas.
	 */
1503 if (dtc->wb_thresh < 2 * wb_stat_error(wb)) {
1504 wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
1505 dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
1506 } else {
1507 wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
1508 dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
1509 }
1510}
1511
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 */
1519static void balance_dirty_pages(struct address_space *mapping,
1520 struct bdi_writeback *wb,
1521 unsigned long pages_dirtied)
1522{
1523 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1524 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1525 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1526 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1527 &mdtc_stor : NULL;
1528 struct dirty_throttle_control *sdtc;
1529 unsigned long nr_reclaimable;
1530 long period;
1531 long pause;
1532 long max_pause;
1533 long min_pause;
1534 int nr_dirtied_pause;
1535 bool dirty_exceeded = false;
1536 unsigned long task_ratelimit;
1537 unsigned long dirty_ratelimit;
1538 struct backing_dev_info *bdi = wb->bdi;
1539 bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
1540 unsigned long start_time = jiffies;
1541
1542 for (;;) {
1543 unsigned long now = jiffies;
1544 unsigned long dirty, thresh, bg_thresh;
1545 unsigned long m_dirty, m_thresh, m_bg_thresh;
1546
1547
1548
1549
1550
1551
1552
1553 nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
1554 global_page_state(NR_UNSTABLE_NFS);
1555 gdtc->avail = global_dirtyable_memory();
1556 gdtc->dirty = nr_reclaimable + global_page_state(NR_WRITEBACK);
1557
1558 domain_dirty_limits(gdtc);
1559
1560 if (unlikely(strictlimit)) {
1561 wb_dirty_limits(gdtc);
1562
1563 dirty = gdtc->wb_dirty;
1564 thresh = gdtc->wb_thresh;
1565 bg_thresh = gdtc->wb_bg_thresh;
1566 } else {
1567 dirty = gdtc->dirty;
1568 thresh = gdtc->thresh;
1569 bg_thresh = gdtc->bg_thresh;
1570 }
1571
1572 if (mdtc) {
1573 unsigned long filepages, headroom, writeback;
1574
1575
1576
1577
1578
1579 mem_cgroup_wb_stats(wb, &filepages, &headroom,
1580 &mdtc->dirty, &writeback);
1581 mdtc->dirty += writeback;
1582 mdtc_calc_avail(mdtc, filepages, headroom);
1583
1584 domain_dirty_limits(mdtc);
1585
1586 if (unlikely(strictlimit)) {
1587 wb_dirty_limits(mdtc);
1588 m_dirty = mdtc->wb_dirty;
1589 m_thresh = mdtc->wb_thresh;
1590 m_bg_thresh = mdtc->wb_bg_thresh;
1591 } else {
1592 m_dirty = mdtc->dirty;
1593 m_thresh = mdtc->thresh;
1594 m_bg_thresh = mdtc->bg_thresh;
1595 }
1596 }
1597
		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the wb limits are ramping up in case of !strictlimit.
		 *
		 * In strictlimit case make decision based on the wb counters
		 * and limits. Small writeouts when the wb limits are ramping
		 * up are the price we consciously pay for strictlimit-ing.
		 *
		 * If memcg domain is in effect, @dirty should be under
		 * both global and memcg freerun ceilings.
		 */
1610 if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
1611 (!mdtc ||
1612 m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
1613 unsigned long intv = dirty_poll_interval(dirty, thresh);
1614 unsigned long m_intv = ULONG_MAX;
1615
1616 current->dirty_paused_when = now;
1617 current->nr_dirtied = 0;
1618 if (mdtc)
1619 m_intv = dirty_poll_interval(m_dirty, m_thresh);
1620 current->nr_dirtied_pause = min(intv, m_intv);
1621 break;
1622 }
1623
1624 if (unlikely(!writeback_in_progress(wb)))
1625 wb_start_background_writeback(wb);
1626
1627
1628
1629
1630
1631 if (!strictlimit)
1632 wb_dirty_limits(gdtc);
1633
1634 dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
1635 ((gdtc->dirty > gdtc->thresh) || strictlimit);
1636
1637 wb_position_ratio(gdtc);
1638 sdtc = gdtc;
1639
1640 if (mdtc) {
1641
1642
1643
1644
1645
1646
1647 if (!strictlimit)
1648 wb_dirty_limits(mdtc);
1649
1650 dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
1651 ((mdtc->dirty > mdtc->thresh) || strictlimit);
1652
1653 wb_position_ratio(mdtc);
1654 if (mdtc->pos_ratio < gdtc->pos_ratio)
1655 sdtc = mdtc;
1656 }
1657
1658 if (dirty_exceeded && !wb->dirty_exceeded)
1659 wb->dirty_exceeded = 1;
1660
1661 if (time_is_before_jiffies(wb->bw_time_stamp +
1662 BANDWIDTH_INTERVAL)) {
1663 spin_lock(&wb->list_lock);
1664 __wb_update_bandwidth(gdtc, mdtc, start_time, true);
1665 spin_unlock(&wb->list_lock);
1666 }
1667
1668
1669 dirty_ratelimit = wb->dirty_ratelimit;
1670 task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
1671 RATELIMIT_CALC_SHIFT;
1672 max_pause = wb_max_pause(wb, sdtc->wb_dirty);
1673 min_pause = wb_min_pause(wb, max_pause,
1674 task_ratelimit, dirty_ratelimit,
1675 &nr_dirtied_pause);
1676
1677 if (unlikely(task_ratelimit == 0)) {
1678 period = max_pause;
1679 pause = max_pause;
1680 goto pause;
1681 }
1682 period = HZ * pages_dirtied / task_ratelimit;
1683 pause = period;
1684 if (current->dirty_paused_when)
1685 pause -= now - current->dirty_paused_when;
1686
1687
1688
1689
1690
1691
1692
1693 if (pause < min_pause) {
1694 trace_balance_dirty_pages(wb,
1695 sdtc->thresh,
1696 sdtc->bg_thresh,
1697 sdtc->dirty,
1698 sdtc->wb_thresh,
1699 sdtc->wb_dirty,
1700 dirty_ratelimit,
1701 task_ratelimit,
1702 pages_dirtied,
1703 period,
1704 min(pause, 0L),
1705 start_time);
1706 if (pause < -HZ) {
1707 current->dirty_paused_when = now;
1708 current->nr_dirtied = 0;
1709 } else if (period) {
1710 current->dirty_paused_when += period;
1711 current->nr_dirtied = 0;
1712 } else if (current->nr_dirtied_pause <= pages_dirtied)
1713 current->nr_dirtied_pause += pages_dirtied;
1714 break;
1715 }
1716 if (unlikely(pause > max_pause)) {
1717
1718 now += min(pause - max_pause, max_pause);
1719 pause = max_pause;
1720 }
1721
1722pause:
1723 trace_balance_dirty_pages(wb,
1724 sdtc->thresh,
1725 sdtc->bg_thresh,
1726 sdtc->dirty,
1727 sdtc->wb_thresh,
1728 sdtc->wb_dirty,
1729 dirty_ratelimit,
1730 task_ratelimit,
1731 pages_dirtied,
1732 period,
1733 pause,
1734 start_time);
1735 __set_current_state(TASK_KILLABLE);
1736 io_schedule_timeout(pause);
1737
1738 current->dirty_paused_when = now + pause;
1739 current->nr_dirtied = 0;
1740 current->nr_dirtied_pause = nr_dirtied_pause;
1741
1742
1743
1744
1745
1746 if (task_ratelimit)
1747 break;
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759 if (sdtc->wb_dirty <= wb_stat_error(wb))
1760 break;
1761
1762 if (fatal_signal_pending(current))
1763 break;
1764 }
1765
1766 if (!dirty_exceeded && wb->dirty_exceeded)
1767 wb->dirty_exceeded = 0;
1768
1769 if (writeback_in_progress(wb))
1770 return;
1771
	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity
	 * and only trigger their occasional writes when they've gotten too far
	 * behind.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
1780 if (laptop_mode)
1781 return;
1782
1783 if (nr_reclaimable > gdtc->bg_thresh)
1784 wb_start_background_writeback(wb);
1785}
1786
1787static DEFINE_PER_CPU(int, bdp_ratelimits);
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
1804
/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * On really big machines, getting the global dirty state is expensive, so try
 * to avoid calling it too often (ratelimiting).  But once we're over the dirty
 * memory limit we decrease the ratelimiting by a lot, to prevent individual
 * processes from overshooting the limit by (ratelimit_pages) each.
 */
1818void balance_dirty_pages_ratelimited(struct address_space *mapping)
1819{
1820 struct inode *inode = mapping->host;
1821 struct backing_dev_info *bdi = inode_to_bdi(inode);
1822 struct bdi_writeback *wb = NULL;
1823 int ratelimit;
1824 int *p;
1825
1826 if (!bdi_cap_account_dirty(bdi))
1827 return;
1828
1829 if (inode_cgwb_enabled(inode))
1830 wb = wb_get_create_current(bdi, GFP_KERNEL);
1831 if (!wb)
1832 wb = &bdi->wb;
1833
1834 ratelimit = current->nr_dirtied_pause;
1835 if (wb->dirty_exceeded)
1836 ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
1837
1838 preempt_disable();
1839
1840
1841
1842
1843
1844
1845 p = this_cpu_ptr(&bdp_ratelimits);
1846 if (unlikely(current->nr_dirtied >= ratelimit))
1847 *p = 0;
1848 else if (unlikely(*p >= ratelimit_pages)) {
1849 *p = 0;
1850 ratelimit = 0;
1851 }
1852
1853
1854
1855
1856
1857 p = this_cpu_ptr(&dirty_throttle_leaks);
1858 if (*p > 0 && current->nr_dirtied < ratelimit) {
1859 unsigned long nr_pages_dirtied;
1860 nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
1861 *p -= nr_pages_dirtied;
1862 current->nr_dirtied += nr_pages_dirtied;
1863 }
1864 preempt_enable();
1865
1866 if (unlikely(current->nr_dirtied >= ratelimit))
1867 balance_dirty_pages(mapping, wb, current->nr_dirtied);
1868
1869 wb_put(wb);
1870}
1871EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
1872
/**
 * wb_over_bg_thresh - does @wb need to be written back?
 * @wb: bdi_writeback of interest
 *
 * Determines whether background writeback should keep writing @wb or it's
 * clean enough.  Returns %true if writeback should continue.
 */
1880bool wb_over_bg_thresh(struct bdi_writeback *wb)
1881{
1882 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1883 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1884 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1885 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1886 &mdtc_stor : NULL;
1887
1888
1889
1890
1891
1892 gdtc->avail = global_dirtyable_memory();
1893 gdtc->dirty = global_page_state(NR_FILE_DIRTY) +
1894 global_page_state(NR_UNSTABLE_NFS);
1895 domain_dirty_limits(gdtc);
1896
1897 if (gdtc->dirty > gdtc->bg_thresh)
1898 return true;
1899
1900 if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(gdtc))
1901 return true;
1902
1903 if (mdtc) {
1904 unsigned long filepages, headroom, writeback;
1905
1906 mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
1907 &writeback);
1908 mdtc_calc_avail(mdtc, filepages, headroom);
1909 domain_dirty_limits(mdtc);
1910
1911 if (mdtc->dirty > mdtc->bg_thresh)
1912 return true;
1913
1914 if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(mdtc))
1915 return true;
1916 }
1917
1918 return false;
1919}
1920
1921void throttle_vm_writeout(gfp_t gfp_mask)
1922{
1923 unsigned long background_thresh;
1924 unsigned long dirty_thresh;
1925
1926 for ( ; ; ) {
1927 global_dirty_limits(&background_thresh, &dirty_thresh);
1928 dirty_thresh = hard_dirty_limit(&global_wb_domain, dirty_thresh);
1929
1930
1931
1932
1933
1934 dirty_thresh += dirty_thresh / 10;
1935
1936 if (global_page_state(NR_UNSTABLE_NFS) +
1937 global_page_state(NR_WRITEBACK) <= dirty_thresh)
1938 break;
1939 congestion_wait(BLK_RW_ASYNC, HZ/10);
1940
1941
1942
1943
1944
1945
1946 if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
1947 break;
1948 }
1949}
1950
/*
 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
 */
1954int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
1955 void __user *buffer, size_t *length, loff_t *ppos)
1956{
1957 proc_dointvec(table, write, buffer, length, ppos);
1958 return 0;
1959}
1960
1961#ifdef CONFIG_BLOCK
1962void laptop_mode_timer_fn(unsigned long data)
1963{
1964 struct request_queue *q = (struct request_queue *)data;
1965 int nr_pages = global_page_state(NR_FILE_DIRTY) +
1966 global_page_state(NR_UNSTABLE_NFS);
1967 struct bdi_writeback *wb;
1968
1969
1970
1971
1972
1973 if (!bdi_has_dirty_io(&q->backing_dev_info))
1974 return;
1975
1976 rcu_read_lock();
1977 list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node)
1978 if (wb_has_dirty_io(wb))
1979 wb_start_writeback(wb, nr_pages, true,
1980 WB_REASON_LAPTOP_TIMER);
1981 rcu_read_unlock();
1982}
1983
/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already scheduled
 * then push it back - the user is still using the disk.
 */
1989void laptop_io_completion(struct backing_dev_info *info)
1990{
1991 mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
1992}
1993
/*
 * We're in laptop mode and we've just synced. The sync's writes will have
 * caused another writeback to be scheduled by laptop_io_completion.
 * Nothing needs to be written back anymore, so we unschedule the writeback.
 */
1999void laptop_sync_completion(void)
2000{
2001 struct backing_dev_info *bdi;
2002
2003 rcu_read_lock();
2004
2005 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
2006 del_timer(&bdi->laptop_mode_wb_timer);
2007
2008 rcu_read_unlock();
2009}
2010#endif
2011
/*
 * If ratelimit_pages is too high then we can get into dirty-data overload
 * if a large number of processes all perform writes at the same time.
 * If it is too low then SMP machines will call the (expensive)
 * get_writeback_state too often.
 *
 * Here we set ratelimit_pages to a level which ensures that when all CPUs are
 * dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
 * thresholds.
 */
2023void writeback_set_ratelimit(void)
2024{
2025 struct wb_domain *dom = &global_wb_domain;
2026 unsigned long background_thresh;
2027 unsigned long dirty_thresh;
2028
2029 global_dirty_limits(&background_thresh, &dirty_thresh);
2030 dom->dirty_limit = dirty_thresh;
2031 ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
2032 if (ratelimit_pages < 16)
2033 ratelimit_pages = 16;
2034}
2035
2036static int
2037ratelimit_handler(struct notifier_block *self, unsigned long action,
2038 void *hcpu)
2039{
2040
2041 switch (action & ~CPU_TASKS_FROZEN) {
2042 case CPU_ONLINE:
2043 case CPU_DEAD:
2044 writeback_set_ratelimit();
2045 return NOTIFY_OK;
2046 default:
2047 return NOTIFY_DONE;
2048 }
2049}
2050
2051static struct notifier_block ratelimit_nb = {
2052 .notifier_call = ratelimit_handler,
2053 .next = NULL,
2054};
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074void __init page_writeback_init(void)
2075{
2076 BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
2077
2078 writeback_set_ratelimit();
2079 register_cpu_notifier(&ratelimit_nb);
2080}
2081
2082
/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
 * that write_cache_pages (or whoever calls this function) will then use
 * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
 * used to avoid livelocking of writeback by a process steadily creating new
 * dirty pages in the file (thus it is important for this function to be quick
 * so that it can tag pages faster than a dirtying process can create them).
 */
2099void tag_pages_for_writeback(struct address_space *mapping,
2100 pgoff_t start, pgoff_t end)
2101{
2102#define WRITEBACK_TAG_BATCH 4096
2103 unsigned long tagged;
2104
2105 do {
2106 spin_lock_irq(&mapping->tree_lock);
2107 tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
2108 &start, end, WRITEBACK_TAG_BATCH,
2109 PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
2110 spin_unlock_irq(&mapping->tree_lock);
2111 WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
2112 cond_resched();
2113
2114 } while (tagged >= WRITEBACK_TAG_BATCH && start);
2115}
2116EXPORT_SYMBOL(tag_pages_for_writeback);
2117
2118
/**
 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 *
 * To avoid livelocks (when other process dirties new pages), we first tag
 * pages which should be written back with TOWRITE tag and only then start
 * writing them. For data-integrity sync we have to be careful so that we do
 * not miss some pages (e.g., because some other process has cleared TOWRITE
 * tag we set). The rule we follow is that TOWRITE tag can be cleared only
 * by the process clearing the DIRTY tag (and submitting the page for IO).
 */
2140int write_cache_pages(struct address_space *mapping,
2141 struct writeback_control *wbc, writepage_t writepage,
2142 void *data)
2143{
2144 int ret = 0;
2145 int done = 0;
2146 struct pagevec pvec;
2147 int nr_pages;
2148 pgoff_t uninitialized_var(writeback_index);
2149 pgoff_t index;
2150 pgoff_t end;
2151 pgoff_t done_index;
2152 int cycled;
2153 int range_whole = 0;
2154 int tag;
2155
2156 pagevec_init(&pvec, 0);
2157 if (wbc->range_cyclic) {
2158 writeback_index = mapping->writeback_index;
2159 index = writeback_index;
2160 if (index == 0)
2161 cycled = 1;
2162 else
2163 cycled = 0;
2164 end = -1;
2165 } else {
2166 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2167 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2168 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2169 range_whole = 1;
2170 cycled = 1;
2171 }
2172 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2173 tag = PAGECACHE_TAG_TOWRITE;
2174 else
2175 tag = PAGECACHE_TAG_DIRTY;
2176retry:
2177 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2178 tag_pages_for_writeback(mapping, index, end);
2179 done_index = index;
2180 while (!done && (index <= end)) {
2181 int i;
2182
2183 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2184 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2185 if (nr_pages == 0)
2186 break;
2187
2188 for (i = 0; i < nr_pages; i++) {
2189 struct page *page = pvec.pages[i];
2190
2191
2192
2193
2194
2195
2196
2197
2198 if (page->index > end) {
2199
2200
2201
2202
2203 done = 1;
2204 break;
2205 }
2206
2207 done_index = page->index;
2208
2209 lock_page(page);
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219 if (unlikely(page->mapping != mapping)) {
2220continue_unlock:
2221 unlock_page(page);
2222 continue;
2223 }
2224
2225 if (!PageDirty(page)) {
2226
2227 goto continue_unlock;
2228 }
2229
2230 if (PageWriteback(page)) {
2231 if (wbc->sync_mode != WB_SYNC_NONE)
2232 wait_on_page_writeback(page);
2233 else
2234 goto continue_unlock;
2235 }
2236
2237 BUG_ON(PageWriteback(page));
2238 if (!clear_page_dirty_for_io(page))
2239 goto continue_unlock;
2240
2241 trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
2242 ret = (*writepage)(page, wbc, data);
2243 if (unlikely(ret)) {
2244 if (ret == AOP_WRITEPAGE_ACTIVATE) {
2245 unlock_page(page);
2246 ret = 0;
2247 } else {
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257 done_index = page->index + 1;
2258 done = 1;
2259 break;
2260 }
2261 }
2262
2263
2264
2265
2266
2267
2268
2269 if (--wbc->nr_to_write <= 0 &&
2270 wbc->sync_mode == WB_SYNC_NONE) {
2271 done = 1;
2272 break;
2273 }
2274 }
2275 pagevec_release(&pvec);
2276 cond_resched();
2277 }
2278 if (!cycled && !done) {
2279
2280
2281
2282
2283
2284 cycled = 1;
2285 index = 0;
2286 end = writeback_index - 1;
2287 goto retry;
2288 }
2289 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2290 mapping->writeback_index = done_index;
2291
2292 return ret;
2293}
2294EXPORT_SYMBOL(write_cache_pages);
2295
2296
/*
 * Function used by generic_writepages to call the real writepage
 * function and set the mapping flags on error
 */
2300static int __writepage(struct page *page, struct writeback_control *wbc,
2301 void *data)
2302{
2303 struct address_space *mapping = data;
2304 int ret = mapping->a_ops->writepage(page, wbc);
2305 mapping_set_error(mapping, ret);
2306 return ret;
2307}
2308
2309
/**
 * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 *
 * This is a library function, which implements the writepages()
 * address_space_operation.
 */
2317int generic_writepages(struct address_space *mapping,
2318 struct writeback_control *wbc)
2319{
2320 struct blk_plug plug;
2321 int ret;
2322
2323
2324 if (!mapping->a_ops->writepage)
2325 return 0;
2326
2327 blk_start_plug(&plug);
2328 ret = write_cache_pages(mapping, wbc, __writepage, mapping);
2329 blk_finish_plug(&plug);
2330 return ret;
2331}
2332
2333EXPORT_SYMBOL(generic_writepages);
2334
2335int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
2336{
2337 int ret;
2338
2339 if (wbc->nr_to_write <= 0)
2340 return 0;
2341 if (mapping->a_ops->writepages)
2342 ret = mapping->a_ops->writepages(mapping, wbc);
2343 else
2344 ret = generic_writepages(mapping, wbc);
2345 return ret;
2346}
2347
2348
/**
 * write_one_page - write out a single page and optionally wait on I/O
 * @page: the page to write
 * @wait: if true, wait on writeout
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * write_one_page() returns a negative error code if I/O failed.
 */
2357int write_one_page(struct page *page, int wait)
2358{
2359 struct address_space *mapping = page->mapping;
2360 int ret = 0;
2361 struct writeback_control wbc = {
2362 .sync_mode = WB_SYNC_ALL,
2363 .nr_to_write = 1,
2364 };
2365
2366 BUG_ON(!PageLocked(page));
2367
2368 if (wait)
2369 wait_on_page_writeback(page);
2370
2371 if (clear_page_dirty_for_io(page)) {
2372 page_cache_get(page);
2373 ret = mapping->a_ops->writepage(page, &wbc);
2374 if (ret == 0 && wait) {
2375 wait_on_page_writeback(page);
2376 if (PageError(page))
2377 ret = -EIO;
2378 }
2379 page_cache_release(page);
2380 } else {
2381 unlock_page(page);
2382 }
2383 return ret;
2384}
2385EXPORT_SYMBOL(write_one_page);
2386
2387
/*
 * For address_spaces which do not use buffers nor write back.
 */
2390int __set_page_dirty_no_writeback(struct page *page)
2391{
2392 if (!PageDirty(page))
2393 return !TestSetPageDirty(page);
2394 return 0;
2395}
2396
2397
/*
 * Helper function for set_page_dirty family.
 *
 * Caller must hold mem_cgroup_begin_page_stat().
 *
 * NOTE: This relies on being atomic wrt interrupts.
 */
2404void account_page_dirtied(struct page *page, struct address_space *mapping,
2405 struct mem_cgroup *memcg)
2406{
2407 struct inode *inode = mapping->host;
2408
2409 trace_writeback_dirty_page(page, mapping);
2410
2411 if (mapping_cap_account_dirty(mapping)) {
2412 struct bdi_writeback *wb;
2413
2414 inode_attach_wb(inode, page);
2415 wb = inode_to_wb(inode);
2416
2417 mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
2418 __inc_zone_page_state(page, NR_FILE_DIRTY);
2419 __inc_zone_page_state(page, NR_DIRTIED);
2420 __inc_wb_stat(wb, WB_RECLAIMABLE);
2421 __inc_wb_stat(wb, WB_DIRTIED);
2422 task_io_account_write(PAGE_CACHE_SIZE);
2423 current->nr_dirtied++;
2424 this_cpu_inc(bdp_ratelimits);
2425 }
2426}
2427EXPORT_SYMBOL(account_page_dirtied);
2428
2429
/*
 * Helper function for deaccounting dirty page without writeback.
 *
 * Caller must hold mem_cgroup_begin_page_stat().
 */
2434void account_page_cleaned(struct page *page, struct address_space *mapping,
2435 struct mem_cgroup *memcg, struct bdi_writeback *wb)
2436{
2437 if (mapping_cap_account_dirty(mapping)) {
2438 mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
2439 dec_zone_page_state(page, NR_FILE_DIRTY);
2440 dec_wb_stat(wb, WB_RECLAIMABLE);
2441 task_io_account_cancelled_write(PAGE_CACHE_SIZE);
2442 }
2443}
2444
2445
/*
 * For address_spaces which do not use buffers.  Just tag the page as dirty in
 * its radix tree.
 *
 * This is also used when a single buffer is being dirtied: we want to set the
 * page dirty in that case, but not all the buffers.  This is a "bottom-up"
 * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
 *
 * The caller must ensure this doesn't race with truncation.  Most will simply
 * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
 * the pte lock held, which also locks out truncation.
 */
2457int __set_page_dirty_nobuffers(struct page *page)
2458{
2459 struct mem_cgroup *memcg;
2460
2461 memcg = mem_cgroup_begin_page_stat(page);
2462 if (!TestSetPageDirty(page)) {
2463 struct address_space *mapping = page_mapping(page);
2464 unsigned long flags;
2465
2466 if (!mapping) {
2467 mem_cgroup_end_page_stat(memcg);
2468 return 1;
2469 }
2470
2471 spin_lock_irqsave(&mapping->tree_lock, flags);
2472 BUG_ON(page_mapping(page) != mapping);
2473 WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
2474 account_page_dirtied(page, mapping, memcg);
2475 radix_tree_tag_set(&mapping->page_tree, page_index(page),
2476 PAGECACHE_TAG_DIRTY);
2477 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2478 mem_cgroup_end_page_stat(memcg);
2479
2480 if (mapping->host) {
2481
2482 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
2483 }
2484 return 1;
2485 }
2486 mem_cgroup_end_page_stat(memcg);
2487 return 0;
2488}
2489EXPORT_SYMBOL(__set_page_dirty_nobuffers);
2490
2491
/*
 * Call this whenever redirtying a page, to de-account the dirty counters
 * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written
 * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to
 * systematic errors in balanced_dirty_ratelimit and the dirty pages position
 * control.
 */
2498void account_page_redirty(struct page *page)
2499{
2500 struct address_space *mapping = page->mapping;
2501
2502 if (mapping && mapping_cap_account_dirty(mapping)) {
2503 struct inode *inode = mapping->host;
2504 struct bdi_writeback *wb;
2505 bool locked;
2506
2507 wb = unlocked_inode_to_wb_begin(inode, &locked);
2508 current->nr_dirtied--;
2509 dec_zone_page_state(page, NR_DIRTIED);
2510 dec_wb_stat(wb, WB_DIRTIED);
2511 unlocked_inode_to_wb_end(inode, locked);
2512 }
2513}
2514EXPORT_SYMBOL(account_page_redirty);
2515
2516
/*
 * When a writepage implementation decides that it doesn't want to write this
 * page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage() and it should then unlock the page and return 0
 */
2521int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
2522{
2523 int ret;
2524
2525 wbc->pages_skipped++;
2526 ret = __set_page_dirty_nobuffers(page);
2527 account_page_redirty(page);
2528 return ret;
2529}
2530EXPORT_SYMBOL(redirty_page_for_writepage);
2531
2532
/*
 * Dirty a page.
 *
 * For pages with a mapping this should be done under the page lock
 * for the benefit of asynchronous memory errors who prefer a consistent
 * dirty state. This rule can be broken in some special cases,
 * but should be better not to.
 *
 * If the mapping doesn't provide a set_page_dirty a_op, then
 * just fall through and assume that it wants buffer_heads.
 */
2543int set_page_dirty(struct page *page)
2544{
2545 struct address_space *mapping = page_mapping(page);
2546
2547 if (likely(mapping)) {
2548 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559 if (PageReclaim(page))
2560 ClearPageReclaim(page);
2561#ifdef CONFIG_BLOCK
2562 if (!spd)
2563 spd = __set_page_dirty_buffers;
2564#endif
2565 return (*spd)(page);
2566 }
2567 if (!PageDirty(page)) {
2568 if (!TestSetPageDirty(page))
2569 return 1;
2570 }
2571 return 0;
2572}
2573EXPORT_SYMBOL(set_page_dirty);
2574
2575
/*
 * set_page_dirty() is racy if the caller has no reference against
 * page->mapping->host, and if the page is unlocked.  This is because another
 * CPU could truncate the page off the mapping and then free the mapping.
 *
 * Usually, the page _is_ locked, or the caller is a user-space process which
 * holds a reference on the inode by having an open file.
 *
 * In other cases, the page should be locked before running set_page_dirty().
 */
2585int set_page_dirty_lock(struct page *page)
2586{
2587 int ret;
2588
2589 lock_page(page);
2590 ret = set_page_dirty(page);
2591 unlock_page(page);
2592 return ret;
2593}
2594EXPORT_SYMBOL(set_page_dirty_lock);
2595
2596
/*
 * This cancels just the dirty bit on the kernel page itself, it does NOT
 * actually remove dirty bits on any mmap's that may be around. It also
 * leaves the page tagged dirty, so any sync activity will still find it on
 * the dirty lists, and in particular, clear_page_dirty_for_io() will still
 * look at the dirty bits in the VM.
 *
 * Doing this should *normally* only ever be done when a page is truncated,
 * and is not actually mapped anywhere at all. However, fs/buffer.c does
 * this when it notices that somebody has cleaned out all the buffers on a
 * page without actually doing it through the VM.
 */
2609void cancel_dirty_page(struct page *page)
2610{
2611 struct address_space *mapping = page_mapping(page);
2612
2613 if (mapping_cap_account_dirty(mapping)) {
2614 struct inode *inode = mapping->host;
2615 struct bdi_writeback *wb;
2616 struct mem_cgroup *memcg;
2617 bool locked;
2618
2619 memcg = mem_cgroup_begin_page_stat(page);
2620 wb = unlocked_inode_to_wb_begin(inode, &locked);
2621
2622 if (TestClearPageDirty(page))
2623 account_page_cleaned(page, mapping, memcg, wb);
2624
2625 unlocked_inode_to_wb_end(inode, locked);
2626 mem_cgroup_end_page_stat(memcg);
2627 } else {
2628 ClearPageDirty(page);
2629 }
2630}
2631EXPORT_SYMBOL(cancel_dirty_page);
2632
2633
/*
 * Clear a page's dirty flag, while caring for dirty memory accounting.
 * Returns true if the page was previously dirty.
 *
 * This is for preparing to put the page under writeout.  We leave the page
 * tagged as dirty in the radix tree so that a concurrent write-for-sync
 * can discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage
 * implementation will run either set_page_writeback() or set_page_dirty(),
 * at which stage we bring the page's dirty flag and radix-tree dirty tag
 * back into sync.
 *
 * This incoherency between the page's dirty flag and radix-tree tag is
 * unfortunate, but it only exists while the page is locked.
 */
2647int clear_page_dirty_for_io(struct page *page)
2648{
2649 struct address_space *mapping = page_mapping(page);
2650 int ret = 0;
2651
2652 BUG_ON(!PageLocked(page));
2653
2654 if (mapping && mapping_cap_account_dirty(mapping)) {
2655 struct inode *inode = mapping->host;
2656 struct bdi_writeback *wb;
2657 struct mem_cgroup *memcg;
2658 bool locked;
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685 if (page_mkclean(page))
2686 set_page_dirty(page);
2687
2688
2689
2690
2691
2692
2693
2694
2695 memcg = mem_cgroup_begin_page_stat(page);
2696 wb = unlocked_inode_to_wb_begin(inode, &locked);
2697 if (TestClearPageDirty(page)) {
2698 mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
2699 dec_zone_page_state(page, NR_FILE_DIRTY);
2700 dec_wb_stat(wb, WB_RECLAIMABLE);
2701 ret = 1;
2702 }
2703 unlocked_inode_to_wb_end(inode, locked);
2704 mem_cgroup_end_page_stat(memcg);
2705 return ret;
2706 }
2707 return TestClearPageDirty(page);
2708}
2709EXPORT_SYMBOL(clear_page_dirty_for_io);
2710
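/*
 * Clear the writeback bit and the radix-tree writeback tag for @page,
 * updating the wb and zone writeback statistics accordingly.  Returns the
 * old writeback state.
 */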
2711int test_clear_page_writeback(struct page *page)
2712{
2713 struct address_space *mapping = page_mapping(page);
2714 struct mem_cgroup *memcg;
2715 int ret;
2716
2717 memcg = mem_cgroup_begin_page_stat(page);
2718 if (mapping) {
2719 struct inode *inode = mapping->host;
2720 struct backing_dev_info *bdi = inode_to_bdi(inode);
2721 unsigned long flags;
2722
2723 spin_lock_irqsave(&mapping->tree_lock, flags);
2724 ret = TestClearPageWriteback(page);
2725 if (ret) {
2726 radix_tree_tag_clear(&mapping->page_tree,
2727 page_index(page),
2728 PAGECACHE_TAG_WRITEBACK);
2729 if (bdi_cap_account_writeback(bdi)) {
2730 struct bdi_writeback *wb = inode_to_wb(inode);
2731
2732 __dec_wb_stat(wb, WB_WRITEBACK);
2733 __wb_writeout_inc(wb);
2734 }
2735 }
2736 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2737 } else {
2738 ret = TestClearPageWriteback(page);
2739 }
2740 if (ret) {
2741 mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
2742 dec_zone_page_state(page, NR_WRITEBACK);
2743 inc_zone_page_state(page, NR_WRITTEN);
2744 }
2745 mem_cgroup_end_page_stat(memcg);
2746 return ret;
2747}
2748
2749int __test_set_page_writeback(struct page *page, bool keep_write)
2750{
2751 struct address_space *mapping = page_mapping(page);
2752 struct mem_cgroup *memcg;
2753 int ret;
2754
2755 memcg = mem_cgroup_begin_page_stat(page);
2756 if (mapping) {
2757 struct inode *inode = mapping->host;
2758 struct backing_dev_info *bdi = inode_to_bdi(inode);
2759 unsigned long flags;
2760
2761 spin_lock_irqsave(&mapping->tree_lock, flags);
2762 ret = TestSetPageWriteback(page);
2763 if (!ret) {
2764 radix_tree_tag_set(&mapping->page_tree,
2765 page_index(page),
2766 PAGECACHE_TAG_WRITEBACK);
2767 if (bdi_cap_account_writeback(bdi))
2768 __inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
2769 }
2770 if (!PageDirty(page))
2771 radix_tree_tag_clear(&mapping->page_tree,
2772 page_index(page),
2773 PAGECACHE_TAG_DIRTY);
2774 if (!keep_write)
2775 radix_tree_tag_clear(&mapping->page_tree,
2776 page_index(page),
2777 PAGECACHE_TAG_TOWRITE);
2778 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2779 } else {
2780 ret = TestSetPageWriteback(page);
2781 }
2782 if (!ret) {
2783 mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
2784 inc_zone_page_state(page, NR_WRITEBACK);
2785 }
2786 mem_cgroup_end_page_stat(memcg);
2787 return ret;
2788
2789}
2790EXPORT_SYMBOL(__test_set_page_writeback);
2791
/*
 * Return true if any of the pages in the mapping are marked with the
 * passed tag.
 */
2796int mapping_tagged(struct address_space *mapping, int tag)
2797{
2798 return radix_tree_tagged(&mapping->page_tree, tag);
2799}
2800EXPORT_SYMBOL(mapping_tagged);
2801
2802
/**
 * wait_for_stable_page() - wait for writeback to finish, if necessary.
 * @page:	The page to wait on.
 *
 * This function determines if the given page is related to a backing device
 * that requires page contents to be held stable during writeback.  If so, then
 * it will wait for any pending writeback to complete.
 */
2810void wait_for_stable_page(struct page *page)
2811{
2812 if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
2813 wait_on_page_writeback(page);
2814}
2815EXPORT_SYMBOL_GPL(wait_for_stable_page);
2816