/*
 * mm/page-writeback.c
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level, and the dirty-page throttling that keeps the
 * amount of dirty memory within the configured limits.
 */
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"

/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals higher than this many
 * pages by raising pause time to max_pause when it falls below it.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10

/*
 * Number of pages a CPU may dirty before balance_dirty_pages_ratelimited()
 * forces a throttling check regardless of the per-task budget; recalculated
 * in writeback_set_ratelimit().
 */
static long ratelimit_pages = 32;

/* Sysctl knobs exported via /proc/sys/vm: */

/*
 * Start background writeback (via the flusher threads) at this percentage
 * of dirtyable memory.
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that the background
 * threshold is a function of dirty_background_ratio * dirtyable memory.
 */
unsigned long dirty_background_bytes;

/*
 * If false (the default), highmem is excluded from the dirtyable memory
 * used to compute the dirty limits.
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage.
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that the dirty threshold is a
 * function of vm_dirty_ratio * dirtyable memory.
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between periodic ("kupdate"-style) writeback passes,
 * in centiseconds (5 * 100 = 5 seconds).
 */
unsigned int dirty_writeback_interval = 5 * 100;
EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * Age, in centiseconds, after which dirty data is old enough to be written
 * out by the periodic writeback (30 seconds by default).
 */
unsigned int dirty_expire_interval = 30 * 100;

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode".  A non-zero value also acts
 * as the delay, in jiffies, before writeback is started after I/O completes
 * (see laptop_io_completion() below).
 */
int laptop_mode;
EXPORT_SYMBOL(laptop_mode);

/* End of sysctl-exported parameters */

struct wb_domain global_wb_domain;

/*
 * Bundle of the parameters used by one invocation of the dirty throttling
 * logic.  One instance describes the global wb_domain; when cgroup
 * writeback is enabled, a second one describes the memcg domain the wb
 * belongs to.
 */
struct dirty_throttle_control {
#ifdef CONFIG_CGROUP_WRITEBACK
	struct wb_domain	*dom;
	struct dirty_throttle_control *gdtc;	/* only set in memcg dtc's */
#endif
	struct bdi_writeback	*wb;
	struct fprop_local_percpu *wb_completions;

	unsigned long		avail;		/* dirtyable */
	unsigned long		dirty;		/* file_dirty + write + nfs */
	unsigned long		thresh;		/* dirty threshold */
	unsigned long		bg_thresh;	/* dirty background threshold */

	unsigned long		wb_dirty;	/* per-wb counterparts */
	unsigned long		wb_thresh;
	unsigned long		wb_bg_thresh;

	unsigned long		pos_ratio;
};
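
/*
 * Callers build these on the stack; a minimal sketch of the usual pattern
 * (balance_dirty_pages() below does essentially this):
 *
 *	struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
 *
 *	gdtc.avail = global_dirtyable_memory();
 *	gdtc.dirty = ...;
 *	domain_dirty_limits(&gdtc);
 */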

/*
 * Length of period for aging writeout fractions of bdis.  This is an
 * arbitrarily chosen number.  The longer the period, the slower fractions
 * will reflect changes in the current writeout rate.
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
154
155#ifdef CONFIG_CGROUP_WRITEBACK
156
157#define GDTC_INIT(__wb) .wb = (__wb), \
158 .dom = &global_wb_domain, \
159 .wb_completions = &(__wb)->completions
160
161#define GDTC_INIT_NO_WB .dom = &global_wb_domain
162
163#define MDTC_INIT(__wb, __gdtc) .wb = (__wb), \
164 .dom = mem_cgroup_wb_domain(__wb), \
165 .wb_completions = &(__wb)->memcg_completions, \
166 .gdtc = __gdtc
167
168static bool mdtc_valid(struct dirty_throttle_control *dtc)
169{
170 return dtc->dom;
171}
172
173static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
174{
175 return dtc->dom;
176}
177
178static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
179{
180 return mdtc->gdtc;
181}
182
183static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
184{
185 return &wb->memcg_completions;
186}
187
188static void wb_min_max_ratio(struct bdi_writeback *wb,
189 unsigned long *minp, unsigned long *maxp)
190{
191 unsigned long this_bw = wb->avg_write_bandwidth;
192 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
193 unsigned long long min = wb->bdi->min_ratio;
194 unsigned long long max = wb->bdi->max_ratio;
	/*
	 * @wb may only be a fraction of the whole bdi, so scale the bdi's
	 * min/max ratios down by @wb's share of the bdi's total write
	 * bandwidth before applying them.
	 */
200 if (this_bw < tot_bw) {
201 if (min) {
202 min *= this_bw;
203 do_div(min, tot_bw);
204 }
205 if (max < 100) {
206 max *= this_bw;
207 do_div(max, tot_bw);
208 }
209 }
210
211 *minp = min;
212 *maxp = max;
213}
214
215#else
216
217#define GDTC_INIT(__wb) .wb = (__wb), \
218 .wb_completions = &(__wb)->completions
219#define GDTC_INIT_NO_WB
220#define MDTC_INIT(__wb, __gdtc)
221
222static bool mdtc_valid(struct dirty_throttle_control *dtc)
223{
224 return false;
225}
226
227static struct wb_domain *dtc_dom(struct dirty_throttle_control *dtc)
228{
229 return &global_wb_domain;
230}
231
232static struct dirty_throttle_control *mdtc_gdtc(struct dirty_throttle_control *mdtc)
233{
234 return NULL;
235}
236
237static struct fprop_local_percpu *wb_memcg_completions(struct bdi_writeback *wb)
238{
239 return NULL;
240}
241
242static void wb_min_max_ratio(struct bdi_writeback *wb,
243 unsigned long *minp, unsigned long *maxp)
244{
245 *minp = wb->bdi->min_ratio;
246 *maxp = wb->bdi->max_ratio;
247}
248
249#endif
/*
 * In a memory zone, there is a certain amount of pages we consider
 * available for the page cache, which is essentially the number of
 * free and reclaimable pages, minus some zone reserves to protect
 * lowmem and the ability to uphold the zone's watermarks without
 * requiring writeback.
 *
 * This number of dirtyable pages is the base value on which the
 * user-configurable dirty ratios act to give the number of pages that
 * are allowed to be actually dirtied, either per individual zone or
 * globally by using the sum of dirtyable pages over all zones.
 *
 * Because the user is allowed to specify the dirty limit globally as an
 * absolute number of bytes, calculating the per-zone dirty limit can
 * require translating the configured limit into a percentage of
 * global dirtyable memory first.
 */

/**
 * zone_dirtyable_memory - number of dirtyable pages in a zone
 * @zone: the zone
 *
 * Returns the zone's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-zone dirty limits.
 */
276static unsigned long zone_dirtyable_memory(struct zone *zone)
277{
278 unsigned long nr_pages;
279
280 nr_pages = zone_page_state(zone, NR_FREE_PAGES);
281 nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
282
283 nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
284 nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
285
286 return nr_pages;
287}
288
289static unsigned long highmem_dirtyable_memory(unsigned long total)
290{
291#ifdef CONFIG_HIGHMEM
292 int node;
293 unsigned long x = 0;
294
295 for_each_node_state(node, N_HIGH_MEMORY) {
296 struct zone *z = &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
297
298 x += zone_dirtyable_memory(z);
299 }
300
301
302
303
304
305
306
307
308
309 if ((long)x < 0)
310 x = 0;
311
312
313
314
315
316
317
318 return min(x, total);
319#else
320 return 0;
321#endif
322}

/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Returns the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */
330static unsigned long global_dirtyable_memory(void)
331{
332 unsigned long x;
333
334 x = global_page_state(NR_FREE_PAGES);
335 x -= min(x, dirty_balance_reserve);
336
337 x += global_page_state(NR_INACTIVE_FILE);
338 x += global_page_state(NR_ACTIVE_FILE);
339
340 if (!vm_highmem_is_dirtyable)
341 x -= highmem_dirtyable_memory(x);
342
343 return x + 1;
344}
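
/*
 * Note the "+ 1" above: the result is later used as a divisor (e.g. in
 * zone_dirty_limit() and for the bytes-to-ratio conversion in
 * domain_dirty_limits()), so it must never be zero even on a machine with
 * no free or file-backed pages left.
 */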

/**
 * domain_dirty_limits - calculate thresh and bg_thresh for a wb_domain
 * @dtc: dirty_throttle_control of interest
 *
 * Calculates the dirty throttling threshold and the background writeback
 * threshold from the dirty tunables, scaled to @dtc->avail.  For a memcg
 * domain the byte-based tunables are first converted into ratios of the
 * global dirtyable memory, so that they apply proportionally to the
 * (smaller) memcg domain.  Both thresholds are bumped by 25% for
 * PF_LESS_THROTTLE (e.g. nfsd) and realtime tasks.
 */
356static void domain_dirty_limits(struct dirty_throttle_control *dtc)
357{
358 const unsigned long available_memory = dtc->avail;
359 struct dirty_throttle_control *gdtc = mdtc_gdtc(dtc);
360 unsigned long bytes = vm_dirty_bytes;
361 unsigned long bg_bytes = dirty_background_bytes;
362 unsigned long ratio = vm_dirty_ratio;
363 unsigned long bg_ratio = dirty_background_ratio;
364 unsigned long thresh;
365 unsigned long bg_thresh;
366 struct task_struct *tsk;
367
368
369 if (gdtc) {
370 unsigned long global_avail = gdtc->avail;
371
372
373
374
375
376
377 if (bytes)
378 ratio = min(DIV_ROUND_UP(bytes, PAGE_SIZE) * 100 /
379 global_avail, 100UL);
380 if (bg_bytes)
381 bg_ratio = min(DIV_ROUND_UP(bg_bytes, PAGE_SIZE) * 100 /
382 global_avail, 100UL);
383 bytes = bg_bytes = 0;
384 }
385
386 if (bytes)
387 thresh = DIV_ROUND_UP(bytes, PAGE_SIZE);
388 else
389 thresh = (ratio * available_memory) / 100;
390
391 if (bg_bytes)
392 bg_thresh = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
393 else
394 bg_thresh = (bg_ratio * available_memory) / 100;
395
396 if (bg_thresh >= thresh)
397 bg_thresh = thresh / 2;
398 tsk = current;
399 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
400 bg_thresh += bg_thresh / 4;
401 thresh += thresh / 4;
402 }
403 dtc->thresh = thresh;
404 dtc->bg_thresh = bg_thresh;
405
406
407 if (!gdtc)
408 trace_global_dirty_state(bg_thresh, thresh);
409}
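
/*
 * Worked example: with vm_dirty_ratio = 20, dirty_background_ratio = 10 and
 * 1,000,000 dirtyable pages, domain_dirty_limits() yields thresh = 200,000
 * and bg_thresh = 100,000 pages.  A PF_LESS_THROTTLE or realtime caller gets
 * both bumped by 25% (250,000 / 125,000).
 */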

/**
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 * @pbackground: out parameter for bg_thresh
 * @pdirty: out parameter for thresh
 *
 * Calculate bg_thresh and thresh for global_wb_domain.  See
 * domain_dirty_limits() for details.
 */
419void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
420{
421 struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
422
423 gdtc.avail = global_dirtyable_memory();
424 domain_dirty_limits(&gdtc);
425
426 *pbackground = gdtc.bg_thresh;
427 *pdirty = gdtc.thresh;
428}
429
430
431
432
433
434
435
436
437static unsigned long zone_dirty_limit(struct zone *zone)
438{
439 unsigned long zone_memory = zone_dirtyable_memory(zone);
440 struct task_struct *tsk = current;
441 unsigned long dirty;
442
443 if (vm_dirty_bytes)
444 dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
445 zone_memory / global_dirtyable_memory();
446 else
447 dirty = vm_dirty_ratio * zone_memory / 100;
448
449 if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
450 dirty += dirty / 4;
451
452 return dirty;
453}
454
455
456
457
458
459
460
461
462bool zone_dirty_ok(struct zone *zone)
463{
464 unsigned long limit = zone_dirty_limit(zone);
465
466 return zone_page_state(zone, NR_FILE_DIRTY) +
467 zone_page_state(zone, NR_UNSTABLE_NFS) +
468 zone_page_state(zone, NR_WRITEBACK) <= limit;
469}
470
471int dirty_background_ratio_handler(struct ctl_table *table, int write,
472 void __user *buffer, size_t *lenp,
473 loff_t *ppos)
474{
475 int ret;
476
477 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
478 if (ret == 0 && write)
479 dirty_background_bytes = 0;
480 return ret;
481}
482
483int dirty_background_bytes_handler(struct ctl_table *table, int write,
484 void __user *buffer, size_t *lenp,
485 loff_t *ppos)
486{
487 int ret;
488
489 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
490 if (ret == 0 && write)
491 dirty_background_ratio = 0;
492 return ret;
493}
494
495int dirty_ratio_handler(struct ctl_table *table, int write,
496 void __user *buffer, size_t *lenp,
497 loff_t *ppos)
498{
499 int old_ratio = vm_dirty_ratio;
500 int ret;
501
502 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
503 if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
504 writeback_set_ratelimit();
505 vm_dirty_bytes = 0;
506 }
507 return ret;
508}
509
510int dirty_bytes_handler(struct ctl_table *table, int write,
511 void __user *buffer, size_t *lenp,
512 loff_t *ppos)
513{
514 unsigned long old_bytes = vm_dirty_bytes;
515 int ret;
516
517 ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
518 if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
519 writeback_set_ratelimit();
520 vm_dirty_ratio = 0;
521 }
522 return ret;
523}
524
525static unsigned long wp_next_time(unsigned long cur_time)
526{
527 cur_time += VM_COMPLETIONS_PERIOD_LEN;
528
529 if (!cur_time)
530 return 1;
531 return cur_time;
532}
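
/*
 * wp_next_time() never returns 0 because dom->period_time == 0 is used by
 * writeout_period() and wb_domain_writeout_inc() to mean "period timer not
 * running"; a wrapped jiffies value of 0 would otherwise be mistaken for
 * that idle state.
 */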
533
534static void wb_domain_writeout_inc(struct wb_domain *dom,
535 struct fprop_local_percpu *completions,
536 unsigned int max_prop_frac)
537{
538 __fprop_inc_percpu_max(&dom->completions, completions,
539 max_prop_frac);
540
541 if (!unlikely(dom->period_time)) {
542
543
544
545
546
547
548 dom->period_time = wp_next_time(jiffies);
549 mod_timer(&dom->period_timer, dom->period_time);
550 }
551}
552
553
554
555
556
557static inline void __wb_writeout_inc(struct bdi_writeback *wb)
558{
559 struct wb_domain *cgdom;
560
561 __inc_wb_stat(wb, WB_WRITTEN);
562 wb_domain_writeout_inc(&global_wb_domain, &wb->completions,
563 wb->bdi->max_prop_frac);
564
565 cgdom = mem_cgroup_wb_domain(wb);
566 if (cgdom)
567 wb_domain_writeout_inc(cgdom, wb_memcg_completions(wb),
568 wb->bdi->max_prop_frac);
569}
570
571void wb_writeout_inc(struct bdi_writeback *wb)
572{
573 unsigned long flags;
574
575 local_irq_save(flags);
576 __wb_writeout_inc(wb);
577 local_irq_restore(flags);
578}
579EXPORT_SYMBOL_GPL(wb_writeout_inc);
580
581
582
583
584
585static void writeout_period(unsigned long t)
586{
587 struct wb_domain *dom = (void *)t;
588 int miss_periods = (jiffies - dom->period_time) /
589 VM_COMPLETIONS_PERIOD_LEN;
590
591 if (fprop_new_period(&dom->completions, miss_periods + 1)) {
592 dom->period_time = wp_next_time(dom->period_time +
593 miss_periods * VM_COMPLETIONS_PERIOD_LEN);
594 mod_timer(&dom->period_timer, dom->period_time);
595 } else {
596
597
598
599
600 dom->period_time = 0;
601 }
602}
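
/*
 * The period timer is deferrable, so writeout_period() can fire long after
 * the period it was scheduled for.  fprop_new_period() is therefore told how
 * many whole periods were missed, and the timer is only re-armed while
 * completion events keep arriving; otherwise period_time is cleared and
 * wb_domain_writeout_inc() will restart the timer on the next writeout.
 */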
603
604int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
605{
606 memset(dom, 0, sizeof(*dom));
607
608 spin_lock_init(&dom->lock);
609
610 init_timer_deferrable(&dom->period_timer);
611 dom->period_timer.function = writeout_period;
612 dom->period_timer.data = (unsigned long)dom;
613
614 dom->dirty_limit_tstamp = jiffies;
615
616 return fprop_global_init(&dom->completions, gfp);
617}
618
619#ifdef CONFIG_CGROUP_WRITEBACK
620void wb_domain_exit(struct wb_domain *dom)
621{
622 del_timer_sync(&dom->period_timer);
623 fprop_global_destroy(&dom->completions);
624}
625#endif
626
627
628
629
630
631
632static unsigned int bdi_min_ratio;
633
634int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
635{
636 int ret = 0;
637
638 spin_lock_bh(&bdi_lock);
639 if (min_ratio > bdi->max_ratio) {
640 ret = -EINVAL;
641 } else {
642 min_ratio -= bdi->min_ratio;
643 if (bdi_min_ratio + min_ratio < 100) {
644 bdi_min_ratio += min_ratio;
645 bdi->min_ratio += min_ratio;
646 } else {
647 ret = -EINVAL;
648 }
649 }
650 spin_unlock_bh(&bdi_lock);
651
652 return ret;
653}
654
655int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
656{
657 int ret = 0;
658
659 if (max_ratio > 100)
660 return -EINVAL;
661
662 spin_lock_bh(&bdi_lock);
663 if (bdi->min_ratio > max_ratio) {
664 ret = -EINVAL;
665 } else {
666 bdi->max_ratio = max_ratio;
667 bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
668 }
669 spin_unlock_bh(&bdi_lock);
670
671 return ret;
672}
673EXPORT_SYMBOL(bdi_set_max_ratio);
674
675static unsigned long dirty_freerun_ceiling(unsigned long thresh,
676 unsigned long bg_thresh)
677{
678 return (thresh + bg_thresh) / 2;
679}
680
681static unsigned long hard_dirty_limit(struct wb_domain *dom,
682 unsigned long thresh)
683{
684 return max(thresh, dom->dirty_limit);
685}
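
/*
 * Two reference points used throughout the position control below:
 * dirty_freerun_ceiling() is the midpoint of the background and hard
 * thresholds; below it, dirtiers run free with no throttling at all.
 * hard_dirty_limit() uses the larger of the current thresh and the
 * slowly-tracking domain dirty_limit (see update_dirty_limit()), so the
 * effective hard limit never sits below the current threshold.
 */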
686
687
688
689
690
691static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
692 unsigned long filepages, unsigned long headroom)
693{
694 struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc);
695 unsigned long clean = filepages - min(filepages, mdtc->dirty);
696 unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty);
697 unsigned long other_clean = global_clean - min(global_clean, clean);
698
699 mdtc->avail = filepages + min(headroom, other_clean);
700}

/**
 * __wb_calc_thresh - @wb's share of the dirty throttling threshold
 * @dtc: dirty_throttle_control of interest
 *
 * Returns @wb's dirty limit in pages.  The term "dirty" in the context of
 * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
 *
 * Note that balance_dirty_pages() will only seriously take it as a hard limit
 * when sleeping max_pause per page is not enough to keep the dirty pages
 * under control.  For example, when the device is completely stalled due to
 * some error condition, or when there are 1000 dd tasks writing to a slow
 * 10MB/s USB key.  In the other, normal situations it acts more gently, by
 * throttling the tasks more (rather than completely blocking them) when the
 * wb dirty pages go high.
 *
 * It allocates high/low dirty limits to fast/slow devices, in order to
 * prevent
 * - starving fast devices
 * - piling up dirty pages (which will take a long time to sync) on slow
 *   devices
 *
 * The wb's share of the dirty limit adapts to its throughput and is bounded
 * by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
 */
723static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
724{
725 struct wb_domain *dom = dtc_dom(dtc);
726 unsigned long thresh = dtc->thresh;
727 u64 wb_thresh;
728 long numerator, denominator;
729 unsigned long wb_min_ratio, wb_max_ratio;
730
731
732
733
734 fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
735 &numerator, &denominator);
736
737 wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
738 wb_thresh *= numerator;
739 do_div(wb_thresh, denominator);
740
741 wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
742
743 wb_thresh += (thresh * wb_min_ratio) / 100;
744 if (wb_thresh > (thresh * wb_max_ratio) / 100)
745 wb_thresh = thresh * wb_max_ratio / 100;
746
747 return wb_thresh;
748}
749
750unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
751{
752 struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
753 .thresh = thresh };
754 return __wb_calc_thresh(&gdtc);
755}

/*
 * Dirty position control curve used by wb_position_ratio():
 *
 *                            setpoint - dirty 3
 *         f(dirty) := 1.0 + (----------------)
 *                            limit - setpoint
 *
 * it's a 3rd order polynomial that subjects to
 *
 * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
 * (2) f(setpoint) = 1.0 => the balance point
 * (3) f(limit)    = 0   => the hard limit
 * (4) df/dx       < 0   => negative feedback control
 * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
 *     => fast response on large errors; small oscillation near setpoint
 */
771static long long pos_ratio_polynom(unsigned long setpoint,
772 unsigned long dirty,
773 unsigned long limit)
774{
775 long long pos_ratio;
776 long x;
777
778 x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
779 (limit - setpoint) | 1);
780 pos_ratio = x;
781 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
782 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
783 pos_ratio += 1 << RATELIMIT_CALC_SHIFT;
784
785 return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
786}
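
/*
 * Example with RATELIMIT_CALC_SHIFT = 10: for setpoint = 600, limit = 1000
 * and dirty = 800, x = (600 - 800) / (1000 - 600) = -0.5, so
 * pos_ratio = 1.0 + (-0.5)^3 = 0.875, i.e. ~896 in 10-bit fixed point:
 * tasks are throttled to 87.5% of their base ratelimit until the dirty
 * count falls back toward the setpoint.
 */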

/*
 * Dirty position control.
 *
 * (o) global/wb setpoints
 *
 * We want the dirty pages to be balanced around the global/wb setpoints.
 * When the number of dirty pages is higher/lower than the setpoint, the
 * dirty position control ratio (and hence the task dirty ratelimit) will
 * be decreased/increased to bring the dirty pages back to the setpoint.
 *
 *     pos_ratio = 1 << RATELIMIT_CALC_SHIFT
 *
 *     if (dirty < setpoint) scale up   pos_ratio
 *     if (dirty > setpoint) scale down pos_ratio
 *
 *     if (wb_dirty < wb_setpoint) scale up   pos_ratio
 *     if (wb_dirty > wb_setpoint) scale down pos_ratio
 *
 *     task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
 *
 * (o) global control line
 *
 * The global factor follows the cubic curve from pos_ratio_polynom():
 * 2.0 at the freerun ceiling, 1.0 at the global setpoint (the midpoint of
 * the freerun ceiling and the hard limit) and 0 at the hard limit itself.
 *
 * (o) wb control line
 *
 * The per-wb factor is a linear function of wb_dirty: 1.0 at wb_setpoint,
 * falling off over a span of roughly 8 seconds worth of the wb's write
 * bandwidth and clamped to 1/4 beyond that.  The global and wb factors are
 * multiplied together, so a wb is only allowed to dirty faster when both
 * the global and its own dirty counts sit below their setpoints.
 *
 * For strictlimit wbs (BDI_CAP_STRICTLIMIT) the wb factor is instead
 * computed with the same cubic curve against wb_thresh, and the smaller of
 * the global and wb values is used.
 */
863static void wb_position_ratio(struct dirty_throttle_control *dtc)
864{
865 struct bdi_writeback *wb = dtc->wb;
866 unsigned long write_bw = wb->avg_write_bandwidth;
867 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
868 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
869 unsigned long wb_thresh = dtc->wb_thresh;
870 unsigned long x_intercept;
871 unsigned long setpoint;
872 unsigned long wb_setpoint;
873 unsigned long span;
874 long long pos_ratio;
875 long x;
876
877 dtc->pos_ratio = 0;
878
879 if (unlikely(dtc->dirty >= limit))
880 return;
881
882
883
884
885
886
887 setpoint = (freerun + limit) / 2;
888 pos_ratio = pos_ratio_polynom(setpoint, dtc->dirty, limit);
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
916 long long wb_pos_ratio;
917
918 if (dtc->wb_dirty < 8) {
919 dtc->pos_ratio = min_t(long long, pos_ratio * 2,
920 2 << RATELIMIT_CALC_SHIFT);
921 return;
922 }
923
924 if (dtc->wb_dirty >= wb_thresh)
925 return;
926
927 wb_setpoint = dirty_freerun_ceiling(wb_thresh,
928 dtc->wb_bg_thresh);
929
930 if (wb_setpoint == 0 || wb_setpoint == wb_thresh)
931 return;
932
933 wb_pos_ratio = pos_ratio_polynom(wb_setpoint, dtc->wb_dirty,
934 wb_thresh);
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957 dtc->pos_ratio = min(pos_ratio, wb_pos_ratio);
958 return;
959 }
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992 if (unlikely(wb_thresh > dtc->thresh))
993 wb_thresh = dtc->thresh;
994
995
996
997
998
999
1000
1001 wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
1002
1003
1004
1005
1006 x = div_u64((u64)wb_thresh << 16, dtc->thresh | 1);
1007 wb_setpoint = setpoint * (u64)x >> 16;
1008
1009
1010
1011
1012
1013
1014
1015
1016 span = (dtc->thresh - wb_thresh + 8 * write_bw) * (u64)x >> 16;
1017 x_intercept = wb_setpoint + span;
1018
1019 if (dtc->wb_dirty < x_intercept - span / 4) {
1020 pos_ratio = div64_u64(pos_ratio * (x_intercept - dtc->wb_dirty),
1021 (x_intercept - wb_setpoint) | 1);
1022 } else
1023 pos_ratio /= 4;
1024
1025
1026
1027
1028
1029
1030 x_intercept = wb_thresh / 2;
1031 if (dtc->wb_dirty < x_intercept) {
1032 if (dtc->wb_dirty > x_intercept / 8)
1033 pos_ratio = div_u64(pos_ratio * x_intercept,
1034 dtc->wb_dirty);
1035 else
1036 pos_ratio *= 8;
1037 }
1038
1039 dtc->pos_ratio = pos_ratio;
1040}
1041
1042static void wb_update_write_bandwidth(struct bdi_writeback *wb,
1043 unsigned long elapsed,
1044 unsigned long written)
1045{
1046 const unsigned long period = roundup_pow_of_two(3 * HZ);
1047 unsigned long avg = wb->avg_write_bandwidth;
1048 unsigned long old = wb->write_bandwidth;
1049 u64 bw;
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061 bw = written - min(written, wb->written_stamp);
1062 bw *= HZ;
1063 if (unlikely(elapsed > period)) {
1064 do_div(bw, elapsed);
1065 avg = bw;
1066 goto out;
1067 }
1068 bw += (u64)wb->write_bandwidth * (period - elapsed);
1069 bw >>= ilog2(period);
1070
1071
1072
1073
1074 if (avg > old && old >= (unsigned long)bw)
1075 avg -= (avg - old) >> 3;
1076
1077 if (avg < old && old <= (unsigned long)bw)
1078 avg += (old - avg) >> 3;
1079
1080out:
1081
1082 avg = max(avg, 1LU);
1083 if (wb_has_dirty_io(wb)) {
1084 long delta = avg - wb->avg_write_bandwidth;
1085 WARN_ON_ONCE(atomic_long_add_return(delta,
1086 &wb->bdi->tot_write_bandwidth) <= 0);
1087 }
1088 wb->write_bandwidth = bw;
1089 wb->avg_write_bandwidth = avg;
1090}
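
/*
 * wb->write_bandwidth is an exponentially weighted average of the completion
 * rate over a ~3s window (a sample spanning more than the whole period simply
 * replaces it).  wb->avg_write_bandwidth adds a second level of smoothing: it
 * moves 1/8 of the gap toward the last write_bandwidth estimate, and only
 * when the newest sample points the same way, which filters out short spikes
 * and dips.
 */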
1091
1092static void update_dirty_limit(struct dirty_throttle_control *dtc)
1093{
1094 struct wb_domain *dom = dtc_dom(dtc);
1095 unsigned long thresh = dtc->thresh;
1096 unsigned long limit = dom->dirty_limit;
1097
1098
1099
1100
1101 if (limit < thresh) {
1102 limit = thresh;
1103 goto update;
1104 }
1105
1106
1107
1108
1109
1110
1111 thresh = max(thresh, dtc->dirty);
1112 if (limit > thresh) {
1113 limit -= (limit - thresh) >> 5;
1114 goto update;
1115 }
1116 return;
1117update:
1118 dom->dirty_limit = limit;
1119}
1120
1121static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
1122 unsigned long now)
1123{
1124 struct wb_domain *dom = dtc_dom(dtc);
1125
1126
1127
1128
1129 if (time_before(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL))
1130 return;
1131
1132 spin_lock(&dom->lock);
1133 if (time_after_eq(now, dom->dirty_limit_tstamp + BANDWIDTH_INTERVAL)) {
1134 update_dirty_limit(dtc);
1135 dom->dirty_limit_tstamp = now;
1136 }
1137 spin_unlock(&dom->lock);
1138}

/*
 * Maintain wb->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal wb tasks will be curbed at or below it in the long term.
 * Obviously it should be around (write_bw / N) when there are N dd tasks.
 */
1146static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
1147 unsigned long dirtied,
1148 unsigned long elapsed)
1149{
1150 struct bdi_writeback *wb = dtc->wb;
1151 unsigned long dirty = dtc->dirty;
1152 unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
1153 unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
1154 unsigned long setpoint = (freerun + limit) / 2;
1155 unsigned long write_bw = wb->avg_write_bandwidth;
1156 unsigned long dirty_ratelimit = wb->dirty_ratelimit;
1157 unsigned long dirty_rate;
1158 unsigned long task_ratelimit;
1159 unsigned long balanced_dirty_ratelimit;
1160 unsigned long step;
1161 unsigned long x;
1162
1163
1164
1165
1166
1167 dirty_rate = (dirtied - wb->dirtied_stamp) * HZ / elapsed;
1168
1169
1170
1171
1172 task_ratelimit = (u64)dirty_ratelimit *
1173 dtc->pos_ratio >> RATELIMIT_CALC_SHIFT;
1174 task_ratelimit++;
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206 balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
1207 dirty_rate | 1);
1208
1209
1210
1211 if (unlikely(balanced_dirty_ratelimit > write_bw))
1212 balanced_dirty_ratelimit = write_bw;
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248 step = 0;
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261 if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
1262 dirty = dtc->wb_dirty;
1263 if (dtc->wb_dirty < 8)
1264 setpoint = dtc->wb_dirty + 1;
1265 else
1266 setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
1267 }
1268
1269 if (dirty < setpoint) {
1270 x = min3(wb->balanced_dirty_ratelimit,
1271 balanced_dirty_ratelimit, task_ratelimit);
1272 if (dirty_ratelimit < x)
1273 step = x - dirty_ratelimit;
1274 } else {
1275 x = max3(wb->balanced_dirty_ratelimit,
1276 balanced_dirty_ratelimit, task_ratelimit);
1277 if (dirty_ratelimit > x)
1278 step = dirty_ratelimit - x;
1279 }
1280
1281
1282
1283
1284
1285
1286 step >>= dirty_ratelimit / (2 * step + 1);
1287
1288
1289
1290 step = (step + 7) / 8;
1291
1292 if (dirty_ratelimit < balanced_dirty_ratelimit)
1293 dirty_ratelimit += step;
1294 else
1295 dirty_ratelimit -= step;
1296
1297 wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
1298 wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
1299
1300 trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
1301}
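
/*
 * The core identity above: balanced_dirty_ratelimit = task_ratelimit *
 * write_bw / dirty_rate.  If the tasks currently throttled at task_ratelimit
 * dirtied dirty_rate pages/s while the device completed write_bw pages/s,
 * then scaling the ratelimit by write_bw / dirty_rate gives the rate at
 * which dirtying would exactly match the device's throughput.
 * dirty_ratelimit is then stepped only a fraction of the way toward that
 * target, and only when the position control agrees on the direction, so
 * that estimation noise does not make it oscillate.
 */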
1302
1303static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
1304 struct dirty_throttle_control *mdtc,
1305 unsigned long start_time,
1306 bool update_ratelimit)
1307{
1308 struct bdi_writeback *wb = gdtc->wb;
1309 unsigned long now = jiffies;
1310 unsigned long elapsed = now - wb->bw_time_stamp;
1311 unsigned long dirtied;
1312 unsigned long written;
1313
1314 lockdep_assert_held(&wb->list_lock);
1315
1316
1317
1318
1319 if (elapsed < BANDWIDTH_INTERVAL)
1320 return;
1321
1322 dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
1323 written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
1324
1325
1326
1327
1328
1329 if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
1330 goto snapshot;
1331
1332 if (update_ratelimit) {
1333 domain_update_bandwidth(gdtc, now);
1334 wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
1335
1336
1337
1338
1339
1340 if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
1341 domain_update_bandwidth(mdtc, now);
1342 wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
1343 }
1344 }
1345 wb_update_write_bandwidth(wb, elapsed, written);
1346
1347snapshot:
1348 wb->dirtied_stamp = dirtied;
1349 wb->written_stamp = written;
1350 wb->bw_time_stamp = now;
1351}
1352
1353void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
1354{
1355 struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
1356
1357 __wb_update_bandwidth(&gdtc, NULL, start_time, false);
1358}

/*
 * After a task has dirtied this number of pages,
 * balance_dirty_pages_ratelimited() will look to see if it needs to start
 * dirty throttling.
 *
 * If dirty_poll_interval is too low, big NUMA machines will call the
 * expensive global_page_state() too often.  So scale it near-sqrt to the
 * safety margin (the number of pages we may dirty without exceeding the
 * dirty limits).
 */
1368static unsigned long dirty_poll_interval(unsigned long dirty,
1369 unsigned long thresh)
1370{
1371 if (thresh > dirty)
1372 return 1UL << (ilog2(thresh - dirty) >> 1);
1373
1374 return 1;
1375}
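
/*
 * Example: with a safety margin of thresh - dirty = 16384 pages,
 * ilog2(16384) = 14, so a task may dirty 1 << 7 = 128 pages (roughly the
 * square root of the margin) before it has to call balance_dirty_pages()
 * again.
 */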
1376
1377static unsigned long wb_max_pause(struct bdi_writeback *wb,
1378 unsigned long wb_dirty)
1379{
1380 unsigned long bw = wb->avg_write_bandwidth;
1381 unsigned long t;
1382
1383
1384
1385
1386
1387
1388
1389
1390 t = wb_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
1391 t++;
1392
1393 return min_t(unsigned long, t, MAX_PAUSE);
1394}
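
/*
 * The cap above is roughly one eighth of the time the wb would need to
 * write back its current dirty pages at the measured bandwidth, so that a
 * small dirty pool is not drained completely (leaving the disk idle) while
 * the dirtier sleeps; it is further clamped to MAX_PAUSE (200ms).
 */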
1395
1396static long wb_min_pause(struct bdi_writeback *wb,
1397 long max_pause,
1398 unsigned long task_ratelimit,
1399 unsigned long dirty_ratelimit,
1400 int *nr_dirtied_pause)
1401{
1402 long hi = ilog2(wb->avg_write_bandwidth);
1403 long lo = ilog2(wb->dirty_ratelimit);
1404 long t;
1405 long pause;
1406 int pages;
1407
1408
1409 t = max(1, HZ / 100);
1410
1411
1412
1413
1414
1415
1416
1417 if (hi > lo)
1418 t += (hi - lo) * (10 * HZ) / 1024;
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438 t = min(t, 1 + max_pause / 2);
1439 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449 if (pages < DIRTY_POLL_THRESH) {
1450 t = max_pause;
1451 pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
1452 if (pages > DIRTY_POLL_THRESH) {
1453 pages = DIRTY_POLL_THRESH;
1454 t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
1455 }
1456 }
1457
1458 pause = HZ * pages / (task_ratelimit + 1);
1459 if (pause > max_pause) {
1460 t = max_pause;
1461 pages = task_ratelimit * t / roundup_pow_of_two(HZ);
1462 }
1463
1464 *nr_dirtied_pause = pages;
1465
1466
1467
1468 return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
1469}
1470
1471static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
1472{
1473 struct bdi_writeback *wb = dtc->wb;
1474 unsigned long wb_reclaimable;
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489 dtc->wb_thresh = __wb_calc_thresh(dtc);
1490 dtc->wb_bg_thresh = dtc->thresh ?
1491 div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503 if (dtc->wb_thresh < 2 * wb_stat_error(wb)) {
1504 wb_reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
1505 dtc->wb_dirty = wb_reclaimable + wb_stat_sum(wb, WB_WRITEBACK);
1506 } else {
1507 wb_reclaimable = wb_stat(wb, WB_RECLAIMABLE);
1508 dtc->wb_dirty = wb_reclaimable + wb_stat(wb, WB_WRITEBACK);
1509 }
1510}

/*
 * balance_dirty_pages() must be called by processes which are generating
 * dirty data.  It looks at the number of dirty pages in the machine and will
 * force the caller to wait once crossing the
 * (background_thresh + dirty_thresh) / 2 freerun ceiling.  If we're over
 * `background_thresh' then the writeback threads are woken to perform some
 * writeout.
 */
1519static void balance_dirty_pages(struct address_space *mapping,
1520 struct bdi_writeback *wb,
1521 unsigned long pages_dirtied)
1522{
1523 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1524 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1525 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1526 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1527 &mdtc_stor : NULL;
1528 struct dirty_throttle_control *sdtc;
1529 unsigned long nr_reclaimable;
1530 long period;
1531 long pause;
1532 long max_pause;
1533 long min_pause;
1534 int nr_dirtied_pause;
1535 bool dirty_exceeded = false;
1536 unsigned long task_ratelimit;
1537 unsigned long dirty_ratelimit;
1538 struct backing_dev_info *bdi = wb->bdi;
1539 bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
1540 unsigned long start_time = jiffies;
1541
1542 for (;;) {
1543 unsigned long now = jiffies;
1544 unsigned long dirty, thresh, bg_thresh;
1545 unsigned long m_dirty = 0;
1546 unsigned long m_thresh = 0;
1547 unsigned long m_bg_thresh = 0;
1548
1549
1550
1551
1552
1553
1554
1555 nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
1556 global_page_state(NR_UNSTABLE_NFS);
1557 gdtc->avail = global_dirtyable_memory();
1558 gdtc->dirty = nr_reclaimable + global_page_state(NR_WRITEBACK);
1559
1560 domain_dirty_limits(gdtc);
1561
1562 if (unlikely(strictlimit)) {
1563 wb_dirty_limits(gdtc);
1564
1565 dirty = gdtc->wb_dirty;
1566 thresh = gdtc->wb_thresh;
1567 bg_thresh = gdtc->wb_bg_thresh;
1568 } else {
1569 dirty = gdtc->dirty;
1570 thresh = gdtc->thresh;
1571 bg_thresh = gdtc->bg_thresh;
1572 }
1573
1574 if (mdtc) {
1575 unsigned long filepages, headroom, writeback;
1576
1577
1578
1579
1580
1581 mem_cgroup_wb_stats(wb, &filepages, &headroom,
1582 &mdtc->dirty, &writeback);
1583 mdtc->dirty += writeback;
1584 mdtc_calc_avail(mdtc, filepages, headroom);
1585
1586 domain_dirty_limits(mdtc);
1587
1588 if (unlikely(strictlimit)) {
1589 wb_dirty_limits(mdtc);
1590 m_dirty = mdtc->wb_dirty;
1591 m_thresh = mdtc->wb_thresh;
1592 m_bg_thresh = mdtc->wb_bg_thresh;
1593 } else {
1594 m_dirty = mdtc->dirty;
1595 m_thresh = mdtc->thresh;
1596 m_bg_thresh = mdtc->bg_thresh;
1597 }
1598 }
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612 if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
1613 (!mdtc ||
1614 m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
1615 unsigned long intv = dirty_poll_interval(dirty, thresh);
1616 unsigned long m_intv = ULONG_MAX;
1617
1618 current->dirty_paused_when = now;
1619 current->nr_dirtied = 0;
1620 if (mdtc)
1621 m_intv = dirty_poll_interval(m_dirty, m_thresh);
1622 current->nr_dirtied_pause = min(intv, m_intv);
1623 break;
1624 }
1625
1626 if (unlikely(!writeback_in_progress(wb)))
1627 wb_start_background_writeback(wb);
1628
1629
1630
1631
1632
1633 if (!strictlimit)
1634 wb_dirty_limits(gdtc);
1635
1636 dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
1637 ((gdtc->dirty > gdtc->thresh) || strictlimit);
1638
1639 wb_position_ratio(gdtc);
1640 sdtc = gdtc;
1641
1642 if (mdtc) {
1643
1644
1645
1646
1647
1648
1649 if (!strictlimit)
1650 wb_dirty_limits(mdtc);
1651
1652 dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
1653 ((mdtc->dirty > mdtc->thresh) || strictlimit);
1654
1655 wb_position_ratio(mdtc);
1656 if (mdtc->pos_ratio < gdtc->pos_ratio)
1657 sdtc = mdtc;
1658 }
1659
1660 if (dirty_exceeded && !wb->dirty_exceeded)
1661 wb->dirty_exceeded = 1;
1662
1663 if (time_is_before_jiffies(wb->bw_time_stamp +
1664 BANDWIDTH_INTERVAL)) {
1665 spin_lock(&wb->list_lock);
1666 __wb_update_bandwidth(gdtc, mdtc, start_time, true);
1667 spin_unlock(&wb->list_lock);
1668 }
1669
1670
1671 dirty_ratelimit = wb->dirty_ratelimit;
1672 task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
1673 RATELIMIT_CALC_SHIFT;
1674 max_pause = wb_max_pause(wb, sdtc->wb_dirty);
1675 min_pause = wb_min_pause(wb, max_pause,
1676 task_ratelimit, dirty_ratelimit,
1677 &nr_dirtied_pause);
1678
1679 if (unlikely(task_ratelimit == 0)) {
1680 period = max_pause;
1681 pause = max_pause;
1682 goto pause;
1683 }
1684 period = HZ * pages_dirtied / task_ratelimit;
1685 pause = period;
1686 if (current->dirty_paused_when)
1687 pause -= now - current->dirty_paused_when;
1688
1689
1690
1691
1692
1693
1694
1695 if (pause < min_pause) {
1696 trace_balance_dirty_pages(wb,
1697 sdtc->thresh,
1698 sdtc->bg_thresh,
1699 sdtc->dirty,
1700 sdtc->wb_thresh,
1701 sdtc->wb_dirty,
1702 dirty_ratelimit,
1703 task_ratelimit,
1704 pages_dirtied,
1705 period,
1706 min(pause, 0L),
1707 start_time);
1708 if (pause < -HZ) {
1709 current->dirty_paused_when = now;
1710 current->nr_dirtied = 0;
1711 } else if (period) {
1712 current->dirty_paused_when += period;
1713 current->nr_dirtied = 0;
1714 } else if (current->nr_dirtied_pause <= pages_dirtied)
1715 current->nr_dirtied_pause += pages_dirtied;
1716 break;
1717 }
1718 if (unlikely(pause > max_pause)) {
1719
1720 now += min(pause - max_pause, max_pause);
1721 pause = max_pause;
1722 }
1723
1724pause:
1725 trace_balance_dirty_pages(wb,
1726 sdtc->thresh,
1727 sdtc->bg_thresh,
1728 sdtc->dirty,
1729 sdtc->wb_thresh,
1730 sdtc->wb_dirty,
1731 dirty_ratelimit,
1732 task_ratelimit,
1733 pages_dirtied,
1734 period,
1735 pause,
1736 start_time);
1737 __set_current_state(TASK_KILLABLE);
1738 io_schedule_timeout(pause);
1739
1740 current->dirty_paused_when = now + pause;
1741 current->nr_dirtied = 0;
1742 current->nr_dirtied_pause = nr_dirtied_pause;
1743
1744
1745
1746
1747
1748 if (task_ratelimit)
1749 break;
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761 if (sdtc->wb_dirty <= wb_stat_error(wb))
1762 break;
1763
1764 if (fatal_signal_pending(current))
1765 break;
1766 }
1767
1768 if (!dirty_exceeded && wb->dirty_exceeded)
1769 wb->dirty_exceeded = 0;
1770
1771 if (writeback_in_progress(wb))
1772 return;
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782 if (laptop_mode)
1783 return;
1784
1785 if (nr_reclaimable > gdtc->bg_thresh)
1786 wb_start_background_writeback(wb);
1787}
1788
1789static DEFINE_PER_CPU(int, bdp_ratelimits);

/*
 * Per-CPU count of pages that were dirtied but whose throttling budget was
 * never settled, typically because the dirtying task exited before its next
 * balance_dirty_pages() checkpoint.  balance_dirty_pages_ratelimited() picks
 * these leaks up and charges them to the next dirtier on the same CPU, so
 * that swarms of short-lived tasks (e.g. a parallel kernel build) cannot
 * collectively escape dirty throttling.
 */
1805DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;

/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * On really big machines, getting the global dirty state is expensive, so
 * try to avoid calling it too often (ratelimiting).  But once we're over the
 * dirty memory limit we decrease the ratelimiting by a lot, to prevent
 * individual processes from overshooting the limit by (ratelimit_pages) each.
 */
1820void balance_dirty_pages_ratelimited(struct address_space *mapping)
1821{
1822 struct inode *inode = mapping->host;
1823 struct backing_dev_info *bdi = inode_to_bdi(inode);
1824 struct bdi_writeback *wb = NULL;
1825 int ratelimit;
1826 int *p;
1827
1828 if (!bdi_cap_account_dirty(bdi))
1829 return;
1830
1831 if (inode_cgwb_enabled(inode))
1832 wb = wb_get_create_current(bdi, GFP_KERNEL);
1833 if (!wb)
1834 wb = &bdi->wb;
1835
1836 ratelimit = current->nr_dirtied_pause;
1837 if (wb->dirty_exceeded)
1838 ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
1839
1840 preempt_disable();
1841
1842
1843
1844
1845
1846
1847 p = this_cpu_ptr(&bdp_ratelimits);
1848 if (unlikely(current->nr_dirtied >= ratelimit))
1849 *p = 0;
1850 else if (unlikely(*p >= ratelimit_pages)) {
1851 *p = 0;
1852 ratelimit = 0;
1853 }
1854
1855
1856
1857
1858
1859 p = this_cpu_ptr(&dirty_throttle_leaks);
1860 if (*p > 0 && current->nr_dirtied < ratelimit) {
1861 unsigned long nr_pages_dirtied;
1862 nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
1863 *p -= nr_pages_dirtied;
1864 current->nr_dirtied += nr_pages_dirtied;
1865 }
1866 preempt_enable();
1867
1868 if (unlikely(current->nr_dirtied >= ratelimit))
1869 balance_dirty_pages(mapping, wb, current->nr_dirtied);
1870
1871 wb_put(wb);
1872}
1873EXPORT_SYMBOL(balance_dirty_pages_ratelimited);

/**
 * wb_over_bg_thresh - does @wb need to be written back?
 * @wb: bdi_writeback of interest
 *
 * Determines whether background writeback should keep writing @wb or it's
 * clean enough.  Returns %true if writeback should continue.
 */
1882bool wb_over_bg_thresh(struct bdi_writeback *wb)
1883{
1884 struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
1885 struct dirty_throttle_control mdtc_stor = { MDTC_INIT(wb, &gdtc_stor) };
1886 struct dirty_throttle_control * const gdtc = &gdtc_stor;
1887 struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
1888 &mdtc_stor : NULL;
1889
1890
1891
1892
1893
1894 gdtc->avail = global_dirtyable_memory();
1895 gdtc->dirty = global_page_state(NR_FILE_DIRTY) +
1896 global_page_state(NR_UNSTABLE_NFS);
1897 domain_dirty_limits(gdtc);
1898
1899 if (gdtc->dirty > gdtc->bg_thresh)
1900 return true;
1901
1902 if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(gdtc))
1903 return true;
1904
1905 if (mdtc) {
1906 unsigned long filepages, headroom, writeback;
1907
1908 mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty,
1909 &writeback);
1910 mdtc_calc_avail(mdtc, filepages, headroom);
1911 domain_dirty_limits(mdtc);
1912
1913 if (mdtc->dirty > mdtc->bg_thresh)
1914 return true;
1915
1916 if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(mdtc))
1917 return true;
1918 }
1919
1920 return false;
1921}
1922
1923void throttle_vm_writeout(gfp_t gfp_mask)
1924{
1925 unsigned long background_thresh;
1926 unsigned long dirty_thresh;
1927
1928 for ( ; ; ) {
1929 global_dirty_limits(&background_thresh, &dirty_thresh);
1930 dirty_thresh = hard_dirty_limit(&global_wb_domain, dirty_thresh);
1931
1932
1933
1934
1935
1936 dirty_thresh += dirty_thresh / 10;
1937
1938 if (global_page_state(NR_UNSTABLE_NFS) +
1939 global_page_state(NR_WRITEBACK) <= dirty_thresh)
1940 break;
1941 congestion_wait(BLK_RW_ASYNC, HZ/10);
1942
1943
1944
1945
1946
1947
1948 if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
1949 break;
1950 }
1951}
1952
1953
1954
1955
1956int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
1957 void __user *buffer, size_t *length, loff_t *ppos)
1958{
1959 proc_dointvec(table, write, buffer, length, ppos);
1960 return 0;
1961}
1962
1963#ifdef CONFIG_BLOCK
1964void laptop_mode_timer_fn(unsigned long data)
1965{
1966 struct request_queue *q = (struct request_queue *)data;
1967 int nr_pages = global_page_state(NR_FILE_DIRTY) +
1968 global_page_state(NR_UNSTABLE_NFS);
1969 struct bdi_writeback *wb;
1970
1971
1972
1973
1974
1975 if (!bdi_has_dirty_io(&q->backing_dev_info))
1976 return;
1977
1978 rcu_read_lock();
1979 list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node)
1980 if (wb_has_dirty_io(wb))
1981 wb_start_writeback(wb, nr_pages, true,
1982 WB_REASON_LAPTOP_TIMER);
1983 rcu_read_unlock();
1984}
1985
1986
1987
1988
1989
1990
1991void laptop_io_completion(struct backing_dev_info *info)
1992{
1993 mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
1994}
1995
1996
1997
1998
1999
2000
2001void laptop_sync_completion(void)
2002{
2003 struct backing_dev_info *bdi;
2004
2005 rcu_read_lock();
2006
2007 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
2008 del_timer(&bdi->laptop_mode_wb_timer);
2009
2010 rcu_read_unlock();
2011}
2012#endif
2013
/*
 * If ratelimit_pages is too high then we can get into dirty-data overload
 * if a large number of processes all perform writes at the same time.
 * If it is too low then SMP machines will call the (expensive) global page
 * state accounting too often.
 *
 * Here we set ratelimit_pages to a level which ensures that when all CPUs
 * are dirtying in parallel, the overshoot is limited to about 3% (1/32) of
 * the dirty threshold.
 */
2025void writeback_set_ratelimit(void)
2026{
2027 struct wb_domain *dom = &global_wb_domain;
2028 unsigned long background_thresh;
2029 unsigned long dirty_thresh;
2030
2031 global_dirty_limits(&background_thresh, &dirty_thresh);
2032 dom->dirty_limit = dirty_thresh;
2033 ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
2034 if (ratelimit_pages < 16)
2035 ratelimit_pages = 16;
2036}
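
/*
 * Example: with a 200,000-page dirty threshold and 8 online CPUs this gives
 * ratelimit_pages = 200000 / (8 * 32) = 781, so even if every CPU dirties
 * its full quota before the next balance_dirty_pages() call, the collective
 * overshoot stays around 8 * 781 ~= 3% of the threshold.
 */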
2037
2038static int
2039ratelimit_handler(struct notifier_block *self, unsigned long action,
2040 void *hcpu)
2041{
2042
2043 switch (action & ~CPU_TASKS_FROZEN) {
2044 case CPU_ONLINE:
2045 case CPU_DEAD:
2046 writeback_set_ratelimit();
2047 return NOTIFY_OK;
2048 default:
2049 return NOTIFY_DONE;
2050 }
2051}
2052
2053static struct notifier_block ratelimit_nb = {
2054 .notifier_call = ratelimit_handler,
2055 .next = NULL,
2056};
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076void __init page_writeback_init(void)
2077{
2078 BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
2079
2080 writeback_set_ratelimit();
2081 register_cpu_notifier(&ratelimit_nb);
2082}
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101void tag_pages_for_writeback(struct address_space *mapping,
2102 pgoff_t start, pgoff_t end)
2103{
2104#define WRITEBACK_TAG_BATCH 4096
2105 unsigned long tagged;
2106
2107 do {
2108 spin_lock_irq(&mapping->tree_lock);
2109 tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
2110 &start, end, WRITEBACK_TAG_BATCH,
2111 PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
2112 spin_unlock_irq(&mapping->tree_lock);
2113 WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
2114 cond_resched();
2115
2116 } while (tagged >= WRITEBACK_TAG_BATCH && start);
2117}
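
/*
 * Tagging is done in WRITEBACK_TAG_BATCH chunks with the tree lock dropped
 * and cond_resched() between batches, so tagging a huge dirty file neither
 * holds mapping->tree_lock for too long nor hogs the CPU.
 */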
2118EXPORT_SYMBOL(tag_pages_for_writeback);

/**
 * write_cache_pages - walk the list of dirty pages of the given address space
 * and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to the writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().
 * fsync() and msync() need to guarantee that all the data which was dirty at
 * the time the call was made gets new I/O started against it.  If
 * wbc->sync_mode is WB_SYNC_ALL then we were called for data integrity and
 * we must wait for existing IO to complete.
 *
 * To avoid livelocks (when other processes dirty new pages), we first tag
 * pages which should be written back with the TOWRITE tag and only then
 * start writing them.  For data-integrity sync we have to be careful not to
 * miss pages (e.g. because some other process cleared the TOWRITE tag we
 * set); the rule is that the TOWRITE tag can only be cleared by the process
 * clearing the DIRTY tag (and submitting the page for I/O).
 */
2142int write_cache_pages(struct address_space *mapping,
2143 struct writeback_control *wbc, writepage_t writepage,
2144 void *data)
2145{
2146 int ret = 0;
2147 int done = 0;
2148 struct pagevec pvec;
2149 int nr_pages;
2150 pgoff_t uninitialized_var(writeback_index);
2151 pgoff_t index;
2152 pgoff_t end;
2153 pgoff_t done_index;
2154 int cycled;
2155 int range_whole = 0;
2156 int tag;
2157
2158 pagevec_init(&pvec, 0);
2159 if (wbc->range_cyclic) {
2160 writeback_index = mapping->writeback_index;
2161 index = writeback_index;
2162 if (index == 0)
2163 cycled = 1;
2164 else
2165 cycled = 0;
2166 end = -1;
2167 } else {
2168 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2169 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2170 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2171 range_whole = 1;
2172 cycled = 1;
2173 }
2174 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2175 tag = PAGECACHE_TAG_TOWRITE;
2176 else
2177 tag = PAGECACHE_TAG_DIRTY;
2178retry:
2179 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2180 tag_pages_for_writeback(mapping, index, end);
2181 done_index = index;
2182 while (!done && (index <= end)) {
2183 int i;
2184
2185 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
2186 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
2187 if (nr_pages == 0)
2188 break;
2189
2190 for (i = 0; i < nr_pages; i++) {
2191 struct page *page = pvec.pages[i];
2192
2193
2194
2195
2196
2197
2198
2199
2200 if (page->index > end) {
2201
2202
2203
2204
2205 done = 1;
2206 break;
2207 }
2208
2209 done_index = page->index;
2210
2211 lock_page(page);
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221 if (unlikely(page->mapping != mapping)) {
2222continue_unlock:
2223 unlock_page(page);
2224 continue;
2225 }
2226
2227 if (!PageDirty(page)) {
2228
2229 goto continue_unlock;
2230 }
2231
2232 if (PageWriteback(page)) {
2233 if (wbc->sync_mode != WB_SYNC_NONE)
2234 wait_on_page_writeback(page);
2235 else
2236 goto continue_unlock;
2237 }
2238
2239 BUG_ON(PageWriteback(page));
2240 if (!clear_page_dirty_for_io(page))
2241 goto continue_unlock;
2242
2243 trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
2244 ret = (*writepage)(page, wbc, data);
2245 if (unlikely(ret)) {
2246 if (ret == AOP_WRITEPAGE_ACTIVATE) {
2247 unlock_page(page);
2248 ret = 0;
2249 } else {
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259 done_index = page->index + 1;
2260 done = 1;
2261 break;
2262 }
2263 }
2264
2265
2266
2267
2268
2269
2270
2271 if (--wbc->nr_to_write <= 0 &&
2272 wbc->sync_mode == WB_SYNC_NONE) {
2273 done = 1;
2274 break;
2275 }
2276 }
2277 pagevec_release(&pvec);
2278 cond_resched();
2279 }
2280 if (!cycled && !done) {
2281
2282
2283
2284
2285
2286 cycled = 1;
2287 index = 0;
2288 end = writeback_index - 1;
2289 goto retry;
2290 }
2291 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2292 mapping->writeback_index = done_index;
2293
2294 return ret;
2295}
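
/*
 * Note on range_cyclic above: the scan starts at mapping->writeback_index;
 * if the first pass ran to the end of the file without being interrupted,
 * a second pass covers the [0, writeback_index - 1] range so the whole file
 * is eventually visited, and writeback_index is updated for the next cycle.
 */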
2296EXPORT_SYMBOL(write_cache_pages);
2297
2298
2299
2300
2301
2302static int __writepage(struct page *page, struct writeback_control *wbc,
2303 void *data)
2304{
2305 struct address_space *mapping = data;
2306 int ret = mapping->a_ops->writepage(page, wbc);
2307 mapping_set_error(mapping, ret);
2308 return ret;
2309}
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319int generic_writepages(struct address_space *mapping,
2320 struct writeback_control *wbc)
2321{
2322 struct blk_plug plug;
2323 int ret;
2324
2325
2326 if (!mapping->a_ops->writepage)
2327 return 0;
2328
2329 blk_start_plug(&plug);
2330 ret = write_cache_pages(mapping, wbc, __writepage, mapping);
2331 blk_finish_plug(&plug);
2332 return ret;
2333}
2334
2335EXPORT_SYMBOL(generic_writepages);
2336
2337int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
2338{
2339 int ret;
2340
2341 if (wbc->nr_to_write <= 0)
2342 return 0;
2343 if (mapping->a_ops->writepages)
2344 ret = mapping->a_ops->writepages(mapping, wbc);
2345 else
2346 ret = generic_writepages(mapping, wbc);
2347 return ret;
2348}
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359int write_one_page(struct page *page, int wait)
2360{
2361 struct address_space *mapping = page->mapping;
2362 int ret = 0;
2363 struct writeback_control wbc = {
2364 .sync_mode = WB_SYNC_ALL,
2365 .nr_to_write = 1,
2366 };
2367
2368 BUG_ON(!PageLocked(page));
2369
2370 if (wait)
2371 wait_on_page_writeback(page);
2372
2373 if (clear_page_dirty_for_io(page)) {
2374 page_cache_get(page);
2375 ret = mapping->a_ops->writepage(page, &wbc);
2376 if (ret == 0 && wait) {
2377 wait_on_page_writeback(page);
2378 if (PageError(page))
2379 ret = -EIO;
2380 }
2381 page_cache_release(page);
2382 } else {
2383 unlock_page(page);
2384 }
2385 return ret;
2386}
2387EXPORT_SYMBOL(write_one_page);
2388
2389
2390
2391
2392int __set_page_dirty_no_writeback(struct page *page)
2393{
2394 if (!PageDirty(page))
2395 return !TestSetPageDirty(page);
2396 return 0;
2397}
2398
2399
2400
2401
2402
2403
2404
2405
2406void account_page_dirtied(struct page *page, struct address_space *mapping,
2407 struct mem_cgroup *memcg)
2408{
2409 struct inode *inode = mapping->host;
2410
2411 trace_writeback_dirty_page(page, mapping);
2412
2413 if (mapping_cap_account_dirty(mapping)) {
2414 struct bdi_writeback *wb;
2415
2416 inode_attach_wb(inode, page);
2417 wb = inode_to_wb(inode);
2418
2419 mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
2420 __inc_zone_page_state(page, NR_FILE_DIRTY);
2421 __inc_zone_page_state(page, NR_DIRTIED);
2422 __inc_wb_stat(wb, WB_RECLAIMABLE);
2423 __inc_wb_stat(wb, WB_DIRTIED);
2424 task_io_account_write(PAGE_CACHE_SIZE);
2425 current->nr_dirtied++;
2426 this_cpu_inc(bdp_ratelimits);
2427 }
2428}
2429EXPORT_SYMBOL(account_page_dirtied);
2430
2431
2432
2433
2434
2435
2436void account_page_cleaned(struct page *page, struct address_space *mapping,
2437 struct mem_cgroup *memcg, struct bdi_writeback *wb)
2438{
2439 if (mapping_cap_account_dirty(mapping)) {
2440 mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
2441 dec_zone_page_state(page, NR_FILE_DIRTY);
2442 dec_wb_stat(wb, WB_RECLAIMABLE);
2443 task_io_account_cancelled_write(PAGE_CACHE_SIZE);
2444 }
2445}
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459int __set_page_dirty_nobuffers(struct page *page)
2460{
2461 struct mem_cgroup *memcg;
2462
2463 memcg = mem_cgroup_begin_page_stat(page);
2464 if (!TestSetPageDirty(page)) {
2465 struct address_space *mapping = page_mapping(page);
2466 unsigned long flags;
2467
2468 if (!mapping) {
2469 mem_cgroup_end_page_stat(memcg);
2470 return 1;
2471 }
2472
2473 spin_lock_irqsave(&mapping->tree_lock, flags);
2474 BUG_ON(page_mapping(page) != mapping);
2475 WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
2476 account_page_dirtied(page, mapping, memcg);
2477 radix_tree_tag_set(&mapping->page_tree, page_index(page),
2478 PAGECACHE_TAG_DIRTY);
2479 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2480 mem_cgroup_end_page_stat(memcg);
2481
2482 if (mapping->host) {
2483
2484 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
2485 }
2486 return 1;
2487 }
2488 mem_cgroup_end_page_stat(memcg);
2489 return 0;
2490}
2491EXPORT_SYMBOL(__set_page_dirty_nobuffers);
2492
2493
2494
2495
2496
2497
2498
2499
2500void account_page_redirty(struct page *page)
2501{
2502 struct address_space *mapping = page->mapping;
2503
2504 if (mapping && mapping_cap_account_dirty(mapping)) {
2505 struct inode *inode = mapping->host;
2506 struct bdi_writeback *wb;
2507 bool locked;
2508
2509 wb = unlocked_inode_to_wb_begin(inode, &locked);
2510 current->nr_dirtied--;
2511 dec_zone_page_state(page, NR_DIRTIED);
2512 dec_wb_stat(wb, WB_DIRTIED);
2513 unlocked_inode_to_wb_end(inode, locked);
2514 }
2515}
2516EXPORT_SYMBOL(account_page_redirty);
2517
2518
2519
2520
2521
2522
2523int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
2524{
2525 int ret;
2526
2527 wbc->pages_skipped++;
2528 ret = __set_page_dirty_nobuffers(page);
2529 account_page_redirty(page);
2530 return ret;
2531}
2532EXPORT_SYMBOL(redirty_page_for_writepage);
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545int set_page_dirty(struct page *page)
2546{
2547 struct address_space *mapping = page_mapping(page);
2548
2549 if (likely(mapping)) {
2550 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561 if (PageReclaim(page))
2562 ClearPageReclaim(page);
2563#ifdef CONFIG_BLOCK
2564 if (!spd)
2565 spd = __set_page_dirty_buffers;
2566#endif
2567 return (*spd)(page);
2568 }
2569 if (!PageDirty(page)) {
2570 if (!TestSetPageDirty(page))
2571 return 1;
2572 }
2573 return 0;
2574}
2575EXPORT_SYMBOL(set_page_dirty);
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587int set_page_dirty_lock(struct page *page)
2588{
2589 int ret;
2590
2591 lock_page(page);
2592 ret = set_page_dirty(page);
2593 unlock_page(page);
2594 return ret;
2595}
2596EXPORT_SYMBOL(set_page_dirty_lock);
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611void cancel_dirty_page(struct page *page)
2612{
2613 struct address_space *mapping = page_mapping(page);
2614
2615 if (mapping_cap_account_dirty(mapping)) {
2616 struct inode *inode = mapping->host;
2617 struct bdi_writeback *wb;
2618 struct mem_cgroup *memcg;
2619 bool locked;
2620
2621 memcg = mem_cgroup_begin_page_stat(page);
2622 wb = unlocked_inode_to_wb_begin(inode, &locked);
2623
2624 if (TestClearPageDirty(page))
2625 account_page_cleaned(page, mapping, memcg, wb);
2626
2627 unlocked_inode_to_wb_end(inode, locked);
2628 mem_cgroup_end_page_stat(memcg);
2629 } else {
2630 ClearPageDirty(page);
2631 }
2632}
2633EXPORT_SYMBOL(cancel_dirty_page);
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649int clear_page_dirty_for_io(struct page *page)
2650{
2651 struct address_space *mapping = page_mapping(page);
2652 int ret = 0;
2653
2654 BUG_ON(!PageLocked(page));
2655
2656 if (mapping && mapping_cap_account_dirty(mapping)) {
2657 struct inode *inode = mapping->host;
2658 struct bdi_writeback *wb;
2659 struct mem_cgroup *memcg;
2660 bool locked;
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687 if (page_mkclean(page))
2688 set_page_dirty(page);
2689
2690
2691
2692
2693
2694
2695
2696
2697 memcg = mem_cgroup_begin_page_stat(page);
2698 wb = unlocked_inode_to_wb_begin(inode, &locked);
2699 if (TestClearPageDirty(page)) {
2700 mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY);
2701 dec_zone_page_state(page, NR_FILE_DIRTY);
2702 dec_wb_stat(wb, WB_RECLAIMABLE);
2703 ret = 1;
2704 }
2705 unlocked_inode_to_wb_end(inode, locked);
2706 mem_cgroup_end_page_stat(memcg);
2707 return ret;
2708 }
2709 return TestClearPageDirty(page);
2710}
2711EXPORT_SYMBOL(clear_page_dirty_for_io);
2712
2713int test_clear_page_writeback(struct page *page)
2714{
2715 struct address_space *mapping = page_mapping(page);
2716 struct mem_cgroup *memcg;
2717 int ret;
2718
2719 memcg = mem_cgroup_begin_page_stat(page);
2720 if (mapping) {
2721 struct inode *inode = mapping->host;
2722 struct backing_dev_info *bdi = inode_to_bdi(inode);
2723 unsigned long flags;
2724
2725 spin_lock_irqsave(&mapping->tree_lock, flags);
2726 ret = TestClearPageWriteback(page);
2727 if (ret) {
2728 radix_tree_tag_clear(&mapping->page_tree,
2729 page_index(page),
2730 PAGECACHE_TAG_WRITEBACK);
2731 if (bdi_cap_account_writeback(bdi)) {
2732 struct bdi_writeback *wb = inode_to_wb(inode);
2733
2734 __dec_wb_stat(wb, WB_WRITEBACK);
2735 __wb_writeout_inc(wb);
2736 }
2737 }
2738 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2739 } else {
2740 ret = TestClearPageWriteback(page);
2741 }
2742 if (ret) {
2743 mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
2744 dec_zone_page_state(page, NR_WRITEBACK);
2745 inc_zone_page_state(page, NR_WRITTEN);
2746 }
2747 mem_cgroup_end_page_stat(memcg);
2748 return ret;
2749}
2750
2751int __test_set_page_writeback(struct page *page, bool keep_write)
2752{
2753 struct address_space *mapping = page_mapping(page);
2754 struct mem_cgroup *memcg;
2755 int ret;
2756
2757 memcg = mem_cgroup_begin_page_stat(page);
2758 if (mapping) {
2759 struct inode *inode = mapping->host;
2760 struct backing_dev_info *bdi = inode_to_bdi(inode);
2761 unsigned long flags;
2762
2763 spin_lock_irqsave(&mapping->tree_lock, flags);
2764 ret = TestSetPageWriteback(page);
2765 if (!ret) {
2766 radix_tree_tag_set(&mapping->page_tree,
2767 page_index(page),
2768 PAGECACHE_TAG_WRITEBACK);
2769 if (bdi_cap_account_writeback(bdi))
2770 __inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
2771 }
2772 if (!PageDirty(page))
2773 radix_tree_tag_clear(&mapping->page_tree,
2774 page_index(page),
2775 PAGECACHE_TAG_DIRTY);
2776 if (!keep_write)
2777 radix_tree_tag_clear(&mapping->page_tree,
2778 page_index(page),
2779 PAGECACHE_TAG_TOWRITE);
2780 spin_unlock_irqrestore(&mapping->tree_lock, flags);
2781 } else {
2782 ret = TestSetPageWriteback(page);
2783 }
2784 if (!ret) {
2785 mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
2786 inc_zone_page_state(page, NR_WRITEBACK);
2787 }
2788 mem_cgroup_end_page_stat(memcg);
2789 return ret;
2790
2791}
2792EXPORT_SYMBOL(__test_set_page_writeback);
2793
2794
2795
2796
2797
2798int mapping_tagged(struct address_space *mapping, int tag)
2799{
2800 return radix_tree_tagged(&mapping->page_tree, tag);
2801}
2802EXPORT_SYMBOL(mapping_tagged);
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812void wait_for_stable_page(struct page *page)
2813{
2814 if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
2815 wait_on_page_writeback(page);
2816}
2817EXPORT_SYMBOL_GPL(wait_for_stable_page);
2818