/*
 * mm/page-writeback.c
 *
 * Contains functions related to writing back dirty pages at the
 * address_space level: dirty accounting, dirty throttling and the
 * generic write_cache_pages() walker.
 */
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
#include <linux/mm_inline.h>
#include <trace/events/writeback.h>

#include "internal.h"

/*
 * Sleep at most 200ms at a time in balance_dirty_pages().
 */
#define MAX_PAUSE		max(HZ/5, 1)

/*
 * Try to keep balance_dirty_pages() call intervals above this many pages:
 * below it, the fixed overhead of entering the throttling path dominates
 * the work done per call.
 */
#define DIRTY_POLL_THRESH	(128 >> (PAGE_SHIFT - 10))
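
/*
 * With 4KiB pages (PAGE_SHIFT == 12), DIRTY_POLL_THRESH evaluates to
 * 128 >> 2 == 32 pages, i.e. the poll interval is kept above roughly
 * 128KiB worth of freshly dirtied pages.
 */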

/*
 * Estimate write bandwidth at 200ms intervals.
 */
#define BANDWIDTH_INTERVAL	max(HZ/5, 1)

#define RATELIMIT_CALC_SHIFT	10
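
/*
 * RATELIMIT_CALC_SHIFT gives the pos_ratio/ratelimit arithmetic below 10
 * fractional bits: 1 << RATELIMIT_CALC_SHIFT (1024) represents 1.0 and
 * 2 << RATELIMIT_CALC_SHIFT the 2.0 upper clamp.
 */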

/*
 * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look to see if it needs to force writeback or throttling.
 */
static long ratelimit_pages = 32;

/*
 * Start background writeback (via the flusher threads) at this percentage
 * of dirtyable memory.
 */
int dirty_background_ratio = 10;

/*
 * dirty_background_bytes starts at 0 (disabled) so that it is a function of
 * dirty_background_ratio * the amount of dirtyable memory.
 */
unsigned long dirty_background_bytes;

/*
 * free highmem will not be subtracted from the total free memory
 * for calculating free ratios if vm_highmem_is_dirtyable is true.
 */
int vm_highmem_is_dirtyable;

/*
 * The generator of dirty data starts writeback at this percentage of
 * dirtyable memory.
 */
int vm_dirty_ratio = 20;

/*
 * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
 * vm_dirty_ratio * the amount of dirtyable memory.
 */
unsigned long vm_dirty_bytes;

/*
 * The interval between `kupdate'-style writebacks, in centiseconds.
 */
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */

EXPORT_SYMBOL_GPL(dirty_writeback_interval);

/*
 * The longest time for which data is allowed to remain dirty, in
 * centiseconds.
 */
unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */

/*
 * Flag that makes the machine dump writes/reads and block dirtyings.
 */
int block_dump;

/*
 * Flag that puts the machine in "laptop mode". Doubles as a timeout in
 * jiffies: writeback of all dirty data is triggered this long after the
 * last I/O completes (see laptop_io_completion() below).
 */
int laptop_mode;

EXPORT_SYMBOL(laptop_mode);

/*
 * The global dirty threshold, maintained as a lagged estimate in
 * update_dirty_limit() so that the hard limit never drops abruptly
 * below the number of dirty pages.
 */
unsigned long global_dirty_limit;

/*
 * Scale the writeback cache size proportional to the relative writeout
 * speed of the devices: keep a floating proportion of writeout completions
 * between BDIs (see __bdi_writeout_inc()), aged in fixed-length periods by
 * a deferrable timer so that idle BDIs gradually lose their share.
 */
static struct fprop_global writeout_completions;

static void writeout_period(unsigned long t);
/* Timer for aging of writeout_completions */
static struct timer_list writeout_period_timer =
		TIMER_DEFERRED_INITIALIZER(writeout_period, 0, 0);
static unsigned long writeout_period_time = 0;

/*
 * Length of period for aging writeout fractions of bdis. This is an
 * arbitrarily chosen number. The longer the period, the slower fractions
 * will reflect changes in current behaviour (in case of a BDI going
 * idle, for example).
 */
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)

/*
 * Work out the current dirty-memory clamping and background writeout
 * thresholds.
 *
 * Only memory that can actually host dirty page cache -- free pages plus
 * the file LRU lists, minus the dirty balance reserve -- is counted as
 * "dirtyable" and used as the base for the dirty limits.
 */

/**
 * zone_dirtyable_memory - number of dirtyable pages in a zone
 * @zone: the zone
 *
 * Returns the zone's number of pages potentially available for dirty
 * page cache.  This is the base value for the per-zone dirty limits.
 */
static unsigned long zone_dirtyable_memory(struct zone *zone)
{
	unsigned long nr_pages;

	nr_pages = zone_page_state(zone, NR_FREE_PAGES);
	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);

	nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
	nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);

	return nr_pages;
}

static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
	int node;
	unsigned long x = 0;

	for_each_node_state(node, N_HIGH_MEMORY) {
		struct zone *z = &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];

		x += zone_dirtyable_memory(z);
	}

	/*
	 * Unreclaimable memory (kernel memory or anonymous memory without
	 * swap) can bring the dirtyable pages below the zone's dirty
	 * balance reserve, in which case the sum above underflows.  Treat
	 * the underflow as zero rather than a huge unsigned value.
	 */
	if ((long)x < 0)
		x = 0;

	/*
	 * Make sure the number of highmem pages never exceeds the total
	 * dirtyable memory; this can only happen in strange VM situations
	 * but we want to be safe against it.
	 */
	return min(x, total);
#else
	return 0;
#endif
}

/**
 * global_dirtyable_memory - number of globally dirtyable pages
 *
 * Returns the global number of pages potentially available for dirty
 * page cache.  This is the base value for the global dirty limits.
 */
static unsigned long global_dirtyable_memory(void)
{
	unsigned long x;

	x = global_page_state(NR_FREE_PAGES);
	x -= min(x, dirty_balance_reserve);

	x += global_page_state(NR_INACTIVE_FILE);
	x += global_page_state(NR_ACTIVE_FILE);

	if (!vm_highmem_is_dirtyable)
		x -= highmem_dirtyable_memory(x);

	return x + 1;	/* Ensure that we never return 0 */
}

/*
 * global_dirty_limits - background-writeback and dirty-throttling thresholds
 *
 * Calculate the dirty thresholds based on sysctl parameters
 * - vm.dirty_background_ratio  or  vm.dirty_background_bytes
 * - vm.dirty_ratio             or  vm.dirty_bytes
 * The dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd)
 * and real-time tasks.
 */
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
{
	const unsigned long available_memory = global_dirtyable_memory();
	unsigned long background;
	unsigned long dirty;
	struct task_struct *tsk;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
	else
		dirty = (vm_dirty_ratio * available_memory) / 100;

	if (dirty_background_bytes)
		background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
	else
		background = (dirty_background_ratio * available_memory) / 100;

	if (background >= dirty)
		background = dirty / 2;
	tsk = current;
	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
		background += background / 4;
		dirty += dirty / 4;
	}
	*pbackground = background;
	*pdirty = dirty;
	trace_global_dirty_state(background, dirty);
}
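
/*
 * Example: with 4KiB pages and 1GiB of dirtyable memory (262144 pages),
 * the default vm_dirty_ratio=20 and dirty_background_ratio=10 yield
 * *pdirty = 52428 pages (~205MiB) and *pbackground = 26214 pages
 * (~102MiB): background writeback starts at the latter, while direct
 * throttling engages past the midpoint of the two (the freerun ceiling).
 */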

/**
 * zone_dirty_limit - maximum number of dirty pages allowed in a zone
 * @zone: the zone
 *
 * Returns the maximum number of dirty pages allowed in a zone, based
 * on the zone's dirtyable memory.
 */
static unsigned long zone_dirty_limit(struct zone *zone)
{
	unsigned long zone_memory = zone_dirtyable_memory(zone);
	struct task_struct *tsk = current;
	unsigned long dirty;

	if (vm_dirty_bytes)
		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
			zone_memory / global_dirtyable_memory();
	else
		dirty = vm_dirty_ratio * zone_memory / 100;

	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
		dirty += dirty / 4;

	return dirty;
}

/**
 * zone_dirty_ok - tells whether a zone is within its dirty limits
 * @zone: the zone to check
 *
 * Returns %true when the dirty pages in @zone are within the zone's
 * dirty limit, %false if the limit is exceeded.
 */
bool zone_dirty_ok(struct zone *zone)
{
	unsigned long limit = zone_dirty_limit(zone);

	return zone_page_state(zone, NR_FILE_DIRTY) +
	       zone_page_state(zone, NR_UNSTABLE_NFS) +
	       zone_page_state(zone, NR_WRITEBACK) <= limit;
}

/*
 * sysctl handlers: each *_ratio and *_bytes knob pair is mutually
 * exclusive -- writing one of the pair clears the other.  Changing the
 * dirty limits also retunes ratelimit_pages via writeback_set_ratelimit().
 */
int dirty_background_ratio_handler(struct ctl_table *table, int write,
	void __user *buffer, size_t *lenp,
	loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_bytes = 0;
	return ret;
}

int dirty_background_bytes_handler(struct ctl_table *table, int write,
	void __user *buffer, size_t *lenp,
	loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write)
		dirty_background_ratio = 0;
	return ret;
}

int dirty_ratio_handler(struct ctl_table *table, int write,
	void __user *buffer, size_t *lenp,
	loff_t *ppos)
{
	int old_ratio = vm_dirty_ratio;
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
		writeback_set_ratelimit();
		vm_dirty_bytes = 0;
	}
	return ret;
}

int dirty_bytes_handler(struct ctl_table *table, int write,
	void __user *buffer, size_t *lenp,
	loff_t *ppos)
{
	unsigned long old_bytes = vm_dirty_bytes;
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
		writeback_set_ratelimit();
		vm_dirty_ratio = 0;
	}
	return ret;
}

static unsigned long wp_next_time(unsigned long cur_time)
{
	cur_time += VM_COMPLETIONS_PERIOD_LEN;
	/* 0 has the special meaning "timer is not running" */
	if (!cur_time)
		return 1;
	return cur_time;
}

/*
 * Increment the BDI's writeout completion count and the global writeout
 * completion count. Called from test_clear_page_writeback().
 */
static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
{
	__inc_bdi_stat(bdi, BDI_WRITTEN);
	__fprop_inc_percpu_max(&writeout_completions, &bdi->completions,
			       bdi->max_prop_frac);
	/* First event after period switching was turned off? */
	if (!unlikely(writeout_period_time)) {
		/*
		 * We can race with other __bdi_writeout_inc calls here but
		 * it does not cause any harm since the resulting time when
		 * the timer will fire and what is in writeout_period_time
		 * will be roughly the same.
		 */
		writeout_period_time = wp_next_time(jiffies);
		mod_timer(&writeout_period_timer, writeout_period_time);
	}
}

void bdi_writeout_inc(struct backing_dev_info *bdi)
{
	unsigned long flags;

	local_irq_save(flags);
	__bdi_writeout_inc(bdi);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);

/*
 * Obtain an accurate fraction of the BDI's portion of the overall
 * writeout completions.
 */
static void bdi_writeout_fraction(struct backing_dev_info *bdi,
		long *numerator, long *denominator)
{
	fprop_fraction_percpu(&writeout_completions, &bdi->completions,
				numerator, denominator);
}

/*
 * On an idle system, we can be called long after we scheduled because we
 * use deferred timers, so count missed periods.
 */
static void writeout_period(unsigned long t)
{
	int miss_periods = (jiffies - writeout_period_time) /
				 VM_COMPLETIONS_PERIOD_LEN;

	if (fprop_new_period(&writeout_completions, miss_periods + 1)) {
		writeout_period_time = wp_next_time(writeout_period_time +
				miss_periods * VM_COMPLETIONS_PERIOD_LEN);
		mod_timer(&writeout_period_timer, writeout_period_time);
	} else {
		/*
		 * Aging has zeroed all fractions. Stop wasting CPU on period
		 * updates.
		 */
		writeout_period_time = 0;
	}
}

/*
 * bdi_min_ratio keeps the sum of the minimum dirty throttling ratios
 * reserved by all BDIs; a bdi_set_min_ratio() request fails if it would
 * push the sum to 100% or beyond.
 */
static unsigned int bdi_min_ratio;

int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
{
	int ret = 0;

	spin_lock_bh(&bdi_lock);
	if (min_ratio > bdi->max_ratio) {
		ret = -EINVAL;
	} else {
		min_ratio -= bdi->min_ratio;
		if (bdi_min_ratio + min_ratio < 100) {
			bdi_min_ratio += min_ratio;
			bdi->min_ratio += min_ratio;
		} else {
			ret = -EINVAL;
		}
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}

int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
{
	int ret = 0;

	if (max_ratio > 100)
		return -EINVAL;

	spin_lock_bh(&bdi_lock);
	if (bdi->min_ratio > max_ratio) {
		ret = -EINVAL;
	} else {
		bdi->max_ratio = max_ratio;
		bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100;
	}
	spin_unlock_bh(&bdi_lock);

	return ret;
}
EXPORT_SYMBOL(bdi_set_max_ratio);

/*
 * Below the midpoint of the (background, dirty) threshold interval,
 * tasks run free of any dirty throttling.
 */
static unsigned long dirty_freerun_ceiling(unsigned long thresh,
					   unsigned long bg_thresh)
{
	return (thresh + bg_thresh) / 2;
}

/* The hard throttling limit: never let dirty pages go above this */
static unsigned long hard_dirty_limit(unsigned long thresh)
{
	return max(thresh, global_dirty_limit);
}

/**
 * bdi_dirty_limit - @bdi's share of dirty throttling threshold
 * @bdi: the backing_dev_info to query
 * @dirty: global dirty limit in pages
 *
 * Returns @bdi's dirty limit in pages.  "Dirty" in the context of dirty
 * balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
 *
 * The BDI's share is computed from its recent fraction of the global
 * writeout completions, interpolated above the reserved min_ratio floor
 * and capped at max_ratio.
 *
 * Note that balance_dirty_pages() only takes this as a hard limit when
 * sleeping max_pause per page is not enough to keep the dirty pages under
 * control -- eg. when the device is completely stalled due to some error
 * condition, or when many tasks write to a very slow device.
 */
unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
{
	u64 bdi_dirty;
	long numerator, denominator;

	/*
	 * Calculate this BDI's share of the dirty ratio.
	 */
	bdi_writeout_fraction(bdi, &numerator, &denominator);

	bdi_dirty = (dirty * (100 - bdi_min_ratio)) / 100;
	bdi_dirty *= numerator;
	do_div(bdi_dirty, denominator);

	bdi_dirty += (dirty * bdi->min_ratio) / 100;
	if (bdi_dirty > (dirty * bdi->max_ratio) / 100)
		bdi_dirty = dirty * bdi->max_ratio / 100;

	return bdi_dirty;
}

/*
 * Dirty position control: evaluate a third order polynomial through
 * (setpoint, 1.0) and (limit, 0):
 *
 *                           setpoint - dirty 3
 *        f(dirty) := 1.0 + (----------------)
 *                           limit - setpoint
 *
 * in fixed point with RATELIMIT_CALC_SHIFT fractional bits, clamped to
 * the range [0, 2.0].  The "| 1" in the divisor merely avoids a divide
 * by zero when limit == setpoint.
 */
static long long pos_ratio_polynom(unsigned long setpoint,
					  unsigned long dirty,
					  unsigned long limit)
{
	long long pos_ratio;
	long x;

	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
		      (limit - setpoint) | 1);
	pos_ratio = x;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
	pos_ratio += 1 << RATELIMIT_CALC_SHIFT;

	return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
}
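
/*
 * Spot checks of the polynomial (these follow directly from the
 * arithmetic above): at dirty == setpoint, x == 0 and f == 1.0 (1024 in
 * fixed point); at dirty == limit, x == -1.0 and f == 0; well below the
 * setpoint f saturates at the 2.0 clamp.  Throttling therefore eases
 * smoothly as dirty falls below the setpoint and hardens toward the limit.
 */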

/*
 * Dirty position control.
 *
 * (o) global/bdi setpoints
 *
 * We want the dirty pages to be balanced around the global/bdi setpoints.
 * When the number of dirty pages is higher/lower than the setpoint, the
 * dirty position control ratio (and hence the task dirty ratelimit) will
 * be decreased/increased to bring the dirty pages back to the setpoint.
 *
 *     pos_ratio = 1 << RATELIMIT_CALC_SHIFT means "dirty at the setpoint"
 *
 *     if (dirty < setpoint) scale up   pos_ratio
 *     if (dirty > setpoint) scale down pos_ratio
 *
 *     if (bdi_dirty < bdi_setpoint) scale up   pos_ratio
 *     if (bdi_dirty > bdi_setpoint) scale down pos_ratio
 *
 *     task_ratelimit = dirty_ratelimit * pos_ratio >> RATELIMIT_CALC_SHIFT
 *
 * (o) global control line
 *
 * The global control line is the pos_ratio_polynom() curve: equal to 1.0
 * at the global setpoint midway between the freerun ceiling and the hard
 * limit, falling smoothly to 0 at the limit, and clamped at 2.0 well
 * below the setpoint.
 *
 * (o) bdi control line
 *
 * The bdi control line is a linear ramp centred on bdi_setpoint whose
 * span scales with the bdi's write bandwidth, so that normal fluctuations
 * of the per-bdi dirty count move pos_ratio only gently; it is combined
 * with the global curve in the code below.
 */
static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
					unsigned long thresh,
					unsigned long bg_thresh,
					unsigned long dirty,
					unsigned long bdi_thresh,
					unsigned long bdi_dirty)
{
	unsigned long write_bw = bdi->avg_write_bandwidth;
	unsigned long freerun = dirty_freerun_ceiling(thresh, bg_thresh);
	unsigned long limit = hard_dirty_limit(thresh);
	unsigned long x_intercept;
	unsigned long setpoint;		/* dirty pages' target balance point */
	unsigned long bdi_setpoint;
	unsigned long span;
	long long pos_ratio;		/* for scaling up/down the rate limit */
	long x;

	if (unlikely(dirty >= limit))
		return 0;

	/*
	 * global setpoint
	 *
	 * See the comment for pos_ratio_polynom().
	 */
	setpoint = (freerun + limit) / 2;
	pos_ratio = pos_ratio_polynom(setpoint, dirty, limit);

	/*
	 * With BDI_CAP_STRICTLIMIT the bdi is throttled against its own
	 * counters even when the global numbers look fine (important for
	 * mistrusted filesystems such as fuse, which default to a tiny
	 * max_ratio).  Evaluate a second control curve around the bdi's
	 * own setpoint.
	 */
	if (unlikely(bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		long long bdi_pos_ratio;
		unsigned long bdi_bg_thresh;

		if (bdi_dirty < 8)
			return min_t(long long, pos_ratio * 2,
				     2 << RATELIMIT_CALC_SHIFT);

		if (bdi_dirty >= bdi_thresh)
			return 0;

		bdi_bg_thresh = div_u64((u64)bdi_thresh * bg_thresh, thresh);
		bdi_setpoint = dirty_freerun_ceiling(bdi_thresh,
						     bdi_bg_thresh);

		if (bdi_setpoint == 0 || bdi_setpoint == bdi_thresh)
			return 0;

		bdi_pos_ratio = pos_ratio_polynom(bdi_setpoint, bdi_dirty,
						  bdi_thresh);

		/*
		 * Use the smaller of the two position ratios: if the global
		 * state says "throttle hard", do not let a roomy bdi undo
		 * that, and vice versa.
		 */
		return min(pos_ratio, bdi_pos_ratio);
	}

	/*
	 * We have computed the basic pos_ratio above based on the global
	 * situation.  If the bdi is over/under its share of dirty pages,
	 * scale pos_ratio further down/up along a linear control line
	 * around bdi_setpoint, with a span proportional to the bdi's
	 * write bandwidth.
	 */

	/*
	 * bdi_thresh is not treated as some limiting factor as dirty_thresh
	 * is; it may drop below the bdi's fair share due to fluctuations in
	 * the writeout proportions.
	 */
	if (unlikely(bdi_thresh > thresh))
		bdi_thresh = thresh;
	/*
	 * It's very possible that bdi_thresh is close to 0 not because the
	 * device is slow, but because it has been inactive for a while.
	 * Honour such devices a reasonably good (hopefully IO efficient)
	 * threshold, so that the occasional writes won't be blocked and
	 * active writes can ramp up the threshold quickly.
	 */
	bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);
	/*
	 * scale the global setpoint to the bdi's:
	 *	bdi_setpoint = setpoint * bdi_thresh / thresh
	 */
	x = div_u64((u64)bdi_thresh << 16, thresh | 1);
	bdi_setpoint = setpoint * (u64)x >> 16;
	/*
	 * Use span=(8*write_bw) in the single-bdi case as indicated by
	 * (thresh - bdi_thresh ~= 0) and transit to bdi_thresh in the
	 * JBOD case:
	 *
	 *        bdi_thresh                    thresh - bdi_thresh
	 * span = --------- * (8 * write_bw) + ------------------- * bdi_thresh
	 *          thresh                           thresh
	 */
	span = (thresh - bdi_thresh + 8 * write_bw) * (u64)x >> 16;
	x_intercept = bdi_setpoint + span;

	if (bdi_dirty < x_intercept - span / 4) {
		pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
				      (x_intercept - bdi_setpoint) | 1);
	} else
		pos_ratio /= 4;

	/*
	 * bdi reserve area, safeguard against dirty pool underrun and disk
	 * idle: scale the rate up sharply when the bdi has very few dirty
	 * pages left.
	 */
	x_intercept = bdi_thresh / 2;
	if (bdi_dirty < x_intercept) {
		if (bdi_dirty > x_intercept / 8)
			pos_ratio = div_u64(pos_ratio * x_intercept, bdi_dirty);
		else
			pos_ratio *= 8;
	}

	return pos_ratio;
}

static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
				       unsigned long elapsed,
				       unsigned long written)
{
	const unsigned long period = roundup_pow_of_two(3 * HZ);
	unsigned long avg = bdi->avg_write_bandwidth;
	unsigned long old = bdi->write_bandwidth;
	u64 bw;

	/*
	 * bw = written * HZ / elapsed
	 *
	 *                   bw * elapsed + write_bandwidth * (period - elapsed)
	 * write_bandwidth = ---------------------------------------------------
	 *                                          period
	 *
	 * @written may have decreased due to account_page_redirty().
	 * Avoid underflowing the @bw calculation.
	 */
	bw = written - min(written, bdi->written_stamp);
	bw *= HZ;
	if (unlikely(elapsed > period)) {
		do_div(bw, elapsed);
		avg = bw;
		goto out;
	}
	bw += (u64)bdi->write_bandwidth * (period - elapsed);
	bw >>= ilog2(period);

	/*
	 * one more level of smoothing, for filtering out sudden spikes
	 */
	if (avg > old && old >= (unsigned long)bw)
		avg -= (avg - old) >> 3;

	if (avg < old && old <= (unsigned long)bw)
		avg += (old - avg) >> 3;

out:
	bdi->write_bandwidth = bw;
	bdi->avg_write_bandwidth = avg;
}
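
/*
 * Numerical sketch of the blend above: with HZ == 1000 the averaging
 * period rounds up to 4096 jiffies, so a 200ms sample contributes
 * roughly 200/4096 (~5%) of its instantaneous rate per update.  E.g.
 * 5120 pages written in 200ms is a 25600 pages/s instantaneous rate,
 * but the new write_bandwidth becomes ~1250 pages/s plus ~95% of the
 * previous value.
 */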

/*
 * The global dirtyable memory and dirty threshold could be suddenly
 * knocked down by a large amount (eg. on the startup of KVM in a
 * swapless system), leaving the dirty pages momentarily above the
 * freshly lowered threshold.  So keep global_dirty_limit as a lagged
 * estimate that only tracks @thresh downward gradually.
 */
static void update_dirty_limit(unsigned long thresh, unsigned long dirty)
{
	unsigned long limit = global_dirty_limit;

	/*
	 * Follow up in one step.
	 */
	if (limit < thresh) {
		limit = thresh;
		goto update;
	}

	/*
	 * Follow down slowly. Use the higher one as the target, because
	 * thresh may drop below dirty. This is exactly the reason to
	 * introduce global_dirty_limit, which is guaranteed to lie above
	 * the dirty pages.
	 */
	thresh = max(thresh, dirty);
	if (limit > thresh) {
		limit -= (limit - thresh) >> 5;
		goto update;
	}
	return;
update:
	global_dirty_limit = limit;
}
922
923static void global_update_bandwidth(unsigned long thresh,
924 unsigned long dirty,
925 unsigned long now)
926{
927 static DEFINE_SPINLOCK(dirty_lock);
928 static unsigned long update_time = INITIAL_JIFFIES;
929
930
931
932
933 if (time_before(now, update_time + BANDWIDTH_INTERVAL))
934 return;
935
936 spin_lock(&dirty_lock);
937 if (time_after_eq(now, update_time + BANDWIDTH_INTERVAL)) {
938 update_dirty_limit(thresh, dirty);
939 update_time = now;
940 }
941 spin_unlock(&dirty_lock);
942}

/*
 * Maintain bdi->dirty_ratelimit, the base dirty throttle rate.
 *
 * Normal bdi tasks will be curbed at or below it in the long term.
 * Obviously it should be around (write_bw / N) when there are N dd tasks.
 */
static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
				       unsigned long thresh,
				       unsigned long bg_thresh,
				       unsigned long dirty,
				       unsigned long bdi_thresh,
				       unsigned long bdi_dirty,
				       unsigned long dirtied,
				       unsigned long elapsed)
{
	unsigned long freerun = dirty_freerun_ceiling(thresh, bg_thresh);
	unsigned long limit = hard_dirty_limit(thresh);
	unsigned long setpoint = (freerun + limit) / 2;
	unsigned long write_bw = bdi->avg_write_bandwidth;
	unsigned long dirty_ratelimit = bdi->dirty_ratelimit;
	unsigned long dirty_rate;
	unsigned long task_ratelimit;
	unsigned long balanced_dirty_ratelimit;
	unsigned long pos_ratio;
	unsigned long step;
	unsigned long x;

	/*
	 * The dirty rate will match the writeout rate in the long term,
	 * except when dirty pages are truncated by userspace or re-dirtied
	 * by the filesystem.
	 */
	dirty_rate = (dirtied - bdi->dirtied_stamp) * HZ / elapsed;

	pos_ratio = bdi_position_ratio(bdi, thresh, bg_thresh, dirty,
				       bdi_thresh, bdi_dirty);
	/*
	 * task_ratelimit reflects each dirtier's rate for the past 200ms.
	 */
	task_ratelimit = (u64)dirty_ratelimit *
					pos_ratio >> RATELIMIT_CALC_SHIFT;
	task_ratelimit++; /* keep it nonzero so the rate can ramp up */

	/*
	 * A linear estimation of the "balanced" throttle rate. The theory
	 * is: if there are N tasks, each throttled at task_ratelimit, the
	 * bdi's dirty_rate will be measured as roughly N * task_ratelimit,
	 * so the rate that would exactly match write_bw is
	 *
	 *	balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate
	 *
	 * A per-bdi estimate like this converges well in practice and
	 * avoids global accounting.
	 */
	balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
					   dirty_rate | 1);
	/*
	 * balanced_dirty_ratelimit ~ write_bw / N, so it is reasonable to
	 * cap it at write_bw to suppress transient blowups when the
	 * measured dirty_rate is very low.
	 */
	if (unlikely(balanced_dirty_ratelimit > write_bw))
		balanced_dirty_ratelimit = write_bw;

	/*
	 * We could do dirty_ratelimit = balanced_dirty_ratelimit directly,
	 * but both dirty_rate and write_bw are noisy estimates.  Instead,
	 * dirty_ratelimit is only nudged toward balanced_dirty_ratelimit,
	 * and only when task_ratelimit agrees about the direction, which
	 * keeps the base rate steady while the dirty position merely
	 * oscillates around the setpoint.
	 */
	step = 0;

	/*
	 * For the strictlimit case, the calculations above were based on
	 * bdi counters and limits (starting from bdi_position_ratio()),
	 * hence "dirty" and "setpoint" below are also made bdi-local.
	 */
	if (unlikely(bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
		dirty = bdi_dirty;
		if (bdi_dirty < 8)
			setpoint = bdi_dirty + 1;
		else
			setpoint = (bdi_thresh +
				    bdi_dirty_limit(bdi, bg_thresh)) / 2;
	}

	if (dirty < setpoint) {
		x = min3(bdi->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit < x)
			step = x - dirty_ratelimit;
	} else {
		x = max3(bdi->balanced_dirty_ratelimit,
			 balanced_dirty_ratelimit, task_ratelimit);
		if (dirty_ratelimit > x)
			step = dirty_ratelimit - x;
	}

	/*
	 * Don't pursue 100% rate matching. It's impossible since the
	 * balanced rate itself is constantly fluctuating. So decrease the
	 * tracking speed when it gets close to the target. This helps
	 * eliminate pointless tremors.
	 */
	step >>= dirty_ratelimit / (2 * step + 1);
	/*
	 * Limit the tracking speed to avoid overshooting.
	 */
	step = (step + 7) / 8;

	if (dirty_ratelimit < balanced_dirty_ratelimit)
		dirty_ratelimit += step;
	else
		dirty_ratelimit -= step;

	bdi->dirty_ratelimit = max(dirty_ratelimit, 1UL);
	bdi->balanced_dirty_ratelimit = balanced_dirty_ratelimit;

	trace_bdi_dirty_ratelimit(bdi, dirty_rate, task_ratelimit);
}

void __bdi_update_bandwidth(struct backing_dev_info *bdi,
			    unsigned long thresh,
			    unsigned long bg_thresh,
			    unsigned long dirty,
			    unsigned long bdi_thresh,
			    unsigned long bdi_dirty,
			    unsigned long start_time)
{
	unsigned long now = jiffies;
	unsigned long elapsed = now - bdi->bw_time_stamp;
	unsigned long dirtied;
	unsigned long written;

	/*
	 * rate-limit, only update once every 200ms.
	 */
	if (elapsed < BANDWIDTH_INTERVAL)
		return;

	dirtied = percpu_counter_read(&bdi->bdi_stat[BDI_DIRTIED]);
	written = percpu_counter_read(&bdi->bdi_stat[BDI_WRITTEN]);

	/*
	 * Skip quiet periods when disk bandwidth is under-utilized
	 * (at least 1s of idle time between two flusher runs).
	 */
	if (elapsed > HZ && time_before(bdi->bw_time_stamp, start_time))
		goto snapshot;

	if (thresh) {
		global_update_bandwidth(thresh, dirty, now);
		bdi_update_dirty_ratelimit(bdi, thresh, bg_thresh, dirty,
					   bdi_thresh, bdi_dirty,
					   dirtied, elapsed);
	}
	bdi_update_write_bandwidth(bdi, elapsed, written);

snapshot:
	bdi->dirtied_stamp = dirtied;
	bdi->written_stamp = written;
	bdi->bw_time_stamp = now;
}

static void bdi_update_bandwidth(struct backing_dev_info *bdi,
				 unsigned long thresh,
				 unsigned long bg_thresh,
				 unsigned long dirty,
				 unsigned long bdi_thresh,
				 unsigned long bdi_dirty,
				 unsigned long start_time)
{
	if (time_is_after_eq_jiffies(bdi->bw_time_stamp + BANDWIDTH_INTERVAL))
		return;
	spin_lock(&bdi->wb.list_lock);
	__bdi_update_bandwidth(bdi, thresh, bg_thresh, dirty,
			       bdi_thresh, bdi_dirty, start_time);
	spin_unlock(&bdi->wb.list_lock);
}

/*
 * After a task has dirtied this many pages, balance_dirty_pages_ratelimited()
 * will look at the dirty limits again.  The interval scales with the
 * square root of the gap to the threshold: poll often when close to the
 * limit, rarely when far below it.
 */
static unsigned long dirty_poll_interval(unsigned long dirty,
					 unsigned long thresh)
{
	if (thresh > dirty)
		return 1UL << (ilog2(thresh - dirty) >> 1);

	return 1;
}
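
/*
 * For example: a gap of 1024 pages to the threshold gives an interval of
 * 1 << (10 >> 1) == 32 pages, while a gap of 16384 pages gives
 * 1 << 7 == 128 pages between polls.
 */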

static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
				   unsigned long bdi_dirty)
{
	unsigned long bw = bdi->avg_write_bandwidth;
	unsigned long t;

	/*
	 * Limit pause time for small memory systems. If sleeping for too
	 * long, a small pool of dirty/writeback pages may go empty and the
	 * disk go idle.
	 *
	 * 8 serves as the safety ratio.
	 */
	t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
	t++;

	return min_t(unsigned long, t, MAX_PAUSE);
}
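
/*
 * Rough numbers (HZ == 1000, 4KiB pages): a device averaging 25600
 * pages/s (~100MB/s) gives a divisor of 1 + 25600/128 == 201, so some
 * 2000 dirty pages on the bdi permit a pause of ~11 jiffies, well under
 * the 200ms MAX_PAUSE cap; tiny dirty pools thus get short pauses so
 * the disk is never allowed to drain dry and go idle.
 */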

static long bdi_min_pause(struct backing_dev_info *bdi,
			  long max_pause,
			  unsigned long task_ratelimit,
			  unsigned long dirty_ratelimit,
			  int *nr_dirtied_pause)
{
	long hi = ilog2(bdi->avg_write_bandwidth);
	long lo = ilog2(bdi->dirty_ratelimit);
	long t;		/* target pause */
	long pause;	/* estimated next pause */
	int pages;	/* target nr_dirtied_pause */

	/* target for 10ms pause on 1-dd case */
	t = max(1, HZ / 100);

	/*
	 * Scale up pause time for concurrent dirtiers in order to reduce
	 * CPU overheads: (N * 10ms) on 2^N concurrent tasks.
	 */
	if (hi > lo)
		t += (hi - lo) * (10 * HZ) / 1024;

	/*
	 * This is a bit convoluted. We try to base the next
	 * nr_dirtied_pause on the much more stable dirty_ratelimit.
	 * However the next pause time will be computed based on
	 * task_ratelimit, and the two rate limits may depart considerably
	 * at times. The smaller target pause wins to avoid stalling a
	 * light dirtier unduly.
	 */
	t = min(t, 1 + max_pause / 2);
	pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);

	/*
	 * A tiny nr_dirtied_pause hurts I/O performance for workloads that
	 * interleave reads with dirtying: each read would be interrupted
	 * by dirty throttling. So keep nr_dirtied_pause at or above
	 * DIRTY_POLL_THRESH where possible.
	 */
	if (pages < DIRTY_POLL_THRESH) {
		t = max_pause;
		pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
		if (pages > DIRTY_POLL_THRESH) {
			pages = DIRTY_POLL_THRESH;
			t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
		}
	}

	pause = HZ * pages / (task_ratelimit + 1);
	if (pause > max_pause) {
		t = max_pause;
		pages = task_ratelimit * t / roundup_pow_of_two(HZ);
	}

	*nr_dirtied_pause = pages;
	/*
	 * The minimal pause time will normally be half the target pause
	 * time.
	 */
	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
}

static inline void bdi_dirty_limits(struct backing_dev_info *bdi,
				    unsigned long dirty_thresh,
				    unsigned long background_thresh,
				    unsigned long *bdi_dirty,
				    unsigned long *bdi_thresh,
				    unsigned long *bdi_bg_thresh)
{
	unsigned long bdi_reclaimable;

	/*
	 * bdi_thresh is not treated as some limiting factor as dirty_thresh
	 * is; it may drop quickly due to fluctuations in the bdi's writeout
	 * proportion.  That is fine for the throttling logic: a bdi sitting
	 * at its threshold dominates the writeouts and so its threshold
	 * grows; one sitting above it gets written back and the other bdis'
	 * thresholds grow instead.
	 */
	*bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);

	if (bdi_bg_thresh)
		*bdi_bg_thresh = dirty_thresh ? div_u64((u64)*bdi_thresh *
							background_thresh,
							dirty_thresh) : 0;

	/*
	 * In order to avoid the stacked BDI deadlock we need to ensure we
	 * accurately count the 'dirty' pages when the threshold is low.
	 *
	 * Otherwise it would be possible to get thresh+n pages reported
	 * dirty, even though there are thresh-m pages actually dirty; with
	 * m+n sitting in the percpu deltas.
	 */
	if (*bdi_thresh < 2 * bdi_stat_error(bdi)) {
		bdi_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
		*bdi_dirty = bdi_reclaimable +
			     bdi_stat_sum(bdi, BDI_WRITEBACK);
	} else {
		bdi_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
		*bdi_dirty = bdi_reclaimable +
			     bdi_stat(bdi, BDI_WRITEBACK);
	}
}

/*
 * balance_dirty_pages() must be called by processes which are generating
 * dirty data.  It looks at the number of dirty pages in the machine and
 * will force the caller to wait once crossing the
 * (background_thresh + dirty_thresh) / 2 freerun ceiling.  If we're over
 * `background_thresh' then the flusher threads are woken to perform some
 * writeout.
 */
static void balance_dirty_pages(struct address_space *mapping,
				unsigned long pages_dirtied)
{
	unsigned long nr_reclaimable;	/* = file_dirty + unstable_nfs */
	unsigned long nr_dirty;  /* = file_dirty + writeback + unstable_nfs */
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	long period;
	long pause;
	long max_pause;
	long min_pause;
	int nr_dirtied_pause;
	bool dirty_exceeded = false;
	unsigned long task_ratelimit;
	unsigned long dirty_ratelimit;
	unsigned long pos_ratio;
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
	unsigned long start_time = jiffies;

	for (;;) {
		unsigned long now = jiffies;
		unsigned long uninitialized_var(bdi_thresh);
		unsigned long thresh;
		unsigned long uninitialized_var(bdi_dirty);
		unsigned long dirty;
		unsigned long bg_thresh;

		/*
		 * Unstable writes are a feature of certain networked
		 * filesystems (i.e. NFS) in which data may have been
		 * written to the server's write cache, but has not yet
		 * been flushed to permanent storage.
		 */
		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
					global_page_state(NR_UNSTABLE_NFS);
		nr_dirty = nr_reclaimable + global_page_state(NR_WRITEBACK);

		global_dirty_limits(&background_thresh, &dirty_thresh);

		if (unlikely(strictlimit)) {
			bdi_dirty_limits(bdi, dirty_thresh, background_thresh,
					 &bdi_dirty, &bdi_thresh, &bg_thresh);

			dirty = bdi_dirty;
			thresh = bdi_thresh;
		} else {
			dirty = nr_dirty;
			thresh = dirty_thresh;
			bg_thresh = background_thresh;
		}

		/*
		 * Throttle only when the background writeback cannot catch
		 * up.  This avoids (excessively) small writeouts when the
		 * bdi limits are ramping up in the !strictlimit case.
		 *
		 * In the strictlimit case the decision is based on the bdi
		 * counters and limits; small writeouts while the bdi limits
		 * ramp up are the price consciously paid for strictlimiting.
		 */
		if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh)) {
			current->dirty_paused_when = now;
			current->nr_dirtied = 0;
			current->nr_dirtied_pause =
				dirty_poll_interval(dirty, thresh);
			break;
		}

		if (unlikely(!writeback_in_progress(bdi)))
			bdi_start_background_writeback(bdi);

		if (!strictlimit)
			bdi_dirty_limits(bdi, dirty_thresh, background_thresh,
					 &bdi_dirty, &bdi_thresh, NULL);

		dirty_exceeded = (bdi_dirty > bdi_thresh) &&
				 ((nr_dirty > dirty_thresh) || strictlimit);
		if (dirty_exceeded && !bdi->dirty_exceeded)
			bdi->dirty_exceeded = 1;

		bdi_update_bandwidth(bdi, dirty_thresh, background_thresh,
				     nr_dirty, bdi_thresh, bdi_dirty,
				     start_time);

		dirty_ratelimit = bdi->dirty_ratelimit;
		pos_ratio = bdi_position_ratio(bdi, dirty_thresh,
					       background_thresh, nr_dirty,
					       bdi_thresh, bdi_dirty);
		task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
							RATELIMIT_CALC_SHIFT;
		max_pause = bdi_max_pause(bdi, bdi_dirty);
		min_pause = bdi_min_pause(bdi, max_pause,
					  task_ratelimit, dirty_ratelimit,
					  &nr_dirtied_pause);

		if (unlikely(task_ratelimit == 0)) {
			period = max_pause;
			pause = max_pause;
			goto pause;
		}
		period = HZ * pages_dirtied / task_ratelimit;
		pause = period;
		if (current->dirty_paused_when)
			pause -= now - current->dirty_paused_when;
		/*
		 * For less than 1s think time (ext3/4 may block the dirtier
		 * for up to 800ms from time to time on 1-HDD; so does xfs,
		 * however at much less frequency), try to compensate it in
		 * future periods by updating the virtual time; otherwise just
		 * do a reset, as the task may be a one-shot dirtier.
		 */
		if (pause < min_pause) {
			trace_balance_dirty_pages(bdi,
						  dirty_thresh,
						  background_thresh,
						  nr_dirty,
						  bdi_thresh,
						  bdi_dirty,
						  dirty_ratelimit,
						  task_ratelimit,
						  pages_dirtied,
						  period,
						  min(pause, 0L),
						  start_time);
			if (pause < -HZ) {
				current->dirty_paused_when = now;
				current->nr_dirtied = 0;
			} else if (period) {
				current->dirty_paused_when += period;
				current->nr_dirtied = 0;
			} else if (current->nr_dirtied_pause <= pages_dirtied)
				current->nr_dirtied_pause += pages_dirtied;
			break;
		}
		if (unlikely(pause > max_pause)) {
			/* for occasional dropped task_ratelimit */
			now += min(pause - max_pause, max_pause);
			pause = max_pause;
		}

pause:
		trace_balance_dirty_pages(bdi,
					  dirty_thresh,
					  background_thresh,
					  nr_dirty,
					  bdi_thresh,
					  bdi_dirty,
					  dirty_ratelimit,
					  task_ratelimit,
					  pages_dirtied,
					  period,
					  pause,
					  start_time);
		__set_current_state(TASK_KILLABLE);
		io_schedule_timeout(pause);

		current->dirty_paused_when = now + pause;
		current->nr_dirtied = 0;
		current->nr_dirtied_pause = nr_dirtied_pause;

		/*
		 * This is typically equal to (nr_dirty < dirty_thresh) and
		 * can also keep "1000+ dd on a slow USB stick" under control.
		 */
		if (task_ratelimit)
			break;

		/*
		 * In the case of an unresponsive NFS server whose dirty
		 * pages exceed dirty_thresh, give the other good bdis a pipe
		 * to go through, so that tasks on them still remain
		 * responsive.
		 *
		 * In theory 1 page is enough to keep the consumer-producer
		 * pipe going: the flusher cleans 1 page => the task dirties 1
		 * more page. However bdi_dirty has accounting errors, so use
		 * the larger and more IO friendly bdi_stat_error.
		 */
		if (bdi_dirty <= bdi_stat_error(bdi))
			break;

		if (fatal_signal_pending(current))
			break;
	}

	if (!dirty_exceeded && bdi->dirty_exceeded)
		bdi->dirty_exceeded = 0;

	if (writeback_in_progress(bdi))
		return;

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk
	 * activity and can (then) be serviced in bursts.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if (laptop_mode)
		return;

	if (nr_reclaimable > background_thresh)
		bdi_start_background_writeback(bdi);
}
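
/*
 * In short, each trip through the loop above: sample the global and
 * per-bdi dirty state; exit while below the freerun ceiling; otherwise
 * kick background writeback, refresh the bandwidth estimates, turn
 * dirty_ratelimit * pos_ratio into this task's allowed dirtying rate,
 * and sleep for roughly pages_dirtied / task_ratelimit seconds, clamped
 * between bdi_min_pause() and bdi_max_pause().
 */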

/*
 * Per-cpu count of freshly dirtied pages, used to bound the interval
 * between balance_dirty_pages() calls on each CPU even when no single
 * task crosses its own nr_dirtied_pause threshold.
 */
static DEFINE_PER_CPU(int, bdp_ratelimits);

/*
 * Short-lived tasks (eg. gcc invocations in a kernel build) can exit
 * before reaching their nr_dirtied_pause threshold, so the pages they
 * dirtied would otherwise escape throttling.  The exit path folds a
 * task's unbalanced nr_dirtied into this per-cpu counter, and
 * balance_dirty_pages_ratelimited() charges it to later dirtiers.
 */
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;

/**
 * balance_dirty_pages_ratelimited - balance dirty memory state
 * @mapping: address_space which was dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * On really big machines, checking the global dirty state is expensive, so
 * try to avoid doing it too often (ratelimiting).  But once we're over the
 * dirty memory limit we decrease the ratelimiting by a lot, to prevent
 * individual processes from overshooting the limit by (ratelimit_pages) each.
 */
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	int ratelimit;
	int *p;

	if (!bdi_cap_account_dirty(bdi))
		return;

	ratelimit = current->nr_dirtied_pause;
	if (bdi->dirty_exceeded)
		ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));

	preempt_disable();
	/*
	 * This prevents one CPU from accumulating too many dirtied pages
	 * without calling into balance_dirty_pages(), which can happen
	 * when there are 1000+ tasks all starting to dirty pages at
	 * exactly the same time, hence all honouring too large an initial
	 * task->nr_dirtied_pause.
	 */
	p = this_cpu_ptr(&bdp_ratelimits);
	if (unlikely(current->nr_dirtied >= ratelimit))
		*p = 0;
	else if (unlikely(*p >= ratelimit_pages)) {
		*p = 0;
		ratelimit = 0;
	}
	/*
	 * Pick up the dirtied pages of exited tasks. This avoids lots of
	 * short-lived tasks (eg. gcc invocations in a kernel build)
	 * escaping the dirty throttling and livelocking other long-run
	 * dirtiers.
	 */
	p = this_cpu_ptr(&dirty_throttle_leaks);
	if (*p > 0 && current->nr_dirtied < ratelimit) {
		unsigned long nr_pages_dirtied;
		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
		*p -= nr_pages_dirtied;
		current->nr_dirtied += nr_pages_dirtied;
	}
	preempt_enable();

	if (unlikely(current->nr_dirtied >= ratelimit))
		balance_dirty_pages(mapping, current->nr_dirtied);
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
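
/*
 * Typical call site (a sketch, not code from this file): a filesystem's
 * buffered write path dirties pages through ->write_begin/->write_end
 * and then calls
 *
 *	balance_dirty_pages_ratelimited(mapping);
 *
 * once per dirtied batch, as generic_perform_write() does, so that the
 * writing task is throttled against its backing device.
 */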

void throttle_vm_writeout(gfp_t gfp_mask)
{
	unsigned long background_thresh;
	unsigned long dirty_thresh;

	for ( ; ; ) {
		global_dirty_limits(&background_thresh, &dirty_thresh);
		dirty_thresh = hard_dirty_limit(dirty_thresh);

		/*
		 * Boost the allowable dirty threshold a bit for page
		 * allocators so they don't get DoS'ed by heavy writers.
		 */
		dirty_thresh += dirty_thresh / 10;

		if (global_page_state(NR_UNSTABLE_NFS) +
			global_page_state(NR_WRITEBACK) <= dirty_thresh)
			break;
		congestion_wait(BLK_RW_ASYNC, HZ/10);

		/*
		 * The caller might hold locks which can prevent IO completion
		 * or progress in the filesystem.  So we cannot just sit here
		 * waiting for IO to complete.
		 */
		if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
			break;
	}
}

/*
 * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
 */
int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
	void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec(table, write, buffer, length, ppos);
	return 0;
}

#ifdef CONFIG_BLOCK
void laptop_mode_timer_fn(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;
	int nr_pages = global_page_state(NR_FILE_DIRTY) +
		global_page_state(NR_UNSTABLE_NFS);

	/*
	 * We want to write everything out, not just down to the dirty
	 * threshold.
	 */
	if (bdi_has_dirty_io(&q->backing_dev_info))
		bdi_start_writeback(&q->backing_dev_info, nr_pages,
					WB_REASON_LAPTOP_TIMER);
}

/*
 * We've spun up the disk and we're in laptop mode: schedule writeback
 * of all dirty data a few seconds from now.  If the flush is already
 * scheduled then push it back - the user is still using the disk.
 */
void laptop_io_completion(struct backing_dev_info *info)
{
	mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
}

/*
 * We're in laptop mode and we've just synced.  The sync's writes will
 * have caused another writeback to be scheduled by laptop_io_completion.
 * Nothing needs to be written back anymore, so we unschedule the
 * writeback.
 */
void laptop_sync_completion(void)
{
	struct backing_dev_info *bdi;

	rcu_read_lock();

	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
		del_timer(&bdi->laptop_mode_wb_timer);

	rcu_read_unlock();
}
#endif

/*
 * If ratelimit_pages is too high then we can get into dirty-data overload
 * if a large number of processes all perform writes at the same time.
 * If it is too low then SMP machines will call balance_dirty_pages()
 * (expensively) far too often.
 *
 * Here we set ratelimit_pages to a level which ensures that when all CPUs
 * are dirtying in parallel, we cannot go more than 3% (1/32) over the
 * dirty memory thresholds.
 */
void writeback_set_ratelimit(void)
{
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	global_dirty_limits(&background_thresh, &dirty_thresh);
	global_dirty_limit = dirty_thresh;
	ratelimit_pages = dirty_thresh / (num_online_cpus() * 32);
	if (ratelimit_pages < 16)
		ratelimit_pages = 16;
}

static int
ratelimit_handler(struct notifier_block *self, unsigned long action,
		  void *hcpu)
{
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
	case CPU_DEAD:
		writeback_set_ratelimit();
		return NOTIFY_OK;
	default:
		return NOTIFY_DONE;
	}
}

static struct notifier_block ratelimit_nb = {
	.notifier_call	= ratelimit_handler,
	.next		= NULL,
};

/*
 * Called early on to tune the page writeback dirty limits.
 *
 * We used to scale dirty pages according to how total memory related to
 * pages that could be allocated for buffers.  However, that was when we
 * used "dirty_ratio" to scale with all memory; "dirty_ratio" is now
 * applied to total non-HIGHPAGE memory, so we can't get into the old
 * insane situation any more where we had large amounts of dirty pages
 * compared to a small amount of non-HIGHMEM memory.
 */
void __init page_writeback_init(void)
{
	writeback_set_ratelimit();
	register_cpu_notifier(&ratelimit_nb);

	fprop_global_init(&writeout_completions, GFP_KERNEL);
}

/**
 * tag_pages_for_writeback - tag pages to be written by write_cache_pages
 * @mapping: address space structure to write
 * @start: starting page index
 * @end: ending page index (inclusive)
 *
 * This function scans the page range from @start to @end (inclusive) and tags
 * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
 * that write_cache_pages (or whoever calls this function) will then use
 * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
 * used to avoid livelocking of writeback by a process steadily creating new
 * dirty pages in the file (thus it is important for this function to be quick
 * so that it can tag pages faster than a dirtying process can create them).
 */
/*
 * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency.
 */
void tag_pages_for_writeback(struct address_space *mapping,
			     pgoff_t start, pgoff_t end)
{
#define WRITEBACK_TAG_BATCH 4096
	unsigned long tagged;

	do {
		spin_lock_irq(&mapping->tree_lock);
		tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
				&start, end, WRITEBACK_TAG_BATCH,
				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
		spin_unlock_irq(&mapping->tree_lock);
		WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
		cond_resched();
		/* We check 'start' to handle wrapping when end == ~0UL */
	} while (tagged >= WRITEBACK_TAG_BATCH && start);
}
EXPORT_SYMBOL(tag_pages_for_writeback);

/**
 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @writepage: function called for each page
 * @data: data passed to writepage function
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them.  If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 *
 * To avoid livelocks (when other processes dirty new pages), we first tag
 * pages which should be written back with TOWRITE tag and only then start
 * writing them. For data-integrity sync we have to be careful so that we do
 * not miss some pages (e.g., because some other process has cleared TOWRITE
 * tag we set). The rule we follow is that TOWRITE tag can be cleared only
 * by the process clearing the DIRTY tag (and submitting the page for IO).
 */
int write_cache_pages(struct address_space *mapping,
		      struct writeback_control *wbc, writepage_t writepage,
		      void *data)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;

	pagevec_init(&pvec, 0);
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL), or
			 * even swizzled back from swapper_space to tmpfs file
			 * mapping. However, page->index will not change
			 * because we have a reference on the page.
			 */
			if (page->index > end) {
				/*
				 * can't be range_cyclic (1st pass) because
				 * end == -1 in that case.
				 */
				done = 1;
				break;
			}

			done_index = page->index;

			lock_page(page);

			/*
			 * Page truncated or invalidated. We can freely skip it
			 * then, even for data integrity operations: the page
			 * has disappeared concurrently, so there could be no
			 * real expectation of this data integrity operation
			 * even if there is now a new, dirty page at the same
			 * pagecache address.
			 */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					wait_on_page_writeback(page);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
				} else {
					/*
					 * done_index is set past this page,
					 * so media errors will not choke
					 * background writeout for the entire
					 * file. This has consequences for
					 * range_cyclic semantics (ie. it may
					 * not be suitable for data integrity
					 * writeout).
					 */
					done_index = page->index + 1;
					done = 1;
					break;
				}
			}

			/*
			 * We stop writing back only if we are not doing
			 * integrity sync. In case of integrity sync we have to
			 * keep going until we have written all the pages
			 * we tagged for writeback prior to entering this loop.
			 */
			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!cycled && !done) {
		/*
		 * range_cyclic:
		 * We hit the last page and there is more work to be done:
		 * wrap back to the start of the file
		 */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}
EXPORT_SYMBOL(write_cache_pages);

/*
 * Function used by generic_writepages to call the real writepage
 * function and set the mapping flags on error.
 */
static int __writepage(struct page *page, struct writeback_control *wbc,
		       void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

/**
 * generic_writepages - walk the list of dirty pages of the given address space and writepage() all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 *
 * This is a library function, which implements the writepages()
 * address_space_operation.
 */
int generic_writepages(struct address_space *mapping,
		       struct writeback_control *wbc)
{
	struct blk_plug plug;
	int ret;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	blk_start_plug(&plug);
	ret = write_cache_pages(mapping, wbc, __writepage, mapping);
	blk_finish_plug(&plug);
	return ret;
}

EXPORT_SYMBOL(generic_writepages);

int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	int ret;

	if (wbc->nr_to_write <= 0)
		return 0;
	if (mapping->a_ops->writepages)
		ret = mapping->a_ops->writepages(mapping, wbc);
	else
		ret = generic_writepages(mapping, wbc);
	return ret;
}

/**
 * write_one_page - write out a single page and optionally wait on I/O
 * @page: the page to write
 * @wait: if true, wait on writeout
 *
 * The page must be locked by the caller and will be unlocked upon return.
 *
 * write_one_page() returns a negative error code if I/O failed.
 */
int write_one_page(struct page *page, int wait)
{
	struct address_space *mapping = page->mapping;
	int ret = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	BUG_ON(!PageLocked(page));

	if (wait)
		wait_on_page_writeback(page);

	if (clear_page_dirty_for_io(page)) {
		page_cache_get(page);
		ret = mapping->a_ops->writepage(page, &wbc);
		if (ret == 0 && wait) {
			wait_on_page_writeback(page);
			if (PageError(page))
				ret = -EIO;
		}
		page_cache_release(page);
	} else {
		unlock_page(page);
	}
	return ret;
}
EXPORT_SYMBOL(write_one_page);

/*
 * For address_spaces which do not use buffers nor write back.
 */
int __set_page_dirty_no_writeback(struct page *page)
{
	if (!PageDirty(page))
		return !TestSetPageDirty(page);
	return 0;
}

/*
 * Helper function for set_page_dirty family.
 * NOTE: This relies on being atomic wrt interrupts.
 */
void account_page_dirtied(struct page *page, struct address_space *mapping)
{
	trace_writeback_dirty_page(page, mapping);

	if (mapping_cap_account_dirty(mapping)) {
		struct backing_dev_info *bdi = inode_to_bdi(mapping->host);

		__inc_zone_page_state(page, NR_FILE_DIRTY);
		__inc_zone_page_state(page, NR_DIRTIED);
		__inc_bdi_stat(bdi, BDI_RECLAIMABLE);
		__inc_bdi_stat(bdi, BDI_DIRTIED);
		task_io_account_write(PAGE_CACHE_SIZE);
		current->nr_dirtied++;
		this_cpu_inc(bdp_ratelimits);
	}
}
EXPORT_SYMBOL(account_page_dirtied);

/*
 * Helper function for deaccounting a dirty page without writeback.
 *
 * Doing this should *normally* only ever be done when a page is truncated,
 * and is not actually mapped anywhere at all.  However, fs/buffer.c does
 * this when it notices that somebody has cleaned out all the buffers on a
 * page without actually doing it through the VM.
 */
void account_page_cleaned(struct page *page, struct address_space *mapping)
{
	if (mapping_cap_account_dirty(mapping)) {
		dec_zone_page_state(page, NR_FILE_DIRTY);
		dec_bdi_stat(inode_to_bdi(mapping->host), BDI_RECLAIMABLE);
		task_io_account_cancelled_write(PAGE_CACHE_SIZE);
	}
}
EXPORT_SYMBOL(account_page_cleaned);

/*
 * For address_spaces which do not use buffers.  Just tag the page as dirty
 * in its radix tree.
 *
 * This is also used when a single buffer is being dirtied: we want to set
 * the page dirty in that case, but not all the buffers.  This is a
 * "bottom-up" dirtying, whereas __set_page_dirty_buffers() is a "top-down"
 * dirtying.
 */
int __set_page_dirty_nobuffers(struct page *page)
{
	if (!TestSetPageDirty(page)) {
		struct address_space *mapping = page_mapping(page);
		unsigned long flags;

		if (!mapping)
			return 1;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		BUG_ON(page_mapping(page) != mapping);
		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
		account_page_dirtied(page, mapping);
		radix_tree_tag_set(&mapping->page_tree, page_index(page),
				   PAGECACHE_TAG_DIRTY);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
		if (mapping->host) {
			/* !PageAnon && !swapper_space */
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		}
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

/*
 * Call this whenever redirtying a page, to de-account the dirty counters
 * (NR_DIRTIED, BDI_DIRTIED, tsk->nr_dirtied), so that they match the written
 * counters (NR_WRITTEN, BDI_WRITTEN) in the long term. Mismatches would lead
 * to systematic errors in balanced_dirty_ratelimit and the dirty pages
 * position control.
 */
void account_page_redirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	if (mapping && mapping_cap_account_dirty(mapping)) {
		current->nr_dirtied--;
		dec_zone_page_state(page, NR_DIRTIED);
		dec_bdi_stat(inode_to_bdi(mapping->host), BDI_DIRTIED);
	}
}
EXPORT_SYMBOL(account_page_redirty);

/*
 * When a writepage implementation decides that it doesn't want to write
 * this page for some reason, it should redirty the locked page via
 * redirty_page_for_writepage() and it should then unlock the page and
 * return 0.
 */
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
{
	int ret;

	wbc->pages_skipped++;
	ret = __set_page_dirty_nobuffers(page);
	account_page_redirty(page);
	return ret;
}
EXPORT_SYMBOL(redirty_page_for_writepage);

/*
 * Dirty a page.
 *
 * For pages with a mapping this should be done under the page lock
 * for the benefit of asynchronous memory errors who prefer a consistent
 * dirty state. This rule can be broken in some special cases,
 * but should be better not to.
 *
 * If the mapping doesn't provide a set_page_dirty a_op, then
 * just fall through and assume that it wants buffer_heads.
 */
int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (likely(mapping)) {
		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
		/*
		 * A page can be left with PG_reclaim set from a race with
		 * end_page_writeback() (readahead/lru_deactivate_page).
		 * Clear it here so a redirtied page isn't mistaken for one
		 * that still needs reclaim-time writeback.
		 */
		if (PageReclaim(page))
			ClearPageReclaim(page);
#ifdef CONFIG_BLOCK
		if (!spd)
			spd = __set_page_dirty_buffers;
#endif
		return (*spd)(page);
	}
	if (!PageDirty(page)) {
		if (!TestSetPageDirty(page))
			return 1;
	}
	return 0;
}
EXPORT_SYMBOL(set_page_dirty);

/*
 * set_page_dirty() is racy if the caller has no reference against
 * page->mapping->host, and if the page is unlocked.  This is because another
 * CPU could truncate the page off the mapping and then free the mapping.
 *
 * Usually, the page _is_ locked, or the caller is a user-space process which
 * holds a reference on the inode by having an open file.
 *
 * In other cases, the page should be locked before running set_page_dirty().
 */
int set_page_dirty_lock(struct page *page)
{
	int ret;

	lock_page(page);
	ret = set_page_dirty(page);
	unlock_page(page);
	return ret;
}
EXPORT_SYMBOL(set_page_dirty_lock);

/*
 * Clear a page's dirty flag, while caring for dirty memory accounting.
 * Returns true if the page was previously dirty.
 *
 * This is for preparing to put the page under writeout.  We leave the page
 * tagged as dirty in the radix tree so that a concurrent write-for-sync
 * can discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage
 * implementation will run either set_page_writeback() or set_page_dirty(),
 * at which stage we bring the page's dirty flag and radix-tree dirty tag
 * back into sync.
 *
 * This incoherency between the page's dirty flag and radix-tree tag is
 * unfortunate, but it only exists while the page is locked.
 */
int clear_page_dirty_for_io(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	BUG_ON(!PageLocked(page));

	if (mapping && mapping_cap_account_dirty(mapping)) {
		/*
		 * We use this sequence to make sure that
		 *  (a) we account for dirty stats properly
		 *  (b) we tell the low-level filesystem to
		 *      mark the whole page dirty if it was
		 *      dirty in a pagetable. Only to then
		 *  (c) clean the page again and return 1 to
		 *      cause the writeback.
		 *
		 * This way we avoid all nasty races with the dirty bit in
		 * multiple places and clearing them concurrently from
		 * different threads.
		 *
		 * Note! Normally the "set_page_dirty(page)" has no effect
		 * on the actual dirty bit - since that will already usually
		 * be set. But we need the side effects, and it can help us
		 * avoid races.
		 *
		 * We basically use the page "master dirty bit" as a
		 * serialization point for all the different threads doing
		 * their things.
		 */
		if (page_mkclean(page))
			set_page_dirty(page);
		/*
		 * We carefully synchronise fault handlers against installing
		 * a dirty pte and marking the page dirty at this point.  We
		 * do this by having them hold the page lock while dirtying
		 * the page, and pages are always locked coming in here, so
		 * we get the desired exclusion.
		 */
		if (TestClearPageDirty(page)) {
			dec_zone_page_state(page, NR_FILE_DIRTY);
			dec_bdi_stat(inode_to_bdi(mapping->host),
					BDI_RECLAIMABLE);
			return 1;
		}
		return 0;
	}
	return TestClearPageDirty(page);
}
EXPORT_SYMBOL(clear_page_dirty_for_io);

int test_clear_page_writeback(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	struct mem_cgroup *memcg;
	int ret;

	memcg = mem_cgroup_begin_page_stat(page);
	if (mapping) {
		struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestClearPageWriteback(page);
		if (ret) {
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi)) {
				__dec_bdi_stat(bdi, BDI_WRITEBACK);
				__bdi_writeout_inc(bdi);
			}
		}
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestClearPageWriteback(page);
	}
	if (ret) {
		mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
		dec_zone_page_state(page, NR_WRITEBACK);
		inc_zone_page_state(page, NR_WRITTEN);
	}
	mem_cgroup_end_page_stat(memcg);
	return ret;
}

int __test_set_page_writeback(struct page *page, bool keep_write)
{
	struct address_space *mapping = page_mapping(page);
	struct mem_cgroup *memcg;
	int ret;

	memcg = mem_cgroup_begin_page_stat(page);
	if (mapping) {
		struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		ret = TestSetPageWriteback(page);
		if (!ret) {
			radix_tree_tag_set(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_WRITEBACK);
			if (bdi_cap_account_writeback(bdi))
				__inc_bdi_stat(bdi, BDI_WRITEBACK);
		}
		if (!PageDirty(page))
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_DIRTY);
		if (!keep_write)
			radix_tree_tag_clear(&mapping->page_tree,
						page_index(page),
						PAGECACHE_TAG_TOWRITE);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	} else {
		ret = TestSetPageWriteback(page);
	}
	if (!ret) {
		mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
		inc_zone_page_state(page, NR_WRITEBACK);
	}
	mem_cgroup_end_page_stat(memcg);
	return ret;
}
EXPORT_SYMBOL(__test_set_page_writeback);

/*
 * Return true if any of the pages in the mapping are marked with the
 * passed tag.
 */
int mapping_tagged(struct address_space *mapping, int tag)
{
	return radix_tree_tagged(&mapping->page_tree, tag);
}
EXPORT_SYMBOL(mapping_tagged);

/**
 * wait_for_stable_page() - wait for writeback to finish, if necessary.
 * @page:	The page to wait on.
 *
 * This function determines if the given page is related to a backing device
 * that requires page contents to be held stable during writeback.  If so,
 * then it will wait for any pending writeback to complete.
 */
void wait_for_stable_page(struct page *page)
{
	if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
		wait_on_page_writeback(page);
}
EXPORT_SYMBOL_GPL(wait_for_stable_page);