/*
 * fs/fs-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * Contains all the functions related to writing back and waiting
 * upon dirty inodes against superblocks, and writing back dirty
 * pages against inodes.  ie: data writeback.  Writeout of the
 * inode itself is not handled here.
 *
 * 10Apr2002	Andrew Morton
 *		Split out of fs/inode.c
 *		Additions for address_space-based writeback
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/tracepoint.h>
#include <linux/device.h>
#include "internal.h"

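/*
 * 4MB minimal write chunk size
 */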
#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_CACHE_SHIFT - 10))

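/*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */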
struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	unsigned long *older_than_this;
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct completion *done;	/* set if the caller waits */
};

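/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @bdi: the device's backing_dev_info structure.
 *
 * Determine whether there is writeback waiting to be handled against a
 * backing device.
 */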
int writeback_in_progress(struct backing_dev_info *bdi)
{
	return test_bit(BDI_writeback_running, &bdi->state);
}
EXPORT_SYMBOL(writeback_in_progress);

static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	if (sb_is_blkdev_sb(sb))
		return inode->i_mapping->backing_dev_info;

	return sb->s_bdi;
}

static inline struct inode *wb_inode(struct list_head *head)
{
	return list_entry(head, struct inode, i_wb_list);
}

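/*
 * Include the creation of the trace points after defining the
 * wb_writeback_work structure and inline functions so that the definition
 * remains local to this file.
 */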
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);

static void bdi_wakeup_thread(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi->wb_lock);
	if (test_bit(BDI_registered, &bdi->state))
		mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
	spin_unlock_bh(&bdi->wb_lock);
}

static void bdi_queue_work(struct backing_dev_info *bdi,
			   struct wb_writeback_work *work)
{
	trace_writeback_queue(bdi, work);

	spin_lock_bh(&bdi->wb_lock);
	if (!test_bit(BDI_registered, &bdi->state)) {
		if (work->done)
			complete(work->done);
		goto out_unlock;
	}
	list_add_tail(&work->list, &bdi->work_list);
	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
out_unlock:
	spin_unlock_bh(&bdi->wb_lock);
}

static void
__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
		      bool range_cyclic, enum wb_reason reason)
{
	struct wb_writeback_work *work;

	/*
	 * This is WB_SYNC_NONE writeback, so if allocation fails just
	 * wakeup the thread for old dirty data writeback
	 */
	work = kzalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		trace_writeback_nowork(bdi);
		bdi_wakeup_thread(bdi);
		return;
	}

	work->sync_mode	= WB_SYNC_NONE;
	work->nr_pages	= nr_pages;
	work->range_cyclic = range_cyclic;
	work->reason	= reason;

	bdi_queue_work(bdi, work);
}

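/**
 * bdi_start_writeback - start writeback
 * @bdi: the backing device to write from
 * @nr_pages: the number of pages to write
 * @reason: reason why some writeback work was initiated
 *
 * Description:
 *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
 *   started when this function returns, we make no guarantees on
 *   completion. Caller need not hold sb s_umount semaphore.
 */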
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
			enum wb_reason reason)
{
	__bdi_start_writeback(bdi, nr_pages, true, reason);
}

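/**
 * bdi_start_background_writeback - start background writeback
 * @bdi: the backing device to write from
 *
 * Description:
 *   This makes sure WB_SYNC_NONE background writeback happens. When
 *   this function returns, it is only guaranteed that for given BDI
 *   some IO is happening if we are over background dirty threshold.
 *   Caller need not hold sb s_umount semaphore.
 */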
void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
	/*
	 * We just wake up the flusher thread. It will perform background
	 * writeback as soon as there is no other work to do.
	 */
	trace_writeback_wake_background(bdi);
	bdi_wakeup_thread(bdi);
}

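/*
 * Remove the inode from the writeback list it is on.
 */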
void inode_wb_list_del(struct inode *inode)
{
	struct backing_dev_info *bdi = inode_to_bdi(inode);

	spin_lock(&bdi->wb.list_lock);
	list_del_init(&inode->i_wb_list);
	spin_unlock(&bdi->wb.list_lock);
}

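/*
 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
 * furthest end of its superblock's dirty-inode list.
 *
 * Before stamping the inode's ->dirtied_when, we check to see whether it is
 * already the most-recently-dirtied inode on the b_dirty list.  If that is
 * the case then the inode must have been redirtied while it was being written
 * out and we don't reset its dirtied_when.
 */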
static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	if (!list_empty(&wb->b_dirty)) {
		struct inode *tail;

		tail = wb_inode(wb->b_dirty.next);
		if (time_before(inode->dirtied_when, tail->dirtied_when))
			inode->dirtied_when = jiffies;
	}
	list_move(&inode->i_wb_list, &wb->b_dirty);
}

/*
 * requeue inode for re-scanning after bdi->b_io list is exhausted.
 */
static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	list_move(&inode->i_wb_list, &wb->b_more_io);
}

static void inode_sync_complete(struct inode *inode)
{
	inode->i_state &= ~I_SYNC;
	/* If inode is clean and unused, put it into LRU now... */
	inode_add_lru(inode);
	/* Waiters must see I_SYNC cleared before being woken up */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_SYNC);
}

static bool inode_dirtied_after(struct inode *inode, unsigned long t)
{
	bool ret = time_after(inode->dirtied_when, t);
#ifndef CONFIG_64BIT
	/*
	 * For inodes being constantly redirtied, dirtied_when can get stuck.
	 * It _appears_ to be in the future, but is actually in distant past.
	 * This test is necessary to prevent such wrapped-around relative times
	 * from causing problems when jiffies wraps on 32-bit systems.
	 */
	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
#endif
	return ret;
}

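/*
 * Move expired (dirtied before work->older_than_this) dirty inodes from
 * @delaying_queue to @dispatch_queue.
 */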
static int move_expired_inodes(struct list_head *delaying_queue,
			       struct list_head *dispatch_queue,
			       struct wb_writeback_work *work)
{
	LIST_HEAD(tmp);
	struct list_head *pos, *node;
	struct super_block *sb = NULL;
	struct inode *inode;
	int do_sb_sort = 0;
	int moved = 0;

	while (!list_empty(delaying_queue)) {
		inode = wb_inode(delaying_queue->prev);
		if (work->older_than_this &&
		    inode_dirtied_after(inode, *work->older_than_this))
			break;
		list_move(&inode->i_wb_list, &tmp);
		moved++;
		if (sb_is_blkdev_sb(inode->i_sb))
			continue;
		if (sb && sb != inode->i_sb)
			do_sb_sort = 1;
		sb = inode->i_sb;
	}

	/* just one sb in list, splice to dispatch_queue and we're done */
	if (!do_sb_sort) {
		list_splice(&tmp, dispatch_queue);
		goto out;
	}

	/* Move inodes from one superblock together */
	while (!list_empty(&tmp)) {
		sb = wb_inode(tmp.prev)->i_sb;
		list_for_each_prev_safe(pos, node, &tmp) {
			inode = wb_inode(pos);
			if (inode->i_sb == sb)
				list_move(&inode->i_wb_list, dispatch_queue);
		}
	}
out:
	return moved;
}

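/*
 * Queue all expired dirty inodes for io, eldest first.
 * Before
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    gf         edc     BA
 * After
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    g          fBAedc
 *                                           |
 *                                           +--> dequeue for IO
 */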
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
{
	int moved;
	assert_spin_locked(&wb->list_lock);
	list_splice_init(&wb->b_more_io, &wb->b_io);
	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
	trace_writeback_queue_io(wb, work, moved);
}

static int write_inode(struct inode *inode, struct writeback_control *wbc)
{
	int ret;

	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
		trace_writeback_write_inode_start(inode, wbc);
		ret = inode->i_sb->s_op->write_inode(inode, wbc);
		trace_writeback_write_inode(inode, wbc);
		return ret;
	}
	return 0;
}

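/*
 * Wait for writeback on an inode to complete. Called with i_lock held.
 * Caller must make sure inode cannot go away when we drop i_lock.
 */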
static void __inode_wait_for_writeback(struct inode *inode)
	__releases(inode->i_lock)
	__acquires(inode->i_lock)
{
	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
	wait_queue_head_t *wqh;

	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	while (inode->i_state & I_SYNC) {
		spin_unlock(&inode->i_lock);
		__wait_on_bit(wqh, &wq, bit_wait,
			      TASK_UNINTERRUPTIBLE);
		spin_lock(&inode->i_lock);
	}
}

/*
 * Wait for writeback on an inode to complete. Caller must have inode pinned.
 */
void inode_wait_for_writeback(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	__inode_wait_for_writeback(inode);
	spin_unlock(&inode->i_lock);
}

/*
 * Sleep until I_SYNC is cleared. This function must be called with i_lock
 * held and drops it. It is aimed for callers not holding any inode reference
 * so once i_lock is dropped, inode can go away.
 */
static void inode_sleep_on_writeback(struct inode *inode)
	__releases(inode->i_lock)
{
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	int sleep;

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	sleep = inode->i_state & I_SYNC;
	spin_unlock(&inode->i_lock);
	if (sleep)
		schedule();
	finish_wait(wqh, &wait);
}

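/*
 * Find proper writeback list for the inode depending on its current state and
 * possibly also change of its state while we were doing writeback.  Here we
 * handle things such as livelock prevention or fairness of writeback among
 * inodes. This function can be called only by the flusher thread - no one else
 * processes all inodes in writeback lists and requeueing inodes behind flusher
 * thread's back can have unexpected consequences.
 */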
static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
			  struct writeback_control *wbc)
{
	if (inode->i_state & I_FREEING)
		return;

	/*
	 * Sync livelock prevention. Each inode is tagged and synced in one
	 * shot. If still dirty, it will be redirty_tail()'ed below.  Update
	 * the dirty time to prevent enqueue and sync it again.
	 */
	if ((inode->i_state & I_DIRTY) &&
	    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
		inode->dirtied_when = jiffies;

	if (wbc->pages_skipped) {
		/*
		 * writeback is not making progress due to locked
		 * buffers. Skip this inode for now.
		 */
		redirty_tail(inode, wb);
		return;
	}

	if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
		/*
		 * We didn't write back all the pages.  nfs_writepages()
		 * sometimes bales out without doing anything.
		 */
		if (wbc->nr_to_write <= 0) {
			/* Slice used up. Queue for next turn. */
			requeue_io(inode, wb);
		} else {
			/*
			 * Writeback blocked by something other than
			 * congestion. Delay the inode for some time to
			 * avoid spinning on the CPU (100% iowait)
			 * retrying writeback of the dirty page/inode
			 * that cannot be performed immediately.
			 */
			redirty_tail(inode, wb);
		}
	} else if (inode->i_state & I_DIRTY) {
		/*
		 * Filesystems can dirty the inode during writeback operations,
		 * such as delayed allocation during submission or metadata
		 * updates after data IO completion.
		 */
		redirty_tail(inode, wb);
	} else {
		/* The inode is clean. Remove from writeback lists. */
		list_del_init(&inode->i_wb_list);
	}
}

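/*
 * Write out an inode and its dirty pages. Do not update the writeback list
 * linkage. That is left to the caller. The caller is also responsible for
 * setting I_SYNC flag and calling inode_sync_complete() to clear it.
 */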
static int
__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct address_space *mapping = inode->i_mapping;
	long nr_to_write = wbc->nr_to_write;
	unsigned dirty;
	int ret;

	WARN_ON(!(inode->i_state & I_SYNC));

	trace_writeback_single_inode_start(inode, wbc, nr_to_write);

	ret = do_writepages(mapping, wbc);

	/*
	 * Make sure to wait on the data before writing out the metadata.
	 * This is important for filesystems that modify metadata on data
	 * I/O completion. We don't do it for sync(2) writeback because it has
	 * a separate, external IO completion path and ->sync_fs for
	 * guaranteeing inode metadata is written back correctly.
	 */
	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) {
		int err = filemap_fdatawait(mapping);
		if (ret == 0)
			ret = err;
	}

	/*
	 * Some filesystems may redirty the inode during the writeback
	 * due to delalloc, clear dirty metadata flags right before
	 * write_inode()
	 */
	spin_lock(&inode->i_lock);
	/* Clear I_DIRTY_PAGES if we've written out all dirty pages */
	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		inode->i_state &= ~I_DIRTY_PAGES;
	dirty = inode->i_state & I_DIRTY;
	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
	spin_unlock(&inode->i_lock);
	/* Don't write the inode if only I_DIRTY_PAGES was set */
	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
		int err = write_inode(inode, wbc);
		if (ret == 0)
			ret = err;
	}
	trace_writeback_single_inode(inode, wbc, nr_to_write);
	return ret;
}

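/*
 * Write out an inode's dirty pages. Either the caller has an active reference
 * on the inode or the inode has I_WILL_FREE set.
 *
 * This function is designed to be called for writing back one inode which
 * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
 * and does more profound writeback list handling in writeback_sb_inodes().
 */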
static int
writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
		       struct writeback_control *wbc)
{
	int ret = 0;

	spin_lock(&inode->i_lock);
	if (!atomic_read(&inode->i_count))
		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
	else
		WARN_ON(inode->i_state & I_WILL_FREE);

	if (inode->i_state & I_SYNC) {
		if (wbc->sync_mode != WB_SYNC_ALL)
			goto out;
		/*
		 * It's a data-integrity sync. We must wait. Since callers hold
		 * inode reference or inode has I_WILL_FREE set, it cannot go
		 * away under us.
		 */
		__inode_wait_for_writeback(inode);
	}
	WARN_ON(inode->i_state & I_SYNC);
	/*
	 * Skip inode if it is clean and we have no outstanding writeback in
	 * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
	 * function since flusher thread may be doing for example sync in
	 * parallel and if we move the inode, it could get skipped. So here we
	 * make sure inode is on some writeback list and leave it there unless
	 * we have completely cleaned the inode.
	 */
	if (!(inode->i_state & I_DIRTY) &&
	    (wbc->sync_mode != WB_SYNC_ALL ||
	     !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
		goto out;
	inode->i_state |= I_SYNC;
	spin_unlock(&inode->i_lock);

	ret = __writeback_single_inode(inode, wbc);

	spin_lock(&wb->list_lock);
	spin_lock(&inode->i_lock);
	/*
	 * If inode is clean, remove it from writeback lists. Otherwise don't
	 * touch it. See comment above for explanation.
	 */
	if (!(inode->i_state & I_DIRTY))
		list_del_init(&inode->i_wb_list);
	spin_unlock(&wb->list_lock);
	inode_sync_complete(inode);
out:
	spin_unlock(&inode->i_lock);
	return ret;
}

static long writeback_chunk_size(struct backing_dev_info *bdi,
				 struct wb_writeback_work *work)
{
	long pages;

	/*
	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
	 * here avoids calling into writeback_inodes_wb() more than once.
	 *
	 * The intended call sequence for WB_SYNC_ALL writeback is:
	 *
	 *      wb_writeback()
	 *          writeback_sb_inodes()       <== called only once
	 *              write_cache_pages()     <== called once for each inode
	 *                   (quickly) tag currently dirty pages
	 *                   (maybe slowly) sync all tagged pages
	 */
	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
		pages = LONG_MAX;
	else {
		pages = min(bdi->avg_write_bandwidth / 2,
			    global_dirty_limit / DIRTY_SCOPE);
		pages = min(pages, work->nr_pages);
		pages = round_down(pages + MIN_WRITEBACK_PAGES,
				   MIN_WRITEBACK_PAGES);
	}

	return pages;
}

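/*
 * Write a portion of b_io inodes which belong to @sb.
 *
 * Return the number of pages and/or inodes written.
 */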
static long writeback_sb_inodes(struct super_block *sb,
				struct bdi_writeback *wb,
				struct wb_writeback_work *work)
{
	struct writeback_control wbc = {
		.sync_mode		= work->sync_mode,
		.tagged_writepages	= work->tagged_writepages,
		.for_kupdate		= work->for_kupdate,
		.for_background		= work->for_background,
		.for_sync		= work->for_sync,
		.range_cyclic		= work->range_cyclic,
		.range_start		= 0,
		.range_end		= LLONG_MAX,
	};
	unsigned long start_time = jiffies;
	long write_chunk;
	long wrote = 0;		/* count both pages and inodes */

	while (!list_empty(&wb->b_io)) {
		struct inode *inode = wb_inode(wb->b_io.prev);

		if (inode->i_sb != sb) {
			if (work->sb) {
				/*
				 * We only want to write back data for this
				 * superblock, move all inodes not belonging
				 * to it back onto the dirty list.
				 */
				redirty_tail(inode, wb);
				continue;
			}

			/*
			 * The inode belongs to a different superblock.
			 * Bounce back to the caller to unpin this and
			 * pin the next superblock.
			 */
			break;
		}

		/*
		 * Don't bother with new inodes or inodes being freed, first
		 * kind does not need periodic writeout yet, and for the latter
		 * kind writeout is handled by the freer.
		 */
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			redirty_tail(inode, wb);
			continue;
		}
		if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
			/*
			 * If this inode is locked for writeback and we are not
			 * doing writeback-for-data-integrity, move it to
			 * b_more_io so that writeback can proceed with the
			 * other inodes on s_io.
			 *
			 * We'll have another go at writing back this inode
			 * when we completed a full scan of b_io.
			 */
			spin_unlock(&inode->i_lock);
			requeue_io(inode, wb);
			trace_writeback_sb_inodes_requeue(inode);
			continue;
		}
		spin_unlock(&wb->list_lock);

		/*
		 * We already requeued the inode if it had I_SYNC set and we
		 * are doing WB_SYNC_NONE writeback. So this catches only the
		 * WB_SYNC_ALL case.
		 */
		if (inode->i_state & I_SYNC) {
			/* Wait for I_SYNC. This function drops i_lock... */
			inode_sleep_on_writeback(inode);
			/* Inode may be gone, start again */
			spin_lock(&wb->list_lock);
			continue;
		}
		inode->i_state |= I_SYNC;
		spin_unlock(&inode->i_lock);

		write_chunk = writeback_chunk_size(wb->bdi, work);
		wbc.nr_to_write = write_chunk;
		wbc.pages_skipped = 0;

		/*
		 * We use I_SYNC to pin the inode in memory. While it is set
		 * evict_inode() will wait so the inode cannot be freed.
		 */
		__writeback_single_inode(inode, &wbc);

		work->nr_pages -= write_chunk - wbc.nr_to_write;
		wrote += write_chunk - wbc.nr_to_write;
		spin_lock(&wb->list_lock);
		spin_lock(&inode->i_lock);
		if (!(inode->i_state & I_DIRTY))
			wrote++;
		requeue_inode(inode, wb, &wbc);
		inode_sync_complete(inode);
		spin_unlock(&inode->i_lock);
		cond_resched_lock(&wb->list_lock);
		/*
		 * bail out to wb_writeback() often enough to check
		 * background threshold and other termination conditions.
		 */
		if (wrote) {
			if (time_is_before_jiffies(start_time + HZ / 10UL))
				break;
			if (work->nr_pages <= 0)
				break;
		}
	}
	return wrote;
}

static long __writeback_inodes_wb(struct bdi_writeback *wb,
				  struct wb_writeback_work *work)
{
	unsigned long start_time = jiffies;
	long wrote = 0;

	while (!list_empty(&wb->b_io)) {
		struct inode *inode = wb_inode(wb->b_io.prev);
		struct super_block *sb = inode->i_sb;

		if (!grab_super_passive(sb)) {
			/*
			 * grab_super_passive() may fail consistently due to
			 * s_umount being grabbed by someone else. Don't use
			 * requeue_io() to avoid busy retrying the inode/sb.
			 */
			redirty_tail(inode, wb);
			continue;
		}
		wrote += writeback_sb_inodes(sb, wb, work);
		drop_super(sb);

		/* refer to the same tests at the end of writeback_sb_inodes */
		if (wrote) {
			if (time_is_before_jiffies(start_time + HZ / 10UL))
				break;
			if (work->nr_pages <= 0)
				break;
		}
	}
	/* Leave any unwritten inodes on b_io */
	return wrote;
}

static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
				enum wb_reason reason)
{
	struct wb_writeback_work work = {
		.nr_pages	= nr_pages,
		.sync_mode	= WB_SYNC_NONE,
		.range_cyclic	= 1,
		.reason		= reason,
	};

	spin_lock(&wb->list_lock);
	if (list_empty(&wb->b_io))
		queue_io(wb, &work);
	__writeback_inodes_wb(wb, &work);
	spin_unlock(&wb->list_lock);

	return nr_pages - work.nr_pages;
}

static bool over_bground_thresh(struct backing_dev_info *bdi)
{
	unsigned long background_thresh, dirty_thresh;

	global_dirty_limits(&background_thresh, &dirty_thresh);

	if (global_page_state(NR_FILE_DIRTY) +
	    global_page_state(NR_UNSTABLE_NFS) > background_thresh)
		return true;

	if (bdi_stat(bdi, BDI_RECLAIMABLE) >
				bdi_dirty_limit(bdi, background_thresh))
		return true;

	return false;
}

/*
 * Called under wb->list_lock. If there are multiple wb per bdi,
 * only the flusher working on the first wb should do it.
 */
static void wb_update_bandwidth(struct bdi_writeback *wb,
				unsigned long start_time)
{
	__bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, 0, start_time);
}

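/*
 * Explicit flushing or periodic writeback of "old" data.
 *
 * Define "old": the first time one of an inode's pages is dirtied, we mark the
 * dirtying-time in the inode's address_space.  So this periodic writeback code
 * just walks the superblock inode list, writing back any inodes which are
 * older than a specific point in time.
 *
 * Try to run once per dirty_writeback_interval.  But if a writeback event
 * takes longer than a dirty_writeback_interval interval, then leave a
 * one-second gap.
 *
 * older_than_this takes precedence over nr_to_write.  So we'll only write back
 * all dirty pages if they are all attached to "old" mappings.
 */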
static long wb_writeback(struct bdi_writeback *wb,
			 struct wb_writeback_work *work)
{
	unsigned long wb_start = jiffies;
	long nr_pages = work->nr_pages;
	unsigned long oldest_jif;
	struct inode *inode;
	long progress;

	oldest_jif = jiffies;
	work->older_than_this = &oldest_jif;

	spin_lock(&wb->list_lock);
	for (;;) {
		/*
		 * Stop writeback when nr_pages has been consumed
		 */
		if (work->nr_pages <= 0)
			break;

		/*
		 * Background writeout and kupdate-style writeback may
		 * run forever. Stop them if there is other work to do
		 * so that e.g. sync can proceed. They'll be restarted
		 * after the other works are all done.
		 */
		if ((work->for_background || work->for_kupdate) &&
		    !list_empty(&wb->bdi->work_list))
			break;

		/*
		 * For background writeout, stop when we are below the
		 * background dirty threshold
		 */
		if (work->for_background && !over_bground_thresh(wb->bdi))
			break;

		/*
		 * Kupdate and background works are special and we want to
		 * include all inodes that need writing. Livelock avoidance is
		 * handled by these works yielding to any other work so we are
		 * safe.
		 */
		if (work->for_kupdate) {
			oldest_jif = jiffies -
				msecs_to_jiffies(dirty_expire_interval * 10);
		} else if (work->for_background)
			oldest_jif = jiffies;

		trace_writeback_start(wb->bdi, work);
		if (list_empty(&wb->b_io))
			queue_io(wb, work);
		if (work->sb)
			progress = writeback_sb_inodes(work->sb, wb, work);
		else
			progress = __writeback_inodes_wb(wb, work);
		trace_writeback_written(wb->bdi, work);

		wb_update_bandwidth(wb, wb_start);

		/*
		 * Did we write something? Try for more
		 *
		 * Dirty inodes are moved to b_io for writeback in batches.
		 * The completion of the current batch does not necessarily
		 * mean the overall work is done. So we keep looping as long
		 * as made some progress on cleaning pages or inodes.
		 */
		if (progress)
			continue;
		/*
		 * No more inodes for IO, bail
		 */
		if (list_empty(&wb->b_more_io))
			break;
		/*
		 * Nothing written. Wait for some inode to
		 * become available for writeback. Otherwise
		 * we'll just busyloop.
		 */
		if (!list_empty(&wb->b_more_io)) {
			trace_writeback_wait(wb->bdi, work);
			inode = wb_inode(wb->b_more_io.prev);
			spin_lock(&inode->i_lock);
			spin_unlock(&wb->list_lock);
			/* This function drops i_lock... */
			inode_sleep_on_writeback(inode);
			spin_lock(&wb->list_lock);
		}
	}
	spin_unlock(&wb->list_lock);

	return nr_pages - work->nr_pages;
}

/*
 * Return the next wb_writeback_work struct that hasn't been processed yet.
 */
static struct wb_writeback_work *
get_next_work_item(struct backing_dev_info *bdi)
{
	struct wb_writeback_work *work = NULL;

	spin_lock_bh(&bdi->wb_lock);
	if (!list_empty(&bdi->work_list)) {
		work = list_entry(bdi->work_list.next,
				  struct wb_writeback_work, list);
		list_del_init(&work->list);
	}
	spin_unlock_bh(&bdi->wb_lock);
	return work;
}

/*
 * Add in the number of potentially dirty inodes, because each inode
 * write can dirty pagecache in the underlying blockdev.
 */
static unsigned long get_nr_dirty_pages(void)
{
	return global_page_state(NR_FILE_DIRTY) +
		global_page_state(NR_UNSTABLE_NFS) +
		get_nr_dirty_inodes();
}

static long wb_check_background_flush(struct bdi_writeback *wb)
{
	if (over_bground_thresh(wb->bdi)) {

		struct wb_writeback_work work = {
			.nr_pages	= LONG_MAX,
			.sync_mode	= WB_SYNC_NONE,
			.for_background	= 1,
			.range_cyclic	= 1,
			.reason		= WB_REASON_BACKGROUND,
		};

		return wb_writeback(wb, &work);
	}

	return 0;
}

static long wb_check_old_data_flush(struct bdi_writeback *wb)
{
	unsigned long expired;
	long nr_pages;

	/*
	 * When set to zero, disable periodic writeback
	 */
	if (!dirty_writeback_interval)
		return 0;

	expired = wb->last_old_flush +
			msecs_to_jiffies(dirty_writeback_interval * 10);
	if (time_before(jiffies, expired))
		return 0;

	wb->last_old_flush = jiffies;
	nr_pages = get_nr_dirty_pages();

	if (nr_pages) {
		struct wb_writeback_work work = {
			.nr_pages	= nr_pages,
			.sync_mode	= WB_SYNC_NONE,
			.for_kupdate	= 1,
			.range_cyclic	= 1,
			.reason		= WB_REASON_PERIODIC,
		};

		return wb_writeback(wb, &work);
	}

	return 0;
}

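/*
 * Retrieve work items and do the writeback they describe
 */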
static long wb_do_writeback(struct bdi_writeback *wb)
{
	struct backing_dev_info *bdi = wb->bdi;
	struct wb_writeback_work *work;
	long wrote = 0;

	set_bit(BDI_writeback_running, &wb->bdi->state);
	while ((work = get_next_work_item(bdi)) != NULL) {

		trace_writeback_exec(bdi, work);

		wrote += wb_writeback(wb, work);

		/*
		 * Notify the caller of completion if this is a synchronous
		 * work item, otherwise just free it.
		 */
		if (work->done)
			complete(work->done);
		else
			kfree(work);
	}

	/*
	 * Check for periodic writeback, kupdated() style
	 */
	wrote += wb_check_old_data_flush(wb);
	wrote += wb_check_background_flush(wb);
	clear_bit(BDI_writeback_running, &wb->bdi->state);

	return wrote;
}

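/*
 * Handle writeback of dirty data for the device backed by this bdi. Also
 * reschedules periodically and does kupdated style flushing.
 */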
void bdi_writeback_workfn(struct work_struct *work)
{
	struct bdi_writeback *wb = container_of(to_delayed_work(work),
						struct bdi_writeback, dwork);
	struct backing_dev_info *bdi = wb->bdi;
	long pages_written;

	set_worker_desc("flush-%s", dev_name(bdi->dev));
	current->flags |= PF_SWAPWRITE;

	if (likely(!current_is_workqueue_rescuer() ||
		   !test_bit(BDI_registered, &bdi->state))) {
		/*
		 * The normal path.  Keep writing back @bdi until its
		 * work_list is empty.  Note that this path is also taken
		 * if @bdi is shutting down even when we're running off the
		 * rescuer as work_list needs to be drained.
		 */
		do {
			pages_written = wb_do_writeback(wb);
			trace_writeback_pages_written(pages_written);
		} while (!list_empty(&bdi->work_list));
	} else {
		/*
		 * bdi_wq can't get enough workers and we're running off
		 * the emergency worker.  Don't hog it.  Hopefully, 1024 is
		 * enough for efficient IO.
		 */
		pages_written = writeback_inodes_wb(&bdi->wb, 1024,
						    WB_REASON_FORKER_THREAD);
		trace_writeback_pages_written(pages_written);
	}

	if (!list_empty(&bdi->work_list))
		mod_delayed_work(bdi_wq, &wb->dwork, 0);
	else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
		bdi_wakeup_thread_delayed(bdi);

	current->flags &= ~PF_SWAPWRITE;
}

/*
 * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
 * the whole world.
 */
void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
{
	struct backing_dev_info *bdi;

	if (!nr_pages)
		nr_pages = get_nr_dirty_pages();

	rcu_read_lock();
	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
		if (!bdi_has_dirty_io(bdi))
			continue;
		__bdi_start_writeback(bdi, nr_pages, false, reason);
	}
	rcu_read_unlock();
}

static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
		struct dentry *dentry;
		const char *name = "?";

		dentry = d_find_alias(inode);
		if (dentry) {
			spin_lock(&dentry->d_lock);
			name = (const char *) dentry->d_name.name;
		}
		printk(KERN_DEBUG
		       "%s(%d): dirtied inode %lu (%s) on %s\n",
		       current->comm, task_pid_nr(current), inode->i_ino,
		       name, inode->i_sb->s_id);
		if (dentry) {
			spin_unlock(&dentry->d_lock);
			dput(dentry);
		}
	}
}

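/**
 * __mark_inode_dirty -	internal function
 * @inode: inode to mark
 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
 *
 * Mark an inode as dirty. Callers should use mark_inode_dirty or
 * mark_inode_dirty_sync.
 *
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL! We mark it dirty unconditionally, but move it onto the
 * dirty list only if it is hashed or if it refers to a blockdev.
 * If it was not hashed, it will never be added to the dirty list
 * even if it is later hashed, as it will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_ you start marking
 * them dirty.
 *
 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
 * the block-special inode (/dev/hda1) itself.  And the ->dirtied_when field of
 * the kernel-internal blockdev inode represents the dirtying time of the
 * blockdev's pages.  This is why for I_DIRTY_PAGES we always use
 * page->mapping->host, so the page-dirtying time is recorded in the internal
 * blockdev inode.
 */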
void __mark_inode_dirty(struct inode *inode, int flags)
{
	struct super_block *sb = inode->i_sb;
	struct backing_dev_info *bdi = NULL;

	/*
	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
	 * dirty the inode itself
	 */
	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
		trace_writeback_dirty_inode_start(inode, flags);

		if (sb->s_op->dirty_inode)
			sb->s_op->dirty_inode(inode, flags);

		trace_writeback_dirty_inode(inode, flags);
	}

	/*
	 * Make sure that changes are seen by all cpus before we test i_state.
	 */
	smp_mb();

	/* avoid the locking if we can */
	if ((inode->i_state & flags) == flags)
		return;

	if (unlikely(block_dump))
		block_dump___mark_inode_dirty(inode);

	spin_lock(&inode->i_lock);
	if ((inode->i_state & flags) != flags) {
		const int was_dirty = inode->i_state & I_DIRTY;

		inode->i_state |= flags;

		/*
		 * If the inode is being synced, just update its dirty state.
		 * The unlocker will place the inode on the appropriate
		 * superblock list, based upon its state.
		 */
		if (inode->i_state & I_SYNC)
			goto out_unlock_inode;

		/*
		 * Only add valid (hashed) inodes to the superblock's
		 * dirty list.  Add blockdev inodes as well.
		 */
		if (!S_ISBLK(inode->i_mode)) {
			if (inode_unhashed(inode))
				goto out_unlock_inode;
		}
		if (inode->i_state & I_FREEING)
			goto out_unlock_inode;

		/*
		 * If the inode was already on b_dirty/b_io/b_more_io, don't
		 * reposition it (that would break b_dirty time-ordering).
		 */
		if (!was_dirty) {
			bool wakeup_bdi = false;
			bdi = inode_to_bdi(inode);

			spin_unlock(&inode->i_lock);
			spin_lock(&bdi->wb.list_lock);
			if (bdi_cap_writeback_dirty(bdi)) {
				WARN(!test_bit(BDI_registered, &bdi->state),
				     "bdi-%s not registered\n", bdi->name);

				/*
				 * If this is the first dirty inode for this
				 * bdi, we have to wake-up the corresponding
				 * bdi thread to make sure background
				 * write-back happens later.
				 */
				if (!wb_has_dirty_io(&bdi->wb))
					wakeup_bdi = true;
			}

			inode->dirtied_when = jiffies;
			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
			spin_unlock(&bdi->wb.list_lock);

			if (wakeup_bdi)
				bdi_wakeup_thread_delayed(bdi);
			return;
		}
	}
out_unlock_inode:
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(__mark_inode_dirty);

static void wait_sb_inodes(struct super_block *sb)
{
	struct inode *inode, *old_inode = NULL;

	/*
	 * We need to be protected against the filesystem going from
	 * r/o to r/w or vice versa.
	 */
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	spin_lock(&inode_sb_list_lock);

	/*
	 * Data integrity sync. Must wait for all pages under writeback,
	 * because there may have been pages dirtied before our sync
	 * call, but which had writeout started before we write it out.
	 * In which case, the inode may not be on the dirty list, but
	 * we still have to wait for that writeout.
	 */
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
		    (mapping->nrpages == 0)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;

		filemap_fdatawait(mapping);

		cond_resched();

		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(old_inode);
}

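/**
 * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
 * @sb: the superblock
 * @nr: the number of pages to write
 * @reason: reason why some writeback work was initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
 * for IO completion of submitted IO.
 */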
void writeback_inodes_sb_nr(struct super_block *sb,
			    unsigned long nr,
			    enum wb_reason reason)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sb			= sb,
		.sync_mode		= WB_SYNC_NONE,
		.tagged_writepages	= 1,
		.done			= &done,
		.nr_pages		= nr,
		.reason			= reason,
	};

	if (sb->s_bdi == &noop_backing_dev_info)
		return;
	WARN_ON(!rwsem_is_locked(&sb->s_umount));
	bdi_queue_work(sb->s_bdi, &work);
	wait_for_completion(&done);
}
EXPORT_SYMBOL(writeback_inodes_sb_nr);

/**
 * writeback_inodes_sb	-	writeback dirty inodes from given super_block
 * @sb: the superblock
 * @reason: reason why some writeback work was initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
 * for IO completion of submitted IO.
 */
void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
{
	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
}
EXPORT_SYMBOL(writeback_inodes_sb);

/**
 * try_to_writeback_inodes_sb_nr - try to start writeback if none underway
 * @sb: the superblock
 * @nr: the number of pages to write
 * @reason: the reason of writeback
 *
 * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
 * Returns 1 if writeback was started, 0 if not.
 */
int try_to_writeback_inodes_sb_nr(struct super_block *sb,
				  unsigned long nr,
				  enum wb_reason reason)
{
	if (writeback_in_progress(sb->s_bdi))
		return 1;

	if (!down_read_trylock(&sb->s_umount))
		return 0;

	writeback_inodes_sb_nr(sb, nr, reason);
	up_read(&sb->s_umount);
	return 1;
}
EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr);

/**
 * try_to_writeback_inodes_sb - try to start writeback if none underway
 * @sb: the superblock
 * @reason: reason why some writeback work was initiated
 *
 * Implemented by try_to_writeback_inodes_sb_nr().
 * Returns 1 if writeback was started, 0 if not.
 */
int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
{
	return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
}
EXPORT_SYMBOL(try_to_writeback_inodes_sb);

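/**
 * sync_inodes_sb	-	sync sb inode pages
 * @sb: the superblock
 *
 * This function writes and waits on any dirty inode belonging to this
 * super_block.
 */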
void sync_inodes_sb(struct super_block *sb)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sb		= sb,
		.sync_mode	= WB_SYNC_ALL,
		.nr_pages	= LONG_MAX,
		.range_cyclic	= 0,
		.done		= &done,
		.reason		= WB_REASON_SYNC,
		.for_sync	= 1,
	};

	/* Nothing to do? */
	if (sb->s_bdi == &noop_backing_dev_info)
		return;
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	bdi_queue_work(sb->s_bdi, &work);
	wait_for_completion(&done);

	wait_sb_inodes(sb);
}
EXPORT_SYMBOL(sync_inodes_sb);

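/**
 * write_inode_now	-	write an inode to disk
 * @inode: inode to write to disk
 * @sync: whether the write should be synchronous or not
 *
 * This function commits an inode to disk immediately if it is dirty. This is
 * primarily needed by knfsd.
 *
 * The caller must either have a ref on the inode or must have set I_WILL_FREE.
 */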
int write_inode_now(struct inode *inode, int sync)
{
	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
	struct writeback_control wbc = {
		.nr_to_write = LONG_MAX,
		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	if (!mapping_cap_writeback_dirty(inode->i_mapping))
		wbc.nr_to_write = 0;

	might_sleep();
	return writeback_single_inode(inode, wb, &wbc);
}
EXPORT_SYMBOL(write_inode_now);

/**
 * sync_inode - write an inode and its pages to disk.
 * @inode: the inode to sync
 * @wbc: controls the writeback mode
 *
 * sync_inode() will write an inode and its pages to disk.  It will also
 * correctly update the inode on its superblock's dirty inode lists and will
 * update inode->i_state.
 *
 * The caller must have a ref on the inode.
 */
int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
	return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
}
EXPORT_SYMBOL(sync_inode);

/**
 * sync_inode_metadata - write an inode to disk
 * @inode: the inode to sync
 * @wait: wait for I/O to complete.
 *
 * Write an inode to disk and adjust its dirty state after completion.
 *
 * Note: only writes the actual inode, no associated data or other metadata.
 */
int sync_inode_metadata(struct inode *inode, int wait)
{
	struct writeback_control wbc = {
		.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
		.nr_to_write = 0,	/* metadata-only */
	};

	return sync_inode(inode, &wbc);
}
EXPORT_SYMBOL(sync_inode_metadata);